diff options
Diffstat (limited to 'source/luametatex/source/tex')
71 files changed, 75374 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/texadjust.c b/source/luametatex/source/tex/texadjust.c new file mode 100644 index 000000000..279af8950 --- /dev/null +++ b/source/luametatex/source/tex/texadjust.c @@ -0,0 +1,393 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +static void tex_scan_adjust_keys(halfword *options, halfword *code, halfword *index, scaled *depthbefore, scaled *depthafter, halfword *attrlist) +{ + *code = post_adjust_code; + *options = adjust_option_none; + *index = 0; + *depthbefore = 0; + *depthafter = 0; + *attrlist = null; + while (1) { + switch (tex_scan_character("abdipABDIP", 0, 1, 0)) { + case 'p': case 'P': + switch (tex_scan_character("roRO", 0, 0, 0)) { + case 'r': case 'R': + if (tex_scan_mandate_keyword("pre", 2)) { + *code = pre_adjust_code; + } + break; + case 'o': case 'O': + if (tex_scan_mandate_keyword("post", 2)) { + *code = post_adjust_code; + } + break; + default: + tex_aux_show_keyword_error("pre|post"); + goto DONE; + } + break; + case 'b': case 'B': + switch (tex_scan_character("aeAE", 0, 0, 0)) { + case 'a': case 'A': + if (tex_scan_mandate_keyword("baseline", 2)) { + *options |= adjust_option_baseline; + } + break; + case 'e': case 'E': + if (tex_scan_mandate_keyword("before", 2)) { + *options |= adjust_option_before; + } + break; + default: + tex_aux_show_keyword_error("baseline|before"); + goto DONE; + } + break; + case 'i': case 'I': + if (tex_scan_mandate_keyword("index", 1)) { + *index = tex_scan_int(0, NULL); + if (! 
tex_valid_adjust_index(*index)) { + *index = 0; /* for now no error */ + } + } + break; + case 'a': case 'A': + switch (tex_scan_character("ftFT", 0, 0, 0)) { + case 'f': case 'F': + if (tex_scan_mandate_keyword("after", 2)) { + *options &= ~(adjust_option_before | *options); + } + break; + case 't': case 'T': + if (tex_scan_mandate_keyword("attr", 2)) { + halfword i = tex_scan_attribute_register_number(); + halfword v = tex_scan_int(1, NULL); + if (eq_value(register_attribute_location(i)) != v) { + if (*attrlist) { + *attrlist = tex_patch_attribute_list(*attrlist, i, v); + } else { + *attrlist = tex_copy_attribute_list_set(tex_current_attribute_list(), i, v); + } + } + } + break; + default: + tex_aux_show_keyword_error("after|attr"); + goto DONE; + } + break; + case 'd': case 'D': + if (tex_scan_mandate_keyword("depth", 1)) { + switch (tex_scan_character("abclABCL", 0, 1, 0)) { /* so a space is permitted */ + case 'a': case 'A': + if (tex_scan_mandate_keyword("after", 1)) { + *options |= adjust_option_depth_after; + *depthafter = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'b': case 'B': + if (tex_scan_mandate_keyword("before", 1)) { + *options |= adjust_option_depth_before; + *depthbefore = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'c': case 'C': + if (tex_scan_mandate_keyword("check", 1)) { + *options |= adjust_option_depth_check; + } + break; + case 'l': case 'L': + if (tex_scan_mandate_keyword("last", 1)) { + *options |= adjust_option_depth_last; + } + break; + default: + tex_aux_show_keyword_error("after|before|check|last"); + goto DONE; + } + } + break; + default: + goto DONE; + } + } + DONE: + return; +} + +int tex_valid_adjust_index(halfword n) +{ + return n >= 0; +} + +void tex_run_vadjust(void) +{ + halfword code = post_adjust_code; + halfword options = adjust_option_none; + halfword index = 0; + scaled depthbefore = 0; + scaled depthafter = 0; + halfword attrlist = null; + tex_scan_adjust_keys(&options, &code, &index, &depthbefore, 
&depthafter, &attrlist); + tex_set_saved_record(saved_adjust_item_location, saved_adjust_location, 0, code); + tex_set_saved_record(saved_adjust_item_options, saved_adjust_options, 0, options); + tex_set_saved_record(saved_adjust_item_index, saved_adjust_index, 0, index); + tex_set_saved_record(saved_adjust_item_attr_list, saved_adjust_attr_list, 0, attrlist); + tex_set_saved_record(saved_adjust_item_depth_before, saved_adjust_depth_before, 0, depthbefore); + tex_set_saved_record(saved_adjust_item_depth_after, saved_adjust_depth_after, 0, depthafter); + lmt_save_state.save_stack_data.ptr += saved_adjust_n_of_items; + tex_new_save_level(vadjust_group); + tex_scan_left_brace(); + tex_normal_paragraph(vadjust_par_context); + tex_push_nest(); + cur_list.mode = -vmode; + cur_list.prev_depth = ignore_depth; +} + +void tex_finish_vadjust_group(void) +{ + if (! tex_wrapped_up_paragraph(vadjust_par_context)) { + halfword box, topskip, adjust; /*tex for short-term use */ + tex_end_paragraph(vadjust_group, vadjust_par_context); + topskip = tex_new_glue_node(split_top_skip_par, top_skip_code); /* cheat */ + tex_unsave(); + lmt_save_state.save_stack_data.ptr -= saved_adjust_n_of_items; + box = tex_vpack(node_next(cur_list.head), 0, packing_additional, max_dimen, direction_unknown, holding_none_option); + tex_pop_nest(); + adjust = tex_new_node(adjust_node, (quarterword) saved_value(saved_adjust_item_location)); + tex_tail_append(adjust); + adjust_list(adjust) = box_list(box); + adjust_options(adjust) = (halfword) saved_value(saved_adjust_item_options); + adjust_index(adjust) = (halfword) saved_value(saved_adjust_item_index); + adjust_depth_before(adjust) = (halfword) saved_value(saved_adjust_item_depth_before); + adjust_depth_after(adjust) = (halfword) saved_value(saved_adjust_item_depth_after); + tex_attach_attribute_list_attribute(adjust, (halfword) saved_value(saved_adjust_item_attr_list)); + tex_flush_node(topskip); + box_list(box) = null; + tex_flush_node(box); + /* we 
never do the callback ... maybe move it outside */ + if (lmt_nest_state.nest_data.ptr == 0) { + if (! lmt_page_builder_state.output_active) { + lmt_page_filter_callback(vadjust_page_context, 0); + } + tex_build_page(); + } + } +} + +/*tex Append or prepend vadjust nodes. Here head is a temp node! */ + +halfword tex_append_adjust_list(halfword head, halfword tail, halfword adjust) +{ + while (adjust && node_type(adjust) == adjust_node) { + halfword next = node_next(adjust); + if (tail == head) { + node_next(head) = adjust; + } else { + tex_couple_nodes(tail, adjust); + } + if (tracing_adjusts_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[adjust: index %i, location %s, append]", adjust_index(adjust), tex_aux_subtype_str(adjust)); + tex_print_node_list(adjust_list(adjust), "adjust",show_box_depth_par, show_box_breadth_par); + tex_end_diagnostic(); + } + tail = adjust; + adjust = next; + } + return tail; +} + +halfword tex_prepend_adjust_list(halfword head, halfword tail, halfword adjust) +{ + while (adjust && node_type(adjust) == adjust_node) { + halfword next = node_next(adjust); + if (tail == head) { + node_next(head) = adjust; + tail = adjust; + } else { + tex_try_couple_nodes(adjust, node_next(node_next(head))); + tex_couple_nodes(node_next(head), adjust); + } + if (tracing_adjusts_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[adjust: index %i, location %s, prepend]", adjust_index(adjust), tex_aux_subtype_str(adjust)); + tex_print_node_list(adjust_list(adjust), "adjust", show_box_depth_par, show_box_breadth_par); + tex_end_diagnostic(); + } + adjust = next; + } + return tail; +} + +void tex_inject_adjust_list(halfword adjust, int obeyoptions, halfword nextnode, const line_break_properties *properties) +{ + adjust = node_next(adjust); + if (adjust) { + while (adjust && node_type(adjust) == adjust_node) { + halfword list = adjust_list(adjust); + halfword next = node_next(adjust); + if (list) { + halfword prevnode = cur_list.tail; + if 
(tracing_adjusts_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[adjust: index %i, location %s, inject]", adjust_index(adjust), tex_aux_subtype_str(adjust)); + tex_print_node_list(adjust_list(adjust), "adjust", show_box_depth_par, show_box_breadth_par); + tex_end_diagnostic(); + } + if (obeyoptions && has_adjust_option(adjust, adjust_option_baseline)) { + /*tex + Here we attach data to a line. On the todo is to prepend and append to + the lines (nicer when we number lines). + */ + if (node_type(list) == hlist_node || node_type(list) == vlist_node) { + if (nextnode) { + /*tex + This is the |pre| case where |nextnode| is the line to be appended + after the adjust box |list|. + */ + if (node_type(nextnode) == hlist_node || node_type(nextnode) == vlist_node) { + if (box_height(nextnode) > box_height(list)) { + box_height(list) = box_height(nextnode); + } + if (box_depth(list) > box_depth(nextnode)) { + box_depth(nextnode) = box_depth(list); + } + /* not ok yet */ + box_y_offset(nextnode) += box_height(nextnode); + tex_check_box_geometry(nextnode); + /* till here */ + box_height(nextnode) = 0; + box_depth(list) = 0; + } + } else { + /*tex + Here we have the |post| case where the line will end up before the + adjusted content. 
+ */ + if (node_type(prevnode) == hlist_node || node_type(prevnode) == vlist_node) { + if (box_height(prevnode) < box_height(list)) { + box_height(prevnode) = box_height(list); + } + if (box_depth(list) < box_depth(prevnode)) { + box_depth(list) = box_depth(prevnode); + } + box_height(list) = 0; + box_depth(prevnode) = 0; + } + } + } + } + if (obeyoptions && has_adjust_option(adjust, adjust_option_depth_before)) { + cur_list.prev_depth = adjust_depth_before(adjust); + } + if (obeyoptions && has_adjust_option(adjust, adjust_option_depth_check)) { + tex_append_to_vlist(list, -1, properties); + } else { + tex_couple_nodes(prevnode, list); + } + if (obeyoptions && has_adjust_option(adjust, adjust_option_depth_after)) { + cur_list.prev_depth = adjust_depth_after(adjust); + } else if (obeyoptions && has_adjust_option(adjust, adjust_option_depth_last)) { + cur_list.prev_depth = box_depth(list); + } + cur_list.tail = tex_tail_of_node_list(cur_list.tail); + if (! lmt_page_builder_state.output_active) { + lmt_append_line_filter_callback(post_adjust_append_line_context, adjust_index(adjust)); + } + } + adjust_list(adjust) = null; + tex_flush_node(adjust); + adjust = next; + } + } +} + +void tex_adjust_attach(halfword box, halfword adjust) +{ + if (adjust_list(adjust)) { + node_prev(adjust) = null; + node_next(adjust) = null; + switch (node_subtype(adjust)) { + case pre_adjust_code: + if (! box_pre_adjusted(box)) { + box_pre_adjusted(box) = adjust; + } else if (has_adjust_option(adjust, adjust_option_before)) { + tex_couple_nodes(adjust, box_pre_adjusted(box)); + box_pre_adjusted(box) = adjust; + } else { + tex_couple_nodes(tex_tail_of_node_list(box_pre_adjusted(box)), adjust); + } + node_subtype(adjust) = local_adjust_code; + break; + case post_adjust_code: + if (! 
box_post_adjusted(box)) { + box_post_adjusted(box) = adjust; + } else if (has_adjust_option(adjust, adjust_option_before)) { + tex_couple_nodes(adjust, box_post_adjusted(box)); + box_post_adjusted(box) = adjust; + } else { + tex_couple_nodes(tex_tail_of_node_list(box_post_adjusted(box)), adjust); + } + node_subtype(adjust) = local_adjust_code; + break; + case local_adjust_code: + tex_normal_error("vadjust post", "unexpected local attach"); + break; + } + } else { + tex_flush_node(adjust); + } +} + +void tex_adjust_passon(halfword box, halfword adjust) +{ + halfword head = adjust ? adjust_list(adjust) : null; + (void) box; + if (head) { + node_prev(adjust) = null; + node_next(adjust) = null; + switch (node_subtype(adjust)) { + case pre_adjust_code: + if (lmt_packaging_state.pre_adjust_tail) { + if (lmt_packaging_state.pre_adjust_tail != pre_adjust_head && has_adjust_option(adjust, adjust_option_before)) { + lmt_packaging_state.pre_adjust_tail = tex_prepend_adjust_list(pre_adjust_head, lmt_packaging_state.pre_adjust_tail, adjust); + } else { + lmt_packaging_state.pre_adjust_tail = tex_append_adjust_list(pre_adjust_head, lmt_packaging_state.pre_adjust_tail, adjust); + } + } else { + tex_normal_error("vadjust pre", "invalid list"); + } + break; + case post_adjust_code: + if (lmt_packaging_state.post_adjust_tail) { + if (lmt_packaging_state.post_adjust_tail != post_adjust_head && has_adjust_option(adjust, adjust_option_before)) { + lmt_packaging_state.post_adjust_tail = tex_prepend_adjust_list(post_adjust_head, lmt_packaging_state.post_adjust_tail, adjust); + } else { + lmt_packaging_state.post_adjust_tail = tex_append_adjust_list(post_adjust_head, lmt_packaging_state.post_adjust_tail, adjust); + } + } else { + tex_normal_error("vadjust post", "invalid list"); + } + break; + case local_adjust_code: + tex_normal_error("vadjust post", "unexpected local passon"); + break; + } + } else { + tex_flush_node(adjust); + } +} + +void tex_initialize_adjust(void) +{ +} + +void 
tex_cleanup_adjust(void) +{ +} diff --git a/source/luametatex/source/tex/texadjust.h b/source/luametatex/source/tex/texadjust.h new file mode 100644 index 000000000..19c116f26 --- /dev/null +++ b/source/luametatex/source/tex/texadjust.h @@ -0,0 +1,36 @@ +/* + See license.txt in the root of this project. +*/ + +/*tex More will move here. */ + +# ifndef LMT_ADJUST_H +# define LMT_ADJUST_H + +typedef enum saved_adjust_items { + saved_adjust_item_location = 0, + saved_adjust_item_options = 1, + saved_adjust_item_index = 2, + saved_adjust_item_attr_list = 3, + saved_adjust_item_depth_before = 4, + saved_adjust_item_depth_after = 5, + saved_adjust_n_of_items = 6, +} saved_adjust_items; + +extern void tex_initialize_adjust (void); +extern void tex_cleanup_adjust (void); + +extern void tex_run_vadjust (void); +extern void tex_finish_vadjust_group (void); + +extern int tex_valid_adjust_index (halfword n); + +extern void tex_inject_adjust_list (halfword list, int obeyoptions, halfword nextnode, const line_break_properties *properties); + +extern void tex_adjust_passon (halfword box, halfword adjust); +extern void tex_adjust_attach (halfword box, halfword adjust); + +extern halfword tex_prepend_adjust_list (halfword head, halfword tail, halfword adjust); +extern halfword tex_append_adjust_list (halfword head, halfword tail, halfword adjust); + +# endif
\ No newline at end of file diff --git a/source/luametatex/source/tex/texalign.c b/source/luametatex/source/tex/texalign.c new file mode 100644 index 000000000..207895a6d --- /dev/null +++ b/source/luametatex/source/tex/texalign.c @@ -0,0 +1,1854 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + It's sort of a miracle whenever |\halign| and |\valign| work, because they cut across so many of + the control structures of \TEX. Therefore the present page is probably not the best place for + a beginner to start reading this program; it is better to master everything else first. + + Let us focus our thoughts on an example of what the input might be, in order to get some idea + about how the alignment miracle happens. The example doesn't do anything useful, but it is + sufficiently general to indicate all of the special cases that must be dealt with; please do + not be disturbed by its apparent complexity and meaninglessness. + + \starttyping + \tabskip 2pt plus 3pt + \halign to 300pt{u1#v1& + \hskip 50pt \tabskip 1pt plus 1fil u2#v2& + \hskip 50pt u3#v3\cr + \hskip 25pt a1&\omit a2&\vrule\cr + \hskip 25pt \noalign{\vskip 3pt} + \hskip 25pt b1\span b2\cr + \hskip 25pt \omit&c2\span\omit\cr} + \stoptyping + + Here's what happens: + + \startitemize + + \startitem + When |\halign to 300pt {}| is scanned, the |scan_align_spec| routine places the 300pt + dimension onto the |save_stack|, and an |align_group| code is placed above it. This + will make it possible to complete the alignment when the matching right brace is found. + \stopitem + + \startitem + The preamble is scanned next. Macros in the preamble are not expanded, except as part + of a tabskip specification. For example, if |u2| had been a macro in the preamble above, + it would have been expanded, since \TEX\ must look for |minus ...| as part of the + tabskip glue. 
A preamble list is constructed based on the user's preamble; in our case + it contains the following seven items: + + \starttabulate + \NC \type{\glue 2pt plus 3pt} \NC the tabskip preceding column 1 \NC \NR + \NC \type{\alignrecord} of width $-\infty$ \NC preamble info for column 1 \NC \NR + \NC \type{\glue 2pt plus 3pt} \NC the tabskip between columns 1 and 2 \NC \NR + \NC \type{\alignrecord} of width $-\infty$ \NC preamble info for column 2 \NC \NR + \NC \type{\glue 1pt plus 1fil} \NC the tabskip between columns 2 and 3 \NC \NR + \NC \type{\alignrecord} of width $-\infty$ \NC preamble info for column 3 \NC \NR + \NC \type{\glue 1pt plus 1fil} \NC the tabskip following column 3 \NC \NR + \stoptabulate + + These \quote {alignrecord} entries have the same size as an |unset_node|, since they + will later be converted into such nodes. These alignrecord nodes have no |depth| field; + this is split into |u_part| and |v_part|, and they point to token lists for the + templates of the alignment. For example, the |u_part| field in the first alignrecord + points to the token list |u1|, i.e., the template preceding the \type {#} for column~1. + Furthermore, they have a |span_ptr| instead of a |node_attr| field, and these |span_ptr| + fields are initially set to the value |end_span|, for reasons explained below. + \stopitem + + \startitem + \TEX\ now looks at what follows the |\cr| that ended the preamble. It is not |\noalign| + or |\omit|, so this input is put back to be read again, and the template |u1| is fed to + the scanner. Just before reading |u1|, \TEX\ goes into restricted horizontal mode. Just + after reading |u1|, \TEX\ will see |a1|, and then (when the |&| is sensed) \TEX\ will + see |v1|. Then \TEX\ scans an |end_template| token, indicating the end of a column. At + this point an |unset_node| is created, containing the contents of the current hlist + (i.e., |u1a1v1|). 
The natural width of this unset node replaces the |width| field of + the alignrecord for column~1; in general, the alignrecords will record the maximum + natural width that has occurred so far in a given column. + \stopitem + + \startitem + Since |\omit| follows the |&|, the templates for column~2 are now bypassed. Again \TEX\ + goes into restricted horizontal mode and makes an |unset_node| from the resulting hlist; + but this time the hlist contains simply |a2|. The natural width of the new unset box is + remembered in the |width| field of the alignrecord for column~2. + \stopitem + + \startitem + A third |unset_node| is created for column 3, using essentially the mechanism that + worked for column~1; this unset box contains |u3\vrule v3|. The vertical rule in this + case has running dimensions that will later extend to the height and depth of the whole + first row, since each |unset_node| in a row will eventually inherit the height and depth + of its enclosing box. + \stopitem + + \startitem + The first row has now ended; it is made into a single unset box comprising the following + seven items: + + \starttyping + \glue 2pt plus 3pt + \unsetbox for 1 column: u1a1v1 + \glue 2pt plus 3pt + \unsetbox for 1 column: a2 + \glue 1pt plus 1fil + \unsetbox for 1 column: u3\vrule v3 + \glue 1pt plus 1fil + \stoptyping + + The width of this unset row is unimportant, but it has the correct height and depth, so + the correct baselineskip glue will be computed as the row is inserted into a vertical + list. + \stopitem + + \startitem + Since |\noalign| follows the current |\cr|, \TEX\ appends additional material (in this + case |\vskip 3pt|) to the vertical list. While processing this material, \TeX\ will be + in internal vertical mode, and |no_align_group| will be on |save_stack|. 
+ \stopitem + + \startitem + The next row produces an unset box that looks like this: + + \starttyping + \glue 2pt plus 3pt + \unsetbox for 2 columns: u1b1v1u2b2v2 + \glue 1pt plus 1fil + \unsetbox for 1 column: {(empty)} + \glue 1pt plus 1fil + \stoptyping + + The natural width of the unset box that spans columns 1~and~2 is stored in a \quote + {span node}, which we will explain later; the |span_ptr| field of the alignrecord for + column~1 now points to the new span node, and the |span_ptr| of the span node points to + |end_span|. + \stopitem + + \startitem + + The final row produces the unset box + + \starttyping + \glue 2pt plus 3pt + \unsetbox for 1 column: (empty) + \glue 2pt plus 3pt + \unsetbox for 2 columns: u2c2v2 + \glue 1pt plus 1fil + \stoptyping + + A new span node is attached to the align record for column 2. + \stopitem + + \startitem + The last step is to compute the true column widths and to change all the unset boxes to + hboxes, appending the whole works to the vertical list that encloses the |\halign|. The + rules for deciding on the final widths of each unset column box will be explained below. + \stopitem + + \stopitemize + + Note that as |\halign| is being processed, we fearlessly give up control to the rest of \TEX. At + critical junctures, an alignment routine is called upon to step in and do some little action, but + most of the time these routines just lurk in the background. It's something like post-hypnotic + suggestion. + + We have mentioned that alignrecords contain no |height| or |depth| fields. Their |glue_sign| and + |glue_order| are pre-empted as well, since it is necessary to store information about what to do + when a template ends. This information is called the |extra_info| field. + + Alignments can occur within alignments, so a small stack is used to access the alignrecord + information. 
At each level we have a |preamble| pointer, indicating the beginning of the + preamble list; a |cur_align| pointer, indicating the current position in the preamble list; a + |cur_span| pointer, indicating the value of |cur_align| at the beginning of a sequence of + spanned columns; a |cur_loop| pointer, indicating the tabskip glue before an alignrecord that + should be copied next if the current list is extended; and the |align_state| variable, which + indicates the nesting of braces so that |\cr| and |\span| and tab marks are properly + intercepted. There also are pointers |cur_head| and |cur_tail| to the head and tail of a list + of adjustments being moved out from horizontal mode to vertical~mode, and likewise |cur_pre_head| + and |cur_pre_tail| for pre-adjust lists. + + The current values of these nine quantities appear in global variables; when they have to be + pushed down, they are stored in 6-word nodes, and |align_ptr| points to the topmost such node. + +*/ + +/*tex + + So far, hardly anything has been added to the alignment code so the above original \TEX\ + program documentation still applies. Of course we have callbacks. Attributes are a bit + complicating here. I experimented with some row and cell specific ones but grouping will always + make it messy. One never knows what a preamble injects. So leaving it as-is is better than a + suboptimal solution with side effects. To mention one aspect: we have unset nodes that use the + attribute fields for other purposes and get adapted later on anyway. I'll look into it again + at some point. + + Contrary to other mechanisms, there are not that many extensions. One is that we can nest + |\noalign| (so we don't need kludges at the macro level). The look ahead trickery has not been + changed but we might get some variants (we have protected macros so it's not as sensitive as + it was in the past). + + The |\tabsize| feature is experimental and possibly a prelude to more. I played with that 
I played with that + when a test file (korean font table) was allocating so many nodes that I wondered if we could + limit that (and redundant boxes and glue are the only things we can do here). It actually + also saves a bit of runtime. This feature has not been tested yet with |\span| and |\omit|. + +*/ + +/* + Todo: lefttabskip righttabskip middletabskip +*/ + +typedef struct alignment_state_info { + halfword cur_align; /*tex The current position in the preamble list. */ + halfword cur_span; /*tex The start of the currently spanned columns in the preamble list. */ + halfword cur_loop; /*tex A place to copy when extending a periodic preamble. */ + halfword align_ptr; /*tex The most recently pushed-down alignment stack node. */ + halfword cur_post_adjust_head; /*tex Adjustment list head pointer. */ + halfword cur_post_adjust_tail; /*tex Adjustment list tail pointer. */ + halfword cur_pre_adjust_head; /*tex Pre-adjustment list head pointer. */ + halfword cur_pre_adjust_tail; /*tex Pre-adjustment list tail pointer. 
*/ + halfword cur_post_migrate_head; + halfword cur_post_migrate_tail; + halfword cur_pre_migrate_head; + halfword cur_pre_migrate_tail; + halfword hold_token_head; /*tex head of a temporary list of another kind */ + halfword omit_template; /*tex a constant token list */ + halfword no_align_level; + halfword no_tab_skips; + halfword attr_list; + halfword cell_source; + halfword wrap_source; + halfword callback; + // halfword reverse; // todo + // halfword discard_skips; // todo +} alignment_state_info ; + +static alignment_state_info lmt_alignment_state = { + .cur_align = null, + .cur_span = null, + .cur_loop = null, + .align_ptr = null, + .cur_post_adjust_head = null, + .cur_post_adjust_tail = null, + .cur_pre_adjust_head = null, + .cur_pre_adjust_tail = null, + .cur_post_migrate_head = null, + .cur_post_migrate_tail = null, + .cur_pre_migrate_head = null, + .cur_pre_migrate_tail = null, + .hold_token_head = null, /*tex head of a temporary list of another kind */ + .omit_template = null, /*tex a constant token list */ + .no_align_level = 0, + .no_tab_skips = 0, + .attr_list = null, + .cell_source = 0, + .wrap_source = 0, + .callback = 0, + // .reverse = 0, + // .discard_skips = 0, +}; + +/*tex We could as well save these in the alignment stack. */ + +typedef enum saved_align_items { + saved_align_specification, + saved_align_reverse, + saved_align_discard, + saved_align_noskips, /*tex Saving is not needed but it doesn't hurt either */ + saved_align_callback, + saved_align_n_of_items, +} saved_align_items; + +/*tex The current preamble list: */ + +# define preamble node_next(align_head) + +/*tex We use them before we define them: */ + +static void tex_aux_initialize_row (void); +static void tex_aux_initialize_column (void); +static void tex_aux_finish_row (void); +static int tex_aux_finish_column (void); +static void tex_aux_finish_align (void); + +/*tex + We get |alignment_record| into |unset_node| and |unset_node| into |[hv]list_node|. 
And because + we can access the fields later on we make sure that we wipe them. The box orientation field kind + of protects reading them but still it's nicer this way. In general in \LUATEX\ and \LUAMETATEX\ + we need to be more careful because we expose fields. +*/ + +/*tex Give node |n| the node type |type| and zero its offset and orientation fields; the trailing notes name the fields of the former node kind that share these slots. */ + +inline static void tex_aux_change_list_type(halfword n, quarterword type) +{ + node_type(n) = type; + box_w_offset(n) = 0; /* box_glue_stretch align_record_span_ptr */ + box_h_offset(n) = 0; /* box_glue_shrink align_record_extra_info */ + box_d_offset(n) = 0; /* box_span_count */ + box_x_offset(n) = 0; /* align_record_u_part */ + box_y_offset(n) = 0; /* align_record_v_part */ + // box_geometry(n) = 0; /* box_size */ + box_orientation(n) = 0; /* box_size */ +} + +/*tex + + The |align_state| and |preamble| variables are initialized elsewhere. Alignment stack + maintenance is handled by a pair of trivial routines called |push_alignment| and |pop_alignment|. + + It makes not much sense to add support for an |attr| keyword to |\halign| and |\valign| because + then we need to decide if we tag rows or cells or both or come up with |cellattr| and |rowattr| + and such. But then it even makes sense to have explicit commands (in addition to the separator) + to tag individual cells. It's too much hassle for now and the advantages are not that large. 
+ +*/ + +/*tex Save the current alignment state in a fresh |align_stack_node| that becomes the new |align_ptr|, then allocate new temp nodes for the four adjust and migrate list heads and reset the two source fields. */ + +static void tex_aux_push_alignment(void) +{ + /*tex The new alignment stack node: */ + halfword p = tex_new_node(align_stack_node, 0); + align_stack_align_ptr(p) = lmt_alignment_state.align_ptr; + align_stack_cur_align(p) = lmt_alignment_state.cur_align; + align_stack_preamble(p) = preamble; + align_stack_cur_span(p) = lmt_alignment_state.cur_span; + align_stack_cur_loop(p) = lmt_alignment_state.cur_loop; + align_stack_align_state(p) = lmt_input_state.align_state; + align_stack_wrap_source(p) = lmt_alignment_state.wrap_source; + align_stack_no_align_level(p) = lmt_alignment_state.no_align_level; + align_stack_cur_post_adjust_head(p) = lmt_alignment_state.cur_post_adjust_head; + align_stack_cur_post_adjust_tail(p) = lmt_alignment_state.cur_post_adjust_tail; + align_stack_cur_pre_adjust_head(p) = lmt_alignment_state.cur_pre_adjust_head; + align_stack_cur_pre_adjust_tail(p) = lmt_alignment_state.cur_pre_adjust_tail; + align_stack_cur_post_migrate_head(p) = lmt_alignment_state.cur_post_migrate_head; + align_stack_cur_post_migrate_tail(p) = lmt_alignment_state.cur_post_migrate_tail; + align_stack_cur_pre_migrate_head(p) = lmt_alignment_state.cur_pre_migrate_head; + align_stack_cur_pre_migrate_tail(p) = lmt_alignment_state.cur_pre_migrate_tail; + align_stack_no_tab_skips(p) = lmt_alignment_state.no_tab_skips; + align_stack_attr_list(p) = lmt_alignment_state.attr_list; + lmt_alignment_state.align_ptr = p; + lmt_alignment_state.cur_post_adjust_head = tex_new_temp_node(); + lmt_alignment_state.cur_pre_adjust_head = tex_new_temp_node(); + lmt_alignment_state.cur_post_migrate_head = tex_new_temp_node(); + lmt_alignment_state.cur_pre_migrate_head = tex_new_temp_node(); + /*tex The source fields start at zero for the new level; |wrap_source| was saved above, |cell_source| is not saved. */ + lmt_alignment_state.cell_source = 0; + lmt_alignment_state.wrap_source = 0; +} + +/*tex The counterpart of |tex_aux_push_alignment|: flush the four temp head nodes of the level that ends, restore everything that was saved in the top stack node and flush that node. This has to happen in this order because the head fields are overwritten by the restore. */ + +static void tex_aux_pop_alignment(void) +{ + /*tex The top alignment stack node: */ + halfword p = lmt_alignment_state.align_ptr; + tex_flush_node(lmt_alignment_state.cur_post_adjust_head); + 
tex_flush_node(lmt_alignment_state.cur_pre_adjust_head); + tex_flush_node(lmt_alignment_state.cur_post_migrate_head); + tex_flush_node(lmt_alignment_state.cur_pre_migrate_head); + lmt_alignment_state.align_ptr = align_stack_align_ptr(p); + lmt_alignment_state.cur_align = align_stack_cur_align(p); + preamble = align_stack_preamble(p); + lmt_alignment_state.cur_span = align_stack_cur_span(p); + lmt_alignment_state.cur_loop = align_stack_cur_loop(p); + lmt_input_state.align_state = align_stack_align_state(p); + lmt_alignment_state.wrap_source = align_stack_wrap_source(p); + lmt_alignment_state.no_align_level = align_stack_no_align_level(p); + lmt_alignment_state.cur_post_adjust_head = align_stack_cur_post_adjust_head(p); + lmt_alignment_state.cur_post_adjust_tail = align_stack_cur_post_adjust_tail(p); + lmt_alignment_state.cur_pre_adjust_head = align_stack_cur_pre_adjust_head(p); + lmt_alignment_state.cur_pre_adjust_tail = align_stack_cur_pre_adjust_tail(p); + lmt_alignment_state.cur_post_migrate_head = align_stack_cur_post_migrate_head(p); + lmt_alignment_state.cur_post_migrate_tail = align_stack_cur_post_migrate_tail(p); + lmt_alignment_state.cur_pre_migrate_head = align_stack_cur_pre_migrate_head(p); + lmt_alignment_state.cur_pre_migrate_tail = align_stack_cur_pre_migrate_tail(p); + lmt_alignment_state.no_tab_skips = align_stack_no_tab_skips(p); + lmt_alignment_state.attr_list = align_stack_attr_list(p); + tex_flush_node(p); +} + +/*tex + + \TEX\ has eight procedures that govern alignments: |initialize_align| and |finish_align| are + used at the very beginning and the very end; |initialize_row| and |finish_row| are used at + the beginning and end of individual rows; |initialize_span| is used at the beginning of a + sequence of spanned columns (possibly involving only one column); |initialize_column| and + |finish_column| are used at the beginning and end of individual columns; and |align_peek| is + used after |\cr| to see whether the next item is |\noalign|. 
+ + We shall consider these routines in the order they are first used during the course of a + complete |\halign|, namely |initialize_align|, |align_peek|, |initialize_row|, + |initialize_span|, |initialize_column|, |finish_column|, |finish_row|, |finish_align|. + + The preamble is copied directly, except that |\tabskip| causes a change to the tabskip glue, + thereby possibly expanding macros that immediately follow it. An appearance of |\span| also + causes such an expansion. + + Note that if the preamble contains |\global\tabskip|, the |\global| token survives in the + preamble and the |\tabskip| defines new tabskip glue (locally). + + We enter |\span| into |eqtb| with |tab_mark| as its command code, and with |span_code| as the + command modifier. This makes \TEX\ interpret it essentially the same as an alignment delimiter + like |&|, yet it is recognizably different when we need to distinguish it from a normal + delimiter. It also turns out to be useful to give a special |cr_code| to |\cr|, and an even + larger |cr_cr_code| to |\crcr|. + + The end of a template is represented by two frozen control sequences called |\endtemplate|. The + first has the command code |end_template|, which is |> outer_call|, so it will not easily + disappear in the presence of errors. The |get_x_token| routine converts the first into the + second, which has |endv| as its command code. + + The |cr_code| is distinct from |span_code| and from any character and |\crcr| differs from + |\cr|. +*/ + +/* + In \LUAMETATEX\ the code has been adapted a bit. Because we have some access to alignment + related properties (commands, lists, etc.), the command codes have been reshuffled and + combined. Instead of dedicated cmd codes, we have a shared cmd with subtypes. The logic + hasn't changed, just the triggering of actions. In theory there can be a performance penalty + (due to extra checking) but in practice that will not be noticed because this seldom happens. 
+ The advange is that we have a uniform token interface. It also makes it possible to extend + the code. + +*/ + +static void tex_aux_get_preamble_token(void) +{ + RESTART: + tex_get_token(); + while (cur_cmd == alignment_cmd && cur_chr == span_code) { + /*tex This token will be expanded once. */ + tex_get_token(); + if (cur_cmd > max_command_cmd) { + tex_expand_current_token(); + tex_get_token(); + } + } + switch (cur_cmd) { + case end_template_cmd: + tex_alignment_interwoven_error(5); + break; + case internal_glue_cmd: + if (cur_chr == internal_glue_location(tab_skip_code)) { + halfword v = tex_scan_glue(glue_val_level, 1); + if (global_defs_par > 0) { + update_tex_tab_skip_global(v); + } else { + update_tex_tab_skip_local(v); + } + goto RESTART; + } else { + break; + } + case internal_dimen_cmd: + if (cur_chr == internal_dimen_location(tab_size_code)) { + scaled v = tex_scan_dimen(0, 0, 0, 1, NULL); + tex_word_define(global_defs_par > 0 ? global_flag_bit : 0, internal_dimen_location(tab_size_code), v); + goto RESTART; + } else { + break; + } + case call_cmd: + case protected_call_cmd: + case semi_protected_call_cmd: + case tolerant_call_cmd: + case tolerant_protected_call_cmd: + case tolerant_semi_protected_call_cmd: + if (has_eq_flag_bits(cur_cs, noaligned_flag_bit)) { + tex_expand_current_token(); + goto RESTART; + } else { + break; + } + } +} + +/*tex + + When |\halign| or |\valign| has been scanned in an appropriate mode, \TEX\ calls + |initialize_align|, whose task is to get everything off to a good start. This mostly involves + scanning the preamble and putting its information into the preamble list. 
+ +*/ + +static void tex_aux_scan_align_spec(quarterword c) +{ + quarterword mode = packing_additional; + quarterword reverse = 0; + quarterword discard = 0; + quarterword noskips = 0; + quarterword callback = 0; + scaled amount = 0; + halfword attrlist = null; + int brace = 0; + while (1) { + cur_val = 0; /* why */ + switch (tex_scan_character("acdnrtsACDNRTS", 1, 1, 1)) { + case 0: + goto DONE; + case 'a': case 'A': + if (tex_scan_mandate_keyword("attr", 1)) { + halfword i = tex_scan_attribute_register_number(); + halfword v = tex_scan_int(1, NULL); + if (eq_value(register_attribute_location(i)) != v) { + if (attrlist) { + attrlist = tex_patch_attribute_list(attrlist, i, v); + } else { + attrlist = tex_copy_attribute_list_set(tex_current_attribute_list(), i, v); + } + } + } + break; + case 'c': case 'C': + if (tex_scan_mandate_keyword("callback", 1)) { + callback = 1; + } + break; + case 'd': case 'D': + if (tex_scan_mandate_keyword("discard", 1)) { + discard = 1; + } + break; + case 'n': case 'N': + if (tex_scan_mandate_keyword("noskips", 1)) { + noskips = 1; + } + break; + case 'r': case 'R': + if (tex_scan_mandate_keyword("reverse", 1)) { + reverse = 1; + } + break; + case 't': case 'T': + if (tex_scan_mandate_keyword("to", 1)) { + mode = packing_exactly; + amount = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 's': case 'S': + if (tex_scan_mandate_keyword("spread", 1)) { + mode = packing_additional; + amount = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case '{': + brace = 1; + goto DONE; + default: + goto DONE; + } + } + DONE: + if (! attrlist) { + /* this alse sets the reference when not yet set */ + attrlist = tex_current_attribute_list(); + } + /*tex Now we're referenced. We need to preserve this over the group. */ + add_attribute_reference(attrlist); + tex_set_saved_record(saved_align_specification, saved_box_spec, mode, amount); + /* We save them but could put them in the state as we do for some anyway. 
*/ + tex_set_saved_record(saved_align_reverse, saved_box_reverse, reverse, 0); + tex_set_saved_record(saved_align_discard, saved_box_discard, noskips ? 0 : discard, 0); + tex_set_saved_record(saved_align_noskips, saved_box_noskips, noskips, 0); + tex_set_saved_record(saved_align_callback, saved_box_callback, callback, 0); + lmt_save_state.save_stack_data.ptr += saved_align_n_of_items; + tex_new_save_level(c); + if (! brace) { + tex_scan_left_brace(); + } + lmt_alignment_state.no_tab_skips = noskips; + lmt_alignment_state.attr_list = attrlist; + lmt_alignment_state.callback = callback; +} + +/*tex + + The tricky part about alignments is getting the templates into the scanner at the right time, + and recovering control when a row or column is finished. + + We usually begin a row after each |\cr| has been sensed, unless that |\cr| is followed by + |\noalign| or by the right brace that terminates the alignment. The |align_peek| routine is + used to look ahead and do the right thing; it either gets a new row started, or gets a + |\noalign} started, or finishes off the alignment. 
+ +*/ + +static void tex_aux_align_peek(void); + +static void tex_aux_trace_no_align(const char *s) +{ + if (tracing_alignments_par > 0) { + tex_begin_diagnostic(); + tex_print_format("[alignment: %s noalign, level %i]", s, lmt_alignment_state.no_align_level); + tex_end_diagnostic(); + } +} + +static void tex_aux_run_no_align(void) +{ + tex_scan_left_brace(); + tex_new_save_level(no_align_group); + ++lmt_alignment_state.no_align_level; + tex_aux_trace_no_align("entering"); + if (cur_list.mode == -vmode) { + tex_normal_paragraph(no_align_par_context); + } +} +static int tex_aux_nested_no_align(void) +{ + int state = lmt_alignment_state.no_align_level > 0; + if (state) { + tex_scan_left_brace(); + tex_new_save_level(no_align_group); + ++lmt_alignment_state.no_align_level; + tex_aux_trace_no_align("entering"); + if (cur_list.mode == -vmode) { + tex_normal_paragraph(no_align_par_context); + } + } + return state; +} + +void tex_finish_no_alignment_group(void) +{ + if (! tex_wrapped_up_paragraph(no_align_par_context)) { /* needs testing */ + tex_end_paragraph(no_align_group, no_align_par_context); + tex_aux_trace_no_align("leaving"); + --lmt_alignment_state.no_align_level; + tex_unsave(); + if (lmt_alignment_state.no_align_level == 0) { + tex_aux_align_peek(); + } + } +} + +static void tex_aux_align_peek(void) +{ + RESTART: + lmt_input_state.align_state = 1000000; + AGAIN: + tex_get_x_or_protected(); + switch (cur_cmd) { + case spacer_cmd: + goto AGAIN; + case right_brace_cmd: + tex_aux_finish_align(); + break; + case call_cmd: + case protected_call_cmd: + case semi_protected_call_cmd: + case tolerant_call_cmd: + case tolerant_protected_call_cmd: + case tolerant_semi_protected_call_cmd: + if (has_eq_flag_bits(cur_cs, noaligned_flag_bit)) { + tex_expand_current_token(); + goto RESTART; + } else { + goto NEXTROW; + } + case alignment_cmd: + switch (cur_chr) { + case cr_cr_code: + /*tex Ignore |\crcr|. 
*/ + goto RESTART; + case no_align_code: + tex_aux_run_no_align(); + return; + } + // fall through + default: + NEXTROW: + /*tex Start a new row. */ + tex_aux_initialize_row(); + /*tex Start a new column and replace what we peeked at. */ + tex_aux_initialize_column(); + break; + } +} + +/*tex +* + Magick numbers are used to indicate the level of alignment. However, keep in mind that in + \LUANETATEX\ the fundamental parts of the rendering are separated. Contrary to traditional + \TEX\ we don't have the interwoven hyphenation, ligature building, kerning, etc.\ code. + + In the end we have a list starting and ending with tabskips and align records seperated by + such skips. + +*/ + +void tex_run_alignment_initialize(void) +{ + halfword saved_cs = cur_cs; + tex_aux_push_alignment(); + lmt_input_state.align_state = -1000000; + /*tex + When |\halign| is used as a displayed formula, there should be no other pieces of mlists + present. + */ + if (cur_list.mode == mmode && ((cur_list.tail != cur_list.head) || cur_list.incomplete_noad)) { + tex_handle_error( + normal_error_type, + "Improper \\halign inside math mode", + "Displays can use special alignments (like \\eqalignno) only if nothing but the\n" + "alignment itself is in math mode. So I've deleted the formulas that preceded this\n" + "alignment." + ); + tex_flush_math(); + } + /*tex We enter a new semantic level. */ + tex_push_nest(); + /*tex + In vertical modes, |prev_depth| already has the correct value. But if we are in |mmode| + (displayed formula mode), we reach out to the enclosing vertical mode for the |prev_depth| + value that produces the correct baseline calculations. + */ + if (cur_list.mode == mmode) { + cur_list.mode = -vmode; + cur_list.prev_depth = lmt_nest_state.nest[lmt_nest_state.nest_data.ptr - 2].prev_depth; + } else if (cur_list.mode > 0) { + cur_list.mode = -cur_list.mode; + } + /*tex This one also saves some in the state. */ + tex_aux_scan_align_spec(align_group); + /*tex + Scan the preamble. 
Even when we ignore zero tabskips, we do store them in the list because + the machinery later on steps over them and checking for present glue makes the code + horrible. The overhead is small because it's only the preamble where we waste glues then. + */ + preamble = null; + lmt_alignment_state.cur_align = align_head; + lmt_alignment_state.cur_loop = null; + lmt_input_state.scanner_status = scanner_is_aligning; + lmt_input_state.warning_index = saved_cs; + lmt_input_state.align_state = -1000000; + /*tex At this point, |cur_cmd = left_brace|. */ + while (1) { + /*tex Append the current tabskip glue to the preamble list. */ + halfword glue = tex_new_param_glue_node(tab_skip_code, tab_skip_glue); + if (lmt_alignment_state.no_tab_skips && tex_glue_is_zero(glue)) { + node_subtype(glue) = ignored_glue; + } + tex_couple_nodes(lmt_alignment_state.cur_align, glue); + lmt_alignment_state.cur_align = glue; + if (cur_cmd == alignment_cmd && (cur_chr == cr_code || cur_chr == cr_cr_code)) { /* Also cr_cr here? */ + /*tex A |\cr| ends the preamble. */ + break; + } else { + /*tex + Scan preamble text until |cur_cmd| is |tab_mark| or |car_ret| and then scan the + template |u_j|, putting the resulting token list in |hold_token_head|. Spaces are + eliminated from the beginning of a template. + */ + halfword record = null; + halfword current = lmt_alignment_state.hold_token_head; + token_link(current) = null; + while (1) { + tex_aux_get_preamble_token(); + if (cur_cmd == parameter_cmd || (cur_cmd == alignment_cmd && cur_chr == align_content_code)) { + break; + } else if ((cur_cmd == alignment_cmd || cur_cmd == alignment_tab_cmd) && (lmt_input_state.align_state == -1000000)) { + if ((current == lmt_alignment_state.hold_token_head) && (! 
lmt_alignment_state.cur_loop) && (cur_cmd == alignment_tab_cmd)) { + lmt_alignment_state.cur_loop = lmt_alignment_state.cur_align; + } else { + tex_back_input(cur_tok); + tex_handle_error( + normal_error_type, + "Missing # inserted in alignment preamble", + "There should be exactly one # between &'s, when an \\halign or \\valign is being\n" + "set up. In this case you had none, so I've put one in; maybe that will work." + ); + break; + } + } else if (cur_cmd != spacer_cmd || current != lmt_alignment_state.hold_token_head) { + current = tex_store_new_token(current, cur_tok); + } + } + /*tex A new align record: */ + record = tex_new_node(align_record_node, 0); + tex_couple_nodes(lmt_alignment_state.cur_align, record); + lmt_alignment_state.cur_align = record; + align_record_span_ptr(record) = end_span; + box_width(record) = null_flag; + align_record_pre_part(record) = token_link(lmt_alignment_state.hold_token_head); + /*tex Scan the template |v_j|, putting the resulting token list in |hold_token_head|. */ + current = lmt_alignment_state.hold_token_head; + token_link(current) = null; + while (1) { + tex_aux_get_preamble_token(); + if ((cur_cmd == alignment_cmd || cur_cmd == alignment_tab_cmd) && (lmt_input_state.align_state == -1000000)) { + break; + } else if (cur_cmd == parameter_cmd || (cur_cmd == alignment_cmd && cur_chr == align_content_code)) { + tex_handle_error( + normal_error_type, + "Only one # is allowed per tab", + "There should be exactly one # between &'s, when an \\halign or \\valign is being\n" + "set up. In this case you had more than one, so I'm ignoring all but the first." 
+ ); + } else { + current = tex_store_new_token(current, cur_tok); + } + } + if (tab_size_par > 0) { + box_size(record) = tab_size_par; + set_box_package_state(record, package_dimension_size_set); + } else { + box_width(record) = null_flag; + } + /*tex Put |\endtemplate| at the end: */ + current = tex_store_new_token(current, deep_frozen_end_template_1_token); + align_record_post_part(lmt_alignment_state.cur_align) = token_link(lmt_alignment_state.hold_token_head); + } + } + if (tracing_alignments_par > 1) { + tex_print_levels(); + tex_print_str("<alignment preamble>"); + tex_show_node_list(preamble, max_integer, max_integer); + } + if (lmt_alignment_state.callback) { + lmt_alignment_callback(cur_list.head, preamble_pass_alignment_context, lmt_alignment_state.attr_list, preamble); + } + lmt_input_state.scanner_status = scanner_is_normal; + tex_new_save_level(align_group); + if (every_cr_par) { + tex_begin_token_list(every_cr_par, every_cr_text); + } + /*tex Look for |\noalign| or |\omit|. */ + tex_aux_align_peek(); +} + +void tex_finish_alignment_group(void) +{ + tex_back_input(cur_tok); + cur_tok = deep_frozen_cr_token; + tex_handle_error( + insert_error_type, + "Missing \\cr inserted", + "I'm guessing that you meant to end an alignment here." + ); +} + +/*tex + + The parameter to |initialize_span| is a pointer to the alignrecord where the next column or group + of columns will begin. A new semantic level is entered, so that the columns will generate a list + for subsequent packaging. 
+ +*/ + +static void tex_aux_initialize_span(halfword p) +{ + tex_push_nest(); + if (cur_list.mode == -hmode) { + cur_list.space_factor = 1000; + } else { + cur_list.prev_depth = ignore_depth; + tex_normal_paragraph(span_par_context); + } + lmt_alignment_state.cur_span = p; +} + +/*tex + + To start a row (i.e., a \quote {row} that rhymes with \quote {dough} but not with \quote + {bough}), we enter a new semantic level, copy the first tabskip glue, and change from internal + vertical mode to restricted horizontal mode or vice versa. The |space_factor| and |prev_depth| + are not used on this semantic level, but we clear them to zero just to be tidy. + +*/ + +static void tex_aux_initialize_row(void) +{ + tex_push_nest(); + cur_list.mode = (- hmode - vmode) - cur_list.mode; /* weird code */ + if (cur_list.mode == -hmode) { + cur_list.space_factor = 0; + } else { + cur_list.prev_depth = 0; + } + lmt_alignment_state.cur_align = preamble; + if (node_subtype(preamble) != ignored_glue) { + halfword glue = tex_new_glue_node(preamble, tab_skip_glue); + tex_tail_append(glue); + tex_attach_attribute_list_attribute(glue, lmt_alignment_state.attr_list); + } + lmt_alignment_state.cur_align = node_next(preamble); + lmt_alignment_state.cur_post_adjust_tail = lmt_alignment_state.cur_post_adjust_head; + lmt_alignment_state.cur_pre_adjust_tail = lmt_alignment_state.cur_pre_adjust_head; + lmt_alignment_state.cur_post_migrate_tail = lmt_alignment_state.cur_post_migrate_head; + lmt_alignment_state.cur_pre_migrate_tail = lmt_alignment_state.cur_pre_migrate_head; + tex_aux_initialize_span(lmt_alignment_state.cur_align); +} + +/*tex + + When a column begins, we assume that |cur_cmd| is either |omit| or else the current token should + be put back into the input until the \<u_j> template has been scanned. Note that |cur_cmd| might + be |tab_mark| or |car_ret|. We also assume that |align_state| is approximately 1000000 at this + time. 
We remain in the same mode, and start the template if it is called for. + +*/ + +static void tex_aux_initialize_column(void) +{ + align_record_cmd(lmt_alignment_state.cur_align) = cur_cmd; + align_record_chr(lmt_alignment_state.cur_align) = cur_chr; + if (cur_cmd == alignment_cmd && cur_chr == omit_code) { + lmt_input_state.align_state = 0; + } else { + tex_back_input(cur_tok); + if (every_tab_par) { + tex_begin_token_list(every_tab_par, every_tab_text); + } + tex_begin_token_list(align_record_pre_part(lmt_alignment_state.cur_align), template_pre_text); + } + /*tex Now |align_state = 1000000|, one of these magic numbers. */ +} + +/*tex + + The scanner sets |align_state| to zero when the |u_j| template ends. When a subsequent |\cr| + or |\span| or tab mark occurs with |align_state=0|, the scanner activates the following code, + which fires up the |v_j| template. We need to remember the |cur_chr|, which is either + |cr_cr_code|, |cr_code|, |span_code|, or a character code, depending on how the column text has + ended. + + This part of the program had better not be activated when the preamble to another alignment is + being scanned, or when no alignment preamble is active. + +*/ + +void tex_insert_alignment_template(void) +{ + if (lmt_input_state.scanner_status == scanner_is_aligning || ! lmt_alignment_state.cur_align) { + tex_alignment_interwoven_error(6); + } else { + /*tex in case of an |\omit| the gets discarded and is nowhere else referenced. */ + halfword cmd = align_record_cmd(lmt_alignment_state.cur_align); + halfword chr = align_record_chr(lmt_alignment_state.cur_align); + halfword tok = (cmd == alignment_cmd && chr == omit_code) ? 
lmt_alignment_state.omit_template : align_record_post_part(lmt_alignment_state.cur_align); + align_record_cmd(lmt_alignment_state.cur_align) = cur_cmd; + align_record_chr(lmt_alignment_state.cur_align) = cur_chr; + tex_begin_token_list(tok, template_post_text); + lmt_input_state.align_state = 1000000; + lmt_alignment_state.cell_source = alignment_cell_source_par; + if (alignment_wrap_source_par) { + lmt_alignment_state.wrap_source = alignment_wrap_source_par; + } + } +} + +/*tex Determine the stretch or shrink order */ + +inline static halfword tex_aux_determine_order(scaled *total) +{ + if (total[filll_glue_order]) return filll_glue_order; + else if (total[fill_glue_order]) return fill_glue_order; + else if (total[fil_glue_order]) return fil_glue_order; + else if (total[fi_glue_order]) return fi_glue_order; + else return normal_glue_order; +} + +/*tex + + A span node is a 3-word record containing |width|, |span_span|, and |span_ptr| fields. The + |span_span| field indicates the number of spanned columns; the |span_ptr| field points to a + span node for the same starting column, having a greater extent of spanning, or to |end_span|, + which has the largest possible |span_span| field; the |width| field holds the largest natural + width corresponding to a particular set of spanned columns. + + A list of the maximum widths so far, for spanned columns starting at a given column, begins + with the |span_ptr| field of the alignrecord for that column. The code has to make sure that + there is room for |span_ptr| in both the align record and the span nodes, which is why + |span_ptr| replaces |node_attr|. + +*/ + +static halfword tex_aux_new_span_node(halfword n, int s, scaled w) +{ + halfword p = tex_new_node(span_node, 0); + span_ptr(p) = n; /*tex This one overlaps with |alignment_record_ptr|. 
*/ + span_span(p) = s; + span_width(p) = w; + return p; +} + +/*tex + + When the |end_template| command at the end of a |v_j| template comes through the scanner, + things really start to happen; and it is the |finialize_column| routine that makes them happen. + This routine returns |true| if a row as well as a column has been finished. + +*/ + +void tex_alignment_interwoven_error(int n) +{ + tex_formatted_error("alignment", "interwoven preambles are not allowed, case %d", n); +} + +halfword tex_alignment_hold_token_head(void) +{ + return lmt_alignment_state.hold_token_head; +} + +static int tex_aux_finish_column(void) +{ + if (! lmt_alignment_state.cur_align) { + tex_confusion("end template, case 1"); + } else { + halfword q = node_next(lmt_alignment_state.cur_align); + if (! q) { + tex_confusion("end template, case 2"); + } else if (lmt_input_state.align_state < 500000) { + tex_alignment_interwoven_error(1); + } else { + /*tex A few state variables. */ + halfword cmd = align_record_cmd(lmt_alignment_state.cur_align); + halfword chr = align_record_chr(lmt_alignment_state.cur_align); + /*tex + We check the alignrecord after the current one. If the preamble list has been + traversed, check that the row has ended. + */ + halfword record = node_next(q); + if (alignment_wrap_source_par) { + lmt_alignment_state.wrap_source = alignment_wrap_source_par; + } + if (! record && ! ((cmd == alignment_cmd) && (chr == cr_code || chr == cr_cr_code))) { + if (lmt_alignment_state.cur_loop) { + /*tex Lengthen the preamble periodically. A new align record: */ + record = tex_new_node(align_record_node, 0); + tex_couple_nodes(q, record); + align_record_span_ptr(record) = end_span; + box_width(record) = null_flag; + lmt_alignment_state.cur_loop = node_next(lmt_alignment_state.cur_loop); + /*tex Copy the templates from node |cur_loop| into node |p|. 
*/ + { + halfword q = lmt_alignment_state.hold_token_head; + halfword r = align_record_pre_part(lmt_alignment_state.cur_loop); + while (r) { + q = tex_store_new_token(q, token_info(r)); + r = token_link(r); + } + token_link(q) = null; + align_record_pre_part(record) = token_link(lmt_alignment_state.hold_token_head); + } + { + halfword q = lmt_alignment_state.hold_token_head; + halfword r = align_record_post_part(lmt_alignment_state.cur_loop); + while (r) { + q = tex_store_new_token(q, token_info(r)); + r = token_link(r); + } + token_link(q) = null; + align_record_post_part(record) = token_link(lmt_alignment_state.hold_token_head); + } + lmt_alignment_state.cur_loop = node_next(lmt_alignment_state.cur_loop); + { + halfword glue = tex_new_glue_node(lmt_alignment_state.cur_loop, tab_skip_glue); + if (lmt_alignment_state.no_tab_skips && tex_glue_is_zero(glue)) { + node_subtype(glue) = ignored_glue; + } + tex_couple_nodes(record, glue); + } + } else { + chr = cr_code; + align_record_chr(lmt_alignment_state.cur_align) = chr; + tex_handle_error( + normal_error_type, + "Extra alignment tab has been changed to \\cr", + "You have given more \\span or & marks than there were in the preamble to the\n" + "\\halign or \\valign now in progress. So I'll assume that you meant to type \\cr\n" + "instead." + ); + } + } + if (! (cmd == alignment_cmd && chr == span_code)) { + /*tex a new unset box */ + halfword cell = null; + /*tex natural width */ + scaled width = 0; + scaled size = 0; + int state = 0; + int packing = packing_additional; + /*tex The span counter. */ + halfword spans = 0; + tex_unsave(); + tex_new_save_level(align_group); + /*tex Package an unset box for the current column and record its width. 
*/ + state = has_box_package_state(lmt_alignment_state.cur_align, package_dimension_size_set); + if (state) { + size = box_size(lmt_alignment_state.cur_align); + packing = packing_exactly; + } + if (cur_list.mode == -hmode) { + lmt_packaging_state.post_adjust_tail = lmt_alignment_state.cur_post_adjust_tail; + lmt_packaging_state.pre_adjust_tail = lmt_alignment_state.cur_pre_adjust_tail; + lmt_packaging_state.post_migrate_tail = lmt_alignment_state.cur_post_migrate_tail; + lmt_packaging_state.pre_migrate_tail = lmt_alignment_state.cur_pre_migrate_tail; + cell = tex_filtered_hpack(cur_list.head, cur_list.tail, size, packing, align_set_group, direction_unknown, 0, null, 0, 0); + width = box_width(cell); + lmt_alignment_state.cur_post_adjust_tail = lmt_packaging_state.post_adjust_tail; + lmt_alignment_state.cur_pre_adjust_tail = lmt_packaging_state.pre_adjust_tail; + lmt_alignment_state.cur_post_migrate_tail = lmt_packaging_state.post_migrate_tail; + lmt_alignment_state.cur_pre_migrate_tail = lmt_packaging_state.pre_migrate_tail; + lmt_packaging_state.post_adjust_tail = null; + lmt_packaging_state.pre_adjust_tail = null; + lmt_packaging_state.post_migrate_tail = null; + lmt_packaging_state.pre_migrate_tail = null; + } else { + cell = tex_filtered_vpack(node_next(cur_list.head), size, packing, 0, align_set_group, direction_unknown, 0, null, 0, 0); + width = box_height(cell); + } + if (lmt_alignment_state.cell_source) { + box_source_anchor(cell) = lmt_alignment_state.cell_source; + tex_set_box_geometry(cell, anchor_geometry); + } + tex_attach_attribute_list_attribute(cell, lmt_alignment_state.attr_list); + if (lmt_alignment_state.cur_span != lmt_alignment_state.cur_align) { + /*tex Update width entry for spanned columns. */ + halfword ptr = lmt_alignment_state.cur_span; + do { + ++spans; + ptr = node_next(node_next(ptr)); + } while (ptr != lmt_alignment_state.cur_align); + if (spans > max_quarterword) { + /*tex This can happen, but won't. 
*/ + tex_confusion("too many spans"); + } + ptr = lmt_alignment_state.cur_span; + while (span_span(align_record_span_ptr(ptr)) < spans) { + ptr = align_record_span_ptr(ptr); + } + if (span_span(align_record_span_ptr(ptr)) > spans) { + halfword span = tex_aux_new_span_node(align_record_span_ptr(ptr), spans, width); + align_record_span_ptr(ptr) = span; + } else if (span_width(align_record_span_ptr(ptr)) < width) { + span_width(align_record_span_ptr(ptr)) = width; + } + } else if (width > box_width(lmt_alignment_state.cur_align)) { + box_width(lmt_alignment_state.cur_align) = width; + } + tex_aux_change_list_type(cell, unset_node); + box_span_count(cell) = spans; + if (! state) { + halfword order = tex_aux_determine_order(lmt_packaging_state.total_stretch); + box_glue_order(cell) = order; + box_glue_stretch(cell) = lmt_packaging_state.total_stretch[order]; + order = tex_aux_determine_order(lmt_packaging_state.total_shrink); + box_glue_sign(cell) = order; /* hm, sign */ + box_glue_shrink(cell) = lmt_packaging_state.total_shrink[order]; + } + tex_pop_nest(); + tex_tail_append(cell); + /*tex Copy the tabskip glue between columns. */ + if (node_subtype(node_next(lmt_alignment_state.cur_align)) != ignored_glue) { + halfword glue = tex_new_glue_node(node_next(lmt_alignment_state.cur_align), tab_skip_glue); + tex_attach_attribute_list_attribute(cell, lmt_alignment_state.attr_list); + tex_tail_append(glue); + } + if (cmd == alignment_cmd && (chr == cr_code || chr == cr_cr_code)) { + return 1; + } else { + tex_aux_initialize_span(record); + } + } + lmt_input_state.align_state = 1000000; + do { + tex_get_x_or_protected(); + } while (cur_cmd == spacer_cmd); + lmt_alignment_state.cur_align = record; + tex_aux_initialize_column(); + } + } + return 0; +} + +/*tex + + At the end of a row, we append an unset box to the current vlist (for |\halign|) or the current + hlist (for |\valign|). This unset box contains the unset boxes for the columns, separated by + the tabskip glue. 
Everything will be set later.

*/

static void tex_aux_finish_row(void)
{
    halfword row = null;
    if (cur_list.mode != -hmode) {
        /*tex A row of a |\valign|: a vertical box that is appended to the enclosing list. */
        row = tex_filtered_vpack(node_next(cur_list.head), 0, packing_additional, max_depth_par, finish_row_group, direction_unknown, 0, null, 0, 0);
        tex_pop_nest();
        tex_tail_append(row);
        cur_list.space_factor = 1000;
    } else {
        /*tex
            A row of an |\halign|: the pre material that was collected for this row goes in
            front of the row, the post material comes after it.
        */
        row = tex_filtered_hpack(cur_list.head, cur_list.tail, 0, packing_additional, finish_row_group, direction_unknown, 0, null, 0, 0);
        tex_pop_nest();
        if (lmt_alignment_state.cur_pre_adjust_head != lmt_alignment_state.cur_pre_adjust_tail) {
            tex_inject_adjust_list(lmt_alignment_state.cur_pre_adjust_head, 0, null, NULL);
        }
        if (lmt_alignment_state.cur_pre_migrate_head != lmt_alignment_state.cur_pre_migrate_tail) {
            tex_append_list(lmt_alignment_state.cur_pre_migrate_head, lmt_alignment_state.cur_pre_migrate_tail);
        }
        tex_append_to_vlist(row, lua_key_index(alignment), NULL);
        if (lmt_alignment_state.cur_post_migrate_head != lmt_alignment_state.cur_post_migrate_tail) {
            tex_append_list(lmt_alignment_state.cur_post_migrate_head, lmt_alignment_state.cur_post_migrate_tail);
        }
        if (lmt_alignment_state.cur_post_adjust_head != lmt_alignment_state.cur_post_adjust_tail) {
            tex_inject_adjust_list(lmt_alignment_state.cur_post_adjust_head, 0, null, NULL);
        }
    }
    if (lmt_alignment_state.wrap_source) {
        box_source_anchor(row) = lmt_alignment_state.wrap_source;
        tex_set_box_geometry(row, anchor_geometry);
    }
    /*tex The row becomes an unset node that carries the alignment's attribute list. */
    tex_aux_change_list_type(row, unset_node);
    tex_attach_attribute_list_attribute(row, lmt_alignment_state.attr_list);
    if (every_cr_par) {
        tex_begin_token_list(every_cr_par, every_cr_text);
    }
    tex_aux_align_peek();
    /*tex Note that |glue_shrink(p) = 0| since |glue_shrink == shift_amount|. */
}

/*tex

    Finally, we will reach the end of the alignment, and we can breathe a sigh of relief that
    memory hasn't overflowed.
All the unset boxes will now be set so that the columns line up, + taking due account of spanned columns. + + Normalizing by stripping zero tabskips makes the lists a little smaller which then is easier + on later processing. But it is an option. We could actually not inject zero skips at all but + then the code starts deviating too much. In some cases it can save a lot of zero glue nodes + but we allocate them initially anyway. We don't save runtime here. (Some day I'll play a bit + more with this and then probably also implement some pending extensions.) + + The helper below walks the list of box |q|, unlinks and flushes every |tab_skip_glue| node + that is zero, and lets |box_list(q)| point to the new head when the first node is removed. + +*/ + +static void tex_aux_strip_zero_tab_skips(halfword q) +{ + halfword h = box_list(q); + halfword t = h; + while (t) { + halfword n = node_next(t); + if (node_type(t) == glue_node && node_subtype(t) == tab_skip_glue && tex_glue_is_zero(t)) { + tex_try_couple_nodes(node_prev(t),n); + if (t == h) { + /*tex The head of the list gets removed, so the box has to point to the next node. */ + h = n; + box_list(q) = h; + } + tex_flush_node(t); + } + t = n; + } +} + +static void tex_aux_finish_align(void) +{ + /*tex a shared register for the list operations (others are localized) */ + halfword preroll; + /*tex shift offset for unset boxes */ + scaled offset = 0; + /*tex something new */ + halfword reverse = 0; + halfword callback = lmt_alignment_state.callback; + halfword discard = normalize_line_mode_permitted(normalize_line_mode_par, discard_zero_tab_skips_mode); + /*tex The |align_group| was for individual entries: */ + if (cur_group != align_group) { + tex_confusion("align, case 1"); + } + tex_unsave(); + /*tex The |align_group| was for the whole alignment: */ + if (cur_group != align_group) { + tex_confusion("align, case 2"); + } + tex_unsave(); + if (lmt_nest_state.nest[lmt_nest_state.nest_data.ptr - 1].mode == mmode) { + offset = display_indent_par; + } + lmt_save_state.save_stack_data.ptr -= saved_align_n_of_items; + lmt_packaging_state.pack_begin_line = -cur_list.mode_line; + reverse = saved_level(saved_align_reverse); /* we can as well save these in the state 
*/ + discard = discard || saved_level(saved_align_discard); /* we can as well save these in the state */ + /*tex + All content is available now so this is a perfect spot for some processing. However, we + cannot mess with the unset boxes (as these can have special properties). The main reason + for some postprocessing can be to align (vertically) at a specific location in a cell + but then we also need to process twice (and adapt the width in the preamble record). + + We flush the tokenlists so that in principle we can access the align record nodes as normal + lists. + */ + { + halfword q = node_next(preamble); + do { + tex_flush_token_list(align_record_pre_part(q)); + tex_flush_token_list(align_record_post_part(q)); + align_record_pre_part(q) = null; + align_record_post_part(q) = null; + q = node_next(node_next(q)); + } while (q); + } + if (callback) { + lmt_alignment_callback(cur_list.head, preroll_pass_alignment_context, lmt_alignment_state.attr_list, preamble); + } + /*tex + + Go through the preamble list, determining the column widths and changing the alignrecords + to dummy unset boxes. + + It's time now to dismantle the preamble list and to compute the column widths. Let $w_{ij}$ + be the maximum of the natural widths of all entries that span columns $i$ through $j$, + inclusive. The alignrecord for column~$i$ contains $w_{ii}$ in its |width| field, and there + is also a linked list of the nonzero $w_{ij}$ for increasing $j$, accessible via the |info| + field; these span nodes contain the value $j-i+|min_quarterword|$ in their |link| fields. + The values of $w_{ii}$ were initialized to |null_flag|, which we regard as $-\infty$. + + The final column widths are defined by the formula $$ w_j = \max_{1\L i\L j} \biggl( w_{ij} + - \sum_{i\L k < j}(t_k + w_k) \biggr), $$ where $t_k$ is the natural width of the tabskip + glue between columns $k$ and~$k + 1$. 
However, if $w_{ij} = -\infty$ for all $i$ in the + range $1 <= i <= j$ (i.e., if every entry that involved column~$j$ also involved column~$j + + 1$), we let $w_j = 0$, and we zero out the tabskip glue after column~$j$. + + \TEX\ computes these values by using the following scheme: First $w_1 = w_{11}$. Then + replace $w_{2j}$ by $\max(w_{2j}, w_{1j} - t_1 - w_1)$, for all $j > 1$. Then $w_2 = + w_{22}$. Then replace $w_{3j}$ by $\max(w_{3j}, w_{2j} - t_2 - w_2)$ for all $j > 2$; and + so on. If any $w_j$ turns out to be $-\infty$, its value is changed to zero and so is the + next tabskip. + + */ + { + halfword q = node_next(preamble); + do { + /* So |q| and |p| point to alignment nodes that become unset ones. */ + halfword p = node_next(node_next(q)); + if (box_width(q) == null_flag) { + /*tex Nullify |width(q)| and the tabskip glue following this column. */ + box_width(q) = 0; + tex_reset_glue_to_zero(node_next(q)); + } + if (align_record_span_ptr(q) != end_span) { + /*tex + + Merge the widths in the span nodes of |q| with those of |p|, destroying the + span nodes of |q|. + + Merging of two span-node lists is a typical exercise in the manipulation of + linearly linked data structures. The essential invariant in the following + |repeat| loop is that we want to dispense with node |r|, in |q|'s list, and + |u| is its successor; all nodes of |p|'s list up to and including |s| have + been processed, and the successor of |s| matches |r| or precedes |r| or follows + |r|, according as |link(r) = n| or |link(r) > n| or |link(r) < n|. 
+ + */ + halfword t = box_width(q) + glue_amount(node_next(q)); + halfword n = 1; + halfword r = align_record_span_ptr(q); + halfword s = end_span; + align_record_span_ptr(s) = p; + do { + halfword u = align_record_span_ptr(r); + span_width(r) -= t; + while (span_span(r) > n) { + s = align_record_span_ptr(s); + n = span_span(align_record_span_ptr(s)) + 1; + } + if (span_span(r) < n) { + align_record_span_ptr(r) = align_record_span_ptr(s); + align_record_span_ptr(s) = r; + --span_span(r); + s = r; + } else { + if (span_width(r) > span_width(align_record_span_ptr(s))) { + span_width(align_record_span_ptr(s)) = span_width(r); + } + tex_flush_node(r); + } + r = u; + } while (r != end_span); + } + tex_aux_change_list_type(q, unset_node); + box_glue_order(q) = normal_glue_order; + box_glue_sign(q) = normal_glue_sign; + box_height(q) = 0; + box_depth(q) = 0; + q = p; + } while (q); + } + if (callback) { + lmt_alignment_callback(cur_list.head, package_pass_alignment_context, lmt_alignment_state.attr_list, preamble); + } + /*tex + + Package the preamble list, to determine the actual tabskip glue amounts, and let |p| point + to this prototype box. + + Now the preamble list has been converted to a list of alternating unset boxes and tabskip + glue, where the box widths are equal to the final column sizes. In case of |\valign|, we + change the widths to heights, so that a correct error message will be produced if the + alignment is overfull or underfull. + + */ + if (cur_list.mode == -vmode) { + halfword rule_save = overfull_rule_par; + /*tex Prevent the rule from being packaged. 
*/ + overfull_rule_par = 0; + preroll = tex_hpack(preamble, saved_value(saved_align_specification), saved_extra(saved_align_specification), direction_unknown, holding_none_option); + overfull_rule_par = rule_save; + } else { + halfword unset = node_next(preamble); + do { + box_height(unset) = box_width(unset); + box_width(unset) = 0; + unset = node_next(node_next(unset)); + } while (unset); + /* why filtered here ... */ + preroll = tex_filtered_vpack(preamble, saved_value(saved_align_specification), saved_extra(saved_align_specification), max_depth_par, preamble_group, direction_unknown, 0, 0, 0, holding_none_option); + /* ... so we'll do this soon instead: */ + /* preroll = tex_vpack(preamble, saved_value(saved_align_specification), saved_extra(saved_align_specification), max_depth_par, direction_unknown, migrate_all_option); */ + unset = node_next(preamble); + do { + box_width(unset) = box_height(unset); + box_height(unset) = 0; + unset = node_next(node_next(unset)); + } while (unset); + } + lmt_packaging_state.pack_begin_line = 0; + /*tex + Here we set the glue in all the unset boxes of the current list based on the prerolled + preamble. + */ + { + halfword rowptr = node_next(cur_list.head); + while (rowptr) { + switch (node_type(rowptr)) { + case unset_node: + { + /*tex + We set the unset box |q| and the unset boxes in it. The unset box |q| + represents a row that contains one or more unset boxes, depending on + how soon |\cr| occurred in that row. + + We also reset some fields but this needs checking because we never set + set them in these unset boxes but in the preamble ones. 
+ */ + halfword preptr; + halfword colptr; + if (cur_list.mode == -vmode) { + tex_aux_change_list_type(rowptr, hlist_node); + box_width(rowptr) = box_width(preroll); + } else { + tex_aux_change_list_type(rowptr, vlist_node); + box_height(rowptr) = box_height(preroll); + } + node_subtype(rowptr) = align_row_list; + box_glue_order(rowptr) = box_glue_order(preroll); + box_glue_sign(rowptr) = box_glue_sign(preroll); + box_glue_set(rowptr) = box_glue_set(preroll); + box_shift_amount(rowptr) = offset; + colptr = box_list(rowptr); + preptr = box_list(preroll); + if (node_type(colptr) == glue_node) { + colptr = node_next(colptr); + } + if (node_type(preptr) == glue_node) { + preptr = node_next(preptr); + } + if (node_type(colptr) != unset_node) { + tex_formatted_error("alignment", "bad box"); + } + do { + /*tex + We set the glue in node |r| and change it from an unset node. A box + made from spanned columns will be followed by tabskip glue nodes + and by empty boxes as if there were no spanning. This permits + perfect alignment of subsequent entries, and it prevents values + that depend on floating point arithmetic from entering into the + dimensions of any boxes. + */ + halfword spans = box_span_count(colptr); + scaled total = box_width(preptr); + scaled width = total; /*tex The width of a column. */ + halfword tail = hold_head; + int state = has_box_package_state(preptr, package_dimension_size_set); + /*tex + When we have a span we need to add dummies. We append tabskip glue + and an empty box to list |u|, and update |s| and |t| as the + prototype nodes are passed. We could shortcut some code when we + have zero skips but we seldom end up in this branch anyway. 
+ */ + while (spans > 0) { + --spans; + preptr = node_next(preptr); + if (node_subtype(preptr) != ignored_glue) { + /* halfword glue = tex_new_glue_node(preptr, tab_skip_glue); */ + halfword glue = tex_new_glue_node(preptr, node_subtype(preptr)); + tex_try_couple_nodes(tail, glue); + tex_attach_attribute_list_attribute(glue, lmt_alignment_state.attr_list); + total += glue_amount(preptr); + /*tex The |glueratio| case is redundant, anyway ... */ + switch (box_glue_sign(preroll)) { + case stretching_glue_sign: + if (glue_stretch_order(preptr) == box_glue_order(preroll)) { + total += glueround((glueratio) (box_glue_set(preroll)) * (glueratio) (glue_stretch(preptr))); + } + break; + case shrinking_glue_sign: + if (glue_shrink_order(preptr) == box_glue_order(preroll)) { + total -= glueround((glueratio) (box_glue_set(preroll)) * (glueratio) (glue_shrink(preptr))); + } + break; + } + tail = glue; + /*tex Move on to the box. */ + } + preptr = node_next(preptr); + { + halfword box = tex_new_null_box_node(cur_list.mode == -vmode ? hlist_node : vlist_node, align_cell_list); + tex_couple_nodes(tail, box); + tex_attach_attribute_list_attribute(box, lmt_alignment_state.attr_list); + total += box_width(preptr); + if (cur_list.mode == -vmode) { + box_width(box) = box_width(preptr); + } else { + box_height(box) = box_width(preptr); + } + tail = box; + } + } + if (cur_list.mode == -vmode) { + /*tex + Make the unset node |r| into an |hlist_node| of width |w|, + setting the glue as if the width were |t|. + */ + box_height(colptr) = box_height(rowptr); + box_depth(colptr) = box_depth(rowptr); + if (! 
state) { + if (total == box_width(colptr)) { + box_glue_sign(colptr) = normal_glue_sign; + box_glue_order(colptr) = normal_glue_order; + box_glue_set(colptr) = 0.0; + } else if (total > box_width(colptr)) { + box_glue_sign(colptr) = stretching_glue_sign; + if (box_glue_stretch(colptr) == 0) { + box_glue_set(colptr) = 0.0; + } else { + box_glue_set(colptr) = (glueratio) ( ( (glueratio) total - (glueratio) box_width(colptr) ) / ( (glueratio) box_glue_stretch(colptr) ) ); + } + } else { + box_glue_order(colptr) = box_glue_sign(colptr); + box_glue_sign(colptr) = shrinking_glue_sign; + if (box_glue_shrink(colptr) == 0) { + box_glue_set(colptr) = 0.0; + } else if ((box_glue_order(colptr) == normal_glue_order) && (box_width(colptr) - total > box_glue_shrink(colptr))) { + box_glue_set(colptr) = 1.0; + } else { + box_glue_set(colptr) = (glueratio) ( ( (glueratio) box_width(colptr) - (glueratio) total ) / ( (glueratio) box_glue_shrink(colptr) ) ); + } + } + } + box_width(colptr) = width; + tex_aux_change_list_type(colptr, hlist_node); + node_subtype(colptr) = align_cell_list; + } else { + /*tex + Make the unset node |r| into a |vlist_node| of height |w|, + setting the glue as if the height were |t|. + */ + box_width(colptr) = box_width(rowptr); + if (! 
state) { + if (total == box_height(colptr)) { + box_glue_sign(colptr) = normal_glue_sign; + box_glue_order(colptr) = normal_glue_order; + box_glue_set(colptr) = 0.0; + } else if (total > box_height(colptr)) { + box_glue_sign(colptr) = stretching_glue_sign; + if (box_glue_stretch(colptr) == 0) { + box_glue_set(colptr) = 0.0; + } else { + box_glue_set(colptr) = (glueratio) ( ( (glueratio) total - (glueratio) box_height(colptr) ) / ( (glueratio) box_glue_stretch(colptr) ) ); + } + } else { + box_glue_order(colptr) = box_glue_sign(colptr); + box_glue_sign(colptr) = shrinking_glue_sign; + if (box_glue_shrink(colptr) == 0) { + box_glue_set(colptr) = 0.0; + } else if ((box_glue_order(colptr) == normal_glue_order) && (box_height(colptr) - total > box_glue_shrink(colptr))) { + box_glue_set(colptr) = 1.0; + } else { + box_glue_set(colptr) = (glueratio) ( ( (glueratio) box_height(colptr) - (glueratio) total) / ( (glueratio) box_glue_shrink(colptr) ) ); + } + } + } + box_height(colptr) = width; + tex_aux_change_list_type(colptr, vlist_node); + node_subtype(colptr) = align_cell_list; + } + box_shift_amount(colptr) = 0; + if (tail != hold_head) { + /*tex Append blank boxes to account for spanned nodes. */ + tex_try_couple_nodes(tail, node_next(colptr)); + tex_try_couple_nodes(colptr, node_next(hold_head)); + colptr = tail; + } + colptr = node_next(colptr); + preptr = node_next(preptr); + if (node_type(colptr) == glue_node) { + colptr = node_next(colptr); + } + if (node_type(preptr) == glue_node) { + preptr = node_next(preptr); + } + } while (colptr); + if (discard) { + tex_aux_strip_zero_tab_skips(rowptr); + } + if (reverse) { + box_list(rowptr) = tex_reversed_node_list(box_list(rowptr)); + } + } + break; + case rule_node: + { + /*tex + Make the running dimensions in rule |q| extend to the boundaries of the + alignment. 
+ */ + if (rule_width(rowptr) == null_flag) { + rule_width(rowptr) = box_width(preroll); + } + if (rule_height(rowptr) == null_flag) { + rule_height(rowptr) = box_height(preroll); + } + if (rule_depth(rowptr) == null_flag) { + rule_depth(rowptr) = box_depth(preroll); + } + /*tex We could use offset fields in rule instead. */ + if (offset) { + halfword prv = node_prev(rowptr); + halfword nxt = node_next(rowptr); + halfword box = null; + node_prev(rowptr) = null; + node_next(rowptr) = null; + box = tex_hpack(rowptr, 0, packing_additional, direction_unknown, holding_none_option); + tex_attach_attribute_list_attribute(box, rowptr); /*tex NOTE(review): the other calls of this helper in this function pass |lmt_alignment_state.attr_list| as second argument, here the rule node |rowptr| itself is passed; confirm that this is intended. */ + box_shift_amount(box) = offset; + node_subtype(box) = align_cell_list; /*tex This is not really a cell. */ + // node_subtype(box) = unknown_list; /*tex So maybe we will do this. */ + tex_try_couple_nodes(prv, box); + tex_try_couple_nodes(box, nxt); + rowptr = box; + } + } + break; + default: + /*tex + When we're in a |\halign| we get the rows (the |unset_node|s) while the + rules are horizontal ones. Furthermore we can get (vertical) glues and + whatever else got kicked in between the rows, but all that is (currently) + not processed. + */ + break; + } + rowptr = node_next(rowptr); + } + } + if (callback) { + lmt_alignment_callback(cur_list.head, wrapup_pass_alignment_context, lmt_alignment_state.attr_list, preamble); + } + tex_flush_node_list(preroll); + delete_attribute_reference(lmt_alignment_state.attr_list); + tex_aux_pop_alignment(); + /*tex + We now have a completed alignment, in the list that starts at |cur_list.head| and ends at + |cur_list.tail|. This list will be merged with the one that encloses it. (In case the + enclosing mode is |mmode|, for displayed formulas, we will need to insert glue before and + after the display; that part of the program will be deferred until we're more familiar with + such operations.) 
+ */ + { + scaled prevdepth = cur_list.prev_depth; + halfword head = node_next(cur_list.head); + halfword tail = cur_list.tail; + tex_pop_nest(); + if (cur_list.mode == mmode) { + tex_finish_display_alignment(head, tail, prevdepth); + } else { + cur_list.prev_depth = prevdepth; + if (head) { + tex_tail_append(head); + cur_list.tail = tail; + } + if (cur_list.mode == vmode) { + if (! lmt_page_builder_state.output_active) { + lmt_page_filter_callback(alignment_page_context, 0); + } + tex_build_page(); + } + } + } +} + +/*tex + + The token list |omit_template| just referred to is a constant token list that contains the + special control sequence |\endtemplate| only. + +*/ + +void tex_initialize_alignments(void) +{ + lmt_alignment_state.hold_token_head = tex_get_available_token(null); + lmt_alignment_state.omit_template = tex_get_available_token(deep_frozen_end_template_1_token); + span_span(end_span) = max_quarterword + 1; /*tex Presumably larger than any real span count, so that span list walks stop at |end_span|. */ + align_record_span_ptr(end_span) = null; +} + +/*tex + + We no longer store |hold_token_head| and |omit_template| in the format file. It is a bit + cleaner to just initialize them. So we free them. + +*/ + +void tex_cleanup_alignments(void) +{ + tex_put_available_token(lmt_alignment_state.hold_token_head); + tex_put_available_token(lmt_alignment_state.omit_template); + lmt_alignment_state.hold_token_head = null; /*tex Reset both fields so that no stale token reference is kept after the tokens were returned. */ + lmt_alignment_state.omit_template = null; +} + +/*tex + + We've now covered most of the abuses of |\halign| and |\valign|. Let's take a look at what + happens when they are used correctly. + + An |align_group| code is supposed to remain on the |save_stack| during an entire alignment, + until |finish_align| removes it. + + A devious user might force an |end_template| command to occur just about anywhere; we must + defeat such hacks. 
+ +*/ + +void tex_run_alignment_end_template(void) +{ /*tex Handles an |end_template|: inspects the input stack and then either finishes the current column (and possibly the row) or reports an error. */ + lmt_input_state.base_ptr = lmt_input_state.input_stack_data.ptr; + lmt_input_state.input_stack[lmt_input_state.base_ptr] = lmt_input_state.cur_input; /*tex Store the current input state in the stack slot so that the loop below can inspect it like the other levels. */ + while (( lmt_input_state.input_stack[lmt_input_state.base_ptr].index != template_post_text ) + && (! lmt_input_state.input_stack[lmt_input_state.base_ptr].loc) + && ( lmt_input_state.input_stack[lmt_input_state.base_ptr].state == token_list_state)) { + --lmt_input_state.base_ptr; /*tex Step down past token list levels without a |loc| that are not the post template text. */ + } + if (lmt_input_state.input_stack[lmt_input_state.base_ptr].index != template_post_text ) { + tex_alignment_interwoven_error(2); /*tex The level we stopped at is not a |template_post_text|. */ + } else if (lmt_input_state.input_stack[lmt_input_state.base_ptr].loc) { + tex_alignment_interwoven_error(3); /*tex That level still has a |loc| set. */ + } else if (lmt_input_state.input_stack[lmt_input_state.base_ptr].state != token_list_state) { + tex_alignment_interwoven_error(4); /*tex That level is not a token list. */ + } else if (cur_group == align_group) { + if (! tex_wrapped_up_paragraph(align_par_context)) { /* needs testing */ + tex_end_paragraph(align_group, align_par_context); + if (tex_aux_finish_column()) { + tex_aux_finish_row(); + } + } + } else { + tex_off_save(); + } +} + +/*tex + + When |\cr| or |\span| or a tab mark comes through the scanner into |main_control|, it might be + that the user has foolishly inserted one of them into something that has nothing to do with + alignment. But it is far more likely that a left brace or right brace has been omitted, since + |get_next| takes actions appropriate to alignment only when |\cr| or |\span| or tab marks occur + with |align_state = 0|. The following program attempts to make an appropriate recovery. + + As an experiment we support nested |\noalign| usage but we do keep the braces so there is still + grouping. We don't flag these groups as |no_align_group| because then we need to do more work + and it's not worth the trouble. One can actually argue for not doing that anyway. 
+ + I might now rename the next one to |run_alignment| (and then also a companion as we have two + cases of usage). + +*/ + +void tex_run_alignment_error(void) +{ + int cmd = cur_cmd; + int chr = cur_chr; + if (cmd == alignment_cmd && chr == no_align_code) { + if (! tex_aux_nested_no_align()) { + tex_handle_error( + normal_error_type, + "Misplaced \\noalign", + "I expect to see \\noalign only after the \\cr of an alignment. Proceed, and I'll\n" + "ignore this case." + ); + } + } else if (abs(lmt_input_state.align_state) > 2) { + /*tex + Express consternation over the fact that no alignment is in progress. In traditional + \TEX\ the ampersand case will show a specific tab help, while in case of another + character a more generic message is shown. + + We go for consistency here, so a little patch: + */ + switch (cmd) { + case alignment_tab_cmd: + tex_handle_error(normal_error_type, "Misplaced %C", cmd, chr, + "I can't figure out why you would want to use a tab mark here. If some right brace\n" + "up above has ended a previous alignment prematurely, you're probably due for more\n" + "error messages." + ); + break; + default: + tex_handle_error(normal_error_type, "Misplaced %C", cmd, chr, + "I can't figure out why you would want to use a tab mark or \\cr or \\span just\n" + "now. If something like a right brace up above has ended a previous alignment\n" + "prematurely, you're probably due for more error messages." + ); + break; + } + } else { + const char * helpinfo = + "I've put in what seems to be necessary to fix the current column of the current\n" + "alignment. 
Try to go on, since this might almost work."; + tex_back_input(cur_tok); + if (lmt_input_state.align_state < 0) { + ++lmt_input_state.align_state; + cur_tok = left_brace_token + '{'; + tex_handle_error( + insert_error_type, + "Missing { inserted", + helpinfo + ); + } else { + --lmt_input_state.align_state; + cur_tok = right_brace_token + '}'; + switch (cmd) { + case alignment_cmd: + tex_handle_error( + insert_error_type, + "Missing } inserted, unexpected ", + cmd, chr, + helpinfo + ); + break; + case alignment_tab_cmd: + tex_handle_error( + insert_error_type, + "Missing } inserted, unexpected tab character (normally &)", + helpinfo + ); + break; + } + } + } +} diff --git a/source/luametatex/source/tex/texalign.h b/source/luametatex/source/tex/texalign.h new file mode 100644 index 000000000..b2ecba445 --- /dev/null +++ b/source/luametatex/source/tex/texalign.h @@ -0,0 +1,24 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_ALIGN_H +# define LMT_ALIGN_H + +/* todo : rename */ + +extern void tex_initialize_alignments (void); +extern void tex_cleanup_alignments (void); + +extern void tex_insert_alignment_template (void); +extern void tex_run_alignment_initialize (void); +extern void tex_run_alignment_end_template (void); +extern void tex_run_alignment_error (void); + +extern void tex_finish_alignment_group (void); +extern void tex_finish_no_alignment_group (void); + +extern void tex_alignment_interwoven_error (int n); +extern halfword tex_alignment_hold_token_head (void); + +# endif diff --git a/source/luametatex/source/tex/texarithmetic.c b/source/luametatex/source/tex/texarithmetic.c new file mode 100644 index 000000000..d9cf9859d --- /dev/null +++ b/source/luametatex/source/tex/texarithmetic.c @@ -0,0 +1,433 @@ +/* + See license.txt in the root of this project. 
+*/ + +# include "luametatex.h" + +/*tex + + The principal computations performed by \TEX\ are done entirely in terms of integers less than + $2^{31}$ in magnitude; and divisions are done only when both dividend and divisor are + nonnegative. Thus, the arithmetic specified in this program can be carried out in exactly the + same way on a wide variety of computers, including some small ones. Why? Because the arithmetic + calculations need to be spelled out precisely in order to guarantee that \TEX\ will produce + identical output on different machines. + + If some quantities were rounded differently in different implementations, we would find that + line breaks and even page breaks might occur in different places. Hence the arithmetic of \TEX\ + has been designed with care, and systems that claim to be implementations of \TEX82 should + follow precisely the \TEX82\ calculations as they appear in the present program. + + Actually there are three places where \TEX\ uses |div| with a possibly negative numerator. + These are harmless; see |div| in the index. Also if the user sets the |\time| or the |\year| to + a negative value, some diagnostic information will involve negative|-|numerator division. The + same remarks apply for |mod| as well as for |div|. + + The |half| routine, defined in the header file, calculates half of an integer, using an + unambiguous convention with respect to signed odd numbers. + + The |round_decimals| function, defined in the header file, is used to create a scaled integer + from a given decimal fraction $(.d_0d_1 \ldots d_{k-1})$, where |0 <= k <= 17|. The digit $d_i$ + is given in |dig[i]|, and the calculation produces a correctly rounded result. + + Keep in mind that in spite of these precautions results can be different over time. For + instance, fonts and hyphenation patterns do evolve over time, and actually did in the many decades + that \TEX\ has been around. Also, delegating work to \LUA, which uses doubles, can have + consequences. 
+ +*/ + +/*tex + + Physical sizes that a \TEX\ user specifies for portions of documents are represented internally + as scaled points. Thus, if we define an |sp| (scaled point) as a unit equal to $2^{-16}$ + printer's points, every dimension inside of \TEX\ is an integer number of sp. There are exactly + 4,736,286.72 sp per inch. Users are not allowed to specify dimensions larger than $2^{30} - 1$ + sp, which is a distance of about 18.892 feet (5.7583 meters); two such quantities can be added + without overflow on a 32-bit computer. + + The present implementation of \TEX\ does not check for overflow when dimensions are added or + subtracted. This could be done by inserting a few dozen tests of the form |if x >= 010000000000| + then |report_overflow|, but the chance of overflow is so remote that such tests do not seem + worthwhile. + + \TEX\ needs to do only a few arithmetic operations on scaled quantities, other than addition and + subtraction, and the following subroutines do most of the work. A single computation might use + several subroutine calls, and it is desirable to avoid producing multiple error messages in case + of arithmetic overflow; so the routines set the global variable |arith_error| to |true| instead + of reporting errors directly to the user. Another global variable, |tex_remainder|, holds the + remainder after a division. + + The first arithmetical subroutine we need computes $nx+y$, where |x| and~|y| are |scaled| and + |n| is an integer. We will also use it to multiply integers. 
+ +*/ + +inline static scaled tex_aux_m_and_a(int n, scaled x, scaled y, scaled max_answer) +{ + if (n == 0) { + return y; + } else { + if (n < 0) { + x = -x; + n = -n; + } + if (((x <= (max_answer - y) / n) && (-x <= (max_answer + y) / n))) { + return n * x + y; + } else { + lmt_scanner_state.arithmic_error = 1; + return 0; + } + } +} + +scaled tex_multiply_and_add (int n, scaled x, scaled y, scaled max_answer) { return tex_aux_m_and_a(n, x, y, max_answer); } +scaled tex_nx_plus_y (int n, scaled x, scaled y) { return tex_aux_m_and_a(n, x, y, 07777777777); } +scaled tex_multiply_integers (int n, scaled x) { return tex_aux_m_and_a(n, x, 0, 017777777777); } + +/*tex We also need to divide scaled dimensions by integers. */ + +/* +scaled tex_x_over_n_r(scaled x, int n, int *remainder) +{ + if (n == 0) { + lmt_scanner_state.arithmic_error = 1; + if (remainder) { + *remainder = x; + } + return 0; + } else { + int negative = 0; + if (n < 0) { + x = -x; + n = -n; + negative = 1; + } + if (x >= 0) { + int r = x % n; + if (remainder) { + if (negative) { + r = -r; + } + *remainder = r; + } + return (x / n); + } else { + int r = -((-x) % n); + if (remainder) { + if (negative) { + r = -r; + } + *remainder = r; + } + return -((-x) / n); + } + } +} +*/ + +scaled tex_x_over_n_r(scaled x, int n, int *remainder) +{ + /*tex Should |tex_remainder| be negated? 
*/ + if (n == 0) { + lmt_scanner_state.arithmic_error = 1; + *remainder = x; + return 0; + } else { + *remainder = x % n; + return x/n; + } +} + +/* +scaled tex_x_over_n(scaled x, int n) +{ + if (n == 0) { + lmt_scanner_state.arithmic_error = 1; + return 0; + } else { + if (n < 0) { + x = -x; + n = -n; + } + if (x >= 0) { + return (x / n); + } else { + return -((-x) / n); + } + } +} +*/ + +scaled tex_x_over_n(scaled x, int n) +{ + if (n == 0) { + lmt_scanner_state.arithmic_error = 1; + return 0; + } else { + return x/n; + } +} + +/*tex + + Then comes the multiplication of a scaled number by a fraction |n/d|, where |n| and |d| are + nonnegative integers |<= 2^16| and |d| is positive. It would be too dangerous to multiply by~|n| + and then divide by~|d|, in separate operations, since overflow might well occur; and it would + be too inaccurate to divide by |d| and then multiply by |n|. Hence this subroutine simulates + 1.5-precision arithmetic. + +*/ + +/* +scaled tex_xn_over_d_r(scaled x, int n, int d, int *remainder) +{ + if (x == 0) { + if (remainder) { + *remainder = 0; + } + return 0; + } else { + int positive = 1; + unsigned int t, u, v, xx, dd; + if (x < 0) { + x = -x; + positive = 0; + } + xx = (unsigned int) x; + dd = (unsigned int) d; + t = ((xx % 0100000) * (unsigned int) n); + u = ((xx / 0100000) * (unsigned int) n + (t / 0100000)); + v = (u % dd) * 0100000 + (t % 0100000); + if (u / dd >= 0100000) { + lmt_scanner_state.arithmic_error = 1; + } else { + u = 0100000 * (u / dd) + (v / dd); + } + if (positive) { + if (remainder) { + *remainder = (int) (v % dd); + } + return (scaled) u; + } else { + if (remainder) { + *remainder = - (int) (v % dd); + } + return - (scaled) u; + } + } +} +*/ + +scaled tex_xn_over_d_r(scaled x, int n, int d, int *remainder) +{ + if (x == 0) { + *remainder = 0; + return 0; + } else { + long long v = (long long) x * (long long) n; + *remainder = (scaled) (v % d); + return (scaled) (v / d); + } +} + +/* +scaled tex_xn_over_d(scaled 
x, int n, int d) +{ + if (x == 0) { + return 0; + } else { + int positive = 1; + unsigned int t, u, v, xx, dd; + if (x < 0) { + x = -x; + positive = 0; + } + xx = (unsigned int) x; + dd = (unsigned int) d; + t = ((xx % 0100000) * (unsigned int) n); + u = ((xx / 0100000) * (unsigned int) n + (t / 0100000)); + v = (u % dd) * 0100000 + (t % 0100000); + if (u / dd >= 0100000) { + lmt_scanner_state.arithmic_error = 1; + } else { + u = 0100000 * (u / dd) + (v / dd); + } + if (positive) { + return (scaled) u; + } else { + return - (scaled) u; + } + } +} +*/ + +scaled tex_xn_over_d(scaled x, int n, int d) +{ + if (x == 0) { + return 0; + } else { + long long v = (long long) x * (long long) n; + return (scaled) (v / d); + } +} + +/*tex + + When \TEX\ packages a list into a box, it needs to calculate the proportionality ratio by which + the glue inside the box should stretch or shrink. This calculation does not affect \TEX's + decision making, so the precise details of rounding, etc., in the glue calculation are not of + critical importance for the consistency of results on different computers. + + We shall use the type |glue_ratio| for such proportionality ratios. A glue ratio should take the + same amount of memory as an |integer| (usually 32 bits) if it is to blend smoothly with \TEX's + other data structures. Thus |glue_ratio| should be equivalent to |short_real| in some + implementations of \PASCAL. Alternatively, it is possible to deal with glue ratios using nothing + but fixed-point arithmetic; see {\em TUGboat \bf3},1 (March 1982), 10--27. (But the routines + cited there must be modified to allow negative glue ratios.) + +*/ + +/* +scaled tex_round_xn_over_d(scaled x, int n, unsigned int d) +{ + if (x == 0) { + return 0; + } else if (n == d) { + return x; + } else { + int positive = 1; + unsigned t, u, v; + if (x < 0) { + positive = ! positive; + x = -x; + } + if (n < 0) { + positive = ! 
positive; + n = -n; + } + t = (unsigned) ((x % 0100000) * n); + u = (unsigned) (((unsigned) (x) / 0100000) * (unsigned) n + (t / 0100000)); + v = (u % d) * 0100000 + (t % 0100000); + if (u / d >= 0100000) { + scanner_state.arithmic_error = 1; + } else { + u = 0100000 * (u / d) + (v / d); + } + v = v % d; + if (2 * v >= d) { + u++; + } + return positive ? (scaled) u : - (scaled) u; + } +} +*/ + +/* +scaled tex_round_xn_over_d(scaled x, int n, unsigned int d) +{ + if (x == 0|| n == d) { + return x; + } else { + double v = (1.0 / d) * n * x; + return (v < 0.0) ? (int) (v - 0.5) : (int) (v + 0.5); + } +} +*/ + +scaled tex_round_xn_over_d(scaled x, int n, unsigned int d) +{ + if (x == 0 || (unsigned int) n == d) { + return x; + } else { + return scaledround((1.0 / d) * n * x); + } +} + +/*tex + + The return value is a decimal number with the point |dd| places from the back, |scaled_out| is + the number of scaled points corresponding to that. + +*/ + +/* not used: + +scaled tex_divide_scaled(scaled s, scaled m, int dd) +{ + if (s == 0) { + return 0; + } else { + scaled q, r; + int sign = 1; + if (s < 0) { + sign = -sign; + s = -s; + } + if (m < 0) { + sign = -sign; + m = -m; + } + if (m == 0) { + normal_error("arithmetic", "divided by zero"); + } else if (m >= (max_integer / 10)) { + normal_error("arithmetic", "number too big"); + } + q = s / m; + r = s % m; + for (int i = 1; i <= (int) dd; i++) { + q = 10 * q + (10 * r) / m; + r = (10 * r) % m; + } + if (2 * r >= m) { + q++; // rounding + } + return sign * q; + } +} +*/ + +/* +scaled divide_scaled_n(double sd, double md, double n) +{ + scaled di = 0; + double dd = sd / md * n; + if (dd > 0.0) { + di = ifloor( dd + 0.5); + } else if (dd < 0.0) { + di = -ifloor((-dd) + 0.5); + } + return di; +} +*/ + +scaled tex_divide_scaled_n(double sd, double md, double n) +{ + return scaledround(sd / md * n); +} + +/* +scaled tex_ext_xn_over_d(scaled x, scaled n, scaled d) +{ + double r = (((double) x) * ((double) n)) / ((double) d); 
+ if (r > DBL_EPSILON) { + r += 0.5; + } else { + r -= 0.5; + } + if (r >= (double) max_integer || r <= -(double) max_integer) { + tex_normal_warning("internal", "arithmetic number too big"); + } + return (scaled) r; +} +*/ + +scaled tex_ext_xn_over_d(scaled x, scaled n, scaled d) +{ + double r = (((double) x) * ((double) n)) / ((double) d); + if (r >= (double) max_integer || r <= -(double) max_integer) { + /* can we really run into this? */ + tex_normal_warning("internal", "arithmetic number too big"); + } + return scaledround(r); +} diff --git a/source/luametatex/source/tex/texarithmetic.h b/source/luametatex/source/tex/texarithmetic.h new file mode 100644 index 000000000..53deca36b --- /dev/null +++ b/source/luametatex/source/tex/texarithmetic.h @@ -0,0 +1,42 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_ARITHMETIC_H +# define LMT_ARITHMETIC_H + +/*tex + + Fixed-point arithmetic is done on {\em scaled integers} that are multiples of $2^{-16}$. In + other words, a binary point is assumed to be sixteen bit positions from the right end of a + binary computer word. 
+
+*/
+
+extern scaled tex_multiply_and_add (int n, scaled x, scaled y, scaled max_answer);
+extern scaled tex_nx_plus_y (int n, scaled x, scaled y);
+extern scaled tex_multiply_integers (int n, scaled x);
+extern scaled tex_x_over_n_r (scaled x, int n, int *remainder);
+extern scaled tex_x_over_n (scaled x, int n);
+extern scaled tex_xn_over_d (scaled x, int n, int d);
+extern scaled tex_xn_over_d_r (scaled x, int n, int d, int *remainder);
+/* scaled tex_divide_scaled (scaled s, scaled m, int dd); */
+extern scaled tex_divide_scaled_n (double s, double m, double d);
+extern scaled tex_ext_xn_over_d (scaled, scaled, scaled);
+extern scaled tex_round_xn_over_d (scaled x, int n, unsigned int d);
+
+/*tex
+    Fold the |k| entries of |digits| from the last one down to the first: each step adds
+    |digits[i] * two| to the running value and divides by ten. The final value is rounded
+    with |(a + 1) / 2|.
+*/
+
+inline static scaled tex_round_decimals_digits(const unsigned char *digits, unsigned k)
+{
+    int accumulator = 0;
+    for (unsigned i = k; i > 0; i--) {
+        accumulator = (accumulator + digits[i - 1] * two) / 10;
+    }
+    return (accumulator + 1) / 2;
+}
+
+/*tex Halve |x|; when |odd(x)| holds, one is added before the division. */
+
+inline static int tex_half_scaled(int x)
+{
+    if (odd(x)) {
+        return (x + 1) / 2;
+    }
+    return x / 2;
+}
+
+# endif
diff --git a/source/luametatex/source/tex/texbuildpage.c b/source/luametatex/source/tex/texbuildpage.c
new file mode 100644
index 000000000..a0b5882dd
--- /dev/null
+++ b/source/luametatex/source/tex/texbuildpage.c
@@ -0,0 +1,1271 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+# include "luametatex.h"
+
+/*tex
+
+    When \TEX\ appends new material to its main vlist in vertical mode, it uses a method something
+    like |vsplit| to decide where a page ends, except that the calculations are done \quote {on
+    line} as new items come in. The main complication in this process is that insertions must be
+    put into their boxes and removed from the vlist, in a more-or-less optimum manner.
+
+    We shall use the term \quote {current page} for that part of the main vlist that is being
+    considered as a candidate for being broken off and sent to the user's output routine. The
+    current page starts at |node_next(page_head)|, and it ends at |page_tail|.
We have |page_head = + page_tail| if this list is empty. + + Utter chaos would reign if the user kept changing page specifications while a page is being + constructed, so the page builder keeps the pertinent specifications frozen as soon as the page + receives its first box or insertion. The global variable |page_contents| is |empty| when the + current page contains only mark nodes and content-less whatsit nodes; it is |inserts_only| + if the page contains only insertion nodes in addition to marks and whatsits. Glue nodes, kern + nodes, and penalty nodes are discarded until a box or rule node appears, at which time + |page_contents| changes to |box_there|. As soon as |page_contents| becomes non-|empty|, the + current |vsize| and |max_depth| are squirreled away into |page_goal| and |page_max_depth|; the + latter values will be used until the page has been forwarded to the user's output routine. The + |\topskip| adjustment is made when |page_contents| changes to |box_there|. + + Although |page_goal| starts out equal to |vsize|, it is decreased by the scaled natural + height-plus-depth of the insertions considered so far, and by the |\skip| corrections for + those insertions. Therefore it represents the size into which the non-inserted material + should fit, assuming that all insertions in the current page have been made. + + The global variables |best_page_break| and |least_page_cost| correspond respectively to the + local variables |best_place| and |least_cost| in the |vert_break| routine that we have already + studied; i.e., they record the location and value of the best place currently known for + breaking the current page. The value of |page_goal| at the time of the best break is stored in + |best_size|. 
+
+*/
+
+/*tex
+    The one global record of the page builder. The values below are the static defaults;
+    |tex_initialize_pagestate| assigns the run time start values.
+*/
+
+page_builder_state_info lmt_page_builder_state = {
+    .page_tail        = null,
+    .contents         = 0,
+    .max_depth        = 0,
+    .best_break       = null,
+    .least_cost       = 0,
+    .best_size        = 0,
+    .goal             = 0,
+    .vsize            = 0,
+    .total            = 0,
+    .depth            = 0,
+    .page_so_far      = { 0 },
+    .insert_penalties = 0,
+    .insert_heights   = 0,
+    .last_glue        = max_halfword,
+    .last_penalty     = 0,
+    .last_kern        = 0,
+    .last_node_type   = unknown_node_type,
+    .last_node_subtype= unknown_node_subtype,
+    .last_extra_used  = 0,
+    .last_boundary    = 0,
+    .output_active    = 0,
+    .dead_cycles      = 0,
+    .current_state    = 0
+};
+
+/*tex
+    Accessors for the stretch slots of |page_so_far|, indexed by glue order. The argument is
+    parenthesized so that any expression can be passed safely.
+*/
+
+# define page_stretch_1(order) lmt_page_builder_state.page_so_far[page_initial_state + (order)] /* was [1 + order] */
+# define page_stretch_2(order) lmt_page_builder_state.page_so_far[page_stretch_state + (order)] /* was [2 + order] */
+
+static void tex_aux_fire_up (halfword c);
+
+/*tex
+
+    The page builder has another data structure to keep track of insertions. This is a list of
+    four-word nodes, starting and ending at |page_insert_head|. That is, the first element of the
+    list is node |t$_1$ = node_next(page_insert_head)|; node |t$_j$| is followed by |t$_{j+1}$ =
+    node_next(t$_j$)|; and if there are |n| items we have |t$_{n+1}$ = page_insert_head|. The
+    |subtype| field of each node in this list refers to an insertion number; for example, |\insert
+    250| would correspond to a node whose |subtype| is |qi(250)| (the same as the |subtype| field
+    of the relevant |insert_node|). These |subtype| fields are in increasing order, and |subtype
+    (page_insert_head) = 65535|, so |page_insert_head| serves as a convenient sentinel at the end
+    of the list. A record is present for each insertion number that appears in the current page.
+
+    The |type| field in these nodes distinguishes two possibilities that might occur as we look
+    ahead before deciding on the optimum page break.
If |type(r) = inserting_node|, then |height(r)| + contains the total of the height-plus-depth dimensions of the box and all its inserts seen so + far. If |type(r) = split_up_node|, then no more insertions will be made into this box, because at + least one previous insertion was too big to fit on the current page; |broken_ptr(r)| points to + the node where that insertion will be split, if \TEX\ decides to split it, |broken_insert(r)| + points to the insertion node that was tentatively split, and |height(r)| includes also the + natural height plus depth of the part that would be split off. + + In both cases, |last_insert(r)| points to the last |insert_node| encountered for box + |qo(subtype(r))| that would be at least partially inserted on the next page; and + |best_insert(r)| points to the last such |insert_node| that should actually be inserted, to get + the page with minimum badness among all page breaks considered so far. We have |best_insert + (r) = null| if and only if no insertion for this box should be made to produce this optimum page. + + Pages are built by appending nodes to the current list in \TEX's vertical mode, which is at the + outermost level of the semantic nest. This vlist is split into two parts; the \quote {current + page} that we have been talking so much about already, and the \quote {contribution list} that + receives new nodes as they are created. The current page contains everything that the page + builder has accounted for in its data structures, as described above, while the contribution + list contains other things that have been generated by other parts of \TEX\ but have not yet + been seen by the page builder. The contribution list starts at |node_next(contribute_head)|, and it + ends at the current node in \TEX's vertical mode. + + When \TEX\ has appended new material in vertical mode, it calls the procedure |build_page|, + which tries to catch up by moving nodes from the contribution list to the current page. 
This
+    procedure will succeed in its goal of emptying the contribution list, unless a page break is
+    discovered, i.e., unless the current page has grown to the point where the optimum next page
+    break has been determined. In the latter case, the nodes after the optimum break will go back
+    onto the contribution list, and control will effectively pass to the user's output routine.
+
+    We make |type (page_head) = glue_node|, so that an initial glue node on the current page will
+    not be considered a valid breakpoint. We keep this old tex trickery of cheating with node types
+    but have to make sure that the size is valid to do so (and we have different sizes!).
+
+*/
+
+/*tex Bring every field of the page builder record to its start value; the page is empty. */
+
+void tex_initialize_pagestate(void)
+{
+    page_builder_state_info *state = &lmt_page_builder_state;
+    int slot = page_stretch_state;
+    state->page_tail = page_head;
+    state->contents = contribute_nothing;
+    state->max_depth = 0;
+    state->best_break = null;
+    state->least_cost = 0;
+    state->best_size = 0;
+    state->goal = 0;
+    state->vsize = 0;
+    state->total = 0;
+    state->depth = 0;
+    /*tex Wipe the stretch and shrink accumulators. */
+    while (slot <= page_shrink_state) {
+        state->page_so_far[slot] = 0;
+        slot++;
+    }
+    state->insert_penalties = 0;
+    state->insert_heights = 0;
+    state->last_glue = max_halfword;
+    state->last_penalty = 0;
+    state->last_kern = 0;
+    state->last_extra_used = 0;
+    state->last_boundary = 0;
+    state->last_node_type = unknown_node_type;
+    state->last_node_subtype = unknown_node_subtype;
+    state->output_active = 0;
+    state->dead_cycles = 0;
+    state->current_state = 0;
+}
+
+/*tex
+    Set up the two list heads used by the builder: |page_insert_head| becomes a split node that
+    links to itself and carries index 65535, and |page_head| is typed as a glue node.
+*/
+
+void tex_initialize_buildpage(void)
+{
+    node_type(page_insert_head) = split_node;
+    node_subtype(page_insert_head) = insert_split_subtype;
+    /*tex some signal */
+    insert_index(page_insert_head) = 65535;
+    node_next(page_insert_head) = page_insert_head;
+    /*tex brr, a temp node has a different size than a glue node */
+    node_type(page_head) = glue_node;
+    /*tex basically: unset */
+    node_subtype(page_head) = page_glue;
+}
+
+/*tex
+
+    An array |page_so_far| records the heights and depths of everything on the current page. This
+    array contains six |scaled| numbers, like the similar arrays already considered in |line_break|
+    and |vert_break|; and it also contains |page_goal| and |page_depth|, since these values are all
+    accessible to the user via |set_page_dimen| commands. The value of |page_so_far[1]| is also
+    called |page_total|. The stretch and shrink components of the |\skip| corrections for each
+    insertion are included in |page_so_far|, but the natural space components of these corrections
+    are not, since they have been subtracted from |page_goal|.
+
+    The variable |page_depth| records the depth of the current page; it has been adjusted so that it
+    is at most |page_max_depth|. The variable |last_glue| points to the glue specification of the
+    most recent node contributed from the contribution list, if this was a glue node; otherwise
+    |last_glue = max_halfword|. (If the contribution list is nonempty, however, the value of
+    |last_glue| is not necessarily accurate.) The variables |last_penalty|, |last_kern|, and
+    |last_node_type| are similar. And finally, |insert_penalties| holds the sum of the penalties
+    associated with all split and floating insertions.
+
+*/
+
+/*tex Print the page total together with its stretch and shrink components. */
+
+void tex_print_page_totals(void)
+{
+    tex_print_format(
+        "%P",
+        page_total,
+        page_stretch, page_filstretch, page_fillstretch, page_filllstretch,
+        page_shrink
+    );
+}
+
+/*tex
+
+    Here is a procedure that is called when the |page_contents| is changing from |empty| to
+    |inserts_only| or |box_there|.
+
+*/
+
+/*tex
+    Record |s| as the page contents, take over |max_depth_par| and |vsize_par|, set the least
+    cost to |awful_bad| and zero the page totals. With |tracing_pages_par| positive the frozen
+    values are reported.
+*/
+
+static void tex_aux_freeze_page_specs(int s)
+{
+    page_builder_state_info *state = &lmt_page_builder_state;
+    int slot = page_stretch_state;
+    state->contents = s;
+    state->max_depth = max_depth_par;
+    state->least_cost = awful_bad;
+    /* page_builder_state.insert_heights = 0; */ /* up to the user */
+    while (slot <= page_shrink_state) {
+        state->page_so_far[slot] = 0;
+        slot++;
+    }
+    page_goal = vsize_par;
+    page_vsize = vsize_par;
+    page_depth = 0;
+    page_total = 0;
+    if (tracing_pages_par <= 0) {
+        return;
+    }
+    tex_begin_diagnostic();
+    tex_print_format(
+        "[page: frozen state, goal=%D, maxdepth=%D, contribution=%s, insertheights=%D]",
+        page_goal, pt_unit,
+        state->max_depth, pt_unit,
+        lmt_interface.page_contribute_values[s].name,
+        state->insert_heights, pt_unit
+    );
+    tex_end_diagnostic();
+}
+
+/*tex
+    Lower the page goal by |delta| and add |total| to the accumulated insert heights, which are
+    capped at |max_dimen|. With |tracing_inserts_par| positive the new values are reported.
+*/
+
+static void update_page_goal(halfword index, scaled total, scaled delta)
+{
+    page_builder_state_info *state = &lmt_page_builder_state;
+    page_goal -= delta;
+    state->insert_heights += total;
+    if (state->insert_heights > max_dimen) {
+        state->insert_heights = max_dimen;
+    }
+    if (tracing_inserts_par <= 0) {
+        return;
+    }
+    tex_begin_diagnostic();
+    tex_print_format(
+        "[page: update page goal for insert, index=%i, total=%D, insertheights=%D, vsize=%D, delta=%D, goal=%D]",
+        index,
+        total, pt_unit,
+        state->insert_heights, pt_unit,
+        page_vsize, pt_unit,
+        delta, pt_unit,
+        page_goal, pt_unit
+    );
+    tex_end_diagnostic();
+}
+
+/*tex
+
+    The global variable |output_active| is true during the time the user's output routine is
+    driving \TEX. The page builder is ready to start a fresh page if we initialize the following
+    state variables. (However, the page insertion list is initialized elsewhere.)
+
+*/
+
+/*tex Empty the current page and reset the bookkeeping of the last contributed node. */
+
+static void tex_aux_start_new_page(void)
+{
+    page_builder_state_info *state = &lmt_page_builder_state;
+    state->contents = contribute_nothing;
+    state->page_tail = page_head;
+    node_next(page_head) = null;
+    state->last_glue = max_halfword;
+    state->last_penalty = 0;
+    state->last_kern = 0;
+    state->last_boundary = 0;
+    state->last_node_type = unknown_node_type;
+    state->last_node_subtype = unknown_node_subtype;
+    page_depth = 0;
+    state->max_depth = 0;
+}
+
+/*tex
+
+    At certain times box |\outputbox| is supposed to be void (i.e., |null|), or an insertion box is
+    supposed to be ready to accept a vertical list. If not, an error message is printed, and the
+    following subroutine flushes the unwanted contents, reporting them to the user.
+
+    The box |n| is shown inside a diagnostic, its node list is flushed, and |null| is returned so
+    that the caller can store that in place of the old reference.
+
+*/
+
+static halfword tex_aux_delete_box_content(int n)
+{
+    /*tex Report first, then release. */
+    tex_begin_diagnostic();
+    tex_print_format("[page: deleting box]");
+    tex_show_box(n);
+    tex_end_diagnostic();
+    tex_flush_node_list(n);
+    return null;
+}
+
+/*tex
+
+    The following procedure guarantees that an insert box is not an |\hbox|. A user can actually
+    mess with this box, unless we decide to come up with a dedicated data structure for it.
+
+    The result is 1 when |content| is void or not an hlist; otherwise an error is raised and 0
+    is returned.
+
+*/
+
+static int tex_aux_valid_insert_content(halfword content)
+{
+    if (! content || node_type(content) != hlist_node) {
+        return 1;
+    }
+    /*tex It's not always a box so we need to adapt this message some day. */
+    tex_handle_error(
+        normal_error_type,
+        "Insertions can only be added to a vbox",
+        "Tut tut: You're trying to \\insert into a \\box register that now contains an\n"
+        "\\hbox. Proceed, and I'll discard its present contents."
+    );
+    return 0;
+}
+
+/*tex
+
+    \TEX\ is not always in vertical mode at the time |build_page| is called; the current mode
+    reflects what \TEX\ should return to, after the contribution list has been emptied.
A call on
+    |build_page| should be immediately followed by |goto big_switch|, which is \TEX's central
+    control point.
+
+    Append contributions to the current page.
+
+*/
+
+/*tex Trace one candidate page break: totals, goal, badness, penalty, cost and the decisions. */
+
+static void tex_aux_display_page_break_cost(halfword badness, halfword penalty, halfword cost, int moveon, int fireup)
+{
+    tex_begin_diagnostic();
+    tex_print_format("[page: break, total %P, goal %D, badness %B, penalty %i, cost %B%s, moveon %s, fireup %s]",
+        page_total, page_stretch, page_filstretch, page_fillstretch, page_filllstretch, page_shrink,
+        page_goal, pt_unit, badness, penalty, cost, cost < lmt_page_builder_state.least_cost ? "#" : "",
+        moveon ? "yes" : "no", fireup ? "yes" : "no"
+    );
+    tex_end_diagnostic();
+}
+
+static void tex_aux_display_insertion_split_cost(halfword index, scaled height, halfword penalty)
+{
+    /*tex Display the insertion split cost. */
+    tex_begin_diagnostic();
+    tex_print_format("[page: split insert %i: height %D, depth %D, penalty %i]",
+        index, height, pt_unit, lmt_packaging_state.best_height_plus_depth, pt_unit, penalty
+    );
+    tex_end_diagnostic();
+}
+
+/*tex
+    The badness of the current page relative to |goal|: zero when the page is short and has
+    infinite stretch, |awful_bad| when it is too long for the available shrink, otherwise the
+    outcome of |tex_badness|.
+*/
+
+static halfword tex_aux_page_badness(scaled goal)
+{
+    if (page_total < goal) {
+        if (page_filstretch || page_fillstretch || page_filllstretch) {
+            return 0;
+        } else {
+            return tex_badness(goal - page_total, page_stretch);
+        }
+    } else if (page_total - goal > page_shrink) {
+        return awful_bad;
+    } else {
+        return tex_badness(page_total - goal, page_shrink);
+    }
+}
+
+/*tex
+
+    Move nodes from the contribution list to the current page, one at a time, for as long as
+    there are contributions and no output routine is active. Each legal breakpoint is weighed
+    and |tex_aux_fire_up| is called when the page has to be output. Nodes that arrive while
+    the page has no box or rule yet are discarded (glue, kern, penalty, boundary) or saved on
+    the page discards list when |saving_vdiscards_par| is positive.
+
+*/
+
+void tex_build_page(void)
+{
+    if (node_next(contribute_head) && ! lmt_page_builder_state.output_active) {
+        /*tex The (upcoming) penalty to be added to the badness: */
+        int pi = 0;
+        do {
+            halfword p = node_next(contribute_head);
+            halfword last_type = node_type(p);
+            /*tex Update the values of |last_glue|, |last_penalty|, and |last_kern|. */
+            if (lmt_page_builder_state.last_glue != max_halfword) {
+                tex_flush_node(lmt_page_builder_state.last_glue);
+                lmt_page_builder_state.last_glue = max_halfword;
+            }
+            lmt_page_builder_state.last_penalty = 0;
+            lmt_page_builder_state.last_kern = 0;
+            lmt_page_builder_state.last_boundary = 0;
+            lmt_page_builder_state.last_node_type = last_type;
+            lmt_page_builder_state.last_node_subtype = node_subtype(p);
+            lmt_page_builder_state.last_extra_used = 0;
+            switch (last_type) {
+                case glue_node:
+                    lmt_page_builder_state.last_glue = tex_new_glue_node(p, node_subtype(p));
+                    break;
+                case penalty_node:
+                    lmt_page_builder_state.last_penalty = penalty_amount(p);
+                    break;
+                case kern_node:
+                    lmt_page_builder_state.last_kern = kern_amount(p);
+                    break;
+                case boundary_node:
+                    lmt_page_builder_state.last_boundary = boundary_data(p);
+                    break;
+                default:
+                    /*tex Other node types leave the |last_*| values at their reset state. */
+                    break;
+            }
+            /*tex
+
+                Move node |p| to the current page; if it is time for a page break, put the nodes
+                following the break back onto the contribution list, and |return| to the users
+                output routine if there is one.
+
+                The code here is an example of a many-way switch into routines that merge together
+                in different places. Some people call this unstructured programming, but the author
+                doesn't see much wrong with it, as long as the various labels have a well-understood
+                meaning.
+
+                If the current page is empty and node |p| is to be deleted, |goto done1|; otherwise
+                use node |p| to update the state of the current page; if this node is an insertion,
+                |goto contribute|; otherwise if this node is not a legal breakpoint,
+                |goto contribute| or |update_heights|; otherwise set |pi| to the penalty associated
+                with this breakpoint.
+
+                The title of this section is already so long, it seems best to avoid making it more
+                accurate but still longer, by mentioning the fact that a kern node at the end of
+                the contribution list will not be contributed until we know its successor.
+
+            */
+            switch (last_type) {
+                case hlist_node:
+                case vlist_node:
+                    if (auto_migrating_mode_permitted(auto_migration_mode_par, auto_migrate_post)) {
+                        halfword h = box_post_migrated(p);
+                        if (h) {
+                            halfword t = tex_tail_of_node_list(h);
+                            if (node_next(p)) {
+                                tex_couple_nodes(t, node_next(p));
+                            } else {
+                                contribute_tail = t;
+                            }
+                            tex_couple_nodes(p, h);
+                            box_post_migrated(p) = null;
+                        }
+                    }
+                    if (auto_migrating_mode_permitted(auto_migration_mode_par, auto_migrate_pre)) {
+                        halfword h = box_pre_migrated(p);
+                        if (h) {
+                            halfword t = tex_tail_of_node_list(h);
+                            tex_couple_nodes(t, p);
+                            tex_couple_nodes(contribute_head, h);
+                            box_pre_migrated(p) = null;
+                            continue;
+                        }
+                    }
+                    /* common with rule */
+                    if (lmt_page_builder_state.contents < contribute_box) { // nothing or insert
+                        /*tex
+                            Initialize the current page, insert the |\topskip| glue ahead of |p|,
+                            and |goto continue|.
+                        */
+                        halfword q;
+                        if (lmt_page_builder_state.contents != contribute_nothing) {
+                            lmt_page_builder_state.contents = contribute_box;
+                        } else {
+                            tex_aux_freeze_page_specs(contribute_box);
+                        }
+                        q = tex_new_param_glue_node(top_skip_code, top_skip_glue);
+                        if (glue_amount(q) > box_height(p)) {
+                            glue_amount(q) -= box_height(p);
+                        } else {
+                            glue_amount(q) = 0;
+                        }
+                        tex_couple_nodes(q, p);
+                        tex_couple_nodes(contribute_head, q);
+                        continue;
+                    } else {
+                        /*tex Move a box to the current page, then |goto contribute|. */
+                        page_total += page_depth + box_height(p);
+                        page_depth = box_depth(p);
+                        goto CONTRIBUTE;
+                    }
+                case rule_node:
+                    /* common with box */
+                    if (lmt_page_builder_state.contents < contribute_box) {
+                        halfword q;
+                        if (lmt_page_builder_state.contents != contribute_nothing) {
+                            lmt_page_builder_state.contents = contribute_rule;
+                        } else {
+                            tex_aux_freeze_page_specs(contribute_rule);
+                        }
+                        q = tex_new_param_glue_node(top_skip_code, top_skip_glue);
+                        if (glue_amount(q) > rule_height(p)) {
+                            glue_amount(q) -= rule_height(p);
+                        } else {
+                            glue_amount(q) = 0;
+                        }
+                        tex_couple_nodes(q, p);
+                        tex_couple_nodes(contribute_head, q);
+                        continue;
+                    } else {
+                        page_total += page_depth + rule_height(p);
+                        page_depth = rule_depth(p);
+                        goto CONTRIBUTE;
+                    }
+                case boundary_node:
+                    if (lmt_page_builder_state.contents < contribute_box) {
+                        goto DISCARD;
+                    } else if (node_subtype(p) == page_boundary) {
+                        /*tex
+                            We just triggered the pagebuilder for which we needed a contribution. We fake
+                            a zero penalty so that all gets processed. The main rationale is that we get
+                            a better indication of what we do. Of course a callback can remove this node
+                            so that it is never seen. Triggering from the callback is not doable.
+                        */
+                        halfword n = tex_new_node(penalty_node, user_penalty_subtype);
+                        /* todo: copy attributes */
+                        tex_page_boundary_message("processed as penalty", 0);
+                        tex_try_couple_nodes(node_prev(p), n);
+                        tex_try_couple_nodes(n, node_next(p));
+                        /*tex The data has to be fetched before the boundary node is released. */
+                        penalty_amount(n) = boundary_data(p);
+                        tex_flush_node(p);
+                        p = n;
+                        node_next(contribute_head) = p;
+                        pi = 0;
+                        break;
+                    } else {
+                        goto DISCARD;
+                    }
+                case whatsit_node:
+                    goto CONTRIBUTE;
+                case glue_node:
+                    if (lmt_page_builder_state.contents < contribute_box) {
+                        goto DISCARD;
+                    } else if (precedes_break(lmt_page_builder_state.page_tail)) {
+                        pi = 0;
+                        break;
+                    } else {
+                        goto UPDATEHEIGHTS;
+                    }
+                case kern_node:
+                    if (lmt_page_builder_state.contents < contribute_box) {
+                        goto DISCARD;
+                    } else if (! node_next(p)) {
+                        return;
+                    } else if (node_type(node_next(p)) == glue_node) {
+                        pi = 0;
+                        break;
+                    } else {
+                        goto UPDATEHEIGHTS;
+                    }
+                case penalty_node:
+                    if (lmt_page_builder_state.contents < contribute_box) {
+                        goto DISCARD;
+                    } else {
+                        pi = penalty_amount(p);
+                        break;
+                    }
+                case mark_node:
+                    goto CONTRIBUTE;
+                case insert_node:
+                    {
+                        /*tex
+                            Append an insertion to the current page and |goto contribute|. The insertion
+                            number (index) is registered in the subtype (not any more for a while).
+                        */
+                        halfword index = insert_index(p); /* initially 65K */
+                        halfword location = page_insert_head;
+                        halfword multiplier = tex_get_insert_multiplier(index);
+                        halfword content = tex_get_insert_content(index);
+                        scaled limit = tex_get_insert_limit(index);
+                        int slot = 1;
+                        if (lmt_page_builder_state.contents == contribute_nothing) {
+                            tex_aux_freeze_page_specs(contribute_insert);
+                        }
+                        while (index >= insert_index(node_next(location))) {
+                            location = node_next(location);
+                            slot += 1 ;
+                        }
+                        if (insert_index(location) != index) {
+                            /*tex
+
+                                Create a page insertion node with |subtype(r) = qi(n)|, and include
+                                the glue correction for box |n| in the current page state.
+
+                                We take note of the value of |\skip| |n| and the height plus depth
+                                of |\box| |n| only when the first |\insert n| node is encountered
+                                for a new page. A user who changes the contents of |\box| |n| after
+                                that first |\insert n| had better be either extremely careful or
+                                extremely lucky, or both.
+
+                                We need to handle this too:
+
+                                [content]
+                                [max(space shared,space n)]
+                                [class n]
+                                .........
+                                [space m]
+                                [class m]
+
+                                For now a callback can deal with this but maybe we need to have a
+                                more advanced mechanism for this (and more control over inserts in
+                                general).
+
+                            */
+                            halfword q = tex_new_node(split_node, normal_split_subtype);
+                            scaled advance = 0;
+                            halfword distance = lmt_get_insert_distance(index, slot); /*tex Callback: we get a copy! */
+                            split_insert_index(q) = index;
+                            tex_try_couple_nodes(q, node_next(location));
+                            tex_couple_nodes(location, q);
+                            location = q;
+                            if (! tex_aux_valid_insert_content(content)) {
+                                content = tex_aux_delete_box_content(content);
+                                tex_set_insert_content(index, content);
+                            }
+                            if (content) {
+                                box_height(location) = box_total(content);
+                            } else {
+                                box_height(location) = 0;
+                            }
+                            split_best_insert(location) = null;
+                            if (multiplier == 1000) {
+                                advance = box_height(location);
+                            } else {
+                                advance = tex_x_over_n(box_height(location), 1000) * multiplier;
+                            }
+                            advance += glue_amount(distance);
+                            update_page_goal(index, 0, advance); /*tex Here gets no height added! */
+                            if (glue_stretch_order(distance) > 1) {
+                                page_stretch_1(glue_stretch_order(distance)) += glue_stretch(distance);
+                            } else {
+                                page_stretch_2(glue_stretch_order(distance)) += glue_stretch(distance);
+                            }
+                            page_shrink += glue_shrink(distance);
+                            if (glue_shrink_order(distance) != normal_glue_order && glue_shrink(distance)) {
+                                tex_handle_error(
+                                    normal_error_type,
+                                    "Infinite glue shrinkage inserted from \\skip%i",
+                                    index,
+                                    "The correction glue for page breaking with insertions must have finite\n"
+                                    "shrinkability. But you may proceed, since the offensive shrinkability has been\n"
+                                    "made finite."
+                                );
+                            }
+                            tex_flush_node(distance);
+                        }
+                        /*tex I really need to check this logic with the original \LUATEX\ code. */
+                        if (node_type(location) == split_node && node_subtype(location) == insert_split_subtype) {
+                            lmt_page_builder_state.insert_penalties += insert_float_cost(p);
+                        } else {
+                            scaled delta = page_goal - page_total - page_depth + page_shrink;
+                            scaled needed = insert_total_height(p);
+                            split_last_insert(location) = p;
+                            /*tex This much room is left if we shrink the maximum. */
+                            if (multiplier != 1000) {
+                                /*tex This much room is needed. */
+                                needed = tex_x_over_n(needed, 1000) * multiplier;
+                            }
+                            if ((needed <= 0 || needed <= delta) && (insert_total_height(p) + box_height(location) <= limit)) {
+                                update_page_goal(index, insert_total_height(p), needed);
+                                box_height(location) += insert_total_height(p);
+                            } else {
+                                /*tex
+
+                                    Find the best way to split the insertion, and change |subtype(r)|
+                                    to |split_up_inserting_code|.
+
+                                    Here is the code that will split a long footnote between pages,
+                                    in an emergency. The current situation deserves to be
+                                    recapitulated: Node |p| is an insertion into box |n|; the
+                                    insertion will not fit, in its entirety, either because it
+                                    would make the total contents of box |n| greater than |\dimen|
+                                    |n|, or because it would make the incremental amount of growth
+                                    |h| greater than the available space |delta|, or both. (This
+                                    amount |h| has been weighted by the insertion scaling factor,
+                                    i.e., by |\count| |n| over 1000.) Now we will choose the best
+                                    way to break the vlist of the insertion, using the same criteria
+                                    as in the |\vsplit| operation.
+
+                                */
+                                scaled height;
+                                halfword brk, penalty;
+                                if (multiplier <= 0) {
+                                    height = max_dimen;
+                                } else {
+                                    height = page_goal - page_total - page_depth;
+                                    if (multiplier != 1000) {
+                                        height = tex_x_over_n(height, multiplier) * 1000;
+                                    }
+                                }
+                                if (height > limit - box_height(location)) {
+                                    height = limit - box_height(location);
+                                }
+                                brk = tex_vert_break(insert_list(p), height, insert_max_depth(p));
+                                box_height(location) += lmt_packaging_state.best_height_plus_depth;
+                                penalty = brk ? (node_type(brk) == penalty_node ? penalty_amount(brk) : 0) : eject_penalty;
+                                if (tracing_pages_par > 0) {
+                                    tex_aux_display_insertion_split_cost(index, height, penalty);
+                                }
+                                if (multiplier != 1000) {
+                                    lmt_packaging_state.best_height_plus_depth = tex_x_over_n(lmt_packaging_state.best_height_plus_depth, 1000) * multiplier;
+                                }
+                                update_page_goal(index, lmt_packaging_state.best_height_plus_depth, lmt_packaging_state.best_height_plus_depth);
+                                node_subtype(location) = insert_split_subtype;
+                                split_broken(location) = brk;
+                                split_broken_insert(location) = p;
+                                lmt_page_builder_state.insert_penalties += penalty;
+                            }
+                        }
+                        goto CONTRIBUTE;
+                    }
+                default:
+                    tex_formatted_error("pagebuilder", "invalid node of type %d in vertical mode", node_type(p));
+                    break;
+            }
+            /*tex
+                Check if node |p| is a new champion breakpoint; then if it is time for a page break,
+                prepare for output, and either fire up the users output routine and |return| or
+                ship out the page and |goto done|.
+            */
+            if (pi < infinite_penalty) {
+                /*tex
+                    Compute the badness, |b|, of the current page, using |awful_bad| if the box is
+                    too full. The |c| variable holds the costs.
+                */
+                halfword badness, criterium;
+                /*tex
+                    This could actually be a callback but not now. First we will experiment a lot
+                    with this yet undocumented trick.
+                */
+                lmt_page_builder_state.last_extra_used = 0;
+                badness = tex_aux_page_badness(page_goal);
+                if (page_extra_goal_par) {
+                    if (badness >= awful_bad && page_total >= (page_goal + page_extra_goal_par)) {
+                        halfword extrabadness = tex_aux_page_badness(page_goal + page_extra_goal_par);
+                        if (tracing_pages_par > 0) {
+                            tex_begin_diagnostic();
+                            tex_print_format(
+                                "[page: extra check, total=%P, goal=%D, extragoal=%D, badness=%B, extrabadness=%B]",
+                                page_total, page_stretch, page_filstretch, page_fillstretch, page_filllstretch, page_shrink,
+                                page_goal, pt_unit, page_extra_goal_par, pt_unit, badness, extrabadness
+                            );
+                            tex_end_diagnostic();
+                        }
+                        lmt_page_builder_state.last_extra_used = 1;
+                        badness = extrabadness;
+                    }
+                }
+                if (badness >= awful_bad) {
+                    criterium = badness;
+                } else if (pi <= eject_penalty) {
+                    criterium = pi;
+                } else if (badness < infinite_bad) {
+                    criterium = badness + pi + lmt_page_builder_state.insert_penalties;
+                } else {
+                    criterium = deplorable;
+                }
+                if (lmt_page_builder_state.insert_penalties >= 10000) {
+                    criterium = awful_bad;
+                }
+                {
+                    int moveon = criterium <= lmt_page_builder_state.least_cost;
+                    int fireup = criterium == awful_bad || pi <= eject_penalty;
+                    if (tracing_pages_par > 0) {
+                        tex_aux_display_page_break_cost(badness, pi, criterium, moveon, fireup);
+                    }
+                    if (moveon) {
+                        halfword r = node_next(page_insert_head);
+                        lmt_page_builder_state.best_break = p;
+                        lmt_page_builder_state.best_size = page_goal;
+                        lmt_page_builder_state.insert_penalties = 0;
+                        lmt_page_builder_state.least_cost = criterium;
+                        while (r != page_insert_head) {
+                            split_best_insert(r) = split_last_insert(r);
+                            r = node_next(r);
+                        }
+                    }
+                    if (fireup) {
+                        /*tex Output the current page at the best place. */
+                        tex_aux_fire_up(p);
+                        if (lmt_page_builder_state.output_active) {
+                            /*tex User's output routine will act. */
+                            return;
+                        } else {
+                            /*tex The page has been shipped out by default output routine. */
+                            continue;
+                        }
+                    }
+                }
+            }
+          UPDATEHEIGHTS:
+            /*tex
+                Go here to record glue in the |active_height| table. Update the current page
+                measurements with respect to the glue or kern specified by node~|p|.
+            */
+            switch(node_type(p)) {
+                case kern_node:
+                    page_total += page_depth + kern_amount(p);
+                    page_depth = 0;
+                    goto APPEND;
+                case glue_node:
+                    if (glue_stretch_order(p) > 1) {
+                        page_stretch_1(glue_stretch_order(p)) += glue_stretch(p);
+                    } else {
+                        page_stretch_2(glue_stretch_order(p)) += glue_stretch(p);
+                    }
+                    page_shrink += glue_shrink(p);
+                    if (glue_shrink_order(p) != normal_glue_order && glue_shrink(p)) {
+                        tex_handle_error(
+                            normal_error_type,
+                            "Infinite glue shrinkage found on current page",
+                            "The page about to be output contains some infinitely shrinkable glue, e.g.,\n"
+                            "'\\vss' or '\\vskip 0pt minus 1fil'. Such glue doesn't belong there; but you can\n"
+                            "safely proceed, since the offensive shrinkability has been made finite."
+                        );
+                        tex_reset_glue_to_zero(p);
+                        glue_shrink_order(p) = normal_glue_order;
+                    }
+                    page_total += page_depth + glue_amount(p);
+                    page_depth = 0;
+                    goto APPEND;
+                default:
+                    /*tex Any other node (a penalty that did not fire) drops through to |CONTRIBUTE|. */
+                    break;
+            }
+          CONTRIBUTE:
+            /*tex
+                Go here to link a node into the current page. Make sure that |page_max_depth| is
+                not exceeded.
+            */
+            if (page_depth > lmt_page_builder_state.max_depth) {
+                page_total += page_depth - lmt_page_builder_state.max_depth;
+                page_depth = lmt_page_builder_state.max_depth;
+            }
+          APPEND:
+            /*tex Link node |p| into the current page and |goto done|. We assume a positive depth. */
+            tex_couple_nodes(lmt_page_builder_state.page_tail, p);
+            lmt_page_builder_state.page_tail = p;
+            tex_try_couple_nodes(contribute_head, node_next(p));
+            node_next(p) = null;
+            continue;
+          DISCARD:
+            /*tex Recycle node |p|. */
+            tex_try_couple_nodes(contribute_head, node_next(p));
+            node_next(p) = null;
+            if (saving_vdiscards_par > 0) {
+                if (lmt_packaging_state.page_discards_head) {
+                    tex_couple_nodes(lmt_packaging_state.page_discards_tail, p);
+                } else {
+                    lmt_packaging_state.page_discards_head = p;
+                }
+                lmt_packaging_state.page_discards_tail = p;
+            } else {
+                tex_flush_node_list(p);
+            }
+        } while (node_next(contribute_head));
+        /*tex Make the contribution list empty by setting its tail to |contribute_head|. */
+        contribute_tail = contribute_head;
+    }
+}
+
+/*tex
+
+    When the page builder has looked at as much material as could appear before the next page break,
+    it makes its decision. The break that gave minimum badness will be used to put a completed page
+    into box |\outputbox|, with insertions appended to their other boxes.
+
+    We also set the values of |top_mark|, |first_mark|, and |bot_mark|. The program uses the fact
+    that |bot_mark(x) <> null| implies |first_mark(x) <> null|; it also knows that |bot_mark(x) =
+    null| implies |top_mark(x) = first_mark(x) = null|.
+
+    The |fire_up| subroutine prepares to output the current page at the best place; then it fires
+    up the user's output routine, if there is one, or it simply ships out the page. There is one
+    parameter, |c|, which represents the node that was being contributed to the page when the
+    decision to force an output was made.
+
+*/
+
+static void tex_aux_fire_up(halfword c)
+{
+    /*tex nodes being examined and/or changed */
+    halfword p, q;
+    /*tex predecessor of |p|, we could just use node_prev(p) instead */
+    halfword prev_p;
+    /*tex Set the value of |output_penalty|.
*/ + if (node_type(lmt_page_builder_state.best_break) == penalty_node) { + update_tex_output_penalty(penalty_amount(lmt_page_builder_state.best_break)); + penalty_amount(lmt_page_builder_state.best_break) = infinite_penalty; + } else { + update_tex_output_penalty(infinite_penalty); + } + tex_update_top_marks(); + /*tex + Put the optimal current page into box |output_box|, update |first_mark| and |bot_mark|, + append insertions to their boxes, and put the remaining nodes back on the contribution + list. + + As the page is finally being prepared for output, pointer |p| runs through the vlist, with + |prev_p| trailing behind; pointer |q| is the tail of a list of insertions that are being + held over for a subsequent page. + */ + if (c == lmt_page_builder_state.best_break) { + /*tex |c| not yet linked in */ + lmt_page_builder_state.best_break = null; + } + /*tex Ensure that box |output_box| is empty before output. */ + if (box_register(output_box_par)) { + tex_handle_error( + normal_error_type, + "\\box%i is not void", + output_box_par, + "You shouldn't use \\box\\outputbox except in \\output routines. Proceed, and I'll\n" + "discard its present contents." + ); + box_register(output_box_par) = tex_aux_delete_box_content(box_register(output_box_par)); + } + /* + { + int callback_id = lmt_callback_defined(fire_up_output_callback); + if (callback_id != 0) { + halfword insert = node_next(page_insert_head); + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->"); + } + } + */ + /*tex This will count the number of insertions held over. */ + { + halfword save_split_top_skip = split_top_skip_par; + lmt_page_builder_state.insert_penalties = 0; + if (holding_inserts_par <= 0) { + /*tex + Prepare all the boxes involved in insertions to act as queues. If many insertions are + supposed to go into the same box, we want to know the position of the last node in that + box, so that we don't need to waste time when linking further information into it. 
The + |last_insert| fields of the page insertion nodes are therefore used for this purpose + during the packaging phase. + + This is tricky: |last_insert| directly points to a \quote {address} in the node list, + that is: the row where |list_ptr| sits. The |raw_list_ptr| macro is just an offset to + the base index of the node. Then |node_next| will start out there and follow the list. + So, |last_insert| kind of points to a subnode (as in disc nodes) of size 1. + + last_insert => [shift][list] + + which fakes: + + last_insert => [type|subtype][next] => [real node with next] + + and with shift being zero this (when it would be queried) will be seen as a hlist node + of type zero with subtype zero, but ... it is not really such a node which means that + other properties are not valid! Normally this is ok, because \TEX\ only follows this + list and never looks at the parent. But, when accessing from \LUA\ this is asking for + troubles. However, as all happens in the page builder, we don't really expose this and + if we would (somehow, e.g. via a callback) then for sure we would need to make sure + that the node |last_insert(r)| points to is made into a new kind of node: one with + size 1 and type |fake_node| or so, just to be sure (so that at the \LUA\ end no + properties can be asked). + + Of course I can be wrong here and changing the approach would involve patching some + code that I don't want to touch. I need a test case for \quote {following the chain}. + */ + halfword r = node_next(page_insert_head); + while (r != page_insert_head) { + if (split_best_insert(r)) { + halfword index = insert_index(r); + halfword content = tex_get_insert_content(index); + if (! tex_aux_valid_insert_content(content)) { + content = tex_aux_delete_box_content(content); + } + if (! content) { + /*tex + So we package the content in a box. Originally this is a hlist which + is somewhat strange because we're operating in vmode. The box is still + empty! 
+ */ + content = tex_new_null_box_node(vlist_node, insert_result_list); + tex_set_insert_content(index, content); + } + /*tex + We locate the place where we can add. We have an (unpackaged) list here so we + need to go to the end. Here we have this sort of hackery |box(n) + 5 == row of + list ptr, a fake node of size 1| trick. + */ + p = insert_first_box(content); + /*tex + From here on we access the regular |list_ptr == node_next| chain. + */ + while (node_next(p)) { + p = node_next(p); + } + /* + This is now a pointer into the node array (a fake - list_ptr row - or follow up). + */ + split_last_insert(r) = p; + } + r = node_next(r); + } + } + q = hold_head; + node_next(q) = null; + prev_p = page_head; + p = node_next(prev_p); + while (p != lmt_page_builder_state.best_break) { + switch (node_type(p)) { + case insert_node: + if (holding_inserts_par <= 0) { + /*tex + Either insert the material specified by node |p| into the appropriate box, or + hold it for the next page; also delete node |p| from the current page. + + We will set |best_insert := null| and package the box corresponding to + insertion node |r|, just after making the final insertion into that box. If + this final insertion is |split_up_node|, the remainder after splitting and + pruning (if any) will be carried over to the next page. + */ + /*tex should the present insertion be held over? */ + int wait = 0; + halfword r = node_next(page_insert_head); + while (insert_index(r) != insert_index(p)) { + r = node_next(r); + } + if (split_best_insert(r)) { + halfword s = split_last_insert(r); + // node_next(s) = insert_list(p); + tex_try_couple_nodes(s, insert_list(p)); + if (split_best_insert(r) == p) { + /*tex + Wrap up the box specified by node |r|, splitting node |p| if called + for and set |wait| if node |p| holds a remainder after splitting. 
+ */ + if (node_type(r) == split_node && node_subtype(r) == insert_split_subtype && (split_broken_insert(r) == p) && split_broken(r)) { + while (node_next(s) != split_broken(r)) { + s = node_next(s); + } + node_next(s) = null; + split_top_skip_par = insert_split_top(p); + insert_list(p) = tex_prune_page_top(split_broken(r), 0); + if (insert_list(p)) { + /*tex + We only determine the total height of the list stored in + the insert node. + */ + halfword list = insert_list(p); + halfword result = tex_vpack(list, 0, packing_additional, max_dimen, direction_unknown, holding_none_option); + insert_total_height(p) = box_total(result); + box_list(result) = null; + tex_flush_node(result); + wait = 1; + } + } + { + split_best_insert(r) = null; + /*tex + We need this juggling in order to also set the old school box + when we're in traditional mode. + */ + halfword index = insert_index(r); + halfword content = tex_get_insert_content(index); + halfword list = box_list(content); + halfword result = tex_vpack(list, 0, packing_additional, max_dimen, dir_lefttoright, holding_none_option); + tex_set_insert_content(index, result); + box_list(content) = null; + tex_flush_node(content); + } + } else { + split_last_insert(r) = tex_tail_of_node_list(s); + } + } else { + wait = 1; + } + /*tex + Either append the insertion node |p| after node |q|, and remove it from the + current page, or delete |node(p)|. + */ + tex_try_couple_nodes(prev_p, node_next(p)); + node_next(p) = null; + if (wait) { + tex_couple_nodes(q, p); + q = p; + ++lmt_page_builder_state.insert_penalties; + } else { + insert_list(p) = null; + tex_flush_node(p); + } + p = prev_p; + } + break; + case mark_node: + tex_update_first_and_bot_mark(p); + break; + } + prev_p = p; + p = node_next(p); + } + split_top_skip_par = save_split_top_skip; + } + /*tex + Break the current page at node |p|, put it in box~|output_box|, and put the remaining nodes + on the contribution list. 
+ + When the following code is executed, the current page runs from node |vlink (page_head)| to + node |prev_p|, and the nodes from |p| to |page_tail| are to be placed back at the front of + the contribution list. Furthermore the heldover insertions appear in a list from |vlink + (hold_head)| to |q|; we will put them into the current page list for safekeeping while the + user's output routine is active. We might have |q = hold_head|; and |p = null| if and only + if |prev_p = page_tail|. Error messages are suppressed within |vpackage|, since the box + might appear to be overfull or underfull simply because the stretch and shrink from the + |\skip| registers for inserts are not actually present in the box. + */ + if (p) { + if (! node_next(contribute_head)) { + contribute_tail = lmt_page_builder_state.page_tail; + } + tex_couple_nodes(lmt_page_builder_state.page_tail, node_next(contribute_head)); + tex_couple_nodes(contribute_head, p); + node_next(prev_p) = null; + } + /*tex When we pack the box we inhibit error messages. */ + { + halfword save_vbadness = vbadness_par; + halfword save_vfuzz = vfuzz_par; + vbadness_par = infinite_bad; + vfuzz_par = max_dimen; + tex_show_marks(); + // if (1) { + box_register(output_box_par) = tex_filtered_vpack(node_next(page_head), lmt_page_builder_state.best_size, packing_exactly, lmt_page_builder_state.max_depth, output_group, dir_lefttoright, 0, 0, 0, holding_none_option); + // } else { + // /* maybe an option one day */ + // box_register(output_box_par) = tex_filtered_vpack(node_next(page_head), 0, packing_additional, lmt_page_builder_state.max_depth, output_group, dir_lefttoright, 0, 0, 0); + // } + vbadness_par = save_vbadness; + vfuzz_par = save_vfuzz; + } + if (lmt_page_builder_state.last_glue != max_halfword) { + tex_flush_node(lmt_page_builder_state.last_glue); + } + /*tex Start a new current page. This sets |last_glue := max_halfword|. 
*/ + tex_aux_start_new_page(); + if (q != hold_head) { + node_next(page_head) = node_next(hold_head); + lmt_page_builder_state.page_tail = q; + } + /*tex Delete the page-insertion nodes. */ + { + halfword r = node_next(page_insert_head); + while (r != page_insert_head) { + q = node_next(r); + tex_flush_node(r); + r = q; + } + } + node_next(page_insert_head) = page_insert_head; + tex_update_first_marks(); + if (output_routine_par) { + if (lmt_page_builder_state.dead_cycles >= max_dead_cycles_par) { + /*tex Explain that too many dead cycles have occurred in a row. */ + tex_handle_error( + normal_error_type, + "Output loop --- %i consecutive dead cycles", + lmt_page_builder_state.dead_cycles, + "I've concluded that your \\output is awry; it never does a \\shipout, so I'm\n" + "shipping \\box\\outputbox out myself. Next time increase \\maxdeadcycles if you\n" + "want me to be more patient!" + ); + } else { + /*tex Fire up the users output routine and |return|. */ + lmt_page_builder_state.output_active = 1; + ++lmt_page_builder_state.dead_cycles; + tex_push_nest(); + cur_list.mode = -vmode; + cur_list.prev_depth = ignore_depth; + cur_list.mode_line = -lmt_input_state.input_line; + tex_begin_token_list(output_routine_par, output_text); + tex_new_save_level(output_group); + tex_normal_paragraph(output_par_context); + tex_scan_left_brace(); + return; + } + } + /*tex + Perform the default output routine. The list of heldover insertions, running from |vlink + (page_head)| to |page_tail|, must be moved to the contribution list when the user has + specified no output routine. 
+ */ + + /* todo: double link */ + + if (node_next(page_head)) { + if (node_next(contribute_head)) { + node_next(lmt_page_builder_state.page_tail) = node_next(contribute_head); + } + else { + contribute_tail = lmt_page_builder_state.page_tail; + } + node_next(contribute_head) = node_next(page_head); + node_next(page_head) = null; + lmt_page_builder_state.page_tail = page_head; + } + if (lmt_packaging_state.page_discards_head) { + tex_flush_node_list(lmt_packaging_state.page_discards_head); + lmt_packaging_state.page_discards_head = null; + } + if (box_register(output_box_par)) { + tex_flush_node_list(box_register(output_box_par)); + box_register(output_box_par) = null; + } +} + +/*tex + + When the user's output routine finishes, it has constructed a vlist in internal vertical mode, + and \TEX\ will do the following: + +*/ + +void tex_resume_after_output(void) +{ + if (lmt_input_state.cur_input.loc || ((lmt_input_state.cur_input.token_type != output_text) && (lmt_input_state.cur_input.token_type != backed_up_text))) { + /*tex Recover from an unbalanced output routine */ + tex_handle_error( + normal_error_type, + "Unbalanced output routine", + "Your sneaky output routine has problematic {'s and/or }'s. I can't handle that\n" + "very well; good luck." + ); + /*tex Loops forever if reading from a file, since |null = min_halfword <= 0|. */ + do { + tex_get_token(); + } while (lmt_input_state.cur_input.loc); + } + /*tex Conserve stack space in case more outputs are triggered. */ + tex_end_token_list(); + tex_end_paragraph(bottom_level_group, output_par_context); /*tex No |wrapped_up_paragraph| here. */ + tex_unsave(); + lmt_page_builder_state.output_active = 0; + lmt_page_builder_state.insert_penalties = 0; + /*tex Ensure that box |output_box| is empty after output. 
*/ + if (box_register(output_box_par)) { + tex_handle_error( + normal_error_type, + "Output routine didn't use all of \\box%i", output_box_par, + "Your \\output commands should empty \\box\\outputbox, e.g., by saying\n" + "'\\shipout\\box\\outputbox'. Proceed; I'll discard its present contents." + ); + box_register(output_box_par) = tex_aux_delete_box_content(box_register(output_box_par));; + } + if (lmt_insert_state.storing == insert_storage_delay && tex_insert_stored()) { + if (tracing_inserts_par > 0) { + tex_print_levels(); + tex_print_str(lmt_insert_state.head ? "<delaying inserts>" : "<no inserts to delay>"); + if (lmt_insert_state.head && tracing_inserts_par > 1) { + tex_show_node_list(lmt_insert_state.head, max_integer, max_integer); + } + } + tex_try_couple_nodes(lmt_page_builder_state.page_tail, lmt_insert_state.head); + lmt_page_builder_state.page_tail = lmt_insert_state.tail; + lmt_insert_state.head = null; + lmt_insert_state.tail = null; + } + if (cur_list.tail != cur_list.head) { + /*tex Current list goes after heldover insertions. */ + tex_try_couple_nodes(lmt_page_builder_state.page_tail, node_next(cur_list.head)); + lmt_page_builder_state.page_tail = cur_list.tail; + } + if (node_next(page_head)) { + /* Both go before heldover contributions. */ + if (! node_next(contribute_head)) { + contribute_tail = lmt_page_builder_state.page_tail; + } + tex_try_couple_nodes(lmt_page_builder_state.page_tail, node_next(contribute_head)); + tex_try_couple_nodes(contribute_head, node_next(page_head)); + node_next(page_head) = null; + lmt_page_builder_state.page_tail = page_head; + } + if (lmt_insert_state.storing == insert_storage_inject) { + halfword h = node_next(contribute_head); + while (h) { + halfword n = node_next(h); + if (node_type(h) == insert_node) { + tex_try_couple_nodes(node_prev(h), n); + tex_insert_restore(h); + } + h = n; + } + if (tracing_inserts_par > 0) { + tex_print_levels(); + tex_print_str(lmt_insert_state.head ? 
"<storing inserts>" : "<no inserts to store>"); + if (lmt_insert_state.head && tracing_inserts_par > 1) { + tex_show_node_list(lmt_insert_state.head, max_integer, max_integer); + } + } + } + lmt_insert_state.storing = insert_storage_ignore; + tex_flush_node_list(lmt_packaging_state.page_discards_head); + lmt_packaging_state.page_discards_head = null; + tex_pop_nest(); + lmt_page_filter_callback(after_output_page_context, 0); + tex_build_page(); +} diff --git a/source/luametatex/source/tex/texbuildpage.h b/source/luametatex/source/tex/texbuildpage.h new file mode 100644 index 000000000..328bed2a1 --- /dev/null +++ b/source/luametatex/source/tex/texbuildpage.h @@ -0,0 +1,104 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_BUILDPAGE_H +# define LMT_BUILDPAGE_H + +/*tex + + The state of |page_contents| is indicated by two special values. + +*/ + +typedef enum contribution_codes { + contribute_nothing, + contribute_insert, /*tex An insert node has been contributed, but no boxes. */ + contribute_box, /*tex A box has been contributed. */ + contribute_rule, /*tex A rule has been contributed. */ +} contribution_codes; + +typedef struct page_builder_state_info { + halfword page_tail; /*tex The final node on the current page. */ + int contents; /*tex What is on the current page so far? */ + scaled max_depth; /*tex The maximum box depth on page being built. */ + halfword best_break; /*tex Break here to get the best page known so far. */ + int least_cost; /*tex The score for this currently best page. */ + scaled best_size; /*tex Its |page_goal| so it can go away. */ + scaled goal; + scaled vsize; + scaled total; + scaled depth; + union { + scaled page_so_far[6]; /*tex The height and glue of the current page. */ + struct { + scaled initial; + scaled stretch; + scaled filstretch; + scaled fillstretch; + scaled filllstretch; + scaled shrink; + }; + }; + int insert_penalties; /*tex The sum of the penalties for held-over insertions. 
*/ + halfword insert_heights; + halfword last_glue; /*tex Used to implement |\lastskip|. */ + halfword last_penalty; /*tex Used to implement |\lastpenalty|. */ + scaled last_kern; /*tex Used to implement |\lastkern|. */ + int last_extra_used; + halfword last_boundary; + int last_node_type; /*tex Used to implement |\lastnodetype|. */ + int last_node_subtype; /*tex Used to implement |\lastnodesubtype|. */ + int output_active; + int dead_cycles; + int current_state; +} page_builder_state_info; + +extern page_builder_state_info lmt_page_builder_state; + +typedef enum page_property_states { + page_initial_state, /* we need an offset and are aligned anyway */ + page_stretch_state, + page_filstretch_state, + page_fillstretch_state, + page_filllstretch_state, + page_shrink_state, +} page_property_states; + +/*tex Translate a code |c|, taken relative to |page_stretch_code|, into the matching |page_property_states| entry. The argument is parenthesized so that an expression can be passed safely. */ +# define page_state_offset(c) ((c) - page_stretch_code + page_stretch_state) + +/*tex + + The data structure definitions here use the fact that the |height| field + appears in the fourth word of a box node. + +*/ + +extern void tex_initialize_buildpage (void); +extern void tex_initialize_pagestate (void); +extern void tex_build_page (void); +extern void tex_resume_after_output (void); +extern void tex_print_page_totals (void); + +/*tex The tail of the contribution list: */ + +# define contribute_tail lmt_nest_state.nest[0].tail + +# define page_goal lmt_page_builder_state.goal /*tex The desired height of information on page being built. */ +# define page_vsize lmt_page_builder_state.vsize +# define page_total lmt_page_builder_state.total /*tex The height of the current page. */ +# define page_depth lmt_page_builder_state.depth /*tex The depth of the current page. 
*/ + +//# define page_stretch lmt_page_builder_state.page_so_far[page_stretch_state] +//# define page_filstretch lmt_page_builder_state.page_so_far[page_filstretch_state] +//# define page_fillstretch lmt_page_builder_state.page_so_far[page_fillstretch_state] +//# define page_filllstretch lmt_page_builder_state.page_so_far[page_filllstretch_state] +//# define page_shrink lmt_page_builder_state.page_so_far[page_shrink_state] + +# define page_stretch lmt_page_builder_state.stretch +# define page_filstretch lmt_page_builder_state.filstretch +# define page_fillstretch lmt_page_builder_state.fillstretch +# define page_filllstretch lmt_page_builder_state.filllstretch +# define page_shrink lmt_page_builder_state.shrink + +# endif diff --git a/source/luametatex/source/tex/texcommands.c b/source/luametatex/source/tex/texcommands.c new file mode 100644 index 000000000..3ac1a7b23 --- /dev/null +++ b/source/luametatex/source/tex/texcommands.c @@ -0,0 +1,1318 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + We start with a couple of \ETEX\ related comments: + + The |\showtokens| command displays a token list. The |\showifs| command displays all currently + active conditionals. + + The |\unexpanded| primitive prevents expansion of tokens much as the result from |\the| applied + to a token variable. The |\detokenize| primitive converts a token list into a list of character + tokens much as if the token list were written to a file. We use the fact that the command + modifiers for |\unexpanded| and |\detokenize| are odd whereas those for |\the| and |\showthe| + are even. + + The |protected| feature of \ETEX\ defines the |\protected| prefix command for macro definitions. + Such macros are protected against expansions when lists of expanded tokens are built, e.g., for + |\edef| or during |\write|. 
+ + The |\pagediscards| and |\splitdiscards| commands share the command code |un_vbox| with |\unvbox| + and |\unvcopy|, they are distinguished by their |chr_code| values |last_box_code| and + |vsplit_code|. These |chr_code| values are larger than |box_code| and |copy_code|. + + The |\interlinepenalties|, |\clubpenalties|, |\widowpenalties|, and |\displaywidowpenalties| + commands allow to define arrays of penalty values to be used instead of the corresponding single + values. + +*/ + +/*tex + + The symbolic names for glue parameters are put into \TEX's hash table by using the routine called + |primitive|, defined below. Let us enter them now, so that we don't have to list all those + parameter names anywhere else. + + Many of \TEX's primitives need no |equiv|, since they are identifiable by their |eq_type| alone. + These primitives are loaded into the hash table. + + The processing of |\input| involves the |start_input| subroutine, which will be declared later; + the processing of |\endinput| is trivial. + + The hash table is initialized with |\count|, |\attribute|, |\dimen|, |\skip|, and |\muskip| all + having |register| as their command code; they are distinguished by the |chr_code|, which is + either |int_val|, |attr_val|, |dimen_val|, |glue_val|, or |mu_val|. + + Because in \LUATEX\ and \LUAMETATEX\ we have more primitives, and use a lookup table, we combine + commands, for instance the |\aftergroup| and |\afterassignment| are just simple runners and + instead of the old two single cases, we now have one case that handles the four variants. This + keeps similar code close and also saves lookups. So, we have a few |cmd| less than normally in + a \TEX\ engine, but also a few more. Some have been renamed because they do more now (already + in \ETEX). 
+ +*/ + +/*tex Look up |s| with |tex_prim_lookup| and give control sequence |code| the same |cs_text| and eqtb entry as the result. */ +static void tex_aux_copy_deep_frozen_from_primitive(halfword code, const char *s) +{ + halfword p = tex_prim_lookup(tex_located_string(s)); + cs_text(code) = cs_text(p); + copy_eqtb_entry(code, p); +} + +void tex_initialize_commands(void) +{ + + if (lmt_main_state.run_state == initializing_state) { + + lmt_hash_state.no_new_cs = 0; + lmt_fileio_state.io_first = 0; + + /*tex glue */ + + tex_primitive(tex_command, "lineskip", internal_glue_cmd, line_skip_code, internal_glue_base); + tex_primitive(tex_command, "baselineskip", internal_glue_cmd, baseline_skip_code, internal_glue_base); + tex_primitive(tex_command, "parskip", internal_glue_cmd, par_skip_code, internal_glue_base); + tex_primitive(tex_command, "abovedisplayskip", internal_glue_cmd, above_display_skip_code, internal_glue_base); + tex_primitive(tex_command, "belowdisplayskip", internal_glue_cmd, below_display_skip_code, internal_glue_base); + tex_primitive(tex_command, "abovedisplayshortskip", internal_glue_cmd, above_display_short_skip_code, internal_glue_base); + tex_primitive(tex_command, "belowdisplayshortskip", internal_glue_cmd, below_display_short_skip_code, internal_glue_base); + tex_primitive(tex_command, "leftskip", internal_glue_cmd, left_skip_code, internal_glue_base); + tex_primitive(tex_command, "rightskip", internal_glue_cmd, right_skip_code, internal_glue_base); + tex_primitive(tex_command, "topskip", internal_glue_cmd, top_skip_code, internal_glue_base); + tex_primitive(tex_command, "splittopskip", internal_glue_cmd, split_top_skip_code, internal_glue_base); + tex_primitive(tex_command, "tabskip", internal_glue_cmd, tab_skip_code, internal_glue_base); + tex_primitive(tex_command, "spaceskip", internal_glue_cmd, space_skip_code, internal_glue_base); + tex_primitive(tex_command, "xspaceskip", internal_glue_cmd, xspace_skip_code, internal_glue_base); + 
tex_primitive(tex_command, "parfillleftskip", internal_glue_cmd, par_fill_left_skip_code, internal_glue_base); + 
tex_primitive(tex_command, "parfillskip", internal_glue_cmd, par_fill_right_skip_code, internal_glue_base); + tex_primitive(tex_command, "parinitleftskip", internal_glue_cmd, par_init_left_skip_code, internal_glue_base); + tex_primitive(tex_command, "parinitrightskip", internal_glue_cmd, par_init_right_skip_code, internal_glue_base); + tex_primitive(luatex_command, "mathsurroundskip", internal_glue_cmd, math_skip_code, internal_glue_base); + tex_primitive(luatex_command, "maththreshold", internal_glue_cmd, math_threshold_code, internal_glue_base); + + /*tex math glue */ + + tex_primitive(luatex_command, "pettymuskip", internal_mu_glue_cmd, petty_mu_skip_code, internal_mu_glue_base); + tex_primitive(luatex_command, "tinymuskip", internal_mu_glue_cmd, tiny_mu_skip_code, internal_mu_glue_base); + tex_primitive(tex_command, "thinmuskip", internal_mu_glue_cmd, thin_mu_skip_code, internal_mu_glue_base); + tex_primitive(tex_command, "medmuskip", internal_mu_glue_cmd, med_mu_skip_code, internal_mu_glue_base); + tex_primitive(tex_command, "thickmuskip", internal_mu_glue_cmd, thick_mu_skip_code, internal_mu_glue_base); + + /*tex tokens */ + + tex_primitive(tex_command, "output", internal_toks_cmd, output_routine_code, internal_toks_base); + tex_primitive(tex_command, "everypar", internal_toks_cmd, every_par_code, internal_toks_base); + tex_primitive(tex_command, "everymath", internal_toks_cmd, every_math_code, internal_toks_base); + tex_primitive(tex_command, "everydisplay", internal_toks_cmd, every_display_code, internal_toks_base); + tex_primitive(tex_command, "everyhbox", internal_toks_cmd, every_hbox_code, internal_toks_base); + tex_primitive(tex_command, "everyvbox", internal_toks_cmd, every_vbox_code, internal_toks_base); + tex_primitive(luatex_command, "everymathatom", internal_toks_cmd, every_math_atom_code, internal_toks_base); + tex_primitive(tex_command, "everyjob", internal_toks_cmd, every_job_code, internal_toks_base); + tex_primitive(tex_command, "everycr", 
internal_toks_cmd, every_cr_code, internal_toks_base); + tex_primitive(luatex_command, "everytab", internal_toks_cmd, every_tab_code, internal_toks_base); + /* tex_primitive(luatex_command, "endofpar", internal_toks_cmd, end_of_par_code, internal_toks_base); */ + tex_primitive(tex_command, "errhelp", internal_toks_cmd, error_help_code, internal_toks_base); + tex_primitive(etex_command, "everyeof", internal_toks_cmd, every_eof_code, internal_toks_base); + tex_primitive(luatex_command, "everybeforepar", internal_toks_cmd, every_before_par_code, internal_toks_base); + tex_primitive(no_command, "endofgroup", internal_toks_cmd, end_of_group_code, internal_toks_base); + + /*tex counters (we could omit the int_base here as effectively it is subtracted) */ + + tex_primitive(tex_command, "adjdemerits", internal_int_cmd, adj_demerits_code, internal_int_base); + tex_primitive(tex_command, "binoppenalty", internal_int_cmd, post_binary_penalty_code, internal_int_base); /*tex For old times sake. */ + tex_primitive(tex_command, "brokenpenalty", internal_int_cmd, broken_penalty_code, internal_int_base); + tex_primitive(tex_command, "clubpenalty", internal_int_cmd, club_penalty_code, internal_int_base); + tex_primitive(tex_command, "day", internal_int_cmd, day_code, internal_int_base); + tex_primitive(tex_command, "defaulthyphenchar", internal_int_cmd, default_hyphen_char_code, internal_int_base); + tex_primitive(tex_command, "defaultskewchar", internal_int_cmd, default_skew_char_code, internal_int_base); + tex_primitive(tex_command, "delimiterfactor", internal_int_cmd, delimiter_factor_code, internal_int_base); + tex_primitive(tex_command, "displaywidowpenalty", internal_int_cmd, display_widow_penalty_code, internal_int_base); + tex_primitive(tex_command, "doublehyphendemerits", internal_int_cmd, double_hyphen_demerits_code, internal_int_base); + tex_primitive(tex_command, "endlinechar", internal_int_cmd, end_line_char_code, internal_int_base); + tex_primitive(tex_command, 
"errorcontextlines", internal_int_cmd, error_context_lines_code, internal_int_base); + tex_primitive(tex_command, "escapechar", internal_int_cmd, escape_char_code, internal_int_base); + tex_primitive(tex_command, "exhyphenchar", internal_int_cmd, ex_hyphen_char_code, internal_int_base); + tex_primitive(tex_command, "exhyphenpenalty", internal_int_cmd, ex_hyphen_penalty_code, internal_int_base); + tex_primitive(tex_command, "fam", internal_int_cmd, family_code, internal_int_base); + tex_primitive(tex_command, "finalhyphendemerits", internal_int_cmd, final_hyphen_demerits_code, internal_int_base); + tex_primitive(tex_command, "floatingpenalty", internal_int_cmd, floating_penalty_code, internal_int_base); + tex_primitive(tex_command, "globaldefs", internal_int_cmd, global_defs_code, internal_int_base); + tex_primitive(tex_command, "hangafter", internal_int_cmd, hang_after_code, internal_int_base); + tex_primitive(tex_command, "hbadness", internal_int_cmd, hbadness_code, internal_int_base); + tex_primitive(tex_command, "holdinginserts", internal_int_cmd, holding_inserts_code, internal_int_base); + tex_primitive(luatex_command, "holdingmigrations", internal_int_cmd, holding_migrations_code, internal_int_base); + tex_primitive(tex_command, "hyphenpenalty", internal_int_cmd, hyphen_penalty_code, internal_int_base); + tex_primitive(tex_command, "interlinepenalty", internal_int_cmd, inter_line_penalty_code, internal_int_base); + tex_primitive(tex_command, "language", internal_int_cmd, language_code, internal_int_base); + tex_primitive(tex_command, "setlanguage", internal_int_cmd, language_code, internal_int_base); /* compatibility */ + tex_primitive(luatex_command, "setfontid", internal_int_cmd, font_code, internal_int_base); + tex_primitive(luatex_command, "hyphenationmode", internal_int_cmd, hyphenation_mode_code, internal_int_base); + tex_primitive(tex_command, "lefthyphenmin", internal_int_cmd, left_hyphen_min_code, internal_int_base); + tex_primitive(tex_command, 
"linepenalty", internal_int_cmd, line_penalty_code, internal_int_base); + tex_primitive(tex_command, "looseness", internal_int_cmd, looseness_code, internal_int_base); + /* tex_primitive(tex_command, "mag", internal_int_cmd, mag_code, internal_int_base); */ /* backend */ + tex_primitive(tex_command, "maxdeadcycles", internal_int_cmd, max_dead_cycles_code, internal_int_base); + tex_primitive(tex_command, "month", internal_int_cmd, month_code, internal_int_base); + tex_primitive(tex_command, "newlinechar", internal_int_cmd, new_line_char_code, internal_int_base); + tex_primitive(tex_command, "outputpenalty", internal_int_cmd, output_penalty_code, internal_int_base); + tex_primitive(tex_command, "pausing", internal_int_cmd, pausing_code, internal_int_base); + tex_primitive(tex_command, "postdisplaypenalty", internal_int_cmd, post_display_penalty_code, internal_int_base); + tex_primitive(tex_command, "predisplaypenalty", internal_int_cmd, pre_display_penalty_code, internal_int_base); + tex_primitive(luatex_command, "postinlinepenalty", internal_int_cmd, post_inline_penalty_code, internal_int_base); + tex_primitive(luatex_command, "preinlinepenalty", internal_int_cmd, pre_inline_penalty_code, internal_int_base); + tex_primitive(tex_command, "pretolerance", internal_int_cmd, pre_tolerance_code, internal_int_base); + tex_primitive(tex_command, "relpenalty", internal_int_cmd, post_relation_penalty_code, internal_int_base); /*tex For old times sake. 
*/ + tex_primitive(tex_command, "righthyphenmin", internal_int_cmd, right_hyphen_min_code, internal_int_base); + tex_primitive(tex_command, "showboxbreadth", internal_int_cmd, show_box_breadth_code, internal_int_base); + tex_primitive(tex_command, "showboxdepth", internal_int_cmd, show_box_depth_code, internal_int_base); + tex_primitive(tex_command, "shownodedetails", internal_int_cmd, show_node_details_code, internal_int_base); + tex_primitive(tex_command, "time", internal_int_cmd, time_code, internal_int_base); + tex_primitive(tex_command, "tolerance", internal_int_cmd, tolerance_code, internal_int_base); + tex_primitive(tex_command, "tracingonline", internal_int_cmd, tracing_online_code, internal_int_base); + tex_primitive(tex_command, "tracingmacros", internal_int_cmd, tracing_macros_code, internal_int_base); + tex_primitive(tex_command, "tracingstats", internal_int_cmd, tracing_stats_code, internal_int_base); /* obsolete */ + tex_primitive(tex_command, "tracingparagraphs", internal_int_cmd, tracing_paragraphs_code, internal_int_base); + tex_primitive(tex_command, "tracingpages", internal_int_cmd, tracing_pages_code, internal_int_base); + tex_primitive(tex_command, "tracingoutput", internal_int_cmd, tracing_output_code, internal_int_base); + tex_primitive(tex_command, "tracinglostchars", internal_int_cmd, tracing_lost_chars_code, internal_int_base); + tex_primitive(tex_command, "tracingcommands", internal_int_cmd, tracing_commands_code, internal_int_base); + tex_primitive(tex_command, "tracingrestores", internal_int_cmd, tracing_restores_code, internal_int_base); + tex_primitive(luatex_command, "tracingfonts", internal_int_cmd, tracing_fonts_code, internal_int_base); + tex_primitive(etex_command, "tracingassigns", internal_int_cmd, tracing_assigns_code, internal_int_base); + tex_primitive(etex_command, "tracinggroups", internal_int_cmd, tracing_groups_code, internal_int_base); + tex_primitive(etex_command, "tracingifs", internal_int_cmd, tracing_ifs_code, 
internal_int_base); + tex_primitive(luatex_command, "tracingmath", internal_int_cmd, tracing_math_code, internal_int_base); + tex_primitive(luatex_command, "tracinglevels", internal_int_cmd, tracing_levels_code, internal_int_base); + tex_primitive(etex_command, "tracingnesting", internal_int_cmd, tracing_nesting_code, internal_int_base); + tex_primitive(luatex_command, "tracingalignments", internal_int_cmd, tracing_alignments_code, internal_int_base); + tex_primitive(luatex_command, "tracinginserts", internal_int_cmd, tracing_inserts_code, internal_int_base); + tex_primitive(luatex_command, "tracingmarks", internal_int_cmd, tracing_marks_code, internal_int_base); + tex_primitive(luatex_command, "tracingadjusts", internal_int_cmd, tracing_adjusts_code, internal_int_base); + tex_primitive(luatex_command, "tracinghyphenation", internal_int_cmd, tracing_hyphenation_code, internal_int_base); + tex_primitive(luatex_command, "tracingexpressions", internal_int_cmd, tracing_expressions_code, internal_int_base); + tex_primitive(luatex_command, "tracingnodes", internal_int_cmd, tracing_nodes_code, internal_int_base); + tex_primitive(luatex_command, "tracingfullboxes", internal_int_cmd, tracing_full_boxes_code, internal_int_base); + tex_primitive(luatex_command, "tracingpenalties", internal_int_cmd, tracing_penalties_code, internal_int_base); + tex_primitive(tex_command, "uchyph", internal_int_cmd, uc_hyph_code, internal_int_base); /* obsolete */ + tex_primitive(tex_command, "vbadness", internal_int_cmd, vbadness_code, internal_int_base); + tex_primitive(tex_command, "widowpenalty", internal_int_cmd, widow_penalty_code, internal_int_base); + tex_primitive(tex_command, "year", internal_int_cmd, year_code, internal_int_base); + tex_primitive(no_command, "internalparstate", internal_int_cmd, internal_par_state_code, internal_int_base); + tex_primitive(no_command, "internaldirstate", internal_int_cmd, internal_dir_state_code, internal_int_base); + tex_primitive(no_command, 
"internalmathstyle", internal_int_cmd, internal_math_style_code, internal_int_base); + tex_primitive(no_command, "internalmathscale", internal_int_cmd, internal_math_scale_code, internal_int_base); + tex_primitive(etex_command, "predisplaydirection", internal_int_cmd, pre_display_direction_code, internal_int_base); + tex_primitive(etex_command, "lastlinefit", internal_int_cmd, last_line_fit_code, internal_int_base); + tex_primitive(etex_command, "savingvdiscards", internal_int_cmd, saving_vdiscards_code, internal_int_base); + tex_primitive(etex_command, "savinghyphcodes", internal_int_cmd, saving_hyph_codes_code, internal_int_base); + tex_primitive(luatex_command, "adjustspacing", internal_int_cmd, adjust_spacing_code, internal_int_base); + tex_primitive(luatex_command, "adjustspacingstep", internal_int_cmd, adjust_spacing_step_code, internal_int_base); + tex_primitive(luatex_command, "adjustspacingstretch", internal_int_cmd, adjust_spacing_stretch_code, internal_int_base); + tex_primitive(luatex_command, "adjustspacingshrink", internal_int_cmd, adjust_spacing_shrink_code, internal_int_base); + tex_primitive(luatex_command, "automatichyphenpenalty", internal_int_cmd, automatic_hyphen_penalty_code, internal_int_base); + tex_primitive(luatex_command, "catcodetable", internal_int_cmd, cat_code_table_code, internal_int_base); + tex_primitive(luatex_command, "exceptionpenalty", internal_int_cmd, exception_penalty_code, internal_int_base); + tex_primitive(luatex_command, "explicithyphenpenalty", internal_int_cmd, explicit_hyphen_penalty_code, internal_int_base); + tex_primitive(luatex_command, "firstvalidlanguage", internal_int_cmd, first_valid_language_code, internal_int_base); + tex_primitive(luatex_command, "automigrationmode", internal_int_cmd, auto_migration_mode_code, internal_int_base); + tex_primitive(luatex_command, "normalizelinemode", internal_int_cmd, normalize_line_mode_code, internal_int_base); + tex_primitive(luatex_command, "normalizeparmode", 
internal_int_cmd, normalize_par_mode_code, internal_int_base); + tex_primitive(luatex_command, "glyphdatafield", internal_int_cmd, glyph_data_code, internal_int_base); + tex_primitive(luatex_command, "glyphstatefield", internal_int_cmd, glyph_state_code, internal_int_base); + tex_primitive(luatex_command, "glyphscriptfield", internal_int_cmd, glyph_script_code, internal_int_base); + /* tex_primitive(luatex_command, "gluedatafield", internal_int_cmd, glue_data_code, internal_int_base); */ + tex_primitive(luatex_command, "localbrokenpenalty", internal_int_cmd, local_broken_penalty_code, internal_int_base); + tex_primitive(luatex_command, "localinterlinepenalty", internal_int_cmd, local_interline_penalty_code, internal_int_base); + tex_primitive(luatex_command, "luacopyinputnodes", internal_int_cmd, copy_lua_input_nodes_code, internal_int_base); + tex_primitive(luatex_command, "mathcheckfencesmode", internal_int_cmd, math_check_fences_mode_code, internal_int_base); + /* tex_primitive(luatex_command, "mathdelimitersmode", internal_int_cmd, math_delimiters_mode_code, internal_int_base); */ + /* tex_primitive(luatex_command, "mathfencesmode", internal_int_cmd, math_fences_mode_code, internal_int_base); */ + tex_primitive(luatex_command, "mathslackmode", internal_int_cmd, math_slack_mode_code, internal_int_base); + /* tex_primitive(luatex_command, "mathflattenmode", internal_int_cmd, math_flatten_mode_code, internal_int_base); */ + tex_primitive(luatex_command, "mathpenaltiesmode", internal_int_cmd, math_penalties_mode_code, internal_int_base); + /* tex_primitive(luatex_command, "mathrulethicknessmode", internal_int_cmd, math_rule_thickness_mode_code, internal_int_base); */ + tex_primitive(luatex_command, "mathscriptsmode", internal_int_cmd, math_scripts_mode_code, internal_int_base); + /* tex_primitive(luatex_command, "mathscriptboxmode", internal_int_cmd, math_script_box_mode_code, internal_int_base); */ + /* tex_primitive(luatex_command, "mathscriptcharmode", 
internal_int_cmd, math_script_char_mode_code, internal_int_base); */ + tex_primitive(luatex_command, "mathsurroundmode", internal_int_cmd, math_skip_mode_code, internal_int_base); + tex_primitive(luatex_command, "mathdoublescriptmode", internal_int_cmd, math_double_script_mode_code, internal_int_base); + /* tex_primitive(luatex_command, "mathcontrolmode", internal_int_cmd, math_control_mode_code, internal_int_base); */ + tex_primitive(luatex_command, "mathfontcontrol", internal_int_cmd, math_font_control_code, internal_int_base); + tex_primitive(luatex_command, "mathdisplaymode", internal_int_cmd, math_display_mode_code, internal_int_base); + tex_primitive(luatex_command, "mathdictgroup", internal_int_cmd, math_dict_group_code, internal_int_base); + tex_primitive(luatex_command, "mathdictproperties", internal_int_cmd, math_dict_properties_code, internal_int_base); + tex_primitive(luatex_command, "nospaces", internal_int_cmd, disable_spaces_code, internal_int_base); + tex_primitive(luatex_command, "glyphoptions", internal_int_cmd, glyph_options_code, internal_int_base); + tex_primitive(luatex_command, "glyphscale", internal_int_cmd, glyph_scale_code, internal_int_base); + tex_primitive(luatex_command, "glyphtextscale", internal_int_cmd, glyph_text_scale_code, internal_int_base); + tex_primitive(luatex_command, "glyphscriptscale", internal_int_cmd, glyph_script_scale_code, internal_int_base); + tex_primitive(luatex_command, "glyphscriptscriptscale", internal_int_cmd, glyph_scriptscript_scale_code, internal_int_base); + tex_primitive(luatex_command, "glyphxscale", internal_int_cmd, glyph_x_scale_code, internal_int_base); + tex_primitive(luatex_command, "glyphyscale", internal_int_cmd, glyph_y_scale_code, internal_int_base); + tex_primitive(luatex_command, "outputbox", internal_int_cmd, output_box_code, internal_int_base); + tex_primitive(luatex_command, "prebinoppenalty", internal_int_cmd, pre_binary_penalty_code, internal_int_base); /*tex For old times sake. 
*/ + tex_primitive(luatex_command, "predisplaygapfactor", internal_int_cmd, math_pre_display_gap_factor_code, internal_int_base); + tex_primitive(luatex_command, "prerelpenalty", internal_int_cmd, pre_relation_penalty_code, internal_int_base); /*tex For old times sake. */ + tex_primitive(luatex_command, "protrudechars", internal_int_cmd, protrude_chars_code, internal_int_base); + tex_primitive(luatex_command, "matheqnogapstep", internal_int_cmd, math_eqno_gap_step_code, internal_int_base); + tex_primitive(luatex_command, "mathdisplayskipmode", internal_int_cmd, math_display_skip_mode_code, internal_int_base); + tex_primitive(luatex_command, "mathnolimitsmode", internal_int_cmd, math_nolimits_mode_code, internal_int_base); + tex_primitive(luatex_command, "mathlimitsmode", internal_int_cmd, math_limits_mode_code, internal_int_base); + tex_primitive(luatex_command, "mathrulesmode", internal_int_cmd, math_rules_mode_code, internal_int_base); + tex_primitive(luatex_command, "mathrulesfam", internal_int_cmd, math_rules_fam_code, internal_int_base); + tex_primitive(luatex_command, "mathspacingmode", internal_int_cmd, math_spacing_mode_code, internal_int_base); /*tex Inject zero spaces, for tracing */ + tex_primitive(luatex_command, "mathgroupingmode", internal_int_cmd, math_grouping_mode_code, internal_int_base); + tex_primitive(luatex_command, "mathgluemode", internal_int_cmd, math_glue_mode_code, internal_int_base); + tex_primitive(luatex_command, "mathbeginclass", internal_int_cmd, math_begin_class_code, internal_int_base); + tex_primitive(luatex_command, "mathendclass", internal_int_cmd, math_end_class_code, internal_int_base); + tex_primitive(luatex_command, "mathleftclass", internal_int_cmd, math_left_class_code, internal_int_base); + tex_primitive(luatex_command, "mathrightclass", internal_int_cmd, math_right_class_code, internal_int_base); + tex_primitive(luatex_command, "supmarkmode", internal_int_cmd, sup_mark_mode_code, internal_int_base); + 
tex_primitive(luatex_command, "overloadmode", internal_int_cmd, overload_mode_code, internal_int_base); + tex_primitive(luatex_command, "autoparagraphmode", internal_int_cmd, auto_paragraph_mode_code, internal_int_base); + tex_primitive(luatex_command, "shapingpenaltiesmode", internal_int_cmd, shaping_penalties_mode_code, internal_int_base); + tex_primitive(luatex_command, "shapingpenalty", internal_int_cmd, shaping_penalty_code, internal_int_base); + tex_primitive(luatex_command, "orphanpenalty", internal_int_cmd, orphan_penalty_code, internal_int_base); + /* tex_primitive(luatex_command, "alignmentcellattr", internal_int_cmd, alignment_cell_attribute_code, internal_int_base); */ /* todo */ + tex_primitive(luatex_command, "alignmentcellsource", internal_int_cmd, alignment_cell_source_code, internal_int_base); + tex_primitive(luatex_command, "alignmentwrapsource", internal_int_cmd, alignment_wrap_source_code, internal_int_base); + /* tex_primitive(luatex_command, "pageboundarypenalty", internal_int_cmd, page_boundary_penalty_code, internal_int_base); */ + tex_primitive(luatex_command, "linebreakcriterium", internal_int_cmd, line_break_criterium_code, internal_int_base); + + /*tex dimensions */ + + tex_primitive(tex_command, "boxmaxdepth", internal_dimen_cmd, box_max_depth_code, internal_dimen_base); + tex_primitive(tex_command, "delimitershortfall", internal_dimen_cmd, delimiter_shortfall_code, internal_dimen_base); + tex_primitive(tex_command, "displayindent", internal_dimen_cmd, display_indent_code, internal_dimen_base); + tex_primitive(tex_command, "displaywidth", internal_dimen_cmd, display_width_code, internal_dimen_base); + tex_primitive(tex_command, "emergencystretch", internal_dimen_cmd, emergency_stretch_code, internal_dimen_base); + tex_primitive(tex_command, "hangindent", internal_dimen_cmd, hang_indent_code, internal_dimen_base); + tex_primitive(tex_command, "hfuzz", internal_dimen_cmd, hfuzz_code, internal_dimen_base); + /* tex_primitive(tex_command, 
"hoffset", internal_dimen_cmd, h_offset_code, internal_dimen_base); */ /* backend */ + tex_primitive(tex_command, "hsize", internal_dimen_cmd, hsize_code, internal_dimen_base); + tex_primitive(tex_command, "lineskiplimit", internal_dimen_cmd, line_skip_limit_code, internal_dimen_base); + tex_primitive(tex_command, "mathsurround", internal_dimen_cmd, math_surround_code, internal_dimen_base); + tex_primitive(tex_command, "maxdepth", internal_dimen_cmd, max_depth_code, internal_dimen_base); + tex_primitive(tex_command, "nulldelimiterspace", internal_dimen_cmd, null_delimiter_space_code, internal_dimen_base); + tex_primitive(tex_command, "overfullrule", internal_dimen_cmd, overfull_rule_code, internal_dimen_base); + tex_primitive(tex_command, "parindent", internal_dimen_cmd, par_indent_code, internal_dimen_base); + tex_primitive(tex_command, "predisplaysize", internal_dimen_cmd, pre_display_size_code, internal_dimen_base); + tex_primitive(tex_command, "scriptspace", internal_dimen_cmd, script_space_code, internal_dimen_base); + tex_primitive(tex_command, "splitmaxdepth", internal_dimen_cmd, split_max_depth_code, internal_dimen_base); + tex_primitive(tex_command, "vfuzz", internal_dimen_cmd, vfuzz_code, internal_dimen_base); + /* tex_primitive(tex_command, "voffset", internal_dimen_cmd, v_offset_code, internal_dimen_base); */ /* backend */ + tex_primitive(tex_command, "vsize", internal_dimen_cmd, vsize_code, internal_dimen_base); + tex_primitive(luatex_command, "glyphxoffset", internal_dimen_cmd, glyph_x_offset_code, internal_dimen_base); + tex_primitive(luatex_command, "glyphyoffset", internal_dimen_cmd, glyph_y_offset_code, internal_dimen_base); + tex_primitive(luatex_command, "pxdimen", internal_dimen_cmd, px_dimen_code, internal_dimen_base); + tex_primitive(luatex_command, "tabsize", internal_dimen_cmd, tab_size_code, internal_dimen_base); + tex_primitive(luatex_command, "pageextragoal", internal_dimen_cmd, page_extra_goal_code, internal_dimen_base); + + /*tex 
Probably never used with \UNICODE\ omnipresent now: */ + + tex_primitive(tex_command, "accent", accent_cmd, normal_code, 0); + + /*tex These three can go in one cmd: */ + + tex_primitive(tex_command, "advance", arithmic_cmd, advance_code, 0); + tex_primitive(tex_command, "divide", arithmic_cmd, divide_code, 0); + tex_primitive(tex_command, "multiply", arithmic_cmd, multiply_code, 0); + + /*tex We combined the after thingies into one category:*/ + + tex_primitive(tex_command, "afterassignment", after_something_cmd, after_assignment_code, 0); + tex_primitive(luatex_command, "afterassigned", after_something_cmd, after_assigned_code, 0); + tex_primitive(tex_command, "aftergroup", after_something_cmd, after_group_code, 0); + tex_primitive(luatex_command, "aftergrouped", after_something_cmd, after_grouped_code, 0); + tex_primitive(luatex_command, "atendofgroup", after_something_cmd, at_end_of_group_code, 0); + tex_primitive(luatex_command, "atendofgrouped", after_something_cmd, at_end_of_grouped_code, 0); + + tex_primitive(tex_command, "begingroup", begin_group_cmd, semi_simple_group_code, 0); + tex_primitive(luatex_command, "beginsimplegroup", begin_group_cmd, also_simple_group_code, 0); + tex_primitive(luatex_command, "beginmathgroup", begin_group_cmd, math_simple_group_code, 0); + + tex_primitive(luatex_command, "noboundary", boundary_cmd, cancel_boundary, 0); + tex_primitive(luatex_command, "boundary", boundary_cmd, user_boundary, 0); + tex_primitive(luatex_command, "protrusionboundary", boundary_cmd, protrusion_boundary, 0); + tex_primitive(luatex_command, "wordboundary", boundary_cmd, word_boundary, 0); + tex_primitive(luatex_command, "pageboundary", boundary_cmd, page_boundary, 0); + /* tex_primitive(luatex_command, "parboundary", boundary_cmd, par_boundary, 0); */ + + tex_primitive(tex_command, "penalty", penalty_cmd, normal_code, 0); + + tex_primitive(tex_command, "char", char_number_cmd, char_number_code, 0); + tex_primitive(luatex_command, "glyph", 
char_number_cmd, glyph_number_code, 0); + + tex_primitive(luatex_command, "etoks", combine_toks_cmd, expanded_toks_code, 0); + tex_primitive(luatex_command, "toksapp", combine_toks_cmd, append_toks_code, 0); + tex_primitive(luatex_command, "etoksapp", combine_toks_cmd, append_expanded_toks_code, 0); + tex_primitive(luatex_command, "tokspre", combine_toks_cmd, prepend_toks_code, 0); + tex_primitive(luatex_command, "etokspre", combine_toks_cmd, prepend_expanded_toks_code, 0); + tex_primitive(luatex_command, "xtoks", combine_toks_cmd, global_expanded_toks_code, 0); + tex_primitive(luatex_command, "gtoksapp", combine_toks_cmd, global_append_toks_code, 0); + tex_primitive(luatex_command, "xtoksapp", combine_toks_cmd, global_append_expanded_toks_code, 0); + tex_primitive(luatex_command, "gtokspre", combine_toks_cmd, global_prepend_toks_code, 0); + tex_primitive(luatex_command, "xtokspre", combine_toks_cmd, global_prepend_expanded_toks_code, 0); + + tex_primitive(tex_command, "csname", cs_name_cmd, cs_name_code, 0); + tex_primitive(luatex_command, "lastnamedcs", cs_name_cmd, last_named_cs_code, 0); + tex_primitive(luatex_command, "begincsname", cs_name_cmd, begin_cs_name_code, 0); + tex_primitive(luatex_command, "futurecsname", cs_name_cmd, future_cs_name_code, 0); /* Okay but rare applications (less tracing). */ + + tex_primitive(tex_command, "endcsname", end_cs_name_cmd, normal_code, 0); + + /* set_font_id could use def_font_cmd */ + + tex_primitive(tex_command, "font", define_font_cmd, normal_code, 0); + /* tex_primitive(tex_command, "nullfont", set_font_cmd, null_font, 0); */ /* See later. */ + + tex_primitive(tex_command, "delimiter", delimiter_number_cmd, math_delimiter_code, 0); + tex_primitive(luatex_command, "Udelimiter", delimiter_number_cmd, math_udelimiter_code, 0); + + /* tex_primitive(tex_command, "endgroup", end_group_cmd, normal_code, 0); */ /* See later. */ + + /*tex We don't combine these because they have different runners and mode handling. 
*/ + + tex_primitive(tex_command, " ", explicit_space_cmd, normal_code, 0); /* These will get verbose equivalents: \explicitspace (and maybe a sfless variant too) */ + tex_primitive(tex_command, "/", italic_correction_cmd, normal_code, 0); /* These will get verbose equivalents: \italiccorrection */ + + tex_primitive(tex_command, "expandafter", expand_after_cmd, expand_after_code, 0); + tex_primitive(etex_command, "unless", expand_after_cmd, expand_unless_code, 0); + tex_primitive(luatex_command, "futureexpand", expand_after_cmd, future_expand_code, 0); + tex_primitive(luatex_command, "futureexpandis", expand_after_cmd, future_expand_is_code, 0); + tex_primitive(luatex_command, "futureexpandisap", expand_after_cmd, future_expand_is_ap_code, 0); + /* tex_primitive(luatex_command, "expandaftertwo", expand_after_cmd, expand_after_2_code, 0); */ /* Yes or no. */ + /* tex_primitive(luatex_command, "expandafterthree", expand_after_cmd, expand_after_3_code, 0); */ /* Yes or no. */ + tex_primitive(luatex_command, "expandafterspaces", expand_after_cmd, expand_after_spaces_code, 0); + tex_primitive(luatex_command, "expandafterpars", expand_after_cmd, expand_after_pars_code, 0); + tex_primitive(luatex_command, "expandtoken", expand_after_cmd, expand_token_code, 0); + tex_primitive(luatex_command, "expandcstoken", expand_after_cmd, expand_cs_token_code, 0); + tex_primitive(luatex_command, "expand", expand_after_cmd, expand_code, 0); + tex_primitive(luatex_command, "semiexpand", expand_after_cmd, semi_expand_code, 0); + tex_primitive(luatex_command, "expandedafter", expand_after_cmd, expand_after_toks_code, 0); + /* tex_primitive(luatex_command, "expandafterfi", expand_after_cmd, expand_after_fi, 0); */ + + tex_primitive(tex_command, "ignorespaces", ignore_something_cmd, ignore_space_code, 0); + tex_primitive(luatex_command, "ignorepars", ignore_something_cmd, ignore_par_code, 0); + tex_primitive(luatex_command, "ignorearguments", ignore_something_cmd, ignore_argument_code, 0); 
+ + tex_primitive(tex_command, "input", input_cmd, normal_input_code, 0); + tex_primitive(tex_command, "endinput", input_cmd, end_of_input_code, 0); + tex_primitive(etex_command, "scantokens", input_cmd, token_input_code, 0); + tex_primitive(luatex_command, "scantextokens", input_cmd, tex_token_input_code, 0); + tex_primitive(luatex_command, "tokenized", input_cmd, tokenized_code, 0); + tex_primitive(luatex_command, "retokenized", input_cmd, retokenized_code, 0); + tex_primitive(luatex_command, "quitloop", input_cmd, quit_loop_code, 0); + + tex_primitive(tex_command, "insert", insert_cmd, normal_code, 0); + + tex_primitive(luatex_command, "luafunctioncall", lua_function_call_cmd, lua_function_call_code, 0); + tex_primitive(luatex_command, "luabytecodecall", lua_function_call_cmd, lua_bytecode_call_code, 0); + + tex_primitive(tex_command, "mark", set_mark_cmd, set_mark_code, 0); + tex_primitive(etex_command, "marks", set_mark_cmd, set_marks_code, 0); + tex_primitive(luatex_command, "clearmarks", set_mark_cmd, clear_marks_code, 0); + tex_primitive(luatex_command, "flushmarks", set_mark_cmd, flush_marks_code, 0); + + tex_primitive(tex_command, "mathaccent", math_accent_cmd, math_accent_code, 0); + tex_primitive(luatex_command, "Umathaccent", math_accent_cmd, math_uaccent_code, 0); + + tex_primitive(tex_command, "mathchar", math_char_number_cmd, math_char_number_code, 0); + tex_primitive(luatex_command, "Umathchar", math_char_number_cmd, math_xchar_number_code, 0); + tex_primitive(luatex_command, "Umathdict", math_char_number_cmd, math_dchar_number_code, 0); + /* tex_primitive(luatex_command, "Umathcharnum", math_char_number_cmd, math_uchar_number_code, 0); */ + tex_primitive(luatex_command, "Umathclass", math_char_number_cmd, math_class_number_code, 0); + + tex_primitive(tex_command, "mathchoice", math_choice_cmd, math_choice_code, 0); + tex_primitive(luatex_command, "Umathdiscretionary", math_choice_cmd, math_discretionary_code, 0); + tex_primitive(luatex_command, 
"Ustack", math_choice_cmd, math_ustack_code, 0); + + tex_primitive(tex_command, "noexpand", no_expand_cmd, normal_code, 0); + + /* tex_primitive(tex_command, "par", end_paragraph_cmd, too_big_char, too_big_char); */ /* See later. */ + + tex_primitive(tex_command, "radical", math_radical_cmd, normal_radical_subtype, 0); + tex_primitive(luatex_command, "Uradical", math_radical_cmd, radical_radical_subtype, 0); + tex_primitive(luatex_command, "Uroot", math_radical_cmd, root_radical_subtype, 0); + tex_primitive(luatex_command, "Urooted", math_radical_cmd, rooted_radical_subtype, 0); + tex_primitive(luatex_command, "Uunderdelimiter", math_radical_cmd, under_delimiter_radical_subtype, 0); + tex_primitive(luatex_command, "Uoverdelimiter", math_radical_cmd, over_delimiter_radical_subtype, 0); + tex_primitive(luatex_command, "Udelimiterunder", math_radical_cmd, delimiter_under_radical_subtype, 0); + tex_primitive(luatex_command, "Udelimiterover", math_radical_cmd, delimiter_over_radical_subtype, 0); + tex_primitive(luatex_command, "Udelimited", math_radical_cmd, delimited_radical_subtype, 0); + tex_primitive(luatex_command, "Uhextensible", math_radical_cmd, h_extensible_radical_subtype, 0); + + /* TEX_primitive(tex_command, "relax", relax_cmd, too_big_char, too_big_char); */ /* See later. */ + + tex_primitive(tex_command, "setbox", set_box_cmd, normal_code, 0); + + /*tex + Instead of |set_(e)tex_shape_cmd| we use |set_specification_cmd| because since \ETEX\ + it no longer relates to par shapes only. ALso, because there are nodes involved, that + themselves have a different implementation, it is less confusing. 
+ */ + + tex_primitive(tex_command, "parshape", set_specification_cmd, par_shape_code, internal_specification_base); + tex_primitive(etex_command, "interlinepenalties", set_specification_cmd, inter_line_penalties_code, internal_specification_base); + tex_primitive(etex_command, "clubpenalties", set_specification_cmd, club_penalties_code, internal_specification_base); + tex_primitive(etex_command, "widowpenalties", set_specification_cmd, widow_penalties_code, internal_specification_base); + tex_primitive(etex_command, "displaywidowpenalties", set_specification_cmd, display_widow_penalties_code, internal_specification_base); + tex_primitive(luatex_command, "orphanpenalties", set_specification_cmd, orphan_penalties_code, internal_specification_base); + tex_primitive(luatex_command, "mathforwardpenalties", set_specification_cmd, math_forward_penalties_code, internal_specification_base); + tex_primitive(luatex_command, "mathbackwardpenalties", set_specification_cmd, math_backward_penalties_code, internal_specification_base); + + tex_primitive(tex_command, "the", the_cmd, the_code, 0); + tex_primitive(luatex_command, "thewithoutunit", the_cmd, the_without_unit_code, 0); + /* tex_primitive(luatex_command, "thewithproperty", the_cmd, the_with_property_code, 0); */ /* replaced by value functions */ + tex_primitive(etex_command, "unexpanded", the_cmd, unexpanded_code, 0); /* maybe convert_cmd */ + tex_primitive(etex_command, "detokenize", the_cmd, detokenize_code, 0); /* maybe convert_cmd */ + + tex_primitive(tex_command, "botmark", get_mark_cmd, bot_mark_code, 0); /* \botmarks 0 */ + tex_primitive(tex_command, "firstmark", get_mark_cmd, first_mark_code, 0); /* \firstmarks 0 */ + tex_primitive(tex_command, "splitbotmark", get_mark_cmd, split_bot_mark_code, 0); /* \splitbotmarks 0 */ + tex_primitive(tex_command, "splitfirstmark", get_mark_cmd, split_first_mark_code, 0); /* \splitfirstmarks 0 */ + tex_primitive(tex_command, "topmark", get_mark_cmd, top_mark_code, 0); /* 
\topmarks 0 */ + tex_primitive(etex_command, "botmarks", get_mark_cmd, bot_marks_code, 0); + tex_primitive(etex_command, "firstmarks", get_mark_cmd, first_marks_code, 0); + tex_primitive(etex_command, "splitbotmarks", get_mark_cmd, split_bot_marks_code, 0); + tex_primitive(etex_command, "splitfirstmarks", get_mark_cmd, split_first_marks_code, 0); + tex_primitive(etex_command, "topmarks", get_mark_cmd, top_marks_code, 0); + tex_primitive(luatex_command, "currentmarks", get_mark_cmd, current_marks_code, 0); + + tex_primitive(tex_command, "vadjust", vadjust_cmd, normal_code, 0); + + tex_primitive(tex_command, "halign", halign_cmd, normal_code, 0); + tex_primitive(tex_command, "valign", valign_cmd, normal_code, 0); + + tex_primitive(tex_command, "vcenter", vcenter_cmd, normal_code, 0); + + /* todo rule codes of nodes, so empty will move */ + + tex_primitive(tex_command, "vrule", vrule_cmd, normal_rule_code, 0); + tex_primitive(luatex_command, "novrule", vrule_cmd, empty_rule_code, 0); + tex_primitive(luatex_command, "srule", vrule_cmd, strut_rule_code, 0); + + tex_primitive(tex_command, "hrule", hrule_cmd, normal_rule_code, 0); + tex_primitive(luatex_command, "nohrule", hrule_cmd, empty_rule_code, 0); + + tex_primitive(tex_command, "count", register_cmd, int_val_level, 0); + tex_primitive(luatex_command, "attribute", register_cmd, attr_val_level, 0); + tex_primitive(tex_command, "dimen", register_cmd, dimen_val_level, 0); + tex_primitive(tex_command, "skip", register_cmd, glue_val_level, 0); + tex_primitive(tex_command, "muskip", register_cmd, mu_val_level, 0); + tex_primitive(tex_command, "toks", register_cmd, tok_val_level, 0); + + tex_primitive(tex_command, "spacefactor", set_auxiliary_cmd, space_factor_code, 0); + tex_primitive(tex_command, "prevdepth", set_auxiliary_cmd, prev_depth_code, 0); + tex_primitive(tex_command, "prevgraf", set_auxiliary_cmd, prev_graf_code, 0); + tex_primitive(etex_command, "interactionmode", set_auxiliary_cmd, interaction_mode_code, 0); 
+ tex_primitive(luatex_command, "insertmode", set_auxiliary_cmd, insert_mode_code, 0); + + tex_primitive(tex_command, "pagegoal", set_page_property_cmd, page_goal_code, 0); + tex_primitive(tex_command, "pagetotal", set_page_property_cmd, page_total_code, 0); + tex_primitive(tex_command, "pagestretch", set_page_property_cmd, page_stretch_code, 0); + tex_primitive(tex_command, "pagefilstretch", set_page_property_cmd, page_filstretch_code, 0); + tex_primitive(tex_command, "pagefillstretch", set_page_property_cmd, page_fillstretch_code, 0); + tex_primitive(tex_command, "pagefilllstretch", set_page_property_cmd, page_filllstretch_code, 0); + tex_primitive(tex_command, "pageshrink", set_page_property_cmd, page_shrink_code, 0); + tex_primitive(tex_command, "pagedepth", set_page_property_cmd, page_depth_code, 0); + tex_primitive(luatex_command, "pagevsize", set_page_property_cmd, page_vsize_code, 0); + + tex_primitive(tex_command, "deadcycles", set_page_property_cmd, dead_cycles_code, 0); + + tex_primitive(tex_command, "insertpenalties", set_page_property_cmd, insert_penalties_code, 0); + tex_primitive(luatex_command, "insertheights", set_page_property_cmd, insert_heights_code, 0); + tex_primitive(luatex_command, "insertstoring", set_page_property_cmd, insert_storing_code, 0); + + tex_primitive(luatex_command, "insertdistance", set_page_property_cmd, insert_distance_code, 0); + tex_primitive(luatex_command, "insertmultiplier", set_page_property_cmd, insert_multiplier_code, 0); + tex_primitive(luatex_command, "insertlimit", set_page_property_cmd, insert_limit_code, 0); + tex_primitive(luatex_command, "insertstorage", set_page_property_cmd, insert_storage_code, 0); + tex_primitive(luatex_command, "insertpenalty", set_page_property_cmd, insert_penalty_code, 0); + tex_primitive(luatex_command, "insertmaxdepth", set_page_property_cmd, insert_maxdepth_code, 0); + tex_primitive(luatex_command, "insertheight", set_page_property_cmd, insert_height_code, 0); + 
tex_primitive(luatex_command, "insertdepth", set_page_property_cmd, insert_depth_code, 0); + tex_primitive(luatex_command, "insertwidth", set_page_property_cmd, insert_width_code, 0); + + tex_primitive(tex_command, "wd", set_box_property_cmd, box_width_code, 0); + tex_primitive(tex_command, "ht", set_box_property_cmd, box_height_code, 0); + tex_primitive(tex_command, "dp", set_box_property_cmd, box_depth_code, 0); + tex_primitive(luatex_command, "boxdirection", set_box_property_cmd, box_direction_code, 0); + tex_primitive(luatex_command, "boxgeometry", set_box_property_cmd, box_geometry_code, 0); + tex_primitive(luatex_command, "boxorientation", set_box_property_cmd, box_orientation_code, 0); + tex_primitive(luatex_command, "boxanchor", set_box_property_cmd, box_anchor_code, 0); + tex_primitive(luatex_command, "boxanchors", set_box_property_cmd, box_anchors_code, 0); + tex_primitive(luatex_command, "boxsource", set_box_property_cmd, box_source_code, 0); + tex_primitive(luatex_command, "boxtarget", set_box_property_cmd, box_target_code, 0); + tex_primitive(luatex_command, "boxxoffset", set_box_property_cmd, box_xoffset_code, 0); + tex_primitive(luatex_command, "boxyoffset", set_box_property_cmd, box_yoffset_code, 0); + tex_primitive(luatex_command, "boxxmove", set_box_property_cmd, box_xmove_code, 0); + tex_primitive(luatex_command, "boxymove", set_box_property_cmd, box_ymove_code, 0); + tex_primitive(luatex_command, "boxtotal", set_box_property_cmd, box_total_code, 0); + tex_primitive(luatex_command, "boxshift", set_box_property_cmd, box_shift_code, 0); + tex_primitive(luatex_command, "boxadapt", set_box_property_cmd, box_adapt_code, 0); + tex_primitive(luatex_command, "boxrepack", set_box_property_cmd, box_repack_code, 0); + tex_primitive(luatex_command, "boxfreeze", set_box_property_cmd, box_freeze_code, 0); + tex_primitive(luatex_command, "boxattribute", set_box_property_cmd, box_attribute_code, 0); + + tex_primitive(tex_command, "lastpenalty", some_item_cmd, 
lastpenalty_code, 0); + tex_primitive(tex_command, "lastkern", some_item_cmd, lastkern_code, 0); + tex_primitive(tex_command, "lastskip", some_item_cmd, lastskip_code, 0); + tex_primitive(luatex_command, "lastboundary", some_item_cmd, lastboundary_code, 0); + tex_primitive(etex_command, "lastnodetype", some_item_cmd, last_node_type_code, 0); + tex_primitive(luatex_command, "lastnodesubtype", some_item_cmd, last_node_subtype_code, 0); + tex_primitive(tex_command, "inputlineno", some_item_cmd, input_line_no_code, 0); + tex_primitive(tex_command, "badness", some_item_cmd, badness_code, 0); + tex_primitive(luatex_command, "overshoot", some_item_cmd, overshoot_code, 0); + tex_primitive(luatex_command, "luatexversion", some_item_cmd, luatex_version_code, 0); + tex_primitive(luatex_command, "luatexrevision", some_item_cmd, luatex_revision_code, 0); + tex_primitive(etex_command, "currentgrouplevel", some_item_cmd, current_group_level_code, 0); + tex_primitive(etex_command, "currentgrouptype", some_item_cmd, current_group_type_code, 0); + tex_primitive(etex_command, "currentiflevel", some_item_cmd, current_if_level_code, 0); + tex_primitive(etex_command, "currentiftype", some_item_cmd, current_if_type_code, 0); + tex_primitive(etex_command, "currentifbranch", some_item_cmd, current_if_branch_code, 0); + tex_primitive(etex_command, "gluestretchorder", some_item_cmd, glue_stretch_order_code, 0); + tex_primitive(etex_command, "glueshrinkorder", some_item_cmd, glue_shrink_order_code, 0); + tex_primitive(luatex_command, "fontid", some_item_cmd, font_id_code, 0); + tex_primitive(luatex_command, "glyphxscaled", some_item_cmd, glyph_x_scaled_code, 0); + tex_primitive(luatex_command, "glyphyscaled", some_item_cmd, glyph_y_scaled_code, 0); + tex_primitive(etex_command, "fontcharwd", some_item_cmd, font_char_wd_code, 0); + tex_primitive(etex_command, "fontcharht", some_item_cmd, font_char_ht_code, 0); + tex_primitive(etex_command, "fontchardp", some_item_cmd, font_char_dp_code, 0); + 
tex_primitive(etex_command, "fontcharic", some_item_cmd, font_char_ic_code, 0); + tex_primitive(luatex_command, "fontcharta", some_item_cmd, font_char_ta_code, 0); + tex_primitive(luatex_command, "fontspecid", some_item_cmd, font_spec_id_code, 0); + tex_primitive(luatex_command, "fontspecscale", some_item_cmd, font_spec_scale_code, 0); + tex_primitive(luatex_command, "fontspecxscale", some_item_cmd, font_spec_xscale_code, 0); + tex_primitive(luatex_command, "fontspecyscale", some_item_cmd, font_spec_yscale_code, 0); + tex_primitive(luatex_command, "fontspecifiedsize", some_item_cmd, font_size_code, 0); + tex_primitive(luatex_command, "fontmathcontrol", some_item_cmd, font_math_control_code, 0); + tex_primitive(luatex_command, "fonttextcontrol", some_item_cmd, font_text_control_code, 0); + tex_primitive(luatex_command, "mathscale", some_item_cmd, math_scale_code, 0); + tex_primitive(luatex_command, "mathstyle", some_item_cmd, math_style_code, 0); + tex_primitive(luatex_command, "mathmainstyle", some_item_cmd, math_main_style_code, 0); + tex_primitive(luatex_command, "mathstylefontid", some_item_cmd, math_style_font_id_code, 0); + tex_primitive(luatex_command, "mathstackstyle", some_item_cmd, math_stack_style_code, 0); + tex_primitive(luatex_command, "Umathcharclass", some_item_cmd, math_char_class_code, 0); + tex_primitive(luatex_command, "Umathcharfam", some_item_cmd, math_char_fam_code, 0); + tex_primitive(luatex_command, "Umathcharslot", some_item_cmd, math_char_slot_code, 0); + tex_primitive(luatex_command, "lastarguments", some_item_cmd, last_arguments_code, 0); + tex_primitive(luatex_command, "parametercount", some_item_cmd, parameter_count_code, 0); + /* tex_primitive(luatex_command, "luavaluefunction", some_item_cmd, lua_value_function_code, 0); */ + tex_primitive(luatex_command, "insertprogress", some_item_cmd, insert_progress_code, 0); + tex_primitive(luatex_command, "leftmarginkern", some_item_cmd, left_margin_kern_code, 0); + 
tex_primitive(luatex_command, "rightmarginkern", some_item_cmd, right_margin_kern_code, 0); + tex_primitive(etex_command, "parshapelength", some_item_cmd, par_shape_length_code, 0); + tex_primitive(etex_command, "parshapeindent", some_item_cmd, par_shape_indent_code, 0); + tex_primitive(etex_command, "parshapedimen", some_item_cmd, par_shape_dimen_code, 0); + tex_primitive(etex_command, "gluestretch", some_item_cmd, glue_stretch_code, 0); + tex_primitive(etex_command, "glueshrink", some_item_cmd, glue_shrink_code, 0); + tex_primitive(etex_command, "mutoglue", some_item_cmd, mu_to_glue_code, 0); + tex_primitive(etex_command, "gluetomu", some_item_cmd, glue_to_mu_code, 0); + tex_primitive(etex_command, "numexpr", some_item_cmd, numexpr_code, 0); + tex_primitive(etex_command, "dimexpr", some_item_cmd, dimexpr_code, 0); + tex_primitive(etex_command, "glueexpr", some_item_cmd, glueexpr_code, 0); + tex_primitive(etex_command, "muexpr", some_item_cmd, muexpr_code, 0); + tex_primitive(luatex_command, "numexpression", some_item_cmd, numexpression_code, 0); /* experiment */ + tex_primitive(luatex_command, "dimexpression", some_item_cmd, dimexpression_code, 0); /* experiment */ + // tex_primitive(luatex_command, "dimentoscale", some_item_cmd, dimen_to_scale_code, 0); + tex_primitive(luatex_command, "lastchknum", some_item_cmd, last_chk_num_code, 0); + tex_primitive(luatex_command, "lastchkdim", some_item_cmd, last_chk_dim_code, 0); + tex_primitive(luatex_command, "numericscale", some_item_cmd, numeric_scale_code, 0); + tex_primitive(luatex_command, "indexofregister", some_item_cmd, index_of_register_code, 0); + tex_primitive(luatex_command, "indexofcharacter", some_item_cmd, index_of_character_code, 0); + tex_primitive(luatex_command, "currentloopiterator", some_item_cmd, current_loop_iterator_code, 0); + tex_primitive(luatex_command, "currentloopnesting", some_item_cmd, current_loop_nesting_code, 0); + tex_primitive(luatex_command, "lastloopiterator", some_item_cmd, 
last_loop_iterator_code, 0); + tex_primitive(luatex_command, "lastparcontext", some_item_cmd, last_par_context_code, 0); + tex_primitive(luatex_command, "lastpageextra", some_item_cmd, last_page_extra_code, 0); + tex_primitive(luatex_command, "scaledslantperpoint", some_item_cmd, scaled_slant_per_point_code, 0); + tex_primitive(luatex_command, "scaledinterwordspace", some_item_cmd, scaled_interword_space_code, 0); + tex_primitive(luatex_command, "scaledinterwordstretch", some_item_cmd, scaled_interword_stretch_code, 0); + tex_primitive(luatex_command, "scaledinterwordshrink", some_item_cmd, scaled_interword_shrink_code, 0); + tex_primitive(luatex_command, "scaledexheight", some_item_cmd, scaled_ex_height_code, 0); + tex_primitive(luatex_command, "scaledemwidth", some_item_cmd, scaled_em_width_code, 0); + tex_primitive(luatex_command, "scaledextraspace", some_item_cmd, scaled_extra_space_code, 0); + tex_primitive(luatex_command, "mathatomglue", some_item_cmd, math_atom_glue_code, 0); + tex_primitive(luatex_command, "lastleftclass", some_item_cmd, last_left_class_code, 0); + tex_primitive(luatex_command, "lastrightclass", some_item_cmd, last_right_class_code, 0); + tex_primitive(luatex_command, "lastatomclass", some_item_cmd, last_atom_class_code, 0); + + tex_primitive(tex_command, "fontname", convert_cmd, font_name_code, 0); + tex_primitive(luatex_command, "fontspecifiedname", convert_cmd, font_specification_code, 0); + tex_primitive(tex_command, "jobname", convert_cmd, job_name_code, 0); + tex_primitive(tex_command, "meaning", convert_cmd, meaning_code, 0); + tex_primitive(luatex_command, "meaningfull", convert_cmd, meaning_full_code, 0); + tex_primitive(luatex_command, "meaningless", convert_cmd, meaning_less_code, 0); + tex_primitive(luatex_command, "meaningasis", convert_cmd, meaning_asis_code, 0); /* for manuals and articles */ + /*tex Maybe some day also |meaningonly| (no macro: in front). 
*/ + tex_primitive(tex_command, "number", convert_cmd, number_code, 0); + tex_primitive(luatex_command, "tointeger", convert_cmd, to_integer_code, 0); + tex_primitive(luatex_command, "tohexadecimal", convert_cmd, to_hexadecimal_code, 0); + tex_primitive(luatex_command, "toscaled", convert_cmd, to_scaled_code, 0); + tex_primitive(luatex_command, "tosparsescaled", convert_cmd, to_sparse_scaled_code, 0); + tex_primitive(luatex_command, "todimension", convert_cmd, to_dimension_code, 0); + tex_primitive(luatex_command, "tosparsedimension", convert_cmd, to_sparse_dimension_code, 0); + tex_primitive(luatex_command, "tomathstyle", convert_cmd, to_mathstyle_code, 0); + tex_primitive(tex_command, "romannumeral", convert_cmd, roman_numeral_code, 0); + tex_primitive(tex_command, "string", convert_cmd, string_code, 0); + tex_primitive(luatex_command, "directlua", convert_cmd, lua_code, 0); + tex_primitive(luatex_command, "csstring", convert_cmd, cs_string_code, 0); + tex_primitive(luatex_command, "detokenized", convert_cmd, detokenized_code, 0); + tex_primitive(luatex_command, "expanded", convert_cmd, expanded_code, 0); + tex_primitive(luatex_command, "semiexpanded", convert_cmd, semi_expanded_code, 0); + tex_primitive(luatex_command, "formatname", convert_cmd, format_name_code, 0); + tex_primitive(luatex_command, "luabytecode", convert_cmd, lua_bytecode_code, 0); + tex_primitive(luatex_command, "luaescapestring", convert_cmd, lua_escape_string_code, 0); + tex_primitive(luatex_command, "luafunction", convert_cmd, lua_function_code, 0); + tex_primitive(luatex_command, "luatexbanner", convert_cmd, luatex_banner_code, 0); + tex_primitive(luatex_command, "Uchar", convert_cmd, uchar_code, 0); + + /* tex_primitive(tex_command, "fi", if_test_cmd, fi_code, 0); */ /* See later. 
*/ + tex_primitive(tex_command, "or", if_test_cmd, or_code, 0); + tex_primitive(tex_command, "else", if_test_cmd, else_code, 0); + tex_primitive(luatex_command, "orelse", if_test_cmd, or_else_code, 0); + tex_primitive(luatex_command, "orunless", if_test_cmd, or_unless_code, 0); + + tex_primitive(tex_command, "if", if_test_cmd, if_char_code, 0); + tex_primitive(tex_command, "ifcat", if_test_cmd, if_cat_code, 0); + tex_primitive(tex_command, "ifnum", if_test_cmd, if_int_code, 0); + tex_primitive(tex_command, "ifdim", if_test_cmd, if_dim_code, 0); + tex_primitive(tex_command, "ifodd", if_test_cmd, if_odd_code, 0); + tex_primitive(tex_command, "ifvmode", if_test_cmd, if_vmode_code, 0); + tex_primitive(tex_command, "ifhmode", if_test_cmd, if_hmode_code, 0); + tex_primitive(tex_command, "ifmmode", if_test_cmd, if_mmode_code, 0); + tex_primitive(tex_command, "ifinner", if_test_cmd, if_inner_code, 0); + tex_primitive(tex_command, "ifvoid", if_test_cmd, if_void_code, 0); + tex_primitive(tex_command, "ifhbox", if_test_cmd, if_hbox_code, 0); + tex_primitive(tex_command, "ifvbox", if_test_cmd, if_vbox_code, 0); + tex_primitive(tex_command, "ifx", if_test_cmd, if_x_code, 0); + tex_primitive(tex_command, "iftrue", if_test_cmd, if_true_code, 0); + tex_primitive(tex_command, "iffalse", if_test_cmd, if_false_code, 0); + tex_primitive(tex_command, "ifcase", if_test_cmd, if_case_code, 0); + tex_primitive(etex_command, "ifdefined", if_test_cmd, if_def_code, 0); + tex_primitive(etex_command, "ifcsname", if_test_cmd, if_cs_code, 0); + tex_primitive(etex_command, "iffontchar", if_test_cmd, if_font_char_code, 0); + tex_primitive(luatex_command, "ifincsname", if_test_cmd, if_in_csname_code, 0); /* This is obsolete and might be dropped. 
*/ + tex_primitive(luatex_command, "ifabsnum", if_test_cmd, if_abs_int_code, 0); + tex_primitive(luatex_command, "ifabsdim", if_test_cmd, if_abs_dim_code, 0); + tex_primitive(luatex_command, "ifchknum", if_test_cmd, if_chk_int_code, 0); + tex_primitive(luatex_command, "ifchkdim", if_test_cmd, if_chk_dim_code, 0); + tex_primitive(luatex_command, "ifcmpnum", if_test_cmd, if_cmp_int_code, 0); + tex_primitive(luatex_command, "ifcmpdim", if_test_cmd, if_cmp_dim_code, 0); + tex_primitive(luatex_command, "ifnumval", if_test_cmd, if_val_int_code, 0); + tex_primitive(luatex_command, "ifdimval", if_test_cmd, if_val_dim_code, 0); + tex_primitive(luatex_command, "iftok", if_test_cmd, if_tok_code, 0); + tex_primitive(luatex_command, "ifcstok", if_test_cmd, if_cstok_code, 0); + tex_primitive(luatex_command, "ifcondition", if_test_cmd, if_condition_code, 0); + tex_primitive(luatex_command, "ifflags", if_test_cmd, if_flags_code, 0); + tex_primitive(luatex_command, "ifempty", if_test_cmd, if_empty_cmd_code, 0); + tex_primitive(luatex_command, "ifrelax", if_test_cmd, if_relax_cmd_code, 0); + tex_primitive(luatex_command, "ifboolean", if_test_cmd, if_boolean_code, 0); + tex_primitive(luatex_command, "ifnumexpression", if_test_cmd, if_numexpression_code, 0); + tex_primitive(luatex_command, "ifdimexpression", if_test_cmd, if_dimexpression_code, 0); + tex_primitive(luatex_command, "ifmathparameter", if_test_cmd, if_math_parameter_code, 0); + tex_primitive(luatex_command, "ifmathstyle", if_test_cmd, if_math_style_code, 0); + tex_primitive(luatex_command, "ifarguments", if_test_cmd, if_arguments_code, 0); + tex_primitive(luatex_command, "ifparameters", if_test_cmd, if_parameters_code, 0); + tex_primitive(luatex_command, "ifparameter", if_test_cmd, if_parameter_code, 0); + tex_primitive(luatex_command, "ifhastok", if_test_cmd, if_has_tok_code, 0); + tex_primitive(luatex_command, "ifhastoks", if_test_cmd, if_has_toks_code, 0); + tex_primitive(luatex_command, "ifhasxtoks", if_test_cmd, 
if_has_xtoks_code, 0); + tex_primitive(luatex_command, "ifhaschar", if_test_cmd, if_has_char_code, 0); + tex_primitive(luatex_command, "ifinsert", if_test_cmd, if_insert_code, 0); + /* tex_primitive(luatex_command, "ifbitwiseand", if_test_cmd, if_bitwise_and_code, 0); */ + + tex_primitive(tex_command, "above", math_fraction_cmd, math_above_code, 0); + tex_primitive(tex_command, "abovewithdelims", math_fraction_cmd, math_above_delimited_code, 0); + tex_primitive(tex_command, "atop", math_fraction_cmd, math_atop_code, 0); + tex_primitive(tex_command, "atopwithdelims", math_fraction_cmd, math_atop_delimited_code, 0); + tex_primitive(tex_command, "over", math_fraction_cmd, math_over_code, 0); + tex_primitive(tex_command, "overwithdelims", math_fraction_cmd, math_over_delimited_code, 0); + /* tex_primitive(luatex_command, "skewed", math_fraction_cmd, math_skewed_code, 0); */ /* makes no sense */ + /* tex_primitive(luatex_command, "skewedwithdelims", math_fraction_cmd, math_skewed_delimited_code, 0); */ /* makes no sense */ + /* tex_primitive(luatex_command, "stretched", math_fraction_cmd, math_stretched_code, 0); */ /* makes no sense */ + /* tex_primitive(luatex_command, "stretchedwithdelims", math_fraction_cmd, math_stretched_delimited_code, 0); */ /* makes no sense */ + + tex_primitive(luatex_command, "Uabove", math_fraction_cmd, math_u_above_code, 0); + tex_primitive(luatex_command, "Uabovewithdelims", math_fraction_cmd, math_u_above_delimited_code, 0); + tex_primitive(luatex_command, "Uatop", math_fraction_cmd, math_u_atop_code, 0); + tex_primitive(luatex_command, "Uatopwithdelims", math_fraction_cmd, math_u_atop_delimited_code, 0); + tex_primitive(luatex_command, "Uover", math_fraction_cmd, math_u_over_code, 0); + tex_primitive(luatex_command, "Uoverwithdelims", math_fraction_cmd, math_u_over_delimited_code, 0); + tex_primitive(luatex_command, "Uskewed", math_fraction_cmd, math_u_skewed_code, 0); + tex_primitive(luatex_command, "Uskewedwithdelims", 
math_fraction_cmd, math_u_skewed_delimited_code, 0); + tex_primitive(luatex_command, "Ustretched", math_fraction_cmd, math_u_stretched_code, 0); + tex_primitive(luatex_command, "Ustretchedwithdelims", math_fraction_cmd, math_u_stretched_delimited_code, 0); + + tex_primitive(tex_command, "hyphenchar", set_font_property_cmd, font_hyphen_code, 0); + tex_primitive(tex_command, "skewchar", set_font_property_cmd, font_skew_code, 0); + tex_primitive(luatex_command, "efcode", set_font_property_cmd, font_ef_code, 0); + tex_primitive(luatex_command, "lpcode", set_font_property_cmd, font_lp_code, 0); + tex_primitive(luatex_command, "rpcode", set_font_property_cmd, font_rp_code, 0); + tex_primitive(tex_command, "fontdimen", set_font_property_cmd, font_dimen_code, 0); + tex_primitive(luatex_command, "scaledfontdimen", set_font_property_cmd, scaled_font_dimen_code, 0); + + tex_primitive(tex_command, "lowercase", case_shift_cmd, lower_case_code, 0); + tex_primitive(tex_command, "uppercase", case_shift_cmd, upper_case_code, 0); + + tex_primitive(tex_command, "catcode", define_char_code_cmd, catcode_charcode, 0); + tex_primitive(tex_command, "lccode", define_char_code_cmd, lccode_charcode, 0); + tex_primitive(tex_command, "uccode", define_char_code_cmd, uccode_charcode, 0); + tex_primitive(tex_command, "sfcode", define_char_code_cmd, sfcode_charcode, 0); + tex_primitive(luatex_command, "hccode", define_char_code_cmd, hccode_charcode, 0); + tex_primitive(luatex_command, "hmcode", define_char_code_cmd, hmcode_charcode, 0); + tex_primitive(tex_command, "mathcode", define_char_code_cmd, mathcode_charcode, 0); + tex_primitive(tex_command, "delcode", define_char_code_cmd, delcode_charcode, 0); + + tex_primitive(luatex_command, "Umathcode", define_char_code_cmd, extmathcode_charcode, 0); + /* tex_primitive(luatex_command, "Umathcodenum", define_char_code_cmd, extmathcodenum_charcode, 0); */ + tex_primitive(luatex_command, "Udelcode", define_char_code_cmd, extdelcode_charcode, 0); + /* 
tex_primitive(luatex_command, "Udelcodenum", define_char_code_cmd, extdelcodenum_charcode, 0); */ + + tex_primitive(tex_command, "edef", def_cmd, expanded_def_code, 0); + tex_primitive(tex_command, "def", def_cmd, def_code, 0); + tex_primitive(tex_command, "xdef", def_cmd, global_expanded_def_code, 0); + tex_primitive(tex_command, "gdef", def_cmd, global_def_code, 0); + tex_primitive(luatex_command, "edefcsname", def_cmd, expanded_def_csname_code, 0); + tex_primitive(luatex_command, "defcsname", def_cmd, def_csname_code, 0); + tex_primitive(luatex_command, "xdefcsname", def_cmd, global_expanded_def_csname_code, 0); + tex_primitive(luatex_command, "gdefcsname", def_cmd, global_def_csname_code, 0); + + tex_primitive(tex_command, "scriptfont", define_family_cmd, script_size, 0); + tex_primitive(tex_command, "scriptscriptfont", define_family_cmd, script_script_size, 0); + tex_primitive(tex_command, "textfont", define_family_cmd, text_size, 0); + + tex_primitive(tex_command, "discretionary", discretionary_cmd, normal_discretionary_code, 0); + tex_primitive(tex_command, "-", discretionary_cmd, explicit_discretionary_code, 0); + tex_primitive(luatex_command, "explicitdiscretionary", discretionary_cmd, explicit_discretionary_code, 0); + tex_primitive(luatex_command, "automaticdiscretionary", discretionary_cmd, automatic_discretionary_code, 0); + + tex_primitive(tex_command, "leqno", equation_number_cmd, left_location_code, 0); + tex_primitive(tex_command, "eqno", equation_number_cmd, right_location_code, 0); + + tex_primitive(tex_command, "moveright", hmove_cmd, move_forward_code, 0); + tex_primitive(tex_command, "moveleft", hmove_cmd, move_backward_code, 0); + + tex_primitive(tex_command, "hfil", hskip_cmd, fil_code, 0); + tex_primitive(tex_command, "hfill", hskip_cmd, fill_code, 0); + tex_primitive(tex_command, "hss", hskip_cmd, filll_code, 0); + tex_primitive(tex_command, "hfilneg", hskip_cmd, fil_neg_code, 0); + tex_primitive(tex_command, "hskip", hskip_cmd, skip_code, 
0); + + tex_primitive(tex_command, "hyphenation", hyphenation_cmd, hyphenation_code, 0); + tex_primitive(tex_command, "patterns", hyphenation_cmd, patterns_code, 0); + tex_primitive(luatex_command, "prehyphenchar", hyphenation_cmd, prehyphenchar_code, 0); + tex_primitive(luatex_command, "posthyphenchar", hyphenation_cmd, posthyphenchar_code, 0); + tex_primitive(luatex_command, "preexhyphenchar", hyphenation_cmd, preexhyphenchar_code, 0); + tex_primitive(luatex_command, "postexhyphenchar", hyphenation_cmd, postexhyphenchar_code, 0); + tex_primitive(luatex_command, "hyphenationmin", hyphenation_cmd, hyphenationmin_code, 0); + tex_primitive(luatex_command, "hjcode", hyphenation_cmd, hjcode_code, 0); + + tex_primitive(tex_command, "kern", kern_cmd, normal_kern_code, 0); + /* tex_primitive(tex_command, "hkern", kern_cmd, h_kern_code, 0); */ + /* tex_primitive(tex_command, "vkern", kern_cmd, v_kern_code, 0); */ + /* tex_primitive(tex_command, "nonzerowidthkern", kern_cmd, non_zero_width_kern_code, 0); */ /* maybe */ + + tex_primitive(luatex_command, "localleftbox", local_box_cmd, local_left_box_code, 0); + tex_primitive(luatex_command, "localrightbox", local_box_cmd, local_right_box_code, 0); + tex_primitive(luatex_command, "localmiddlebox", local_box_cmd, local_middle_box_code, 0); + + tex_primitive(tex_command, "shipout", legacy_cmd, shipout_code, 0); + + tex_primitive(tex_command, "leaders", leader_cmd, a_leaders_code, 0); + tex_primitive(tex_command, "cleaders", leader_cmd, c_leaders_code, 0); + tex_primitive(tex_command, "xleaders", leader_cmd, x_leaders_code, 0); + tex_primitive(luatex_command, "gleaders", leader_cmd, g_leaders_code, 0); + tex_primitive(luatex_command, "uleaders", leader_cmd, u_leaders_code, 0); + + tex_primitive(tex_command, "left", math_fence_cmd, left_fence_side, 0); + tex_primitive(tex_command, "middle", math_fence_cmd, middle_fence_side, 0); + tex_primitive(tex_command, "right", math_fence_cmd, right_fence_side, 0); + 
tex_primitive(luatex_command, "Uvextensible", math_fence_cmd, no_fence_side, 0); + tex_primitive(luatex_command, "Uleft", math_fence_cmd, extended_left_fence_side, 0); + tex_primitive(luatex_command, "Umiddle", math_fence_cmd, extended_middle_fence_side, 0); + tex_primitive(luatex_command, "Uright", math_fence_cmd, extended_right_fence_side, 0); + tex_primitive(luatex_command, "Uoperator", math_fence_cmd, left_operator_side, 0); + + tex_primitive(luatex_command, "glet", let_cmd, global_let_code, 0); + tex_primitive(tex_command, "let", let_cmd, let_code, 0); + tex_primitive(tex_command, "futurelet", let_cmd, future_let_code, 0); + tex_primitive(luatex_command, "futuredef", let_cmd, future_def_code, 0); + tex_primitive(luatex_command, "letcharcode", let_cmd, let_charcode_code, 0); + tex_primitive(luatex_command, "swapcsvalues", let_cmd, swap_cs_values_code, 0); + tex_primitive(luatex_command, "letprotected", let_cmd, let_protected_code, 0); + tex_primitive(luatex_command, "unletprotected", let_cmd, unlet_protected_code, 0); + tex_primitive(luatex_command, "letfrozen", let_cmd, let_frozen_code, 0); + tex_primitive(luatex_command, "unletfrozen", let_cmd, unlet_frozen_code, 0); + tex_primitive(luatex_command, "letcsname", let_cmd, let_csname_code, 0); + tex_primitive(luatex_command, "gletcsname", let_cmd, global_let_csname_code, 0); + tex_primitive(luatex_command, "lettonothing", let_cmd, let_to_nothing_code, 0); /* more a def but a let is nicer */ + tex_primitive(luatex_command, "glettonothing", let_cmd, global_let_to_nothing_code, 0); /* more a def but a let is nicer */ + + tex_primitive(tex_command, "displaylimits", math_modifier_cmd, display_limits_modifier_code, 0); /*tex so |math_limits_cmd| became |math_modifier_cmd| */ + tex_primitive(tex_command, "limits", math_modifier_cmd, limits_modifier_code, 0); + tex_primitive(tex_command, "nolimits", math_modifier_cmd, no_limits_modifier_code, 0); + + /* beware, Umathaxis is overloaded ... 
maybe only a generic modifier with keywords */ + + tex_primitive(luatex_command, "Umathadapttoleft", math_modifier_cmd, adapt_to_left_modifier_code, 0); + tex_primitive(luatex_command, "Umathadapttoright", math_modifier_cmd, adapt_to_right_modifier_code, 0); + tex_primitive(luatex_command, "Umathuseaxis", math_modifier_cmd, axis_modifier_code, 0); + tex_primitive(luatex_command, "Umathnoaxis", math_modifier_cmd, no_axis_modifier_code, 0); + tex_primitive(luatex_command, "Umathphantom", math_modifier_cmd, phantom_modifier_code, 0); + tex_primitive(luatex_command, "Umathvoid", math_modifier_cmd, void_modifier_code, 0); + tex_primitive(luatex_command, "Umathsource", math_modifier_cmd, source_modifier_code, 0); + tex_primitive(luatex_command, "Umathopenupheight", math_modifier_cmd, openup_height_modifier_code, 0); + tex_primitive(luatex_command, "Umathopenupdepth", math_modifier_cmd, openup_depth_modifier_code, 0); + tex_primitive(luatex_command, "Umathlimits", math_modifier_cmd, limits_modifier_code, 0); + tex_primitive(luatex_command, "Umathnolimits", math_modifier_cmd, no_limits_modifier_code, 0); + + tex_primitive(tex_command, "box", make_box_cmd, box_code, 0); + tex_primitive(tex_command, "copy", make_box_cmd, copy_code, 0); + tex_primitive(tex_command, "lastbox", make_box_cmd, last_box_code, 0); + tex_primitive(tex_command, "vsplit", make_box_cmd, vsplit_code, 0); + tex_primitive(luatex_command, "tpack", make_box_cmd, tpack_code, 0); + tex_primitive(luatex_command, "vpack", make_box_cmd, vpack_code, 0); + tex_primitive(luatex_command, "hpack", make_box_cmd, hpack_code, 0); + tex_primitive(tex_command, "vtop", make_box_cmd, vtop_code, 0); + tex_primitive(tex_command, "vbox", make_box_cmd, vbox_code, 0); + tex_primitive(tex_command, "hbox", make_box_cmd, hbox_code, 0); + tex_primitive(luatex_command, "insertbox", make_box_cmd, insert_box_code, 0); + tex_primitive(luatex_command, "insertcopy", make_box_cmd, insert_copy_code, 0); + tex_primitive(luatex_command, 
"localleftboxbox", make_box_cmd, local_left_box_box_code, 0); + tex_primitive(luatex_command, "localrightboxbox", make_box_cmd, local_right_box_box_code, 0); + tex_primitive(luatex_command, "localmiddleboxbox", make_box_cmd, local_middle_box_box_code, 0); + + tex_primitive(tex_command, "mathord", math_component_cmd, math_component_ordinary_code, 0); + tex_primitive(tex_command, "mathop", math_component_cmd, math_component_operator_code, 0); + tex_primitive(tex_command, "mathbin", math_component_cmd, math_component_binary_code, 0); + tex_primitive(tex_command, "mathrel", math_component_cmd, math_component_relation_code, 0); + tex_primitive(tex_command, "mathopen", math_component_cmd, math_component_open_code, 0); + tex_primitive(tex_command, "mathclose", math_component_cmd, math_component_close_code, 0); + tex_primitive(tex_command, "mathpunct", math_component_cmd, math_component_punctuation_code, 0); + tex_primitive(tex_command, "mathinner", math_component_cmd, math_component_inner_code, 0); + tex_primitive(luatex_command, "mathfrac", math_component_cmd, math_component_fraction_code, 0); + tex_primitive(luatex_command, "mathrad", math_component_cmd, math_component_radical_code, 0); + tex_primitive(luatex_command, "mathmiddle", math_component_cmd, math_component_middle_code, 0); + tex_primitive(luatex_command, "mathaccent", math_component_cmd, math_component_accent_code, 0); + tex_primitive(luatex_command, "mathfenced", math_component_cmd, math_component_fenced_code, 0); + tex_primitive(tex_command, "underline", math_component_cmd, math_component_under_code, 0); + tex_primitive(tex_command, "overline", math_component_cmd, math_component_over_code, 0); + tex_primitive(luatex_command, "mathghost", math_component_cmd, math_component_ghost_code, 0); + tex_primitive(luatex_command, "mathatom", math_component_cmd, math_component_atom_code, 0); + + tex_primitive(luatex_command, "Ustartmath", math_shift_cs_cmd, begin_inline_math_code, 0); + tex_primitive(luatex_command, 
"Ustopmath", math_shift_cs_cmd, end_inline_math_code, 0); + tex_primitive(luatex_command, "Ustartdisplaymath", math_shift_cs_cmd, begin_display_math_code, 0); + tex_primitive(luatex_command, "Ustopdisplaymath", math_shift_cs_cmd, end_display_math_code, 0); + tex_primitive(luatex_command, "Ustartmathmode", math_shift_cs_cmd, begin_math_mode_code, 0); + tex_primitive(luatex_command, "Ustopmathmode", math_shift_cs_cmd, end_math_mode_code, 0); + + tex_primitive(tex_command, "displaystyle", math_style_cmd, display_style, 0); + tex_primitive(tex_command, "textstyle", math_style_cmd, text_style, 0); + tex_primitive(tex_command, "scriptstyle", math_style_cmd, script_style, 0); + tex_primitive(tex_command, "scriptscriptstyle", math_style_cmd, script_script_style, 0); + tex_primitive(luatex_command, "crampeddisplaystyle", math_style_cmd, cramped_display_style, 0); + tex_primitive(luatex_command, "crampedtextstyle", math_style_cmd, cramped_text_style, 0); + tex_primitive(luatex_command, "crampedscriptstyle", math_style_cmd, cramped_script_style, 0); + tex_primitive(luatex_command, "crampedscriptscriptstyle", math_style_cmd, cramped_script_script_style, 0); + tex_primitive(luatex_command, "Ustyle", math_style_cmd, yet_unset_math_style, 0); + tex_primitive(luatex_command, "scaledmathstyle", math_style_cmd, scaled_math_style, 0); + tex_primitive(luatex_command, "alldisplaystyles", math_style_cmd, all_display_styles, 0); + tex_primitive(luatex_command, "alltextstyles", math_style_cmd, all_text_styles, 0); + tex_primitive(luatex_command, "allscriptstyles", math_style_cmd, all_script_styles, 0); + tex_primitive(luatex_command, "allscriptscriptstyles", math_style_cmd, all_script_script_styles, 0); + tex_primitive(luatex_command, "allmathstyles", math_style_cmd, all_math_styles, 0); + tex_primitive(luatex_command, "allsplitstyles", math_style_cmd, all_split_styles, 0); + tex_primitive(luatex_command, "alluncrampedstyles", math_style_cmd, all_uncramped_styles, 0); + 
tex_primitive(luatex_command, "allcrampedstyles", math_style_cmd, all_cramped_styles, 0); + + tex_primitive(tex_command, "message", message_cmd, message_code, 0); + tex_primitive(tex_command, "errmessage", message_cmd, error_message_code, 0); + + tex_primitive(tex_command, "mkern", mkern_cmd, normal_code, 0); + + tex_primitive(tex_command, "mskip", mskip_cmd, normal_mskip_code, 0); + tex_primitive(luatex_command, "mathatomskip", mskip_cmd, atom_mskip_code, 0); + + /*tex + We keep |\long| and |\outer| as dummies, while |\protected| is promoted to a real cmd + and |\frozen| can provide a mild form of protection against overloads. We still intercept + the commands. + */ + + tex_primitive(luatex_command, "frozen", prefix_cmd, frozen_code, 0); + tex_primitive(luatex_command, "permanent", prefix_cmd, permanent_code, 0); + tex_primitive(luatex_command, "immutable", prefix_cmd, immutable_code, 0); + tex_primitive(luatex_command, "mutable", prefix_cmd, mutable_code, 0); + /* tex_primitive(luatex_command, "primitive", prefix_cmd, primitive_code, 0); */ + tex_primitive(luatex_command, "noaligned", prefix_cmd, noaligned_code, 0); + tex_primitive(luatex_command, "instance", prefix_cmd, instance_code, 0); + tex_primitive(luatex_command, "untraced", prefix_cmd, untraced_code, 0); + tex_primitive(tex_command, "global", prefix_cmd, global_code, 0); + tex_primitive(luatex_command, "tolerant", prefix_cmd, tolerant_code, 0); + tex_primitive(etex_command, "protected", prefix_cmd, protected_code, 0); + tex_primitive(luatex_command, "overloaded", prefix_cmd, overloaded_code, 0); + tex_primitive(luatex_command, "aliased", prefix_cmd, aliased_code, 0); + tex_primitive(luatex_command, "immediate", prefix_cmd, immediate_code, 0); + tex_primitive(luatex_command, "semiprotected", prefix_cmd, semiprotected_code, 0); + tex_primitive(luatex_command, "enforced", prefix_cmd, enforced_code, 0); + tex_primitive(luatex_command, "inherited", prefix_cmd, inherited_code, 0); + + 
tex_primitive(tex_command, "long", prefix_cmd, long_code, 0); + tex_primitive(tex_command, "outer", prefix_cmd, outer_code, 0); + + tex_primitive(tex_command, "unkern", remove_item_cmd, kern_item_code, 0); + tex_primitive(tex_command, "unpenalty", remove_item_cmd, penalty_item_code, 0); + tex_primitive(tex_command, "unskip", remove_item_cmd, skip_item_code, 0); + tex_primitive(tex_command, "unboundary", remove_item_cmd, boundary_item_code, 0); + + tex_primitive(tex_command, "batchmode", set_interaction_cmd, batch_mode, 0); + tex_primitive(tex_command, "errorstopmode", set_interaction_cmd, error_stop_mode, 0); + tex_primitive(tex_command, "nonstopmode", set_interaction_cmd, nonstop_mode, 0); + tex_primitive(tex_command, "scrollmode", set_interaction_cmd, scroll_mode, 0); + + tex_primitive(tex_command, "chardef", shorthand_def_cmd, char_def_code, 0); + tex_primitive(tex_command, "countdef", shorthand_def_cmd, count_def_code, 0); + tex_primitive(tex_command, "dimendef", shorthand_def_cmd, dimen_def_code, 0); + tex_primitive(tex_command, "mathchardef", shorthand_def_cmd, math_char_def_code, 0); + tex_primitive(tex_command, "muskipdef", shorthand_def_cmd, mu_skip_def_code, 0); + tex_primitive(tex_command, "skipdef", shorthand_def_cmd, skip_def_code, 0); + tex_primitive(tex_command, "toksdef", shorthand_def_cmd, toks_def_code, 0); + /* tex_primitive(tex_command, "stringdef", shorthand_def_cmd, string_def_code, 0); */ + tex_primitive(luatex_command, "Umathchardef", shorthand_def_cmd, math_xchar_def_code, 0); + tex_primitive(luatex_command, "Umathdictdef", shorthand_def_cmd, math_dchar_def_code, 0); + /* tex_primitive(luatex_command, "Umathcharnumdef", shorthand_def_cmd, math_uchar_def_code, 0); */ + tex_primitive(luatex_command, "attributedef", shorthand_def_cmd, attribute_def_code, 0); + tex_primitive(luatex_command, "luadef", shorthand_def_cmd, lua_def_code, 0); + tex_primitive(luatex_command, "integerdef", shorthand_def_cmd, integer_def_code, 0); + 
tex_primitive(luatex_command, "dimensiondef", shorthand_def_cmd, dimension_def_code, 0); + tex_primitive(luatex_command, "gluespecdef", shorthand_def_cmd, gluespec_def_code, 0); + tex_primitive(luatex_command, "mugluespecdef", shorthand_def_cmd, mugluespec_def_code, 0); + /* tex_primitive(luatex_command, "mathspecdef", shorthand_def_cmd, mathspec_def_code, 0); */ + tex_primitive(luatex_command, "fontspecdef", shorthand_def_cmd, fontspec_def_code, 0); + + tex_primitive(tex_command, "noindent", begin_paragraph_cmd, noindent_par_code, 0); + tex_primitive(tex_command, "indent", begin_paragraph_cmd, indent_par_code, 0); + tex_primitive(luatex_command, "quitvmode", begin_paragraph_cmd, quitvmode_par_code, 0); + tex_primitive(luatex_command, "undent", begin_paragraph_cmd, undent_par_code, 0); + tex_primitive(luatex_command, "snapshotpar", begin_paragraph_cmd, snapshot_par_code, 0); + tex_primitive(luatex_command, "parattribute", begin_paragraph_cmd, attribute_par_code, 0); + tex_primitive(luatex_command, "wrapuppar", begin_paragraph_cmd, wrapup_par_code, 0); + + tex_primitive(tex_command, "end", end_job_cmd, end_code, 0); + tex_primitive(tex_command, "dump", end_job_cmd, dump_code, 0); + + tex_primitive(luatex_command, "beginlocalcontrol", begin_local_cmd, local_control_begin_code, 0); + tex_primitive(luatex_command, "localcontrol", begin_local_cmd, local_control_token_code, 0); + tex_primitive(luatex_command, "localcontrolled", begin_local_cmd, local_control_list_code, 0); + tex_primitive(luatex_command, "localcontrolledloop", begin_local_cmd, local_control_loop_code, 0); + tex_primitive(luatex_command, "expandedloop", begin_local_cmd, expanded_loop_code, 0); + tex_primitive(luatex_command, "unexpandedloop", begin_local_cmd, unexpanded_loop_code, 0); + + tex_primitive(luatex_command, "endlocalcontrol", end_local_cmd, normal_code, 0); + + tex_primitive(tex_command, "unhbox", un_hbox_cmd, box_code, 0); + tex_primitive(tex_command, "unhcopy", un_hbox_cmd, copy_code, 0); + 
tex_primitive(luatex_command, "unhpack", un_hbox_cmd, unpack_code, 0); + tex_primitive(tex_command, "unvbox", un_vbox_cmd, box_code, 0); + tex_primitive(tex_command, "unvcopy", un_vbox_cmd, copy_code, 0); + tex_primitive(luatex_command, "unvpack", un_vbox_cmd, unpack_code, 0); + + tex_primitive(etex_command, "pagediscards", un_vbox_cmd, last_box_code, 0); + tex_primitive(etex_command, "splitdiscards", un_vbox_cmd, vsplit_code, 0); + + tex_primitive(luatex_command, "insertunbox", un_vbox_cmd, insert_box_code, 0); + tex_primitive(luatex_command, "insertuncopy", un_vbox_cmd, insert_copy_code, 0); + + tex_primitive(tex_command, "raise", vmove_cmd, move_backward_code, 0); + tex_primitive(tex_command, "lower", vmove_cmd, move_forward_code, 0); + + tex_primitive(tex_command, "vfil", vskip_cmd, fil_code, 0); + tex_primitive(tex_command, "vfill", vskip_cmd, fill_code, 0); + tex_primitive(tex_command, "vfilneg", vskip_cmd, fil_neg_code, 0); + tex_primitive(tex_command, "vskip", vskip_cmd, skip_code, 0); + tex_primitive(tex_command, "vss", vskip_cmd, filll_code, 0); + + tex_primitive(tex_command, "show", xray_cmd, show_code, 0); + tex_primitive(tex_command, "showbox", xray_cmd, show_box_code, 0); + tex_primitive(tex_command, "showthe", xray_cmd, show_the_code, 0); + tex_primitive(tex_command, "showlists", xray_cmd, show_lists_code, 0); + tex_primitive(etex_command, "showgroups", xray_cmd, show_groups_code, 0); + tex_primitive(etex_command, "showtokens", xray_cmd, show_tokens_code, 0); + tex_primitive(etex_command, "showifs", xray_cmd, show_ifs_code, 0); + + tex_primitive(luatex_command, "savecatcodetable", catcode_table_cmd, save_cat_code_table_code, 0); + tex_primitive(luatex_command, "initcatcodetable", catcode_table_cmd, init_cat_code_table_code, 0); + /* tex_primitive(luatex_command, "setcatcodetabledefault", catcode_table_cmd, dflt_cat_code_table_code, 0); */ /* This was an experiment. 
*/ + + tex_primitive(luatex_command, "pardirection", internal_int_cmd, par_direction_code, internal_int_base); + tex_primitive(luatex_command, "textdirection", internal_int_cmd, text_direction_code, internal_int_base); + tex_primitive(luatex_command, "mathdirection", internal_int_cmd, math_direction_code, internal_int_base); + tex_primitive(luatex_command, "linedirection", internal_int_cmd, line_direction_code, internal_int_base); + + tex_primitive(luatex_command, "alignmark", parameter_cmd, normal_code, 0); + tex_primitive(luatex_command, "parametermark", parameter_cmd, normal_code, 0); /* proper primitive for syntax highlighting */ + + tex_primitive(luatex_command, "aligntab", alignment_tab_cmd, tab_mark_code, 0); + + tex_primitive(tex_command, "span", alignment_cmd, span_code, 0); + tex_primitive(tex_command, "omit", alignment_cmd, omit_code, 0); + tex_primitive(tex_command, "noalign", alignment_cmd, no_align_code, 0); + tex_primitive(luatex_command, "aligncontent", alignment_cmd, align_content_code, 0); + /* tex_primitive(tex_command, "cr", alignment_cmd, cr_code, 0); */ + /* tex_primitive(tex_command, "crcr", alignment_cmd, cr_cr_code, 0); */ + + tex_primitive(tex_command, "nonscript", math_script_cmd, math_no_script_code, 0); + tex_primitive(luatex_command, "noatomruling", math_script_cmd, math_no_ruling_code, 0); + tex_primitive(luatex_command, "Usuperscript", math_script_cmd, math_super_script_code, 0); + tex_primitive(luatex_command, "Usubscript", math_script_cmd, math_sub_script_code, 0); + tex_primitive(luatex_command, "Usuperprescript", math_script_cmd, math_super_pre_script_code, 0); + tex_primitive(luatex_command, "Usubprescript", math_script_cmd, math_sub_pre_script_code, 0); + tex_primitive(luatex_command, "Unosuperscript", math_script_cmd, math_no_super_script_code, 0); + tex_primitive(luatex_command, "Unosubscript", math_script_cmd, math_no_sub_script_code, 0); + tex_primitive(luatex_command, "Unosuperprescript", math_script_cmd, 
math_no_super_pre_script_code, 0); + tex_primitive(luatex_command, "Unosubprescript", math_script_cmd, math_no_sub_pre_script_code, 0); + tex_primitive(luatex_command, "Ushiftedsubscript", math_script_cmd, math_shifted_sub_script_code, 0); + tex_primitive(luatex_command, "Ushiftedsuperscript", math_script_cmd, math_shifted_super_script_code, 0); + tex_primitive(luatex_command, "Ushiftedsubprescript", math_script_cmd, math_shifted_sub_pre_script_code, 0); + tex_primitive(luatex_command, "Ushiftedsuperprescript", math_script_cmd, math_shifted_super_pre_script_code, 0); + tex_primitive(luatex_command, "Uprimescript", math_script_cmd, math_prime_script_code, 0); + + /* tex_primitive(luatex_command, "Umathbinbinspacing", set_math_parameter_cmd, math_parameter_binary_binary_spacing, 0); */ /* Gone, as are more of these! */ + + tex_primitive(luatex_command, "Umathaxis", set_math_parameter_cmd, math_parameter_axis, 0); + tex_primitive(luatex_command, "Umathaccentbaseheight", set_math_parameter_cmd, math_parameter_accent_base_height, 0); + tex_primitive(luatex_command, "Umathaccentbasedepth", set_math_parameter_cmd, math_parameter_accent_base_depth, 0); + tex_primitive(luatex_command, "Umathflattenedaccentbaseheight", set_math_parameter_cmd, math_parameter_flattened_accent_base_height, 0); + tex_primitive(luatex_command, "Umathflattenedaccentbasedepth", set_math_parameter_cmd, math_parameter_flattened_accent_base_depth, 0); + tex_primitive(luatex_command, "Umathconnectoroverlapmin", set_math_parameter_cmd, math_parameter_connector_overlap_min, 0); + tex_primitive(luatex_command, "Umathfractiondelsize", set_math_parameter_cmd, math_parameter_fraction_del_size, 0); + tex_primitive(luatex_command, "Umathfractiondenomdown", set_math_parameter_cmd, math_parameter_fraction_denom_down, 0); + tex_primitive(luatex_command, "Umathfractiondenomvgap", set_math_parameter_cmd, math_parameter_fraction_denom_vgap, 0); + tex_primitive(luatex_command, "Umathfractionnumup", 
set_math_parameter_cmd, math_parameter_fraction_num_up, 0); + tex_primitive(luatex_command, "Umathfractionnumvgap", set_math_parameter_cmd, math_parameter_fraction_num_vgap, 0); + tex_primitive(luatex_command, "Umathfractionrule", set_math_parameter_cmd, math_parameter_fraction_rule, 0); + tex_primitive(luatex_command, "Umathlimitabovebgap", set_math_parameter_cmd, math_parameter_limit_above_bgap, 0); + tex_primitive(luatex_command, "Umathlimitabovekern", set_math_parameter_cmd, math_parameter_limit_above_kern, 0); + tex_primitive(luatex_command, "Umathlimitabovevgap", set_math_parameter_cmd, math_parameter_limit_above_vgap, 0); + tex_primitive(luatex_command, "Umathlimitbelowbgap", set_math_parameter_cmd, math_parameter_limit_below_bgap, 0); + tex_primitive(luatex_command, "Umathlimitbelowkern", set_math_parameter_cmd, math_parameter_limit_below_kern, 0); + tex_primitive(luatex_command, "Umathlimitbelowvgap", set_math_parameter_cmd, math_parameter_limit_below_vgap, 0); + tex_primitive(luatex_command, "Umathnolimitsubfactor", set_math_parameter_cmd, math_parameter_nolimit_sub_factor, 0); /* These are bonus parameters. */ + tex_primitive(luatex_command, "Umathnolimitsupfactor", set_math_parameter_cmd, math_parameter_nolimit_sup_factor, 0); /* These are bonus parameters. 
*/ + tex_primitive(luatex_command, "Umathoperatorsize", set_math_parameter_cmd, math_parameter_operator_size, 0); + tex_primitive(luatex_command, "Umathoverbarkern", set_math_parameter_cmd, math_parameter_overbar_kern, 0); + tex_primitive(luatex_command, "Umathoverbarrule", set_math_parameter_cmd, math_parameter_overbar_rule, 0); + tex_primitive(luatex_command, "Umathoverbarvgap", set_math_parameter_cmd, math_parameter_overbar_vgap, 0); + tex_primitive(luatex_command, "Umathoverdelimiterbgap", set_math_parameter_cmd, math_parameter_over_delimiter_bgap, 0); + tex_primitive(luatex_command, "Umathoverdelimitervgap", set_math_parameter_cmd, math_parameter_over_delimiter_vgap, 0); + tex_primitive(luatex_command, "Umathquad", set_math_parameter_cmd, math_parameter_quad, 0); + tex_primitive(luatex_command, "Umathradicaldegreeafter", set_math_parameter_cmd, math_parameter_radical_degree_after, 0); + tex_primitive(luatex_command, "Umathradicaldegreebefore", set_math_parameter_cmd, math_parameter_radical_degree_before, 0); + tex_primitive(luatex_command, "Umathradicaldegreeraise", set_math_parameter_cmd, math_parameter_radical_degree_raise, 0); + tex_primitive(luatex_command, "Umathradicalextensibleafter", set_math_parameter_cmd, math_parameter_radical_extensible_after, 0); + tex_primitive(luatex_command, "Umathradicalextensiblebefore", set_math_parameter_cmd, math_parameter_radical_extensible_before, 0); + tex_primitive(luatex_command, "Umathradicalkern", set_math_parameter_cmd, math_parameter_radical_kern, 0); + tex_primitive(luatex_command, "Umathradicalrule", set_math_parameter_cmd, math_parameter_radical_rule, 0); + tex_primitive(luatex_command, "Umathradicalvgap", set_math_parameter_cmd, math_parameter_radical_vgap, 0); + tex_primitive(luatex_command, "Umathskewedfractionhgap", set_math_parameter_cmd, math_parameter_skewed_fraction_hgap, 0); + tex_primitive(luatex_command, "Umathskewedfractionvgap", set_math_parameter_cmd, math_parameter_skewed_fraction_vgap, 0); + 
tex_primitive(luatex_command, "Umathspacebeforescript", set_math_parameter_cmd, math_parameter_space_before_script, 0); + tex_primitive(luatex_command, "Umathspaceafterscript", set_math_parameter_cmd, math_parameter_space_after_script, 0); + tex_primitive(luatex_command, "Umathstackdenomdown", set_math_parameter_cmd, math_parameter_stack_denom_down, 0); + tex_primitive(luatex_command, "Umathstacknumup", set_math_parameter_cmd, math_parameter_stack_num_up, 0); + tex_primitive(luatex_command, "Umathstackvgap", set_math_parameter_cmd, math_parameter_stack_vgap, 0); + tex_primitive(luatex_command, "Umathsubshiftdown", set_math_parameter_cmd, math_parameter_subscript_shift_down, 0); + tex_primitive(luatex_command, "Umathsubshiftdrop", set_math_parameter_cmd, math_parameter_subscript_shift_drop, 0); + tex_primitive(luatex_command, "Umathsubsupshiftdown", set_math_parameter_cmd, math_parameter_subscript_superscript_shift_down, 0); + tex_primitive(luatex_command, "Umathsubsupvgap", set_math_parameter_cmd, math_parameter_subscript_superscript_vgap, 0); + tex_primitive(luatex_command, "Umathsubtopmax", set_math_parameter_cmd, math_parameter_subscript_top_max, 0); + tex_primitive(luatex_command, "Umathsupbottommin", set_math_parameter_cmd, math_parameter_superscript_bottom_min, 0); + tex_primitive(luatex_command, "Umathsupshiftdrop", set_math_parameter_cmd, math_parameter_superscript_shift_drop, 0); + tex_primitive(luatex_command, "Umathsupshiftup", set_math_parameter_cmd, math_parameter_superscript_shift_up, 0); + tex_primitive(luatex_command, "Umathsupsubbottommax", set_math_parameter_cmd, math_parameter_superscript_subscript_bottom_max, 0); + tex_primitive(luatex_command, "Umathunderbarkern", set_math_parameter_cmd, math_parameter_underbar_kern, 0); + tex_primitive(luatex_command, "Umathunderbarrule", set_math_parameter_cmd, math_parameter_underbar_rule, 0); + tex_primitive(luatex_command, "Umathunderbarvgap", set_math_parameter_cmd, math_parameter_underbar_vgap, 0); + 
tex_primitive(luatex_command, "Umathunderdelimiterbgap", set_math_parameter_cmd, math_parameter_under_delimiter_bgap, 0); + tex_primitive(luatex_command, "Umathunderdelimitervgap", set_math_parameter_cmd, math_parameter_under_delimiter_vgap, 0); + tex_primitive(luatex_command, "Umathxscale", set_math_parameter_cmd, math_parameter_x_scale, 0); + tex_primitive(luatex_command, "Umathyscale", set_math_parameter_cmd, math_parameter_y_scale, 0); + /* */ + tex_primitive(luatex_command, "Umathextrasupshift", set_math_parameter_cmd, math_parameter_extra_superscript_shift, 0); + tex_primitive(luatex_command, "Umathextrasubshift", set_math_parameter_cmd, math_parameter_extra_subscript_shift, 0); + tex_primitive(luatex_command, "Umathextrasuppreshift", set_math_parameter_cmd, math_parameter_extra_superprescript_shift, 0); + tex_primitive(luatex_command, "Umathextrasubpreshift", set_math_parameter_cmd, math_parameter_extra_subprescript_shift, 0); + /* */ + tex_primitive(luatex_command, "Umathprimeraise", set_math_parameter_cmd, math_parameter_prime_raise, 0); + tex_primitive(luatex_command, "Umathprimeraisecomposed", set_math_parameter_cmd, math_parameter_prime_raise_composed, 0); + tex_primitive(luatex_command, "Umathprimeshiftup", set_math_parameter_cmd, math_parameter_prime_shift_up, 0); + tex_primitive(luatex_command, "Umathprimeshiftdrop", set_math_parameter_cmd, math_parameter_prime_shift_drop, 0); + tex_primitive(luatex_command, "Umathprimespaceafter", set_math_parameter_cmd, math_parameter_prime_space_after, 0); + tex_primitive(luatex_command, "Umathprimewidth", set_math_parameter_cmd, math_parameter_prime_width, 0); + /* */ + tex_primitive(luatex_command, "Umathruleheight", set_math_parameter_cmd, math_parameter_rule_height, 0); + tex_primitive(luatex_command, "Umathruledepth", set_math_parameter_cmd, math_parameter_rule_depth, 0); + /* */ + tex_primitive(luatex_command, "Umathsupshiftdistance", set_math_parameter_cmd, math_parameter_superscript_shift_distance, 0); + 
tex_primitive(luatex_command, "Umathsubshiftdistance", set_math_parameter_cmd, math_parameter_subscript_shift_distance, 0); + tex_primitive(luatex_command, "Umathpresupshiftdistance", set_math_parameter_cmd, math_parameter_superprescript_shift_distance, 0); + tex_primitive(luatex_command, "Umathpresubshiftdistance", set_math_parameter_cmd, math_parameter_subprescript_shift_distance, 0); + /* */ + tex_primitive(luatex_command, "Umathextrasupspace", set_math_parameter_cmd, math_parameter_extra_superscript_space, 0); + tex_primitive(luatex_command, "Umathextrasubspace", set_math_parameter_cmd, math_parameter_extra_subscript_space, 0); + tex_primitive(luatex_command, "Umathextrasupprespace", set_math_parameter_cmd, math_parameter_extra_superprescript_space, 0); + tex_primitive(luatex_command, "Umathextrasubprespace", set_math_parameter_cmd, math_parameter_extra_subprescript_space, 0); + /* */ + tex_primitive(luatex_command, "Umathskeweddelimitertolerance", set_math_parameter_cmd, math_parameter_skewed_delimiter_tolerance, 0); + /* */ + tex_primitive(luatex_command, "Umathaccenttopshiftup", set_math_parameter_cmd, math_parameter_accent_top_shift_up, 0); + tex_primitive(luatex_command, "Umathaccentbottomshiftdown", set_math_parameter_cmd, math_parameter_accent_bottom_shift_down, 0); + tex_primitive(luatex_command, "Umathflattenedaccenttopshiftup", set_math_parameter_cmd, math_parameter_flattened_accent_top_shift_up, 0); + tex_primitive(luatex_command, "Umathflattenedaccentbottomshiftdown", set_math_parameter_cmd, math_parameter_flattened_accent_bottom_shift_down, 0); + tex_primitive(luatex_command, "Umathaccenttopovershoot", set_math_parameter_cmd, math_parameter_accent_top_overshoot, 0); + tex_primitive(luatex_command, "Umathaccentbottomovershoot", set_math_parameter_cmd, math_parameter_accent_bottom_overshoot, 0); + tex_primitive(luatex_command, "Umathaccentsuperscriptdrop", set_math_parameter_cmd, math_parameter_accent_superscript_drop, 0); + 
/*tex
    More math parameters: the remaining accent ones, the delimiter percent and shortfall, and
    the first part of the per construct style variants. Each primitive is bound to the
    parameter code that carries the same name. In particular |\Umathdegreevariant| has to set
    |math_parameter_degree_variant| and |\Umathaccentvariant| has to set
    |math_parameter_accent_variant|; these two codes used to be crossed, so setting one of
    them silently changed the other.
*/
tex_primitive(luatex_command, "Umathaccentsuperscriptpercent", set_math_parameter_cmd, math_parameter_accent_superscript_percent, 0);
tex_primitive(luatex_command, "Umathaccentextendmargin", set_math_parameter_cmd, math_parameter_accent_extend_margin, 0);
/* */
tex_primitive(luatex_command, "Umathdelimiterpercent", set_math_parameter_cmd, math_parameter_delimiter_percent, 0);
tex_primitive(luatex_command, "Umathdelimitershortfall", set_math_parameter_cmd, math_parameter_delimiter_shortfall, 0);
/* */
tex_primitive(luatex_command, "Umathoverlinevariant", set_math_parameter_cmd, math_parameter_over_line_variant, 0);
tex_primitive(luatex_command, "Umathunderlinevariant", set_math_parameter_cmd, math_parameter_under_line_variant, 0);
tex_primitive(luatex_command, "Umathoverdelimitervariant", set_math_parameter_cmd, math_parameter_over_delimiter_variant, 0);
tex_primitive(luatex_command, "Umathunderdelimitervariant", set_math_parameter_cmd, math_parameter_under_delimiter_variant, 0);
tex_primitive(luatex_command, "Umathdelimiterovervariant", set_math_parameter_cmd, math_parameter_delimiter_over_variant, 0);
tex_primitive(luatex_command, "Umathdelimiterundervariant", set_math_parameter_cmd, math_parameter_delimiter_under_variant, 0);
tex_primitive(luatex_command, "Umathhextensiblevariant", set_math_parameter_cmd, math_parameter_h_extensible_variant, 0);
tex_primitive(luatex_command, "Umathvextensiblevariant", set_math_parameter_cmd, math_parameter_v_extensible_variant, 0);
tex_primitive(luatex_command, "Umathfractionvariant", set_math_parameter_cmd, math_parameter_fraction_variant, 0);
tex_primitive(luatex_command, "Umathradicalvariant", set_math_parameter_cmd, math_parameter_radical_variant, 0);
/* The name and the parameter code must match for these two (they were swapped). */
tex_primitive(luatex_command, "Umathdegreevariant", set_math_parameter_cmd, math_parameter_degree_variant, 0);
tex_primitive(luatex_command, "Umathaccentvariant", set_math_parameter_cmd, math_parameter_accent_variant, 0);
tex_primitive(luatex_command, "Umathtopaccentvariant", set_math_parameter_cmd, math_parameter_top_accent_variant, 0); + tex_primitive(luatex_command, "Umathbottomaccentvariant", set_math_parameter_cmd, math_parameter_bottom_accent_variant, 0); + tex_primitive(luatex_command, "Umathoverlayaccentvariant", set_math_parameter_cmd, math_parameter_overlay_accent_variant, 0); + tex_primitive(luatex_command, "Umathnumeratorvariant", set_math_parameter_cmd, math_parameter_numerator_variant, 0); + tex_primitive(luatex_command, "Umathdenominatorvariant", set_math_parameter_cmd, math_parameter_denominator_variant, 0); + tex_primitive(luatex_command, "Umathsuperscriptvariant", set_math_parameter_cmd, math_parameter_superscript_variant, 0); + tex_primitive(luatex_command, "Umathsubscriptvariant", set_math_parameter_cmd, math_parameter_subscript_variant, 0); + tex_primitive(luatex_command, "Umathprimevariant", set_math_parameter_cmd, math_parameter_prime_variant, 0); + tex_primitive(luatex_command, "Umathstackvariant", set_math_parameter_cmd, math_parameter_stack_variant, 0); + + tex_primitive(luatex_command, "resetmathspacing", set_math_parameter_cmd, math_parameter_reset_spacing, 0); + tex_primitive(luatex_command, "setmathspacing", set_math_parameter_cmd, math_parameter_set_spacing, 0); + tex_primitive(luatex_command, "letmathspacing", set_math_parameter_cmd, math_parameter_let_spacing, 0); + tex_primitive(luatex_command, "copymathspacing", set_math_parameter_cmd, math_parameter_copy_spacing, 0); + tex_primitive(luatex_command, "letmathparent", set_math_parameter_cmd, math_parameter_let_parent, 0); + tex_primitive(luatex_command, "copymathparent", set_math_parameter_cmd, math_parameter_copy_parent, 0); + tex_primitive(luatex_command, "setmathprepenalty", set_math_parameter_cmd, math_parameter_set_pre_penalty, 0); + tex_primitive(luatex_command, "setmathpostpenalty", set_math_parameter_cmd, math_parameter_set_post_penalty, 0); + tex_primitive(luatex_command, "setmathatomrule", 
set_math_parameter_cmd, math_parameter_set_atom_rule, 0); + tex_primitive(luatex_command, "setmathdisplayprepenalty", set_math_parameter_cmd, math_parameter_set_display_pre_penalty, 0); + tex_primitive(luatex_command, "setmathdisplaypostpenalty", set_math_parameter_cmd, math_parameter_set_display_post_penalty, 0); + tex_primitive(luatex_command, "letmathatomrule", set_math_parameter_cmd, math_parameter_let_atom_rule, 0); + tex_primitive(luatex_command, "copymathatomrule", set_math_parameter_cmd, math_parameter_copy_atom_rule, 0); + tex_primitive(luatex_command, "setmathignore", set_math_parameter_cmd, math_parameter_ignore, 0); + tex_primitive(luatex_command, "setmathoptions", set_math_parameter_cmd, math_parameter_options, 0); + tex_primitive(luatex_command, "setdefaultmathcodes", set_math_parameter_cmd, math_parameter_set_defaults, 0); + + /*tex + + A bunch of commands that need a special treatment, so we delayed their initialization. + They are in the above list but commented. We start with those that alias to (already + defined) primitives. Actually we can say something like: + + \starttyping + primitive(tex_command, "fi", if_test_cmd, fi_code, 0); + cs_text(deep_frozen_cs_fi_code) = maketexstring("fi"); + copy_eqtb_entry(deep_frozen_cs_fi_code, cur_val); + \stoptyping + + but we use a helper that does a primitive lookup and shares the already allocated + string. The effect is the same but it adds a little abstraction and saves a few + redundant strings. 
+ + */ + + tex_primitive(tex_command, "par", end_paragraph_cmd, normal_end_paragraph_code, 0); /* |too_big_char| */ + tex_primitive(no_command, "insertedpar", end_paragraph_cmd, inserted_end_paragraph_code, 0); + tex_primitive(no_command, "newlinepar", end_paragraph_cmd, new_line_end_paragraph_code, 0); + + /* tex_primitive(luatex_command, "linepar", undefined_cs_cmd, 0, 0); */ /*tex A user can define this one.*/ + + tex_primitive(tex_command, "endgroup", end_group_cmd, semi_simple_group_code, 0); + tex_primitive(luatex_command, "endsimplegroup", end_group_cmd, also_simple_group_code, 0); + tex_primitive(luatex_command, "endmathgroup", end_group_cmd, math_simple_group_code, 0); + + tex_primitive(tex_command, "relax", relax_cmd, relax_code, 0); + tex_primitive(luatex_command, "norelax", relax_cmd, no_relax_code, 0); + tex_primitive(no_command, "noexpandrelax", relax_cmd, no_expand_relax_code, 0); + + tex_primitive(tex_command, "fi", if_test_cmd, fi_code, 0); + tex_primitive(no_command, "noif", if_test_cmd, no_if_code, 0); + + tex_primitive(no_command, "always", prefix_cmd, always_code, 0); + + tex_primitive(tex_command, "nullfont", set_font_cmd, null_font, 0); + + tex_primitive(tex_command, "crcr", alignment_cmd, cr_cr_code, 0); + tex_primitive(tex_command, "cr", alignment_cmd, cr_code, 0); + + tex_aux_copy_deep_frozen_from_primitive(deep_frozen_cs_end_group_code, "endgroup"); + tex_aux_copy_deep_frozen_from_primitive(deep_frozen_cs_relax_code, "relax"); + tex_aux_copy_deep_frozen_from_primitive(deep_frozen_cs_fi_code, "fi"); + tex_aux_copy_deep_frozen_from_primitive(deep_frozen_cs_no_if_code, "noif"); + tex_aux_copy_deep_frozen_from_primitive(deep_frozen_cs_always_code, "always"); + tex_aux_copy_deep_frozen_from_primitive(deep_frozen_cs_right_code, "right"); + tex_aux_copy_deep_frozen_from_primitive(deep_frozen_cs_null_font_code, "nullfont"); + tex_aux_copy_deep_frozen_from_primitive(deep_frozen_cs_cr_code, "cr"); + + lmt_token_state.par_loc = 
tex_prim_lookup(tex_located_string("par")); + lmt_token_state.par_token = cs_token_flag + lmt_token_state.par_loc; + + /* lmt_token_state.line_par_loc = tex_prim_lookup(tex_located_string("linepar")); */ + /* lmt_token_state.line_par_token = cs_token_flag + lmt_token_state.line_par_loc; */ + + /*tex + These don't alias to existing commands. They are all inaccessible but might show up in + error messages and tracing. We could set the flags to resticted values. We need to + intercept them in the function that prints the |chr| because they can be out of range. + */ + + cs_text(deep_frozen_cs_end_template_1_code) = tex_maketexstring("endtemplate"); + set_eq_type(deep_frozen_cs_end_template_1_code, deep_frozen_end_template_cmd); + set_eq_flag(deep_frozen_cs_end_template_1_code, 0); + set_eq_value(deep_frozen_cs_end_template_1_code, lmt_token_state.null_list); + set_eq_level(deep_frozen_cs_end_template_1_code, level_one); + + cs_text(deep_frozen_cs_end_template_2_code) = tex_maketexstring("endtemplate"); + set_eq_type(deep_frozen_cs_end_template_2_code, end_template_cmd); + set_eq_flag(deep_frozen_cs_end_template_2_code, 0); + set_eq_value(deep_frozen_cs_end_template_2_code, lmt_token_state.null_list); + set_eq_level(deep_frozen_cs_end_template_2_code, level_one); + + cs_text(deep_frozen_cs_dont_expand_code) = tex_maketexstring("notexpanded"); + set_eq_type(deep_frozen_cs_dont_expand_code, deep_frozen_dont_expand_cmd); + set_eq_flag(deep_frozen_cs_dont_expand_code, 0); + + cs_text(deep_frozen_cs_protection_code) = tex_maketexstring("inaccessible"); + + cs_text(deep_frozen_cs_end_write_code) = tex_maketexstring("endwrite"); + set_eq_level(deep_frozen_cs_end_write_code, level_one); + set_eq_type(deep_frozen_cs_end_write_code, call_cmd); + set_eq_flag(deep_frozen_cs_end_write_code, 0); + set_eq_value(deep_frozen_cs_end_write_code, null); + + lmt_string_pool_state.reserved = lmt_string_pool_state.string_pool_data.ptr; + lmt_hash_state.no_new_cs = 1; + + } +} diff --git 
a/source/luametatex/source/tex/texcommands.h b/source/luametatex/source/tex/texcommands.h new file mode 100644 index 000000000..66fabb47e --- /dev/null +++ b/source/luametatex/source/tex/texcommands.h @@ -0,0 +1,1184 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_COMMANDS_H +# define LMT_COMMANDS_H + +/*tex + + Before we can go any further, we need to define symbolic names for the internal code numbers + that represent the various commands obeyed by \TEX. These codes are somewhat arbitrary, but + not completely so. For example, the command codes for character types are fixed by the + language, since a user says, e.g., |\catcode `\$ = 3| to make |\char'44| a math delimiter, + and the command code |math_shift| is equal to~3. Some other codes have been made adjacent so + that |case| statements in the program need not consider cases that are widely spaced, or so + that |case| statements can be replaced by |if| statements. + + At any rate, here is the list, for future reference. First come the catcode commands, several + of which share their numeric codes with ordinary commands when the catcode cannot emerge from + \TEX's scanning routine. + + Next are the ordinary run-of-the-mill command codes. Codes that are |min_internal| or more + represent internal quantities that might be expanded by |\the|. + + The next codes are special; they all relate to mode-independent assignment of values to \TEX's + internal registers or tables. Codes that are |max_internal| or less represent internal + quantities that might be expanded by |\the|. + + There is no matching primitive to go with |assign_attr|, but even if there was no + |\attributedef|, a reserved number would still be needed because there is an implied + correspondence between the |assign_xxx| commands and |xxx_val| expression values. That would + break down otherwise. + + The remaining command codes are extra special, since they cannot get through \TEX's scanner to + the main control routine. 
They have been given values higher than |max_command| so that their + special nature is easily discernible. The expandable commands come first. + + The extensions on top of standard \TEX\ came with extra |cmd| categories so at some point it + makes sense to normalize some of that. Similar commands became one category. Some more could be + combined, like rules and move etc.\ but for now it makes no sense. We could also move the mode + tests to the runners and make the main lookup simpler. Some commands need their own category + because they also can bind to characters (like super and subscript). + + Because much now uses |last_item_cmd| this one has been renamed to the more neutral + |some_item_cmd|. + + Watch out: check |command_names| in |lmttokenlib.c| after adding cmd's as these need to be in + sync. + + Maybe we should use |box_property|, |font property| and |page property| instead of the now + split ones. Actually we should drop setting font dimensions. + + todo: some codes -> subtypes (when not related to commands) + +*/ + +/*tex + Some commands are shared, for instance |car_ret_cmd| is never seen in a token list so it can be + used for signaling a parameter: |out_param_cmd| in a macro body. These constants relate to the + 21 bit shifting in token properties! + + These two are for nicer syntax highlighting in visual studio code or any IDE that is clever + enough to recognize enumerations. Otherwise they would get the color of a macro.
+ + \starttyping + # define escape_cmd relax_cmd + # define out_param_cmd car_ret_cmd + # define end_template_cmd ignore_cmd + # define active_char_cmd par_end_cmd + # define match_cmd par_end_cmd + # define comment_cmd stop_cmd + # define end_match_cmd stop_cmd + # define invalid_char_cmd delimiter_num_cmd + \stoptyping + + In the end sharing these command codes (as regular \TEX\ does) with character codes is not worth + the trouble because it gives fuzzy cmd codes in the \LUA\ token interface (and related tracing) + so at the cost of some extra slots they now are unique. The |foo_token| macros have to match the + cmd codes! Be aware that you need to map the new cmd names onto the original ones when you + consult the \TEX\ program source. + + As a consequence of having more commands, the need to be distinctive in the \LUA\ token interface, + some commands have been combined (at the cost of a little overhead in testing chr codes). Some + names have been made more generic as a side effect but the principles remain the same. Sorry for + any introduced confusion. + + An example of where some cmd codes were collapsed is alignments: |\omit|, |\span|, |\noalign|, + |\cr| and |\crcr| are now all handled by one cmd/chr code combination. This might make it a bit + easier to extend alignments when we're at it because it brings some code and logic together (of + course the principles are the same, but there can be slight differences in the way errors are + reported). +*/ + + +typedef enum tex_command_code { + /*tex + The first 16 command codes are used for characters with a special meaning. In traditional + \TEX\ some have different names and also aliases. because we have a public token interface + they now are uniquely used for characters and the aliases have their own cmd/chr codes. 
+ */ + escape_cmd, /*tex 0: escape delimiter*/ + left_brace_cmd, /*tex 1: beginning of a group */ + right_brace_cmd, /*tex 2: ending of a group */ + math_shift_cmd, /*tex 3: mathematics shift character */ + alignment_tab_cmd, /*tex 4: alignment delimiter */ + end_line_cmd, /*tex 5: end of line */ + parameter_cmd, /*tex 6: macro parameter symbol */ + superscript_cmd, /*tex 7: superscript */ + subscript_cmd, /*tex 8: subscript */ + ignore_cmd, /*tex 9: characters to ignore */ + spacer_cmd, /*tex 10: characters equivalent to blank space */ + letter_cmd, /*tex 11: characters regarded as letters */ + other_char_cmd, /*tex 12: none of the special character types */ + active_char_cmd, /*tex 13: characters that invoke macros */ + comment_cmd, /*tex 14: characters that introduce comments */ + invalid_char_cmd, /*tex 15: characters that shouldn't appear (|^^|) */ + /*tex + The next set of commands is handled in the big switch where interpretation depends + on the current mode. It is a chicken or egg choice: either we have one runner per + command in which the mode is chosen, or we have a runner for each mode. The later is + used in \TEX. + */ + relax_cmd, /*tex do nothing (|\relax|) */ + end_template_cmd, /*tex end of |v_j| list in alignment template */ + alignment_cmd, /*tex |\cr|, |\crcr| and |\span| */ + match_cmd, /*tex match a macro parameter */ + end_match_cmd, /*tex end of parameters to macro */ + parameter_reference_cmd, /*tex the value passed as parameter */ + end_paragraph_cmd, /*tex end of paragraph (|\par|) */ + end_job_cmd, /*tex end of job (|\end|, |\dump|) */ + delimiter_number_cmd, /*tex specify delimiter numerically (|\delimiter|) */ + char_number_cmd, /*tex character specified numerically (|\char|) */ + math_char_number_cmd, /*tex explicit math code (|mathchar} ) */ + set_mark_cmd, /*tex mark definition (|mark|) */ + node_cmd, /*tex a node injected via \LUA */ + xray_cmd, /*tex peek inside of \TEX\ (|\show|, |\showbox|, etc.) 
*/ + make_box_cmd, /*tex make a box (|\box|, |\copy|, |\hbox|, etc.) */ + hmove_cmd, /*tex horizontal motion (|\moveleft|, |\moveright|) */ + vmove_cmd, /*tex vertical motion (|\raise|, |\lower|) */ + un_hbox_cmd, /*tex unglue a box (|\unhbox|, |\unhcopy|) */ + un_vbox_cmd, /*tex unglue a box (|\unvbox|, |\unvcopy|, |\pagediscards|, |\splitdiscards|) */ + remove_item_cmd, /*tex nullify last item (|\unpenalty|, |\unkern|, |\unskip|) */ + hskip_cmd, /*tex horizontal glue (|\hskip|, |\hfil|, etc.) */ + vskip_cmd, /*tex vertical glue (|\vskip|, |\vfil|, etc.) */ + mskip_cmd, /*tex math glue (|\mskip|) */ + kern_cmd, /*tex fixed space (|\kern|) */ + mkern_cmd, /*tex math kern (|\mkern|) */ + leader_cmd, /*tex all these |\leaders| */ + legacy_cmd, /*tex obsolete (|\shipout|, etc.) */ + local_box_cmd, /*tex use a box (|\localleftbox|, etc.) */ + halign_cmd, /*tex horizontal table alignment (|\halign|) */ + valign_cmd, /*tex vertical table alignment (|\valign|) */ + vrule_cmd, /*tex vertical rule (|\vrule|, etc.) */ + hrule_cmd, /*tex horizontal rule (|\hrule|, etc.) */ + insert_cmd, /*tex vlist inserted in box (|\insert|) */ + vadjust_cmd, /*tex vlist inserted in enclosing paragraph (|\vadjust|) */ + ignore_something_cmd, /*tex gobble |spacer| tokens (|\ignorespaces|) */ + after_something_cmd, /*tex save till assignment or group is done (|\after*|) */ + penalty_cmd, /*tex additional badness (|\penalty|) */ + begin_paragraph_cmd, /*tex (begin) paragraph (|\indent|, |\noindent|) */ + italic_correction_cmd, /*tex italic correction (|\/|) */ + accent_cmd, /*tex attach accent in text (|\accent|) */ + math_accent_cmd, /*tex attach accent in math (|\mathaccent|) */ + discretionary_cmd, /*tex discretionary texts (|\-|, |\discretionary|) */ + equation_number_cmd, /*tex equation number (|\eqno|, |\leqno|) */ + math_fence_cmd, /*tex variable delimiter (|\left|, |\right| or |\middle|) part of a fence */ + math_component_cmd, /*tex component of formula (|\mathbin|, etc.)
*/ + math_modifier_cmd, /*tex limit conventions (|\displaylimits|, etc.) */ + math_fraction_cmd, /*tex generalized fraction (|\above|, |\atop|, etc.) */ + math_style_cmd, /*tex style specification (|\displaystyle|, etc.) */ + math_choice_cmd, /*tex choice specification (|\mathchoice|) */ + vcenter_cmd, /*tex vertically center a vbox (|\vcenter|) */ + case_shift_cmd, /*tex force specific case (|\lowercase|, |\uppercase|) */ + message_cmd, /*tex send to user (|\message|, |\errmessage|) */ + catcode_table_cmd, /*tex manipulators for catcode tables */ + end_local_cmd, /*tex finishes a |local_cmd| */ + lua_function_call_cmd, /*tex an expandable function call */ + lua_protected_call_cmd, /*tex a function call that doesn't expand in edef like situations */ + begin_group_cmd, /*tex begin local grouping (|\begingroup|) */ + end_group_cmd, /*tex end local grouping (|\endgroup|) */ + explicit_space_cmd, /*tex explicit space (|\ |) */ + boundary_cmd, /*tex insert boundary node with value (|\*boundary|) */ + math_radical_cmd, /*tex square root and similar signs (|\radical|) */ + math_script_cmd, /*tex explicit super- or subscript */ + math_shift_cs_cmd, /*tex start- and endmath */ + end_cs_name_cmd, /*tex end control sequence (|\endcsname|) */ + /*tex + The next set can come after |\the| so they are either handled in the big switch or + during expansion of this serializer prefix. + */ + char_given_cmd, /*tex character code defined by |\chardef| */ + // math_char_given_cmd, /*tex math code defined by |\mathchardef| */ + // math_char_xgiven_cmd, /*tex math code defined by |\Umathchardef| or |\Umathcharnumdef| */ + some_item_cmd, /*tex most recent item (|\lastpenalty|, |\lastkern|, |\lastskip| and more) */ + /*tex + The previous command was described as \quotation {the last that cannot be prefixed by + |\global|} which is not entirely true any more. Actually more accurate is that the next + bunch can be prefixed and that's a mixed bag.
It is used in |handle_assignments| which + deals with assignments in some special cases. + */ + internal_toks_cmd, /*tex special token list (|\output|, |\everypar|, etc.) */ + register_toks_cmd, /*tex user defined token lists */ + internal_int_cmd, /*tex integer (|\tolerance|, |\day|, etc.) */ + register_int_cmd, /*tex user-defined integers */ + internal_attribute_cmd, /*tex */ + register_attribute_cmd, /*tex user-defined attributes */ + internal_dimen_cmd, /*tex length (|\hsize|, etc.) */ + register_dimen_cmd, /*tex user-defined dimensions */ + internal_glue_cmd, /*tex glue (|\baselineskip|, etc.) */ + register_glue_cmd, /*tex user-defined glue */ + internal_mu_glue_cmd, /*tex */ + register_mu_glue_cmd, /*tex user-defined math glue */ + lua_value_cmd, /*tex reference to a regular lua function */ + iterator_value_cmd, + set_font_property_cmd, /*tex user-defined font integer (|\hyphenchar|, |\skewchar|) or (|\fontdimen|) */ + set_auxiliary_cmd, /*tex state info (|\spacefactor|, |\prevdepth|) */ + set_page_property_cmd, /*tex page info (|\pagegoal|, etc.) */ + set_box_property_cmd, /*tex change property of box (|\wd|, |\ht|, |\dp|) */ + set_specification_cmd, /*tex specifications (|\parshape|, |\interlinepenalties|, etc.) */ + define_char_code_cmd, /*tex define a character code (|\catcode|, etc.) */ + define_family_cmd, /*tex declare math fonts (|\textfont|, etc.) */ + set_math_parameter_cmd, /*tex set math parameters (|\mathquad|, etc.) */ + set_font_cmd, /*tex set current font (font identifiers) */ + define_font_cmd, /*tex define a font file (|\font|) */ + integer_cmd, /*tex the equivalent is a halfword number */ + dimension_cmd, /*tex the equivalent is a halfword number representing a dimension */ + gluespec_cmd, /*tex the equivalent is a halfword reference to glue */ + mugluespec_cmd, /*tex the equivalent is a halfword reference to glue with math units */ + mathspec_cmd, + fontspec_cmd, + register_cmd, /*tex internal register (|\count|, |\dimen|, etc.) 
*/ + /* string_cmd, */ /*tex discarded experiment but maybe ... */ + combine_toks_cmd, /*tex the |\toksapp| and similar token (list) combiners */ + /*tex + That was the last command that could follow |\the|. + */ + arithmic_cmd, /*tex |\advance|, |\multiply|, |\divide|, ... */ + prefix_cmd, /*tex qualify a definition (|\global|, |\long|, |\outer|) */ + let_cmd, /*tex assign a command code (|\let|, |\futurelet|) */ + shorthand_def_cmd, /*tex code definition (|\chardef|, |\countdef|, etc.) */ + def_cmd, /*tex macro definition (|\def|, |\gdef|, |\xdef|, |\edef|) */ + set_box_cmd, /*tex set a box (|\setbox|) */ + hyphenation_cmd, /*tex hyphenation data (|\hyphenation|, |\patterns|) */ + set_interaction_cmd, /*tex define level of interaction (|\batchmode|, etc.) */ + /*tex + Here ends the section that is part of the big switch. What follows are commands that are + intercepted when expanding tokens. The string one came from a todo list and moved to a + maybe list. + */ + undefined_cs_cmd, /*tex initial state of most |eq_type| fields */ + expand_after_cmd, /*tex special expansion (|\expandafter|) */ + no_expand_cmd, /*tex special nonexpansion (|\noexpand|) */ + input_cmd, /*tex input a source file (|\input|, |\endinput| or |\scantokens| or |\scantextokens|) */ + lua_call_cmd, /*tex a reference to a \LUA\ function */ + lua_local_call_cmd, /*tex idem, but in a nested main loop */ + begin_local_cmd, /*tex enter a nested main loop */ + if_test_cmd, /*tex conditional text (|\if|, |\ifcase|, etc.) */ + cs_name_cmd, /*tex make a control sequence from tokens (|\csname|) */ + convert_cmd, /*tex convert to text (|\number|, |\string|, etc.) */ + the_cmd, /*tex expand an internal quantity (|\the| or |\unexpanded|, |\detokenize|) */ + get_mark_cmd, /*tex inserted mark (|\topmark|, etc.) */ + /* string_cmd, */ + /*tex + These refer to macros. We might at some point promote the tolerant ones to have their own + cmd codes.
Protected macros were done with an initial token signaling that property but + they became |protected_call_cmd|. After that we also got two frozen variants and later four + tolerant so we ended up with eight. When I wanted some more, a different solution was + chosen, so now we have just one again instead of |[tolerant_][frozen_][protected_]call_cmd|. + But ... in the end I settled again for four basic call commands because it's nicer in + the token interface. + + The todo cmds come from a todo list and relate to |\expand| but then like \expand{...} even + when normally it's protected. But it adds overhead we don't want right now and in the end I + didn't need it. I keep it as reference so that I won't recycle it. + + */ + call_cmd, /*tex regular control sequence */ + protected_call_cmd, /*tex idem but doesn't expand in edef like situations */ + semi_protected_call_cmd, + tolerant_call_cmd, /*tex control sequence with tolerant arguments */ + tolerant_protected_call_cmd, /*tex idem but doesn't expand in edef like situations */ + tolerant_semi_protected_call_cmd, + /*tex + These are special and are inserted in token streams. They cannot end up in macros. + */ + deep_frozen_end_template_cmd, /*tex end of an alignment template */ + deep_frozen_dont_expand_cmd, /*tex the following token was marked by |\noexpand| */ + /*tex + The next bunch is never seen directly as they are shortcuts to registers and special data + structures. They are the internal register (pseudo) commands and are also needed for + token and node memory management.
+ */ + internal_glue_reference_cmd, /*tex the equivalent points to internal glue specification */ + register_glue_reference_cmd, /*tex the equivalent points to register glue specification */ + internal_mu_glue_reference_cmd, /*tex the equivalent points to internal muglue specification */ + register_mu_glue_reference_cmd, /*tex the equivalent points to register muglue specification */ + internal_box_reference_cmd, /*tex the equivalent points to internal box node, or is |null| */ + register_box_reference_cmd, /*tex the equivalent points to register box node, or is |null| */ + internal_toks_reference_cmd, /*tex the equivalent points to internal token list */ + register_toks_reference_cmd, /*tex the equivalent points to register token list */ + specification_reference_cmd, /*tex the equivalent points to parshape or penalties specification */ + /* + We don't really need these but they are used to flag the registers eq entries properly. They + are not really references because the values are included but we want to be consistent here.
+ */ + internal_int_reference_cmd, + register_int_reference_cmd, + internal_attribute_reference_cmd, + register_attribute_reference_cmd, + internal_dimen_reference_cmd, + register_dimen_reference_cmd, + /*tex + This is how many commands we have: + */ + number_tex_commands, +} tex_command_code; + +# define max_char_code_cmd invalid_char_cmd /*tex largest catcode for individual characters */ +# define min_internal_cmd char_given_cmd /*tex the smallest code that can follow |\the| */ +# define max_non_prefixed_cmd some_item_cmd /*tex largest command code that can't be |\global| */ +# define max_internal_cmd register_cmd /*tex the largest code that can follow |\the| */ +# define max_command_cmd set_interaction_cmd /*tex the largest command code seen at |big_switch| */ + +# define first_cmd escape_cmd +# define last_cmd register_dimen_reference_cmd + +# define first_call_cmd call_cmd +# define last_call_cmd tolerant_semi_protected_call_cmd + +# define last_visible_cmd tolerant_semi_protected_call_cmd + +/*tex The tests below parenthesize their argument so that an expression can be passed safely; the argument can still be evaluated more than once, so it should have no side effects. */ +# define is_call_cmd(cmd) ((cmd) >= first_call_cmd && (cmd) <= last_call_cmd) +# define is_protected_cmd(cmd) ((cmd) == protected_call_cmd || (cmd) == tolerant_protected_call_cmd) +# define is_semi_protected_cmd(cmd) ((cmd) == semi_protected_call_cmd || (cmd) == tolerant_semi_protected_call_cmd) +# define is_tolerant_cmd(cmd) ((cmd) == tolerant_call_cmd || (cmd) == tolerant_protected_call_cmd || (cmd) == tolerant_semi_protected_call_cmd) + +# define is_referenced_cmd(cmd) ((cmd) >= call_cmd) +# define is_nodebased_cmd(cmd) ((cmd) >= gluespec_cmd && (cmd) <= fontspec_cmd) + + +# if (main_control_mode == 1) + +/*tex Once these were different numbers, no series: */ + +typedef enum tex_modes { + nomode, + vmode, + hmode, + mmode, +} tex_modes; + +# else + +typedef enum tex_modes { + nomode = 0, + vmode = 1, /*tex vertical mode */ + hmode = 1 + max_command_cmd + 1, /*tex horizontal mode */ + mmode = 1 + 2*(max_command_cmd + 1), /*tex math mode */ +} tex_modes; + +# endif + +typedef enum arithmic_codes {
+ advance_code, + multiply_code, + divide_code, + /* bitwise_and_code, */ + /* bitwise_xor_code, */ + /* bitwise_or_code, */ + /* bitwise_not_code, */ +} arithmic_codes; + +# define last_arithmic_code divide_code + +typedef enum math_script_codes { + math_no_script_code, + math_no_ruling_code, + math_sub_script_code, + math_super_script_code, + math_super_pre_script_code, + math_sub_pre_script_code, + math_no_sub_script_code, + math_no_super_script_code, + math_no_sub_pre_script_code, + math_no_super_pre_script_code, + math_shifted_sub_script_code, + math_shifted_super_script_code, + math_shifted_sub_pre_script_code, + math_shifted_super_pre_script_code, + math_prime_script_code, +} math_script_codes; + +# define last_math_script_code math_prime_script_code + +typedef enum math_fraction_codes { + math_above_code, + math_above_delimited_code, + math_over_code, + math_over_delimited_code, + math_atop_code, + math_atop_delimited_code, + math_u_above_code, + math_u_above_delimited_code, + math_u_over_code, + math_u_over_delimited_code, + math_u_atop_code, + math_u_atop_delimited_code, + math_u_skewed_code, + math_u_skewed_delimited_code, + math_u_stretched_code, + math_u_stretched_delimited_code, +} math_fraction_codes; + +# define last_math_fraction_code math_u_skewed_code + +/*tex + These don't fit into the internal register model because they are for instance global or + bound to the current list. 
+*/ + +typedef enum auxiliary_codes { + space_factor_code, + prev_depth_code, + prev_graf_code, + interaction_mode_code, + insert_mode_code, +} auxiliary_codes; + +# define last_auxiliary_code insert_mode_code + +typedef enum convert_codes { + number_code, /*tex command code for |\number| */ + to_integer_code, /*tex command code for |\tointeger| (also gobbles |\relax|) */ + to_hexadecimal_code, /*tex command code for |\tohexadecimal| */ + to_scaled_code, /*tex command code for |\toscaled| (also gobbles |\relax|) */ + to_sparse_scaled_code, /*tex command code for |\tosparsescaled| (also gobbles |\relax|) */ + to_dimension_code, /*tex command code for |\todimension| (also gobbles |\relax|) */ + to_sparse_dimension_code, /*tex command code for |\tosparsedimension| */ + to_mathstyle_code, /*tex command code for |\tomathstyle| */ + lua_code, /*tex command code for |\directlua| */ + lua_function_code, /*tex command code for |\luafunction| */ + lua_bytecode_code, /*tex command code for |\luabytecode| */ + expanded_code, /*tex command code for |\expanded| */ + semi_expanded_code, /*tex command code for |\constantexpanded| */ + string_code, /*tex command code for |\string| */ + cs_string_code, /*tex command code for |\csstring| */ + detokenized_code, /*tex command code for |\detokenized| */ + roman_numeral_code, /*tex command code for |\romannumeral| */ + meaning_code, /*tex command code for |\meaning| */ + meaning_full_code, /*tex command code for |\meaningfull| */ + meaning_less_code, /*tex command code for |\meaningless| */ + meaning_asis_code, /*tex command code for |\meaningasis| */ + uchar_code, /*tex command code for |\Uchar| */ + lua_escape_string_code, /*tex command code for |\luaescapestring| */ + font_name_code, /*tex command code for |\fontname| */ + font_specification_code, /*tex command code for |\fontspecification| */ + job_name_code, /*tex command code for |\jobname| */ + format_name_code, /*tex command code for |\formatname| */ + luatex_banner_code, /*tex
command code for |\luatexbanner| */ + font_identifier_code, /*tex command code for |tex.fontidentifier| (virtual) */ +} convert_codes; + +# define first_convert_code number_code +# define last_convert_code luatex_banner_code + +typedef enum input_codes { + normal_input_code, + end_of_input_code, + token_input_code, + tex_token_input_code, + /* for now private */ + tokenized_code, + retokenized_code, + quit_loop_code, +} input_codes; + +# define last_input_code tex_token_input_code + +typedef enum some_item_codes { + lastpenalty_code, /*tex |\lastpenalty| */ + lastkern_code, /*tex |\lastkern| */ + lastskip_code, /*tex |\lastskip| */ + lastboundary_code, /*tex |\lastboundary| */ + last_node_type_code, /*tex |\lastnodetype| */ + last_node_subtype_code, /*tex |\lastnodesubtype| */ + input_line_no_code, /*tex |\inputlineno| */ + badness_code, /*tex |\badness| */ + overshoot_code, /*tex |\overshoot| */ + luatex_version_code, /*tex |\luatexversion| */ + luatex_revision_code, /*tex |\luatexrevision| */ + current_group_level_code, /*tex |\currentgrouplevel| */ + current_group_type_code, /*tex |\currentgrouptype| */ + current_if_level_code, /*tex |\currentiflevel| */ + current_if_type_code, /*tex |\currentiftype| */ + current_if_branch_code, /*tex |\currentifbranch| */ + glue_stretch_order_code, /*tex |\gluestretchorder| */ + glue_shrink_order_code, /*tex |\glueshrinkorder| */ + font_id_code, /*tex |\fontid| */ + glyph_x_scaled_code, /*tex |\glyphxscaled| */ + glyph_y_scaled_code, /*tex |\glyphyscaled| */ + font_char_wd_code, /*tex |\fontcharwd| */ + font_char_ht_code, /*tex |\fontcharht| */ + font_char_dp_code, /*tex |\fontchardp| */ + font_char_ic_code, /*tex |\fontcharic| */ + font_char_ta_code, /*tex |\fontcharta| */ + font_spec_id_code, /*tex |\fontspecid| */ + font_spec_scale_code, /*tex |\fontspecscale| */ + font_spec_xscale_code, /*tex |\fontspecxscale| */ + font_spec_yscale_code, /*tex |\fontspecyscale| */ + font_size_code, /*tex |\fontsize| */ + 
font_math_control_code, /*tex |\fontmathcontrol| */ + font_text_control_code, /*tex |\fonttextcontrol| */ + math_scale_code, /*tex |\mathscale| */ + math_style_code, /*tex |\mathstyle| */ + math_main_style_code, /*tex |\mathmainstyle| */ + math_style_font_id_code, /*tex |\mathstylefontid| */ + math_stack_style_code, /*tex |\mathstackstyle| */ + math_char_class_code, /*tex |\Umathcharclass| */ + math_char_fam_code, /*tex |\Umathcharfam| */ + math_char_slot_code, /*tex |\Umathcharslot| */ + scaled_slant_per_point_code, + scaled_interword_space_code, + scaled_interword_stretch_code, + scaled_interword_shrink_code, + scaled_ex_height_code, + scaled_em_width_code, + scaled_extra_space_code, + last_arguments_code, /*tex |\lastarguments| */ + parameter_count_code, /*tex |\parametercount| */ + /* lua_value_function_code, */ /*tex |\luavaluefunction| */ + insert_progress_code, /*tex |\insertprogress| */ + left_margin_kern_code, /*tex |\leftmarginkern| */ + right_margin_kern_code, /*tex |\rightmarginkern| */ + par_shape_length_code, /*tex |\parshapelength| */ + par_shape_indent_code, /*tex |\parshapeindent| */ + par_shape_dimen_code, /*tex |\parshapedimen| */ + glue_stretch_code, /*tex |\gluestretch| */ + glue_shrink_code, /*tex |\glueshrink| */ + mu_to_glue_code, /*tex |\mutoglue| */ + glue_to_mu_code, /*tex |\gluetomu| */ + numexpr_code, /*tex |\numexpr| */ + /* attrexpr_code, */ /*tex not used */ + dimexpr_code, /*tex |\dimexpr| */ + glueexpr_code, /*tex |\glueexpr| */ + muexpr_code, /*tex |\muexpr| */ + numexpression_code, /*tex |\numexpression| */ + dimexpression_code, /*tex |\dimexpression| */ + last_chk_num_code, /*tex |\ifchknum| */ + last_chk_dim_code, /*tex |\ifchkdim| */ + // dimen_to_scale_code, /*tex |\dimentoscale| */ + numeric_scale_code, /*tex |\numericscale| */ + index_of_register_code, + index_of_character_code, + math_atom_glue_code, + last_left_class_code, + last_right_class_code, + last_atom_class_code, + current_loop_iterator_code, + 
current_loop_nesting_code, + last_loop_iterator_code, + last_par_context_code, + last_page_extra_code, +} some_item_codes; + +# define last_some_item_code last_page_extra_code + +typedef enum catcode_table_codes { + save_cat_code_table_code, + init_cat_code_table_code, + /* dflt_cat_code_table_code, */ +} catcode_table_codes; + +# define last_catcode_table_code init_cat_code_table_code + +typedef enum font_property_codes { + font_hyphen_code, + font_skew_code, + font_lp_code, + font_rp_code, + font_ef_code, + font_dimen_code, + scaled_font_dimen_code, +} font_property_codes; + +# define last_font_property_code scaled_font_dimen_code + +typedef enum box_property_codes { + box_width_code, + box_height_code, + box_depth_code, + box_direction_code, + box_geometry_code, + box_orientation_code, + box_anchor_code, + box_anchors_code, + box_source_code, + box_target_code, + box_xoffset_code, + box_yoffset_code, + box_xmove_code, + box_ymove_code, + box_total_code, + box_shift_code, + box_adapt_code, + box_repack_code, + box_freeze_code, + /* we actually need set_box_int_cmd, or set_box_property */ + box_attribute_code, +} box_property_codes; + +# define last_box_property_code box_attribute_code + +typedef enum hyphenation_codes { + hyphenation_code, + patterns_code, + prehyphenchar_code, + posthyphenchar_code, + preexhyphenchar_code, + postexhyphenchar_code, + hyphenationmin_code, + hjcode_code, +} hyphenation_codes; + +# define last_hyphenation_code hjcode_code + +typedef enum begin_paragraph_codes { + noindent_par_code, + indent_par_code, + quitvmode_par_code, + undent_par_code, + snapshot_par_code, + attribute_par_code, + wrapup_par_code, +} begin_paragraph_codes; + +# define last_begin_paragraph_code wrapup_par_code + +extern void tex_initialize_commands (void); + +/*tex + + A |\chardef| creates a control sequence whose |cmd| is |char_given|; a |\mathchardef| creates a + control sequence whose |cmd| is |math_given|; and the corresponding |chr| is the character code + 
or math code. A |\countdef| or |\dimendef| or |\skipdef| or |\muskipdef| creates a control + sequence whose |cmd| is |assign_int| or \dots\ or |assign_mu_glue|, and the corresponding |chr| + is the |eqtb| location of the internal register in question. + + We have the following codes for |shorthand_def|: + +*/ + +typedef enum relax_codes { + relax_code, + no_relax_code, + no_expand_relax_code, +} relax_codes; + +# define last_relax_code no_relax_code + +typedef enum end_paragraph_codes { + normal_end_paragraph_code, + inserted_end_paragraph_code, + new_line_end_paragraph_code, +} end_paragraph_codes; + +# define last_end_paragraph_code new_line_end_paragraph_code + +typedef enum shorthand_def_codes { + char_def_code, /*tex |\chardef| */ + math_char_def_code, /*tex |\mathchardef| */ + math_xchar_def_code, /*tex |\Umathchardef| */ + math_dchar_def_code, /*tex |\Umathdictdef| */ + /* math_uchar_def_code, */ /* |\Umathcharnumdef| */ + count_def_code, /*tex |\countdef| */ + attribute_def_code, /*tex |\attributedef| */ + dimen_def_code, /*tex |\dimendef| */ + skip_def_code, /*tex |\skipdef| */ + mu_skip_def_code, /*tex |\muskipdef| */ + toks_def_code, /*tex |\toksdef| */ + /* string_def_code, */ + lua_def_code, /*tex |\luadef| */ + integer_def_code, + dimension_def_code, + gluespec_def_code, + mugluespec_def_code, + /* mathspec_def_code, */ + fontspec_def_code, +} shorthand_def_codes; + +# define last_shorthand_def_code fontspec_def_code + +typedef enum char_number_codes { + char_number_code, /*tex |\char| */ + glyph_number_code, /*tex |\glyph| */ +} char_number_codes; + +# define last_char_number_code glyph_number_code + +typedef enum math_char_number_codes { + math_char_number_code, /*tex |\mathchar| */ + math_xchar_number_code, /*tex |\Umathchar| */ + math_dchar_number_code, /*tex |\Umathdict| */ + /* math_uchar_number_code, */ /* |\Umathcharnum| */ + math_class_number_code, /*tex |\Umathclass| */ +} math_char_number_codes; + +# define last_math_char_number_code 
math_class_number_code + +typedef enum xray_codes { + show_code, /*tex |\show| */ + show_box_code, /*tex |\showbox| */ + show_the_code, /*tex |\showthe| */ + show_lists_code, /*tex |\showlists| */ + show_groups_code, /*tex |\showgroups| */ + show_tokens_code, /*tex |\showtokens|, must be odd! */ + show_ifs_code, /*tex |\showifs| */ +} xray_codes; + +# define last_xray_code show_ifs_code + +typedef enum the_codes { + the_code, + the_without_unit_code, + /* the_with_property_code, */ /* replaced by value functions */ + detokenize_code, + unexpanded_code, +} the_codes; + +# define last_the_code unexpanded_code + +typedef enum expand_after_codes { + expand_after_code, + expand_unless_code, + future_expand_code, + future_expand_is_code, /*tex nicer than: future_expand_ignore_spaces_code */ + future_expand_is_ap_code, /*tex nicer than: future_expand_ignore_spaces_and_pars_code */ + /* expand_after_2_code, */ + /* expand_after_3_code, */ + expand_after_spaces_code, + expand_after_pars_code, + expand_token_code, + expand_cs_token_code, + expand_code, + semi_expand_code, + expand_after_toks_code, + /* expand_after_fi, */ +} expand_after_codes; + +# define last_expand_after_code expand_after_toks_code + +typedef enum after_something_codes { + after_group_code, + after_assignment_code, + at_end_of_group_code, + after_grouped_code, + after_assigned_code, + at_end_of_grouped_code, +} after_something_codes; + +# define last_after_something_code at_end_of_grouped_code + +typedef enum begin_group_codes { + semi_simple_group_code, + also_simple_group_code, + math_simple_group_code, +} begin_group_codes; + +# define last_begin_group_code also_simple_group_code + +typedef enum end_job_codes { + end_code, + dump_code, +} end_job_codes; + +# define last_end_job_code dump_code + +typedef enum local_control_codes { + local_control_begin_code, + local_control_token_code, + local_control_list_code, + local_control_loop_code, + expanded_loop_code, + unexpanded_loop_code, +} 
local_control_codes; + +# define last_local_control_code unexpanded_loop_code + +/*tex + + Maybe also a prefix |\unfrozen| that avoids the warning or have a variant that only issues a + warning but then we get 8 more cmd codes and we don't want that. An alternative is to have some + bits for this but we don't have enough. Now, because frozen macros can be unfrozen we can + indeed have a prefix that bypasses the check. Explicit (re)definitions are then up to the user. + +*/ + +typedef enum prefix_codes { + frozen_code, + permanent_code, + immutable_code, + /* primitive_code, */ + mutable_code, + noaligned_code, + instance_code, + untraced_code, + global_code, + tolerant_code, + protected_code, + overloaded_code, + aliased_code, + immediate_code, + /* conditional_code */ + /* value_code */ + semiprotected_code, + enforced_code, + always_code, + inherited_code, + long_code, + outer_code, +} prefix_codes; + +# define last_prefix_code enforced_code + +typedef enum combine_toks_codes { + expanded_toks_code, + append_toks_code, + append_expanded_toks_code, + prepend_toks_code, + prepend_expanded_toks_code, + global_expanded_toks_code, + global_append_toks_code, + global_append_expanded_toks_code, + global_prepend_toks_code, + global_prepend_expanded_toks_code, +} combine_toks_codes; + +# define last_combine_toks_code global_prepend_expanded_toks_code + +typedef enum cs_name_codes { + cs_name_code, + last_named_cs_code, + begin_cs_name_code, + future_cs_name_code, +} cs_name_codes; + +# define last_cs_name_code begin_cs_name_code + +typedef enum def_codes { + expanded_def_code, + def_code, + global_expanded_def_code, + global_def_code, + expanded_def_csname_code, + def_csname_code, + global_expanded_def_csname_code, + global_def_csname_code, +} def_codes; + +# define last_def_code global_def_csname_code + +typedef enum let_codes { + global_let_code, + let_code, + future_let_code, + future_def_code, + let_charcode_code, + swap_cs_values_code, + let_protected_code, + 
unlet_protected_code, + let_frozen_code, + unlet_frozen_code, + let_csname_code, + global_let_csname_code, + let_to_nothing_code, + global_let_to_nothing_code, +} let_codes; + +# define last_let_code global_let_csname_code + +typedef enum message_codes { + message_code, + error_message_code, +} message_codes; + +# define last_message_code error_message_code + +/*tex + + These are no longer needed, but we keep them as reference: + + \starttyping + typedef enum in_stream_codes { + close_stream_code, + open_stream_code, + } in_stream_codes; + + # define last_in_stream_code open_stream_code + + typedef enum read_to_cs_codes { + read_code, + read_line_code, + } read_to_cs_codes; + + # define last_read_to_cs_code read_line_code + \stoptyping + +*/ + +typedef enum lua_call_codes { + lua_function_call_code, + lua_bytecode_call_code, +} lua_codes; + +typedef enum math_delimiter_codes { + math_delimiter_code, + math_udelimiter_code, +} math_delimiter_codes; + +# define last_math_delimiter_code math_udelimiter_code + +typedef enum math_choice_codes { + math_choice_code, + math_discretionary_code, + math_ustack_code, +} math_choice_codes; + +# define last_math_choice_code math_ustack_code + +typedef enum math_accent_codes { + math_accent_code, + math_uaccent_code, +} math_accent_codes; + +# define last_math_accent_code math_uaccent_code + +typedef enum lua_value_codes { + lua_value_none_code, + lua_value_integer_code, + lua_value_cardinal_code, + lua_value_dimension_code, + lua_value_skip_code, + lua_value_boolean_code, + lua_value_float_code, + lua_value_string_code, + lua_value_node_code, + lua_value_direct_code, + /*tex total number of lua values */ + number_lua_values, +} lua_value_codes; + +typedef enum math_shift_cs_codes { + begin_inline_math_code, + end_inline_math_code, + begin_display_math_code, + end_display_math_code, + begin_math_mode_code, + end_math_mode_code, +} math_shift_cs_codes; + +# define first_math_shift_cs_code begin_inline_math_code +# define 
last_math_shift_cs_code end_math_mode_code + +/*tex + The next base and offset are what we always had so we keep it but we do use a proper zero based + chr code that we adapt to the old value in the runner, so from then on we're in old mode again. + + \starttyping + # define leader_ship_base (a_leaders - 1) + # define leader_ship_offset (leader_flag - a_leaders) + \stoptyping + + Internal boxes are kind of special as they can have different scanners and as such they don't + really fit in the rest of the internals. Now, for consistency we treat local boxes as internal + ones but if we ever need more (which is unlikely) we can have a dedicated local_box_base. If + we ever extend the repertoire of internal boxes we have to keep the local ones at the start. + +*/ + +typedef enum legacy_codes { + shipout_code, +} legacy_codes; + +# define first_legacy_code shipout_code +# define last_legacy_code shipout_code + +typedef enum leader_codes { + a_leaders_code, + c_leaders_code, + x_leaders_code, + g_leaders_code, + u_leaders_code, +} leader_codes; + +# define first_leader_code a_leaders_code +# define last_leader_code u_leaders_code + +typedef enum local_box_codes { + local_left_box_code, + local_right_box_code, + local_middle_box_code, + /* room for more but then we go internal_box_codes */ + number_box_pars, +} local_box_codes; + +# define first_local_box_code local_left_box_code +# define last_local_box_code local_middle_box_code + +typedef enum local_box_options { + local_box_par_option = 0x1, + local_box_local_option = 0x2, + local_box_keep_option = 0x4, +} local_box_options; + +typedef enum skip_codes { + fil_code, /*tex |\hfil| and |\vfil| */ + fill_code, /*tex |\hfill| and |\vfill| */ + filll_code, /*tex |\hss| and |\vss|, aka |ss_code| */ + fil_neg_code, /*tex |\hfilneg| and |\vfilneg| */ + skip_code, /*tex |\hskip| and |\vskip| */ + mskip_code, /*tex |\mskip| */ +} skip_codes; + +# define first_skip_code fil_code +# define last_skip_code skip_code + +/*tex All
kind of character related codes: */ + +typedef enum charcode_codes { + catcode_charcode, + lccode_charcode, + uccode_charcode, + sfcode_charcode, + hccode_charcode, + hmcode_charcode, + mathcode_charcode, + extmathcode_charcode, + /* extmathcodenum_charcode, */ + delcode_charcode, + extdelcode_charcode, + /* extdelcodenum_charcode, */ +} charcode_codes; + +# define first_charcode_code catcode_charcode +/*define last_charcode_code extdelcodenum_charcode */ +# define last_charcode_code extdelcode_charcode + +typedef enum math_styles { + display_style, /*tex |\displaystyle| */ + cramped_display_style, /*tex |\crampeddisplaystyle| */ + text_style, /*tex |\textstyle| */ + cramped_text_style, /*tex |\crampedtextstyle| */ + script_style, /*tex |\scriptstyle| */ + cramped_script_style, /*tex |\crampedscriptstyle| */ + script_script_style, /*tex |\scriptscriptstyle| */ + cramped_script_script_style, /*tex |\crampedscriptscriptstyle| */ + /* hidden */ + yet_unset_math_style, + former_choice_math_style, + scaled_math_style, + /* even more hidden */ /*tex These can be used to emulate the defaults. 
*/ + all_display_styles, + all_text_styles, + all_script_styles, + all_script_script_styles, + all_math_styles, + all_split_styles, + all_uncramped_styles, + all_cramped_styles, +} math_styles; + +# define first_math_style display_style +# define last_math_style all_cramped_styles + +# define is_valid_math_style(n) (n >= display_style && n <= cramped_script_script_style) +# define are_valid_math_styles(n) (n >= all_display_styles && n <= all_cramped_styles) + +inline static halfword tex_math_style_to_size(halfword s) +{ + if (s == script_style || s == cramped_script_style) { + return script_size; + } else if (s == script_style || s == cramped_script_style) { + return script_script_size; + } else { + return text_size; + } +} + +typedef enum math_choices { + math_display_choice, + math_text_choice, + math_script_choice, + math_script_script_choice, +} math_choices; + +typedef enum math_discretionary_choices { + math_pre_break_choice, + math_post_break_choice, + math_no_break_choice, +} math_discretionary_choices; + +typedef enum math_aboves { + math_numerator_above, + math_denominator_above, +} math_aboves; + +typedef enum math_limits { + math_limits_top, + math_limits_bottom, +} math_limits; + +typedef enum dir_codes { + dir_lefttoright, + dir_righttoleft +} dir_codes; + +typedef enum quantitity_levels { + level_zero, /*tex level for undefined quantities */ + level_one, /*tex outermost level for defined quantities */ +} quantitity_levels; + +typedef enum move_codes { + move_forward_code, + move_backward_code, +} move_codes; + +# define last_move_code move_backward_code + +typedef enum ignore_something_codes { + ignore_space_code, + ignore_par_code, + ignore_argument_code, +} ignore_something_codes; + +# define last_ignore_something_code ignore_argument_code + +typedef enum case_shift_codes { + lower_case_code, + upper_case_code, +} case_shift_codes; + +# define last_case_shift_code upper_case_code + +typedef enum location_codes { + left_location_code, + 
right_location_code, + top_location_code, + bottom_location_code, +} location_codes; + +# define first_location_code left_location_code +# define last_location_code right_location_code + +typedef enum remove_item_codes { + kern_item_code, + penalty_item_code, + skip_item_code, + boundary_item_code, +} remove_item_codes; + +# define last_remove_item_code boundary_item_code + +typedef enum kern_codes { + normal_kern_code, + h_kern_code, /* maybe */ + v_kern_code, /* maybe */ + non_zero_width_kern_code, /* maybe */ +} kern_codes; + +# define last_kern_code normal_kern_code + +typedef enum tex_mskip_codes { + normal_mskip_code, + atom_mskip_code, +} tex_mskip_codes; + +# define last_mskip_code atom_mskip_code + +/*tex + All the other cases are zero but we use an indicator for that. +*/ + +# define normal_code 0 + +# endif diff --git a/source/luametatex/source/tex/texconditional.c b/source/luametatex/source/tex/texconditional.c new file mode 100644 index 000000000..95035f43e --- /dev/null +++ b/source/luametatex/source/tex/texconditional.c @@ -0,0 +1,1386 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + In \LUAMETATEX\ The condition code has been upgraded. Bits and pieces have been optimized and + on top of the extra checks in \LUATEX|\ we have a few more here. In order to get nicer looking + nested conditions |\orelse| has been introduced. Some conditionals are not really needed but + they give less noise when tracing macros. It's also possible to let \LUA\ code behave like + a test. + +*/ + +/*tex + + We consider now the way \TEX\ handles various kinds of |\if| commands. Conditions can be inside + conditions, and this nesting has a stack that is independent of the |save_stack|. 
+ + Four global variables represent the top of the condition stack: |cond_ptr| points to + pushed-down entries, if any; |if_limit| specifies the largest code of a |fi_or_else| command + that is syntactically legal; |cur_if| is the name of the current type of conditional; and + |if_line| is the line number at which it began. + + If no conditions are currently in progress, the condition stack has the special state + |cond_ptr = null|, |if_limit = normal|, |cur_if = 0|, |if_line = 0|. Otherwise |cond_ptr| + points to a two-word node; the |type|, |subtype|, and |link| fields of the first word contain + |if_limit|, |cur_if|, and |cond_ptr| at the next level, and the second word contains the + corresponding |if_line|. + + In |cond_ptr| we keep track of the top of the condition stack while |if_limit| holds the upper + bound on |fi_or_else| codes. The type of conditional being worked on is stored in cur_if and + |if_line| keeps track of the line where that conditional began. When we skip conditional text, + |skip_line| keeps track of the line number where skipping began, for use in error messages. + + All these variables are collected in: + +*/ + +condition_state_info lmt_condition_state = { + .cond_ptr = null, + .if_limit = 0, + .cur_if = 0, + .if_line = 0, + .skip_line = 0, + .chk_num = 0, + .chk_dim = 0, + .if_nesting = 0, +}; + +/*tex + + Here is a procedure that ignores text until coming to an |\or|, |\else|, or |\fi| at level zero + of |\if| \unknown |\fi| nesting. After it has acted, |cur_chr| will indicate the token that was + found, but |cur_tok| will not be set (because this makes the procedure run faster). + + With |l| we keep track of the level of |\if|\unknown|\fi| nesting and |scanner_status| let us + return to the entry status. The |pass_text| function only returns when we have a |fi_or_else|. 
+ +*/ + +static void tex_aux_pass_text(void) +{ + int level = 0; + int status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_skipping; + lmt_condition_state.skip_line = lmt_input_state.input_line; + while (1) { + tex_get_next(); + if (cur_cmd == if_test_cmd) { + switch (cur_chr) { + case fi_code: + if (level == 0) { + lmt_input_state.scanner_status = status; + return; + } else { + --level; + break; + } + case else_code: + case or_code: + if (level == 0) { + lmt_input_state.scanner_status = status; + return; + } else { + break; + } + case or_else_code: + case or_unless_code: + do { + tex_get_next(); + } while (cur_cmd == spacer_cmd); + break; + default: + ++level; + break; + } + } + } +} + +/*tex + We return when we have a |fi_or_else| or when we have a valid |or_else| followed by an + |if_test_cmd|. +*/ + +static int tex_aux_pass_text_x(int tracing_ifs, int tracing_commands) +{ + int level = 0; + int status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_skipping; + lmt_condition_state.skip_line = lmt_input_state.input_line; + while (1) { + tex_get_next(); + if (cur_cmd == if_test_cmd) { + switch (cur_chr) { + case fi_code: + if (level == 0) { + lmt_input_state.scanner_status = status; + return 0; + } else { + --level; + break; + } + case else_code: + case or_code: + if (level == 0) { + lmt_input_state.scanner_status = status; + return 0; + } else { + break; + } + case or_else_code: + case or_unless_code: + if (level == 0) { + int unless = cur_chr == or_unless_code; + if (tracing_commands > 1) { + tex_begin_diagnostic(); + tex_print_str(unless ? 
"{orunless}" : "{orelse}"); + tex_end_diagnostic(); + } else if (tracing_ifs) { + tex_show_cmd_chr(cur_cmd, cur_chr); + } + do { + tex_get_next(); + } while (cur_cmd == spacer_cmd); + if (lmt_condition_state.if_limit == if_code) { + if (cur_cmd == if_test_cmd && cur_chr >= first_real_if_test_code) { + goto OKAY; + } + tex_handle_error( + normal_error_type, + unless ? "No condition after \\orunless" : "No condition after \\orelse", + "I'd expected a proper if test command." + ); + OKAY: + lmt_input_state.scanner_status = status; + return unless; + } + } else { + --level; + } + break; + default: + ++level; + break; + } + } + } +} + +/*tex + + When we begin to process a new |\if|, we set |if_limit = if_code|; then, if |\or| or |\else| or + |\fi| occurs before the current |\if| condition has been evaluated, |\relax| will be inserted. + For example, a sequence of commands like |\ifvoid 1 \else ... \fi| would otherwise require + something after the |1|. + + When a conditional ends that was apparently started in a different input file, the |if_warning| + procedure is invoked in order to update the |if_stack|. If moreover |\tracingnesting| is + positive we want to give a warning message (with the same complications as above). + +*/ + +static void tex_aux_if_warning(void) +{ + /*tex Do we need a warning? */ + int warning = 0; + int index = lmt_input_state.in_stack_data.ptr; + lmt_input_state.base_ptr = lmt_input_state.input_stack_data.ptr; + /*tex Store current state. */ + lmt_input_state.input_stack[lmt_input_state.base_ptr] = lmt_input_state.cur_input; + while (lmt_input_state.in_stack[index].if_ptr == lmt_condition_state.cond_ptr) { + /*tex Set variable |w| to. 
*/ + if (tracing_nesting_par > 0) { + while ((lmt_input_state.input_stack[lmt_input_state.base_ptr].state == token_list_state) || (lmt_input_state.input_stack[lmt_input_state.base_ptr].index > index)) { + --lmt_input_state.base_ptr; + } + if (lmt_input_state.input_stack[lmt_input_state.base_ptr].name > 17) { + warning = 1; + } + } + lmt_input_state.in_stack[index].if_ptr = node_next(lmt_condition_state.cond_ptr); + --index; + } + if (warning) { + tex_begin_diagnostic(); + tex_print_format("[conditional: end of %C%L of a different file]", if_test_cmd, lmt_condition_state.cur_if, lmt_condition_state.if_line); + tex_end_diagnostic(); + if (tracing_nesting_par > 1) { + tex_show_context(); + } + if (lmt_error_state.history == spotless) { + lmt_error_state.history = warning_issued; + } + } +} + +static void tex_aux_push_condition_stack(int code, int unless) +{ + halfword p = tex_get_node(if_node_size); + node_type(p) = if_node; + node_subtype(p) = 0; + node_next(p) = lmt_condition_state.cond_ptr; + if_limit_type(p) = (quarterword) lmt_condition_state.if_limit; + if_limit_subtype(p) = (quarterword) lmt_condition_state.cur_if; + if_limit_step(p) = (singleword) lmt_condition_state.cur_unless; + if_limit_unless(p) = (singleword) lmt_condition_state.if_unless; + if_limit_stepunless(p) = (singleword) lmt_condition_state.if_unless; + if_limit_line(p) = lmt_condition_state.if_line; + lmt_condition_state.cond_ptr = p; + lmt_condition_state.cur_if = cur_chr; + lmt_condition_state.cur_unless = unless; + lmt_condition_state.if_step = code; + lmt_condition_state.if_limit = if_code; + lmt_condition_state.if_line = lmt_input_state.input_line; + ++lmt_condition_state.if_nesting; +} + +static void tex_aux_pop_condition_stack(void) +{ + halfword p; + if (lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].if_ptr == lmt_condition_state.cond_ptr) { + /*tex + Conditionals are possibly not properly nested with files. This test can become an + option. 
+ */ + tex_aux_if_warning(); + } + p = lmt_condition_state.cond_ptr; + --lmt_condition_state.if_nesting; + lmt_condition_state.if_line = if_limit_line(p); + lmt_condition_state.cur_if = if_limit_subtype(p); + lmt_condition_state.cur_unless = if_limit_unless(p); + lmt_condition_state.if_step = if_limit_step(p); + lmt_condition_state.if_unless = if_limit_stepunless(p); + lmt_condition_state.if_limit = if_limit_type(p); + lmt_condition_state.cond_ptr = node_next(p); + tex_free_node(p, if_node_size); +} + +/*tex + Here's a procedure that changes the |if_limit| code corresponding to a given value of + |cond_ptr|. +*/ + +inline static void tex_aux_change_if_limit(int l, halfword p) +{ + if (p == lmt_condition_state.cond_ptr) { + lmt_condition_state.if_limit = l; + } else { + halfword q = lmt_condition_state.cond_ptr; + while (q) { + if (node_next(q) == p) { + if_limit_type(q) = (quarterword) l; + return; + } else { + q = node_next(q); + } + } + tex_confusion("if"); + } +} + +/*tex + + The conditional|\ifcsname| is equivalent to |\expandafter| |\expandafter| |\ifdefined| + |\csname|, except that no new control sequence will be entered into the hash table (once all + tokens preceding the mandatory |\endcsname| have been expanded). Because we have \UTF 8, we + find plenty of small helpers that are used in conversion. + + A csname resolve can itself have nested csname resolving. We keep track of the nesting level + and also remember the last match. + +*/ + +/* moved to texexpand */ + +/*tex + + An active character will be treated as category 13 following |\if \noexpand| or following + |\ifcat \noexpand|. 
+ +*/ + +static void tex_aux_get_x_token_or_active_char(void) +{ + tex_get_x_token(); + // if (cur_cmd == relax_cmd && cur_chr == no_expand_flag && tex_is_active_cs(cs_text(cur_cs))) { + if (cur_cmd == relax_cmd && cur_chr == no_expand_relax_code && tex_is_active_cs(cs_text(cur_cs))) { + cur_cmd = active_char_cmd; + cur_chr = active_cs_value(cs_text(cur_tok - cs_token_flag)); + } +} + +/*tex + + A condition is started when the |expand| procedure encounters an |if_test| command; in that + case |expand| reduces to |conditional|, which is a recursive procedure. + +*/ + +static void tex_aux_missing_equal_error(int code) +{ + tex_handle_error(back_error_type, "Missing = inserted for %C", if_test_cmd, code, + "I was expecting to see '<', '=', or '>'. Didn't." + ); +} + +/*tex + + This is an important function because a bit larger macro package does lots of testing. Compared + to regular \TEX\ there is of course the penalty of larger data structures but there's not much + we can do about that. Then there are more variants, which in turn can lead to a performance hit + as there is more to test and more code involved, which might influence cache hits and such. + However, I already optimized the \LUATEX\ code a bit and here there are some more tiny potential + speedups. But \unknown\ they are hard to measure and especially their impact on a normal run: + \TEX\ is already pretty fast and often these tests themselves are not biggest bottleneck, at + least not in \CONTEXT. My guess is that the speedups compensate the extra if tests so in the end + we're still okay. Expansion, pushing back tokens, accessing memory all over the place, excessive + use of \LUA\ \unknown\ all that has probably way more impact on a run. But I keep an eye on the + next one anyway. 
+ +*/ + +static void tex_aux_show_if_state(halfword code, halfword case_value) +{ + tex_begin_diagnostic(); + switch (code) { + case if_chk_int_code : tex_print_format("{chknum %i}", case_value); break; + case if_val_int_code : tex_print_format("{numval %i}", case_value); break; + case if_cmp_int_code : tex_print_format("{cmpnum %i}", case_value); break; + case if_chk_dim_code : tex_print_format("{chkdim %i}", case_value); break; + case if_val_dim_code : tex_print_format("{dimval %i}", case_value); break; + case if_cmp_dim_code : tex_print_format("{cmpdim %i}", case_value); break; + case if_case_code : tex_print_format("{case %i}", case_value); break; + case if_math_style_code: tex_print_format("{mathstyle %i}", case_value); break; + case if_arguments_code : tex_print_format("{arguments %i}", case_value); break; + default : tex_print_format("{todo %i}", case_value); break; + } + tex_end_diagnostic(); +} + +/*tex Why do we skip over relax? */ + +inline static halfword tex_aux_grab_toks(int expand, int expandlist, int *head) +{ + halfword p = null; + if (expand) { + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd || cur_cmd == relax_cmd); + } else { + do { + tex_get_token(); + } while (cur_cmd == spacer_cmd || cur_cmd == relax_cmd); + } + switch (cur_cmd) { + case left_brace_cmd: + p = expandlist ? tex_scan_toks_expand(1, NULL, 0) : tex_scan_toks_normal(1, NULL); + *head = p; + break; + case register_cmd: + /* is this okay? 
probably not as cur_val can be way to large */ + if (cur_chr == tok_val_level) { + halfword n = tex_scan_toks_register_number(); + p = eq_value(register_toks_location(n)); + break; + } else { + goto DEFAULT; + } + case internal_toks_cmd: + case register_toks_cmd: + p = eq_value(cur_chr); + break; + case call_cmd: + case protected_call_cmd: + case semi_protected_call_cmd: + case tolerant_call_cmd: + case tolerant_protected_call_cmd: + case tolerant_semi_protected_call_cmd: + p = eq_value(cur_cs); + break; + default: + DEFAULT: + { + halfword n; + tex_back_input(cur_tok); + n = tex_scan_toks_register_number(); + p = eq_value(register_toks_location(n)); + break; + } + } + /* skip over the ref count */ + return p ? token_link(p) : null; +} + +inline static halfword tex_aux_scan_comparison(int code) +{ + halfword r; + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + r = cur_tok - other_token; + if ((r < '<') || (r > '>')) { + tex_aux_missing_equal_error(code); + return '='; + } else { + return r; + } +} + +void tex_conditional_if(halfword code, int unless) +{ + /*tex The result or case value. */ + int result = 0; + /*tex The |cond_ptr| corresponding to this conditional: */ + halfword save_cond_ptr; + /*tex Tracing options */ + int tracing_ifs = tracing_ifs_par > 0; + int tracing_commands = tracing_commands_par; + int tracing_both = tracing_ifs && (tracing_commands <= 1); + if (tracing_both) { + tex_show_cmd_chr(cur_cmd, cur_chr); + } + tex_aux_push_condition_stack(code, unless); + save_cond_ptr = lmt_condition_state.cond_ptr; + /*tex Either process |\ifcase| or set |b| to the value of a boolean condition. */ + HERE: + /*tex We can get back here so we need to make sure result is always set! */ + lmt_condition_state.if_step = code; + lmt_condition_state.if_unless = unless; + switch (code) { + case if_char_code: + case if_cat_code: + /*tex Test if two characters match. Seldom used, this one. 
*/ + { + halfword n, m; + tex_aux_get_x_token_or_active_char(); + if ((cur_cmd > active_char_cmd) || (cur_chr > max_character_code)) { + /*tex It's not a character. */ + m = relax_cmd; + n = relax_code; + } else { + m = cur_cmd; + n = cur_chr; + } + tex_aux_get_x_token_or_active_char(); + if ((cur_cmd > active_char_cmd) || (cur_chr > max_character_code)) { + cur_cmd = relax_cmd; + cur_chr = relax_code; + } + if (code == if_char_code) { + result = (n == cur_chr); + } else { + result = (m == cur_cmd); + } + } + goto RESULT; + case if_abs_int_code: + case if_int_code: + /*tex + Test the relation between integers or dimensions. Here we use the fact that |<|, + |=|, and |>| are consecutive ASCII codes. + */ + { + halfword n1 = tex_scan_int(0, NULL); + halfword cp = tex_aux_scan_comparison(code); + halfword n2 = tex_scan_int(0, NULL); + if (code == if_abs_int_code) { + if (n1 < 0) { + n1 = -n1; + } + if (n2 < 0) { + n2 = -n2; + } + } + switch (cp) { + case '<': result = (n1 < n2); break; + /* case '=': result = (n1 == n2); break; */ + case '>': result = (n1 > n2); break; + /* default: break; */ + default : result = (n1 == n2); break; + } + } + goto RESULT; + case if_abs_dim_code: + case if_dim_code: + /*tex + Test the relation between integers or dimensions. Here we use the fact that |<|, + |=|, and |>| are consecutive ASCII codes. + */ + { + scaled n1 = tex_scan_dimen(0, 0, 0, 0, NULL); + halfword cp = tex_aux_scan_comparison(code); + scaled n2 = tex_scan_dimen(0, 0, 0, 0, NULL); + if (code == if_abs_dim_code) { + if (n1 < 0) { + n1 = -n1; + } + if (n2 < 0) { + n2 = -n2; + } + } + switch (cp) { + case '<': result = (n1 < n2); break; + /* case '=': result = (n1 == n2); break; */ + case '>': result = (n1 > n2); break; + /* default: break; */ + default : result = (n1 == n2); break; + } + } + goto RESULT; + case if_odd_code: + /*tex Test if an integer is odd. 
*/ + { + halfword v = tex_scan_int(0, NULL); + result = odd(v); + } + goto RESULT; + case if_vmode_code: + result = abs(cur_list.mode) == vmode; + goto RESULT; + case if_hmode_code: + result = abs(cur_list.mode) == hmode; + goto RESULT; + case if_mmode_code: + result = abs(cur_list.mode) == mmode; + goto RESULT; + case if_inner_code: + result = cur_list.mode < nomode; + goto RESULT; + case if_void_code: + { + halfword n = tex_scan_box_register_number(); + result = box_register(n) == null; + } + goto RESULT; + case if_hbox_code: + { + halfword n = tex_scan_box_register_number(); + halfword p = box_register(n); + result = p && (node_type(p) == hlist_node); + } + goto RESULT; + case if_vbox_code: + { + halfword n = tex_scan_box_register_number(); + halfword p = box_register(n); + result = p && (node_type(p) == vlist_node); + } + goto RESULT; + case if_tok_code: + case if_cstok_code: + { + halfword pp = null; + halfword qq = null; + halfword p, q; + int expand = code == if_tok_code; + int save_scanner_status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_normal; + p = tex_aux_grab_toks(expand, 1, &pp); + q = tex_aux_grab_toks(expand, 1, &qq); + if (p == q) { + /* this is sneaky, a list always is different */ + result = 1; + } else { + while (p && q) { + if (token_info(p) != token_info(q)) { + p = null; + break; + } else { + p = token_link(p); + q = token_link(q); + } + } + result = (! p) && (! q); + } + if (pp) { + tex_flush_token_list(pp); + } + if (qq) { + tex_flush_token_list(qq); + } + lmt_input_state.scanner_status = save_scanner_status; + } + goto RESULT; + case if_x_code: + { + /*tex + Test if two tokens match. Note that |\ifx| will declare two macros different + if one is |\long| or |\outer| and the other isn't, even though the texts of + the macros are the same. + + We need to reset |scanner_status|, since |\outer| control sequences are + allowed, but we might be scanning a macro definition or preamble. 
+ + This is no longer true as we dropped these properties but it does apply to + protected macros and such. + */ + halfword p, q, n; + int save_scanner_status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_normal; + tex_get_next(); + n = cur_cs; + p = cur_cmd; + q = cur_chr; + tex_get_next(); + if (cur_cmd != p) { + result = 0; + } else if (cur_cmd < call_cmd) { + result = cur_chr == q; + } else { + /*tex + Test if two macro texts match. Note also that |\ifx| decides that macros + |\a| and |\b| are different in examples like this: + + \starttyping + \def\a{\c} \def\c{} + \def\b{\d} \def\d{} + \stoptyping + */ + p = token_link(cur_chr); + /*tex Omit reference counts. */ + q = token_link(eq_value(n)); + // is: q = token_link(q); + if (p == q) { + result = 1; + /* + } else if (! q) { + result = 0; + */ + } else { + while (p && q) { + if (token_info(p) != token_info(q)) { + p = null; + break; + } else { + p = token_link(p); + q = token_link(q); + } + } + result = (! p) && (! q); + } + } + lmt_input_state.scanner_status = save_scanner_status; + } + goto RESULT; + case if_true_code: + result = 1; + goto RESULT; + case if_false_code: + result = 0; + goto RESULT; + case if_chk_int_code: + { + lmt_error_state.intercept = 1; /* maybe ++ and -- so that we can nest */ + lmt_error_state.last_intercept = 0; + lmt_condition_state.chk_num = tex_scan_int(0, NULL); /* value is ignored */ + result = lmt_error_state.last_intercept ? 2 : 1; + lmt_error_state.intercept = 0; + lmt_error_state.last_intercept = 0; + goto CASE; + } + case if_val_int_code: + { + lmt_error_state.intercept = 1; + lmt_error_state.last_intercept = 0; + lmt_condition_state.chk_num = tex_scan_int(0, NULL); + result = lmt_error_state.last_intercept ? 4 : (lmt_condition_state.chk_num < 0) ? 1 : (lmt_condition_state.chk_num > 0) ? 
3 : 2; + lmt_error_state.intercept = 0; + lmt_error_state.last_intercept = 0; + goto CASE; + } + case if_cmp_int_code: + { + halfword n1 = tex_scan_int(0, NULL); + halfword n2 = tex_scan_int(0, NULL); + result = (n1 < n2) ? 0 : (n1 > n2) ? 2 : 1; + goto CASE; + } + case if_chk_dim_code: + { + lmt_error_state.intercept = 1; + lmt_error_state.last_intercept = 0; + lmt_condition_state.chk_dim = tex_scan_dimen(0, 0, 0, 0, NULL); /* value is ignored */ + result = lmt_error_state.last_intercept ? 2 : 1; + lmt_error_state.intercept = 0; + lmt_error_state.last_intercept = 0; + goto CASE; + } + case if_val_dim_code: + { + lmt_error_state.intercept = 1; + lmt_error_state.last_intercept = 0; + lmt_condition_state.chk_dim = tex_scan_dimen(0, 0, 0, 0, NULL); + result = lmt_error_state.last_intercept ? 4 : (lmt_condition_state.chk_dim < 0) ? 1 : (lmt_condition_state.chk_dim > 0) ? 3 : 2; + lmt_error_state.intercept = 0; + lmt_error_state.last_intercept = 0; + goto CASE; + } + case if_cmp_dim_code: + { + scaled n1 = tex_scan_dimen(0, 0, 0, 0, NULL); + scaled n2 = tex_scan_dimen(0, 0, 0, 0, NULL); + result = (n1 < n2) ? 0 : (n1 > n2) ? 2 : 1; + goto CASE; + } + case if_case_code: + /*tex Select the appropriate case and |return| or |goto common_ending|. */ + result = tex_scan_int(0, NULL); + goto CASE; + case if_def_code: + /*tex + The conditional |\ifdefined| tests if a control sequence is defined. We need to + reset |scanner_status|, since |\outer| control sequences are allowed, but we + might be scanning a macro definition or preamble. + */ + { + int save_scanner_status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_normal; + tex_get_next(); + result = cur_cmd != undefined_cs_cmd; + lmt_input_state.scanner_status = save_scanner_status; + goto RESULT; + } + case if_cs_code: + result = tex_is_valid_csname(); + goto RESULT; + case if_in_csname_code: + /*tex This one will go away. 
*/ + result = lmt_expand_state.cs_name_level; + goto RESULT; + case if_font_char_code: + /*tex The conditional |\iffontchar| tests the existence of a character in a font. */ + { + halfword fnt = tex_scan_font_identifier(NULL); + halfword chr = tex_scan_char_number(0); + result = tex_char_exists(fnt, chr); + } + goto RESULT; + case if_condition_code: + /*tex This can't happen! */ + goto RESULT; + case if_flags_code: + { + singleword flag, fl; + tex_get_r_token(); + flag = eq_flag(cur_cs); + /* todo: each prefix */ + tex_get_token(); + if (cur_cmd == prefix_cmd) { + switch (cur_chr) { + case permanent_code : result = is_permanent (flag); break; + case immutable_code : result = is_immutable (flag); break; + case mutable_code : result = is_mutable (flag); break; + case noaligned_code : result = is_noaligned (flag); break; + case instance_code : result = is_instance (flag); break; + case untraced_code : result = is_untraced (flag); break; + case global_code : result = is_global (flag); break; + case tolerant_code : result = is_tolerant (flag); break; + case protected_code : result = is_protected (flag); break; + case overloaded_code : result = is_overloaded (flag); break; + case aliased_code : result = is_aliased (flag); break; + case immediate_code : result = is_immediate (flag); break; + case semiprotected_code : result = is_semiprotected(flag); break; + } + } else { + tex_back_input(cur_tok); + fl = (singleword) tex_scan_int(1, NULL); /* maybe some checking or masking is needed here */ + result = (flag & fl) == fl; + if (! result) { + if (is_protected(fl)) { + result = is_protected_cmd(eq_type(cur_cs)); + } else if (is_tolerant(fl)) { + result = is_tolerant_cmd(eq_type(cur_cs)); + } else if (is_global(fl)) { + result = eq_level(cur_cs) == level_one; + } + } + } + goto RESULT; + } + case if_empty_cmd_code: + { + tex_get_token(); + EMPTY_CHECK_AGAIN: + switch (cur_cmd) { + case call_cmd: + result = ! 
token_link(cur_chr); + break; + case internal_toks_reference_cmd: + case register_toks_reference_cmd: + result = ! token_link(cur_chr); + break; + case register_cmd: + /*tex See |tex_aux_grab_toks|. */ + if (cur_chr == tok_val_level) { + halfword n = tex_scan_toks_register_number(); + halfword p = eq_value(register_toks_location(n)); + result = ! p || ! token_link(p); + } else { + result = 0; + } + break; + case internal_toks_cmd: + case register_toks_cmd: + { + halfword p = eq_value(cur_chr); + result = ! p || ! token_link(p); + } + break; + case cs_name_cmd: + if (cur_chr == last_named_cs_code && lmt_scanner_state.last_cs_name != null_cs) { + cur_cmd = eq_type(lmt_scanner_state.last_cs_name); + cur_chr = eq_value(lmt_scanner_state.last_cs_name); + goto EMPTY_CHECK_AGAIN; + } + /* fall through */ + default: + result = 0; + } + goto RESULT; + } + case if_relax_cmd_code: + { + tex_get_token(); + result = cur_cmd == relax_cmd; + goto RESULT; + } + case if_boolean_code: + result = tex_scan_int(0, NULL) ? 1 : 0; + goto RESULT; + case if_numexpression_code: + result = tex_scanned_expression(int_val_level) ? 1 : 0; + goto RESULT; + case if_dimexpression_code: + result = tex_scanned_expression(dimen_val_level) ? 1 : 0; + goto RESULT; + case if_math_parameter_code: + /*tex + A value of |1| means that the parameter is set to a non-zero value, while |2| means + that it is unset. 
+ */ + { + // result = 0; + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_cmd == set_math_parameter_cmd) { + int code = cur_chr; + int style = tex_scan_math_style_identifier(0, 0); + if (tex_get_math_parameter(style, code, NULL) == max_dimen) { + result = 2; + } else if (result) { + result = 1; + } + } else { + tex_normal_error("mathparameter", "a valid parameter expected"); + result = 0; + } + goto CASE; + } + case if_math_style_code: + result = tex_current_math_style(); + goto CASE; + case if_arguments_code: + result = lmt_expand_state.arguments; + goto CASE; + case if_parameters_code: + /*tex + The result has the last non-null count. We could have the test in the for but let's + keep it readable. + */ + result = tex_get_parameter_count(); + goto CASE; + case if_parameter_code: + { + /*tex + We need to pick up the next token but avoid replacement by the parameter which + happens in the getters: 0 = no parameter, 1 = okay, 2 = empty. This permits + usage like |\ifparameter#2\or yes\else no\fi| as with the other checkers. + */ + if (lmt_input_state.cur_input.loc) { + halfword t = token_info(lmt_input_state.cur_input.loc); + lmt_input_state.cur_input.loc = token_link(lmt_input_state.cur_input.loc); + if (t < cs_token_flag && token_cmd(t) == parameter_reference_cmd) { + // result = token_info(input_state.parameter_stack[input_state.cur_input.parameter_start + token_chr(t) - 1]) != null ? 1 : 2; + result = lmt_input_state.parameter_stack[lmt_input_state.cur_input.parameter_start + token_chr(t) - 1] != null ? 
1 : 2; + } + } + goto CASE; + } + case if_has_tok_code: + { + halfword qq = null; + halfword p, q; + int save_scanner_status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_normal; + p = tex_get_token(); + q = tex_aux_grab_toks(0, 0, &qq); + if (p == q) { + result = 1; + } else { + result = 0; + while (q) { + if (p == token_info(q)) { + result = 1; + break; + } else { + q = token_link(q); + } + } + } + if (qq) { + tex_flush_token_list(qq); + } + lmt_input_state.scanner_status = save_scanner_status; + goto RESULT; + } + case if_has_toks_code: + case if_has_xtoks_code: + { + halfword pp = null; + halfword p; + int expand = code == if_has_xtoks_code; + int save_scanner_status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_normal; + p = tex_aux_grab_toks(expand, expand, &pp); + if (p) { + halfword qq = null; + halfword q = tex_aux_grab_toks(expand, expand, &qq); + if (p == q) { + result = 1; + } else { + int qh = q; + int ph = p; + result = 0; + while (p && q) { + halfword pt = token_info(p); + halfword qt = token_info(q); + AGAIN: + if (pt == qt) { + p = token_link(p); + q = token_link(q); + } else if (token_cmd(pt) == ignore_cmd + && token_cmd(qt) >= ignore_cmd && token_cmd(qt) <= other_char_cmd) { + p = token_link(p); + if (token_chr(pt) == token_chr(qt)) { + q = token_link(q); + } else { + pt = token_info(p); + goto AGAIN; + } + } else { + p = ph; + q = token_link(qh); + qh = q; + } + if (! 
p) { + result = 1; + break; + } + } + } + if (qq) { + tex_flush_token_list(qq); + } + } + if (pp) { + tex_flush_token_list(pp); + } + lmt_input_state.scanner_status = save_scanner_status; + goto RESULT; + } + case if_has_char_code: + { + halfword tok; + halfword qq = null; + halfword q; + int save_scanner_status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_normal; + tok = tex_get_token(); + q = tex_aux_grab_toks(0, 0, &qq); + if (q) { + int nesting = 0; + result = 0; + while (q) { + if (! nesting && token_info(q) == tok) { + result = 1; + break; + } else if (token_cmd(token_info(q)) == left_brace_cmd) { + nesting += 1; + } else if (token_cmd(token_info(q)) == right_brace_cmd) { + nesting -= 1; + } + q = token_link(q); + } + } + if (qq) { + tex_flush_token_list(qq); + } + lmt_input_state.scanner_status = save_scanner_status; + goto RESULT; + } + case if_insert_code: + { + /* beware: it tests */ + result = ! tex_insert_is_void(tex_scan_int(0, NULL)); + goto RESULT; + } + // case if_bitwise_and_code: + // { + // halfword n1 = scan_int(0, NULL); + // halfword n2 = scan_int(0, NULL); + // result = n1 & n2 ? 
1 : 0; + // goto RESULT; + // } + default: + { + int class; + strnumber u = tex_save_cur_string(); + int save_scanner_status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_normal; + lmt_token_state.luacstrings = 0; + class = lmt_function_call_by_class(code - last_if_test_code, 0, &result); + tex_restore_cur_string(u); + lmt_input_state.scanner_status = save_scanner_status; + if (lmt_token_state.luacstrings > 0) { + tex_lua_string_start(); + /* bad */ + } + switch (class) { + case lua_value_integer_code: + case lua_value_cardinal_code: + case lua_value_dimension_code: + goto CASE; + case lua_value_boolean_code: + goto RESULT; + default: + result = 0; + goto RESULT; + } + } + } + CASE: + /*tex + To be considered: |if (unless) { result = max_integer - result; }| so that we hit |\else| + and can do |\unless \ifcase \zero... \else \fi|. + */ + if (tracing_commands > 1) { + tex_aux_show_if_state(code, result); + } + while (result) { + unless = tex_aux_pass_text_x(tracing_ifs, tracing_commands); + if (tracing_both) { + tex_show_cmd_chr(cur_cmd, cur_chr); + } + if (lmt_condition_state.cond_ptr == save_cond_ptr) { + if (cur_chr >= first_real_if_test_code) { + /*tex + We have an |or_else_cmd| here, but keep in mind that |\expandafter \ifx| and + |\unless \ifx| and |\ifcondition| don't work in such cases! We stay in this + function call. + */ + if (cur_chr == if_condition_code) { + // goto COMMON_ENDING; + tex_aux_pop_condition_stack(); + return; + } else { + code = cur_chr; + goto HERE; + } + } else if (cur_chr == or_code) { + --result; + } else { + goto COMMON_ENDING; + } + } else if (cur_chr == fi_code) { + tex_aux_pop_condition_stack(); + } + } + tex_aux_change_if_limit(or_code, save_cond_ptr); + /*tex Wait for |\or|, |\else|, or |\fi|. */ + return; + RESULT: + if (unless) { + result = ! result; + } + if (tracing_commands > 1) { + /*tex Display the value of |b|. */ + tex_begin_diagnostic(); + tex_print_str(result ? 
"{true}" : "{false}"); + tex_end_diagnostic(); + } + if (result) { + tex_aux_change_if_limit(else_code, save_cond_ptr); + /*tex Wait for |\else| or |\fi|. */ + return; + } else { + /*tex + Skip to |\else| or |\fi|, then |goto common_ending|. In a construction like |\if \iftrue + abc\else d\fi|, the first |\else| that we come to after learning that the |\if| is false + is not the |\else| we're looking for. Hence the following curious logic is needed. + */ + while (1) { + unless = tex_aux_pass_text_x(tracing_ifs, tracing_commands); + if (tracing_both) { + tex_show_cmd_chr(cur_cmd, cur_chr); + } + if (lmt_condition_state.cond_ptr == save_cond_ptr) { + /* still fragile for |\unless| and |\expandafter| etc. */ + if (cur_chr >= first_real_if_test_code) { + if (cur_chr == if_condition_code) { + // goto COMMON_ENDING; + tex_aux_pop_condition_stack(); + return; + } else { + code = cur_chr; + goto HERE; + } + } else if (cur_chr != or_code) { + goto COMMON_ENDING; + } else { + tex_handle_error( + normal_error_type, + "Extra \\or", + "I'm ignoring this; it doesn't match any \\if." + ); + } + } else if (cur_chr == fi_code) { + tex_aux_pop_condition_stack(); + } + } + } + COMMON_ENDING: + if (cur_chr == fi_code) { + tex_aux_pop_condition_stack(); + } else { + /*tex Wait for |\fi|. */ +//lmt_condition_state.if_step = code; + + lmt_condition_state.if_limit = fi_code; + } +} + +/*tex + Terminate the current conditional and skip to |\fi| The processing of conditionals is complete + except for the following code, which is actually part of |expand|. It comes into play when + |\or|, |\else|, or |\fi| is scanned. 
+*/
+
+void tex_conditional_fi_or_else(void)
+{
+    /*tex The tracing flag is sampled once, on entry. */
+    int tracing_ifs = tracing_ifs_par > 0;
+    if (tracing_ifs && tracing_commands_par <= 1) {
+        tex_show_cmd_chr(if_test_cmd, cur_chr);
+    }
+    switch (cur_chr) {
+        case or_else_code:
+        case or_unless_code:
+            /*tex Keep reading until the current token is no longer a spacer. */
+            do {
+                tex_get_next();
+            } while (cur_cmd == spacer_cmd);
+            break;
+        default:
+            if (cur_chr > lmt_condition_state.if_limit) {
+                if (lmt_condition_state.if_limit == if_code) {
+                    /*tex
+                        The condition is not yet evaluated.
+                    */
+                    tex_insert_relax_and_cur_cs();
+                } else {
+                    tex_handle_error(normal_error_type,
+                        "Extra %C",
+                        if_test_cmd, cur_chr,
+                        "I'm ignoring this; it doesn't match any \\if."
+                    );
+                }
+                /*tex We don't pop the stack! */
+                return;
+            }
+            break;
+    }
+    /*tex Skip to |\fi|: pass over text until the current token is |\fi|. */
+    for (;;) {
+        if (cur_cmd == if_test_cmd && cur_chr == fi_code) {
+            break;
+        }
+        tex_aux_pass_text();
+        if (tracing_ifs) {
+            tex_show_cmd_chr(cur_cmd, cur_chr);
+        }
+    }
+    /*tex Inline variant: */
+    /*
+    if (! (cur_cmd == if_test_cmd && cur_chr == fi_code)) {
+        int level = 0;
+        int status = input_state.scanner_status;
+        input_state.scanner_status = scanner_is_skipping;
+        while (1) {
+          RESTART:
+            condition_state.skip_line = input_state.input_line;
+            while (1) {
+                get_next();
+                if (cur_cmd == if_test_cmd) {
+                    switch (cur_chr) {
+                        case fi_code:
+                            if (level == 0) {
+                                goto DONE;
+                            } else {
+                                --level;
+                                break;
+                            }
+                        case else_code:
+                        case or_code:
+                            if (level == 0) {
+                                if (tracing_ifs) {
+                                    show_cmd_chr(cur_cmd, cur_chr);
+                                }
+                                goto RESTART;
+                            } else {
+                                break;
+                            }
+                        case or_else_code:
+                            do {
+                                get_next();
+                            } while (cur_cmd == spacer_cmd);
+                            break;
+                        default:
+                            ++level;
+                            break;
+                    }
+                }
+            }
+        }
+      DONE:
+        if (tracing_ifs) {
+            show_cmd_chr(cur_cmd, cur_chr);
+        }
+        input_state.scanner_status = status;
+    }
+    */
+    tex_aux_pop_condition_stack();
+}
+
+/*tex
+
+    Negate a boolean conditional and |goto reswitch|. The result of a boolean condition is reversed
+    when the conditional is preceded by |\unless|. We silently ignore |\unless| for those tests that
+    act like an |\ifcase|.
In \ETEX\ there was an error message.
+
+*/
+
+void tex_conditional_unless(void)
+{
+    tex_get_token();
+    if (cur_cmd != if_test_cmd) {
+        /*tex Something other than a conditional followed |\unless|. */
+        tex_handle_error(back_error_type,
+            "You can't use '\\unless' before '%C'",
+            cur_cmd, cur_chr,
+            "Continue, and I'll forget that it ever happened."
+        );
+        return;
+    }
+    if (tracing_commands_par > 1) {
+        tex_show_cmd_chr(cur_cmd, cur_chr);
+    }
+    /*tex For |\ifcondition| nothing is called from here. */
+    if (cur_chr != if_condition_code) {
+        tex_conditional_if(cur_chr, 1);
+    }
+}
+
+/*tex
+    Report every entry of the condition stack, innermost first, or tell that no conditional is
+    active.
+*/
+
+void tex_show_ifs(void)
+{
+    halfword cond_ptr = lmt_condition_state.cond_ptr;
+    if (! cond_ptr) {
+        tex_print_str("[conditional: none active]");
+        return;
+    }
+    /*tex First we determine the depth of the |\if ... \fi| nesting. */
+    int n = 0;
+    for (halfword p = cond_ptr; p; p = node_next(p)) {
+        ++n;
+    }
+    /*tex
+        Now reporting can start. The innermost state lives in |lmt_condition_state|, the outer
+        ones are read back from the stack nodes while we walk the list.
+    */
+    int cur_if = lmt_condition_state.cur_if;
+    int cur_unless = lmt_condition_state.cur_unless;
+    int if_step = lmt_condition_state.if_step;
+    int if_unless = lmt_condition_state.if_unless;
+    int if_line = lmt_condition_state.if_line;
+    int if_limit = lmt_condition_state.if_limit;
+    do {
+        const char *stepprefix = if_unless ? "unless " : "";
+        if (cur_unless && if_line) {
+            tex_print_format("[conditional: level %i, current %C %C, limit %C, %sstep %C, line %i]",
+                n,
+                expand_after_cmd, expand_unless_code,
+                if_test_cmd, cur_if,
+                if_test_cmd, if_limit,
+                stepprefix,
+                if_test_cmd, if_step,
+                if_line
+            );
+        } else if (cur_unless) {
+            tex_print_format("[conditional: level %i, current %C %C, limit %C, %sstep %C]",
+                n,
+                expand_after_cmd, expand_unless_code,
+                if_test_cmd, cur_if,
+                if_test_cmd, if_limit,
+                stepprefix,
+                if_test_cmd, if_step
+            );
+        } else if (if_line) {
+            tex_print_format("[conditional: level %i, current %C, limit %C, %sstep %C, line %i]",
+                n,
+                if_test_cmd, cur_if,
+                if_test_cmd, if_limit,
+                stepprefix,
+                if_test_cmd, if_step,
+                if_line
+            );
+        } else {
+            tex_print_format("[conditional: level %i, current %C, limit %C, %sstep %C]",
+                n,
+                if_test_cmd, cur_if,
+                if_test_cmd, if_limit,
+                stepprefix,
+                if_test_cmd, if_step
+            );
+        }
+        --n;
+        /*tex Load the state that was saved in this stack entry, then move outwards. */
+        cur_if = if_limit_subtype(cond_ptr);
+        cur_unless = if_limit_unless(cond_ptr);
+        if_step = if_limit_step(cond_ptr);
+        if_unless = if_limit_stepunless(cond_ptr);
+        if_line = if_limit_line(cond_ptr);
+        if_limit = if_limit_type(cond_ptr);
+        cond_ptr = node_next(cond_ptr);
+        if (cond_ptr) {
+            tex_print_levels();
+        }
+    } while (cond_ptr);
+}
+
+/*
+void tex_conditional_after_fi(void)
+{
+    halfword t = get_token();
+    int tracing_ifs = tracing_ifs_par > 0;
+    int tracing_commands = tracing_commands_par > 0;
+    while (1) {
+        pass_text_x(tracing_ifs, tracing_commands);
+        if (cur_chr == fi_code) {
+            pop_condition_stack();
+            break;
+        } else {
+            // some error
+        }
+    }
+    back_input(t);
+}
+*/
\ No newline at end of file
diff --git a/source/luametatex/source/tex/texconditional.h b/source/luametatex/source/tex/texconditional.h
new file mode 100644
index 000000000..f3de5dcdb
--- /dev/null
+++ b/source/luametatex/source/tex/texconditional.h
@@ -0,0 +1,131 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+# ifndef LMT_CONDITIONAL_H
+# define LMT_CONDITIONAL_H
+
+/*tex
+
+    The next list should be in sync with |if_branch_mapping| at the top of the |c| file with the
+    same name. The next ones also go on the condition stack so we need to retain this order and
+    organization.
+
+    There is a catch here: the codes of the |if_test_cmd|, |fi_or_else_cmd| and |or_else_cmd| are
+    all in this enumeration. This has to do with the history of not always checking for the cmd
+    code in the fast skipping branches. We could change that but not now.
+
+    Well, in the end I combined |if_test_cmd|, |fi_or_else_cmd| and |or_else_cmd| because they use
+    the same chr range anyway and it also simplifies some of the testing (especially after some
+    more robust cmd/chr checking was added, and after that the |fi_or_else_cmd| and |or_else_cmd|
+    were combined). The main motivation is that we can have a more consistent \LUA\ token interface
+    end. It is debatable as we divert from the original, but we already did that by introducing
+    more conditionals, |\orelse| and the generic |\ifcondition| that also demanded all kinds of
+    adaptations. Sorry. The comments are mostly the same, including references to the older cmd
+    codes (pre 2.07 there used to be some switch/case statements in places but these were flattened).
+
+    Btw, the |\unless| prefix is kept out of this because it relates to expansion and prefixes are
+    separate anyway. It would make the code less pretty.
+
+    One reason for a split in cmd codes is performance but we didn't lose on the change.
+
+*/
+
+typedef enum if_test_codes {
+    /*tex These are private chr codes: */
+
+    no_if_code,             /*tex We're not in a condition. */
+    if_code,                /*tex We have a condition. */
+
+    /*tex These are public chr codes: */
+
+    fi_code,                /*tex |\fi| */
+    else_code,              /*tex |\else| */
+    or_code,                /*tex |\or| */
+    or_else_code,           /*tex |\orelse| */
+    or_unless_code,         /*tex |\orunless| */
+
+    /*tex Here come the \if... codes: */
+
+    if_char_code,           /*tex |\if| */
+    if_cat_code,            /*tex |\ifcat| */
+    if_abs_int_code,        /*tex |\ifabsnum| */
+    if_int_code,            /*tex |\ifnum| */
+    if_abs_dim_code,        /*tex |\ifabsdim| */
+    if_dim_code,            /*tex |\ifdim| */
+    if_odd_code,            /*tex |\ifodd| */
+    if_vmode_code,          /*tex |\ifvmode| */
+    if_hmode_code,          /*tex |\ifhmode| */
+    if_mmode_code,          /*tex |\ifmmode| */
+    if_inner_code,          /*tex |\ifinner| */
+    if_void_code,           /*tex |\ifvoid| */
+    if_hbox_code,           /*tex |\ifhbox| */
+    if_vbox_code,           /*tex |\ifvbox| */
+    if_tok_code,            /*tex |\iftok| */
+    if_cstok_code,          /*tex |\ifcstok| */
+    if_x_code,              /*tex |\ifx| */
+    if_true_code,           /*tex |\iftrue| */
+    if_false_code,          /*tex |\iffalse| */
+    if_chk_int_code,        /*tex |\ifchknum| */
+    if_val_int_code,        /*tex presumably |\ifnumval|; this said |\ifcmpnum|, same as the next entry (TODO confirm) */
+    if_cmp_int_code,        /*tex |\ifcmpnum| */
+    if_chk_dim_code,        /*tex |\ifchkdim| */
+    if_val_dim_code,        /*tex presumably |\ifdimval|; this said |\ifchkdim|, same as the previous entry (TODO confirm) */
+    if_cmp_dim_code,        /*tex |\ifcmpdim| */
+    if_case_code,           /*tex |\ifcase| */
+    if_def_code,            /*tex |\ifdefined| */
+    if_cs_code,             /*tex |\ifcsname| */
+    if_in_csname_code,      /*tex |\ifincsname| */
+    if_font_char_code,      /*tex |\iffontchar| */
+    if_condition_code,      /*tex |\ifcondition| */
+    if_flags_code,          /*tex |\ifflags| */
+    if_empty_cmd_code,      /*tex |\ifempty| */
+    if_relax_cmd_code,      /*tex |\ifrelax| */
+    if_boolean_code,        /*tex |\ifboolean| */
+    if_numexpression_code,  /*tex |\ifnumexpression| */
+    if_dimexpression_code,  /*tex |\ifdimexpression| */
+    if_math_parameter_code, /*tex |\ifmathparameter| */
+    if_math_style_code,     /*tex |\ifmathstyle| */
+    if_arguments_code,      /*tex |\ifarguments| */
+    if_parameters_code,     /*tex |\ifparameters| */
+    if_parameter_code,      /*tex |\ifparameter| */
+    if_has_tok_code,        /*tex |\ifhastok| */
+    if_has_toks_code,       /*tex |\ifhastoks| */
+    if_has_xtoks_code,      /*tex |\ifhasxtoks| */
+    if_has_char_code,       /*tex |\ifhaschar| */
+    if_insert_code          /*tex |\ifinsert| */
+ // if_bitwise_and_code,    /*tex |\ifbitwiseand| */
+} if_test_codes;
+
+/*tex The full public range starts at |\fi|; the "real" tests start at |\if|. */
+
+# define first_if_test_code fi_code
+# define last_if_test_code  if_insert_code
+//define last_if_test_code  if_bitwise_and_code
+
+# define first_real_if_test_code if_char_code
+# define last_real_if_test_code  if_insert_code
+//define last_real_if_test_code  if_bitwise_and_code
+
+typedef struct condition_state_info {
+    halfword cond_ptr;   /*tex top of the condition stack */
+    int      cur_if;     /*tex type of conditional being worked on */
+    int      cur_unless; /*tex when set, |tex_show_ifs| reports the conditional with an |\unless| prefix */
+    int      if_step;    /*tex reported as "step" by |tex_show_ifs| */
+    int      if_unless;  /*tex when set, the reported step gets an "unless " prefix */
+    int      if_limit;   /*tex upper bound on |fi_or_else| codes */
+    int      if_line;    /*tex line where that conditional began */
+    int      skip_line;  /*tex skipping began here */
+    halfword chk_num;
+    scaled   chk_dim;
+    halfword if_nesting;
+    halfword padding;
+} condition_state_info ;
+
+extern condition_state_info lmt_condition_state;
+
+extern void tex_conditional_if         (halfword code, int unless);
+extern void tex_conditional_fi_or_else (void);
+extern void tex_conditional_unless     (void);
+extern void tex_show_ifs               (void);
+/*     void tex_conditional_after_fi   (void); */
+
+# endif
diff --git a/source/luametatex/source/tex/texdirections.c b/source/luametatex/source/tex/texdirections.c
new file mode 100644
index 000000000..400b4c00d
--- /dev/null
+++ b/source/luametatex/source/tex/texdirections.c
@@ -0,0 +1,172 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+/*tex
+
+    In \LUATEX\ we started with the \OMEGA\ direction model, although only a handful of directions
+    is supported there (four to be precise). For l2r and r2l typesetting the frontend can basically
+    ignore directions. Only the font handler needs to be direction aware. The vertical directions in
+    \LUATEX\ demand swapping height and width occasionally when doing calculations.
In the end it is
+    the backend code that does the hard work.
+
+    In the end, in \LUAMETATEX\ we only kept the horizontal directions. The vertical ones were not
+    really useful and didn't even work well. It's up to the macro package to cook up proper
+    solutions. The simplification (and rewrite) of the code also resulted in a more advanced box
+    model (with rotation and offsets) that can help implementing vertical rendering, but that code
+    is not here.
+
+*/
+
+# include "luametatex.h"
+
+dir_state_info lmt_dir_state = {
+    .text_dir_ptr = null,
+    .padding      = 0,
+};
+
+/*tex The next two are used by the linebreak routine; they could be macros. */
+
+/*tex Link a copy of |d| in front of |p| and return that copy, the new top of the stack. */
+
+inline static halfword tex_aux_push_dir_node(halfword p, halfword d)
+{
+    halfword n = tex_copy_node(d);
+    node_next(n) = p;
+    return n;
+}
+
+/*tex Flush |p| and return the node that followed it, the new top of the stack. */
+
+inline static halfword tex_aux_pop_dir_node(halfword p)
+{
+    halfword n = node_next(p);
+    tex_flush_node(p);
+    return n;
+}
+
+/*tex
+    Update the linebreak direction stack for dir node |p| and return the direction that is in
+    effect afterwards: the one of |p| after a push, otherwise the one on top of the stack, or
+    |initial| when the stack has become empty.
+
+    NOTE(review): unlike |tex_sanitize_dir_state| the pop is not guarded against an empty stack;
+    confirm that callers never pass an unmatched cancel node, or that null is harmless for
+    |node_next| and |tex_flush_node|.
+*/
+
+halfword tex_update_dir_state(halfword p, halfword initial)
+{
+    if (node_subtype(p) == normal_dir_subtype) {
+        lmt_linebreak_state.dir_ptr = tex_aux_push_dir_node(lmt_linebreak_state.dir_ptr, p);
+        return dir_direction(p);
+    } else {
+        lmt_linebreak_state.dir_ptr = tex_aux_pop_dir_node(lmt_linebreak_state.dir_ptr);
+        if (lmt_linebreak_state.dir_ptr) {
+            return dir_direction(lmt_linebreak_state.dir_ptr);
+        } else {
+            return initial;
+        }
+    }
+}
+
+/*tex
+    Walk the nodes from |first| up to (but not including) |last| and replay the dir nodes on the
+    linebreak direction stack. The result is the direction on top of the stack, or |initial| when
+    the stack is empty.
+*/
+
+halfword tex_sanitize_dir_state(halfword first, halfword last, halfword initial)
+{
+    for (halfword e = first; e && e != last; e = node_next(e)) {
+        if (node_type(e) == dir_node) {
+            if (node_subtype(e) == normal_dir_subtype) {
+                lmt_linebreak_state.dir_ptr = tex_aux_push_dir_node(lmt_linebreak_state.dir_ptr, e);
+            } else if (lmt_linebreak_state.dir_ptr && dir_direction(lmt_linebreak_state.dir_ptr) == dir_direction(e)) {
+                /*tex A bit strange test. */
+                lmt_linebreak_state.dir_ptr = tex_aux_pop_dir_node(lmt_linebreak_state.dir_ptr);
+            }
+        }
+    }
+    if (lmt_linebreak_state.dir_ptr) {
+        return dir_direction(lmt_linebreak_state.dir_ptr);
+    } else {
+        return initial;
+    }
+}
+
+/*tex
+    For every entry that is still on the linebreak direction stack a cancel dir node with the
+    same direction is inserted after |tail| (and before what followed the original |tail|). The
+    stack itself is left untouched. The last inserted node is returned, or |tail| when the stack
+    is empty.
+*/
+
+halfword tex_complement_dir_state(halfword tail)
+{
+    halfword e = node_next(tail);
+    for (halfword p = lmt_linebreak_state.dir_ptr; p ; p = node_next(p)) {
+        halfword s = tex_new_dir(cancel_dir_subtype, dir_direction(p));
+        tex_attach_attribute_list_copy(s, tail);
+        tex_couple_nodes(tail, s);
+        tex_try_couple_nodes(s, e);
+        tail = s;
+    }
+    return tail;
+}
+
+/*tex The text direction stack starts out with one node that carries the default direction. */
+
+void tex_initialize_directions(void)
+{
+    lmt_dir_state.text_dir_ptr = tex_new_dir(normal_dir_subtype, direction_def_value);
+}
+
+/*tex Only the top node is flushed here. */
+
+void tex_cleanup_directions(void)
+{
+    tex_flush_node(lmt_dir_state.text_dir_ptr); /* tex_free_node(lmt_dir_state.text_dir_ptr, dir_node_size) */
+}
+
+/*tex Create a dir node with the given subtype and direction, tagged with the current level. */
+
+halfword tex_new_dir(quarterword subtype, halfword direction)
+{
+    halfword p = tex_new_node(dir_node, subtype);
+    dir_direction(p) = direction;
+    dir_level(p) = cur_level;
+    return p;
+}
+
+/* todo: |\tracingdirections| */
+
+/*tex
+    When the top of the text direction stack belongs to the current level its direction is
+    overwritten, otherwise a new node is pushed.
+*/
+
+void tex_push_text_dir_ptr(halfword val)
+{
+    if (dir_level(lmt_dir_state.text_dir_ptr) == cur_level) {
+        /*tex update */
+        dir_direction(lmt_dir_state.text_dir_ptr) = val;
+    } else {
+        /*tex add */
+        halfword text_dir_tmp = tex_new_dir(normal_dir_subtype, val);
+        node_next(text_dir_tmp) = lmt_dir_state.text_dir_ptr;
+        lmt_dir_state.text_dir_ptr = text_dir_tmp;
+    }
+}
+
+/*tex The top of the text direction stack is only removed when it belongs to the current level. */
+
+void tex_pop_text_dir_ptr(void)
+{
+    halfword text_dir_ptr = lmt_dir_state.text_dir_ptr;
+    if (dir_level(text_dir_ptr) == cur_level) {
+        /*tex remove */
+        halfword text_dir_tmp = node_next(text_dir_ptr);
+        tex_flush_node(text_dir_ptr);
+        lmt_dir_state.text_dir_ptr = text_dir_tmp;
+    }
+}
+
+/*tex The setters below silently ignore a direction that is not valid. */
+
+void tex_set_math_dir(halfword d)
+{
+    if (valid_direction(d)) {
+        update_tex_math_direction(d);
+    }
+}
+
+void tex_set_par_dir(halfword d)
+{
+    if (valid_direction(d)) {
+        update_tex_par_direction(d);
+    }
+}
+
+/*tex The text and line variants only differ in the second argument of the injector. */
+
+void tex_set_text_dir(halfword d)
+{
+    if (valid_direction(d)) {
+        tex_inject_text_or_line_dir(d, 0);
+        update_tex_text_direction(d);
+        update_tex_internal_dir_state(internal_dir_state_par + 1);
+    }
+}
+
+void tex_set_line_dir(halfword d)
+{
+    if (valid_direction(d)) {
+        tex_inject_text_or_line_dir(d, 1);
+        update_tex_text_direction(d);
+        update_tex_internal_dir_state(internal_dir_state_par + 1);
+    }
+}
+
+/*tex
+    NOTE(review): |box_register(b)| is used without a check for a void register; confirm that the
+    callers only pass registers that hold a box.
+*/
+
+void tex_set_box_dir(halfword b, singleword d)
+{
+    if (valid_direction(d)) {
+        box_dir(box_register(b)) = (singleword) d;
+    }
+}
diff --git a/source/luametatex/source/tex/texdirections.h b/source/luametatex/source/tex/texdirections.h
new file mode 100644
index 000000000..cb85c2485
--- /dev/null
+++ b/source/luametatex/source/tex/texdirections.h
@@ -0,0 +1,123 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+# ifndef LMT_DIRECTIONS_H
+# define LMT_DIRECTIONS_H
+
+/*tex
+
+    Originally we had quarterwords but some compiler versions then keep complaining about
+    comparisons always being true (something enumeration not being integer or so). Interesting it
+    all worked well and suddenly gcc on openbsd complained. So, in the end I decided to just make
+    these fields halfwords too. It leaves room for growth ... who knows what is needed some day.
+
+    Actually, as we have only two subtypes now, I have considered:
+
+    \starttyping
+    0 = begin l2r    2 = end l2r
+    1 = begin r2l    3 = end r2l
+    \stoptyping
+
+    in which case a regular direction node becomes smaller (no dir_dir any more). But, it comes with
+    a change at the \LUA\ end too, so it's a no-go in the end.
+
+    For the moment we keep some geometry values here but these might move to their own file when
+    there is more to it.
+
+*/
+
+# include "luametatex.h"
+
+typedef struct dir_state_info {
+    halfword text_dir_ptr;
+    /* alignment */
+    int      padding;
+} dir_state_info;
+
+extern dir_state_info lmt_dir_state;
+
+typedef enum direction_codes {
+    direction_unknown = 0xFF,
+    direction_l2r     = 0,
+    direction_r2l     = 1
+} direction_codes;
+
+# define direction_def_value   direction_l2r
+# define direction_min_value   direction_l2r
+# define direction_max_value   direction_r2l
+
+# define geometry_def_value    0
+# define geometry_min_value    0
+# define geometry_max_value    0xFF
+
+# define orientation_def_value 0
+# define orientation_min_value 0
+# define orientation_max_value 0x0FFF
+
+# define anchor_def_value      0
+# define anchor_min_value      0
+# define anchor_max_value      0x0FFF
+
+/*tex
+    In the function-like macros below every parameter is parenthesized in the expansion, so that
+    an argument like |a | b| or |a - b| is evaluated as a whole. The arguments can be evaluated
+    more than once, so they should be free of side effects.
+*/
+
+# define orientationonly(t) ((t) & 0x000F)
+
+# define valid_direction(d)   (((d) >= direction_min_value)   && ((d) <= direction_max_value))
+# define valid_geometry(g)    (((g) >= geometry_min_value)    && ((g) <= geometry_max_value))
+# define valid_orientation(o) (((o) >= orientation_min_value) && ((o) <= orientation_max_value))
+# define valid_anchor(a)      (((a) >= anchor_min_value)      && ((a) <= anchor_max_value))
+
+# define checked_direction_value(d)   (valid_direction(d)   ? (d) : direction_def_value)
+# define checked_geometry_value(g)    (valid_geometry(g)    ? (g) : geometry_def_value)
+# define checked_orientation_value(o) (valid_orientation(o) ? (o) : orientation_def_value)
+# define checked_anchor_value(a)      (valid_anchor(a)      ? (a) : anchor_def_value)
+
+/*tex
+    This one is a statement, not an expression, and |d| has to be an lvalue. Its shape is kept as
+    it is because call sites may use it with or without a trailing semicolon. NOTE(review): being
+    a bare |if| it is not safe as the unbraced body of an |if| that has an |else|.
+*/
+
+# define check_direction_value(d) \
+    if (! valid_direction(d)) { \
+        d = direction_def_value; \
+    }
+
+/* will become texgeometry.h|c and dir also in geometry */
+
+/*tex Bring the three geometry flags of box |n| in sync with its offset, orientation and anchor fields. */
+
+inline static void tex_check_box_geometry(halfword n)
+{
+    if (box_x_offset(n) || box_y_offset(n)) {
+        tex_set_box_geometry(n, offset_geometry);
+    } else {
+        tex_unset_box_geometry(n, offset_geometry);
+    }
+    if (box_w_offset(n) || box_h_offset(n) || box_d_offset(n) || box_orientation(n)) {
+        tex_set_box_geometry(n, orientation_geometry);
+    } else {
+        tex_unset_box_geometry(n, orientation_geometry);
+    }
+    if (box_anchor(n) || box_source_anchor(n) || box_target_anchor(n)) {
+        tex_set_box_geometry(n, anchor_geometry);
+    } else {
+        tex_unset_box_geometry(n, anchor_geometry);
+    }
+}
+
+/*tex An invalid |v| results in the default direction. */
+
+inline static void tex_set_box_direction(halfword b, halfword v)
+{
+    box_dir(b) = (singleword) checked_direction_value(v);
+}
+
+extern void     tex_initialize_directions (void);
+extern void     tex_cleanup_directions    (void);
+extern halfword tex_new_dir               (quarterword subtype, halfword direction);
+extern void     tex_push_text_dir_ptr     (halfword val);
+extern void     tex_pop_text_dir_ptr      (void);
+extern void     tex_set_text_dir          (halfword d);
+extern void     tex_set_math_dir          (halfword d);
+extern void     tex_set_line_dir          (halfword d);
+extern void     tex_set_par_dir           (halfword d);
+extern void     tex_set_box_dir           (halfword b, singleword d);
+
+/*tex
+    Both yield the indentation to use: the swapped one for a right to left direction when the
+    related normalization mode is permitted, else the given one. NOTE(review): |dir_righttoleft|
+    is not among the |direction_codes| above, so it has to come from elsewhere.
+*/
+
+# define swap_hang_indent(dir,indentation)           (((dir) == dir_righttoleft) && normalize_line_mode_permitted(normalize_line_mode_par, swap_hangindent_mode) ? (- (indentation)) : (indentation))
+# define swap_parshape_indent(dir,indentation,width) (((dir) == dir_righttoleft) && normalize_line_mode_permitted(normalize_line_mode_par, swap_parshape_mode)   ? (hsize_par - (width) - (indentation)) : (indentation))
+
+extern halfword tex_update_dir_state     (halfword p, halfword initial);
+extern halfword tex_sanitize_dir_state   (halfword first, halfword last, halfword initial);
+extern halfword tex_complement_dir_state (halfword tail);
+
+# endif
diff --git a/source/luametatex/source/tex/texdumpdata.c b/source/luametatex/source/tex/texdumpdata.c
new file mode 100644
index 000000000..58f9e4442
--- /dev/null
+++ b/source/luametatex/source/tex/texdumpdata.c
@@ -0,0 +1,331 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+# include "luametatex.h"
+
+dump_state_info lmt_dump_state = {
+    .format_identifier = 0,
+    .format_name       = 0
+};
+
+/*tex
+
+    After \INITEX\ has seen a collection of fonts and macros, it can write all the necessary
+    information on an auxiliary file so that production versions of \TEX\ are able to initialize
+    their memory at high speed. The present section of the program takes care of such output and
+    input. We shall consider simultaneously the processes of storing and restoring, so that the
+    inverse relation between them is clear.
+
+    The global variable |format_ident| is a string that is printed right after the |banner| line
+    when \TEX\ is ready to start. For \INITEX\ this string says simply |(INITEX)|; for other
+    versions of \TEX\ it says, for example, |(preloaded format = plain 1982.11.19)|, showing the
+    year, month, and day that the format file was created. We have |format_ident = 0| before \TEX's
+    tables are loaded. |FORMAT_ID| is a new field of type int suitable for the identification of a
+    format: values between 0 and 256 (included) can not be used because in the previous format they
+    are used for the length of the name of the engine.
+
+    Because most used processors are little endian, we flush that way, but after that we just stick
+    to the architecture. This also lets it come out as a readable 12 character (not nul terminated)
+    string on a little endian machine.
By using integers we can be sure that when it's generated on
+    a different architecture the format is not seen as valid.
+
+*/
+
+/*
+
+    In \LUAMETATEX\ the code has been overhauled. The sections are better separated and we write
+    less to the file because we try to be sparse. Also, a more dynamic approach is used. In the
+    \CONTEXT\ macro package most of what goes into the format is \LUA\ bytecode.
+
+    We no longer handle endian related code here which saves swapping bytes on the most popular
+    architectures. We also maintain some statistics and have several points where we check if
+    we're still okay.
+
+    Here we only have the main chunk. The specific data sections are implemented where it makes
+    most sense.
+
+*/
+
+/* Stored with the least significant byte first these read LMTX, -TEX and -FMT. */
+
+# define MAGIC_FORMAT_NUMBER_LE_1 0x58544D4C // 0x4C4D5458 // LMTX
+# define MAGIC_FORMAT_NUMBER_LE_2 0x5845542D // 0x2D544558 // -TEX
+# define MAGIC_FORMAT_NUMBER_LE_3 0x544D462D // 0x2D464D54 // -FMT
+
+/*
+    Print the distance between the current position in |f| and |pos|, a space and then |what|,
+    flush stdout and return the current position so that it can serve as the next |pos|.
+
+    NOTE(review): |ftell| returns a |long| and -1 on failure; here the value is stored in an
+    |int| and not checked.
+*/
+
+static int tex_aux_report_dump_state(dumpstream f, int pos, const char *what)
+{
+    int tmp = ftell(f);
+    tex_print_int(tmp - pos);
+    tex_print_char(' ');
+    tex_print_str(what);
+    fflush(stdout);
+    return tmp;
+}
+
+/* todo: move more dumping to other files, then also the sizes.
*/ + +static void tex_aux_dump_fingerprint(dumpstream f) +{ + dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_1); + dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_2); + dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_3); + dump_via_int(f, luametatex_format_fingerprint); +} + +static void tex_aux_undump_fingerprint(dumpstream f) +{ + int x; + undump_int(f, x); + if (x == MAGIC_FORMAT_NUMBER_LE_1) { + undump_int(f, x); + if (x == MAGIC_FORMAT_NUMBER_LE_2) { + undump_int(f, x); + if (x == MAGIC_FORMAT_NUMBER_LE_3) { + undump_int(f, x); + if (x == luametatex_format_fingerprint) { + return; + } else { + tex_fatal_undump_error("version id"); + } + } + } + } + tex_fatal_undump_error("initial fingerprint"); +} + +static void tex_aux_dump_final_check(dumpstream f) +{ + dump_int(f, lmt_dump_state.format_identifier); + dump_int(f, lmt_dump_state.format_name); + dump_via_int(f, luametatex_format_fingerprint); +} + +static void tex_aux_undump_final_check(dumpstream f) +{ + int x; + undump_int(f, lmt_dump_state.format_identifier); + if (lmt_dump_state.format_identifier < 0 || lmt_dump_state.format_identifier > lmt_string_pool_state.string_pool_data.ptr) { + goto BAD; + } + undump_int(f, lmt_dump_state.format_name); + if (lmt_dump_state.format_name < 0 || lmt_dump_state.format_name > lmt_string_pool_state.string_pool_data.ptr) { + goto BAD; + } + undump_int(f, x); + if (x == luametatex_format_fingerprint) { + return; + } + BAD: + tex_fatal_undump_error("final fingerprint"); +} + +static void tex_aux_create_fmt_name(void) +{ + lmt_print_state.selector = new_string_selector_code; + tex_print_format("%s %i.%i.%i",lmt_fileio_state.fmt_name, year_par, month_par, day_par); + lmt_dump_state.format_identifier = tex_make_string(); + tex_print_str(lmt_fileio_state.job_name); + lmt_dump_state.format_name = tex_make_string(); + lmt_print_state.selector = terminal_and_logfile_selector_code; +} + +static void tex_aux_dump_preamble(dumpstream f) +{ + dump_via_int(f, hash_size); + dump_via_int(f, hash_prime); + 
dump_via_int(f, prim_size); + dump_via_int(f, prim_prime); + dump_int(f, lmt_hash_state.hash_data.allocated); + dump_int(f, lmt_hash_state.hash_data.ptr); + dump_int(f, lmt_hash_state.hash_data.top); +} + +static void tex_aux_undump_preamble(dumpstream f) +{ + int x; + undump_int(f, x); + if (x != hash_size) { + goto BAD; + } + undump_int(f, x); + if (x != hash_prime) { + goto BAD; + } + undump_int(f, x); + if (x != prim_size) { + goto BAD; + } + undump_int(f, x); + if (x != prim_prime) { + goto BAD; + } + undump_int(f, lmt_hash_state.hash_data.allocated); + undump_int(f, lmt_hash_state.hash_data.ptr); + undump_int(f, lmt_hash_state.hash_data.top); + /*tex + We can consider moving all these allocaters to the start instead of this exception. + */ + tex_initialize_hash_mem(); + return; + BAD: + tex_fatal_undump_error("preamble"); +} + +void tex_store_fmt_file(void) +{ + int pos = 0; + dumpstream f = NULL; + + /*tex + If dumping is not allowed, abort. The user is not allowed to dump a format file unless + |save_ptr = 0|. This condition implies that |cur_level=level_one|, hence the |xeq_level| + array is constant and it need not be dumped. + */ + + if (lmt_save_state.save_stack_data.ptr != 0) { + tex_handle_error( + succumb_error_type, + "You can't dump inside a group", + "'{...\\dump}' is a no-no." + ); + } + + /*tex + We don't store some things. + */ + + tex_dispose_specification_nodes(); + + /*tex + Create the |format_ident|, open the format file, and inform the user that dumping has begun. + */ + + { + int callback_id = lmt_callback_defined(pre_dump_callback); + if (callback_id > 0) { + (void) lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->"); + } + } + + /*tex + We report the usual plus some more statistics. When something is wrong the machine just + quits, hopefully with some meaningful error. We always create the format in normal log and + terminal mode. We create a format name first because we also use that in error reporting. 
+ */ + + tex_aux_create_fmt_name(); + + f = tex_open_fmt_file(1); + if (! f) { + tex_formatted_error("system", "format file '%s' cannot be opened for writing", lmt_fileio_state.fmt_name); + return; + } + + tex_print_nlp(); + tex_print_format("Dumping format '%T' in file '%s': ", lmt_dump_state.format_identifier, lmt_fileio_state.fmt_name); + fflush(stdout); + + tex_compact_tokens(); + tex_compact_string_pool(); + + tex_aux_dump_fingerprint(f); pos = tex_aux_report_dump_state(f, pos, "fingerprint + "); + lmt_dump_engine_info(f); pos = tex_aux_report_dump_state(f, pos, "engine + "); + tex_aux_dump_preamble(f); pos = tex_aux_report_dump_state(f, pos, "preamble + "); + tex_dump_constants(f); pos = tex_aux_report_dump_state(f, pos, "constants + "); + tex_dump_string_pool(f); pos = tex_aux_report_dump_state(f, pos, "stringpool + "); + tex_dump_node_mem(f); pos = tex_aux_report_dump_state(f, pos, "nodes + "); + tex_dump_token_mem(f); pos = tex_aux_report_dump_state(f, pos, "tokens + "); + tex_dump_equivalents_mem(f); pos = tex_aux_report_dump_state(f, pos, "equivalents + "); + tex_dump_math_codes(f); pos = tex_aux_report_dump_state(f, pos, "math codes + "); + tex_dump_text_codes(f); pos = tex_aux_report_dump_state(f, pos, "text codes + "); + tex_dump_primitives(f); pos = tex_aux_report_dump_state(f, pos, "primitives + "); + tex_dump_hashtable(f); pos = tex_aux_report_dump_state(f, pos, "hashtable + "); + tex_dump_font_data(f); pos = tex_aux_report_dump_state(f, pos, "fonts + "); + tex_dump_math_data(f); pos = tex_aux_report_dump_state(f, pos, "math + "); + tex_dump_language_data(f); pos = tex_aux_report_dump_state(f, pos, "language + "); + tex_dump_insert_data(f); pos = tex_aux_report_dump_state(f, pos, "insert + "); + lmt_dump_registers(f); pos = tex_aux_report_dump_state(f, pos, "bytecodes + "); + tex_aux_dump_final_check(f); pos = tex_aux_report_dump_state(f, pos, "housekeeping = "); + + tex_aux_report_dump_state(f, 0, "total."); + tex_close_fmt_file(f); + 
tex_print_ln(); + +} + +/*tex + + Corresponding to the procedure that dumps a format file, we have a function that reads one in. + The function returns |false| if the dumped format is incompatible with the present \TEX\ table + sizes, etc. + + The inverse macros are slightly more complicated, since we need to check the range of the values + we are reading in. We say |undump (a) (b) (x)| to read an integer value |x| that is supposed to + be in the range |a <= x <= b|. + +*/ + +int tex_fatal_undump_error(const char *s) +{ + tex_emergency_message("system", "fatal format error, loading file '%s' failed with bad '%s' data, remake the format", emergency_fmt_name, s); + return tex_emergency_exit(); +} + +//define undumping(s) printf("undumping: %s\n",s); fflush(stdout); +# define undumping(s) + +static void tex_aux_undump_fmt_data(dumpstream f) +{ + undumping("warmingup") + + undumping("fingerprint") tex_aux_undump_fingerprint(f); + undumping("engineinfo") lmt_undump_engine_info(f); + undumping("preamble") tex_aux_undump_preamble(f); + undumping("constants") tex_undump_constants(f); + undumping("strings") tex_undump_string_pool(f); + undumping("nodes") tex_undump_node_mem(f); + undumping("tokens") tex_undump_token_mem(f); + undumping("equivalents") tex_undump_equivalents_mem(f); + undumping("mathcodes") tex_undump_math_codes(f); + undumping("textcodes") tex_undump_text_codes(f); + undumping("primitives") tex_undump_primitives(f); + undumping("hashtable") tex_undump_hashtable(f); + undumping("fonts") tex_undump_font_data(f); + undumping("math") tex_undump_math_data(f); + undumping("languages") tex_undump_language_data(f); + undumping("inserts") tex_undump_insert_data(f); + undumping("bytecodes") lmt_undump_registers(f); + undumping("finalcheck") tex_aux_undump_final_check(f); + + undumping("done") + + /*tex This should go elsewhere. 
*/ + + cur_list.prev_depth = ignore_depth; +} + +/* + The next code plays nice but on an error we exit anyway so some code is never reached in that + case. +*/ + +int tex_load_fmt_file(void) +{ + dumpstream f = tex_open_fmt_file(0); + if (f) { + tex_aux_undump_fmt_data(f); + tex_close_fmt_file(f); + return 1; + } else { + return tex_fatal_undump_error("filehandle"); + } +} + +/*tex + The format name is set to whatever |get_nullstr| returns. The identifier is only set (to the + string |" (INITEX)"|) when the run state is |initializing_state|, otherwise it is zero. +*/ + +void tex_initialize_dump_state(void) +{ + lmt_dump_state.format_name = get_nullstr(); + if (lmt_main_state.run_state == initializing_state) { + lmt_dump_state.format_identifier = tex_maketexstring(" (INITEX)"); + } else { + lmt_dump_state.format_identifier = 0; + } +} diff --git a/source/luametatex/source/tex/texdumpdata.h b/source/luametatex/source/tex/texdumpdata.h new file mode 100644 index 000000000..6a9e11a7d --- /dev/null +++ b/source/luametatex/source/tex/texdumpdata.h @@ -0,0 +1,105 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_DUMPDATA_H +# define LMT_DUMPDATA_H + +/*tex + + Originally the dump file was a memory dump, in \TEX\ called a format and in \METAFONT\ a base + and in \METAPOST\ a mem file. The \TEX\ program could reload that dump file and have a fast + start. In addition a pool file was used to store strings. Because it was a memory dump, it was + also pretty system dependent. + + When \WEBC\ showed up, \TEX\ installations got distributed on \CDROM\ and later \DVD, and + because one could run them from that medium, format files were shared. In order to do that the + file had to be endian neutral. Unfortunately the choice was such that for the most commonly + used architecture (intel) the dump items had to be swapped. This could slow down a startup, + depending on how rigorous a compiler or operating system was in testing (it is a reason why + startup on \MSWINDOWS\ was somewhat slower). + + Because in \LUATEX\ we can also store \LUA\ bytecodes it made no sense to take that portability + aspect into account.
The format file also got gzipped which at that time sped up loading. Later + in the project the endian swapping was removed so we gained a bit more. + + Because a format file that doesn't match an engine can actually result in a crash, we decided to + come up with a more robust approach: we use a magic number to register the version of the format! + Normally this number only increments when we add a new primitive or change command codes. At + some point in \LUATEX\ development we started with 907 which is the sum of the values of the + bytes of \quote {don knuth}. + + We sometimes also bump when the binary format (bytecode) of \LUA\ has changed in such a way that + the loader doesn't detect it. But that doesn't always help either because the cache is still + problematic then. There we actually hard code a different number then (a simple patch of a \LUA\ + file). + + By the time that the \LUAMETATEX\ code was in a state to be released, it became time to think + about a number that was definitely different from \LUATEX\ so here it is: + + \starttyping + initial = 2020//4 - 2020//100 + 2020//400 = 490 + \stoptyping + + Although \LUAMETATEX\ is already a bit older, we sort of released in leapyear 2020 so we take + the number of leapyears since zero (which is kind of \type {\undefined} as starting point). This + number actually jumps whenever something affects the format file (which can be an extra command or + some reshuffling of codes) so it is not always an indication of something really need. + + So to summarize: we don't share formats across architectures and operating systems, we use the + native endian property of an architecture, we don't compress, and we bump a magic number so that + we can intercept a potential crash. So much for a bit of history. + + We also bump the fingerprint when we have a new version of \LUA, just to play safe in case some + bytecodes have changed. + +*/ + +# define luametatex_format_fingerprint 670 + +/* These end up in the string pool.
*/ + +typedef struct dump_state_info { + strnumber format_identifier; + strnumber format_name; +} dump_state_info; + +extern dump_state_info lmt_dump_state; + +extern void tex_store_fmt_file (void); +extern int tex_load_fmt_file (void); +extern int tex_fatal_undump_error (const char *s); +extern void tex_initialize_dump_state (void); + +# define dump_items(f,p,item_size,nitems) fwrite((void *) p, (size_t) item_size, (size_t) nitems, f) +# define undump_items(f,p,item_size,nitems) { if (fread ((void *) p, (size_t) item_size, (size_t) nitems, f)) { } } + +# define dump_things(f,base,len) dump_items(f, (char *) &(base), sizeof (base), (int) (len)) +# define undump_things(f,base,len) undump_items(f, (char *) &(base), sizeof (base), (int) (len)) + +# define dump_int(f,x) dump_things(f,x,1) +# define undump_int(f,x) undump_things(f,x,1) + +/*tex + + Because sometimes we dump constants or the result of a function call we have |dump_via_int| + that puts the number into a variable first. Most integers come from structs and arrays. + Performance wise there is not that much gain. + +*/ + +# define dump_via_int(f,x) do { \ + int x_val = (x); \ + dump_int(f,x_val); \ +} while (0) + +# define dump_string(f,a) \ + if (a) { \ + int x = (int)strlen(a) + 1; \ + dump_int(f,x); \ + dump_things(f,*a, x); \ + } else { \ + dump_via_int(f,0); \ + } + +# endif diff --git a/source/luametatex/source/tex/texequivalents.c b/source/luametatex/source/tex/texequivalents.c new file mode 100644 index 000000000..7cc249a2e --- /dev/null +++ b/source/luametatex/source/tex/texequivalents.c @@ -0,0 +1,1964 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + The nested structure provided by |\char'173| \unknown\ |\char'175| groups in \TEX\ means that + |eqtb| entries valid in outer groups should be saved and restored later if they are overridden + inside the braces. 
When a new |eqtb| value is being assigned, the program therefore checks to + see if the previous entry belongs to an outer level. In such a case, the old value is placed on + the |save_stack| just before the new value enters |eqtb|. At the end of a grouping level, i.e., + when the right brace is sensed, the |save_stack| is used to restore the outer values, and the + inner ones are destroyed. + + Entries on the |save_stack| are of type |save_record|. The top item on this stack is + |save_stack[p]|, where |p=save_ptr-1|; it contains three fields called |save_type|, |save_level|, + and |save_value|, and it is interpreted in one of four ways: + + \startitemize[n] + + \startitem + If |save_type(p) = restore_old_value|, then |save_value(p)| is a location in |eqtb| whose + current value should be destroyed at the end of the current group and replaced by + |save_word(p-1)| (|save_type(p-1) == saved_eqtb|). Furthermore if |save_value(p) >= int_base|, + then |save_level(p)| should replace the corresponding entry in |xeq_level| (if |save_value(p) + < int_base|, then the level is part of |save_word(p-1)|). + \stopitem + + \startitem + If |save_type(p) = restore_zero|, then |save_value(p)| is a location in |eqtb| whose current + value should be destroyed at the end of the current group, when it should be replaced by the + current value of |eqtb[undefined_control_sequence]|. + \stopitem + + \startitem + If |save_type(p) = insert_token|, then |save_value(p)| is a token that should be inserted + into \TeX's input when the current group ends. + \stopitem + + \startitem + If |save_type(p) = level_boundary|, then |save_level(p)| is a code explaining what kind of + group we were previously in, and |save_value(p)| points to the level boundary word at the + bottom of the entries for that group. Furthermore, |save_value(p-1)| contains the source + line number at which the current level of grouping was entered, this field has itself a + type: |save_type(p-1) == saved_line|. 
+ \stopitem + + \stopitemize + + Besides this \quote {official} use, various subroutines push temporary variables on the save + stack when it is handy to do so. These all have an explicit |save_type|, and they are: + + \starttabulate + \NC |saved_adjust| \NC signifies an adjustment is being scanned \NC\NR + \NC |saved_insert| \NC an insertion is being scanned \NC\NR + \NC |saved_disc| \NC the |\discretionary| sublist we are working on right now \NC\NR + \NC |saved_boxtype| \NC whether a |\localbox| is |\left| or |\right| \NC\NR + \NC |saved_textdir| \NC a text direction to be restored \NC\NR + \NC |saved_eqno| \NC differentiates between |\eqno| and |\leqno| \NC\NR + \NC |saved_choices| \NC the |\mathchoices| sublist we are working on right now \NC\NR + \NC |saved_above| \NC used for the \LUAMETATEX\ above variants \NC\NR + \NC |saved_math| \NC an interrupted math list \NC\NR + \NC |saved_boxcontext| \NC the box context value \NC\NR + \NC |saved_boxspec| \NC the box |to| or |spread| specification \NC\NR + \NC |saved_boxdir| \NC the box |dir| specification \NC\NR + \NC |saved_boxattr| \NC the box |attr| specification \NC\NR + \NC |saved_boxpack| \NC the box |pack| specification \NC\NR + \NC |...| \NC some more in \LUATEX\ and \LUAMETATEX \NC\NR + \stoptabulate + + The global variable |cur_group| keeps track of what sort of group we are currently in. Another + global variable, |cur_boundary|, points to the topmost |level_boundary| word. And |cur_level| + is the current depth of nesting. The routines are designed to preserve the condition that no + entry in the |save_stack| or in |eqtb| ever has a level greater than |cur_level|.
+ +*/ + +save_state_info lmt_save_state = { + .save_stack = NULL, + .save_stack_data = { + .minimum = min_save_size, + .maximum = max_save_size, + .size = siz_save_size, + .step = stp_save_size, + .allocated = 0, + .itemsize = sizeof(save_record), + .top = 0, + .ptr = 0, + .initial = memory_data_unset, + .offset = 0, + }, + .current_level = 0, + .current_group = 0, + .current_boundary = 0, + .padding = 0, +}; + +/*tex + + The comments below are (of course) coming from \LUATEX's ancestor and are still valid! However, + in \LUATEX\ we use \UTF\ instead of \ASCII, have attributes, have more primitives, etc. But the + principles remain the same. We are not 100\% compatible in output and will never be. + +*/ + +static void tex_aux_show_eqtb(halfword n); + +/*tex + Emit a diagnostic of the form |{s ...}| where the dots stand for what |tex_aux_show_eqtb| prints + for entry |p|. +*/ + +static void tex_aux_diagnostic_trace(halfword p, const char *s) +{ + tex_begin_diagnostic(); + /* print_format ... */ + tex_print_char('{'); + tex_print_str(s); + tex_print_char(' '); + tex_aux_show_eqtb(p); + tex_print_char('}'); + tex_end_diagnostic(); +} + +/*tex + + Now that we have studied the data structures for \TEX's semantic routines (in other modules), + we ought to consider the data structures used by its syntactic routines. In other words, our + next concern will be the tables that \TEX\ looks at when it is scanning what the user has + written. + + The biggest and most important such table is called |eqtb|. It holds the current \quote + {equivalents} of things; i.e., it explains what things mean or what their current values are, + for all quantities that are subject to the nesting structure provided by \TEX's grouping + mechanism. There are six parts to |eqtb|: + + \startitemize[n] + + \startitem + |eqtb[null_cs]| holds the current equivalent of the zero-length control sequence. + \stopitem + + \startitem + |eqtb[hash_base..(glue_base-1)]| holds the current equivalents of single- and multiletter + control sequences.
+ \stopitem + + \startitem + |eqtb[glue_base..(local_base-1)]| holds the current equivalents of glue parameters like + the current baselineskip. + \stopitem + + \startitem + |eqtb[local_base..(int_base-1)]| holds the current equivalents of local halfword + quantities like the current box registers, the current \quote {catcodes}, the current font, + and a pointer to the current paragraph shape. + \stopitem + + \startitem + |eqtb[int_base .. (dimen_base-1)]| holds the current equivalents of fullword integer + parameters like the current hyphenation penalty. + \stopitem + + \startitem + |eqtb[dimen_base .. eqtb_size]| holds the current equivalents of fullword dimension + parameters like the current hsize or amount of hanging indentation. + \stopitem + + \stopitemize + + Note that, for example, the current amount of baselineskip glue is determined by the setting of + a particular location in region~3 of |eqtb|, while the current meaning of the control sequence + |\baselineskip| (which might have been changed by |\def| or |\let|) appears in region~2. + + The last two regions of |eqtb| have fullword values instead of the three fields |eq_level|, + |eq_type|, and |equiv|. An |eq_type| is unnecessary, but \TEX\ needs to store the |eq_level| + information in another array called |xeq_level|. + + The last statement is no longer true. We have plenty of room in the 64 bit memory words now so + we no longer need the parallel |x| array. For the moment we keep the commented code. 
+ +*/ + +// equivalents_state_info lmt_equivalents_state = { +// }; + +void tex_initialize_levels(void) +{ + cur_level = level_one; + cur_group = bottom_level_group; + lmt_scanner_state.last_cs_name = null_cs; +} + +void tex_initialize_undefined_cs(void) +{ + set_eq_type(undefined_control_sequence, undefined_cs_cmd); + set_eq_flag(undefined_control_sequence, 0); + set_eq_value(undefined_control_sequence, null); + set_eq_level(undefined_control_sequence, level_zero); +} + +void tex_dump_equivalents_mem(dumpstream f) +{ + /*tex + Dump regions 1 to 4 of |eqtb|, the table of equivalents: glue muglue toks boxes. The table + of equivalents usually contains repeated information, so we dump it in compressed form: The + sequence of $n + 2$ values $(n, x_1, \ldots, x_n, m)$ in the format file represents $n+m$ + consecutive entries of |eqtb|, with |m| extra copies of $x_n$, namely $(x_1, \ldots, x_n, + x_n, \ldots, x_n)$. + + Note that the loop below starts at |null_cs| and runs up to and including |eqtb_size|, so + every entry up to |eqtb_size| is written this way; |different| is always at least one. + */ + int index = null_cs; + do { + int different = 1; + int equivalent = 0; + int j = index; + while (j < eqtb_size - 1) { + if (equal_eqtb_entries(j, j + 1)) { + ++equivalent; + goto FOUND1; + } else { + ++different; + } + ++j; + } + /*tex No repetition was found: here |j >= eqtb_size - 1|. */ + goto DONE1; + FOUND1: + j++; + while (j < eqtb_size - 1) { + if (equal_eqtb_entries(j, j + 1)) { + ++equivalent; + } else { + goto DONE1; + } + ++j; + } + DONE1: + // printf("index %i, different %i, equivalent %i\n",index,different,equivalent); + dump_int(f, different); + dump_things(f, lmt_hash_state.eqtb[index], different); + dump_int(f, equivalent); + index = index + different + equivalent; + } while (index <= eqtb_size); + /*tex Dump the |hash_extra| part: */ + dump_int(f, lmt_hash_state.hash_data.ptr); + if (lmt_hash_state.hash_data.ptr > 0) { + dump_things(f, lmt_hash_state.eqtb[eqtb_size + 1], lmt_hash_state.hash_data.ptr); + } + /*tex A special register. */ + dump_int(f, lmt_token_state.par_loc); + /* dump_int(f, lmt_token_state.line_par_loc); */ /*tex See note in |textoken.c|.
*/ +} + +void tex_undump_equivalents_mem(dumpstream f) +{ + /*tex Undump regions 1 to 6 of the table of equivalents |eqtb|. */ + int index = null_cs; + do { + int different; + int equivalent; + undump_int(f, different); + if (different > 0) { + undump_things(f, lmt_hash_state.eqtb[index], different); + } + undump_int(f, equivalent); + // printf("index %i, different %i, equivalent %i\n",index,different,equivalent); + if (equivalent > 0) { + int last = index + different - 1; + for (int i = 1; i <= equivalent; i++) { + lmt_hash_state.eqtb[last + i] = lmt_hash_state.eqtb[last]; + } + } + index = index + different + equivalent; + } while (index <= eqtb_size); + /*tex Undump |hash_extra| part. */ + undump_int(f, lmt_hash_state.hash_data.ptr); + if (lmt_hash_state.hash_data.ptr > 0) { + /* we get a warning on possible overrun here */ + undump_things(f, lmt_hash_state.eqtb[eqtb_size + 1], lmt_hash_state.hash_data.ptr); + } + undump_int(f, lmt_token_state.par_loc); + if (lmt_token_state.par_loc >= hash_base && lmt_token_state.par_loc <= lmt_hash_state.hash_data.top) { + lmt_token_state.par_token = cs_token_flag + lmt_token_state.par_loc; + } else { + tex_fatal_undump_error("parloc"); + } + /* undump_int(f, lmt_token_state.line_par_loc); */ + /* if (lmt_token_state.line_par_loc >= hash_base && lmt_token_state.line_par_loc <= lmt_hash_state.hash_data.top) { */ + /* lmt_token_state.line_par_token = cs_token_flag + lmt_token_state.line_par_loc; */ + /* } else { */ + /* tex_fatal_undump_error("lineparloc"); */ + /* } */ + return; +} + +/*tex + + At this time it might be a good idea for the reader to review the introduction to |eqtb| that + was given above just before the long lists of parameter names. Recall that the \quote {outer + level} of the program is |level_one|, since undefined control sequences are assumed to be \quote + {defined} at |level_zero|. + + The following function is used to test if there is room for up to eight more entries on + |save_stack|. 
By making a conservative test like this, we can get by with testing for overflow + in only a few places. + + We now let the save stack dynamically grow. In practice the stack is small but when a large one + is needed, the overhead is probably neglectable compared to what the macro need. + +*/ + +# define reserved_save_stack_slots 32 /* We need quite some for boxes so we bump it. */ + +void tex_initialize_save_stack(void) +{ + int size = lmt_save_state.save_stack_data.minimum; + lmt_save_state.save_stack = aux_allocate_clear_array(sizeof(save_record), lmt_save_state.save_stack_data.step, reserved_save_stack_slots); + if (lmt_save_state.save_stack) { + lmt_save_state.save_stack_data.allocated = lmt_save_state.save_stack_data.step; + } else { + tex_overflow_error("save", size); + } +} + +static int tex_room_on_save_stack(void) +{ + int top = lmt_save_state.save_stack_data.ptr; + if (top > lmt_save_state.save_stack_data.top) { + lmt_save_state.save_stack_data.top = top; + if (top > lmt_save_state.save_stack_data.allocated) { + save_record *tmp = NULL; + top = lmt_save_state.save_stack_data.allocated + lmt_save_state.save_stack_data.step; + if (top > lmt_save_state.save_stack_data.size) { + top = lmt_save_state.save_stack_data.size; + } + if (top > lmt_save_state.save_stack_data.allocated) { + top = lmt_save_state.save_stack_data.allocated + lmt_save_state.save_stack_data.step; + lmt_save_state.save_stack_data.allocated = top; + tmp = aux_reallocate_array(lmt_save_state.save_stack, sizeof(save_record), top, reserved_save_stack_slots); + lmt_save_state.save_stack = tmp; + } + lmt_run_memory_callback("save", tmp ? 1 : 0); + if (! tmp) { + tex_overflow_error("save", top); + return 0; + } + } + } + return 1; +} + +void tex_save_halfword_on_stack(quarterword t, halfword v) +{ + if (tex_room_on_save_stack()) { + tex_set_saved_record(0, t, 0, v); + ++lmt_save_state.save_stack_data.ptr; + } +} + +/*tex + + Procedure |new_save_level| is called when a group begins. 
The argument is a group identification + code like |hbox_group|. After calling this routine, it is safe to put six more entries on + |save_stack|. + + In some cases integer-valued items are placed onto the |save_stack| just below a |level_boundary| + word, because this is a convenient place to keep information that is supposed to \quote {pop up} + just when the group has finished. For example, when |\hbox to 100pt| is being treated, the 100pt + dimension is stored on |save_stack| just before |new_save_level| is called. + + The |group_trace| procedure is called when a new level of grouping begins (|e=false|) or ends + (|e = true|) with |saved_value (-1)| containing the line number. + +*/ + +static void tex_aux_group_trace(int g) +{ + tex_begin_diagnostic(); + tex_print_format(g ? "{leaving %G}" : "{entering %G}", g); + tex_end_diagnostic(); +} + +/*tex + + A group entered (or a conditional started) in one file may end in a different file. Such + slight anomalies, although perfectly legitimate, may cause errors that are difficult to + locate. In order to be able to give a warning message when such anomalies occur, \ETEX\ + uses the |grp_stack| and |if_stack| arrays to record the initial |cur_boundary| and + |condition_ptr| values for each input file. + + When a group ends that was apparently entered in a different input file, the |group_warning| + procedure is invoked in order to update the |grp_stack|. If moreover |\tracingnesting| is + positive we want to give a warning message. The situation is, however, somewhat complicated + by two facts: + + \startitemize[n,packed] + \startitem + There may be |grp_stack| elements without a corresponding |\input| file or + |\scantokens| pseudo file (e.g., error insertions from the terminal); and + \stopitem + \startitem + the relevant information is recorded in the |name_field| of the |input_stack| only + loosely synchronized with the |in_open| variable indexing |grp_stack|. 
+ \stopitem + \stopitemize + +*/ + +static void tex_aux_group_warning(void) +{ + /*tex do we need a warning? */ + int w = 0; + /*tex index into |grp_stack| */ + int i = lmt_input_state.in_stack_data.ptr; + lmt_input_state.base_ptr = lmt_input_state.input_stack_data.ptr; + /*tex store current state */ + lmt_input_state.input_stack[lmt_input_state.base_ptr] = lmt_input_state.cur_input; + while ((lmt_input_state.in_stack[i].group == cur_boundary) && (i > 0)) { + /*tex + + Set variable |w| to indicate if this case should be reported. This code scans the input + stack in order to determine the type of the current input file. + + */ + if (tracing_nesting_par > 0) { + while ((lmt_input_state.input_stack[lmt_input_state.base_ptr].state == token_list_state) || (lmt_input_state.input_stack[lmt_input_state.base_ptr].index > i)) { + --lmt_input_state.base_ptr; + } + if (lmt_input_state.input_stack[lmt_input_state.base_ptr].name > 17) { + /*tex |> max_file_input_code| .. hm */ + w = 1; + } + } + lmt_input_state.in_stack[i].group = save_value(lmt_save_state.save_stack_data.ptr); + --i; + } + if (w) { + tex_begin_diagnostic(); + tex_print_format("[warning: end of %G of a different file]", 1); + tex_end_diagnostic(); + if (tracing_nesting_par > 1) { + tex_show_context(); + } + if (lmt_error_state.history == spotless) { + lmt_error_state.history = warning_issued; + } + } +} + +void tex_new_save_level(quarterword c) +{ + /*tex We begin a new level of grouping. */ + if (tex_room_on_save_stack()) { + save_attribute_state_before(); + tex_set_saved_record(saved_group_line_number, saved_line_number, 0, lmt_input_state.input_line); + tex_set_saved_record(saved_group_level_boundary, level_boundary, cur_group, cur_boundary); + /*tex eventually we will have bumped |lmt_save_state.save_stack_data.ptr| by |saved_group_n_of_items|! 
*/ + ++lmt_save_state.save_stack_data.ptr; + if (cur_level == max_quarterword) { + tex_overflow_error("grouping levels", max_quarterword - min_quarterword); + } + /*tex We quit if |(cur_level+1)| is too big to be stored in |eqtb|. */ + cur_boundary = lmt_save_state.save_stack_data.ptr; + cur_group = c; + if (tracing_groups_par > 0) { + tex_aux_group_trace(0); + } + ++cur_level; + ++lmt_save_state.save_stack_data.ptr; + save_attribute_state_after(); + if (end_of_group_par) { + update_tex_end_of_group(null); + } + /* no_end_group_par = null; */ + } +} + +int tex_saved_line_at_level(void) +{ + return lmt_save_state.save_stack_data.ptr > 0 ? (saved_value(-1) > 0 ? saved_value(-1) : 0) : 0; +} + +/*tex + + The |\showgroups| command displays all currently active grouping levels. The modifications of + \TEX\ required for the display produced by the |show_save_groups| procedure were first discussed + by Donald~E. Knuth in {\em TUGboat} {\bf 11}, 165--170 and 499--511, 1990. + + In order to understand a group type we also have to know its mode. Since unrestricted horizontal + modes are not associated with grouping, they are skipped when traversing the semantic nest. + + I have to admit that I never used (or needed) this so we might as well drop it from \LUAMETATEX\ + and given the already extensive tracing we can decide to drop it. + + The output is not (entirely) downward compatible which is no big deal because we output some more + details anyway. +*/ + +/*tex + Search downwards from offset |-1| for a record of type |id|. The result is always negative: it + is either the offset of the first matching record or, when there is none, the first offset for + which |saved_valid| fails. Callers therefore have to test the result with |saved_valid| and not + for being nonzero. +*/ + +static int tex_aux_found_save_type(int id) +{ + int i = -1; + while (saved_valid(i) && saved_type(i) != id) { + i--; + } + return i; +} + +/*tex The value of the nearest record of type |id|, or zero when there is no such record. */ + +static int tex_aux_save_value(int id) +{ + int i = tex_aux_found_save_type(id); + return saved_valid(i) ? 
saved_value(i) : 0; +} + +/*tex + Fetch the packing type and amount from the nearest |saved_box_spec| record; both are zero when + there is no such record. The return value tells if the amount is nonzero. +*/ + +static int tex_aux_saved_box_spec(halfword *packing, halfword *amount) +{ + int i = tex_aux_found_save_type(saved_box_spec); + if (saved_valid(i)) { + *packing = saved_level(i); + *amount = saved_value(i); + } else { + *packing = 0; + *amount = 0; + } + return (*amount != 0); +} + +static void tex_aux_show_group_count(int n) +{ + for (int i = 1; i <= n; i++) { + tex_print_str("{}"); + } +} + +void tex_show_save_groups(void) +{ + int pointer = lmt_nest_state.nest_data.ptr; + int saved_pointer = lmt_save_state.save_stack_data.ptr; + quarterword saved_level = cur_level; + quarterword saved_group = cur_group; + halfword saved_tracing = tracing_levels_par; + int alignmentstate = 1; /* to keep track of alignments */ + const char *package = NULL; + lmt_save_state.save_stack_data.ptr = cur_boundary; + --cur_level; + tracing_levels_par |= tracing_levels_group; + while (1) { + int mode; + tex_print_levels(); + tex_print_group(1); + if (cur_group == bottom_level_group) { + goto DONE; + } + do { + mode = lmt_nest_state.nest[pointer].mode; + if (pointer > 0) { + --pointer; + } else { + mode = vmode; + } + } while (mode == hmode); + tex_print_str(": "); + switch (cur_group) { + case simple_group: + ++pointer; + goto FOUND2; + case hbox_group: + case adjusted_hbox_group: + package = "hbox"; + break; + case vbox_group: + package = "vbox"; + break; + case vtop_group: + package = "vtop"; + break; + case align_group: + if (alignmentstate == 0) { + package = (mode == -vmode) ? 
"halign" : "valign"; + alignmentstate = 1; + goto FOUND1; + } else { + if (alignmentstate == 1) { + tex_print_str("align entry"); + } else { + tex_print_str_esc("cr"); + } + if (pointer >= alignmentstate) { + pointer -= alignmentstate; + } + alignmentstate = 0; + goto FOUND2; + } + case no_align_group: + ++pointer; + alignmentstate = -1; + tex_print_str_esc("noalign"); + goto FOUND2; + case output_group: + tex_print_str_esc("output"); + goto FOUND2; + case math_group: + goto FOUND2; + case discretionary_group: + tex_print_str_esc("discretionary"); + tex_aux_show_group_count(tex_aux_save_value(saved_discretionary_item_component)); + goto FOUND2; + case math_fraction_group: + tex_print_str_esc("fraction"); + tex_aux_show_group_count(tex_aux_save_value(saved_fraction_item_variant)); + goto FOUND2; + case math_operator_group: + tex_print_str_esc("operator"); + tex_aux_show_group_count(tex_aux_save_value(saved_operator_item_variant)); + goto FOUND2; + case math_choice_group: + tex_print_str_esc("mathchoice"); + tex_aux_show_group_count(tex_aux_save_value(saved_choice_item_count)); + goto FOUND2; + case insert_group: + tex_print_str_esc("insert"); + tex_print_int(tex_aux_save_value(saved_insert_item_index)); + goto FOUND2; + case vadjust_group: + tex_print_str_esc("vadjust"); + if (tex_aux_save_value(saved_adjust_item_location) == pre_adjust_code) { + tex_print_str(" pre"); + } + if (tex_aux_save_value(saved_adjust_item_options) & adjust_option_before) { + tex_print_str(" before"); + } + goto FOUND2; + case vcenter_group: + package = "vcenter"; + goto FOUND1; + case also_simple_group: + case semi_simple_group: + ++pointer; + tex_print_str_esc("begingroup"); + goto FOUND2; +//case math_simple_group: +// ++pointer; +// tex_print_str_esc("beginmathgroup"); +// goto FOUND2; + case math_shift_group: + if (mode == mmode) { + tex_print_char('$'); + } else if (lmt_nest_state.nest[pointer].mode == mmode) { + tex_print_cmd_chr(equation_number_cmd, 
tex_aux_save_value(saved_equation_number_item_location)); + goto FOUND2; + } + tex_print_char('$'); + goto FOUND2; + case math_fence_group: + /* kind of ugly ... maybe also save that one */ /* todo: operator */ + tex_print_str_esc((node_subtype(lmt_nest_state.nest[pointer + 1].delim) == left_fence_side) ? "left" : "middle"); + goto FOUND2; + default: + tex_confusion("show groups"); + break; + } + /*tex Show the box context */ + { + int i = tex_aux_save_value(saved_full_spec_item_context);; + if (i) { + if (i < box_flag) { + /* this is pretty horrible and likely wrong */ + singleword cmd = (abs(lmt_nest_state.nest[pointer].mode) == vmode) ? hmove_cmd : vmove_cmd; + tex_print_cmd_chr(cmd, (i > 0) ? move_forward_code : move_backward_code); + tex_print_dimension(abs(i), pt_unit); + } else if (i <= max_global_box_flag) { + if (i >= global_box_flag) { + tex_print_str_esc("global"); + i -= (global_box_flag - box_flag); + } + tex_print_str_esc("setbox"); + tex_print_int(i - box_flag); + tex_print_char('='); + } else { + switch (i) { + case a_leaders_flag: + tex_print_cmd_chr(leader_cmd, a_leaders); + break; + case c_leaders_flag: + tex_print_cmd_chr(leader_cmd, c_leaders); + break; + case x_leaders_flag: + tex_print_cmd_chr(leader_cmd, x_leaders); + break; + case g_leaders_flag: + tex_print_cmd_chr(leader_cmd, g_leaders); + break; + case u_leaders_flag: + tex_print_cmd_chr(leader_cmd, u_leaders); + break; + } + } + } + } + FOUND1: + { + /*tex Show the box packaging info. */ + tex_print_str_esc(package); + halfword packing, amount; + if (tex_aux_saved_box_spec(&packing, &amount)) { + tex_print_str(packing == packing_exactly ? 
" to " : " spread "); + tex_print_dimension(amount, pt_unit); + } + } + FOUND2: + --cur_level; + cur_group = save_level(lmt_save_state.save_stack_data.ptr); + lmt_save_state.save_stack_data.ptr = save_value(lmt_save_state.save_stack_data.ptr); + } + DONE: + lmt_save_state.save_stack_data.ptr = saved_pointer; + cur_level = saved_level; + cur_group = saved_group; + tracing_levels_par = saved_tracing; +} + +/*tex + This is an experiment. The |handle_overload| function can either go on or quit, depending on + how strong one wants to check for overloads. + + \starttabulate[||||||||] + \NC \NC \NC immutable \NC permanent \NC primitive \NC frozen \NC instance \NC \NR + \NC 1 \NC warning \NC + \NC + \NC + \NC \NC \NC \NR + \NC 2 \NC error \NC + \NC + \NC + \NC \NC \NC \NR + \NC 3 \NC warning \NC + \NC + \NC + \NC + \NC \NC \NR + \NC 4 \NC error \NC + \NC + \NC + \NC + \NC \NC \NR + \NC 5 \NC warning \NC + \NC + \NC + \NC + \NC + \NC \NR + \NC 6 \NC error \NC + \NC + \NC + \NC + \NC + \NC \NR + \stoptabulate + + The overload callback gets passed: + (boolean) error, + (integer) overload, + (string) csname, + (integer) flags. + + See January 2020 files for an alternative implementation. +*/ + +static void tex_aux_handle_overload(const char *s, halfword cs, int overload, int error_type) +{ + int callback_id = lmt_callback_defined(handle_overload_callback); + if (callback_id > 0) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "bdsd->", error_type == normal_error_type, overload, cs_text(cs), eq_flag(cs)); + } else { + tex_handle_error( + error_type, + "You can't redefine %s %S.", + s, cs, + NULL + ); + } +} + +static int tex_aux_report_overload(halfword cs, int overload) +{ + int error_type = overload & 1 ? 
warning_error_type : normal_error_type; + if (has_eq_flag_bits(cs, immutable_flag_bit)) { + tex_aux_handle_overload("immutable", cs, overload, error_type); + } else if (has_eq_flag_bits(cs, primitive_flag_bit)) { + tex_aux_handle_overload("primitive", cs, overload, error_type); + } else if (has_eq_flag_bits(cs, permanent_flag_bit)) { + tex_aux_handle_overload("permanent", cs, overload, error_type); + } else if (has_eq_flag_bits(cs, frozen_flag_bit)) { + tex_aux_handle_overload("frozen", cs, overload, error_type); + } else if (has_eq_flag_bits(cs, instance_flag_bit)) { + tex_aux_handle_overload("instance", cs, overload, warning_error_type); + return 1; + } + return error_type == warning_error_type; +} + +# define overload_error_type(overload) (overload & 1 ? warning_error_type : normal_error_type) + +int tex_define_permitted(halfword cs, halfword prefixes) +{ + halfword overload = overload_mode_par; + if (! cs || ! overload || has_eq_flag_bits(cs, mutable_flag_bit)) { + return 1; + } else if (is_overloaded(prefixes)) { + if (overload > 2 && has_eq_flag_bits(cs, immutable_flag_bit | permanent_flag_bit | primitive_flag_bit)) { + return tex_aux_report_overload(cs, overload); + } + } else if (overload > 4) { + if (has_eq_flag_bits(cs, immutable_flag_bit | permanent_flag_bit | primitive_flag_bit | frozen_flag_bit | instance_flag_bit)) { + return tex_aux_report_overload(cs, overload); + } + } else if (overload > 2) { + if (has_eq_flag_bits(cs, immutable_flag_bit | permanent_flag_bit | primitive_flag_bit | frozen_flag_bit)) { + return tex_aux_report_overload(cs, overload); + } + } else if (has_eq_flag_bits(cs, immutable_flag_bit)) { + return tex_aux_report_overload(cs, overload); + } + return 1; +} + +static int tex_aux_mutation_permitted(halfword cs) +{ + halfword overload = overload_mode_par; + if (cs && overload && has_eq_flag_bits(cs, immutable_flag_bit)) { + return tex_aux_report_overload(cs, overload); + } else { + return 1; + } +} + +/*tex + + Just before an entry 
of |eqtb| is changed, the following procedure should be called to update + the other data structures properly. It is important to keep in mind that reference counts in + |mem| include references from within |save_stack|, so these counts must be handled carefully. + + We don't need to destroy when an assignment has the same node: + +*/ + +static void tex_aux_eq_destroy(memoryword w) +{ + switch (eq_type_field(w)) { + case call_cmd: + case protected_call_cmd: + case semi_protected_call_cmd: + case tolerant_call_cmd: + case tolerant_protected_call_cmd: + case tolerant_semi_protected_call_cmd: + case register_toks_reference_cmd: + case internal_toks_reference_cmd: + tex_delete_token_reference(eq_value_field(w)); + break; + case internal_glue_reference_cmd: + case register_glue_reference_cmd: + case internal_mu_glue_reference_cmd: + case register_mu_glue_reference_cmd: + case gluespec_cmd: + case mugluespec_cmd: + case mathspec_cmd: + case fontspec_cmd: + tex_flush_node(eq_value_field(w)); + break; + case internal_box_reference_cmd: + case register_box_reference_cmd: + tex_flush_node_list(eq_value_field(w)); + break; + case specification_reference_cmd: + { + halfword q = eq_value_field(w); + if (q) { + /*tex + We need to free a |\parshape| block. Such a block is |2n + 1| words long, + where |n = vinfo(q)|. It happens in the + flush function. + */ + tex_flush_node(q); + } + } + break; + default: + break; + } +} + +/*tex + + To save a value of |eqtb[p]| that was established at level |l|, we can use the following + subroutine. This code could be simplified after the xeq cleanup so we actually use one slot + less per saved value. 
+ +*/ + +static void tex_aux_eq_save(halfword p, quarterword l) +{ + if (tex_room_on_save_stack()) { + if (l == level_zero) { + save_type(lmt_save_state.save_stack_data.ptr) = restore_zero; + } else { + save_type(lmt_save_state.save_stack_data.ptr) = restore_old_value; + save_word(lmt_save_state.save_stack_data.ptr) = lmt_hash_state.eqtb[p]; + } + save_level(lmt_save_state.save_stack_data.ptr) = l; + save_value(lmt_save_state.save_stack_data.ptr) = p; + ++lmt_save_state.save_stack_data.ptr; + } +} + +/*tex + + The procedure |eq_define| defines an |eqtb| entry having specified |eq_type| and |equiv| fields, + and saves the former value if appropriate. This procedure is used only for entries in the first + four regions of |eqtb|, i.e., only for entries that have |eq_type| and |equiv| fields. After + calling this routine, it is safe to put four more entries on |save_stack|, provided that there + was room for four more entries before the call, since |eq_save| makes the necessary test. + + The destroy if same branch comes from \ETEX\ but is it really right to destroy here if we + actually want to keep the value? In practice we only come here with zero cases but even then, + it looks like we can destroy the token list or node (list). Not, that might actually work ok in + the case of glue refs that have work by ref count and token lists and node (lists) are always + different so there we do no harm. + +*/ + +inline static int tex_aux_equal_eq(halfword p, singleword cmd, singleword flag, halfword chr) +{ + /* maybe keep flag test at call end and then only flip flags */ + if (eq_flag(p) == flag) { + // printf("eqtest> %03i %03i\n",eq_type(p),cmd); + switch (eq_type(p)) { + case internal_glue_reference_cmd: + case register_glue_reference_cmd: + case internal_mu_glue_reference_cmd: + case register_mu_glue_reference_cmd: + case gluespec_cmd: + case mugluespec_cmd: + /*tex We compare the pointer as well as the record. 
*/ + if (tex_same_glue(eq_value(p), chr)) { + if (chr) { + tex_flush_node(chr); + } + return 1; + } else { + return 0; + } + case mathspec_cmd: + /*tex Idem here. */ + if (tex_same_mathspec(eq_value(p), chr)) { + if (chr) { + tex_flush_node(chr); + } + return 1; + } else { + return 0; + } + case fontspec_cmd: + /*tex And here. */ + if (tex_same_fontspec(eq_value(p), chr)) { + if (chr) { + tex_flush_node(chr); + } + return 1; + } else { + return 0; + } + case call_cmd: + case protected_call_cmd: + case semi_protected_call_cmd: + case tolerant_call_cmd: + case tolerant_protected_call_cmd: + case tolerant_semi_protected_call_cmd: + /*tex The initial token reference will do as it is unique. */ +// if (eq_value(p) == chr) { + if (eq_value(p) == chr && eq_level(p) == cur_level) { + tex_delete_token_reference(eq_value(p)); + return 1; + } else { + return 0; + } + case specification_reference_cmd: + case internal_box_reference_cmd: + case register_box_reference_cmd: + /*tex These are also references. */ + if (eq_type(p) == cmd && eq_value(p) == chr && ! chr) { +// if (eq_type(p) == cmd && eq_value(p) == chr && ! chr && eq_level(p) == cur_level) { + return 1; + } else { + /* play safe */ + return 0; + } + case internal_toks_reference_cmd: + case register_toks_reference_cmd: + /*tex As are these. */ + if (p && chr && eq_value(p) == chr) { + tex_delete_token_reference(eq_value(p)); + return 1; + } else { + return 0; + } + case internal_toks_cmd: + case register_toks_cmd: + /*tex Again we have references. */ + if (eq_value(p) == chr) { +// if (eq_value(p) == chr && eq_level(p) == cur_level) { + return 1; + } else { + return 0; + } + // case dimension_cmd: + // case integer_cmd: + // if (eq_type(p) == cmd && eq_value(p) == chr && eq_level(p) == cur_level) { + // return 1; + // } + default: + /*tex + We can best also check the level because for integer defs etc we run into + issues otherwise (see testcase tests/luametatex/eqtest.tex based on MS's + math file). 
+ */ + if (eq_type(p) == cmd && eq_value(p) == chr) { +// if (eq_type(p) == cmd && eq_value(p) == chr && eq_level(p) == cur_level) { + return 1; + } + return 0; + } + } else { + return 0; + } +} + +/*tex Used to define a not yet defined cs or box or ... */ + +void tex_eq_define(halfword p, singleword cmd, halfword chr) +{ + int trace = tracing_assigns_par > 0; + if (tex_aux_equal_eq(p, cmd, 0, chr)) { + if (trace) { + tex_aux_diagnostic_trace(p, "reassigning"); + } + } else { + if (trace) { + tex_aux_diagnostic_trace(p, "changing"); + } + if (eq_level(p) == cur_level) { + tex_aux_eq_destroy(lmt_hash_state.eqtb[p]); + } else if (cur_level > level_one) { + tex_aux_eq_save(p, eq_level(p)); + } + set_eq_level(p, cur_level); + set_eq_type(p, cmd); + set_eq_flag(p, 0); + set_eq_value(p, chr); + if (trace) { + tex_aux_diagnostic_trace(p, "into"); + } + } +} + +/*tex + + The counterpart of |eq_define| for the remaining (fullword) positions in |eqtb| is called + |eq_word_define|. Since |xeq_level[p] >= level_one| for all |p|, a |restore_zero| will never + be used in this case. + +*/ + +void tex_eq_word_define(halfword p, int w) +{ + int trace = tracing_assigns_par > 0; + if (eq_value(p) == w) { + if (trace) { + tex_aux_diagnostic_trace(p, "reassigning"); + } + } else { + if (trace) { + tex_aux_diagnostic_trace(p, "changing"); + } + if (eq_level(p) != cur_level) { + tex_aux_eq_save(p, eq_level(p)); + set_eq_level(p, cur_level); + } + eq_value(p) = w; + if (trace) { + tex_aux_diagnostic_trace(p, "into"); + } + } +} + +/*tex + + The |eq_define| and |eq_word_define| routines take care of local definitions. Global definitions + are done in almost the same way, but there is no need to save old values, and the new value is + associated with |level_one|. 
+ +*/ + +void tex_geq_define(halfword p, singleword cmd, halfword chr) +{ + int trace = tracing_assigns_par > 0; + if (trace) { + tex_aux_diagnostic_trace(p, "globally changing"); + } + tex_aux_eq_destroy(lmt_hash_state.eqtb[p]); + set_eq_level(p, level_one); + set_eq_type(p, cmd); + set_eq_flag(p, 0); + set_eq_value(p, chr); + if (trace) { + tex_aux_diagnostic_trace(p, "into"); + } +} + +void tex_geq_word_define(halfword p, int w) +{ + int trace = tracing_assigns_par > 0; + if (trace) { + tex_aux_diagnostic_trace(p, "globally changing"); + } + eq_value(p) = w; + set_eq_level(p, level_one); + if (trace) { + tex_aux_diagnostic_trace(p, "into"); + } +} + +/*tex + Instead of a macro that distinguishes between global or not we now use a few normal functions. + That way we don't need to define a bogus variable |a| in some cases. This is typically one of + those changes that happened after other bits and pieces got redone. (One can also consider it + a side effect of looking at the code through a visual studio lense.) 
+*/ + +static inline void tex_aux_set_eq_data(halfword p, singleword t, halfword e, singleword f, quarterword l) +{ + singleword flag = eq_flag(p); + set_eq_level(p, l); + set_eq_type(p, t); + set_eq_value(p, e); + if (is_mutable(f) || is_mutable(flag)) { + set_eq_flag(p, (f | flag) & ~(noaligned_flag_bit | permanent_flag_bit | primitive_flag_bit | immutable_flag_bit)); + } else { + set_eq_flag(p, f); + } +} + +void tex_define(int g, halfword p, singleword t, halfword e) /* int g -> singleword g */ +{ + int trace = tracing_assigns_par > 0; + singleword f = make_eq_flag_bits(g); + if (is_global(g)) { + /* what if already global */ + if (trace) { + tex_aux_diagnostic_trace(p, "globally changing"); + } + // if (tex_aux_equal_eq(p, t, f, e) && (eq_level(p) == level_one)) { + // return; /* we can save some stack */ + // } + tex_aux_eq_destroy(lmt_hash_state.eqtb[p]); + tex_aux_set_eq_data(p, t, e, f, level_one); + } else if (tex_aux_equal_eq(p, t, f, e)) { + /* hm, we tweak the ref ! */ + if (trace) { + tex_aux_diagnostic_trace(p, "reassigning"); + return; + } + } else { + if (trace) { + tex_aux_diagnostic_trace(p, "changing"); + } + if (eq_level(p) == cur_level) { + tex_aux_eq_destroy(lmt_hash_state.eqtb[p]); + } else if (cur_level > level_one) { + tex_aux_eq_save(p, eq_level(p)); + } + tex_aux_set_eq_data(p, t, e, f, cur_level); + } + if (trace) { + tex_aux_diagnostic_trace(p, "into"); + } +} + +void tex_define_inherit(int g, halfword p, singleword f, singleword t, halfword e) +{ + int trace = tracing_assigns_par > 0; + if (is_global(g)) { + /* what if already global */ + if (trace) { + tex_aux_diagnostic_trace(p, "globally changing"); + } + // if (equal_eq(p, t, f, e) && (eq_level(p) == level_one)) { + // return; /* we can save some stack */ + // } + tex_aux_eq_destroy(lmt_hash_state.eqtb[p]); + tex_aux_set_eq_data(p, t, e, f, level_one); + } else if (tex_aux_equal_eq(p, t, f, e)) { + if (trace) { + tex_aux_diagnostic_trace(p, "reassigning"); + return; + } + } else { 
+ if (trace) { + tex_aux_diagnostic_trace(p, "changing"); + } + if (eq_level(p) == cur_level) { + tex_aux_eq_destroy(lmt_hash_state.eqtb[p]); + } else if (cur_level > level_one) { + tex_aux_eq_save(p, eq_level(p)); + } + tex_aux_set_eq_data(p, t, e, f, cur_level); + } + if (trace) { + tex_aux_diagnostic_trace(p, "into"); + } +} + +/* beware: when we swap a global vsize with a local ... we can get side effect. */ + +static void tex_aux_just_define(int g, halfword p, halfword e) +{ + int trace = tracing_assigns_par > 0; + if (is_global(g)) { + if (trace) { + tex_aux_diagnostic_trace(p, "globally changing"); + } + tex_aux_eq_destroy(lmt_hash_state.eqtb[p]); + set_eq_value(p, e); + } else { + if (trace) { + tex_aux_diagnostic_trace(p, "changing"); + } + if (eq_level(p) == cur_level) { + tex_aux_eq_destroy(lmt_hash_state.eqtb[p]); + } else if (cur_level > level_one) { + tex_aux_eq_save(p, eq_level(p)); + } + set_eq_level(p, cur_level); + set_eq_value(p, e); + } + if (trace) { + tex_aux_diagnostic_trace(p, "into"); + } +} + +/* We can have a variant that doesn't save/restore so we just have to swap back then. */ + +void tex_define_swapped(int g, halfword p1, halfword p2, int force) +{ + halfword t1 = eq_type(p1); + halfword t2 = eq_type(p2); + halfword l1 = eq_level(p1); + halfword l2 = eq_level(p2); + singleword f1 = eq_flag(p1); + singleword f2 = eq_flag(p2); + halfword v1 = eq_value(p1); + halfword v2 = eq_value(p2); + if (t1 == t2 && l1 == l2) { + halfword overload = force ? 
0 : overload_mode_par; + if (overload) { + if (f1 != f2) { + goto NOTDONE; + } else if (is_immutable(f1)) { + goto NOTDONE; + } + } + { + switch (t1) { + case register_int_cmd: + case register_attribute_cmd: + case register_dimen_cmd: + case register_glue_cmd: /* unchecked */ + case register_mu_glue_cmd: /* unchecked */ + case internal_mu_glue_cmd: /* unchecked */ + case integer_cmd: + case dimension_cmd: + tex_aux_just_define(g, p1, v2); + tex_aux_just_define(g, p2, v1); + return; + case register_toks_cmd: + case internal_toks_cmd: + if (v1) tex_add_token_reference(v1); + if (v2) tex_add_token_reference(v2); + tex_aux_just_define(g, p1, v2); + tex_aux_just_define(g, p2, v1); + if (v1) tex_delete_token_reference(v1); + if (v2) tex_delete_token_reference(v2); + return; + case internal_int_cmd: + tex_assign_internal_int_value(g, p1, v2); + tex_assign_internal_int_value(g, p2, v1); + return; + case internal_attribute_cmd: + tex_assign_internal_attribute_value(g, p1, v2); + tex_assign_internal_attribute_value(g, p2, v1); + return; + case internal_dimen_cmd: + tex_assign_internal_dimen_value(g, p1, v2); + tex_assign_internal_dimen_value(g, p2, v1); + return; + case internal_glue_cmd: + /* todo */ + tex_assign_internal_skip_value(g, p1, v2); + tex_assign_internal_skip_value(g, p2, v1); + return; + default: + if (overload > 2) { + if (has_flag_bits(f1, immutable_flag_bit | permanent_flag_bit | primitive_flag_bit)) { + if (overload > 3) { + goto NOTDONE; + } + } + } + if (is_call_cmd(t1)) { + if (v1) tex_add_token_reference(v1); + if (v2) tex_add_token_reference(v2); + tex_aux_just_define(g, p1, v2); + tex_aux_just_define(g, p2, v1); + /* no delete here .. 
hm */ + } else { + tex_handle_error( + normal_error_type, + "\\swapcsvalues not (yet) implemented for commands (%C, %C)", + t1, v1, t2, v2, NULL + ); + + } + return; + } + } + } + NOTDONE: + tex_handle_error( + normal_error_type, + "\\swapcsvalues requires equal commands (%C, %C), levels (%i, %i) and flags (%i, %i)", + t1, v1, t2, v2, l1, l2, f1, f2, NULL + ); +} + +void tex_forced_define(int g, halfword p, singleword f, singleword t, halfword e) +{ + int trace = tracing_assigns_par > 0; + if (is_global(g)) { + if (trace) { + tex_aux_diagnostic_trace(p, "globally changing"); + } + tex_aux_eq_destroy(lmt_hash_state.eqtb[p]); + set_eq_level(p, level_one); + set_eq_type(p, t); + set_eq_flag(p, f); + set_eq_value(p, e); + } else { + if (trace) { + tex_aux_diagnostic_trace(p, "changing"); + } + if (eq_level(p) == cur_level) { + tex_aux_eq_destroy(lmt_hash_state.eqtb[p]); + } else if (cur_level > level_one) { + tex_aux_eq_save(p, eq_level(p)); + } + set_eq_level(p, cur_level); + set_eq_type(p, t); + set_eq_flag(p, f); + set_eq_value(p, e); + } + if (trace) { + tex_aux_diagnostic_trace(p, "into"); + } +} + +// void forced_define(int l, halfword p, singleword f, singleword t, halfword e) +// { +// eq_destroy(hash_state.eqtb[p]); +// set_eq_level(p, l); +// set_eq_type(p, t); +// set_eq_flag(p, f); +// set_eq_value(p, e); +// } + +void tex_word_define(int g, halfword p, halfword w) +{ + if (tex_aux_mutation_permitted(p)) { + int trace = tracing_assigns_par > 0; + if (is_global(g)) { + if (trace) { + tex_aux_diagnostic_trace(p, "globally changing"); + } + eq_value(p) = w; + set_eq_level(p, level_one); + } else if (eq_value(p) == w) { + if (trace) { + tex_aux_diagnostic_trace(p, "reassigning"); + return; + } + } else { + if (trace) { + tex_aux_diagnostic_trace(p, "changing"); + } + if (eq_level(p) != cur_level) { + tex_aux_eq_save(p, eq_level(p)); + set_eq_level(p, cur_level); + } + eq_value(p) = w; + } + if (trace) { + tex_aux_diagnostic_trace(p, "into"); + } + if 
(is_immutable(g)) { + eq_flag(p) |= immutable_flag_bit; + } else if (is_mutable(g)) { + eq_flag(p) |= mutable_flag_bit; + } + } +} + +void tex_forced_word_define(int g, halfword p, singleword f, halfword w) +{ + if (tex_aux_mutation_permitted(p)) { + int trace = tracing_assigns_par > 0; + if (is_global(g)) { + if (trace) { + tex_aux_diagnostic_trace(p, "globally changing"); + } + eq_value(p) = w; + set_eq_level(p, level_one); + } else if (eq_value(p) == w) { + if (trace) { + tex_aux_diagnostic_trace(p, "reassigning"); + return; + } + } else { + if (trace) { + tex_aux_diagnostic_trace(p, "changing"); + } + if (eq_level(p) != cur_level) { + tex_aux_eq_save(p, eq_level(p)); + set_eq_level(p, cur_level); + } + eq_value(p) = w; + } + if (trace) { + tex_aux_diagnostic_trace(p, "into"); + } + eq_flag(p) = f; + } +} + +/*tex + + Subroutine |save_for_after_group| puts a token on the stack for save-keeping. + +*/ + +void tex_save_for_after_group(halfword t) +{ + if (cur_level > level_one && tex_room_on_save_stack()) { + save_type(lmt_save_state.save_stack_data.ptr) = insert_tokens; + save_level(lmt_save_state.save_stack_data.ptr) = level_zero; + save_value(lmt_save_state.save_stack_data.ptr) = t; + ++lmt_save_state.save_stack_data.ptr; + } +} + +/*tex + + The |unsave| routine goes the other way, taking items off of |save_stack|. This routine takes + care of restoration when a level ends. Here, everything belonging to the topmost group is + cleared off of the save stack. + + In \TEX\ there are a few |\after...| commands, like |\aftergroup| and |\afterassignment| while + |\futurelet| also has this property of postponed actions. The |\every...| token registers do + the opposite and do stuff up front. In addition to |\aftergrouped| we have a variant that + accepts a token list, as does |\afterassigned|. These items are saved on the stack. + + In \LUAMETATEX\ we can also do things just before a group ends as well as just before the + paragraph finishes. 
/*tex
    Leave the current group: first run the |end_of_group_par| tokens (if any) in a local control
    loop, then let the code tables and math data drop their settings for |cur_level|, and finally
    pop save stack entries until the |level_boundary| of this group is reached. It is a fatal
    confusion when this is called while |cur_level| is not above |level_one|.
*/

void tex_unsave(void)
{
    if (end_of_group_par) {
        /*tex The |end_local_cmd| token is pushed first, the end-of-group tokens go on top of it. */
        tex_begin_inserted_list(tex_get_available_token(token_val(end_local_cmd, 0)));
        tex_begin_token_list(end_of_group_par, end_of_group_text);
        if (tracing_nesting_par > 2) {
            tex_local_control_message("entering token scanner via endgroup");
        }
        tex_local_control(1);
    }

    unsave_attribute_state_before();

    tex_unsave_math_codes(cur_level);
    tex_unsave_cat_codes(cat_code_table_par, cur_level);
    tex_unsave_text_codes(cur_level);
    tex_unsave_math_data(cur_level);
    if (cur_level > level_one) {
        /*tex
            Variable |a| registers if we already have processed an |\aftergroup|. We append when
            >= 1.
        */
        int a = 0;
        int trace = tracing_restores_par > 0;
        --cur_level;
        /*tex Clear off top level from |save_stack|. */
        while (1) {
            /*tex The pointer is decremented first, so we always look at the topmost used entry. */
            --lmt_save_state.save_stack_data.ptr;
            switch (save_type(lmt_save_state.save_stack_data.ptr)) {
                case level_boundary:
                    goto DONE;
                case restore_old_value:
                    {
                        halfword p = save_value(lmt_save_state.save_stack_data.ptr);
                        /*tex
                            Store |save_stack[save_ptr]| in |eqtb[p]|, unless |eqtb[p]| holds a global
                            value. A global definition, which sets the level to |level_one|, will not be
                            undone by |unsave|. If at least one global definition of |eqtb[p]| has been
                            carried out within the group that just ended, the last such definition will
                            therefore survive.

                            Entries below |internal_int_base| or above |eqtb_size| get the value that
                            is dropped (the saved one when retaining, the current one when restoring)
                            destroyed. For the entries in between the word is just copied back.
                        */
                        if (p < internal_int_base || p > eqtb_size) {
                            if (eq_level(p) == level_one) {
                                tex_aux_eq_destroy(save_word(lmt_save_state.save_stack_data.ptr));
                                if (trace) {
                                    tex_aux_diagnostic_trace(p, "retaining");
                                }
                            } else {
                                tex_aux_eq_destroy(lmt_hash_state.eqtb[p]);
                                lmt_hash_state.eqtb[p] = save_word(lmt_save_state.save_stack_data.ptr);
                                if (trace) {
                                    tex_aux_diagnostic_trace(p, "restoring");
                                }
                            }
                        } else if (eq_level(p) == level_one) {
                            if (trace) {
                                tex_aux_diagnostic_trace(p, "retaining");
                            }
                        } else {
                            lmt_hash_state.eqtb[p] = save_word(lmt_save_state.save_stack_data.ptr);
                            if (trace) {
                                tex_aux_diagnostic_trace(p, "restoring");
                            }
                        }
                        break;
                    }
                case insert_tokens:
                    {
                        /*tex A list starts a new input level (for now). */
                        halfword p = save_value(lmt_save_state.save_stack_data.ptr);
                        if (a) {
                            /*tex We stay at the same input level (an \ETEX\ feature). */
                            tex_append_input(p);
                        } else {
                            tex_insert_input(p);
                            a = 1;
                        }
                        break;
                    }
                case restore_lua:
                    {
                        /* The same as lua_function_code in |textoken.c|. */
                        halfword p = save_value(lmt_save_state.save_stack_data.ptr);
                        if (p > 0) {
                            strnumber u = tex_save_cur_string();
                            lmt_token_state.luacstrings = 0;
                            lmt_function_call(p, 0);
                            tex_restore_cur_string(u);
                            if (lmt_token_state.luacstrings > 0) {
                                tex_lua_string_start();
                            }
                        } else {
                            tex_normal_error("lua restore", "invalid number");
                        }
                        /*tex From now on saved token lists are appended instead of inserted. */
                        a = 1;
                        break;
                    }
                case restore_zero:
                    {
                        /*tex The entry goes back to the undefined state unless it was set globally. */
                        halfword p = save_value(lmt_save_state.save_stack_data.ptr);
                        if (eq_level(p) == level_one) {
                            if (trace) {
                                tex_aux_diagnostic_trace(p, "retaining");
                            }
                        } else {
                            if (p < internal_int_base || p > eqtb_size) {
                                tex_aux_eq_destroy(lmt_hash_state.eqtb[p]);
                            }
                            lmt_hash_state.eqtb[p] = lmt_hash_state.eqtb[undefined_control_sequence];
                            if (trace) {
                                tex_aux_diagnostic_trace(p, "restoring");
                            }
                        }
                        break;
                    }
                default:
                    /* we have a messed up save pointer */
                    tex_formatted_error("tex unsave", "bad save type case %d, probably a stack pointer issue", save_type(lmt_save_state.save_stack_data.ptr));
                    break;
            }
        }
      DONE:
        if (tracing_groups_par > 0) {
            tex_aux_group_trace(1);
        }
        if (lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].group == cur_boundary) {
            /*tex Groups are possibly not properly nested with files. */
            tex_aux_group_warning();
        }
        /*tex
            The boundary entry that we stopped at provides the group code and boundary that get
            reinstated, after which it is popped too.
        */
        cur_group = save_level(lmt_save_state.save_stack_data.ptr);
        cur_boundary = save_value(lmt_save_state.save_stack_data.ptr);
        --lmt_save_state.save_stack_data.ptr;
    } else {
        /*tex |unsave| is not used when |cur_group=bottom_level| */
        tex_confusion("current level");
    }
    unsave_attribute_state_after();
}
+ + The |prepare_mag| subroutine is called whenever \TEX\ wants to use |mag| for magnification. If + nonzero, this magnification should be used henceforth. We might drop magnification at some point. + + {\em NB: As we delegate the backend to \LUA\ we have no mag.} + + Let's pause a moment now and try to look at the Big Picture. The \TEX\ program consists of three + main parts: syntactic routines, semantic routines, and output routines. The chief purpose of the + syntactic routines is to deliver the user's input to the semantic routines, one token at a time. + The semantic routines act as an interpreter responding to these tokens, which may be regarded as + commands. And the output routines are periodically called on to convert box-and-glue lists into a + compact set of instructions that will be sent to a typesetter. We have discussed the basic data + structures and utility routines of \TEX, so we are good and ready to plunge into the real activity + by considering the syntactic routines. + + Our current goal is to come to grips with the |get_next| procedure, which is the keystone of + \TEX's input mechanism. Each call of |get_next| sets the value of three variables |cur_cmd|, + |cur_chr|, and |cur_cs|, representing the next input token. + + \startitemize + \startitem + |cur_cmd| denotes a command code from the long list of codes given above; + \stopitem + \startitem + |cur_chr| denotes a character code or other modifier of the command code; + \stopitem + \startitem + |cur_cs| is the |eqtb| location of the current control sequence, if the current token + was a control sequence, otherwise it's zero. + \stopitem + \stopitemize + + Underlying this external behavior of |get_next| is all the machinery necessary to convert from + character files to tokens.
At a given time we may be only partially finished with the reading of + several files (for which |\input| was specified), and partially finished with the expansion of + some user-defined macros and/or some macro parameters, and partially finished with the generation + of some text in a template for |\halign|, and so on. When reading a character file, special + characters must be classified as math delimiters, etc.; comments and extra blank spaces must be + removed, paragraphs must be recognized, and control sequences must be found in the hash table. + Furthermore there are occasions in which the scanning routines have looked ahead for a word like + |plus| but only part of that word was found, hence a few characters must be put back into the input + and scanned again. + + To handle these situations, which might all be present simultaneously, \TEX\ uses various stacks + that hold information about the incomplete activities, and there is a finite state control for each + level of the input mechanism. These stacks record the current state of an implicitly recursive + process, but the |get_next| procedure is not recursive. Therefore it will not be difficult to + translate these algorithms into low-level languages that do not support recursion. + + In general, |cur_cmd| is the current command as set by |get_next|, while |cur_chr| is the operand + of the current command. The control sequence found here is registsred in |cur_cs| and is zero if + none found. The |cur_tok| variable contains the packed representative of |cur_cmd| and |cur_chr| + and like the other ones is global. + + Here is a procedure that displays the current command. The variable |n| holds the level of |\if ... + \fi| nesting and |l| the line where |\if| started. + +*/ + +void tex_show_cmd_chr(halfword cmd, halfword chr) +{ + tex_begin_diagnostic(); + if (cur_list.mode != lmt_nest_state.shown_mode) { + if (tracing_commands_par >= 4) { + /*tex So, larger than \ETEX's extra info 3 value. 
We might just always do this. */ + tex_print_format("[mode: entering %M]", cur_list.mode); + tex_print_nlp(); + tex_print_levels(); + tex_print_str("{"); + } else { + tex_print_format("{%M: ", cur_list.mode); + } + lmt_nest_state.shown_mode = cur_list.mode; + } else { + tex_print_str("{"); + } + tex_print_cmd_chr((singleword) cmd, chr); + if (cmd == if_test_cmd && tracing_ifs_par > 0) { + halfword p; + int n, l; + if (tracing_commands_par >= 4) { + tex_print_str(": "); + } else { + tex_print_char(' '); + } + if (cur_chr >= first_real_if_test_code || cur_chr == or_else_code || cur_chr == or_unless_code) { /* can be other >= test */ + n = 1; + l = lmt_input_state.input_line; + } else { + tex_print_cmd_chr(if_test_cmd, lmt_condition_state.cur_if); + tex_print_char(' '); + n = 0; + l = lmt_condition_state.if_line; + } + /*tex + We now also have a proper counter but this is a check for a potential mess up. If + als is right, |lmt_condition_state.if_nesting| often should match |n|. + */ + p = lmt_condition_state.cond_ptr; + while (p) { + ++n; + p = node_next(p); + } + if (l) { + if (tracing_commands_par >= 4) { + tex_print_format("(level %i, line %i, nesting %i)", n, l, lmt_condition_state.if_nesting); + } else { + // tex_print_format("(level %i) entered on line %i", n, l); + tex_print_format("(level %i, line %i)", n, l); + } + } else { + tex_print_format("(level %i)", n); + } + } + tex_print_char('}'); + tex_end_diagnostic(); +} + +/*tex + + Here is a procedure that displays the contents of |eqtb[n]| symbolically. + + We're now at equivalent |n| in region 4. First we initialize most things to null or undefined + values. An undefined font is represented by the internal code |font_base|. + + However, the character code tables are given initial values based on the conventional + interpretation of \ASCII\ code. 
These initial values should not be changed when \TEX\ is + adapted for use with non-English languages; all changes to the initialization conventions + should be made in format packages, not in \TEX\ itself, so that global interchange of formats + is possible. + + The reorganization was done because I wanted a cleaner token interface at the \LUA\ end. So + we also do some more checking. The order differs from traditional \TEX\ but of course the + approach is similar. + + The regions in \LUAMETATEX\ are a bit adapted as a side effect of the \ETEX\ extensions as + well as our own. For instance, we tag all regions because we also need a consistent token + interface to \LUA. We also dropped fonts and some more from the table. + + A previous, efficient, still range based variant can be found in my archive but it makes + no sense to keep it commented here (apart from sentimental reasons) so one now only can see + the range agnostic version here. + +*/ + +/*tex + + This shows the |eqtb| entry at location |n|. A location below |null_cs| is reported as a bad + token. For a location in the indirect range we print the control sequence, an equal sign and + the command, followed by a colon and the token list when the type is |call_cmd| or higher. + All other locations are dispatched on the stored |eq_type|, where the internal and register + variants share the code that prints the equal sign and the value; an unknown type is reported + as a bad token too. + +*/ + +void tex_aux_show_eqtb(halfword n) +{ + if (n < null_cs) { + tex_print_format("bad token %i, case 1", n); + } else if (eqtb_indirect_range(n)) { + tex_print_cs(n); + tex_print_char('='); + tex_print_cmd_chr(eq_type(n), eq_value(n)); + if (eq_type(n) >= call_cmd) { + tex_print_char(':'); + tex_token_show(eq_value(n), default_token_show_min); + } + } else { + switch (eq_type(n)) { + case internal_toks_reference_cmd: + tex_print_cmd_chr(internal_toks_cmd, n); + goto TOKS; + case register_toks_reference_cmd: + tex_print_str_esc("toks"); + tex_print_int(register_toks_number(n)); + TOKS: + tex_print_char('='); + tex_token_show(eq_value(n), default_token_show_min); + break; + case internal_box_reference_cmd: + /*tex NOTE(review): unlike the other internals this one passes the reference command itself. */ + tex_print_cmd_chr(eq_type(n), n); + goto BOX; + case register_box_reference_cmd: + tex_print_str_esc("box"); + tex_print_int(register_box_number(n)); + BOX: + tex_print_char('='); + if (eq_value(n)) { + tex_show_node_list(eq_value(n), 0, 1); + tex_print_levels(); + } else { + tex_print_str("void"); + } + break; + case
internal_glue_reference_cmd: + tex_print_cmd_chr(internal_glue_cmd, n); + goto SKIP; + case register_glue_reference_cmd: + tex_print_str_esc("skip"); + tex_print_int(register_glue_number(n)); + SKIP: + tex_print_char('='); + if (tracing_nodes_par > 2) { + tex_print_format("<%i>", eq_value(n)); + } + tex_print_spec(eq_value(n), pt_unit); + break; + case internal_mu_glue_reference_cmd: + tex_print_cmd_chr(internal_mu_glue_cmd, n); + goto MUSKIP; + case register_mu_glue_reference_cmd: + tex_print_str_esc("muskip"); + tex_print_int(register_mu_glue_number(n)); + MUSKIP: + /*tex As with regular glue: first the equal sign, then the optional node number. */ + tex_print_char('='); + if (tracing_nodes_par > 2) { + tex_print_format("<%i>", eq_value(n)); + } + tex_print_spec(eq_value(n), mu_unit); + break; + case internal_int_reference_cmd: + tex_print_cmd_chr(internal_int_cmd, n); + goto COUNT; + case register_int_reference_cmd: + tex_print_str_esc("count"); + tex_print_int(register_int_number(n)); + COUNT: + tex_print_char('='); + tex_print_int(eq_value(n)); + break; + case internal_attribute_reference_cmd: + tex_print_cmd_chr(internal_attribute_cmd, n); + goto ATTRIBUTE; + case register_attribute_reference_cmd: + tex_print_str_esc("attribute"); + tex_print_int(register_attribute_number(n)); + ATTRIBUTE: + tex_print_char('='); + tex_print_int(eq_value(n)); + break; + case internal_dimen_reference_cmd: + tex_print_cmd_chr(internal_dimen_cmd, n); + goto DIMEN; + case register_dimen_reference_cmd: + tex_print_str_esc("dimen"); + tex_print_int(register_dimen_number(n)); + DIMEN: + tex_print_char('='); + tex_print_dimension(eq_value(n), pt_unit); + break; + case specification_reference_cmd: + tex_print_cmd_chr(set_specification_cmd, n); + tex_print_char('='); + if (eq_value(n)) { + // if (tracing_nodes_par > 2) { + // tex_print_format("<%i>", eq_value(n)); + // } + tex_print_int(specification_count(eq_value(n))); + } else { + tex_print_char('0'); + } + break; + default: + tex_print_format("bad token %i, case 2", n); + break; + } + } +} + +/*tex + + A couple of
(self documenting) convenient helpers. They do what we do in \LUATEX, but we now + have collapsed all the options in one mode parameter that also gets stored in the glyph so + the older functions are gone. Progress. + + Both return a penalty parameter selected by the given |mode|: when |hyphenation_permitted| + does not hold for the automatic (explicit) penalty mode we fall back on + |ex_hyphen_penalty_par|. + +*/ + +halfword tex_automatic_disc_penalty(halfword mode) +{ + return hyphenation_permitted(mode, automatic_penalty_hyphenation_mode) ? automatic_hyphen_penalty_par : ex_hyphen_penalty_par; +} + +halfword tex_explicit_disc_penalty(halfword mode) +{ + return hyphenation_permitted(mode, explicit_penalty_hyphenation_mode) ? explicit_hyphen_penalty_par : ex_hyphen_penalty_par; +} + +/*tex + + The table of equivalents needs to get (pre)populated by the right commands and references, so + that happens here (called in maincontrol at ini time). + + For diagnostic purposes we now have the type set for registers. As a consequence we now have + four |glue_ref| variants, which is a trivial extension. + + The helper below sets the level, command, (zero) flag and value of the entry at |base| and + then copies that entry to the locations |base + 1| up to and including |base + count|, so + |count + 1| entries end up initialized. When |count| is zero or negative nothing is set at + all, not even the entry at |base|. + +*/ + +inline static void tex_aux_set_eq(halfword base, quarterword level, singleword cmd, halfword value, halfword count) +{ + if (count > 0) { + set_eq_level(base, level); + set_eq_type(base, cmd); + set_eq_flag(base, 0); + set_eq_value(base, value); + for (int k = base + 1; k <= base + count; k++){ + copy_eqtb_entry(k, base); + } + } +} + /*tex This repeats the first step of the initialization below: the entries starting at |null_cs| + are set to |undefined_cs_cmd| at |level_zero| with value |null|. */ +void tex_synchronize_equivalents(void) +{ + tex_aux_set_eq(null_cs, level_zero, undefined_cs_cmd, null, lmt_hash_state.hash_data.top - 1); +} + /*tex Every region is filled with its own reference command and default value; the calls + run in sequence and each one also sets the entry at |base + count|. */ +void tex_initialize_equivalents(void) +{ + /*tex Order matters here!
*/ + tex_aux_set_eq(null_cs, level_zero, undefined_cs_cmd, null, lmt_hash_state.hash_data.top - 1); + tex_aux_set_eq(internal_glue_base, level_one, internal_glue_reference_cmd, zero_glue, number_glue_pars); + tex_aux_set_eq(register_glue_base, level_one, register_glue_reference_cmd, zero_glue, max_glue_register_index); + tex_aux_set_eq(internal_mu_glue_base, level_one, internal_mu_glue_reference_cmd, zero_glue, number_mu_glue_pars); + tex_aux_set_eq(register_mu_glue_base, level_one, register_mu_glue_reference_cmd, zero_glue, max_mu_glue_register_index); + tex_aux_set_eq(internal_toks_base, level_one, internal_toks_reference_cmd, null, number_tok_pars); + tex_aux_set_eq(register_toks_base, level_one, register_toks_reference_cmd, null, max_toks_register_index); + tex_aux_set_eq(internal_box_base, level_one, internal_box_reference_cmd, null, number_box_pars); + tex_aux_set_eq(register_box_base, level_one, register_box_reference_cmd, null, max_box_register_index); + tex_aux_set_eq(internal_int_base, level_one, internal_int_reference_cmd, 0, number_int_pars); + tex_aux_set_eq(register_int_base, level_one, register_int_reference_cmd, 0, max_int_register_index); + tex_aux_set_eq(internal_attribute_base, level_one, internal_attribute_reference_cmd, unused_attribute_value, number_attribute_pars); + tex_aux_set_eq(register_attribute_base, level_one, register_attribute_reference_cmd, unused_attribute_value, max_attribute_register_index); + tex_aux_set_eq(internal_dimen_base, level_one, internal_dimen_reference_cmd, 0, number_dimen_pars); + tex_aux_set_eq(register_dimen_base, level_one, register_dimen_reference_cmd, 0, max_dimen_register_index); + tex_aux_set_eq(internal_specification_base, level_one, specification_reference_cmd, null, number_specification_pars); + tex_aux_set_eq(undefined_control_sequence, level_zero, undefined_cs_cmd, null, 0); /*tex NOTE(review): with a zero count |tex_aux_set_eq| returns without touching the entry. */ + /*tex why here?
*/ + cat_code_table_par = 0; +} + /*tex Starting at the top of the save stack we go down to the nearest |level_boundary| and then + walk up again: the first |restore_old_value| entry whose value equals |id| makes us return + the value stored in the slot just before it, otherwise we return zero. NOTE(review): the + downward scan has no lower bound, so it presumably relies on a boundary entry always being + present. */ +int tex_located_save_value(int id) +{ + int i = lmt_save_state.save_stack_data.ptr - 1; + while (save_type(i) != level_boundary) { + i--; + } + while (i < lmt_save_state.save_stack_data.ptr) { + if (save_type(i) == restore_old_value && save_value(i) == id) { + /* + if (math_direction_par != save_value(i - 1)) { + return 1; + } + */ + return save_value(i - 1); + } + i++; + } + return 0; +} diff --git a/source/luametatex/source/tex/texequivalents.h b/source/luametatex/source/tex/texequivalents.h new file mode 100644 index 000000000..aaf45d0c1 --- /dev/null +++ b/source/luametatex/source/tex/texequivalents.h @@ -0,0 +1,1776 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_EQUIVALENTS_H +# define LMT_EQUIVALENTS_H + +# include "tex/textypes.h" + +/*tex + + Like the preceding parameters, the following quantities can be changed at compile time to extend + or reduce \TEX's capacity. But if they are changed, it is necessary to rerun the initialization + program |INITEX| to generate new tables for the production \TEX\ program. One can't simply make + helter-skelter changes to the following constants, since certain rather complex initialization + numbers are computed from them. They are defined here using \WEB\ macros, instead of being put + into \PASCAL's |const| list, in order to emphasize this distinction. + + The original token interface at the \LUA\ end used the \quote {real} chr values that are offsets + into the table of equivalents. However, that is sort of fragile when one also provides ways to + construct tokens. For that reason the \LUAMETATEX\ interface is a bit more abstract and therefore + can do some testing. After all, the real numbers don't matter. This means that registers for + instance run from |0..65535| (without the region offsets).
+ + In order to make this easier the token registers are now more consistent with the other registers + in the sense that there is no longer a special cmd for those registers. This was not that hard to + do because most code already was sort of prepared for that move. + + Now, there is one \quote {complication}: integers, dimensions etc references can be registers but + also internal variables. This means that we cannot simply remap the eq slots they refer to. When + we offset by some base (the first register) we end up with negative indices for the internal ones + because they come before this 64K range. So, this is why the \LUA\ interface works with negative + numbers for internal variables. + + Another side effect is that we now have the mu glue internals in the muglue region. This is + possible because we have separated the subtypes from the chr codes. I might also relocate the + special things (like penalties) some day. + + In a couple of cases a specific chr was used that made it possible to share for instance setters. + Examples are |\mkern| and |\mskip|. This resulted in (sort of) funny single numbers in the token + interface, so we have that now normalized as well (at the cost of a few split functions). Of course + that doesn't change the concept, unless one considers the fact that we have more granularity in + node subtypes (no longer parallel to the codes, as there are more) an issue. (Actually we can now + easily introduce hkern and vkern if we want.) + +*/ + +/*tex + + Each entry in |eqtb| is a |memoryword|. Most of these words are of type |two_halves|, and + subdivided into three fields: + + \startitemize + + \startitem + The |eq_level| (a quarterword) is the level of grouping at which this equivalent was + defined. If the level is |level_zero|, the equivalent has never been defined; + |level_one| refers to the outer level (outside of all groups), and this level is also + used for global definitions that never go away.
Higher levels are for equivalents that + will disappear at the end of their group. + \stopitem + + \startitem + The |eq_type| (another quarterword) specifies what kind of entry this is. There are many + types, since each \TEX\ primitive like |\hbox|, |\def|, etc., has its own special code. + The list of command codes above includes all possible settings of the |eq_type| field. + \stopitem + + \startitem + The |equiv| (a halfword) is the current equivalent value. This may be a font number, a + pointer into |mem|, or a variety of other things. + \stopitem + + \stopitemize + + Many locations in |eqtb| have symbolic names. The purpose of the next paragraphs is to define + these names, and to set up the initial values of the equivalents. + + In the first region we have a single entry for the \quote {null csname} of length zero. In + \LUATEX, the active characters and single-letter control sequence names are part of the + next region. + + Then comes region 2, which corresponds to the hash table that we will define later. The maximum + address in this region is used for a dummy control sequence that is perpetually undefined. + There also are several locations for control sequences that are perpetually defined (since they + are used in error recovery). + + Region 3 of |eqtb| contains the |number_regs| |\skip| registers, as well as the glue parameters + defined here. It is important that the \quote {muskip} parameters have larger numbers than the + others. + + Region 4 of |eqtb| contains the local quantities defined here. The bulk of this region is taken + up by five tables that are indexed by eight-bit characters; these tables are important to both + the syntactic and semantic portions of \TEX. There are also a bunch of special things like font + and token parameters, as well as the tables of |\toks| and |\box| registers. + + Region 5 of |eqtb| contains the integer parameters and registers defined here, as well as the + |del_code| table.
The latter table differs from the |cat_code..math_code| tables that precede it, + since delimiter codes are fullword integers while the other kinds of codes occupy at most a + halfword. This is what makes region~5 different from region~4. We will store the |eq_level| + information in an auxiliary array of quarterwords that will be defined later. + + The integer parameters should really be initialized by a macro package; the following + initialization does the minimum to keep \TEX\ from complete failure. + + The final region of |eqtb| contains the dimension parameters defined here, and the |number_regs| + |\dimen| registers. + + Beware: in \LUATEX\ we have so many characters (\UNICODE) that we use a dedicated hash system + for special codes, math properties etc. This means that we have less in the regions than mentioned + here. On the other hand, we do have more registers (attributes) so that makes it a bit larger again. + + The registers get marked as being \quote {undefined} commands. We could actually give them the + right command code etc.\ but for now we just use the ranges as traditional \TEX\ does. + + Most of the symbolic names and hard coded numbers are now enumerations. There is still room for + improvement and occasionally I enter a new round of doing that. However, it takes a lot of time + and checking (more than writing from scratch) as we need to make sure it all behaves like \TEX\ + does. Quite some code went through several stages of reaching this abstraction, just to make sure + that it kept working. These intermediate versions ended up in the \CONTEXT\ distribution so that + any issue would show up soon. A rather major step was splitting the |assign_*_cmd|s into + internal and register commands and ranges. This was a side effect of getting the token interface + at the \LUA\ end a bit nicer; there is really no need to expose the user to codes that demand + catching up with the \TEX\ internals when we can just provide a nice interface.
+ + The font location is kind of special as it holds a halfword data field that points to a font + accessor and as such doesn't fit into a counter concept. Otherwise we could have made it a + counter. We could probably just use a font id and do a lookup elsewhere because this engine is + already doing it differently. So, eventually this needs checking. + +*/ + +/* + For practical reasons we have the regions a bit different. For instance, we also have attributes, local + boxes, no math characters here, etc. Maybe specification codes should get their own region. + + HASH FROZEN + [I|R]FONTS + UNDEFINED + [I|R]GLUE + [I|R]MUGLUE + [I|R]TOKS + [I|R]BOXES + [I|R]INT + [I|R]ATTR + [I|R]DIMEN + SPECIFICATIONS + EQUIVPLUS + + When I'd done a bit of clean up and abstraction (actually it took quite some time because the only + reliable way to do it is stepwise with lots of testing) I wondered why there is a difference in + the way the level is kept track of. For those entries that store a value directly, a separate + |xeq_level| array is used. So, after \quote {following the code}, taking a look at the original + implementation, and a walk, I came to the conclusion that because \LUATEX\ uses 64 bit memory words, + we actually don't need that parallel array: we have plenty of room and, the level fields are not + shared. In traditional \TEX\ we have a memory word with two faces: + + [level] [type] + [ value ] + + but in \LUATEX\ it's wider. There is no overlap. + + [level] [type] [value] + + So, we can get rid of that extra array. Actually, in the \PASCAL\ source we see that this + parallel array is smaller because it only covers the value ranges (the first index starts at + the start of the first relevant register range). Keep in mind that the middle part of the hash + is registers and when we have a frozen hash size, that part is not present which is why there + was that parallel array needed; a side effect of the |extra_hash| extension.
+ + Another side effect of this simplification is that we can store and use the type which can be + handy too. + + For the changes, look for |xeq simplification| comments in the files and for the cleaned up + precursor in the archives of luametatex (in case there is doubt). When the save stack was made + more efficient the old commented |xeq| code has been removed. + + ----------------------------------- + null control sequence + hash entries (hash_size) + multiple frozen control sequences + special sequences (font, undefined) + ----------------------------------- + glue registers + mu glue registers + token registers + box registers + integer registers + attribute registers + dimension registers + specifications + ---------- eqtb size -------------- + extra hash entries + ----------------------------------- + + eqtb_top = eqtb_size + hash_extra + hash_top = hash_extra == 0 ? undefined_control_sequence : eqtb_top; + + There used to be a large font area but I moved that to the font record so that we don't waste + space (it saves some 500K on the format file and plenty of memory). + + Todo: split the eqtb and make the register arrays dynamic. We need to change the save/restore + code then and it might have a slight impact on performance (checking what table to use). + +*/ + +/*tex + Maybe we should multiply the following by 2 but there is no real gain. Many entries end up in the extended + area anyway. + + \starttyping + # define hash_size 65536 + # define hash_prime 55711 + \stoptyping + + Here |hash_size| is the maximum number of control sequences; it should be at most about + |(fix_mem_max - fix_mem_min)/10|. The value of |hash_prime| is a prime number equal to about + 85 percent of |hash_size|. + + The hash runs in parallel to the eqtb and a large hash table makes for many holes and that + compresses badly. For instance: + + 590023 => down to 1024 * 512 == 524288 ==> 85% = 445644 => prime 445633/445649 + + will make a much larger format and we gain nothing. 
Actually, because we have extra hash + anyway, this whole 85\% criterium is irrelevant: we only need to make sure that we have + enough room for the frozen sequences (assuming we stay within the concept). + + primes: + + \starttyping + 65447 65449 65479 65497 65519 65521 => 65536 (85% == 55711) + 131009 131011 131023 131041 131059 131063 => 131072 (85% == 111409) + \stoptyping + + lookups: + + \starttyping + n=131040 cs=46426 indirect= 9173 + n= 65496 cs=46426 indirect=14512 + \stoptyping + +*/ + +// # define hash_size 65536 +// # define hash_prime 65497 + +# define hash_size 131072 /*tex 128K */ +# define hash_prime 131041 /*tex Plenty of room for the frozen. */ + +# define null_cs 1 /*tex equivalent of |\csname\| |\endcsname| */ +# define hash_base (null_cs + 1) /*tex beginning of region 2, for the hash table */ +# define frozen_control_sequence (hash_base + hash_size) /*tex for error recovery */ + +typedef enum deep_frozen_cs_codes { + deep_frozen_cs_protection_code = frozen_control_sequence, /*tex inaccessible but definable */ + deep_frozen_cs_cr_code, /*tex permanent |\cr| */ + deep_frozen_cs_end_group_code, /*tex permanent |\endgroup| */ + deep_frozen_cs_right_code, /*tex permanent |\right| */ + deep_frozen_cs_fi_code, /*tex permanent |\fi| */ + deep_frozen_cs_no_if_code, /*tex hidden |\noif| */ + deep_frozen_cs_always_code, /*tex hidden internalized |\enforces| */ + deep_frozen_cs_end_template_1_code, /*tex permanent |\endtemplate| */ + deep_frozen_cs_end_template_2_code, /*tex second permanent |\endtemplate| */ + deep_frozen_cs_relax_code, /*tex permanent |\relax| */ + deep_frozen_cs_end_write_code, /*tex permanent |\endwrite| */ + deep_frozen_cs_dont_expand_code, /*tex permanent |\notexpanded:| */ + deep_frozen_cs_null_font_code, /*tex permanent |\nullfont| */ + deep_frozen_cs_undefined_code, +} deep_frozen_cs_codes; + +# define first_deep_frozen_cs_location deep_frozen_cs_protection_code +# define last_deep_frozen_cs_location deep_frozen_cs_undefined_code 
+ +typedef enum glue_codes { + line_skip_code, /*tex interline glue if |baseline_skip| is infeasible */ + baseline_skip_code, /*tex desired glue between baselines */ + par_skip_code, /*tex extra glue just above a paragraph */ + above_display_skip_code, /*tex extra glue just above displayed math */ + below_display_skip_code, /*tex extra glue just below displayed math */ + above_display_short_skip_code, /*tex glue above displayed math following short lines */ + below_display_short_skip_code, /*tex glue below displayed math following short lines */ + left_skip_code, /*tex glue at left of justified lines */ + right_skip_code, /*tex glue at right of justified lines */ + top_skip_code, /*tex glue at top of main pages */ + split_top_skip_code, /*tex glue at top of split pages */ + tab_skip_code, /*tex glue between aligned entries */ + space_skip_code, /*tex glue between words (if not |zero_glue|) */ + xspace_skip_code, /*tex glue after sentences (if not |zero_glue|) */ + par_fill_left_skip_code, /*tex glue at the start of the last line of paragraph */ + par_fill_right_skip_code, /*tex glue on last line of paragraph */ + par_init_left_skip_code, + par_init_right_skip_code, + /* indent_skip_code, */ /*tex internal, might go away here */ + /* left_hang_skip_code, */ /*tex internal, might go away here */ + /* right_hang_skip_code, */ /*tex internal, might go away here */ + /* correction_skip_code, */ /*tex internal, might go away here */ + /* inter_math_skip_code, */ /*tex internal, might go away here */ + math_skip_code, /*tex glue before and after inline math */ + math_threshold_code, + /*tex total number of glue parameters */ + number_glue_pars, +} glue_codes; + +# define first_glue_code line_skip_code +# define last_glue_code math_threshold_code + +/*tex + + In addition to the three original predefined muskip registers we have two more. 
These muskips + are used in a symbolic way: by using a reference we can change their values on the fly and the + engine will pick up the value set at the end of the formula (and use it in the second pass). + In the other engines the threesome are hard coded into the atom pair spacing. + + In \LUAMETATEX\ we have a configurable system so these three registers are only used in the + initialization, can be overloaded in the macro package, and are saved in the format file (as + any other register). But there can be more than these. Before we had a way to link spacing to + arbitrary registers (in the user's register space) we added |\tinymuskip| because we needed it. + It is not used in initializations in the engine but is applied in the \CONTEXT\ format. We + could throw it out and use just a user register now but we consider it part of the (updated) + concept so it will stick around. Even more: we decided that a smaller one makes sense so end + June 2022 Mikael and I decided to also provide |\pettymuskip| for which Mikael saw a good use + case in the spacing in scripts between ordinary symbols and binary as well as relational ones. + + The Cambridge dictionary describes \quote {petty} as \quotation {not important and not worth + giving attention to}, but of course we do! It's just that till now we never saw any request + for an upgrade of the math (sub) engine, let alone that \TEX\ users bothered about the tiny + and petty spacing artifacts (and possibilities) of the engine. Both internal registers are + dedicated to Don Knuth who {\em does} pay a lot of attention to details but who of course will + not use this engine and thereby not spoiled. So, they are there and at the same time they + are not. But: in \CONTEXT\ they {\em are} definitely used!
+ +*/ + +typedef enum mu_glue_codes { + zero_mu_skip_code, + petty_mu_skip_code, /*tex petty space in math formula */ + tiny_mu_skip_code, /*tex tiny space in math formula */ + thin_mu_skip_code, /*tex thin space in math formula */ + med_mu_skip_code, /*tex medium space in math formula */ + thick_mu_skip_code, /*tex thick space in math formula */ + /*tex total number of mu glue parameters */ + number_mu_glue_pars, +} mu_glue_codes; + +# define first_mu_glue_code petty_mu_skip_code +# define last_mu_glue_code thick_mu_skip_code + +typedef enum tok_codes { + output_routine_code, /*tex points to token list for |\output| */ + every_par_code, /*tex points to token list for |\everypar| */ + every_math_code, /*tex points to token list for |\everymath| */ + every_display_code, /*tex points to token list for |\everydisplay| */ + every_hbox_code, /*tex points to token list for |\everyhbox| */ + every_vbox_code, /*tex points to token list for |\everyvbox| */ + every_math_atom_code, /*tex points to token list for |\everymathatom| */ + every_job_code, /*tex points to token list for |\everyjob|*/ + every_cr_code, /*tex points to token list for |\everycr| */ + every_tab_code, /*tex points to token list for |\everytab| */ + error_help_code, /*tex points to token list for |\errhelp|*/ + every_before_par_code, /*tex points to token list for |\everybeforepar| */ + every_eof_code, /*tex points to token list for |\everyeof| */ + end_of_group_code, /*tex collects end-of-group tokens, internal register */ + // end_of_par_code, + /*tex total number of token parameters */ + number_tok_pars, +} tok_codes; + +# define first_toks_code output_routine_code +# define last_toks_code every_eof_code + +typedef enum specification_codes { + par_shape_code, /*tex specifies paragraph shape, internal register */ + inter_line_penalties_code, /*tex additional penalties between lines */ + club_penalties_code, /*tex penalties for creating club lines */ + widow_penalties_code, /*tex penalties for creating 
widow lines */ + display_widow_penalties_code, /*tex ditto, just before a display */ + orphan_penalties_code, + math_forward_penalties_code, + math_backward_penalties_code, + number_specification_pars, +} specification_codes; + +# define first_specification_code par_shape_code +# define last_specification_code math_backward_penalties_code + +/*tex Beware: these are indices into |page_builder_state.page_so_far| array! */ + +typedef enum page_property_codes { + page_goal_code, + page_vsize_code, + page_total_code, + page_depth_code, + dead_cycles_code, + insert_penalties_code, + insert_heights_code, + insert_storing_code, /* page */ + insert_distance_code, + insert_multiplier_code, + insert_limit_code, + insert_storage_code, /* per insert */ + insert_penalty_code, + insert_maxdepth_code, + insert_height_code, + insert_depth_code, + insert_width_code, + page_stretch_code, + page_filstretch_code, + page_fillstretch_code, + page_filllstretch_code, + page_shrink_code, +} page_property_codes; + +# define first_page_property_code page_goal_code +# define last_page_property_code insert_width_code + +/*tex + We cheat: these previous bases are to really bases which is why math and del get separated by + one. See usage! Todo: group them better (also elsewhere in switches). 
+*/ + +typedef enum int_codes { + pre_tolerance_code, /*tex badness tolerance before hyphenation */ + tolerance_code, /*tex badness tolerance after hyphenation */ + line_penalty_code, /*tex added to the badness of every line */ + hyphen_penalty_code, /*tex penalty for break after discretionary hyphen */ + ex_hyphen_penalty_code, /*tex penalty for break after explicit hyphen */ + club_penalty_code, /*tex penalty for creating a club line */ + widow_penalty_code, /*tex penalty for creating a widow line */ + display_widow_penalty_code, /*tex ditto, just before a display */ + broken_penalty_code, /*tex penalty for breaking a page at a broken line */ + post_binary_penalty_code, /*tex penalty for breaking after a binary operation */ + post_relation_penalty_code, /*tex penalty for breaking after a relation */ + pre_display_penalty_code, /*tex penalty for breaking just before a displayed formula */ + post_display_penalty_code, /*tex penalty for breaking just after a displayed formula */ + pre_inline_penalty_code, /*tex penalty for breaking just before an inlined formula */ + post_inline_penalty_code, /*tex penalty for breaking just after an inlined formula */ + inter_line_penalty_code, /*tex additional penalty between lines */ + double_hyphen_demerits_code, /*tex demerits for double hyphen break */ + final_hyphen_demerits_code, /*tex demerits for final hyphen break */ + adj_demerits_code, /*tex demerits for adjacent incompatible lines */ + /* mag_code, */ /*tex magnification ratio */ + delimiter_factor_code, /*tex ratio for variable-size delimiters */ + looseness_code, /*tex change in number of lines for a paragraph */ + time_code, /*tex current time of day */ + day_code, /*tex current day of the month */ + month_code, /*tex current month of the year */ + year_code, /*tex current year of our Lord */ + show_box_breadth_code, /*tex nodes per level in |show_box| */ + show_box_depth_code, /*tex maximum level in |show_box| */ + show_node_details_code, /*tex controls subtype and 
attribute details */ + hbadness_code, /*tex hboxes exceeding this badness will be shown by |hpack| */ + vbadness_code, /*tex vboxes exceeding this badness will be shown by |vpack| */ + pausing_code, /*tex pause after each line is read from a file */ + tracing_online_code, /*tex show diagnostic output on terminal */ + tracing_macros_code, /*tex show macros as they are being expanded */ + tracing_stats_code, /*tex show memory usage if \TeX\ knows it */ + tracing_paragraphs_code, /*tex show line-break calculations */ + tracing_pages_code, /*tex show page-break calculations */ + tracing_output_code, /*tex show boxes when they are shipped out */ + tracing_lost_chars_code, /*tex show characters that aren't in the font */ + tracing_commands_code, /*tex show command codes at |big_switch| */ + tracing_restores_code, /*tex show equivalents when they are restored */ + tracing_fonts_code, + tracing_assigns_code, /*tex show assignments */ + tracing_groups_code, /*tex show save/restore groups */ + tracing_ifs_code, /*tex show conditionals */ + tracing_math_code, + tracing_levels_code, /*tex show levels when tracing */ + tracing_nesting_code, /*tex show incomplete groups and ifs within files */ + tracing_alignments_code, /*tex show nesting of noalign and preambles */ + tracing_inserts_code, /*tex show some info about insert processing */ + tracing_marks_code, /*tex show state of marks */ + tracing_adjusts_code, /*tex show state of marks */ + tracing_hyphenation_code, /*tex show some info regarding hyphenation */ + tracing_expressions_code, /*tex show some info regarding expressions */ + tracing_nodes_code, /*tex show node numbers too */ + tracing_full_boxes_code, /*tex show [over/under]full boxes in the log */ + tracing_penalties_code, + uc_hyph_code, /*tex hyphenate words beginning with a capital letter */ + output_penalty_code, /*tex penalty found at current page break */ + max_dead_cycles_code, /*tex bound on consecutive dead cycles of output */ + hang_after_code, /*tex 
hanging indentation changes after this many lines */ + floating_penalty_code, /*tex penalty for insertions heldover after a split */ + global_defs_code, /*tex override |\global| specifications */ + family_code, /*tex current family */ + escape_char_code, /*tex escape character for token output */ + default_hyphen_char_code, /*tex value of |\hyphenchar| when a font is loaded */ + default_skew_char_code, /*tex value of |\skewchar| when a font is loaded */ + end_line_char_code, /*tex character placed at the right end of the buffer */ + new_line_char_code, /*tex character that prints as |print_ln| */ + language_code, /*tex current language */ + font_code, /*tex current font */ + hyphenation_mode_code, + left_hyphen_min_code, /*tex minimum left hyphenation fragment size */ + right_hyphen_min_code, /*tex minimum right hyphenation fragment size */ + holding_inserts_code, /*tex do not remove insertion nodes from |\box255| */ + holding_migrations_code, + error_context_lines_code, /*tex maximum intermediate line pairs shown */ + local_interline_penalty_code, /*tex local |\interlinepenalty| */ + local_broken_penalty_code, /*tex local |\brokenpenalty| */ + disable_spaces_code, + glyph_scale_code, + glyph_x_scale_code, + glyph_y_scale_code, + glyph_data_code, + glyph_state_code, + glyph_script_code, + glyph_options_code, + glyph_text_scale_code, + glyph_script_scale_code, + glyph_scriptscript_scale_code, + /* glue_data_code, */ + cat_code_table_code, + output_box_code, + ex_hyphen_char_code, + adjust_spacing_code, /*tex level of spacing adjusting */ + adjust_spacing_step_code, /*tex level of spacing adjusting step */ + adjust_spacing_stretch_code, /*tex level of spacing adjusting stretch */ + adjust_spacing_shrink_code, /*tex level of spacing adjusting shrink */ + protrude_chars_code, /*tex protrude chars at left/right edge of paragraphs */ + pre_display_direction_code, /*tex text direction preceding a display */ + last_line_fit_code, /*tex adjustment for last line of paragraph 
*/ + saving_vdiscards_code, /*tex save items discarded from vlists */ + saving_hyph_codes_code, /*tex save hyphenation codes for languages */ + math_eqno_gap_step_code, /*tex factor/1000 used for distance between eq and eqno */ + math_display_skip_mode_code, + math_scripts_mode_code, + /* math_script_box_mode_code, */ + /* math_script_char_mode_code, */ + math_limits_mode_code, + math_nolimits_mode_code, + math_rules_mode_code, + math_rules_fam_code, + math_penalties_mode_code, + math_check_fences_mode_code, + /* math_delimiters_mode_code, */ + /* math_fences_mode_code, */ + /* math_rule_thickness_mode_code, */ + math_slack_mode_code, + /* math_flatten_mode_code, */ + math_skip_mode_code, + math_double_script_mode_code, + /* math_control_mode_code, */ + math_font_control_code, + math_display_mode_code, + math_dict_group_code, + math_dict_properties_code, + math_pre_display_gap_factor_code, + pre_binary_penalty_code, + pre_relation_penalty_code, + first_valid_language_code, + automatic_hyphen_penalty_code, + explicit_hyphen_penalty_code, + exception_penalty_code, + copy_lua_input_nodes_code, + auto_migration_mode_code, + normalize_line_mode_code, + normalize_par_mode_code, + math_spacing_mode_code, + math_grouping_mode_code, + math_glue_mode_code, + math_begin_class_code, + math_end_class_code, + math_left_class_code, + math_right_class_code, + sup_mark_mode_code, + par_direction_code, + text_direction_code, + math_direction_code, + line_direction_code, /*tex gets remapped so is no real register */ + overload_mode_code, + auto_paragraph_mode_code, + shaping_penalties_mode_code, + shaping_penalty_code, + orphan_penalty_code, + alignment_cell_source_code, + alignment_wrap_source_code, + /* page_boundary_penalty_code, */ + line_break_criterium_code, + /* those below these are not interfaced via primitives */ + internal_par_state_code, + internal_dir_state_code, + internal_math_style_code, + internal_math_scale_code, + /*tex total number of integer parameters */ + 
first_math_class_code, + last_math_class_code = first_math_class_code + max_n_of_math_classes, + first_math_atom_code, + last_math_atom_code = first_math_atom_code + max_n_of_math_classes, + first_math_options_code, + last_math_options_code = first_math_options_code + max_n_of_math_classes, + first_math_parent_code, + last_math_parent_code = first_math_parent_code + max_n_of_math_classes, + first_math_pre_penalty_code, + last_math_pre_penalty_code = first_math_pre_penalty_code + max_n_of_math_classes, + first_math_post_penalty_code, + last_math_post_penalty_code = first_math_post_penalty_code + max_n_of_math_classes, + first_math_display_pre_penalty_code, + last_math_display_pre_penalty_code = first_math_display_pre_penalty_code + max_n_of_math_classes, + first_math_display_post_penalty_code, + last_math_display_post_penalty_code = first_math_display_post_penalty_code + max_n_of_math_classes, + first_math_ignore_code, + last_math_ignore_code = first_math_ignore_code + math_parameter_last, + /* */ + number_int_pars, +} int_codes; + +# define first_int_code pre_tolerance_code +# define last_int_code line_break_criterium_code + +typedef enum dimen_codes { + par_indent_code, /*tex indentation of paragraphs */ + math_surround_code, /*tex space around math in text */ + line_skip_limit_code, /*tex threshold for |line_skip| instead of |baseline_skip| */ + hsize_code, /*tex line width in horizontal mode */ + vsize_code, /*tex page height in vertical mode */ + max_depth_code, /*tex maximum depth of boxes on main pages */ + split_max_depth_code, /*tex maximum depth of boxes on split pages */ + box_max_depth_code, /*tex maximum depth of explicit vboxes */ + hfuzz_code, /*tex tolerance for overfull hbox messages */ + vfuzz_code, /*tex tolerance for overfull vbox messages */ + delimiter_shortfall_code, /*tex maximum amount uncovered by variable delimiters */ + null_delimiter_space_code, /*tex blank space in null delimiters */ + script_space_code, /*tex extra space after 
subscript or superscript */ + pre_display_size_code, /*tex length of text preceding a display */ + display_width_code, /*tex length of line for displayed equation */ + display_indent_code, /*tex indentation of line for displayed equation */ + overfull_rule_code, /*tex width of rule that identifies overfull hboxes */ + hang_indent_code, /*tex amount of hanging indentation */ + /* h_offset_code, */ /*tex amount of horizontal offset when shipping pages out */ + /* v_offset_code, */ /*tex amount of vertical offset when shipping pages out */ + emergency_stretch_code, /*tex reduces badnesses on final pass of line-breaking */ + glyph_x_offset_code, + glyph_y_offset_code, + px_dimen_code, + tab_size_code, + page_extra_goal_code, + /*tex total number of dimension parameters */ + number_dimen_pars, +} dimen_codes; + +# define first_dimen_code par_indent_code +# define last_dimen_code tab_size_code + +typedef enum attribute_codes { + /*tex total number of attribute parameters */ + number_attribute_pars, +} attribute_codes; + +// typedef enum special_sequence_codes { +// // current_font_sequence_code, +// undefined_control_sequence_code, +// n_of_special_sequences, +// } special_sequence_codes; +// +// /* The last one is frozen_null_font. 
*/
//
// # define special_sequence_base        (last_frozen_cs_loc + 1)
// # define current_font_sequence        (special_sequence_base + current_font_sequence_code)
// # define undefined_control_sequence   (special_sequence_base + undefined_control_sequence_code)
// # define first_register_base          (special_sequence_base + n_of_special_sequences)

# define undefined_control_sequence deep_frozen_cs_undefined_code

/*tex Both bases start right after the last deep frozen control sequence location. */

# define special_sequence_base (last_deep_frozen_cs_location + 1)
# define first_register_base   (last_deep_frozen_cs_location + 1)

/*tex
    Every class below gets a block of internal parameters that is followed by a block of
    registers. Each base is derived from the previous one. The |*_location| macros map an
    index onto a location and the |*_number| macros map a location back onto an index.
*/

/*tex Glue: */

# define internal_glue_base              (first_register_base)
# define register_glue_base              (internal_glue_base + number_glue_pars + 1)
# define internal_glue_location(a)       (internal_glue_base + (a))
# define register_glue_location(a)       (register_glue_base + (a))
# define internal_glue_number(a)         ((a) - internal_glue_base)
# define register_glue_number(a)         ((a) - register_glue_base)

/*tex Math glue: */

# define internal_mu_glue_base           (register_glue_base + max_n_of_glue_registers)
# define register_mu_glue_base           (internal_mu_glue_base + number_mu_glue_pars + 1)
# define internal_mu_glue_location(a)    (internal_mu_glue_base + (a))
# define register_mu_glue_location(a)    (register_mu_glue_base + (a))
# define internal_mu_glue_number(a)      ((a) - internal_mu_glue_base)
# define register_mu_glue_number(a)      ((a) - register_mu_glue_base)

/*tex Token lists: */

# define internal_toks_base              (register_mu_glue_base + max_n_of_mu_glue_registers)
# define register_toks_base              (internal_toks_base + number_tok_pars + 1)
# define internal_toks_location(a)       (internal_toks_base + (a))
# define register_toks_location(a)       (register_toks_base + (a))
# define internal_toks_number(a)         ((a) - internal_toks_base)
# define register_toks_number(a)         ((a) - register_toks_base)

/*tex Boxes: */

# define internal_box_base               (register_toks_base + max_n_of_toks_registers)
# define register_box_base               (internal_box_base + number_box_pars + 1)
# define internal_box_location(a)        (internal_box_base + (a))
# define register_box_location(a)
(register_box_base + (a)) +# define internal_box_number(a) ((a) - internal_box_base) +# define register_box_number(a) ((a) - register_box_base) + +# define internal_int_base (register_box_base + max_n_of_box_registers) +# define register_int_base (internal_int_base + number_int_pars + 1) +# define internal_int_location(a) (internal_int_base + (a)) +# define register_int_location(a) (register_int_base + (a)) +# define internal_int_number(a) ((a) - internal_int_base) +# define register_int_number(a) ((a) - register_int_base) + +# define internal_attribute_base (register_int_base + max_n_of_int_registers) +# define register_attribute_base (internal_attribute_base + number_attribute_pars + 1) +# define internal_attribute_location(a) (internal_attribute_base + (a)) +# define register_attribute_location(a) (register_attribute_base + (a)) +# define internal_attribute_number(a) ((a) - internal_attribute_base) +# define register_attribute_number(a) ((a) - register_attribute_base) + +# define internal_dimen_base (register_attribute_base + max_n_of_attribute_registers) +# define register_dimen_base (internal_dimen_base + number_dimen_pars + 1) +# define internal_dimen_location(a) (internal_dimen_base + (a)) +# define register_dimen_location(a) (register_dimen_base + (a)) +# define internal_dimen_number(a) ((a) - internal_dimen_base) +# define register_dimen_number(a) ((a) - register_dimen_base) + +# define internal_specification_base (register_dimen_base + max_n_of_dimen_registers) +# define internal_specification_location(a) (internal_specification_base + (a)) +# define internal_specification_number(a) ((a) - internal_specification_base) + +# define eqtb_size (internal_specification_base + number_specification_pars) + +# define eqtb_indirect_range(n) ((n < internal_glue_base) || ((n > eqtb_size) && (n <= lmt_hash_state.hash_data.top))) +# define eqtb_out_of_range(n) ((n >= undefined_control_sequence) && ((n <= eqtb_size) || n > lmt_hash_state.hash_data.top)) +# define 
eqtb_valid_cs(n) ((n == 0) || (n > lmt_hash_state.hash_data.top) || ((n > frozen_control_sequence) && (n <= eqtb_size))) + +# define character_in_range(i) (i >= 0 && i <= max_character_code) +# define catcode_in_range(i) (i >= 0 && i <= max_category_code) +# define family_in_range(i) (i >= 0 && i <= max_math_family_index) +# define class_in_range(i) (i >= 0 && i <= max_math_class_code) +# define half_in_range(i) (i >= 0 && i <= max_half_value) +# define box_index_in_range(i) (i >= 0 && i <= max_box_index) + +/* These also have funny offsets: */ + +typedef enum align_codes { + tab_mark_code, + span_code, + omit_code, + align_content_code, + no_align_code, + cr_code, + cr_cr_code, +} align_codes; + +/* + typedef struct equivalents_state_info { + } equivalents_state_info ; + + extern equivalents_state_info lmt_equivalents_state; +*/ + +extern void tex_initialize_levels (void); +extern void tex_initialize_equivalents (void); +extern void tex_synchronize_equivalents (void); +extern void tex_initialize_undefined_cs (void); +extern void tex_dump_equivalents_mem (dumpstream f); +extern void tex_undump_equivalents_mem (dumpstream f); + +/*tex + The more low level |_field| shortcuts are used when we (for instance) work with copies, as done + in the save stack entries. In most cases we use the second triplet of shortcuts. We replaced + |equiv(A)| and |equiv_value(A)| by |eq_value(A)}|. 
+*/ + +# define eq_level_field(A) (A).quart01 +# define eq_full_field(A) (A).quart00 +# define eq_type_field(A) (A).single00 +# define eq_flag_field(A) (A).single01 +# define eq_value_field(A) (A).half1 + +# define eq_level(A) lmt_hash_state.eqtb[(A)].quart01 /*tex level of definition */ +# define eq_full(A) lmt_hash_state.eqtb[(A)].quart00 /*tex command code for equivalent */ +# define eq_type(A) lmt_hash_state.eqtb[(A)].single00 /*tex command code for equivalent */ +# define eq_flag(A) lmt_hash_state.eqtb[(A)].single01 +# define eq_value(A) lmt_hash_state.eqtb[(A)].half1 + +# define set_eq_level(A,B) lmt_hash_state.eqtb[(A)].quart01 = (quarterword) (B) +# define set_eq_type(A,B) lmt_hash_state.eqtb[(A)].single00 = (singleword) (B) +# define set_eq_flag(A,B) lmt_hash_state.eqtb[(A)].single01 = (singleword) (B) +# define set_eq_value(A,B) lmt_hash_state.eqtb[(A)].half1 = (B) + +# define copy_eqtb_entry(target,source) lmt_hash_state.eqtb[target] = lmt_hash_state.eqtb[source] + +# define equal_eqtb_entries(A,B) ( \ + (lmt_hash_state.eqtb[(A)].half0 == lmt_hash_state.eqtb[(B)].half0) \ + && (lmt_hash_state.eqtb[(A)].half1 == lmt_hash_state.eqtb[(B)].half1) \ +) + +/*tex + + Because we operate in 64 bit we padd with a halfword, and because if that we have an extra field. Now, + because we already no longer need the parallel eqtb level table, we can use this field to store the + value alongside which makes that we can turn the dual slot |restore_old_value| and |saved_eqtb| into + one which in turn makes stack usage shrink. The performance gain is probably neglectable. + +*/ + +typedef struct save_record { + quarterword saved_level; + quarterword saved_type; /*tex We need less so we can actually decide to store the offset as check. */ + halfword saved_value; /*tex Started out as padding, is now actually used for value. 
*/ + memoryword saved_word; +} save_record; + +typedef struct save_state_info { + save_record *save_stack; + memory_data save_stack_data; + quarterword current_level; /*tex current nesting level for groups */ + quarterword current_group; /*tex current group type */ + int current_boundary; /*tex where the current level begins */ + int padding; +} save_state_info; + +extern save_state_info lmt_save_state; + +# define cur_level lmt_save_state.current_level +# define cur_group lmt_save_state.current_group +# define cur_boundary lmt_save_state.current_boundary + +/*tex + + We use the notation |saved(k)| to stand for an item that appears in location |save_ptr + k| of + the save stack. + + The level field is also available for other purposes, so maybe we need an alias that is more + generic. + +*/ + +# define save_type(A) lmt_save_state.save_stack[(A)].saved_type /*tex classifies a |save_stack| entry */ +# define save_extra(A) lmt_save_state.save_stack[(A)].saved_level /*tex a more generic alias: to be used */ +# define save_level(A) lmt_save_state.save_stack[(A)].saved_level /*tex saved level for regions 5 and 6, or group code, or ... */ +# define save_value(A) lmt_save_state.save_stack[(A)].saved_value /*tex |eqtb| location or token or |save_stack| location or ... 
*/ +# define save_word(A) lmt_save_state.save_stack[(A)].saved_word /*tex |eqtb| entry */ + +# define saved_valid(A) (lmt_save_state.save_stack_data.ptr + (A) >= 0) +# define saved_type(A) lmt_save_state.save_stack[lmt_save_state.save_stack_data.ptr + (A)].saved_type +# define saved_extra(A) lmt_save_state.save_stack[lmt_save_state.save_stack_data.ptr + (A)].saved_level +# define saved_level(A) lmt_save_state.save_stack[lmt_save_state.save_stack_data.ptr + (A)].saved_level +# define saved_value(A) lmt_save_state.save_stack[lmt_save_state.save_stack_data.ptr + (A)].saved_value +# define saved_word(A) lmt_save_state.save_stack[lmt_save_state.save_stack_data.ptr + (A)].saved_word + +inline void tex_set_saved_record(halfword ptr, quarterword type, quarterword level, halfword value) +{ + saved_type(ptr) = type; + saved_level(ptr) = level; + saved_value(ptr) = value; +} + +# define reserved_save_stack_slots 32 /* was 8 */ + +/*tex + + The rather explicit |save_| items indicate a type. They are sometimes used to lookup a specific + field (when tracing). 
+*/ + +typedef enum save_types { + restore_old_value, /*tex a value should be restored later */ + restore_zero, /*tex an undefined entry should be restored */ + insert_tokens, + restore_lua, + level_boundary, /*tex the beginning of a group */ + /* */ + saved_line_number, + /* */ + saved_insert_index, + /* */ + saved_discretionary_count, + /* */ + saved_text_direction, + /* */ + saved_equation_number_location, + /* */ + saved_choices_count, + /* */ + saved_fraction_variant, + saved_fraction_auto_style, + saved_fraction_user_style, + saved_operator_variant, + /* */ + saved_attribute_list, + /* */ + saved_math_pointer, + saved_math_class, + /* */ + saved_box_type, + saved_box_context, + saved_box_spec, + saved_box_direction, + saved_box_attr_list, + saved_box_pack, + saved_box_orientation, + saved_box_anchor, + saved_box_geometry, + saved_box_xoffset, + saved_box_yoffset, + saved_box_xmove, + saved_box_ymove, + saved_box_reverse, + saved_box_discard, + saved_box_noskips, + saved_box_callback, + saved_box_container, + saved_box_shift, + saved_box_source, + saved_box_target, + saved_box_axis, + saved_box_class, + saved_box_state, + saved_box_retain, + /* */ + saved_local_box_location, + saved_local_box_index, + saved_local_box_options, + /* */ + saved_adjust_location, + saved_adjust_options, + saved_adjust_index, + saved_adjust_attr_list, + saved_adjust_depth_before, + saved_adjust_depth_after, +} save_types; + +/*tex Nota bena: |equiv_value| is the same as |equiv| but sometimes we use that name instead. 
*/ + +// int_par(A) hash_state.eqtb_i_i[(A)].half1 + +# define int_parameter(A) eq_value(internal_int_location(A)) +# define count_parameter(A) eq_value(internal_int_location(A)) +# define attribute_parameter(A) eq_value(internal_attribute_location(A)) +# define dimen_parameter(A) eq_value(internal_dimen_location(A)) +# define toks_parameter(A) eq_value(internal_toks_location(A)) +# define glue_parameter(A) eq_value(internal_glue_location(A)) +# define mu_glue_parameter(A) eq_value(internal_mu_glue_location(A)) +# define box_parameter(A) eq_value(internal_box_location(A)) +# define specification_parameter(A) eq_value(internal_specification_location(A)) + +/*tex These come from |\ALEPH| aka |\OMEGA|: */ + + +# define is_valid_local_box_code(c) (c >= first_local_box_code && c <= last_local_box_code) + +/*tex + + Here are the group codes that are used to discriminate between different kinds of groups. They + allow \TEX\ to decide what special actions, if any, should be performed when a group ends. + + Some groups are not supposed to be ended by right braces. For example, the |$| that begins a + math formula causes a |math_shift_group| to be started, and this should be terminated by a + matching |$|. Similarly, a group that starts with |\left| should end with |\right|, and one + that starts with |\begingroup| should end with |\endgroup|. 
+ +*/ + +typedef enum tex_group_codes { + bottom_level_group, /*tex group code for the outside world */ + simple_group, /*tex group code for local structure only */ + hbox_group, /*tex code for |\hbox| */ + adjusted_hbox_group, /*tex code for |\hbox| in vertical mode */ + vbox_group, /*tex code for |\vbox| */ + vtop_group, /*tex code for |\vtop| */ + align_group, /*tex code for |\halign|, |\valign| */ + no_align_group, /*tex code for |\noalign| */ + output_group, /*tex code for output routine */ + math_group, /*tex code for, e.g., |\char'136| */ + discretionary_group, /*tex code for |\discretionary|' */ + insert_group, /*tex code for |\insert| */ + vadjust_group, /*tex code for |\vadjust| */ + vcenter_group, /*tex code for |\vcenter| */ + math_fraction_group, /*tex code for |\over| and friends */ + math_operator_group, + math_choice_group, /*tex code for |\mathchoice| */ + also_simple_group, /*tex code for |\begingroup|\unknown|\egroup| */ + semi_simple_group, /*tex code for |\begingroup|\unknown|\endgroup| */ + math_simple_group, /*tex code for |\beginmathgroup|\unknown|\endmathgroup| */ + math_shift_group, /*tex code for |$|\unknown\|$| */ + math_fence_group, /*tex code for fences |\left|\unknown|\right| */ + local_box_group, /*tex code for |\localleftbox|\unknown|localrightbox| */ + split_off_group, /*tex box code for the top part of a |\vsplit| */ + split_keep_group, /*tex box code for the bottom part of a |\vsplit| */ + preamble_group, /*tex box code for the preamble processing in an alignment */ + align_set_group, /*tex box code for the final item pass in an alignment */ + finish_row_group, /*tex box code for a provisory line in an alignment */ + lua_group, +} tex_group_codes; + +typedef enum saved_group_items { + saved_group_line_number = 0, + saved_group_level_boundary = 1, + saved_group_n_of_items = 2, +} saved_group_items; + +/* + In the end I decided to split them into context and begin, but maybe some day + they all merge into one (easier on tracing and 
reporting in shared helpers). +*/ + +typedef enum tex_par_context_codes { + normal_par_context, + vmode_par_context, + vbox_par_context, + vtop_par_context, + vcenter_par_context, + vadjust_par_context, + insert_par_context, + output_par_context, + align_par_context, + no_align_par_context, + span_par_context, + reset_par_context, +} tex_par_context_codes; + +typedef enum tex_alignment_context_codes { + preamble_pass_alignment_context, + preroll_pass_alignment_context, + package_pass_alignment_context, + wrapup_pass_alignment_context, +} tex_alignment_context_codes; + +typedef enum tex_page_context_codes { + box_page_context, + end_page_context, + vadjust_page_context, + penalty_page_context, + boundary_page_context, + insert_page_context, + hmode_par_page_context, + vmode_par_page_context, + begin_paragraph_page_context, + before_display_page_context, + after_display_page_context, + after_output_page_context, + alignment_page_context, +} tex_page_context_codes; + +typedef enum tex_append_line_context_codes { + box_append_line_context, + pre_box_append_line_context, + pre_adjust_append_line_context, + post_adjust_append_line_context, + pre_migrate_append_line_context, + post_migrate_append_line_context, +} tex_append_line_context_codes; + +typedef enum tex_par_begin_codes { + normal_par_begin, + force_par_begin, + indent_par_begin, + no_indent_par_begin, + math_char_par_begin, + char_par_begin, + boundary_par_begin, + space_par_begin, + math_par_begin, + kern_par_begin, + hskip_par_begin, + un_hbox_char_par_begin, + valign_char_par_begin, + vrule_char_par_begin, +} tex_par_begin_codes; + +typedef enum tex_tracing_levels_codes { + tracing_levels_group = 0x01, + tracing_levels_input = 0x02, + tracing_levels_catcodes = 0x04, +} tex_tracing_levels_codes; + +extern void tex_initialize_save_stack (void); +/* int tex_room_on_save_stack (void); */ +extern void tex_save_halfword_on_stack (quarterword t, halfword v); +extern void tex_show_cmd_chr (halfword cmd, halfword 
chr); +extern void tex_new_save_level (quarterword c); /*tex begin a new level of grouping */ +extern int tex_saved_line_at_level (void); +extern void tex_eq_define (halfword p, singleword cmd, halfword chr); /*tex new data for |eqtb| */ +extern void tex_eq_word_define (halfword p, int w); +extern void tex_geq_define (halfword p, singleword cmd, halfword chr); /*tex global |eq_define| */ +extern void tex_geq_word_define (halfword p, int w); /*tex global |eq_word_define| */ +extern void tex_save_for_after_group (halfword t); +extern void tex_unsave (void); /*tex pops the top level off the save stack */ +extern void tex_show_save_groups (void); +extern int tex_located_save_value (int id); + +/*tex + + The |prefixed_command| does not have to adjust |a| so that |a mod 4 = 0|, since the following + routines test for the |\global| prefix as follows. Anyway, in the meantime we reshuffled the + bits and changed a lot. + + When we need more bits, we will do this: + + One one of these: + + \starttyping + primitive_flag = 00000001 : cannot be changed system set + permanent_flag = 00000010 : cannot be changed \permanent + immutable_flag = 00000011 : cannot be changed \immutable + frozen_flag = 00000100 : can be overloaded \frozen and \overloaded + mutable_flag = 00000101 : never checked \mutable + reserved_1_flag = 00000110 + \stoptyping + + Independent, not used combined: + + \starttyping + noaligned_flag = 00001000 : valid align peek \noaligned (can be more generic: \alignpeekable or \alignable, also span and omit?) + reserved_3_flag = 00010000 : maybe obsolete indicator + \stoptyping + + Informative: + + \starttyping + instance_flag = 00100000 : just a tag \instance + symbol_flag = 01000000 : just a tag \symbolic (or character) + c_quantity_flag = 01100000 + d_quantity-flag = 10000000 + reserved_4_flag = 10100000 + reserved_5_flag = 11100000 + \stoptyping + + Maybe names like \flaginstance \flagpermanent etc are better? Now we run out of meaningful + prefixes. 
Also testing the prefix then becomes more work. + +*/ + +typedef enum flag_bit { + /* properties and prefixes */ + frozen_flag_bit = 0x00001, + permanent_flag_bit = 0x00002, + immutable_flag_bit = 0x00004, + primitive_flag_bit = 0x00008, + mutable_flag_bit = 0x00010, + noaligned_flag_bit = 0x00020, + instance_flag_bit = 0x00040, + untraced_flag_bit = 0x00080, + /* prefixes */ + global_flag_bit = 0x00100, + tolerant_flag_bit = 0x00200, + protected_flag_bit = 0x00400, + overloaded_flag_bit = 0x00800, + aliased_flag_bit = 0x01000, + immediate_flag_bit = 0x02000, + conditional_flag_bit = 0x04000, + value_flag_bit = 0x08000, + semiprotected_flag_bit = 0x10000, + inherited_flag_bit = 0x20000, +} flag_bits; + +/*tex Flags: */ + +# define add_flag(a,b) ((a) | (b)) + +# define add_frozen_flag(a) ((a) | frozen_flag_bit) +# define add_permanent_flag(a) ((a) | permanent_flag_bit) +# define add_immutable_flag(a) ((a) | immutable_flag_bit) +# define add_primitive_flag(a) ((a) | primitive_flag_bit) +# define add_mutable_flag(a) ((a) | mutable_flag_bit) +# define add_noaligned_flag(a) ((a) | noaligned_flag_bit) +# define add_instance_flag(a) ((a) | instance_flag_bit) +# define add_untraced_flag(a) ((a) | untraced_flag_bit) + +# define add_global_flag(a) ((a) | global_flag_bit) +# define add_tolerant_flag(a) ((a) | tolerant_flag_bit) +# define add_protected_flag(a) ((a) | protected_flag_bit) +# define add_semiprotected_flag(a) ((a) | semiprotected_flag_bit) +# define add_overloaded_flag(a) ((a) | overloaded_flag_bit) +# define add_aliased_flag(a) ((a) | aliased_flag_bit) +# define add_immediate_flag(a) ((a) | immediate_flag_bit) +# define add_conditional_flag(a) ((a) | conditional_flag_bit) +# define add_value_flag(a) ((a) | value_flag_bit) +# define add_inherited_flag(a) ((a) | inherited_flag_bit) + +# define remove_flag(a,b) ((a) & ~(b)) + +# define remove_frozen_flag(a) ((a) & ~frozen_flag_bit) +# define remove_permanent_flag(a) ((a) & ~permanent_flag_bit) +# define 
remove_immutable_flag(a) ((a) & ~immutable_flag_bit) +# define remove_primitive_flag(a) ((a) & ~primitive_flag_bit) +# define remove_mutable_flag(a) ((a) & ~mutable_flag_bit) +# define remove_noaligned_flag(a) ((a) & ~noaligned_flag_bit) +# define remove_instance_flag(a) ((a) & ~instance_flag_bit) +# define remove_untraced_flag(a) ((a) & ~untraced_flag_bit) + +# define remove_global_flag(a) ((a) & ~global_flag_bit) +# define remove_tolerant_flag(a) ((a) & ~tolerant_flag_bit) +# define remove_protected_flag(a) ((a) & ~protected_flag_bit) +# define remove_overloaded_flag(a) ((a) & ~overloaded_flag_bit) +# define remove_aliased_flag(a) ((a) & ~aliased_flag_bit) +# define remove_immediate_flag(a) ((a) & ~immediate_flag_bit) +# define remove_conditional_flag(a) ((a) & ~conditional_flag_bit) +# define remove_value_flag(a) ((a) & ~value_flag_bit) + +# define is_frozen(a) (((a) & frozen_flag_bit) == frozen_flag_bit) +# define is_permanent(a) (((a) & permanent_flag_bit) == permanent_flag_bit) +# define is_immutable(a) (((a) & immutable_flag_bit) == immutable_flag_bit) +# define is_primitive(a) (((a) & primitive_flag_bit) == primitive_flag_bit) +# define is_mutable(a) (((a) & mutable_flag_bit) == mutable_flag_bit) +# define is_noaligned(a) (((a) & noaligned_flag_bit) == noaligned_flag_bit) +# define is_instance(a) (((a) & instance_flag_bit) == instance_flag_bit) +# define is_untraced(a) (((a) & untraced_flag_bit) == untraced_flag_bit) + +# define is_global(a) (((a) & global_flag_bit) == global_flag_bit) +# define is_tolerant(a) (((a) & tolerant_flag_bit) == tolerant_flag_bit) +# define is_protected(a) (((a) & protected_flag_bit) == protected_flag_bit) +# define is_semiprotected(a) (((a) & semiprotected_flag_bit) == semiprotected_flag_bit) +# define is_overloaded(a) (((a) & overloaded_flag_bit) == overloaded_flag_bit) +# define is_aliased(a) (((a) & aliased_flag_bit) == aliased_flag_bit) +# define is_immediate(a) (((a) & immediate_flag_bit) == immediate_flag_bit) +# define 
is_conditional(a) (((a) & conditional_flag_bit) == conditional_flag_bit) +# define is_value(a) (((a) & value_flag_bit) == value_flag_bit) +# define is_inherited(a) (((a) & inherited_flag_bit) == inherited_flag_bit) + +# define is_expandable(cmd) (cmd > max_command_cmd) + +# define global_or_local(a) (is_global(a) ? level_one : cur_level) + +# define has_flag_bits(p,a) ((p) & (a)) + +# define remove_overload_flags(a) ((a) & ~(permanent_flag_bit | immutable_flag_bit | primitive_flag_bit)) + +# define make_eq_flag_bits(a) ((singleword) ((a) & 0xFF)) +# define has_eq_flag_bits(p,a) (eq_flag(p) & (a)) +# define set_eq_flag_bits(p,a) set_eq_flag(p, make_eq_flag_bits(a)) + +inline static singleword tex_flags_to_cmd(int flags) +{ + if (is_tolerant(flags)) { + return is_protected (flags) ? tolerant_protected_call_cmd : + (is_semiprotected(flags) ? tolerant_semi_protected_call_cmd : tolerant_call_cmd); + } else { + return is_protected (flags) ? protected_call_cmd : + (is_semiprotected(flags) ? semi_protected_call_cmd : call_cmd); + } +} + +/*tex + The macros and functions for the frozen, tolerant, protected cmd codes are gone but + can be found in the archive. We now have just one |call_cmd| with properties stored + elsewhere. + + int g -> singleword g +*/ + +extern int tex_define_permitted (halfword cs, halfword prefixes); +extern void tex_define (int g, halfword p, singleword cmd, halfword chr); +extern void tex_define_inherit (int g, halfword p, singleword flag, singleword cmd, halfword chr); +extern void tex_define_swapped (int g, halfword p1, halfword p2, int force); +extern void tex_forced_define (int g, halfword p, singleword flag, singleword cmd, halfword chr); +extern void tex_word_define (int g, halfword p, halfword w); +extern void tex_forced_word_define (int g, halfword p, singleword flag, halfword w); + +/*tex + + The |*_par| macros expand to the variables that are (in most cases) also accessible at the users + end. 
Most are registers but some are in the (stack) lists. More |*_par| will move here: there is
    no real need for these macros but because there were already a bunch and because they were defined
    all over the place we moved them here.

*/

/*tex Spacing and math related parameters. */

# define space_skip_par               glue_parameter(space_skip_code)
# define xspace_skip_par              glue_parameter(xspace_skip_code)
# define math_skip_par                glue_parameter(math_skip_code)
# define math_skip_mode_par           count_parameter(math_skip_mode_code)
# define math_double_script_mode_par  count_parameter(math_double_script_mode_code)
/*define math_control_mode_par        count_parameter(math_control_mode_code) */
# define math_font_control_par        count_parameter(math_font_control_code)
# define math_display_mode_par        count_parameter(math_display_mode_code)
# define math_dict_group_par          count_parameter(math_dict_group_code)
# define math_dict_properties_par     count_parameter(math_dict_properties_code)
# define math_threshold_par           glue_parameter(math_threshold_code)
# define page_extra_goal_par          dimen_parameter(page_extra_goal_code)

/*tex Display math dimensions. */

# define pre_display_size_par         dimen_parameter(pre_display_size_code)
# define display_width_par            dimen_parameter(display_width_code)
# define display_indent_par           dimen_parameter(display_indent_code)
# define math_surround_par            dimen_parameter(math_surround_code)

# define display_skip_mode_par        count_parameter(math_display_skip_mode_code)
# define math_eqno_gap_step_par       count_parameter(math_eqno_gap_step_code)

/*tex Directions. */

# define par_direction_par            count_parameter(par_direction_code)
# define text_direction_par           count_parameter(text_direction_code)
# define math_direction_par           count_parameter(math_direction_code)

# define first_valid_language_par     count_parameter(first_valid_language_code)

/*tex Sizes and packaging tolerances. */

# define hsize_par                    dimen_parameter(hsize_code)
# define vsize_par                    dimen_parameter(vsize_code)
# define hfuzz_par                    dimen_parameter(hfuzz_code)
# define vfuzz_par                    dimen_parameter(vfuzz_code)
# define hbadness_par
count_parameter(hbadness_code) +# define vbadness_par count_parameter(vbadness_code) + +# define baseline_skip_par glue_parameter(baseline_skip_code) +# define line_skip_par glue_parameter(line_skip_code) +# define par_indent_par dimen_parameter(par_indent_code) +# define hang_indent_par dimen_parameter(hang_indent_code) +# define hang_after_par count_parameter(hang_after_code) +# define left_skip_par glue_parameter(left_skip_code) +# define right_skip_par glue_parameter(right_skip_code) +# define par_fill_left_skip_par glue_parameter(par_fill_left_skip_code) +# define par_fill_right_skip_par glue_parameter(par_fill_right_skip_code) +# define par_init_left_skip_par glue_parameter(par_init_left_skip_code) +# define par_init_right_skip_par glue_parameter(par_init_right_skip_code) +# define tab_skip_par glue_parameter(tab_skip_code) + +# define emergency_stretch_par dimen_parameter(emergency_stretch_code) +# define pre_tolerance_par count_parameter(pre_tolerance_code) +# define tolerance_par count_parameter(tolerance_code) +# define looseness_par count_parameter(looseness_code) +# define adjust_spacing_par count_parameter(adjust_spacing_code) +# define adjust_spacing_step_par count_parameter(adjust_spacing_step_code) +# define adjust_spacing_stretch_par count_parameter(adjust_spacing_stretch_code) +# define adjust_spacing_shrink_par count_parameter(adjust_spacing_shrink_code) +# define adj_demerits_par count_parameter(adj_demerits_code) +# define protrude_chars_par count_parameter(protrude_chars_code) +# define line_penalty_par count_parameter(line_penalty_code) +# define last_line_fit_par count_parameter(last_line_fit_code) +# define double_hyphen_demerits_par count_parameter(double_hyphen_demerits_code) +# define final_hyphen_demerits_par count_parameter(final_hyphen_demerits_code) +# define inter_line_penalty_par count_parameter(inter_line_penalty_code) +# define club_penalty_par count_parameter(club_penalty_code) +# define widow_penalty_par 
count_parameter(widow_penalty_code) +# define display_widow_penalty_par count_parameter(display_widow_penalty_code) +# define orphan_penalty_par count_parameter(orphan_penalty_code) +/*define page_boundary_penalty_par count_parameter(page_boundary_penalty_code) */ /* now in |\pageboundary| */ +# define line_break_criterium_par count_parameter(line_break_criterium_code) +# define broken_penalty_par count_parameter(broken_penalty_code) +# define line_skip_limit_par dimen_parameter(line_skip_limit_code) + +# define alignment_cell_source_par count_parameter(alignment_cell_source_code) +# define alignment_wrap_source_par count_parameter(alignment_wrap_source_code) + +# define delimiter_shortfall_par dimen_parameter(delimiter_shortfall_code) +# define null_delimiter_space_par dimen_parameter(null_delimiter_space_code) +# define script_space_par dimen_parameter(script_space_code) +# define max_depth_par dimen_parameter(max_depth_code) +# define box_max_depth_par dimen_parameter(box_max_depth_code) +# define split_max_depth_par dimen_parameter(split_max_depth_code) +# define overfull_rule_par dimen_parameter(overfull_rule_code) +# define box_max_depth_par dimen_parameter(box_max_depth_code) +# define top_skip_par glue_parameter(top_skip_code) +# define split_top_skip_par glue_parameter(split_top_skip_code) + +# define cur_fam_par count_parameter(family_code) +# define pre_display_direction_par count_parameter(pre_display_direction_code) +# define pre_display_penalty_par count_parameter(pre_display_penalty_code) +# define post_display_penalty_par count_parameter(post_display_penalty_code) +# define pre_inline_penalty_par count_parameter(pre_inline_penalty_code) +# define post_inline_penalty_par count_parameter(post_inline_penalty_code) + +# define local_interline_penalty_par count_parameter(local_interline_penalty_code) +# define local_broken_penalty_par count_parameter(local_broken_penalty_code) +# define local_left_box_par box_parameter(local_left_box_code) +# define 
local_right_box_par box_parameter(local_right_box_code) +# define local_middle_box_par box_parameter(local_middle_box_code) + +# define end_line_char_par count_parameter(end_line_char_code) +# define new_line_char_par count_parameter(new_line_char_code) +# define escape_char_par count_parameter(escape_char_code) + +# define end_line_char_inactive ((end_line_char_par < 0) || (end_line_char_par > 127)) + +# define delimiter_factor_par count_parameter(delimiter_factor_code) +/*define post_binary_penalty_par count_parameter(post_binary_penalty_code) */ +/*define post_relation_penalty_par count_parameter(post_relation_penalty_code) */ +/*define pre_binary_penalty_par count_parameter(pre_binary_penalty_code) */ +/*define pre_relation_penalty_par count_parameter(pre_relation_penalty_code) */ +# define math_penalties_mode_par count_parameter(math_penalties_mode_code) +# define math_check_fences_par count_parameter(math_check_fences_mode_code) +/*define math_delimiters_mode_par count_parameter(math_delimiters_mode_code) */ +/*define math_fences_mode_par count_parameter(math_fences_mode_code) */ +/*define math_rule_thickness_mode_par count_parameter(math_rule_thickness_mode_code) */ +# define math_slack_mode_par count_parameter(math_slack_mode_code) +/*define math_flatten_mode_par count_parameter(math_flatten_mode_code) */ +# define null_delimiter_space_par dimen_parameter(null_delimiter_space_code) +# define disable_spaces_par count_parameter(disable_spaces_code) +# define glyph_options_par count_parameter(glyph_options_code) +# define glyph_scale_par count_parameter(glyph_scale_code) +# define glyph_text_scale_par count_parameter(glyph_text_scale_code) +# define glyph_script_scale_par count_parameter(glyph_script_scale_code) +# define glyph_scriptscript_scale_par count_parameter(glyph_scriptscript_scale_code) +# define glyph_x_scale_par count_parameter(glyph_x_scale_code) +# define glyph_y_scale_par count_parameter(glyph_y_scale_code) +# define glyph_x_offset_par 
dimen_parameter(glyph_x_offset_code) +# define glyph_y_offset_par dimen_parameter(glyph_y_offset_code) +# define math_scripts_mode_par count_parameter(math_scripts_mode_code) +/*define math_script_box_mode_par count_parameter(math_script_box_mode_code) */ +/*define math_script_char_mode_par count_parameter(math_script_char_mode_code) */ +# define math_limits_mode_par count_parameter(math_limits_mode_code) +# define math_nolimits_mode_par count_parameter(math_nolimits_mode_code) +# define math_rules_mode_par count_parameter(math_rules_mode_code) +# define math_rules_fam_par count_parameter(math_rules_fam_code) +# define math_glue_mode_par count_parameter(math_glue_mode_code) + +typedef enum math_glue_modes { + math_glue_stretch_code = 0x01, + math_glue_shrink_code = 0x02, +} math_glue_modes; + +# define math_glue_stretch_enabled ((math_glue_mode_par & math_glue_stretch_code) == math_glue_stretch_code) +# define math_glue_shrink_enabled ((math_glue_mode_par & math_glue_shrink_code) == math_glue_shrink_code) +# define default_math_glue_mode (math_glue_stretch_code | math_glue_shrink_code) + +# define petty_mu_skip_par mu_glue_parameter(petty_mu_skip_code) +# define tiny_mu_skip_par mu_glue_parameter(tiny_mu_skip_code) +# define thin_mu_skip_par mu_glue_parameter(thin_mu_skip_code) +# define med_mu_skip_par mu_glue_parameter(med_mu_skip_code) +# define thick_mu_skip_par mu_glue_parameter(thick_mu_skip_code) + +# define every_math_par toks_parameter(every_math_code) +# define every_display_par toks_parameter(every_display_code) +# define every_cr_par toks_parameter(every_cr_code) +# define every_tab_par toks_parameter(every_tab_code) +# define every_hbox_par toks_parameter(every_hbox_code) +# define every_vbox_par toks_parameter(every_vbox_code) +# define every_math_atom_par toks_parameter(every_math_atom_code) +# define every_eof_par toks_parameter(every_eof_code) +# define every_par_par toks_parameter(every_par_code) +# define every_before_par_par 
toks_parameter(every_before_par_code) +# define every_job_par toks_parameter(every_job_code) +# define error_help_par toks_parameter(error_help_code) +# define end_of_group_par toks_parameter(end_of_group_code) +/*define end_of_par_par toks_parameter(end_of_par_code) */ + +# define internal_par_state_par count_parameter(internal_par_state_code) +# define internal_dir_state_par count_parameter(internal_dir_state_code) +# define internal_math_style_par count_parameter(internal_math_style_code) +# define internal_math_scale_par count_parameter(internal_math_scale_code) + +# define overload_mode_par count_parameter(overload_mode_code) + +# define auto_paragraph_mode_par count_parameter(auto_paragraph_mode_code) + +typedef enum auto_paragraph_modes { + auto_paragraph_text = 0x01, + auto_paragraph_macro = 0x02, + auto_paragraph_go_on = 0x04, +} auto_paragraph_modes; + +# define auto_paragraph_mode(flag) ((auto_paragraph_mode_par) & (flag)) + +# define shaping_penalties_mode_par count_parameter(shaping_penalties_mode_code) +# define shaping_penalty_par count_parameter(shaping_penalty_code) + +typedef enum shaping_penalties_mode_bits { + inter_line_penalty_shaping = 0x01, + widow_penalty_shaping = 0x02, + club_penalty_shaping = 0x04, + broken_penalty_shaping = 0x08, +} shaping_penalties_mode_bits; + +# define is_shaping_penalties_mode(what,flag) ((what) & (flag)) + +# define tab_size_par dimen_parameter(tab_size_code) + +/*define prev_graf_par cur_list.prev_graf */ +/*define prev_depth_par cur_list.prev_depth */ +/*define space_factor_par cur_list.space_factor */ + +/*define tail_par cur_list.tail */ +/*define head_par cur_list.head */ +/*define mode_par cur_list.mode */ +/*define dirs_par cur_list.dirs */ + +/*define incompleat_noad_par cur_list.incompleat_noad */ +/*define mode_line_par cur_list.mode_line */ +/*define delim_par cur_list.delim */ + +# define par_shape_par specification_parameter(par_shape_code) +# define inter_line_penalties_par 
specification_parameter(inter_line_penalties_code) +# define club_penalties_par specification_parameter(club_penalties_code) +# define widow_penalties_par specification_parameter(widow_penalties_code) +# define display_widow_penalties_par specification_parameter(display_widow_penalties_code) +# define orphan_penalties_par specification_parameter(orphan_penalties_code) +# define math_forward_penalties_par specification_parameter(math_forward_penalties_code) +# define math_backward_penalties_par specification_parameter(math_backward_penalties_code) + +/*define h_offset_par dimen_parameter(h_offset_code) */ +/*define v_offset_par dimen_parameter(v_offset_code) */ +# define px_dimen_par dimen_parameter(px_dimen_code) +/*define mag_par count_parameter(mag_code) */ + +# define max_dead_cycles_par count_parameter(max_dead_cycles_code) +# define output_box_par count_parameter(output_box_code) +# define holding_inserts_par count_parameter(holding_inserts_code) +# define holding_migrations_par count_parameter(holding_migrations_code) +# define output_routine_par toks_parameter(output_routine_code) +# define floating_penalty_par count_parameter(floating_penalty_code) + +# define global_defs_par count_parameter(global_defs_code) +# define cat_code_table_par count_parameter(cat_code_table_code) +# define saving_vdiscards_par count_parameter(saving_vdiscards_code) + +# define tracing_output_par count_parameter(tracing_output_code) +# define tracing_stats_par count_parameter(tracing_stats_code) +# define tracing_online_par count_parameter(tracing_online_code) +# define tracing_paragraphs_par count_parameter(tracing_paragraphs_code) +# define tracing_levels_par count_parameter(tracing_levels_code) +# define tracing_nesting_par count_parameter(tracing_nesting_code) +# define tracing_alignments_par count_parameter(tracing_alignments_code) +# define tracing_inserts_par count_parameter(tracing_inserts_code) +# define tracing_marks_par count_parameter(tracing_marks_code) +# define 
tracing_adjusts_par count_parameter(tracing_adjusts_code) +# define tracing_lost_chars_par count_parameter(tracing_lost_chars_code) +# define tracing_ifs_par count_parameter(tracing_ifs_code) +# define tracing_commands_par count_parameter(tracing_commands_code) +# define tracing_macros_par count_parameter(tracing_macros_code) +# define tracing_assigns_par count_parameter(tracing_assigns_code) +# define tracing_fonts_par count_parameter(tracing_fonts_code) +# define tracing_pages_par count_parameter(tracing_pages_code) +# define tracing_restores_par count_parameter(tracing_restores_code) +# define tracing_groups_par count_parameter(tracing_groups_code) +# define tracing_math_par count_parameter(tracing_math_code) +# define tracing_hyphenation_par count_parameter(tracing_hyphenation_code) +# define tracing_expressions_par count_parameter(tracing_expressions_code) +# define tracing_nodes_par count_parameter(tracing_nodes_code) +# define tracing_full_boxes_par count_parameter(tracing_full_boxes_code) +# define tracing_penalties_par count_parameter(tracing_penalties_code) + +# define show_box_depth_par count_parameter(show_box_depth_code) +# define show_box_breadth_par count_parameter(show_box_breadth_code) +# define show_node_details_par count_parameter(show_node_details_code) + +# define pausing_par count_parameter(pausing_code) + +# define error_context_lines_par count_parameter(error_context_lines_code) +# define copy_lua_input_nodes_par count_parameter(copy_lua_input_nodes_code) + +# define math_pre_display_gap_factor_par count_parameter(math_pre_display_gap_factor_code) + +# define time_par count_parameter(time_code) +# define day_par count_parameter(day_code) +# define month_par count_parameter(month_code) +# define year_par count_parameter(year_code) + +typedef enum hyphenation_mode_bits { + normal_hyphenation_mode = 0x00001, + automatic_hyphenation_mode = 0x00002, + explicit_hyphenation_mode = 0x00004, + syllable_hyphenation_mode = 0x00008, + 
uppercase_hyphenation_mode = 0x00010, + compound_hyphenation_mode = 0x00020, + strict_start_hyphenation_mode = 0x00040, + strict_end_hyphenation_mode = 0x00080, + automatic_penalty_hyphenation_mode = 0x00100, + explicit_penalty_hyphenation_mode = 0x00200, + permit_glue_hyphenation_mode = 0x00400, + permit_all_hyphenation_mode = 0x00800, + permit_math_replace_hyphenation_mode = 0x01000, + force_check_hyphenation_mode = 0x02000, + lazy_ligatures_hyphenation_mode = 0x04000, + force_handler_hyphenation_mode = 0x08000, + feedback_compound_hyphenation_mode = 0x10000, + ignore_bounds_hyphenation_mode = 0x20000, + collapse_hyphenation_mode = 0x40000, +} hyphenation_mode_bits; + +# define hyphenation_permitted(a,b) (((a) & (b)) == (b)) +# define set_hyphenation_mode(a,b) ((a) | (b)) +# define unset_hyphenation_mode(a,b) ((a) & ~(b)) +# define flip_hyphenation_mode(a,b) ((b) ? set_hyphenation_mode(a,b) : unset_hyphenation_mode(a,b)) +# define default_hyphenation_mode (normal_hyphenation_mode | automatic_hyphenation_mode | explicit_hyphenation_mode | syllable_hyphenation_mode | compound_hyphenation_mode | force_handler_hyphenation_mode | feedback_compound_hyphenation_mode) + +# define language_par count_parameter(language_code) +# define hyphenation_mode_par count_parameter(hyphenation_mode_code) +# define uc_hyph_par count_parameter(uc_hyph_code) +# define left_hyphen_min_par count_parameter(left_hyphen_min_code) +# define right_hyphen_min_par count_parameter(right_hyphen_min_code) +# define ex_hyphen_char_par count_parameter(ex_hyphen_char_code) +# define hyphen_penalty_par count_parameter(hyphen_penalty_code) +# define ex_hyphen_penalty_par count_parameter(ex_hyphen_penalty_code) +# define default_hyphen_char_par count_parameter(default_hyphen_char_code) +# define default_skew_char_par count_parameter(default_skew_char_code) +# define saving_hyph_codes_par count_parameter(saving_hyph_codes_code) + +# define automatic_hyphen_penalty_par 
count_parameter(automatic_hyphen_penalty_code) +# define explicit_hyphen_penalty_par count_parameter(explicit_hyphen_penalty_code) +# define exception_penalty_par count_parameter(exception_penalty_code) + +# define math_spacing_mode_par count_parameter(math_spacing_mode_code) +# define math_grouping_mode_par count_parameter(math_grouping_mode_code) +# define math_begin_class_par count_parameter(math_begin_class_code) +# define math_end_class_par count_parameter(math_end_class_code) +# define math_left_class_par count_parameter(math_left_class_code) +# define math_right_class_par count_parameter(math_right_class_code) +# define sup_mark_mode_par count_parameter(sup_mark_mode_code) + +# define glyph_data_par count_parameter(glyph_data_code) +# define glyph_state_par count_parameter(glyph_state_code) +# define glyph_script_par count_parameter(glyph_script_code) + +/*define glue_data_par count_parameter(glue_data_code) */ + +# define cur_lang_par count_parameter(language_code) +/*define cur_font_par eq_value(current_font_sequence) */ +# define cur_font_par count_parameter(font_code) + +typedef enum normalize_line_mode_bits { + normalize_line_mode = 0x0001, + parindent_skip_mode = 0x0002, + swap_hangindent_mode = 0x0004, + swap_parshape_mode = 0x0008, + break_after_dir_mode = 0x0010, + remove_margin_kerns_mode = 0x0020, /*tex When unpacking an hbox \unknown\ a \PDFTEX\ leftover. 
*/ + clip_width_mode = 0x0040, + flatten_discretionaries_mode = 0x0080, + discard_zero_tab_skips_mode = 0x0100, + flatten_h_leaders_mode = 0x0200, +} normalize_line_mode_bits; + +typedef enum normalize_par_mode_bits { + normalize_par_mode = 0x0001, + flatten_v_leaders_mode = 0x0002, /* used to be 0x200 */ +} normalize_par_mode_bits; + +# define normalize_line_mode_permitted(a,b) ((a & b) == b) +# define normalize_par_mode_permitted(a,b) ((a & b) == b) + +# define normalize_line_mode_par count_parameter(normalize_line_mode_code) +# define normalize_par_mode_par count_parameter(normalize_par_mode_code) +# define auto_migration_mode_par count_parameter(auto_migration_mode_code) + +typedef enum auto_migration_mode_bits { + auto_migrate_mark = 0x01, + auto_migrate_insert = 0x02, + auto_migrate_adjust = 0x04, + auto_migrate_pre = 0x08, + auto_migrate_post = 0x10, +} auto_migration_mode_bits; + +# define auto_migrating_mode_permitted(what,flag) ((what & flag) == flag) + +# define attribute_register(j) eq_value(register_attribute_location(j)) +# define box_register(j) eq_value(register_box_location(j)) +# define count_register(j) eq_value(register_int_location(j)) +# define dimen_register(j) eq_value(register_dimen_location(j)) +# define mu_skip_register(j) eq_value(register_mu_glue_location(j)) +# define skip_register(j) eq_value(register_glue_location(j)) +# define toks_register(j) eq_value(register_toks_location(j)) + +/* + Injecting these frozen tokens can for instance happen when we scan for an integer or dimension + and run into an |\else| or |\fi| because (guess what) these scanners gobble trailing spaces! In + that case the |deep_frozen_relax_token| gets pushed back and can for instance end up in an + expansion (macro, write, etc) because we only look ahead. However, we can catch this side effect + in the scanners (that we redefined anyway). Removing those |\relax|'s was on the todo list and + now happens in the scanners. 
Actually it's one reason why we often use constants in tests + because these don't have that side effect because the scanner then quits earlier. Another place + where that happens is in the |\input| command but there we can use braces. It is a typical + example of a more cosmetic adaptation that got a bit more priority when we converted the + \CONTEXT\ codebase from \MKIV\ to \LMTX, where testing involved checking the results. I also have + to check the other frozen tokens that can get reported when we have for instance alignments. It + is also why some of these tokens have an associated (private but serialized) |\csname|. + + For the record: we call these tokens deep_frozen because we don't want them to be confused with + the |\frozen| user macros and the ones below are really deeply hidden, although sometimes they + do surface. + +*/ + +typedef enum deep_frozen_cs_tokens { + deep_frozen_protection_token = cs_token_flag + deep_frozen_cs_protection_code, + deep_frozen_cr_token = cs_token_flag + deep_frozen_cs_cr_code, + deep_frozen_end_group_token = cs_token_flag + deep_frozen_cs_end_group_code, + deep_frozen_right_token = cs_token_flag + deep_frozen_cs_right_code, + deep_frozen_fi_token = cs_token_flag + deep_frozen_cs_fi_code, + deep_frozen_end_template_1_token = cs_token_flag + deep_frozen_cs_end_template_1_code, + deep_frozen_end_template_2_token = cs_token_flag + deep_frozen_cs_end_template_2_code, + deep_frozen_relax_token = cs_token_flag + deep_frozen_cs_relax_code, + deep_frozen_end_write_token = cs_token_flag + deep_frozen_cs_end_write_code, + deep_frozen_dont_expand_token = cs_token_flag + deep_frozen_cs_dont_expand_code, + deep_frozen_null_font_token = cs_token_flag + deep_frozen_cs_null_font_code, + deep_frozen_undefined_token = cs_token_flag + deep_frozen_cs_undefined_code, +} deep_frozen_cs_tokens; + +/*tex + + The next has been simplified and replaced by |\hyphenationmode| but we keep it as reminder: + + \starttabulate[|T|T|T|] + \NC 
hyphen_penalty_mode_par \NC automatic_disc (-) \NC explicit_disc (\-) \NC \NR + \HL + \NC 0 (default) \NC ex_hyphen_penalty_par \NC ex_hyphen_penalty_par \NC \NR + \NC 1 \NC hyphen_penalty_par \NC hyphen_penalty_par \NC \NR + \NC 2 \NC ex_hyphen_penalty_par \NC hyphen_penalty_par \NC \NR + \NC 3 \NC hyphen_penalty_par \NC ex_hyphen_penalty_par \NC \NR + \NC 4 \NC automatic_hyphen_penalty_par \NC explicit_disc_penalty_par \NC \NR + \NC 5 \NC ex_hyphen_penalty_par \NC explicit_disc_penalty_par \NC \NR + \NC 6 \NC hyphen_penalty_par \NC explicit_disc_penalty_par \NC \NR + \NC 7 \NC automatic_hyphen_penalty_par \NC ex_hyphen_penalty_par \NC \NR + \NC 8 \NC automatic_hyphen_penalty_par \NC hyphen_penalty_par \NC \NR + \stoptabulate + +*/ + +extern halfword tex_automatic_disc_penalty (halfword mode); +extern halfword tex_explicit_disc_penalty (halfword mode); + +/*tex + + We add a bit more abstraction when setting the system parameters. This is not really + needed but it moves all the |eq_| assignments to a place where we can keep an eye on + them. 
+ +*/ + +# define update_tex_glyph_data(a,v) tex_word_define(a, internal_int_location(glyph_data_code), v) +# define update_tex_glyph_state(a,v) tex_word_define(a, internal_int_location(glyph_state_code), v) +# define update_tex_glyph_script(a,v) tex_word_define(a, internal_int_location(glyph_script_code), v) +# define update_tex_family(a,v) tex_word_define(a, internal_int_location(family_code), v) +# define update_tex_language(a,v) tex_word_define(a, internal_int_location(language_code), v) +# define update_tex_font(a,v) tex_word_define(a, internal_int_location(font_code), v) + +/*define update_tex_glue_data(a,v) tex_word_define(a, internal_int_location(glue_data_code), v) */ + +# define update_tex_display_indent(v) tex_eq_word_define(internal_dimen_location(display_indent_code), v) +# define update_tex_display_width(v) tex_eq_word_define(internal_dimen_location(display_width_code), v) +# define update_tex_hang_after(v) tex_eq_word_define(internal_int_location(hang_after_code), v) +# define update_tex_hang_indent(v) tex_eq_word_define(internal_dimen_location(hang_indent_code), v) +# define update_tex_looseness(v) tex_eq_word_define(internal_int_location(looseness_code), v) +# define update_tex_math_direction(v) tex_eq_word_define(internal_int_location(math_direction_code), v) +# define update_tex_internal_par_state(v) tex_eq_word_define(internal_int_location(internal_par_state_code), v) +# define update_tex_internal_dir_state(v) tex_eq_word_define(internal_int_location(internal_dir_state_code), v) +# define update_tex_internal_math_style(v) tex_eq_word_define(internal_int_location(internal_math_style_code), v) +# define update_tex_internal_math_scale(v) tex_eq_word_define(internal_int_location(internal_math_scale_code), v) +# define update_tex_output_penalty(v) tex_geq_word_define(internal_int_location(output_penalty_code), v) +# define update_tex_par_direction(v) tex_eq_word_define(internal_int_location(par_direction_code), v) +# define 
update_tex_pre_display_direction(v) tex_eq_word_define(internal_int_location(pre_display_direction_code), v) +# define update_tex_pre_display_size(v) tex_eq_word_define(internal_dimen_location(pre_display_size_code), v) +# define update_tex_text_direction(v) tex_eq_word_define(internal_int_location(text_direction_code), v) + +# define update_tex_font_identifier(v) tex_eq_word_define(internal_int_location(font_code), v) +# define update_tex_glyph_scale(v) tex_eq_word_define(internal_int_location(glyph_scale_code), v) +# define update_tex_glyph_x_scale(v) tex_eq_word_define(internal_int_location(glyph_x_scale_code), v) +# define update_tex_glyph_y_scale(v) tex_eq_word_define(internal_int_location(glyph_y_scale_code), v) + +# define update_tex_math_left_class(v) tex_eq_word_define(internal_int_location(math_left_class_code), v) +# define update_tex_math_right_class(v) tex_eq_word_define(internal_int_location(math_right_class_code), v) + +# define update_tex_par_shape(v) tex_eq_define(internal_specification_location(par_shape_code), specification_reference_cmd, v) +# define update_tex_inter_line_penalties(v) tex_eq_define(internal_specification_location(inter_line_penalties_code), specification_reference_cmd, v) +/*define update_tex_club_penalties(v) eq_define(internal_specification_location(club_penalties_code), specification_reference_cmd, v) */ +/*define update_tex_widow_penalties(v) eq_define(internal_specification_location(widow_penalties_code), specification_reference_cmd, v) */ +/*define update_tex_display_widow_penalties(v) eq_define(internal_specification_location(display_widow_penalties_code), specification_reference_cmd, v) */ +/*define update_tex_orphan_penalties(v) eq_define(internal_specification_location(orphan_penalties_code), specification_reference_cmd, v) */ + +# define update_tex_end_of_group(v) tex_eq_define(internal_toks_location(end_of_group_code), internal_toks_reference_cmd, v) +/*define update_tex_end_of_par(v) 
eq_define(internal_toks_location(end_of_par_code), internal_toks_cmd, v) */ + +# define update_tex_local_left_box(v) tex_eq_define(internal_box_location(local_left_box_code), internal_box_reference_cmd, v); +# define update_tex_local_right_box(v) tex_eq_define(internal_box_location(local_right_box_code), internal_box_reference_cmd, v); +# define update_tex_local_middle_box(v) tex_eq_define(internal_box_location(local_middle_box_code), internal_box_reference_cmd, v); + +# define update_tex_font_local(f,v) tex_eq_define(f, set_font_cmd, v); /* Here |f| already has the right offset. */ +# define update_tex_font_global(f,v) tex_geq_define(f, set_font_cmd, v); /* Here |f| already has the right offset. */ + +# define update_tex_tab_skip_local(v) tex_eq_define(internal_glue_location(tab_skip_code), internal_glue_reference_cmd, v); +# define update_tex_tab_skip_global(v) tex_geq_define(internal_glue_location(tab_skip_code), internal_glue_reference_cmd, v); + +# define update_tex_box_local(n,v) tex_eq_define(register_box_location(n) - box_flag, register_box_reference_cmd, v); +# define update_tex_box_global(n,v) tex_geq_define(register_box_location(n) - global_box_flag, register_box_reference_cmd, v); + +# define update_tex_insert_mode(a,v) tex_word_define(a, internal_int_location(insert_mode_code), v) + +/*tex For the moment here; a preparation for a dedicated insert structure. */ + +# define insert_content(A) box_register(A) +# define insert_multiplier(A) count_register(A) +# define insert_maxheight(A) dimen_register(A) +# define insert_distance(A) skip_register(A) + +# endif diff --git a/source/luametatex/source/tex/texerrors.c b/source/luametatex/source/tex/texerrors.c new file mode 100644 index 000000000..3252d2c50 --- /dev/null +++ b/source/luametatex/source/tex/texerrors.c @@ -0,0 +1,704 @@ +/* + See license.txt in the root of this project. 
+*/ + +# include "luametatex.h" + +# include <string.h> + +/*tex + + When something anomalous is detected, \TEX\ typically does something like this (in \PASCAL\ + lingua): + + \starttyping + print_err("Something anomalous has been detected"); + help( + "This is the first line of my offer to help.\n" + "This is the second line. I'm trying to\n" + "explain the best way for you to proceed." + ); + error(); + \stoptyping + + A two-line help message would be given using |help2|, etc.; these informal helps should use + simple vocabulary that complements the words used in the official error message that was + printed. (Outside the U.S.A., the help messages should preferably be translated into the local + vernacular. Each line of help is at most 60 characters long, in the present implementation, so + that |max_print_line| will not be exceeded.) + + The |print_err| procedure supplies a |!| before the official message, and makes sure that the + terminal is awake if a stop is going to occur. The |error| procedure supplies a |.| after the + official message, then it shows the location of the error; and if |interaction = + error_stop_mode|, it also enters into a dialog with the user, during which time the help message + may be printed. 
+ +*/ + +error_state_info lmt_error_state = { + .last_error = NULL, + .last_lua_error = NULL, + .last_warning_tag = NULL, + .last_warning = NULL, + .last_error_context = NULL, + .help_text = NULL, + .print_buffer = "", + .intercept = 0, + .last_intercept = 0, + .interaction = 0, + .default_exit_code = 0, + .set_box_allowed = 0, + .history = 0, + .error_count = 0, + .err_old_setting = 0, + .in_error = 0, + .long_help_seen = 0, + .context_indent = 4, + .padding = 0, + .line_limits = { + .maximum = max_error_line, + .minimum = min_error_line, + .size = min_error_line, + .top = 0, + }, + .half_line_limits = { + .maximum = max_half_error_line, + .minimum = min_half_error_line, + .size = min_half_error_line, + .top = 0, + }, +} ; + +/*tex + Because a |text_can| can be assembled we make a copy. There are not many cases where this is + really needed but there are seldom errors anyway so we can neglect this duplication of data. +*/ + +inline static void tex_aux_update_help_text(const char* str) +{ + if (lmt_error_state.help_text) { + lmt_memory_free(lmt_error_state.help_text); + lmt_error_state.help_text = NULL; + } + if (str) { + lmt_error_state.help_text = lmt_memory_strdup(str); + } +} + +/*tex + + The previously defines structure collects all relevant variables: the current level of + interaction: |interaction|, states like |last_error|, |last_lua_error|, |last_warning_tag|, + |last_warning_str| and |last_error_context|, and temporary variables like |err_old_setting| and + |in_error|. + + This is a variant on |show_runaway| that is used when we delegate error handling to a \LUA\ + callback. (Maybe some day that will be default.) 
+ +*/ + +static void tex_aux_set_last_error_context(void) +{ + int saved_selector = lmt_print_state.selector; + int saved_new_line_char = new_line_char_par; + int saved_new_string_line = lmt_print_state.new_string_line; + lmt_print_state.selector = new_string_selector_code; + new_line_char_par = 10; + lmt_print_state.new_string_line = 10; + tex_show_validity(); + tex_show_context(); + lmt_memory_free(lmt_error_state.last_error_context); + lmt_error_state.last_error_context = tex_take_string(NULL); + lmt_print_state.selector = saved_selector; + new_line_char_par = saved_new_line_char; + lmt_print_state.new_string_line = saved_new_string_line; +} + +static void tex_aux_flush_error(void) +{ + if (lmt_error_state.in_error) { + lmt_print_state.selector = lmt_error_state.err_old_setting; + lmt_memory_free(lmt_error_state.last_error); + lmt_error_state.last_error = tex_take_string(NULL); + if (lmt_error_state.last_error) { + int callback_id = lmt_callback_defined(show_error_message_callback); + if (callback_id > 0) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->"); + } else { + tex_print_str(lmt_error_state.last_error); + } + } + lmt_error_state.in_error = 0; + } +} + +static int tex_aux_error_callback_set(void) +{ + int callback_id = lmt_callback_defined(show_error_message_callback); + return lmt_lua_state.lua_instance && callback_id > 0 ? callback_id : 0; +} + +static void tex_aux_start_error(void) +{ + if (tex_aux_error_callback_set()) { + lmt_error_state.err_old_setting = lmt_print_state.selector; + lmt_print_state.selector = new_string_selector_code; + lmt_error_state.in_error = 1 ; + lmt_memory_free(lmt_error_state.last_error); + lmt_error_state.last_error = NULL; + } else { + tex_print_nlp(); + tex_print_str("! "); + } +} + +/*tex + + \TEX\ is careful not to call |error| when the print |selector| setting might be unusual. 
The + only possible values of |selector| at the time of error messages are: + + \startitemize + \startitem |no_print|: |interaction=batch_mode| and |log_file| not yet open; \stopitem + \startitem |term_only|: |interaction>batch_mode| and |log_file| not yet open; \stopitem + \startitem |log_only|: |interaction=batch_mode| and |log_file| is open; \stopitem + \startitem |term_and_log|: |interaction>batch_mode| and |log_file| is open. \stopitem + \stopitemize + +*/ + +void tex_fixup_selector(int logopened) +{ + if (lmt_error_state.interaction == batch_mode) { + lmt_print_state.selector = logopened ? logfile_selector_code : no_print_selector_code ; + } else { + lmt_print_state.selector = logopened ? terminal_and_logfile_selector_code : terminal_selector_code; + } +} + +/*tex + + The variable |history| records the worst level of error that has been detected. It has four + possible values: |spotless|, |warning_issued|, |error_message_issued|, and |fatal_error_stop|. + + Another variable, |error_count|, is increased by one when an |error| occurs without an + interactive dialog, and it is reset to zero at the end of every paragraph. If |error_count| + reaches 100, \TEX\ decides that there is no point in continuing further. + + The value of |history| is initially |fatal_error_stop|, but it will be changed to |spotless| + if \TEX\ survives the initialization process. 
+ +*/ + +void tex_initialize_errors(void) +{ + lmt_error_state.interaction = error_stop_mode; + lmt_error_state.set_box_allowed = 1; + if (lmt_error_state.half_line_limits.size > lmt_error_state.line_limits.size) { + lmt_error_state.half_line_limits.size = lmt_error_state.line_limits.size/2; + } + if (lmt_error_state.half_line_limits.size <= 30) { + lmt_error_state.half_line_limits.size = 31; + } else if (lmt_error_state.half_line_limits.size >= (lmt_error_state.line_limits.size - 15)) { + lmt_error_state.half_line_limits.size = lmt_error_state.line_limits.size - 16; + } +} + +/*tex + + It is possible for |error| to be called recursively if some error arises when |get_token| is + being used to delete a token, and/or if some fatal error occurs while \TEX\ is trying to fix + a non-fatal one. But such recursion is never more than two levels deep. + + Individual lines of help are recorded in the string |help_text|. There can be embedded + newlines. + + The |jump_out| procedure just cuts across all active procedure levels and exits the program. + It is used when there is no recovery from a particular error. The exit code can be overloaded. + + We don't close the lua state because we then have to collect lots of garbage and it really + slows doen the run. It's not needed anyway, as we exit. + +*/ + +static int tex_aux_final_exit(int code) +{ + exit(code); + return 0; /* unreachable */ +} + +int tex_normal_exit(void) +{ + tex_terminal_update(); + /* lua_close(lua_state.lua_instance); */ + lmt_main_state.ready_already = output_disabled_state; + if (lmt_error_state.history != spotless && lmt_error_state.history != warning_issued) { + return tex_aux_final_exit(EXIT_FAILURE); + } else { + return tex_aux_final_exit(lmt_error_state.default_exit_code); + } +} + +static void tex_aux_jump_out(void) +{ + tex_close_files_and_terminate(1); + tex_normal_exit(); +} + +/*tex + + This completes the job of error reporting, that is, in good old \TEX. 
But in \LUATEX\ it + doesn't make sense to suport this model of error handling, also because one cannot backtrack + over \LUA\ actions, so it would be a cheat. But we can keep the modes. + +*/ + +static void tex_aux_error(int type) +{ + int callback_id = lmt_callback_defined(intercept_tex_error_callback); + tex_aux_flush_error(); + if (lmt_error_state.history < error_message_issued && type != warning_error_type) { + lmt_error_state.history = error_message_issued; + } + if (lmt_lua_state.lua_instance && callback_id > 0) { + tex_aux_set_last_error_context(); + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "dd->d", lmt_error_state.interaction, type, &lmt_error_state.interaction); + lmt_error_state.error_count = 0; + tex_terminal_update(); + switch (lmt_error_state.interaction) { + case batch_mode: /* Q */ + --lmt_print_state.selector; + return; + case nonstop_mode: /* R */ + return; + case scroll_mode: /* S */ + return; + case error_stop_mode: /* carry on */ + break; + default: /* exit */ + lmt_error_state.interaction = scroll_mode; + if (type != warning_error_type) { + tex_aux_jump_out(); + } + break; + } + } else { + tex_print_char('.'); + tex_show_context(); + } + if (type != warning_error_type) { + ++lmt_error_state.error_count; + if (lmt_error_state.error_count == 100) { + tex_print_message("That makes 100 errors; please try again."); + lmt_error_state.history = fatal_error_stop; + tex_aux_jump_out(); + } + } + /*tex + We assume that the callback handles the log file too. Otherwise we put the help message in + the log file. 
+ */ + if (callback_id == 0) { + if (lmt_error_state.interaction > batch_mode) { + /*tex Avoid terminal output: */ + --lmt_print_state.selector; + } + tex_print_nlp(); + if (lmt_error_state.help_text) { + tex_print_str(lmt_error_state.help_text); + tex_print_nlp(); + } + if (lmt_error_state.interaction > batch_mode) { + /*tex Re-enable terminal output: */ + ++lmt_print_state.selector; + } + } + tex_print_ln(); +} + +/*tex + + In anomalous cases, the print selector might be in an unknown state; the following subroutine + is called to fix things just enough to keep running a bit longer. + +*/ + +static void tex_aux_normalize_selector(void) +{ + if (lmt_fileio_state.log_opened) { + lmt_print_state.selector = terminal_and_logfile_selector_code; + } else { + lmt_print_state.selector = terminal_selector_code; + } + if (! lmt_fileio_state.job_name) { + tex_open_log_file(); + } + if (lmt_error_state.interaction == batch_mode) { + /*tex It becomes no or terminal. */ + --lmt_print_state.selector; + } +} + +/*tex The following procedure prints \TEX's last words before dying: */ + +static void tex_aux_succumb_error(void) +{ + if (lmt_error_state.interaction == error_stop_mode) { + /*tex No more interaction: */ + lmt_error_state.interaction = scroll_mode; + } + if (lmt_fileio_state.log_opened) { + tex_aux_error(succumb_error_type); + } + lmt_error_state.history = fatal_error_stop; + /*tex Irrecoverable error: */ + tex_aux_jump_out(); +} + +/*tex This prints |s|, and that's it. */ + +void tex_fatal_error(const char *helpinfo) +{ + tex_aux_normalize_selector(); + tex_handle_error( + succumb_error_type, + "Emergency stop", + helpinfo + ); +} + +/*tex Here is the most dreaded error message. We stop due to finiteness. */ + +void tex_overflow_error(const char *s, int n) +{ + tex_aux_normalize_selector(); + tex_handle_error( + succumb_error_type, + "TeX capacity exceeded, sorry [%s=%i]", + s, n, + "If you really absolutely need more capacity, you can ask a wizard to enlarge me." 
+ ); +} + +/*tex + + The program might sometime run completely amok, at which point there is no choice but to stop. + If no previous error has been detected, that's bad news; a message is printed that is really + intended for the \TEX\ maintenance person instead of the user (unless the user has been + particularly diabolical). The index entries for \quotation {this can't happen} may help to + pinpoint the problem. + +*/ + +int tex_confusion(const char *s) +{ + /*tex A consistency check violated; |s| tells where: */ + tex_aux_normalize_selector(); + if (lmt_error_state.history < error_message_issued) { + tex_handle_error( + succumb_error_type, + "This can't happen (%s)", + s, + "I'm broken. Please show this to someone who can fix me." + ); + } else { + tex_handle_error( + succumb_error_type, + "I can't go on meeting you like this", + "One of your faux pas seems to have wounded me deeply ... in fact, I'm barely\n" + "conscious. Please fix it and try again." + ); + } + return 0; +} + +/*tex + + When the program is interrupted we just quit. Here is the hook to deal with it. + +*/ + +void aux_quit_the_program(void) /*tex No |tex_| prefix here! */ +{ + tex_handle_error( + succumb_error_type, + "Forced stop", + NULL + ); +} + +/*tex + + The |back_error| routine is used when we want to replace an offending token just before issuing + an error message. This routine, like |back_input|, requires that |cur_tok| has been set. We + disable interrupts during the call of |back_input| so that the help message won't be lost. + +*/ + +static void tex_aux_back_error(void) +{ + tex_back_input(cur_tok); + tex_aux_error(back_error_type); +} + +/*tex Back up one inserted token and call |error|. 
*/ + +static void tex_aux_insert_error(void) +{ + tex_back_input(cur_tok); + lmt_input_state.cur_input.token_type = inserted_text; + tex_aux_error(insert_error_type); +} + +int tex_normal_error(const char *t, const char *p) +{ + if (lmt_engine_state.lua_only) { + /*tex Normally ending up here means that we call the wrong error function. */ + tex_emergency_message(t, p); + } else { + tex_aux_normalize_selector(); + if (! tex_aux_error_callback_set()) { + tex_print_nlp(); + tex_print_str("! "); + } + tex_print_str("error"); + if (t) { + tex_print_format(" (%s)", t); + } + tex_print_str(": "); + if (p) { + tex_print_str(p); + } + lmt_error_state.history = fatal_error_stop; + tex_print_str("\n"); + } + return tex_aux_final_exit(EXIT_FAILURE); +} + +void tex_normal_warning(const char *t, const char *p) +{ + if (strcmp(t, "lua") == 0) { + int callback_id = lmt_callback_defined(intercept_lua_error_callback); + int saved_new_line_char = new_line_char_par; + new_line_char_par = 10; + if (lmt_lua_state.lua_instance && callback_id) { + (void) lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->"); + /* error(); */ + } else { + tex_handle_error( + normal_error_type, + p ? p : "unspecified lua error", + "The lua interpreter ran into a problem, so the remainder of this lua chunk will\n" + "be ignored." 
+ ); + } + new_line_char_par = saved_new_line_char; + } else { + int callback_id = lmt_callback_defined(show_warning_message_callback); + if (callback_id > 0) { + /*tex Free the last ones, */ + lmt_memory_free(lmt_error_state.last_warning); + lmt_memory_free(lmt_error_state.last_warning_tag); + lmt_error_state.last_warning = lmt_memory_strdup(p); + lmt_error_state.last_warning_tag = lmt_memory_strdup(t); + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->"); + } else { + tex_print_ln(); + tex_print_str("warning"); + if (t) { + tex_print_format(" (%s)", t); + } + tex_print_str(": "); + if (p) { + tex_print_str(p); + } + tex_print_ln(); + } + if (lmt_error_state.history == spotless) { + lmt_error_state.history = warning_issued; + } + } +} + +int tex_formatted_error(const char *t, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + vsnprintf(lmt_error_state.print_buffer, print_buffer_size, fmt, args); + return tex_normal_error(t, lmt_error_state.print_buffer); + /* + va_end(args); + return 0; + */ +} + +void tex_formatted_warning(const char *t, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + vsnprintf(lmt_error_state.print_buffer, print_buffer_size, fmt, args); + tex_normal_warning(t, lmt_error_state.print_buffer); + va_end(args); +} + +void tex_emergency_message(const char *t, const char *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + vsnprintf(lmt_error_state.print_buffer, print_buffer_size, fmt, args); + fprintf(stdout,"%s : %s\n",t,lmt_error_state.print_buffer); + va_end(args); +} + +int tex_emergency_exit(void) +{ + return tex_aux_final_exit(EXIT_FAILURE); +} + +/*tex A prelude to more abstraction and maybe using sprint etc.*/ + +static void tex_aux_do_handle_error_type( + int type +) { + switch (type) { + case normal_error_type: + case eof_error_type: + case condition_error_type: + case runaway_error_type: + case warning_error_type: + tex_aux_error(type); + break; + case back_error_type: + tex_aux_back_error(); + break; + case insert_error_type: + tex_aux_insert_error(); + break; + case succumb_error_type: + tex_aux_succumb_error(); + break; + } +} + +void tex_handle_error_message_only( + const char *message +) +{ + tex_aux_start_error(); + tex_print_str(message); + if (tex_aux_error_callback_set()) { + lmt_error_state.in_error = 0; + lmt_memory_free(lmt_error_state.last_error); + lmt_error_state.last_error = lmt_memory_strdup(message); + } +} + +/*tex + + We had about 15 specific tuned message handlers as a prelude to a general template based one + and that one has arrived (we also have a print one, beginning 2021 only partially applied as + I'm undecided). We can now call a translation callback where we remap similar to how we do it + in ConTeXt but I;'m nor that sure if users really need it. The english is probably the least + problematic part of an error so first I will perfect the tracing bit. + +*/ + +/* + %c int char + %s *char string + %q *char 'string' + %i int integer + %e backslash (tex escape) + %C int int symbolic representation of cmd chr + %E *char \cs + %S int tex cs string + %M int mode + %T int tex string + %% percent + +*/ + +extern void tex_handle_error(error_types type, const char *format, ...) +{ + va_list args; + va_start(args, format); /* hm, weird, no number */ + /*tex Todo: a translation callback: |str, 1 => str|. 
*/ + tex_aux_start_error(); + while (1) { + int chr = *format++; + switch (chr) { + case '\0': + goto DONE; + case '%': + { + chr = *format++; + switch (chr) { + case '\0': + goto DONE; + case 'c': + tex_print_char(va_arg(args, int)); + break; + case 's': + tex_print_str(va_arg(args, char *)); + break; + case 'q': + tex_print_char('\''); + tex_print_str(va_arg(args, char *)); + tex_print_char('\''); + break; + case 'm': + tex_print_cs_checked(va_arg(args, int)); + break; + case 'i': + tex_print_int(va_arg(args, int)); + break; + case 'e': + tex_print_str_esc(NULL); + break; + case 'C': + { + int cmd = va_arg(args, int); + int val = va_arg(args, int); + tex_print_cmd_chr((singleword) cmd, val); /* inlining doesn't work */ + break; + } + case 'E': + tex_print_str_esc(va_arg(args, char *)); + break; + case 'S': + { + halfword cs = va_arg(args, int); + tex_print_cs(cs); + break; + } + case 'M': + { + halfword mode = va_arg(args, int); + tex_print_str(tex_string_mode(mode)); + break; + } + case 'T': + { + strnumber s = va_arg(args, int); + tex_print_tex_str(s); + break; + } + case '%': + tex_print_char('%'); + break; + default: + /* ignore bad one */ + break; + } + } + break; + default: + tex_print_char(chr); /* todo: utf */ + break; + } + } + DONE: + /*tex Todo: a translation callback: |str, 2 => str|. */ + tex_aux_update_help_text(va_arg(args, char *)); + tex_aux_do_handle_error_type(type); + va_end(args); +} diff --git a/source/luametatex/source/tex/texerrors.h b/source/luametatex/source/tex/texerrors.h new file mode 100644 index 000000000..8c67b9a45 --- /dev/null +++ b/source/luametatex/source/tex/texerrors.h @@ -0,0 +1,117 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_ERRORS_H +# define LMT_ERRORS_H + +/*tex + + The global variable |interaction| has four settings, representing increasing amounts of user + interaction: + +*/ + +# define print_buffer_size 512 /*tex Watch out for alignment! Only used here. 
*/

typedef enum interaction_levels {
    batch_mode,      /*tex omits all stops and omits terminal output */
    nonstop_mode,    /*tex omits all stops */
    scroll_mode,     /*tex omits error stops */
    error_stop_mode, /*tex stops at every opportunity to interact */
} interaction_levels;

# define last_interaction_level error_stop_mode

/*tex The state that is shared by the error, warning and message handlers. */

typedef struct error_state_info {
    char        *last_error;
    char        *last_lua_error;
    char        *last_warning_tag;
    char        *last_warning;
    char        *last_error_context;
    char        *help_text;          /*tex helps for the next |error| */
    char         print_buffer[print_buffer_size]; /*tex scratch buffer of the formatted error, warning and message functions */
    int          intercept;          /*tex intercept error state */
    int          last_intercept;     /*tex error state number / dimen scanner */
    int          interaction;        /*tex current level of interaction */
    int          default_exit_code;  /*tex the exit code can be overloaded */
    int          set_box_allowed;
    int          history;            /*tex the worst level of error so far, one of |error_states| */
    int          error_count;
    int          err_old_setting;
    int          in_error;
    int          long_help_seen;
    int          context_indent;
    int          padding;
    limits_data  line_limits;
    limits_data  half_line_limits;
} error_state_info;

extern error_state_info lmt_error_state;

typedef enum error_states {
    spotless,             /*tex |history| value when nothing has been amiss yet */
    warning_issued,       /*tex |history| value when |begin_diagnostic| has been called */
    error_message_issued, /*tex |history| value when |error| has been called */
    fatal_error_stop,     /*tex |history| value when termination was premature */
} error_states;

extern void tex_initialize_errors   (void);
extern void tex_fixup_selector      (int log_opened);
extern void tex_fatal_error         (const char *helpinfo);
extern void tex_overflow_error      (const char *s, int n);
extern int  tex_confusion           (const char *s);
extern int  tex_normal_error        (const char *t, const char *p);
extern void tex_normal_warning      (const char *t, const char *p);
extern int  tex_formatted_error     (const char *t, const char *fmt, ...);
extern void tex_formatted_warning   (const char *t, const char *fmt, ...);
extern void tex_emergency_message   (const char *t, const char *fmt, ...);
extern int  tex_emergency_exit      (void);
extern int  tex_normal_exit         (void);

/*tex A bit more detail. */

# define error_string_clobbered(n)   "[clobbered "   LMT_TOSTRING(n) "]"
# define error_string_bad(n)         "[bad "         LMT_TOSTRING(n) "]"
# define error_string_impossible(n)  "[impossible "  LMT_TOSTRING(n) "]"
# define error_string_nonexistent(n) "[nonexistent " LMT_TOSTRING(n) "]"

/*tex

    We now have a template based error handler instead of more than a dozen specific ones that took
    an error type, a different set of variables, and the helptext. The template uses the (usual)
    percent driven directives:

    \starttabulate
    \NC \type {s} \NC string                   \NC \NR
    \NC \type {c} \NC char                     \NC \NR
    \NC \type {q} \NC 'string'                 \NC \NR
    \NC \type {i} \NC integer                  \NC \NR
    \NC \type {e} \NC escape char              \NC \NR
    \NC \type {m} \NC cs (checked)             \NC \NR
    \NC \type {C} \NC cmd chr                  \NC \NR
    \NC \type {E} \NC escaped string           \NC \NR
    \NC \type {S} \NC cs                       \NC \NR
    \NC \type {M} \NC mode                     \NC \NR
    \NC \type {T} \NC texstring                \NC \NR
    \stoptabulate

    A placeholder starts with a percent sign. A double percent sign will print one. The very last
    argument is the help text (or |NULL|). We flush on a per character basis but that happens
    anyway and error messages are not really a bottleneck.

 */

typedef enum error_types {
    normal_error_type,
    back_error_type,
    insert_error_type,
    succumb_error_type, /* fatal error_type */
    eof_error_type,
    condition_error_type,
    runaway_error_type,
    warning_error_type,
} error_types;

extern void tex_handle_error              (error_types type, const char *format, ...);
extern void tex_handle_error_message_only (const char *message);

# endif
diff --git a/source/luametatex/source/tex/texexpand.c b/source/luametatex/source/tex/texexpand.c
new file mode 100644
index 000000000..25dcccdf3
--- /dev/null
+++ b/source/luametatex/source/tex/texexpand.c
@@ -0,0 +1,1411 @@
/*
    See license.txt in the root of this project.
*/

# include "luametatex.h"

/*tex

    Only a dozen or so command codes |> max_command| can possibly be returned by |get_next|; in
    increasing order, they are |undefined_cs|, |expand_after|, |no_expand|, |input|, |if_test|,
    |fi_or_else|, |cs_name|, |convert|, |the|, |get_mark|, |call|, |long_call|, |outer_call|,
    |long_outer_call|, and |end_template|.

    Sometimes, recursive calls to the following |expand| routine may cause exhaustion of the
    run-time calling stack, resulting in forced execution stops by the operating system. To
    diminish the chance of this happening, a counter is used to keep track of the recursion depth,
    in conjunction with a constant called |expand_depth|.

    Note that this does not catch all possible infinite recursion loops, just the ones that
    exhaust the application calling stack. The actual maximum value of |expand_depth| is outside
    of our control, but the initial setting of |100| should be enough to prevent problems.

*/

/*tex
    The expansion state. The depth limit (|limits.size|) starts out at |min_expand_depth| and
    |limits.top| starts at zero.
*/

expand_state_info lmt_expand_state = {
    .limits           = {
        .minimum = min_expand_depth,
        .maximum = max_expand_depth,
        .size    = min_expand_depth,
        .top     = 0,
    },
    .depth            = 0,
    .cs_name_level    = 0,
    .arguments        = 0,
    .match_token_head = null,
    .padding          = 0,
};

/*tex Forward declarations of static helpers that are used before they are defined. */

       static void tex_aux_macro_call                (halfword cs, halfword cmd, halfword chr);
inline static void tex_aux_manufacture_csname        (void);
inline static void tex_aux_manufacture_csname_use    (void);
inline static void tex_aux_manufacture_csname_future (void);
inline static void tex_aux_inject_last_tested_cs     (void);

/*tex

    We no longer store |match_token_head| in the format file. It is a bit cleaner to just
    initialize them. So we free them.
+ +*/ + +void tex_initialize_expansion(void) +{ + lmt_expand_state.match_token_head = tex_get_available_token(null); +} + +void tex_cleanup_expansion(void) +{ + tex_put_available_token(lmt_expand_state.match_token_head); +} + +halfword tex_expand_match_token_head(void) +{ + return lmt_expand_state.match_token_head; +} + +/*tex + + The |expand| subroutine is used when |cur_cmd > max_command|. It removes a \quote {call} or a + conditional or one of the other special operations just listed. It follows that |expand| might + invoke itself recursively. In all cases, |expand| destroys the current token, but it sets things + up so that the next |get_next| will deliver the appropriate next token. The value of |cur_tok| + need not be known when |expand| is called. + + Since several of the basic scanning routines communicate via global variables, their values are + saved as local variables of |expand| so that recursive calls don't invalidate them. + +*/ + +inline static void tex_aux_expand_after(void) +{ + /*tex + Expand the token after the next token. It takes only a little shuffling to do what \TEX\ + calls |\expandafter|. + */ + halfword t1 = tex_get_token(); + halfword t2 = tex_get_token(); + if (cur_cmd > max_command_cmd) { + tex_expand_current_token(); + } else { + tex_back_input(t2); + } + tex_back_input(t1); +} + +inline static void tex_aux_expand_toks_after(void) +{ + halfword t1 = tex_scan_toks_normal(0, NULL); + halfword t2 = tex_get_token(); + if (cur_cmd > max_command_cmd) { + tex_expand_current_token(); + } else { + tex_back_input(t2); + } + tex_begin_backed_up_list(token_link(t1)); + tex_put_available_token(t1); +} + +/*tex + Here we deal with stuff not in the big switch. Where that is discussed there is mentioning of + it all being a bit messy, also due to the fact that that switch (or actually a lookup table) + also uses the mode for determining what to do. We see no reason to change this model. 
*/

/*tex
    Expand the current token (|cur_cmd|, |cur_chr|, |cur_cs|). The recursion depth is tracked and
    when it reaches |limits.size| we quit with an overflow error. The values of |cur_val| and
    |cur_val_level| are saved on entry and restored on exit. Commands below |first_call_cmd| are
    handled by the switch, commands up to |last_call_cmd| are macro calls, and anything else gets
    the |deep_frozen_end_template_2_token| pushed back.
*/

void tex_expand_current_token(void)
{
    ++lmt_expand_state.depth;
    if (lmt_expand_state.depth > lmt_expand_state.limits.top) {
        if (lmt_expand_state.depth >= lmt_expand_state.limits.size) {
            tex_overflow_error("expansion depth", lmt_expand_state.limits.size);
        } else {
            /*tex We register the deepest level that we've seen so far. */
            lmt_expand_state.limits.top += 1;
        }
    }
    /*tex We're okay. */
    {
        halfword saved_cur_val = cur_val;
        halfword saved_cur_val_level = cur_val_level;
     // halfword saved_head = token_link(token_data.backup_head);
        if (cur_cmd < first_call_cmd) {
            /*tex Expand a nonmacro. */
            if (tracing_commands_par > 1) {
                tex_show_cmd_chr(cur_cmd, cur_chr);
            }
            switch (cur_cmd) {
                case expand_after_cmd:
                    {
                        int mode = cur_chr;
                        switch (mode) {
                            case expand_after_code:
                                tex_aux_expand_after();
                                break;
                            /*
                            case expand_after_3_code:
                                tex_aux_expand_after();
                                // fall-through
                            case expand_after_2_code:
                                tex_aux_expand_after();
                                tex_aux_expand_after();
                                break;
                            */
                            case expand_unless_code:
                                tex_conditional_unless();
                                break;
                            case future_expand_code:
                                /*tex
                                    This is an experiment: |\futureexpand| (2) which takes |\check \yes
                                    \nop| as arguments. It's not faster, but gives less tracing noise
                                    than a macro. The variant |\futureexpandis| (3) alternative doesn't
                                    inject the gobbles space(s).
                                */
                                tex_get_token();
                                {
                                    halfword spa = null;
                                    halfword chr = cur_chr;
                                    halfword cmd = cur_cmd;
                                    halfword yes = tex_get_token(); /* when match */
                                    halfword nop = tex_get_token(); /* when no match */
                                    /*tex Skip spaces, remember that we saw one, and push back the first non space. */
                                    while (1) {
                                        halfword t = tex_get_token();
                                        if (cur_cmd == spacer_cmd) {
                                            spa = t;
                                        } else {
                                            tex_back_input(t);
                                            break;
                                        }
                                    }
                                    /*tex The value 1 means: same input level. */
                                    if (cur_cmd == cmd && cur_chr == chr) {
                                        tex_reinsert_token(yes);
                                    } else {
                                        if (spa) {
                                            tex_reinsert_token(space_token);
                                        }
                                        tex_reinsert_token(nop);
                                    }
                                }
                                break;
                            case future_expand_is_code:
                                tex_get_token();
                                {
                                    halfword chr = cur_chr;
                                    halfword cmd = cur_cmd;
                                    halfword yes = tex_get_token(); /* when match */
                                    halfword nop = tex_get_token(); /* when no match */
                                    /*tex Here the skipped spaces are not put back. */
                                    while (1) {
                                        halfword t = tex_get_token();
                                        if (cur_cmd != spacer_cmd) {
                                            tex_back_input(t);
                                            break;
                                        }
                                    }
                                    tex_reinsert_token((cur_cmd == cmd && cur_chr == chr) ? yes : nop);
                                }
                                break;
                            case future_expand_is_ap_code:
                                tex_get_token();
                                {
                                    halfword chr = cur_chr;
                                    halfword cmd = cur_cmd;
                                    halfword yes = tex_get_token(); /* when match */
                                    halfword nop = tex_get_token(); /* when no match */
                                    /*tex As above, but paragraph ends are skipped too. */
                                    while (1) {
                                        halfword t = tex_get_token();
                                        if (cur_cmd != spacer_cmd && cur_cmd != end_paragraph_cmd) {
                                            tex_back_input(t);
                                            break;
                                        }
                                    }
                                    /*tex We stay at the same input level. */
                                    tex_reinsert_token((cur_cmd == cmd && cur_chr == chr) ? yes : nop);
                                }
                                break;
                            case expand_after_spaces_code:
                                {
                                    /* maybe two variants: after_spaces and after_par like in the ignores */
                                    halfword t1 = tex_get_token();
                                    while (1) {
                                        halfword t2 = tex_get_token();
                                        if (cur_cmd != spacer_cmd) {
                                            tex_back_input(t2);
                                            break;
                                        }
                                    }
                                    tex_reinsert_token(t1);
                                    break;
                                }
                            case expand_after_pars_code:
                                {
                                    halfword t1 = tex_get_token();
                                    while (1) {
                                        halfword t2 = tex_get_token();
                                        if (cur_cmd != spacer_cmd && cur_cmd != end_paragraph_cmd) {
                                            tex_back_input(t2);
                                            break;
                                        }
                                    }
                                    tex_reinsert_token(t1);
                                    break;
                                }
                            case expand_token_code:
                                {
                                    /* we can share code with lmtokenlib .. todo */
                                    halfword cat = tex_scan_category_code();
                                    halfword chr = tex_scan_char_number(0);
                                    /* too fragile:
                                        halfword tok = null;
                                        switch (cat) {
                                            case letter_cmd:
                                            case other_char_cmd:
                                            case ignore_cmd:
                                            case spacer_cmd:
                                                tok = token_val(cat, chr);
                                                break;
                                            case active_char_cmd:
                                                {
                                                    halfword cs = tex_active_to_cs(chr, ! lmt_hash_state.no_new_cs);
                                                    if (cs) {
                                                        chr = eq_value(cs);
                                                        tok = cs_token_flag + cs;
                                                        break;
                                                    }
                                                }
                                            default:
                                                tok = token_val(other_char_cmd, chr);
                                                break;
                                        }
                                    */
                                    /*tex Only these four category codes are passed on, all others become other char. */
                                    switch (cat) {
                                        case letter_cmd:
                                        case other_char_cmd:
                                        case ignore_cmd:
                                        case spacer_cmd:
                                            break;
                                        default:
                                            cat = other_char_cmd;
                                            break;
                                    }
                                    tex_back_input(token_val(cat, chr));
                                    break;
                                }
                            case expand_cs_token_code:
                                {
                                    tex_get_token();
                                    if (cur_tok >= cs_token_flag) {
                                        /*tex A control sequence with a character like meaning becomes that character token. */
                                        halfword cmd = eq_type(cur_cs);
                                        switch (cmd) {
                                            case left_brace_cmd:
                                            case right_brace_cmd:
                                            case math_shift_cmd:
                                            case alignment_tab_cmd:
                                            case superscript_cmd:
                                            case subscript_cmd:
                                            case spacer_cmd:
                                            case letter_cmd:
                                            case other_char_cmd:
                                                cur_tok = token_val(cmd, eq_value(cur_cs));
                                                break;
                                        }
                                    }
                                    tex_back_input(cur_tok);
                                    break;
                                }
                            case expand_code:
                                {
                                    tex_get_token();
                                    if (cur_cmd >= first_call_cmd && cur_cmd <= last_call_cmd) {
                                        tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
                                    } else {
                                        /* Use expand_current_token so that protected lua call are dealt with too? */
                                        tex_back_input(cur_tok);
                                    }
                                    break;
                                }
                            case semi_expand_code:
                                {
                                    tex_get_token();
                                    if (is_semi_protected_cmd(cur_cmd)) {
                                        tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
                                    } else {
                                        /* Use expand_current_token so that protected lua call are dealt with too? */
                                        tex_back_input(cur_tok);
                                    }
                                    break;
                                }
                            case expand_after_toks_code:
                                {
                                    tex_aux_expand_toks_after();
                                    break;
                                }
                            /*
                            case expand_after_fi:
                                {
                                    conditional_after_fi();
                                    break;
                                }
                            */
                        }
                    }
                    break;
                case cs_name_cmd:
                    /*tex Manufacture a control sequence name. */
                    switch (cur_chr) {
                        case cs_name_code:
                            tex_aux_manufacture_csname();
                            break;
                        case last_named_cs_code:
                            tex_aux_inject_last_tested_cs();
                            break;
                        case begin_cs_name_code:
                            tex_aux_manufacture_csname_use();
                            break;
                        case future_cs_name_code:
                            tex_aux_manufacture_csname_future();
                            break;
                    }
                    break;
                case no_expand_cmd:
                    {
                        /*tex
                            Suppress expansion of the next token. The implementation of |\noexpand|
                            is a bit trickier, because it is necessary to insert a special
                            |dont_expand| marker into \TEX's reading mechanism. This special marker
                            is processed by |get_next|, but it does not slow down the inner loop.

                            Since |\outer| macros might arise here, we must also clear the
                            |scanner_status| temporarily.
                        */
                        halfword t;
                        halfword save_scanner_status = lmt_input_state.scanner_status;
                        lmt_input_state.scanner_status = scanner_is_normal;
                        t = tex_get_token();
                        lmt_input_state.scanner_status = save_scanner_status;
                        tex_back_input(t);
                        /*tex Now |start| and |loc| point to the backed-up token |t|. */
                        if (t >= cs_token_flag) {
                            halfword p = tex_get_available_token(deep_frozen_dont_expand_token);
                            set_token_link(p, lmt_input_state.cur_input.loc);
                            lmt_input_state.cur_input.start = p;
                            lmt_input_state.cur_input.loc = p;
                        }
                    }
                    break;
                case if_test_cmd:
                    if (cur_chr < first_real_if_test_code) {
                        tex_conditional_fi_or_else();
                    } else if (cur_chr != if_condition_code) {
                        tex_conditional_if(cur_chr, 0);
                    } else {
                        /*tex The |\ifcondition| primitive is a no-op unless we're in skipping mode. */
                    }
                    break;
                case the_cmd:
                    {
                        halfword h = tex_the_toks(cur_chr, NULL);
                        tex_begin_inserted_list(h);
                        break;
                    }
                case lua_call_cmd:
                    if (cur_chr > 0) {
                        strnumber u = tex_save_cur_string();
                        lmt_token_state.luacstrings = 0;
                        lmt_function_call(cur_chr, 0);
                        tex_restore_cur_string(u);
                        if (lmt_token_state.luacstrings > 0) {
                            tex_lua_string_start();
                        }
                    } else {
                        tex_normal_error("luacall", "invalid number");
                    }
                    break;
                case lua_local_call_cmd:
                    if (cur_chr > 0) {
                        lua_State *L = lmt_lua_state.lua_instance;
                        strnumber u = tex_save_cur_string();
                        lmt_token_state.luacstrings = 0;
                        /* todo: use a private table as we can overflow, unless we register early */
                        lua_rawgeti(L, LUA_REGISTRYINDEX, cur_chr);
                        if (lua_pcall(L, 0, 0, 0)) {
                            /*
                                NOTE(review): in this branch the error message is not popped from
                                the Lua stack and |tex_restore_cur_string| is not called - confirm
                                that this is intended.
                            */
                            tex_formatted_warning("luacall", "local call error: %s", lua_tostring(L, -1));
                        } else {
                            tex_restore_cur_string(u);
                            if (lmt_token_state.luacstrings > 0) {
                                tex_lua_string_start();
                            }
                        }
                    } else {
                        tex_normal_error("luacall", "invalid number");
                    }
                    break;
                case begin_local_cmd:
                    tex_begin_local_control();
                    break;
                case convert_cmd:
                    tex_run_convert_tokens(cur_chr);
                    break;
                case input_cmd:
                    /*tex Initiate or terminate input from a file */
                    switch (cur_chr) {
                        case normal_input_code:
                            if (lmt_fileio_state.name_in_progress) {
                                tex_insert_relax_and_cur_cs();
                            } else {
                                tex_start_input(tex_read_file_name(0, NULL, texinput_extension));
                            }
                            break;
                        case end_of_input_code:
                            lmt_token_state.force_eof = 1;
                            break;
                        case quit_loop_code:
                            lmt_main_control_state.quit_loop = 1;
                            break;
                        case token_input_code:
                            tex_tex_string_start(io_token_eof_input_code, cat_code_table_par);
                            break;
                        case tex_token_input_code:
                            tex_tex_string_start(io_token_input_code, cat_code_table_par);
                            break;
                        case tokenized_code:
                        case retokenized_code:
                            {
                                /*tex
                                    This variant complements the other expandable primitives but
                                    also supports an optional keyword, who knows when that comes in
                                    handy; what goes in is detokenized anyway. For now it is an
                                    undocumented feature. It is likely that there is a |cct| passed
                                    so we don't need to optimize. If needed we can make a version
                                    where this is mandate.
                                */
                                int cattable = (cur_chr == retokenized_code || tex_scan_optional_keyword("catcodetable")) ? tex_scan_int(0, NULL) : cat_code_table_par;
                                full_scanner_status saved_full_status = tex_save_full_scanner_status();
                                strnumber u = tex_save_cur_string();
                                halfword s = tex_scan_toks_expand(0, NULL, 0);
                                tex_unsave_full_scanner_status(saved_full_status);
                                if (token_link(s)) {
                                    tex_begin_inserted_list(tex_wrapped_token_list(s));
                                    tex_tex_string_start(io_token_input_code, cattable);
                                }
                                tex_put_available_token(s);
                                tex_restore_cur_string(u);
                            }
                            break;
                        default:
                            break;
                    }
                    break;
                case get_mark_cmd:
                    {
                        /*tex Insert the appropriate mark text into the scanner. */
                        halfword num = 0;
                        halfword code = cur_chr;
                        /*tex Only these codes scan a mark number, the others use zero. */
                        switch (code) {
                            case top_marks_code:
                            case first_marks_code:
                            case bot_marks_code:
                            case split_first_marks_code:
                            case split_bot_marks_code:
                            case current_marks_code:
                                num = tex_scan_mark_number();
                                break;
                        }
                        if (tex_valid_mark(num)) {
                            halfword ptr = tex_get_some_mark(code, num);
                            if (ptr) {
                                tex_begin_token_list(ptr, mark_text);
                            }
                        }
                        break;
                    }
                /*
                case string_cmd:
                    {
                        halfword head = str_toks(str_lstring(cs_offset_value + cur_chr), NULL);
                        begin_inserted_list(head);
                        break;
                    }
                */
                default:
                    /* Maybe ... or maybe an option */
                 // if (lmt_expand_state.cs_name_level == 0) {
                        /*tex Complain about an undefined macro */
                        tex_handle_error(
                            normal_error_type,
                            "Undefined control sequence %m", cur_cs,
                            "The control sequence at the end of the top line of your error message was never\n"
                            "\\def'ed. You can just continue as I'll forget about whatever was undefined."
                        );
                 // }
                    break;
            }
        } else if (cur_cmd <= last_call_cmd) {
            tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
        } else {
            /*tex
                Insert a token containing |frozen_endv|. An |end_template| command is effectively
                changed to an |endv| command by the following code. (The reason for this is discussed
                below; the |frozen_end_template| at the end of the template has passed the
                |check_outer_validity| test, so its mission of error detection has been accomplished.)
            */
            tex_back_input(deep_frozen_end_template_2_token);
        }
        cur_val = saved_cur_val;
        cur_val_level = saved_cur_val_level;
     // set_token_link(token_data.backup_head, saved_head);
    }
    --lmt_expand_state.depth;
}

/*tex Report a missing |\endcsname|; this is a back error so the current token is pushed back first. */

static void tex_aux_complain_missing_csname(void)
{
    tex_handle_error(
        back_error_type,
        "Missing \\endcsname inserted",
        "The control sequence marked <to be read again> should not appear between \\csname\n"
        "and \\endcsname."
    );
}

/*tex
    Store the \UTF-8 encoding of |c| in buffer |b| starting at index |m| and return the index
    after the last byte written. We write one to four bytes and don't check for overflow here,
    so the caller has to make sure that there is room.
*/

inline static int tex_aux_uni_to_buffer(unsigned char *b, int m, int c)
{
    if (c <= 0x7F) {
        b[m++] = (unsigned char) c;
    } else if (c <= 0x7FF) {
        b[m++] = (unsigned char) (0xC0 + c / 0x40);
        b[m++] = (unsigned char) (0x80 + c % 0x40);
    } else if (c <= 0xFFFF) {
        b[m++] = (unsigned char) (0xE0 +  c / 0x1000);
        b[m++] = (unsigned char) (0x80 + (c % 0x1000) / 0x40);
        b[m++] = (unsigned char) (0x80 + (c % 0x1000) % 0x40);
    } else {
        b[m++] = (unsigned char) (0xF0 +   c / 0x40000);
        b[m++] = (unsigned char) (0x80 + ( c % 0x40000) / 0x1000);
        b[m++] = (unsigned char) (0x80 + ((c % 0x40000) % 0x1000) / 0x40);
        b[m++] = (unsigned char) (0x80 + ((c % 0x40000) % 0x1000) % 0x40);
    }
    return m;
}

/*tex
    We also quit on a protected macro call, which is different from \LUATEX\ (and \PDFTEX) but makes
    much sense. It also long token lists that never (should) match anyway.
+*/ + + +static int tex_aux_collect_cs_tokens(halfword *p, int *n) +{ + while (1) { + tex_get_next(); + switch (cur_cmd) { + case left_brace_cmd: + case right_brace_cmd: + case math_shift_cmd: + case alignment_tab_cmd: + /* case end_line_cmd: */ + case parameter_cmd: + case superscript_cmd: + case subscript_cmd: + /* case ignore_cmd: */ + case spacer_cmd: + case letter_cmd: + case other_char_cmd: + // cur_tok = token_val(cur_cmd, cur_chr); + // *p = tex_store_new_token(*p, cur_tok); + *p = tex_store_new_token(*p, token_val(cur_cmd, cur_chr)); + *n += 1; + break; + /* case active_char_cmd: */ + /* case comment_cmd: */ + /* case invalid_char_cmd: */ + /* + case string_cmd: + cur_tok = token_val(cur_cmd, cur_chr); + *p = store_new_token(*p, cur_tok); + *n += str_length(cs_offset_value + cur_chr); + break; + */ + case call_cmd: + case tolerant_call_cmd: + tex_aux_macro_call(cur_cs, cur_cmd, cur_chr); + break; + case end_cs_name_cmd: + return 1; + default: + if (cur_cmd > max_command_cmd && cur_cmd < first_call_cmd) { + tex_expand_current_token(); + } else { + return 0; + } + } + } +} + +int tex_is_valid_csname(void) +{ + halfword cs = null_cs; + int b = 0; + int n = 0; + halfword h = tex_get_available_token(null); + halfword p = h; + lmt_expand_state.cs_name_level += 1; + if (! tex_aux_collect_cs_tokens(&p, &n)) { + do { + tex_get_x_or_protected(); /* we skip unprotected ! */ + } while (cur_cmd != end_cs_name_cmd); + goto FINISH; + } else if (n) { + /*tex Look up the characters of list |n| in the hash table, and set |cur_cs|. */ + int f = lmt_fileio_state.io_first; + if (tex_room_in_buffer(f + n * 4)) { + int m = f; + halfword l = token_link(h); + while (l) { + m = tex_aux_uni_to_buffer(lmt_fileio_state.io_buffer, m, token_chr(token_info(l))); + l = token_link(l); + } + cs = tex_id_locate(f, m - f, 0); /*tex Don't create a new cs! 
*/ + b = (cs != undefined_control_sequence) && (eq_type(cs) != undefined_cs_cmd); + } + } + FINISH: + tex_flush_token_list_head_tail(h, p, n + 1); + lmt_scanner_state.last_cs_name = cs; + lmt_expand_state.cs_name_level -= 1; + cur_cs = cs; + return b; +} + +inline static halfword tex_aux_get_cs_name(void) +{ + halfword h = tex_get_available_token(null); /* hm */ + halfword p = h; + int n = 0; + lmt_expand_state.cs_name_level += 1; + if (tex_aux_collect_cs_tokens(&p, &n)) { + /*tex Look up the characters of list |r| in the hash table, and set |cur_cs|. */ + int siz; + char *s = tex_tokenlist_to_tstring(h, 1, &siz, 0, 0, 0); + cur_cs = (siz > 0) ? tex_string_locate((char *) s, siz, 1) : null_cs; + } else { + tex_aux_complain_missing_csname(); + } + lmt_scanner_state.last_cs_name = cur_cs; + lmt_expand_state.cs_name_level -= 1; + tex_flush_token_list_head_tail(h, p, n); + return cur_cs; +} + +inline static void tex_aux_manufacture_csname(void) +{ + halfword cs = tex_aux_get_cs_name(); + if (eq_type(cs) == undefined_cs_cmd) { + /*tex The |save_stack| might change! 
*/ + tex_eq_define(cs, relax_cmd, relax_code); + } + /*tex The control sequence will now match |\relax| */ + tex_back_input(cs + cs_token_flag); +} + +inline static void tex_aux_manufacture_csname_use(void) +{ + if (tex_is_valid_csname()) { + tex_back_input(cur_cs + cs_token_flag); + } else { + lmt_scanner_state.last_cs_name = deep_frozen_relax_token; + } +} + +inline static void tex_aux_manufacture_csname_future(void) +{ + halfword t = tex_get_token(); + if (tex_is_valid_csname()) { + tex_back_input(cur_cs + cs_token_flag); + } else { + lmt_scanner_state.last_cs_name = deep_frozen_relax_token; + tex_back_input(t); + } +} + +halfword tex_create_csname(void) +{ + halfword cs = tex_aux_get_cs_name(); + if (eq_type(cs) == undefined_cs_cmd) { + tex_eq_define(cs, relax_cmd, relax_code); + } + return cs; // cs + cs_token_flag; +} + +inline static void tex_aux_inject_last_tested_cs(void) +{ + if (lmt_scanner_state.last_cs_name != null_cs) { + tex_back_input(lmt_scanner_state.last_cs_name + cs_token_flag); + } +} + +/*tex + + Sometimes the expansion looks too far ahead, so we want to insert a harmless |\relax| into the + user's input. +*/ + +void tex_insert_relax_and_cur_cs(void) +{ + tex_back_input(cs_token_flag + cur_cs); + tex_reinsert_token(deep_frozen_relax_token); + lmt_input_state.cur_input.token_type = inserted_text; +} + +/*tex + + Here is a recursive procedure that is \TEX's usual way to get the next token of input. It has + been slightly optimized to take account of common cases. + +*/ + +halfword tex_get_x_token(void) +{ + /*tex This code sets |cur_cmd|, |cur_chr|, |cur_tok|, and expands macros. 
*/ + while (1) { + tex_get_next(); + if (cur_cmd <= max_command_cmd) { + break; + } else if (cur_cmd < first_call_cmd) { + tex_expand_current_token(); + } else if (cur_cmd <= last_call_cmd) { + tex_aux_macro_call(cur_cs, cur_cmd, cur_chr); + } else { + cur_cs = deep_frozen_cs_end_template_2_code; + cur_cmd = end_template_cmd; + /*tex Now |cur_chr = token_state.null_list|. */ + break; + } + } + if (cur_cs) { + cur_tok = cs_token_flag + cur_cs; + } else { + cur_tok = token_val(cur_cmd, cur_chr); + } + return cur_tok; +} + +/*tex + + The |get_x_token| procedure is equivalent to two consecutive procedure calls: |get_next; x_token|. + It's |get_x_token| without the initial |get_next|. + +*/ + +void tex_x_token(void) +{ + while (cur_cmd > max_command_cmd) { + tex_expand_current_token(); + tex_get_next(); + } + if (cur_cs) { + cur_tok = cs_token_flag + cur_cs; + } else { + cur_tok = token_val(cur_cmd, cur_chr); + } +} + +/*tex + + A control sequence that has been |\def|'ed by the user is expanded by \TEX's |macro_call| + procedure. Here we also need to deal with marks, but these are discussed elsewhere. + + So let's consider |macro_call| itself, which is invoked when \TEX\ is scanning a control + sequence whose |cur_cmd| is either |call|, |long_call|, |outer_call|, or |long_outer_call|. The + control sequence definition appears in the token list whose reference count is in location + |cur_chr| of |mem|. + + The global variable |long_state| will be set to |call| or to |long_call|, depending on whether + or not the control sequence disallows |\par| in its parameters. The |get_next| routine will set + |long_state| to |outer_call| and emit |\par|, if a file ends or if an |\outer| control sequence + occurs in the midst of an argument. + + The parameters, if any, must be scanned before the macro is expanded. Parameters are token + lists without reference counts. 
They are placed on an auxiliary stack called |pstack| while + they are being scanned, since the |param_stack| may be losing entries during the matching + process. (Note that |param_stack| can't be gaining entries, since |macro_call| is the only + routine that puts anything onto |param_stack|, and it is not recursive.) + + After parameter scanning is complete, the parameters are moved to the |param_stack|. Then the + macro body is fed to the scanner; in other words, |macro_call| places the defined text of the + control sequence at the top of \TEX's input stack, so that |get_next| will proceed to read it + next. + + The global variable |cur_cs| contains the |eqtb| address of the control sequence being expanded, + when |macro_call| begins. If this control sequence has not been declared |\long|, i.e., if its + command code in the |eq_type| field is not |long_call| or |long_outer_call|, its parameters are + not allowed to contain the control sequence |\par|. If an illegal |\par| appears, the macro call + is aborted, and the |\par| will be rescanned. + + Beware: we cannot use |cur_cmd| here because for instance |\bgroup| can be part of an argument + without there being an |\egroup|. We really need to check raw brace tokens (|{}|) here when we + pick up an argument! + + */ + +/*tex + + In \LUAMETATEX\ we have an extended argument definition system. The approach is still the same + and the additional code kind of fits in. There is a bit more testing going on but the overhead + is kept at a minimum so performance is not hit. Macro packages like \CONTEXT\ spend a lot of + time expanding and the extra overhead of the extensions is compensated by some gain in using + them. However, the most important motive is in readability of macro code on the one hand and + the wish for less tracing (due to all this multi-step processing) on the other. It suits me + well. This is definitely a case of |goto| abuse.

*/

/*tex
    Strip leading and trailing spaces and pars from the token list |h| and return the (possibly
    new) head. Leading tokens are handed back one by one; |last| remembers the token before the
    first of a trailing run so that the run can be cut off and flushed at the end.
*/

static halfword tex_aux_prune_list(halfword h)
{
    halfword t = h;
    halfword p = null;
    int done = 0;
    int last = null;
    while (t) {
        halfword l = token_link(t);
        halfword i = token_info(t);
        halfword c = token_cmd(i);
        if (c != spacer_cmd && c != end_paragraph_cmd && i != lmt_token_state.par_token) { // c != 0xFF
            /*tex Real content: whatever run of spaces we were tracking is not trailing. */
            done = 1;
            last = null;
        } else if (done) {
            if (! last) {
                last = p; /* before space */
            }
        } else {
            /*tex Still in the leading run: drop the token and move the head. */
            h = l;
            tex_put_available_token(t);
        }
        p = t;
        t = l;
    }
    if (last) {
        halfword l = token_link(last);
        token_link(last) = null;
        tex_flush_token_list(l);
    }
    return h;
}

/*tex
    Count the non-null entries on the parameter stack, starting at the current input level's
    |parameter_start| and stopping at the first null entry (or at the stack pointer).
*/

int tex_get_parameter_count(void)
{
    int n = 0;
    for (int i = lmt_input_state.cur_input.parameter_start; i < lmt_input_state.parameter_stack_data.ptr; i++) {
        if (lmt_input_state.parameter_stack[i]) {
            ++n;
        } else {
            break;
        }
    }
    return n;
}

/*tex
    Expand the macro |cs| with command code |cmd| whose definition is the token list |chr|. When
    the definition has parameters these are scanned first (into |pstack|), then the body is
    pushed onto the input stack and the parameters are copied to the parameter stack. The number
    of arguments is left in |lmt_expand_state.arguments|. The scanner status and warning index
    are saved on entry and restored at |EXIT|.
*/

static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)
{
    int tracing = tracing_macros_par > 0;
    if (tracing) {
        /*tex
            Setting |\tracingmacros| to 2 means that elsewhere marks etc are shown so in fact a bit
            more detail. However, as we turn that on anyway, using a value of 3 is not that weird
            for less info here. Introducing an extra parameter makes no sense.
        */
        tex_begin_diagnostic();
        tex_print_cs_checked(cs);
        if (is_untraced(eq_flag(cs))) {
            tracing = 0;
        } else {
            if (! get_token_parameters(chr)) {
                tex_print_str("->");
            } else {
                /* maybe move the preamble scanner to here */
            }
            tex_token_show(chr, default_token_show_max);
        }
        tex_end_diagnostic();
    }
    if (get_token_parameters(chr)) {
        halfword matchpointer = token_link(chr);
        halfword matchtoken = token_info(matchpointer);
        int save_scanner_status = lmt_input_state.scanner_status;
        halfword save_warning_index = lmt_input_state.warning_index;
        int nofscanned = 0;
        int nofarguments = 0;
        halfword pstack[9]; /* We could go for 15 if we accept |#A-#F|. */
        /*tex
            Scan the parameters and make |link(r)| point to the macro body; but |return| if an
            illegal |\par| is detected.

            At this point, the reader will find it advisable to review the explanation of token
            list format that was presented earlier, since many aspects of that format are of
            importance chiefly in the |macro_call| routine.

            The token list might begin with a string of compulsory tokens before the first
            |match| or |end_match|. In that case the macro name is supposed to be followed by
            those tokens; the following program will set |s=null| to represent this restriction.
            Otherwise |s| will be set to the first token of a string that will delimit the next
            parameter.
        */
        int tolerant = is_tolerant_cmd(cmd);
        /*tex the number of tokens or groups (usually) */
        halfword count = 0;
        /*tex one step before the last |right_brace| token */
        halfword rightbrace = null;
        /*tex the state, currently the character used in parameter */
        int match = 0;
        int thrash = 0;
        int quitting = 0;
        /*tex when set, |cur_tok| has already been fetched and the next fetch is skipped */
        int last = 0;
        /*tex current node in parameter token list being built */
        halfword p = null;
        /*tex backup pointer for parameter matching */
        halfword s = null;
        int spacer = 0;
        /*tex
            One day I will check the next code for too many tests, no that much branching that it.
            The numbers in |#n| are match tokens except the last one, which is has a different
            token info.
        */
        lmt_input_state.warning_index = cs;
        lmt_input_state.scanner_status = tolerant ? scanner_is_tolerant : scanner_is_matching;
        /* */
        do {
            /*tex
                So, can we use a local head here? After all, there is no expansion going on here,
                so no need to access |temp_token_head|. On the other hand, it's also used as a
                signal, so not now.
            */
          RESTART:
            set_token_link(lmt_expand_state.match_token_head, null);
          AGAIN:
            spacer = 0;
          LATER:
            if (matchtoken < match_token || matchtoken >= end_match_token) {
                s = null;
            } else {
                switch (matchtoken) {
                    case spacer_match_token:
                        matchpointer = token_link(matchpointer);
                        matchtoken = token_info(matchpointer);
                        do {
                            tex_get_token();
                        } while (cur_cmd == spacer_cmd);
                        last = 1;
                        goto AGAIN;
                    case mandate_match_token:
                        match = match_mandate;
                        goto MANDATE;
                    case mandate_keep_match_token:
                        match = match_bracekeeper;
                      MANDATE:
                        if (last) {
                            last = 0;
                        } else {
                            tex_get_token();
                            last = 1;
                        }
                        if (cur_tok < left_brace_limit) {
                            matchpointer = token_link(matchpointer);
                            matchtoken = token_info(matchpointer);
                            s = matchpointer;
                            p = lmt_expand_state.match_token_head;
                            count = 0;
                            last = 0;
                            goto GROUPED;
                        } else {
                            if (tolerant) {
                                last = 0;
                                nofarguments = nofscanned;
                                tex_back_input(cur_tok);
                                goto QUITTING;
                            } else {
                                last = 0;
                                tex_back_input(cur_tok);
                            }
                            s = null;
                            goto BAD;
                        }
                        break;
                    case thrash_match_token:
                        match = 0;
                        thrash = 1;
                        break;
                    case leading_match_token:
                        match = match_spacekeeper;
                        break;
                    case prune_match_token:
                        match = match_pruner;
                        break;
                    case continue_match_token:
                        matchpointer = token_link(matchpointer);
                        matchtoken = token_info(matchpointer);
                        goto AGAIN;
                    case quit_match_token:
                        match = match_quitter;
                        if (tolerant) {
                            last = 0;
                            nofarguments = nofscanned;
                            matchpointer = token_link(matchpointer);
                            matchtoken = token_info(matchpointer);
                            goto QUITTING;
                        } else {
                            break;
                        }
                    case par_spacer_match_token:
                        matchpointer = token_link(matchpointer);
                        matchtoken = token_info(matchpointer);
                        do {
                            /* discard as we go */
                            tex_get_token();
                        } while (cur_cmd == spacer_cmd || cur_cmd == end_paragraph_cmd);
                        last = 1;
                        goto AGAIN;
                    case keep_spacer_match_token:
                        matchpointer = token_link(matchpointer);
                        matchtoken = token_info(matchpointer);
                        do {
                            tex_get_token();
                            if (cur_cmd == spacer_cmd) {
                                spacer = 1;
                            } else {
                                break;
                            }
                        } while (1);
                        last = 1;
                        /*tex We jump to |LATER| and not to |AGAIN| so that |spacer| is kept. */
                        goto LATER;
                    case par_command_match_token:
                        /* this discards till the next par token */
                        do {
                            tex_get_token();
                        } while (cur_cmd != end_paragraph_cmd);
                        goto DELIMITER;
                    default:
                        match = matchtoken - match_token;
                        break;
                }
                matchpointer = token_link(matchpointer);
                matchtoken = token_info(matchpointer);
                s = matchpointer;
                p = lmt_expand_state.match_token_head;
                count = 0;
            }
            /*tex
                Scan a parameter until its delimiter string has been found; or, if |s = null|,
                simply scan the delimiter string. If |info(r)| is a |match| or |end_match|
                command, it cannot be equal to any token found by |get_token|. Therefore an
                undelimited parameter --- i.e., a |match| that is immediately followed by
                |match| or |end_match| --- will always fail the test |cur_tok=info(r)| in the
                following algorithm.
            */
          CONTINUE:
            /*tex Set |cur_tok| to the next token of input. */
            if (last) {
                last = 0;
            } else {
                tex_get_token();
            }
            /* is token_cmd reliable here? */
            if (! count && token_cmd(matchtoken) == ignore_cmd) {
                if (cur_cmd < ignore_cmd || cur_cmd > other_char_cmd || cur_chr != token_chr(matchtoken)) {
                    /*tex We could optimize this but it doesn't pay off now. */
                    tex_back_input(cur_tok);
                }
                matchpointer = token_link(matchpointer);
                matchtoken = token_info(matchpointer);
                if (s) {
                    s = matchpointer;
                }
                goto AGAIN;
            }
            if (cur_tok == matchtoken) {
                /*tex
                    When we end up here we have a match on a delimiter. Advance |r|; |goto found|
                    if the parameter delimiter has been fully matched, otherwise |goto continue|.
                    A slightly subtle point arises here: When the parameter delimiter ends with
                    |#|, the token list will have a left brace both before and after the
                    |end_match|. Only one of these should affect the |align_state|, but both will
                    be scanned, so we must make a correction.
                */
              DELIMITER:
                matchpointer = token_link(matchpointer);
                matchtoken = token_info(matchpointer);
                if (matchtoken >= match_token && matchtoken <= end_match_token) {
                    if (cur_tok < left_brace_limit) {
                        --lmt_input_state.align_state;
                    }
                    goto FOUND;
                } else {
                    goto CONTINUE;
                }
            } else if (cur_cmd == ignore_something_cmd && cur_chr == ignore_argument_code) {
                /*
                    NOTE(review): the second |count| test can never be true here, so this only
                    yields 1 or 3. The tolerant branch below uses |nofscanned ? 1 : count ? 2 : 3|;
                    confirm whether the same expression was intended.
                */
                quitting = count ? 1 : count ? 2 : 3;
                goto FOUND;
            }
            /*tex
                Contribute the recently matched tokens to the current parameter, and |goto continue|
                if a partial match is still in effect; but abort if |s = null|.

                When the following code becomes active, we have matched tokens from |s| to the
                predecessor of |r|, and we have found that |cur_tok <> info(r)|. An interesting
                situation now presents itself: If the parameter is to be delimited by a string such
                as |ab|, and if we have scanned |aa|, we want to contribute one |a| to the current
                parameter and resume looking for a |b|. The program must account for such partial
                matches and for others that can be quite complex. But most of the time we have
                |s = r| and nothing needs to be done.

                Incidentally, it is possible for |\par| tokens to sneak in to certain parameters of
                non-|\long| macros. For example, consider a case like |\def\a#1\par!{...}| where
                the first |\par| is not followed by an exclamation point. In such situations it
                does not seem appropriate to prohibit the |\par|, so \TEX\ keeps quiet about this
                bending of the rules.
            */
            if (s != matchpointer) {
              BAD:
                if (tolerant) {
                    quitting = nofscanned ? 1 : count ? 2 : 3;
                    tex_back_input(cur_tok);
                    // last = 0;
                    goto FOUND;
                } else if (s) {
                    /*tex cycle pointer for backup recovery */
                    halfword t = s;
                    do {
                        halfword u, v;
                        if (match) {
                            p = tex_store_new_token(p, token_info(t));
                        }
                        ++count;
                        u = token_link(t);
                        v = s;
                        while (1) {
                            if (u == matchpointer) {
                                if (cur_tok != token_info(v)) {
                                    break;
                                } else {
                                    matchpointer = token_link(v);
                                    matchtoken = token_info(matchpointer);
                                    goto CONTINUE;
                                }
                            }
                            if (token_info(u) != token_info(v)) {
                                break;
                            } else {
                                u = token_link(u);
                                v = token_link(v);
                            }
                        }
                        t = token_link(t);
                    } while (t != matchpointer);
                    matchpointer = s;
                    matchtoken = token_info(matchpointer);
                    /*tex At this point, no tokens are recently matched. */
                } else {
                    tex_handle_error(
                        normal_error_type,
                        "Use of %S doesn't match its definition",
                        lmt_input_state.warning_index,
                        "If you say, e.g., '\\def\\a1{...}', then you must always put '1' after '\\a',\n"
                        "since control sequence names are made up of letters only. The macro here has not\n"
                        "been followed by the required stuff, so I'm ignoring it."
                    );
                    goto EXIT;
                }
            }
          GROUPED:
            if (cur_tok < left_brace_limit) {
                /*tex Contribute an entire group to the current parameter. */
                int unbalance = 0;
                while (1) {
                    if (match) {
                        p = tex_store_new_token(p, cur_tok);
                    }
                    if (last) {
                        last = 0;
                    } else {
                        tex_get_token();
                    }
                    if (cur_tok < right_brace_limit) {
                        if (cur_tok < left_brace_limit) {
                            ++unbalance;
                        } else if (unbalance) {
                            --unbalance;
                        } else {
                            break;
                        }
                    }
                }
                rightbrace = p;
                if (match) {
                    p = tex_store_new_token(p, cur_tok);
                }
            } else if (cur_tok < right_brace_limit) {
                /*tex Report an extra right brace and |goto continue|. */
                tex_back_input(cur_tok);
                /* moved up: */
                ++lmt_input_state.align_state;
                tex_insert_paragraph_token();
                /* till here */
                tex_handle_error(
                    insert_error_type,
                    "Argument of %S has an extra }",
                    lmt_input_state.warning_index,
                    "I've run across a '}' that doesn't seem to match anything. For example,\n"
                    "'\\def\\a#1{...}' and '\\a}' would produce this error. The '\\par' that I've just\n"
                    "inserted will cause me to report a runaway argument that might be the root of the\n"
                    "problem." );
                goto CONTINUE;
                /*tex A white lie; the |\par| won't always trigger a runaway. */
            } else {
                /*tex
                    Store the current token, but |goto continue| if it is a blank space that would
                    become an undelimited parameter.
                */
                if (cur_tok == space_token && matchtoken <= end_match_token && matchtoken >= match_token && matchtoken != leading_match_token) {
                    goto CONTINUE;
                }
                if (match) {
                    p = tex_store_new_token(p, cur_tok);
                }
            }
            ++count;
            if (matchtoken > end_match_token || matchtoken < match_token) {
                goto CONTINUE;
            }
          FOUND:
            if (s) {
                /*
                    Tidy up the parameter just scanned, and tuck it away. If the parameter consists
                    of a single group enclosed in braces, we must strip off the enclosing braces.
                    That's why |rightbrace| was introduced. Actually, in most cases |m == 1|.
                */
                if (! thrash) {
                    if (token_info(p) < right_brace_limit && count == 1 && p != lmt_expand_state.match_token_head && match != match_bracekeeper) {
                        set_token_link(rightbrace, null);
                        tex_put_available_token(p);
                        p = token_link(lmt_expand_state.match_token_head);
                        pstack[nofscanned] = token_link(p);
                        tex_put_available_token(p);
                    } else {
                        pstack[nofscanned] = token_link(lmt_expand_state.match_token_head);
                    }
                    if (match == match_pruner) {
                        pstack[nofscanned] = tex_aux_prune_list(pstack[nofscanned]);
                    }
                    ++nofscanned;
                    if (tracing) {
                        tex_begin_diagnostic();
                        tex_print_format("%c%i<-", match_visualizer, nofscanned);
                        tex_show_token_list(pstack[nofscanned - 1], null, default_token_show_max, 0);
                        tex_end_diagnostic();
                    }
                } else {
                    thrash = 0;
                }
            }
            /*tex
                Now |info(r)| is a token whose command code is either |match| or |end_match|.
            */
            if (quitting) {
                nofarguments = quitting == 3 ? 0 : quitting == 2 && count == 0 ? 0 : nofscanned;
              QUITTING:
                if (spacer) {
                    tex_back_input(space_token); /* experiment */
                }
                /*tex
                    Walk over the remaining match tokens; the ones that stand for an argument get
                    an empty (or |{}|) entry in |pstack|.
                */
                while (1) {
                    switch (matchtoken) {
                        case end_match_token:
                            goto QUITDONE;
                        case spacer_match_token:
                        case thrash_match_token:
                        case par_spacer_match_token:
                        case keep_spacer_match_token:
                            goto NEXTMATCH;
                        case mandate_match_token:
                        case leading_match_token:
                            pstack[nofscanned] = null;
                            break;
                        case mandate_keep_match_token:
                            p = tex_store_new_token(null, left_brace_token);
                            pstack[nofscanned] = p;
                            p = tex_store_new_token(p, right_brace_token);
                            break;
                        case continue_match_token:
                            matchpointer = token_link(matchpointer);
                            matchtoken = token_info(matchpointer);
                            quitting = 0;
                            goto RESTART;
                        case quit_match_token:
                            if (quitting) {
                                matchpointer = token_link(matchpointer);
                                matchtoken = token_info(matchpointer);
                                quitting = 0;
                                goto RESTART;
                            } else {
                                goto NEXTMATCH;
                            }
                        default:
                            if (matchtoken >= match_token && matchtoken < end_match_token) {
                                pstack[nofscanned] = null;
                                break;
                            } else {
                                goto NEXTMATCH;
                            }
                    }
                    nofscanned++;
                    if (tracing) {
                        tex_begin_diagnostic();
                        tex_print_format("%c%i--", match_visualizer, nofscanned);
                        tex_end_diagnostic();
                    }
                  NEXTMATCH:
                    matchpointer = token_link(matchpointer);
                    matchtoken = token_info(matchpointer);
                }
            }
        } while (matchtoken != end_match_token);
        nofarguments = nofscanned;
      QUITDONE:
        matchpointer = token_link(matchpointer);
        /*tex
            Feed the macro body and its parameters to the scanner Before we put a new token list on the
            input stack, it is wise to clean off all token lists that have recently been depleted. Then
            a user macro that ends with a call to itself will not require unbounded stack space.
        */
        tex_cleanup_input_state();
        /*tex
            We don't really start a list, it's more housekeeping. The starting point is the body and
            the later set |loc| reflects that.
        */
        tex_begin_macro_list(chr);
        /*tex
            Beware: here the |name| is used for symbolic locations but also for macro indices but these
            are way above the symbolic |token_types| that we use. Better would be to have a dedicated
            variable but let's not open up a can of worms now. We can't use |warning_index| combined
            with a symbolic name either. We're at |end_match_token| now so we need to advance.
        */
        lmt_input_state.cur_input.name = cs;
        lmt_input_state.cur_input.loc = matchpointer;
        /*tex
            This comes last, after the cleanup and the start of the macro list.
        */
        if (nofscanned) {
            tex_copy_pstack_to_param_stack(&pstack[0], nofscanned);
        }
      EXIT:
        lmt_expand_state.arguments = nofarguments;
        lmt_input_state.scanner_status = save_scanner_status;
        lmt_input_state.warning_index = save_warning_index;
    } else {
        tex_cleanup_input_state();
        if (token_link(chr)) {
            tex_begin_macro_list(chr);
            lmt_expand_state.arguments = 0;
            /*
                NOTE(review): the branch with parameters stores |cs| here while this one stores
                the current |warning_index|; confirm that this difference is intended.
            */
            lmt_input_state.cur_input.name = lmt_input_state.warning_index;
            lmt_input_state.cur_input.loc = token_link(chr);
        } else {
            /* We ignore empty bodies but it doesn't gain us that much. */
        }
    }
}
diff --git a/source/luametatex/source/tex/texexpand.h b/source/luametatex/source/tex/texexpand.h new file mode 100644 index 000000000..1db266b20 --- /dev/null +++ b/source/luametatex/source/tex/texexpand.h @@ -0,0 +1,35 @@
/*
    See license.txt in the root of this project.
*/

# ifndef LMT_EXPAND_H
# define LMT_EXPAND_H

/* The state of the expansion machinery; a single instance |lmt_expand_state| is shared. */

typedef struct expand_state_info {
    limits_data limits;
    int         depth;            /* decremented when |tex_expand_current_token| finishes */
    int         cs_name_level;    /* raised while a control sequence name is being scanned */
    int         arguments;        /* set by the macro call: the number of arguments */
    halfword    match_token_head; /* head of the list in which a parameter is collected */
    int         padding;
} expand_state_info ;

extern expand_state_info lmt_expand_state ;

/* we can also have a get_x_token_ignore_spaces */

extern void     tex_initialize_expansion    (void);
extern void     tex_cleanup_expansion       (void);

extern halfword tex_expand_match_token_head (void);
extern void     tex_expand_current_token    (void);
extern halfword tex_get_x_token             (void); /* very texie names */
extern void     tex_x_token                 (void); /* very texie names */
extern void     tex_insert_relax_and_cur_cs (void);

extern halfword tex_create_csname           (void);
extern int      tex_is_valid_csname         (void);

extern int      tex_get_parameter_count     (void);

# endif
diff --git a/source/luametatex/source/tex/texfileio.c b/source/luametatex/source/tex/texfileio.c new file mode 100644 index 000000000..4f712401b --- /dev/null +++ b/source/luametatex/source/tex/texfileio.c @@ -0,0 +1,939 @@
/*
    See license.txt in the root of this project.
+*/ + +# include "luametatex.h" + +fileio_state_info lmt_fileio_state = { + .io_buffer = NULL, + .io_buffer_data = { + .minimum = min_buffer_size, + .maximum = max_buffer_size, + .size = siz_buffer_size, + .step = stp_buffer_size, + .allocated = 0, + .itemsize = sizeof(unsigned char), + .top = 0, + .ptr = 0, + .initial = memory_data_unset, + .offset = 0, + }, + .io_first = 0, + .io_last = 0, + .name_in_progress = 0, + .log_opened = 0, + .job_name = NULL, + .log_name = NULL, + .fmt_name = NULL +}; + +/*tex + + Once \TEX\ is working, you should be able to diagnose most errors with the |\show| commands and + other diagnostic features. Because we have made some internal changes the optional debug interface + has been removed. + +*/ + +# define reserved_io_buffer_slots 256 + +void tex_initialize_fileio_state(void) +{ + int size = lmt_fileio_state.io_buffer_data.minimum; + lmt_fileio_state.io_buffer = aux_allocate_clear_array(sizeof(unsigned char), size, reserved_io_buffer_slots); + if (lmt_fileio_state.io_buffer) { + lmt_fileio_state.io_buffer_data.allocated = size; + } else { + tex_overflow_error("buffer", size); + } +} + +int tex_room_in_buffer(int top) +{ + /*tex Beware: |top| can exceed the old size plus the step. 
*/ + if (top > lmt_fileio_state.io_buffer_data.top) { + lmt_fileio_state.io_buffer_data.top = top; + if (top > lmt_fileio_state.io_buffer_data.allocated) { + unsigned char *tmp = NULL; + if (top <= lmt_fileio_state.io_buffer_data.size) { + if (lmt_fileio_state.io_buffer_data.allocated + lmt_fileio_state.io_buffer_data.step > top) { + top = lmt_fileio_state.io_buffer_data.allocated + lmt_fileio_state.io_buffer_data.step; + if (top > lmt_fileio_state.io_buffer_data.size) { + top = lmt_fileio_state.io_buffer_data.size; + } + } + if (top > lmt_fileio_state.io_buffer_data.allocated) { + lmt_fileio_state.io_buffer_data.allocated = top; + tmp = aux_reallocate_array(lmt_fileio_state.io_buffer, sizeof(unsigned char), top, reserved_io_buffer_slots); + lmt_fileio_state.io_buffer = tmp; + } + } + lmt_run_memory_callback("buffer", tmp ? 1 : 0); + if (! tmp) { + tex_overflow_error("buffer", top); + return 0; + } + } + } + return 1; +} + +static int tex_aux_open_outfile(FILE **f, const char *name, const char *mode) +{ + FILE *res = aux_utf8_fopen(name, mode); + if (res) { + *f = res; + return 1; + } + return 0; +} + +/*tex + + We conform to the way \WEBC\ does handle trailing tabs and spaces. This decade old behaviour + was changed in September 2017 and can introduce compatibility issues in existing workflows. + Because we don't want too many differences with upstream \TEX live we just follow up on that + patch and it's up to macro packages to deal with possible issues (which can be done via the + usual callbacks. One can wonder why we then still prune spaces but we leave that to the reader. + + Patched original comment: + + Make last be one past the last non-space character in \quote {buffer}, ignoring line + terminators (but not, e.g., tabs). This is because we are supposed to treat this like a line of + TeX input. 
Although there are pathological cases (|SP CR SP CR|) where this differs from + input_line below, and from previous behavior of removing all whitespace, the simplicity of + removing all trailing line terminators seems more in keeping with actual command line + processing. + + The |IS_SPC_OR_EOL| macro deals with space characters (|SPACE 32|) and newlines (|CR| and |LF|) + and no longer looks at tabs (|TAB 9|). + +*/ + +/* + The terminal input code is gone as is the read related code (that had already been nicely + cleaned up and abstracted) but that is the price we pay for stepwise progress. That code is + still in the git repository of course. + + At some point I might do the same as we do in mplib: four callbacks for open, close, read + and write (in which case the log goes via write). Part of the management is then moved to + \LUA\ and we save a lookup. + + When I adapted the code in this module and the one dealing with errors, I decided to delegate + all interaction to \LUA, also because of the sometimes tight integration in the scanning and + expansion mechanisms. In the 2021 TeX tuneup there have been some patches in the interaction + code and some remarks ring a bell: especially the relation between offering feedback and + waiting for input. However, because we delegate to \LUA, the engine is no longer responsible + for what the macro package lets the user do in case of an error. For instance, in \CONTEXT\ we + just abort the run: it makes no sense to carry on the wrong way. Computers are fast enough for + a \quotation {Fix and run again.} approach. But we do offer the message and optional help as + a cue. On the agenda is a further abstraction of error handling. This deviation is fine as we + obey Don's wish to not call it \TEX\ but instead add some more letters to the name.
+ +*/ + +int tex_lua_a_open_in(const char *fn) +{ + int callback_id = lmt_callback_defined(open_data_file_callback); + if (callback_id > 0) { + int k = lmt_run_and_save_callback(lmt_lua_state.lua_instance, callback_id, "S->", fn); + lmt_input_state.in_stack[lmt_input_state.cur_input.index].input_file_callback_id = k; + return k > 0; + } else { + tex_emergency_message("startup error", "missing open_data_file callback"); + tex_emergency_exit(); + return 0; + } +} + +void tex_lua_a_close_in() +{ + int k = lmt_input_state.in_stack[lmt_input_state.cur_input.index].input_file_callback_id; + if (k > 0) { + lmt_run_saved_callback_close(lmt_lua_state.lua_instance, k); + lmt_destroy_saved_callback(lmt_lua_state.lua_instance, k); + lmt_input_state.in_stack[lmt_input_state.cur_input.index].input_file_callback_id = 0; + } +} + +/*tex + + Binary input and output are done with \CCODE's ordinary procedures, so we don't have to make + any other special arrangements for binary \IO. Text output is also easy to do with standard + routines. The treatment of text input is more difficult, however, because of the necessary + translation to |unsigned char| values. \TEX's conventions should be efficient, and they should + blend nicely with the user's operating environment. + + Input from text files is read one line at a time, using a routine called |lua_input_ln|. This + function is defined in terms of global variables called |buffer|, |first|, and |last| that will + be described in detail later; for now, it suffices for us to know that |buffer| is an array of + |unsigned char| values, and that |first| and |last| are indices into this array representing + the beginning and ending of a line of text. + + The lines of characters being read: |buffer|, the first unused position in |first|, the end of + the line just input |last|, the largest index used in |buffer|: |max_buf_stack|. 
+ + The |lua_input_ln| function brings the next line of input from the specified file into available + positions of the buffer array and returns the value |true|, unless the file has already been + entirely read, in which case it returns |false| and sets |last:=first|. In general, the + |unsigned char| numbers that represent the next line of the file are input into |buffer[first]|, + |buffer[first + 1]|, \dots, |buffer[last - 1]|; and the global variable |last| is set equal to + |first| plus the length of the line. Trailing blanks are removed from the line; thus, either + |last = first| (in which case the line was entirely blank) or |buffer[last - 1] <> " "|. + + An overflow error is given, however, if the normal actions of |lua_input_ln| would make |last + >= buf_size|; this is done so that other parts of \TEX\ can safely look at the contents of + |buffer[last+1]| without overstepping the bounds of the |buffer| array. Upon entry to + |lua_input_ln|, the condition |first < buf_size| will always hold, so that there is always room + for an \quote {empty} line. + + The variable |max_buf_stack|, which is used to keep track of how large the |buf_size| parameter + must be to accommodate the present job, is also kept up to date by |lua_input_ln|. + + If the |bypass_eoln| parameter is |true|, |lua_input_ln| will do a |get| before looking at the + first character of the line; this skips over an |eoln| that was in |f^|. The procedure does not + do a |get| when it reaches the end of the line; therefore it can be used to acquire input from + the user's terminal as well as from ordinary text files. + + Since the inner loop of |lua_input_ln| is part of \TEX's \quote {inner loop} --- each character + of input comes in at this place --- it is wise to reduce system overhead by making use of + special routines that read in an entire array of characters at once, if such routines are + available. 
+ +*/ + +int tex_lua_input_ln(void) /*tex |bypass_eoln| was not used */ +{ + int callback_id = lmt_input_state.in_stack[lmt_input_state.cur_input.index].input_file_callback_id; + if (callback_id > 0) { + lua_State *L = lmt_lua_state.lua_instance; + int last_ptr = 0; + lmt_fileio_state.io_last = lmt_fileio_state.io_first; + last_ptr = lmt_run_saved_callback_line(L, callback_id, lmt_fileio_state.io_first); + if (last_ptr < 0) { + return 0; + } else if (last_ptr > 0) { + lmt_fileio_state.io_last = last_ptr; + if (last_ptr > lmt_fileio_state.io_buffer_data.top) { + lmt_fileio_state.io_buffer_data.top = last_ptr; + } + } + return 1; + } else { + return 0; + } +} + +/*tex + + We need a special routine to read the first line of \TEX\ input from the user's terminal. + This line is different because it is read before we have opened the transcript file; there is + sort of a \quote {chicken and egg} problem here. If the user types |\input paper| on the first + line, or if some macro invoked by that line does such an |\input|, the transcript file will be + named |paper.log|; but if no |\input| commands are performed during the first line of terminal + input, the transcript file will acquire its default name |texput.log|. (The transcript file + will not contain error messages generated by the first line before the first |\input| command.) + + The first line is special also because it may be read before \TEX\ has input a format file. In + such cases, normal error messages cannot yet be given. The following code uses concepts that + will be explained later. + + Different systems have different ways to get started. But regardless of what conventions are + adopted, the routine that initializes the terminal should satisfy the following specifications: + + \startitemize[n] + + \startitem + It should open file |term_in| for input from the terminal. + \stopitem + + \startitem + If the user has given a command line, this line should be considered the first line of + terminal input. 
Otherwise the user should be prompted with |**|, and the first line of + input should be whatever is typed in response. + \stopitem + + \startitem + The first line of input, which might or might not be a command line, should appear in + locations |first| to |last-1| of the |buffer| array. + \stopitem + + \startitem + The global variable |loc| should be set so that the character to be read next by \TEX\ + is in |buffer[loc]|. This character should not be blank, and we should have |loc < last|. + \stopitem + + \stopitemize + + It may be necessary to prompt the user several times before a non-blank line comes in. The + prompt is |**| instead of the later |*| because the meaning is slightly different: |\input| + need not be typed immediately after |**|. + + The following code does the required initialization. If anything has been specified on the + command line, then |t_open_in| will return with |last > first|. + + This code has been adapted and we no longer ask for a name. It makes no sense because one needs + to initialize the primitives and backend anyway and no one is going to do that interactively. + Of course one can implement a session in \LUA. We keep the \TEX\ trick to push the name into + the input buffer and then exercise an |\input| which ensures proper housekeeping. There is a + bit of overkill in the next function but for now we keep it (as reference). + + For a while copying the argument to the buffer lived in the engine lib but it made no sense + to duplicate code, so now it's here. Anyway, the following no longer applies: + + \startquotation + This is supposed to open the terminal for input, but what we really do is copy command line + arguments into \TEX's buffer, so it can handle them. If nothing is available, or we've been + called already (and hence, |argc == 0|), we return with |last = first|. + \stopquotation + + In \LUAMETATEX\ we don't really have a terminal.
In the \LUATEX\ precursor we used to append + all the remaining arguments but now we just take the first one. If one wants filenames with + spaces \unknown\ use quotes. Keep in mind that original \TEX\ permits this: + + \starttyping + tex ... filename \\hbox{!} \\end + \stoptyping + + But we don't follow that route in the situation where \LUA\ is mostly in charge of passing + input from files and the console. + + In the end I went for an easier solution: just pass the name to the file reader. But we keep + this as a nostalgic reference to how \TEX\ originally kind of did these things. + + \starttyping + int input_file_name_pushed(void) + { + const char *ptr = engine_input_filename(); + if (ptr) { + int len = strlen(ptr); + fileio_state.io_buffer[fileio_state.io_first] = 0; + if (len > 0 && room_in_buffer(len + 1)) { + // We cannot use strcat, because we have multibyte UTF-8 input. Hm, why not. + fileio_state.io_last= fileio_state.io_first; + while (*ptr) { + fileio_state.io_buffer[fileio_state.io_last++] = (unsigned char) * (ptr++); + } + // Backtrack over spaces and newlines. + for ( + --fileio_state.io_last; + fileio_state.io_last >= fileio_state.io_first && IS_SPC_OR_EOL(fileio_state.io_buffer[fileio_state.io_last]); + --fileio_state.io_last + ); + // Terminate the string. + fileio_state.io_buffer[++fileio_state.io_last] = 0; + // One more time, this time converting to \TEX's internal character representation.
+ if (fileio_state.io_last > fileio_state.io_first) { + input_state.cur_input.loc = fileio_state.io_first; + while ((input_state.cur_input.loc < fileio_state.io_last) && (fileio_state.io_buffer[input_state.cur_input.loc] == ' ')) { + ++input_state.cur_input.loc; + } + if (input_state.cur_input.loc < fileio_state.io_last) { + input_state.cur_input.limit = fileio_state.io_last; + fileio_state.io_first = fileio_state.io_last + 1; + } + if (input_state.cur_input.loc < input_state.cur_input.limit) { + return 1; + } + } + } + } + fileio_state.io_first = 1; + fileio_state.io_last = 1; + return 0; + } + \stopttyping + + It's this kind of magic that can take lots of time to play with and figure out, also because + we cannot break expectations too much. + +*/ + +/*tex + + Per June 22 2020 the terminal code is gone. See |texlegacy.c| for the old, already adapted + long ago, code. It was already shedulded for removal a while. We only keep the update. + +*/ + +void tex_terminal_update(void) /* renamed, else conflict in |lmplib|. */ +{ + fflush(stdout); +} + +/*tex + + It's time now to fret about file names. Besides the fact that different operating systems treat + files in different ways, we must cope with the fact that completely different naming conventions + are used by different groups of people. The following programs show what is required for one + particular operating system; similar routines for other systems are not difficult to devise. + + \TEX\ assumes that a file name has three parts: the name proper; its \quote {extension}; and a + \quote {file area} where it is found in an external file system. 
The extension of an input file + or a write file is assumed to be |.tex| unless otherwise specified; it is |transcript_extension| + on the transcript file that records each run of \TEX; it is |.tfm| on the font metric files that + describe characters in the fonts \TEX\ uses; it is |.dvi| on the output files that specify + typesetting information; and it is |format_extension| on the format files written by \INITEX\ + to initialize \TEX. The file area can be arbitrary on input files, but files are usually output + to the user's current area. + + Simple uses of \TEX\ refer only to file names that have no explicit extension or area. For + example, a person usually says |\input paper| or |\font \tenrm = helvetica| instead of |\input + {paper.new}| or |\font \tenrm = {test}|. Simple file names are best, because they make the \TEX\ + source files portable; whenever a file name consists entirely of letters and digits, it should be + treated in the same way by all implementations of \TEX. However, users need the ability to refer + to other files in their environment, especially when responding to error messages concerning + unopenable files; therefore we want to let them use the syntax that appears in their favorite + operating system. + + The following procedures don't allow spaces to be part of file names; but some users seem to like + names that are spaced-out. System-dependent changes to allow such things should probably be made + with reluctance, and only when an entire file name that includes spaces is \quote {quoted} somehow. + + Here are the global values that file names will be scanned into. + + \starttyping + strnumber cur_name; + strnumber cur_area; + strnumber cur_ext; + \stoptyping + + The file names we shall deal with have the following structure: If the name contains |/| or |:| + (for Amiga only), the file area consists of all characters up to and including the final such + character; otherwise the file area is null. 
If the remaining file name contains |.|, the file + extension consists of all such characters from the last |.| to the end, otherwise the file + extension is null. + + We can scan such file names easily by using two global variables that keep track of the + occurrences of area and extension delimiters: + + Input files that can't be found in the user's area may appear in a standard system area called + |TEX_area|. Font metric files whose areas are not given explicitly are assumed to appear in a + standard system area called |TEX_font_area|. These system area names will, of course, vary from + place to place. + + This whole model has been adapted a little but we do keep the |area|, |name|, |ext| distinction + for now although we don't use the string pool. + +*/ + +static char *tex_aux_pack_file_name(char *s, int l, const char *name, const char *ext) +{ + const char *fn = (char *) s; + if ((! fn) || (l <= 0)) { + fn = name; + } + if (! fn) { + return NULL; + } else if (! ext) { + return lmt_memory_strdup(fn); + } else { + int e = -1; + for (int i = 0; i < l; i++) { + if (IS_DIR_SEP(fn[i])) { + e = -1; + } else if (fn[i] == '.') { + e = i; + } + } + if (e >= 0) { + return lmt_memory_strdup(fn); + } else { + char *f = lmt_memory_malloc(strlen(fn) + strlen(ext) + 1); + if (f) { + sprintf(f, "%s%s", fn, ext); + } + return f; + } + } +} + +/*tex + + Here is a routine that manufactures the output file names, assuming that |job_name <> 0|. It + ignores and changes the current settings of |cur_area| and |cur_ext|; |s = transcript_extension|, + |".dvi"|, or |format_extension| + + The packer does split the basename every time but isn't called that often so we can use it in + the checker too. + +*/ + +static char *tex_aux_pack_job_name(const char *e, int keeppath, int keepsuffix) +{ + char *n = lmt_fileio_state.job_name; + int ln = (n) ? (int) strlen(n) : 0; + if (! ln) { + tex_fatal_error("bad jobname"); + return NULL; + } else { + int le = (e) ? 
(int) strlen(e) : 0; + int f = -1; /* first */ + int l = -1; /* last */ + char *fn = NULL; + int k = 0; + for (int i = 0; i < ln; i++) { + if (IS_DIR_SEP(n[i])) { + f = i; + l = -1; + } else if (n[i] == '.') { + l = i; + } + } + if (keeppath) { + f = 0; + } else if (f < 0) { + f = 0; + } else { + f += 1; + } + if (keepsuffix || l < 0) { + l = ln; + } + fn = (char*) lmt_memory_malloc((l - f) + le + 2); /* a bit too much */ + if (fn) { + for (int i = f; i < l; i++) { + fn[k++] = n[i]; + } + for (int i = 0; i < le; i++) { + fn[k++] = e[i]; + } + fn[k] = 0; + } + return fn; + } +} + +/*tex + + The following comment is obsolete but we keep it as reference because it tells some history. + + \startquotation + Because the format is zipped we read and write dump files through zlib. Earlier versions recast + |*f| from |FILE *| to |gzFile|, but there is no guarantee that these have the same size, so a + static variable is needed. + + We no longer do byte-swapping so formats are generated for the system and not shared. It + actually slowed down loading of the format on the majority of used platforms (intel). + + A \CONTEXT\ format is uncompressed some 16 MB but that used to be over 30MB due to more + (preallocated) memory usage. A compressed format is 11 MB so the saving is not that much. If + we were in lua I'd load the whole file in one go and use a fast decompression after which we + could access the bytes in memory. But it's not worth the trouble. + + Tests has shown that a level 3 compression is the most optimal tradeoff between file size and + load time. + + So, in principle we can undefine |FMT_COMPRESSION| below and experiment a bit with it. With + SSD's it makes no dent, but on a network it still might. + + Per end May 2019 the |FMT_COMPRESSION| branch is gone so that we can simplify the opener and + closer. 
+ \stopquotation + +*/ + +void tex_check_fmt_name(void) +{ + if (lmt_engine_state.dump_name) { + char *tmp = lmt_fileio_state.job_name; + lmt_fileio_state.job_name = lmt_engine_state.dump_name; + lmt_fileio_state.fmt_name = tex_aux_pack_job_name(format_extension, 1, 0); + lmt_fileio_state.job_name = tmp; + } else if (lmt_main_state.run_state != initializing_state) { + /*tex For |dump_name| to be NULL is a bug. */ + tex_emergency_message("startup error", "no format file given, quitting"); + tex_emergency_exit(); + } +} + +void tex_check_job_name(char * fn) +{ + if (! lmt_fileio_state.job_name) { + if (lmt_engine_state.startup_jobname) { + lmt_fileio_state.job_name = lmt_engine_state.startup_jobname; /* not freed here */ + lmt_fileio_state.job_name = tex_aux_pack_job_name(NULL, 0, 0); + } else if (fn) { + lmt_fileio_state.job_name = fn; + lmt_fileio_state.job_name = tex_aux_pack_job_name(NULL, 0, 0); /* not freed here */ + } else { + tex_emergency_message("startup warning", "using fallback jobname 'texput', continuing"); + lmt_fileio_state.job_name = lmt_memory_strdup("texput"); + } + } + if (! lmt_fileio_state.log_name) { + lmt_fileio_state.log_name = tex_aux_pack_job_name(transcript_extension, 0, 1); + } + if (! lmt_fileio_state.fmt_name) { + lmt_fileio_state.fmt_name = tex_aux_pack_job_name(format_extension, 0, 1); + } +} + +/*tex + + A messier routine is also needed, since format file names must be scanned before \TEX's + string mechanism has been initialized. We shall use the global variable |TEX_format_default| + to supply the text for default system areas and extensions related to format files. + + Under \UNIX\ we don't give the area part, instead depending on the path searching that will + happen during file opening. Also, the length will be set in the main program. 
+ + \starttyping + char *TEX_format_default; + \stoptyping + + This part of the program becomes active when a \quote {virgin} \TEX\ is trying to get going, + just after the preliminary initialization, or when the user is substituting another format file + by typing |&| after the initial |**| prompt. The buffer contains the first line of input in + |buffer[loc .. (last - 1)]|, where |loc < last| and |buffer[loc] <> " "|. + +*/ + +dumpstream tex_open_fmt_file(int writemode) +{ + dumpstream f = NULL; + if (! lmt_fileio_state.fmt_name) { + /* this can't happen */ + tex_emergency_message("startup error", "no format output file '%s' given, quitting", emergency_fmt_name); + tex_emergency_exit(); + } else if (writemode) { + f = aux_utf8_fopen(lmt_fileio_state.fmt_name, FOPEN_WBIN_MODE); + if (! f) { + tex_emergency_message("startup error", "invalid format output file '%s' given, quitting", lmt_fileio_state.fmt_name); + tex_emergency_exit(); + } + } else { + int callbackid = lmt_callback_defined(find_format_file_callback); + if (callbackid > 0) { + char *fnam = NULL; + int test = lmt_run_callback(lmt_lua_state.lua_instance, callbackid, "S->R", lmt_fileio_state.fmt_name, &fnam); + if (test && fnam && strlen(fnam) > 0) { + lmt_memory_free(lmt_fileio_state.fmt_name); + lmt_fileio_state.fmt_name = fnam; + } else { + lmt_memory_free(fnam); + } + f = aux_utf8_fopen(lmt_fileio_state.fmt_name, FOPEN_RBIN_MODE); + if (! f) { + tex_emergency_message("startup error", "invalid format input file '%s' given, quitting", emergency_fmt_name); + tex_emergency_exit(); + } + } else { + /*tex For the moment we make this mandate! 
*/ + tex_emergency_message("startup error", "missing find_format_file callback"); + tex_emergency_exit(); + } + } + return f; +} + +void tex_close_fmt_file(dumpstream f) +{ + if (f) { + fclose(f); + } +} + +/*tex + + The variable |name_in_progress| is used to prevent recursive use of |scan_file_name|, since the + |begin_name| and other procedures communicate via global variables. Recursion would arise only + by devious tricks like |\input \input f|; such attempts at sabotage must be thwarted. + Furthermore, |name_in_progress| prevents |\input| from being initiated when a font size + specification is being scanned. + + Another variable, |job_name|, contains the file name that was first |\input| by the user. This + name is extended by |transcript_extension| and |.dvi| and |format_extension| in the names of + \TEX's output files. The fact if the transcript file been opened is registered in + |log_opened_global|. + + Initially |job_name = 0|; it becomes nonzero as soon as the true name is known. We have + |job_name = 0| if and only if the |log| file has not been opened, except of course for a short + time just after |job_name| has become nonzero. + + The full name of the log file is stored in |log_name|. The |open_log_file| routine is used to + open the transcript file and to help it catch up to what has previously been printed on the + terminal. + +*/ + +void tex_open_log_file(void) +{ + if (! lmt_fileio_state.log_opened) { + int callback_id = lmt_callback_defined(find_log_file_callback); + if (callback_id > 0) { + char *filename = NULL; + int okay = 0; + tex_check_job_name(NULL); + okay = lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "S->R", lmt_fileio_state.log_name, &filename); + if (okay && filename && (strlen(filename) > 0)) { + lmt_memory_free(lmt_fileio_state.log_name); + lmt_fileio_state.log_name = filename; + } else { + lmt_memory_free(filename); + } + } else { + /*tex For the moment we make this mandate! 
*/ + tex_emergency_message("startup error", "missing find_log_file callback"); + tex_emergency_exit(); + } + if (tex_aux_open_outfile(&lmt_print_state.logfile, lmt_fileio_state.log_name, FOPEN_W_MODE)) { + /*tex The previous |selector| setting is saved:*/ + int saved_selector = lmt_print_state.selector; + lmt_print_state.selector = logfile_selector_code; + lmt_fileio_state.log_opened = 1; + /*tex Again we resolve a callback id: */ + callback_id = lmt_callback_defined(start_run_callback); + /*tex There is no need to free |fn|! */ + if (callback_id == 0) { + tex_print_banner(); + /*tex Print the banner line, including current date and time. */ + tex_print_log_banner(); + /*tex Make sure bottom level is in memory. */ + lmt_input_state.input_stack[lmt_input_state.input_stack_data.ptr] = lmt_input_state.cur_input; + /*tex We don't have a first line so that code is gone. */ + tex_print_ln(); + } else if (callback_id > 0) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->"); + } else { + tex_print_banner(); + } + /*tex should be done always */ + if (lmt_print_state.loggable_info) { + fprintf(lmt_print_state.logfile, "%s\n", lmt_print_state.loggable_info); + lmt_memory_free(lmt_print_state.loggable_info); + lmt_print_state.loggable_info = NULL; + } + switch (saved_selector) { + case no_print_selector_code : lmt_print_state.selector = logfile_selector_code; break; + case terminal_selector_code : lmt_print_state.selector = terminal_and_logfile_selector_code; break; + default : lmt_print_state.selector = saved_selector; break; + } + } else { + tex_emergency_message("startup error", "log file '%s' cannot be opened, quitting", emergency_log_name); + tex_emergency_exit(); + } + } +} + +void tex_close_log_file(void) +{ + fclose(lmt_print_state.logfile); + lmt_fileio_state.log_opened = 0; +} + +/*tex + + Let's turn now to the procedure that is used to initiate file reading when an |\input| command + is being processed. 
This function is used with |\\input| as well as in the start up. + +*/ + +void tex_start_input(char *fn) +{ + /*tex Set up |cur_file| and new level of input. */ + tex_begin_file_reading(); + if (! tex_lua_a_open_in(fn)) { + /*tex + Normally this is catched earler, as we have lookup callbacks but the first file, the + one passed on the command line can fall though this checking. + */ + tex_end_file_reading(); + tex_emergency_message("runtime error", "input file '%s' is not found, quitting", fn); + tex_emergency_exit(); + } + lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].full_source_filename = fn; + lmt_input_state.cur_input.name = io_file_input_code; + /*tex + |open_log_file| doesn't |show_context|, so |limit| and |loc| needn't be set to meaningful + values yet. + */ + tex_report_start_file((unsigned char *) fn); + ++lmt_input_state.open_files; + tex_terminal_update(); + lmt_input_state.cur_input.state = new_line_state; + /*tex + + Read the first line of the new file. Here we have to remember to tell the |lua_input_ln| + routine not to start with a |get|. If the file is empty, it is considered to contain a + single blank line. + + */ + lmt_input_state.input_line = 1; + tex_lua_input_ln(); + lmt_input_state.cur_input.limit = lmt_fileio_state.io_last; /*tex Was |firm_up_the_line();|. */ + if (end_line_char_inactive) { + --lmt_input_state.cur_input.limit; + } else { + lmt_fileio_state.io_buffer[lmt_input_state.cur_input.limit] = (unsigned char) end_line_char_par; + } + lmt_fileio_state.io_first = lmt_input_state.cur_input.limit + 1; + lmt_input_state.cur_input.loc = lmt_input_state.cur_input.start; +} + +/*tex + + In order to isolate the system-dependent aspects of file names, the system-independent parts of + \TEX\ are expressed in terms of three system-dependent procedures called |begin_name|, + |more_name|, and |end_name|. 
In essence, if the user-specified characters of the file name are + |c_1|\unknown|c_n|, the system-independent driver program does the operations + + \starttyping + |begin_name|; + |more_name|(c_1); + ..... + |more_name|(c_n); + |end_name| + \stoptyping + + These three procedures communicate with each other via global variables. Afterwards the file + name will appear in the string pool as three strings called |cur_name|, |cur_area|, and + |cur_ext|; the latter two are null (i.e., |""|), unless they were explicitly specified by the + user. + + Actually the situation is slightly more complicated, because \TEX\ needs to know when the file + name ends. The |more_name| routine is a function (with side effects) that returns |true| on the + calls |more_name (c_1)|, \dots, |more_name (c_{n - 1})|. The final call |more_name(c_n)| returns + |false|; or, it returns |true| and the token following |c_n| is something like |\hbox| (i.e., + not a character). In other words, |more_name| is supposed to return |true| unless it is sure that + the file name has been completely scanned; and |end_name| is supposed to be able to finish the + assembly of |cur_name|, |cur_area|, and |cur_ext| regardless of whether |more_name (c_n)| + returned |true| or |false|. + + This code has been adapted and the string pool is no longer used. We also don't ask for another + name on the console. + +*/ + +/*tex + + And here's the second. The string pool might change as the file name is being scanned, since a + new |\csname| might be entered; therefore we keep |area_delimiter| and |ext_delimiter| relative + to the beginning of the current string, instead of assigning an absolute address like |pool_ptr| + to them. + + Now let's consider the \quote {driver} routines by which \TEX\ deals with file names in a + system-independent manner. First comes a procedure that looks for a file name in the input by + calling |get_x_token| for the information. 
+ +*/ + +char *tex_read_file_name(int optionalequal, const char * name, const char* ext) +{ + char *fn = NULL; + int l = 0; + char *s = NULL; + halfword result; + if (optionalequal) { + tex_scan_optional_equals(); + } + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd || cur_cmd == relax_cmd); + if (cur_cmd == left_brace_cmd) { + result = tex_scan_toks_expand(1, NULL, 0); + } else { + int quote = 0; + halfword p = get_reference_token(); + result = p; + while (1) { + switch (cur_cmd) { + case escape_cmd: + case left_brace_cmd: + case right_brace_cmd: + case math_shift_cmd: + case alignment_tab_cmd: + case parameter_cmd: + case superscript_cmd: + case subscript_cmd: + case letter_cmd: + case other_char_cmd: + if (cur_chr == '"') { + if (quote) { + goto DONE; + } else { + quote = 1; + } + } else { + p = tex_store_new_token(p, cur_tok); + } + break; + case spacer_cmd: + case end_line_cmd: + if (quote) { + p = tex_store_new_token(p, token_val(spacer_cmd, ' ')); + } else { + goto DONE; + } + case ignore_cmd: + break; + default: + tex_back_input(cur_tok); + goto DONE; + } + tex_get_x_token(); + } + } + DONE: + s = tex_tokenlist_to_tstring(result, 1, &l, 0, 0, 0); + fn = s ? tex_aux_pack_file_name(s, l, name, ext) : NULL; + /*tex Shouldn't we also free |result| ? 
*/ + tex_flush_token_list(result); + return fn; +} + +void tex_print_file_name(unsigned char *name) +{ + int must_quote = 0; + if (name) { + unsigned char *j = name; + while (*j) { + if (*j == ' ') { + must_quote = 1; + break; + } else { + j++; + } + } + } + if (must_quote) { + /* initial quote */ + tex_print_char('"'); + } + if (name) { + unsigned char *j = name; + while (*j) { + if (*j == '"') { + /* skip embedded quote, maybe escape */ + } else { + tex_print_char(*j); + } + j++; + } + } + if (must_quote) { + /* final quote */ + tex_print_char('"'); + } +} + +void tex_report_start_file(unsigned char *name) +{ + int callback_id = lmt_callback_defined(start_file_callback); + if (callback_id) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "S->", name); + } else { + tex_print_char('('); + tex_print_file_name((unsigned char *) name); + } +} + +void tex_report_stop_file(void) +{ + int callback_id = lmt_callback_defined(stop_file_callback); + if (callback_id) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->"); + } else { + tex_print_char(')'); + } +} diff --git a/source/luametatex/source/tex/texfileio.h b/source/luametatex/source/tex/texfileio.h new file mode 100644 index 000000000..1f7005342 --- /dev/null +++ b/source/luametatex/source/tex/texfileio.h @@ -0,0 +1,81 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_TEXFILEIO_H +# define LMT_TEXFILEIO_H + +# include "textypes.h" + +# define FOPEN_R_MODE "r" +# define FOPEN_W_MODE "wb" +# define FOPEN_RBIN_MODE "rb" +# define FOPEN_WBIN_MODE "wb" + +# define IS_SPC_OR_EOL(c) ((c) == ' ' || (c) == '\r' || (c) == '\n') + +extern void tex_initialize_fileio_state (void); +extern int tex_room_in_buffer (int top); +extern int tex_lua_a_open_in (const char *fn); +extern void tex_lua_a_close_in (void); +extern int tex_lua_input_ln (void); + +/*tex + + The user's terminal acts essentially like other files of text, except that it is used both for + input and for output. 
In traditional \TEX, when the terminal is considered an input file, the + file variable is called |term_in|, and when it is considered an output file the file variable + is |term_out|. + + However, in \LUATEX\ in addition to files we also have pseudo files (something \ETEX) and input + coming from \LUA, which makes for a much more complex system. In \LUAMETATEX\ the model has + been stepwise simplified: pseudo files are gone and use a mechanism simular to \LUA\ input, and + the terminal is left up to the (anyway kind of mandate) file related callbacks, with read file + id zero still being the console. Output to the console is part of a model that intercepts output + to the log file and/or the console and can delegate handling to callbacks as well. + + So, in the end, the terminal code in \LUAMETATEX\ is gone as all goes through \LUA, which also + means that |terminal_update|, |clear_terminal| and |wake_up_terminal| are no longer needed. + + It is important to notice that reading from files is split into two: the files explicitly opened + with |\openin| are managed independent from the files opened with |\input|. The first category + is not part of input file nesting management. + +*/ + +# define format_extension ".fmt" +# define transcript_extension ".log" +# define texinput_extension ".tex" + +typedef struct fileio_state_info { + unsigned char *io_buffer; /*tex lines of characters being read */ + memory_data io_buffer_data; + int io_first; /*tex the first unused position in |buffer| */ + int io_last; /*tex end of the line just input to |buffer| */ + int name_in_progress; /*tex Is a file name being scanned? */ + int log_opened; /*tex the transcript file has been opened */ + char *job_name; /*tex the principal file name */ + char *log_name; /*tex full name of the log file */ + char *fmt_name; +} fileio_state_info ; + +extern fileio_state_info lmt_fileio_state; + +# define emergency_job_name (lmt_fileio_state.job_name ? 
lmt_fileio_state.job_name : "unknown job name") +# define emergency_log_name (lmt_fileio_state.log_name ? lmt_fileio_state.log_name : "unknown log name") +# define emergency_fmt_name (lmt_fileio_state.fmt_name ? lmt_fileio_state.fmt_name : "unknown fmt name") + +extern void tex_terminal_update (void); +extern void tex_open_log_file (void); +extern void tex_close_log_file (void); +extern void tex_start_input (char *fn); +extern void tex_check_fmt_name (void); +extern void tex_check_job_name (char *fn); +extern dumpstream tex_open_fmt_file (int writemode); +extern void tex_close_fmt_file (dumpstream f); +extern char *tex_read_file_name (int optionalequal, const char * name, const char* ext); +extern void tex_print_file_name (unsigned char *name); +extern void tex_report_start_file (unsigned char *name); +extern void tex_report_stop_file (void); + +# endif diff --git a/source/luametatex/source/tex/texfont.c b/source/luametatex/source/tex/texfont.c new file mode 100644 index 000000000..dd63044ec --- /dev/null +++ b/source/luametatex/source/tex/texfont.c @@ -0,0 +1,2062 @@ +/* + See license.txt in the root of this project. +*/ + +/*tex + + Here is the main font API implementation for the original pascal parts. Stuff to watch out for: + + \startitemize + + \startitem + Knuth had a |null_character| that was used when a character could not be found by the + |fetch()| routine, to signal an error. This has been deleted, but it may mean that the + output of luatex is incompatible with TeX after |fetch()| has detected an error + condition. + \stopitem + + \startitem + Knuth also had a |font_glue()| optimization. This has been removed because it was a bit + of dirty programming and it also was problematic |if 0 != null|. + \stopitem + + \stopitemize + +*/ + +# include "luametatex.h" + +# define proper_char_index(f, c) (c >= font_first_character(f) && c <= font_last_character(f)) + +inline static scaled tex_aux_font_x_scaled(scaled v) +{ + return v ? 
scaledround(0.000001 * (glyph_scale_par ? glyph_scale_par : 1000) * (glyph_x_scale_par ? glyph_x_scale_par : 1000) * v) : 0; +} + +inline static scaled tex_aux_font_y_scaled(scaled v) +{ + return v ? scaledround(0.000001 * (glyph_scale_par ? glyph_scale_par : 1000) * (glyph_y_scale_par ? glyph_y_scale_par : 1000) * v) : 0; +} + +inline static scaled tex_aux_glyph_x_scaled(halfword g, scaled v) +{ + return v ? scaledround(0.000001 * (glyph_scale(g) ? glyph_scale(g) : 1000) * (glyph_x_scale(g) ? glyph_x_scale(g) : 1000) * v) : 0; +} + +inline static scaled tex_aux_glyph_y_scaled(halfword g, scaled v) +{ + return v ? scaledround(0.000001 * (glyph_scale(g) ? glyph_scale(g) : 1000) * (glyph_y_scale(g) ? glyph_y_scale(g) : 1000) * v) : 0; +} + +font_state_info lmt_font_state = { + .fonts = NULL, + .adjust_stretch = 0, + .adjust_shrink = 0, + .adjust_step = 0, + .padding = 0, + .font_data = { + .minimum = min_font_size, + .maximum = max_font_size, + .size = memory_data_unset, + .step = stp_font_size, + .allocated = 0, + .itemsize = 1, + .top = 0, + .ptr = 0, + .initial = memory_data_unset, + .offset = 0, + }, +}; + +/*tex + There can be holes in the font id range. And \unknown\ nullfont is special! Contrary + to other places, here we don't reallocate an array of records but one of pointers. +*/ + +void tex_initialize_fonts(void) +{ + texfont **tmp = aux_allocate_clear_array(sizeof(texfont *), lmt_font_state.font_data.minimum, 0); + if (tmp) { + for (int i = 0; i < lmt_font_state.font_data.minimum; i++) { + tmp[i] = NULL; + } + lmt_font_state.fonts = tmp; + lmt_font_state.font_data.allocated += lmt_font_state.font_data.minimum * sizeof(texfont *); + lmt_font_state.font_data.top = lmt_font_state.font_data.minimum; + lmt_font_state.font_data.ptr = -1; /* we need to end up with id zero first */ + tex_create_null_font(); + } else { + tex_overflow_error("fonts", lmt_font_state.font_data.minimum); + } +} + +/*tex If a slot is not used .. so be it. We want sequential numbers. 
*/ + +int tex_new_font_id(void) +{ + if (lmt_font_state.font_data.ptr < lmt_font_state.font_data.top) { + ++lmt_font_state.font_data.ptr; + return lmt_font_state.font_data.ptr; + } else if (lmt_font_state.font_data.top < lmt_font_state.font_data.maximum) { + texfont **tmp ; + int top = lmt_font_state.font_data.top + lmt_font_state.font_data.step; + if (top > lmt_font_state.font_data.maximum) { + top = lmt_font_state.font_data.maximum; + } + tmp = aux_reallocate_array(lmt_font_state.fonts, sizeof(texfont *), top, 0); + if (tmp) { + for (int i = lmt_font_state.font_data.top + 1; i < top; i++) { + tmp[i] = NULL; + } + lmt_font_state.fonts = tmp; + lmt_font_state.font_data.allocated += ((size_t) top - lmt_font_state.font_data.top) * sizeof(texfont *); + lmt_font_state.font_data.top = top; + lmt_font_state.font_data.ptr += 1; + return lmt_font_state.font_data.ptr; + } + } + tex_overflow_error("fonts", lmt_font_state.font_data.maximum); + return 0; +} + +int tex_get_font_max_id(void) +{ + return lmt_font_state.font_data.ptr; +} + +void tex_dump_font_data(dumpstream f) { + dump_int(f, lmt_font_state.font_data.ptr); +} + +void tex_undump_font_data(dumpstream f) { + int x; + undump_int(f, x); + lmt_font_state.font_data.ptr = 0; +} + +void tex_set_charinfo_vertical_parts(charinfo *ci, extinfo *ext) +{ + if (ci->math) { + if (ci->math->vertical_parts) { + extinfo *lst = ci->math->vertical_parts; + while (lst) { + extinfo *c = lst->next; + lmt_memory_free(lst); + lst = c; + } + } + ci->math->vertical_parts = ext; + } +} + +void tex_set_charinfo_horizontal_parts(charinfo *ci, extinfo *ext) +{ + if (ci->math) { + if (ci->math->horizontal_parts) { + extinfo *lst = ci->math->horizontal_parts; + while (lst) { + extinfo *c = lst->next; + lmt_memory_free(lst); + lst = c; + } + } + ci->math->horizontal_parts = ext; + } +} + +void tex_set_font_parameters(halfword f, int b) +{ + int i = font_parameter_count(f); + if (b > i) { + /*tex If really needed this can be a calloc. 
*/ + int s = (b + 2) * (int) sizeof(int); + int *a = lmt_memory_realloc(font_parameter_base(f), (size_t) s); + if (a) { + lmt_font_state.font_data.allocated += (b - i + 1) * (int) sizeof(scaled); + font_parameter_base(f) = a; + font_parameter_count(f) = b; + while (i < b) { + font_parameter(f, ++i) = 0; + } + } else { + tex_overflow_error("font", s); + } + } +} + +/*tex Most stuff is zero: */ + +int tex_new_font(void) +{ + int size = sizeof(charinfo); + charinfo *ci = lmt_memory_calloc(1, (size_t) size); + if (ci) { + texfont *t = NULL; + size = sizeof(texfont); + t = lmt_memory_calloc(1, (size_t) size); + if (t) { + sa_tree_item sa_value = { 0 }; + int id = tex_new_font_id(); + lmt_font_state.font_data.allocated += size; + lmt_font_state.fonts[id] = t; + set_font_name(id, NULL); + set_font_original(id, NULL); + set_font_left_boundary(id, NULL); + set_font_right_boundary(id, NULL); + set_font_parameter_base(id, NULL); + set_font_math_parameter_base(id, NULL); + /*tex |ec = 0| */ + set_font_first_character(id, 1); + set_font_hyphen_char(id, '-'); + set_font_skew_char(id, -1); + /*tex allocate eight values including 0 */ + tex_set_font_parameters(id, 7); + for (int k = 0; k <= 7; k++) { + tex_set_font_parameter(id, k, 0); + } + /*tex character info zero is reserved for |notdef|. The stack size 1, default item value 0. 
*/ + t->characters = sa_new_tree(1, 4, sa_value); + t->chardata = ci; + t->chardata_size = 1; + return id; + } + } + tex_overflow_error("font", size); + return 0; +} + +void tex_font_malloc_charinfo(halfword f, int num) +{ + int glyph = lmt_font_state.fonts[f]->chardata_size; + int size = (glyph + num) * sizeof(charinfo); + charinfo *data = lmt_memory_realloc(lmt_font_state.fonts[f]->chardata , (size_t) size); + if (data) { + lmt_font_state.font_data.allocated += num * sizeof(charinfo); + lmt_font_state.fonts[f]->chardata = data; + memset(&data[glyph], 0, (size_t) num * sizeof(charinfo)); + lmt_font_state.fonts[f]->chardata_size += num; + } else { + tex_overflow_error("font", size); + } +} + +void tex_char_malloc_mathinfo(charinfo *ci) +{ + int size = sizeof(mathinfo); + mathinfo *mi = lmt_memory_calloc(1, (size_t) size); + if (mi) { + mi->horizontal_parts = NULL; + mi->vertical_parts = NULL; + mi->top_left_math_kern_array = NULL; + mi->top_right_math_kern_array = NULL; + mi->bottom_right_math_kern_array = NULL; + mi->bottom_left_math_kern_array = NULL; + mi->top_left_kern = 0; + mi->top_right_kern = 0; + mi->bottom_left_kern = 0; + mi->bottom_right_kern = 0; + mi->left_margin = 0; + mi->right_margin = 0; + mi->top_margin = 0; + mi->bottom_margin = 0; + if (ci->math) { + /*tex This seldom or probably never happens. 
*/ + tex_set_charinfo_vertical_parts(ci, NULL); + tex_set_charinfo_horizontal_parts(ci, NULL); + set_charinfo_top_left_math_kern_array(ci, NULL); + set_charinfo_top_right_math_kern_array(ci, NULL); + set_charinfo_bottom_right_math_kern_array(ci, NULL); + set_charinfo_bottom_left_math_kern_array(ci, NULL); + lmt_memory_free(ci->math); + } else { + lmt_font_state.font_data.allocated += size; + } + ci->math = mi; + } else { + tex_overflow_error("font", size); + } +} + +# define find_charinfo_id(f,c) (sa_get_item_4(lmt_font_state.fonts[f]->characters,c).int_value) + +charinfo *tex_get_charinfo(halfword f, int c) +{ + if (proper_char_index(f, c)) { + int glyph = sa_get_item_4(lmt_font_state.fonts[f]->characters, c).int_value; + if (! glyph) { + sa_tree_item sa_value = { 0 }; + int tglyph = ++lmt_font_state.fonts[f]->chardata_count; + if (tglyph >= lmt_font_state.fonts[f]->chardata_size) { + tex_font_malloc_charinfo(f, 256); + } + lmt_font_state.fonts[f]->chardata[tglyph].expansion = 1000; + sa_value.int_value = tglyph; + /*tex 1 means global */ + sa_set_item_4(lmt_font_state.fonts[f]->characters, c, sa_value, 1); + glyph = tglyph; + } + return &(lmt_font_state.fonts[f]->chardata[glyph]); + } else if (c == left_boundary_char) { + if (! font_has_left_boundary(f)) { + int size = sizeof(charinfo); + charinfo *ci = lmt_memory_calloc(1, (size_t) size); + if (ci) { + lmt_font_state.font_data.allocated += size; + set_font_left_boundary(f, ci); + } else { + tex_overflow_error("font", size); + } + } + return font_left_boundary(f); + } else if (c == right_boundary_char) { + if (! 
font_has_right_boundary(f)) { + int size = sizeof(charinfo); + charinfo *ci = lmt_memory_calloc(1, (size_t) size); + if (ci) { + lmt_font_state.font_data.allocated += size; + set_font_right_boundary(f, ci); + } else { + tex_overflow_error("font", size); + } + } + return font_right_boundary(f); + } else { + return &(lmt_font_state.fonts[f]->chardata[0]); + } +} + +static charinfo *tex_aux_char_info(halfword f, int c) +{ + if (f > lmt_font_state.font_data.ptr) { + return NULL; + } else if (proper_char_index(f, c)) { + return &(lmt_font_state.fonts[f]->chardata[(int) find_charinfo_id(f, c)]); + } else if (c == left_boundary_char) { + if (font_left_boundary(f)) { + return font_left_boundary(f); + } + } else if (c == right_boundary_char) { + if (font_right_boundary(f)) { + return font_right_boundary(f); + } + } + return &(lmt_font_state.fonts[f]->chardata[0]); +} + +void tex_char_process(halfword f, int c) +{ + if (tex_char_has_tag_from_font(f, c, callback_tag)) { + int callback_id = lmt_callback_defined(process_character_callback); + if (callback_id > 0) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "dd->", f, c); + } + tex_char_reset_tag_from_font(f, c, callback_tag); + } +} + +int tex_char_exists(halfword f, int c) +{ + if (f > lmt_font_state.font_data.ptr) { + return 0; + } else if (proper_char_index(f, c)) { + return (int) find_charinfo_id(f, c); + } else if (c == left_boundary_char) { + if (font_has_left_boundary(f)) { + return 1; + } + } else if (c == right_boundary_char) { + if (font_has_right_boundary(f)) { + return 1; + } + } + return 0; +} + +/* + +static int check_math_char(halfword f, int c, int size) +{ + int callback_id = lmt_callback_defined(get_math_char_callback); + if (callback_id > 0) { + halfword s = c; + lmt_run_callback(lua_state.lua_instance, callback_id, "ddd->d", f, c, size, &s); + if (s && proper_char_index(f, s) && find_charinfo_id(f, s)) { + return s; + } + } + return c; +} +*/ + +int tex_math_char_exists(halfword f, int c, 
int size) +{ + (void) size; + return (f > 0 && f <= lmt_font_state.font_data.ptr && proper_char_index(f, c)); +} + +/*tex + There is a bit overhead due to first fetching but we don't need to check again, so that saves + a little. +*/ + +int tex_get_math_char(halfword f, int c, int size, scaled *scale) +{ + int id = find_charinfo_id(f, c); + texfont *tf = lmt_font_state.fonts[f]; + if (id && size && tf->compactmath) { + for (int i=1;i<=size;i++) { + charinfo *ci = &tf->chardata[id]; + if (ci->math) { + int s = ci->math->smaller; + if (s && proper_char_index(f, s)) { + id = find_charinfo_id(f, s); + if (id) { + /* todo: trace */ + c = s; + } else { + break; + } + } else { + break; + } + } else { + break; + } + } + } + if (scale) { + *scale = tex_get_math_font_scale(f, size); + if (! *scale) { + *scale = 1000; + } + } + /* + if (! id && ! tf->oldmath) { + c = check_math_char(f, c, size); + } + */ + return c; +} + +extinfo *tex_new_charinfo_part(int glyph, int startconnect, int endconnect, int advance, int extender) +{ + int size = sizeof(extinfo); + extinfo *ext = lmt_memory_malloc((size_t) size); + if (ext) { + ext->next = NULL; + ext->glyph = glyph; + ext->start_overlap = startconnect; + ext->end_overlap = endconnect; + ext->advance = advance; + ext->extender = extender; + } else { + tex_overflow_error("font", size); + } + return ext; +} + +void tex_add_charinfo_vertical_part(charinfo *ci, extinfo *ext) +{ + if (ci->math) { + if (ci->math->vertical_parts) { + extinfo *lst = ci->math->vertical_parts; + while (lst->next) + lst = lst->next; + lst->next = ext; + } else { + ci->math->vertical_parts = ext; + } + } +} + +void tex_add_charinfo_horizontal_part(charinfo *ci, extinfo *ext) +{ + if (ci->math) { + if (ci->math->horizontal_parts) { + extinfo *lst = ci->math->horizontal_parts; + while (lst->next) { + lst = lst->next; + } + lst->next = ext; + } else { + ci->math->horizontal_parts = ext; + } + } +} + +/*tex + + Note that many more small things like this are 
implemented as macros in the header file. + +*/ + +int tex_get_charinfo_math_kerns(charinfo *ci, int id) +{ + /*tex All callers check for |result > 0|. */ + if (ci->math) { + switch (id) { + case top_left_kern: + return ci->math->top_left_math_kerns; + case bottom_left_kern: + return ci->math->bottom_left_math_kerns; + case top_right_kern: + return ci->math->top_right_math_kerns; + case bottom_right_kern: + return ci->math->bottom_right_math_kerns; + default: + tex_confusion("weird math kern"); + break; + } + } + return 0; +} + +void tex_add_charinfo_math_kern(charinfo *ci, int id, scaled ht, scaled krn) +{ + if (ci->math) { + int k = 0; + int s = 0; + scaled *a = NULL; + switch (id) { + case top_right_kern: + { + k = ci->math->top_right_math_kerns; + s = 2 * (k + 1) * (int) sizeof(scaled); + a = lmt_memory_realloc(ci->math->top_right_math_kern_array, (size_t) s); + if (a) { + ci->math->top_right_math_kern_array = a; + ci->math->top_right_math_kerns++; + } + break; + } + case bottom_right_kern: + { + k = ci->math->bottom_right_math_kerns; + s = 2 * (k + 1) * (int) sizeof(scaled); + a = lmt_memory_realloc(ci->math->bottom_right_math_kern_array, (size_t) s); + if (a) { + ci->math->bottom_right_math_kern_array = a; + ci->math->bottom_right_math_kerns++; + } + break; + } + case bottom_left_kern: + { + k = ci->math->bottom_left_math_kerns; + s = 2 * (k + 1) * (int) sizeof(scaled); + a = lmt_memory_realloc(ci->math->bottom_left_math_kern_array, (size_t) s); + if (a) { + ci->math->bottom_left_math_kern_array = a; + ci->math->bottom_left_math_kerns++; + } + break; + } + case top_left_kern: + { + k = ci->math->top_left_math_kerns; + s = 2 * (k + 1) * (int) sizeof(scaled); + a = lmt_memory_realloc(ci->math->top_left_math_kern_array, (size_t) s); + if (a) { + ci->math->top_left_math_kern_array = a; + ci->math->top_left_math_kerns++; + } + break; + } + default: + tex_confusion("add math kern"); + return; + } + if (a) { + a[2 * k] = ht; + a[(2 * k) + 1] = krn; + } else { + 
tex_overflow_error("font", s); + } + } +} + +/*tex + + In \TEX, extensibles were fairly simple things. This function squeezes a \TFM\ extensible into + the vertical extender structures. |advance == 0| is a special case for \TFM\ fonts, because + finding the proper advance width during \TFM\ reading can be tricky. + + A small complication arises if |rep| is the only non-zero: it needs to be doubled as a + non-repeatable to avoid mayhem. + +*/ + +void tex_set_charinfo_extensible(charinfo *ci, int top, int bottom, int middle, int extender) +{ + if (ci->math) { + extinfo *ext; + /*tex Clear old data: */ + tex_set_charinfo_vertical_parts(ci, NULL); + if (bottom == 0 && top == 0 && middle == 0 && extender != 0) { + ext = tex_new_charinfo_part(extender, 0, 0, 0, math_extension_normal); + tex_add_charinfo_vertical_part(ci, ext); + ext = tex_new_charinfo_part(extender, 0, 0, 0, math_extension_repeat); + tex_add_charinfo_vertical_part(ci, ext); + } else { + if (bottom) { + ext = tex_new_charinfo_part(bottom, 0, 0, 0, math_extension_normal); + tex_add_charinfo_vertical_part(ci, ext); + } + if (extender) { + ext = tex_new_charinfo_part(extender, 0, 0, 0, math_extension_repeat); + tex_add_charinfo_vertical_part(ci, ext); + } + if (middle) { + ext = tex_new_charinfo_part(middle, 0, 0, 0, math_extension_normal); + tex_add_charinfo_vertical_part(ci, ext); + if (extender) { + ext = tex_new_charinfo_part(extender, 0, 0, 0, math_extension_repeat); + tex_add_charinfo_vertical_part(ci, ext); + } + } + if (top) { + ext = tex_new_charinfo_part(top, 0, 0, 0, math_extension_normal); + tex_add_charinfo_vertical_part(ci, ext); + } + } + } +} + +/*tex why not just preallocate for all math otf parameters */ + +void tex_set_font_math_parameters(halfword f, int b) +{ + int i = font_math_parameter_count(f); + if (i < b) { + size_t size = ((size_t) b + 2) * sizeof(scaled); + scaled *data = lmt_memory_realloc(font_math_parameter_base(f), size); + if (data) { + lmt_font_state.font_data.allocated += 
(int) (((size_t) b - i + 1) * sizeof(scaled)); + font_math_parameter_base(f) = data; + font_math_parameter_count(f) = b; + while (i < b) { + ++i; /* in macro, make the next a function */ + // set_font_math_parameter(f, i, undefined_math_parameter); + font_math_parameter(f, i) = undefined_math_parameter; + } + } else { + tex_overflow_error("font", (int) size); + } + } +} + +void tex_delete_font(int f) +{ + if (lmt_font_state.fonts[f]) { + tex_set_font_name(f, NULL); + tex_set_font_original(f, NULL); + set_font_left_boundary(f, NULL); + set_font_right_boundary(f, NULL); + for (int i = font_first_character(f); i <= font_last_character(f); i++) { + if (quick_char_exists(f, i)) { + charinfo *co = tex_aux_char_info(f, i); + set_charinfo_kerns(co, NULL); + set_charinfo_ligatures(co, NULL); + if (co->math) { + tex_set_charinfo_vertical_parts(co, NULL); + tex_set_charinfo_horizontal_parts(co, NULL); + set_charinfo_top_left_math_kern_array(co, NULL); + set_charinfo_top_right_math_kern_array(co, NULL); + set_charinfo_bottom_right_math_kern_array(co, NULL); + set_charinfo_bottom_left_math_kern_array(co, NULL); + set_charinfo_math(co, NULL); + } + } + } + /*tex free |notdef| */ + lmt_memory_free(lmt_font_state.fonts[f]->chardata); + sa_destroy_tree(lmt_font_state.fonts[f]->characters); + lmt_memory_free(font_parameter_base(f)); + if (font_math_parameter_base(f)) { + lmt_memory_free(font_math_parameter_base(f)); + } + lmt_memory_free(lmt_font_state.fonts[f]); + lmt_font_state.fonts[f] = NULL; + if (lmt_font_state.font_data.ptr == f) { + lmt_font_state.font_data.ptr--; + } + } +} + +void tex_create_null_font(void) +{ + int id = tex_new_font(); + tex_set_font_name(id, "nullfont"); + tex_set_font_original(id, "nullfont"); + /* set_font_touched(id, 1); */ +} + +int tex_is_valid_font(halfword f) +{ + return (f >= 0 && f <= lmt_font_state.font_data.ptr && lmt_font_state.fonts[f]); +} + +int tex_checked_font(halfword f) +{ + return (f >= 0 && f <= lmt_font_state.font_data.ptr && 
lmt_font_state.fonts[f]) ? f : null_font; +} + +halfword tex_get_font_identifier(halfword fontspec) +{ + if (fontspec) { + halfword fnt = font_spec_identifier(fontspec); + if ((fnt >= 0 && fnt <= lmt_font_state.font_data.ptr && lmt_font_state.fonts[fnt])) { + return fnt; + } + } + return null_font; +} + +/*tex + + Here come some subroutines to deal with expanded fonts. Returning 1 means that they are + identical. + +*/ + +ligatureinfo tex_get_ligature(halfword f, int lc, int rc) +{ + ligatureinfo t = { 0, 0, 0, 0 }; + if (lc != non_boundary_char && rc != non_boundary_char && tex_has_ligature(f, lc)) { + int k = 0; + charinfo *co = tex_aux_char_info(f, lc); + while (1) { + ligatureinfo u = charinfo_ligature(co, k); + if (ligature_end(u)) { + break; + } else if (ligature_char(u) == rc) { + return ligature_disabled(u) ? t : u; + } + k++; + } + } + return t; +} + +int tex_raw_get_kern(halfword f, int lc, int rc) +{ + if (lc != non_boundary_char && rc != non_boundary_char) { + int k = 0; + charinfo *co = tex_aux_char_info(f, lc); + while (1) { + kerninfo u = charinfo_kern(co, k); + if (kern_end(u)) { + break; + } else if (kern_char(u) == rc) { + return kern_disabled(u) ? 0 : kern_kern(u); + } + k++; + } + } + return 0; +} + +int tex_get_kern(halfword f, int lc, int rc) +{ + if (lc == non_boundary_char || rc == non_boundary_char || (! tex_has_kern(f, lc))) { + return 0; + } else { + return tex_raw_get_kern(f, lc, rc); + } +} + +scaled tex_valid_kern(halfword left, halfword right) +{ + if (node_type(left) == glyph_node && node_type(right) == glyph_node) { + halfword fl = glyph_font(left); + halfword fr = glyph_font(right); + halfword cl = glyph_character(left); + halfword cr = glyph_character(right); + if (fl == fr && cl != non_boundary_char && cr != non_boundary_char && tex_has_kern(fl, cl) && ! tex_has_glyph_option(left, glyph_option_no_right_kern) && ! 
tex_has_glyph_option(right, glyph_option_no_left_kern)) { + return tex_raw_get_kern(fl, cl, cr); + } + } + return 0; +} + +/*tex + + Experiment: + +*/ + +halfword tex_checked_font_adjust(halfword adjust_spacing, halfword adjust_spacing_step, halfword adjust_spacing_shrink, halfword adjust_spacing_stretch) +{ + if (adjust_spacing >= adjust_spacing_full) { + if (adjust_spacing_step > 0) { + lmt_font_state.adjust_step = adjust_spacing_step; + lmt_font_state.adjust_shrink = adjust_spacing_shrink; + lmt_font_state.adjust_stretch = adjust_spacing_stretch; + if (lmt_font_state.adjust_step > 100) { + lmt_font_state.adjust_step = 100; + } + if (lmt_font_state.adjust_shrink < 0) { + lmt_font_state.adjust_shrink = 0; + } else if (lmt_font_state.adjust_shrink > 500) { + lmt_font_state.adjust_shrink = 500; + } + if (lmt_font_state.adjust_stretch < 0) { + lmt_font_state.adjust_stretch = 0; + } else if (lmt_font_state.adjust_stretch > 1000) { + lmt_font_state.adjust_stretch = 1000; + } + return adjust_spacing; + } + } else { + adjust_spacing = adjust_spacing_off; + } + lmt_font_state.adjust_step = 0; + lmt_font_state.adjust_shrink = 0; + lmt_font_state.adjust_stretch = 0; + return adjust_spacing; +} + +/*tex + + This returns the multiple of |font_step(f)| that is nearest to |e|. + +*/ + +int tex_fix_expand_value(halfword f, int e) +{ + int max_expand, neg; + if (e == 0) { + return 0; + } else if (e < 0) { + e = -e; + neg = 1; + max_expand = font_max_shrink(f); + } else { + neg = 0; + max_expand = font_max_stretch(f); + } + if (e > max_expand) { + e = max_expand; + } else { + int step = font_step(f); + if (e % step > 0) { + e = step * tex_round_xn_over_d(e, 1, step); + } + } + return neg ? 
-e : e; +} + +int tex_read_font_info(char *cnom, scaled s) +{ + int callback_id = lmt_callback_defined(define_font_callback); + if (callback_id > 0) { + int f = 0; + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "Sd->d", cnom, s, &f); + if (tex_is_valid_font(f)) { + tex_set_font_original(f, (char *) cnom); + return f; + } else { + return 0; + } + } else { + tex_normal_warning("fonts","no font has been read, you need to enable or fix the callback"); + return 0; + } +} + +/*tex Abstraction: */ + +halfword tex_get_font_parameter(halfword f, halfword code) /* todo: math */ +{ + if (font_parameter_count(f) < code) { + tex_set_font_parameters(f, code); + } + return font_parameter(f, code); +} + +void tex_set_font_parameter(halfword f, halfword code, scaled v) +{ + if (font_parameter_count(f) < code) { + tex_set_font_parameters(f, code); + } + font_parameter(f, code) = v; +} + +scaled tex_get_font_slant (halfword f) { return font_parameter(f, slant_code); } +scaled tex_get_font_space (halfword f) { return font_parameter(f, space_code); } +scaled tex_get_font_space_stretch (halfword f) { return font_parameter(f, space_stretch_code); } +scaled tex_get_font_space_shrink (halfword f) { return font_parameter(f, space_shrink_code); } +scaled tex_get_font_ex_height (halfword f) { return font_parameter(f, ex_height_code); } +scaled tex_get_font_em_width (halfword f) { return font_parameter(f, em_width_code); } +scaled tex_get_font_extra_space (halfword f) { return font_parameter(f, extra_space_code); } + +scaled tex_get_scaled_slant (halfword f) { return font_parameter(f, slant_code); } +scaled tex_get_scaled_space (halfword f) { return tex_aux_font_x_scaled(font_parameter(f, space_code)); } +scaled tex_get_scaled_space_stretch (halfword f) { return tex_aux_font_x_scaled(font_parameter(f, space_stretch_code)); } +scaled tex_get_scaled_space_shrink (halfword f) { return tex_aux_font_x_scaled(font_parameter(f, space_shrink_code)); } +scaled tex_get_scaled_ex_height 
(halfword f) { return tex_aux_font_y_scaled(font_parameter(f, ex_height_code)); } +scaled tex_get_scaled_em_width (halfword f) { return tex_aux_font_x_scaled(font_parameter(f, em_width_code)); } +scaled tex_get_scaled_extra_space (halfword f) { return tex_aux_font_x_scaled(font_parameter(f, extra_space_code)); } + +scaled tex_font_x_scaled (scaled v) { return tex_aux_font_x_scaled(v); } +scaled tex_font_y_scaled (scaled v) { return tex_aux_font_y_scaled(v); } + +halfword tex_get_scaled_parameter(halfword f, halfword code) /* todo: math */ +{ + if (font_parameter_count(f) < code) { + tex_set_font_parameters(f, code); + } + switch (code) { + case slant_code: + return font_parameter(f, code); + case ex_height_code: + return tex_aux_font_y_scaled(font_parameter(f, code)); + default: + return tex_aux_font_x_scaled(font_parameter(f, code)); + } +} + +void tex_set_scaled_parameter(halfword f, halfword code, scaled v) +{ + if (font_parameter_count(f) < code) { + tex_set_font_parameters(f, code); + } + font_parameter(f, code) = tex_aux_font_x_scaled(v); +} + +halfword tex_get_scaled_glue(halfword f) +{ + halfword p = tex_new_glue_node(zero_glue, space_skip_glue); + glue_amount(p) = tex_aux_font_x_scaled(font_parameter(f, space_code)); + glue_stretch(p) = tex_aux_font_x_scaled(font_parameter(f, space_stretch_code)); + glue_shrink(p) = tex_aux_font_x_scaled(font_parameter(f, space_shrink_code)); + glue_font(p) = f; + return p; +} + +halfword tex_get_scaled_parameter_glue(quarterword p, quarterword s) +{ + halfword n = tex_new_glue_node(zero_glue, s); + halfword g = glue_parameter(p); + // if (g) { + // memcpy((void *) (node_memory_state.nodes + n + 2), (void *) (node_memory_state.nodes + g + 2), (glue_spec_size - 2) * (sizeof(memoryword))); + // } + glue_amount(n) = tex_aux_font_x_scaled(glue_amount(g)); + glue_stretch(n) = tex_aux_font_x_scaled(glue_stretch(g)); + glue_shrink(n) = tex_aux_font_x_scaled(glue_shrink(g)); + return n; +} + +halfword 
tex_get_parameter_glue(quarterword p, quarterword s) +{ + halfword n = tex_new_glue_node(zero_glue, s); + halfword g = glue_parameter(p); + if (g) { + memcpy((void *) (lmt_node_memory_state.nodes + n + 2), (void *) (lmt_node_memory_state.nodes + g + 2), (glue_spec_size - 2) * (sizeof(memoryword))); + } + return n; +} + +/*tex Ligaturing starts here */ + +static void tex_aux_nesting_append(halfword nest1, halfword newn) +{ + halfword tail = node_tail(nest1); + tex_couple_nodes(tail ? tail : nest1, newn); + node_tail(nest1) = newn; +} + +static void tex_aux_nesting_prepend(halfword nest1, halfword newn) +{ + halfword head = node_next(nest1); + tex_couple_nodes(nest1, newn); + if (head) { + tex_couple_nodes(newn, head); + } else { + node_tail(nest1) = newn; + } +} + +static void tex_aux_nesting_prepend_list(halfword nest1, halfword newn) +{ + halfword head = node_next(nest1); + halfword tail = tex_tail_of_node_list(newn); + tex_couple_nodes(nest1, newn); + if (head) { + tex_couple_nodes(tail, head); + } else { + node_tail(nest1) = tail; + } +} + +int tex_valid_ligature(halfword left, halfword right, int *slot) +{ + if (node_type(left) != glyph_node) { + return -1; + } else if (glyph_font(left) != glyph_font(right)) { + return -1; + } else if (tex_has_glyph_option(left, glyph_option_no_right_ligature) || tex_has_glyph_option(right, glyph_option_no_left_ligature)) { + return -1; + } else { + ligatureinfo lig = tex_get_ligature(glyph_font(left), glyph_character(left), glyph_character(right)); + if (ligature_is_valid(lig)) { + *slot = ligature_replacement(lig); + return ligature_type(lig); + } else { + return -1; + } + } +} + +static int tex_aux_found_ligature(halfword left, halfword right) +{ + if (node_type(left) != glyph_node) { + return 0; + } else if (glyph_font(left) != glyph_font(right)) { + return 0; + } else if (tex_has_glyph_option(left, glyph_option_no_right_ligature) || tex_has_glyph_option(right, glyph_option_no_left_ligature)) { + return 0; + } else { + 
return ligature_is_valid(tex_get_ligature(glyph_font(left), glyph_character(left), glyph_character(right))); + } +} + +/*tex + We could be more efficient and reuse the possibly later removed node but it takes more code and + we don't have that many ligatures anyway. +*/ + +static int tex_aux_try_ligature(halfword *first, halfword forward) +{ + halfword cur = *first; + if (glyph_scale(cur) == glyph_scale(forward) && glyph_x_scale(cur) == glyph_x_scale(forward) && glyph_y_scale(cur) == glyph_y_scale(forward)) { + halfword slot; + halfword type = tex_valid_ligature(cur, forward, &slot); + if (type >= 0) { + int move_after = (type & 0x0C) >> 2; + int keep_right = (type & 0x01) != 0; + int keep_left = (type & 0x02) != 0; + halfword parent = (glyph_character(cur) >= 0) ? cur : ((glyph_character(forward) >= 0) ? forward : null); + halfword ligature = tex_new_glyph_node(glyph_ligature_subtype, glyph_font(cur), slot, parent); + if (keep_left) { + tex_couple_nodes(cur, ligature); + if (move_after) { + move_after--; + cur = ligature; + } + } else { + halfword prev = node_prev(cur); + tex_uncouple_node(cur); + tex_flush_node(cur); + tex_couple_nodes(prev, ligature); + cur = ligature; + } + if (keep_right) { + tex_couple_nodes(ligature, forward); + if (move_after) { + move_after--; + cur = forward; + } + } else { + halfword next = node_next(forward); + tex_uncouple_node(forward); + tex_flush_node(forward); + if (next) { + tex_couple_nodes(ligature, next); + } + } + *first = cur; + return 1; + } + } + return 0; +} + +/*tex + + There shouldn't be any ligatures here - we only add them at the end of |xxx_break| in a |DISC-1 + - DISC-2| situation and we stop processing |DISC-1| (we continue with |DISC-1|'s |post_| and + |no_break|. 
+ +*/ + +static halfword tex_aux_handle_ligature_nesting(halfword root, halfword cur) +{ + if (cur) { + while (node_next(cur)) { + halfword fwd = node_next(cur); + if (node_type(cur) == glyph_node && node_type(fwd) == glyph_node && glyph_font(cur) == glyph_font(fwd) && tex_aux_try_ligature(&cur, fwd)) { + continue; + } + cur = node_next(cur); + } + node_tail(root) = cur; + } + return root; +} + +/*tex + + In \LUATEX\ we have a chained variant of discretionaries (init and select) but that never really + works out ok. It was there for basemode to be compatible with original \TEX\ but it was also meant + for border cases that in practice never occur. At least no \CONTEXT\ user ever complained about + ligatures and hyphenation of these border cases. Keep in mind that in node mode (which we normally + use) the select discs never showed up anyway. Another reason for dropping these discretionaries is + that by not using them we get more predictable (or at least easier) handling of node lists that do + have (any kind of) discretionaries. It is still on my agenda to look into nested discretionaries + i.e. disc nodes in disc fields but it might never result in usable code. + + Remark: there is now a patch for \LUATEX\ that fixes some long pending issue with select discs but + still it's kind of fuzzy. It also complicates the par builder in a way that I don't really want + (at least in \CONTEXT). It was anyway a good reason for removing traces of these special disc nodes + in \LUAMETATEX. + +*/ + +static halfword tex_aux_handle_ligature_word(halfword cur) +{ + halfword right = null; + if (node_type(cur) == boundary_node) { + halfword prev = node_prev(cur); + halfword fwd = node_next(cur); + /*tex There is no need to uncouple |cur|, it is freed. 
*/ + tex_flush_node(cur); + if (fwd) { + tex_couple_nodes(prev, fwd); + if (node_type(fwd) != glyph_node) { + return prev; + } else { + cur = fwd; + } + } else { + node_next(prev) = fwd; + return prev; + } + } else if (font_has_left_boundary(glyph_font(cur))) { + halfword prev = node_prev(cur); + halfword p = tex_new_glyph_node(glyph_unset_subtype, glyph_font(cur), left_boundary_char, cur); + tex_couple_nodes(prev, p); + tex_couple_nodes(p, cur); + cur = p; + } + if (font_has_right_boundary(glyph_font(cur))) { + right = tex_new_glyph_node(glyph_unset_subtype, glyph_font(cur), right_boundary_char, cur); + } + /* todo: switch */ + while (1) { + halfword t = node_type(cur); + /*tex A glyph followed by \unknown */ + if (t == glyph_node) { + halfword fwd = node_next(cur); + if (fwd) { + t = node_type(fwd); + if (t == glyph_node) { + /*tex a glyph followed by a glyph */ + if (glyph_font(cur) != glyph_font(fwd)) { + break; + } else if (tex_aux_try_ligature(&cur, fwd)) { + continue; + } + } else if (t == disc_node) { + /*tex a glyph followed by a disc */ + halfword pre = disc_pre_break_head(fwd); + halfword nob = disc_no_break_head(fwd); + halfword next, tail; + /*tex Check on: |a{b?}{?}{?}| and |a+b=>B| : |{B?}{?}{a?}| */ + /*tex Check on: |a{?}{?}{b?}| and |a+b=>B| : |{a?}{?}{B?}| */ + if ((pre && node_type(pre) == glyph_node && tex_aux_found_ligature(cur, pre)) + || (nob && node_type(nob) == glyph_node && tex_aux_found_ligature(cur, nob))) { + /*tex Move |cur| from before disc to skipped part */ + halfword prev = node_prev(cur); + tex_uncouple_node(cur); + tex_couple_nodes(prev, fwd); + tex_aux_nesting_prepend(disc_no_break(fwd), cur); + /*tex Now ligature the |pre_break|. */ + tex_aux_nesting_prepend(disc_pre_break(fwd), tex_copy_node(cur)); + /*tex As we have removed cur, we need to start again. */ + cur = prev; + } + /*tex Check on: |a{?}{?}{}b| and |a+b=>B| : |{a?}{?b}{B}|. */ + next = node_next(fwd); + if ((! 
nob) && next && node_type(next) == glyph_node && tex_aux_found_ligature(cur, next)) { + /*tex Move |cur| from before |disc| to |no_break| part. */ + halfword prev = node_prev(cur); + tex_uncouple_node(cur); + tex_couple_nodes(prev, fwd); + /*tex We {\em know} it's empty. */ + tex_couple_nodes(disc_no_break(fwd), cur); + /*tex Now copy |cur| the |pre_break|. */ + tex_aux_nesting_prepend(disc_pre_break(fwd), tex_copy_node(cur)); + /*tex Move next from after disc to |no_break| part. */ + tail = node_next(next); + tex_uncouple_node(next); + tex_try_couple_nodes(fwd, tail); + /*tex We {\em know} this works. */ + tex_couple_nodes(cur, next); + /*tex Make sure the list is correct. */ + disc_no_break_tail(fwd) = next; + /*tex Now copy next to the |post_break|. */ + tex_aux_nesting_append(disc_post_break(fwd), tex_copy_node(next)); + /*tex As we have removed cur, we need to start again. */ + cur = prev; + } + /*tex We are finished with the |pre_break|. */ + tex_aux_handle_ligature_nesting(disc_pre_break(fwd), disc_pre_break_head(fwd)); + } else if (t == boundary_node) { + halfword next = node_next(fwd); + tex_try_couple_nodes(cur, next); + tex_flush_node(fwd); + if (right) { + /*tex Shame, didn't need it. */ + tex_flush_node(right); + /*tex No need to reset |right|, we're going to leave the loop anyway. */ + } + break; + } else if (right) { + tex_couple_nodes(cur, right); + tex_couple_nodes(right, fwd); + right = null; + continue; + } else { + break; + } + } else { + /*tex The last character of a paragraph. */ + if (right) { + /*tex |par| prohibits the use of |couple_nodes| here. 
*/ + tex_try_couple_nodes(cur, right); + right = null; + continue; + } else { + break; + } + } + /*tex A discretionary followed by \unknown */ + } else if (t == disc_node) { + /*tex If |{?}{x}{?}| or |{?}{?}{y}| then: */ + if (disc_no_break_head(cur) || disc_post_break_head(cur)) { + halfword fwd; + if (disc_post_break_head(cur)) { + tex_aux_handle_ligature_nesting(disc_post_break(cur), disc_post_break_head(cur)); + } + if (disc_no_break_head(cur)) { + tex_aux_handle_ligature_nesting(disc_no_break(cur), disc_no_break_head(cur)); + } + fwd = node_next(cur); + while (fwd) { + if (node_type(fwd) == glyph_node) { + halfword nob = disc_no_break_tail(cur); + halfword pst = disc_post_break_tail(cur); + if ((! nob || ! tex_aux_found_ligature(nob, fwd)) && (! pst || ! tex_aux_found_ligature(pst, fwd))) { + break; + } else { + halfword next = node_next(fwd); + tex_aux_nesting_append(disc_no_break(cur), tex_copy_node(fwd)); + tex_aux_handle_ligature_nesting(disc_no_break(cur), nob); + tex_uncouple_node(fwd); + tex_try_couple_nodes(cur, next); + tex_aux_nesting_append(disc_post_break(cur), fwd); + tex_aux_handle_ligature_nesting(disc_post_break(cur), pst); + fwd = node_next(cur); + } + } else { + break; + } + } + if (fwd && node_type(fwd) == disc_node) { + /*tex This only deals with simple pre-only discretionaries and a following glyph. */ + halfword next = node_next(fwd); + if (next + && ! disc_no_break_head(fwd) + && ! disc_post_break_head(fwd) + && node_type(next) == glyph_node + && ((disc_post_break_tail(cur) && tex_aux_found_ligature(disc_post_break_tail(cur), next)) || + (disc_no_break_tail (cur) && tex_aux_found_ligature(disc_no_break_tail (cur), next)))) { + halfword last = node_next(next); + tex_uncouple_node(next); + tex_try_couple_nodes(fwd, last); + /*tex Just a hidden flag, used for (base mode) experiments. 
*/ + if (hyphenation_permitted(hyphenation_mode_par, lazy_ligatures_hyphenation_mode)) { + /*tex f-f-i -> f-fi */ + halfword tail = disc_no_break_tail(cur); + tex_aux_nesting_append(disc_no_break(cur), tex_copy_node(next)); + tex_aux_handle_ligature_nesting(disc_no_break(cur), tail); + tail = disc_post_break_tail(cur); + tex_aux_nesting_append(disc_post_break(cur), next); + tex_aux_handle_ligature_nesting(disc_post_break(cur), tail); + tex_try_couple_nodes(node_prev(fwd), node_next(fwd)); + tex_flush_node(fwd); + } else { + /*tex f-f-i -> ff-i : |{a-}{b}{AB} {-}{c}{}| => |{AB-}{c}{ABc}| */ + tex_aux_nesting_append(disc_post_break(fwd), tex_copy_node(next)); + if (disc_no_break_head(cur)) { + halfword tail; + tex_aux_nesting_prepend_list(disc_no_break(fwd), tex_copy_node_list(disc_no_break_head(cur), null)); + tail = disc_no_break_tail(fwd); + tex_aux_nesting_append(disc_no_break(fwd), next); + tex_aux_handle_ligature_nesting(disc_no_break(fwd), tail); + tex_aux_nesting_prepend_list(disc_pre_break(fwd), tex_copy_node_list(disc_no_break_head(cur), null)); + } + tex_try_couple_nodes(node_prev(cur), node_next(cur)); + tex_flush_node(cur); + cur = fwd; + } + } + } + } + } else { + /*tex We have glyph nor disc. */ + return cur; + } + /*tex Goto the next node, where |\par| allows |node_next(cur)| to be NULL. */ + cur = node_next(cur); + } + return cur; +} + + +/*tex The return value is the new tail, head should be a dummy: */ + +halfword tex_handle_ligaturing(halfword head, halfword tail) +{ + if (node_next(head)) { + /*tex A trick to allow explicit |node == null| tests. */ + halfword save_tail = null; + halfword cur, prev; + if (tail) { + save_tail = node_next(tail); + node_next(tail) = null; + } + prev = head; + cur = node_next(prev); + while (cur) { + if (node_type(cur) == glyph_node || node_type(cur) == boundary_node) { + cur = tex_aux_handle_ligature_word(cur); + } + prev = cur; + cur = node_next(cur); + } + if (! 
prev) { + prev = tail; + } + tex_try_couple_nodes(prev, save_tail); + // if (tail) { + // } + return prev; + } else { + return tail; + } +} + +/*tex Kerning starts here: */ + +static void tex_aux_add_kern_before(halfword left, halfword right) +{ + if ( + glyph_font(left) == glyph_font(right) && + glyph_scale(left) == glyph_scale(right) && + glyph_x_scale(left) == glyph_x_scale(right) && + glyph_y_scale(left) == glyph_y_scale(right) && + ! tex_has_glyph_option(left, glyph_option_no_right_kern) && + ! tex_has_glyph_option(right, glyph_option_no_left_kern) && + tex_has_kern(glyph_font(left), glyph_character(left)) + ) { + scaled k = tex_raw_get_kern(glyph_font(left), glyph_character(left), glyph_character(right)); + if (k) { + scaled kern = tex_new_kern_node(k, font_kern_subtype); + halfword prev = node_prev(right); + tex_couple_nodes(prev, kern); + tex_couple_nodes(kern, right); + tex_attach_attribute_list_copy(kern, left); + } + } +} + +static void tex_aux_add_kern_after(halfword left, halfword right, halfword aft) +{ + if ( + glyph_font(left) == glyph_font(right) && + glyph_scale(left) == glyph_scale(right) && + glyph_x_scale(left) == glyph_x_scale(right) && + glyph_y_scale(left) == glyph_y_scale(right) && + ! tex_has_glyph_option(left, glyph_option_no_right_kern) && + ! 
tex_has_glyph_option(right, glyph_option_no_left_kern) && + tex_has_kern(glyph_font(left), glyph_character(left)) + ) { + scaled k = tex_raw_get_kern(glyph_font(left), glyph_character(left), glyph_character(right)); + if (k) { + scaled kern = tex_new_kern_node(k, font_kern_subtype); + halfword next = node_next(aft); + tex_couple_nodes(aft, kern); + tex_try_couple_nodes(kern, next); + tex_attach_attribute_list_copy(kern, aft); + } + } +} + +static void tex_aux_do_handle_kerning(halfword root, halfword init_left, halfword init_right) +{ + halfword cur = node_next(root); + if (cur) { + halfword left = null; + if (node_type(cur) == glyph_node) { + if (init_left) { + tex_aux_add_kern_before(init_left, cur); + } + left = cur; + } + cur = node_next(cur); + while (cur) { + halfword t = node_type(cur); + if (t == glyph_node) { + if (left) { + tex_aux_add_kern_before(left, cur); + if (glyph_character(left) < 0) { + halfword prev = node_prev(left); + tex_couple_nodes(prev, cur); + tex_flush_node(left); + } + } + left = cur; + } else { + if (t == disc_node) { + halfword right = node_type(node_next(cur)) == glyph_node ? 
node_next(cur) : null; + tex_aux_do_handle_kerning(disc_pre_break(cur), left, null); + if (disc_pre_break_head(cur)) { + disc_pre_break_tail(cur) = tex_tail_of_node_list(disc_pre_break_head(cur)); + } + tex_aux_do_handle_kerning(disc_post_break(cur), null, right); + if (disc_post_break_head(cur)) { + disc_post_break_tail(cur) = tex_tail_of_node_list(disc_post_break_head(cur)); + } + tex_aux_do_handle_kerning(disc_no_break(cur), left, right); + if (disc_no_break_head(cur)) { + disc_no_break_tail(cur) = tex_tail_of_node_list(disc_no_break_head(cur)); + } + } + if (left) { + if (glyph_character(left) < 0) { + halfword prev = node_prev(left); + tex_couple_nodes(prev, cur); + tex_flush_node(left); + } + left = null; + } + } + cur = node_next(cur); + } + if (left) { + if (init_right) { + tex_aux_add_kern_after(left, init_right, left); + } + if (glyph_character(left) < 0) { + halfword prev = node_prev(left); + halfword next = node_next(left); + if (next) { + tex_couple_nodes(prev, next); + node_tail(root) = next; + } else if (prev != root) { + node_next(prev) = null; + node_tail(root) = prev; + } else { + node_next(root) = null; + node_tail(root) = null; + } + tex_flush_node(left); + } + } + } else if (init_left && init_right ) { + tex_aux_add_kern_after(init_left, init_right, root); + node_tail(root) = node_next(root); + } +} + +halfword tex_handle_kerning(halfword head, halfword tail) +{ + halfword save_link = null; + if (tail) { + save_link = node_next(tail); + node_next(tail) = null; + node_tail(head) = tail; + tex_aux_do_handle_kerning(head, null, null); + tail = node_tail(head); + if (tex_valid_node(save_link)) { + /* no need for check */ + tex_try_couple_nodes(tail, save_link); + } + } else { + node_tail(head) = null; + tex_aux_do_handle_kerning(head, null, null); + } + return tail; +} + +/*tex The ligaturing and kerning \LUA\ interface: */ + +static halfword tex_aux_run_lua_ligkern_callback(lua_State *L, halfword head, halfword group, halfword direction, int 
callback_id) +{ + int top = 0; + if (lmt_callback_okay(L, callback_id, &top)) { + int i; + lmt_node_list_to_lua(L, head); + lmt_push_group_code(L, group); + lua_pushinteger(L, direction); + i = lmt_callback_call(L, 3, 1, top); + if (i) { + lmt_callback_error(L, top, i); + } else { + head = lmt_node_list_from_lua(L, -1); + lmt_callback_wrapup(L, top); + } + } + return head; +} + +halfword tex_handle_glyphrun(halfword head, halfword group, halfword direction) +{ + if (head) { + int callback_id = lmt_callback_defined(glyph_run_callback); + if (callback_id) { + return tex_aux_run_lua_ligkern_callback(lmt_lua_state.lua_instance, head, group, direction, callback_id); + } else { + callback_id = lmt_callback_defined(ligaturing_callback); + if (callback_id) { + head = tex_aux_run_lua_ligkern_callback(lmt_lua_state.lua_instance, head, group, direction, callback_id); + } else { + tex_handle_ligaturing(head, null); + } + callback_id = lmt_callback_defined(kerning_callback); + if (callback_id) { + head = tex_aux_run_lua_ligkern_callback(lmt_lua_state.lua_instance, head, group, direction, callback_id); + } else { + halfword nest = tex_new_node(nesting_node, unset_nesting_code); + tex_couple_nodes(nest, head); + tex_aux_do_handle_kerning(nest, null, null); + head = node_next(nest); + node_prev(head) = null; + node_next(nest) = null; + tex_flush_node(nest); + } + } + } + return head; +} + +/*tex + + When the user defines |\font\f|, say, \TEX\ assigns an internal number to the user's font |\f|. + Adding this number to |font_id_base| gives the |eqtb| location of a \quote {frozen} control + sequence that will always select the + font. + + The variable |a| in the following code indicates the global nature of the value to be set. It's + used in the |define| macro. Here we're never global. + + There's not much scanner code here because the other scanners are defined where they make most + sense. 
+ +*/ + +void tex_set_cur_font(halfword g, halfword f) +{ + update_tex_font(g, f); +} + +/*tex This prints a scaled real, rounded to five digits. */ + +static char *tex_aux_scaled_to_string(scaled s) +{ + static char result[16]; + int k = 0; + /*tex The amount of allowable inaccuracy: */ + scaled delta; + if (s < 0) { + /*tex Only print the sign, if negative */ + result[k++] = '-'; + s = -s; + } + { + int l = 0; + char digs[8] = { 0 }; + int n = s / unity; + /*tex Process the integer part: */ + do { + digs[l++] = (char) (n % 10); + n = n / 10;; + } while (n > 0); + while (l > 0) { + result[k++] = (char) (digs[--l] + '0'); + } + } + result[k++] = '.'; + s = 10 * (s % unity) + 5; + delta = 10; + do { + if (delta > unity) { + /*tex Round the last digit: */ + s = s + 0100000 - 050000; + } + result[k++] = (char) ('0' + (s / unity)); + s = 10 * (s % unity); + delta = delta * 10; + } while (s > delta); + result[k] = 0; + return (char *) result; +} + +/*tex + + Because we do fonts in \LUA\ we can decide to drop this one and assume a definition using the + token scanner. It also avoids the filename (split) mess. + +*/ + +int tex_tex_def_font(int a) +{ + if (! lmt_fileio_state.job_name) { + /*tex Avoid confusing |texput| with the font name. */ + tex_open_log_file(); + } + tex_get_r_token(); + if (tex_define_permitted(cur_cs, a)) { + /*tex The user's font identifier. */ + halfword u = cur_cs; + /*tex This runs through existing fonts. */ + halfword f; + /*tex Stated 'at' size, or negative of scaled magnification. */ + scaled s = -1000; + char *fn; + /*tex Here |a| detemines if we define global or not. */ + if (is_global(a)) { + update_tex_font_global(u, null_font); + } else { + update_tex_font_local(u, null_font); + } + fn = tex_read_file_name(1, NULL, NULL); + /*tex Scan the font size specification. */ + lmt_fileio_state.name_in_progress = 1; + if (tex_scan_keyword("at")) { + /*tex Put the positive 'at' size into |s|. 
*/ + s = tex_scan_dimen(0, 0, 0, 0, NULL); + if ((s <= 0) || (s >= 01000000000)) { + char msg[256]; + snprintf(msg, 255, + "Improper 'at' size (%spt), replaced by 10pt", + tex_aux_scaled_to_string(s) + ); + tex_handle_error( + normal_error_type, + msg, + "I can only handle fonts at positive sizes that are less than 2048pt, so I've\n" + "changed what you said to 10pt." ); + s = 10 * unity; + } + } else if (tex_scan_keyword("scaled")) { + s = tex_scan_int(0, NULL); + if ((s <= 0) || (s > 32768)) { + char msg[256]; + snprintf(msg, 255, + "Illegal magnification has been changed to 1000 (%d)", + (int) s + ); + tex_handle_error( + normal_error_type, + msg, + "The magnification ratio must be between 1 and 32768." + ); + s = -1000; + } else { + s = -s; + } + } + lmt_fileio_state.name_in_progress = 0; + f = tex_read_font_info(fn, s); + eq_value(u) = f; + lmt_memory_free(fn); + return 1; + } else { + return 0; + } +} + +/*tex + + When \TEX\ wants to typeset a character that doesn't exist, the character node is not created; + thus the output routine can assume that characters exist when it sees them. The following + procedure prints a warning message unless the user has suppressed it. + +*/ + +void tex_char_warning(halfword f, int c) +{ + if (tracing_lost_chars_par > 0) { + /*tex saved value of |tracing_online| */ + int old_setting = tracing_online_par; + /*tex index to current digit; we assume that $0\L n<16^{22}$ */ + if (tracing_lost_chars_par > 1) { + tracing_online_par = 1; + } + tex_begin_diagnostic(); + tex_print_format("[font: missing character, character %c (%U), font '%s']", c, c, font_name(f)); + tex_end_diagnostic(); + tracing_online_par = old_setting; + } +} + +/* Getters. 
*/
+
+/*tex
+
+    These look up the |charinfo| of character |c| in font |f| with |tex_aux_char_info| and hand
+    back one field of it. The result of that lookup is dereferenced without a test here, while
+    the checkers and setters in this file do test it (NOTE(review): presumably the lookup never
+    returns |NULL|; verify).
+
+*/
+
+scaled tex_char_width_from_font(halfword f, halfword c)
+{
+    return tex_aux_char_info(f, c)->width;
+}
+
+scaled tex_char_height_from_font(halfword f, halfword c)
+{
+    return tex_aux_char_info(f, c)->height;
+}
+
+scaled tex_char_depth_from_font(halfword f, halfword c)
+{
+    return tex_aux_char_info(f, c)->depth;
+}
+
+/*tex The total is the height plus the depth. */
+
+scaled tex_char_total_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->height + ci->depth;
+}
+
+scaled tex_char_italic_from_font(halfword f, halfword c)
+{
+    return tex_aux_char_info(f, c)->italic;
+}
+
+
+// scaled tex_char_options_from_font(halfword f, halfword c)
+// {
+//     charinfo *ci = tex_aux_char_info(f, c);
+//     return ci->math ? ci->math->options : 0;
+// }
+//
+// int tex_char_has_option_from_font(halfword f, halfword c, int option)
+// {
+//     charinfo *ci = tex_aux_char_info(f, c);
+//     return ci->math ? math_font_option(ci->math->options, option) : 0;
+// }
+
+/*tex Width, height, depth and italic correction in one go. */
+
+scaledwhd tex_char_whd_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return (scaledwhd) {
+        .wd = ci->width,
+        .ht = ci->height,
+        .dp = ci->depth,
+        .ic = ci->italic
+    };
+}
+
+scaled tex_char_ef_from_font(halfword f, halfword c)
+{
+    return tex_aux_char_info(f, c)->expansion;
+}
+
+scaled tex_char_lp_from_font(halfword f, halfword c)
+{
+    return tex_aux_char_info(f, c)->leftprotrusion;
+}
+
+scaled tex_char_rp_from_font(halfword f, halfword c)
+{
+    return tex_aux_char_info(f, c)->rightprotrusion;
+}
+
+/*tex This is true only when {\em all} the bits in |tag| are set in the tag of the character. */
+
+halfword tex_char_has_tag_from_font(halfword f, halfword c, halfword tag)
+{
+    return (charinfo_tag(tex_aux_char_info(f, c)->tagrem) & tag) == tag;
+}
+
+/*tex The bits in |tag| are cleared; the remainder part of |tagrem| is kept as it is. */
+
+void tex_char_reset_tag_from_font(halfword f, halfword c, halfword tag)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+ // tag = charinfo_tag(ci->tagrem) & ~(tag | charinfo_tag(ci->tagrem));
+    tag = charinfo_tag(ci->tagrem) & ~(tag);
+    ci->tagrem = charinfo_tagrem(tag,charinfo_rem(ci->tagrem));
+
+}
+
+halfword tex_char_tag_from_font(halfword f, halfword c)
+{
+    return charinfo_tag(tex_aux_char_info(f, c)->tagrem);
+}
+
+halfword tex_char_remainder_from_font(halfword f, halfword c)
+{
+    return charinfo_rem(tex_aux_char_info(f, c)->tagrem);
+}
+
+/*tex
+
+    The math related getters have to check if there is a |math| record at all. For the italic
+    and the accents |INT_MIN| signals that there is none, the two |anchor| variants turn that
+    signal into zero, the kerns and margins default to zero and the parts to |NULL|.
+
+*/
+
+halfword tex_char_vertical_italic_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->vertical_italic : INT_MIN;
+}
+
+halfword tex_char_top_accent_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->top_accent : INT_MIN;
+}
+
+halfword tex_char_top_anchor_from_font(halfword f, halfword c)
+{
+    scaled n = tex_char_top_accent_from_font(f, c);
+    return n == INT_MIN ? 0 : n;
+}
+
+halfword tex_char_bot_accent_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->bottom_accent : INT_MIN;
+}
+
+halfword tex_char_bot_anchor_from_font(halfword f, halfword c)
+{
+    scaled n = tex_char_bot_accent_from_font(f, c);
+    return n == INT_MIN ? 0 : n;
+}
+
+halfword tex_char_flat_accent_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->flat_accent : INT_MIN;
+}
+
+scaled tex_char_top_left_kern_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->top_left_kern : 0;
+}
+
+scaled tex_char_top_right_kern_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->top_right_kern : 0;
+}
+
+scaled tex_char_bottom_left_kern_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->bottom_left_kern : 0;
+}
+
+scaled tex_char_bottom_right_kern_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->bottom_right_kern : 0;
+}
+
+extinfo *tex_char_vertical_parts_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->vertical_parts : NULL;
+}
+
+extinfo *tex_char_horizontal_parts_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->horizontal_parts : NULL;
+}
+
+scaled tex_char_left_margin_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->left_margin : 0;
+}
+
+scaled tex_char_right_margin_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->right_margin : 0;
+}
+
+scaled tex_char_top_margin_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->top_margin : 0;
+}
+
+scaled tex_char_bottom_margin_from_font(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci->math ? ci->math->bottom_margin : 0;
+}
+
+/*
+    Nodes: here font and character are taken from the glyph node |g|. Horizontal dimensions go
+    through |tex_aux_glyph_x_scaled| and vertical ones through |tex_aux_glyph_y_scaled|.
+*/
+
+scaled tex_char_width_from_glyph(halfword g)
+{
+    charinfo *ci = tex_aux_char_info(glyph_font(g), glyph_character(g));
+    return tex_aux_glyph_x_scaled(g, ci->width);
+}
+
+scaled tex_char_height_from_glyph(halfword g)
+{
+    charinfo *ci = tex_aux_char_info(glyph_font(g), glyph_character(g));
+    return tex_aux_glyph_y_scaled(g, ci->height);
+}
+
+scaled tex_char_depth_from_glyph(halfword g)
+{
+    charinfo *ci = tex_aux_char_info(glyph_font(g), glyph_character(g));
+    return tex_aux_glyph_y_scaled(g, ci->depth);
+}
+
+scaled tex_char_total_from_glyph(halfword g)
+{
+    charinfo *ci = tex_aux_char_info(glyph_font(g), glyph_character(g));
+    return tex_aux_glyph_y_scaled(g, ci->height + ci->depth);
+}
+
+scaled tex_char_italic_from_glyph(halfword g)
+{
+    charinfo *ci = tex_aux_char_info(glyph_font(g), glyph_character(g));
+    return tex_aux_glyph_x_scaled(g, ci->italic);
+}
+
+// halfword tex_char_options_from_glyph(halfword g)
+// {
+//     charinfo *ci = tex_aux_char_info(glyph_font(g), glyph_character(g));
+//     return ci->math ? 
ci->math->options : 0;
+// }
+
+// int tex_char_has_option_from_glyph(halfword g, int t)
+// {
+//     if (node_type(g) == glyph_node) {
+//         charinfo *ci = tex_aux_char_info(glyph_font(g), glyph_character(g));
+//         return ci->math ? math_font_option(ci->math->options, t) : 0;
+//     } else {
+//         return 0;
+//     }
+// }
+
+/*tex The four dimensions of glyph |g| in one go, each of them scaled in its own direction. */
+
+scaledwhd tex_char_whd_from_glyph(halfword g)
+{
+    charinfo *ci = tex_aux_char_info(glyph_font(g), glyph_character(g));
+    return (scaledwhd) {
+        .wd = tex_aux_glyph_x_scaled(g, ci->width),
+        .ht = tex_aux_glyph_y_scaled(g, ci->height),
+        .dp = tex_aux_glyph_y_scaled(g, ci->depth),
+        .ic = tex_aux_glyph_x_scaled(g, ci->italic)
+    };
+}
+
+/*tex Width and italic correction are added before the sum is scaled. */
+
+scaled tex_char_width_italic_from_glyph(halfword g)
+{
+    charinfo *ci = tex_aux_char_info(glyph_font(g), glyph_character(g));
+    return tex_aux_glyph_x_scaled(g, ci->width + ci->italic);
+}
+
+/*
+    More: a non zero |ex| makes the width |wd * (1000 + ex) / 1000| (by way of
+    |tex_round_xn_over_d|), a zero |ex| leaves the width untouched.
+*/
+
+scaled tex_calculated_char_width(halfword f, halfword c, halfword ex)
+{
+    scaled wd = tex_aux_char_info(f, c)->width;
+    return ex ? tex_round_xn_over_d(wd, 1000 + ex, 1000) : wd;
+}
+
+scaled tex_calculated_glyph_width(halfword g, halfword ex)
+{
+    charinfo *ci = tex_aux_char_info(glyph_font(g), glyph_character(g));
+    scaled wd = tex_aux_glyph_x_scaled(g, ci->width);
+    return ex ? tex_round_xn_over_d(wd, 1000 + ex, 1000) : wd;
+}
+
+/* Checkers: these report 0 when there is no |charinfo| or when the field asked for is not set. */
+
+int tex_has_ligature(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci ? ci->ligatures != NULL : 0;
+}
+
+int tex_has_kern(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci ? ci->kerns != NULL : 0;
+}
+
+int tex_char_has_math(halfword f, halfword c)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    return ci ? ci->math != NULL : 0;
+}
+
+/* Setters: nothing happens when there is no |charinfo| for the character. */
+
+void tex_set_lpcode_in_font(halfword f, halfword c, halfword i)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    if (ci) {
+        ci->leftprotrusion = i;
+    }
+}
+
+void tex_set_rpcode_in_font(halfword f, halfword c, halfword i)
+{
+    charinfo *ci = tex_aux_char_info(f, c);
+    if (ci) {
+        ci->rightprotrusion = i;
+    }
+}
+
+void tex_set_efcode_in_font(halfword f, halfword c, halfword i) {
+    charinfo *ci = tex_aux_char_info(f, c);
+    if (ci) {
+        ci->expansion = i;
+    }
+}
+
+/*tex The old name is freed and a private copy of |s| is stored; |NULL| clears the name. */
+
+void tex_set_font_name(halfword f, const char *s)
+{
+    if (font_name(f)) {
+        lmt_memory_free(font_name(f));
+    }
+    set_font_name(f, s ? lmt_memory_strdup(s) : NULL);
+}
+
+/*tex The same as above, but now for the |original| field. */
+
+void tex_set_font_original(halfword f, const char *s)
+{
+    if (font_original(f)) {
+        lmt_memory_free(font_original(f));
+    }
+    set_font_original(f, s ? lmt_memory_strdup(s) : NULL);
+}
+
+/*tex
+
+    For the sizes 0, 1 and 2 a non zero |mathscales| entry of the font wins, otherwise the
+    matching |glyph_*_scale_par| parameter is used. Any other |size| gives 1000 and so does a
+    result that ends up being zero.
+
+*/
+
+scaled tex_get_math_font_scale(halfword f, halfword size)
+{
+    scaled scale = 1000;
+    switch (size) {
+        case 2: scale = lmt_font_state.fonts[f]->mathscales[2] ? lmt_font_state.fonts[f]->mathscales[2] : glyph_scriptscript_scale_par; break;
+        case 1: scale = lmt_font_state.fonts[f]->mathscales[1] ? lmt_font_state.fonts[f]->mathscales[1] : glyph_script_scale_par; break;
+        case 0: scale = lmt_font_state.fonts[f]->mathscales[0] ? lmt_font_state.fonts[f]->mathscales[0] : glyph_text_scale_par; break;
+    }
+    return scale ? scale : 1000;
+}
+
+/*tex
+    Experiment. 
+*/
+
+/*tex
+    The font spec is found via |cur_chr|. Its identifier is always applied, the three scales
+    only when they differ from |unused_scale_value|.
+*/
+
+void tex_run_font_spec(void)
+{
+    update_tex_font_identifier(font_spec_identifier(cur_chr));
+    if (font_spec_scale(cur_chr) != unused_scale_value) {
+        update_tex_glyph_scale(font_spec_scale(cur_chr));
+    }
+    if (font_spec_x_scale(cur_chr) != unused_scale_value) {
+        update_tex_glyph_x_scale(font_spec_x_scale(cur_chr));
+    }
+    if (font_spec_y_scale(cur_chr) != unused_scale_value) {
+        update_tex_glyph_y_scale(font_spec_y_scale(cur_chr));
+    }
+}
+
diff --git a/source/luametatex/source/tex/texfont.h b/source/luametatex/source/tex/texfont.h
new file mode 100644
index 000000000..a13c6e13d
--- /dev/null
+++ b/source/luametatex/source/tex/texfont.h
@@ -0,0 +1,667 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+# ifndef LMT_TEXFONT_H
+# define LMT_TEXFONT_H
+
+# include "tex/textypes.h"
+
+/*tex
+
+    In the \WEBC\ infrastructure there is code that deals with endianness of the machine but in
+    \LUAMETATEX\ we don't need this. In \LUATEX\ sharing the format file was already dropped, simply
+    because we can also store \LUA\ bytecode in the format. In the other engines font data can end
+    up in the format file and that in turn then also can be endian dependent. But in \LUAMETATEX\
+    we no longer store font data, and that is yet another reason why there is no endian related
+    code here.
+
+    The ligature and kern structures are for traditional \TEX\ fonts, those that are handled by the
+    built in reference handlers. Although \OPENTYPE\ is more versatile, we should not forget that
+    for many (latin) scripts these so called base fonts are quite adequate and efficient. We could
+    of course implement base support in \LUA\ but although \LUAMETATEX\ can delegate a lot, we also
+    keep the reference implementation available: it is well documented, was for a long time the best
+    one could get and doesn't take that much code. 
So, here come the basic structures:
+
+*/
+
+typedef struct ligatureinfo {
+    int type;
+    int ligature;
+    int adjacent;
+    /* alignment */
+    int padding;
+} ligatureinfo;
+
+typedef struct kerninfo {
+    int kern;
+    int adjacent;
+} kerninfo;
+
+/*tex
+
+    In \LUAMETATEX, at runtime, after a font is loaded via a callback, we only store the little
+    information that is needed for basic ligature building and kerning, math rendering (like
+    extensibles), and par building which includes protrusion and expansion. We don't need anything
+    related to the backend because output is delegated to \LUA.
+
+    The most extensive data structures are those related to \OPENTYPE\ math. When passing a font we
+    can save memory by using the |hasmath| directive. In \LUAMETATEX\ we can then have a different
+    |struct| with 15 fields less than in \LUATEX\ which, combined with other savings, saves some 60
+    bytes. The disadvantage is that accessors of those fields also need to act upon that flag, which
+    involves more testing. However, because in practice math font access is not that prominent
+    the gain outweighs this potential performance hit. For an average \CJK\ font with 5000
+    characters we save 300000 bytes. Because a complete Latin font with various features also can
+    have thousands of glyphs, it can save some memory there too. It's changes like this that give
+    \LUAMETATEX\ a much smaller memory footprint than its predecessor.
+
+    The next record relates to math extensibles. It is good to realize that traditional \TEX\ fonts
+    are handled differently in the math subengine than \OPENTYPE\ math fonts. However, we use the
+    more extensive \OPENTYPE\ structure for both types of fonts. 
+
+*/
+
+typedef struct extinfo {
+    /*tex The parts form a linked list. */
+    struct extinfo *next;
+    int             glyph;
+    int             start_overlap;
+    int             end_overlap;
+    int             advance;
+    int             extender;
+    /* alignment */
+    int             padding;
+} extinfo;
+
+// typedef enum math_font_options {
+//     math_font_ignore_italic_option = 0x01,
+// } math_font_options;
+//
+// # define math_font_option(options,option) ((options & option) == option)
+
+typedef struct mathinfo {
+    /*tex For these three the getters in |texfont.c| return |INT_MIN| when there is no |mathinfo|. */
+    scaled   vertical_italic;
+    scaled   top_accent;
+    scaled   bottom_accent;
+    int      smaller;
+    scaled   scale;
+    int      flat_accent;
+    int      top_left_math_kerns;
+    int      top_right_math_kerns;
+    int      bottom_right_math_kerns;
+    int      bottom_left_math_kerns;
+    extinfo *horizontal_parts;
+    extinfo *vertical_parts;
+    scaled  *top_left_math_kern_array;
+    scaled  *top_right_math_kern_array;
+    scaled  *bottom_right_math_kern_array;
+    scaled  *bottom_left_math_kern_array;
+    /* these are for specific (script) anchoring */
+    scaled   top_left_kern;
+    scaled   bottom_left_kern;
+    scaled   top_right_kern;
+    scaled   bottom_right_kern;
+    scaled   left_margin;
+    scaled   right_margin;
+    scaled   top_margin;
+    scaled   bottom_margin;
+} mathinfo;
+
+typedef struct charinfo {
+    /*tex
+        This is what \TEX\ uses when it calculates the dimensions needed for building boxes and
+        breaking paragraphs into lines. The italic correction is part of that as it has a primitive
+        that needs the value.
+    */
+    scaled width;
+    scaled height;
+    scaled depth;
+    scaled italic;
+    /*tex
+        The next three variables relate to expansion and protrusion, properties introduced in the
+        \PDFTEX\ engine. Handling of protrusion and expansion are the only features that we inherit
+        from this important extension to traditional \TEX.
+    */
+    scaled expansion;
+    scaled leftprotrusion;
+    scaled rightprotrusion;
+    /* halfword padding; */ /* when we pack |tag| and |remainder| we can save 4 bytes */
+    /*tex
+        These two are used in a \TFM\ file for signaling ligatures. They are also used for math
+        extensions in traditional \TEX\ fonts, so we just keep them.
+    */
+    /* halfword tag;       */ /* 2 bits is enough (flags)    */
+    /* halfword remainder; */ /* 21 bits is enough (unicode) */
+    halfword tagrem; /* just an integer, less (arm) alignment hassle that way */
+    /*tex
+        Traditional \TEX\ fonts use these two lists for ligature building and inter-character
+        kerning and these are now optional (via pointers). By also using an indirect structure for
+        math data we save quite a bit of memory when we have no math font.
+    */
+    ligatureinfo *ligatures;
+    kerninfo     *kerns;
+    mathinfo     *math;
+} charinfo;
+
+/*tex
+    We can just abuse the token setters and getters here: the tag and the remainder are packed
+    in and unpacked from |tagrem| the way a command and character are packed in a token.
+*/
+
+# define charinfo_tag    token_cmd
+# define charinfo_rem    token_chr
+# define charinfo_tagrem token_val
+
+/*tex
+
+    For a font instance we only store the bits that are used by the engine itself. Of course more
+    data can (and normally will be) kept at the \TEX\ cq.\ \LUA\ end.
+
+    We could store a scale (/1000) and avoid copying a font but then we also need to multiply
+    width, height, etc. when queried (extra overhead). A bit tricky is then dealing with (virtual)
+    commands. It is not that big a deal in \CONTEXT\ so I might actually add this feature but only
+    very few documents use many font instances so in the end the gain is negligible (we only save
+    some memory). Also, we then need to adapt the math processing quite a bit which is always kind
+    of tricky.
+
+    Again, compared to \LUATEX\ there is less data stored here because we don't need to control the
+    backend. Of course in \CONTEXT\ we keep plenty of data at the \LUA\ end, but we did that already
+    anyway. 
+
+*/
+
+typedef struct texfont {
+    /*tex the range of (allocated) characters */
+    int       first_character;
+    int       last_character;
+    /*tex the (sparse) character (glyph) array */
+    sa_tree   characters;
+    charinfo *chardata;
+    int       chardata_count;
+    int       chardata_size;
+    /*tex properties used in messages */
+    int       size;
+    int       design_size;
+    char     *name;
+    char     *original;
+    /*tex for experimental new thingies */
+    int       compactmath;
+    /*tex default to false when MathConstants not seen */
+    int       oldmath;
+    /*tex this controls the engine */
+    int       mathcontrol;
+    int       textcontrol;
+    /*tex expansion */
+    int       max_shrink;
+    int       max_stretch;
+    int       step;
+    /*tex special characters, see \TEX book */
+    int       hyphen_char;
+    int       skew_char;
+    /*tex all parameters, although only some are used */
+    int       parameter_count;
+    scaled   *parameter_base;
+    /* presumably alignment, like the other |padding| fields in this file */
+    int       padding;
+    /*tex also special */
+    charinfo *left_boundary;
+    charinfo *right_boundary;
+    /*tex all math parameters */
+    scaled   *math_parameter_base;
+    int       math_parameter_count;
+    /*
+        zero is alignment -- NOTE(review): that looks outdated, as |set_font_textsize| writes
+        entry 0 and |tex_get_math_font_scale| reads it for size 0; 1 and 2 are the script and
+        scriptscript scales
+    */
+    int       mathscales[3];
+} texfont;
+
+/*tex
+
+    Instead of global variables we store some properties that are shared between the different components
+    in a dedicated struct. 
+ +*/ + +typedef struct font_state_info { + texfont **fonts; + halfword adjust_stretch; + halfword adjust_shrink; + halfword adjust_step; + int padding; + memory_data font_data; +} font_state_info ; + +extern font_state_info lmt_font_state; + +# define font_size(a) lmt_font_state.fonts[a]->size +# define font_name(a) lmt_font_state.fonts[a]->name +# define font_original(a) lmt_font_state.fonts[a]->original +# define font_design_size(a) lmt_font_state.fonts[a]->design_size +# define font_first_character(a) lmt_font_state.fonts[a]->first_character +# define font_last_character(a) lmt_font_state.fonts[a]->last_character +/*define font_touched(a) font_state.fonts[a]->touched */ +# define font_oldmath(a) lmt_font_state.fonts[a]->oldmath +# define font_compactmath(a) lmt_font_state.fonts[a]->compactmath +# define font_mathcontrol(a) lmt_font_state.fonts[a]->mathcontrol +# define font_textcontrol(a) lmt_font_state.fonts[a]->textcontrol +# define font_hyphen_char(a) lmt_font_state.fonts[a]->hyphen_char +# define font_skew_char(a) lmt_font_state.fonts[a]->skew_char +# define font_max_shrink(a) (lmt_font_state.adjust_step > 0 ? lmt_font_state.adjust_shrink : lmt_font_state.fonts[a]->max_shrink) +# define font_max_stretch(a) (lmt_font_state.adjust_step > 0 ? lmt_font_state.adjust_stretch : lmt_font_state.fonts[a]->max_stretch) +# define font_step(a) (lmt_font_state.adjust_step > 0 ? 
lmt_font_state.adjust_step : lmt_font_state.fonts[a]->step) +# define font_mathscale(a,b) lmt_font_state.fonts[a]->mathscales[b] + +# define set_font_size(a,b) lmt_font_state.fonts[a]->size = b +# define set_font_name(a,b) lmt_font_state.fonts[a]->name = b +# define set_font_original(a,b) lmt_font_state.fonts[a]->original = b +# define set_font_design_size(a,b) lmt_font_state.fonts[a]->design_size = b +# define set_font_first_character(a,b) lmt_font_state.fonts[a]->first_character = b +# define set_font_last_character(a,b) lmt_font_state.fonts[a]->last_character = b +/*define set_font_touched(a,b) font_state.fonts[a]->touched = b */ +# define set_font_oldmath(a,b) lmt_font_state.fonts[a]->oldmath = b +# define set_font_compactmath(a,b) lmt_font_state.fonts[a]->compactmath = b +# define set_font_mathcontrol(a,b) lmt_font_state.fonts[a]->mathcontrol = b +# define set_font_textcontrol(a,b) lmt_font_state.fonts[a]->textcontrol = b +# define set_font_hyphen_char(a,b) lmt_font_state.fonts[a]->hyphen_char = b +# define set_font_skew_char(a,b) lmt_font_state.fonts[a]->skew_char = b +# define set_font_max_shrink(a,b) lmt_font_state.fonts[a]->max_shrink = b +# define set_font_max_stretch(a,b) lmt_font_state.fonts[a]->max_stretch = b +# define set_font_step(a,b) lmt_font_state.fonts[a]->step = b + +# define set_font_textsize(a,b) lmt_font_state.fonts[a]->mathscales[0] = b +# define set_font_scriptsize(a,b) lmt_font_state.fonts[a]->mathscales[1] = b +# define set_font_scriptscriptsize(a,b) lmt_font_state.fonts[a]->mathscales[2] = b + +/*tex + These are bound to a font. There might be a few more in the future. An example is collapsing + hyphens. One can do that using (in context speak) tlig feature but actually it is some very + \TEX\ thing, that happened to be implemented using ligatures. In \LUAMETATEX\ it's also a bit + special because, although it is not really dependent on a language, hyphen handling in \TEX\ + is very present in the hyphenator (also sequences of them). 
So, naturally it moved there. But + users who don't want it can disable it per font. +*/ + +typedef enum text_control_codes { + text_control_collapse_hyphens = 0x00001, +} text_control_codes; + +# define has_font_text_control(f,c) ((font_textcontrol(f) & c) == c) + +/*tex + + These are special codes that are used in the traditional ligature builder. In \OPENTYPE\ + fonts we don't see these. + +*/ + +typedef enum boundarychar_codes { + left_boundary_char = -1, + right_boundary_char = -2, + non_boundary_char = -3, +} boundarychar_codes; + +/*tex These are pointers, so: |NULL| */ + +# define font_left_boundary(a) lmt_font_state.fonts[a]->left_boundary +# define font_right_boundary(a) lmt_font_state.fonts[a]->right_boundary + +# define font_has_left_boundary(a) (font_left_boundary(a)) +# define font_has_right_boundary(a) (font_right_boundary(a)) + +# define set_font_left_boundary(a,b) { if (font_left_boundary(a)) { lmt_memory_free(font_left_boundary(a)); } font_left_boundary(a) = b; } +# define set_font_right_boundary(a,b) { if (font_right_boundary(a)) { lmt_memory_free(font_right_boundary(a)); } font_right_boundary(a) = b; } + +/*tex + + In traditional \TEX\ there are just over a handful of font specific parameters for text fonts + and some more in math fonts. Actually, these parameters were stored in a way that permitted + adding more at runtime, something that made no real sense, but can be abused for creeating + more dimensions than the 256 that traditional \TEX\ provides. 
+ +*/ + +# define font_parameter_count(a) lmt_font_state.fonts[a]->parameter_count +# define font_parameter_base(a) lmt_font_state.fonts[a]->parameter_base +# define font_parameter(a,b) lmt_font_state.fonts[a]->parameter_base[b] + +# define font_math_parameter_count(a) lmt_font_state.fonts[a]->math_parameter_count +# define font_math_parameter_base(a) lmt_font_state.fonts[a]->math_parameter_base +# define font_math_parameter(a,b) lmt_font_state.fonts[a]->math_parameter_base[b] + +# define set_font_parameter_base(a,b) lmt_font_state.fonts[a]->parameter_base = b; +# define set_font_math_parameter_base(a,b) lmt_font_state.fonts[a]->math_parameter_base = b; + +/*tex + + These font parameters could be adapted at runtime but one should really wonder if that is such + a good idea nowadays. + + */ + +//define set_font_parameter(f,n,b) { if (font_parameter_count(f) < n) { tex_set_font_parameters(f, n); } font_parameter(f, n) = b; } +// # define set_font_math_parameter(f,n,b) { if (font_math_parameter_count(f) < n) { tex_set_font_math_parameters(f, n); } font_math_parameter(f, n) = b; } + +extern void tex_set_font_parameters (halfword f, int b); +extern void tex_set_font_math_parameters (halfword f, int b); +extern int tex_get_font_max_id (void); +extern int tex_get_font_max_id (void); + +extern halfword tex_checked_font_adjust ( + halfword adjust_spacing, + halfword adjust_spacing_step, + halfword adjust_spacing_shrink, + halfword adjust_spacing_stretch +); + +/*tex + + Font parameters are sometimes referred to as |slant(f)|, |space(f)|, etc. These numbers are + also the font dimen numbers. 
+ +*/ + +typedef enum font_parameter_codes { + slant_code = 1, + space_code, + space_stretch_code, + space_shrink_code, + ex_height_code, + em_width_code, + extra_space_code, +} font_parameter_codes; + +extern scaled tex_get_font_slant (halfword f); +extern scaled tex_get_font_space (halfword f); +extern scaled tex_get_font_space_stretch (halfword f); +extern scaled tex_get_font_space_shrink (halfword f); +extern scaled tex_get_font_ex_height (halfword f); +extern scaled tex_get_font_em_width (halfword f); +extern scaled tex_get_font_extra_space (halfword f); +extern scaled tex_get_font_parameter (halfword f, halfword code); +extern void tex_set_font_parameter (halfword f, halfword code, scaled v); + +extern scaled tex_get_scaled_space (halfword f); +extern scaled tex_get_scaled_space_stretch (halfword f); +extern scaled tex_get_scaled_space_shrink (halfword f); +extern scaled tex_get_scaled_ex_height (halfword f); +extern scaled tex_get_scaled_em_width (halfword f); +extern scaled tex_get_scaled_extra_space (halfword f); +extern scaled tex_get_scaled_parameter (halfword f, halfword code); +extern void tex_set_scaled_parameter (halfword f, halfword code, scaled v); + +extern halfword tex_get_scaled_glue (halfword f); +extern halfword tex_get_scaled_parameter_glue (quarterword p, quarterword s); +extern halfword tex_get_parameter_glue (quarterword p, quarterword s); + +extern halfword tex_get_font_identifier (halfword fs); + +/*tex + + The \OPENTYPE\ math fonts have four edges and reference points for kerns. 
Here we go: + +*/ + +typedef enum font_math_kern_codes { + top_right_kern = 1, + bottom_right_kern, + bottom_left_kern, + top_left_kern, +} font_math_kern_codes; + +extern charinfo *tex_get_charinfo (halfword f, int c); +extern int tex_char_exists (halfword f, int c); +extern void tex_char_process (halfword f, int c); +extern int tex_math_char_exists (halfword f, int c, int size); +extern int tex_get_math_char (halfword f, int c, int size, scaled *scale); + +/*tex + + Here is a quick way to test if a glyph exists, when you are already certain the font |f| exists, + and that the |c| is a regular glyph id, not one of the two special boundary objects. Contrary + to traditional \TEX\ we store character information in a hash table instead of an array. Keep + in mind that we talk \UNICODE: plenty of characters in the code space, but less so in a font, + so we can best be sparse. + +*/ + +# define quick_char_exists(f,c) (sa_get_item_4(lmt_font_state.fonts[f]->characters,c).int_value) + +/*tex + These low level setters are not publis and used in helpers. They might become functions + when I feel the need. 
+*/ + +# define set_charinfo_width(ci,val) ci->width = val; +# define set_charinfo_height(ci,val) ci->height = val; +# define set_charinfo_depth(ci,val) ci->depth = val; +# define set_charinfo_italic(ci,val) ci->italic = val; +# define set_charinfo_expansion(ci,val) ci->expansion = val; +# define set_charinfo_leftprotrusion(ci,val) ci->leftprotrusion = val; +# define set_charinfo_rightprotrusion(ci,val) ci->rightprotrusion = val; + +# define set_charinfo_tag(ci,tag) ci->tagrem = charinfo_tagrem(charinfo_tag(ci->tagrem) | tag,charinfo_rem(ci->tagrem)); +# define set_charinfo_remainder(ci,rem) ci->tagrem = charinfo_tagrem(charinfo_tag(ci->tagrem),rem); + +# define has_charinfo_tag(ci,p) (charinfo_tag(ci->tagrem) & (p) == (p)) +# define get_charinfo_tag(ci) charinfo_tag(ci->tagrem) + +# define set_charinfo_ligatures(ci,val) { lmt_memory_free(ci->ligatures); ci->ligatures = val; } +# define set_charinfo_kerns(ci,val) { lmt_memory_free(ci->kerns); ci->kerns = val; } +# define set_charinfo_math(ci,val) { lmt_memory_free(ci->math); ci->math = val; } + +# define set_charinfo_top_left_math_kern_array(ci,val) if (ci->math) { lmt_memory_free(ci->math->top_left_math_kern_array); ci->math->top_left_math_kern_array = val; } +# define set_charinfo_top_right_math_kern_array(ci,val) if (ci->math) { lmt_memory_free(ci->math->top_right_math_kern_array); ci->math->top_left_math_kern_array = val; } +# define set_charinfo_bottom_right_math_kern_array(ci,val) if (ci->math) { lmt_memory_free(ci->math->bottom_right_math_kern_array); ci->math->top_left_math_kern_array = val; } +# define set_charinfo_bottom_left_math_kern_array(ci,val) if (ci->math) { lmt_memory_free(ci->math->bottom_left_math_kern_array); ci->math->top_left_math_kern_array = val; } + +//define set_charinfo_options(ci,val) if (ci->math) { ci->math->options = val; } + +# define set_ligature_item(f,b,c,d) { f.type = b; f.adjacent = c; f.ligature = d; } +# define set_kern_item(f,b,c) { f.adjacent = b; f.kern = c; } + +# define 
set_charinfo_left_margin(ci,val) if (ci->math) { ci->math->left_margin = val; } +# define set_charinfo_right_margin(ci,val) if (ci->math) { ci->math->right_margin = val; } +# define set_charinfo_top_margin(ci,val) if (ci->math) { ci->math->top_margin = val; } +# define set_charinfo_bottom_margin(ci,val) if (ci->math) { ci->math->bottom_margin = val; } + +# define set_charinfo_smaller(ci,val) if (ci->math) { ci->math->smaller = val; } +# define set_charinfo_vertical_italic(ci,val) if (ci->math) { ci->math->vertical_italic = val; } +# define set_charinfo_top_accent(ci,val) if (ci->math) { ci->math->top_accent = val; } +# define set_charinfo_bottom_accent(ci,val) if (ci->math) { ci->math->bottom_accent = val; } +# define set_charinfo_flat_accent(ci,val) if (ci->math) { ci->math->flat_accent = val; } + +# define set_charinfo_top_left_kern(ci,val) if (ci->math) { ci->math->top_left_kern = val; } +# define set_charinfo_top_right_kern(ci,val) if (ci->math) { ci->math->top_right_kern = val; } +# define set_charinfo_bottom_left_kern(ci,val) if (ci->math) { ci->math->bottom_left_kern = val; } +# define set_charinfo_bottom_right_kern(ci,val) if (ci->math) { ci->math->bottom_right_kern = val; } + +/*tex Setters: */ + +void tex_set_lpcode_in_font (halfword f, halfword c, halfword i); +void tex_set_rpcode_in_font (halfword f, halfword c, halfword i); +void tex_set_efcode_in_font (halfword f, halfword c, halfword i); + +extern void tex_set_charinfo_extensible (charinfo *ci, int top, int bottom, int middle, int extender); +extern void tex_add_charinfo_math_kern (charinfo *ci, int type, scaled ht, scaled krn); +extern int tex_get_charinfo_math_kerns (charinfo *ci, int id); +extern void tex_set_charinfo_horizontal_parts (charinfo *ci, extinfo *ext); +extern void tex_set_charinfo_vertical_parts (charinfo *ci, extinfo *ext); +extern void tex_add_charinfo_vertical_part (charinfo *ci, extinfo *ext); +extern void tex_add_charinfo_horizontal_part (charinfo *ci, extinfo *ext); +extern 
extinfo *tex_new_charinfo_part (int glyph, int startconnect, int endconnect, int advance, int repeater); + +/*tex Checkers: */ + +int tex_char_has_math (halfword f, halfword c); +int tex_has_ligature (halfword f, halfword c); +int tex_has_kern (halfword f, halfword c); + +/*tex Getters: */ + +# define MATH_KERN_NOT_FOUND 0x7FFFFFFF + +extern scaled tex_char_width_from_font (halfword f, halfword c); /* math + maincontrol */ +extern scaled tex_char_height_from_font (halfword f, halfword c); /* math + maincontrol */ +extern scaled tex_char_depth_from_font (halfword f, halfword c); /* math + maincontrol */ +extern scaled tex_char_total_from_font (halfword f, halfword c); /* math */ +extern scaled tex_char_italic_from_font (halfword f, halfword c); /* math + maincontrol */ +// halfword tex_char_options_from_font (halfword f, halfword c); +extern scaled tex_char_ef_from_font (halfword f, halfword c); /* packaging + maincontrol */ +extern scaled tex_char_lp_from_font (halfword f, halfword c); /* packaging + maincontrol */ +extern scaled tex_char_rp_from_font (halfword f, halfword c); /* packaging + maincontrol */ +extern halfword tex_char_tag_from_font (halfword f, halfword c); /* math */ +extern halfword tex_char_remainder_from_font (halfword f, halfword c); /* math */ +extern halfword tex_char_has_tag_from_font (halfword f, halfword c, halfword tag); +extern void tex_char_reset_tag_from_font (halfword f, halfword c, halfword tag); +// int tex_char_has_option_from_font (halfword g, halfword c, int option); + +extern scaled tex_char_top_left_kern_from_font (halfword f, halfword c); /* math */ +extern scaled tex_char_top_right_kern_from_font (halfword f, halfword c); /* math */ +extern scaled tex_char_bottom_left_kern_from_font (halfword f, halfword c); /* math */ +extern scaled tex_char_bottom_right_kern_from_font (halfword f, halfword c); /* math */ + +extern scaledwhd tex_char_whd_from_font (halfword f, halfword c); /* math + maincontrol */ + +extern scaled 
tex_font_x_scaled (scaled v); +extern scaled tex_font_y_scaled (scaled v); + +extern scaled tex_char_width_from_glyph (halfword g); /* x/y scaled */ +extern scaled tex_char_height_from_glyph (halfword g); /* x/y scaled */ +extern scaled tex_char_depth_from_glyph (halfword g); /* x/y scaled */ +extern scaled tex_char_total_from_glyph (halfword g); /* x/y scaled */ +extern scaled tex_char_italic_from_glyph (halfword g); /* x/y scaled */ +// int tex_char_options_from_glyph (halfword g); +extern scaled tex_char_width_italic_from_glyph (halfword g); /* x/y scaled */ +// int tex_char_has_option_from_glyph (halfword g, int option); + +extern scaledwhd tex_char_whd_from_glyph (halfword g); /* x/y scaled */ + +extern halfword tex_char_vertical_italic_from_font (halfword f, halfword c); +extern halfword tex_char_top_accent_from_font (halfword f, halfword c); +extern halfword tex_char_bot_accent_from_font (halfword f, halfword c); +extern halfword tex_char_flat_accent_from_font (halfword f, halfword c); + +extern halfword tex_char_top_anchor_from_font (halfword f, halfword c); +extern halfword tex_char_bot_anchor_from_font (halfword f, halfword c); + +extern scaled tex_char_left_margin_from_font (halfword f, halfword c); +extern scaled tex_char_right_margin_from_font (halfword f, halfword c); +extern scaled tex_char_top_margin_from_font (halfword f, halfword c); +extern scaled tex_char_bottom_margin_from_font (halfword f, halfword c); + +extern extinfo *tex_char_vertical_parts_from_font (halfword f, halfword c); +extern extinfo *tex_char_horizontal_parts_from_font (halfword f, halfword c); + +/* scaled tex_math_kern_at (halfword f, int c, int side, int v); */ +/* scaled tex_find_math_kern (halfword l_f, int l_c, halfword r_f, int r_c, int cmd, scaled shift); */ + +extern int tex_valid_kern (halfword left, halfword right); /* returns kern */ +extern int tex_valid_ligature (halfword left, halfword right, int *slot); /* returns type */ + +extern scaled tex_calculated_char_width 
(halfword f, halfword c, halfword ex); +extern scaled tex_calculated_glyph_width (halfword g, halfword ex); /* scale */ + +/* + Kerns: the |otherchar| value signals \quote {stop}. These are not really public and only + to be used in the helpers. But we keep them as reference. +*/ + +# define end_kern 0x7FFFFF + +# define charinfo_kern(b,c) b->kerns[c] + +# define kern_char(b) (b).adjacent +# define kern_kern(b) (b).kern +# define kern_end(b) ((b).adjacent == end_kern) +# define kern_disabled(b) ((b).adjacent > end_kern) + +/* + Ligatures: the |otherchar| value signals \quote {stop}. These are not really public and only + to be used in the helpers. But we keep them as reference. +*/ + +# define end_of_ligature_code 0x7FFFFF + +# define charinfo_ligature(b,c) b->ligatures[c] + +# define ligature_is_valid(a) ((a).type != 0) +# define ligature_type(a) ((a).type >> 1) +# define ligature_char(a) (a).adjacent +# define ligature_replacement(a) (a).ligature +# define ligature_end(a) ((a).adjacent == end_of_ligature_code) +# define ligature_disabled(a) ((a).adjacent > end_of_ligature_code) + +/* Remainders and related flags: */ + +typedef enum math_extension_modes { + math_extension_normal, + math_extension_repeat, +} math_extension_modes; + +/* Expansion */ + +typedef enum adjust_spacing_modes { + adjust_spacing_off, + adjust_spacing_unused, + adjust_spacing_full, + adjust_spacing_font, +} adjust_spacing_modes; + +typedef enum protrude_chars_modes { + protrude_chars_off, + protrude_chars_unused, + protrude_chars_normal, + protrude_chars_advanced, +} protrude_chars_modes; + +/* +typedef enum math_extension_locations { + extension_top, + extension_bottom, + extension_middle, + extension_repeat, +} math_extension_locations; +*/ + +/* Tags: */ + +typedef enum tag_codes { + no_tag = 0x00, /*tex vanilla character */ + ligature_tag = 0x01, /*tex character has a ligature/kerning program */ + list_tag = 0x02, /*tex character has a successor in a charlist */ + extension_tag = 0x04, 
/*tex character is extensible */ + callback_tag = 0x08, + extend_last_tag = 0x10, +} tag_codes; + +extern halfword tex_checked_font (halfword f); +extern int tex_is_valid_font (halfword f); +extern int tex_raw_get_kern (halfword f, int lc, int rc); +extern int tex_get_kern (halfword f, int lc, int rc); +extern ligatureinfo tex_get_ligature (halfword f, int lc, int rc); +extern int tex_new_font (void); +extern int tex_new_font_id (void); +extern void tex_font_malloc_charinfo (halfword f, int num); +extern void tex_char_malloc_mathinfo (charinfo * ci); +extern void tex_dump_font_data (dumpstream f); +extern void tex_undump_font_data (dumpstream f); +extern void tex_create_null_font (void); +extern void tex_delete_font (int id); +extern int tex_read_font_info (char *cnom, scaled s); +extern int tex_fix_expand_value (halfword f, int e); + +extern halfword tex_handle_glyphrun (halfword head, halfword group, halfword direction); +extern halfword tex_handle_ligaturing (halfword head, halfword tail); +extern halfword tex_handle_kerning (halfword head, halfword tail); + +extern void tex_set_cur_font (halfword g, halfword f); +extern int tex_tex_def_font (int a); + +extern void tex_char_warning (halfword f, int c); + +extern void tex_initialize_fonts (void); + +extern void tex_set_font_name (halfword f, const char *s); +extern void tex_set_font_original (halfword f, const char *s); + +extern scaled tex_get_math_font_scale (halfword f, halfword size); + +extern void tex_run_font_spec (void); + +# endif diff --git a/source/luametatex/source/tex/texinputstack.c b/source/luametatex/source/tex/texinputstack.c new file mode 100644 index 000000000..9823fe137 --- /dev/null +++ b/source/luametatex/source/tex/texinputstack.c @@ -0,0 +1,1159 @@ +/* + See license.txt in the root of this project. 
+*/ + +# include "luametatex.h" + +input_state_info lmt_input_state = { + .input_stack = NULL, + .input_stack_data = { + .minimum = min_stack_size, + .maximum = max_stack_size, + .size = siz_stack_size, + .step = stp_stack_size, + .allocated = 0, + .itemsize = sizeof(in_state_record), + .top = 0, + .ptr = 0, + .initial = memory_data_unset, + .offset = 0, + }, + .in_stack = NULL, + .in_stack_data = { + .minimum = min_in_open, + .maximum = max_in_open, + .size = siz_in_open, + .step = stp_in_open, + .allocated = 0, + .itemsize = sizeof(input_stack_record), + .top = 0, + .ptr = 0, + .initial = memory_data_unset, + .offset = 0, + }, + .parameter_stack = NULL, + .parameter_stack_data = { + .minimum = min_parameter_size, + .maximum = max_parameter_size, + .size = siz_parameter_size, + .step = stp_parameter_size, + .allocated = 0, + .itemsize = sizeof(halfword), + .top = 0, + .ptr = 0, + .initial = memory_data_unset, + .offset = 0, + }, + .cur_input = { 0 }, + .input_line = 0, + .scanner_status = 0, + .def_ref = 0, + .align_state = 0, + .base_ptr = 0, + .warning_index = 0, + .open_files = 0, + .padding = 0, +} ; + +input_file_state_info input_file_state = { + .forced_file = 0, + .forced_line = 0, + .mode = 0, + .line = 0, +}; + +#define reserved_input_stack_slots 2 +#define reserved_in_stack_slots 2 +#define reserved_param_stack_slots 10 /*tex We play safe and always keep 10 in reserve (we have 9 max anyway). 
*/ + +void tex_initialize_input_state(void) +{ + { + int size = lmt_input_state.input_stack_data.minimum; + lmt_input_state.input_stack = aux_allocate_clear_array(sizeof(in_state_record), size, reserved_input_stack_slots); + if (lmt_input_state.input_stack) { + lmt_input_state.input_stack_data.allocated = size; + } else { + tex_overflow_error("input", size); + } + } + { + int size = lmt_input_state.in_stack_data.minimum; + lmt_input_state.in_stack = aux_allocate_clear_array(sizeof(input_stack_record), size, reserved_in_stack_slots); + if (lmt_input_state.in_stack) { + lmt_input_state.in_stack_data.allocated = size; + } else { + tex_overflow_error("file", size); + } + } + { + int size = lmt_input_state.parameter_stack_data.minimum; + lmt_input_state.parameter_stack = aux_allocate_clear_array(sizeof(halfword), size, reserved_param_stack_slots); + if (lmt_input_state.parameter_stack) { + lmt_input_state.parameter_stack_data.allocated = size; + } else { + tex_overflow_error("parameter", size); + } + } +} + +static int tex_aux_room_on_input_stack(void) /* quite similar to save_stack checker so maybe share */ +{ + int top = lmt_input_state.input_stack_data.ptr; + if (top > lmt_input_state.input_stack_data.top) { + lmt_input_state.input_stack_data.top = top; + if (top > lmt_input_state.input_stack_data.allocated) { + in_state_record *tmp = NULL; + top = lmt_input_state.input_stack_data.allocated + lmt_input_state.input_stack_data.step; + if (top > lmt_input_state.input_stack_data.size) { + top = lmt_input_state.input_stack_data.size; + } + if (top > lmt_input_state.input_stack_data.allocated) { + lmt_input_state.input_stack_data.allocated = top; + tmp = aux_reallocate_array(lmt_input_state.input_stack, sizeof(in_state_record), top, reserved_input_stack_slots); + lmt_input_state.input_stack = tmp; + } + lmt_run_memory_callback("input", tmp ? 1 : 0); + if (! 
tmp) { + tex_overflow_error("input", top); + return 0; + } + } + } + return 1; +} + +static int tex_aux_room_on_in_stack(void) /* quite similar to save_stack checker so maybe share */ +{ + int top = lmt_input_state.in_stack_data.ptr; + if (top > lmt_input_state.in_stack_data.top) { + lmt_input_state.in_stack_data.top = top; + if (top > lmt_input_state.in_stack_data.allocated) { + input_stack_record *tmp = NULL; + top = lmt_input_state.in_stack_data.allocated + lmt_input_state.in_stack_data.step; + if (top > lmt_input_state.in_stack_data.size) { + top = lmt_input_state.in_stack_data.size; + } + if (top > lmt_input_state.in_stack_data.allocated) { + lmt_input_state.in_stack_data.allocated = top; + tmp = aux_reallocate_array(lmt_input_state.in_stack, sizeof(input_stack_record), top, reserved_in_stack_slots); + lmt_input_state.in_stack = tmp; + } + lmt_run_memory_callback("file", tmp ? 1 : 0); + if (! tmp) { + tex_overflow_error("file", top); + return 0; + } + } + } + return 1; +} + +static int tex_aux_room_on_param_stack(void) /* quite similar to save_stack checker so maybe share */ +{ + int top = lmt_input_state.parameter_stack_data.ptr; + if (top > lmt_input_state.parameter_stack_data.top) { + lmt_input_state.parameter_stack_data.top = top; + if (top > lmt_input_state.parameter_stack_data.allocated) { + halfword *tmp = NULL; + top = lmt_input_state.parameter_stack_data.allocated + lmt_input_state.parameter_stack_data.step; + if (top > lmt_input_state.parameter_stack_data.size) { + top = lmt_input_state.parameter_stack_data.size; + } + if (top > lmt_input_state.parameter_stack_data.allocated) { + lmt_input_state.parameter_stack_data.allocated = top; + tmp = aux_reallocate_array(lmt_input_state.parameter_stack, sizeof(halfword), top, reserved_param_stack_slots); + lmt_input_state.parameter_stack = tmp; + } + lmt_run_memory_callback("parameter", tmp ? 1 : 0); + if (! 
tmp) { + tex_overflow_error("parameter", top); + return 0; + } + } + } + return 1; +} + +void tex_copy_pstack_to_param_stack(halfword *pstack, int n) +{ + if (tex_aux_room_on_param_stack()) { + memcpy(&lmt_input_state.parameter_stack[lmt_input_state.parameter_stack_data.ptr], pstack, n * sizeof(halfword)); + lmt_input_state.parameter_stack_data.ptr += n; + } +} + +/*tex + + As elsewhere we keep variables that belong together in a structure: |input_stack|, the first + unused location of |input_stack| being |input_ptr|, the largest value of |input_ptr| when + pushing |max_input_stack|, the the \quote {top} input state|cur_input|, the number of lines in + the buffer, less one, |in_open|, the number of open text files |open_files| (in regular \TEX\ + called |open_parens| because it relates to the way files are reported), the |input_file| and + the current line number in the current source file |line|. Furthermore some stacks: + |line_stack|. |source_filename_stack| and |full_source_filename_stack|. The |scanner_status| + tells if we can a end a subfile now. There is an obscure identifier relevant to non-|normal| + scanner status |warning_index|. Then there is the often used reference count pointer of token + list being defined: |def_ref|. + + Here is a procedure that uses |scanner_status| to print a warning message when a subfile has + ended, and at certain other crucial times. Actually it is only called when we run out of + token memory. Because memory errors can be of any kind, we normall will not use the \TEX\ + error handler (but we do have a callback). + + Similar code is is us in |texerrors.c| for use with the error callback. Maybe some day that + will be default. 
+ +*/ + +void tex_show_validity(void) +{ + halfword p = null; + switch (lmt_input_state.scanner_status) { + case scanner_is_defining: + p = lmt_input_state.def_ref; + break; + case scanner_is_matching: + case scanner_is_tolerant: + p = tex_expand_match_token_head(); + break; + case scanner_is_aligning: + p = tex_alignment_hold_token_head(); + break; + case scanner_is_absorbing: + p = lmt_input_state.def_ref; + break; + } + if (p) { + tex_print_ln(); + tex_token_show(p, default_token_show_max > lmt_error_state.line_limits.size - 10 ? lmt_error_state.line_limits.size - 10 : default_token_show_max); + tex_print_ln(); + } +} + +void tex_show_runaway(void) +{ + if (lmt_input_state.scanner_status > scanner_is_skipping) { + tex_print_nlp(); + switch (lmt_input_state.scanner_status) { + case scanner_is_defining: + tex_print_str("We ran into troubles when scanning a definition."); + break; + case scanner_is_matching: + tex_print_str("We ran into troubles scanning an argument."); + break; + case scanner_is_tolerant: + return; + case scanner_is_aligning: + tex_print_str("We ran into troubles scanning an alignment preamle."); + break; + case scanner_is_absorbing: + tex_print_str("We ran into troubles absorbing something."); + break; + default: + return; + } + tex_print_nlp(); + tex_show_validity(); + } +} + +/*tex + + The |param_stack| is an auxiliary array used to hold pointers to the token lists for parameters + at the current level and subsidiary levels of input. This stack is maintained with convention + (2), and it grows at a different rate from the others. + + So, the token list pointers for parameters is |param_stack|, the first unused entry in + |param_stack| is |param_ptr| which is in the range |0 .. param_size + 9|. + + The input routines must also interact with the processing of |\halign| and |\valign|, since the + appearance of tab marks and |\cr| in certain places is supposed to trigger the beginning of + special |v_j| template text in the scanner. 
This magic is accomplished by an |align_state| + variable that is increased by~1 when a |\char'173| is scanned and decreased by~1 when a |\char + '175| is scanned. The |align_state| is nonzero during the |u_j| template, after which it is set + to zero; the |v_j| template begins when a tab mark or |\cr| occurs at a time that |align_state + = 0|. + + Thus, the \quote {current input state} can be very complicated indeed; there can be many levels + and each level can arise in a variety of ways. The |show_context| procedure, which is used by + \TEX's error-reporting routine to print out the current input state on all levels down to the + most recent line of characters from an input file, illustrates most of these conventions. The + global variable |base_ptr| contains the lowest level that was displayed by this procedure. + + The status at each level is indicated by printing two lines, where the first line indicates + what was read so far and the second line shows what remains to be read. The context is cropped, + if necessary, so that the first line contains at most |half_error_line| characters, and the + second contains at most |error_line|. Non-current input levels whose |token_type| is |backed_up| + are shown only if they have not been fully read. + + When applicable, print the location of the current line. This routine should be changed, if + necessary, to give the best possible indication of where the current line resides in the input + file. For example, on some systems it is best to print both a page and line number. + + Because we also have \LUA\ input and output and because error messages and contexts can go + through \LUA, reporting is a bit different in \LUAMETATEX. 
+ +*/ + +static void tex_aux_print_indent(void) +{ + for (int q = 1; q <= lmt_error_state.context_indent; q++) { + tex_print_char(' '); + } +} + +static void tex_aux_print_current_input_state(void) +{ + int macro = 0; + tex_print_str("<"); + if (lmt_input_state.cur_input.state == token_list_state) { + switch (lmt_input_state.cur_input.token_type) { + case parameter_text: + tex_print_str("argument"); + break; + case template_pre_text: + tex_print_str("templatepre"); + break; + case template_post_text: + tex_print_str("templatepost"); + break; + case backed_up_text: + tex_print_str(lmt_input_state.cur_input.loc ? "to be read again" : "recently read"); + break; + case inserted_text: + tex_print_str("inserted text"); + break; + case macro_text: + tex_print_str("macro"); + macro = lmt_input_state.cur_input.name; + break; + case output_text: + tex_print_str("output"); + break; + case every_par_text: + tex_print_str("everypar"); + break; + case every_math_text: + tex_print_str("everymath"); + break; + case every_display_text: + tex_print_str("everydisplay"); + break; + case every_hbox_text: + tex_print_str("everyhbox"); + break; + case every_vbox_text: + tex_print_str("everyvbox"); + break; + case every_math_atom_text: + tex_print_str("everymathatom"); + break; + case every_job_text: + tex_print_str("everyjob"); + break; + case every_cr_text: + tex_print_str("everycr"); + break; + case every_tab_text: + tex_print_str("everytab"); + break; + case end_of_group_text: + tex_print_str("endofgroup"); + break; + case mark_text: + tex_print_str("mark"); + break; + case loop_text: + tex_print_str("loop"); + break; + case every_eof_text: + tex_print_str("everyeof"); + break; + case every_before_par_text: + tex_print_str("everybeforepar"); + break; + case end_paragraph_text: + tex_print_str("endpar"); + break; + case write_text: + tex_print_str("write"); + break; + case local_text: + tex_print_str("local"); + break; + case local_loop_text: + tex_print_str("localloop"); + break; + 
default: + tex_print_str("unknown"); + break; + } + } else { + switch (lmt_input_state.cur_input.name) { + case io_initial_input_code: + tex_print_str("initial"); + break; + case io_lua_input_code: + tex_print_str("lua output"); + break; + case io_token_input_code: + case io_token_eof_input_code: + tex_print_str("scantokens"); + break; + case io_tex_macro_code: + case io_file_input_code: + default: + { + /* Todo : figure out what the weird line is when we have a premature file end. */ + tex_print_str("line "); + tex_print_int(lmt_input_state.cur_input.index); + tex_print_char('.'); + tex_print_int(lmt_input_state.cur_input.index == lmt_input_state.in_stack_data.ptr ? lmt_input_state.input_line : lmt_input_state.in_stack[lmt_input_state.cur_input.index + 1].line); + } + break; + } + } + tex_print_str("> "); + if (macro) { + tex_print_cs_checked(macro); + } +} + +/*tex + + Here it is necessary to explain a little trick. We don't want to store a long string that + corresponds to a token list, because that string might take up lots of memory; and we are + printing during a time when an error message is being given, so we dare not do anything that + might overflow one of \TEX's tables. So \quote {pseudoprinting} is the answer: We enter a mode + of printing that stores characters into a buffer of length |error_line|, where character $k + + 1$ is placed into |trick_buf [k mod error_line]| if |k < trick_count|, otherwise character |k| + is dropped. Initially we set |tally := 0| and |trick_count := 1000000|; then when we reach the + point where transition from line 1 to line 2 should occur, we set |first_count := tally| and + |trick_count := tmax > (error_line, tally + 1 + error_line - half_error_line)|. At the end + of the pseudoprinting, the values of |first_count|, |tally|, and |trick_count| give us all the + information we need to print the two lines, and all of the necessary text is in |trick_buf|. 
+ + Namely, let |l| be the length of the descriptive information that appears on the first line. + The length of the context information gathered for that line is |k = first_count|, and the + length of the context information gathered for line~2 is $m=\min(|tally|, |trick_count|) - k$. + If |l + k <= h|, where |h = half_error_line|, we print |trick_buf[0 .. k-1]| after the + descriptive information on line~1, and set |n := l + k|; here |n| is the length of line~1. If + |l + k > h|, some cropping is necessary, so we set |n := h| and print |...| followed by + |trick_buf[(l + k - h + 3) .. k - 1]| where subscripts of |trick_buf| are circular modulo + |error_line|. The second line consists of |n|~spaces followed by |trick_buf[k .. (k + m - 1)]|, + unless |n + m > error_line|; in the latter case, further cropping is done. This is easier to + program than to explain. + + The following code sets up the print routines so that they will gather the desired information. + +*/ + +void tex_set_trick_count(void) +{ + lmt_print_state.first_count = lmt_print_state.tally; + lmt_print_state.trick_count = lmt_print_state.tally + 1 + lmt_error_state.line_limits.size - lmt_error_state.half_line_limits.size; + if (lmt_print_state.trick_count < lmt_error_state.line_limits.size) { + lmt_print_state.trick_count = lmt_error_state.line_limits.size; + } +} + +/*tex + + We don't care too much if we stay a bit too much below the max error_line even if we have more + room on the line. If length is really an issue then any length is. After all one can set the + length larger. 
+ +*/ + +static void tex_aux_print_valid_utf8(int q) +{ + int l = lmt_error_state.line_limits.size; + int c = (int) lmt_print_state.trick_buffer[q % l]; + if (c < 128) { + tex_print_char(c); + } else if (c < 194) { + /* invalid */ + } else if (c < 224) { + tex_print_char(c); + tex_print_char(lmt_print_state.trick_buffer[(q + 1) % l]); + } else if (c < 240) { + tex_print_char(c); + tex_print_char(lmt_print_state.trick_buffer[(q + 1) % l]); + tex_print_char(lmt_print_state.trick_buffer[(q + 2) % l]); + } else if (c < 245) { + tex_print_char(c); + tex_print_char(lmt_print_state.trick_buffer[(q + 1) % l]); + tex_print_char(lmt_print_state.trick_buffer[(q + 2) % l]); + tex_print_char(lmt_print_state.trick_buffer[(q + 3) % l]); + } else { + /*tex Invalid character! */ + } +} + +void tex_show_context(void) +{ + int context_lines = -1; /*tex Number of contexts shown so far, less one: */ + int bottom_line = 0; /*tex Have we reached the final context to be shown? */ + lmt_input_state.base_ptr = lmt_input_state.input_stack_data.ptr; + lmt_input_state.input_stack[lmt_input_state.base_ptr] = lmt_input_state.cur_input; + while (1) { + /*tex Enter into the context. */ + lmt_input_state.cur_input = lmt_input_state.input_stack[lmt_input_state.base_ptr]; + if ((lmt_input_state.cur_input.state != token_list_state) && (io_file_input(lmt_input_state.cur_input.name) || (lmt_input_state.base_ptr == 0))) { + bottom_line = 1; + } + if ((lmt_input_state.base_ptr == lmt_input_state.input_stack_data.ptr) || bottom_line || (context_lines < error_context_lines_par)) { + /*tex Display the current context. */ + if ((lmt_input_state.base_ptr == lmt_input_state.input_stack_data.ptr) || (lmt_input_state.cur_input.state != token_list_state) || (lmt_input_state.cur_input.token_type != backed_up_text) || (lmt_input_state.cur_input.loc)) { + /*tex + We omit backed-up token lists that have already been read. Get ready to count + characters. We start pseudo printing. + + This is complex code. 
When we display a context, we loop over context lines, but + actually we're talking of two lines: the discriptive line and the token list or + something from the buffer. Then there is that trick buffer stuff. In order to + get a better picture I expanded some variable names. Also, the length of the + input state line never got registered as there was no pseudo printing used. + + Because in \LUAMETATEX\ the content can come from \LUA\ we display the state + somewhat differently: we also show the input level for line numbers and we tag + for instance a macro, just for consistency. The contexts are separated by + newlines. + */ + int skip = 0; + tex_print_nlp(); + tex_aux_print_current_input_state(); + /* + The |pseudo_selector_code| selector value is only set in this context. It makes + sure that we end up at the place where the problem happens. + */ + { + int saved_selector = lmt_print_state.selector; + lmt_print_state.tally = 0; + lmt_print_state.selector = pseudo_selector_code; + lmt_print_state.trick_count = 1000000; + if (lmt_input_state.cur_input.state == token_list_state) { + halfword head = lmt_input_state.cur_input.token_type < macro_text ? lmt_input_state.cur_input.start : token_link(lmt_input_state.cur_input.start); + tex_show_token_list(head, lmt_input_state.cur_input.loc, default_token_show_max, 0); + } else if (lmt_input_state.cur_input.name == io_lua_input_code) { + skip = 1; + } else { + /*tex Before we pseudo print the line we determine the effective end. */ + int j = lmt_input_state.cur_input.limit; + if (lmt_fileio_state.io_buffer[lmt_input_state.cur_input.limit] != end_line_char_par) { + ++j; + } + if (j > 0) { + for (int i = lmt_input_state.cur_input.start; i <= j - 1; i++) { + if (i == lmt_input_state.cur_input.loc) { + tex_set_trick_count(); + } + tex_print_char(lmt_fileio_state.io_buffer[i]); + } + } + } + lmt_print_state.selector = saved_selector; + } + /*tex Print two lines using the tricky pseudoprinted information. */ + if (! 
skip) { + int p; /*tex Starting or ending place in |trick_buf|. */ + int m; /*tex Context information gathered for line 2. */ + int n; /*tex Length of line 1. */ + tex_print_nlp(); + tex_aux_print_indent(); + if (lmt_print_state.trick_count == 1000000) { + tex_set_trick_count(); + } + /*tex The |set_trick_count| must be performed. */ + if (lmt_print_state.tally < lmt_print_state.trick_count) { + m = lmt_print_state.tally - lmt_print_state.first_count; + } else { + m = lmt_print_state.trick_count - lmt_print_state.first_count; + } + if (lmt_print_state.first_count <= lmt_error_state.half_line_limits.size) { + p = 0; + n = lmt_print_state.first_count; + } else { + tex_print_str("..."); + p = lmt_print_state.first_count - lmt_error_state.half_line_limits.size + 3; + n = lmt_error_state.half_line_limits.size; + } + for (int q = p; q <= lmt_print_state.first_count - 1; q++) { + tex_aux_print_valid_utf8(q); + } + /*tex + Print |n| spaces to begin line 2. Instead of |n| we use a fixed value of + |error_context_indent|. + */ + if (m + n > lmt_error_state.line_limits.size) { + p = lmt_print_state.first_count + (lmt_error_state.line_limits.size - n - 3); + } else { + p = lmt_print_state.first_count + m; + } + if (lmt_print_state.first_count <= p - 1) { + tex_print_nlp(); + tex_aux_print_indent(); + for (int q = lmt_print_state.first_count; q <= p - 1; q++) { + tex_aux_print_valid_utf8(q); + } + if (m + n > lmt_error_state.line_limits.size) { + tex_print_str(" ..."); + } + } + } + ++context_lines; + } + } else if (context_lines == error_context_lines_par) { + tex_print_nlp(); + tex_print_str(" ..."); + tex_print_nlp(); + ++context_lines; + /*tex Omitted if |error_context_lines_par < 0|. */ + } + if (bottom_line) { + break; + } else { + --lmt_input_state.base_ptr; + } + } + /*tex Restore the original state. 
*/ + lmt_input_state.cur_input = lmt_input_state.input_stack[lmt_input_state.input_stack_data.ptr]; + tex_print_ln(); + tex_print_nlp(); +} + +/*tex + + The following subroutines change the input status in commonly needed ways. First comes + |push_input|, which stores the current state and creates a new level (having, initially, the + same properties as the old). Enter a new input level, save the old: + +*/ + +inline static void tex_aux_push_input(void) +{ + if (tex_aux_room_on_input_stack()) { + lmt_input_state.input_stack[lmt_input_state.input_stack_data.ptr] = lmt_input_state.cur_input; + ++lmt_input_state.input_stack_data.ptr; + } else { + tex_overflow_error("input stack size", lmt_input_state.input_stack_data.size); + } +} + +inline static void tex_aux_pop_input(void) +{ + lmt_input_state.cur_input = lmt_input_state.input_stack[--lmt_input_state.input_stack_data.ptr]; +} + +/*tex + + Here is a procedure that starts a new level of token-list input, given a token list |p| and its + type |t|. If |t=macro|, the calling routine should set |name| and |loc|. + + I added a few few simple variants because the compiler will then inline the little code involved + and these are used often. + +*/ + +void tex_begin_token_list(halfword t, quarterword kind) +{ + tex_aux_push_input(); + lmt_input_state.cur_input.state = token_list_state; + lmt_input_state.cur_input.start = t; + lmt_input_state.cur_input.token_type = kind; + if (kind < macro_text) { + lmt_input_state.cur_input.loc = t; + } else if (kind == macro_text) { + /*tex More frequently when processing a document: */ + tex_add_token_reference(t); + lmt_input_state.cur_input.parameter_start = lmt_input_state.parameter_stack_data.ptr; + } else { + /*tex More frequently when making a format: */ + tex_add_token_reference(t); + /*tex The token list started with a reference count. 
*/ + lmt_input_state.cur_input.loc = token_link(t); + if (tracing_macros_par > 0) { + tex_begin_diagnostic(); + switch (kind) { + case mark_text: + tex_print_str("mark"); + break; + case loop_text: + tex_print_str("loop"); + break; + case write_text: + tex_print_str("write"); + break; + case local_text: + tex_print_str("local"); + break; + case local_loop_text: + tex_print_str("localloop"); + break; + case end_paragraph_text: + tex_print_str("endpar"); + break; + default: + /* messy offsets */ + tex_print_cmd_chr(internal_toks_cmd, kind - output_text + internal_toks_location(output_routine_code)); + break; + } + tex_print_str("->"); + tex_token_show(t, default_token_show_max); + tex_end_diagnostic(); + } + } +} + +void tex_begin_parameter_list(halfword t) +{ + if (t) { + tex_aux_push_input(); + lmt_input_state.cur_input.state = token_list_state; + lmt_input_state.cur_input.start = t; + lmt_input_state.cur_input.loc = t; + lmt_input_state.cur_input.token_type = parameter_text; + } else { + // can happen + } +} + +void tex_begin_backed_up_list(halfword t) +{ + if (t) { + tex_aux_push_input(); + lmt_input_state.cur_input.state = token_list_state; + lmt_input_state.cur_input.start = t; + lmt_input_state.cur_input.loc = t; + lmt_input_state.cur_input.token_type = backed_up_text; + } else { + // can happen + } +} + +void tex_begin_inserted_list(halfword t) +{ + // if (t) { + tex_aux_push_input(); + lmt_input_state.cur_input.state = token_list_state; + lmt_input_state.cur_input.start = t; + lmt_input_state.cur_input.loc = t; + lmt_input_state.cur_input.token_type = inserted_text; + // } else { + // // never happens + // } +} + +void tex_begin_macro_list(halfword t) +{ + // if (t) { + tex_aux_push_input(); + lmt_input_state.cur_input.state = token_list_state; + lmt_input_state.cur_input.start = t; + tex_add_token_reference(t); + lmt_input_state.cur_input.token_type = macro_text; + lmt_input_state.cur_input.parameter_start = lmt_input_state.parameter_stack_data.ptr; + // } 
else { + // // never happens + // } +} + +/*tex + + When a token list has been fully scanned, the following computations should be done as we leave + that level of input. The |token_type| tends to be equal to either |backed_up| or |inserted| + about 2/3 of the time. + +*/ + +void tex_end_token_list(void) +{ + /*tex Leave a token-list input level: */ + switch (lmt_input_state.cur_input.token_type) { + case parameter_text: + break; + case template_pre_text: + if (lmt_input_state.align_state > 500000) { + lmt_input_state.align_state = 0; + } else { + tex_alignment_interwoven_error(7); + } + break; + case template_post_text: + break; + case backed_up_text: + case inserted_text: + case end_of_group_text: + /* case local_text: */ + tex_flush_token_list(lmt_input_state.cur_input.start); + break; + case macro_text: + { + tex_delete_token_reference(lmt_input_state.cur_input.start); + if (get_token_parameters(lmt_input_state.cur_input.start)) { + /*tex Parameters must be flushed: */ + int ptr = lmt_input_state.parameter_stack_data.ptr; + int start = lmt_input_state.cur_input.parameter_start; + while (ptr > start) { + --ptr; + if (lmt_input_state.parameter_stack[ptr]) { + tex_flush_token_list(lmt_input_state.parameter_stack[ptr]); + } + } + lmt_input_state.parameter_stack_data.ptr = start; + } else { + /*tex We have no arguments but we save very little runtime here. */ + } + break; + } + default: + /*tex Update the reference count: */ + tex_delete_token_reference(lmt_input_state.cur_input.start); + break; + } + tex_aux_pop_input(); + /* check_interrupt(); */ +} + +/*tex A special version used in macro expansion. Maybe some day I'll optimize it. */ + +void tex_cleanup_input_state(void) +{ + while (! 
lmt_input_state.cur_input.loc && lmt_input_state.cur_input.state == token_list_state) { + switch (lmt_input_state.cur_input.token_type) { + case parameter_text: + break; + case template_pre_text: + if (lmt_input_state.align_state > 500000) { + lmt_input_state.align_state = 0; + } else { + tex_alignment_interwoven_error(7); + } + break; + case template_post_text: + break; + case backed_up_text: + case inserted_text: + case end_of_group_text: + /* case local_text: */ + tex_flush_token_list(lmt_input_state.cur_input.start); + break; + case macro_text: + { + /*tex Using a simple version for no arguments has no gain. */ + tex_delete_token_reference(lmt_input_state.cur_input.start); + /*tex Parameters must be flushed: */ + int ptr = lmt_input_state.parameter_stack_data.ptr; + int start = lmt_input_state.cur_input.parameter_start; + while (ptr > start) { + --ptr; + if (lmt_input_state.parameter_stack[ptr]) { + tex_flush_token_list(lmt_input_state.parameter_stack[ptr]); + } + } + lmt_input_state.parameter_stack_data.ptr = start; + break; + } + default: + /*tex Update the reference count: */ + tex_delete_token_reference(lmt_input_state.cur_input.start); + break; + } + tex_aux_pop_input(); + } +} + +/*tex + + Sometimes \TEX\ has read too far and wants to \quote {unscan} what it has seen. The |back_input| + procedure takes care of this by putting the token just scanned back into the input stream, ready + to be read again. This procedure can be used only if |cur_tok| represents the token to be + replaced. Some applications of \TEX\ use this procedure a lot, so it has been slightly optimized + for speed. + +*/ + +/*tex Undo one token of input: */ + +void tex_back_input(halfword t) +{ + while ((lmt_input_state.cur_input.state == token_list_state) && (! 
lmt_input_state.cur_input.loc) && (lmt_input_state.cur_input.token_type != template_post_text)) { + tex_end_token_list(); + } + { + /*tex A token list of length one: */ + halfword p = tex_get_available_token(t); + if (t < right_brace_limit) { + if (t < left_brace_limit) { + --lmt_input_state.align_state; + } else { + ++lmt_input_state.align_state; + } + } + /* + if (token_type == backed_up_text && istate == token_list_state && istart == iloc) { + token_link(p) = istart; + istart = p; + iloc = p; + } else { + */ + tex_aux_push_input(); + /*tex This is |back_list(p)|, without procedure overhead: */ + lmt_input_state.cur_input.start = p; + lmt_input_state.cur_input.loc = p; + lmt_input_state.cur_input.state = token_list_state; + lmt_input_state.cur_input.token_type = backed_up_text; + /* } */ + } +} + +/*tex Insert token |p| into \TEX's input: */ + +void tex_reinsert_token(halfword t) +{ + halfword p = tex_get_available_token(t); + set_token_link(p, lmt_input_state.cur_input.loc); + lmt_input_state.cur_input.start = p; + lmt_input_state.cur_input.loc = p; + if (t < right_brace_limit) { + if (t < left_brace_limit) { + --lmt_input_state.align_state; + } else { + ++lmt_input_state.align_state; + } + } +} + +/*tex Some aftergroup related code: */ + +void tex_insert_input(halfword h) +{ + if (h) { + while ((lmt_input_state.cur_input.state == token_list_state) && (! 
lmt_input_state.cur_input.loc) && (lmt_input_state.cur_input.token_type != template_post_text)) { + tex_end_token_list(); + } + if (token_info(h) < right_brace_limit) { + if (token_info(h) < left_brace_limit) { + --lmt_input_state.align_state; + } else { + ++lmt_input_state.align_state; + } + } + tex_aux_push_input(); + lmt_input_state.cur_input.start = h; + lmt_input_state.cur_input.loc = h; + lmt_input_state.cur_input.state = token_list_state; + lmt_input_state.cur_input.token_type = inserted_text; + } +} + +void tex_append_input(halfword h) +{ + if (h) { + halfword n = h; + if (n) { + while (token_link(n)) { + n = token_link(n); + } + set_token_link(n, lmt_input_state.cur_input.loc); + } else { + set_token_link(h, lmt_input_state.cur_input.loc); + } + lmt_input_state.cur_input.start = h; + lmt_input_state.cur_input.loc = h; + } +} + +/*tex + + The |begin_file_reading| procedure starts a new level of input for lines of characters to be + read from a file, or as an insertion from the terminal. It does not take care of opening the + file, nor does it set |loc| or |limit| or |line|. 
+ +*/ + +void tex_begin_file_reading(void) +{ + ++lmt_input_state.in_stack_data.ptr; + if (tex_aux_room_on_in_stack() && tex_room_in_buffer(lmt_fileio_state.io_first)) { + tex_aux_push_input(); + lmt_input_state.cur_input.index = (short) lmt_input_state.in_stack_data.ptr; + lmt_input_state.in_stack[lmt_input_state.cur_input.index].full_source_filename = NULL; + lmt_input_state.in_stack[lmt_input_state.cur_input.index].end_of_file_seen = 0; + lmt_input_state.in_stack[lmt_input_state.cur_input.index].group = cur_boundary; + lmt_input_state.in_stack[lmt_input_state.cur_input.index].line = lmt_input_state.input_line; + lmt_input_state.in_stack[lmt_input_state.cur_input.index].if_ptr = lmt_condition_state.cond_ptr; + lmt_input_state.cur_input.start = lmt_fileio_state.io_first; + lmt_input_state.cur_input.state = mid_line_state; + lmt_input_state.cur_input.name = io_initial_input_code; + lmt_input_state.cur_input.cattable = default_catcode_table_preset; + lmt_input_state.cur_input.partial = 0; + /*tex Prepare terminal input \SYNCTEX\ information. */ + lmt_input_state.cur_input.state_file = 0; + lmt_input_state.cur_input.state_line = 0; + } +} + +/*tex + + Conversely, the variables must be downdated when such a level of input is finished. What needs + to be closed depends on what was opened. + +*/ + +void tex_end_file_reading(void) +{ + lmt_fileio_state.io_first = lmt_input_state.cur_input.start; + lmt_input_state.input_line = lmt_input_state.in_stack[lmt_input_state.cur_input.index].line; + switch (lmt_input_state.cur_input.name) { + case io_initial_input_code: + break; + case io_lua_input_code: + case io_token_input_code: + case io_token_eof_input_code: + /*tex happens more frequently than reading from file */ + lmt_cstring_close(); + break; + case io_tex_macro_code: + break; + default: + /*tex A file opened with |\input|, |\read...| is handled by \LUA. 
*/ + tex_lua_a_close_in(); + if (lmt_input_state.in_stack[lmt_input_state.cur_input.index].full_source_filename) { + lmt_memory_free(lmt_input_state.in_stack[lmt_input_state.cur_input.index].full_source_filename); + lmt_input_state.in_stack[lmt_input_state.cur_input.index].full_source_filename = NULL; + } + break; + } + tex_aux_pop_input(); + --lmt_input_state.in_stack_data.ptr; +} + +/*tex + + To get \TEX's whole input mechanism going, we perform the following actions. + +*/ + +void tex_initialize_inputstack(void) +{ + lmt_input_state.input_stack_data.ptr = 0; + lmt_input_state.input_stack_data.top = 0; + lmt_input_state.in_stack[0].full_source_filename = NULL; + lmt_input_state.in_stack_data.ptr = 0; + lmt_input_state.open_files = 0; + lmt_fileio_state.io_buffer_data.top = 0; + lmt_input_state.in_stack[0].group = 0; + lmt_input_state.in_stack[0].if_ptr = null; + lmt_input_state.parameter_stack_data.ptr = 0; + lmt_input_state.parameter_stack_data.top = 0; + lmt_input_state.scanner_status = scanner_is_normal; + lmt_input_state.warning_index = null; + lmt_fileio_state.io_first = 1; + lmt_input_state.cur_input.state = new_line_state; + lmt_input_state.cur_input.start = 1; + lmt_input_state.cur_input.index = 0; + lmt_input_state.input_line = 0; + lmt_input_state.cur_input.name = io_initial_input_code; + lmt_token_state.force_eof = 0; + lmt_token_state.luacstrings = 0; + lmt_input_state.cur_input.cattable = default_catcode_table_preset; + lmt_input_state.cur_input.partial = 0; + lmt_input_state.align_state = 1000000; +} + +void tex_tex_string_start(int iotype, int cattable) +{ + (void) iotype; + { + halfword head = tex_scan_general_text(NULL); + int saved_selector = lmt_print_state.selector; + lmt_print_state.selector = new_string_selector_code; + tex_show_token_list(head, null, extreme_token_show_max, 0); + lmt_print_state.selector = saved_selector; + tex_flush_token_list(head); + } + { + int len; + char *str = tex_take_string(&len); + lmt_cstring_store(str, len, 
tex_valid_catcode_table(cattable) ? cattable : cat_code_table_par); + tex_begin_file_reading(); + lmt_input_state.input_line = 0; + lmt_input_state.cur_input.limit = lmt_input_state.cur_input.start; + lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1; + lmt_input_state.cur_input.name = io_token_input_code; + lmt_cstring_start(); + } +} + + +void tex_lua_string_start(void) +{ + /*tex Set up |cur_file| and a new level of input: */ + tex_begin_file_reading(); + lmt_input_state.input_line = 0; + lmt_input_state.cur_input.limit = lmt_input_state.cur_input.start; + /*tex Force line read: */ + lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1; + lmt_input_state.cur_input.name = io_lua_input_code; + lmt_cstring_start(); +} + +void tex_any_string_start(char* s) +{ + /* via terminal emulator */ + /* + int len = strlen(s); + if (len > 0 && room_in_buffer(len + 1)) { + fileio_state.io_last = fileio_state.io_first; + strcpy((char *) &fileio_state.io_buffer[fileio_state.io_first], s); + fileio_state.io_last += len; + input_state.cur_input.loc = fileio_state.io_first; + input_state.cur_input.limit = fileio_state.io_last; + fileio_state.io_first = fileio_state.io_last + 1; + } + */ + /* via token input emulator */ + lmt_cstring_store(s, (int) strlen(s), cat_code_table_par); + tex_begin_file_reading(); + lmt_input_state.input_line = 0; + lmt_input_state.cur_input.limit = lmt_input_state.cur_input.start; + lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1; + lmt_input_state.cur_input.name = io_token_input_code; + lmt_cstring_start(); +} + +/*tex a list without ref count*/ + +halfword tex_wrapped_token_list(halfword list) +{ + halfword head = tex_store_new_token(null, left_brace_token + '{'); + halfword tail = head; + token_link(tail) = token_link(list); + while (token_link(tail)) { + tail = token_link(tail); + } + tail = tex_store_new_token(tail, right_brace_token + '}'); + return head; +} + +const char 
*tex_current_input_file_name(void) +{ + int level = lmt_input_state.in_stack_data.ptr; + while (level > 0) { + const char *s = lmt_input_state.in_stack[level--].full_source_filename; + if (s) { + return s; + } + } + /*tex old method */ + level = lmt_input_state.in_stack_data.ptr; + while (level > 0) { + int t = lmt_input_state.input_stack[level--].name; + if (t >= cs_offset_value) { + return (const char *) str_string(t); + } + } + return NULL; +} diff --git a/source/luametatex/source/tex/texinputstack.h b/source/luametatex/source/tex/texinputstack.h new file mode 100644 index 000000000..7ae677d56 --- /dev/null +++ b/source/luametatex/source/tex/texinputstack.h @@ -0,0 +1,452 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_INPUTSTACK_H +# define LMT_INPUTSTACK_H + +/*tex + + The state of \TEX's input mechanism appears in the input stack, whose entries are records with + six fields, called |state|, |index|, |start|, |loc|, |limit|, and |name|. + +*/ + +/* todo: there is no need to be sparse here */ + +typedef struct in_state_record { + halfword start; + halfword loc; + unsigned short state; + union { unsigned short index; unsigned short token_type; }; /*tex: So, no macro but name. */ + union { halfword limit; halfword parameter_start; }; /*tex: So, no macro but name. */ + halfword name; + signed short cattable; /*tex The category table used by the current line (see |textoken.c|). */ + unsigned short partial; /*tex Is the current line partial (see |textoken.c|)? */ + int state_file; /*tex Here we stack the tag of the current file. */ + int state_line; /*tex Not used. */ +} in_state_record; + +typedef struct input_stack_record { + halfword input_file_callback_id; + halfword line; + halfword end_of_file_seen; + halfword group; + halfword if_ptr; + halfword padding; + char *full_source_filename; +} input_stack_record; + +// todo: better names for in_state_record and input_stack_record ... 
now mixed up + +typedef struct input_state_info { + in_state_record *input_stack; + memory_data input_stack_data; + input_stack_record *in_stack; + memory_data in_stack_data; + halfword *parameter_stack; + memory_data parameter_stack_data; + in_state_record cur_input; /*tex The \quote {top} input state. Why not just pointing. */ + int input_line; + int scanner_status; + halfword def_ref; /*tex Has to be set for error recovery etc. */ + int align_state; + int base_ptr; + halfword warning_index; + int open_files; + int padding; +} input_state_info; + +extern input_state_info lmt_input_state; + +typedef struct input_file_state_info { + int forced_file; + int forced_line; + halfword mode; + halfword line; +} input_file_state_info; + +extern input_file_state_info input_file_state; + +static inline int input_file_value(void) +{ + return input_file_state.forced_file ? input_file_state.forced_file : lmt_input_state.cur_input.state_file; +} + +static inline int input_line_value(void) +{ + return input_file_state.forced_line ? input_file_state.forced_line : (input_file_state.line ? input_file_state.line : lmt_input_state.input_line); +} + +/*tex + + In \LUAMETATEX\ the io model was stepwise changed a bit, mostly in the \LUA\ feedback area. + Support for nodes, tokens, short and long string were improved. Around 2.06.17 specification + nodes became dynamic and that left the pseudo files as only variable node type. By removing + variable nodes we can avoid some code in node management so getting rid of pseudo files made + sense. The token scan macros used these but now use a lightweight varian tof the \LUA\ scanner, + which we had anyway. The only complication is the |\everyeof| of |\scantokens|. Also, tracing + (if at all) is now different but these three scanners are seldom used and were introduced in + \ETEX\ (|scantokens|), \LUATEX\ (|\scantextokens|) and \LUAMETATEX\ (|tokenized|). The new + approach also gives more room for future extensions. 
+ + All this has been a very stepwise process, because we know that there are users who use \LMTX\ + in production and small steps are easier to test. Experiments mostly happen in parts of the + code that is less critital ... after all \LUAMETATEX\ is also an experimental engine ... but + io related code changes are kind of critital. + + Just to remember wahat we came from: the first 15 were reserved read channels but that is now + delegated to \LUA, so we had an offset of 16 in: + +*/ + +typedef enum io_codes { + io_initial_input_code, + io_lua_input_code, + io_token_input_code, + io_token_eof_input_code, + io_tex_macro_code, + io_file_input_code, +} io_codes; + +/* +* + Now, these |io_codes| are used in the name field but that field can also be a way larger number, + i.e.\ the string index of the file. That also assumes that the first used index is above the last + io_code. It can be the warning index too, just for the sake of an error context message. So: + symbolic (small) number, tex string being the filename, and macro name. But, because we also + have that information in other places (partly as side effect of luafication) a simpler model is + used now where we use a few dedicates codes. It also means that we no longer store the filename + in the string pool. + +*/ + +# define io_token_input(c) (c >= io_lua_input_code && c <= io_token_eof_input_code) +# define io_file_input(c) (c >= io_file_input_code) + +/*tex + + Let's look more closely now at the control variables (|state|, |index|, |start|, |loc|, |limit|, + |name|), assuming that \TEX\ is reading a line of characters that have been input from some file + or from the user's terminal. There is an array called |buffer| that acts as a stack of all lines + of characters that are currently being read from files, including all lines on subsidiary levels + of the input stack that are not yet completed. \TEX\ will return to the other lines when it is + finished with the present input file. 
+ + (Incidentally, on a machine with byte-oriented addressing, it might be appropriate to combine + |buffer| with the |str_pool| array, letting the buffer entries grow downward from the top of the + string pool and checking that these two tables don't bump into each other.) + + The line we are currently working on begins in position |start| of the buffer; the next character + we are about to read is |buffer[loc]|; and |limit| is the location of the last character present. + If |loc > limit|, the line has been completely read. Usually |buffer[limit]| is the + |end_line_char|, denoting the end of a line, but this is not true if the current line is an + insertion that was entered on the user's terminal in response to an error message. + + The |name| variable is a string number that designates the name of the current file, if we are + reading a text file. It is zero if we are reading from the terminal; it is |n+1| if we are reading + from input stream |n|, where |0 <= n <= 16|. (Input stream 16 stands for an invalid stream number; + in such cases the input is actually from the terminal, under control of the procedure |read_toks|.) + Finally |18 <= name <=20| indicates that we are reading a pseudo file created by the |\scantokens| + or |\scantextokens| command. A larger value is reserved for input coming from \LUA. + + The |state| variable has one of three values, when we are scanning such files: + + \startitemize + \startitem + |mid_line| is the normal state. + \stopitem + \startitem + |skip_blanks| is like |mid_line|, but blanks are ignored. + \stopitem + \startitem + |new_line| is the state at the beginning of a line. + \stopitem + \stopitemize + + These state values are assigned numeric codes so that if we add the state code to the next + character's command code, we get distinct values. 
For example, |mid_line + spacer| stands for the + case that a blank space character occurs in the middle of a line when it is not being ignored; + after this case is processed, the next value of |state| will be |skip_blanks|. + + As with other constants, we only add some prefix or suffix but keep the normal name as much as + possible, so that the original documentation still applies. + +*/ + +typedef enum state_codes { + /*tex when reading from a token list rather than from a line of characters */ token_list_state = 0, + /*tex when scanning a line of characters */ + mid_line_state = 1, + /*tex when ignoring blanks */ + skip_blanks_state = 2 + max_char_code, + /*tex at the start of a line */ + new_line_state = 3 + max_char_code + max_char_code, +} state_codes; + +/*tex + + Additional information about the current line is available via the |index| variable, which + counts how many lines of characters are present in the buffer below the current level. We + have |index = 0| when reading from the terminal and prompting the user for each line; then if + the user types, e.g., |\input paper|, we will have |index = 1| while reading the file + |paper.tex|. However, it does not follow that |index| is the same as the input stack pointer, + since many of the levels on the input stack may come from token lists. For example, the + instruction |\input paper| might occur in a token list. + + The global variable |in_open| is equal to the |index| value of the highest \quote {non token + list} level. Thus, the number of partially read lines in the buffer is |in_open + 1|, and we + have |in_open = index| when we are not reading a token list. + + If we are not currently reading from the terminal, or from an input stream, we are reading from + the file variable |input_file [index]|. We use the notation |terminal_input| as a convenient + abbreviation for |name = 0|, and |cur_file| as an abbreviation for |input_file [index]|. + + The global variable |line| contains the line number in the topmost open file, for use in error + messages. 
If we are not reading from the terminal, |line_stack [index]| holds the line number + for the enclosing level, so that |line| can be restored when the current file has been read. + Line numbers should never be negative, since the negative of the current line number is used to + identify the user's output routine in the |mode_line| field of the semantic nest entries. + + If more information about the input state is needed, it can be included in small arrays like + those shown here. For example, the current page or segment number in the input file might be + put into a variable |page|, maintained for enclosing levels in |page_stack:array [1 .. + max_input_open] of integer| by analogy with |line_stack|. + + Users of \TEX\ sometimes forget to balance left and right braces properly, and one of the ways + \TEX\ tries to spot such errors is by considering an input file as broken into subfiles by + control sequences that are declared to be |\outer|. + + A variable called |scanner_status| tells \TEX\ whether or not to complain when a subfile ends. + This variable has six possible values: + + \startitemize + + \startitem + |normal|, means that a subfile can safely end here without incident. + \stopitem + + \startitem + |skipping|, means that a subfile can safely end here, but not a file, because we're reading + past some conditional text that was not selected. + \stopitem + + \startitem + |defining|, means that a subfile shouldn't end now because a macro is being defined. + \stopitem + + \startitem + |matching|, means that a subfile shouldn't end now because a macro is being used and we are + searching for the end of its arguments. + \stopitem + + \startitem + |aligning|, means that a subfile shouldn't end now because we are not finished with the + preamble of an |\halign| or |\valign|. + \stopitem + + \startitem + |absorbing|, means that a subfile shouldn't end now because we are reading a balanced token + list for |\message|, |\write|, etc. 
+ \stopitem + + \stopitemize + + If the |scanner_status| is not |normal|, the variable |warning_index| points to the |eqtb| + location for the relevant control sequence name to print in an error message. + +*/ + +typedef enum scanner_states { + scanner_is_normal, /*tex a subfile can safely end here */ + scanner_is_skipping, /*tex passing conditional text */ + scanner_is_defining, /*tex reading a macro definition */ + scanner_is_matching, /*tex reading macro arguments */ + scanner_is_tolerant, /*tex reading tolerant macro arguments */ + scanner_is_aligning, /*tex reading an alignment preamble */ + scanner_is_absorbing, /*tex reading a balanced text */ +} scanner_states; + +extern void tex_show_runaway(void); /*tex This is only used when running out of token memory. */ + +/*tex + + However, the discussion about input state really applies only to the case that we are inputting + from a file. There is another important case, namely when we are currently getting input from a + token list. In this case |state = token_list|, and the conventions about the other state + variables are + different: + + \startitemize + + \startitem + |loc| is a pointer to the current node in the token list, i.e., the node that will be read + next. If |loc=null|, the token list has been fully read. + \stopitem + + \startitem + |start| points to the first node of the token list; this node may or may not contain a + reference count, depending on the type of token list involved. + \stopitem + + \startitem + |token_type|, which takes the place of |index| in the discussion above, is a code number + that explains what kind of token list is being scanned. + \stopitem + + \startitem + |name| points to the |eqtb| address of the control sequence being expanded, if the current + token list is a macro. + \stopitem + + \startitem + |param_start|, which takes the place of |limit|, tells where the parameters of the current + macro begin in the |param_stack|, if the current token list is a macro. 
+ \stopitem + + \stopitemize + + The |token_type| can take several values, depending on where the current token list came from: + + \startitemize + + \startitem + |parameter|, if a parameter is being scanned; + \stopitem + + \startitem + |u_template|, if the |u_j| part of an alignment template is being scanned; + \stopitem + + \startitem + |v_template|, if the |v_j| part of an alignment template is being scanned; + \stopitem + + \startitem + |backed_up|, if the token list being scanned has been inserted as \quotation {to be read + again}. + \stopitem + + \startitem + |inserted|, if the token list being scanned has been inserted as the text expansion of a + |\count| or similar variable; + \stopitem + + \startitem + |macro|, if a user-defined control sequence is being scanned; + \stopitem + + \startitem + |output_text|, if an |\output| routine is being scanned; + \stopitem + + \startitem + |every_par_text|, if the text of |\everypar| is being scanned; + \stopitem + + \startitem + |every_math_text|, if the text of |\everymath| is being scanned; + \stopitem + + \startitem + |every_display_text|, if the text of |\everydisplay| is being scanned; + \stopitem + + \startitem + |every_hbox_text|, if the text of |\everyhbox| is being scanned; + \stopitem + + \startitem + |every_vbox_text|, if the text of |\everyvbox| is being scanned; + \stopitem + + \startitem + |every_job_text|, if the text of |\everyjob| is being scanned; + \stopitem + + \startitem + |every_cr_text|, if the text of |\everycr| is being scanned; + \stopitem + + \startitem + |mark_text|, if the text of a |\mark| is being scanned; + \stopitem + + \startitem + |write_text|, if the text of a |\write| is being scanned. + \stopitem + + \stopitemize + + The codes for |output_text|, |every_par_text|, etc., are equal to a constant plus the + corresponding codes for token list parameters |output_routine_loc|, |every_par_loc|, etc. + + The token list begins with a reference count if and only if |token_type >= macro|. 
+ + Since \ETEX's additional token list parameters precede |toks_base|, the corresponding token + types must precede |write_text|. However, in \LUAMETATEX\ we delegate all the read and write + primitives to \LUA\ so that model has been simplified. + +*/ + +/* #define token_type input_state.cur_input.token_type */ /*tex type of current token list */ +/* #define param_start input_state.cur_input.param_start */ /*tex base of macro parameters in |param_stack| */ + +typedef enum token_types { + parameter_text, /*tex parameter */ + template_pre_text, /*tex |u_j| template */ + template_post_text, /*tex |v_j| template */ + backed_up_text, /*tex text to be reread */ + inserted_text, /*tex inserted texts */ + macro_text, /*tex defined control sequences */ + output_text, /*tex output routines */ + every_par_text, /*tex |\everypar| */ + every_math_text, /*tex |\everymath| */ + every_display_text, /*tex |\everydisplay| */ + every_hbox_text, /*tex |\everyhbox| */ + every_vbox_text, /*tex |\everyvbox| */ + every_math_atom_text, /*tex |\everymathatom| */ + every_job_text, /*tex |\everyjob| */ + every_cr_text, /*tex |\everycr| */ + every_tab_text, /*tex |\everytab| */ + error_help_text, + every_before_par_text, /*tex |\everybeforepar| */ + every_eof_text, /*tex |\everyeof| */ + end_of_group_text, + mark_text, /*tex |\topmark|, etc. 
*/ + loop_text, + end_paragraph_text, /*tex |\everyendpar| */ + write_text, /*tex |\write| */ + local_text, + local_loop_text, +} token_types; + +extern void tex_initialize_input_state (void); +/* int tex_room_on_param_stack (void); */ +/* int tex_room_on_in_stack (void); */ +/* int tex_room_on_input_stack (void); */ +extern void tex_copy_pstack_to_param_stack (halfword *pstack, int n); +extern void tex_show_context (void); +extern void tex_show_validity (void); +extern void tex_set_trick_count (void); +extern void tex_begin_token_list (halfword t, quarterword kind); /* include some tracing */ +extern void tex_begin_parameter_list (halfword t); /* less inlining code */ +extern void tex_begin_backed_up_list (halfword t); /* less inlining code */ +extern void tex_begin_inserted_list (halfword t); /* less inlining code */ +extern void tex_begin_macro_list (halfword t); /* less inlining code */ +extern void tex_end_token_list (void); +extern void tex_cleanup_input_state (void); +extern void tex_back_input (halfword t); +extern void tex_reinsert_token (halfword t); +extern void tex_insert_input (halfword h); +extern void tex_append_input (halfword h); +extern void tex_begin_file_reading (void); +extern void tex_end_file_reading (void); +extern void tex_initialize_inputstack (void); +extern void tex_lua_string_start (void); +extern void tex_tex_string_start (int iotype, int cattable); +extern void tex_any_string_start (char *s); +extern halfword tex_wrapped_token_list (halfword h); +extern const char *tex_current_input_file_name (void); + +# endif diff --git a/source/luametatex/source/tex/texinserts.c b/source/luametatex/source/tex/texinserts.c new file mode 100644 index 000000000..874dcf24d --- /dev/null +++ b/source/luametatex/source/tex/texinserts.c @@ -0,0 +1,517 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + In traditional \TEX\ inserts are implemented using a quadruple of box, dimen, count and skip + registers. 
This means that the allocate macro |\newinsert| as well as the other allocators + have to keep a range of registers free. In \CONTEXT\ (\MKII\ and \MKIV) for instance the + indices 132 up to 254 are reserved for that. + + When pondering about improvements this implementation detail always puts some strains on + the possible solutions and it is for that reason that an alternative code path is present, + one that keeps the relevant data in dedicated data structures. When that got implemented all + accessors ended up here. Most were already abstracted anyway. For now it means that the old + interface still works (and is the default). By setting the |\insertmode| to 2 the alternative + path is chosen. For practical reasons the first time an insert is used that value gets + frozen; a mixed approach was too messy. + + Actually the new variant, which is tagged |class| instead of |index|, also better suits the + extended box model. There is access to the three basic dimensions but that's all. One can wrap + in a box and mess with others but doing that with the boxes inserts makes no sense because + the output routine expects simple boxes. + + A side effect is of course that we now have more primitives, starting with |\insert...| and + also helpers at the \LUA\ end. A few more will follow and likely some enhancements will show + up too. + + In this new mode we also store the floatingpenalty and maxdepth so these can now differ per + class. They were already stored in the node, but this way we don't need to set the shared + variable every time we do an insert. 
+ +*/ + +/*tex The global insert state: no records are allocated yet and the mode starts out as |unset_insert_mode|. */ insert_state_info lmt_insert_state = { + .inserts = NULL, + .insert_data = { + .minimum = min_insert_size, + .maximum = max_insert_size, + .size = memory_data_unset, + .step = stp_insert_size, + .allocated = 0, + .itemsize = sizeof(insert_record), + .top = 0, + .ptr = 0, + .initial = memory_data_unset, + .offset = 0, + }, + .mode = unset_insert_mode, + .storing = 0, +}; + +void tex_initialize_inserts(void) +{ + /*tex Allocate the initial array of |minimum| records (the last argument presumably reserves one slack entry, to be confirmed), make that the capacity |top| and start with no class in use (|ptr| is zero). When the allocation fails an overflow is reported. */ insert_record *tmp = aux_allocate_clear_array(sizeof(insert_record), lmt_insert_state.insert_data.minimum, 1); + if (tmp) { + lmt_insert_state.inserts = tmp; + lmt_insert_state.insert_data.allocated = lmt_insert_state.insert_data.minimum * sizeof(insert_record); + lmt_insert_state.insert_data.top = lmt_insert_state.insert_data.minimum; + lmt_insert_state.insert_data.ptr = 0; + } else { + tex_overflow_error("inserts", lmt_insert_state.insert_data.minimum); + } +} + +/*tex + This one is not sparse but we don't have many inserts so we're okay. I need to check the 0/1 + offsets here. 
+*/ + +int tex_valid_insert_id(halfword n) +{ + switch (lmt_insert_state.mode) { + case index_insert_mode: + return (n >= 0 && n <= max_box_register_index); + case class_insert_mode: + if (n <= 0) { + tex_handle_error( + normal_error_type, + "In \\insertmode 2 you can't use zero as index.", + NULL + ); + } else if (n <= lmt_insert_state.insert_data.ptr) { + return 1; + } else if (n < lmt_insert_state.insert_data.top) { + lmt_insert_state.insert_data.ptr = n; + return 1; + } else if (n < lmt_insert_state.insert_data.maximum && lmt_insert_state.insert_data.top < lmt_insert_state.insert_data.maximum) { + insert_record *tmp ; + int top = n + lmt_insert_state.insert_data.step; + if (top > lmt_insert_state.insert_data.maximum) { + top = lmt_insert_state.insert_data.maximum; + } + tmp = aux_reallocate_array(lmt_insert_state.inserts, sizeof(insert_record), top, 1); // 1 slack + if (tmp) { + size_t extra = ((size_t) top - lmt_insert_state.insert_data.top) * sizeof(insert_record); + memset(&tmp[lmt_insert_state.insert_data.top + 1], 0, extra); + // memset(&tmp[lmt_insert_state.insert_data.top], 0, extra); + lmt_insert_state.inserts = tmp; + lmt_insert_state.insert_data.allocated += (int) extra; + lmt_insert_state.insert_data.top = top; + lmt_insert_state.insert_data.ptr = n; + return 1; + } + } + tex_overflow_error("inserts", lmt_insert_state.insert_data.maximum); + } + return 0; +} + +scaled tex_get_insert_limit(halfword i) +{ + if (tex_valid_insert_id(i)) { + return lmt_insert_state.mode == index_insert_mode ? insert_maxheight(i) : lmt_insert_state.inserts[i].limit; + } else { + return 0; + } +} + +halfword tex_get_insert_multiplier(halfword i) +{ + if (tex_valid_insert_id(i)) { + return lmt_insert_state.mode == index_insert_mode ? insert_multiplier(i) : lmt_insert_state.inserts[i].multiplier; + } else { + return 0; + } +} + +halfword tex_get_insert_penalty(halfword i) +{ + if (tex_valid_insert_id(i)) { + return lmt_insert_state.mode == index_insert_mode ? 
floating_penalty_par : lmt_insert_state.inserts[i].penalty; + } else { + return 0; + } +} + +halfword tex_get_insert_maxdepth(halfword i) +{ + if (tex_valid_insert_id(i)) { + return lmt_insert_state.mode == index_insert_mode ? split_max_depth_par : lmt_insert_state.inserts[i].maxdepth; + } else { + return 0; + } +} + +halfword tex_get_insert_distance(halfword i) +{ + if (tex_valid_insert_id(i)) { + return lmt_insert_state.mode == index_insert_mode ? insert_distance(i) : lmt_insert_state.inserts[i].distance; + } else { + return 0; + } +} + +static inline halfword tex_aux_insert_box(halfword i) +{ + if (tex_valid_insert_id(i)) { + return lmt_insert_state.mode == index_insert_mode ? insert_content(i) : lmt_insert_state.inserts[i].content; + } else { + return null; + } +} + +scaled tex_get_insert_height(halfword i) +{ + halfword b = tex_aux_insert_box(i); + return b ? box_height(b) : 0; +} + +scaled tex_get_insert_depth(halfword i) +{ + halfword b = tex_aux_insert_box(i); + return b ? box_depth(b) : 0; +} + +scaled tex_get_insert_width(halfword i) +{ + halfword b = tex_aux_insert_box(i); + return b ? 
box_width(b) : 0; +} + +halfword tex_get_insert_content(halfword i) +{ + return tex_aux_insert_box(i); +} + +halfword tex_get_insert_storage(halfword i) +{ + if (lmt_insert_state.mode == class_insert_mode && tex_valid_insert_id(i)) { + return has_insert_option(i, insert_option_storing); + } else { + return 0; + } +} + +void tex_set_insert_limit(halfword i, scaled v) +{ + if (tex_valid_insert_id(i)) { + switch (lmt_insert_state.mode) { + case index_insert_mode: insert_maxheight(i) = v; break; + case class_insert_mode: lmt_insert_state.inserts[i].limit = v; break; + } + } +} + +void tex_set_insert_multiplier(halfword i, halfword v) { + if (tex_valid_insert_id(i)) { + switch (lmt_insert_state.mode) { + case index_insert_mode: insert_multiplier(i) = v; break; + case class_insert_mode: lmt_insert_state.inserts[i].multiplier = v; break; + } + } +} + +void tex_set_insert_penalty(halfword i, halfword v) { + if (tex_valid_insert_id(i) && lmt_insert_state.mode == class_insert_mode) { + lmt_insert_state.inserts[i].options = set_insert_option(lmt_insert_state.inserts[i].options, insert_option_penalty); + lmt_insert_state.inserts[i].penalty = v; + } +} + +void tex_set_insert_maxdepth(halfword i, halfword v) { + if (tex_valid_insert_id(i) && lmt_insert_state.mode == class_insert_mode) { + lmt_insert_state.inserts[i].options = set_insert_option(lmt_insert_state.inserts[i].options, insert_option_maxdepth); + lmt_insert_state.inserts[i].maxdepth = v; + } +} + +void tex_set_insert_distance(halfword i, halfword v) { + if (tex_valid_insert_id(i)) { + int d = null; + switch (lmt_insert_state.mode) { + case index_insert_mode: + d = insert_distance(i); + insert_distance(i) = v; + break; + case class_insert_mode: + d = lmt_insert_state.inserts[i].distance; + lmt_insert_state.inserts[i].distance = v; + break; + } + tex_flush_node(d); + } +} + +void tex_set_insert_height(halfword i, scaled v) { + halfword b = tex_aux_insert_box(i); + if (b) { + box_height(b) = v; + } +} + +void 
tex_set_insert_depth(halfword i, scaled v) { + halfword b = tex_aux_insert_box(i); + if (b) { + box_depth(b) = v; + } +} + +void tex_set_insert_width(halfword i, scaled v) { + halfword b = tex_aux_insert_box(i); + if (b) { + box_width(b) = v; + } +} + +void tex_set_insert_content(halfword i, halfword v) { + switch (lmt_insert_state.mode) { + case index_insert_mode: insert_content(i) = v; break; + case class_insert_mode: if (tex_valid_insert_id(i)) { lmt_insert_state.inserts[i].content = v; } break; + } +} + +void tex_set_insert_storage(halfword i, halfword v) +{ + if (lmt_insert_state.mode == class_insert_mode && tex_valid_insert_id(i)) { + lmt_insert_state.inserts[i].options = v + ? set_insert_option(lmt_insert_state.inserts[i].options, insert_option_storing) + : unset_insert_option(lmt_insert_state.inserts[i].options, insert_option_storing); + } +} + +void tex_wipe_insert(halfword i) { + if (lmt_insert_state.mode == class_insert_mode && i >= 0 && i <= lmt_insert_state.insert_data.ptr) { +// if (lmt_insert_state.mode == class_insert_mode && tex_valid_insert_id(i)) { + halfword b = lmt_insert_state.inserts[i].content; + if (b) { + tex_flush_node(b); + lmt_insert_state.inserts[i].content = null; + } + } +} + +halfword lmt_get_insert_distance(halfword i, int slot) +{ + int callback_id = lmt_callback_defined(build_page_insert_callback); + if (callback_id != 0) { + halfword replacement = null; + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "dd->N", i, slot, &replacement); + if (replacement) { + return replacement; + } else { + halfword distance = null; + switch (lmt_insert_state.mode) { + case index_insert_mode: + distance = insert_distance(i); + break; + case class_insert_mode: + if (tex_valid_insert_id(i)) { + distance = lmt_insert_state.inserts[i].distance; + } + break; + } + if (distance) { + return tex_copy_node(distance); + } + } + } + return tex_new_glue_spec_node(null); +} + +halfword tex_get_insert_progress(halfword i) +{ + if 
(tex_valid_insert_id(i)) { + halfword p = page_insert_head; + while (p && i >= insert_index(node_next(p))) { + p = node_next(p); + if (p == page_insert_head) { + break; + } + } + return insert_index(p) == i ? insert_total_height(p) : 0; + } else { + return 0; + } +} + +/*tex The |class_insert| zero serves as a garbage bin. */ + +halfword tex_scan_insert_index(void) +{ + halfword index = 0; + switch (lmt_insert_state.mode) { + case unset_insert_mode: + lmt_insert_state.mode = index_insert_mode; + // fall-through + case index_insert_mode: + index = tex_scan_box_register_number(); + if (index == output_box_par) { + tex_handle_error( + normal_error_type, + "You can't \\insert%i", + output_box_par, + "I'm changing to \\insert0; box \\outputbox is special." + ); + index = 0; + } + break; + case class_insert_mode: + index = tex_scan_int(0, NULL); + if (! tex_valid_insert_id(index)) { + index = 0; + } + break; + } + return index; +} + +void tex_set_insert_mode(halfword mode) +{ + if (lmt_insert_state.mode == unset_insert_mode && (mode == index_insert_mode || mode == class_insert_mode)) { + lmt_insert_state.mode = mode; + } else if (mode != lmt_insert_state.mode) { + tex_handle_error( + normal_error_type, + "Bad \\insertmode (%i)", + mode, + "This mode can be set once and has value 1 or 2. It will be automatically\n" + "set when \\insert is used." + ); + } +} + +int tex_insert_is_void(halfword i) +{ + halfword b = tex_aux_insert_box(i); + return (! b) || box_list(b) == null; /*tex So also an empty box test! */ +} + +/* playground */ + +int tex_insert_stored(void) +{ + return lmt_insert_state.head != null; +} + +void tex_insert_restore(halfword n) +{ + if (lmt_insert_state.tail) { + tex_couple_nodes(lmt_insert_state.tail, n); + } else { + lmt_insert_state.head = n; + } + lmt_insert_state.tail = n; +} + +void tex_insert_store(halfword i, halfword n) +{ + if (tex_get_insert_storage(i)) { + tex_insert_restore(n); + } +} + +/* not sparse (yet) ... 
makes no sense (unless we make the list pointers) */ + +void tex_dump_insert_data(dumpstream f) { + /*tex NOTE(review): |ptr| records are written starting at slot zero, so slots 0 upto ptr - 1, while |tex_valid_insert_id| accepts classes up to and including |ptr|; confirm that the last class is not lost in the format. */ dump_int(f, lmt_insert_state.mode); + dump_int(f, lmt_insert_state.insert_data.ptr); + dump_int(f, lmt_insert_state.insert_data.top); + dump_things(f, lmt_insert_state.inserts[0], lmt_insert_state.insert_data.ptr); +} + +void tex_undump_insert_data(dumpstream f) { + /*tex The counterpart of the dumper: read mode, |ptr| and |top|, allocate |top| records and read |ptr| of them back. */ undump_int(f, lmt_insert_state.mode); + undump_int(f, lmt_insert_state.insert_data.ptr); + undump_int(f, lmt_insert_state.insert_data.top); + insert_record *tmp = aux_allocate_clear_array(sizeof(insert_record), lmt_insert_state.insert_data.top, 1); + if (tmp) { + lmt_insert_state.inserts = tmp; + lmt_insert_state.insert_data.allocated = lmt_insert_state.insert_data.top * sizeof(insert_record); + undump_things(f, lmt_insert_state.inserts[0], lmt_insert_state.insert_data.ptr); + } else { + tex_overflow_error("inserts", lmt_insert_state.insert_data.top); + } +} + +/*tex + Inserts, not the easiest mechanism and a candidate for more opening up. +*/ + +void tex_run_insert(void) +{ + /*tex The scanned insert index is saved so that |tex_finish_insert_group| can pick it up; then we open the group and enter internal vertical mode. */ tex_set_saved_record(saved_insert_item_index, saved_insert_index, 0, tex_scan_insert_index()); + lmt_save_state.save_stack_data.ptr += saved_insert_n_of_items; + tex_new_save_level(insert_group); + tex_scan_left_brace(); + tex_normal_paragraph(insert_par_context); + tex_push_nest(); + cur_list.mode = -vmode; + cur_list.prev_depth = ignore_depth; +} + +void tex_finish_insert_group(void) +{ + if (! 
tex_wrapped_up_paragraph(insert_par_context)) { + halfword p, q; /*tex for short-term use */ + scaled d; /*tex holds |split_max_depth| in |insert_group| */ + halfword f; /*tex holds |floating_penalty| in |insert_group| */ + tex_end_paragraph(insert_group, insert_par_context); + q = tex_new_glue_node(split_top_skip_par, top_skip_code); + d = split_max_depth_par; + f = floating_penalty_par; + tex_unsave(); + lmt_save_state.save_stack_data.ptr -= saved_insert_n_of_items; + // p = tex_vpack(node_next(cur_list.head), 0, packing_additional, max_dimen, direction_unknown); + // /* we don't do this: */ + // /* p = tex_filtered_vpack(node_next(cur_list.head), 0, packing_additional, max_dimen, insert_group, direction_unknown, 0, 0); */ + // /* because it can induce loops. */ + // tex_pop_nest(); + p = node_next(cur_list.head); + tex_pop_nest(); + p = tex_vpack(p, 0, packing_additional, max_dimen, direction_unknown, holding_none_option); + { + halfword index = saved_value(saved_insert_item_index); + halfword insert = tex_new_node(insert_node, 0); + halfword maxdepth = tex_get_insert_maxdepth(index); + halfword floating = tex_get_insert_penalty(index); + if (tex_get_insert_storage(index)) { + tex_insert_store(index, insert); + } else { + tex_tail_append(insert); + } + /*tex + An |\insert| is just a list. We package it because we want to know the height but + then discard the wrapper |vlist| node. So the |insert_list| is not packaged. + */ + insert_index(insert) = index; + insert_total_height(insert) = box_total(p); + insert_list(insert) = box_list(p); + insert_split_top(insert) = q; + /*tex NOTE(review): when the class has the maxdepth (or penalty) option set, the value |d| (or |f|) that was grabbed from the parameters inside the group is used and otherwise the value from the getter; as the option is set when a class value is assigned this looks inverted, to be confirmed. */ insert_max_depth(insert) = has_insert_option(index, insert_option_maxdepth) ? d : maxdepth; + insert_float_cost(insert) = has_insert_option(index, insert_option_penalty) ? 
f : floating; + box_list(p) = null; + tex_flush_node(p); + if (tracing_inserts_par > 0) { + tex_begin_diagnostic(); + tex_print_levels(); + tex_print_format("[insert: setting, index %i, height %D, penalty %i]", + index, insert_total_height(insert), pt_unit, insert_float_cost(insert)); + if (tracing_inserts_par > 1) { + tex_print_node_list(insert_list(insert), "insert", show_box_depth_par, show_box_breadth_par); + } + tex_end_diagnostic(); + } + } + /* we never do the callback ... maybe move it outside */ + if (lmt_nest_state.nest_data.ptr == 0) { + if (! lmt_page_builder_state.output_active) { + lmt_page_filter_callback(insert_page_context, 0); + } + tex_build_page(); + } + } +} diff --git a/source/luametatex/source/tex/texinserts.h b/source/luametatex/source/tex/texinserts.h new file mode 100644 index 000000000..e91965e6f --- /dev/null +++ b/source/luametatex/source/tex/texinserts.h @@ -0,0 +1,101 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_INSERTS_H +# define LMT_INSERTS_H + +/*tex The per class data that is used in |class_insert_mode|. */ typedef struct insert_record { + halfword limit; + halfword multiplier; + halfword distance; + halfword content; + halfword initialized; + halfword options; + halfword penalty; + halfword maxdepth; +} insert_record; + +typedef enum insert_modes { + unset_insert_mode, + index_insert_mode, + class_insert_mode, +} insert_modes; + +/*tex These are bit flags, kept in the |options| field of an |insert_record|. 
*/ typedef enum insert_class_options { + insert_option_storing = 0x1, + insert_option_penalty = 0x2, + insert_option_maxdepth = 0x4, +} insert_class_options; + +typedef enum insert_storage_actions { + insert_storage_ignore, + insert_storage_delay, + insert_storage_inject, +} insert_storage_actions; + +typedef enum saved_insert_items { + saved_insert_item_index = 0, + saved_insert_n_of_items = 1, +} saved_insert_items; + +typedef struct insert_state_info { + insert_record *inserts; + memory_data insert_data; + int mode; + halfword storing; + halfword head; + halfword tail; +} insert_state_info; + +extern insert_state_info lmt_insert_state; + 
+# define has_insert_option(a,b) (lmt_insert_state.mode == class_insert_mode && (lmt_insert_state.inserts[a].options & b) == b) /*tex |a| is the insert class index, |b| the option bit(s); always false outside class mode */ +# define set_insert_option(a,b) (lmt_insert_state.inserts[a].options |= b) /*tex |a| is the insert class index (not an options value); the bit(s) are set in place */ +# define unset_insert_option(a,b) (lmt_insert_state.inserts[a].options & ~(b)) /*tex |a| is the insert class index; this only yields the masked value, the caller has to store it */ + +extern scaled tex_get_insert_limit (halfword i); +extern halfword tex_get_insert_multiplier (halfword i); +extern halfword tex_get_insert_penalty (halfword i); +extern halfword tex_get_insert_distance (halfword i); +extern halfword tex_get_insert_maxdepth (halfword i); +extern scaled tex_get_insert_height (halfword i); +extern scaled tex_get_insert_depth (halfword i); +extern scaled tex_get_insert_width (halfword i); +extern halfword tex_get_insert_content (halfword i); +extern halfword tex_get_insert_storage (halfword i); + +extern void tex_set_insert_limit (halfword i, scaled v); +extern void tex_set_insert_multiplier (halfword i, halfword v); +extern void tex_set_insert_penalty (halfword i, halfword v); +extern void tex_set_insert_distance (halfword i, halfword v); +extern void tex_set_insert_maxdepth (halfword i, halfword v); +extern void tex_set_insert_height (halfword i, scaled v); +extern void tex_set_insert_depth (halfword i, scaled v); +extern void tex_set_insert_width (halfword i, scaled v); +extern void tex_set_insert_content (halfword i, halfword v); +extern void tex_set_insert_storage (halfword i, halfword v); + +extern void tex_wipe_insert (halfword i); + +extern void tex_initialize_inserts (void); +extern int tex_valid_insert_id (halfword n); +extern void tex_dump_insert_data (dumpstream f); +extern void tex_undump_insert_data (dumpstream f); + +extern halfword lmt_get_insert_distance (halfword i, int slot); /* callback */ + +extern halfword tex_get_insert_progress (halfword i); + +extern void tex_insert_store (halfword i, halfword n); +extern void tex_insert_restore (halfword n); +extern int tex_insert_stored (void); + +extern halfword tex_scan_insert_index (void); 
+extern void tex_set_insert_mode (halfword mode); +extern int tex_insert_is_void (halfword i); + +extern void tex_run_insert (void); +extern void tex_finish_insert_group (void); + +# endif diff --git a/source/luametatex/source/tex/texlanguage.c b/source/luametatex/source/tex/texlanguage.c new file mode 100644 index 000000000..6f3460c22 --- /dev/null +++ b/source/luametatex/source/tex/texlanguage.c @@ -0,0 +1,1774 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + We no longer dump the patterns and exceptions as they are supposed to be loaded at runtime. There is + no gain getting them from the format. But we do dump some of the properties. + + There were all kinds of checks for simple characters i.e. not ligatures but there is no need for + that in \LUAMETATEX. We have separated stages and the hyphenator sees just glyphs. And when a + traditional font has glyphs we can assume that the old school font encoding matches the patterns + i.e. that ligatures are not in the normal character slots. + + Exceptions are stored at the \LUA\ end. We cannot easily go dynamic because fonts are stored + in the eqtb so we would have to use some more indirect mechanism (doable as we do it for other + items) too. + +*/ + +language_state_info lmt_language_state = { + .languages = NULL, + .language_data = { + .minimum = min_language_size, + .maximum = max_language_size, + .size = memory_data_unset, + .step = stp_language_size, + .allocated = 0, + .itemsize = 1, + .top = 0, + .ptr = 0, + .initial = memory_data_unset, + .offset = 0, + }, + .handler_table_id = 0, + .handler_count = 0, +}; + +/*tex + We can enforce a language id but we want to be sequential so we accept holes! So one + has to define bottom-up. As with fonts, we have a zero language but that one normally + is not set. 
+*/ + +static void tex_aux_reset_language(halfword id) +{ + /*tex Give the (already allocated) language |id| its default properties: no patterns, exceptions or word handler and a |-| as pre hyphen character. */ tex_language *lang = lmt_language_state.languages[id]; + lang->id = id; + lang->exceptions = 0; + lang->patterns = NULL; + lang->wordhandler = 0; + lang->pre_hyphen_char = '-'; + lang->post_hyphen_char = 0; + lang->pre_exhyphen_char = 0; + lang->post_exhyphen_char = 0; + lang->hyphenation_min = -1; + lang->hjcode_head = NULL; +} + +/*tex + A value below zero will bump the language id. Because we have a rather limited number of + languages there is no configuration, size is just maximum. +*/ + +static halfword tex_aux_new_language_id(halfword id) +{ + int top; + if (id >= 0) { + if (id <= lmt_language_state.language_data.top) { + if (lmt_language_state.languages[id]) { + return tex_formatted_error("languages", "the language with id %d is already created", id); + } else { + return id; + } + } else if (id > lmt_language_state.language_data.maximum) { + goto OVERFLOWERROR; + } else { + top = id; + } + } else if (lmt_language_state.language_data.ptr < lmt_language_state.language_data.top) { + ++lmt_language_state.language_data.ptr; + return lmt_language_state.language_data.ptr; + } else if (lmt_language_state.language_data.top >= lmt_language_state.language_data.maximum) { + goto OVERFLOWERROR; + } else if (lmt_language_state.language_data.top + lmt_language_state.language_data.step > lmt_language_state.language_data.maximum) { + top = lmt_language_state.language_data.maximum; + } else { + top = lmt_language_state.language_data.top + lmt_language_state.language_data.step; + } + /*tex Finally we can bump memory. 
*/ + { + tex_language **tmp = aux_reallocate_array(lmt_language_state.languages, sizeof(tex_language *), top, 0); + if (tmp) { + /*tex NOTE(review): the slots from the old |top| plus one up to and including the new |top| are reset, so this assumes that the allocator provides at least |top + 1| entries; to be confirmed against |aux_reallocate_array|. */ for (int i = lmt_language_state.language_data.top + 1; i <= top; i++) { + tmp[i] = NULL; + } + lmt_language_state.languages = tmp; + lmt_language_state.language_data.allocated += ((size_t) top - lmt_language_state.language_data.top) * sizeof(tex_language *); + lmt_language_state.language_data.top = top; + /*tex NOTE(review): also when an explicit |id| beyond the old |top| was asked for we return the next sequential |ptr| and not |id|; confirm that this is intended. */ lmt_language_state.language_data.ptr += 1; + return lmt_language_state.language_data.ptr; + } + } + OVERFLOWERROR: + tex_overflow_error("languages", lmt_language_state.language_data.maximum); + return 0; +} + +void tex_initialize_languages(void) +{ + /*tex Allocate |minimum| language slots, make them all empty and use that as initial capacity |top|. */ tex_language **tmp = aux_allocate_clear_array(sizeof(tex_language *), lmt_language_state.language_data.minimum, 0); + if (tmp) { + for (int i = 0; i < lmt_language_state.language_data.minimum; i++) { + tmp[i] = NULL; + } + lmt_language_state.languages = tmp; + lmt_language_state.language_data.allocated += lmt_language_state.language_data.minimum * sizeof(tex_language *); + lmt_language_state.language_data.top = lmt_language_state.language_data.minimum; + } else { + tex_overflow_error("languages", lmt_language_state.language_data.minimum); + } +} + +/* +halfword tex_aux_maximum_language_id(void) +{ + return language_state.language_data.maximum; +} +*/ + +int tex_is_valid_language(halfword n) +{ + /*tex Language zero is always valid; any other one has to be within |top| and actually be created. */ if (n == 0) { + return 1; + } else if (n > 0 && n <= lmt_language_state.language_data.top) { + return lmt_language_state.languages[n] ? 
1 : 0; + } else { + return 0; + } +} + +tex_language *tex_new_language(halfword n) +{ + /*tex Get an id (a negative |n| asks for the next free one), allocate the language record, register it and reset it to the defaults. When the allocation fails an overflow is reported and |NULL| is what we return. */ halfword id = tex_aux_new_language_id(n); + if (id >= 0) { + tex_language *lang = lmt_memory_malloc(sizeof(struct tex_language)); + if (lang) { + lmt_language_state.languages[id] = lang; + lmt_language_state.language_data.allocated += sizeof(struct tex_language); + tex_aux_reset_language(id); + if (saving_hyph_codes_par) { + /*tex + For now, we might just use specific value for whatever task. This will become + obsolete. + */ + tex_hj_codes_from_lc_codes(id); + } + } else { + tex_overflow_error("language", sizeof(struct tex_language)); + } + return lang; + } else { + return NULL; + } +} + +tex_language *tex_get_language(halfword n) +{ + /*tex Return language |n| when it exists and otherwise create it on demand, as long as |n| does not exceed |maximum|; a negative |n| gives |NULL|. */ if (n >= 0) { + if (n <= lmt_language_state.language_data.top && lmt_language_state.languages[n]) { + return lmt_language_state.languages[n]; + } + if (n <= lmt_language_state.language_data.maximum) { + return tex_new_language(n); + } + } + return NULL; +} + +/*tex + Freeing, dumping, undumping languages: +*/ + +/* +void free_languages(void) +{ + for (int i = 0; i < language_state.language_data.top; i++) { + if (language_state.languages[i]) { + lmt_memory_free(language_state.languages[i]); + language_state.languages[i] = NULL; + } + } +} +*/ + +void tex_dump_language_data(dumpstream f) +{ + /*tex We write |top| and |ptr| and then for each slot below |top| a flag that tells if the language exists, followed by its properties and hj codes when it does. 
*/ dump_int(f, lmt_language_state.language_data.top); + dump_int(f, lmt_language_state.language_data.ptr); + if (lmt_language_state.language_data.top > 0) { + for (int i = 0; i < lmt_language_state.language_data.top; i++) { + tex_language *lang = lmt_language_state.languages[i]; + if (lang) { + dump_via_int(f, 1); + dump_int(f, lang->id); + dump_int(f, lang->pre_hyphen_char); + dump_int(f, lang->post_hyphen_char); + dump_int(f, lang->pre_exhyphen_char); + dump_int(f, lang->post_exhyphen_char); + dump_int(f, lang->hyphenation_min); + tex_dump_language_hj_codes(f, i); + } else { + dump_via_int(f, 0); + } + } + } +} + +void tex_undump_language_data(dumpstream f) +{ 
+ int top, ptr; + undump_int(f, top); + undump_int(f, ptr); + if (top > 0) { + tex_language **tmp = aux_allocate_clear_array(sizeof(tex_language *), top, 0); + if (tmp) { + lmt_language_state.language_data.top = top; + lmt_language_state.language_data.ptr = ptr; + lmt_language_state.languages = tmp; + for (int i = 0; i < top; i++) { + int x; + undump_int(f, x); + if (x == 1) { + tex_language *lang = lmt_memory_malloc(sizeof(struct tex_language)); + if (lang) { + lmt_language_state.languages[i] = lang; + lmt_language_state.language_data.allocated += sizeof(struct tex_language); + lang->exceptions = 0; + lang->patterns = NULL; + lang->wordhandler = 0; + lang->hjcode_head = NULL; + undump_int(f, lang->id); + undump_int(f, lang->pre_hyphen_char); + undump_int(f, lang->post_hyphen_char); + undump_int(f, lang->pre_exhyphen_char); + undump_int(f, lang->post_exhyphen_char); + undump_int(f, lang->hyphenation_min); + tex_undump_language_hj_codes(f, i); + if (lang->id != i) { + tex_formatted_warning("languages", "undumped language id mismatch: %d <> %d", lang->id, i); + lang->id = i; + } + } else { + tex_overflow_error("languages", i); + } + tmp[i] = lang; + } else { + tmp[i] = NULL; + } + } + lmt_language_state.language_data.initial = lmt_language_state.language_data.ptr; + } else { + tex_overflow_error("languages", top); + lmt_language_state.language_data.initial = 0; + } + } else { + /*tex Indeed we can have no languages stored. */ + tex_initialize_languages(); + } +} + +/*tex All kind of accessors. 
*/ + +void tex_set_pre_hyphen_char(halfword n, halfword v) +{ + struct tex_language *l = tex_get_language(n); + if (l) { + l->pre_hyphen_char = v; + } +} + +void tex_set_post_hyphen_char(halfword n, halfword v) +{ + struct tex_language *l = tex_get_language(n); + if (l) { + l->post_hyphen_char = v; + } +} + +void tex_set_pre_exhyphen_char(halfword n, halfword v) +{ + struct tex_language *l = tex_get_language(n); + if (l) { + l->pre_exhyphen_char = v; + } +} + +void tex_set_post_exhyphen_char(halfword n, halfword v) +{ + struct tex_language *l = tex_get_language(n); + if (l) { + l->post_exhyphen_char = v; + } +} + +halfword tex_get_pre_hyphen_char(halfword n) +{ + struct tex_language *l = tex_get_language(n); + return l ? l->pre_hyphen_char : -1; +} + +halfword tex_get_post_hyphen_char(halfword n) +{ + struct tex_language *l = tex_get_language(n); + return l ? l->post_hyphen_char : -1; +} + +halfword tex_get_pre_exhyphen_char(halfword n) +{ + struct tex_language *l = tex_get_language(n); + return l ? l->pre_exhyphen_char : -1; +} + +halfword tex_get_post_exhyphen_char(halfword n) +{ + struct tex_language *l = tex_get_language(n); + return (l) ? (int) l->post_exhyphen_char : -1; +} + +void tex_set_hyphenation_min(halfword n, halfword v) +{ + struct tex_language *l = tex_get_language(n); + if (l) { + l->hyphenation_min = v; + } +} + +halfword tex_get_hyphenation_min(halfword n) +{ + struct tex_language *l = tex_get_language((int) n); + return l ? l->hyphenation_min : -1; +} + +void tex_load_patterns(struct tex_language *lang, const unsigned char *buff) +{ + if ((! lang) || (! buff) || strlen((const char *) buff) == 0) { + return; + } else { + if (! 
lang->patterns) { + lang->patterns = hnj_dictionary_new(); + } + hnj_dictionary_load(lang->patterns, buff, tracing_hyphenation_par > 0); + } +} + +void tex_clear_patterns(struct tex_language *lang) +{ + if (lang && lang->patterns) { + hnj_dictionary_clear(lang->patterns); + } +} + +void tex_load_tex_patterns(halfword curlang, halfword head) +{ + char *s = tex_tokenlist_to_tstring(head, 1, NULL, 0, 0, 0); + if (s) { + tex_load_patterns(tex_get_language(curlang), (unsigned char *) s); + } +} + +/* + This cleans one word which is returned in |cleaned|, returns the new offset into |buffer|. +*/ + +/* define tex_isspace(c) (c == ' ' || c == '\t') */ +# define tex_isspace(c) (c == ' ') + +const char *tex_clean_hyphenation(halfword id, const char *buff, char **cleaned) +{ + int items = 0; + /*tex Work buffer for bytes: */ + unsigned char word[max_size_of_word + 1]; + /*tex Work buffer for \UNICODE: */ + unsigned uword[max_size_of_word + 1] = { 0 }; + /*tex The \UNICODE\ buffer value: */ + int i = 0; + char *uindex = (char *) word; + const char *s = buff; + while (*s && ! tex_isspace((unsigned char)*s)) { + word[i++] = (unsigned char) *s; + s++; + if ((s-buff) > max_size_of_word) { + /*tex Todo: this is too strict, should count \UNICODE, not bytes. */ + *cleaned = NULL; + tex_handle_error( + normal_error_type, + "Exception too long", + NULL + ); + return s; + } + } + /*tex Now convert the input to \UNICODE. */ + word[i] = '\0'; + aux_splitutf2uni(uword, (const char *)word); + /*tex + Build the new word string. The hjcode values < 32 indicate a length, so that + for instance \|hjcode`ܽ2| makes that ligature count okay. + */ + i = 0; + while (uword[i] > 0) { + int u = uword[i++]; + if (u == '-') { + /*tex Skip. */ + } else if (u == '=') { + unsigned c = tex_get_hj_code(id, '-'); + uindex = aux_uni2string(uindex, (! c || c <= 32) ? 
'-' : c); + } else if (u == '{') { + u = uword[i++]; + items = 0; + while (u && u != '}') { + u = uword[i++]; + } + if (u == '}') { + items++; + u = uword[i++]; + } + while (u && u != '}') { + u = uword[i++]; + } + if (u == '}') { + items++; + u = uword[i++]; + } + if (u == '{') { + u = uword[i++]; + } + while (u && u != '}') { + unsigned c = tex_get_hj_code(id, u); + uindex = aux_uni2string(uindex, (! c || c <= 32) ? u : c); + u = uword[i++]; + } + if (u == '}') { + items++; + } + if (items != 3) { + /* hm, we intercept that elsewhere in a better way so why here? Best remove the test here or move the other one here. */ + *cleaned = NULL; + tex_handle_error( + normal_error_type, + "Exception syntax error, a discretionary has three components: {}{}{}.", + NULL + ); + return s; + } else { + /* skip replacement (chars) */ + if (uword[i] == '(') { + while (uword[++i] && uword[i] != ')') { }; + if (uword[i] != ')') { + tex_handle_error( + normal_error_type, + "Exception syntax error, an alternative replacement is defined as (text).", + NULL + ); + return s; + } else if (uword[i]) { + i++; + } + } + /* skip penalty: [digit] but we intercept multiple digits */ + if (uword[i] == '[') { + if (uword[i+1] && uword[i+1] >= '0' && uword[i+1] <= '9' && uword[i+2] && uword[i+2] == ']') { + i += 3; + } else { + tex_handle_error( + normal_error_type, + "Exception syntax error, a penalty is defined as [digit].", + NULL + ); + return s; + } + } + } + } else { + unsigned c = tex_get_hj_code(id, u); + uindex = aux_uni2string(uindex, (! c || c <= 32) ? 
u : c); + } + } + *uindex = '\0'; + *cleaned = lmt_memory_strdup((char *) word); + return s; +} + +void tex_load_hyphenation(struct tex_language *lang, const unsigned char *buff) +{ + if (lang) { + lua_State *L = lmt_lua_state.lua_instance; + const char *s = (const char *) buff; + char *cleaned = NULL; + int id = lang->id; + if (lang->exceptions == 0) { + lua_newtable(L); + lang->exceptions = luaL_ref(L, LUA_REGISTRYINDEX); + } + lua_rawgeti(L, LUA_REGISTRYINDEX, lang->exceptions); + while (*s) { + while (tex_isspace((unsigned char) *s)) { + s++; + } + if (*s) { + const char *value = s; + s = tex_clean_hyphenation(id, s, &cleaned); + if (cleaned) { + size_t len = s - value; + if (len > 0) { + lua_pushstring(L, cleaned); + lua_pushlstring(L, value, len); + lua_rawset(L, -3); + } + lmt_memory_free(cleaned); + } else { + /* tex_formatted_warning("hyphenation","skipping invalid hyphenation exception: %s", value); */ + } + } + } + } +} + +void tex_clear_hyphenation(struct tex_language *lang) +{ + if (lang && lang->exceptions != 0) { + lua_State *L = lmt_lua_state.lua_instance; + luaL_unref(L, LUA_REGISTRYINDEX, lang->exceptions); + lang->exceptions = 0; + } +} + +void tex_load_tex_hyphenation(halfword curlang, halfword head) +{ + char *s = tex_tokenlist_to_tstring(head, 1, NULL, 0, 0, 0); + if (s) { + tex_load_hyphenation(tex_get_language(curlang), (unsigned char *) s); + } +} + +static halfword tex_aux_insert_discretionary(halfword t, halfword pre, halfword post, halfword replace, quarterword subtype, int penalty) +{ + /*tex For compound words following explicit hyphens we take the current font. */ + halfword d = tex_new_disc_node(subtype); + halfword a = node_attr(t) ; + disc_penalty(d) = penalty; + if (t == replace) { + /*tex We have |prev disc next-next|. */ + tex_try_couple_nodes(d, node_next(t)); + tex_try_couple_nodes(node_prev(t), d); + node_prev(t) = null; + node_next(t) = null; + replace = t; + } else { + /*tex We have |prev disc next|. 
*/ + tex_try_couple_nodes(d, node_next(t)); + tex_couple_nodes(t, d); + } + if (a) { + tex_attach_attribute_list_attribute(d, a); + } + tex_set_disc_field(d, pre_break_code, pre); + tex_set_disc_field(d, post_break_code, post); + tex_set_disc_field(d, no_break_code, replace); + return d; +} + +static halfword tex_aux_insert_syllable_discretionary(halfword t, lang_variables *lan) +{ + halfword n = tex_new_disc_node(syllable_discretionary_code); + disc_penalty(n) = hyphen_penalty_par; + tex_couple_nodes(n, node_next(t)); + tex_couple_nodes(t, n); + tex_attach_attribute_list_attribute(n, get_attribute_list(t)); + if (lan->pre_hyphen_char > 0) { + halfword g = tex_new_glyph_node(glyph_unset_subtype, glyph_font(t), lan->pre_hyphen_char, t); + tex_set_disc_field(n, pre_break_code, g); + } + if (lan->post_hyphen_char > 0) { + halfword g = tex_new_glyph_node(glyph_unset_subtype, glyph_font(t), lan->post_hyphen_char, t); + tex_set_disc_field(n, post_break_code, g); + } + return n; +} + +static halfword tex_aux_compound_word_break(halfword t, halfword clang, halfword chr) +{ + halfword prechar, postchar, pre, post, disc; + if (chr == ex_hyphen_char_par) { + halfword pre_exhyphen_char = tex_get_pre_exhyphen_char(clang); + halfword post_exhyphen_char = tex_get_post_exhyphen_char(clang); + prechar = pre_exhyphen_char > 0 ? pre_exhyphen_char : ex_hyphen_char_par; + postchar = post_exhyphen_char > 0 ? post_exhyphen_char : null; + } else { + /* we need a flag : use pre/post cf language spec */ + prechar = chr; + postchar = null; + } + pre = prechar > 0 ? tex_new_glyph_node(glyph_unset_subtype, glyph_font(t), prechar, t) : null; + post = postchar > 0 ? 
tex_new_glyph_node(glyph_unset_subtype, glyph_font(t), postchar, t) : null; + disc = tex_aux_insert_discretionary(t, pre, post, t, automatic_discretionary_code, tex_automatic_disc_penalty(glyph_hyphenate(t))); + return disc; +} + +static char *tex_aux_hyphenation_exception(int exceptions, char *w) +{ + lua_State *L = lmt_lua_state.lua_instance; + char *ret = NULL; + if (lua_rawgeti(L, LUA_REGISTRYINDEX, exceptions) == LUA_TTABLE) { + /*tex Word table: */ + lua_pushstring(L, w); + lua_rawget(L, -2); + if (lua_type(L, -1) == LUA_TSTRING) { + ret = lmt_memory_strdup(lua_tostring(L, -1)); + } + lua_pop(L, 2); + } else { + lua_pop(L, 1); + } + return ret; +} + +/*tex Kept as reference: */ + +/* +char *get_exception_strings(struct tex_language *lang) +{ + char *ret = NULL; + if (lang && lang->exceptions) { + lua_State *L = lua_state.lua_instance; + if (lua_rawgeti(L, LUA_REGISTRYINDEX, lang->exceptions) == LUA_TTABLE) { + size_t size = 0; + size_t current = 0; + lua_pushnil(L); + while (lua_next(L, -2)) { + size_t l = 0; + const char *value = lua_tolstring(L, -1, &l); + if (current + l + 2 > size) { + size_t new = (size + size/5) + current + l + 1024; + char *tmp = lmt_memory_realloc(ret, new); + if (tmp) { + ret = tmp; + size = new; + } else { + overflow_error("exceptions", (int) size); + } + } + if (ret) { + ret[current] = ' '; + strcpy(&ret[current + 1], value); + current += l + 1; + } + lua_pop(L, 1); + } + } + } + return ret; +} +*/ + +/*tex + + The sequence from |wordstart| to |r| can contain only normal characters it could be faster to + modify a halfword pointer and return an integer + +*/ + +# define zws 0x200B /* zero width space makes no sense */ +# define zwnj 0x200C +# define zwj 0x200D + +static halfword tex_aux_find_exception_part(unsigned int *j, unsigned int *uword, int len, halfword parent, char final) +{ + halfword head = null; + halfword tail = null; + unsigned i = *j; + int noligature = 0; + int nokerning = 0; + /*tex This puts uword[i] on the |{|. 
*/ + i++; + while (i < (unsigned) len && uword[i + 1] != (unsigned int) final) { + if (tail) { + switch (uword[i + 1]) { + case zwj: + noligature = 1; + nokerning = 0; + break; + case zwnj: + noligature = 1; + nokerning = 1; + break; + default: + { + halfword s = tex_new_glyph_node(glyph_unset_subtype, glyph_font(parent), (int) uword[i + 1], parent); /* todo: data */ + tex_couple_nodes(tail, s); + if (noligature) { + tex_add_glyph_option(tail, glyph_option_no_right_ligature); + tex_add_glyph_option(s, glyph_option_no_left_ligature); + noligature = 0; + } + if (nokerning) { + tex_add_glyph_option(tail, glyph_option_no_right_kern); + tex_add_glyph_option(s, glyph_option_no_left_kern); + nokerning = 0; + } + tail = node_next(tail); + break; + } + } + } else { + head = tex_new_glyph_node(glyph_unset_subtype, glyph_font(parent), (int) uword[i + 1], parent); /* todo: data */ + tail = head; + } + i++; + } + *j = ++i; + return head; +} + +static int tex_aux_count_exception_part(unsigned int *j, unsigned int *uword, int len) +{ + int n = 0; + unsigned i = *j; + /*tex This puts uword[i] on the |{|. */ + i++; + while (i < (unsigned) len && uword[i + 1] != '}') { + n++; + i++; + } + *j = ++i; + return n; +} + +static void tex_aux_show_exception_error(const char *part) +{ + tex_handle_error( + normal_error_type, + "Invalid %s part in exception", + part, + "Exception discretionaries should contain three pairs of braced items.\n" + "No intervening spaces are allowed." + ); +} + +/*tex + + The exceptions are taken as-is: no min values are taken into account. One can add normal + patterns on-the-fly if needed. 
+ +*/ + +static void tex_aux_do_exception(halfword wordstart, halfword r, char *replacement) +{ + halfword t = wordstart; + lang_variables langdata; + unsigned uword[max_size_of_word + 1] = { 0 }; + unsigned len = aux_splitutf2uni(uword, replacement); + int clang = get_glyph_language(wordstart); + langdata.pre_hyphen_char = tex_get_pre_hyphen_char(clang); + langdata.post_hyphen_char = tex_get_post_hyphen_char(clang); + for (unsigned i = 0; i < len; i++) { + if (uword[i + 1] == 0 ) { + /*tex We ran out of the exception pattern. */ + break; + } else if (uword[i + 1] == '-') { + /*tex A hyphen follows. */ + if (node_next(t) == r) { + break; + } else { + tex_aux_insert_syllable_discretionary(t, &langdata); + /*tex Skip the new disc */ + t = node_next(t); + } + } else if (uword[i + 1] == '=') { + /*tex We skip a disc. */ + t = node_next(t); + } else if (uword[i + 1] == '{') { + /*tex We ran into an exception |{}{}{}| or |{}{}{}[]|. */ + halfword pre = null; + halfword post = null; + halfword replace = null; + int count = 0; + int alternative = null; + halfword penalty; + /*tex |pre| */ + pre = tex_aux_find_exception_part(&i, uword, (int) len, wordstart, '}'); + if (i == len || uword[i + 1] != '{') { + tex_aux_show_exception_error("pre"); + } + /*tex |post| */ + post = tex_aux_find_exception_part(&i, uword, (int) len, wordstart, '}'); + if (i == len || uword[i + 1] != '{') { + tex_aux_show_exception_error("post"); + } + /*tex |replace| */ + count = tex_aux_count_exception_part(&i, uword, (int) len); + if (i == len) { + tex_aux_show_exception_error("replace"); + } else if (uword[i] && uword[i + 1] == '(') { + alternative = tex_aux_find_exception_part(&i, uword, (int) len, wordstart, ')');; + } + /*tex Play safe. */ + if (node_next(t) == r) { + break; + } else { + /*tex Let's deal with an (optional) replacement. */ + if (count > 0) { + /*tex Assemble the replace stream. 
*/ + halfword q = t; + replace = node_next(q); + while (count > 0 && q) { + halfword t = node_type(q); + q = node_next(q); + if (t == glyph_node || t == disc_node) { + count--; + } else { + break ; + } + } + /*tex Remove it from the main stream */ + tex_try_couple_nodes(t, node_next(q)); + /*tex and finish it in the replace. */ + node_next(q) = null; + if (alternative) { + tex_flush_node_list(replace); + replace = alternative; + } else { + /*tex Sanitize the replace stream (we could use the flattener instead). */ + q = replace ; + while (q) { + halfword n = node_next(q); + if (node_type(q) == disc_node) { + /*tex Beware: the replacement starts after the no_break pointer. */ + halfword nb = disc_no_break_head(q); + disc_no_break_head(q) = null; + node_prev(nb) = null ; /* used at all? */ + /*tex Insert the replacement glyph. */ + if (q == replace) { + replace = nb; + } else { + tex_try_couple_nodes(node_prev(q), nb); + } + /*tex Append the glyph (one). */ + tex_try_couple_nodes(nb, n); + /*tex Flush the disc. */ + tex_flush_node(q); + } + q = n ; + } + } + } + /*tex Let's check if we have a penalty spec. If we have more then we're toast, we just ignore them. */ + if (uword[i] && uword[i + 1] == '[') { + i += 2; + if (uword[i] && uword[i] >= '0' && uword[i] <= '9') { + if (exception_penalty_par > 0) { + if (exception_penalty_par > infinite_penalty) { + penalty = exception_penalty_par; + } else { + penalty = (uword[i] - '0') * exception_penalty_par ; + } + } else { + penalty = hyphen_penalty_par; + } + ++i; + while (uword[i] && uword[i] != ']') { + ++i; + } + } else { + penalty = hyphen_penalty_par; + } + } else { + penalty = hyphen_penalty_par; + } + /*tex And now we insert a disc node (this was |syllable_discretionary_code|). */ + t = tex_aux_insert_discretionary(t, pre, post, replace, normal_discretionary_code, penalty); + /*tex We skip the new disc node. 
*/ + t = node_next(t); + /*tex + We need to check if we have two discretionaries in a row, test case: |\hyphenation + {a{>}{<}{b}{>}{<}{c}de} \hsize 1pt abcde \par| which gives |a> <> <de|. + */ + if (uword[i] && uword[i + 1] == '{') { + i--; + t = node_prev(t); /*tex Tricky! */ + } + } + } else { + t = node_next(t); + } + /*tex Again we play safe. */ + if (! t || node_next(t) == r) { + break; + } + } +} + +/*tex + + The following description is no longer valid for \LUATEX. Although we use the same algorithm + for hyphenation, it is not integrated in the par builder. Instead it is a separate run over + the node list, preceding the line-breaking routine, possibly replaced by a callback. We keep + the description here because the principles remain. + + \startnarrower + + When the line-breaking routine is unable to find a feasible sequence of breakpoints, it makes + a second pass over the paragraph, attempting to hyphenate the hyphenatable words. The goal of + hyphenation is to insert discretionary material into the paragraph so that there are more + potential places to break. + + The general rules for hyphenation are somewhat complex and technical, because we want to be + able to hyphenate words that are preceded or followed by punctuation marks, and because we + want the rules to work for languages other than English. We also must contend with the fact + that hyphens might radically alter the ligature and kerning structure of a word. + + A sequence of characters will be considered for hyphenation only if it belongs to a \quotation + {potentially hyphenatable part} of the current paragraph. This is a sequence of nodes $p_0p_1 + \ldots p_m$ where $p_0$ is a glue node, $p_1\ldots p_{m-1}$ are either character or ligature + or whatsit or implicit kern nodes, and $p_m$ is a glue or penalty or insertion or adjust or + mark or whatsit or explicit kern node. (Therefore hyphenation is disabled by boxes, math + formulas, and discretionary nodes already inserted by the user.) 
The ligature nodes among $p_1 + \ldots p_{m-1}$ are effectively expanded into the original non-ligature characters; the kern + nodes and whatsits are ignored. Each character |c| is now classified as either a nonletter (if + |lc_code(c)=0|), a lowercase letter (if |lc_code(c)=c|), or an uppercase letter (otherwise); an + uppercase letter is treated as if it were |lc_code(c)| for purposes of hyphenation. The + characters generated by $p_1\ldots p_{m-1}$ may begin with nonletters; let $c_1$ be the first + letter that is not in the middle of a ligature. Whatsit nodes preceding $c_1$ are ignored; a + whatsit found after $c_1$ will be the terminating node $p_m$. All characters that do not have + the same font as $c_1$ will be treated as nonletters. The |hyphen_char| for that font must be + between 0 and 255, otherwise hyphenation will not be attempted. \TeX\ looks ahead for as many + consecutive letters $c_1\ldots c_n$ as possible; however, |n| must be less than 64, so a + character that would otherwise be $c_{64}$ is effectively not a letter. Furthermore $c_n$ must + not be in the middle of a ligature. In this way we obtain a string of letters $c_1\ldots c_n$ + that are generated by nodes $p_a\ldots p_b$, where |1<=a<=b+1<=m|. If |n>=l_hyf+r_hyf|, this + string qualifies for hyphenation; however, |uc_hyph| must be positive, if $c_1$ is uppercase. + + The hyphenation process takes place in three stages. First, the candidate sequence $c_1 \ldots + c_n$ is found; then potential positions for hyphens are determined by referring to hyphenation + tables; and finally, the nodes $p_a\ldots p_b$ are replaced by a new sequence of nodes that + includes the discretionary breaks found. + + Fortunately, we do not have to do all this calculation very often, because of the way it has + been taken out of \TEX's inner loop. 
For example, when the second edition of the author's + 700-page book {\sl Seminumerical Algorithms} was typeset by \TEX, only about 1.2 hyphenations + needed to be tried per paragraph, since the line breaking algorithm needed to use two passes on + only about 5 per cent of the paragraphs. (This is not true in \LUATEX: we always hyphenate the + whole list.) + + When a word has been set up to contain a candidate for hyphenation, \TEX\ first looks to see if it + is in the user's exception dictionary. If not, hyphens are inserted based on patterns that + appear within the given word, using an algorithm due to Frank~M. Liang. + + \stopnarrower + + This is incompatible with \TEX\ because the first word of a paragraph can be hyphenated, but + most European users seem to agree that prohibiting hyphenation there was not the best idea ever. + + To be documented: |\hyphenationmode| (a bit set). + + \startbuffer + \parindent0pt \hsize=1.1cm + 12-34-56 \par + 12-34-\hbox{56} \par + 12-34-\vrule width 1em height 1.5ex \par + 12-\hbox{34}-56 \par + 12-\vrule width 1em height 1.5ex-56 \par + \hjcode`\1=`\1 \hjcode`\2=`\2 \hjcode`\3=`\3 \hjcode`\4=`\4 \vskip.5cm + 12-34-56 \par + 12-34-\hbox{56} \par + 12-34-\vrule width 1em height 1.5ex \par + 12-\hbox{34}-56 \par + 12-\vrule width 1em height 1.5ex-56 \par + \stopbuffer + + \typebuffer + + \startpacked \getbuffer \stopbuffer + + We only accept an explicit hyphen when there is a preceding glyph and we skip a sequence of + explicit hyphens as that normally indicates a \type {--} or \type {---} ligature in which case + we can in a worst case usage get bad node lists later on due to messed up ligature building as + these dashes are ligatures in base fonts. This is a side effect of separating the + hyphenation, ligaturing and kerning steps. A test is cmr with \type {------}. + + A font handler can collapse successive hyphens but it's not nice to put the burden there. 
A + somewhat messy border case is \type {----} but in \LUATEX\ we don't treat \type {--} and \type + {---} special. Also, traditional \TEX\ will break a line at \type {-foo} but this can be + disabled by setting the automatic mode to \type {1}. + +*/ + +// # define is_hyphen_char(chr) (get_hc_code(chr) || chr == ex_hyphen_char_par) + +inline static halfword tex_aux_is_hyphen_char(halfword chr) +{ + if (tex_get_hc_code(chr)) { + return tex_get_hc_code(chr); + } else if (chr == ex_hyphen_char_par) { + return ex_hyphen_char_par; + } else { + return null; + } +} + +/*tex + + Starting at |r| we look for the glyph that can start the next word and return it, or we + return |null| when we run out of nodes. Word boundaries and glue permit a start, a + discretionary permits one when it has the post word option set, and boxes, rules, dir and + whatsit nodes forbid one when the last seen glyph has the strict start mode set. Inline math + is skipped as a whole. A glyph that is a hyphen character either gets turned into a compound + word break or makes us jump over the run of hyphens. Any other glyph qualifies when a start is + permitted, its language is at least |first_language|, hyphenation is enabled for it and its + hj code is acceptable. + +*/ + +static halfword tex_aux_find_next_wordstart(halfword r, halfword first_language) +{ + int start_ok = 1; + int mathlevel = 1; + halfword lastglyph = r; + while (r) { + switch (node_type(r)) { + case boundary_node: + if (node_subtype(r) == word_boundary) { + start_ok = 1; + } + break; + case disc_node: + start_ok = has_disc_option(r, disc_option_post_word); + break; + case hlist_node: + case vlist_node: + case rule_node: + case dir_node: + case whatsit_node: + if (hyphenation_permitted(glyph_hyphenate(lastglyph), strict_start_hyphenation_mode)) { + start_ok = 0; + } + break; + case glue_node: + start_ok = 1; + break; + case math_node: + /*tex + Every formula needs a fresh nesting count. Without the reset the counter is + still zero after the first formula and we would walk right into the next one. + */ + mathlevel = 1; + while (mathlevel > 0) { + r = node_next(r); + if (! r) { + return r; + } else if (node_type(r) == math_node) { + if (node_subtype(r) == begin_inline_math) { + mathlevel++; + } else { + mathlevel--; + } + } + } + break; + case glyph_node: + { + /*tex + When we have no word yet and meet a hyphen (equivalent) we should just + keep going. This is not compatible but it does make sense. + */ + int chr = glyph_character(r); + int hyp = tex_aux_is_hyphen_char(chr); + lastglyph = r; + if (hyp) { + if (hyphenation_permitted(glyph_hyphenate(r), ignore_bounds_hyphenation_mode)) { + /* maybe some tracing */ + } else { + /* todo: already check if we have hj chars left/right i.e. 
no digits and minus mess */ + halfword t = node_next(r) ; + /*tex Kind of weird that we have the opposite flag test here. */ + if (t && (node_type(t) == glyph_node) && (! tex_aux_is_hyphen_char(glyph_character(t))) && ! hyphenation_permitted(glyph_hyphenate(r), automatic_hyphenation_mode)) { + /*tex We have no word yet and the next character is a non hyphen. */ + r = tex_aux_compound_word_break(r, get_glyph_language(r), hyp); + // test case: \automatichyphenmode0 10\high{-6-1-2-4} + start_ok = 1; // todo: also in luatex + } else { + /*tex We jump over the sequence of hyphens. */ + while (t && (node_type(t) == glyph_node) && tex_aux_is_hyphen_char(glyph_character(t))) { + r = t ; + t = node_next(r) ; + } + if (t) { + /*tex We need a restart. */ + start_ok = 0; + } else { + /*tex We reached the end of the list so we have no word start. */ + return null; + } + } + } + } else if (start_ok && (get_glyph_language(r) >= first_language) && get_glyph_dohyph(r)) { + int l = tex_get_hj_code(get_glyph_language(r), chr); + if (l > 0) { + if (l == chr || l <= 32 || get_glyph_uchyph(r)) { + return r; + } else { + start_ok = 0; + } + } else { + /*tex We go on. */ + } + } else { + /*tex We go on. */ + } + } + break; + default: + start_ok = 0; + break; + } + r = node_next(r); + } + return r; /* null */ +} + +/*tex + + This is the original test, extended with bounds, but still the complex expression turned into + a function. However, it actually is part of the old mechanism where hyphenation was mixed + with ligature building and kerning, so there was this skipping over a font kern which is no + longer needed as we have separate steps. 
+ + We keep this as reference: + + \starttyping + static int valid_wordend(halfword s, halfword strict_bound) + { + if (s) { + halfword r = s; + int clang = get_glyph_language(s); + while ( (r) && + ( (type(r) == glyph_node && clang == get_glyph_language(r)) + || (type(r) == kern_node && (subtype(r) == font_kern)) + ) + ) { + r = node_next(r); + } + return (! r || (type(r) == glyph_node && clang != get_glyph_language(r)) + || type(r) == glue_node + || type(r) == penalty_node + || (type(r) == kern_node && (subtype(r) == explicit_kern || + subtype(r) == italic_kern || + subtype(r) == accent_kern )) + || ((type(r) == hlist_node || + type(r) == vlist_node || + type(r) == rule_node || + type(r) == dir_node || + type(r) == whatsit_node || + type(r) == insert_node || + type(r) == adjust_node + ) && ! (strict_bound == 2 || strict_bound == 3)) + || type(r) == boundary_node + ); + } else { + return 1; + } + } + \stopttyping + +*/ + +static int tex_aux_valid_wordend(halfword end_word, halfword r) +{ + if (r) { + switch (node_type(r)) { + // case glyph_node: + // case glue_node: + // case penalty_node: + // case kern_node: + // return 1; + case disc_node: + return has_disc_option(r, disc_option_pre_word); + case hlist_node: + case vlist_node: + case rule_node: + case dir_node: + case whatsit_node: + case insert_node: + case adjust_node: + return ! 
hyphenation_permitted(glyph_hyphenate(end_word), strict_end_hyphenation_mode); + } + } + return 1; +} + +void tex_handle_hyphenation(halfword head, halfword tail) +{ + if (head && node_next(head)) { + int callback_id = lmt_callback_defined(hyphenate_callback); + if (callback_id > 0) { + lua_State *L = lmt_lua_state.lua_instance; + int top = 0; + if (lmt_callback_okay(L, callback_id, &top)) { + int i; + lmt_node_list_to_lua(L, head); + lmt_node_list_to_lua(L, tail); + i = lmt_callback_call(L, 2, 0, top); + if (i) { + lmt_callback_error(L, top, i); + } else { + lmt_callback_wrapup(L, top); + } + } + } else if (callback_id == 0) { + tex_hyphenate_list(head, tail); + } else { + /* -1 : disabled */ + } + } +} + +static int tex_aux_hnj_hyphen_hyphenate( + hjn_dictionary *dict, + halfword first, + halfword last, + int length, + halfword left, + halfword right, + lang_variables *lan +) +{ + /*tex +2 for dots at each end, +1 for points outside characters. */ + int ext_word_len = length + 2; + int hyphen_len = ext_word_len + 1; + /*tex Because we have a limit of 64 characters we could just use a static array here: */ + char *hyphens = lmt_memory_calloc(hyphen_len, sizeof(unsigned char)); + if (hyphens) { + halfword here; + int state = 0; + int char_num = 0; + int done = 0; + /*tex Add a '.' to beginning and end to facilitate matching. */ + node_next(begin_period) = first; + node_next(end_period) = node_next(last); + node_next(last) = end_period; + + // for (int i = 0; i < hyphen_len; i++) { + // hyphens[i] = '0'; + // } + // hyphens[hyphen_len] = 0; + + /*tex Now, run the finite state machine. 
*/ + for (char_num = 0, here = begin_period; here != node_next(end_period); here = node_next(here)) { + int ch; + if (here == begin_period || here == end_period) { + ch = '.'; + } else { + ch = tex_get_hj_code(get_glyph_language(here), glyph_character(here)); + if (ch <= 32) { + ch = glyph_character(here); + } + } + while (state != -1) { + hjn_state *hstate = &dict->states[state]; + for (int k = 0; k < hstate->num_trans; k++) { + if (hstate->trans[k].uni_ch == ch) { + char *match; + state = hstate->trans[k].new_state; + match = dict->states[state].match; + if (match) { + /*tex + We add +2 because 1 string length is one bigger than offset and 1 + hyphenation starts before first character. + + Why not store the length in states[state] instead of calculating + it each time? Okay, performance is okay but still ... + */ + int offset = (int) (char_num + 2 - (int) strlen(match)); + for (int m = 0; match[m]; m++) { + if (hyphens[offset + m] < match[m]) { + hyphens[offset + m] = match[m]; + } + } + } + goto NEXTLETTER; + } + } + state = hstate->fallback_state; + } + /*tex Nothing worked, let's go to the next character. */ + state = 0; + NEXTLETTER:; + char_num++; + } + /*tex Restore the correct pointers. */ + node_next(last) = node_next(end_period); + /*tex + Pattern is |.word.| and |word_len| is 4, |ext_word_len| is 6 and |hyphens| is 7; drop first + two and stop after |word_len-1|. 
+ */ + for (here = first, char_num = 2; here != left; here = node_next(here)) { + char_num++; + } + for (; here != right; here = node_next(here)) { + if (hyphens[char_num] & 1) { + here = tex_aux_insert_syllable_discretionary(here, lan); + done += 1; + } + char_num++; + } + lmt_memory_free(hyphens); + return done; + } else { + tex_overflow_error("patterns", hyphen_len); + return 0; + } +} + +/* we can also check the original */ + +static int tex_aux_still_okay(halfword f, halfword l, halfword r, int n, const char *utf8original) { + if (_valid_node_(f) && _valid_node_(l) && node_next(l) == r) { + int i = 0; + while (f) { + ++i; + if (node_type(f) != glyph_node) { + tex_normal_warning("language", "the hyphenated word contains non-glyphs, skipping"); + return 0; + } else { + halfword c = (halfword) aux_str2uni((const unsigned char *) utf8original); + utf8original += utf8_size(c); + if (! (c && c == glyph_character(f))) { + tex_normal_warning("language", "the hyphenated word contains different characters, skipping"); + return 0; + } else if (f != l) { + f = node_next(f); + } else if (i == n) { + return 1; + } else { + tex_normal_warning("language", "the hyphenated word changed length, skipping"); + return 0; + } + } + } + } + tex_normal_warning("language", "the hyphenation list is messed up, skipping"); + return 0; +} + +static void tex_aux_hyphenate_show(halfword beg, halfword end) +{ + if (_valid_node_(beg) && _valid_node_(end)) { + halfword nxt = node_next(end); + node_next(end) = null; + tex_show_node_list(beg, 100, 10000); + node_next(end) = nxt; + } +} + +/* maybe split: first a processing run */ + +inline static int is_traditional_hyphen(halfword n) +{ + return ( + (glyph_character(n) == ex_hyphen_char_par) /*tex parameter */ + && (has_font_text_control(glyph_font(n),text_control_collapse_hyphens)) /*tex font driven */ + && (hyphenation_permitted(glyph_hyphenate(n), collapse_hyphenation_mode)) /*tex language driven */ + ); +} + +int tex_collapse_list(halfword 
head, halfword c1, halfword c2, halfword c3) /* ex_hyphen_char_par 0x2013 0x2014 */ +{ + /*tex Let's play safe: */ + halfword found = 0; + if (head && c1 && c2 && c3) { + halfword n1 = head; + while (n1) { + halfword n2 = node_next(n1); + switch (node_type(n1)) { + case glyph_node: + if (is_traditional_hyphen(n1)) { + set_glyph_discpart(n1, glyph_discpart_always); + if (n2 && node_type(n2) == glyph_node && is_traditional_hyphen(n2) && glyph_font(n1) == glyph_font(n2)) { + halfword n3 = node_next(n2); + if (n3 && node_type(n3) == glyph_node && is_traditional_hyphen(n3) && glyph_font(n1) == glyph_font(n3)) { + halfword n4 = node_next(n3); + glyph_character(n1) = c3; + tex_try_couple_nodes(n1, n4); + tex_flush_node(n2); + tex_flush_node(n3); + n1 = n4; + } else { + glyph_character(n1) = c2; + tex_try_couple_nodes(n1, n3); + tex_flush_node(n2); + n1 = n3; + } + found = 1; + goto AGAIN; + } else { + glyph_character(n1) = c1; /* can become language dependent */ + } + } + break; + case disc_node: + { + halfword done = 0; + if (disc_pre_break_head(n1) && tex_collapse_list(disc_pre_break_head(n1), c1, c2, c3)) { + ++done; + } + if (disc_post_break_head(n1) && tex_collapse_list(disc_post_break_head(n1), c1, c2, c3)) { + ++done; + } + if (disc_no_break_head(n1) && tex_collapse_list(disc_no_break_head(n1), c1, c2, c3)) { + ++done; + } + if (done) { + tex_check_disc_field(n1); + } + break; + } + default: + break; + } + n1 = n2; + AGAIN:; + } + } + return found; +} + +void tex_hyphenate_list(halfword head, halfword tail) +{ + /*tex Let's play safe: */ + if (tail) { + halfword first_language = first_valid_language_par; /* combine with check below */ + halfword trace = tracing_hyphenation_par; + halfword r = head; + /*tex + This first movement assures two things: + + \startitemize + \startitem + That we won't waste lots of time on something that has been handled already (in + that case, none of the glyphs match |simple_character|). 
+ \stopitem + \startitem + That the first word can be hyphenated. If the movement was not explicit, then + the indentation at the start of a paragraph list would make |find_next_wordstart()| + look too far ahead. + \stopitem + \stopitemize + */ + while (r && node_type(r) != glyph_node) { + r = node_next(r); + } + if (r) { + r = tex_aux_find_next_wordstart(r, first_language); + if (r) { + lang_variables langdata; + char utf8word[(4 * max_size_of_word) + 1] = { 0 }; + char utf8original[(4 * max_size_of_word) + 1] = { 0 }; + char *utf8ptr = utf8word; + char *utf8ori = utf8original; + int word_length = 0; + int explicit_hyphen = 0; + int last_char = 0; + int valid = 0; + halfword explicit_start = null; + halfword saved_tail = node_next(tail); + halfword penalty = tex_new_penalty_node(0, word_penalty_subtype); + /* kind of curious hack: this temporary penalty is coupled after |tail| here and flushed again (with |saved_tail| restored) at the |ABORT| label */ + tex_attach_attribute_list_copy(penalty, r); + tex_couple_nodes(tail, penalty); /* todo: attribute */ + while (r) { + halfword word_start = r; + int word_language = get_glyph_language(word_start); + if (tex_is_valid_language(word_language)) { + halfword word_end = r; + int lhmin = get_glyph_lhmin(word_start); + int rhmin = get_glyph_rhmin(word_start); + int hmin = tex_get_hyphenation_min(word_language); + halfword word_font = glyph_font(word_start); + if (! 
tex_is_valid_font(word_font) || font_hyphen_char(word_font) < 0) { + /*tex For backward compatibility we set: */ + word_font = 0; + } + langdata.pre_hyphen_char = tex_get_pre_hyphen_char(word_language); + langdata.post_hyphen_char = tex_get_post_hyphen_char(word_language); + while (r && node_type(r) == glyph_node && word_language == get_glyph_language(r)) { + halfword chr = glyph_character(r); + halfword hyp = tex_aux_is_hyphen_char(chr); + if (word_language >= first_language) { + last_char = tex_get_hj_code(word_language, chr); + if (last_char > 0) { + goto GOFORWARD; + } + } + if (hyp) { + last_char = hyp; + // if (last_char) { + // goto GOFORWARD; + // } + } else { + break; + } + GOFORWARD: + // explicit_hyphen = is_hyphen_char(chr); + explicit_hyphen = hyp; + if (explicit_hyphen && node_next(r) && node_type(node_next(r)) != glyph_node && hyphenation_permitted(glyph_hyphenate(r), ignore_bounds_hyphenation_mode)) { + /* maybe some tracing */ + explicit_hyphen = 0; + } + if (explicit_hyphen) { + break; + } else { + word_length++; + if (word_length >= max_size_of_word) { + /* tex_normal_warning("language", "ignoring long word"); */ + while (r && node_type(r) == glyph_node) { + r = node_next(r); + } + goto PICKUP; + } else { + if (last_char <= 32) { + if (last_char == 32) { + last_char = 0 ; + } + if (word_length <= lhmin) { + lhmin = lhmin - last_char + 1 ; + if (lhmin < 0) { + lhmin = 1; + } + } + if (word_length >= rhmin) { + rhmin = rhmin - last_char + 1 ; + if (rhmin < 0) { + rhmin = 1; + } + } + hmin = hmin - last_char + 1 ; + if (hmin < 0) { + rhmin = 1; + } + last_char = chr ; + } + utf8ori = aux_uni2string(utf8ori, (unsigned) chr); + utf8ptr = aux_uni2string(utf8ptr, (unsigned) last_char); + word_end = r; + r = node_next(r); + } + } + } + if (explicit_hyphen) { + /*tex We are not at the start, so we only need to look ahead. */ + if ((get_glyph_discpart(r) == glyph_discpart_replace && ! 
hyphenation_permitted(glyph_hyphenate(r), syllable_hyphenation_mode))) { + /*tex + This can be the consequence of inhibition too, see |finish_discretionary| + in which case the replace got injected which can have a hyphen. And we want + to run the callback if set in order to replace. + */ + valid = 1; + goto MESSYCODE; + } else { + /*tex Maybe we should get rid of this ----- stuff. */ + halfword t = node_next(r); + if (t && node_type(t) == glyph_node && ! tex_aux_is_hyphen_char(glyph_character(t)) && hyphenation_permitted(glyph_hyphenate(t), automatic_hyphenation_mode)) { + /*tex we have a word already but the next character may not be a hyphen too */ + halfword g = r; + r = tex_aux_compound_word_break(r, get_glyph_language(g), explicit_hyphen); + if (trace > 1) { + *utf8ori = 0; + tex_begin_diagnostic(); + tex_print_format("[language: compound word break after %s]", utf8original); + tex_end_diagnostic(); + } + if (hyphenation_permitted(glyph_hyphenate(g), compound_hyphenation_mode)) { + explicit_hyphen = 0; + if (hyphenation_permitted(glyph_hyphenate(g), force_handler_hyphenation_mode) || hyphenation_permitted(glyph_hyphenate(g), feedback_compound_hyphenation_mode)) { + set_disc_option(r, disc_option_pre_word | disc_option_post_word); + explicit_start = null; + valid = 1; + goto MESSYCODE; + } else { + if (! explicit_start) { + explicit_start = word_start; + } + /*tex For exceptions. */ + utf8ptr = aux_uni2string(utf8ptr, '-'); + r = t; + continue; + } + } + } else { + /*tex We jump over the sequence of hyphens ... traditional. */ + while (t && node_type(t) == glyph_node && tex_aux_is_hyphen_char(glyph_character(t))) { + r = t; + t = node_next(r); + } + if (! t) { + /*tex we reached the end of the list and will quit the loop later */ + r = null; + } + } + } + } else { + valid = tex_aux_valid_wordend(word_end, r); + MESSYCODE: + /*tex We have a word, r is at the next node. 
*/ + if (word_font && word_language >= first_language) { + /*tex We have a language, actually we already tested that. */ + struct tex_language *lang = lmt_language_state.languages[word_language]; + if (lang) { + char *replacement = NULL; + halfword start = explicit_start ? explicit_start : word_start; + int okay = word_length >= lhmin + rhmin && (hmin <= 0 || word_length >= hmin) && hyphenation_permitted(glyph_hyphenate(start), syllable_hyphenation_mode); + *utf8ptr = 0; + *utf8ori = 0; + if (lang->wordhandler && hyphenation_permitted(glyph_hyphenate(start), force_handler_hyphenation_mode)) { + halfword restart = node_prev(start); /*tex before the word. */ + int done = lmt_handle_word(lang, utf8original, utf8word, word_length, start, word_end, &replacement); + if (replacement) { + if (tex_aux_still_okay(start, word_end, r, word_length, utf8original)) { + goto EXCEPTIONS2; + } else { + goto PICKUP; + } + } else { + /* 1: restart 2: exceptions+patterns 3: patterns *: next word */ + switch (done) { + case 1: + if (_valid_node_(restart)) { + r = restart; + } else if (_valid_node_(start)) { + r = node_prev(start); + } + if (! r) { + if (_valid_node_(head)) { + tex_normal_warning("language", "the hyphenation list is messed up, recovering"); + r = head; + } else { + tex_normal_error("language", "the hyphenated head is messed up, aborting"); + return; + } + } + goto PICKUP; + case 2: + if (tex_aux_still_okay(start, word_end, r, word_length, utf8original)) { + goto EXCEPTIONS1; + } else { + goto PICKUP; + } + case 3: + if (tex_aux_still_okay(start, word_end, r, word_length, utf8original)) { + goto PATTERNS; + } else { + goto PICKUP; + } + default: + if (_valid_node_(r)) { /* or word_end */ + goto PICKUP; + } else if (_valid_node_(tail)) { + tex_normal_warning("language", "the hyphenation list is messed up, quitting"); + goto ABORT; + } else { + // tex_normal_error("language","the hyphenated tail is messed up, aborting"); + return; + } + } + } + } + if (! okay || ! 
valid) { + goto PICKUP; + } + /*tex + This is messy and nasty: we can have a word with a - in it which is why + we have two branches. Also, every word that suits the length criteria + is checked via \LUA. Optimizing this because tests have demonstrated + that checking against the min and max lengths of exception strings has + no gain. + */ + EXCEPTIONS1: + if (lang->exceptions) { + replacement = tex_aux_hyphenation_exception(lang->exceptions, utf8word); + } + EXCEPTIONS2: + if (replacement) { + /*tex handle the exception and go on to the next word */ + halfword start = explicit_start ? explicit_start : word_start; + halfword beg = node_prev(start); + tex_aux_do_exception(start, r, replacement); // r == next_node(word_end) + if (trace > 1) { + tex_begin_diagnostic(); + tex_print_format("[language: exception %s to %s]", utf8original, replacement); + if (trace > 2) { + tex_aux_hyphenate_show(node_next(beg), node_prev(r)); + } + tex_end_diagnostic(); + } + lmt_memory_free(replacement); + goto PICKUP; + } + PATTERNS: + if (lang->patterns) { + if (explicit_start) { + /*tex We're done already */ + } else if (hyphenation_permitted(glyph_hyphenate(word_start), syllable_hyphenation_mode)) { + halfword left = word_start; + halfword right = r; /*tex We're one after |word_end|. */ + for (int i = lhmin; i > 1; i--) { + left = node_next(left); + if (! left || left == right) { + goto PICKUP; + } + } + if (right != left) { + int done = 0; + for (int i = rhmin; i > 0; i--) { + right = node_prev(right); + if (! 
+ right || right == left) { + goto PICKUP; + } + } + done = tex_aux_hnj_hyphen_hyphenate(lang->patterns, word_start, word_end, word_length, left, right, &langdata); + if (trace > 1) { + tex_begin_diagnostic(); + if (done) { + tex_print_format("[language: hyphenated %s at %i positions]", utf8original, done); + if (trace > 2) { + tex_aux_hyphenate_show(node_next(left), node_prev(right)); + } + } else { + tex_print_format("[language: not hyphenated %s]", utf8original); + } + tex_end_diagnostic(); + } + } + } + } + } + } + } + } + PICKUP: + explicit_start = null ; + explicit_hyphen = 0; + word_length = 0; + utf8ptr = utf8word; + utf8ori = utf8original; + if (r) { + r = tex_aux_find_next_wordstart(r, first_language); + } else { + break; + } + } + ABORT: + tex_flush_node(node_next(tail)); + node_next(tail) = saved_tail; + } + } + } +} + +/*tex + Replace |glyph| in its list by a new discretionary of subtype |code|. The glyph itself becomes + the no break field and copies made with |tex_copy_node_list| become the pre and post break + fields. When |keepkern| is set and the glyph is directly followed by an italic kern, that kern + stays linked to the glyph so that it ends up in the fields too. The new disc node is returned. +*/ + +halfword tex_glyph_to_discretionary(halfword glyph, quarterword code, int keepkern) +{ + halfword prev = node_prev(glyph); + halfword next = node_next(glyph); + halfword disc = tex_new_disc_node(code); + halfword kern = null; + if (keepkern && next && node_type(next) == kern_node && node_subtype(next) == italic_kern_subtype) { + /*tex The italic kern itself travels with the glyph, so we cut the list right after it. */ + kern = next; + next = node_next(kern); + node_next(kern) = null; + } else { + node_next(glyph) = null; + } + node_prev(glyph) = null; + tex_attach_attribute_list_copy(disc, glyph); + tex_set_disc_field(disc, pre_break_code, tex_copy_node_list(glyph, null)); + tex_set_disc_field(disc, post_break_code, tex_copy_node_list(glyph, null)); + tex_set_disc_field(disc, no_break_code, glyph); + tex_try_couple_nodes(prev, disc); + tex_try_couple_nodes(disc, next); + return disc; +}
\ No newline at end of file diff --git a/source/luametatex/source/tex/texlanguage.h b/source/luametatex/source/tex/texlanguage.h new file mode 100644 index 000000000..f00bf16c6 --- /dev/null +++ b/source/luametatex/source/tex/texlanguage.h @@ -0,0 +1,94 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_TEXLANG_H +# define LMT_TEXLANG_H + +/*tex We resolve the properties beforehand and store them in a struct. */ + +typedef struct language_state_info { + struct tex_language **languages; + memory_data language_data; + lua_Integer handler_table_id; + int handler_count; +} language_state_info; + +extern language_state_info lmt_language_state; + +typedef struct lang_variables { + halfword pre_hyphen_char; + halfword post_hyphen_char; + halfword pre_exhyphen_char; + halfword post_exhyphen_char; +} lang_variables; + +/*tex This is used in: */ + +typedef struct tex_language { + halfword pre_hyphen_char; + halfword post_hyphen_char; + halfword pre_exhyphen_char; + halfword post_exhyphen_char; + halfword hyphenation_min; + halfword id; + hjn_dictionary *patterns; + int exceptions; + int wordhandler; + sa_tree hjcode_head; +} tex_language; + +extern tex_language *tex_new_language (halfword n); +extern tex_language *tex_get_language (halfword n); +/* void tex_free_languages (void); */ + +extern void tex_load_patterns (struct tex_language *lang, const unsigned char *buf); +extern void tex_load_hyphenation (struct tex_language *lang, const unsigned char *buf); + +extern void tex_handle_hyphenation (halfword h, halfword t); +extern void tex_clear_patterns (struct tex_language *lang); +extern void tex_clear_hyphenation (struct tex_language *lang); +extern const char *tex_clean_hyphenation (halfword id, const char *buffer, char **cleaned); + +extern void tex_hyphenate_list (halfword head, halfword tail); +extern int tex_collapse_list (halfword head, halfword c1, halfword c2, halfword c3); + +extern void tex_set_pre_hyphen_char (halfword lan, halfword val); 
+extern void tex_set_post_hyphen_char (halfword lan, halfword val); +extern halfword tex_get_pre_hyphen_char (halfword lan); +extern halfword tex_get_post_hyphen_char (halfword lan); + +extern void tex_set_pre_exhyphen_char (halfword lan, halfword val); +extern void tex_set_post_exhyphen_char (halfword lan, halfword val); +extern halfword tex_get_pre_exhyphen_char (halfword lan); +extern halfword tex_get_post_exhyphen_char (halfword lan); + +extern void tex_set_hyphenation_min (halfword lan, halfword val); +extern halfword tex_get_hyphenation_min (halfword lan); + +extern void tex_dump_language_data (dumpstream f); +extern void tex_undump_language_data (dumpstream f); + +/* char *tex_get_exception_strings (struct tex_language *lang); */ + +extern void tex_load_tex_patterns (halfword curlang, halfword head); +extern void tex_load_tex_hyphenation (halfword curlang, halfword head); + +extern void tex_initialize_languages (void); +extern int tex_is_valid_language (halfword n); + +extern halfword tex_glyph_to_discretionary (halfword glyph, quarterword code, int keepkern); + +/* +void tex_hnj_hyphen_hyphenate( + HyphenDict *dict, + halfword first, + halfword last, + int size, + halfword left, + halfword right, + lang_variables *lan +); +*/ + +# endif diff --git a/source/luametatex/source/tex/texlegacy.c b/source/luametatex/source/tex/texlegacy.c new file mode 100644 index 000000000..78723a25a --- /dev/null +++ b/source/luametatex/source/tex/texlegacy.c @@ -0,0 +1,11 @@ +/*tex + + For a while I keep some recently changed code here. Just in case ... we can always look in + the GIT history if needed. Much more happened but that code has gone already. This is from + end januari 2021. Eventually all here goes (or already went) into the void, including code + that has been rewritten, pruned, upgraded but eventually got discarded. The fittest code + survives. + +*/ + +/* pagebuilder state experiment in 2021-05-23 zip */
\ No newline at end of file diff --git a/source/luametatex/source/tex/texlinebreak.c b/source/luametatex/source/tex/texlinebreak.c new file mode 100644 index 000000000..2172277ec --- /dev/null +++ b/source/luametatex/source/tex/texlinebreak.c @@ -0,0 +1,3531 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + We come now to what is probably the most interesting algorithm of \TEX: the mechanism for + choosing the \quote {best possible} breakpoints that yield the individual lines of a paragraph. + \TEX's line-breaking algorithm takes a given horizontal list and converts it to a sequence of + boxes that are appended to the current vertical list. In the course of doing this, it creates + a special data structure containing three kinds of records that are not used elsewhere in + \TEX. Such nodes are created while a paragraph is being processed, and they are destroyed + afterwards; thus, the other parts of \TEX\ do not need to know anything about how line-breaking + is done. + + The method used here is based on an approach devised by Michael F. Plass and the author in 1977, + subsequently generalized and improved by the same two people in 1980. A detailed discussion + appears in {\sl SOFTWARE---Practice \AM\ Experience \bf11} (1981), 1119--1184, where it is + shown that the line-breaking problem can be regarded as a special case of the problem of + computing the shortest path in an acyclic network. The cited paper includes numerous examples + and describes the history of line breaking as it has been practiced by printers through the + ages. The present implementation adds two new ideas to the algorithm of 1980: Memory space + requirements are considerably reduced by using smaller records for inactive nodes than for + active ones, and arithmetic overflow is avoided by using \quote {delta distances} instead of + keeping track of the total distance from the beginning of the paragraph to the current point. 
+ + The |line_break| procedure should be invoked only in horizontal mode; it leaves that mode and + places its output into the current vlist of the enclosing vertical mode (or internal vertical + mode). There is one explicit parameter: |d| is true for partial paragraphs preceding display + math mode; in this case the amount of additional penalty inserted before the final line is + |display_widow_penalty| instead of |widow_penalty|. + + There are also a number of implicit parameters: The hlist to be broken starts at |node_next + (head)|, and it is nonempty. The value of |prev_graf| in the enclosing semantic level tells + where the paragraph should begin in the sequence of line numbers, in case hanging indentation + or |\parshape| are in use; |prev_graf| is zero unless this paragraph is being continued after a + displayed formula. Other implicit parameters, such as the |par_shape_ptr| and various penalties + to use for hyphenation, etc., appear in |eqtb|. + + After |line_break| has acted, it will have updated the current vlist and the value of + |prev_graf|. Furthermore, the global variable |just_box| will point to the final box created + by |line_break|, so that the width of this line can be ascertained when it is necessary to + decide whether to use |above_display_skip| or |above_display_short_skip| before a displayed + formula. + + We have an additional parameter |\parfillleftskip| and below we cheat a bit. We add two glue + nodes so that the par builder will work the same and doesn't need to be adapted, but when we're + done we move the leftbound node to the beginning of the (last) line. + + Todo: change some variable names to more meaningful ones so that the code is easier to + understand. (Remark for myself: the lua variant that i use for playing around occasionally is + not in sync with the code here!) 
+ +*/ + +linebreak_state_info lmt_linebreak_state = { + .just_box = 0, + .last_line_fill = 0, + .no_shrink_error_yet = 0, + .second_pass = 0, + .final_pass = 0, + .threshold = 0, + .adjust_spacing = 0, + .adjust_spacing_step = 0, + .adjust_spacing_shrink = 0, + .adjust_spacing_stretch = 0, + .max_stretch_ratio = 0, + .max_shrink_ratio = 0, + .current_font_step = 0, + .passive = 0, + .printed_node = 0, + .pass_number = 0, + .active_width = { 0 }, + .background = { 0 }, + .break_width = { 0 }, + // .auto_breaking = 0, + // .math_level = 0, + .internal_penalty_interline = 0, + .internal_penalty_broken = 0, + .internal_left_box = null, + .internal_left_box_width = 0, + .init_internal_left_box = 0, + .init_internal_left_box_width = 0, + .internal_right_box = null, + .internal_right_box_width = 0, + .internal_middle_box = null, + .disc_width = { 0 }, + .minimal_demerits = { 0 }, + .minimum_demerits = 0, + .easy_line = 0, + .last_special_line = 0, + .first_width = 0, + .second_width = 0, + .first_indent = 0, + .second_indent = 0, + .best_bet = 0, + .fewest_demerits = 0, + .best_line = 0, + .actual_looseness = 0, + .line_difference = 0, + .do_last_line_fit = 0, + .fill_width = { 0 }, + .dir_ptr = 0, + .warned = 0, + .calling_back = 0, +}; + +/*tex + We could use a bit larger array and glue_orders where normal starts at 0 so we then need a larger + array. Let's not do that now. +*/ + +typedef enum fill_orders { + fi_order = 0, + fil_order = 1, + fill_order = 2, + filll_order = 3, +} fill_orders; + +/*tex + + The |just_box| variable has the |hlist_node| for the last line of the new paragraph. In its + complete form, |line_break| is a rather lengthy procedure --- sort of a small world unto itself + --- we must build it up little by little. Below you see only the general outline. The main task + performed here is to move the list from |head| to |temp_head| and go into the enclosing semantic + level. 
We also append the |\parfillskip| glue to the end of the paragraph, removing a space (or + other glue node) if it was there, since spaces usually precede blank lines and instances of + |$$|. The |par_fill_skip| is preceded by an infinite penalty, so it will never be considered as + a potential breakpoint. + + */ + +void tex_line_break_prepare( + halfword par, + halfword *tail, + halfword *parinit_left_skip_glue, + halfword *parinit_right_skip_glue, + halfword *parfill_left_skip_glue, + halfword *parfill_right_skip_glue, + halfword *final_penalty +) +{ + /* too much testing of next */ + if (node_type(par) == par_node) { + *tail = *tail ? *tail : tex_tail_of_node_list(par); + *final_penalty = tex_new_penalty_node(infinite_penalty, line_penalty_subtype); + *parfill_left_skip_glue = tex_new_glue_node(tex_get_par_par(par, par_par_fill_left_skip_code), par_fill_left_skip_glue); + *parfill_right_skip_glue = tex_new_glue_node(tex_get_par_par(par, par_par_fill_right_skip_code), par_fill_right_skip_glue); + *parinit_left_skip_glue = null; + *parinit_right_skip_glue = null; + if (par != *tail && node_type(*tail) == glue_node && ! 
tex_is_par_init_glue(*tail)) { + halfword prev = node_prev(*tail); + node_next(prev) = null; + tex_flush_node(*tail); + *tail = prev; + } + tex_attach_attribute_list_copy(*final_penalty, par); + tex_attach_attribute_list_copy(*parfill_left_skip_glue, par); + tex_attach_attribute_list_copy(*parfill_right_skip_glue, par); + tex_try_couple_nodes(*tail, *final_penalty); + tex_try_couple_nodes(*final_penalty, *parfill_left_skip_glue); + tex_try_couple_nodes(*parfill_left_skip_glue, *parfill_right_skip_glue); + *tail = *parfill_right_skip_glue; + if (node_next(par)) { + halfword n = node_next(par); + while (n) { + if (node_type(n) == glue_node && node_subtype(n) == indent_skip_glue) { + *parinit_left_skip_glue = tex_new_glue_node(tex_get_par_par(par, par_par_init_left_skip_code), par_init_left_skip_glue); + *parinit_right_skip_glue = tex_new_glue_node(tex_get_par_par(par, par_par_init_right_skip_code), par_init_right_skip_glue); + tex_attach_attribute_list_copy(*parinit_left_skip_glue, par); + tex_attach_attribute_list_copy(*parinit_right_skip_glue, par); + tex_try_couple_nodes(*parinit_right_skip_glue, n); + tex_try_couple_nodes(*parinit_left_skip_glue, *parinit_right_skip_glue); + tex_try_couple_nodes(par, *parinit_left_skip_glue); + break; + } else { + n = node_next(n); + } + } + } + } +} + +void tex_line_break(int d, int line_break_context) +{ + halfword head = node_next(cur_list.head); + /*tex There should be a local par node at the beginning! */ + if (node_type(head) == par_node) { + /*tex We need this for over- or underfull box messages. */ + halfword tail = cur_list.tail; + lmt_packaging_state.pack_begin_line = cur_list.mode_line; + node_prev(head) = null; + /*tex Hyphenate, driven by callback or fallback to normal \TEX. 
+ */ + if (tex_list_has_glyph(head)) { + tex_handle_hyphenation(head, tail); + head = tex_handle_glyphrun(head, line_break_context, par_dir(head)); + tail = tex_tail_of_node_list(head); + tex_try_couple_nodes(cur_list.head, head); + cur_list.tail = tail; + } + /*tex Removing (only one) trailing glue node, when present, is no longer done here: |tex_line_break_prepare| takes care of it. */ + // if (head != tail && node_type(tail) == glue_node && ! tex_is_par_init_glue(tail)) { + // halfword prev = node_prev(tail); + // node_next(prev) = null; + // tex_flush_node(tail); + // cur_list.tail = prev; + // } + node_next(temp_head) = head; + /*tex There should be a local par node at the beginning! */ + if (node_type(head) == par_node) { + /*tex + The tail thing is a bit weird here as it's not the tail. One day I will look into + this. One complication is that we have the normal break routine or a callback that + replaces it but that callback can call the normal routine itself with specific + parameters set. + */ + halfword start_of_par; + halfword par = head; + halfword parinit_left_skip_glue = null; + halfword parinit_right_skip_glue = null; + halfword parfill_left_skip_glue = null; + halfword parfill_right_skip_glue = null; + halfword final_penalty = null; + tex_line_break_prepare(par, &tail, &parinit_left_skip_glue, &parinit_right_skip_glue, &parfill_left_skip_glue, &parfill_right_skip_glue, &final_penalty); + cur_list.tail = tail; + /*tex + We start with a prepared list. If you mess with that the linebreak routine might not + work well especially if the pointers are messed up. So be it. + */ + lmt_node_filter_callback(pre_linebreak_filter_callback, line_break_context, temp_head, &(cur_list.tail)); + /*tex + We assume that the list is still okay. 
+ */ + lmt_linebreak_state.last_line_fill = cur_list.tail; + tex_pop_nest(); + start_of_par = cur_list.tail; + lmt_linebreak_state.calling_back = 1; + if (lmt_linebreak_callback(d, temp_head, &(cur_list.tail))) { + /*tex + When we end up here we have a prepared list so we need to make sure that when + the callback uses that list with the built in break routine we don't do that + twice. One should work on copies! Afterwards we need to find the correct value + for the |just_box|: the loop below keeps the last |hlist_node| that it encounters. + */ + halfword box_search = cur_list.tail; + lmt_linebreak_state.just_box = null; + if (box_search) { + do { + if (node_type(box_search) == hlist_node) { + lmt_linebreak_state.just_box = box_search; + } + box_search = node_next(box_search); + } while (box_search); + } + if (! lmt_linebreak_state.just_box) { + tex_handle_error( + succumb_error_type, + "Invalid linebreak_filter", + "A linebreaking routine should return a non-empty list of nodes and at least one\n" + "of those has to be a \\hbox. Sorry, I cannot recover from this." 
+ ); + } + } else { + line_break_properties properties = { + .initial_par = par, + .display_math = d, + .tracing_paragraphs = tracing_paragraphs_par, + .paragraph_dir = par_dir(par), + .parfill_left_skip = parfill_left_skip_glue, + .parfill_right_skip = parfill_right_skip_glue, + .parinit_left_skip = parinit_left_skip_glue, + .parinit_right_skip = parinit_right_skip_glue, + .pretolerance = tex_get_par_par(par, par_pre_tolerance_code), + .tolerance = tex_get_par_par(par, par_tolerance_code), + .emergency_stretch = tex_get_par_par(par, par_emergency_stretch_code), + .looseness = tex_get_par_par(par, par_looseness_code), + .adjust_spacing = tex_get_par_par(par, par_adjust_spacing_code), + .protrude_chars = tex_get_par_par(par, par_protrude_chars_code), + .adj_demerits = tex_get_par_par(par, par_adj_demerits_code), + .line_penalty = tex_get_par_par(par, par_line_penalty_code), + .last_line_fit = tex_get_par_par(par, par_last_line_fit_code), + .double_hyphen_demerits = tex_get_par_par(par, par_double_hyphen_demerits_code), + .final_hyphen_demerits = tex_get_par_par(par, par_final_hyphen_demerits_code), + .hsize = tex_get_par_par(par, par_hsize_code), + .left_skip = tex_get_par_par(par, par_left_skip_code), + .right_skip = tex_get_par_par(par, par_right_skip_code), + .hang_indent = tex_get_par_par(par, par_hang_indent_code), + .hang_after = tex_get_par_par(par, par_hang_after_code), + .par_shape = tex_get_par_par(par, par_par_shape_code), + .inter_line_penalty = tex_get_par_par(par, par_inter_line_penalty_code), + .inter_line_penalties = tex_get_par_par(par, par_inter_line_penalties_code), + .club_penalty = tex_get_par_par(par, par_club_penalty_code), + .club_penalties = tex_get_par_par(par, par_club_penalties_code), + .widow_penalty = tex_get_par_par(par, par_widow_penalty_code), + .widow_penalties = tex_get_par_par(par, par_widow_penalties_code), + .display_widow_penalty = tex_get_par_par(par, par_display_widow_penalty_code), + .display_widow_penalties = 
tex_get_par_par(par, par_display_widow_penalties_code), + .orphan_penalty = tex_get_par_par(par, par_orphan_penalty_code), + .orphan_penalties = tex_get_par_par(par, par_orphan_penalties_code), + .broken_penalty = tex_get_par_par(par, par_broken_penalty_code), + .baseline_skip = tex_get_par_par(par, par_baseline_skip_code), + .line_skip = tex_get_par_par(par, par_line_skip_code), + .line_skip_limit = tex_get_par_par(par, par_line_skip_limit_code), + .adjust_spacing_step = tex_get_par_par(par, par_adjust_spacing_step_code), + .adjust_spacing_shrink = tex_get_par_par(par, par_adjust_spacing_shrink_code), + .adjust_spacing_stretch = tex_get_par_par(par, par_adjust_spacing_stretch_code), + .hyphenation_mode = tex_get_par_par(par, par_hyphenation_mode_code), + .shaping_penalties_mode = tex_get_par_par(par, par_shaping_penalties_mode_code), + .shaping_penalty = tex_get_par_par(par, par_shaping_penalty_code), + }; + tex_do_line_break(&properties); + /*tex + We assume that the list is still okay when we do some post line break stuff. + */ + } + lmt_linebreak_state.calling_back = 0; + lmt_node_filter_callback(post_linebreak_filter_callback, line_break_context, start_of_par, &(cur_list.tail)); + lmt_packaging_state.pack_begin_line = 0; + return; + } + } + tex_confusion("missing local par node"); +} + +/*tex + + Glue nodes in a horizontal list that is being paragraphed are not supposed to include \quote + {infinite} shrinkability; that is why the algorithm maintains four registers for stretching but + only one for shrinking. If the user tries to introduce infinite shrinkability, the shrinkability + will be reset to finite and an error message will be issued. A boolean variable + |no_shrink_error_yet| prevents this error message from appearing more than once per paragraph. + + Beware, this does an in-place fix to the glue (which can be a register!). As we store glues a + bit different we do a different fix here. 
+ +*/ + +static scaled tex_aux_checked_shrink(halfword p) +{ + if (glue_shrink(p) && glue_shrink_order(p) != normal_glue_order) { + if (lmt_linebreak_state.no_shrink_error_yet) { + lmt_linebreak_state.no_shrink_error_yet = 0; + tex_handle_error( + normal_error_type, + "Infinite glue shrinkage found in a paragraph", + "The paragraph just ended includes some glue that has infinite shrinkability,\n" + "e.g., '\\hskip 0pt minus 1fil'. Such glue doesn't belong there---it allows a\n" + "paragraph of any length to fit on one line. But it's safe to proceed, since the\n" + "offensive shrinkability has been made finite." + ); + } + glue_shrink_order(p) = normal_glue_order; + } + return glue_shrink(p); +} + +/*tex + + A pointer variable |cur_p| runs through the given horizontal list as we look for breakpoints. + This variable is global, since it is used both by |line_break| and by its subprocedure + |try_break|. + + Another global variable called |threshold| is used to determine the feasibility of individual + lines: breakpoints are feasible if there is a way to reach them without creating lines whose + badness exceeds |threshold|. (The badness is compared to |threshold| before penalties are + added, so that penalty values do not affect the feasibility of breakpoints, except that no + break is allowed when the penalty is 10000 or more.) If |threshold| is 10000 or more, all + legal breaks are considered feasible, since the |badness| function specified above never + returns a value greater than~10000. + + Up to three passes might be made through the paragraph in an attempt to find at least one set + of feasible breakpoints. On the first pass, we have |threshold=pretolerance| and |second_pass + = final_pass = false|. If this pass fails to find a feasible solution, |threshold| is set to + |tolerance|, |second_pass| is set |true|, and an attempt is made to hyphenate as many words as + possible. 
If that fails too, we add |emergency_stretch| to the background stretchability and + set |final_pass = true|. + + |second_pass| is this our second attempt to break this paragraph and |final_path| our final + attempt to break this paragraph while |threshold| is the maximum badness on feasible lines. + + The maximum fill level for |hlist_stack|. Maybe good if larger than |2 * max_quarterword|, so + that box nesting level would overflow first. The stack for |find_protchar_left()| and + |find_protchar_right()|; |hlist_stack_level| is the fill level for |hlist_stack| + +*/ + +# define max_hlist_stack 512 + +/* We can optimize this when we have a global setting. */ + +static void tex_aux_warn_expand_pars(void) +{ + if (! lmt_linebreak_state.warned) { + tex_normal_warning("font expansion", "using fonts with different limit of expansion in one paragraph is not allowed"); + lmt_linebreak_state.warned = 1; + } +} + +static int tex_aux_check_expand_pars(halfword adjust_spacing_step, halfword f) +{ + if (adjust_spacing_step > 0) { + return 1; + } else if ((font_step(f) == 0) || ((font_max_stretch(f) == 0) && (font_max_shrink(f) == 0))) { + return 0; + } else if (lmt_linebreak_state.current_font_step < 0) { + lmt_linebreak_state.current_font_step = font_step(f); + } else if (lmt_linebreak_state.current_font_step != font_step(f)) { + tex_normal_error("font expansion", "using fonts with different step of expansion in one paragraph is not allowed"); + } + { + int m = font_max_stretch(f); + if (m) { + if (lmt_linebreak_state.max_stretch_ratio < 0) { + lmt_linebreak_state.max_stretch_ratio = m; + } else if (lmt_linebreak_state.max_stretch_ratio > m) { + lmt_linebreak_state.max_stretch_ratio = m; + tex_aux_warn_expand_pars(); + } + } + } + { + int m = font_max_shrink(f); + if (m) { + if (lmt_linebreak_state.max_shrink_ratio < 0) { + lmt_linebreak_state.max_shrink_ratio = -m; + } else if (-lmt_linebreak_state.max_shrink_ratio > -m) { + lmt_linebreak_state.max_shrink_ratio = -m; + 
tex_aux_warn_expand_pars(); + } + } + } + return 1; +} + +/*tex + + Search left to right from list head |l|, returns 1st non-skipable item: + +*/ + +static halfword tex_aux_find_protchar_left(halfword l, int d) +{ + int done = 0 ; + halfword initial = l; + while (node_next(l) && node_type(l) == hlist_node && tex_zero_box_dimensions(l) && ! box_list(l)) { + /*tex For paragraph start with |\parindent = 0pt| or any empty hbox. */ + l = node_next(l); + done = 1 ; + } + if (! done && node_type(l) == par_node) { + l = node_next(l); + done = 1 ; + } + if (! done && d) { + while (node_next(l) && ! (node_type(l) == glyph_node || non_discardable(l))) { + /*tex standard discardables at line break, \TEX book, p 95 */ + l = node_next(l); + } + } + if (node_type(l) != glyph_node) { + halfword t; + int run = 1; + halfword hlist_stack[max_hlist_stack]; + int hlist_stack_level = 0; + do { + t = l; + while (run && node_type(l) == hlist_node && box_list(l)) { + if (hlist_stack_level >= max_hlist_stack) { + /* return tex_normal_error("push_node", "stack overflow"); */ + return initial; + } else { + hlist_stack[hlist_stack_level++] = l; + } + l = box_list(l); + } + while (run && tex_protrusion_skipable(l)) { + while (! node_next(l) && hlist_stack_level > 0) { + /*tex Don't visit this node again. */ + if (hlist_stack_level <= 0) { + /*tex This can point to some bug. */ + /* return tex_normal_error("pop_node", "stack underflow (internal error)"); */ + return initial; + } else { + l = hlist_stack[--hlist_stack_level]; + } + run = 0; + } + if (node_next(l) && node_type(l) == boundary_node && node_subtype(l) == protrusion_boundary && (boundary_data(l) == 1 || boundary_data(l) == 3)) { + /*tex Skip next node. */ + l = node_next(l); + } + if (node_next(l)) { + l = node_next(l); + } else if (hlist_stack_level == 0) { + run = 0; + } + } + } while (t != l); + } + return l; +} + +/*tex + + Search right to left from list tail |r| to head |l|, returns 1st non-skipable item. 
+ +*/ + +static halfword tex_aux_find_protchar_right(halfword l, halfword r) +{ + if (r) { + halfword t; + int run = 1; + halfword initial = r; + halfword hlist_stack[max_hlist_stack]; + int hlist_stack_level = 0; + do { + t = r; + while (run && node_type(r) == hlist_node && box_list(r)) { + if (hlist_stack_level >= max_hlist_stack) { + /* tex_normal_error("push_node", "stack overflow"); */ + return initial; + } else { + hlist_stack[hlist_stack_level++] = l; + } + if (hlist_stack_level >= max_hlist_stack) { + /* tex_normal_error("push_node", "stack overflow"); */ + return initial; + } else { + hlist_stack[hlist_stack_level++] = r; + } + l = box_list(r); + r = l; + while (node_next(r)) { + halfword s = r; + r = node_next(r); + node_prev(r) = s; + } + } + while (run && tex_protrusion_skipable(r)) { + while (r == l && hlist_stack_level > 0) { + /*tex Don't visit this node again. */ + if (hlist_stack_level <= 0) { + /*tex This can point to some bug. */ + /* return tex_normal_error("pop_node", "stack underflow (internal error)"); */ + return initial; + } else { + r = hlist_stack[--hlist_stack_level]; + } + + if (hlist_stack_level <= 0) { + /*tex This can point to some bug. */ + /* return tex_normal_error("pop_node", "stack underflow (internal error)"); */ + return initial; + } else { + l = hlist_stack[--hlist_stack_level]; + } + } + if ((r != l) && r) { + if (node_prev(r) && node_type(r) == boundary_node && node_subtype(r) == protrusion_boundary && (boundary_data(r) == 2 || boundary_data(r) == 3)) { + /*tex Skip next node. */ + r = node_prev(r); + } + if (node_prev(r)) { + r = node_prev(r); + } else { + /*tex This is the input: |\leavevmode \penalty -10000 \penalty -10000| */ + run = 0; + } + } else if (r == l && hlist_stack_level == 0) { + run = 0; + } + } + } while (t != r); + } + return r; +} + +/*tex + + The algorithm essentially determines the best possible way to achieve each feasible combination + of position, line, and fitness. 
Thus, it answers questions like, \quotation {What is the best + way to break the opening part of the paragraph so that the fourth line is a tight line ending at + such-and-such a place?} However, the fact that all lines are to be the same length after a + certain point makes it possible to regard all sufficiently large line numbers as equivalent, when + the looseness parameter is zero, and this makes it possible for the algorithm to save space and + time. + + An \quote {active node} and a \quote {passive node} are created in |mem| for each feasible + breakpoint that needs to be considered. Active nodes are three words long and passive nodes + are two words long. We need active nodes only for breakpoints near the place in the + paragraph that is currently being examined, so they are recycled within a comparatively short + time after they are created. + + An active node for a given breakpoint contains six fields: + + \startitemize[n] + + \startitem + |vlink| points to the next node in the list of active nodes; the last active node has + |vlink=active|. + \stopitem + + \startitem + |break_node| points to the passive node associated with this breakpoint. + \stopitem + + \startitem + |line_number| is the number of the line that follows this breakpoint. + \stopitem + + \startitem + |fitness| is the fitness classification of the line ending at this breakpoint. + \stopitem + + \startitem + |type| is either |hyphenated_node| or |unhyphenated_node|, depending on whether this + breakpoint is a |disc_node|. + \stopitem + + \startitem + |total_demerits| is the minimum possible sum of demerits over all lines leading from + the beginning of the paragraph to this breakpoint. + \stopitem + + \stopitemize + + The value of |node_next(active)| points to the first active node on a vlinked list of all currently + active nodes. This list is in order by |line_number|, except that nodes with |line_number > + easy_line| may be in any order relative to each other. 
+ +*/ + +void tex_initialize_active(void) +{ + node_type(active_head) = hyphenated_node; + active_line_number(active_head) = max_halfword; + /*tex + The |subtype| is actually the |fitness|. It is set with |new_node| to one of the fitness + values. + */ + active_fitness(active_head) = very_loose_fit; +} + +/*tex + + The passive node for a given breakpoint contains eight fields: + + \startitemize + + \startitem + |vlink| points to the passive node created just before this one, if any, otherwise it + is |null|. + \stopitem + + \startitem + |cur_break| points to the position of this breakpoint in the horizontal list for the + paragraph being broken. + \stopitem + + \startitem + |prev_break| points to the passive node that should precede this one in an optimal path + to this breakpoint. + \stopitem + + \startitem + |serial| is equal to |n| if this passive node is the |n|th one created during the + current pass. (This field is used only when printing out detailed statistics about the + line-breaking calculations.) + \stopitem + + \startitem + |passive_pen_inter| holds the current |localinterlinepenalty| + \stopitem + + \startitem + |passive_pen_broken| holds the current |localbrokenpenalty| + \stopitem + + \stopitemize + + There is a global variable called |passive| that points to the most recently created passive + node. Another global variable, |printed_node|, is used to help print out the paragraph when + detailed information about the line-breaking computation is being displayed. + + The most recent node on passive list, the most recent node that has been printed, and the number + of passive nodes allocated on this pass, is registered in the passive field. + + The active list also contains \quote {delta} nodes that help the algorithm compute the badness + of individual lines. Such nodes appear only between two active nodes, and they have |type = + delta_node|. 
If |p| and |r| are active nodes and if |q| is a delta node between them, so that + |vlink (p) = q| and |vlink (q) = r|, then |q| tells the space difference between lines in the + horizontal list that start after breakpoint |p| and lines that start after breakpoint |r|. In + other words, if we know the length of the line that starts after |p| and ends at our current + position, then the corresponding length of the line that starts after |r| is obtained by adding + the amounts in node~|q|. A delta node contains seven scaled numbers, since it must record the + net change in glue stretchability with respect to all orders of infinity. The natural width + difference appears in |mem[q+1].sc|; the stretch differences in units of pt, sfi, fil, fill, + and filll appear in |mem[q + 2 .. q + 6].sc|; and the shrink difference appears in |mem[q + + 7].sc|. The |subtype| field of a delta node is not used. + + {\em NB: Actually, we have more fields now.} + + As the algorithm runs, it maintains a set of seven delta-like registers for the length of the + line following the first active breakpoint to the current position in the given hlist. When it + makes a pass through the active list, it also maintains a similar set of seven registers for + the length following the active breakpoint of current interest. A third set holds the length + of an empty line (namely, the sum of |\leftskip| and |\rightskip|); and a fourth set is used + to create new delta nodes. + + When we pass a delta node we want to do operations like: + + \starttyping + for k := 1 to 7 do + cur_active_width[k] := cur_active_width[k] + mem[q+k].sc; + \stoptyping + + and we want to do this without the overhead of |for| loops so we use update macros. + + |active_width| is the distance from first active node to~|cur_p|, |background| the length of an + \quote {empty} line, and |break_width| the length being computed after current break. + + We make |auto_breaking| accessible out of |line_break|.
+ + Let's state the principles of the delta nodes more precisely and concisely, so that the + following programs will be less obscure. For each legal breakpoint~|p| in the paragraph, we + define two quantities $\alpha(p)$ and $\beta(p)$ such that the length of material in a line + from breakpoint~|p| to breakpoint~|q| is $\gamma+\beta(q)-\alpha(p)$, for some fixed $\gamma$. + Intuitively, $\alpha(p)$ and $\beta(q)$ are the total length of material from the beginning + of the paragraph to a point after a break at |p| and to a point before a break at |q|; and + $\gamma$ is the width of an empty line, namely the length contributed by |\leftskip| and + |\rightskip|. + + Suppose, for example, that the paragraph consists entirely of alternating boxes and glue + skips; let the boxes have widths $x_1\ldots x_n$ and let the skips have widths $y_1\ldots + y_n$, so that the paragraph can be represented by $x_1y_1\ldots x_ny_n$. Let $p_i$ be the + legal breakpoint at $y_i$; then $\alpha(p_i) = x_1 + y_1 + \cdots + x_i + y_i$, and $\beta + (p_i) = x_1 + y_1 + \cdots + x_i$. To check this, note that the length of material from + $p_2$ to $p_5$, say, is $\gamma + x_3 + y_3 + x_4 + y_4 + x_5 = \gamma + \beta (p_5) - + \alpha (p_2)$. + + The quantities $\alpha$, $\beta$, $\gamma$ involve glue stretchability and shrinkability as + well as a natural width. If we were to compute $\alpha(p)$ and $\beta(p)$ for each |p|, we + would need multiple precision arithmetic, and the multiprecise numbers would have to be kept + in the active nodes. \TeX\ avoids this problem by working entirely with relative differences + or \quote {deltas}. Suppose, for example, that the active list contains $a_1\,\delta_1\,a_2\, + \delta_2\,a_3$, where the |a|'s are active breakpoints and the $\delta$'s are delta nodes. + Then $\delta_1 = \alpha(a_1) - \alpha(a_2)$ and $\delta_2 = \alpha(a_2) - \alpha(a_3)$. 
If the + line breaking algorithm is currently positioned at some other breakpoint |p|, the |active_width| + array contains the value $\gamma +\beta(p) - \alpha(a_1)$. If we are scanning through the list + of active nodes and considering a tentative line that runs from $a_2$ to~|p|, say, the + |cur_active_width| array will contain the value $\gamma + \beta(p) - \alpha(a_2)$. Thus, when we + move from $a_2$ to $a_3$, we want to add $\alpha(a_2) - \alpha(a_3)$ to |cur_active_width|; and + this is just $\delta_2$, which appears in the active list between $a_2$ and $a_3$. The + |background| array contains $\gamma$. The |break_width| array will be used to calculate values + of new delta nodes when the active list is being updated. + + The heart of the line-breaking procedure is |try_break|, a subroutine that tests if the current + breakpoint |cur_p| is feasible, by running through the active list to see what lines of text + can be made from active nodes to~|cur_p|. If feasible breaks are possible, new break nodes are + created. If |cur_p| is too far from an active node, that node is deactivated. + + The parameter |pi| to |try_break| is the penalty associated with a break at |cur_p|; we have + |pi = eject_penalty| if the break is forced, and |pi=inf_penalty| if the break is illegal. + + The other parameter, |break_type|, is set to |hyphenated_node| or |unhyphenated_node|, depending + on whether or not the current break is at a |disc_node|. The end of a paragraph is also regarded + as |hyphenated_node|; this case is distinguishable by the condition |cur_p = null|. 
+ + \startlines + |internal_pen_inter|: running |\localinterlinepenalty| + |internal_pen_broken|: running |\localbrokenpenalty| + |internal_left_box|: running |\localleftbox| + |internal_left_box_width|: running |\localleftbox| + |init_internal_left_box|: running |\localleftbox| + |init_internal_left_box_width|: running |\localleftbox| width + |internal_right_box|: running |\localrightbox| + |internal_right_box_width|: running |\localrightbox| width + |disc_width|: the length of discretionary material preceding a break + \stoplines + + As we consider various ways to end a line at |cur_p|, in a given line number class, we keep + track of the best total demerits known, in an array with one entry for each of the fitness + classifications. For example, |minimal_demerits [tight_fit]| contains the fewest total + demerits of feasible line breaks ending at |cur_p| with a |tight_fit| line; |best_place + [tight_fit]| points to the passive node for the break before |cur_p| that achieves such an + optimum; and |best_pl_line[tight_fit]| is the |line_number| field in the active node + corresponding to |best_place [tight_fit]|. When no feasible break sequence is known, the + |minimal_demerits| entries will be equal to |awful_bad|, which is $2^{30}-1$. Another variable, + |minimum_demerits|, keeps track of the smallest value in the |minimal_demerits| array. + + The length of lines depends on whether the user has specified |\parshape| or |\hangindent|. If + |par_shape_ptr| is not null, it points to a $(2n+1)$-word record in |mem|, where the |vinfo| + in the first word contains the value of |n|, and the other $2n$ words contain the left margins + and line lengths for the first |n| lines of the paragraph; the specifications for line |n| + apply to all subsequent lines. 
If |par_shape_ptr = null|, the shape of the paragraph depends on + the value of |n = hang_after|; if |n >= 0|, hanging indentation takes place on lines |n + 1|, + |n + 2|, \dots, otherwise it takes place on lines 1, \dots, $\vert n\vert$. When hanging + indentation is active, the left margin is |hang_indent|, if |hang_indent >= 0|, else it is 0; + the line length is $|hsize|-\vert|hang_indent|\vert$. The normal setting is |par_shape_ptr = + null|, |hang_after = 1|, and |hang_indent = 0|. Note that if |hang_indent = 0|, the value of + |hang_after| is irrelevant. + + Some more variables and remarks: + + line numbers |> easy_line| are equivalent in break nodes + + line numbers |> last_special_line| all have the same width + + |first_width| is the width of all lines |<= last_special_line|, if no |\parshape| has been + specified + + |second_width| is the width of all lines |> last_special_line| + + |first_indent| is the left margin to go with |first_width| + + |second_indent| is the left margin to go with |second_width| + + |best_bet| indicates the passive node and its predecessors + + |fewest_demerits| are the demerits associated with |best_bet| + + |best_line| is the line number following the last line of the new paragraph + + |actual_looseness| is the difference between |line_number (best_bet)| and the optimum + |best_line| + + |line_diff| is the difference between the current line number and the optimum |best_line| + + \TEX\ makes use of the fact that |hlist_node|, |vlist_node|, |rule_node|, |insert_node|, + |mark_node|, |adjust_node|, |disc_node|, |whatsit_node|, and |math_node| are at the low end of + the type codes, by permitting a break at glue in a list if and only if the |type| of the + previous node is less than |math_node|. Furthermore, a node is discarded after a break if its + type is |math_node| or~more.
+ +*/ + +static halfword tex_aux_clean_up_the_memory(halfword p) +{ + halfword q = node_next(active_head); + while (q != active_head) { + p = node_next(q); + tex_flush_node(q); + q = p; + } + q = lmt_linebreak_state.passive; + while (q) { + p = node_next(q); + tex_flush_node(q); + q = p; + } + return p; +} + +/*tex + Instead of macros we use inline functions. Nowadays compilers generate code that is quite + similar as when we use macros (and sometimes even better). +*/ + +inline static void tex_aux_add_disc_source_to_target(halfword adjust_spacing, scaled target[], const scaled source[]) +{ + target[total_glue_amount] += source[total_glue_amount]; + if (adjust_spacing) { + target[font_stretch_amount] += source[font_stretch_amount]; + target[font_shrink_amount] += source[font_shrink_amount]; + } +} + +inline static void tex_aux_sub_disc_target_from_source(halfword adjust_spacing, scaled target[], const scaled source[]) +{ + target[total_glue_amount] -= source[total_glue_amount]; + if (adjust_spacing) { + target[font_stretch_amount] -= source[font_stretch_amount]; + target[font_shrink_amount] -= source[font_shrink_amount]; + } +} + +inline static void tex_aux_reset_disc_target(halfword adjust_spacing, scaled *target) +{ + target[total_glue_amount] = 0; + if (adjust_spacing) { + target[font_stretch_amount] = 0; + target[font_shrink_amount] = 0; + } +} + +/* A memcopy for the whole array is probably more efficient. */ + +inline static void tex_aux_set_target_to_source(halfword adjust_spacing, scaled target[], const scaled source[]) +{ + for (int i = total_glue_amount; i <= total_shrink_amount; i++) { + target[i] = source[i]; + } + if (adjust_spacing) { + target[font_shrink_amount] = source[font_shrink_amount]; + target[font_stretch_amount] = source[font_stretch_amount]; + } +} + +/* + These delta nodes use an offset and as a result we waste half of the memory words. 
So, by not + using an offset but just named fields, we can save 4 memory words (32 bytes) per delta node. So, + instead of this: + + \starttyping + inline void add_to_target_from_delta(halfword adjust_spacing, scaled *target, halfword delta) + { + for (int i = total_glue_amount; i <= total_shrink_amount; i++) { + target[i] += delta_field(delta, i); + } + if (adjust_spacing) { + target[font_stretch_amount] += delta_field(delta, font_stretch_amount); + target[font_shrink_amount] += delta_field(delta, font_shrink_amount); + } + } + \stoptyping + + We use the more verbose variants and let the compiler optimize the lot. + +*/ + +inline static void tex_aux_add_to_target_from_delta(halfword adjust_spacing, scaled target[], halfword delta) +{ + target[total_glue_amount] += delta_field_total_glue(delta); + target[total_stretch_amount] += delta_field_total_stretch(delta); + target[total_fi_amount] += delta_field_total_fi_amount(delta); + target[total_fil_amount] += delta_field_total_fil_amount(delta); + target[total_fill_amount] += delta_field_total_fill_amount(delta); + target[total_filll_amount] += delta_field_total_filll_amount(delta); + target[total_shrink_amount] += delta_field_total_shrink(delta); + if (adjust_spacing) { + target[font_stretch_amount] += delta_field_font_stretch(delta); + target[font_shrink_amount] += delta_field_font_shrink(delta); + } +} + +inline static void tex_aux_sub_delta_from_target(halfword adjust_spacing, scaled target[], halfword delta) +{ + target[total_glue_amount] -= delta_field_total_glue(delta); + target[total_stretch_amount] -= delta_field_total_stretch(delta); + target[total_fi_amount] -= delta_field_total_fi_amount(delta); + target[total_fil_amount] -= delta_field_total_fil_amount(delta); + target[total_fill_amount] -= delta_field_total_fill_amount(delta); + target[total_filll_amount] -= delta_field_total_filll_amount(delta); + target[total_shrink_amount] -= delta_field_total_shrink(delta); + if (adjust_spacing) { + 
target[font_stretch_amount] -= delta_field_font_stretch(delta); + target[font_shrink_amount] -= delta_field_font_shrink(delta); + } +} + +inline static void tex_aux_add_to_delta_from_delta(halfword adjust_spacing, halfword target, halfword source) +{ + delta_field_total_glue(target) += delta_field_total_glue(source); + delta_field_total_stretch(target) += delta_field_total_stretch(source); + delta_field_total_fi_amount(target) += delta_field_total_fi_amount(source); + delta_field_total_fil_amount(target) += delta_field_total_fil_amount(source); + delta_field_total_fill_amount(target) += delta_field_total_fill_amount(source); + delta_field_total_filll_amount(target) += delta_field_total_filll_amount(source); + delta_field_total_shrink(target) += delta_field_total_shrink(source); + if (adjust_spacing) { + delta_field_font_stretch(target) += delta_field_font_stretch(source); + delta_field_font_shrink(target) += delta_field_font_shrink(source); + } +} + +inline static void tex_aux_set_delta_from_difference(halfword adjust_spacing, halfword delta, const scaled source_1[], const scaled source_2[]) +{ + delta_field_total_glue(delta) = (source_1[total_glue_amount] - source_2[total_glue_amount]); + delta_field_total_stretch(delta) = (source_1[total_stretch_amount] - source_2[total_stretch_amount]); + delta_field_total_fi_amount(delta) = (source_1[total_fi_amount] - source_2[total_fi_amount]); + delta_field_total_fil_amount(delta) = (source_1[total_fil_amount] - source_2[total_fil_amount]); + delta_field_total_fill_amount(delta) = (source_1[total_fill_amount] - source_2[total_fill_amount]); + delta_field_total_filll_amount(delta) = (source_1[total_filll_amount] - source_2[total_filll_amount]); + delta_field_total_shrink(delta) = (source_1[total_shrink_amount] - source_2[total_shrink_amount]); + if (adjust_spacing) { + delta_field_font_stretch(delta) = (source_1[font_stretch_amount] - source_2[font_stretch_amount]); + delta_field_font_shrink(delta) = 
(source_1[font_shrink_amount] - source_2[font_shrink_amount]); + } +} + +inline static void tex_aux_add_delta_from_difference(halfword adjust_spacing, halfword delta, const scaled source_1[], const scaled source_2[]) +{ + delta_field_total_glue(delta) += (source_1[total_glue_amount] - source_2[total_glue_amount]); + delta_field_total_stretch(delta) += (source_1[total_stretch_amount] - source_2[total_stretch_amount]); + delta_field_total_fi_amount(delta) += (source_1[total_fi_amount] - source_2[total_fi_amount]); + delta_field_total_fil_amount(delta) += (source_1[total_fil_amount] - source_2[total_fil_amount]); + delta_field_total_fill_amount(delta) += (source_1[total_fill_amount] - source_2[total_fill_amount]); + delta_field_total_filll_amount(delta) += (source_1[total_filll_amount] - source_2[total_filll_amount]); + delta_field_total_shrink(delta) += (source_1[total_shrink_amount] - source_2[total_shrink_amount]); + if (adjust_spacing) { + delta_field_font_stretch(delta) += (source_1[font_stretch_amount] - source_2[font_stretch_amount]); + delta_field_font_shrink(delta) += (source_1[font_shrink_amount] - source_2[font_shrink_amount]); + } +} + +/*tex + + This function is used to add the width of a list of nodes (from a discretionary) to one of the + width arrays. Replacement texts and discretionary texts are supposed to contain only character + nodes, kern nodes, and box or rule nodes. + + From now on we just ignore \quite {invalid} nodes. If any such node influences the width, so be + it. 
+ + \starttyping + static void bad_node_in_disc_error(halfword p) + { + tex_formatted_error( + "linebreak", + "invalid node with type %s found in discretionary", + node_data[node_type(p)].name + ); + } + \stoptyping +*/ + +static void tex_aux_add_to_widths(halfword s, int adjust_spacing, int adjust_spacing_step, scaled widths[]) +{ + /* todo only check_expand_pars once per font (or don't check) */ + while (s) { + switch (node_type(s)) { + case glyph_node: + widths[total_glue_amount] += tex_glyph_width(s); + if (adjust_spacing && ! tex_has_glyph_option(s, glyph_option_no_expansion) && tex_aux_check_expand_pars(adjust_spacing_step, glyph_font(s))) { + lmt_packaging_state.previous_char_ptr = s; + widths[font_stretch_amount] += tex_char_stretch(s); + widths[font_shrink_amount] += tex_char_shrink(s); + }; + break; + case hlist_node: + case vlist_node: + widths[total_glue_amount] += box_width(s); + break; + case rule_node: + widths[total_glue_amount] += rule_width(s); + break; + case glue_node: + widths[total_glue_amount] += glue_amount(s); + widths[2 + glue_stretch_order(s)] += glue_stretch(s); + widths[total_shrink_amount] += glue_shrink(s); + break; + case kern_node: + widths[total_glue_amount] += kern_amount(s); + if (adjust_spacing == adjust_spacing_full && node_subtype(s) == font_kern_subtype) { + halfword n = node_prev(s); + if (n && node_type(n) == glyph_node && ! tex_has_glyph_option(node_next(s), glyph_option_no_expansion)) { + widths[font_stretch_amount] += tex_kern_stretch(s); + widths[font_shrink_amount] += tex_kern_shrink(s); + } + } + break; + case disc_node: + break; + default: + /* bad_node_in_disc_error(s); */ + break; + } + s = node_next(s); + } +} + +/*tex + + This function is used to substract the width of a list of nodes (from a discretionary) from one + of the width arrays. It is used only once, but deserves it own function because of orthogonality + with the |add_to_widths| function. 
+ +*/ + +static void tex_aux_sub_from_widths(halfword s, int adjust_spacing, int adjust_spacing_step, scaled widths[]) +{ + while (s) { + /*tex Subtract the width of node |s| from |break_width|; */ + switch (node_type(s)) { + case glyph_node: + widths[total_glue_amount] -= tex_glyph_width(s); + if (adjust_spacing && ! tex_has_glyph_option(s, glyph_option_no_expansion) && tex_aux_check_expand_pars(adjust_spacing_step, glyph_font(s))) { + lmt_packaging_state.previous_char_ptr = s; + widths[font_stretch_amount] -= tex_char_stretch(s); + widths[font_shrink_amount] -= tex_char_shrink(s); + } + break; + case hlist_node: + case vlist_node: + widths[total_glue_amount] -= box_width(s); + break; + case rule_node: + widths[total_glue_amount] -= rule_width(s); + break; + case glue_node: + widths[total_glue_amount] -= glue_amount(s); + widths[2 + glue_stretch_order(s)] -= glue_stretch(s); + widths[total_shrink_amount] -= glue_shrink(s); + break; + case kern_node: + widths[total_glue_amount] -= kern_amount(s); + if (adjust_spacing == adjust_spacing_full && node_subtype(s) == font_kern_subtype) { + halfword n = node_prev(s); + if (n && node_type(n) == glyph_node && ! tex_has_glyph_option(node_next(s), glyph_option_no_expansion)) { + widths[font_stretch_amount] -= tex_kern_stretch(s); + widths[font_shrink_amount] -= tex_kern_shrink(s); + } + } + break; + case disc_node: + break; + default: + /* bad_node_in_disc_error(s); */ + break; + } + s = node_next(s); + } +} + +/*tex + + When we insert a new active node for a break at |cur_p|, suppose this new node is to be placed + just before active node |a|; then we essentially want to insert $\delta\,|cur_p|\,\delta ^ + \prime$ before |a|, where $\delta = \alpha (a) - \alpha (|cur_p|)$ and $\delta ^ \prime = + \alpha (|cur_p|) - \alpha (a)$ in the notation explained above. 
The |cur_active_width| array + now holds $\gamma + \beta (|cur_p|) - \alpha (a)$; so $\delta$ can be obtained by subtracting + |cur_active_width| from the quantity $\gamma + \beta (|cur_p|) - \alpha (|cur_p|)$. The latter + quantity can be regarded as the length of a line from |cur_p| to |cur_p|; we call it the + |break_width| at |cur_p|. + + The |break_width| is usually negative, since it consists of the background (which is normally + zero) minus the width of nodes following~|cur_p| that are eliminated after a break. If, for + example, node |cur_p| is a glue node, the width of this glue is subtracted from the background; + and we also look ahead to eliminate all subsequent glue and penalty and kern and math nodes, + subtracting their widths as well. + + Kern nodes do not disappear at a line break unless they are |explicit|. + +*/ + +static void tex_aux_compute_break_width(int break_type, int adjust_spacing, int adjust_spacing_step, halfword p) +{ + /*tex + + Glue and other whitespace to be skipped after a break; used if unhyphenated, or |post_break + = null|. + + */ + halfword s = p; + if (p) { + switch (break_type) { + case hyphenated_node: + case delta_node: + case passive_node: + /*tex + + Compute the discretionary |break_width| values. When |p| is a discretionary + break, the length of a line \quotation {from |p| to |p|} has to be defined + properly so that the other calculations work out. Suppose that the pre-break + text at |p| has length $l_0$, the post-break text has length $l_1$, and the + replacement text has length |l|. Suppose also that |q| is the node following + the replacement text. Then length of a line from |p| to |q| will be computed as + $\gamma + \beta (q) - \alpha (|p|)$, where $\beta (q) = \beta (|p|) - l_0 + l$. + The actual length will be the background plus $l_1$, so the length from |p| to + |p| should be $\gamma + l_0 + l_1 - l$. 
If the post-break text of the + discretionary is empty, a break may also discard~|q|; in that unusual case we + subtract the length of~|q| and any other nodes that will be discarded after the + discretionary break. + + The value of $l_0$ need not be computed, since |line_break| will put it into the + global variable |disc_width| before calling |try_break|. In case of nested + discretionaries, we always follow the no-break path, as we are talking about the + breaking on {\it this} position. + + */ + tex_aux_sub_from_widths(disc_no_break_head(p), adjust_spacing, adjust_spacing_step, lmt_linebreak_state.break_width); + tex_aux_add_to_widths(disc_post_break_head(p), adjust_spacing, adjust_spacing_step, lmt_linebreak_state.break_width); + tex_aux_add_disc_source_to_target(adjust_spacing, lmt_linebreak_state.break_width, lmt_linebreak_state.disc_width); + if (disc_post_break_head(p)) { + s = null; + } else { + /*tex no |post_break|: skip any whitespace following */ + s = node_next(p); + } + break; + } + } + while (s) { + switch (node_type(s)) { + case glue_node: + /*tex Subtract glue from |break_width|; */ + lmt_linebreak_state.break_width[total_glue_amount] -= glue_amount(s); + lmt_linebreak_state.break_width[2 + glue_stretch_order(s)] -= glue_stretch(s); + lmt_linebreak_state.break_width[total_shrink_amount] -= glue_shrink(s); + break; + case penalty_node: + break; + case kern_node: + if (node_subtype(s) != explicit_kern_subtype && node_subtype(s) != italic_kern_subtype) { + return; + } else { + lmt_linebreak_state.break_width[total_glue_amount] -= kern_amount(s); + break; + } + case math_node: + if (tex_math_glue_is_zero(s)) { + lmt_linebreak_state.break_width[total_glue_amount] -= math_surround(s); + } else { + lmt_linebreak_state.break_width[total_glue_amount] -= math_amount(s); + lmt_linebreak_state.break_width[2 + math_stretch_order(s)] -= math_stretch(s); + lmt_linebreak_state.break_width[total_shrink_amount] -= math_shrink(s); + } + break; + default: + return; + 
}; + s = node_next(s); + } +} + +static void tex_aux_print_break_node(halfword q, halfword fit_class, halfword break_type, halfword cur_p, const line_break_properties *properties) +{ + (void) properties; + /*tex Print a symbolic description of the new break node. */ + tex_print_format( + "%l[break: serial %i, line %i.%i,%s demerits %i, ", + passive_serial(lmt_linebreak_state.passive), + active_line_number(q) - 1, + fit_class, + break_type == hyphenated_node ? " hyphenated, " : "", + active_total_demerits(q) + ); + if (lmt_linebreak_state.do_last_line_fit) { + /*tex Print additional data in the new active node. */ + tex_print_format( + " short %D, %s %D, ", + active_short(q), pt_unit, + cur_p ? "glue" : "active", + active_glue(q), pt_unit + ); + } + tex_print_format( + "previous %i]", + passive_prev_break(lmt_linebreak_state.passive) ? passive_serial(passive_prev_break(lmt_linebreak_state.passive)) : 0 + ); +} + +static const char *tex_aux_node_name(halfword cur_p) +{ + if (cur_p) { + /*tex This could be more generic helper. */ + switch (node_type(cur_p)) { + case penalty_node : return "penalty"; + case disc_node : return "discretionary"; + case kern_node : return "kern"; + case glue_node : return "glue"; /* in traditional tex "" */ + default : return "math"; + } + } else { + return "par"; + } +} + +static void tex_aux_print_feasible_break(halfword cur_p, halfword r, halfword b, int pi, int d, int artificial_demerits, const line_break_properties *properties) +{ + (void) properties; + /*tex Print a symbolic description of this feasible break. */ + if (lmt_linebreak_state.printed_node != cur_p) { + /*tex Print the list between |printed_node| and |cur_p|, then set |printed_node := cur_p|. 
*/ + tex_print_nlp(); + if (cur_p) { + halfword save_link = node_next(cur_p); + node_next(cur_p) = null; + tex_short_display(node_next(lmt_linebreak_state.printed_node)); + node_next(cur_p) = save_link; + } else { + tex_short_display(node_next(lmt_linebreak_state.printed_node)); + } + lmt_linebreak_state.printed_node = cur_p; + } + tex_print_format( + "%l[break: feasible, trigger %s, serial %i, badness %B, penalty %i, demerits %B]", + tex_aux_node_name(cur_p), + active_break_node(r) ? passive_serial(active_break_node(r)) : 0, + b, + pi, + artificial_demerits ? awful_bad : d + ); +} + +# define total_font_stretch cur_active_width[font_stretch_amount] +# define total_font_shrink cur_active_width[font_shrink_amount] + +/*tex We implement this one later on. */ + +/* + The only reason why we still have line_break_dir is because we have some experimental protrusion + trickery depending on it. +*/ + +static void tex_aux_post_line_break(const line_break_properties *properties, halfword line_break_dir); + +/*tex + + The next subroutine is used to compute the badness of glue, when a total |t| is supposed to be + made from amounts that sum to~|s|. According to {\em The \TEX book}, the badness of this + situation is $100(t/s)^3$; however, badness is simply a heuristic, so we need not squeeze out + the last drop of accuracy when computing it. All we really want is an approximation that has + similar properties. + + The actual method used to compute the badness is easier to read from the program than to + describe in words. It produces an integer value that is a reasonably close approximation to + $100(t/s)^3$, and all implementations of \TEX\ should use precisely this method. Any badness of + $2^{13}$ or more is treated as infinitely bad, and represented by 10000. + + It is not difficult to prove that |badness (t + 1, s) >= badness (t, s) >= badness (t, s + 1)| + The badness function defined here is capable of computing at most 1095 distinct values, but + that is plenty. 

    A core aspect of the linebreak algorithm is the calculation of badness. The formula currently
    used has evolved with the \TEX\ versions before Don Knuth settled on this approach. And I (HH)
    admit that I see no real reason to change something here. The only possible extension could
    be changing the hardcoded |loose_criterium| of 99 and |decent_criterium| of 12. These could
    become parameters instead. When looking at the code you will notice a loop that runs from
    |very_loose_fit| to |tight_fit| with the following four steps:

    \starttyping
    very_loose_fit loose_fit decent_fit tight_fit
    \stoptyping

    where we have only |loose_fit| and |decent_fit| with associated criteria later on. So, as an
    experiment I decided to add two steps in between.

    \starttyping
    very_loose_fit semi_loose_fit loose_fit decent_fit semi_tight_fit tight_fit
    \stoptyping

    Watch how we keep the asymmetrical nature of this sequence: there is basically one tight
    step fewer than there are loose steps. Adding these steps took hardly any code so it was a
    cheap experiment. However, the result is not that spectacular: I'm pretty sure that users will
    not be able to choose consistently what result looks better, but who knows. For the moment
    I keep it, if only to be able to discuss it as a useless extension. Configuring the values
    is done with |\linebreakcriterium| which gets split into 4 parts (2 bytes per criterium).

    It is probably hard to explain to users what a different setting does and although one can
    force different output in narrow raggedright text it would probably be enough to just make
    the |decent_criterium| configurable. Anyway, because we're talking heuristics and pretty
    good estimates from Don Knuth here, it would be pretentious to suggest that I really did
    research this fuzzy topic (if it was worth the effort at all).
+ +*/ + +halfword tex_badness(scaled t, scaled s) +{ + /*tex Approximation to $\alpha t/s$, where $\alpha^3\approx 100\cdot2^{18}$ */ + if (t == 0) { + return 0; + } else if (s <= 0) { + return infinite_bad; + } else { + /*tex $297^3=99.94\times2^{18}$ */ + if (t <= large_width_excess) { + t = (t * 297) / s; + } else if (s >= small_stretchability) { + t = t / (s / 297); + } + if (t > 1290) { + /*tex $1290^3<2^{31}<1291^3$ */ + return infinite_bad; + } else { + /*tex This is $t^3/2^{18}$, rounded to the nearest integer. */ + return ((t * t * t + 0400000) / 01000000); + } + } +} + +static inline void tex_split_line_break_criterium(halfword criterium, halfword *semi_tight, halfword *decent, halfword *semi_loose, halfword *loose) { + *semi_tight = (criterium >> 24) & 0x7F; + *decent = (criterium >> 16) & 0x7F; + *semi_loose = (criterium >> 8) & 0x7F; + *loose = criterium & 0x7F; + if (! *semi_tight) { + *semi_tight = semi_tight_criterium; + } + if (! *decent) { + *decent = decent_criterium; + } + if (! *semi_loose) { + *semi_loose = semi_loose_criterium; + } + if (! 
*loose) { + *loose = loose_criterium; + } +} + +static inline halfword tex_normalized_loose_badness(halfword b, halfword loose, halfword semi_loose, halfword decent) +{ + // if (b > loose_criterium) { + // return very_loose_fit; + // } else if (b > decent_criterium) { + // return loose_fit; + // } else { + // return decent_fit; + // } + if (b > loose) { + return very_loose_fit; + } else if (b > semi_loose) { + return semi_loose_fit; + } else if (b > decent) { + return loose_fit; + } else { + return decent_fit; + } +} + +static inline halfword tex_normalized_tight_badness(halfword b, halfword decent, halfword semi_tight) +{ + // if (b > decent_criterium) { + // return tight_fit; + // } else { + // return decent_fit; + // } + if (b > semi_tight) { + return semi_tight_fit; + } else if (b > decent) { + return tight_fit; + } else { + return decent_fit; + } +} + +static void tex_aux_try_break( + const line_break_properties *properties, + halfword pi, /* a penalty */ + halfword break_type, + halfword first_p, + halfword cur_p +) +{ + /*tex runs through the active list */ + halfword r; + /*tex stays a step behind |r| */ + halfword prev_r = active_head; + /*tex a step behind |prev_r|, if |type(prev_r) = delta_node| */ + halfword prev_prev_r = null; + /*tex maximum line number in current equivalence class of lines */ + halfword old_l = 0; + /*tex have we found a feasible break at |cur_p|? */ + int no_break_yet = 1; + /*tex line number of current active node */ + halfword l; + /*tex should node |r| remain in the active list? */ + int node_r_stays_active; + /*tex the current line will be justified to this width */ + scaled line_width = 0; + /*tex possible fitness class of test line */ + halfword fit_class; + /*tex badness of test line */ + halfword b; + /*tex demerits of test line */ + int d; + /*tex has |d| been forced to zero? 
*/ + int artificial_demerits; + /*tex used in badness calculations */ + scaled shortfall = 0; + /*tex glue stretch or shrink of test line, adjustment for last line */ + scaled g = 0; + /*tex distance from current active node */ + scaled cur_active_width[10] = { 0 }; + halfword best_place[n_of_finess_values]; + halfword best_place_line[n_of_finess_values]; + scaled best_place_short[n_of_finess_values]; + scaled best_place_glue[n_of_finess_values]; + /*tex Experiment */ + halfword semi_tight, decent, semi_loose, loose; + /* in par node */ + tex_split_line_break_criterium(line_break_criterium_par, &semi_tight, &decent, &semi_loose, &loose); + /*tex Make sure that |pi| is in the proper range; */ + if (pi >= infinite_penalty) { + /*tex this breakpoint is inhibited by infinite penalty */ + return; + } else if (pi <= -infinite_penalty) { + /*tex this breakpoint will be forced */ + pi = eject_penalty; + } + tex_aux_set_target_to_source(properties->adjust_spacing, cur_active_width, lmt_linebreak_state.active_width); + while (1) { + r = node_next(prev_r); + /*tex + + If node |r| is of type |delta_node|, update |cur_active_width|, set |prev_r| and + |prev_prev_r|, then |goto continue|. The following code uses the fact that |type + (active) <> delta_node|. + + Here we get: |unhyphenated_node|, |hyphenated_node, |delta_node|, |passive_node| + + */ + if (node_type(r) == delta_node) { + /*tex implicit */ + tex_aux_add_to_target_from_delta(properties->adjust_spacing, cur_active_width, r); + prev_prev_r = prev_r; + prev_r = r; + continue; + } + /*tex + + If a line number class has ended, create new active nodes for the best feasible breaks + in that class; then |return| if |r = active|, otherwise compute the new |line_width|. + + The first part of the following code is part of \TEX's inner loop, so we don't want to + waste any time. The current active node, namely node |r|, contains the line number that + will be considered next. 
At the end of the list we have arranged the data structure so + that |r = active| and |line_number (active) > old_l|. + + */ + l = active_line_number(r); + if (l > old_l) { + /*tex now we are no longer in the inner loop */ + if ((lmt_linebreak_state.minimum_demerits < awful_bad) && ((old_l != lmt_linebreak_state.easy_line) || (r == active_head))) { + /*tex + + Create new active nodes for the best feasible breaks just found. It is not + necessary to create new active nodes having |minimal_demerits| greater than + |linebreak_state.minimum_demerits + abs (adj_demerits)|, since such active + nodes will never be chosen in the final paragraph breaks. This observation + allows us to omit a substantial number of feasible breakpoints from further + consideration. + + */ + if (no_break_yet) { + no_break_yet = 0; + tex_aux_set_target_to_source(properties->adjust_spacing, lmt_linebreak_state.break_width, lmt_linebreak_state.background); + tex_aux_compute_break_width(break_type, properties->adjust_spacing, properties->adjust_spacing_step, cur_p); + } + /*tex + + Insert a delta node to prepare for breaks at |cur_p|. We use the fact that + |type (active) <> delta_node|. 
+ + */ + if (node_type(prev_r) == delta_node) { + /*tex modify an existing delta node */ + tex_aux_add_delta_from_difference(properties->adjust_spacing, prev_r, lmt_linebreak_state.break_width, cur_active_width); + } else if (prev_r == active_head) { + /*tex no delta node needed at the beginning */ + tex_aux_set_target_to_source(properties->adjust_spacing, lmt_linebreak_state.active_width, lmt_linebreak_state.break_width); + } else { + halfword q = tex_new_node(delta_node, (quarterword) very_loose_fit); + node_next(q) = r; + tex_aux_set_delta_from_difference(properties->adjust_spacing, q, lmt_linebreak_state.break_width, cur_active_width); + node_next(prev_r) = q; + prev_prev_r = prev_r; + prev_r = q; + } + if (abs(properties->adj_demerits) >= awful_bad - lmt_linebreak_state.minimum_demerits) { + lmt_linebreak_state.minimum_demerits = awful_bad - 1; + } else { + lmt_linebreak_state.minimum_demerits += abs(properties->adj_demerits); + } + for (halfword fit_class = very_loose_fit; fit_class <= tight_fit; fit_class++) { + if (lmt_linebreak_state.minimal_demerits[fit_class] <= lmt_linebreak_state.minimum_demerits) { + /*tex + + Insert a new active node from |best_place [fit_class]| to |cur_p|. When + we create an active node, we also create the corresponding passive node. + + */ + halfword q = tex_new_node(passive_node, (quarterword) very_loose_fit); + node_next(q) = lmt_linebreak_state.passive; + lmt_linebreak_state.passive = q; + passive_cur_break(q) = cur_p; + ++lmt_linebreak_state.pass_number; + passive_serial(q) = lmt_linebreak_state.pass_number; + passive_prev_break(q) = best_place[fit_class]; + /*tex + + Here we keep track of the subparagraph penalties in the break nodes. 
+ + */ + passive_pen_inter(q) = lmt_linebreak_state.internal_penalty_interline; + passive_pen_broken(q) = lmt_linebreak_state.internal_penalty_broken; + passive_last_left_box(q) = lmt_linebreak_state.internal_left_box; + passive_last_left_box_width(q) = lmt_linebreak_state.internal_left_box_width; + if (passive_prev_break(q)) { + passive_left_box(q) = passive_last_left_box(passive_prev_break(q)); + passive_left_box_width(q) = passive_last_left_box_width(passive_prev_break(q)); + } else { + passive_left_box(q) = lmt_linebreak_state.init_internal_left_box; + passive_left_box_width(q) = lmt_linebreak_state.init_internal_left_box_width; + } + passive_right_box(q) = lmt_linebreak_state.internal_right_box; + passive_right_box_width(q) = lmt_linebreak_state.internal_right_box_width; + passive_middle_box(q) = lmt_linebreak_state.internal_middle_box; + q = tex_new_node((quarterword) break_type, (quarterword) fit_class); + active_break_node(q) = lmt_linebreak_state.passive; + active_line_number(q) = best_place_line[fit_class] + 1; + active_total_demerits(q) = lmt_linebreak_state.minimal_demerits[fit_class]; + if (lmt_linebreak_state.do_last_line_fit) { + /*tex + + Store additional data in the new active node. Here we save these + data in the active node representing a potential line break. + + */ + active_short(q) = best_place_short[fit_class]; + active_glue(q) = best_place_glue[fit_class]; + } + node_next(q) = r; + node_next(prev_r) = q; + prev_r = q; + if (properties->tracing_paragraphs > 0) { + tex_aux_print_break_node(q, fit_class, break_type, cur_p, properties); + } + } + lmt_linebreak_state.minimal_demerits[fit_class] = awful_bad; + } + lmt_linebreak_state.minimum_demerits = awful_bad; + /*tex + + Insert a delta node to prepare for the next active node. When the following + code is performed, we will have just inserted at least one active node before + |r|, so |type (prev_r) <> delta_node|. 
+ + */ + if (r != active_head) { + halfword q = tex_new_node(delta_node, (quarterword) very_loose_fit); + node_next(q) = r; + tex_aux_set_delta_from_difference(properties->adjust_spacing, q, cur_active_width, lmt_linebreak_state.break_width); + node_next(prev_r) = q; + prev_prev_r = prev_r; + prev_r = q; + } + } + /*tex + + Quit on an active node, otherwise compute the new line width. When we come to the + following code, we have just encountered the first active node~|r| whose + |line_number| field contains |l|. Thus we want to compute the length of the + $l\mskip1mu$th line of the current paragraph. Furthermore, we want to set |old_l| + to the last number in the class of line numbers equivalent to~|l|. + + */ + if (r == active_head) { + return; + } else if (l > lmt_linebreak_state.easy_line) { + old_l = max_halfword - 1; + line_width = lmt_linebreak_state.second_width; + } else { + old_l = l; + /* if (properties->par_shape && specification_repeat(properties->par_shape)) { + line_width = get_specification_width(properties->par_shape, l); + } else */ if (l > lmt_linebreak_state.last_special_line) { + line_width = lmt_linebreak_state.second_width; + } else if (properties->par_shape) { + line_width = tex_get_specification_width(properties->par_shape, l); + } else { + line_width = lmt_linebreak_state.first_width; + } + } + } + /*tex + + If a line number class has ended, create new active nodes for the best feasible breaks + in that class; then |return| if |r = active|, otherwise compute the new |line_width|. + + Consider the demerits for a line from |r| to |cur_p|; deactivate node |r| if it should + no longer be active; then |goto continue| if a line from |r| to |cur_p| is infeasible, + otherwise record a new feasible break. 
+ + */ + artificial_demerits = 0; + shortfall = line_width - cur_active_width[total_glue_amount]; + if (active_break_node(r)) { + shortfall -= passive_last_left_box_width(active_break_node(r)); + } else { + shortfall -= lmt_linebreak_state.init_internal_left_box_width; + } + shortfall -= lmt_linebreak_state.internal_right_box_width; + // halfword margin_kern_stretch = 0; + // halfword margin_kern_shrink = 0; + if (properties->protrude_chars) { + // if (line_break_dir == dir_righttoleft) { + // /*tex Not now, we need to keep more track. */ + // } else { + halfword o = null; + halfword l1 = active_break_node(r) ? passive_cur_break(active_break_node(r)) : first_p; + if (cur_p) { + o = node_prev(cur_p); + if (node_next(o) != cur_p) { + tex_normal_error("linebreak", "the node list is messed up"); + } + } + /*tex + + The last characters (hyphenation character) if these two list should always be + the same anyway, so we just look at |pre_break|. Let's look at the right margin + first. + + */ + if (cur_p && node_type(cur_p) == disc_node && disc_pre_break_head(cur_p)) { + /*tex + A |disc_node| with non-empty |pre_break|, protrude the last char of + |pre_break|: + */ + o = disc_pre_break_tail(cur_p); + } else { + o = tex_aux_find_protchar_right(l1, o); + } + if (o && node_type(o) == glyph_node) { + shortfall += tex_char_protrusion(o, right_margin_kern_subtype); + // char_pw_kern(o, right_margin_kern, &margin_kern_stretch, &margin_kern_shrink); + } + /*tex now the left margin */ + if (l1 && (node_type(l1) == disc_node) && (disc_post_break_head(l1))) { + /*tex The first char could be a disc! Protrude the first char. 
*/ + o = disc_post_break_head(l1); + } else { + o = tex_aux_find_protchar_left(l1, 1); + } + if (o && node_type(o) == glyph_node) { + shortfall += tex_char_protrusion(o, left_margin_kern_subtype); + // char_pw_kern(o, left_margin_kern, &margin_kern_stretch, &margin_kern_shrink); + } + // } + } + /*tex + The only reason why we have a shared ratio is that we need to calculate the shortfall + for a line with mixed fonts. BTW, why do we divide by 2? + */ + if (shortfall == 0) { + /*tex We're okay. */ + } else if (shortfall > 0) { + halfword total_stretch = total_font_stretch; + // halfword total_stretch = total_font_stretch + margin_kern_stretch; + if (total_stretch > 0) { + if (total_stretch > shortfall) { + shortfall = (total_stretch / (lmt_linebreak_state.max_stretch_ratio / lmt_linebreak_state.current_font_step)) / 2; + } else { + shortfall -= total_stretch; + } + } + } else if (shortfall < 0) { + halfword total_shrink = total_font_shrink; + // halfword total_shrink = total_font_shrink + margin_kern_shrink; + if (total_shrink > 0) { + if (total_shrink > -shortfall) { + shortfall = - (total_shrink / (lmt_linebreak_state.max_shrink_ratio / lmt_linebreak_state.current_font_step)) / 2; + } else { + shortfall += total_shrink; + } + } + } + if (shortfall > 0) { + /*tex + + Set the value of |b| to the badness for stretching the line, and compute the + corresponding |fit_class|. When a line must stretch, the available stretchability + can be found in the subarray |cur_active_width [2 .. 6]|, in units of points, sfi, + fil, fill and filll. + + The present section is part of \TEX's inner loop, and it is most often performed + when the badness is infinite; therefore it is worth while to make a quick test for + large width excess and small stretchability, before calling the |badness| subroutine. 
+ + */ + if (cur_active_width[total_fi_amount] || cur_active_width[total_fil_amount] || + cur_active_width[total_fill_amount] || cur_active_width[total_filll_amount]) { + if (lmt_linebreak_state.do_last_line_fit) { + if (! cur_p) { + /*tex + + The last line of a paragraph. Perform computations for last line and + |goto found|. Here we compute the adjustment |g| and badness |b| for a + line from |r| to the end of the paragraph. When any of the criteria for + adjustment is violated we fall through to the normal algorithm. The last + line must be too short, and have infinite stretch entirely due to + |par_fill_skip|. + + */ + if (active_short(r) == 0 || active_glue(r) <= 0) { + /*tex + + Previous line was neither stretched nor shrunk, or was infinitely + bad. + + */ + goto NOT_FOUND; + } + if (cur_active_width[total_fi_amount] != lmt_linebreak_state.fill_width[fi_order] || cur_active_width[total_fil_amount] != lmt_linebreak_state.fill_width[fil_order] || + cur_active_width[total_fill_amount] != lmt_linebreak_state.fill_width[fill_order] || cur_active_width[total_filll_amount] != lmt_linebreak_state.fill_width[filll_order]) { + /*tex + Infinite stretch of this line not entirely due to |par_fill_skip|. + */ + goto NOT_FOUND; + } + if (active_short(r) > 0) { + g = cur_active_width[total_stretch_amount]; + } else { + g = cur_active_width[total_shrink_amount]; + } + if (g <= 0) { + /*tex No finite stretch resp.\ no shrink. */ + goto NOT_FOUND; + } + lmt_scanner_state.arithmic_error = 0; + g = tex_fract(g, active_short(r), active_glue(r), max_dimen); + if (properties->last_line_fit < 1000) { + g = tex_fract(g, properties->last_line_fit, 1000, max_dimen); + } + if (lmt_scanner_state.arithmic_error) { + g = (active_short(r) > 0) ? max_dimen : -max_dimen; + } + if (g > 0) { + /*tex + + Set the value of |b| to the badness of the last line for stretching, + compute the corresponding |fit_class, and |goto found|. 
These + badness computations are rather similar to those of the standard + algorithm, with the adjustment amount |g| replacing the |shortfall|. + + */ + if (g > shortfall) { + g = shortfall; + } + if (g > large_width_excess && (cur_active_width[total_stretch_amount] < small_stretchability)) { + b = infinite_bad; + fit_class = very_loose_fit; + goto FOUND; + } + b = tex_badness(g, cur_active_width[total_stretch_amount]); + fit_class = tex_normalized_loose_badness(b, loose, semi_loose, decent); + goto FOUND; + } else if (g < 0) { + /*tex + + Set the value of |b| to the badness of the last line for shrinking, + compute the corresponding |fit_class, and |goto found||. + + */ + if (-g > cur_active_width[total_shrink_amount]) { + g = -cur_active_width[total_shrink_amount]; + } + b = tex_badness(-g, cur_active_width[total_shrink_amount]); + fit_class = tex_normalized_tight_badness(b, decent, semi_tight); + goto FOUND; + } + } + NOT_FOUND: + shortfall = 0; + } + b = 0; + /*tex Infinite stretch. */ + fit_class = decent_fit; + } else if (shortfall > large_width_excess && cur_active_width[total_stretch_amount] < small_stretchability) { + b = infinite_bad; + fit_class = very_loose_fit; + } else { + b = tex_badness(shortfall, cur_active_width[total_stretch_amount]); + fit_class = tex_normalized_loose_badness(b, loose, semi_loose, decent); + } + } else { + /*tex + + Set the value of |b| to the badness for shrinking the line, and compute the + corresponding |fit_class|. Shrinkability is never infinite in a paragraph; we + can shrink the line from |r| to |cur_p| by at most |cur_active_width + [total_shrink_amount]|. + + */ + if (-shortfall > cur_active_width[total_shrink_amount]) { + b = infinite_bad + 1; + } else { + b = tex_badness(-shortfall, cur_active_width[total_shrink_amount]); + } + fit_class = tex_normalized_tight_badness(b, decent, semi_tight); + } + if (lmt_linebreak_state.do_last_line_fit) { + /*tex Adjust the additional data for last line; */ + if (! 
cur_p) { + shortfall = 0; + g = 0; + } else if (shortfall > 0) { + g = cur_active_width[total_stretch_amount]; + } else if (shortfall < 0) { + g = cur_active_width[total_shrink_amount]; + } else { + g = 0; + } + } + FOUND: + if ((b > infinite_bad) || (pi == eject_penalty)) { + /*tex + + Prepare to deactivate node~|r|, and |goto deactivate| unless there is a reason to + consider lines of text from |r| to |cur_p|. During the final pass, we dare not + lose all active nodes, lest we lose touch with the line breaks already found. The + code shown here makes sure that such a catastrophe does not happen, by permitting + overfull boxes as a last resort. This particular part of \TEX\ was a source of + several subtle bugs before the correct program logic was finally discovered; readers + who seek to improve \TEX\ should therefore think thrice before daring to make any + changes here. + + */ + if (lmt_linebreak_state.final_pass && (lmt_linebreak_state.minimum_demerits == awful_bad) && + (node_next(r) == active_head) && (prev_r == active_head)) { + /*tex Set demerits zero, this break is forced. */ + artificial_demerits = 1; + } else if (b > lmt_linebreak_state.threshold) { + goto DEACTIVATE; + } + node_r_stays_active = 0; + } else { + prev_r = r; + if (b > lmt_linebreak_state.threshold) { + continue; + } else { + node_r_stays_active = 1; + } + } + /*tex + + Record a new feasible break. When we get to this part of the code, the line from |r| to + |cur_p| is feasible, its badness is~|b|, and its fitness classification is |fit_class|. + We don't want to make an active node for this break yet, but we will compute the total + demerits and record them in the |minimal_demerits| array, if such a break is the current + champion among all ways to get to |cur_p| in a given line-number class and fitness class. + + */ + if (artificial_demerits) { + d = 0; + } else { + /*tex Compute the demerits, |d|, from |r| to |cur_p|. 
*/ + d = properties->line_penalty + b; + if (abs(d) >= 10000) { + d = 100000000; + } else { + d = d * d; + } + if (pi != 0) { + if (pi > 0) { + d += (pi * pi); + } else if (pi > eject_penalty) { + d -= (pi * pi); + } + } + if (break_type == hyphenated_node && node_type(r) == hyphenated_node) { + if (cur_p) { + d += properties->double_hyphen_demerits; + } else { + d += properties->final_hyphen_demerits; + } + } + /*tex + Here |fitness| is just the subtype, so we could have put the cast in the macro + instead: |# define fitness (n) ((halfword) (subtype (n))|. We need to cast because + some compilers (versions or whatever) get confused by the type of (unsigned) integer + used. + */ + if (abs(fit_class - (halfword) active_fitness(r)) > 1) { + d = d + properties->adj_demerits; + } + } + if (properties->tracing_paragraphs > 0) { + tex_aux_print_feasible_break(cur_p, r, b, pi, d, artificial_demerits, properties); + } + /*tex This is the minimum total demerits from the beginning to |cur_p| via |r|. */ + d += active_total_demerits(r); + if (d <= lmt_linebreak_state.minimal_demerits[fit_class]) { + lmt_linebreak_state.minimal_demerits[fit_class] = d; + best_place[fit_class] = active_break_node(r); + best_place_line[fit_class] = l; + if (lmt_linebreak_state.do_last_line_fit) { + /*tex + + Store additional data for this feasible break. For each feasible break we record + the shortfall and glue stretch or shrink (or adjustment). + + */ + best_place_short[fit_class] = shortfall; + best_place_glue[fit_class] = g; + } + if (d < lmt_linebreak_state.minimum_demerits) { + lmt_linebreak_state.minimum_demerits = d; + } + } + /*tex Record a new feasible break. */ + if (node_r_stays_active) { + /*tex |prev_r| has been set to |r|. */ + continue; + } + DEACTIVATE: + /*tex + + Deactivate node |r|. When an active node disappears, we must delete an adjacent delta + node if the active node was at the beginning or the end of the active list, or if it + was surrounded by delta nodes. 
We also must preserve the property that |cur_active_width|
+            represents the length of material from |vlink (prev_r)| to~|cur_p|.
+
+        */
+        node_next(prev_r) = node_next(r);
+        tex_flush_node(r);
+        if (prev_r == active_head) {
+            /*tex
+
+                Update the active widths, since the first active node has been deleted. The following
+                code uses the fact that |type (active) <> delta_node|. If the active list has just
+                become empty, we do not need to update the |active_width| array, since it will be
+                initialized when an active node is next inserted.
+
+            */
+            r = node_next(active_head);
+            if (node_type(r) == delta_node) {
+                tex_aux_add_to_target_from_delta(properties->adjust_spacing, lmt_linebreak_state.active_width, r);
+                tex_aux_set_target_to_source(properties->adjust_spacing, cur_active_width, lmt_linebreak_state.active_width);
+                node_next(active_head) = node_next(r);
+                tex_flush_node(r);
+            }
+        } else if (node_type(prev_r) == delta_node) {
+            r = node_next(prev_r);
+            if (r == active_head) {
+                tex_aux_sub_delta_from_target(properties->adjust_spacing, cur_active_width, prev_r);
+                node_next(prev_prev_r) = active_head;
+                tex_flush_node(prev_r);
+                prev_r = prev_prev_r;
+            } else if (node_type(r) == delta_node) {
+                tex_aux_add_to_target_from_delta(properties->adjust_spacing, cur_active_width, r);
+                tex_aux_add_to_delta_from_delta(properties->adjust_spacing, prev_r, r);
+                node_next(prev_r) = node_next(r);
+                tex_flush_node(r);
+            }
+        }
+    }
+}
+/*tex
+
+    Put a penalty node with value |amount| and subtype |orphan_penalty_subtype| between |current|
+    and its predecessor. Nothing is injected when |current| has no predecessor or when that
+    predecessor already is a penalty node. When a penalty has been injected the predecessor is
+    returned so that a backward scan continues in front of the new penalty; otherwise |current|
+    is returned unchanged.
+
+*/
+static halfword tex_aux_inject_orphan_penalty(halfword current, halfword amount)
+{
+    halfword previous = node_prev(current);
+    if (previous && node_type(previous) != penalty_node) {
+        halfword penalty = tex_new_penalty_node(amount, orphan_penalty_subtype);
+        /*tex Link the new node in: |previous| <-> |penalty| <-> |current|. */
+        tex_couple_nodes(previous, penalty);
+        tex_couple_nodes(penalty, current);
+        current = previous;
+    }
+    return current;
+}
+/*tex
+
+    Tell whether the node |p| is preceded by something that we may break after: there has to be
+    a predecessor, it may not be |temp_head|, and it must either be a glyph or pass one of the
+    |precedes_break|, |precedes_kern| or |precedes_dir| checks.
+
+*/
+inline static int tex_aux_valid_glue_break(halfword p)
+{
+    halfword prv = node_prev(p);
+    return (prv && prv != temp_head && (node_type(prv) == glyph_node ||
precedes_break(prv) || precedes_kern(prv) || precedes_dir(prv))); +} + +void tex_do_line_break(line_break_properties *properties) +{ + /*tex Miscellaneous nodes of temporary interest. */ + halfword cur_p, l, r; + int line_break_dir = properties->paragraph_dir; + int force_check_hyphenation = hyphenation_permitted(properties->hyphenation_mode, force_check_hyphenation_mode); + (void) (properties->inter_line_penalties); /* avoid not used message */ + /*tex Get ready to start */ + lmt_linebreak_state.fewest_demerits = 0; + lmt_linebreak_state.actual_looseness = 0; + lmt_linebreak_state.minimum_demerits = awful_bad; + for (int i = very_loose_fit; i <= tight_fit; i++) { + lmt_linebreak_state.minimal_demerits[i] = awful_bad; + } + /*tex + This has been moved here: + */ + if (properties->adjust_spacing) { + lmt_linebreak_state.adjust_spacing = properties->adjust_spacing; + if (properties->adjust_spacing_step > 0) { + lmt_linebreak_state.adjust_spacing_step = properties->adjust_spacing_step; + lmt_linebreak_state.adjust_spacing_shrink = -properties->adjust_spacing_shrink; /* watch the sign */ + lmt_linebreak_state.adjust_spacing_stretch = properties->adjust_spacing_stretch; + } else { + lmt_linebreak_state.adjust_spacing_step = 0; + lmt_linebreak_state.adjust_spacing_shrink = 0; + lmt_linebreak_state.adjust_spacing_stretch = 0; + } + properties->adjust_spacing = tex_checked_font_adjust( + properties->adjust_spacing, + lmt_linebreak_state.adjust_spacing_step, + lmt_linebreak_state.adjust_spacing_shrink, + lmt_linebreak_state.adjust_spacing_stretch + ); + } else { + lmt_linebreak_state.adjust_spacing_step = 0; + lmt_linebreak_state.adjust_spacing_shrink = 0; + lmt_linebreak_state.adjust_spacing_stretch = 0; + properties->adjust_spacing = adjust_spacing_off; + } + lmt_linebreak_state.current_font_step = -1; + lmt_linebreak_state.max_shrink_ratio = -1; + lmt_linebreak_state.max_stretch_ratio = -1; + /*tex + + We compute the values of |easy_line| and the other local variables 
relating to line length + when the |line_break| procedure is initializing itself. + + The orphan penalty injection is something new. It works backward so the first penalty in + the list is injected first. If there is a penalty before a space we skip that space and + also skip a penalty in the list. + + */ + if (properties->orphan_penalties || properties->orphan_penalty) { + halfword current = node_prev(properties->parfill_right_skip); + if (current) { + /*tex Skip over trailing glue and penalties. */ + while (current) { + switch (node_type(current)) { + case glue_node: + case penalty_node: + current = node_prev(current); + break; + default: + goto INJECT; + } + } + INJECT: + if (properties->orphan_penalties) { + /*tex Inject specified penalties before spaces. */ + int n = specification_count(properties->orphan_penalties); + if (n > 0) { + halfword i = 0; + while (current) { + if (node_type(current) == glue_node) { + switch (node_subtype(current)) { + case space_skip_glue: + case xspace_skip_glue: + case zero_space_skip_glue: + current = tex_aux_inject_orphan_penalty(current, tex_get_specification_penalty(properties->orphan_penalties, ++i)); + if (i == n) { + goto ALLDONE; + } else { + break; + } + } + } + current = node_prev(current); + } + } + } else { + while (current) { + if (node_type(current) == glue_node) { + switch (node_subtype(current)) { + case space_skip_glue: + case xspace_skip_glue: + case zero_space_skip_glue: + tex_aux_inject_orphan_penalty(current, properties->orphan_penalty); + goto ALLDONE; + } + } + current = node_prev(current); + } + } + } + ALLDONE: ; + } + if (properties->par_shape) { + int n = specification_count(properties->par_shape); + if (n > 0) { + if (specification_repeat(properties->par_shape)) { + lmt_linebreak_state.last_special_line = max_halfword; + } else { + lmt_linebreak_state.last_special_line = n - 1; + } + lmt_linebreak_state.second_indent = tex_get_specification_indent(properties->par_shape, n); + 
lmt_linebreak_state.second_width = tex_get_specification_width(properties->par_shape, n); + lmt_linebreak_state.second_indent = swap_parshape_indent(properties->paragraph_dir, lmt_linebreak_state.second_indent, lmt_linebreak_state.second_width); + } else { + lmt_linebreak_state.last_special_line = 0; + lmt_linebreak_state.second_width = properties->hsize; + lmt_linebreak_state.second_indent = 0; + } + } else if (properties->hang_indent == 0) { + lmt_linebreak_state.last_special_line = 0; + lmt_linebreak_state.second_width = properties->hsize; + lmt_linebreak_state.second_indent = 0; + } else { + halfword used_hang_indent = swap_hang_indent(properties->paragraph_dir, properties->hang_indent); + /*tex + + Set line length parameters in preparation for hanging indentation. We compute the + values of |easy_line| and the other local variables relating to line length when the + |line_break| procedure is initializing itself. + + */ + lmt_linebreak_state.last_special_line = abs(properties->hang_after); + if (properties->hang_after < 0) { + lmt_linebreak_state.first_width = properties->hsize - abs(used_hang_indent); + if (used_hang_indent >= 0) { + lmt_linebreak_state.first_indent = used_hang_indent; + } else { + lmt_linebreak_state.first_indent = 0; + } + lmt_linebreak_state.second_width = properties->hsize; + lmt_linebreak_state.second_indent = 0; + } else { + lmt_linebreak_state.first_width = properties->hsize; + lmt_linebreak_state.first_indent = 0; + lmt_linebreak_state.second_width = properties->hsize - abs(used_hang_indent); + if (used_hang_indent >= 0) { + lmt_linebreak_state.second_indent = used_hang_indent; + } else { + lmt_linebreak_state.second_indent = 0; + } + } + } + if (properties->looseness == 0) { + lmt_linebreak_state.easy_line = lmt_linebreak_state.last_special_line; + } else { + lmt_linebreak_state.easy_line = max_halfword; + } + lmt_linebreak_state.no_shrink_error_yet = 1; + l = properties->left_skip; + r = properties->right_skip; + 
lmt_linebreak_state.background[total_glue_amount] = glue_amount(l) + glue_amount(r); + lmt_linebreak_state.background[total_stretch_amount] = 0; + lmt_linebreak_state.background[total_fi_amount] = 0; + lmt_linebreak_state.background[total_fil_amount] = 0; + lmt_linebreak_state.background[total_fill_amount] = 0; + lmt_linebreak_state.background[total_filll_amount] = 0; + lmt_linebreak_state.background[total_stretch_amount + glue_stretch_order(l)] = glue_stretch(l); + lmt_linebreak_state.background[total_stretch_amount + glue_stretch_order(r)] += glue_stretch(r); + lmt_linebreak_state.background[total_shrink_amount] = tex_aux_checked_shrink(l) + tex_aux_checked_shrink(r); + if (properties->adjust_spacing) { + lmt_linebreak_state.background[font_stretch_amount] = 0; + lmt_linebreak_state.background[font_shrink_amount] = 0; + lmt_linebreak_state.max_stretch_ratio = -1; + lmt_linebreak_state.max_shrink_ratio = -1; + lmt_linebreak_state.current_font_step = -1; + lmt_packaging_state.previous_char_ptr = null; + } + /*tex + + Check for special treatment of last line of paragraph. The new algorithm for the last line + requires that the stretchability |par_fill_skip| is infinite and the stretchability of + |left_skip| plus |right_skip| is finite. 
+ + */ + lmt_linebreak_state.do_last_line_fit = 0; + if (properties->last_line_fit > 0) { + halfword q = lmt_linebreak_state.last_line_fill; + if (glue_stretch(q) > 0 && glue_stretch_order(q) > normal_glue_order) { + if (lmt_linebreak_state.background[total_fi_amount] == 0 && lmt_linebreak_state.background[total_fil_amount] == 0 && + lmt_linebreak_state.background[total_fill_amount] == 0 && lmt_linebreak_state.background[total_filll_amount] == 0) { + lmt_linebreak_state.do_last_line_fit = 1; + lmt_linebreak_state.fill_width[fi_order] = 0; + lmt_linebreak_state.fill_width[fil_order] = 0; + lmt_linebreak_state.fill_width[fill_order] = 0; + lmt_linebreak_state.fill_width[filll_order] = 0; + lmt_linebreak_state.fill_width[glue_stretch_order(q) - fi_glue_order] = glue_stretch(q); + } + } + } + /*tex Initialize |dir_ptr| for |line_break|. */ + if (lmt_linebreak_state.dir_ptr) { + tex_flush_node_list(lmt_linebreak_state.dir_ptr); + lmt_linebreak_state.dir_ptr = null; + } + /*tex Find optimal breakpoints. 
*/ + lmt_linebreak_state.threshold = properties->pretolerance; + + if (properties->tracing_paragraphs > 1) { + tex_begin_diagnostic(); + tex_print_str("[linebreak: original]"); + tex_short_display(node_next(temp_head)); + tex_end_diagnostic(); + } + + if (lmt_linebreak_state.threshold >= 0) { + if (properties->tracing_paragraphs > 0) { + tex_begin_diagnostic(); + tex_print_str("[linebreak: first pass]"); /* @firstpass */ + } + lmt_linebreak_state.second_pass = 0; + lmt_linebreak_state.final_pass = 0; + } else { + lmt_linebreak_state.threshold = properties->tolerance; + lmt_linebreak_state.second_pass = 1; + lmt_linebreak_state.final_pass = (properties->emergency_stretch <= 0); + if (properties->tracing_paragraphs > 0) { + tex_begin_diagnostic(); + } + } + while (1) { + halfword first_p, q; + halfword nest_stack[10]; + int nest_index = 0; + if (lmt_linebreak_state.threshold > infinite_bad) { + lmt_linebreak_state.threshold = infinite_bad; + } + /*tex Create an active breakpoint representing the beginning of the paragraph. */ + q = tex_new_node(unhyphenated_node, (quarterword) decent_fit); + node_next(q) = active_head; + active_break_node(q) = null; + active_line_number(q) = cur_list.prev_graf + 1; + active_total_demerits(q) = 0; + active_short(q) = 0; + active_glue(q) = 0; + node_next(active_head) = q; /* we create a cycle */ + tex_aux_set_target_to_source(properties->adjust_spacing, lmt_linebreak_state.active_width, lmt_linebreak_state.background); + lmt_linebreak_state.passive = null; + lmt_linebreak_state.printed_node = temp_head; + lmt_linebreak_state.pass_number = 0; + lmt_print_state.font_in_short_display = null_font; + /*tex Create an active breakpoint representing the beginning of the paragraph. */ + /* lmt_linebreak_state.auto_breaking = 1; */ /* gone */ + cur_p = node_next(temp_head); + /*tex Initialize with first (or current) |par| node. 
*/ + if (cur_p && node_type(cur_p) == par_node) { + node_prev(cur_p) = temp_head; + lmt_linebreak_state.internal_penalty_interline = tex_get_local_interline_penalty(cur_p); + lmt_linebreak_state.internal_penalty_broken = tex_get_local_broken_penalty(cur_p); + lmt_linebreak_state.init_internal_left_box = par_box_left(cur_p); + lmt_linebreak_state.init_internal_left_box_width = tex_get_local_left_width(cur_p); + lmt_linebreak_state.internal_right_box = par_box_right(cur_p); + lmt_linebreak_state.internal_right_box_width = tex_get_local_right_width(cur_p); + lmt_linebreak_state.internal_middle_box = par_box_middle(cur_p); + } else { + lmt_linebreak_state.internal_penalty_interline = 0; + lmt_linebreak_state.internal_penalty_broken = 0; + lmt_linebreak_state.init_internal_left_box = null; + lmt_linebreak_state.init_internal_left_box_width = 0; + lmt_linebreak_state.internal_right_box = null; + lmt_linebreak_state.internal_right_box_width = 0; + lmt_linebreak_state.internal_middle_box = null; + } + lmt_linebreak_state.internal_left_box = lmt_linebreak_state.init_internal_left_box; + lmt_linebreak_state.internal_left_box_width = lmt_linebreak_state.init_internal_left_box_width; + lmt_packaging_state.previous_char_ptr = null; + first_p = cur_p; + /*tex + + To access the first node of paragraph as the first active node has |break_node = null|. + + Determine legal breaks: As we move through the hlist, we need to keep the |active_width| + array up to date, so that the badness of individual lines is readily calculated by + |try_break|. It is convenient to use the short name |active_width [1]| for the component + of active width that represents real width as opposed to glue. + + Advance |cur_p| to the node following the present string of characters. The code that + passes over the characters of words in a paragraph is part of \TEX's inner loop, so it + has been streamlined for speed. 
We use the fact that |\parfillskip| glue appears at the + end of each paragraph; it is therefore unnecessary to check if |vlink (cur_p) = null| + when |cur_p| is a character node. + + */ + while (cur_p && (node_next(active_head) != active_head)) { /* we check the cycle */ + switch (node_type(cur_p)) { + case glyph_node: + lmt_linebreak_state.active_width[total_glue_amount] += tex_glyph_width_ex(cur_p); + if (properties->adjust_spacing && tex_aux_check_expand_pars(properties->adjust_spacing_step, glyph_font(cur_p))) { + lmt_packaging_state.previous_char_ptr = cur_p; + lmt_linebreak_state.active_width[font_stretch_amount] += tex_char_stretch(cur_p); + lmt_linebreak_state.active_width[font_shrink_amount] += tex_char_shrink(cur_p); + } + break; + case hlist_node: + case vlist_node: + lmt_linebreak_state.active_width[total_glue_amount] += box_width(cur_p); + break; + case rule_node: + lmt_linebreak_state.active_width[total_glue_amount] += rule_width(cur_p); + break; + case dir_node: + /*tex Adjust the dir stack for the |line_break| routine. */ + line_break_dir = tex_update_dir_state(cur_p, properties->paragraph_dir); + break; + case par_node: + /*tex Advance past a |par| node. */ + lmt_linebreak_state.internal_penalty_interline = tex_get_local_interline_penalty(cur_p); + lmt_linebreak_state.internal_penalty_broken = tex_get_local_broken_penalty(cur_p); + lmt_linebreak_state.internal_left_box = par_box_left(cur_p); + lmt_linebreak_state.internal_left_box_width = tex_get_local_left_width(cur_p); + lmt_linebreak_state.internal_right_box = par_box_right(cur_p); + lmt_linebreak_state.internal_right_box_width = tex_get_local_right_width(cur_p); + lmt_linebreak_state.internal_middle_box = par_box_middle(cur_p); + break; + case glue_node: + /*tex + + If node |cur_p| is a legal breakpoint, call |try_break|; then update the + active widths by including the glue in |glue_ptr(cur_p)|. 
+ + When node |cur_p| is a glue node, we look at the previous to see whether or + not a breakpoint is legal at |cur_p|, as explained above. + + We only break after certain nodes (see texnodes.h), a font related kern and + a dir node when |\breakafterdirmode = 1|. + + */ + if (tex_has_glue_option(cur_p, glue_option_no_auto_break)) { + /*tex Glue in math is not a valid breakpoint. */ + } else if (tex_is_par_init_glue(cur_p)) { + /*tex Of course we don't break here. */ + } else if (tex_aux_valid_glue_break(cur_p)) { + tex_aux_try_break(properties, 0, unhyphenated_node, first_p, cur_p); + } + lmt_linebreak_state.active_width[total_glue_amount] += glue_amount(cur_p); + lmt_linebreak_state.active_width[2 + glue_stretch_order(cur_p)] += glue_stretch(cur_p); + lmt_linebreak_state.active_width[total_shrink_amount] += tex_aux_checked_shrink(cur_p); + break; + case kern_node: + switch (node_subtype(cur_p)) { + case explicit_kern_subtype: + case italic_kern_subtype: + { + /* there used to a ! is_char_node(node_next(cur_p)) test */ + halfword nxt = node_next(cur_p); + if (nxt && node_type(nxt) == glue_node && ! tex_has_glue_option(nxt, glue_option_no_auto_break)) { + tex_aux_try_break(properties, 0, unhyphenated_node, first_p, cur_p); + } + } + break; + case font_kern_subtype: + if (properties->adjust_spacing == adjust_spacing_full) { + lmt_linebreak_state.active_width[font_stretch_amount] += tex_kern_stretch(cur_p); + lmt_linebreak_state.active_width[font_shrink_amount] += tex_kern_shrink(cur_p); + } + break; + } + lmt_linebreak_state.active_width[total_glue_amount] += kern_amount(cur_p); + break; + case disc_node: + /*tex + + Try to break after a discretionary fragment, then |goto done5|. The + following code knows that discretionary texts contain only character + nodes, kern nodes, box nodes, and rule nodes. This branch differs a bit + from older engines because in \LUATEX\ we already have hyphenated the list. + This means that we need to skip automatic disc nodes. 
Or better, we need + to treat discretionaries and explicit hyphens always, even in the first + pass. + + We used to have |init_disc| followed by |select disc| variants where the + |select_disc|s were handled by the leading |init_disc|. The question is: should + we bother about select nodes? Knuth indicates in the original source that only + a very few cases need hyphenation so the exceptional case of >2 char ligatures + having hyphenation points in between is rare. We'd better have proper compound + word handling. Keep in mind that these (old) init and select subtypes always + came in isolated pairs and that they only were meant for the simple (enforced) + hyphenation discretionaries. + + Therefore, this feature has been dropped from \LUAMETATEX. It not only makes + the code simpler, it also avoids having code on board for border cases that + even when dealt with are suboptimal. It's better to have nothing that something + fuzzy. It also makes dealing with (intermediate) node lists easier. If I want + something like this it should be okay for any situation. 
+ + */ + if (force_check_hyphenation || lmt_linebreak_state.second_pass || (node_subtype(cur_p) != syllable_discretionary_code)) { + halfword actual_penalty = disc_penalty(cur_p); + halfword s = disc_pre_break_head(cur_p); + tex_aux_reset_disc_target(properties->adjust_spacing, lmt_linebreak_state.disc_width); + if (s) { + tex_aux_add_to_widths(s, properties->adjust_spacing, properties->adjust_spacing_step, lmt_linebreak_state.disc_width); + tex_aux_add_disc_source_to_target(properties->adjust_spacing, lmt_linebreak_state.active_width, lmt_linebreak_state.disc_width); + tex_aux_try_break(properties, actual_penalty, hyphenated_node, first_p, cur_p); + tex_aux_sub_disc_target_from_source(properties->adjust_spacing, lmt_linebreak_state.active_width, lmt_linebreak_state.disc_width); + } else { + /*tex trivial pre-break */ + tex_aux_try_break(properties, actual_penalty, hyphenated_node, first_p, cur_p); + } + } + tex_aux_add_to_widths(disc_no_break_head(cur_p), properties->adjust_spacing, properties->adjust_spacing_step, lmt_linebreak_state.active_width); + break; + case penalty_node: + tex_aux_try_break(properties, penalty_amount(cur_p), unhyphenated_node, first_p, cur_p); + break; + case math_node: + { + /* there used to a ! is_char_node(node_next(cur_p)) test */ + int finishing = node_subtype(cur_p) == end_inline_math; + // lmt_linebreak_state.auto_breaking = finishing; + if (tex_math_glue_is_zero(cur_p) || tex_ignore_math_skip(cur_p)) { + /*tex + When we end up here we assume |\mathsurround| but we only check for + a break when we're ending math. Maybe this is something we need to + open up. The math specific penalty only kicks in when we break. + */ + if (finishing && node_type(node_next(cur_p)) == glue_node) { + tex_aux_try_break(properties, math_penalty(cur_p), unhyphenated_node, first_p, cur_p); + } + lmt_linebreak_state.active_width[total_glue_amount] += math_surround(cur_p); + } else { + /*tex + This one does quite some testing, is that still needed? 
+ */ + if (finishing && tex_aux_valid_glue_break(cur_p)) { + tex_aux_try_break(properties, math_penalty(cur_p), unhyphenated_node, first_p, cur_p); + } + lmt_linebreak_state.active_width[total_glue_amount] += math_amount(cur_p); + lmt_linebreak_state.active_width[2 + math_stretch_order(cur_p)] += math_stretch(cur_p); + lmt_linebreak_state.active_width[total_shrink_amount] += tex_aux_checked_shrink(cur_p); + } + } + break; + case boundary_node: + case whatsit_node: + case mark_node: + case insert_node: + case adjust_node: + /*tex Advance past these nodes in the |line_break| loop. */ + break; + default: + tex_formatted_error("parbuilder", "weird node %d in paragraph", node_type(cur_p)); + } + cur_p = node_next(cur_p); + while (! cur_p && nest_index > 0) { + cur_p = nest_stack[--nest_index]; + } + } + if (! cur_p) { + /*tex + + Try the final line break at the end of the paragraph, and |goto done| if the desired + breakpoints have been found. + + The forced line break at the paragraph's end will reduce the list of breakpoints so + that all active nodes represent breaks at |cur_p = null|. On the first pass, we + insist on finding an active node that has the correct \quote {looseness.} On the + final pass, there will be at least one active node, and we will match the desired + looseness as well as we can. + + The global variable |best_bet| will be set to the active node for the best way to + break the paragraph, and a few other variables are used to help determine what is + best. + + */ + tex_aux_try_break(properties, eject_penalty, hyphenated_node, first_p, cur_p); + if (node_next(active_head) != active_head) { + /*tex Find an active node with fewest demerits. 
*/ + r = node_next(active_head); + lmt_linebreak_state.fewest_demerits = awful_bad; + do { + if ((node_type(r) != delta_node) && (active_total_demerits(r) < lmt_linebreak_state.fewest_demerits)) { + lmt_linebreak_state.fewest_demerits = active_total_demerits(r); + lmt_linebreak_state.best_bet = r; + } + r = node_next(r); + } while (r != active_head); + lmt_linebreak_state.best_line = active_line_number(lmt_linebreak_state.best_bet); + /*tex Find an active node with fewest demerits. */ + if (properties->looseness == 0) { + goto DONE; + } else { + /*tex + + Find the best active node for the desired looseness. The adjustment for a + desired looseness is a slightly more complicated version of the loop just + considered. Note that if a paragraph is broken into segments by displayed + equations, each segment will be subject to the looseness calculation, + independently of the other segments. + + */ + r = node_next(active_head); // can be local + lmt_linebreak_state.actual_looseness = 0; + do { + if (node_type(r) != delta_node) { + lmt_linebreak_state.line_difference = active_line_number(r) - lmt_linebreak_state.best_line; + if (((lmt_linebreak_state.line_difference < lmt_linebreak_state.actual_looseness) && (properties->looseness <= lmt_linebreak_state.line_difference)) + || ((lmt_linebreak_state.line_difference > lmt_linebreak_state.actual_looseness) && (properties->looseness >= lmt_linebreak_state.line_difference))) { + lmt_linebreak_state.best_bet = r; + lmt_linebreak_state.actual_looseness = lmt_linebreak_state.line_difference; + lmt_linebreak_state.fewest_demerits = active_total_demerits(r); + } else if ((lmt_linebreak_state.line_difference == lmt_linebreak_state.actual_looseness) && (active_total_demerits(r) < lmt_linebreak_state.fewest_demerits)) { + lmt_linebreak_state.best_bet = r; + lmt_linebreak_state.fewest_demerits = active_total_demerits(r); + } + } + r = node_next(r); + } while (r != active_head); + lmt_linebreak_state.best_line = 
active_line_number(lmt_linebreak_state.best_bet); + /*tex + Find the best active node for the desired looseness. + */ + if ((lmt_linebreak_state.actual_looseness == properties->looseness) || lmt_linebreak_state.final_pass) { + goto DONE; + } + } + } + } + /*tex Clean up the memory by removing the break nodes. */ + cur_p = tex_aux_clean_up_the_memory(cur_p); + if (! lmt_linebreak_state.second_pass) { + if (properties->tracing_paragraphs > 0) { + tex_print_str("%l[linebreak: second pass]"); /* @secondpass */; + } + lmt_linebreak_state.threshold = properties->tolerance; + lmt_linebreak_state.second_pass = 1; + lmt_linebreak_state.final_pass = (properties->emergency_stretch <= 0); + } else { + /*tex If at first you do not succeed, then: */ + if (properties->tracing_paragraphs > 0) { + tex_print_str("%l[linebreak: emergency pass]"); /* @emergencypass */ + } + lmt_linebreak_state.background[total_stretch_amount] += properties->emergency_stretch; + lmt_linebreak_state.final_pass = 1; + } + } + DONE: + if (properties->tracing_paragraphs > 0) { + tex_end_diagnostic(); + /*tex + This is a bit weird, as only here: |normalize_selector()| while we have diagnostics + all over the place. + */ + } + if (lmt_linebreak_state.do_last_line_fit) { + /*tex + Adjust the final line of the paragraph; here we either reset |do_last_line_fit| or + adjust the |par_fill_skip| glue. + */ + if (active_short(lmt_linebreak_state.best_bet) == 0) { + lmt_linebreak_state.do_last_line_fit = 0; + } else { + glue_amount(lmt_linebreak_state.last_line_fill) += (active_short(lmt_linebreak_state.best_bet) - active_glue(lmt_linebreak_state.best_bet)); + glue_stretch(lmt_linebreak_state.last_line_fill) = 0; + } + } + /*tex + Break the paragraph at the chosen. Once the best sequence of breakpoints has been found + (hurray), we call on the procedure |post_line_break| to finish the remainder of the work. + By introducing this subprocedure, we are able to keep |line_break| from getting extremely + long. 
The first thing |ext_post_line_break| does is reset |dir_ptr|. + */ + tex_flush_node_list(lmt_linebreak_state.dir_ptr); + lmt_linebreak_state.dir_ptr = null; + /*tex Here we still have a temp node as head. */ + tex_aux_post_line_break(properties, line_break_dir); + /*tex Clean up the memory by removing the break nodes. */ + cur_p = tex_aux_clean_up_the_memory(cur_p); +} + +void tex_get_linebreak_info(int *f, int *a) +{ + *f = lmt_linebreak_state.fewest_demerits; + *a = lmt_linebreak_state.actual_looseness; +} + +/*tex + + So far we have gotten a little way into the |line_break| routine, having covered its important + |try_break| subroutine. Now let's consider the rest of the process. + + The main loop of |line_break| traverses the given hlist, starting at |vlink (temp_head)|, and + calls |try_break| at each legal breakpoint. A variable called |auto_breaking| is set to true + except within math formulas, since glue nodes are not legal breakpoints when they appear in + formulas. + + The current node of interest in the hlist is pointed to by |cur_p|. Another variable, |prev_p|, + is usually one step behind |cur_p|, but the real meaning of |prev_p| is this: If |type (cur_p) + = glue_node| then |cur_p| is a legal breakpoint if and only if |auto_breaking| is true and + |prev_p| does not point to a glue node, penalty node, explicit kern node, or math node. + + The total number of lines that will be set by |post_line_break| is |best_line - prev_graf - 1|. + The last breakpoint is specified by |break_node (best_bet)|, and this passive node points to + the other breakpoints via the |prev_break| links. The finishing-up phase starts by linking the + relevant passive nodes in forward order, changing |prev_break| to |next_break|. (The + |next_break| fields actually reside in the same memory space as the |prev_break| fields did, + but we give them a new name because of their new significance.) Then the lines are justified, + one by one. 
+
+    The |post_line_break| must also keep a dir stack, so that it can output end direction
+    instructions at the ends of lines and begin direction instructions at the beginnings of lines.
+
+*/
+
+/*tex The new name for |prev_break| after links are reversed: */
+
+# define passive_next_break passive_prev_break
+
+/*tex
+    The |int|s are actually |halfword|s or |scaled|s. When |tracing_penalties_par| is positive
+    this helper reports, wrapped in a diagnostic block, which kind of penalty (|what|) is being
+    added for which |line| and |index|, the amount that gets added (|penalty|) and the |total|.
+    Otherwise nothing happens.
+*/
+
+static void tex_aux_trace_penalty(const char *what, int line, int index, halfword penalty, halfword total)
+{
+    if (tracing_penalties_par > 0) {
+        tex_begin_diagnostic();
+        tex_print_format("[linebreak: %s penalty, line %i, index %i, adding %i, total %i]", what, line, index, penalty, total);
+        tex_end_diagnostic();
+    }
+}
+
+static void tex_aux_post_line_break(const line_break_properties *properties, halfword line_break_dir)
+{
+    /*tex temporary registers for list manipulation */
+    halfword q, r;
+    halfword ls = null;
+    halfword rs = null;
+    /*tex was a break at glue? */
+    int glue_break;
+    /*tex are we in some shape */
+    int shaping = 0;
+    /*tex was the current break at a discretionary node? */
+    int disc_break;
+    /*tex and did it have a nonempty post-break part? */
+    int post_disc_break;
+    /*tex width of line number |cur_line| */
+    scaled cur_width;
+    /*tex left margin of line number |cur_line| */
+    scaled cur_indent;
+    /*tex |cur_p| will become the first breakpoint; */
+    halfword cur_p = null;
+    /*tex the current line number being justified */
+    halfword cur_line;
+    /*tex this saves calculations: */
+    int last_line = 0;
+    int first_line = 0;
+    /*tex the current direction: */
+    lmt_linebreak_state.dir_ptr = cur_list.direction_stack;
+    /*tex
+        Reverse the links of the relevant passive nodes, setting |cur_p| to the first breakpoint.
+        The job of reversing links in a list is conveniently regarded as the job of taking items
+        off one stack and putting them on another.
In this case we take them off a stack pointed + to by |q| and having |prev_break| fields; we put them on a stack pointed to by |cur_p| + and having |next_break| fields. Node |r| is the passive node being moved from stack to + stack. + */ + q = active_break_node(lmt_linebreak_state.best_bet); + do { + r = q; + q = passive_prev_break(q); + passive_next_break(r) = cur_p; + cur_p = r; + } while (q); + /*tex prevgraf + 1 */ + cur_line = cur_list.prev_graf + 1; + do { + /*tex + Justify the line ending at breakpoint |cur_p|, and append it to the current vertical + list, together with associated penalties and other insertions. + + The current line to be justified appears in a horizontal list starting at |vlink + (temp_head)| and ending at |cur_break (cur_p)|. If |cur_break (cur_p)| is a glue node, + we reset the glue to equal the |right_skip| glue; otherwise we append the |right_skip| + glue at the right. If |cur_break (cur_p)| is a discretionary node, we modify the list + so that the discretionary break is compulsory, and we set |disc_break| to |true|. We + also append the |left_skip| glue at the left of the line, unless it is zero. + */ + /*tex + We want to get rid of it. + */ + halfword cur_disc = null; + /*tex + Local left and right boxes come from \OMEGA\ but have been adapted and extended. + */ + halfword leftbox = null; + halfword rightbox = null; + halfword middlebox = null; + if (lmt_linebreak_state.dir_ptr) { + /*tex Insert dir nodes at the beginning of the current line. */ + for (halfword q = lmt_linebreak_state.dir_ptr; q; q = node_next(q)) { + halfword tmp = tex_new_dir(normal_dir_subtype, dir_direction(q)); + halfword nxt = node_next(temp_head); + tex_attach_attribute_list_copy(tmp, nxt ? 
nxt : temp_head); + tex_couple_nodes(temp_head, tmp); + /*tex |\break\par| */ + tex_try_couple_nodes(tmp, nxt); + } + tex_flush_node_list(lmt_linebreak_state.dir_ptr); + lmt_linebreak_state.dir_ptr = null; + } + /*tex + Modify the end of the line to reflect the nature of the break and to include + |\rightskip|; also set the proper value of |disc_break|. At the end of the following + code, |q| will point to the final node on the list about to be justified. In the + meanwhile |r| will point to the node we will use to insert end-of-line stuff after. + |q == null| means we use the final position of |r|. + */ + /*tex begin mathskip code */ + q = temp_head; + while (q) { + switch (node_type(q)) { + case glyph_node: + goto DONE; + case hlist_node: + if (node_subtype(q) == indent_list) { + break; + } else { + goto DONE; + } + case glue_node: + if (tex_is_par_init_glue(q)) { + break; + } else { + goto DONE; + } + case kern_node: + if (node_subtype(q) != explicit_kern_subtype && node_subtype(q) != italic_kern_subtype) { + goto DONE; + } else { + break; + } + case math_node: + math_surround(q) = 0; + tex_reset_math_glue_to_zero(q); + goto DONE; + default: + if (non_discardable(q)) { + goto DONE; + } else { + break; + } + } + q = node_next(q); + } + DONE: + /*tex end mathskip code */ + r = passive_cur_break(cur_p); + q = null; + disc_break = 0; + post_disc_break = 0; + glue_break = 0; + if (r) { + switch (node_type(r)) { + case glue_node: + tex_copy_glue_values(r, properties->right_skip); + node_subtype(r) = right_skip_glue; + glue_break = 1; + /*tex |q| refers to the last node of the line */ + q = r; + rs = q; + r = node_prev(r); + /*tex |r| refers to the node after which the dir nodes should be closed */ + break; + case disc_node: + { + halfword prv = node_prev(r); + halfword nxt = node_next(r); + halfword h = disc_no_break_head(r); + if (h) { + tex_flush_node_list(h); + disc_no_break_head(r) = null; + disc_no_break_tail(r) = null; + } + h = disc_pre_break_head(r); + if (h) 
{ + halfword t = disc_pre_break_tail(r); + tex_set_discpart(r, h, t, glyph_discpart_pre); + tex_couple_nodes(prv, h); + tex_couple_nodes(t, r); + disc_pre_break_head(r) = null; + disc_pre_break_tail(r) = null; + } + h = disc_post_break_head(r); + if (h) { + halfword t = disc_post_break_tail(r); + tex_set_discpart(r, h, t, glyph_discpart_post); + tex_couple_nodes(r, h); + tex_couple_nodes(t, nxt); + disc_post_break_head(r) = null; + disc_post_break_tail(r) = null; + post_disc_break = 1; + } + cur_disc = r; + disc_break = 1; + } + break; + case kern_node: + kern_amount(r) = 0; + break; + case math_node : + math_surround(r) = 0; + tex_reset_math_glue_to_zero(r); + break; + } + } else { + /*tex Again a tail run ... maybe combine. */ + // for (r = temp_head; node_next(r); r = node_next(r)); + r = tex_tail_of_node_list(temp_head); + /*tex Now we're at the end. */ + if (r == properties->parfill_right_skip) { + /*tex This should almost always be true... */ + q = r; + /*tex |q| refers to the last node of the line (and paragraph) */ + r = node_prev(r); + } + /*tex |r| refers to the node after which the dir nodes should be closed */ + } + /*tex Adjust the dir stack based on dir nodes in this line. */ + line_break_dir = tex_sanitize_dir_state(node_next(temp_head), passive_cur_break(cur_p), properties->paragraph_dir); + /*tex Insert dir nodes at the end of the current line. */ + r = tex_complement_dir_state(r); + /*tex + Modify the end of the line to reflect the nature of the break and to include |\rightskip|; + also set the proper value of |disc_break|; Also put the |\leftskip| glue at the left and + detach this line. + + The following code begins with |q| at the end of the list to be justified. It ends with + |q| at the beginning of that list, and with |node_next(temp_head)| pointing to the remainder + of the paragraph, if any. + + Now [q] refers to the last node on the line and therefore the rightmost breakpoint. 
The + only exception is the case of a discretionary break with non-empty |pre_break|, then + |q| s been changed to the last node of the |pre_break| list. If the par ends with a + |\break| command, the last line is utterly empty. That is the case of |q == temp_head|. + + This code needs to be cleaned up as we now have protrusion and boxes at the edges to + deal with. Old hybrid code. + */ + leftbox = tex_use_local_boxes(passive_left_box(cur_p), local_left_box_code); + rightbox = tex_use_local_boxes(passive_right_box(cur_p), local_right_box_code); + middlebox = tex_use_local_boxes(passive_middle_box(cur_p), local_middle_box_code); + /*tex + First we append the right box. It is part of the content so inside the skips. + */ + if (rightbox) { + halfword nxt = node_next(r); + tex_couple_nodes(r, rightbox); + tex_try_couple_nodes(rightbox, nxt); + r = rightbox; + } + if (middlebox) { + /*tex + These middle boxes might become more advanced as we can process them by a pass over + the line so that we retain the spot but then, we also loose that with left and right, + so why bother. It would also complicate uniqueness. + */ + halfword nxt = node_next(r); + tex_couple_nodes(r, middlebox); + tex_try_couple_nodes(middlebox, nxt); + r = middlebox; + } + if (! q) { + q = r; + } + if (q != temp_head && properties->protrude_chars) { + if (line_break_dir == dir_righttoleft && properties->protrude_chars == protrude_chars_advanced) { + halfword p = q; + halfword l = null; + /*tex Backtrack over the last zero glues and dirs. */ + while (p) { + switch (node_type(p)) { + case dir_node: + if (node_subtype(p) != cancel_dir_subtype) { + goto DONE1; + } else { + break; + } + case glue_node: + if (glue_amount(p)) { + goto DONE3; + } else { + break; + } + case glyph_node: + goto DONE1; + default: + goto DONE3; + } + p = node_prev(p); + } + DONE1: + /*tex When |p| is non zero we have something. 
*/ + while (p) { + switch (node_type(p)) { + case glyph_node: + l = p ; + break; + case glue_node: + if (glue_amount(p)) { + l = null; + } + break; + case dir_node: + if (dir_direction(p) != dir_righttoleft) { + goto DONE3; + } else { + goto DONE2; + } + case par_node: + goto DONE2; + case temp_node: + /*tex Go on. */ + break; + default: + l = null; + break; + } + p = node_prev(p); + } + DONE2: + /*tex Time for action. */ + if (l && p) { + scaled w = tex_char_protrusion(l, right_margin_kern_subtype); + halfword k = tex_new_kern_node(-w, right_margin_kern_subtype); + tex_attach_attribute_list_copy(k, l); + tex_couple_nodes(p, k); + tex_couple_nodes(k, l); + } + } else { + scaled w = 0; + halfword p, ptmp; + if (disc_break && (node_type(q) == glyph_node || node_type(q) != disc_node)) { + /*tex |q| is reset to the last node of |pre_break| */ + p = q; + } else { + /*tex get |node_next(p) = q| */ + p = node_prev(q); + } + ptmp = p; + p = tex_aux_find_protchar_right(node_next(temp_head), p); + w = tex_char_protrusion(p, right_margin_kern_subtype); + if (w && lmt_packaging_state.last_rightmost_char) { + /*tex we have found a marginal kern, append it after |ptmp| */ + halfword k = tex_new_kern_node(-w, right_margin_kern_subtype); + tex_attach_attribute_list_copy(k, p); + tex_try_couple_nodes(k, node_next(ptmp)); + tex_couple_nodes(ptmp, k); + if (ptmp == q) { + q = node_next(q); + } + } + } + } + DONE3: + /*tex + If |q| was not a breakpoint at glue and has been reset to |rightskip| then we append + |rightskip| after |q| now? + */ + if (glue_break) { + /*tex A rightskip has already been added. */ + } else { + /*tex We add one, even when zero. */ + halfword g = tex_new_glue_node(properties->right_skip ? properties->right_skip : zero_glue, right_skip_glue); + tex_attach_attribute_list_copy(g, q); /* or next of it? or q */ + tex_try_couple_nodes(g, node_next(q)); + tex_couple_nodes(q, g); + q = g; + } + rs = q; + /*tex + More preparations. 
+ */ + r = node_next(q); + node_next(q) = null; + q = node_next(temp_head); + tex_try_couple_nodes(temp_head, r); + /*tex + Now we prepend the left box. It is part of the content so inside the skips. + */ + if (leftbox) { + halfword nxt = node_next(q); + tex_couple_nodes(leftbox, q); + q = leftbox; + if (nxt && (cur_line == cur_list.prev_graf + 1) && (node_type(nxt) == hlist_node) && ! box_list(nxt)) { + /* what is special about an empty hbox, needs checking */ + q = node_next(q); + tex_try_couple_nodes(leftbox, node_next(nxt)); + tex_try_couple_nodes(nxt, leftbox); + } + } + /*tex + At this point |q| is the leftmost node; all discardable nodes have been discarded. + */ + if (properties->protrude_chars) { + if (line_break_dir == dir_righttoleft && properties->protrude_chars == protrude_chars_advanced) { + halfword p = tex_aux_find_protchar_left(q, 0); + halfword w = tex_char_protrusion(p, right_margin_kern_subtype); + if (w && lmt_packaging_state.last_leftmost_char) { + halfword k = tex_new_kern_node(-w, right_margin_kern_subtype); + tex_attach_attribute_list_copy(k, p); + tex_couple_nodes(k, q); + q = k; + } + } else { + halfword p = tex_aux_find_protchar_left(q, 0); + halfword w = tex_char_protrusion(p, left_margin_kern_subtype); + if (w && lmt_packaging_state.last_leftmost_char) { + halfword k = tex_new_kern_node(-w, left_margin_kern_subtype); + tex_attach_attribute_list_copy(k, p); + tex_couple_nodes(k, q); + q = k; + } + } + } + /*tex + Fix a possible mess up. + */ + if (node_type(q) == par_node && ! tex_is_start_of_par_node(q)) { + node_subtype(q) = hmode_par_par_subtype ; + } + /*tex + Put the |\leftskip| glue at the left and detach this line. Call the packaging + subroutine, setting |just_box| to the justified box. Now|q| points to the hlist that + represents the current line of the paragraph. We need to compute the appropriate line + width, pack the line into a box of this size, and shift the box by the appropriate + amount of indentation. 
In \LUAMETATEX\ we always add the leftskip. + */ + r = tex_new_glue_node(properties->left_skip, left_skip_glue); + tex_attach_attribute_list_copy(r, q); + tex_couple_nodes(r, q); + q = r; + ls = q; + /*tex + We have these |par| nodes that, when we have callbacks, kind of polute the list. Let's + get rid of them now. We could have done this in previous loops but for the sake of + clearity we do it here. That way we keep the existing code as it is in older engines. + Okay, I might collapse it eventually. This is code that has been prototyped using \LUA. + */ + if (cur_line > lmt_linebreak_state.last_special_line) { // && (! (properties->par_shape && specification_repeat(properties->par_shape)))) { + cur_width = lmt_linebreak_state.second_width; + cur_indent = lmt_linebreak_state.second_indent; + } else if (properties->par_shape) { + if (specification_count(properties->par_shape)) { + cur_indent = tex_get_specification_indent(properties->par_shape, cur_line); + cur_width = tex_get_specification_width(properties->par_shape, cur_line); + cur_indent = swap_parshape_indent(properties->paragraph_dir, cur_indent, cur_width); + } else { + cur_width = lmt_linebreak_state.first_width; + cur_indent = lmt_linebreak_state.first_indent; + } + } else { + cur_width = lmt_linebreak_state.first_width; + cur_indent = lmt_linebreak_state.first_indent; + } + /*tex + When we have a left hang, the width is the (hsize-hang) and there is a shift if hang + applied. The overall linewidth is hsize. When we vbox the result, we get a box with + width hsize. + + When we have a right hang, the width is the (hsize-hang) and therefore we end up with + a box that is less that the hsize. When we vbox the result, we get a box with width + hsize minus the hang, so definitely not consistent with the previous case. + + In both cases we can consider the hang to be at the edge, simply because the whole lot + gets packaged and then shift gets applied. 
Although, for practical reasons we could + decide to put it after the left and before the right skips, which actually opens up + some options. + + Anyway, after a period of nasty heuristics we can now do a better job because we still + have the information that we started with. + + */ + first_line = rs && (cur_line == 1) && properties->parinit_left_skip && properties->parinit_right_skip; + if (first_line) { + halfword n = node_next(properties->parinit_left_skip); + while (n) { + if (n == properties->parinit_right_skip) { + tex_couple_nodes(node_prev(n), node_next(n)); + tex_couple_nodes(node_prev(rs), n); + tex_couple_nodes(n, rs); + break; + } else { + n = node_next(n); + } + } + if (! n) { + /*tex For the moment: */ + tex_normal_warning("tex", "right parinit skip is gone"); + } + } + last_line = ls && (cur_line + 1 == lmt_linebreak_state.best_line) && properties->parfill_left_skip && properties->parfill_right_skip; + if (last_line) { + halfword n = node_prev(properties->parfill_right_skip); + while (n) { + if (n == properties->parfill_left_skip) { + tex_couple_nodes(node_prev(n), node_next(n)); + tex_couple_nodes(n, node_next(ls)); + tex_couple_nodes(ls, n); + break; + } else { + n = node_prev(n); + } + } + if (! n) { + /*tex For the moment: */ + tex_normal_warning("tex", "left parfill skip is gone"); + } + } + /*tex Some housekeeping. */ + lmt_packaging_state.post_adjust_tail = post_adjust_head; + lmt_packaging_state.pre_adjust_tail = pre_adjust_head; + lmt_packaging_state.post_migrate_tail = post_migrate_head; + lmt_packaging_state.pre_migrate_tail = pre_migrate_head; + /*tex A bonus feature. 
*/ + if (normalize_line_mode_permitted(normalize_line_mode_par, flatten_discretionaries_mode)) { + int count = 0; + q = tex_flatten_discretionaries(q, &count, 0); /* there is no need to nest */ + cur_disc = null; + if (properties->tracing_paragraphs > 1) { + tex_begin_diagnostic(); + tex_print_format("[linebreak: flatten, line %i, count %i]", cur_line, count); + tex_end_diagnostic(); + } + } + /*tex Finally we pack the lot. */ + shaping = 0; + if (normalize_line_mode_permitted(normalize_line_mode_par, normalize_line_mode)) { + halfword head = q; + halfword tail = rs ? rs : head; + halfword lefthang = 0; + halfword righthang = 0; + // we already have the tail somewhere + while (node_next(tail)) { + tail = node_next(tail); + } + if (properties->par_shape) { + int n = specification_count(properties->par_shape); + if (n > 0) { + if (specification_repeat(properties->par_shape)) { + n = cur_line; + } else { + n = cur_line > n ? n : cur_line; + } + lefthang = tex_get_specification_indent(properties->par_shape, n); + righthang = properties->hsize - lefthang - tex_get_specification_width(properties->par_shape, n); + // lefthang = swap_parshape_indent(paragraph_dir, lefthang, width); // or so + } + } else if (properties->hang_after) { + if (properties->hang_after > 0 && cur_line > properties->hang_after) { + if (properties->hang_indent < 0) { + righthang = -properties->hang_indent; + } + if (properties->hang_indent > 0) { + lefthang = properties->hang_indent; + } + } else if (properties->hang_after < 0 && cur_line <= -properties->hang_after) { + if (properties->hang_indent < 0) { + righthang = -properties->hang_indent; + } + if (properties->hang_indent > 0) { + lefthang = properties->hang_indent; + } + } + } + shaping = (lefthang || righthang); + lmt_linebreak_state.just_box = tex_hpack(head, cur_width, properties->adjust_spacing ? 
packing_linebreak : packing_exactly, (singleword) properties->paragraph_dir, holding_none_option); + // attach_attribute_list_copy(linebreak_state.just_box, properties->initial_par); + if (normalize_line_mode_permitted(normalize_line_mode_par, flatten_h_leaders_mode)) { + tex_flatten_leaders(lmt_linebreak_state.just_box, NULL); + } + if (node_type(tail) != glue_node || node_subtype(tail) != right_skip_glue) { + halfword rs = tex_new_glue_node((properties->right_skip ? properties->right_skip : zero_glue), right_skip_glue); + tex_attach_attribute_list_copy(rs, tail); + tex_try_couple_nodes(rs, node_next(q)); + tex_couple_nodes(tail, rs); + tail = rs; + } + { + halfword lh = tex_new_glue_node(zero_glue, left_hang_skip_glue); + halfword rh = tex_new_glue_node(zero_glue, right_hang_skip_glue); + glue_amount(lh) = lefthang; + glue_amount(rh) = righthang; + tex_attach_attribute_list_copy(lh, head); + tex_attach_attribute_list_copy(rh, tail); + tex_try_couple_nodes(lh, head); + tex_try_couple_nodes(tail, rh); + head = lh; + tail = rh; + } + /*tex + This is kind of special. Instead of using |cur_width| also on an overfull box as well + as shifts, we want \quote {real} dimensions. A disadvantage is that we need to adapt + analyzers that assume this correction not being there (unpack and repack). So we have + a flag to control it. + */ + if (normalize_line_mode_permitted(normalize_line_mode_par, clip_width_mode)) { + if (lmt_packaging_state.last_overshoot) { + halfword g = tex_new_glue_node(zero_glue, correction_skip_glue); + glue_amount(g) = -lmt_packaging_state.last_overshoot; + tex_attach_attribute_list_copy(g, rs); + tex_try_couple_nodes(node_prev(rs), g); + tex_try_couple_nodes(g, rs); + } + box_width(lmt_linebreak_state.just_box) = properties->hsize; + } + box_list(lmt_linebreak_state.just_box) = head; + q = head; + /*tex So only callback when we normalize. 
*/ + if (leftbox || rightbox || middlebox) { + halfword linebox = lmt_linebreak_state.just_box; + lmt_local_box_callback( + linebox, leftbox, rightbox, middlebox, cur_line, + tex_effective_glue(linebox, properties->left_skip), + tex_effective_glue(linebox, properties->right_skip), + lefthang, righthang, cur_indent, + (first_line && properties->parinit_left_skip) ? tex_effective_glue(linebox, properties->parinit_left_skip) : null, + (first_line && properties->parinit_right_skip) ? tex_effective_glue(linebox, properties->parinit_right_skip) : null, + (last_line && properties->parfill_left_skip) ? tex_effective_glue(linebox, properties->parfill_left_skip) : null, + (last_line && properties->parfill_right_skip) ? tex_effective_glue(linebox, properties->parfill_right_skip) : null, + lmt_packaging_state.last_overshoot + ); + } + } else { + /*tex Here we can have a right skip way to the right due to an overshoot! */ + lmt_linebreak_state.just_box = tex_hpack(q, cur_width, properties->adjust_spacing ? packing_linebreak : packing_exactly, (singleword) properties->paragraph_dir, holding_none_option); + // attach_attribute_list_copy(linebreak_state.just_box, properties->initial_par); + if (normalize_line_mode_permitted(normalize_line_mode_par, flatten_h_leaders_mode)) { + tex_flatten_leaders(lmt_linebreak_state.just_box, NULL); + } + box_shift_amount(lmt_linebreak_state.just_box) = cur_indent; + } + /*tex Call the packaging subroutine, setting |just_box| to the justified box. */ + node_subtype(lmt_linebreak_state.just_box) = line_list; + /*tex Pending content (callback). */ + if (node_next(contribute_head)) { + if (! lmt_page_builder_state.output_active) { + lmt_append_line_filter_callback(pre_box_append_line_context, 0); + } + } + /* Pre-adjust content (no callback). 
*/ + if (pre_adjust_head != lmt_packaging_state.pre_adjust_tail) { + tex_inject_adjust_list(pre_adjust_head, 1, lmt_linebreak_state.just_box, properties); + } + lmt_packaging_state.pre_adjust_tail = null; + /* Pre-migrate content (callback). */ + if (pre_migrate_head != lmt_packaging_state.pre_migrate_tail) { + tex_append_list(pre_migrate_head, lmt_packaging_state.pre_migrate_tail); + if (! lmt_page_builder_state.output_active) { + lmt_append_line_filter_callback(pre_migrate_append_line_context, 0); + } + } + lmt_packaging_state.pre_migrate_tail = null; + /* Line content (callback). */ + tex_append_to_vlist(lmt_linebreak_state.just_box, lua_key_index(post_linebreak), properties); + if (! lmt_page_builder_state.output_active) { + /* Here we could use the par specific baselineskip and lineskip. */ + lmt_append_line_filter_callback(box_append_line_context, 0); + } + /* Post-migrate content (callback). */ + if (post_migrate_head != lmt_packaging_state.post_migrate_tail) { + tex_append_list(post_migrate_head, lmt_packaging_state.post_migrate_tail); + if (! lmt_page_builder_state.output_active) { + lmt_append_line_filter_callback(post_migrate_append_line_context, 0); + } + } + lmt_packaging_state.post_migrate_tail = null; + /* Post-adjust content (callback). */ + if (post_adjust_head != lmt_packaging_state.post_adjust_tail) { + tex_inject_adjust_list(post_adjust_head, 1, null, properties); + } + lmt_packaging_state.post_adjust_tail = null; + /*tex + Append the new box to the current vertical list, followed by the list of special nodes + taken out of the box by the packager. Append a penalty node, if a nonzero penalty is + appropriate. Penalties between the lines of a paragraph come from club and widow lines, + from the |inter_line_penalty| parameter, and from lines that end at discretionary breaks. + Breaking between lines of a two-line paragraph gets both club-line and widow-line + penalties. 
The local variable |pen| will be set to the sum of all relevant penalties for + the current line, except that the final line is never penalized. + */ + if (cur_line + 1 != lmt_linebreak_state.best_line) { + /*tex + When we end up here we hale multiple lines so we need to add penalties between them + according to (several) specifications. + */ + halfword pen = 0; + halfword spm = properties->shaping_penalties_mode; + if (! spm) { + shaping = 0; + } + if (tracing_penalties_par > 0) { + tex_begin_diagnostic(); + tex_print_format("[linebreak: penalty, line %i, best line %i, prevgraf %i, mode %x (i=%i c=%i w=%i b=%i)]", + cur_line, lmt_linebreak_state.best_line, cur_list.prev_graf, spm, + is_shaping_penalties_mode(spm, inter_line_penalty_shaping), + is_shaping_penalties_mode(spm, club_penalty_shaping), + is_shaping_penalties_mode(spm, widow_penalty_shaping), + is_shaping_penalties_mode(spm, broken_penalty_shaping) + ); + tex_end_diagnostic(); + } + if (! (shaping && is_shaping_penalties_mode(spm, inter_line_penalty_shaping))) { + halfword p; + q = properties->inter_line_penalties; + if (q) { + r = cur_line; + if (r > specification_count(q)) { + r = specification_count(q); + } else if (r < 1) { + r = 1; + } + p = tex_get_specification_penalty(q, r); + } else if (passive_pen_inter(cur_p)) { + p = passive_pen_inter(cur_p); + } else { + p = properties->inter_line_penalty; + } + if (p) { + pen += p; + tex_aux_trace_penalty("interline", cur_line, r, p, pen); + } + } + if (! 
(shaping && is_shaping_penalties_mode(spm, club_penalty_shaping))) { + halfword p; + q = properties->club_penalties; + if (q) { + /*tex prevgraf */ + r = cur_line - cur_list.prev_graf; + if (r > specification_count(q)) { + r = specification_count(q); + } else if (r < 1) { + r = 1; + } + p = tex_get_specification_penalty(q, r); + } else if (cur_line == cur_list.prev_graf + 1) { + /*tex prevgraf */ + p = properties->club_penalty; + } else { + p = 0; + } + if (p) { + pen += p; + tex_aux_trace_penalty("club", cur_line, r, p, pen); + } + } + if (! (shaping && is_shaping_penalties_mode(spm, widow_penalty_shaping))) { + halfword p; + q = properties->display_math ? properties->display_widow_penalties : properties->widow_penalties; + if (q) { + r = lmt_linebreak_state.best_line - cur_line - 1; + if (r > specification_count(q)) { + r = specification_count(q); + } else if (r < 1) { + r = 1; + } + p = tex_get_specification_penalty(q, r); + } else if (cur_line + 2 == lmt_linebreak_state.best_line) { + p = properties->display_math ? properties->display_widow_penalty : properties->widow_penalty; + } else { + p = 0; + } + if (p) { + pen += p; + tex_aux_trace_penalty("widow", cur_line, r, p, pen); + } + } + if (disc_break && ! (shaping && is_shaping_penalties_mode(spm, broken_penalty_shaping))) { + halfword p; + if (passive_pen_broken(cur_p) != 0) { + p = passive_pen_broken(cur_p); + } else { + p = properties->broken_penalty; + } + if (p) { + pen += p; + tex_aux_trace_penalty("broken", cur_line, 0, p, pen); + } + } + if (shaping && ! 
pen) { + pen = properties->shaping_penalty; + if (pen) { + tex_aux_trace_penalty("shaping", cur_line, 0, pen, pen); + } + } + if (pen) { + r = tex_new_penalty_node(pen, linebreak_penalty_subtype); + tex_couple_nodes(cur_list.tail, r); + cur_list.tail = r; + } + } else { + // if (tracing_penalties_par > 0) { + // tex_begin_diagnostic(); + // tex_print_format("[linebreak: no penalties injected]"); + // tex_end_diagnostic(); + // } + } + /*tex + Append a penalty node, if a nonzero penalty is appropriate. Justify the line ending at + breakpoint |cur_p|, and append it to the current vertical list, together with associated + penalties and other insertions. + */ + ++cur_line; + cur_p = passive_next_break(cur_p); + if (cur_p && ! post_disc_break) { + /*tex + Prune unwanted nodes at the beginning of the next line. Glue and penalty and kern + and math nodes are deleted at the beginning of a line, except in the anomalous case + that the node to be deleted is actually one of the chosen breakpoints. Otherwise + the pruning done here is designed to match the lookahead computation in + |try_break|, where the |break_width| values are computed for non-discretionary + breakpoints. + */ + r = temp_head; + /*tex + Normally we have a matching math open and math close node but when we cross a line + the open node is removed, including any glue or penalties following it. This is + however not that nice for callbacks that rely on symmetry. Of course this only + counts for one liners, as we can still have only a begin or end node on a line. The + end_of_math lua helper is made robust against this although there you should be + aware of the fact that one can end up in the middle of math in callbacks that don't + work on whole paragraphs, but at least this branch makes sure that some proper + analysis is possible. (todo: check if math glyphs have the subtype marked done). + */ + /*tex Suboptimal but not critical. 
Todo.*/ + while (1) { + q = node_next(r); + if (node_type(q) == math_node) { + /*tex begin mathskip code */ + math_surround(q) = 0 ; + tex_reset_math_glue_to_zero(q); + /*tex end mathskip code */ + } + if (q == passive_cur_break(cur_p)) { + break; + } else if (node_type(q) == glyph_node) { + break; + } else if (node_type(q) == glue_node && (node_subtype(q) == par_fill_left_skip_glue || node_subtype(q) == par_init_left_skip_glue)) { + /*tex Keep it. Can be tricky after a |\break| with no follow up (loops). */ + break; + } else if (node_type(q) == par_node && node_subtype(q) == local_box_par_subtype) { + /*tex weird, in the middle somewhere .. these local penalties do this */ + break; /* if not we leak, so maybe this needs more testing */ + } else if (non_discardable(q)) { + break; + } else if (node_type(q) == kern_node && node_subtype(q) != explicit_kern_subtype && node_subtype(q) != italic_kern_subtype) { + break; + } + r = q; + } + if (r != temp_head) { + node_next(r) = null; + tex_flush_node_list(node_next(temp_head)); + tex_try_couple_nodes(temp_head, q); + } + } + if (cur_disc) { + tex_try_couple_nodes(node_prev(cur_disc),node_next(cur_disc)); + tex_flush_node(cur_disc); + } + /* We can clean up the par nodes. */ + } while (cur_p); + if ((cur_line != lmt_linebreak_state.best_line) || (node_next(temp_head))) { + tex_confusion("line breaking"); + } + /*tex |prevgraf| etc */ + cur_list.prev_graf = lmt_linebreak_state.best_line - 1; + cur_list.direction_stack = lmt_linebreak_state.dir_ptr; + lmt_linebreak_state.dir_ptr = null; +} diff --git a/source/luametatex/source/tex/texlinebreak.h b/source/luametatex/source/tex/texlinebreak.h new file mode 100644 index 000000000..27c8607e0 --- /dev/null +++ b/source/luametatex/source/tex/texlinebreak.h @@ -0,0 +1,206 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_LINEBREAK_H +# define LMT_LINEBREAK_H + +// # define max_hlist_stack 1024 /*tex This should be more than enough for sane usage. 
*/ + + +/*tex + + When looking for optimal line breaks, \TEX\ creates a \quote {break node} for each break that + is {\em feasible}, in the sense that there is a way to end a line at the given place without + requiring any line to stretch more than a given tolerance. A break node is characterized by + three things: the position of the break (which is a pointer to a |glue_node|, |math_node|, + |penalty_node|, or |disc_node|); the ordinal number of the line that will follow this breakpoint; + and the fitness classification of the line that has just ended, i.e., |tight_fit|, |decent_fit|, + |loose_fit|, or |very_loose_fit|. + + Todo: 0..0.25 / 0.25-0.50 / 0.50-0.75 / 0.75-1.00 + + TeX by Topic gives a good explanation of the way lines are broken. + + veryloose stretch badness >= 100 + loose stretch badness >= 13 + decent badness <= 12 + tight shrink badness >= 13 + + adjacent delta two lines > 1 : visually incompatible + + if badness of any line > pretolerance : second pass + if pretolerance < 0 : first pass is skipped + if badness of any line > tolerance : third pass (with emergencystretch) + + in lua(meta)tex: always hyphenated lists (in regular tex second pass+) + + badness of 800 : stretch ratio 2 + + One day I will play with a plugged-in badness calculation but there is some performance impact + there as well as danger of overflow (unless we go double or very long integers). + +*/ + +typedef enum fitness_value { + very_loose_fit, /*tex lines stretching more than their stretchability */ + loose_fit, /*tex lines stretching 0.5 to 1.0 of their stretchability */ + semi_loose_fit, /*tex sits between |loose_fit| and |decent_fit|; not mentioned in the text above */ + decent_fit, /*tex for all other lines */ + semi_tight_fit, /*tex sits between |decent_fit| and |tight_fit|; not mentioned in the text above */ + tight_fit, /*tex lines shrinking 0.5 to 1.0 of their shrinkability */ + n_of_finess_values /*tex the number of classes; sizes |minimal_demerits| (the name is spelled this way) */ +} fitness_value; + +/*tex + + Some of the next variables can now be local but I don't want to divert too much from the + original, so for now we keep them in the info variable.
+ +*/ + +typedef struct linebreak_state_info { + /*tex the |hlist_node| for the last line of the new paragraph */ + halfword just_box; + halfword last_line_fill; + int no_shrink_error_yet; + int second_pass; + int final_pass; + int threshold; + halfword adjust_spacing; + halfword adjust_spacing_step; + halfword adjust_spacing_shrink; + halfword adjust_spacing_stretch; + int max_stretch_ratio; + int max_shrink_ratio; + halfword current_font_step; + halfword passive; + halfword printed_node; + halfword pass_number; + /* int auto_breaking; */ /* is gone */ + /* int math_level; */ /* was never used */ + scaled active_width[10]; + scaled background[10]; + scaled break_width[10]; + scaled disc_width[10]; + scaled fill_width[4]; + halfword internal_penalty_interline; + halfword internal_penalty_broken; + halfword internal_left_box; + scaled internal_left_box_width; + halfword init_internal_left_box; + scaled init_internal_left_box_width; + halfword internal_right_box; + scaled internal_right_box_width; + /*tex NOTE(review): declared |scaled| while the left and right box fields are |halfword|; presumably a node too, confirm. */ + scaled internal_middle_box; + /*tex one entry per fitness class */ + halfword minimal_demerits[n_of_finess_values]; + halfword minimum_demerits; + halfword easy_line; + /*tex lines after this one use |second_width| and |second_indent| */ + halfword last_special_line; + scaled first_width; + scaled second_width; + scaled first_indent; + scaled second_indent; + /*tex the active node whose break node is the last breakpoint of the paragraph */ + halfword best_bet; + /*tex reported by |tex_get_linebreak_info| */ + halfword fewest_demerits; + /*tex one more than the number of the last line; |prev_graf| becomes |best_line - 1| */ + halfword best_line; + /*tex reported by |tex_get_linebreak_info| */ + halfword actual_looseness; + halfword line_difference; + int do_last_line_fit; + /*tex the direction stack that is carried over from line to line */ + halfword dir_ptr; + halfword warned; + halfword calling_back; +} linebreak_state_info; + +extern linebreak_state_info lmt_linebreak_state; + +void tex_line_break_prepare ( + halfword par, + halfword *tail, + halfword *parinit_left_skip_glue, + halfword *parinit_right_skip_glue, + halfword *parfill_left_skip_glue, + halfword *parfill_right_skip_glue, + halfword *final_penalty +); + +extern void tex_line_break ( + int d, + int line_break_context +); + +extern void tex_initialize_active ( + void +); + +extern void tex_get_linebreak_info ( + int *f, + int *a +); + +extern void
tex_do_line_break ( + line_break_properties *properties +); + + +/*tex + + We can have skipable nodes at the margins during character protrusion. Two extra functions are + defined for usage in |cp_skippable|. + +*/ + +inline static int tex_zero_box_dimensions(halfword a) +{ + return box_width(a) == 0 && box_height(a) == 0 && box_depth(a) == 0; +} + +inline static int tex_zero_rule_dimensions(halfword a) +{ + return rule_width(a) == 0 && rule_height(a) == 0 && rule_depth(a) == 0; +} + +inline static int tex_empty_disc(halfword a) +{ + return (! disc_pre_break_head(a)) && (! disc_post_break_head(a)) && (! disc_no_break_head(a)); +} + +inline static int tex_protrusion_skipable(halfword a) +{ + if (a) { + switch (node_type(a)) { + case glyph_node: + return 0; + case glue_node: + return tex_glue_is_zero(a); + case disc_node: + return tex_empty_disc(a); + case kern_node: + return (kern_amount(a) == 0) || (node_subtype(a) == font_kern_subtype); + case rule_node: + return tex_zero_rule_dimensions(a); + case math_node: + return (math_surround(a) == 0) || tex_math_glue_is_zero(a); + case hlist_node: + return (! box_list(a)) && tex_zero_box_dimensions(a); + case penalty_node: + case dir_node: + case par_node: + case insert_node: + case mark_node: + case adjust_node: + case boundary_node: + case whatsit_node: + return 1; + } + } + return 0; + } + +inline static void tex_append_list(halfword head, halfword tail) +{ + tex_couple_nodes(cur_list.tail, node_next(head)); + cur_list.tail = tail; +} + +# endif diff --git a/source/luametatex/source/tex/texlocalboxes.c b/source/luametatex/source/tex/texlocalboxes.c new file mode 100644 index 000000000..0def018d4 --- /dev/null +++ b/source/luametatex/source/tex/texlocalboxes.c @@ -0,0 +1,313 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + The concept of local left and right boxes originates in \OMEGA\ but in \LUATEX\ it already was + adapted and made more robust. 
Here we use an upgraded version with more features. These boxes + are sort of a mix between marks (states) and inserts (with dimensions). + + We have linked lists of left or right boxes. This permits selective updating and multiple usage + of these boxes. It also means that we need to do additional packing and width calculations. + + When we were in transition local boxes were handled as special boxes (alongside leader and + shipout boxes but they got their own cmd again when we were done). +*/ + +/*tex + Here we set fields in a new par node. We could have an extra width |_par| hut it doesn't really + pay off (now). +*/ + +inline static scaled tex_aux_local_boxes_width(halfword n) +{ + scaled width = 0; + while (n) { + if (node_type(n) == hlist_node) { + width += box_width(n); + } else { + /*tex Actually this is an error. */ + } + n = node_next(n); + } + return width; +} + +void tex_add_local_boxes(halfword p) +{ + if (local_left_box_par) { + halfword copy = tex_copy_node_list(local_left_box_par, null); + tex_set_local_left_width(p, tex_aux_local_boxes_width(copy)); + par_box_left(p) = copy; + } + if (local_right_box_par) { + halfword copy = tex_copy_node_list(local_right_box_par, null); + tex_set_local_right_width(p, tex_aux_local_boxes_width(copy)); + par_box_right(p) = copy; + } + if (local_middle_box_par) { + halfword copy = tex_copy_node_list(local_middle_box_par, null); + par_box_middle(p) = copy; + } +} + +/*tex + Pass on to Lua or inject in the current list. So, we still have a linked list here + with only boxes. 
+*/ + +halfword tex_get_local_boxes(halfword location) +{ + switch (location) { + case local_left_box_code : return tex_use_local_boxes(local_left_box_par, local_left_box_code); + case local_right_box_code : return tex_use_local_boxes(local_right_box_par, local_right_box_code); + case local_middle_box_code: return tex_use_local_boxes(local_middle_box_par, local_middle_box_code); + } + return null; +} + +/*tex Set them from Lua, watch out; not an eq update */ + +void tex_set_local_boxes(halfword b, halfword location) +{ + switch (location) { + case local_left_box_code : tex_flush_node_list(local_left_box_par); local_left_box_par = b; break; + case local_right_box_code : tex_flush_node_list(local_right_box_par); local_right_box_par = b; break; + case local_middle_box_code: tex_flush_node_list(local_middle_box_par); local_middle_box_par = b; break; + } +} + +/*tex Set them from TeX, watch out; this is an eq update */ + +static halfword tex_aux_reset_boxes(halfword head, halfword index) +{ + if (head && index) { + halfword current = head; + while (current) { + halfword next = node_next(current); + if (node_type(current) == hlist_node && box_index(current) == index) { + if (current == head) { + head = node_next(head); + node_prev(head) = null; + next = head; + } else { + tex_try_couple_nodes(node_prev(current), next); + } + tex_flush_node(current); + break; + } else { + current = next; + } + } + return head; + } else { + tex_flush_node_list(head); + return null; + } +} + +void tex_reset_local_boxes(halfword index, halfword location) +{ + switch (location) { + case local_left_box_code : local_left_box_par = tex_aux_reset_boxes(local_left_box_par, index); break; + case local_right_box_code : local_right_box_par = tex_aux_reset_boxes(local_right_box_par, index); break; + case local_middle_box_code: local_right_box_par = tex_aux_reset_boxes(local_middle_box_par, index); break; + } +} + +static halfword tex_aux_update_boxes(halfword head, halfword b, halfword index) +{ + if 
(head && index) { + halfword current = head; + while (current) { + halfword next = node_next(current); + if (node_type(current) == hlist_node && box_index(current) == index) { + tex_try_couple_nodes(b, node_next(current)); + if (current == head) { + head = b; + } else { + tex_couple_nodes(node_prev(current), b); + } + tex_flush_node(current); + break; + } else if (next) { + current = next; + } else { + tex_couple_nodes(current, b); + break; + } + } + return head; + } + return b; +} + +void tex_update_local_boxes(halfword b, halfword index, halfword location) /* todo: avoid copying */ +{ + switch (location) { + case local_left_box_code: + if (b) { + halfword c = local_left_box_par ? tex_copy_node_list(local_left_box_par, null) : null; + b = tex_aux_update_boxes(c, b, index); + } else if (index) { + halfword c = local_left_box_par ? tex_copy_node_list(local_left_box_par, null) : null; + b = tex_aux_reset_boxes(c, index); + } + update_tex_local_left_box(b); + break; + case local_right_box_code: + if (b) { + halfword c = local_right_box_par ? tex_copy_node_list(local_right_box_par, null) : null; + b = tex_aux_update_boxes(c, b, index); + } else if (index) { + halfword c = local_right_box_par ? tex_copy_node_list(local_right_box_par, null) : null; + b = tex_aux_reset_boxes(c, index); + } + update_tex_local_right_box(b); + break; + default: + if (b) { + halfword c = local_middle_box_par ? tex_copy_node_list(local_middle_box_par, null) : null; + b = tex_aux_update_boxes(c, b, index); + } else if (index) { + halfword c = local_middle_box_par ? tex_copy_node_list(local_middle_box_par, null) : null; + b = tex_aux_reset_boxes(c, index); + } + update_tex_local_middle_box(b); + break; + } +} + +/*tex The |par| option: */ + +/* todo: use helper */ + +static halfword tex_aux_replace_local_box(halfword b, halfword index, halfword par_box) +{ + if (b) { + halfword c = par_box ? 
tex_copy_node_list(par_box, null) : null; + b = tex_aux_update_boxes(c, b, index); + } else if (index) { + halfword c = par_box ? tex_copy_node_list(par_box, null) : null; + b = tex_aux_reset_boxes(c, index); + } + if (par_box) { + tex_flush_node_list(par_box); + } + return b; +} + +void tex_replace_local_boxes(halfword par, halfword b, halfword index, halfword location) /* todo: avoid copying */ +{ + switch (location) { + case local_left_box_code: + par_box_left(par) = tex_aux_replace_local_box(b, index, par_box_left(par)); + par_box_left_width(par) = tex_aux_local_boxes_width(b); + break; + case local_right_box_code: + par_box_right(par) = tex_aux_replace_local_box(b, index, par_box_right(par)); + par_box_right_width(par) = tex_aux_local_boxes_width(b); + break; + case local_middle_box_code: + par_box_middle(par) = tex_aux_replace_local_box(b, index, par_box_middle(par)); + /*tex We keep the zero width! */ + break; + } +} + +/*tex Get them for line injection. */ + +halfword tex_use_local_boxes(halfword p, halfword location) +{ + if (p) { + p = tex_hpack(tex_copy_node_list(p, null), 0, packing_additional, direction_unknown, holding_none_option); + switch (location) { + case local_left_box_code : node_subtype(p) = local_left_list ; break; + case local_right_box_code : node_subtype(p) = local_right_list ; break; + case local_middle_box_code: node_subtype(p) = local_middle_list; break; + } + } + return p; +} + +/* */ + +void tex_scan_local_boxes_keys(quarterword *options, halfword *index) +{ + *options = 0; + *index = 0; + while (1) { + switch (tex_scan_character("iklpIKLP", 0, 1, 0)) { + case 'i': case 'I': + if (tex_scan_mandate_keyword("index", 1)) { + *index = tex_scan_box_index(); + } + break; + case 'k': case 'K': + if (tex_scan_mandate_keyword("keep", 1)) { + *options |= local_box_keep_option; + } + break; + case 'l': case 'L': + if (tex_scan_mandate_keyword("local", 1)) { + *options |= local_box_local_option; + } + break; + case 'p': case 'P': + if 
(tex_scan_mandate_keyword("par", 1)) { + *options |= local_box_par_option; + } + break; + default: + return; + } + } +} + +halfword tex_valid_box_index(halfword n) +{ + return box_index_in_range(n); +} + + +scaled tex_get_local_left_width(halfword p) +{ + return par_box_left_width(p); +} + +scaled tex_get_local_right_width(halfword p) +{ + return par_box_right_width(p); +} + +void tex_set_local_left_width(halfword p, scaled width) +{ + par_box_left_width(p) = width; +} + +void tex_set_local_right_width(halfword p, scaled width) +{ + par_box_right_width(p) = width; +} + +halfword tex_get_local_interline_penalty(halfword p) +{ + return par_penalty_interline(p); +} + +halfword tex_get_local_broken_penalty(halfword p) +{ + return par_penalty_broken(p); +} + +void tex_set_local_interline_penalty(halfword p, halfword penalty) +{ + par_penalty_interline(p) = penalty; +} + +void tex_set_local_broken_penalty(halfword p, halfword penalty) +{ + par_penalty_broken(p) = penalty; +} diff --git a/source/luametatex/source/tex/texlocalboxes.h b/source/luametatex/source/tex/texlocalboxes.h new file mode 100644 index 000000000..6c37ea1ae --- /dev/null +++ b/source/luametatex/source/tex/texlocalboxes.h @@ -0,0 +1,35 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_LOCALBOXES_H +# define LMT_LOCALBOXES_H + +/*tex Todo: determine when to update (grouping, copying) or when to replace. 
*/ + +extern halfword tex_get_local_boxes (halfword location); +extern void tex_set_local_boxes (halfword b, halfword location); +extern halfword tex_use_local_boxes (halfword p, halfword location); +extern void tex_update_local_boxes (halfword b, halfword index, halfword location); +extern void tex_replace_local_boxes (halfword par, halfword b, halfword index, halfword location); +extern void tex_reset_local_boxes (halfword index, halfword location); + +extern void tex_add_local_boxes (halfword p); +extern void tex_scan_local_boxes_keys (quarterword *options, halfword *index); +extern halfword tex_valid_box_index (halfword n); + +/*tex Helpers, just in case we decide to be more sparse. */ + +extern scaled tex_get_local_left_width (halfword p); +extern scaled tex_get_local_right_width (halfword p); + +extern void tex_set_local_left_width (halfword p, scaled width); +extern void tex_set_local_right_width (halfword p, scaled width); + +extern halfword tex_get_local_interline_penalty (halfword p); +extern halfword tex_get_local_broken_penalty (halfword p); + +extern void tex_set_local_interline_penalty (halfword p, halfword penalty); +extern void tex_set_local_broken_penalty (halfword p, halfword penalty); + +# endif
\ No newline at end of file diff --git a/source/luametatex/source/tex/texmainbody.c b/source/luametatex/source/tex/texmainbody.c new file mode 100644 index 000000000..57b7d34be --- /dev/null +++ b/source/luametatex/source/tex/texmainbody.c @@ -0,0 +1,590 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + This is where the action starts. We're speaking of \LUATEX, a continuation of \PDFTEX\ (which + included \ETEX) and \ALEPH. As \TEX, \LUATEX\ is a document compiler intended to simplify high + quality typesetting for many of the world's languages. It is an extension of D.E. Knuth's \TEX, + which was designed essentially for the typesetting of languages using the Latin alphabet. + Although it is a direct descendant of \TEX, and therefore mostly compatible, there are some + subtle differences that relate to \UNICODE\ support and \OPENTYPE\ math. + + The \ALEPH\ subsystem loosens many of the restrictions imposed by~\TeX: register numbers are no + longer limited to 8~bits. Fonts may have more than 256~characters, more than 256~fonts may be + used, etc. We use a similar model. We also borrowed the directional model but have upgraded it a + bit as well as integrated it more tightly. + + This program is directly derived from Donald E. Knuth's \TEX; the change history which follows + and the reward offered for finders of bugs refer specifically to \TEX; they should not be taken + as referring to \LUATEX, \PDFTEX, nor \ETEX, although the change history is relevant in that it + demonstrates the evolutionary path followed. This program is not \TEX; that name is reserved + strictly for the program which is the creation and sole responsibility of Professor Knuth. + + \starttyping + % Version 0 was released in September 1982 after it passed a variety of tests. + % Version 1 was released in November 1983 after thorough testing. + % Version 1.1 fixed "disappearing font identifiers" et alia (July 1984).
+ % Version 1.2 allowed '0' in response to an error, et alia (October 1984). + % Version 1.3 made memory allocation more flexible and local (November 1984). + % Version 1.4 fixed accents right after line breaks, et alia (April 1985). + % Version 1.5 fixed \the\toks after other expansion in \edefs (August 1985). + % Version 2.0 (almost identical to 1.5) corresponds to "Volume B" (April 1986). + % Version 2.1 corrected anomalies in discretionary breaks (January 1987). + % Version 2.2 corrected "(Please type...)" with null \endlinechar (April 1987). + % Version 2.3 avoided incomplete page in premature termination (August 1987). + % Version 2.4 fixed \noaligned rules in indented displays (August 1987). + % Version 2.5 saved cur_order when expanding tokens (September 1987). + % Version 2.6 added 10sp slop when shipping leaders (November 1987). + % Version 2.7 improved rounding of negative-width characters (November 1987). + % Version 2.8 fixed weird bug if no \patterns are used (December 1987). + % Version 2.9 made \csname\endcsname's "relax" local (December 1987). + % Version 2.91 fixed \outer\def\a0{}\a\a bug (April 1988). + % Version 2.92 fixed \patterns, also file names with complex macros (May 1988). + % Version 2.93 fixed negative halving in allocator when mem_min<0 (June 1988). + % Version 2.94 kept open_log_file from calling fatal_error (November 1988). + % Version 2.95 solved that problem a better way (December 1988). + % Version 2.96 corrected bug in "Infinite shrinkage" recovery (January 1989). + % Version 2.97 corrected blunder in creating 2.95 (February 1989). + % Version 2.98 omitted save_for_after at outer level (March 1989). + % Version 2.99 caught $$\begingroup\halign..$$ (June 1989). + % Version 2.991 caught .5\ifdim.6... (June 1989). + % Version 2.992 introduced major changes for 8-bit extensions (September 1989). + % Version 2.993 fixed a save_stack synchronization bug et alia (December 1989). 
+ % Version 3.0 fixed unusual displays; was more \output robust (March 1990). + % Version 3.1 fixed nullfont, disabled \write{\the\prevgraf} (September 1990). + % Version 3.14 fixed unprintable font names and corrected typos (March 1991). + % Version 3.141 more of same; reconstituted ligatures better (March 1992). + % Version 3.1415 preserved nonexplicit kerns, tidied up (February 1993). + % Version 3.14159 allowed fontmemsize to change; bulletproofing (March 1995). + % Version 3.141592 fixed \xleaders, glueset, weird alignments (December 2002). + % Version 3.1415926 was a general cleanup with minor fixes (February 2008). + % Successive versions have been checked and if needed fixes have been applied. + \stoptyping + + Although considerable effort has been expended to make the \LUATEX\ program correct and + reliable, no warranty is implied; the authors disclaim any obligation or liability for damages, + including but not limited to special, indirect, or consequential damages arising out of or in + connection with the use or performance of this software. This work has been a \quote {labor + of love} and the authors (Hartmut Henkel, Taco Hoekwater, Hans Hagen and Luigi Scarso) hope that + users enjoy it. + + After a decade of experimenting and reaching a more or less stable state, \LUATEX\ 1.0 was + released and a few years later end 2018 we were at version 1.1 which is meant to be a stable + version. No more substantial additions will take place (that happens in \LUAMETATEX). As a + follow up we decided to experiment with a stripped down version, basically the \TEX\ core + without backend and with minimal font and file management. We'll see where that ends. + + {\em You will find a lot of comments that originate in original \TEX. We kept them as a side + effect of the conversion from \WEB\ to \CWEB. Because there is not much webbing going on here + eventually the files became regular \CCODE\ files with still potentially typeset comments.
As + we add our own comments, and also comments are there from \PDFTEX, \ALEPH\ and \ETEX, we get a + curious mix. The best comments are of course from Don Knuth. All bad comments are ours. All + errors are ours too! + + Not all comments make sense, because some things are implemented differently, for instance some + memory management. But the principles of tokens and nodes stayed. It anyway means that you + sometimes need to keep in mind that the explanation is more geared to traditional \TEX. But that's + not a bad thing. Sorry Don for any confusion we introduced. The readers should have a copy of the + \TEX\ books at hand anyway.} + + A large piece of software like \TEX\ has inherent complexity that cannot be reduced below a certain + level of difficulty, although each individual part is fairly simple by itself. The \WEB\ language + is intended to make the algorithms as readable as possible, by reflecting the way the individual + program pieces fit together and by providing the cross-references that connect different parts. + Detailed comments about what is going on, and about why things were done in certain ways, have been + liberally sprinkled throughout the program. These comments explain features of the implementation, + but they rarely attempt to explain the \TeX\ language itself, since the reader is supposed to be + familiar with {\em The \TeX book}. + + The present implementation has a long ancestry, beginning in the summer of~1977, when Michael~F. + Plass and Frank~M. Liang designed and coded a prototype based on some specifications that the + author had made in May of that year. 
This original proto\TEX\ included macro definitions and + elementary manipulations on boxes and glue, but it did not have line-breaking, page-breaking, + mathematical formulas, alignment routines, error recovery, or the present semantic nest; + furthermore, it used character lists instead of token lists, so that a control sequence like | + \halign| was represented by a list of seven characters. A complete version of \TEX\ was designed + and coded by the author in late 1977 and early 1978; that program, like its prototype, was + written in the SAIL language, for which an excellent debugging system was available. Preliminary + plans to convert the SAIL code into a form somewhat like the present \quotation {web} were + developed by Luis Trabb~Pardo and the author at the beginning of 1979, and a complete + implementation was created by Ignacio~A. Zabala in 1979 and 1980. The \TEX82 program, which was + written by the author during the latter part of 1981 and the early part of 1982, also + incorporates ideas from the 1979 implementation of \TeX\ in {MESA} that was written by Leonidas + Guibas, Robert Sedgewick, and Douglas Wyatt at the Xerox Palo Alto Research Center. Several + hundred refinements were introduced into \TEX82 based on the experiences gained with the original + implementations, so that essentially every part of the system has been substantially improved. + After the appearance of Version 0 in September 1982, this program benefited greatly from the + comments of many other people, notably David~R. Fuchs and Howard~W. Trickey. A final revision in + September 1989 extended the input character set to eight-bit codes and introduced the ability to + hyphenate words from different languages, based on some ideas of Michael~J. Ferguson. + + No doubt there still is plenty of room for improvement, but the author is firmly committed to + keeping \TEX82 frozen from now on; stability and reliability are to be its main virtues. 
On the + other hand, the \WEB\ description can be extended without changing the core of \TEX82 itself, + and the program has been designed so that such extensions are not extremely difficult to make. + The |banner| string defined here should be changed whenever \TEX\ undergoes any modifications, + so that it will be clear which version of \TEX\ might be the guilty party when a problem arises. + + This program contains code for various features extending \TEX, therefore this program is called + \LUATEX\ and not \TEX; the official name \TEX\ by itself is reserved for software systems that + are fully compatible with each other. A special test suite called the \quote {TRIP test} is + available for helping to determine whether a particular implementation deserves to be known + as \TEX\ [cf.~Stanford Computer Science report CS1027, November 1984]. + + A similar test suite called the \quote {e-TRIP test} is available for helping to determine + whether a particular implementation deserves to be known as \ETEX. + + {\em NB: Although \LUATEX\ can pass lots of the test it's not trip compatible: we use \UTF, + support different font models, have adapted the backend to todays demands, etc.} + + This is the first of many sections of \TEX\ where global variables are defined. + + The \LUAMETATEX\ source is an adaptation of the \LUATEX\ source and it took quite a bit of + work to get there. I tried to stay close to the original Knuthian names and code but there are + all kind of subtle differences with the \LUATEX\ code, which came from the \PASCAL\ code. And + yes, all errors are mine (Hans). 

*/

/*tex

    This program (we're talking of original \TEX\ here) has two important variations:

    \startitemize[n]
        \startitem
            There is a long and slow version called \INITEX, which does the extra calculations
            needed to initialize \TEX's internal tables; and
        \stopitem
        \startitem
            there is a shorter and faster production version, which cuts the initialization to
            a bare minimum.
        \stopitem
    \stopitemize

    Remark: Due to faster processors and media, the difference is not as large as it used to be,
    so \quote {long} and \quote {slow} no longer really apply. Making a \PDFTEX\ format takes 6
    seconds because patterns are loaded in \UTF-8 format which demands interpretation, while
    \XETEX\ which has native \UTF-8\ support takes just over 3 seconds. Making \CONTEXT\ \LMTX\
    format with \LUAMETATEX\ takes 2.54 seconds, and it involves loading hundreds of files with
    megabytes of code (much more than in \MKII). So it's not that bad. Loading a format file for
    a production run takes less than half a second (which includes quite some \LUA\ initialization).
    On a more modern machine these times are less of course.

*/

/*tex
    We start out as a production run with output disabled and without a start time. The main body
    compares |run_state| with |initializing_state| to decide what has to be set up from scratch.
*/

main_state_info lmt_main_state = {
    .run_state     = production_state,
    .ready_already = output_disabled_state,
    .start_time    = 0.0,
};

/*tex

    This state registers whether we are |INITEX| with |ini_version|, keeps the \TEX\ width of
    context lines on terminal error messages in |error_line| and the width of first lines of
    contexts in terminal error messages in |half_error_line| which should be between 30 and
    |error_line - 15|. The width of longest text lines output, which should be at least 60,
    is stored in |max_print_line| and the maximum number of strings, which must not exceed
    |max_halfword| is kept in |max_strings|.
+ + The number of strings available after format loaded is |strings_free|, the maximum number of + characters simultaneously present in current lines of open files and in control sequences + between |\csname| and |\endcsname|, which must not exceed |max_halfword|, is kept in + |buf_size|. The maximum number of simultaneous input sources is in |stack_size| and the + maximum number of input files and error insertions that can be going on simultaneously in + |max_in_open|. The maximum number of simultaneous macro parameters is in |param_size| and + the maximum number of semantic levels simultaneously active in |nest_size|. The space for + saving values outside of current group, which must be at most |max_halfword|, is in + |save_size| and the depth of recursive calls of the |expand| procedure is limited by + |expand_depth|. + + The times recent outputs that didn't ship anything out is tracked with |dead_cycles|. All + these (formally single global) variables are collected in one state structure. (The error + reporting is to some extent an implementation detail. As errors can be intercepted by \LUA\ + we keep things simple.) + + We have noted that there are two versions of \TEX82. One, called \INITEX, has to be run + first; it initializes everything from scratch, without reading a format file, and it has the + capability of dumping a format file. The other one is called \VIRTEX; it is a \quote {virgin} + program that needs to input a format file in order to get started. (This model has been + adapted for a long time by the \TEX\ distributions, that ship multiple platforms and provide a + large infrastructure.) + + For \LUATEX\ it is important to know that we still dump a format. But, in order to gain speed + and a smaller footprint, we gzip the format (level 3). We also store some information that + makes an abort possible in case of an incompatible engine version, which is important as + \LUATEX\ develops. 
It is possible to store \LUA\ code in the format but not the current
    upvalues so you still need to initialize. Also, traditional fonts are stored, as are extended
    fonts but any additional information needed for instance to deal with \OPENTYPE\ fonts is to
    be handled by \LUA\ code and therefore not present in the format. (Actually, this version no
    longer stores fonts at all.)

*/

static void final_cleanup(int code);

/*tex
    This is the main routine. In sequence we: register the configurable limits and memory sizes,
    initialize the subsystems (the ones guarded by |initializing_state| only when we start from
    scratch), load the format file when we don't, pick up the first line, open the log file,
    start the input, run the main control loop and finally clean up, close files and exit. The
    order of the calls matters, so be careful when shuffling them around.
*/

void tex_main_body(void)
{

    /*tex First we register the limits and memory sizes by their configuration names. */

    tex_engine_set_limits_data("errorlinesize",     &lmt_error_state.line_limits);
    tex_engine_set_limits_data("halferrorlinesize", &lmt_error_state.half_line_limits);
    tex_engine_set_limits_data("expandsize",        &lmt_expand_state.limits);

    tex_engine_set_memory_data("buffersize",        &lmt_fileio_state.io_buffer_data);
    tex_engine_set_memory_data("filesize",          &lmt_input_state.in_stack_data);
    tex_engine_set_memory_data("fontsize",          &lmt_font_state.font_data);
    tex_engine_set_memory_data("hashsize",          &lmt_hash_state.hash_data);
    tex_engine_set_memory_data("inputsize",         &lmt_input_state.input_stack_data);
    tex_engine_set_memory_data("languagesize",      &lmt_language_state.language_data);
    tex_engine_set_memory_data("marksize",          &lmt_mark_state.mark_data);
    tex_engine_set_memory_data("insertsize",        &lmt_insert_state.insert_data);
    tex_engine_set_memory_data("nestsize",          &lmt_nest_state.nest_data);
    tex_engine_set_memory_data("nodesize",          &lmt_node_memory_state.nodes_data);
    tex_engine_set_memory_data("parametersize",     &lmt_input_state.parameter_stack_data);
    tex_engine_set_memory_data("poolsize",          &lmt_string_pool_state.string_body_data);
    tex_engine_set_memory_data("savesize",          &lmt_save_state.save_stack_data);
    tex_engine_set_memory_data("stringsize",        &lmt_string_pool_state.string_pool_data);
    tex_engine_set_memory_data("tokensize",         &lmt_token_memory_state.tokens_data);

    tex_initialize_fileio_state();
    tex_initialize_nest_state();
    tex_initialize_save_stack();
    tex_initialize_input_state();

    /*tex
        What is guarded by |initializing_state| only happens when we start from scratch;
        otherwise the format file that is loaded later on is expected to provide that state.
    */

    if (lmt_main_state.run_state == initializing_state) {
        tex_initialize_string_mem();
    }

    if (lmt_main_state.run_state == initializing_state) {
        tex_initialize_string_pool();
    }

    if (lmt_main_state.run_state == initializing_state) {
        tex_initialize_token_mem();
        tex_initialize_hash_mem();
    }

    tex_initialize_errors();
    tex_initialize_nesting();
    tex_initialize_pagestate();
    tex_initialize_levels();
    tex_initialize_primitives();
    tex_initialize_marks();

    if (lmt_main_state.run_state == initializing_state) {
        tex_initialize_inserts();
    }

    if (lmt_main_state.run_state == initializing_state) {
        tex_initialize_node_mem();
    }

    if (lmt_main_state.run_state == initializing_state) {
        tex_initialize_nodes();
        tex_initialize_tokens();
        tex_initialize_expansion();
        tex_initialize_alignments();
        tex_initialize_buildpage();
        tex_initialize_active();
        tex_initialize_equivalents();
        tex_initialize_math_codes();
        tex_initialize_text_codes();
        tex_initialize_cat_codes(0);
        tex_initialize_xx_codes();
    }

    tex_initialize_dump_state();
    tex_initialize_variables();
    tex_initialize_commands();
    tex_initialize_fonts();

    if (lmt_main_state.run_state == initializing_state) {
        tex_initialize_languages();
    }

    lmt_main_state.ready_already = output_enabled_state;

    /*tex in case we quit during initialization */

    lmt_error_state.history = fatal_error_stop;

    /*tex
        Get the first line of input and prepare to start. When we begin the following code, \TEX's
        tables may still contain garbage; the strings might not even be present. Thus we must
        proceed cautiously to get bootstrapped in.

        But when we finish this part of the program, \TEX\ is ready to call on the |main_control|
        routine to do its work.

        This copies the command line:
    */

    tex_initialize_inputstack();

    if (lmt_main_state.run_state == initializing_state) {
        /* We start out fresh. */
    } else if (tex_load_fmt_file()) {

        /*tex These two were skipped above because we were not initializing. */

        tex_initialize_expansion();
        tex_initialize_alignments();

        /*tex
            NOTE(review): the same call is made unconditionally a bit further down, so this one
            looks redundant unless something in between depends on it.
        */

        aux_get_date_and_time(&time_par, &day_par, &month_par, &year_par, &lmt_engine_state.utc_time);

        /*tex We skip leading spaces in the first line. */

        while ((lmt_input_state.cur_input.loc < lmt_input_state.cur_input.limit) && (lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc] == ' ')) {
            ++lmt_input_state.cur_input.loc;
        }
    } else {
        /*tex In a production run we cannot continue without a format. */
        tex_normal_exit();
    }

    /*tex Either we drop the last position of the line or we store the end of line character there. */

    if (end_line_char_inactive) {
        --lmt_input_state.cur_input.limit;
    } else {
        lmt_fileio_state.io_buffer[lmt_input_state.cur_input.limit] = (unsigned char) end_line_char_par;
    }

    aux_get_date_and_time(&time_par, &day_par, &month_par, &year_par, &lmt_engine_state.utc_time);

    tex_initialize_math();

    tex_fixup_selector(lmt_fileio_state.log_opened); /* hm, the log is not yet opened anyway */

    tex_engine_check_configuration();

    tex_initialize_directions();

    /*tex
        Input comes from the file name that the engine provides and/or from the configured
        |firstline| string. When we have neither of them we quit.
    */

    {
        char *ptr = tex_engine_input_filename();
        char *fln = NULL;
        tex_check_job_name(ptr);
        tex_open_log_file();
        tex_engine_get_config_string("firstline", &fln);
        if (fln) {
            tex_any_string_start(fln); /* experiment, see context lmtx */
        }
        if (ptr) {
            tex_start_input(ptr);
        } else if (! fln) {
            tex_emergency_message("startup error", "no input found, quitting");
            tex_emergency_exit();
        }
    }

    /*tex Ready to go, so come to life. */

    lmt_error_state.history = spotless;

    /*tex A request to dump is only honoured when we are initializing. */

    {
        int dump = tex_main_control();
        if (dump && lmt_main_state.run_state != initializing_state) {
            /*tex Maybe we need to issue a warning here. For now we just ignore it. */
            dump = 0;
        }
        final_cleanup(dump);
    }

    tex_close_files_and_terminate(0);

    tex_normal_exit();
}

/*tex

    Here we do whatever is needed to complete \TEX's job gracefully on the local operating system.
    The code here might come into play after a fatal error; it must therefore consist entirely of
    \quote {safe} operations that cannot produce error messages.
For example, it would be a mistake + to call |str_room| or |make_string| at this time, because a call on |overflow| might lead to an + infinite loop. + + Actually there's one way to get error messages, via |prepare_mag|; but that can't cause infinite + recursion. + + This program doesn't bother to close the input files that may still be open. + + We can decide to remove the reporting code here as it can (and in \CONTEXT\ will) be done in a + callback anyway, so we never enter that branch. + + The output statistics go directly to the log file instead of using |print| commands, because + there's no need for these strings to take up |string_pool| memory. + + We now assume a callback being set, if wanted at all, but we keep this as a reference so that + we know what is of interest: + + \starttyping + void close_files_and_terminate(int error) + { + int callback_id = lmt_callback_defined(stop_run_callback); + if (fileio_state.log_opened) { + if (callback_id == 0) { + fprintf(print_state.log_file, + "\n\nHere is how much memory " My_Name " used:\n" + ); + fprintf(print_state.log_file, + " %d strings out of %d\n", + string_pool_state.string_pool_data.ptr - string_pool_state.reserved, + string_pool_state.string_pool_data.allocated - string_pool_state.reserved + STRING_OFFSET + ); + fprintf(print_state.log_file, + " %d multiletter control sequences out of %d + %d extra\n", + hash_state.hash_data.real, + hash_size, + hash_state.hash_data.allocated + ); + fprintf(print_state.log_file, + " %d words of node memory allocated out of %d", + node_memory_state.nodes_data.allocated, + node_memory_state.nodes_data.size + ); + fprintf(print_state.log_file, + " %d words of token memory allocated out of %d", + token_memory_state.tokens_data.allocated, + token_memory_state.tokens_data.size + ); + fprintf(print_state.log_file, + " %d font%s using %d bytes\n", + get_font_max_id(), + (get_font_max_id() == 1 ? 
"" : "s"), + font_state.font_bytes + ); + fprintf(print_state.log_file, + " %d input stack positions out of %d\n", + input_state.input_stack_data.top, + input_state.input_stack_data.size + ); + fprintf(print_state.log_file, + " %d nest stack positions out of %d\n", + nest_state.nest_data.top, + nest_state.nest_data.size + ); + fprintf(print_state.log_file, + " %d parameter stack positions out of %d\n", + input_state.param_stack_data.top, + input_state.param_stack_data.size + ); + fprintf(print_state.log_file, + " %d buffer stack positions out of %d\n", + fileio_state.io_buffer_data.top, + fileio_state.io_buffer_data.size + ); + fprintf(print_state.log_file, + " %d save stack positions out of %d\n", + save_state.save_stack_data.top, + save_state.save_stack_data.size + ); + } + print_state.selector = print_state.selector - 2; + if ((print_state.selector == term_only_selector_code) && (callback_id == 0)) { + print_str_nl("Transcript written on "); + print_file_name((unsigned char *) fileio_state.log_name); + print_char('.'); + print_ln(); + } + close_log_file(); + } + callback_id = lmt_callback_defined(wrapup_run_callback); + if (callback_id > 0) { + lmt_run_callback(lua_state.lua_instance, callback_id, "b->", error); + } + free_text_codes(); + free_math_codes(); + free_languages(); + } + \stoptyping +*/ + +void tex_close_files_and_terminate(int error) +{ + int callback_id = lmt_callback_defined(wrapup_run_callback); + if (lmt_fileio_state.log_opened) { + tex_close_log_file(); + } + if (callback_id > 0) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "b->", error); + } +} + +/*tex + + We get to the |final_cleanup| routine when |\end| or |\dump| has been scanned and it's all + over now. + +*/ + +static void final_cleanup(int dump) +{ + int badrun = 0; + if (! 
lmt_fileio_state.job_name) { + tex_open_log_file (); + } + tex_cleanup_directions(); + while (lmt_input_state.input_stack_data.ptr > 0) + if (lmt_input_state.cur_input.state == token_list_state) { + tex_end_token_list(); + } else { + tex_end_file_reading(); + } + while (lmt_input_state.open_files > 0) { + tex_report_stop_file(); + --lmt_input_state.open_files; + } + if (cur_level > level_one) { + tex_print_format("(\\end occurred inside a group at level %i)", cur_level - level_one); + tex_show_save_groups(); + badrun = 1; + } + while (lmt_condition_state.cond_ptr) { + halfword t; + if (lmt_condition_state.if_line != 0) { + tex_print_format("(\\end occurred when %C on line %i was incomplete)", if_test_cmd, lmt_condition_state.cur_if, lmt_condition_state.if_line); + badrun = 2; + } else { + tex_print_format("(\\end occurred when %C was incomplete)"); + badrun = 3; + } + lmt_condition_state.if_line = if_limit_line(lmt_condition_state.cond_ptr); + lmt_condition_state.cur_if = node_subtype(lmt_condition_state.cond_ptr); + t = lmt_condition_state.cond_ptr; + lmt_condition_state.cond_ptr = node_next(lmt_condition_state.cond_ptr); + tex_flush_node(t); + } + if (lmt_print_state.selector == terminal_and_logfile_selector_code && lmt_callback_defined(stop_run_callback) == 0) { + if ((lmt_error_state.history == warning_issued) || (lmt_error_state.history != spotless && lmt_error_state.interaction < error_stop_mode)) { + lmt_print_state.selector = terminal_selector_code; + tex_print_message("see the transcript file for additional information"); + lmt_print_state.selector = terminal_and_logfile_selector_code; + } + } + if (dump) { + tex_cleanup_alignments(); + tex_cleanup_expansion(); + if (lmt_main_state.run_state == initializing_state) { + for (int i = 0; i <= lmt_mark_state.mark_data.ptr; i++) { + tex_wipe_mark(i); + } + tex_flush_node_list(lmt_packaging_state.page_discards_head); + tex_flush_node_list(lmt_packaging_state.split_discards_head); + if 
(lmt_page_builder_state.last_glue != max_halfword) { + tex_flush_node(lmt_page_builder_state.last_glue); + } + for (int i = 0; i <= lmt_insert_state.insert_data.ptr; i++) { + tex_wipe_insert(i); + } + tex_store_fmt_file(); + } else { + tex_print_message("\\dump is performed only by INITEX"); + badrun = 4; + } + } + if (lmt_callback_defined(stop_run_callback)) { + /* + We don't issue the error callback here (yet), mainly because we don't really know what + bad things happened. This might evolve as currently it is not seen as fatal error. + */ + lmt_run_callback(lmt_lua_state.lua_instance, stop_run_callback, "d->", badrun); + } +} + diff --git a/source/luametatex/source/tex/texmainbody.h b/source/luametatex/source/tex/texmainbody.h new file mode 100644 index 000000000..d0d329e8a --- /dev/null +++ b/source/luametatex/source/tex/texmainbody.h @@ -0,0 +1,43 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_MAINBODY_H +# define LMT_MAINBODY_H + +/* Global variables */ + +typedef enum run_states { + initializing_state, + updating_state, + production_state, +} run_states; + +typedef enum ready_states { + output_disabled_state, + output_enabled_state, +} ready_states; + +typedef struct main_state_info { + int run_state; /*tex Are we |INITEX|? */ + int ready_already; /*tex A typical \TEX\ variable name. */ + double start_time; +} main_state_info ; + +extern main_state_info lmt_main_state ; + +/*tex + + The following procedure, which is called just before \TEX\ initializes its input and output, + establishes the initial values of the date and time. It calls a macro-defined |dateandtime| + routine. |dateandtime| in turn is also a |CCODE\ macro, which calls |get_date_and_time|, + passing it the addresses of the day, month, etc., so they can be set by the routine. + |get_date_and_time| also sets up interrupt catching if that is conditionally compiled in the + \CCODE\ code. 
+ +*/ + +extern void tex_main_body (void); +extern void tex_close_files_and_terminate (int error); + +# endif diff --git a/source/luametatex/source/tex/texmaincontrol.c b/source/luametatex/source/tex/texmaincontrol.c new file mode 100644 index 000000000..a1983ac4f --- /dev/null +++ b/source/luametatex/source/tex/texmaincontrol.c @@ -0,0 +1,6412 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + We come now to the |main_control| routine, which contains the master switch that causes all the + various pieces of \TEX\ to do their things, in the right order. + + In a sense, this is the grand climax of the program: It applies all the tools that we have + worked so hard to construct. In another sense, this is the messiest part of the program: It + necessarily refers to other pieces of code all over the place, so that a person can't fully + understand what is going on without paging back and forth to be reminded of conventions that + are defined elsewhere. We are now at the hub of the web, the central nervous system that + touches most of the other parts and ties them together. + + The structure of |main_control| itself is quite simple. There's a label called |big_switch|, + at which point the next token of input is fetched using |get_x_token|. Then the program + branches at high speed into one of about 100 possible directions, based on the value of the + current mode and the newly fetched command code; the sum |abs(mode) + cur_cmd| indicates what + to do next. For example, the case |vmode + letter| arises when a letter occurs in vertical + mode (or internal vertical mode); this case leads to instructions that initialize a new + paragraph and enter horizontal mode.p + + The big |case| statement that contains this multiway switch has been labeled |reswitch|, so + that the program can |goto reswitch| when the next token has already been fetched. 
Most of + the cases are quite short; they call an \quote {action procedure} that does the work for that + case, and then they either |goto reswitch| or they \quote {fall through} to the end of the + |case| statement, which returns control back to |big_switch|. Thus, |main_control| is not an + extremely large procedure, in spite of the multiplicity of things it must do; it is small + enough to be handled by \PASCAL\ compilers that put severe restrictions on procedure size. + + One case is singled out for special treatment, because it accounts for most of \TEX's + activities in typical applications. The process of reading simple text and converting it + into |char_node| records, while looking for ligatures and kerns, is part of \TEX's \quote + {inner loop}; the whole program runs efficiently when its inner loop is fast, so this part + has been written with particular care. (This is no longer true in \LUATEX.) + + We leave the |space_factor| unchanged if |sf_code(cur_chr) = 0|; otherwise we set it equal + to |sf_code(cur_chr)|, except that it should never change from a value less than 1000 to a + value exceeding 1000. The most common case is |sf_code(cur_chr)=1000|, so we want that case to + be fast. + + All action is done via runners in the function table. Some runners are implemented here, + others are spread over modules. In due time I will use more prefixes to indicate where they + belong. Also, more runners will move to their respective modules, a stepwise process. This + split up is not always consistent which relates to the fact that \TEX\ is a monolithic program + which in turn means that we keep all the smaller (and more dependent) bits here. There are + subsystems but they hook into each other, take inserts and adjusts that hook into the builders + and packagers.
+ +*/ + +main_control_state_info lmt_main_control_state = { + .control_state = goto_next_state, + .local_level = 0, + .after_token = null, + .after_tokens = null, + .last_par_context = 0, + .loop_iterator = 0, + .loop_nesting = 0, + .quit_loop = 0, +}; + +/*tex + A few helpers: +*/ + +inline scaled tex_aux_checked_dimen1(scaled v) +{ + if (v > max_dimen) { + return max_dimen; + } else if (v < -max_dimen) { + return -max_dimen; + } else { + return v; + } +} + +inline scaled tex_aux_checked_dimen2(scaled v) +{ + if (v > max_dimen) { + return max_dimen; + } else if (v < 0) { + return 0; + } else { + return v; + } +} + +/*tex + These two helpers, of which the second one is still experimental, actually belong in another + file so then might be moved. Watch how the first one has the |unsave| call! + */ + +static void tex_aux_fixup_directions_and_unsave(void) +{ + int saved_par_state = internal_par_state_par; + int saved_dir_state = internal_dir_state_par; + int saved_direction = text_direction_par; + tex_pop_text_dir_ptr(); + tex_unsave(); + if (cur_mode == hmode) { + if (saved_dir_state) { + /* Add local dir node. */ + tex_tail_append(tex_new_dir(cancel_dir_subtype, text_direction_par)); + dir_direction(cur_list.tail) = saved_direction; + } + if (saved_par_state) { + /*tex Add local paragraph node. This resets after a group. */ + tex_tail_append(tex_new_par_node(hmode_par_par_subtype)); + } + } +} + +static void tex_aux_fixup_directions_only(void) +{ + int saved_dir_state = internal_dir_state_par; + int saved_direction = text_direction_par; + tex_pop_text_dir_ptr(); + if (saved_dir_state) { + /* Add local dir node. 
*/ + tex_tail_append(tex_new_dir(cancel_dir_subtype, saved_direction)); + } +} + +static void tex_aux_fixup_math_and_unsave(void) +{ + int saved_math_style = internal_math_style_par; + int saved_math_scale = internal_math_scale_par; + tex_unsave(); + if (cur_mode == mmode) { + if (saved_math_style >= 0 && saved_math_style != cur_list.math_style) { + halfword noad = tex_new_node(style_node, (quarterword) saved_math_style); + cur_list.math_style = saved_math_style; + tex_tail_append(noad); + } + if (saved_math_scale != cur_list.math_scale) { + halfword noad = tex_new_node(style_node, scaled_math_style); + style_scale(noad) = saved_math_scale; + tex_tail_append(noad); + } + } +} + +/*tex + + If the user says, e.g., |\global \global|, the redundancy is silently accepted. The different + types of code values have different legal ranges; the following program is careful to check + each case properly. + +*/ + +static void tex_aux_out_of_range_error(halfword val, halfword max) +{ + tex_handle_error( + normal_error_type, + "Invalid code (%i), should be in the range %i..%i", + val, 0, max, + "I'm going to use 0 instead of that illegal code value." + ); +} + +/*tex + + The |run_| functions hook in the main control handler. Some immediately do something, others + trigger a follow up scan, driven by the cmd code. Here come some forward declarations; there + are more that the following |run_| functions. Some runners are defined in other modules. Some + runners finish what another started, for instance when we see a left brace, depending on state + another runner can kick in. 
+ + */ + +static void tex_aux_adjust_space_factor(halfword chr) +{ + halfword s = tex_get_sf_code(chr); + if (s == 1000) { + cur_list.space_factor = 1000; + } else if (s < 1000) { + if (s > 0) { + cur_list.space_factor = s; + } else { + /* s <= 0 */ + } + } else if (cur_list.space_factor < 1000) { + cur_list.space_factor = 1000; + } else { + cur_list.space_factor = s; + } +} + +static void tex_aux_run_text_char_number(void) +{ + switch (cur_chr) { + case char_number_code: + { + halfword chr = tex_scan_char_number(0); + tex_aux_adjust_space_factor(chr); + tex_tail_append(tex_new_char_node(glyph_unset_subtype, cur_font_par, chr, 1)); + break; + } + case glyph_number_code: + { + scaled xoffset = glyph_x_offset_par; + scaled yoffset = glyph_y_offset_par; + halfword xscale = glyph_x_scale_par; + halfword yscale = glyph_y_scale_par; + halfword scale = glyph_scale_par; + halfword options = glyph_options_par; + halfword font = cur_font_par; + scaled left = 0; + scaled right = 0; + scaled raise = 0; + halfword chr = 0; + halfword glyph; + while (1) { + switch (tex_scan_character("xyofislrXYOFISLR", 0, 1, 0)) { + case 0: + goto DONE; + case 'x': case 'X': + switch (tex_scan_character("osOS", 0, 0, 0)) { + case 'o': case 'O': + if (tex_scan_mandate_keyword("xoffset", 2)) { + xoffset = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 's': case 'S': + if (tex_scan_mandate_keyword("xscale", 2)) { + xscale = tex_scan_int(0, NULL); + } + break; + default: + tex_aux_show_keyword_error("xoffset|xscale"); + goto DONE; + } + break; + case 'y': case 'Y': + switch (tex_scan_character("osOS", 0, 0, 0)) { + case 'o': case 'O': + if (tex_scan_mandate_keyword("yoffset", 2)) { + yoffset = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 's': case 'S': + if (tex_scan_mandate_keyword("yscale", 2)) { + yscale = tex_scan_int(0, NULL); + } + break; + default: + tex_aux_show_keyword_error("yoffset|yscale"); + goto DONE; + } + break; + case 'o': case 'O': + if 
(tex_scan_mandate_keyword("options", 1)) { + options = tex_scan_int(0, NULL); + if (options < glyph_option_normal_glyph) { + options = glyph_option_normal_glyph; + } else if (options > glyph_option_all) { + options = glyph_option_all; + } + } + break; + case 'f': case 'F': + if (tex_scan_mandate_keyword("font", 1)) { + font = tex_scan_font_identifier(NULL); + } + break; + case 'i': case 'I': + if (tex_scan_mandate_keyword("id", 1)) { + halfword f = tex_scan_int(0, NULL); + if (f > 0 && tex_is_valid_font(f)) { + font = f; + } + } + break; + case 's': case 'S': + if (tex_scan_mandate_keyword("scale", 1)) { + yscale = tex_scan_int(0, NULL); + } + break; + case 'l': case 'L': + if (tex_scan_mandate_keyword("left", 1)) { + left = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'r': case 'R': + switch (tex_scan_character("aiAI", 0, 0, 0)) { + case 'i': case 'I': + if (tex_scan_mandate_keyword("right", 2)) { + right = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'a': case 'A': + if (tex_scan_mandate_keyword("raise", 2)) { + raise = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + default: + tex_aux_show_keyword_error("right|raise"); + goto DONE; + } + break; + default: + goto DONE; + } + } + DONE: + chr = tex_scan_char_number(0); + tex_aux_adjust_space_factor(chr); + glyph = tex_new_char_node(glyph_unset_subtype, font, chr, 1); + set_glyph_x_offset(glyph, xoffset); + set_glyph_y_offset(glyph, yoffset); + set_glyph_scale(glyph, scale); + set_glyph_x_scale(glyph, xscale); + set_glyph_y_scale(glyph, yscale); + set_glyph_left(glyph, left); + set_glyph_right(glyph, right); + set_glyph_raise(glyph, raise); + set_glyph_options(glyph, options); + tex_tail_append(glyph); + break; + } + } +} + +static void tex_aux_run_text_letter(void) { + tex_aux_adjust_space_factor(cur_chr); + tex_tail_append(tex_new_char_node(glyph_unset_subtype, cur_font_par, cur_chr, 1)); +} + +/*tex + + Here are all the functions that are called from |main_control| that are not already defined 
+ elsewhere. For the moment, this list simply in the order that the appear in |init_main_control|, + below. + +*/ + +static void tex_aux_run_node(void) { + halfword n = cur_chr; + if (node_token_flagged(n)) { + tex_get_token(); + n = node_token_sum(n,cur_chr); + } + if (copy_lua_input_nodes_par) { + n = tex_copy_node_list(n, null); + } + tex_tail_append(n); + if (tex_nodetype_has_attributes(node_type(n)) && ! node_attr(n)) { + attach_current_attribute_list(n); + } + while (node_next(n)) { + n = node_next(n); + tex_tail_append(n); + if (tex_nodetype_has_attributes(node_type(n)) && ! node_attr(n)) { + attach_current_attribute_list(n); + } + } +} + +/* */ + +inline static void lmt_bytecode_run(int index) +{ + strnumber u = tex_save_cur_string(); + lmt_token_state.luacstrings = 0; + lmt_bytecode_call(index); + tex_restore_cur_string(u); + if (lmt_token_state.luacstrings > 0) { + tex_lua_string_start(); + } +} + +inline static void lmt_lua_run(int reference, int prefix) +{ + strnumber u = tex_save_cur_string(); + lmt_token_state.luacstrings = 0; + lmt_function_call(reference, prefix); + tex_restore_cur_string(u); + if (lmt_token_state.luacstrings > 0) { + tex_lua_string_start(); + } +} + +static void tex_aux_run_lua_protected_call(void) { + if (cur_chr > 0) { + lmt_lua_run(cur_chr, 0); + } else { + tex_normal_error("luacall", "invalid number"); + } +} + +static void tex_aux_set_lua_value(int a) { + if (cur_chr > 0) { + lmt_lua_run(cur_chr, a); + } else { + tex_normal_error("luavalue", "invalid number"); + } +} + +/*tex + + The occurrence of blank spaces is almost part of \TEX's inner loop, since we usually encounter + about one space for every five non-blank characters. Therefore |main_control| gives second + highest priority to ordinary spaces. 
+ + When a glue parameter like |\spaceskip| is set to |0pt|, we will see to it later that the + corresponding glue specification is precisely |zero_glue|, not merely a pointer to some + specification that happens to be full of zeroes. Therefore it is simple to test whether a glue + parameter is zero or~not. + + There is a special treatment for spaces when |space_factor <> 1000|. + + */ + +static void tex_aux_run_math_space(void) { + if (! disable_spaces_par) { + if (node_type(cur_list.tail) == simple_noad) { + noad_options(cur_list.tail) |= noad_option_followed_by_space; + } + } +} + +static void tex_aux_run_space(void) { + switch (disable_spaces_par) { + case 1: + /*tex Don't inject anything, not even zero skip. */ + return; + case 2: + /*tex Inject nothing but zero glue. */ + tex_tail_append(tex_new_glue_node(zero_glue, zero_space_skip_glue)); /* todo: subtype, zero_space_glue? */ + break; + default: + /*tex + The tradional treatment. A difference with other \TEX's is that we store the spacing + in the node instead of using the (end of) paragraph bound value. + */ + { + halfword p; + if (cur_mode == hmode && cur_cmd == spacer_cmd && cur_list.space_factor != 1000) { + if ((cur_list.space_factor >= 2000) && (! 
tex_glue_is_zero(xspace_skip_par))) { + p = tex_get_scaled_parameter_glue(xspace_skip_code, xspace_skip_glue); + } else { + halfword cur_font = cur_font_par; + if (tex_glue_is_zero(space_skip_par)) { + p = tex_get_scaled_glue(cur_font); + } else { + p = tex_get_parameter_glue(space_skip_code, space_skip_glue); /* not scaled */ + } + /* Modify the glue specification in |q| according to the space factor */ + if (cur_list.space_factor >= 2000) { + glue_amount(p) += tex_get_scaled_extra_space(cur_font); + } + glue_stretch(p) = tex_xn_over_d(glue_stretch(p), cur_list.space_factor, 1000); + glue_shrink(p) = tex_xn_over_d(glue_shrink(p), 1000, cur_list.space_factor); + } + } else if (tex_glue_is_zero(space_skip_par)) { + /*tex Find the glue specification for text spaces in the current font. */ + p = tex_get_scaled_glue(cur_font_par); + } else { + /*tex Append a normal inter-word space to the current list. */ + p = tex_get_parameter_glue(space_skip_code, space_skip_glue); /* not scaled */ + } + tex_tail_append(p); + } + } +} + +/*tex A fast one, also used to silently ignore |\par|s in a math formula. */ + +static void tex_aux_run_relax(void) { + return; +} + +/*tex + + |ignore_spaces| is a special case: after it has acted, |get_x_token| has already fetched the + next token from the input, so that operation in |main_control| should be skipped. + +*/ + +static void tex_aux_run_ignore_something(void) { + switch (cur_chr) { + case ignore_space_code: + /*tex Get the next non-blank call. */ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + lmt_main_control_state.control_state = goto_skip_token_state; + break; + case ignore_par_code: + /*tex Get the next non-blank/par call. */ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd || cur_cmd == end_paragraph_cmd); + lmt_main_control_state.control_state = goto_skip_token_state; + break; + case ignore_argument_code: + /*tex There is nothing to show here. 
*/ + break; + default: + break; + } +} + +/* */ + +static void tex_aux_run_math_non_math(void) { + if (tracing_commands_par >= 4) { + tex_begin_diagnostic(); + tex_print_format("[math: pushing back %C]", cur_cmd, cur_chr); + tex_end_diagnostic(); + } + tex_back_input(cur_tok); + tex_begin_paragraph(1, math_char_par_begin); +} + +/*tex + + The most important parts of |main_control| are concerned with \TEX's chief mission of box + making. We need to control the activities that put entries on vlists and hlists, as well as + the activities that convert those lists into boxes. All of the necessary machinery has already + been developed; it remains for us to \quote {push the buttons} at the right times. + + As an introduction to these routines, let's consider one of the simplest cases: What happens + when |\hrule| occurs in vertical mode, or |\vrule| in horizontal mode or math mode? The code + in |main_control| is short, since the |scan_rule_spec| routine already does most of what is + required; thus, there is no need for a special action procedure. + + Note that baselineskip calculations are disabled after a rule in vertical mode, by setting + |prev_depth := ignore_depth|. + + First we define a procedure that returns a pointer to a rule node. This routine is called just + after \TEX\ has seen |\hrule| or |\vrule|; therefore |cur_cmd| will be either |hrule| or + |vrule|. The idea is to store the default rule dimensions in the node, then to override them if + |height| or |width| or |depth| specifications are found (in any order). + + For a moment I considered this: + + \starttyping + if (scan_keyword("to")) { + scan_dimen(0, 0, 0, 0); rule_width(q) = cur_val; + scan_dimen(0, 0, 0, 0); rule_height(q) = cur_val; + scan_dimen(0, 0, 0, 0); rule_depth(q) = cur_val; + return q; + } + \stoptyping + +*/ + + +/*tex + + Many of the actions related to box-making are triggered by the appearance of braces in the + input. 
For example, when the user says |\hbox to 100pt {<hlist>}| in vertical mode, the + information about the box size (100pt, |exactly|) is put onto |save_stack| with a level + boundary word just above it, and |cur_group:=adjusted_hbox_group|; \TEX\ enters restricted + horizontal mode to process the hlist. The right brace eventually causes |save_stack| to be + restored to its former state, at which time the information about the box size (100pt, + |exactly|) is available once again; a box is packaged and we leave restricted horizontal mode, + appending the new box to the current list of the enclosing mode (in this case to the current + list of vertical mode), followed by any vertical adjustments that were removed from the box by + |hpack|. + + The next few sections of the program are therefore concerned with the treatment of left and + right curly braces. + + If a left brace occurs in the middle of a page or paragraph, it simply introduces a new level + of grouping, and the matching right brace will not have such a drastic effect. Such grouping + affects neither the mode nor the current list. + +*/ + +static void tex_aux_run_left_brace(void) { + tex_new_save_level(simple_group); + update_tex_internal_par_state(0); + update_tex_internal_dir_state(0); +} + +/*tex + + The |also_simple_group| variant is triggered by |\beginsimplegroup|. It permits a mixed group + ending model: + + \starttyping + \def\foo{\beginsimplegroup\bf\let\next} \foo{text} + \stoptyping + + So, such a group can end with |\endgroup| as well as |\egroup| or equivalents. This trick is + mostly meant for math where a complex group produces a list which in turn influences spacing. + +*/ + +static void tex_aux_run_begin_group(void) { + switch (cur_chr) { + case semi_simple_group_code: + case also_simple_group_code: + tex_new_save_level(cur_chr ? 
also_simple_group : semi_simple_group); + update_tex_internal_par_state(0); + update_tex_internal_dir_state(0); + break; + case math_simple_group_code: + tex_new_save_level(math_simple_group); + update_tex_internal_math_style(cur_mode == mmode ? cur_list.math_style : -1); + update_tex_internal_math_scale(cur_mode == mmode ? cur_list.math_scale : 0); + break; + } +} + +static void tex_aux_run_end_group(void) { +// /* cur_chr can be 1 for a endsimplegroup but it's equivalent */ +// if (cur_group == semi_simple_group || cur_group == also_simple_group) { +// tex_aux_fixup_directions_and_unsave(); /*tex Includes the |save()| call! */ +// } else { +// tex_off_save(); /*tex Recover with error. */ +// } + switch (cur_group) { + case semi_simple_group: + case also_simple_group: + tex_aux_fixup_directions_and_unsave(); /*tex Includes the |save()| call! */ + break; + case math_simple_group: + tex_aux_fixup_math_and_unsave(); /*tex Includes the |save()| call! */ + break; + default: + tex_off_save(); /*tex Recover with error. */ + break; + } +} + +/*tex + + Constructions that require a box are started by calling |scan_box| with a specified context + code. The |scan_box| routine verifies that a |make_box| command comes next and then it calls + |begin_box|. + + Maybe we should just have three variants as sharing this makes it messy: |cur_cmd| combined + with |cur_chr| and funny flags for leaders. Due to grouping we have a shared |box_end| so + it doesn't become much prettier anyway. + + */ + +static void tex_aux_scan_box(int boxcontext, int optional_equal, scaled shift) +{ + /*tex Get the next non-blank non-relax... and optionally skip an equal sign */ + while (1) { + tex_get_x_token(); + if (cur_cmd == spacer_cmd) { + /*tex Go on. 
*/ + } else if (cur_cmd == relax_cmd) { + optional_equal = 0; + } else if (optional_equal && cur_tok == equal_token) { + optional_equal = 0; + } else { + break; + } + } + switch (cur_cmd) { + case make_box_cmd: + { + tex_begin_box(boxcontext, shift); + return; + } + case vcenter_cmd: + { + tex_run_vcenter(); + return; + } + case lua_call_cmd: + case lua_protected_call_cmd: + { + if (box_leaders_flag(boxcontext)) { + tex_aux_run_lua_protected_call(); + tex_get_next(); + if (cur_cmd == node_cmd) { + /*tex So we only fetch the tail; the rest can mess up in the current list! */ + halfword boxnode = null; + tex_aux_run_node(); + boxnode = tex_pop_tail(); + if (boxnode) { + switch (node_type(boxnode)) { + case hlist_node: + case vlist_node: + case rule_node: + case glyph_node: + tex_box_end(boxcontext, boxnode, shift, unset_noad_class); + return; + } + } + } + tex_formatted_error("lua", "invalid function call, proper leader content expected"); + return; + } + break; + } + case lua_value_cmd: + { + halfword v = tex_scan_lua_value(cur_chr); + switch (v) { + case no_val_level: + tex_box_end(boxcontext, null, shift, unset_noad_class); + return; + case list_val_level: + if (box_leaders_flag(boxcontext)) { + switch (node_type(cur_val)) { + case hlist_node: + case vlist_node: + case rule_node: + // case glyph_node: + tex_box_end(boxcontext, cur_val, shift, unset_noad_class); + return; + } + } else { + switch (node_type(cur_val)) { + case hlist_node: + case vlist_node: + tex_box_end(boxcontext, cur_val, shift, unset_noad_class); + return; + } + } + } + tex_formatted_error("lua", "invalid function call, return type %i instead of %i", v, list_val_level); + return; + } + case hrule_cmd: + case vrule_cmd: + { + if (box_leaders_flag(boxcontext)) { + halfword rulenode = tex_aux_scan_rule_spec(cur_cmd == hrule_cmd ? h_rule_type : (cur_cmd == vrule_cmd ? 
v_rule_type : m_rule_type), cur_chr); + tex_box_end(boxcontext, rulenode, shift, unset_noad_class); + return; + } else { + break; + } + } + case char_number_cmd: + { + if (cur_mode == hmode && box_leaders_flag(boxcontext)) { + /*tex We cheat by just appending to the current list. */ + halfword boxnode = null; + tex_aux_run_text_char_number(); + boxnode = tex_pop_tail(); + tex_box_end(boxcontext, boxnode, shift, unset_noad_class); + return; + } else { + break; + } + } + } + tex_handle_error( + back_error_type, + "A <box> was supposed to be here", + "I was expecting to see \\hbox or \\vbox or \\copy or \\box or something like\n" + "that. So you might find something missing in your output. But keep trying; you\n" + "can fix this later." + ); + if (boxcontext == lua_scan_flag) { + tex_box_end(boxcontext, null, shift, unset_noad_class); + } +} + +/*tex + The |tex_aux_scan_box| call takes a |context| parameter and that is is somewhat weird: it + can be a box number, a flag signaling a special kind of box like a leader, or it can be the + shift in a move. It all relates to passing something in a way that make it possible to pick + it up later. +*/ + +static void tex_aux_run_move(void) { + int code = cur_chr; + halfword val = tex_scan_dimen(0, 0, 0, 0, NULL); + tex_aux_scan_box(0, 0, code == move_forward_code ? val : - val); +} + +/*tex + Local boxes are something that comes from \OMEGA\ but we implement them somewhat differently. + When we finish, the test for |p != null| ensures that empty |\localleftbox| and |\localrightbox| + commands are not applied. But it is stull kind of a mess, this mechanism. Resetting these boxes + involves registering a state but now we also check if it has been set at all. When I need this + feature I will probably check it out and redo some of the code. + + Options: \quote {par} will set the initial par node, when present. 
*/

/*tex
    Offsets of the three records that |tex_aux_scan_local_box| puts on the save stack and that
    |tex_aux_finish_local_box| reads back (relative to the stack pointer).
*/

typedef enum saved_localbox_items {
    saved_localbox_item_location = 0,
    saved_localbox_item_index    = 1,
    saved_localbox_item_options  = 2,
    saved_localbox_n_of_items    = 3,
} saved_localbox_items;

/*tex
    Start scanning a local box: the keys are scanned, then |code| (the location), the scanned
    class (stored as index) and the options are saved, a |local_box_group| is opened, the left
    brace is scanned and a new nest level in negative |hmode| is entered.
*/

static void tex_aux_scan_local_box(int code) {
    quarterword options = 0;
    halfword class = 0;
    tex_scan_local_boxes_keys(&options, &class);
    tex_set_saved_record(saved_localbox_item_location, saved_local_box_location, 0, code);
    tex_set_saved_record(saved_localbox_item_index, saved_local_box_index, 0, class);
    tex_set_saved_record(saved_localbox_item_options, saved_local_box_options, 0, options);
    /*tex The records only become part of the stack once the pointer is moved past them. */
    lmt_save_state.save_stack_data.ptr += saved_localbox_n_of_items;
    tex_new_save_level(local_box_group);
    tex_scan_left_brace();
    tex_push_nest();
    cur_list.mode = -hmode;
    cur_list.space_factor = 1000;
}

/*tex
    Finish a local box: the saved records are fetched (negative offsets because the pointer sits
    just past them), the collected list is packed into an hlist with subtype |local_list| and
    then registered, unless the |local| option was given. In horizontal or math mode the box
    either replaces the boxes of the initial par node (the |par| option) or a new par node is
    appended.
*/

static void tex_aux_finish_local_box(void)
{
    tex_unsave();
    if (saved_type(saved_localbox_item_location - saved_localbox_n_of_items) == saved_local_box_location) {
        halfword p;
        halfword location = saved_value(saved_localbox_item_location - saved_localbox_n_of_items);
        quarterword options = (quarterword) saved_value(saved_localbox_item_options - saved_localbox_n_of_items);
        halfword index = saved_value(saved_localbox_item_index - saved_localbox_n_of_items);
        int islocal = (options & local_box_local_option) == local_box_local_option;
        int keep = (options & local_box_keep_option) == local_box_keep_option;
        int atpar = (options & local_box_par_option) == local_box_par_option;
        lmt_save_state.save_stack_data.ptr -= saved_localbox_n_of_items;
        /*tex The list has to be picked up before the nest level is popped. */
        p = node_next(cur_list.head);
        tex_pop_nest();
        if (p) {
            /*tex Somehow |filtered_hpack| goes beyond the first node so we lose it. */
            node_prev(p) = null;
            if (tex_list_has_glyph(p)) {
                tex_handle_hyphenation(p, null);
                p = tex_handle_glyphrun(p, local_box_group, text_direction_par);
            }
            if (p) {
                p = lmt_hpack_filter_callback(p, 0, packing_additional, local_box_group, direction_unknown, null);
            }
            /*tex
                We really need something packed so we play safe! This feature is inherited but could
                have been delegated to a callback anyway.
            */
            p = tex_hpack(p, 0, packing_additional, direction_unknown, holding_none_option);
         // node_subtype(p) = location == local_left_box_code ? local_left_list : local_right_list;
            node_subtype(p) = local_list;
            box_index(p) = index;
         // attach_current_attribute_list(p); // leaks
        }
        // what to do with reset
        if (islocal) {
            /*tex There no copy needed either! */
        } else {
            tex_update_local_boxes(p, index, location);
        }
     // if (cur_mode == hmode) {
        if (cur_mode == hmode || cur_mode == mmode) {
            if (atpar) {
                halfword par = tex_find_par_par(cur_list.head);
                if (par) {
                    /*tex When the box was also registered above, the par node gets its own copy. */
                    if (p && ! islocal) {
                        p = tex_copy_node(p);
                    }
                    tex_replace_local_boxes(par, p, index, location);
                }
            } else {
                /*tex
                    We had a null check here but we also want to be able to reset these boxes so we
                    no longer check.
                */
                tex_tail_append(tex_new_par_node(local_box_par_subtype));
                if (! keep) {
                    /*tex So we can group and keep it. */
                    update_tex_internal_par_state(internal_par_state_par + 1);
                }
            }
        }
    } else {
        tex_confusion("build local box");
    }
}

// static void tex_aux_run_leader(void) {
//     switch (cur_chr) {
//         case a_leaders_code:
//             tex_aux_scan_box(a_leaders_flag, 0, 0);
//             break;
//         case c_leaders_code:
//             tex_aux_scan_box(c_leaders_flag, 0, 0);
//             break;
//         case x_leaders_code:
//             tex_aux_scan_box(x_leaders_flag, 0, 0);
//             break;
//         case g_leaders_code:
//             tex_aux_scan_box(g_leaders_flag, 0, 0);
//             break;
//     }
// }

/*tex
    Box context flags, indexed by the leader code in |cur_chr|. NOTE(review): this assumes the
    leader codes are 0 upto 4 in exactly this order; confirm against the code definitions.
*/

static int leader_flags[] = {
    a_leaders_flag,
    c_leaders_flag,
    x_leaders_flag,
    g_leaders_flag,
    u_leaders_flag,
};

/*tex Scan the box (or rule) that follows a leaders command; |cur_chr| is not range checked. */

static void tex_aux_run_leader(void) {
    tex_aux_scan_box(leader_flags[cur_chr], 0, null_flag);
}

/*tex Only |shipout_code| is handled here: the box that follows is scanned with |shipout_flag|. */

static void tex_aux_run_legacy(void) {
    switch (cur_chr) {
        case shipout_code:
            tex_aux_scan_box(shipout_flag, 0, null_flag);
            break;
        default:
            /* cant_happen */
            break;
    }
}

/*tex The local box location is taken from |cur_chr|. */

static void tex_aux_run_local_box(void) {
    tex_aux_scan_local_box(cur_chr);
}

/*tex A box command that is not preceded by a prefix: context zero and no shift. */

static void tex_aux_run_make_box(void) {
    tex_begin_box(0, null_flag);
}

/*tex

    There is a really small patch to add a new primitive called |\quitvmode|. In vertical modes, it
    is identical to |\indent|, but in horizontal and math modes it is really a no-op (as opposed to
    |\indent|, which executes the |indent_in_hmode| procedure).

    A paragraph begins when horizontal-mode material occurs in vertical mode, or when the paragraph
    is explicitly started by |\quitvmode|, |\indent| or |\noindent|. We can revert this to zero
    while at the same time keeping the node.

    To be considered: delay (as with parfilskip), skip + boundary, pre/post anchor etc.
+ +*/ + +static void tex_aux_insert_parindent(int indented) +{ + if (normalize_line_mode_permitted(normalize_line_mode_par, parindent_skip_mode)) { + /*tex We cannot use |new_param_glue| yet, because it's a dimen */ + halfword p = tex_new_glue_node(zero_glue, indent_skip_glue); + if (indented) { + glue_amount(p) = par_indent_par; + } + tex_tail_append(p); + } else if (indented) { + halfword p = tex_new_null_box_node(hlist_node, indent_list); + box_dir(p) = (singleword) par_direction_par; + box_width(p) = par_indent_par; + tex_tail_append(p); + } +} + +static void tex_aux_remove_parindent(void) +{ + halfword tail = cur_list.tail; + switch (node_type(tail)) { + case glue_node: + if (tex_is_par_init_glue(tail)) { + glue_amount(tail) = 0; + } + break; + case hlist_node: + if (node_subtype(tail) == indent_list) { + box_width(tail) = 0; + } + break; + } +} + +static void tex_aux_run_begin_paragraph_vmode(void) { + switch (cur_chr) { + case noindent_par_code: + tex_begin_paragraph(0, no_indent_par_begin); + break; + case indent_par_code: + tex_begin_paragraph(1, indent_par_begin); + break; + case quitvmode_par_code: + tex_begin_paragraph(1, force_par_begin); + break; + case snapshot_par_code: + /* silently ignore */ + tex_scan_int(0, NULL); + break; + case attribute_par_code: + /* silently ignore */ + tex_scan_attribute_register_number(); + tex_scan_int(1, NULL); + break; + case wrapup_par_code: + tex_you_cant_error(NULL); + break; + } +} + +static void tex_aux_run_begin_paragraph_hmode(void) { + switch (cur_chr) { + case noindent_par_code: + /*tex We do as traditional \TEX, so no zero skip either when normalizing */ + break; + case indent_par_code: + /*tex We can have |\hbox {\indent x\indent x\indent}| */ + tex_aux_insert_parindent(1); + break; + case undent_par_code: + tex_aux_remove_parindent(); + break; + case snapshot_par_code: + { + halfword tag = tex_scan_int(0, NULL); + halfword par = tex_find_par_par(cur_list.head); + if (par) { + tex_snapshot_par(par, tag); + } 
+ break; + } + case attribute_par_code: + { + halfword att = tex_scan_attribute_register_number(); + halfword val = tex_scan_int(1, NULL); + halfword par = tex_find_par_par(cur_list.head); + if (par) { + if (val == unused_attribute_value) { + tex_unset_attribute(par, att, val); + } else { + tex_set_attribute(par, att, val); + } + } + break; + } + case wrapup_par_code: + { + halfword par = tex_find_par_par(cur_list.head); + if (par) { + halfword eop = par_end_par_tokens(par); + int reverse = tex_scan_optional_keyword("reverse"); + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_cmd == left_brace_cmd) { + halfword source = tex_scan_toks_normal(1, NULL); + if (source) { + if (eop) { + if (reverse) { + halfword p = token_link(source); + if (p) { + while (token_link(p)) { + p = token_link(p); + } + token_link(p) = token_link(par_end_par_tokens(par)); + token_link(par_end_par_tokens(par)) = null; + tex_flush_token_list(par_end_par_tokens(par)); + par_end_par_tokens(par) = source; + } + } else { + halfword p = eop; + while (token_link(p)) { + p = token_link(p); + } + token_link(p) = token_link(source); + token_link(source) = null; + tex_flush_token_list(source); + } + } else { + par_end_par_tokens(par) = source; + } + } + } else { + tex_handle_error( + normal_error_type, + "I expected a {", + "The '\\wrapuppar' command only accepts an explicit token list." 
+ ); + } + } + break; + } + } +} + +static void tex_aux_run_begin_paragraph_mmode(void) { + switch (cur_chr) { + case indent_par_code: + { + halfword p = tex_new_null_box_node(hlist_node, indent_list); + box_width(p) = par_indent_par; + p = tex_new_sub_box(p); + tex_tail_append(p); + break; + } + case snapshot_par_code: + /* silently ignore */ + tex_scan_int(0, NULL); + break; + case attribute_par_code: + /* silently ignore */ + tex_scan_attribute_register_number(); + tex_scan_int(1, NULL); + break; + case wrapup_par_code: + tex_you_cant_error(NULL); + break; + } +} + +static void tex_aux_run_new_paragraph(void) { + int context; + switch (cur_cmd) { + case char_given_cmd: + case other_char_cmd: + case letter_cmd: + case accent_cmd: + case char_number_cmd: + case discretionary_cmd: + context = char_par_begin; + break; + case boundary_cmd: + context = boundary_par_begin; + break; + case explicit_space_cmd: + context = space_par_begin; + break; + case math_shift_cmd: + case math_shift_cs_cmd: + context = math_par_begin; + break; + case hskip_cmd: + context = hskip_par_begin; + break; + case kern_cmd: + context = kern_par_begin; + break; + case un_hbox_cmd: + context = un_hbox_char_par_begin; + break; + case valign_cmd: + context = valign_char_par_begin; + break; + case vrule_cmd: + context = vrule_char_par_begin; + break; + default: + context = normal_par_begin; + break; + } + if (tracing_commands_par >= 4) { + tex_begin_diagnostic(); + tex_print_format("[text: pushing back %C]", cur_cmd, cur_chr); + tex_end_diagnostic(); + } + tex_back_input(cur_tok); + tex_begin_paragraph(1, context); +} + +/*tex + Append a |boundary_node|. The |page_boundary| case is kind of special. It adds a node node to + the list of contributions and triggers the page builder (that only kicks in when there is some + contribution). That itself can result in firing up the output routine if the page is filled up. 
+ An alternative is to inject a penalty but we don't want anything to stay behind and using some + special penalty would be incompatible. + + In order to really trigger a check we change the boundary node into zero penalty in the builder + when it still present (as the callback can decide to wipe it). It's a bit weird mechanism but + it closely relates to triggering something that gets logged in the core engine. Anyway, we + basically have a zero penalty equivalent (but one that doesn't register as last node). +*/ + +void tex_page_boundary_message(const char *s, halfword n) +{ + if (tracing_pages_par >= 0) { + tex_begin_diagnostic(); + tex_print_format("[page: boundary, %s, trigger %i]", s, n); + tex_end_diagnostic(); + } +} + +static void tex_aux_run_par_boundary(void) { + switch (cur_chr) { + case page_boundary: + { + halfword n = tex_scan_int(0, NULL); + if (lmt_nest_state.nest_data.ptr == 0 && ! lmt_page_builder_state.output_active) { + halfword n = tex_new_node(boundary_node, (quarterword) cur_chr); + boundary_data(n) = n; + tex_tail_append(n); + if (cur_list.mode == vmode) { + if (! lmt_page_builder_state.output_active) { + tex_page_boundary_message("callback triggered", n); + lmt_page_filter_callback(boundary_page_context, n); + } + tex_page_boundary_message("build triggered", n); + tex_build_page(); + } else { + tex_page_boundary_message("appended", n); + } + } else { + tex_page_boundary_message("ignored", n); + } + break; + } + /*tex Not yet, first I need a proper use case. */ /* + case par_boundary: + { + halfword n = tex_new_node(boundary_node, (quarterword) cur_chr); + boundary_data(n) = tex_scan_int(0, NULL); + tex_tail_append(n); + break; + } + */ + default: + /*tex Go into horizontal mode and try again (was already the modus operandi). 
*/ + tex_aux_run_new_paragraph(); + break; + } +} + +static void tex_aux_run_text_boundary(void) { + halfword n = tex_new_node(boundary_node, (quarterword) cur_chr); + switch (cur_chr) { + case user_boundary: + case protrusion_boundary: + boundary_data(n) = tex_scan_int(0, NULL); + break; + default: + break; + } + tex_tail_append(n); +} + +static void tex_aux_run_math_boundary(void) { + switch (cur_chr) { + case user_boundary: + { + halfword n = tex_new_node(boundary_node, user_boundary); + boundary_data(n) = tex_scan_int(0, NULL); + tex_tail_append(n); + break; + } + case protrusion_boundary: + tex_scan_int(0, NULL); + break; + } +} + +/*tex + + A paragraph ends when a |par_end| command is sensed, or when we are in horizontal mode when + reaching the right brace of vertical-mode routines like |\vbox|, |\insert|, or |\output|. + +*/ + +static void tex_aux_run_paragraph_end_vmode(void) { + // tex_normal_paragraph(normal_par_context); + tex_normal_paragraph(vmode_par_context); + if (cur_list.mode > nomode) { + if (! lmt_page_builder_state.output_active) { + lmt_page_filter_callback(vmode_par_page_context, 0); + } + tex_build_page(); + } +} + +/*tex We could pass the group and context here if needed and set some parameter. */ + +int tex_wrapped_up_paragraph(int context) { + halfword par = tex_find_par_par(cur_list.head); + lmt_main_control_state.last_par_context = context; + if (par) { + int done = 0; + if (par_end_par_tokens(par)) { + halfword eop = par_end_par_tokens(par); + par_end_par_tokens(par) = null; + tex_back_input(cur_tok); + /*tex We inject the tokens, which increments the ref count; this one has tracing. */ + tex_begin_token_list(eop, end_paragraph_text); + /*tex So we need to decrement the token ref here. */ + tex_delete_token_reference(eop); + done = 1; + } + // if (end_of_par_par) { + // if (! 
done) { + // back_input(cur_tok); + // } + // begin_token_list(end_of_par_par, end_paragraph_text); + // update_tex_end_of_par(null); + // done = 1; + // } + return done; + } else { + return 0; + } +} + +static void tex_aux_run_paragraph_end_hmode(void) { + if (! tex_wrapped_up_paragraph(normal_par_context)) { + if (lmt_input_state.align_state < 0) { + /*tex This tries to recover from an alignment that didn't end properly. */ + tex_off_save(); + } + /* This takes us to the enclosing mode, if |mode > 0|. */ + tex_end_paragraph(bottom_level_group, normal_par_context); + if (cur_list.mode == vmode) { + if (! lmt_page_builder_state.output_active) { + lmt_page_filter_callback(hmode_par_page_context, 0); + } + tex_build_page(); + } + } +} + +/* */ + +static void tex_aux_run_halign_mmode(void) { + if (tex_in_privileged_mode()) { + if (cur_group == math_shift_group) { + tex_run_alignment_initialize(); + } else { + tex_off_save(); + } + } +} + +/*tex + + The |\afterassignment| command puts a token into the global variable |after_token|. This global + variable is examined just after every assignment has been performed. It's value is zero, or a + saved token. + + Todo: combine code in helper. 
*/

/*tex
    The six variants either take a single token or (the ones that end with |ed|) an explicit
    braced token list. The tokens are saved for after the group, stored for after the next
    assignment, or appended to the |end_of_group_par| token list.
*/

static void tex_aux_run_after_something(void) {
    switch (cur_chr) {
        case after_group_code:
            {
                halfword t = tex_get_token(); /* avoid realloc issues */
                t = tex_get_available_token(t);
                tex_save_for_after_group(t);
                break;
            }
        case after_assignment_code:
            {
                lmt_main_control_state.after_token = tex_get_token();
                break;
            }
        case at_end_of_group_code:
            {
                halfword t = tex_get_token(); /* avoid realloc issues */
                halfword r = tex_get_available_token(t);
                if (end_of_group_par) {
                    /*tex We append to what is already there. */
                    halfword p = end_of_group_par;
                    while (token_link(p)) {
                        p = token_link(p);
                    }
                    token_link(p) = r;
                } else {
                    /*tex A fresh list: an empty head token followed by the new token. */
                    halfword p = tex_get_available_token(null);
                    token_link(p) = r;
                    update_tex_end_of_group(p);
                }
                break;
            }
        case after_grouped_code:
            {
                do {
                    tex_get_x_token();
                } while (cur_cmd == spacer_cmd);
                if (cur_cmd == left_brace_cmd) {
                    halfword source = tex_scan_toks_normal(1, NULL);
                    if (source) {
                        /*tex The tokens are handed over, so only the head gets flushed below. */
                        tex_save_for_after_group(token_link(source));
                        token_link(source) = null;
                    }
                    tex_flush_token_list(source);
                } else {
                    tex_handle_error(
                        normal_error_type,
                        "I expected a {",
                        "The '\\aftergrouped' command only accepts an explicit token list."
                    );
                }
                break;
            }
        case after_assigned_code:
            {
                do {
                    tex_get_x_token();
                } while (cur_cmd == spacer_cmd);
                if (cur_cmd == left_brace_cmd) {
                    halfword source = tex_scan_toks_normal(1, NULL);
                    if (source) {
                        /*tex The tokens are handed over, so only the head gets flushed below. */
                        lmt_main_control_state.after_tokens = token_link(source);
                        token_link(source) = null;
                    }
                    tex_flush_token_list(source);
                } else {
                    tex_handle_error(
                        normal_error_type,
                        "I expected a {",
                        "The '\\afterassigned' command only accepts an explicit token list."
                    );
                }
                break;
            }
        case at_end_of_grouped_code:
            {
                do {
                    tex_get_x_token();
                } while (cur_cmd == spacer_cmd);
                if (cur_cmd == left_brace_cmd) {
                    halfword source = tex_scan_toks_normal(1, NULL);
                    if (source) {
                        if (end_of_group_par) {
                            halfword p = end_of_group_par;
                            while (token_link(p)) {
                                p = token_link(p);
                            }
                            token_link(p) = token_link(source);
                            token_link(source) = null;
                            tex_flush_token_list(source);
                        } else {
                            update_tex_end_of_group(source);
                        }
                    }
                } else {
                    /*tex NOTE(review): the name in this message differs from the code name; confirm the primitive. */
                    tex_handle_error(
                        normal_error_type,
                        "I expected a {",
                        "The '\\endofgrouped' command only accepts an explicit token list."
                    );
                }
                break;
            }
    }
}

/*tex
    Feed back what was stored for after the assignment: first the single token, then the token
    list. Both are reset afterwards.
*/

inline static void tex_aux_finish_after_assignment(void)
{
    if (lmt_main_control_state.after_token) {
        tex_back_input(lmt_main_control_state.after_token);
        lmt_main_control_state.after_token = null;
    }
    if (lmt_main_control_state.after_tokens) {
        tex_begin_inserted_list(lmt_main_control_state.after_tokens);
        lmt_main_control_state.after_tokens = null;
    }
}

/*tex The error that we issue for a catcode table id that is out of range. */

static void tex_aux_invalid_catcode_table_error(void) {
    tex_handle_error(
        normal_error_type,
        "Invalid \\catcode table",
        "All \\catcode table ids must be between 0 and " LMT_TOSTRING(max_n_of_catcode_tables-1)
    );
}

/*tex The error that we issue when the target is the table that is currently in use. */

static void tex_aux_overwrite_catcode_table_error(void) {
    tex_handle_error(
        normal_error_type,
        "Invalid \\catcode table",
        "You cannot overwrite the current \\catcode table"
    );
}

/*tex
    Save the current catcode table into table |v| or initialize table |v|. In both cases the id
    must be in range and may not be the current table.
*/

static void tex_aux_run_catcode_table(void) {
    switch (cur_chr) {
        case save_cat_code_table_code:
            {
                halfword v = tex_scan_int(0, NULL);
                if ((v < 0) || (v >= max_n_of_catcode_tables)) {
                    tex_aux_invalid_catcode_table_error();
                } else if (v == cat_code_table_par) {
                    tex_aux_overwrite_catcode_table_error();
                } else {
                    tex_copy_cat_codes(cat_code_table_par, v);
                }
                break;
            }
        case init_cat_code_table_code:
            {
                halfword v = tex_scan_int(0, NULL);
                if ((v < 0) || (v >= max_n_of_catcode_tables)) {
                    tex_aux_invalid_catcode_table_error();
                } else if (v == cat_code_table_par) {
                    tex_aux_overwrite_catcode_table_error();
                } else {
                    tex_initialize_cat_codes(v);
                }
                break;
            }
        /*
        case dflt_cat_code_table_code:
            {
                halfword v = scan_int(1);
                if ((v < 0) || (v > CATCODE_MAX)) {
                    invalid_catcode_table_error();
                } else {
                    set_cat_code_table_default(cat_code_table_par, v);
                }
            }
            break;
        */
        default:
            break;
    }
}

/*tex The end local token: we trace when asked for and then lower the local control level. */

static void tex_aux_run_end_local(void)
{
    if (tracing_nesting_par > 2) {
        tex_local_control_message("leaving token scanner due to local end token");
    }
    tex_end_local_control();
}

/*tex Scan a function or bytecode reference and run it. */

static void tex_aux_run_lua_function_call(void)
{
    switch (cur_chr) {
        case lua_function_call_code:
            {
                halfword v = tex_scan_function_reference(0);
                lmt_lua_run(v, 0);
                break;
            }
        case lua_bytecode_call_code:
            {
                halfword v = tex_scan_bytecode_reference(0);
                lmt_bytecode_run(v);
                break;
            }
        default:
            break;
    }
}

/*tex

    The |main_control| uses a jump table, and |init_main_control| sets that table up. We need to
    assign an entry for {\em each} of the three modes!

    For mode-independent commands, the following macro is useful. Also, there is a list of cases
    where the user has probably gotten into or out of math mode by mistake. \TEX\ will insert a
    dollar sign and rescan the current token, and it makes sense to have a macro for that as well.

*/

# if (main_control_mode == 0)

    typedef void (*main_control_function)(void);

    static main_control_function *jump_table;

# endif

/*tex

    Here is |main_control| itself. It is quite short nowadays. The initializer is at the end of
    this file which saves a bunch of forward declarations.
+ + */ + +inline static void tex_aux_big_switch (int mode, int cmd); + +int tex_main_control(void) +{ + lmt_main_control_state.control_state = goto_next_state; + if (every_job_par) { + tex_begin_token_list(every_job_par, every_job_text); + } + while (1) { + if (lmt_main_control_state.control_state == goto_skip_token_state) { + lmt_main_control_state.control_state = goto_next_state; + } else { + tex_get_x_token(); + } + /*tex + Give diagnostic information, if requested When a new token has just been fetched at + |big_switch|, we have an ideal place to monitor \TEX's activity. + */ + if (tracing_commands_par > 0) { + tex_show_cmd_chr(cur_cmd, cur_chr); + } + /*tex Run the command: */ + tex_aux_big_switch(cur_mode, cur_cmd); + if (lmt_main_control_state.control_state == goto_return_state) { + return cur_chr == dump_code; + } + } + /*tex not reached */ + return 0; +} + +/*tex + + We assume a trailing |\relax|: |{...}\relax|, so we don't need a |back_input ()| here. + +*/ + +void tex_local_control_message(const char *s) +{ + tex_begin_diagnostic(); + tex_print_format("[local control: level %i, %s]", lmt_main_control_state.local_level, s); + tex_end_diagnostic(); +} + +/*tex + + We can save in two ways but when, for symmetry I want it to happen at the current level, we need + to use the save stack. It depends a bit on how this will evolve. + + This one is used in the runlocal \LUA\ helper. This local control is in fact like the main loop, + so it can result in stuff being injected in for instance the main vertical list. I played with + control over the mode but that gave weird side effects, so I dropped that immediately. + + The implementation of local control in \LUAMETATEX\ is a bit different from \LUATEX\ because we + use it in several ways. 
+ +*/ + +void tex_local_control(int obeymode) +{ + full_scanner_status saved_full_status = tex_save_full_scanner_status(); + int old_mode = cur_list.mode; + int at_level = lmt_main_control_state.local_level; + lmt_main_control_state.local_level += 1; + lmt_main_control_state.control_state = goto_next_state; + if (! obeymode) { + cur_list.mode = -hmode; + } + while (1) { + if (lmt_main_control_state.control_state == goto_skip_token_state) { + lmt_main_control_state.control_state = goto_next_state; + } else { + tex_get_x_token(); + } + if (tracing_commands_par > 0) { + tex_show_cmd_chr(cur_cmd, cur_chr); + } + tex_aux_big_switch(cur_mode, cur_cmd); + if (lmt_main_control_state.local_level <= at_level) { + lmt_main_control_state.control_state = goto_next_state; + if (tracing_nesting_par > 2) { + /*tex This is a kind of duplicate message, which can be confusing */ + tex_local_control_message("leaving local control due to level change"); + } + break; + } else if (lmt_main_control_state.control_state == goto_return_state) { + if (tracing_nesting_par > 2) { + tex_local_control_message("leaving local control due to triggering"); + } + break; + } + } + if (! 
obeymode) { + cur_list.mode = old_mode; + } + tex_unsave_full_scanner_status(saved_full_status); +} + +inline int tex_aux_is_iterator_value(halfword tokeninfo) +{ + if (tokeninfo >= cs_token_flag) { + halfword cs = tokeninfo - cs_token_flag; + return eq_type(cs) == some_item_cmd && eq_value(cs) == last_loop_iterator_code; + } else { + return 0; + } +} + +void tex_begin_local_control(void) +{ + halfword code = cur_chr; + if (tracing_nesting_par > 2) { + tex_local_control_message("entering token scanner via primitive"); + } + switch (code) { + case local_control_list_code: + { + halfword t; + halfword h = tex_scan_toks_normal(0, &t); + halfword r = tex_get_available_token(token_val(end_local_cmd, 0)); + tex_begin_inserted_list(r); + tex_begin_token_list(h, local_text); + break; + } + case local_control_token_code: + { + halfword t = tex_get_token(); /* avoid realloc issues */ + halfword h = get_reference_token(); + halfword r = tex_get_available_token(token_val(end_local_cmd, 0)); + tex_store_new_token(h, t); + tex_begin_inserted_list(r); + tex_begin_token_list(h, local_text); + break; + } + /*tex + For the moment al three are here because they share some code. At some point I might + move the last two to the |convert_cmd| which is more natural spot but this is easier + for debugging. + + The align_state hack was tricky and took me a while to figure out because it only was + an issue with +10K loops (where 10K is this magic state number). + + We support a leading optional equal sign because that can help make robust macros that + get |\the \dimexpr 1pt| etc fed which can lead to \TEX\ seeing one huge number. 
+ */ + case local_control_loop_code: + case expanded_loop_code: + case unexpanded_loop_code: + { + halfword tail; + halfword first = tex_scan_int(1, NULL); + halfword last = tex_scan_int(1, NULL); + halfword step = tex_scan_int(1, NULL); + halfword head = tex_scan_toks_normal(0, &tail); + if (token_link(head) && step) { + int savedloop = lmt_main_control_state.loop_iterator; + int savedquit = lmt_main_control_state.quit_loop; + ++lmt_main_control_state.loop_nesting; + switch (code) { + case local_control_loop_code: + { + /*tex: + Appending to tail gives issues at the outer level, for instance + |\dorecurse {3} {\startTEXpage \stopTEXpage}| without |\starttext + \stoptext| wrapping. So, no: + */ + /* tex_store_new_token(tail, token_val(end_local_cmd, 0)); */ + for (halfword i = first; step > 0 ? i <= last : i >= last; i += step) { + lmt_main_control_state.loop_iterator = i; + lmt_main_control_state.quit_loop = 0; + /*tex But this, so that we get a proper |\end message|: */ + tex_begin_inserted_list(tex_get_available_token(token_val(end_local_cmd, 0))); + /*tex ... maybe we need to enforce a level > 0 instead. */ + tex_begin_token_list(head, local_loop_text); + tex_local_control(1); + /*tex We need to avoid build-up. */ + tex_cleanup_input_state(); + if (lmt_main_control_state.quit_loop) { + break; + } + } + tex_flush_token_list(head); + break; + } + case expanded_loop_code: + { + halfword h = null; + halfword t = null; + full_scanner_status saved_full_status = tex_save_full_scanner_status(); + strnumber u = tex_save_cur_string(); + tex_store_new_token(tail, right_brace_token + '}'); + for (halfword i = first; step > 0 ? 
i <= last : i >= last; i += step) { + halfword lt = null; + halfword lh = null; + ++lmt_input_state.align_state; + lmt_main_control_state.loop_iterator = i; + tex_begin_token_list(head, loop_text); /* ref counted */ + lh = tex_scan_toks_expand(1, <, 0); + if (token_link(lh)) { + if (h) { + token_link(t) = token_link(lh); + } else { + h = token_link(lh); + } + t = lt; + } + tex_put_available_token(lh); + tex_cleanup_input_state(); + if (lmt_main_control_state.quit_loop) { + break; + } + } + tex_unsave_full_scanner_status(saved_full_status); + tex_restore_cur_string(u); + tex_flush_token_list(head); + tex_begin_inserted_list(h); + break; + } + case unexpanded_loop_code: + { + /* + A |\currentloopiterator| will not adapt itself in this kind of + loop so we can as well replace it by the current one value which + is what we do here. There is some overhead but I can live with + that. + */ + + halfword h = token_link(head); + halfword tt = null; + halfword t = h; + halfword b = 0; /* we can count and then break out */ + while (token_link(t)) { + t = token_link(t); + if (! b && tex_aux_is_iterator_value(token_info(t))) { + b = 1; + } + } + tt = t; + for (halfword i = first + step; step > 0 ? i <= last : i >= last; i += step) { + halfword hh = h; + while (1) { + t = tex_store_new_token(t, token_info(hh)); + if (b && tex_aux_is_iterator_value(token_info(t))) { + halfword v = (i < min_iterator_value) ? min_iterator_value : (i > max_iterator_value ? max_iterator_value : i); + token_info(t) = token_val(iterator_value_cmd, v < 0 ? 0x100000 - v : v); + } + if (hh == tt) { + break; + } else { + hh = token_link(hh); + } + } + } + if (b) { + halfword hh = h; + while (1) { + if (tex_aux_is_iterator_value(token_info(hh))) { + halfword v = (first < min_iterator_value) ? min_iterator_value : (first > max_iterator_value ? max_iterator_value : first); + token_info(hh) = token_val(iterator_value_cmd, v < 0 ? 
0x100000 - v : v); + } + if (hh == tt) { + break; + } else { + hh = token_link(hh); + } + } + } + tex_put_available_token(head); + tex_begin_inserted_list(h); + break; + } + } + --lmt_main_control_state.loop_nesting; + lmt_main_control_state.quit_loop = savedquit; + lmt_main_control_state.loop_iterator = savedloop; + return; + } else { + tex_flush_token_list(head); + } + return; + } + } + tex_local_control(1); /*tex In this case nicer than 0. */ + // tex_cleanup_input_state(); /*tex Yes or no? */ +} + +void tex_end_local_control(void ) +{ + if (lmt_main_control_state.local_level > 0) { + lmt_main_control_state.local_level -= 1; + } else { + tex_local_control_message("redundant end local control"); + } +} + +/*tex + + We need to go back to the main loop. This is rather nasty and dirty and counterintuive code and + there might be a cleaner way. Basically we trigger the main control state from here. + + \starttyping + 0 0 \directlua{token.scan_box()}\hbox{!} + -1 0 \setbox0\hbox{x}\directlua{token.scan_box()}\box0 + 1 1 \toks0={\directlua{token.scan_box()}\hbox{x}}\directlua{tex.runtoks(0)} + 0 0 1 1 \directlua{tex.box[0]=token.scan_box()}\hbox{x\directlua{node.write(token.scan_box())}\hbox{x}} + 0 0 0 1 \setbox0\hbox{x}\directlua{tex.box[0]=token.scan_box()}\hbox{x\directlua{node.write(token.scan_box())}\box0} + \stoptyping + + It's rather fragile code so we added some tracing options. 
*/

/*tex
    Scan a box in negative |hmode| with |lua_scan_flag| as context and return |cur_box|. When
    the local level is unchanged after scanning we enter local control; otherwise the level is
    set back to what it was. The mode is restored in both cases.
*/

halfword tex_local_scan_box(void)
{
    int old_mode = cur_list.mode;
    int old_level = lmt_main_control_state.local_level;
    cur_list.mode = -hmode;
    tex_aux_scan_box(lua_scan_flag, 0, null_flag);
    if (lmt_main_control_state.local_level == old_level) {
        /*tex |\directlua{print(token.scan_list())}\hbox{!}| (n n) */
        if (tracing_nesting_par > 2) {
            tex_local_control_message("entering at end of box scanning");
        }
        tex_local_control(1);
    } else {
        /*tex |\directlua{print(token.scan_list())}\box0| (n-1 n) */
        /*
        if (tracing_nesting_par > 2) {
            local_control_message("setting level after box scanning");
        }
        */
        lmt_main_control_state.local_level = old_level;
    }
    cur_list.mode = old_mode;
    return cur_box;
}

/*tex

    We have an issue with modes when we quit here because we're coming from and still staying at
    the \LUA\ end. So, unless we're already nested, we trigger an end_local_level token (an
    extension code).

*/

/*tex
    This lowers the local level without the check for zero that |tex_end_local_control| has.
    NOTE(review): presumably the level change is what the box scanner above tests for; confirm
    at the call site.
*/

static void tex_aux_wrapup_local_scan_box(void)
{
    /*
    if (tracing_nesting_par > 2) {
        local_control_message("leaving box scanner");
    }
    */
    lmt_main_control_state.local_level -= 1;
}

/*tex
    Push back the current token and make a math shift the current token, then report that with
    an insert type error.
*/

static void tex_aux_run_insert_dollar_sign(void)
{
    tex_back_input(cur_tok);
    cur_tok = math_shift_token + '$';
    tex_handle_error(
        insert_error_type,
        "Missing $ inserted",
        "I've inserted a begin-math/end-math symbol since I think you left one out.\n"
        "Proceed, with fingers crossed."
    );
}

/*tex

    The |you_cant| procedure prints a line saying that the current command is illegal in the current
    mode; it identifies these things symbolically.

*/

/*tex The help text is passed after the values for the message and can be |NULL|. */

void tex_you_cant_error(const char *helpinfo)
{
    tex_handle_error(
        normal_error_type,
        "You can't use '%C' in %M", cur_cmd, cur_chr, cur_list.mode,
        helpinfo
    );
}

/*tex

    When erroneous situations arise, \TEX\ usually issues an error message specific to the particular
    error.
For example, |\noalign| should not appear in any mode, since it is recognized by the + |align_peek| routine in all of its legitimate appearances; a special error message is given when + |\noalign| occurs elsewhere. But sometimes the most appropriate error message is simply that the + user is not allowed to do what he or she has attempted. For example, |\moveleft| is allowed only + in vertical mode, and |\lower| only in non-vertical modes. + +*/ + +static void tex_aux_run_illegal_case(void) +{ + tex_you_cant_error( + "Sorry, but I'm not programmed to handle this case;\n" + "I'll just pretend that you didn''t ask for it.\n" + "If you're in the wrong mode, you might be able to\n" + "return to the right one by typing 'I}' or 'I$' or 'I\\par'." + ); +} + +/*tex + + Some operations are allowed only in privileged modes, i.e., in cases that |mode > 0|. The + |privileged| function is used to detect violations of this rule; it issues an error message and + returns |false| if the current |mode| is negative. + +*/ + +int tex_in_privileged_mode(void) +{ + if (cur_list.mode > nomode) { + return 1; + } else { + tex_aux_run_illegal_case(); + return 0; + } +} + +/*tex + + We don't want to leave |main_control| immediately when a |stop| command is sensed, because it + may be necessary to invoke an |\output| routine several times before things really grind to a + halt. (The output routine might even say |\gdef \end {...}|, to prolong the life of the job.) + Therefore |its_all_over| is |true| only when the current page and contribution list are empty, + and when the last output was not a \quote {dead cycle}. We do this when |\end| or |\dump| + occurs. This |stop| is a special case as we want |main_control| to return to its caller if there + is nothing left to do. 
+ +*/ + +static void tex_aux_run_end_job(void) { + if (tex_in_privileged_mode()) { + if ((page_head == lmt_page_builder_state.page_tail) + && (cur_list.head == cur_list.tail) + && (lmt_page_builder_state.dead_cycles == 0)) { + /*tex this is the only way out */ + lmt_main_control_state.control_state = goto_return_state; + } else { + /*tex we will try to end again after ejecting residual material */ + tex_back_input(cur_tok); + tex_tail_append(tex_new_null_box_node(hlist_node, unknown_list)); + box_width(cur_list.tail) = hsize_par; + tex_tail_append(tex_new_glue_node(fill_glue, user_skip_glue)); /* todo: subtype, final_skip_glue? */ + tex_tail_append(tex_new_penalty_node(-010000000000, final_penalty_subtype)); /* -0x40000000 */ + lmt_page_filter_callback(end_page_context, 0); + /*tex append |\hbox to \hsize{}\vfill\penalty-'10000000000| */ + tex_build_page(); + } + } +} + +/*tex + + The |hskip| and |vskip| command codes are used for control sequences like |\hss| and |\vfil| as + well as for |\hskip| and |\vskip|. The difference is in the value of |cur_chr|. + + All the work relating to glue creation has been relegated to the following subroutine. It does + not call |build_page|, because it is used in at least one place where that would be a mistake. 
+ +*/ + +static void tex_aux_run_glue(void) +{ + switch (cur_chr) { + case fil_code: + tex_tail_append(tex_new_glue_node(fil_glue, user_skip_glue)); + break; + case fill_code: + tex_tail_append(tex_new_glue_node(fill_glue, user_skip_glue)); + break; + case filll_code: /*tex aka |ss_code| */ + tex_tail_append(tex_new_glue_node(filll_glue, user_skip_glue)); + break; + case fil_neg_code: + tex_tail_append(tex_new_glue_node(fil_neg_glue, user_skip_glue)); + break; + case skip_code: + { + halfword v = tex_scan_glue(glue_val_level, 0); + halfword g = tex_new_glue_node(v, user_skip_glue); + /* glue_data(g) = glue_data_par; */ + tex_tail_append(g); + tex_flush_node(v); + break; + } + default: + break; + } +} + +static void tex_aux_run_mglue(void) +{ + switch (cur_chr) { + case normal_mskip_code: + { + halfword v = tex_scan_glue(mu_val_level, 0); + tex_tail_append(tex_new_glue_node(v, mu_glue)); + tex_flush_node(v); + break; + } + case atom_mskip_code: + { + halfword left = tex_scan_math_class_number(0); + halfword right = tex_scan_math_class_number(0); + halfword style = tex_scan_math_style_identifier(0, 0); + halfword node = tex_math_spacing_glue(left, right, style); + if (node) { + tex_tail_append(node); + } else { + /*tex This could be an option: */ + tex_tail_append(tex_new_glue_node(zero_glue, mu_glue)); + } + break; + } + } +} + +/*tex + + We have to deal with errors in which braces and such things are not properly nested. Sometimes + the user makes an error of commission by inserting an extra symbol, but sometimes the user makes + an error of omission. \TEX\ can't always tell one from the other, so it makes a guess and tries + to avoid getting into a loop. + + The |off_save| routine is called when the current group code is wrong. It tries to insert + something into the user's input that will help clean off the top level. 
+ +*/ + +void tex_off_save(void) +{ + if (cur_group == bottom_level_group) { + /*tex Drop current token and complain that it was unmatched */ + tex_handle_error(normal_error_type, "Extra %C", cur_cmd, cur_chr, + "Things are pretty mixed up, but I think the worst is over." + ); + } else { + const char * helpinfo = + "I've inserted something that you may have forgotten. (See the <inserted text>\n" + "above.) With luck, this will get me unwedged."; + halfword h = tex_get_available_token(null); + tex_back_input(cur_tok); + /*tex + Prepare to insert a token that matches |cur_group|, and print what it is. At this point, + |link (temp_token_head) = p|, a pointer to an empty one-word node. + */ + switch (cur_group) { + case also_simple_group: + case semi_simple_group: + case math_simple_group: + { + set_token_info(h, deep_frozen_end_group_token); + tex_handle_error( + normal_error_type, + "Missing \\endgroup inserted", + helpinfo + ); + break; + } + case math_shift_group: + { + set_token_info(h, math_shift_token + '$'); + tex_handle_error( + normal_error_type, + "Missing $ inserted", + helpinfo + ); + break; + } + case math_fence_group: + { + /* maybe nicer is just a zero delimiter one */ + halfword q = tex_get_available_token(period_token); + halfword f = node_next(cur_list.head); + set_token_info(h, deep_frozen_right_token); + set_token_link(h, q); + if (! (f && node_type(f) == fence_noad && has_noad_option_nocheck(f))) { + tex_handle_error( + normal_error_type, + "Missing \\right. inserted", + helpinfo + ); + } + break; + } + default: + { + set_token_info(h, right_brace_token + '}'); + tex_handle_error( + normal_error_type, + "Missing } inserted", + helpinfo + ); + break; + } + } + tex_begin_inserted_list(h); + } +} + +/*tex + + Discretionary nodes are easy in the common case |\-|, but in the general case we must process + three braces full of items. 
+ + The space factor does not change when we append a discretionary node, but it starts out as 1000 + in the subsidiary lists. + +*/ + +static void tex_aux_run_discretionary(void) +{ + switch (cur_chr) { + case normal_discretionary_code: + /*tex |\discretionary| */ + { + halfword d = tex_new_disc_node(normal_discretionary_code); + tex_tail_append(d); + while (1) { + switch (tex_scan_character("pocPOC", 0, 1, 0)) { + case 0: + goto DONE; + case 'p': case 'P': + switch (tex_scan_character("eorEOR", 0, 0, 0)) { + case 'e': case 'E': + if (tex_scan_mandate_keyword("penalty", 2)) { + set_disc_penalty(d, tex_scan_int(0, NULL)); + } + break; + case 'o': case 'O': + if (tex_scan_mandate_keyword("postword", 2)) { + set_disc_option(d, disc_option_post_word); + } + break; + case 'r': case 'R': + if (tex_scan_mandate_keyword("preword", 2)) { + set_disc_option(d, disc_option_pre_word); + } + break; + default: + tex_aux_show_keyword_error("penalty|postword|preword"); + goto DONE; + } + break; + case 'o': case 'O': + if (tex_scan_mandate_keyword("options", 1)) { + set_disc_options(d, tex_scan_int(0, NULL)); + } + break; + case 'c': case 'C': + if (tex_scan_mandate_keyword("class", 1)) { + set_disc_class(d, tex_scan_math_class_number(0)); + } + break; + default: + goto DONE; + } + } + DONE: + tex_set_saved_record(saved_discretionary_item_component, saved_discretionary_count, 0, 0); + lmt_save_state.save_stack_data.ptr += saved_discretionary_n_of_items; + tex_new_save_level(discretionary_group); + tex_scan_left_brace(); + tex_push_nest(); + cur_list.mode = -hmode; + cur_list.space_factor = default_space_factor; /* hm, quite hard coded */ + } + break; + case explicit_discretionary_code: + /*tex |\-| */ + if (hyphenation_permitted(hyphenation_mode_par, explicit_hyphenation_mode)) { + halfword d = tex_new_disc_node(explicit_discretionary_code); + tex_tail_append(d); + int c = tex_get_pre_hyphen_char(cur_lang_par); + if (c > 0) { + tex_set_disc_field(d, pre_break_code, 
tex_new_char_node(glyph_unset_subtype, cur_font_par, c, 1)); + } + c = tex_get_post_hyphen_char(cur_lang_par); + if (c > 0) { + tex_set_disc_field(d, post_break_code, tex_new_char_node(glyph_unset_subtype, cur_font_par, c, 1)); + } + disc_penalty(d) = tex_explicit_disc_penalty(hyphenation_mode_par); + } + break; + case automatic_discretionary_code: + case mathematics_discretionary_code: + /*tex |-| */ + if (hyphenation_permitted(hyphenation_mode_par, automatic_hyphenation_mode)) { + halfword d = tex_new_disc_node(automatic_discretionary_code); + tex_tail_append(d); + /*tex As done in hyphenator: */ + halfword c = tex_get_pre_exhyphen_char(cur_lang_par); + if (c <= 0) { + c = ex_hyphen_char_par; + } + if (c > 0) { + tex_set_disc_field(d, pre_break_code, tex_new_char_node(glyph_unset_subtype, cur_font_par, c, 1)); + } + c = tex_get_post_exhyphen_char(cur_lang_par); + if (c > 0) { + tex_set_disc_field(d, post_break_code, tex_new_char_node(glyph_unset_subtype, cur_font_par, c, 1)); + } + c = ex_hyphen_char_par; + if (c > 0) { + tex_set_disc_field(d, no_break_code, tex_new_char_node(glyph_unset_subtype, cur_font_par, c, 1)); + } + disc_penalty(d) = tex_automatic_disc_penalty(hyphenation_mode_par); + } else { + halfword c = ex_hyphen_char_par; + if (c > 0) { + c = tex_new_char_node(glyph_unset_subtype, cur_font_par, c, 1); + set_glyph_discpart(c, glyph_discpart_always); + tex_tail_append(c); + } + } + break; + } +} + +/*tex + + The three discretionary lists are constructed somewhat as if they were hboxes. A subroutine + called |finish_discretionary| handles the transitions. (This is sort of fun.) 
+ +*/ + +static void tex_aux_finish_discretionary(void) +{ + halfword p, q, d; /* for link manipulation */ + int n = 0; /* length of discretionary list */ + tex_unsave(); + /*tex + Prune the current list, if necessary, until it contains only |char_node|, |kern_node|, + |hlist_node|, |vlist_node| and |rule_node| items; set |n| to the length of the list, and + set |q| to the lists tail. During this loop, |p = node_next(q)| and there are |n| items + preceding |p|. + */ + q = cur_list.head; + p = node_next(q); + while (p) { + switch (node_type(p)) { + case glyph_node: + case hlist_node: + case vlist_node: + case rule_node: + case kern_node: + break; + case glue_node: + if (hyphenation_permitted(hyphenation_mode_par, permit_glue_hyphenation_mode)) { + if (glue_stretch_order(p)) { + glue_stretch(p) = 0; + glue_stretch_order(p) = 0; + } + if (glue_shrink_order(p)) { + glue_shrink(p) = 0; + glue_shrink_order(p) = 0; + } + break; + } else { + // fall through + } + default: + if (hyphenation_permitted(hyphenation_mode_par, permit_all_hyphenation_mode)) { + break; + } else { + tex_handle_error( + normal_error_type, + "Improper discretionary list", + "Discretionary lists must contain only glyphs, boxes, rules and kerns." 
+ ); + tex_begin_diagnostic(); + tex_print_str("The following discretionary sublist has been deleted:"); + tex_print_levels(); + tex_show_box(p); + tex_end_diagnostic(); + tex_flush_node_list(p); + node_next(q) = null; + goto DONE; + } + } + node_prev(p) = q; + q = p; + p = node_next(q); + ++n; + } + DONE: + p = node_next(cur_list.head); + tex_pop_nest(); + d = cur_list.tail; + if (saved_type(saved_discretionary_item_component - saved_discretionary_n_of_items) == saved_discretionary_count) { + switch (saved_value(saved_discretionary_item_component - saved_discretionary_n_of_items)) { + case 0: + if (n > 0) { + tex_set_disc_field(d, pre_break_code, p); + } + break; + case 1: + if (n > 0) { + tex_set_disc_field(d, post_break_code, p); + } + break; + case 2: + /*tex + Attach list |p| to the current list, and record its length; then finish up and + |return|. + */ + if (n > 0) { + if (cur_mode == mmode && ! hyphenation_permitted(hyphenation_mode_par, permit_math_replace_hyphenation_mode)) { + tex_handle_error( + normal_error_type, + "Illegal math \\discretionary", + "Sorry: The third part of a discretionary break must be empty, in math formulas. I\n" + "had to delete your third part." + ); + tex_flush_node_list(p); + } else { + tex_set_disc_field(d, no_break_code, p); + } + } + if (! hyphenation_permitted(hyphenation_mode_par, normal_hyphenation_mode)) { + halfword n = disc_no_break_head(d); + cur_list.tail = node_prev(cur_list.tail); + node_next(cur_list.tail) = null; + if (n) { + tex_tail_append(n); + cur_list.tail = disc_no_break_tail(d); + tex_set_disc_field(d, no_break_code, null); + tex_set_discpart(d, n, disc_no_break_tail(d), glyph_discpart_replace); + } + tex_flush_node(d); + } else if (cur_mode == mmode && disc_class(d) != unset_disc_class) { + halfword n = null; + cur_list.tail = node_prev(d); + node_prev(d) = null; + node_next(d) = null; + n = tex_math_make_disc(d); + tex_tail_append(n); + } + /*tex There are no other cases. 
*/ + lmt_save_state.save_stack_data.ptr -= saved_discretionary_n_of_items; + return; + default: + break; + } + tex_set_saved_record(saved_discretionary_item_component - saved_discretionary_n_of_items, saved_discretionary_count, 0, saved_value(saved_discretionary_item_component - saved_discretionary_n_of_items) + 1); + tex_new_save_level(discretionary_group); + tex_scan_left_brace(); + tex_push_nest(); + cur_list.mode = -hmode; + cur_list.space_factor = default_space_factor; + } else { + tex_confusion("finish discretionary"); + } +} + +/*tex + + The routine for a |right_brace| character branches into many subcases, since a variety of things + may happen, depending on |cur_group|. Some types of groups are not supposed to be ended by a + right brace; error messages are given in hopes of pinpointing the problem. Most branches of this + routine will be filled in later, when we are ready to understand them; meanwhile, we must prepare + ourselves to deal with such errors. + + When the right brace occurs at the end of an |\hbox| or |\vbox| or |\vtop| construction, the + |package| routine comes into action. We might also have to finish a paragraph that hasn't ended. +*/ + +static void tex_aux_extra_right_brace_error(void) +{ + const char * helpinfo = + "I've deleted a group-closing symbol because it seems to be spurious, as in\n" + "'$x}$'. 
But perhaps the } is legitimate and you forgot something else, as in\n" + "'\\hbox{$x}'."; + switch (cur_group) { + case also_simple_group: + case semi_simple_group: + tex_handle_error( + normal_error_type, + "Extra }, or forgotten %eendgroup", + helpinfo + ); + break; + case math_simple_group: + tex_handle_error( + normal_error_type, + "Extra }, or forgotten %eendmathgroup", + helpinfo + ); + break; + case math_shift_group: + tex_handle_error( + normal_error_type, + "Extra }, or forgotten $", + helpinfo + ); + break; + case math_fence_group: + tex_handle_error( + normal_error_type, + "Extra }, or forgotten %eright", + helpinfo + ); + break; + } + ++lmt_input_state.align_state; +} + +inline static void tex_aux_finish_hbox(void) +{ + tex_aux_fixup_directions_only(); + tex_package(hpack_code); +} + +inline static void tex_aux_finish_adjusted_hbox(void) +{ + lmt_packaging_state.post_adjust_tail = post_adjust_head; + lmt_packaging_state.pre_adjust_tail = pre_adjust_head; + lmt_packaging_state.post_migrate_tail = post_migrate_head; + lmt_packaging_state.pre_migrate_tail = pre_migrate_head; + tex_package(hpack_code); +} + +inline static void tex_aux_finish_vbox(void) +{ + if (! tex_wrapped_up_paragraph(vbox_par_context)) { + tex_end_paragraph(vbox_group, vbox_par_context); + tex_package(vpack_code); + } +} + +inline static void tex_aux_finish_vtop(void) +{ + if (! tex_wrapped_up_paragraph(vtop_par_context)) { + tex_end_paragraph(vtop_group, vtop_par_context); + tex_package(vtop_code); + } +} + +inline static void tex_aux_finish_simple_group(void) +{ + tex_aux_fixup_directions_and_unsave(); +} + +static void tex_aux_finish_bottom_level_group(void) +{ + tex_handle_error( + normal_error_type, + "Too many }'s", + "You've closed more groups than you opened. Such booboos are generally harmless,\n" + "so keep going." 
+ ); +} + +inline static void tex_aux_finish_output(void) +{ + tex_pop_text_dir_ptr(); + tex_resume_after_output(); +} + +static void tex_aux_run_right_brace(void) +{ + switch (cur_group) { + case bottom_level_group: + tex_aux_finish_bottom_level_group(); + break; + case simple_group: + tex_aux_finish_simple_group(); + break; + case hbox_group: + tex_aux_finish_hbox(); + break; + case adjusted_hbox_group: + tex_aux_finish_adjusted_hbox(); + break; + case vbox_group: + tex_aux_finish_vbox(); + break; + case vtop_group: + tex_aux_finish_vtop(); + break; + case align_group: + tex_finish_alignment_group(); + break; + case no_align_group: + tex_finish_no_alignment_group(); + break; + case output_group: + tex_aux_finish_output(); + break; + case math_group: + tex_finish_math_group(); + break; + case discretionary_group: + tex_aux_finish_discretionary(); + break; + case insert_group: + tex_finish_insert_group(); + break; + case vadjust_group: + tex_finish_vadjust_group(); + break; + case vcenter_group: + tex_finish_vcenter_group(); + break; + case math_fraction_group: + tex_finish_math_fraction(); + break; + case math_operator_group: + tex_finish_math_operator(); + break; + case math_choice_group: + tex_finish_math_choice(); + break; + case also_simple_group: + case math_simple_group: + // cur_group = semi_simple_group; /* probably not needed */ + tex_aux_run_end_group(); + break; + case semi_simple_group: + case math_shift_group: + case math_fence_group: /*tex See above, let's see when we are supposed to end up here. */ + tex_aux_extra_right_brace_error(); + break; + case local_box_group: + tex_aux_finish_local_box(); + break; + default: + tex_confusion("right brace"); + break; + } +} + +/*tex + + Here is where we clear the parameters that are supposed to revert to their default values after + every paragraph and when internal vertical mode is entered. 
+ +*/ + +void tex_normal_paragraph(int context) +{ + int ignore = 0; + lmt_main_control_state.last_par_context = context; + lmt_paragraph_context_callback(context, &ignore); + if (! ignore) { + if (looseness_par) { + update_tex_looseness(0); + } + if (hang_indent_par) { + update_tex_hang_indent(0); + } + if (hang_after_par != 1) { + update_tex_hang_after(1); + } + if (par_shape_par) { + update_tex_par_shape(null); + } + if (inter_line_penalties_par) { + update_tex_inter_line_penalties(null); + } + } +} + +/*tex + + The global variable |cur_box| will point to a newly-made box. If the box is void, we will have + |cur_box = null|. Otherwise we will have |type(cur_box) = hlist_node| or |vlist_node| or + |rule_node|; the |rule_node| case can occur only with leaders. + + The |box_end| procedure does the right thing with |boxnode|, if |boxcontext| represents the + context as explained above. The |boxnode| variable is either a list node or a register index. + In some cases we communicate via a state variable. + +*/ + +static void tex_aux_wrapup_leader_box(halfword boxcontext, halfword boxnode) +{ + /*tex Append a new leader node that uses |box| and get the next non-blank non-relax. 
*/ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd || cur_cmd == relax_cmd); + if ((cur_cmd == hskip_cmd && cur_mode != vmode) || (cur_cmd == vskip_cmd && cur_mode == vmode)) { + tex_aux_run_glue(); /* uses cur_chr */ + switch (boxcontext) { + case a_leaders_flag: + node_subtype(cur_list.tail) = a_leaders; + break; + case c_leaders_flag: + node_subtype(cur_list.tail) = c_leaders; + break; + case x_leaders_flag: + node_subtype(cur_list.tail) = x_leaders; + break; + case g_leaders_flag: + node_subtype(cur_list.tail) = g_leaders; + break; + case u_leaders_flag: + switch (node_type(boxnode)) { + case hlist_node: + if (cur_mode != vmode) { + node_subtype(cur_list.tail) = u_leaders; + glue_amount(cur_list.tail) += box_width(boxnode); + } else { + node_subtype(cur_list.tail) = a_leaders; + } + break; + case vlist_node: + if (cur_mode == vmode) { + node_subtype(cur_list.tail) = u_leaders; + glue_amount(cur_list.tail) += box_total(boxnode); + } else { + node_subtype(cur_list.tail) = a_leaders; + } + break; + default: + /* yet unsupported */ + node_subtype(cur_list.tail) = a_leaders; + break; + } + break; + } + glue_leader_ptr(cur_list.tail) = boxnode; + } else { + tex_handle_error( + back_error_type, + "Leaders not followed by proper glue", + "You should say '\\leaders <box or rule><hskip or vskip>'. I found the <box or\n" + "rule>, but there's no suitable <hskip or vskip>, so I'm ignoring these leaders." + ); + tex_flush_node_list(boxnode); + } +} + +void tex_box_end(int boxcontext, halfword boxnode, scaled shift, halfword mainclass) +{ + cur_box = boxnode; + if (boxcontext < box_flag) { + /*tex + + Append box |boxnode| to the current list, shifted by |boxcontext|. The global variable + |adjust_tail| will be non-null if and only if the current box might include adjustments + that should be appended to the current vertical list. + + Having shift in the box context is kind of strange but as long as we stay below maxdimen + it works. 
+ + We now pass the shift directly, so no boxcontext trick here. + + */ + + if (boxnode) { + // box_shift_amount(boxnode) = boxcontext; + if (shift != null_flag) { + box_shift_amount(boxnode) = shift; + } + switch (cur_mode) { + case vmode: + if (lmt_packaging_state.pre_adjust_tail) { + if (pre_adjust_head != lmt_packaging_state.pre_adjust_tail) { + tex_inject_adjust_list(pre_adjust_head, 1, boxnode, NULL); + } + lmt_packaging_state.pre_adjust_tail = null; + } + if (lmt_packaging_state.pre_migrate_tail) { + if (pre_migrate_head != lmt_packaging_state.pre_migrate_tail) { + tex_append_list(pre_migrate_head, lmt_packaging_state.pre_migrate_tail); + } + lmt_packaging_state.pre_migrate_tail = null; + } + tex_append_to_vlist(boxnode, lua_key_index(box), NULL); + if (lmt_packaging_state.post_migrate_tail) { + if (post_migrate_head != lmt_packaging_state.post_migrate_tail) { + tex_append_list(post_migrate_head, lmt_packaging_state.post_migrate_tail); + } + lmt_packaging_state.post_migrate_tail = null; + } + if (lmt_packaging_state.post_adjust_tail) { + if (post_adjust_head != lmt_packaging_state.post_adjust_tail) { + tex_inject_adjust_list(post_adjust_head, 1, null, NULL); + } + lmt_packaging_state.post_adjust_tail = null; + } + if (cur_list.mode > nomode) { + if (! 
lmt_page_builder_state.output_active) { + lmt_page_filter_callback(box_page_context, 0); + } + tex_build_page(); + } + break; + case hmode: + cur_list.space_factor = default_space_factor; + tex_couple_nodes(cur_list.tail, boxnode); + cur_list.tail = boxnode; + break; + /* case mmode: */ + default: + boxnode = tex_new_sub_box(boxnode); + tex_couple_nodes(cur_list.tail, boxnode); + cur_list.tail = boxnode; + if (mainclass != unset_noad_class) { + set_noad_classes(boxnode, mainclass); + } + break; + } + } else { + /* just scanning */ + } + } else if (boxcontext < global_box_flag) { + /*tex Store |box| in a local box register */ + update_tex_box_local(boxcontext, boxnode); + } else if (boxcontext <= max_global_box_flag) { + /*tex Store |box| in a global box register */ + update_tex_box_global(boxcontext, boxnode); + } else { + switch (boxcontext) { + case shipout_flag: + /*tex This normally can't happen as some backend code needs to kick in. */ + if (boxnode) { + /*tex We just show the box ... */ + tex_begin_diagnostic(); + tex_show_node_list(boxnode, max_integer, max_integer); + tex_end_diagnostic(); + /*tex ... and wipe it when it's a register ... */ + if (box_register(boxnode)) { + tex_flush_node_list(boxnode); + box_register(boxnode) = null; + } + /*tex ... so there is at least an indication that we flushed. */ + } + break; + case left_box_flag: + case right_box_flag: + case middle_box_flag: + /*tex Actualy, this cannot happen ... will go away. */ + tex_aux_finish_local_box(); + break; + case lua_scan_flag: + /*tex We are done with scanning so let's return to the caller. 
*/ + tex_aux_wrapup_local_scan_box(); + cur_box = boxnode; + break; + case a_leaders_flag: + case c_leaders_flag: + case x_leaders_flag: + case g_leaders_flag: + case u_leaders_flag: + tex_aux_wrapup_leader_box(boxcontext, boxnode); + break; + default: + /* fatal error */ + break; + } + } +} + +/*tex + + The canonical \TEX\ engine(s) inject an indentation box, so there is always something at the beginning that + also acts as a boundary. However, when snapshotting was introduced it made also sense to turn the parindent + related hlist into a glue. We might need to adapt the parbuilder but it looks liek that is not needed. Of + course, an |\unskip| will now also unskip the parindent but there are ways to prevent this. I'll test it for + a while, which is why we have a way to enable it. The glue is {\em always} injected, also when it's zero. + +*/ + +void tex_begin_paragraph(int doindent, int context) +{ + halfword q; + int indented = doindent; + int isvmode = cur_list.mode == vmode; + if (isvmode || cur_list.head != cur_list.tail) { + /*tex + Actually we could remove the callback and hook it into the |\everybeforepar| but that one + started out as a |tex.expandmacro| itself and we don't want the callback overhead every + time, so now we have both. However, in the end I decided to do this one {\em before} the + parskip is injected. 
+ */ + if (every_before_par_par) { + tex_begin_inserted_list(tex_get_available_token(token_val(end_local_cmd, 0))); + tex_begin_token_list(every_before_par_par, every_before_par_text); + if (tracing_nesting_par > 2) { + tex_local_control_message("entering local control via \\everybeforepar"); + } + tex_local_control(1); + } + // if (type(cur_list.tail) == glue_node && subtype(cur_list.tail) == par_skip_glue) { + // /* ignore */ + // } else { + tex_tail_append(tex_new_param_glue_node(par_skip_code, par_skip_glue)); + // } + } + lmt_begin_paragraph_callback(isvmode, &indented, context); + /*tex We'd better not messed up things in the callback! */ + cur_list.prev_graf = 0; + tex_push_nest(); + cur_list.mode = hmode; + cur_list.space_factor = default_space_factor; + /*tex Add local paragraph node */ + tex_tail_append(tex_new_par_node(vmode_par_par_subtype)); + // if (end_of_par_par) { + // update_tex_end_of_par(null); /* option */ + // } + q = cur_list.tail; + /*tex We will move this to after the dir nodes have been dealt with. */ + tex_aux_insert_parindent(indented); + /*tex Dir nodes end up before the indent box. */ + { + halfword dir_rover = lmt_dir_state.text_dir_ptr; + while (dir_rover) { + if ((node_next(dir_rover)) || (dir_direction(dir_rover) != par_direction_par)) { + halfword dir_graf_tmp = tex_new_dir(normal_dir_subtype, dir_direction(dir_rover)); + tex_try_couple_nodes(dir_graf_tmp, node_next(q)); + tex_couple_nodes(q, dir_graf_tmp); + } + dir_rover = node_next(dir_rover); + } + } + /*tex We might need to go to the last injected dir and/or indent node. */ + while (node_next(q)) { + q = node_next(q); + } + cur_list.tail = q; + /*tex The |\everypar| tokens are injected after dir nodes have been added. */ + if (every_par_par) { + tex_begin_token_list(every_par_par, every_par_text); + } + if (lmt_nest_state.nest_data.ptr == 1) { + if (! 
lmt_page_builder_state.output_active) { + lmt_page_filter_callback(begin_paragraph_page_context, 0); + } + /*tex put |par_skip| glue on current page */ + tex_build_page(); + } +} + +void tex_insert_paragraph_token(void) +{ + if (auto_paragraph_mode_par > 0) { + cur_tok = token_val(end_paragraph_cmd, inserted_end_paragraph_code); + // cur_tok = token_val(end_paragraph_cmd, normal_end_paragraph_code); + // cur_cs = null; + } else { + cur_tok = lmt_token_state.par_token; + } +} + +static void tex_aux_run_head_for_vmode(void) +{ + if (cur_list.mode >= nomode) { + tex_back_input(cur_tok); + /*tex + We could have a callback here but on the other hand, we really need to be in vmode + afterwards! Also, a macro package can just test for the mode at that spot which is + less hassle than making a callback identify what is needed. A return value would + indicate to not inject a par when we're in vmode and only very dirty \LUA\ code can + change modes here by messing with the list so far. So, unless I find a real use case + we just continue. + */ + tex_insert_paragraph_token(); + tex_back_input(cur_tok); + lmt_input_state.cur_input.token_type = inserted_text; + } else if (cur_cmd != hrule_cmd) { + tex_off_save(); + } else { + tex_handle_error( + normal_error_type, + "You can't use '\\hrule' here except with leaders", + "To put a horizontal rule in an hbox or an alignment, you should use \\leaders or\n" + "\\hrulefill (see The TeXbook)." + ); + } +} + +/*tex + + We don't have |hkern_cmd| and |vkern_cmd| and it makes no sense to introduce them now so instead + of handling modes in the big switch we do it here. Because we need to be compatible we would end + up with three |cmd| codes anyway. The rationale for |\hkern| and |\vkern| is consistency of + primitives, while |\nonzerowidthkern| can make node lists smaller which is nice for \LUA\ based + juggling. 
+ +*/ + +/* +static void tex_aux_run_kern(void) +{ + halfword val = tex_scan_dimen(0, 0, 0, 0, NULL); + tex_tail_append(tex_new_kern_node(val, explicit_kern)); +} +*/ + +static void tex_aux_run_kern(void) +{ + halfword code = cur_chr; + halfword val = tex_scan_dimen(0, 0, 0, 0, NULL); + switch (code) { + case normal_kern_code: + break; + case h_kern_code: + if (cur_mode == mmode) { + break; + } else { + cur_tok = token_val(kern_cmd, normal_kern_code); + tex_aux_run_new_paragraph(); + return; + } + break; + case v_kern_code: + if (cur_mode == mmode) { + break; + } else { + cur_tok = token_val(kern_cmd, normal_kern_code); + tex_aux_run_head_for_vmode(); + return; + } + case non_zero_width_kern_code: + if (val) { + break; + } else { + return; + } + } + tex_tail_append(tex_new_kern_node(val, explicit_kern_subtype)); +} + +static void tex_aux_run_mkern(void) +{ + halfword val = tex_scan_dimen(1, 0, 0, 0, NULL); + tex_tail_append(tex_new_kern_node(val, explicit_math_kern_subtype)); +} + +/*tex + + |cur_list.dirs| would have been set by |line_break| by means of |post_line_break|, but this is + not done right now, as it introduces pretty heavy memory leaks. This means the current code + might be wrong in some way that relates to in-paragraph displays. + +*/ + +static int tex_aux_only_dirs(halfword n) +{ + while (n) { + switch (node_type(n)) { + case par_node: + case dir_node: + n = node_next(n); + break; + /*tex + This can become an option if realy needed but it kind of violates the enforced + hmode, so we stay compatible. But contrary to \LUATEX\ a |\noindent| is seen as + content trigger. 
+ */ + case glue_node: + if (tex_is_par_init_glue(n)) { + n = node_next(n); + break; + } + default: + return 0; + } + } + return 1; +} + +void tex_end_paragraph(int group, int context) +{ + if (cur_list.mode == hmode) { + if (cur_list.head == cur_list.tail) { + /*tex |null| paragraphs are ignored, all contain a |par| node */ + tex_pop_nest(); + } else if (tex_aux_only_dirs(node_next(cur_list.head))) { + tex_flush_node(node_next(cur_list.head)); + /* cur_list.tail = cur_list.head; */ /* probably needed */ + tex_pop_nest(); + // if (cur_list.head == cur_list.tail || node_next(cur_list.head) == cur_list.tail) { + // if (node_next(cur_list.head) == cur_list.tail) { + // tex_flush_node(node_next(cur_list.head)); + // // cur_list.tail = cur_list.head; + // } + // tex_pop_nest(); + } else { + tex_line_break(0, group); + } + if (cur_list.direction_stack) { + tex_flush_node_list(cur_list.direction_stack); + cur_list.direction_stack = null; + } + tex_normal_paragraph(context); + lmt_error_state.error_count = 0; + } +} + +static void tex_aux_run_penalty(void) +{ + halfword value = tex_scan_int(0, NULL); + tex_tail_append(tex_new_penalty_node(value, user_penalty_subtype)); + if (cur_list.mode == vmode) { + if (! lmt_page_builder_state.output_active) { + lmt_page_filter_callback(penalty_page_context, 0); + } + tex_build_page(); + } +} + +/*tex + + When |delete_last| is called, |cur_chr| is the |type| of node that will be deleted, if present. + The |remove_item| command removes a penalty, kern, or glue node if it appears at the tail of + the current list, using a brute-force linear scan. Like |\lastbox|, this command is not allowed + in vertical mode (except internal vertical mode), since the current list in vertical mode is + sent to the page builder. But if we happen to be able to implement it in vertical mode, we do. 
+
+*/
+
+static void tex_aux_run_remove_item(void)
+{
+    /*tex |cur_chr| selects the kind of item (kern, penalty, skip, boundary) to remove. */
+    halfword code = cur_chr;
+    halfword head = cur_list.head;
+    halfword tail = cur_list.tail;
+    if (cur_list.mode == vmode && tail == head) {
+        /*tex
+            Apologize for inability to do the operation now, unless |\unskip|
+            follows non-glue. It's a bit of a weird test.
+        */
+        if ((code != skip_item_code) || (lmt_page_builder_state.last_glue != max_halfword)) {
+            switch (code) {
+                case kern_item_code:
+                    tex_you_cant_error(
+                        "Sorry...I usually can't take things from the current page.\n"
+                        "Try '\\kern-\\lastkern' instead."
+                    );
+                    break;
+                case penalty_item_code:
+                case boundary_item_code:
+                    tex_you_cant_error(
+                        "Sorry...I usually can't take things from the current page.\n"
+                        "Perhaps you can make the output routine do it."
+                    );
+                    break;
+                case skip_item_code:
+                    tex_you_cant_error(
+                        "Sorry...I usually can't take things from the current page.\n"
+                        "Try '\\vskip-\\lastskip' instead."
+                    );
+                    break;
+            }
+        }
+//  } else if (node_type(tail) != glyph_node) {
+//      /*tex
+//          Officially we don't need to check what we remove because it can be only one of
+//          three, unless one creates one independently (in \LUA). So, we just do check and
+//          silently ignore bad code.
+//      */
+//      halfword p;
+//      switch (code) {
+//          case kern_item_code    : if (node_type(tail) != kern_node   ) { return; } else { break; }
+//          case penalty_item_code : if (node_type(tail) != penalty_node) { return; } else { break; }
+//          case skip_item_code    : if (node_type(tail) != glue_node   ) { return; } else { break; }
+//      }
+//      /*tex
+//          There is some magic testing here that makes sure we don't mess up any discretionary
+//          nodes. But why do we care?
+//      */
+//      do {
+//          p = head;
+//          if (p == tail && node_type(head) == disc_node) {
+//              return;
+//          } else {
+//              head = node_next(p);
+//          }
+//      } while (head != tail);
+//      node_next(p) = null;
+//      tex_flush_node_list(tail);
+//      cur_list.tail = p;
+//  }
+    } else {
+        /*tex
+            Officially we don't need to check what we remove because it can be only one of
+            three, unless one creates one independently (in \LUA). So, we just do check and
+            silently ignore bad code. The tail is only removed when its node type matches
+            the requested item; a boundary must in addition be a user boundary.
+        */
+        switch (node_type(tail)) {
+            case kern_node :
+                if (code == kern_item_code) {
+                    break;
+                } else {
+                    return;
+                }
+            case penalty_node :
+                if (code == penalty_item_code) {
+                    break;
+                } else {
+                    return;
+                }
+            case glue_node :
+                if (code == skip_item_code) {
+                    break;
+                } else {
+                    return;
+                }
+            case boundary_node :
+                if (node_subtype(tail) == user_boundary && code == boundary_item_code) {
+                    break;
+                } else {
+                    return;
+                }
+            default:
+                return;
+        }
+        {
+            /*tex
+                There is some magic testing here that makes sure we don't mess up any discretionary
+                nodes. But why do we care? The loop walks from the head of the list until |p| is
+                the predecessor of |tail|; then the tail is unlinked, flushed and |p| becomes the
+                new tail.
+            */
+            halfword p;
+            do {
+                p = head;
+                if (p == tail && node_type(head) == disc_node) {
+                    return;
+                } else {
+                    head = node_next(p);
+                }
+            } while (head != tail);
+            node_next(p) = null;
+            tex_flush_node_list(tail);
+            cur_list.tail = p;
+        }
+    }
+
+}
+
+/*tex
+
+    Italic corrections are converted to kern nodes when the |italic_correction| command follows a
+    character. In math mode the same effect is achieved by appending a kern of zero here, since
+    italic corrections are supplied later.
+
+*/
+
+static void tex_aux_run_text_italic_correction(void)
+{
+    halfword tail = cur_list.tail;
+    /*tex Nothing happens unless the list is non-empty and its last node is a glyph. */
+    if (tail != cur_list.head && node_type(tail) == glyph_node) {
+     // tex_tail_append(tex_new_kern_node(tex_char_italic_from_font(glyph_font(tail), glyph_character(tail)), italic_kern));
+        tex_tail_append(tex_new_kern_node(tex_char_italic_from_glyph(tail), italic_kern_subtype)); /* scaled */
+    }
+}
+
+/*tex
+
+    The positioning of accents is straightforward but tedious.
Given an accent of width |a|, + designed for characters of height |x| and slant |s|; and given a character of width |w|, + height |h|, and slant |t|: We will shift the accent down by |x - h|, and we will insert kern + nodes that have the effect of centering the accent over the character and shifting the accent + to the right by $\delta = {1 \over 2} (w-a) + h \cdot t - x \cdot s$. If either character is + absent from the font, we will simply use the other, without shifting. + + While much is delegated to builders this is one of the few places where the action happens + directly. Of course, in a \UNICODE\ engine this command is not really relevant but here we + even extended it with optional offsets! + +*/ + +static void tex_aux_run_text_accent(void) +{ + halfword fnt = cur_font_par; + halfword accent = null; + halfword base = null; + scaled xoffset = 0; + scaled yoffset = 0; + while (1) { + switch (tex_scan_character("xyXY", 0, 1, 0)) { + case 'x': case 'X': + if (tex_scan_mandate_keyword("xoffset", 1)) { + xoffset = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'y': case 'Y': + if (tex_scan_mandate_keyword("yoffset", 1)) { + yoffset = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + default: + goto DONE; + } + } + DONE: + accent = tex_new_char_node(glyph_unset_subtype, fnt, tex_scan_char_number(0), 1); + if (accent) { + /*tex + Create a character node |q| for the next character, but set |q := null| if problems + arise. + */ + scaled x = tex_get_scaled_ex_height(fnt); + double s = (double) (tex_get_font_slant(fnt)) / (double) (65536); + scaled a = tex_glyph_width(accent); + /*tex + Here we had |handle_assignments| which is a bit confusing one so we inlined it, probably + at the cost of some error recovery compatibility, which we don't worry too much about. + It looks like skipping spaces and relax is okay. The (original \TEX\ idea is that one + can change a font in between which is why the |fnt| variable gets set again. 
Because in + practice switching a font can involve more than assignments wd could be more tolerant + and often wrapping in |\localcontrolled| is more robust then. + */ + /* handle_assignments(); */ + fnt = cur_font_par; + PICKUP: + switch (cur_cmd) { + case spacer_cmd: + case relax_cmd: + tex_get_x_token(); + goto PICKUP; + case letter_cmd: + case other_char_cmd: + case char_given_cmd: + base = tex_new_glyph_node(glyph_unset_subtype, fnt, cur_chr, accent); + break; + case char_number_cmd: + /* We don't accept keywords for |\glyph|. */ + base = tex_new_glyph_node(glyph_unset_subtype, fnt, tex_scan_char_number(0), accent); + break; + default: + /* compatibility hack, not that useful nowadays */ + if (cur_cmd <= max_non_prefixed_cmd) { + tex_back_input(cur_tok); + break; + } else { + lmt_error_state.set_box_allowed = 0; + tex_run_prefixed_command(); + lmt_error_state.set_box_allowed = 0; + goto PICKUP; + } + } + if (base) { + /*tex + Append the accent with appropriate kerns, then set |p := q|. The kern nodes + appended here must be distinguished from other kerns, lest they be wiped away by + the hyphenation algorithm or by a previous line break. The two kerns are computed + with (machine dependent) |real| arithmetic, but their sum is machine independent; + the net effect is machine independent, because the user cannot remove these nodes + nor access them via |\lastkern|. + + This goes away: not listening to scaled yet. 
+ + */ + double t = (double) (tex_get_font_slant(fnt)) / (double) (65536); /* amount of slant */ + scaled w = tex_glyph_width(base); + scaled h = tex_glyph_height(base); + scaled delta = glueround((double) (w - a) / (double) (2) + h * t - x * s); + halfword left = tex_new_kern_node(delta, accent_kern_subtype); + halfword right = tex_new_kern_node(- a - delta, accent_kern_subtype); + glyph_x_offset(accent) = xoffset; + glyph_y_offset(accent) = yoffset; + if (h != x) { + /*tex the accent must be shifted up or down */ + // accent = hpack(accent, 0, packing_additional, direction_unknown); + // box_shift_amount(accent) = x - h; + glyph_y_offset(accent) += x - h; + } + tex_couple_nodes(cur_list.tail, left); + tex_couple_nodes(left, accent); + tex_couple_nodes(accent, right); + tex_couple_nodes(right,base); + cur_list.tail = base; + } else { + tex_couple_nodes(cur_list.tail, accent); + cur_list.tail = accent; + } + cur_list.space_factor = default_space_factor; + } +} + +/*tex Finally, |\endcsname| is not supposed to get through to |main_control|. */ + +static void tex_aux_run_cs_error(void) +{ + tex_handle_error( + normal_error_type, + "Extra \\endcsname", + "I'm ignoring this, since I wasn't doing a \\csname." + ); +} + +/*tex + + Assignments to values in |eqtb| can be global or local. Furthermore, a control sequence can + be defined to be |\long|, |\protected|, or |\outer|, and it might or might not be expanded. + The prefixes |\global|, |\long|, |\protected|, and |\outer| can occur in any order. Therefore + we assign binary numeric codes, making it possible to accumulate the union of all specified + prefixes by adding the corresponding codes. (\PASCAL's |set| operations could also have been + used.) + + Every prefix, and every command code that might or might not be prefixed, calls the action + procedure |prefixed_command|. This routine accumulates a sequence of prefixes until coming to + a non-prefix, then it carries out the command. 
+
+*/
+
+/*tex
+    Switches the text direction to |val|. In horizontal mode with an active direction state a
+    cancel node for the current text direction is added first; when |check_glue| is set and
+    the list ends with glue, that cancel node goes in front of the glue instead of after it.
+    Then |val| is pushed on the text direction stack and, in horizontal mode, a direction node
+    for |val| tagged with the current level is appended.
+*/
+
+void tex_inject_text_or_line_dir(int val, int check_glue)
+{
+    if (cur_mode == hmode && internal_dir_state_par > 0) {
+        /*tex |tail| is non zero but we test anyway. */
+        halfword dirn = tex_new_dir(cancel_dir_subtype, text_direction_par);
+        halfword tail = cur_list.tail;
+        if (check_glue && tail && node_type(tail) == glue_node) {
+            /*tex Slip the cancel node in between the glue and its predecessor. */
+            halfword prev = node_prev(tail);
+            tex_couple_nodes(prev, dirn);
+            tex_couple_nodes(dirn, tail);
+        } else {
+            tex_tail_append(dirn);
+        }
+    }
+    tex_push_text_dir_ptr(val);
+    if (cur_mode == hmode) {
+        halfword dir = tex_new_dir(normal_dir_subtype, val);
+        dir_level(dir) = cur_level;
+        tex_tail_append(dir);
+    }
+}
+
+/*tex
+    Reports an attempt to redefine a frozen macro; the name is part of the message when |cs|
+    is non zero.
+*/
+
+static void tex_aux_show_frozen_error(halfword cs)
+{
+    if (cs) {
+        tex_handle_error(
+            normal_error_type,
+            "You can't redefine the frozen macro %S.", cs,
+            NULL
+        );
+    } else {
+        tex_handle_error(
+            normal_error_type,
+            "You can't redefine a frozen macro.",
+            NULL
+        );
+
+    }
+}
+
+/*tex
+
+    We use the fact that |register| $<$ |advance| $<$ |multiply| $<$ |divide| We compute the
+    register location |l| and its type |p| but |return| if invalid. Here we use the fact that
+    the consecutive codes |int_val .. mu_val| and |assign_int .. assign_mu_glue| correspond
+    to each other nicely.
+ +*/ + +inline static halfword tex_aux_get_register_index(int level) +{ + switch (level) { + case int_val_level: + { + halfword index = tex_scan_int_register_number(); + return register_int_location(index); + } + case dimen_val_level: + { + halfword index = tex_scan_dimen_register_number(); + return register_dimen_location(index); + } + case attr_val_level: + { + halfword index = tex_scan_attribute_register_number(); + return register_attribute_location(index); + } + case glue_val_level: + { + halfword index = tex_scan_glue_register_number(); + return register_glue_location(index); + } + case mu_val_level: + { + halfword index = tex_scan_mu_glue_register_number(); + return register_mu_glue_location(index); + } + case tok_val_level: + { + halfword index = tex_scan_toks_register_number(); + return register_toks_location(index); + } + default: + return 0; + } +} + +inline static halfword tex_aux_get_register_value(int level, int optionalequal) +{ + switch (level) { + case int_val_level: + case attr_val_level: + return tex_scan_int(optionalequal, NULL); + case dimen_val_level: + return tex_scan_dimen(0, 0, 0, optionalequal, NULL); + default: + return tex_scan_glue(level, optionalequal); + } +} + +static int tex_aux_valid_arithmic(int cmd, int *index, int *level, int *varcmd) +{ + /*tex So: |\multiply|, |\divide| or |\advance|. 
*/ + tex_get_x_token(); + *varcmd = cur_cmd; + switch (cur_cmd) { + case register_int_cmd: + case internal_int_cmd: + *index = cur_chr; + *level = int_val_level; + return 1; + case register_attribute_cmd: + case internal_attribute_cmd: + *index = cur_chr; + *level = attr_val_level; + return 1; + case register_dimen_cmd: + case internal_dimen_cmd: + *index = cur_chr; + *level = dimen_val_level; + return 1; + case register_glue_cmd: + case internal_glue_cmd: + *index = cur_chr; + *level = glue_val_level; + return 1; + case register_mu_glue_cmd: + case internal_mu_glue_cmd: + *index = cur_chr; + *level = mu_val_level; + return 1; + case register_cmd: + *level = cur_chr; + *index = tex_aux_get_register_index(*level); + return 1; + default: + tex_handle_error( + normal_error_type, + "You can't use '%C' after %C", + cur_cmd, cur_chr, cmd, 0, + "I'm forgetting what you said and not changing anything." + ); + return 0; + } +} + +static void tex_aux_arithmic_overflow_error(int level, halfword value) +{ + if (level >= glue_val_level) { + tex_flush_node(value); + } + tex_handle_error( + normal_error_type, + "Arithmetic overflow", + "I can't carry out that multiplication or division, since the result is out of\n" + "range." 
+    );
+}
+
+/*tex
+    Store |value| at location |index|, where |level| tells what kind of quantity it is and |cmd|
+    is the command that named the target: an internal parameter or a register. For integers,
+    dimensions and glue a frozen assignment to an internal parameter in horizontal mode also
+    updates the paragraph state. Glue and mu glue are stored by reference and the reference
+    command depends on whether an internal parameter or a register is set, so each of the two
+    must compare |cmd| with its own internal command.
+*/
+
+inline static void tex_aux_update_register(int a, int level, halfword index, halfword value, halfword cmd)
+{
+    switch (level) {
+        case int_val_level:
+            tex_word_define(a, index, value);
+            if (is_frozen(a) && cmd == internal_int_cmd && cur_mode == hmode) {
+                tex_update_par_par(internal_int_cmd, index - lmt_primitive_state.prim_data[cmd].offset);
+            }
+            break;
+        case attr_val_level:
+            /*tex Keep track of the highest attribute register that has been assigned to. */
+            if ((register_attribute_number(index)) > lmt_node_memory_state.max_used_attribute) {
+                lmt_node_memory_state.max_used_attribute = register_attribute_number(index);
+            }
+            change_attribute_register(a, index, value);
+            tex_word_define(a, index, value);
+            break;
+        case dimen_val_level:
+            tex_word_define(a, index, value);
+            if (is_frozen(a) && cmd == internal_dimen_cmd && cur_mode == hmode) {
+                tex_update_par_par(internal_dimen_cmd, index - lmt_primitive_state.prim_data[cmd].offset);
+            }
+            break;
+        case glue_val_level:
+            tex_define(a, index, cmd == internal_glue_cmd ? internal_glue_reference_cmd : register_glue_reference_cmd, value);
+            if (is_frozen(a) && cmd == internal_glue_cmd && cur_mode == hmode) {
+                tex_update_par_par(internal_glue_cmd, index - lmt_primitive_state.prim_data[cmd].offset);
+            }
+            break;
+        case mu_val_level:
+            /*tex Here we test for the internal mu glue command, not the regular glue one. */
+            tex_define(a, index, cmd == internal_mu_glue_cmd ? internal_mu_glue_reference_cmd : register_mu_glue_reference_cmd, value);
+            break;
+        default:
+            /* can't happen */
+            tex_word_define(a, index, value);
+            break;
+    }
+}
+
+static void tex_aux_set_register(int a)
+{
+    halfword level = cur_chr;
+    halfword varcmd = cur_cmd;
+    halfword index = tex_aux_get_register_index(level);
+    halfword value = tex_aux_get_register_value(level, 1);
+    tex_aux_update_register(a, level, index, value, varcmd);
+}
+
+static void tex_aux_arithmic_register(int a, int code)
+{
+    halfword cmd = cur_cmd;
+    halfword level = cur_chr;
+    halfword index = 0;
+    halfword varcmd = 0;
+    if (tex_aux_valid_arithmic(cmd, &index, &level, &varcmd)) {
+        halfword value = null;
+        tex_scan_optional_keyword("by");
+        lmt_scanner_state.arithmic_error = 0;
+        switch (code) {
+            case advance_code:
+                {
+                    value = tex_aux_get_register_value(level, 0);
+                    switch (level) {
+                        case int_val_level:
+                        case attr_val_level:
+                        case dimen_val_level:
+                            value += eq_value(index);
+                            break;
+                        case glue_val_level:
+                        case mu_val_level:
+                            {
+                                /* Compute the sum of two glue specs */
+                                halfword oldvalue = eq_value(index);
+                                halfword newvalue = tex_new_glue_spec_node(value);
+                                tex_flush_node(value);
+                                glue_amount(newvalue) += glue_amount(oldvalue);
+                                if (glue_stretch(newvalue) == 0) {
+                                    glue_stretch_order(newvalue) = normal_glue_order;
+                                }
+                                if (glue_stretch_order(newvalue) == glue_stretch_order(oldvalue)) {
+                                    glue_stretch(newvalue) += glue_stretch(oldvalue);
+                                } else if ((glue_stretch_order(newvalue) < glue_stretch_order(oldvalue)) && (glue_stretch(oldvalue))) {
+                                    glue_stretch(newvalue) = glue_stretch(oldvalue);
+                                    glue_stretch_order(newvalue) = glue_stretch_order(oldvalue);
+                                }
+                                if (glue_shrink(newvalue) == 0) {
+                                    glue_shrink_order(newvalue) = normal_glue_order;
+                                }
+                                if (glue_shrink_order(newvalue) == glue_shrink_order(oldvalue)) {
+                                    glue_shrink(newvalue) += glue_shrink(oldvalue);
+                                } else if ((glue_shrink_order(newvalue) < glue_shrink_order(oldvalue)) && (glue_shrink(oldvalue))) {
+                                    
glue_shrink(newvalue) = glue_shrink(oldvalue); + glue_shrink_order(newvalue) = glue_shrink_order(oldvalue); + } + value = newvalue; + break; + } + default: + /* error */ + break; + } + /*tex There is no overflow detection for addition, just wraparound. */ + tex_aux_update_register(a, level, index, value, varcmd); + break; + } + case multiply_code: + { + halfword amount = tex_scan_int(0, NULL); + switch (level) { + case int_val_level: + case attr_val_level: + value = tex_multiply_integers(eq_value(index), amount); + break; + case dimen_val_level: + value = tex_nx_plus_y(eq_value(index), amount, 0); + break; + case glue_val_level: + case mu_val_level: + { + halfword s = eq_value(index); + halfword r = tex_new_glue_spec_node(s); + glue_amount(r) = tex_nx_plus_y(glue_amount(s), amount, 0); + glue_stretch(r) = tex_nx_plus_y(glue_stretch(s), amount, 0); + glue_shrink(r) = tex_nx_plus_y(glue_shrink(s), amount, 0); + value = r; + break; + } + default: + /* error */ + break; + } + if (lmt_scanner_state.arithmic_error) { + tex_aux_arithmic_overflow_error(level, value); + } else { + tex_aux_update_register(a, level, index, value, varcmd); + } + break; + } + case divide_code: + { + halfword amount = tex_scan_int(0, NULL); + switch (level) { + case int_val_level: + case attr_val_level: + case dimen_val_level: + value = tex_x_over_n(eq_value(index), amount); + break; + case glue_val_level: + case mu_val_level: + { + halfword s = eq_value(index); + halfword r = tex_new_glue_spec_node(s); + glue_amount(r) = tex_x_over_n(glue_amount(s), amount); + glue_stretch(r) = tex_x_over_n(glue_stretch(s), amount); + glue_shrink(r) = tex_x_over_n(glue_shrink(s), amount); + value = r; + break; + } + default: + /* error */ + break; + } + if (lmt_scanner_state.arithmic_error) { + tex_aux_arithmic_overflow_error(level, value); + } else { + tex_aux_update_register(a, level, index, value, varcmd); + } + break; + } + } + } +} + +/*tex + The value of |c| is 0 for |\deadcycles|, 1 for 
|\insertpenalties|, etc. In traditional \TEX\
+    the interaction mode is set by primitives so no checking is needed. However, in \ETEX\ the
+    value can be set. As a consequence there is an error message for wrong values but here we
+    just clip the values. After all, we can also set values from \LUA\ so either we bark or we
+    just recover. So, gone is:
+
+    \starttyping
+    handle_error_int(
+        normal_error_type,
+        "Bad interaction mode (", val, ")",
+        "Modes are 0=batch, 1=nonstop, 2=scroll, and 3=errorstop. Proceed, and I'll ignore\n"
+        "this case."
+    );
+    \stoptyping
+
+    I could have decided to ignore bad values but clipping is probably better.
+
+*/
+
+inline static void tex_aux_set_interaction(halfword mode)
+{
+    tex_print_ln();
+    /*tex Values outside the range |batch_mode .. error_stop_mode| are clipped to the nearest end. */
+    if (mode < batch_mode) {
+        lmt_error_state.interaction = batch_mode;
+    } else if (mode > error_stop_mode) {
+        lmt_error_state.interaction = error_stop_mode;
+    } else {
+        lmt_error_state.interaction = mode;
+    }
+    tex_fixup_selector(lmt_fileio_state.log_opened);
+}
+
+/*tex
+    Assign a page builder or insert property selected by |cur_chr|. The page related values go
+    straight into the page builder state; the insert related ones first scan an insert index and
+    then the value, which is handed to the matching insert setter. Codes that are not handled
+    explicitly address a slot in |page_so_far|.
+*/
+
+static void tex_aux_set_page_property(void)
+{
+    switch (cur_chr) {
+        case page_goal_code:
+            lmt_page_builder_state.goal = tex_scan_dimen(0, 0, 0, 1, NULL);
+            break;
+        case page_vsize_code:
+            lmt_page_builder_state.vsize = tex_scan_dimen(0, 0, 0, 1, NULL);
+            break;
+        case page_total_code:
+            lmt_page_builder_state.total = tex_scan_dimen(0, 0, 0, 1, NULL);
+            break;
+        case page_depth_code:
+            lmt_page_builder_state.depth = tex_scan_dimen(0, 0, 0, 1, NULL);
+            break;
+        case dead_cycles_code:
+            lmt_page_builder_state.dead_cycles = tex_scan_int(1, NULL);
+            break;
+        case insert_penalties_code:
+            lmt_page_builder_state.insert_penalties = tex_scan_int(1, NULL);
+            break;
+        case insert_heights_code:
+            lmt_page_builder_state.insert_heights = tex_scan_dimen(0, 0, 0, 1, NULL);
+            break;
+        case insert_storing_code:
+            lmt_insert_state.storing = tex_scan_int(1, NULL);
+            break;
+        case insert_distance_code:
+            {
+                /*tex
+                    We need to scan the index first: C leaves the order in which function
+                    arguments are evaluated unspecified, so when both scans sit in the call the
+                    value can get scanned before the index (an out-of-order issue, index too
+                    large). The same is true for the rest.
+                */
+                int index = tex_scan_int(0, NULL);
+                tex_set_insert_distance(index, tex_scan_glue(glue_val_level, 1));
+            }
+            break;
+        case insert_multiplier_code:
+            {
+                int index = tex_scan_int(0, NULL);
+                tex_set_insert_multiplier(index, tex_scan_int(1, NULL));
+            }
+            break;
+        case insert_limit_code:
+            {
+                int index = tex_scan_int(0, NULL);
+                tex_set_insert_limit(index, tex_scan_dimen(0, 0, 0, 1, NULL));
+            }
+            break;
+        case insert_storage_code:
+            {
+                int index = tex_scan_int(0, NULL);
+                tex_set_insert_storage(index, tex_scan_int(1, NULL));
+            }
+            break;
+        case insert_penalty_code:
+            {
+                int index = tex_scan_int(0, NULL);
+                tex_set_insert_penalty(index, tex_scan_int(1, NULL));
+            }
+            break;
+        case insert_maxdepth_code:
+            {
+                int index = tex_scan_int(0, NULL);
+                tex_set_insert_maxdepth(index, tex_scan_dimen(0, 0, 0, 1, NULL));
+            }
+            break;
+        case insert_height_code:
+            {
+                int index = tex_scan_int(0, NULL);
+                tex_set_insert_height(index, tex_scan_dimen(0, 0, 0, 1, NULL));
+            }
+            break;
+        case insert_depth_code:
+            {
+                int index = tex_scan_int(0, NULL);
+                tex_set_insert_depth(index, tex_scan_dimen(0, 0, 0, 1, NULL));
+            }
+            break;
+        case insert_width_code:
+            {
+                int index = tex_scan_int(0, NULL);
+                tex_set_insert_width(index, tex_scan_dimen(0, 0, 0, 1, NULL));
+            }
+            break;
+        default:
+            lmt_page_builder_state.page_so_far[page_state_offset(cur_chr)] = tex_scan_dimen(0, 0, 0, 1, NULL);
+            break;
+    }
+}
+
+/*tex
+    The |space_factor| or |prev_depth| settings are changed when a |set_aux| command is sensed.
+    Similarly, |prev_graf| is changed in the presence of |set_prev_graf|, and |dead_cycles| or
+    |insert_penalties| in the presence of |set_page_int|. These definitions are always global.
+*/
+
+/*tex
+    The prefix flags in |a| are not used here. The space factor can only be set in horizontal
+    mode and the previous depth only in vertical mode; in other modes the illegal case handler
+    is called. A rejected space factor or |\prevgraf| value gives an error and leaves the
+    current setting untouched.
+*/
+
+static void tex_aux_set_auxiliary(int a)
+{
+    (void) a;
+    switch (cur_chr) {
+        case space_factor_code:
+            if (cur_mode == hmode) {
+                halfword v = tex_scan_int(1, NULL);
+                /*tex The lower bound itself is excluded, the upper bound is permitted. */
+                if ((v <= min_space_factor) || (v > max_space_factor)) {
+                    tex_handle_error(
+                        normal_error_type,
+                        "Bad space factor (%i). I allow only values in the range %i..%i here.",
+                        v, min_space_factor + 1, max_space_factor,
+                        NULL
+                    );
+                } else {
+                    cur_list.space_factor = v;
+                }
+            } else {
+                tex_aux_run_illegal_case();
+            }
+            break;
+        case prev_depth_code:
+            if (cur_mode == vmode) {
+                cur_list.prev_depth = tex_scan_dimen(0, 0, 0, 1, NULL);
+            } else {
+                tex_aux_run_illegal_case();
+            }
+            break;
+        case prev_graf_code:
+            {
+                halfword v = tex_scan_int(1, NULL);
+                if (v >= 0) {
+                    /*tex The value is stored in the nest entry that |tex_vmode_nest_index| gives. */
+                    lmt_nest_state.nest[tex_vmode_nest_index()].prev_graf = v;
+                } else {
+                    tex_handle_error(
+                        normal_error_type,
+                        "Bad \\prevgraf (%i)",
+                        v,
+                        "I allow only nonnegative values here."
+                    );
+                }
+                break;
+            }
+        case interaction_mode_code:
+            {
+                tex_aux_set_interaction(tex_scan_int(1, NULL));
+                break;
+            }
+        case insert_mode_code:
+            {
+                tex_set_insert_mode(tex_scan_int(1, NULL));
+                break;
+            }
+    }
+}
+
+/*tex
+    When some dimension of a box register is changed, the change isn't exactly global; but \TEX\
+    does not look at the |\global| switch.
+*/
+
+/*tex
+    The property to set is given by |cur_chr|. First the box register number is scanned, then
+    the value. The value is always scanned, also when the register is void (|b| is null), so
+    that the input is consumed; in that case nothing is assigned. The offset, anchor and
+    orientation related properties also flag the matching geometry on the box. For the box
+    attribute setter the first number is an attribute register, not a box register.
+*/
+
+static void tex_aux_set_box_property(void)
+{
+    halfword code = cur_chr;
+    halfword n = tex_scan_box_register_number();
+    halfword b = box_register(n);
+    switch (code) {
+        case box_width_code:
+            {
+                scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
+                if (b) {
+                    box_width(b) = v;
+                }
+                break;
+            }
+        case box_height_code:
+            {
+                scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
+                if (b) {
+                    box_height(b) = v;
+                }
+                break;
+            }
+        case box_depth_code:
+            {
+                scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
+                if (b) {
+                    box_depth(b) = v;
+                }
+                break;
+            }
+        case box_direction_code:
+            {
+                halfword v = tex_scan_direction(1);
+                if (b) {
+                    tex_set_box_direction(b, v);
+                }
+                break;
+            }
+        case box_geometry_code:
+            {
+                halfword v = tex_scan_geometry(1);
+                if (b) {
+                    box_geometry(b) = (singleword) v;
+                }
+                break;
+            }
+        case box_orientation_code:
+            {
+                halfword v = tex_scan_orientation(1);
+                if (b) {
+                    box_orientation(b) = v;
+                    tex_set_box_geometry(b, orientation_geometry);
+                }
+                break;
+            }
+        case box_anchor_code:
+        case box_anchors_code:
+            {
+                halfword v = code == box_anchor_code ? tex_scan_anchor(1) : tex_scan_anchors(1);
+                if (b) {
+                    box_anchor(b) = v;
+                    tex_set_box_geometry(b, anchor_geometry);
+                }
+                break;
+            }
+        case box_source_code:
+            {
+                halfword v = tex_scan_int(1, NULL);
+                if (b) {
+                    box_source_anchor(b) = v;
+                    tex_set_box_geometry(b, anchor_geometry);
+                }
+                break;
+            }
+        case box_target_code:
+            {
+                halfword v = tex_scan_int(1, NULL);
+                if (b) {
+                    box_target_anchor(b) = v;
+                    tex_set_box_geometry(b, anchor_geometry);
+                }
+                break;
+            }
+        case box_xoffset_code:
+            {
+                scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
+                if (b) {
+                    box_x_offset(b) = v;
+                    tex_set_box_geometry(b, offset_geometry);
+                }
+                break;
+            }
+        case box_yoffset_code:
+            {
+                scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
+                if (b) {
+                    box_y_offset(b) = v;
+                    tex_set_box_geometry(b, offset_geometry);
+                }
+                break;
+            }
+        case box_xmove_code:
+            {
+                /*tex A move shifts the content by |v| and adds the same amount to the width. */
+                scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
+                if (b) {
+                    box_x_offset(b) = tex_aux_checked_dimen1(box_x_offset(b) + v);
+                    box_width(b) = tex_aux_checked_dimen2(box_width(b) + v);
+                    tex_set_box_geometry(b, offset_geometry);
+                }
+                break;
+            }
+        case box_ymove_code:
+            {
+                /*tex Here the height grows by |v| and the depth shrinks by the same amount. */
+                scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
+                if (b) {
+                    box_y_offset(b) = tex_aux_checked_dimen1(box_y_offset(b) + v);
+                    box_height(b) = tex_aux_checked_dimen2(box_height(b) + v);
+                    box_depth(b) = tex_aux_checked_dimen2(box_depth(b) - v);
+                    tex_set_box_geometry(b, offset_geometry);
+                }
+                break;
+            }
+        case box_total_code:
+            {
+                /*tex The total is split over height and depth; the depth gets the odd unit. */
+                scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
+                if (b) {
+                    box_height(b) = v / 2;
+                    box_depth(b) = v - (v / 2);
+                }
+            }
+            break;
+        case box_shift_code:
+            {
+                scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
+                if (b) {
+                    box_shift_amount(b) = v;
+                }
+            }
+            break;
+        case box_adapt_code:
+            {
+                scaled v = tex_scan_limited_scale(1);
+                if (b) {
+                    tex_repack(b, v, packing_adapted);
+                }
+            }
+            break;
+        case box_repack_code:
+            {
+                scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
+                if (b) {
+                    tex_repack(b, v, packing_additional);
+                }
+            }
+            break;
+        case box_freeze_code:
+            {
+                scaled v = tex_scan_int(1, NULL);
+                if (b) {
+                    tex_freeze(b, v);
+                }
+            }
+            break;
+        case box_attribute_code:
+            {
+                /*tex
+                    The index is an attribute register number, so it must be scanned (and range
+                    checked) as such and not as a box register number.
+                */
+                halfword att = tex_scan_attribute_register_number();
+                halfword val = tex_scan_int(1, NULL);
+                if (b) {
+                    if (val == unused_attribute_value) {
+                        tex_unset_attribute(b, att, val);
+                    } else {
+                        tex_set_attribute(b, att, val);
+                    }
+                }
+            }
+            break;
+        default:
+            break;
+    }
+}
+
+/*tex
+    The processing of boxes is somewhat different, because we may need to scan and create an entire
+    box before we actually change the value of the old one.
+*/
+
+static void tex_aux_set_box(int a)
+{
+    halfword n = tex_scan_box_register_number() + (is_global(a) ? global_box_flag : box_flag);
+    if (lmt_error_state.set_box_allowed) {
+        tex_aux_scan_box(n, 1, null_flag);
+    } else {
+        tex_handle_error(
+            normal_error_type,
+            "Improper \\setbox",
+            "Sorry, \\setbox is not allowed after \\halign in a display, between \\accent and\n"
+            "an accented character, or in immediate assignments."
+        );
+    }
+}
+
+/*tex
+    We temporarily define |p| to be |relax|, so that an occurrence of |p| while scanning the
+    definition will simply stop the scanning instead of producing an \quote {undefined control
+    sequence} error or expanding the previous meaning. This allows, for instance, |\chardef
+    \foo = 123\foo|.
+*/ + +static void tex_aux_set_shorthand_def(int a, int force) +{ + halfword code = cur_chr; + tex_get_r_token(); + if (force || tex_define_permitted(cur_cs, a)) { + halfword p = cur_cs; + tex_define(a, p, relax_cmd, relax_code); + tex_scan_optional_equals(); + switch (code) { + case char_def_code: + { + halfword chr = tex_scan_char_number(0); /* maybe 1 */ + tex_define(a, p, char_given_cmd, chr); + break; + } + case math_char_def_code: + { + mathcodeval mval = tex_scan_mathchar(tex_mathcode); + tex_define(a, p, mathspec_cmd, tex_new_math_spec(mval, tex_mathcode)); + // tex_define(a, p, math_char_given_cmd, math_old_packed_character(mval.class_value,mval.family_value,mval.character_value)); + break; + } + case math_dchar_def_code: + { + mathdictval dval = tex_scan_mathdict(); + mathcodeval mval = tex_scan_mathchar(umath_mathcode); + tex_define(a, p, mathspec_cmd, tex_new_math_dict_spec(dval, mval, umath_mathcode)); + // tex_define(a, p, math_char_xgiven_cmd, math_packed_character(mval.class_value,mval.family_value,mval.character_value)); + break; + } + case math_xchar_def_code: + { + mathcodeval mval = tex_scan_mathchar(umath_mathcode); + tex_define(a, p, mathspec_cmd, tex_new_math_spec(mval, umath_mathcode)); + // tex_define(a, p, math_char_xgiven_cmd, math_packed_character(mval.class_value,mval.family_value,mval.character_value)); + break; + } + /* + case math_uchar_def_code: + { + mathcodeval mval = tex_scan_mathchar(umathnum_mathcode); + tex_define(a, p, mathspec_cmd, tex_new_math_spec(mval, umathnum_mathcode)); + // tex_define(a, p, math_char_xgiven_cmd, math_packed_character(mval.class_value,mval.family_value,mval.character_value)); + break; + } + */ + case count_def_code: + { + halfword n = tex_scan_int_register_number(); + tex_define(a, p, register_int_cmd, register_int_location(n)); + break; + } + case attribute_def_code: + { + halfword n = tex_scan_attribute_register_number(); + tex_define(a, p, register_attribute_cmd, register_attribute_location(n)); + 
break; + } + case dimen_def_code: + { + scaled n = tex_scan_dimen_register_number(); + tex_define(a, p, register_dimen_cmd, register_dimen_location(n)); + break; + } + case skip_def_code: + { + halfword n = tex_scan_glue_register_number(); + tex_define(a, p, register_glue_cmd, register_glue_location(n)); + break; + } + case mu_skip_def_code: + { + halfword n = tex_scan_mu_glue_register_number(); + tex_define(a, p, register_mu_glue_cmd, register_mu_glue_location(n)); + break; + } + case toks_def_code: + { + halfword n = tex_scan_toks_register_number(); + tex_define(a, p, register_toks_cmd, register_toks_location(n)); + break; + } + case lua_def_code: + { + halfword v = tex_scan_function_reference(1); + tex_define(a, p, is_protected(a) ? lua_protected_call_cmd : lua_call_cmd, v); + } + break; + case integer_def_code: + { + halfword v = tex_scan_int(1, NULL); + tex_define(a, p, integer_cmd, v); + } + break; + case dimension_def_code: + { + scaled v = tex_scan_dimen(0, 0, 0, 1, NULL); + tex_define(a, p, dimension_cmd, v); + } + break; + case gluespec_def_code: + { + halfword v = tex_scan_glue(glue_val_level, 1); + tex_define(a, p, gluespec_cmd, v); + } + break; + case mugluespec_def_code: + { + halfword v = tex_scan_glue(mu_val_level, 1); + tex_define(a, p, mugluespec_cmd, v); + } + break; + /* + case mathspec_def_code: + { + halfword v = tex_scan_math_spec(1); + tex_define(a, p, mathspec_cmd, v); + } + break; + */ + case fontspec_def_code: + { + halfword v = tex_scan_font(1); + tex_define(a, p, fontspec_cmd, v); + } + break; + /* + case string_def_code: + { + halfword t = scan_toks_expand(0, NULL); + halfword s = tokens_to_string(t); + define(a, p, string_cmd, s - cs_offset_value); + flush_list(t); + break; + } + */ + default: + tex_confusion("shorthand definition"); + break; + } + } +} + +/*tex This deals with the shapes and penalty lists: */ + +static void tex_aux_set_specification(int a) +{ + halfword loc = cur_chr; + quarterword num = (quarterword) 
internal_specification_number(loc); + halfword p = null; + halfword options = 0; + halfword count = tex_scan_int(1, NULL); + if (tex_scan_keyword("options")) { + options = tex_scan_int(0, NULL); + } + if (count > 0) { + p = tex_new_specification_node(count, num, options); + if (num == par_shape_code) { + for (int j = 1; j <= count; j++) { + tex_set_specification_indent(p, j, tex_scan_dimen(0, 0, 0, 0, NULL)); /*tex indentation */ + tex_set_specification_width(p, j, tex_scan_dimen(0, 0, 0, 0, NULL)); /*tex width */ + } + } else { + for (int j = 1; j <= count; j++) { + tex_set_specification_penalty(p, j, tex_scan_int(0, NULL)); /*tex penalty values */ + } + } + } + tex_define(a, loc, specification_reference_cmd, p); + if (is_frozen(a) && cur_mode == hmode) { + tex_update_par_par(specification_reference_cmd, num); + } +} + +/*tex + All of \TEX's parameters are kept in |eqtb| except the font and language information, including + the hyphenation tables; these are strictly global. +*/ + +static void tex_aux_set_hyph_data(void) +{ + switch (cur_chr) { + case hyphenation_code: + tex_scan_toks_expand(0, NULL, 0); + tex_load_tex_hyphenation(language_par, lmt_input_state.def_ref); /* hm, why not use return value */ + tex_flush_token_list(lmt_input_state.def_ref); + break; + case patterns_code: + tex_scan_toks_expand(0, NULL, 0); + tex_load_tex_patterns(language_par, lmt_input_state.def_ref); /* hm, why not use return value */ + tex_flush_token_list(lmt_input_state.def_ref); + break; + case prehyphenchar_code: + tex_set_pre_hyphen_char(language_par, tex_scan_int(1, NULL)); + break; + case posthyphenchar_code: + tex_set_post_hyphen_char(language_par, tex_scan_int(1, NULL)); + break; + case preexhyphenchar_code: + tex_set_pre_exhyphen_char(language_par, tex_scan_int(1, NULL)); + break; + case postexhyphenchar_code: + tex_set_post_exhyphen_char(language_par, tex_scan_int(1, NULL)); + break; + case hyphenationmin_code: + tex_set_hyphenation_min(language_par, tex_scan_int(1, 
NULL)); + break; + case hjcode_code: + { + halfword lan = tex_scan_int(0, NULL); + halfword val = tex_scan_int(1, NULL); + tex_set_hj_code(language_par, lan, val, -1); + } + break; + default: + break; + } +} + +/*tex move to font */ + +static void tex_aux_set_font_property(void) +{ + halfword code = cur_chr; + switch (code) { + case font_hyphen_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + halfword val = tex_scan_int(1, NULL); + set_font_hyphen_char(fnt, val); + break; + } + case font_skew_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + halfword val = tex_scan_int(1, NULL); + set_font_skew_char(fnt, val); + break; + } + case font_lp_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + halfword chr = tex_scan_char_number(0); + halfword val = tex_scan_dimen(0, 0, 0, 1, NULL); + tex_set_lpcode_in_font(fnt, chr, val); + break; + } + case font_rp_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + halfword chr = tex_scan_char_number(0); + halfword val = tex_scan_dimen(0, 0, 0, 1, NULL); + tex_set_rpcode_in_font(fnt, chr, val); + break; + } + case font_ef_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + halfword chr = tex_scan_char_number(0); + halfword val = tex_scan_int(1, NULL); + tex_set_efcode_in_font(fnt, chr, val); + break; + } + case font_dimen_code: + { + tex_set_font_dimen(); + break; + } + case scaled_font_dimen_code: + { + tex_set_scaled_font_dimen(); + break; + } + default: + break; + } +} + +/*tex + Here is where the information for a new font gets loaded. We start with fonts. Unfortunately, + they aren't all as simple as this. +*/ + +static void tex_aux_set_font(int a) +{ + tex_set_cur_font(a, cur_chr); +} + +static void tex_aux_set_define_font(int a) +{ + if (! 
tex_tex_def_font(a)) {
        tex_aux_show_frozen_error(cur_cs);
    }
}

/*tex
    When a |def| command has been scanned, |cur_chr| is odd if the definition is supposed to be
    global, and |cur_chr >= 2| if the definition is supposed to be expanded. Remark: this is
    different in \LUAMETATEX.

    Here |cur_chr| is a code that is mapped in the switch below: the expanded variants set
    |expand|, the global variants add the global flag to |a|, and the csname variants create the
    target with |tex_create_csname| instead of fetching it with |tex_get_r_token|. A non-zero
    |force| bypasses the |tex_define_permitted| check.
*/

static void tex_aux_set_def(int a, int force)
{
    halfword expand = 0;
    switch (cur_chr) {
        case expanded_def_code:
            expand = 1;
            break;
        case def_code:
            break;
        case global_expanded_def_code:
            expand = 1;
            // fall through
        case global_def_code:
            a = add_global_flag(a);
            break;
        case expanded_def_csname_code:
            expand = 1;
            // fall through
        case def_csname_code:
            cur_cs = tex_create_csname();
            goto DONE;
        case global_expanded_def_csname_code:
            expand = 1;
            // fall through
        case global_def_csname_code:
            cur_cs = tex_create_csname();
            a = add_global_flag(a);
            goto DONE;
    }
    /*tex Only the non csname variants get here and still have to pick up their target. */
    tex_get_r_token();
  DONE:
    if (global_defs_par > 0) {
        a = add_global_flag(a);
    }
    if (force || tex_define_permitted(cur_cs, a)) {
        /*tex The target is saved in |p| before the macro body gets scanned. */
        halfword p = cur_cs;
        halfword t = expand ? tex_scan_macro_expand() : tex_scan_macro_normal();
        tex_define(a, p, tex_flags_to_cmd(a), t);
    }
}

/*tex
    The |\let| family. Most cases set |p| to the target and leave the meaning to be assigned in
    |cur_cmd| and |cur_chr|, after which the shared code that follows the switch performs the
    definition. Some cases do all their work in place and return directly. A non-zero |force|
    bypasses the |tex_define_permitted| checks.
*/

static void tex_aux_set_let(int a, int force)
{
    halfword code = cur_chr;
    halfword p = null;
    halfword q = null;
    switch (code) {
        case global_let_code:
            /*tex |\glet| */
            if (global_defs_par >= 0) {
                a = add_global_flag(a);
            }
            // fall through
        case let_code:
            /*tex |\let| */
            // LET:
            tex_get_r_token();
          LETINDEED:
            /*tex The csname variants jump here with |cur_cs| already set. */
            if (force || tex_define_permitted(cur_cs, a)) {
                p = cur_cs;
                /*tex Skip spaces, then an optional equal sign and at most one space after it. */
                do {
                    tex_get_token();
                } while (cur_cmd == spacer_cmd);
                if (cur_tok == equal_token) {
                    tex_get_token();
                    if (cur_cmd == spacer_cmd) {
                        tex_get_token();
                    }
                }
            }
            break;
        case future_let_code:
        case future_def_code:
            /*tex |\futurelet| */
            tex_get_r_token();
            /*tex
                Checking for a frozen macro here is tricky but not doing it would be kind of weird.
*/
            if (force || tex_define_permitted(cur_cs, a)) {
                p = cur_cs;
                /*tex Fetch two tokens and push both back in their original order. */
                q = tex_get_token();
                tex_back_input(tex_get_token());
                /*tex
                    We look ahead and then back up. Note that |back_input| doesn't affect |cur_cmd|,
                    |cur_chr|.
                */
                tex_back_input(q);
                if (code == future_def_code) {
                    /*tex
                        For the def variant the looked-ahead token is stored in a fresh token list
                        and that list becomes the meaning instead.
                    */
                    halfword result = get_reference_token();
                    halfword r = result;
                    r = tex_store_new_token(r, cur_tok);
                    cur_cmd = tex_flags_to_cmd(a);
                    cur_chr = result;
                }
            }
            break;
        case let_charcode_code:
            /*tex |\letcharcode| (todo: protection) */
            {
                halfword v = tex_scan_int(0, NULL);
                if (v > 0) {
                    p = tex_active_to_cs(v, 1);
                    /*tex The same optional equal handling as in the regular |\let| case. */
                    do {
                        tex_get_token();
                    } while (cur_cmd == spacer_cmd);
                    if (cur_tok == equal_token) {
                        tex_get_token();
                        if (cur_cmd == spacer_cmd) {
                            tex_get_token();
                        }
                    }
                } else {
                    /*tex Zero and negative numbers are rejected. */
                    p = null;
                    tex_handle_error(
                        normal_error_type,
                        "invalid number for \\letcharcode",
                        NULL
                    );
                }
                break;
            }
        case swap_cs_values_code:
            {
                /*tex
                    There is no real gain in performance but it looks nicer when tracing when we
                    just swap natively (like no save and restore of a temporary variable and
                    such). Maybe we should be more restrictive but it's a cheap experiment anyway.

                    Flags should match and should not contain permanent, primitive or immutable.
+ */ + halfword s1, s2; + tex_get_r_token(); + s1 = cur_cs; + tex_get_r_token(); + s2 = cur_cs; + tex_define_swapped(a, s1, s2, force); + return; + } + case let_protected_code: + tex_get_r_token(); + if (force || tex_define_permitted(cur_cs, a)) { + switch (cur_cmd) { + case call_cmd: + case semi_protected_call_cmd: + set_eq_type(cur_cs, protected_call_cmd); + break; + case tolerant_call_cmd: + case tolerant_semi_protected_call_cmd: + set_eq_type(cur_cs, tolerant_protected_call_cmd); + break; + } + } + return; + case unlet_protected_code: + tex_get_r_token(); + if (force || tex_define_permitted(cur_cs, a)) { + switch (cur_cmd) { + case protected_call_cmd: + case semi_protected_call_cmd: + set_eq_type(cur_cs, call_cmd); + break; + case tolerant_call_cmd: + case tolerant_semi_protected_call_cmd: + set_eq_type(cur_cs, tolerant_call_cmd); + break; + } + } + return; + case let_frozen_code: + tex_get_r_token(); + if (is_call_cmd(cur_cmd) && (force || tex_define_permitted(cur_cs, a))) { + set_eq_flag(cur_cs, add_frozen_flag(eq_flag(cur_cs))); + } + return; + case unlet_frozen_code: + tex_get_r_token(); + if (is_call_cmd(cur_cmd) && (force || tex_define_permitted(cur_cs, a))) { + set_eq_flag(cur_cs, remove_frozen_flag(eq_flag(cur_cs))); + } + return; + case global_let_csname_code: + if (global_defs_par >= 0) { + a = add_global_flag(a); + } + // fall through + case let_csname_code: + cur_cs = tex_create_csname(); + goto LETINDEED; + case global_let_to_nothing_code: + a = add_global_flag(a); + // fall through + case let_to_nothing_code: + tex_get_r_token(); + if (global_defs_par > 0) { + a = add_global_flag(a); + } + if (force || tex_define_permitted(cur_cs, a)) { + tex_define(a, cur_cs, tex_flags_to_cmd(a), get_reference_token()); + } + return; + default: + /*tex We please the compiler. 
*/
            p = null;
            tex_confusion("let");
            break;
    }
    /*tex
        The meaning gets shared with the target: token list based commands get an extra reference
        and node based ones are copied.
    */
    if (is_referenced_cmd(cur_cmd)) {
        tex_add_token_reference(cur_chr);
    } else if (is_nodebased_cmd(cur_cmd)) {
        cur_chr = tex_copy_node(cur_chr);
    }
    // if (p && cur_cmd >= relax_cmd) {
    if (p && cur_cmd >= 0) {
        singleword oldf = eq_flag(cur_cs);
        singleword newf = 0;
        singleword cmd = (singleword) cur_cmd;
        if (is_aliased(a)) {
            /*tex An alias takes over the flags as they are. */
            newf = oldf;
        } else {
            /*tex Otherwise the overload flags are dropped and the prefix bits are added. */
            oldf = remove_overload_flags(oldf);
            newf = oldf | make_eq_flag_bits(a);
        }
        if (is_protected(a)) {
            switch (cmd) {
                case call_cmd:
                    cmd = protected_call_cmd;
                    break;
                case tolerant_call_cmd:
                    cmd = tolerant_protected_call_cmd;
                    break;
            }
        }
        tex_define_inherit(a, p, (singleword) newf, (singleword) cmd, cur_chr);
    } else {
        tex_define(a, p, (singleword) cur_cmd, cur_chr);
    }
}

/*tex
    The token-list parameters, |\output| and |\everypar|, etc., receive their values in the
    following way. (For safety's sake, we place an enclosing pair of braces around an |\output|
    list.)

    The entry values of |cur_cs| and |cur_cmd| are saved first because the scanning below changes
    them; |cmd| later selects between the internal and the register reference command.
*/

static void tex_aux_set_assign_toks(int a) // better just pass cmd and chr
{
    halfword cs = cur_cs;
    halfword cmd = cur_cmd;
    halfword chr;
    halfword loc;
    halfword tail;
    if (cmd == register_cmd) {
        loc = register_toks_location(tex_scan_toks_register_number());
    } else {
        /*tex |every_par_loc| or |output_routine_loc| or \dots */
        loc = cur_chr;
    }
    /*tex
        Skip an optional equal sign and get the next non-blank non-relax non-call token.
    */
    {
        /*tex As long as |n| is non-zero an equal sign is still accepted. */
        int n = 1 ;
        while (1) {
            tex_get_x_token();
            if (cur_cmd == spacer_cmd) {
                /*tex Go on!
*/
            } else if (cur_cmd == relax_cmd) {
                /*tex After a |\relax| an equal sign is no longer skipped. */
                n = 0;
            } else if (n && cur_tok == equal_token) {
                /*tex Only one equal sign is skipped. */
                n = 0;
            } else {
                break;
            }
        }
    }
    if (cur_cmd != left_brace_cmd) {
        /*tex
            If the right-hand side is a token parameter or token register, finish
            the assignment and |goto done|
        */
        if (cur_cmd == register_cmd && cur_chr == tok_val_level) {
            chr = eq_value(register_toks_location(tex_scan_toks_register_number()));
            if (chr) {
                /*tex The list is shared, not copied. */
                tex_add_token_reference(chr);
            }
            goto DEFINE;
        } else if (cur_cmd == register_toks_cmd || cur_cmd == internal_toks_cmd) {
            chr = eq_value(cur_chr);
            if (chr) {
                tex_add_token_reference(chr);
            }
            goto DEFINE;
        } else {
            /*tex Recover possibly with error message. */
            tex_back_input(cur_tok);
            cur_cs = cs;
            chr = tex_scan_toks_normal(0, &tail);
        }
    } else {
        cur_cs = cs;
        chr = tex_scan_toks_normal(1, &tail);
    }
    if (! token_link(chr)) {
        /*tex A list without content is released and stored as |null|. */
        tex_put_available_token(chr);
        chr = null;
    } else if (loc == internal_toks_location(output_routine_code)) {
        /*tex The output routine gets wrapped in an extra pair of braces. */
        halfword head = token_link(chr);
        halfword list = tex_store_new_token(null, left_brace_token + '{');
        tex_store_new_token(tail, right_brace_token + '}');
        set_token_link(list, head);
        set_token_link(chr, list);
    }
  DEFINE:
    tex_define(a, loc, cmd == internal_toks_cmd ?
internal_toks_reference_cmd : register_toks_reference_cmd, chr);
}

/*tex Let |n| be the largest legal code value, based on |cur_chr| */

/*tex
    Dispatches on |cur_chr|. The cases with a range check scan a character number and a value,
    report an out of range value through |tex_aux_out_of_range_error| and then store the value
    anyway. Whether the assignment is global follows from the prefix flags in |a|.
*/

static void tex_aux_set_define_char_code(int a) /* maybe make |a| already a boolean */
{
    switch (cur_chr) {
        case catcode_charcode:
            {
                halfword chr = tex_scan_char_number(0);
                halfword val = tex_scan_int(1, NULL);
                if (val < 0 || val > max_char_code) {
                    tex_aux_out_of_range_error(val, max_char_code);
                }
                tex_set_cat_code(cat_code_table_par, chr, val, global_or_local(a));
            }
            break;
        case lccode_charcode:
            {
                halfword chr = tex_scan_char_number(0);
                halfword val = tex_scan_int(1, NULL);
                if (val < 0 || val > max_character_code) {
                    tex_aux_out_of_range_error(val, max_character_code);
                }
                tex_set_lc_code(chr, val, global_or_local(a));
            }
            break;
        case uccode_charcode:
            {
                halfword chr = tex_scan_char_number(0);
                halfword val = tex_scan_int(1, NULL);
                if (val < 0 || val > max_character_code) {
                    tex_aux_out_of_range_error(val, max_character_code);
                }
                tex_set_uc_code(chr, val, global_or_local(a));
            }
            break;
        case sfcode_charcode:
            {
                halfword chr = tex_scan_char_number(0);
                halfword val = tex_scan_int(1, NULL);
                if (val < min_space_factor || val > max_space_factor) {
                    tex_aux_out_of_range_error(val, max_space_factor);
                }
                tex_set_sf_code(chr, val, global_or_local(a));
            }
            break;
        case hccode_charcode:
            {
                /*tex Here the value is itself scanned as a character number. */
                halfword chr = tex_scan_char_number(0);
                halfword val = tex_scan_char_number(1);
                tex_set_hc_code(chr, val, global_or_local(a));
            }
            break;
        case hmcode_charcode:
            {
                halfword chr = tex_scan_char_number(0);
                halfword val = tex_scan_math_discretionary_number(1);
                tex_set_hm_code(chr, val, global_or_local(a));
            }
            break;
        /*tex The math and delimiter codes are scanned by dedicated helpers that get the level. */
        case mathcode_charcode:
            tex_scan_extdef_math_code((is_global(a)) ? level_one: cur_level, tex_mathcode);
            break;
        case extmathcode_charcode:
            tex_scan_extdef_math_code((is_global(a)) ?
level_one : cur_level, umath_mathcode); + break; + /* + case extmathcodenum_charcode: + tex_scan_extdef_math_code((is_global(a)) ? level_one : cur_level, umathnum_mathcode); + break; + */ + case delcode_charcode: + tex_scan_extdef_del_code((is_global(a)) ? level_one : cur_level, tex_mathcode); + break; + case extdelcode_charcode: + tex_scan_extdef_del_code((is_global(a)) ? level_one : cur_level, umath_mathcode); + break; + /* + case extdelcodenum_charcode: + tex_scan_extdef_del_code((is_global(a)) ? level_one : cur_level, umathnum_mathcode); + break; + */ + default: + break; + } +} + +static void tex_aux_skip_optional_equal(void) +{ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_tok == equal_token) { + tex_get_x_token(); + } +} + +static void tex_aux_set_math_parameter(int a) +{ + halfword code = cur_chr; + halfword value = null; /* can also be scaled */ + switch (code) { + case math_parameter_reset_spacing: + { + tex_reset_all_styles(global_or_local(a)); + return; + } + case math_parameter_set_spacing: + case math_parameter_set_atom_rule: + { + halfword left = tex_scan_math_class_number(0); + halfword right = tex_scan_math_class_number(0); + switch (code) { + case math_parameter_set_spacing: + code = tex_to_math_spacing_parameter(left, right); + break; + case math_parameter_set_atom_rule: + code = tex_to_math_rules_parameter(left, right); + break; + } + if (code < 0) { + tex_handle_error( + normal_error_type, + "Invalid math class pair", + "I'm going to assume ordinary atoms." 
+ ); + switch (code) { + case math_parameter_set_spacing: + code = tex_to_math_spacing_parameter(ordinary_noad_subtype, ordinary_noad_subtype); + break; + case math_parameter_set_atom_rule: + code = tex_to_math_rules_parameter(ordinary_noad_subtype, ordinary_noad_subtype); + break; + } + } + break; + } + case math_parameter_let_spacing: + case math_parameter_let_atom_rule: + { + halfword class = tex_scan_math_class_number(0); + halfword display = tex_scan_math_class_number(1); + halfword text = tex_scan_math_class_number(0); + halfword script = tex_scan_math_class_number(0); + halfword scriptscript = tex_scan_math_class_number(0); + if (valid_math_class_code(class)) { + switch (code) { + case math_parameter_let_spacing: + code = internal_int_location(first_math_class_code + class); + break; + case math_parameter_let_atom_rule: + code = internal_int_location(first_math_atom_code + class); + break; + } + value = (display << 24) + (text << 16) + (script << 8) + scriptscript; + // tex_assign_internal_int_value(a, code, value); + tex_word_define(a, code, value); + } else { + tex_handle_error( + normal_error_type, + "Invalid math class", + "I'm going to ignore this alias." 
                    );
                }
                return;
            }
        case math_parameter_copy_spacing:
        case math_parameter_copy_atom_rule:
        case math_parameter_copy_parent:
            {
                /*tex The current value of the |parent| class is copied to the |class| entry. */
                halfword class = tex_scan_math_class_number(0);
                halfword parent = tex_scan_math_class_number(1);
                if (valid_math_class_code(class) && valid_math_class_code(parent)) {
                    switch (code) {
                        case math_parameter_copy_spacing:
                            code = internal_int_location(first_math_class_code + class);
                            value = count_parameter(first_math_class_code + parent);
                            break;
                        case math_parameter_copy_atom_rule:
                            code = internal_int_location(first_math_atom_code + class);
                            value = count_parameter(first_math_atom_code + parent);
                            break;
                        case math_parameter_copy_parent:
                            code = internal_int_location(first_math_parent_code + class);
                            value = count_parameter(first_math_parent_code + parent);
                            break;
                    }
                    tex_word_define(a, code, value);
                } else {
                    tex_handle_error(
                        normal_error_type,
                        "Invalid math class",
                        "I'm going to ignore this alias."
                    );
                }
                return;
            }
        case math_parameter_set_pre_penalty:
        case math_parameter_set_post_penalty:
        case math_parameter_set_display_pre_penalty:
        case math_parameter_set_display_post_penalty:
            {
                /*tex A penalty is stored in the per class slot that belongs to the command. */
                halfword class = tex_scan_math_class_number(0);
                halfword penalty = tex_scan_int(1, NULL);
                if (valid_math_class_code(class)) {
                    switch (code) {
                        case math_parameter_set_pre_penalty:
                            code = internal_int_location(first_math_pre_penalty_code + class);
                            break;
                        case math_parameter_set_post_penalty:
                            code = internal_int_location(first_math_post_penalty_code + class);
                            break;
                        case math_parameter_set_display_pre_penalty:
                            code = internal_int_location(first_math_display_pre_penalty_code + class);
                            break;
                        case math_parameter_set_display_post_penalty:
                            code = internal_int_location(first_math_display_post_penalty_code + class);
                            break;
                    }
                    tex_word_define(a, code, penalty);
                    // tex_assign_internal_int_value(a, code, penalty);
                } else {
                    tex_handle_error(
                        normal_error_type,
                        "Invalid math class",
                        "I'm going to ignore this atom penalty."
                    );
                }
                return;
            }
        case math_parameter_let_parent:
            {
                /*tex Again five class numbers, of which four get packed into one integer. */
                halfword class = tex_scan_math_class_number(0);
                halfword pre = tex_scan_math_class_number(1);
                halfword post = tex_scan_math_class_number(0);
                halfword options = tex_scan_math_class_number(0);
                halfword reserved = tex_scan_math_class_number(0);
                if (valid_math_class_code(class)) {
                    code = internal_int_location(first_math_parent_code + class);
                    value = (reserved << 24) + (options << 16) + (pre << 8) + post;
                    tex_word_define(a, code, value);
                    // tex_assign_internal_int_value(a, code, value);
                } else {
                    tex_handle_error(
                        normal_error_type,
                        "Invalid math class",
                        "I'm going to ignore this penalty alias."
                    );
                }
                return;
            }
        case math_parameter_ignore:
            {
                /*tex A negative parameter number is silently ignored. */
                halfword param = tex_scan_math_parameter();
                if (param >= 0) {
                    code = internal_int_location(first_math_ignore_code + param);
                    value = tex_scan_int(1, NULL);
                    tex_word_define(a, code, value);
                }
                return;
            }
        case math_parameter_options:
            {
                halfword class = tex_scan_math_class_number(0);
                if (valid_math_class_code(class)) {
                    code = internal_int_location(first_math_options_code + class);
                    value = tex_scan_int(1, NULL);
                    tex_word_define(a, code, value);
                    // tex_assign_internal_int_value(a, code, value);
                } else {
                    tex_handle_error(
                        normal_error_type,
                        "Invalid math class",
                        "I'm going to ignore these options."
                    );
                }
                return;
            }
        case math_parameter_set_defaults:
            tex_set_default_math_codes();
            return;
    }
    /*tex
        What is left is a regular math parameter in |code|. A frozen prefix only leads to a
        parameter node when we are in math mode.
    */
    {
        halfword style = tex_scan_math_style_identifier(0, 1);
        halfword indirect = indirect_math_regular;
        int freeze = is_frozen(a) && cur_mode == mmode;
        if (!
freeze && is_inherited(a)) {
            /*tex
                An inherited value is not scanned but refers to the quantity that follows: for
                the constant commands |cur_cs| is stored, for registers and internals |cur_chr|,
                and |indirect| records which kind of reference it is.
            */
            tex_aux_skip_optional_equal();
            /* maybe also let inherit from another mathparam but that can become circular */
            switch (math_parameter_value_type(code)) {
                case math_int_parameter:
                    switch (cur_cmd) {
                        case integer_cmd:
                            value = cur_cs;
                            indirect = indirect_math_integer;
                            break;
                        case register_int_cmd:
                            value = cur_chr;
                            indirect = indirect_math_register_integer;
                            break;
                    }
                    break;
                case math_dimen_parameter:
                    switch (cur_cmd) {
                        case dimension_cmd:
                            value = cur_cs;
                            indirect = indirect_math_dimension;
                            break;
                        case register_dimen_cmd:
                            value = cur_chr;
                            indirect = indirect_math_register_dimension;
                            break;
                    }
                    break;
                case math_muglue_parameter:
                    /*tex A muglue parameter also accepts dimensions and regular glue. */
                    switch (cur_cmd) {
                        case mugluespec_cmd:
                            value = cur_cs;
                            indirect = indirect_math_mugluespec;
                            break;
                        case register_mu_glue_cmd:
                            value = cur_chr;
                            indirect = indirect_math_register_mugluespec;
                            break;
                        case internal_mu_glue_cmd:
                            value = cur_chr;
                            indirect = indirect_math_internal_mugluespec;
                            break;
                        case dimension_cmd:
                            value = cur_cs;
                            indirect = indirect_math_dimension;
                            break;
                        case register_dimen_cmd:
                            value = cur_chr;
                            indirect = indirect_math_register_dimension;
                            break;
                        case gluespec_cmd:
                            value = cur_cs;
                            indirect = indirect_math_gluespec;
                            break;
                        case register_glue_cmd:
                            value = cur_chr;
                            indirect = indirect_math_register_gluespec;
                            break;
                        case internal_glue_cmd:
                            value = cur_chr;
                            indirect = indirect_math_internal_gluespec;
                            break;
                    }
                    break;
                case math_pair_parameter:
                    {
                        /*tex
                            NOTE(review): this branch leaves |indirect| untouched, so the check
                            below always reports an error for an inherited pair; confirm that
                            this is intended.
                        */
                        halfword left = tex_scan_math_class_number(0);
                        halfword right = tex_scan_math_class_number(0);
                        value = (left << 16) + right;
                    }
                    break;
            }
            /*tex When no case matched, |indirect| still has its initial value. */
            if (indirect == indirect_math_regular) {
                tex_handle_error(
                    normal_error_type,
                    "Invalid inherited math parameter type",
                    "The inheritance type should match the math parameter type"
                );
                return;
            }
        } else {
            /*tex A direct value is scanned according to the type of the parameter. */
            switch (math_parameter_value_type(code)) {
                case math_int_parameter:
                    value
= tex_scan_int(1, NULL);
                    break;
                case math_dimen_parameter:
                    value = tex_scan_dimen(0, 0, 0, 1, NULL);
                    break;
                case math_muglue_parameter:
                    value = tex_scan_glue(mu_val_level, 1);
                    break;
                case math_style_parameter:
                    /*tex An out of range style variant silently becomes the normal one. */
                    value = tex_scan_int(1, NULL);
                    if (value < 0 || value > last_math_style_variant) {
                        /* maybe a warning */
                        value = math_normal_style_variant;
                    }
                    break;
                case math_pair_parameter:
                    {
                        /*tex Two class numbers get packed into one value. */
                        halfword left = tex_scan_math_class_number(0);
                        halfword right = tex_scan_math_class_number(0);
                        value = (left << 16) + right;
                    }
                    break;
                default:
                    tex_confusion("math parameter type");
                    return;
            }
        }
        if (freeze) {
            /*tex The assignment travels with the list as a parameter node. */
            halfword n = tex_new_node(parameter_node, (quarterword) style);
            parameter_name(n) = code;
            parameter_value(n) = value;
            attach_current_attribute_list(n);
            tex_tail_append(n);
        } else {
            /*tex The style group identifiers set several styles at once. */
            switch (style) {
                case all_display_styles:
                    tex_set_display_styles(code, value, global_or_local(a), indirect);
                    break;
                case all_text_styles:
                    tex_set_text_styles(code, value, global_or_local(a), indirect);
                    break;
                case all_script_styles:
                    tex_set_script_styles(code, value, global_or_local(a), indirect);
                    break;
                case all_script_script_styles:
                    tex_set_script_script_styles(code, value, global_or_local(a), indirect);
                    break;
                case all_math_styles:
                    tex_set_all_styles(code, value, global_or_local(a), indirect);
                    break;
                case all_split_styles:
                    tex_set_split_styles(code, value, global_or_local(a), indirect);
                    break;
                case all_uncramped_styles:
                    tex_set_uncramped_styles(code, value, global_or_local(a), indirect);
                    break;
                case all_cramped_styles:
                    tex_set_cramped_styles(code, value, global_or_local(a), indirect);
                    break;
                default:
                    tex_def_math_parameter(style, code, value, global_or_local(a), indirect);
                    break;
            }

        }
    }
}

/* */

/*tex
    Binds a font to a math family. The value of |cur_chr| is saved in |p| before scanning and
    is passed to |tex_def_fam_fnt| together with the scanned family and font.
*/

static void tex_aux_set_define_family(int a)
{
    halfword p = cur_chr;
    halfword fnt;
    halfword fam = tex_scan_math_family_number();
    tex_scan_optional_equals();
    fnt =
tex_scan_font_identifier(NULL);
    tex_def_fam_fnt(fam, p, fnt, global_or_local(a));
}

/*tex Similar routines are used to assign values to the numeric parameters. */

/*tex
    All setters below follow the same pattern: the target |p| is taken from |cur_chr| first,
    then the value is scanned, and finally the value is stored. The internal integers, dimensions
    and glue go through an assign helper, the others are defined directly.
*/

static void tex_aux_set_internal_int(int a)
{
    halfword p = cur_chr;
    halfword v = tex_scan_int(1, NULL);
    tex_assign_internal_int_value(a, p, v);
}

static void tex_aux_set_register_int(int a)
{
    halfword p = cur_chr;
    halfword v = tex_scan_int(1, NULL);
    tex_word_define(a, p, v);
}

/*tex The attribute setters also keep track of the highest attribute number that got assigned. */

static void tex_aux_set_internal_attr(int a)
{
    halfword p = cur_chr;
    halfword v = tex_scan_int(1, NULL);
    if (internal_attribute_number(p) > lmt_node_memory_state.max_used_attribute) {
        lmt_node_memory_state.max_used_attribute = internal_attribute_number(p);
    }
    change_attribute_register(a, p, v);
    tex_word_define(a, p, v);
}

static void tex_aux_set_register_attr(int a)
{
    halfword p = cur_chr;
    halfword v = tex_scan_int(1, NULL);
    if (register_attribute_number(p) > lmt_node_memory_state.max_used_attribute) {
        lmt_node_memory_state.max_used_attribute = register_attribute_number(p);
    }
    change_attribute_register(a, p, v);
    tex_word_define(a, p, v);
}

static void tex_aux_set_internal_dimen(int a)
{
    halfword p = cur_chr;
    scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
    tex_assign_internal_dimen_value(a, p, v);
}

static void tex_aux_set_register_dimen(int a)
{
    halfword p = cur_chr;
    scaled v = tex_scan_dimen(0, 0, 0, 1, NULL);
    tex_word_define(a, p, v);
}

static void tex_aux_set_internal_glue(int a)
{
    halfword p = cur_chr;
    halfword v = tex_scan_glue(glue_val_level, 1);
    // define(a, p, internal_glue_ref_cmd, v);
    tex_assign_internal_skip_value(a, p, v);
}

static void tex_aux_set_register_glue(int a)
{
    halfword p = cur_chr;
    halfword v = tex_scan_glue(glue_val_level, 1);
    tex_define(a, p, register_glue_reference_cmd, v);
}

static void tex_aux_set_internal_mu_glue(int a)
{
    halfword p = cur_chr;
    halfword v = tex_scan_glue(mu_val_level, 1);
tex_define(a, p, internal_mu_glue_reference_cmd, v); +} + +static void tex_aux_set_register_mu_glue(int a) +{ + halfword p = cur_chr; + halfword v = tex_scan_glue(mu_val_level, 1); + tex_define(a, p, register_mu_glue_reference_cmd, v); +} + +/*tex + We ignore prefixes that don't apply as we might apply then in the future: just like |\immediate| + so it's not that alien. And maybe frozen can be applied some day in other cases as well. As + reference we keep the old code (long and outer code has been removed elsewhere.) Most of the + calls are the only call so the functions are likely to be inlined. + +*/ + +static void tex_aux_set_combine_toks(halfword a) +{ + if (is_global(a)) { + switch (cur_chr) { + case expanded_toks_code: cur_chr = global_expanded_toks_code; break; + case append_toks_code: cur_chr = global_append_toks_code; break; + case append_expanded_toks_code: cur_chr = global_append_expanded_toks_code; break; + case prepend_toks_code: cur_chr = global_prepend_toks_code; break; + case prepend_expanded_toks_code: cur_chr = global_prepend_expanded_toks_code; break; + } + } + tex_run_combine_the_toks(); +} + +static int tex_aux_set_some_item(halfword a) +{ + (void) a; + switch (cur_chr) { + case lastpenalty_code: + lmt_page_builder_state.last_penalty = tex_scan_int(1, NULL); + return 1; + case lastkern_code: + lmt_page_builder_state.last_kern = tex_scan_int(1, NULL); + return 1; + case lastskip_code: + lmt_page_builder_state.last_glue = tex_scan_glue(glue_val_level, 1); + return 1; + case lastboundary_code: + lmt_page_builder_state.last_penalty = tex_scan_int(1, NULL); + return 1; + case last_node_type_code: + lmt_page_builder_state.last_node_type = tex_scan_int(1, NULL); + return 1; + case last_node_subtype_code: + lmt_page_builder_state.last_node_subtype = tex_scan_int(1, NULL); + return 1; + case last_left_class_code: + lmt_math_state.last_left = tex_scan_math_class_number(1); + return 1; + case last_right_class_code: + lmt_math_state.last_right = 
tex_scan_math_class_number(1);
            return 1;
        case last_atom_class_code:
            lmt_math_state.last_atom = tex_scan_math_class_number(1);
            return 1;
        default:
            return 0;
    }
}

/*tex
    The entry point for everything that can follow a prefix. First all prefixes are collected in
    |flags|, then |\globaldefs| is taken into account, and finally |cur_cmd| determines which
    setter is called. The last prefix seen is remembered for the error message, and |force| is
    set by the always prefix and passed on to the def and let setters.
*/

void tex_run_prefixed_command(void)
{
    /*tex accumulated prefix codes so far */
    int flags = 0;
    int force = 0;
    halfword lastprefix = -1;
    while (cur_cmd == prefix_cmd) {
        switch (cur_chr) {
            case frozen_code:        flags = add_frozen_flag       (flags); break;
            case tolerant_code:      flags = add_tolerant_flag     (flags); break;
            case protected_code:     flags = add_protected_flag    (flags); break;
            case permanent_code:     flags = add_permanent_flag    (flags); break;
            case immutable_code:     flags = add_immutable_flag    (flags); break;
            case mutable_code:       flags = add_mutable_flag      (flags); break;
            case noaligned_code:     flags = add_noaligned_flag    (flags); break;
            case instance_code:      flags = add_instance_flag     (flags); break;
            case untraced_code:      flags = add_untraced_flag     (flags); break;
            case global_code:        flags = add_global_flag       (flags); break;
            case overloaded_code:    flags = add_overloaded_flag   (flags); break;
            case aliased_code:       flags = add_aliased_flag      (flags); break;
            case immediate_code:     flags = add_immediate_flag    (flags); break;
            case semiprotected_code: flags = add_semiprotected_flag(flags); break;
            /*tex This one is bound. */
            case always_code:        flags = add_aliased_flag      (flags); force = 1; break;
            /*tex This one is special */
            case inherited_code:     flags = add_inherited_flag    (flags); break;
            default:
                /*tex An unknown prefix code is skipped and not remembered as last prefix. */
                goto PICKUP;
        }
        lastprefix = cur_chr;
      PICKUP:
        /*tex We no longer report prefixes. */
        do {
            tex_get_x_token();
        } while (cur_cmd == spacer_cmd || cur_cmd == relax_cmd);
        if (tracing_commands_par > 2) {
            tex_show_cmd_chr(cur_cmd, cur_chr);
        }
    }

    /*tex: Here we can quit when we have a constant! */

    /*tex
        Adjust for the setting of |\globaldefs|.
    */
    if (global_defs_par) {
        flags = global_defs_par > 0 ?
add_global_flag(flags) : remove_global_flag(flags);
    }
    /*tex
        Now we arrived at all the def variants. We only apply the prefixes that make sense (for
        now).
    */
    switch (cur_cmd) {
        case set_font_cmd:
            tex_aux_set_font(flags);
            break;
        /*tex Only these three setters get the |force| state. */
        case def_cmd:
            tex_aux_set_def(flags, force);
            break;
        case let_cmd:
            tex_aux_set_let(flags, force);
            break;
        case shorthand_def_cmd:
            tex_aux_set_shorthand_def(flags, force);
            break;
        case internal_toks_cmd:
        case register_toks_cmd:
            tex_aux_set_assign_toks(flags);
            break;
        case internal_int_cmd:
            tex_aux_set_internal_int(flags);
            break;
        case register_int_cmd:
            tex_aux_set_register_int(flags);
            break;
        case internal_attribute_cmd:
            tex_aux_set_internal_attr(flags);
            break;
        case register_attribute_cmd:
            tex_aux_set_register_attr(flags);
            break;
        case internal_dimen_cmd:
            tex_aux_set_internal_dimen(flags);
            break;
        case register_dimen_cmd:
            tex_aux_set_register_dimen(flags);
            break;
        case internal_glue_cmd:
            tex_aux_set_internal_glue(flags);
            break;
        case register_glue_cmd:
            tex_aux_set_register_glue(flags);
            break;
        case internal_mu_glue_cmd:
            tex_aux_set_internal_mu_glue(flags);
            break;
        case register_mu_glue_cmd:
            tex_aux_set_register_mu_glue(flags);
            break;
        case lua_value_cmd:
            tex_aux_set_lua_value(flags);
            break;
        case define_char_code_cmd:
            tex_aux_set_define_char_code(flags);
            break;
        case define_family_cmd:
            tex_aux_set_define_family(flags);
            break;
        case set_math_parameter_cmd:
            tex_aux_set_math_parameter(flags);
            break;
        case register_cmd:
            /*tex The generic register command is split into token lists and the rest. */
            if (cur_chr == tok_val_level) {
                tex_aux_set_assign_toks(flags);
            } else {
                tex_aux_set_register(flags);
            }
            break;
        case arithmic_cmd:
            tex_aux_arithmic_register(flags, cur_chr);
            break;
        case set_box_cmd:
            tex_aux_set_box(flags);
            break;
        case set_auxiliary_cmd:
            tex_aux_set_auxiliary(flags);
            break;
        /*tex Some setters take no flags at all. */
        case set_page_property_cmd:
            tex_aux_set_page_property();
            break;
        case set_box_property_cmd:
tex_aux_set_box_property();
            break;
        case set_specification_cmd:
            tex_aux_set_specification(flags);
            break;
        case hyphenation_cmd:
            tex_aux_set_hyph_data();
            break;
        case set_font_property_cmd:
            tex_aux_set_font_property();
            break;
        case define_font_cmd:
            tex_aux_set_define_font(flags);
            break;
        case set_interaction_cmd:
            tex_aux_set_interaction(cur_chr);
            break;
        case combine_toks_cmd:
            tex_aux_set_combine_toks(flags);
            break;
        case some_item_cmd:
            /*tex Only some of these items can be assigned to. */
            if (! tex_aux_set_some_item(flags)) {
                tex_aux_run_illegal_case();
            }
            break;
        default:
            /*tex Without any prefix we should not have arrived here in the first place. */
            if (lastprefix < 0) {
                tex_confusion("prefixed command");
            } else {
                tex_handle_error(
                    normal_error_type,
                    "You can't use a prefix %C with %C",
                    prefix_cmd, lastprefix, cur_cmd, cur_chr,
                    "A prefix should be followed by a quantity that can be assigned to. Intermediate\n"
                    "spaces and \\relax tokens are gobbled in the process.\n"
                );
                break;
            }
    }
    /*tex
        End of assignments cases. We insert a token saved by |\afterassignment|, if any.
    */
    tex_aux_finish_after_assignment();
}

/*tex

    When a control sequence is to be defined, by |\def| or |\let| or something similar, the
    |get_r_token| routine will substitute a special control sequence for a token that is not
    redefinable.

    Leading space tokens are skipped. After an error we start all over, so on return |cur_cs|
    has passed the check below.

*/

void tex_get_r_token(void)
{
  RESTART:
    do {
        tex_get_token();
    } while (cur_tok == space_token);
    /*tex
        NOTE(review): the branch taken when |eqtb_valid_cs| holds is the error path, so
        presumably the macro flags a control sequence that can not be redefined, despite its
        name; confirm this against its definition.
    */
    if (eqtb_valid_cs(cur_cs)) {
        if (cur_cs == 0) {
            /*tex The token is not a control sequence; it is pushed back so it gets read again. */
            tex_back_input(cur_tok);
        }
        cur_tok = deep_frozen_protection_token;
        /* moved down but this might interfere with input on the console */
        tex_handle_error(
            insert_error_type,
            "Missing control sequence inserted",
            "Please don't say '\\def cs{...}', say '\\def\\cs{...}'. I've inserted an\n"
            "inaccessible control sequence so that your definition will be completed without\n"
            "mixing me up too badly.\n"
        );
        goto RESTART;
    }
}

/*tex
    Some of the internal int values need a special treatment.
This used to be a more complex
    function, also dealing with other registers that didn't really need a check, also because we
    now split into internals and registers.

    Beware: the post binary and relation penalties are not synchronized here because we assume a
    proper overload of the primitive. They can still be set and their setting is reflected in the
    atom penalties but that's all. No need for more code.
*/

void tex_assign_internal_int_value(int a, halfword p, int val)
{
    switch (internal_int_number(p)) {
        case par_direction_code:
            {
                check_direction_value(val);
                tex_word_define(a, p, val);
            }
            break;
        case math_direction_code:
            {
                check_direction_value(val);
                tex_word_define(a, p, val);
            }
            break;
        case text_direction_code:
            {
                check_direction_value(val);
                tex_inject_text_or_line_dir(val, 0);
                tex_word_define(a, p, val);
                /*tex Plus: */
                update_tex_internal_dir_state(internal_dir_state_par + 1);
            }
            break;
        case line_direction_code:
            {
                check_direction_value(val);
                tex_inject_text_or_line_dir(val, 1);
                /*tex The line direction is stored in the text direction parameter. */
                p = internal_int_location(text_direction_code);
                tex_word_define(a, p, val);
                /*tex Plus: */
                update_tex_internal_dir_state(internal_dir_state_par + 1);
            }
            break;
        case cat_code_table_code:
            if (tex_valid_catcode_table(val)) {
                /*tex Switching to the table that is already active is a no-op. */
                if (val != cat_code_table_par) {
                    tex_word_define(a, p, val);
                }
            } else {
                tex_handle_error(
                    normal_error_type,
                    "Invalid \\catcode table",
                    "You can only switch to a \\catcode table that is initialized using\n"
                    "\\savecatcodetable or \\initcatcodetable, or to table 0"
                );
            }
            break;
        case glyph_scale_code:
        case glyph_x_scale_code:
        case glyph_y_scale_code:
            /*tex A zero scale is silently ignored. */
            if (!
val) {
                /* maybe an error message */
                return;
            } else {
                /* todo: check for reasonable */
                goto DEFINE;
            }
        case glyph_text_scale_code:
        case glyph_script_scale_code:
        case glyph_scriptscript_scale_code:
            /* here zero is a signal */
            /*tex An out of range value is reported and then replaced by the maximum. */
            if (val < min_limited_scale || val > max_limited_scale) {
                tex_handle_error(
                    normal_error_type,
                    "Invalid \\glyph..scale",
                    "The value for \\glyph..scale has to be between 0 and 1000 where\n"
                    "a value of zero forces font percentage scaling to be used."
                );
                val = max_limited_scale;
            }
            goto DEFINE;
        case math_begin_class_code:
        case math_end_class_code:
        case math_left_class_code:
        case math_right_class_code:
            /*tex An invalid class is silently turned into the unset class. */
            if (! valid_math_class_code(val)) {
                val = unset_noad_class;
            }
            tex_word_define(a, p, val);
            break;
        case output_box_code:
            /*tex In the next three cases an invalid value is reported and not assigned. */
            if (val < 0 || val > max_box_index) {
                tex_handle_error(
                    normal_error_type,
                    "Invalid \\outputbox",
                    "The value for \\outputbox has to be between 0 and " LMT_TOSTRING(max_box_index) "."
                );
            } else {
                tex_word_define(a, p, val);
            }
            break;
        case new_line_char_code:
            if (val > max_newline_character) {
                tex_handle_error(
                    normal_error_type,
                    "Invalid \\newlinechar",
                    "The value for \\newlinechar has to be no higher than " LMT_TOSTRING(max_newline_character) ".\n"
                    "Your invalid assignment will be ignored."
                );
            }
            else {
                tex_word_define(a, p, val);
            }
            break;
        case end_line_char_code:
            if (val > 127) {
                tex_handle_error(
                    normal_error_type,
                    "Invalid \\endlinechar",
                    "The value for \\endlinechar has to be no higher than 127."
                );
            }
            else {
                tex_word_define(a, p, val);
            }
            break;
        case language_code:
            /* this is |\language| */
            /*tex A negative value is treated as zero before the validity check. */
            if (val < 0) {
                val = 0;
            }
            if (tex_is_valid_language(val)) {
                update_tex_language(a, val);
            }
            else {
                tex_handle_error(
                    normal_error_type,
                    "Invalid \\language",
                    "The value for \\language has to be defined and in the range 0 .. " LMT_TOSTRING(max_n_of_languages) "."
+ ); + } + break; + case font_code: + if (val < 0) { + val = 0; + } + if (tex_is_valid_font(val)) { + tex_set_cur_font(a, val); + } + else { + tex_handle_error( + normal_error_type, + "Invalid \\fontid", + "The value for \\fontid has to be defined and in the range 0 .. " LMT_TOSTRING(max_n_of_fonts) "." + ); + } + break; + case hyphenation_mode_code: + if (val < 0) { + val = 0; + } + /* We don't update |\uchyph| here. */ + tex_word_define(a, p, val); + break; + case uc_hyph_code: + /*tex For old times sake. */ + tex_word_define(a, p, val); + /*tex But we do use this instead. */ + val = val ? set_hyphenation_mode(hyphenation_mode_par, uppercase_hyphenation_mode) : unset_hyphenation_mode(hyphenation_mode_par, uppercase_hyphenation_mode); + tex_word_define(a, internal_int_location(hyphenation_mode_code), val); + break; + case local_interline_penalty_code: + case local_broken_penalty_code: + /*tex + If we are defining subparagraph penalty levels while we are in hmode, then we + put out a whatsit immediately, otherwise we leave it alone. This mechanism might + not be sufficiently powerful, and some other algorithm, searching down the stack, + might be necessary. Good first step. 
+ */ + tex_word_define(a, p, val); + if (cur_mode == hmode) { + /*tex Add local paragraph node */ + tex_tail_append(tex_new_par_node(penalty_par_subtype)); + update_tex_internal_par_state(internal_par_state_par + 1); + } + break; + case adjust_spacing_code: + if (val < adjust_spacing_off) { + val = adjust_spacing_off; + } + else if (val > adjust_spacing_font) { + val = adjust_spacing_font; + } + goto DEFINE; + case protrude_chars_code: + if (val < protrude_chars_off) { + val = protrude_chars_off; + } + else if (val > protrude_chars_advanced) { + val = protrude_chars_advanced; + } + goto DEFINE; + case glyph_options_code: + if (val < glyph_option_normal_glyph) { + val = glyph_option_normal_glyph; + } else if (val > glyph_option_all) { + val = glyph_option_all; + } + goto DEFINE; + case overload_mode_code: + if (overload_mode_par == 255) { + return; + } else { + goto DEFINE; + } + /* We only synchronize these four one way. */ + case post_binary_penalty_code: + tex_word_define(a, internal_int_location(first_math_post_penalty_code + binary_noad_subtype), val); + tex_word_define(a, internal_int_location(first_math_display_post_penalty_code + binary_noad_subtype), val); + break; + case post_relation_penalty_code: + tex_word_define(a, internal_int_location(first_math_post_penalty_code + relation_noad_subtype), val); + tex_word_define(a, internal_int_location(first_math_display_post_penalty_code + relation_noad_subtype), val); + break; + case pre_binary_penalty_code: + tex_word_define(a, internal_int_location(first_math_pre_penalty_code + binary_noad_subtype), val); + tex_word_define(a, internal_int_location(first_math_display_pre_penalty_code + binary_noad_subtype), val); + break; + case pre_relation_penalty_code: + tex_word_define(a, internal_int_location(first_math_pre_penalty_code + relation_noad_subtype), val); + tex_word_define(a, internal_int_location(first_math_display_pre_penalty_code + relation_noad_subtype), val); + break; + /* We could do this, but then we also 
need to do day and check it per month. */ /* + case month_code: + if (val < 1) { + val = 1; + } else if (val > 12) { + val = 12; + } + goto DEFINE; + */ + default: + DEFINE: + tex_word_define(a, p, val); + if (is_frozen(a) && cur_mode == hmode) { + tex_update_par_par(internal_int_cmd, internal_int_number(p)); + } + } +} + +void tex_assign_internal_attribute_value(int a, halfword p, int val) +{ + if (register_attribute_number(p) > lmt_node_memory_state.max_used_attribute) { + lmt_node_memory_state.max_used_attribute = register_attribute_number(p); + } + change_attribute_register(a, p, val); + tex_word_define(a, p, val); +} + +void tex_assign_internal_dimen_value(int a, halfword p, int val) +{ + tex_word_define(a, p, val); + if (is_frozen(a) && cur_mode == hmode) { + tex_update_par_par(internal_dimen_cmd, internal_dimen_number(p)); + } +} + +void tex_assign_internal_skip_value(int a, halfword p, int val) +{ + tex_define(a, p, internal_glue_reference_cmd, val); + if (is_frozen(a) && cur_mode == hmode) { + tex_update_par_par(internal_glue_cmd, internal_glue_number(p)); + } +} + +/*tex + + Here is a procedure that might be called \quotation {Get the next non-blank non-relax non-call + non-assignment token}. It is a runner used in text accents and math alignments. It probably + has to be adapted to the additional command codes that we have. + +*/ + +void tex_handle_assignments(void) +{ + while (1) { + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd || cur_cmd == relax_cmd); + if (cur_cmd <= max_non_prefixed_cmd) { + return; + } else { + lmt_error_state.set_box_allowed = 0; + tex_run_prefixed_command(); + lmt_error_state.set_box_allowed = 1; + } + } +} + +/*tex Has the long |\errmessage| help been used? 
*/ + +static strnumber tex_aux_scan_string(void) +{ + int saved_selector = lmt_print_state.selector; /*tex holds |selector| setting */ + halfword result = tex_scan_toks_expand(0, NULL, 0); + // saved_selector = lmt_print_state.selector; + lmt_print_state.selector = new_string_selector_code; + tex_token_show(result, extreme_token_show_max); + tex_flush_token_list(result); + lmt_print_state.selector = saved_selector; + return tex_make_string(); /* todo: we can use take_string instead but happens only @ error */ +} + +static void tex_aux_run_message(void) +{ + switch (cur_chr) { + case message_code: + { + /*tex Print string |s| on the terminal */ + strnumber s = tex_aux_scan_string(); + if ((lmt_print_state.terminal_offset > 0) || (lmt_print_state.logfile_offset > 0)) { + tex_print_char(' '); + } + tex_print_tex_str(s); + tex_terminal_update(); + tex_flush_str(s); + break; + } + case error_message_code: + { + /*tex + Print string |s| as an error message. If |\errmessage| occurs often in + |scroll_mode|, without user-defined |\errhelp|, we don't want to give a long + help message each time. So we give a verbose explanation only once. These + help messages are not expanded because that could itself generate an error. + */ + strnumber s = tex_aux_scan_string(); + if (error_help_par) { + strnumber helpinfo = tex_tokens_to_string(error_help_par); + char *h = tex_makecstring(helpinfo); + tex_handle_error( + normal_error_type, + "%T", + s, + h + ); + lmt_memory_free(h); + tex_flush_str(helpinfo); + } else if (lmt_error_state.long_help_seen) { + tex_handle_error( + normal_error_type, + "%T", + s, + "(That was another \\errmessage.)" + ); + } else { + if (lmt_error_state.interaction < error_stop_mode) { + lmt_error_state.long_help_seen = 1; + } + tex_handle_error( + normal_error_type, + "%T", + s, + "This error message was generated by an \\errmessage command, so I can't give any\n" + "explicit help. 
Pretend that you're Hercule Poirot: Examine all clues, and deduce\n" + "the truth by order and method." + ); + } + tex_flush_str(s); + break; + } + } +} + +/*tex + + The |\uppercase| and |\lowercase| commands are implemented by building a token list and then + changing the cases of the letters in it. + + Change the case of the token in |p|, if a change is appropriate. When the case of a |chr_code| + changes, we don't change the |cmd|. We also change active characters. (The last fact permits + trickery.) + +*/ + +static void tex_aux_run_shift_case(void) +{ + int upper = cur_chr == upper_case_code; + halfword l = tex_scan_toks_normal(0, NULL); + halfword p = token_link(l); + while (p) { + halfword t = token_info(p); + if (t < cs_token_flag) { + halfword c = t % cs_offset_value; + halfword i = upper ? tex_get_uc_code(c) : tex_get_lc_code(c); + if (i) { + set_token_info(p, t - c + i); + } + } else if (tex_is_active_cs(cs_text(t - cs_token_flag))) { + halfword c = active_cs_value(cs_text(t - cs_token_flag)); + halfword i = upper ? tex_get_uc_code(c) : tex_get_lc_code(c); + if (i) { + set_token_info(p, tex_active_to_cs(i, 1) + cs_token_flag); + } + } + p = token_link(p); + } + tex_begin_backed_up_list(token_link(l)); + tex_put_available_token(l); +} + +/*tex + + We come finally to the last pieces missing from |main_control|, namely the |\show| commands that + are useful when debugging. + +*/ + +static void tex_aux_run_show_whatever(void) +{ + int justshow = 1; + switch (cur_chr) { + case show_code: + /*tex Show the current meaning of a token, then |goto common_ending|. */ + { + tex_get_token(); + tex_print_nlp(); + tex_print_str("> "); + if (cur_cs != 0) { + tex_print_cs(cur_cs); + tex_print_char('='); + } + tex_print_meaning(meaning_full_code); + goto COMMON_ENDING; + } + case show_box_code: + /*tex Show the current contents of a box. 
*/ + { + int nolevels = 0; + int diagnose = 0; + int content = 0; + int online = 0; + int max = 0; + while (1) { + switch (tex_scan_character("ocdnaOCDNA", 0, 0, 0)) { + case 'a': case 'A': + if (tex_scan_mandate_keyword("all", 1)) { + max = 1; + } + break; + case 'c': case 'C': + if (tex_scan_mandate_keyword("content", 1)) { + content = 1; + } + break; + case 'd': case 'D': + if (tex_scan_mandate_keyword("diagnose", 1)) { + diagnose = 1; + } + break; + case 'n': case 'N': + if (tex_scan_mandate_keyword("nolevels", 1)) { + nolevels = 1; + } + break; + case 'o': case 'O': + if (tex_scan_mandate_keyword("online", 1)) { + online = 1; + } + break; + default: + goto DONE; + } + } + DONE: + /*tex This can become a general helper. */ + { + halfword n = tex_scan_box_register_number(); + halfword r = box_register(n); + halfword l = tracing_levels_par; + halfword o = tracing_online_par; + halfword d = show_box_depth_par; + halfword b = show_box_breadth_par; + if (nolevels) { + tracing_levels_par = 0; + } + if (online) { + tracing_online_par = 2; + } + if (max) { + show_box_depth_par = max_integer; + show_box_breadth_par = max_integer; + } + if (diagnose) { + tex_begin_diagnostic(); + } + if (! 
content) { + tex_print_str("> \\box"); + tex_print_int(n); + tex_print_char('='); + } + if (r) { + tex_show_box(r); + } else { + tex_print_str("void"); + } + if (diagnose) { + tex_end_diagnostic(); + } + tracing_levels_par = l; + tracing_online_par = o; + show_box_depth_par = d; + show_box_breadth_par = b; + } + break; + } + case show_the_code: + { + halfword head = tex_the_value_toks(1, NULL, 0); + tex_print_nlp(); + tex_print_str("> "); + tex_show_token_list(head, null, default_token_show_max, 0); + tex_flush_token_list(head); + goto COMMON_ENDING; + } + case show_lists_code: + { + tex_begin_diagnostic(); + tex_show_activities(); + tex_end_diagnostic(); + break; + } + case show_groups_code: + { + tex_begin_diagnostic(); + tex_show_save_groups(); + tex_end_diagnostic(); + break; + } + case show_tokens_code: + { + halfword head = tex_the_detokenized_toks(NULL); + tex_print_nlp(); + tex_print_str("> "); + tex_show_token_list(head, null, default_token_show_max, 0); + tex_flush_token_list(head); + goto COMMON_ENDING; + } + case show_ifs_code: + { + // if (! justshow) { + tex_begin_diagnostic(); + // } + tex_show_ifs(); + // if (! justshow) { + tex_end_diagnostic(); + // } + break; + } + default: + /* can't happen */ + break; + } + if (justshow) { + return; + } else { + /*tex By default we |justshow| now so the next is dead code. 
*/ + } + /*tex Complete a potentially long |\show| command: */ + tex_handle_error_message_only("OK"); + if (lmt_print_state.selector == terminal_and_logfile_selector_code && tracing_online_par <= 0) { + lmt_print_state.selector = terminal_selector_code; + tex_print_str(" (see the transcript file)"); /*tex Here |transcript| means |log|.*/ + lmt_print_state.selector = terminal_and_logfile_selector_code; + } + COMMON_ENDING: + if (justshow) { + return; + } else if (lmt_error_state.interaction < error_stop_mode) { + tex_handle_error( + normal_error_type, + NULL, /* no message */ + NULL /* no help */ + ); + --lmt_error_state.error_count; + /* } else if (tracing_online_par > 0) { */ + } else { + tex_handle_error( + normal_error_type, + NULL, /* no message */ + "This isn't an error message; I'm just \\showing something.\n" + ); + } +} + +/*tex + + These procedures get things started properly. The initializer sets up the function table. We + have a few aliases to run_functions that are also used otherwise. + + We actually only have some 50 cases where there is a difference between the modes and it makes + sense now to combine the handling and move the mode checking to those combined functions. That + way we get a switch no longer a jump. Actually, some already share a function and check for the + mode. On the other hand, this is how \TEX\ does it. + + When we have version 2.10 released I might move the mode tests to the runners so that we get a + smaller case cq. jump table and we might also go for mode 1 permanently. A side effect will be + that some commands codes will be collapsed (move and such). 
+ +*/ + +# if (main_control_mode == 0) + +# define register_runner(A,B,C,D) \ + jump_table[vmode+(A)] = B; \ + jump_table[hmode+(A)] = C; \ + jump_table[mmode+(A)] = D + +# define register_simple(A,B) \ + jump_table[vmode+(A)] = B; \ + jump_table[hmode+(A)] = B; \ + jump_table[mmode+(A)] = B + +# define register_asmath(A,B,C) \ + jump_table[vmode+(A)] = B; \ + jump_table[hmode+(A)] = B; \ + jump_table[mmode+(A)] = C + +inline static void init_main_control(void) +{ + + jump_table = lmt_memory_malloc((mmode + max_command_cmd + 1) * sizeof(main_control_function)) ; + + if (jump_table) { + +# elif (main_control_mode == 1) + +# define register_runner(A,B,C,D) \ + case A: \ + switch (mode) { \ + case vmode: B(); break; \ + case hmode: C(); break; \ + case mmode: D(); break; \ + } \ + break + +# define register_simple(A,B) \ + case A: B(); break + +# define register_asmath(A,B,C) \ + case A: if (mode == mmode) { C(); } else { B(); } break + +inline static void tex_aux_big_switch(int mode, int cmd) +{ + + switch (cmd) { + +# else + +# define register_runner(A,B,C,D) \ + case (vmode + A): B(); break; \ + case (hmode + A): C(); break; \ + case (mmode + A): D(); break; + +# define register_simple(A,B) \ + case (vmode + A): B(); break; \ + case (hmode + A): B(); break; \ + case (mmode + A): B(); break; + +# define register_asmath(A,B,C) \ + case (vmode + A): B(); break; \ + case (hmode + A): B(); break; \ + case (mmode + A): C(); break; + +inline static void tex_aux_big_switch(int mode, int cmd) +{ + + switch (mode + cmd) { + +# endif + + /*tex These have the same handler for each mode: */ + + register_simple(arithmic_cmd, tex_run_prefixed_command); + register_simple(register_attribute_cmd, tex_run_prefixed_command); + register_simple(internal_attribute_cmd, tex_run_prefixed_command); + register_simple(register_dimen_cmd, tex_run_prefixed_command); + register_simple(internal_dimen_cmd, tex_run_prefixed_command); + register_simple(set_font_property_cmd, 
tex_run_prefixed_command); + register_simple(register_glue_cmd, tex_run_prefixed_command); + register_simple(internal_glue_cmd, tex_run_prefixed_command); + register_simple(register_int_cmd, tex_run_prefixed_command); + register_simple(internal_int_cmd, tex_run_prefixed_command); + register_simple(register_mu_glue_cmd, tex_run_prefixed_command); + register_simple(internal_mu_glue_cmd, tex_run_prefixed_command); + register_simple(register_toks_cmd, tex_run_prefixed_command); + register_simple(internal_toks_cmd, tex_run_prefixed_command); + register_simple(define_char_code_cmd, tex_run_prefixed_command); + register_simple(def_cmd, tex_run_prefixed_command); + register_simple(define_family_cmd, tex_run_prefixed_command); + register_simple(define_font_cmd, tex_run_prefixed_command); + register_simple(hyphenation_cmd, tex_run_prefixed_command); + register_simple(let_cmd, tex_run_prefixed_command); + register_simple(prefix_cmd, tex_run_prefixed_command); + register_simple(register_cmd, tex_run_prefixed_command); + register_simple(set_auxiliary_cmd, tex_run_prefixed_command); + register_simple(set_box_cmd, tex_run_prefixed_command); + register_simple(set_box_property_cmd, tex_run_prefixed_command); + register_simple(set_font_cmd, tex_run_prefixed_command); + register_simple(set_interaction_cmd, tex_run_prefixed_command); + register_simple(set_math_parameter_cmd, tex_run_prefixed_command); + register_simple(set_page_property_cmd, tex_run_prefixed_command); + register_simple(set_specification_cmd, tex_run_prefixed_command); + register_simple(shorthand_def_cmd, tex_run_prefixed_command); + register_simple(lua_value_cmd, tex_run_prefixed_command); + + register_simple(integer_cmd, tex_aux_run_illegal_case); /*tex This is better than |run_relax|. */ + register_simple(dimension_cmd, tex_aux_run_illegal_case); /*tex This is better than |run_relax|. */ + register_simple(gluespec_cmd, tex_aux_run_illegal_case); /*tex This is better than |run_relax|. 
*/ + register_simple(mugluespec_cmd, tex_aux_run_illegal_case); /*tex This is better than |run_relax|. */ + + register_simple(fontspec_cmd, tex_run_font_spec); + + // register_simple(some_item_cmd, tex_aux_run_illegal_case); + register_simple(some_item_cmd, tex_run_prefixed_command); + register_simple(iterator_value_cmd, tex_aux_run_illegal_case); + register_simple(parameter_cmd, tex_aux_run_illegal_case); + + register_simple(after_something_cmd, tex_aux_run_after_something); + register_simple(begin_group_cmd, tex_aux_run_begin_group); + register_simple(penalty_cmd, tex_aux_run_penalty); + register_simple(case_shift_cmd, tex_aux_run_shift_case); + register_simple(catcode_table_cmd, tex_aux_run_catcode_table); + register_simple(combine_toks_cmd, tex_run_prefixed_command); + // register_simple(combine_toks_cmd, tex_run_combine_the_toks); + register_simple(end_cs_name_cmd, tex_aux_run_cs_error); + register_simple(end_group_cmd, tex_aux_run_end_group); + register_simple(end_local_cmd, tex_aux_run_end_local); + register_simple(ignore_something_cmd, tex_aux_run_ignore_something); + register_simple(insert_cmd, tex_run_insert); + register_simple(kern_cmd, tex_aux_run_kern); + register_simple(leader_cmd, tex_aux_run_leader); + register_simple(legacy_cmd, tex_aux_run_legacy); + register_simple(local_box_cmd, tex_aux_run_local_box); + register_simple(lua_protected_call_cmd, tex_aux_run_lua_protected_call); + register_simple(lua_function_call_cmd, tex_aux_run_lua_function_call); + register_simple(make_box_cmd, tex_aux_run_make_box); + register_simple(set_mark_cmd, tex_run_mark); + register_simple(message_cmd, tex_aux_run_message); + register_simple(node_cmd, tex_aux_run_node); + register_simple(relax_cmd, tex_aux_run_relax); + register_simple(remove_item_cmd, tex_aux_run_remove_item); + register_simple(right_brace_cmd, tex_aux_run_right_brace); + register_simple(vcenter_cmd, tex_run_vcenter); + register_simple(xray_cmd, tex_aux_run_show_whatever); + + 
register_simple(alignment_cmd, tex_run_alignment_error); + register_simple(end_template_cmd, tex_run_alignment_end_template); + register_simple(alignment_tab_cmd, tex_run_alignment_error); + + /*tex These have different handlers but a common h/v mode: */ + + register_asmath(math_fraction_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_fraction); + register_asmath(delimiter_number_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_delimiter_number); + register_asmath(math_fence_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_fence); + register_asmath(math_modifier_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_modifier); + register_asmath(math_accent_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_accent); + register_asmath(math_choice_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_choice); + register_asmath(math_component_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_math_component); + register_asmath(math_style_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_style); + register_asmath(mkern_cmd, tex_aux_run_insert_dollar_sign, tex_aux_run_mkern); + register_asmath(mskip_cmd, tex_aux_run_insert_dollar_sign, tex_aux_run_mglue); + register_asmath(math_radical_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_radical); + register_asmath(subscript_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_script); + register_asmath(superscript_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_script); + register_asmath(math_script_cmd, tex_aux_run_insert_dollar_sign, tex_run_math_script); + + register_asmath(equation_number_cmd, tex_aux_run_illegal_case, tex_run_math_equation_number); + + register_asmath(left_brace_cmd, tex_aux_run_left_brace, tex_run_math_left_brace); + + /*tex These have different handlers: */ + + register_runner(italic_correction_cmd, tex_aux_run_illegal_case, tex_aux_run_text_italic_correction, tex_run_math_italic_correction); + register_runner(math_char_number_cmd, tex_aux_run_math_non_math, tex_run_text_math_char_number, 
tex_run_math_math_char_number); + // register_runner(math_char_given_cmd, tex_aux_run_math_non_math, tex_run_text_math_char_given, tex_run_math_math_char_given); + // register_runner(math_char_xgiven_cmd, tex_aux_run_math_non_math, tex_run_text_math_char_xgiven, tex_run_math_math_char_xgiven); + register_runner(mathspec_cmd, tex_aux_run_math_non_math, tex_run_text_math_spec, tex_run_math_math_spec); + register_runner(vadjust_cmd, tex_aux_run_illegal_case, tex_run_vadjust, tex_run_vadjust); + + register_runner(char_given_cmd, tex_aux_run_new_paragraph, tex_aux_run_text_letter, tex_run_math_letter); + register_runner(other_char_cmd, tex_aux_run_new_paragraph, tex_aux_run_text_letter, tex_run_math_letter); + register_runner(letter_cmd, tex_aux_run_new_paragraph, tex_aux_run_text_letter, tex_run_math_letter); + + register_runner(accent_cmd, tex_aux_run_new_paragraph, tex_aux_run_text_accent, tex_run_math_accent); + register_runner(boundary_cmd, tex_aux_run_par_boundary, tex_aux_run_text_boundary, tex_aux_run_math_boundary); + register_runner(char_number_cmd, tex_aux_run_new_paragraph, tex_aux_run_text_char_number, tex_run_math_char_number); + register_runner(discretionary_cmd, tex_aux_run_new_paragraph, tex_aux_run_discretionary, tex_aux_run_discretionary); + register_runner(explicit_space_cmd, tex_aux_run_new_paragraph, tex_aux_run_space, tex_aux_run_space); + register_runner(math_shift_cmd, tex_aux_run_new_paragraph, tex_run_math_initialize, tex_run_math_shift); + register_runner(math_shift_cs_cmd, tex_aux_run_new_paragraph, tex_run_math_initialize, tex_run_math_shift); + + register_runner(end_paragraph_cmd, tex_aux_run_paragraph_end_vmode, tex_aux_run_paragraph_end_hmode, tex_aux_run_relax); + register_runner(spacer_cmd, tex_aux_run_relax, tex_aux_run_space, tex_aux_run_math_space); + register_runner(begin_paragraph_cmd, tex_aux_run_begin_paragraph_vmode, tex_aux_run_begin_paragraph_hmode, tex_aux_run_begin_paragraph_mmode); + register_runner(end_job_cmd, 
tex_aux_run_end_job, tex_aux_run_head_for_vmode, tex_aux_run_insert_dollar_sign); + + /*tex + These can share a handler if we move the mode test (we then also have 5 command codes + less) but it becomes less pretty for rules and so. When in the wrong mode, a mode change + is enforced and the token is pushed back and ready for a new inspection. + */ + + register_runner(hmove_cmd, tex_aux_run_move, tex_aux_run_illegal_case, tex_aux_run_illegal_case); + register_runner(vmove_cmd, tex_aux_run_illegal_case, tex_aux_run_move, tex_aux_run_move); + + register_runner(hskip_cmd, tex_aux_run_new_paragraph, tex_aux_run_glue, tex_aux_run_glue); + register_runner(vskip_cmd, tex_aux_run_glue, tex_aux_run_head_for_vmode, tex_aux_run_insert_dollar_sign); + + register_runner(un_hbox_cmd, tex_aux_run_new_paragraph, tex_run_unpackage, tex_run_unpackage); + register_runner(un_vbox_cmd, tex_run_unpackage, tex_aux_run_head_for_vmode, tex_aux_run_insert_dollar_sign); + + register_runner(halign_cmd, tex_run_alignment_initialize, tex_aux_run_head_for_vmode, tex_aux_run_halign_mmode); + register_runner(valign_cmd, tex_aux_run_new_paragraph, tex_run_alignment_initialize, tex_aux_run_insert_dollar_sign); + + register_runner(hrule_cmd, tex_aux_run_hrule, tex_aux_run_head_for_vmode, tex_aux_run_insert_dollar_sign); + register_runner(vrule_cmd, tex_aux_run_new_paragraph, tex_aux_run_vrule, tex_aux_run_mrule); + + /* Just in case: */ + + register_runner(ignore_cmd, tex_aux_run_relax, tex_aux_run_relax, tex_aux_run_relax); + + /*tex The next is unlikely to happen but compilers like the check.
*/ + +# if (main_control_mode == 0) + } else { +# else + default: + printf("cmd code %i", cmd); + tex_confusion("unknown cmd code"); + break; +# endif + } + +} + +# if (main_control_mode == 0) + +inline static void tex_aux_big_switch(int mode, int cmd) +{ + (jump_table[mode + cmd])(); +} + +# endif + +/*tex + Some preset values no longer make sense, like family 1 for some math symbols but we keep them + for compatibility reasons. All settings are moved to the relevant modules. + +*/ + +void tex_initialize_variables(void) +{ + if (lmt_main_state.run_state == initializing_state) { + /* mag_par = 1000; */ + tolerance_par = default_tolerance; + hang_after_par = default_hangafter; + max_dead_cycles_par = default_deadcycles; + math_pre_display_gap_factor_par = default_pre_display_gap; + /* pre_binary_penalty_par = infinite_penalty; */ + /* pre_relation_penalty_par = infinite_penalty; */ + /* math_script_box_mode_par = 1; */ + /* math_script_char_mode_par = 1; */ + /* math_flatten_mode_par = 1; */ /*tex We default to ord */ /* obsolete */ + math_font_control_par = assumed_math_control; + math_eqno_gap_step_par = default_eqno_gap_step; + px_dimen_par = one_bp; + show_node_details_par = 2; /*tex $>1$: |[subtype]| $>2$: |[attributes]| */ + ex_hyphen_char_par = '-'; + escape_char_par = '\\'; + end_line_char_par = '\r'; + output_box_par = default_output_box; + adjust_spacing_step_par = -1; + adjust_spacing_stretch_par = -1; + adjust_spacing_shrink_par = -1; + math_double_script_mode_par = -1, + math_glue_mode_par = default_math_glue_mode; + hyphenation_mode_par = default_hyphenation_mode; + glyph_scale_par = 1000; + glyph_x_scale_par = 1000; + glyph_y_scale_par = 1000; + glyph_x_offset_par = 0; + glyph_y_offset_par = 0; + math_begin_class_par = math_begin_class; + math_end_class_par = math_end_class; + math_left_class_par = unset_noad_class; + math_right_class_par = unset_noad_class; + aux_get_date_and_time(&time_par, &day_par, &month_par, &year_par, 
&lmt_engine_state.utc_time); + } +} diff --git a/source/luametatex/source/tex/texmaincontrol.h b/source/luametatex/source/tex/texmaincontrol.h new file mode 100644 index 000000000..b71aaedac --- /dev/null +++ b/source/luametatex/source/tex/texmaincontrol.h @@ -0,0 +1,76 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_MAINCONTROL_H +# define LMT_MAINCONTROL_H + +/*tex + + To handle the execution state of |main_control|'s eternal loop, an extra global variable is + used, along with a macro to define its values. + +*/ + +typedef enum control_states { + goto_next_state, + goto_skip_token_state, + goto_return_state, +} control_states; + +typedef struct main_control_state_info { + control_states control_state; + int local_level; + halfword after_token; + halfword after_tokens; + halfword last_par_context; + halfword loop_iterator; + halfword loop_nesting; + halfword quit_loop; +} main_control_state_info; + +typedef enum saved_discretionary_items { + saved_discretionary_item_component = 0, + saved_discretionary_n_of_items = 1, +} saved_discretionary_items; + +extern main_control_state_info lmt_main_control_state; + +extern void tex_initialize_variables (void); +extern int tex_main_control (void); + +extern void tex_normal_paragraph (int context); +extern void tex_begin_paragraph (int doindent, int context); +extern void tex_end_paragraph (int group, int context); +extern int tex_wrapped_up_paragraph (int context); + +extern void tex_insert_paragraph_token (void); + +extern int tex_in_privileged_mode (void); +extern void tex_you_cant_error (const char *helpinfo); + +extern void tex_off_save (void); + +extern halfword tex_local_scan_box (void); +extern void tex_box_end (int boxcontext, halfword boxnode, scaled shift, halfword mainclass); + +extern void tex_get_r_token (void); + +extern void tex_begin_local_control (void); +extern void tex_end_local_control (void); +extern void tex_local_control (int obeymode); +extern void tex_local_control_message 
(const char *s); +extern void tex_page_boundary_message (const char *s, halfword boundary); + +extern void tex_inject_text_or_line_dir (int d, int check_glue); + +extern void tex_run_prefixed_command (void); + +extern void tex_handle_assignments (void); /*tex Used in math. */ + +extern void tex_assign_internal_int_value (int a, halfword p, int val); +extern void tex_assign_internal_attribute_value (int a, halfword p, int val); +extern void tex_assign_internal_dimen_value (int a, halfword p, int val); +extern void tex_assign_internal_skip_value (int a, halfword p, int val); + +# endif diff --git a/source/luametatex/source/tex/texmarks.c b/source/luametatex/source/tex/texmarks.c new file mode 100644 index 000000000..060c5f579 --- /dev/null +++ b/source/luametatex/source/tex/texmarks.c @@ -0,0 +1,346 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + A control sequence that has been |\def|'ed by the user is expanded by \TEX's |macro_call| + procedure. + + Before we get into the details of |macro_call|, however, let's consider the treatment of + primitives like |\topmark|, since they are essentially macros without parameters. The token + lists for such marks are kept in five global arrays of pointers; we refer to the individual + entries of these arrays by symbolic macros |top_mark|, etc. The value of |top_mark (x)|, etc. + is either |null| or a pointer to the reference count of a token list. + + The variable |biggest_used_mark| is an aid to try and keep the code somewhat efficient without + too much extra work: it registers the highest mark class ever instantiated by the user, so the + loops in |fire_up| and |vsplit| do not have to traverse the full range |0 .. biggest_mark|. + + Watch out: zero is always valid and the good old single mark!
+ + Todo: class -> index + +*/ + +mark_state_info lmt_mark_state = { + .data = NULL, + .min_used = -1, + .max_used = -1, + .mark_data = { + .minimum = min_mark_size, + .maximum = max_mark_size, + .size = memory_data_unset, + .step = stp_mark_size, + .allocated = 0, + .itemsize = sizeof(mark_record), + .top = 0, + .ptr = 0, + .initial = memory_data_unset, + .offset = 0, + }, +}; + +void tex_initialize_marks(void) +{ + /* allocated: minimum + 1 */ + lmt_mark_state.data = aux_allocate_clear_array(sizeof(mark_record), lmt_mark_state.mark_data.minimum, 1); + if (lmt_mark_state.data) { + lmt_mark_state.mark_data.allocated = sizeof(mark_record) * lmt_mark_state.mark_data.minimum; + lmt_mark_state.mark_data.top = lmt_mark_state.mark_data.minimum; + } +} + +void tex_reset_mark(halfword m) +{ + if (m >= lmt_mark_state.mark_data.top) { + int step = lmt_mark_state.mark_data.step; + int size = lmt_mark_state.mark_data.top; + /* regular stepwise bump */ + while (m >= size) { + size += step; + } + /* last resort */ + if (size > lmt_mark_state.mark_data.maximum) { + size = m; + } + if (size <= lmt_mark_state.mark_data.maximum) { + mark_record *tmp = aux_reallocate_array(lmt_mark_state.data, sizeof(mark_record), (size_t) size, 1); + if (tmp) { + lmt_mark_state.data = tmp; + memset(&lmt_mark_state.data[lmt_mark_state.mark_data.top], 0, sizeof(mark_record) * (size - lmt_mark_state.mark_data.top)); + lmt_mark_state.mark_data.top = size; + lmt_mark_state.mark_data.allocated = sizeof(mark_record) * ((size_t) size); + } else { + tex_overflow_error("marks", size); + } + } else { + tex_overflow_error("marks", lmt_mark_state.mark_data.maximum); + } + } + if (m > lmt_mark_state.mark_data.ptr) { + lmt_mark_state.mark_data.ptr = m; + } + tex_wipe_mark(m); +} + +halfword tex_get_mark(halfword m, halfword s) +{ + if (s >= 0 && s <= last_unique_mark_code) { + return lmt_mark_state.data[m][s]; + } else { + return null; + } +} + +void tex_set_mark(halfword m, halfword s, halfword v) +{ + if (s >= 0 
&& s <= last_unique_mark_code) { + if (lmt_mark_state.data[m][s]) { + tex_delete_token_reference(lmt_mark_state.data[m][s]); + } + if (v) { + tex_add_token_reference(v); + } + lmt_mark_state.data[m][s] = v; + } +} + +int tex_valid_mark(halfword m) { + if (m >= lmt_mark_state.mark_data.top) { + tex_reset_mark(m); + } + return m < lmt_mark_state.mark_data.top; +} + +halfword tex_new_mark(quarterword subtype, halfword class, halfword ptr) +{ + halfword mark = tex_new_node(mark_node, subtype); + mark_index(mark) = class; + mark_ptr(mark) = ptr; + if (lmt_mark_state.min_used < 0) { + lmt_mark_state.min_used = class; + lmt_mark_state.max_used = class; + } else { + if (class < lmt_mark_state.min_used) { + lmt_mark_state.min_used = class; + } + if (class > lmt_mark_state.max_used) { + lmt_mark_state.max_used = class; + } + } + tex_set_mark(class, current_marks_code, ptr); + return mark; +} + +static void tex_aux_print_mark(const char *s, halfword t) +{ + if (t) { + tex_print_token_list(s, token_link(t)); + } +} + +void tex_show_marks() +{ + if (tracing_marks_par > 0 && lmt_mark_state.min_used >= 0) { + tex_begin_diagnostic(); + for (halfword m = lmt_mark_state.min_used; m <= lmt_mark_state.max_used; m++) { + if (tex_has_mark(m)) { + tex_print_format("[mark: class %i, page state]",m); + tex_aux_print_mark("top", tex_get_mark(m, top_marks_code)); + tex_aux_print_mark("first", tex_get_mark(m, first_marks_code)); + tex_aux_print_mark("bot", tex_get_mark(m, bot_marks_code)); + tex_aux_print_mark("split first", tex_get_mark(m, split_first_marks_code)); + tex_aux_print_mark("split bot", tex_get_mark(m, split_bot_marks_code)); + tex_aux_print_mark("current", tex_get_mark(m, current_marks_code)); + } + } + tex_end_diagnostic(); + } +} + +void tex_update_top_marks() +{ + if (lmt_mark_state.min_used >= 0) { + for (halfword m = lmt_mark_state.min_used; m <= lmt_mark_state.max_used; m++) { + halfword bot = tex_get_mark(m, bot_marks_code); + if (bot) { + tex_set_mark(m, top_marks_code, 
bot); + if (tracing_marks_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[mark: class %i, top becomes bot]", m); + tex_aux_print_mark(NULL, bot); + tex_end_diagnostic(); + } + tex_delete_mark(m, first_marks_code); + } + } + } +} + +void tex_update_first_and_bot_mark(halfword n) +{ + halfword index = mark_index(n); + halfword ptr = mark_ptr(n); + if (node_subtype(n) == reset_mark_value_code) { + /*tex Work in progress. */ + if (tracing_marks_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[mark: index %i, reset]", index); + tex_end_diagnostic(); + } + tex_reset_mark(index); + } else { + /*tex Update the values of |first_mark| and |bot_mark|. */ + halfword first = tex_get_mark(index, first_marks_code); + if (! first) { + tex_set_mark(index, first_marks_code, ptr); + if (tracing_marks_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[mark: index %i, first becomes mark]", index); + tex_aux_print_mark(NULL, ptr); + tex_end_diagnostic(); + } + } + tex_set_mark(index, bot_marks_code, ptr); + if (tracing_marks_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[mark: index %i, bot becomes mark]", index); + tex_aux_print_mark(NULL, ptr); + tex_end_diagnostic(); + } + } +} + +void tex_update_first_marks(void) +{ + if (lmt_mark_state.min_used >= 0) { + for (halfword m = lmt_mark_state.min_used; m <= lmt_mark_state.max_used; m++) { + halfword top = tex_get_mark(m, top_marks_code); + halfword first = tex_get_mark(m, first_marks_code); + if (top && ! 
first) { + tex_set_mark(m, first_marks_code, top); + if (tracing_marks_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[mark: class %i, first becomes top]", m); + tex_aux_print_mark(NULL, top); + tex_end_diagnostic(); + } + } + } + } +} + +void tex_update_split_mark(halfword n) +{ + halfword index = mark_index(n); + halfword ptr = mark_ptr(n); + if (node_subtype(n) == reset_mark_value_code) { + tex_reset_mark(index); + } else { + if (tex_get_mark(index, split_first_marks_code)) { + tex_set_mark(index, split_bot_marks_code, ptr); + if (tracing_marks_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[mark: index %i, split bot becomes mark]", index); + tex_aux_print_mark(NULL, tex_get_mark(index, split_bot_marks_code)); + tex_end_diagnostic(); + } + } else { + tex_set_mark(index, split_first_marks_code, ptr); + tex_set_mark(index, split_bot_marks_code, ptr); + if (tracing_marks_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[mark: index %i, split first becomes mark]", index); + tex_aux_print_mark(NULL, tex_get_mark(index, split_first_marks_code)); + tex_print_format("[mark: index %i, split bot becomes split first]", index); + tex_aux_print_mark(NULL, tex_get_mark(index, split_bot_marks_code)); + tex_end_diagnostic(); + } + } + } +} + + +void tex_delete_mark(halfword m, int what) +{ + switch (what) { + case top_mark_code : what = top_marks_code; + case first_mark_code : what = first_marks_code; + case bot_mark_code : what = bot_marks_code; + case split_first_mark_code: what = split_first_marks_code; + case split_bot_mark_code : what = split_bot_marks_code; + } + tex_set_mark(m, what, null); +} + +halfword tex_get_some_mark(halfword chr, halfword val) +{ + switch (chr) { + case top_mark_code : val = top_marks_code; + case first_mark_code : val = first_marks_code; + case bot_mark_code : val = bot_marks_code; + case split_first_mark_code: val = split_first_marks_code; + case split_bot_mark_code : val = split_bot_marks_code; + } + return 
tex_get_mark(val, chr); +} + +void tex_wipe_mark(halfword m) +{ + for (int what = 0; what <= last_unique_mark_code; what++) { + tex_set_mark(m, what, null); + } +} + +int tex_has_mark(halfword m) +{ + for (int what = 0; what <= last_unique_mark_code; what++) { + if (lmt_mark_state.data[m][what]) { + return 1; + } + } + return 0; +} + +/*tex + + The |make_mark| procedure has been renamed, because if the current chr code is 1, then the + actual command was |\clearmarks|, which did not generate a mark node but instead destroyed the + current mark related tokenlists. We now have proper reset nodes. + +*/ + +void tex_run_mark(void) +{ + halfword class = 0; + halfword code = cur_chr; + switch (code) { + case set_marks_code: + case clear_marks_code: + case flush_marks_code: + class = tex_scan_mark_number(); + break; + } + if (tex_valid_mark(class)) { + quarterword subtype = set_mark_value_code; + halfword ptr = null; + switch (code) { + case set_marks_code: + case set_mark_code: + ptr = tex_scan_toks_expand(0, NULL, 0); + break; + case clear_marks_code: + tex_wipe_mark(class); + return; + case flush_marks_code: + subtype = reset_mark_value_code; + break; + } + tex_tail_append(tex_new_mark(subtype, class, ptr)); + } else { + /* error already issued */ + } +} diff --git a/source/luametatex/source/tex/texmarks.h b/source/luametatex/source/tex/texmarks.h new file mode 100644 index 000000000..e787fa9d0 --- /dev/null +++ b/source/luametatex/source/tex/texmarks.h @@ -0,0 +1,65 @@ +/* + See license.txt in the root of this project. 
/*tex
    The codes that select a state of a mark class. The first six, |current_marks_code| upto
    |split_bot_marks_code|, are the ones that index a |mark_record| (which has
    |split_bot_marks_code + 1| entries) and the only ones that |tex_get_mark| and |tex_set_mark|
    accept. The last five are the singular variants; they are meant to be mapped onto their
    plural counterparts before a record is accessed, see the switches in |tex_delete_mark| and
    |tex_get_some_mark|.
*/

typedef enum get_mark_codes {
    current_marks_code,
    top_marks_code,
    first_marks_code,
    bot_marks_code,
    split_first_marks_code,
    split_bot_marks_code,
    /* these map to zero */ /* NOTE(review): presumably this means mark class zero; confirm at the call sites. */
    top_mark_code,         /*tex the mark in effect at the previous page break */
    first_mark_code,       /*tex the first mark between |top_mark| and |bot_mark| */
    bot_mark_code,         /*tex the mark in effect at the current page break */
    split_first_mark_code, /*tex the first mark found by |\vsplit| */
    split_bot_mark_code,   /*tex the last mark found by |\vsplit| */
} get_mark_codes;
a/source/luametatex/source/tex/texmath.c b/source/luametatex/source/tex/texmath.c new file mode 100644 index 000000000..d38cbf182 --- /dev/null +++ b/source/luametatex/source/tex/texmath.c @@ -0,0 +1,5593 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + The code can be simplified a lot when we decide that traditional 8 bit fonts are virtualized in + a way that avoids the split delimiter definitions (small and large) and that the traditional + way to define characters is dropped in favor of the unicode variants. So, this might happen at + some point. After all it makes no sense to use this engine with traditional fonts because there + \PDFTEX\ is a better choice. + + We might also benefit more from the fact that we have prev pointers. Occasionally I visit this + file and make some variables more verbose. I'm in no hurry with that. + +*/ + +/*tex + + When \TEX\ reads a formula that is enclosed between |$|'s, it constructs an \quote {mlist}, + which is essentially a tree structure representing that formula. An mlist is a linear sequence + of items, but we can regard it as a tree structure because mlists can appear within mlists. For + example, many of the entries can be subscripted or superscripted, and such \quote {scripts} are + mlists in their own right. + + An entire formula is parsed into such a tree before any of the actual typesetting is done, + because the current style of type is usually not known until the formula has been fully scanned. + For example, when the formula |$a+b \over c+d$| is being read, there is no way to tell that |a+b| + will be in script size until |\over| has appeared. + + During the scanning process, each element of the mlist being built is classified as a relation, + a binary operator, an open parenthesis, etc., or as a construct like |\sqrt| that must be built + up. This classification appears in the mlist data structure. 
+ + After a formula has been fully scanned, the mlist is converted to an hlist so that it can be + incorporated into the surrounding text. This conversion is controlled by a recursive procedure + that decides all of the appropriate styles by a \quote {top-down} process starting at the + outermost level and working in towards the subformulas. The formula is ultimately pasted together + using combinations of horizontal and vertical boxes, with glue and penalty nodes inserted as + necessary. + + An mlist is represented internally as a linked list consisting chiefly of \quote {noads} + (pronounced \quotation {no-adds}), to distinguish them from the somewhat similar \quote {nodes} + in hlists and vlists. Certain kinds of ordinary nodes are allowed to appear in mlists together + with the noads; \TEX\ tells the difference by means of the |type| field, since a noad's |type| + is always greater than that of a node. An mlist does not contain character nodes, hlist nodes, + vlist nodes, math nodes or unset nodes; in particular, each mlist item appears in the + variable-size part of |mem|, so the |type| field is always present. + + Each noad is five or more words long. The first word contains the |type| and |subtype| and |link| + fields that are already so familiar to us; the second contains the attribute list pointer, and + the third, fourth and fifth words are called the noad's |nucleus|, |subscr|, and |supscr| fields. + (This use of a combined attribute list is temporary. Eventually, each of these fields needs its own + list.) + + Consider, for example, the simple formula |$x^2$|, which would be parsed into an mlist containing + a single element called an |ord_noad|. The |nucleus| of this noad is a representation of |x|, the + |subscr| is empty, and the |supscr| is a representation of |2|. + + The |nucleus|, |subscr|, and |supscr| fields are further broken into subfields.
If |p| points to + a noad, and if |q| is one of its principal fields (e.g., |q=subscr(p)|), |q=null| indicates a + field with no value (the corresponding attribute of noad |p| is not present). Otherwise, there are + several possibilities for the subfields, depending on the |type| of |q|. + + \startitemize + + \startitem + |type(q)=math_char_node| means that |math_fam(q)| refers to one of the sixteen font + families, and |character(q)| is the number of a character within a font of that family, as + in a character node. + \stopitem + + \startitem + |type(q) = math_text_char_node| is similar, but the character is unsubscripted and + unsuperscripted and it is followed immediately by another character from the same font. + (This |type| setting appears only briefly during the processing; it is used to suppress + unwanted italic corrections.) + \stopitem + + \startitem + |type(q) = sub_box_node| means that |math_list(q)| points to a box node (either an + |hlist_node| or a |vlist_node|) that should be used as the value of the field. The + |shift_amount| in the subsidiary box node is the amount by which that box will be + shifted downward. + \stopitem + + \startitem + |type(q) = sub_mlist_node| means that |math_list(q)| points to an mlist; the mlist must + be converted to an hlist in order to obtain the value of this field. + \stopitem + + \startitem + In the latter case, we might have |math_list(q) = null|. This is not the same as |q = + null|; for example, |$P_{}$| and |$P$| produce different results (the former will not + have the \quote {italic correction} added to the width of |P|, but the \quote {script + skip} will be added). + \stopitem + + \stopitemize + + Concerning display skips, \TEX\ normally always inserts before and only after when larger than + zero.
This can now be controlled with |\mathdisplayskipmode|: + + \starttabulate + \NC 0 \NC normal \TEX \NC \NR + \NC 1 \NC always \NC \NR + \NC 2 \NC non-zero \NC \NR + \NC 3 \NC ignore \NC \NR + \stoptabulate + +*/ + +math_state_info lmt_math_state = { + .size = 0, + .level = 0, + /* .opentype = 1, */ + /* .padding = 0, */ + .par_head = NULL, + .fam_head = NULL, + .last_left = 0, + .last_right = 0, + .last_atom = 0, + .scale = 1000, +}; + +static int tex_aux_scan_math (halfword p, halfword style, int usetextfont, halfword toks, halfword toks_text, int nocomponent, halfword cls, halfword all); +static halfword tex_aux_finish_math_list (halfword p); +static void tex_aux_math_math_component (halfword n, int append); + +# define cramped 1 + +# define cramped_style(A) (2 * ((A) / 2) + cramped) /*tex cramp the style */ +# define sub_style(A) (2 * ((A) / 4) + script_style + cramped) /*tex smaller and cramped */ +# define sup_style(A) (2 * ((A) / 4) + script_style + ((A) % 2)) /*tex smaller */ +# define num_style(A) ((A) + 2 - 2 * ((A) / 6)) /*tex smaller unless already scriptscript */ +# define denom_style(A) (2 * ((A) / 2) + cramped + 2 - 2 * ((A) / 6)) /*tex smaller, cramped */ +# define sup_sup_style(A) sup_style(sup_style((A))) /*tex smaller */ + +inline static mathdictval tex_fake_math_dict(halfword chr) +{ + mathdictval d = { 0, 0, 0 }; + if (math_dict_properties_par || math_dict_group_par) { + d.properties = (unsigned short) math_dict_properties_par; + d.group = (unsigned short) math_dict_group_par; + d.index = (unsigned int) chr; + } + return d; +} + +void tex_math_copy_char_data(halfword target, halfword source, int wipelist) +{ + if (node_type(source) == math_char_node) { + kernel_math_family(target) = kernel_math_family(source); + kernel_math_character(target) = kernel_math_character(source); + kernel_math_options(target) = kernel_math_options(source); + kernel_math_properties(target) = kernel_math_properties(source); + kernel_math_group(target) = 
kernel_math_group(source); + kernel_math_index(target) = kernel_math_index(source); + } else { + kernel_math_list(target) = kernel_math_list(source); + if (wipelist) { + kernel_math_list(source) = null; + } + } +} + +// static const math_styles map_cramped_style[] = { /*tex cramp the style */ +// cramped_display_style, +// cramped_display_style, +// cramped_text_style, +// cramped_text_style, +// cramped_script_style, +// cramped_script_style, +// cramped_script_script_style, +// cramped_script_script_style, +// }; +// +// static const math_styles map_subscript_style[] = { /*tex smaller and cramped */ +// cramped_script_style, +// cramped_script_style, +// cramped_script_style, +// cramped_script_style, +// cramped_script_script_style, +// cramped_script_script_style, +// cramped_script_script_style, +// cramped_script_script_style, +// }; +// +// static const math_styles map_superscript_style[] = { /*tex smaller */ +// script_style, +// script_style, +// script_style, +// script_style, +// script_script_style, +// script_script_style, +// script_script_style, +// script_script_style, +// }; +// +// static const math_styles map_numerator_style[] = {/*tex smaller unless already scriptscript */ +// script_style, +// cramped_script_style, +// script_style, +// cramped_script_style, +// script_script_style, +// cramped_script_script_style, +// script_script_style, +// cramped_script_script_style, +// }; +// +// static const math_styles map_denominator_style[] = { /*tex smaller, all cramped */ +// cramped_script_style, +// cramped_script_style, +// cramped_script_style, +// cramped_script_style, +// cramped_script_script_style, +// cramped_script_script_style, +// cramped_script_script_style, +// cramped_script_script_style, +// }; +// +// static const math_styles map_double_superscript_style[] = { /*tex smaller, keep cramped */ +// script_style, +// cramped_script_style, +// script_style, +// cramped_script_style, +// script_script_style, +// 
cramped_script_script_style, +// script_script_style, +// cramped_script_script_style, +// }; + +/*tex + This is very \TEX: a variable class influences the family being used. +*/ + +halfword tex_size_of_style(halfword style) +{ + switch (style) { + case script_style: + case cramped_script_style: + return script_size; + case script_script_style: + case cramped_script_script_style: + return script_script_size; + break; + default: + return text_size; + } +} + +halfword tex_math_style_variant(halfword style, halfword param) +{ + switch (tex_get_math_parameter(style, param, NULL)) { + case math_normal_style_variant: + return style; + case math_cramped_style_variant: + // return map_cramped_style[s]; + return cramped_style(style); + case math_subscript_style_variant: + // return map_subscript_style[s]; + return sub_style(style); + case math_superscript_style_variant: + case math_small_style_variant: + // return map_superscript_style[s]; + return sup_style(style); + case math_smaller_style_variant: + case math_numerator_style_variant: + // return map_numerator_style[s]; + return num_style(style); + case math_denominator_style_variant: + // return map_denominator_style[s]; + return denom_style(style); + case math_double_superscript_variant: + // return map_double_superscript_style[s]; + return sup_sup_style(style); + default: + return style; + } +} + +int tex_math_has_class_option(halfword cls, int option) +{ + halfword value = count_parameter(first_math_options_code + cls); + if (value == no_class_options) { + unsigned parent = (unsigned) count_parameter(first_math_parent_code + cls); + cls = (parent >> 16) & 0xFF; + if (! 
valid_math_class_code(cls)) { + return 0; + } + value = count_parameter(first_math_options_code + cls); + } + return (value & option) == option; +} + +static void tex_aux_unsave_math(void) +{ + tex_unsave(); + lmt_save_state.save_stack_data.ptr -= saved_math_n_of_items; + tex_flush_node_list(lmt_dir_state.text_dir_ptr); + if (saved_type(saved_math_item_direction) == saved_text_direction) { + lmt_dir_state.text_dir_ptr = saved_value(saved_math_item_direction); + } else { + tex_confusion("unsave math"); + } +} + +/*tex + + Sometimes it is necessary to destroy an mlist. The following subroutine empties the current + list, assuming that |abs(mode) = mmode|. + +*/ + +void tex_flush_math(void) +{ + halfword head = cur_list.head; + tex_flush_node_list(node_next(head)); + tex_flush_node_list(cur_list.incomplete_noad); + node_next(head) = null; + cur_list.tail = head; + cur_list.incomplete_noad = null; +} + +/*tex A printing helper. */ + +static void tex_aux_print_parameter(const char *what, halfword style, halfword param, halfword indirect, halfword value) +{ + tex_begin_diagnostic(); + tex_print_char('{'); + tex_print_str(what); + tex_print_char(' '); + if (indirect >= 0 && indirect <= last_math_indirect) { + tex_print_str(lmt_interface.math_indirect_values[indirect].name); + tex_print_char(' '); + } + if (param < math_parameter_last) { + tex_print_cmd_chr(set_math_parameter_cmd, param); + } else { + tex_print_format("%x %x ", math_parameter_spacing_left(param), math_parameter_spacing_right(param)); + } + tex_print_cmd_chr(math_style_cmd, style); + tex_print_char('='); + switch (math_parameter_value_type(param)) { + case math_int_parameter: + case math_style_parameter: + tex_print_int(value); + break; + case math_dimen_parameter: + tex_print_dimension(value, pt_unit); + break; + case math_muglue_parameter: + tex_print_spec(value, mu_unit); + break; + default: + tex_print_int(value); + break; + } + tex_print_char('}'); + tex_end_diagnostic(); +} + +static void 
/*tex
    Pops the entries from the save stack of the family font tree for as long as the absolute
    value of their level is at least |gl|, restoring the saved value of each entry that has a
    positive level. Nothing happens when the tree has no stack.
*/

static void tex_aux_unsave_math_fam_data(int gl)
{
    if (lmt_math_state.fam_head->stack) {
        while (lmt_math_state.fam_head->sa_stack_ptr > 0 && abs(lmt_math_state.fam_head->stack[lmt_math_state.fam_head->sa_stack_ptr].level) >= (int) gl) {
            sa_stack_item item = lmt_math_state.fam_head->stack[lmt_math_state.fam_head->sa_stack_ptr];
            /* NOTE(review): entries with a non positive level are dropped without a restore; presumably they stem from global assignments, confirm in the sparse array code. */
            if (item.level > 0) {
                sa_rawset_item_4(lmt_math_state.fam_head, item.code, item.value_1);
                /*tex Now do a trace message, if requested. */
                if (tracing_restores_par > 1) {
                    /* The code is |fam + 256 * size|, the same key as used in |tex_fam_fnt| and |tex_def_fam_fnt|. */
                    int size = item.code / 256;
                    int fam = item.code % 256;
                    tex_aux_print_fam("restoring", size, fam);
                }
            }
            (lmt_math_state.fam_head->sa_stack_ptr)--;
        }
    }
}
Todo: move the style into 2 */ + +void tex_def_math_parameter(int style, int param, scaled value, int level, int indirect) +{ + sa_tree_item item1, item2; + if (level <= 1) { + if (math_parameter_value_type(param) == math_muglue_parameter) { + item1 = sa_get_item_8(lmt_math_state.par_head, (param + (math_parameter_max_range * style)), &item2); + if (item2.int_value == indirect_math_regular && item1.int_value > thick_mu_skip_code) { + if (lmt_node_memory_state.nodesizes[item1.int_value]) { + tex_free_node(item1.int_value, glue_spec_size); + } + } + } + } + item1.int_value = value; + item2.int_value = indirect; + sa_set_item_8(lmt_math_state.par_head, (param + (math_parameter_max_range * style)), item1, item2, level); + if (tracing_assigns_par > 1) { + tex_aux_print_parameter("assigning", style, param, indirect, value); + } +} + +// mukern .. there is no mudimen + +scaled tex_get_math_parameter(int style, int param, halfword *type) +{ + halfword indirect, value; + sa_tree_item v2; + sa_tree_item v1 = sa_get_item_8(lmt_math_state.par_head, (param + (math_parameter_max_range * style)), &v2); + indirect = v2.int_value == lmt_math_state.par_head->dflt.int_value ? indirect_math_unset : v2.uint_value; + value = v1.int_value; + switch (indirect) { + case indirect_math_unset: + if (type) { + *type = no_val_level; + } + return MATHPARAMDEFAULT; + /* we stored nothing */ + case indirect_math_regular: + switch (math_parameter_value_type(param)) { + case math_dimen_parameter: + if (type) { + *type = dimen_val_level; + } + return value; + case math_muglue_parameter: + if (type) { + *type = mu_val_level; + } + return value <= thick_mu_skip_code ? mu_glue_parameter(value) : value; + // case math_int_parameter: + // case math_style_parameter: + default: + if (type) { + *type = int_val_level; + } + return value; + } + /* we stored cs */ + case indirect_math_integer: + if (! 
value) { + if (type) { + *type = int_val_level; + } + return value; + } else if (eq_type(value) == integer_cmd) { + if (type) { + *type = int_val_level; + } + return eq_value(value); + } else { + goto MISMATCH; + } + case indirect_math_dimension: + if (! value) { + if (type) { + *type = dimen_val_level; + } + return value; + } else if (eq_type(value) == dimension_cmd) { + if (type) { + *type = dimen_val_level; + } + return eq_value(value); + } else { + goto MISMATCH; + } + case indirect_math_mugluespec: + if (! value) { + if (type) { + *type = mu_val_level; + } + return value; + } else { + switch (eq_type(value)) { + case mugluespec_cmd: + if (type) { + *type = mu_val_level; + } + return eq_value(value); + default: + goto MISMATCH; + } + + } + case indirect_math_gluespec: + if (! value) { + if (type) { + *type = glue_val_level; + } + return value; + } else { + switch (eq_type(value)) { + case gluespec_cmd: + if (type) { + *type = glue_val_level; + } + return eq_value(value); + default: + goto MISMATCH; + } + } + /* we stored chr */ + case indirect_math_register_integer: + if (! value) { + if (type) { + *type = int_val_level; + } + return value; + } else if (eq_type(value) == register_int_reference_cmd) { + if (type) { + *type = int_val_level; + } + return eq_value(value); + } else { + goto MISMATCH; + } + case indirect_math_register_dimension: + if (! value) { + if (type) { + *type = dimen_val_level; + } + return value; + } else if (eq_type(value) == register_dimen_reference_cmd) { + if (type) { + *type = dimen_val_level; + } + return eq_value(value); + } else { + goto MISMATCH; + } + case indirect_math_register_gluespec: + if (! value) { + if (type) { + *type = glue_val_level; + } + return value; + } else if (eq_type(value) == register_glue_reference_cmd) { + if (type) { + *type = glue_val_level; + } + return eq_value(value); + } else { + goto MISMATCH; + } + case indirect_math_register_mugluespec: + if (! 
value) { + if (type) { + *type = mu_val_level; + } + return value; + } else if (eq_type(value) == register_mu_glue_reference_cmd) { + if (type) { + *type = mu_val_level; + } + return eq_value(value); + } else { + goto MISMATCH; + } + case indirect_math_internal_integer: + if (! value) { + if (type) { + *type = int_val_level; + } + return value; + } else if (eq_type(value) == internal_int_reference_cmd) { + if (type) { + *type = int_val_level; + } + return eq_value(value); + } else { + goto MISMATCH; + } + case indirect_math_internal_dimension: + if (! value) { + if (type) { + *type = dimen_val_level; + } + return value; + } else if (eq_type(value) == internal_dimen_reference_cmd) { + if (type) { + *type = dimen_val_level; + } + return eq_value(value); + } else { + goto MISMATCH; + } + case indirect_math_internal_gluespec: + if (! value) { + if (type) { + *type = glue_val_level; + } + return value; + } else if (eq_type(value) == internal_glue_reference_cmd) { + if (type) { + *type = glue_val_level; + } + return eq_value(value); + } else { + goto MISMATCH; + } + case indirect_math_internal_mugluespec: + if (! value) { + if (type) { + *type = mu_val_level; + } + return value; + } else if (eq_type(value) == internal_mu_glue_reference_cmd) { + if (type) { + *type = mu_val_level; + } + return eq_value(value); + } else { + goto MISMATCH; + } + default: + MISMATCH: + tex_handle_error( + normal_error_type, + "Invalid inherited math parameter", + "You probably changed the type of the inherited math parameter, so I will " + "use zero instead." + ); + return 0; + } +} + +int tex_has_math_parameter(int style, int param) +{ + sa_tree_item v2; + sa_get_item_8(lmt_math_state.par_head, (param + (math_parameter_max_range * style)), &v2); + return v2.int_value == lmt_math_state.par_head->dflt.int_value ? 
indirect_math_unset : v2.uint_value; +} + +static void tex_aux_unsave_math_parameter_data(int gl) +{ + if (lmt_math_state.par_head->stack) { + while (lmt_math_state.par_head->sa_stack_ptr > 0 && abs(lmt_math_state.par_head->stack[lmt_math_state.par_head->sa_stack_ptr].level) >= (int) gl) { + sa_stack_item item = lmt_math_state.par_head->stack[lmt_math_state.par_head->sa_stack_ptr]; + if (item.level > 0) { + int param = item.code % math_parameter_max_range; + int style = item.code / math_parameter_max_range; + sa_tree_item item1, item2; + if (math_parameter_value_type(param) == math_muglue_parameter) { + item1 = sa_get_item_8(lmt_math_state.par_head, item.code, &item2); + if (item2.int_value == indirect_math_regular && item1.int_value > thick_mu_skip_code) { + /* if (tex_valid_node(item1.int_value)) { */ + if (lmt_node_memory_state.nodesizes[item1.int_value]) { + // printf("HERE 2.1: %i %i / %i %i / %i\n",item2.int_value,item1.int_value, item.value_1.int_value, item.value_2.int_value, node_type(item1.int_value)); + tex_free_node(item1.int_value, glue_spec_size); + } else { + // printf("HERE 2.2: %i %i / %i %i / %i\n",item2.int_value,item1.int_value, item.value_1.int_value, item.value_2.int_value, node_type(item1.int_value)); + } + } + } + sa_rawset_item_8(lmt_math_state.par_head, item.code, item.value_1, item.value_2); + /*tex Do a trace message, if requested. */ + if (tracing_restores_par > 1) { + int indirect = item.value_2.int_value; + tex_aux_print_parameter("restoring", style, param, indirect, tex_get_math_parameter(style, param, NULL)); + } + } + lmt_math_state.par_head->sa_stack_ptr--; + } + } +} + +/*tex Saving and unsaving of both: */ + +void tex_unsave_math_data(int level) +{ + tex_aux_unsave_math_fam_data(level); + tex_aux_unsave_math_parameter_data(level); +} + +/*tex Dumping and undumping: */ + +void tex_dump_math_data(dumpstream f) +{ + if (! 
lmt_math_state.fam_head) { + lmt_math_state.fam_head = sa_new_tree(MATHFONTSTACK, 4, (sa_tree_item) { .int_value = MATHFONTDEFAULT }); + } + sa_dump_tree(f, lmt_math_state.fam_head); + if (! lmt_math_state.par_head) { + lmt_math_state.par_head = sa_new_tree(MATHPARAMSTACK, 8, (sa_tree_item) { .int_value = MATHPARAMDEFAULT }); + } + sa_dump_tree(f, lmt_math_state.par_head); +} + +void tex_undump_math_data(dumpstream f) +{ + lmt_math_state.fam_head = sa_undump_tree(f); + lmt_math_state.par_head = sa_undump_tree(f); +} + +void tex_initialize_math(void) +{ + if (! lmt_math_state.fam_head) { + lmt_math_state.fam_head = sa_new_tree(MATHFONTSTACK, 4, (sa_tree_item) { .int_value = MATHFONTDEFAULT }); + } + if (! lmt_math_state.par_head) { + lmt_math_state.par_head = sa_new_tree(MATHPARAMSTACK, 8, (sa_tree_item) { .int_value = MATHPARAMDEFAULT }); + tex_initialize_math_spacing(); + } + return; +} + +/*tex + + Each portion of a formula is classified as Ord, Op, Bin, Rel, Ope, Clo, Pun, or Inn, for purposes + of spacing and line breaking. An |ord_noad|, |op_noad|, |bin_noad|, |rel_noad|, |open_noad|, + |close_noad|, |punct_noad|, or |inner_noad| is used to represent portions of the various types. + For example, an |=| sign in a formula leads to the creation of a |rel_noad| whose |nucleus| field + is a representation of an equals sign (usually |fam = 0|, |character = 075|). A formula preceded + by |\mathrel| also results in a |rel_noad|. When a |rel_noad| is followed by an |op_noad|, say, + and possibly separated by one or more ordinary nodes (not noads), \TEX\ will insert a penalty + node (with the current |rel_penalty|) just after the formula that corresponds to the |rel_noad|, + unless there already was a penalty immediately following; and a \quote {thick space} will be + inserted just before the formula that corresponds to the |op_noad|. + + A noad of type |ord_noad|, |op_noad|, \dots, |inner_noad| usually has a |subtype = normal|. 
The + only exception is that an |op_noad| might have |subtype = limits| or |no_limits|, if the normal + positioning of limits has been overridden for this operator. + + A |radical_noad| also has a |left_delimiter| field, which usually represents a square root sign. + + A |fraction_noad| has a |right_delimiter| field as well as a |left_delimiter|. + + Delimiter fields have four subfields called |small_fam|, |small_char|, |large_fam|, |large_char|. + These subfields represent variable-size delimiters by giving the \quote {small} and \quote + {large} starting characters, as explained in Chapter~17 of {\em The \TEX book}. + + A |fraction_noad| is actually quite different from all other noads. It has |thickness|, + |denominator|, and |numerator| fields instead of |nucleus|, |subscr|, and |supscr|. The + |thickness| is a scaled value that tells how thick to make a fraction rule; however, the special + value |preset_rule_thickness| is used to stand for the |preset_rule_thickness| of the current + size. The |numerator| and |denominator| point to mlists that define a fraction; we always have + |type(numerator) = type(denominator) = sub_mlist|. The |left_delimiter| and |right_delimiter| + fields specify delimiters that will be placed at the left and right of the fraction. In this way, + a |fraction_noad| is able to represent all of \TEX's operators |\over|, |\atop|, |\above|, + |\overwithdelims|, |\atopwithdelims|, and |\abovewithdelims|. + + The |new_noad| function creates an |ord_noad| that is completely |null|. 
+ +*/ + +halfword tex_new_sub_box(halfword curbox) +{ + halfword noad = tex_new_node(simple_noad, ordinary_noad_subtype); + halfword sbox = tex_new_node(sub_box_node, 0); + noad_nucleus(noad) = sbox; + kernel_math_list(sbox) = curbox; + return noad; +} + +quarterword tex_aux_set_math_char(halfword target, mathcodeval *mval, mathdictval *dval) +{ + halfword hmcode = tex_get_hm_code(mval->character_value); + kernel_math_character(target) = mval->character_value; + if (mval->class_value == math_use_current_family_code) { + kernel_math_family(target) = cur_fam_par_in_range ? cur_fam_par : 0; + node_subtype(target) = ordinary_noad_subtype; + } else { + kernel_math_family(target) = mval->family_value; + node_subtype(target) = mval->class_value; + } + if (dval) { + kernel_math_properties(target) = dval->properties; + kernel_math_group(target) = dval->group; + kernel_math_index(target) = dval->index; + } + if ((hmcode & auto_discretionary_normal) == auto_discretionary_normal) { // has_discretionary_normal + math_kernel_node_set_option(target, math_kernel_auto_discretionary); + } + if ((hmcode & auto_discretionary_italic) == auto_discretionary_italic) { // has_discretionary_italic + math_kernel_node_set_option(target, math_kernel_full_discretionary); + } + return node_subtype(target); +} + +/*tex + + A few more kinds of noads will complete the set: An |under_noad| has its nucleus underlined; an + |over_noad| has it overlined. An |accent_noad| places an accent over its nucleus; the accent + character appears as |math_fam (accent_chr (p))| and |math_character (accent_chr (p))|. A + |vcenter_noad| centers its nucleus vertically with respect to the axis of the formula; in such + noads we always have |type (nucleus (p)) = sub_box|. + + And finally, we have the |fence_noad| type, to implement \TEX's |\left| and |\right| as well as + \ETEX's |\middle|. 
The |nucleus| of such noads is replaced by a |delimiter| field; thus, for + example, |\left(| produces a |fence_noad| such that |delimiter(p)| holds the family and + character codes for all left parentheses. A |fence_noad| of subtype |left_noad_side| never + appears in an mlist except as the first element, and a |fence_noad| with subtype + |right_noad_side| never appears in an mlist except as the last element; furthermore, we either + have both a |left_noad_side| and a |right_noad_side|, or neither one is present. + + Math formulas can also contain instructions like |\textstyle| that override \TeX's normal style + rules. A |style_node| is inserted into the data structure to record such instructions; it is + three words long, so it is considered a node instead of a noad. The |subtype| is either + |display_style| or |text_style| or |script_style| or |script_script_style|. The second and + third words of a |style_node| are not used, but they are present because a |choice_node| is + converted to a |style_node|. + + \TEX\ uses even numbers 0, 2, 4, 6 to encode the basic styles |display_style|, \dots, + |script_script_style|, and adds~1 to get the \quote {cramped} versions of these styles. This + gives a numerical order that is backwards from the convention of Appendix~G in {\em The \TEX + book}; i.e., a smaller style has a larger numerical value. 
+ +*/ + +void tex_run_math_style(void) { + switch (cur_chr) { + case yet_unset_math_style: + { + halfword style = tex_scan_math_style_identifier(1, 0); + if (is_valid_math_style(style)) { + halfword noad = tex_new_node(style_node, (quarterword) style); + cur_list.math_style = style; + tex_tail_append(noad); + } + } + break; + case scaled_math_style: + { + halfword noad = tex_new_node(style_node, scaled_math_style); + style_scale(noad) = tex_scan_int(0, NULL); + // style_scale(noad) = tex_scan_positive_scale(0); + tex_tail_append(noad); + } + break; + default: + if (is_valid_math_style(cur_chr)) { + halfword noad = tex_new_node(style_node, (quarterword) cur_chr); + cur_list.math_style = cur_chr; + tex_tail_append(noad); + } else { + /*tex For now silently ignored. */ + } + } +} + +/*tex + + Let's consider now the previously unwritten part of |show_node_list| that displays the things + that can only be present in mlists; this program illustrates how to access the data structures + just defined. + + In the context of the following program, |p| points to a node or noad that should be displayed, + and the current string contains the \quote {recursion history} that leads to this point. The + recursion history consists of a dot for each outer level in which |p| is subsidiary to some + node, or in which |p| is subsidiary to the |nucleus| field of some noad; the dot is replaced by + |_| or |^| or |/| or |\\| if |p| is descended from the |subscr| or |supscr| or |denominator| or + |numerator| fields of noads. For example, the current string would be |{\^_/}| if |p| points to + the |ord_noad| for |x| in the (ridiculous) formula {$\sqrt {a ^ {\mathinner {b _ + {c \over x+y}}}}$|. 
+ +*/ + +static void tex_aux_display_choice_noad (halfword n, int threshold, int max); +static void tex_aux_display_parameter_node (halfword n); +static void tex_aux_display_simple_noad (halfword n, int threshold, int max); +static void tex_aux_display_radical_noad (halfword n, int threshold, int max); +static void tex_aux_display_accent_noad (halfword n, int threshold, int max); +static void tex_aux_display_fence_noad (halfword n, int threshold, int max); +static void tex_aux_display_fraction_noad (halfword n, int threshold, int max); + +static void tex_aux_print_fam_and_char(halfword n) +{ + tex_print_format(", family %x, character %x, original %x", kernel_math_family(n), kernel_math_character(n)); + tex_aux_show_dictionary(n, kernel_math_properties(n), kernel_math_group(n), kernel_math_index(n), tex_fam_fnt(kernel_math_family(n), 0), kernel_math_character(n)); +} + +int tex_show_math_node(halfword n, int threshold, int max) +{ + switch (node_type(n)) { + case style_node: + /* why not shown? 
*/ + break; + case choice_node: + tex_aux_display_choice_noad(n, threshold, max); + break; + case parameter_node: + tex_aux_display_parameter_node(n); + break; + case simple_noad: + tex_aux_display_simple_noad(n, threshold, max); + break; + case radical_noad: + tex_aux_display_radical_noad(n, threshold, max); + break; + case accent_noad: + tex_aux_display_accent_noad(n, threshold, max); + break; + case fence_noad: + tex_aux_display_fence_noad(n, threshold, max); + break; + case fraction_noad: + tex_aux_display_fraction_noad(n, threshold, max); + break; + case math_text_char_node: + case math_char_node: + tex_aux_print_fam_and_char(n); + break; + case sub_box_node: + tex_print_node_list(kernel_math_list(n), NULL, threshold, max); + break; + case sub_mlist_node: + if (kernel_math_list(n)) { + tex_print_node_list(kernel_math_list(n), NULL, threshold, max); + } else { + tex_print_str(", empty"); + } + break; + default: + return 0; + } + return 1; +} + +inline halfword tex_aux_valid_delimiter(halfword d) +{ + return (d && (delimiter_small_family(d) || delimiter_small_character(d) || delimiter_large_family(d) || delimiter_large_character(d))) ? d : null; +} + +static void tex_aux_print_delimiter(halfword d) +{ + if (delimiter_small_family(d) < 0) { + /*tex This should never happen. */ + tex_print_int(-1); + } else if (delimiter_small_family(d) < 16 && delimiter_large_family(d) < 16 && delimiter_small_character(d) < 256 && delimiter_large_character(d) < 256) { + /*tex Traditional tex style. */ + int a = delimiter_small_family(d) * 256 + delimiter_small_character(d); + a = a * 0x1000 + delimiter_large_family(d) * 256 + delimiter_large_character(d); + tex_print_format(", code %x", a); + } else if ((delimiter_large_family(d) == 0 && delimiter_large_character(d) == 0) || delimiter_small_character(d) > 65535 || delimiter_large_character(d) > 65535) { + /*tex \LUATEX\ style. 
*/ + tex_print_format(", family %x, character %x", delimiter_small_family(d), delimiter_small_character(d)); + } +} + +/*tex + + The next subroutine will descend to another level of recursion when a subsidiary mlist needs to + be displayed. The parameter |c| indicates what character is to become part of the recursion + history. An empty mlist is distinguished from a missing field, because these are not equivalent + (as explained above). + +*/ + +static void tex_aux_display_common_noad(halfword n, int threshold, int max) +{ + tex_print_node_list(noad_nucleus(n), "nucleus", threshold, max); + tex_print_node_list(noad_supscr(n), "superscript", threshold, max); + tex_print_node_list(noad_subscr(n), "subscript", threshold, max); + tex_print_node_list(noad_supprescr(n), "superprescript", threshold, max); + tex_print_node_list(noad_subprescr(n), "subprescript", threshold, max); + tex_print_node_list(noad_prime(n), "primescript", threshold, max); + tex_print_node_list(noad_new_hlist(n), "newhlist", threshold, max); +} + +static void tex_aux_display_parameter_node(halfword n) +{ + tex_print_format(", id %i, style %i", parameter_name(n), parameter_style(n)); +} + +static void tex_aux_display_choice_noad(halfword n, int threshold, int max) +{ + switch (node_subtype(n)) { + case normal_choice_subtype: + tex_print_node_list(choice_display_mlist(n), "display", threshold, max); + tex_print_node_list(choice_text_mlist(n), "text", threshold, max); + tex_print_node_list(choice_script_mlist(n), "script", threshold, max); + tex_print_node_list(choice_script_script_mlist(n), "scriptscript", threshold, max); + break; + case discretionary_choice_subtype: + tex_print_format(", class %i", choice_class(n)); + tex_print_node_list(choice_pre_break(n), "pre", threshold, max); + tex_print_node_list(choice_post_break(n), "post", threshold, max); + tex_print_node_list(choice_no_break(n), "replace", threshold, max); + break; + } +} + +static void tex_aux_display_simple_noad(halfword n, int 
threshold, int max) +{ + if (noad_source(n)) { + tex_print_format(", source %i", noad_source(n)); + } + tex_aux_display_common_noad(n, threshold, max); +} + +static void tex_aux_display_radical_noad(halfword n, int threshold, int max) +{ + if (noad_width(n)) { + tex_print_format(", width %D", noad_width(n), pt_unit); + } + if (radical_height(n)) { + tex_print_format(", height %D", radical_height(n), pt_unit); + } + if (radical_depth(n)) { + tex_print_format(", depth %D", radical_depth(n), pt_unit); + } + if (noad_source(n) != 0) { + tex_print_format(", source %i", noad_source(n)); + } + if (noad_options(n)) { + tex_print_format(", options %x", noad_options(n)); + } + if (radical_left_delimiter(n)) { + tex_print_str(", left"); + tex_aux_print_delimiter(radical_left_delimiter(n)); + } + if (radical_right_delimiter(n)) { + tex_print_str(", right"); + tex_aux_print_delimiter(radical_right_delimiter(n)); + } + if (radical_degree(n)) { + tex_print_node_list(radical_degree(n), "degree", threshold, max); + } + tex_aux_display_common_noad(n, threshold, max); +} + +static void tex_aux_display_accent_noad(halfword n, int threshold, int max) +{ + halfword top_char = accent_top_character(n); + halfword bottom_char = accent_bottom_character(n); + halfword fraction = accent_fraction(n); + if (fraction) { + tex_print_str(", fraction "); + tex_print_int(fraction); + } + switch (node_subtype(n)) { + case bothflexible_accent_subtype: + if (top_char) { + tex_print_str(", top "); + tex_aux_print_fam_and_char(top_char); + } + if (bottom_char) { + tex_print_str(", bottom "); + tex_aux_print_fam_and_char(bottom_char); + } + if (! 
(top_char || bottom_char)) { + tex_print_str(", overlay "); + tex_aux_print_fam_and_char(accent_middle_character(n)); + } + break; + case fixedtop_accent_subtype: + if (top_char) { + tex_print_str(", fixed top "); + tex_aux_print_fam_and_char(top_char); + } + if (bottom_char) { + tex_print_str(", bottom "); + tex_aux_print_fam_and_char(bottom_char); + } + break; + case fixedbottom_accent_subtype: + if (top_char) { + tex_print_str(", top "); + tex_aux_print_fam_and_char(top_char); + } + if (bottom_char) { + tex_print_str(", fixed bottom "); + tex_aux_print_fam_and_char(bottom_char); + } + break; + case fixedboth_accent_subtype: + if (top_char) { + tex_print_str(", fixed top "); + tex_aux_print_fam_and_char(top_char); + } + if (bottom_char) { + tex_print_str(", fixed bottom "); + tex_aux_print_fam_and_char(bottom_char); + } + break; + } + tex_aux_display_common_noad(n, threshold, max); +} + +static void tex_aux_display_fence_noad(halfword n, int threshold, int max) +{ + if (noad_height(n)) { + tex_print_format(", height %D", noad_height(n), pt_unit); + } + if (noad_depth(n)) { + tex_print_format(", depth %D", noad_depth(n), pt_unit); + } + if (get_noad_main_class(n) >= 0) { + tex_print_format(", class %i", get_noad_main_class(n)); + } + if (get_noad_left_class(n) >= 0) { + tex_print_format(", leftclass %i", get_noad_left_class(n)); + } + if (get_noad_right_class(n) >= 0) { + tex_print_format(", rightclass %i", get_noad_right_class(n)); + } + if (noad_source(n) != 0) { + tex_print_format(", source %i", noad_source(n)); + } + if (noad_options(n)) { + tex_print_format(", options %x", noad_options(n)); + } + tex_aux_print_delimiter(fence_delimiter_list(n)); + tex_print_node_list(fence_delimiter_top(n), "top", threshold, max); + tex_print_node_list(fence_delimiter_bottom(n), "bottom", threshold, max); +} + +static void tex_aux_display_fraction_noad(halfword n, int threshold, int max) +{ + halfword leftdelimiter = tex_aux_valid_delimiter(fraction_left_delimiter(n)); + 
halfword rightdelimiter = tex_aux_valid_delimiter(fraction_right_delimiter(n)); + tex_print_str(", thickness "); + if (fraction_rule_thickness(n) == preset_rule_thickness) { + tex_print_str("default"); + } else { + tex_print_dimension(fraction_rule_thickness(n), pt_unit); + } + if (leftdelimiter) { + tex_print_str(", leftdelimiter "); + tex_aux_print_delimiter(leftdelimiter); + } + if (rightdelimiter) { + tex_print_str(", rightdelimiter "); + tex_aux_print_delimiter(rightdelimiter); + } + if (noad_source(n) != 0) { + tex_print_str(", source "); + tex_print_int(noad_source(n)); + } + if (noad_options(n)) { + tex_print_str(", options "); + tex_print_qhex(noad_options(n)); + } + tex_print_node_list(fraction_numerator(n), "numerator", threshold, max); + tex_print_node_list(fraction_denominator(n), "denominator", threshold, max); +} + +/*tex + + The routines that \TEX\ uses to create mlists are similar to those we have just seen for the + generation of hlists and vlists. But it is necessary to make \quote {noads} as well as nodes, + so the reader should review the discussion of math mode data structures before trying to make + sense out of the following program. + + Here is a little routine that needs to be done whenever a subformula is about to be processed. + The parameter is a code like |math_group|. 
+ +*/ + +static void tex_aux_new_save_level_math(quarterword group) +{ + halfword direction = math_direction_par; + tex_set_saved_record(saved_math_item_direction, saved_text_direction, 0, lmt_dir_state.text_dir_ptr); + lmt_save_state.save_stack_data.ptr += saved_math_n_of_items; + lmt_dir_state.text_dir_ptr = tex_new_dir(normal_dir_subtype, direction); + tex_new_save_level(group); + update_tex_par_direction(direction); + update_tex_text_direction(direction); +} + +static void tex_aux_push_math(quarterword group, int style) +{ + if (math_direction_par != text_direction_par) { + cur_list.math_dir = 1; + } + cur_list.math_begin = math_begin_class_par; + cur_list.math_end = math_end_class_par; + cur_list.math_main_style = style; + tex_push_nest(); + cur_list.mode = -mmode; + cur_list.incomplete_noad = null; + cur_list.math_style = style; + tex_aux_new_save_level_math(group); + update_tex_math_left_class(unset_noad_class); + update_tex_math_right_class(unset_noad_class); +} + +static void tex_aux_enter_ordinary_math(int style) +{ + tex_aux_push_math(math_shift_group, style); + update_tex_family(0, unused_math_family); + if (every_math_par) { + tex_begin_token_list(every_math_par, every_math_text); + } +} + +static void tex_aux_enter_display_math(halfword cmd); + +/*tex + + We get into math mode from horizontal mode when a |$| (i.e., a |math_shift| character) is + scanned. We must check to see whether this |$| is immediately followed by another, in case + display math mode is called for. + +*/ + +void tex_run_math_initialize(void) +{ + switch(cur_cmd) { + case math_shift_cmd: + /*tex |get_x_token| would fail on |\ifmmode|! 
*/ + tex_get_token(); + if (cur_cmd == math_shift_cmd && cur_list.mode > nomode) { + tex_aux_enter_display_math(math_shift_cmd); + } else { + tex_back_input(cur_tok); + tex_aux_enter_ordinary_math(text_style); + } + break; + case math_shift_cs_cmd: + if (cur_chr == begin_math_mode_code) { + tex_aux_enter_ordinary_math(tex_scan_math_style_identifier(0, 0)); + } else if (cur_chr == begin_display_math_code && cur_list.mode > nomode) { + tex_aux_enter_display_math(begin_display_math_code); + } else if (cur_chr == begin_inline_math_code) { + tex_aux_enter_ordinary_math(text_style); + } else { + tex_you_cant_error("math shift 1"); + } + break; + default: + tex_you_cant_error("math shift 2"); + break; + } +} + +/*tex + + We get into ordinary math mode from display math mode when |\eqno| or |\leqno| appears. In such + cases |cur_chr| will be 0 or~1, respectively; the value of |cur_chr| is placed onto |save_stack| + for safe keeping. When \TEX\ is in display math mode, |cur_group = math_shift_group|, so it is + not necessary for the |start_eq_no| procedure to test for this condition. + +*/ + +void tex_run_math_equation_number(void) { + if (tex_in_privileged_mode()) { + if (cur_group == math_shift_group) { + tex_set_saved_record(saved_equation_number_item_location, saved_equation_number_location, 0, cur_chr); + lmt_save_state.save_stack_data.ptr += saved_equation_number_n_of_items; + tex_aux_enter_ordinary_math(text_style); + } else { + tex_off_save(); + } + } +} + +/*tex + + Subformulas of math formulas cause a new level of math mode to be entered, on the semantic nest + as well as the save stack. These subformulas arise in several ways: (1)~A left brace by itself + indicates the beginning of a subformula that will be put into a box, thereby freezing its glue + and preventing line breaks. (2)~A subscript or superscript is treated as a subformula if it is + not a single character; the same applies to the nucleus of things like |\underline|. 
(3)~The + |\left| primitive initiates a subformula that will be terminated by a matching |\right|. The + group codes placed on |save_stack| in these three cases are |math_group|, |math_group|, and + |math_left_group|, respectively. + + Here is the code that handles case (1); the other cases are not quite as trivial, so we shall + consider them later. + +*/ + +void tex_run_math_left_brace(void) +{ + if (math_grouping_mode_par) { + /*tex This is an experiment. Some tracing has to be adapted probably. */ + tex_new_save_level(math_simple_group); + update_tex_internal_math_style(cur_mode == mmode ? cur_list.math_style : -1); + update_tex_internal_math_scale(cur_mode == mmode ? cur_list.math_scale : -1); + } else { + halfword q = tex_new_node(math_char_node, 0); + halfword n = tex_new_node(simple_noad, ordinary_noad_subtype); + tex_tail_append(n); + noad_nucleus(n) = q; + tex_back_input(cur_tok); + tex_aux_scan_math(q, cur_list.math_style, 0, 0, 0, 0, unset_noad_class, unset_noad_class); + } +} + +/*tex + + If the inline directions of |\pardir| and |\mathdir| are opposite, then this function will + return true. Discovering that fact is somewhat odd because it needs traversal of the + |save_stack|. The occurance of displayed equations is weird enough that this is probably still + better than having yet another field in the |input_stack| structures. + + None of this makes much sense if the inline direction of either one of |\pardir| or |\mathdir| + is vertical, but in that case the current math machinery is ill suited anyway so I do not + bother to test that. We now just return the direction. + +*/ + +static int tex_aux_pre_math_par_direction(void) +{ + return tex_located_save_value(internal_int_location(par_direction_code)); +} + +/*tex + + When we enter display math mode, we need to call |line_break| to process the partial paragraph + that has just been interrupted by the display. 
Then we can set the proper values of
    |display_width| and |display_indent| and |pre_display_size|.

*/

static void tex_aux_enter_display_math(halfword cmd)
{
    /*tex new or partial |pre_display_size| */
    scaled presize;
    /*tex new |display_width| */
    scaled width;
    /*tex new |display_indent| */
    scaled indent;
    /*tex does the tail directly follow the head? */
    int tailfollowshead;
    if (math_display_mode_par) {
        /*tex In this mode there is no paragraph to finish: we just push a display style list. */
        tex_aux_push_math(math_shift_group, display_style);
        cur_list.math_mode = cmd;
        update_tex_family(0, unused_math_family);
        if (every_display_par) {
            tex_begin_token_list(every_display_par, every_display_text);
        }
        return;
    }
    /*tex
        Deal with |\noindent$$| or |$${ }$$| or the 2nd of |$${ }$$| |$${ }$$|.
    */
    tailfollowshead = node_next(cur_list.head) == cur_list.tail;
    if (cur_list.head == cur_list.tail || (tailfollowshead && node_type(cur_list.tail) == par_node && ! node_next(cur_list.tail))) {
        if (tailfollowshead) {
            /*tex

                |resume_after_display| inserts a |par_node|, but if there is another display
                immediately following, we have to get rid of that node.

            */
            tex_flush_node(cur_list.tail);
            /* cur_list.tail = cur_list.head; */ /* probably needed */
        }
        tex_pop_nest();
        presize = - max_dimen;
    } else {
        tex_line_break(1, math_shift_group);
        /*tex The gap is the em width of the current font scaled by the gap factor (in 1/1000). */
        presize = tex_actual_box_width(lmt_linebreak_state.just_box, scaledround((tex_get_font_em_width(cur_font_par) / 1000.0) * math_pre_display_gap_factor_par));
    }
    /*tex

        Now we are in vertical mode, working on the list that will contain the display. A displayed
        equation is considered to be three lines long, so we calculate the length and offset of line
        number |prev_graf + 2|.

    */
    width = hsize_par;
    indent = 0;
    if (par_shape_par) {
        /*tex scope of paragraph shape specification */
        int n = tex_get_specification_count(par_shape_par);
        if (n > 0) {
            if (cur_list.prev_graf + 2 < n) {
                n = cur_list.prev_graf + 2;
            }
            indent = tex_get_specification_indent(par_shape_par, n) ;
            width = tex_get_specification_width(par_shape_par, n);
            indent = swap_parshape_indent(pre_display_direction_par, indent, width);
        }
    } else if ((hang_indent_par != 0) && (((hang_after_par >= 0) && (cur_list.prev_graf + 2 > hang_after_par)) || (cur_list.prev_graf + 1 < -hang_after_par))) {
        halfword hangindent = swap_hang_indent(pre_display_direction_par, hang_indent_par);
        width = hsize_par - abs(hangindent);
        if (hangindent > 0) {
            indent = hangindent;
        }
    }
    tex_aux_push_math(math_shift_group, display_style);
    cur_list.mode = mmode;
    update_tex_family(0, unused_math_family);
    update_tex_pre_display_size(presize);
    update_tex_display_width(width);
    update_tex_display_indent(indent);
    update_tex_pre_display_direction(tex_aux_pre_math_par_direction());
    if (every_display_par) {
        tex_begin_token_list(every_display_par, every_display_text);
    }
    /*tex At nesting level one the page builder gets a chance, after the callback has been run. */
    if (lmt_nest_state.nest_data.ptr == 1) {
        if (! lmt_page_builder_state.output_active) {
            lmt_page_filter_callback(before_display_page_context, 0);
        }
        tex_build_page();
    }
}

/*tex

    The next routine parses all variations of a delimiter code. The |extcode| tells what syntax form
    to use (\TEX\ or \LUATEX) , the |doclass| tells whether or not read a math class also (for
    |\delimiter| c.s.). The class is passed on for conversion to |\mathchar|.
+ +*/ + +static delcodeval tex_aux_scan_extdef_del_code(int extcode, int doclass) +{ + delcodeval d = tex_no_del_code(); + switch (extcode) { + case tex_mathcode: + /*tex This is the easiest: |\delcode|,*/ + { + halfword v = tex_scan_int(0, NULL); + /*tex |MFCCFCC| or |FCCFCC| */ + if (doclass) { + d.small.class_value = (short) (v / 0x1000000); + v = (v & 0xFFFFFF); + } + if (v > 0xFFFFFF) { + tex_handle_error( + normal_error_type, + "Invalid delimiter code", + "I'm going to use 0 instead of that illegal code value." + ); + v = 0; + } + d.small.family_value = (short) (v / 0x100000); + d.small.character_value = (v % 0x100000) / 0x1000; + d.large.family_value = (short) ((v & 0xFFF) / 0x100); + d.large.character_value = (v % 0x100); + /* */ + d.small.character_value = math_character_part(d.small.character_value); + d.large.character_value = math_character_part(d.large.character_value); + } + break; + case umath_mathcode: + /*tex |\Udelcode|: |<0-7><0-0xFF><0-0x10FFFF>| or |<0-0xFF><0-0x10FFFF>| */ + { + if (doclass) { + d.small.class_value = (short) tex_scan_math_class_number(0); + } + d.small.family_value = (short) tex_scan_math_family_number(); + d.small.character_value = tex_scan_math_char_number(); + if (d.small.family_value < 0 || d.small.family_value > max_math_family_index) { + tex_handle_error( + normal_error_type, + "Invalid delimiter family", + "I'm going to use family 0 instead." + ); + d.small.family_value = 0; + d.small.character_value = 0; + } + } + break; + /* + case umathnum_mathcode: + // |\Udelcodenum|: |"FF<21bits>|; the largest numeric value is $2^29-1$, but the top of + // bit 21 can't be used as it contains invalid USV's. 
+ if (doclass) { + tex_confusion("umathnum mathcode"); + } else { + halfword v = tex_scan_int(0, NULL); + d.small.family_value = (short) math_family_part(v); + d.small.character_value = math_character_part(v); + if (d.small.family_value < 0 || d.small.family_value > max_math_family_index || d.small.character_value > max_math_character_code) { + tex_handle_error( + normal_error_type, + "Invalid delimiter code", + "I'm going to use 0 instead of that illegal code value." + ); + d.small.family_value = 0; + d.small.character_value = 0; + } + } + break; + */ + default: + /*tex Something's gone wrong! */ + tex_confusion("unknown extcode, case 1"); + break; + } + d.large.class_value = d.small.class_value; + return d; +} + +void tex_scan_extdef_del_code(int level, int extcode) +{ + delcodeval d; + int chr = tex_scan_char_number(0); + tex_scan_optional_equals(); + d = tex_aux_scan_extdef_del_code(extcode, 0); + tex_set_del_code(chr, d, (quarterword) level); +} + +mathdictval tex_scan_mathdict(void) +{ + mathdictval d = { 0, 0, 0 }; /* use this one directly */ + d.properties = (unsigned short) tex_scan_math_properties_number(); + d.group = (unsigned short) tex_scan_math_group_number(); + d.index = (unsigned int) tex_scan_math_index_number(); + return d; +} + +mathcodeval tex_scan_mathchar(int extcode) +{ + mathcodeval d = { 0, 0, 0 }; /* use this one directly */ + switch (extcode) { + case tex_mathcode: + /*tex |"<4bits><4bits><8bits>| */ + { + halfword v = tex_scan_int(0, NULL); + if (v >= 0) { + if (v > 0xFFFF) { + v = 0xFFFF; + } + d.class_value = (short) math_old_class_part(v); + d.family_value = (short) math_old_family_part(v); + d.character_value = math_old_character_part(v); + } + } + break; + case umath_mathcode: + /*tex |"<6bits>"<6bits>"<20bits>| */ + { + d.class_value = (short) tex_scan_math_class_number(0); + d.family_value = (short) tex_scan_math_family_number(); + d.character_value = tex_scan_math_char_number(); + } + break; + /* + case umathnum_mathcode: + // 
|"<6bits><6bits><20bits>|: the largest numeric value is $2^32-1$, but the top of bit 21 can't + // be used as it contains invalid USV's. Note: |scan_int| won't accept families 128-255 + // because these use bit 32. + { + halfword v = tex_scan_int(0, NULL); + d.class_value = (short) math_class_part(v); + d.family_value = (short) math_family_part(v); + d.character_value = math_character_part(v); + } + break; + */ + default: + /*tex Something's gone wrong. */ + tex_confusion("unknown extcode, case 2"); + break; + } + if (d.class_value < 0 || d.character_value > max_math_character_code || d.class_value > max_math_class_code || d.family_value > max_math_family_index) { + tex_handle_error( + normal_error_type, + "Invalid math code", + "I'm going to use 0 instead of that illegal code value." + ); + d.class_value = 0; + d.family_value = 0; + d.character_value = 0; + } + return d; +} + +halfword tex_new_math_spec(mathcodeval m, quarterword code) +{ + halfword s = tex_new_node(math_spec_node, code); + math_spec_class(s) = (singleword) m.class_value; + math_spec_family(s) = (singleword) m.family_value; + math_spec_character(s) = m.character_value; + return s; +} + +halfword tex_new_math_dict_spec(mathdictval d, mathcodeval m, quarterword code) +{ + halfword s = tex_new_node(math_spec_node, code); + math_spec_class(s) = (singleword) m.class_value; + math_spec_family(s) = (singleword) m.family_value; + math_spec_character(s) = m.character_value; + math_spec_properties(s) = (quarterword) d.properties; + math_spec_group(s) = (quarterword) d.group; + math_spec_index(s) = d.index; + return s; +} + +mathcodeval tex_get_math_spec(halfword s) +{ + mathcodeval m = { 0, 0, 0 }; + if (s) { + m.class_value = math_spec_class(s); + m.family_value = math_spec_family(s); + m.character_value = math_spec_character(s); + } + return m; +} + +mathdictval tex_get_math_dict(halfword s) +{ + mathdictval d = { 0, 0, 0 }; + if (s) { + d.properties = math_spec_properties(s); + d.group = 
math_spec_group(s); + d.index = math_spec_index(s); + } + return d; +} + +halfword tex_scan_math_spec(int optional_equal) +{ + mathcodeval m; + if (optional_equal) { + tex_scan_optional_equals(); + } + m = tex_scan_mathchar(umath_mathcode); + return tex_new_math_spec(m, mathspec_mathcode); +} + +void tex_scan_extdef_math_code(int level, int extcode) +{ + mathcodeval d; + int chr = tex_scan_char_number(0); + tex_scan_optional_equals(); + d = tex_scan_mathchar(extcode); + tex_set_math_code(chr, d, (quarterword) level); +} + +/*tex This reads in a delcode when actually a mathcode is needed. */ + +mathcodeval tex_scan_delimiter_as_mathchar(int extcode) +{ + delcodeval dval = tex_aux_scan_extdef_del_code(extcode, 1); + return dval.small; +} + +/*tex + + Recall that the |nucleus|, |subscr|, and |supscr| fields in a noad are broken down into subfields + called |type| and either |math_list| or |(math_fam, math_character)|. The job of |scan_math| is + to figure out what to place in one of these principal fields; it looks at the subformula that + comes next in the input, and places an encoding of that subformula into a given word of |mem|. + + already prepared: every [component, degree, radical, over, under, accent, prime, subscript, + superscript] + + toks : every_subscript_par + toks_text : every_subscipt_text or every_math_text (for tracing) + +*/ + +/*tex + For some reason |$\char44$| gives an undefined |$| when we made that character active in math. 
*/

/*tex
    Replace the current character by the meaning of its active variant: the control sequence is
    looked up, |cur_cmd| and |cur_chr| are taken from it, the token gets expanded via
    |tex_x_token| and the result is pushed back so that the caller can read it again.
*/

static void tex_aux_scan_active_math_char(void)
{
    cur_cs = tex_active_to_cs(cur_chr, 1);
    cur_cmd = eq_type(cur_cs);
    cur_chr = eq_value(cur_cs);
    tex_x_token();
    tex_back_input(cur_tok);
}

/*tex
    Scan what goes into the kernel node |target|. When the next token denotes a single math
    character, |target| is turned into a |math_char_node|, filled in and |0| is returned. In all
    other cases a math group is pushed (for a braced subformula or for a math component) with
    |target| stored on the save stack, and |1| is returned.

    The incoming |toks| and |toks_text| values are overwritten with the |every_math_atom| ones
    before they are used, |nocomponent| makes a component command be handled like any other
    token (a left brace is then mandated), |cls| is remembered as the last atom class and |all|
    is saved alongside a braced subformula.
*/

static int tex_aux_scan_math(halfword target, halfword style, int usetextfont, halfword toks, halfword toks_text, int nocomponent, halfword cls, halfword all)
{
    mathcodeval mval = { 0, 0, 0 };
    mathdictval dval = { 0, 0, 0 };
    lmt_math_state.last_atom = cls;
  RESTART:
    /*tex Spaces and |\relax| in front of the argument are skipped. */
    do {
        tex_get_x_token();
    } while (cur_cmd == spacer_cmd || cur_cmd == relax_cmd);
// RESWITCH:
    switch (cur_cmd) {
        case char_number_cmd:
            /* The |\glyph| variant is accepted but no keywords here. */
            cur_chr = tex_scan_char_number(0);
            // fall through
        case letter_cmd:
        case other_char_cmd:
        case char_given_cmd:
            mval = tex_get_math_code(cur_chr);
            if (mval.class_value == active_math_class_value) {
                /*tex An active character is allowed here. */
                tex_aux_scan_active_math_char();
                goto RESTART;
            }
            dval = tex_fake_math_dict(mval.character_value);
            break;
     // case char_number_cmd:
     //     /* The |\glyph| variant is accepted but no keywords here. */
     //     cur_chr = tex_scan_char_number();
     //     cur_cmd = char_given_cmd;
     //     goto RESWITCH;
        case math_char_number_cmd:
            switch (cur_chr) {
                case math_char_number_code:
                    mval = tex_scan_mathchar(tex_mathcode);
                    break;
                case math_xchar_number_code:
                    mval = tex_scan_mathchar(umath_mathcode);
                    break;
                /*
                case math_uchar_number_code:
                    mval = tex_scan_mathchar(umathnum_mathcode);
                    break;
                */
                default:
                    tex_confusion("scan math char, case 1");
                    break;
            }
            dval = tex_fake_math_dict(mval.character_value);
            break;
        case mathspec_cmd:
            mval = tex_get_math_spec(cur_chr);
            dval = tex_get_math_dict(cur_chr);
            break;
     // case math_char_given_cmd:
     //     mval = tex_mathchar_from_integer(cur_chr, tex_mathcode);
     //     break;
     // case math_char_xgiven_cmd:
     //     mval = tex_mathchar_from_integer(cur_chr, umath_mathcode);
     //     break;
        case delimiter_number_cmd:
            /*tex Here |dval| keeps its zero initialization. */
            switch (cur_chr) {
                case math_delimiter_code:
                    mval = tex_scan_delimiter_as_mathchar(tex_mathcode);
                    break;
                case math_udelimiter_code:
                    mval = tex_scan_delimiter_as_mathchar(umath_mathcode);
                    break;
                default:
                    tex_confusion("scan math char, case 2");
                    break;
            }
            break;
        case math_component_cmd:
            if (nocomponent) {
                goto DEFAULT;
            } else {
                /*tex The component is scanned inside its own math group that is finished right away. */
                tex_set_saved_record(saved_math_group_item_pointer, saved_math_pointer, 0, target);
                tex_set_saved_record(saved_math_group_all_class, saved_math_class, 0, unset_noad_class);
                lmt_save_state.save_stack_data.ptr += saved_math_group_n_of_items;
                tex_aux_push_math(math_group, style);
                if (usetextfont) {
                    tex_set_math_text_font(style, usetextfont);
                }
                tex_aux_math_math_component(cur_list.tail, 0);
                tex_finish_math_group();
                return 1;
            }
        case left_brace_cmd:
            goto SCAN_SUBFORMULA;
        default:
            /*tex
                The pointer |p| is placed on |save_stack| while a complex subformula is being
                scanned.
            */
          DEFAULT:
            tex_back_input(cur_tok);
            tex_scan_left_brace();
          SCAN_SUBFORMULA:
            tex_set_saved_record(saved_math_group_item_pointer, saved_math_pointer, 0, target);
            tex_set_saved_record(saved_math_group_all_class, saved_math_class, 0, all);
            lmt_save_state.save_stack_data.ptr += saved_math_group_n_of_items;
            tex_aux_push_math(math_group, style);
            /*tex The values passed by the caller are replaced here. */
            toks = every_math_atom_par;
            toks_text = every_math_atom_text;
            if (toks) {
                tex_begin_token_list(toks, (quarterword) toks_text);
            }
            if (usetextfont) {
                tex_set_math_text_font(style, usetextfont);
            }
            return 1;
    }
    /*tex We only get here when a single math character was found. */
    node_type(target) = math_char_node;
    if (glyph_options_par & glyph_option_no_italic_correction) {
        math_kernel_node_set_option(target, math_kernel_no_italic_correction);
    }
    if (glyph_options_par & glyph_option_no_left_kern) {
        math_kernel_node_set_option(target, math_kernel_no_left_pair_kern);
    }
    if (glyph_options_par & glyph_option_no_right_kern) {
        math_kernel_node_set_option(target, math_kernel_no_right_pair_kern);
    }
    tex_aux_set_math_char(target, &mval, &dval);
    return 0;
}

/*tex

    The |append_math_char| procedure creates a new noad appropriate to a given math code, and
    appends it to the current mlist. However, if the math code is sufficiently large, the |cur_chr|
    is treated as an active character and nothing is appended.

*/

/*tex
    Append an accent noad that uses |mval| as top character (unless both character and family
    are zero) and scan its nucleus in the accent variant of the current style.
*/

static void tex_aux_append_math_accent(mathcodeval mval, mathdictval dval)
{
    halfword accent = tex_new_node(accent_noad, bothflexible_accent_subtype);
    quarterword subtype = ordinary_noad_subtype;
    tex_tail_append(accent);
    if (! (mval.character_value == 0 && mval.family_value == 0)) {
        halfword q = tex_new_node(math_char_node, 0);
        subtype = tex_aux_set_math_char(q, &mval, &dval);
        accent_top_character(accent) = q;
    }
    {
        halfword q = tex_new_node(math_char_node, subtype);
        noad_nucleus(accent) = q;
        tex_aux_scan_math(q, tex_math_style_variant(cur_list.math_style, math_parameter_accent_variant), 0, 0, 0, 0, unset_noad_class, unset_noad_class);
    }
}

/*tex
    Fences are actually constructs and middle sort of interferes here: we keep a sort of flat fence
    sequence so middle ends a group and opens a new one.

*/

/*tex
    Handle an (automatically injected) fence of the given |class|: an open one starts a fence
    group, a close one finishes the list and wraps it in a fenced simple noad, a middle one
    finishes the list and immediately starts a new fence group. Other classes are ignored.
*/

static void tex_aux_append_math_fence(halfword fence, quarterword class)
{
    switch (class) {
        case open_noad_subtype:
            {
                tex_aux_push_math(math_fence_group, cur_list.math_style);
                node_subtype(fence) = left_fence_side;
                node_next(cur_list.head) = fence;
                cur_list.tail = fence;
                cur_list.delim = fence;
            }
            break;
        case close_noad_subtype:
            {
                halfword q = tex_aux_finish_math_list(fence);
                halfword n = tex_new_node(simple_noad, fenced_noad_subtype);
                halfword l = tex_new_node(sub_mlist_node, 0);
                tex_aux_unsave_math();
                tex_tail_append(n);
                node_subtype(fence) = right_fence_side;
                noad_nucleus(n) = l;
                noad_options(n) |= noad_option_unpack_list;
                kernel_math_list(noad_nucleus(n)) = q;
            }
            break;
        case middle_noad_subtype:
            {
                halfword q = tex_aux_finish_math_list(fence);
                tex_aux_unsave_math();
                tex_aux_push_math(math_fence_group, cur_list.math_style);
                node_subtype(fence) = middle_fence_side;
                node_next(cur_list.head) = q;
                cur_list.tail = fence;
                cur_list.delim = fence;
            }
            break;
    }
}

/*tex
    Build a fence noad with a delimiter that uses the family and character of |mval| for both
    the small and the large variant and hand it to |tex_aux_append_math_fence|. A middle fence
    outside a fence group first gets an open fence with an all zero math code.
*/

static void tex_aux_append_math_fence_val(mathcodeval mval, mathdictval dval, quarterword class)
{
    halfword fence = tex_new_node(fence_noad, middle_fence_side);
    halfword delimiter = tex_new_node(delimiter_node, mval.class_value);
    (void) dval; /* maybe todo */
    fence_delimiter_list(fence) = delimiter;
    delimiter_small_family(delimiter) = mval.family_value;
    delimiter_small_character(delimiter) = mval.character_value;
    delimiter_large_family(delimiter) = mval.family_value;
    delimiter_large_character(delimiter) = mval.character_value;
    set_noad_classes(fence, mval.class_value);
    /* todo : share the next three with the regular fences */
    noad_options(fence) |= noad_option_no_check;
    if (class == middle_noad_subtype && cur_group != math_fence_group) {
        tex_aux_append_math_fence_val((mathcodeval) { 0, 0, 0 }, (mathdictval) { 0, 0, 0 }, open_noad_subtype);
    }
    tex_aux_append_math_fence(fence, class);
}

/*tex
    Append the math character |mval| to the current list. A character with the active class is
    replaced by its active meaning and nothing is appended. When |automatic| is set and the class
    has the auto inject option, accents and open, close and middle fences get their dedicated
    treatment; everything else becomes a simple noad with a |math_char_node| nucleus.
*/

static void tex_aux_append_math_char(mathcodeval mval, mathdictval dval, int automatic)
{
    if (mval.class_value == active_math_class_value) {
        /*tex An active character is allowed here */
        tex_aux_scan_active_math_char();
        return;
    } else {
        if (automatic && tex_math_has_class_option(mval.class_value, auto_inject_class_option)) {
            switch (mval.class_value) {
                case accent_noad_subtype:
                    tex_aux_append_math_accent(mval, dval);
                    return;
                case open_noad_subtype:
                case close_noad_subtype:
                case middle_noad_subtype:
                    tex_aux_append_math_fence_val(mval, dval, mval.class_value);
                    return;
            }
        }
        {
            halfword p = tex_new_node(simple_noad, ordinary_noad_subtype);
            halfword q = tex_new_node(math_char_node, 0);
            noad_nucleus(p) = q;
            if (glyph_options_par & glyph_option_no_italic_correction) {
                math_kernel_node_set_option(q, math_kernel_no_italic_correction);
            }
            /*tex The subtype of the noad is whatever |tex_aux_set_math_char| returns. */
            node_subtype(p) = tex_aux_set_math_char(q, &mval, &dval);
            tex_tail_append(p);
        }
    }
}

/*tex

    The |append_math_char_in_text| procedure creates a new node representing a math char in text
    code, and appends it to the current list. However, if the math code is sufficiently large, the
    |cur_chr| is treated as an active character and nothing is appended.
+ +*/ + +static void tex_aux_append_math_char_in_text(mathcodeval mval, mathdictval dval) +{ + (void) dval; + if (mval.class_value == active_math_class_value) { + /*tex An active character is allowed here. But why in text mode too. */ + tex_aux_scan_active_math_char(); + } else { + halfword p = tex_new_char_node(glyph_character_subtype, tex_fam_fnt(mval.family_value, text_size), mval.character_value, 1); /* todo: data */ + tex_tail_append(p); + } +} + +void tex_run_math_letter(void) +{ + tex_aux_append_math_char(tex_get_math_code(cur_chr), tex_fake_math_dict(cur_chr), 1); +} + +void tex_run_math_char_number(void) { + /*tex + Both |\char| and |\glyph| get the same treatment. Scanning can change |cur_chr| so we do + that first. We no longer check for active here! + */ + mathcodeval mval = { 0, 0, 0 }; + mathdictval dval = { 0, 0, 0 }; + cur_chr = tex_scan_char_number(0); + mval.character_value = cur_chr; + mval.family_value = (short) cur_fam_par; + // tex_aux_append_math_char(tex_get_math_code(cur_chr), tex_fake_math_dict(cur_chr)); + tex_aux_append_math_char(mval, dval, 1); +} + +void tex_run_math_math_spec(void) +{ + tex_aux_append_math_char(tex_get_math_spec(cur_chr), tex_get_math_dict(cur_chr), 1); +} + +void tex_run_text_math_spec(void) +{ + tex_aux_append_math_char_in_text(tex_get_math_spec(cur_chr), tex_get_math_dict(cur_chr)); +} + +int tex_scan_math_cmd_val(mathcodeval *mval, mathdictval *dval) +{ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + switch (cur_cmd) { + // case math_char_given_cmd: + // *mval = tex_mathchar_from_integer(cur_chr, tex_mathcode); + // break; + // case math_char_xgiven_cmd: + // *mval = tex_mathchar_from_integer(cur_chr, umath_mathcode); + // break; + case mathspec_cmd: + *mval = tex_get_math_spec(cur_chr); + break; + case math_char_number_cmd: + switch (cur_chr) { + case math_char_number_code: + *mval = tex_scan_mathchar(tex_mathcode); + break; + case math_xchar_number_code: + *mval = 
tex_scan_mathchar(umath_mathcode); + break; + case math_dchar_number_code: + *dval = tex_scan_mathdict(); + *mval = tex_scan_mathchar(umath_mathcode); + break; + /* + case math_uchar_number_code: + *mval = tex_scan_mathchar(umathnum_mathcode); + break; + */ + default: + /* no message yet */ + return 0; + } + break; + case letter_cmd: + case other_char_cmd: + mval->character_value = cur_chr; + break; + default: + { + halfword n = 0; + tex_back_input(cur_tok); + n = tex_scan_int(0, NULL); + *mval = tex_mathchar_from_integer(n, umath_mathcode); + } + break; + } + return 1; +} + +int tex_scan_math_code_val(halfword code, mathcodeval *mval, mathdictval *dval) +{ + switch (code) { + case math_char_number_code: + *mval = tex_scan_mathchar(tex_mathcode); + break; + case math_xchar_number_code: + *mval = tex_scan_mathchar(umath_mathcode); + break; + case math_dchar_number_code: + *dval = tex_scan_mathdict(); + *mval = tex_scan_mathchar(umath_mathcode); + break; + /* + case math_uchar_number_code: + *mval = tex_scan_mathchar(umathnum_mathcode); + break; + */ + case math_class_number_code: + { + halfword family = cur_fam_par; + halfword class = tex_scan_int(0, NULL); + tex_scan_math_cmd_val(mval, dval); + mval->class_value = (short) class; + mval->family_value = (short) family; + } + break; + default: + /* no message yet */ + tex_back_input(cur_tok); + return 0; + } + return 1; +} + +void tex_run_text_math_char_number(void) { + mathcodeval mval = { 0, 0, 0 }; + mathdictval dval = { 0, 0, 0 }; + if (tex_scan_math_code_val(cur_chr, &mval, &dval)) { + tex_aux_append_math_char_in_text(mval, dval); + } +} + +void tex_run_math_math_char_number(void) { + mathcodeval mval = { 0, 0, 0 }; + mathdictval dval = { 0, 0, 0 }; + if (tex_scan_math_code_val(cur_chr, &mval, &dval)) { + tex_aux_append_math_char(mval, dval, 1); + } +} + +/*tex We build up an argument to |append_math_char|: */ + +// void tex_run_text_math_char_given(void) { +// 
tex_aux_append_math_char_in_text(tex_mathchar_from_integer(cur_chr, tex_mathcode)); +// } +// +// void tex_run_math_math_char_given(void) { +// tex_aux_append_math_char(tex_mathchar_from_integer(cur_chr, tex_mathcode)); +// } + +/*tex We build up an argument to |append_math_char| the \LUATEX\ way: */ + +// void tex_run_text_math_char_xgiven(void) { +// tex_aux_append_math_char_in_text(tex_mathchar_from_integer(cur_chr, umath_mathcode)); +// } +// +// void tex_run_math_math_char_xgiven(void) { +// tex_aux_append_math_char(tex_mathchar_from_integer(cur_chr, umath_mathcode)); +// } + +void tex_run_math_delimiter_number(void) { + switch (cur_chr) { + case math_delimiter_code: + tex_aux_append_math_char(tex_scan_delimiter_as_mathchar(tex_mathcode), (mathdictval) { 0, 0, 0 }, 0); + break; + case math_udelimiter_code: + tex_aux_append_math_char(tex_scan_delimiter_as_mathchar(umath_mathcode), (mathdictval) { 0, 0, 0 }, 0); + break; + } +} + +/*tex + In original \TEX\ the subtype overlaps the class. Here we are more strict: a subtype is the + main class as in original \TEX\ but we also have overloads: main, left and right. The subtype + drives the rendering, the others the spacing etc. 
+*/ + +static void tex_aux_math_math_component(halfword target, int append) +{ + quarterword subtype = unset_noad_class; + quarterword allclass = unset_noad_class; + halfword style = cur_list.math_style; + int usetextfont = math_atom_no_font_option; + reset_noad_classes(target); + switch (cur_chr) { + case math_component_ordinary_code: + subtype = ordinary_noad_subtype; + break; + case math_component_operator_code: + subtype = operator_noad_subtype; + break; + case math_component_binary_code: + subtype = binary_noad_subtype; + break; + case math_component_relation_code: + subtype = relation_noad_subtype; + break; + case math_component_open_code: + subtype = open_noad_subtype; + break; + case math_component_close_code: + subtype = close_noad_subtype; + break; + case math_component_punctuation_code: + subtype = punctuation_noad_subtype; + break; + case math_component_variable_code: + subtype = variable_noad_subtype; + break; + case math_component_inner_code: + subtype = inner_noad_subtype; + break; + case math_component_under_code: + subtype = under_noad_subtype; + style = tex_math_style_variant(style, math_parameter_under_line_variant); + break; + case math_component_over_code: + subtype = over_noad_subtype; + style = tex_math_style_variant(style, math_parameter_over_line_variant); + break; + case math_component_fraction_code: + subtype = fraction_noad_subtype; + break; + case math_component_radical_code: + subtype = radical_noad_subtype; + break; + case math_component_middle_code: + subtype = middle_noad_subtype; + break; + case math_component_accent_code: + subtype = accent_noad_subtype; + break; + case math_component_fenced_code: + subtype = fenced_noad_subtype; + break; + case math_component_ghost_code: + subtype = ghost_noad_subtype; + break; + case math_component_atom_code: + { + halfword attrlist = null; + while (1) { + switch (tex_scan_character("custnmaolprvCUSTNMAOLPRV", 0, 1, 0)) { + case 'a': case 'A': + switch (tex_scan_character("ltLT", 0, 0, 0)) { + 
case 't': case 'T': + if (tex_scan_mandate_keyword("attr", 2)) { + attrlist = tex_scan_attribute(attrlist); + } + break; + case 'l': case 'L': + if (tex_scan_mandate_keyword("all", 2)) { + allclass = (quarterword) tex_scan_math_class_number(0); + if (! valid_math_class_code(subtype)) { + allclass = unset_noad_class; + } + } + break; + default: + tex_aux_show_keyword_error("attr|all"); + goto DONE; + } + break; + case 'l': case 'L': + switch (tex_scan_character("ieIE", 0, 0, 0)) { + case 'e': case 'E': + if (tex_scan_mandate_keyword("leftclass", 2)) { + halfword c = tex_scan_math_class_number(0); + if (! valid_math_class_code(subtype)) { + c = ordinary_noad_subtype; + } + set_noad_left_class(target, c); + } + break; + case 'i': case 'I': + if (tex_scan_mandate_keyword("limits", 2)) { + noad_options(target) |= noad_option_limits; + } + break; + default: + tex_aux_show_keyword_error("leftclass|limits"); + goto DONE; + } + break; + case 'r': case 'R': + if (tex_scan_mandate_keyword("rightclass", 1)) { + halfword c = tex_scan_math_class_number(0); + if (! valid_math_class_code(c)) { + c = ordinary_noad_subtype; + } + set_noad_right_class(target, c); + } + break; + case 'c': case 'C': + if (tex_scan_mandate_keyword("class", 1)) { + subtype = (quarterword) tex_scan_math_class_number(0); + if (! valid_math_class_code(subtype)) { + subtype = ordinary_noad_subtype; + } + set_noad_main_class(target, subtype); + } + break; + case 'u': case 'U': + /*tex A bit over the top, three steps but a push back is still worse. We can scan for 'un'. 
*/ + if (tex_scan_character("nN", 0, 0, 0)) { + switch (tex_scan_character("prPR", 0, 0, 0)) { + case 'p': case 'P': + if (tex_scan_mandate_keyword("unpack", 3)) { + noad_options(target) |= noad_option_unpack_list; + } + break; + case 'r': case 'R': + if (tex_scan_mandate_keyword("unroll", 3)) { + noad_options(target) |= noad_option_unroll_list; + } + break; + default: + tex_aux_show_keyword_error("unpack|unroll"); + goto DONE; + } + } + break; + case 's': case 'S': + if (tex_scan_mandate_keyword("source", 1)) { + noad_source(target) = tex_scan_int(0, NULL); + } + break; + case 't': case 'T': + if (tex_scan_mandate_keyword("textfont", 1)) { + usetextfont = math_atom_text_font_option; + } + break; + case 'm': case 'M': + if (tex_scan_mandate_keyword("mathfont", 1)) { + usetextfont = math_atom_math_font_option; + } + break; + case 'n': case 'N': + /*tex A bit over the top, three steps but a push back is still worse. We can scan for 'no'. */ + if (tex_scan_character("oO", 0, 0, 0)) { + switch (tex_scan_character("loLO", 0, 0, 0)) { + case 'l': case 'L': + if (tex_scan_mandate_keyword("nolimits", 3)) { + noad_options(target) |= noad_option_no_limits; + } + break; + case 'o': case 'O': + if (tex_scan_mandate_keyword("nooverflow", 3)) { + noad_options(target) |= noad_option_no_overflow; + } + break; + default: + tex_aux_show_keyword_error("nolimits|nooverflow"); + goto DONE; + } + } + break; + case 'o': case 'O': + /* no names, just numbers, we might also do that with other noads */ + if (tex_scan_mandate_keyword("options", 1)) { + noad_options(target) = tex_scan_int(0, NULL); + } + break; + case 'v': case 'V': + if (tex_scan_mandate_keyword("void", 1)) { + noad_options(target) |= noad_option_void; + } + break; + case 'p': case 'P': + if (tex_scan_mandate_keyword("phantom", 1)) { + noad_options(target) |= noad_option_phantom; + } + break; + default: + goto DONE; + } + } + DONE: + if (attrlist) { + tex_attach_attribute_list_attribute(target, attrlist); + } + if (subtype 
== unset_noad_class) { + if (get_noad_left_class(target) != unset_noad_class && get_noad_right_class(target) != unset_noad_class) { + subtype = ordinary_noad_subtype; + } else { + /* mandate, maybe we will just force a keyword */ + subtype = (quarterword) tex_scan_math_class_number(0); + } + } + } + break; + } + if (! valid_math_class_code(subtype)) { + subtype = ordinary_noad_subtype; + } + /*tex + Now we can scan for the content: + */ + { + halfword content = tex_new_node(math_char_node, 0); + noad_nucleus(target) = content; + node_subtype(target) = subtype; + if (append) { + tex_tail_append(target); + } + tex_aux_scan_math(content, style, usetextfont, 0, 0, 0, subtype, allclass); + } +} + +void tex_run_math_math_component(void) +{ + halfword n = tex_new_node(simple_noad, ordinary_noad_subtype); + tex_aux_math_math_component(n, 1); +} + +int tex_is_math_disc(halfword n) +{ + return + n && node_type(n) == hlist_node && box_list(n) && node_type(box_list(n)) == disc_node && + disc_class(box_list(n)) != unset_disc_class && ! node_next(box_list(n)); +} + +halfword tex_math_make_disc(halfword d) +{ + halfword q = tex_new_node(sub_mlist_node, 0); + halfword n = tex_new_node(simple_noad, (quarterword) disc_class(d)); + kernel_math_list(q) = d; + noad_nucleus(n) = q; + noad_options(n) = noad_option_unpack_list; + return n; +} + +/*tex + Easiest is to permit all modifiers and just ignore those that make no sense. We then can + stepwise support whatever modifier we like later on. 
+*/ + +void tex_run_math_modifier(void) +{ + halfword tail = cur_list.tail; + if (cur_list.head != tail && node_type(tail) == simple_noad) { // maybe all + switch (cur_chr) { + case adapt_to_left_modifier_code: + noad_options(tail) = unset_option(noad_options(tail), noad_option_adapt_to_right_size); + noad_options(tail) |= noad_option_adapt_to_left_size; + break; + case adapt_to_right_modifier_code: + noad_options(tail) = unset_option(noad_options(tail), noad_option_adapt_to_left_size); + noad_options(tail) |= noad_option_adapt_to_right_size; + break; + /* todo: actually this one can also be used for other types */ + case axis_modifier_code: + noad_options(tail) |= noad_option_axis; + break; + case no_axis_modifier_code: + noad_options(tail) |= noad_option_no_axis; + break; + case phantom_modifier_code: + noad_options(tail) |= noad_option_phantom; + break; + case void_modifier_code: + noad_options(tail) |= noad_option_void; + break; + case source_modifier_code: + noad_source(tail) = tex_scan_int(0, NULL); + break; + case openup_height_modifier_code: + noad_options(tail) |= noad_option_openup_height; + noad_height(tail) = tex_scan_dimen(0, 0, 0, 0, NULL); + break; + case openup_depth_modifier_code: + noad_options(tail) |= noad_option_openup_depth; + noad_depth(tail) = tex_scan_dimen(0, 0, 0, 0, NULL); + break; + case display_limits_modifier_code: + noad_options(tail) = unset_option(noad_options(tail), noad_option_limits | noad_option_no_limits); + break; + case limits_modifier_code: + noad_options(tail) = unset_option(noad_options(tail), noad_option_no_limits); + noad_options(tail) |= noad_option_limits; + break; + case no_limits_modifier_code: + noad_options(tail) = unset_option(noad_options(tail), noad_option_limits); + noad_options(tail) |= noad_option_no_limits; + break; + } + } +} + +/*tex + + Delimiter fields of noads are filled in by the |scan_delimiter| routine. 
The first parameter + of this procedure is the |mem| address where the delimiter is to be placed; the second tells + if this delimiter follows |\radical| or not. + +*/ + +static void tex_aux_scan_delimiter(halfword target, int code, int class) +{ + delcodeval dval = tex_no_del_code(); + mathcodeval mval = tex_no_math_code(); + switch (code) { + case no_mathcode: + /* can be integrated */ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd || cur_cmd == relax_cmd); + switch (cur_cmd) { + case letter_cmd: + case other_char_cmd: + dval = tex_get_del_code(cur_chr); + if (tex_has_del_code(dval)) { + goto REALDELIMITER; + } else { + mval = tex_get_math_code(cur_chr); + goto FAKEDELIMITER; + } + case delimiter_number_cmd: + switch (cur_chr) { + case math_delimiter_code: + /*tex |\delimiter| */ + dval = tex_aux_scan_extdef_del_code(tex_mathcode, 1); + break; + case math_udelimiter_code: + /*tex |\Udelimiter| */ + dval = tex_aux_scan_extdef_del_code(umath_mathcode, 1); + break; + default: + tex_confusion("scan delimiter, case 1"); + break; + } + goto REALDELIMITER; + case mathspec_cmd: + mval = tex_get_math_spec(cur_chr); + goto FAKEDELIMITER; + case math_char_number_cmd: + switch (cur_chr) { + case math_char_number_code: + mval = tex_scan_mathchar(tex_mathcode); + break; + case math_xchar_number_code: + mval = tex_scan_mathchar(umath_mathcode); + break; + /* + case math_uchar_number_code: + mval = tex_scan_mathchar(umathnum_mathcode); + break; + */ + default: + tex_confusion("scan math char, case 1"); + break; + } + goto FAKEDELIMITER; + } + break; + case tex_mathcode: + /*tex |\radical| */ + dval = tex_aux_scan_extdef_del_code(tex_mathcode, 1); + goto REALDELIMITER; + case umath_mathcode: + /*tex |\Uradical| */ + dval = tex_aux_scan_extdef_del_code(umath_mathcode, 0); + goto REALDELIMITER; + default: + tex_confusion("scan delimiter, case 2"); + goto REALDELIMITER; + } + FAKEDELIMITER: + if (class != unset_noad_class) { + mval.class_value = (short) class; + } + 
dval.small = mval; + dval.large = mval; + REALDELIMITER: + if (! target) { + return; + } else if (tex_has_del_code(dval)) { + node_subtype(target) = dval.small.class_value; + delimiter_small_family(target) = dval.small.family_value; + delimiter_small_character(target) = dval.small.character_value; + delimiter_large_family(target) = dval.large.family_value; + delimiter_large_character(target) = dval.large.character_value; + } else { + tex_back_input(cur_tok); + tex_handle_error( + normal_error_type, + "Missing delimiter (. inserted)", + "I was expecting to see something like '(' or '\\{' or '\\}' here. Acceptable\n" + "delimiters are characters whose \\delcode is nonnegative, or you can use\n" + "'\\delimiter <delimiter code>'." + ); + node_subtype(target) = unset_noad_class; + delimiter_small_family(target) = 0; + delimiter_small_character(target) = 0; + delimiter_large_family(target) = 0; + delimiter_large_character(target) = 0; + } + return; +} + +void tex_run_math_radical(void) +{ + halfword code = cur_chr; + halfword options = 0; + halfword radical = tex_new_node(radical_noad, (quarterword) code); + halfword style = yet_unset_math_style; + halfword variant = 0; /* quad, harmless */ + halfword attrlist = null; + tex_tail_append(radical); + /* only kewords to UI ones? 
*/ + while (1) { + switch (tex_scan_character("abeswlmrhndABESWLMRHDN", 0, 1, 0)) { + case 0: + goto DONE; + case 'a': case 'A': + if (tex_scan_mandate_keyword("attr", 1)) { + attrlist = tex_scan_attribute(attrlist); + } + break; + case 'e': case 'E': + if (tex_scan_mandate_keyword("exact", 1)) { + options = options | noad_option_exact; + } + break; + case 's': case 'S': + switch (tex_scan_character("toTO", 0, 0, 0)) { + case 't': case 'T': + if (tex_scan_mandate_keyword("style", 2)) { + switch (code) { + case normal_radical_subtype: + case radical_radical_subtype: + case root_radical_subtype: + case rooted_radical_subtype: + case delimited_radical_subtype: + style = tex_scan_math_style_identifier(1, 0); + break; + default: + /* ignore */ + break; + } + } + break; + case 'o': case 'O': + if (tex_scan_mandate_keyword("source", 2)) { + noad_source(radical) = tex_scan_int(0, NULL); + } + break; + default: + tex_aux_show_keyword_error("style|source"); + goto DONE; + } + break; + case 'w': case 'W': + if (tex_scan_mandate_keyword("width", 1)) { + noad_width(radical) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'd': case 'D': + if (tex_scan_mandate_keyword("depth", 1)) { + radical_depth(radical) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'h': case 'H': + if (tex_scan_mandate_keyword("height", 1)) { + radical_height(radical) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'l': case 'L': + if (tex_scan_mandate_keyword("left", 1)) { + options = options | noad_option_left; + } + break; + case 'm': case 'M': + if (tex_scan_mandate_keyword("middle", 1)) { + options = options | noad_option_middle; + } + break; + case 'r': case 'R': + if (tex_scan_mandate_keyword("right", 1)) { + options = options | noad_option_right; + } + break; + case 'n': case 'N': + if (tex_scan_mandate_keyword("nooverflow", 1)) { + options |= noad_option_no_overflow; + } + break; + default: + goto DONE; + } + } + DONE: + if (style == yet_unset_math_style) { + switch (code) { 
+ case normal_radical_subtype: + case radical_radical_subtype: + case root_radical_subtype: + variant = math_parameter_radical_variant; + break; + case under_delimiter_radical_subtype: + variant = math_parameter_under_delimiter_variant; + break; + case over_delimiter_radical_subtype: + variant = math_parameter_over_delimiter_variant; + break; + case delimiter_under_radical_subtype: + variant = math_parameter_delimiter_under_variant; + break; + case delimiter_over_radical_subtype: + variant = math_parameter_delimiter_over_variant; + break; + case delimited_radical_subtype: + variant = math_parameter_radical_variant; /* math_parameter_delimited_variant */ + break; + case h_extensible_radical_subtype: + variant = math_parameter_h_extensible_variant; + break; + } + style = variant ? tex_math_style_variant(cur_list.math_style, variant) : cur_list.math_style; + } + if (attrlist) { + tex_attach_attribute_list_attribute(radical, attrlist); + } + noad_options(radical) = options; + set_noad_style(radical, style); + { + switch (code) { + case normal_radical_subtype: + { + halfword left = tex_new_node(delimiter_node, 0); + radical_left_delimiter(radical) = left; + tex_aux_scan_delimiter(left, tex_mathcode, unset_noad_class); + } + break; + case radical_radical_subtype: + case root_radical_subtype: + case rooted_radical_subtype: + case delimited_radical_subtype: + { + halfword left = tex_new_node(delimiter_node, 0); + radical_left_delimiter(radical) = left; + tex_aux_scan_delimiter(left, umath_mathcode, unset_noad_class); + } + switch (code) { + case rooted_radical_subtype: + case delimited_radical_subtype: + { + halfword right = tex_new_node(delimiter_node, 0); + radical_right_delimiter(radical) = right; + tex_aux_scan_delimiter(right, umath_mathcode, unset_noad_class); + } + } + break; + case under_delimiter_radical_subtype: + case over_delimiter_radical_subtype: + case delimiter_under_radical_subtype: + case delimiter_over_radical_subtype: + case 
h_extensible_radical_subtype: + { + halfword left = tex_new_node(delimiter_node, 0); + radical_left_delimiter(radical) = left; + tex_aux_scan_delimiter(left, umath_mathcode, unset_noad_class); + } + break; + default: + tex_confusion("scan math radical"); + break; + } + } + switch (code) { + case h_extensible_radical_subtype: + /*tex type will change */ + { + halfword q = tex_new_node(sub_box_node, 0); + noad_nucleus(radical) = q; + break; + } + case root_radical_subtype: + case rooted_radical_subtype: + /*tex + The trick with the |node_next(q)| is used by |scan_math| to decide whether it needs to + go on. This code looks a bit weird, is it okay? So, here we directly pick up the two + lists while in choices we go through the somewhat complex \quote {complete} group based + mechanism. + */ + { + halfword q = tex_new_node(math_char_node, 0); + node_next(q) = radical; /* trick */ + radical_degree(radical) = q; + if (! tex_aux_scan_math(radical_degree(radical), tex_math_style_variant(style, math_parameter_degree_variant), 0, 0, 0, 0, unset_noad_class, unset_noad_class)) { + /*tex Actually it's always scriptscript I guess. */ + node_next(radical_degree(radical)) = null; + q = tex_new_node(math_char_node, 0); + noad_nucleus(radical) = q; + if (noad_style(radical) != style) { + /* We keep the style in the node for diagnostics. */ + tex_back_input(token_val(math_style_cmd, noad_style(radical))); + } + tex_aux_scan_math(q, tex_math_style_variant(style, math_parameter_radical_variant), 0, 0, 0, 0, unset_noad_class, unset_noad_class); + } + break; + } + default : + { + halfword q = tex_new_node(math_char_node, 0); + noad_nucleus(radical) = q; + tex_aux_scan_math(q, tex_math_style_variant(style, variant ? 
/*tex
    This one handles accents in math mode. A new |accent_noad| is appended to the current list,
    then the accent characters are scanned: |t| (top), |b| (bottom) and |o| (overlay). Each of
    them that ended up with a non zero character or family gets its own |math_char_node|. Finally
    the nucleus, the thing that gets accented, is scanned.
*/

void tex_run_math_accent(void)
{
    mathcodeval t = tex_no_math_code();
    mathcodeval b = tex_no_math_code();
    mathcodeval o = tex_no_math_code();
    halfword code = cur_chr;
    halfword accent = tex_new_node(accent_noad, bothflexible_accent_subtype);
    quarterword subtype = ordinary_noad_subtype;
    halfword attrlist = null;
    if (cur_cmd == accent_cmd) {
        /*tex We got here via the text mode accent command, so we complain but carry on. */
        tex_handle_error(
            normal_error_type,
            "Please use \\mathaccent for accents in math mode",
            "I'm changing \\accent to \\mathaccent here; wish me luck. (Accents are not the\n"
            "same in formulas as they are in text.)" );
    }
    tex_tail_append(accent);
    switch (code) {
        case math_accent_code:
            /*tex |\mathaccent| */
            t = tex_scan_mathchar(tex_mathcode);
            break;
        case math_uaccent_code:
            /*tex |\Umathaccent| */
            /*tex First the optional keys: |attr|, |source|, |fraction| and |nooverflow|. */
            while (1) {
                switch (tex_scan_character("ansfASFN", 0, 0, 0)) {
                    case 'a': case 'A':
                        if (tex_scan_mandate_keyword("attr", 1)) {
                            attrlist = tex_scan_attribute(attrlist);
                        }
                        break;
                    case 's': case 'S':
                        if (tex_scan_mandate_keyword("source", 1)) {
                            noad_source(accent) = tex_scan_int(0, NULL);
                        }
                        break;
                    case 'f': case 'F':
                        if (tex_scan_mandate_keyword("fraction", 1)) {
                            accent_fraction(accent) = tex_scan_int(0, NULL);
                        }
                        break;
                    case 'n': case 'N':
                        if (tex_scan_mandate_keyword("nooverflow", 1)) {
                            /*tex
                                Actually there never is an overflow but for consistency we do
                                accept this key. Maybe in the future it will be used.
                            */
                            noad_options(accent) |= noad_option_no_overflow;
                        }
                        break;
                    default:
                        goto DONE;
                }
            }
          DONE:
            /* todo: integrate in the above */
            /*tex
                Next comes the placement keyword. The first keyword that matches decides which
                of |t|, |b| and |o| get scanned; without a keyword we scan a top accent.
            */
            if (tex_scan_keyword("fixed")) {
                /*tex top */
                node_subtype(accent) = fixedtop_accent_subtype;
                t = tex_scan_mathchar(umath_mathcode);
            } else if (tex_scan_keyword("both")) {
                /*tex top bottom */
                if (tex_scan_keyword("fixed")) {
                    node_subtype(accent) = fixedtop_accent_subtype;
                }
                t = tex_scan_mathchar(umath_mathcode);
                /*tex A |fixed| before the bottom character always results in the fixed both subtype. */
                if (tex_scan_keyword("fixed")) {
                    node_subtype(accent) = fixedboth_accent_subtype;
                }
                b = tex_scan_mathchar(umath_mathcode);
            } else if (tex_scan_keyword("bottom")) {
                /*tex bottom */
                if (tex_scan_keyword("fixed")) {
                    node_subtype(accent) = fixedbottom_accent_subtype;
                }
                b = tex_scan_mathchar(umath_mathcode);
            } else if (tex_scan_keyword("top")) {
                /*tex top */
                if (tex_scan_keyword("fixed")) {
                    node_subtype(accent) = fixedtop_accent_subtype;
                }
                t = tex_scan_mathchar(umath_mathcode);
            } else if (tex_scan_keyword("overlay")) {
                /* overlay */
                /* NOTE(review): a fixed overlay reuses the fixed top subtype; confirm that this is intended. */
                if (tex_scan_keyword("fixed")) {
                    node_subtype(accent) = fixedtop_accent_subtype;
                }
                o = tex_scan_mathchar(umath_mathcode);
            } else {
                /*tex top */
                t = tex_scan_mathchar(umath_mathcode);
            }
            break;
        default:
            tex_confusion("scan math accent");
    }
    if (attrlist) {
        tex_attach_attribute_list_attribute(accent, attrlist);
    }
    /*tex
        Only the characters that were actually set get a node. The |subtype| that the last of
        these calls returns is used for the nucleus node below.
    */
    if (! (t.character_value == 0 && t.family_value == 0)) {
        halfword n = tex_new_node(math_char_node, 0);
        subtype = tex_aux_set_math_char(n, &t, NULL);
        accent_top_character(accent) = n;
    }
    if (! (b.character_value == 0 && b.family_value == 0)) {
        halfword n = tex_new_node(math_char_node, 0);
        subtype = tex_aux_set_math_char(n, &b, NULL);
        accent_bottom_character(accent) = n;
    }
    if (! (o.character_value == 0 && o.family_value == 0)) {
        halfword n = tex_new_node(math_char_node, 0);
        subtype = tex_aux_set_math_char(n, &o, NULL);
        accent_middle_character(accent) = n;
    }
    {
        /*tex The nucleus is scanned in the accent variant of the current style. */
        halfword n = tex_new_node(math_char_node, subtype);
        noad_nucleus(accent) = n;
        tex_aux_scan_math(n, tex_math_style_variant(cur_list.math_style, math_parameter_accent_variant), 0, 0, 0, 0, unset_noad_class, unset_noad_class);
    }
}

/*tex

    The routine that scans the four mlists of a |\mathchoice| is very much like the routine that
    builds discretionary nodes. Finally, the |\mathchoice| primitive creates a |choice_node|,
    which has special subfields |display_mlist|, |text_mlist|, |script_mlist|, and
    |script_script_mlist| pointing to the mlists for each style.

    Here we only start the first group: a record with the choice that is being scanned is put on
    the save stack, a math group is pushed and a left brace is scanned. The math discretionary
    and the stack primitive are started here as well.

*/

void tex_run_math_choice(void) {
    switch (cur_chr) {
        case math_discretionary_code:
            {
                halfword n = tex_new_node(choice_node, discretionary_choice_subtype);
                choice_class(n) = unset_noad_class;
                /*tex The only key that we accept here is |class|. */
                while (1) {
                    switch (tex_scan_character("cC", 0, 1, 0)) {
                        case 0:
                            goto DONE;
                        case 'c': case 'C':
                            if (tex_scan_mandate_keyword("class", 1)) {
                                choice_class(n) = tex_scan_math_class_number(0);
                            }
                            break;
                        default:
                            goto DONE;
                    }
                }
              DONE:
                tex_tail_append(n);
                /*tex We start with the pre break list, in the current style. */
                tex_set_saved_record(saved_choice_item_count, saved_choices_count, 0, math_pre_break_choice);
                lmt_save_state.save_stack_data.ptr += saved_choice_n_of_items;
                tex_aux_push_math(math_choice_group, cur_list.math_style);
                tex_scan_left_brace();
                break;
            }
        case math_choice_code:
            /*tex |\mathchoice| */
            {
                halfword n = tex_new_node(choice_node, normal_choice_subtype);
                tex_tail_append(n);
                /*tex We start with the display list, in display style. */
                tex_set_saved_record(saved_choice_item_count, saved_choices_count, 0, math_display_choice);
                lmt_save_state.save_stack_data.ptr += saved_choice_n_of_items;
                tex_aux_push_math(math_choice_group, display_style);
                tex_scan_left_brace();
                break;
            }
        case math_ustack_code:
            /*tex |\Ustack| */
            {
             // halfword m = tex_new_node(sub_mlist_node, 0); /* was for some reason a math_char_node */
                halfword m = tex_new_node(math_char_node, 0);
                halfword n = tex_new_node(simple_noad, ordinary_noad_subtype);
                halfword s = tex_math_style_variant(cur_list.math_style, math_parameter_stack_variant);
                tex_tail_append(n);
                noad_nucleus(n) = m;
                tex_scan_left_brace();
                /*tex The nucleus is remembered on the save stack and a regular math group is opened. */
                tex_set_saved_record(0, saved_math_pointer, 0, m);
                ++lmt_save_state.save_stack_data.ptr;
                tex_aux_push_math(math_group, s);
                break;
            }
    }
}
tex_new_node(sub_mlist_node, 0); /* was for some reason a math_char_node */ + halfword m = tex_new_node(math_char_node, 0); + halfword n = tex_new_node(simple_noad, ordinary_noad_subtype); + halfword s = tex_math_style_variant(cur_list.math_style, math_parameter_stack_variant); + tex_tail_append(n); + noad_nucleus(n) = m; + tex_scan_left_brace(); + tex_set_saved_record(0, saved_math_pointer, 0, m); + ++lmt_save_state.save_stack_data.ptr; + tex_aux_push_math(math_group, s); + break; + } + } +} + +int tex_current_math_style(void) +{ + return (abs(cur_list.mode) == mmode) ? cur_list.math_style : -1; +} + +int tex_current_math_main_style(void) +{ + return (abs(cur_list.mode) == mmode) ? cur_list.math_main_style : -1; +} + +void tex_finish_math_choice(void) +{ + halfword content; + tex_aux_unsave_math(); + content = tex_aux_finish_math_list(null); + /* We should just count and not rely on the next hackery test: */ + if (saved_type(saved_choice_item_count - saved_choice_n_of_items) == saved_choices_count) { + int choice = saved_value(saved_choice_item_count - saved_choice_n_of_items); + int style = cur_list.math_style; + switch (node_subtype(cur_list.tail)) { + case normal_choice_subtype: + switch (choice) { + case math_display_choice: + choice_display_mlist(cur_list.tail) = content; + style = text_style; + break; + case math_text_choice: + choice_text_mlist(cur_list.tail) = content; + style = script_style; + break; + case math_script_choice: + choice_script_mlist(cur_list.tail) = content; + style = script_script_style; + break; + case math_script_script_choice: + choice_script_script_mlist(cur_list.tail) = content; + lmt_save_state.save_stack_data.ptr -= saved_choice_n_of_items; + return; + } + break; + case discretionary_choice_subtype: + switch (choice) { + case math_pre_break_choice: + choice_pre_break(cur_list.tail) = content; + style = display_style; + break; + case math_post_break_choice: + choice_post_break(cur_list.tail) = content; + style = text_style; + break; 
+ case math_no_break_choice: + choice_no_break(cur_list.tail) = content; + style = script_style; + lmt_save_state.save_stack_data.ptr -= saved_choice_n_of_items; + return; + } + break; + } + tex_set_saved_record(saved_choice_item_count - saved_choice_n_of_items, saved_choices_count, 0, choice + 1); + tex_aux_push_math(math_choice_group, style); + tex_scan_left_brace(); + } else { + tex_confusion("scan build choices"); + } +} + +void tex_finish_math_fraction(void) +{ + halfword content; + tex_aux_unsave_math(); + content = tex_aux_finish_math_list(null); + if (saved_type(saved_fraction_item_variant - saved_fraction_n_of_items) == saved_fraction_variant) { + halfword over = saved_value(saved_fraction_item_variant - saved_fraction_n_of_items); + halfword autostyle = saved_value(saved_fraction_item_autostyle - saved_fraction_n_of_items); + halfword userstyle = saved_value(saved_fraction_item_userstyle - saved_fraction_n_of_items); + halfword fraction = cur_list.tail; + set_noad_style(fraction, userstyle); + switch (over) { + case math_numerator_above: + kernel_math_list(fraction_numerator(fraction)) = content; + break; + case math_denominator_above: + kernel_math_list(fraction_denominator(fraction)) = content; + lmt_save_state.save_stack_data.ptr -= saved_fraction_n_of_items; + return; + } + tex_set_saved_record(saved_fraction_item_variant - saved_fraction_n_of_items, saved_fraction_variant, 0, over + 1); + tex_aux_push_math(math_fraction_group, autostyle); + tex_scan_left_brace(); + } else { + tex_confusion("scan build fraction"); + } +} + +void tex_finish_math_operator(void) +{ + halfword content; + tex_aux_unsave_math(); + content = tex_aux_finish_math_list(null); + if (saved_type(saved_operator_item_variant - saved_operator_n_of_items) == saved_operator_variant) { + halfword over = saved_value(saved_operator_item_variant - saved_operator_n_of_items); + halfword fenced = cur_list.tail; + switch (over) { + case math_limits_top: + 
kernel_math_list(fence_delimiter_top(fenced)) = content; + break; + case math_limits_bottom: + kernel_math_list(fence_delimiter_bottom(fenced)) = content; + lmt_save_state.save_stack_data.ptr -= saved_operator_n_of_items; + return; + } + tex_set_saved_record(saved_operator_item_variant - saved_operator_n_of_items, saved_operator_variant, 0, over + 1); + tex_aux_push_math(math_operator_group, tex_math_style_variant(cur_list.math_style, math_parameter_subscript_variant)); + tex_scan_left_brace(); + } else { + tex_confusion("scan build operator"); + } +} + +/*tex + + Subscripts and superscripts are attached to the previous nucleus by the action procedure called + |sub_sup|. + +*/ + +# define scripts_allowed(A) ((node_type((A)) >= simple_noad) && (node_type((A)) < fence_noad)) + +static halfword tex_math_double_atom(void) +{ + halfword tail = tex_new_node(simple_noad, ordinary_noad_subtype); + halfword list = tex_new_node(sub_mlist_node, 0); + tex_tail_append(tail); + if (math_double_script_mode_par >= 0) { + node_subtype(tail) = (math_double_script_mode_par >> 16) & 0xFF; + noad_class_left(tail) = (math_double_script_mode_par >> 8) & 0xFF; + noad_class_right(tail) = (math_double_script_mode_par >> 0) & 0xFF; + } + noad_nucleus(tail) = list; + return tail; +} + +void tex_run_math_script(void) +{ + int code = cur_chr; + halfword tail = cur_list.tail; + switch (cur_cmd) { + case subscript_cmd: + code = math_sub_script_code; + break; + case superscript_cmd: + code = math_super_script_code; + break; + } + switch (code) { + case math_no_script_code: + { + halfword glue = tex_new_glue_node(zero_glue, conditional_math_glue); + tex_tail_append(glue); + tex_add_glue_option(glue, glue_option_no_auto_break); + } + return; + case math_no_ruling_code: + { + halfword glue = tex_new_glue_node(zero_glue, rulebased_math_glue); + tex_tail_append(glue); + tex_add_glue_option(glue, glue_option_no_auto_break); + } + return; + case math_sub_script_code: + tex_get_token(); + if (cur_tok == 
underscore_token || cur_cmd == subscript_cmd) { + tex_get_token(); + if (cur_tok == underscore_token || cur_cmd == subscript_cmd) { + tex_get_token(); + if (cur_tok == underscore_token || cur_cmd == subscript_cmd) { + code = math_shifted_sub_pre_script_code; + } else { + tex_back_input(cur_tok); + code = math_shifted_sub_script_code; + } + } else { + tex_back_input(cur_tok); + code = math_sub_pre_script_code; + } + } else { + tex_back_input(cur_tok); + } + break; + case math_super_script_code: + tex_get_token(); + if (cur_tok == circumflex_token || cur_cmd == superscript_cmd) { + tex_get_token(); + if (cur_tok == circumflex_token || cur_cmd == superscript_cmd) { + tex_get_token(); + if (cur_tok == circumflex_token || cur_cmd == superscript_cmd) { + code = math_shifted_super_pre_script_code; + } else { + tex_back_input(cur_tok); + code = math_shifted_super_script_code; + } + } else { + tex_back_input(cur_tok); + code = math_super_pre_script_code; + } + } else { + tex_back_input(cur_tok); + } + break; + } + if (tail == cur_list.head || (! scripts_allowed(tail))) { + halfword n = tex_new_node(sub_mlist_node, 0); + tail = tex_new_node(simple_noad, ordinary_noad_subtype); + tex_tail_append(tail); + noad_nucleus(tail) = n; + } + switch (code) { + case math_sub_script_code: + case math_no_sub_script_code: + case math_shifted_sub_script_code: + { + if (noad_subscr(tail)) { + tail = tex_math_double_atom(); + if (math_double_script_mode_par < 0) { + tex_handle_error( + normal_error_type, + "Double subscript", + "I treat 'x_1_2' essentially like 'x_1{}_2'." 
+ ); + } + } + switch (code) { + case math_no_sub_script_code: + noad_options(tail) |= noad_option_no_sub_script; + break; + case math_shifted_sub_script_code: + noad_options(tail) |= noad_option_shifted_sub_script; + break; + } + { + halfword n = tex_new_node(math_char_node, 0); + noad_subscr(tail) = n; + tex_aux_scan_math(n, tex_math_style_variant(cur_list.math_style, math_parameter_subscript_variant), 0, 0, 0, 1, unset_noad_class, unset_noad_class); + if (! noad_script_order(tail)) { + noad_script_order(tail) = script_subscript_first; + } + } + break; + } + case math_sub_pre_script_code: + case math_no_sub_pre_script_code: + case math_shifted_sub_pre_script_code: + { + if (noad_subprescr(tail)) { + int limitation = node_type(tail) == fraction_noad; /*tex See remark at node definition. */ + tail = tex_math_double_atom(); + if (math_double_script_mode_par < 0) { + tex_handle_error( + normal_error_type, + limitation ? "Fractions take no pre subscript directly" : "Double pre subscript", + "I just ignore it; consider wrapping this element." + ); + } + } + switch (code) { + case math_no_sub_pre_script_code: + noad_options(tail) |= noad_option_no_sub_pre_script; + break; + case math_shifted_sub_pre_script_code: + noad_options(tail) |= noad_option_shifted_sub_pre_script; + break; + } + { + halfword n = tex_new_node(math_char_node, 0); + noad_subprescr(tail) = n; + tex_aux_scan_math(n, tex_math_style_variant(cur_list.math_style, math_parameter_subscript_variant), 0, 0, 0, 1, unset_noad_class, unset_noad_class); + } + break; + } + case math_super_script_code: + case math_no_super_script_code: + case math_shifted_super_script_code: + { + if (noad_supscr(tail)) { + tail = tex_math_double_atom(); + if (math_double_script_mode_par < 0) { + tex_handle_error( + normal_error_type, + "Double superscript", + "I treat 'x^1^2' essentially like 'x^1{}^2'." 
+ ); + } + } + switch (code) { + case math_no_super_script_code: + noad_options(tail) |= noad_option_no_super_script; + break; + case math_shifted_super_script_code: + noad_options(tail) |= noad_option_shifted_super_script; + break; + } + { + halfword n = tex_new_node(math_char_node, 0); + noad_supscr(tail) = n; + if (! noad_script_order(tail)) { + noad_script_order(tail) = script_superscript_first; + } + tex_aux_scan_math(n, tex_math_style_variant(cur_list.math_style, math_parameter_superscript_variant), 0, 0, 0, 1, unset_noad_class, unset_noad_class); + } + break; + } + case math_super_pre_script_code: + case math_no_super_pre_script_code: + case math_shifted_super_pre_script_code: + { + if (noad_supprescr(tail)) { + int limitation = node_type(tail) == fraction_noad; /*tex See remark at node definition. */ + tail = tex_math_double_atom(); + if (math_double_script_mode_par < 0) { + tex_handle_error( + normal_error_type, + limitation ? "Fractions take no pre superscript directly" : "Double pre superscript", + "I just ignore it; consider wrapping this element." + ); + } + } + switch (code) { + case math_no_super_script_code: + noad_options(tail) |= noad_option_no_super_pre_script; + break; + case math_shifted_super_pre_script_code: + noad_options(tail) |= noad_option_shifted_super_pre_script; + break; + } + { + halfword n = tex_new_node(math_char_node, 0); + noad_supprescr(tail) = n; + tex_aux_scan_math(n, tex_math_style_variant(cur_list.math_style, math_parameter_superscript_variant), 0, 0, 0, 1, unset_noad_class, unset_noad_class); + } + break; + } + case math_prime_script_code: + { + if (noad_prime(tail)) { + tail = tex_math_double_atom(); + if (math_double_script_mode_par < 0) { + tex_handle_error( + normal_error_type, + "Double prime script", + "I'll add a dummy nucleus." + ); + } + } + { + halfword n = tex_new_node(math_char_node, 0); + noad_prime(tail) = n; + if (! 
/*tex

    An operation like |\over| causes the current mlist to go into a state of suspended animation:
    |incomplete_noad| points to a |fraction_noad| that contains the mlist-so-far as its numerator,
    while the denominator is yet to come. Finally when the mlist is finished, the denominator will
    go into the incomplete fraction noad, and that noad will become the whole formula, unless it is
    surrounded by |\left| and |\right| delimiters.

    We can probably replace the |incomplete_noad_par| trickery because we can now look back in the
    list using the |alink| field. But not now.

    The function below goes through a couple of switches on the command code: first the subtype
    is set and the numerator (and for the |\U...| variants the denominator) gets hooked in, then
    the optional middle delimiter is scanned, then the left and right delimiters, and finally the
    rule thickness and, for the |\U...| variants, the keys.

*/

void tex_run_math_fraction(void)
{
    /*tex The type of generalized fraction we are scanning: */
    halfword code = cur_chr;
    if (cur_list.incomplete_noad) {
        /*tex Recovery code. */
        /*tex We do scan the delimiters and dimension that belong to the command but ignore them. */
        switch (code) {
            case math_above_delimited_code:
            case math_over_delimited_code:
            case math_atop_delimited_code:
            case math_u_above_delimited_code:
            case math_u_over_delimited_code:
            case math_u_atop_delimited_code:
            case math_u_skewed_delimited_code:
            case math_u_stretched_delimited_code:
                tex_aux_scan_delimiter(null, no_mathcode, unset_noad_class);
                tex_aux_scan_delimiter(null, no_mathcode, unset_noad_class);
                break;
        }
        switch (code) {
            case math_above_code:
            case math_above_delimited_code:
            case math_u_above_code:
            case math_u_above_delimited_code:
                tex_scan_dimen(0, 0, 0, 0, NULL);
                break;
        }
        /*tex This is somewhat weird, this error here. */
        tex_handle_error(
            normal_error_type,
            "Ambiguous; you need another { and }",
            "I'm ignoring this fraction specification, since I don't know whether a\n"
            "construction like 'x \\over y \\over z' means '{x \\over y} \\over z' or\n"
            "'x \\over {y \\over z}'."
        );
    } else {
        halfword fraction = tex_new_node(fraction_noad, 0);
        halfword numerator = tex_new_node(sub_mlist_node, 0);
        /*tex The denominator is only set here for the two argument |\U...| variants. */
        halfword denominator = null;
        halfword autostyle = tex_math_style_variant(cur_list.math_style, math_parameter_fraction_variant);
        halfword userstyle = -1;
        halfword attrlist = null;
        halfword options = 0;
        halfword class = fraction_noad_subtype;
        halfword rulethickness = preset_rule_thickness;
        /*tex When this one is still zero after the keys have been scanned a dimension has to follow. */
        int ruledone = 0;
        fraction_h_factor(fraction) = 1000;
        fraction_v_factor(fraction) = 1000;
        switch (code) {
            case math_above_code:
            case math_above_delimited_code:
                node_subtype(fraction) = above_fraction_subtype;
                goto NEXTSTEP1;
            case math_over_code:
            case math_over_delimited_code:
                node_subtype(fraction) = over_fraction_subtype;
                goto NEXTSTEP1;
            case math_atop_code:
            case math_atop_delimited_code:
                node_subtype(fraction) = atop_fraction_subtype;
              NEXTSTEP1:
                {
                    /*tex
                        The infix variants: what we have collected so far becomes the numerator
                        and the current list is emptied so that it can collect the denominator.
                    */
                    cur_list.incomplete_noad = fraction;
                    fraction_numerator(fraction) = numerator;
                    kernel_math_list(numerator) = node_next(cur_list.head);
                    node_next(cur_list.head) = null;
                    cur_list.tail = cur_list.head;
                    cur_list.math_style = autostyle;
                    break;
                }
            case math_u_above_code:
            case math_u_above_delimited_code:
                node_subtype(fraction) = above_fraction_subtype;
                goto NEXTSTEP2;
            case math_u_over_code:
            case math_u_over_delimited_code:
                node_subtype(fraction) = over_fraction_subtype;
                goto NEXTSTEP2;
            case math_u_atop_code:
            case math_u_atop_delimited_code:
                node_subtype(fraction) = atop_fraction_subtype;
                goto NEXTSTEP2;
            case math_u_skewed_code:
            case math_u_skewed_delimited_code:
                node_subtype(fraction) = skewed_fraction_subtype;
                goto NEXTSTEP2;
            case math_u_stretched_code:
            case math_u_stretched_delimited_code:
                node_subtype(fraction) = stretched_fraction_subtype;
              NEXTSTEP2:
                {
                    /*tex
                        The two argument variants: the fraction is appended right away and both
                        parts are empty lists that get filled when their groups are finished.
                    */
                    cur_list.incomplete_noad = null;
                    denominator = tex_new_node(sub_mlist_node, 0);
                    tex_tail_append(fraction);
                    fraction_numerator(fraction) = numerator;
                    fraction_denominator(fraction) = denominator;
                    break;
                }
        }
        /*tex Skewed and stretched fractions start with a middle delimiter. */
        switch (code) {
            case math_u_skewed_code:
            case math_u_skewed_delimited_code:
            case math_u_stretched_code:
            case math_u_stretched_delimited_code:
                {
                    halfword q = tex_new_node(delimiter_node, 0);
                    fraction_middle_delimiter(fraction) = q;
                    tex_aux_scan_delimiter(q, no_mathcode, unset_noad_class);
                    break;
                }
        }
        /*tex The delimited variants take a left and right delimiter. */
        switch (code) {
            case math_above_delimited_code:
            case math_over_delimited_code:
            case math_atop_delimited_code:
            case math_u_above_delimited_code:
            case math_u_over_delimited_code:
            case math_u_atop_delimited_code:
            case math_u_skewed_delimited_code:
            case math_u_stretched_delimited_code:
                {
                    halfword left = tex_new_node(delimiter_node, 0);
                    halfword right = tex_new_node(delimiter_node, 0);
                    fraction_left_delimiter(fraction) = left;
                    fraction_right_delimiter(fraction) = right;
                    tex_aux_scan_delimiter(left, no_mathcode, open_noad_subtype);
                    tex_aux_scan_delimiter(right, no_mathcode, close_noad_subtype);
                    break;
                }
        }
        switch (code) {
            /*tex We can't have keyword here because of compatibility reasons. */
            case math_above_code:
            case math_above_delimited_code:
                rulethickness = tex_scan_dimen(0, 0, 0, 0, NULL);
                break;
            case math_over_code:
            case math_over_delimited_code:
                rulethickness = preset_rule_thickness;
                break;
            case math_atop_code:
            case math_atop_delimited_code:
                break;
            /*tex
                But here we can! For practical reasons we accept the rule related options
                and in principle we could do with one command.
            */
            case math_u_atop_code:
            case math_u_atop_delimited_code:
            case math_u_above_code:
            case math_u_above_delimited_code:
                /*tex Here a dimension is scanned unless a |thickness| or |font| key shows up. */
                goto OPTIONS;
            case math_u_over_code:
            case math_u_over_delimited_code:
                ruledone = 1;
                goto OPTIONS;
            case math_u_stretched_code:
            case math_u_stretched_delimited_code:
            case math_u_skewed_code:
            case math_u_skewed_delimited_code:
                ruledone = 1;
              OPTIONS:
                while (1) {
                    switch (tex_scan_character("acefhnstvACEFHNSTV", 0, 1, 0)) {
                        case 'a': case 'A':
                            if (tex_scan_mandate_keyword("attr", 1)) {
                                attrlist = tex_scan_attribute(attrlist);
                            }
                            break;
                        case 'c': case 'C':
                            if (tex_scan_mandate_keyword("class", 1)) {
                                halfword c = (quarterword) tex_scan_math_class_number(0);
                                if (valid_math_class_code(c)) {
                                    class = c;
                                }
                            }
                            break;
                        case 'e': case 'E':
                            if (tex_scan_mandate_keyword("exact", 1)) {
                                options |= noad_option_exact;
                            }
                            break;
                        case 'n': case 'N':
                            /*tex A bit over the top, three steps but a push back is still worse. */
                            if (tex_scan_character("oO", 0, 0, 0)) {
                                switch (tex_scan_character("aoAO", 0, 0, 0)) {
                                    case 'a': case 'A':
                                        if (tex_scan_mandate_keyword("noaxis", 3)) {
                                            options |= noad_option_no_axis;
                                        }
                                        break;
                                    case 'o': case 'O':
                                        if (tex_scan_mandate_keyword("nooverflow", 3)) {
                                            options |= noad_option_no_overflow;
                                        }
                                        break;
                                    default:
                                        tex_aux_show_keyword_error("noaxis|nooverflow");
                                        goto DONE;
                                }
                            }
                            break;
                        case 't': case 'T':
                            if (tex_scan_mandate_keyword("thickness", 1)) {
                                ruledone = 1;
                                rulethickness = tex_scan_dimen(0, 0, 0, 0, NULL);
                            }
                            break;
                        case 'f': case 'F':
                            if (tex_scan_mandate_keyword("font", 1)) {
                                ruledone = 1;
                                options |= noad_option_prefer_font_thickness;
                            }
                            break;
                        case 's': case 'S':
                            switch (tex_scan_character("toTO", 0, 0, 0)) {
                                case 't': case 'T':
                                    if (tex_scan_mandate_keyword("style", 2)) {
                                        halfword style = tex_scan_math_style_identifier(1, 0);
                                        if (denominator) {
                                            userstyle = style;
                                        } else {
                                            /* just ignore */
                                        }
                                    }
                                    break;
                                case 'o': case 'O':
                                    if (tex_scan_mandate_keyword("source", 2)) {
                                        noad_source(fraction) = tex_scan_int(0, NULL);
                                    }
                                    break;
                                default:
                                    tex_aux_show_keyword_error("style|source");
                                    goto DONE;
                            }
                            break;
                        case 'h': case 'H':
                            if (tex_scan_mandate_keyword("hfactor", 1)) {
                                fraction_h_factor(fraction) = tex_scan_int(0, NULL);
                            }
                            break;
                        case 'v': case 'V':
                            if (tex_scan_mandate_keyword("vfactor", 1)) {
                                fraction_v_factor(fraction) = tex_scan_int(0, NULL);
                            }
                            break;
                        default:
                            goto DONE;
                    }
                }
              DONE:
                if (! ruledone) {
                    rulethickness = tex_scan_dimen(0, 0, 0, 0, NULL);
                }
                break;
        }
        fraction_rule_thickness(fraction) = rulethickness;
        noad_options(fraction) = options;
        set_noad_main_class(fraction, class);
        if (attrlist) {
            tex_attach_attribute_list_attribute(fraction, attrlist);
        }
        if (denominator) {
            /*tex
                In this case we need to pick up two math groups, and after some playing around using
                a variant of choices made most sense.
            */
            tex_set_saved_record(saved_fraction_item_variant, saved_fraction_variant, 0, math_numerator_above);
            tex_set_saved_record(saved_fraction_item_autostyle, saved_fraction_auto_style, 0, autostyle);
            tex_set_saved_record(saved_fraction_item_userstyle, saved_fraction_user_style, 0, userstyle);
            lmt_save_state.save_stack_data.ptr += saved_fraction_n_of_items;
            cur_list.math_flatten = 0;
            tex_aux_push_math(math_fraction_group, autostyle);
            tex_scan_left_brace();
        } else {
            /*tex
                This is the pre/post variant. Actually, this variant is the reason why math scanning
                code is somewhat complex, this |incomplete_noad| stuff.
            */
        }
    }
}
/*tex

    At the end of a math formula or subformula, the |finish_math_list| routine is called upon to
    return a halfword to the newly completed mlist, and to pop the nest back to the enclosing
    semantic level. The parameter to |finish_math_list|, if not null, points to a |fence_noad| that
    ends the current mlist; this |fence_noad| has not yet been appended.

    When there is an incomplete fraction, the current list becomes its denominator. Without |p|
    the fraction itself is returned. With |p| the numerator has to start with a left fence that
    matches |cur_list.delim|; the fraction and |p| are linked in after that fence, and what is
    returned is the start of the numerator list.

*/

static halfword tex_aux_finish_math_list(halfword p)
{
    halfword q;
    if (cur_list.incomplete_noad) {
        halfword denominator = fraction_denominator(cur_list.incomplete_noad);
        if (denominator) {
            node_type(denominator) = sub_mlist_node;
        } else {
            denominator = tex_new_node(sub_mlist_node, 0);
            fraction_denominator(cur_list.incomplete_noad) = denominator;
            /*tex This value is not used: |q| gets set again in both branches below. */
            q = denominator;
        }
        kernel_math_list(denominator) = node_next(cur_list.head);
        if (p) {
            halfword numerator = fraction_numerator(cur_list.incomplete_noad);
            q = kernel_math_list(numerator);
            if ((node_type(q) != fence_noad) || (node_subtype(q) != left_fence_side) || (! cur_list.delim)) {
                tex_confusion("right fence");
            }
            /*tex The numerator loses everything up to and including the fence. */
            kernel_math_list(numerator) = node_next(cur_list.delim);
            node_next(cur_list.delim) = cur_list.incomplete_noad;
            node_next(cur_list.incomplete_noad) = p;
        } else {
            q = cur_list.incomplete_noad;
        }
    } else {
        node_next(cur_list.tail) = p;
        q = node_next(cur_list.head);
    }
    tex_pop_nest();
    return q;
}

/*tex
    Here traditional \TEX\ does some flattening but it can interfere. It is for instance needed
    in order to find the skew of an accented character which happens at the outer level but that
    bit of code now does that recursively. I need to check why the accent was flattened so we
    keep the original code here for testing.

    A \CONTEXT\ test case: |$\tilde{x}'$| i.e.\ primes!

    We only act when the list of |parent| has exactly one node.
*/

static void tex_aux_flatten_math_list(halfword parent)
{
    halfword p = kernel_math_list(parent);
    if (p && ! node_next(p)) {
        switch (node_type(p)) {
            case simple_noad:
                {
                    /*tex
                        An atom without following scripts, of a class that has the flatten option
                        set, is dropped: the parent takes over the type, character data and
                        attributes of its nucleus.
                    */
                    if (! noad_has_following_scripts(p) && tex_math_has_class_option(node_subtype(p), flatten_class_option)) {
                        halfword n = noad_nucleus(p);
                        halfword s = parent;
                        node_type(s) = node_type(n);
                        tex_math_copy_char_data(s, n, 1);
                        tex_attach_attribute_list_copy(s, n);
                        tex_flush_node(p);
                    }
                    break;
                }
            case accent_noad:
                {
                    /*tex
                        When the group that we finish is the nucleus of an ordinary atom at the
                        tail of the list, the accent takes the place of that atom.
                    */
                    halfword tail = cur_list.tail;
                    if (saved_value(saved_math_group_item_pointer) == noad_nucleus(tail) && node_type(tail) == simple_noad) {
                        switch (node_subtype(tail)) {
                            case ordinary_noad_subtype:
                                tex_couple_nodes(node_prev(tail), p);
                                /*tex These fields are wiped before the atom is flushed. */
                                noad_nucleus(tail) = null;
                                noad_subscr(tail) = null;
                                noad_supscr(tail) = null;
                                noad_prime(tail) = null;
                                tex_attach_attribute_list_copy(p, tail);
                                tex_flush_node(tail);
                                cur_list.tail = p;
                                break;
                        }
                    }
                    break;
                }
        }
    }
}

/*tex

    Now at last we're ready to see what happens when a right brace occurs in a math formula. Two
    special cases are simplified here: braces are effectively removed when they surround a single
    Ord without sub- and/or superscripts, or when they surround an accent that is the nucleus of
    an Ord atom.

*/

void tex_finish_math_group(void)
{
    /*tex We need the style as it is before the nest gets popped. */
    int old_style = cur_list.math_style;
    halfword p, parent;
    quarterword allclass;
    tex_aux_unsave_math();
    lmt_save_state.save_stack_data.ptr -= saved_math_group_n_of_items;
    parent = saved_value(saved_math_group_item_pointer);
    allclass = (quarterword) saved_value(saved_math_group_all_class);
    node_type(parent) = sub_mlist_node; /* can be math_char_node */
    p = tex_aux_finish_math_list(null); /* this incomplete trickery */
    kernel_math_list(parent) = p;
    if (cur_list.math_flatten) {
        tex_aux_flatten_math_list(parent);
    }
    /*tex
        If needed, here we pickup a next \quote {argument}, so we sort of finish a group and reopen
        a new one. It is somewhat curious that we use a character node here.
    */
    if (allclass != unset_noad_class) {
        /*tex The saved class is set on all simple noads in the list, but only where a class is unset. */
        while (p) {
            if (node_type(p) == simple_noad) {
             // node_subtype(p) = allclass;
                if (get_noad_main_class(p) == unset_noad_class) {
                    set_noad_main_class(p, allclass);
                }
                if (get_noad_left_class(p) == unset_noad_class) {
                    set_noad_left_class(p, allclass);
                }
                if (get_noad_right_class(p) == unset_noad_class) {
                    set_noad_right_class(p, allclass);
                }
            }
            p = node_next(p);
        }
        /* */
    }
    /*tex
        When the saved kernel has a next pointer set, that node gets a new nucleus which is
        scanned next, in the style we started with.
    */
    if (node_next(saved_value(saved_math_group_item_pointer)) > 0) {
        halfword q = tex_new_node(math_char_node, 0); /* hm */
        noad_nucleus(node_next(saved_value(saved_math_group_item_pointer))) = q;
        node_next(saved_value(saved_math_group_item_pointer)) = null;
        saved_value(saved_math_group_item_pointer) = q;
        tex_aux_scan_math(q, old_style, 0, 0, 0, 0, unset_noad_class, unset_noad_class);
        /*tex restart */
    }
}
+ +*/ + +void tex_run_math_fence(void) +{ + halfword ht = 0; + halfword dp = 0; + halfword options = 0; + halfword mainclass = unset_noad_class; + halfword leftclass = unset_noad_class; + halfword rightclass = unset_noad_class; + halfword source = 0; + halfword attrlist = null; + quarterword st = (quarterword) cur_chr; + halfword style = cur_list.math_style; + if (math_check_fences_par) { + options |= noad_option_no_check; + } + switch (st) { + case left_operator_side: + case no_fence_side: + break; + case extended_left_fence_side: /*tex |\Uleft| */ + st = left_fence_side; + break; + case extended_middle_fence_side: /*tex |\Umiddle| */ + st = middle_fence_side; + break; + case extended_right_fence_side: /*tex |\Uright| */ + st = right_fence_side; + break; + default : + goto CHECK_PAIRING; + } + while (1) { + /* todo: break down */ + switch (tex_scan_character("hdanlevpcrsuHDANLEVPCRSU", 0, 1, 0)) { + case 0: + goto CHECK_PAIRING; + case 'h': case 'H': + if (tex_scan_mandate_keyword("height", 1)) { + ht = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'd': case 'D': + if (tex_scan_mandate_keyword("depth", 1)) { + dp = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'a': case 'A': + switch (tex_scan_character("uxtUXT", 0, 0, 0)) { + case 'u': case 'U': + if (tex_scan_mandate_keyword("auto", 2)) { + options |= noad_option_auto; + } + break; + case 't': case 'T': + if (tex_scan_mandate_keyword("attr", 2)) { + attrlist = tex_scan_attribute(attrlist); + } + break; + case 'x': case 'X': + if (tex_scan_mandate_keyword("axis", 2)) { + options |= noad_option_axis; + } + break; + default: + tex_aux_show_keyword_error("auto|attr|axis"); + goto CHECK_PAIRING; + } + break; + case 'n': case 'N': + switch (tex_scan_character("oO", 0, 0, 0)) { + case 'o': case 'O': + switch (tex_scan_character("alcoALCO", 0, 0, 0)) { + case 'a': case 'A': + if (tex_scan_mandate_keyword("noaxis", 3)) { + options |= noad_option_no_axis; + } + break; + case 'l': case 'L': + if 
(tex_scan_mandate_keyword("nolimits", 3)) { + options = unset_option(options, noad_option_limits); + options |= noad_option_no_limits; + } + break; + case 'c': case 'C': + if (tex_scan_mandate_keyword("nocheck", 3)) { + options |= noad_option_no_check; + } + break; + case 'o': case 'O': + if (tex_scan_mandate_keyword("nooverflow", 3)) { + options |= noad_option_no_overflow; + } + break; + default: + tex_aux_show_keyword_error("noaxis|nolimits|nocheck|nooverflow"); + goto CHECK_PAIRING; + } + break; + default: + goto CHECK_PAIRING; + } + break; + case 'l': case 'L': + switch (tex_scan_character("ieIE", 0, 0, 0)) { + case 'e': case 'E': + if (tex_scan_mandate_keyword("leftclass", 2)) { + halfword c = tex_scan_math_class_number(0); + // if (! valid_math_class_code(c)) { + if (valid_math_class_code(c)) { + leftclass = c; + } + } + break; + case 'i': case 'I': + if (tex_scan_mandate_keyword("limits", 2)) { + options = unset_option(options, noad_option_no_limits); + options |= noad_option_limits; + } + break; + default: + tex_aux_show_keyword_error("leftclass|limits"); + goto CHECK_PAIRING; + } + break; + case 'e': case 'E': + if (tex_scan_mandate_keyword("exact", 1)) { + options |= noad_option_exact; + } + break; + case 'v': case 'V': + if (tex_scan_mandate_keyword("void", 1)) { + options |= noad_option_void; + } + break; + case 'p': case 'P': + if (tex_scan_mandate_keyword("phantom", 1)) { + options |= noad_option_phantom; + } + break; + case 'c': case 'C': + if (tex_scan_mandate_keyword("class", 1)) { + mainclass = tex_scan_math_class_number(0); + } + break; + case 'r': case 'R': + if (tex_scan_mandate_keyword("rightclass", 1)) { + halfword c = tex_scan_math_class_number(0); + // if (valid_math_class_code(c)) { + if (valid_math_class_code(c)) { + rightclass = c; + } + } + break; + case 's': case 'S': + if (tex_scan_mandate_keyword("source", 1)) { + source = tex_scan_int(0, NULL); + } + break; + default: + goto CHECK_PAIRING; + } + } + CHECK_PAIRING: + switch (st) { + 
case no_fence_side: + case left_fence_side: + break; + case left_operator_side: + { + /* becomes a class option */ + int indisplay = style == display_style || style == cramped_display_style; + /* options |= noad_option_no_check; */ /*tex Best just expect a dummy right. */ + if (! (has_option(options, noad_option_limits) || has_option(options, noad_option_no_limits))) { + /* otherwise we don't enter the placement function */ + options |= indisplay ? noad_option_limits : noad_option_no_limits; + } + } + break; + default: + if (cur_group != math_fence_group) { + tex_aux_append_math_fence_val((mathcodeval) { 0, 0, 0 }, (mathdictval) { 0, 0, 0 }, open_noad_subtype); + } + switch (cur_group) { + case math_fence_group: + break; + case math_shift_group: + tex_aux_scan_delimiter(null, no_mathcode, unset_noad_class); + if (st == middle_fence_side) { + tex_handle_error( + normal_error_type, + "Extra \\middle", + "I'm ignoring a \\middle that had no matching \\left." + ); + } else { + tex_handle_error( + normal_error_type, + "Extra \\right", + "I'm ignoring a \\right that had no matching \\left." + ); + } + break; + default: + tex_off_save(); + } + } + /*tex + Now we only have a no, left, middle or right case left. + */ + { + halfword fence = tex_new_node(fence_noad, st); + halfword delimiter = tex_new_node(delimiter_node, 0); + halfword autoclass = unset_noad_class; + fence_delimiter_list(fence) = delimiter; + noad_height(fence) = ht; + noad_depth(fence) = dp; + noad_options(fence) = options; + set_noad_classes(fence, mainclass); + if (leftclass != unset_noad_class) { + set_noad_left_class(fence, leftclass); + } + if (rightclass != unset_noad_class) { + set_noad_right_class(fence, rightclass); + } + noad_italic(fence) = 0; + noad_source(fence) = source; + /*tex + By setting this here, we can get rid of the hard coded values in |mlist_to_hlist| which + sort of interfere (or at least confuse) things there. When set, the |leftclass| and + |rightclass| settings win anyway. 
+ */ + if (mainclass == unset_noad_class) { + mainclass = node_subtype(delimiter); + if (mainclass == unset_noad_class || mainclass == ordinary_noad_subtype) { + switch (st) { + case left_fence_side: + mainclass = open_noad_subtype; + break; + case middle_fence_side: + mainclass = middle_noad_subtype; + break; + case right_fence_side: + mainclass = close_noad_subtype; + break; + } + } + set_noad_main_class(fence, mainclass); + } + /* */ + switch (st) { + case left_fence_side: + autoclass = open_noad_subtype; + break; + case middle_fence_side: + autoclass = middle_noad_subtype; /* we need a way to overload this */ + break; + case right_fence_side: + autoclass = close_noad_subtype; + break; + } + /* */ + tex_aux_scan_delimiter(delimiter, no_mathcode, autoclass); + /* */ + if (attrlist) { + tex_attach_attribute_list_attribute(fence, attrlist); + tex_attach_attribute_list_attribute(delimiter, attrlist); + } + switch (st) { + case left_fence_side: + tex_aux_append_math_fence(fence, open_noad_subtype); + break; + case middle_fence_side: + tex_aux_append_math_fence(fence, middle_noad_subtype); + break; + case right_fence_side: + tex_aux_append_math_fence(fence, close_noad_subtype); + break; + case left_operator_side: + { + halfword top = tex_new_node(sub_mlist_node, 0); + halfword bottom = tex_new_node(sub_mlist_node, 0); + fence_delimiter_top(fence) = top; + fence_delimiter_bottom(fence) = bottom; + tex_aux_push_math(math_fence_group, style); + node_next(cur_list.head) = fence; + cur_list.tail = fence; + cur_list.delim = fence; + tex_set_saved_record(saved_operator_item_variant, saved_operator_variant, 0, math_limits_top); + lmt_save_state.save_stack_data.ptr += saved_operator_n_of_items; + tex_aux_push_math(math_operator_group, tex_math_style_variant(style, math_parameter_superscript_variant)); + tex_scan_left_brace(); + } + break; + case no_fence_side: + { + /* halfword n = tex_new_node(simple_noad, math_fences_mode_par ? 
fenced_noad_subtype : inner_noad_subtype); */ + halfword n = tex_new_node(simple_noad, fenced_noad_subtype); + halfword l = tex_new_node(sub_mlist_node, 0); + tex_tail_append(n); + set_noad_main_class(n, mainclass); /*tex Really needed here! */ + noad_nucleus(n) = l; + kernel_math_list(noad_nucleus(n)) = fence; + } + break; + default: + tex_confusion("left right fence"); + break; + } + } +} + +/*tex + + \TEX\ gets to the following part of the program when the first |$| ending a display has been + scanned. + +*/ + +static void tex_aux_check_second_math_shift(void) +{ + tex_get_x_token(); + if (cur_cmd != math_shift_cmd) { + tex_back_input(cur_tok); + tex_handle_error( + normal_error_type, + "Display math should end with $$", + "The '$' that I just saw supposedly matches a previous '$$'. So I shall assume\n" + "that you typed '$$' both times." + ); + } +} + +static void tex_aux_check_display_math_end(void) +{ + switch (cur_chr) { + case end_display_math_code: + case end_math_mode_code: + return; + } + tex_handle_error( + normal_error_type, + "Display math should end with \\Ustopdisplaymath or \\Ustopmathmode", + "I shall assume that you typed that." + ); +} + +static void tex_aux_check_inline_math_end(void) +{ + switch (cur_chr) { + case end_inline_math_code: + case end_math_mode_code: + return; + } + tex_handle_error( + normal_error_type, + "Inline math should end with \\Ustopmath or \\Ustopmathmode", + "I shall assume that you typed that." + ); +} + +static void tex_aux_resume_after_display(void) +{ + if (cur_group == math_shift_group) { + tex_aux_unsave_math(); + cur_list.prev_graf += 3; + tex_push_nest(); + cur_list.mode = hmode; + cur_list.space_factor = 1000; + /*tex This needs to be intercepted in the display math start! Todo! 
*/ + tex_tail_append(tex_new_par_node(penalty_par_subtype)); + tex_get_x_token(); + if (cur_cmd != spacer_cmd) { + tex_back_input(cur_tok); + } + if (lmt_nest_state.nest_data.ptr == 1) { + lmt_page_filter_callback(after_display_page_context, 0); + tex_build_page(); + } + } else { + tex_confusion("finishing display math"); + } +} + +/*tex + + The fuziest part of math mode processing occurs when a displayed formula is being centered and + placed with an optional equation number. At this time we are in vertical mode (or internal + vertical mode). + + \starttabulate + \NC \type {p} \NC points to the mlist for the formula \NC \NR + \NC \type {a} \NC is either |null| or it points to a box containing the equation number \NC \NR + \NC \type {l} \NC is true if there was an |\leqno| (so |a| is a horizontal box) \NC \NR + \stoptabulate + + Per 2022 we ditched display mode in \CONTEXT\ LMTX\ so the code related to display math is now + completely frozen, if only because testing has become unreasonable. There is anyway not much more + to do here. + +*/ + +inline static void tex_aux_inject_display_skip_before(quarterword param, quarterword subtype) +{ + if (param > 0) { + switch (display_skip_mode_par) { + case display_skip_default : + case display_skip_always : + tex_tail_append(tex_new_param_glue_node(param, subtype)); + break; + case display_skip_non_zero: + if (! tex_glue_is_zero(glue_parameter(param))) { + tex_tail_append(tex_new_param_glue_node(param, subtype)); + } + break; + case display_skip_ignore: + break; + default: + /*tex > 3 reserved for future use */ + tex_tail_append(tex_new_param_glue_node(param, subtype)); + break; + } + } +} + +inline static void tex_aux_inject_display_skip_after(quarterword param, quarterword subtype) +{ + if (param > 0) { + switch (display_skip_mode_par) { + case display_skip_default : + case display_skip_always : + tex_tail_append(tex_new_param_glue_node(param, subtype)); + break; + case display_skip_non_zero: + if (! 
tex_glue_is_zero(glue_parameter(param))) { + tex_tail_append(tex_new_param_glue_node(param, subtype)); + } + break; + case display_skip_ignore: + break; + default: + /*tex > 3 reserved for future use */ + tex_tail_append(tex_new_param_glue_node(param, subtype)); + break; + } + } +} + +static void tex_aux_finish_displayed_math(int atleft, halfword eqnumber, halfword equation) +{ + /*tex box containing the equation */ + halfword equation_box; + /*tex width of the equation */ + scaled equation_width; + /*tex width of the line */ + scaled line_width; + /*tex width of equation number */ + scaled number_width; + /*tex width of equation number plus space to separate from equation */ + scaled number_plus_gap_width; + /*tex move the line right this much */ + scaled indent; + /*tex displacement of equation in the line */ + scaled displacement; + /*tex glue parameter codes for before and after */ + quarterword glue_above, glue_below; + /*tex glue parameter subtypes for before and after */ + quarterword subtype_above, subtype_below; + /*tex tail of adjustment lists */ + halfword post_adjust_tail, pre_adjust_tail; + /*tex tail of migration lists */ + halfword post_migrate_tail, pre_migrate_tail; + /*tex for equation numbers */ + scaled eqno_width; + /*tex true if the math and surrounding (par) dirs are different */ + int swap_dir = math_direction_par != pre_display_direction_par; + if (eqnumber && swap_dir) { + atleft = ! 
atleft; + } + /* */ + lmt_packaging_state.post_adjust_tail = post_adjust_head; + lmt_packaging_state.pre_adjust_tail = pre_adjust_head; + lmt_packaging_state.post_migrate_tail = post_migrate_head; + lmt_packaging_state.pre_migrate_tail = pre_migrate_head; + /* */ + equation_box = tex_hpack(equation, 0, packing_additional, direction_unknown, holding_none_option); + node_subtype(equation_box) = equation_list; + attach_current_attribute_list(equation_box); + equation = box_list(equation_box); + /* */ + post_adjust_tail = lmt_packaging_state.post_adjust_tail; + pre_adjust_tail = lmt_packaging_state.pre_adjust_tail; + post_migrate_tail = lmt_packaging_state.post_migrate_tail; + pre_migrate_tail = lmt_packaging_state.pre_migrate_tail; + lmt_packaging_state.post_adjust_tail = null; + lmt_packaging_state.pre_adjust_tail = null; + lmt_packaging_state.post_migrate_tail = null; + lmt_packaging_state.pre_migrate_tail = null; + /* */ + equation_width = box_width(equation_box); + line_width = display_width_par; + indent = display_indent_par; + if (eqnumber) { + number_width = box_width(eqnumber); + eqno_width = number_width; + number_plus_gap_width = number_width + tex_round_xn_over_d(math_eqno_gap_step_par, tex_get_math_quad_style(text_style), 1000); + node_subtype(eqnumber) = equation_number_list; + /*tex attach_current_attribute_list(eqno_box); */ + } else { + number_width = 0; + eqno_width = 0; + number_plus_gap_width = 0; + } + if (equation_width + number_plus_gap_width > line_width) { + /*tex + + The user can force the equation number to go on a separate line by causing its width to + be zero. 
+ + */ + if ((number_width != 0) && ((equation_width - lmt_packaging_state.total_shrink[normal_glue_order] + number_plus_gap_width <= line_width) + || (lmt_packaging_state.total_shrink[fi_glue_order] != 0) + || (lmt_packaging_state.total_shrink[fil_glue_order] != 0) + || (lmt_packaging_state.total_shrink[fill_glue_order] != 0) + || (lmt_packaging_state.total_shrink[filll_glue_order] != 0))) { + box_list(equation_box) = null; + tex_flush_node(equation_box); + equation_box = tex_hpack(equation, line_width - number_plus_gap_width, packing_exactly, direction_unknown, holding_none_option); + node_subtype(equation_box) = equation_list; + attach_current_attribute_list(equation_box); + } else { + number_width = 0; + if (equation_width > line_width) { + box_list(equation_box) = null; + tex_flush_node(equation_box); + equation_box = tex_hpack(equation, line_width, packing_exactly, direction_unknown, holding_none_option); + node_subtype(equation_box) = equation_list; + attach_current_attribute_list(equation_box); + } + } + equation_width = box_width(equation_box); + } + /*tex + + We try first to center the display without regard to the existence of the equation number. + If that would make it too close (where \quote {too close} means that the space between + display and equation number is less than the width of the equation number), we either + center it in the remaining space or move it as far from the equation number as possible. + The latter alternative is taken only if the display begins with glue, since we assume that + the user put glue there to control the spacing precisely. 
+ + */ + displacement = tex_half_scaled(line_width - equation_width); + if ((number_width > 0) && (displacement < 2 * number_width)) { + /*tex too close */ + displacement = tex_half_scaled(line_width - equation_width - number_width); + /* + if (p && !is_char_node(p) && node_type(p) == glue_node) + d = 0; + */ /* kind of weird this, so why not just */ + if (equation && node_type(equation) == glue_node) { + displacement = 0; + } + } + tex_tail_append(tex_new_penalty_node(pre_display_penalty_par, before_display_penalty_subtype)); + if ((displacement + indent <= pre_display_size_par) || ((cur_list.math_dir == dir_lefttoright) && atleft) + || ((cur_list.math_dir == dir_righttoleft) && ! atleft)) { + /*tex not enough clearance */ + glue_above = above_display_skip_code; + subtype_above = above_display_skip_glue; + glue_below = below_display_skip_code; + subtype_below = below_display_skip_glue; + } else { + glue_above = above_display_short_skip_code; + subtype_above = above_display_short_skip_glue; + glue_below = below_display_short_skip_code; + subtype_below = below_display_short_skip_glue; + } + /*tex + + If the equation number is set on a line by itself, either before or after the formula, we + append an infinite penalty so that no page break will separate the display from its number; + and we use the same size and displacement for all three potential lines of the display, + even though |\parshape| may specify them differently; |\leqno| on a forced single line due + to |width=0|; it follows that |type(a) = hlist_node|. 
+ + */ + if (eqnumber && atleft && (number_width == 0)) { + /* if (math_direction_par == dir_lefttoright) { */ + box_shift_amount(eqnumber) = 0; + /* } else { */ + /* } */ + tex_append_to_vlist(eqnumber, lua_key_index(equation_number), NULL); + tex_tail_append(tex_new_penalty_node(infinite_penalty, equation_number_penalty_subtype)); + } else { + tex_aux_inject_display_skip_before(glue_above, subtype_above); + } + if (number_width != 0) { + scaled shift = line_width - equation_width - number_width - displacement; + halfword move = tex_new_kern_node(shift, explicit_kern_subtype); + if (atleft) { + if (swap_dir) { + if (math_direction_par == dir_lefttoright) { + /*tex TRT + TLT + \eqno: (swap_dir=true, math_direction_par=TLT, l=true) */ + halfword kern = tex_new_kern_node(shift + number_width, explicit_kern_subtype); + tex_try_couple_nodes(eqnumber, move); + tex_try_couple_nodes(move, equation_box); + tex_try_couple_nodes(equation_box, kern); + } else { + /*tex TLT + TRT + \eqno: (swap_dir=true, math_direction_par=TRT, l=true) */ + tex_try_couple_nodes(eqnumber, move); + tex_try_couple_nodes(move, equation_box); + } + } else { + halfword kern; + if (math_direction_par == dir_lefttoright) { + /*tex TLT + TLT + \leqno: (swap_dir=false, math_direction_par=TLT, l=true) */ + kern = tex_new_kern_node(shift + number_width, explicit_kern_subtype); + } else { + /*tex TRT + TRT + \leqno: (swap_dir=false, math_direction_par=TRT, l=true) */ + kern = tex_new_kern_node(shift, explicit_kern_subtype); + } + tex_try_couple_nodes(eqnumber, move); + tex_try_couple_nodes(move, equation_box); + tex_try_couple_nodes(equation_box, kern); + } + equation_box = eqnumber; + } else { + if (swap_dir) { + if (math_direction_par == dir_lefttoright) { + /*tex TRT + TLT + \leqno: (swap_dir=true, math_direction_par=TLT, l=false) */ + } else { + /*tex TLT + TRT + \leqno: (swap_dir=true, math_direction_par=TRT, l=false) */ + } + tex_try_couple_nodes(equation_box, move); + tex_try_couple_nodes(move, 
eqnumber); + } else { + halfword kern; + if (math_direction_par == dir_lefttoright) { + /*tex TLT + TLT + \eqno: (swap_dir=false, math_direction_par=TLT, l=false) */ + kern = tex_new_kern_node(displacement, explicit_kern_subtype); + } else { + /*tex TRT + TRT + \eqno: (swap_dir=false, math_direction_par=TRT, l=false) */ + kern = tex_new_kern_node(shift + number_width, explicit_kern_subtype); + } + tex_try_couple_nodes(kern, equation_box); + tex_try_couple_nodes(equation_box, move); + tex_try_couple_nodes(move, eqnumber); + equation_box = kern; + } + } + equation_box = tex_hpack(equation_box, 0, packing_additional, direction_unknown, holding_none_option); + node_subtype(equation_box) = equation_list; /* new */ + attach_current_attribute_list(equation_box); + box_shift_amount(equation_box) = indent; + } else { + box_shift_amount(equation_box) = indent + displacement; + } + /*tex check for prev: */ + tex_append_to_vlist(equation_box, lua_key_index(equation), NULL); + if (eqnumber && number_width == 0 && ! atleft) { + tex_tail_append(tex_new_penalty_node(infinite_penalty, equation_number_penalty_subtype)); + /* if (math_direction_par == dir_lefttoright) { */ + box_shift_amount(eqnumber) = indent + line_width - eqno_width ; + /* } else { */ + /* } */ + tex_append_to_vlist(eqnumber, lua_key_index(equation_number), NULL); + glue_below = 0; /* shouldn't this be an option */ + } + /*tex Migrating material comes after equation number: is this ok? */ + if (post_migrate_tail != post_migrate_head) { + node_next(cur_list.tail) = node_next(post_migrate_head); + node_prev(lmt_packaging_state.post_migrate_tail) = node_prev(cur_list.tail); + cur_list.tail = post_migrate_tail; + } + if (post_adjust_tail != post_adjust_head) { + node_next(cur_list.tail) = node_next(post_adjust_head); + node_prev(lmt_packaging_state.post_adjust_tail) = node_prev(cur_list.tail); + cur_list.tail = post_adjust_tail; + } + /*tex A weird place: is this ok? 
*/ + if (pre_adjust_tail != pre_adjust_head) { + node_next(cur_list.tail) = node_next(pre_adjust_head); + node_prev(lmt_packaging_state.pre_adjust_tail) = node_prev(cur_list.tail); + cur_list.tail = pre_adjust_tail; + } + if (pre_migrate_tail != pre_migrate_head) { + node_next(cur_list.tail) = node_next(pre_migrate_head); + node_prev(lmt_packaging_state.pre_migrate_tail) = node_prev(cur_list.tail); + cur_list.tail = pre_migrate_tail; + } + tex_tail_append(tex_new_penalty_node(post_display_penalty_par, after_display_penalty_subtype)); + tex_aux_inject_display_skip_after(glue_below, subtype_below); + tex_aux_resume_after_display(); +} + +/*tex +* + A |math_node|, which occurs only in horizontal lists, appears before and after mathematical + formulas. The |subtype| field is |before| before the formula and |after| after it. There is a + |surround| field, which represents the amount of surrounding space inserted by |\mathsurround|. + + As an outcome of the math upgrading sub project that Mikael Sundqvist and I undertook end 2021 + and beginning 2022 Mikael suggested penalties surrounding inline formulas so there you have it: + |\preinlinepanelty| and |\postinlinepanelty|. + +*/ + +void tex_run_math_shift(void) { + if (cur_group == math_shift_group) { + /*tex box containing equation number */ + halfword eqnumber = null; + /*tex Use |\leqno| instead of |\eqno|, we default to right. 
*/ + int atleft = 0; + /*tex |mmode| or |-mmode| */ + int mode = cur_list.mode; + int mathmode = cur_list.math_mode; + /*tex this pops the nest, the formula */ + halfword p = tex_aux_finish_math_list(null); + int mathleft = cur_list.math_begin; + int mathright = cur_list.math_end; + if (cur_cmd == math_shift_cs_cmd) { + switch (cur_chr) { + case begin_inline_math_code: + case begin_display_math_code: + case begin_math_mode_code: + tex_you_cant_error(NULL); + break; + } + } + if (cur_list.mode == -mode) { + /*tex end of equation number */ + AGAIN: + switch (cur_cmd) { + case math_shift_cmd: + tex_aux_check_second_math_shift(); + break; + case end_paragraph_cmd: + tex_get_x_token(); + goto AGAIN; + default: + tex_aux_check_display_math_end(); + break; + } + tex_run_mlist_to_hlist(p, 0, text_style, unset_noad_class, unset_noad_class); + eqnumber = tex_hpack(node_next(temp_head), 0, packing_additional, direction_unknown, holding_none_option); + attach_current_attribute_list(eqnumber); + tex_aux_unsave_math(); + /*tex now |cur_group = math_shift_group| */ + lmt_save_state.save_stack_data.ptr -= saved_equation_number_n_of_items; + if (saved_type(saved_equation_number_item_location) == saved_equation_number_location) { + atleft = saved_value(saved_equation_number_item_location) == left_location_code; + mode = cur_list.mode; + p = tex_aux_finish_math_list(null); + } else { + tex_confusion("after math"); + } + } + if (mode < 0) { + /*tex + + The |unsave| is done after everything else here; hence an appearance of |\mathsurround| + inside of |$...$| affects the spacing at these particular |$|'s. This is consistent + with the conventions of |$$ ... $$|, since |\abovedisplayskip| inside a display affects + the space above that display. 
+ + */ + halfword math = tex_new_node(math_node, begin_inline_math); + if (mathmode) { + switch (cur_cmd) { + case math_shift_cs_cmd: + if (cur_chr != end_display_math_code && cur_chr != end_math_mode_code) { + tex_aux_check_second_math_shift(); + } + break; + case math_shift_cmd: + tex_aux_check_second_math_shift(); + break; + } + } else if (cur_cmd == math_shift_cs_cmd) { + tex_aux_check_inline_math_end(); + } + tex_tail_append(math); + math_penalty(math) = pre_inline_penalty_par; + /*tex begin mathskip code */ + switch (math_skip_mode_par) { + case math_skip_surround_when_zero: + if (! tex_glue_is_zero(math_skip_par)) { + tex_copy_glue_values(math, math_skip_par); + } else { + math_surround(math) = math_surround_par; + } + break ; + case math_skip_always_left: + case math_skip_always_both: + case math_skip_only_when_skip: + tex_copy_glue_values(math, math_skip_par); + break ; + case math_skip_always_right: + case math_skip_ignore: + break ; + case math_skip_always_surround: + default: + math_surround(math) = math_surround_par; + break; + } + /*tex end mathskip code */ + if (cur_list.math_dir) { + tex_tail_append(tex_new_dir(normal_dir_subtype, math_direction_par)); + } + tex_run_mlist_to_hlist(p, cur_list.mode > nomode, is_valid_math_style(cur_list.math_main_style) ? cur_list.math_main_style : text_style, cur_list.math_begin, cur_list.math_end); + tex_try_couple_nodes(cur_list.tail, node_next(temp_head)); + cur_list.tail = tex_tail_of_node_list(cur_list.tail); + if (cur_list.math_dir) { + tex_tail_append(tex_new_dir(cancel_dir_subtype, math_direction_par)); + } + cur_list.math_dir = 0; + math = tex_new_node(math_node, end_inline_math); + tex_tail_append(math); + math_penalty(math) = post_inline_penalty_par; + /*tex begin mathskip code */ + switch (math_skip_mode_par) { + case math_skip_surround_when_zero : + if (! 
tex_glue_is_zero(math_skip_par)) { + tex_copy_glue_values(math, math_skip_par); + math_surround(math) = 0; + } else { + math_surround(math) = math_surround_par; + } + break; + case math_skip_always_right: + case math_skip_always_both: + case math_skip_only_when_skip: + tex_copy_glue_values(math, math_skip_par); + break; + case math_skip_always_left: + case math_skip_ignore: + break; + case math_skip_always_surround: + default: + math_surround(math) = math_surround_par; + break; + } + /*tex end mathskip code */ + cur_list.space_factor = 1000; + mathleft = cur_list.math_begin; + mathright = cur_list.math_end; + tex_aux_unsave_math(); + } else { + if (! eqnumber) { + if (cur_cmd == math_shift_cmd) { + tex_aux_check_second_math_shift(); + } else { + tex_aux_check_display_math_end(); + } + } + tex_run_mlist_to_hlist(p, 0, display_style, cur_list.math_begin, cur_list.math_end); + mathleft = cur_list.math_begin; + mathright = cur_list.math_end; + tex_aux_finish_displayed_math(atleft, eqnumber, node_next(temp_head)); + } + /* local */ + update_tex_math_left_class(mathleft); + update_tex_math_right_class(mathright); + /* global */ + lmt_math_state.last_left = mathleft; + lmt_math_state.last_right = mathright; + } else { + tex_off_save(); + } +} + +/*tex + + When |\halign| appears in a display, the alignment routines operate essentially as they do in + vertical mode. Then the following program is activated, with |p| and |q| pointing to the + beginning and end of the resulting list, and with |aux_save| holding the |prev_depth| value. 
+ +*/ + +void tex_finish_display_alignment(halfword head, halfword tail, halfword prevdepth) +{ + tex_handle_assignments(); + AGAIN: + switch (cur_cmd) { + case math_shift_cmd: + tex_aux_check_second_math_shift(); + break; + case end_paragraph_cmd: + tex_get_x_token(); + goto AGAIN; + default: + tex_aux_check_display_math_end(); + break; + } + tex_pop_nest(); + tex_tail_append(tex_new_penalty_node(pre_display_penalty_par, before_display_penalty_subtype)); + tex_aux_inject_display_skip_before(above_display_skip_code, above_display_skip_glue); + node_next(cur_list.tail) = head; + if (head && tail) { + cur_list.tail = tail; + } + tex_tail_append(tex_new_penalty_node(post_display_penalty_par, after_display_penalty_subtype)); + tex_aux_inject_display_skip_after(below_display_skip_code, below_display_skip_glue); + cur_list.prev_depth = prevdepth; + tex_aux_resume_after_display(); +} + +/* + + Turning macros into functions brought the mingw64 bin down from 2548224 to 2511360 bytes but + not the linux one, so I guess mingw doesn't inline (yet, in 2020). 
+ +*/ + +static void tex_aux_define_inl_math_parameters(int size, int param, scaled value, int level) +{ + switch (size) { + case script_size: + tex_def_math_parameter(script_style, param, value, level, indirect_math_regular); + tex_def_math_parameter(cramped_script_style, param, value, level, indirect_math_regular); + break; + case script_script_size: + tex_def_math_parameter(script_script_style, param, value, level, indirect_math_regular); + tex_def_math_parameter(cramped_script_script_style, param, value, level, indirect_math_regular); + break; + default: + tex_def_math_parameter(text_style, param, value, level, indirect_math_regular); + tex_def_math_parameter(cramped_text_style, param, value, level, indirect_math_regular); + break; + } +} + +static void tex_aux_define_dis_math_parameters(int size, int param, scaled value, int level) +{ + if (size == text_size) { + tex_def_math_parameter(display_style, param, value, level, indirect_math_regular); + tex_def_math_parameter(cramped_display_style, param, value, level, indirect_math_regular); + } +} + +static void tex_aux_define_all_math_parameters(int size, int param, scaled value, int level) +{ + switch (size) { + case script_size: + tex_def_math_parameter(script_style, param, value, level, indirect_math_regular); + tex_def_math_parameter(cramped_script_style, param, value, level, indirect_math_regular); + break; + case script_script_size: + tex_def_math_parameter(script_script_style, param, value, level, indirect_math_regular); + tex_def_math_parameter(cramped_script_script_style, param, value, level, indirect_math_regular); + break; + default: + tex_def_math_parameter(text_style, param, value, level, indirect_math_regular); + tex_def_math_parameter(cramped_text_style, param, value, level, indirect_math_regular); + tex_def_math_parameter(display_style, param, value, level, indirect_math_regular); + tex_def_math_parameter(cramped_display_style, param, value, level, indirect_math_regular); + break; + } +} + +/*tex + 
+ Here are the math parameters that are font-dependant. Before an mlist is converted to an hlist, + \TEX\ makes sure that the fonts in family~2 have enough parameters to be math symbol fonts, and + that the fonts in family~3 have enough parameters to be math extension fonts. The math-symbol + parameters are referred to by using the following macros, which take a size code as their + parameter; for example, |num1 (cur_size)| gives the value of the |num1| parameter for the + current size. + + The math extension parameters have similar macros, but the size code is omitted (since it is + always |cur_size| when we refer to such parameters). + +*/ + +# define total_mathsy_parameters 22 +# define total_mathex_parameters 13 + +# define mathsy(A,B) font_parameter(tex_fam_fnt(2,A),B) +# define mathex(A,B) font_parameter(tex_fam_fnt(3,A),B) + +# define math_x_height(A) mathsy(A,5) /*tex height of |x| */ +# define math_quad(A) mathsy(A,6) /*tex |18mu| */ +# define num1(A) mathsy(A,8) /*tex numerator shift-up in display styles */ +# define num2(A) mathsy(A,9) /*tex numerator shift-up in non-display, non-|\atop| */ +# define num3(A) mathsy(A,10) /*tex numerator shift-up in non-display |\atop| */ +# define denom1(A) mathsy(A,11) /*tex denominator shift-down in display styles */ +# define denom2(A) mathsy(A,12) /*tex denominator shift-down in non-display styles */ +# define sup1(A) mathsy(A,13) /*tex superscript shift-up in uncramped display style */ +# define sup2(A) mathsy(A,14) /*tex superscript shift-up in uncramped non-display */ +# define sup3(A) mathsy(A,15) /*tex superscript shift-up in cramped styles */ +# define sub1(A) mathsy(A,16) /*tex subscript shift-down if superscript is absent */ +# define sub2(A) mathsy(A,17) /*tex subscript shift-down if superscript is present */ +# define sup_drop(A) mathsy(A,18) /*tex superscript baseline below top of large box */ +# define sub_drop(A) mathsy(A,19) /*tex subscript baseline below bottom of large box */ +# define delim1(A) 
mathsy(A,20) /*tex size of |\atopwithdelims| delimiters in display styles */ +# define delim2(A) mathsy(A,21) /*tex size of |\atopwithdelims| delimiters in non-displays */ +# define axis_height(A) mathsy(A,22) /*tex height of fraction lines above the baseline */ + +# define default_rule_thickness(A) mathex(A,8) /*tex thickness of |\over| bars */ +# define big_operator_spacing1(A) mathex(A,9) /*tex minimum clearance above a displayed op */ +# define big_operator_spacing2(A) mathex(A,10) /*tex minimum clearance below a displayed op */ +# define big_operator_spacing3(A) mathex(A,11) /*tex minimum baselineskip above displayed op */ +# define big_operator_spacing4(A) mathex(A,12) /*tex minimum baselineskip below displayed op */ +# define big_operator_spacing5(A) mathex(A,13) /*tex padding above and below displayed limits */ + +/*tex + Somehow a scale > 1000 results in extreme values. +*/ + +/* +inline static int tex_aux_get_font_math_parameter(scaled scale, halfword f, int id) +{ + scaled v = get_font_math_par(f, id); +// return scale == 1000 ? v : round_xn_over_d(v, scale, 1000); + if (v) { + double d = 0.001 * scale * v; + return (d < 0.0) ? (int) (d - 0.5) : (int) (d + 0.5); + } else { + return 0; + } +} + +inline static int tex_aux_get_font_math_quantity(scaled scale, halfword v) +{ +// return scale == 1000 ? v : round_xn_over_d(v, scale, 1000); + if (v) { + double d = 0.001 * scale * v; + return (d < 0.0) ? (int) (d - 0.5) : (int) (d + 0.5); + } else { + return 0; + } +} +*/ + +# define math_parameter(a,b) ((font_math_parameter_count(a) >= b) ? font_math_parameter(a,b) : undefined_math_parameter) + +inline static scaled tex_aux_get_font_math_parameter(scaled scale, halfword f, int id) +{ + scaled v = math_parameter(f, id); + if (v == undefined_math_parameter) { + return v; + } else { + return v ? scaledround(0.001 * scale * v) : 0; + } +} + +inline static scaled tex_aux_get_font_math_quantity(scaled scale, halfword v) +{ + return v ? 
scaledround(0.001 * scale * v) : 0; +} + +/*tex + The next function is called when we define a family, but first we define a few helpers + for identifying traditional math fonts. Watch the hard coded family check! + + The scale comes from |tex_get_math_font_scale(f, size)|. The math constants of font |f| are + passed, together with |size| and |level|, to the helpers that define the math parameters. + The |fam| argument is only used in the tracing message. +*/ + +void tex_fixup_math_parameters(int fam, int size, int f, int level) +{ + scaled scale = tex_get_math_font_scale(f, size); + + if (tracing_math_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[math: fixing up font, family %i, size %i, font %i, level %i]", fam, size, f, level); + tex_end_diagnostic(); + } + + /*tex These apply to all: */ + + tex_aux_define_all_math_parameters(size, math_parameter_quad, tex_aux_get_font_math_quantity (scale, font_size(f)), level); + tex_aux_define_all_math_parameters(size, math_parameter_axis, tex_aux_get_font_math_parameter(scale, f, AxisHeight), level); + + tex_aux_define_all_math_parameters(size, math_parameter_accent_base_height, tex_aux_get_font_math_parameter(scale, f, AccentBaseHeight), level); + tex_aux_define_all_math_parameters(size, math_parameter_accent_base_depth, tex_aux_get_font_math_parameter(scale, f, AccentBaseDepth), level); /* engine, reserved */ + tex_aux_define_all_math_parameters(size, math_parameter_flattened_accent_base_height, tex_aux_get_font_math_parameter(scale, f, FlattenedAccentBaseHeight), level); + tex_aux_define_all_math_parameters(size, math_parameter_flattened_accent_base_depth, tex_aux_get_font_math_parameter(scale, f, FlattenedAccentBaseDepth), level); /* engine, reserved */ + tex_aux_define_all_math_parameters(size, math_parameter_overbar_kern, tex_aux_get_font_math_parameter(scale, f, OverbarExtraAscender), level); + tex_aux_define_all_math_parameters(size, math_parameter_overbar_rule, tex_aux_get_font_math_parameter(scale, f, OverbarRuleThickness), level); + tex_aux_define_all_math_parameters(size, math_parameter_overbar_vgap, tex_aux_get_font_math_parameter(scale, f, OverbarVerticalGap), level); + tex_aux_define_all_math_parameters(size, math_parameter_underbar_kern, 
tex_aux_get_font_math_parameter(scale, f, UnderbarExtraDescender), level); + tex_aux_define_all_math_parameters(size, math_parameter_underbar_rule, tex_aux_get_font_math_parameter(scale, f, UnderbarRuleThickness ), level); + tex_aux_define_all_math_parameters(size, math_parameter_underbar_vgap, tex_aux_get_font_math_parameter(scale, f, UnderbarVerticalGap), level); + tex_aux_define_all_math_parameters(size, math_parameter_under_delimiter_vgap, tex_aux_get_font_math_parameter(scale, f, StretchStackGapAboveMin), level); + tex_aux_define_all_math_parameters(size, math_parameter_under_delimiter_bgap, tex_aux_get_font_math_parameter(scale, f, StretchStackBottomShiftDown), level); + tex_aux_define_all_math_parameters(size, math_parameter_over_delimiter_vgap, tex_aux_get_font_math_parameter(scale, f, StretchStackGapBelowMin), level); + tex_aux_define_all_math_parameters(size, math_parameter_over_delimiter_bgap, tex_aux_get_font_math_parameter(scale, f, StretchStackTopShiftUp), level); + tex_aux_define_all_math_parameters(size, math_parameter_radical_kern, tex_aux_get_font_math_parameter(scale, f, RadicalExtraAscender), level); + tex_aux_define_all_math_parameters(size, math_parameter_radical_rule, tex_aux_get_font_math_parameter(scale, f, RadicalRuleThickness), level); + tex_aux_define_all_math_parameters(size, math_parameter_radical_degree_before, tex_aux_get_font_math_parameter(scale, f, RadicalKernBeforeDegree), level); + tex_aux_define_all_math_parameters(size, math_parameter_radical_degree_after, tex_aux_get_font_math_parameter(scale, f, RadicalKernAfterDegree), level); + tex_aux_define_all_math_parameters(size, math_parameter_subscript_shift_drop, tex_aux_get_font_math_parameter(scale, f, SubscriptBaselineDropMin), level); + tex_aux_define_all_math_parameters(size, math_parameter_superscript_shift_drop, tex_aux_get_font_math_parameter(scale, f, SuperscriptBaselineDropMax), level); + tex_aux_define_all_math_parameters(size, math_parameter_subscript_shift_down, 
tex_aux_get_font_math_parameter(scale, f, SubscriptShiftDown), level); + tex_aux_define_all_math_parameters(size, math_parameter_prime_shift_drop, tex_aux_get_font_math_parameter(scale, f, PrimeBaselineDropMax), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_subscript_top_max, tex_aux_get_font_math_parameter(scale, f, SubscriptTopMax), level); + tex_aux_define_all_math_parameters(size, math_parameter_superscript_bottom_min, tex_aux_get_font_math_parameter(scale, f, SuperscriptBottomMin), level); + tex_aux_define_all_math_parameters(size, math_parameter_superscript_subscript_bottom_max, tex_aux_get_font_math_parameter(scale, f, SuperscriptBottomMaxWithSubscript), level); + tex_aux_define_all_math_parameters(size, math_parameter_subscript_superscript_vgap, tex_aux_get_font_math_parameter(scale, f, SubSuperscriptGapMin), level); + tex_aux_define_all_math_parameters(size, math_parameter_limit_above_vgap, tex_aux_get_font_math_parameter(scale, f, UpperLimitGapMin), level); + tex_aux_define_all_math_parameters(size, math_parameter_limit_above_bgap, tex_aux_get_font_math_parameter(scale, f, UpperLimitBaselineRiseMin), level); + tex_aux_define_all_math_parameters(size, math_parameter_limit_below_vgap, tex_aux_get_font_math_parameter(scale, f, LowerLimitGapMin), level); + tex_aux_define_all_math_parameters(size, math_parameter_limit_below_bgap, tex_aux_get_font_math_parameter(scale, f, LowerLimitBaselineDropMin), level); + tex_aux_define_all_math_parameters(size, math_parameter_nolimit_sub_factor, tex_aux_get_font_math_parameter(scale, f, NoLimitSubFactor), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_nolimit_sup_factor, tex_aux_get_font_math_parameter(scale, f, NoLimitSupFactor), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_skewed_fraction_hgap, tex_aux_get_font_math_parameter(scale, f, SkewedFractionHorizontalGap), level); + 
tex_aux_define_all_math_parameters(size, math_parameter_skewed_fraction_vgap, tex_aux_get_font_math_parameter(scale, f, SkewedFractionVerticalGap), level); + tex_aux_define_all_math_parameters(size, math_parameter_space_before_script, tex_aux_get_font_math_parameter(scale, f, SpaceBeforeScript), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_space_after_script, tex_aux_get_font_math_parameter(scale, f, SpaceAfterScript), level); + tex_aux_define_all_math_parameters(size, math_parameter_connector_overlap_min, tex_aux_get_font_math_parameter(scale, f, MinConnectorOverlap), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_fraction_rule, tex_aux_get_font_math_parameter(scale, f, FractionRuleThickness), level); + + tex_aux_define_all_math_parameters(size, math_parameter_radical_degree_raise, math_parameter(f, RadicalDegreeBottomRaisePercent), level); + tex_aux_define_all_math_parameters(size, math_parameter_prime_raise, math_parameter(f, PrimeRaisePercent), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_prime_raise_composed, math_parameter(f, PrimeRaiseComposedPercent), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_prime_space_after, math_parameter(f, PrimeSpaceAfter), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_prime_width, math_parameter(f, PrimeWidthPercent), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_skewed_delimiter_tolerance, math_parameter(f, SkewedDelimiterTolerance), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_accent_top_shift_up, math_parameter(f, AccentTopShiftUp), level); /* engine, undefined */ + tex_aux_define_all_math_parameters(size, math_parameter_accent_bottom_shift_down, math_parameter(f, AccentBottomShiftDown), level); /* engine, undefined */ + 
tex_aux_define_all_math_parameters(size, math_parameter_accent_top_overshoot, math_parameter(f, AccentTopOvershoot), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_accent_bottom_overshoot, math_parameter(f, AccentBottomOvershoot), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_accent_superscript_drop, math_parameter(f, AccentSuperscriptDrop), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_accent_superscript_percent, math_parameter(f, AccentSuperscriptPercent), level); /* engine, default 0 */ + tex_aux_define_all_math_parameters(size, math_parameter_accent_extend_margin, math_parameter(f, AccentExtendMargin), level); /* engine, undefined */ + tex_aux_define_all_math_parameters(size, math_parameter_flattened_accent_top_shift_up, math_parameter(f, FlattenedAccentTopShiftUp), level); /* engine, undefined */ + tex_aux_define_all_math_parameters(size, math_parameter_flattened_accent_bottom_shift_down, math_parameter(f, FlattenedAccentBottomShiftDown), level); /* engine, undefined */ + tex_aux_define_all_math_parameters(size, math_parameter_delimiter_percent, math_parameter(f, DelimiterPercent), level); /* engine, undefined */ + tex_aux_define_all_math_parameters(size, math_parameter_delimiter_shortfall, math_parameter(f, DelimiterShortfall), level); /* engine, undefined */ + + tex_aux_define_all_math_parameters(size, math_parameter_radical_extensible_after, math_parameter(f, RadicalKernAfterExtensible), level); /* engine, undefined */ + tex_aux_define_all_math_parameters(size, math_parameter_radical_extensible_before, math_parameter(f, RadicalKernBeforeExtensible), level); /* engine, undefined */ + + /*tex Not all are official \OPENTYPE: */ + + tex_aux_define_all_math_parameters(size, math_parameter_x_scale, 1000, level); + tex_aux_define_all_math_parameters(size, math_parameter_y_scale, 1000, level); + + /*tex Most are zero and have to be set 
by the macro package (if at all): */ + + tex_aux_define_all_math_parameters(size, math_parameter_limit_above_kern, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_limit_below_kern, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_extra_superscript_shift, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_extra_subscript_shift, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_extra_superprescript_shift, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_extra_subprescript_shift, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_rule_height, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_rule_depth, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_superscript_shift_distance, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_subscript_shift_distance, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_superprescript_shift_distance, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_subprescript_shift_distance, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_extra_superscript_space, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_extra_subscript_space, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_extra_superprescript_space, 0, level); + tex_aux_define_all_math_parameters(size, math_parameter_extra_subprescript_space, 0, level); + + /*tex A special one: */ + + if (math_parameter(f, SubscriptShiftDownWithSuperscript) != undefined_math_parameter) { /* engine */ + tex_aux_define_all_math_parameters(size, math_parameter_subscript_superscript_shift_down, tex_aux_get_font_math_parameter(scale, f, SubscriptShiftDownWithSuperscript), level); + } else { + tex_aux_define_all_math_parameters(size, math_parameter_subscript_superscript_shift_down, tex_aux_get_font_math_parameter(scale, f, 
SubscriptShiftDown), level); + } + + /*tex These differentiate between display and inline: */ + + tex_aux_define_dis_math_parameters(size, math_parameter_operator_size, tex_aux_get_font_math_parameter(scale, f, DisplayOperatorMinHeight), level); + tex_aux_define_inl_math_parameters(size, math_parameter_radical_vgap, tex_aux_get_font_math_parameter(scale, f, RadicalVerticalGap), level); + tex_aux_define_dis_math_parameters(size, math_parameter_radical_vgap, tex_aux_get_font_math_parameter(scale, f, RadicalDisplayStyleVerticalGap), level); + tex_aux_define_inl_math_parameters(size, math_parameter_stack_num_up, tex_aux_get_font_math_parameter(scale, f, StackTopShiftUp), level); + tex_aux_define_dis_math_parameters(size, math_parameter_stack_num_up, tex_aux_get_font_math_parameter(scale, f, StackTopDisplayStyleShiftUp), level); + tex_aux_define_inl_math_parameters(size, math_parameter_stack_denom_down, tex_aux_get_font_math_parameter(scale, f, StackBottomShiftDown), level); + tex_aux_define_dis_math_parameters(size, math_parameter_stack_denom_down, tex_aux_get_font_math_parameter(scale, f, StackBottomDisplayStyleShiftDown), level); + tex_aux_define_inl_math_parameters(size, math_parameter_stack_vgap, tex_aux_get_font_math_parameter(scale, f, StackGapMin), level); + tex_aux_define_dis_math_parameters(size, math_parameter_stack_vgap, tex_aux_get_font_math_parameter(scale, f, StackDisplayStyleGapMin), level); + tex_aux_define_inl_math_parameters(size, math_parameter_fraction_num_vgap, tex_aux_get_font_math_parameter(scale, f, FractionNumeratorGapMin), level); + tex_aux_define_dis_math_parameters(size, math_parameter_fraction_num_vgap, tex_aux_get_font_math_parameter(scale, f, FractionNumeratorDisplayStyleGapMin), level); + tex_aux_define_inl_math_parameters(size, math_parameter_fraction_num_up, tex_aux_get_font_math_parameter(scale, f, FractionNumeratorShiftUp), level); + tex_aux_define_dis_math_parameters(size, math_parameter_fraction_num_up, 
tex_aux_get_font_math_parameter(scale, f, FractionNumeratorDisplayStyleShiftUp), level); + tex_aux_define_inl_math_parameters(size, math_parameter_fraction_denom_vgap, tex_aux_get_font_math_parameter(scale, f, FractionDenominatorGapMin), level); + tex_aux_define_dis_math_parameters(size, math_parameter_fraction_denom_vgap, tex_aux_get_font_math_parameter(scale, f, FractionDenominatorDisplayStyleGapMin), level); + tex_aux_define_inl_math_parameters(size, math_parameter_fraction_denom_down, tex_aux_get_font_math_parameter(scale, f, FractionDenominatorShiftDown), level); + tex_aux_define_dis_math_parameters(size, math_parameter_fraction_denom_down, tex_aux_get_font_math_parameter(scale, f, FractionDenominatorDisplayStyleShiftDown), level); + tex_aux_define_inl_math_parameters(size, math_parameter_fraction_del_size, tex_aux_get_font_math_parameter(scale, f, FractionDelimiterSize), level); /* engine, undefined */ + tex_aux_define_dis_math_parameters(size, math_parameter_fraction_del_size, tex_aux_get_font_math_parameter(scale, f, FractionDelimiterDisplayStyleSize), level); /* engine, undefined */ + + /*tex A few more specials: */ + + switch (size) { + case script_size: + tex_def_math_parameter(script_style, math_parameter_superscript_shift_up, tex_aux_get_font_math_parameter(scale, f, SuperscriptShiftUp), level, indirect_math_regular); + tex_def_math_parameter(cramped_script_style, math_parameter_superscript_shift_up, tex_aux_get_font_math_parameter(scale, f, SuperscriptShiftUpCramped), level, indirect_math_regular); + tex_def_math_parameter(script_style, math_parameter_prime_shift_up, tex_aux_get_font_math_parameter(scale, f, PrimeShiftUp), level, indirect_math_regular); /* engine, default 0 */ + tex_def_math_parameter(cramped_script_style, math_parameter_prime_shift_up, tex_aux_get_font_math_parameter(scale, f, PrimeShiftUpCramped), level, indirect_math_regular); /* engine, default 0 */ + break; + case script_script_size: + tex_def_math_parameter(script_script_style, 
math_parameter_superscript_shift_up, tex_aux_get_font_math_parameter(scale, f, SuperscriptShiftUp), level, indirect_math_regular); + tex_def_math_parameter(cramped_script_script_style, math_parameter_superscript_shift_up, tex_aux_get_font_math_parameter(scale, f, SuperscriptShiftUpCramped), level, indirect_math_regular); + tex_def_math_parameter(script_script_style, math_parameter_prime_shift_up, tex_aux_get_font_math_parameter(scale, f, PrimeShiftUp), level, indirect_math_regular); /* engine, default 0 */ + tex_def_math_parameter(cramped_script_script_style, math_parameter_prime_shift_up, tex_aux_get_font_math_parameter(scale, f, PrimeShiftUpCramped), level, indirect_math_regular); /* engine, default 0 */ + break; + default: + tex_def_math_parameter(display_style, math_parameter_superscript_shift_up, tex_aux_get_font_math_parameter(scale, f, SuperscriptShiftUp), level, indirect_math_regular); + tex_def_math_parameter(cramped_display_style, math_parameter_superscript_shift_up, tex_aux_get_font_math_parameter(scale, f, SuperscriptShiftUpCramped), level, indirect_math_regular); + tex_def_math_parameter(text_style, math_parameter_superscript_shift_up, tex_aux_get_font_math_parameter(scale, f, SuperscriptShiftUp), level, indirect_math_regular); + tex_def_math_parameter(cramped_text_style, math_parameter_superscript_shift_up, tex_aux_get_font_math_parameter(scale, f, SuperscriptShiftUpCramped), level, indirect_math_regular); + tex_def_math_parameter(display_style, math_parameter_prime_shift_up, tex_aux_get_font_math_parameter(scale, f, PrimeShiftUp), level, indirect_math_regular); /* engine, default 0 */ + tex_def_math_parameter(cramped_display_style, math_parameter_prime_shift_up, tex_aux_get_font_math_parameter(scale, f, PrimeShiftUpCramped), level, indirect_math_regular); /* engine, default 0 */ + tex_def_math_parameter(text_style, math_parameter_prime_shift_up, tex_aux_get_font_math_parameter(scale, f, PrimeShiftUp), level, indirect_math_regular); /* engine, default 
0 */ + tex_def_math_parameter(cramped_text_style, math_parameter_prime_shift_up, tex_aux_get_font_math_parameter(scale, f, PrimeShiftUpCramped), level, indirect_math_regular); /* engine, default 0 */ + break; + } + +} + +/*tex + + There is some trickery here. The values are actually pointers and in \LUATEX\ the predefined + muglue ones are small numbers that are way below the normal node values. So, they are kind + of safe signals. However, in \LUAMETATEX\ we use zero based internal codes (because that is + nicer for the interface). + + The setters below define one parameter |code| for a group of styles: the display, text, script + and scriptscript ones each handle the regular and the cramped style of that name, the others + loop over a range of styles. + +*/ + +void tex_set_display_styles(halfword code, halfword value, halfword level, halfword indirect) +{ + tex_def_math_parameter(display_style, code, value, level, indirect); + tex_def_math_parameter(cramped_display_style, code, value, level, indirect); +} + +void tex_set_text_styles(halfword code, halfword value, halfword level, halfword indirect) +{ + tex_def_math_parameter(text_style, code, value, level, indirect); + tex_def_math_parameter(cramped_text_style, code, value, level, indirect); +} + +void tex_set_script_styles(halfword code, halfword value, halfword level, halfword indirect) +{ + tex_def_math_parameter(script_style, code, value, level, indirect); + tex_def_math_parameter(cramped_script_style, code, value, level, indirect); +} + +void tex_set_script_script_styles(halfword code, halfword value, halfword level, halfword indirect) +{ + tex_def_math_parameter(script_script_style, code, value, level, indirect); + tex_def_math_parameter(cramped_script_script_style, code, value, level, indirect); +} + +void tex_set_all_styles(halfword code, halfword value, halfword level, halfword indirect) +{ + for (int style = display_style; style <= cramped_script_script_style; style++) { + tex_def_math_parameter(style, code, value, level, indirect); + } +} + +void tex_set_uncramped_styles(halfword code, halfword value, halfword level, halfword indirect) +{ + for (int style = display_style; style <= script_script_style; style += 2) { + 
tex_def_math_parameter(style, code, value, level, indirect); + } +} + +/*tex Every second style, from |cramped_display_style| up to |cramped_script_script_style|. */ + +void tex_set_cramped_styles(halfword code, halfword value, halfword level, halfword indirect) +{ + for (int style = cramped_display_style; style <= cramped_script_script_style; style += 2) { + tex_def_math_parameter(style, code, value, level, indirect); + } +} + +/*tex The display and text styles get |value|, the script and scriptscript styles get zero. */ + +void tex_set_split_styles(halfword code, halfword value, halfword level, halfword indirect) +{ + tex_set_display_styles (code, value, level, indirect); + tex_set_text_styles (code, value, level, indirect); + tex_set_script_styles (code, 0, level, indirect); + tex_set_script_script_styles(code, 0, level, indirect); +} + +/*tex All atom pair parameters become |zero_mu_skip_code|, flagged as |indirect_math_unset|. */ + +void tex_reset_all_styles(halfword level) +{ + for (int code = math_parameter_atom_pairs_first; code <= math_parameter_atom_pairs_last; code++) { + tex_set_all_styles(code, zero_mu_skip_code, level, indirect_math_unset); + } +} + +/*tex The class is combined with copies of itself that are shifted by 8, 16 and 24 bits. */ + +inline static halfword tex_aux_math_class_default(halfword class) { + return (class << 24) + (class << 16) + (class << 8) + class; +} + +/*tex + Sets the class, atom, options and parent registers of |class|. NOTE(review): the class register + receives the value made from |parent| while the parent register receives the one made from + |class|; confirm that this is intended. +*/ + +inline static void tex_set_math_class_default(halfword class, halfword parent, halfword options) +{ + tex_word_define(0, internal_int_location(first_math_class_code + class), tex_aux_math_class_default(parent)); + tex_word_define(0, internal_int_location(first_math_atom_code + class), tex_aux_math_class_default(class)); + tex_word_define(0, internal_int_location(first_math_options_code + class), options); + tex_word_define(0, internal_int_location(first_math_parent_code + class), tex_aux_math_class_default(class)); +} + +/*tex The rule for the pair |left|, |right| is stored as |(newleft << 16) + newright| in all styles. */ + +static void tex_aux_set_math_atom_rule(halfword left, halfword right, halfword newleft, halfword newright) +{ + tex_set_all_styles(math_parameter_rules_pair(left, right), (newleft << 16) + newright, level_one, indirect_math_regular); +} + +/*tex Here the defaults for classes, penalties, atom rules, spacing pairs and style variants are set. */ + +void tex_initialize_math_spacing(void) +{ + + for (int class = 0; class <= max_math_class_code; class++) { + tex_set_math_class_default(class, class, no_class_options); + /*tex We do this here as there is no 
real need for yet another initializer. */ + tex_word_define(0, internal_int_location(first_math_pre_penalty_code + class), infinite_penalty); + tex_word_define(0, internal_int_location(first_math_post_penalty_code + class), infinite_penalty); + tex_word_define(0, internal_int_location(first_math_display_pre_penalty_code + class), infinite_penalty); + tex_word_define(0, internal_int_location(first_math_display_post_penalty_code + class), infinite_penalty); + } + + tex_reset_all_styles(level_one); + + /*tex Class defaults: the class, its parent class and its options. */ + + tex_set_math_class_default(ordinary_noad_subtype, ordinary_noad_subtype, no_italic_correction_class_option | + check_ligature_class_option | + check_kern_pair_class_option | + flatten_class_option); + tex_set_math_class_default(operator_noad_subtype, operator_noad_subtype, check_ligature_class_option | + check_kern_pair_class_option); + tex_set_math_class_default(binary_noad_subtype, binary_noad_subtype, no_italic_correction_class_option | + check_ligature_class_option | + check_kern_pair_class_option | + flatten_class_option); + tex_set_math_class_default(relation_noad_subtype, relation_noad_subtype, no_italic_correction_class_option | + check_ligature_class_option | + check_kern_pair_class_option | + flatten_class_option | + omit_penalty_class_option); + tex_set_math_class_default(open_noad_subtype, open_noad_subtype, no_italic_correction_class_option | + /* open_fence_class_option | */ + check_ligature_class_option | + check_kern_pair_class_option); + tex_set_math_class_default(close_noad_subtype, close_noad_subtype, no_italic_correction_class_option | + /* close_fence_class_option | */ + check_ligature_class_option | + check_kern_pair_class_option); + tex_set_math_class_default(punctuation_noad_subtype, punctuation_noad_subtype, no_italic_correction_class_option | + check_ligature_class_option | + check_kern_pair_class_option | + flatten_class_option); + tex_set_math_class_default(variable_noad_subtype, ordinary_noad_subtype, no_italic_correction_class_option); + 
tex_set_math_class_default(active_noad_subtype, ordinary_noad_subtype, no_italic_correction_class_option); + tex_set_math_class_default(inner_noad_subtype, inner_noad_subtype, flatten_class_option); + tex_set_math_class_default(under_noad_subtype, ordinary_noad_subtype, no_class_options); + tex_set_math_class_default(over_noad_subtype, ordinary_noad_subtype, no_class_options); + tex_set_math_class_default(fraction_noad_subtype, ordinary_noad_subtype, no_class_options); + tex_set_math_class_default(radical_noad_subtype, ordinary_noad_subtype, no_class_options); + tex_set_math_class_default(middle_noad_subtype, open_noad_subtype, no_italic_correction_class_option); /* | middle_fence_class_option= */ + tex_set_math_class_default(accent_noad_subtype, ordinary_noad_subtype, no_class_options); + tex_set_math_class_default(fenced_noad_subtype, inner_noad_subtype , no_class_options); + tex_set_math_class_default(ghost_noad_subtype, ordinary_noad_subtype, no_class_options); + tex_set_math_class_default(vcenter_noad_subtype, ordinary_noad_subtype, no_class_options); + + /*tex Atom rules: a (left, right) class pair is replaced by a new (left, right) pair. */ + + tex_aux_set_math_atom_rule(math_begin_class, binary_noad_subtype, ordinary_noad_subtype, ordinary_noad_subtype); + tex_aux_set_math_atom_rule(binary_noad_subtype, math_end_class, ordinary_noad_subtype, ordinary_noad_subtype); + + tex_aux_set_math_atom_rule(binary_noad_subtype, binary_noad_subtype, binary_noad_subtype, ordinary_noad_subtype); + tex_aux_set_math_atom_rule(operator_noad_subtype, binary_noad_subtype, operator_noad_subtype, ordinary_noad_subtype); + tex_aux_set_math_atom_rule(open_noad_subtype, binary_noad_subtype, open_noad_subtype, ordinary_noad_subtype); + tex_aux_set_math_atom_rule(punctuation_noad_subtype, binary_noad_subtype, punctuation_noad_subtype, ordinary_noad_subtype); + tex_aux_set_math_atom_rule(relation_noad_subtype, binary_noad_subtype, relation_noad_subtype, ordinary_noad_subtype); + + tex_aux_set_math_atom_rule(binary_noad_subtype, close_noad_subtype, ordinary_noad_subtype, 
close_noad_subtype); + tex_aux_set_math_atom_rule(binary_noad_subtype, punctuation_noad_subtype, ordinary_noad_subtype, punctuation_noad_subtype); + tex_aux_set_math_atom_rule(binary_noad_subtype, relation_noad_subtype, ordinary_noad_subtype, relation_noad_subtype); + + tex_aux_set_math_atom_rule(relation_noad_subtype, close_noad_subtype, ordinary_noad_subtype, close_noad_subtype); + tex_aux_set_math_atom_rule(relation_noad_subtype, punctuation_noad_subtype, ordinary_noad_subtype, punctuation_noad_subtype); + + /*tex Default spacing per class pair; |tex_set_split_styles| passes zero for the script and scriptscript styles. */ + +// math_parameter_spacing_pair(ordinary_noad_subtype,ordinary_noad_subtype) + + tex_set_all_styles (math_parameter_spacing_pair(ordinary_noad_subtype, operator_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(ordinary_noad_subtype, binary_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(ordinary_noad_subtype, relation_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(ordinary_noad_subtype, inner_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + + tex_set_all_styles (math_parameter_spacing_pair(operator_noad_subtype, ordinary_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_spacing_pair(operator_noad_subtype, operator_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(operator_noad_subtype, relation_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(operator_noad_subtype, inner_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + + tex_set_all_styles (math_parameter_spacing_pair(operator_noad_subtype, fraction_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_all_styles 
(math_parameter_spacing_pair(operator_noad_subtype, radical_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_spacing_pair(fraction_noad_subtype, operator_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_spacing_pair(radical_noad_subtype, operator_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + + tex_set_split_styles (math_parameter_spacing_pair(binary_noad_subtype, ordinary_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(binary_noad_subtype, operator_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(binary_noad_subtype, open_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(binary_noad_subtype, inner_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + + tex_set_split_styles (math_parameter_spacing_pair(binary_noad_subtype, middle_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(binary_noad_subtype, fraction_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(binary_noad_subtype, radical_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(middle_noad_subtype, binary_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(fraction_noad_subtype, binary_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(radical_noad_subtype, binary_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + + tex_set_split_styles (math_parameter_spacing_pair(relation_noad_subtype, 
ordinary_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(relation_noad_subtype, operator_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(relation_noad_subtype, open_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(relation_noad_subtype, inner_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + + tex_set_split_styles (math_parameter_spacing_pair(relation_noad_subtype, middle_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(relation_noad_subtype, fraction_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(relation_noad_subtype, radical_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(middle_noad_subtype, relation_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(fraction_noad_subtype, relation_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(radical_noad_subtype, relation_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + + tex_set_all_styles (math_parameter_spacing_pair(close_noad_subtype, operator_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(close_noad_subtype, binary_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(close_noad_subtype, relation_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(close_noad_subtype, inner_noad_subtype), 
thin_mu_skip_code, level_one, indirect_math_regular); + + tex_set_split_styles (math_parameter_spacing_pair(punctuation_noad_subtype, ordinary_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(punctuation_noad_subtype, operator_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(punctuation_noad_subtype, relation_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(punctuation_noad_subtype, open_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(punctuation_noad_subtype, close_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(punctuation_noad_subtype, punctuation_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(punctuation_noad_subtype, inner_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + + tex_set_split_styles (math_parameter_spacing_pair(punctuation_noad_subtype, fraction_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(punctuation_noad_subtype, middle_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(punctuation_noad_subtype, radical_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(fraction_noad_subtype, punctuation_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(middle_noad_subtype, punctuation_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(radical_noad_subtype, 
punctuation_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + + tex_set_split_styles (math_parameter_spacing_pair(inner_noad_subtype, ordinary_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_spacing_pair(inner_noad_subtype, operator_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(inner_noad_subtype, binary_noad_subtype), med_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(inner_noad_subtype, relation_noad_subtype), thick_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(inner_noad_subtype, open_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(inner_noad_subtype, punctuation_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(inner_noad_subtype, inner_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + + /*tex Inner pairs up with middle, fraction and radical in both directions, like binary, relation and punctuation do. */ + + tex_set_split_styles (math_parameter_spacing_pair(inner_noad_subtype, middle_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(inner_noad_subtype, fraction_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(inner_noad_subtype, radical_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(middle_noad_subtype, inner_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(fraction_noad_subtype, inner_noad_subtype), thin_mu_skip_code, level_one, indirect_math_regular); + tex_set_split_styles (math_parameter_spacing_pair(radical_noad_subtype, inner_noad_subtype), thin_mu_skip_code, level_one, 
indirect_math_regular); + + /*tex The default scale factors: */ + + tex_set_all_styles (math_parameter_x_scale, 1000, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_y_scale, 1000, level_one, indirect_math_regular); + + /* could be initialize_math_defaults */ + + tex_set_all_styles (math_parameter_over_line_variant, math_cramped_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_under_line_variant, math_normal_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_over_delimiter_variant, math_small_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_under_delimiter_variant, math_small_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_delimiter_over_variant, math_normal_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_delimiter_under_variant, math_normal_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_h_extensible_variant, math_normal_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_v_extensible_variant, math_normal_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_fraction_variant, math_cramped_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_radical_variant, math_cramped_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_degree_variant, math_double_superscript_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_accent_variant, math_cramped_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_top_accent_variant, math_cramped_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_bottom_accent_variant, math_cramped_style_variant, level_one, indirect_math_regular); + tex_set_all_styles 
(math_parameter_overlay_accent_variant, math_cramped_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_numerator_variant, math_numerator_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_denominator_variant, math_denominator_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_superscript_variant, math_superscript_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_subscript_variant, math_subscript_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_prime_variant, math_superscript_style_variant, level_one, indirect_math_regular); + tex_set_all_styles (math_parameter_stack_variant, math_numerator_style_variant, level_one, indirect_math_regular); +} + +/*tex + + This needs to be called just at the start of |mlist_to_hlist|, for backward compatibility with + |\scriptspace|. + +*/ + +void tex_finalize_math_parameters(void) +{ + int saved_trace = tracing_assigns_par; + tracing_assigns_par = 0; + if (tex_get_math_parameter(display_style, math_parameter_space_after_script, NULL) == undefined_math_parameter) { + tex_def_math_parameter(display_style, math_parameter_space_after_script, script_space_par, level_one, indirect_math_regular); + tex_def_math_parameter(text_style, math_parameter_space_after_script, script_space_par, level_one, indirect_math_regular); + tex_def_math_parameter(script_style, math_parameter_space_after_script, script_space_par, level_one, indirect_math_regular); + tex_def_math_parameter(script_script_style, math_parameter_space_after_script, script_space_par, level_one, indirect_math_regular); + tex_def_math_parameter(cramped_display_style, math_parameter_space_after_script, script_space_par, level_one, indirect_math_regular); + tex_def_math_parameter(cramped_text_style, math_parameter_space_after_script, script_space_par, level_one, indirect_math_regular); + 
tex_def_math_parameter(cramped_script_style, math_parameter_space_after_script, script_space_par, level_one, indirect_math_regular); + tex_def_math_parameter(cramped_script_script_style, math_parameter_space_after_script, script_space_par, level_one, indirect_math_regular); + } + tracing_assigns_par = saved_trace; +} + /*tex Builds the message (the parameter id is only mentioned when |param| is not negative) and hands it to |tex_handle_error| together with a fixed help text. */ +static void tex_aux_math_parameter_error(int style, int param, const char *name) +{ + char msg[256] = { 0 }; + if (param >= 0) { + snprintf(msg, 256, "Math error: parameter '%s' with id %i in style %d is not set", name, param, style); + } else { + snprintf(msg, 256, "Math error: parameter '%s' style %d is not set", name, style); + } + tex_handle_error( + normal_error_type, + msg, + "Sorry, but I can't typeset math unless various parameters have been set. This is\n" + "normally done by loading special math fonts into the math family slots. Your font\n" + "set is lacking at least the parameter mentioned earlier." + ); + return; +} + +/*tex + + For the moment this is experimental. The value fetched for |param| in the given style is + clamped to the range 0 upto 5000 before it is returned. + +*/ + +inline static scaled tex_aux_max_scale(int style, int param) +{ + scaled scale = tex_get_math_parameter(style, param, NULL); + if (scale > 5000) { + return 5000; + } else if (scale < 0) { + return 0; + } else { + return scale; + } +} + +/*tex + + The non-staticness of this function is for the benefit of |texmath.w|. Watch out, this one + uses the style! The style and size numbers don't match because we have cramped styles. The + quad is multiplied by the clamped x scale (in units of 1/1000); an undefined quad triggers + an error message and yields zero. + +*/ + +scaled tex_get_math_quad_style(int style) +{ + scaled scale = tex_aux_max_scale(style, math_parameter_x_scale); + scaled value = tex_get_math_parameter(style, math_parameter_quad, NULL); + if (value == undefined_math_parameter) { + tex_aux_math_parameter_error(style, -1, "quad"); + return 0; + } else { + return scaledround(0.001 * value * scale); + } +} + +/*tex + + For this reason the next one is different because it is called with a size specifier instead + of a style specifier.
+ +*/ + +scaled tex_math_axis_size(int size) +{ + scaled value; + switch (size) { + case script_size : size = script_style; break; + case script_script_size: size = script_script_style; break; + default : size = text_style; break; + } + value = tex_get_math_parameter(size, math_parameter_axis, NULL); + if (value == undefined_math_parameter) { + tex_aux_math_parameter_error(size, -1, "axis"); + return 0; + } else { + return value; + } +} + /*tex The size is mapped onto a style and the raw quad is returned; an undefined value is not caught here. */ +scaled tex_get_math_quad_size(int size) /* used in degree before and after */ +{ + switch (size) { + case script_size : size = script_style; break; + case script_script_size: size = script_script_style; break; + default : size = text_style; break; + } + return tex_get_math_parameter(size, math_parameter_quad, NULL); +} + /*tex Same mapping, but the quad is multiplied by the clamped x scale (in units of 1/1000) and divided by 18; again an undefined quad is not caught. */ +scaled tex_get_math_quad_size_scaled(int size) /* used in cur_mu */ +{ + scaled value, scale; + switch (size) { + case script_size : size = script_style; break; + case script_script_size: size = script_script_style; break; + default : size = text_style; break; + } + value = tex_get_math_parameter(size, math_parameter_quad, NULL); + scale = tex_aux_max_scale(size, math_parameter_x_scale); + /* return tex_x_over_n(scaledround(0.001 * value * scale), 18); */ + return scaledround(0.001 * value * scale / 18.0); +} + /*tex Returns 0 when the parameter is flagged as ignored (reporting that when |tracing_math_par| is larger than one) and 1 otherwise. */ +static int tex_aux_math_parameter_okay(int param) +{ + if (ignore_math_parameter(param)) { + if (tracing_math_par > 1) { + tex_begin_diagnostic(); + tex_print_format("[math: parameter, name %s, ignored]", lmt_name_of_math_parameter(param)); + tex_end_diagnostic(); + } + return 0; + } else { + return 1; + } +} + /*tex An ignored parameter yields zero; an undefined one triggers an error message and also yields zero. */ +scaled tex_get_math_parameter_checked(int style, int param) +{ + if (tex_aux_math_parameter_okay(param)) { + scaled value = tex_get_math_parameter(style, param, NULL); + if (value == undefined_math_parameter) { + tex_aux_math_parameter_error(style, param, lmt_name_of_math_parameter(param)); + return 0; + } else { + return value; + } + } else { + return 0; + } +} + /*tex Here an ignored or undefined parameter quietly yields |dflt|, without an error message. */ +scaled
tex_get_math_parameter_default(int style, int param, scaled dflt) +{ + if (tex_aux_math_parameter_okay(param)) { + scaled value = tex_get_math_parameter(style, param, NULL); + if (value == undefined_math_parameter) { + return dflt; + } else { + return value; + } + } else { + return dflt; + } +} + /*tex Appends a kern node created with amount 0 and |explicit_kern_subtype| to the current list. */ +void tex_run_math_italic_correction(void) { + tex_tail_append(tex_new_kern_node(0, explicit_kern_subtype)); /* maybe math_shape_kern */ +} + +/* The |x| variants multiply a defined, non-zero value by |glyph_scale_par|, |glyph_x_scale_par| and the clamped |math_parameter_x_scale| of the style; all three are in units of 1/1000, hence the factor 0.000000001. The plain variant passes an undefined value on unscaled, the checked one reports an error and returns zero, and the default one returns |dflt|. An ignored parameter gives zero (or |dflt| in the default variant). */ + +scaled tex_get_math_x_parameter(int style, int param) +{ + if (tex_aux_math_parameter_okay(param)) { + scaled scale = tex_aux_max_scale(style, math_parameter_x_scale); + scaled value = tex_get_math_parameter(style, param, NULL); + if (value == undefined_math_parameter) { + return value; // ?? scaledround(value * scale * 0.001); + } else { + return value ? scaledround(0.000000001 * glyph_scale_par * glyph_x_scale_par * value * scale) : 0; + } + } else { + return 0; + } +} + +scaled tex_get_math_x_parameter_checked(int style, int param) +{ + if (tex_aux_math_parameter_okay(param)) { + scaled scale = tex_aux_max_scale(style, math_parameter_x_scale); + scaled value = tex_get_math_parameter(style, param, NULL); + if (value == undefined_math_parameter) { + tex_aux_math_parameter_error(style, param, lmt_name_of_math_parameter(param)); + return 0; + } else { + return value ? scaledround(0.000000001 * glyph_scale_par * glyph_x_scale_par * value * scale) : 0; + } + } else { + return 0; + } +} + +scaled tex_get_math_x_parameter_default(int style, int param, scaled dflt) +{ + if (tex_aux_math_parameter_okay(param)) { + scaled scale = tex_aux_max_scale(style, math_parameter_x_scale); + scaled value = tex_get_math_parameter(style, param, NULL); + if (value == undefined_math_parameter) { + return dflt; + } else{ + return value ?
scaledround(0.000000001 * glyph_scale_par * glyph_x_scale_par * value * scale) : 0; + } + } else { + return dflt; + } +} + /*tex The |y| variants have the same structure as the |x| ones but use |math_parameter_y_scale| and |glyph_y_scale_par|. This one passes an undefined value on unscaled. */ +scaled tex_get_math_y_parameter(int style, int param) +{ + if (tex_aux_math_parameter_okay(param)) { + scaled scale = tex_aux_max_scale(style, math_parameter_y_scale); + scaled value = tex_get_math_parameter(style, param, NULL); + if (value == undefined_math_parameter) { + return value; + } else{ + return value ? scaledround(0.000000001 * glyph_scale_par * glyph_y_scale_par * value * scale) : 0; + } + } else { + return 0; + } +} + /*tex An undefined value triggers an error message and yields zero. */ +scaled tex_get_math_y_parameter_checked(int style, int param) +{ + if (tex_aux_math_parameter_okay(param)) { + scaled scale = tex_aux_max_scale(style, math_parameter_y_scale); + scaled value = tex_get_math_parameter(style, param, NULL); + if (value == undefined_math_parameter) { + tex_aux_math_parameter_error(style, param, lmt_name_of_math_parameter(param)); + return 0; + } else { + return value ? scaledround(0.000000001 * glyph_scale_par * glyph_y_scale_par * value * scale) : 0; + } + } else { + return 0; + } +} + /*tex An ignored or undefined parameter yields |dflt|. */ +scaled tex_get_math_y_parameter_default(int style, int param, scaled dflt) +{ + if (tex_aux_math_parameter_okay(param)) { + scaled scale = tex_aux_max_scale(style, math_parameter_y_scale); + scaled value = tex_get_math_parameter(style, param, NULL); + if (value == undefined_math_parameter) { + return dflt; + } else{ + return value ? scaledround(0.000000001 * glyph_scale_par * glyph_y_scale_par * value * scale) : 0; + } + } else { + return dflt; + } +} + /*tex The font variants fetch the value with |tex_aux_get_font_math_parameter|, using the scale reported by |tex_get_math_font_scale|. An undefined value is passed on as such; otherwise |glyph_scale_par| (in units of 1/1000) is applied. */ +scaled tex_get_font_math_parameter(int font, int size, int param) +{ + scaled scale = tex_get_math_font_scale(font, size); + scaled value = tex_aux_get_font_math_parameter(scale, font, param); + if (value == undefined_math_parameter) { + return undefined_math_parameter; + } else { + return value ?
scaledround(0.001 * glyph_scale_par * value) : 0; + } +} + +/* maybe more precision, so multiply all and divide by 0.000000001 */ + +scaled tex_get_font_math_y_parameter(int font, int size, int param) +{ + scaled scale = tex_get_math_font_scale(font, size); + scaled value = tex_aux_get_font_math_parameter(scale, font, param); + if (value == undefined_math_parameter) { + return undefined_math_parameter; + } else { + return value ? scaledround(0.000001 * glyph_scale_par * glyph_y_scale_par * value) : 0; + } +} + +scaled tex_get_font_math_x_parameter(int font, int size, int param) +{ + scaled scale = tex_get_math_font_scale(font, size); + scaled value = tex_aux_get_font_math_parameter(scale, font, param); + if (value == undefined_math_parameter) { + return undefined_math_parameter; + } else { + return value ? scaledround(0.000001 * glyph_scale_par * glyph_x_scale_par * value) : 0; + } +} + /*tex Returns the spacing parameter number for the given class pair, or -1 when it falls outside the range |math_parameter_atom_pairs_first| upto |math_parameter_atom_pairs_last|. */ +halfword tex_to_math_spacing_parameter(halfword left, halfword right) +{ + halfword param = math_parameter_spacing_pair(left,right); + return (param >= math_parameter_atom_pairs_first && param <= math_parameter_atom_pairs_last) ? param : -1; +} + /*tex The same for the rules parameter of a class pair, checked against the atom rules range. */ +halfword tex_to_math_rules_parameter(halfword left, halfword right) +{ + halfword param = math_parameter_rules_pair(left,right); + return (param >= math_parameter_atom_rules_first && param <= math_parameter_atom_rules_last) ? param : -1; +} + /*tex Assigns default math codes to the digits and to the upper and lowercase ascii letters, and a zero delimiter code to the period. */ +void tex_set_default_math_codes(void) +{ + mathcodeval mval = { 0, 0, 0 }; + /*tex This will remap old font families at runtime. */ + mval.class_value = math_use_current_family_code; + /*tex Upright math digits come from family 0. */ + for (int d = '0'; d <= '9'; d++) { + mval.character_value = d; + tex_set_math_code(d, mval, level_one); + } + /* In traditional fonts math italic has family 1.
*/ + mval.family_value = 1; + for (int u = 'A'; u <= 'Z'; u++) { + mval.character_value = u; + tex_set_math_code(u, mval, level_one); + } + for (int l = 'a'; l <= 'z'; l++) { + mval.character_value = l; + tex_set_math_code(l, mval, level_one); + } + /*tex This is kind of standard. */ + tex_set_del_code('.', (delcodeval) { { 0, 0, 0, }, { 0, 0, 0 } }, level_one); +} + /*tex Only |display_style| and |text_style| count as main styles; all other styles, the cramped ones included, give zero. */ +int tex_in_main_math_style(halfword style) +{ + switch (style) { + case display_style: + case text_style: + return 1; + /* + case cramped_display_style: + case cramped_text_style: + return 0; // could be parameter driven + */ + default: + return 0; + } +} diff --git a/source/luametatex/source/tex/texmath.h b/source/luametatex/source/tex/texmath.h new file mode 100644 index 000000000..7dbd62b2e --- /dev/null +++ b/source/luametatex/source/tex/texmath.h @@ -0,0 +1,758 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_TEXMATH_H +# define LMT_TEXMATH_H + +/*tex + This module also deals with math parameters. That code has been cleaned up a lot, and it + worked out well, but at some point Mikael Sundqvist and I entered \quotation {alternative + spacing models mode} and a more generic model was implemented. As a consequence new code + showed up and already cleaned up code (the many parameters) could be thrown out. That's how + it goes and in retrospect it is good that we had not yet released. + +*/ + +# define MATHPARAMSTACK 8 +# define MATHPARAMDEFAULT undefined_math_parameter + +# define MATHFONTSTACK 8 +# define MATHFONTDEFAULT 0 + +typedef struct math_state_info { + int size; /*tex Size code corresponding to |cur_style|. */ + int level; /*tex Maybe we should expose this one. */ + /* int opentype; */ /*tex We just assume opentype now.
*/ + /* int padding; */ + sa_tree par_head; + sa_tree fam_head; + halfword last_left; + halfword last_right; + scaled last_atom; + scaled scale; +} math_state_info; + +extern math_state_info lmt_math_state; + +typedef enum math_sizes { + text_size, + script_size, + script_script_size +} math_sizes; + +# define last_math_size script_script_size + +# define undefined_math_parameter max_dimen + +typedef enum math_indirect_types { + indirect_math_unset, + indirect_math_regular, + indirect_math_integer, + indirect_math_dimension, + indirect_math_gluespec, + indirect_math_mugluespec, + indirect_math_register_integer, + indirect_math_register_dimension, + indirect_math_register_gluespec, + indirect_math_register_mugluespec, + indirect_math_internal_integer, + indirect_math_internal_dimension, + indirect_math_internal_gluespec, + indirect_math_internal_mugluespec, +} math_indirect_types; + +# define last_math_indirect indirect_math_internal_mugluespec + +typedef enum math_parameter_types { + math_int_parameter, + math_dimen_parameter, + math_muglue_parameter, + math_style_parameter, + math_pair_parameter, +} math_parameter_types; + +typedef enum math_parameters { + math_parameter_quad, + math_parameter_axis, + math_parameter_accent_base_height, + math_parameter_accent_base_depth, + math_parameter_flattened_accent_base_height, + math_parameter_flattened_accent_base_depth, + math_parameter_x_scale, + math_parameter_y_scale, + math_parameter_operator_size, + math_parameter_overbar_kern, + math_parameter_overbar_rule, + math_parameter_overbar_vgap, + math_parameter_underbar_kern, + math_parameter_underbar_rule, + math_parameter_underbar_vgap, + math_parameter_radical_kern, + math_parameter_radical_rule, + math_parameter_radical_vgap, + math_parameter_radical_degree_before, + math_parameter_radical_degree_after, + math_parameter_radical_degree_raise, + math_parameter_radical_extensible_after, + math_parameter_radical_extensible_before, + math_parameter_stack_vgap, + 
math_parameter_stack_num_up, + math_parameter_stack_denom_down, + math_parameter_fraction_rule, + math_parameter_fraction_num_vgap, + math_parameter_fraction_num_up, + math_parameter_fraction_denom_vgap, + math_parameter_fraction_denom_down, + math_parameter_fraction_del_size, + math_parameter_skewed_fraction_hgap, + math_parameter_skewed_fraction_vgap, + math_parameter_limit_above_vgap, + math_parameter_limit_above_bgap, + math_parameter_limit_above_kern, + math_parameter_limit_below_vgap, + math_parameter_limit_below_bgap, + math_parameter_limit_below_kern, + math_parameter_nolimit_sub_factor, /*tex bonus */ + math_parameter_nolimit_sup_factor, /*tex bonus */ + math_parameter_under_delimiter_vgap, + math_parameter_under_delimiter_bgap, + math_parameter_over_delimiter_vgap, + math_parameter_over_delimiter_bgap, + math_parameter_subscript_shift_drop, + math_parameter_superscript_shift_drop, + math_parameter_subscript_shift_down, + math_parameter_subscript_superscript_shift_down, + math_parameter_subscript_top_max, + math_parameter_superscript_shift_up, + math_parameter_superscript_bottom_min, + math_parameter_superscript_subscript_bottom_max, + math_parameter_subscript_superscript_vgap, + math_parameter_space_before_script, + math_parameter_space_after_script, + math_parameter_connector_overlap_min, + /* */ + math_parameter_extra_superscript_shift, + math_parameter_extra_subscript_shift, + math_parameter_extra_superprescript_shift, + math_parameter_extra_subprescript_shift, + /* */ + math_parameter_prime_raise, + math_parameter_prime_raise_composed, + math_parameter_prime_shift_up, + math_parameter_prime_shift_drop, + math_parameter_prime_space_after, + math_parameter_prime_width, + /* */ + math_parameter_rule_height, + math_parameter_rule_depth, + /* */ + math_parameter_superscript_shift_distance, + math_parameter_subscript_shift_distance, + math_parameter_superprescript_shift_distance, + math_parameter_subprescript_shift_distance, + /* */ + 
math_parameter_extra_superscript_space, + math_parameter_extra_subscript_space, + math_parameter_extra_superprescript_space, + math_parameter_extra_subprescript_space, + /* */ + math_parameter_skewed_delimiter_tolerance, + /* */ + math_parameter_accent_top_shift_up, + math_parameter_accent_bottom_shift_down, + math_parameter_accent_top_overshoot, + math_parameter_accent_bottom_overshoot, + math_parameter_accent_superscript_drop, + math_parameter_accent_superscript_percent, + math_parameter_accent_extend_margin, + math_parameter_flattened_accent_top_shift_up, + math_parameter_flattened_accent_bottom_shift_down, + /* */ + math_parameter_delimiter_percent, + math_parameter_delimiter_shortfall, + /* */ + math_parameter_over_line_variant, + math_parameter_under_line_variant, + math_parameter_over_delimiter_variant, + math_parameter_under_delimiter_variant, + math_parameter_delimiter_over_variant, + math_parameter_delimiter_under_variant, + math_parameter_h_extensible_variant, + math_parameter_v_extensible_variant, + math_parameter_fraction_variant, + math_parameter_radical_variant, + math_parameter_accent_variant, + math_parameter_degree_variant, + math_parameter_top_accent_variant, + math_parameter_bottom_accent_variant, + math_parameter_overlay_accent_variant, + math_parameter_numerator_variant, + math_parameter_denominator_variant, + math_parameter_superscript_variant, + math_parameter_subscript_variant, + math_parameter_prime_variant, + math_parameter_stack_variant, + /* */ + /*tex The growing list of |math_parameter_ATOM1_ATOM2_spacing| is gone. 
*/ + /* */ + math_parameter_last = 255, + math_parameter_atom_pairs_first = math_parameter_last + 1, + math_parameter_atom_pairs_last = math_parameter_atom_pairs_first + (max_n_of_math_classes * max_n_of_math_classes), + math_parameter_atom_rules_first = math_parameter_atom_pairs_last + 1, + math_parameter_atom_rules_last = math_parameter_atom_rules_first + (max_n_of_math_classes * max_n_of_math_classes), + /* a special private one */ + math_parameter_reset_spacing, + math_parameter_set_spacing, + math_parameter_let_spacing, + math_parameter_copy_spacing, + math_parameter_set_atom_rule, + math_parameter_let_atom_rule, + math_parameter_copy_atom_rule, + math_parameter_let_parent, + math_parameter_copy_parent, + math_parameter_set_pre_penalty, + math_parameter_set_post_penalty, + math_parameter_set_display_pre_penalty, + math_parameter_set_display_post_penalty, + math_parameter_ignore, + math_parameter_options, + math_parameter_set_defaults, +} math_parameters; + +# define math_parameter_max_range (16 * 1024) // 4 * (max_n_of_math_classes * max_n_of_math_classes) + +/*tex + A class pair is stored at |first + left * max_n_of_math_classes + right|; the |left| and + |right| macros invert that mapping. The macro arguments are parenthesized so that compound + expressions (like |a + b| or a conditional) can safely be passed. +*/ + +# define math_parameter_spacing_pair(l,r) (math_parameter_atom_pairs_first + ((l) * max_n_of_math_classes) + (r)) +# define math_parameter_rules_pair(l,r) (math_parameter_atom_rules_first + ((l) * max_n_of_math_classes) + (r)) + +# define math_parameter_spacing_left(n) (((n) - math_parameter_atom_pairs_first) / max_n_of_math_classes) +# define math_parameter_spacing_right(n) (((n) - math_parameter_atom_pairs_first) % max_n_of_math_classes) + +# define math_parameter_rules_left(n) (((n) - math_parameter_atom_rules_first) / max_n_of_math_classes) +# define math_parameter_rules_right(n) (((n) - math_parameter_atom_rules_first) % max_n_of_math_classes) + +# define ignore_math_parameter(n) (count_parameter(first_math_ignore_code + (n))) +# define options_math_parameter(n) (count_parameter(first_math_options_code + (n))) + +# define math_all_class (max_n_of_math_classes - 3) +# define math_begin_class (max_n_of_math_classes - 2)
+# define math_end_class (max_n_of_math_classes - 1) + +# define valid_math_class_code(n) (n >= 0 && n < max_n_of_math_classes) + +# define last_math_parameter math_parameter_stack_variant +# define math_parameter_first_variant math_parameter_over_line_variant +# define math_parameter_last_variant math_parameter_stack_variant +# define math_default_spacing_parameter math_parameter_spacing_pair(ordinary_noad_subtype,ordinary_noad_subtype) +# define math_default_rules_parameter 0 + +typedef enum math_class_options { + no_pre_slack_class_option = 0x0000001, + no_post_slack_class_option = 0x0000002, + left_top_kern_class_option = 0x0000004, + right_top_kern_class_option = 0x0000008, + left_bottom_kern_class_option = 0x0000010, + right_bottom_kern_class_option = 0x0000020, + look_ahead_for_end_class_option = 0x0000040, + no_italic_correction_class_option = 0x0000080, + check_ligature_class_option = 0x0000100, + check_italic_correction_class_option = 0x0000200, + check_kern_pair_class_option = 0x0000400, + flatten_class_option = 0x0000800, + omit_penalty_class_option = 0x0001000, + unpack_class_option = 0x0002000, + raise_prime_option = 0x0004000, + // open_fence_class_option = 0x0000100, + // close_fence_class_option = 0x0000200, + // middle_fence_class_option = 0x0000400, + carry_over_left_top_kern_class_option = 0x0008000, + carry_over_right_top_kern_class_option = 0x0010000, + carry_over_left_bottom_kern_class_option = 0x0020000, + carry_over_right_bottom_kern_class_option = 0x0040000, + prefer_delimiter_dimensions_class_option = 0x0080000, + auto_inject_class_option = 0x0100000, + remove_italic_correction_class_option = 0x0200000, + no_class_options = 0xF000000, +} math_class_options; + +extern int tex_math_has_class_option(halfword cls, int option); + +typedef enum math_atom_font_options { + math_atom_no_font_option = 0, + math_atom_text_font_option = 1, + math_atom_math_font_option = 2, +} math_atom_font_options; + +inline int math_parameter_value_type(int n) +{ + 
if (n < last_math_parameter) { + return lmt_interface.math_parameter_values[n].type; + } else if (n >= math_parameter_atom_rules_first && n <= math_parameter_atom_rules_last) { + return math_pair_parameter; + } else { + return math_muglue_parameter; + } +} + +/*tex + We used to have a lot of defines like: + + \starttyping + # define math_parameter_A_B_spacing math_parameter_spacing_pair(A_noad_subtype,B_noad_subtype) + \stoptyping + + but we now inline them as they are only used once. + +*/ + +/*tex + + We also need to compute the change in style between mlists and their subsidiaries. The following + macros define the subsidiary style for an overlined nucleus (|cramped_style|), for a subscript + or a superscript (|sub_style| or |sup_style|), or for a numerator or denominator (|num_style| or + |denom_style|). We now delegate that to a helper function so that eventually we can symbolic + presets. + +*/ + +typedef enum math_style_variants { + math_normal_style_variant, + math_cramped_style_variant, + math_subscript_style_variant, + math_superscript_style_variant, + math_small_style_variant, + math_smaller_style_variant, + math_numerator_style_variant, + math_denominator_style_variant, + math_double_superscript_variant, +} math_style_variants; + +# define last_math_style_variant math_double_superscript_variant + +/* + +These are the mandate font parameters per \url {https://docs.microsoft.com/en-us/typography/opentype/spec/math}: + +\starttabulate[|T|p|] +\NC ScriptPercentScaleDown \NC Percentage of scaling down for level 1 superscripts and subscripts. Suggested value: 80 pct. \NC \NR +\NC ScriptScriptPercentScaleDown \NC Percentage of scaling down for level 2 (scriptScript) superscripts and subscripts. Suggested value: 60 pct. \NC \NR +\NC DelimitedSubFormulaMinHeight \NC Minimum height required for a delimited expression (contained within parentheses, etc.) to be treated as a sub-formula. Suggested value: normal line height × 1.5. 
\NC \NR +\NC DisplayOperatorMinHeight \NC Minimum height of n-ary operators (such as integral and summation) for formulas in display mode (that is, appearing as standalone page elements, not embedded inline within text). \NC \NR +\NC MathLeading \NC White space to be left between math formulas to ensure proper line spacing. For example, for applications that treat line gap as a part of line ascender, formulas with ink going above (os2.sTypoAscender + os2.sTypoLineGap - MathLeading) or with ink going below os2.sTypoDescender will result in increasing line height. \NC \NR +\NC AxisHeight \NC Axis height of the font. In math typesetting, the term axis refers to a horizontal reference line used for positioning elements in a formula. The math axis is similar to but distinct from the baseline for regular text layout. For example, in a simple equation, a minus symbol or fraction rule would be on the axis, but a string for a variable name would be set on a baseline that is offset from the axis. The axisHeight value determines the amount of that offset. \NC \NR +\NC AccentBaseHeight \NC Maximum (ink) height of accent base that does not require raising the accents. Suggested: x-height of the font (os2.sxHeight) plus any possible overshoots. \NC \NR +\NC FlattenedAccentBaseHeight \NC Maximum (ink) height of accent base that does not require flattening the accents. Suggested: cap height of the font (os2.sCapHeight). \NC \NR +\NC SubscriptShiftDown \NC The standard shift down applied to subscript elements. Positive for moving in the downward direction. Suggested: os2.ySubscriptYOffset. \NC \NR +\NC SubscriptTopMax \NC Maximum allowed height of the (ink) top of subscripts that does not require moving subscripts further down. Suggested: 4/5 x-height. \NC \NR +\NC SubscriptBaselineDropMin \NC Minimum allowed drop of the baseline of subscripts relative to the (ink) bottom of the base. Checked for bases that are treated as a box or extended shape.
Positive for subscript baseline dropped below the base bottom. \NC \NR +\NC SuperscriptShiftUp \NC Standard shift up applied to superscript elements. Suggested: os2.ySuperscriptYOffset. \NC \NR +\NC SuperscriptShiftUpCramped \NC Standard shift of superscripts relative to the base, in cramped style. \NC \NR +\NC SuperscriptBottomMin \NC Minimum allowed height of the (ink) bottom of superscripts that does not require moving superscripts further up. Suggested: ¼ x-height. \NC \NR +\NC SuperscriptBaselineDropMax \NC Maximum allowed drop of the baseline of superscripts relative to the (ink) top of the base. Checked for bases that are treated as a box or extended shape. Positive for superscript baseline below the base top. \NC \NR +\NC SubSuperscriptGapMin \NC Minimum gap between the superscript and subscript ink. Suggested: 4 × default rule thickness. \NC \NR +\NC SuperscriptBottomMaxWithSubscript \NC The maximum level to which the (ink) bottom of superscript can be pushed to increase the gap between superscript and subscript, before subscript starts being moved down. Suggested: 4/5 x-height. \NC \NR +\NC SpaceAfterScript \NC Extra white space to be added after each subscript and superscript. Suggested: 0.5 pt for a 12 pt font. (Note that, in some math layout implementations, a constant value, such as 0.5 pt, may be used for all text sizes. Some implementations may use a constant ratio of text size, such as 1/24 of em.) \NC \NR +\NC UpperLimitGapMin \NC Minimum gap between the (ink) bottom of the upper limit, and the (ink) top of the base operator. \NC \NR +\NC UpperLimitBaselineRiseMin \NC Minimum distance between baseline of upper limit and (ink) top of the base operator. \NC \NR +\NC LowerLimitGapMin \NC Minimum gap between (ink) top of the lower limit, and (ink) bottom of the base operator. \NC \NR +\NC LowerLimitBaselineDropMin \NC Minimum distance between baseline of the lower limit and (ink) bottom of the base operator.
\NC \NR +\NC StackTopShiftUp \NC Standard shift up applied to the top element of a stack. \NC \NR +\NC StackTopDisplayStyleShiftUp \NC Standard shift up applied to the top element of a stack in display style. \NC \NR +\NC StackBottomShiftDown \NC Standard shift down applied to the bottom element of a stack. Positive for moving in the downward direction. \NC \NR +\NC StackBottomDisplayStyleShiftDown \NC Standard shift down applied to the bottom element of a stack in display style. Positive for moving in the downward direction. \NC \NR +\NC StackGapMin \NC Minimum gap between (ink) bottom of the top element of a stack, and the (ink) top of the bottom element. Suggested: 3 × default rule thickness. \NC \NR +\NC StackDisplayStyleGapMin \NC Minimum gap between (ink) bottom of the top element of a stack, and the (ink) top of the bottom element in display style. Suggested: 7 × default rule thickness. \NC \NR +\NC StretchStackTopShiftUp \NC Standard shift up applied to the top element of the stretch stack. \NC \NR +\NC StretchStackBottomShiftDown \NC Standard shift down applied to the bottom element of the stretch stack. Positive for moving in the downward direction. \NC \NR +\NC StretchStackGapAboveMin \NC Minimum gap between the ink of the stretched element, and the (ink) bottom of the element above. Suggested: same value as upperLimitGapMin. \NC \NR +\NC StretchStackGapBelowMin \NC Minimum gap between the ink of the stretched element, and the (ink) top of the element below. Suggested: same value as lowerLimitGapMin. \NC \NR +\NC FractionNumeratorShiftUp \NC Standard shift up applied to the numerator. \NC \NR +\NC FractionNumeratorDisplayStyleShiftUp \NC Standard shift up applied to the numerator in display style. Suggested: same value as stackTopDisplayStyleShiftUp. \NC \NR +\NC FractionDenominatorShiftDown \NC Standard shift down applied to the denominator. Positive for moving in the downward direction.
\NC \NR +\NC FractionDenominatorDisplayStyleShiftDown \NC Standard shift down applied to the denominator in display style. Positive for moving in the downward direction. Suggested: same value as stackBottomDisplayStyleShiftDown. \NC \NR +\NC FractionNumeratorGapMin \NC Minimum tolerated gap between the (ink) bottom of the numerator and the ink of the fraction bar. Suggested: default rule thickness. \NC \NR +\NC FractionNumDisplayStyleGapMin \NC Minimum tolerated gap between the (ink) bottom of the numerator and the ink of the fraction bar in display style. Suggested: 3 × default rule thickness. \NC \NR +\NC FractionRuleThickness \NC Thickness of the fraction bar. Suggested: default rule thickness. \NC \NR +\NC FractionDenominatorGapMin \NC Minimum tolerated gap between the (ink) top of the denominator and the ink of the fraction bar. Suggested: default rule thickness. \NC \NR +\NC FractionDenomDisplayStyleGapMin \NC Minimum tolerated gap between the (ink) top of the denominator and the ink of the fraction bar in display style. Suggested: 3 × default rule thickness. \NC \NR +\NC SkewedFractionHorizontalGap \NC Horizontal distance between the top and bottom elements of a skewed fraction. \NC \NR +\NC SkewedFractionVerticalGap \NC Vertical distance between the ink of the top and bottom elements of a skewed fraction. \NC \NR +\NC OverbarVerticalGap \NC Distance between the overbar and the (ink) top of the base. Suggested: 3 × default rule thickness. \NC \NR +\NC OverbarRuleThickness \NC Thickness of overbar. Suggested: default rule thickness. \NC \NR +\NC OverbarExtraAscender \NC Extra white space reserved above the overbar. Suggested: default rule thickness. \NC \NR +\NC UnderbarVerticalGap \NC Distance between underbar and (ink) bottom of the base. Suggested: 3 × default rule thickness. \NC \NR +\NC UnderbarRuleThickness \NC Thickness of underbar. Suggested: default rule thickness. \NC \NR +\NC UnderbarExtraDescender \NC Extra white space reserved below the underbar.
Always positive. Suggested: default rule thickness. \NC \NR +\NC RadicalVerticalGap \NC Space between the (ink) top of the expression and the bar over it. Suggested: 1¼ default rule thickness. \NC \NR +\NC RadicalDisplayStyleVerticalGap \NC Space between the (ink) top of the expression and the bar over it. Suggested: default rule thickness + ¼ x-height. \NC \NR +\NC RadicalRuleThickness \NC Thickness of the radical rule. This is the thickness of the rule in designed or constructed radical signs. Suggested: default rule thickness. \NC \NR +\NC RadicalExtraAscender \NC Extra white space reserved above the radical. Suggested: same value as radicalRuleThickness. \NC \NR +\NC RadicalKernBeforeDegree \NC Extra horizontal kern before the degree of a radical, if such is present. Suggested: 5/18 of em. \NC \NR +\NC RadicalKernAfterDegree \NC Negative kern after the degree of a radical, if such is present. Suggested: −10/18 of em. \NC \NR +\NC RadicalDegreeBottomRaisePercent \NC Height of the bottom of the radical degree, if such is present, in proportion to the ascender of the radical sign. Suggested: 60 pct. \NC \NR +\stoptabulate + +And these are our own, some are a bit older already but most were introduced when we (Mikael and +Hans) overhauled the math engine. 
+ +\starttabulate[|T|c|p|] +\NC MinConnectorOverlap \NC 0 \NC \NC \NR +\NC SubscriptShiftDownWithSuperscript \NC inherited \NC \NC \NR +\NC FractionDelimiterSize \NC undefined \NC \NC \NR +\NC FractionDelimiterDisplayStyleSize \NC undefined \NC \NC \NR +\NC NoLimitSubFactor \NC 0 \NC \NC \NR +\NC NoLimitSupFactor \NC 0 \NC \NC \NR +\NC AccentBaseDepth \NC reserved \NC \NC \NR +\NC FlattenedAccentBaseDepth \NC reserved \NC \NC \NR +\NC SpaceBeforeScript \NC 0 \NC \NC \NR +\NC PrimeRaisePercent \NC 0 \NC \NC \NR +\NC PrimeShiftUp \NC 0 \NC \NC \NR +\NC PrimeShiftUpCramped \NC 0 \NC \NC \NR +\NC PrimeSpaceAfter \NC 0 \NC \NC \NR +\NC PrimeBaselineDropMax \NC 0 \NC \NC \NR +\NC PrimeWidthPercent \NC 0 \NC \NC \NR +\NC SkewedDelimiterTolerance \NC 0 \NC \NC \NR +\NC AccentTopShiftUp \NC undefined \NC \NC \NR +\NC AccentBottomShiftDown \NC undefined \NC \NC \NR +\NC AccentTopOvershoot \NC 0 \NC \NC \NR +\NC AccentBottomOvershoot \NC 0 \NC \NC \NR +\NC AccentSuperscriptDrop \NC 0 \NC \NC \NR +\NC AccentSuperscriptPercent \NC 0 \NC \NC \NR +\NC FlattenedAccentTopShiftUp \NC undefined \NC \NC \NR +\NC FlattenedAccentBottomShiftDown \NC undefined \NC \NC \NR +\NC DelimiterPercent \NC \NC \NC \NR +\NC DelimiterShortfall \NC \NC \NC \NR +\stoptabulate + +*/ + +typedef enum math_parameter_codes { + /* official */ + ScriptPercentScaleDown = 1, + ScriptScriptPercentScaleDown, + DelimitedSubFormulaMinHeight, + DisplayOperatorMinHeight, + MathLeading, + AxisHeight, + AccentBaseHeight, + FlattenedAccentBaseHeight, + SubscriptShiftDown, + SubscriptTopMax, + SubscriptBaselineDropMin, + SuperscriptShiftUp, + SuperscriptShiftUpCramped, + SuperscriptBottomMin, + SuperscriptBaselineDropMax, + SubSuperscriptGapMin, + SuperscriptBottomMaxWithSubscript, + SpaceAfterScript, + UpperLimitGapMin, + UpperLimitBaselineRiseMin, + LowerLimitGapMin, + LowerLimitBaselineDropMin, + StackTopShiftUp, + StackTopDisplayStyleShiftUp, + StackBottomShiftDown, + StackBottomDisplayStyleShiftDown, + StackGapMin, 
+ StackDisplayStyleGapMin, + StretchStackTopShiftUp, + StretchStackBottomShiftDown, + StretchStackGapAboveMin, + StretchStackGapBelowMin, + FractionNumeratorShiftUp, + FractionNumeratorDisplayStyleShiftUp, + FractionDenominatorShiftDown, + FractionDenominatorDisplayStyleShiftDown, + FractionNumeratorGapMin, + FractionNumeratorDisplayStyleGapMin, + FractionRuleThickness, + FractionDenominatorGapMin, + FractionDenominatorDisplayStyleGapMin, + SkewedFractionHorizontalGap, + SkewedFractionVerticalGap, + OverbarVerticalGap, + OverbarRuleThickness, + OverbarExtraAscender, + UnderbarVerticalGap, + UnderbarRuleThickness, + UnderbarExtraDescender, + RadicalVerticalGap, + RadicalDisplayStyleVerticalGap, + RadicalRuleThickness, + RadicalExtraAscender, + RadicalKernBeforeDegree, + RadicalKernAfterDegree, + RadicalDegreeBottomRaisePercent, + RadicalKernAfterExtensible, + RadicalKernBeforeExtensible, + /* unofficial */ + MinConnectorOverlap, + SubscriptShiftDownWithSuperscript, + FractionDelimiterSize, + FractionDelimiterDisplayStyleSize, + NoLimitSubFactor, + NoLimitSupFactor, + AccentBaseDepth, /* reserved */ + FlattenedAccentBaseDepth, /* reserved */ + SpaceBeforeScript, + PrimeRaisePercent, + PrimeRaiseComposedPercent, + PrimeShiftUp, + PrimeShiftUpCramped, + PrimeBaselineDropMax, + PrimeSpaceAfter, + PrimeWidthPercent, + SkewedDelimiterTolerance, + AccentTopShiftUp, + AccentBottomShiftDown, + AccentTopOvershoot, + AccentBottomOvershoot, + AccentSuperscriptDrop, + AccentSuperscriptPercent, + AccentExtendMargin, + FlattenedAccentTopShiftUp, + FlattenedAccentBottomShiftDown, + DelimiterPercent, + DelimiterShortfall, + /* done */ + math_parameter_last_code, +} math_parameter_codes; + +# define math_parameter_last_font_code NoLimitSupFactor +# define math_parameter_first_engine_code SpaceBeforeScript + +typedef enum display_skip_modes { + display_skip_default, + display_skip_always, + display_skip_non_zero, + display_skip_ignore, +} display_skip_modes; + +typedef enum 
math_skip_modes { + math_skip_surround_when_zero = 0, /*tex obey mathsurround when zero glue */ + math_skip_always_left = 1, + math_skip_always_right = 2, + math_skip_always_both = 3, + math_skip_always_surround = 4, /*tex ignore, obey mathsurround */ + math_skip_ignore = 5, /*tex all spacing disabled */ + math_skip_only_when_skip = 6, +} math_skip_modes; + +/*tex All kinds of helpers: */ + +/*tex |fam_par_in_range| checks its argument against both bounds, |cur_fam_par_in_range| does the same for |cur_fam_par|. */ + +# define math_use_current_family_code math_component_variable_code +# define fam_par_in_range(fam) (((fam) >= 0) && ((fam) < max_n_of_math_families)) +# define cur_fam_par_in_range ((cur_fam_par >= 0) && (cur_fam_par < max_n_of_math_families)) + +extern halfword tex_size_of_style (halfword style); + +extern halfword tex_to_math_spacing_parameter (halfword left, halfword right); +extern halfword tex_to_math_rules_parameter (halfword left, halfword right); + +extern halfword tex_math_style_variant (halfword style, halfword param); + +extern void tex_def_math_parameter (int style, int param, scaled value, int level, int indirect); +extern scaled tex_get_math_parameter (int style, int param, halfword *type); +extern int tex_has_math_parameter (int style, int param); +extern scaled tex_get_math_parameter_checked (int style, int param); +extern scaled tex_get_math_parameter_default (int style, int param, scaled dflt); + +extern scaled tex_get_math_x_parameter (int style, int param); +extern scaled tex_get_math_x_parameter_checked (int style, int param); +extern scaled tex_get_math_x_parameter_default (int style, int param, scaled dflt); + +extern scaled tex_get_math_y_parameter (int style, int param); +extern scaled tex_get_math_y_parameter_checked (int style, int param); +extern scaled tex_get_math_y_parameter_default (int style, int param, scaled dflt); + +extern scaled tex_get_font_math_parameter (int font, int size, int param); +extern scaled tex_get_font_math_x_parameter (int font, int size, int param); +extern scaled tex_get_font_math_y_parameter (int font, int size, int
param); + +extern void tex_fixup_math_parameters (int fam, int size, int fnt, int level); +extern void tex_finalize_math_parameters (void); +extern scaled tex_get_math_quad_style (int style); +extern scaled tex_math_axis_size (int size); +extern scaled tex_get_math_quad_size (int size); +extern scaled tex_get_math_quad_size_scaled (int size); + +extern void tex_initialize_math (void); +extern void tex_initialize_math_spacing (void); + +extern void tex_set_display_styles (halfword code, halfword value, halfword level, halfword indirect); +extern void tex_set_text_styles (halfword code, halfword value, halfword level, halfword indirect); +extern void tex_set_script_styles (halfword code, halfword value, halfword level, halfword indirect); +extern void tex_set_script_script_styles (halfword code, halfword value, halfword level, halfword indirect); +extern void tex_set_all_styles (halfword code, halfword value, halfword level, halfword indirect); +extern void tex_set_split_styles (halfword code, halfword value, halfword level, halfword indirect); +extern void tex_set_uncramped_styles (halfword code, halfword value, halfword level, halfword indirect); +extern void tex_set_cramped_styles (halfword code, halfword value, halfword level, halfword indirect); +extern void tex_reset_all_styles (halfword level); + +extern void tex_dump_math_data (dumpstream f); +extern void tex_undump_math_data (dumpstream f); +extern void tex_unsave_math_data (int level); + +extern void tex_math_copy_char_data (halfword target, halfword source, int wipelist); + +extern int tex_show_math_node (halfword n, int threshold, int max); +extern void tex_flush_math (void); +extern int tex_is_math_disc (halfword n); +extern halfword tex_math_make_disc (halfword n); +extern int tex_in_main_math_style (halfword style); + +extern halfword tex_new_sub_box (halfword n); +// halfword tex_math_vcenter_group (halfword n); +extern int tex_fam_fnt (int fam, int size); +extern void tex_def_fam_fnt (int fam, int 
size, int fnt, int level); +extern void tex_scan_extdef_del_code (int level, int extcode); +extern void tex_scan_extdef_math_code (int level, int extcode); +extern int tex_current_math_style (void); +extern int tex_current_math_main_style (void); +extern int tex_scan_math_code_val (halfword code, mathcodeval *mval, mathdictval *dval); +extern int tex_scan_math_cmd_val (mathcodeval *mval, mathdictval *dval); + +extern halfword tex_scan_math_spec (int optional_equal); +extern halfword tex_new_math_spec (mathcodeval m, quarterword code); +extern halfword tex_new_math_dict_spec (mathdictval d, mathcodeval m, quarterword code); +extern mathcodeval tex_get_math_spec (halfword s); +extern mathdictval tex_get_math_dict (halfword s); +extern void tex_run_math_math_spec (void); +extern void tex_run_text_math_spec (void); + +extern void tex_set_default_math_codes (void); + +/*tex The runners in maincontrol: */ + +extern void tex_run_math_left_brace (void); +extern void tex_run_math_math_component (void); +extern void tex_run_math_modifier (void); +extern void tex_run_math_radical (void); +extern void tex_run_math_accent (void); +extern void tex_run_math_style (void); +extern void tex_run_math_choice (void); +extern void tex_run_math_script (void); +extern void tex_run_math_fraction (void); +extern void tex_run_math_fence (void); +extern void tex_run_math_initialize (void); +extern void tex_run_math_letter (void); +extern void tex_run_math_math_char_number (void); +extern void tex_run_text_math_char_number (void); +extern void tex_run_math_char_number (void); +extern void tex_run_math_delimiter_number (void); +// void tex_run_math_math_char_given (void); +// void tex_run_text_math_char_given (void); +// void tex_run_math_math_char_xgiven (void); +// void tex_run_text_math_char_xgiven (void); +extern void tex_run_math_equation_number (void); +extern void tex_run_math_shift (void); +extern void tex_run_math_italic_correction (void); + +extern void tex_finish_math_group (void); 
+extern void tex_finish_math_choice (void); +extern void tex_finish_math_fraction (void); +extern void tex_finish_math_operator (void); +extern void tex_finish_display_alignment (halfword head, halfword tail, halfword prevdepth); + +typedef enum math_control_codes { + math_control_use_font_control = 0x000001, /* use the font flag, maybe for traditional, might go */ + math_control_over_rule = 0x000002, + math_control_under_rule = 0x000004, + math_control_radical_rule = 0x000008, + math_control_fraction_rule = 0x000010, + math_control_accent_skew_half = 0x000020, + math_control_accent_skew_apply = 0x000040, + math_control_apply_ordinary_kern_pair = 0x000080, + math_control_apply_vertical_italic_kern = 0x000100, + math_control_apply_ordinary_italic_kern = 0x000200, + math_control_apply_char_italic_kern = 0x000400, /* traditional */ + math_control_rebox_char_italic_kern = 0x000800, /* traditional */ + math_control_apply_boxed_italic_kern = 0x001000, + math_control_staircase_kern = 0x002000, + math_control_apply_text_italic_kern = 0x004000, + math_control_check_text_italic_kern = 0x008000, + math_control_check_space_italic_kern = 0x010000, + math_control_apply_script_italic_kern = 0x020000, + math_control_analyze_script_nucleus_char = 0x040000, + math_control_analyze_script_nucleus_list = 0x080000, + math_control_analyze_script_nucleus_box = 0x100000, +} math_control_codes; + +/*tex This is what we use for \OPENTYPE\ in \CONTEXT: */ + +# define assumed_math_control ( \ + math_control_over_rule \ + | math_control_under_rule \ + | math_control_radical_rule \ + | math_control_fraction_rule \ + | math_control_accent_skew_half \ + | math_control_accent_skew_apply \ + | math_control_apply_ordinary_kern_pair \ + | math_control_apply_vertical_italic_kern \ + | math_control_apply_ordinary_italic_kern \ + | math_control_apply_boxed_italic_kern \ + | math_control_staircase_kern \ + | math_control_apply_text_italic_kern \ + | math_control_check_text_italic_kern \ + | 
math_control_apply_script_italic_kern \ + | math_control_analyze_script_nucleus_char \ + | math_control_analyze_script_nucleus_list \ + | math_control_analyze_script_nucleus_box \ +) + +/*tex + In the process of improving the math engine several intermediate features have been + added that were removed later. They were mostly an aid for testing but in the end it + made no sense to keep them around. To some extend they could enforce compatibility + but with most fonts being opentype now that is no longer feasible. + + \starttyping + typedef enum math_flatten_codes { + math_flatten_ordinary = 0x01, + math_flatten_binary = 0x02, + math_flatten_relation = 0x04, + math_flatten_punctuation = 0x08, + math_flatten_inner = 0x10, + } math_flatten_codes; + \stoptyping + +*/ + +typedef enum saved_math_items { + saved_math_item_direction = 0, + /* saved_math_item_x_scale = 1, */ /* this was an experiment */ + /* saved_math_item_y_scale = 2, */ /* this was an experiment */ + /* saved_math_n_of_items = 3, */ + saved_math_n_of_items = 1, +} saved_math_items; + +typedef enum saved_equation_number_items { + saved_equation_number_item_location = 0, + saved_equation_number_n_of_items = 1, +} saved_equation_number_items; + +typedef enum saved_choice_items { + saved_choice_item_count = 0, + saved_choice_n_of_items = 1, +} saved_choice_items; + +typedef enum saved_fraction_items { + saved_fraction_item_userstyle = 0, + saved_fraction_item_autostyle = 1, + saved_fraction_item_variant = 2, + saved_fraction_n_of_items = 3, +} saved_fraction_items; + +typedef enum saved_operator_items { + saved_operator_item_variant = 0, + saved_operator_n_of_items = 1, +} saved_operator_items; + +typedef enum saved_math_group_items { + saved_math_group_item_pointer = 0, + saved_math_group_all_class = 1, + saved_math_group_n_of_items = 2, +} saved_math_group_items; + +# endif diff --git a/source/luametatex/source/tex/texmathcodes.c b/source/luametatex/source/tex/texmathcodes.c new file mode 100644 index 
000000000..7d80eac19 --- /dev/null +++ b/source/luametatex/source/tex/texmathcodes.c @@ -0,0 +1,347 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + We support the traditional math codes as well as larger ones suitable for \UNICODE\ input and + fonts. + +*/ + +/*tex the |0xFFFFFFFF| is a flag value. */ + +# define MATHCODESTACK 8 +# define MATHCODEDEFAULT 0xFFFFFFFF +# define MATHCODEACTIVE 0xFFFFFFFE + +/*tex Delcodes are also went larger. */ + +# define DELCODESTACK 4 +# define DELCODEDEFAULT 0xFFFFFFFF + +typedef struct mathcode_state_info { + sa_tree mathcode_head; + sa_tree delcode_head; +} mathcode_state_info; + +static mathcode_state_info lmt_mathcode_state = { + .mathcode_head = NULL, + .delcode_head = NULL, +}; + +/*tex + + We now get lots of helpers for definitions and printing. The storage model that we use is + different because we can have many more so we need to be sparse. Therefore we use trees. + +*/ + +# define print_hex_digit_one(A) do { \ + if ((A) >= 10) { \ + tex_print_char('A' + (A) - 10); \ + } else { \ + tex_print_char('0' + (A)); \ + } \ +} while (0) + +# define print_hex_digit_two(A) do { \ + print_hex_digit_one((A) / 16); \ + print_hex_digit_one((A) % 16); \ +} while (0) + +# define print_hex_digit_four(A) do { \ + print_hex_digit_two((A) / 256); \ + print_hex_digit_two((A) % 256); \ +} while (0) + +# define print_hex_digit_six(A) do { \ + print_hex_digit_two( (A) / 65536); \ + print_hex_digit_two(((A) % 65536) / 256); \ + print_hex_digit_two( (A) % 256); \ +} while (0) + +/* 0xFFFFF is plenty for math */ + +mathcodeval tex_mathchar_from_integer(int value, int extcode) +{ + mathcodeval mval; + if (extcode == tex_mathcode) { + mval.class_value = math_old_class_part(value); + mval.family_value = math_old_family_part(value); + mval.character_value = math_old_character_part(value); + } else { + mval.class_value = math_class_part(value); + mval.family_value = math_family_part(value); + 
mval.character_value = math_character_part(value); + } + return mval; +} + +mathcodeval tex_mathchar_from_spec(int value) +{ + mathcodeval mval = { 0, 0, 0 }; + if (value) { + mval.class_value = math_spec_class(value); + mval.family_value = math_spec_family(value); + mval.character_value = math_spec_character(value); + } + return mval; +} + +void tex_show_mathcode_value(mathcodeval mval, int extcode) +{ + tex_print_char('"'); + if (extcode == tex_mathcode) { + print_hex_digit_one(math_old_class_mask(mval.class_value)); + print_hex_digit_one(math_old_family_mask(mval.family_value)); + print_hex_digit_two(math_old_character_mask(mval.character_value)); + } else { + print_hex_digit_two(mval.class_value); + tex_print_char('"'); + print_hex_digit_two(mval.family_value); + tex_print_char('"'); + print_hex_digit_six(mval.character_value); + } +} + +static void tex_aux_show_mathcode(int n) +{ + mathcodeval mval = tex_get_math_code(n); + tex_print_str_esc("Umathcode"); + tex_print_int(n); + tex_print_char('='); + tex_show_mathcode_value(mval, umath_mathcode); +} + +static void tex_aux_unsave_mathcode(int level) +{ + if (lmt_mathcode_state.mathcode_head->stack) { + while (lmt_mathcode_state.mathcode_head->sa_stack_ptr > 0 && abs(lmt_mathcode_state.mathcode_head->stack[lmt_mathcode_state.mathcode_head->sa_stack_ptr].level) >= level) { + sa_stack_item item = lmt_mathcode_state.mathcode_head->stack[lmt_mathcode_state.mathcode_head->sa_stack_ptr]; + if (item.level > 0) { + sa_rawset_item_4(lmt_mathcode_state.mathcode_head, item.code, item.value_1); + if (tracing_restores_par > 1) { + tex_begin_diagnostic(); + tex_print_str("{restoring "); + tex_aux_show_mathcode(item.code); + tex_print_char('}'); + tex_end_diagnostic(); + } + } + (lmt_mathcode_state.mathcode_head->sa_stack_ptr)--; + } + } +} + +mathcodeval tex_no_math_code(void) +{ + return (mathcodeval) { 0, 0, 0 }; +} + +void tex_set_math_code(int n, mathcodeval v, int level) +{ + sa_tree_item item; + if (v.class_value == 
active_math_class_value && v.family_value == 0 && v.character_value == 0) { + item.uint_value = MATHCODEACTIVE; + } else if (v.class_value == 0 && v.family_value == 0) { + /*tex This is rather safe because we don't decide on it. */ + item.uint_value = MATHCODEDEFAULT; + } else { + item.math_code_value.class_value = v.class_value; + item.math_code_value.family_value = v.family_value; + item.math_code_value.character_value = v.character_value; + } + sa_set_item_4(lmt_mathcode_state.mathcode_head, n, item, level); + if (tracing_assigns_par > 1) { + tex_begin_diagnostic(); + tex_print_str("{assigning "); + tex_aux_show_mathcode(n); + tex_print_char('}'); + tex_end_diagnostic(); + } +} + +mathcodeval tex_get_math_code(int n) +{ + sa_tree_item item = sa_get_item_4(lmt_mathcode_state.mathcode_head, n); + mathcodeval m = { 0, 0, 0 }; + if (item.uint_value == MATHCODEDEFAULT) { + m.character_value = n; + } else if (item.uint_value == MATHCODEACTIVE) { + m.class_value = active_math_class_value; + } else if (item.math_code_value.class_value == active_math_class_value) { + m.class_value = active_math_class_value; + m.character_value = n; + } else { + m.class_value = (short) item.math_code_value.class_value; + m.family_value = (short) item.math_code_value.family_value; + m.character_value = item.math_code_value.character_value; + } + return m; +} + +int tex_get_math_code_number(int n) /* should be unsigned */ +{ + mathcodeval d = tex_get_math_code(n); + return math_packed_character(d.class_value, d.family_value, d.character_value); +} + +static void tex_aux_initialize_mathcode(void) +{ + lmt_mathcode_state.mathcode_head = sa_new_tree(MATHCODESTACK, 4, (sa_tree_item) { .uint_value = MATHCODEDEFAULT }); +} + +static void tex_aux_dump_mathcode(dumpstream f) +{ + sa_dump_tree(f, lmt_mathcode_state.mathcode_head); +} + +static void tex_aux_undump_mathcode(dumpstream f) +{ + lmt_mathcode_state.mathcode_head = sa_undump_tree(f); +} + +static void tex_aux_show_delcode(int n) +{ + 
delcodeval dval = tex_get_del_code(n); + tex_print_str_esc("Udelcode"); + tex_print_int(n); + tex_print_char('='); + if (tex_has_del_code(dval)) { + tex_print_char('"'); + print_hex_digit_two(dval.small.family_value); + print_hex_digit_six(dval.small.character_value); + } else { + tex_print_str("-1"); + } +} + +static void tex_aux_unsave_delcode(int level) +{ + if (lmt_mathcode_state.delcode_head->stack) { + while (lmt_mathcode_state.delcode_head->sa_stack_ptr > 0 && abs(lmt_mathcode_state.delcode_head->stack[lmt_mathcode_state.delcode_head->sa_stack_ptr].level) >= level) { + sa_stack_item item = lmt_mathcode_state.delcode_head->stack[lmt_mathcode_state.delcode_head->sa_stack_ptr]; + if (item.level > 0) { + sa_rawset_item_8(lmt_mathcode_state.delcode_head, item.code, item.value_1, item.value_2); + if (tracing_restores_par > 1) { + tex_begin_diagnostic(); + tex_print_str("{restoring "); + tex_aux_show_delcode(item.code); + tex_print_char('}'); + tex_end_diagnostic(); + } + } + (lmt_mathcode_state.delcode_head->sa_stack_ptr)--; + } + } +} + +void tex_set_del_code(int n, delcodeval v, int level) +{ + sa_tree_item v1, v2; /* seldom all zero */ + v1.math_code_value.class_value = v.small.class_value; + v1.math_code_value.family_value = v.small.family_value; + v1.math_code_value.character_value = v.small.character_value; + v2.math_code_value.class_value = v.large.class_value; + v2.math_code_value.family_value = v.large.family_value; + v2.math_code_value.character_value = v.large.character_value; + /*tex Always global! 
*/ + sa_set_item_8(lmt_mathcode_state.delcode_head, n, v1, v2, level); + if (tracing_assigns_par > 1) { + tex_begin_diagnostic(); + tex_print_str("{assigning "); + tex_aux_show_delcode(n); + tex_print_char('}'); + tex_end_diagnostic(); + } +} + +int tex_has_del_code(delcodeval d) +{ + return d.small.family_value >= 0; +} + +delcodeval tex_no_del_code(void) +{ + return (delcodeval) { { 0, -1, 0 }, { 0, 0, 0} }; +} + +delcodeval tex_get_del_code(int n) +{ + sa_tree_item v2; + sa_tree_item v1 = sa_get_item_8(lmt_mathcode_state.delcode_head, n, &v2); + delcodeval d = { { 0, -1, 0 }, { 0, 0, 0} }; + if (v1.uint_value != DELCODEDEFAULT) { + d.small.class_value = (short) v1.math_code_value.class_value; + d.small.family_value = (short) v1.math_code_value.family_value; + d.small.character_value = v1.math_code_value.character_value; + d.large.class_value = (short) v2.math_code_value.class_value; + d.large.family_value = (short) v2.math_code_value.family_value; + d.large.character_value = v2.math_code_value.character_value; + } + return d; +} + +/*tex This really only works for old-style delcodes! 
*/ + +int tex_get_del_code_number(int n) +{ + delcodeval d = tex_get_del_code(n); + if (tex_has_del_code(d)) { + return ((d.small.family_value * 256 + d.small.character_value) * 4096 + + (d.large.family_value * 256) + d.large.character_value); + } else { + return -1; + } +} + +static void tex_aux_initialize_delcode(void) +{ + lmt_mathcode_state.delcode_head = sa_new_tree(DELCODESTACK, 8, (sa_tree_item) { .uint_value = DELCODEDEFAULT }); +} + +static void tex_aux_dump_delcode(dumpstream f) +{ + sa_dump_tree(f, lmt_mathcode_state.delcode_head); +} + +static void tex_aux_undump_delcode(dumpstream f) +{ + lmt_mathcode_state.delcode_head = sa_undump_tree(f); +} + +void tex_unsave_math_codes(int grouplevel) +{ + tex_aux_unsave_mathcode(grouplevel); + tex_aux_unsave_delcode(grouplevel); +} + +void tex_initialize_math_codes(void) +{ + tex_aux_initialize_mathcode(); + tex_aux_initialize_delcode(); + /*tex This might become optional: */ + tex_set_default_math_codes(); + tex_set_del_code('.', (delcodeval) { { 0, 0, 0, }, { 0, 0, 0 } }, level_one); +} + +void tex_free_math_codes(void) +{ + sa_destroy_tree(lmt_mathcode_state.mathcode_head); + sa_destroy_tree(lmt_mathcode_state.delcode_head); +} + +void tex_dump_math_codes(dumpstream f) +{ + tex_aux_dump_mathcode(f); + tex_aux_dump_delcode(f); +} + +void tex_undump_math_codes(dumpstream f) +{ + tex_aux_undump_mathcode(f); + tex_aux_undump_delcode(f); +} diff --git a/source/luametatex/source/tex/texmathcodes.h b/source/luametatex/source/tex/texmathcodes.h new file mode 100644 index 000000000..a45132171 --- /dev/null +++ b/source/luametatex/source/tex/texmathcodes.h @@ -0,0 +1,77 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_MATHCODES_H +# define LMT_MATHCODES_H + +/*tex + We keep this special value which is used in |0x8000| so we have no real problem with 8 being + some other class as well. The 8 here is not really a class. 
+*/ + +# define active_math_class_value 8 + +typedef enum mathcode_codes { + no_mathcode, + tex_mathcode, + umath_mathcode, + /* umathnum_mathcode, */ + mathspec_mathcode +} mathcode_codes; + +typedef struct mathcodeval { + short class_value; + short family_value; + int character_value; +} mathcodeval; + +typedef struct mathdictval { + unsigned short properties; // 1=char 2=open 4=close 8=middle 16=middle==class + unsigned short group; + unsigned int index; +} mathdictval; + +# undef small /* defined in some microsoft library */ + +/*tex + Until we drop 8 bit font support we keep the small and large distinction but it might + go away some day as it wastes memory. +*/ + +typedef struct delcodeval { + mathcodeval small; + mathcodeval large; +} delcodeval; + +typedef struct mathspecval { + mathcodeval code; + mathdictval dict; +} mathspecval; + +extern void tex_set_math_code (int n, mathcodeval v, int gl); +extern mathcodeval tex_get_math_code (int n); +extern int tex_get_math_code_number (int n); +extern mathcodeval tex_no_math_code (void); + +extern void tex_set_del_code (int n, delcodeval v, int gl); +extern delcodeval tex_get_del_code (int n); +extern int tex_get_del_code_number (int n); +extern int tex_has_del_code (delcodeval v); +extern delcodeval tex_no_del_code (void); + +extern mathcodeval tex_scan_mathchar (int extcode); +extern mathdictval tex_scan_mathdict (void); +extern mathcodeval tex_scan_delimiter_as_mathchar (int extcode); +extern mathcodeval tex_mathchar_from_integer (int value, int extcode); +extern mathcodeval tex_mathchar_from_spec (int value); + +extern void tex_show_mathcode_value (mathcodeval d, int extcode); +extern void tex_unsave_math_codes (int grouplevel); +extern void tex_initialize_math_codes (void); +extern void tex_dump_math_codes (dumpstream f); +extern void tex_undump_math_codes (dumpstream f); + +extern void tex_free_math_codes (void); + +# endif diff --git a/source/luametatex/source/tex/texmlist.c 
b/source/luametatex/source/tex/texmlist.c new file mode 100644 index 000000000..ac51d2c35 --- /dev/null +++ b/source/luametatex/source/tex/texmlist.c @@ -0,0 +1,7668 @@ +/* + See license.txt in the root of this project. +*/ + +/*tex + + The code here has to deal with traditional \TEX\ fonts as well as the more modern \OPENTYPE\ + fonts. In \TEX\ fonts the spacing between and construction of glyphs is determined by font + parameters, kerns, italic correction and linked lists of glyphs that make extensibles. In + \OPENTYPE\ fonts kerns are replaced by so called staircase kerns, italics are used differently + and extensibles are made from other glyphs, as in traditional \TEX\ fonts. + + In traditional \TEX\ the italic correction is added to the width of the glyph. This is part of + the engine design and this is also reflected in the width metric of the font. In \OPENTYPE\ math + this is different. There the italic correction has a more explicit usage. The 1.7 spec says: + + \startitemize + + \startitem + {\em italic correction:} When a run of slanted characters is followed by a straight + character (such as an operator or a delimiter), the italics correction of the last glyph is + added to its advance width. + + When positioning limits on an N-ary operator (e.g., integral sign), the horizontal position + of the upper limit is moved to the right by half the italics correction, while the position + of the lower limit is moved to the left by the same distance. Comment HH: this is only + true when we have a real italic integral where the top part sticks out right and the bottom + part left. So, that's only 'one' n-ary operator. + + When positioning superscripts and subscripts, their default horizontal positions are also + different by the amount of the italics correction of the preceding glyph.
+ \stopitem + + \startitem + {\em math kerning:} Set the default horizontal position for the superscript as shifted + relative to the position of the subscript by the italics correction of the base glyph. + \stopitem + + \stopitemize + + Before this was specified we had to gamble a bit and assume that cambria was the font + benchmark and trust our eyes (and msword) for the logic. I must admit that I have been + fighting these italics in fonts (and the heuristics that \LUAMETATEX\ provided) right from the + start (for instance by using \LUA\ based postprocessing) but by now we know more and have more + fonts to test with. More fonts are handy because not all fonts are alike when it comes to + italics. Axes are another area of concern, as it looks like \OPENTYPE\ math fonts often already + apply that shift. + + Now, one can think of cheating. Say that we add the italic correction to the widths and then + make the italic correction zero for all these shapes except those that have a slope, in which + case we negate the correction. Unfortunately that doesn't work well because the traditional + code path {\em assumes} the too narrow shape: it doesn't compensate subscripts. Also, keep in + mind that in for instance Pagella (etc), at least in the pre 2022 versions, even upright + characters have italic corrections! It looks like they are used as kerns in a way similar to + staircase kerns. So, here, when we add the correction we incorrectly flag it as italic but we + have no way to distinguish them from regular kerns. When the gyre fonts never get corrected + we're stuck with the two code paths forever. + + Blocking italic correction via the glyph options is supported (not yet for other constructs + but that might happen). All this italic stuff makes the source a bit messy. Maybe the other + things will be controlled via a noad option. + + The above description is no longer accurate but we keep it for historic reasons.
We now + follow a reverse approach: we just assume \OPENTYPE\ but also expect the needed features to + be enabled explicitly. That means that for instance \quote {out of the box} the engine will + not apply italic correction. + + In 2021-2022 Mikael Sundqvist and I (Hans Hagen) spent about a year investigating how we could + improve the rendering of math. Quite a bit of research went into that and we decided to get rid + of some old font code and concentrate on the \OPENTYPE\ fonts, although we found some flaws and + inconsistencies in them. The solution was to assume a Cambria alike font and adapt the other + fonts runtime using so called goodie files that are part of the \CONTEXT\ font loading code. + That way we could enforce some consistency and compensate for e.g. problematic dimensions like + widths and italic corrections as well as bad top accents and values of font parameters that + interfered with what we had in mind. We added plenty extra ones as well as extra kern options. + Combined with a more rich model for inter atom spacing we could improve the look and feel a lot. + + When the engine got updated a couple of options came and went. An example of this is delimiter + options. For instance we tracked if a delimiter was actually changed and could then react to that + wrt italic corrections. In the new approach we no longer handle that because we assume decent fonts + or at least tweaked ones (read: \CONTEXT\ font goodies being applied). So in the end those extra + delimiter options got removed or were just handled by the noad options. The code is still in the + repository. Also some options related to tracing injected kerns became defaults because we had + them always turned on. + +*/ + +# include "luametatex.h" + +/*tex + + We have some more function calls and locals so we have replaced |cur_style| by |style| where that + makes sense. The same is true for some local variables.
This makes it a bit easier to + distinguish with the more global variables stored in state structures. + + It's a stepwise process ... occasionally I visit this file and change the short variable names + to more verbose. There is also relatively new scaling code that needs checking. + +*/ + +static void tex_aux_append_hkern_to_box_list (halfword q, scaled delta, halfword subtype, const char *trace); +static void tex_aux_prepend_hkern_to_box_list(halfword q, scaled delta, halfword subtype, const char *trace); + +/*tex + + \LUAMETATEX\ makes a bunch of extensions cf.\ the |MATH| table in \OPENTYPE, but some of the + |MathConstants| values have no matching usage in \LUAMETATEX\ right now. + + \startitemize + + \startitem + |ScriptPercentScaleDown| |ScriptScriptPercentScaleDown|: These should be handled by the + macro package, on the engine side there are three separate fonts. + \stopitem + + \startitem + |DelimitedSubFormulaMinHeight|: This is perhaps related to word's natural math input? + We have no idea what to do about it. + \stopitem + + \startitem + |MathLeading|: \LUAMETATEX\ does not currently handle multi line displays, and the + parameter does not seem to make much sense elsewhere. + \stopitem + + \startitem + |FlattenedAccentBaseHeight|: This is based on the |flac| |GSUB| feature. It would not + be hard to support that, but proper math accent placements cf.\ |MATH| needs support + for |MathTopAccentAttachment| table to be implemented first. + \stopitem + + \stopitemize + + Old-style fonts do not define the |radical_rule|. This allows |make_radical| to select the + backward compatibility code, but it also means that we can't raise an error here. + + Occasionally I visit this file and make some variables more verbose. + + In the meantime some experimental and in the meantime obsolete code has been removed but it can + be found in the development repository if really needed. It makes no sense to keep code around + that has been replaced or improved otherwise. 
Some code we keep commented for a while before it + is flushed out. + +*/ + +typedef struct scriptdata { + halfword node; + halfword fnt; + halfword chr; + halfword box; + scaled kern; + scaled slack; + int shifted; + int padding; +} scriptdata; + +typedef struct delimiterextremes { + scaled tfont; + scaled tchar; + scaled bfont; + scaled bchar; + scaled height; + scaled depth; +} delimiterextremes; + +typedef enum limits_modes { + limits_unknown_mode, + limits_vertical_mode, // limits + limits_horizontal_mode, // no limits +} limits_modes; + +inline void tex_math_wipe_kerns(kernset *kerns) { + if (kerns) { + kerns->topright = 0; + kerns->topleft = 0; + kerns->bottomright = 0; + kerns->bottomleft = 0; + kerns->height = 0; + kerns->depth = 0; + kerns->toptotal = 0; + kerns->bottomtotal = 0; + } +} + +inline void tex_math_copy_kerns(kernset *kerns, kernset *parent) { + if (kerns && parent) { + kerns->topright = parent->topright; + kerns->topleft = parent->topleft; + kerns->bottomright = parent->bottomright; + kerns->bottomleft = parent->bottomleft; + kerns->height = parent->height; + kerns->depth = parent->depth; + kerns->toptotal = parent->toptotal; + kerns->bottomtotal = parent->bottomtotal; + } +} + +/*tex + + When the style changes, the following piece of program computes associated information: + +*/ + +inline static halfword tex_aux_set_style_to_size(halfword style) +{ + switch (style) { + case script_style: + case cramped_script_style: + return script_size; + case script_script_style: + case cramped_script_script_style: + return script_script_size; + default: + return text_size; + } +} + +inline static void tex_aux_set_current_math_scale(halfword scale) +{ + glyph_scale_par = scale; + lmt_math_state.scale = glyph_scale_par; +} + +inline static void tex_aux_set_current_math_size(halfword style) +{ + lmt_math_state.size = tex_aux_set_style_to_size(style); +} + +inline static void tex_aux_make_style(halfword current, halfword *current_style, halfword *current_mu) 
+{ + halfword style = node_subtype(current); + switch (style) { + case scaled_math_style: + tex_aux_set_current_math_scale(style_scale(current)); + break; + default: + if (is_valid_math_style(style)) { + if (current_style) { + *current_style = style; + } + tex_aux_set_current_math_size(style); + if (current_mu) { + *current_mu = scaledround(tex_get_math_quad_style(style) / 18.0); + } + } + break; + } +} + +void tex_set_math_text_font(halfword style, int usetextfont) +{ + halfword size = tex_aux_set_style_to_size(style); + halfword font = tex_fam_fnt(cur_fam_par, size); + halfword scale = tex_get_math_font_scale(font, size); + switch (usetextfont) { + case math_atom_text_font_option: + scale = scaledround((double) scale * lmt_font_state.fonts[font]->size / lmt_font_state.fonts[cur_font_par]->size); + break; + case math_atom_math_font_option: + update_tex_font(0, font); + break; + } + update_tex_glyph_scale(scale); +} + +static halfword tex_aux_math_penalty_what(int pre, halfword cls, halfword pre_code, halfword post_code) +{ + halfword value = count_parameter(pre ? (pre_code + cls) : (post_code + cls)); + if (value == infinite_penalty) { + unsigned parent = (unsigned) count_parameter(first_math_parent_code + cls); + cls = pre ? ((parent >> 8) & 0xFF) : (parent & 0xFF); + if (! valid_math_class_code(cls)) { + return infinite_penalty; + } + value = count_parameter(pre ? 
(pre_code + cls) : (post_code + cls)); + } + return value; +} + +static halfword tex_aux_math_penalty(int main_style, int pre, halfword cls) +{ + switch (main_style) { + case display_style: + case cramped_display_style: + { + halfword value = tex_aux_math_penalty_what(pre, cls, first_math_display_pre_penalty_code, first_math_display_post_penalty_code); + if (value != infinite_penalty) { + return value; + } else { + break; + } + } + } + return tex_aux_math_penalty_what(pre, cls, first_math_pre_penalty_code, first_math_post_penalty_code); +} + +inline static scaled limited_scaled(long l) { + if (l > max_dimen) { + return max_dimen; + } else if (l < -max_dimen) { + return -max_dimen; + } else { + return (scaled) l; + } +} + +inline static scaled limited_rounded(double d) { + long l = scaledround(d); + if (l > max_dimen) { + return max_dimen; + } else if (l < -max_dimen) { + return -max_dimen; + } else { + return (scaled) l; + } +} + +// inline static int tex_aux_has_opentype_metrics(halfword f) +// { +// return font_math_parameter_count(f) > 0 && ! font_oldmath(f); +// } + +inline static int tex_aux_math_engine_control(halfword fnt, halfword chr) +{ + if (fnt && (math_font_control_par & math_control_use_font_control) == math_control_use_font_control) { + /*tex + This is only for old fonts and it might go away eventually. Not all control options relate to + a font. + */ + return (font_mathcontrol(fnt) & chr) == chr; + } + return (math_font_control_par & chr) == chr; +} + +/* + + Todo: When we pass explicit dimensions (keyword driven) we use a different helper so that, if + needed we can add debug messages. These values {\em are} scaled according to the glyph scaling + so basically they are relative measures. Maybe we need an extra parameter to control this. + +*/ + +inline static scaled tex_aux_math_glyph_scale(scaled v) +{ + return v ? 
scaledround(0.001 * glyph_scale_par * v) : 0; +} + +inline static scaled tex_aux_math_x_scaled(scaled v, int style) +{ + scaled scale = tex_get_math_parameter(style, math_parameter_x_scale, NULL); + return v ? limited_rounded(0.000000001 * glyph_scale_par * glyph_x_scale_par * v * scale) : 0; +} + +inline static scaled tex_aux_math_given_x_scaled(scaled v) +{ + return v; +} + +/* used for math_operator_size */ + +inline static scaled tex_aux_math_y_scaled(scaled v, int style) +{ + scaled scale = tex_get_math_parameter(style, math_parameter_y_scale, NULL); + return v ? limited_rounded(0.000000001 * glyph_scale_par * glyph_y_scale_par * v * scale) : 0; +} + +inline static scaled tex_aux_math_given_y_scaled(scaled v) +{ + return v; +} + +inline static scaled tex_aux_math_axis(halfword size) +{ + scaled a = tex_math_axis_size(size); /* already scaled to size and x_scale */ + return a ? limited_rounded(0.000001 * glyph_scale_par * glyph_y_scale_par * a) : 0; +} + +inline static scaled tex_aux_math_x_size_scaled(halfword f, scaled v, halfword size) +{ + return v ? limited_rounded(0.000000001 * tex_get_math_font_scale(f, size) * glyph_scale_par * glyph_x_scale_par * v) : 0; +} + +inline static scaled tex_aux_math_y_size_scaled(halfword f, scaled v, halfword size) +{ + return v ? 
limited_rounded(0.000000001 * tex_get_math_font_scale(f, size) * glyph_scale_par * glyph_y_scale_par * v) : 0; +} + +halfword tex_math_font_char_ht(halfword fnt, halfword chr, halfword style) +{ + return tex_aux_math_y_size_scaled(fnt, tex_char_height_from_font(fnt, chr), tex_aux_set_style_to_size(style)); +} + +halfword tex_math_font_char_dp(halfword fnt, halfword chr, halfword style) +{ + return tex_aux_math_y_size_scaled(fnt, tex_char_depth_from_font(fnt, chr), tex_aux_set_style_to_size(style)); +} + +inline static halfword tex_aux_new_math_glyph(halfword fnt, halfword chr, quarterword subtype) { + halfword scale = 1000; + halfword glyph = tex_new_glyph_node(subtype, fnt, tex_get_math_char(fnt, chr, lmt_math_state.size, &scale), null); /* todo: data */; + set_glyph_options(glyph, glyph_options_par); + glyph_scale(glyph) = tex_aux_math_glyph_scale(scale); + glyph_x_scale(glyph) = glyph_x_scale_par; + glyph_y_scale(glyph) = glyph_y_scale_par; + glyph_protected(glyph) = glyph_protected_math_code; + return glyph; +} + +halfword tex_new_math_glyph(halfword fnt, halfword chr) { + return tex_aux_new_math_glyph(fnt, chr, 0); +} + +static void tex_aux_trace_kerns(halfword kern, const char *what, const char *detail) +{ + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: %s, %s, amount %D]", what, detail, kern_amount(kern), pt_unit); + tex_end_diagnostic(); + } +} + +static halfword tex_aux_math_insert_font_kern(halfword current, scaled amount, halfword template, const char *trace) +{ + /*tex Maybe |math_font_kern|, also to prevent expansion. */ + halfword kern = tex_new_kern_node(amount, font_kern_subtype); + tex_attach_attribute_list_copy(kern, template ? 
template : current); + if (node_next(current)) { + tex_couple_nodes(kern, node_next(current)); + } + tex_couple_nodes(current, kern); + tex_aux_trace_kerns(kern, "adding font kern", trace); + return kern; +} + +static halfword tex_aux_math_insert_italic_kern(halfword current, scaled amount, halfword template, const char *trace) +{ + /*tex Maybe |math_italic_kern|. */ + halfword kern = tex_new_kern_node(amount, italic_kern_subtype); + tex_attach_attribute_list_copy(kern, template ? template : current); + if (node_next(current)) { + tex_couple_nodes(kern, node_next(current)); + } + tex_couple_nodes(current, kern); + tex_aux_trace_kerns(kern, "adding italic kern", trace); + return kern; +} + +static int tex_aux_math_followed_by_italic_kern(halfword current, const char *trace) +{ + if (current) { + halfword next = node_next(current); + if (next && node_type(next) == kern_node && node_subtype(next) == italic_kern_subtype) { + tex_aux_trace_kerns(next, "ignoring italic kern", trace); + return 1; + } + } + return 0; +} + +static inline int tex_aux_checked_left_kern_fnt_chr(halfword fnt, halfword chr, halfword state, halfword subtype) +{ + halfword top = 0; + halfword bot = 0; + halfword hastop = (state & prime_script_state) || (state & post_super_script_state); + halfword hasbot = state & post_sub_script_state; + if (hastop && tex_math_has_class_option(subtype, left_top_kern_class_option)) { + top = tex_char_top_left_kern_from_font(fnt, chr); + } + if (hasbot && tex_math_has_class_option(subtype, left_bottom_kern_class_option)) { + bot = tex_char_bottom_left_kern_from_font(fnt, chr); + } + if (hastop && hasbot) { + return top > bot ? 
top : bot; + } else if (hastop) { + return top; + } else { + return bot; + } +} + +static inline int tex_aux_checked_left_kern(halfword list, halfword state, halfword subtype) +{ + if (list && node_type(list) == glyph_node) { + return tex_aux_checked_left_kern_fnt_chr(glyph_font(list), glyph_character(list), state, subtype); + } else { + return 0; + } +} + +static inline int tex_aux_checked_right_kern_fnt_chr(halfword fnt, halfword chr, halfword state, halfword subtype) +{ + halfword top = 0; + halfword bot = 0; + halfword hastop = state & pre_super_script_state; + halfword hasbot = state & pre_sub_script_state; + if (hastop && tex_math_has_class_option(subtype, right_top_kern_class_option)) { + top = tex_char_top_right_kern_from_font(fnt, chr); + } + if (hasbot && tex_math_has_class_option(subtype, right_bottom_kern_class_option)) { + bot = tex_char_bottom_right_kern_from_font(fnt, chr); + } + if (hastop && hasbot) { + return top < bot ? bot : top; + } else if (hastop) { + return top; + } else { + return bot; + } +} + +static inline int tex_aux_checked_right_kern(halfword list, halfword state, halfword subtype) +{ + if (list && node_type(list) == glyph_node) { + return tex_aux_checked_right_kern_fnt_chr(glyph_font(list), glyph_character(list), state, subtype); + } else { + return 0; + } +} + +/*tex We no longer need this one: + + \starttyping + static halfword tex_aux_math_remove_italic_kern(halfword head, scaled *italic, const char *trace) + { + halfword tail = tex_tail_of_node_list(box_list(head)); + if (tail && node_type(tail) == kern_node && node_subtype(tail) == italic_kern_subtype && kern_amount(tail) == *italic) { + tex_aux_trace_kerns(tail, "removing italic kern", trace); + if (head == tail) { + head = null; + } else { + head = node_prev(tail); + node_next(node_prev(tail)) = null; + } + tex_flush_node(tail); + *italic = 0; + } + return head; + } + \starttyping + +*/ + +/*tex We no longer need this one: + + \starttyping + static void 
tex_aux_normalize_delimiters(halfword l, halfword r) + { + if (box_width(l) == null_delimiter_space_par) { + box_height(l) = box_height(r); + box_depth(l) = box_depth(r); + box_shift_amount(l) = box_shift_amount(r); + } else if (box_width(r) == null_delimiter_space_par) { + box_height(r) = box_height(l); + box_depth(r) = box_depth(l); + box_shift_amount(r) = box_shift_amount(l); + } + } + \starttyping + +*/ + +/*tex + Returns the rule thickness parameter |param| from the font that |tex_fam_fnt| gives for the + family of |target| at |size|. A value is only returned when the noad has a family (so not + |unused_math_family|), when |tex_aux_math_engine_control| accepts |control| for that font, and + when the font parameter is not |undefined_math_parameter|; in that case the family is also + stored in |*fam|. In all other cases |undefined_math_parameter| is returned and |*fam| is left + untouched, so callers have to initialize it themselves. +*/ + +static scaled tex_aux_check_rule_thickness(halfword target, int size, halfword *fam, halfword control, halfword param) +{ + /* if (math_rule_thickness_mode_par > 0) { */ + halfword family = noad_family(target); + if (family != unused_math_family) { + halfword font = tex_fam_fnt(family, size); + if (tex_aux_math_engine_control(font, control)) { + scaled thickness = tex_get_font_math_parameter(font, size, param); + if (thickness != undefined_math_parameter) { + *fam = family; + return thickness; + } + } + } + /* } */ + return undefined_math_parameter; +} + +/*tex + Fake character: this creates a |simple_noad| with subtype |cls|, passes |cls| to + |set_noad_classes| and gives the noad a fresh |math_char_node| (subtype 0) as nucleus. The new + noad is returned. +*/ + +static halfword tex_aux_fake_nucleus(quarterword cls) +{ + halfword n = tex_new_node(simple_noad, cls); + halfword q = tex_new_node(math_char_node, 0); + set_noad_classes(n, cls); + noad_nucleus(n) = q; + return n; +} + +/*tex + For tracing purposes we add a kern instead of just adapting the width. When the value derived + from |null_delimiter_space_par| is nonzero, the box |result| gets that width and its list + becomes a single horizontal math kern of the same amount that gets a copy of the attribute + list of |result|. When the value is zero |result| is not changed at all. +*/ + +static void tex_aux_fake_delimiter(halfword result) +{ + halfword amount = tex_aux_math_given_x_scaled(null_delimiter_space_par); + if (amount) { + box_width(result) = amount; + box_list(result) = tex_new_kern_node(amount, horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(box_list(result), result); + } +} + +/*tex + A variant on a suggestion on the list based on analysis by Ulrik Vieth it in the mean + adapted. We keep these 500 and 2 because then we can use similar values.
+*/ + +/*tex + Computes the size a delimiter should get for content with the given |height| and |depth|. + When |axis| is set the depth is first increased by |tex_aux_math_axis(size)| and the height + is decreased by the same amount; |delta1| then becomes the larger of these two distances. + Two candidates are calculated: |delta3| is |delta1 / 500| times |delimiter_factor_par| times + the style's delimiter percent divided by 100 (rounded), and |delta2| is twice |delta1| minus + |delimiter_shortfall_par| and minus the style's delimiter shortfall. The larger of the two is + returned. +*/ + +static scaled tex_aux_get_delimiter_height(scaled height, scaled depth, int axis, int size, int style) +{ + scaled delta1 = height + depth; + scaled delta2 = depth; + scaled delta3 = 0; + halfword percent = tex_get_math_parameter_default(style, math_parameter_delimiter_percent, 0); + scaled shortfall = tex_get_math_y_parameter_default(style, math_parameter_delimiter_shortfall, 0); + if (axis) { + delta2 += tex_aux_math_axis(size); + } + delta1 -= delta2; + if (delta2 > delta1) { + /*tex |delta1| is max distance from axis */ + delta1 = delta2; + } + delta3 = scaledround((delta1 / 500.0) * delimiter_factor_par * (percent / 100.0)); + delta2 = 2 * delta1 - delimiter_shortfall_par - shortfall; + return (delta3 < delta2) ? delta2 : delta3; +} + +/*tex + + In order to convert mlists to hlists, i.e., noads to nodes, we need several subroutines that + are conveniently dealt with now. + + Let us first introduce the macros that make it easy to get at the parameters and other font + information. A size code, which is a multiple of 256, is added to a family number to get an + index into the table of internal font numbers for each combination of family and size. (Be + alert: size codes get larger as the type gets smaller.) In the meantime we use different + maxima and packing as in \LUATEX. + +*/ + +/*tex + Maps a size code onto a constant string: |script_script_size| gives |"scriptscriptfont"|, + |script_size| gives |"scriptfont"| and every other value gives |"textfont"|. The returned + pointer refers to a string literal, so it must not be freed or modified. +*/ + +static const char *tex_aux_math_size_string(int s) +{ + switch (s) { + case script_script_size: return "scriptscriptfont"; + case script_size: return "scriptfont"; + default: return "textfont"; + } +} + +/*tex + Here is a simple routine that creates a flat copy of a nucleus. For a null |n| we return + |null|. Otherwise a new node of the same type as |n| (with subtype 0) is created, it gets a + copy of the attribute list of |n|, and |tex_math_copy_char_data| is called with the new node + as target and |n| as source. +*/ + +static halfword tex_aux_math_clone(halfword n) +{ + if (n) { + halfword result = tex_new_node(node_type(n), 0); + tex_attach_attribute_list_copy(result, n); + tex_math_copy_char_data(result, n, 0); + return result; + } else { + return null; + } +} + +/*tex + A helper used in void or phantom situations. We replace the content by a rule so that we still + have some content (handy for tracing).
+*/ + +static halfword tex_aux_make_list_phantom(halfword source, int nowidth, halfword att) +{ + halfword target = null; + switch (node_type(source)) { + case hlist_node: + target = tex_new_node(hlist_node, node_subtype(source)); + break; + case vlist_node: + target = tex_new_node(vlist_node, node_subtype(source)); + break; + } + if (target) { + halfword rule = tex_new_rule_node(empty_rule_subtype); + tex_attach_attribute_list_attribute(target, att); + tex_attach_attribute_list_attribute(rule, att); + rule_width(rule) = nowidth ? 0 : box_width(source); + rule_height(rule) = box_height(source); + rule_depth(rule) = box_depth(source); + box_dir(target) = dir_lefttoright ; + box_height(target) = rule_height(rule); + box_depth(target) = rule_depth(rule); + box_width(target) = rule_width(rule); + box_shift_amount(target) = box_shift_amount(source); + box_list(target) = rule; + tex_flush_node_list(source); + return target; + } else { + return source; + } +} + +/*tex + + Here is a function that returns a pointer to a rule node having a given thickness |t|. The rule + will extend horizontally to the boundary of the vlist that eventually contains it. + +*/ + +static halfword tex_aux_fraction_rule(scaled width, scaled height, halfword att, quarterword ruletype, halfword size, halfword fam) +{ + halfword rule = null; + int callback_id = lmt_callback_defined(math_rule_callback); + if (callback_id > 0) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "ddddN->N", math_rules_mode_par ? ruletype : normal_rule_subtype, tex_fam_fnt(fam, size), width, height, att, &rule); + if (rule && node_type(rule) != hlist_node) { + rule = tex_hpack(rule, 0, packing_additional, direction_unknown, holding_none_option); + node_subtype(rule) = math_rule_list; + tex_attach_attribute_list_attribute(rule, att); + } + } + if (! 
rule) { + if (math_rules_mode_par) { + rule = tex_new_rule_node(ruletype); + rule_data(rule) = tex_fam_fnt(fam, size); + } else { + rule = tex_new_rule_node(normal_rule_subtype); + } + rule_height(rule) = height; + rule_depth(rule) = 0; + tex_attach_attribute_list_attribute(rule, att); + } + return rule; +} + +/*tex + + The |overbar| function returns a pointer to a vlist box that consists of a given box |b|, above + which has been placed a kern of height |k| under a fraction rule of thickness |t| under + additional space of height |ht|. + +*/ + +static halfword tex_aux_overbar(halfword box, scaled gap, scaled height, scaled krn, halfword att, quarterword index, halfword size, halfword fam) +{ + halfword rule = tex_aux_fraction_rule(box_width(box), height, att, index, size, fam); + if (gap) { + halfword kern = tex_new_kern_node(gap, vertical_math_kern_subtype); + tex_attach_attribute_list_attribute(kern, att); + tex_couple_nodes(kern, box); + tex_couple_nodes(rule, kern); + } else { + tex_couple_nodes(rule, box); + } + if (krn) { + halfword kern = tex_new_kern_node(krn, vertical_math_kern_subtype); + tex_attach_attribute_list_attribute(kern, att); + tex_couple_nodes(kern, rule); + rule = kern; + } + rule = tex_vpack(rule, 0, packing_additional, max_dimen, (singleword) math_direction_par, holding_none_option); + tex_attach_attribute_list_attribute(rule, att); + return rule; +} + +static halfword tex_aux_underbar(halfword box, scaled gap, scaled height, scaled krn, halfword att, quarterword index, halfword size, halfword fam) +{ + halfword rule = tex_aux_fraction_rule(box_width(box), height, att, index, size, fam); + if (gap) { + halfword kern = tex_new_kern_node(gap, vertical_math_kern_subtype); + tex_attach_attribute_list_attribute(kern, att); + tex_couple_nodes(box, kern); + tex_couple_nodes(kern, rule); + } else { + tex_couple_nodes(box, rule); + } + if (krn) { + halfword kern = tex_new_kern_node(krn, vertical_math_kern_subtype); + 
tex_attach_attribute_list_attribute(kern, att); + tex_couple_nodes(rule, kern); + } + rule = tex_vpack(box, 0, packing_additional, max_dimen, (singleword) math_direction_par, holding_none_option); + tex_attach_attribute_list_attribute(rule, att); + /* */ + box_depth(rule) = box_total(rule) + krn - box_height(box); + box_height(rule) = box_height(box); + /* */ + return rule; +} + +/*tex + + Here is a subroutine that creates a new box, whose list contains a single character, and whose + width includes the italic correction for that character. The height or depth of the box will be + negative, if the height or depth of the character is negative. Thus, this routine may deliver a + slightly different result than |hpack| would produce. + + The oldmath font flag can be used for cases where we pass a new school math constants (aka + parameters) table but have a (virtual) font assembled that uses old school type one fonts. In + that case we have a diffeent code path for: + + \startitemize + \startitem rule thickness \stopitem + \startitem accent skew \stopitem + \startitem italic correction (normal width assumes it to be added) \stopitem + \startitem kerning \stopitem + \startitem delimiter construction \stopitem + \startitem accent placement \stopitem + \stopitemize + + In the traditional case an italic kern is always added and the |ic| variable is then passed + to the caller. For a while we had an option to add the correction to the width but now we + have the control options. So these are the options: + + - traditional: insert a kern and pass that correction. + - opentype : traditional_math_char_italic_width: add to width + - : traditional_math_char_italic_pass : pass ic + + Adding a kern in traditional mode is a mode driven option, not a font one. + +*/ + +static halfword tex_aux_char_box(halfword fnt, int chr, halfword att, scaled *ic, quarterword subtype, scaled target, int style) +{ + /*tex The new box and its character node. 
*/ + halfword glyph = tex_aux_new_math_glyph(fnt, chr, subtype); + halfword box = tex_new_null_box_node(hlist_node, math_char_list); + scaledwhd whd = tex_char_whd_from_glyph(glyph); + tex_attach_attribute_list_attribute(glyph, att); + tex_attach_attribute_list_attribute(box, att); + box_width(box) = whd.wd; + box_height(box) = whd.ht; + box_depth(box) = whd.dp; + box_list(box) = glyph; + if (tex_has_glyph_option(glyph, glyph_option_no_italic_correction)) { + whd.ic = 0; + } + if (whd.ic) { + if (ic) { + *ic = whd.ic; /* also in open type? needs checking */ + } + if (tex_aux_math_engine_control(fnt, math_control_apply_char_italic_kern)) { + tex_aux_math_insert_italic_kern(glyph, whd.ic, glyph, "box"); + box_width(box) += whd.ic; + } else { + return box; + } + } else if (ic) { + *ic = 0; + } + if (target && whd.wd < target && tex_char_has_tag_from_font(fnt, chr, extend_last_tag)) { + scaled margin = tex_get_math_x_parameter_default(style, math_parameter_accent_extend_margin, 0); + scaled amount = target - 2 * margin; + glyph_x_scale(glyph) = lround((double) glyph_x_scale(glyph) * amount/whd.wd); + glyph_x_offset(glyph) = (whd.wd - amount)/2; + } + return box; +} + +/*tex + + When we build an extensible character, it's handy to have the following subroutine, which puts + a given character on top of the characters already in box |b|: + +*/ + +// static scaled tex_aux_stack_into_box(halfword b, halfword f, int c, quarterword subtype, int horiziontal) +// { +// /*tex New node placed into |b|. Italic gets added to width in 8 bit fonts. 
*/ +// halfword boxed = tex_aux_char_box(f, c, get_attribute_list(b), NULL, subtype); +// halfword glyph = box_list(boxed); +// if (horiziontal) { +// halfword list = box_list(b); +// if (list) { +// tex_couple_nodes(tex_tail_of_node_list(list), boxed); +// } else { +// box_list(b) = boxed; +// } +// if (box_height(b) < box_height(boxed)) { +// box_height(b) = box_height(boxed); +// } +// if (box_depth(b) < box_depth(boxed)) { +// box_depth(b) = box_depth(boxed); +// } +// return tex_char_width_from_glyph(glyph); +// } else { +// tex_try_couple_nodes(boxed, box_list(b)); +// box_list(b) = boxed; +// box_height(b) = box_height(boxed); +// if (box_width(b) < box_width(boxed)) { +// box_width(b) = box_width(boxed); +// } +// return tex_char_total_from_glyph(glyph); +// } +// } + +/*tex + There is no need to deal with an italic correction here. If there is one in an extensible we + have a real weird font! So in this version we don't end up with a ridiculous amount of hlists + in a horizontal extensible which is nicer when we trace. Actually, the only extensibles that are + italic are integrals and these are not in traditional fonts. + + We only got a warning with Lucida that has italic correction on the begin and end glyphs of + integrals and it looks real bad if we add that, so now we don't even warn any more and just + ignore it.
+*/ + +static scaled tex_aux_stack_char_into_box(halfword box, halfword fnt, int chr, quarterword subtype, int horiziontal) +{ + halfword glyph = tex_aux_new_math_glyph(fnt, chr, subtype); + scaledwhd whd = tex_char_whd_from_glyph(glyph); + halfword list = box_list(box); + tex_attach_attribute_list_attribute(glyph, get_attribute_list(box)); + if (horiziontal) { + if (list) { + tex_couple_nodes(tex_tail_of_node_list(list), glyph); + } else { + box_list(box) = glyph; + } + if (box_height(box) < whd.ht) { + box_height(box) = whd.ht; + } + if (box_depth(box) < whd.dp) { + box_depth(box) = whd.dp; + } + // if (whd.ic) { + // tex_print_message("italic correction found in horizontal delimiter parts, needs checking"); + // } + return whd.wd; + } else { + halfword boxed = tex_new_null_box_node(hlist_node, math_char_list); + tex_attach_attribute_list_attribute(boxed, get_attribute_list(box)); + box_width(boxed) = whd.wd; + box_height(boxed) = whd.ht; + box_depth(boxed) = whd.dp; + box_list(boxed) = glyph; + tex_try_couple_nodes(boxed, list); + box_list(box) = boxed; + // box_height(b) = box_height(boxed); + if (box_width(box) < whd.wd) { + box_width(box) = whd.wd; + } + // if (whd.ic) { + // tex_print_message("italic correction found in vertical delimiter parts, needs checking"); + // } + return whd.ht + whd.dp; + } +} + +static void tex_aux_stack_glue_into_box(halfword box, scaled min, scaled max) { + halfword glue = tex_new_glue_node(zero_glue, user_skip_glue); /* todo: subtype, correction_skip_glue? 
*/ + glue_amount(glue) = min; + glue_stretch(glue) = max - min; + tex_add_glue_option(glue, glue_option_no_auto_break); + tex_attach_attribute_list_copy(glue, box); + if (node_type(box) == vlist_node) { + tex_try_couple_nodes(glue, box_list(box)); + box_list(box) = glue; + } else { + halfword list = box_list(box); + if (list) { + tex_couple_nodes(tex_tail_of_node_list(list), glue); + } else { + box_list(box) = glue; + } + } +} + +/*tex + + \TEX's most important routine for dealing with formulas is called |mlist_to_hlist|. After a + formula has been scanned and represented as an mlist, this routine converts it to an hlist that + can be placed into a box or incorporated into the text of a paragraph. The explicit parameter + |cur_mlist| points to the first node or noad in the given mlist (and it might be |null|). The + parameter |penalties| is |true| if penalty nodes for potential line breaks are to be inserted + into the resulting hlist, the parameter |cur_style| is a style code. After |mlist_to_hlist| has + acted, |vlink (temp_head)| points to the translated hlist. + + Since mlists can be inside mlists, the procedure is recursive. And since this is not part of + \TEX's inner loop, the program has been written in a manner that stresses compactness over + efficiency. (This is no longer always true in \LUAMETATEX.) 
+ +*/ + +static halfword tex_aux_top_extensible_from_box(halfword e) +{ + if (node_type(e) == vlist_node && node_subtype(e) == math_v_extensible_list) { + e = box_list(e); + while (e) { + if (node_type(e) == hlist_node && box_list(e) && node_type(box_list(e)) == glyph_node) { + return box_list(e); /* hit is first */ + } else { + e = node_next(e); + } + } + } + return null; +} + +static halfword tex_aux_bottom_extensible_from_box(halfword e) +{ + halfword g = null; + if (node_type(e) == vlist_node && node_subtype(e) == math_v_extensible_list) { + e = box_list(e); + while (e) { + if (node_type(e) == hlist_node && box_list(e) && node_type(box_list(e)) == glyph_node) { + g = box_list(e); /* last so far */ + } + e = node_next(e); + } + } + return g; /* hit is last */ +} + +static halfword tex_aux_get_delimiter_box(halfword fnt, halfword chr, scaled target, scaled minoverlap, int horizontal, halfword att) +{ + halfword size = lmt_math_state.size; + int callback_id = lmt_callback_defined(make_extensible_callback); + if (callback_id > 0) { + /*tex + This call is not optimized as it hardly makes sense to use it ... special + and a bit of feature creep too. 
+ */ + halfword boxed = null; + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "ddddbNd->N", fnt, chr, target, minoverlap, horizontal, att, size, &boxed); + if (boxed) { + switch (node_type(boxed)) { + case hlist_node: + case vlist_node: + return boxed; + default: + tex_formatted_error("fonts", "invalid extensible character %i created for font %i, [h|v]list expected", chr, fnt); + break; + } + } + } + return tex_make_extensible(fnt, chr, target, minoverlap, horizontal, att, size); +} + +halfword tex_make_extensible(halfword fnt, halfword chr, scaled target, scaled minoverlap, int horizontal, halfword att, halfword size) +{ + /*tex natural (maximum) size of the stack */ + scaled max_natural = 0; + /*tex amount of possible shrink in the stack */ + scaled max_shrink = 0; + extinfo *extensible = NULL; + scaled overlap; + /*tex a temporary counter number of extensible pieces */ + int pieces = 0; + /*tex new box */ + halfword box = tex_new_null_box_node(horizontal ? hlist_node : vlist_node, horizontal ? math_h_extensible_list : math_v_extensible_list); + /*tex number of times to repeat each repeatable item in |ext| */ + int with_extenders = -1; + int n_of_extenders = 0; + int n_of_normal = 0; + if (minoverlap < 0) { + minoverlap = 0; + } + /* chr = math_char_exists(fnt, chr, math_state.size); */ + if (horizontal) { + extensible = tex_char_horizontal_parts_from_font(fnt, chr); + } else { + extensible = tex_char_vertical_parts_from_font(fnt, chr); + } + tex_attach_attribute_list_attribute(box, att); + for (extinfo *e = extensible; e; e = e->next) { + if (! tex_char_exists(fnt, e->glyph)) { + tex_handle_error( + normal_error_type, + "Extension part doesn't exist.", + "Each glyph part in an extensible item should exist in the font. I will give up\n" + "trying to find a suitable size for now. Fix your font!" 
+ ); + tex_aux_fake_delimiter(box); + return box; + } else { + if (e->extender == math_extension_repeat) { + n_of_extenders++; + } else { + n_of_normal++; + } + /*tex + No negative overlaps or advances are allowed. Watch out, we patch the glyph data at + the \TEX\ end here. + */ + if (e->start_overlap < 0 || e->end_overlap < 0 || e->advance < 0) { + tex_handle_error( + normal_error_type, + "Extensible recipe has negative fields.", + "All measurements in extensible items should be positive. To get around this\n" + "problem, I have changed the font metrics. Fix your font!" + ); + if (e->start_overlap < 0) { + e->start_overlap = 0; + } + if (e->end_overlap < 0) { + e->end_overlap = 0; + } + if (e->advance < 0) { + e->advance = 0; + } + } + } + } + if (n_of_normal == 0) { + tex_handle_error( + normal_error_type, + "Extensible recipe has no fixed parts.", + "Each extensible recipe should have at least one non-repeatable part. To get\n" + "around this problem, I have changed the first part to be non-repeatable. Fix your\n" + "font!" + ); + if (extensible) { /* get rid of warning */ + extensible->extender = 0; + } + n_of_normal = 1; + n_of_extenders--; + } + /*tex + + In the meantime the Microsoft Typography website has a good description of the process: + + \startitemize + \startitem + Assemble all parts with all extenders removed and with connections overlapping by + the maximum amount. This gives the smallest possible result. + \stopitem + \startitem + Determine how much extra width/height can be obtained from all existing connections + between neighboring parts by using minimal overlaps. If that is enough to achieve + the size goal, extend each connection equally by changing overlaps of connectors to + finish the job. + \stopitem + \startitem + If all connections have been extended to the minimum overlap and further growth is + needed, add one of each extender, and repeat the process from the first step. 
+ \stopitem + \stopitemize + + Original comment: |ext| holds a linked list of numerous items that may or may not be + repeatable. For the total height, we have to figure out how many items are needed to create + a stack of at least |v|. The next |while| loop does that. It has two goals: it finds out + the natural height |b_max| of the all the parts needed to reach at least |v|, and it sets + |with_extenders| to the number of times each of the repeatable items in |ext| has to be + repeated to reach that height. + + It's an example figure it out once, write the solution, test it well and then never look + back code. + */ + while (max_natural < target && n_of_extenders > 0) { + overlap = 0; + max_natural = 0; + with_extenders++; + if (horizontal) { + for (extinfo *e = extensible; e; e = e->next) { + if (e->extender == 0) { + scaled initial = tex_aux_math_x_size_scaled(fnt, e->start_overlap, size); + scaled advance = tex_aux_math_x_size_scaled(fnt, e->advance, size); + if (minoverlap < initial) { + initial = minoverlap; + } + if (overlap < initial) { + initial = overlap; + } + if (advance == 0) { + /*tex for tfm fonts (so no need for scaling) */ + advance = tex_aux_math_x_size_scaled(fnt, tex_char_width_from_font(fnt, e->glyph), size); /* todo: combine */ + if (advance <= 0) { + tex_formatted_error("fonts", "bad horizontal extensible character %i in font %i", chr, fnt); + } + } + max_natural += advance - initial; + overlap = tex_aux_math_x_size_scaled(fnt, e->end_overlap, size); + } else { + pieces = with_extenders; + while (pieces > 0) { + scaled initial = tex_aux_math_x_size_scaled(fnt, e->start_overlap, size); + scaled advance = tex_aux_math_x_size_scaled(fnt, e->advance, size); + if (minoverlap < initial) { + initial = minoverlap; + } + if (overlap < initial) { + initial = overlap; + } + if (advance == 0) { + /*tex for tfm fonts (so no need for scaling) */ + advance = tex_aux_math_x_size_scaled(fnt, tex_char_width_from_font(fnt, e->glyph), size); /* todo: combine 
*/ + if (advance <= 0) { + tex_formatted_error("fonts", "bad horizontal extensible character %i in font %i", chr, fnt); + } + } + max_natural += advance - initial; + overlap = tex_aux_math_x_size_scaled(fnt, e->end_overlap, size); + pieces--; + } + } + } + } else { + for (extinfo *e = extensible; e; e = e->next) { + if (e->extender == 0) { + scaled initial = tex_aux_math_y_size_scaled(fnt, e->start_overlap, size); + scaled advance = tex_aux_math_y_size_scaled(fnt, e->advance, size); + if (minoverlap < initial) { + initial = minoverlap; + } + if (overlap < initial) { + initial = overlap; + } + if (advance == 0) { + /*tex for tfm fonts (so no need for scaling) */ + advance = tex_aux_math_y_size_scaled(fnt, tex_char_total_from_font(fnt, e->glyph), size); /* todo: combine */ + if (advance <= 0) { + tex_formatted_error("fonts", "bad vertical extensible character %i in font %i", chr, fnt); + } + } + max_natural += advance - initial; + overlap = tex_aux_math_y_size_scaled(fnt, e->end_overlap, size); + } else { + pieces = with_extenders; + while (pieces > 0) { + scaled initial = tex_aux_math_y_size_scaled(fnt, e->start_overlap, size); + scaled advance = tex_aux_math_y_size_scaled(fnt, e->advance, size); + if (minoverlap < initial) { + initial = minoverlap; + } + if (overlap < initial) { + initial = overlap; + } + if (advance == 0) { + /*tex for tfm fonts (so no need for scaling) */ + advance = tex_aux_math_y_size_scaled(fnt, tex_char_total_from_font(fnt, e->glyph), size); /* todo: combine */ + if (advance <= 0) { + tex_formatted_error("fonts", "bad vertical extensible character %i in font %i", chr, fnt); + } + } + max_natural += advance - initial; + overlap = tex_aux_math_y_size_scaled(fnt, e->end_overlap, size); + pieces--; + } + } + } + } + } + /*tex + Assemble box using |with_extenders| copies of each extender, with appropriate glue wherever + an overlap occurs. 
+ */ + overlap = 0; + max_natural = 0; + max_shrink = 0; + for (extinfo *e = extensible; e; e = e->next) { + if (e->extender == 0) { + scaled progress; + scaled initial = horizontal ? tex_aux_math_x_size_scaled(fnt, e->start_overlap, size) : tex_aux_math_y_size_scaled(fnt,e->start_overlap, size); + if (overlap < initial) { + initial = overlap; + } + progress = initial; + if (minoverlap < initial) { + initial = minoverlap; + } + if (progress > 0) { + tex_aux_stack_glue_into_box(box, -progress, -initial); + max_shrink += (-initial) - (-progress); + max_natural -= progress; + } + max_natural += tex_aux_stack_char_into_box(box, fnt, e->glyph, glyph_math_extensible_subtype, horizontal); + overlap = horizontal ? tex_aux_math_x_size_scaled(fnt, e->end_overlap, size) : tex_aux_math_y_size_scaled(fnt, e->end_overlap, size); + pieces--; + } else { + pieces = with_extenders; + while (pieces > 0) { + scaled progress; + scaled initial = horizontal ? tex_aux_math_x_size_scaled(fnt, e->start_overlap, size) : tex_aux_math_y_size_scaled(fnt, e->start_overlap, size); + if (overlap < initial) { + initial = overlap; + } + progress = initial; + if (minoverlap < initial) { + initial = minoverlap; + } + if (progress > 0) { + tex_aux_stack_glue_into_box(box, -progress, -initial); + max_shrink += (-initial) - (-progress); + max_natural -= progress; + } + max_natural += tex_aux_stack_char_into_box(box, fnt, e->glyph, glyph_math_extensible_subtype, horizontal); + overlap = horizontal ? tex_aux_math_x_size_scaled(fnt, e->end_overlap, size) : tex_aux_math_y_size_scaled(fnt, e->end_overlap, size); + pieces--; + } + } + } + /*tex Set glue so as to stretch the connections if needed. */ + if (target > max_natural && max_shrink > 0) { + scaled delta = target - max_natural; + /*tex Don't stretch more than |s_max|. 
*/ + if (delta > max_shrink) { + delta = max_shrink; + } + box_glue_order(box) = normal_glue_order; + box_glue_sign(box) = stretching_glue_sign; + box_glue_set(box) = (glueratio) (delta / (glueratio) max_shrink); + max_natural += delta; + } + if (horizontal) { + box_width(box) = max_natural; + node_subtype(box) = math_h_extensible_list; + } else { + box_height(box) = max_natural; + node_subtype(box) = math_v_extensible_list; + } + return box; +} + +/*tex + + The |var_delimiter| function, which finds or constructs a sufficiently large delimiter, is the + most interesting of the auxiliary functions that currently concern us. Given a pointer |d| to a + delimiter field in some noad, together with a size code |s| and a vertical distance |v|, this + function returns a pointer to a box that contains the smallest variant of |d| whose height plus + depth is |v| or more. (And if no variant is large enough, it returns the largest available + variant.) In particular, this routine will construct arbitrarily large delimiters from + extensible components, if |d| leads to such characters. + + The value returned is a box whose |shift_amount| has been set so that the box is vertically + centered with respect to the axis in the given size. If a built-up symbol is returned, the + height of the box before shifting will be the height of its topmost component. 
+ +*/ + +static halfword register_extensible(halfword fnt, halfword chr, int size, halfword result, halfword att) +{ + int callback_id = lmt_callback_defined(register_extensible_callback); + if (callback_id > 0) { + halfword b = null; + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "dddN->N", fnt, chr, size, result, &b); + if (b) { + switch (node_type(b)) { + case hlist_node: + case vlist_node: + tex_attach_attribute_list_attribute(b, att); + return b; + default: + tex_formatted_error("fonts", "invalid extensible character %U registered for font %F, [h|v]list expected", chr, fnt); + break; + } + } + } + return result; +} + +/*tex + A first version passed the first and last glyph around but then we need to maintain a copy because + we can register a composed delimiter which can result in a flush of these nodes. +*/ + +static halfword tex_aux_make_delimiter(halfword target, halfword delimiter, int size, scaled targetsize, int flat, int style, int shift, int *stack, scaled *delta, scaled tolerance, int nooverflow, delimiterextremes *extremes, scaled move) +{ + /*tex the box that will be constructed */ + halfword result = null; + /*tex best-so-far and tentative font codes */ + halfword fnt = null_font; + /*tex best-so-far and tentative character codes */ + int chr = 0; + int nxtchr = 0; + /*tex are we trying the large variant? */ + int large_attempt = 0; + int do_parts = 0; + /*tex to save the current attribute list */ + halfword att = null; + if (extremes) { + extremes->tfont = null_font; + extremes->bfont = null_font; + extremes->tchar = 0; + extremes->bchar = 0; + extremes->height = 0; + extremes->depth = 0; + } + if (delimiter && ! delimiter_small_family(delimiter) && ! delimiter_small_character(delimiter) + && ! delimiter_large_family(delimiter) && ! delimiter_large_character(delimiter)) { + halfword result = tex_new_null_box_node(hlist_node, math_v_delimiter_list); + tex_attach_attribute_list_copy(result, delimiter); + if (! 
flat) { + tex_aux_fake_delimiter(result); + } + tex_flush_node(delimiter); /* no, we can assign later on ... better a fatal error here */ + return result; + } + if (delimiter) { + /*tex largest height-plus-depth so far */ + scaled besttarget = 0; + /*tex |z| runs through font family members */ + int curfam = delimiter_small_family(delimiter); + int curchr = 0; + int count = 0; + int prvfnt = null_font; + int prvchr = 0; + nxtchr = delimiter_small_character(delimiter); + while (1) { + /*tex + The search process is complicated slightly by the facts that some of the characters + might not be present in some of the fonts, and they might not be probed in increasing + order of height. When we run out of sizes (variants) and end up at an extensible + pointer (parts) we quit the loop. + */ + if (curfam || nxtchr) { + halfword curfnt = tex_fam_fnt(curfam, size); + if (curfnt != null_font) { + curchr = nxtchr; + CONTINUE: + count++; + if (tex_char_exists(curfnt, curchr)) { + scaled total = flat ? 
tex_aux_math_x_size_scaled(curfnt, tex_char_width_from_font(curfnt, curchr), size): tex_aux_math_y_size_scaled(curfnt, tex_char_total_from_font(curfnt, curchr), size); + if (nooverflow && total >= targetsize) { + if (total > targetsize && prvfnt != null_font) { + fnt = prvfnt; + chr = prvchr; + } else { + fnt = curfnt; + chr = curchr; + } + besttarget = total; + goto FOUND; + } else if (total >= besttarget) { + prvfnt = curfnt; + prvchr = curchr; + fnt = curfnt; + chr = curchr; + besttarget = total; + if (total >= (targetsize - tolerance)) { + goto FOUND; + } + } + if (tex_char_has_tag_from_font(curfnt, curchr, extension_tag)) { + fnt = curfnt; + chr = curchr; + do_parts = 1; + goto FOUND; + } else if (count > 1000) { + tex_formatted_warning("fonts", "endless loop in extensible character %U of font %F", curchr, curfnt); + goto FOUND; + } else if (tex_char_has_tag_from_font(curfnt, curchr, list_tag)) { + prvfnt = curfnt; + prvchr = curchr; + curchr = tex_char_remainder_from_font(curfnt, curchr); + goto CONTINUE; + } + } + } + } + if (large_attempt) { + /*tex There were none large enough. */ + goto FOUND; + } else { + large_attempt = 1; + curfam = delimiter_large_family(delimiter); + nxtchr = delimiter_large_character(delimiter); + } + } + } + FOUND: + if (delimiter) { + /*tex + The builder below sets the list if needed and we dereference later because otherwise + the list gets flushed before it can be reused. + */ + att = get_attribute_list(delimiter); + wipe_attribute_list_only(delimiter); + tex_flush_node(delimiter); + } + if (fnt != null_font) { + /*tex + When the following code is executed, |do_parts| will be true if a built-up symbol is + supposed to be returned. + */ + extinfo *ext = NULL; + if (do_parts) { + /* tex_char_process(fnt, chr); */ /* in case we realloc */ + ext = flat ? tex_char_horizontal_parts_from_font(fnt, chr) : tex_char_vertical_parts_from_font(fnt, chr); + } + if (ext) { + scaled minoverlap = flat ? 
tex_get_math_x_parameter_default(style, math_parameter_connector_overlap_min, 0) : tex_get_math_y_parameter_default(style, math_parameter_connector_overlap_min, 0);; + result = tex_aux_get_delimiter_box(fnt, chr, targetsize, minoverlap, flat, att); + if (delta) { + if (tex_aux_math_engine_control(fnt, math_control_apply_vertical_italic_kern)) { + *delta = tex_aux_math_x_size_scaled(fnt, tex_char_vertical_italic_from_font(fnt, nxtchr), size); + } else { + *delta = tex_aux_math_x_size_scaled(fnt, tex_char_italic_from_font(fnt, nxtchr), size); + } + } + if (stack) { + *stack = 1 ; + } + if (! flat && extremes) { + halfword first = tex_aux_top_extensible_from_box(result); + halfword last = tex_aux_bottom_extensible_from_box(result); + extremes->tfont = glyph_font(first); + extremes->tchar = glyph_character(first); + extremes->bfont = glyph_font(last); + extremes->bchar = glyph_character(last); + extremes->height = box_height(result); + extremes->depth = box_depth(result); + } + } else { + /*tex + Here italic is added to width in traditional fonts which makes the delimiter get + the real width. An \OPENTYPE\ font already has the right width. There is one case + where |delta| (ic) gets subtracted but only for a traditional font. In that case + the traditional width (which is fake width + italic) becomes less and the delta is + added. See (**). + */ + result = tex_aux_char_box(fnt, chr, att, delta, glyph_math_delimiter_subtype, flat ? targetsize : 0, style); + if (stack) { + *stack = 0 ; + } + if (! flat && extremes) { + extremes->tfont = fnt; + extremes->tchar = chr; + extremes->bfont = fnt; + extremes->bchar = chr; + extremes->height = box_height(result); + extremes->depth = box_depth(result); + } + } + } else { + /*tex This can be an empty one as is often the case with fractions! */ + result = tex_new_null_box_node(hlist_node, flat ? 
math_h_delimiter_list : math_v_delimiter_list); + tex_attach_attribute_list_attribute(result, att); + /*tex Use this width if no delimiter was found. */ + if (! flat) { + tex_aux_fake_delimiter(result); + } + if (delta) { + *delta = 0; + } + if (stack) { + *stack = 0 ; + } + } + if (do_parts) { + if (has_noad_option_phantom(target) || has_noad_option_void(target)) { + result = tex_aux_make_list_phantom(result, has_noad_option_void(target), att); + } else { + result = register_extensible(fnt, chr, size, result, att); + } + } + if (! flat) { + /*tex A vertical variant. Todo: add a kern instead. */ + switch (shift) { + case 0: + box_shift_amount(result) = tex_half_scaled(box_height(result) - box_depth(result)); + break; + case 1: + box_shift_amount(result) = tex_half_scaled(box_height(result) - box_depth(result)); + box_shift_amount(result) -= tex_aux_math_axis(size); + break; + case 2: + box_shift_amount(result) = move; + break; + } + if (do_parts && extremes && extremes->height) { + extremes->height -= box_shift_amount(result); + extremes->depth += box_shift_amount(result); + } + } + /* This needs checking in case the ref was changed. */ + delete_attribute_reference(att); + if ((node_type(result) == hlist_node || node_type(result) == vlist_node) && node_subtype(result) == unknown_list) { + node_subtype(result) = flat ? math_h_delimiter_list : math_v_delimiter_list; + } + return result; +} + +/*tex + + The next subroutine is much simpler; it is used for numerators and denominators of fractions as + well as for displayed operators and their limits above and below. It takes a given box~|b| and + changes it so that the new box is centered in a box of width~|w|. The centering is done by + putting |\hss| glue at the left and right of the list inside |b|, then packaging the new box; + thus, the actual box might not really be centered, if it already contains infinite glue. 
+ + The given box might contain a single character whose italic correction has been added to the + width of the box; in this case a compensating kern is inserted. Actually, we now check for + the last glyph. + +*/ + +static halfword tex_aux_rebox(halfword box, scaled width, halfword size) +{ + (void) size; + if (box_width(box) != width && box_list(box)) { + /*tex temporary registers for list manipulation */ + halfword head = box_list(box); + quarterword subtype = node_subtype(box); + halfword att = get_attribute_list(box); + /*tex When the next two are not seen we can wipe att so we reserve by bump! */ + add_attribute_reference(att); + if (node_type(box) == vlist_node) { + box = tex_hpack(box, 0, packing_additional, direction_unknown, holding_none_option); + node_subtype(box) = subtype; + tex_attach_attribute_list_attribute(box, att); + head = box_list(box); + } else if (head && node_type(head) == glyph_node && ! node_next(head)) { + /*tex + This hack is for traditional fonts so with a proper opentype font we don't end up + here (because then the width is unchanged). However controls can cheat so there is + no explicit check for an opentype situation here. + */ + if (tex_aux_math_engine_control(glyph_font(head), math_control_rebox_char_italic_kern)) { + scaled boxwidth = box_width(box); + scaled chrwidth = tex_char_width_from_glyph(head); + if (boxwidth != chrwidth) { + /*tex + This is typical old font stuff. Maybe first check if we can just + remove a trailing kern. Also, why not just adapt the box width. + */ + halfword kern = tex_new_kern_node(boxwidth - chrwidth, italic_kern_subtype); /* horizontal_math_kern */ + tex_attach_attribute_list_attribute(kern, att); + tex_couple_nodes(head, kern); + } + } + } + box_list(box) = null; + tex_flush_node(box); + { + halfword left = tex_new_glue_node(filll_glue, user_skip_glue); /* todo: subtype, correction_skip_glue? 
*/ + halfword right = tex_new_glue_node(filll_glue, user_skip_glue); /* todo: subtype, correction_skip_glue? */ + tex_add_glue_option(left, glue_option_no_auto_break); + tex_add_glue_option(right, glue_option_no_auto_break); + tex_attach_attribute_list_attribute(left, att); + tex_attach_attribute_list_attribute(right, att); + tex_couple_nodes(left, head); + tex_couple_nodes(tex_tail_of_node_list(head), right); + box = tex_hpack(left, width, packing_exactly, direction_unknown, holding_none_option); + tex_attach_attribute_list_attribute(box, att); + node_subtype(box) = subtype; + } + /*tex As we bumped we now need to unbump the ref counter! */ + delete_attribute_reference(att); + } else { + box_width(box) = width; + } + return box; +} + +/*tex + + Here is a subroutine that creates a new glue specification from another one that is expressed + in |mu|, given the value of the math unit. + +*/ + +inline static scaled tex_aux_mu_mult(scaled a, scaled n, scaled f) +{ + return tex_multiply_and_add(n, a, tex_xn_over_d(a, f, unity), max_dimen); +} + +inline static void tex_aux_calculate_glue(scaled m, scaled *f, scaled *n) +{ + /*tex fraction part of |m| */ + *f = 0; + /*tex integer part of |m| */ + *n = tex_x_over_n_r(m, unity, f); + /*tex the new glue specification */ + if (f < 0) { + --n; + f += unity; + } +} + +static halfword tex_aux_math_muglue(halfword g, quarterword subtype, scaled m, halfword detail, int style) +{ + scaled f, n; + halfword glue = tex_new_node(glue_node, subtype); + tex_aux_calculate_glue(m, &f, &n); + /* convert |mu| to |pt| */ + glue_amount(glue) = tex_aux_mu_mult(tex_aux_math_x_scaled(glue_amount(g), style), n, f); + if (math_glue_stretch_enabled) { + scaled stretch = tex_aux_math_x_scaled(glue_stretch(g), style); + glue_stretch_order(glue) = glue_stretch_order(g); + glue_stretch(glue) = (glue_stretch_order(glue) == normal_glue_order) ? 
tex_aux_mu_mult(stretch, n, f) : stretch; + } + if (math_glue_shrink_enabled) { + scaled shrink = tex_aux_math_x_scaled(glue_shrink(g), style); + glue_shrink_order(glue) = glue_shrink_order(g); + glue_shrink(glue) = (glue_shrink_order(glue) == normal_glue_order) ? tex_aux_mu_mult(shrink, n, f) : shrink; + } + glue_font(glue) = detail; + tex_add_glue_option(glue, glue_option_no_auto_break); + return glue; +} + +static halfword tex_aux_math_glue(halfword g, quarterword subtype, halfword detail) +{ + halfword glue = tex_new_glue_node(g, subtype); + if (! math_glue_stretch_enabled) { + glue_stretch_order(glue) = 0; + glue_stretch(glue) = 0; + } + if (! math_glue_shrink_enabled) { + glue_shrink_order(glue) = 0; + glue_shrink(glue) = 0; + } + glue_font(glue) = detail; + tex_add_glue_option(glue, glue_option_no_auto_break); + return glue; +} + +static halfword tex_aux_math_dimen(halfword g, quarterword subtype, halfword detail) +{ + halfword glue = tex_new_glue_node(null, subtype); + glue_amount(glue) = g; + glue_font(glue) = detail; + tex_add_glue_option(glue, glue_option_no_auto_break); + return glue; +} + +static void tex_aux_math_glue_to_glue(halfword p, scaled m, int style) +{ + scaled f, n; + tex_aux_calculate_glue(m, &f, &n); + /*tex convert |mu| to |pt| */ + glue_amount(p) = tex_aux_mu_mult(tex_aux_math_x_scaled(glue_amount(p), style), n, f); + if (! math_glue_stretch_enabled) { + glue_stretch_order(p) = 0; + glue_stretch(p) = 0; + } else if (glue_stretch_order(p) == normal_glue_order) { + glue_stretch(p) = tex_aux_mu_mult(tex_aux_math_x_scaled(glue_stretch(p), style), n, f); + } + if (! math_glue_shrink_enabled) { + glue_shrink_order(p) = 0; + glue_shrink(p) = 0; + } else if (glue_shrink_order(p) == normal_glue_order) { + glue_shrink(p) = tex_aux_mu_mult(tex_aux_math_x_scaled(glue_shrink(p), style), n, f); + } + /*tex Okay, we could have had a special subtype but we're stuck with this now. 
*/ + node_subtype(p) = inter_math_skip_glue; + tex_add_glue_option(p, glue_option_no_auto_break); +} + +/*tex + + The |math_kern| subroutine removes |mu_glue| from a kern node, given the value of the math + unit. + +*/ + +static void tex_aux_make_kern(halfword current, scaled mu, int style) +{ + if (node_subtype(current) == explicit_math_kern_subtype) { + scaled f, n; + tex_aux_calculate_glue(mu, &f, &n); + kern_amount(current) = tex_aux_mu_mult(tex_aux_math_x_scaled(glue_amount(current), style), n, f); + node_subtype(current) = explicit_kern_subtype; + } +} + +/*tex + + Conditional math glue (|\nonscript|) results in a |glue_node| pointing to |zero_glue|, with + |subtype(q)=cond_math_glue|; in such a case the node following will be eliminated if it is a + glue or kern node and if the current size is different from |text_size|. + + Unconditional math glue (|\muskip|) is converted to normal glue by multiplying the dimensions + by |current_mu|. + +*/ + +static void tex_aux_make_glue(halfword current, scaled mu, int style) +{ + switch (node_subtype(current)) { + case mu_glue: + tex_aux_math_glue_to_glue(current, mu, style); + break; + case conditional_math_glue: + if (lmt_math_state.size != text_size) { + halfword p = node_next(current); + if (p) { + switch (node_type(p)) { + case glue_node: + case kern_node: + if (node_next(p)) { + tex_couple_nodes(current, node_next(p)); + node_next(p) = null; + } else { + node_next(current) = null; + } + tex_flush_node_list(p); + break; + } + } + } + break; + case rulebased_math_glue: + break; + } +} + +/*tex + + The |mlist_to_hlist| operation is actually called a lot when we have a math intense document, + because it is also called nested. Here we have the main runner, called in the main loop; + watch the callback. 
+ +*/ + +inline static int tex_aux_is_math_penalty(halfword n) +{ + return node_type(n) == penalty_node && (node_subtype(n) == math_pre_penalty_subtype || node_subtype(n) == math_post_penalty_subtype); +} + +void tex_run_mlist_to_hlist(halfword mlist, halfword penalties, halfword style, int beginclass, int endclass) +{ + if (mlist) { + int saved_level = lmt_math_state.level; + int callback_id = lmt_callback_defined(mlist_to_hlist_callback); + lmt_math_state.level = 0; + if (! valid_math_class_code(beginclass)) { + beginclass = unset_noad_class; + } + if (! valid_math_class_code(endclass)) { + endclass = unset_noad_class; + } + math_begin_class_par = unset_noad_class; + math_end_class_par = unset_noad_class; + /* not on the stack ... yet */ + if (tracing_math_par >= 1) { + tex_begin_diagnostic(); + switch (style) { + case display_style: + tex_print_str("> \\displaymath="); + break; + case text_style: + tex_print_str("> \\inlinemath="); + break; + default: + tex_print_str("> \\math="); + break; + } + tex_show_box(mlist); + tex_end_diagnostic(); + } + tex_finalize_math_parameters(); + if (callback_id > 0) { + lua_State *L = lmt_lua_state.lua_instance; + int top = 0; + if (lmt_callback_okay(L, callback_id, &top)) { + int i; + node_prev(mlist) = null ; + lmt_node_list_to_lua(L, mlist); + lmt_push_math_style_name(L, style); + lua_pushboolean(L, penalties); + lua_pushinteger(L, beginclass); + lua_pushinteger(L, endclass); + lua_pushinteger(L, lmt_math_state.level); + i = lmt_callback_call(L, 6, 1, top); + if (i) { + lmt_callback_error(L, top, i); + node_next(temp_head) = null; + } else { + halfword a = lmt_node_list_from_lua(L, -1); + /* node_prev(node_next(a)) = null; */ + node_next(temp_head) = a; + lmt_callback_wrapup(L, top); + } + } else { + node_next(temp_head) = null; + } + } else if (callback_id == 0) { + node_next(temp_head) = tex_mlist_to_hlist(mlist, penalties, style, beginclass, endclass, NULL); + } else { + node_next(temp_head) = null; + } + if (penalties) { 
// && tex_in_main_math_style(style) + /*tex This makes no sense in display math not in script styles. */ + switch (style) { + case text_style: + case cramped_text_style: + if (math_forward_penalties_par) { + halfword n = tex_get_specification_count(math_forward_penalties_par); + if (n > 0) { + halfword h = node_next(temp_head); + halfword i = 1; + while (h && i <= n) { + if (tex_aux_is_math_penalty(h)) { + penalty_amount(h) += tex_get_specification_penalty(math_forward_penalties_par, i); + ++i; + } + h = node_next(h); + } + } + } + if (math_backward_penalties_par) { + halfword n = tex_get_specification_count(math_backward_penalties_par); + if (n > 0) { + halfword t = tex_tail_of_node_list(node_next(temp_head)); + halfword i = 1; + while (t && i <= n) { + if (tex_aux_is_math_penalty(t)) { + penalty_amount(t) += tex_get_specification_penalty(math_backward_penalties_par, i); + ++i; + } + t = node_prev(t); + } + } + } + break; + } + if (node_next(temp_head) && math_threshold_par) { + scaledwhd siz = tex_natural_hsizes(node_next(temp_head), null, 0.0, 0, 0); + if (siz.wd < glue_amount(math_threshold_par)) { + halfword box = tex_new_node(hlist_node, unknown_list); + tex_attach_attribute_list_copy(box, node_next(temp_head)); + box_width(box) = siz.wd; + box_height(box) = siz.ht; + box_depth(box) = siz.dp; + box_list(box) = node_next(temp_head); + node_next(temp_head) = box; + if (glue_stretch(math_threshold_par) || glue_shrink(math_threshold_par)) { + halfword glue = tex_new_glue_node(math_threshold_par, u_leaders); + tex_add_glue_option(glue, glue_option_no_auto_break); + tex_attach_attribute_list_copy(glue, box); + glue_amount(glue) = siz.wd; + glue_leader_ptr(glue) = box; + node_next(temp_head) = glue; + } else { + node_next(temp_head) = box; + } + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: boxing inline, threshold %D, width %D, height %D, depth %D]", + glue_amount(math_threshold_par), pt_unit, // todo: stretch and shrink + 
siz.wd, pt_unit, siz.ht, pt_unit, siz.dp, pt_unit + ); + tex_end_diagnostic(); + } + } + } + /* + At the outer level we check for discretionaries. Maybe only when we are in text or display? + */ + { + halfword current = temp_head; + while (current) { + /*tex Maybe |math_discretionary_code| but I need to check the impact on \CONTEXT\ first. */ + if (node_type(current) == glyph_node && tex_has_glyph_option(current, glyph_option_math_discretionary)) { + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: promoting glyph with character %U to discretionary]", glyph_character(current)); + tex_end_diagnostic(); + } + current = tex_glyph_to_discretionary(current, mathematics_discretionary_code, tex_has_glyph_option(current, glyph_option_math_italics_too)); + } + current = node_next(current); + } + } + } + lmt_math_state.level = saved_level; + } else { + node_next(temp_head) = null; + } +} + +/*tex + + The recursion in |mlist_to_hlist| is due primarily to a subroutine called |clean_box| that puts + a given noad field into a box using a given math style; |mlist_to_hlist| can call |clean_box|, + which can call |mlist_to_hlist|. + + The box returned by |clean_box| is clean in the sense that its |shift_amount| is zero. + +*/ + +inline static void tex_aux_remove_italic_after_first_glyph(halfword box) +{ + halfword list = box_list(box); + if (list && node_type(list) == glyph_node) { + halfword next = node_next(list); + /*todo: check for italic property */ + if (next && ! node_next(next) && node_type(next) == kern_node && node_subtype(next) == italic_kern_subtype) { + /*tex Unneeded italic correction. 
*/ + box_width(box) -= kern_amount(next); + tex_flush_node(next); + node_next(list) = null; + } + } +} + +static halfword tex_aux_clean_box(halfword n, int main_style, int style, quarterword subtype, int keepitalic, kernset *kerns) +{ + /*tex beginning of a list to be boxed */ + halfword list; + /*tex box to be returned */ + halfword result; + /*tex beginning of mlist to be translated */ + halfword mlist = null; + switch (node_type(n)) { + case math_char_node: + mlist = tex_new_node(simple_noad, ordinary_noad_subtype); + noad_nucleus(mlist) = tex_aux_math_clone(n); + tex_attach_attribute_list_copy(mlist, n); + break; + case sub_box_node: + list = kernel_math_list(n); + goto FOUND; + case sub_mlist_node: + mlist = kernel_math_list(n); + break; + default: + list = tex_new_null_box_node(hlist_node, math_list_list); + tex_attach_attribute_list_copy(list, n); + goto FOUND; + } + /*tex This might add some italic correction. */ + list = tex_mlist_to_hlist(mlist, 0, main_style, unset_noad_class, unset_noad_class, kerns); + /*tex recursive call */ + tex_aux_set_current_math_size(style); /* persists after call */ + FOUND: + if (! list || node_type(list) == glyph_node) { + result = tex_hpack(list, 0, packing_additional, direction_unknown, holding_none_option); + tex_attach_attribute_list_copy(result, list); + } else if (! node_next(list) && (node_type(list) == hlist_node || node_type(list) == vlist_node) && (box_shift_amount(list) == 0)) { + /*tex It's already clean. */ + result = list; + } else { + result = tex_hpack(list, 0, packing_additional, direction_unknown, holding_none_option); + tex_attach_attribute_list_copy(result, list); + } + node_subtype(result) = subtype; + if (! keepitalic) { + tex_aux_remove_italic_after_first_glyph(result); + } + return result; +} + +/*tex + + It is convenient to have a procedure that converts a |math_char| field to an unpacked form. The + |fetch| routine sets |cur_f| and |cur_c| to the font code and character code of a given noad + field. 
It also takes care of issuing error messages for nonexistent characters; in such cases, + |char_exists (cur_f, cur_c)| will be |false| after |fetch| has acted, and the field will also + have been reset to |null|. The outputs of |fetch| are placed in global variables so that we can + access them any time we want. We add a bit more detail about the location of the issue than + standard \TEX\ does. + + The |cur_f| and |cur_c| variables are now locals and we keep the (opentype) state otherwise. + +*/ + +static int tex_aux_fetch(halfword n, const char *where, halfword *f, halfword *c) /* todo: also pass size */ +{ + if (node_type(n) == glyph_node) { + *f = glyph_font(n); + *c = glyph_character(n); + /* lmt_math_state.opentype = tex_aux_has_opentype_metrics(*f); */ + if (tex_char_exists(*f, *c)) { + return 1; + } else { + tex_char_warning(*f, *c); + return 0; + } + } else { + *f = tex_fam_fnt(kernel_math_family(n), lmt_math_state.size); + *c = kernel_math_character(n); + if (*f == null_font) { + char msg[256]; + snprintf(msg, 255, "\\%s%d is undefined in %s, font id %d, character %d)", + tex_aux_math_size_string(lmt_math_state.size), kernel_math_family(n), where, *f, *c + ); + tex_handle_error( + normal_error_type, + msg, + "Somewhere in the math formula just ended, you used the stated character from an\n" + "undefined font family. For example, plain TeX doesn't allow \\it or \\sl in\n" + "subscripts. Proceed, and I'll try to forget that I needed that character." + ); + return 0; + } else { + /* lmt_math_state.opentype = tex_aux_has_opentype_metrics(*f); */ + if (tex_math_char_exists(*f, *c, lmt_math_state.size)) { + return 1; + } else { + tex_char_warning(*f, *c); + return 0; + } + } + } +} + +/*tex + + We need to do a lot of different things, so |mlist_to_hlist| makes two passes over the given + mlist. 
+ + The first pass does most of the processing: It removes |mu| spacing from glue, it recursively + evaluates all subsidiary mlists so that only the top-level mlist remains to be handled, it puts + fractions and square roots and such things into boxes, it attaches subscripts and superscripts, + and it computes the overall height and depth of the top-level mlist so that the size of + delimiters for a |fence_noad| will be known. The hlist resulting from each noad is recorded in + that noad's |new_hlist| field, an integer field that replaces the |nucleus| or |thickness|. + + The second pass eliminates all noads and inserts the correct glue and penalties between nodes. + +*/ + +static void tex_aux_assign_new_hlist(halfword target, halfword hlist) +{ + switch (node_type(target)) { + case fraction_noad: + kernel_math_list(fraction_numerator(target)) = null; + kernel_math_list(fraction_denominator(target)) = null; + tex_flush_node(fraction_numerator(target)); + tex_flush_node(fraction_denominator(target)); + fraction_numerator(target) = null; + fraction_denominator(target) = null; + break; + case radical_noad: + case simple_noad: + case accent_noad: + if (noad_nucleus(target)) { + kernel_math_list(noad_nucleus(target)) = null; + tex_flush_node(noad_nucleus(target)); + noad_nucleus(target) = null; + } + break; + } + noad_new_hlist(target) = hlist; +} + +/*tex + + Most of the actual construction work of |mlist_to_hlist| is done by procedures with names like + |make_fraction|, |make_radical|, etc. To illustrate the general setup of such procedures, let's + begin with a couple of simple ones. + +*/ + +static void tex_aux_make_over(halfword target, halfword style, halfword size, halfword fam) +{ + /*tex + + No rule adaption yet, maybe it will never be implemented because overbars should be proper + extensibles. The order is: kern, rule, gap, content. 
+ + */ + halfword result; + scaled thickness = tex_get_math_y_parameter_checked(style, math_parameter_overbar_rule); + scaled vgap = tex_get_math_y_parameter_checked(style, math_parameter_overbar_vgap); + scaled kern = tex_get_math_y_parameter_checked(style, math_parameter_overbar_kern); + { + halfword t = tex_aux_check_rule_thickness(target, size, &fam, math_control_over_rule, OverbarRuleThickness); + if (t != undefined_math_parameter) { + thickness = t; + } + } + result = tex_aux_overbar( + tex_aux_clean_box(noad_nucleus(target), tex_math_style_variant(style, math_parameter_over_line_variant), style, math_nucleus_list, 0, NULL), + vgap, thickness, kern, + get_attribute_list(noad_nucleus(target)), math_over_rule_subtype, size, fam + ); + node_subtype(result) = math_over_list; + kernel_math_list(noad_nucleus(target)) = result; + node_type(noad_nucleus(target)) = sub_box_node; +} + +static void tex_aux_make_under(halfword target, halfword style, halfword size, halfword fam) +{ + /*tex + + No rule adaption yet, maybe never as underbars should be proper extensibles. Here |x| is + the head, and |p| the tail but we keep the original names. The order is: content, gap, + rule, kern. 
+ + */ + halfword result; + scaled thickness = tex_get_math_y_parameter_checked(style, math_parameter_underbar_rule); + scaled vgap = tex_get_math_y_parameter_checked(style, math_parameter_underbar_vgap); + scaled kern = tex_get_math_y_parameter_checked(style, math_parameter_underbar_kern); + { + halfword t = tex_aux_check_rule_thickness(target, size, &fam, math_control_under_rule, UnderbarRuleThickness); + if (t != undefined_math_parameter) { + thickness = t; + } + } + result = tex_aux_underbar( + tex_aux_clean_box(noad_nucleus(target), tex_math_style_variant(style, math_parameter_under_line_variant), style, math_nucleus_list, 0, NULL), + vgap, thickness, kern, + get_attribute_list(noad_nucleus(target)), math_under_rule_subtype, size, fam + ); + node_subtype(result) = math_over_list; + kernel_math_list(noad_nucleus(target)) = result; + node_type(noad_nucleus(target)) = sub_box_node; +} + +/*tex + + In \LUAMETATEX\ we also permit |\vcenter| in text mode but there we use another function than + the one below. + + */ + +static void tex_aux_make_vcenter(halfword target, halfword style, halfword size) +{ + halfword box = kernel_math_list(noad_nucleus(target)); + if (node_type(box) != vlist_node) { + box = tex_aux_clean_box(noad_nucleus(target), style, style, math_list_list, 0, NULL); // todo: math_vcenter_list + kernel_math_list(noad_nucleus(target)) = box; + node_type(noad_nucleus(target)) = sub_box_node; + } + { + scaled total = box_total(box); + scaled axis = has_box_axis(box, no_math_axis) ? 0 : tex_aux_math_axis(size); + box_height(box) = axis + tex_half_scaled(total); + box_depth(box) = total - box_height(box); + } +} + +/*tex + + According to the rules in the |DVI| file specifications, we ensure alignment between a square + root sign and the rule above its nucleus by assuming that the baseline of the square-root + symbol is the same as the bottom of the rule. 
The height of the square-root symbol will be the + thickness of the rule, and the depth of the square-root symbol should exceed or equal the + height-plus-depth of the nucleus plus a certain minimum clearance~|psi|. The symbol will be + placed so that the actual clearance is |psi| plus half the excess. + +*/ + +static void tex_aux_make_hextension(halfword target, int style, int size) +{ + int stack = 0; + scaled radicalwidth = tex_aux_math_given_x_scaled(noad_width(target)); + halfword extensible = radical_left_delimiter(target); + halfword delimiter = tex_aux_make_delimiter(target, extensible, size, radicalwidth, 1, style, 1, &stack, NULL, 0, has_noad_option_nooverflow(target), NULL, 0); + halfword delimiterwidth = box_width(delimiter); + if (! stack && radicalwidth && (radicalwidth != delimiterwidth)) { + if (has_noad_option_middle(target)) { + scaled delta = tex_half_scaled(radicalwidth - delimiterwidth); + if (delta) { + halfword kern = tex_new_kern_node(delta, horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_couple_nodes(kern, delimiter); + delimiter = kern; + } + delimiterwidth = radicalwidth; + } else if (has_noad_option_exact(target)) { + delimiterwidth = radicalwidth; + } + } + delimiter = tex_hpack(delimiter, 0, packing_additional, direction_unknown, holding_none_option); + box_width(delimiter) = delimiterwidth; + tex_attach_attribute_list_copy(delimiter, target); + kernel_math_list(noad_nucleus(target)) = delimiter; + radical_left_delimiter(target) = null; + radical_right_delimiter(target) = null; +} + +static void tex_aux_preroll_root_radical(halfword target, int style, int size) +{ + (void) size; + noad_new_hlist(target) = tex_aux_clean_box(noad_nucleus(target), tex_math_style_variant(style, math_parameter_radical_variant), style, math_nucleus_list, 0, NULL); +} + +static halfword tex_aux_link_radical(halfword nucleus, halfword delimiter, halfword companion, halfword rightdelimiter) +{ + if (companion) { + 
tex_couple_nodes(delimiter, nucleus); + tex_couple_nodes(nucleus, companion); + return delimiter; + } else if (rightdelimiter) { + tex_couple_nodes(nucleus, delimiter); + return nucleus; + } else { + tex_couple_nodes(delimiter, nucleus); + return delimiter; + } +} + +static void tex_aux_assign_radical(halfword target, halfword radical) +{ + halfword result = tex_hpack(radical, 0, packing_additional, direction_unknown, holding_none_option); + node_subtype(result) = math_radical_list; + tex_attach_attribute_list_copy(result, target); + kernel_math_list(noad_nucleus(target)) = result; + node_type(noad_nucleus(target)) = sub_box_node; + radical_left_delimiter(target) = null; + radical_right_delimiter(target) = null; +} + +static void tex_aux_set_radical_kerns(delimiterextremes *extremes, kernset *kerns) +{ + if (kerns && extremes->tfont) { + if (tex_math_has_class_option(radical_noad_subtype, carry_over_left_top_kern_class_option)) { + kerns->topleft = tex_char_top_left_kern_from_font(extremes->tfont, extremes->tchar); + } + if (tex_math_has_class_option(radical_noad_subtype, carry_over_left_bottom_kern_class_option)) { + kerns->bottomleft = tex_char_bottom_left_kern_from_font(extremes->bfont, extremes->bchar); + } + if (tex_math_has_class_option(radical_noad_subtype, carry_over_right_top_kern_class_option)) { + kerns->topright = tex_char_top_right_kern_from_font(extremes->tfont, extremes->tchar); + } + if (tex_math_has_class_option(radical_noad_subtype, carry_over_right_bottom_kern_class_option)) { + kerns->bottomright = tex_char_bottom_right_kern_from_font(extremes->bfont, extremes->bchar); + } + if (tex_math_has_class_option(radical_noad_subtype, prefer_delimiter_dimensions_class_option)) { + kerns->height = extremes->height; + kerns->depth = extremes->depth; + } + } +} + +static void tex_aux_make_root_radical(halfword target, int style, int size, kernset *kerns) +{ + halfword nucleus = noad_new_hlist(target); + scaled clearance = 
tex_get_math_y_parameter_checked(style, math_parameter_radical_vgap); + scaled theta = tex_get_math_y_parameter(style, math_parameter_radical_rule); + scaled kern = tex_get_math_y_parameter_checked(style, math_parameter_radical_kern); + scaled fam = delimiter_small_family(radical_left_delimiter(target)); + halfword leftdelimiter = radical_left_delimiter(target); + halfword rightdelimiter = radical_right_delimiter(target); + halfword delimiter = leftdelimiter ? leftdelimiter : rightdelimiter; + halfword companion = leftdelimiter ? rightdelimiter : null; + halfword radical = null; + delimiterextremes extremes = { .tfont = null_font, .tchar = 0, .bfont = null_font, .bchar = 0, .height = 0, .depth = 0 }; + noad_new_hlist(target) = null; + /*tex + We can take the rule width from the fam/style of the delimiter or use the most recent math + parameters value. + */ + { + halfword t = tex_aux_check_rule_thickness(target, size, &fam, math_control_radical_rule, RadicalRuleThickness); + if (t != undefined_math_parameter) { + theta = t; + } + } + { + halfword weird = theta == undefined_math_parameter; + if (weird) { + /*tex What do we have here. Why not issue an error */ + theta = tex_get_math_y_parameter_checked(style, math_parameter_fraction_rule); /* a bit weird this one */ + } + delimiter = tex_aux_make_delimiter(target, delimiter, size, box_total(nucleus) + clearance + theta, 0, style, 1, NULL, NULL, 0, has_noad_option_nooverflow(target), &extremes, 0); + if (companion) { + /*tex For now we assume symmetry and same height and depth! */ + companion = tex_aux_make_delimiter(target, companion, size, box_total(nucleus) + clearance + theta, 0, style, 1, NULL, NULL, 0, has_noad_option_nooverflow(target), &extremes, 0); + } + if (weird) { + /*tex + If |y| is a composite then set |theta| to the height of its top character, else set it + to the height of |y|. Really? 
+ */ + halfword list = box_list(delimiter); + if (list && (node_type(list) == hlist_node)) { + /*tex possible composite */ + halfword glyph = box_list(list); + if (glyph && node_type(glyph) == glyph_node) { + /*tex top character */ + theta = tex_char_height_from_glyph(glyph); + } else { + theta = box_height(delimiter); + } + } else { + theta = box_height(delimiter); + } + } + } + /* */ + tex_aux_set_radical_kerns(&extremes, kerns); + /* + Radicals in traditional fonts have their shape below the baseline which makes them unuseable + as stand alone characters but here we compensate for that fact. Opentype fonts derived from + traditional \TEX\ fonts can also be like that and it goed unnoticed until one accesses the + shape as character directly. Normally that gets corrected in the font when this has become + clear. + */ + { + halfword delta = (box_total(delimiter) - theta) - (box_total(nucleus) + clearance); + if (delta > 0) { + /*tex increase the actual clearance */ + clearance += tex_half_scaled(delta); + } + box_shift_amount(delimiter) = (box_height(delimiter) - theta) - (box_height(nucleus) + clearance); + if (companion) { + box_shift_amount(companion) = (box_height(companion) - theta) - (box_height(nucleus) + clearance); + } + } + if (node_type(delimiter) == vlist_node && node_subtype(delimiter) == math_v_delimiter_list) { + halfword before = tex_get_math_x_parameter_default(style, math_parameter_radical_extensible_before, 0); + tex_aux_prepend_hkern_to_box_list(nucleus, before, horizontal_math_kern_subtype, "bad delimiter"); + } + if (node_type(companion) == vlist_node && node_subtype(companion) == math_v_delimiter_list) { + halfword after = tex_get_math_x_parameter_default(style, math_parameter_radical_extensible_after, 0); + tex_aux_append_hkern_to_box_list(nucleus, after, horizontal_math_kern_subtype, "bad delimiter"); + } + { + halfword total = box_total(delimiter); + halfword list = tex_aux_overbar(nucleus, clearance, theta, kern, 
get_attribute_list(delimiter), math_radical_rule_subtype, size, fam); + radical = tex_aux_link_radical(list, delimiter, companion, rightdelimiter); + if (radical_degree(target)) { + halfword degree = tex_aux_clean_box(radical_degree(target), script_script_style, style, math_degree_list, 0, NULL); + scaled width = box_width(degree); + tex_attach_attribute_list_copy(degree, radical_degree(target)); + if (width) { + scaled before = tex_get_math_x_parameter_checked(style, math_parameter_radical_degree_before); + scaled after = tex_get_math_x_parameter_checked(style, math_parameter_radical_degree_after); + /* scaled raise = tex_get_math_y_parameter_checked(style, math_parameter_radical_degree_raise); */ /* no! */ + scaled raise = tex_get_math_parameter_checked(style, math_parameter_radical_degree_raise); + /* old: + if (-after > (width + before)) { + after = -(width + before); + } + new: */ + if (-after > width) { + before += -after - width; + } + if (after) { + halfword kern = tex_new_kern_node(after, horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(kern, radical_degree(target)); + tex_couple_nodes(kern, radical); + nucleus = kern; + } else { + nucleus = radical; + } + box_shift_amount(degree) = - (tex_xn_over_d(total, raise, 100) - box_depth(radical) - box_shift_amount(radical)); + tex_couple_nodes(degree, nucleus); + if (before) { + halfword kern = tex_new_kern_node(before, horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(kern, radical_degree(target)); + tex_couple_nodes(kern, degree); + radical = kern; + } else { + radical = degree; + } + } else { + tex_flush_node(degree); + } + /*tex for |\Uroot.. {<list>} {}|: */ + kernel_math_list(radical_degree(target)) = null; + tex_flush_node(radical_degree(target)); + radical_degree(target) = null; + } + } + tex_aux_assign_radical(target, radical); +} + +/*tex + This is pretty much the same as the above when the |norule| option is set. 
But by splitting this + variant off we can enhance it more cleanly. +*/ + +static void tex_aux_make_delimited_radical(halfword target, int style, int size, kernset *kerns) +{ + halfword nucleus = noad_new_hlist(target); + /* scaled clearance = tex_get_math_y_parameter_checked(style, math_parameter_radical_vgap); */ + halfword leftdelimiter = radical_left_delimiter(target); + halfword rightdelimiter = radical_right_delimiter(target); + halfword delimiter = leftdelimiter ? leftdelimiter : rightdelimiter; + halfword companion = leftdelimiter ? rightdelimiter : null; + halfword radical = null; + halfword depth = has_noad_option_exact(target) ? radical_depth(target) : (box_depth(nucleus) + radical_depth(target)); + halfword height = has_noad_option_exact(target) ? radical_height(target) : (box_height(nucleus) + radical_height(target)); + halfword total = height + depth; + delimiterextremes extremes = { .tfont = null_font, .tchar = 0, .bfont = null_font, .bchar = 0, .height = 0, .depth = 0 }; + noad_new_hlist(target) = null; + delimiter = tex_aux_make_delimiter(target, delimiter, size, total, 0, style, 2, NULL, NULL, 0, has_noad_option_nooverflow(target), &extremes, depth); + if (companion) { + /*tex For now we assume symmetry and same height and depth! 
*/ + companion = tex_aux_make_delimiter(target, companion, size, total, 0, style, 2, NULL, NULL, 0, has_noad_option_nooverflow(target), &extremes, depth); + } + tex_aux_set_radical_kerns(&extremes, kerns); + radical = tex_aux_link_radical(nucleus, delimiter, companion, rightdelimiter); + tex_aux_assign_radical(target, radical); +} + +/*tex Construct a vlist box: */ + +static halfword tex_aux_wrapup_over_under_delimiter(halfword target, halfword x, halfword y, scaled shift_up, scaled shift_down, quarterword st) +{ + halfword box = tex_new_null_box_node(vlist_node, st); + scaled delta = (shift_up - box_depth(x)) - (box_height(y) - shift_down); + box_height(box) = shift_up + box_height(x); + box_depth(box) = box_depth(y) + shift_down; + tex_attach_attribute_list_copy(box, target); + if (delta) { + halfword kern = tex_new_kern_node(delta, vertical_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_couple_nodes(x, kern); + tex_couple_nodes(kern, y); + } else { + tex_couple_nodes(x, y); + } + box_list(box) = x; + return box; +} + +/*tex When |exact| use radicalwidth (|y| is delimiter). */ + +inline static halfword tex_aux_check_radical(halfword target, int stack, halfword r, halfword t) +{ + if (! 
stack && (box_width(r) >= box_width(t))) { + scaled width = tex_aux_math_given_x_scaled(noad_width(target)); + if (width) { + scaled delta = width - box_width(r); + if (delta) { + if (has_noad_option_left(target)) { + halfword kern = tex_new_kern_node(delta, horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_couple_nodes(kern, r); + } else if (has_noad_option_middle(target)) { + halfword kern = tex_new_kern_node(tex_half_scaled(delta), horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_couple_nodes(kern, r); + } else if (has_noad_option_right(target)) { + /*tex also kind of exact compared to vertical */ + } else { + return r; + } + r = tex_hpack(r, 0, packing_additional, direction_unknown, holding_none_option); + box_width(r) = noad_width(target); + tex_attach_attribute_list_copy(r, target); + } + } + } + return r; +} + +inline static void tex_aux_fixup_radical_width(halfword target, halfword x, halfword y) +{ + if (box_width(y) >= box_width(x)) { + if (noad_width(target)) { + box_shift_amount(x) += tex_half_scaled(box_width(y) - box_width(x)) ; + } + box_width(x) = box_width(y); + } else { + if (noad_width(target)) { + box_shift_amount(y) += tex_half_scaled(box_width(x) - box_width(y)) ; + } + box_width(y) = box_width(x); + } +} + +inline static halfword tex_aux_get_radical_width(halfword target, halfword p) +{ + return noad_width(target) ? noad_width(target) : box_width(p); +} + +/*tex + + This has the |nucleus| box |x| as a limit above an extensible delimiter |y|. 
+ +*/ + +static void tex_aux_make_over_delimiter(halfword target, int style, int size) +{ + halfword result; + scaled delta; + int stack; + scaled shift = tex_get_math_y_parameter_checked(style, math_parameter_over_delimiter_bgap); + scaled clearance = tex_get_math_y_parameter_checked(style, math_parameter_over_delimiter_vgap); + halfword content = tex_aux_clean_box(noad_nucleus(target), tex_math_style_variant(style, math_parameter_over_delimiter_variant), style, math_nucleus_list, 0, NULL); + scaled width = tex_aux_get_radical_width(target, content); + halfword over_delimiter = fraction_left_delimiter(target); + halfword delimiter = tex_aux_make_delimiter(target, over_delimiter, size, width, 1, style, 1, &stack, NULL, 0, has_noad_option_nooverflow(target), NULL, 0); + fraction_left_delimiter(target) = null; + delimiter = tex_aux_check_radical(target, stack, delimiter, content); + tex_aux_fixup_radical_width(target, content, delimiter); + delta = clearance - (shift - box_depth(content) - box_height(delimiter)); + if (delta > 0) { + shift += delta; + } + result = tex_aux_wrapup_over_under_delimiter(target, content, delimiter, shift, 0, math_over_delimiter_list); + box_width(result) = box_width(content); + kernel_math_list(noad_nucleus(target)) = result; + node_type(noad_nucleus(target)) = sub_box_node; +} + +/*tex + + This has the extensible delimiter |x| as a limit below |nucleus| box |y|. 
+ +*/ + +static void tex_aux_make_under_delimiter(halfword target, int style, int size) +{ + halfword result; + scaled delta; + int stack; + scaled shift = tex_get_math_y_parameter_checked(style, math_parameter_under_delimiter_bgap); + scaled clearance = tex_get_math_y_parameter_checked(style, math_parameter_under_delimiter_vgap); + halfword content = tex_aux_clean_box(noad_nucleus(target), tex_math_style_variant(style, math_parameter_under_delimiter_variant), style, math_nucleus_list, 0, NULL); + scaled width = tex_aux_get_radical_width(target, content); + halfword under_delimiter = fraction_left_delimiter(target); + halfword delimiter = tex_aux_make_delimiter(target, under_delimiter, size, width, 1, style, 1, &stack, NULL, 0, has_noad_option_nooverflow(target), NULL, 0); + fraction_left_delimiter(target) = null; + delimiter = tex_aux_check_radical(target, stack, delimiter, content); + tex_aux_fixup_radical_width(target, delimiter, content); + delta = clearance - (- box_depth(delimiter) - (box_height(content) - shift)); + if (delta > 0) { + shift += delta; + } + result = tex_aux_wrapup_over_under_delimiter(target, delimiter, content, 0, shift, math_under_delimiter_list); + box_width(result) = box_width(content); + kernel_math_list(noad_nucleus(target)) = result; + node_type(noad_nucleus(target)) = sub_box_node; +} + +/*tex + + This has the extensible delimiter |x| as a limit above |nucleus| box |y|. 
+ +*/ + +static void tex_aux_make_delimiter_over(halfword target, int style, int size) +{ + halfword result; + scaled actual; + int stack; + scaled shift = tex_get_math_y_parameter_checked(style, math_parameter_over_delimiter_bgap); + scaled clearance = tex_get_math_y_parameter_checked(style, math_parameter_over_delimiter_vgap); + halfword content = tex_aux_clean_box(noad_nucleus(target), tex_math_style_variant(style, math_parameter_delimiter_over_variant), style, math_nucleus_list, 0, NULL); + scaled width = tex_aux_get_radical_width(target, content); + halfword over_delimiter = fraction_left_delimiter(target); + halfword delimiter = tex_aux_make_delimiter(target, over_delimiter, size + (size == script_script_size ? 0 : 1), width, 1, style, 1, &stack, NULL, 0, has_noad_option_nooverflow(over_delimiter), NULL, 0); + fraction_left_delimiter(target) = null; + delimiter = tex_aux_check_radical(target, stack, delimiter, content); + tex_aux_fixup_radical_width(target, delimiter, content); + shift -= box_total(delimiter); + actual = shift - box_height(content); + if (actual < clearance) { + shift += (clearance - actual); + } + result = tex_aux_wrapup_over_under_delimiter(target, delimiter, content, shift, 0, math_over_delimiter_list); + box_width(result) = box_width(delimiter); + kernel_math_list(noad_nucleus(target)) = result; + node_type(noad_nucleus(target)) = sub_box_node; +} + +/*tex + + This has the extensible delimiter |y| as a limit below a |nucleus| box |x|. 
+ +*/ + +static void tex_aux_make_delimiter_under(halfword target, int style, int size) +{ + halfword result; + scaled actual; + int stack; + scaled shift = tex_get_math_y_parameter_checked(style, math_parameter_under_delimiter_bgap); + scaled clearance = tex_get_math_y_parameter_checked(style, math_parameter_under_delimiter_vgap); + halfword content = tex_aux_clean_box(noad_nucleus(target), tex_math_style_variant(style, math_parameter_delimiter_under_variant), style, math_nucleus_list, 0, NULL); + scaled width = tex_aux_get_radical_width(target, content); + halfword under_delimiter = fraction_left_delimiter(target); + halfword delimiter = tex_aux_make_delimiter(target, under_delimiter, size + (size == script_script_size ? 0 : 1), width, 1, style, 1, &stack, NULL, 0, has_noad_option_nooverflow(under_delimiter), NULL, 0); + fraction_left_delimiter(target) = null; + delimiter = tex_aux_check_radical(target, stack, delimiter, content); + tex_aux_fixup_radical_width(target, content, delimiter); + shift -= box_total(delimiter); + actual = shift - box_depth(content); + if (actual < clearance) { + shift += (clearance - actual); + } + result = tex_aux_wrapup_over_under_delimiter(target, content, delimiter, 0, shift, math_under_delimiter_list); + /*tex This also equals |width(y)|: */ + box_width(result) = box_width(delimiter); + kernel_math_list(noad_nucleus(target)) = result; + node_type(noad_nucleus(target)) = sub_box_node; +} + +static void tex_aux_make_radical(halfword target, int style, int size, kernset *kerns) +{ + switch (node_subtype(target)) { + case under_delimiter_radical_subtype: + tex_aux_make_under_delimiter(target, style, size); + break; + case over_delimiter_radical_subtype: + tex_aux_make_over_delimiter(target, style, size); + break; + case delimiter_under_radical_subtype: + tex_aux_make_delimiter_under(target, style, size); + break; + case delimiter_over_radical_subtype: + tex_aux_make_delimiter_over(target, style, size); + break; + case 
delimited_radical_subtype: + tex_aux_make_delimited_radical(target, style, size, kerns); + break; + case h_extensible_radical_subtype: + tex_aux_make_hextension(target, style, size); + break; + default: + tex_aux_make_root_radical(target, style, size, kerns); + break; + } + if (noad_source(target)) { + halfword result = kernel_math_list(noad_nucleus(target)); + if (result) { + box_source_anchor(result) = noad_source(target); + tex_set_box_geometry(result, anchor_geometry); + } + } +} + +static void tex_aux_preroll_radical(halfword target, int style, int size) +{ + switch (node_subtype(target)) { + case under_delimiter_radical_subtype: + case over_delimiter_radical_subtype: + case delimiter_under_radical_subtype: + case delimiter_over_radical_subtype: + case h_extensible_radical_subtype: + break; + default: + tex_aux_preroll_root_radical(target, style, size); + break; + } +} + +/*tex + + Slants are not considered when placing accents in math mode. The accenter is centered over the + accentee, and the accent width is treated as zero with respect to the size of the final box. + +*/ + +typedef enum math_accent_location_codes { + top_accent_code = 1, + bot_accent_code = 2, + overlay_accent_code = 4, + stretch_accent_code = 8, +} math_accent_location_codes; + +static int tex_aux_compute_accent_skew(halfword target, int flags, scaled *s, halfword size) +{ + /*tex will be true if a top-accent is placed in |s| */ + int absolute = 0; + switch (node_type(noad_nucleus(target))) { + case math_char_node: + { + halfword chr = null; + halfword fnt = null; + tex_aux_fetch(noad_nucleus(target), "accent", &fnt, &chr); + if (tex_aux_math_engine_control(fnt, math_control_accent_skew_apply)) { + /*tex + There is no bot_accent so let's assume that the shift also applies + to bottom and overlay accents. 
+ */ + *s = tex_char_top_accent_from_font(fnt, chr); + if (*s != INT_MIN) { + *s = tex_aux_math_x_size_scaled(fnt, *s, size); + absolute = 1; + } else { + *s = 0; + } + } else if (flags & top_accent_code) { + *s = tex_aux_math_x_size_scaled(fnt, tex_get_kern(fnt, chr, font_skew_char(fnt)), size); + } else { + *s = 0; + } + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: accent skew, font %i, chr %x, skew %D, absolute %i]", fnt, chr, *s, pt_unit, absolute); + tex_end_diagnostic(); + } + break; + } + case sub_mlist_node: + { + /*tex + If |nucleus(q)| is a |sub_mlist_node| composed of an |accent_noad| we: + + \startitemize + \startitem + use the positioning of the nucleus of that noad, recursing until + \stopitem + \startitem + the inner most |accent_noad|. This way multiple stacked accents are + \stopitem + \startitem + aligned to the inner most one. + \stopitem + \stoptitemize + + The vlink test was added in version 1.06, so that we only consider a lone noad: + + $ + \Umathaccent bottom 0 0 "023DF { \Umathaccent fixed 0 0 "00302 { m } r } \quad + \Umathaccent bottom 0 0 "023DF { l \Umathaccent fixed 0 0 "00302 { m } r } \quad + \Umathaccent bottom 0 0 "023DF { l \Umathaccent fixed 0 0 "00302 { m } } \quad + \Umathaccent bottom 0 0 "023DF { \Umathaccent fixed 0 0 "00302 { m } } \quad + \Umathaccent bottom 0 0 "023DF { l r } + $ + + */ + halfword p = kernel_math_list(noad_nucleus(target)); + if (p && ! node_next(p)) { + switch (node_type(p)) { + case accent_noad: + absolute = tex_aux_compute_accent_skew(p, flags, s, size); + break; + case simple_noad: + if (! 
noad_has_following_scripts(p)) { + absolute = tex_aux_compute_accent_skew(p, flags, s, size); + } + break; + } + } + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: accent skew, absolute %i]", absolute); + tex_end_diagnostic(); + } + break; + } + } + return absolute; +} +static void tex_aux_do_make_math_accent(halfword target, halfword accentfnt, halfword accentchr, int flags, int style, int size, scaled *accenttotal) +{ + /*tex The width and height (without scripts) of base character: */ + scaled baseheight = 0; + // scaled basedepth = 0; + scaled basewidth = 0; + /*tex The space to remove between accent and base: */ + scaled delta = 0; + scaled overshoot = 0; + extinfo *extended = NULL; + halfword attrlist = node_attr(target); + scaled fraction = accent_fraction(target) > 0 ? accent_fraction(target) : 1000; + scaled skew = 0; + halfword accent = null; + halfword base = null; + halfword result = null; + halfword nucleus = noad_nucleus(target); + halfword stretch = (flags & stretch_accent_code) == stretch_accent_code; + /*tex + Compute the amount of skew, or set |skew| to an alignment point. This will be true if a + top-accent has been determined. + */ + int absolute = tex_aux_compute_accent_skew(target, flags, &skew, size); + { + halfword usedstyle; + if (flags & top_accent_code) { + usedstyle = tex_math_style_variant(style, math_parameter_top_accent_variant); + } else if (flags & bot_accent_code) { + usedstyle = tex_math_style_variant(style, math_parameter_bottom_accent_variant); + } else { + usedstyle = tex_math_style_variant(style, math_parameter_overlay_accent_variant); + } + /*tex Beware: this adds italic correction because it feeds into mlist_to_hlist */ + base = tex_aux_clean_box(noad_nucleus(target), usedstyle, style, math_nucleus_list, 1, NULL); /* keep italic */ + basewidth = box_width(base); + baseheight = box_height(base); + // basedepth = box_depth(base); + } + if (! 
absolute && tex_aux_math_engine_control(accentfnt, math_control_accent_skew_half)) { + skew = tex_half_scaled(basewidth); + absolute = 1; + } + /*tex + Todo: |w = w - loffset - roffset| but then we also need to add a few + kerns so no hurry with that one. + */ + if (stretch && (tex_char_width_from_font(accentfnt, accentchr) < basewidth)) { + /*tex Switch to a larger accent if available and appropriate */ + scaled target = 0; + if (flags & overlay_accent_code) { + target = baseheight; + } else { + target += basewidth; + if (base) { + /*tex Use larger margins, */ + halfword list = box_list(base); + if (list && node_type(list) == glyph_node) { + halfword basefnt = glyph_font(list); + halfword basechr = glyph_character(list); + if (basefnt && basechr) { + target += tex_aux_math_x_size_scaled(basefnt, tex_char_right_margin_from_font(basefnt, basechr), size); + target += tex_aux_math_x_size_scaled(basefnt, tex_char_left_margin_from_font(basefnt, basechr), size); + } + } + } + } + if (fraction > 0) { + target = tex_xn_over_d(target, fraction, 1000); + } + while (1) { + if (tex_char_has_tag_from_font(accentfnt, accentchr, extension_tag)) { + extended = tex_char_horizontal_parts_from_font(accentfnt, accentchr); + } + if (extended) { + /*tex + This is a bit weird for an overlay but anyway, here we don't need a factor as + we don't step. + */ + halfword overlap = tex_get_math_x_parameter_checked(style, math_parameter_connector_overlap_min); + accent = tex_aux_get_delimiter_box(accentfnt, accentchr, basewidth, overlap, 1, attrlist); + accent = register_extensible(accentfnt, accentchr, size, accent, attrlist); + break; + } else if (! tex_char_has_tag_from_font(accentfnt, accentchr, list_tag)) { + break; + } else { + halfword remainder = tex_char_remainder_from_font(accentfnt, accentchr); + if (! 
tex_char_exists(accentfnt, remainder)) { + break; + } else if (flags & overlay_accent_code) { + if (tex_aux_math_y_size_scaled(accentfnt, tex_char_height_from_font(accentfnt, remainder), size) > target) { + break; + } + } else { + if (tex_aux_math_x_size_scaled(accentfnt, tex_char_width_from_font(accentfnt, remainder), size) > target) { + break; + } + } + accentchr = remainder; + } + } + /*tex + So here we then need to package the offsets. + */ + } + if (! accent) { + /*tex Italic gets added to width for traditional fonts (no italic anyway): */ + accent = tex_aux_char_box(accentfnt, accentchr, attrlist, NULL, glyph_math_accent_subtype, basewidth, style); + } + if (accenttotal) { + *accenttotal = box_total(accent); + } + if (flags & top_accent_code) { + scaled b = tex_get_math_y_parameter(style, math_parameter_accent_base_height); + scaled f = tex_get_math_y_parameter(style, math_parameter_flattened_accent_base_height); + scaled u = tex_get_math_y_parameter(style, stretch ? math_parameter_flattened_accent_top_shift_up : math_parameter_accent_top_shift_up); + if (f != undefined_math_parameter && baseheight > f) { + halfword flatchr = tex_char_flat_accent_from_font(accentfnt, accentchr); + if (flatchr != INT_MIN && flatchr != accentchr) { + tex_flush_node(accent); + accent = tex_aux_char_box(accentfnt, flatchr, attrlist, NULL, glyph_math_accent_subtype, basewidth, style); + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: flattening accent, old %x, new %x]", accentchr, flatchr); + tex_end_diagnostic(); + } + accentchr = flatchr; + } + } + if (b != undefined_math_parameter) { + /* not okay */ + delta = baseheight < b ? 
baseheight : b; + } + if (u != undefined_math_parameter) { + delta -= u; + } + } else if (flags & bot_accent_code) { + // scaled b = tex_get_math_y_parameter(style, math_parameter_accent_base_depth, 0); + // scaled f = tex_get_math_y_parameter(style, math_parameter_flattened_accent_base_depth, 0); + scaled l = tex_get_math_y_parameter(style, stretch ? math_parameter_flattened_accent_bottom_shift_down : math_parameter_accent_bottom_shift_down); + // if (b != undefined_math_parameter) { + // /* not okay */ + // delta = basedepth < b ? basedepth : b; + // } + if (l != undefined_math_parameter) { + delta += l; + } + } else { /* if (flags & overlay_accent_code) { */ + /*tex Center the accent vertically around base: */ + delta = tex_half_scaled(box_total(accent) + box_total(base)); + } + if (node_type(nucleus) != math_char_node) { + /*tex We have a molecule, not a simple atom. */ + } else if (noad_has_following_scripts(target)) { + /*tex Swap the scripts: */ + tex_flush_node_list(base); + base = tex_new_node(simple_noad, ordinary_noad_subtype); + tex_attach_attribute_list_copy(base, nucleus); + noad_nucleus(base) = tex_aux_math_clone(nucleus); + /* we no longer move the outer scripts to the inner noad */ + node_type(nucleus) = sub_mlist_node; + kernel_math_list(nucleus) = base; + base = tex_aux_clean_box(nucleus, style, style, math_nucleus_list, 1, NULL); /* keep italic */ + delta = delta + box_height(base) - baseheight; + baseheight = box_height(base); + } else { + /*tex We have only pure math char nodes here:*/ + // halfword basefnt = tex_fam_fnt(math_family(nucleus), size); + // if (tex_aux_has_opentype_metrics(basefnt)) { + // halfword basechr = math_character(nucleus); + // if (math_kernel_node_has_option(nucleus, math_kernel_no_italic_correction)) { + // italic = 0; + // } else if (tex_aux_math_engine_control(basefnt, math_control_accent_italic_kern)) { + // italic = tex_aux_math_x_style_scaled(basefnt, tex_char_italic_from_font(basefnt, basechr), size); + // } + 
// } + } + /*tex The top accents of both characters are aligned. */ + { + halfword accentwidth = box_width(accent); + if (absolute) { + scaled anchor = 0; + if (extended) { + /*tex If the accent is extensible just take the center. */ + anchor = tex_half_scaled(accentwidth); + } else { + anchor = tex_char_top_accent_from_font(accentfnt, accentchr); /* no bot accent key */ + if (anchor != INT_MIN) { + anchor = tex_aux_math_y_size_scaled(accentfnt, anchor, size); /* why y and not x */ + } else { + /*tex just take the center */ + anchor = tex_half_scaled(accentwidth); + } + } + if (math_direction_par == dir_righttoleft) { + skew += anchor - accentwidth; + } else { + skew -= anchor; + } + } else if (accentwidth == 0) { + skew += basewidth; + } else if (math_direction_par == dir_righttoleft) { + skew += accentwidth; /* ok? */ + } else { + skew += tex_half_scaled(basewidth - accentwidth); + } + box_shift_amount(accent) = skew; + box_width(accent) = 0; /* in gyre zero anyway */ + if (accentwidth) { + overshoot = accentwidth + skew - basewidth; + } + if (overshoot < 0) { + overshoot = 0; + } + } + if (flags & (top_accent_code)) { + accent_top_overshoot(target) = overshoot; + } + if (flags & (bot_accent_code)) { + accent_bot_overshoot(target) = overshoot; + } + if (flags & (top_accent_code | overlay_accent_code)) { + if (delta) { + halfword kern = tex_new_kern_node(-delta, vertical_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_couple_nodes(accent, kern); + tex_couple_nodes(kern, base); + } else { + tex_couple_nodes(accent, base); + } + result = accent; + } else { + tex_couple_nodes(base, accent); + result = base; + } + result = tex_vpack(result, 0, packing_additional, max_dimen, (singleword) math_direction_par, holding_none_option); + tex_attach_attribute_list_copy(result, target); + node_subtype(result) = math_accent_list; + box_width(result) = box_width(base); // basewidth + delta = baseheight - box_height(result); + if (flags & (top_accent_code 
| overlay_accent_code)) { + if (delta > 0) { + /*tex make the height of box |y| equal to |h| */ + halfword kern = tex_new_kern_node(delta, vertical_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_try_couple_nodes(kern, box_list(result)); + box_list(result) = kern; + box_height(result) = baseheight; + } + } else { + box_shift_amount(result) = - delta; + } + box_width(result) += overshoot; + // if (italic) { + // /*tex + // The old font codepath has ic built in, but new font code doesn't so we add + // the correction here. + // */ + // tex_aux_math_insert_italic_kern(result, italic, nucleus, "accent"); + // box_width(result) += italic ; + // } + kernel_math_list(nucleus) = result; + node_type(nucleus) = sub_box_node; +} + +static void tex_aux_make_accent(halfword target, int style, int size, kernset *kerns) +{ + int topstretch = 0; /* ! (node_subtype(q) % 2); */ + int botstretch = 0; /* ! (node_subtype(q) / 2); */ + halfword fnt = null; + halfword chr = null; + /*tex + We don't do some div and mod magic on the subtype here: we just check it: + */ + switch (node_subtype(target)) { + case bothflexible_accent_subtype: topstretch = 1; botstretch = 1; break; + case fixedtop_accent_subtype : botstretch = 1; break; + case fixedbottom_accent_subtype : topstretch = 1; break; + case fixedboth_accent_subtype : break; + } + /*tex + There is some inefficiency here as we calculate the width of the nuclues upto three times. + Maybe I need to have a look at that some day. + */ + if (accent_top_character(target)) { + if (tex_aux_fetch(accent_top_character(target), "top accent", &fnt, &chr)) { + tex_aux_do_make_math_accent(target, fnt, chr, top_accent_code | (topstretch ? 
stretch_accent_code : 0), style, size, &(kerns->toptotal)); + } + tex_flush_node(accent_top_character(target)); + accent_top_character(target) = null; + } + if (accent_bottom_character(target)) { + if (tex_aux_fetch(accent_bottom_character(target), "bottom accent", &fnt, &chr)) { + tex_aux_do_make_math_accent(target, fnt, chr, bot_accent_code | (botstretch ? stretch_accent_code : 0), style, size, &(kerns->bottomtotal)); + } + tex_flush_node(accent_bottom_character(target)); + accent_bottom_character(target) = null; + } + if (accent_middle_character(target)) { + if (tex_aux_fetch(accent_middle_character(target), "overlay accent", &fnt, &chr)) { + tex_aux_do_make_math_accent(target, fnt, chr, overlay_accent_code | stretch_accent_code, style, size, NULL); + } + tex_flush_node(accent_middle_character(target)); + accent_middle_character(target) = null; + } + if (noad_source(target)) { + halfword result = kernel_math_list(noad_nucleus(target)); + if (result) { + box_source_anchor(result) = noad_source(target); + tex_set_box_geometry(result, anchor_geometry); + } + } +} + +/*tex + + The |make_fraction| procedure is a bit different because it sets |new_hlist (q)| directly rather + than making a sub-box. + + Kerns are probably never zero so no need to be lean here. Actually they are likely to + be the same. By the time we make the rule we already dealt with all these clearance + issues, so we're sort of ahead of what happens in a callback wrt thickness. + + This rather large function has been split up in pieces which is a bit more readable but also gives + a much bigger binary (probably due to inlining the helpers). + +*/ + +/*tex + Create equal-width boxes |x| and |z| for the numerator and denominator. After this one is + called we compute the default amounts |shift_up| and |shift_down| by which they are displaced + from the baseline. 
+*/ + +static void tex_aux_wrap_fraction_parts(halfword target, int style, int size, halfword *numerator, halfword *denominator, int check) +{ + if (noad_style(target) == unused_math_style) { + *numerator = tex_aux_clean_box(fraction_numerator(target), tex_math_style_variant(style, math_parameter_numerator_variant), style, math_numerator_list, 0, NULL); + *denominator = tex_aux_clean_box(fraction_denominator(target), tex_math_style_variant(style, math_parameter_denominator_variant), style, math_denominator_list, 0, NULL); + } else { + *numerator = tex_aux_clean_box(fraction_numerator(target), noad_style(target), style, math_numerator_list, 0, NULL); + *denominator = tex_aux_clean_box(fraction_denominator(target), noad_style(target), style, math_denominator_list, 0, NULL); + } + if (check) { + if (box_width(*numerator) < box_width(*denominator)) { + *numerator = tex_aux_rebox(*numerator, box_width(*denominator), size); + } else { + *denominator = tex_aux_rebox(*denominator, box_width(*numerator), size); + } + } +} + +/*tex + Put the fraction into a box with its delimiters, and make |new_hlist(q)| point to it. +*/ + +static void tex_aux_wrap_fraction_result(halfword target, int style, int size, halfword fraction, kernset *kerns) +{ + halfword result = null; + halfword left_delimiter = fraction_left_delimiter(target); + halfword right_delimiter = fraction_right_delimiter(target); + if (left_delimiter || right_delimiter) { + halfword left = null; + halfword right = null; + halfword delta = tex_get_math_y_parameter(style, math_parameter_fraction_del_size); + if (delta == undefined_math_parameter) { + delta = tex_aux_get_delimiter_height(box_height(fraction), box_depth(fraction), 1, size, style); + } + /*tex Watch out: there can be empty delimiter boxes but with width. 
*/ + delimiterextremes extremes = { .tfont = null_font, .tchar = 0, .bfont = null_font, .bchar = 0, .height = 0, .depth = 0 }; + left = tex_aux_make_delimiter(target, left_delimiter, size, delta, 0, style, 1, NULL, NULL, 0, has_noad_option_nooverflow(target), NULL, 0); + right = tex_aux_make_delimiter(target, right_delimiter, size, delta, 0, style, 1, NULL, NULL, 0, has_noad_option_nooverflow(target), &extremes, 0); + if (kerns && extremes.tfont) { + if (tex_math_has_class_option(fraction_noad_subtype, carry_over_left_top_kern_class_option)) { + kerns->topleft = tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar); + } + if (tex_math_has_class_option(fraction_noad_subtype, carry_over_left_bottom_kern_class_option)) { + kerns->bottomleft = tex_char_bottom_left_kern_from_font(extremes.bfont, extremes.bchar); + } + if (tex_math_has_class_option(fraction_noad_subtype, carry_over_right_top_kern_class_option)) { + kerns->topright = tex_char_top_right_kern_from_font(extremes.tfont, extremes.tchar); + } + if (tex_math_has_class_option(fraction_noad_subtype, carry_over_right_bottom_kern_class_option)) { + kerns->bottomright = tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar); + } + if (tex_math_has_class_option(fraction_noad_subtype, prefer_delimiter_dimensions_class_option)) { + kerns->height = extremes.height; + kerns->depth = extremes.depth; + } + } + /* tex_aux_normalize_delimiters(left, right); */ + tex_couple_nodes(left, fraction); + tex_couple_nodes(fraction, right); + fraction = left; + } + result = tex_hpack(fraction, 0, packing_additional, direction_unknown, holding_none_option); + /*tex There can also be a nested one: */ + node_subtype(result) = math_fraction_list; + tex_aux_assign_new_hlist(target, result); + if (noad_source(target)) { + box_source_anchor(result) = noad_source(target); + // box_anchor(result) = left_origin_anchor; + tex_set_box_geometry(result, anchor_geometry); + } +} + +/*tex + The numerator and denominator must 
be separated by a certain minimum clearance, called |clr| in
    the following program. The difference between |clr| and the actual clearance is |2 * delta|.
*/

/*tex
    Shifts for a fraction without a rule. The up and down shifts start out as the stack parameters
    scaled by the vertical factor of the noad (in thousandths). When the gap between the parts is
    smaller than the stack gap, both shifts grow by half of what is missing. That half is always
    stored in |delta|, also when it is zero or negative.
*/

static void tex_aux_calculate_fraction_shifts_stack(halfword target, int style, int size, halfword numerator, halfword denominator, scaled *shift_up, scaled *shift_down, scaled *delta)
{
    scaled clearance = tex_get_math_y_parameter_checked(style, math_parameter_stack_vgap);
    scaled up = tex_get_math_y_parameter_checked(style, math_parameter_stack_num_up);
    scaled down = tex_get_math_y_parameter_checked(style, math_parameter_stack_denom_down);
    scaled gap = 0;
    scaled half = 0;
    (void) size;
    up = tex_round_xn_over_d(up, fraction_v_factor(target), 1000);
    down = tex_round_xn_over_d(down, fraction_v_factor(target), 1000);
    /*tex The distance between the bottom of the numerator and the top of the denominator: */
    gap = (up - box_depth(numerator)) - (box_height(denominator) - down);
    half = tex_half_scaled(clearance - gap);
    if (half > 0) {
        up += half;
        down += half;
    }
    *shift_up = up;
    *shift_down = down;
    *delta = half;
}

/*tex
    In the case of a fraction line, the minimum clearance depends on the actual thickness of the
    line.
+*/ + +static void tex_aux_calculate_fraction_shifts_normal(halfword target, int style, int size, halfword numerator, halfword denominator, scaled *shift_up, scaled *shift_down, scaled *delta) +{ + scaled axis = tex_aux_math_axis(size); + scaled numerator_clearance = tex_get_math_y_parameter_checked(style, math_parameter_fraction_num_vgap); + scaled denominator_clearance = tex_get_math_y_parameter_checked(style, math_parameter_fraction_denom_vgap); + scaled delta_up = 0; + scaled delta_down = 0; + *shift_up = tex_get_math_y_parameter_checked(style, math_parameter_fraction_num_up); + *shift_down = tex_get_math_y_parameter_checked(style, math_parameter_fraction_denom_down); + *shift_up = tex_round_xn_over_d(*shift_up, fraction_v_factor(target), 1000); + *shift_down = tex_round_xn_over_d(*shift_down, fraction_v_factor(target), 1000); + /* hm, delta is only set when we have a middle delimiter ... needs checking .. i should write this from scratch */ + *delta = tex_half_scaled(tex_aux_math_given_y_scaled(fraction_rule_thickness(target))); + if (has_noad_option_exact(target)) { + delta_up = numerator_clearance - ((*shift_up - box_depth(numerator) ) - (axis + *delta)); + delta_down = denominator_clearance - ((*shift_down - box_height(denominator)) + (axis - *delta)); + } else { + // maybe this is just the old tex code path + scaled rule_thickness = tex_aux_math_given_y_scaled(fraction_rule_thickness(target)); + scaled rule_parameter = tex_get_math_y_parameter_checked(style, math_parameter_fraction_rule); + numerator_clearance = tex_ext_xn_over_d(numerator_clearance, rule_thickness, rule_parameter); + denominator_clearance = tex_ext_xn_over_d(denominator_clearance, rule_thickness, rule_parameter); + delta_up = numerator_clearance - ((*shift_up - box_depth(numerator) ) - (axis + *delta)); + delta_down = denominator_clearance - ((*shift_down - box_height(denominator)) + (axis - *delta)); + } + *shift_up += delta_up; + *shift_down += delta_down; +} + +static scaled 
tex_aux_check_fraction_rule(halfword target, int style, int size, int fractiontype, halfword *usedfam) +{ + scaled preferfont = has_noad_option_preferfontthickness(target); + halfword fam = math_rules_fam_par; + (void) style; + /*tex + We can take the rule width from an explicitly set fam, even if a fraction itself has no + character, otherwise we just use the math parameter. + */ + if (preferfont) { + /*tex Forced by option or command. */ + } else if (fractiontype == above_fraction_subtype) { + /*tex Bypassed by command. */ + preferfont = 0; + } else if (fraction_rule_thickness(target)) { + /*tex Controlled by optional parameter. */ + preferfont = 1; + } + if (preferfont) { + halfword t = tex_aux_check_rule_thickness(target, size, &fam, math_control_fraction_rule, FractionRuleThickness); + if (t != undefined_math_parameter) { + fraction_rule_thickness(target) = t; + } + } + if (fraction_rule_thickness(target) == preset_rule_thickness) { + fraction_rule_thickness(target) = tex_get_math_y_parameter_checked(style, math_parameter_fraction_rule); + } + if (usedfam) { + *usedfam = fam; + } + return tex_aux_math_given_y_scaled(fraction_rule_thickness(target)); +} + +static void tex_aux_compensate_fraction_rule(halfword target, halfword fraction, halfword separator, scaled thickness) +{ + (void) target; + if (box_total(separator) != thickness) { + scaled half = tex_half_scaled(box_total(separator) - thickness); + box_height(fraction) += half; + box_depth(fraction) += half; + } +} + +static void tex_aux_apply_fraction_shifts(halfword fraction, halfword numerator, halfword denominator, scaled shift_up, scaled shift_down) +{ + box_height(fraction) = shift_up + box_height(numerator); + box_depth(fraction) = box_depth(denominator) + shift_down; + box_width(fraction) = box_width(numerator); +} + +/*tex + We construct a vlist box for the fraction, according to |shift_up| and |shift_down|. 
Maybe in + the meantime it is nicer to just calculate the fraction instead of messing with the height and + depth explicitly (the old approach). +*/ + +static halfword tex_aux_assemble_fraction(halfword target, int style, int size, halfword numerator, halfword denominator, halfword separator, scaled delta, scaled shift_up, scaled shift_down) +{ + (void) target; + (void) style; + if (separator) { + scaled axis = tex_aux_math_axis(size); + halfword after = tex_new_kern_node((axis - delta) - (box_height(denominator) - shift_down), vertical_math_kern_subtype); + halfword before = tex_new_kern_node((shift_up - box_depth(numerator)) - (axis + delta), vertical_math_kern_subtype); + tex_attach_attribute_list_copy(after, target); + tex_attach_attribute_list_copy(before, target); + tex_couple_nodes(separator, after); + tex_couple_nodes(after, denominator); + tex_couple_nodes(before, separator); + tex_couple_nodes(numerator, before); + } else { + halfword between = tex_new_kern_node((shift_up - box_depth(numerator)) - (box_height(denominator) - shift_down), vertical_math_kern_subtype); + tex_attach_attribute_list_copy(between, target); + tex_couple_nodes(between, denominator); + tex_couple_nodes(numerator, between); + } + return numerator; +} + +static halfword tex_aux_make_skewed_fraction(halfword target, int style, int size, kernset *kerns) +{ + halfword middle = null; + halfword fraction = null; + halfword numerator = null; + halfword denominator = null; + scaled delta = 0; + halfword middle_delimiter = fraction_middle_delimiter(target); + scaled maxheight = 0; + scaled maxdepth = 0; + scaled ngap = 0; + scaled dgap = 0; + scaled hgap = 0; + delimiterextremes extremes = { .tfont = null_font, .tchar = 0, .bfont = null_font, .bchar = 0, .height = 0, .depth = 0 }; + scaled tolerance = tex_get_math_y_parameter_default(style, math_parameter_skewed_delimiter_tolerance, 0); + scaled shift_up = tex_get_math_y_parameter_checked(style, math_parameter_skewed_fraction_vgap); + scaled 
shift_down = tex_round_xn_over_d(shift_up, fraction_v_factor(target), 1000); + (void) kerns; + shift_up = shift_down; /*tex The |shift_up| value might change later. */ + tex_aux_wrap_fraction_parts(target, style, size, &numerator, &denominator, 0); + /*tex + Here we don't share code bnecause we're going horizontal. + */ + if (! has_noad_option_noaxis(target)) { + shift_up += tex_half_scaled(tex_aux_math_axis(size)); + } + /*tex + Construct a hlist box for the fraction, according to |hgap| and |vgap|. + */ + hgap = tex_get_math_x_parameter_checked(style, math_parameter_skewed_fraction_hgap); + hgap = tex_round_xn_over_d(hgap, fraction_h_factor(target), 1000); + { + scaled ht = box_height(numerator) + shift_up; + scaled dp = box_depth(numerator) - shift_up; + if (dp < 0) { + dp = 0; + } + if (ht < 0) { + ht = 0; + } + if (ht > maxheight) { + maxheight = ht; + } + if (dp > maxdepth) { + maxdepth = dp; + } + } + { + scaled ht = box_height(denominator) - shift_down; + scaled dp = box_depth(denominator) + shift_down; + if (dp < 0) { + dp = 0; + } + if (ht < 0) { + ht = 0; + } + if (ht > maxheight) { + maxheight = ht; + } + if (dp > maxdepth) { + maxdepth = dp; + } + } + box_shift_amount(numerator) = -shift_up; + box_shift_amount(denominator) = shift_down; + delta = maxheight + maxdepth; + middle = tex_aux_make_delimiter(target, middle_delimiter, size, delta, 0, style, 1, NULL, NULL, tolerance, has_noad_option_nooverflow(target), &extremes, 0); + fraction = tex_new_null_box_node(hlist_node, math_fraction_list); + tex_attach_attribute_list_copy(fraction, target); + box_width(fraction) = box_width(numerator) + box_width(denominator) + box_width(middle) - hgap; + hgap = -tex_half_scaled(hgap); + box_height(fraction) = box_height(middle) > maxheight ? box_height(middle) : maxheight; + box_depth(fraction) = box_depth(middle) > maxdepth ? 
box_depth(middle) : maxdepth; + ngap = hgap; + dgap = hgap; + if (tex_math_has_class_option(fraction_noad_subtype, carry_over_left_top_kern_class_option)) { + ngap += tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar); + } + if (tex_math_has_class_option(fraction_noad_subtype, carry_over_right_bottom_kern_class_option)) { + dgap += tex_char_bottom_right_kern_from_font(extremes.tfont, extremes.tchar); + } + if (ngap || dgap) { + // todo: only add when non zero + halfword nkern = tex_new_kern_node(ngap, horizontal_math_kern_subtype); + halfword dkern = tex_new_kern_node(dgap, horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(nkern, target); + tex_attach_attribute_list_copy(dkern, target); + tex_couple_nodes(numerator, nkern); + tex_couple_nodes(nkern, middle); + tex_couple_nodes(middle, dkern); + tex_couple_nodes(dkern, denominator); + } else { + tex_couple_nodes(numerator, middle); + tex_couple_nodes(middle, denominator); + } + box_list(fraction) = numerator; + return fraction; +} + +static halfword tex_aux_make_stretched_fraction(halfword target, int style, int size, kernset *kerns) +{ + halfword middle = null; + halfword numerator = null; + halfword denominator = null; + scaled shift_up = 0; + scaled shift_down = 0; + scaled delta = 0; + halfword middle_delimiter = fraction_middle_delimiter(target); + halfword thickness = tex_aux_check_fraction_rule(target, style, size, stretched_fraction_subtype, NULL); + halfword fraction = tex_new_null_box_node(vlist_node, math_fraction_list); + (void) kerns; + tex_attach_attribute_list_copy(fraction, target); + tex_aux_wrap_fraction_parts(target, style, size, &numerator, &denominator, 1); + tex_aux_calculate_fraction_shifts_normal(target, style, size, numerator, denominator, &shift_up, &shift_down, &delta); + tex_aux_apply_fraction_shifts(fraction, numerator, denominator, shift_up, shift_down); + middle = tex_aux_make_delimiter(target, middle_delimiter, size, box_width(fraction), 1, style, 0, NULL, 
NULL, 0, 0, NULL, 0); + if (box_width(middle) < box_width(fraction)) { + /*tex It's always in the details: */ + scaled delta = (box_width(fraction) - box_width(middle)) / 2; + tex_aux_prepend_hkern_to_box_list(middle, delta, horizontal_math_kern_subtype, "bad delimiter"); + tex_aux_append_hkern_to_box_list(middle, delta, horizontal_math_kern_subtype, "bad delimiter"); + box_width(middle) = box_width(fraction); + } + tex_aux_compensate_fraction_rule(target, fraction, middle, thickness); + box_list(fraction) = tex_aux_assemble_fraction(target, style, size, numerator, denominator, middle, delta, shift_up, shift_down); + return fraction; +} + +static halfword tex_aux_make_ruled_fraction(halfword target, int style, int size, kernset *kerns, int fractiontype) +{ + halfword numerator = null; + halfword denominator = null; + scaled shift_up = 0; + scaled shift_down = 0; + scaled delta = 0; + halfword fam = 0; + halfword thickness = tex_aux_check_fraction_rule(target, style, size, fractiontype, &fam); + halfword fraction = tex_new_null_box_node(vlist_node, math_fraction_list); + halfword rule = null; + (void) kerns; + tex_attach_attribute_list_copy(fraction, target); + tex_aux_wrap_fraction_parts(target, style, size, &numerator, &denominator, 1); + if (fraction_rule_thickness(target) == 0) { + tex_aux_calculate_fraction_shifts_stack(target, style, size, numerator, denominator, &shift_up, &shift_down, &delta); + } else { + tex_aux_calculate_fraction_shifts_normal(target, style, size, numerator, denominator, &shift_up, &shift_down, &delta); + } + tex_aux_apply_fraction_shifts(fraction, numerator, denominator, shift_up, shift_down); + if (fractiontype != atop_fraction_subtype) { + rule = tex_aux_fraction_rule(box_width(fraction), thickness, get_attribute_list(target), math_fraction_rule_subtype, size, fam); + tex_aux_compensate_fraction_rule(target, fraction, rule, thickness); + } + box_list(fraction) = tex_aux_assemble_fraction(target, style, size, numerator, denominator, 
rule, delta, shift_up, shift_down); + return fraction; +} + +/*tex + We intercept bad nodes created at the \LUA\ end but only partially. The fraction handler is + quite complex and uses a lot of parameters. You shouldn't mess with \TEX. +*/ + +static void tex_aux_make_fraction(halfword target, int style, int size, kernset *kerns) +{ + quarterword fractiontype = node_subtype(target); + halfword fraction = null; + TRYAGAIN: + switch (fractiontype) { + case over_fraction_subtype: + case atop_fraction_subtype: + case above_fraction_subtype: + tex_flush_node_list(fraction_middle_delimiter(target)); + fraction_middle_delimiter(target) = null; + fraction = tex_aux_make_ruled_fraction(target, style, size, kerns, fractiontype); + break; + case skewed_fraction_subtype: + fraction_rule_thickness(target) = 0; + fraction = tex_aux_make_skewed_fraction(target, style, size, kerns); + break; + case stretched_fraction_subtype: + fraction = tex_aux_make_stretched_fraction(target, style, size, kerns); + break; + default: + fractiontype = atop_fraction_subtype; + goto TRYAGAIN; + } + tex_aux_wrap_fraction_result(target, style, size, fraction, kerns); + fraction_left_delimiter(target) = null; + fraction_middle_delimiter(target) = null; + fraction_right_delimiter(target) = null; +} + +/*tex + + If the nucleus of an |op_noad| is a single character, it is to be centered vertically with + respect to the axis, after first being enlarged (via a character list in the font) if we are in + display style. The normal convention for placing displayed limits is to put them above and + below the operator in display style. + + The italic correction is removed from the character if there is a subscript and the limits are + not being displayed. The |make_op| routine returns the value that should be used as an offset + between subscript and superscript. + + After |make_op| has acted, |subtype(q)| will be |limits| if and only if the limits have been + set above and below the operator. 
In that case, |new_hlist(q)| will already contain the desired + final box. + + In display mode we also handle the nolimits scripts here because we have an option to tweak the + placement with |\mathnolimitsmode| in displaymode. So, when we have neither |\limits| or + |\nolimits| in text mode we fall through and scripts are dealt with later. + +*/ + +static void tex_aux_make_scripts ( + halfword target, + halfword kernel, + scaled italic, + int style, + scaled supshift, + scaled subshift, + scaled supdrop, + kernset *kerns +); + +static halfword tex_aux_check_nucleus_complexity ( + halfword target, + scaled *delta, + halfword style, + halfword size, + kernset *kerns +); + +/* + For easy configuration ... fonts are somewhat inconsistent and the + values for italic correction run from 30 to 60\% of the width. + +*/ + +static void tex_aux_get_shifts(int mode, int style, scaled delta, scaled *top, scaled *bot) +{ + switch (mode) { + case 0: + /*tex full bottom correction */ + *top = 0; + *bot = -delta; + break; + case 1: + /*tex |MathConstants| driven */ + *top = tex_round_xn_over_d(delta, tex_get_math_parameter_default(style, math_parameter_nolimit_sup_factor, 0), 1000); + *bot = -tex_round_xn_over_d(delta, tex_get_math_parameter_default(style, math_parameter_nolimit_sub_factor, 0), 1000); + break ; + case 2: + /*tex no correction */ + *top = 0; + *bot = 0; + break ; + case 3: + /*tex half bottom correction */ + *top = 0; + *bot = -tex_half_scaled(delta); + break; + case 4: + /*tex half bottom and top correction */ + *top = tex_half_scaled(delta); + *bot = -tex_half_scaled(delta); + break; + default : + /*tex above 15: for quickly testing values */ + *top = 0; + *bot = (mode > 15) ? -tex_round_xn_over_d(delta, mode, 1000) : 0; + break; + } +} + +// static scaled tex_aux_make_op(halfword q, int style, int size, int italic, int forced_no_limits, kernset *kerns) +// { +// /*tex for historic reasons we have two flags .. 
because we need to adapt to the style */ +// int limits = has_noad_option_limits(q); +// int nolimits = has_noad_option_nolimits(q); +// if (! limits && ! nolimits && (style == display_style || style == cramped_display_style)) { +// nolimits = 0; +// limits = 1; +// noad_options(q) |= noad_option_limits; /* so we can track it */ +// } +// if (forced_no_limits) { +// nolimits = 1; +// } +// if (node_type(noad_nucleus(q)) == math_char_node) { +// halfword x; +// int shiftaxis = 0; +// halfword chr = null; +// halfword fnt = null; +// halfword autoleft = null; +// halfword autoright = null; +// halfword autosize = has_noad_option_auto(q); +// scaled openupheight = has_noad_option_openupheight(q) ? noad_height(q) : 0; +// scaled openupdepth = has_noad_option_openupdepth(q) ? noad_depth(q) : 0; +// if (has_noad_option_adapttoleft(q) && node_prev(q)) { +// autoleft = node_prev(q); +// if (node_type(autoleft) != simple_noad) { +// autoleft = null; +// } else { +// autoleft = noad_new_hlist(autoleft); +// } +// } +// if (has_noad_option_adapttoright(q) && node_next(q)) { +// autoright = noad_nucleus(node_next(q)); +// } +// tex_aux_fetch(noad_nucleus(q), "operator", &fnt, &chr); +// /*tex Nicer is actually to just test for |display_style|. */ +// if ((style < text_style) || autoleft || autoright || autosize) { +// /*tex Try to make it larger in displaystyle. */ +// scaled opsize = tex_get_math_parameter(style, math_parameter_operator_size, NULL); +// if ((autoleft || autoright || autosize) && (opsize == undefined_math_parameter)) { +// opsize = 0; +// } +// if (opsize != undefined_math_parameter) { +// /*tex Creating a temporary delimiter is the cleanest way. 
*/ +// halfword y = tex_new_node(delimiter_node, 0); +// tex_attach_attribute_list_copy(y, noad_nucleus(q)); +// delimiter_small_family(y) = math_family(noad_nucleus(q)); +// delimiter_small_character(y) = math_character(noad_nucleus(q)); +// opsize = tex_aux_math_y_scaled(opsize, style); +// if (autoright) { +// /*tex We look ahead and preroll, |autoright| is a noad. */ +// scaledwhd siz = tex_natural_hsizes(autoright, null, 0.0, 0, 0); +// scaled total = siz.ht + siz.dp; +// if (total > opsize) { +// opsize = total; +// } +// } +// if (autoleft && box_total(autoleft) > opsize) { +// /*tex We look back and check, |autoleft| is a box. */ +// opsize = box_total(autoleft); +// } +// /* we need to check for overflow here */ +// opsize += limited_scaled(openupheight); +// opsize += openupdepth; +// x = tex_aux_make_delimiter(y, text_size, opsize, 0, style, ! has_noad_option_noaxis(q), noad_options(q), NULL, &italic, 0, has_noad_option_nooverflow(q), NULL); +// // if (italic) { +// // if (lmt_math_state.opentype) { +// // /*tex +// // As we never added italic correction we don't need to compensate. The ic +// // is stored in a special field of the node and applied in some occasions. +// // */ +// // } else if (noad_subscr(q) && ! has_noad_option_limits(q)) { /* todo: control option */ +// // /*tex +// // Here we (selectively) remove the italic correction that always gets added +// // in a traditional font. See (**). In \OPENTYPE\ mode we insert italic kerns, +// // but in traditional mode it's width manipulation. This actually makes sense +// // because those fonts have a fake width and the italic correction sets that +// // right. +// // */ +// // box_list(x) = tex_aux_math_remove_italic_kern(box_list(x), &italic, "operator"); +// // box_width(x) -= italic; +// // } +// // } +// } else { +// /*tex +// Where was the weird + 1 coming from? It tweaks the comparison. Anyway, because we +// do a lookup we don't need to scale the |total| and |opsize|. 
We have a safeguard +// against endless loops. +// */ +// opsize = tex_char_total_from_font(fnt, chr) + openupheight + openupdepth + 1; +// /* +// if (opsize) { +// opsize = tex_aux_math_y_style_scaled(fnt, opsize, size); // we compare unscaled +// } +// */ +// while (tex_char_tag_from_font(fnt, chr) == list_tag && tex_char_total_from_font(fnt, chr) < opsize) { +// halfword rem = tex_char_remainder_from_font(fnt, chr); +// if (chr != rem && tex_char_exists(fnt, rem)) { +// chr = rem; +// math_character(noad_nucleus(q)) = chr; +// } else { +// break; +// } +// } +// if (math_kernel_node_has_option(noad_nucleus(q), math_kernel_no_italic_correction)) { +// italic = 0; +// } else { +// italic = tex_aux_math_x_size_scaled(fnt, tex_char_italic_from_font(fnt, chr), size); +// } +// x = tex_aux_clean_box(noad_nucleus(q), style, style, math_nucleus_list, 0, NULL); +// // if (italic) { +// // if (lmt_math_state.opentype) { +// // /*tex we never added italic correction unless we had a |mlist_to_hlist| call. */ +// // } else if (noad_subscr(q) && ! has_noad_option_limits(q)) { /* todo: control option */ +// // box_list(x) = tex_aux_math_remove_italic_kern(box_list(x), &italic, "operator"); +// // box_width(x) -= italic; +// // } +// // } +// shiftaxis = 1; +// } +// } else { +// /*tex Non display style. */ +// italic = tex_aux_math_x_size_scaled(fnt, tex_char_italic_from_font(fnt, chr), size); +// x = tex_aux_clean_box(noad_nucleus(q), style, style, math_nucleus_list, 0, NULL); +// // if (italic) { +// // if (lmt_math_state.opentype) { +// // /*tex We never added italic correction, but it gets ignored anyway. */ +// // box_width(x) -= italic; +// // } else if (noad_subscr(q) && ! 
has_noad_option_limits(q)) { /* todo: control option, what does this assume from the font */ +// // /*tex remove italic correction */ +// // box_width(x) -= italic; +// // } +// // } +// box_height(x) += openupheight; +// box_depth(x) += openupdepth; +// shiftaxis = 1; +// } +// if (shiftaxis) { +// /*tex center vertically */ +// box_shift_amount(x) = tex_half_scaled(box_height(x) - box_depth(x)) - tex_aux_math_axis(size); +// } +// if ((node_type(x) == hlist_node) && (openupheight || openupdepth)) { +// box_shift_amount(x) -= openupheight/2; +// box_shift_amount(x) += openupdepth/2; +// } +// node_type(noad_nucleus(q)) = sub_box_node; +// math_list(noad_nucleus(q)) = x; +// } +// if (nolimits) { +// /*tex +// We end up here when there is an explicit directive or when we're in displaymode without +// an explicit directive. If in text mode we want to have this mode driven placement tweak +// we need to use the |\nolimits| directive. Beware: that mode might be changed to a font +// property or option itself. +// */ +// // if (lmt_math_state.opentype) { +// kernset localkerns = { .tr = 0, .br = 0, .tl = 0, .bl = 0 }; +// if (kerns) { +// localkerns.tr = kerns->tr; +// localkerns.br = kerns->br; +// localkerns.tl = kerns->tl; +// localkerns.bl = kerns->bl; +// } +// halfword p = tex_aux_check_nucleus_complexity(q, NULL, style, lmt_math_state.size, &localkerns); +// if (noad_has_scripts(q)) { +// scaled top = 0; /*tex Normally this would be: | delta|. */ +// scaled bot = 0; /*tex Normally this would be: |-delta|. 
*/ +// if (localkerns.tr || localkerns.br) { +// italic = 0; +// } +// tex_aux_get_shifts(math_nolimits_mode_par, style, italic, &top, &bot); +// tex_aux_make_scripts(q, p, 0, style, top, bot, 0, &localkerns); +// } else { +// tex_aux_assign_new_hlist(q, p); +// } +// italic = 0; +// // } else { +// // /*tex similar code as in the caller */ +// // halfword p = tex_aux_check_nucleus_complexity(q, &italic, style, lmt_math_state.size, NULL); +// // if (noad_has_scripts(q)) { +// // tex_aux_make_scripts(q, p, italic, style, 0, 0); +// // } else { +// // tex_aux_assign_new_hlist(q, p); +// // } +// // } +// } else if (limits) { +// /*tex +// +// The following program builds a vlist box |v| for displayed limits. The width of the box +// is not affected by the fact that the limits may be skewed. +// +// We end up here when we have a limits directive or when that property is set because +// we're in displaymode. +// */ +// halfword nucleus = noad_nucleus(q); +// halfword x = tex_aux_clean_box(noad_supscr(q), tex_math_style_variant(style, math_parameter_superscript_variant), style, math_sup_list, 0, NULL); +// halfword y = tex_aux_clean_box(nucleus, style, style, math_nucleus_list, 0, NULL); +// halfword z = tex_aux_clean_box(noad_subscr(q), tex_math_style_variant(style, math_parameter_subscript_variant), style, math_sub_list, 0, NULL); +// halfword result = tex_new_null_box_node(vlist_node, math_modifier_list); +// tex_attach_attribute_list_copy(result, q); +// if (nucleus) { +// switch (node_type(nucleus)) { +// case sub_mlist_node: +// case sub_box_node: +// { +// halfword n = math_list(nucleus); +// if (! n) { +// /* kind of special */ +// } else if (node_type(n) == hlist_node) { +// /*tex just a not scaled char */ +// n = box_list(n); +// while (n) { +// if (node_type(n) == glyph_node && ! 
tex_has_glyph_option(n, glyph_option_no_italic_correction)) { +// if (tex_aux_math_engine_control(glyph_font(n), math_control_apply_boxed_italic_kern)) { +// italic = tex_aux_math_x_size_scaled(glyph_font(n), tex_char_italic_from_font(glyph_font(n), glyph_character(n)), size); +// } +// } +// n = node_next(n); +// } +// } else { +// /*tex This might need checking. */ +// while (n) { +// if (node_type(n) == fence_noad && noad_italic(n) > italic) { +// /*tex we can have dummies, the period ones */ +// italic = tex_aux_math_given_x_scaled(noad_italic(n)); +// } +// n = node_next(n); +// } +// } +// break; +// } +// case math_char_node: +// { +// halfword fnt = tex_fam_fnt(math_family(nucleus), size); +// halfword chr = math_character(nucleus); +// italic = tex_aux_math_x_size_scaled(fnt, tex_char_italic_from_font(fnt, chr), size); +// break; +// } +// } +// } +// /*tex We're still doing limits. */ +// { +// scaled halfitalic = tex_half_scaled(italic); +// scaled supwidth = box_width(x); +// scaled boxwidth = box_width(y); +// scaled subwidth = box_width(z); +// box_width(result) = boxwidth; +// if (supwidth > boxwidth) { +// boxwidth = supwidth; +// } +// if (subwidth > boxwidth) { +// boxwidth = subwidth; +// } +// box_width(result) = boxwidth; +// x = tex_aux_rebox(x, boxwidth, size); +// y = tex_aux_rebox(y, boxwidth, size); +// z = tex_aux_rebox(z, boxwidth, size); +// /*tex This is only (visually) ok for integrals, but other operators have no italic anyway. */ +// box_shift_amount(x) = halfitalic; +// box_shift_amount(z) = -halfitalic; +// if (math_limits_mode_par >= 1) { +// /*tex +// This option enforces the real dimensions and avoids longer limits to stick out +// which is a traditional \TEX\ feature. It's handy to have this for testing. Nicer +// would be to also adapt the width of the wrapped scripts but these are reboxed +// with centering so we keep that as it is. 
+// */ +// if (supwidth + halfitalic > boxwidth) { +// box_width(result) += supwidth + halfitalic - boxwidth; +// } +// if (subwidth + halfitalic > boxwidth) { +// box_x_offset(result) = subwidth + halfitalic - boxwidth; +// box_width(result) += box_x_offset(result); +// tex_set_box_geometry(result, offset_geometry); +// } +// } else { +// /*tex We keep the possible left and/or right overshoot of limits. */ +// } +// /*tex Here the target |v| is still empty but we do set the height and depth. */ +// box_height(result) = box_height(y); +// box_depth(result) = box_depth(y); +// } +// /*tex +// +// Attach the limits to |y| and adjust |height(v)|, |depth(v)| to account for +// their presence. +// +// We use |shift_up| and |shift_down| in the following program for the amount of +// glue between the displayed operator |y| and its limits |x| and |z|. +// +// The vlist inside box |v| will consist of |x| followed by |y| followed by |z|, +// with kern nodes for the spaces between and around them; |b| is baseline and |v| +// is the minumum gap. 
+// +// */ +// if (noad_supscr(q)) { +// scaled bgap = tex_get_math_y_parameter_checked(style, math_parameter_limit_above_bgap); +// scaled vgap = tex_get_math_y_parameter_checked(style, math_parameter_limit_above_vgap); +// scaled vkern = tex_get_math_y_parameter_checked(style, math_parameter_limit_above_kern); +// scaled vshift = bgap - box_depth(x); +// if (vshift < vgap) { +// vshift = vgap; +// } +// if (vshift) { +// halfword kern = tex_new_kern_node(vshift, vertical_math_kern_subtype); +// tex_attach_attribute_list_copy(kern, q); +// tex_couple_nodes(kern, y); +// tex_couple_nodes(x, kern); +// } else { +// tex_couple_nodes(y, x); +// } +// if (vkern) { +// halfword kern = tex_new_kern_node(vkern, vertical_math_kern_subtype); +// tex_attach_attribute_list_copy(kern, q); +// tex_couple_nodes(kern, x); +// box_list(result) = kern; +// } else { +// box_list(result) = x; +// } +// box_height(result) += vkern + box_total(x) + vshift; +// } else { +// box_list(x) = null; +// tex_flush_node(x); +// box_list(result) = y; +// } +// if (noad_subscr(q)) { +// scaled bgap = tex_get_math_y_parameter_checked(style, math_parameter_limit_below_bgap); +// scaled vgap = tex_get_math_y_parameter_checked(style, math_parameter_limit_below_vgap); +// scaled vkern = tex_get_math_y_parameter_checked(style, math_parameter_limit_below_kern); +// scaled vshift = bgap - box_height(z); +// if (vshift < vgap) { +// vshift = vgap; +// } +// if (vshift) { +// halfword kern = tex_new_kern_node(vshift, vertical_math_kern_subtype); +// tex_attach_attribute_list_copy(kern, q); +// tex_couple_nodes(y, kern); +// tex_couple_nodes(kern, z); +// } else { +// tex_couple_nodes(y, z); +// } +// if (vkern) { +// halfword kern = tex_new_kern_node(vkern, vertical_math_kern_subtype); +// tex_attach_attribute_list_copy(kern, q); +// tex_couple_nodes(z, kern); +// } +// box_depth(result) += vkern + box_total(z) + vshift; +// } else { +// box_list(z) = null; +// tex_flush_node(z); +// } +// if 
(noad_subscr(q)) { +// math_list(noad_subscr(q)) = null; +// tex_flush_node(noad_subscr(q)); +// noad_subscr(q) = null; +// } +// if (noad_supscr(q)) { +// math_list(noad_supscr(q)) = null; +// tex_flush_node(noad_supscr(q)); +// noad_supscr(q) = null; +// } +// tex_aux_assign_new_hlist(q, result); +// // if (lmt_math_state.opentype) { +// italic = 0; +// // } +// } else { +// /*tex +// We end up here when we're not in displaymode and don't have a (no)limits directive. +// */ +// } +// return italic; +// } + +static scaled tex_aux_op_no_limits(halfword target, int style, int size, int italic, kernset *kerns) +{ + kernset localkerns ; + halfword p; + (void) size; + if (kerns) { + tex_math_copy_kerns(&localkerns, kerns); + } else { + tex_math_wipe_kerns(&localkerns); + } + p = tex_aux_check_nucleus_complexity(target, NULL, style, lmt_math_state.size, &localkerns); + if (noad_has_scripts(target)) { + scaled top = 0; /*tex Normally this would be: | delta|. */ + scaled bot = 0; /*tex Normally this would be: |-delta|. 
*/ + if (localkerns.topright || localkerns.bottomright) { + italic = 0; + } + tex_aux_get_shifts(math_nolimits_mode_par, style, italic, &top, &bot); + tex_aux_make_scripts(target, p, 0, style, top, bot, 0, &localkerns); + } else { + tex_aux_assign_new_hlist(target, p); + } + // italic = 0; + return 0; +} + +static scaled tex_aux_op_do_limits(halfword target, int style, int size, int italic, kernset *kerns) +{ + halfword nucleus = noad_nucleus(target); + halfword x = tex_aux_clean_box(noad_supscr(target), tex_math_style_variant(style, math_parameter_superscript_variant), style, math_sup_list, 0, NULL); + halfword y = tex_aux_clean_box(nucleus, style, style, math_nucleus_list, 0, NULL); + halfword z = tex_aux_clean_box(noad_subscr(target), tex_math_style_variant(style, math_parameter_subscript_variant), style, math_sub_list, 0, NULL); + halfword result = tex_new_null_box_node(vlist_node, math_modifier_list); + (void) kerns; + tex_attach_attribute_list_copy(result, target); + if (nucleus) { + switch (node_type(nucleus)) { + case sub_mlist_node: + case sub_box_node: + { + halfword n = kernel_math_list(nucleus); + if (! n) { + /* kind of special */ + } else if (node_type(n) == hlist_node) { + /*tex just a not scaled char */ + n = box_list(n); + while (n) { + if (node_type(n) == glyph_node && ! tex_has_glyph_option(n, glyph_option_no_italic_correction)) { + if (tex_aux_math_engine_control(glyph_font(n), math_control_apply_boxed_italic_kern)) { + italic = tex_aux_math_x_size_scaled(glyph_font(n), tex_char_italic_from_font(glyph_font(n), glyph_character(n)), size); + } + } + n = node_next(n); + } + } else { + /*tex This might need checking. 
*/ + while (n) { + if (node_type(n) == fence_noad && noad_italic(n) > italic) { + /*tex we can have dummies, the period ones */ + italic = tex_aux_math_given_x_scaled(noad_italic(n)); + } + n = node_next(n); + } + } + break; + } + case math_char_node: + { + halfword fnt = tex_fam_fnt(kernel_math_family(nucleus), size); + halfword chr = kernel_math_character(nucleus); + italic = tex_aux_math_x_size_scaled(fnt, tex_char_italic_from_font(fnt, chr), size); + break; + } + } + } + /*tex We're still doing limits. */ + { + scaled halfitalic = tex_half_scaled(italic); + scaled supwidth = box_width(x); + scaled boxwidth = box_width(y); + scaled subwidth = box_width(z); + box_width(result) = boxwidth; + if (supwidth > boxwidth) { + boxwidth = supwidth; + } + if (subwidth > boxwidth) { + boxwidth = subwidth; + } + box_width(result) = boxwidth; + x = tex_aux_rebox(x, boxwidth, size); + y = tex_aux_rebox(y, boxwidth, size); + z = tex_aux_rebox(z, boxwidth, size); + /*tex This is only (visually) ok for integrals, but other operators have no italic anyway. */ + box_shift_amount(x) = halfitalic; + box_shift_amount(z) = -halfitalic; + if (math_limits_mode_par >= 1) { + /*tex + This option enforces the real dimensions and avoids longer limits to stick out + which is a traditional \TEX\ feature. It's handy to have this for testing. Nicer + would be to also adapt the width of the wrapped scripts but these are reboxed + with centering so we keep that as it is. + */ + if (supwidth + halfitalic > boxwidth) { + box_width(result) += supwidth + halfitalic - boxwidth; + } + if (subwidth + halfitalic > boxwidth) { + box_x_offset(result) = subwidth + halfitalic - boxwidth; + box_width(result) += box_x_offset(result); + tex_set_box_geometry(result, offset_geometry); + } + } else { + /*tex We keep the possible left and/or right overshoot of limits. */ + } + /*tex Here the target |v| is still empty but we do set the height and depth. 
*/ + box_height(result) = box_height(y); + box_depth(result) = box_depth(y); + } + /*tex + + Attach the limits to |y| and adjust |height(v)|, |depth(v)| to account for + their presence. + + We use |shift_up| and |shift_down| in the following program for the amount of + glue between the displayed operator |y| and its limits |x| and |z|. + + The vlist inside box |v| will consist of |x| followed by |y| followed by |z|, + with kern nodes for the spaces between and around them; |b| is baseline and |v| + is the minumum gap. + + */ + if (noad_supscr(target)) { + scaled bgap = tex_get_math_y_parameter_checked(style, math_parameter_limit_above_bgap); + scaled vgap = tex_get_math_y_parameter_checked(style, math_parameter_limit_above_vgap); + scaled vkern = tex_get_math_y_parameter_checked(style, math_parameter_limit_above_kern); + scaled vshift = bgap - box_depth(x); + if (vshift < vgap) { + vshift = vgap; + } + if (vshift) { + halfword kern = tex_new_kern_node(vshift, vertical_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_couple_nodes(kern, y); + tex_couple_nodes(x, kern); + } else { + tex_couple_nodes(y, x); + } + if (vkern) { + halfword kern = tex_new_kern_node(vkern, vertical_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_couple_nodes(kern, x); + box_list(result) = kern; + } else { + box_list(result) = x; + } + box_height(result) += vkern + box_total(x) + vshift; + } else { + box_list(x) = null; + tex_flush_node(x); + box_list(result) = y; + } + if (noad_subscr(target)) { + scaled bgap = tex_get_math_y_parameter_checked(style, math_parameter_limit_below_bgap); + scaled vgap = tex_get_math_y_parameter_checked(style, math_parameter_limit_below_vgap); + scaled vkern = tex_get_math_y_parameter_checked(style, math_parameter_limit_below_kern); + scaled vshift = bgap - box_height(z); + if (vshift < vgap) { + vshift = vgap; + } + if (vshift) { + halfword kern = tex_new_kern_node(vshift, vertical_math_kern_subtype); + 
tex_attach_attribute_list_copy(kern, target); + tex_couple_nodes(y, kern); + tex_couple_nodes(kern, z); + } else { + tex_couple_nodes(y, z); + } + if (vkern) { + halfword kern = tex_new_kern_node(vkern, vertical_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_couple_nodes(z, kern); + } + box_depth(result) += vkern + box_total(z) + vshift; + } else { + box_list(z) = null; + tex_flush_node(z); + } + if (noad_subscr(target)) { + kernel_math_list(noad_subscr(target)) = null; + tex_flush_node(noad_subscr(target)); + noad_subscr(target) = null; + } + if (noad_supscr(target)) { + kernel_math_list(noad_supscr(target)) = null; + tex_flush_node(noad_supscr(target)); + noad_supscr(target) = null; + } + tex_aux_assign_new_hlist(target, result); + // italic = 0; + return 0; +} + +/*tex + The adapt to left or right is sort of fuzzy and might disappear in future versions. After all, + we have more fance fence support now. +*/ + +static void tex_aux_op_wrapup(halfword target, int style, int size, int italic, kernset *kerns) +{ + halfword x; + int shiftaxis = 0; + halfword chr = null; + halfword fnt = null; + halfword autoleft = null; + halfword autoright = null; + halfword autosize = has_noad_option_auto(target); + scaled openupheight = has_noad_option_openupheight(target) ? noad_height(target) : 0; + scaled openupdepth = has_noad_option_openupdepth(target) ? noad_depth(target) : 0; + (void) kerns; + if (has_noad_option_adapttoleft(target) && node_prev(target)) { + autoleft = node_prev(target); + if (node_type(autoleft) != simple_noad) { + autoleft = null; + } else { + autoleft = noad_new_hlist(autoleft); + } + } + if (has_noad_option_adapttoright(target) && node_next(target)) { + /* doesn't always work well */ + autoright = noad_nucleus(node_next(target)); + } + tex_aux_fetch(noad_nucleus(target), "operator", &fnt, &chr); + /*tex Nicer is actually to just test for |display_style|. 
*/ + if ((style < text_style) || autoleft || autoright || autosize) { + /*tex Try to make it larger in displaystyle. */ + scaled opsize = tex_get_math_parameter(style, math_parameter_operator_size, NULL); + if ((autoleft || autoright || autosize) && (opsize == undefined_math_parameter)) { + opsize = 0; + } + if (opsize != undefined_math_parameter) { + /*tex Creating a temporary delimiter is the cleanest way. */ + halfword y = tex_new_node(delimiter_node, 0); + tex_attach_attribute_list_copy(y, noad_nucleus(target)); + delimiter_small_family(y) = kernel_math_family(noad_nucleus(target)); + delimiter_small_character(y) = kernel_math_character(noad_nucleus(target)); + opsize = tex_aux_math_y_scaled(opsize, style); + if (autoright) { + /*tex We look ahead and preroll, |autoright| is a noad. */ + scaledwhd siz = tex_natural_hsizes(autoright, null, 0.0, 0, 0); + scaled total = siz.ht + siz.dp; + if (total > opsize) { + opsize = total; + } + } + if (autoleft && box_total(autoleft) > opsize) { + /*tex We look back and check, |autoleft| is a box. */ + opsize = box_total(autoleft); + } + /* we need to check for overflow here */ + opsize += limited_scaled(openupheight); + opsize += openupdepth; + x = tex_aux_make_delimiter(target, y, text_size, opsize, 0, style, ! has_noad_option_noaxis(target), NULL, &italic, 0, has_noad_option_nooverflow(target), NULL, 0); + } else { + /*tex + Where was the weird + 1 coming from? It tweaks the comparison. Anyway, because we + do a lookup we don't need to scale the |total| and |opsize|. We have a safeguard + against endless loops. 
+ */ + opsize = tex_char_total_from_font(fnt, chr) + openupheight + openupdepth + 1; + /* + if (opsize) { + opsize = tex_aux_math_y_style_scaled(fnt, opsize, size); // we compare unscaled + } + */ + while (tex_char_has_tag_from_font(fnt, chr, list_tag) && tex_char_total_from_font(fnt, chr) < opsize) { + halfword rem = tex_char_remainder_from_font(fnt, chr); + if (chr != rem && tex_char_exists(fnt, rem)) { + chr = rem; + kernel_math_character(noad_nucleus(target)) = chr; + } else { + break; + } + } + if (math_kernel_node_has_option(noad_nucleus(target), math_kernel_no_italic_correction)) { + italic = 0; + } else { + italic = tex_aux_math_x_size_scaled(fnt, tex_char_italic_from_font(fnt, chr), size); + } + x = tex_aux_clean_box(noad_nucleus(target), style, style, math_nucleus_list, 0, NULL); + shiftaxis = 1; + } + } else { + /*tex Non display style. */ + italic = tex_aux_math_x_size_scaled(fnt, tex_char_italic_from_font(fnt, chr), size); + x = tex_aux_clean_box(noad_nucleus(target), style, style, math_nucleus_list, 0, NULL); + box_height(x) += openupheight; + box_depth(x) += openupdepth; + shiftaxis = 1; + } + if (shiftaxis) { + /*tex center vertically */ + box_shift_amount(x) = tex_half_scaled(box_height(x) - box_depth(x)) - tex_aux_math_axis(size); + } + if ((node_type(x) == hlist_node) && (openupheight || openupdepth)) { + box_shift_amount(x) -= openupheight/2; + box_shift_amount(x) += openupdepth/2; + } + node_type(noad_nucleus(target)) = sub_box_node; + kernel_math_list(noad_nucleus(target)) = x; +} + +static scaled tex_aux_make_op(halfword target, int style, int size, int italic, int limits_mode, kernset *kerns) +{ + if (limits_mode == limits_horizontal_mode) { + /*tex We enforce this and it can't be overruled! */ + } else if (! has_noad_option_limits(target) && ! 
has_noad_option_nolimits(target) && (style == display_style || style == cramped_display_style)) { + limits_mode = limits_vertical_mode; + noad_options(target) |= noad_option_limits; /* so we can track it */ + } else if (has_noad_option_nolimits(target)) { + limits_mode = limits_horizontal_mode; + } else if (has_noad_option_limits(target)) { + limits_mode = limits_vertical_mode; + } + if (node_type(noad_nucleus(target)) == math_char_node) { + tex_aux_op_wrapup(target, style, size, italic, kerns); + } + switch (limits_mode) { + case limits_horizontal_mode: + /*tex + We end up here when there is an explicit directive or when we're in displaymode without + an explicit directive. If in text mode we want to have this mode driven placement tweak + we need to use the |\nolimits| directive. Beware: that mode might be changed to a font + property or option itself. + */ + return tex_aux_op_no_limits(target, style, size, italic, kerns); /* italic becomes zero */ + case limits_vertical_mode: + /*tex + + We end up here when we have a limits directive or when that property is set because + we're in displaymode. The following program builds a vlist box |v| for displayed limits. + The width of the box is not affected by the fact that the limits may be skewed. + */ + return tex_aux_op_do_limits(target, style, size, italic, kerns); /* italic becomes zero */ + default: + /*tex + We end up here when we're not in displaymode and don't have a (no)limits directive. + */ + return italic; /* italic is retained */ + } +} + +/*tex + + A ligature found in a math formula does not create a ligature, because there is no question of + hyphenation afterwards; the ligature will simply be stored in an ordinary |glyph_node|, after + residing in an |ord_noad|. + + The |type| is converted to |math_text_char| here if we would not want to apply an italic + correction to the current character unless it belongs to a math font (i.e., a font with + |space=0|). 
+ + No boundary characters enter into these ligatures. + +*/ + +/* How about: ord_noad_type_limits */ + +// inline static int tex_aux_is_simple_char_noad(halfword p) /* only old school characters */ +// { +// return (node_type(p) == simple_noad) && (node_type(noad_nucleus(p)) == math_char_node && tex_math_has_class_option(node_subtype(p), check_ligature_class_option)); +// } +// +// inline static int tex_aux_have_same_nucleus_fam(halfword p, halfword q) +// { +// return math_family(noad_nucleus(p)) == math_family(noad_nucleus(q)); +// } +// +// static void tex_aux_make_ord(halfword q, halfword size) +// { +// /*tex The left-side character for lig/kern testing. */ +// RESTART: +// /*tex We can end up here again after a ligature is built. */ +// if (! noad_has_following_scripts(q) && node_type(noad_nucleus(q)) == math_char_node) { +// halfword p = node_next(q); +// /*tex */ +// if (p && tex_aux_is_simple_char_noad(p) && tex_aux_have_same_nucleus_fam(p, q)) { +// halfword chr = null; +// halfword fnt = null; +// node_type(noad_nucleus(q)) = math_text_char_node; +// tex_aux_fetch(noad_nucleus(q), "ordinal", &fnt, &chr); +// if (tex_aux_math_engine_control(fnt, math_control_apply_ordinary_italic_kern)) { +// /* +// We don't have other kerns in opentype math fonts. There are however these +// staircase kerns that are dealt with elsewhere. But for new math fonts we do +// need to add italic correction. 
+// */ +// if (math_kernel_node_has_option(noad_nucleus(q), math_kernel_no_italic_correction)) { +// /* go on */ +// } else { +// scaled kern = tex_aux_math_x_size_scaled(fnt, tex_char_italic_from_font(fnt, math_character(noad_nucleus(q))), size); +// if (kern) { +// tex_aux_math_insert_italic_kern(q, kern, q, "ord"); +// } +// } +// } else if (tex_aux_math_engine_control(fnt, math_control_check_ligature_and_kern)) { +// if (tex_has_kern(fnt, chr) || tex_has_ligature(fnt, chr)) { +// /*tex +// +// Here we construct ligatures, quite unlikely in new math fonts so maybe we +// should just not go here for such fonts. +// +// If character |a| has a kern with |cur_c|, attach the kern after~|q|; or if +// it has a ligature with |cur_c|, combine noads |q| and~|p| appropriately; +// then |return| if the cursor has moved past a noad, or |goto restart|. +// +// Note that a ligature between an |ord_noad| and another kind of noad is +// replaced by an |ord_noad|, when the two noads collapse into one. +// +// We could make a parenthesis (say) change shape when it follows certain +// letters. Presumably a font designer will define such ligatures only when +// this convention makes sense. 
+// +// */ +// halfword nxt = math_character(noad_nucleus(p)); +// halfword slot; +// int type = tex_valid_ligature(chr, nxt, &slot); +// if (type >= 0) { +// switch (type) { +// case 1: /*tex \type{=:|} */ +// case 5: /*tex \type{=:|>} */ +// math_character(noad_nucleus(q)) = slot; +// break; +// case 2: /*tex \type{|=:} */ +// case 6: /*tex \type{|=:>} */ +// math_character(noad_nucleus(p)) = slot; +// break; +// case 3: /*tex \type{|=:|} */ +// case 7: /*tex \type{|=:|>} */ +// case 11: /*tex \type{|=:|>>} */ +// { +// halfword r = tex_new_node(simple_noad, ordinary_noad_subtype); +// halfword s = tex_new_node(math_char_node, 0); +// tex_attach_attribute_list_copy(r, q); +// tex_attach_attribute_list_copy(s, q); +// noad_nucleus(r) = s; +// math_character(noad_nucleus(r)) = slot; +// math_family(noad_nucleus(r)) = math_family(noad_nucleus(q)); +// tex_couple_nodes(q, r); +// tex_couple_nodes(r, p); +// if (type < 11) { +// node_type(noad_nucleus(r)) = math_char_node; +// } else { +// /*tex prevent combination */ +// node_type(noad_nucleus(r)) = math_text_char_node; +// } +// } +// break; +// default: /*tex |=:| */ +// tex_try_couple_nodes(q, node_next(p)); +// math_character(noad_nucleus(q)) = slot; +// noad_subscr(q) = noad_subscr(p); +// noad_supscr(q) = noad_supscr(p); +// noad_subscr(p) = null ; +// noad_supscr(p) = null ; +// tex_flush_node(p); +// break; +// } +// if (type > 3) { +// return; +// } else { +// node_type(noad_nucleus(q)) = math_char_node; +// goto RESTART; /*tex Inefficient but we never see this branch anyway. 
*/ +// } +// } +// { +// // scaled kern = tex_aux_math_x_size_scaled(fnt, tex_valid_kern(chr, nxt), size); +// halfword nxtchr = null; +// halfword nxtfnt = null; +// tex_aux_fetch(noad_nucleus(p), "ordinal", &nxtfnt, &nxtchr); +// scaled kern = tex_get_kern(fnt, chr, nxtchr); +// if (kern) { +// tex_aux_math_insert_font_kern(q, kern, q, "ord"); +// return; +// } +// } +// } +// } +// } +// } +// } + + +// $ \mathord {a} $ : ord -> nucleus -> mathchar +// $ \mathord {ab} $ : ord -> nucleus -> submlist -> ord + ord + +/*tex + Have there ever been math fonts with kerns and ligatures? If so it had to be between characters + within the same font. Maybe this was meant for composed charaters? And the 256 limits of the + number of characters didn't help either. This is why we take the freedom to do things a bit + different. + + We don't have other kerns in opentype math fonts. There are however these staircase kerns that + are dealt with elsewhere. But for new math fonts we do need to add italic correction occasionally + and staircase kerns only happen with scripts. + + We could add support for ligatures but we don't need those anyway so it's a waste of time and + bytes. + + The ord checker kicks in after every ord but we can consider a special version where we handle + |sub_list_node| noads. And we could maybe check on sloped shapes but then we for sure end up + in a mess we don't want. + +*/ + +static halfword tex_aux_check_ord(halfword current, halfword size, halfword next) +{ + if (! noad_has_following_scripts(current)) { + halfword nucleus = noad_nucleus(current); + switch (node_type(nucleus)) { + case sub_mlist_node: + { + // I'm not that motivated for this and it should be an engine option anyway then. 
+ + // halfword head = math_list(nucleus); + // halfword tail = tex_tail_of_node_list(head); + // // doesn't work + // if (node_type(head) == simple_noad && node_prev(current) ) { + // if (node_type(node_prev(current)) == simple_noad) { + // head = tex_aux_check_ord(node_prev(current), size, head); + // math_list(nucleus) = head; + // } + // } + // // works + // if (node_type(tail) == simple_noad && node_next(current) ) { + // tex_aux_check_ord(tail, size, node_next(current)); + // } + break; + } + case math_char_node: + { + if (! next) { + next = node_next(current); + } + halfword curchr = null; + halfword curfnt = null; + tex_aux_fetch(nucleus, "ordinal", &curfnt, &curchr); + if (curfnt && curchr) { + halfword kern = 0; + halfword italic = 0; + if (next) { + halfword nxtnucleus = noad_nucleus(next); + halfword nxtfnt = null; + halfword nxtchr = null; + if (node_type(nxtnucleus) == math_char_node && kernel_math_family(nucleus) == kernel_math_family(nxtnucleus)) { + tex_aux_fetch(nxtnucleus, "ordinal", &nxtfnt, &nxtchr); + if (nxtfnt && nxtchr) { + halfword mainclass = node_subtype(current); + /* todo: ligatures */ + if (tex_aux_math_engine_control(curfnt, math_control_apply_ordinary_kern_pair)) { + if (math_kernel_node_has_option(nucleus, math_kernel_no_right_pair_kern) || math_kernel_node_has_option(nxtnucleus, math_kernel_no_left_pair_kern)) { + /* ignore */ + } else if (tex_math_has_class_option(mainclass, check_italic_correction_class_option)) { + /* ignore */ + } else if (tex_aux_math_engine_control(curfnt, math_control_apply_ordinary_italic_kern)) { + kern = tex_aux_math_x_size_scaled(curfnt, tex_get_kern(curfnt, curchr, nxtchr), size); + } + } + if (tex_aux_math_engine_control(curfnt, math_control_apply_ordinary_italic_kern)) { + if (math_kernel_node_has_option(nucleus, math_kernel_no_italic_correction)) { + /* ignore */ + } else if (tex_math_has_class_option(mainclass, check_kern_pair_class_option)) { + /* ignore */ + } else if 
(tex_aux_math_engine_control(curfnt, math_control_apply_ordinary_italic_kern)) { + italic = tex_aux_math_x_size_scaled(curfnt, tex_char_italic_from_font(curfnt, curchr), size); + } + } + } + } + } + if (kern) { + current = tex_aux_math_insert_font_kern(current, kern, current, "ord"); + } + if (italic) { + // todo : after last unless upright but then we need to signal + current = tex_aux_math_insert_italic_kern(current, italic, current, "ord"); + } + } + } + break; + } + } + return current; +} + +static halfword tex_aux_prepend_hkern_to_new_hlist(halfword box, scaled delta, halfword subtype, const char *trace) +{ + halfword list = noad_new_hlist(box); + halfword kern = tex_new_kern_node(delta, (quarterword) subtype); + tex_attach_attribute_list_copy(kern, box); + if (list) { + tex_couple_nodes(kern, list); + } + list = kern; + noad_new_hlist(box) = list; + tex_aux_trace_kerns(kern, "adding kern", trace); + return list; +} + +static void tex_aux_append_hkern_to_box_list(halfword box, scaled delta, halfword subtype, const char *trace) +{ + halfword list = box_list(box); + halfword kern = tex_new_kern_node(delta, (quarterword) subtype); + tex_attach_attribute_list_copy(kern, box); + if (list) { + tex_couple_nodes(tex_tail_of_node_list(list), kern); + } else { + list = kern; + } + box_list(box) = list; + box_width(box) += delta; + tex_aux_trace_kerns(kern, "adding kern", trace); +} + +static void tex_aux_prepend_hkern_to_box_list(halfword box, scaled delta, halfword subtype, const char *trace) +{ + halfword list = box_list(box); + halfword kern = tex_new_kern_node(delta, (quarterword) subtype); + tex_attach_attribute_list_copy(kern, box); + if (list) { + tex_couple_nodes(kern, list); + } + list = kern; + box_list(box) = list; + box_width(box) += delta; + tex_aux_trace_kerns(kern, "adding kern", trace); +} + +/*tex + + The purpose of |make_scripts (q, it)| is to attach the subscript and/or superscript of noad |q| + to the list that starts at |new_hlist (q)|, given that 
subscript and superscript aren't both + empty. The superscript will be horizontally shifted over |delta1|, the subscript over |delta2|. + + We set |shift_down| and |shift_up| to the minimum amounts to shift the baseline of subscripts + and superscripts based on the given nucleus. + + Note: We need to look at a character but also at the first one in a sub list and there we + ignore leading kerns and glue. Elsewhere is code that removes kerns assuming that is italic + correction. The heuristics are unreliable for the new fonts so eventualy there will be an + option to ignore such corrections. (We now actually have that level of control.) + + Instead of a few mode parameters we now control this via the control options bitset. In this + case we cheat a bit as there is no relationship with a font (the first |null| parameter that + gets passed here). In the archive we can find all the variants. + +*/ + +static halfword tex_aux_analyze_script(halfword init, scriptdata *data) +{ + if (init) { + switch (node_type(init)) { + case math_char_node : + if (tex_aux_math_engine_control(null, math_control_analyze_script_nucleus_char)) { + if (tex_aux_fetch(init, "script char", &(data->fnt), &(data->chr))) { + return init; + } else { + goto NOTHING; + } + } else { + break; + } + case sub_mlist_node: + if (tex_aux_math_engine_control(null, math_control_analyze_script_nucleus_list)) { + init = kernel_math_list(init); + while (init) { + switch (node_type(init)) { + case kern_node: + case glue_node: + init = node_next(init); + break; + case simple_noad: + { + init = noad_nucleus(init); + if (node_type(init) != math_char_node) { + return null; + } else if (tex_aux_fetch(init, "script list", &(data->fnt), &(data->chr))) { + return init; + } else { + goto NOTHING; + } + } + default: + goto NOTHING; + } + } + } + break; + case sub_box_node: + if (tex_aux_math_engine_control(null, math_control_analyze_script_nucleus_box)) { + init = kernel_math_list(init); + if (init && node_type(init) == 
hlist_node) { + init = box_list(init); + } + while (init) { + switch (node_type(init)) { + case kern_node: + case glue_node: + init = node_next(init); + break; + case glyph_node: + if (tex_aux_fetch(init, "script box", &(data->fnt), &(data->chr))) { + return init; + } else { + goto NOTHING; + } + default: + goto NOTHING; + } + } + } + break; + } + } + NOTHING: + data->fnt = null; + data->chr = null; + return null; +} + +/*tex + + These prescripts are kind of special. For instance, should top and bottom scripts be aligned? + When there is are two top or two bottom, should we then just use the maxima? + +*/ + +static void tex_aux_get_math_sup_shifts(halfword sup, halfword style, scaled *shift_up) +{ + switch (math_scripts_mode_par) { + case 1: + *shift_up = tex_get_math_y_parameter_checked(style, math_parameter_superscript_shift_up); + break; + case 2: + *shift_up = tex_get_math_y_parameter_checked(style, math_parameter_superscript_shift_up); + break; + case 3: + *shift_up = tex_get_math_y_parameter_checked(style, math_parameter_superscript_shift_up) + + tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_shift_down) + - tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down); + break; + case 4: + *shift_up = tex_get_math_y_parameter_checked(style, math_parameter_superscript_shift_up) + + tex_half_scaled(tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_shift_down) + - tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down)); + break; + case 5: + *shift_up = tex_get_math_y_parameter_checked(style, math_parameter_superscript_shift_up) + + tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_shift_down) + - tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down); + break; + default: + { + scaled clr = tex_get_math_y_parameter_checked(style, math_parameter_superscript_shift_up); + scaled bot = tex_get_math_y_parameter_checked(style, 
math_parameter_superscript_bottom_min); + if (*shift_up < clr) { + *shift_up = clr; + } + clr = box_depth(sup) + bot; + if (*shift_up < clr) { + *shift_up = clr; + } + break; + } + } +} + +static void tex_aux_get_math_sub_shifts(halfword sub, halfword style, scaled *shift_down) +{ + switch (math_scripts_mode_par) { + case 1: + *shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down); + break; + case 2: + *shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_shift_down); + break; + case 3: + *shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_shift_down); + break; + case 4: + *shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down) + + tex_half_scaled(tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_shift_down) + - tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down)) ; + break; + case 5: + *shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down); + break; + default: + { + scaled clr = tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down); + scaled top = tex_get_math_y_parameter_checked(style, math_parameter_subscript_top_max); + if (*shift_down < clr) { + *shift_down = clr; + } + clr = box_height(sub) - top; + if (*shift_down < clr) { + *shift_down = clr; + } + break; + } + } +} + +static void tex_aux_get_math_sup_sub_shifts(halfword sup, halfword sub, halfword style, scaled *shift_up, scaled *shift_down) +{ + switch (math_scripts_mode_par) { + case 1: + *shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down); + break; + case 2: + *shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_shift_down); + break; + case 3: + *shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_shift_down); + break; + case 4: + 
*shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down) + + tex_half_scaled(tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_shift_down) + - tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down)); + break; + case 5: + *shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_down); + break; + default: + { + scaled clr = tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_shift_down); + scaled gap = tex_get_math_y_parameter_checked(style, math_parameter_subscript_superscript_vgap); + scaled bot = tex_get_math_y_parameter_checked(style, math_parameter_superscript_subscript_bottom_max); + if (*shift_down < clr) { + *shift_down = clr; + } + clr = gap - ((*shift_up - box_depth(sup)) - (box_height(sub) - *shift_down)); + if (clr > 0) { + *shift_down += clr; + clr = bot - (*shift_up - box_depth(sup)); + if (clr > 0) { + *shift_up += clr; + *shift_down -= clr; + } + } + break; + } + } +} + +static halfword tex_aux_combine_script(halfword target, halfword width, halfword pre, halfword post, halfword *k1, halfword *k2) +{ + *k1 = tex_new_kern_node(-(width + box_width(pre)), horizontal_math_kern_subtype); + *k2 = tex_new_kern_node(width, horizontal_math_kern_subtype); + tex_couple_nodes(*k1, pre); + tex_couple_nodes(pre, *k2); + if (post) { + tex_couple_nodes(*k2, post); + } + post = tex_hpack(*k1, 0, packing_additional, direction_unknown, holding_none_option); + tex_attach_attribute_list_copy(*k1, target); + tex_attach_attribute_list_copy(*k2, target); + tex_attach_attribute_list_copy(post, target); + node_subtype(post) = math_pre_post_list; + return post; +} + + /*tex + + The following steps are involved: + + We look at the subscript character (_i) or first character in a list (_{ij}). We look at the + superscript character (^i) or first character in a list (^{ij}). + + Construct a superscript box |x|. 
The bottom of a superscript should never descend below the + baseline plus one-fourth of the x-height. + + Construct a sub/superscript combination box |x|, with the superscript offset by |delta|. When + both subscript and superscript are present, the subscript must be separated from the superscript + by at least four times |preset_rule_thickness|. If this condition would be violated, the + subscript moves down, after which both subscript and superscript move up so that the bottom + of the superscript is at least as high as the baseline plus four-fifths of the x-height. + + Now the horizontal shift for the superscript; the superscript is also to be shifted by |delta1| + (the italic correction). + + Construct a subscript box |x| when there is no superscript. When there is a subscript without + a superscript, the top of the subscript should not exceed the baseline plus four-fifths of the + x-height. + + We start with some helpers that deal with the staircase kerns in \OPENTYPE\ math. + +*/ + +/*tex + + This function tries to find the kern needed for proper cut-ins. The left side doesn't move, but + the right side does, so the first order of business is to create a staggered fence line on the + left side of the right character. + + If the fonts for the left and right bits of a mathkern are not both new-style fonts, then return + a sentinel value meaning: please use old-style italic correction placement + + This code is way to complex as it evolved stepwise and we wanted to keep the post scripts code + more or less the same. but ... I'll redo it. + +*/ + +static scaled tex_aux_math_kern_at(halfword fnt, int chr, int side, int value) +{ + /*tex We know that the character exists. 
*/ + charinfo *ci = tex_get_charinfo(fnt, chr); + if (ci->math) { + scaled *kerns_heights; + int n_of_kerns = tex_get_charinfo_math_kerns(ci, side); + if (n_of_kerns == 0) { + switch (side) { + case top_left_kern: + return tex_char_top_left_kern_from_font(fnt, chr); + case bottom_left_kern: + return tex_char_bottom_left_kern_from_font(fnt, chr); + break; + case top_right_kern: + return tex_char_top_right_kern_from_font(fnt, chr); + case bottom_right_kern: + return tex_char_bottom_right_kern_from_font(fnt, chr); + default: + return 0; + } + } else { + switch (side) { + case top_left_kern: + kerns_heights = ci->math->top_left_math_kern_array; + break; + case bottom_left_kern: + kerns_heights = ci->math->bottom_left_math_kern_array; + break; + case top_right_kern: + kerns_heights = ci->math->top_right_math_kern_array; + break; + case bottom_right_kern: + kerns_heights = ci->math->bottom_right_math_kern_array; + break; + default: + /*tex Not reached: */ + kerns_heights = NULL; + return tex_confusion("math kern at"); + } + } + if (value < kerns_heights[0]) { + return kerns_heights[1]; + } else { + scaled kern = 0; + for (int i = 0; i < n_of_kerns; i++) { + scaled height = kerns_heights[i * 2]; + kern = kerns_heights[(i * 2) + 1]; + if (height > value) { + return kern; + } + } + return kern; + } + } else { + return 0; + } +} + +inline static scaled tex_aux_max_left_kern_value(scaled *kerns, int n) +{ + if (kerns && n > 0) { + scaled kern = 0; + for (int i = 0; i < n; i++) { + scaled value = kerns[(i * 2) + 1]; + if (value < kern) { + kern = value; + } + } + return -kern; + } else { + return 0; + } +} + +static scaled tex_aux_math_left_kern(halfword fnt, int chr) +{ + charinfo *ci = tex_get_charinfo(fnt, chr); + if (ci->math) { + scaled top = 0; + scaled bot = 0; + { + scaled *a = ci->math->top_left_math_kern_array; + halfword n = a ? 
tex_get_charinfo_math_kerns(ci, top_left_kern) : 0; + if (n) { + top = tex_aux_max_left_kern_value(a, n); + } else { + top = tex_char_top_left_kern_from_font(fnt, chr); + } + } + { + scaled *a = ci->math->bottom_left_math_kern_array; + halfword n = a ? tex_get_charinfo_math_kerns(ci, bottom_left_kern) : 0; + if (n) { + bot = tex_aux_max_left_kern_value(a, n); + } else { + bot = tex_char_bottom_left_kern_from_font(fnt, chr); + } + } + return top > bot ? top : bot; + } else { + return 0; + } +} + +/* + +inline static scaled tex_aux_max_right_kern_value(scaled *kerns, int n) +{ + if (kerns && n > 0) { + scaled kern = 0; + for (int i = 0; i < n; i++) { + scaled value = kerns[(i * 2) + 1]; + if (value > kern) { + kern = value; + } + } + return kern; + } else { + return 0; + } +} + +static scaled tex_aux_math_right_kern(halfword fnt, int chr) +{ + charinfo *ci = tex_get_charinfo(fnt, chr); + if (ci->math) { + scaled top = 0; + scaled bot = 0; + { + scaled *a = ci->math->top_right_math_kern_array; + halfword n = a ? tex_get_charinfo_math_kerns(ci, top_right_kern) : 0; + if (n) { + top = tex_aux_max_right_kern_value(a, n); + } else { + top = tex_char_top_right_kern_from_font(fnt, chr); + } + } + { + scaled *a = ci->math->bottom_right_math_kern_array; + halfword n = a ? tex_get_charinfo_math_kerns(ci, bottom_right_kern) : 0; + if (n) { + bot = tex_aux_max_right_kern_value(a, n); + } else { + bot = tex_char_bottom_right_kern_from_font(fnt, chr); + } + } + return top > bot ? 
top : bot; + } else { + return 0; + } +} +*/ + +static scaled tex_aux_find_math_kern(halfword l_f, int l_c, halfword r_f, int r_c, int cmd, scaled shift, int *found) +{ + if (tex_aux_math_engine_control(l_f, math_control_staircase_kern) && + tex_aux_math_engine_control(r_f, math_control_staircase_kern) && + /* tex_aux_has_opentype_metrics(l_f) && tex_aux_has_opentype_metrics(r_f) && */ + tex_char_exists(l_f, l_c) && tex_char_exists(r_f, r_c)) { + scaled krn_l = 0; + scaled krn_r = 0; + scaled krn = 0; + switch (cmd) { + case superscript_cmd: + /*tex bottom of superscript */ + { + scaled corr_height_top = tex_char_height_from_font(l_f, l_c); + scaled corr_height_bot = -tex_char_depth_from_font(r_f, r_c) + shift; + krn_l = tex_aux_math_kern_at(l_f, l_c, top_right_kern, corr_height_top); + krn_r = tex_aux_math_kern_at(r_f, r_c, bottom_left_kern, corr_height_top); + krn = krn_l + krn_r; + krn_l = tex_aux_math_kern_at(l_f, l_c, top_right_kern, corr_height_bot); + krn_r = tex_aux_math_kern_at(r_f, r_c, bottom_left_kern, corr_height_bot); + } + break; + case subscript_cmd: + /*tex top of subscript */ + { + scaled corr_height_top = tex_char_height_from_font(r_f, r_c) - shift; + scaled corr_height_bot = -tex_char_depth_from_font(l_f, l_c); + krn_l = tex_aux_math_kern_at(l_f, l_c, bottom_right_kern, corr_height_top); + krn_r = tex_aux_math_kern_at(r_f, r_c, top_left_kern, corr_height_top); + krn = krn_l + krn_r; + krn_l = tex_aux_math_kern_at(l_f, l_c, bottom_right_kern, corr_height_bot); + krn_r = tex_aux_math_kern_at(r_f, r_c, top_left_kern, corr_height_bot); + } + break; + default: + return tex_confusion("find math kern"); + } + *found = 1; + if ((krn_l + krn_r) < krn) { + krn = krn_l + krn_r; + } + return krn ? 
tex_aux_math_x_size_scaled(l_f, krn, lmt_math_state.size) : 0; + } else { + return MATH_KERN_NOT_FOUND; + } +} + +static int tex_aux_get_sup_kern(halfword kernel, scriptdata *sup, scaled shift_up, scaled supshift, scaled *supkern, kernset *kerns) +{ + int found = 0; + *supkern = MATH_KERN_NOT_FOUND; + if (sup->node) { + *supkern = tex_aux_find_math_kern(glyph_font(kernel), glyph_character(kernel), sup->fnt, sup->chr, superscript_cmd, shift_up, &found); + if (*supkern == MATH_KERN_NOT_FOUND) { + *supkern = supshift; + } else { + if (*supkern) { + tex_aux_trace_kerns(*supkern, "superscript kern", "regular"); + } + *supkern += supshift; + } + return found; + } + if (kerns && kerns->topright) { + *supkern = kerns->topright; + if (*supkern == MATH_KERN_NOT_FOUND) { + *supkern = supshift; + } else { + if (*supkern) { + tex_aux_trace_kerns(*supkern, "superscript kern", "kernset top right"); + } + *supkern += supshift; + } + return found; + } + *supkern = supshift; + return found; +} + +static int tex_aux_get_sub_kern(halfword kernel, scriptdata *sub, scaled shift_down, scaled subshift, scaled *subkern, kernset *kerns) +{ + int found = 0; + *subkern = MATH_KERN_NOT_FOUND; + if (sub->node) { + *subkern = tex_aux_find_math_kern(glyph_font(kernel), glyph_character(kernel), sub->fnt, sub->chr, subscript_cmd, shift_down, &found); + if (*subkern == MATH_KERN_NOT_FOUND) { + *subkern = subshift; + } else { + if (*subkern) { + tex_aux_trace_kerns(*subkern, "subscript kern", "regular"); + } + *subkern += subshift; + } + return found; + } + if (kerns && kerns->bottomright) { + *subkern = kerns->bottomright; + if (*subkern == MATH_KERN_NOT_FOUND) { + *subkern = subshift; + } else { + if (*subkern) { + tex_aux_trace_kerns(*subkern, "superscript kern", "kernset bottom right"); + } + *subkern += subshift; + } + return found; + } + *subkern = subshift; + return found; +} + +/*tex + + The code is quite ugly because these staircase kerns can only be calculated when we know the + heights and 
depths but when we pack the pre/post scripts we already relatiev position them so + we need to manipulate kerns. I need to figure out why we have slight rounding errors in the + realignments of prescripts. Anyway, because prescripts are not really part of \TEX\ we have + some freedom in dealing with them. + + This code is now a bit too complex due to some (probably by now) redundant analysis so at some + point I will rewrite it. + +*/ + +inline static scaled tex_aux_insert_italic_now(halfword target, halfword kernel, scaled italic) +{ + switch (node_type(noad_nucleus(target))) { + case math_char_node: + case math_text_char_node: + { + halfword fam = noad_family(noad_nucleus(target)); + if (fam != unused_math_family) { + halfword fnt = tex_fam_fnt(fam, lmt_math_state.size); + if (! tex_aux_math_engine_control(fnt, math_control_apply_script_italic_kern)) { + /*tex We ignore the correction. */ + italic = 0; + } else if (noad_subscr(target)) { + /*tex We will add the correction before the superscripts and/or primes. */ + } else { + /*tex We can add the correction the kernel and then forget about it. */ + tex_aux_math_insert_italic_kern(kernel, italic, noad_nucleus(target), "scripts"); + italic = 0; + } + } else { + /*tex We have a weird case, so we ignore the correction. */ + italic = 0; + } + } + break; + } + return italic; +} + +static inline int tex_aux_raise_prime_composed(halfword target) +{ + int mainclass = -1 ; + /* maybe also mainclass */ + switch (node_type(target)) { + case simple_noad: + mainclass = node_subtype(target); + break; + case radical_noad: + mainclass = radical_noad_subtype; + break; + case fraction_noad: + mainclass = fraction_noad_subtype; + break; + case accent_noad: + mainclass = accent_noad_subtype; + break; + case fence_noad: + /* we could be more granular and do open / close nut for now assume symmetry */ + mainclass = fenced_noad_subtype; + break; + } + return mainclass >= 0 ? 
tex_math_has_class_option(mainclass, raise_prime_option) : 0; +} + +static void tex_aux_make_scripts(halfword target, halfword kernel, scaled italic, int style, scaled supshift, scaled subshift, scaled supdrop, kernset *kerns) +{ + halfword result = null; + halfword preresult = null; + scaled prekern = 0; + scaled primekern = 0; + scaled shift_up = 0; + scaled shift_down = 0; + scaled prime_up = 0; + scriptdata postsubdata = { .node = null, .fnt = null_font, .chr = 0, .box = null, .kern = null, .slack = 0, .shifted = 0 }; + scriptdata postsupdata = { .node = null, .fnt = null_font, .chr = 0, .box = null, .kern = null, .slack = 0, .shifted = 0 }; + scriptdata presubdata = { .node = null, .fnt = null_font, .chr = 0, .box = null, .kern = null, .slack = 0, .shifted = 0 }; + scriptdata presupdata = { .node = null, .fnt = null_font, .chr = 0, .box = null, .kern = null, .slack = 0, .shifted = 0 }; + scriptdata primedata = { .node = null, .fnt = null_font, .chr = 0, .box = null, .kern = null, .slack = 0, .shifted = 0 }; + halfword maxleftkern = 0; + // halfword maxrightkern = 0; + scaled leftslack = 0; + scaled rightslack = 0; + scaledwhd kernelsize = { .wd = 0, .ht = 0, .dp = 0, .ic = 0 }; + // scaled primewidth = 0; + scaled topovershoot = 0; + scaled botovershoot = 0; + int italicmultiplier = 1; /* This was a hard coded 2 so it needs more checking! */ + int splitscripts = 0; + quarterword primestate = prime_unknown_location; + /*tex + This features was added when MS and I found that the Latin Modern (and other) fonts have + rather badly configured script (calligraphic) shapes. There is no provision for proper + anchoring subscripts and superscripts can overlap with for instance wide accents especially + when there is not much granularity in them. For that we now register the overshoot of + accents and compensate for them here. + + One assumption is that the shape is somewhat italic and that an overshoot makes it even + more so. 
The two factors default to zero, so it only works when the right parameters are + set. + + It's a mess. By adding more and more and also trying to be a bit like old \TEX\ we now have + too many kerns. + + */ + if (node_type(target) == accent_noad) { + scaled top = tex_get_math_parameter_default(style, math_parameter_accent_top_overshoot, 0); + scaled bot = tex_get_math_parameter_default(style, math_parameter_accent_bottom_overshoot, 0); + topovershoot = scaledround(accent_top_overshoot(target) * top / 100.0); + botovershoot = scaledround(accent_top_overshoot(target) * bot / 100.0); + } + /*tex + So this is somewhat weird. We pass the kernel and also some italic and then act upon the + target again. This is a bit messy side effect of the transition from old to new fonts. We + also have to make sure that we don't add the correction too soon, that is, before the + subscript. + */ + if (italic) { + italic = tex_aux_insert_italic_now(target, kernel, italic); + } + /*tex + In some cases we need to split the scripts, for instance when we have fenced material that + can get split over lines. + */ + if (node_type(target) == simple_noad) { + switch (node_subtype(target)) { + case fenced_noad_subtype: + splitscripts = tex_math_has_class_option(fenced_noad_subtype, unpack_class_option); + break; + case ghost_noad_subtype: + splitscripts = has_noad_option_unpacklist(target); + break; + } + } + /*tex + When we have a single character we need to deal with kerning based on staircase kerns, but + we also can have explicit kerns defined with single characters, which is more a \CONTEXT\ + feature as it is not in \OPENTYPE\ fonts. 
+ */ + tex_aux_assign_new_hlist(target, kernel); + kernelsize = tex_natural_hsizes(kernel, null, 0.0, 0, 0); + if (kerns) { + /* todo: option */ + if (kerns->height) { + kernelsize.ht = kerns->height; + } + if (kerns->depth) { + kernelsize.dp = kerns->depth; + } + } + switch (node_type(kernel)) { + case glyph_node: + postsubdata.node = tex_aux_analyze_script(noad_subscr(target), &postsubdata); + postsupdata.node = tex_aux_analyze_script(noad_supscr(target), &postsupdata); + primedata.node = tex_aux_analyze_script(noad_prime(target), &primedata); + maxleftkern = tex_aux_math_left_kern(glyph_font(kernel), glyph_character(kernel)); + // maxrightkern = tex_aux_math_right_kern(glyph_font(kernel), glyph_character(kernel)); + prime_up = tex_get_math_y_parameter_default(style, math_parameter_prime_shift_drop, 0); + shift_up = tex_get_math_y_parameter_checked(style, math_parameter_superscript_shift_drop); + shift_down = tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_drop); + break; // fallthrough + default: + kernelsize.ht -= supdrop; /* new */ + prime_up = kernelsize.ht - tex_get_math_y_parameter_default(style, math_parameter_prime_shift_drop, 0); + shift_up = kernelsize.ht - tex_get_math_y_parameter_checked(style, math_parameter_superscript_shift_drop); + shift_down = kernelsize.dp + tex_get_math_y_parameter_checked(style, math_parameter_subscript_shift_drop); + break; + } + /*tex + Next we're doing some analysis, needed because of all these parameters than control horizontal and vertical + spacing. We start with primes. + */ + if (noad_prime(target)) { + /* todo extra */ + scaled shift = tex_get_math_y_parameter_default(style, math_parameter_prime_shift_up, 0); + scaled raise = tex_get_math_y_parameter_default(style, tex_aux_raise_prime_composed(target) ? 
math_parameter_prime_raise_composed : math_parameter_prime_raise, 0); + scaled distance = tex_get_math_x_parameter_default(style, math_parameter_prime_space_after, 0); + // scaled width = tex_get_math_x_parameter_default(style, math_parameter_prime_width, 0); + primedata.box = tex_aux_clean_box(noad_prime(target), (has_noad_option_nosupscript(target) ? style : tex_math_style_variant(style, math_parameter_prime_variant)), style, math_sup_list, 0, NULL); + box_shift_amount(primedata.box) -= prime_up ? prime_up : shift; + box_shift_amount(primedata.box) -= scaledround(box_height(primedata.box) * raise / 100.0); + kernel_math_list(noad_prime(target)) = null; + tex_flush_node(noad_prime(target)); + noad_prime(target) = null; + if (noad_supscr(target)) { + primestate = prime_at_end_location; + } else if (noad_subscr(target)) { + primestate = prime_above_sub_location; + } else { + primestate = prime_at_begin_location; + } + if (distance) { + tex_aux_append_hkern_to_box_list(primedata.box, distance, horizontal_math_kern_subtype, "prime distance"); + } + primedata.slack = distance; + switch (primestate) { + /* [prime] [super/sub] */ + case prime_at_begin_location: + { + /* supshift ? */ + tex_aux_get_sup_kern(kernel, &primedata, shift_up, supshift, &primekern, kerns); + if (italic) { + /* why no injection */ + primekern += italic; + italic = 0; + } + } + break; + /* [prime/sub] [super] */ + case prime_above_sub_location: + { + /* supshift ? */ + tex_aux_get_sup_kern(kernel, &primedata, shift_up, supshift, &primekern, kerns); + if (italic) { + /* why no injection */ + primekern += italic; + italic = 0; + } + if (primekern) { + tex_aux_prepend_hkern_to_box_list(primedata.box, primekern, math_shape_kern_subtype, "prime kern"); + /* now width added */ + primekern = 0; /* added */ + } + } + break; + /* [super/sub] [prime] */ + case prime_at_end_location: + { + primekern = 0; + } + break; + } + } + /*tex + Each of the scripts gets treated. 
Traditionally a super and subscript are looked and and + vercially spaced out together which in turn results in the staricase kerns needing that + information. Prescripts we handle differently: they are always aligned, so there the + maximum kern wins. + */ + postsupdata.shifted = noad_supscr(target) && has_noad_option_shiftedsupscript(target); + postsubdata.shifted = noad_subscr(target) && has_noad_option_shiftedsubscript(target); + presupdata.shifted = noad_supprescr(target) && has_noad_option_shiftedsupprescript(target); + presubdata.shifted = noad_subprescr(target) && has_noad_option_shiftedsubprescript(target); + /* + When we have a shifted super or subscript (stored in the prescripts) we don't need to kern + the super and subscripts. What to do with the shifts? + */ + if (noad_supscr(target)) { + halfword extra = tex_get_math_y_parameter_checked(style, math_parameter_extra_superscript_shift); + postsupdata.slack = tex_get_math_x_parameter_checked(style, math_parameter_extra_superscript_space); + postsupdata.slack += tex_get_math_x_parameter_checked(style, math_parameter_space_after_script); + postsupdata.box = tex_aux_clean_box(noad_supscr(target), (has_noad_option_nosupscript(target) ? 
style : tex_math_style_variant(style, math_parameter_superscript_variant)), style, math_sup_list, 0, NULL); + if (extra) { + box_height(postsupdata.box) += extra; + box_shift_amount(postsupdata.box) -= extra; + } + if (postsupdata.slack) { + tex_aux_append_hkern_to_box_list(postsupdata.box, postsupdata.slack, horizontal_math_kern_subtype, "post sup slack"); + } + kernel_math_list(noad_supscr(target)) = null; + tex_flush_node(noad_supscr(target)); + noad_supscr(target) = null; + } + if (noad_subscr(target)) { + halfword extra = tex_get_math_y_parameter_checked(style, math_parameter_extra_subscript_shift); + postsubdata.slack = tex_get_math_x_parameter_checked(style, math_parameter_extra_subscript_space); + postsubdata.slack += tex_get_math_x_parameter_checked(style, math_parameter_space_after_script); + postsubdata.box = tex_aux_clean_box(noad_subscr(target), (has_noad_option_nosubscript(target) ? style : tex_math_style_variant(style, math_parameter_subscript_variant)), style, math_sub_list, 0, NULL); + if (extra) { + box_depth(postsubdata.box) += extra; + box_shift_amount(postsubdata.box) += extra; + } + if (postsubdata.slack) { + tex_aux_append_hkern_to_box_list(postsubdata.box, postsubdata.slack, horizontal_math_kern_subtype, "post sub slack"); + } + kernel_math_list(noad_subscr(target)) = null; + tex_flush_node(noad_subscr(target)); + noad_subscr(target) = null; + } + if (noad_supprescr(target)) { + halfword extra = tex_get_math_y_parameter_checked(style, math_parameter_extra_superprescript_shift); + presupdata.slack = tex_get_math_x_parameter_checked(style, math_parameter_extra_superprescript_space); + presupdata.slack += tex_get_math_x_parameter_default(style, math_parameter_space_before_script, 0); + presupdata.box = tex_aux_clean_box(noad_supprescr(target), (has_noad_option_nosupprescript(target) ? 
style : tex_math_style_variant(style, math_parameter_superscript_variant)), style, math_sup_list, 0, NULL); + if (maxleftkern) { + tex_aux_append_hkern_to_box_list(presupdata.box, maxleftkern, math_shape_kern_subtype, "max left shape"); + } + if (extra) { + box_height(presupdata.box) += extra; + box_shift_amount(presupdata.box) -= extra; + } + if (presupdata.slack) { + tex_aux_prepend_hkern_to_box_list(presupdata.box, presupdata.slack, horizontal_math_kern_subtype, "pre sup slack"); + } + kernel_math_list(noad_supprescr(target)) = null; + tex_flush_node(noad_supprescr(target)); + noad_supprescr(target) = null; + } + if (noad_subprescr(target)) { + halfword extra = tex_get_math_y_parameter_checked(style, math_parameter_extra_subprescript_shift); + presubdata.slack = tex_get_math_x_parameter_checked(style, math_parameter_extra_subprescript_space); + presubdata.slack += tex_get_math_x_parameter_default(style, math_parameter_space_before_script, 0); + presubdata.box = tex_aux_clean_box(noad_subprescr(target), (has_noad_option_nosubprescript(target) ? style : tex_math_style_variant(style, math_parameter_subscript_variant)), style, math_sub_list, 0, NULL); + if (maxleftkern) { + tex_aux_append_hkern_to_box_list(presubdata.box, maxleftkern, math_shape_kern_subtype, "max left shape"); + } + if (extra) { + box_depth(presubdata.box) += extra; + box_shift_amount(presubdata.box) += extra; + } + if (presubdata.slack) { + tex_aux_prepend_hkern_to_box_list(presubdata.box, presubdata.slack, horizontal_math_kern_subtype, "pre sub slack"); + } + kernel_math_list(noad_subprescr(target)) = null; + tex_flush_node(noad_subprescr(target)); + noad_subprescr(target) = null; + } + /*tex + When we're here, the kerns are in the boxes. We now register the state of scripts in the + noad for (optional) later usage. 
+ */ + if (presupdata.box) { + noad_script_state(target) |= pre_super_script_state; + } + if (presubdata.box) { + noad_script_state(target) |= pre_sub_script_state; + } + if (postsupdata.box) { + noad_script_state(target) |= post_super_script_state; + } + if (postsubdata.box) { + noad_script_state(target) |= post_sub_script_state; + } + if (primedata.box) { + noad_script_state(target) |= prime_script_state; + } + /* */ + if (primestate == prime_above_sub_location) { + rightslack = box_width(primedata.box) > box_width(postsubdata.box) ? primedata.slack : postsubdata.slack; + } else if (postsupdata.box) { + if (postsubdata.box) { + /* todo: take deltas */ + rightslack = box_width(postsupdata.box) > box_width(postsubdata.box) ? postsupdata.slack : postsubdata.slack; + } else { + rightslack = postsupdata.slack; + } + } else if (postsubdata.box) { + rightslack = postsubdata.slack; + } + + if (primestate == prime_above_sub_location) { + halfword list = noad_new_hlist(target); + if (list) { + /*tex We want to keep the size for tracing! */ + halfword overshoot = box_width(primedata.box) - box_width(postsubdata.box); + halfword primebox = tex_hpack(primedata.box, 0, packing_additional, direction_unknown, holding_none_option); + tex_attach_attribute_list_copy(primebox, primedata.box); + box_width(primebox) = 0; + tex_couple_nodes(tex_tail_of_node_list(list), primebox); + primedata.box = null; + if (overshoot > 0) { + tex_aux_append_hkern_to_box_list(postsubdata.box, overshoot, math_shape_kern_subtype, "prime overshoot kern"); + } + } else { + list = primedata.box; + } + noad_new_hlist(target) = list; + } + + if (presupdata.box) { + if (presubdata.box) { + /* todo: take deltas */ + leftslack = box_width(presupdata.box) > box_width(presubdata.box) ? 
presupdata.slack : presubdata.slack; + } else { + leftslack = presupdata.slack; + } + } else if (presubdata.box) { + leftslack = presubdata.slack; + } + switch (primestate) { + case prime_at_begin_location: + kernelsize.wd += box_width(primedata.box); + break; + case prime_above_sub_location: + /* only excess */ + break; + } + if (postsupdata.box || postsubdata.box) { + /*tex + The post scripts determine the shifts. An option can be to use the max of pre/post. + */ + scaled supkern = 0; + scaled subkern = 0; + if (! splitscripts) { + if (presupdata.box) { + prekern = box_width(presupdata.box); + postsupdata.box = tex_aux_combine_script(target, kernelsize.wd, presupdata.box, postsupdata.box, &presupdata.kern, &postsupdata.kern); + presupdata.box = null; + } + if (presubdata.box) { + // test: what with negative extra kerns and what with a negative width + if (box_width(presubdata.box) > prekern) { + prekern = box_width(presubdata.box); + } + postsubdata.box = tex_aux_combine_script(target, kernelsize.wd, presubdata.box, postsubdata.box, &presubdata.kern, &postsubdata.kern); + presubdata.box = null; + } + } + /*tex + We want to retain the kern because it is a visual thing but it could be an option to + only add the excess over the shift. We're talking tiny here. + + We could be clever and deal with combinations of shifted but lets play safe and let + the user worry about it. The sub index always wins. 
+ */ + if (postsubdata.box && postsupdata.shifted) { + halfword shift = tex_get_math_x_parameter_checked(style, math_parameter_subscript_shift_distance); + halfword amount = box_width(postsupdata.box) + shift; + tex_aux_prepend_hkern_to_box_list(postsubdata.box, amount, horizontal_math_kern_subtype, "post shifted"); + } else if (postsupdata.box && postsubdata.shifted) { + halfword shift = tex_get_math_x_parameter_checked(style, math_parameter_superscript_shift_distance); + halfword amount = box_width(postsubdata.box) + shift; + tex_aux_prepend_hkern_to_box_list(postsupdata.box, amount, horizontal_math_kern_subtype, "post shifted"); + } + if (presubdata.box && presupdata.shifted) { + halfword shift = tex_get_math_x_parameter_checked(style, math_parameter_subprescript_shift_distance); + halfword amount = box_width(presupdata.box) + shift; + tex_aux_append_hkern_to_box_list(presubdata.box, amount, horizontal_math_kern_subtype, "pre shifted"); + } else if (presupdata.box && presubdata.shifted) { + halfword shift = tex_get_math_x_parameter_checked(style, math_parameter_superprescript_shift_distance); + halfword amount = box_width(presubdata.box) + shift; + tex_aux_append_hkern_to_box_list(presupdata.box, amount, horizontal_math_kern_subtype, "pre shifted"); + } + /* */ + if (postsupdata.box) { + tex_aux_get_math_sup_shifts(postsupdata.box, style, &shift_up); + if (postsubdata.box) { + tex_aux_get_math_sup_sub_shifts(postsupdata.box, postsubdata.box, style, &shift_up, &shift_down); + tex_aux_get_sup_kern(kernel, &postsupdata, shift_up, supshift, &supkern, kerns); + tex_aux_get_sub_kern(kernel, &postsubdata, shift_down, subshift, &subkern, kerns); + if (primestate == prime_at_begin_location) { + primekern += supkern ; + subkern = 0; + supkern = 0; + } else { + if (supkern) { + tex_aux_prepend_hkern_to_box_list(postsupdata.box, supkern, math_shape_kern_subtype, "post sup shape"); + } + if (subkern) { + tex_aux_prepend_hkern_to_box_list(postsubdata.box, subkern, 
math_shape_kern_subtype, "post sub shape"); + } + } + if (italic) { + tex_aux_prepend_hkern_to_box_list(postsupdata.box, italic, italic_kern_subtype, "italic"); + } + if (presubdata.kern) { + kern_amount(presubdata.kern) += -subkern; + kern_amount(postsubdata.kern) += subkern; + } + if (presupdata.kern) { + /* italic needs checking */ + kern_amount(presupdata.kern) += -supkern - italicmultiplier * italic; + kern_amount(postsupdata.kern) += supkern + italicmultiplier * italic; + } + { + halfword kern = tex_new_kern_node((shift_up - box_depth(postsupdata.box)) - (box_height(postsubdata.box) - shift_down), vertical_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_couple_nodes(postsupdata.box, kern); + tex_couple_nodes(kern, postsubdata.box); + result = tex_vpack(postsupdata.box, 0, packing_additional, max_dimen, (singleword) math_direction_par, holding_none_option); + tex_attach_attribute_list_copy(result, target); + node_subtype(result) = math_scripts_list; + box_shift_amount(result) = shift_down; + } + } else { + tex_aux_get_sup_kern(kernel, &postsupdata, shift_up, supshift, &supkern, kerns); + if (primestate == prime_at_begin_location) { + primekern += supkern ; + supkern = 0; + } else if (supkern) { + tex_aux_prepend_hkern_to_box_list(postsupdata.box, supkern, math_shape_kern_subtype, "post sup shape"); + } + box_shift_amount(postsupdata.box) = -shift_up; + result = postsupdata.box; + if (presupdata.kern) { + kern_amount(presupdata.kern) += -supkern - subkern - italicmultiplier * italic; + kern_amount(postsupdata.kern) += supkern + subkern + italicmultiplier * italic; + } + } + } else { + tex_aux_get_math_sub_shifts(postsubdata.box, style, &shift_down); + tex_aux_get_sub_kern(kernel, &postsubdata, shift_down, subshift, &subkern, kerns); + if (primestate == prime_at_begin_location) { + subkern = 0; + } else if (subkern) { + tex_aux_prepend_hkern_to_box_list(postsubdata.box, subkern, math_shape_kern_subtype, "post sub shape"); + } + 
box_shift_amount(postsubdata.box) = shift_down; + result = postsubdata.box; + if (presubdata.kern) { + kern_amount(presubdata.kern) += -subkern; + kern_amount(postsubdata.kern) += subkern; + } + } + /* */ + if (! splitscripts) { + if (topovershoot) { + /* todo: tracing */ + if (noad_script_state(target) & pre_super_script_state) { + kern_amount(postsubdata.kern) -= topovershoot; + kern_amount(postsupdata.kern) -= topovershoot; + } + if (noad_script_state(target) & post_sub_script_state) { + kern_amount(presupdata.kern) += topovershoot; + } + } + if (botovershoot) { + /* todo: tracing, yet untested */ + if (noad_script_state(target) & pre_sub_script_state) { + kern_amount(presubdata.kern) -= botovershoot; + kern_amount(presupdata.kern) -= botovershoot; + } + if (noad_script_state(target) & post_sub_script_state) { + kern_amount(presubdata.kern) += botovershoot; + } + } + goto PICKUP; + } + } + if (presubdata.box) { + if (presupdata.box) { + tex_aux_get_math_sup_shifts(presupdata.box, style, &shift_up); + tex_aux_get_math_sup_sub_shifts(presupdata.box, presubdata.box, style, &shift_up, &shift_down); + prekern = box_width(presupdata.box); + // test: what with negative extra kerns and what with a negative width + if (! 
splitscripts) { + if (box_width(presubdata.box) > prekern) { + prekern = box_width(presubdata.box); + } + presupdata.box = tex_aux_combine_script(target, kernelsize.wd, presupdata.box, null, &presupdata.kern, &postsupdata.kern); + presubdata.box = tex_aux_combine_script(target, kernelsize.wd, presubdata.box, null, &presubdata.kern, &postsubdata.kern); + } + { + halfword k = tex_new_kern_node((shift_up - box_depth(presupdata.box)) - (box_height(presubdata.box) - shift_down), vertical_math_kern_subtype); + tex_attach_attribute_list_copy(k, target); + tex_couple_nodes(presupdata.box, k); + tex_couple_nodes(k, presubdata.box); + preresult = tex_vpack(presupdata.box, 0, packing_additional, max_dimen, (singleword) math_direction_par, holding_none_option); + tex_attach_attribute_list_copy(preresult, target); + node_subtype(preresult) = math_scripts_list; + box_shift_amount(preresult) = shift_down; + } + } else { + tex_aux_get_math_sub_shifts(presubdata.box, style, &shift_down); + if (! splitscripts) { + prekern = box_width(presubdata.box); + presubdata.box = tex_aux_combine_script(target, kernelsize.wd, presubdata.box, null, &presubdata.kern, &postsubdata.kern); + } + box_shift_amount(presubdata.box) = shift_down; + preresult = presubdata.box; + } + } else if (presupdata.box) { + tex_aux_get_math_sup_shifts(presupdata.box, style, &shift_up); + if (! 
splitscripts) { + prekern = box_width(presupdata.box); + presupdata.box = tex_aux_combine_script(target, kernelsize.wd, presupdata.box, null, &presupdata.kern, &postsupdata.kern); + } + box_shift_amount(presupdata.box) = -shift_up; + preresult = presupdata.box; + } + PICKUP: + if (primestate == prime_at_begin_location) { + halfword list = noad_new_hlist(target); + if (primekern) { + tex_aux_prepend_hkern_to_box_list(primedata.box, primekern, math_shape_kern_subtype, "prime"); + } + if (list) { + tex_couple_nodes(tex_tail_of_node_list(list), primedata.box); + } else { + list = primedata.box; + } + noad_new_hlist(target) = list; + } + if (splitscripts) { + halfword list = noad_new_hlist(target); + if (preresult) { + if (list) { + tex_couple_nodes(preresult, list); + } + list = preresult; + } + if (result) { + if (list) { + tex_couple_nodes(tex_tail_of_node_list(list), result); + } else { + list = result; + } + } + noad_new_hlist(target) = list; + } else { + if (preresult) { + result = preresult; + } + if (prekern) { + /* must become horizontal kern */ + halfword list = tex_aux_prepend_hkern_to_new_hlist(target, prekern, horizontal_math_kern_subtype, "pre compensation"); + tex_couple_nodes(tex_tail_of_node_list(list), result); + } else if (noad_new_hlist(target)) { + tex_couple_nodes(tex_tail_of_node_list(noad_new_hlist(target)), result); + } else { + noad_new_hlist(target) = result; + } + } + if (primestate == prime_at_end_location) { + tex_couple_nodes(tex_tail_of_node_list(result), primedata.box); + rightslack = primedata.slack; + } + if (math_slack_mode_par > 0) { + noad_left_slack(target) = leftslack; + noad_right_slack(target) = rightslack; + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: script slack, left %D, right %D]", leftslack, pt_unit, rightslack, pt_unit); + tex_end_diagnostic(); + } + } +} + +/*tex + + The |make_left_right| function constructs a left or right delimiter of the required size and + returns the value 
|open_noad| or |close_noad|. The |left_noad_side| and |right_noad_side| will + both be based on the original |style|, so they will have consistent sizes. + +*/ + +static halfword tex_aux_make_left_right(halfword target, int style, scaled max_d, scaled max_h, int size, delimiterextremes *extremes) +{ + halfword tmp; + scaled ic = 0; + int stack = 0; + halfword mainclass = get_noad_main_class(target); + halfword leftclass = get_noad_left_class(target); + halfword rightclass = get_noad_right_class(target); + scaled height = tex_aux_math_given_y_scaled(noad_height(target)); + scaled depth = tex_aux_math_given_y_scaled(noad_depth(target)); + int leftoperator = node_type(target) == fence_noad && node_subtype(target) == left_operator_side; + if (extremes) { + extremes->tfont = null_font; + extremes->bfont = null_font; + extremes->tchar = 0; + extremes->tchar = 0; + extremes->height = 0; + extremes->depth = 0; + } + tex_aux_set_current_math_size(style); + if (height || depth || has_noad_option_exact(target)) { + halfword lst; + scaled delta = height + depth; + tmp = tex_aux_make_delimiter(target, fence_delimiter_list(target), size, delta, 0, style, 0, &stack, &ic, 0, has_noad_option_nooverflow(target), extremes, 0); +/* do extremes here */ + noad_italic(target) = ic; + /*tex + Beware, a stacked delimiter has a shift but no corrected height/depth (yet). + */ + if (stack) { + box_shift_amount(tmp) = depth; + } + if (has_noad_option_exact(target)) { + height = box_height(tmp) - box_shift_amount(tmp); + depth = box_depth(tmp) + box_shift_amount(tmp); + } + if (has_noad_option_axis(target)) { + halfword axis = tex_aux_math_axis(size); + height += axis; + depth -= axis; + box_shift_amount(tmp) -= axis; + } + lst = tex_new_node(hlist_node, 0); + tex_attach_attribute_list_copy(lst, target); + box_dir(lst) = dir_lefttoright ; + box_height(lst) = height; + box_depth(lst) = depth; + box_width(lst) = box_width(tmp); + box_list(lst) = tmp; + tmp = lst; + } else { + int axis = ! 
has_noad_option_noaxis(target); + scaled delta = 0; + if (leftoperator && has_noad_option_auto(target)) { + /*tex Todo: option for skipping this. */ + if (style < text_style) { + scaled s = scaledround(tex_get_math_parameter(style, math_parameter_operator_size, NULL)); + if (s > max_h + max_d) { + max_h = scaledround(s / 2.0); + max_d = max_h; + delta = max_h + max_d; + } + } + } + if (! delta) { + delta = tex_aux_get_delimiter_height(max_h, max_d, axis, size, style); // todo: pass scaled axis + } + tmp = tex_aux_make_delimiter(target, fence_delimiter_list(target), size, delta, 0, style, axis, &stack, &ic, 0, has_noad_option_nooverflow(target), extremes, 0); + } + /* delimiter is wiped */ + noad_height(target) = height; + noad_depth(target) = depth; + fence_delimiter_list(target) = null; + noad_italic(target) = ic; + /* */ + if (noad_source(target)) { + box_source_anchor(tmp) = noad_source(target); + // box_anchor(tmp) = left_origin_anchor; + tex_set_box_geometry(tmp, anchor_geometry); + } + /* */ + if (leftoperator) { + halfword s = tex_new_node(sub_box_node, 0); + kernset kerns; + tex_math_wipe_kerns(&kerns); + tex_flush_node_list(noad_supscr(target)); + tex_flush_node_list(noad_subscr(target)); + tex_flush_node_list(noad_nucleus(target)); + if (kernel_math_list(fence_delimiter_top(target))) { + noad_supscr(target) = fence_delimiter_top(target); + fence_delimiter_top(target) = null; + } + if (kernel_math_list(fence_delimiter_bottom(target))) { + noad_subscr(target) = fence_delimiter_bottom(target); + fence_delimiter_bottom(target) = null; + } + kernel_math_list(s) = tmp; + noad_nucleus(target) = s; + /* maybe elsewhere as the above case */ + if (extremes && extremes->tfont) { + if (tex_math_has_class_option(fenced_noad_subtype, carry_over_right_top_kern_class_option)) { + kerns.topright = tex_char_top_right_kern_from_font(extremes->tfont, extremes->tchar); + } + if (tex_math_has_class_option(fenced_noad_subtype, carry_over_right_bottom_kern_class_option)) { + 
kerns.bottomright = tex_char_bottom_right_kern_from_font(extremes->bfont, extremes->bchar); + } + if (tex_math_has_class_option(fenced_noad_subtype, prefer_delimiter_dimensions_class_option)) { + kerns.height = extremes->height; + kerns.depth = extremes->depth; + } + } + tex_aux_make_op(target, style, size, ic, limits_unknown_mode, &kerns); + /* otherwise a leak: */ + kernel_math_list(s) = null; + tex_flush_node(s); + } else { + tex_aux_assign_new_hlist(target, tmp); + } + /* */ + switch (node_subtype(target)) { + case left_fence_side: + if (leftclass != unset_noad_class) { + return leftclass; + } else if (mainclass != unset_noad_class) { + return mainclass; + } else { + return open_noad_subtype; + } + case middle_fence_side: + if (mainclass != unset_noad_class) { + return mainclass; + } else { + return middle_noad_subtype; + } + case right_fence_side: + if (rightclass != unset_noad_class) { + return rightclass; + } else if (mainclass != unset_noad_class) { + return mainclass; + } else { + return close_noad_subtype; + } + case left_operator_side: + if (leftclass != unset_noad_class) { + return leftclass; + } else if (mainclass != unset_noad_class) { + return mainclass; + } else { + return operator_noad_subtype; + } + default: + if (mainclass != unset_noad_class) { + return mainclass; + } else { + /*tex So one can best set the class! 
*/ + return ordinary_noad_subtype; + } + } +} + +inline static int tex_aux_fallback_math_spacing_class(halfword style, halfword class) +{ + unsigned parent = (unsigned) count_parameter(first_math_class_code + class); + switch (style) { + case display_style: case cramped_display_style: return (parent >> 24) & 0xFF; + case text_style: case cramped_text_style: return (parent >> 16) & 0xFF; + case script_style: case cramped_script_style: return (parent >> 8) & 0xFF; + case script_script_style: case cramped_script_script_style: return (parent >> 0) & 0xFF; + default: return 0; + } +} + +static halfword tex_aux_math_spacing_glue(halfword ltype, halfword rtype, halfword style, scaled mmu) +{ + halfword c = tex_to_math_spacing_parameter(ltype, rtype); + halfword s = c; + for (int i = 1; i <= 2; i++) { + if (s >= 0) { + halfword d = 0; + halfword x = tex_get_math_parameter(style, s, &d); + if (x) { + switch (d) { + case no_val_level: + break; + case dimen_val_level: + if (x) { + x = tex_aux_math_dimen(x, inter_math_skip_glue, c); + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: inter atom kern, left %n, right %n, resolved %i, amount %D]", ltype, rtype, s, kern_amount(x), pt_unit); + tex_end_diagnostic(); + } + return x; + } + goto NONE; + case glue_val_level: + if (! tex_glue_is_zero(x)) { + x = tex_aux_math_glue(x, inter_math_skip_glue, c); + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: inter atom glue, left %n, right %n, resolved %i, amount %P]", ltype, rtype, s, glue_amount(x), glue_stretch(x), NULL, NULL, NULL, glue_shrink(x)); + tex_end_diagnostic(); + } + return x; + } + goto NONE; + case mu_val_level: + if (! 
tex_math_glue_is_zero(x)) { + x = tex_aux_math_muglue(x, inter_math_skip_glue, mmu, c, style); + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: inter atom (mu) glue, left %n, right %n, resolved %i, amount %P]", ltype, rtype, s, glue_amount(x), glue_stretch(x), NULL, NULL, NULL, glue_shrink(x)); + tex_end_diagnostic(); + } + return x; + } + goto NONE; + default: + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: inter atom (mu) glue, left %n, right %n, resolved %i, unset]", ltype, rtype, s); + tex_end_diagnostic(); + } + goto NONE; + } + } + /* try again */ + { + halfword lparent = tex_aux_fallback_math_spacing_class(style, ltype); + halfword rparent = tex_aux_fallback_math_spacing_class(style, rtype); + /*tex Let's try the parents (one level). */ + if (lparent != ltype || rparent != rtype) { + s = tex_to_math_spacing_parameter(lparent, rtype); + if (tex_has_math_parameter(style, s)) { + goto FOUND; + } + s = tex_to_math_spacing_parameter(ltype, rparent); + if (tex_has_math_parameter(style, s)) { + goto FOUND; + } + s = tex_to_math_spacing_parameter(lparent, rparent); + if (tex_has_math_parameter(style, s)) { + goto FOUND; + } + } + /*tex We fall back on the |all| classes. */ + s = tex_to_math_spacing_parameter(ltype, math_all_class); + if (tex_has_math_parameter(style, s)) { + goto FOUND; + } + s = tex_to_math_spacing_parameter(math_all_class, rtype); + if (tex_has_math_parameter(style, s)) { + goto FOUND; + } + s = tex_to_math_spacing_parameter(lparent, math_all_class); + if (tex_has_math_parameter(style, s)) { + goto FOUND; + } + s = tex_to_math_spacing_parameter(math_all_class, rparent); + if (tex_has_math_parameter(style, s)) { + goto FOUND; + } + /*tex Now we're lost. 
*/ + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: inter atom fallback, left %n, right %n, left parent %n, right parent %n, not resolved]", ltype, rtype, lparent, rparent); + tex_end_diagnostic(); + } + goto NONE; + FOUND: + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: inter atom fallback, left %n, right %n, left parent %n, right parent %n, resolved %i]", ltype, rtype, lparent, rparent, s); + tex_end_diagnostic(); + } + } + } else { + /* tex_confusion("math atom spacing"); */ + goto NONE; + } + } + NONE: + if (math_spacing_mode_par && c >= 0) { + if (math_spacing_mode_par == 1 && (ltype == math_begin_class || rtype == math_end_class)) { + return null; + } else { + return tex_aux_math_dimen(0, inter_math_skip_glue, c); + } + } else { + return null; + } +} + +inline static int tex_aux_fallback_math_ruling_class(halfword style, halfword class) +{ + unsigned parent = (unsigned) count_parameter(first_math_atom_code + class); + switch (style) { + case display_style: case cramped_display_style: return (parent >> 24) & 0xFF; + case text_style: case cramped_text_style: return (parent >> 16) & 0xFF; + case script_style: case cramped_script_style: return (parent >> 8) & 0xFF; + case script_script_style: case cramped_script_script_style: return (parent >> 0) & 0xFF; + default: return 0; + } +} + +static halfword tex_aux_math_ruling(halfword ltype, halfword rtype, halfword style) +{ + halfword c = tex_to_math_rules_parameter(ltype, rtype); + halfword s = c; + for (int i = 1; i <= 2; i++) { + if (s >= 0) { + halfword x = tex_get_math_parameter(style, s, NULL); + if (x != MATHPARAMDEFAULT) { + return x; + } else { + halfword lparent = tex_aux_fallback_math_ruling_class(style, ltype); + halfword rparent = tex_aux_fallback_math_ruling_class(style, rtype); + if (lparent != ltype || rparent != rtype) { + s = tex_to_math_rules_parameter(lparent, rparent); + } else { + return MATHPARAMDEFAULT; + } + } + } else { + 
return MATHPARAMDEFAULT; + } + } + return MATHPARAMDEFAULT; +} + +halfword tex_math_spacing_glue(halfword ltype, halfword rtype, halfword style) +{ + halfword mu = tex_get_math_quad_size_scaled(lmt_math_state.size); + halfword sg = tex_aux_math_spacing_glue(ltype, rtype, style, mu); + if (node_type(sg) == glue_node) { + tex_add_glue_option(sg, glue_option_no_auto_break); + } + return sg; +} + +/*tex + + This is a bit complex function and it can beter be merged into the caller and be more specific + there. The delta parameter can have a value already. When it keeps it value the caller can add + is as italic correction. However, when we have no scripts we do it here. + + Also, in some cases a new glyph is made while we alredy have one. The fetch routine also sets + |lmt_math_state.opentype| so we can use it here. The complexity of the muxed machinery makes + this complexity test also complex. + +*/ + +static halfword tex_aux_check_nucleus_complexity(halfword target, scaled *italic, halfword style, halfword size, kernset *kerns) +{ + halfword nucleus = noad_nucleus(target); + if (nucleus) { + if (italic) { + *italic = 0; + } + switch (node_type(nucleus)) { + case math_char_node: + case math_text_char_node: + { + halfword chr = null; + halfword fnt = null; + if (tex_aux_fetch(nucleus, "(text) char", &fnt, &chr)) { + /*tex We make a math glyph from an ordinary one. 
*/ + quarterword subtype = 0; + switch (node_subtype(nucleus)) { + case ordinary_noad_subtype: subtype = glyph_math_ordinary_subtype; break; + case operator_noad_subtype: subtype = glyph_math_operator_subtype; break; + case binary_noad_subtype: subtype = glyph_math_binary_subtype; break; + case relation_noad_subtype: subtype = glyph_math_relation_subtype; break; + case open_noad_subtype: subtype = glyph_math_open_subtype; break; + case close_noad_subtype: subtype = glyph_math_close_subtype; break; + case punctuation_noad_subtype: subtype = glyph_math_punctuation_subtype; break; + case variable_noad_subtype: subtype = glyph_math_variable_subtype; break; + case active_noad_subtype: subtype = glyph_math_active_subtype; break; + case inner_noad_subtype: subtype = glyph_math_inner_subtype; break; + case over_noad_subtype: subtype = glyph_math_over_subtype; break; + case under_noad_subtype: subtype = glyph_math_under_subtype; break; + case fraction_noad_subtype: subtype = glyph_math_fraction_subtype; break; + case radical_noad_subtype: subtype = glyph_math_radical_subtype; break; + case middle_noad_subtype: subtype = glyph_math_middle_subtype; break; + case accent_noad_subtype: subtype = glyph_math_accent_subtype; break; + case fenced_noad_subtype: subtype = glyph_math_fenced_subtype; break; + case ghost_noad_subtype: subtype = glyph_math_ghost_subtype; break; + default: + if (node_subtype(nucleus) < math_begin_class) { + /*tex + So at least we can recongize them and have some slack for + new ones below this boundary. Nicer would be to be in range + but then we have to ditch the normal glyph subtypes. Maybe + we should move all classes above this edge. 
+ */ + subtype = glyph_math_extra_subtype + node_subtype(nucleus); + } + break; + + } + halfword glyph = tex_aux_new_math_glyph(fnt, chr, subtype); + tex_attach_attribute_list_copy(glyph, nucleus); + if (node_type(nucleus) == math_char_node) { + glyph_properties(glyph) = kernel_math_properties(nucleus); + glyph_group(glyph) = kernel_math_group(nucleus); + glyph_index(glyph) = kernel_math_index(nucleus); + if (math_kernel_node_has_option(nucleus, math_kernel_auto_discretionary)) { + tex_add_glyph_option(glyph, glyph_option_math_discretionary); + } + if (math_kernel_node_has_option(nucleus, math_kernel_full_discretionary)) { + tex_add_glyph_option(glyph, glyph_option_math_italics_too); + } + } + /*tex + Do we have a correction at all? In opentype fonts we normally set the + delta to zero. + */ + if (math_kernel_node_has_option(nucleus, math_kernel_no_italic_correction)) { + /*tex + This node is flagged not to have italic correction. + */ + } else if (tex_aux_math_followed_by_italic_kern(target, "complexity")) { + /*tex + For some reason there is (already) an explicit italic correction so we + don't add more here. I need a use case. + */ + } else if (tex_aux_math_engine_control(fnt, math_control_apply_text_italic_kern)) { + /*tex + This is a bit messy and needs a more fundamental cleanup giving the + kind of control that we want. + */ + if (italic) { + *italic = tex_aux_math_x_size_scaled(fnt, tex_char_italic_from_font(fnt, chr), size); + if (*italic) { + if (node_type(nucleus) == math_text_char_node) { + if (tex_aux_math_engine_control(fnt, math_control_check_text_italic_kern)) { + /*tex + We add no italic correction in mid-word of (opentype) + text font. This is kind of fragile so it might go away + or become an option. + */ + if (chr == letter_cmd) { + *italic = 0; + } + } + if (tex_aux_math_engine_control(fnt, math_control_check_space_italic_kern)) { + /*tex + We're now in the traditional branch. 
it is a bit weird + test based on space being present in an old school math + font. For now we keep this. + */ + if (tex_get_font_space(fnt)) { + /*tex + We add no italic correction in mid-word (traditional) + text font. In the case of a math font, the correction + became part of the width. + */ + *italic = 0; + } + } + } + if (*italic && ! noad_has_following_scripts(target)) { + /*tex + Here we add a correction but then also have to make sure that it + doesn't happen later on so we zero |delta| afterwards. The call + handles the one script only case (maybe delegate the next too). + */ + tex_aux_math_insert_italic_kern(glyph, *italic, nucleus, "check"); + *italic = 0; + } + } + } + } + return glyph; + } else { + return tex_new_node(hlist_node, unknown_list); + } + } + case sub_box_node: + return kernel_math_list(nucleus); + case sub_mlist_node: + { + halfword list = kernel_math_list(nucleus); + halfword package = null; + halfword fenced = node_type(target) == simple_noad && node_subtype(target) == fenced_noad_subtype; + int unpack = tex_math_has_class_option(node_subtype(target), unpack_class_option) || has_noad_option_unpacklist(target); + // todo: check has_noad_option_unpacklist vs hpack later + // halfword result = tex_mlist_to_hlist(list, fenced || has_noad_option_unpacklist(q), style, unset_noad_class, unset_noad_class); /*tex Here we're nesting. */ + halfword result = tex_mlist_to_hlist(list, unpack, style, unset_noad_class, unset_noad_class, kerns); /*tex Here we're nesting. 
*/ + tex_aux_set_current_math_size(style); + package = tex_hpack(result, 0, packing_additional, direction_unknown, holding_none_option); + if (fenced) { + node_subtype(package) = math_fence_list; + // } else if (has_noad_option_unpacklist(q)) { + } else if (unpack) { + node_subtype(package) = math_list_list; + } else if (noad_class_main(target) == unset_noad_class) { + node_subtype(package) = math_pack_list; + } else { + node_subtype(package) = 0x100 + noad_class_main(target); + } + tex_attach_attribute_list_copy(package, nucleus); + return package; + } + case hlist_node: + /* really */ + break; + default: + tex_confusion("check nucleus complexity"); + } + } else { + tex_normal_warning("math", "recovering from missing nucleus, best check it out"); + noad_nucleus(target) = tex_aux_fake_nucleus(ghost_noad_subtype); + } + return tex_new_node(hlist_node, unknown_list); +} + +/*tex + The main reason for keeping the node is that original \TEX\ has no prev links but we do have + these in \LUATEX. But it is anyway okay to keep this a signal. +*/ + +static halfword tex_aux_make_choice(halfword current, halfword style) +{ + halfword prv = node_prev(current); + halfword nxt = node_next(current); + halfword signal = tex_new_node(style_node, former_choice_math_style); + /*tex We replace choice by signal encoded in a style noad, it is no longer a cast! 
*/ + tex_try_couple_nodes(prv, signal); + tex_try_couple_nodes(signal, nxt); + switch (node_subtype(current)) { + case normal_choice_subtype: + { + halfword choice = null; + switch (style) { + case display_style: + case cramped_display_style: + choice = choice_display_mlist(current); + choice_display_mlist(current) = null; + break; + case text_style: + case cramped_text_style: + choice = choice_text_mlist(current); + choice_text_mlist(current) = null; + break; + case script_style: + case cramped_script_style: + choice = choice_script_mlist(current); + choice_script_mlist(current) = null; + break; + case script_script_style: + case cramped_script_script_style: + choice = choice_script_script_mlist(current); + choice_script_script_mlist(current) = null; + break; + } + /*tex We inject the choice list after the signal. */ + if (choice) { + tex_couple_nodes(signal, choice); + tex_try_couple_nodes(tex_tail_of_node_list(choice), nxt); + } + } + break; + case discretionary_choice_subtype: + { + halfword disc = tex_new_disc_node(normal_discretionary_code); + halfword pre = choice_pre_break(current); + halfword post = choice_post_break(current); + halfword replace = choice_no_break(current); + choice_pre_break(current) = null; + choice_post_break(current) = null; + choice_no_break(current) = null; + if (pre) { + pre = tex_mlist_to_hlist(pre, 0, style, unset_noad_class, unset_noad_class, NULL); + tex_set_disc_field(disc, pre_break_code, pre); + } + if (post) { + post = tex_mlist_to_hlist(post, 0, style, unset_noad_class, unset_noad_class, NULL); + tex_set_disc_field(disc, post_break_code, post); + } + if (replace) { + replace = tex_mlist_to_hlist(replace, 0, style, unset_noad_class, unset_noad_class, NULL); + tex_set_disc_field(disc, no_break_code, replace); + } + disc_class(disc) = choice_class(current); + disc = tex_math_make_disc(disc); + tex_couple_nodes(signal, disc); + tex_try_couple_nodes(disc, nxt); + } + break; + } + /*tex We flush the old choice node */ + 
tex_flush_node(current); + return signal; +} + +/*tex + This is just a \quote {fixer}. Todo: prepend the top and/or bottom to the super/subscript, + but we also need to hpack then. Problem: how to determine the slack here? However, slack + is less important because we normally have binding right text here. +*/ + +static int tex_aux_make_fenced(halfword current, halfword current_style, halfword size, noad_classes *fenceclasses) +{ + halfword nucleus = noad_nucleus(current); + (void) current_style; + (void) size; + if (nucleus) { + halfword list = kernel_math_list(nucleus); + if (list && node_type(list) == fence_noad && node_subtype(list) == left_operator_side) { + fenceclasses->main = noad_class_main(list); + fenceclasses->left = noad_class_left(list); + fenceclasses->right = noad_class_right(list); + if (noad_supscr(current) && ! kernel_math_list(fence_delimiter_top(list))) { + halfword n = tex_new_node(simple_noad, ordinary_noad_subtype); + node_subtype(n) = math_char_node; + noad_nucleus(n) = noad_supscr(current); + kernel_math_list(fence_delimiter_top(list)) = n; + noad_supscr(current) = null; + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_str("[math: promoting supscript to top delimiter]"); + tex_end_diagnostic(); + } + } + if (noad_subscr(current) && ! kernel_math_list(fence_delimiter_bottom(list))) { + halfword n = tex_new_node(simple_noad, ordinary_noad_subtype); + node_subtype(n) = math_char_node; + noad_nucleus(n) = noad_subscr(current); + kernel_math_list(fence_delimiter_bottom(list)) = n; + noad_subscr(current) = null; + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_str("[math: promoting subscript to bottom delimiter]"); + tex_end_diagnostic(); + } + } + /*tex + Now we remove the dummy right one. If something is in between we assume it's on + purpose. + */ + { + halfword nxt = node_next(list); + if (nxt && node_type(nxt) == fence_noad && node_subtype(nxt) == right_fence_side) { + /* todo : check for delimiter . 
or 0 */ + node_next(list) = null; + tex_flush_node_list(nxt); + } + } + return 1; /* we had a growing one */ + } + } + return 0; +} + +static void tex_aux_finish_fenced(halfword current, halfword main_style, scaled max_depth, scaled max_height, kernset *kerns) +{ + delimiterextremes extremes = { .tfont = null_font, .tchar = 0, .bfont = null_font, .bchar = 0, .height = 0, .depth = 0 }; + noad_analyzed(current) = (singleword) tex_aux_make_left_right(current, main_style, max_depth, max_height, lmt_math_state.size, &extremes); + if (kerns && extremes.tfont) { + switch (node_subtype(current)) { + case left_fence_side: + case extended_left_fence_side: + if (tex_math_has_class_option(fenced_noad_subtype, carry_over_left_top_kern_class_option)) { + kerns->topleft = tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar); + } + if (tex_math_has_class_option(fenced_noad_subtype, carry_over_left_bottom_kern_class_option)) { + kerns->bottomleft = tex_char_bottom_left_kern_from_font(extremes.bfont, extremes.bchar); + } + if (tex_math_has_class_option(fenced_noad_subtype, prefer_delimiter_dimensions_class_option)) { + kerns->height = extremes.height; + kerns->depth = extremes.depth; + } + break; + case right_fence_side: + case extended_right_fence_side: + case left_operator_side: + case no_fence_side: + if (tex_math_has_class_option(fenced_noad_subtype, carry_over_right_top_kern_class_option)) { + kerns->topright = tex_char_top_right_kern_from_font(extremes.tfont, extremes.tchar); + } + if (tex_math_has_class_option(fenced_noad_subtype, carry_over_right_bottom_kern_class_option)) { + kerns->bottomright = tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar); + } + if (tex_math_has_class_option(fenced_noad_subtype, prefer_delimiter_dimensions_class_option)) { + kerns->height = extremes.height; + kerns->depth = extremes.depth; + } + break; + } + } +} + +/*tex + + Here is the overall plan of |mlist_to_hlist|, and the list of its local variables. 
In + \LUAMETATEX\ we could actually use the fact that we have a double linked list. Because we have + a more generic class and penalty handling the two stages are clearly separated, also variable + wise. + +*/ + +static halfword tex_aux_unroll_noad(halfword tail, halfword l, quarterword s) +{ + while (l) { + halfword n = node_next(l); + node_next(l) = null; + if (node_type(l) == hlist_node && (s < 0 || node_subtype(l) == s) && ! box_source_anchor(l)) { + if (box_list(l)) { + tex_couple_nodes(tail, box_list(l)); + tail = tex_tail_of_node_list(tail); + box_list(l) = null; + } + tex_flush_node(l); + } else { + tex_couple_nodes(tail, l); + tail = l; + } + l = n; + } + return tail; +} + +static halfword tex_aux_unroll_list(halfword tail, halfword l) +{ + while (l) { + halfword n = node_next(l); + node_next(l) = null; + if (node_type(l) == hlist_node && ! box_source_anchor(l)) { + if (box_list(l)) { + switch (node_subtype(l)) { + case hbox_list: + case container_list: + case math_list_list: /* in case of a ghost (we could remap subtype instead) */ + tex_couple_nodes(tail, box_list(l)); + tail = tex_tail_of_node_list(tail); + box_list(l) = null; + break; + default: + tex_couple_nodes(tail, l); + tail = l; + break; + } + } + tex_flush_node(l); + } else { + tex_couple_nodes(tail, l); + tail = l; + } + l = n; + } + return tail; +} + +inline static void tex_aux_wipe_noad(halfword n) +{ + if (tex_nodetype_has_attributes(node_type(n))) { + remove_attribute_list(n); + } + tex_reset_node_properties(n); + tex_free_node(n, get_node_size(node_type(n))); +} + +static halfword tex_aux_append_ghost(halfword ghost, halfword p) +{ + halfword l = noad_new_hlist(ghost); + if (l) { + if (has_noad_option_unpacklist(ghost)) { + /* always anyway */ + p = tex_aux_unroll_noad(p, l, math_list_list); + } else if (has_noad_option_unrolllist(ghost)) { + p = tex_aux_unroll_list(p, l); + } else { + if (node_type(l) == hlist_node && ! 
node_next(l)) { + node_subtype(l) = math_ghost_list; + } + tex_couple_nodes(p, l); + p = tex_tail_of_node_list(p); + } + noad_new_hlist(ghost) = null; + } + tex_aux_wipe_noad(ghost); + return p; +} + +static halfword tex_aux_get_plus_glyph(halfword current) +{ + if (node_type(current) == simple_noad) { + halfword list = noad_new_hlist(current); + if (list && node_type(list) == hlist_node) { + list = box_list(list); + } + if (list && node_type(list) == glue_node) { + list = node_next(list); + } + if (list && node_type(list) == glyph_node && ! node_next(list)) { + return list; + } + } + return null; +} + +static void tex_aux_show_math_list(const char *fmt, halfword list) +{ + tex_begin_diagnostic(); + tex_print_format(fmt, lmt_math_state.level); + tex_show_node_list(list, tracing_math_par >= 3 ? max_integer : show_box_depth_par, tracing_math_par >= 3 ? max_integer : show_box_breadth_par); + tex_print_ln(); + tex_end_diagnostic(); +} + +static void tex_aux_wrapup_nucleus_and_add_scripts(halfword current, halfword nxt, int current_style, halfword *italic, kernset *kerns) +{ + halfword p; + p = tex_aux_check_nucleus_complexity(current, italic, current_style, lmt_math_state.size, kerns); + if (p && noad_source(current)) { + switch (node_type(p)) { + case hlist_node: + case vlist_node: + if (! box_source_anchor(p)) { + box_source_anchor(p) = noad_source(current); + tex_set_box_geometry(p, anchor_geometry); + } + break; + default: + /*tex Todo: maybe pack and assign! 
*/ + break; + } + } + if (noad_has_scripts(current)) { + scaled drop = 0; + if (node_type(current) == accent_noad && noad_has_superscripts(current)) { + drop = tex_get_math_y_parameter_default(current_style, math_parameter_accent_superscript_drop, 0); + drop += scaledround(kerns->toptotal * tex_get_math_parameter_default(current_style, math_parameter_accent_superscript_percent, 0) / 100.0); + } + tex_aux_make_scripts(current, p, *italic, current_style, 0, 0, drop, kerns); + } else { + /*tex + Adding italic correction here is kind of fuzzy because some characters already have + that built in. However, we also add it in the scripts so if it's optional here it + also should be there. The compexity tester can have added it in which case delta + is zero. + */ + if (nxt && *italic) { + if (node_type(nxt) == simple_noad && tex_math_has_class_option(node_subtype(nxt), no_italic_correction_class_option)) { + *italic = 0; + } + if (*italic) { + /* If we want it as option we need the fontor store it in the noad. */ + tex_aux_math_insert_italic_kern(p, *italic, current, "final"); + } + } + tex_aux_assign_new_hlist(current, p); + } +} + +/*tex + + This function is called recursively, for instance for wrapped content in fence, accent, fraction + and radical noads. Especially the fences introduce some messy code but I might clean that up + stepwise. We don't want to get away too much from the original. + + Because we have more than two passes, and the function became way larger, it has been split up + in smaller functions. 
+ +*/ + +typedef struct mliststate { + halfword mlist; + int penalties; + int main_style; + int beginclass; + int endclass; + kernset *kerns; + halfword scale; + scaled max_height; + scaled max_depth; +} mliststate; + +static void tex_mlist_to_hlist_set_boundaries(mliststate *state) +{ + halfword b = tex_aux_fake_nucleus((quarterword) state->beginclass); + halfword e = tex_aux_fake_nucleus((quarterword) state->endclass); + if (state->mlist) { + tex_couple_nodes(b, state->mlist); + } + state->mlist = b; + tex_couple_nodes(tex_tail_of_node_list(state->mlist), e); + state->beginclass = unset_noad_class; + state->endclass = unset_noad_class; +} + +static void tex_mlist_to_hlist_preroll_radicals(mliststate *state) +{ + halfword current = state->mlist; + halfword current_style = state->main_style; + halfword height = 0; + halfword depth = 0; + tex_aux_set_current_math_size(current_style); + tex_aux_set_current_math_scale(state->scale); + if (tracing_math_par >= 2) { + tex_aux_show_math_list("[math: radical sizing pass, level %i]", state->mlist); + } + while (current) { + switch (node_type(current)) { + case radical_noad: + { + halfword body = null; + tex_aux_preroll_radical(current, current_style, lmt_math_state.size); + body = noad_new_hlist(current); + if (box_height(body) > height) { + height = box_height(body); + } + if (box_depth(body) > depth) { + depth = box_depth(body); + } + } + break; + case style_node: + tex_aux_make_style(current, ¤t_style, NULL); + break; + case parameter_node: + tex_def_math_parameter(node_subtype(current), parameter_name(current), parameter_value(current), cur_level + lmt_math_state.level, indirect_math_regular); + break; + } + current = node_next(current); + } + /*tex + A positive value is assigned, a negative value subtracted and a value of maxdimen will use + the maximum found dimensions. Todo: use an option to control this instead. 
+ */ + current = state->mlist; + while (current) { + if (node_type(current) == radical_noad) { + switch (node_subtype(current)) { + case normal_radical_subtype: + case radical_radical_subtype: + case root_radical_subtype: + case rooted_radical_subtype: + { + halfword body = noad_new_hlist(current); + if (radical_height(current) == max_dimen) { + box_height(body) = height; + } else if (radical_height(current) < 0) { + box_height(body) += radical_height(current); + if (box_height(body) < 0) { + box_height(body) += 0; + } + } else if (radical_height(current)) { + box_height(body) = radical_height(current); + } + if (radical_depth(current) == max_dimen) { + box_depth(body) = depth; + } else if (radical_depth(current) < 0) { + box_depth(body) += radical_depth(current); + if (box_depth(body) < 0) { + box_depth(body) += 0; + } + } else if (radical_depth(current)) { + box_depth(body) = radical_depth(current); + } + } + break; + } + } + current = node_next(current); + } +} + +static void tex_mlist_to_hlist_preroll_dimensions(mliststate *state) +{ + halfword current = state->mlist; + scaled current_mu = 0; + halfword current_style = state->main_style; + int blockrulebased = 0; + /*tex We set the math unit width corresponding to |size|: */ + tex_aux_set_current_math_size(current_style); + tex_aux_set_current_math_scale(state->scale); + current_mu = tex_get_math_quad_size_scaled(lmt_math_state.size); + if (tracing_math_par >= 2) { + tex_aux_show_math_list("[math: first pass, level %i]", state->mlist); + } + while (current) { + /*tex The italic correction offset for subscript and superscript: */ + scaled italic = 0; + halfword nxt = node_next(current); + noad_classes fenceclasses = { unset_noad_class, unset_noad_class, unset_noad_class }; + kernset localkerns; + tex_math_wipe_kerns(&localkerns); + /*tex + At some point we had nicely cleaned up switch driven code here but we ended up with a + more generic approach. The reference is still in the pre-2022 zips and git repository. 
+ + The fact that we have configurable atom spacing (with inheritance) means that we can + now have a rather simple switch without any remapping and RESWITCH magic. + */ + if (blockrulebased > 0) { + blockrulebased -= 1; + } + switch (node_type(current)) { + case simple_noad: + /*tex + Because we have added features we no longer combine the case in clever ways to + minimize code. Let the compiler do that for us. We could be generic and treat + all the same but for now we just emulate some of traditional \TEX's selectivity. + */ + if (blockrulebased > 0) { + noad_options(current) |= noad_option_no_ruling; + blockrulebased = 0; + } + switch (node_subtype(current)) { + case under_noad_subtype: + tex_aux_make_under(current, current_style, lmt_math_state.size, math_rules_fam_par); + break; + case over_noad_subtype: + tex_aux_make_over(current, current_style, lmt_math_state.size, math_rules_fam_par); + break; + case vcenter_noad_subtype: + tex_aux_make_vcenter(current, current_style, lmt_math_state.size); + break; + case fenced_noad_subtype: + if (tex_aux_make_fenced(current, current_style, lmt_math_state.size, &fenceclasses)) { + /*tex We have a left operator so we fall through! */ + } else { + break; + } + case operator_noad_subtype: + /* compatibility */ + if (! (has_noad_option_limits(current) || has_noad_option_nolimits(current))) { + /* otherwise we don't enter the placement function */ + noad_options(current) |= (current_style == display_style || current_style == cramped_display_style) ? 
noad_option_limits : noad_option_no_limits; + } + goto PROCESS; + default: + /* setting both forces check */ + if ((has_noad_option_limits(current) && has_noad_option_nolimits(current))) { + if (current_style == display_style || current_style == cramped_display_style) { + noad_options(current) = unset_option(noad_options(current), noad_option_no_limits); + noad_options(current) |= noad_option_limits; + } else { + noad_options(current) = unset_option(noad_options(current), noad_option_limits); + noad_options(current) |= noad_option_no_limits; + } + } + PROCESS: + if ( // node_subtype(q) == operator_noad_subtype + // || + has_noad_option_limits(current) || has_noad_option_nolimits(current) + || has_noad_option_openupheight(current) || has_noad_option_openupdepth(current) + || has_noad_option_adapttoleft(current) || has_noad_option_adapttoright(current) + ) { + if (node_subtype(current) == fenced_noad_subtype && ! noad_has_scripts(current)) { + /*tex + This is a special case: the right counterpart of the left operator + can trigger a boxing of all that comes before so we need to enforce + nolimits. Mikael Sundqvist will reveal all this in the CMS manual. 
+ */ + italic = tex_aux_make_op(current, current_style, lmt_math_state.size, 0, limits_horizontal_mode, NULL); + } else { + italic = tex_aux_make_op(current, current_style, lmt_math_state.size, 0, limits_unknown_mode, NULL); + } + /* tex_math_has_class_option(node_subtype(current),keep_correction_class_code) */ + if (node_subtype(current) != operator_noad_subtype) { + italic = 0; + } + if (fenceclasses.main != unset_noad_class) { + noad_class_main(current) = fenceclasses.main; + } + if (fenceclasses.left != unset_noad_class) { + noad_class_left(current) = fenceclasses.left; + } + if (fenceclasses.right != unset_noad_class) { + noad_class_right(current) = fenceclasses.right; + } + if (has_noad_option_limits(current) || has_noad_option_nolimits(current)) { + goto CHECK_DIMENSIONS; + } + } else { + // tex_aux_make_ord(current, lmt_math_state.size); + tex_aux_check_ord(current, lmt_math_state.size, null); + } + break; + } + break; + case fence_noad: + { + /* why still ... */ + current_style = state->main_style; + tex_aux_set_current_math_size(current_style); + current_mu = tex_get_math_quad_size_scaled(lmt_math_state.size); + /* ... 
till here */ + goto DONE_WITH_NODE; + } + case fraction_noad: + tex_aux_make_fraction(current, current_style, lmt_math_state.size, state->kerns); + goto CHECK_DIMENSIONS; + case radical_noad: + tex_aux_make_radical(current, current_style, lmt_math_state.size, &localkerns); + break; + case accent_noad: + tex_aux_make_accent(current, current_style, lmt_math_state.size, &localkerns); + break; + case style_node: + tex_aux_make_style(current, ¤t_style, ¤t_mu); + goto DONE_WITH_NODE; + case choice_node: + current = tex_aux_make_choice(current, current_style); + goto DONE_WITH_NODE; + case parameter_node: + /* maybe not needed as we do a first pass */ + tex_def_math_parameter(node_subtype(current), parameter_name(current), parameter_value(current), cur_level + lmt_math_state.level, indirect_math_regular); + goto DONE_WITH_NODE; + case insert_node: + case mark_node: + case adjust_node: + case boundary_node: + case whatsit_node: + case penalty_node: + case disc_node: + case par_node: /* for local boxes */ + goto DONE_WITH_NODE; + case rule_node: + tex_aux_check_math_strut_rule(current, current_style); + if (rule_height(current) > state->max_height) { + state->max_height = rule_height(current); + } + if (rule_depth(current) > state->max_depth) { + state->max_depth = rule_depth(current); + } + goto DONE_WITH_NODE; + case glue_node: + if (node_subtype(current) == rulebased_math_glue) { + blockrulebased = 2; + } + tex_aux_make_glue(current, current_mu, current_style); + goto DONE_WITH_NODE; + case kern_node: + tex_aux_make_kern(current, current_mu, current_style); + goto DONE_WITH_NODE; + default: + tex_confusion("mlist to hlist, case 1"); + } + /*tex + When we get to the following part of the program, we have \quote {fallen through} from + cases that did not lead to |check_dimensions| or |done_with_noad| or |done_with_node|. 
+ Thus, |q|~points to a noad whose nucleus may need to be converted to an hlist, and + whose subscripts and superscripts need to be appended if they are present. + + If |nucleus(q)| is not a |math_char|, the variable |italic| is the amount by which a + superscript should be moved right with respect to a subscript when both are present. + */ + tex_aux_wrapup_nucleus_and_add_scripts(current, nxt, current_style, &italic, &localkerns); + // { + // kernset kerns; + // halfword p; + // tex_math_copy_kerns(&kerns, &localkerns); + // p = tex_aux_check_nucleus_complexity(current, &italic, current_style, lmt_math_state.size, &kerns); + // if (p && noad_source(current)) { + // switch (node_type(p)) { + // case hlist_node: + // case vlist_node: + // if (! box_source_anchor(p)) { + // box_source_anchor(p) = noad_source(current); + // tex_set_box_geometry(p, anchor_geometry); + // } + // break; + // default: + // /*tex Todo: maybe pack and assign! */ + // break; + // } + // } + // if (noad_has_scripts(current)) { + // scaled drop = 0; + // if (node_type(current) == accent_noad && noad_has_superscripts(current)) { + // drop = tex_get_math_y_parameter_default(current_style, math_parameter_accent_superscript_drop, 0); + // drop += scaledround(localkerns.toptotal * tex_get_math_parameter_default(current_style, math_parameter_accent_superscript_percent, 0) / 100.0); + // } + // tex_aux_make_scripts(current, p, italic, current_style, 0, 0, drop, &kerns); + // } else { + // /*tex + // Adding italic correction here is kind of fuzzy because some characters already have + // that built in. However, we also add it in the scripts so if it's optional here it + // also should be there. The compexity tester can have added it in which case delta + // is zero. 
+ // */ + // if (nxt && italic) { + // if (node_type(nxt) == simple_noad && tex_math_has_class_option(node_subtype(nxt), no_italic_correction_class_option)) { + // italic = 0; + // } + // if (italic) { + // /* If we want it as option we need the fontor store it in the noad. */ + // tex_aux_math_insert_italic_kern(p, italic, current, "final"); + // } + // } + // tex_aux_assign_new_hlist(current, p); + // } + // } + CHECK_DIMENSIONS: + { + scaledwhd siz = tex_natural_hsizes(noad_new_hlist(current), null, normal_glue_multiplier, normal_glue_sign, normal_glue_sign); + if (siz.ht > state->max_height) { + state->max_height = siz.ht; + } + if (siz.dp > state->max_depth) { + state->max_depth = siz.dp; + } + } + DONE_WITH_NODE: + if ((node_type(current) == simple_noad) && noad_new_hlist(current)) { + if (has_noad_option_phantom(current) || has_noad_option_void(current)) { + noad_new_hlist(current) = tex_aux_make_list_phantom(noad_new_hlist(current), has_noad_option_void(current), get_attribute_list(current)); + } + } + current = node_next(current); + } +} + +static void tex_mlist_to_hlist_size_fences(mliststate *state) +{ + halfword current = state->mlist; + halfword current_style = state->main_style; + tex_aux_set_current_math_size(current_style); + tex_aux_set_current_math_scale(state->scale); + if (tracing_math_par >= 2) { + tex_aux_show_math_list("[math: fence sizing pass, level %i]", state->mlist); + } + while (current) { + switch (node_type(current)) { + case fence_noad: + tex_aux_finish_fenced(current, current_style, state->max_depth, state->max_height, state->kerns); + break; + case style_node: + tex_aux_make_style(current, ¤t_style, NULL); + break; + case parameter_node: + /* tricky as this is sort of persistent, we need to reset it at the start */ + tex_def_math_parameter(node_subtype(current), parameter_name(current), parameter_value(current), cur_level + lmt_math_state.level, indirect_math_regular); + break; + } + current = node_next(current); + } +} + +static 
void tex_mlist_to_hlist_finalize_list(mliststate *state) +{ + halfword recent = null; /*tex Watch out: can be wiped, so more a signal! */ + int recent_type = 0; + int recent_subtype = ordinary_noad_subtype; + halfword current_style = state->main_style; + halfword fenced = null; + halfword recent_left_slack = 0; + halfword recent_right_slack = 0; + halfword recent_class_overload = unset_noad_class; + halfword recent_script_state = 0; + halfword recent_plus_glyph = null; + scaled current_mu = 0; + halfword current = state->mlist; + halfword p = temp_head; + halfword ghost = null; + node_next(p) = null; + tex_aux_set_current_math_size(current_style); + tex_aux_set_current_math_scale(state->scale); + current_mu = tex_get_math_quad_size_scaled(lmt_math_state.size); + if (math_penalties_mode_par) { + state->penalties = 1; /* move to caller ? */ + } + if (tracing_math_par >= 2) { + tex_aux_show_math_list("[math: second pass, level %i]", state->mlist); + } + RESTART: + while (current) { + /*tex + If node |q| is a style node, change the style and |goto delete_q|; otherwise if it is + not a noad, put it into the hlist, advance |q|, and |goto done|; otherwise set |s| to + the size of noad |q|, set |t| to the associated type (|ord_noad.. inner_noad|), and set + |pen| to the associated penalty. + + Just before doing the big |case| switch in the second pass, the program sets up default + values so that most of the branches are short. + + We need to remain somewhat compatible so we still handle some open and close fence + setting (marked as safeguard) here but as we (1) take the class from the delimiter, + when set, or (2) derive it from the fence subtype, we don't really need it. In some + cases, like with bars that serve a dual purpose, it will always be a mess. 
+ + */ + /*tex the effective |type| of noad |q| during the second pass */ + halfword current_type = simple_noad; + /*tex the effective |subtype| of noad |q| during the second pass */ + halfword current_subtype = ordinary_noad_subtype; + /*tex penalties to be inserted */ + halfword post_penalty = infinite_penalty; + halfword pre_penalty = infinite_penalty; + /*tex experiment */ + halfword current_left_slack = 0; + halfword current_right_slack = 0; + halfword current_script_state = 0; + halfword current_plus_glyph = 0; + halfword old_recent = 0; + halfword old_current = 0; + HERE: + switch (node_type(current)) { + case simple_noad: + if (node_subtype(current) == ghost_noad_subtype) { + /* for now, what to do with edges */ + halfword nxt = node_next(current); + if (ghost) { + // check for noad_new_hlist(ghost) + halfword p = tex_tail_of_node_list(noad_new_hlist(ghost)); + noad_class_right(ghost) = noad_class_right(current); + p = tex_aux_append_ghost(current, p); + noad_new_hlist(ghost) = tex_head_of_node_list(p); + } else { + ghost = current; + } + current = nxt; + if (current) { + goto HERE; + } else { + goto RESTART; + } + } else { + current_subtype = node_subtype(current); + current_left_slack = noad_left_slack(current); + current_right_slack = noad_right_slack(current); + current_script_state = noad_script_state(current); + switch (current_subtype) { + case fenced_noad_subtype: + { + // halfword list = noad_new_hlist(current); + // if (list && ! noad_nucleus(current) && ! 
noad_has_scripts(current)) { // scripts test will go + fenced = current; + if (get_noad_right_class(fenced) != unset_noad_class) { + current_subtype = get_noad_left_class(fenced); + } else if (get_noad_main_class(fenced) != unset_noad_class) { // needs testing by MS + current_subtype = get_noad_main_class(fenced); + } else { + current_subtype = open_noad_subtype; /* safeguard, see comment above */ + } + // } + break; + } + default: + { + halfword list = noad_new_hlist(current); + if (list && tex_is_math_disc(list)) { + current_type = simple_noad; + current_subtype = disc_class(box_list(list)); + } + if (list && noad_source(current)) { + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: packing due to source field %D]", noad_source(current)); + tex_end_diagnostic(); + } + switch (node_type(list)) { + case hlist_node: + case vlist_node: + if (! box_source_anchor(list)) { + box_source_anchor(list) = noad_source(current); + tex_set_box_geometry(list, anchor_geometry); + } + break; + default: + list = tex_hpack(list, 0, packing_additional, direction_unknown, holding_none_option); + tex_attach_attribute_list_copy(list, current); + box_source_anchor(list) = noad_source(current); + tex_set_box_geometry(list, anchor_geometry); + noad_new_hlist(current) = list; + node_subtype(list) = math_pack_list; + break; + } + } + break; + } + } + if (get_noad_left_class(current) != unset_noad_class) { + current_subtype = get_noad_left_class(current); + } else if (get_noad_main_class(current) != unset_noad_class) { + current_subtype = get_noad_main_class(current); + } + } + break; + case radical_noad: + switch (node_subtype(current)) { + case normal_radical_subtype: + case radical_radical_subtype: + case root_radical_subtype: + case rooted_radical_subtype: + case delimited_radical_subtype: + current_type = simple_noad; + current_subtype = radical_noad_subtype; + break; + case under_delimiter_radical_subtype: + case delimiter_under_radical_subtype: + 
current_type = simple_noad; + current_subtype = under_noad_subtype; + break; + case over_delimiter_radical_subtype: + case delimiter_over_radical_subtype: + current_type = simple_noad; + current_subtype = over_noad_subtype; + break; + case h_extensible_radical_subtype: + current_type = simple_noad; + current_subtype = accent_noad_subtype; + break; + } + break; + case accent_noad: + current_type = simple_noad; /*tex Same kind of fields. */ + current_subtype = accent_noad_subtype; + current_left_slack = noad_left_slack(current); + current_right_slack = noad_right_slack(current); + break; + case fraction_noad: + current_type = simple_noad; /*tex Same kind of fields. */ + current_subtype = fraction_noad_subtype; /* inner_noad_type */ + break; + case fence_noad: + current_type = simple_noad; /*tex Same kind of fields. */ + current_subtype = noad_analyzed(current); + fenced = current; + break; + case style_node: + tex_aux_make_style(current, ¤t_style, ¤t_mu); + recent = current; + current = node_next(current); + tex_aux_wipe_noad(recent); + goto RESTART; + case parameter_node: + tex_def_math_parameter(node_subtype(current), parameter_name(current), parameter_value(current), cur_level + lmt_math_state.level, indirect_math_regular); + recent = current; + current = node_next(current); + tex_aux_wipe_noad(recent); + goto RESTART; + case glue_node: + switch (node_subtype(current)) { + case conditional_math_glue: + case rulebased_math_glue: + { + halfword t = current; + current = node_next(current); + tex_flush_node(t); + goto MOVEON; + } + default: + break; + } + // case glyph_node: + case disc_node: + case hlist_node: + case boundary_node: + case whatsit_node: + case penalty_node: + case rule_node: + case adjust_node: + case insert_node: + case mark_node: + case par_node: + case kern_node: + tex_couple_nodes(p, current); + p = current; + current = node_next(current); + node_next(p) = null; + MOVEON: + if (current) { + /*tex These nodes are invisible! 
*/ + switch (node_type(p)) { + case boundary_node: + case adjust_node: + case insert_node: + case mark_node: + case par_node: + goto HERE; + case rule_node: + if (node_subtype(p) == strut_rule_subtype) { + goto HERE; + } + } + } + continue; + // goto NEXT_NODE; + default: + tex_confusion("mlist to hlist, case 2"); + } + /*tex + Apply some logic. The hard coded pairwise comparison is replaced by a generic one + because we can have more classes. For a while spacing and pairing was under a mode + control but that made no sense. We start with the begin class. + */ + recent_class_overload = get_noad_right_class(current); + if (current_type == simple_noad && state->beginclass == unset_noad_class) { + if (noad_new_hlist(current)) { + tex_flush_node(noad_new_hlist(current)); + noad_new_hlist(current) = null; + } + state->beginclass = current_subtype; + /* */ + recent_type = current_type; + recent_subtype = current_subtype; + recent = current; + current = node_next(current); + goto WIPE; + } + /*tex + This is a special case where a sign starts something marked as (like) numeric, in + which there will be different spacing applied. + */ + if (tex_math_has_class_option(current_subtype, look_ahead_for_end_class_option)) { + halfword endhack = node_next(current); + if (endhack && node_type(endhack) == simple_noad && (node_subtype(endhack) == math_end_class || get_noad_main_class(endhack) == math_end_class)) { + halfword value = tex_aux_math_ruling(current_subtype, math_end_class, current_style); + if (value != MATHPARAMDEFAULT) { + // recent_subtype = (value >> 16) & 0xFF; + // current_subtype = value & 0xFF; + current_subtype = (value >> 16) & 0xFF; + } + + } + } + old_recent = recent_subtype; + old_current = current_subtype; + if (current_subtype != unset_noad_class && recent_subtype != unset_noad_class && current_type == simple_noad) { + if (recent_type == simple_noad && ! 
has_noad_option_noruling(current)) { + halfword value = tex_aux_math_ruling(recent_subtype, current_subtype, current_style); + if (value != MATHPARAMDEFAULT) { + recent_subtype = (value >> 16) & 0xFF; + current_subtype = value & 0xFF; + } + } + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + if (old_recent != recent_subtype || old_current != current_subtype) { + tex_print_format("[math: atom ruling, recent %n, current %n, new recent %n, new current %n]", old_recent, old_current, recent_subtype, current_subtype); + } else { + tex_print_format("[math: atom ruling, recent %n, current %n]", old_recent, old_current); + } + tex_end_diagnostic(); + } + } + /*tex Now we set the inter-atom penalties: */ + if (ghost && ! has_noad_option_right(ghost)) { + p = tex_aux_append_ghost(ghost, p); + ghost = null; + } + if (current_type == simple_noad) { + pre_penalty = tex_aux_math_penalty(state->main_style, 1, current_subtype); + post_penalty = tex_aux_math_penalty(state->main_style,0, current_subtype); + } + /*tex Dirty trick: */ /* todo: use kerns info */ + current_plus_glyph = tex_aux_get_plus_glyph(current); + /*tex Append inter-element spacing based on |r_type| and |t| */ + if (current_plus_glyph && recent_script_state) { + /*tex This is a very special case and used {x^2 / 3| kind of situations: */ + halfword plus = tex_aux_checked_left_kern(current_plus_glyph, recent_script_state, current_subtype); + if (plus) { + halfword kern = tex_new_kern_node(plus, math_shape_kern_subtype); + tex_attach_attribute_list_copy(kern, current); + tex_couple_nodes(p, kern); + p = kern; + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: state driven left shape kern %p]", plus, pt_unit); + tex_end_diagnostic(); + } + } + } + if (recent_type > 0) { + halfword last = node_type(p); /* can be temp */ + halfword glue = tex_aux_math_spacing_glue(recent_subtype, current_subtype, current_style, current_mu); + halfword kern = null; + if (glue) { + 
tex_attach_attribute_list_copy(glue, current); + } + if (recent_right_slack) { + halfword kern = tex_new_kern_node(-recent_right_slack, horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(kern, current); + tex_couple_nodes(p, kern); + p = kern; + if (current_subtype >= 0 && tex_math_has_class_option(current_subtype, no_pre_slack_class_option)) { + /* */ + } else if (! glue) { + glue = tex_aux_math_dimen(recent_right_slack, inter_math_skip_glue, -2); + } else { + glue_amount(glue) += recent_right_slack; + } + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: migrating right slack %p]", recent_right_slack, pt_unit); + tex_end_diagnostic(); + } + recent_right_slack = 0; + } + if (recent_plus_glyph && current_script_state) { + /*tex This is a very special case and used {x^2 / 3| kind of situations: */ + halfword plus = tex_aux_checked_right_kern(recent_plus_glyph, current_script_state, recent_subtype); + if (plus) { + halfword kern = tex_new_kern_node(plus, math_shape_kern_subtype); + tex_attach_attribute_list_copy(kern, current); + tex_couple_nodes(p, kern); + p = kern; + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: state driven right shape kern %p]", plus, pt_unit); + tex_end_diagnostic(); + } + } + } + if (current_left_slack) { + kern = tex_new_kern_node(-current_left_slack, horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(kern, p); + /* tex_couple_nodes(node_prev(p), kern); */ /* close to the molecule */ + /* tex_couple_nodes(kern, p); */ /* close to the molecule */ + if (recent_subtype >= 0 && tex_math_has_class_option(recent_subtype, no_post_slack_class_option)) { + /* */ + } else if (! glue) { + glue = tex_aux_math_dimen(current_left_slack, inter_math_skip_glue, -1); + } else { + glue_amount(glue) += current_left_slack; + } + current_left_slack = 0; + } + /*tex + Do we still want this check in infinite. 
+ */ + if (state->penalties && pre_penalty < infinite_penalty && node_type(last) != penalty_node) { + /*tex no checking of prev node type */ + halfword penalty = tex_new_penalty_node(pre_penalty, math_pre_penalty_subtype); + tex_attach_attribute_list_copy(penalty, current); + tex_couple_nodes(p, penalty); + p = penalty; + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: pre penalty, left %n, right %n, amount %i]", recent_subtype, current_subtype, penalty_amount(penalty)); + tex_end_diagnostic(); + } + } + if (tex_math_has_class_option(current_subtype, remove_italic_correction_class_option)) { + if (node_type(p) == kern_node && node_subtype(p) == italic_kern_subtype) { + halfword prv = node_prev(p); + if (prv) { + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: removing italic correction %D between %i and %i]", kern_amount(p), recent_subtype, current_subtype); + tex_end_diagnostic(); + } + tex_flush_node(p); + p = prv; + } + } + } + if (glue) { + tex_couple_nodes(p, glue); + p = glue; + } + if (kern) { + tex_couple_nodes(p, kern); + p = kern; + } + } + if (ghost) { + p = tex_aux_append_ghost(ghost, p); + ghost = null; + } + { + halfword l = noad_new_hlist(current); + if (! l) { + /* curious */ + } else if (node_type(l) == hlist_node && box_source_anchor(l)) { + tex_couple_nodes(p, l); + } else if (fenced) { + /*tex Watch out: we can have |[prescripts] [fencelist] [postscripts]| */ + if (tex_math_has_class_option(fenced_noad_subtype, unpack_class_option)) { + p = tex_aux_unroll_noad(p, l, math_fence_list); + } else { + tex_couple_nodes(p, l); + } + } else if (has_noad_option_unpacklist(current) || tex_math_has_class_option(current_subtype, unpack_class_option)) { + /*tex So here we only unpack a math list. 
*/ + p = tex_aux_unroll_noad(p, l, math_list_list); + } else if (has_noad_option_unrolllist(current)) { + p = tex_aux_unroll_list(p, l); + } else if (tex_is_math_disc(l)) { + /* hm, temp nodes here */ + tex_couple_nodes(p, box_list(l)); + box_list(l) = null; + tex_flush_node(l); + } else if (current_type == simple_noad && current_subtype == math_end_class) { + if (noad_new_hlist(current)) { + tex_flush_node(noad_new_hlist(current)); + noad_new_hlist(current) = null; + } + } else { + tex_couple_nodes(p, l); + } + p = tex_tail_of_node_list(p); + if (fenced) { + if (get_noad_right_class(fenced) != unset_noad_class) { + current_subtype = get_noad_right_class(fenced); + } else if (get_noad_main_class(fenced) != unset_noad_class) { // needs testing by MS + current_subtype = get_noad_main_class(fenced); + } else { + current_subtype = close_noad_subtype; /* safeguard, see comment above */ + } + fenced = null; + } + noad_new_hlist(current) = null; + } + /*tex + Append any |new_hlist| entries for |q|, and any appropriate penalties. We insert a + penalty node after the hlist entries of noad |q| if |pen| is not an \quote {infinite} + penalty, and if the node immediately following |q| is not a penalty node or a + |rel_noad| or absent entirely. We could combine more here but for beter understanding + we keep the branches seperated. This code is not performance sentitive anyway. + + We can actually drop the omit check because we pair by class. + */ + if (state->penalties && node_next(current) && post_penalty < infinite_penalty) { + halfword recent = node_next(current); + recent_type = node_type(recent); + recent_subtype = node_subtype(recent); + /* todo: maybe also check the mainclass of the recent */ + if ((recent_type != penalty_node) && ! 
(recent_type == simple_noad && tex_math_has_class_option(recent_subtype, omit_penalty_class_option))) { + halfword z = tex_new_penalty_node(post_penalty, math_post_penalty_subtype); + tex_attach_attribute_list_copy(z, current); + tex_couple_nodes(p, z); + p = z; + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: post penalty, left %n, right %n, amount %i]", recent_subtype, current_subtype, penalty_amount(z)); + tex_end_diagnostic(); + } + } + } + if (recent_class_overload != unset_noad_class) { + current_type = simple_noad; + current_subtype = recent_class_overload; + } + if (current_type == simple_noad && current_subtype != math_end_class) { + state->endclass = current_subtype; + } + recent_type = current_type; + recent_subtype = current_subtype; + recent_left_slack = current_left_slack; + recent_right_slack = current_right_slack; + recent_script_state = current_script_state; + recent_plus_glyph = current_plus_glyph; + // if (first && recent_left_slack) { + if (p == temp_head && recent_left_slack) { + halfword k = tex_new_kern_node(-recent_left_slack, horizontal_math_kern_subtype); + halfword h = node_next(temp_head); + tex_attach_attribute_list_copy(k, p); + tex_couple_nodes(k, h); + node_next(temp_head) = k; + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: nilling recent left slack %p]", recent_left_slack); + tex_end_diagnostic(); + } + } + recent = current; + current = node_next(current); + if (! 
current && recent_right_slack) { + halfword k = tex_new_kern_node(-recent_right_slack, horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(k, p); + tex_couple_nodes(p, k); + p = k; + if (tracing_math_par >= 2) { + tex_begin_diagnostic(); + tex_print_format("[math: nilling recent right slack %p]", recent_right_slack); + tex_end_diagnostic(); + } + } + // first = 0; + /*tex + The m|-|to|-|hlist conversion takes place in|-|place, so the various dependant fields + may not be freed (as would happen if |flush_node| was called). A low|-|level |free_node| + is easier than attempting to nullify such dependant fields for all possible node and + noad types. + */ + WIPE: + tex_aux_wipe_noad(recent); + } + if (tracing_math_par >= 3) { + tex_aux_show_math_list("[math: result, level %i]", node_next(temp_head)); + } +} + +halfword tex_mlist_to_hlist(halfword mlist, int penalties, int main_style, int beginclass, int endclass, kernset *kerns) /* classes should be quarterwords */ +{ + /*tex + We start with a little housekeeping. There are now only two variables that live across the + two passes. We actually could split this function in two. For practical reasons we have + collected all relevant state parameters in a structure. The values in there can be adapted + in this state. + */ + mliststate state; + state.mlist = mlist; + state.penalties = penalties; + state.main_style = main_style; + state.beginclass = beginclass == unset_noad_class ? math_begin_class : beginclass; + state.endclass = endclass == unset_noad_class ? math_end_class : endclass;; + state.kerns = kerns; + state.scale = glyph_scale_par; + state.max_height = 0; + state.max_depth = 0; + if (state.kerns) { + tex_math_wipe_kerns(state.kerns); + } + ++lmt_math_state.level; + /*tex + Here we can deal with end_class spacing: we can inject a dummy current atom with no content and + just a class. In fact, we can always add a begin and endclass. A nucleus is kind of mandate. 
+    */
+    tex_mlist_to_hlist_set_boundaries(&state);
+    /*tex
+        This first pass processes the bodies of radicals so that we can normalize them when height
+        and/or depth are set.
+    */
+    tex_mlist_to_hlist_preroll_radicals(&state);
+    /*tex
+        Make a second pass over the mlist. This is needed in order to get the maximum height and
+        depth in order to make fences match.
+    */
+    tex_mlist_to_hlist_preroll_dimensions(&state);
+    /*tex
+        The fence sizing is done in the third pass. Using a dedicated pass permits experimenting.
+    */
+    tex_mlist_to_hlist_size_fences(&state);
+    /*tex
+        Make a fourth pass over the mlist; traditionally this was the second pass. We remove all
+        noads and insert the proper spacing (glue) and penalties. The binary checking is gone and
+        replaced by generic arbitrary inter atom mapping control, so for the hard coded older logic
+        one has to check the (development) git repository.
+
+        The original comment for this pass is: \quotation {We have now tied up all the loose ends of
+        the first pass of |mlist_to_hlist|. The second pass simply goes through and hooks everything
+        together with the proper glue and penalties. It also handles the |fence_noad|s that might be
+        present, since |max_hl| and |max_d| are now known. Variable |p| points to a node at the
+        current end of the final hlist.} However, in \LUAMETATEX\ the fence sizing has already been
+        done in the previous pass.
+    */
+    tex_mlist_to_hlist_finalize_list(&state);
+    /*tex
+        We're done now and can restore the possibly changed values as well as provide some feedback
+        about the result.
+    */
+    tex_unsave_math_data(cur_level + lmt_math_state.level);
+    /*tex The passes can have adapted the classes in |state|, so we report them back. */
+    cur_list.math_begin = state.beginclass;
+    cur_list.math_end = state.endclass;
+    glyph_scale_par = state.scale;
+    --lmt_math_state.level;
+    /*tex The result hangs after |temp_head|; we cut the back link to that head. */
+    node_prev(node_next(temp_head)) = null;
+    return node_next(temp_head);
+}
diff --git a/source/luametatex/source/tex/texmlist.h b/source/luametatex/source/tex/texmlist.h
new file mode 100644
index 000000000..1cb2a6cc7
--- /dev/null
+++ b/source/luametatex/source/tex/texmlist.h
@@ -0,0 +1,30 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+# ifndef LMT_MLIST_H
+# define LMT_MLIST_H
+
+/*tex
+    Kern related values that are collected while a math list is converted; a pointer to such
+    a set can be passed to |tex_mlist_to_hlist|, which wipes it before the passes start.
+*/
+
+typedef struct kernset {
+    scaled topright;
+    scaled bottomright;
+    scaled topleft;
+    scaled bottomleft;
+    scaled height;
+    scaled depth;
+    scaled toptotal;
+    scaled bottomtotal;
+} kernset;
+
+extern void tex_run_mlist_to_hlist (halfword p, halfword penalties, halfword style, int beginclass, int endclass);
+extern halfword tex_mlist_to_hlist (halfword, int penalties, int mainstyle, int beginclass, int endclass, kernset *kerns);
+extern halfword tex_make_extensible (halfword fnt, halfword chr, scaled target, scaled min_overlap, int horizontal, halfword att, halfword size);
+extern halfword tex_new_math_glyph (halfword fnt, halfword chr);
+extern halfword tex_math_spacing_glue (halfword ltype, halfword rtype, halfword style);
+
+extern halfword tex_math_font_char_ht (halfword fnt, halfword chr, halfword style);
+extern halfword tex_math_font_char_dp (halfword fnt, halfword chr, halfword style);
+
+extern void tex_set_math_text_font (halfword style, int usefamfont);
+
+# endif
diff --git a/source/luametatex/source/tex/texnesting.c b/source/luametatex/source/tex/texnesting.c
new file mode 100644
index 000000000..d699d58fc
--- /dev/null
+++ b/source/luametatex/source/tex/texnesting.c
@@ -0,0 +1,432 @@
+/*
+    See license.txt in the root of this project.
+*/ + +# include "luametatex.h" + +/*tex These are for |show_activities|: */ + +# define page_goal lmt_page_builder_state.goal + +/*tex + + \TEX\ is typically in the midst of building many lists at once. For example, when a math formula + is being processed, \TEX\ is in math mode and working on an mlist; this formula has temporarily + interrupted \TEX\ from being in horizontal mode and building the hlist of a paragraph; and this + paragraph has temporarily interrupted \TEX\ from being in vertical mode and building the vlist + for the next page of a document. Similarly, when a |\vbox| occurs inside of an |\hbox|, \TEX\ is + temporarily interrupted from working in restricted horizontal mode, and it enters internal + vertical mode. The \quote {semantic nest} is a stack that keeps track of what lists and modes + are currently suspended. + + At each level of processing we are in one of six modes: + + \startitemize[n] + \startitem + |vmode| stands for vertical mode (the page builder); + \stopitem + \startitem + |hmode| stands for horizontal mode (the paragraph builder); + \stopitem + \startitem + |mmode| stands for displayed formula mode; + \stopitem + \startitem + |-vmode| stands for internal vertical mode (e.g., in a |\vbox|); + \stopitem + \startitem + |-hmode| stands for restricted horizontal mode (e.g., in an |\hbox|); + \stopitem + \startitem + |-mmode| stands for math formula mode (not displayed). + \stopitem + \stopitemize + + The mode is temporarily set to zero while processing |\write| texts in the |ship_out| routine. + + Numeric values are assigned to |vmode|, |hmode|, and |mmode| so that \TEX's \quote {big semantic + switch} can select the appropriate thing to do by computing the value |abs(mode) + cur_cmd|, + where |mode| is the current mode and |cur_cmd| is the current command code. 
+
+*/
+
+# if main_control_mode == 0
+
+/*tex
+    Return a descriptive string for mode |m|. In this variant the absolute mode value is divided
+    by |max_command_cmd + 1|, which gives 0, 1 or 2 for the vertical, horizontal and math
+    variants; the sign of |m| selects between the two names of each variant. Anything else
+    results in |"unknown mode"|. The returned strings are literals, so the caller should not
+    free them.
+*/
+
+const char *tex_string_mode(int m)
+{
+    if (m > 0) {
+        switch (m / (max_command_cmd + 1)) {
+            case 0: return "vertical mode";
+            case 1: return "horizontal mode";
+            case 2: return "display math mode";
+        }
+    } else if (m == 0) {
+        return "no mode";
+    } else {
+        switch ((-m) / (max_command_cmd + 1)) {
+            case 0: return "internal vertical mode";
+            case 1: return "restricted horizontal mode";
+            case 2: return "math mode";
+        }
+    }
+    /*tex We end up here when a switch above has no matching case. */
+    return "unknown mode";
+}
+
+# else
+
+/*tex
+    Return a descriptive string for mode |m|. In this variant the mode constants are compared
+    directly, with the negated constants naming the internal, restricted and non display
+    variants. The returned strings are literals.
+*/
+
+const char *tex_string_mode(int m)
+{
+    switch (m) {
+        case nomode: return "no mode";
+        case vmode : return "vertical mode";
+        case hmode : return "horizontal mode";
+        case mmode : return "display math mode";
+        case -vmode : return "internal vertical mode";
+        case -hmode : return "restricted horizontal mode";
+        case -mmode : return "math mode";
+        default : return "unknown mode";
+    }
+}
+
+# endif
+
+/*tex
+
+    The state of affairs at any semantic level can be represented by five values:
+
+    \startitemize
+        \startitem
+            |mode| is the number representing the semantic mode, as just explained.
+        \stopitem
+        \startitem
+            |head| is a |pointer| to a list head for the list being built; |link(head)| therefore
+            points to the first element of the list, or to |null| if the list is empty.
+        \stopitem
+        \startitem
+            |tail| is a |pointer| to the final node of the list being built; thus, |tail=head| if
+            and only if the list is empty.
+        \stopitem
+        \startitem
+            |prev_graf| is the number of lines of the current paragraph that have already been put
+            into the present vertical list.
+        \stopitem
+        \startitem
+            |aux| is an auxiliary |memoryword| that gives further information that is needed to
+            characterize the situation.
+ \stopitem + \stopitemize + + In vertical mode, |aux| is also known as |prev_depth|; it is the scaled value representing the + depth of the previous box, for use in baseline calculations, or it is |<= -1000pt| if the next + box on the vertical list is to be exempt from baseline calculations. In horizontal mode, |aux| + is also known as |space_factor|; it holds the current space factor used in spacing calculations. + In math mode, |aux| is also known as |incompleat_noad|; if not |null|, it points to a record + that represents the numerator of a generalized fraction for which the denominator is currently + being formed in the current list. + + There is also a sixth quantity, |mode_line|, which correlates the semantic nest with the + user's input; |mode_line| contains the source line number at which the current level of nesting + was entered. The negative of this line number is the |mode_line| at the level of the user's + output routine. + + A seventh quantity, |eTeX_aux|, is used by the extended features eTeX. In math mode it is known + as |delim_ptr| and points to the most recent |fence_noad| of a |math_left_group|. + + In horizontal mode, the |prev_graf| field is used for initial language data. + + The semantic nest is an array called |nest| that holds the |mode|, |head|, |tail|, |prev_graf|, + |aux|, and |mode_line| values for all semantic levels below the currently active one. + Information about the currently active level is kept in the global quantities |mode|, |head|, + |tail|, |prev_graf|, |aux|, and |mode_line|, which live in a struct that is ready to be pushed + onto |nest| if necessary. + + The math field is used by various bits and pieces in |texmath.w| + + This implementation of \TEX\ uses two different conventions for representing sequential stacks. 
+
+    \startitemize[n]
+
+        \startitem
+            If there is frequent access to the top entry, and if the stack is essentially never
+            empty, then the top entry is kept in a global variable (even better would be a machine
+            register), and the other entries appear in the array |stack[0 .. (ptr-1)]|. The semantic
+            stack is handled this way.
+        \stopitem
+
+        \startitem
+            If there is infrequent top access, the entire stack contents are in the array |stack[0
+            .. (ptr - 1)]|. For example, the |save_stack| is treated this way, as we have seen.
+        \stopitem
+
+    \stopitemize
+
+    In |nest_ptr| we have the first unused location of |nest|, and |max_nest_stack| has the maximum
+    of |nest_ptr| when pushing. In |shown_mode| we store the most recent mode shown by
+    |\tracingcommands| and with |save_tail| we can examine whether we have an auto kern before a
+    glue.
+
+*/
+
+/*tex
+    The nest state starts out without an allocated array: |nest| is |NULL| and |allocated| is
+    zero until |tex_initialize_nest_state| is called. The |minimum|, |maximum|, |size| and
+    |step| fields are set from the nest size constants; |size| is used as the upper bound and
+    |step| as the increment when the array is grown on demand.
+*/
+
+nest_state_info lmt_nest_state = {
+    .nest = NULL,
+    .nest_data = {
+        .minimum = min_nest_size,
+        .maximum = max_nest_size,
+        .size = siz_nest_size,
+        .step = stp_nest_size,
+        .allocated = 0,
+        .itemsize = sizeof(list_state_record),
+        .top = 0,
+        .ptr = 0,
+        .initial = memory_data_unset,
+        .offset = 0,
+    },
+    .shown_mode = 0,
+    .padding = 0,
+};
+
+/*tex
+
+    We will see later that the vertical list at the bottom semantic level is split into two parts;
+    the \quote {current page} runs from |page_head| to |page_tail|, and the \quote {contribution
+    list} runs from |contribute_head| to |tail| of semantic level zero. The idea is that contributions
+    are first formed in vertical mode, then \quote {contributed} to the current page (during which
+    time the page|-|breaking decisions are made). For now, we don't need to know any more details
+    about the page-building process.
+ +*/ + +# define reserved_nest_slots 0 + +void tex_initialize_nest_state(void) +{ + int size = lmt_nest_state.nest_data.minimum; + lmt_nest_state.nest = aux_allocate_clear_array(sizeof(list_state_record), size, reserved_nest_slots); + if (lmt_nest_state.nest) { + lmt_nest_state.nest_data.allocated = size; + } else { + tex_overflow_error("nest", size); + } +} + +static int tex_aux_room_on_nest_stack(void) /* quite similar to save_stack checker so maybe share */ +{ + int top = lmt_nest_state.nest_data.ptr; + if (top > lmt_nest_state.nest_data.top) { + lmt_nest_state.nest_data.top = top; + if (top > lmt_nest_state.nest_data.allocated) { + list_state_record *tmp = NULL; + top = lmt_nest_state.nest_data.allocated + lmt_nest_state.nest_data.step; + if (top > lmt_nest_state.nest_data.size) { + top = lmt_nest_state.nest_data.size; + } + if (top > lmt_nest_state.nest_data.allocated) { + lmt_nest_state.nest_data.allocated = top; + tmp = aux_reallocate_array(lmt_nest_state.nest, sizeof(list_state_record), top, reserved_nest_slots); + lmt_nest_state.nest = tmp; + } + lmt_run_memory_callback("nest", tmp ? 1 : 0); + if (! 
tmp) { + tex_overflow_error("nest", top); + return 0; + } + } + } + return 1; +} + +void tex_initialize_nesting(void) +{ + lmt_nest_state.nest_data.ptr = 0; + lmt_nest_state.nest_data.top = 0; + lmt_nest_state.shown_mode = 0; + cur_list.mode = vmode; + cur_list.head = contribute_head; + cur_list.tail = contribute_head; + cur_list.delim = null; + cur_list.prev_graf = 0; + cur_list.mode_line = 0; + cur_list.prev_depth = ignore_depth; + cur_list.space_factor = 1000; + cur_list.incomplete_noad = null; + cur_list.direction_stack = null; + cur_list.math_dir = 0; + cur_list.math_style = -1; + cur_list.math_flatten = 1; + cur_list.math_begin = unset_noad_class; + cur_list.math_end = unset_noad_class; + cur_list.math_mode = 0; +} + +halfword tex_pop_tail(void) +{ + if (cur_list.tail != cur_list.head) { + halfword r = cur_list.tail; + halfword n = node_prev(r); + if (node_next(n) != r) { + n = cur_list.head; + while (node_next(n) != r) { + n = node_next(n); + } + } + cur_list.tail = n; + node_prev(r) = null; + node_next(n) = null; + return r; + } else { + return null; + } +} + +/*tex + + When \TEX's work on one level is interrupted, the state is saved by calling |push_nest|. This + routine changes |head| and |tail| so that a new (empty) list is begun; it does not change + |mode| or |aux|. 
+ +*/ + +void tex_push_nest(void) +{ + list_state_record *top = &lmt_nest_state.nest[lmt_nest_state.nest_data.ptr]; + lmt_nest_state.nest_data.ptr += 1; + if (tex_aux_room_on_nest_stack()) { + cur_list.mode = top->mode; + cur_list.head = tex_new_temp_node(); + cur_list.tail = cur_list.head; + cur_list.delim = null; + cur_list.prev_graf = 0; + cur_list.mode_line = lmt_input_state.input_line; + cur_list.prev_depth = top->prev_depth; + cur_list.space_factor = top->space_factor; + cur_list.incomplete_noad = top->incomplete_noad; + cur_list.direction_stack = null; + cur_list.math_dir = 0; + cur_list.math_style = -1; + cur_list.math_flatten = 1; + cur_list.math_begin = unset_noad_class; + cur_list.math_end = unset_noad_class; + // cur_list.math_begin = top->math_begin; + // cur_list.math_end = top->math_end; + cur_list.math_mode = 0; + } else { + tex_overflow_error("semantic nest size", lmt_nest_state.nest_data.size); + } +} + +/*tex + + Conversely, when \TEX\ is finished on the current level, the former state is restored by + calling |pop_nest|. This routine will never be called at the lowest semantic level, nor will + it be called unless |head| is a node that should be returned to free memory. + +*/ + +void tex_pop_nest(void) +{ + if (cur_list.head) { + /* tex_free_node(cur_list.head, temp_node_size); */ /* looks fragile */ + tex_flush_node(cur_list.head); + /*tex Just to be sure, in case we access from \LUA: */ + // cur_list.head = null; + // cur_list.tail = null; + } + --lmt_nest_state.nest_data.ptr; +} + +/*tex Here is a procedure that displays what \TEX\ is working on, at all levels. */ + +void tex_show_activities(void) +{ + tex_print_nlp(); + for (int p = lmt_nest_state.nest_data.ptr; p >= 0; p--) { + list_state_record n = lmt_nest_state.nest[p]; + tex_print_format("%l[%M entered at line %i%s]", n.mode, abs(n.mode_line), n.mode_line < 0 ? 
" (output routine)" : ""); // %L + if (p == 0) { + /*tex Show the status of the current page */ + if (page_head != lmt_page_builder_state.page_tail) { + tex_print_format("%l[current page:%s]", lmt_page_builder_state.output_active ? " (held over for next output)" : ""); + tex_show_box(node_next(page_head)); + if (lmt_page_builder_state.contents != contribute_nothing) { + halfword r; + tex_print_format("%l[total height %P, goal height %D]", + page_total, page_stretch, page_filstretch, page_fillstretch, page_filllstretch, page_shrink, + page_goal, pt_unit + ); + r = node_next(page_insert_head); + while (r != page_insert_head) { + halfword index = insert_index(r); + halfword multiplier = tex_get_insert_multiplier(index); + halfword size = multiplier == 1000 ? insert_total_height(r) : tex_x_over_n(insert_total_height(r), 1000) * multiplier; + if (node_type(r) == split_node && node_subtype(r) == insert_split_subtype) { + halfword q = page_head; + halfword n = 0; + do { + q = node_next(q); + if (node_type(q) == insert_node && split_insert_index(q) == insert_index(r)) { + ++n; + } + } while (q != split_broken_insert(r)); + tex_print_format("%l[insert %i adds %D, might split to %i]", index, size, pt_unit, n); + } else { + tex_print_format("%l[insert %i adds %D]", index, size, pt_unit); + } + r = node_next(r); + } + } + } + if (node_next(contribute_head)) { + tex_print_format("%l[recent contributions:]"); + } + } + tex_print_format("%l[begin list]"); + tex_show_box(node_next(n.head)); + tex_print_format("%l[end list]"); + /*tex Show the auxiliary field, |a|. */ + switch (abs(n.mode) / (max_command_cmd + 1)) { + case 0: + { + if (n.prev_depth <= ignore_depth) { + tex_print_format("%l[prevdepth ignored"); + } else { + tex_print_format("%l[prevdepth %D", n.prev_depth, pt_unit); + } + if (n.prev_graf != 0) { + tex_print_format(", prevgraf %i line%s", n.prev_graf, n.prev_graf == 1 ? 
"" : "s"); + } + tex_print_char(']'); + break; + } + case 1: + { + break; + } + case 2: + { + if (n.incomplete_noad) { + tex_print_format("%l[this will be denominator of:]"); + tex_print_format("%l[begin list]"); + tex_show_box(n.incomplete_noad); + tex_print_format("%l[end list]"); + } + break; + } + } + } +} + +int tex_vmode_nest_index(void) +{ + int p = lmt_nest_state.nest_data.ptr; /* index into |nest| */ + while (abs(lmt_nest_state.nest[p].mode) != vmode) { + --p; + } + return p; +} + +void tex_tail_append(halfword p) +{ + node_next(cur_list.tail) = p; + node_prev(p) = cur_list.tail; + cur_list.tail = p; +} diff --git a/source/luametatex/source/tex/texnesting.h b/source/luametatex/source/tex/texnesting.h new file mode 100644 index 000000000..f940094a0 --- /dev/null +++ b/source/luametatex/source/tex/texnesting.h @@ -0,0 +1,71 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_NESTING_H +# define LMT_NESTING_H + +typedef struct list_state_record { + int mode; + halfword head; + halfword tail; + int prev_graf; + int mode_line; + halfword prev_depth; // scaled + halfword space_factor; + halfword direction_stack; + int math_dir; + int math_style; + int math_scale; + int math_main_style; + halfword delim; + halfword incomplete_noad; + halfword math_flatten; + halfword math_begin; + halfword math_end; + halfword math_mode; +} list_state_record; + +typedef struct nest_state_info { + list_state_record *nest; + memory_data nest_data; + int shown_mode; + int padding; +} nest_state_info; + +extern nest_state_info lmt_nest_state; + +# define cur_list lmt_nest_state.nest[lmt_nest_state.nest_data.ptr] /*tex The \quote {top} semantic state. 
*/ +# define cur_mode (abs(cur_list.mode)) + +/*tex The functions that operate on the semantic nest. */ extern void tex_initialize_nest_state (void); +/* int tex_room_on_nest_stack (void); */ +extern void tex_initialize_nesting (void); +extern void tex_push_nest (void); +extern void tex_pop_nest (void); +extern void tex_tail_append (halfword p); +extern halfword tex_pop_tail (void); +extern const char *tex_string_mode (int m); +extern void tex_show_activities (void); +extern int tex_vmode_nest_index (void); + +/*tex + When we use a macro instead of a function we need to use an intermediate variable because |_p_| + can be a function call itself (something |new_*|). The gain is a little performance because this + one is called a lot. The loss is a slightly larger binary. There are some more macros sensitive to + this, like the ones that couple nodes. Also, inlining a function can spoil this game! +*/ + +/* +# define tail_append(_p_) do { \ + halfword __p__ = _p_ ; \ + tex_couple_nodes(cur_list.tail, __p__); \ + cur_list.tail = __p__; \ +} while (0) +*/ + +/* +# define tail_append tex_tail_append +*/ + +# endif diff --git a/source/luametatex/source/tex/texnodes.c b/source/luametatex/source/tex/texnodes.c new file mode 100644 index 000000000..45e04dfd2 --- /dev/null +++ b/source/luametatex/source/tex/texnodes.c @@ -0,0 +1,4794 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + This module started out using DEBUG to trigger checking invalid node usage, something that is + needed because users can mess up nodes in \LUA. At some point that code was always enabled so + it is now always on but still can be recognized as additional code. As the performance hit + is close to zero, disabling makes no sense, not even to make it configurable. There is a + little more memory used but that is negligible compared to other memory usage. Only on + massive freeing we can gain. 
+ +*/ + +/*tex The initial node memory state: nothing is allocated yet (|nodes| and |nodesizes| are |NULL|), the |nodes_data| limits come from the |*_node_size| constants and the |extra_data| limits are unset. */ node_memory_state_info lmt_node_memory_state = { + .nodes = NULL, + .nodesizes = NULL, + .free_chain = { null }, + .nodes_data = { + .minimum = min_node_size, + .maximum = max_node_size, + .size = siz_node_size, + .step = stp_node_size, + .allocated = 0, + .itemsize = sizeof(memoryword) + sizeof(char), + .top = 0, // beware, node pointers are just offsets below top + .ptr = 0, // total size in use + .initial = 0, + .offset = 0, + }, + .extra_data = { + .minimum = memory_data_unset, + .maximum = memory_data_unset, + .size = memory_data_unset, + .step = memory_data_unset, + .allocated = 0, + .itemsize = 1, + .top = 0, + .ptr = 0, + .initial = memory_data_unset, + .offset = 0, + }, + .reserved = 0, + .padding = 0, + .node_properties_id = 0, + .lua_properties_level = 0, + .attribute_cache = 0, + .max_used_attribute = 1, + .node_properties_table_size = 0, +}; + +/*tex Defined below. */ + +static void tex_aux_check_node (halfword node); +static halfword tex_aux_allocated_node (int size); + +/*tex + + The following definitions are used for keys at the \LUA\ end and provide an efficient way to + share hashed strings. For a long time we had this: + + static value_info lmt_node_fields_accent [10]; + + node_info lmt_node_data[] = { + { .id = hlist_node, .size = box_node_size, .subtypes = NULL, .fields = lmt_node_fields_list, .name = NULL, .lua = 0, .visible = 1 }, + .... + } ; + + etc but eventually we went a bit more dynamic because after all some helpers showed up. This + brings many node properties together. Not all nodes are visible to users. Most of the + properties can be provided as lists. + + Not all math noad fields are mentioned here yet ... some are still experimental. + +*/ + +void lmt_nodelib_initialize(void) { + + /*tes The subtypes of nodes. 
*/ + + value_info + *subtypes_dir, *subtypes_par, *subtypes_glue, *subtypes_boundary, *subtypes_penalty, *subtypes_kern, + *subtypes_rule, *subtypes_glyph , *subtypes_disc, *subtypes_list, *subtypes_adjust, *subtypes_mark, + *subtypes_math, *subtypes_noad, *subtypes_radical, *subtypes_choice, *subtypes_accent, *subtypes_fence, *subtypes_split, + *subtypes_attribute; + + value_info + *lmt_node_fields_accent, *lmt_node_fields_adjust, *lmt_node_fields_attribute, + *lmt_node_fields_boundary, *lmt_node_fields_choice, *lmt_node_fields_delimiter, *lmt_node_fields_dir, + *lmt_node_fields_disc, *lmt_node_fields_fence, *lmt_node_fields_fraction, *lmt_node_fields_glue, + *lmt_node_fields_glue_spec, *lmt_node_fields_glyph, *lmt_node_fields_insert, *lmt_node_fields_split, + *lmt_node_fields_kern, *lmt_node_fields_list, *lmt_node_fields_par, *lmt_node_fields_mark, *lmt_node_fields_math, + *lmt_node_fields_math_char, *lmt_node_fields_math_text_char, *lmt_node_fields_noad, *lmt_node_fields_penalty, + *lmt_node_fields_radical, *lmt_node_fields_rule, *lmt_node_fields_style, *lmt_node_fields_parameter, + *lmt_node_fields_sub_box, *lmt_node_fields_sub_mlist, *lmt_node_fields_unset, *lmt_node_fields_whatsit; + + subtypes_dir = lmt_aux_allocate_value_info(cancel_dir_subtype); + + set_value_entry_key(subtypes_dir, normal_dir_subtype, normal) + set_value_entry_key(subtypes_dir, cancel_dir_subtype, cancel) + + subtypes_split = lmt_aux_allocate_value_info(insert_split_subtype); + + set_value_entry_key(subtypes_split, normal_split_subtype, normal) + set_value_entry_key(subtypes_split, insert_split_subtype, insert) + + subtypes_par = lmt_aux_allocate_value_info(math_par_subtype); + + set_value_entry_key(subtypes_par, vmode_par_par_subtype, vmodepar) + set_value_entry_key(subtypes_par, local_box_par_subtype, localbox) + set_value_entry_key(subtypes_par, hmode_par_par_subtype, hmodepar) + set_value_entry_key(subtypes_par, penalty_par_subtype, penalty) + set_value_entry_key(subtypes_par, 
math_par_subtype, math) + + subtypes_glue = lmt_aux_allocate_value_info(u_leaders); + + set_value_entry_key(subtypes_glue, user_skip_glue, userskip) + set_value_entry_key(subtypes_glue, line_skip_glue, lineskip) + set_value_entry_key(subtypes_glue, baseline_skip_glue, baselineskip) + set_value_entry_key(subtypes_glue, par_skip_glue, parskip) + set_value_entry_key(subtypes_glue, above_display_skip_glue, abovedisplayskip) + set_value_entry_key(subtypes_glue, below_display_skip_glue, belowdisplayskip) + set_value_entry_key(subtypes_glue, above_display_short_skip_glue, abovedisplayshortskip) + set_value_entry_key(subtypes_glue, below_display_short_skip_glue, belowdisplayshortskip) + set_value_entry_key(subtypes_glue, left_skip_glue, leftskip) + set_value_entry_key(subtypes_glue, right_skip_glue, rightskip) + set_value_entry_key(subtypes_glue, top_skip_glue, topskip) + set_value_entry_key(subtypes_glue, split_top_skip_glue, splittopskip) + set_value_entry_key(subtypes_glue, tab_skip_glue, tabskip) + set_value_entry_key(subtypes_glue, space_skip_glue, spaceskip) + set_value_entry_key(subtypes_glue, xspace_skip_glue, xspaceskip) + set_value_entry_key(subtypes_glue, zero_space_skip_glue, zerospaceskip) + set_value_entry_key(subtypes_glue, par_fill_left_skip_glue, parfillleftskip) + set_value_entry_key(subtypes_glue, par_fill_right_skip_glue, parfillskip) + set_value_entry_key(subtypes_glue, par_init_left_skip_glue, parinitleftskip) + set_value_entry_key(subtypes_glue, par_init_right_skip_glue, parinitrightskip) + set_value_entry_key(subtypes_glue, indent_skip_glue, indentskip) + set_value_entry_key(subtypes_glue, left_hang_skip_glue, lefthangskip) + set_value_entry_key(subtypes_glue, right_hang_skip_glue, righthangskip) + set_value_entry_key(subtypes_glue, correction_skip_glue, correctionskip) + set_value_entry_key(subtypes_glue, inter_math_skip_glue, intermathskip) + set_value_entry_key(subtypes_glue, ignored_glue, ignored) + set_value_entry_key(subtypes_glue, page_glue, 
page) + set_value_entry_key(subtypes_glue, math_skip_glue, mathskip) + set_value_entry_key(subtypes_glue, thin_mu_skip_glue, thinmuskip) + set_value_entry_key(subtypes_glue, med_mu_skip_glue, medmuskip) + set_value_entry_key(subtypes_glue, thick_mu_skip_glue, thickmuskip) + set_value_entry_key(subtypes_glue, conditional_math_glue, conditionalmathskip) + set_value_entry_key(subtypes_glue, rulebased_math_glue, rulebasedmathskip) + set_value_entry_key(subtypes_glue, mu_glue, muglue) + set_value_entry_key(subtypes_glue, a_leaders, leaders) + set_value_entry_key(subtypes_glue, c_leaders, cleaders) + set_value_entry_key(subtypes_glue, x_leaders, xleaders) + set_value_entry_key(subtypes_glue, g_leaders, gleaders) + set_value_entry_key(subtypes_glue, u_leaders, uleaders) + + subtypes_boundary = lmt_aux_allocate_value_info(word_boundary); + + set_value_entry_key(subtypes_boundary, cancel_boundary, cancel) + set_value_entry_key(subtypes_boundary, user_boundary, user) + set_value_entry_key(subtypes_boundary, protrusion_boundary, protrusion) + set_value_entry_key(subtypes_boundary, word_boundary, word) + + subtypes_penalty = lmt_aux_allocate_value_info(equation_number_penalty_subtype); + + set_value_entry_key(subtypes_penalty, user_penalty_subtype, userpenalty) + set_value_entry_key(subtypes_penalty, linebreak_penalty_subtype, linebreakpenalty) + set_value_entry_key(subtypes_penalty, line_penalty_subtype, linepenalty) + set_value_entry_key(subtypes_penalty, word_penalty_subtype, wordpenalty) + set_value_entry_key(subtypes_penalty, final_penalty_subtype, finalpenalty) + set_value_entry_key(subtypes_penalty, orphan_penalty_subtype, orphanpenalty) + set_value_entry_key(subtypes_penalty, math_pre_penalty_subtype, mathprepenalty) + set_value_entry_key(subtypes_penalty, math_post_penalty_subtype, mathpostpenalty) + set_value_entry_key(subtypes_penalty, before_display_penalty_subtype, beforedisplaypenalty) + set_value_entry_key(subtypes_penalty, after_display_penalty_subtype, 
afterdisplaypenalty) + set_value_entry_key(subtypes_penalty, equation_number_penalty_subtype, equationnumberpenalty) + + subtypes_kern = lmt_aux_allocate_value_info(vertical_math_kern_subtype); + + set_value_entry_key(subtypes_kern, font_kern_subtype, fontkern) + set_value_entry_key(subtypes_kern, explicit_kern_subtype, userkern) + set_value_entry_key(subtypes_kern, accent_kern_subtype, accentkern) + set_value_entry_key(subtypes_kern, italic_kern_subtype, italiccorrection) + set_value_entry_key(subtypes_kern, left_margin_kern_subtype, leftmarginkern) + set_value_entry_key(subtypes_kern, right_margin_kern_subtype, rightmarginkern) + set_value_entry_key(subtypes_kern, explicit_math_kern_subtype, mathkerns) + set_value_entry_key(subtypes_kern, math_shape_kern_subtype, mathshapekern) + set_value_entry_key(subtypes_kern, horizontal_math_kern_subtype, horizontalmathkern) + set_value_entry_key(subtypes_kern, vertical_math_kern_subtype, verticalmathkern) + + subtypes_rule = lmt_aux_allocate_value_info(image_rule_subtype); + + set_value_entry_key(subtypes_rule, normal_rule_subtype, normal) + set_value_entry_key(subtypes_rule, empty_rule_subtype, empty) + set_value_entry_key(subtypes_rule, strut_rule_subtype, strut) + set_value_entry_key(subtypes_rule, outline_rule_subtype, outline) + set_value_entry_key(subtypes_rule, user_rule_subtype, user) + set_value_entry_key(subtypes_rule, math_over_rule_subtype, over) + set_value_entry_key(subtypes_rule, math_under_rule_subtype, under) + set_value_entry_key(subtypes_rule, math_fraction_rule_subtype, fraction) + set_value_entry_key(subtypes_rule, math_radical_rule_subtype, radical) + set_value_entry_key(subtypes_rule, box_rule_subtype, box) + set_value_entry_key(subtypes_rule, image_rule_subtype, image) + + subtypes_glyph = lmt_aux_allocate_value_info(glyph_math_accent_subtype); + + set_value_entry_key(subtypes_glyph, glyph_unset_subtype, unset) + set_value_entry_key(subtypes_glyph, glyph_character_subtype, character) + 
set_value_entry_key(subtypes_glyph, glyph_ligature_subtype, ligature) + set_value_entry_key(subtypes_glyph, glyph_math_delimiter_subtype, delimiter); + set_value_entry_key(subtypes_glyph, glyph_math_extensible_subtype, extensible); + set_value_entry_key(subtypes_glyph, glyph_math_ordinary_subtype, ord); + set_value_entry_key(subtypes_glyph, glyph_math_operator_subtype, op); + set_value_entry_key(subtypes_glyph, glyph_math_binary_subtype, bin); + set_value_entry_key(subtypes_glyph, glyph_math_relation_subtype, rel); + set_value_entry_key(subtypes_glyph, glyph_math_open_subtype, open); + set_value_entry_key(subtypes_glyph, glyph_math_close_subtype, close); + set_value_entry_key(subtypes_glyph, glyph_math_punctuation_subtype, punct); + set_value_entry_key(subtypes_glyph, glyph_math_variable_subtype, variable); + set_value_entry_key(subtypes_glyph, glyph_math_active_subtype, active); + set_value_entry_key(subtypes_glyph, glyph_math_inner_subtype, inner); + set_value_entry_key(subtypes_glyph, glyph_math_over_subtype, over); + set_value_entry_key(subtypes_glyph, glyph_math_under_subtype, under); + set_value_entry_key(subtypes_glyph, glyph_math_fraction_subtype, fraction); + set_value_entry_key(subtypes_glyph, glyph_math_radical_subtype, radical); + set_value_entry_key(subtypes_glyph, glyph_math_middle_subtype, middle); + set_value_entry_key(subtypes_glyph, glyph_math_accent_subtype, accent); + + subtypes_disc = lmt_aux_allocate_value_info(syllable_discretionary_code); + + set_value_entry_key(subtypes_disc, normal_discretionary_code, discretionary) + set_value_entry_key(subtypes_disc, explicit_discretionary_code, explicit) + set_value_entry_key(subtypes_disc, automatic_discretionary_code, automatic) + set_value_entry_key(subtypes_disc, mathematics_discretionary_code, math) + set_value_entry_key(subtypes_disc, syllable_discretionary_code, regular) + + subtypes_fence = lmt_aux_allocate_value_info(no_fence_side); + + set_value_entry_key(subtypes_fence, unset_fence_side, 
unset) + set_value_entry_key(subtypes_fence, left_fence_side, left) + set_value_entry_key(subtypes_fence, middle_fence_side, middle) + set_value_entry_key(subtypes_fence, right_fence_side, right) + set_value_entry_key(subtypes_fence, left_operator_side, operator) + set_value_entry_key(subtypes_fence, no_fence_side, no) + + subtypes_list = lmt_aux_allocate_value_info(local_middle_list); + + set_value_entry_key(subtypes_list, unknown_list, unknown) + set_value_entry_key(subtypes_list, line_list, line) + set_value_entry_key(subtypes_list, hbox_list, box) + set_value_entry_key(subtypes_list, indent_list, indent) + set_value_entry_key(subtypes_list, container_list, container) + set_value_entry_key(subtypes_list, align_row_list, alignment) + set_value_entry_key(subtypes_list, align_cell_list, cell) + set_value_entry_key(subtypes_list, equation_list, equation) + set_value_entry_key(subtypes_list, equation_number_list, equationnumber) + set_value_entry_key(subtypes_list, math_list_list, math) + set_value_entry_key(subtypes_list, math_pack_list, mathpack) + set_value_entry_key(subtypes_list, math_char_list, mathchar) + set_value_entry_key(subtypes_list, math_h_extensible_list, hextensible) + set_value_entry_key(subtypes_list, math_v_extensible_list, vextensible) + set_value_entry_key(subtypes_list, math_h_delimiter_list, hdelimiter) + set_value_entry_key(subtypes_list, math_v_delimiter_list, vdelimiter) + set_value_entry_key(subtypes_list, math_over_delimiter_list, overdelimiter) + set_value_entry_key(subtypes_list, math_under_delimiter_list, underdelimiter) + set_value_entry_key(subtypes_list, math_numerator_list, numerator) + set_value_entry_key(subtypes_list, math_denominator_list, denominator) + set_value_entry_key(subtypes_list, math_modifier_list, modifier) + set_value_entry_key(subtypes_list, math_fraction_list, fraction) + set_value_entry_key(subtypes_list, math_nucleus_list, nucleus) + set_value_entry_key(subtypes_list, math_sup_list, sup) + 
set_value_entry_key(subtypes_list, math_sub_list, sub) + set_value_entry_key(subtypes_list, math_pre_post_list, prepost) + set_value_entry_key(subtypes_list, math_degree_list, degree) + set_value_entry_key(subtypes_list, math_scripts_list, scripts) + set_value_entry_key(subtypes_list, math_over_list, over) + set_value_entry_key(subtypes_list, math_under_list, under) + set_value_entry_key(subtypes_list, math_accent_list, accent) + set_value_entry_key(subtypes_list, math_radical_list, radical) + set_value_entry_key(subtypes_list, math_fence_list, fence) + set_value_entry_key(subtypes_list, math_rule_list, rule) + set_value_entry_key(subtypes_list, math_ghost_list, ghost) + set_value_entry_key(subtypes_list, insert_result_list, insert) + set_value_entry_key(subtypes_list, local_list, local) + set_value_entry_key(subtypes_list, local_left_list, left) + set_value_entry_key(subtypes_list, local_right_list, right) + set_value_entry_key(subtypes_list, local_middle_list, middle) + + subtypes_math = lmt_aux_allocate_value_info(end_inline_math); + + set_value_entry_key(subtypes_math, begin_inline_math, beginmath) + set_value_entry_key(subtypes_math, end_inline_math, endmath) + + subtypes_adjust = lmt_aux_allocate_value_info(local_adjust_code); + + set_value_entry_key(subtypes_adjust, pre_adjust_code, pre) + set_value_entry_key(subtypes_adjust, post_adjust_code, post) + set_value_entry_key(subtypes_adjust, local_adjust_code, local) + + subtypes_mark = lmt_aux_allocate_value_info(reset_mark_value_code); + + set_value_entry_key(subtypes_mark, set_mark_value_code, set) + set_value_entry_key(subtypes_mark, reset_mark_value_code, reset) + + subtypes_noad = lmt_aux_allocate_value_info(vcenter_noad_subtype); // last_noad_subtype + + set_value_entry_key(subtypes_noad, ordinary_noad_subtype, ord) + set_value_entry_key(subtypes_noad, operator_noad_subtype, op) + set_value_entry_key(subtypes_noad, binary_noad_subtype, bin) + set_value_entry_key(subtypes_noad, relation_noad_subtype, rel) 
+ set_value_entry_key(subtypes_noad, open_noad_subtype, open) + set_value_entry_key(subtypes_noad, close_noad_subtype, close) + set_value_entry_key(subtypes_noad, punctuation_noad_subtype, punct) + set_value_entry_key(subtypes_noad, variable_noad_subtype, variable) + set_value_entry_key(subtypes_noad, active_noad_subtype, active) + set_value_entry_key(subtypes_noad, inner_noad_subtype, inner) + set_value_entry_key(subtypes_noad, under_noad_subtype, under) + set_value_entry_key(subtypes_noad, over_noad_subtype, over) + set_value_entry_key(subtypes_noad, fraction_noad_subtype, fraction) + set_value_entry_key(subtypes_noad, radical_noad_subtype, radical) + set_value_entry_key(subtypes_noad, middle_noad_subtype, middle) + set_value_entry_key(subtypes_noad, accent_noad_subtype, accent) + set_value_entry_key(subtypes_noad, fenced_noad_subtype, fenced) + set_value_entry_key(subtypes_noad, ghost_noad_subtype, ghost) + set_value_entry_key(subtypes_noad, vcenter_noad_subtype, vcenter) + + subtypes_choice = lmt_aux_allocate_value_info(discretionary_choice_subtype); + + set_value_entry_key(subtypes_choice, normal_choice_subtype, normal) + set_value_entry_key(subtypes_choice, discretionary_choice_subtype, discretionary) + + subtypes_radical = lmt_aux_allocate_value_info(h_extensible_radical_subtype); + + set_value_entry_key(subtypes_radical, normal_radical_subtype, normal) + set_value_entry_key(subtypes_radical, radical_radical_subtype, radical) + set_value_entry_key(subtypes_radical, root_radical_subtype, root) + set_value_entry_key(subtypes_radical, rooted_radical_subtype, rooted) + set_value_entry_key(subtypes_radical, under_delimiter_radical_subtype, underdelimiter) + set_value_entry_key(subtypes_radical, over_delimiter_radical_subtype, overdelimiter) + set_value_entry_key(subtypes_radical, delimiter_under_radical_subtype, delimiterunder) + set_value_entry_key(subtypes_radical, delimiter_over_radical_subtype, delimiterover) + set_value_entry_key(subtypes_radical, 
delimited_radical_subtype, delimited) + set_value_entry_key(subtypes_radical, h_extensible_radical_subtype, hextensible) + + subtypes_accent = lmt_aux_allocate_value_info(fixedboth_accent_subtype); + + set_value_entry_key(subtypes_accent, bothflexible_accent_subtype, bothflexible) + set_value_entry_key(subtypes_accent, fixedtop_accent_subtype, fixedtop) + set_value_entry_key(subtypes_accent, fixedbottom_accent_subtype, fixedbottom) + set_value_entry_key(subtypes_accent, fixedboth_accent_subtype, fixedboth) + + subtypes_attribute = lmt_aux_allocate_value_info(attribute_value_subtype); + + set_value_entry_key(subtypes_attribute, attribute_list_subtype, list) + set_value_entry_key(subtypes_attribute, attribute_value_subtype, value) + + /*tex The fields of nodes. */ + + lmt_node_fields_accent = lmt_aux_allocate_value_info(9); + + set_value_entry_val(lmt_node_fields_accent, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_accent, 1, node_list_field, nucleus); + set_value_entry_val(lmt_node_fields_accent, 2, node_list_field, sub); + set_value_entry_val(lmt_node_fields_accent, 3, node_list_field, sup); + set_value_entry_val(lmt_node_fields_accent, 4, node_list_field, accent); + set_value_entry_val(lmt_node_fields_accent, 5, node_list_field, bottomaccent); + set_value_entry_val(lmt_node_fields_accent, 6, node_list_field, topaccent); + set_value_entry_val(lmt_node_fields_accent, 7, node_list_field, overlayaccent); + set_value_entry_val(lmt_node_fields_accent, 8, node_list_field, fraction); + + lmt_node_fields_adjust = lmt_aux_allocate_value_info(2); + + set_value_entry_val(lmt_node_fields_adjust, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_adjust, 1, node_list_field, list); + + lmt_node_fields_attribute = lmt_aux_allocate_value_info(4); + + set_value_entry_val(lmt_node_fields_attribute, 0, integer_field, count); + set_value_entry_val(lmt_node_fields_attribute, 1, integer_field, data); + set_value_entry_val(lmt_node_fields_attribute, 2, 
integer_field, index); + set_value_entry_val(lmt_node_fields_attribute, 3, integer_field, value); + + /* Nothing */ + + lmt_node_fields_boundary = lmt_aux_allocate_value_info(2); + + set_value_entry_val(lmt_node_fields_boundary, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_boundary, 1, integer_field, data); + + lmt_node_fields_choice = lmt_aux_allocate_value_info(5); + + set_value_entry_val(lmt_node_fields_choice, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_choice, 1, node_list_field, display); + set_value_entry_val(lmt_node_fields_choice, 2, node_list_field, text); + set_value_entry_val(lmt_node_fields_choice, 3, node_list_field, script); + set_value_entry_val(lmt_node_fields_choice, 4, node_list_field, scriptscript); + + lmt_node_fields_delimiter = lmt_aux_allocate_value_info(5); + + set_value_entry_val(lmt_node_fields_delimiter, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_delimiter, 1, integer_field, smallfamily); + set_value_entry_val(lmt_node_fields_delimiter, 2, integer_field, smallchar); + set_value_entry_val(lmt_node_fields_delimiter, 3, integer_field, largefamily); + set_value_entry_val(lmt_node_fields_delimiter, 4, integer_field, largechar); + + lmt_node_fields_dir = lmt_aux_allocate_value_info(3); + + set_value_entry_val(lmt_node_fields_dir, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_dir, 1, integer_field, dir); + set_value_entry_val(lmt_node_fields_dir, 2, integer_field, level); + + lmt_node_fields_disc = lmt_aux_allocate_value_info( 6); + + set_value_entry_val(lmt_node_fields_disc, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_disc, 1, node_list_field, pre); + set_value_entry_val(lmt_node_fields_disc, 2, node_list_field, post); + set_value_entry_val(lmt_node_fields_disc, 3, node_list_field, replace); + set_value_entry_val(lmt_node_fields_disc, 4, integer_field, penalty); + set_value_entry_val(lmt_node_fields_disc, 5, integer_field, options); + + 
lmt_node_fields_fence = lmt_aux_allocate_value_info(10); + + set_value_entry_val(lmt_node_fields_fence, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_fence, 1, node_list_field, delimiter); + set_value_entry_val(lmt_node_fields_fence, 2, dimension_field, italic); + set_value_entry_val(lmt_node_fields_fence, 3, dimension_field, height); + set_value_entry_val(lmt_node_fields_fence, 4, dimension_field, depth); + set_value_entry_val(lmt_node_fields_fence, 5, integer_field, options); + set_value_entry_val(lmt_node_fields_fence, 6, integer_field, class); + set_value_entry_val(lmt_node_fields_fence, 7, integer_field, source); + set_value_entry_val(lmt_node_fields_fence, 8, node_list_field, top); + set_value_entry_val(lmt_node_fields_fence, 9, node_list_field, bottom); + + lmt_node_fields_fraction = lmt_aux_allocate_value_info(9); + + set_value_entry_val(lmt_node_fields_fraction, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_fraction, 1, dimension_field, width); + set_value_entry_val(lmt_node_fields_fraction, 2, node_list_field, numerator); + set_value_entry_val(lmt_node_fields_fraction, 3, node_list_field, denominator); + set_value_entry_val(lmt_node_fields_fraction, 4, node_list_field, left); + set_value_entry_val(lmt_node_fields_fraction, 5, node_list_field, right); + set_value_entry_val(lmt_node_fields_fraction, 6, node_list_field, middle); + set_value_entry_val(lmt_node_fields_fraction, 7, integer_field, fam); + set_value_entry_val(lmt_node_fields_fraction, 8, integer_field, options); + + lmt_node_fields_glue = lmt_aux_allocate_value_info(8); + + set_value_entry_val(lmt_node_fields_glue, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_glue, 1, node_list_field, leader); + set_value_entry_val(lmt_node_fields_glue, 2, dimension_field, width); + set_value_entry_val(lmt_node_fields_glue, 3, dimension_field, stretch); + set_value_entry_val(lmt_node_fields_glue, 4, dimension_field, shrink); + 
set_value_entry_val(lmt_node_fields_glue, 5, integer_field, stretchorder); + set_value_entry_val(lmt_node_fields_glue, 6, integer_field, shrinkorder); + set_value_entry_val(lmt_node_fields_glue, 7, integer_field, font); + + lmt_node_fields_glue_spec = lmt_aux_allocate_value_info(5); + + set_value_entry_val(lmt_node_fields_glue_spec, 0, dimension_field, width); + set_value_entry_val(lmt_node_fields_glue_spec, 1, dimension_field, stretch); + set_value_entry_val(lmt_node_fields_glue_spec, 2, dimension_field, shrink); + set_value_entry_val(lmt_node_fields_glue_spec, 3, integer_field, stretchorder); + set_value_entry_val(lmt_node_fields_glue_spec, 4, integer_field, shrinkorder); + + lmt_node_fields_glyph = lmt_aux_allocate_value_info(27); + + set_value_entry_val(lmt_node_fields_glyph, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_glyph, 1, integer_field, char); + set_value_entry_val(lmt_node_fields_glyph, 2, integer_field, font); + set_value_entry_val(lmt_node_fields_glyph, 3, integer_field, language); + set_value_entry_val(lmt_node_fields_glyph, 4, integer_field, lhmin); + set_value_entry_val(lmt_node_fields_glyph, 5, integer_field, rhmin); + set_value_entry_val(lmt_node_fields_glyph, 6, integer_field, uchyph); + set_value_entry_val(lmt_node_fields_glyph, 7, integer_field, state); + set_value_entry_val(lmt_node_fields_glyph, 8, dimension_field, left); + set_value_entry_val(lmt_node_fields_glyph, 9, dimension_field, right); + set_value_entry_val(lmt_node_fields_glyph, 10, dimension_field, xoffset); + set_value_entry_val(lmt_node_fields_glyph, 11, dimension_field, yoffset); + set_value_entry_val(lmt_node_fields_glyph, 12, dimension_field, xscale); + set_value_entry_val(lmt_node_fields_glyph, 13, dimension_field, yscale); + set_value_entry_val(lmt_node_fields_glyph, 14, dimension_field, width); + set_value_entry_val(lmt_node_fields_glyph, 15, dimension_field, height); + set_value_entry_val(lmt_node_fields_glyph, 16, dimension_field, depth); + 
set_value_entry_val(lmt_node_fields_glyph, 17, dimension_field, total); + set_value_entry_val(lmt_node_fields_glyph, 18, integer_field, expansion); + set_value_entry_val(lmt_node_fields_glyph, 19, integer_field, data); + set_value_entry_val(lmt_node_fields_glyph, 20, integer_field, script); + set_value_entry_val(lmt_node_fields_glyph, 21, integer_field, hyphenate); + set_value_entry_val(lmt_node_fields_glyph, 22, integer_field, options); + set_value_entry_val(lmt_node_fields_glyph, 23, integer_field, protected); + set_value_entry_val(lmt_node_fields_glyph, 24, integer_field, properties); + set_value_entry_val(lmt_node_fields_glyph, 25, integer_field, group); + set_value_entry_val(lmt_node_fields_glyph, 26, integer_field, index); + + lmt_node_fields_insert = lmt_aux_allocate_value_info(6); + + set_value_entry_val(lmt_node_fields_insert, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_insert, 1, integer_field, cost); + set_value_entry_val(lmt_node_fields_insert, 2, dimension_field, depth); + set_value_entry_val(lmt_node_fields_insert, 3, dimension_field, height); + set_value_entry_val(lmt_node_fields_insert, 4, integer_field, spec); + set_value_entry_val(lmt_node_fields_insert, 5, node_list_field, list); + + lmt_node_fields_split = lmt_aux_allocate_value_info(6); + + set_value_entry_val(lmt_node_fields_split, 0, attribute_field, height); + set_value_entry_val(lmt_node_fields_split, 1, integer_field, index); + set_value_entry_val(lmt_node_fields_split, 2, node_field, lastinsert); + set_value_entry_val(lmt_node_fields_split, 3, node_field, bestinsert); + set_value_entry_val(lmt_node_fields_split, 4, integer_field, stretchorder); + set_value_entry_val(lmt_node_fields_split, 5, integer_field, shrinkorder); + + lmt_node_fields_kern = lmt_aux_allocate_value_info(3); + + set_value_entry_val(lmt_node_fields_kern, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_kern, 1, dimension_field, kern); + set_value_entry_val(lmt_node_fields_kern, 2, 
integer_field, expansion); + + lmt_node_fields_list = lmt_aux_allocate_value_info(20); + + set_value_entry_val(lmt_node_fields_list, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_list, 1, dimension_field, width); + set_value_entry_val(lmt_node_fields_list, 2, dimension_field, depth); + set_value_entry_val(lmt_node_fields_list, 3, dimension_field, height); + set_value_entry_val(lmt_node_fields_list, 4, integer_field, direction); + set_value_entry_val(lmt_node_fields_list, 5, dimension_field, shift); + set_value_entry_val(lmt_node_fields_list, 6, integer_field, glueorder); + set_value_entry_val(lmt_node_fields_list, 7, integer_field, gluesign); + set_value_entry_val(lmt_node_fields_list, 8, integer_field, glueset); + set_value_entry_val(lmt_node_fields_list, 9, node_list_field, list); + set_value_entry_val(lmt_node_fields_list, 10, integer_field, orientation); + set_value_entry_val(lmt_node_fields_list, 11, integer_field, source); + set_value_entry_val(lmt_node_fields_list, 12, integer_field, target); + set_value_entry_val(lmt_node_fields_list, 13, dimension_field, woffset); + set_value_entry_val(lmt_node_fields_list, 14, dimension_field, hoffset); + set_value_entry_val(lmt_node_fields_list, 15, dimension_field, doffset); + set_value_entry_val(lmt_node_fields_list, 16, dimension_field, xoffset); + set_value_entry_val(lmt_node_fields_list, 17, dimension_field, yoffset); + set_value_entry_val(lmt_node_fields_list, 18, integer_field, state); + set_value_entry_val(lmt_node_fields_list, 19, integer_field, class); + + lmt_node_fields_par = lmt_aux_allocate_value_info(9); + set_value_entry_val(lmt_node_fields_par, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_par, 1, integer_field, interlinepenalty); + set_value_entry_val(lmt_node_fields_par, 2, integer_field, brokenpenalty); + set_value_entry_val(lmt_node_fields_par, 3, integer_field, dir); + set_value_entry_val(lmt_node_fields_par, 4, node_field, leftbox); + 
set_value_entry_val(lmt_node_fields_par, 5, dimension_field, leftboxwidth); + set_value_entry_val(lmt_node_fields_par, 6, node_field, rightbox); + set_value_entry_val(lmt_node_fields_par, 7, dimension_field, rightboxwidth); + set_value_entry_val(lmt_node_fields_par, 8, node_field, middlebox); + + lmt_node_fields_mark = lmt_aux_allocate_value_info(3); + + set_value_entry_val(lmt_node_fields_mark, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_mark, 1, integer_field, class); + set_value_entry_val(lmt_node_fields_mark, 2, token_list_field, mark); + + lmt_node_fields_math = lmt_aux_allocate_value_info(8); + + set_value_entry_val(lmt_node_fields_math, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_math, 1, integer_field, surround); + set_value_entry_val(lmt_node_fields_math, 2, dimension_field, width); + set_value_entry_val(lmt_node_fields_math, 3, dimension_field, stretch); + set_value_entry_val(lmt_node_fields_math, 4, dimension_field, shrink); + set_value_entry_val(lmt_node_fields_math, 5, integer_field, stretchorder); + set_value_entry_val(lmt_node_fields_math, 6, integer_field, shrinkorder); + set_value_entry_val(lmt_node_fields_math, 7, integer_field, penalty); + + lmt_node_fields_math_char = lmt_aux_allocate_value_info(7); + + set_value_entry_val(lmt_node_fields_math_char, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_math_char, 1, integer_field, fam); + set_value_entry_val(lmt_node_fields_math_char, 2, integer_field, char); + set_value_entry_val(lmt_node_fields_math_char, 3, integer_field, options); + set_value_entry_val(lmt_node_fields_math_char, 4, integer_field, properties); + set_value_entry_val(lmt_node_fields_math_char, 5, integer_field, group); + set_value_entry_val(lmt_node_fields_math_char, 6, integer_field, index); + + lmt_node_fields_math_text_char = lmt_aux_allocate_value_info(4); + + set_value_entry_val(lmt_node_fields_math_text_char, 0, attribute_field, attr); + 
set_value_entry_val(lmt_node_fields_math_text_char, 1, integer_field, fam); + set_value_entry_val(lmt_node_fields_math_text_char, 2, integer_field, char); + set_value_entry_val(lmt_node_fields_math_text_char, 3, integer_field, options); + + lmt_node_fields_noad = lmt_aux_allocate_value_info(8); + + set_value_entry_val(lmt_node_fields_noad, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_noad, 1, node_list_field, nucleus); + set_value_entry_val(lmt_node_fields_noad, 2, node_list_field, sub); + set_value_entry_val(lmt_node_fields_noad, 3, node_list_field, sup); + set_value_entry_val(lmt_node_fields_noad, 4, node_list_field, subpre); + set_value_entry_val(lmt_node_fields_noad, 5, node_list_field, suppre); + set_value_entry_val(lmt_node_fields_noad, 6, node_list_field, prime); + set_value_entry_val(lmt_node_fields_noad, 7, integer_field, options); + + lmt_node_fields_penalty = lmt_aux_allocate_value_info(2); + + set_value_entry_val(lmt_node_fields_penalty, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_penalty, 1, integer_field, penalty); + + lmt_node_fields_radical = lmt_aux_allocate_value_info(11); + + set_value_entry_val(lmt_node_fields_radical, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_radical, 1, node_list_field, nucleus); + set_value_entry_val(lmt_node_fields_radical, 2, node_list_field, sub); + set_value_entry_val(lmt_node_fields_radical, 3, node_list_field, sup); + set_value_entry_val(lmt_node_fields_radical, 4, node_list_field, presub); + set_value_entry_val(lmt_node_fields_radical, 5, node_list_field, presup); + set_value_entry_val(lmt_node_fields_radical, 6, node_list_field, prime); + set_value_entry_val(lmt_node_fields_radical, 7, node_list_field, left); + set_value_entry_val(lmt_node_fields_radical, 8, node_list_field, degree); + set_value_entry_val(lmt_node_fields_radical, 9, dimension_field, width); + set_value_entry_val(lmt_node_fields_radical, 10, integer_field, options); + + 
lmt_node_fields_rule = lmt_aux_allocate_value_info(11); + + set_value_entry_val(lmt_node_fields_rule, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_rule, 1, dimension_field, width); + set_value_entry_val(lmt_node_fields_rule, 2, dimension_field, depth); + set_value_entry_val(lmt_node_fields_rule, 3, dimension_field, height); + set_value_entry_val(lmt_node_fields_rule, 4, dimension_field, xoffset); + set_value_entry_val(lmt_node_fields_rule, 5, dimension_field, yoffset); + set_value_entry_val(lmt_node_fields_rule, 6, dimension_field, left); + set_value_entry_val(lmt_node_fields_rule, 7, dimension_field, right); + set_value_entry_val(lmt_node_fields_rule, 8, integer_field, data); + set_value_entry_val(lmt_node_fields_rule, 9, integer_field, char); + set_value_entry_val(lmt_node_fields_rule, 10, integer_field, font); + + lmt_node_fields_style = lmt_aux_allocate_value_info(2); + + set_value_entry_val(lmt_node_fields_style, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_style, 1, integer_field, style); + + lmt_node_fields_parameter = lmt_aux_allocate_value_info(4); + + set_value_entry_val(lmt_node_fields_parameter, 0, integer_field, style); + set_value_entry_val(lmt_node_fields_parameter, 1, integer_field, name); + set_value_entry_val(lmt_node_fields_parameter, 2, integer_field, value); + set_value_entry_val(lmt_node_fields_parameter, 3, node_list_field, list); + + lmt_node_fields_sub_box = lmt_aux_allocate_value_info(2); + + set_value_entry_val(lmt_node_fields_sub_box, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_sub_box, 1, node_list_field, list); + + lmt_node_fields_sub_mlist = lmt_aux_allocate_value_info(2); + + set_value_entry_val(lmt_node_fields_sub_mlist, 0, attribute_field, attr); + set_value_entry_val(lmt_node_fields_sub_mlist, 1, node_list_field, list); + + lmt_node_fields_unset = lmt_aux_allocate_value_info(11); + + set_value_entry_val(lmt_node_fields_unset, 0, attribute_field, attr); + 
set_value_entry_val(lmt_node_fields_unset, 1, dimension_field, width); + set_value_entry_val(lmt_node_fields_unset, 2, dimension_field, depth); + set_value_entry_val(lmt_node_fields_unset, 3, dimension_field, height); + set_value_entry_val(lmt_node_fields_unset, 4, integer_field, dir); + set_value_entry_val(lmt_node_fields_unset, 5, dimension_field, shrink); + set_value_entry_val(lmt_node_fields_unset, 6, integer_field, glueorder); + set_value_entry_val(lmt_node_fields_unset, 7, integer_field, gluesign); + set_value_entry_val(lmt_node_fields_unset, 8, dimension_field, stretch); + set_value_entry_val(lmt_node_fields_unset, 9, integer_field, span); + set_value_entry_val(lmt_node_fields_unset, 10, node_list_field, list); + + lmt_node_fields_whatsit = lmt_aux_allocate_value_info(1); + + set_value_entry_val(lmt_node_fields_whatsit, 0, attribute_field, attr); + + lmt_interface.node_data = lmt_memory_malloc((passive_node + 2) * sizeof(node_info)); + + /*tex + We start with the nodes that users can encounter. The order is mostly the one that \TEX\ + uses but we have move some around because we have some more and sometimes a bit different + kind of nodes. You should use abstractions anyway, so numbers mean nothing. In original + \TEX\ there are sometimes tests like |if (foo < kern_node)| but these have been replaces + by switches and (un)equality tests so that the order is not really important. + + Subtypes in nodes and codes in commands sometimes are sort of in sync but don't rely on + that! 
+ */ + + lmt_interface.node_data[hlist_node] = (node_info) { .id = hlist_node, .size = box_node_size, .first = 0, .last = last_list_subtype, .subtypes = subtypes_list, .fields = lmt_node_fields_list, .name = lua_key(hlist), .lua = lua_key_index(hlist), .visible = 1 }; + lmt_interface.node_data[vlist_node] = (node_info) { .id = vlist_node, .size = box_node_size, .first = 0, .last = last_list_subtype, .subtypes = subtypes_list, .fields = lmt_node_fields_list, .name = lua_key(vlist), .lua = lua_key_index(vlist), .visible = 1 }; + lmt_interface.node_data[rule_node] = (node_info) { .id = rule_node, .size = rule_node_size, .first = 0, .last = last_rule_subtype, .subtypes = subtypes_rule, .fields = lmt_node_fields_rule, .name = lua_key(rule), .lua = lua_key_index(rule), .visible = 1 }; + lmt_interface.node_data[insert_node] = (node_info) { .id = insert_node, .size = insert_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_insert, .name = lua_key(insert), .lua = lua_key_index(insert), .visible = 1 }; + lmt_interface.node_data[mark_node] = (node_info) { .id = mark_node, .size = mark_node_size, .first = 0, .last = last_mark_subtype, .subtypes = subtypes_mark, .fields = lmt_node_fields_mark, .name = lua_key(mark), .lua = lua_key_index(mark), .visible = 1 }; + lmt_interface.node_data[adjust_node] = (node_info) { .id = adjust_node, .size = adjust_node_size, .first = 0, .last = last_adjust_subtype, .subtypes = subtypes_adjust, .fields = lmt_node_fields_adjust, .name = lua_key(adjust), .lua = lua_key_index(adjust), .visible = 1 }; + lmt_interface.node_data[boundary_node] = (node_info) { .id = boundary_node, .size = boundary_node_size, .first = 0, .last = last_boundary_subtype, .subtypes = subtypes_boundary, .fields = lmt_node_fields_boundary, .name = lua_key(boundary), .lua = lua_key_index(boundary), .visible = 1 }; + lmt_interface.node_data[disc_node] = (node_info) { .id = disc_node, .size = disc_node_size, .first = 0, .last = 
last_discretionary_subtype, .subtypes = subtypes_disc, .fields = lmt_node_fields_disc, .name = lua_key(disc), .lua = lua_key_index(disc), .visible = 1 }; + lmt_interface.node_data[whatsit_node] = (node_info) { .id = whatsit_node, .size = whatsit_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_whatsit, .name = lua_key(whatsit), .lua = lua_key_index(whatsit), .visible = 1 }; + lmt_interface.node_data[par_node] = (node_info) { .id = par_node, .size = par_node_size, .first = 0, .last = last_par_subtype, .subtypes = subtypes_par, .fields = lmt_node_fields_par, .name = lua_key(par), .lua = lua_key_index(par), .visible = 1 }; + lmt_interface.node_data[dir_node] = (node_info) { .id = dir_node, .size = dir_node_size, .first = 0, .last = last_dir_subtype, .subtypes = subtypes_dir, .fields = lmt_node_fields_dir, .name = lua_key(dir), .lua = lua_key_index(dir), .visible = 1 }; + lmt_interface.node_data[math_node] = (node_info) { .id = math_node, .size = math_node_size, .first = 0, .last = last_math_subtype, .subtypes = subtypes_math, .fields = lmt_node_fields_math, .name = lua_key(math), .lua = lua_key_index(math), .visible = 1 }; + lmt_interface.node_data[glue_node] = (node_info) { .id = glue_node, .size = glue_node_size, .first = 0, .last = last_glue_subtype, .subtypes = subtypes_glue, .fields = lmt_node_fields_glue, .name = lua_key(glue), .lua = lua_key_index(glue), .visible = 1 }; + lmt_interface.node_data[kern_node] = (node_info) { .id = kern_node, .size = kern_node_size, .first = 0, .last = last_kern_subtype, .subtypes = subtypes_kern, .fields = lmt_node_fields_kern, .name = lua_key(kern), .lua = lua_key_index(kern), .visible = 1 }; + lmt_interface.node_data[penalty_node] = (node_info) { .id = penalty_node, .size = penalty_node_size, .first = 0, .last = last_penalty_subtype, .subtypes = subtypes_penalty, .fields = lmt_node_fields_penalty, .name = lua_key(penalty), .lua = lua_key_index(penalty), .visible = 1 }; + 
lmt_interface.node_data[style_node] = (node_info) { .id = style_node, .size = style_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_style, .name = lua_key(style), .lua = lua_key_index(style), .visible = 1 }; + lmt_interface.node_data[choice_node] = (node_info) { .id = choice_node, .size = choice_node_size, .first = 0, .last = last_choice_subtype, .subtypes = subtypes_choice, .fields = lmt_node_fields_choice, .name = lua_key(choice), .lua = lua_key_index(choice), .visible = 1 }; + lmt_interface.node_data[parameter_node] = (node_info) { .id = parameter_node, .size = parameter_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_parameter, .name = lua_key(parameter), .lua = lua_key_index(parameter), .visible = 1 }; + lmt_interface.node_data[simple_noad] = (node_info) { .id = simple_noad, .size = noad_size, .first = 0, .last = last_noad_subtype, .subtypes = subtypes_noad, .fields = lmt_node_fields_noad, .name = lua_key(noad), .lua = lua_key_index(noad), .visible = 1 }; + lmt_interface.node_data[radical_noad] = (node_info) { .id = radical_noad, .size = radical_noad_size, .first = 0, .last = last_radical_subtype, .subtypes = subtypes_radical, .fields = lmt_node_fields_radical, .name = lua_key(radical), .lua = lua_key_index(radical), .visible = 1 }; + lmt_interface.node_data[fraction_noad] = (node_info) { .id = fraction_noad, .size = fraction_noad_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_fraction, .name = lua_key(fraction), .lua = lua_key_index(fraction), .visible = 1 }; + lmt_interface.node_data[accent_noad] = (node_info) { .id = accent_noad, .size = accent_noad_size, .first = 0, .last = last_accent_subtype, .subtypes = subtypes_accent, .fields = lmt_node_fields_accent, .name = lua_key(accent), .lua = lua_key_index(accent), .visible = 1 }; + lmt_interface.node_data[fence_noad] = (node_info) { .id = fence_noad, .size = fence_noad_size, .first = 0, .last = last_fence_subtype, .subtypes = 
subtypes_fence, .fields = lmt_node_fields_fence, .name = lua_key(fence), .lua = lua_key_index(fence), .visible = 1 }; + lmt_interface.node_data[math_char_node] = (node_info) { .id = math_char_node, .size = math_kernel_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_math_char, .name = lua_key(mathchar), .lua = lua_key_index(mathchar), .visible = 1 }; + lmt_interface.node_data[math_text_char_node] = (node_info) { .id = math_text_char_node, .size = math_kernel_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_math_text_char, .name = lua_key(mathtextchar), .lua = lua_key_index(mathtextchar), .visible = 1 }; + lmt_interface.node_data[sub_box_node] = (node_info) { .id = sub_box_node, .size = math_kernel_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_sub_box, .name = lua_key(subbox), .lua = lua_key_index(subbox), .visible = 1 }; + lmt_interface.node_data[sub_mlist_node] = (node_info) { .id = sub_mlist_node, .size = math_kernel_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_sub_mlist, .name = lua_key(submlist), .lua = lua_key_index(submlist), .visible = 1 }; + lmt_interface.node_data[delimiter_node] = (node_info) { .id = delimiter_node, .size = math_delimiter_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_delimiter, .name = lua_key(delimiter), .lua = lua_key_index(delimiter), .visible = 1 }; + lmt_interface.node_data[glyph_node] = (node_info) { .id = glyph_node, .size = glyph_node_size, .first = 0, .last = last_glyph_subtype, .subtypes = subtypes_glyph, .fields = lmt_node_fields_glyph, .name = lua_key(glyph), .lua = lua_key_index(glyph), .visible = 1 }; + + /*tex + Who knows when someone needs is, so for now we keep it exposed. 
+ */ + + lmt_interface.node_data[unset_node] = (node_info) { .id = unset_node, .size = box_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_unset, .name = lua_key(unset), .lua = lua_key_index(unset), .visible = 1 }; + lmt_interface.node_data[specification_node] = (node_info) { .id = specification_node, .size = specification_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(specification), .lua = lua_key_index(specification), .visible = 0 }; + lmt_interface.node_data[align_record_node] = (node_info) { .id = align_record_node, .size = box_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_unset, .name = lua_key(alignrecord), .lua = lua_key_index(alignrecord), .visible = 1 }; + + /*tex + These nodes never show up in nodelists and are managed special. Messing with such nodes + directly is not a good idea. + */ + + lmt_interface.node_data[attribute_node] = (node_info) { .id = attribute_node, .size = attribute_node_size, .first = 0, .last = last_attribute_subtype, .subtypes = subtypes_attribute,.fields = lmt_node_fields_attribute, .name = lua_key(attribute), .lua = lua_key_index(attribute), .visible = 1 }; + + /* + We still expose the glue spec as they are the containers for skip registers but there is no + real need to use them at the user end. + */ + + lmt_interface.node_data[glue_spec_node] = (node_info) { .id = glue_spec_node, .size = glue_spec_size, .first = 0, .last = 0, .subtypes = NULL, .fields = lmt_node_fields_glue_spec, .name = lua_key(gluespec), .lua = lua_key_index(gluespec), .visible = 1 }; + + /*tex + This one sometimes shows up, especially when we temporarily need an alternative head pointer, + simply because we want to retain some head in case the original head is replaced. 
+ */ + + lmt_interface.node_data[temp_node] = (node_info) { .id = temp_node, .size = temp_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(temp), .lua = lua_key_index(temp), .visible = 1 }; + + /*tex + The split nodes are used for insertions. + */ + + lmt_interface.node_data[split_node] = (node_info) { .id = split_node, .size = split_node_size, .first = 0, .last = last_split_subtype, .subtypes = subtypes_split, .fields = lmt_node_fields_split, .name = lua_key(split), .lua = lua_key_index(split), .visible = 1 }; + + /*tex + The following nodes are not meant for users. They are used internally for different purposes + and you should not encounter them in node lists. As with many nodes, they often are + allocated using fast methods so they never show up in the new, copy and flush handlers. + */ + + lmt_interface.node_data[expression_node] = (node_info) { .id = expression_node, .size = expression_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(expression), .lua = lua_key_index(expression), .visible = 0 }; + lmt_interface.node_data[math_spec_node] = (node_info) { .id = math_spec_node, .size = math_spec_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(mathspec), .lua = lua_key_index(mathspec), .visible = 0 }; + lmt_interface.node_data[font_spec_node] = (node_info) { .id = font_spec_node, .size = font_spec_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(fontspec), .lua = lua_key_index(fontspec), .visible = 0 }; + lmt_interface.node_data[nesting_node] = (node_info) { .id = nesting_node, .size = nesting_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(nestedlist), .lua = lua_key_index(nestedlist), .visible = 0 }; + lmt_interface.node_data[span_node] = (node_info) { .id = span_node, .size = span_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(span), .lua = 
lua_key_index(span), .visible = 0 }; + lmt_interface.node_data[align_stack_node] = (node_info) { .id = align_stack_node, .size = align_stack_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(alignstack), .lua = lua_key_index(alignstack), .visible = 0 }; + lmt_interface.node_data[noad_state_node] = (node_info) { .id = noad_state_node, .size = noad_state_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(noadstate), .lua = lua_key_index(noadstate), .visible = 0 }; + lmt_interface.node_data[if_node] = (node_info) { .id = if_node, .size = if_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(ifstack), .lua = lua_key_index(ifstack), .visible = 0 }; + lmt_interface.node_data[unhyphenated_node] = (node_info) { .id = unhyphenated_node, .size = active_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(unhyphenated), .lua = lua_key_index(unhyphenated), .visible = 0 }; + lmt_interface.node_data[hyphenated_node] = (node_info) { .id = hyphenated_node, .size = active_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(hyphenated), .lua = lua_key_index(hyphenated), .visible = 0 }; + lmt_interface.node_data[delta_node] = (node_info) { .id = delta_node, .size = delta_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(delta), .lua = lua_key_index(delta), .visible = 0 }; + lmt_interface.node_data[passive_node] = (node_info) { .id = passive_node, .size = passive_node_size, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = lua_key(passive), .lua = lua_key_index(passive), .visible = 0 }; + lmt_interface.node_data[passive_node + 1] = (node_info) { .id = -1, .size = -1, .first = 0, .last = 0, .subtypes = NULL, .fields = NULL, .name = NULL, .lua = 0, .visible = 0 }; + +} + +/*tex + + When we copy a node list, there are several possibilities: we do the same as a new node, we + copy 
the entry to the table in properties (a reference), we do a deep copy of a table in the + properties, we create a new table and give it the original one as a metatable. After some + experiments (that also included timing) with these scenarios I decided that a deep copy made no + sense, nor did nilling. In the end both the shallow copy and the metatable variant were + ok, although the second one is slower. The most important aspect to keep in mind is that + references to other nodes in properties may no longer be valid for that copy. We could use two + tables (one unique and one shared) or metatables but that only complicates matters. + + When defining a new node, we could already allocate a table but it is rather easy to do that at + the lua end e.g. using a metatable __index method. That way it is under macro package control. + + When deleting a node, we could keep the slot (e.g. setting it to false) but it could make + memory consumption rise needlessly when we have temporary large node lists and after that only + small lists. + + So, in the end this is what we ended up with. For the record, I also experimented with the + following: + + \startitemize + + \startitem + Copy attributes to the properties so that we have fast access at the \LUA\ end: in the + end the overhead is not compensated by speed and convenience, in fact, attributes are + not that slow when it comes to accessing them. + \stopitem + + \startitem + A bitset in the node but again the gain compared to attributes is negligible and it + also demands a pretty strict agreement over what bit represents what, and this is + unlikely to succeed in the tex community (I could use it for font handling, which is + cross package, but decided that it doesn't pay off).
+ \stopitem + + \stopitemize + + In case one wonders why properties make sense then, well, it is not so much speed that we gain, + but more convenience: storing all kind of (temporary) data in attributes is no fun and this + mechanism makes sure that properties are cleaned up when a node is freed. Also, the advantage + of a more or less global properties table is that we stay at the \LUA\ end. An alternative is + to store a reference in the node itself but that is complicated by the fact that the register + has some limitations (no numeric keys) and we also don't want to mess with it too much. + + We keep track of nesting so that we don't overflow the stack, and, what is more important, + don't keep resolving the registry index. + + We could add an index field to each node and use that one. But then we'd have to default to + false. It actually would look nicer in tracing: indices instead of pseudo memory slots. It + would not boost performance. A table like this is never really collected. + +*/ + +inline static void lmt_properties_push(lua_State * L) +{ + lmt_node_memory_state.lua_properties_level++ ; + if (lmt_node_memory_state.lua_properties_level == 1) { + lua_rawgeti(L, LUA_REGISTRYINDEX, lmt_node_memory_state.node_properties_id); + } +} + +inline static void lmt_properties_pop(lua_State * L) +{ + if (lmt_node_memory_state.lua_properties_level == 1) { + lua_pop(L, 1); + } + lmt_node_memory_state.lua_properties_level-- ; +} + +/*tex Resetting boils down to nilling. 
*/ + +inline static void lmt_properties_reset(lua_State * L, halfword target) +{ + if (lmt_node_memory_state.lua_properties_level == 0) { + lua_rawgeti(L, LUA_REGISTRYINDEX, lmt_node_memory_state.node_properties_id); + lua_pushnil(L); + lua_rawseti(L, -2, target); + lua_pop(L, 1); + } else { + lua_pushnil(L); + lua_rawseti(L, -2, target); + } +} + +inline static void lmt_properties_copy(lua_State *L, halfword target, halfword source) +{ + if (lmt_node_memory_state.lua_properties_level == 0) { + lua_rawgeti(L, LUA_REGISTRYINDEX, lmt_node_memory_state.node_properties_id); + } + /* properties */ + if (lua_rawgeti(L, -1, source) == LUA_TTABLE) { + /* properties source */ + lua_createtable(L, 0, 1); + /* properties source {} */ + lua_insert(L, -2); + /* properties {} source */ + lua_push_key(__index); + /* properties {} source "__index" */ + lua_insert(L, -2); + /* properties {} "__index" source */ + lua_rawset(L, -3); + /* properties {__index=source} */ + lua_createtable(L, 0, 1); + /* properties {__index=source} {} */ + lua_insert(L, -2); + /* properties {} {__index=source} */ + lua_setmetatable(L, -2); + /* properties {}->{__index=source} */ + lua_rawseti(L, -2, target); + /* properties[target]={}->{__index=source} */ + } else { + /* properties nil */ + lua_pop(L, 1); + } + /* properties */ + if (lmt_node_memory_state.lua_properties_level == 0) { + lua_pop(L, 1); + } +} + +/*tex The public one: */ + +void tex_reset_node_properties(halfword b) +{ + if (b) { + lmt_properties_reset(lmt_lua_state.lua_instance, b); + } +} + +/*tex Here end the property handlers. */ + +static void tex_aux_node_range_test(halfword a, halfword b) +{ + if (b < 0 || b >= lmt_node_memory_state.nodes_data.allocated) { + tex_formatted_error("nodes", "node range test failed in %s node", lmt_interface.node_data[node_type(a)].name); + } +} + +/*tex + + Because of the 5-10\% overhead that \SYNTEX\ creates some options have been implemented + controlled by |synctex_anyway_mode|. 
+ + \startabulate + \NC \type {1} \NC all but glyphs \NC \NR + \NC \type {2} \NC also glyphs \NC \NR + \NC \type {3} \NC glyphs and glue \NC \NR + \NC \type {4} \NC only glyphs \NC \NR + \stoptabulate + +*/ + +/*tex |if_stack| is called a lot so maybe optimize that one. */ + +/*tex This needs a cleanup ... there is no need to store the pointer location itself. */ + +inline static void tex_aux_preset_disc_node(halfword n) +{ + disc_pre_break(n) = disc_pre_break_node(n); + disc_post_break(n) = disc_post_break_node(n); + disc_no_break(n) = disc_no_break_node(n); + node_type(disc_pre_break(n)) = nesting_node; + node_type(disc_post_break(n)) = nesting_node; + node_type(disc_no_break(n)) = nesting_node; + node_subtype(disc_pre_break(n)) = pre_break_code; + node_subtype(disc_post_break(n)) = post_break_code; + node_subtype(disc_no_break(n)) = no_break_code; +} + +inline static void tex_aux_preset_node(halfword n, quarterword t) +{ + switch (t) { + case glyph_node: + break; + case hlist_node: + case vlist_node: + box_dir(n) = direction_unknown; + break; + case disc_node: + tex_aux_preset_disc_node(n); + break; + case rule_node: + rule_width(n) = null_flag; + rule_depth(n) = null_flag; + rule_height(n) = null_flag; + rule_data(n) = 0; + break; + case unset_node: + box_width(n) = null_flag; + break; + case specification_node: + tex_null_specification_list(n); + break; + case simple_noad: + case radical_noad: + case fraction_noad: + case accent_noad: + case fence_noad: + noad_family(n) = unused_math_family; + noad_style(n) = unused_math_style; + reset_noad_classes(n); /* unsets them */ + break; + } +} + +halfword tex_new_node(quarterword i, quarterword j) +{ + halfword s = get_node_size(i); + halfword n = tex_get_node(s); + + /*tex + + Both type() and subtype() will be set below, and node_next() is set to null by |get_node()|, + so we can clear one word less than |s|. 
+ + */ + + memset((void *) (lmt_node_memory_state.nodes + n + 1), 0, (sizeof(memoryword) * ((size_t) s - 1))); + + if (tex_nodetype_is_complex(i)) { + tex_aux_preset_node(n, i); + if (input_file_state.mode > 0) { + /*tex See table above. */ + switch (i) { + case glyph_node: + if (input_file_state.mode > 1) { + glyph_input_file(n) = input_file_value(); + glyph_input_line(n) = input_line_value(); + } + break; + case hlist_node: + case vlist_node: + case unset_node: + box_input_file(n) = input_file_value(); + box_input_line(n) = input_line_value(); + break; + } + } + if (tex_nodetype_has_attributes(i)) { + attach_current_attribute_list(n); + } + } + /* last */ + node_type(n) = i; + node_subtype(n) = j; + return n; +} + +halfword tex_new_temp_node(void) +{ + halfword n = tex_get_node(temp_node_size); + node_type(n) = temp_node; + node_subtype(n) = 0; + memset((void *) (lmt_node_memory_state.nodes + n + 1), 0, (sizeof(memoryword) * (temp_node_size - 1))); + return n; +} + +static halfword tex_aux_new_glyph_node_with_attributes(halfword parent) +{ + halfword n = tex_get_node(glyph_node_size); + memset((void *) (lmt_node_memory_state.nodes + n + 1), 0, (sizeof(memoryword) * (glyph_node_size - 1))); + if (input_file_state.mode > 1) { + glyph_input_file(n) = input_file_value(); + glyph_input_line(n) = input_line_value(); + } + node_type(n) = glyph_node; + node_subtype(n) = glyph_unset_subtype; + if (parent) { + tex_attach_attribute_list_copy(n, parent); + } else { + attach_current_attribute_list(n); + } + return n; +} + +/*tex + This makes a duplicate of the node list that starts at |p| and returns a pointer to the new + list. 
+*/ + +halfword tex_copy_node_list(halfword p, halfword end) +{ + /*tex head of the list */ + halfword h = null; + /*tex previous position in new list */ + halfword q = null; + /*tex saves stack and time */ + lua_State *L = lmt_lua_state.lua_instance; + lmt_properties_push(L); + while (p != end) { + halfword s = tex_copy_node(p); + if (h) { + tex_couple_nodes(q, s); + } else { + h = s; + } + q = s; + p = node_next(p); + } + /*tex saves stack and time */ + lmt_properties_pop(L); + return h; +} + +/*tex Make a dupe of a single node. */ + +halfword tex_copy_node_only(halfword p) +{ + quarterword t = node_type(p); + int s = get_node_size(t); + halfword r = tex_get_node(s); + memcpy((void *) (lmt_node_memory_state.nodes + r), (void *) (lmt_node_memory_state.nodes + p), (sizeof(memoryword) )); + memset((void *) (lmt_node_memory_state.nodes + r + 1), 0, (sizeof(memoryword) * ((unsigned) s - 1))); + tex_aux_preset_node(r, t); + return r; +} + +/*tex + We really need to use macros here as we need the temporary variable because varmem can be + reallocated! We cross our fingers that the compiler doesn't optimize that one away. (The test + suite had a few cases where reallocation during a copy happens.) We can make |copy_stub| + local here. + */ + +# define copy_sub_list(target,source) do { \ + if (source) { \ + halfword copy_stub = tex_copy_node_list(source, null); \ + target = copy_stub; \ + } else { \ + target = null; \ + } \ + } while (0) + +# define copy_sub_node(target,source) do { \ + if (source) { \ + halfword copy_stub = tex_copy_node(source); \ + target = copy_stub ; \ + } else { \ + target = null; \ + } \ +} while (0) + +halfword tex_copy_node(halfword p) +{ + /*tex + We really need a stub for copying because mem might move in the meantime due to resizing! 
+ */ + if (p < 0 || p >= lmt_node_memory_state.nodes_data.allocated) { + return tex_formatted_error("nodes", "attempt to copy an impossible node %d", (int) p); + } else if (p > lmt_node_memory_state.reserved && lmt_node_memory_state.nodesizes[p] == 0) { + return tex_formatted_error("nodes", "attempt to copy a free %s node %d", get_node_name(node_type(p)), (int) p); + } else { + /*tex type of node */ + halfword t = node_type(p); + int i = get_node_size(t); + /*tex current node being fabricated for new list */ + halfword r = tex_get_node(i); + /*tex this saves work */ + memcpy((void *) (lmt_node_memory_state.nodes + r), (void *) (lmt_node_memory_state.nodes + p), (sizeof(memoryword) * (unsigned) i)); + if (tex_nodetype_is_complex(i)) { + // halfword copy_stub; + if (tex_nodetype_has_attributes(t)) { + add_attribute_reference(node_attr(p)); + node_prev(r) = null; + lmt_properties_copy(lmt_lua_state.lua_instance, r, p); + } + node_next(r) = null; + switch (t) { + case glue_node: + copy_sub_list(glue_leader_ptr(r), glue_leader_ptr(p)); + break; + case hlist_node: + copy_sub_list(box_pre_adjusted(r), box_pre_adjusted(p)); + copy_sub_list(box_post_adjusted(r), box_post_adjusted(p)); + // fall through + case vlist_node: + copy_sub_list(box_pre_migrated(r), box_pre_migrated(p)); + copy_sub_list(box_post_migrated(r), box_post_migrated(p)); + // fall through + case unset_node: + copy_sub_list(box_list(r), box_list(p)); + break; + case disc_node: + disc_pre_break(r) = disc_pre_break_node(r); + if (disc_pre_break_head(p)) { + tex_set_disc_field(r, pre_break_code, tex_copy_node_list(disc_pre_break_head(p), null)); + } else { + tex_set_disc_field(r, pre_break_code, null); + } + disc_post_break(r) = disc_post_break_node(r); + if (disc_post_break_head(p)) { + tex_set_disc_field(r, post_break_code, tex_copy_node_list(disc_post_break_head(p), null)); + } else { + tex_set_disc_field(r, post_break_code, null); + } + disc_no_break(r) = disc_no_break_node(r); + if (disc_no_break_head(p)) 
{ + tex_set_disc_field(r, no_break_code, tex_copy_node_list(disc_no_break_head(p), null)); + } else { + tex_set_disc_field(r, no_break_code, null); + } + break; + case insert_node: + copy_sub_list(insert_list(r), insert_list(p)) ; + break; + case mark_node: + tex_add_token_reference(mark_ptr(p)); + break; + case adjust_node: + copy_sub_list(adjust_list(r), adjust_list(p)); + break; + case choice_node: + copy_sub_list(choice_display_mlist(r), choice_display_mlist(p)) ; + copy_sub_list(choice_text_mlist(r), choice_text_mlist(p)) ; + copy_sub_list(choice_script_mlist(r), choice_script_mlist(p)) ; + copy_sub_list(choice_script_script_mlist(r), choice_script_script_mlist(p)) ; + break; + case simple_noad: + case radical_noad: + case fraction_noad: + case accent_noad: + copy_sub_list(noad_nucleus(r), noad_nucleus(p)) ; + copy_sub_list(noad_subscr(r), noad_subscr(p)) ; + copy_sub_list(noad_supscr(r), noad_supscr(p)) ; + copy_sub_list(noad_subprescr(r), noad_subprescr(p)) ; + copy_sub_list(noad_supprescr(r), noad_supprescr(p)) ; + copy_sub_list(noad_prime(r), noad_prime(p)) ; + copy_sub_list(noad_state(r), noad_state(p)) ; + switch (t) { + case radical_noad: + copy_sub_node(radical_left_delimiter(r), radical_left_delimiter(p)) ; + copy_sub_node(radical_right_delimiter(r), radical_right_delimiter(p)) ; + copy_sub_list(radical_degree(r), radical_degree(p)) ; + break; + case fraction_noad: + // copy_sub_list(fraction_numerator(r), fraction_numerator(p)) ; + // copy_sub_list(fraction_denominator(r), fraction_denominator(p)) ; + copy_sub_node(fraction_left_delimiter(r), fraction_left_delimiter(p)) ; + copy_sub_node(fraction_right_delimiter(r), fraction_right_delimiter(p)) ; + copy_sub_node(fraction_middle_delimiter(r), fraction_middle_delimiter(p)) ; + break; + case accent_noad: + copy_sub_list(accent_top_character(r), accent_top_character(p)) ; + copy_sub_list(accent_bottom_character(r), accent_bottom_character(p)) ; + copy_sub_list(accent_middle_character(r), 
accent_middle_character(p)) ; + break; + } + break; + case fence_noad: + /* in principle also scripts */ + copy_sub_node(fence_delimiter_list(r), fence_delimiter_list(p)) ; + copy_sub_node(fence_delimiter_top(r), fence_delimiter_top(p)) ; + copy_sub_node(fence_delimiter_bottom(r), fence_delimiter_bottom(p)) ; + break; + case sub_box_node: + case sub_mlist_node: + copy_sub_list(kernel_math_list(r), kernel_math_list(p)) ; + break; + case par_node: + /* can also be copy_sub_node */ + copy_sub_list(par_box_left(r), par_box_left(p)); + copy_sub_list(par_box_right(r), par_box_right(p)); + copy_sub_list(par_box_middle(r), par_box_middle(p)); + /* wipe copied fields */ + par_left_skip(r) = null; + par_right_skip(r) = null; + par_par_fill_left_skip(r) = null; + par_par_fill_right_skip(r) = null; + par_par_init_left_skip(r) = null; + par_par_init_right_skip(r) = null; + par_baseline_skip(r) = null; + par_line_skip(r) = null; + par_par_shape(r) = null; + par_inter_line_penalties(r) = null; + par_club_penalties(r) = null; + par_widow_penalties(r) = null; + par_display_widow_penalties(r) = null; + par_orphan_penalties(r) = null; + /* really copy fields */ + tex_set_par_par(r, par_left_skip_code, tex_get_par_par(p, par_left_skip_code), 1); + tex_set_par_par(r, par_right_skip_code, tex_get_par_par(p, par_right_skip_code), 1); + tex_set_par_par(r, par_par_fill_left_skip_code, tex_get_par_par(p, par_par_fill_left_skip_code), 1); + tex_set_par_par(r, par_par_fill_right_skip_code, tex_get_par_par(p, par_par_fill_right_skip_code), 1); + tex_set_par_par(r, par_par_init_left_skip_code, tex_get_par_par(p, par_par_init_left_skip_code), 1); + tex_set_par_par(r, par_par_init_right_skip_code, tex_get_par_par(p, par_par_init_right_skip_code), 1); + tex_set_par_par(r, par_baseline_skip_code, tex_get_par_par(p, par_baseline_skip_code), 1); + tex_set_par_par(r, par_line_skip_code, tex_get_par_par(p, par_line_skip_code), 1); + tex_set_par_par(r, par_par_shape_code, tex_get_par_par(p, 
par_par_shape_code), 1); + tex_set_par_par(r, par_inter_line_penalties_code, tex_get_par_par(p, par_inter_line_penalties_code), 1); + tex_set_par_par(r, par_club_penalties_code, tex_get_par_par(p, par_club_penalties_code), 1); + tex_set_par_par(r, par_widow_penalties_code, tex_get_par_par(p, par_widow_penalties_code), 1); + tex_set_par_par(r, par_display_widow_penalties_code, tex_get_par_par(p, par_display_widow_penalties_code), 1); + tex_set_par_par(r, par_orphan_penalties_code, tex_get_par_par(p, par_orphan_penalties_code), 1); + /* tokens, we could mess with a ref count instead */ + par_end_par_tokens(r) = par_end_par_tokens(p); + tex_add_token_reference(par_end_par_tokens(p)); + break; + case specification_node: + tex_copy_specification_list(r, p); + break; + default: + break; + } + } + return r; + } +} + +inline static void tex_aux_free_sub_node_list(halfword source) +{ + if (source) { + tex_flush_node_list(source); + } +} + +inline static void tex_aux_free_sub_node(halfword source) +{ + if (source) { + tex_flush_node(source); + } +} + +/* We don't need the checking for attributes if we make these lists frozen. */ + +void tex_flush_node(halfword p) +{ + if (! p) { + /*tex legal, but no-op. 
*/ + return; + } else if (p <= lmt_node_memory_state.reserved || p >= lmt_node_memory_state.nodes_data.allocated) { + tex_formatted_error("nodes", "attempt to free an impossible node %d of type %d", (int) p, node_type(p)); + } else if (lmt_node_memory_state.nodesizes[p] == 0) { + for (int i = (lmt_node_memory_state.reserved + 1); i < lmt_node_memory_state.nodes_data.allocated; i++) { + if (lmt_node_memory_state.nodesizes[i] > 0) { + tex_aux_check_node(i); + } + } + tex_formatted_error("nodes", "attempt to double-free %s node %d, ignored", get_node_name(node_type(p)), (int) p); + } else { + int t = node_type(p); + if (tex_nodetype_is_complex(t)) { + switch (t) { + case glue_node: + tex_aux_free_sub_node_list(glue_leader_ptr(p)); + break; + case hlist_node: + tex_aux_free_sub_node_list(box_pre_adjusted(p)); + tex_aux_free_sub_node_list(box_post_adjusted(p)); + // fall through + case vlist_node: + tex_aux_free_sub_node_list(box_pre_migrated(p)); + tex_aux_free_sub_node_list(box_post_migrated(p)); + // fall through + case unset_node: + tex_aux_free_sub_node_list(box_list(p)); + break; + case disc_node: + /*tex Watch the start at temp node hack! 
*/ + tex_aux_free_sub_node_list(disc_pre_break_head(p)); + tex_aux_free_sub_node_list(disc_post_break_head(p)); + tex_aux_free_sub_node_list(disc_no_break_head(p)); + break; + case par_node: + tex_aux_free_sub_node_list(par_box_left(p)); + tex_aux_free_sub_node_list(par_box_right(p)); + tex_aux_free_sub_node_list(par_box_middle(p)); + /* we could check for the flag */ + tex_flush_node(par_left_skip(p)); + tex_flush_node(par_right_skip(p)); + tex_flush_node(par_par_fill_left_skip(p)); + tex_flush_node(par_par_fill_right_skip(p)); + tex_flush_node(par_par_init_left_skip(p)); + tex_flush_node(par_par_init_right_skip(p)); + tex_flush_node(par_baseline_skip(p)); + tex_flush_node(par_line_skip(p)); + tex_flush_node(par_par_shape(p)); + tex_flush_node(par_club_penalties(p)); + tex_flush_node(par_inter_line_penalties(p)); + tex_flush_node(par_widow_penalties(p)); + tex_flush_node(par_display_widow_penalties(p)); + tex_flush_node(par_orphan_penalties(p)); + /* tokens */ + tex_flush_token_list(par_end_par_tokens(p)); + break; + case insert_node: + tex_flush_node_list(insert_list(p)); + break; + case mark_node: + tex_delete_token_reference(mark_ptr(p)); + break; + case adjust_node: + tex_flush_node_list(adjust_list(p)); + break; + case choice_node: + tex_aux_free_sub_node_list(choice_display_mlist(p)); + tex_aux_free_sub_node_list(choice_text_mlist(p)); + tex_aux_free_sub_node_list(choice_script_mlist(p)); + tex_aux_free_sub_node_list(choice_script_script_mlist(p)); + break; + case simple_noad: + case fraction_noad: + case radical_noad: + case accent_noad: + tex_aux_free_sub_node_list(noad_nucleus(p)); + tex_aux_free_sub_node_list(noad_subscr(p)); + tex_aux_free_sub_node_list(noad_supscr(p)); + tex_aux_free_sub_node_list(noad_subprescr(p)); + tex_aux_free_sub_node_list(noad_supprescr(p)); + tex_aux_free_sub_node_list(noad_prime(p)); + tex_aux_free_sub_node_list(noad_state(p)); + switch (t) { + case fraction_noad: + // tex_aux_free_sub_node_list(fraction_numerator(p)); + // 
tex_aux_free_sub_node_list(fraction_denominator(p)); + tex_aux_free_sub_node(fraction_left_delimiter(p)); + tex_aux_free_sub_node(fraction_right_delimiter(p)); + tex_aux_free_sub_node(fraction_middle_delimiter(p)); + break; + case radical_noad: + tex_aux_free_sub_node(radical_left_delimiter(p)); + tex_aux_free_sub_node(radical_right_delimiter(p)); + tex_aux_free_sub_node_list(radical_degree(p)); + break; + case accent_noad: + tex_aux_free_sub_node_list(accent_top_character(p)); + tex_aux_free_sub_node_list(accent_bottom_character(p)); + tex_aux_free_sub_node_list(accent_middle_character(p)); + break; + } + break; + case fence_noad: + tex_aux_free_sub_node_list(fence_delimiter_list(p)); + tex_aux_free_sub_node_list(fence_delimiter_top(p)); + tex_aux_free_sub_node_list(fence_delimiter_bottom(p)); + break; + case sub_box_node: + case sub_mlist_node: + tex_aux_free_sub_node_list(kernel_math_list(p)); + break; + case specification_node: + tex_dispose_specification_list(p); + break; + default: + break; + } + if (tex_nodetype_has_attributes(t)) { + delete_attribute_reference(node_attr(p)); + node_attr(p) = null; /* when we debug */ + lmt_properties_reset(lmt_lua_state.lua_instance, p); + } + } + tex_free_node(p, get_node_size(t)); + } +} + +/*tex Erase the list of nodes starting at |pp|. */ + +void tex_flush_node_list(halfword l) +{ + if (! l) { + /*tex Legal, but no-op. 
*/ + return; + } else if (l <= lmt_node_memory_state.reserved || l >= lmt_node_memory_state.nodes_data.allocated) { + tex_formatted_error("nodes", "attempt to free an impossible node list %d of type %d", (int) l, node_type(l)); + } else if (lmt_node_memory_state.nodesizes[l] == 0) { + for (int i = (lmt_node_memory_state.reserved + 1); i < lmt_node_memory_state.nodes_data.allocated; i++) { + if (lmt_node_memory_state.nodesizes[i] > 0) { + tex_aux_check_node(i); + } + } + tex_formatted_error("nodes", "attempt to double-free %s node %d, ignored", get_node_name(node_type(l)), (int) l); + } else { + /*tex Saves stack and time. */ + lua_State *L = lmt_lua_state.lua_instance; + lmt_properties_push(L); + while (l) { + halfword nxt = node_next(l); + tex_flush_node(l); + l = nxt; + } + /*tex Saves stack and time. */ + lmt_properties_pop(L); + } +} + +static void tex_aux_check_node(halfword p) +{ + halfword t = node_type(p); + switch (t) { + case glue_node: + tex_aux_node_range_test(p, glue_leader_ptr(p)); + break; + case hlist_node: + tex_aux_node_range_test(p, box_pre_adjusted(p)); + tex_aux_node_range_test(p, box_post_adjusted(p)); + // fall through + case vlist_node: + tex_aux_node_range_test(p, box_pre_migrated(p)); + tex_aux_node_range_test(p, box_post_migrated(p)); + // fall through + case unset_node: + case align_record_node: + tex_aux_node_range_test(p, box_list(p)); + break; + case insert_node: + tex_aux_node_range_test(p, insert_list(p)); + break; + case disc_node: + tex_aux_node_range_test(p, disc_pre_break_head(p)); + tex_aux_node_range_test(p, disc_post_break_head(p)); + tex_aux_node_range_test(p, disc_no_break_head(p)); + break; + case adjust_node: + tex_aux_node_range_test(p, adjust_list(p)); + break; + case choice_node: + tex_aux_node_range_test(p, choice_display_mlist(p)); + tex_aux_node_range_test(p, choice_text_mlist(p)); + tex_aux_node_range_test(p, choice_script_mlist(p)); + tex_aux_node_range_test(p, choice_script_script_mlist(p)); + break; + case 
simple_noad: + case radical_noad: + case fraction_noad: + case accent_noad: + tex_aux_node_range_test(p, noad_nucleus(p)); + tex_aux_node_range_test(p, noad_subscr(p)); + tex_aux_node_range_test(p, noad_supscr(p)); + tex_aux_node_range_test(p, noad_subprescr(p)); + tex_aux_node_range_test(p, noad_supprescr(p)); + tex_aux_node_range_test(p, noad_prime(p)); + tex_aux_node_range_test(p, noad_state(p)); + switch (t) { + case radical_noad: + tex_aux_node_range_test(p, radical_degree(p)); + tex_aux_node_range_test(p, radical_left_delimiter(p)); + tex_aux_node_range_test(p, radical_right_delimiter(p)); + break; + case fraction_noad: + // tex_aux_node_range_test(p, fraction_numerator(p)); + // tex_aux_node_range_test(p, fraction_denominator(p)); + tex_aux_node_range_test(p, fraction_left_delimiter(p)); + tex_aux_node_range_test(p, fraction_right_delimiter(p)); + tex_aux_node_range_test(p, fraction_middle_delimiter(p)); + break; + case accent_noad: + tex_aux_node_range_test(p, accent_top_character(p)); + tex_aux_node_range_test(p, accent_bottom_character(p)); + tex_aux_node_range_test(p, accent_middle_character(p)); + break; + } + break; + case fence_noad: + tex_aux_node_range_test(p, fence_delimiter_list(p)); + tex_aux_node_range_test(p, fence_delimiter_top(p)); + tex_aux_node_range_test(p, fence_delimiter_bottom(p)); + break; + case par_node: + tex_aux_node_range_test(p, par_box_left(p)); + tex_aux_node_range_test(p, par_box_right(p)); + tex_aux_node_range_test(p, par_box_middle(p)); + tex_aux_node_range_test(p, par_left_skip(p)); + tex_aux_node_range_test(p, par_right_skip(p)); + tex_aux_node_range_test(p, par_baseline_skip(p)); + tex_aux_node_range_test(p, par_line_skip(p)); + tex_aux_node_range_test(p, par_par_shape(p)); + tex_aux_node_range_test(p, par_club_penalties(p)); + tex_aux_node_range_test(p, par_inter_line_penalties(p)); + tex_aux_node_range_test(p, par_widow_penalties(p)); + tex_aux_node_range_test(p, par_display_widow_penalties(p)); + 
tex_aux_node_range_test(p, par_orphan_penalties(p)); + tex_aux_node_range_test(p, par_par_fill_left_skip(p)); + tex_aux_node_range_test(p, par_par_fill_right_skip(p)); + tex_aux_node_range_test(p, par_par_init_left_skip(p)); + tex_aux_node_range_test(p, par_par_init_right_skip(p)); + break; + default: + break; + } +} + +/* +halfword fix_node_list(halfword head) +{ + if (head) { + halfword tail = head; + halfword next = node_next(head); + while (next) { + node_prev(next) = tail; + tail = next; + next = node_next(tail); + } + return tail; + } else { + return null; + } +} +*/ + +halfword tex_get_node(int size) +{ + if (size < max_chain_size) { + halfword p = lmt_node_memory_state.free_chain[size]; + if (p) { + lmt_node_memory_state.free_chain[size] = node_next(p); + lmt_node_memory_state.nodesizes[p] = (char) size; + node_next(p) = null; + lmt_node_memory_state.nodes_data.ptr += size; + return p; + } else { + return tex_aux_allocated_node(size); + } + } else { + return tex_normal_error("nodes", "there is a problem in getting a node, case 1"); + } +} + +void tex_free_node(halfword p, int size) /* no need to pass size, we can get is here */ +{ + if (p > lmt_node_memory_state.reserved && size < max_chain_size) { + lmt_node_memory_state.nodesizes[p] = 0; + node_next(p) = lmt_node_memory_state.free_chain[size]; + lmt_node_memory_state.free_chain[size] = p; + lmt_node_memory_state.nodes_data.ptr -= size; + } else { + tex_formatted_error("nodes", "node number %d of type %d with size %d should not be freed", (int) p, node_type(p), size); + } +} + +/*tex + + At the start of the node memory area we reserve some special nodes, for instance frequently + used glue specifications. We could as well just use new_glue here but for the moment we stick + to the traditional approach. We can omit the zeroing because it's already done. 
+ +*/ + +static void tex_aux_initialize_glue(halfword n, scaled wi, scaled st, scaled sh, halfword sto, halfword sho) +{ + // memset((void *) (node_memory_state.nodes + n), 0, (sizeof(memoryword) * node_memory_state.nodesizes[glue_spec_node])); + node_type(n) = glue_spec_node; + glue_amount(n) = wi; + glue_stretch(n) = st; + glue_shrink(n) = sh; + glue_stretch_order(n) = sto; + glue_shrink_order(n) = sho; +} + +static void tex_aux_initialize_whatever_node(halfword n, quarterword t) +{ + // memset((void *) (node_memory_state.nodes + n), 0, (sizeof(memoryword) * node_memory_state.nodesizes[t])); + node_type(n) = t; +} + +static void tex_aux_initialize_character(halfword n, halfword chr) +{ + // memset((void *) (node_memory_state.nodes + n), 0, (sizeof(memoryword) * node_memory_state.nodesizes[glyph_node])); + node_type(n) = glyph_node; + glyph_character(n) = chr; +} +# define reserved_node_slots 32 + +void tex_initialize_node_mem() +{ + memoryword *nodes = NULL; + char *sizes = NULL; + int size = 0; + if (lmt_main_state.run_state == initializing_state) { + size = lmt_node_memory_state.nodes_data.minimum; + lmt_node_memory_state.reserved = last_reserved; + lmt_node_memory_state.nodes_data.top = last_reserved + 1; + lmt_node_memory_state.nodes_data.allocated = size; + lmt_node_memory_state.nodes_data.ptr = last_reserved; + } else { + size = lmt_node_memory_state.nodes_data.allocated; + lmt_node_memory_state.nodes_data.initial = lmt_node_memory_state.nodes_data.ptr; + } + if (size >0) { + nodes = aux_allocate_clear_array(sizeof(memoryword), size, reserved_node_slots); + sizes = aux_allocate_clear_array(sizeof(char), size, reserved_node_slots); + } + if (nodes && sizes) { + lmt_node_memory_state.nodes = nodes; + lmt_node_memory_state.nodesizes = sizes; + } else { + tex_overflow_error("nodes", size); + } +} + +void tex_initialize_nodes(void) +{ + if (lmt_main_state.run_state == initializing_state) { + /*tex Initialize static glue specs. 
*/ + + tex_aux_initialize_glue(zero_glue, 0, 0, 0, 0, 0); + tex_aux_initialize_glue(fi_glue, 0, 0, 0, fi_glue_order, 0); + tex_aux_initialize_glue(fil_glue, 0, unity, 0, fil_glue_order, 0); + tex_aux_initialize_glue(fill_glue, 0, unity, 0, fill_glue_order, 0); + tex_aux_initialize_glue(filll_glue, 0, unity, unity, fil_glue_order, fil_glue_order); + tex_aux_initialize_glue(fil_neg_glue, 0, -unity, 0, fil_glue_order, 0); + + /*tex Initialize node list heads. */ + + tex_aux_initialize_whatever_node(page_insert_head, temp_node); /* actually a split node */ + tex_aux_initialize_whatever_node(contribute_head, temp_node); + tex_aux_initialize_whatever_node(page_head, temp_node); + tex_aux_initialize_whatever_node(temp_head, temp_node); + tex_aux_initialize_whatever_node(hold_head, temp_node); + tex_aux_initialize_whatever_node(post_adjust_head, temp_node); + tex_aux_initialize_whatever_node(pre_adjust_head, temp_node); + tex_aux_initialize_whatever_node(post_migrate_head, temp_node); + tex_aux_initialize_whatever_node(pre_migrate_head, temp_node); + tex_aux_initialize_whatever_node(align_head, temp_node); + tex_aux_initialize_whatever_node(active_head, unhyphenated_node); + tex_aux_initialize_whatever_node(end_span, span_node); + + tex_aux_initialize_character(begin_period, '.'); + tex_aux_initialize_character(end_period, '.'); + } +} + +void tex_dump_node_mem(dumpstream f) +{ + dump_int(f, lmt_node_memory_state.nodes_data.allocated); + dump_int(f, lmt_node_memory_state.nodes_data.top); + dump_things(f, lmt_node_memory_state.nodes[0], (size_t) lmt_node_memory_state.nodes_data.top + 1); + dump_things(f, lmt_node_memory_state.nodesizes[0], lmt_node_memory_state.nodes_data.top); + dump_things(f, lmt_node_memory_state.free_chain[0], max_chain_size); + dump_int(f, lmt_node_memory_state.nodes_data.ptr); + dump_int(f, lmt_node_memory_state.reserved); +} + +/*tex + Node memory is (currently) also used for some stack related nodes. 
Using dedicated arrays instead + makes sense but on the other hand this is the charm of \TEX. Variable nodes are no longer using + the node pool so we don't need clever code to reclaim space. We have plenty anyway. +*/ + +void tex_undump_node_mem(dumpstream f) // todo: check allocation +{ + undump_int(f, lmt_node_memory_state.nodes_data.allocated); + undump_int(f, lmt_node_memory_state.nodes_data.top); + tex_initialize_node_mem(); + undump_things(f, lmt_node_memory_state.nodes[0], (size_t) lmt_node_memory_state.nodes_data.top + 1); + undump_things(f, lmt_node_memory_state.nodesizes[0], (size_t) lmt_node_memory_state.nodes_data.top); + undump_things(f, lmt_node_memory_state.free_chain[0], max_chain_size); + undump_int(f, lmt_node_memory_state.nodes_data.ptr); + undump_int(f, lmt_node_memory_state.reserved); +} + +static halfword tex_aux_allocated_node(int s) +{ + int old = lmt_node_memory_state.nodes_data.top; + int new = old + s; + if (new > lmt_node_memory_state.nodes_data.allocated) { + if (lmt_node_memory_state.nodes_data.allocated + lmt_node_memory_state.nodes_data.step <= lmt_node_memory_state.nodes_data.size) { + memoryword *nodes = aux_reallocate_array(lmt_node_memory_state.nodes, sizeof(memoryword), lmt_node_memory_state.nodes_data.allocated + lmt_node_memory_state.nodes_data.step, reserved_node_slots); + char *sizes = aux_reallocate_array(lmt_node_memory_state.nodesizes, sizeof(char), lmt_node_memory_state.nodes_data.allocated + lmt_node_memory_state.nodes_data.step, reserved_node_slots); + if (nodes && sizes) { + lmt_node_memory_state.nodes = nodes; + lmt_node_memory_state.nodesizes = sizes; + memset((void *) (nodes + lmt_node_memory_state.nodes_data.allocated), 0, (size_t) lmt_node_memory_state.nodes_data.step * sizeof(memoryword)); + memset((void *) (sizes + lmt_node_memory_state.nodes_data.allocated), 0, (size_t) lmt_node_memory_state.nodes_data.step * sizeof(char)); + lmt_node_memory_state.nodes_data.allocated += lmt_node_memory_state.nodes_data.step; 
+ lmt_run_memory_callback("node", 1); + } else { + lmt_run_memory_callback("node", 0); + tex_overflow_error("node memory size", lmt_node_memory_state.nodes_data.size); + } + } + if (new > lmt_node_memory_state.nodes_data.allocated) { + tex_overflow_error("node memory size", lmt_node_memory_state.nodes_data.size); + } + } + /* We allocate way larger than the maximum size. */ + // printf("old=%i size=%i new=%i\n",old,s,new); + lmt_node_memory_state.nodesizes[old] = (char) s; + lmt_node_memory_state.nodes_data.top = new; + return old; +} + +int tex_n_of_used_nodes(int counts[]) +{ + int n = 0; + for (int i = 0; i < max_node_type; i++) { + counts[i] = 0; + } + for (int i = lmt_node_memory_state.nodes_data.top; i > lmt_node_memory_state.reserved; i--) { + if (lmt_node_memory_state.nodesizes[i] > 0 && (node_type(i) <= max_node_type)) { + counts[node_type(i)] += 1; + } + } + for (int i = 0; i < max_node_type; i++) { + n += counts[i]; + } + return n; +} + +halfword tex_list_node_mem_usage(void) +{ + char *saved_varmem_sizes = aux_allocate_array(sizeof(char), lmt_node_memory_state.nodes_data.allocated, 1); + if (saved_varmem_sizes) { + halfword q = null; + halfword p = null; + memcpy(saved_varmem_sizes, lmt_node_memory_state.nodesizes, (size_t) lmt_node_memory_state.nodes_data.allocated); + for (halfword i = lmt_node_memory_state.reserved + 1; i < (lmt_node_memory_state.nodes_data.allocated - 1); i++) { + if (saved_varmem_sizes[i] > 0) { + halfword j = tex_copy_node(i); + if (p) { + node_next(p) = j; + } else { + q = j; + } + p = j; + } + } + aux_deallocate_array(saved_varmem_sizes); + return q; + } else { + return null; + } +} + +/* + Now comes some attribute stuff. We could have a fast allocator for them and a dedicated pool + (actually for each node tyep I guess). 
+*/ + +inline static halfword tex_aux_new_attribute_list_node(halfword count) +{ + halfword r = tex_get_node(attribute_node_size); + node_type(r) = attribute_node; + node_subtype(r) = attribute_list_subtype; + attribute_unset(r) = 0; + attribute_count(r) = count; + return r; +} + +inline static halfword tex_aux_new_attribute_node(halfword index, int value) +{ + halfword r = tex_get_node(attribute_node_size); + node_type(r) = attribute_node; + node_subtype(r) = attribute_value_subtype; + attribute_index(r) = index; + attribute_value(r) = value; + return r; +} + +inline static halfword tex_aux_copy_attribute_node(halfword n) +{ + halfword a = tex_get_node(attribute_node_size); + memcpy((void *) (lmt_node_memory_state.nodes + a), (void *) (lmt_node_memory_state.nodes + n), (sizeof(memoryword) * attribute_node_size)); + return a; +} + +halfword tex_copy_attribute_list(halfword a_old) +{ + if (a_old && a_old != attribute_cache_disabled) { + halfword a_new = tex_aux_new_attribute_list_node(0); + halfword p_old = a_old; + halfword p_new = a_new; + p_old = node_next(p_old); + while (p_old) { + halfword a = tex_copy_node(p_old); + node_next(p_new) = a; + p_new = a; + p_old = node_next(p_old); + } + node_next(p_new) = null; + return a_new; + } else { + return a_old; + } +} + +halfword tex_copy_attribute_list_set(halfword a_old, int index, int value) +{ + halfword a_new = tex_aux_new_attribute_list_node(0); + halfword p_new = a_new; + int done = 0; + if (a_old && a_old != attribute_cache_disabled) { + halfword p_old = node_next(a_old); + while (p_old) { + halfword i = attribute_index(p_old); + if (! 
done && i >= index) { + if (value != unused_attribute_value) { + halfword a = tex_aux_new_attribute_node(index, value); + node_next(p_new) = a; + p_new = a; + } + done = 1; + if (i == index) { + goto CONTINUE; + } + } + /* APPEND: */ + { + halfword a = tex_aux_copy_attribute_node(p_old); + node_next(p_new) = a; + p_new = a; + } + CONTINUE: + p_old = node_next(p_old); + } + node_next(p_new) = null; + } + if (! done && value != unused_attribute_value) { + halfword b = tex_aux_new_attribute_node(index, value); + node_next(p_new) = b; + } + return a_new; +} + +static void tex_aux_update_attribute_cache(void) +{ + halfword p = tex_aux_new_attribute_list_node(0); + set_current_attribute_state(p); + for (int i = 0; i <= lmt_node_memory_state.max_used_attribute; i++) { + int v = attribute_register(i); + if (v > unused_attribute_value) { + halfword r = tex_aux_new_attribute_node(i, v); + node_next(p) = r; + p = r; + } + } + if (! node_next(current_attribute_state)) { + tex_free_node(current_attribute_state, attribute_node_size); + set_current_attribute_state(null); + } else { + add_attribute_reference(current_attribute_state); + } +} + +void tex_build_attribute_list(halfword target) +{ + if (lmt_node_memory_state.max_used_attribute >= 0) { + if (! current_attribute_state || current_attribute_state == attribute_cache_disabled) { + tex_aux_update_attribute_cache(); + if (! current_attribute_state) { + return; + } + } + add_attribute_reference(current_attribute_state); + /*tex Checking for validity happens before the call; the subtype can be unset (yet). */ + node_attr(target) = current_attribute_state; + } +} + +halfword tex_current_attribute_list(void) +{ + if (lmt_node_memory_state.max_used_attribute >= 0) { + if (! 
current_attribute_state || current_attribute_state == attribute_cache_disabled) { + tex_aux_update_attribute_cache(); + } + return current_attribute_state; + } else { + return null ; + } +} + +/*tex + + There can be some gain in setting |attr_last_unset_enabled| but only when a lot of unsetting + happens with rather long attribute lists, which actually is rare. + + One tricky aspect if attributes is that when we test for a list head being the same, we have + the problem that freeing and (re)allocating can result in the same node address. Flushing in + reverse order sort of prevents that. + +*/ + +void tex_dereference_attribute_list(halfword a) +{ + if (a && a != attribute_cache_disabled) { + if (node_type(a) == attribute_node && node_subtype(a) == attribute_list_subtype){ + if (attribute_count(a) > 0) { + --attribute_count(a); + if (attribute_count(a) == 0) { + if (a == current_attribute_state) { + set_current_attribute_state(attribute_cache_disabled); + } + { + int u = 0; + /* this works (different order) */ + while (a) { + halfword n = node_next(a); + lmt_node_memory_state.nodesizes[a] = 0; + node_next(a) = lmt_node_memory_state.free_chain[attribute_node_size]; + lmt_node_memory_state.free_chain[attribute_node_size] = a; + ++u; + a = n; + } + /* this doesn't always (which is weird) */ + // halfword h = a; + // halfword t = a; + // while (a) { + // lmt_node_memory_state.nodesizes[a] = 0; + // ++u; + // t = a; + // a = node_next(a); + // } + // node_next(t) = lmt_node_memory_state.free_chain[attribute_node_size]; + // lmt_node_memory_state.free_chain[attribute_node_size] = h; + /* */ + lmt_node_memory_state.nodes_data.ptr -= u * attribute_node_size; + } + } + } else { + tex_formatted_error("nodes", "zero referenced attribute list %i", a); + } + } else { + tex_formatted_error("nodes", "trying to delete an attribute reference of a non attribute list node %i (%i)", a, node_type(a)); + } + } +} + +/*tex + Here |p| is an attr list head, or zero. 
This one works on a copy, so we can overwrite a value! +*/ + +halfword tex_patch_attribute_list(halfword list, int index, int value) +{ + if (list == attribute_cache_disabled) { + return list; + } else if (list) { + halfword current = node_next(list); + halfword previous = list; + while (current) { + int i = attribute_index(current); + if (i == index) { + /*tex Replace: */ + attribute_value(current) = value; + return list; + } else if (i > index) { + /*tex Prepend: */ + halfword r = tex_aux_new_attribute_node(index, value); + node_next(previous) = r; + node_next(r) = current; + return list; + } else { + previous = current; + current = node_next(current); + } + } + { + /*tex Append: */ + halfword r = tex_aux_new_attribute_node(index, value); + node_next(r) = node_next(previous); + node_next(previous) = r; + } + } else { + /*tex Watch out, we don't set a ref count, this branch is not seen anyway. */ + halfword r = tex_aux_new_attribute_node(index, value); + list = tex_aux_new_attribute_list_node(0); + node_next(list) = r; + } + return list; +} + +/* todo: combine set and unset */ + +void tex_set_attribute(halfword target, int index, int value) +{ + /*tex Not all nodes can have an attribute list. */ + if (tex_nodetype_has_attributes(node_type(target))) { + if (value == unused_attribute_value) { + tex_unset_attribute(target, index, value); + } else { + /*tex If we have no list, we create one and quit. */ + halfword a = node_attr(target); + /* needs checking: can we get an empty one here indeed, the vlink test case ... 
*/ + if (a) { + halfword p = node_next(a); + while (p) { + int i = attribute_index(p); + if (i == index) { + if (attribute_value(p) == value) { + return; + } else { + break; + } + } else if (i > index) { + break; + } else { + p = node_next(p); + } + } + // p = tex_copy_attribute_list_set(a, index, value); + // tex_attach_attribute_list_attribute(target, p); + // } else { + // halfword p = tex_copy_attribute_list_set(null, index, value); + // tex_attach_attribute_list_attribute(target, p); + // } + } + a = tex_copy_attribute_list_set(a, index, value); + tex_attach_attribute_list_attribute(target, a); + } + } +} + +int tex_unset_attribute(halfword target, int index, int value) +{ + if (tex_nodetype_has_attributes(node_type(target))) { + halfword p = node_attr(target); + if (p) { + halfword c = node_next(p); + while (c) { + halfword i = attribute_index(c); + if (i == index) { + halfword v = attribute_value(c); + if (v != value) { + halfword l = tex_copy_attribute_list_set(p, index, value); + tex_attach_attribute_list_attribute(target, l); + } + return v; + } else if (i > index) { + return unused_attribute_value; + } + c = node_next(c); + } + } + } + return unused_attribute_value; +} + +void tex_unset_attributes(halfword first, halfword last, int index) +{ + halfword a = null; + halfword q = null; + halfword n = first; + while (n) { + if (tex_nodetype_has_attributes(node_type(n))) { + halfword p = node_attr(n); + if (p) { + if (p == q) { + tex_attach_attribute_list_attribute(n, a); + } else { + halfword c = node_next(p); + while (c) { + halfword i = attribute_index(c); + if (i == index) { + q = p; + a = tex_copy_attribute_list_set(p, index, unused_attribute_value); /* check */ + tex_attach_attribute_list_attribute(n, a); + break; + } else if (i > index) { + break; + } + c = node_next(c); + } + } + } + } + if (n == last) { + break; + } else { + n = node_next(n); + } + } +} + +int tex_has_attribute(halfword n, int index, int value) +{ + if 
(tex_nodetype_has_attributes(node_type(n))) { + halfword p = node_attr(n); + if (p) { + p = node_next(p); + while (p) { + if (attribute_index(p) == index) { + int v = attribute_value(p); + if (value == v || value == unused_attribute_value) { + return v; + } else { + return unused_attribute_value; + } + } else if (attribute_index(p) > index) { + return unused_attribute_value; + } + p = node_next(p); + } + } + } + return unused_attribute_value; +} + +/*tex + Because we have more detail available we provide node names and show a space when we have one. + The disc nodes are also more granular. I might drop the font in showing glyph nodes. A previous + version used full node types inside brackets but we now collapse the node types and use only + the first character of the type. Eventually I might come up with some variants. + */ + +void tex_print_short_node_contents(halfword p) +{ + int collapsing = 0; + while (p) { + switch (node_type(p)) { + case rule_node: + if (collapsing) { tex_print_char(']'); collapsing = 0; } + tex_print_char('|'); + break; + case glue_node: + switch (node_subtype(p)) { + case space_skip_glue: + case xspace_skip_glue: + case zero_space_skip_glue: + if (collapsing) { tex_print_char(']'); collapsing = 0; } + tex_print_char(' '); + break; + default: + goto DEFAULT; + } + break; + case math_node: + if (collapsing) { tex_print_char(']'); collapsing = 0; } + tex_print_char('$'); + break; + case disc_node: + if (collapsing) { tex_print_char(']'); collapsing = 0; } + tex_print_str("[["); + tex_print_short_node_contents(disc_pre_break_head(p)); + tex_print_str("]["); + tex_print_short_node_contents(disc_post_break_head(p)); + tex_print_str("]["); + tex_print_short_node_contents(disc_no_break_head(p)); + tex_print_str("]]"); + break; + case dir_node: + if (collapsing) { tex_print_char(']'); collapsing = 0; } + if (node_subtype(p) == cancel_dir_subtype) { + tex_print_str(" >"); + } else { + tex_print_str(dir_direction(p) ? 
"<r2l " : "<l2r "); + } + break; + case glyph_node: + if (collapsing) { tex_print_char(']'); collapsing = 0; } + if (glyph_font(p) != lmt_print_state.font_in_short_display) { + tex_print_font_identifier(glyph_font(p)); + tex_print_char(' '); + lmt_print_state.font_in_short_display = glyph_font(p); + } + tex_print_tex_str(glyph_character(p)); + break; + case par_node: + if (collapsing) { tex_print_char(']'); collapsing = 0; } + tex_print_str(par_dir(p) ? "<r2l p>" : "<l2r p>"); + break; + default: + DEFAULT: + if (! collapsing) { + tex_print_char('['); + collapsing = 1; + } + tex_print_char(lmt_interface.node_data[node_type(p)].name[0]); + break; + } + p = node_next(p); + } + if (collapsing) { + tex_print_char(']'); + } +} + +/*tex + + Now we are ready for |show_node_list| itself. This procedure has been written to be \quote + {extra robust} in the sense that it should not crash or get into a loop even if the data + structures have been messed up by bugs in the rest of the program. You can safely call its + parent routine |show_box(p)| for arbitrary values of |p| when you are debugging \TEX. However, + in the presence of bad data, the procedure may fetch a |memoryword| whose variant is different + from the way it was stored; for example, it might try to read |mem[p].hh| when |mem[p]| + contains a scaled integer, if |p| is a pointer that has been clobbered or chosen at random. + +*/ + +void tex_print_node_list(halfword p, const char *what, int threshold, int max) +{ + if (p) { + if (what) { + tex_append_char('.'); + tex_append_char('.'); + tex_print_levels(); + tex_print_current_string(); + tex_print_str_esc(what); + } else { + /*tex This happens in math. */ + } + tex_append_char('.'); + tex_append_char('.'); + tex_show_node_list(p, threshold, max); // show_box_depth_par, show_box_breadth_par + tex_flush_char(); + tex_flush_char(); + if (what) { + tex_flush_char(); + tex_flush_char(); + } + } +} + +/*tex + + Print a node list symbolically. 
This one is adaped to the fact that we have a bit more + granularity in subtypes and some more fields. It is therefore not compatible with traditional + \TEX. This is work in progress. I will also normalize some subtype names so ... + +*/ + +static void tex_aux_show_attr_list(halfword p) +{ + p = node_attr(p); + if (p) { + int callback_id = lmt_callback_defined(get_attribute_callback); + if (tracing_nodes_par > 1) { + tex_print_format("<%i#%i>", p, attribute_count(p)); + } + tex_print_char('['); + p = node_next(p); + while (p) { + halfword k = attribute_index(p); + halfword v = attribute_value(p); + if (callback_id) { + strnumber u = tex_save_cur_string(); + char *ks = NULL; + char *vs = NULL; + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "dd->RR", k, v, &ks, &vs); + tex_restore_cur_string(u); + if (ks) { + tex_print_str(ks); + lmt_memory_free(ks); + } else { + tex_print_int(k); + } + tex_print_char('='); + if (vs) { + tex_print_str(vs); + lmt_memory_free(vs); + } else { + tex_print_int(v); + } + } else { + tex_print_int(k); + tex_print_char('='); + tex_print_int(v); + }; + p = node_next(p); + if (p) { + tex_print_char(','); + } + } + tex_print_char(']'); + } +} + +void tex_print_name(halfword n, const char* what) +{ + tex_print_str_esc(what); + if (tracing_nodes_par > 0) { + tex_print_char('<'); + tex_print_int(n); + tex_print_char('>'); + } +} + +static void tex_aux_print_subtype_and_attributes_str(halfword p, const char *n) +{ + if (show_node_details_par > 0) { + tex_print_char('['); + tex_print_str(n); + tex_print_char(']'); + } + if (show_node_details_par > 1 && tex_nodetype_has_attributes(node_type(p))) { + tex_aux_show_attr_list(p); + } +} + +void tex_print_extended_subtype(halfword p, quarterword s) +{ + halfword st = s; + switch (p ? 
node_type(p) : simple_noad) { + case hlist_node: + if (s > noad_class_list_base) { + st -= noad_class_list_base; + } + case simple_noad: + case math_char_node: + { + int callback_id = lmt_callback_defined(get_noad_class_callback); + if (callback_id) { + strnumber u = tex_save_cur_string(); + char *v = NULL; + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "d->R", st, &v); + tex_restore_cur_string(u); + if (v) { + if (p && node_type(p) == hlist_node) { + tex_print_str("math"); + } + tex_print_str(v); + lmt_memory_free(v); + break; + } + } + /* fall through */ + } + break; + default: + tex_print_int(s); + break; + } +} + +static void tex_print_subtype_and_attributes_info(halfword p, quarterword s, node_info *data) +{ + if (show_node_details_par > 0) { + tex_print_char('['); + if (data && data->subtypes && s >= data->first && s <= data->last) { + tex_print_str(data->subtypes[s].name); + } else { + tex_print_extended_subtype(p, s); + } + tex_print_char(']'); + } + if (show_node_details_par > 1 && tex_nodetype_has_attributes(node_type(p))) { + tex_aux_show_attr_list(p); + } +} + +static void tex_print_node_and_details(halfword p) +{ + halfword type = node_type(p); + quarterword subtype = node_subtype(p); + tex_print_name(p, lmt_interface.node_data[type].name); + switch (type) { + case temp_node: + case whatsit_node: + return; + } + tex_print_subtype_and_attributes_info(p, subtype, &lmt_interface.node_data[type]); +} + +static void tex_aux_print_subtype_and_attributes_int(halfword p, halfword n) +{ + if (show_node_details_par > 0) { \ + tex_print_char('['); + tex_print_int(n); + tex_print_char(']'); + } + if (show_node_details_par > 1 && tex_nodetype_has_attributes(node_type(p))) { + tex_aux_show_attr_list(p); + } +} + +const char *tex_aux_subtype_str(halfword n) +{ + if (n) { + node_info *data = &lmt_interface.node_data[node_type(n)]; + if (data && data->subtypes && node_subtype(n) >= data->first && node_subtype(n) <= data->last) { + return 
data->subtypes[node_subtype(n)].name; + } + } + return ""; +} + +/*tex + + We're not downward compatible here and it might even evolve a bit (and maybe I'll add a + compability mode too). We have way more information and plenty of log space so there is no + need to be compact. Consider it work in progress. + + I admit that there is some self interest here in adding more detail. At some point (around + ctx 2019) I needed to see attribute values in the trace so I added that option which in turn + made me reformat the output a bit. Of course it makes sense to have the whole show be a + callback (and I might actually do that) but on the other hand it's so integral to \TEX\ that + it doesn't add much and in all the years that \LUATEX| is now arround I never really needed + it anyway. + + One option is to go completely |\node[key=value,key={value,value}]| here as that can be easily + parsed. It's to be decided. + + What is the string pool char data used for here? + + Per version 2.09.22 we use the values from the node definitions which is more consistent and + also makes the binary somewhat smaller. It's all in the details. It's a typical example of + a change doen when we're stabel for a while (as it influences tracing). 
+ +*/ + +void tex_print_specnode(halfword v, int unit) +{ + if (tracing_nodes_par > 2) { + tex_print_format("<%i>", v); + } + tex_print_spec(v, unit); +} + +void tex_aux_show_dictionary(halfword p, halfword properties, halfword group, halfword index,halfword font, halfword character) +{ + int callback_id = lmt_callback_defined(get_math_dictionary_callback); + if (callback_id) { + strnumber u = tex_save_cur_string(); + char *s = NULL; + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "Nddddd->R", p, properties, group, index, font, character, &s); + tex_restore_cur_string(u); + if (s) { + tex_print_str(", "); + tex_print_str(s); + lmt_memory_free(s); + return; + } + } + if (properties) { + tex_print_str(", properties "); + tex_print_qhex(properties); + } + if (group) { + tex_print_str(", group "); + tex_print_qhex(group); + } + if (index) { + tex_print_str(", index "); + tex_print_qhex(index); + } +} + +void tex_show_node_list(halfword p, int threshold, int max) +{ + if ((int) lmt_string_pool_state.string_temp_top > threshold) { + if (p > null) { + /*tex Indicate that there's been some truncation. */ + tex_print_format("[tracing depth %i reached]", threshold); + } + return; + } else { + /*tex The number of items already printed at this level: */ + int n = 0; + if (max <= 0) { + max = 5; + } + while (p) { + tex_print_levels(); + tex_print_current_string(); + ++n; + if (n > max) { + /*tex Time to stop. */ + halfword t = tex_tail_of_node_list(p); + if (t == p) { + /*tex We've showed the whole list. */ + return; + } else if (p == node_prev(t)) { + // /*tex We're just before the end. 
*/ + } else { + tex_print_format("[tracing breadth %i reached]", max); + return; + } + } + tex_print_node_and_details(p); + switch (node_type(p)) { + case glyph_node: + if (show_node_details_par > 0) { + scaledwhd whd = tex_char_whd_from_glyph(p); + if (glyph_protected(p)) { + tex_print_str(", protected"); + } + /* effective */ + if (whd.wd) { + tex_print_str(", wd "); + tex_print_dimension(whd.wd, pt_unit); + } + if (whd.ht) { + tex_print_str(", ht "); + tex_print_dimension(whd.ht, pt_unit); + } + if (whd.dp) { + tex_print_str(", dp "); + tex_print_dimension(whd.dp, pt_unit); + } + if (whd.ic) { + tex_print_str(", ic "); + tex_print_dimension(whd.ic, pt_unit); + } + /* */ + if (get_glyph_language(p)) { + tex_print_str(", language (n="); + tex_print_int(get_glyph_language(p)); + tex_print_str(",l="); + tex_print_int(get_glyph_lhmin(p)); + tex_print_str(",r="); + tex_print_int(get_glyph_rhmin(p)); + tex_print_char(')'); + } + if (get_glyph_script(p)) { + tex_print_str(", script "); + tex_print_int(get_glyph_script(p)); + } + if (get_glyph_hyphenate(p)) { + tex_print_str(", hyphenationmode "); + tex_print_qhex(get_glyph_hyphenate(p)); + } + if (glyph_x_offset(p)) { + tex_print_str(", xoffset "); + tex_print_dimension(glyph_x_offset(p), pt_unit); + } + if (glyph_y_offset(p)) { + tex_print_str(", yoffset "); + tex_print_dimension(glyph_y_offset(p), pt_unit); + } + if (glyph_left(p)) { + tex_print_str(", left "); + tex_print_dimension(glyph_left(p), pt_unit); + } + if (glyph_right(p)) { + tex_print_str(", right "); + tex_print_dimension(glyph_right(p), pt_unit); + } + if (glyph_raise(p)) { + tex_print_str(", raise "); + tex_print_dimension(glyph_raise(p), pt_unit); + } + if (glyph_expansion(p)) { + tex_print_str(", expansion "); + tex_print_int(glyph_expansion(p)); + } + if (glyph_scale(p) && glyph_scale(p) != 1000) { + tex_print_str(", scale "); + tex_print_int(glyph_scale(p)); + } + if (glyph_x_scale(p) && glyph_x_scale(p) != 1000) { + tex_print_str(", xscale "); + 
tex_print_int(glyph_x_scale(p)); + } + if (glyph_y_scale(p) && glyph_y_scale(p) != 1000) { + tex_print_str(", yscale "); + tex_print_int(glyph_y_scale(p)); + } + if (glyph_data(p)) { + tex_print_str(", data "); + tex_print_int(glyph_data(p)); + } + if (glyph_state(p)) { + tex_print_str(", state "); + tex_print_int(glyph_state(p)); + } + if (glyph_options(p)) { + tex_print_str(", options "); + tex_print_qhex(glyph_options(p)); + } + if (glyph_discpart(p)) { + tex_print_str(", discpart "); + tex_print_int(glyph_discpart(p)); + } + tex_aux_show_dictionary(p, glyph_properties(p), glyph_group(p), glyph_index(p), glyph_font(p), glyph_character(p)); + } + tex_print_str(", font "); + /* this could be a callback */ + tex_print_font_identifier(glyph_font(p)); /* for now consistent with others, might change */ + tex_print_str(", glyph "); + tex_print_char_identifier(glyph_character(p)); + break; + case hlist_node: + case vlist_node: + case unset_node: + /*tex Display box |p|. */ + if (box_width(p)) { + tex_print_str(", width "); + tex_print_dimension(box_width(p), pt_unit); + } + if (box_height(p)) { + tex_print_str(", height "); + tex_print_dimension(box_height(p), pt_unit); + } + if (box_depth(p)) { + tex_print_str(", depth "); + tex_print_dimension(box_depth(p), pt_unit); + } + if (node_type(p) == unset_node) { + /*tex Display special fields of the unset node |p|. */ + if (box_span_count(p)) { + tex_print_str(", columns "); + tex_print_int(box_span_count(p) + 1); + } + if (box_glue_stretch(p)) { + tex_print_str(", stretch "); + tex_print_glue(box_glue_stretch(p), box_glue_order(p), no_unit); + } + if (box_glue_shrink(p)) { + tex_print_str(", shrink "); + tex_print_glue(box_glue_shrink(p), box_glue_sign(p), no_unit); + } + } else { + /*tex + + Display the value of |glue_set(p)|. The code will have to change in + this place if |glue_ratio| is a structured type instead of an + ordinary |real|. 
Note that this routine should avoid arithmetic + errors even if the |glue_set| field holds an arbitrary random value. + The following code assumes that a properly formed nonzero |real| + number has absolute value $2^{20}$ or more when it is regarded as an + integer; this precaution was adequate to prevent floating point + underflow on the author's computer. + + */ + double g = (double) (box_glue_set(p)); + if ((g != 0.0) && (box_glue_sign(p) != normal_glue_sign)) { + tex_print_str(", glue "); /*tex This was |glue set|. */ + if (box_glue_sign(p) == shrinking_glue_sign) { + tex_print_str("- "); + } + if (g > 20000.0 || g < -20000.0) { + if (g > 0.0) { + tex_print_char('>'); + } else { + tex_print_str("< -"); + } + tex_print_glue(20000 * unity, box_glue_order(p), no_unit); + } else { + tex_print_glue((scaled) glueround(unity *g), box_glue_order(p), no_unit); + } + } + if (box_shift_amount(p) != 0) { + tex_print_str(", shifted "); + tex_print_dimension(box_shift_amount(p), pt_unit); + } + if (valid_direction(box_dir(p))) { + tex_print_str(", direction "); + switch (box_dir(p)) { + case 0 : tex_print_str("l2r"); break; + case 1 : tex_print_str("r2l"); break; + default : tex_print_str("unset"); break; + } + } + if (box_geometry(p)) { + tex_print_str(", geometry "); + tex_print_qhex(box_geometry(p)); + if (tex_has_box_geometry(p, orientation_geometry)) { + tex_print_str(", orientation "); + tex_print_qhex(box_orientation(p)); + } + if (tex_has_box_geometry(p, offset_geometry)) { + tex_print_str(", offset("); + tex_print_dimension(box_x_offset(p), pt_unit); + tex_print_char(','); + tex_print_dimension(box_y_offset(p), pt_unit); + tex_print_char(')'); + } + if (tex_has_box_geometry(p, anchor_geometry)) { + if (box_anchor(p)) { + tex_print_str(", anchor "); + tex_print_qhex(box_anchor(p)); + } + if (box_source_anchor(p)) { + tex_print_str(", source "); + tex_print_int(box_source_anchor(p)); + } + if (box_target_anchor(p)) { + tex_print_str(", target "); + 
tex_print_int(box_target_anchor(p)); + } + } + } + if (box_index(p)) { + tex_print_str(", index "); + tex_print_int(box_index(p)); + } + if (box_package_state(p)) { + tex_print_str(", state "); + tex_print_int(box_package_state(p)); + } + } + tex_print_node_list(box_pre_adjusted(p), "preadjusted", threshold, max); + tex_print_node_list(box_pre_migrated(p), "premigrated", threshold, max); + tex_print_node_list(box_list(p), "list", threshold, max); + tex_print_node_list(box_post_migrated(p), "postmigrated", threshold, max); + tex_print_node_list(box_post_adjusted(p), "postadjusted", threshold, max); + break; + case rule_node: + /*tex Display rule |p|. */ + if (rule_width(p)) { + tex_print_str(", width "); + tex_print_rule_dimen(rule_width(p)); + } + if (rule_height(p)) { + tex_print_str(", height "); + tex_print_rule_dimen(rule_height(p)); + } + if (rule_depth(p)) { + tex_print_str(", depth "); + tex_print_rule_dimen(rule_depth(p)); + } + if (rule_left(p)) { + tex_print_str(", left / top "); + tex_print_rule_dimen(rule_left(p)); + } + if (rule_right(p)) { + tex_print_str(", right / bottom "); + tex_print_rule_dimen(rule_right(p)); + } + if (rule_x_offset(p)) { + tex_print_str(", xoffset "); + tex_print_rule_dimen(rule_x_offset(p)); + } + if (rule_y_offset(p)) { + tex_print_str(", yoffset "); + tex_print_rule_dimen(rule_y_offset(p)); + } + if (rule_font(p)) { + if (rule_font(p) < 0 || rule_font(p) >= rule_font_fam_offset) { + tex_print_str(", font "); + tex_print_font_identifier(rule_font(p)); + } else { + tex_print_str(", family "); + tex_print_int(rule_font(p) - rule_font_fam_offset); + } + } + if (rule_character(p)) { + tex_print_str(", character "); + tex_print_char_identifier(rule_character(p)); + } + break; + case insert_node: + /*tex Display insertion |p|. The natural size is the sum of height and depth. 
*/ + tex_print_str(", index "); + tex_print_int(insert_index(p)); + tex_print_str(", total height "); + tex_print_dimension(insert_total_height(p), pt_unit); + tex_print_str(", max depth "); + tex_print_dimension(insert_max_depth(p), pt_unit); + tex_print_str(", split glue ("); + tex_print_specnode(insert_split_top(p), no_unit); + tex_print_str("), float cost "); + tex_print_int(insert_float_cost(p)); + tex_print_node_list(insert_list(p), "list", threshold, max); + break; + case dir_node: + tex_print_str(", direction "); + switch (dir_direction(p)) { + case direction_l2r : tex_print_str("l2r"); break; + case direction_r2l : tex_print_str("r2l"); break; + default : tex_print_str("unset"); break; + } + break; + case par_node: + { + halfword v; + /*tex We're already past processing so we only show the stored values. */ + if (node_subtype(p) == vmode_par_par_subtype) { + if (tex_par_state_is_set(p, par_par_shape_code) ) { v = par_par_shape(p) ; if (v) { tex_print_str(", parshape * "); } } + if (tex_par_state_is_set(p, par_inter_line_penalties_code) ) { v = par_inter_line_penalties(p) ; if (v) { tex_print_str(", interlinepenalties * "); } } + if (tex_par_state_is_set(p, par_club_penalties_code) ) { v = par_club_penalties(p) ; if (v) { tex_print_str(", clubpenalties * "); } } + if (tex_par_state_is_set(p, par_widow_penalties_code) ) { v = par_widow_penalties(p) ; if (v) { tex_print_str(", widowpenalties * "); } } + if (tex_par_state_is_set(p, par_display_widow_penalties_code)) { v = par_display_widow_penalties(p) ; if (v) { tex_print_str(", displsaywidowpenalties * "); } } + if (tex_par_state_is_set(p, par_orphan_penalties_code) ) { v = par_orphan_penalties(p) ; if (v) { tex_print_str(", orphanpenalties * "); } } + if (tex_par_state_is_set(p, par_hang_indent_code) ) { v = par_hang_indent(p) ; if (v) { tex_print_str(", hangindent "); tex_print_dimension(v, pt_unit); } } + if (tex_par_state_is_set(p, par_hang_after_code) ) { v = par_hang_after(p) ; if (v) { 
tex_print_str(", hangafter "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_hsize_code) ) { v = par_hsize(p) ; if (v) { tex_print_str(", hsize "); tex_print_dimension(v, pt_unit); } } + if (tex_par_state_is_set(p, par_right_skip_code) ) { v = par_right_skip(p) ; if (! tex_glue_is_zero(v)) { tex_print_str(", rightskip "); tex_print_specnode (v, pt_unit); } } + if (tex_par_state_is_set(p, par_left_skip_code) ) { v = par_left_skip(p) ; if (! tex_glue_is_zero(v)) { tex_print_str(", leftskip "); tex_print_specnode (v, pt_unit); } } + if (tex_par_state_is_set(p, par_last_line_fit_code) ) { v = par_last_line_fit(p) ; if (v) { tex_print_str(", lastlinefit "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_pre_tolerance_code) ) { v = par_pre_tolerance(p) ; if (v) { tex_print_str(", pretolerance "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_tolerance_code) ) { v = par_tolerance(p) ; if (v) { tex_print_str(", tolerance "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_looseness_code) ) { v = par_looseness(p) ; if (v) { tex_print_str(", looseness "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_adjust_spacing_code) ) { v = par_adjust_spacing(p) ; if (v) { tex_print_str(", adjustaspacing "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_adj_demerits_code) ) { v = par_adj_demerits(p) ; if (v) { tex_print_str(", adjdemerits "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_protrude_chars_code) ) { v = par_protrude_chars(p) ; if (v) { tex_print_str(", protrudechars "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_line_penalty_code) ) { v = par_line_penalty(p) ; if (v) { tex_print_str(", linepenalty "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_double_hyphen_demerits_code) ) { v = par_double_hyphen_demerits(p) ; if (v) { tex_print_str(", doublehyphendemerits "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_final_hyphen_demerits_code) ) { v = 
par_final_hyphen_demerits(p) ; if (v) { tex_print_str(", finalhyphendemerits "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_inter_line_penalty_code) ) { v = par_inter_line_penalty(p) ; if (v) { tex_print_str(", interlinepenalty "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_club_penalty_code) ) { v = par_club_penalty(p) ; if (v) { tex_print_str(", clubpenalty "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_widow_penalty_code) ) { v = par_widow_penalty(p) ; if (v) { tex_print_str(", widowpenalty "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_display_widow_penalty_code) ) { v = par_display_widow_penalty(p) ; if (v) { tex_print_str(", displaywidowpenalty "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_orphan_penalty_code) ) { v = par_orphan_penalty(p) ; if (v) { tex_print_str(", orphanpenalty "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_broken_penalty_code) ) { v = par_broken_penalty(p) ; if (v) { tex_print_str(", brokenpenalty "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_emergency_stretch_code) ) { v = par_emergency_stretch(p) ; if (v) { tex_print_str(", emergencystretch "); tex_print_dimension(v, pt_unit); } } + if (tex_par_state_is_set(p, par_par_indent_code) ) { v = par_par_indent(p) ; if (v) { tex_print_str(", parindent "); tex_print_dimension(v, pt_unit); } } + if (tex_par_state_is_set(p, par_par_fill_left_skip_code) ) { v = par_par_fill_left_skip(p) ; if (! tex_glue_is_zero(v)) { tex_print_str(", parfilleftskip "); tex_print_specnode (v, pt_unit); } } + if (tex_par_state_is_set(p, par_par_fill_right_skip_code) ) { v = par_par_fill_right_skip(p) ; if (! tex_glue_is_zero(v)) { tex_print_str(", parfillskip "); tex_print_specnode (v, pt_unit); } } + if (tex_par_state_is_set(p, par_par_init_left_skip_code) ) { v = par_par_init_left_skip(p) ; if (! 
tex_glue_is_zero(v)) { tex_print_str(", parinitleftskip "); tex_print_specnode (v, pt_unit); } } + if (tex_par_state_is_set(p, par_par_init_right_skip_code) ) { v = par_par_init_right_skip(p) ; if (! tex_glue_is_zero(v)) { tex_print_str(", parinitrightskip "); tex_print_specnode (v, pt_unit); } } + if (tex_par_state_is_set(p, par_baseline_skip_code) ) { v = par_baseline_skip(p) ; if (! tex_glue_is_zero(v)) { tex_print_str(", baselineskip "); tex_print_specnode (v, pt_unit); } } + if (tex_par_state_is_set(p, par_line_skip_code) ) { v = par_line_skip(p) ; if (! tex_glue_is_zero(v)) { tex_print_str(", lineskip "); tex_print_specnode (v, pt_unit); } } + if (tex_par_state_is_set(p, par_line_skip_limit_code) ) { v = par_line_skip_limit(p) ; if (v) { tex_print_str(", lineskiplimt "); tex_print_dimension(v, pt_unit); } } + if (tex_par_state_is_set(p, par_adjust_spacing_step_code) ) { v = par_adjust_spacing_step(p) ; if (v > 0) { tex_print_str(", adjustspacingstep "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_adjust_spacing_shrink_code) ) { v = par_adjust_spacing_shrink(p) ; if (v > 0) { tex_print_str(", adjustspacingshrink "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_adjust_spacing_stretch_code) ) { v = par_adjust_spacing_stretch(p) ; if (v > 0) { tex_print_str(", adjustspacingstretch "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_hyphenation_mode_code) ) { v = par_hyphenation_mode(p) ; if (v > 0) { tex_print_str(", hyphenationmode "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_shaping_penalties_mode_code) ) { v = par_shaping_penalties_mode(p) ; if (v > 0) { tex_print_str(", shapingpenaltiesmode "); tex_print_int (v); } } + if (tex_par_state_is_set(p, par_shaping_penalty_code) ) { v = par_shaping_penalty(p) ; if (v > 0) { tex_print_str(", shapingpenalty "); tex_print_int (v); } } + } + /* local boxes */ + v = tex_get_local_left_width(p) ; if (v) { tex_print_str(", leftboxwidth "); tex_print_dimension(v, pt_unit); } 
+ v = tex_get_local_right_width(p) ; if (v) { tex_print_str(", rightboxwidth "); tex_print_dimension(v, pt_unit); } + tex_print_node_list(par_box_left(p), "leftbox", threshold, max); + tex_print_node_list(par_box_right(p), "rightbox", threshold, max); + tex_print_node_list(par_box_middle(p), "middlebox", threshold, max); + } + break; + case boundary_node: + if (boundary_data(p)) { + tex_print_str(", data "); + tex_print_int(boundary_data(p)); + } + break; + case whatsit_node: + { + int callback_id = lmt_callback_defined(show_whatsit_callback); + /*tex we always print this */ + if (callback_id) { + strnumber u = tex_save_cur_string(); + char *s = NULL; + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "Nd->S", p, 1, &s); + tex_restore_cur_string(u); + if (s) { + tex_aux_print_subtype_and_attributes_str(p, s); + lmt_memory_free(s); + } else { + tex_aux_print_subtype_and_attributes_int(p, node_subtype(p)); + } + } else { + tex_aux_print_subtype_and_attributes_int(p, node_subtype(p)); + } + /*tex but optionally there can be more */ + if (callback_id) { + int l = lmt_string_pool_state.string_temp_top / 2; + strnumber u = tex_save_cur_string(); + /*tex Todo: the tracing needs checking. */ + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "Nddddd->", p, 2, l, (tracing_levels_par & (tracing_levels_group | tracing_levels_input)), cur_level, lmt_input_state.input_stack_data.ptr); + tex_restore_cur_string(u); + } + } + break; + case glue_node: + /*tex Display glue |p|. */ + if (is_leader(p)) { + /*tex Display leaders |p|. */ + tex_print_str(", leader "); + tex_print_specnode(p, no_unit); + tex_print_node_list(glue_leader_ptr(p), "list", threshold, max); + } else { + if (node_subtype(p) != conditional_math_glue && node_subtype(p) != rulebased_math_glue) { + tex_print_char(' '); + tex_print_specnode(p, node_subtype(p) < conditional_math_glue ? 
pt_unit : mu_unit); /* was |no_unit : mu_unit| */ + } + if (glue_data(p)) { + tex_print_str(", data "); + tex_print_int(glue_data(p)); + } + if (node_subtype(p) == space_skip_glue && glue_font(p)) { + tex_print_str(", font "); + tex_print_int(glue_font(p)); + } + } + break; + case kern_node: + /*tex Display kern |p| */ + tex_print_str(", amount "); + tex_print_dimension(kern_amount(p), pt_unit); + if (node_subtype(p) != explicit_math_kern_subtype) { + tex_print_unit(pt_unit); + if (kern_expansion(p)) { + tex_print_str(", expansion "); + tex_print_int(kern_expansion(p)); + } + } else { + tex_print_unit(mu_unit); + } + break; + case math_node: + /*tex Display math node |p|. */ + if (! tex_math_glue_is_zero(p)) { + tex_print_str(", glued "); + tex_print_specnode(p, no_unit); + } else if (math_surround(p)) { + tex_print_str(", surrounded "); + tex_print_dimension(math_surround(p), pt_unit); + } + if (math_penalty(p)) { + tex_print_str(", penalty "); + tex_print_int(math_penalty(p)); + } + break; + case penalty_node: + /*tex Display penalty |p|. */ + tex_print_str(", amount "); + tex_print_int(penalty_amount(p)); + break; + case disc_node: + if (disc_class(p) != unset_disc_class) { + tex_print_str(", class "); + tex_print_int(disc_class(p)); + } + if (disc_options(p)) { + tex_print_str(", options "); + tex_print_qhex(disc_options(p)); + } + tex_print_str(", penalty "); + tex_print_int(disc_penalty(p)); + tex_print_node_list(disc_pre_break_head(p), "prebreaklist", threshold, max); + tex_print_node_list(disc_post_break_head(p), "postbreaklist", threshold, max); + tex_print_node_list(disc_no_break_head(p), "nobreaklist", threshold, max); + break; + case mark_node: + /*tex Display mark |p|. */ + tex_print_str(", index "); + tex_print_int(mark_index(p)); + if (node_subtype(p) == reset_mark_value_code) { + tex_print_str(", reset"); + } else { + tex_print_token_list(NULL, token_link(mark_ptr(p))); /*tex We have a ref count token. 
*/ + } + break; + case adjust_node: + /*tex Display adjustment |p|. */ + if (adjust_options(p)) { + tex_print_str(", options "); + tex_print_qhex(adjust_options(p)); + } + if (adjust_index(p)) { + tex_print_str(", index "); + tex_print_int(adjust_index(p)); + } + if (has_adjust_option(p, adjust_option_depth_before) && adjust_depth_before(p)) { + tex_print_str(", depthbefore "); + tex_print_dimension(adjust_depth_before(p), pt_unit); + } + if (has_adjust_option(p, adjust_option_depth_after) &&adjust_depth_before(p)) { + tex_print_str(", depthafter "); + tex_print_dimension(adjust_depth_after(p), pt_unit); + } + tex_print_node_list(adjust_list(p), "list", threshold, max); + break; + case glue_spec_node: + case math_spec_node: + case font_spec_node: + /*tex This is actually an error! */ + break; + case align_record_node: + tex_print_token_list(NULL, align_record_pre_part(p)); /*tex No ref count token here. */ + tex_print_levels(); + tex_print_str("..<content>"); + tex_print_token_list(NULL, align_record_post_part(p)); /*tex No ref count token here. */ + break; + case temp_node: + break; + default: + if (! tex_show_math_node(p, threshold, max)) { + tex_print_format("<unknown node type %i>", node_type(p)); + } + break; + } + p = node_next(p); + } + } +} + +/*tex + + This routine finds the base width of a horizontal box, using the same logic that \TEX82\ used + for |\predisplaywidth|. 
+ +*/ + +static halfword tex_aux_get_actual_box_width(halfword r, halfword p, scaled initial_width) +{ + /*tex calculated |size| */ + scaled w = -max_dimen; + /*tex |w| plus possible glue amount */ + scaled v = initial_width; + while (p) { + /*tex increment to |v| */ + scaled d; + switch (node_type(p)) { + case glyph_node: + d = tex_glyph_width(p); + goto FOUND; + case hlist_node: + case vlist_node: + d = box_width(p); + goto FOUND; + case rule_node: + d = rule_width(p); + goto FOUND; + case kern_node: + d = kern_amount(p); + break; + case disc_node: + /*tex At the end of the line we should actually take the |pre|. */ + if (disc_no_break(p)) { + d = tex_aux_get_actual_box_width(r, disc_no_break_head(p),0); + if (d <= -max_dimen || d >= max_dimen) { + d = 0; + } + } else { + d = 0; + } + goto FOUND; + case math_node: + if (tex_math_glue_is_zero(p)) { + d = math_surround(p); + } else { + d = math_amount(p); + switch (box_glue_sign(r)) { + case stretching_glue_sign: + if ((box_glue_order(r) == math_stretch_order(p)) && math_stretch(p)) { + v = max_dimen; + } + break; + case shrinking_glue_sign: + if ((box_glue_order(r) == math_shrink_order(p)) && math_shrink(p)) { + v = max_dimen; + } + break; + } + break; + } + break; + case glue_node: + /*tex + We need to be careful that |w|, |v|, and |d| do not depend on any |glue_set| + values, since such values are subject to system-dependent rounding. System + dependent numbers are not allowed to infiltrate parameters like + |pre_display_size|, since \TEX82 is supposed to make the same decisions on + all machines. 
+ */ + d = glue_amount(p); + if (box_glue_sign(r) == stretching_glue_sign) { + if ((box_glue_order(r) == glue_stretch_order(p)) && glue_stretch(p)) { + v = max_dimen; + } + } else if (box_glue_sign(r) == shrinking_glue_sign) { + if ((box_glue_order(r) == glue_shrink_order(p)) && glue_shrink(p)) { + v = max_dimen; + } + } + if (is_leader(p)) { + goto FOUND; + } + break; + default: + d = 0; + break; + } + if (v < max_dimen) { + v += d; + } + goto NOT_FOUND; + FOUND: + if (v < max_dimen) { + v += d; + w = v; + } else { + w = max_dimen; + break; + } + NOT_FOUND: + p = node_next(p); + } + return w; +} + +halfword tex_actual_box_width(halfword r, scaled base_width) +{ + /*tex + + Often this is the same as: + + \starttyping + return + shift_amount(r) + base_width + + natural_sizes(list_ptr(r),null,(glueratio) box_glue_set(r),box_glue_sign(r),box_glue_order(r),box_dir(r)); + \stoptyping + */ + return tex_aux_get_actual_box_width(r, box_list(r), box_shift_amount(r) + base_width); +} + +int tex_list_has_glyph(halfword list) +{ + while (list) { + switch (node_type(list)) { + case glyph_node: + case disc_node: + return 1; + default: + list = node_next(list); + break; + } + } + return 0; +} + +/*tex + + Attribute lists need two extra globals to increase processing efficiency. |max_used_attr| + limits the test loop that checks for set attributes, and |attr_cache| contains a pointer to an + already created attribute list. It is set to the special value |cache_disabled| when the + current value can no longer be trusted: after an assignment to an attribute register, and after + a group has ended. + + From the computer's standpoint, \TEX's chief mission is to create horizontal and vertical + lists. We shall now investigate how the elements of these lists are represented internally as + nodes in the dynamic memory. + + A horizontal or vertical list is linked together by |link| fields in the first word of each + node. 
Individual nodes represent boxes, glue, penalties, or special things like discretionary + hyphens; because of this variety, some nodes are longer than others, and we must distinguish + different kinds of nodes. We do this by putting a |type| field in the first word, together + with the link and an optional |subtype|. + + Character nodes appear only in horizontal lists, never in vertical lists. + + An |hlist_node| stands for a box that was made from a horizontal list. Each |hlist_node| is + seven words long, and contains the following fields (in addition to the mandatory |type| and + |link|, which we shall not mention explicitly when discussing the other node types): The + |height| and |width| and |depth| are scaled integers denoting the dimensions of the box. There + is also a |shift_amount| field, a scaled integer indicating how much this box should be + lowered (if it appears in a horizontal list), or how much it should be moved to the right (if + it appears in a vertical list). There is a |list_ptr| field, which points to the beginning of + the list from which this box was fabricated; if |list_ptr| is |null|, the box is empty. Finally, + there are three fields that represent the setting of the glue: |glue_set(p)| is a word of type + |glue_ratio| that represents the proportionality constant for glue setting; |glue_sign(p)| is + |stretching| or |shrinking| or |normal| depending on whether or not the glue should stretch or + shrink or remain rigid; and |glue_order(p)| specifies the order of infinity to which glue + setting applies (|normal|, |sfi|, |fil|, |fill|, or |filll|). The |subtype| field is not used. + + The |new_null_box| function returns a pointer to an |hlist_node| in which all subfields have + the values corresponding to |\hbox{}|. The |subtype| field is set to |min_quarterword|, since + that's the desired |span_count| value if this |hlist_node| is changed to an |unset_node|. + +*/ + +/*tex Create a new box node. 
*/ + +halfword tex_new_null_box_node(quarterword t, quarterword s) +{ + // halfword p = tex_new_node(hlist_node, min_quarterword); + halfword p = tex_new_node(t, s); + box_dir(p) = (singleword) text_direction_par; + return p; +} + +/*tex + + A |vlist_node| is like an |hlist_node| in all respects except that it contains a vertical list. + + A |rule_node| stands for a solid black rectangle; it has |width|, |depth|, and |height| fields + just as in an |hlist_node|. However, if any of these dimensions is $-2^{30}$, the actual value + will be determined by running the rule up to the boundary of the innermost enclosing box. This + is called a \quote {running dimension}. The |width| is never running in an hlist; the |height| + and |depth| are never running in a~vlist. + + A new rule node is delivered by the |new_rule| function. It makes all the dimensions \quote + {running}, so you have to change the ones that are not allowed to run. + +*/ + +halfword tex_new_rule_node(quarterword s) +{ + return tex_new_node(rule_node, s); +} + +/*tex + + Insertions are represented by |insert_node| records, where the |subtype| indicates the + corresponding box number. For example, |\insert 250| leads to an |insert_node| whose |subtype| + is |250 + min_quarterword|. The |height| field of an |insert_node| is slightly misnamed; it + actually holds the natural height plus depth of the vertical list being inserted. The |depth| + field holds the |split_max_depth| to be used in case this insertion is split, and the + |split_top_ptr| points to the corresponding |split_top_skip|. The |float_cost| field holds the + |floating_penalty| that will be used if this insertion floats to a subsequent page after a + split insertion of the same class. There is one more field, the |insert_ptr|, which points to the + beginning of the vlist for the insertion. + + A |mark_node| has a |mark_ptr| field that points to the reference count of a token list that + contains the user's |\mark| text. 
In addition there is a |mark_class| field that contains the + mark class. + + An |adjust_node|, which occurs only in horizontal lists, specifies material that will be moved + out into the surrounding vertical list; i.e., it is used to implement \TEX's |\vadjust| + operation. The |adjust_ptr| field points to the vlist containing this material. + + A |glyph_node|, which occurs only in horizontal lists, specifies a glyph in a particular font, + along with its attribute list. Older versions of \TEX\ could use token memory for characters, + because the font,char combination would fit in a single word (both values were required to be + strictly less than $2^{16}$). In \LUATEX, room is needed for characters that are larger than + that, as well as a pointer to a potential attribute list, and the two displacement values. + + In turn, that made the node so large that it made sense to merge ligature glyphs as well, as + that requires only one extra pointer. A few extra classes of glyph nodes will be introduced + later. The unification of all those types makes it easier to manipulate lists of glyphs. The + subtype differentiates various glyph kinds. + + First, here is a function that returns a pointer to a glyph node for a given glyph in a given + font. If that glyph doesn't exist, |null| is returned instead. Nodes of this subtype are + directly created only for accents and their base (through |make_accent|), and math nucleus + items (in the conversion from |mlist| to |hlist|). + + We no longer check if the glyph exists because a replacement can be used instead. We copy some + properties when there is a parent passed. + +*/ + +halfword tex_new_glyph_node(quarterword s, halfword f, halfword c, halfword parent) +{ + halfword p = parent && node_type(parent) == glyph_node ? 
tex_copy_node(parent) : tex_aux_new_glyph_node_with_attributes(parent); + node_subtype(p) = s; + glyph_font(p) = f; + glyph_character(p) = c; + tex_char_process(f, c); + return p; +} + +/*tex + + A subset of the glyphs nodes represent ligatures: characters fabricated from the interaction + of two or more actual characters. The characters that generated the ligature have not been + forgotten, since they are needed for diagnostic messages; the |lig_ptr| field points to a + linked list of character nodes for all original characters that have been deleted. (This list + might be empty if the characters that generated the ligature were retained in other nodes.) + Remark: we no longer keep track of ligatures via |lig_ptr| because there is no guarantee that + they are consistently tracked; they are something internal anyway. Of course one can provide an + alternative at the \LUA\ end (which is what we do in \CONTEXT). + + The |subtype| field of these |glyph_node|s is 1, plus 2 and/or 1 if the original source of the + ligature included implicit left and/or right boundaries. These nodes are created by the C + function |new_ligkern|. + + A third general type of glyphs could be called a character, as it only appears in lists that + are not yet processed by the ligaturing and kerning steps of the program. + + |main_control| inserts these, and they are later converted to |subtype_normal| by |new_ligkern|. 
+ +*/ + +/* +quarterword norm_min(int h) +{ + if (h <= 0) + return 1; + else if (h >= 255) + return 255; + else + return (quarterword) h; +} +*/ + +halfword tex_new_char_node(quarterword subtype, halfword fnt, halfword chr, int all) +{ + halfword p = tex_aux_new_glyph_node_with_attributes(null); + node_subtype(p) = subtype; + glyph_font(p) = fnt; + glyph_character(p) = chr; + if (all) { + glyph_data(p) = glyph_data_par; + /* no state */ + set_glyph_script(p, glyph_script_par); + set_glyph_language(p, cur_lang_par); + set_glyph_lhmin(p, left_hyphen_min_par); + set_glyph_rhmin(p, right_hyphen_min_par); + set_glyph_hyphenate(p, hyphenation_mode_par); + set_glyph_options(p, glyph_options_par); + set_glyph_scale(p, glyph_scale_par); + set_glyph_x_scale(p, glyph_x_scale_par); + set_glyph_y_scale(p, glyph_y_scale_par); + set_glyph_x_offset(p, glyph_x_offset_par); + set_glyph_y_offset(p, glyph_y_offset_par); + } + if (! tex_char_exists(fnt, chr)) { + int callback_id = lmt_callback_defined(missing_character_callback); + if (callback_id > 0) { + /* maybe direct node */ + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "Ndd->", p, fnt, chr); + } + } + return p; +} + +halfword tex_new_text_glyph(halfword fnt, halfword chr) +{ + halfword p = tex_get_node(glyph_node_size); + memset((void *) (lmt_node_memory_state.nodes + p + 1), 0, (sizeof(memoryword) * (glyph_node_size - 1))); + node_type(p) = glyph_node; + node_subtype(p) = glyph_unset_subtype; + glyph_font(p) = fnt; + glyph_character(p) = chr; + glyph_data(p) = glyph_data_par; + /* no state */ + set_glyph_script(p, glyph_script_par); + set_glyph_language(p, cur_lang_par); + set_glyph_lhmin(p, left_hyphen_min_par); + set_glyph_rhmin(p, right_hyphen_min_par); + set_glyph_hyphenate(p, hyphenation_mode_par); + set_glyph_options(p, glyph_options_par); + set_glyph_scale(p, glyph_scale_par); + set_glyph_x_scale(p, glyph_x_scale_par); + set_glyph_y_scale(p, glyph_y_scale_par); + set_glyph_x_offset(p, glyph_x_offset_par); + 
set_glyph_y_offset(p, glyph_y_offset_par); + return p; +} + +/*tex + + Here are a few handy helpers used by the list output routines. + + We had an xadvance but dropped it but it might come back eventually. The offsets are mostly + there to deal with anchoring and we assume kerns to be used to complement x offsets if needed: + just practical decisions made long ago. + + Why do we check y offset being positive for dp but not for ht? Maybe change this to be + consistent? Anyway, we have adapted \LUATEX\ so ... + + \startitemize + \startitem what we had before \stopitem + \startitem compensate height and depth \stopitem + \startitem compensate height and depth, take max \stopitem + \startitem we keep height and depth \stopitem + \stopitemize + +*/ + +/*tex These should move to the texfont.c as we have too many variants now. */ + +scaled tex_glyph_width(halfword p) +{ + scaled w = tex_char_width_from_glyph(p); + scaled x = glyph_x_offset(p); + if (x && tex_has_glyph_option(p, glyph_option_apply_x_offset)) { + w += x; /* or after expansion? needs testing */ + } + w -= (glyph_left(p) + glyph_right(p)); + return w; +} + +scaled tex_glyph_width_ex(halfword p) +{ + scaled w = tex_char_width_from_glyph(p); + scaled x = glyph_x_offset(p); + if (x && tex_has_glyph_option(p, glyph_option_apply_x_offset)) { + w += x; /* or after expansion? needs testing */ + } + w -= (glyph_left(p) + glyph_right(p)); + if (glyph_expansion(p)) { + w = w + tex_ext_xn_over_d(w, 1000000 + glyph_expansion(p), 1000000); + } + return w; +} + +scaled tex_glyph_height(halfword p) +{ + scaled h = tex_char_height_from_glyph(p) + glyph_raise(p); + scaled y = glyph_y_offset(p); + if (y && tex_has_glyph_option(p, glyph_option_apply_y_offset)) { + h += y; + } + return h < 0 ? 
0 : h; +} + +scaled tex_glyph_depth(halfword p) /* not used */ +{ + scaled d = tex_char_depth_from_glyph(p) - glyph_raise(p); + scaled y = glyph_y_offset(p); + if (y && tex_has_glyph_option(p, glyph_option_apply_y_offset)) { + d -= y; + } + return d < 0 ? 0 : d; +} + +scaledwhd tex_glyph_dimensions(halfword p) +{ + scaledwhd whd = { 0, 0, 0 }; + scaled x = glyph_x_offset(p); + scaled y = glyph_y_offset(p); + whd.ht = tex_char_height_from_glyph(p) + glyph_raise(p); + whd.dp = tex_char_depth_from_glyph(p) - glyph_raise(p); + whd.wd = tex_char_width_from_glyph(p) - (glyph_left(p) + glyph_right(p)); + if (x && tex_has_glyph_option(p, glyph_option_apply_x_offset)) { + whd.wd += x; + } + if (y && tex_has_glyph_option(p, glyph_option_apply_y_offset)) { + whd.ht += y; + whd.dp -= y; + } + if (whd.ht < 0) { + whd.ht = 0; + } + if (whd.dp < 0) { + whd.dp = 0; + } + return whd; +} + +scaledwhd tex_glyph_dimensions_ex(halfword p) +{ + scaledwhd whd = { 0, 0, 0 }; + scaled x = glyph_x_offset(p); + scaled y = glyph_y_offset(p); + whd.ht = tex_char_height_from_glyph(p) + glyph_raise(p); + whd.dp = tex_char_depth_from_glyph(p) - glyph_raise(p); + whd.wd = tex_char_width_from_glyph(p) - (glyph_left(p) + glyph_right(p)); + if (x && tex_has_glyph_option(p, glyph_option_apply_x_offset)) { + whd.wd += x; + } + if (y && tex_has_glyph_option(p, glyph_option_apply_y_offset)) { + whd.ht += y; + whd.dp -= y; + } + if (whd.ht < 0) { + whd.ht = 0; + } + if (whd.dp < 0) { + whd.dp = 0; + } + if (whd.wd && glyph_expansion(p)) { + whd.wd = tex_ext_xn_over_d(whd.wd, 1000000 + glyph_expansion(p), 1000000); + } + return whd; +} + +scaled tex_glyph_total(halfword p) +{ + scaled ht = tex_char_height_from_glyph(p); + scaled dp = tex_char_depth_from_glyph(p); + if (ht < 0) { + ht = 0; + } + if (dp < 0) { + dp = 0; + } + return ht + dp; +} + +int tex_glyph_has_dimensions(halfword p) +{ + scaled offset = glyph_x_offset(p); + scaled amount = tex_char_width_from_glyph(p); + if (offset && 
tex_has_glyph_option(p, glyph_option_apply_x_offset)) { + amount += offset; + } + amount -= (glyph_left(p) + glyph_right(p)); + if (amount) { + return 1; + } else { + amount = tex_char_total_from_glyph(p); + /* here offset adn raise just moves */ + return amount != 0; + } +} + +halfword tex_kern_dimension(halfword p) +{ + return kern_amount(p); +} + +halfword tex_kern_dimension_ex(halfword p) +{ + halfword k = kern_amount(p); + if (k && kern_expansion(p)) { + k = tex_ext_xn_over_d(k, 1000000 + kern_expansion(p), 1000000); + } + return k; +} + +scaledwhd tex_pack_dimensions(halfword p) +{ + scaledwhd whd = { 0, 0, 0 }; + whd.ht = box_height(p); + whd.dp = box_depth(p); + whd.wd = box_width(p); + return whd; +} + +/*tex + + A |disc_node|, which occurs only in horizontal lists, specifies a \quote {discretionary} + line break. If such a break occurs at node |p|, the text that starts at |pre_break(p)| will + precede the break, the text that starts at |post_break(p)| will follow the break, and text + that appears in |no_break(p)| nodes will be ignored. For example, an ordinary discretionary + hyphen, indicated by |\-|, yields a |disc_node| with |pre_break| pointing to a |char_node| + containing a hyphen, |post_break = null|, and |no_break=null|. + + If |subtype(p) = automatic_disc|, the |ex_hyphen_penalty| will be charged for this break. + Otherwise the |hyphen_penalty| will be charged. The texts will actually be substituted into + the list by the line-breaking algorithm if it decides to make the break, and the discretionary + node will disappear at that time; thus, the output routine sees only discretionaries that were + not chosen. + +*/ + +halfword tex_new_disc_node(quarterword s) +{ + halfword p = tex_new_node(disc_node, s); + disc_penalty(p) = hyphen_penalty_par; + disc_class(p) = unset_disc_class; + return p; +} + +/*tex + + The program above includes a bunch of \quote {hooks} that allow further capabilities to be + added without upsetting \TEX's basic structure. 
Most of these hooks are concerned with \quote + {whatsit} nodes, which are intended to be used for special purposes; whenever a new extension + to \TEX\ involves a new kind of whatsit node, a corresponding change needs to be made to the + routines below that deal with such nodes, but it will usually be unnecessary to make many + changes to the other parts of this program. + + In order to demonstrate how extensions can be made, we shall treat |\write|, |\openout|, + |\closeout|, |\immediate|, and |\special| as if they were extensions. These commands are + actually primitives of \TEX, and they should appear in all implementations of the system; but + let's try to imagine that they aren't. Then the program below illustrates how a person could + add them. + + Sometimes, of course, an extension will require changes to \TEX\ itself; no system of hooks + could be complete enough for all conceivable extensions. The features associated with |\write| + are almost all confined to the following paragraphs, but there are small parts of the |print_ln| + and |print_char| procedures that were introduced specifically to |\write| characters. + Furthermore one of the token lists recognized by the scanner is a |write_text|; and there are a + few other miscellaneous places where we have already provided for some aspect of |\write|. The + goal of a \TeX\ extender should be to minimize alterations to the standard parts of the program, + and to avoid them completely if possible. He or she should also be quite sure that there's no + easy way to accomplish the desired goals with the standard features that \TEX\ already has. + \quote {Think thrice before extending}, because that may save a lot of work, and it will also + keep incompatible extensions of \TEX\ from proliferating. + + First let's consider the format of whatsit nodes that are used to represent the data associated + with |\write| and its relatives. 
Recall that a whatsit has |type=whatsit_node|, and the |subtype| + is supposed to distinguish different kinds of whatsits. Each node occupies two or more words; + the exact number is immaterial, as long as it is readily determined from the |subtype| or other + data. + + We shall introduce four |subtype| values here, corresponding to the control sequences |\openout|, + |\write|, |\closeout|, and |\special|. The second word of I/O whatsits has a |write_stream| + field that identifies the write-stream number (0 to 15, or 16 for out-of-range and positive, or + 17 for out-of-range and negative). In the case of |\write| and |\special|, there is also a field + that points to the reference count of a token list that should be sent. In the case of |\openout|, + we need three words and three auxiliary subfields to hold the string numbers for name, area, and + extension. + + Extensions might introduce new command codes; but it's best to use |extension| with a modifier, + whenever possible, so that |main_control| stays the same. + + The sixteen possible |\write| streams are represented by the |write_file| array. The |j|th file + is open if and only if |write_open[j]=true|. The last two streams are special; |write_open[16]| + represents a stream number greater than 15, while |write_open[17]| represents a negative stream + number, and both of these variables are always |false|. + + Writing to files is delegated to \LUA, so we have no write channels. + + To write a token list, we must run it through \TEX's scanner, expanding macros and |\the| and + |\number|, etc. This might cause runaways, if a delimited macro parameter isn't matched, and + runaways would be extremely confusing since we are calling on \TEX's scanner in the middle of + a |\shipout| command. Therefore we will put a dummy control sequence as a \quote {stopper}, + right after the token list. This control sequence is artificially defined to be |\outer|. 
+ + The presence of |\immediate| causes the |do_extension| procedure to descend to one level of + recursion. Nothing happens unless |\immediate| is followed by |\openout|, |\write|, or + |\closeout|. + + Here is a subroutine that creates a whatsit node having a given |subtype| and a given number + of words. It initializes only the first word of the whatsit, and appends it to the current + list. + + A |whatsit_node| is a wild card reserved for extensions to \TEX. The |subtype| field in its + first word says what |whatsit| it is, and implicitly determines the node size (which must be + 2 or more) and the format of the remaining words. When a |whatsit_node| is encountered in a + list, special actions are invoked; knowledgeable people who are careful not to mess up the + rest of \TEX\ are able to make \TEX\ do new things by adding code at the end of the program. + For example, there might be a \quote {\TEX nicolor} extension to specify different colors of + ink, and the whatsit node might contain the desired parameters. + + The present implementation of \TEX\ treats the features associated with |\write| and |\special| + as if they were extensions, in order to illustrate how such routines might be coded. We shall + defer further discussion of extensions until the end of this program. + + However, in \LUAMETATEX\ we only have a generic whatsit node, a small one that can be used to + implement whatever you like, using \LUA. So, all we have here is the above comment as + guideline for that. + + \TEX\ makes use of the fact that |hlist_node|, |vlist_node|, |rule_node|, |insert_node|, + |mark_node|, |adjust_node|, |disc_node|, |whatsit_node|, and |math_node| are at the low end of + the type codes, by permitting a break at glue in a list if and only if the |type| of the + previous node is less than |math_node|. Furthermore, a node is discarded after a break if its + type is |math_node| or~more. + + A |glue_node| represents glue in a list. 
However, it is really only a pointer to a separate + glue specification, since \TEX\ makes use of the fact that many essentially identical nodes of + glue are usually present. If |p| points to a |glue_node|, |glue_ptr(p)| points to another packet + of words that specify the stretch and shrink components, etc. + + Glue nodes also serve to represent leaders; the |subtype| is used to distinguish between + ordinary glue (which is called |normal|) and the three kinds of leaders (which are called + |a_leaders|, |c_leaders|, and |x_leaders|). The |leader_ptr| field points to a rule node or to + a box node containing the leaders; it is set to |null| in ordinary glue nodes. + + Many kinds of glue are computed from \TEX's skip parameters, and it is helpful to know which + parameter has led to a particular glue node. Therefore the |subtype| is set to indicate the + source of glue, whenever it originated as a parameter. We will be defining symbolic names for + the parameter numbers later (e.g., |line_skip_code = 0|, |baseline_skip_code = 1|, etc.); it + suffices for now to say that the |subtype| of parametric glue will be the same as the parameter + number, plus~one. + + In math formulas there are two more possibilities for the |subtype| in a glue node: |mu_glue| + denotes an |\mskip| (where the units are scaled |mu| instead of scaled |pt|); and + |cond_math_glue| denotes the |\nonscript| feature that cancels the glue node immediately + following if it appears in a subscript. + + A glue specification has a halfword reference count in its first word, representing |null| + plus the number of glue nodes that point to it (less one). Note that the reference count + appears in the same position as the |link| field in list nodes; this is the field that is + initialized to |null| when a node is allocated, and it is also the field that is flagged by + |empty_flag| in empty nodes. 
+ + Glue specifications also contain three |scaled| fields, for the |width|, |stretch|, and + |shrink| dimensions. Finally, there are two one-byte fields called |stretch_order| and + |shrink_order|; these contain the orders of infinity (|normal|, |sfi|, |fil|, |fill|, or + |filll|) corresponding to the stretch and shrink values. + + Here is a function that returns a pointer to a copy of a glue spec. The reference count in the + copy is |null|, because there is assumed to be exactly one reference to the new specification. + +*/ + +halfword tex_new_glue_spec_node(halfword q) +{ + if (q) { + switch (node_type(q)) { + case glue_spec_node: + return tex_copy_node(q); + case glue_node: + { + halfword p = tex_copy_node(zero_glue); + glue_amount(p) = glue_amount(q); + glue_stretch(p) = glue_stretch(q); + glue_shrink(p) = glue_shrink(q); + glue_stretch_order(p) = glue_stretch_order(q); + glue_shrink_order(p) = glue_shrink_order(q); + return p; + } + } + } + return tex_copy_node(zero_glue); +} + +/*tex + + And here's a function that creates a glue node for a given parameter identified by its code + number; for example, |new_param_glue(line_skip_code)| returns a pointer to a glue node for the + current |\lineskip|. + +*/ + +halfword tex_new_param_glue_node(quarterword p, quarterword s) +{ + halfword n = tex_new_node(glue_node, s); + halfword g = glue_parameter(p); + if (g) { + memcpy((void *) (lmt_node_memory_state.nodes + n + 2), (void *) (lmt_node_memory_state.nodes + g + 2), (glue_spec_size - 2) * (sizeof(memoryword))); + } + return n; +} + +/*tex + + Glue nodes that are more or less anonymous are created by |new_glue|, whose argument points to + a glue specification. 
+ +*/ + +halfword tex_new_glue_node(halfword q, quarterword s) +{ + halfword p = tex_new_node(glue_node, s); + memcpy((void *) (lmt_node_memory_state.nodes + p + 2), (void *) (lmt_node_memory_state.nodes + q + 2), (glue_spec_size - 2) * (sizeof(memoryword))); + return p; +} + +/*tex + + Still another subroutine is needed: |new_skip_param|. This one is sort of a combination of + |new_param_glue| and |new_glue|. It creates a glue node for one of the current glue parameters, + but it makes a fresh copy of the glue specification, since that specification will probably be + subject to change, while the parameter will stay put. + + Remark: as we have copies we don't need this one can use |new_param_glue| instead. + +*/ + +/*tex + + A |kern_node| has a |width| field to specify a (normally negative) amount of spacing. This + spacing correction appears in horizontal lists between letters like A and V when the font + designer said that it looks better to move them closer together or further apart. A kern node + can also appear in a vertical list, when its |width| denotes additional spacing in the vertical + direction. The |subtype| is either |normal| (for kerns inserted from font information or math + mode calculations) or |explicit| (for kerns inserted from |\kern| and |\/| commands) or + |acc_kern| (for kerns inserted from non-math accents) or |mu_glue| (for kerns inserted from + |\mkern| specifications in math formulas). + + The |new_kern| function creates a (font) kern node having a given width. + +*/ + +halfword tex_new_kern_node(scaled w, quarterword s) +{ + halfword p = tex_new_node(kern_node, s); + kern_amount(p) = w; + return p; +} + +/*tex + + A |penalty_node| specifies the penalty associated with line or page breaking, in its |penalty| + field. This field is a fullword integer, but the full range of integer values is not used: + Any penalty |>=10000| is treated as infinity, and no break will be allowed for such high values. 
+ Similarly, any penalty |<= -10000| is treated as negative infinity, and a break will be forced. + + Anyone who has been reading the last few sections of the program will be able to guess what + comes next. + +*/ + +halfword tex_new_penalty_node(halfword m, quarterword s) +{ + halfword p = tex_new_node(penalty_node, s); + penalty_amount(p) = m; + return p; +} + +/*tex + + You might think that we have introduced enough node types by now. Well, almost, but there is + one more: An |unset_node| has nearly the same format as an |hlist_node| or |vlist_node|; it is + used for entries in |\halign| or |\valign| that are not yet in their final form, since the box + dimensions are their \quote {natural} sizes before any glue adjustment has been made. The + |glue_set| word is not present; instead, we have a |glue_stretch| field, which contains the + total stretch of order |glue_order| that is present in the hlist or vlist being boxed. + Similarly, the |shift_amount| field is replaced by a |glue_shrink| field, containing the total + shrink of order |glue_sign| that is present. The |subtype| field is called |span_count|; an + unset box typically contains the data for |qo(span_count)+1| columns. Unset nodes will be + changed to box nodes when alignment is completed. + + In fact, there are still more types coming. When we get to math formula processing we will + see that a |style_node| has |type=14|; and a number of larger type codes will also be defined, + for use in math mode only. + + Warning: If any changes are made to these data structure layouts, such as changing any of the + node sizes or even reordering the words of nodes, the |copy_node_list| procedure and the memory + initialization code below may have to be changed. However, other references to the nodes are + made symbolically in terms of the \WEB\ macro definitions above, so that format changes will + leave \TEX's other algorithms intact. + + Some day we might store the current paragraph properties in this node. 
Actually, we already + store the interline and broken penalties. But it then also demands adaptation if the functions + that deal with breaking (we can just pass the local par node) and related specification node + cleanups. We could either snapshot parameters before a group ends, or we can add a lots of + |\local...| parameters. + +*/ + +halfword tex_new_par_node(quarterword mode) +{ + int callback_id, top; + halfword p = tex_new_node(par_node, mode); + /* */ + tex_set_local_interline_penalty(p, local_interline_penalty_par); + tex_set_local_broken_penalty(p, local_broken_penalty_par); + par_dir(p) = par_direction_par; + /* */ + tex_add_local_boxes(p); + if (mode != local_box_par_subtype) { + /*tex Callback with node passed. Todo: move to luanode with the rest of callbacks. */ + callback_id = lmt_callback_defined(insert_par_callback); + if (callback_id > 0) { + lua_State *L = lmt_lua_state.lua_instance; + if (lmt_callback_okay(L, callback_id, &top)) { + int i; + lmt_node_list_to_lua(L, p); + lmt_push_par_mode(L, mode); + i = lmt_callback_call(L, 2, 0 ,top); + if (i) { + lmt_callback_error(L, top, i); + } else { + lmt_callback_wrapup(L, top); + } + } + } + } + return p; +} + +static halfword tex_aux_internal_to_par_code(halfword cmd, halfword index) { + switch (cmd) { + case internal_int_cmd: + switch (index) { + case hang_after_code : return par_hang_after_code; + case adjust_spacing_code : return par_adjust_spacing_code; + case protrude_chars_code : return par_protrude_chars_code; + case pre_tolerance_code : return par_pre_tolerance_code; + case tolerance_code : return par_tolerance_code; + case looseness_code : return par_looseness_code; + case last_line_fit_code : return par_last_line_fit_code; + case line_penalty_code : return par_line_penalty_code; + case inter_line_penalty_code : return par_inter_line_penalty_code; + case club_penalty_code : return par_club_penalty_code; + case widow_penalty_code : return par_widow_penalty_code; + case 
display_widow_penalty_code : return par_display_widow_penalty_code; + case orphan_penalty_code : return par_orphan_penalty_code; + case broken_penalty_code : return par_broken_penalty_code; + case adj_demerits_code : return par_adj_demerits_code; + case double_hyphen_demerits_code : return par_double_hyphen_demerits_code; + case final_hyphen_demerits_code : return par_final_hyphen_demerits_code; + case shaping_penalties_mode_code : return par_shaping_penalties_mode_code; + case shaping_penalty_code : return par_shaping_penalty_code; + } + case internal_dimen_cmd: + switch (index) { + case hsize_code : return par_hsize_code; + case hang_indent_code : return par_hang_indent_code; + case par_indent_code : return par_par_indent_code; + case emergency_stretch_code : return par_emergency_stretch_code; + case line_skip_limit_code : return par_line_skip_limit_code; + } + case internal_glue_cmd: + switch (index) { + case left_skip_code : return par_left_skip_code; + case right_skip_code : return par_right_skip_code; + case par_fill_left_skip_code : return par_par_fill_left_skip_code; + case par_fill_right_skip_code : return par_par_fill_right_skip_code; + case par_init_left_skip_code : return par_par_init_left_skip_code; + case par_init_right_skip_code : return par_par_init_right_skip_code; + case baseline_skip_code : return par_baseline_skip_code; + case line_skip_code : return par_line_skip_code; + } + case specification_reference_cmd: + switch (index) { + case par_shape_code : return par_par_shape_code; + case inter_line_penalties_code : return par_inter_line_penalties_code; + case club_penalties_code : return par_club_penalties_code; + case widow_penalties_code : return par_widow_penalties_code; + case display_widow_penalties_code: return par_display_widow_penalties_code; + case orphan_penalties_code : return par_orphan_penalties_code; + } + } + return -1; +} + +void tex_update_par_par(halfword cmd, halfword index) +{ + halfword code = tex_aux_internal_to_par_code(cmd, 
index); + if (code >= 0) { + halfword par = tex_find_par_par(cur_list.head); + if (par) { + tex_snapshot_par(par, code); + } + } +} + +halfword tex_get_par_par(halfword p, halfword what) +{ + int set = tex_par_state_is_set(p, what); + switch (what) { + case par_par_shape_code: return set ? par_par_shape(p) : par_shape_par; + case par_inter_line_penalties_code: return set ? par_inter_line_penalties(p) : inter_line_penalties_par; + case par_club_penalties_code: return set ? par_club_penalties(p) : club_penalties_par; + case par_widow_penalties_code: return set ? par_widow_penalties(p) : widow_penalties_par; + case par_display_widow_penalties_code: return set ? par_display_widow_penalties(p) : display_widow_penalties_par; + case par_orphan_penalties_code: return set ? par_orphan_penalties(p) : orphan_penalties_par; + case par_hang_indent_code: return set ? par_hang_indent(p) : hang_indent_par; + case par_hang_after_code: return set ? par_hang_after(p) : hang_after_par; + case par_hsize_code: return set ? par_hsize(p) : hsize_par; + case par_left_skip_code: return set ? par_left_skip(p) : left_skip_par; + case par_right_skip_code: return set ? par_right_skip(p) : right_skip_par; + case par_last_line_fit_code: return set ? par_last_line_fit(p) : last_line_fit_par; + case par_pre_tolerance_code: return set ? par_pre_tolerance(p) : pre_tolerance_par; + case par_tolerance_code: return set ? par_tolerance(p) : tolerance_par; + case par_looseness_code: return set ? par_looseness(p) : looseness_par; + case par_adjust_spacing_code: return set ? par_adjust_spacing(p) : adjust_spacing_par; + case par_adj_demerits_code: return set ? par_adj_demerits(p) : adj_demerits_par; + case par_protrude_chars_code: return set ? par_protrude_chars(p) : protrude_chars_par; + case par_line_penalty_code: return set ? par_line_penalty(p) : line_penalty_par; + case par_double_hyphen_demerits_code: return set ? 
par_double_hyphen_demerits(p) : double_hyphen_demerits_par; + case par_final_hyphen_demerits_code: return set ? par_final_hyphen_demerits(p) : final_hyphen_demerits_par; + case par_inter_line_penalty_code: return set ? par_inter_line_penalty(p) : inter_line_penalty_par; + case par_club_penalty_code: return set ? par_club_penalty(p) : club_penalty_par; + case par_widow_penalty_code: return set ? par_widow_penalty(p) : widow_penalty_par; + case par_display_widow_penalty_code: return set ? par_display_widow_penalty(p) : display_widow_penalty_par; + case par_orphan_penalty_code: return set ? par_orphan_penalty(p) : orphan_penalty_par; + case par_broken_penalty_code: return set ? par_broken_penalty(p) : broken_penalty_par; + case par_emergency_stretch_code: return set ? par_emergency_stretch(p) : emergency_stretch_par; + case par_par_indent_code: return set ? par_par_indent(p) : par_indent_par; + case par_par_fill_left_skip_code: return set ? par_par_fill_left_skip(p) : par_fill_left_skip_par; + case par_par_fill_right_skip_code: return set ? par_par_fill_right_skip(p) : par_fill_right_skip_par; + case par_par_init_left_skip_code: return set ? par_par_init_left_skip(p) : par_init_left_skip_par; + case par_par_init_right_skip_code: return set ? par_par_init_right_skip(p) : par_init_right_skip_par; + case par_baseline_skip_code: return set ? par_baseline_skip(p) : baseline_skip_par; + case par_line_skip_code: return set ? par_line_skip(p) : line_skip_par; + case par_line_skip_limit_code: return set ? par_line_skip_limit(p) : line_skip_limit_par; + case par_adjust_spacing_step_code: return set ? par_adjust_spacing_step(p) : adjust_spacing_step_par; + case par_adjust_spacing_shrink_code: return set ? par_adjust_spacing_shrink(p) : adjust_spacing_shrink_par; + case par_adjust_spacing_stretch_code: return set ? par_adjust_spacing_stretch(p) : adjust_spacing_stretch_par; + case par_hyphenation_mode_code: return set ? 
par_hyphenation_mode(p) : hyphenation_mode_par; + case par_shaping_penalties_mode_code: return set ? par_shaping_penalties_mode(p) : shaping_penalties_mode_par; + case par_shaping_penalty_code: return set ? par_shaping_penalty(p) : shaping_penalty_par; + } + return null; +} + +void tex_set_par_par(halfword p, halfword what, halfword v, int force) +{ + if (force || tex_par_state_is_set(p, what)) { + switch (what) { + case par_hsize_code: + par_hsize(p) = v; + break; + case par_left_skip_code: + if (par_left_skip(p)) { + tex_flush_node(par_left_skip(p)); + } + par_left_skip(p) = v ? tex_copy_node(v) : null; + break; + case par_right_skip_code: + if (par_right_skip(p)) { + tex_flush_node(par_right_skip(p)); + } + par_right_skip(p) = v ? tex_copy_node(v) : null; + break; + case par_hang_indent_code: + par_hang_indent(p) = v; + break; + case par_hang_after_code: + par_hang_after(p) = v; + break; + case par_par_indent_code: + par_par_indent(p) = v; + break; + case par_par_fill_left_skip_code: + if (par_par_fill_left_skip(p)) { + tex_flush_node(par_par_fill_left_skip(p)); + } + par_par_fill_left_skip(p) = v ? tex_copy_node(v) : null; + break; + case par_par_fill_right_skip_code: + if (par_par_fill_right_skip(p)) { + tex_flush_node(par_par_fill_right_skip(p)); + } + par_par_fill_right_skip(p) = v ? tex_copy_node(v) : null; + break; + case par_par_init_left_skip_code: + if (par_par_init_left_skip(p)) { + tex_flush_node(par_par_init_left_skip(p)); + } + par_par_init_left_skip(p) = v ? tex_copy_node(v) : null; + break; + case par_par_init_right_skip_code: + if (par_par_init_right_skip(p)) { + tex_flush_node(par_par_init_right_skip(p)); + } + par_par_init_right_skip(p) = v ? 
tex_copy_node(v) : null; + break; + case par_adjust_spacing_code: + par_adjust_spacing(p) = v; + break; + case par_protrude_chars_code: + par_protrude_chars(p) = v; + break; + case par_pre_tolerance_code: + par_pre_tolerance(p) = v; + break; + case par_tolerance_code: + par_tolerance(p) = v; + break; + case par_emergency_stretch_code: + par_emergency_stretch(p) = v; + break; + case par_looseness_code: + par_looseness(p) = v; + break; + case par_last_line_fit_code: + par_last_line_fit(p) = v; + break; + case par_line_penalty_code: + par_line_penalty(p) = v; + break; + case par_inter_line_penalty_code: + par_inter_line_penalty(p) = v; + break; + case par_club_penalty_code: + par_club_penalty(p) = v; + break; + case par_widow_penalty_code: + par_widow_penalty(p) = v; + break; + case par_display_widow_penalty_code: + par_display_widow_penalty(p) = v; + break; + case par_orphan_penalty_code: + par_orphan_penalty(p) = v; + break; + case par_broken_penalty_code: + par_broken_penalty(p) = v; + break; + case par_adj_demerits_code: + par_adj_demerits(p) = v; + break; + case par_double_hyphen_demerits_code: + par_double_hyphen_demerits(p) = v; + break; + case par_final_hyphen_demerits_code: + par_final_hyphen_demerits(p) = v; + break; + case par_par_shape_code: + if (par_par_shape(p)) { + tex_flush_node(par_par_shape(p)); + } + par_par_shape(p) = v ? tex_copy_node(v) : null; + break; + case par_inter_line_penalties_code: + if (par_inter_line_penalties(p)) { + tex_flush_node(par_inter_line_penalties(p)); + } + par_inter_line_penalties(p) = v ? tex_copy_node(v) : null; + break; + case par_club_penalties_code: + if (par_club_penalties(p)) { + tex_flush_node(par_club_penalties(p)); + } + par_club_penalties(p) = v ? tex_copy_node(v) : null; + break; + case par_widow_penalties_code: + if (par_widow_penalties(p)) { + tex_flush_node(par_widow_penalties(p)); + } + par_widow_penalties(p) = v ? 
tex_copy_node(v) : null; + break; + case par_display_widow_penalties_code: + if (par_display_widow_penalties(p)) { + tex_flush_node(par_display_widow_penalties(p)); + } + par_display_widow_penalties(p) = v ? tex_copy_node(v) : null; + break; + case par_orphan_penalties_code: + if (par_orphan_penalties(p)) { + tex_flush_node(par_orphan_penalties(p)); + } + par_orphan_penalties(p) = v ? tex_copy_node(v) : null; + break; + case par_baseline_skip_code: + if (par_baseline_skip(p)) { + tex_flush_node(par_baseline_skip(p)); + } + par_baseline_skip(p) = v ? tex_copy_node(v) : null; + break; + case par_line_skip_code: + if (par_line_skip(p)) { + tex_flush_node(par_line_skip(p)); + } + par_line_skip(p) = v ? tex_copy_node(v) : null; + break; + case par_line_skip_limit_code: + par_line_skip_limit(p) = v; + break; + case par_adjust_spacing_step_code: + par_adjust_spacing_step(p) = v; + break; + case par_adjust_spacing_shrink_code: + par_adjust_spacing_shrink(p) = v; + break; + case par_adjust_spacing_stretch_code: + par_adjust_spacing_stretch(p) = v; + break; + case par_hyphenation_mode_code: + par_hyphenation_mode(p) = v; + break; + case par_shaping_penalties_mode_code: + par_shaping_penalties_mode(p) = v; + break; + case par_shaping_penalty_code: + par_shaping_penalty(p) = v; + break; + } + tex_set_par_state(p, what); + } +} + +void tex_snapshot_par(halfword p, halfword what) +{ + if (p && lmt_main_state.run_state != initializing_state) { + int unset = 0; + if (what) { + if (what < 0) { + unset = 1; + what = -what; + } + if (what > par_all_category) { + what = par_all_category; + } + } else { + unset = 1; + what = par_all_category; + } + if (tex_par_to_be_set(what, par_hsize_code)) { tex_set_par_par(p, par_hsize_code, unset ? null : hsize_par, 1); } + if (tex_par_to_be_set(what, par_left_skip_code)) { tex_set_par_par(p, par_left_skip_code, unset ? null : left_skip_par, 1); } + if (tex_par_to_be_set(what, par_right_skip_code)) { tex_set_par_par(p, par_right_skip_code, unset ? 
null : right_skip_par, 1); } + if (tex_par_to_be_set(what, par_hang_indent_code)) { tex_set_par_par(p, par_hang_indent_code, unset ? null : hang_indent_par, 1); } + if (tex_par_to_be_set(what, par_hang_after_code)) { tex_set_par_par(p, par_hang_after_code, unset ? null : hang_after_par, 1); } + if (tex_par_to_be_set(what, par_par_indent_code)) { tex_set_par_par(p, par_par_indent_code, unset ? null : par_indent_par, 1); } + if (tex_par_to_be_set(what, par_par_fill_left_skip_code)) { tex_set_par_par(p, par_par_fill_left_skip_code, unset ? null : par_fill_left_skip_par, 1); } + if (tex_par_to_be_set(what, par_par_fill_right_skip_code)) { tex_set_par_par(p, par_par_fill_right_skip_code, unset ? null : par_fill_right_skip_par, 1); } + if (tex_par_to_be_set(what, par_par_init_left_skip_code)) { tex_set_par_par(p, par_par_init_left_skip_code, unset ? null : par_init_left_skip_par, 1); } + if (tex_par_to_be_set(what, par_par_init_right_skip_code)) { tex_set_par_par(p, par_par_init_right_skip_code, unset ? null : par_init_right_skip_par, 1); } + if (tex_par_to_be_set(what, par_adjust_spacing_code)) { tex_set_par_par(p, par_adjust_spacing_code, unset ? null : adjust_spacing_par, 1); } + if (tex_par_to_be_set(what, par_protrude_chars_code)) { tex_set_par_par(p, par_protrude_chars_code, unset ? null : protrude_chars_par, 1); } + if (tex_par_to_be_set(what, par_pre_tolerance_code)) { tex_set_par_par(p, par_pre_tolerance_code, unset ? null : pre_tolerance_par, 1); } + if (tex_par_to_be_set(what, par_tolerance_code)) { tex_set_par_par(p, par_tolerance_code, unset ? null : tolerance_par, 1); } + if (tex_par_to_be_set(what, par_emergency_stretch_code)) { tex_set_par_par(p, par_emergency_stretch_code, unset ? null : emergency_stretch_par, 1); } + if (tex_par_to_be_set(what, par_looseness_code)) { tex_set_par_par(p, par_looseness_code, unset ? null : looseness_par, 1); } + if (tex_par_to_be_set(what, par_last_line_fit_code)) { tex_set_par_par(p, par_last_line_fit_code, unset ? 
null : last_line_fit_par, 1); } + if (tex_par_to_be_set(what, par_line_penalty_code)) { tex_set_par_par(p, par_line_penalty_code, unset ? null : line_penalty_par, 1); } + if (tex_par_to_be_set(what, par_inter_line_penalty_code)) { tex_set_par_par(p, par_inter_line_penalty_code, unset ? null : inter_line_penalty_par, 1); } + if (tex_par_to_be_set(what, par_club_penalty_code)) { tex_set_par_par(p, par_club_penalty_code, unset ? null : club_penalty_par, 1); } + if (tex_par_to_be_set(what, par_widow_penalty_code)) { tex_set_par_par(p, par_widow_penalty_code, unset ? null : widow_penalty_par, 1); } + if (tex_par_to_be_set(what, par_display_widow_penalty_code)) { tex_set_par_par(p, par_display_widow_penalty_code, unset ? null : display_widow_penalty_par, 1); } + if (tex_par_to_be_set(what, par_orphan_penalty_code)) { tex_set_par_par(p, par_orphan_penalty_code, unset ? null : orphan_penalty_par, 1); } + if (tex_par_to_be_set(what, par_broken_penalty_code)) { tex_set_par_par(p, par_broken_penalty_code, unset ? null : broken_penalty_par, 1); } + if (tex_par_to_be_set(what, par_adj_demerits_code)) { tex_set_par_par(p, par_adj_demerits_code, unset ? null : adj_demerits_par, 1); } + if (tex_par_to_be_set(what, par_double_hyphen_demerits_code)) { tex_set_par_par(p, par_double_hyphen_demerits_code, unset ? null : double_hyphen_demerits_par, 1); } + if (tex_par_to_be_set(what, par_final_hyphen_demerits_code)) { tex_set_par_par(p, par_final_hyphen_demerits_code, unset ? null : final_hyphen_demerits_par, 1); } + if (tex_par_to_be_set(what, par_par_shape_code)) { tex_set_par_par(p, par_par_shape_code, unset ? null : par_shape_par, 1); } + if (tex_par_to_be_set(what, par_inter_line_penalties_code)) { tex_set_par_par(p, par_inter_line_penalties_code, unset ? null : inter_line_penalties_par, 1); } + if (tex_par_to_be_set(what, par_club_penalties_code)) { tex_set_par_par(p, par_club_penalties_code, unset ? 
null : club_penalties_par, 1); } + if (tex_par_to_be_set(what, par_widow_penalties_code)) { tex_set_par_par(p, par_widow_penalties_code, unset ? null : widow_penalties_par, 1); } + if (tex_par_to_be_set(what, par_display_widow_penalties_code)) { tex_set_par_par(p, par_display_widow_penalties_code, unset ? null : display_widow_penalties_par, 1); } + if (tex_par_to_be_set(what, par_orphan_penalties_code)) { tex_set_par_par(p, par_orphan_penalties_code, unset ? null : orphan_penalties_par, 1); } + if (tex_par_to_be_set(what, par_baseline_skip_code)) { tex_set_par_par(p, par_baseline_skip_code, unset ? null : baseline_skip_par, 1); } + if (tex_par_to_be_set(what, par_line_skip_code)) { tex_set_par_par(p, par_line_skip_code, unset ? null : line_skip_par, 1); } + if (tex_par_to_be_set(what, par_line_skip_limit_code)) { tex_set_par_par(p, par_line_skip_limit_code, unset ? null : line_skip_limit_par, 1); } + if (tex_par_to_be_set(what, par_adjust_spacing_step_code)) { tex_set_par_par(p, par_adjust_spacing_step_code, unset ? null : adjust_spacing_step_par, 1); } + if (tex_par_to_be_set(what, par_adjust_spacing_shrink_code)) { tex_set_par_par(p, par_adjust_spacing_shrink_code, unset ? null : adjust_spacing_shrink_par, 1); } + if (tex_par_to_be_set(what, par_adjust_spacing_stretch_code)) { tex_set_par_par(p, par_adjust_spacing_stretch_code, unset ? null : adjust_spacing_stretch_par, 1); } + if (tex_par_to_be_set(what, par_hyphenation_mode_code)) { tex_set_par_par(p, par_hyphenation_mode_code, unset ? null : hyphenation_mode_par, 1); } + if (tex_par_to_be_set(what, par_shaping_penalties_mode_code)) { tex_set_par_par(p, par_shaping_penalties_mode_code, unset ? null : shaping_penalties_mode_par, 1); } + if (tex_par_to_be_set(what, par_shaping_penalty_code)) { tex_set_par_par(p, par_shaping_penalty_code, unset ? null : shaping_penalty_par, 1); } + + if (what == par_all_category) { + par_state(p) = unset ? 
0 : par_all_category; + } else if (unset) { + par_state(p) &= ~(what | par_state(p)); + } else { + par_state(p) |= what; + } + } +} + +halfword tex_find_par_par(halfword head) +{ + if (head) { + if (node_type(head) == temp_node) { + head = node_next(head); + } + if (head && node_type(head) == par_node) { + return head; + } + } + return null; +} + +halfword tex_reversed_node_list(halfword list) +{ + if (list) { + halfword prev = list; + halfword last = list; + list = node_next(list); + if (list) { + while (1) { + halfword next = node_next(list); + tex_couple_nodes(list, prev); + if (node_type(list) == dir_node) { + node_subtype(list) = node_subtype(list) == cancel_dir_subtype ? normal_dir_subtype : cancel_dir_subtype ; + } + if (next) { + prev = list; + list = next; + } else { + node_next(last) = null; + node_prev(list) = null; + return list; + } + } + } + } + return list; +} + +/* */ + +halfword tex_new_specification_node(halfword n, quarterword s, halfword options) +{ + halfword p = tex_new_node(specification_node, s); + tex_new_specification_list(p, n, options); + return p; +} + +void tex_dispose_specification_nodes(void) { + if (par_shape_par) { tex_flush_node(par_shape_par); par_shape_par = null; } + if (inter_line_penalties_par) { tex_flush_node(inter_line_penalties_par); inter_line_penalties_par = null; } + if (club_penalties_par) { tex_flush_node(club_penalties_par); club_penalties_par = null; } + if (widow_penalties_par) { tex_flush_node(widow_penalties_par); widow_penalties_par = null; } + if (display_widow_penalties_par) { tex_flush_node(display_widow_penalties_par); display_widow_penalties_par = null; } + if (math_forward_penalties_par) { tex_flush_node(math_forward_penalties_par); math_forward_penalties_par = null; } + if (math_backward_penalties_par) { tex_flush_node(math_backward_penalties_par); math_backward_penalties_par = null; } + if (orphan_penalties_par) { tex_flush_node(orphan_penalties_par); orphan_penalties_par = null; } +} + +void 
tex_null_specification_list(halfword a) +{ + specification_pointer(a) = NULL; + specification_count(a) = 0; +} + +static void *tex_aux_allocate_specification(int n, size_t *s) +{ + void *p = NULL; + *s = n * sizeof(memoryword); + lmt_node_memory_state.extra_data.allocated += (int) *s; + lmt_node_memory_state.extra_data.ptr = lmt_node_memory_state.extra_data.allocated; + if (lmt_node_memory_state.extra_data.ptr > lmt_node_memory_state.extra_data.top) { + lmt_node_memory_state.extra_data.top = lmt_node_memory_state.extra_data.ptr; + } + p = lmt_memory_malloc(*s); + if (! p) { + tex_overflow_error("nodes", (int) *s); + } + return p; +} + +static void tex_aux_deallocate_specification(void *p, int n) +{ + size_t s = n * sizeof(memoryword); + lmt_node_memory_state.extra_data.allocated -= (int) s; + lmt_node_memory_state.extra_data.ptr = lmt_node_memory_state.extra_data.allocated; + lmt_memory_free(p); +} + +void tex_new_specification_list(halfword a, halfword n, halfword o) +{ + size_t s = 0; + specification_pointer(a) = tex_aux_allocate_specification(n, &s); + specification_count(a) = specification_pointer(a) ? 
n : 0; + specification_options(a) = o; +} + +void tex_dispose_specification_list(halfword a) +{ + if (specification_pointer(a)) { + tex_aux_deallocate_specification(specification_pointer(a), specification_count(a)); + specification_pointer(a) = NULL; + specification_count(a) = 0; + specification_options(a) = 0; + } +} + +void tex_copy_specification_list(halfword a, halfword b) { + if (specification_pointer(b)) { + size_t s = 0; + specification_pointer(a) = tex_aux_allocate_specification(specification_count(b), &s); + if (specification_pointer(a) && specification_pointer(b)) { + specification_count(a) = specification_count(b); + specification_options(a) = specification_options(b); + memcpy(specification_pointer(a), specification_pointer(b), s); + } else { + specification_count(a) = 0; + specification_options(a) = 0; + } + } +} + +void tex_shift_specification_list(halfword a, int n, int rotate) +{ + if (specification_pointer(a)) { + halfword c = specification_count(a); + if (rotate) { + if (n > 0 && c > 0 && n < c && c != n) { + size_t s = 0; + memoryword *b = tex_aux_allocate_specification(c, &s); + memoryword *p = specification_pointer(a); + halfword m = c - n; + s = m * sizeof(memoryword); + memcpy(b, p + n, s); + s = n * sizeof(memoryword); + memcpy(b + m, p, s); + tex_aux_deallocate_specification(specification_pointer(a), c); + specification_pointer(a) = b; + } + } else { + halfword o = 0; + halfword m = 0; + memoryword *b = NULL; + if (n > 0 && c > 0 && n < c) { + size_t s = 0; + memoryword *p = specification_pointer(a); + o = specification_options(a); + m = c - n; + b = tex_aux_allocate_specification(m, &s); + memcpy(b, p + n, s); + } + if (c > 0) { + tex_aux_deallocate_specification(specification_pointer(a), c); + } + specification_pointer(a) = b; + specification_count(a) = m; + specification_options(a) = o; + } + } +} + +/* */ + +void tex_set_disc_field(halfword target, halfword location, halfword source) +{ + switch (location) { + case pre_break_code: 
target = disc_pre_break(target); break; + case post_break_code: target = disc_post_break(target); break; + case no_break_code: target = disc_no_break(target); break; + } + node_prev(source) = null; /* don't expose this one! */ + if (source) { + node_head(target) = source ; + node_tail(target) = tex_tail_of_node_list(source); + } else { + node_head(target) = null; + node_tail(target) = null; + } +} + +void tex_check_disc_field(halfword n) +{ + halfword p = disc_pre_break_head(n); + disc_pre_break_tail(n) = p ? tex_tail_of_node_list(p) : null; + p = disc_post_break_head(n); + disc_post_break_tail(n) = p ? tex_tail_of_node_list(p) : null; + p = disc_no_break_head(n); + disc_no_break_tail(n) = p ? tex_tail_of_node_list(p) : null; +} + +void tex_set_discpart(halfword d, halfword h, halfword t, halfword code) +{ + switch (node_subtype(d)) { + case automatic_discretionary_code: + case mathematics_discretionary_code: + code = glyph_discpart_always; + break; + } + halfword c = h; + while (c) { + if (node_type(c) == glyph_node) { + set_glyph_discpart(c, code); + } + if (c == t) { + break; + } else { + c = node_next(c); + } + } +} + +halfword tex_flatten_discretionaries(halfword head, int *count, int nest) +{ + halfword current = head; + while (current) { + halfword next = node_next(current); + switch (node_type(current)) { + case disc_node: + { + halfword d = current; + halfword h = disc_no_break_head(d); + halfword t = disc_no_break_tail(d); + if (h) { + tex_set_discpart(current, h, t, glyph_discpart_replace); + tex_try_couple_nodes(t, next); + if (current == head) { + head = h; + } else { + tex_try_couple_nodes(node_prev(current), h); + } + disc_no_break_head(d) = null ; + } else if (current == head) { + head = next; + } else { + tex_try_couple_nodes(node_prev(current), next); + } + tex_flush_node(d); + if (count) { + *count += 1; + } + break; + } + case hlist_node: + case vlist_node: + if (nest) { + halfword list = box_list(current); + if (list) { + box_list(current) = 
tex_flatten_discretionaries(list, count, nest); + } + break; + } + } + current = next; + } + return head; +} + +void tex_flatten_leaders(halfword box, int *count) +{ + halfword head = box ? box_list(box) : null; + if (head) { + halfword current = head; + while (current) { + halfword next = node_next(current); + if (node_type(current) == glue_node && node_subtype(current) == u_leaders) { + halfword b = glue_leader_ptr(current); + if (b && (node_type(b) == hlist_node || node_type(b) == vlist_node)) { + halfword p = null; + halfword a = glue_amount(current); + double w = (double) a; + switch (box_glue_sign(box)) { + case stretching_glue_sign: + if (glue_stretch_order(current) == box_glue_order(box)) { + w += glue_stretch(current) * (double) box_glue_set(box); + } + break; + case shrinking_glue_sign: + if (glue_shrink_order(current) == box_glue_order(box)) { + w -= glue_shrink(current) * (double) box_glue_set(box); + } + break; + } + if (node_type(b) == hlist_node) { + p = tex_hpack(box_list(b), scaledround(w), packing_exactly, box_dir(b), holding_none_option); + } else { + p = tex_vpack(box_list(b), scaledround(w), packing_exactly, 0, box_dir(b), holding_none_option); + } + box_list(b) = box_list(p); + box_width(b) = box_width(p); + box_height(b) = box_height(p); + box_depth(b) = box_depth(p); + box_glue_order(b) = box_glue_order(p); + box_glue_sign(b) = box_glue_sign(p); + box_glue_set(b) = box_glue_set(p); + set_box_package_state(b, package_u_leader_set); + box_list(p) = null; + tex_flush_node(p); + glue_leader_ptr(current) = null; + tex_flush_node(current); + tex_try_couple_nodes(b, next); + if (current == head) { + box_list(box) = b; + } else { + tex_try_couple_nodes(node_prev(current), b); + } + if (count) { + *count += 1; + } + } + } + current = next; + } + } +} + +/*tex + This could of course be done in a \LUA\ loop but this is likely to be applied always so we + provide a helper, also because we need to check the font. 
Adding this sort of violates the + principle that we should this in \LUA\ instead but this time I permits myself to cheat. +*/ + +void tex_soften_hyphens(halfword head, int *found, int *replaced) +{ + halfword current = head; + while (current) { + switch (node_type(current)) { + case glyph_node: + { + if (glyph_character(current) == 0x2D) { + /* + We actually need a callback for this? Or we can have a nested loop + helper in the nodelib. + */ + ++(*found); + switch (glyph_discpart(current)) { + case glyph_discpart_unset: + /*tex Never seen by any disc handler. */ + set_glyph_discpart(current, glyph_discpart_always); + case glyph_discpart_always: + /*tex A hard coded - in the input. */ + break; + default : + if (tex_char_exists(glyph_font(current), 0xAD)) { + ++(*replaced); + glyph_character(current) = 0xAD; + } + break; + } + } + break; + } + case hlist_node: + case vlist_node: + { + halfword list = box_list(current); + if (list) { + tex_soften_hyphens(list, found, replaced); + } + break; + } + } + current = node_next(current); + } +} + +halfword tex_harden_spaces(halfword head, halfword tolerance, int* count) +{ + /* todo: take the context code */ + (void) tolerance; + (void) count; + return head; +} + +halfword tex_get_special_node_list(special_node_list_types list, halfword *tail) +{ + halfword h = null; + halfword t = null; + switch (list) { + case page_insert_list_type: + h = node_next(page_insert_head); + if (h == page_insert_head) { + h = null; + } + break; + case contribute_list_type: + h = node_next(contribute_head); + break; + case page_list_type: + h = node_next(page_head); + t = lmt_page_builder_state.page_tail; + break; + case temp_list_type: + h = node_next(temp_head); + break; + case hold_list_type: + h = node_next(hold_head); + break; + case post_adjust_list_type: + h = node_next(post_adjust_head); + t = lmt_packaging_state.post_adjust_tail; + break; + case pre_adjust_list_type: + h = node_next(pre_adjust_head); + t = 
lmt_packaging_state.pre_adjust_tail; + break; + case post_migrate_list_type: + h = node_next(post_migrate_head); + t = lmt_packaging_state.post_migrate_tail; + break; + case pre_migrate_list_type: + h = node_next(pre_migrate_head); + t = lmt_packaging_state.pre_migrate_tail; + break; + case align_list_type: + h = node_next(align_head); + break; + case page_discards_list_type: + h = lmt_packaging_state.page_discards_head; + break; + case split_discards_list_type: + h = lmt_packaging_state.split_discards_head; + break; + } + node_prev(h) = null; + if (tail) { + *tail = t ? t : (h ? tex_tail_of_node_list(h) : null); + } + return h; +}; + +int tex_is_special_node_list(halfword n, int *istail) +{ + if (istail) { + *istail = 0; + } + if (! n) { + return -1; + } else if (n == node_next(page_insert_head)) { + return page_insert_list_type; + } else if (n == node_next(contribute_head)) { + return contribute_list_type; + } else if (n == node_next(page_head) || n == lmt_page_builder_state.page_tail) { + if (istail && n == lmt_page_builder_state.page_tail) { + *istail = 0; + } + return page_list_type; + } else if (n == node_next(temp_head)) { + return temp_list_type; + } else if (n == node_next(hold_head)) { + return hold_list_type; + } else if (n == node_next(post_adjust_head) || n == lmt_packaging_state.post_adjust_tail) { + if (istail && n == lmt_packaging_state.post_adjust_tail) { + *istail = 0; + } + return post_adjust_list_type; + } else if (n == node_next(pre_adjust_head) || n == lmt_packaging_state.pre_adjust_tail) { + if (istail && n == lmt_packaging_state.pre_adjust_tail) { + *istail = 0; + } + return pre_adjust_list_type; + } else if (n == node_next(post_migrate_head) || n == lmt_packaging_state.post_migrate_tail) { + if (istail && n == lmt_packaging_state.post_migrate_tail) { + *istail = 0; + } + return post_migrate_list_type; + } else if (n == node_next(pre_migrate_head) || n == lmt_packaging_state.pre_migrate_tail) { + if (istail && n == 
lmt_packaging_state.pre_migrate_tail) { + *istail = 0; + } + return pre_migrate_list_type; + } else if (n == node_next(align_head)) { + return align_list_type; + } else if (n == lmt_packaging_state.page_discards_head) { + return page_discards_list_type; + } else if (n == lmt_packaging_state.split_discards_head) { + return split_discards_list_type; + // } else if (n == lmt_page_builder_state.best_page_break) { + // return 10000; + } else { + return -1; + } +}; + +void tex_set_special_node_list(special_node_list_types list, halfword head) +{ + switch (list) { + case page_insert_list_type: + /*tex This is a circular list where page_insert_head stays. */ + if (head) { + node_next(page_insert_head) = head; + node_next(tex_tail_of_node_list(head)) = page_insert_head; + } else { + node_next(page_insert_head) = page_insert_head; + } + break; + case contribute_list_type: + node_next(contribute_head) = head; + contribute_tail = head ? tex_tail_of_node_list(head) : contribute_head; + break; + case page_list_type: + node_next(page_head) = head; + lmt_page_builder_state.page_tail = head ? tex_tail_of_node_list(head) : page_head; + break; + case temp_list_type: + node_next(temp_head) = head; + break; + case hold_list_type: + node_next(hold_head) = head; + break; + case post_adjust_list_type: + node_next(post_adjust_head) = head; + lmt_packaging_state.post_adjust_tail = head ? tex_tail_of_node_list(head) : post_adjust_head; + break; + case pre_adjust_list_type: + node_next(pre_adjust_head) = head; + lmt_packaging_state.pre_adjust_tail = head ? tex_tail_of_node_list(head) : pre_adjust_head; + break; + case post_migrate_list_type: + node_next(post_migrate_head) = head; + lmt_packaging_state.post_migrate_tail = head ? tex_tail_of_node_list(head) : post_migrate_head; + break; + case pre_migrate_list_type: + node_next(pre_migrate_head) = head; + lmt_packaging_state.pre_migrate_tail = head ? 
tex_tail_of_node_list(head) : pre_migrate_head; + break; + case align_list_type: + node_next(align_head) = head; + break; + case page_discards_list_type: + lmt_packaging_state.page_discards_head = head; + break; + case split_discards_list_type: + lmt_packaging_state.split_discards_head = head; + break; + } +}; + +scaled tex_effective_glue(halfword parent, halfword glue) +{ + if (parent && glue) { + switch (node_type(glue)) { + case glue_node: + case glue_spec_node: + switch (node_type(parent)) { + case hlist_node: + case vlist_node: + { + double w = (double) glue_amount(glue); + switch (box_glue_sign(parent)) { + case stretching_glue_sign: + if (glue_stretch_order(glue) == box_glue_order(parent)) { + w += glue_stretch(glue) * (double) box_glue_set(parent); + } + break; + case shrinking_glue_sign: + if (glue_shrink_order(glue) == box_glue_order(parent)) { + w -= glue_shrink(glue) * (double) box_glue_set(parent); + } + break; + } + return (scaled) lmt_roundedfloat(w); + } + default: + return glue_amount(glue); + } + break; + } + } + return 0; +} diff --git a/source/luametatex/source/tex/texnodes.h b/source/luametatex/source/tex/texnodes.h new file mode 100644 index 000000000..f0d20e1e9 --- /dev/null +++ b/source/luametatex/source/tex/texnodes.h @@ -0,0 +1,2728 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_TEXNODES_H +# define LMT_TEXNODES_H + +/*tex + + We can probably ditch |volatile| so that the compiler can optimize access a bit better. We + only need to make sure that we create nodes before we use their pointers. So, beware: a + newnode has to go via an intermediate variable because the |varmem| array can have been be + reallocated. I need to (re)check all cases! In case of a copy we use a intermediate volatile + variable. + + Anyway, we now have only a few |quarterwords| in use, most noticeably the type and subtype. + Eventually I might go for a consistent + + type subtype + prev next + attribute data + etc + + model. 
Or maybe even just a flat list, no need for memoryword, but just all halfwords. However, + it will demand all kinds of tiny adaptations and we don't gain much. We'd also lose some charm + of traditional \TEX. Also, we now have a double glue related field and that would then become + a float. So, not now. + + There are a few more node types than in standard \TEX, but fewer than we have in e.g.\ \PDFTEX\ + or stock \LUATEX. For instance margin nodes are now just kern nodes, some whatsits are first + class nodes and we have only one generic whatsit left. We also have more subtypes which makes + a more detailed tracking of where nodes come from possible. Other nodes, like the |inserting| + and |split_up| nodes are now both |inserting| but with a subtype because the register index is + no longer the subtype. + + Not all nodes can end up in a node list. Some are used for housekeeping (stack, expressions, + conditional nesting, etc.) or show up in the process of breaking paragraphs into lines. When + we talk of nodes with users in the perspective of \TEX\ we normally refer to the ones in + horizontal and vertical lists or math lists, not to those more obscure housekeeping nodes. It + just happens that they share the same memory model and management. + + A complication is that some nodes have pointers that themselves point to a (often smaller) + node but use the same accessors. This means that (1) their layout should be the same with + respect to the pointer, which happens with span nodes or (2) that there is some offset in play, + which happens with ins pointers and break nodes that are embedded in a disc node. + + Now that we no longer have variable nodes, we can consider a different allocation model, like + a chain of malloced nodes but on the other hand storing them might be more work. We also can no + longer share the accessors so again more work is needed. But ... maybe attributes might end up + as allocated lists some day, but that also demands storage.
The current memory management is + very efficient and we don't gain anything with redoing that, apart maybe from nodes becoming + structs. Even then we will have an array of pointers instead of what we have now, but without + the jumps by size in the indices. So, given the constraints of offsets and overlap it makes no + sense to waste time on this. + + Instead of |var_mem| we use |nodes| and related names. This better complements the additional + variables that we have for dynamic management. Some more names have been changed (also in order + to avoid side effects in syntax highlighting). Too common names also result in too many matches + when searching the source tree. + + So, eventually most fields now have the type of the node in their name, which makes it more + clear what they are. As mentioned, it makes the syntax highlighted source look better as some + generic names are used elsewhere too. Another reason is that we have more fields in nodes and + when browsing the source it helps to know that a |width| is actually the |glue_amount| which + when we go down is actually a height anyway. It also makes it possible at some point to make + some nodes smaller when we don't need these \quote {shared by name} fields. We also need this + transition in order to get better interfacing to the \LUA\ end, one reason being that we need + to distinguish between fields that overlap (as in lists, unset nodes and alignment + records). + + Todo: all subtype related constants will become |_subtype| so that also means a couple more + _code ones for commands. It's all about consistency but that will happen stepwise.
A typical + rainy day with some newly acquired music in background kind of activity: + + - discretionary + - adjust + - noad + - fence + - radical + - boundary + +*/ + +typedef enum node_types { + hlist_node, + vlist_node, + rule_node, + insert_node, + mark_node, + adjust_node, + boundary_node, + disc_node, + whatsit_node, + /*tex The last_preceding_break_node: */ + par_node, + dir_node, + /*tex The last_non_discardable_node: */ + math_node, + glue_node, + kern_node, + penalty_node, + style_node, + choice_node, + parameter_node, + simple_noad, + radical_noad, + fraction_noad, + accent_noad, + fence_noad, + math_char_node, + math_text_char_node, + sub_box_node, + sub_mlist_node, + delimiter_node, + glyph_node, + /*tex This was the last node with attributes, except unset nodes. */ + unset_node, + specification_node, + align_record_node, + attribute_node, + glue_spec_node, + temp_node, + split_node, + /*tex The next set of nodes is invisible from the \LUA\ (but nesting nodes can show up). */ + expression_node, + math_spec_node, + font_spec_node, + nesting_node, + span_node, + align_stack_node, + noad_state_node, + if_node, + unhyphenated_node, /*tex These are both active nodes. */ + hyphenated_node, /*tex These are both active nodes. */ + delta_node, + passive_node, +} node_types; + +# define max_chain_size 32 + +# define unknown_node_type -1 +# define unknown_node_subtype -1 + +/* Todo: [type] [subtype|size] [index] -> nodes : advantage is no holes in node id's */ + +typedef struct node_memory_state_info { + memoryword *nodes; + // memoryword *volatile nodes; + char *nodesizes; + halfword free_chain[max_chain_size]; + memory_data nodes_data; + memory_data extra_data; + int reserved; /*tex There are some predefined nodes. 
*/ + int padding; + int node_properties_id; + int lua_properties_level; + halfword attribute_cache; + halfword max_used_attribute; + int node_properties_table_size; +} node_memory_state_info; + +extern node_memory_state_info lmt_node_memory_state; + +typedef enum field_types { + nil_field, + integer_field, + dimension_field, + glue_field, + number_field, + string_field, + boolean_field, + function_field, + node_field, + node_list_field, + token_field, + token_list_field, + attribute_field, +} field_types; + +extern halfword tex_get_node (int size); +extern void tex_free_node (halfword p, int size); +extern void tex_dump_node_mem (dumpstream f); +extern void tex_undump_node_mem (dumpstream f); +extern void tex_initialize_node_mem (void); +extern void tex_initialize_nodes (void); + +extern void lmt_nodelib_initialize (void); /* name ? */ + +/*tex + + Most fields are integers (halfwords) that get aliased to |vinfo| and |vlink| for traditional + reasons. The |vlink| name is actually representing a next pointer. Only the type and subtype + remain quarterwords, the rest are just halfwords which wastes space for directions, + orientation, glue orders and glue signs but so be it. + + A memory word has two 32 bit integers so 8 bytes. A glueratio is a double which is 8 bytes so + there we waste some space. There is actually no need now to pack (node) data in pairs so maybe + some day I'll change that. When we make glue ration a float again we can go flat (and with most + node fields now being fully qualified that is easier). + + The first memoryword contains the |type| and |subtype| that are both 16 bit integers (unsigned) + as well as the |vlink| (next) pointer. After that comes the word that keeps the |attr| and + |alink| (prev) fields. Instead of the link names we use more meaningful ones. The |next|, |prev| + and |attr| fields all are halfwords representing a node index. 
+ + The |node_size| field is used in managing freed nodes (mostly as a check) and it overwrites the + |type| and |subtype| fields. Actually we could just use the type or subtype but as the size is + small but on the other hand using an int here makes sense. + + half0 | quart0 quart1 | vinfo | size | type subtype + half1 | | vlink + + The |tlink| and |rlink| fields are used in disc nodes as tail and replace pointers (again + halfwords). We no longer need |rlink| as it's equivalent to |alink| (the prev pointer). The + |tlink| fields is used for links to the tail of a list. These indirect macros are somewhat + complicating matters. Again these have been renamed. + + We used to have |alink(a)| being |vlink(a,1)| but that has (after a few years) been replaced by + |node_prev| because is cleaner. Keep in mind that in \LUATEX\ we use double linked node lists. So, + we now only have some \quote {hard coded} pointers to the memory array in this file, not in the + files that use the node fields. However, for the next two paragraphs to be true, I need to find + a solution for the insert_ptr first because that is an index. + + Now, a logical question is: should we stick to the link and info model for nodes? One reason + is that we share the model with tokens. A less relevant reason is that the glue is stored in 8 + bytes but that can be reverted to 4 bytes if needed. So, indeed at some point we might see a 32 + bit wide array show up here as we're now more or less prepared for that. It will bump the direct + node numbers but that should work out okay. So, in the end, after stepwise abstraction we now + have field definitions that use a base and offset e.g. |vlink(a,3)| instead of |vlink(a+3)|. + Also, we have many more fields and using meaningful names quickly started to make sense. + + Once all is stable I will play with |var_mem| being an array of pointers and |malloc| the + smaller memoryword arrays (per node). 
This might lead to (not always) smaller memory footprint: + we have one pointer per node (but only that array gets preallocated) but we need less memory in + total, unless we use many nodes. Anyway, we keep the indirect model (which might add overhead, + but that can be compensated by \CPU\ caches) because using a numeric node pointer is more + efficient and quite handy. If we would go completely struct the source would change so much that + we loose the charm of \TEX\ and documentation and there is no gain in it. Also, using halfword + indices (but then to pointers) for nodes has the huge advantage that it is fast in \LUA\ (always + a bottleneck) and these node indices can (and have to) be stored in tokens. One nice side effect + would be that we have node indices in a sequence (without the current jumps due to node size + offset, which in turn gives more room for nodes references in tokens). + + In spite of all extensions we hope the spirit of how \TEX\ does it is still very visible. + +*/ + +# define mvalue(a,b) lmt_node_memory_state.nodes[a+b].P +# define lvalue(a,b) lmt_node_memory_state.nodes[a+b].L +# define dvalue(a,b) lmt_node_memory_state.nodes[a+b].D + +# define vinfo(a,b) lmt_node_memory_state.nodes[a+b].half0 +# define vlink(a,b) lmt_node_memory_state.nodes[a+b].half1 + +# define vinfo0(a,b) lmt_node_memory_state.nodes[a+b].quart00 +# define vinfo1(a,b) lmt_node_memory_state.nodes[a+b].quart01 +# define vlink0(a,b) lmt_node_memory_state.nodes[a+b].quart10 +# define vlink1(a,b) lmt_node_memory_state.nodes[a+b].quart11 + +# define vinfo00(a,b) lmt_node_memory_state.nodes[a+b].single00 +# define vinfo01(a,b) lmt_node_memory_state.nodes[a+b].single01 +# define vinfo02(a,b) lmt_node_memory_state.nodes[a+b].single02 +# define vinfo03(a,b) lmt_node_memory_state.nodes[a+b].single03 +# define vlink00(a,b) lmt_node_memory_state.nodes[a+b].single10 +# define vlink01(a,b) lmt_node_memory_state.nodes[a+b].single11 +# define vlink02(a,b) 
lmt_node_memory_state.nodes[a+b].single12 +# define vlink03(a,b) lmt_node_memory_state.nodes[a+b].single13 + +/*tex + We have some shared field names. Some day the subtypes will get meaningful names dependent on + the node type, if only because some already have. We used to have + + \starttyping + # define type(a) vinfo0(a,0) + # define subtype(a) vinfo1(a,0) + # define node_size(a) vinfo(a,0) + \stoptyping + + but we dropped the size mechanism and made most field shortcuts verbose in order to be able to + use variable names with the same name combined with proper syntax highlighting etc. It also + gives less noise when we search in the whole source tree. More later. +*/ + +# define node_type(a) vinfo0(a,0) +# define node_subtype(a) vinfo1(a,0) + +# define node_next(a) vlink(a,0) +# define node_prev(a) vlink(a,1) +# define node_attr(a) vinfo(a,1) + +# define node_head(a) vlink(a,0) /*tex the head |hlink(a)| aka |vlink(a)| of a disc sublist */ +# define node_tail(a) vinfo(a,1) /*tex the tail |tlink(a)| aka |vinfo(a)|, overlaps with |node_attr()| */ + +/*tex + + The dimension fields shared their locations which made for sometimes more compact code but + in the end the number of placxes where it really saved code were limited. Also, compilers will + do their job and deal with common code. So, these are now replaced by more meaningful names: + + \starttyping + # define width(a) vlink(a,2) + # define depth(a) vlink(a,3) + # define height(a) vlink(a,4) + \stoptyping + + Inserts use a trick. The insert pointers directly point into a node at the place where the list + starts which is why |list_ptr| has to overlap with |node_next|! I have looked into changign this + but it doesn't pay off and it's best to stay close to the original. A side effect is that some + fields in insert nodes are sort of impossible (for now). + + \starttyping + # define box_list_ptr(a) vlink(a,5) // We need to use the same field as |node_next|. 
+ # define insert_list(a) (a + 5) // This kind of makes a virtual node: start at list. + \stoptyping + + Beware of the fact that for instance alignments use some fields for other purposes, like: + |u_part(a)|, |v_part(a)|, |span_ptr(a)|, etc. and assume the rest of the fields to overlap + with list nodes. So, we cannot simply reshuffle positions! + + In the original \TEX\ source (and also in \LUATEX) there are a couple of offsets used. Most + noticeable is the |list_offset| but in 2.0618 the related trickery was replaced by using + |list_ptr| and using the fact that we have a double linked list. The four fields are in + successive memory words and that means that we can use |node_next| in a field pointed to + by |list_offset| (because actually we then have the list pointer!). This makes for simple + loops in original \TEX. The dimension offsets are used to set fields in boxes but we already + abstracted that to proper field names; these were for instance used in alignment nodes that + have mostly the same properties as a box node. + + \starttyping + # define width_offset 2 + # define depth_offset 3 + # define height_offset 4 + # define list_offset 5 + \stoptyping + + These abstractions mean that we now have nodes, fields and offsets all abstracted in such a way + that all definitions and trickery are in this file. Of course I could have messed up. + +*/ + +/*tex + + Synctex support demands some extra fields in nodes that make it possible to output location as + well as file/line information for viewer-editor synchronization. The idea is quite okay but + unfortunately the implementation of the library is rather bound to the way e.g. \LATEX\ typesets + documents. Synctex has always been problematic when it comes to \CONTEXT.
There is for instance + no control over filenames and discussions around some limitations (and possible features) in the + \PDFTEX\ and early \LUATEX\ times never resulted in fixing that (setting filenames, adding some + additional synchronization points, etc). All that was supposed to happen deep down in the library + and was not considered to be dealt with by a macro package. For instance multiple loading of the + same file (metapost runs or sample files) was a problem as was the need to block access to files + in tds (like styles). We also needed binding to for instance elements in an \XML\ file where line + numbers are sort of special and out of sync with inclusion. I guess we were ahead of the pack + because after nearly two decades of \LUATEX\ there is some discussion about this. + + Anyway, for the reasons mentioned \LUATEX\ offers some setters that overload the engine ones and + that permits \CONTEXT\ to implement its own variant. However, in \LUAMETATEX\ setting tags and + lines from \LUA\ is now the only way to support \SYNCTEX\ because the library is absent: we just + have some extra fields in some nodes. In \LUAMETATEX\ only glyph and list nodes have these fields + as it makes no sense to have them elsewhere: macro packages can add glue and kerns and rules and + \unknown\ all over the place and adding file state info there only makes things confusing and + working less well. This is what the mode parameter can handle in \LUATEX\ and in \LUAMETATEX\ it + only supports the modes 1 and 3. + + As a side note: the fact that a viewer needs to embed the library is also a limitation. Calling + out to an external program that analyzes the file and gives back the filename and line is more + flexible and robust. Because we have such an analyzer in \MKIV\ it was no big deal to add a few + lines so that the \TEX shop environment could use that script/method (bidirectional); hopefully + other viewers and editors will follow.
+ + So, compared to \LUATEX\ less nodes have the extra fields (which saves memory) and therefore + less has to be set. Because there is no library at all, writing a synctex file is up to some + additional \LUA\ code, but that was already the case in \MKIV\ anyway. We might at some point + change the field names to \quote {file} and \quote {line} and remove interface options that + have no use any more. We also moved to a more generic naming of (input related) fields. + +*/ + +/* + + Temporary nodes are really special head node pointers that only need links. They ensure that + there is at least one node in a list. + +*/ + +# define temp_node_size 2 + +/*tex + + In \LUATEX\ we have attribute list nodes and attribute nodes that are (anyway) of the same + size. In the end I decided to combine them into one node with a subtype. That also helps + diagnose issues. It is one of the few nodes now that has fields depending on the subtype + but these nodes are not really user ones anyway. + +*/ + +# define attribute_node_size 2 +# define attribute_unset(a) vinfo(a,1) +# define attribute_index(a) vinfo(a,1) /*tex actually we need half of this */ +# define attribute_count(a) vlink(a,1) /*tex the reference count */ +# define attribute_value(a) vlink(a,1) + +typedef enum attribute_subtypes { + attribute_list_subtype, + attribute_value_subtype, +} attribute_subtypes; + +# define last_attribute_subtype attribute_value_subtype + +/*tex + Penalties have only one primitive so we don't have |_code| here, also because it would conflict + with arguments. +*/ + +# define penalty_node_size 3 +# define penalty_amount(a) vlink(a,2) + +typedef enum penalty_subtypes { + user_penalty_subtype, + linebreak_penalty_subtype, /*tex includes widow, club, broken etc. 
*/ + line_penalty_subtype, + word_penalty_subtype, + orphan_penalty_subtype, + final_penalty_subtype, + math_pre_penalty_subtype, + math_post_penalty_subtype, + before_display_penalty_subtype, + after_display_penalty_subtype, + equation_number_penalty_subtype, +} penalty_subtypes; + +# define last_penalty_subtype equation_number_penalty_subtype + +/*tex + We have plenty of glue variables and in the node lists most are also flagged. There is no + one|-|to|-|one correspondence between the codes (in tokens) and subtypes (in nodes) as listed + below, but they come close. The special math related glues and inserts now have nicer numbers. +*/ + +typedef enum glue_subtypes { + user_skip_glue, + line_skip_glue, + baseline_skip_glue, + par_skip_glue, + above_display_skip_glue, + below_display_skip_glue, + above_display_short_skip_glue, + below_display_short_skip_glue, + left_skip_glue, + right_skip_glue, + top_skip_glue, + split_top_skip_glue, + tab_skip_glue, + space_skip_glue, + xspace_skip_glue, + zero_space_skip_glue, + par_fill_right_skip_glue, + par_fill_left_skip_glue, + par_init_right_skip_glue, + par_init_left_skip_glue, + indent_skip_glue, + left_hang_skip_glue, + right_hang_skip_glue, + correction_skip_glue, + inter_math_skip_glue, + ignored_glue, /*tex |subtype| for cases where we ignore zero glue (alignments) */ + page_glue, /*tex |subtype| used in the page builder */ + /*tex math */ + math_skip_glue, + thin_mu_skip_glue, + med_mu_skip_glue, + thick_mu_skip_glue, + /*tex more math */ + conditional_math_glue, /*tex special |subtype| to suppress glue in the next node */ /* no need for jump */ + rulebased_math_glue, + mu_glue, /*tex |subtype| for math glue */ + /*tex leaders (glue with list) */ + a_leaders, /*tex |subtype| for aligned leaders */ + c_leaders, /*tex |subtype| for centered leaders */ + x_leaders, /*tex |subtype| for expanded leaders */ + g_leaders, /*tex |subtype| for global (page) leaders */ + u_leaders, +} glue_subtypes; + +# define 
last_glue_subtype u_leaders + +typedef enum skip_glue_codes_alias { + par_fill_skip_glue = par_fill_right_skip_glue, +} skip_glue_codes_alias; + +# define is_leader(a) (node_subtype(a) >= a_leaders) + +# define glue_node_size 7 +# define glue_spec_size 5 +# define glue_data(a) vinfo(a,2) /* ignored in spec */ +# define glue_amount(a) vlink(a,2) +# define glue_shrink(a) vinfo(a,3) +# define glue_stretch(a) vlink(a,3) +# define glue_stretch_order(a) vinfo(a,4) +# define glue_shrink_order(a) vlink(a,4) +# define glue_font(a) vinfo(a,5) /* not in spec */ /* when inter_math_skip_glue: parameter */ +# define glue_leader_ptr(a) vlink(a,5) /* not in spec */ +# define glue_options(a) vinfo(a,6) /* not in spec */ /* for now only internal */ +# define glue_unused(a) vlink(a,6) /* not in spec */ + +inline static void tex_add_glue_option (halfword a, halfword r) { glue_options(a) |= r; } +inline static void tex_remove_glue_option (halfword a, halfword r) { glue_options(a) &= ~(r | glue_options(a)); } +inline static int tex_has_glue_option (halfword a, halfword r) { return (glue_options(a) & r) == r; } + +typedef enum glue_option_codes { + glue_option_normal = 0x0000, + // glue_force_auto_break = 0x0001, + // glue_originates_in_math = 0x0002, + glue_option_no_auto_break = 0x0001, +} glue_option_codes; + +typedef enum math_subtypes { + begin_inline_math, + end_inline_math +} math_subtypes; + +# define last_math_subtype end_inline_math + +/*tex + Math nodes (currently) partially overlap with glue because they also have a glue property. 
+*/ + +# define math_node_size 6 +# define math_surround(a) vinfo(a,2) +# define math_amount(a) vlink(a,2) +# define math_shrink(a) vinfo(a,3) +# define math_stretch(a) vlink(a,3) +# define math_stretch_order(a) vinfo(a,4) +# define math_shrink_order(a) vlink(a,4) +# define math_penalty(a) vinfo(a,5) +# define math_options(a) vlink(a,5) + +inline static void tex_add_math_option (halfword a, halfword r) { math_options(a) |= r; } +inline static void tex_remove_math_option (halfword a, halfword r) { math_options(a) &= ~(r | math_options(a)); } +inline static int tex_has_math_option (halfword a, halfword r) { return (math_options(a) & r) == r; } + +/*tex Here are some (inline) helpers. We need specific ones for math glue. */ + +inline static int tex_glue_is_zero(halfword g) +{ + return (! g) || ((glue_amount(g) == 0) && (glue_stretch(g) == 0) && (glue_shrink(g) == 0)); +} + +inline static int tex_math_glue_is_zero(halfword g) +{ + return (! g) || ((math_amount(g) == 0) && (math_stretch(g) == 0) && (math_shrink(g) == 0)); +} + +inline static int tex_same_glue(halfword a, halfword b) +{ + return + (a == b) /* same glue specs or both zero */ + || (a && b && glue_amount(a) == glue_amount(b) + && glue_stretch(a) == glue_stretch(b) + && glue_shrink(a) == glue_shrink(b) + && glue_stretch_order(a) == glue_stretch_order(b) + && glue_shrink_order(a) == glue_shrink_order(b) + ) + ; +} + +inline static void tex_reset_glue_to_zero(halfword target) +{ + if (target) { + glue_amount(target) = 0; + glue_stretch(target) = 0; + glue_shrink(target) = 0; + glue_stretch_order(target) = 0; + glue_shrink_order(target) = 0; + } +} + +inline static void tex_reset_math_glue_to_zero(halfword target) +{ + if (target) { + math_amount(target) = 0; + math_stretch(target) = 0; + math_shrink(target) = 0; + math_stretch_order(target) = 0; + math_shrink_order(target) = 0; + } +} + +inline static void tex_copy_glue_values(halfword target, halfword source) +{ + if (source) { + glue_amount(target) = 
glue_amount(source); + glue_stretch(target) = glue_stretch(source); + glue_shrink(target) = glue_shrink(source); + glue_stretch_order(target) = glue_stretch_order(source); + glue_shrink_order(target) = glue_shrink_order(source); + } else { + glue_amount(target) = 0; + glue_stretch(target) = 0; + glue_shrink(target) = 0; + glue_stretch_order(target) = 0; + glue_shrink_order(target) = 0; + } +} + +inline static int tex_is_par_init_glue(halfword n) +{ + switch (node_subtype(n)) { + case indent_skip_glue: + case par_init_left_skip_glue: + case par_init_right_skip_glue: + return 1; + default: + return 0; + } +} + +/*tex + Kern nodes are relatively simple. Instead of |width| we use |kern_amount| which makes more + sense: we can go left, right, up or down. Margin kerns have been dropped and are now just a + special subtype of regular kerns. +*/ + +typedef enum kern_subtypes { + font_kern_subtype, + explicit_kern_subtype, /*tex |subtype| of kern nodes from |\kern| and |\/| */ + accent_kern_subtype, /*tex |subtype| of kern nodes from accents */ + italic_kern_subtype, + left_margin_kern_subtype, + right_margin_kern_subtype, + explicit_math_kern_subtype, + math_shape_kern_subtype, + horizontal_math_kern_subtype, + vertical_math_kern_subtype, +} kern_subtypes; + +# define last_kern_subtype vertical_math_kern_subtype + +# define kern_node_size 3 +# define kern_amount(a) vlink(a,2) /*tex aka |width = vlink(a,2)| */ +# define kern_expansion(a) vinfo(a,2) /*tex expansion factor (hz) */ + +inline static int tex_is_margin_kern(halfword n) +{ + return (n && node_type(n) == kern_node && (node_subtype(n) == left_margin_kern_subtype || node_subtype(n) == right_margin_kern_subtype)); +} + +/*tex + + Disc nodes are complicated: they have three embedded nesting nodes to which the |pre_break|, + |post_break| and |no_break| fields point. In there we find a head pointer (|vlink| aka |hlink|) + and tail pointer (|tlink|). 
The |alink| pointer is used in the base mode font machinery and is + not really a prev pointer. We have to make sure it gets nilled when we communicate with \LUA. + + The no-, pre-, and postbreak fields point to nesting nodes that are part of the disc node (three + times two memorywords). Sometimes these nodes are actually used, for instance when a temp node + is expected at the head of a list. The layout is: + + \starttyping + [ type+subtype + next ] + [ attr + prev ] + [ penalty + nobreak ] + [ prebreak + postbreak ] + [ type+subtype next/hlink ] (nesting node prebreak) + [ tlink prev ] + [ type+subtype next/hlink ] (nesting node postbreak) + [ tlink prev ] + [ type+subtype next/hlink ] (nesting node nobreak) + [ tlink prev ] + \stoptyping + + Another reason why we need the indirect apoproach is that we can set the fields to |null| which + is better than point to a nest node with no following up. + +*/ + +/*tex + Among the dropped nodes (\LUATEX\ has them) are movements nodes (used in the \DVI\ backend) + and variable nodes (replaced by specification nodes). + + Nesting nodes are really simple and just use the common type, subtype and next fields so they + have no dedicated fields. They can be part of another node type (like disc nodes). +*/ + +# define nesting_node_size 2 + +typedef enum nesting_subtypes { + pre_break_code, + post_break_code, + no_break_code, + insert_head_code, + unset_nesting_code, +} nesting_subtypes; + +# define last_nesting_subtype unset_nesting_code + +/*tex Here the codes in commands and subtypes are in sync. 
*/ + +typedef enum discretionary_subtypes { + normal_discretionary_code, + explicit_discretionary_code, + automatic_discretionary_code, + mathematics_discretionary_code, + syllable_discretionary_code, +} discretionary_subtypes; + +# define last_discretionary_subtype syllable_discretionary_code +# define last_discretionary_code automatic_discretionary_code + +typedef enum disc_options { + disc_option_normal_word = 0x0, + disc_option_pre_word = 0x1, + disc_option_post_word = 0x2, +} disc_options; + +# define disc_node_size 13 +# define disc_no_break(a) vlink(a,2) /* beware: vinfo is used for type/subtype */ +# define disc_pre_break(a) vlink(a,3) /* beware: vinfo is used for type/subtype */ +# define disc_post_break(a) vlink(a,4) /* beware: vinfo is used for type/subtype */ +/* disc_no_break_node 5 6 */ /* this is a nesting node of size 2 */ +/* disc_pre_break_node 7 8 */ /* this is a nesting node of size 2 */ +/* disc_post_break_node 9 10 */ /* this is a nesting node of size 2 */ +# define disc_penalty(a) vinfo(a,11) +# define disc_options(a) vlink(a,11) +# define disc_class(a) vinfo(a,12) +# define disc_unused(a) vlink(a,12) + +# define set_disc_penalty(a,b) disc_penalty(a) = b +# define set_disc_class(a,b) disc_class(a) = b +# define set_disc_options(a,b) disc_options(a) = b +# define set_disc_option(a,b) disc_options(a) |= b + +# define has_disc_option(a,b) ((disc_options(a) & b) == b) + +# define unset_disc_class -1 + +/*tex + These are pseudo nodes inside a node. We used to reference them by |*_break_head| but now call + just call them nodes so that we can use head and tail instead of hlink and tlink. 
+*/ + +# define disc_pre_break_node(a) (a+5) +# define disc_post_break_node(a) (a+7) +# define disc_no_break_node(a) (a+9) + +# define disc_pre_break_head(a) node_head(disc_pre_break_node(a)) +# define disc_post_break_head(a) node_head(disc_post_break_node(a)) +# define disc_no_break_head(a) node_head(disc_no_break_node(a)) + +# define disc_pre_break_tail(a) node_tail(disc_pre_break_node(a)) +# define disc_post_break_tail(a) node_tail(disc_post_break_node(a)) +# define disc_no_break_tail(a) node_tail(disc_no_break_node(a)) + +extern void tex_set_disc_field (halfword target, halfword location, halfword source); +extern void tex_check_disc_field (halfword target); +extern void tex_set_discpart (halfword d, halfword h, halfword t, halfword code); +extern halfword tex_flatten_discretionaries (halfword head, int *count, int nest); +extern void tex_flatten_leaders (halfword box, int *count); +extern void tex_soften_hyphens (halfword head, int *found, int *replaced); +extern halfword tex_harden_spaces (halfword head, halfword tolerance, int *count); + +/*tex + Lists need a rather large node, also because the have quite some extra possibilities, like the + orientation features. We can put the dir with the orientation but it becomes messy in casting + that way. Also, memory is not really a constraint and for a cpu cache we're better off this + way. + + In the original setup the unset and align_record nodes have overlapping fields. This has the + side effect that when we access the alternates from \LUA\ that they can have weird values + unless we reset them. Even then, it can be that we actually want to use those other fields + somehow. For that reason it's better to waste a few more slots and play safe. We can now + actually explore table cells with offsets if we want. 
+ + Beware: in alignments + + \startitemize[packed] + \startitem align record nodes become unset nodes \stopitem + \startitem unset nodes become hlist or vlist nodes \stopitem + \stopitemize +*/ + +typedef enum list_subtypes { + unknown_list, + line_list, /*tex paragraph lines */ + hbox_list, /*tex |\hbox| */ + indent_list, /*tex indentation box */ + container_list, /*tex container box */ + align_row_list, /*tex row from a |\halign| or |\valign| */ + align_cell_list, /*tex cell from a |\halign| or |\valign| */ + equation_list, /*tex display equation */ + equation_number_list, /*tex display equation number */ + math_list_list, + math_char_list, + math_pack_list, + math_h_extensible_list, + math_v_extensible_list, + math_h_delimiter_list, + math_v_delimiter_list, + math_over_delimiter_list, + math_under_delimiter_list, + math_numerator_list, + math_denominator_list, + math_modifier_list, + math_fraction_list, + math_nucleus_list, + math_sup_list, + math_sub_list, + math_pre_post_list, + math_degree_list, + math_scripts_list, + math_over_list, + math_under_list, + math_accent_list, + math_radical_list, + math_fence_list, + math_rule_list, + math_ghost_list, + insert_result_list, + local_list, + local_left_list, + local_right_list, + local_middle_list, +} list_subtypes ; + +# define last_list_subtype local_middle_list +# define noad_class_list_base 0x0100 + +typedef enum list_anchors { + left_origin_anchor = 0x001, + left_height_anchor = 0x002, + left_depth_anchor = 0x003, + right_origin_anchor = 0x004, + right_height_anchor = 0x005, + right_depth_anchor = 0x006, + center_origin_anchor = 0x007, + center_height_anchor = 0x008, + center_depth_anchor = 0x009, + halfway_total_anchor = 0x00A, + halfway_height_anchor = 0x00B, + halfway_depth_anchor = 0x00C, + halfway_left_anchor = 0x00D, + halfway_right_anchor = 0x00E, +} list_anchors; + +typedef enum list_signs { + negate_x_anchor = 0x100, + negate_y_anchor = 0x200, +} list_signs; + +typedef enum list_geometries { + 
no_geometry = 0x0, + offset_geometry = 0x1, + orientation_geometry = 0x2, + anchor_geometry = 0x4, +} list_geometries; + +# define box_node_size 15 +# define box_width(a) vlink(a,2) +# define box_w_offset(a) vinfo(a,2) +# define box_depth(a) vlink(a,3) +# define box_d_offset(a) vinfo(a,3) +# define box_height(a) vlink(a,4) +# define box_h_offset(a) vinfo(a,4) +# define box_list(a) vlink(a,5) /* 5 = list_offset */ +# define box_shift_amount(a) vinfo(a,5) +# define box_glue_order(a) vlink(a,6) +# define box_glue_sign(a) vinfo(a,6) +# define box_glue_set(a) dvalue(a,7) /* So we reserve a whole memory word! */ +# define box_dir(a) vlink00(a,8) +# define box_package_state(a) vlink01(a,8) +# define box_axis(a) vlink02(a,8) +# define box_geometry(a) vlink03(a,8) +# define box_orientation(a) vinfo(a,8) /* also used for size in alignments */ +# define box_x_offset(a) vlink(a,9) +# define box_y_offset(a) vinfo(a,9) +# define box_pre_migrated(a) vlink(a,10) +# define box_post_migrated(a) vinfo(a,10) +# define box_pre_adjusted(a) vlink(a,11) +# define box_post_adjusted(a) vinfo(a,11) +# define box_source_anchor(a) vlink(a,12) +# define box_target_anchor(a) vinfo(a,12) +# define box_anchor(a) vlink(a,13) +# define box_index(a) vinfo(a,13) +# define box_input_file(a) vlink(a,14) /* aka box_synctex_tag */ +# define box_input_line(a) vinfo(a,14) /* aka box_synctex_line */ + +# define box_total(a) (box_height(a) + box_depth(a)) + +inline static void tex_set_box_geometry (halfword b, halfword g) { box_geometry(b) |= (singleword) (g); } +/* static void tex_unset_box_geometry (halfword b, halfword g) { box_geometry(b) &= (singleword) ~((singleword) (g) | box_geometry(b)); } */ +inline static void tex_unset_box_geometry (halfword b, halfword g) { box_geometry(b) &= (singleword) (~g); } +inline static int tex_has_geometry (halfword g, halfword f) { return ((singleword) (g) & (singleword) (f)) == (singleword) (f); } +inline static int tex_has_box_geometry (halfword b, halfword g) { 
return (box_geometry(b) & (singleword) (g)) == (singleword) (g); } + +typedef enum package_states { + unknown_package_state = 0x00, + hbox_package_state = 0x01, + vbox_package_state = 0x02, + vtop_package_state = 0x03, + /* maybe vcenter */ +} package_states; + +typedef enum package_dimension_states { + package_dimension_not_set = 0x00, + package_dimension_size_set = 0x04, +} package_dimension_states; + +typedef enum package_leader_states { + package_u_leader_not_set = 0x00, + package_u_leader_set = 0x08, + package_u_leader_delayed = 0x10, +} package_leader_states; + +# define set_box_package_state(p,s) box_package_state(p) |= s +# define has_box_package_state(p,s) ((box_package_state(p) & s) == s) +# define is_box_package_state(p,s) ((p & s) == s) + +typedef enum list_axis { /* or maybe math states */ + no_math_axis = 0x01, +} list_axis; + +# define has_box_axis(p,s) ((box_axis(p) & s) == s) +# define set_box_axis(p,s) box_axis(p) |= (s & 0xFF) + +/*tex + These |unset| nodes have the same layout as list nodes and at some point become an |hlist| or + |vlist| node. +*/ + +# define unset_node_size box_node_size +# define box_glue_stretch(a) box_w_offset(a) +# define box_glue_shrink(a) box_h_offset(a) +# define box_span_count(a) box_d_offset(a) +# define box_size(a) box_orientation(a) + +/*tex + The |align record| nodes have the same layout as list nodes and at some point become an |unset| + node. +*/ + +# define align_record_size box_node_size +# define align_record_span_ptr(a) box_w_offset(a) /*tex A column spanning list */ +# define align_record_cmd(a) box_h_offset(a) /*tex Info to remember during template. */ +# define align_record_chr(a) box_d_offset(a) /*tex Info to remember during template. */ +# define align_record_pre_part(a) box_x_offset(a) /*tex The pointer to |u_j| token list. */ +# define align_record_post_part(a) box_y_offset(a) /*tex The pointer to |v_j| token list. 
*/ +# define align_record_dimension(a) box_orientation(a) /*tex Optionally enforced width. */ + +/*tex + Span nodes are tricky in the sense that their |span_link| actually has to sit in the same slot + as |align_record_span_ptr| because we need the initial location to be the same. This is why we + renamed this field to |span_ptr|. Moving it to another spot than in \LUATEX\ also opens the + possibility for attributes to cells. +*/ + +# define span_node_size 3 +# define span_span(a) vinfo(a,1) +# define span_unused(a) vlink(a,1) +# define span_width(a) vlink(a,2) /* overlaps with |box_width(a)|. */ +# define span_ptr(a) vinfo(a,2) /* overlaps with |box_w_offset(a)| and align_record_span_ptr(a). */ + +/*tex + Here the subtypes and command codes partly overlay. We actually hav eonly avery few left because + it's mostly a backend feature now. +*/ + +typedef enum rule_subtypes { + normal_rule_subtype, + empty_rule_subtype, + strut_rule_subtype, + outline_rule_subtype, + user_rule_subtype, + math_over_rule_subtype, + math_under_rule_subtype, + math_fraction_rule_subtype, + math_radical_rule_subtype, + box_rule_subtype, + image_rule_subtype, +} rule_subtypes; + +typedef enum rule_codes { + normal_rule_code, + empty_rule_code, + strut_rule_code, +} rule_codes; + +# define last_rule_subtype image_rule_subtype +# define first_rule_code normal_rule_code +# define last_rule_code strut_rule_code + +# define rule_node_size 7 +# define rule_width(a) vlink(a,2) +# define rule_x_offset(a) vinfo(a,2) +# define rule_depth(a) vlink(a,3) +# define rule_y_offset(a) vinfo(a,3) +# define rule_height(a) vlink(a,4) +# define rule_data(a) vinfo(a,4) +# define rule_left(a) vinfo(a,5) +# define rule_right(a) vlink(a,5) +# define rule_font(a) vinfo(a,6) +# define rule_character(a) vlink(a,6) + +# define rule_total(a) (rule_height(a) + rule_depth(a)) + +/*tex + + Originally glyph nodes had a |lig_ptr| but storing components makes not that much sense so we + dropped that. 
The free slot is now used for a state field. We already had a data field that + took another free slot and that behaves like an attribute. The glyph data field can be set at + the \TEX\ end, the state field is only accessible in \LUA. At the same time we reshuffled the + fields a bit so that the most accessed fields are close together. + + The \LUATEX\ engine dropped the language node and moved that feature to the glyph nodes. In + addition to the language more properties could be set but they were all packed into one + halfword. In \LUAMETATEX\ we waste a few more bytes and keep the language separate but we + still pack a few properties. + + In \TEX\ we have character nodes and glyph nodes, but here we only have one type. The subtype + can be used to indicate if we have ligatures but in \LUATEX\ for various reasons we don't follow + the integrated approach that \TEX\ has: we have callbacks for hyphenation, ligature building, + kerning etc.\ which demands separation, but more important is that we want to use \LUA\ to deal + with modern fonts. The components field that is still present in \LUATEX\ is gone because it + serves no purpose. We don't need to reassemble and when dealing with \OPENTYPE\ fonts we loose + information in successive steps anyway. + + This also makes that the subtype is now only used to flag if glyphs have been processed. The + macro package can decide what additional properties get stored in this field. + + We used to have this: + + \starttyping + inline static void protect_glyph (halfword a) { quarterword s = node_subtype(a) ; if (s <= 256) { node_subtype(a) = s == 1 ? 
256 : 256 + s; } } + inline static void unprotect_glyph (halfword a) { quarterword s = node_subtype(a) ; if (s > 256) { node_subtype(a) = s - 256; } } + inline static int is_protected_glyph (halfword a) { return node_subtype(a) >= 256; } + \stoptyping + + These were also dropped: + + \starttyping + # define is_character(p) (((node_subtype(p)) & glyph_character) == glyph_character) + # define is_ligature(p) (((node_subtype(p)) & glyph_ligature ) == glyph_ligature ) + # define is_simple_character(p) (is_character(p) && ! is_ligature(p)) + # define set_is_glyph(p) node_subtype(p) = (quarterword) (node_subtype(p) & ~glyph_character) + \stoptyping + +*/ + +/*tex + + Putting |width|, |height| and |depth| in a glyph has some advantages, for instance when we + fetch them in the builder, packer, \LUA\ interface, but it also has a disadvantage: we need to + have more complex copying of glyph nodes. For instance, when we copy glyphs in the open type + handler (e.g. for multiples) we also copy the fields. But then when we set a character, we also + would have to set the dimensions. Okay, some helper could do that (or a flag in setchar). It's + anyway not something to do in a hurry. An |x_extra| field is something different: combined with + setting |x_offset| that could replace font kerns: |x_advance = width + x_offset + x_extra|. + +*/ + +//define glyph_node_size 12 +# define glyph_node_size 13 +# define glyph_character(a) vinfo(a,2) +# define glyph_font(a) vlink(a,2) +# define glyph_data(a) vinfo(a,3) /*tex We had that unused, so now it's like an attribute. */ +# define glyph_state(a) vlink(a,3) /*tex A user field (can be handy in \LUA). 
*/ +# define glyph_language(a) vinfo(a,4) +# define glyph_script(a) vlink(a,4) +# define glyph_options(a) vinfo(a,5) +# define glyph_hyphenate(a) vlink(a,5) +# define glyph_protected(a) vinfo00(a,6) +# define glyph_lhmin(a) vinfo01(a,6) +# define glyph_rhmin(a) vinfo02(a,6) +# define glyph_discpart(a) vinfo03(a,6) +# define glyph_expansion(a) vlink(a,6) +# define glyph_x_scale(a) vinfo(a,7) +# define glyph_y_scale(a) vlink(a,7) +# define glyph_scale(a) vinfo(a,8) +# define glyph_raise(a) vlink(a,8) +# define glyph_left(a) vinfo(a,9) +# define glyph_right(a) vlink(a,9) +# define glyph_x_offset(a) vinfo(a,10) +# define glyph_y_offset(a) vlink(a,10) +//define glyph_input_file(a) vinfo(a,11) /* aka glyph_synctex_tag */ +//define glyph_input_line(a) vlink(a,11) /* aka glyph_synctex_line */ +# define glyph_properties(a) vinfo0(a,11) +# define glyph_group(a) vinfo1(a,11) +# define glyph_index(a) vlink(a,11) +# define glyph_input_file(a) vinfo(a,12) +# define glyph_input_line(a) vlink(a,12) + +# define get_glyph_data(a) ((halfword) glyph_data(a)) +# define get_glyph_state(a) ((halfword) glyph_state(a)) +# define get_glyph_language(a) ((halfword) glyph_language(a)) +# define get_glyph_script(a) ((halfword) glyph_script(a)) +# define get_glyph_x_scale(a) ((halfword) glyph_x_scale(a)) +# define get_glyph_y_scale(a) ((halfword) glyph_y_scale(a)) +# define get_glyph_scale(a) ((halfword) glyph_scale(a)) +# define get_glyph_raise(a) ((halfword) glyph_raise(a)) +# define get_glyph_lhmin(a) ((halfword) glyph_lhmin(a)) +# define get_glyph_rhmin(a) ((halfword) glyph_rhmin(a)) +# define get_glyph_left(a) ((halfword) glyph_left(a)) +# define get_glyph_right(a) ((halfword) glyph_right(a)) +# define get_glyph_hyphenate(a) ((halfword) glyph_hyphenate(a)) +# define get_glyph_options(a) ((halfword) glyph_options(a)) +# define get_glyph_dohyph(a) (hyphenation_permitted(glyph_hyphenate(a), syllable_hyphenation_mode ) || hyphenation_permitted(glyph_hyphenate(a), 
force_handler_hyphenation_mode)) +# define get_glyph_uchyph(a) (hyphenation_permitted(glyph_hyphenate(a), uppercase_hyphenation_mode) || hyphenation_permitted(glyph_hyphenate(a), force_handler_hyphenation_mode)) + +# define set_glyph_data(a,b) glyph_data(a) = b +# define set_glyph_state(a,b) glyph_state(a) = b +# define set_glyph_language(a,b) glyph_language(a) = b +# define set_glyph_script(a,b) glyph_script(a) = b +# define set_glyph_x_scale(a,b) glyph_x_scale(a) = b +# define set_glyph_y_scale(a,b) glyph_y_scale(a) = b +# define set_glyph_x_offset(a,b) glyph_x_offset(a) = b +# define set_glyph_y_offset(a,b) glyph_y_offset(a) = b +# define set_glyph_scale(a,b) glyph_scale(a) = b +# define set_glyph_raise(a,b) glyph_raise(a) = b +# define set_glyph_left(a,b) glyph_left(a) = b +# define set_glyph_right(a,b) glyph_right(a) = b +# define set_glyph_lhmin(a,b) glyph_lhmin(a) = (singleword) b +# define set_glyph_rhmin(a,b) glyph_rhmin(a) = (singleword) b +# define set_glyph_hyphenate(a,b) glyph_hyphenate(a) = ((halfword) b) +# define set_glyph_options(a,b) glyph_options(a) = ((halfword) b) +/* set_glyph_dohyph(a,b) glyph_hyphenate(a) = ((halfword) flip_hyphenation_mode(glyph_hyphenate(a),syllable_hyphenation_mode)) */ +# define set_glyph_uchyph(a,b) glyph_hyphenate(a) = ((halfword) flip_hyphenation_mode(glyph_hyphenate(a),uppercase_hyphenation_mode)) +# define set_glyph_discpart(a,b) glyph_discpart(a) = (singleword) (b) +# define get_glyph_discpart(a) ((halfword) glyph_discpart(a)) + +typedef enum glyph_subtypes { + /* initial value: */ + glyph_unset_subtype, + /* traditional text: */ + glyph_character_subtype, + glyph_ligature_subtype, + /* special math */ + glyph_math_delimiter_subtype, + glyph_math_extensible_subtype, + /* engine math, class driven */ + glyph_math_ordinary_subtype, + glyph_math_operator_subtype, + glyph_math_binary_subtype, + glyph_math_relation_subtype, + glyph_math_open_subtype, + glyph_math_close_subtype, + glyph_math_punctuation_subtype, + 
glyph_math_variable_subtype, + glyph_math_active_subtype, + glyph_math_inner_subtype, + glyph_math_under_subtype, + glyph_math_over_subtype, + glyph_math_fraction_subtype, + glyph_math_radical_subtype, + glyph_math_middle_subtype, + glyph_math_accent_subtype, + glyph_math_fenced_subtype, + glyph_math_ghost_subtype, + /* extra math, user classes, set but anonymous */ + glyph_math_extra_subtype = 31, +} glyph_subtypes; + +# define last_glyph_subtype glyph_math_accent_subtype + +typedef enum glyph_hstate_codes { + glyph_discpart_unset, + glyph_discpart_pre, + glyph_discpart_post, + glyph_discpart_replace, + glyph_discpart_always, +} glyph_hstate_codes; + +typedef enum glyph_option_codes { + /*tex These are part of the defaults (all): */ + glyph_option_normal_glyph = 0x0000, + glyph_option_no_left_ligature = 0x0001, + glyph_option_no_right_ligature = 0x0002, + glyph_option_no_left_kern = 0x0004, + glyph_option_no_right_kern = 0x0008, + glyph_option_no_expansion = 0x0010, + glyph_option_no_protrusion = 0x0020, + glyph_option_apply_x_offset = 0x0040, + glyph_option_apply_y_offset = 0x0080, + glyph_option_no_italic_correction = 0x0100, + /* These are only meant for math characters: */ + glyph_option_math_discretionary = 0x0200, + glyph_option_math_italics_too = 0x0400, + /*tex So watch out: this is a subset! 
*/ + glyph_option_all = 0x01FF, +} glyph_option_codes; + +typedef enum auto_discretionary_codes { + auto_discretionary_normal = 0x0001, /* turn glyphs into discretionary with three similar components */ + auto_discretionary_italic = 0x0002, /* also include italic correcxtion when present */ +} auto_discretionary_codes; + +inline static void tex_add_glyph_option (halfword a, halfword r) { glyph_options(a) |= r; } +inline static void tex_remove_glyph_option (halfword a, halfword r) { glyph_options(a) &= ~(r | glyph_options(a)); } +inline static int tex_has_glyph_option (halfword a, halfword r) { return (glyph_options(a) & r) == r; } + +/*tex + As we have a small field available for protection we no longer need to pack the protection + state in the subtype. We can now basically use the subtype for anything we want (as long as it + stays within the range |0x0000-0xFFFF|. +*/ + +/* inline static void tex_protect_glyph (halfword a) { node_subtype(a) |= (quarterword) 0x8000; } */ +/* inline static void tex_unprotect_glyph (halfword a) { node_subtype(a) &= (quarterword) 0x7FFF; } */ +/* inline static int tex_is_protected_glyph (halfword a) { return node_subtype(a) >= (quarterword) 0x8000; } */ +/* inline static int tex_subtype_of_glyph (halfword a) { return node_subtype(a) & (quarterword) 0x7FFF; } */ + +typedef enum glyph_protection_codes { + glyph_unprotected_code = 0x0, + glyph_protected_text_code = 0x1, + glyph_protected_math_code = 0x2, +} glyph_protection_codes; + +/*tex + Next come some very specialized nodes types. First the marks. They just register a token list. +*/ + +# define mark_node_size 3 +# define mark_ptr(a) vlink(a,2) +# define mark_index(a) vinfo(a,2) + +typedef enum mark_codes { + set_mark_value_code, + reset_mark_value_code, +} mark_codes; + +# define last_mark_subtype reset_mark_value_code + +/*tex + The (not really used in \CONTEXT) |\vadjust| nodes are also small. The codes and subtypes + overlap. 
+*/ + +typedef enum adjust_subtypes { + pre_adjust_code, + post_adjust_code, + local_adjust_code, +} adjust_subtypes; + +typedef enum adjust_options { + adjust_option_none = 0x00, + adjust_option_before = 0x01, + adjust_option_baseline = 0x02, + adjust_option_depth_before = 0x04, + adjust_option_depth_after = 0x08, + adjust_option_depth_check = 0x10, + adjust_option_depth_last = 0x20, +} adjust_options; + +# define last_adjust_subtype local_adjust_code + +# define adjust_node_size 5 +# define adjust_list(a) vlink(a,2) +# define adjust_options(a) vinfo(a,2) +# define adjust_index(a) vlink(a,3) +# define adjust_reserved(a) vinfo(a,3) +# define adjust_depth_before(a) vlink(a,4) +# define adjust_depth_after(a) vinfo(a,4) + +# define has_adjust_option(p,o) ((adjust_options(p) & o) == o) + +/*tex + Inserts are more complicated. The |ins| node stores an insert in the list while |inserting| + nodes keep track of where to break the page so that they (hopefully) stay with the text. As + already mentioned, the insert node is tricky in the sense that it uses an offset to an + embedded (fake) node. That node acts as start of a next chain. Making that more transparent + would demand some changes that I'm not willing to make right now (and maybe never). +*/ + +# define insert_node_size 6 /* can become 1 smaller or we can have insert_index instead of subtype */ +# define insert_index(a) vinfo(a,2) /* width is not used */ +# define insert_float_cost(a) vlink(a,2) +# define insert_max_depth(a) vlink(a,3) +# define insert_total_height(a) vlink(a,4) /* the sum of height and depth, i.e. total */ +# define insert_list(a) vinfo(a,5) /* is alias for |node_next|*/ +# define insert_split_top(a) vlink(a,5) /* a state variable */ + +# define insert_first_box(a) (a + 5) /*tex A fake node where box_list_ptr becomes a next field. */ + +# define split_node_size 5 /*tex Can become a |split_up_node|. */ +# define split_insert_index(a) vinfo(a,2) /*tex Same slot! 
*/ +# define split_broken(a) vlink(a,2) /*tex An insertion for this class will break here if anywhere. */ +# define split_broken_insert(a) vinfo(a,3) /*tex This insertion might break at |broken_ptr|. */ +# define split_last_insert(a) vlink(a,3) /*tex The most recent insertion for this |subtype|. */ +# define split_best_insert(a) vinfo(a,4) /*tex The optimum most recent insertion. */ +# define split_height(a) vlink(a,4) /*tex Aka |height(a) = vlink(a,4)| */ /* todo */ + +typedef enum split_subtypes { + normal_split_subtype, + insert_split_subtype, +} split_subtypes; + +# define last_split_subtype insert_split_subtype + +/*tex + It's now time for some Some handy shortcuts. These are used when determining proper break points + and|/|or the beginning or end of words. +*/ + +# define last_preceding_break_node whatsit_node +# define last_non_discardable_node dir_node +# define last_node_with_attributes glyph_node +# define last_complex_node align_record_node +# define max_node_type passive_node + +# define precedes_break(a) (node_type(a) <= last_preceding_break_node) +# define precedes_kern(a) ((node_type(a) == kern_node) && (node_subtype(a) == font_kern_subtype || node_subtype(a) == accent_kern_subtype || node_subtype(a) == math_shape_kern_subtype)) +# define precedes_dir(a) ((node_type(a) == dir_node) && normalize_line_mode_permitted(normalize_line_mode_par,break_after_dir_mode)) +# define non_discardable(a) (node_type(a) <= last_non_discardable_node) + +inline static int tex_nodetype_is_complex (halfword t) { return t <= last_complex_node; } +inline static int tex_nodetype_has_attributes (halfword t) { return t <= last_node_with_attributes; } +inline static int tex_nodetype_has_subtype (halfword t) { return t != glue_spec_node && t != math_spec_node && t != font_spec_node; } +inline static int tex_nodetype_has_prev (halfword t) { return t != glue_spec_node && t != math_spec_node && t != font_spec_node && t != attribute_node; } +inline static int tex_nodetype_has_next 
(halfword t) { return t != glue_spec_node && t != math_spec_node && t != font_spec_node; } +inline static int tex_nodetype_is_visible (halfword t) { return (t >= 0) && (t <= max_node_type) && lmt_interface.node_data[t].visible; } + +/*tex + This is a bit weird place to define them but anyway. In the meantime in \LUAMETATEX\ we no + longer have the option to report the codes used in \ETEX. We have different nodes so it makes + no sense to complicate matters (although earlier version of \LUAMETATEX\ has this organized + quite well \unknown\ just an example of cleaning up, wondering about the use and then dropping + it. +*/ + +# define get_node_size(i) (lmt_interface.node_data[i].size) +# define get_node_name(i) (lmt_interface.node_data[i].name) +/* get_etex_code(i) (lmt_interface.node_data[i].etex) */ + +/*tex + Although expressions could use some dedicated data structure, currently they are implemented + using a linked list. This means that only memory is the limitation for recursion but I might + as well go for a dedicated structure some day, just for the fun of implementing it. It is + probably also more efficient. The current approach is inherited from \ETEX. The stack is only + used when we have expressions between parenthesis. +*/ + +# define expression_node_size 3 +# define expression_type(a) vinfo00(a,1) /*tex one of the value levels */ +# define expression_state(a) vinfo01(a,1) +# define expression_result(a) vinfo02(a,1) +# define expression_unused(a) vinfo03(a,1) +# define expression_expression(a) vlink(a,1) /*tex saved expression so far */ +# define expression_term(a) vlink(a,2) /*tex saved term so far */ +# define expression_numerator(a) vinfo(a,2) /*tex saved numerator */ + +/*tex + To be decided: go double +*/ + +# define expression_entry(a) lvalue(a,2) + +/*tex + This is a node that stores a font state. In principle we can do without but for tracing it + really helps to have this compound element because it is more compact. 
We could have gone + numeric and use the sparse array approach but then we'd have to add a 4 int store which is more + code and also makes save and restore more complex. +*/ + +# define font_spec_node_size 4 /* we can be smaller: no attr and no prev */ +# define font_spec_identifier(a) vinfo(a,2) +# define font_spec_scale(a) vlink(a,2) +# define font_spec_x_scale(a) vinfo(a,3) +# define font_spec_y_scale(a) vlink(a,3) + +inline static int tex_same_fontspec(halfword a, halfword b) +{ + return + (a == b) + || (a && b && font_spec_identifier(a) == font_spec_identifier(b) + && font_spec_scale(a) == font_spec_scale(b) + && font_spec_x_scale(a) == font_spec_x_scale(b) + && font_spec_y_scale(a) == font_spec_y_scale(b) + ) + ; +} + +/*tex + At the cost of some more memory we now use a mode for storage. This not only overcomes the + \UNICODE\ limitation but also permits storing more in the future. +*/ + +# define math_spec_node_size 3 +# define math_spec_class(a) vinfo00(a,1) /* attr */ +# define math_spec_family(a) vinfo01(a,1) +# define math_spec_character(a) vlink(a,1) /* prev */ +# define math_spec_properties(a) vinfo0(a,2) +# define math_spec_group(a) vinfo1(a,2) +# define math_spec_index(a) vlink(a,2) + +# define math_spec_value(a) (((math_spec_class(a) & 0x3F) << 12) + ((math_spec_family(a) & 0x3F) << 8) + (math_spec_character(a) & 0xFF)) + +inline static int tex_same_mathspec(halfword a, halfword b) +{ + return + (a == b) + || (a && b && math_spec_class(a) == math_spec_class(b) + && math_spec_family(a) == math_spec_family(b) + && math_spec_character(a) == math_spec_character(b) + && math_spec_properties(a) == math_spec_properties(b) + && math_spec_group(a) == math_spec_group(b) + && math_spec_index(a) == math_spec_index(b) + ) + ; +} + +/*tex + Here are some more stack related nodes. 
+*/ + +# define align_stack_node_size 10 +# define align_stack_align_ptr(a) vinfo(a,1) +# define align_stack_cur_align(a) vlink(a,1) +# define align_stack_preamble(a) vinfo(a,2) +# define align_stack_cur_span(a) vlink(a,2) +# define align_stack_cur_loop(a) vinfo(a,3) +# define align_stack_wrap_source(a) vlink(a,3) +# define align_stack_align_state(a) vinfo(a,4) +# define align_stack_no_align_level(a) vlink(a,4) +# define align_stack_cur_post_adjust_head(a) vinfo(a,5) +# define align_stack_cur_post_adjust_tail(a) vlink(a,5) +# define align_stack_cur_pre_adjust_head(a) vinfo(a,6) +# define align_stack_cur_pre_adjust_tail(a) vlink(a,6) +# define align_stack_cur_post_migrate_head(a) vinfo(a,7) +# define align_stack_cur_post_migrate_tail(a) vlink(a,7) +# define align_stack_cur_pre_migrate_head(a) vinfo(a,8) +# define align_stack_cur_pre_migrate_tail(a) vlink(a,8) +# define align_stack_no_tab_skips(a) vinfo(a,9) +# define align_stack_attr_list(a) vlink(a,9) + +/*tex + If nodes are for nesting conditionals. We have more state information that in (for instance) + \LUATEX\ because we have more tracing and more test variants. +*/ + +# define if_node_size 3 /*tex we can use prev now */ +# define if_limit_type(a) vinfo0(a,1) /*tex overlaps with node_attr */ +# define if_limit_subtype(a) vinfo1(a,1) /*tex overlaps with node_attr */ +# define if_limit_unless(a) vinfo00(a,2) +# define if_limit_step(a) vinfo01(a,2) +# define if_limit_stepunless(a) vinfo02(a,2) +# define if_limit_unused(a) vinfo03(a,2) +# define if_limit_line(a) vlink(a,2) + +/*tex + Now come some rather special ones. For instance par shapes and file cq.\ line related nodes + were variable nodes. Thsi was dropped and replaced by a more generic specficiation node type. + In principle we can use that for more purposes. + + We use a bit of abstraction as preparation for different allocations. Dynamic allocation makes + it possible to get rid of variable nodes but it is slower. 
+ + Because this node has no links we can use the next field as counter. The subtype is just for + diagnostics. This node is special in the sense that it has a real pointer. Such nodes will not + be stored in the format file. Because there is a pointer field we have some extra accessors. + + Todo: we also need to catch the fact that we can run out of memory but in practice that will + not happen soon, for instance because we seldom use parshapes. And in the meantime the pseudo + file related nodes are gone anyway because all file IO has been delegated to \LUA\ now. +*/ + +# define specification_node_size 3 +# define specification_count(a) vlink(a,0) +# define specification_options(a) vinfo(a,1) +# define specification_unused(a) vlink(a,1) +# define specification_pointer(a) (mvalue(a,2)) + +typedef enum specification_options { + specification_option_repeat = 0x01, +} specifications_options; + +# define specification_index(a,n) ((memoryword *) specification_pointer(a))[n - 1] + +# define specification_repeat(a) ((specification_options(a) & specification_option_repeat) == specification_option_repeat) + +# define specification_n(a,n) (specification_repeat(a) ? ((n - 1) % specification_count(a) + 1) : (n > specification_count(a) ? 
specification_count(a) : n)) + +extern void tex_null_specification_list (halfword a); +extern void tex_new_specification_list (halfword a, halfword n, halfword o); +extern void tex_dispose_specification_list (halfword a); +extern void tex_copy_specification_list (halfword a, halfword b); +extern void tex_shift_specification_list (halfword a, int n, int rotate); + +inline static int tex_get_specification_count (halfword a) { return specification_count(a); } +inline static halfword tex_get_specification_indent (halfword a, halfword n) { return specification_index(a,specification_n(a,n)).half0; } +inline static halfword tex_get_specification_width (halfword a, halfword n) { return specification_index(a,specification_n(a,n)).half1; } +inline static halfword tex_get_specification_penalty (halfword a, halfword n) { return specification_index(a,specification_n(a,n)).half0; } +inline static void tex_set_specification_indent (halfword a, halfword n, halfword v) { specification_index(a,n).half0 = v; } +inline static void tex_set_specification_width (halfword a, halfword n, halfword v) { specification_index(a,n).half1 = v; } +inline static void tex_set_specification_penalty (halfword a, halfword n, halfword v) { specification_index(a,n).half0 = v; } +inline static void tex_set_specification_option (halfword a, int o) { specification_options(a) |= o; } + +extern halfword tex_new_specification_node (halfword n, quarterword s, halfword options); +extern void tex_dispose_specification_nodes (void); + +/*tex + We now define some math related nodes (and noads) and start with style and choice nodes. Style + nodes can be smaller, the information is encoded in |subtype|, but choice nodes are on-the-spot + converted to style nodes with slack. The advantage is that we don't run into issues when a choice + node is the first node in which case we would have to adapt head pointers (read: feed them back + into the calling routines). So, we keep this as it is now. 
+ + Parameter nodes started out as an experiment. We could actually use the same mechanism as + attributes but (1) we don't want attribute nodes in the list, it is very math specific and (3) + we don't need to be real fast here. + + Maybe these three can be merged into one type but on the other hand they are part of the \TEX\ + legacy and well documented so \unknown for now we keep it as-is. In the meantime we are no + longer casting choices to styles. + +*/ + +# define style_node_size 3 +# define style_style node_subtype +# define style_scale(a) vinfo(a,2) +# define style_reserved(a) vlink(a,2) + +# define choice_node_size 5 +//define choice_style node_subtype +# define choice_display_mlist(a) vinfo(a,2) /*tex mlist to be used in display style or pre_break */ +# define choice_text_mlist(a) vlink(a,2) /*tex mlist to be used in text style or post_break */ +# define choice_script_mlist(a) vinfo(a,3) /*tex mlist to be used in script style or no_break */ +# define choice_script_script_mlist(a) vlink(a,3) /*tex mlist to be used in scriptscript style */ +# define choice_class(a) vinfo(a,4) /*tex we could abuse the script script field */ +# define choice_unused(a) vlink(a,4) + +# define choice_pre_break choice_display_mlist +# define choice_post_break choice_text_mlist +# define choice_no_break choice_script_mlist + +# define parameter_node_size 3 +# define parameter_style node_subtype +# define parameter_name(a) vinfo(a,2) +# define parameter_value(a) vlink(a,2) + +typedef enum simple_choice_subtypes { + normal_choice_subtype, + discretionary_choice_subtype, +} simple_choice_subtypes; + +# define last_choice_subtype discretionary_choice_subtype + +/*tex + Because noad types get changed when processing we need to make sure some if the node sizes + match and that we don't share slots with different properties. + + First come the regular noads. The generic noad has the same size and similar fields as a fence + noad, and their types get swapped a few times. 
+ + We accept a little waste of space in order to get nicer code. After all, math is not that + demanding. Although delimiter, accent, fraction and radical share the same structure we do use + specific field names because of clarity. Not all fields are used always. + + \starttabulate[|l|l|l|l|l|l|] + \FL + \BC \BC noad \BC accent \BC fraction \BC radical \NC fence \NC \NR + \ML \NC + \NC vlink 2 \NC new_hlist \NC \NC \NC \NC \NC \NR + \ML \NC + \NC vinfo 2 \NC nucleus \NC \NC \NC \NC \NC \NR + \NC vlink 3 \NC supscr \NC \NC numerator \NC \NC \NC \NR + \NC vinfo 3 \NC subscr \NC \NC denominator \NC \NC \NC \NR + \NC vlink 4 \NC supprescr \NC \NC \NC \NC \NC \NR + \NC vinfo 4 \NC subprescr \NC \NC \NC \NC \NC \NR + \ML \NC + \NC vlink 5 \NC italic \NC \NC \NC \NC \NC \NR + \NC vinfo 5 \NC width \NC \NC \NC \NC \NC \NR + \NC vlink 6 \NC height \NC \NC \NC \NC \NC \NR + \NC vinfo 6 \NC depth \NC \NC \NC \NC \NC \NR + \ML \NC + \NC vlink 7 \NC options \NC \NC \NC \NC \NC \NR + \NC vinfo 7 \NC style \NC \NC \NC \NC \NC \NR + \NC vlink 8 \NC family \NC \NC \NC \NC \NC \NR + \NC vinfo 8 \NC class \NC \NC \NC \NC \NC \NR + \NC vlink 9 \NC source \NC \NC \NC \NC \NC \NR + \NC vinfo 9 \NC prime \NC \NC \NC \NC \NC \NR + \NC vlink 10 \NC leftslack \NC \NC \NC \NC \NC \NR + \NC vinfo 10 \NC rightslack \NC \NC \NC \NC \NC \NR + \ML \NC + \NC vlink 11 \NC extra_1 \NC top_character \NC rule_thickness \NC degree \NC list \NC \NR + \NC vinfo 11 \NC extra_2 \NC bot_character \NC left_delimiter \NC left_delimiter \NC source \NC \NR + \NC vlink 12 \NC extra_3 \NC overlay_character \NC right_delimiter \NC right_delimiter \NC top \NC \NR + \NC vinfo 12 \NC extra_4 \NC fraction \NC middle_delimiter \NC \NC bottom \NC \NR + \NC vlink 13 \NC extra_5 \NC topovershoot \NC \NC height \NC \NC \NR + \NC vinfo 13 \NC extra_6 \NC botovershoot \NC \NC depth \NC \NC \NR + \LL + \stoptabulate + + We can use smaller variables for style and class and then have one field available for + other usage so 
no need to grow. + +*/ + +# define noad_state_node_size 6 +# define noad_state_topright(a) vlink(a,2) +# define noad_state_bottomright(a) vinfo(a,2) +# define noad_state_topleft(a) vlink(a,3) +# define noad_state_bottomleft(a) vinfo(a,3) +# define noad_state_height(a) vlink(a,4) +# define noad_state_depth(a) vinfo(a,4) +# define noad_state_toptotal(a) vlink(a,5) +# define noad_state_bottomtotal(a) vinfo(a,5) + +# define noad_size 14 +# define noad_new_hlist(a) vlink(a,2) /*tex the translation of an mlist; a bit confusing name */ +# define noad_nucleus(a) vinfo(a,2) +# define noad_supscr(a) vlink(a,3) +# define noad_subscr(a) vinfo(a,3) +# define noad_supprescr(a) vlink(a,4) +# define noad_subprescr(a) vinfo(a,4) +# define noad_italic(a) vlink(a,5) /*tex Sometimes used, might become more. */ +# define noad_width(a) vinfo(a,5) +# define noad_height(a) vlink(a,6) +# define noad_depth(a) vinfo(a,6) +# define noad_options(a) vlink(a,7) +# define noad_style(a) vinfo00(a,7) +# define noad_family(a) vinfo01(a,7) +# define noad_script_state(a) vinfo02(a,7) +# define noad_analyzed(a) vinfo03(a,7) /*tex used for experiments */ +# define noad_state(a) vlink(a,8) /*tex this might replace */ +# define noad_class_main(a) vinfo00(a,8) +# define noad_class_left(a) vinfo01(a,8) +# define noad_class_right(a) vinfo02(a,8) +# define noad_script_order(a) vinfo03(a,8) +# define noad_source(a) vlink(a,9) +# define noad_prime(a) vinfo(a,9) +# define noad_left_slack(a) vlink(a,10) +# define noad_right_slack(a) vinfo(a,10) +# define noad_extra_1(a) vlink(a,11) +# define noad_extra_2(a) vinfo(a,11) +# define noad_extra_3(a) vlink(a,12) +# define noad_extra_4(a) vinfo(a,12) +# define noad_extra_5(a) vlink(a,13) +# define noad_extra_6(a) vinfo(a,13) + +# define noad_total(a) (noad_height(a) + noad_depth(a)) + +# define noad_has_postscripts(a) (noad_subscr(a) || noad_supscr(a)) +# define noad_has_prescripts(a) (noad_subprescr(a) || noad_supprescr(a)) +# define noad_has_scripts(a) 
(noad_has_postscripts(a) || noad_has_prescripts(a) || noad_prime(a)) +# define noad_has_following_scripts(a) (noad_subscr(a) || noad_supscr(a) || noad_prime(a)) +# define noad_has_superscripts(a) (noad_supprescr(a) || noad_supscr(a) || noad_prime(a)) +# define noad_has_subscripts(a) (noad_subprescr(a) || noad_subscr(a)) + +# define noad_has_scriptstate(a,s) ((noad_script_state(a) & s) == s) + +# define unset_noad_class 0xFE + +typedef enum noad_script_states { + post_super_script_state = 0x01, + post_sub_script_state = 0x02, + pre_super_script_state = 0x04, + pre_sub_script_state = 0x08, + prime_script_state = 0x10, +} noad_script_states; + +typedef enum noad_script_locations { + prime_unknown_location, + prime_at_begin_location, + prime_above_sub_location, + prime_at_end_location, +} noad_prime_locations; + +typedef enum noad_script_order { + script_unknown_first, + script_primescript_first, + script_subscript_first, + script_superscript_first, +} noad_script_order; + +typedef struct noad_classes { + singleword main; + singleword left; + singleword right; +} noad_classes; + +# define reset_noad_classes(n) do { \ + noad_class_main(n) = (singleword) unset_noad_class; \ + noad_class_left(n) = (singleword) unset_noad_class; \ + noad_class_right(n) = (singleword) unset_noad_class; \ +} while (0); + +# define set_noad_classes(n,c) do { \ + noad_class_main(n) = (singleword) (c & 0xFF); \ + noad_class_left(n) = (singleword) (c & 0xFF); \ + noad_class_right(n) = (singleword) (c & 0xFF); \ +} while (0); + +# define set_noad_main_class(n,c) noad_class_main(n) = (singleword) (c & 0xFF) +# define set_noad_left_class(n,c) noad_class_left(n) = (singleword) (c & 0xFF) +# define set_noad_right_class(n,c) noad_class_right(n) = (singleword) (c & 0xFF) + +# define get_noad_main_class(n) (noad_class_main(n)) +# define get_noad_left_class(n) (noad_class_left(n)) +# define get_noad_right_class(n) (noad_class_right(n)) + +# define set_noad_style(n,s) noad_style(n) = (singleword) (s & 
0xFF) +# define set_noad_family(n,f) noad_family(n) = (singleword) (f & 0xFF) + +/*tex + Options are something \LUATEX\ and in \LUAMETEX\ we added some more. When we have dimensions + then we obey |axis| and otherwise |noaxis|. This might evolve a bit over time. These options + currently are on the same spot but we pretend they aren't so we have dedicated accessors. This + also makes clear what noads have what options. + + If we run out of options we can combine some, like auto. +*/ + +typedef enum noad_options { + noad_option_axis = 0x00000001, + noad_option_no_axis = 0x00000002, + noad_option_exact = 0x00000004, + noad_option_left = 0x00000008, /* align option for overflown under/over */ /* used ? */ + noad_option_middle = 0x00000010, /* idem */ + noad_option_right = 0x00000020, /* idem */ + noad_option_adapt_to_left_size = 0x00000040, /* old trickery, might go away but kind of fun */ + noad_option_adapt_to_right_size = 0x00000080, /* idem */ + noad_option_no_sub_script = 0x00000100, + noad_option_no_super_script = 0x00000200, + noad_option_no_sub_pre_script = 0x00000400, + noad_option_no_super_pre_script = 0x00000800, + noad_option_no_script = 0x00001000, + noad_option_no_overflow = 0x00002000, /* keep (middle) extensible widthin target size */ + noad_option_void = 0x00004000, /* wipe and set width to zero */ + noad_option_phantom = 0x00008000, /* wipe */ + noad_option_openup_height = 0x00010000, + noad_option_openup_depth = 0x00020000, + noad_option_limits = 0x00040000, /* traditional modifier */ + noad_option_no_limits = 0x00080000, /* idem */ + noad_option_prefer_font_thickness = 0x00100000, + noad_option_no_ruling = 0x00200000, + noad_option_shifted_sub_script = 0x00400000, + noad_option_shifted_super_script = 0x00800000, + noad_option_shifted_sub_pre_script = 0x01000000, + noad_option_shifted_super_pre_script = 0x02000000, + noad_option_unpack_list = 0x04000000, + noad_option_no_check = 0x08000000, /* don't check for missing end fence */ + noad_option_auto 
= 0x10000000, + noad_option_unroll_list = 0x20000000, + noad_option_followed_by_space = 0x40000000, + /* available: */ + noad_option_reserved = 0x80000000, +} noad_options; + +# define has_option(a,b) (((a) & (b)) == (b)) +# define unset_option(a,b) ((a) & ~(b)) + +inline static void tex_add_noad_option (halfword a, halfword r) { noad_options(a) |= r; } +inline static void tex_remove_noad_option (halfword a, halfword r) { noad_options(a) &= ~(r | noad_options(a)); } +inline static int tex_has_noad_option (halfword a, halfword r) { return (noad_options(a) & r) == r; } + +inline int has_noad_no_script_option(halfword n, halfword option) +{ + switch (node_type(n)) { + case simple_noad: + case accent_noad: + case radical_noad: + case fence_noad: + case fraction_noad: + return has_option(noad_options(n), option) || has_option(noad_options(n), noad_option_no_script); + } + return 0; +} + +# define has_noad_option_nosubscript(a) has_noad_no_script_option(a, noad_option_no_sub_script) +# define has_noad_option_nosupscript(a) has_noad_no_script_option(a, noad_option_no_super_script) +# define has_noad_option_nosubprescript(a) has_noad_no_script_option(a, noad_option_no_sub_pre_script) +# define has_noad_option_nosupprescript(a) has_noad_no_script_option(a, noad_option_no_super_pre_script) + +# define has_noad_option_shiftedsubscript(a) (has_option(noad_options(a), noad_option_shifted_sub_script)) +# define has_noad_option_shiftedsupscript(a) (has_option(noad_options(a), noad_option_shifted_super_script)) +# define has_noad_option_shiftedsubprescript(a) (has_option(noad_options(a), noad_option_shifted_sub_pre_script)) +# define has_noad_option_shiftedsupprescript(a) (has_option(noad_options(a), noad_option_shifted_super_pre_script)) +# define has_noad_option_axis(a) (has_option(noad_options(a), noad_option_axis)) +# define has_noad_option_exact(a) (has_option(noad_options(a), noad_option_exact)) +# define has_noad_option_noaxis(a) (has_option(noad_options(a), 
noad_option_no_axis)) +# define has_noad_option_openupheight(a) (has_option(noad_options(a), noad_option_openup_height)) +# define has_noad_option_openupdepth(a) (has_option(noad_options(a), noad_option_openup_depth)) +# define has_noad_option_adapttoleft(a) (has_option(noad_options(a), noad_option_adapt_to_left_size)) +# define has_noad_option_adapttoright(a) (has_option(noad_options(a), noad_option_adapt_to_right_size)) +# define has_noad_option_limits(a) (has_option(noad_options(a), noad_option_limits)) +# define has_noad_option_nolimits(a) (has_option(noad_options(a), noad_option_no_limits)) +# define has_noad_option_nooverflow(a) (has_option(noad_options(a), noad_option_no_overflow)) +# define has_noad_option_preferfontthickness(a) (has_option(noad_options(a), noad_option_prefer_font_thickness)) +# define has_noad_option_noruling(a) (has_option(noad_options(a), noad_option_no_ruling)) +# define has_noad_option_unpacklist(a) (has_option(noad_options(a), noad_option_unpack_list)) +# define has_noad_option_nocheck(a) (has_option(noad_options(a), noad_option_no_check)) +# define has_noad_option_exact(a) (has_option(noad_options(a), noad_option_exact)) +# define has_noad_option_left(a) (has_option(noad_options(a), noad_option_left)) +# define has_noad_option_middle(a) (has_option(noad_options(a), noad_option_middle)) +# define has_noad_option_right(a) (has_option(noad_options(a), noad_option_right)) +# define has_noad_option_auto(a) (has_option(noad_options(a), noad_option_auto)) +# define has_noad_option_phantom(a) (has_option(noad_options(a), noad_option_phantom)) +# define has_noad_option_void(a) (has_option(noad_options(a), noad_option_void)) +# define has_noad_option_unrolllist(a) (has_option(noad_options(a), noad_option_unroll_list)) +# define has_noad_option_followedbyspace(a) (has_option(noad_options(a), noad_option_followed_by_space)) + +/*tex + In the meantime the codes and subtypes are in sync. The variable component does not really + become a subtype. 
+*/ + +typedef enum simple_noad_subtypes { + ordinary_noad_subtype, + operator_noad_subtype, + binary_noad_subtype, + relation_noad_subtype, + open_noad_subtype, + close_noad_subtype, + punctuation_noad_subtype, + variable_noad_subtype, /* we want to run in parallel */ + active_noad_subtype, /* we want to run in parallel */ + inner_noad_subtype, + under_noad_subtype, + over_noad_subtype, + fraction_noad_subtype, + radical_noad_subtype, + middle_noad_subtype, + accent_noad_subtype, + fenced_noad_subtype, + ghost_noad_subtype, + vcenter_noad_subtype, +} simple_noad_subtypes; + +# define last_noad_type vcenter_noad_subtype +# define last_noad_subtype vcenter_noad_subtype + +typedef enum math_component_types { + math_component_ordinary_code, + math_component_operator_code, + math_component_binary_code, + math_component_relation_code, + math_component_open_code, + math_component_close_code, + math_component_punctuation_code, + math_component_variable_code, + math_component_inner_code, + math_component_under_code, + math_component_over_code, + math_component_fraction_code, + math_component_radical_code, + math_component_middle_code, + math_component_accent_code, + math_component_fenced_code, + math_component_ghost_code, + math_component_atom_code, +} math_component_types; + +# define first_math_component_type math_component_ordinary_code +# define last_math_component_type math_component_accent_code + +/*tex + When I added adapt options, the |math_limits_cmd| became |math_modifier_cmd| just because it + nicely fits in there. 
+*/ + +typedef enum math_modifier_types { + display_limits_modifier_code, + limits_modifier_code, + no_limits_modifier_code, + adapt_to_left_modifier_code, + adapt_to_right_modifier_code, + axis_modifier_code, + no_axis_modifier_code, + phantom_modifier_code, + void_modifier_code, + source_modifier_code, + openup_height_modifier_code, + openup_depth_modifier_code, +} math_modifier_types; + +# define first_math_modifier_code display_limits_modifier_code +# define last_math_modifier_code openup_depth_modifier_code + +/*tex accent noads: todo, left and right offsets and options */ + +# define accent_noad_size noad_size +# define accent_top_character noad_extra_1 /*tex the |top_accent_chr| field of an accent noad */ +# define accent_bottom_character noad_extra_2 /*tex the |bot_accent_chr| field of an accent noad */ +# define accent_middle_character noad_extra_3 /*tex the |overlay_accent_chr| field of an accent noad */ +# define accent_fraction noad_extra_4 +# define accent_top_overshoot noad_extra_5 +# define accent_bot_overshoot noad_extra_6 + +typedef enum math_accent_subtypes { + bothflexible_accent_subtype, + fixedtop_accent_subtype, + fixedbottom_accent_subtype, + fixedboth_accent_subtype, +} math_accent_subtypes; + +# define last_accent_subtype fixedboth_accent_subtype + +/*tex + With these left and right fencing noads we have a historical mix of |fence| and |delimiter| (and + |shield|) naming which for now we keep. It gets swapped with the generic noad, so size matters. 
+ */ + +# define fence_noad_size noad_size +# define fence_delimiter_list noad_extra_1 // not really a list +# define fence_delimiter_top noad_extra_3 +# define fence_delimiter_bottom noad_extra_4 +//define fence_delimiter_first noad_extra_5 +//define fence_delimiter_last noad_extra_6 + +typedef enum fence_subtypes { + unset_fence_side, + left_fence_side, + middle_fence_side, + right_fence_side, + left_operator_side, + no_fence_side, + extended_left_fence_side, + extended_middle_fence_side, + extended_right_fence_side, +} fence_subtypes; + +# define last_fence_subtype extended_right_fence_side +# define first_fence_code left_fence_side +# define last_fence_code extended_right_fence_side + +/*tex + Fraction noads are generic in the sense that they are also used for non|-|fractions, not that + it matters much. We keep them as they are in \TEX\ but have more fields. + + We put the numerator and denomerator in script fields so there can be no such direct scripts + attached. Because we have prescripts we can used these fields and limit this handicap a bit but + if we ever overcome this (at the cost of more fields in these similar noads) we need to adapt + the error message for double scripts in |tex_run_math_script|. + +*/ + +# define fraction_noad_size noad_size +# define fraction_numerator noad_supprescr /* ! */ +# define fraction_denominator noad_subprescr /* ! 
*/ +# define fraction_rule_thickness noad_extra_1 +# define fraction_left_delimiter noad_extra_2 +# define fraction_right_delimiter noad_extra_3 +# define fraction_middle_delimiter noad_extra_4 +# define fraction_h_factor noad_extra_5 +# define fraction_v_factor noad_extra_6 + +typedef enum fraction_subtypes { + over_fraction_subtype, + atop_fraction_subtype, + above_fraction_subtype, + skewed_fraction_subtype, + stretched_fraction_subtype, +} fraction_subtypes; + +# define valid_fraction_subtype(s) (s >= over_fraction_subtype && s <= stretched_fraction_subtype) + +/*tex + Radical noads are like fraction noads, but they only store a |left_delimiter|. They are also + used for extensibles (over, under, etc) so the name is is somewhat confusing. +*/ + +# define radical_noad_size noad_size +# define radical_degree noad_extra_1 +# define radical_left_delimiter noad_extra_2 +# define radical_right_delimiter noad_extra_3 +# define radical_height noad_extra_5 +# define radical_depth noad_extra_6 + +typedef enum radical_subtypes { + normal_radical_subtype, + radical_radical_subtype, + root_radical_subtype, + rooted_radical_subtype, + under_delimiter_radical_subtype, + over_delimiter_radical_subtype, + delimiter_under_radical_subtype, + delimiter_over_radical_subtype, + delimited_radical_subtype, + h_extensible_radical_subtype, +} radical_subtypes; + +# define last_radical_subtype h_extensible_radical_subtype +# define last_radical_code h_extensible_radical_subtype + +/*tex + Again a very simple small node: it represents a math character so naturally it has a family. + It can be turned list. These are subnodes. When an extra options field gets added, the + overlapping character and list fields can be split, so then we also have the origin saved. + + The following nodes are kernel nodes: |math_char_node|, |math_text_char_node|, |sub_box_node| + and |sub_mlist_node|. Characters eventually becomes wrapped in a list. 
+*/ + +typedef enum math_kernel_options { + math_kernel_no_italic_correction = 0x0001, + math_kernel_no_left_pair_kern = 0x0002, + math_kernel_no_right_pair_kern = 0x0004, + math_kernel_auto_discretionary = 0x0008, + math_kernel_full_discretionary = 0x0010, +} math_kernel_options; + +# define math_kernel_node_size 5 +# define kernel_math_family(a) vinfo(a,2) +# define kernel_math_character(a) vlink(a,2) +# define kernel_math_options(a) vinfo(a,3) +# define kernel_math_list(a) vlink(a,3) +# define kernel_math_properties(a) vinfo0(a,4) /* for characters */ +# define kernel_math_group(a) vinfo1(a,4) /* for characters */ +# define kernel_math_index(a) vlink(a,4) /* for characters */ + +# define math_kernel_node_has_option(a,b) ((kernel_math_options(a) & b) == b) +# define math_kernel_node_set_option(a,b) kernel_math_options(a) = (kernel_math_options(a) | b) + +/*tex + This is also a subnode, this time for a delimiter field. The large family field is only used + in traditional \TEX\ fonts where a base character can come from one font, and the extensible + from another, but in \OPENTYPE\ math font that doesn't happen. +*/ + +# define math_delimiter_node_size 4 +# define delimiter_small_family(a) vinfo(a,2) /*tex |family| for small delimiter */ +# define delimiter_small_character(a) vlink(a,2) /*tex |character| for small delimiter */ +# define delimiter_large_family(a) vinfo(a,3) /*tex |family| for large delimiter */ +# define delimiter_large_character(a) vlink(a,3) /*tex |character| for large delimiter */ + +/*tex + Before we come to the by now rather large local par node we define some small ones. The + boundary nodes are an extended version of the original ones. The direction nodes are + a simplified version of what \OMEGA\ has as whatsit. In \LUATEX\ it became a first class + citizen and in \LUAMETATEX\ we cleaned it up. 
+*/ + +typedef enum boundary_subtypes { + cancel_boundary, + user_boundary, + protrusion_boundary, + word_boundary, + page_boundary, + par_boundary, +} boundary_subtypes; + +# define last_boundary_subtype word_boundary +# define last_boundary_code page_boundary + +# define boundary_node_size 3 +# define boundary_data(a) vinfo(a,2) + +typedef enum dir_subtypes { + normal_dir_subtype, + cancel_dir_subtype, +} dir_subtypes; + +# define last_dir_subtype cancel_dir_subtype + +# define dir_node_size 3 +# define dir_direction(a) vinfo(a,2) +# define dir_level(a) vlink(a,2) + +/*tex + Local par nodes come from \OMEGA\ and store the direction as well as local boxes. In \LUATEX + we use a leaner direction model and in \LUAMETATEX\ we only kept the two directions that just + work. In the end it is the backend that deals with these properties. The frontend just keeps + a little track of them. + + However, in \LUAMETATEX\ we can also store the paragraph state in this node. That way we no + longer have the issue that properties are lost when a group ends before a |\par| is triggered. + This is probably a feature that only makes sense in \CONTEXT\ which is why I made sure that + there is not much overhead. In the first version one could control each variable, but as we + ran out of bits in the end was done per group of variables. However, when I really need more + detail I might go for a 64 bit field instead. After all we have that possibility in memory + words. + + These local par nodes can actually end up in the middle of lines as they can be used to change + the left and right box as well as inject penalties. For that reason they now have a proper + subtype so that the initial and successive instances can be recognized. 
+ */ + +typedef enum par_codes { + par_none_code, + par_hsize_code, + par_left_skip_code, + par_right_skip_code, + par_hang_indent_code, + par_hang_after_code, + par_par_indent_code, + par_par_fill_left_skip_code, + par_par_fill_right_skip_code, + par_par_init_left_skip_code, + par_par_init_right_skip_code, + par_adjust_spacing_code, + par_protrude_chars_code, + par_pre_tolerance_code, + par_tolerance_code, + par_emergency_stretch_code, + par_looseness_code, + par_last_line_fit_code, + par_line_penalty_code, + par_inter_line_penalty_code, + par_club_penalty_code, + par_widow_penalty_code, + par_display_widow_penalty_code, + par_orphan_penalty_code, + par_broken_penalty_code, + par_adj_demerits_code, + par_double_hyphen_demerits_code, + par_final_hyphen_demerits_code, + par_par_shape_code, + par_inter_line_penalties_code, + par_club_penalties_code, + par_widow_penalties_code, + par_display_widow_penalties_code, + par_orphan_penalties_code, + par_baseline_skip_code, + par_line_skip_code, + par_line_skip_limit_code, + par_adjust_spacing_step_code, + par_adjust_spacing_shrink_code, + par_adjust_spacing_stretch_code, + par_hyphenation_mode_code, + par_shaping_penalties_mode_code, + par_shaping_penalty_code, +} par_codes; + +typedef enum par_categories { + par_none_category = 0x00000000, + par_hsize_category = 0x00000001, // \hsize + par_skip_category = 0x00000002, // \leftskip \rightskip + par_hang_category = 0x00000004, // \hangindent \hangafter + par_indent_category = 0x00000008, // \parindent + par_par_fill_category = 0x00000010, // \parfillskip \parfillleftskip + par_adjust_category = 0x00000020, // \adjustspacing + par_protrude_category = 0x00000040, // \protrudechars + par_tolerance_category = 0x00000080, // \tolerance \pretolerance + par_stretch_category = 0x00000100, // \emergcystretch + par_looseness_category = 0x00000200, // \looseness + par_last_line_category = 0x00000400, // \lastlinefit + par_line_penalty_category = 0x00000800, // \linepenalty 
\interlinepenalty \interlinepenalties + par_club_penalty_category = 0x00001000, // \clubpenalty \clubpenalties + par_widow_penalty_category = 0x00002000, // \widowpenalty \widowpenalties + par_display_penalty_category = 0x00004000, // \displaypenalty \displaypenalties + par_broken_penalty_category = 0x00008000, // \brokenpenalty + par_demerits_category = 0x00010000, // \doublehyphendemerits \finalhyphendemerits \adjdemerits + par_shape_category = 0x00020000, // \parshape + par_line_category = 0x00040000, // \baselineskip \lineskip \lineskiplimit + par_hyphenation_category = 0x00080000, // \Hyphenationmode + par_shaping_penalty_category = 0x00100000, // \shapingpenaltiesmode + par_orphan_penalty_category = 0x00200000, // \orphanpenalties + par_all_category = 0x7FFFFFFF, // +} par_categories; + +static int par_category_to_codes[] = { + par_none_category, + par_hsize_category, // par_hsize_code + par_skip_category, // par_left_skip_code + par_skip_category, // par_right_skip_code + par_hang_category, // par_hang_indent_code + par_hang_category, // par_hang_after_code + par_indent_category, // par_par_indent_code + par_par_fill_category, // par_par_fill_skip_code + par_par_fill_category, // par_par_fill_left_skip_code + par_par_fill_category, // par_par_init_skip_code + par_par_fill_category, // par_par_init_skip_code + par_adjust_category, // par_adjust_spacing_code + par_protrude_category, // par_protrude_chars_code + par_tolerance_category, // par_pre_tolerance_code + par_tolerance_category, // par_tolerance_code + par_stretch_category, // par_emergency_stretch_code + par_looseness_category, // par_looseness_code + par_last_line_category, // par_last_line_fit_code + par_line_penalty_category, // par_line_penalty_code + par_line_penalty_category, // par_inter_line_penalty_code + par_club_penalty_category, // par_club_penalty_code + par_widow_penalty_category, // par_widow_penalty_code + par_display_penalty_category, // par_display_widow_penalty_code + 
par_orphan_penalty_category,          // par_orphan_penalty_code
    par_broken_penalty_category,      // par_broken_penalty_code
    par_demerits_category,            // par_adj_demerits_code
    par_demerits_category,            // par_double_hyphen_demerits_code
    par_demerits_category,            // par_final_hyphen_demerits_code
    par_shape_category,               // par_par_shape_code
    par_line_penalty_category,        // par_inter_line_penalties_code
    par_club_penalty_category,        // par_club_penalties_code
    par_widow_penalty_category,       // par_widow_penalties_code
    par_display_penalty_category,     // par_display_widow_penalties_code
    par_orphan_penalty_category,      // par_orphan_penalties_code
    par_line_category,                // par_baseline_skip_code
    par_line_category,                // par_line_skip_code
    par_line_category,                // par_line_skip_limit_code
    par_adjust_category,              // par_adjust_spacing_step_code
    par_adjust_category,              // par_adjust_spacing_shrink_code
    par_adjust_category,              // par_adjust_spacing_stretch_code
    par_hyphenation_category,         // par_hyphenation_mode_code
    par_shaping_penalty_category,     // par_shaping_penalties_mode_code
    par_shaping_penalty_category,     // par_shaping_penalty_code
};

/*tex
    Todo: make the fields 6+ into a par_state node so that local box ones can be
    small. Also, penalty and broken fields now are duplicate. Do we need to keep
    these?
*/

/*tex
    Field layout of a par node. Every accessor maps a name onto either the |vinfo| or the |vlink|
    half of slot |n|; the slots 2 up to 27 are used, which matches the node size of 28. When a
    field is added both this table and |par_node_size| have to be adapted.
*/

# define par_node_size                  28
# define par_penalty_interline(a)       vinfo(a,2)  /*tex These come from \OMEGA. */
# define par_penalty_broken(a)          vlink(a,2)  /*tex These come from \OMEGA. */
# define par_box_left(a)                vinfo(a,3)
# define par_box_left_width(a)          vlink(a,3)
# define par_box_right(a)               vinfo(a,4)
# define par_box_right_width(a)         vlink(a,4)
# define par_box_middle(a)              vinfo(a,5)  /* no width here */
# define par_dir(a)                     vlink(a,5)
# define par_state(a)                   vinfo(a,6)  /*tex bitset, see |tex_set_par_state| below */
# define par_hsize(a)                   vlink(a,6)
# define par_left_skip(a)               vinfo(a,7)
# define par_right_skip(a)              vlink(a,7)
# define par_hang_indent(a)             vinfo(a,8)
# define par_hang_after(a)              vlink(a,8)
# define par_par_indent(a)              vinfo(a,9)
# define par_par_fill_left_skip(a)      vlink(a,9)
# define par_par_fill_right_skip(a)     vinfo(a,10)
# define par_adjust_spacing(a)          vlink(a,10)
# define par_protrude_chars(a)          vinfo(a,11)
# define par_pre_tolerance(a)           vlink(a,11)
# define par_tolerance(a)               vinfo(a,12)
# define par_emergency_stretch(a)       vlink(a,12)
# define par_looseness(a)               vinfo(a,13)
# define par_last_line_fit(a)           vlink(a,13)
# define par_line_penalty(a)            vinfo(a,14)
# define par_inter_line_penalty(a)      vlink(a,14)
# define par_club_penalty(a)            vinfo(a,15)
# define par_widow_penalty(a)           vlink(a,15)
# define par_display_widow_penalty(a)   vinfo(a,16)
# define par_orphan_penalty(a)          vlink(a,16)
# define par_broken_penalty(a)          vinfo(a,17)
# define par_adj_demerits(a)            vlink(a,17)
# define par_double_hyphen_demerits(a)  vinfo(a,18)
# define par_final_hyphen_demerits(a)   vlink(a,18)
# define par_par_shape(a)               vinfo(a,19)
# define par_inter_line_penalties(a)    vlink(a,19)
# define par_club_penalties(a)          vinfo(a,20)
# define par_widow_penalties(a)         vlink(a,20)
# define par_display_widow_penalties(a) vinfo(a,21)
# define par_orphan_penalties(a)        vlink(a,21)
# define par_baseline_skip(a)           vinfo(a,22)
# define par_line_skip(a)               vlink(a,22)
# define par_line_skip_limit(a)         vinfo(a,23)
# define par_adjust_spacing_step(a)     vlink(a,23)
# define par_adjust_spacing_shrink(a)   vinfo(a,24)
# define par_adjust_spacing_stretch(a)  vlink(a,24)
# define par_end_par_tokens(a)          vinfo(a,25)
# define par_hyphenation_mode(a)        vlink(a,25)
# define par_shaping_penalties_mode(a)  vinfo(a,26)
# define par_shaping_penalty(a)         vlink(a,26)
# define par_par_init_left_skip(a)      vlink(a,27)
# define par_par_init_right_skip(a)     vinfo(a,27)

typedef enum par_subtypes {
    vmode_par_par_subtype,
    local_box_par_subtype,
    hmode_par_par_subtype,
    penalty_par_subtype,
    math_par_subtype,
} par_subtypes;

# define last_par_subtype math_par_subtype

/*tex
    This one returns a nonzero value when |n| is not null, is a par node, and has either the
    |vmode_par_par_subtype| or the |hmode_par_par_subtype|. The other subtypes are not seen as
    the start of a paragraph.
*/

inline static int tex_is_start_of_par_node(halfword n)
{
    return ( n && (node_type(n) == par_node) && (node_subtype(n) == vmode_par_par_subtype || node_subtype(n) == hmode_par_par_subtype) );
}

extern halfword tex_get_par_par    (halfword p, halfword what);
extern void     tex_set_par_par    (halfword p, halfword what, halfword v, int force);
extern void     tex_snapshot_par   (halfword p, halfword what);
extern halfword tex_find_par_par   (halfword head);
/*     halfword tex_internal_to_par_code (halfword cmd, halfword index); */
extern void     tex_update_par_par (halfword cmd, halfword index);

/*tex
    The |par_state| field is a bitset. The helpers below look up the bits that belong to |what|
    in |par_category_to_codes| and then test or set them. A test only succeeds when all bits of
    the looked up mask are set. NOTE(review): |what| is used as index without a range check, so
    callers are assumed to pass a valid code; confirm at the call sites.
*/

inline static int  tex_par_state_is_set (halfword p,     halfword what) { return (par_state(p) & par_category_to_codes[what]) == par_category_to_codes[what]; }
inline static void tex_set_par_state    (halfword p,     halfword what) { par_state(p) |= par_category_to_codes[what]; }
inline static int  tex_par_to_be_set    (halfword state, halfword what) { return (state & par_category_to_codes[what]) == par_category_to_codes[what]; }

/*tex
    Because whatsits are used by the backend (or callbacks in the frontend) we do provide this node.
    It only has the basic properties: subtype, attribute, prev link and next link. User nodes have
    been dropped because one can use whatsits to achieve the same. We also don't standardize the
    subtypes as it's very macro package specific what they do. So, only a size here:
*/

# define whatsit_node_size 2

/*tex
    Active and passive nodes are used in the par builder.
There are plenty of comments in the code + that explain them (although it's not that trivial I guess). Delta nodes just store the + progression in widths, stretch and shrink: they are copies of arrays. Originally they just used + offsets: + + \starttyping + # define delta_node_size 10 + # define delta_field(a,n) node_next(a + n) + \stoptyping + + But that wasted 9 halfs for storing the 9 fields. So, next I played with this: + + \starttyping + # define delta_field_1(d) (delta_field(d,1)) // or: vinfo(d,1) + # define delta_field_2(d) (delta_field(d,2)) // or: vlink(d,1) + ... + # define delta_field_9(d) (delta_field(d,9)) // or: vinfo(d,5) + \stoptyping + + But soon after that more meaningful names were introduced, simply because in the code where they + are used also verbose names showed up. + + The active node is actually a |hyphenated_node| or an |unhyphenated_node| but for now we keep + the \TEX\ lingua. We could probably turn the type into a subtype and move fitness to another + spot. +*/ + +/* is vinfo(a,2) used? 
if not we can have fitness there and hyphenated/unhyphenated as subtype */

/*tex
    An active node: the fitness class lives in the subtype field, slot 1 holds the line number
    and the pointer to the passive node, slot 2 only uses its |vlink| half (for the demerits) and
    slot 3 holds the shortfall and the glue.
*/

# define active_node_size                  4           /*tex |hyphenated_node| or |unhyphenated_node| */
# define active_fitness                    node_subtype /*tex |very_loose_fit..tight_fit| on final line for this break */
# define active_break_node(a)              vlink(a,1)  /*tex pointer to the corresponding passive node */
# define active_line_number(a)             vinfo(a,1)  /*tex line that begins at this breakpoint */
# define active_total_demerits(a)          vlink(a,2)  /*tex the quantity that \TEX\ minimizes */
# define active_short(a)                   vinfo(a,3)  /*tex |shortfall| of this line */
# define active_glue(a)                    vlink(a,3)  /*tex corresponding glue stretch or shrink */

/*tex A passive node: the slots 1 up to 6 are used and both halves of each slot are taken. */

# define passive_node_size                 7
# define passive_cur_break(a)              vlink(a,1)  /*tex in passive node, points to position of this breakpoint */
# define passive_prev_break(a)             vinfo(a,1)  /*tex points to passive node that should precede this one */
# define passive_pen_inter(a)              vinfo(a,2)
# define passive_pen_broken(a)             vlink(a,2)
# define passive_left_box(a)               vlink(a,3)
# define passive_left_box_width(a)         vinfo(a,3)
# define passive_last_left_box(a)          vlink(a,4)
# define passive_last_left_box_width(a)    vinfo(a,4)
# define passive_right_box(a)              vlink(a,5)
# define passive_right_box_width(a)        vinfo(a,5)
# define passive_serial(a)                 vlink(a,6)  /*tex serial number for symbolic identification (pass) */
# define passive_middle_box(a)             vinfo(a,6)

/*tex
    A delta node: nine fields are packed in the slots 1 up to 5. In slot 1 only the |vinfo| half
    is used, for the total glue.
*/

# define delta_node_size                   6
# define delta_field_total_glue(d)         vinfo(d,1)
# define delta_field_total_shrink(d)       vinfo(d,2)
# define delta_field_total_stretch(d)      vlink(d,2)
# define delta_field_total_fi_amount(d)    vinfo(d,3)
# define delta_field_total_fil_amount(d)   vlink(d,3)
# define delta_field_total_fill_amount(d)  vinfo(d,4)
# define delta_field_total_filll_amount(d) vlink(d,4)
# define delta_field_font_shrink(d)        vinfo(d,5)
# define delta_field_font_stretch(d)       vlink(d,5)

/*tex
    Again we now have some helpers.
We have a double linked list so here we go: +*/ + +inline static void tex_couple_nodes(int a, int b) +{ + node_next(a) = b; + node_prev(b) = a; +} + +inline static void tex_try_couple_nodes(int a, int b) +{ + if (b) { + if (a) { + node_next(a) = b; + } + node_prev(b) = a; + } else if (a) { + node_next(a) = null; + } +} + +inline static void tex_uncouple_node(int a) +{ + node_next(a) = null; + node_prev(a) = null; +} + +inline static halfword tex_head_of_node_list(halfword n) +{ + while (node_prev(n)) { + n = node_prev(n); + } + return n; +} + +inline static halfword tex_tail_of_node_list(halfword n) +{ + while (node_next(n)) { + n = node_next(n); + } + return n; +} + +/*tex + Attribute management is kind of complicated. They are stored in a sorted linked list and we + try to share these for successive nodes. In \LUATEX\ a state is kept and reset frequently but + in \LUAMETATEX\ we try to be more clever, for instance we keep track of grouping. This comes + as some overhead but saves reconstructing (often the same) list. It also saves memory. 
+*/ + +# define attribute_cache_disabled max_halfword +# define current_attribute_state lmt_node_memory_state.attribute_cache + +extern halfword tex_copy_attribute_list (halfword attr); +extern halfword tex_copy_attribute_list_set (halfword attr, int index, int value); +extern halfword tex_patch_attribute_list (halfword attr, int index, int value); +extern void tex_dereference_attribute_list (halfword attr); +extern void tex_build_attribute_list (halfword target); +extern halfword tex_current_attribute_list (void); +extern int tex_unset_attribute (halfword target, int index, int value); +extern void tex_unset_attributes (halfword first, halfword last, int index); +extern void tex_set_attribute (halfword target, int index, int value); +extern int tex_has_attribute (halfword target, int index, int value); + +extern void tex_reset_node_properties (halfword target); + +# define get_attribute_list(target) \ + node_attr(target) + +# define add_attribute_reference(a) do { \ + if (a && a != attribute_cache_disabled) { \ + ++attribute_count(a); \ + } \ +} while (0) + +# define delete_attribute_reference(a) do { \ + if (a && a != attribute_cache_disabled) { \ + tex_dereference_attribute_list(a); \ + } \ +} while (0) + +# define remove_attribute_list(target) do { \ + halfword old_a = node_attr(target); \ + delete_attribute_reference(old_a); \ + node_attr(target) = null; \ +} while (0) + +/* +inline static void remove_attribute_list(halfword target) +{ + halfword a_old = node_attr(target); + if (a_old && a_old != attribute_cache_disabled) { + dereference_attribute_list(a_old); + } + node_attr(target) = null; +} +*/ + +/* This can be dangerous: */ + +# define wipe_attribute_list_only(target) \ + node_attr(target) = null; + +/*tex + Better is to add a ref before we remove one because there's the danger of premature freeing + otherwise. 
*/

/*tex The attribute state takes one record on the save stack. */

typedef enum saved_attribute_items {
    saved_attribute_item_list  = 0,
    saved_attribute_n_of_items = 1,
} saved_attribute_items;

/*tex
    Let |target| share the attribute list of |source|. The new reference is added before the old
    one is released, so nothing gets freed too early when both happen to be the same list.
*/

inline static void tex_attach_attribute_list_copy(halfword target, halfword source)
{
    halfword a_new = node_attr(source);
    halfword a_old = node_attr(target);
    node_attr(target) = a_new;
    add_attribute_reference(a_new);
    delete_attribute_reference(a_old);
}

/*tex
    Attach the list |a_new| to |target|. Nothing happens when it already is the list of the
    target. Again we add the reference before we release the old one.
*/

inline static void tex_attach_attribute_list_attribute(halfword target, halfword a_new)
{
    halfword a_old = node_attr(target);
    if (a_old != a_new) {
        node_attr(target) = a_new;
        add_attribute_reference(a_new);
        delete_attribute_reference(a_old);
    }
}

# define attach_current_attribute_list tex_build_attribute_list /* (target) */

# define set_current_attribute_state(v) do { \
    current_attribute_state = v; \
} while (0)

/*tex
    This one only acts when |eq_value(id)| differs from |value|. In the global case we run over
    the save stack, from the top down to zero, and for each |saved_attribute_list| record we
    release the saved list and replace it by |attribute_cache_disabled|. Otherwise we release
    the reference held via |current_attribute_state|. In both cases the current state becomes
    |attribute_cache_disabled| afterwards. The register itself is not assigned here.
*/

# define change_attribute_register(a,id,value) do { \
    if (eq_value(id) != value) { \
        if (is_global(a)) { \
            int i; \
            for (i = (lmt_save_state.save_stack_data.ptr - 1); i >= 0; i--) { \
                if (save_type(i) == saved_attribute_list) { \
                    delete_attribute_reference(save_value(i)); \
                    save_value(i) = attribute_cache_disabled; \
                } \
            } \
        } else { \
            delete_attribute_reference(current_attribute_state); \
        } \
        set_current_attribute_state(attribute_cache_disabled); \
    } \
} while (0)

/*tex
    Entering a group: the current state goes into a save stack record, the stack pointer moves
    up by |saved_attribute_n_of_items| and the saved list gets an extra reference.
*/

# define save_attribute_state_before() do { \
    halfword c = current_attribute_state; \
    tex_set_saved_record(saved_attribute_item_list, saved_attribute_list, 0, c); \
    lmt_save_state.save_stack_data.ptr += saved_attribute_n_of_items; \
    add_attribute_reference(c); \
} while (0)

# define save_attribute_state_after() do { \
} while (0)

/*tex
    Leaving a group: first the reference of the current state is released and after that the
    stack pointer moves down and the saved list becomes the current state again.
*/

# define unsave_attribute_state_before() do { \
    halfword c = current_attribute_state; \
    delete_attribute_reference(c); \
} while (0)

# define unsave_attribute_state_after() do { \
    lmt_save_state.save_stack_data.ptr -= saved_attribute_n_of_items; \
    set_current_attribute_state(saved_value(saved_attribute_item_list)); \
} while (0)

/*tex
    We now arrive at some functions that report the nodes to users. The subtype information that
    is used in the \LUA\ interface is stored alongside.
*/

extern void     tex_print_short_node_contents         (halfword n);
extern void     tex_show_node_list                    (halfword n, int threshold, int max);
extern halfword tex_actual_box_width                  (halfword r, scaled base_width);
extern void     tex_print_name                        (halfword p, const char *what);
extern void     tex_print_node_list                   (halfword n, const char *what, int threshold, int max);
/*     void     tex_print_node_and_details            (halfword p); */
/*     void     tex_print_subtype_and_attributes_info (halfword p, halfword s, node_info *data); */
extern void     tex_print_extended_subtype            (halfword p, quarterword s);
extern void     tex_aux_show_dictionary               (halfword p, halfword properties, halfword group, halfword index, halfword font, halfword character);

extern halfword tex_new_node                          (quarterword i, quarterword j);
extern void     tex_flush_node_list                   (halfword n);
extern void     tex_flush_node                        (halfword n);
extern halfword tex_copy_node_list                    (halfword n, halfword e);
extern halfword tex_copy_node                         (halfword n);
extern halfword tex_copy_node_only                    (halfword n);
/*     halfword tex_fix_node_list                     (halfword n); */

/*tex
    We already defined glue and gluespec node but here are some of the properties
    that they have. Again a few helpers.
+*/ + +typedef enum glue_orders { + normal_glue_order, + fi_glue_order, + fil_glue_order, + fill_glue_order, + filll_glue_order +} glue_orders; + +typedef enum glue_amounts { + /* we waste slot zero, we padd anyway */ + total_glue_amount = 1, // 1 // + total_stretch_amount = 2, // 3 // + total_fi_amount = 3, // 4 // + total_fil_amount = 4, // 5 // + total_fill_amount = 5, // 6 // + total_filll_amount = 6, // 7 // + total_shrink_amount = 7, // 2 // + font_stretch_amount = 8, // 8 // + font_shrink_amount = 9, // 9 // +} glue_amounts; + +# define min_glue_order normal_glue_order +# define max_glue_order filll_glue_order + +typedef enum glue_signs { + normal_glue_sign, + stretching_glue_sign, + shrinking_glue_sign +} glue_signs; + +# define min_glue_sign normal_glue_sign +# define max_glue_sign shrinking_glue_sign + +# define normal_glue_multiplier 0.0 + +inline halfword tex_checked_glue_sign(halfword sign) +{ + if ((sign < min_glue_sign) || (sign > max_glue_sign)) { + return normal_glue_sign; + } else { + return sign; + } +} + +inline halfword tex_checked_glue_order(halfword order) +{ + if ((order < min_glue_order) || (order > max_glue_order)) { + return normal_glue_order; + } else { + return order; + } +} + +/*tex + These are reserved nodes that sit at the start of main memory. We could actually just allocate + them, but then we also need to set some when we start up. Now they are just saved in the format + file. In \TEX\ these nodes were shared as much as possible (using a reference count) but here + we just use copies. + + Below we start at |zero_glue| which in our case is just 0, or |null| in \TEX\ speak. After these + reserved nodes the memory used for whatever nodes are needed takes off. + + Changing this to real nodes makes sense but is also tricky due to initializations ... some day + (we need to store stuff in teh states then and these are not saved!). 
*/

/*tex
    Each entry starts where the previous one ends, so the size that is added on a line is the
    size that is reserved for the entry before it. For instance |page_insert_head| occupies
    |split_node_size| and |page_head| occupies |glue_node_size|. The remarks at the end of the
    lines therefore relate to the previous entry.
*/

# define fi_glue           (zero_glue         + glue_spec_size)  /*tex These are constants */
# define fil_glue          (fi_glue           + glue_spec_size)
# define fill_glue         (fil_glue          + glue_spec_size)
# define filll_glue        (fill_glue         + glue_spec_size)
# define fil_neg_glue      (filll_glue        + glue_spec_size)

# define page_insert_head  (fil_neg_glue      + glue_spec_size)
# define contribute_head   (page_insert_head  + split_node_size) /*tex This was temp_node_size but we assign more. */
# define page_head         (contribute_head   + temp_node_size)
# define temp_head         (page_head         + glue_node_size)  /*tex It gets a glue type assigned. */
# define hold_head         (temp_head         + temp_node_size)
# define post_adjust_head  (hold_head         + temp_node_size)
# define pre_adjust_head   (post_adjust_head  + temp_node_size)
# define post_migrate_head (pre_adjust_head   + temp_node_size)
# define pre_migrate_head  (post_migrate_head + temp_node_size)
# define align_head        (pre_migrate_head  + temp_node_size)
# define active_head       (align_head        + temp_node_size)
# define end_span          (active_head       + active_node_size)
# define begin_period      (end_span          + span_node_size)  /*tex Used to mark begin of word in hjn. */
# define end_period        (begin_period      + glyph_node_size) /*tex Used to mark end of word in hjn. */

/*tex The last reserved memory word: |end_period| itself takes |glyph_node_size| words. */

# define last_reserved     (end_period        + glyph_node_size - 1)

/*tex More helpers!
*/ + +extern int tex_list_has_glyph (halfword list); + +extern halfword tex_new_null_box_node (quarterword type, quarterword subtype); +extern halfword tex_new_rule_node (quarterword subtype); +extern halfword tex_new_glyph_node (quarterword subtype, halfword fnt, halfword chr, halfword parent); /*tex afterwards: when we mess around */ +extern halfword tex_new_char_node (quarterword subtype, halfword fnt, halfword chr, int all); /*tex as we go: in maincontrol */ +extern halfword tex_new_text_glyph (halfword fnt, halfword chr); +extern halfword tex_new_disc_node (quarterword subtype); +extern halfword tex_new_glue_spec_node (halfword param); +extern halfword tex_new_param_glue_node (quarterword param, quarterword subtype); +extern halfword tex_new_glue_node (halfword qlue, quarterword subtype); +extern halfword tex_new_kern_node (scaled width, quarterword subtype); +extern halfword tex_new_penalty_node (halfword penalty, quarterword subtype); +extern halfword tex_new_par_node (quarterword mode); + +extern halfword tex_new_temp_node (void); + +extern scaled tex_glyph_width (halfword p); /* x/y scaled */ +extern scaled tex_glyph_height (halfword p); /* x/y scaled */ +extern scaled tex_glyph_depth (halfword p); /* x/y scaled */ +extern scaled tex_glyph_total (halfword p); /* x/y scaled */ +extern scaledwhd tex_glyph_dimensions (halfword p); /* x/y scaled */ +extern int tex_glyph_has_dimensions (halfword p); /* x/y scaled */ +extern scaled tex_glyph_width_ex (halfword p); /* x/y scaled, expansion included */ +extern scaledwhd tex_glyph_dimensions_ex (halfword p); /* x/y scaled, expansion included */ + +extern halfword tex_kern_dimension (halfword p); +extern halfword tex_kern_dimension_ex (halfword p); /* expansion included */ + +extern scaledwhd tex_pack_dimensions (halfword p); + +extern halfword tex_list_node_mem_usage (void); +extern halfword tex_reversed_node_list (halfword list); +extern int tex_n_of_used_nodes (int counts[]); + +# define _valid_node_(p) ((p > 
lmt_node_memory_state.reserved) && (p < lmt_node_memory_state.nodes_data.allocated) && (lmt_node_memory_state.nodesizes[p] > 0)) + +inline static int tex_valid_node(halfword n) +{ + return n && _valid_node_(n) ? n : null; +} + +/*tex This is a bit strange place but better than a macro elsewhere: */ + +inline static int tex_math_skip_boundary(halfword n) +{ + return (n && node_type(n) == glue_node + && (node_subtype(n) == space_skip_glue || + node_subtype(n) == xspace_skip_glue || + node_subtype(n) == zero_space_skip_glue)); +} + +typedef enum special_node_list_types { /* not in sycn with the above .. maybe add bogus ones */ + page_insert_list_type, + contribute_list_type, + page_list_type, + temp_list_type, + hold_list_type, + post_adjust_list_type, + pre_adjust_list_type, + post_migrate_list_type, + pre_migrate_list_type, + align_list_type, + /* in different spot */ + page_discards_list_type, + split_discards_list_type, + // best_page_break_type +} special_node_list_types; + +extern int tex_is_special_node_list (halfword n, int *istail); +extern halfword tex_get_special_node_list (special_node_list_types list, halfword *tail); +extern void tex_set_special_node_list (special_node_list_types list, halfword head); + +extern scaled tex_effective_glue (halfword parent, halfword glue); + +extern const char *tex_aux_subtype_str (halfword n ); + +# endif + diff --git a/source/luametatex/source/tex/texpackaging.c b/source/luametatex/source/tex/texpackaging.c new file mode 100644 index 000000000..5942c1365 --- /dev/null +++ b/source/luametatex/source/tex/texpackaging.c @@ -0,0 +1,3409 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + We're essentially done with the parts of \TEX\ that are concerned with the input (|get_next|) + and the output (|ship_out|). So it's time to get heavily into the remaining part, which does + the real work of typesetting. 
+ + After lists are constructed, \TEX\ wraps them up and puts them into boxes. Two major + subroutines are given the responsibility for this task: |hpack| applies to horizontal lists + (hlists) and |vpack| applies to vertical lists (vlists). The main duty of |hpack| and |vpack| + is to compute the dimensions of the resulting boxes, and to adjust the glue if one of those + dimensions is pre-specified. The computed sizes normally enclose all of the material inside the + new box; but some items may stick out if negative glue is used, if the box is overfull, or if a + |\vbox| includes other boxes that have been shifted left. + + The subroutine call |hpack(p, w, m)| returns a pointer to an |hlist_node| for a box containing + the hlist that starts at |p|. Parameter |w| specifies a width; and parameter |m| is either + |exactly| or |additional|. Thus, |hpack(p, w, exactly)| produces a box whose width is exactly + |w|, while |hpack(p, w, additional)| yields a box whose width is the natural width plus |w|. It + is convenient to define a macro called |natural| to cover the most common case, so that we can + say |hpack(p, natural)| to get a box that has the natural width of list |p|. + + Similarly, |vpack(p, w, m)| returns a pointer to a |vlist_node| for a box containing the vlist + that starts at |p|. In this case |w| represents a height instead of a width; the parameter |m| + is interpreted as in |hpack|. + + The parameters to |hpack| and |vpack| correspond to \TEX's primitives like |\hbox to 300pt|, + |\hbox spread 10pt|; note that |\hbox| with no dimension following it is equivalent to |\hbox + spread 0pt|. 
The |scan_spec| subroutine scans such constructions in the user's input, including + the mandatory left brace that follows them, and it puts the specification onto |save_stack| so + that the desired box can later be obtained by executing the following code: + + \starttyping + save_state.save_ptr := save_state.save_ptr-1; + hpack(p, saved_value(0), saved_level(0)); + \stoptyping + + Scan a box specification and left brace: + */ + +/*tex + The next version is the (current) end point of successive improvements. After some keys were + added it became important to avoid redundant checking and pushing back mismatched keys. The + older (maybe more readable) variants using |scan_keyword| can be found in the archives (zip + and git) instead of as comments here. +*/ + +/*tex + + When scanning, special care is necessary to ensure that the special |save_stack| codes are + placed just below the new group code, because scanning can change |save_stack| when |\csname| + appears. This coincides with the text on |dir| and |attr| keywords, as these are exactly the + uses of |\hbox|, |\vbox|, and |\vtop| in the input stream (the others are |\vcenter|, |\valign|, + and |\halign|). + + Scan a box specification and left brace comes next. Again, the more verbose, but already + rather optimized intermediate variants are in the archives. Improving scanners like this happens + stepwise in order to maintain compatibility (although \unknown\ we now quit earlier in a + mismatch so we're not exactly compatible when a forward looking error happens). 
*/

/*tex
    This scanner handles the optional keywords that can follow a box command, up to and including
    the left brace. The keywords are |to|, |target|, |adapt|, |attr|, |anchor|, |anchors|, |axis|,
    |shift|, |spread|, |source|, |direction|, |delay|, |orientation|, |xoffset|, |xmove|,
    |yoffset|, |ymove|, |reverse|, |retain|, |container| and |class|, in any order and possibly
    more than once, in which case a later value normally replaces an earlier one (|axis| and
    |delay| accumulate bits and each |attr| patches the same list).

    The first one or two characters are matched with |tex_scan_character| and the remainder of
    the keyword is then checked by |tex_scan_mandate_keyword|. Its second argument looks like the
    number of characters that was already seen (2 for |target|, 1 for |orientation|, 3 for
    |reverse|); TODO confirm against the scanner. The two letters of |to| are fully matched by
    the two character scans so there no keyword check is needed.

    Afterwards all values are stored in records on the save stack, the save pointer is bumped by
    |saved_full_spec_n_of_items|, a new save level |c| is opened, the left brace is scanned when
    we didn't run into one already, and the par and text direction are updated.

    The arguments are: |c| is the group code passed to |tex_new_save_level|, |spec_direction| is
    the initial direction that can be overloaded by the |direction| key, |just_pack| is stored
    as-is in the |only_pack| record and |shift| is the initial shift that can be overloaded by
    the |shift| key.
*/

static void tex_aux_scan_full_spec(quarterword c, quarterword spec_direction, int just_pack, scaled shift)
{
    quarterword spec_code = packing_additional;
    int spec_amount = 0;
    halfword attrlist = null;
    halfword orientation = 0;
    halfword reverse = 0;
    halfword container = 0;
    scaled xoffset = 0;
    scaled yoffset = 0;
    scaled xmove = 0;
    scaled ymove = 0;
    halfword source = 0;
    halfword target = 0;
    halfword anchor = 0;
    halfword geometry = 0;
    halfword axis = 0;
    halfword state = 0;
    halfword retain = 0;
    halfword mainclass = unset_noad_class;
    /*tex The context record was set by the caller, we fetch it here and store it again later. */
    int context = saved_value(saved_full_spec_item_context);
    int brace = 0;
    while (1) {
        /*tex Maybe |migrate <int>| makes sense here. */
        /*tex
            NOTE(review): the set below has an |l| and |L| but there is no matching case, so
            these end up in the default branch. Whether the character has been consumed by then
            depends on |tex_scan_character|; to be checked.
        */
        switch (tex_scan_character("tascdoxyrlTASCDOXYRL", 1, 1, 1)) {
            case 0:
                goto DONE;
            case 't': case 'T':
                switch (tex_scan_character("aoAO", 0, 0, 0)) {
                    case 'a': case 'A':
                        if (tex_scan_mandate_keyword("target", 2)) {
                            target = tex_scan_int(1, NULL);
                        }
                        break;
                    case 'o': case 'O':
                        spec_code = packing_exactly;
                        spec_amount = tex_scan_dimen(0, 0, 0, 0, NULL);
                        break;
                    default:
                        tex_aux_show_keyword_error("target|to");
                        goto DONE;
                }
                break;
            case 'a': case 'A':
                switch (tex_scan_character("dntxDNTX", 0, 0, 0)) {
                    case 'd': case 'D':
                        if (tex_scan_mandate_keyword("adapt", 2)) {
                            spec_code = packing_adapted;
                            spec_amount = tex_scan_limited_scale(0);
                        }
                        break;
                    case 't': case 'T':
                        if (tex_scan_mandate_keyword("attr", 2)) {
                            halfword i = tex_scan_attribute_register_number();
                            halfword v = tex_scan_int(1, NULL);
                            /*tex We only need a private list when the value differs from the current one. */
                            if (eq_value(register_attribute_location(i)) != v) {
                                if (attrlist) {
                                    attrlist = tex_patch_attribute_list(attrlist, i, v);
                                } else {
                                    attrlist = tex_copy_attribute_list_set(tex_current_attribute_list(), i, v);
                                }
                            }
                        }
                        break;
                    case 'n': case 'N':
                        if (tex_scan_mandate_keyword("anchor", 2)) {
                            /*tex An |s| makes it |anchors| which has its own scanner. */
                            switch (tex_scan_character("sS", 0, 0, 0)) {
                                case 's': case 'S':
                                    anchor = tex_scan_anchors(0);
                                    break;
                                default:
                                    anchor = tex_scan_anchor(0);
                                    break;
                            }
                        }
                        break;
                    case 'x': case 'X':
                        if (tex_scan_mandate_keyword("axis", 2)) {
                            axis |= tex_scan_box_axis();
                        }
                        break;
                    default:
                        tex_aux_show_keyword_error("adapt|attr|anchor|axis");
                        goto DONE;
                }
                break;
            case 's': case 'S':
                switch (tex_scan_character("hpoHPO", 0, 0, 0)) {
                    case 'h': case 'H':
                        /*tex
                            This is a bonus because we decoupled the shift amount from the context,
                            where it can be somewhat confusing as that is a hybrid amount, kind, or
                            flag field. The keyword overloads an already given |move_cmd|.
                        */
                        if (tex_scan_mandate_keyword("shift", 2)) {
                            shift = tex_scan_dimen(0, 0, 0, 0, NULL);
                        }
                        break;
                    case 'p': case 'P':
                        if (tex_scan_mandate_keyword("spread", 2)) {
                            spec_code = packing_additional;
                            spec_amount = tex_scan_dimen(0, 0, 0, 0, NULL);
                        }
                        break;
                    case 'o': case 'O':
                        if (tex_scan_mandate_keyword("source", 2)) {
                            source = tex_scan_int(1, NULL);
                        }
                        break;
                    default:
                        tex_aux_show_keyword_error("shift|spread|source");
                        goto DONE;
                }
                break;
            case 'd': case 'D':
                switch (tex_scan_character("eiEI", 0, 0, 0)) {
                    case 'i': case 'I':
                        if (tex_scan_mandate_keyword("direction", 2)) {
                            spec_direction = tex_scan_direction(0);
                        }
                        break;
                    case 'e': case 'E':
                        if (tex_scan_mandate_keyword("delay", 2)) {
                            state |= package_u_leader_delayed;
                        }
                        break;
                    default:
                        tex_aux_show_keyword_error("direction|delay");
                        goto DONE;
                }
                break;
            case 'o': case 'O':
                if (tex_scan_mandate_keyword("orientation", 1)) {
                    orientation = tex_scan_orientation(0);
                }
                break;
            case 'x': case 'X':
                switch (tex_scan_character("omOM", 0, 0, 0)) {
                    case 'o': case 'O' :
                        if (tex_scan_mandate_keyword("xoffset", 2)) {
                            xoffset = tex_scan_dimen(0, 0, 0, 0, NULL);
                        }
                        break;
                    case 'm': case 'M' :
                        if (tex_scan_mandate_keyword("xmove", 2)) {
                            xmove = tex_scan_dimen(0, 0, 0, 0, NULL);
                        }
                        break;
                    default:
                        tex_aux_show_keyword_error("xoffset|xmove");
                        goto DONE;
                }
                break;
            case 'y': case 'Y':
                switch (tex_scan_character("omOM", 0, 0, 0)) {
                    case 'o': case 'O' :
                        if (tex_scan_mandate_keyword("yoffset", 2)) {
                            yoffset = tex_scan_dimen(0, 0, 0, 0, NULL);
                        }
                        break;
                    case 'm': case 'M' :
                        if (tex_scan_mandate_keyword("ymove", 2)) {
                            ymove = tex_scan_dimen(0, 0, 0, 0, NULL);
                        }
                        break;
                    default:
                        tex_aux_show_keyword_error("yoffset|ymove");
                        goto DONE;
                }
                break;
            case 'r': case 'R':
                /*tex
                    NOTE(review): when no |e| follows the |r| we silently continue with the next
                    keyword, without the keyword error that the other branches give.
                */
                if (tex_scan_character("eE", 0, 0, 0)) {
                    switch (tex_scan_character("vVtT", 0, 0, 0)) {
                        case 'v': case 'V' :
                            if (tex_scan_mandate_keyword("reverse", 3)) {
                                reverse = 1;
                            }
                            break;
                        case 't': case 'T' :
                            if (tex_scan_mandate_keyword("retain", 3)) {
                                retain = tex_scan_int(0, NULL);
                            }
                            break;
                        default:
                            tex_aux_show_keyword_error("reverse|retain");
                            goto DONE;
                    }
                }
                break;
            case 'c': case 'C':
                switch (tex_scan_character("olOL", 0, 0, 0)) {
                    case 'o': case 'O' :
                        if (tex_scan_mandate_keyword("container", 2)) {
                            container = 1;
                        }
                        break;
                    case 'l': case 'L' :
                        if (tex_scan_mandate_keyword("class", 2)) {
                            mainclass = tex_scan_math_class_number(0);
                        }
                        break;
                    default:
                        tex_aux_show_keyword_error("container|class");
                        goto DONE;
                }
                break;
            case '{':
                /*tex We remember that we've seen the brace so that we don't scan for it again. */
                brace = 1;
                goto DONE;
            default:
                goto DONE;
        }
    }
  DONE:
    /*tex The geometry bits are derived from what was set. */
    if (anchor || source || target) {
        geometry |= anchor_geometry;
    }
    if (orientation || xmove || ymove) {
        geometry |= orientation_geometry;
    }
    if (xoffset || yoffset) {
        geometry |= offset_geometry;
    }
    /*tex
        We either build one triggered by the |attr| key or we never set it in which case we use the
        default. As we will use it anyway, we also bump the reference, which also makes sure that
        it will stay.
    */
    if (! attrlist) {
        /* this also sets the reference when not yet set */
        attrlist = tex_current_attribute_list();
    }
    /*tex Now we're referenced. We need to preserve this over the group. */
    add_attribute_reference(attrlist);
    /* */
    tex_set_saved_record(saved_full_spec_item_context, saved_box_context, 0, context);
    /*tex Traditionally these two are packed into one record: */
    tex_set_saved_record(saved_full_spec_item_packaging, saved_box_spec, spec_code, spec_amount);
    /*tex Adjust |text_dir_ptr| for |scan_spec|: */
    if (spec_direction != direction_unknown) {
        tex_set_saved_record(saved_full_spec_item_direction, saved_box_direction, spec_direction, lmt_dir_state.text_dir_ptr);
        lmt_dir_state.text_dir_ptr = tex_new_dir(normal_dir_subtype, spec_direction);
    } else {
        tex_set_saved_record(saved_full_spec_item_direction, saved_box_direction, spec_direction, null);
    }
    /* We could pack some in one record. */
    tex_set_saved_record(saved_full_spec_item_attr_list, saved_box_attr_list, 0, attrlist);
    tex_set_saved_record(saved_full_spec_item_only_pack, saved_box_pack, 0, just_pack);
    tex_set_saved_record(saved_full_spec_item_orientation, saved_box_orientation, 0, orientation);
    tex_set_saved_record(saved_full_spec_item_anchor, saved_box_anchor, 0, anchor);
    tex_set_saved_record(saved_full_spec_item_geometry, saved_box_geometry, 0, geometry);
    tex_set_saved_record(saved_full_spec_item_xoffset, saved_box_xoffset, 0, xoffset);
    tex_set_saved_record(saved_full_spec_item_yoffset, saved_box_yoffset, 0, yoffset);
    tex_set_saved_record(saved_full_spec_item_xmove, saved_box_xmove, 0, xmove);
    tex_set_saved_record(saved_full_spec_item_ymove, saved_box_ymove, 0, ymove);
    tex_set_saved_record(saved_full_spec_item_reverse, saved_box_reverse, 0, reverse);
    tex_set_saved_record(saved_full_spec_item_container, saved_box_container, 0, container);
    tex_set_saved_record(saved_full_spec_item_shift, saved_box_shift, 0, shift);
    tex_set_saved_record(saved_full_spec_item_source, saved_box_source, 0, source);
    tex_set_saved_record(saved_full_spec_item_target, saved_box_target, 0, target);
    tex_set_saved_record(saved_full_spec_item_axis, saved_box_axis, 0, axis);
    tex_set_saved_record(saved_full_spec_item_class, saved_box_class, 0, mainclass);
    tex_set_saved_record(saved_full_spec_item_state, saved_box_state, 0, state);
    tex_set_saved_record(saved_full_spec_item_retain, saved_box_retain, 0, retain);
    lmt_save_state.save_stack_data.ptr += saved_full_spec_n_of_items;
    tex_new_save_level(c);
    if (! brace) {
        tex_scan_left_brace();
    }
    update_tex_par_direction(spec_direction);
    update_tex_text_direction(spec_direction);
}

/*tex

    To figure out the glue setting, |hpack| and |vpack| determine how much stretchability and
    shrinkability are present, considering all four orders of infinity. The highest order of
    infinity that has a nonzero coefficient is then used as if no other orders were present.

    For example, suppose that the given list contains six glue nodes with the respective
    stretchabilities |3pt|, |8fill|, |5fil|, |6pt|, |-3fil|, |-8fill|. Then the total is essentially
    |2fil|; and if a total additional space of 6pt is to be achieved by stretching, the actual
    amounts of stretch will be |0pt|, |0pt|, |15pt|, |0pt|, |-9pt|, and |0pt|, since only |fil| glue
    will be considered. (The |fill| glue is therefore not really stretching infinitely with respect
    to |fil|; nobody would actually want that to happen.)

    The arrays |total_stretch| and |total_shrink| are used to determine how much glue of each kind
    is present. A global variable |last_badness| is used to implement |\badness|.
*/

/*tex
    The state shared by the packagers. All fields start out as zero or null; what they are used
    for is explained in the text that follows this initializer.
*/

packaging_state_info lmt_packaging_state = {
    .total_stretch          = { 0 },
    .total_shrink           = { 0 }, /*tex glue found by |hpack| or |vpack| */
    .last_badness           = 0,     /*tex badness of the most recently packaged box */
    .last_overshoot         = 0,     /*tex overshoot of the most recently packaged box */
    .post_adjust_tail       = null,  /*tex tail of adjustment list */
    .pre_adjust_tail        = null,
    .post_migrate_tail      = null,  /*tex tail of migration list */
    .pre_migrate_tail       = null,
    .last_leftmost_char     = null,
    .last_rightmost_char    = null,
    .pack_begin_line        = 0,
 /* .active_height          = { 0 }, */
    .best_height_plus_depth = 0,
    .previous_char_ptr      = null,
    .font_expansion_ratio   = 0,
    .padding                = 0,
    .page_discards_tail     = null,
    .page_discards_head     = null,
    .split_discards_head    = null,
};

/*tex

    This state collects the glue found by |hpack| or |vpack|: |total_stretch| and |total_shrink|
    and the badness of the most recently packaged box |last_badness|.

    If the variable |adjust_tail| is non-null, the |hpack| routine also removes all occurrences of
    |insert_node|, |mark_node|, and |adjust_node| items and appends the resulting material onto the
    list that ends at location |adjust_tail|.

    Tail of adjustment list is stored in |adjust_tail|. Materials in |\vadjust| used with |pre|
    keyword will be appended to |pre_adjust_tail| instead of |adjust_tail|.

    The optimizers use |last_leftmost_char| and |last_rightmost_char|.

    In order to provide a decent indication of where an overfull or underfull box originated, we
    use a global variable |pack_begin_line| that is set nonzero only when |hpack| is being called
    by the paragraph builder or the alignment finishing routine.

    The source file line where the current paragraph or alignment began; a negative value denotes
    alignment |pack_begin_line|.

    Pointers to the prev and next char of an implicit kern are kept in |next_char_p| and
    |prev_char_p|.
+ + The kern stretch and shrink code was (or had become) rather weird ... the width field is set, + and then used in a second calculation, repeatedly, so why is that \unknown\ maybe some some + weird left-over \unknown\ anyway, the values are so small that in practice they are not + significant at all when the backend sees them because a few hundred sp positive or negative are + just noise there (so adjustlevel 3 has hardly any consequence for the result but is more + efficient). + + In the end I simplified the code because in practice these kerns can between glyphs burried in + discretionary nodes. Also, we don't enable it by default so let's just stick to the leftmost + character as reference. We can assume the same font anyway. + +*/ + +scaled tex_char_stretch(halfword p) /* todo: move this to texfont.c and make it more efficient */ +{ + if (! tex_has_glyph_option(p, glyph_option_no_expansion)) { + halfword f = glyph_font(p); + halfword m = font_max_stretch(f); + if (m > 0) { + halfword c = glyph_character(p); + halfword ef = tex_char_ef_from_font(f, c); + if (ef > 0) { + scaled dw = tex_calculated_glyph_width(p, m) - tex_char_width_from_glyph(p); + if (dw > 0) { + return tex_round_xn_over_d(dw, ef, 1000); + } + } + } + } + return 0; +} + +scaled tex_char_shrink(halfword p) /* todo: move this to texfont.c and make it more efficient */ +{ + if (! tex_has_glyph_option(p, glyph_option_no_expansion)) { + halfword f = glyph_font(p); + halfword m = font_max_shrink(f); + if (m > 0) { + halfword c = glyph_character(p); + halfword ef = tex_char_ef_from_font(f, c); + if (ef > 0) { + scaled dw = tex_char_width_from_glyph(p) - tex_calculated_glyph_width(p, -m); + if (dw > 0) { + return tex_round_xn_over_d(dw, ef, 1000); + } + } + } + } + return 0; +} + +scaled tex_kern_stretch(halfword p) +{ + scaled w = kern_amount(p); + if (w) { + halfword l = lmt_packaging_state.previous_char_ptr; + if (l && node_type(l) == glyph_node && ! 
tex_has_glyph_option(l, glyph_option_no_expansion)) { + scaled m = font_max_stretch(glyph_font(l)); + if (m > 0) { + scaled e = tex_char_ef_from_font(glyph_font(l), glyph_character(l)); + if (e > 0) { + scaled dw = w - tex_round_xn_over_d(w, 1000 + m, 1000); + if (dw > 0) { + return tex_round_xn_over_d(dw, e, 1000); + } + } + } + } + } + return 0; +} + +scaled tex_kern_shrink(halfword p) +{ + scaled w = kern_amount(p) ; + if (w) { + halfword l = lmt_packaging_state.previous_char_ptr; + if (l && node_type(l) == glyph_node && ! tex_has_glyph_option(l, glyph_option_no_expansion)) { + halfword m = font_max_shrink(glyph_font(l)); + if (m > 0) { + halfword e = tex_char_ef_from_font(glyph_font(l), glyph_character(l)); + if (e > 0) { + scaled dw = tex_round_xn_over_d(w, 1000 - m, 1000) - w; + if (dw > 0) { + return tex_round_xn_over_d(dw, e, 1000); + } + } + } + } + } + return 0; +} + +static void tex_aux_set_kern_expansion(halfword p, halfword ex_ratio) +{ + scaled w = kern_amount(p) ; + if (w ) { + halfword l = lmt_packaging_state.previous_char_ptr; + if (l && node_type(l) == glyph_node && ! tex_has_glyph_option(l, glyph_option_no_expansion)) { + halfword f = glyph_font(l); + halfword c = glyph_character(l); + halfword ef = tex_char_ef_from_font(f, c); + if (ef == 0) { + return; + } else if (ex_ratio > 0) { + halfword m = font_max_stretch(f); + if (m > 0) { + halfword ex_stretch = tex_ext_xn_over_d(ex_ratio * ef, m, 1000000); + kern_expansion(p) = tex_fix_expand_value(f, ex_stretch) * 1000; + } + } else if (ex_ratio < 0) { + halfword m = font_max_shrink(f); + if (m > 0) { + halfword ex_shrink = tex_ext_xn_over_d(ex_ratio * ef, m, 1000000); + kern_expansion(p) = tex_fix_expand_value(f, ex_shrink) * 1000; + } + } + } + } +} + +static void tex_aux_set_glyph_expansion(halfword p, int ex_ratio) +{ + switch (node_type(p)) { + case glyph_node: + if (! 
tex_has_glyph_option(p, glyph_option_no_expansion)) { + halfword f = glyph_font(p); + halfword c = glyph_character(p); + halfword ef = tex_char_ef_from_font(f, c); + if (ef == 0) { + return; + } else if (ex_ratio > 0) { + halfword m = font_max_stretch(f); + if (m > 0) { + halfword ex_stretch = tex_ext_xn_over_d(ex_ratio * ef, m, 1000000); + glyph_expansion(p) = tex_fix_expand_value(f, ex_stretch) * 1000; + } + } else if (ex_ratio < 0) { + halfword m = font_max_shrink(f); + if (m > 0) { + halfword ex_shrink = tex_ext_xn_over_d(ex_ratio * ef, m, 1000000); + glyph_expansion(p) = tex_fix_expand_value(f, ex_shrink) * 1000; + } + } + } + break; + case disc_node: + { + halfword r = disc_pre_break_head(p); + while (r) { + if (node_type(r) == glyph_node) { + tex_aux_set_glyph_expansion(r, ex_ratio); + } + r = node_next(r); + } + r = disc_post_break_head(p); + while (r) { + if (node_type(r) == glyph_node) { + tex_aux_set_glyph_expansion(r, ex_ratio); + } + r = node_next(r); + } + r = disc_no_break_head(p); + while (r) { + if (node_type(r) == glyph_node) { + tex_aux_set_glyph_expansion(r, ex_ratio); + } + r = node_next(r); + } + break; + } + default: + tex_normal_error("font expansion", "invalid node type"); + break; + } +} + +scaled tex_left_marginkern(halfword p) +{ + while (p && node_type(p) == glue_node) { + p = node_next(p); + } + if (p && node_type(p) == kern_node && node_subtype(p) == left_margin_kern_subtype) { + return kern_amount(p); + } else { + return 0; + } +} + +scaled tex_right_marginkern(halfword p) +{ + if (p) { + p = tex_tail_of_node_list(p); + /*tex + There can be a leftskip, rightskip, penalty and yes, also a disc node with a nesting + node that points to glue spec ... and we don't want to analyze that messy lot. + */ + while (p) { + switch(node_type(p)) { + case glue_node: + /*tex We backtrack over glue. 
*/ + p = node_prev(p); + break; + case kern_node: + if (node_subtype(p) == right_margin_kern_subtype) { + return kern_amount(p); + } else { + return 0; + } + case disc_node: + /*tex + Officially we should look in the replace but currently protrusion doesn't + work anyway with |foo\discretionary {} {} {bar-} | (no following char) so we + don't need it now. + */ + p = node_prev(p); + if (p && node_type(p) == kern_node && node_subtype(p) == right_margin_kern_subtype) { + return kern_amount(p); + } else { + return 0; + } + default: + return 0; + } + } + } + return 0; +} + +/*tex + + Character protrusion is something we inherited from \PDFTEX\ and the next helper calculates + the extend. + +*/ + +scaled tex_char_protrusion(halfword p, int side) +{ + if (side == left_margin_kern_subtype) { + lmt_packaging_state.last_leftmost_char = null; + } else { + lmt_packaging_state.last_rightmost_char = null; + } + if (! p || node_type(p) != glyph_node || tex_has_glyph_option(p, glyph_option_no_protrusion)) { + return 0; + } else if (side == left_margin_kern_subtype) { + lmt_packaging_state.last_leftmost_char = p; + return tex_char_lp_from_font(glyph_font(p), glyph_character(p)); + } else { + lmt_packaging_state.last_rightmost_char = p; + return tex_char_rp_from_font(glyph_font(p), glyph_character(p)); + } +} + +/*tex + + Here we prepare for |hpack|, which is place where we do font substituting when font expansion + is being used. + +*/ + +int tex_ignore_math_skip(halfword p) +{ + if (math_skip_mode_par == 6) { + if (node_subtype(p) == end_inline_math) { + if (tex_math_skip_boundary(node_next(p))) { + return 0; + } + } else { + if (tex_math_skip_boundary(node_prev(p))) { + return 0; + } + } + } else if (math_skip_mode_par == 7) { + if (node_subtype(p) == end_inline_math) { + if (! tex_math_skip_boundary(node_next(p))) { + return 0; + } + } else { + if (! 
tex_math_skip_boundary(node_prev(p))) { + return 0; + } + } + } else { + return 0; + } + tex_reset_math_glue_to_zero(p); + return 1; +} + +# define fix_int(val,min,max) (val < min ? min : (val > max ? max : val)) + +inline static halfword tex_aux_used_order(halfword *total) +{ + if (total[filll_glue_order]) { + return filll_glue_order; + } else if (total[fill_glue_order]) { + return fill_glue_order; + } else if (total[fil_glue_order]) { + return fil_glue_order; + } else if (total[fi_glue_order]) { + return fi_glue_order; + } else { + return normal_glue_order; + } +} + +/*tex + + The original code mentions: \quotation {Transfer node |p| to the adjustment list. Although node + |q| is not necessarily the immediate predecessor of node |p|, it always points to some node in + the list preceding |p|. Thus, we can delete nodes by moving |q| when necessary. The algorithm + takes linear time, and the extra computation does not intrude on the inner loop unless it is + necessary to make a deletion.}. The trick used is the following: + + \starttyping + q = r + list_offset; + node_next(q) = p; + .... + while (node_next(q) != p) { + q = node_next(q); + } + \stoptyping + + This list offset points to the memory slot in the node and it happens that the next pointer + takes the same subfield as the normal next pointer (these are actually offsets in an array of + memorywords). This kind of neat trickery is needed because there are only forward linked lists, + but we can do it differently and thereby also use the normal list pointer. We need a bit more + checking but in the end we have a better abstraction. 
+ +*/ + +inline static void tex_aux_promote_pre_migrated(halfword r, halfword p) +{ + halfword pm = box_pre_migrated(p); + halfword pa = box_pre_adjusted(p); + if (pa) { + if (lmt_packaging_state.pre_adjust_tail) { + lmt_packaging_state.pre_adjust_tail = tex_append_adjust_list(pre_adjust_head, lmt_packaging_state.pre_adjust_tail, pa); + } else if (box_pre_adjusted(r)) { + tex_couple_nodes(box_pre_adjusted(r), pa); + } else { + box_pre_adjusted(r) = pa; + } + box_pre_adjusted(p) = null; + } + if (pm) { + if (lmt_packaging_state.pre_migrate_tail) { + tex_couple_nodes(lmt_packaging_state.pre_migrate_tail, pm); + lmt_packaging_state.pre_migrate_tail = tex_tail_of_node_list(pm); + } else { + /* here we prepend pm to rm */ + halfword rm = box_pre_migrated(r); + if (rm) { + tex_couple_nodes(pm, rm); + } + box_pre_migrated(r) = pm; + } + box_pre_migrated(p) = null; + } +} + +inline static void tex_aux_promote_post_migrated(halfword r, halfword p) +{ + halfword pm = box_post_migrated(p); + halfword pa = box_post_adjusted(p); + if (pa) { + if (lmt_packaging_state.post_adjust_tail) { + lmt_packaging_state.post_adjust_tail = tex_append_adjust_list(post_adjust_head, lmt_packaging_state.post_adjust_tail, pa); + } else if (box_post_adjusted(r)) { + tex_couple_nodes(box_post_adjusted(r), pa); + } else { + box_post_adjusted(r) = pa; + } + box_post_adjusted(p) = null; + } + if (pm) { + if (lmt_packaging_state.post_migrate_tail) { + tex_couple_nodes(lmt_packaging_state.post_migrate_tail, pm); + lmt_packaging_state.post_migrate_tail = tex_tail_of_node_list(pm); + } else { + /* here we append pm to rm */ + halfword rm = box_post_migrated(r); + if (rm) { + tex_couple_nodes(tex_tail_of_node_list(rm), pm); + } else { + box_post_migrated(r) = pm; + } + } + box_post_migrated(p) = null; + } +} + +inline static halfword tex_aux_post_migrate(halfword r, halfword p) +{ + halfword n = p; + halfword nn = node_next(p); + halfword pm = box_post_migrated(r); + if (p == box_list(r)) { + box_list(r) = 
nn; + if (nn) { + node_prev(nn) = null; + } + } else { + tex_couple_nodes(node_prev(p), nn); + } + if (pm) { + tex_couple_nodes(tex_tail_of_node_list(pm), n); + } else { + box_post_migrated(r) = n; + } + node_next(n) = null; + p = nn; + return p; +} + +inline static halfword tex_aux_normal_migrate(halfword r, halfword p) +{ + halfword n = p; + halfword nn = node_next(p); + if (p == box_list(r)) { + box_list(r) = nn; + if (nn) { + node_prev(nn) = null; + } + } else { + tex_couple_nodes(node_prev(p), nn); + } + tex_couple_nodes(lmt_packaging_state.post_migrate_tail, n); + lmt_packaging_state.post_migrate_tail = n; + node_next(n) = null; + p = nn; + return p; +} + +static void tex_aux_append_diagnostic_rule(halfword box, halfword rule) +{ + halfword n = box_list(box); + if (n) { + halfword t = tex_tail_of_node_list(n); + halfword c = t; + while (c && node_type(c) == glue_node) { + switch (node_subtype(c)) { + case par_fill_right_skip_glue: + case par_init_right_skip_glue: + case right_skip_glue: + case right_hang_skip_glue: + c = node_prev(c); + break; + default: + goto DONE; + } + } + DONE: + if (c) { + n = node_next(c); + if (n) { + tex_couple_nodes(rule, n); + } + } else { + c = t; + } + tex_couple_nodes(c, rule); + } else { + box_list(box) = rule; + } +} + +void tex_repack(halfword p, scaled w, int m) +{ + if (p) { + halfword tmp; + switch (node_type(p)) { + case hlist_node: + tmp = tex_hpack(box_list(p), w, m, box_dir(p), holding_none_option); + break; + case vlist_node: + tmp = tex_vpack(box_list(p), w, m > packing_additional ? packing_additional : m, max_dimen, box_dir(p), holding_none_option); + break; + default: + return; + } + box_width(p) = box_width(tmp); + box_height(p) = box_height(tmp); + box_depth(p) = box_depth(tmp); + box_glue_set(p) = box_glue_set(tmp); + box_glue_order(p) = box_glue_order(tmp); + box_glue_sign(p) = box_glue_sign(tmp); + box_list(tmp) = null; + tex_flush_node(tmp); + } +} + +// Not ok. 
For now we accept some drift and assume it averages out. Just +// for fun we could actually store it in the glue set field afterwards. +// +// { +// halfword drift = scaledround(wd) - ws; +// if (drift < 0) { +// d -= (double) drift; +// wd -= (double) drift; +// } +// } +

/*tex

    Freeze the glue in box |p|. When |p| is an hlist or vlist we walk over its list and, unless
    the box has a normal glue sign, turn each glue node into fixed glue: when the stretch (or
    shrink) order of the node equals the glue order of the box, the stretch (or shrink) times the
    glue set of the box is added to (or subtracted from) the amount, and then the stretch and
    shrink components and their orders are zeroed. In an hlist the math nodes get the same
    treatment. Finally the glue set, order and sign of the box itself are reset.

    When |recurse| is nonzero, nested hlists and vlists are frozen too. Other node types, as well
    as a null |p|, are ignored.

*/

void tex_freeze(halfword p, int recurse)
{
    if (p && (node_type(p) == hlist_node || node_type(p) == vlist_node)) {
        /*tex Math nodes are only dealt with in horizontal lists. */
        int horizontal = node_type(p) == hlist_node;
        halfword c = box_list(p);
        double set = (double) box_glue_set(p);
        halfword order = box_glue_order(p);
        halfword sign = box_glue_sign(p);
        while (c) {
            switch (node_type(c)) {
                case glue_node:
                    if (sign != normal_glue_sign) {
                        switch (sign) {
                            case stretching_glue_sign:
                                if (glue_stretch_order(c) == order) {
                                    glue_amount(c) += scaledround(glue_stretch(c) * set);
                                }
                                break;
                            case shrinking_glue_sign:
                                if (glue_shrink_order(c) == order) {
                                    glue_amount(c) -= scaledround(glue_shrink(c) * set);
                                }
                                break;
                            default:
                                break;
                        }
                        glue_stretch(c) = 0;
                        glue_shrink(c) = 0;
                        glue_stretch_order(c) = 0;
                        glue_shrink_order(c) = 0;
                    }
                    /*tex We always stop here, so there is no fall through into the box case. */
                    break;
                case hlist_node:
                case vlist_node:
                    if (recurse) {
                        tex_freeze(c, recurse);
                    }
                    break;
                case math_node:
                    if (horizontal && sign != normal_glue_sign) {
                        switch (sign) {
                            case stretching_glue_sign:
                                if (math_stretch_order(c) == order) {
                                    math_amount(c) += scaledround(math_stretch(c) * set);
                                }
                                break;
                            case shrinking_glue_sign:
                                /*tex Shrinking makes the amount smaller, just as with glue nodes. */
                                if (math_shrink_order(c) == order) {
                                    math_amount(c) -= scaledround(math_shrink(c) * set);
                                }
                                break;
                            default:
                                break;
                        }
                        math_stretch(c) = 0;
                        math_shrink(c) = 0;
                        math_stretch_order(c) = 0;
                        math_shrink_order(c) = 0;
                    }
                    break;
                default:
                    break;
            }
            c = node_next(c);
        }
        box_glue_set(p) = 0;
        box_glue_order(p) = 0;
        box_glue_sign(p) = 0;
    }
}

+halfword tex_hpack(halfword p, scaled w, int m, singleword pack_direction, int retain) +{ + /*tex trails behind |p| */ + halfword q = null; + /*tex height */ + scaled h = 0; + /*tex depth */ + scaled d = 0; + /*tex natural width */ + scaled x = 0; + /*tex the current direction */ + singleword hpack_dir = pack_direction == direction_unknown ? text_direction_par : pack_direction; + int disc_level = 0; + halfword pack_interrupt[8]; + scaled font_stretch = 0; + scaled font_shrink = 0; + int adjust_spacing = adjust_spacing_off; + /*tex the box node that will be returned */ + halfword r = tex_new_node(hlist_node, unknown_list); + box_dir(r) = hpack_dir; + lmt_packaging_state.last_badness = 0; + lmt_packaging_state.last_overshoot = 0; + // if (! 
p) { + // box_width(r) = w; + // return r; + // } + if (m == packing_linebreak) { + m = packing_expanded; + adjust_spacing = tex_checked_font_adjust( + lmt_linebreak_state.adjust_spacing, + lmt_linebreak_state.adjust_spacing_step, + lmt_linebreak_state.adjust_spacing_shrink, + lmt_linebreak_state.adjust_spacing_stretch + ); + } else { + adjust_spacing = tex_checked_font_adjust( + adjust_spacing_par, + adjust_spacing_step_par, + adjust_spacing_shrink_par, + adjust_spacing_stretch_par + ); + } + /*tex + + A potential optimization, saves a little but neglectable in practice (not that many empty + boxes are used): + + \starttyping + if (! p) { + box_width(r) = w; + return r; + } + \stoptyping + + */ + box_list(r) = p; + if (m == packing_expanded) { + /*tex Why not always: */ + lmt_packaging_state.previous_char_ptr = null; + } else if (m == packing_adapted) { + if (w > 1000) { + w = 1000; + } else if (w < -1000) { + w = -1000; + } + } + for (int i = normal_glue_order; i <= filll_glue_order; i++) { + lmt_packaging_state.total_stretch[i] = 0; + lmt_packaging_state.total_shrink[i] = 0; + } + /*tex + + Examine node |p| in the hlist, taking account of its effect on the dimensions of the new + box, or moving it to the adjustment list; then advance |p| to the next node. For disc + node we enter a level so we don't use recursion. + + In other engines there is an optimization for glyph runs but here we use just one switch + for everything. The performance hit is neglectable. So the comment \quotation {Incorporate + character dimensions into the dimensions of the hbox that will contain~it, then move to + the next node.} no longer applies. In \LUATEX\ ligature building, kerning and hyphenation + are decoupled so comments about inner loop and performance no longer make sense here. 
+ + */ + while (p) { + switch (node_type(p)) { + case glyph_node: + { + scaledwhd whd; + if (adjust_spacing) { + switch (m) { + case packing_expanded: + { + lmt_packaging_state.previous_char_ptr = p; + font_stretch += tex_char_stretch(p); + font_shrink += tex_char_shrink(p); + break; + } + case packing_substitute: + { + lmt_packaging_state.previous_char_ptr = p; + tex_aux_set_glyph_expansion(p, lmt_packaging_state.font_expansion_ratio); + break; + } + } + } + whd = tex_glyph_dimensions_ex(p); + x += whd.wd; + if (whd.ht > h) { + h = whd.ht; + } + if (whd.dp > d) { + d = whd.dp; + } + break; + } + case hlist_node: + case vlist_node: + { + /*tex + + Incorporate box dimensions into the dimensions of the hbox that will contain + it. + + */ + halfword s = box_shift_amount(p); + scaledwhd whd = tex_pack_dimensions(p); + x += whd.wd; + if (whd.ht - s > h) { + h = whd.ht - s; + } + if (whd.dp + s > d) { + d = whd.dp + s; + } + tex_aux_promote_pre_migrated(r, p); + tex_aux_promote_post_migrated(r, p); + break; + } + case unset_node: + x += box_width(p); + if (box_height(p) > h) { + h = box_height(p); + } + if (box_depth(p) > d) { + d = box_depth(p); + } + // tex_aux_promote_pre_migrated(r, p); + // tex_aux_promote_post_migrated(r, p); + break; + case rule_node: + /*tex + + The code here implicitly uses the fact that running dimensions are indicated + by |null_flag|, which will be ignored in the calculations because it is a + highly negative number. + + */ + x += rule_width(p); + if (rule_height(p) > h) { + h = rule_height(p); + } + if (rule_depth(p) > d) { + d = rule_depth(p); + } + break; + case glue_node: + /*tex Incorporate glue into the horizontal totals. Can this overflow? 
*/ + { + switch (m) { + case packing_adapted: + if (w < 0) { + if (glue_shrink_order(p) == normal_glue_order) { + glue_amount(p) -= scaledround(-0.001 * w * (double) glue_shrink(p)); + } + } else if (w > 0) { + if (glue_stretch_order(p) == normal_glue_order) { + glue_amount(p) += scaledround( 0.001 * w * (double) glue_stretch(p)); + } + } + x += glue_amount(p); + glue_shrink_order(p) = normal_glue_order; + glue_shrink(p) = 0; + glue_stretch_order(p) = normal_glue_order; + glue_stretch(p) = 0; + break; + default: + { + halfword o; + x += glue_amount(p); + o = glue_stretch_order(p); + lmt_packaging_state.total_stretch[o] += glue_stretch(p); + o = glue_shrink_order(p); + lmt_packaging_state.total_shrink[o] += glue_shrink(p); + } + } + if (is_leader(p)) { + halfword gl = glue_leader_ptr(p); + scaled ht = 0; + scaled dp = 0; + switch (node_type(gl)) { + case hlist_node: + case vlist_node: + ht = box_height(gl); + dp = box_depth(gl); + break; + case rule_node: + ht = rule_height(gl); + dp = rule_depth(gl); + break; + } + if (ht > h) { + h = ht; + } + if (dp > d) { + d = dp; + } + } + break; + } + case kern_node: + if (adjust_spacing == adjust_spacing_full && node_subtype(p) == font_kern_subtype) { + switch (m) { + case packing_expanded: + { + font_stretch += tex_kern_stretch(p); + font_shrink += tex_kern_shrink(p); + break; + } + case packing_substitute: + { + tex_aux_set_kern_expansion(p, lmt_packaging_state.font_expansion_ratio); + break; + } + } + } + x += tex_kern_dimension_ex(p); + break; + case disc_node: + if (adjust_spacing) { + switch (m) { + case packing_expanded: + /*tex + Won't give this issues with complex discretionaries as we don't + do the |packing_expand| here? I need to look into this! 
+ */ + break; + case packing_substitute: + tex_aux_set_glyph_expansion(p, lmt_packaging_state.font_expansion_ratio); + break; + } + } + if (disc_no_break_head(p)) { + pack_interrupt[disc_level] = node_next(p); + ++disc_level; + p = disc_no_break(p); + } + break; + case math_node: + if (tex_math_glue_is_zero(p) || tex_ignore_math_skip(p)) { + x += math_surround(p); + } else { + halfword o; + x += math_amount(p); + o = math_stretch_order(p); + lmt_packaging_state.total_stretch[o] += math_stretch(p); + o = math_shrink_order(p); + lmt_packaging_state.total_shrink[o] += math_shrink(p); + } + break; + case dir_node: + break; + case insert_node: + if (retain_inserts(retain)) { + break; + } else if (lmt_packaging_state.post_migrate_tail) { + p = tex_aux_normal_migrate(r, p); + /*tex Here |q| stays as it is and we're already at next. */ + continue; + } else if (auto_migrating_mode_permitted(auto_migration_mode_par, auto_migrate_insert)) { + halfword l = insert_list(p); + p = tex_aux_post_migrate(r, p); + while (l) { + l = node_type(l) == insert_node ? tex_aux_post_migrate(r, l) : node_next(l); + } + /*tex Here |q| stays as it is and we're already at next. */ + continue; + } else { + /*tex Nothing done, so we move on. */ + break; + } + case mark_node: + if (retain_marks(retain)) { + break; + } else if (lmt_packaging_state.post_migrate_tail) { + p = tex_aux_normal_migrate(r, p); + /*tex Here |q| stays as it is and we're already at next. */ + continue; + } else if (auto_migrating_mode_permitted(auto_migration_mode_par, auto_migrate_mark)) { + p = tex_aux_post_migrate(r, p); + /*tex Here |q| stays as it is and we're already at next. */ + continue; + } else { + /*tex Nothing done, so we move on. */ + break; + } + case adjust_node: + /*tex + We could combine this with migration code but adjust content actually is taken into account + as part of the flow (dimensions, penalties, etc). + */ + if (adjust_list(p) && ! 
retain_adjusts(retain)) { + halfword next = node_next(p); + halfword current = p; + /*tex Remove from list: */ + if (p == box_list(r)) { + box_list(r) = next; + if (next) { + node_prev(next) = null; + } + } else { + tex_couple_nodes(node_prev(p), next); + } + if (lmt_packaging_state.post_adjust_tail || lmt_packaging_state.pre_adjust_tail) { + tex_adjust_passon(r, current); + } else if (auto_migrating_mode_permitted(auto_migration_mode_par, auto_migrate_adjust)) { + tex_adjust_attach(r, current); + } + p = next; + continue; + } else { + break; + } + default: + break; + } + /* + This is kind of tricky: q is the pre-last pointer so we don't change it when we're + inside a disc node. This way of keeping track of the last node is different from the + previous engine. + */ + if (disc_level > 0) { + p = node_next(p); + if (! p) { + --disc_level; + p = pack_interrupt[disc_level]; + } + } else { + q = p; + p = node_next(p); + } + } + box_height(r) = h; + box_depth(r) = d; + /*tex + Determine the value of |width(r)| and the appropriate glue setting; then |return| or |goto + common_ending|. When we get to the present part of the program, |x| is the natural width of + the box being packaged. + */ + switch (m) { + case packing_additional: + w += x; + break; + case packing_adapted: + w = x; + break; + } + box_width(r) = w; + x = w - x; + /*tex Now |x| is the excess to be made up. */ + if (x == 0) { + box_glue_sign(r) = normal_glue_sign; + box_glue_order(r) = normal_glue_order; + box_glue_set(r) = 0.0; + goto EXIT; + } else if (x > 0) { + /*tex + Determine horizontal glue stretch setting, then |return| or |goto common_ending|. If + |hpack| is called with |m=cal_expand_ratio| we calculate |font_expand_ratio| and return + without checking for overfull or underfull box. 
+ */ + halfword o = tex_aux_used_order(lmt_packaging_state.total_stretch); + if ((m == packing_expanded) && (o == normal_glue_order) && (font_stretch > 0)) { + lmt_packaging_state.font_expansion_ratio = tex_divide_scaled_n(x, font_stretch, 1000.0); + goto EXIT; + } + box_glue_order(r) = o; + box_glue_sign(r) = stretching_glue_sign; + if (lmt_packaging_state.total_stretch[o]) { + box_glue_set(r) = (glueratio) ((double) x / lmt_packaging_state.total_stretch[o]); + } else { + /*tex There's nothing to stretch. */ + box_glue_sign(r) = normal_glue_sign; + box_glue_set(r) = 0.0; + } + if (o == normal_glue_order && box_list(r)) { + /*tex + Report an underfull hbox and |goto common_ending|, if this box is sufficiently bad. + */ + lmt_packaging_state.last_badness = tex_badness(x, lmt_packaging_state.total_stretch[normal_glue_order]); + if (lmt_packaging_state.last_badness > hbadness_par) { + int callback_id = lmt_callback_defined(hpack_quality_callback); + if (callback_id > 0) { + if (q) { + halfword rule = null; + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "SdNddS->N", + lmt_packaging_state.last_badness > 100 ? 
"underfull" : "loose", + lmt_packaging_state.last_badness, + r, + abs(lmt_packaging_state.pack_begin_line), + lmt_input_state.input_line, + tex_current_input_file_name(), + &rule + ); + if (rule) { + tex_aux_append_diagnostic_rule(r, rule); + } + } + } else { + tex_print_nlp(); + if (lmt_packaging_state.last_badness > 100) { + tex_print_format("%l[package: underfull \\hbox (badness %i)", lmt_packaging_state.last_badness); + } else { + tex_print_format("%l[package: loose \\hbox (badness %i)", lmt_packaging_state.last_badness); + } + goto COMMON_ENDING; + } + } + } + goto EXIT; + } else { + /*tex + Determine horizontal glue shrink setting, then |return| or |goto common_ending|, + */ + halfword o = tex_aux_used_order(lmt_packaging_state.total_shrink); + if ((m == packing_expanded) && (o == normal_glue_order) && (font_shrink > 0)) { + lmt_packaging_state.font_expansion_ratio = tex_divide_scaled_n(x, font_shrink, 1000.0); + goto EXIT; + } + box_glue_order(r) = o; + box_glue_sign(r) = shrinking_glue_sign; + if (lmt_packaging_state.total_shrink[o]) { + box_glue_set(r) = (glueratio) ((double) (-x) / (double) lmt_packaging_state.total_shrink[o]); + } else { + /*tex There's nothing to shrink. */ + box_glue_sign(r) = normal_glue_sign; + box_glue_set(r) = 0.0; + } + if ((lmt_packaging_state.total_shrink[o] < -x) && (o == normal_glue_order) && (box_list(r))) { + int overshoot = -x - lmt_packaging_state.total_shrink[normal_glue_order]; + lmt_packaging_state.last_badness = 1000000; + lmt_packaging_state.last_overshoot = overshoot; + /*tex Use the maximum shrinkage */ + box_glue_set(r) = 1.0; + /*tex Report an overfull hbox and |goto common_ending|, if this box is sufficiently bad. 
*/ + if ((overshoot > hfuzz_par) || (hbadness_par < 100)) { + int callback_id = lmt_callback_defined(hpack_quality_callback); + halfword rule = null; + if (callback_id > 0) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "SdNddS->N", + "overfull", + overshoot, + r, + abs(lmt_packaging_state.pack_begin_line), + lmt_input_state.input_line, + tex_current_input_file_name(), + &rule); + } else if (q && overfull_rule_par > 0) { + rule = tex_new_rule_node(normal_rule_subtype); + rule_width(rule) = overfull_rule_par; + } + if (rule) { + tex_aux_append_diagnostic_rule(r, rule); + } + if (callback_id == 0) { + tex_print_nlp(); + tex_print_format("%l[package: overfull \\hbox (%D too wide)", overshoot, pt_unit); + goto COMMON_ENDING; + } + } + } else if (o == normal_glue_order) { + if (box_list(r)) { + /*tex Report a tight hbox and |goto common_ending|, if this box is sufficiently bad. */ + lmt_packaging_state.last_badness = tex_badness(-x, lmt_packaging_state.total_shrink[normal_glue_order]); + if (lmt_packaging_state.last_badness > hbadness_par) { + int callback_id = lmt_callback_defined(hpack_quality_callback); + if (callback_id > 0) { + halfword rule = null; + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "SdNddS->N", + "tight", + lmt_packaging_state.last_badness, + r, + abs(lmt_packaging_state.pack_begin_line), + lmt_input_state.input_line, + tex_current_input_file_name(), + &rule); + if (rule) { + tex_aux_append_diagnostic_rule(r, rule); + } + } else { + tex_print_nlp(); + tex_print_format("%l[package: tight \\hbox (badness %i)", lmt_packaging_state.last_badness); + goto COMMON_ENDING; + } + } + } + } + goto EXIT; + } + COMMON_ENDING: + /*tex Finish issuing a diagnostic message for an overfull or underfull hbox. 
*/ + if (lmt_page_builder_state.output_active) { + tex_print_format(" has occurred while \\output is active]"); + } else if (lmt_packaging_state.pack_begin_line == 0) { + tex_print_format(" detected at line %i]", lmt_input_state.input_line); + } else if (lmt_packaging_state.pack_begin_line > 0) { + tex_print_format(" in paragraph at lines %i--%i]", lmt_packaging_state.pack_begin_line, lmt_input_state.input_line); + } else { + tex_print_format(" in alignment at lines %i--%i]", -lmt_packaging_state.pack_begin_line, lmt_input_state.input_line); + } + tex_print_ln(); + lmt_print_state.font_in_short_display = null_font; + if (tracing_full_boxes_par > 0) { + halfword detail = show_node_details_par; + show_node_details_par = tracing_full_boxes_par; + tex_short_display(box_list(r)); + tex_print_ln(); + tex_begin_diagnostic(); + tex_show_box(r); + tex_end_diagnostic(); + show_node_details_par = detail; + } + EXIT: + if ((m == packing_expanded) && (lmt_packaging_state.font_expansion_ratio != 0)) { + lmt_packaging_state.font_expansion_ratio = fix_int(lmt_packaging_state.font_expansion_ratio, -1000, 1000); + q = box_list(r); + box_list(r) = null; + tex_flush_node(r); + /*tex This nested call uses the more or less global font_expand_ratio. */ + r = tex_hpack(q, w, packing_substitute, hpack_dir, holding_none_option); + } + /*tex Here we reset the |font_expand_ratio|. */ + lmt_packaging_state.font_expansion_ratio = 0; + return r; +} + +halfword tex_filtered_hpack(halfword p, halfword qt, scaled w, int m, int grp, halfword d, int just_pack, halfword attr, int state, int retain) +{ + halfword head; + singleword direction = checked_direction_value(d); + (void) state; /*tex Why do we pass it? Probably a left-over from an experiment. */ + if (just_pack) { + head = node_next(p); + } else if (node_type(p) == temp_node && ! node_next(p)) { + head = node_next(p); + } else { + /*tex Maybe here: |node_prev(p) = null|. 
*/ + head = node_next(p); + if (head) { + node_prev(head) = null; + if (tex_list_has_glyph(head)) { + tex_handle_hyphenation(head, qt); + head = tex_handle_glyphrun(head, grp, direction); + } + if (head) { + /*tex ignores empty anyway. Maybe also pass tail? */ + head = lmt_hpack_filter_callback(head, w, m, grp, direction, attr); + } + } + } + return tex_hpack(head, w, m, direction, retain); +} +

/*tex

    Here is a function to calculate the natural whd of a (horizontal) node list. We start at |p|
    and stop at |pp| (which is not included) or at the end of the list. Widths are added up while
    for the height and depth we keep the maximum that we run into; boxes are taken with their
    shift applied. For a discretionary only the no break list counts.

    The stretch (or shrink, depending on |g_sign|) of glue and math nodes that have order
    |g_order| is collected and, multiplied by |g_mult|, added to (or subtracted from) the width
    at the end.

*/

scaledwhd tex_natural_hsizes(halfword p, halfword pp, glueratio g_mult, int g_sign, int g_order)
{
    scaledwhd siz = { 0, 0, 0 };
    scaled gp = 0;
    scaled gm = 0;
    while (p && p != pp) {
        switch (node_type(p)) {
            case glyph_node:
                {
                    scaledwhd whd = tex_glyph_dimensions_ex(p);
                    siz.wd += whd.wd;
                    if (whd.ht > siz.ht) {
                        siz.ht = whd.ht;
                    }
                    if (whd.dp > siz.dp) {
                        siz.dp = whd.dp;
                    }
                    break;
                }
            case hlist_node:
            case vlist_node:
                {
                    scaled s = box_shift_amount(p);
                    scaledwhd whd = tex_pack_dimensions(p);
                    siz.wd += whd.wd;
                    if (whd.ht - s > siz.ht) {
                        siz.ht = whd.ht - s;
                    }
                    if (whd.dp + s > siz.dp) {
                        siz.dp = whd.dp + s;
                    }
                    break;
                }
            case unset_node:
                siz.wd += box_width(p);
                if (box_height(p) > siz.ht) {
                    siz.ht = box_height(p);
                }
                if (box_depth(p) > siz.dp) {
                    siz.dp = box_depth(p);
                }
                break;
            case rule_node:
                siz.wd += rule_width(p);
                if (rule_height(p) > siz.ht) {
                    siz.ht = rule_height(p);
                }
                if (rule_depth(p) > siz.dp) {
                    siz.dp = rule_depth(p);
                }
                break;
            case glue_node:
                siz.wd += glue_amount(p);
                switch (g_sign) {
                    case stretching_glue_sign:
                        if (glue_stretch_order(p) == g_order) {
                            gp += glue_stretch(p);
                        }
                        break;
                    case shrinking_glue_sign:
                        if (glue_shrink_order(p) == g_order) {
                            gm += glue_shrink(p);
                        }
                        break;
                }
                if (is_leader(p)) {
                    halfword gl = glue_leader_ptr(p);
                    halfword ht = 0;
                    halfword dp = 0;
                    switch (node_type(gl)) {
                        case hlist_node:
                        case vlist_node:
                            ht = box_height(gl);
                            dp = box_depth(gl);
                            break;
                        case rule_node:
                            ht = rule_height(gl);
                            dp = rule_depth(gl);
                            break;
                        default:
                            break;
                    }
                    /*tex
                        A leader can only make the list higher, never lower, so we keep the
                        maximum, just as we do for the depth.
                    */
                    if (ht > siz.ht) {
                        siz.ht = ht;
                    }
                    if (dp > siz.dp) {
                        siz.dp = dp;
                    }
                }
                break;
            case kern_node:
                siz.wd += tex_kern_dimension_ex(p);
                break;
            case disc_node:
                {
                    scaledwhd whd = tex_natural_hsizes(disc_no_break_head(p), null, g_mult, g_sign, g_order); /* hm, really glue here? */
                    siz.wd += whd.wd;
                    if (whd.ht > siz.ht) {
                        siz.ht = whd.ht;
                    }
                    if (whd.dp > siz.dp) {
                        siz.dp = whd.dp;
                    }
                }
                break;
            case math_node:
                if (tex_math_glue_is_zero(p) || tex_ignore_math_skip(p)) {
                    siz.wd += math_surround(p);
                } else {
                    siz.wd += math_amount(p);
                    switch (g_sign) {
                        case stretching_glue_sign:
                            if (math_stretch_order(p) == g_order) {
                                gp += math_stretch(p);
                            }
                            break;
                        case shrinking_glue_sign:
                            if (math_shrink_order(p) == g_order) {
                                gm += math_shrink(p);
                            }
                            break;
                    }
                }
                break;
            case sub_box_node:
                /* really? */
                break;
            case sub_mlist_node:
                {
                    /* hack */
                    scaledwhd whd = tex_natural_hsizes(kernel_math_list(p), null, 0.0, 0, 0);
                    siz.wd += whd.wd;
                    if (whd.ht > siz.ht) {
                        siz.ht = whd.ht;
                    }
                    if (whd.dp > siz.dp) {
                        siz.dp = whd.dp;
                    }
                }
                break;
            default:
                break;
        }
        p = node_next(p);
    }
    switch (g_sign) {
        case stretching_glue_sign:
            siz.wd += glueround((glueratio)(g_mult) * (glueratio)(gp));
            break;
        case shrinking_glue_sign:
            siz.wd -= glueround((glueratio)(g_mult) * (glueratio)(gm));
            break;
    }
    return siz;
}

+scaledwhd tex_natural_vsizes(halfword p, halfword pp, glueratio g_mult, int g_sign, int g_order) +{ + scaledwhd siz = { 0, 0, 0 }; + scaled gp = 0; + scaled gm = 0; + while (p && p != pp) { + switch (node_type(p)) { + case hlist_node: + case vlist_node: + { + scaled s = box_shift_amount(p); + scaledwhd whd = tex_pack_dimensions(p); + if (whd.wd + s > siz.wd) { + siz.wd = whd.wd + s; + } + siz.ht += siz.dp + whd.ht; + siz.dp = whd.dp; + } + break; + case unset_node: + siz.ht += siz.dp + 
box_height(p); + siz.dp = box_depth(p); + if (box_width(p) > siz.wd) { + siz.wd = box_width(p); + } + break; + case rule_node: + siz.ht += siz.dp + rule_height(p); + siz.dp = rule_depth(p); + if (rule_width(p) > siz.wd) { + siz.wd = rule_width(p); + } + break; + case glue_node: + { + siz.ht += siz.dp + glue_amount(p); + siz.dp = 0; + if (is_leader(p)) { + halfword gl = glue_leader_ptr(p); + halfword wd = 0; + switch (node_type(gl)) { + case hlist_node: + case vlist_node: + wd = box_width(gl); + break; + case rule_node: + wd = rule_width(gl); + break; + } + if (wd > siz.wd) { + siz.wd = wd; + } + } + switch (g_sign) { + case stretching_glue_sign: + if (glue_stretch_order(p) == g_order) { + gp += glue_stretch(p); + } + break; + case shrinking_glue_sign: + if (glue_shrink_order(p) == g_order) { + gm += glue_shrink(p); + } + break; + } + break; + } + case kern_node: + siz.ht += siz.dp + kern_amount(p); + siz.dp = 0; + break; + case glyph_node: + tex_confusion("glyph in vpack"); + break; + case disc_node: + tex_confusion("discretionary in vpack"); + break; + default: + break; + } + p = node_next(p); + } + switch (g_sign) { + case stretching_glue_sign: + siz.ht += glueround((glueratio)(g_mult) * (glueratio)(gp)); + break; + case shrinking_glue_sign: + siz.ht -= glueround((glueratio)(g_mult) * (glueratio)(gm)); + break; + } + return siz; +} + +/*tex simplified variant with less memory access */ + +halfword tex_natural_width(halfword p, halfword pp, glueratio g_mult, int g_sign, int g_order) +{ + scaled wd = 0; + scaled gp = 0; + scaled gm = 0; + while (p && p != pp) { + /* no real gain over check in switch */ + switch (node_type(p)) { + case glyph_node: + wd += tex_glyph_width(p); /* Plus expansion? 
*/ + break; + case hlist_node: + case vlist_node: + case unset_node: + wd += box_width(p); + break; + case rule_node: + wd += rule_width(p); + break; + case glue_node: + wd += glue_amount(p); + switch (g_sign) { + case stretching_glue_sign: + if (glue_stretch_order(p) == g_order) { + gp += glue_stretch(p); + } + break; + case shrinking_glue_sign: + if (glue_shrink_order(p) == g_order) { + gm += glue_shrink(p); + } + break; + } + break; + case kern_node: + wd += kern_amount(p); // + kern_expansion(p); + break; + case disc_node: + wd += tex_natural_width(disc_no_break(p), null, g_mult, g_sign, g_order); + break; + case math_node: + if (tex_math_glue_is_zero(p) || tex_ignore_math_skip(p)) { + wd += math_surround(p); + } else { + wd += math_amount(p); + switch (g_sign) { + case stretching_glue_sign: + if (math_stretch_order(p) == g_order) { + gp += math_stretch(p); + } + break; + case shrinking_glue_sign: + if (math_shrink_order(p) == g_order) { + gm += math_shrink(p); + } + break; + } + } + break; + default: + break; + } + p = node_next(p); + } + switch (g_sign) { + case stretching_glue_sign: + wd += glueround((glueratio) (g_mult) * (glueratio) (gp)); + break; + case shrinking_glue_sign: + wd -= glueround((glueratio) (g_mult) * (glueratio) (gm)); + break; + } + return wd; +} + +halfword tex_natural_hsize(halfword p, halfword *correction) +{ + scaled wd = 0; + halfword c = null; + while (p) { + switch (node_type(p)) { + case glyph_node: + wd += tex_glyph_width(p); /* Plus expansion? 
*/ + break; + case hlist_node: + case vlist_node: + case unset_node: + wd += box_width(p); + break; + case rule_node: + wd += rule_width(p); + break; + case glue_node: + wd += glue_amount(p); + if (node_subtype(p) == correction_skip_glue) { + c = p; + } + break; + case kern_node: + wd += kern_amount(p); // + kern_expansion(p); + break; + case disc_node: + wd += tex_natural_hsize(disc_no_break(p), NULL); + break; + case math_node: + if (tex_math_glue_is_zero(p) || tex_ignore_math_skip(p)) { + wd += math_surround(p); + } else { + wd += math_amount(p); + } + break; + default: + break; + } + p = node_next(p); + } + if (correction) { + *correction = c; + } + return wd; +} + +halfword tex_natural_vsize(halfword p) +{ + scaledwhd siz = { 0, 0, 0 }; + while (p) { + switch (node_type(p)) { + case hlist_node: + case vlist_node: + { + scaledwhd whd = tex_pack_dimensions(p); + siz.ht += siz.dp + whd.ht; + siz.dp = whd.dp; + } + break; + case unset_node: + siz.ht += siz.dp + box_height(p); + siz.dp = box_depth(p); + break; + case rule_node: + siz.ht += siz.dp + rule_height(p); + siz.dp = rule_depth(p); + break; + case glue_node: + siz.ht += siz.dp + glue_amount(p); + siz.dp = 0; + break; + case kern_node: + siz.ht += siz.dp + kern_amount(p); + siz.dp = 0; + break; + default: + break; + } + p = node_next(p); + } + return siz.ht + siz.dp; +} + +/*tex + + The |vpack| subroutine is actually a special case of a slightly more general routine called + |vpackage|, which has four parameters. The fourth parameter, which is |max_dimen| in the case + of |vpack|, specifies the maximum depth of the page box that is constructed. The depth is first + computed by the normal rules; if it exceeds this limit, the reference point is simply moved + down until the limiting depth is attained. We actually hav efive parameters because we also + deal with teh direction. 
+ +*/ + +halfword tex_vpack(halfword p, scaled h, int m, scaled l, singleword pack_direction, int retain) +{ + /*tex width */ + scaled w = 0; + /*tex depth */ + scaled d = 0; + /*tex natural height */ + scaled x = 0; + /*tex the box node that will be returned */ + halfword r = tex_new_node(vlist_node, unknown_list); + (void) retain; /* todo */ + box_dir(r) = pack_direction; + node_subtype(r) = min_quarterword; + box_shift_amount(r) = 0; + box_list(r) = p; + lmt_packaging_state.last_badness = 0; + lmt_packaging_state.last_overshoot = 0; + for (int i = normal_glue_order; i <= filll_glue_order; i++) { + lmt_packaging_state.total_stretch[i] = 0; + lmt_packaging_state.total_shrink[i] = 0; + } + while (p) { + /*tex + + Examine node |p| in the vlist, taking account of its effect on the dimensions of the + new box; then advance |p| to the next node. + + */ + halfword n = node_next(p); + switch (node_type(p)) { + case hlist_node: + case vlist_node: + { + /*tex + + Incorporate box dimensions into the dimensions of the vbox that will + contain it. + + */ + scaled s = box_shift_amount(p); + scaledwhd whd = tex_pack_dimensions(p); + if (whd.wd + s > w) { + w = whd.wd + s; + } + x += d + whd.ht; + d = whd.dp; + tex_aux_promote_pre_migrated(r, p); + tex_aux_promote_post_migrated(r, p); + } + break; + case unset_node: + x += d + box_height(p); + d = box_depth(p); + if (box_width(p) > w) { + w = box_width(p); + } + // tex_aux_promote_pre_migrated(r, p); + // tex_aux_promote_post_migrated(r, p); + break; + case rule_node: + x += d + rule_height(p); + d = rule_depth(p); + if (rule_width(p) > w) { + w = rule_width(p); + } + break; + case glue_node: + /*tex Incorporate glue into the vertical totals. 
*/ + { + halfword o; + x += d + glue_amount(p); + d = 0; + o = glue_stretch_order(p); + lmt_packaging_state.total_stretch[o] += glue_stretch(p); + o = glue_shrink_order(p); + lmt_packaging_state.total_shrink[o] += glue_shrink(p); + if (is_leader(p)) { + halfword gl = glue_leader_ptr(p); + scaled wd = 0; + switch (node_type(gl)) { + case hlist_node: + case vlist_node: + wd = box_width(gl); + break; + case rule_node: + wd = rule_width(gl); + break; + } + if (wd > w) { + w = wd; + } + } + break; + } + case kern_node: + x += d + kern_amount(p); + d = 0; + break; + case insert_node: + if (auto_migrating_mode_permitted(auto_migration_mode_par, auto_migrate_insert)) { + halfword l = insert_list(p); + tex_aux_post_migrate(r, p); + while (l) { + l = node_type(l) == insert_node ? tex_aux_post_migrate(r, l) : node_next(l); + } + } + break; + case mark_node: + if (auto_migrating_mode_permitted(auto_migration_mode_par, auto_migrate_mark)) { + tex_aux_post_migrate(r, p); + } + break; + case glyph_node: + tex_confusion("glyph in vpack"); + break; + case disc_node: + tex_confusion("discretionary in vpack"); + break; + default: + break; + } + p = n; + } + box_width(r) = w; + if (d > l) { + x += d - l; + box_depth(r) = l; + } else { + box_depth(r) = d; + } + /*tex + + Determine the value of |height(r)| and the appropriate glue setting; then |return| or |goto + common_ending|. When we get to the present part of the program, |x| is the natural height of + the box being packaged. + */ + if (m == packing_additional) { + h += x; + } + box_height(r) = h; + x = h - x; + /*tex Now |x| is the excess to be made up. */ + if (x == 0) { + box_glue_sign(r) = normal_glue_sign; + box_glue_order(r) = normal_glue_order; + box_glue_set(r) = 0.0; + goto EXIT; + } else if (x > 0) { + /*tex Determine vertical glue stretch setting, then |return| or |goto common_ending|. 
*/ + halfword o = tex_aux_used_order(lmt_packaging_state.total_stretch); + box_glue_order(r) = o; + box_glue_sign(r) = stretching_glue_sign; + if (lmt_packaging_state.total_stretch[o] != 0) { + box_glue_set(r) = (glueratio) ((double) x / lmt_packaging_state.total_stretch[o]); + } else { + /*tex There's nothing to stretch. */ + box_glue_sign(r) = normal_glue_sign; + box_glue_set(r) = 0.0; + } + if (o == normal_glue_order && box_list(r)) { + /*tex Report an underfull vbox and |goto common_ending|, if this box is sufficiently bad. */ + lmt_packaging_state.last_badness = tex_badness(x, lmt_packaging_state.total_stretch[normal_glue_order]); + if (lmt_packaging_state.last_badness > vbadness_par) { + int callback_id = lmt_callback_defined(vpack_quality_callback); + if (callback_id > 0) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "SdNddS->", + lmt_packaging_state.last_badness > 100 ? "underfull" : "loose", + lmt_packaging_state.last_badness, + r, + abs(lmt_packaging_state.pack_begin_line), + lmt_input_state.input_line, + tex_current_input_file_name() + ); + goto EXIT; + } else { + tex_print_nlp(); + if (lmt_packaging_state.last_badness > 100) { + tex_print_format("%l[package: underfull \\vbox (badness %i)", lmt_packaging_state.last_badness); + } else { + tex_print_format("%l[package: loose \\vbox (badness %i)", lmt_packaging_state.last_badness); + } + goto COMMON_ENDING; + } + } + } + goto EXIT; + } else { + /*tex Determine vertical glue shrink setting, then |return| or |goto common_ending|. */ + halfword o = tex_aux_used_order(lmt_packaging_state.total_shrink); + box_glue_order(r) = o; + box_glue_sign(r) = shrinking_glue_sign; + if (lmt_packaging_state.total_shrink[o] != 0) { + box_glue_set(r) = (glueratio) ((double) (-x) / lmt_packaging_state.total_shrink[o]); + } else { + /*tex There's nothing to shrink. 
*/ + box_glue_sign(r) = normal_glue_sign; + box_glue_set(r) = 0.0; + } + if ((lmt_packaging_state.total_shrink[o] < -x) && (o == normal_glue_order) && (box_list(r))) { + int overshoot = -x - lmt_packaging_state.total_shrink[normal_glue_order]; + lmt_packaging_state.last_badness = 1000000; + lmt_packaging_state.last_overshoot = overshoot; + /*tex Use the maximum shrinkage */ + box_glue_set(r) = 1.0; + /*tex Report an overfull vbox and |goto common_ending|, if this box is sufficiently bad. */ + if ((overshoot > vfuzz_par) || (vbadness_par < 100)) { + int callback_id = lmt_callback_defined(vpack_quality_callback); + if (callback_id > 0) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "SdNddS->", + "overfull", + overshoot, + r, + abs(lmt_packaging_state.pack_begin_line), + lmt_input_state.input_line, + tex_current_input_file_name() + ); + goto EXIT; + } else { + tex_print_nlp(); + tex_print_format("%l[package: overfull \\vbox (%D too high)", - x - lmt_packaging_state.total_shrink[normal_glue_order], pt_unit); + goto COMMON_ENDING; + } + } + } else if (o == normal_glue_order) { + if (box_list(r)) { + /*tex Report a tight vbox and |goto common_ending|, if this box is sufficiently bad. */ + lmt_packaging_state.last_badness = tex_badness(-x, lmt_packaging_state.total_shrink[normal_glue_order]); + if (lmt_packaging_state.last_badness > vbadness_par) { + int callback_id = lmt_callback_defined(vpack_quality_callback); + if (callback_id > 0) { + lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "SdNddS->", + "tight", + lmt_packaging_state.last_badness, + r, + abs(lmt_packaging_state.pack_begin_line), + lmt_input_state.input_line, + tex_current_input_file_name() + ); + goto EXIT; + } else { + tex_print_nlp(); + tex_print_format("%l[package: tight \\vbox (badness %i)", lmt_packaging_state.last_badness); + goto COMMON_ENDING; + } + } + } + } + goto EXIT; + } + COMMON_ENDING: + /*tex Finish issuing a diagnostic message or an overfull or underfull vbox. 
*/ + if (lmt_page_builder_state.output_active) { + tex_print_format(" has occurred while \\output is active]"); + } else if (lmt_packaging_state.pack_begin_line != 0) { + tex_print_format(" in alignment at lines %i--%i]", abs(lmt_packaging_state.pack_begin_line), lmt_input_state.input_line); + } else { + tex_print_format(" detected at line %i]", lmt_input_state.input_line); + } + tex_print_ln(); + tex_begin_diagnostic(); + tex_show_box(r); + tex_end_diagnostic(); + EXIT: + /*tex Further (experimental) actions can go here. */ + return r; +} + +halfword tex_filtered_vpack(halfword p, scaled h, int m, scaled l, int grp, halfword pack_direction, int just_pack, halfword attr, int state, int retain) +{ + halfword q = p; + if (! just_pack) { + q = lmt_vpack_filter_callback(q, h, m, l, grp, pack_direction, attr); + } + q = tex_vpack(q, h, m, l, checked_direction_value(pack_direction), retain); + if (q && normalize_par_mode_permitted(normalize_par_mode_par, flatten_v_leaders_mode) && ! is_box_package_state(state, package_u_leader_delayed)) { + tex_flatten_leaders(q, NULL); + } + if (! just_pack) { + q = lmt_packed_vbox_filter_callback(q, grp); + } + return q; +} + +/*tex + Here we always start out in l2r mode and without shift. After all we need to be compatible with + how it was before. +*/ + +void tex_run_vcenter(void) +{ + tex_aux_scan_full_spec(vcenter_group, direction_l2r, 0, 0); + tex_normal_paragraph(vcenter_par_context); + tex_push_nest(); + cur_list.mode = -vmode; + cur_list.prev_depth = ignore_depth; + if (every_vbox_par) { + tex_begin_token_list(every_vbox_par, every_vbox_text); + } +} + +void tex_finish_vcenter_group(void) +{ + if (! 
tex_wrapped_up_paragraph(vcenter_par_context)) { + halfword p; + tex_end_paragraph(vcenter_group, vcenter_par_context); + tex_package(vpack_code); + p = tex_pop_tail(); + if (p) { + switch (node_type(p)) { + case vlist_node: + { + scaled delta = box_total(p); + box_height(p) = tex_half_scaled(delta); + box_depth(p) = delta - box_height(p); + break; + } + case simple_noad: + node_subtype(p) = vcenter_noad_subtype; + break; + /* + case style_node: + break; + */ + } + tex_tail_append(p); + } + } +} + +inline static scaled tex_aux_checked_dimen1(halfword v) +{ + if (v > max_dimen) { + return max_dimen; + } else if (v < -max_dimen) { + return -max_dimen; + } else { + return v; + } +} + +inline static scaled tex_aux_checked_dimen2(halfword v) +{ + if (v > max_dimen) { + return max_dimen; + } else if (v < 0) { + return 0; + } else { + return v; + } +} + +void tex_package(singleword nature) +{ + halfword context, spec, dirptr, attrlist, justpack, orientation, anchor, geometry, source, target, axis, mainclass, state, retain; + scaled shift; + int grp = cur_group; + scaled d = box_max_depth_par; + halfword boxnode = null; /*tex Aka |cur_box|. 
*/ + tex_unsave(); + lmt_save_state.save_stack_data.ptr -= saved_full_spec_n_of_items; + context = saved_value(saved_full_spec_item_context); + spec = saved_value(saved_full_spec_item_packaging); + dirptr = saved_value(saved_full_spec_item_direction); + attrlist = saved_value(saved_full_spec_item_attr_list); + justpack = saved_value(saved_full_spec_item_only_pack); + orientation = saved_value(saved_full_spec_item_orientation); + anchor = saved_value(saved_full_spec_item_anchor); + geometry = saved_value(saved_full_spec_item_geometry); + shift = saved_value(saved_full_spec_item_shift); + source = saved_value(saved_full_spec_item_source); + target = saved_value(saved_full_spec_item_target); + axis = saved_value(saved_full_spec_item_axis); + mainclass = saved_value(saved_full_spec_item_class); + state = saved_value(saved_full_spec_item_state); + retain = saved_value(saved_full_spec_item_retain); + if (cur_list.mode == -hmode) { + boxnode = tex_filtered_hpack(cur_list.head, cur_list.tail, spec, saved_level(saved_full_spec_item_packaging), + grp, saved_level(saved_full_spec_item_direction), justpack, attrlist, state, retain); + node_subtype(boxnode) = hbox_list; + if (saved_value(saved_full_spec_item_reverse)) { + box_list(boxnode) = tex_reversed_node_list(box_list(boxnode)); + } + box_package_state(boxnode) = hbox_package_state; + } else { + boxnode = tex_filtered_vpack(node_next(cur_list.head), spec, saved_level(saved_full_spec_item_packaging), + d, grp, saved_level(saved_full_spec_item_direction), justpack, attrlist, state, retain); + if (nature == vtop_code) { + /*tex + + Read just the height and depth of |boxnode| (|boxnode|), for |\vtop|. The height of + a |\vtop| box is inherited from the first item on its list, if that item is an + |hlist_node|, |vlist_node|, or |rule_node|; otherwise the |\vtop| height is zero. 
+ + */ + scaled h = 0; + halfword p = box_list(boxnode); + if (p) { + switch (node_type(p)) { + case hlist_node: + case vlist_node: + h = box_height(p); + break; + case rule_node: + h = rule_height(p); + break; + } + } + box_depth(boxnode) = box_total(boxnode) - h; + box_height(boxnode) = h; + box_package_state(boxnode) = vtop_package_state; + } else { + box_package_state(boxnode) = vbox_package_state; + } + } + if (dirptr) { + /*tex Adjust back |text_dir_ptr| for |scan_spec| */ + tex_flush_node_list(lmt_dir_state.text_dir_ptr); + lmt_dir_state.text_dir_ptr = dirptr; + } + /* + An attribute is not assigned beforehand, just passed. But, when some is assigned we need to + retain it. So, how do we deal with attributes that are added? Maybe we have to merge + changes? Or maybe an extra option in hpack ... some day. + */ + tex_attach_attribute_list_attribute(boxnode, attrlist); + delete_attribute_reference(attrlist); + /* */ + if (tex_has_geometry(geometry, offset_geometry) || tex_has_geometry(geometry, orientation_geometry)) { + scaled xoffset = saved_value(saved_full_spec_item_xoffset); + scaled yoffset = saved_value(saved_full_spec_item_yoffset); + scaled xmove = saved_value(saved_full_spec_item_xmove); + scaled ymove = saved_value(saved_full_spec_item_ymove); + scaled wd = box_width(boxnode); + scaled ht = box_height(boxnode); + scaled dp = box_depth(boxnode); + if (xmove) { + xoffset = tex_aux_checked_dimen1(xoffset + xmove); + wd = tex_aux_checked_dimen2(wd + xmove); + } + if (ymove) { + yoffset = tex_aux_checked_dimen1(yoffset + ymove); + ht = tex_aux_checked_dimen2(ht + ymove); + dp = tex_aux_checked_dimen2(dp - ymove); + } + box_w_offset(boxnode) = wd; + box_h_offset(boxnode) = ht; + box_d_offset(boxnode) = dp; + switch (orientationonly(orientation)) { + case 0 : /* 0 */ + break; + case 2 : /* 180 */ + box_height(boxnode) = dp; + box_depth(boxnode) = ht; + geometry |= orientation_geometry; + break; + case 1 : /* 90 */ + case 3 : /* 270 */ + box_width(boxnode) = 
ht + dp; + box_height(boxnode) = wd; + box_depth(boxnode) = 0; + geometry |= orientation_geometry; + break; + case 4 : /* 0 */ + box_height(boxnode) = ht + dp; + box_depth(boxnode) = 0; + geometry |= orientation_geometry; + break; + case 5 : /* 180 */ + box_height(boxnode) = 0; + box_depth(boxnode) = ht + dp; + geometry |= orientation_geometry; + break; + default : + break; + } + if (xoffset || yoffset) { + box_x_offset(boxnode) = xoffset; + box_y_offset(boxnode) = yoffset; + geometry |= offset_geometry; + } + } + if (source || target) { + box_source_anchor(boxnode) = source; + box_target_anchor(boxnode) = target; + geometry |= anchor_geometry; + } + box_anchor(boxnode) = anchor; + box_orientation(boxnode) = orientation; + box_geometry(boxnode) = (singleword) geometry; + if (saved_value(saved_full_spec_item_container)) { + node_subtype(boxnode) = container_list; + } + box_axis(boxnode) = (singleword) axis; + box_package_state(boxnode) |= (singleword) state; + tex_pop_nest(); + tex_box_end(context, boxnode, shift, mainclass); +} + +void tex_run_unpackage(void) +{ + int code = cur_chr; /*tex should we copy? */ + halfword head = cur_list.tail; + halfword tail = cur_list.tail; + switch (code) { + case box_code: + case copy_code: + case unpack_code: + { + halfword n = tex_scan_box_register_number(); + halfword b = box_register(n); + if (! b) { + return; + } else if ((abs(cur_list.mode) == mmode) + || ((abs(cur_list.mode) == vmode) && (node_type(b) != vlist_node)) + || ((abs(cur_list.mode) == hmode) && (node_type(b) != hlist_node))) { + tex_handle_error( + normal_error_type, + "Incompatible list can't be unboxed", + "Sorry, Pandora. (You sneaky devil.) I refuse to unbox an \\hbox in vertical mode\n" + "or vice versa. And I can't open any boxes in math mode." + ); + return; + } else { + + /* todo: check head, not needed, always a temp */ + + /*tex Via variables for varmem assignment. */ + halfword list = box_list(b); + halfword pre_migrated = code == unpack_code ? 
null : box_pre_migrated(b); + halfword post_migrated = code == unpack_code ? null : box_post_migrated(b); + // halfword pre_adjusted = code == unpack_code || (abs(cur_list.mode) == hmode) ? null : box_pre_adjusted(b); + // halfword post_adjusted = code == unpack_code || (abs(cur_list.mode) == hmode) ? null : box_post_adjusted(b); + // halfword pre_adjusted = code == unpack_code ? null : box_pre_adjusted(b); + // halfword post_adjusted = code == unpack_code ? null : box_post_adjusted(b); + halfword pre_adjusted = box_pre_adjusted(b); + halfword post_adjusted = box_post_adjusted(b); + if (pre_adjusted) { + if (code == copy_code) { + pre_adjusted = tex_copy_node_list(pre_adjusted, null); + } else { + box_pre_adjusted(b) = null; + } + while (pre_adjusted) { + halfword p = pre_adjusted; + halfword h = adjust_list(pre_adjusted); + if (h) { + if (abs(cur_list.mode) == hmode) { + halfword n = tex_new_node(adjust_node, pre_adjust_code); + adjust_list(n) = h; + h = n; + } + if (! head) { + head = h; + } + tex_try_couple_nodes(tail, h); + tail = tex_tail_of_node_list(h); + adjust_list(pre_adjusted) = null; + } + pre_adjusted = node_next(pre_adjusted); + tex_flush_node(p); + } + } + if (pre_migrated) { + if (code == copy_code) { + pre_migrated = tex_copy_node_list(pre_migrated, null); + } else { + box_pre_migrated(b) = null; + } + tex_try_couple_nodes(tail, pre_migrated); + tail = tex_tail_of_node_list(pre_migrated); + if (! head) { + head = pre_migrated; + } + } + if (list) { + if (code == copy_code) { + list = tex_copy_node_list(list, null); + } else { + box_list(b) = null; + } + tex_try_couple_nodes(tail, list); + tail = tex_tail_of_node_list(list); + if (! head) { + head = list; + } + } + if (post_migrated) { + if (code == copy_code) { + post_migrated = tex_copy_node_list(post_migrated, null); + } else { + box_post_migrated(b) = null; + } + tex_try_couple_nodes(tail, post_migrated); + tail = tex_tail_of_node_list(post_migrated); + if (! 
head) { + head = post_migrated; + } + } + if (post_adjusted) { + if (code == copy_code) { + post_adjusted = tex_copy_node_list(post_adjusted, null); + } else { + box_post_adjusted(b) = null; + } + while (post_adjusted) { + halfword p = post_adjusted; + halfword h = adjust_list(post_adjusted); + if (h) { + if (abs(cur_list.mode) == hmode) { + halfword n = tex_new_node(adjust_node, post_adjust_code); + adjust_list(n) = h; + h = n; + } + if (! head) { + head = h; + } + tex_try_couple_nodes(tail, h); + tail = tex_tail_of_node_list(h); + adjust_list(post_adjusted) = null; + } + post_adjusted = node_next(post_adjusted); + tex_flush_node(p); + } + } + if (code != copy_code) { + box_register(n) = null; + tex_flush_node(b); + } + if (! head) { + tail = null; + } else if (node_type(b) == hlist_node && normalize_line_mode_permitted(normalize_line_mode_par, remove_margin_kerns_mode)) { + /* only here head is used ... */ + tail = head; + while (1) { + halfword next = node_next(tail); + if (next) { + if (tex_is_margin_kern(next)) { + tex_try_couple_nodes(tail, node_next(next)); + tex_flush_node(next); + } else { + tail = next; + } + } else { + break; + } + } + } else { + tail = tex_tail_of_node_list(tail); + } + cur_list.tail = tail; + break; + } + } + case last_box_code: + { + tex_try_couple_nodes(tail, lmt_packaging_state.page_discards_head); + lmt_packaging_state.page_discards_head = null; + cur_list.tail = tex_tail_of_node_list(tail); + break; + } + case vsplit_code: + { + tex_try_couple_nodes(tail, lmt_packaging_state.split_discards_head); + lmt_packaging_state.split_discards_head = null; + cur_list.tail = tex_tail_of_node_list(tail); + break; + } + case insert_box_code: + case insert_copy_code: + { + /* + This one is sensitive for messing with callbacks. Somehow attributes and the if + stack ifs can get corrupted but I have no clue yet how that happens but temp + nodes have the same size so ... 
+ */ + halfword index = tex_scan_int(0, NULL); + if (tex_valid_insert_id(index)) { + halfword boxnode = tex_get_insert_content(index); /* also checks for id */ + if (boxnode) { + if (abs(cur_list.mode) != vmode) { + tex_handle_error( + normal_error_type, + "Unpacking an inserts can only happen in vertical mode.", + NULL + ); + } else if (node_type(boxnode) == vlist_node) { + if (code == insert_copy_code) { + boxnode = tex_copy_node(boxnode); + } else { + tex_set_insert_content(index, null); + } + if (boxnode) { + halfword list = box_list(boxnode); + if (list) { + tex_try_couple_nodes(tail, list); + cur_list.tail = tex_tail_of_node_list(list); + box_list(boxnode) = null; + } + tex_flush_node(boxnode); + } + } else { + /* error, maybe migration list */ + } + } + } + break; + } + case local_left_box_box_code: + { + tex_try_couple_nodes(tail, tex_get_local_boxes(local_left_box_code)); + cur_list.tail = tex_tail_of_node_list(tail); + break; + } + case local_right_box_box_code: + { + tex_try_couple_nodes(tail, tex_get_local_boxes(local_right_box_code)); + cur_list.tail = tex_tail_of_node_list(tail); + break; + } + case local_middle_box_box_code: + { + tex_try_couple_nodes(tail, tex_get_local_boxes(local_middle_box_code)); + cur_list.tail = tex_tail_of_node_list(tail); + break; + } + default: + { + tex_confusion("weird unpackage"); + break; + } + } + /* margin stuff was here */ +} + +/*tex + + When a box is being appended to the current vertical list, the baselineskip calculation is + handled by the |append_to_vlist| routine. + + Todo: maybe store some more in lines, so that we can get more consistent spacing (for instance + the |baseline_skip_par| and |prev_depth_par| are now pars and not values frozen with the line. 
+    But as usual we can expect side effects so \unknown
+
+*/
+/*tex
+
+    The next helper creates the interline glue that goes in front of box |b|. The deficiency |d| is
+    the amount of the baseline skip minus the current |prev_depth| minus the height of |b|. When
+    |d| is less than the line skip limit a line skip glue node is made, otherwise we get a baseline
+    skip glue node whose amount is set to |d|. When |properties| is given, the two skips and the
+    limit are taken from that record, otherwise the current parameters are used.
+
+*/
+inline static halfword tex_aux_depth_correction(halfword b, const line_break_properties *properties)
+{
+    /*tex The deficiency of space between baselines is kept in |d|, the new glue node in |p|: */
+    halfword p;
+    if (properties) {
+        scaled d = glue_amount(properties->baseline_skip) - cur_list.prev_depth - box_height(b);
+        if (d < properties->line_skip_limit) {
+            p = tex_new_glue_node(properties->line_skip, line_skip_glue);
+        } else {
+            p = tex_new_glue_node(properties->baseline_skip, baseline_skip_glue);
+            glue_amount(p) = d;
+        }
+    } else {
+        scaled d = glue_amount(baseline_skip_par) - cur_list.prev_depth - box_height(b);
+        if (d < line_skip_limit_par) {
+            p = tex_new_param_glue_node(line_skip_code, line_skip_glue);
+        } else {
+            p = tex_new_param_glue_node(baseline_skip_code, baseline_skip_glue);
+            glue_amount(p) = d;
+        }
+    }
+    return p;
+}
+/*tex
+
+    Here box |b| gets appended to the current vertical list. When |location| is not negative the
+    callback (|lmt_append_to_vlist_callback|) is consulted first. When that call succeeds, whatever
+    list came back in |result| (possibly nothing) is appended instead of |b|, |prev_depth| is only
+    changed when the callback has set it, and a depth correction is only inserted when the callback
+    asked for a check and |result| starts with a box or rule. In all other cases we do the
+    traditional thing: interline glue (but only when |prev_depth| is larger than |ignore_depth|),
+    then the box itself, after which |prev_depth| becomes the depth of |b|.
+
+*/
+void tex_append_to_vlist(halfword b, int location, const line_break_properties *properties)
+{
+    if (location >= 0) {
+        halfword result = null;
+        halfword next_depth = ignore_depth;
+        int prev_set = 0;
+        int check_depth = 0;
+        if (lmt_append_to_vlist_callback(b, location, cur_list.prev_depth, &result, &next_depth, &prev_set, &check_depth)) {
+            if (prev_set) {
+                cur_list.prev_depth = next_depth;
+            }
+            if (check_depth && result && (cur_list.prev_depth > ignore_depth)) {
+                /*tex
+                    We only deal with a few types and one can always at the \LUA\ end check for some of
+                    these and decide not to apply the correction. Only the first node of |result| is
+                    looked at here.
+                */
+                switch (node_type(result)) {
+                    case hlist_node:
+                    case vlist_node:
+                    case rule_node:
+                        {
+                            halfword p = tex_aux_depth_correction(result, properties);
+                            tex_couple_nodes(cur_list.tail, p);
+                            cur_list.tail = p;
+                            break;
+                        }
+                }
+            }
+            /*tex Append the returned list node by node so that |cur_list.tail| ends up at its last node. */
+            while (result) {
+                tex_couple_nodes(cur_list.tail, result);
+                cur_list.tail = result;
+                result = node_next(result);
+            }
+            return;
+        }
+    }
+    if (cur_list.prev_depth > ignore_depth) {
+        halfword p = tex_aux_depth_correction(b, properties);
+        tex_couple_nodes(cur_list.tail, p);
+        cur_list.tail = p;
+    }
+    tex_couple_nodes(cur_list.tail, b);
+    cur_list.tail = b;
+    cur_list.prev_depth = box_depth(b);
+}
+
+/*tex
+
+    When |saving_vdiscards| is positive then the glue, kern, and penalty nodes removed by the page
+    builder or by |\vsplit| from the top of a vertical list are saved in special lists instead of
+    being discarded.
+
+    The |vsplit| procedure, which implements \TEX's |\vsplit| operation, is considerably simpler
+    than |line_break| because it doesn't have to worry about hyphenation, and because its mission
+    is to discover a single break instead of an optimum sequence of breakpoints. But before we get
+    into the details of |vsplit|, we need to consider a few more basic things.
+
+    A subroutine called |prune_page_top| takes a pointer to a vlist and returns a pointer to a
+    modified vlist in which all glue, kern, and penalty nodes have been deleted before the first
+    box or rule node. However, the first box or rule is actually preceded by a newly created glue
+    node designed so that the topmost baseline will be at distance |split_top_skip| from the top,
+    whenever this is possible without backspacing. Boundary, whatsit, mark and insert nodes that
+    come before that first box or rule are kept; any other node type triggers a confusion.
+
+    When the second argument |s| is |false| the deleted nodes are destroyed, otherwise they are
+    collected in a list starting at |split_discards| (here that is the |split_discards_head| field
+    of |lmt_packaging_state|).
+
+    NOTE(review): the tail pointer |r| of that list is a local that starts out as |null|, so when
+    |s| is set the code relies on |split_discards_head| being empty on entry (|tex_vsplit| resets
+    it before it calls us) --- confirm for other callers.
+
+    Are the prev pointers okay here? As far as the code below shows only |node_next| fields are
+    assigned, so prev links are not updated in this routine.
+
+*/
+
+halfword tex_prune_page_top(halfword p, int s)
+{
+    /*tex Lags one step behind |p|. */
+    halfword prev_p = temp_head;
+    halfword r = null;
+    node_next(temp_head) = p;
+    while (p) {
+        switch (node_type(p)) {
+            case hlist_node:
+            case vlist_node:
+            case rule_node:
+                {
+                    /*tex Insert glue for |split_top_skip| and set |p| to |null| (which ends the loop). */
+                    halfword h = node_type(p) == rule_node ? rule_height(p) : box_height(p);
+                    halfword q = tex_new_param_glue_node(split_top_skip_code, split_top_skip_glue);
+                    node_next(prev_p) = q;
+                    node_next(q) = p;
+                    glue_amount(q) = glue_amount(q) > h ? glue_amount(q) - h : 0;
+                    p = null;
+                }
+                break;
+            case boundary_node:
+                /* shouldn't we discard */
+            case whatsit_node:
+            case mark_node:
+            case insert_node:
+                prev_p = p;
+                p = node_next(prev_p);
+                break;
+            case glue_node:
+            case kern_node:
+            case penalty_node:
+                {
+                    /*tex Unlink |q| from the list and either save it or flush it. */
+                    halfword q = p;
+                    p = node_next(q);
+                    node_next(q) = null;
+                    node_next(prev_p) = p;
+                    if (s) {
+                        if (lmt_packaging_state.split_discards_head) {
+                            node_next(r) = q;
+                        } else {
+                            lmt_packaging_state.split_discards_head = q;
+                        }
+                        r = q;
+                    } else {
+                        tex_flush_node_list(q);
+                    }
+                }
+                break;
+            default:
+                tex_confusion("pruning page top");
+                break;
+        }
+    }
+    return node_next(temp_head);
+}
+
+/*tex
+
+    The next subroutine finds the best place to break a given vertical list so as to obtain a box
+    of height~|h|, with maximum depth~|d|. A pointer to the beginning of the vertical list is given,
+    and a pointer to the optimum breakpoint is returned. The list is effectively followed by a
+    forced break, i.e., a penalty node with the |eject_penalty|; if the best break occurs at this
+    artificial node, the value |null| is returned.
+
+    An array of six |scaled| distances is used to keep track of the height from the beginning of
+    the list to the current place, just as in |line_break|. In fact, we use one of the same arrays,
+    only changing its name to reflect its new significance.
+
+    The distance from first active node to |cur_p| is stored in |active_height|.
+ + A global variable |best_height_plus_depth| will be set to the natural size of the box (without + stretching or shrinking) that corresponds to the optimum breakpoint found by |vert_break|. This + value is used by the insertion splitting algorithm of the page builder. + + \starttyping + scaled best_height_plus_depth; + \stoptyping + + The natural height: + +*/ + +/* cur_height lmt_packaging_state.active_height[total_glue_amount] */ +# define cur_height active_height[total_glue_amount] + +halfword tex_vert_break(halfword p, scaled h, scaled d) +{ + /*tex + If |p| is a glue node, |type(prev_p)| determines whether |p| is a legal breakpoint, an + initial glue node is not a legal breakpoint. + */ + halfword prev_p = p; + /*tex penalty value */ + halfword pi = 0; + /*tex the smallest badness plus penalties found so far */ + halfword least_cost = awful_bad; + /*tex the most recent break that leads to |least_cost| */ + halfword best_place = null; + /*tex depth of previous box in the list */ + scaled prev_dp = 0; + scaled active_height[10] = { 0 }; + while (1) { + /*tex + + If node |p| is a legal breakpoint, check if this break is the best known, and |goto + done| if |p| is null or if the page-so-far is already too full to accept more stuff. + + A subtle point to be noted here is that the maximum depth~|d| might be negative, so + |cur_height| and |prev_dp| might need to be corrected even after a glue or kern node. + */ + if (p) { + /*tex + + Use node |p| to update the current height and depth measurements; if this node is + not a legal breakpoint, |goto not_found| or |update_heights|, otherwise set |pi| + to the associated penalty at the break. + + */ + switch (node_type(p)) { + case hlist_node: + case vlist_node: + /*tex + If we do this we also need to subtract the dimensions and bubble it up. But + at least we could inline the inserts. 
+ */ + /* + if (auto_migrating_mode_permitted(auto_migration_mode_par, auto_migrate_post)) { + // same code as in page builder + } + if (auto_migrating_mode_permitted(auto_migration_mode_par, auto_migrate_pre)) { + // same code as in page builder + continue; + } + */ + cur_height += prev_dp + box_height(p); + prev_dp = box_depth(p); + goto NOT_FOUND; + case rule_node: + cur_height += prev_dp + rule_height(p); + prev_dp = rule_depth(p); + goto NOT_FOUND; + case boundary_node: + case whatsit_node: + goto NOT_FOUND; + case glue_node: + if (precedes_break(prev_p)) { + pi = 0; + break; + } else { + goto UPDATE_HEIGHTS; + } + case kern_node: + if (node_next(p) && node_type(node_next(p)) == glue_node) { + pi = 0; + break; + } else { + goto UPDATE_HEIGHTS; + } + case penalty_node: + pi = penalty_amount(p); + break; + case mark_node: + case insert_node: + goto NOT_FOUND; + default: + tex_confusion("vertical break"); + break; + } + } else { + pi = eject_penalty; + } + /*tex + + Check if node |p| is a new champion breakpoint; then |goto done| if |p| is a forced + break or if the page-so-far is already too full. + + */ + if (pi < infinite_penalty) { + /*tex Compute the badness, |b|, using |awful_bad| if the box is too full. 
*/ + int b; + if (cur_height < h) { + if ((active_height[total_fi_amount] != 0) || (active_height[total_fil_amount] != 0) || + (active_height[total_fill_amount] != 0) || (active_height[total_filll_amount] != 0)) { + b = 0; + } else { + b = tex_badness(h - cur_height, active_height[total_stretch_amount]); + } + } else if (cur_height - h > active_height[total_shrink_amount]) { + b = awful_bad; + } else { + b = tex_badness(cur_height - h, active_height[total_shrink_amount]); + } + if (b < awful_bad) { + if (pi <= eject_penalty) { + b = pi; + } else if (b < infinite_bad) { + b = b + pi; + } else { + b = deplorable; + } + } + if (b <= least_cost) { + best_place = p; + least_cost = b; + lmt_packaging_state.best_height_plus_depth = cur_height + prev_dp; + } + if ((b == awful_bad) || (pi <= eject_penalty)) { + return best_place; + } + } + UPDATE_HEIGHTS: + /*tex + + Update the current height and depth measurements with respect to a glue or kern node~|p|. + Vertical lists that are subject to the |vert_break| procedure should not contain infinite + shrinkability, since that would permit any amount of information to fit on one page. + + We only end up here for glue and kern nodes. + + */ + switch(node_type(p)) { + case kern_node: + cur_height += prev_dp + kern_amount(p); + prev_dp = 0; + goto KEEP_GOING; /* We assume a positive depth. */ + case glue_node: + active_height[total_stretch_amount + glue_stretch_order(p)] += glue_stretch(p); + active_height[total_shrink_amount] += glue_shrink(p); + if ((glue_shrink_order(p) != normal_glue_order) && (glue_shrink(p) != 0)) { + tex_handle_error( + normal_error_type, + "Infinite glue shrinkage found in box being split", + "The box you are \\vsplitting contains some infinitely shrinkable glue, e.g.,\n" + "'\\vss' or '\\vskip 0pt minus 1fil'. Such glue doesn't belong there; but you can\n" + "safely proceed, since the offensive shrinkability has been made finite." 
+ ); + glue_shrink_order(p) = normal_glue_order; + } + cur_height += prev_dp + glue_amount(p); + prev_dp = 0; + goto KEEP_GOING; /* We assume a positive depth. */ + } + NOT_FOUND: + if (prev_dp > d) { + cur_height += prev_dp - d; + prev_dp = d; + } + KEEP_GOING: + prev_p = p; + p = node_next(prev_p); + } + return best_place; +} + +/*tex + + Now we are ready to consider |vsplit| itself. Most of its work is accomplished by the two + subroutines that we have just considered. + + Given the number of a vlist box |n|, and given a desired page height |h|, the |vsplit| + function finds the best initial segment of the vlist and returns a box for a page of height~|h|. + The remainder of the vlist, if any, replaces the original box, after removing glue and penalties + and adjusting for |split_top_skip|. Mark nodes in the split-off box are used to set the values + of |split_first_mark| and |split_bot_mark|; we use the fact that |split_first_mark(x) = null| if + and only if |split_bot_mark(x) = null|. + + The original box becomes \quote {void} if and only if it has been entirely extracted. The + extracted box is \quote {void} if and only if the original box was void (or if it was, + erroneously, an hlist box). + + Extract a page of height |h| from box |n|: +*/ + +halfword tex_vsplit(halfword n, scaled h, int m) +{ + /*tex the box to be split */ + halfword v = box_register(n); + tex_flush_node_list(lmt_packaging_state.split_discards_head); + lmt_packaging_state.split_discards_head = null; + for (halfword i = 0; i <= lmt_mark_state.mark_data.ptr; i++) { + tex_delete_mark(i, split_first_mark_code); + tex_delete_mark(i, split_bot_mark_code); + } + /*tex Dispense with trivial cases of void or bad boxes. */ + if (! v) { + return null; + } else if (node_type(v) != vlist_node) { + tex_handle_error( + normal_error_type, + "\\vsplit needs a \\vbox", + "The box you are trying to split is an \\hbox. I can't split such a box, so I''ll\n" + "leave it alone." 
+ ); + return null; + } else { + /*tex points to where the break occurs */ + halfword q = tex_vert_break(box_list(v), h, split_max_depth_par); + /*tex + + Look at all the marks in nodes before the break, and set the final link to |null| at + the break. It's possible that the box begins with a penalty node that is the quote + {best} break, so we must be careful to handle this special case correctly. + + */ + halfword p = box_list(v); + /*tex The direction of the box to be split, obsolete! */ + int vdir = box_dir(v); + if (p == q) { + box_list(v) = null; + } else { + while (1) { + if (node_type(p) == mark_node) { + tex_update_split_mark(p); + } + if (node_next(p) == q) { + node_next(p) = null; + break; + } else { + p = node_next(p); + } + } + } + q = tex_prune_page_top(q, saving_vdiscards_par > 0); + p = box_list(v); + box_list(v) = null; + tex_flush_node(v); + if (q) { + box_register(n) = tex_filtered_vpack(q, 0, packing_additional, max_depth_par, split_keep_group, vdir, 0, 0, 0, holding_none_option); + } else { + /*tex The |eq_level| of the box stays the same. */ + box_register(n) = null; + } + return tex_filtered_vpack(p, m == packing_additional ? 0 : h, m, max_depth_par, split_off_group, vdir, 0, 0, 0, holding_none_option); + } +} + +/*tex + + Now that we can see what eventually happens to boxes, we can consider the first steps in their + creation. The |begin_box| routine is called when |box_context| is a context specification, + |cur_chr| specifies the type of box desired, and |cur_cmd=make_box|. + +*/ + +void tex_begin_box(int boxcontext, scaled shift) +{ + halfword code = cur_chr; + halfword boxnode = null; /*tex Aka |cur_box|. */ + switch (code) { + case box_code: + { + halfword n = tex_scan_box_register_number(); + boxnode = box_register(n); + /*tex The box becomes void, at the same level. 
*/ + box_register(n) = null; + break; + } + case copy_code: + { + halfword n = tex_scan_box_register_number(); + /* boxnode = copy_node_list(box_register(n), null); */ + boxnode = tex_copy_node(box_register(n)); + break; + } + case last_box_code: + /*tex + + If the current list ends with a box node, delete it from the list and make |boxnode| + point to it; otherwise set |boxnode := null|. + + */ + boxnode = null; + if (abs(cur_list.mode) == mmode) { + tex_you_cant_error( + "Sorry; this \\lastbox will be void." + ); + } else if (cur_list.mode == vmode && cur_list.head == cur_list.tail) { + tex_you_cant_error( + "Sorry...I usually can't take things from the current page.\n" + "This \\lastbox will therefore be void." + ); + } else if (cur_list.head != cur_list.tail) { + switch (node_type(cur_list.tail)) { + case hlist_node: + case vlist_node: + { + /*tex Remove the last box */ + halfword q = node_prev(cur_list.tail); + if (! q || node_next(q) != cur_list.tail) { + q = cur_list.head; + while (node_next(q) != cur_list.tail) + q = node_next(q); + } + tex_uncouple_node(cur_list.tail); + boxnode = cur_list.tail; + box_shift_amount(boxnode) = 0; + cur_list.tail = q; + node_next(cur_list.tail) = null; + } + break; + } + } + break; + case vsplit_code: + { + /*tex + + Split off part of a vertical box, make |boxnode| point to it. Here we deal with + things like |\vsplit 13 to 100pt|. + + Maybe todo: just split off one line. 
+ + */ + halfword mode = packing_exactly ; + halfword index = tex_scan_box_register_number(); + halfword size = 0; + switch (tex_scan_character("utUT", 0, 1, 0)) { + case 'u': case 'U': + if (tex_scan_mandate_keyword("upto", 1)) { + mode = packing_additional; + size = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 't': case 'T': + if (tex_scan_mandate_keyword("to", 1)) { + mode = packing_exactly ; + size = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + default: + tex_aux_show_keyword_error("upto|to"); + break; + } + boxnode = tex_vsplit(index, size, mode); + } + break; + case insert_box_code: + case insert_copy_code: + { + halfword index = tex_scan_int(0, NULL); + if (tex_valid_insert_id(index)) { + boxnode = tex_get_insert_content(index); + if (boxnode) { + if (node_type(boxnode) == vlist_node) { + if (code == insert_copy_code) { + boxnode = tex_copy_node(boxnode); + } else { + tex_set_insert_content(index, null); + } + } else { + tex_set_insert_content(index, null); + /* error, maybe migration list */ + } + } + } + break; + } + case local_left_box_box_code: + { + boxnode = tex_get_local_boxes(local_left_box_code); + break; + } + case local_right_box_box_code: + { + boxnode = tex_get_local_boxes(local_right_box_code); + break; + } + case local_middle_box_box_code: + { + boxnode = tex_get_local_boxes(local_middle_box_code); + break; + } + default: + { + /*tex + + Initiate the construction of an hbox or vbox, then |return|. Here is where we + enter restricted horizontal mode or internal vertical mode, in order to make a + box. 
+ + */ + int just_pack = 0; + quarterword spec_direction = direction_unknown; + /*tex 0 or |vmode| or |hmode| */ + halfword mode; /* todo */ + switch (code) { + case tpack_code: + code = vtop_code; + just_pack = 1; + break; + case vpack_code: + code = vtop_code + vmode; + just_pack = 1; + break; + case hpack_code: + code = vtop_code + hmode; + just_pack = 1; + break; + } + mode = code - vtop_code; + tex_set_saved_record(saved_full_spec_item_context, saved_box_context, 0, boxcontext); + switch (abs(cur_list.mode)) { + case vmode: + spec_direction = dir_lefttoright; + break; + case hmode: + spec_direction = (singleword) text_direction_par; + break; + case mmode: + spec_direction = (singleword) math_direction_par; + break; + } + if (mode == hmode) { + if ((boxcontext < box_flag) && (abs(cur_list.mode) == vmode)) { + tex_aux_scan_full_spec(adjusted_hbox_group, spec_direction, just_pack, shift); + } else { + tex_aux_scan_full_spec(hbox_group, spec_direction, just_pack, shift); + } + } else { + if (mode == vmode) { + tex_aux_scan_full_spec(vbox_group, spec_direction, just_pack, shift); + } else { + tex_aux_scan_full_spec(vtop_group, spec_direction, just_pack, shift); + mode = vmode; + } + tex_normal_paragraph(vmode_par_context); + } + tex_push_nest(); + update_tex_internal_dir_state(0); + cur_list.mode = - mode; + if (mode == vmode) { + cur_list.prev_depth = ignore_depth; + if (every_vbox_par) { + tex_begin_token_list(every_vbox_par, every_vbox_text); + } + } else { + cur_list.space_factor = 1000; + if (every_hbox_par) { + tex_begin_token_list(every_hbox_par, every_hbox_text); + } + } + return; + } + } + /*tex In simple cases, we use the box immediately. 
*/ + tex_box_end(boxcontext, boxnode, shift, unset_noad_class); +} diff --git a/source/luametatex/source/tex/texpackaging.h b/source/luametatex/source/tex/texpackaging.h new file mode 100644 index 000000000..75d3d1653 --- /dev/null +++ b/source/luametatex/source/tex/texpackaging.h @@ -0,0 +1,205 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_PACKAGING_H +# define LMT_PACKAGING_H + +# include "luametatex.h" + +/* We define some constants used when calling |hpack| to deal with font expansion. */ + +typedef enum hpack_subtypes { + packing_exactly, /*tex a box dimension is pre-specified */ + packing_additional, /*tex a box dimension is increased from the natural one */ + packing_expanded, /*tex calculate amount for font expansion after breaking paragraph into lines */ + packing_substitute, /*tex substitute fonts */ + packing_adapted, + packing_linebreak, /*tex signals that we need to take the frozen adjust properties */ +} hpack_subtypes; + +typedef enum box_codes { + box_code, /*tex |chr_code| for |\box| */ + copy_code, /*tex |chr_code| for |\copy| */ + unpack_code, + last_box_code, /*tex |chr_code| for |\lastbox| */ + vsplit_code, /*tex |chr_code| for |\vsplit| */ + tpack_code, + vpack_code, + hpack_code, + vtop_code, /*tex |chr_code| for |\vtop| */ + vbox_code, + hbox_code, + insert_box_code, + insert_copy_code, + local_left_box_box_code, + local_right_box_box_code, + local_middle_box_box_code +} box_codes; + +// typedef enum saved_spec_items { +// saved_spec_item_packaging = 0, +// saved_spec_item_attribute = 1, +// saved_spec_n_of_items = 2, +// } saved_spec_items; + +typedef enum saved_full_spec_items { + saved_full_spec_item_context = 0, + saved_full_spec_item_packaging = 1, + saved_full_spec_item_direction = 2, + saved_full_spec_item_attr_list = 3, + saved_full_spec_item_only_pack = 4, + saved_full_spec_item_orientation = 5, + saved_full_spec_item_anchor = 6, + saved_full_spec_item_geometry = 7, + saved_full_spec_item_xoffset = 8, + 
saved_full_spec_item_yoffset = 9, + saved_full_spec_item_xmove = 10, + saved_full_spec_item_ymove = 11, + saved_full_spec_item_reverse = 12, + saved_full_spec_item_container = 13, + saved_full_spec_item_shift = 14, /* cleaner than passing it as context */ + saved_full_spec_item_source = 15, + saved_full_spec_item_target = 16, + saved_full_spec_item_axis = 17, + saved_full_spec_item_class = 18, + saved_full_spec_item_state = 19, + saved_full_spec_item_retain = 20, + saved_full_spec_n_of_items = 21, +} saved_full_spec_items; + +typedef enum holding_migration_options { + holding_none_option = 0x00, + holding_marks_option = 0x01, + holding_inserts_option = 0x02, + holding_adjusts_option = 0x04, +} holding_migration_options ; + +# define retain_marks(r) (((r | holding_migrations_par) & holding_marks_option ) == holding_marks_option ) +# define retain_inserts(r) (((r | holding_migrations_par) & holding_inserts_option) == holding_inserts_option) +# define retain_adjusts(r) (((r | holding_migrations_par) & holding_adjusts_option) == holding_adjusts_option) + +typedef struct packaging_state_info { + scaled total_stretch[6]; /*tex with one for padding, the results are also used in alignments */ + scaled total_shrink[6]; /*tex glue found by |hpack| or |vpack|, the results are also used in alignments */ + int last_badness; /*tex badness of the most recently packaged box */ + scaled last_overshoot; + halfword post_adjust_tail; /*tex tail of adjustment list */ + halfword pre_adjust_tail; + halfword post_migrate_tail; /*tex tail of adjustment list */ + halfword pre_migrate_tail; + halfword last_leftmost_char; + halfword last_rightmost_char; + int pack_begin_line; + scaled best_height_plus_depth; /*tex The height of the best box, without stretching or shrinking: */ + halfword previous_char_ptr; + scaled font_expansion_ratio; + halfword page_discards_tail; + halfword page_discards_head; + halfword split_discards_head; + halfword padding; +} packaging_state_info; + +extern 
packaging_state_info lmt_packaging_state; + +extern scaled tex_char_stretch (halfword p); +extern scaled tex_char_shrink (halfword p); +/* void tex_get_char_expansion (halfword p, halfword *stretch, halfword *shrink); */ /* no gain */ +extern scaled tex_kern_stretch (halfword p); +extern scaled tex_kern_shrink (halfword p); +extern scaled tex_char_protrusion (halfword p, int side); +/* void tex_kern_protrusion (halfword p, int side, halfword *stretch, halfword *shrink); */ + +extern scaled tex_left_marginkern (halfword p); +extern scaled tex_right_marginkern (halfword p); + +extern halfword tex_filtered_hpack (halfword p, halfword qt, scaled w, int m, int grp, halfword d, int just_pack, halfword attr, int state, int retain); +extern halfword tex_filtered_vpack (halfword p, scaled h, int m, scaled l, int grp, halfword d, int just_pack, halfword attr, int state, int retain); + +extern scaledwhd tex_natural_hsizes (halfword p, halfword pp, glueratio g_mult, int g_sign, int g_order); +extern scaledwhd tex_natural_vsizes (halfword p, halfword pp, glueratio g_mult, int g_sign, int g_order); +extern halfword tex_natural_width (halfword p, halfword pp, glueratio g_mult, int g_sign, int g_order); +extern halfword tex_natural_hsize (halfword p, halfword *correction); +extern halfword tex_natural_vsize (halfword p); + +extern halfword tex_hpack (halfword p, scaled w, int m, singleword d, int retain); +extern halfword tex_vpack (halfword p, scaled h, int m, scaled l, singleword d, int retain); + +extern void tex_repack (halfword p, scaled w, int m); +extern void tex_freeze (halfword p, int recurse); + +extern void tex_package (singleword nature); +extern void tex_run_unpackage (void); + +extern void tex_append_to_vlist (halfword b, int location, const line_break_properties *properties); + +extern halfword tex_prune_page_top (halfword p, int s); +extern halfword tex_vert_break (halfword p, scaled h, scaled d); +extern halfword tex_vsplit (halfword n, scaled h, int m); + +extern 
void tex_finish_vcenter_group (void); +extern void tex_run_vcenter (void); + +//# define vpack(A,B,C,D) tex_vpackage(A,B,C,max_dimen,D) + +# define first_un_box_code box_code +# define last_un_box_code unpack_code +# define first_nu_box_code box_code +# define last_nu_box_code local_middle_box_box_code /*tex needs checking */ + +/*tex + + Now let's turn to the question of how |\hbox| is treated. We actually need to consider also a + slightly larger context, since constructions like + + \starttyping + \setbox3={\\hbox... + \leaders\hbox... + \lower3.8pt\hbox... + \stoptyping + + are supposed to invoke quite different actions after the box has been packaged. Conversely, + constructions like |\setbox 3 =| can be followed by a variety of different kinds of boxes, and + we would like to encode such things in an efficient way. + + In other words, there are two problems: To represent the context of a box, and to represent its + type. The first problem is solved by putting a \quote {context code} on the |save_stack|, just + below the two entries that give the dimensions produced by |scan_spec|. The context code is + either a (signed) shift amount, or it is a large integer |>= box_flag|, where |box_flag = | + $2^{30}$. Codes |box_flag| through |box_flag + biggest_reg| represent |\setbox0| through + |\setbox biggest_reg|; codes |box_flag + biggest_reg + 1| through |box_flag + 2 * biggest_reg| + represent |\global \setbox 0| through |\global\setbox| |biggest_reg|; code |box_flag + 2 * + number_regs| represents |\shipout|; and codes |box_flag + 2 * number_regs + 1| through |box_flag + + 2 * number_regs + 3| represent |\leaders|, |\cleaders|, and |\xleaders|. 
+
+    The second problem is solved by giving the command code |make_box| to all control sequences that
+    produce a box, and by using the following |chr_code| values to distinguish between them:
+    |box_code|, |copy_code|, |last_box_code|, |vsplit_code|, |vtop_code|, |vtop_code + vmode|, and
+    |vtop_code + hmode|, where the latter two are used to denote |\vbox| and |\hbox|, respectively.
+
+*/
+
+# define biggest_reg 65535 /*tex This could be in |textypes.h|. */
+
+typedef enum box_flags {
+    box_flag            = 010000000000,                        /*tex context code for |\setbox0| (< maxdimen) */
+    global_box_flag     = 010000000000 +     biggest_reg,      /*tex context code for |\global\setbox0| */
+    max_global_box_flag = 010000000000 + 2 * biggest_reg,
+    left_box_flag       = 010000000000 + 2 * biggest_reg +  1, /*tex context code for |\localleftbox| (not used) */
+    right_box_flag      = 010000000000 + 2 * biggest_reg +  2, /*tex context code for |\localrightbox| (not used) */
+    middle_box_flag     = 010000000000 + 2 * biggest_reg +  3, /*tex context code for |\localmiddlebox| (not used) */
+    shipout_flag        = 010000000000 + 2 * biggest_reg +  4, /*tex context code for |\shipout| */
+    lua_scan_flag       = 010000000000 + 2 * biggest_reg +  5, /*tex context code for |scan_list| */
+    a_leaders_flag      = 010000000000 + 2 * biggest_reg +  6, /*tex context code for |\leaders| */
+    c_leaders_flag      = 010000000000 + 2 * biggest_reg +  7, /*tex context code for |\cleaders| */
+    x_leaders_flag      = 010000000000 + 2 * biggest_reg +  8, /*tex context code for |\xleaders| */
+    g_leaders_flag      = 010000000000 + 2 * biggest_reg +  9, /*tex context code for |\gleaders| */
+    u_leaders_flag      = 010000000000 + 2 * biggest_reg + 10, /*tex context code for |\uleaders| */
+} box_flags;
+
+# define box_leaders_flag(f) ((f) >= a_leaders_flag && (f) <= u_leaders_flag) /*tex true for the five leaders flags; |f| is evaluated twice */
+
+extern void tex_begin_box        (int boxcontext, scaled shift);
+extern int  tex_ignore_math_skip (halfword p);
+
+# endif
diff --git a/source/luametatex/source/tex/texprimitive.c b/source/luametatex/source/tex/texprimitive.c
new file mode 100644 index 000000000..bbeea1bc0 --- /dev/null +++ b/source/luametatex/source/tex/texprimitive.c @@ -0,0 +1,913 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + Control sequences are stored and retrieved by means of a fairly standard hash table algorithm + called the method of \quote {coalescing lists} (cf.\ Algorithm 6.4C in {\em The Art of + Computer Programming}). Once a control sequence enters the table, it is never removed, because + there are complicated situations involving |\gdef| where the removal of a control sequence at + the end of a group would be a mistake preventable only by the introduction of a complicated + reference-count mechanism. + + The actual sequence of letters forming a control sequence identifier is stored in the |str_pool| + array together with all the other strings. An auxiliary array |hash| consists of items with two + halfword fields per word. The first of these, called |next(p)|, points to the next identifier + belonging to the same coalesced list as the identifier corresponding to~|p|; and the other, + called |text(p)|, points to the |str_start| entry for |p|'s identifier. If position~|p| of the + hash table is empty, we have |text(p)=0|; if position |p| is either empty or the end of a + coalesced hash list, we have |next(p) = 0|. An auxiliary pointer variable called |hash_used| is + maintained in such a way that all locations |p >= hash_used| are nonempty. The global variable + |cs_count| tells how many multiletter control sequences have been defined, if statistics are + being kept. + + A boolean variable called |no_new_control_sequence| is set to |true| during the time that new + hash table entries are forbidden. 
+ + The other variables in the following state structure are: the hash table: |hash|, the allocation + pointer |hash_used| for |hash|, |hash_extra| above |eqtb_size|, the maximum of the hash array + |hash_top|, the pointer to the next high hash location |hash_high|, the mentioned flag that says + if new identifiers are legal |no_new_control_sequence| and the total number of known identifiers: + |cs_count|. + +*/ + /*tex The initial state: no tables are allocated yet (|hash| and |eqtb| are |NULL|), both data records start out with the same hash size limits, and |no_new_cs| is set. */ +hash_state_info lmt_hash_state = { + .hash = NULL, + .hash_data = { + .minimum = min_hash_size, + .maximum = max_hash_size, + .size = siz_hash_size, + .step = stp_hash_size, + .allocated = 0, + .itemsize = sizeof(memoryword) + sizeof(memoryword), + .top = 0, + .ptr = 0, + .initial = 0, + .offset = 0, // eqtb_size, + }, + .eqtb_data = { + .minimum = min_hash_size, + .maximum = max_hash_size, + .size = siz_hash_size, + .step = stp_hash_size, + .allocated = memory_data_unset, + .itemsize = memory_data_unset, + .top = frozen_control_sequence, + .ptr = 0, + .initial = 0, + .offset = 0, + }, + .eqtb = NULL, + .no_new_cs = 1, + .padding = 0, +}; + +/*tex + + The arrays |prim| and |prim_eqtb| are used for |name -> cmd, chr| lookups. They are modelled + after |hash| and |eqtb|, except that primitives do not have an |eq_level|, that field is + replaced by |origin|. Furthermore we have a link for coalesced lists: |prim_next (a)|; the + string number for control sequence name: |prim_text (a)|; test if all positions are occupied: + |prim_is_full|; some fields: |prim_origin_field (a)|, |prim_eq_type_field (a)| and + |prim_equiv_field(a)|; the level of definition: |prim_origin (a)|; the command code for + equivalent: |prim_eq_type(a)|; the equivalent value: |prim_equiv(a)|; the allocation pointer + for |prim|: |prim_used|; the primitives tables: |two_halves prim [(prim_size + 1)]| and + |memoryword prim_eqtb [(prim_size + 1)]|. The array |prim_data| works the other way around, it + is used for |cmd, chr| to name lookups.
+
+*/
+
+primitive_state_info lmt_primitive_state;
+
+/*tex
+    The lower bound for |prim_used| when we search for a free slot in |prim|, and the number of
+    extra slots that is passed to the allocators of the hash arrays:
+*/
+
+# define prim_base 1
+# define reserved_hash_slots 1
+
+/*tex
+    Initialize the memory arrays: the three primitive tables are zeroed, every |prim_eq_type| is
+    set to |undefined_cs_cmd| and the allocation pointer |prim_used| starts at |prim_size|.
+*/
+
+void tex_initialize_primitives(void)
+{
+    memset(lmt_primitive_state.prim_data, 0, sizeof(prim_info) * (last_cmd + 1));
+    memset(lmt_primitive_state.prim, 0, sizeof(memoryword) * (prim_size + 1));
+    memset(lmt_primitive_state.prim_eqtb, 0, sizeof(memoryword) * (prim_size + 1));
+    for (int k = 0; k <= prim_size; k++) {
+        prim_eq_type(k) = undefined_cs_cmd;
+    }
+    lmt_primitive_state.prim_used = prim_size;
+}
+/*tex
+
+    Here the |hash| and |eqtb| arrays get allocated, both with |top + 1| entries (plus the reserved
+    slots). In the initializing state |allocated| and |top| are first derived from the configured
+    minimum; when that minimum is zero only a message is issued and |top| is left as it is. When
+    we are not initializing, the entries above |eqtb_size| are set to the undefined control
+    sequence. A failed allocation is reported as an overflow.
+
+*/
+void tex_initialize_hash_mem(void)
+{
+    if (lmt_main_state.run_state == initializing_state) {
+        if (lmt_hash_state.hash_data.minimum == 0) {
+            tex_emergency_message("startup error", "you need at least some hash size");
+        } else {
+            lmt_hash_state.hash_data.allocated = lmt_hash_state.hash_data.minimum;
+            lmt_hash_state.hash_data.top = eqtb_size + lmt_hash_state.hash_data.minimum;
+        }
+    }
+    {
+        int size = lmt_hash_state.hash_data.top + 1;
+        memoryword *hash = aux_allocate_clear_array(sizeof(memoryword), size, reserved_hash_slots);
+        memoryword *eqtb = aux_allocate_clear_array(sizeof(memoryword), size, reserved_hash_slots);
+        if (hash && eqtb) {
+            lmt_hash_state.hash = hash;
+            lmt_hash_state.eqtb = eqtb;
+            if (lmt_main_state.run_state == initializing_state) {
+                /*tex Initialization happens elsewhere. */
+            } else {
+                tex_initialize_undefined_cs();
+                for (int i = eqtb_size + 1; i <= lmt_hash_state.hash_data.top; i++) {
+                    copy_eqtb_entry(i, undefined_control_sequence);
+                }
+            }
+        } else {
+            tex_overflow_error("hash", size);
+        }
+    }
+}
+/*tex
+
+    This one tries to grow both arrays by |step| entries. It returns 1 when that worked out and 0
+    when the configured |size| would be exceeded or after a failed reallocation has been reported.
+    The memory callback is told about success as well as failure.
+
+    NOTE(review): when only one of the two reallocations fails, the other array has already been
+    reallocated while the state still holds the old pointer; presumably the overflow error does
+    not return --- confirm.
+
+*/
+static int tex_aux_room_in_hash(void)
+{
+    if (lmt_hash_state.hash_data.allocated + lmt_hash_state.hash_data.step <= lmt_hash_state.hash_data.size) {
+        int size = lmt_hash_state.hash_data.top + lmt_hash_state.hash_data.step + 1;
+        memoryword *hash = aux_reallocate_array(lmt_hash_state.hash, sizeof(memoryword), size, reserved_hash_slots);
+        memoryword *eqtb = aux_reallocate_array(lmt_hash_state.eqtb, sizeof(memoryword), size, reserved_hash_slots);
+        if (hash && eqtb) {
+            /*tex Only the |step| new entries, so those from |top + 1| on, are cleared. */
+            memset(hash + lmt_hash_state.hash_data.top + 1, 0, sizeof(memoryword) * (size_t) lmt_hash_state.hash_data.step);
+            memset(eqtb + lmt_hash_state.hash_data.top + 1, 0, sizeof(memoryword) * (size_t) lmt_hash_state.hash_data.step);
+            lmt_hash_state.hash = hash;
+            lmt_hash_state.eqtb = eqtb;
+            /*tex
+                This is not really needed because we now do this when a new id is created which
+                is a better place anyway. But we play safe and still do it:
+
+                NOTE(review): the loop runs up to and including |size|, which is one entry beyond
+                the |step| entries that were cleared above; presumably the reserved slot makes
+                that index valid --- confirm against |aux_reallocate_array|.
+            */
+            for (int i = lmt_hash_state.hash_data.top + 1; i <= size; i++) {
+                copy_eqtb_entry(i, undefined_control_sequence);
+            }
+            lmt_hash_state.hash_data.allocated += lmt_hash_state.hash_data.step;
+            lmt_hash_state.hash_data.top += lmt_hash_state.hash_data.step;
+            lmt_run_memory_callback("hash", 1);
+            return 1;
+        } else {
+            lmt_run_memory_callback("hash", 0);
+            tex_overflow_error("hash", size);
+        }
+    }
+    return 0;
+}
+
+/*tex
+
+    The value of |hash_prime| should be roughly 85\%! of |hash_size|, and it should be a prime
+    number. The theory of hashing tells us to expect fewer than two table probes, on the average,
+    when the search is successful. [See J.~S. Vitter, {\sl Journal of the ACM\/ \bf30} (1983),
+    231--258.]
+ + https://en.wikipedia.org/wiki/Coalesced_hashing + + Because we seldom use uppercase we get many misses, multiplying a chr j[k] by k actually gives + a better spread. + + Making a \CONTEXT\ format takes some 250.000 hash calculations while the \LUAMETATEX\ needs + some 1.7 million for just over 250 pages (with an average string length of 15). + + The primitive hash lookups are needed when we initialize and when we lookup an internal + variable. + +*/ + +inline static halfword tex_aux_compute_hash(const char *j, int l) +{ + halfword h = (unsigned const char) j[0]; + for (int k = 1; k < l; k++) { + h = (h + h + (unsigned const char) j[k]) % hash_prime; + } + return h; +} + +inline static halfword tex_aux_compute_prim(const char *j, unsigned l) +{ + halfword h = (unsigned const char) j[0]; + for (unsigned k = 1; k < l; k++) { + h = (h + h + (unsigned const char) j[k]) % prim_prime; + } + return h; +} + +halfword tex_prim_lookup(strnumber s) +{ + /*tex The index in the |hash| array: */ + if (s >= cs_offset_value) { + unsigned char *j = str_string(s); + unsigned l = (unsigned) str_length(s); + halfword h = tex_aux_compute_prim((char *) j, l); + /*tex We start searching here; note that |0 <= h < hash_prime|. */ + halfword p = h + 1; + while (1) { + if (prim_text(p) > 0 && str_length(prim_text(p)) == l && tex_str_eq_str(prim_text(p), s)) { + return p; + } else if (prim_next(p)) { + p = prim_next(p); + } else if (lmt_hash_state.no_new_cs) { + return undefined_primitive; + } else { + /*tex Insert a new primitive after |p|, then make |p| point to it. 
*/ + if (prim_text(p) > 0) { + /*tex Search for an empty location in |prim| */ + do { + if (lmt_primitive_state.prim_used > prim_base) { + --lmt_primitive_state.prim_used; + } else { + tex_overflow_error("primitive size", prim_size); + } + } while (prim_text(lmt_primitive_state.prim_used)); + prim_next(p) = lmt_primitive_state.prim_used; + p = lmt_primitive_state.prim_used; + } + prim_text(p) = s; + break; + } + } + return p; + } else if ((s < 0) || (s == undefined_control_sequence)) { + return undefined_primitive; + } else { + return s; + } +} + +/*tex How to test a csname for primitive-ness? */ + +/* +int tex_cs_is_primitive(strnumber csname) +{ + int m = prim_lookup(csname); + if (m != undefined_primitive) { + char *ss = makecstring(csname); + int n = string_locate(ss, str_length(csname), 0); + lmt_memory_free(ss); + return ((n != undefined_cs_cmd) && (eq_type(n) == prim_eq_type(m)) && (eq_value(n) == prim_equiv(m))); + } else { + return 0; + } +} +*/ + +/*tex Dumping and undumping. */ + +/* We cheat! 
It should be dump_things(f, prim_state.prim[p], 1); */ + +void tex_dump_primitives(dumpstream f) +{ + /* + for (int p = 0; p <= prim_size; p++) { + dump_mem(f, prim_state.prim[p]); + } + for (int p = 0; p <= prim_size; p++) { + dump_mem(f, prim_state.prim_eqtb[p]); + } + */ + dump_things(f, lmt_primitive_state.prim[0], prim_size + 1); + dump_things(f, lmt_primitive_state.prim_eqtb[0], prim_size + 1); + for (int p = 0; p <= last_cmd; p++) { + dump_int(f, lmt_primitive_state.prim_data[p].offset); + dump_int(f, lmt_primitive_state.prim_data[p].subids); + for (int q = 0; q < lmt_primitive_state.prim_data[p].subids; q++) { + dump_int(f, lmt_primitive_state.prim_data[p].names[q]); + } + } +} + +void tex_undump_primitives(dumpstream f) +{ + undump_things(f, lmt_primitive_state.prim[0], prim_size + 1); + undump_things(f, lmt_primitive_state.prim_eqtb[0], prim_size + 1); + for (int p = 0; p <= last_cmd; p++) { + undump_int(f, lmt_primitive_state.prim_data[p].offset); + undump_int(f, lmt_primitive_state.prim_data[p].subids); + if (lmt_primitive_state.prim_data[p].subids > 0) { + int size = lmt_primitive_state.prim_data[p].subids; + strnumber *names = aux_allocate_clear_array(sizeof(strnumber *), size, 1); + if (names) { + lmt_primitive_state.prim_data[p].names = names; + for (int q = 0; q < lmt_primitive_state.prim_data[p].subids; q++) { + undump_int(f, names[q]); + } + } else { + tex_overflow_error("primitives", size * sizeof(strnumber *)); + } + } + } +} + +/*tex + + Dump the hash table, A different scheme is used to compress the hash table, since its lower + region is usually sparse. When |text (p) <> 0| for |p <= hash_used|, we output two words, + |p| and |hash[p]|. The hash table is, of course, densely packed for |p >= hash_used|, so the + remaining entries are output in a~block. 
+ +*/ + +void tex_dump_hashtable(dumpstream f) +{ + dump_int(f, lmt_hash_state.eqtb_data.top); + lmt_hash_state.eqtb_data.ptr = frozen_control_sequence - 1 - lmt_hash_state.eqtb_data.top + lmt_hash_state.hash_data.ptr; + /* the root entries, i.e. the direct hash slots */ + for (halfword p = hash_base; p <= lmt_hash_state.eqtb_data.top; p++) { + if (cs_text(p)) { + dump_int(f, p); + dump_int(f, lmt_hash_state.hash[p]); + ++lmt_hash_state.eqtb_data.ptr; + } + } + /* the chain entries, i.e. the follow up list slots => eqtb */ + dump_things(f, lmt_hash_state.hash[lmt_hash_state.eqtb_data.top + 1], special_sequence_base - lmt_hash_state.eqtb_data.top); + if (lmt_hash_state.hash_data.ptr > 0) { + dump_things(f, lmt_hash_state.hash[eqtb_size + 1], lmt_hash_state.hash_data.ptr); + } + dump_int(f, lmt_hash_state.eqtb_data.ptr); +} + +void tex_undump_hashtable(dumpstream f) +{ + undump_int(f, lmt_hash_state.eqtb_data.top); + if (lmt_hash_state.eqtb_data.top >= hash_base && lmt_hash_state.eqtb_data.top <= frozen_control_sequence) { + halfword p = hash_base - 1; + do { + halfword q; + undump_int(f, q); + if (q >= (p + 1) && q <= lmt_hash_state.eqtb_data.top) { + undump_int(f, lmt_hash_state.hash[q]); + p = q; + } else { + goto BAD; + } + } while (p != lmt_hash_state.eqtb_data.top); + undump_things(f, lmt_hash_state.hash[lmt_hash_state.eqtb_data.top + 1], special_sequence_base - lmt_hash_state.eqtb_data.top); + if (lmt_hash_state.hash_data.ptr > 0) { + /* we get a warning on possible overrun here */ + undump_things(f, lmt_hash_state.hash[eqtb_size + 1], lmt_hash_state.hash_data.ptr); + } + undump_int(f, lmt_hash_state.eqtb_data.ptr); + lmt_hash_state.eqtb_data.initial = lmt_hash_state.eqtb_data.ptr; + return; + } + BAD: + tex_fatal_undump_error("hash"); +} + +/*tex + + We need to put \TEX's \quote {primitive} control sequences into the hash table, together with + their command code (which will be the |eq_type|) and an operand (which will be the |equiv|). 
+ The |primitive| procedure does this, in a way that no \TEX\ user can. The global value |cur_val| + contains the new |eqtb| pointer after |primitive| has acted. + + Because the definitions of the actual user-accessible name of a primitive can be postponed until + runtime, the function |primitive_def| is needed that does nothing except creating the control + sequence name. + +*/ + +void tex_primitive_def(const char *str, size_t length, singleword cmd, halfword chr) +{ + /*tex This creates the |text()| string: */ + cur_val = tex_string_locate(str, length, 1); + set_eq_level(cur_val, level_one); + set_eq_type(cur_val, cmd); + set_eq_flag(cur_val, primitive_flag_bit); + set_eq_value(cur_val, chr); +} + +/*tex + + The function |store_primitive_name| sets up the bookkeeping for the reverse lookup. It is + quite paranoid, because it is easy to mess this up accidentally. + + The |offset| is needed because sometimes character codes (in |o|) are indices into |eqtb| + or are offset by a magical value to make sure they do not conflict with something else. We + don't want the |prim_data[c].names| to have too many entries as it will just be wasted room, + so |offset| is substracted from |o| before creating or accessing the array. + +*/ + +static void tex_aux_store_primitive_name(strnumber s, singleword cmd, halfword chr, halfword offset) +{ + lmt_primitive_state.prim_data[cmd].offset = offset; + if (lmt_primitive_state.prim_data[cmd].subids < (chr + 1)) { + /*tex Not that efficient as each primitive triggers this now but only at ini time so ... 
*/ + strnumber *newstr = aux_allocate_clear_array(sizeof(strnumber *), chr + 1, 1); + if (lmt_primitive_state.prim_data[cmd].names) { + memcpy(newstr, lmt_primitive_state.prim_data[cmd].names, (unsigned) (lmt_primitive_state.prim_data[cmd].subids) * sizeof(strnumber)); + aux_deallocate_array(lmt_primitive_state.prim_data[cmd].names); + } + lmt_primitive_state.prim_data[cmd].names = newstr; + lmt_primitive_state.prim_data[cmd].subids = chr + 1; + } + lmt_primitive_state.prim_data[cmd].names[chr] = s; +} + +/*tex + + Compared to \TEX82, |primitive| has two extra parameters. The |off| is an offset that will be + passed on to |store_primitive_name|, the |cmd_origin| is the bit that is used to group + primitives by originator. So the next function is called for each primitive and fills |prim_eqtb|. + + Contrary to \LUATEX\ we define (using |primitive_def|) all primitives beforehand, so not only + those with |cmd_origin| values |core| and |tex|. As side effect, we don't get redundant string + entries as in \LUATEX. + +*/ + +void tex_primitive(int cmd_origin, const char *str, singleword cmd, halfword chr, halfword offset) +{ + int prim_val; + strnumber ss; + if (cmd_origin != no_command) { + tex_primitive_def(str, strlen(str), cmd, offset + chr); + /*tex Indeed, |cur_val| has the latest primitive. */ + ss = cs_text(cur_val); + } else { + ss = tex_maketexstring(str); + } + prim_val = tex_prim_lookup(ss); + prim_origin(prim_val) = (quarterword) cmd_origin; + prim_eq_type(prim_val) = cmd; + prim_equiv(prim_val) = offset + chr; + tex_aux_store_primitive_name(ss, cmd, chr, offset); +} + +/*tex + + Here is a helper that does the actual hash insertion. This code far from ideal: the existence + of |hash_extra| changes all the potential (short) coalesced lists into a single (long) one. + This will create a slowdown. + + Here |hash_state.hash_used| starts out as the maximum \quote {normal} hash, not extra. 
+ +*/ + +static halfword tex_aux_insert_id(halfword p, const unsigned char *j, unsigned int l) +{ + if (cs_text(p) > 0) { + RESTART: + if (lmt_hash_state.hash_data.ptr < lmt_hash_state.hash_data.allocated) { + ++lmt_hash_state.hash_data.ptr; + cs_next(p) = lmt_hash_state.hash_data.ptr + eqtb_size; + p = cs_next(p); + } else if (tex_aux_room_in_hash()) { + goto RESTART; + } else { + /*tex + Search for an empty location in |hash|. This actually makes the direct first hit + in such a hash slot invalid but we check for the string anyway. As we now use a + hash size that is rather minimal, we don't really need this branch. It is a last + resort anyway. + */ + do { + if (lmt_hash_state.eqtb_data.top == hash_base) { + /*tex We cannot go lower than this. */ + tex_overflow_error("hash size", hash_size + lmt_hash_state.hash_data.allocated); + } + --lmt_hash_state.eqtb_data.top; + } while (cs_text(lmt_hash_state.eqtb_data.top) != 0); + cs_next(p) = lmt_hash_state.eqtb_data.top; + p = lmt_hash_state.eqtb_data.top; + } + } + cs_text(p) = tex_push_string(j, l); + copy_eqtb_entry(p, undefined_control_sequence); + ++lmt_hash_state.eqtb_data.ptr; + return p; +} + +/*tex + + Here is the subroutine that searches the hash table for an identifier that matches a given + string of length |l > 1| appearing in |buffer[j .. (j + l - 1)]|. If the identifier is found, + the corresponding hash table address is returned. Otherwise, if the global variable + |no_new_control_sequence| is |true|, the dummy address |undefined_control_sequence| is returned. + Otherwise the identifier is inserted into the hash table and its location is returned. + + On the \LUAMETATEX\ manual we have 250K hits and 400K misses. Adapting the max and prime does + bring down the misses but also no gain in performance. In practice we seldom follow the chain. 
+ +*/ + +halfword tex_id_locate(int j, int l, int create) +{ + /*tex The index in |hash| array: */ + halfword h = tex_aux_compute_hash((char *) (lmt_fileio_state.io_buffer + j), l); + /*tex We start searching here. Note that |0 <= h < hash_prime|: */ + halfword p = h + hash_base; + /*tex The next one in a list: */ + while (1) { + strnumber s = cs_text(p); + if ((s > 0) && (str_length(s) == (unsigned) l) && tex_str_eq_buf(s, j, l)) { + return p; + } else { + halfword n = cs_next(p); + if (n) { + p = n; + } else if (create) { + return tex_aux_insert_id(p, (lmt_fileio_state.io_buffer + j), (unsigned) l); + } else { + break; + } + } + } + return undefined_control_sequence; +} + +/*tex + + Here is a similar subroutine for finding a primitive in the hash. This one is based on a \CCODE\ + string. + +*/ + +halfword tex_string_locate(const char *s, size_t l, int create) +{ + /*tex The hash code: */ + halfword h = tex_aux_compute_hash(s, (int) l); + /*tex The index in |hash| array. We start searching here. Note that |0 <= h < hash_prime|: */ + halfword p = h + hash_base; + while (1) { + if (cs_text(p) > 0 && tex_str_eq_cstr(cs_text(p), s, (int) l)) { + return p; + } else { + halfword n = cs_next(p); + if (n) { + p = n; + } else if (create) { + return tex_aux_insert_id(p, (const unsigned char *) s, (unsigned) l); + } else { + break; + } + } + } + return undefined_control_sequence; +} + +halfword tex_located_string(const char *s) +{ + size_t l = strlen(s); + return tex_string_locate(s, l, 0); +} + +/*tex + + The |print_cmd_chr| routine prints a symbolic interpretation of a command code and its modifier. + This is used in certain \quotation {You can\'t} error messages, and in the implementation of + diagnostic routines like |\show|. + + The body of |print_cmd_chr| use to be a rather tedious listing of print commands, and most of it + was essentially an inverse to the |primitive| routine that enters a \TEX\ primitive into |eqtb|. 
+ + Thanks to |prim_data|, there is no need for all that tediousness. What is left of |primt_cnd_chr| + are just the exceptions to the general rule that the |cmd,chr_code| pair represents in a single + primitive command. + +*/ + +static void tex_aux_print_chr_cmd(const char *s, halfword cmd, halfword chr) +{ + tex_print_str(s); + if (chr) { + tex_print_str(cmd == letter_cmd ? " letter " : " character "); + tex_print_uhex(chr); + tex_print_char(' '); + /* + By using the the unicode (ascii) names for some we can better support syntax + highlighting (which often involves parsing). The names are enclused in single + quotes. For the chr codes above 128 we assume \UNICODE\ support. + */ + /*tex + We already intercepted the line feed here so that it doesn't give a side effect here + in the original |tex_print_tex_str(chr)| call but we have now inlined similar code + but without side effects. + */ + if (chr < 32 || chr == 127) { + return; + } else if (chr <= 0x7F) { + switch (chr) { + case '\n' : tex_print_str("'line feed'"); return; + case '\r' : tex_print_str("'carriage return'"); return; + case ' ' : tex_print_str("'space'"); return; + case '!' : tex_print_str("'exclamation mark'"); return; + case '\"' : tex_print_str("'quotation mark'"); return; + case '#' : tex_print_str("'hash tag'"); return; + case '$' : tex_print_str("'dollar sign'"); return; + case '%' : tex_print_str("'percent sign'"); return; + case '&' : tex_print_str("'ampersand'"); return; + case '\'' : tex_print_str("'apostrophe'"); return; + case '(' : tex_print_str("'left parenthesis'"); return; + case ')' : tex_print_str("'right parenthesis'"); return; + case '*' : tex_print_str("'asterisk'"); return; + case '+' : tex_print_str("'plus sign'"); return; + case ',' : tex_print_str("'comma'"); return; + case '-' : tex_print_str("'hyphen minus'"); return; + case '.' 
: tex_print_str("'full stop'"); return; + case '/' : tex_print_str("'slash'"); return; + case ':' : tex_print_str("'colon'"); return; + case ';' : tex_print_str("'semicolon'"); return; + case '<' : tex_print_str("'less than sign'"); return; + case '=' : tex_print_str("'equal sign'"); return; + case '>' : tex_print_str("'more than sign'"); return; + case '?' : tex_print_str("'question mark'"); return; + case '@' : tex_print_str("'at sign'"); return; + case '[' : tex_print_str("'left square bracket'"); return; + case '\\' : tex_print_str("'backslash'"); return; + case ']' : tex_print_str("'right square bracket'"); return; + case '^' : tex_print_str("'circumflex accent'"); return; + case '_' : tex_print_str("'low line'"); return; + case '`' : tex_print_str("'grave accent'"); return; + case '{' : tex_print_str("'left curly bracket'"); return; + case '|' : tex_print_str("'vertical bar'"); return; + case '}' : tex_print_str("'right curly bracket'"); return; + case '~' : tex_print_str("'tilde'"); return; + } + tex_print_char(chr); + } else if (chr <= 0x7FF) { + tex_print_char(0xC0 + (chr / 0x40)); + tex_print_char(0x80 + (chr % 0x40)); + } else if (chr <= 0xFFFF) { + tex_print_char(0xE0 + (chr / 0x1000)); + tex_print_char(0x80 + ((chr % 0x1000) / 0x40)); + tex_print_char(0x80 + ((chr % 0x1000) % 0x40)); + } else if (chr <= 0x10FFFF) { + tex_print_char(0xF0 + (chr / 0x40000)); + tex_print_char(0x80 + ((chr % 0x40000) / 0x1000)); + tex_print_char(0x80 + (((chr % 0x40000) % 0x1000) / 0x40)); + tex_print_char(0x80 + (((chr % 0x40000) % 0x1000) % 0x40)); + } + } +} + +/*tex |\TEX82| Didn't print the |cmd,idx| information, but it may be useful. 
*/ + +static void tex_aux_prim_cmd_chr(quarterword cmd, halfword chr) +{ + if (cmd <= last_visible_cmd) { + int idx = chr - lmt_primitive_state.prim_data[cmd].offset; + if (idx >= 0 && idx < lmt_primitive_state.prim_data[cmd].subids) { + if (lmt_primitive_state.prim_data[cmd].names && lmt_primitive_state.prim_data[cmd].names[idx]) { + tex_print_tex_str_esc(lmt_primitive_state.prim_data[cmd].names[idx]); + } else { + tex_print_format("[warning: cmd %i, chr %i, no name]", cmd, idx); + } + } else if (cmd == internal_int_cmd && idx < number_int_pars) { + /* a special case */ + tex_print_format("[integer: chr %i, class specific]", cmd); + } else { + tex_print_format("[warning: cmd %i, chr %i, out of range]", cmd, idx); + } + } else { + tex_print_format("[warning: cmd %i, invalid]", cmd); + } +} + +static void tex_aux_show_lua_call(const char *what, int slot) +{ + int callback_id = lmt_callback_defined(show_lua_call_callback); + if (callback_id) { + char *ss = NULL; + int lua_retval = lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "Sd->S", what, slot, &ss); + if (lua_retval && ss && strlen(ss) > 0) { + tex_print_str(ss); + lmt_memory_free(ss); + return; + } + } + tex_print_format("%s %i", what, slot); +} + +void tex_print_cmd_flags(halfword cs, halfword cmd, int flags, int escaped) +{ + if (flags) { + flags = eq_flag(cs); + if (is_frozen (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("frozen " ); } + if (is_permanent(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("permanent "); } + if (is_immutable(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("immutable "); } + if (is_primitive(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("primitive "); } + if (is_mutable (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("mutable " ); } + if (is_noaligned(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("noaligned "); } + if (is_instance (flags)) { (escaped ? 
tex_print_str_esc : tex_print_str)("instance " ); } + if (is_untraced (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("untraced " ); } + } + if (is_tolerant_cmd (cmd)) { + (escaped ? tex_print_str_esc : tex_print_str)("tolerant " ); + } + if (is_protected_cmd(cmd)) { + (escaped ? tex_print_str_esc : tex_print_str)("protected "); + } else if (is_semi_protected_cmd(cmd)) { + (escaped ? tex_print_str_esc : tex_print_str)("semiprotected "); + } +} + +void tex_print_cmd_chr(singleword cmd, halfword chr) +{ + switch (cmd) { + case left_brace_cmd: + tex_aux_print_chr_cmd("begin group", cmd, chr); + break; + case right_brace_cmd: + tex_aux_print_chr_cmd("end group", cmd, chr); + break; + case math_shift_cmd: + tex_aux_print_chr_cmd("math shift", cmd, chr); + break; + case alignment_tab_cmd: + tex_aux_print_chr_cmd("alignment tab", cmd, chr); + break; + case parameter_cmd: + tex_aux_print_chr_cmd("parameter", cmd, chr); + break; + case superscript_cmd: + tex_aux_print_chr_cmd("superscript", cmd, chr); + break; + case subscript_cmd: + tex_aux_print_chr_cmd("subscript", cmd, chr); + break; + case spacer_cmd: + tex_aux_print_chr_cmd("blank space", cmd, chr); + break; + case letter_cmd: + case other_char_cmd: + tex_aux_print_chr_cmd("the", cmd, chr); + break; + /* + case active_char_cmd: + case comment_cmd: + case invalid_char_cmd: + break; + */ + case end_template_cmd: + /*tex Kind of special: |chr| points to |null_list). */ + tex_print_str_esc("endtemplate"); + // tex_print_str("end of alignment template"); + break; + case if_test_cmd: + if (chr <= last_if_test_code) { + tex_aux_prim_cmd_chr(cmd, chr); + } else { + tex_aux_show_lua_call("luacondition", chr - last_if_test_code); + } + break; + case char_given_cmd: + tex_print_str_esc("char"); + tex_print_qhex(chr); + break; + // case math_char_given_cmd: + // /*tex + // Okay, it's better for old macro packages that mess with meaning to report a more + // traditional value. A compromise. But, this might be dropped. 
+ // */ + // tex_print_str_esc("mathchar"); + // tex_show_mathcode_value(tex_mathchar_from_integer(chr, tex_mathcode), tex_mathcode); + // break; + // case math_char_xgiven_cmd: + // tex_print_str_esc("Umathchar"); + // tex_show_mathcode_value(tex_mathchar_from_integer(chr, umath_mathcode), umath_mathcode); + // break; + case lua_call_cmd: + tex_aux_show_lua_call("luacall", chr); + break; + case lua_local_call_cmd: + tex_aux_show_lua_call("local luacall", chr); + break; + case lua_protected_call_cmd: + tex_aux_show_lua_call("protected luacall", chr); + break; + case lua_value_cmd: + tex_aux_show_lua_call("luavalue", chr); + break; + case set_font_cmd: + tex_print_str("select font "); + tex_print_font(chr); + break; + case undefined_cs_cmd: + tex_print_str("undefined"); + break; + case call_cmd: + case protected_call_cmd: + case semi_protected_call_cmd: + case tolerant_call_cmd: + case tolerant_protected_call_cmd: + case tolerant_semi_protected_call_cmd: + tex_print_cmd_flags(cur_cs, cur_cmd, 1, 0); + tex_print_str("macro"); + break; + case internal_toks_cmd: + tex_aux_prim_cmd_chr(cmd, chr); + break; + case register_toks_cmd: + tex_print_str_esc("toks"); + tex_print_int(register_toks_number(chr)); + break; + case internal_int_cmd: + tex_aux_prim_cmd_chr(cmd, chr); + break; + case register_int_cmd: + tex_print_str_esc("count"); + tex_print_int(register_int_number(chr)); + break; + case internal_attribute_cmd: + tex_aux_prim_cmd_chr(cmd, chr); + break; + case register_attribute_cmd: + tex_print_str_esc("attribute"); + tex_print_int(register_attribute_number(chr)); + break; + case internal_dimen_cmd: + tex_aux_prim_cmd_chr(cmd, chr); + break; + case register_dimen_cmd: + tex_print_str_esc("dimen"); + tex_print_int(register_dimen_number(chr)); + break; + case internal_glue_cmd: + tex_aux_prim_cmd_chr(cmd, chr); + break; + case register_glue_cmd: + tex_print_str_esc("skip"); + tex_print_int(register_glue_number(chr)); + break; + case internal_mu_glue_cmd: + 
tex_aux_prim_cmd_chr(cmd, chr); + break; + case register_mu_glue_cmd: + tex_print_str_esc("muskip"); + tex_print_int(register_mu_glue_number(chr)); + break; + case node_cmd: + tex_print_str(node_token_flagged(chr) ? "large" : "small"); + tex_print_str(" node reference"); + break; + case integer_cmd: + tex_print_str("integer "); + tex_print_int(chr); + break; + case dimension_cmd: + tex_print_str("dimension "); + tex_print_dimension(chr, pt_unit); + break; + case gluespec_cmd: + tex_print_str("gluespec "); + tex_print_spec(chr, pt_unit); + break; + case mugluespec_cmd: + tex_print_str("mugluespec "); + tex_print_spec(chr, mu_unit); + break; + case mathspec_cmd: + switch (node_subtype(chr)) { + case tex_mathcode: + tex_print_str_esc("mathchar"); + break; + case umath_mathcode: + /* case umathnum_mathcode: */ + tex_print_str_esc("Umathchar"); + break; + case mathspec_mathcode: + tex_print_str("mathspec "); + } + tex_print_mathspec(chr); + break; + case fontspec_cmd: + { + /* We don't check for validity here. */ + tex_print_str("fontspec "); + tex_print_fontspec(chr); + } + break; + case deep_frozen_end_template_cmd: + /*tex Kind of special: |chr| points to |null_list). */ + tex_print_str_esc("endtemplate"); + break; + case deep_frozen_dont_expand_cmd: + /*tex Kind of special. */ + tex_print_str_esc("notexpanded"); + break; + /* + case string_cmd: + print_str("string:->"); + print(cs_offset_value + chr); + break; + */ + case internal_box_reference_cmd: + tex_print_str_esc("hiddenlocalbox"); + break; + default: + /*tex These are most commands, actually. Todo: local boxes*/ + tex_aux_prim_cmd_chr(cmd, chr); + break; + } +} diff --git a/source/luametatex/source/tex/texprimitive.h b/source/luametatex/source/tex/texprimitive.h new file mode 100644 index 000000000..640a6b232 --- /dev/null +++ b/source/luametatex/source/tex/texprimitive.h @@ -0,0 +1,95 @@ +/* + See license.txt in the root of this project. 
+*/ + +# ifndef LMT_PRIMITIVE_H +# define LMT_PRIMITIVE_H + +/*tex + + This is a list of origins for primitive commands. The engine starts out with hardly anything + enabled so as a first step one should enable the \TEX\ primitives, and additional \ETEX\ and + \LUATEX\ primitives. Maybe at some moment we should just enable all by default. + +*/ + +typedef enum command_origin { + tex_command = 1, + etex_command = 2, + luatex_command = 4, + no_command = 8, +} command_origin; + +typedef struct hash_state_info { + memoryword *hash; /*tex The hash table. */ + memory_data hash_data; + memoryword *eqtb; /*tex The equivalents table. */ + memory_data eqtb_data; + int no_new_cs; /*tex Are new identifiers legal? */ + int padding; +} hash_state_info ; + +extern hash_state_info lmt_hash_state; + +/*tex + + We use no defines as a |hash| macro will clash with lua hash. Most hash accessors are in a few + places where it makes sense to be explicit anyway. + +*/ + +# define cs_next(a) lmt_hash_state.hash[(a)].half0 /*tex link for coalesced lists */ +# define cs_text(a) lmt_hash_state.hash[(a)].half1 /*tex string number for control sequence name */ + +# define undefined_primitive 0 +# define prim_size 2100 /*tex (can be 1000) maximum number of primitives (quite a bit more than needed) */ +# define prim_prime 1777 /*tex (can be 853) about 85 percent of |primitive_size| */ + +typedef struct primitive_info { + halfword subids; /*tex number of name entries */ + halfword offset; /*tex offset to be used for |chr_code|s */ + strnumber *names; /*tex array of names */ +} prim_info; + +typedef struct primitive_state_info { + memoryword prim[prim_size + 1]; + memoryword prim_eqtb[prim_size + 1]; + prim_info prim_data[last_cmd + 1]; + halfword prim_used; + /* alignment */ + int padding; +} primitive_state_info; + +extern primitive_state_info lmt_primitive_state; + +# define prim_next(a) lmt_primitive_state.prim[(a)].half0 /*tex Link for coalesced lists. 
*/ +# define prim_text(a) lmt_primitive_state.prim[(a)].half1 /*tex String number for control sequence name. */ +# define prim_origin(a) lmt_primitive_state.prim_eqtb[(a)].quart01 /*tex Level of definition. */ +# define prim_eq_type(a) lmt_primitive_state.prim_eqtb[(a)].quart00 /*tex Command code for equivalent. */ +# define prim_equiv(a) lmt_primitive_state.prim_eqtb[(a)].half1 /*tex Equivalent value. */ + +# define get_prim_eq_type(p) prim_eq_type(p) +# define get_prim_equiv(p) prim_equiv(p) +# define get_prim_text(p) prim_text(p) +# define get_prim_origin(p) prim_origin(p) + +extern void tex_initialize_primitives (void); +extern void tex_initialize_hash_mem (void); +/* int tex_room_in_hash (void); */ +extern halfword tex_prim_lookup (strnumber s); +/* int tex_cs_is_primitive (strnumber csname); */ +extern void tex_primitive (int cmd_origin, const char *ss, singleword cmd, halfword chr, halfword offset); +extern void tex_primitive_def (const char *str, size_t length, singleword cmd, halfword chr); +extern void tex_print_cmd_chr (singleword cmd, halfword chr); +extern void tex_dump_primitives (dumpstream f); +extern void tex_undump_primitives (dumpstream f); +extern void tex_dump_hashtable (dumpstream f); +extern void tex_undump_hashtable (dumpstream f); +/* halfword tex_string_lookup (const char *s, size_t l); */ +extern halfword tex_string_locate (const char *s, size_t l, int create); +extern halfword tex_located_string (const char *s); +/* halfword tex_id_lookup (int j, int l); */ +extern halfword tex_id_locate (int j, int l, int create); +extern void tex_print_cmd_flags (halfword cs, halfword cmd, int flags, int escape); + +# endif diff --git a/source/luametatex/source/tex/texprinting.c b/source/luametatex/source/tex/texprinting.c new file mode 100644 index 000000000..005c2a3c8 --- /dev/null +++ b/source/luametatex/source/tex/texprinting.c @@ -0,0 +1,1460 @@ +/* + See license.txt in the root of this project. 
+*/ + +# include "luametatex.h" + +print_state_info lmt_print_state = { + .logfile = NULL, + .loggable_info = NULL, + .selector = 0, + .tally = 0, + .terminal_offset = 0, + .logfile_offset = 0, + .new_string_line = 0, + .trick_buffer = { 0 }, + .trick_count = 0, + .first_count = 0, + .saved_selector = 0, + .font_in_short_display = 0, + .saved_logfile = NULL, + .saved_logfile_offset = 0, +}; + +/*tex + + During the development of \LUAMETATEX\ reporting has been stepwise upgraded, for instance with more + abstract print functions and a formatter. Much more detail is shown and additional tracing options + have been added (like for marks, inserts, adjust, math, etc.). The format of the traditonal messages + was mostly kept (sometimes under paramameter control using a higher tracing value) but after reading + the nth ridiculous comment about logging in \LUATEX\ related to \CONTEXT\ I decided that it no + longer made sense to offer compatibility because it will never satisfy everyone and we want to move + on, so per spring 2022 we will see even further normalization and log compatility options get (are) + dropped. If there are inconsistencies left, assume they will be dealt with. It's all about being able + to recognize what gets logged. If someone longs for the old reporting, there are plenty alternative + engines available. + + [where: ...] : all kind of tracing + {...} : more traditional tex tracing + <...> : if tracing (maybe) + +*/ + +/*tex + + Messages that are sent to a user's terminal and to the transcript-log file are produced by + several |print| procedures. These procedures will direct their output to a variety of places, + based on the setting of the global variable |selector|, which has the following possible values: + + \startitemize + + \startitem + |term_and_log|, the normal setting, prints on the terminal and on the transcript file. + \stopitem + + \startitem + |log_only|, prints only on the transcript file. 
+ \stopitem + + \startitem + |term_only|, prints only on the terminal. + \stopitem + + \startitem + |no_print|, doesn't print at all. This is used only in rare cases before the transcript + file is open. + \stopitem + + \startitem + |pseudo|, puts output into a cyclic buffer that is used by the |show_context| routine; when + we get to that routine we shall discuss the reasoning behind this curious mode. + \stopitem + + \startitem + |new_string|, appends the output to the current string in the string pool. + \stopitem + + \startitem + 0 to 15, prints on one of the sixteen files for |\write| output. + \stopitem + + \stopitemize + + The symbolic names |term_and_log|, etc., have been assigned numeric codes that satisfy the + convenient relations |no_print + 1 = term_only|, |no_print + 2 = log_only|, |term_only + 2 = + log_only + 1 = term_and_log|. + + Three additional global variables, |tally| and |term_offset| and |file_offset|, record the + number of characters that have been printed since they were most recently cleared to zero. We + use |tally| to record the length of (possibly very long) stretches of printing; |term_offset| + and |file_offset|, on the other hand, keep track of how many characters have appeared so far on + the current line that has been output to the terminal or to the transcript file, respectively. + + The state structure collects: |new_string_line| and |escape_controls|, the transcript handle of + a \TEX\ session: |log_file|, the target of a message: |selector|, the digits in a number being + output |dig[23]|, the number of characters recently printed |tally|, the number of characters + on the current terminal line |term_offset|, the number of characters on the current file line + |file_offset|, the circular buffer for pseudoprinting |trick_buf|, the threshold for + pseudoprinting (explained later) |trick_count|, another variable for pseudoprinting + |first_count|, a blocker for minor adjustments to |show_token_list| namely |inhibit_par_tokens|. 
+ + To end a line of text output, we call |print_ln|: + +*/ + +void tex_print_ln(void) +{ + switch (lmt_print_state.selector) { + case no_print_selector_code: + break; + case terminal_selector_code: + fputc('\n', stdout); + lmt_print_state.terminal_offset = 0; + break; + case logfile_selector_code: + fputc('\n', lmt_print_state.logfile); + lmt_print_state.logfile_offset = 0; + break; + case terminal_and_logfile_selector_code: + fputc('\n', stdout); + fputc('\n', lmt_print_state.logfile); + lmt_print_state.terminal_offset = 0; + lmt_print_state.logfile_offset = 0; + break; + case pseudo_selector_code: + break; + case new_string_selector_code: + if (lmt_print_state.new_string_line > 0) { + tex_print_char(lmt_print_state.new_string_line); + } + break; + case luabuffer_selector_code: + lmt_newline_to_buffer(); + break; + default: + break; + } + /*tex |tally| is not affected */ +} + + +/*tex + + The |print_char| procedure sends one byte to the desired destination. All printing comes through + |print_ln| or |print_char|, except for the case of |print_str| (see below). + + The checking of the line length is an inheritance from previous engines and we dropped it here. + It doesn't make much sense nowadays. The same is true for escaping. + + Incrementing the tally ... 
only needed in pseudo mode : + +*/ + +// void tex_print_char(int s) +// { +// if (s < 0 || s > 255) { +// tex_formatted_warning("print", "weird character %i", s); +// } else if (s == new_line_char_par && (lmt_print_state.selector < pseudo_selector_code)) { +// tex_print_ln(); +// } else { +// switch (lmt_print_state.selector) { +// case no_print_selector_code: +// break; +// case terminal_selector_code: +// fputc(s, stdout); +// ++lmt_print_state.terminal_offset; +// break; +// case logfile_selector_code: +// fputc(s, lmt_print_state.logfile); +// ++lmt_print_state.logfile_offset; +// break; +// case terminal_and_logfile_selector_code: +// fputc(s, stdout); +// fputc(s, lmt_print_state.logfile); +// ++lmt_print_state.terminal_offset; +// ++lmt_print_state.logfile_offset; +// break; +// case pseudo_selector_code: +// if (lmt_print_state.tally < lmt_print_state.trick_count) { +// lmt_print_state.trick_buffer[lmt_print_state.tally % lmt_error_state.line_limits.size] = (unsigned char) s; +// } +// ++lmt_print_state.tally; +// break; +// case new_string_selector_code: +// tex_append_char((unsigned char) s); +// break; +// case luabuffer_selector_code: +// lmt_char_to_buffer((char) s); +// break; +// default: +// break; +// } +// } +// } + +void tex_print_char(int s) +{ + if (s < 0 || s > 255) { + tex_formatted_warning("print", "weird character %i", s); + } else { + switch (lmt_print_state.selector) { + case no_print_selector_code: + break; + case terminal_selector_code: + if (s == new_line_char_par) { + fputc('\n', stdout); + lmt_print_state.terminal_offset = 0; + } else { + fputc(s, stdout); + ++lmt_print_state.terminal_offset; + } + break; + case logfile_selector_code: + if (s == new_line_char_par) { + fputc('\n', lmt_print_state.logfile); + lmt_print_state.logfile_offset = 0; + } else { + fputc(s, lmt_print_state.logfile); + ++lmt_print_state.logfile_offset; + } + break; + case terminal_and_logfile_selector_code: + if (s == new_line_char_par) { + fputc('\n', stdout); 
+ fputc('\n', lmt_print_state.logfile); + lmt_print_state.terminal_offset = 0; + lmt_print_state.logfile_offset = 0; + } else { + fputc(s, stdout); + fputc(s, lmt_print_state.logfile); + ++lmt_print_state.terminal_offset; + ++lmt_print_state.logfile_offset; + } + break; + case pseudo_selector_code: + if (lmt_print_state.tally < lmt_print_state.trick_count) { + lmt_print_state.trick_buffer[lmt_print_state.tally % lmt_error_state.line_limits.size] = (unsigned char) s; + } + ++lmt_print_state.tally; + break; + case new_string_selector_code: + tex_append_char((unsigned char) s); + break; + case luabuffer_selector_code: + lmt_char_to_buffer((char) s); + break; + default: + break; + } + } +} + +/*tex + + An entire string is output by calling |print|. Note that if we are outputting the single + standard \ASCII\ character |c|, we could call |print("c")|, since |"c" = 99| is the number of a + single-character string, as explained above. But |print_char("c")| is quicker, so \TEX\ goes + directly to the |print_char| routine when it knows that this is safe. (The present + implementation assumes that it is always safe to print a visible \ASCII\ character.) + + The first 256 entries above the 17th unicode plane are used for a special trick: when \TEX\ has + to print items in that range, it will instead print the character that results from substracting + 0x110000 from that value. This allows byte-oriented output to things like |\specials|. + + This feature will disappear. + +*/ + +static void tex_aux_uprint(int s) +{ + /*tex We're not sure about this so it's disabled for now! 
*/ + /* + if ((print_state.selector > pseudo_selector_code)) { + / *tex internal strings are not expanded * / + print_char(s); + return; + } + */ + if (s == new_line_char_par && lmt_print_state.selector < pseudo_selector_code) { + tex_print_ln(); + return; + } else if (s <= 0x7F) { + tex_print_char(s); + } else if (s <= 0x7FF) { + tex_print_char(0xC0 + (s / 0x40)); + tex_print_char(0x80 + (s % 0x40)); + } else if (s <= 0xFFFF) { + tex_print_char(0xE0 + (s / 0x1000)); + tex_print_char(0x80 + ((s % 0x1000) / 0x40)); + tex_print_char(0x80 + ((s % 0x1000) % 0x40)); + } else if (s >= 0x110000) { + int c = s - 0x110000; + if (c >= 256) { + tex_formatted_warning("print", "bad raw byte to print (c=%d), skipped",c); + } else { + tex_print_char(c); + } + } else { + tex_print_char(0xF0 + (s / 0x40000)); + tex_print_char(0x80 + ((s % 0x40000) / 0x1000)); + tex_print_char(0x80 + (((s % 0x40000) % 0x1000) / 0x40)); + tex_print_char(0x80 + (((s % 0x40000) % 0x1000) % 0x40)); + } +} + +static void tex_aux_lprint(lstring *ss) { + /*tex current character code position */ + unsigned char *j = ss->s; + unsigned char *l = j + ss->l; + while (j < l) { + /*tex We don't bother checking the last two bytes explicitly */ + /* 0x110000 in utf=8: 0xF4 0x90 0x80 0x80 */ + if ((j < l - 4) && (*j == 0xF4) && (*(j + 1) == 0x90)) { + int c = (*(j + 2) - 128) * 64 + (*(j + 3) - 128); + tex_print_char(c); + j = j + 4; + } else { + tex_print_char(*j); + ++j; + } + } +} + +void tex_print_tex_str(int s) +{ + if (s >= lmt_string_pool_state.string_pool_data.ptr) { + tex_normal_warning("print", "bad string pointer"); + } else if (s < cs_offset_value) { + if (s < 0) { + tex_normal_warning("print", "bad string offset"); + } else { + tex_aux_uprint(s); + } + } else if (lmt_print_state.selector == new_string_selector_code) { + tex_append_string(str_string(s), (unsigned) str_length(s)); + } else { + tex_aux_lprint(&str_lstring(s)); + } +} + +/*tex + + The procedure |print_nl| is like |print|, but it makes sure 
that the string appears at the + beginning of a new line. + +*/ + +void tex_print_nlp(void) +{ + if (lmt_print_state.new_string_line > 0) { + tex_print_char(lmt_print_state.new_string_line); + } else { + switch (lmt_print_state.selector) { + case terminal_selector_code: + if (lmt_print_state.terminal_offset > 0) { + fputc('\n', stdout); + lmt_print_state.terminal_offset = 0; + } + break; + case logfile_selector_code: + if (lmt_print_state.logfile_offset > 0) { + fputc('\n', lmt_print_state.logfile); + lmt_print_state.logfile_offset = 0; + } + break; + case terminal_and_logfile_selector_code: + if (lmt_print_state.terminal_offset > 0) { + fputc('\n', stdout); + lmt_print_state.terminal_offset = 0; + } + if (lmt_print_state.logfile_offset > 0) { + fputc('\n', lmt_print_state.logfile); + lmt_print_state.logfile_offset = 0; + } + break; + case luabuffer_selector_code: + lmt_newline_to_buffer(); + break; + } + } +} + +/*tex + + The |char *| versions of the same procedures. |print_str| is different because it uses + buffering, which works well because most of the output actually comes through |print_str|. 
+ +*/ + +void tex_print_str(const char *s) +{ + int logfile = 0; + int terminal = 0; + switch (lmt_print_state.selector) { + case no_print_selector_code: + return; + case terminal_selector_code: + terminal = 1; + break; + case logfile_selector_code: + logfile = 1; + break; + case terminal_and_logfile_selector_code: + logfile = 1; + terminal = 1; + break; + case pseudo_selector_code: + while ((*s) && (lmt_print_state.tally < lmt_print_state.trick_count)) { + lmt_print_state.trick_buffer[lmt_print_state.tally % lmt_error_state.line_limits.size] = (unsigned char) *s++; + lmt_print_state.tally++; + } + return; + case new_string_selector_code: + tex_append_string((const unsigned char *) s, (unsigned) strlen(s)); + return; + case luabuffer_selector_code: + lmt_string_to_buffer(s); + return; + default: + break; + } + if (terminal || logfile) { + int len = (int) strlen(s); + if (logfile && ! lmt_fileio_state.log_opened) { + logfile = 0; + } + if (len > 0) { + int newline = s[len-1] == '\n'; + if (logfile) { + fputs(s, lmt_print_state.logfile); + if (newline) { + lmt_print_state.logfile_offset = 0; + } else { + lmt_print_state.logfile_offset += len; + } + } + if (terminal) { + fputs(s, stdout); + if (newline) { + lmt_print_state.terminal_offset = 0; + } else { + lmt_print_state.terminal_offset += len; + } + } + } + } +} + +/*tex + + Here is the very first thing that \TEX\ prints: a headline that identifies the version number + and format package. The |term_offset| variable is temporarily incorrect, but the discrepancy is + not serious since we assume that the banner and format identifier together will occupy at most + |max_print_line| character positions. Well, we dropped that check in this variant. + + Maybe we should drop printing the format identifier. 
+ +*/ + +void tex_print_banner(void) +{ + fprintf( + stdout, + "%s %s\n", + lmt_engine_state.luatex_banner, + str_string(lmt_dump_state.format_identifier) + ); +} + +void tex_print_log_banner(void) +{ + fprintf( + lmt_print_state.logfile, + "engine: %s, format id: %s, time stamp: %d-%d-%d %d:%d, startup file: %s, job name: %s, dump name: %s", + lmt_engine_state.luatex_banner, + str_string(lmt_dump_state.format_identifier), + year_par, month_par > 12 ? 0 : month_par, day_par, time_par / 60, time_par % 60, + lmt_engine_state.startup_filename ? lmt_engine_state.startup_filename : "-", + lmt_engine_state.startup_jobname ? lmt_engine_state.startup_jobname : "-", + lmt_engine_state.dump_name ? lmt_engine_state.dump_name : "-" + ); +} + +void tex_print_version_banner(void) +{ + fputs(lmt_engine_state.luatex_banner, stdout); +} + +/*tex + + The procedure |print_esc| prints a string that is preceded by the user's escape character + (which is usually a backslash). + +*/ + +void tex_print_tex_str_esc(strnumber s) +{ + /*tex Set variable |c| to the current escape character: */ + int c = escape_char_par; + if (c >= 0 && c < 0x110000) { + tex_print_tex_str(c); + } + if (s) { + tex_print_tex_str(s); + } +} + +/*tex This prints escape character, then |s|. */ + +void tex_print_str_esc(const char *s) +{ + /*tex Set variable |c| to the current escape character: */ + int c = escape_char_par; + if (c >= 0 && c < 0x110000) { + tex_print_tex_str(c); + } + if (s) { + tex_print_str(s); + } +} + +/*tex + An array of digits in the range |0..15| is printed by |print_the_digs|. These digits are in the + reverse order: |dig[k-1]|$\,\ldots\,$|dig[0]|! 
+*/ + +// inline static void tex_print_decimal_digits(const unsigned char *digits, int k) +// { +// while (k-- > 0) { +// tex_print_char('0' + digits[k]); +// } +// } + +// inline static void tex_print_hexadecimal_digits(const unsigned char *digits, int k) +// { +// while (k-- > 0) { +// if (digits[k] < 10) { +// tex_print_char('0' + digits[k]); +// } else { +// tex_print_char('A' - 10 + digits[k]); +// } +// } +// } + +/*tex + + The following procedure, which prints out the decimal representation of a given integer |n|, + has been written carefully so that it works properly if |n = 0| or if |(-n)| would cause + overflow. It does not apply |mod| or |div| to negative arguments, since such operations are not + implemented consistently by all \PASCAL\ compilers. + +*/ + +// void tex_print_int(int n) +// { +// /*tex In the end a 0..9 fast path works out best. */ +// if (n >= 0 && n <= 9) { +// tex_print_char('0' + n); +// } else { +// /*tex index to current digit; we assume that $|n|<10^{23}$ */ +// int k = 0; +// unsigned char digits[24]; +// if (n < 0) { +// tex_print_char('-'); +// n = -n; +// } +// do { +// digits[k] = (unsigned char) (n % 10); +// n = n / 10; +// ++k; +// } while (n != 0); +// tex_print_decimal_digits(digits, k); +// } +// } + +void tex_print_int(int n) +{ + /*tex In the end a 0..9 fast path works out best; using |sprintf| is slower. */ + if (n >= 0 && n <= 9) { + tex_print_char('0' + n); + } else { + int k = 0; + unsigned char digits[24]; + if (n < 0) { + tex_print_char('-'); + n = -n; + } + do { + digits[k] = '0' + (unsigned char) (n % 10); + n = n / 10; + ++k; + } while (n != 0); + while (k-- > 0) { + tex_print_char(digits[k]); + } + } +} + +/*tex + + Conversely, here is a procedure analogous to |print_int|. 
If the output of this procedure is + subsequently read by \TEX\ and converted by the |round_decimals| routine above, it turns out + that the original value will be reproduced exactly; the \quote {simplest} such decimal number + is output, but there is always at least one digit following the decimal point. + + The invariant relation in the |repeat| loop is that a sequence of decimal digits yet to be + printed will yield the original number if and only if they form a fraction~$f$ in the range $s + - \delta \L10 \cdot 2^{16} f < s$. We can stop if and only if $f = 0$ satisfies this condition; + the loop will terminate before $s$ can possibly become zero. + + The next one prints a scaled real, rounded to five digits. + +*/ + +void tex_print_dimension(scaled s, int unit) +{ + if (s == 0) { + tex_print_str("0.0"); /* really .. just 0 is not ok for some applications */ + } else { + /*tex The amount of allowable inaccuracy: */ + scaled delta = 10; + char buffer[20] = { 0 } ; + int i = 0; + if (s < 0) { + /*tex Print the sign, if negative. */ + tex_print_char('-'); + s = -s; + } + /*tex Print the integer part. */ + tex_print_int(s / unity); + buffer[i++] = '.'; + s = 10 * (s % unity) + 5; + do { + if (delta > unity) { + /*tex Round the last digit. */ + s = s + 0100000 - 50000; + } + buffer[i++] = (unsigned char) ('0' + (s / unity)); + s = 10 * (s % unity); + delta *= 10; + } while (s > delta); + // buffer[i++] = '\0'; + tex_print_str(buffer); + } + if (unit != no_unit) { + tex_print_unit(unit); + } +} + +void tex_print_sparse_dimension(scaled s, int unit) +{ + if (s == 0) { + tex_print_char('0'); + } else if (s == unity) { + tex_print_char('1'); + } else { + /*tex The amount of allowable inaccuracy: */ + scaled delta = 10; + char buffer[20]; + int i = 0; + if (s < 0) { + /*tex Print the sign, if negative. */ + tex_print_char('-'); + /*tex So we trust it here while in printing int we mess around. */ + s = -s; + } + /*tex Print the integer part. 
*/ + tex_print_int(s / unity); + s = 10 * (s % unity) + 5; + do { + if (delta > unity) { + /*tex Round the last digit. */ + s = s + 0100000 - 50000; + } + buffer[i++] = (unsigned char) ('0' + (s / unity)); + s = 10 * (s % unity); + delta *= 10; + } while (s > delta); + if (i == 1 && buffer[i-1] == '0') { + /* no need */ + } else { + buffer[i++] = '\0'; + tex_print_char('.'); + tex_print_str(buffer); + } + } + if (unit != no_unit) { + tex_print_unit(unit); + } +} + +/*tex + + Hexadecimal printing of nonnegative integers is accomplished by |print_hex|. We have a few + variants. Because we have bitsets that can give upto |0xFFFFFFFF| we treat the given integer + as an unsigned. +*/ + +// void tex_print_hex(int n) +// { +// /*tex index to current digit; we assume that $0\L n<16^{22}$ */ +// int k = 0 ; +// unsigned char digits[24]; +// do { +// digits[k] = n % 16; +// n = n / 16; +// ++k; +// } while (n != 0); +// tex_print_hexadecimal_digits(digits, k); +// } + +void tex_print_hex(int sn) +{ + unsigned int n = (unsigned int) sn; + int k = 0; + unsigned char digits[24]; + if (n < 0) { + tex_print_char('-'); + n = -n; + } + do { + unsigned char d = (unsigned char) (n % 16); + if (d < 10) { + digits[k] = '0' + d; + } else { + digits[k] = 'A' - 10 + d; + } + n = n / 16; + ++k; + } while (n != 0); + while (k-- > 0) { + tex_print_char(digits[k]); + } +} + +void tex_print_qhex(int n) +{ + tex_print_char('"'); + tex_print_hex(n); +} + +void tex_print_uhex(int n) +{ + tex_print_str("U+"); + if (n < 16) { + tex_print_char('0'); + } + if (n < 256) { + tex_print_char('0'); + } + if (n < 4096) { + tex_print_char('0'); + } + tex_print_hex(n); +} + +/*tex + + Roman numerals are produced by the |print_roman_int| routine. Readers who like puzzles might + enjoy trying to figure out how this tricky code works; therefore no explanation will be given. + Notice that 1990 yields |mcmxc|, not |mxm|. 
+ +*/ + +void tex_print_roman_int(int n) +{ + char mystery[] = "m2d5c2l5x2v5i"; + char *j = (char *) mystery; + int v = 1000; + while (1) { + while (n >= v) { + tex_print_char(*j); + n = n - v; + } + if (n <= 0) { + /*tex nonpositive input produces no output */ + return; + } else { + char *k = j + 2; + int u = v / (*(k - 1) - '0'); + if (*(k - 1) == '2') { + k = k + 2; + u = u / (*(k - 1) - '0'); + } + if (n + u >= v) { + tex_print_char(*k); + n = n + u; + } else { + j = j + 2; + v = v / (*(j - 1) - '0'); + } + } + } +} + +/*tex + + The |print| subroutine will not print a string that is still being created. The following + procedure will. + +*/ + +void tex_print_current_string(void) +{ + for (int j = 0; j < lmt_string_pool_state.string_temp_top; j++) { + tex_print_char(lmt_string_pool_state.string_temp[j++]); + } +} + +/*tex + + The procedure |print_cs| prints the name of a control sequence, given a pointer to its address + in |eqtb|. A space is printed after the name unless it is a single nonletter or an active + character. This procedure might be invoked with invalid data, so it is \quote {extra robust}. + The individual characters must be printed one at a time using |print|, since they may be + unprintable. + +*/ + +void tex_print_cs_checked(halfword p) +{ + if (p == null_cs) { + tex_print_str_esc("csname"); + tex_print_str_esc("endcsname"); + tex_print_char(' '); + } else if (p < hash_base) { + tex_print_str(error_string_impossible(11)); + } else if (p == undefined_control_sequence) { + tex_print_str_esc("undefined"); + tex_print_char(' '); + } else if (eqtb_out_of_range(p)) { + tex_print_str(error_string_impossible(12)); + } else { + strnumber t = cs_text(p); + if (t < 0 || t >= lmt_string_pool_state.string_pool_data.ptr) { + tex_print_str(error_string_nonexistent(13)); + } else if (tex_is_active_cs(t)) { + tex_print_tex_str(active_cs_value(t)); + } else { + tex_print_tex_str_esc(t); + if (! 
tex_single_letter(t) || (tex_get_cat_code(cat_code_table_par, aux_str2uni(str_string(t))) == letter_cmd)) { + tex_print_char(' '); + } + } + } +} + +/*tex + + Here is a similar procedure; it avoids the error checks, and it never prints a space after the + control sequence. The other one doesn't even print the bogus cs. + +*/ + +void tex_print_cs(halfword p) +{ + if (p == null_cs) { + tex_print_str_esc("csname"); + tex_print_str_esc("endcsname"); + } else { + strnumber t = cs_text(p); + if (tex_is_active_cs(t)) { + tex_print_tex_str(active_cs_value(t)); + } else { + tex_print_tex_str_esc(t); + } + } +} + +void tex_print_cs_name(halfword p) +{ + if (p != null_cs) { + strnumber t = cs_text(p); + if (tex_is_active_cs(t)) { + tex_print_tex_str(active_cs_value(t)); + } else { + tex_print_tex_str(t); + } + } +} + +/*tex + + Then there is a subroutine that prints glue stretch and shrink, possibly followed by the name + of finite units: + +*/ + +void tex_print_glue(scaled d, int order, int unit) +{ + tex_print_dimension(d, no_unit); + if ((order < normal_glue_order) || (order > filll_glue_order)) { + tex_print_str("foul"); + } else if (order > normal_glue_order) { + tex_print_str("fi"); + while (order > fi_glue_order) { + tex_print_char('l'); + --order; + } + } else { + tex_print_unit(unit); + } +} + +/*tex The next subroutine prints a whole glue specification. */ + +void tex_print_unit(int unit) +{ + if (unit != no_unit) { + tex_print_str(unit == pt_unit ? 
"pt" : "mu"); + } +} + +void tex_print_spec(int p, int unit) +{ + if (p < 0) { + tex_print_char('*'); + } else if (p == 0) { + tex_print_dimension(0, unit); + } else { + tex_print_dimension(glue_amount(p), unit); + if (glue_stretch(p)) { + tex_print_str(" plus "); + tex_print_glue(glue_stretch(p), glue_stretch_order(p), unit); + } + if (glue_shrink(p)) { + tex_print_str(" minus "); + tex_print_glue(glue_shrink(p), glue_shrink_order(p), unit); + } + } +} + +void tex_print_fontspec(int p) +{ + tex_print_int(font_spec_identifier(p)); + if (font_spec_scale(p) != unused_scale_value) { + tex_print_str(" scale "); + tex_print_int(font_spec_scale(p)); + } + if (font_spec_x_scale(p) != unused_scale_value) { + tex_print_str(" xscale "); + tex_print_int(font_spec_x_scale(p)); + } + if (font_spec_y_scale(p) != unused_scale_value) { + tex_print_str(" yscale "); + tex_print_int(font_spec_y_scale(p)); + } +} + +/*tex Math characters: */ + +void tex_print_mathspec(int p) +{ + if (p) { + mathcodeval m = tex_get_math_spec(p); + tex_show_mathcode_value(m, node_subtype(p)); + } else { + tex_print_str("[invalid mathspec]"); + } +} + +/*tex + + We can reinforce our knowledge of the data structures just introduced by considering two + procedures that display a list in symbolic form. The first of these, called |short_display|, is + used in \quotation {overfull box} messages to give the top-level description of a list. The + other one, called |show_node_list|, prints a detailed description of exactly what is in the + data structure. + + The philosophy of |short_display| is to ignore the fine points about exactly what is inside + boxes, except that ligatures and discretionary breaks are expanded. As a result, + |short_display| is a recursive procedure, but the recursion is never more than one level deep. + + A global variable |font_in_short_display| keeps track of the font code that is assumed to be + present when |short_display| begins; deviations from this font will be printed. 
+ + Boxes, rules, inserts, whatsits, marks, and things in general that are sort of \quote + {complicated} are indicated only by printing |[]|. + + We print a bit more than original \TEX. A value of 0 or 1 or any large value will behave the + same as before. The reason for this extension is that a |name| not always makes sense. + + \starttyping + 0 \foo xyz + 1 \foo (bar) + 2 <bar> xyz + 3 <bar @ ..> xyz + 4 <id> + 5 <id: bar> + 6 <id: bar @ ..> xyz + \stoptyping + +*/ + +void tex_print_char_identifier(halfword c) // todo: use string_print_format +{ + if (c <= 0x10FFFF) { + char b[10]; + if ( (c >= 0x00E000 && c <= 0x00F8FF) || (c >= 0x0F0000 && c <= 0x0FFFFF) || + (c >= 0x100000 && c <= 0x10FFFF) || (c >= 0x00D800 && c <= 0x00DFFF) ) { + sprintf(b, "0x%06X", c); + tex_print_str(b); + } else { + sprintf(b, "U+%06X", c); + tex_print_str(b); + tex_print_char(' '); + tex_print_tex_str(c); + } + } +} + +void tex_print_font_identifier(halfword f) +{ + /*tex |< >| is less likely to clash with text parenthesis */ + if (tex_is_valid_font(f)) { + // switch (tracing_fonts_par) { + // case 0: + // case 1: + // if (font_original(f)) { + // tex_print_format(font_original(f)); + // } else { + // tex_print_format("font: %i", f); + // } + // if (tracing_fonts_par == 0) { + // break; + // } else if (font_size(f) == font_design_size(f)) { + // tex_print_format(" (%s)", font_name(f)); + // } else { + // tex_print_format(" (%s @ %D)", font_name(f), font_size(f), pt_unit); + // } + // break; + // case 2: + // tex_print_format("<%s>", font_name(f)); + // break; + // case 3: + // tex_print_format("<%s @ %D>", font_name(f), font_size(f), pt_unit); + // break; + // case 4: + // tex_print_format("<%i>", f); + // break; + // case 5: + // tex_print_format("<%i: %s>", f, font_name(f)); + // break; + // /* case 6: */ + // default: + tex_print_format("<%i: %s @ %D>", f, font_name(f), font_size(f), pt_unit); + // break; + // } + } else { + tex_print_str("<*>"); + } +} + +void 
tex_print_font_specifier(halfword e) +{ + if (e && tex_is_valid_font(font_spec_identifier(e))) { + tex_print_format("<%i: %i %i %i>", font_spec_identifier(e), font_spec_scale(e), font_spec_x_scale(e), font_spec_y_scale(e)); + } else { + tex_print_str("<*>"); + } +} + +void tex_print_font(halfword f) +{ + if (! f) { + tex_print_str("nullfont"); + } else if (tex_is_valid_font(f)) { + tex_print_str(font_name(f)); + /* if (font_size(f) != font_design_size(f)) { */ + /*tex + Nowadays this check for designsize is rather meaningless so we could as well + always enter this branch. We can even make this while blob a callback. + */ + tex_print_format(" at %D", font_size(f), pt_unit); + /* } */ + } else { + tex_print_str("nofont"); + } +} + +/*tex This prints highlights of list |p|. */ + +void tex_short_display(halfword p) +{ + tex_print_levels(); + if (p) { + tex_print_short_node_contents(p); + } else { + tex_print_str("empty list"); + } +} + +/*tex This prints token list data in braces. */ + +void tex_print_token_list(const char *s, halfword p) +{ + tex_print_levels(); + tex_print_str(".."); + if (s) { + tex_print_str(s); + tex_print_char(' '); + } + tex_print_char('{'); + if ((p >= 0) && (p <= (int) lmt_token_memory_state.tokens_data.top)) { + tex_show_token_list(p, null, default_token_show_max, 0); + } else { + tex_print_str(error_string_clobbered(21)); + } + tex_print_char('}'); +} + +/*tex This prints dimensions of a rule node. */ + +void tex_print_rule_dimen(scaled d) +{ + if (d == null_flag) { + tex_print_char('*'); + } else { + tex_print_dimension(d, pt_unit); + } +} + +/*tex + + Since boxes can be inside of boxes, |show_node_list| is inherently recursive, up to a given + maximum number of levels. The history of nesting is indicated by the current string, which + will be printed at the beginning of each line; the length of this string, namely |cur_length|, + is the depth of nesting. 
+ + A global variable called |depth_threshold| is used to record the maximum depth of nesting for + which |show_node_list| will show information. If we have |depth_threshold = 0|, for example, + only the top level information will be given and no sublists will be traversed. Another global + variable, called |breadth_max|, tells the maximum number of items to show at each level; + |breadth_max| had better be positive, or you won't see anything. + + The maximum nesting depth in box displays is kept in |depth_threshold| and the maximum number + of items shown at the same list level in |breadth_max|. + + The recursive machinery is started by calling |show_box|. Assign the values |depth_threshold := + show_box_depth| and |breadth_max := show_box_breadth| + +*/ + +void tex_show_box(halfword p) +{ + /*tex the show starts at |p| */ + tex_show_node_list(p, show_box_depth_par, show_box_breadth_par); + tex_print_ln(); +} + +/*tex + + \TEX\ is occasionally supposed to print diagnostic information that goes only into the + transcript file, unless |tracing_online| is positive. Here are two routines that adjust the + destination of print commands: + +*/ + +void tex_begin_diagnostic(void) +{ + lmt_print_state.saved_selector = lmt_print_state.selector; + if ((tracing_online_par <= 0) && (lmt_print_state.selector == terminal_and_logfile_selector_code)) { + lmt_print_state.selector = logfile_selector_code; + if (lmt_error_state.history == spotless) { + lmt_error_state.history = warning_issued; + } + } + tex_print_levels(); +} + +/*tex Restore proper conditions after tracing. */ + +void tex_end_diagnostic(void) +{ + tex_print_nlp(); + lmt_print_state.selector = lmt_print_state.saved_selector; +} + +static void tex_print_padding(void) +{ + switch (lmt_print_state.selector) { + case terminal_selector_code: + if (! odd(lmt_print_state.terminal_offset)) { + tex_print_char(' '); + } + break; + case logfile_selector_code: + case terminal_and_logfile_selector_code: + if (! 
odd(lmt_print_state.logfile_offset)) { + tex_print_char(' '); + } + break; + case luabuffer_selector_code: + break; + } +} + +void tex_print_levels(void) +{ + int l0 = tracing_levels_par; + tex_print_nlp(); + if (l0 > 0) { + int l1 = (l0 & 0x01) == tracing_levels_group; + int l2 = (l0 & 0x02) == tracing_levels_input; + int l4 = (l0 & 0x04) == tracing_levels_catcodes; + if (l1) { + tex_print_int(cur_level); + tex_print_char(':'); + } + if (l2) { + tex_print_int(lmt_input_state.input_stack_data.ptr); + tex_print_char(':'); + } + if (l4) { + tex_print_int(cat_code_table_par); + tex_print_char(':'); + } + if (l1 || l2 || l4) { + tex_print_char(' '); + } + tex_print_padding(); + } +} + +/* maybe %GROUP% where we scan upto [UPPER][%], so %G and %GR are also is ok + + shared with error messages, so at some point we will merge: + + %c int char + %s *char string + %q *char 'string' + %i int integer + %e backslash (tex escape) + %C int int symbolic representation of cmd chr + %E *char \cs + %S int tex cs string + %M int mode + %T int tex string + %% percent + + specific for print (I need to identify the rest) + + ! %U int unicode + ! %D int dimension + + ! %B int badness + ! %G int group + + ! %L int (if) linenumber + +*/ + +extern void tex_print_format(const char *format, ...) 
+{ + va_list args; + va_start(args, format); /* hm, weird, no number */ + while (1) { + int chr = *format++; + switch (chr) { + case '\0': + goto DONE; + case '%': + { + chr = *format++; + switch (chr) { + case '\0': + goto DONE; + case 'c': + tex_print_char(va_arg(args, int)); + break; + case 'e': + tex_print_str_esc(NULL); + break; + case 'i': + tex_print_int(va_arg(args, int)); + break; + case 'l': + tex_print_levels(); + break; + case 'n': + tex_print_extended_subtype(null, (quarterword) va_arg(args, int)); + break; + case 'm': + tex_print_cs_checked(va_arg(args, int)); + break; + case 's': + tex_print_str(va_arg(args, char *)); + break; + case 'q': + tex_print_char('\''); + tex_print_str(va_arg(args, char *)); + tex_print_char('\''); + break; + case 'x': + tex_print_qhex(va_arg(args, int)); + break; + /* + case 'u': + tex_print_unit(va_arg(args, int)); + break; + */ + case 'B': /* badness */ + { + scaled b = va_arg(args, halfword); + if (b == awful_bad) { + tex_print_char('*'); + } else { + tex_print_int(b); + } + break; + } + case 'C': + { + int cmd = va_arg(args, int); + int val = va_arg(args, int); + tex_print_cmd_chr((singleword) cmd, val); /* inlining doesn't work */ + break; + } + case 'D': /* dimension */ + { + scaled s = va_arg(args, scaled); + int u = va_arg(args, int); + tex_print_dimension(s, u); + break; + } + case 'E': + tex_print_str_esc(va_arg(args, char *)); + break; + case 'G': + { + halfword g = va_arg(args, int); + tex_print_group(g); + break; + } + case 'F': + { + halfword i = va_arg(args, int); + tex_print_font_identifier(i); + break; + } + case 'L': + { + /* typically used for if line */ + halfword line = va_arg(args, int); + if (line) { + tex_print_str(" entered on line "); + tex_print_int(line); + } + break; + } + case 'M': + { + halfword mode = va_arg(args, int); + tex_print_str(tex_string_mode(mode)); + break; + } + case 'P': + { + scaled total = va_arg(args, int); + scaled stretch = va_arg(args, int); + scaled filstretch = 
va_arg(args, int); + scaled fillstretch = va_arg(args, int); + scaled filllstretch = va_arg(args, int); + scaled shrink= va_arg(args, int); + tex_print_dimension(total, pt_unit); + if (stretch) { + tex_print_str(" plus "); + tex_print_dimension(stretch, pt_unit); + } else if (filstretch) { + tex_print_str(" plus "); + tex_print_dimension(filstretch, no_unit); + tex_print_str(" fil"); + } else if (fillstretch) { + tex_print_str(" plus "); + tex_print_dimension(fillstretch, no_unit); + tex_print_str(" fill"); + } else if (filllstretch) { + tex_print_str(" plus "); + tex_print_dimension(fillstretch, no_unit); + tex_print_str(" filll"); + } + if (shrink) { + tex_print_str(" minus "); + tex_print_dimension(shrink, pt_unit); + } + break; + } + case 'S': + { + halfword cs = va_arg(args, int); + tex_print_cs(cs); + break; + } + case 'T': + { + strnumber s = va_arg(args, int); + tex_print_tex_str(s); + break; + } + case 'U': + { + halfword c = va_arg(args, int); + tex_print_uhex(c); + break; + } + case '%': + tex_print_char('%'); + break; + // case '[': + // tex_begin_diagnostic(); + // tex_print_char('['); + // break; + // case ']': + // tex_print_char(']'); + // tex_end_diagnostic(); + // break; + default: + /* ignore bad one */ + break; + } + } + break; + default: + tex_print_char(chr); /* todo: utf */ + break; + } + } + DONE: + va_end(args); +} + +/*tex + + Group codes were introcued in \ETEX\ but have been extended in the meantime in \LUATEX\ and + later again in \LUAMETATEX. We might have (even) more granularity in the future. + + Todo: combine this with an array of struct(id,name,lua) ... a rainy day + stack of new cd's job. + +*/ + +void tex_print_group(int e) +{ + int line = tex_saved_line_at_level(); + tex_print_str(lmt_interface.group_code_values[cur_group].name); + if (cur_group != bottom_level_group) { + tex_print_str(" group"); + if (line) { + tex_print_str(e ? 
" entered at line " : " at line "); + tex_print_int(line); + } + } +} + +void tex_print_message(const char *s) +{ + tex_print_nlp(); + tex_print_char('('); + tex_print_str(s); + tex_print_char(')'); + tex_print_nlp(); +} diff --git a/source/luametatex/source/tex/texprinting.h b/source/luametatex/source/tex/texprinting.h new file mode 100644 index 000000000..61b7e45a2 --- /dev/null +++ b/source/luametatex/source/tex/texprinting.h @@ -0,0 +1,133 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_PRINTING_H +# define LMT_PRINTING_H + +typedef enum selector_settings { + no_print_selector_code, /*tex |selector| setting that makes data disappear */ + terminal_selector_code, /*tex printing is destined for the terminal only */ + logfile_selector_code, /*tex printing is destined for the transcript file only */ + terminal_and_logfile_selector_code, /*tex normal |selector| setting */ + pseudo_selector_code, /*tex special |selector| setting for |show_context| */ + new_string_selector_code, /*tex printing is deflected to the string pool */ + luabuffer_selector_code, +} selector_settings; + +typedef struct print_state_info { + FILE *logfile; + char *loggable_info; + int selector; + int terminal_offset; + int logfile_offset; + int new_string_line; + int tally; + unsigned char trick_buffer[max_error_line + 1]; /* padded */ + int trick_count; + int first_count; + int saved_selector; + int font_in_short_display; /*tex an internal font number */ + FILE *saved_logfile; + int saved_logfile_offset; +} print_state_info; + +extern print_state_info lmt_print_state; + +typedef enum spec_units { + no_unit, + pt_unit, + mu_unit, +} spec_units; + +/*tex + Some of these can go away because we stepwise implement usage of |tex_print_format| instead of + a multitude of specific calls. It's one of these thing I do when I'm bored. 
+ + todo : check tex_print_ln + todo : check tex_print_nl + todo : check tex_print_str_nl + +*/ + +extern void tex_print_ln (void); /* always forces a newline */ +extern void tex_print_char (int s); +extern void tex_print_tex_str (int s); +extern void tex_print_tex_str_esc (strnumber s); +extern void tex_print_nlp (void); /* flushes a line if we're doing one */ +extern void tex_print_banner (void); +extern void tex_print_log_banner (void); +extern void tex_print_version_banner (void); +////// void tex_print_digits (const unsigned char *digits, int k); +extern void tex_print_int (int n); +extern void tex_print_hex (int n); +extern void tex_print_uhex (int n); +extern void tex_print_qhex (int n); +extern void tex_print_roman_int (int n); +extern void tex_print_current_string (void); +extern void tex_print_cs_checked (halfword p); /*tex Also does the |IMPOSSIBLE| etc. */ +extern void tex_print_cs (halfword p); /*tex Only does the undefined case. */ +extern void tex_print_cs_name (halfword p); /*tex Only prints known ones. 
*/ +extern void tex_print_str (const char *s); +extern void tex_print_str_esc (const char *s); +extern void tex_print_dimension (scaled d, int unit); /*tex prints a dimension with pt */ +extern void tex_print_sparse_dimension (scaled d, int unit); /*tex prints a dimension with pt */ +extern void tex_print_unit (int unit); /*tex prints a glue component */ +extern void tex_print_glue (scaled d, int order, int unit); /*tex prints a glue component */ +extern void tex_print_spec (int p, int unit); /*tex prints a glue specification */ +extern void tex_print_fontspec (int p); +extern void tex_print_mathspec (int p); +extern void tex_print_font_identifier (halfword f); +extern void tex_print_font_specifier (halfword e); /*tex this is an eq table entry */ +extern void tex_print_font (halfword f); +extern void tex_print_char_identifier (halfword c); +extern void tex_print_token_list (const char *s, halfword p); /*tex prints token list data in braces */ +extern void tex_print_rule_dimen (scaled d); /*tex prints dimension in rule node */ +extern void tex_print_group (int e); +extern void tex_print_format (const char *format, ...); /*tex similar to the one we use for errors */ +extern void tex_begin_diagnostic (void); +extern void tex_print_levels (void); +extern void tex_end_diagnostic (void); +extern void tex_show_box (halfword p); +extern void tex_short_display (halfword p); /*tex prints highlights of list |p| */ + +extern void tex_print_message (const char *s); + + +/* +# define single_letter(A) \ + ((str_length(A)==1)|| \ + ((str_length(A)==4)&&*(str_string(A))>=0xF0)|| \ + ((str_length(A)==3)&&*(str_string(A))>=0xE0)|| \ + ((str_length(A)==2)&&*(str_string(A))>=0xC0)) + +# define is_active_cs(a) \ + (a && str_length(a)>3 && \ + ( *str_string(a) == 0xEF) && \ + (*(str_string(a)+1) == 0xBF) && \ + (*(str_string(a)+2) == 0xBF)) + +*/ + +inline static int tex_single_letter(strnumber s) +{ + return ( + (str_length(s) == 1) + || ( (str_length(s) == 4) && *(str_string(s) ) >= 
0xF0) + || ( (str_length(s) == 3) && *(str_string(s) ) >= 0xE0) + || ( (str_length(s) == 2) && *(str_string(s) ) >= 0xC0) + ); +} + +inline static int tex_is_active_cs(strnumber s) +{ + if (s && str_length(s) > 3) { + const unsigned char *ss = str_string(s); + return (ss[0] == 0xEF) && (ss[1] == 0xBF) && (ss[2] == 0xBF); + } else { + return 0; + } +} +# define active_cs_value(A) aux_str2uni((str_string((A))+3)) + +# endif diff --git a/source/luametatex/source/tex/texrules.c b/source/luametatex/source/tex/texrules.c new file mode 100644 index 000000000..1a3040585 --- /dev/null +++ b/source/luametatex/source/tex/texrules.c @@ -0,0 +1,248 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +halfword tex_aux_scan_rule_spec(rule_types t, halfword s) +{ + /*tex |width|, |depth|, and |height| all equal |null_flag| now */ + halfword rule = tex_new_rule_node((quarterword) s); + halfword attr = node_attr(rule); + switch (t) { + case h_rule_type: + rule_height(rule) = default_rule; + rule_depth(rule) = 0; + break; + case v_rule_type: + case m_rule_type: + if (s == strut_rule_code) { + rule_width(rule) = 0; + node_subtype(rule) = strut_rule_subtype; + rule_height(rule) = null_flag; + rule_depth(rule) = null_flag; + } else { + rule_width(rule) = default_rule; + } + break; + } + while (1) { + /*tex + Maybe: + + h : "whdxylrWHDXYLR" + v : "whdxytbWHDXYTB" + m : "whdxylrtbWHDXYLRTB" + + but for now we are tolerant because internally it's left/right anyway. 
+ + */ + switch (tex_scan_character("awhdxylrtbcfAWHDXYLRTBCF", 0, 1, 0)) { + case 0: + goto DONE; + case 'a': case 'A': + if (tex_scan_mandate_keyword("attr", 1)) { + halfword i = tex_scan_attribute_register_number(); + halfword v = tex_scan_int(1, NULL); + if (eq_value(register_attribute_location(i)) != v) { + if (attr) { + attr = tex_patch_attribute_list(attr, i, v); + } else { + attr = tex_copy_attribute_list_set(tex_current_attribute_list(), i, v); + } + } + } + break; + case 'w': case 'W': + if (tex_scan_mandate_keyword("width", 1)) { + rule_width(rule) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'h': case 'H': + if (tex_scan_mandate_keyword("height", 1)) { + rule_height(rule) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'd': case 'D': + if (tex_scan_mandate_keyword("depth", 1)) { + rule_depth(rule) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'l': case 'L': + if (tex_scan_mandate_keyword("left", 1)) { + rule_left(rule) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'r': case 'R': + if (tex_scan_mandate_keyword("right", 1)) { + rule_right(rule) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 't': case 'T': /* just because it's nicer */ + if (tex_scan_mandate_keyword("top", 1)) { + rule_left(rule) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'b': case 'B': /* just because it's nicer */ + if (tex_scan_mandate_keyword("bottom", 1)) { + rule_right(rule) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'x': case 'X': + if (tex_scan_mandate_keyword("xoffset", 1)) { + rule_x_offset(rule) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'y': case 'Y': + if (tex_scan_mandate_keyword("yoffset", 1)) { + rule_y_offset(rule) = tex_scan_dimen(0, 0, 0, 0, NULL); + } + break; + case 'f': case 'F': + switch (tex_scan_character("aoAO", 0, 0, 0)) { + case 'o': case 'O': + if (tex_scan_mandate_keyword("font", 2)) { + tex_set_rule_font(rule, tex_scan_font_identifier(NULL)); + } + break; + case 'a': case 
'A': + if (tex_scan_mandate_keyword("fam", 2)) { + tex_set_rule_family(rule, tex_scan_math_family_number()); + } + break; + default: + tex_aux_show_keyword_error("font|fam"); + goto DONE; + } + break; + case 'c': case 'C': + if (tex_scan_mandate_keyword("char", 1)) { + rule_character(rule) = tex_scan_char_number(0); + } + break; + default: + goto DONE; + } + } + DONE: + node_attr(rule) = attr; + if (t == v_rule_type && s == strut_rule_code) { + tex_aux_check_text_strut_rule(rule, text_style); + } + return rule; +} + +void tex_aux_run_vrule(void) +{ + tex_tail_append(tex_aux_scan_rule_spec(v_rule_type, cur_chr)); + cur_list.space_factor = 1000; +} + +void tex_aux_run_hrule(void) +{ + tex_tail_append(tex_aux_scan_rule_spec(h_rule_type, cur_chr)); + cur_list.prev_depth = ignore_depth; +} + +void tex_aux_run_mrule(void) +{ + tex_tail_append(tex_aux_scan_rule_spec(m_rule_type, cur_chr)); +} + +void tex_aux_check_math_strut_rule(halfword rule, halfword style) +{ + if (node_subtype(rule) == strut_rule_subtype) { + scaled ht = rule_height(rule); + scaled dp = rule_depth(rule); + if (ht == null_flag || dp == null_flag) { + halfword fnt = tex_get_rule_font(rule, style); + halfword chr = rule_character(rule); + if (fnt > 0 && chr && tex_char_exists(fnt, chr)) { + if (ht == null_flag) { + ht = tex_math_font_char_ht(fnt, chr, style); + } + if (dp == null_flag) { + dp = tex_math_font_char_dp(fnt, chr, style); + } + } else { + if (ht == null_flag) { + ht = tex_get_math_y_parameter(style, math_parameter_rule_height); + } + if (dp == null_flag) { + dp = tex_get_math_y_parameter(style, math_parameter_rule_depth); + } + } + rule_height(rule) = ht; + rule_depth(rule) = dp; + } + } +} + +void tex_aux_check_text_strut_rule(halfword rule, halfword style) +{ + if (node_subtype(rule) == strut_rule_subtype) { + scaled ht = rule_height(rule); + scaled dp = rule_depth(rule); + if (ht == null_flag || dp == null_flag) { + halfword fnt = tex_get_rule_font(rule, style); + halfword chr = 
rule_character(rule); + if (fnt > 0 && chr && tex_char_exists(fnt, chr)) { + if (ht == null_flag) { + ht = tex_char_height_from_font(fnt, chr); + } + if (dp == null_flag) { + dp = tex_char_depth_from_font(fnt, chr); + } + } + rule_height(rule) = ht; + rule_depth(rule) = dp; + } + } +} + +/*tex + + The |rule_font| field either holds a font id (below |rule_font_fam_offset|) or a math family + that is stored as |rule_font_fam_offset + fam|. Family zero is therefore stored as exactly + |rule_font_fam_offset|, which is why the two getters below test with |>=|. + +*/ + +halfword tex_get_rule_font(halfword n, halfword style) +{ + halfword fnt = rule_font(n); + if (fnt >= rule_font_fam_offset) { + /*tex A family was stored: resolve it to the family font for the size of |style|. */ + halfword fam = fnt - rule_font_fam_offset; + if (fam_par_in_range(fam)) { + fnt = tex_fam_fnt(fam, tex_size_of_style(style)); + } + } + /*tex Anything that is not a valid font id ends up as |null_font|. */ + if (fnt < 0 || fnt >= max_n_of_fonts) { + return null_font; + } else { + return fnt; + } +} + +/*tex Returns the stored family, or zero when a plain font id (or an out of range family) was stored. */ + +halfword tex_get_rule_family(halfword n) +{ + halfword fnt = rule_font(n); + if (fnt >= rule_font_fam_offset) { + halfword fam = fnt - rule_font_fam_offset; + if (fam_par_in_range(fam)) { + return fam; + } + } + return 0; +} + +/*tex Font ids outside |0 .. rule_font_fam_offset - 1| are stored as zero. */ + +void tex_set_rule_font(halfword n, halfword fnt) +{ + if (fnt < 0 || fnt >= rule_font_fam_offset) { + rule_font(n) = 0; + } else { + rule_font(n) = fnt; + } +} + +/*tex An out of range family is stored the same way as family zero. */ + +void tex_set_rule_family(halfword n, halfword fam) +{ + if (fam < 0 || fam >= max_n_of_math_families) { + rule_font(n) = rule_font_fam_offset; + } else { + rule_font(n) = rule_font_fam_offset + fam; + } +} + diff --git a/source/luametatex/source/tex/texrules.h b/source/luametatex/source/tex/texrules.h new file mode 100644 index 000000000..8a01ac847 --- /dev/null +++ b/source/luametatex/source/tex/texrules.h @@ -0,0 +1,27 @@ +/* + See license.txt in the root of this project. 
+*/ + +# ifndef LMT_TEXRULES_H +# define LMT_TEXRULES_H + +typedef enum rule_types { + h_rule_type = 0, + v_rule_type = 1, + m_rule_type = 2, +} rule_types; + +extern halfword tex_aux_scan_rule_spec (rule_types t, halfword s); +extern void tex_aux_run_vrule (void); +extern void tex_aux_run_hrule (void); +extern void tex_aux_run_mrule (void); + +extern void tex_aux_check_text_strut_rule (halfword rule, halfword style); +extern void tex_aux_check_math_strut_rule (halfword rule, halfword style); + +extern halfword tex_get_rule_font (halfword n, halfword style); +extern halfword tex_get_rule_family (halfword n); +extern void tex_set_rule_font (halfword n, halfword fnt); +extern void tex_set_rule_family (halfword n, halfword fam); + +# endif diff --git a/source/luametatex/source/tex/texscanning.c b/source/luametatex/source/tex/texscanning.c new file mode 100644 index 000000000..8f2dfa050 --- /dev/null +++ b/source/luametatex/source/tex/texscanning.c @@ -0,0 +1,5760 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +static void tex_aux_scan_expr (halfword level); +static void tex_aux_scan_expression (int level); + +/*tex + A helper. +*/ + +inline void tex_push_back(halfword tok, halfword cmd, halfword chr) +{ + if (cmd != spacer_cmd && tok != deep_frozen_relax_token && ! (cmd == relax_cmd && chr == no_relax_code)) { + tex_back_input(tok); + } +} + +/*tex + + Let's turn now to some procedures that \TEX\ calls upon frequently to digest certain kinds of + patterns in the input. Most of these are quite simple; some are quite elaborate. Almost all of + the routines call |get_x_token|, which can cause them to be invoked recursively. + + The |scan_left_brace| routine is called when a left brace is supposed to be the next non-blank + token. (The term \quote {left brace} means, more precisely, a character whose catcode is + |left_brace|.) \TEX\ allows |\relax| to appear before the |left_brace|. 
+ +*/ + +/* This reads a mandatory |left_brace|: */ + +void tex_scan_left_brace(void) +{ + /*tex Get the next non-blank non-relax non-call token */ + while(1) { + tex_get_x_token(); + switch (cur_cmd) { + case spacer_cmd: + case relax_cmd: + /* stay in while */ + break; + case left_brace_cmd: + /* we found one */ + return; + default: + /* we recover */ + tex_handle_error( + back_error_type, + "Missing { inserted", + "A left brace was mandatory here, so I've put one in." + ); + cur_tok = left_brace_token + '{'; + cur_cmd = left_brace_cmd; + cur_chr = '{'; + ++lmt_input_state.align_state; + return; + } + } +} + +/*tex + + The |scan_optional_equals| routine looks for an optional |=| sign preceded by optional spaces; + |\relax| is not ignored here. + +*/ + +void tex_scan_optional_equals(void) +{ + /*tex Get the next non-blank non-call token. */ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_tok != equal_token) { + tex_back_input(cur_tok); + } +} + +/*tex + + Here is a procedure that sounds an alarm when mu and non-mu units are being switched. + +*/ + +static void tex_aux_mu_error(int n) +{ + tex_handle_error( + normal_error_type, + "Incompatible glue units (case %i)", + n, + "I'm going to assume that 1mu=1pt when they're mixed." + ); +} + +/*tex + + The next routine |scan_something_internal| is used to fetch internal numeric quantities like + |\hsize|, and also to handle the |\the| when expanding constructions like |\the\toks0| and + |\the\baselineskip|. Soon we will be considering the |scan_int| procedure, which calls + |scan_something_internal|; on the other hand, |scan_something_internal| also calls |scan_int|, + for constructions like |\catcode\`\$| or |\fontdimen 3 \ff|. So we have to declare |scan_int| + as a |forward| procedure. A few other procedures are also declared at this point. + + \TEX\ doesn't know exactly what to expect when |scan_something_internal| begins. 
For example, + an integer or dimension or glue value could occur immediately after |\hskip|; and one can even + say |\the| with respect to token lists in constructions like |\xdef\o{\the\output}|. On the + other hand, only integers are allowed after a construction like |\count|. To handle the various + possibilities, |scan_something_internal| has a |level| parameter, which tells the \quote + {highest} kind of quantity that |scan_something_internal| is allowed to produce. Seven levels + are distinguished, namely |int_val|, |attr_val|, |dimen_val|, |glue_val|, |mu_val|, |tok_val| + and |ident_val|. + + The output of |scan_something_internal| (and of the other routines |scan_int|, |scan_dimen|, + and |scan_glue| below) is put into the global variable |cur_val|, and its level is put into + |cur_val_level|. The highest values of |cur_val_level| are special: |mu_val| is used only when + |cur_val| points to something in a \quote {muskip} register, or to one of the three parameters + |\thinmuskip|, |\medmuskip|, |\thickmuskip|; |ident_val| is used only when |cur_val| points to + a font identifier; |tok_val| is used only when |cur_val| points to |null| or to the reference + count of a token list. The last two cases are allowed only when |scan_something_internal| is + called with |level = tok_val|. + + If the output is glue, |cur_val| will point to a glue specification, and the reference count + of that glue will have been updated to reflect this reference; if the output is a nonempty + token list, |cur_val| will point to its reference count, but in this case the count will not + have been updated. Otherwise |cur_val| will contain the integer or scaled value in question. 
+ +*/ + +scanner_state_info lmt_scanner_state = { + .current_cmd = 0, + .current_chr = 0, + .current_cs = 0, + // .current_flag = 0, + .current_tok = 0, + .current_val = 0, + .current_val_level = 0, + .current_box = 0, + .last_cs_name = 0, + .arithmic_error = 0, + .expression_depth = 0, +}; + +/*tex + + When a |glue_val| changes to a |dimen_val|, we use the width component of the glue; there is no + need to decrease the reference count, since it has not yet been increased. When a |dimen_val| + changes to an |int_val|, we use scaled points so that the value doesn't actually change. And + when a |mu_val| changes to a |glue_val|, the value doesn't change either. + + In \LUATEX\ we don't share glue but we have copies, so there is no need to mess with the + reference count and downgrading. + +*/ + +inline static void tex_aux_downgrade_cur_val(int level, int succeeded, int negative) +{ + switch (cur_val_level) { + case tok_val_level: + case font_val_level: + case mathspec_val_level: + case fontspec_val_level: + /*tex + This test pays back as this actually happens, but we also need it for the + |none_lua_function| handling. We end up here in |ident_val_level| and |tok_val_level| + and they don't downgrade, nor negate which saves a little testing. + */ + break; + // case int_val_level: + // case attr_val_level: + // case dimen_val_level: + // while (cur_val_level > level) { + // --cur_val_level; + // } + // if (negative) { + // negate(cur_val); + // } + // break; + // case glue_val_level: + // case mu_val_level: + // while (cur_val_level > level) { + // tex_aux_downgrade_cur_val(); /* cleaner is inline */ + // } + // if (succeeded == 1) { + // cur_val = new_glue_spec_node(cur_val); + // } + // if (negative) { + // negate(glue_amount(cur_val)); + // negate(glue_stretch(cur_val)); + // negate(glue_shrink(cur_val)); + // } + // break; + // default: + // /* this can't happen */ + // return 0; + default: + /*tex There is no real need for it being a loop, a test would do. 
*/ + while (cur_val_level > level) { + /*tex Convert |cur_val| to a lower level. */ + switch (cur_val_level) { + case glue_val_level: + cur_val = glue_amount(cur_val); + break; + case mu_val_level : + tex_aux_mu_error(1); + break; + } + --cur_val_level; + } + if (cur_val_level == glue_val_level || cur_val_level == mu_val_level) { + if (succeeded == 1) { + cur_val = tex_new_glue_spec_node(cur_val); + } + if (negative) { + glue_amount(cur_val) = -glue_amount(cur_val); + glue_stretch(cur_val) = -glue_stretch(cur_val); + glue_shrink(cur_val) = -glue_shrink(cur_val); + } + } else if (negative) { + cur_val = -cur_val; + } + break; + } +} + +/*tex + + Some of the internal items can be fetched by both routines, and these have been split off into + the next routine, that returns true if the command code was understood. + +*/ + +/*tex + + The |last_item_cmd| branch has been flattened a bit because we don't need to treat \ETEX\ + specific thingies special any longer. + + The helper below hands |index| and |property| to |lmt_function_call_by_class| and maps the class + that comes back onto |cur_val_level|. + +*/ + +static void tex_aux_set_cur_val_by_lua_value_cmd(halfword index, halfword property) +{ + int class = lua_value_none_code; + halfword value = 0; /* can also be scaled */ + strnumber u = tex_save_cur_string(); + lmt_token_state.luacstrings = 0; + class = lmt_function_call_by_class(index, property, &value); + switch (class) { + case lua_value_none_code: + cur_val_level = no_val_level; + break; + case lua_value_integer_code: + case lua_value_cardinal_code: + cur_val_level = int_val_level; + break; + case lua_value_dimension_code: + cur_val_level = dimen_val_level; + break; + case lua_value_skip_code: + cur_val_level = glue_val_level; + break; + case lua_value_boolean_code: + /*tex For usage with |\ifboolean| */ + value = value ? 1 : 0; + cur_val_level = int_val_level; + break; + case lua_value_float_code: + /*tex We assume a proper print back. 
*/ + cur_val_level = tok_val_level; + break; + case lua_value_string_code: + cur_val_level = no_val_level; + break; + case lua_value_node_code: + case lua_value_direct_code: + if (value) { + switch (node_type(value)) { + case hlist_node: + case vlist_node: + case whatsit_node: + case rule_node: + cur_val_level = list_val_level; + break; + default: + /* maybe a warning */ + value = null; + cur_val_level = no_val_level; + break; + } + } else { + value = null; + cur_val_level = no_val_level; + } + break; + default: + cur_val_level = no_val_level; + break; + } + cur_val = value; + tex_restore_cur_string(u); + if (lmt_token_state.luacstrings > 0) { + tex_lua_string_start(); + } +} + +halfword tex_scan_lua_value(int index) +{ + tex_aux_set_cur_val_by_lua_value_cmd(index, 0); + return cur_val_level; +} + +static halfword tex_aux_scan_register_index(void) +{ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + switch (cur_cmd) { + case register_toks_cmd : return cur_chr - register_toks_base; + case register_int_cmd : return cur_chr - register_int_base; + case register_dimen_cmd : return cur_chr - register_dimen_base; + case register_attribute_cmd: return cur_chr - register_attribute_base; + case register_glue_cmd : return cur_chr - register_glue_base; + case register_mu_glue_cmd : return cur_chr - register_mu_glue_base; + case char_given_cmd : return cur_chr; + case integer_cmd : return cur_chr; + default : return -1; + } +} + +static halfword tex_aux_scan_character_index(void) +{ + halfword result = -1; + tex_get_token(); + if (cur_tok < cs_token_flag) { + result = cur_chr; + } else if (cur_cmd == char_given_cmd) { + result = cur_chr; + } else { + strnumber txt = cs_text(cur_tok - cs_token_flag); + if (tex_single_letter(txt)) { + result = aux_str2uni(str_string(txt)); + } else if (tex_is_active_cs(txt)) { + result = active_cs_value(txt); + } else { + result = max_character_code + 1; + } + } + return result > max_character_code ? 
-1 : result; +} + +/* + Fetch an item in the current node, if appropriate. Here is where |\last*| |\ |, and some more + are implemented. The reference count for |\lastskip| will be updated later. We also handle + |\inputlineno| and |\badness| here, because they are legal in similar contexts. In the follow + up engines much more than these are handled here. +*/ + +static int tex_aux_set_cur_val_by_some_cmd(int code) +{ + switch (code) { + case lastpenalty_code: + cur_val_level = int_val_level; + goto COMMON; + case lastkern_code: + cur_val_level = dimen_val_level; + goto COMMON; + case lastskip_code: + cur_val_level = glue_val_level; + goto COMMON; + case lastboundary_code: + cur_val_level = int_val_level; + COMMON: + { + cur_val = 0; + if (cur_list.tail != contribute_head && ! (cur_list.tail && node_type(cur_list.tail) == glyph_node) && cur_list.mode != nomode) { + switch (code) { + case lastpenalty_code: + if (node_type(cur_list.tail) == penalty_node) { + cur_val = penalty_amount(cur_list.tail); + } + break; + case lastkern_code: + if (node_type(cur_list.tail) == kern_node) { + cur_val = kern_amount(cur_list.tail); + } + break; + case lastskip_code: + if (node_type(cur_list.tail) == glue_node) { + cur_val = cur_list.tail; + if (node_subtype(cur_list.tail) == mu_glue) { + cur_val_level = mu_val_level; + } + } + break; /* should we return 1 ? */ + case lastboundary_code: + if (node_type(cur_list.tail) == boundary_node && node_subtype(cur_list.tail) == user_boundary) { + cur_val = boundary_data(cur_list.tail); + } + break; + } + } else if (cur_list.mode == vmode && cur_list.tail == cur_list.head) { + switch (code) { + case lastpenalty_code: + cur_val = lmt_page_builder_state.last_penalty; + break; + case lastkern_code: + cur_val = lmt_page_builder_state.last_kern; + break; + case lastskip_code: + if (lmt_page_builder_state.last_glue != max_halfword) { + cur_val = lmt_page_builder_state.last_glue; + } + break; /* should we return 1 ? 
*/ + case lastboundary_code: + cur_val = lmt_page_builder_state.last_boundary; + break; + } + } + break; + } + case last_node_type_code: + /*tex + We have mode nodes and when the mode parameter is set we report the real numbers. + This is a bit messy. + */ + { + cur_val_level = int_val_level; + if (cur_list.tail != contribute_head && cur_list.mode != nomode) { + cur_val = node_type(cur_list.tail); + } else if (cur_list.mode == vmode && cur_list.tail == cur_list.head) { + cur_val = lmt_page_builder_state.last_node_type; + } else if (cur_list.tail == cur_list.head || cur_list.mode == nomode) { + cur_val = -1; + } else { + cur_val = node_type(cur_list.tail); + } + break; + } + case last_node_subtype_code: + { + cur_val_level = int_val_level; + if (cur_list.tail != contribute_head && cur_list.mode != nomode) { + cur_val = node_subtype(cur_list.tail); + } else if (cur_list.mode == vmode && cur_list.tail == cur_list.head) { + cur_val = lmt_page_builder_state.last_node_subtype; + } else if (cur_list.tail == cur_list.head || cur_list.mode == nomode) { + cur_val = -1; + } else { + cur_val = node_subtype(cur_list.tail); + } + break; + } + case input_line_no_code: + cur_val = lmt_input_state.input_line; + cur_val_level = int_val_level; + break; + case badness_code: + cur_val = lmt_packaging_state.last_badness; + cur_val_level = int_val_level; + break; + case overshoot_code: + cur_val = lmt_packaging_state.last_overshoot; + cur_val_level = dimen_val_level; + break; + case luatex_version_code: + cur_val = lmt_version_state.version; + cur_val_level = int_val_level; + break; + case luatex_revision_code: + cur_val = lmt_version_state.revision; + cur_val_level = int_val_level; + break; + case current_group_level_code: + cur_val = cur_level - level_one; + cur_val_level = int_val_level; + break; + case current_group_type_code: + cur_val = cur_group; + cur_val_level = int_val_level; + break; + case current_if_level_code: + { + halfword q = lmt_condition_state.cond_ptr; + cur_val = 0; 
+ while (q) { + ++cur_val; + q = node_next(q); + } + cur_val_level = int_val_level; + break; + } + case current_if_type_code: + { + /*tex + We have more conditions than standard \TEX\ and \ETEX\ and the order is also somewhat + different. One problem is that in \ETEX\ a zero means \quotation {not in an test}, so + we're one off! Not that it matters much as this feature is probably never really used, + but we kept if for compatibility reasons. But it's gone now ... as ususl with some + sentiment as it was nicely abstracted cleaned up code. + */ + cur_val = lmt_condition_state.cond_ptr ? (lmt_condition_state.cur_if - first_real_if_test_code) : -1; + cur_val_level = int_val_level; + break; + } + case current_if_branch_code: + { + switch (lmt_condition_state.if_limit) { + case if_code: + cur_val = 0; + break; + case fi_code: + cur_val = -1; + break; + case else_code: + case or_code: + cur_val = 1; + break; + default: + cur_val = 0; + break; + } + cur_val_level = int_val_level; + break; + } + case glue_stretch_order_code: + case glue_shrink_order_code: + { + /*TeX + Not that we need it but \LUATEX\ now has |\eTeXglue..order|. In \CONTEXT\ we're + not using the internal codes anyway (or symbolic constants). In \LUATEX\ there + is some \ETEX\ related shifting but we don't do that here. + */ + halfword q = tex_scan_glue(glue_val_level, 0); + cur_val = (code == glue_stretch_order_code) ? 
glue_stretch_order(q) : glue_shrink_order(q); + tex_flush_node(q); + cur_val_level = int_val_level; + break; + } + case font_id_code: + { + cur_val = tex_scan_font_identifier(NULL); + cur_val_level = int_val_level; + break; + } + case glyph_x_scaled_code: + { + cur_val = tex_font_x_scaled(tex_scan_dimen(0, 0, 0, 1, NULL)); + cur_val_level = dimen_val_level; + break; + } + case glyph_y_scaled_code: + { + cur_val = tex_font_y_scaled(tex_scan_dimen(0, 0, 0, 1, NULL)); + cur_val_level = dimen_val_level; + break; + } + case font_spec_id_code: + case font_spec_scale_code: + case font_spec_xscale_code: + case font_spec_yscale_code: + { + halfword fs = tex_scan_fontspec_identifier(); + if (fs) { + switch (code) { + case font_spec_id_code: + cur_val = font_spec_identifier(fs); + break; + case font_spec_scale_code: + cur_val = font_spec_scale(fs); + break; + case font_spec_xscale_code: + cur_val = font_spec_x_scale(fs); + break; + case font_spec_yscale_code: + cur_val = font_spec_y_scale(fs); + break; + } + } else { + cur_val = 0; + } + cur_val_level = int_val_level; + break; + } + case font_char_wd_code: + case font_char_ht_code: + case font_char_dp_code: + case font_char_ic_code: + case font_char_ta_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + halfword chr = tex_scan_char_number(0); + if (tex_char_exists(fnt, chr)) { + switch (code) { + case font_char_wd_code: + cur_val = tex_char_width_from_font(fnt, chr); + break; + case font_char_ht_code: + cur_val = tex_char_height_from_font(fnt, chr); + break; + case font_char_dp_code: + cur_val = tex_char_depth_from_font(fnt, chr); + break; + case font_char_ic_code: + cur_val = tex_char_italic_from_font(fnt, chr); + break; + case font_char_ta_code: + cur_val = tex_char_top_anchor_from_font(fnt, chr); + break; + } + } else { + cur_val = 0; + } + cur_val_level = dimen_val_level; + break; + } + case font_size_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + cur_val = font_size(fnt); + cur_val_level = 
dimen_val_level; + break; + } + case font_math_control_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + cur_val = font_mathcontrol(fnt); + cur_val_level = int_val_level; + break; + } + case font_text_control_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + cur_val = font_textcontrol(fnt); + cur_val_level = int_val_level; + break; + } + case math_scale_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + if (tex_is_valid_font(fnt)) { + cur_val = tex_get_math_font_scale(fnt, tex_math_style_to_size(tex_current_math_style())); + } else { + cur_val = 1000; + } + cur_val_level = int_val_level; + break; + } + case math_style_code: + { + cur_val = tex_current_math_style(); + if (cur_val < 0) { + cur_val = text_style; + } + cur_val_level = int_val_level; + break; + } + case math_main_style_code: + { + cur_val = tex_current_math_main_style(); + if (cur_val < 0) { + cur_val = text_style; + } + cur_val_level = int_val_level; + break; + } + case math_style_font_id_code: + { + halfword style = tex_scan_math_style_identifier(0, 0); + halfword family = tex_scan_math_family_number(); + cur_val = tex_fam_fnt(family, tex_size_of_style(style)); + cur_val_level = int_val_level; + break; + } + case math_stack_style_code: + { + cur_val = tex_math_style_variant(cur_list.math_style, math_parameter_stack_variant); + if (cur_val < 0) { + cur_val = text_style; + } + cur_val_level = int_val_level; + break; + } + case math_char_class_code: + case math_char_fam_code: + case math_char_slot_code: + /* we actually need two commands or we need to look ahead */ + { + mathcodeval mval = { 0, 0, 0 }; + mathdictval dval = { 0, 0, 0 }; + if (tex_scan_math_cmd_val(&mval, &dval)) { + switch (code) { + case math_char_class_code: + cur_val = mval.class_value; + break; + case math_char_fam_code: + cur_val = mval.family_value; + break; + case math_char_slot_code: + cur_val = mval.character_value; + break; + default: + cur_val = 0; + break; + } + } else { + cur_val = 0; + } + 
cur_val_level = int_val_level; + break; + } + case scaled_slant_per_point_code: + case scaled_interword_space_code: + case scaled_interword_stretch_code: + case scaled_interword_shrink_code: + case scaled_ex_height_code: + case scaled_em_width_code: + case scaled_extra_space_code: + { + cur_val = tex_get_scaled_parameter(cur_font_par, (code - scaled_slant_per_point_code + 1)); + cur_val_level = dimen_val_level; + break; + } + case last_arguments_code: + { + cur_val = lmt_expand_state.arguments; + cur_val_level = int_val_level; + break; + } + case parameter_count_code: + { + cur_val = tex_get_parameter_count(); + cur_val_level = int_val_level; + break; + } + /* + case lua_value_function_code: + { + halfword v = scan_int(0, NULL); + if (v <= 0) { + normal_error("luafunction", "invalid number"); + } else { + set_cur_val_by_lua_value_cmd(code); + } + return 1; + } + */ + case insert_progress_code: + { + cur_val = tex_get_insert_progress(tex_scan_int(0, NULL)); + cur_val_level = dimen_val_level; + break; + } + case left_margin_kern_code: + case right_margin_kern_code: + { + halfword v = tex_scan_int(0, NULL); + halfword b = box_register(v); + if (b && (node_type(b) == hlist_node)) { + if (code == left_margin_kern_code) { + cur_val = tex_left_marginkern(box_list(b)); + } else { + cur_val = tex_right_marginkern(box_list(b)); + } + } else { + tex_normal_error("marginkern", "a hbox expected"); + cur_val = 0; + } + cur_val_level = dimen_val_level; + break; + } + case par_shape_length_code: + case par_shape_indent_code: + case par_shape_dimen_code: + { + halfword q = code - par_shape_length_code; + halfword v = tex_scan_int(0, NULL); + if (v <= 0 || ! 
par_shape_par) { + v = 0; + } else { + int n = specification_count(par_shape_par); + if (q == 2) { + q = v % 2; + v = (v + q) / 2; + } + if (v > n) { + v = n; + } + if (n == 0) { + v = 0; + } else if (q) { + v = tex_get_specification_indent(par_shape_par, v); + } else { + v = tex_get_specification_width(par_shape_par, v); + } + } + cur_val = v; + cur_val_level = dimen_val_level; /* hm, also for length ? */ + break; + } + case glue_stretch_code: + case glue_shrink_code: + { + halfword q = tex_scan_glue(glue_val_level, 0); + cur_val = code == glue_stretch_code ? glue_stretch(q) : glue_shrink(q); + tex_flush_node(q); + cur_val_level = dimen_val_level; + break; + } + case mu_to_glue_code: + cur_val = tex_scan_glue(mu_val_level, 0); + cur_val_level = glue_val_level; + return 1; + case glue_to_mu_code: + cur_val = tex_scan_glue(glue_val_level, 0); + cur_val_level = mu_val_level; + return 1; + case numexpr_code: + /* case attrexpr_code: */ + tex_aux_scan_expr(int_val_level); + return 1; + case dimexpr_code: + tex_aux_scan_expr(dimen_val_level); + return 1; + case glueexpr_code: + tex_aux_scan_expr(glue_val_level); + return 1; + case muexpr_code: + tex_aux_scan_expr(mu_val_level); + return 1; + case numexpression_code: + tex_aux_scan_expression(int_val_level); + return 1; + case dimexpression_code: + tex_aux_scan_expression(dimen_val_level); + return 1; + // case dimen_to_scale_code: + // cur_val_level = int_val_level; + // cur_val = round_xn_over_d(100, scan_dimen(0, 0, 0, 0, NULL), 65536); + // return 1; + case numeric_scale_code: + cur_val_level = int_val_level; + cur_val = tex_scan_scale(0); + return 1; + case index_of_register_code: + cur_val = tex_aux_scan_register_index(); + cur_val_level = int_val_level; + return 1; + case index_of_character_code: + cur_val = tex_aux_scan_character_index(); + cur_val_level = int_val_level; + return 1; + case last_chk_num_code: + cur_val_level = int_val_level; + cur_val = lmt_condition_state.chk_num; + return 1; + case 
last_chk_dim_code: + cur_val_level = dimen_val_level; + cur_val = lmt_condition_state.chk_dim; + return 1; + case last_left_class_code: + cur_val_level = int_val_level; + cur_val = lmt_math_state.last_left; + if (! valid_math_class_code(cur_val)) { + cur_val = unset_noad_class; + } + return 1; + case last_right_class_code: + cur_val_level = int_val_level; + cur_val = lmt_math_state.last_right; + if (! valid_math_class_code(cur_val)) { + cur_val = unset_noad_class; + } + return 1; + case last_atom_class_code: + cur_val_level = int_val_level; + cur_val = lmt_math_state.last_atom; + if (! valid_math_class_code(cur_val)) { + cur_val = unset_noad_class; + } + return 1; + case current_loop_iterator_code: + case last_loop_iterator_code: + cur_val_level = int_val_level; + cur_val = lmt_main_control_state.loop_iterator; + return 1; + case current_loop_nesting_code: + cur_val_level = int_val_level; + cur_val = lmt_main_control_state.loop_nesting; + return 1; + case last_par_context_code: + cur_val_level = int_val_level; + cur_val = lmt_main_control_state.last_par_context; + return 1; + case last_page_extra_code: + cur_val_level = int_val_level; + cur_val = lmt_page_builder_state.last_extra_used; + return 1; + case math_atom_glue_code: + { + halfword style = tex_scan_math_style_identifier(0, 0); + halfword leftclass = tex_scan_math_class_number(0); + halfword rightclass = tex_scan_math_class_number(0); + cur_val = tex_math_spacing_glue(leftclass, rightclass, style); + cur_val_level = mu_val_level; + break; + } + } + return 0; +} + +static void tex_aux_set_cur_val_by_auxiliary_cmd(int chr) +{ + halfword mode = abs(cur_list.mode); + switch (chr) { + case space_factor_code: + if (mode == hmode) { + cur_val = cur_list.space_factor; + } else { + tex_handle_error(normal_error_type, "Improper %C", set_auxiliary_cmd, chr, + "You can refer to \\spacefactor only in horizontal mode and not in \n" + "inside \\write. So I'm forgetting what you said and using zero instead." 
+ ); + cur_val = 0; + } + cur_val_level = int_val_level; + break; + case prev_depth_code: + if (mode == vmode) { + cur_val = cur_list.prev_depth; + } else { + tex_handle_error(normal_error_type, "Improper %C", set_auxiliary_cmd, chr, + "You can refer to \\prevdepth only in horizontal mode and not in \n" + "inside \\write. So I'm forgetting what you said and using zero instead." + ); + cur_val = 0; + } + cur_val_level = dimen_val_level; + break; + case prev_graf_code: + if (mode == nomode) { + /*tex So |prev_graf=0| within |\write|, not that we have that. */ + cur_val = 0; + } else { + cur_val = lmt_nest_state.nest[tex_vmode_nest_index()].prev_graf; + } + cur_val_level = int_val_level; + break; + case interaction_mode_code: + cur_val = lmt_error_state.interaction; + cur_val_level = int_val_level; + break; + case insert_mode_code: + cur_val = lmt_insert_state.mode; + cur_val_level = int_val_level; + break; + } +} + +static void tex_aux_set_cur_val_by_specification_cmd(int chr) +{ + if (chr == internal_specification_location(par_shape_code)) { + cur_val = (par_shape_par) ? specification_count(par_shape_par) : 0; + } else { + halfword v = tex_scan_int(0, NULL); /* hm */ + halfword e = eq_value(chr); + if ((! e) || (v < 0)) { + cur_val = 0; + } else { + cur_val = tex_get_specification_penalty(e, v > specification_count(e) ? specification_count(e) : v); + } + } + cur_val_level = int_val_level; +} + +# define page_state_okay (lmt_page_builder_state.contents == contribute_nothing && ! lmt_page_builder_state.output_active) + +static void tex_aux_set_cur_val_by_page_property_cmd(int chr) +{ + switch (chr) { + case page_goal_code: + cur_val = page_state_okay ? max_dimen : lmt_page_builder_state.goal; + cur_val_level = dimen_val_level; + break; + case page_vsize_code: + cur_val = page_state_okay ? 0 : lmt_page_builder_state.vsize; + cur_val_level = dimen_val_level; + break; + case page_total_code: + cur_val = page_state_okay ? 
0 : lmt_page_builder_state.total; + cur_val_level = dimen_val_level; + break; + case page_depth_code: + cur_val = page_state_okay ? 0 : lmt_page_builder_state.depth; + cur_val_level = dimen_val_level; + break; + case dead_cycles_code: + cur_val = lmt_page_builder_state.dead_cycles; + cur_val_level = int_val_level; + break; + case insert_penalties_code: + cur_val = lmt_page_builder_state.insert_penalties; + cur_val_level = int_val_level; + break; + case insert_heights_code: + cur_val = lmt_page_builder_state.insert_heights; + cur_val_level = dimen_val_level; + break; + case insert_storing_code: + cur_val = lmt_insert_state.storing; + cur_val_level = int_val_level; + break; + case insert_distance_code: + cur_val = tex_get_insert_distance(tex_scan_int(0, NULL)); + cur_val_level = glue_val_level; + break; + case insert_multiplier_code: + cur_val = tex_get_insert_multiplier(tex_scan_int(0, NULL)); + cur_val_level = int_val_level; + break; + case insert_limit_code: + cur_val = tex_get_insert_limit(tex_scan_int(0, NULL)); + cur_val_level = dimen_val_level; + break; + case insert_storage_code: + cur_val = tex_get_insert_storage(tex_scan_int(0, NULL)); + cur_val_level = int_val_level; + break; + case insert_penalty_code: + cur_val = tex_get_insert_penalty(tex_scan_int(0, NULL)); + cur_val_level = int_val_level; + break; + case insert_maxdepth_code: + cur_val = tex_get_insert_maxdepth(tex_scan_int(0, NULL)); + cur_val_level = dimen_val_level; + break; + case insert_height_code: + cur_val = tex_get_insert_height(tex_scan_int(0, NULL)); + cur_val_level = dimen_val_level; + break; + case insert_depth_code: + cur_val = tex_get_insert_depth(tex_scan_int(0, NULL)); + cur_val_level = dimen_val_level; + break; + case insert_width_code: + cur_val = tex_get_insert_width(tex_scan_int(0, NULL)); + cur_val_level = dimen_val_level; + break; + default: + cur_val = page_state_okay ? 
0 : lmt_page_builder_state.page_so_far[page_state_offset(chr)]; + cur_val_level = dimen_val_level; + break; + } +} + +static void tex_aux_set_cur_val_by_define_char_cmd(int chr) +{ + halfword index = tex_scan_char_number(0); + switch (chr) { + case catcode_charcode: + chr = tex_get_cat_code(cat_code_table_par, index); + break; + case lccode_charcode: + chr = tex_get_lc_code(index); + break; + case uccode_charcode: + chr = tex_get_uc_code(index); + break; + case sfcode_charcode: + chr = tex_get_sf_code(index); + break; + case hccode_charcode: + chr = tex_get_hc_code(index); + break; + case hmcode_charcode: + chr = tex_get_hm_code(index); + break; + case mathcode_charcode: + case extmathcode_charcode: + /* case extmathcodenum_charcode: */ + chr = tex_get_math_code_number(index); + break; + case delcode_charcode: + case extdelcode_charcode: + /* case extdelcodenum_charcode: */ + chr = tex_get_del_code_number(index); + break; + default: + tex_confusion("scan char"); + break; + } + cur_val = chr; + cur_val_level = int_val_level; +} + +/* + First, here is a short routine that is called from lua code. All the real work is delegated to + |short_scan_something_internal| that is shared between this routine and |scan_something_internal|. + In the end it was much cleaner to integrate |tex_aux_short_scan_something_internal| into the two + switches. +*/ + +void tex_scan_something_simple(halfword cmd, halfword chr) +{ + int succeeded = 1; + switch (cmd) { + /* begin of tex_aux_short_scan_something_internal */ + case char_given_cmd: + // case math_char_given_cmd: + // case math_char_xgiven_cmd: + cur_val = chr; + cur_val_level = int_val_level; + break; + +case mathspec_cmd: + cur_val = (chr && node_subtype(chr) == tex_mathcode) ? math_spec_value(chr) : 0; + cur_val_level = int_val_level; + break; + + + case iterator_value_cmd: + cur_val = chr > 0x100000 ? 
- (chr - 0x100000) : chr; + cur_val_level = int_val_level; + break; + case some_item_cmd: + { + /*tex + Because the items in this case directly refer to |cur_chr|, it needs to be saved + and restored. + */ + int save_cur_chr = cur_chr; + cur_chr = chr; + if (tex_aux_set_cur_val_by_some_cmd(chr)) { + succeeded = 2; + } else { + cur_chr = save_cur_chr; + } + break; + } + case internal_toks_cmd: + case register_toks_cmd: + cur_val = eq_value(chr); + cur_val_level = tok_val_level; + break; + case internal_int_cmd: + case register_int_cmd: + case internal_attribute_cmd: + case register_attribute_cmd: + cur_val = eq_value(chr); + cur_val_level = int_val_level; + break; + case internal_dimen_cmd: + case register_dimen_cmd: + cur_val = eq_value(chr); + cur_val_level = dimen_val_level; + break; + case internal_glue_cmd: + case register_glue_cmd: + cur_val = eq_value(chr); + cur_val_level = glue_val_level; + break; + case internal_mu_glue_cmd: + case register_mu_glue_cmd: + cur_val = eq_value(chr); + cur_val_level = mu_val_level; + break; + case lua_value_cmd: + tex_aux_set_cur_val_by_lua_value_cmd(chr, 0); + if (cur_val_level == no_val_level) { + return; + } else { + break; + } + case math_style_cmd: + cur_val = (chr == yet_unset_math_style) ? tex_scan_math_style_identifier(0, 0) : chr; + cur_val_level = int_val_level; + break; + case set_auxiliary_cmd: + tex_aux_set_cur_val_by_auxiliary_cmd(chr); + break; + case set_page_property_cmd: + tex_aux_set_cur_val_by_page_property_cmd(chr); + break; + case set_specification_cmd: + tex_aux_set_cur_val_by_specification_cmd(chr); + break; + /* end of tex_aux_short_scan_something_internal */ + default: + tex_handle_error( + normal_error_type, + "You can't use '%C' as tex library index", + cmd, chr, + "I'm forgetting what you said and using zero instead." 
+ ); + cur_val = 0; + cur_val_level = int_val_level; + break; + } + tex_aux_downgrade_cur_val(cur_val_level, succeeded, 0); +} + +/*tex + + OK, we're ready for |scan_something_internal| itself. A second parameter, |negative|, is set + |true| if the value that is found should be negated. It is assumed that |cur_cmd| and |cur_chr| + represent the first token of the internal quantity to be scanned; an error will be signalled if + |cur_cmd < min_internal| or |cur_cmd > max_internal|. + +*/ + +/*tex Fetch an internal parameter: */ + +static void tex_aux_missing_number_error(void) +{ + tex_handle_error( + back_error_type, + "Missing number, treated as zero", + "A number should have been here; I inserted '0'. (If you can't figure out why I\n" + "needed to see a number, look up 'weird error' in the index to The TeXbook.)" + ); +} + +/* todo: get rid of cur_val */ + +static int tex_aux_valid_tok_level(halfword level) +{ + if (level == tok_val_level) { + return 1; + } else { + if (lmt_error_state.intercept) { + lmt_error_state.last_intercept = 1 ; + } else { + tex_aux_missing_number_error(); + } + cur_val = 0; + cur_val_level = dimen_val_level; /* why dimen */ + return 0; + } +} + +static int tex_aux_scan_hyph_data_number(halfword code, halfword *target) +{ + switch (code) { + case prehyphenchar_code: + *target = tex_get_pre_hyphen_char(language_par); + break; + case posthyphenchar_code: + *target = tex_get_post_hyphen_char(language_par); + break; + case preexhyphenchar_code: + *target = tex_get_pre_exhyphen_char(language_par); + break; + case postexhyphenchar_code: + *target = tex_get_post_exhyphen_char(language_par); + break; + case hyphenationmin_code: + *target = tex_get_hyphenation_min(language_par); + break; + case hjcode_code: + *target = tex_get_hj_code(language_par, tex_scan_int(0, NULL)); + break; + default: + return 0; + } + return 1; +} + +static halfword tex_aux_scan_something_internal(halfword cmd, halfword chr, int level, int negative, halfword property) +{ + 
+ int succeeded = 1; + switch (cmd) { + /* begin of tex_aux_short_scan_something_internal */ + case char_given_cmd: + // case math_char_given_cmd: + // case math_char_xgiven_cmd: + cur_val = chr; + cur_val_level = int_val_level; + break; + case some_item_cmd: + { + /*tex + Because the items in this case directly refer to |cur_chr|, it needs to be saved + and restored. + */ + int save_cur_chr = cur_chr; + cur_chr = chr; + if (tex_aux_set_cur_val_by_some_cmd(chr)) { + succeeded = 2; + } else { + cur_chr = save_cur_chr; + } + break; + } + case internal_toks_cmd: + case register_toks_cmd: + cur_val = eq_value(chr); + cur_val_level = tok_val_level; + break; + case internal_int_cmd: + case register_int_cmd: + case internal_attribute_cmd: + case register_attribute_cmd: + cur_val = eq_value(chr); + cur_val_level = int_val_level; + break; + case internal_dimen_cmd: + case register_dimen_cmd: + cur_val = eq_value(chr); + cur_val_level = dimen_val_level; + break; + case internal_glue_cmd: + case register_glue_cmd: + cur_val = eq_value(chr); + cur_val_level = glue_val_level; + break; + case internal_mu_glue_cmd: + case register_mu_glue_cmd: + cur_val = eq_value(chr); + cur_val_level = mu_val_level; + break; + case lua_value_cmd: + tex_aux_set_cur_val_by_lua_value_cmd(chr, property); + if (cur_val_level == no_val_level) { + return 0; + } + break; + case iterator_value_cmd: + cur_val = chr > 0x100000 ? - (chr - 0x100000) : chr; + cur_val_level = int_val_level; + break; + case math_style_cmd: + cur_val = (chr == yet_unset_math_style) ? 
tex_scan_math_style_identifier(0, 0) : chr; + cur_val_level = int_val_level; + break; + case set_auxiliary_cmd: + tex_aux_set_cur_val_by_auxiliary_cmd(chr); + break; + case set_page_property_cmd: + tex_aux_set_cur_val_by_page_property_cmd(chr); + break; + case set_specification_cmd: + tex_aux_set_cur_val_by_specification_cmd(chr); + break; + case define_char_code_cmd: + tex_aux_set_cur_val_by_define_char_cmd(chr); + break; + /* end of tex_aux_short_scan_something_internal */ + case define_font_cmd: + if (tex_aux_valid_tok_level(level)) { + cur_val = cur_font_par; + cur_val_level = font_val_level; + return cur_val; + } else { + break; + } + case set_font_cmd: + if (tex_aux_valid_tok_level(level)) { + cur_val = cur_chr; + cur_val_level = font_val_level; + /* set_font_touched(cur_chr, 1); */ + return cur_val; + } else { + break; + } + case define_family_cmd: + /*tex Fetch a math font identifier. */ + { + halfword fam = tex_scan_math_family_number(); + cur_val = tex_fam_fnt(fam, chr); + cur_val_level = font_val_level; + return cur_val; + } + case set_math_parameter_cmd: + { + switch (chr) { + case math_parameter_reset_spacing: + case math_parameter_set_spacing: + case math_parameter_let_spacing: + case math_parameter_copy_spacing: + { + halfword left = tex_scan_math_class_number(0); + halfword right = tex_scan_math_class_number(0); + halfword style = tex_scan_math_style_identifier(0, 0); + halfword node = tex_math_spacing_glue(left, right, style); + cur_val = node ? 
node : zero_glue; + cur_val_level = mu_val_level; + break; + } + case math_parameter_set_atom_rule: + case math_parameter_let_atom_rule: + case math_parameter_copy_atom_rule: + case math_parameter_let_parent: + case math_parameter_copy_parent: + case math_parameter_set_defaults: + { + // cur_val = 0; + // cur_val_level = int_val_level; + break; + } + case math_parameter_set_pre_penalty: + case math_parameter_set_post_penalty: + case math_parameter_set_display_pre_penalty: + case math_parameter_set_display_post_penalty: + { + halfword class = tex_scan_math_class_number(0); + if (valid_math_class_code(class)) { + switch (chr) { + case math_parameter_set_pre_penalty: + cur_val = count_parameter(first_math_pre_penalty_code + class); + break; + case math_parameter_set_post_penalty: + cur_val = count_parameter(first_math_post_penalty_code + class); + break; + case math_parameter_set_display_pre_penalty: + cur_val = count_parameter(first_math_display_pre_penalty_code + class); + break; + case math_parameter_set_display_post_penalty: + cur_val = count_parameter(first_math_display_post_penalty_code + class); + break; + } + } else { + cur_val = 0; + } + cur_val_level = int_val_level; + break; + } + case math_parameter_ignore: + { + halfword code = tex_scan_math_parameter(); + cur_val = code >= 0 ? 
count_parameter(first_math_ignore_code + code) : 0; + cur_val_level = int_val_level; + break; + } + case math_parameter_options: + { + halfword class = tex_scan_math_class_number(0); + if (valid_math_class_code(class)) { + cur_val = count_parameter(first_math_options_code + class); + } else { + cur_val = 0; + } + break; + } + default: + { + cur_val = tex_scan_math_style_identifier(0, 0); + switch (math_parameter_value_type(chr)) { + case math_int_parameter: + cur_val_level = int_val_level; + break; + case math_dimen_parameter: + cur_val_level = dimen_val_level; + break; + case math_muglue_parameter: + cur_val_level = mu_val_level; + break; + case math_style_parameter: + cur_val_level = int_val_level; + break; + } + chr = tex_get_math_parameter(cur_val, chr, NULL); + if (cur_val_level == mu_val_level) { + switch (chr) { + case petty_mu_skip_code: + chr = petty_mu_skip_par; + break; + case tiny_mu_skip_code: + chr = tiny_mu_skip_par; + break; + case thin_mu_skip_code: + chr = thin_mu_skip_par; + break; + case med_mu_skip_code: + chr = med_mu_skip_par; + break; + case thick_mu_skip_code: + chr = thick_mu_skip_par; + break; + } + } + cur_val = chr; + break; + } + } + } + break; + case set_box_property_cmd: + { + /*tex We hike on the dimen_cmd but some are integers. */ + halfword n = tex_scan_box_register_number(); + halfword b = box_register(n); + switch (chr) { + case box_width_code: + cur_val = b ? box_width(b) : 0; + cur_val_level = dimen_val_level; + break; + case box_height_code: + cur_val = b ? box_height(b) : 0; + cur_val_level = dimen_val_level; + break; + case box_depth_code: + cur_val = b ? box_depth(b) : 0; + cur_val_level = dimen_val_level; + break; + case box_direction_code: + cur_val = b ? box_dir(b) : 0; + cur_val_level = int_val_level; + break; + case box_geometry_code: + cur_val = b ? box_geometry(b) : 0; + cur_val_level = int_val_level; + break; + case box_orientation_code: + cur_val = b ? 
box_orientation(b) : 0; + cur_val_level = int_val_level; + break; + case box_anchor_code: + case box_anchors_code: + cur_val = b ? box_anchor(b) : 0; + cur_val_level = int_val_level; + break; + case box_source_code: + cur_val = b ? box_source_anchor(b) : 0; + cur_val_level = int_val_level; + break; + case box_target_code: + cur_val = b ? box_target_anchor(b) : 0; + cur_val_level = int_val_level; + break; + case box_xoffset_code: + cur_val = b ? box_x_offset(b) : 0; + cur_val_level = dimen_val_level; + break; + case box_yoffset_code: + cur_val = b ? box_y_offset(b) : 0; + cur_val_level = dimen_val_level; + break; + case box_xmove_code: + cur_val = b ? (box_width(b) - box_x_offset(b)) : 0; + cur_val_level = dimen_val_level; + break; + case box_ymove_code: + cur_val = b ? (box_total(b) - box_y_offset(b)) : 0; + cur_val_level = dimen_val_level; + break; + case box_total_code: + cur_val = b ? box_total(b) : 0; + cur_val_level = dimen_val_level; + break; + case box_shift_code: + cur_val = b ? box_shift_amount(b) : 0; + cur_val_level = dimen_val_level; + break; + case box_adapt_code: + cur_val = 0; + cur_val_level = int_val_level; + break; + case box_repack_code: + if (node_type(b) == hlist_node) { + cur_val = box_list(b) ? tex_natural_hsize(box_list(b), NULL) : 0; + } else { + cur_val = box_list(b) ? tex_natural_vsize(box_list(b)) : 0; + } + cur_val_level = dimen_val_level; + break; + case box_freeze_code: + cur_val = node_type(b) == hlist_node ? box_width(b) : box_total(b); + cur_val_level = dimen_val_level; + break; + case box_attribute_code: + { + halfword att = tex_scan_attribute_register_number(); + cur_val = b ? tex_has_attribute(b, att, unused_attribute_value) : unused_attribute_value; + cur_val_level = int_val_level; + break; + } + } + break; + } + case set_font_property_cmd: + /*tex Fetch a font integer or dimension. 
*/ + { + switch (chr) { + case font_hyphen_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + cur_val = font_hyphen_char(fnt); + cur_val_level = int_val_level; + break; + } + case font_skew_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + cur_val = font_skew_char(fnt); + cur_val_level = int_val_level; + break; + } + case font_lp_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + halfword chr = tex_scan_char_number(0); + cur_val = tex_char_lp_from_font(fnt, chr); + cur_val_level = dimen_val_level; + break; + } + case font_rp_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + halfword chr = tex_scan_char_number(0); + cur_val = tex_char_rp_from_font(fnt, chr); + cur_val_level = dimen_val_level; + break; + } + case font_ef_code: + { + halfword fnt = tex_scan_font_identifier(NULL); + halfword chr = tex_scan_char_number(0); + cur_val = tex_char_ef_from_font(fnt, chr); + cur_val_level = int_val_level; + break; + } + case font_dimen_code: + { + cur_val = tex_get_font_dimen(); + cur_val_level = dimen_val_level; + break; + } + case scaled_font_dimen_code: + { + cur_val = tex_get_scaled_font_dimen(); + cur_val_level = dimen_val_level; + break; + } + } + break; + } + case register_cmd: + /*tex Fetch a register */ + { + switch (chr) { + case int_val_level: + { + halfword n = tex_scan_int_register_number(); + cur_val = count_register(n); + break; + } + case attr_val_level: + { + halfword n = tex_scan_attribute_register_number(); + cur_val = attribute_register(n); + break; + } + case dimen_val_level: + { + scaled n = tex_scan_dimen_register_number(); + cur_val = dimen_register(n); + break; + } + case glue_val_level: + { + halfword n = tex_scan_glue_register_number(); + cur_val = skip_register(n); + break; + } + case mu_val_level: + { + halfword n = tex_scan_mu_glue_register_number(); + cur_val = mu_skip_register(n); + break; + } + case tok_val_level: + { + halfword n = tex_scan_toks_register_number(); + cur_val = toks_register(n); + 
break; + } + } + cur_val_level = chr; + break; + } + case ignore_something_cmd: + break; + case hyphenation_cmd: + if (tex_aux_scan_hyph_data_number(chr, &cur_val)) { + cur_val_level = int_val_level; + break; + } else { + goto DEFAULT; + } + case integer_cmd: + cur_val = chr; + cur_val_level = int_val_level; + break; + case dimension_cmd: + cur_val = chr; + cur_val_level = dimen_val_level; + break; + case gluespec_cmd: + cur_val = chr; + cur_val_level = glue_val_level; + break; + case mugluespec_cmd: + cur_val = chr; + cur_val_level = mu_val_level; + break; + case mathspec_cmd: + cur_val = chr; + if (chr) { + switch (node_subtype(chr)) { + case tex_mathcode: + cur_val = math_spec_value(chr); + cur_val_level = int_val_level; + break; + case umath_mathcode: + /* case umathnum_mathcode: */ + case mathspec_mathcode: + cur_val_level = mathspec_val_level; + break; + default: + cur_val = 0; + cur_val_level = int_val_level; + break; + } + } else { + cur_val_level = int_val_level; + } + break; + case fontspec_cmd: + cur_val = tex_get_font_identifier(chr) ? chr : null; + cur_val_level = fontspec_val_level; + break; + case begin_paragraph_cmd: + switch (chr) { + case snapshot_par_code: + { + halfword par = tex_find_par_par(cur_list.head); + cur_val = par ? par_state(par) : 0; + cur_val_level = int_val_level; + break; + } + /* case attribute_par_code: */ + case wrapup_par_code: + { + halfword par = tex_find_par_par(cur_list.head); + cur_val = par ? par_end_par_tokens(par) : null; + cur_val_level = tok_val_level; + break; + } + default: + goto DEFAULT; + } + break; + /* + case string_cmd: + { + halfword head = str_toks(str_lstring(cs_offset_value + chr), NULL); + begin_inserted_list(head); + cur_val = 0; + cur_val_level = no_val_level; + break; + } + */ + /* + case special_box_cmd: + switch (chr) { + case left_box_code: + cur_val = cur_mode == hmode ? 
local_left_box_par : null; + cur_val_level = list_val_level; + return cur_val; + case right_box_code: + cur_val = cur_mode == hmode ? local_right_box_par : null; + cur_val_level = list_val_level; + return cur_val; + default: + goto DEFAULT; + } + break; + */ + default: + DEFAULT: + /*tex Complain that |\the| can not do this; give zero result. */ + tex_handle_error( + normal_error_type, + "You can't use '%C' after \\the", + cmd, chr, + "I'm forgetting what you said and using zero instead." + ); + cur_val = 0; + cur_val_level = (level == tok_val_level) ? int_val_level : dimen_val_level; + break; + } + tex_aux_downgrade_cur_val(level, succeeded, negative); + return cur_val; +} + +/*tex + + It is nice to have routines that say what they do, so the original |scan_eight_bit_int| is + superceded by |scan_register_number| and |scan_mark_number|. It may become split up even further + in the future. + + Many of the |restricted classes| routines are the essentially the same except for the upper + limit and the error message, so it makes sense to combine these all into one function. + +*/ + +inline static halfword tex_aux_scan_limited_int(int optional_equal, int min, int max, const char *invalid) +{ + halfword v = tex_scan_int(optional_equal, NULL); + if (v < min || v > max) { + tex_handle_error( + normal_error_type, + "%s (%i) should be in the range %i..%i", + invalid, v, min, max, + "I'm going to use 0 instead of that illegal code value." 
+ ); + return 0; + } else { + return v; + } +} + +halfword tex_scan_int_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_int_register_index, "Integer register index"); } +halfword tex_scan_dimen_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_dimen_register_index, "Dimension register index"); } +halfword tex_scan_attribute_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_attribute_register_index, "Attribute register index"); } +halfword tex_scan_glue_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_glue_register_index, "Glue register index"); } +halfword tex_scan_mu_glue_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_mu_glue_register_index, "Mu glue register index"); } +halfword tex_scan_toks_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_toks_register_index, "Toks register index"); } +halfword tex_scan_box_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_box_register_index, "Box register index"); } +halfword tex_scan_mark_number (void) { return tex_aux_scan_limited_int(0, 0, max_mark_index, "Marks index"); } +halfword tex_scan_char_number (int optional_equal) { return tex_aux_scan_limited_int(optional_equal, 0, max_character_code, "Character code"); } +halfword tex_scan_math_char_number (void) { return tex_aux_scan_limited_int(0, 0, max_math_character_code, "Character code"); } +halfword tex_scan_math_family_number (void) { return tex_aux_scan_limited_int(0, 0, max_math_family_index, "Math family"); } +halfword tex_scan_math_properties_number (void) { return tex_aux_scan_limited_int(0, 0, max_math_property, "Math properties"); } +halfword tex_scan_math_group_number (void) { return tex_aux_scan_limited_int(0, 0, max_math_group, "Math group"); } +halfword tex_scan_math_index_number (void) { return tex_aux_scan_limited_int(0, 0, max_math_index, "Math index"); } +halfword tex_scan_math_discretionary_number (int optional_equal) { return 
tex_aux_scan_limited_int(optional_equal, 0, max_math_discretionary, "Math discretionary"); } +singleword tex_scan_box_index (void) { return (singleword) tex_aux_scan_limited_int(0, 0, max_box_index, "Box index"); } +singleword tex_scan_box_axis (void) { return (singleword) tex_aux_scan_limited_int(0, 0, max_box_axis, "Box axis"); } +halfword tex_scan_category_code (void) { return tex_aux_scan_limited_int(0, 0, max_category_code,"Category code"); } +halfword tex_scan_function_reference (int optional_equal) { return tex_aux_scan_limited_int(optional_equal, 0, max_function_reference, "Function reference"); } +halfword tex_scan_bytecode_reference (int optional_equal) { return tex_aux_scan_limited_int(optional_equal, 0, max_bytecode_index, "Bytecode reference"); } +halfword tex_scan_limited_scale (int optional_equal) { return tex_aux_scan_limited_int(optional_equal, -max_limited_scale, max_limited_scale, "Limited scale"); } +halfword tex_scan_positive_scale (int optional_equal) { return tex_aux_scan_limited_int(optional_equal, min_limited_scale, max_limited_scale, "Limited scale"); } + +halfword tex_scan_math_class_number(int optional_equal) +{ + halfword v = tex_aux_scan_limited_int(optional_equal, -1, max_math_class_code + 1, "Math class"); + if (v >= 0 && v <= max_math_class_code) { + return v; + } else { + return unset_noad_class; + } +} + +/*tex + + An integer number can be preceded by any number of spaces and |+| or |-| signs. Then comes + either a decimal constant (i.e., radix 10), an octal constant (i.e., radix 8, preceded by~|'|), + a hexadecimal constant (radix 16, preceded by~|"|), an alphabetic constant (preceded by~|`|), + or an internal variable. After scanning is complete, |cur_val| will contain the answer, which + must be at most $2^{31}-1=2147483647$ in absolute value. The value of |radix| is set to 10, 8, + or 16 in the cases of decimal, octal, or hexadecimal constants, otherwise |radix| is set to + zero. An optional space follows a constant. 
+ + The |scan_int| routine is used also to scan the integer part of a fraction; for example, the + |3| in |3.14159| will be found by |scan_int|. The |scan_dimen| routine assumes that |cur_tok + = point_token| after the integer part of such a fraction has been scanned by |scan_int|, and + that the decimal point has been backed up to be scanned again. + +*/ + +static void tex_aux_number_to_big_error(void) +{ + tex_handle_error( + normal_error_type, + "Number too big", + "I can only go up to 2147483647 = '17777777777 = \"7FFFFFFF, so I'm using that\n" + "number instead of yours." + ); +} + +static void tex_aux_improper_constant_error(void) +{ + tex_handle_error( + back_error_type, + "Improper alphabetic constant", + "A one-character control sequence belongs after a ` mark. So I'm essentially\n" + "inserting \\0 here." + ); +} + +/*tex + + The next function is somewhat special. It is also called in other scanners and therefore + |cur_val| cannot simply be replaced. For that reason we do return the value but also set + |cur_val|, just in case. I might sort this out some day when other stuff has been reworked. + + The routine has been optimized a bit (equal scanning and such) and after a while I decided to + split the three cases. It makes for a bit nicer code. + + If we backport the checking code to \LUATEX, a pre May 24 2020 copy has to be taken, because + that is closer to the original. + + Be aware that |radix| (when not |NULL|) only gets a value (8, 16 or 10) in the branch that scans + a numeric constant. In the other branches (alphabetic constant, internal quantity, math style, + hyphenation data) it is left untouched. + +*/ + +halfword tex_scan_int(int optional_equal, int *radix) +{ + int negative = 0; + long long result = 0; + /*tex Skip spaces and at most one optional equal sign, then collect the signs. */ + do { + while (1) { + tex_get_x_token(); + if (cur_cmd != spacer_cmd) { + if (optional_equal && (cur_tok == equal_token)) { + optional_equal = 0; + } else { + break; + } + } + } + if (cur_tok == minus_token) { + negative = ! negative; + cur_tok = plus_token; + } + } while (cur_tok == plus_token); + if (cur_tok == alpha_token) { + /*tex + Scan an alphabetic character code into |result|. 
A space is ignored after an alphabetic + character constant, so that such constants behave like numeric ones. We don't expand the + next token! + */ + tex_get_token(); + if (cur_tok < cs_token_flag) { + result = cur_chr; + if (cur_cmd == right_brace_cmd) { + ++lmt_input_state.align_state; + // } else if (cur_cmd < right_brace_cmd) { + } else if (cur_cmd == left_brace_cmd || cur_cmd == relax_cmd) { + /* left_brace_cmd or relax_cmd (really?)*/ + --lmt_input_state.align_state; + } + } else { + /*tex + The value of a csname in this context is its name. A single letter case happens more + frequently than an active character but both seldom are ran into anyway. + */ + strnumber txt = cs_text(cur_tok - cs_token_flag); + if (tex_single_letter(txt)) { + result = aux_str2uni(str_string(txt)); + } else if (tex_is_active_cs(txt)) { + result = active_cs_value(txt); + } else { + result = max_character_code + 1; + } + } + if (result > max_character_code) { + if (lmt_error_state.intercept) { + lmt_error_state.last_intercept = 1 ; + tex_back_input(cur_tok); + } else { + result = '0'; /*tex Why not just 0. */ + tex_aux_improper_constant_error(); + } + } else { + /*tex Scan an optional space. */ + tex_get_x_token(); + if (cur_cmd != spacer_cmd) { + tex_back_input(cur_tok); + } + } + } else if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { + result = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0); + if (cur_val_level != int_val_level) { + result = 0; + goto NONUMBER; + } + } else if (cur_cmd == math_style_cmd) { + /* A pity that we need to check this way in |scan_int|. */ + result = (cur_chr == yet_unset_math_style) ? tex_scan_math_style_identifier(0, 0) : cur_chr; + } else if (cur_cmd == hyphenation_cmd) { + /* A pity that we need to check this way in |scan_int|. */ + if (tex_aux_scan_hyph_data_number(cur_chr, &cur_chr)) { + result = cur_chr; + } else { + result = 0; + goto NONUMBER; + } + } else { + /*tex has an error message been issued? 
*/ + int vacuous = 1; + int ok_so_far = 1; + /*tex + Scan a numeric constant. The interwoven common loop has been split up now. + */ + switch (cur_tok) { + case octal_token: + { + if (radix) { + *radix = 8; + } + while (1) { + tex_get_x_token(); + unsigned d = 0; + if ((cur_tok >= zero_token) && (cur_tok <= seven_token)) { + d = cur_tok - zero_token; + } else { + goto DONE; + } + vacuous = 0; + if (ok_so_far) { + result = result * 8 + d; + if (result > max_integer) { + result = infinity; + if (lmt_error_state.intercept) { + vacuous = 1; + goto DONE; + } else { + tex_aux_number_to_big_error(); + } + ok_so_far = 0; + } + } + } + break; + } + case hex_token: + { + if (radix) { + *radix = 16; + } + while (1) { + tex_get_x_token(); + unsigned d = 0; + if ((cur_tok >= zero_token) && (cur_tok <= nine_token)) { + d = cur_tok - zero_token; + } else if ((cur_tok >= A_token_l) && (cur_tok <= F_token_l)) { + d = cur_tok - A_token_l + 10; + } else if ((cur_tok >= A_token_o) && (cur_tok <= F_token_o)) { + d = cur_tok - A_token_o + 10; + } else { + goto DONE; + } + vacuous = 0; + if (ok_so_far) { + result = result * 16 + d; + if (result > max_integer) { + result = infinity; + if (lmt_error_state.intercept) { + vacuous = 1; + goto DONE; + } else { + tex_aux_number_to_big_error(); + } + ok_so_far = 0; + } + } + } + break; + } + default: + { + if (radix) { + *radix = 10; + } + while (1) { + unsigned d = 0; + if ((cur_tok >= zero_token) && (cur_tok <= nine_token)) { + d = cur_tok - zero_token; + } else { + goto DONE; + } + vacuous = 0; + if (ok_so_far) { + result = result * 10 + d; + if (result > max_integer) { + result = infinity; + if (lmt_error_state.intercept) { + vacuous = 1; + goto DONE; + } else { + tex_aux_number_to_big_error(); + } + ok_so_far = 0; + } + } + tex_get_x_token(); + } + break; + } + } + DONE: + if (vacuous) { + NONUMBER: + /*tex Express astonishment that no number was here */ + if (lmt_error_state.intercept) { + lmt_error_state.last_intercept = 1 ; + if (cur_cmd 
!= spacer_cmd) { + tex_back_input(cur_tok); + } + } else { + tex_aux_missing_number_error(); + } + } else { + tex_push_back(cur_tok, cur_cmd, cur_chr); + } + } + /*tex For now we still keep |cur_val| set too. */ + cur_val = (halfword) (negative ? - result : result); + return cur_val; +} + +int tex_scan_cardinal(unsigned *value, int dontbark) +{ + long long result = 0; + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { + result = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0); + } else { + int vacuous = 1; + switch (cur_tok) { + case octal_token: + { + while (1) { + tex_get_x_token(); + unsigned d = 0; + if ((cur_tok >= zero_token) && (cur_tok <= seven_token)) { + d = cur_tok - zero_token; + } else { + goto DONE; + } + vacuous = 0; + result = result * 8 + d; + if (result > max_cardinal) { + result = max_cardinal; + } + } + break; + } + case hex_token: + { + while (1) { + tex_get_x_token(); + unsigned d = 0; + if ((cur_tok >= zero_token) && (cur_tok <= nine_token)) { + d = cur_tok - zero_token; + } else if ((cur_tok >= A_token_l) && (cur_tok <= F_token_l)) { + d = cur_tok - A_token_l + 10; + } else if ((cur_tok >= A_token_o) && (cur_tok <= F_token_o)) { + d = cur_tok - A_token_o + 10; + } else { + goto DONE; + } + vacuous = 0; + result = result * 16 + d; + if (result > max_cardinal) { + result = max_cardinal; + } + } + break; + } + default: + { + while (1) { + unsigned d = 0; + if ((cur_tok >= zero_token) && (cur_tok <= nine_token)) { + d = cur_tok - zero_token; + } else { + goto DONE; + } + vacuous = 0; + result = result * 10 + d; + if (result > max_cardinal) { + result = max_cardinal; + } + tex_get_x_token(); + } + break; + } + } + DONE: + if (vacuous) { + if (dontbark) { + return 0; + } else { + tex_aux_missing_number_error(); + } + } else { + tex_push_back(cur_tok, cur_cmd, cur_chr); + } + } + *value = (unsigned) result; + cur_val = (halfword) result; + return 
1; +} + +/*tex + + The following code is executed when |scan_something_internal| was called asking for |mu_val|, + when we really wanted a mudimen instead of muglue. + +*/ + +static halfword tex_aux_coerced_glue(halfword value, halfword level) +{ + if (level == glue_val_level || level == mu_val_level) { + int v = glue_amount(value); + tex_flush_node(value); + return v; + } else { + return value; + } +} + +/*tex + + The |scan_dimen| routine is similar to |scan_int|, but it sets |cur_val| to a |scaled| value, + i.e., an integral number of sp. One of its main tasks is therefore to interpret the + abbreviations for various kinds of units and to convert measurements to scaled points. + + There are three parameters: |mu| is |true| if the finite units must be |mu|, while |mu| is + |false| if |mu| units are disallowed; |inf| is |true| if the infinite units |fil|, |fill|, + |filll| are permitted; and |shortcut| is |true| if |cur_val| already contains an integer and + only the units need to be considered. + + The order of infinity that was found in the case of infinite glue is returned in the global + variable |cur_order|. + + Constructions like |-'77 pt| are legal dimensions, so |scan_dimen| may begin with |scan_int|. + This explains why it is convenient to use |scan_int| also for the integer part of a decimal + fraction. + + Several branches of |scan_dimen| work with |cur_val| as an integer and with an auxiliary + fraction |f|, so that the actual quantity of interest is $|cur_val|+|f|/2^{16}$. At the end of + the routine, this \quote {unpacked} representation is put into the single word |cur_val|, which + suddenly switches significance from |integer| to |scaled|. + + The necessary conversion factors can all be specified exactly as fractions whose numerator and + denominator add to 32768 or less. 
According to the definitions here, $\rm 2660 \, dd \approx + 1000.33297 \, mm$; this agrees well with the value $\rm 1000.333 \, mm$ cited by Hans Rudolf + Bosshard in {\em Technische Grundlagen zur Satzherstellung} (Bern, 1980). The Didot point has + been newly standardized in 1978; it's now exactly $\rm 1 \, nd = 0.375 \, mm$. Conversion uses + the equation $0.375 = 21681 / 20320 / 72.27 \cdot 25.4$. The new Cicero follows the new Didot + point; $\rm 1 \, nc = 12 \, nd$. These would lead to the ratios $21681 / 20320$ and $65043 + / 5080$, respectively. The closest approximations supported by the algorithm would be $11183 / + 10481$ and $1370 / 107$. In order to maintain the relation $\rm 1 \, nc = 12 \, nd$, we pick + the ratio $685 / 642$ for $\rm nd$, however. + +*/ + +static void tex_aux_scan_dimen_mu_error(void) { + tex_handle_error( + normal_error_type, + "Illegal unit of measure (mu inserted)", + "The unit of measurement in math glue must be mu." ); + +} + +static void tex_aux_scan_dimen_fi_error(void) { + tex_handle_error( + normal_error_type, + "Illegal unit of measure", + "The unit of measurement can't be fi, fil, fill or filll here." ); + +} + +static void tex_aux_scan_dimen_unknown_unit_error(void) { + tex_handle_error( + normal_error_type, + "Illegal unit of measure (pt inserted)", + "Dimensions can be in units of em, ex, in, pt, pc, cm, mm, dd, cc, bp, dk, or\n" + "sp; but yours is a new one! I'll assume that you meant to say pt, for printer's\n" + "points. two letters." + ); +} + +static void tex_aux_scan_dimen_out_of_range_error(void) { + tex_handle_error( + normal_error_type, + "Dimension too large", + "I can't work with sizes bigger than about 19 feet. Continue and I'll use the\n" + "largest value I can." + ); +} + +# define set_conversion(A,B) do { num=(A); denom=(B); } while(0) + +/*tex + + This function sets |cur_val| to a dimension. We still have some |cur_val| sync issue so no + result replacement yet. 
(The older variant, also already optimized, can be found in the + history). + + When order is |NULL| mu units and glue fills are not scanned (in the unit scanner below the + |mm| unit and the |true| prefix are then skipped as well). + +*/ + +/*tex + The kind of unit that |tex_aux_scan_unit| reports; |tex_scan_dimen| switches on it to decide + how the scanned number gets converted. +*/ + +typedef enum scanned_unit { + no_unit_scanned, /* 0 : error */ + normal_unit_scanned, /* 1 : cm mm pt bp dd cc in dk */ + scaled_point_scanned, /* 2 : sp */ + relative_unit_scanned, /* 3 : ex em px */ + math_unit_scanned, /* 4 : mu */ + flexible_unit_scanned, /* 5 : fi fil fill filll */ + quantitity_unit_scanned, /* 6 : internal quantity */ +} scanned_unit; + +/*tex + + We support the Knuthian Potrzebie cf.\ \url {https://en.wikipedia.org/wiki/Potrzebie} as the + |dk| unit. It was added on 2021-09-22 exactly when we crossed the season during an evening + session at the 15th \CONTEXT\ meeting in Bassenge (Boirs) Belgium. It took a few iterations to + find the best numerator and denominator, but Taco Hoekwater, Harald Koenig and Mikael Sundqvist + figured it out in this interactive session. The error messages have been adapted accordingly and + the scanner in the |tex| library also handles it. One |dk| is 6.43985pt. There is no need to + make \METAPOST\ aware of this unit because there it is just a numeric multiplier in a macro + package. + + From Wikipedia: + + In issue 33, Mad published a partial table of the \quotation {Potrzebie System of Weights and + Measures}, developed by 19-year-old Donald~E. Knuth, later a famed computer scientist. According + to Knuth, the basis of this new revolutionary system is the potrzebie, which equals the thickness + of Mad issue 26, or 2.2633484517438173216473 mm [...]. 
+ +*/ + +/*tex + + Scan a unit. Leading spaces are skipped. An internal quantity is reported as such and left in + |cur_cmd| and |cur_chr| for the caller. Otherwise two letter or other character tokens are + looked at; when they don't make a known unit they are pushed back and |no_unit_scanned| is + returned. For the normal units |num| and |denom| get the conversion ratio (|pt| leaves them + untouched), for relative units |value| gets the size of one unit, and for the flexible units + |order| gets the glue order. When |order| is |NULL| the |mm|, |mu|, |true| and |fi[lll]| + branches are skipped. + + The |true| prefix is accepted and ignored: after it has been scanned we start all over so that + the unit that follows it gets read. The label used to sit right before the switch, which made + us look at the |t| and |r| of the prefix again instead of at the real unit. + +*/ + +static int tex_aux_scan_unit(halfword *num, halfword *denom, halfword *value, halfword *order) +{ + /*tex We come back here after a |true| prefix in order to fetch the unit that follows it. */ + AGAIN: + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { + return quantitity_unit_scanned; + } else { + int chrone, chrtwo; + halfword tokone, toktwo; + halfword save_cur_cs = cur_cs; + tokone = cur_tok; + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + chrone = cur_chr; + } else { + goto BACK_ONE; + } + tex_get_x_token(); + toktwo = cur_tok; + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + chrtwo = cur_chr; + } else { + goto BACK_TWO; + } + cur_cs = save_cur_cs; + switch (chrone) { + case 'p': case 'P': + switch (chrtwo) { + case 't': case 'T': + return normal_unit_scanned; + case 'c': case 'C': + *num = 12; + *denom = 1; + return normal_unit_scanned; + case 'x': case 'X': + *value = px_dimen_par; + return relative_unit_scanned; + } + break; + case 'm': case 'M': + if (order) { + switch (chrtwo) { + case 'm': case 'M': + *num = 7227; + *denom = 2540; + return normal_unit_scanned; + case 'u': case 'U': + return math_unit_scanned; + } + } + break; + case 'c': case 'C': + switch (chrtwo) { + case 'm': case 'M': + *num = 7227; + *denom = 254; + return normal_unit_scanned; + case 'c': case 'C': + *num = 14856; + *denom = 1157; + return normal_unit_scanned; + } + break; + case 's': case 'S': + switch (chrtwo) { + case 'p': case 'P': + return scaled_point_scanned; + } + break; + case 'b': case 'B': + switch (chrtwo) { + case 'p': case 'P': + *num = 7227; + *denom = 7200; + return normal_unit_scanned; + } + break; + case 'i': case 'I': + switch (chrtwo) { + case 'n': case 'N': + *num = 7227; + *denom = 100; + return normal_unit_scanned; + } + break; + case 'd': case 'D': + switch (chrtwo) { + case 'd': case 'D': + *num = 1238; + *denom = 1157; + return normal_unit_scanned; + case 'k': case 'K': /* number: 422042 */ + *num = 49838; /* 152940 */ + *denom = 7739; 
/* 23749 */ + return normal_unit_scanned; + } + break; + case 't': case 'T': + if (order) { + switch (chrtwo) { + case 'r': case 'R': + if (tex_scan_mandate_keyword("true", 2)) { + /*tex This is now a bogus prefix! */ + goto AGAIN; + } + } + } + break; + case 'e': case 'E': + switch (chrtwo) { + case 'm': case 'M': + *value = tex_get_scaled_em_width(cur_font_par); + return relative_unit_scanned; + case 'x': case 'X': + *value = tex_get_scaled_ex_height(cur_font_par); + return relative_unit_scanned; + } + break; + case 'f': case 'F': + if (order) { + switch (chrtwo) { + case 'i': case 'I': + *order = fi_glue_order; + if (tex_scan_character("lL", 0, 0, 0)) { + *order = fil_glue_order; + if (tex_scan_character("lL", 0, 0, 0)) { + *order = fill_glue_order; + if (tex_scan_character("lL", 0, 0, 0)) { + *order = filll_glue_order; + } + } + } + return flexible_unit_scanned; + } + } + break; + } + BACK_TWO: + tex_back_input(toktwo); + BACK_ONE: + tex_back_input(tokone); + cur_cs = save_cur_cs; + return no_unit_scanned; + } +} + +/*tex + When we drop |true| support we can use the next variant which is a bit more efficient + and also handles optional units. Later we will see a more limited variant that also + includes the scaler. 
+*/ + +/* +static int tex_aux_scan_unit_new(halfword *num, halfword *denom, halfword *value, halfword *order) +{ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { + return quantitity_unit_scanned; + } else if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + halfword saved_cs = cur_cs; + halfword saved_tok = cur_tok; + switch (cur_chr) { + case 'p': case 'P': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 't': case 'T': + return normal_unit_scanned; + case 'c': case 'C': + *num = 12; + *denom = 1; + return normal_unit_scanned; + case 'x': case 'X': + *value = px_dimen_par; + return relative_unit_scanned; + } + } + break; + case 'm': case 'M': + if (order) { + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'm': case 'M': + *num = 7227; + *denom = 2540; + return normal_unit_scanned; + case 'u': case 'U': + return math_unit_scanned; + } + } + } + break; + case 'c': case 'C': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'm': case 'M': + *num = 7227; + *denom = 254; + return normal_unit_scanned; + case 'c': case 'C': + *num = 14856; + *denom = 1157; + return normal_unit_scanned; + } + } + break; + case 's': case 'S': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'p': case 'P': + return scaled_point_scanned; + } + } + break; + case 'b': case 'B': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'p': case 'P': + *num = 7227; + *denom = 7200; + return normal_unit_scanned; + } + } + break; + case 'i': case 'I': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'n': case 'N': + *num = 7227; + *denom = 100; + return 
normal_unit_scanned; + } + } + break; + case 'd': case 'D': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'd': case 'D': + *num = 1238; + *denom = 1157; + return normal_unit_scanned; + } + } + break; + case 'e': case 'E': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'm': case 'M': + *value = tex_get_scaled_em_width(cur_font_par); + return relative_unit_scanned; + case 'x': case 'X': + *value = tex_get_scaled_ex_height(cur_font_par); + return relative_unit_scanned; + } + } + break; + case 'f': case 'F': + if (order) { + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'i': case 'I': + *order = fi_glue_order; + if (tex_scan_character("lL", 0, 0, 0)) { + *order = fil_glue_order; + if (tex_scan_character("lL", 0, 0, 0)) { + *order = fill_glue_order; + if (tex_scan_character("lL", 0, 0, 0)) { + *order = filll_glue_order; + } + } + } + return flexible_unit_scanned; + } + } + } + break; + default: + goto JUSTONE; + } + tex_back_input(cur_tok); + JUSTONE: + tex_back_input(saved_tok); + cur_cs = saved_cs; + cur_tok = saved_tok; + return no_unit_scanned; + } else { + tex_back_input(cur_tok); + return no_unit_scanned; + } +} +*/ + +halfword tex_scan_dimen(int mu, int inf, int shortcut, int optional_equal, halfword *order) +{ + int negative = 0; + int fraction = 0; + int num = 0; + int denom = 0; + scaled v; + int save_cur_val; + halfword cur_order = normal_glue_order; + lmt_scanner_state.arithmic_error = 0; + if (! shortcut) { + do { + while (1) { + tex_get_x_token(); + if (cur_cmd != spacer_cmd) { + if (optional_equal && (cur_tok == equal_token)) { + optional_equal = 0; + } else { + break; + } + } + } + if (cur_tok == minus_token) { + negative = ! 
negative; + cur_tok = plus_token; + } + } while (cur_tok == plus_token); + if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { + cur_val = tex_aux_scan_something_internal(cur_cmd, cur_chr, mu ? mu_val_level : dimen_val_level, 0, 0); /* adapts cur_val_level */ + if (mu) { + cur_val = tex_aux_coerced_glue(cur_val, cur_val_level); + if (cur_val_level == mu_val_level) { + goto ATTACH_SIGN; + } else if (cur_val_level != int_val_level) { + tex_aux_mu_error(2); + } + } else if (cur_val_level == dimen_val_level) { + goto ATTACH_SIGN; + } + } else { + int has_fraction = tex_token_is_seperator(cur_tok); + if (has_fraction) { + cur_val = 0; + } else { + /*tex + The integer scanner only assigns the radix when it scans a numeric constant (not for + instance for an alphabetic constant), so we start out with zero instead of testing an + indeterminate value below. + */ + int cur_radix = 0; + tex_back_input(cur_tok); + cur_val = tex_scan_int(0, &cur_radix); + if (cur_radix == 10 && tex_token_is_seperator(cur_tok)) { + has_fraction = 1; + tex_get_token(); + } + } + if (has_fraction) { + unsigned k = 0; + unsigned char digits[18]; + while (1) { + tex_get_x_token(); + if ((cur_tok > nine_token) || (cur_tok < zero_token)) { + break; + } else if (k < 17) { + digits[k] = (unsigned char) (cur_tok - zero_token); + ++k; + } + } + fraction = tex_round_decimals_digits(digits, k); + if (cur_cmd != spacer_cmd) { + tex_back_input(cur_tok); + } + } + } + } + if (cur_val < 0) { + negative = ! negative; + cur_val = -cur_val; + } + save_cur_val = cur_val; + /*tex + Actually we have cur_tok but it's already pushed back and we also need to skip spaces so + let's not overdo this. 
+ */ + switch (tex_aux_scan_unit(&num, &denom, &v, &cur_order)) { + case no_unit_scanned: + /* error */ + if (lmt_error_state.intercept) { + lmt_error_state.last_intercept = 1; + } else { + tex_aux_scan_dimen_unknown_unit_error(); + } + goto ATTACH_FRACTION; + case normal_unit_scanned: + /* cm mm pt bp dd cc in dk */ + if (mu) { + tex_aux_scan_dimen_unknown_unit_error(); + } else if (num) { + int remainder = 0; + cur_val = tex_xn_over_d_r(cur_val, num, denom, &remainder); + fraction = (num * fraction + 0200000 * remainder) / denom; + cur_val += fraction / 0200000; + fraction = fraction % 0200000; + } + goto ATTACH_FRACTION; + case scaled_point_scanned: + /* sp */ + if (mu) { + tex_aux_scan_dimen_unknown_unit_error(); + } + goto DONE; + case relative_unit_scanned: + /* ex em px */ + if (mu) { + tex_aux_scan_dimen_unknown_unit_error(); + } + cur_val = tex_nx_plus_y(save_cur_val, v, tex_xn_over_d(v, fraction, 0200000)); + goto DONE; + case math_unit_scanned: + /* mu (slightly different but an error anyway */ + if (! mu) { + tex_aux_scan_dimen_mu_error(); + } + goto ATTACH_FRACTION; + case flexible_unit_scanned: + /* fi fil fill filll */ + if (mu) { + tex_aux_scan_dimen_unknown_unit_error(); + } else if (! inf) { + tex_aux_scan_dimen_fi_error(); + } + goto ATTACH_FRACTION; + case quantitity_unit_scanned: + /* internal quantity */ + cur_val = tex_aux_scan_something_internal(cur_cmd, cur_chr, mu ? 
mu_val_level : dimen_val_level, 0, 0); /* adapts cur_val_level */ + if (mu) { + cur_val = tex_aux_coerced_glue(cur_val, cur_val_level); + if (cur_val_level != mu_val_level) { + tex_aux_mu_error(3); + } + } + v = cur_val; + cur_val = tex_nx_plus_y(save_cur_val, v, tex_xn_over_d(v, fraction, 0200000)); + goto ATTACH_SIGN; + } + ATTACH_FRACTION: + if (cur_val >= 040000) { // 0x4000 + lmt_scanner_state.arithmic_error = 1; + } else { + cur_val = cur_val * unity + fraction; + } + DONE: + tex_get_x_token(); + tex_push_back(cur_tok, cur_cmd, cur_chr); + ATTACH_SIGN: + if (lmt_scanner_state.arithmic_error || (abs(cur_val) >= 010000000000)) { // 0x40000000 + if (lmt_error_state.intercept) { + lmt_error_state.last_intercept = 1 ; + } else { + tex_aux_scan_dimen_out_of_range_error(); + } + cur_val = max_dimen; + lmt_scanner_state.arithmic_error = 0; + } + if (negative) { + cur_val = -cur_val; + } + if (order) { + *order = cur_order; + } + return cur_val; +} + +/*tex + + The final member of \TEX's value-scanning trio is |scan_glue|, which makes |cur_val| point to + a glue specification. The reference count of that glue spec will take account of the fact that + |cur_val| is pointing to~it. The |level| parameter should be either |glue_val| or |mu_val|. + + Since |scan_dimen| was so much more complex than |scan_int|, we might expect |scan_glue| to be + even worse. But fortunately, it is very simple, since most of the work has already been done. + +*/ + +/* todo: get rid of cur_val */ + +halfword tex_scan_glue(int level, int optional_equal) +{ + /*tex should the answer be negated? */ + int negative = 0; + /*tex new glue specification */ + halfword q = null; + /*tex does |level=mu_val|? */ + int mu = level == mu_val_level; + /*tex Get the next non-blank non-sign. */ + do { + /*tex Get the next non-blank non-call token. 
*/ + while (1) { + tex_get_x_token(); + if (cur_cmd != spacer_cmd) { + if (optional_equal && (cur_tok == equal_token)) { + optional_equal = 0; + } else { + break; + } + } + } + if (cur_tok == minus_token) { + negative = ! negative; + cur_tok = plus_token; + } + } while (cur_tok == plus_token); + if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { + cur_val = tex_aux_scan_something_internal(cur_cmd, cur_chr, level, negative, 0); + if (cur_val_level >= glue_val_level) { + if (cur_val_level != level) { + tex_aux_mu_error(4); + } + return cur_val; + } + if (cur_val_level == int_val_level) { + cur_val = tex_scan_dimen(mu, 0, 1, 0, NULL); + } else if (level == mu_val_level) { + tex_aux_mu_error(5); + } + } else { + tex_back_input(cur_tok); + cur_val = tex_scan_dimen(mu, 0, 0, 0, NULL); + if (negative) { + cur_val = -cur_val; + } + } + /*tex + + Create a new glue specification whose width is |cur_val|; scan for its stretch and shrink + components. + + */ + q = tex_new_glue_spec_node(zero_glue); + glue_amount(q) = cur_val; + while (1) { + switch (tex_scan_character("pmPM", 0, 1, 0)) { + case 0: + return q; + case 'p': case 'P': + if (tex_scan_mandate_keyword("plus", 1)) { + halfword order; + glue_stretch(q) = tex_scan_dimen(mu, 1, 0, 0, &order); + glue_stretch_order(q) = order; + } + break; + case 'm': case 'M': + if (tex_scan_mandate_keyword("minus", 1)) { + halfword order; + glue_shrink(q) = tex_scan_dimen(mu, 1, 0, 0, &order); + glue_shrink_order(q) = order; + } + break; + default: + tex_aux_show_keyword_error("plus|minus"); + return q; + } + } +} + +/*tex + + This started as an experiment. A font object is just a container for a combination of id and + scales. It permits fast font switching (not that setting the font id and scales separately is + that slow) and has the benefit of a more sparse logging. 
We use nodes and not some array + because after all we always have symbolic names and we then get saving and restoring as well as + memory management for free. + + When an spec is given we make a copy but can overload the scales after that. Otherwise we just + create a new spec with default scales 1000. This fontspec object was introduced after we had + experimental compact font support in \CONTEXT\ for over a year working well. + +*/ + +halfword tex_scan_font(int optional_equal) +{ + halfword fv = null; + halfword id, fs; + if (optional_equal) { + tex_scan_optional_equals(); + } + id = tex_scan_font_identifier(&fv); + if (fv) { + fs = tex_copy_node(fv); + } else { + /*tex We create a new one and assign the mandate id. */ + fs = tex_new_node(font_spec_node, normal_code); + font_spec_identifier(fs) = id; + font_spec_scale(fs) = unused_scale_value; + font_spec_x_scale(fs) = unused_scale_value; + font_spec_y_scale(fs) = unused_scale_value; + } + while (1) { + switch (tex_scan_character("asxyASXY", 0, 1, 0)) { + case 0: + return fs; + case 'a': case 'A': + if (tex_scan_mandate_keyword("all", 1)) { + font_spec_scale(fs) = tex_scan_scale(0); + font_spec_x_scale(fs) = tex_scan_scale(0); + font_spec_y_scale(fs) = tex_scan_scale(0); + } + break; + case 's': case 'S': + if (tex_scan_mandate_keyword("scale", 1)) { + font_spec_scale(fs) = tex_scan_scale(0); + } + break; + case 'x': case 'X': + if (tex_scan_mandate_keyword("xscale", 1)) { + font_spec_x_scale(fs) = tex_scan_scale(0); + } + break; + case 'y': case 'Y': + if (tex_scan_mandate_keyword("yscale", 1)) { + font_spec_y_scale(fs) = tex_scan_scale(0); + } + break; + default: + return fs; + } + } +} + +/*tex + + This procedure is supposed to scan something like |\skip \count 12|, i.e., whatever can follow + |\the|, and it constructs a token list containing something like |-3.0pt minus 0.5 fill|. + + There is a bit duplicate code here but it makes a nicer switch as we also need to deal with + tokens and font identifiers. 
+ +*/ + +# define push_selector { \ + saved_selector = lmt_print_state.selector; \ + lmt_print_state.selector = new_string_selector_code; \ +} + +# define pop_selector { \ + lmt_print_state.selector = saved_selector; \ +} + +halfword tex_the_value_toks(int code, halfword *tail, halfword property) /* maybe split this as already checked */ +{ + tex_get_x_token(); + cur_val = tex_aux_scan_something_internal(cur_cmd, cur_chr, tok_val_level, 0, property); + switch (cur_val_level) { + case int_val_level: + case attr_val_level: + { + int saved_selector; + push_selector; + tex_print_int(cur_val); + pop_selector; + return tex_cur_str_toks(tail); + } + case dimen_val_level: + { + int saved_selector; + push_selector; + tex_print_dimension(cur_val, code == the_without_unit_code ? no_unit : pt_unit); + pop_selector; + return tex_cur_str_toks(tail); + } + case glue_val_level: + case mu_val_level: + { + int saved_selector; + push_selector; + tex_print_spec(cur_val, (code != the_without_unit_code) ? (cur_val_level == glue_val_level ? pt_unit : mu_unit) : no_unit); + tex_flush_node(cur_val); + pop_selector; + return tex_cur_str_toks(tail); + } + case tok_val_level: + { + /*tex Copy the token list */ + halfword h = null; + halfword p = null; + if (cur_val) { + /*tex Do not copy the reference count! */ + halfword r = token_link(cur_val); + while (r) { + p = tex_store_new_token(p, token_info(r)); + if (! h) { + h = p; + } + r = token_link(r); + } + } + if (tail) { + *tail = p; + } + return h; + } + case font_val_level: + { + int saved_selector; + push_selector; + tex_print_font_identifier(cur_val); + pop_selector; + return tex_cur_str_toks(tail); + } + case mathspec_val_level: + { + /*tex So we don't mess with null font. */ + if (cur_val) { + int saved_selector; + push_selector; + tex_print_mathspec(cur_val); + pop_selector; + return tex_cur_str_toks(tail); + } else { + return null; + } + } + case fontspec_val_level: + { + /*tex So we don't mess with null font. 
*/ + if (cur_val) { + int saved_selector; + push_selector; + tex_print_font_specifier(cur_val); + pop_selector; + return tex_cur_str_toks(tail); + } else { + return null; + } + } + case list_val_level: + { + if (cur_val) { + // halfword copy = tex_copy_node_list(cur_val, null); + halfword copy = tex_copy_node(cur_val); + tex_tail_append(copy); + cur_val = null; + } + break; + } + } + return null; +} + +halfword tex_the_detokenized_toks(halfword *tail) +{ + halfword head = tex_scan_general_text(tail); + int saved_selector; + push_selector; + tex_show_token_list(head, null, extreme_token_show_max, 0); + pop_selector; + tex_flush_token_list(head); + return tex_cur_str_toks(tail); +} + +/*tex + The |the_without_unit| variant implements |\thewithoutunit| is not really that impressive but + just there because it's cheap to implement and also avoids a kind of annoying macro definition, + one of the kind that demonstrates that one really understands \TEX. Now, with plenty of memory + and disk space the added code is probably not noticed and adds less bytes to the binary than a + macro does to the (and probably every) format file. 
+*/ + +halfword tex_the_toks(int code, halfword *tail) +{ + switch (code) { + case the_code: + case the_without_unit_code: + return tex_the_value_toks(code, tail, 0); + /* case the_with_property_code: */ + /* return tex_the_value_toks(code, tail, tex_scan_int(0, 0)); */ + case unexpanded_code: + return tex_scan_general_text(tail); + case detokenize_code: + return tex_the_detokenized_toks(tail); + default: + return null; + } +} + +strnumber tex_the_scanned_result(void) +{ + /*tex return value */ + strnumber r; + /*tex holds |selector| setting */ + int saved_selector; + push_selector; + switch (cur_val_level) { + case int_val_level: + tex_print_int(cur_val); + break; + case attr_val_level: + tex_print_int(cur_val); + break; + case dimen_val_level: + tex_print_dimension(cur_val, pt_unit); + break; + case glue_val_level: + tex_print_spec(cur_val, pt_unit); + tex_flush_node(cur_val); + break; + case mu_val_level: + tex_print_spec(cur_val, mu_unit); + tex_flush_node(cur_val); + break; + case tok_val_level: + if (cur_val) { + tex_token_show(cur_val, extreme_token_show_max); + break; + } else { + r = get_nullstr(); + goto DONE; + } + /* + case list_val_level: + printf("TODO\n"); + if (cur_val) { + cur_val = tex_copy_node(cur_val); + tex_couple_nodes(cur_list.tail, cur_val); + cur_list.tail = cur_val; + } + r = get_nullstr(); + goto DONE; + */ + default: + r = get_nullstr(); + goto DONE; + } + r = tex_make_string(); + DONE: + pop_selector; + return r; +} + +/*tex + + The following routine is used to implement |\fontdimen n f|. We no longer automatically increase + the number of allocated dimensions because we have plenty of dimensions available and loading is + done differently anyway. 
+ +*/ + +static halfword tex_aux_scan_font_id_and_parameter(halfword *fnt, halfword *n) +{ + *n = tex_scan_int(0, NULL); + *fnt = tex_scan_font_identifier(NULL); + if (*n <= 0 || *n > max_integer) { + tex_handle_error( + normal_error_type, + "Font '%s' has at most %i fontdimen parameters", + font_original(*fnt), font_parameter_count(*fnt), + "The font parameter index is out of range." + ); + return 0; + } else { + return 1; + } +} + +void tex_set_font_dimen(void) +{ + halfword fnt, n; + if (tex_aux_scan_font_id_and_parameter(&fnt, &n)) { + tex_set_font_parameter(fnt, n, tex_scan_dimen(0, 0, 0, 1, NULL)); + } +} + +halfword tex_get_font_dimen(void) +{ + halfword fnt, n; + return tex_aux_scan_font_id_and_parameter(&fnt, &n) ? tex_get_font_parameter(fnt, n) : null; +} + +void tex_set_scaled_font_dimen(void) +{ + halfword fnt, n; + if (tex_aux_scan_font_id_and_parameter(&fnt, &n)) { + tex_set_scaled_parameter(fnt, n, tex_scan_dimen(0, 0, 0, 1, NULL)); + } +} + +halfword tex_get_scaled_font_dimen(void) +{ + halfword fnt, n; + return tex_aux_scan_font_id_and_parameter(&fnt, &n) ? tex_get_scaled_parameter(fnt, n) : null; +} + +/*tex Declare procedures that scan font-related stuff. */ + +halfword tex_scan_math_style_identifier(int tolerant, int styles) +{ + halfword style = tex_scan_int(0, NULL); + if (is_valid_math_style(style)) { + return style; + } else if (styles && are_valid_math_styles(style)) { + return style; + } else if (tolerant) { + return -1; + } else { + tex_handle_error( + back_error_type, + "Missing math style, treated as \\displaystyle", + "A style should have been here; I inserted '\\displaystyle'." + ); + return display_style; + } +} + +halfword tex_scan_math_parameter(void) +{ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_cmd == set_math_parameter_cmd && cur_chr < math_parameter_last) { + return cur_chr; + } else { + tex_handle_error( + normal_error_type, + "Invalid math parameter", + "I'm going to ignore this one." 
+ ); + return -1; + } +} + +halfword tex_scan_fontspec_identifier(void) +{ + /*tex Get the next non-blank non-call. */ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_cmd == fontspec_cmd) { + return cur_chr; + } else { + return 0; + } +} + +halfword tex_scan_font_identifier(halfword *spec) +{ + /*tex Get the next non-blank non-call. */ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + switch (cur_cmd) { + case define_font_cmd: + return cur_font_par; + case set_font_cmd: + /* set_font_touched(cur_chr, 1); */ + return cur_chr; + case fontspec_cmd: + { + halfword fnt = tex_get_font_identifier(cur_chr); + if (fnt && spec) { + *spec = fnt ? cur_chr : null; + } + return fnt; + } + case define_family_cmd: + { + halfword siz = cur_chr; + halfword fam = tex_scan_math_family_number(); + halfword fnt = tex_fam_fnt(fam, siz); + /* set_font_touched(fnt, 1); */ + return fnt; + } + case register_int_cmd: + case integer_cmd: + { + /*tex Checking here saves a push back when we want an integer. */ + halfword fnt = eq_value(cur_chr); + if (tex_is_valid_font(fnt)) { + return fnt; + } else { + goto BAD; + } + } + case internal_int_cmd: + { + /*tex Bonus: |\setfontid| */ + if (internal_int_number(cur_chr) == font_code) { + halfword fnt = tex_scan_int(0, NULL); + if (tex_is_valid_font(fnt)) { + return fnt; + } + } + goto BAD; + } + default: + { + /*tex We abuse |scan_cardinal| here btu we have to push back. */ + unsigned fnt = null_font; + tex_back_input(cur_tok); + if (tex_scan_cardinal(&fnt, 1)) { + if (tex_is_valid_font((halfword) fnt)) { + return (halfword) fnt; + } + } else { + /*tex Fall through to a font error message. */ + } + BAD: + tex_handle_error( + back_error_type, + "Missing or invalid font identifier (or equivalent) or integer (register or otherwise)", + "I was looking for a control sequence whose current meaning has been defined by\n" + "\\font or a valid font id number." 
+ ); + return null_font; + } + } +} + +/*tex + + The |scan_general_text| procedure is much like |scan_toks (false, false)|, but will be invoked + via |expand|, i.e., recursively. + + The token list (balanced text) created by |scan_general_text| begins at |link (temp_token_head)| + and ends at |cur_val|. (If |cur_val = temp_token_head|, the list is empty.) + +*/ + +halfword tex_scan_general_text(halfword *tail) +{ + /*tex The tail of the token list being built: */ + halfword p = get_reference_token(); + halfword head; + /*tex The number of nested left braces: */ + halfword unbalance = 0; + halfword saved_scanner_status = lmt_input_state.scanner_status; + halfword saved_warning_index = lmt_input_state.warning_index; + halfword saved_def_ref = lmt_input_state.def_ref; + lmt_input_state.scanner_status = scanner_is_absorbing; + lmt_input_state.warning_index = cur_cs; + lmt_input_state.def_ref = p; + /*tex Remove the compulsory left brace. */ + tex_scan_left_brace(); + while (1) { + tex_get_token(); + if (cur_tok < right_brace_limit) { + // if (cur_cmd < right_brace_cmd) { + if (cur_cmd == left_brace_cmd || cur_cmd == relax_cmd) { + ++unbalance; + } else if (unbalance) { + --unbalance; + } else { + break; + } + } + p = tex_store_new_token(p, cur_tok); + } + head = token_link(lmt_input_state.def_ref); + if (tail) { + *tail = head ? p : null; + } + /*tex Discard reference count. */ + tex_put_available_token(lmt_input_state.def_ref); + lmt_input_state.scanner_status = saved_scanner_status; + lmt_input_state.warning_index = saved_warning_index; + lmt_input_state.def_ref = saved_def_ref; + return head; +} + +/*tex + + The |get_x_or_protected| procedure is like |get_x_token| except that protected macros are not + expanded. It sets |cur_cmd|, |cur_chr|, |cur_tok|, and expands non-protected macros. 
+ +*/ + +void tex_get_x_or_protected(void) +{ + while (1) { + tex_get_token(); + if (cur_cmd <= max_command_cmd || is_protected_cmd(cur_cmd)) { + return; + } else { + tex_expand_current_token(); + } + } +} + +/*tex + + |scan_toks|. This function returns a pointer to the tail of a new token list, and it also makes + |def_ref| point to the reference count at the head of that list. + + There are two boolean parameters, |macro_def| and |xpand|. If |macro_def| is true, the goal is + to create the token list for a macro definition; otherwise the goal is to create the token list + for some other \TEX\ primitive: |\mark|, |\output|, |\everypar|, |\lowercase|, |\uppercase|, + |\message|, |\errmessage|, |\write|, or |\special|. In the latter cases a left brace must be + scanned next; this left brace will not be part of the token list, nor will the matching right + brace that comes at the end. If |xpand| is false, the token list will simply be copied from the + input using |get_token|. Otherwise all expandable tokens will be expanded until unexpandable + tokens are left, except that the results of expanding |\the| are not expanded further. If both + |macro_def| and |xpand| are true, the expansion applies only to the macro body (i.e., to the + material following the first |left_brace| character). + + The value of |cur_cs| when |scan_toks| begins should be the |eqtb| address of the control + sequence to display in runaway error messages. + + Watch out: there are two extensions to the macro definition parser: a |#0| will just gobble the + argument and not copy it to the parameter stack, and |#+| will not remove braces around a + \quote {single group} argument, something that comes in handy when you grab and pass over an + argument. + + If the next character is a parameter number, make |cur_tok| a |match| token; but if it is a + left brace, store |left_brace|, |end_match|, set |hash_brace|, and |goto done|. 
+ + For practical reasone, we have split the |scan_toks| function up in four smaller dedicated + functions. When we add features it makes no sense to clutter the code even more. Keep in mind + that compared to the reference \TEX\ inplementation we have to support |\expanded| token lists + but also |\protected| and friends. There is of course some overlap now but that's a small + price to pay for readability. + + The split functions need less redundant checking and the expandable variants got one loop + instead of two nested loops. + +*/ + +halfword tex_scan_toks_normal(int left_brace_found, halfword *tail) +{ + halfword unbalance = 0; + halfword result = get_reference_token(); + halfword p = result; + lmt_input_state.scanner_status = scanner_is_absorbing; + lmt_input_state.warning_index = cur_cs; + lmt_input_state.def_ref = result; + if (! left_brace_found) { + tex_scan_left_brace(); + } + while (1) { + tex_get_token(); + if (cur_tok < right_brace_limit) { + if (cur_cmd == left_brace_cmd) { + ++unbalance; + } else if (unbalance) { + --unbalance; + } else { + break; + } + } else if (cur_cmd == prefix_cmd && cur_chr == enforced_code && (! overload_mode_par || lmt_main_state.run_state != production_state)) { /* todo cur_tok == let_aliased_token */ + cur_tok = token_val(prefix_cmd, always_code); + } + p = tex_store_new_token(p, cur_tok); + } + lmt_input_state.scanner_status = scanner_is_normal; + if (tail) { + *tail = p; + } + return result; +} + +halfword tex_scan_toks_expand(int left_brace_found, halfword *tail, int expandconstant) +{ + halfword unbalance = 0; + halfword result = get_reference_token(); + halfword p = result; + lmt_input_state.scanner_status = scanner_is_absorbing; + lmt_input_state.warning_index = cur_cs; + lmt_input_state.def_ref = result; + if (! 
left_brace_found) { + tex_scan_left_brace(); + } + while (1) { + PICKUP: + tex_get_next(); + switch (cur_cmd) { + case call_cmd: + case tolerant_call_cmd: + tex_expand_current_token(); + goto PICKUP; + case protected_call_cmd: + case tolerant_protected_call_cmd: + cur_tok = cs_token_flag + cur_cs; + goto APPENDTOKEN; + case semi_protected_call_cmd: + case tolerant_semi_protected_call_cmd: + if (expandconstant) { + tex_expand_current_token(); + goto PICKUP; + } else { + cur_tok = cs_token_flag + cur_cs; + goto APPENDTOKEN; + } + case the_cmd: + { + halfword t = null; + halfword h = tex_the_toks(cur_chr, &t); + if (h) { + set_token_link(p, h); + p = t; + } + goto PICKUP; + } + case prefix_cmd: + if (cur_chr == enforced_code && (! overload_mode_par || lmt_main_state.run_state != production_state)) { + cur_tok = token_val(prefix_cmd, always_code); + goto APPENDTOKEN; + } + default: + if (cur_cmd > max_command_cmd) { + tex_expand_current_token(); + goto PICKUP; + } else { + goto DONEEXPANDING; + } + } + DONEEXPANDING: + tex_x_token(); + if (cur_tok < right_brace_limit) { + if (cur_cmd == left_brace_cmd) { + ++unbalance; + } else if (unbalance) { + --unbalance; + } else { + goto FINALYDONE; + } + } + APPENDTOKEN: + p = tex_store_new_token(p, cur_tok); + } + FINALYDONE: + lmt_input_state.scanner_status = scanner_is_normal; + if (tail) { + *tail = p; + } + return result; +} + +static void tex_aux_too_many_parameters_error(void) +{ + tex_handle_error( + normal_error_type, + "You already have nine parameters", + "I'm going to ignore the # sign you just used, as well the token that followed it.\n" + /*tex That last bit was added in the TeX 2021 buglet fix round. */ + ); +} + +static void tex_aux_parameters_order_error(void) +{ + tex_handle_error( + back_error_type, + "Parameters must be numbered consecutively", + "I've inserted the digit you should have used after the #." 
+ ); +} + +static void tex_aux_missing_brace_error(void) +{ + tex_handle_error( + normal_error_type, + "Missing { inserted", + "Where was the left brace? You said something like '\\def\\a}', which I'm going to\n" + "interpret as '\\def\\a{}'." + ); +} + +static void tex_aux_illegal_parameter_in_body_error(void) +{ + tex_handle_error( + back_error_type, + "Illegal parameter number in definition of %S", + lmt_input_state.warning_index, + "You meant to type ## instead of #, right? Or maybe a } was forgotten somewhere\n" + "earlier, and things are all screwed up? I'm going to assume that you meant ##." + ); +} + +/*tex + There are interesting aspects in reporting the preamble, like: + + \starttyping + \def\test#1#{test#1} : macro:#1{->test#1{ + \stoptyping + + So, the \type {#} gets reported as left brace. + + The |\par| handling depends on the mode + + \starttyping + % 0x1 text | 0x2 macro | 0x4 go-on + + \autoparagraphmode0 \def\foo#1\par{[#1]} 0: \meaningfull\foo\par \foo test\par test\par + \autoparagraphmode1 \def\foo#1\par{[#1]} 1: \meaningfull\foo\par \foo test\par test\par + \autoparagraphmode2 \def\foo#1\par{[#1]} 2: \meaningfull\foo\par \foo test\par test\par % discard after #1 till \par + \autoparagraphmode4 \def\foo#1\par{[#1]} 4: \meaningfull\foo\par \foo test\par test\par + \stoptyping +*/ + +inline static int tex_aux_valid_macro_preamble(halfword *p, int *counter, halfword *hash_brace) +{ + halfword h = *p; + while (1) { + tex_get_token(); + if (cur_tok < right_brace_limit) { + break; + } else if (cur_cmd == parameter_cmd) { + tex_get_token(); + /* + cf. TeX 2021 we not do a more strict testing. Interesting is that wondered why we + had a more generous test here but just considered that a feature or intended side + effect but in the end we have to be strict. 
+ + \starttyping + \def\cs#1#\bgroup hi#1} % was weird but okay pre 2021 + \def\cs#1\bgroup{hi#1\bgroup} % but this is better indeed + \stoptyping + */ + if (cur_tok < left_brace_limit) { + /* if (cur_cmd == left_brace_cmd) { */ + /*tex The |\def\foo#{}| case. */ + *hash_brace = cur_tok; + *p = tex_store_new_token(*p, cur_tok); + *p = tex_store_new_token(*p, end_match_token); + set_token_parameters(h, *counter - zero_token + 1); + return 1; + } else if (*counter == nine_token) { + tex_aux_too_many_parameters_error(); + } else { + switch (cur_tok) { + case zero_token: + ++*counter; + cur_tok = match_token; + break; + case asterisk_token: + cur_tok = spacer_match_token; + break; + case plus_token: + ++*counter; + cur_tok = keep_match_token; + break; + case minus_token: + cur_tok = thrash_match_token; + break; + case period_token: + cur_tok = par_spacer_match_token; + break; + case comma_token: + cur_tok = keep_spacer_match_token; + break; + case slash_token: + ++*counter; + cur_tok = prune_match_token; + break; + case colon_token: + cur_tok = continue_match_token; + break; + case semi_colon_token: + cur_tok = quit_match_token; + break; + case equal_token: + ++*counter; + cur_tok = mandate_match_token; + break; + case circumflex_token_l: + case circumflex_token_o: + ++*counter; + cur_tok = leading_match_token; + break; + case underscore_token_l: + case underscore_token_o: + ++*counter; + cur_tok = mandate_keep_match_token; + break; + case at_token_l: + case at_token_o: + cur_tok = par_command_match_token; + break; + default: + ++*counter; + if (cur_tok != *counter) { + tex_aux_parameters_order_error(); + } + cur_tok += match_token - other_token; + break; + } + } + } else if (cur_cmd == end_paragraph_cmd && auto_paragraph_mode(auto_paragraph_macro)) { + cur_tok = par_command_match_token; + } + *p = tex_store_new_token(*p, cur_tok); + } + if (h != *p) { + *p = tex_store_new_token(*p, end_match_token); + set_token_parameters(h, *counter - zero_token + 1); + } + if 
(cur_cmd == right_brace_cmd) { + ++lmt_input_state.align_state; + tex_aux_missing_brace_error(); + return 0; + } else { + return 1; + } +} + +halfword tex_scan_macro_normal(void) +{ + halfword hash_brace = 0; + halfword counter = zero_token; + halfword result = get_reference_token(); + halfword p = result; + lmt_input_state.scanner_status = scanner_is_defining; + lmt_input_state.warning_index = cur_cs; + lmt_input_state.def_ref = result; + if (tex_aux_valid_macro_preamble(&p, &counter, &hash_brace)) { + halfword unbalance = 0; + while (1) { + tex_get_token(); + if (cur_tok < right_brace_limit) { + /*tex Maybe use |cur_cmd < left_brace_limit| for consistency. */ + if (cur_cmd == left_brace_cmd) { + ++unbalance; + } else if (unbalance) { + --unbalance; + } else { + goto FINALYDONE; + } + } else if (cur_cmd == parameter_cmd) { + halfword s = cur_tok; + tex_get_token(); + if (cur_cmd == parameter_cmd) { + /*tex Keep the |#|. */ + } else if (cur_tok <= zero_token || cur_tok > counter) { + tex_aux_illegal_parameter_in_body_error(); + cur_tok = s; + } else { + cur_tok = token_val(parameter_reference_cmd, cur_chr - '0'); + } + } else if (cur_cmd == prefix_cmd && cur_chr == enforced_code && (! 
overload_mode_par || lmt_main_state.run_state != production_state)) { /* todo cur_tok == let_aliased_token */ + cur_tok = token_val(prefix_cmd, always_code); + } + p = tex_store_new_token(p, cur_tok); + } + } + FINALYDONE: + lmt_input_state.scanner_status = scanner_is_normal; + if (hash_brace) { + p = tex_store_new_token(p, hash_brace); + } + return result; +} + +# define optimize_grouping 0 + +halfword tex_scan_macro_expand(void) +{ + halfword hash_brace = 0; + halfword counter = zero_token; + halfword result = get_reference_token(); + halfword p = result; + lmt_input_state.scanner_status = scanner_is_defining; + lmt_input_state.warning_index = cur_cs; + lmt_input_state.def_ref = result; + if (tex_aux_valid_macro_preamble(&p, &counter, &hash_brace)) { + halfword unbalance = 0; + while (1) { + PICKUP: + tex_get_next(); + switch (cur_cmd) { + case call_cmd: + case tolerant_call_cmd: + tex_expand_current_token(); + goto PICKUP; + case protected_call_cmd: + case semi_protected_call_cmd: + case tolerant_protected_call_cmd: + case tolerant_semi_protected_call_cmd: + cur_tok = cs_token_flag + cur_cs; + goto APPENDTOKEN; + case the_cmd: + { + halfword t = null; + halfword h = tex_the_toks(cur_chr, &t); + if (h) { + set_token_link(p, h); + p = t; + } + goto PICKUP; + } + case relax_cmd: + if (cur_chr == no_relax_code) { + /*tex Think of |\ifdim\dimen0=\dimen2\norelax| inside an |\edef|. */ + goto PICKUP; + } else { + goto DONEEXPANDING; + } + case prefix_cmd: + if (cur_chr == enforced_code && (! overload_mode_par || lmt_main_state.run_state != production_state)) { + cur_tok = token_val(prefix_cmd, always_code); + goto APPENDTOKEN; + } else { + goto DONEEXPANDING; + } + case parameter_cmd: + { + /* move into switch ... */ + halfword s = cur_tok; + tex_get_x_token(); + if (cur_cmd == parameter_cmd) { + /*tex Keep the |#|. 
*/ + } else if (cur_tok <= zero_token || cur_tok > counter) { + tex_aux_illegal_parameter_in_body_error(); + cur_tok = s; + } else { + cur_tok = token_val(parameter_reference_cmd, cur_chr - '0'); + } + goto APPENDTOKEN; + } +# if (optimize_grouping) + case left_brace_cmd: + if (cur_cs) { + cur_tok = cs_token_flag + cur_cs; + } else { + cur_tok = token_val(cur_cmd, cur_chr); + ++unbalance; + } + goto APPENDTOKEN; + case right_brace_cmd: + if (cur_cs) { + cur_tok = cs_token_flag + cur_cs; + goto APPENDTOKEN; + } else { + cur_tok = token_val(cur_cmd, cur_chr); + if (unbalance) { + --unbalance; + goto APPENDTOKEN; + } else { + goto FINALYDONE; + } + } +# endif + default: + if (cur_cmd > max_command_cmd) { + tex_expand_current_token(); + goto PICKUP; + } else { + goto DONEEXPANDING; + } + } + DONEEXPANDING: + /* tex_x_token(); */ + if (cur_cs) { + cur_tok = cs_token_flag + cur_cs; + } else { + cur_tok = token_val(cur_cmd, cur_chr); + } + /* */ +# if (! optimize_grouping) + if (cur_tok < right_brace_limit) { + if (cur_cmd == left_brace_cmd) { + ++unbalance; + } else if (unbalance) { + --unbalance; + } else { + goto FINALYDONE; + } + } +# endif + APPENDTOKEN: + p = tex_store_new_token(p, cur_tok); + } + } + FINALYDONE: + lmt_input_state.scanner_status = scanner_is_normal; + if (hash_brace) { + p = tex_store_new_token(p, hash_brace); + } + return result; +} + +/*tex + + The |scan_expr| procedure scans and evaluates an expression. Evaluating an expression is a + recursive process: When the left parenthesis of a subexpression is scanned we descend to the + next level of recursion; the previous level is resumed with the matching right parenthesis. 
+ +*/ + +typedef enum expression_states { + expression_none, /*tex |(| or |(expr)| */ + expression_add, /*tex |+| */ + expression_subtract, /*tex |-| */ + expression_multiply, /*tex |*| */ + expression_divide, /*tex |/| */ + expression_scale, /*tex |* factor| */ + expression_idivide, /*tex |:|, is like |/| but floored */ +} expression_states; + +/*tex + + We want to make sure that each term and (intermediate) result is in the proper range. Integer + values must not exceed |infinity| ($2^{31} - 1$) in absolute value, dimensions must not exceed + |max_dimen| ($2^{30} - 1$). We avoid the absolute value of an integer, because this might fail + for the value $-2^{31}$ using 32-bit arithmetic. + + Todo: maybe use |long long| here. + +*/ + +inline static void tex_aux_normalize_glue(halfword g) +{ + if (! glue_stretch(g)) { + glue_stretch_order(g) = normal_glue_order; + } + if (! glue_shrink(g)) { + glue_shrink_order(g) = normal_glue_order; + } +} + +/*tex + + Parenthesized subexpressions can be inside expressions, and this nesting has a stack. Seven + local variables represent the top of the expression stack: |p| points to pushed-down entries, + if any; |l| specifies the type of expression currently beeing evaluated; |e| is the expression + so far and |r| is the state of its evaluation; |t| is the term so far and |s| is the state of + its evaluation; finally |n| is the numerator for a combined multiplication and division, if any. + + The function |add_or_sub (x, y, max_answer, negative)| computes the sum (for |negative = false|) + or difference (for |negative = true|) of |x| and |y|, provided the absolute value of the result + does not exceed |max_answer|. 
+ +*/ + +inline static int tex_aux_add_or_sub(int x, int y, int max_answer, int operation) +{ + switch (operation) { + case expression_subtract: + y = -y; + // fall-trough + case expression_add: + if (x >= 0) { + if (y <= max_answer - x) { + return x + y; + } else { + lmt_scanner_state.arithmic_error = 1; + } + } else if (y >= -max_answer - x) { + return x + y; + } else { + lmt_scanner_state.arithmic_error = 1; + } + break; + } + return 0; +} + +/*tex + + The function |quotient (n, d)| computes the rounded quotient $q = \lfloor n / d + {1 \over 2} + \rfloor$, when $n$ and $d$ are positive. + +*/ + +inline static int tex_aux_quotient(int n, int d, int round) +{ + /*tex The answer: */ + if (d == 0) { + lmt_scanner_state.arithmic_error = 1; + return 0; + } else { + /*tex Should the answer be negated? */ + int negative; + int a; + if (d > 0) { + negative = 0; + } else { + d = -d; + negative = 1; + } + if (n < 0) { + n = -n; + negative = ! negative; + } + a = n / d; + if (round) { + n = n - a * d; + /*tex Avoid certain compiler optimizations! Really? */ + d = n - d; + if (d + n >= 0) { + ++a; + } + } + if (negative) { + a = -a; + } + return a; + } +} + +/*tex + + Finally, the function |fract (x, n, d, max_answer)| computes the integer $q = \lfloor x n / d + + {1 \over 2} \rfloor$, when $x$, $n$, and $d$ are positive and the result does not exceed + |max_answer|. We can't use floating point arithmetic since the routine must produce identical + results in all cases; and it would be too dangerous to multiply by~|n| and then divide by~|d|, + in separate operations, since overflow might well occur. Hence this subroutine simulates double + precision arithmetic, somewhat analogous to Metafont's |make_fraction| and |take_fraction| + routines. + +*/ + +int tex_fract(int x, int n, int d, int max_answer) +{ + /*tex should the answer be negated? 
*/ + int negative = 0; + /*tex the answer */ + int a = 0; + /*tex a proper fraction */ + int f; + /*tex smallest integer such that |2*h>=d| */ + int h; + /*tex intermediate remainder */ + int r; + /*tex temp variable */ + int t; + if (d == 0) { + goto TOO_BIG; + } + if (x == 0) { + return 0; + } + if (d < 0) { + d = -d; + negative = 1; + } + if (x < 0) { + x = -x; + negative = ! negative; + } + if (n < 0) { + n = -n; + negative = ! negative; + } + t = n / d; + if (t > max_answer / x) { + goto TOO_BIG; + } + a = t * x; + n = n - t * d; + if (n == 0) { + goto FOUND; + } + t = x / d; + if (t > (max_answer - a) / n) { + goto TOO_BIG; + } + a = a + t * n; + x = x - t * d; + if (x == 0) { + goto FOUND; + } + if (x < n) { + t = x; + x = n; + n = t; + } + /*tex + + Now |0 < n <= x < d| and we compute $f = \lfloor xn/d+{1\over2}\rfloor$. The loop here + preserves the following invariant relations between |f|, |x|, |n|, and~|r|: (i)~$f + \lfloor + (xn + (r + d))/d\rfloor = \lfloor x_0 n_0/d + {1\over2} \rfloor$; (ii)~|-d <= r < 0 < n <= x + < d|, where $x_0$, $n_0$ are the original values of~$x$ and $n$. + + Notice that the computation specifies |(x - d) + x| instead of |(x + x) - d|, because the + latter could overflow. + + */ + f = 0; + r = (d / 2) - d; + h = -r; + while (1) { + if (odd(n)) { + r = r + x; + if (r >= 0) { + r = r - d; + ++f; + } + } + n = n / 2; + if (n == 0) { + break; + } else if (x < h) { + x = x + x; + } else { + t = x - d; + x = t + x; + f = f + n; + if (x < n) { + if (x == 0) { + break; + } else { + t = x; + x = n; + n = t; + } + } + } + } + if (f > (max_answer - a)) { + goto TOO_BIG; + } + a = a + f; + FOUND: + if (negative) { + a = -a; + } + goto DONE; + TOO_BIG: + lmt_scanner_state.arithmic_error = 1; + a = 0; + DONE: + return a; +} + +/*tex + + The main stacking logic approach is kept but I get the impression that the code is still + suboptimal. 
+ +*/ + +static void tex_aux_scan_expr(halfword level) +{ + /*tex state of expression so far */ + int result; + /*tex state of term so far */ + int state; + /*tex next operation or type of next factor */ + int operation; + /*tex expression so far */ + int expression; + /*tex term so far */ + int term; + /*tex current factor */ + int factor = 0; + /*tex numerator of combined multiplication and division */ + int numerator; + /*tex saved values of |arith_error| */ + int error_a = lmt_scanner_state.arithmic_error; + int error_b = 0; + /*tex top of expression stack */ + halfword top = null; + /*tex Scan and evaluate an expression |e| of type |l|. */ + cur_val_level = level; /* for now */ + lmt_scanner_state.expression_depth++; + if (lmt_scanner_state.expression_depth > 1000) { + tex_fatal_error("\\*expr can only be nested 1000 deep"); + } + RESTART: + result = expression_none; + state = expression_none; + expression = 0; + term = 0; + numerator = 0; + CONTINUE: + operation = state == expression_none ? level : int_val_level; /* we abuse operation */ + /*tex + + Scan a factor |f| of type |o| or start a subexpression. Get the next non-blank non-call + token. + + */ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_tok == left_parent_token) { + /*tex Push the expression stack and |goto restart|. 
*/ + halfword t = tex_get_node(expression_node_size); + node_type(t) = expression_node; + node_subtype(t) = 0; + /* */ + node_next(t) = top; + expression_type(t) = (quarterword) level; + expression_state(t) = (singleword) state; + expression_result(t) = (singleword) result; + expression_expression(t) = expression; + expression_term(t) = term; + expression_numerator(t) = numerator; + top = t; + level = operation; + goto RESTART; + } + if (cur_cmd != spacer_cmd) { + tex_back_input(cur_tok); + } + switch (operation) { + case int_val_level: + case attr_val_level: + factor = tex_scan_int(0, NULL); + break; + case dimen_val_level: + factor = tex_scan_dimen(0, 0, 0, 0, NULL); + break; + case glue_val_level: + factor = tex_scan_glue(glue_val_level, 0); + break; + case mu_val_level: + factor = tex_scan_glue(mu_val_level, 0); + break; + } + FOUND: + /*tex + Scan the next operator and set |o| and get the next non-blank non-call token. + */ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + switch (cur_tok) { + case plus_token: + operation = expression_add; + break; + case minus_token: + operation = expression_subtract; + break; + case asterisk_token: + operation = expression_multiply; + break; + case slash_token: + operation = expression_divide; + break; + case colon_token: + operation = expression_idivide; + break; + /*tex + The commented bitwise experiment as of 2020-07-20 has been removed and is now in + |\scanbitexpr|. You can find it in the archive. + */ + default: + operation = expression_none; + if (! top) { + if (cur_cmd != relax_cmd) { + tex_back_input(cur_tok); + } + } else if (cur_tok != right_parent_token) { + tex_handle_error( + back_error_type, + "Missing ) inserted for expression", + "I was expecting to see '+', '-', '*', '/', ':' or ')'. Didn't." + ); + } + break; + } + lmt_scanner_state.arithmic_error = error_b; + /*tex Make sure that |f| is in the proper range. 
*/ + switch (level) { + case int_val_level: + case attr_val_level: + if ((factor > infinity) || (factor < -infinity)) { + lmt_scanner_state.arithmic_error = 1; + factor = 0; + } + break; + case dimen_val_level: + if (abs(factor) > max_dimen) { + lmt_scanner_state.arithmic_error = 1; + factor = 0; + } + break; + case glue_val_level: + case mu_val_level: + if ((abs(glue_amount(factor)) > max_dimen) || (abs(glue_stretch(factor)) > max_dimen) || (abs(glue_shrink(factor)) > max_dimen)) { + lmt_scanner_state.arithmic_error = 1; + tex_reset_glue_to_zero(factor); + } + break; + default: + if ((state > expression_subtract) && ((factor > infinity) || (factor < -infinity))) { + lmt_scanner_state.arithmic_error = 1; + factor = 0; + } + } + /*tex Cases for evaluation of the current term. */ + switch (state) { + case expression_none: + /*tex + Applying the factor |f| to the partial term |t| (with the operator |s|) is delayed + until the next operator |o| has been scanned. Here we handle the first factor of a + partial term. A glue spec has to be copied unless the next operator is a right + parenthesis; this allows us later on to simply modify the glue components. + */ + term = factor; + if ((level >= glue_val_level) && (operation != expression_none)) { + /*tex Do we really need to copy here? */ + tex_aux_normalize_glue(term); + } else { + term = factor; + } + break; + case expression_multiply: + /*tex + If a multiplication is followed by a division, the two operations are combined into + a 'scaling' operation. Otherwise the term |t| is multiplied by the factor |f|. 
+ */ + if (operation == expression_divide) { + numerator = factor; + operation = expression_scale; + } else { + switch (level) { + case int_val_level: + case attr_val_level: + term = tex_multiply_integers(term, factor); + break; + case dimen_val_level: + term = tex_nx_plus_y(term, factor, 0); + break; + default: + glue_amount(term) = tex_nx_plus_y(glue_amount(term), factor, 0); + glue_stretch(term) = tex_nx_plus_y(glue_stretch(term), factor, 0); + glue_shrink(term) = tex_nx_plus_y(glue_shrink(term), factor, 0); + break; + } + } + break; + case expression_divide: + /*tex Here we divide the term |t| by the factor |f|. */ + if (level < glue_val_level) { + term = tex_aux_quotient(term, factor, 1); + } else { + glue_amount(term) = tex_aux_quotient(glue_amount(term), factor, 1); + glue_stretch(term) = tex_aux_quotient(glue_stretch(term), factor, 1); + glue_shrink(term) = tex_aux_quotient(glue_shrink(term), factor, 1); + } + break; + case expression_scale: + /*tex Here the term |t| is multiplied by the quotient $n/f$. */ + switch (level) { + case int_val_level: + case attr_val_level: + term = tex_fract(term, numerator, factor, infinity); + break; + case dimen_val_level: + term = tex_fract(term, numerator, factor, max_dimen); + break; + default: + glue_amount(term) = tex_fract(glue_amount(term), numerator, factor, max_dimen); + glue_stretch(term) = tex_fract(glue_stretch(term), numerator, factor, max_dimen); + glue_shrink(term) = tex_fract(glue_shrink(term), numerator, factor, max_dimen); + break; + } + break; + case expression_idivide: + /*tex Here we divide the term |t| by the factor |f| but we don't round. 
*/ + if (level < glue_val_level) { + term = tex_aux_quotient(term, factor, 0); + } else { + glue_amount(term) = tex_aux_quotient(glue_amount(term), factor, 0); + glue_stretch(term) = tex_aux_quotient(glue_stretch(term), factor, 0); + glue_shrink(term) = tex_aux_quotient(glue_shrink(term), factor, 0); + } + break; + } + if (operation > expression_subtract) { + state = operation; + } else { + /*tex + Evaluate the current expression. When a term |t| has been completed it is copied to, + added to, or subtracted from the expression |e|. + */ + state = expression_none; + if (result == expression_none) { + expression = term; + } else { + switch (level) { + case int_val_level: + case attr_val_level: + expression = tex_aux_add_or_sub(expression, term, infinity, result); + break; + case dimen_val_level: + expression = tex_aux_add_or_sub(expression, term, max_dimen, result); + break; + default : + /*tex + Compute the sum or difference of two glue specs. We know that |stretch_order + (e) > normal| implies |stretch (e) <> 0| and |shrink_order (e) > normal| + implies |shrink (e) <> 0|. 
+ */ + glue_amount(expression) = tex_aux_add_or_sub(glue_amount(expression), glue_amount(term), max_dimen, result); + if (glue_stretch_order(expression) == glue_stretch_order(term)) { + glue_stretch(expression) = tex_aux_add_or_sub(glue_stretch(expression), glue_stretch(term), max_dimen, result); + } else if ((glue_stretch_order(expression) < glue_stretch_order(term)) && (glue_stretch(term) != 0)) { + glue_stretch(expression) = glue_stretch(term); + glue_stretch_order(expression) = glue_stretch_order(term); + } + if (glue_shrink_order(expression) == glue_shrink_order(term)) { + glue_shrink(expression) = tex_aux_add_or_sub(glue_shrink(expression), glue_shrink(term), max_dimen, result); + } else if ((glue_shrink_order(expression) < glue_shrink_order(term)) && (glue_shrink(term) != 0)) { + glue_shrink(expression) = glue_shrink(term); + glue_shrink_order(expression) = glue_shrink_order(term); + } + tex_flush_node(term); + tex_aux_normalize_glue(expression); + break; + } + } + result = operation; + } + error_b = lmt_scanner_state.arithmic_error; + if (operation != expression_none) { + goto CONTINUE; + } else if (top) { + /*tex Pop the expression stack and |goto found|. */ + halfword t = top; + top = node_next(top); + factor = expression; + expression = expression_expression(t); + term = expression_term(t); + numerator = expression_numerator(t); + state = expression_state(t); + result = expression_result(t); + level = expression_type(t); + tex_free_node(t, expression_node_size); + goto FOUND; + } else if (error_b) { + tex_handle_error( + normal_error_type, + "Arithmetic overflow", + "I can't evaluate this expression, since the result is out of range." 
+ ); + if (level >= glue_val_level) { + tex_reset_glue_to_zero(expression); + } else { + expression = 0; + } + } + lmt_scanner_state.arithmic_error = error_a; + lmt_scanner_state.expression_depth--; + cur_val_level = level; + cur_val = expression; +} + +/*tex + + Already early in \LUAMETATEX\ I wondered about adding support for boolean expressions but at + that time (2019) I still wanted it as part of \type |\numexpr|. I added some code that actually + worked okay, but kept it commented. After all, we don't need it that often and \CONTEXT\ has + helpers for it so it's best to avoid the extra overhead in other expressions. + + However, occasionally, when I check the manual I came back to this. I wondered about some more + than just extra bitwise operators. However, precedence makes it a bit tricky. Also, we can't use + some characters because they can be letter, other, active or have special meaning in math or + alignments. Then I played with verbose operators: mod (instead of a percent sign), and + |and|, |or|, |band|, |bor| and |bxor| (cf the \LUA\ bit32 library). + + In the end I decided not to integrate it but make a dedicated |\bitexpr| instead. I played with + some variants but the approach in the normal expression scanner is not really suitable for it. + + In the end, after some variations, I decided that some reverse polish notation approach made + more sense and when considering an infix to rpn translation and searching the web a bit I ran + into a nice example: + + https://github.com/chidiwilliams/expression-evaluator/blob/main/simple.js + + It shows how to handle the nested expressions. I made a comparable variant in \LUA, extended + it for more than the usual four operators, condensed it a bit and then went on to write the code + below. Of course we have a completely different token parser and we use \TEX\ (temp) nodes for + a few stacks. 
I know that we can combine the loops but that becomes messy and performance is + quite okay, also because we move items from one to another stack with little overhead. Although + stacks are not that large, using static sized stacks (\CCODE\ arrays) makes no sense here. + + After the initial |\bitexpr| I eventually ended up with an integer and dimension scanner and + it became more complex than originally intended, but the current implementation is flexible + enough to extend. I can probably squeeze out some more performance. + + Beware: details can change, for instance handling some (math) \UNICODE\ characters has been + dropped because it's an inconsistent bunch and incomplete anyway. + + In the end we have a set of dedicated scanners. We could use the existing ones but for instance + units are optional here. We also have a bit more predictable sentinel, so we can optimize some + push back. We don't handle mu units nor fillers. It was also kind of fun to explore that. + +*/ + +typedef enum bit_expression_states { + bit_expression_none, + + bit_expression_bor, /* | bor v */ + bit_expression_band, /* & band */ + bit_expression_bxor, /* ^ bxor */ + + bit_expression_bset, /* bset */ + bit_expression_bunset, /* bunset */ + + bit_expression_bleft, /* << */ + bit_expression_bright, /* >> */ + + bit_expression_less, /* < */ + bit_expression_lessequal, /* <= */ + bit_expression_equal, /* = == */ + bit_expression_moreequal, /* >= */ + bit_expression_more, /* > */ + bit_expression_unequal, /* <> != */ + + bit_expression_add, /* + */ + bit_expression_subtract, /* - */ + + bit_expression_multiply, /* * */ + bit_expression_divide, /* / : */ + + bit_expression_mod, /* % mod */ + + // bit_expression_power, /* */ + + bit_expression_not, /* ! 
/*tex
    Binding strength of every |bit_expression_states| value; the expression scanner indexes this
    table with the operator code. A higher number binds tighter and the ordering follows \LUA.
    Parentheses and operands get zero.
*/

static int bit_operator_precedence[] = {
     0,          /* none                      */
     4,  6,  5,  /* bor, band, bxor           */
     7,  7,      /* bset, bunset: like shifts */
     7,  7,      /* bleft, bright             */
     3,  3,  3,  /* less, lessequal, equal    */
     3,  3,  3,  /* moreequal, more, unequal  */
     8,  8,      /* add, subtract             */
     9,  9,  9,  /* multiply, divide, mod     */
    10,          /* not                       */
     1,  2,      /* or, and                   */
     0,  0,      /* open, close               */
     0,  0,  0,  /* number, float, dimension  */
};

/*tex
    The names that the tracer prints for operator codes, in the same order as the table above.
*/

static const char *bit_expression_names[] = {
    "none",
    "bor", "band", "bxor",
    "bset", "bunset",
    "<<", ">>",
    "<", "<=", "==", ">=", ">", "<>",
    "+", "-",
    "*", "/", "mod",
    "not",
    "or", "and",
    "open", "close",
    "number", "float", "dimension"
};
*/ + halfword current = stack->head; + while (current) { + halfword next = node_next(current); + tex_free_node(current, expression_node_size); + current = next; + } +} + +static void tex_push_stack_entry(stack_info *stack, long long value) +{ + halfword n = tex_get_node(expression_node_size); + node_type(n) = expression_node; + node_subtype(n) = 0; + expression_entry(n) = value; + if (! stack->head) { + stack->head = n; + } else if (stack->head == stack->tail) { + node_next(stack->head) = n; + node_prev(n) = stack->head; + } else { + node_prev(n) = stack->tail; + node_next(stack->tail) = n; + } + stack->tail = n; +} + +static long long tex_pop_stack_entry(stack_info *stack) +{ + halfword t = stack->tail; + if (t) { + long long v = expression_entry(t); + if (t == stack->head) { + stack->head = null; + stack->tail = null; + } else { + stack->tail = node_prev(t); + node_next(stack->tail) = null; + } + tex_free_node(t, temp_node_size); + return v; + } else { + return 0; + } +} + +static void tex_move_stack_entry(stack_info *target, stack_info *source) +{ + halfword n = source->tail; + if (n == source->head) { + source->head = null; + source->tail = null; + } else { + source->tail = node_prev(n); + } + if (! target->head) { + target->head = n; + node_prev(n) = null; + } else if (target->head == target->tail) { + node_next(target->head) = n; + node_prev(n) = target->head; + } else { + node_prev(n) = target->tail; + node_next(target->tail) = n; + } + target->tail = n; +} + +static void tex_take_stack_entry(stack_info *target, stack_info *source, halfword current) +{ + while (source->head != current) { + halfword next = node_next(source->head); + tex_free_node(source->head, temp_node_size); + source->head = next; + } + if (current == source->tail) { + source->head = null; + source->tail = null; + } else { + source->head = node_next(current); + } + if (! 
target->head) { + target->head = current; + node_prev(current) = null; + } else if (target->head == target->tail) { + node_next(target->head) = current; + node_prev(current) = target->head; + } else { + node_prev(current) = target->tail; + node_next(target->tail) = current; + } + target->tail = current; + node_next(current) = null; +} + +static halfword tex_aux_scan_unit_applied(halfword value, halfword fraction, int has_fraction, int *has_unit) +{ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { + halfword saved_val = value; + value = tex_aux_scan_something_internal(cur_cmd, cur_chr, dimen_val_level, 0, 0); + value = tex_nx_plus_y(saved_val, cur_val, tex_xn_over_d(cur_val, fraction, 0200000)); + return value; + } else if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + halfword num = 0; + halfword denom = 0; + halfword saved_cs = cur_cs; + halfword saved_tok = cur_tok; + *has_unit = 1; + switch (cur_chr) { + case 'p': case 'P': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 't': case 'T': + goto NORMALUNIT; + case 'c': case 'C': + num = 12; + denom = 1; + goto NORMALUNIT; + case 'x': case 'X': + return tex_nx_plus_y(value, px_dimen_par, tex_xn_over_d(px_dimen_par, fraction, 0200000)); + } + } + break; + case 'c': case 'C': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'm': case 'M': + num = 7227; + denom = 254; + goto NORMALUNIT; + case 'c': case 'C': + num = 14856; + denom = 1157; + goto NORMALUNIT; + } + } + break; + case 's': case 'S': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'p': case 'P': + return scaled_point_scanned; + } + } + break; + case 'b': case 'B': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'p': case 'P': + 
num = 7227; + denom = 7200; + goto NORMALUNIT; + } + } + break; + case 'i': case 'I': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'n': case 'N': + num = 7227; + denom = 100; + goto NORMALUNIT; + } + } + break; + case 'd': case 'D': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'd': case 'D': + num = 1238; + denom = 1157; + goto NORMALUNIT; + } + } + break; + case 'e': case 'E': + tex_get_x_token(); + if (cur_cmd == letter_cmd || cur_cmd == other_char_cmd) { + switch (cur_chr) { + case 'm': case 'M': + return tex_get_scaled_em_width(cur_font_par); + case 'x': case 'X': + return tex_get_scaled_ex_height(cur_font_par); + } + } + break; + default: + goto HALFUNIT; + } + goto NOUNIT; + NORMALUNIT: + if (num) { + int remainder = 0; + value = tex_xn_over_d_r(value, num, denom, &remainder); + fraction = (num * fraction + 0200000 * remainder) / denom; + value += fraction / 0200000; + fraction = fraction % 0200000; + } + if (value >= 040000) { // 0x4000 + lmt_scanner_state.arithmic_error = 1; + } else { + value = value * unity + fraction; + } + return value; + NOUNIT: + tex_back_input(cur_tok); + HALFUNIT: + tex_back_input(saved_tok); + cur_cs = saved_cs; + cur_tok = saved_tok; + } else { + tex_back_input(cur_tok); + } + if (has_fraction) { + *has_unit = 0; + if (value >= 040000) { // 0x4000 + lmt_scanner_state.arithmic_error = 1; + } else { + value = value * unity + fraction; + } + } + return value; +} + +static halfword tex_scan_bit_int(int *radix) +{ + int negative = 0; + long long result = 0; + do { + if (cur_tok == minus_token) { + negative = ! 
negative; + cur_tok = plus_token; + } + } while (cur_tok == plus_token); + if (cur_tok == alpha_token) { + tex_get_token(); + if (cur_tok < cs_token_flag) { + result = cur_chr; + } else { + strnumber txt = cs_text(cur_tok - cs_token_flag); + if (tex_single_letter(txt)) { + result = aux_str2uni(str_string(txt)); + } else if (tex_is_active_cs(txt)) { + result = active_cs_value(txt); + } else { + result = max_character_code + 1; + } + } + if (result > max_character_code) { + result = '0'; /*tex Why not just 0. */ + tex_aux_improper_constant_error(); + } + } else if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { + result = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0); + if (cur_val_level != int_val_level) { + result = 0; + goto NONUMBER; + } + } else if (cur_cmd == math_style_cmd) { + result = (cur_chr == yet_unset_math_style) ? tex_scan_math_style_identifier(0, 0) : cur_chr; + } else if (cur_cmd == hyphenation_cmd) { + if (tex_aux_scan_hyph_data_number(cur_chr, &cur_chr)) { + result = cur_chr; + } else { + result = 0; + goto NONUMBER; + } + } else { + int vacuous = 1; + int ok_so_far = 1; + switch (cur_tok) { + case octal_token: + { + if (radix) { + *radix = 8; + } + while (1) { + tex_get_x_token(); + unsigned d = 0; + if ((cur_tok >= zero_token) && (cur_tok <= seven_token)) { + d = cur_tok - zero_token; + } else { + goto DONE; + } + vacuous = 0; + if (ok_so_far) { + result = result * 8 + d; + if (result > max_integer) { + result = infinity; + tex_aux_number_to_big_error(); + ok_so_far = 0; + } + } + } + break; + } + case hex_token: + { + if (radix) { + *radix = 16; + } + while (1) { + tex_get_x_token(); + unsigned d = 0; + if ((cur_tok >= zero_token) && (cur_tok <= nine_token)) { + d = cur_tok - zero_token; + } else if ((cur_tok >= A_token_l) && (cur_tok <= F_token_l)) { + d = cur_tok - A_token_l + 10; + } else if ((cur_tok >= A_token_o) && (cur_tok <= F_token_o)) { + d = cur_tok - A_token_o + 10; + } else { + goto DONE; + } 
+ vacuous = 0; + if (ok_so_far) { + result = result * 16 + d; + if (result > max_integer) { + result = infinity; + tex_aux_number_to_big_error(); + ok_so_far = 0; + } + } + } + break; + } + default: + { + if (radix) { + *radix = 10; + } + while (1) { + unsigned d = 0; + if ((cur_tok >= zero_token) && (cur_tok <= nine_token)) { + d = cur_tok - zero_token; + } else { + goto DONE; + } + vacuous = 0; + if (ok_so_far) { + result = result * 10 + d; + if (result > max_integer) { + result = infinity; + tex_aux_number_to_big_error(); + ok_so_far = 0; + } + } + tex_get_x_token(); + } + break; + } + } + DONE: + if (vacuous) { + NONUMBER: + tex_aux_missing_number_error(); + } else { + tex_push_back(cur_tok, cur_cmd, cur_chr); + } + } + cur_val = (halfword) (negative ? - result : result); + return cur_val; +} + +static halfword tex_scan_bit_dimen(int *has_fraction, int *has_unit) +{ + int negative = 0; + int fraction = 0; + *has_fraction = 0; + *has_unit = 1; + lmt_scanner_state.arithmic_error = 0; + do { + if (cur_tok == minus_token) { + negative = ! negative; + cur_tok = plus_token; + } + } while (cur_tok == plus_token); + if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { + cur_val = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0); + if (cur_val_level == dimen_val_level) { + goto ATTACH_SIGN; + } + } else { + *has_fraction = tex_token_is_seperator(cur_tok); + if (*has_fraction) { + /*tex We started with a |.| or |,|. 
*/ + cur_val = 0; + } else { + int cur_radix = 10; + cur_val = tex_scan_bit_int(&cur_radix); + if (cur_radix == 10 && tex_token_is_seperator(cur_tok)) { + *has_fraction = 1; + tex_get_token(); + } + } + if (*has_fraction) { + unsigned k = 0; + unsigned char digits[18]; + while (1) { + tex_get_x_token(); + if (cur_tok > nine_token || cur_tok < zero_token) { + break; + } else if (k < 17) { + digits[k] = (unsigned char) (cur_tok - zero_token); + ++k; + } + } + fraction = tex_round_decimals_digits(digits, k); + if (cur_cmd != spacer_cmd) { + /* we can avoid this when parsing a unit but not now */ + tex_back_input(cur_tok); + } + } + } + if (cur_val < 0) { + negative = ! negative; + cur_val = - cur_val; + } + cur_val = tex_aux_scan_unit_applied(cur_val, fraction, *has_fraction, has_unit); + ATTACH_SIGN: + if (lmt_scanner_state.arithmic_error || (abs(cur_val) >= 010000000000)) { // 0x40000000 + tex_aux_scan_dimen_out_of_range_error(); + cur_val = max_dimen; + lmt_scanner_state.arithmic_error = 0; + } + if (negative) { + cur_val = -cur_val; + } + return cur_val; +} + +static void tex_aux_trace_expression(stack_info stack, halfword level, halfword n, int what) +{ + tex_begin_diagnostic(); + if (n > 0) { + tex_print_format(level == dimen_val_level ? "[dimexpression rpn %i %s:" : "[numexpression rpn %i %s:", n, what ? "r" :"s"); + if (! stack.head) { + tex_print_char(' '); + } + } else { + tex_print_str(level == dimen_val_level ? 
"[dimexpression rpn:" : "[numexpression rpn:"); + } + for (halfword current = stack.head; current; current = node_next(current)) { + tex_print_char(' '); + switch (node_subtype(current)) { + case bit_expression_number: + tex_print_int(scaledround((double) expression_entry(current) / factor)); + break; + case bit_expression_float: + tex_print_dimension(scaledround((double) expression_entry(current) / factor), no_unit); + break; + case bit_expression_dimension: + tex_print_char('('); + tex_print_dimension(scaledround((double) expression_entry(current) / factor), no_unit); + tex_print_char(')'); + break; + default: + tex_print_str(bit_expression_names[expression_entry(current)]); + break; + } + } + tex_print_char(']'); + tex_end_diagnostic(); +} + +static void tex_aux_scan_expression(int level) +{ + stack_info operators = tex_aux_new_stack(); + stack_info reverse = tex_aux_new_stack(); + stack_info stack = tex_aux_new_stack(); + halfword operation = bit_expression_none; + int alreadygotten = 0; + int trace = tracing_expressions_par; + while (1) { + if (alreadygotten) { + alreadygotten= 0; + } else { + tex_get_x_token(); + } + operation = bit_expression_none; + switch (cur_cmd) { + case relax_cmd: + goto COLLECTED; + case spacer_cmd: + continue; + case superscript_cmd: + switch (cur_chr) { + case '^': + operation = bit_expression_bxor; + goto OKAY; + } + goto UNEXPECTED; + case alignment_tab_cmd: + switch (cur_chr) { + case '&': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: + case other_char_cmd: + case alignment_tab_cmd: + switch (cur_chr) { + case '&': + operation = bit_expression_and; + goto OKAY; + default: + operation = bit_expression_band; + alreadygotten = 1; + goto OKAY; + } + } + } + goto UNEXPECTED; + case letter_cmd: + case other_char_cmd: + switch (cur_chr) { + case '(': + tex_push_stack_entry(&operators, bit_expression_open); + continue; + case ')': + while (operators.tail && expression_entry(operators.tail) != bit_expression_open) { + 
tex_move_stack_entry(&reverse, &operators); + } + tex_pop_stack_entry(&operators); + continue; + case '+': + operation = bit_expression_add; + break; + case '-': + operation = bit_expression_subtract; + break; + case '*': + operation = bit_expression_multiply; + break; + case '/': + case ':': + operation = bit_expression_divide; + break; + case '%': + operation = bit_expression_mod; + break; + case '&': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: + case other_char_cmd: + case alignment_tab_cmd: + switch (cur_chr) { + case '&': + operation = bit_expression_and; + goto OKAY; + } + } + operation = bit_expression_band; + alreadygotten = 1; + break; + case '^': + operation = bit_expression_bxor; + break; + case 'v': + operation = bit_expression_bor; + break; + case '|': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: + case other_char_cmd: + switch (cur_chr) { + case '|': + operation = bit_expression_or; + goto OKAY; + } + } + operation = bit_expression_bor; + alreadygotten = 1; + break; + case '<': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: + case other_char_cmd: + switch (cur_chr) { + case '<': + operation = bit_expression_bleft; + goto OKAY; + case '=': + operation = bit_expression_lessequal; + goto OKAY; + case '>': + operation = bit_expression_unequal; + goto OKAY; + } + } + operation = bit_expression_less; + alreadygotten = 1; + break; + case '>': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: + case other_char_cmd: + switch (cur_chr) { + case '>': + operation = bit_expression_bright; + goto OKAY; + case '=': + operation = bit_expression_moreequal; + goto OKAY; + } + } + operation = bit_expression_more; + alreadygotten = 1; + break; + case '=': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: + case other_char_cmd: + switch (cur_chr) { + case '=': + break; + default: + alreadygotten = 1; + break; + } + } + operation = bit_expression_equal; + break; + case '~': case '!': + tex_get_x_token(); 
+ switch (cur_cmd) { + case letter_cmd: + case other_char_cmd: + switch (cur_chr) { + case '=': + operation = bit_expression_unequal; + goto OKAY; + } + } + operation = bit_expression_not; + alreadygotten = 1; + break; + case 'm': case 'M': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'o': case 'O': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'd': case 'D': + operation = bit_expression_mod; + goto OKAY; + } + } + } + } + goto UNEXPECTED; + case 'n': case 'N': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'o': case 'O': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'o': case 'T': + operation = bit_expression_not; + goto OKAY; + } + } + } + } + goto UNEXPECTED; + case 'a': case 'A': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'n': case 'N': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'd': case 'D': + operation = bit_expression_and; + goto OKAY; + } + } + } + } + goto UNEXPECTED; + case 'b': case 'B': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: + switch (cur_chr) { + case 'a': case 'A': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'n': case 'N': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'd': case 'D': + operation = bit_expression_band; + goto OKAY; + } + } + } + } + break; + case 'o': case 'O': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'r': case 'R': + operation = bit_expression_bor; + goto OKAY; + } + } + break; + case 'x': case 'X': + tex_get_x_token(); + switch (cur_cmd) { + case 
letter_cmd: case other_char_cmd: switch (cur_chr) { case 'o': case 'O': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'r': case 'R': + operation = bit_expression_bxor; + goto OKAY; + } + } + } + } + break; + case 's': case 'S': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'e': case 'S': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 't': case 'T': + operation = bit_expression_bset; + goto OKAY; + } + } + } + } + break; + case 'r': case 'R': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'e': case 'E': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 's': case 'S': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'e': case 'S': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 't': case 'T': + operation = bit_expression_bset; + goto OKAY; + } + } + } + } + } + } + } + } + break; + } + } + goto UNEXPECTED; + case 'o': case 'O': + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: case other_char_cmd: switch (cur_chr) { case 'r': case 'R': + operation = bit_expression_or; + goto OKAY; + } + } + goto UNEXPECTED; + default: + goto NUMBER; + } + OKAY: + while (operators.tail && bit_operator_precedence[expression_entry(operators.tail)] >= bit_operator_precedence[operation]) { + // tex_push_stack_entry(&reverse, tex_pop_stack_entry(&operators)); + tex_move_stack_entry(&reverse, &operators); + } + tex_push_stack_entry(&operators, operation); + break; + default: + NUMBER: + /*tex These use |cur_tok|: */ + { + int has_fraction = 0; + int has_unit = 1; + operation = level == dimen_val_level ? 
tex_scan_bit_dimen(&has_fraction, &has_unit) : tex_scan_bit_int(NULL); + tex_push_stack_entry(&reverse, operation * factor); + if (level == dimen_val_level && has_unit) { + node_subtype(reverse.tail) = bit_expression_dimension; + } else if (has_fraction) { + node_subtype(reverse.tail) = bit_expression_float; + } else { + node_subtype(reverse.tail) = bit_expression_number; + } + continue; + } + } + } + COLLECTED: + while (operators.tail) { + tex_move_stack_entry(&reverse, &operators); + } + /*tex This is the reference: */ + /* + { + halfword current = reverse.head; + while (current) { + if (node_subtype(current) == bit_expression_number) { + tex_push_stack_entry(&stack, expression_entry(current)); + } else { + halfword token = expression_entry(current); + long long v; + if (token == bit_expression_not) { + v = ~ (long long) tex_pop_stack_entry(&stack); + } else { + long long b = (long long) tex_pop_stack_entry(&stack); + long long a = (long long) tex_pop_stack_entry(&stack); + switch (token) { + // calculations, see below + } + } + // checks, see below + tex_push_stack_entry(&stack, (halfword) v); + } + current = node_next(current); + } + } + */ + if (trace == 1) { + tex_aux_trace_expression(reverse, level, 0, 0); + } + { + halfword current = reverse.head; + int step = 0; + while (current) { + halfword next = node_next(current); + halfword subtype = node_subtype(current); + if (trace > 1) { + step = step + 1; + tex_aux_trace_expression(reverse, level, step, 0); + tex_aux_trace_expression(stack, level, step, 1); + } + switch (subtype) { + case bit_expression_number: + case bit_expression_float: + case bit_expression_dimension: + tex_take_stack_entry(&stack, &reverse, current); + break; + default: + { + halfword token = (halfword) expression_entry(current); + long long v = 0; + if (token == bit_expression_not) { + v =~ stack.tail ? 
expression_entry(stack.tail) : 0; + } else { + quarterword sa, sb; + long long va, vb; + sb = node_subtype(stack.tail); + vb = tex_pop_stack_entry(&stack); + if (stack.tail) { + sa = node_subtype(stack.tail); + va = expression_entry(stack.tail); + } else { + sa = bit_expression_number; + va = 0; + } + switch (token) { + case bit_expression_bor: + v = va | vb; + break; + case bit_expression_band: + v = va & vb; + break; + case bit_expression_bxor: + v = va ^ vb; + break; + case bit_expression_bset: + v = va | ((long long) 1 << (vb - 1)); + break; + case bit_expression_bunset: + v = va & ~ ((long long) 1 << (vb - 1)); + break; + case bit_expression_bleft: + v = va << vb; + break; + case bit_expression_bright: + v = va >> vb; + break; + case bit_expression_less: + v = va < vb; + break; + case bit_expression_lessequal: + v = va <= vb; + break; + case bit_expression_equal: + v = va == vb; + break; + case bit_expression_moreequal: + v = va >= vb; + break; + case bit_expression_more: + v = va > vb; + break; + case bit_expression_unequal: + v = va != vb; + break; + case bit_expression_add: + v = va + vb; + break; + case bit_expression_subtract: + v = va - vb; + break; + case bit_expression_multiply: + { + double d = va * vb; + if (sa == bit_expression_float) { + d = d / (65536 * factor); + } else if (sb == bit_expression_float) { + d = d / (65536 * factor); + } else { + d = d / factor; + } + if (sa == bit_expression_dimension || sb == bit_expression_dimension) { + node_subtype(stack.tail) = bit_expression_dimension; + } + v = longlonground(d); + } + break; + case bit_expression_divide: + if (vb) { + double d = (double) va / (double) vb; + if (sa == bit_expression_float) { + // d = d / (65536 * factor); + d = d * (65536 * factor); + } else if (sb == bit_expression_float) { + // d = d / (65536 * factor); + d = d * (65536 * factor); + } else { + d = d * factor; + } + if (sa == bit_expression_dimension || sb == bit_expression_dimension) { + node_subtype(stack.tail) = 
bit_expression_dimension; + } + v = longlonground(d); + } else { + goto ZERO; + } + break; + case bit_expression_mod: + v = va % vb; + break; + case bit_expression_or: + v = (va || vb) ? 1 : 0; + break; + case bit_expression_and: + v = (va && vb) ? 1 : 0; break; + default: + v = 0; + break; + } + } + if (v < -infinity) { + v = -infinity; + } else if (v > infinity) { + v = infinity; + } + expression_entry(stack.tail) = v; + break; + } + } + current = next; + } + } + goto DONE; + ZERO: + tex_handle_error( + back_error_type, + "I can't divide by zero", + "I was expecting to see a nonzero number. Didn't." + ); + goto DONE; + UNEXPECTED: + tex_handle_error( + back_error_type, + "Premature end of bit expression", + "I was expecting to see an integer or bitwise operator. Didn't." + ); + DONE: + cur_val = scaledround(((double) expression_entry(stack.tail)) / factor); + cur_val_level = level; + tex_aux_dispose_stack(&stack); + tex_aux_dispose_stack(&reverse); + tex_aux_dispose_stack(&operators); +} + +int tex_scanned_expression(int level) +{ + tex_aux_scan_expression(level); + return cur_val; +} + +/* */ + +halfword tex_scan_scale(int optional_equal) +{ + int negative = 0; + lmt_scanner_state.arithmic_error = 0; + do { + while (1) { + tex_get_x_token(); + if (cur_cmd != spacer_cmd) { + if (optional_equal && (cur_tok == equal_token)) { + optional_equal = 0; + } else { + break; + } + } + } + if (cur_tok == minus_token) { + negative = ! 
negative; + cur_tok = plus_token; + } + } while (cur_tok == plus_token); + if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { + cur_val = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0); + } else { + int has_fraction = tex_token_is_seperator(cur_tok); + if (has_fraction) { + cur_val = 0; + } else { + int cur_radix; + tex_back_input(cur_tok); + cur_val = tex_scan_int(0, &cur_radix); + tex_get_token(); + if (cur_radix == 10 && tex_token_is_seperator(cur_tok)) { + has_fraction = 1; + } + } + if (has_fraction) { + unsigned k = 4; + cur_val = cur_val * 1000; + while (1) { + tex_get_x_token(); + if (cur_tok < zero_token || cur_tok > nine_token) { + break; + } else if (k == 1) { + /* rounding */ + if (cur_tok >= five_token && cur_tok <= nine_token) { + cur_val += 1; + } + --k; + } else if (k) { + cur_val = cur_val + (k == 4 ? 100 : (k == 3 ? 10 : 1)) * (cur_tok - zero_token); + --k; + } + } + } + tex_push_back(cur_tok, cur_cmd, cur_chr); + } + if (negative) { + cur_val = -cur_val; + } + if (lmt_scanner_state.arithmic_error || (abs(cur_val) >= 0x40000000)) { + // scan_dimen_out_of_range_error(); + cur_val = max_dimen; + lmt_scanner_state.arithmic_error = 0; + } + return cur_val; +} + +int tex_scan_tex_value(halfword level, halfword *value) +{ + tex_aux_scan_expr(level); + *value = cur_val; + return 1; +} + +quarterword tex_scan_direction(int optional_equal) +{ + int i = tex_scan_int(optional_equal, NULL); + return checked_direction_value(i); +} + +halfword tex_scan_geometry(int optional_equal) +{ + int i = tex_scan_int(optional_equal, NULL); + return checked_geometry_value(i); +} + +halfword tex_scan_orientation(int optional_equal) +{ + halfword i = tex_scan_int(optional_equal, NULL); + return checked_orientation_value(i); +} + +halfword tex_scan_anchor(int optional_equal) +{ + halfword a = tex_scan_int(optional_equal, NULL); + halfword l = (a >> 16) & 0xFFFF; + halfword r = a & 0xFFFF; + return (checked_anchor_value(l) << 16) + 
checked_anchor_value(r); +} + +halfword tex_scan_anchors(int optional_equal) +{ + halfword l = tex_scan_int(optional_equal, NULL) & 0xFFFF; + halfword r = tex_scan_int(0, NULL) & 0xFFFF; + return (checked_anchor_value(l) << 16) + checked_anchor_value(r); +} + +/*tex + Scan an attribute register number followed by an integer value (an equal sign is permitted) + and return an attribute list that has this attribute set to that value. When the register + already has the value, |attrlist| is returned untouched. Otherwise a given |attrlist| is + patched and when there is none a copy of the current attribute list with the attribute set + is made. This is the same logic as used for the |attr| key in |tex_scan_adjust_keys|, so we + use the attribute register number scanner here too (and not the one for token registers). +*/ + +halfword tex_scan_attribute(halfword attrlist) +{ + halfword i = tex_scan_attribute_register_number(); + halfword v = tex_scan_int(1, NULL); + if (eq_value(register_attribute_location(i)) != v) { + if (attrlist) { + attrlist = tex_patch_attribute_list(attrlist, i, v); + } else { + attrlist = tex_copy_attribute_list_set(tex_current_attribute_list(), i, v); + } + } + return attrlist; +} diff --git a/source/luametatex/source/tex/texscanning.h b/source/luametatex/source/tex/texscanning.h new file mode 100644 index 000000000..90897bf54 --- /dev/null +++ b/source/luametatex/source/tex/texscanning.h @@ -0,0 +1,210 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_SCANNING_H +# define LMT_SCANNING_H + +typedef enum value_level_code { + int_val_level, /*tex integer values */ + attr_val_level, /*tex integer values */ + dimen_val_level, /*tex dimension values */ + glue_val_level, /*tex glue specifications */ + mu_val_level, /*tex math glue specifications */ + tok_val_level, /*tex token lists */ + font_val_level , /*tex font identifier */ + mathspec_val_level , + fontspec_val_level , + specification_val_level, /*tex special purpose identifier */ + list_val_level, + no_val_level, +} value_level_code; + +# define first_value_level int_val_level +# define last_value_level mu_val_level + +typedef struct scanner_state_info { + int current_cmd; /*tex current command set by |get_next| */ + halfword current_chr; /*tex operand of current command */ + halfword current_cs; /*tex control sequence found here, zero if none found */ + // halfword current_flag; + halfword current_tok; /*tex packed representative of |cur_cmd| and |cur_chr| */ + int current_val; /*tex value returned by numeric scanners */ + int 
current_val_level; /*tex the level of this value */ + halfword current_box; /*tex the box to be placed into its context: */ + halfword last_cs_name; /*tex used in |\csname| and |\ifcsname| */ + int arithmic_error; + int expression_depth; +} scanner_state_info; + +extern scanner_state_info lmt_scanner_state; + +/*tex + These are rather basic \TEX\ The Program variables (aliases) so for now we stick to the + unqualified short names. +*/ + +# define cur_cmd lmt_scanner_state.current_cmd +# define cur_chr lmt_scanner_state.current_chr +# define cur_cs lmt_scanner_state.current_cs +# define cur_tok lmt_scanner_state.current_tok +# define cur_val lmt_scanner_state.current_val +# define cur_val_level lmt_scanner_state.current_val_level +# define cur_box lmt_scanner_state.current_box + +typedef struct full_scanner_status { + int save_scanner_status; + halfword save_def_ref; + halfword save_warning_index; +} full_scanner_status; + +inline static full_scanner_status tex_save_full_scanner_status(void) +{ + full_scanner_status a; + a.save_scanner_status = lmt_input_state.scanner_status; + a.save_def_ref = lmt_input_state.def_ref; + a.save_warning_index = lmt_input_state.warning_index; + return a; +} + +inline static void tex_unsave_full_scanner_status(full_scanner_status a) +{ + lmt_input_state.warning_index = a.save_warning_index; + lmt_input_state.def_ref = a.save_def_ref; + lmt_input_state.scanner_status = a.save_scanner_status; +} + +extern void tex_scan_something_simple (halfword cmd, halfword code); +extern void tex_scan_left_brace (void); +extern void tex_scan_optional_equals (void); +extern int tex_scan_cardinal (unsigned *value, int dontbark); +extern halfword tex_scan_int (int optional_equal, int *radix); +extern halfword tex_scan_scale (int optional_equal); +extern halfword tex_scan_dimen (int mu, int inf, int shortcut, int optional_equal, halfword *order); +extern halfword tex_scan_glue (int level, int optional_equal); +extern halfword tex_scan_font (int 
optional_equal); +extern halfword tex_scan_general_text (halfword *tail); +/* halfword tex_scan_toks (int macrodef, int xpand, int left_brace_found); */ +extern halfword tex_scan_toks_normal (int left_brace_found, halfword *tail); +extern halfword tex_scan_toks_expand (int left_brace_found, halfword *tail, int expandconstant); +extern halfword tex_scan_macro_normal (void); // (int tolerant); +extern halfword tex_scan_macro_expand (void); // (int tolerant); +extern halfword tex_scan_font_identifier (halfword *spec); +extern halfword tex_scan_fontspec_identifier (void); +extern halfword tex_scan_math_style_identifier (int tolerant, int styles); +extern halfword tex_scan_math_parameter (void); +extern halfword tex_scan_limited_scale (int optional_equal); +extern halfword tex_scan_positive_scale (int optional_equal); + +extern quarterword tex_scan_direction (int optional_equal); +extern halfword tex_scan_geometry (int optional_equal); +extern halfword tex_scan_orientation (int optional_equal); +extern halfword tex_scan_anchor (int optional_equal); +extern halfword tex_scan_anchors (int optional_equal); + +extern int tex_scanned_expression (int level); + +extern halfword tex_scan_int_register_number (void); +extern halfword tex_scan_dimen_register_number (void); +extern halfword tex_scan_attribute_register_number (void); +extern halfword tex_scan_glue_register_number (void); +extern halfword tex_scan_mu_glue_register_number (void); +extern halfword tex_scan_toks_register_number (void); +extern halfword tex_scan_box_register_number (void); +extern halfword tex_scan_mark_number (void); +extern halfword tex_scan_char_number (int optional_equal); +extern halfword tex_scan_math_char_number (void); +extern halfword tex_scan_math_family_number (void); +extern halfword tex_scan_math_class_number (int optional_equal); +extern halfword tex_scan_math_properties_number (void); +extern halfword tex_scan_math_group_number (void); +extern halfword tex_scan_math_index_number (void); 
+extern halfword tex_scan_math_discretionary_number (int optional_equal); +extern halfword tex_scan_category_code (void); +extern singleword tex_scan_box_index (void); /*tex For local boxes: small for now! */ +extern singleword tex_scan_box_axis (void); +extern halfword tex_scan_function_reference (int optional_equal); +extern halfword tex_scan_bytecode_reference (int optional_equal); + +extern halfword tex_the_value_toks (int unit, halfword *tail, halfword property); /* returns head */ +extern halfword tex_the_toks (int code, halfword *tail); /* returns head */ +extern halfword tex_the_detokenized_toks (halfword *head); +extern strnumber tex_the_scanned_result (void); + +extern void tex_set_font_dimen (void); +extern halfword tex_get_font_dimen (void); +extern void tex_set_scaled_font_dimen (void); +extern halfword tex_get_scaled_font_dimen (void); + +extern void tex_get_x_or_protected (void); + +extern int tex_fract (int x, int n, int d, int max_answer); + +extern halfword tex_scan_lua_value (int index); + +extern int tex_scan_tex_value (halfword level, halfword *value); + +extern halfword tex_scan_attribute (halfword attrlist); + +/* +# define token_is_digit(t) ((t >= zero_token ) && (t <= nine_token )) +# define token_is_xdigit(t) (((t >= zero_token ) && (t <= nine_token )) || \ + ((t >= a_token_l ) && (t <= f_token_l )) || \ + ((t >= A_token_l ) && (t <= F_token_l )) || \ + ((t >= a_token_o ) && (t <= f_token_o )) || \ + ((t >= A_token_o ) && (t <= F_token_o ))) +# define token_is_exponent(t) ((t == E_token_l ) || (t == e_token_l ) || \ + (t == E_token_o ) || (t == e_token_o )) +# define token_is_xexponent(t) ((t == P_token_l ) || (t == p_token_l ) || \ + (t == P_token_o ) || (t == p_token_o )) +# define token_is_hexadecimal(t) ((t == X_token_l ) || (t == x_token_l ) || \ + (t == X_token_o ) || (t == x_token_o )) +# define token_is_sign(t) ((t == minus_token ) || (t == plus_token )) +# define token_is_seperator(t) ((t == period_token) || (t == comma_token)) 
+*/ + +inline static int tex_token_is_digit(halfword t) +{ + return (t >= zero_token) && (t <= nine_token); +} + +inline static int tex_token_is_xdigit(halfword t) { + return ((t >= zero_token) && (t <= nine_token)) + || ((t >= a_token_l ) && (t <= f_token_l)) + || ((t >= A_token_l ) && (t <= F_token_l)) + || ((t >= a_token_o ) && (t <= f_token_o)) + || ((t >= A_token_o ) && (t <= F_token_o)); +} + +inline static int tex_token_is_exponent(halfword t) +{ + return (t == E_token_l) || (t == e_token_l) + || (t == E_token_o) || (t == e_token_o); +} + +inline static int tex_token_is_xexponent(halfword t) +{ + return (t == P_token_l) || (t == p_token_l) + || (t == P_token_o) || (t == p_token_o); +} + + inline static int tex_token_is_hexadecimal(halfword t) +{ + return (t == X_token_l) || (t == x_token_l) + || (t == X_token_o) || (t == x_token_o); +} + +inline static int tex_token_is_sign(halfword t) { + return (t == minus_token) || (t == plus_token); +} + +inline static int tex_token_is_seperator(halfword t) { + return (t == period_token) || (t == comma_token); +} + +inline static int tex_token_is_operator(halfword t) { + return (t == plus_token) || (t == minus_token) || (t == asterisk_token) || (t == slash_token) || (t == colon_token); +} + +# endif + diff --git a/source/luametatex/source/tex/texstringpool.c b/source/luametatex/source/tex/texstringpool.c new file mode 100644 index 000000000..8367447da --- /dev/null +++ b/source/luametatex/source/tex/texstringpool.c @@ -0,0 +1,607 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + Control sequence names and diagnostic messages are variable length strings of eight bit + characters. Since \PASCAL\ did not have a well-developed string mechanism, \TEX\ did all of its + string processing by homegrown methods. + + Elaborate facilities for dynamic strings are not needed, so all of the necessary operations can + be handled with a simple data structure. 
The array |str_pool| contains all of the (eight-bit) + bytes off all of the strings, and the array |str_start| contains indices of the starting points + of each string. Strings are referred to by integer numbers, so that string number |s| comprises + the characters |str_pool[j]| for |str_start_macro(s) <= j < str_start_macro (s + 1)|. Additional + integer variables |pool_ptr| and |str_ptr| indicate the number of entries used so far in + |str_pool| and |str_start|, respectively; locations |str_pool[pool_ptr]| and |str_start_macro + (str_ptr)| are ready for the next string to be allocated. + + String numbers 0 to |biggest_char| are reserved for strings that correspond to single \UNICODE\ + characters. This is in accordance with the conventions of \WEB\ which converts single-character + strings into the ASCII code number of the single character involved. + + The stringpool variables are collected in: + +*/ + +string_pool_info lmt_string_pool_state = { + .string_pool = NULL, + .string_pool_data = { + .minimum = min_pool_size, + .maximum = max_pool_size, + .size = siz_pool_size, + .step = stp_pool_size, + .allocated = 0, + .itemsize = sizeof(lstring), + .top = 0, + .ptr = 0, + .initial = 0, + .offset = cs_offset_value, + }, + .string_body_data = { + .minimum = min_body_size, + .maximum = max_body_size, + .size = siz_body_size, + .step = stp_body_size, + .allocated = 0, + .itemsize = sizeof(unsigned char), + .top = memory_data_unset, + .ptr = memory_data_unset, + .initial = 0, + .offset = 0, + }, + .reserved = 0, + .string_max_length = 0, + .string_temp = NULL, + .string_temp_allocated = 0, + .string_temp_top = 0, +}; + +/*tex + + The array of strings is |string_pool|, the number of the current string being created is + |str_ptr|, the starting value of |str_ptr| is |init_str_ptr|, and the current string buffer, + the current index in that buffer, the mallocedsize of |cur_string| and the occupied byte count + are kept in |cur_string|, |cur_length|, |cur_string_size| and 
|pool_size|. + + Once a sequence of characters has been appended to |cur_string|, it officially becomes a string + when the function |make_string| is called. This function returns the identification number of + the new string as its value. + + Strings end with a zero character which makes \TEX\ string also valid \CCODE\ strings. The + |string_temp*| fields deal with a temporary string (building). + + The |ptr| is always one ahead. This is kind of a safeguard: an overflow happens already when we + still assemble a new string. + +*/ + +# define initial_temp_string_slots 256 +# define reserved_temp_string_slots 2 + +static inline void tex_aux_increment_pool_string(int n) +{ + lmt_string_pool_state.string_body_data.allocated += n; + if (lmt_string_pool_state.string_body_data.allocated > lmt_string_pool_state.string_body_data.size) { + tex_overflow_error("poolbody", lmt_string_pool_state.string_body_data.allocated); + } +} + +static inline void tex_aux_decrement_pool_string(int n) +{ + lmt_string_pool_state.string_body_data.allocated -= n; +} + +static void tex_aux_flush_cur_string(void) +{ + if (lmt_string_pool_state.string_temp) { + aux_deallocate_array(lmt_string_pool_state.string_temp); + } + lmt_string_pool_state.string_temp = NULL; + lmt_string_pool_state.string_temp_top = 0; + lmt_string_pool_state.string_temp_allocated = 0; +} + +void tex_reset_cur_string(void) +{ + unsigned char *tmp = aux_allocate_clear_array(sizeof(unsigned char), initial_temp_string_slots, reserved_temp_string_slots); + if (tmp) { + lmt_string_pool_state.string_temp = tmp; + lmt_string_pool_state.string_temp_top = 0; + lmt_string_pool_state.string_temp_allocated = initial_temp_string_slots; + } else { + tex_overflow_error("pool", initial_temp_string_slots); + } +} + +static int tex_aux_room_in_string(int wsize) +{ + /* no callback here */ + if (! 
lmt_string_pool_state.string_temp) { + tex_reset_cur_string(); + } + if ((lmt_string_pool_state.string_temp_top + wsize) > lmt_string_pool_state.string_temp_allocated) { + /*tex + We grow by some twenty percent plus a fixed amount. When that is not enough we have to + look at what is already in use plus what is asked for (and not only at the requested + size), otherwise a large append to an already large string ends up beyond the buffer. + */ + unsigned char *tmp = NULL; + int needed = lmt_string_pool_state.string_temp_top + wsize; + int size = lmt_string_pool_state.string_temp_allocated + lmt_string_pool_state.string_temp_allocated / 5 + STRING_EXTRA_AMOUNT; + if (size < needed) { + size = needed + STRING_EXTRA_AMOUNT; + } + tmp = aux_reallocate_array(lmt_string_pool_state.string_temp, sizeof(unsigned char), size, reserved_temp_string_slots); + if (tmp) { + lmt_string_pool_state.string_temp = tmp; + memset(tmp + lmt_string_pool_state.string_temp_top, 0, (size_t) size - lmt_string_pool_state.string_temp_top); + /*tex Only now the new size can be trusted. */ + lmt_string_pool_state.string_temp_allocated = size; + } else { + tex_overflow_error("pool", size); + /*tex In case we get here: there is no room, so the caller should not append. */ + return 0; + } + } + return 1; +} + +# define reserved_string_slots 1 + +/*tex Messy: ptr and top have cs_offset_value included */ + +void tex_initialize_string_mem(void) +{ + int size = lmt_string_pool_state.string_pool_data.minimum; + if (lmt_main_state.run_state == initializing_state) { + size = lmt_string_pool_state.string_pool_data.minimum; + lmt_string_pool_state.string_pool_data.ptr = cs_offset_value; + } else { + size = lmt_string_pool_state.string_pool_data.allocated; + lmt_string_pool_state.string_pool_data.initial = lmt_string_pool_state.string_pool_data.ptr; + } + if (size > 0) { + lstring *pool = aux_allocate_clear_array(sizeof(lstring), size, reserved_string_slots); + if (pool) { + lmt_string_pool_state.string_pool = pool; + lmt_string_pool_state.string_pool_data.allocated = size; + } else { + tex_overflow_error("pool", size); + } + } +} + +void tex_initialize_string_pool(void) +{ + unsigned char *nullstring = lmt_memory_malloc(1); + int size = lmt_string_pool_state.string_pool_data.allocated; + if (size && nullstring) { + lmt_string_pool_state.string_pool[0].s = nullstring; + nullstring[0] = '\0'; + lmt_string_pool_state.string_pool_data.ptr += 1; + tex_reset_cur_string(); + } else { + 
tex_overflow_error("pool", size); + } +} + +static int tex_aux_room_in_string_pool(int n) +{ + int top = lmt_string_pool_state.string_pool_data.ptr + n; + if (top > lmt_string_pool_state.string_pool_data.top) { + lmt_string_pool_state.string_pool_data.top = top; + top -= cs_offset_value; + if (top > lmt_string_pool_state.string_pool_data.allocated) { + lstring *tmp = NULL; + top = lmt_string_pool_state.string_pool_data.allocated; + do { + top += lmt_string_pool_state.string_pool_data.step; + n -= lmt_string_pool_state.string_pool_data.step; + } while (n > 0); + if (top > lmt_string_pool_state.string_pool_data.size) { + top = lmt_string_pool_state.string_pool_data.size; + } + if (top > lmt_string_pool_state.string_pool_data.allocated) { + lmt_string_pool_state.string_pool_data.allocated = top; + tmp = aux_reallocate_array(lmt_string_pool_state.string_pool, sizeof(lstring), top, reserved_string_slots); + lmt_string_pool_state.string_pool = tmp; + } + lmt_run_memory_callback("pool", tmp ? 1 : 0); + if (! tmp) { + tex_overflow_error("pool", top); + return 0; + } + } + } + return 1; +} + +/*tex + + Checking for the last one to be the same as the previous one doesn't save much some 10K on a + \CONTEXT\ format. 
+ +*/ + +strnumber tex_make_string(void) +{ + if (tex_aux_room_in_string(1)) { + int ptr = lmt_string_pool_state.string_pool_data.ptr; + lmt_string_pool_state.string_temp[lmt_string_pool_state.string_temp_top] = '\0'; + str_string(ptr) = lmt_string_pool_state.string_temp; + str_length(ptr) = lmt_string_pool_state.string_temp_top; + tex_aux_increment_pool_string(lmt_string_pool_state.string_temp_top); + tex_reset_cur_string(); + if (tex_aux_room_in_string_pool(1)) { + lmt_string_pool_state.string_pool_data.ptr++; + } + return ptr; + } else { + return get_nullstr(); + } +} + +strnumber tex_push_string(const unsigned char *s, int l) +{ + if (tex_aux_room_in_string_pool(1)) { + unsigned char *t = lmt_memory_malloc(sizeof(char) * ((size_t) l + 1)); + if (t) { + int ptr = lmt_string_pool_state.string_pool_data.ptr; + memcpy(t, s, l); + t[l] = '\0'; + str_string(ptr) = t; + str_length(ptr) = l; + lmt_string_pool_state.string_pool_data.ptr++; + tex_aux_increment_pool_string(l); + return ptr; + } + } + return get_nullstr(); +} + +char *tex_take_string(int *len) +{ + char* ptr = NULL; + if (tex_aux_room_in_string(1)) { + lmt_string_pool_state.string_temp[lmt_string_pool_state.string_temp_top] = '\0'; + if (len) { + *len = lmt_string_pool_state.string_temp_top; + } + ptr = (char *) lmt_string_pool_state.string_temp; + tex_reset_cur_string(); + } + return ptr; +} + +/*tex + + The following subroutine compares string |s| with another string of the same length that appears + in |buffer| starting at position |k|; the result is |true| if and only if the strings are equal. + Empirical tests indicate that |str_eq_buf| is used in such a way that it tends to return |true| + about 80 percent of the time. 
+ + \starttyping + unsigned char *j = str_string(s); + unsigned char *l = j + str_length(s); + while (j < l) { + if (*j++ != buffer[k++]) + return 0; + } + \stoptyping + +*/ + +int tex_str_eq_buf(strnumber s, int k, int n) +{ + if (s < cs_offset_value) { + return buffer_to_unichar(k) == (unsigned int) s; + } else { + return memcmp(str_string(s), &lmt_fileio_state.io_buffer[k], n) == 0; + } +} + +/*tex + + Here is a similar routine, but it compares two strings in the string pool, and it does not + assume that they have the same length. + + \starttyping + k = str_string(t); + j = str_string(s); + l = j + str_length(s); + while (j < l) { + if (*j++ != *k++) + return 0; + } + \stoptyping +*/ + +int tex_str_eq_str(strnumber s, strnumber t) +{ + if (s >= cs_offset_value) { + if (t >= cs_offset_value) { + /* s and t are strings, this is the most likely test */ + return (str_length(s) == str_length(t)) && ! memcmp(str_string(s), str_string(t), str_length(s)); + } else { + /* s is a string and t an unicode character, happens seldom */ + return (strnumber) aux_str2uni(str_string(s)) == t; + } + } else if (t >= cs_offset_value) { + /* s is an unicode character and t is a string, happens seldom */ + return (strnumber) aux_str2uni(str_string(t)) == s; + } else { + /* s and t are unicode characters */ + return s == t; + } +} + +/*tex + A string compare helper: the pool string |r| is compared with the |l| bytes at |s|. The result + is nonzero when the lengths match and all |l| bytes are the same. We compare bytes (like in + |tex_str_eq_str|) and not \CCODE\ strings because a string in the pool can have embedded zero + bytes, and a string compare would stop looking at the first one. +*/ + +int tex_str_eq_cstr(strnumber r, const char *s, size_t l) +{ + return (l == str_length(r)) && ! memcmp(str_string(r), s, l); +} + +/*tex + + The initial values of |str_pool|, |str_start|, |pool_ptr|, and |str_ptr| are set in + \INITEX\ mode. The first |string_offset| strings are single character strings matching Unicode. + There is no point in generating all of these. But |str_ptr| has to be initialized properly, + otherwise |print_char| cannot see the difference between characters and strings. 
+ +*/ + +int tex_get_strings_started(void) +{ + tex_reset_cur_string(); + return 1; +} + +/*tex + + The string recycling routines. \TEX\ uses 2 up to 4 {\em new} strings when scanning a filename + in an |\input|, |\openin|, or |\openout| operation. These strings are normally lost because the + references to them are not saved after finishing the operation. |search_string| searches through + the string pool for the given string and returns either 0 or the found string number. However, + in \LUAMETATEX\ filenames (and fontnames) are implemented more efficiently so that code is gone. + +*/ + +/*tex Make a pool string from a zero terminated string; nothing or an empty one gives the null string. */ + +strnumber tex_maketexstring(const char *s) +{ + if (s && *s) { + return tex_maketexlstring(s, strlen(s)); + } else { + return get_nullstr(); + } +} + +/*tex + Make a pool string from the |l| bytes at |s|. We get the null string back when there is nothing + to copy or when we run out of memory. Only |l| bytes are read, so |s| doesn't need to be zero + terminated at that position; we add the terminator ourselves, as in |tex_push_string|. +*/ + +strnumber tex_maketexlstring(const char *s, size_t l) +{ + if (s && l > 0) { + int ptr = lmt_string_pool_state.string_pool_data.ptr; + size_t len = l + 1; + unsigned char *tmp = lmt_memory_malloc(len); + if (tmp) { + str_length(ptr) = l; + str_string(ptr) = tmp; + tex_aux_increment_pool_string((int) l); + memcpy(tmp, s, l); + tmp[l] = '\0'; + if (tex_aux_room_in_string_pool(1)) { + lmt_string_pool_state.string_pool_data.ptr += 1; + } + return ptr; + } else { + tex_overflow_error("string pool", (int) len); + } + } + return get_nullstr(); +} + +/*tex + These two functions append bytes to the current \TEX\ string. There is no checking on what + gets appended and as in \LUA\ zero bytes are okay. Unlike the other engines we don't provide + |^^| escaping, which is already optional in \LUATEX. 
+*/ + +void tex_append_string(const unsigned char *s, unsigned l) +{ + if (s && l > 0 && tex_aux_room_in_string(l)) { + memcpy(lmt_string_pool_state.string_temp + lmt_string_pool_state.string_temp_top, s, l); + lmt_string_pool_state.string_temp_top += l; + } +} + +void tex_append_char(unsigned char c) +{ + if (tex_aux_room_in_string(1)) { + lmt_string_pool_state.string_temp[lmt_string_pool_state.string_temp_top++] = (unsigned char) c; + } +} + +char *tex_makeclstring(int s, size_t *len) +{ + if (s < cs_offset_value) { + *len = (size_t) utf8_size(s); + return (char *) aux_uni2str((unsigned) s); + } else { + size_t l = (size_t) str_length(s); + char *tmp = lmt_memory_malloc(l + 1); + if (tmp) { + memcpy(tmp, str_string(s), l); + tmp[l] = '\0'; + *len = l; + return tmp; + } else { + tex_overflow_error("string pool", (int) l); + *len = 0; + return NULL; + } + } +} + +char *tex_makecstring(int s) +{ + if (s < cs_offset_value) { + return (char *) aux_uni2str((unsigned) s); + } else { + return lmt_memory_strdup((str_length(s) > 0) ? (const char *) str_string(s) : ""); + } +} + +/*tex + + We can save some 150 K on the format file size by using a signed char as length (after checking) + because the max size of a string in \CONTEXT\ is around 70. A flag could indicate if we use 1 or + 4 bytes for the length. But not yet (preroll needed). Dumping and undumping all strings in a + block (where we need to zero terminate them) doesn't really work out any better. Okay, in the end + it was done. + +*/ + +/*tex We use the real accessors here, not the macros that use |cs_offset_value|. 
*/ + +void tex_compact_string_pool(void) +{ + int n_of_strings = lmt_string_pool_state.string_pool_data.ptr - cs_offset_value; + int max_length = 0; + for (int j = 1; j < n_of_strings; j++) { + if (lmt_string_pool_state.string_pool[j].l > (unsigned int) max_length) { + max_length = (int) lmt_string_pool_state.string_pool[j].l; + } + } + lmt_string_pool_state.string_max_length = max_length; + tex_print_format("max string length %i, ", max_length); +} + +void tex_dump_string_pool(dumpstream f) +{ + int n_of_strings = lmt_string_pool_state.string_pool_data.ptr - cs_offset_value; + int total_length = lmt_string_pool_state.string_body_data.allocated; + int max_length = lmt_string_pool_state.string_max_length; + dump_via_int(f, lmt_string_pool_state.string_pool_data.allocated); + dump_via_int(f, lmt_string_pool_state.string_pool_data.top); /* includes cs_offset_value */ + dump_via_int(f, lmt_string_pool_state.string_pool_data.ptr); /* includes cs_offset_value */ + dump_via_int(f, n_of_strings); + dump_via_int(f, max_length); + dump_via_int(f, total_length); + if (max_length > 0 && max_length < 126) { + /*tex We only have short strings. */ + for (int j = 0; j < n_of_strings; j++) { + int l = (int) lmt_string_pool_state.string_pool[j].l; + char c; + if (! lmt_string_pool_state.string_pool[j].s) { + l = -1; + } + c = (char) l; + dump_things(f, c, 1); + if (l > 0) { + dump_things(f, *lmt_string_pool_state.string_pool[j].s, l); + } + } + } else { + /*tex We also have long strings. */ + for (int j = 0; j < n_of_strings; j++) { + int l = (int) lmt_string_pool_state.string_pool[j].l; + if (! 
lmt_string_pool_state.string_pool[j].s) { + l = -1; + } + dump_int(f, l); + if (l > 0) { + dump_things(f, *lmt_string_pool_state.string_pool[j].s, l); + } + } + } +} + +/*tex + This is the counterpart of |tex_dump_string_pool|: we read the bookkeeping numbers, allocate + the pool and then read the strings one by one. A negative length signals a slot without a + string; such a slot (and one that we can't allocate) gets length zero. +*/ + +void tex_undump_string_pool(dumpstream f) +{ + int n_of_strings; + int max_length; + int total_length; + undump_int(f, lmt_string_pool_state.string_pool_data.allocated); + undump_int(f, lmt_string_pool_state.string_pool_data.top); /* includes cs_offset_value */ + undump_int(f, lmt_string_pool_state.string_pool_data.ptr); /* includes cs_offset_value */ + undump_int(f, n_of_strings); + undump_int(f, max_length); + undump_int(f, total_length); + lmt_string_pool_state.string_max_length = max_length; + tex_initialize_string_mem(); + { + int a = 0; + int compact = max_length > 0 && max_length < 126; + for (int j = 0; j < n_of_strings; j++) { + int x; + if (compact) { + /*tex + We only have short strings. The length byte can be |-1| (no string) and because + it depends on the platform if a plain |char| is signed we're explicit here, + otherwise that |-1| can come back as 255. + */ + signed char c; + undump_things(f, c, 1); + x = c; + } else { + /*tex We also have long strings. */ + undump_int(f, x); + } + if (x >= 0) { + /* we can overflow reserved_string_slots */ + int n = x + 1; + unsigned char *s = aux_allocate_clear_array(sizeof(unsigned char), n, reserved_string_slots); + if (s) { + lmt_string_pool_state.string_pool[j].s = s; + undump_things(f, s[0], x); + s[x] = '\0'; + a += n; + } else { + tex_overflow_error("string pool", n); + x = 0; + } + } else { + x = 0; + } + lmt_string_pool_state.string_pool[j].l = x; + } + lmt_string_pool_state.string_body_data.allocated = a; + lmt_string_pool_state.string_body_data.initial = a; + } +} + +/*tex To destroy an already made string, we say |flush_str|. */ + +void tex_flush_str(strnumber s) +{ + if (s > cs_offset_value) { + /*tex Don't ever delete the null string! */ + tex_aux_decrement_pool_string((int) str_length(s)); + str_length(s) = 0; + lmt_memory_free(str_string(s)); + str_string(s) = NULL; + // string_pool_state.string_pool_data.ptr--; + } + /* why a loop and not in previous branch */ + while (! 
str_string((lmt_string_pool_state.string_pool_data.ptr - 1))) { + lmt_string_pool_state.string_pool_data.ptr--; + } +} + +/* + In the old filename code we had the following, but I suspect some mem issue there (as we ran + into GB leaks for thousands of names): + + u = save_cur_string(); + get_x_token(); + restore_cur_string(u); +*/ + +/*tex + When there is something in the current (temporary) string we turn it into a pool string and + return its number, otherwise we return zero. +*/ + +strnumber tex_save_cur_string(void) +{ + return (lmt_string_pool_state.string_temp_top > 0 ? tex_make_string() : 0); +} + +/*tex + The counterpart: when |u| is nonzero the current string is replaced by the content of pool + string |u|, after which |u| is flushed. We ask for room for the length of |u| (and not for its + number, which is a large value because of the |cs_offset_value| offset) and we leave the + allocated size as set by the room checker because that is the real size of the buffer. +*/ + +void tex_restore_cur_string(strnumber u) +{ + if (u) { + /*tex Beware, we have no 0 termination here! */ + int ul = (int) str_length(u); + tex_aux_flush_cur_string(); + if (tex_aux_room_in_string(ul)) { + memcpy(lmt_string_pool_state.string_temp, str_string(u), ul); + lmt_string_pool_state.string_temp_top = ul; + tex_flush_str(u); + } + } +} diff --git a/source/luametatex/source/tex/texstringpool.h b/source/luametatex/source/tex/texstringpool.h new file mode 100644 index 000000000..b3924a0fe --- /dev/null +++ b/source/luametatex/source/tex/texstringpool.h @@ -0,0 +1,110 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_STRINGPOOL_H +# define LMT_STRINGPOOL_H + +/*tex + + Both \LUA\ and \TEX\ strings can contain |nul| characters, but \CCODE\ strings cannot. The pool + is implemented differently anyway. The |init_str_ptr| is an offset that indicates how many strings + are in the format. Does it still make sense to have that distinction? Do we care? + + We store the used bytes (in strings) in the |real| field so that it is carried with the data blob + (and ends up in statistics). 
+ +*/ + +typedef struct lstring { + union { + unsigned char *s; + const char *c; + }; + size_t l; /* could be int, but this way we padd */ +} lstring; + +typedef struct string_pool_info { + lstring *string_pool; + memory_data string_pool_data; + memory_data string_body_data; + strnumber reserved; + /*tex only when format is made and loaded */ + int string_max_length; + /*tex used for temporary string building: */ + unsigned char *string_temp; + int string_temp_allocated; + int string_temp_top; +} string_pool_info; + +extern string_pool_info lmt_string_pool_state; + +# define STRING_EXTRA_AMOUNT 512 + +/*tex This is the reference of the empty string: */ + +# define get_nullstr() cs_offset_value + +/*tex + + Several of the elementary string operations are performed using macros instead of procedures, + because many of the operations are done quite frequently and we want to avoid the overhead of + procedure calls. For example, here is a simple macro that computes the length of a string. + + Keep in mind that we are talking of a |string_pool| table that officially starts with the + unicode characters (as in \TEX\ with \ASCII) but that we use an offset to jump ove that. So the + real size doesn't include those single character code points. + +*/ + +# define str_length(a) (lmt_string_pool_state.string_pool[(a) - cs_offset_value].l) +# define str_string(a) (lmt_string_pool_state.string_pool[(a) - cs_offset_value].s) +# define str_lstring(a) (lmt_string_pool_state.string_pool[(a) - cs_offset_value]) + +/*tex + + Strings are created by appending character codes to |str_pool|. The |append_char| macro, + defined here, does not check to see if the value of |pool_ptr| has gotten too high; this test + is supposed to be made before |append_char| is used. There is also a |flush_char| macro, which + erases the last character appended. 
+ + To test if there is room to append |l| more characters to |str_pool|, we shall write |str_room + (l)|, which aborts \TEX\ and gives an apologetic error message if there isn't enough room. The + length of the current string is called |cur_length|. + +*/ + +/*tex Forget the last character in the pool. */ + +inline void tex_flush_char(void) { --lmt_string_pool_state.string_temp_top; } + +extern strnumber tex_make_string (void); +extern strnumber tex_push_string (const unsigned char *s, int l); +extern char *tex_take_string (int *len); +extern int tex_str_eq_buf (strnumber s, int k, int n); +extern int tex_str_eq_str (strnumber s, strnumber t); +extern int tex_str_eq_cstr (strnumber s, const char *, size_t); +extern int tex_get_strings_started (void); +extern void tex_reset_cur_string (void); +/* strnumber tex_search_string (strnumber search); */ +/* int tex_used_strings (void); */ +extern strnumber tex_maketexstring (const char *s); +extern strnumber tex_maketexlstring (const char *s, size_t); +extern void tex_append_char (unsigned char c); +extern void tex_append_string (const unsigned char *s, unsigned l); +extern char *tex_makecstring (int s); +extern char *tex_makeclstring (int s, size_t *len); +extern void tex_dump_string_pool (dumpstream f); +extern void tex_undump_string_pool (dumpstream f); +extern void tex_initialize_string_pool (void); +extern void tex_initialize_string_mem (void); +extern void tex_flush_str (strnumber s); +extern strnumber tex_save_cur_string (void); +extern void tex_restore_cur_string (strnumber u); + +/* void tex_increment_pool_string (int n); */ +/* void tex_decrement_pool_string (int n); */ + +extern void tex_compact_string_pool (void); + +# endif diff --git a/source/luametatex/source/tex/textextcodes.c b/source/luametatex/source/tex/textextcodes.c new file mode 100644 index 000000000..39fc258c7 --- /dev/null +++ b/source/luametatex/source/tex/textextcodes.c @@ -0,0 +1,607 @@ +/* + See license.txt in the root of this project. 
+*/ + +# include "luametatex.h" + +/*tex + + Contrary to traditional \TEX\ we have catcode tables so that we can switch catcode regimes very + fast. We can have many such regimes and they're stored in trees. + +*/ + +# define CATCODESTACK 8 +# define CATCODEDEFAULT 12 +# define CATCODEDEFAULTS 0x0C0C0C0C /*tex Used as |dflt| value in |sa| struct. */ + +typedef struct catcode_state_info { + sa_tree *catcode_heads; + unsigned char *catcode_valid; + int catcode_max; + int padding; +} catcode_state_info; + +static catcode_state_info lmt_catcode_state = { + .catcode_heads = NULL, + .catcode_valid = NULL, + .catcode_max = 0, + .padding = 0, +} ; + +static void tex_aux_allocate_catcodes(void) +{ + lmt_catcode_state.catcode_heads = sa_malloc_array(sizeof(sa_tree), max_n_of_catcode_tables); + lmt_catcode_state.catcode_valid = sa_malloc_array(sizeof(unsigned char), max_n_of_catcode_tables); + if (lmt_catcode_state.catcode_heads && lmt_catcode_state.catcode_valid) { + sa_wipe_array(lmt_catcode_state.catcode_heads, sizeof(sa_tree), max_n_of_catcode_tables); + sa_wipe_array(lmt_catcode_state.catcode_valid, sizeof(unsigned char), max_n_of_catcode_tables); + } else { + tex_overflow_error("catcodes", max_n_of_catcode_tables); + } +} + +static void tex_aux_initialize_catcodes(void) +{ + sa_tree_item item = { .uint_value = CATCODEDEFAULTS }; + lmt_catcode_state.catcode_max = 0; + tex_aux_allocate_catcodes(); + lmt_catcode_state.catcode_valid[0] = 1; + lmt_catcode_state.catcode_heads[0] = sa_new_tree(CATCODESTACK, 1, item); +} + +void tex_set_cat_code(int h, int n, halfword v, int gl) +{ + sa_tree_item item = { .uint_value = CATCODEDEFAULTS }; + sa_tree tree = lmt_catcode_state.catcode_heads[h]; + if (h > lmt_catcode_state.catcode_max) { + lmt_catcode_state.catcode_max = h; + } + if (! 
tree) { + tree = sa_new_tree(CATCODESTACK, 1, item); + lmt_catcode_state.catcode_heads[h] = tree; + } + sa_set_item_1(tree, n, v, gl); +} + +halfword tex_get_cat_code(int h, int n) +{ + sa_tree_item item = { .uint_value = CATCODEDEFAULTS }; + sa_tree tree = lmt_catcode_state.catcode_heads[h]; + if (h > lmt_catcode_state.catcode_max) { + lmt_catcode_state.catcode_max = h; + } + if (! tree) { + tree = sa_new_tree(CATCODESTACK, 1, item); + lmt_catcode_state.catcode_heads[h] = tree; + } + return sa_return_item_1(tree, n); +} + +void tex_unsave_cat_codes(int h, int gl) +{ + if (h > lmt_catcode_state.catcode_max) { + lmt_catcode_state.catcode_max = h; + } + for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) { + if (lmt_catcode_state.catcode_heads[k]) { + sa_restore_stack(lmt_catcode_state.catcode_heads[k], gl); + } + } +} + +static void tex_aux_dump_catcodes(dumpstream f) +{ + int total = 0; + for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) { + if (lmt_catcode_state.catcode_valid[k]) { + total++; + } + } + dump_int(f, lmt_catcode_state.catcode_max); + dump_int(f, total); + for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) { + if (lmt_catcode_state.catcode_valid[k]) { + dump_int(f, k); + sa_dump_tree(f, lmt_catcode_state.catcode_heads[k]); + } + } +} + +static void tex_aux_undump_catcodes(dumpstream f) +{ + int total; + sa_free_array(lmt_catcode_state.catcode_heads); + sa_free_array(lmt_catcode_state.catcode_valid); + tex_aux_allocate_catcodes(); + undump_int(f, lmt_catcode_state.catcode_max); + undump_int(f, total); + for (int k = 0; k < total; k++) { + int x; + undump_int(f, x); + lmt_catcode_state.catcode_heads[x] = sa_undump_tree(f); + lmt_catcode_state.catcode_valid[x] = 1; + } +} + +int tex_valid_catcode_table(int h) +{ + return (h >= 0 && h < max_n_of_catcode_tables && lmt_catcode_state.catcode_valid[h]); +} + +void tex_copy_cat_codes(int from, int to) +{ + if (from < 0 || from >= max_n_of_catcode_tables || 
lmt_catcode_state.catcode_valid[from] == 0) { + exit(EXIT_FAILURE); + } else { + if (to > lmt_catcode_state.catcode_max) { + lmt_catcode_state.catcode_max = to; + } + sa_destroy_tree(lmt_catcode_state.catcode_heads[to]); + lmt_catcode_state.catcode_heads[to] = sa_copy_tree(lmt_catcode_state.catcode_heads[from]); + lmt_catcode_state.catcode_valid[to] = 1; + } +} + +/* +void set_cat_code_table_default(int h, int dflt) +{ + if (valid_catcode_table(h)) { + catcode_state.catcode_heads[h]->dflt.uchar_value[0] = (unsigned char) dflt; + catcode_state.catcode_heads[h]->dflt.uchar_value[1] = (unsigned char) dflt; + catcode_state.catcode_heads[h]->dflt.uchar_value[2] = (unsigned char) dflt; + catcode_state.catcode_heads[h]->dflt.uchar_value[3] = (unsigned char) dflt; + } +} + +int get_cat_code_table_default(int h) +{ + if (valid_catcode_table(h)) { + return catcode_state.catcode_heads[h]->dflt.uchar_value[0]; + } else { + return CATCODEDEFAULT; + } +} +*/ + +void tex_initialize_cat_codes(int h) +{ + if (h > lmt_catcode_state.catcode_max) { + lmt_catcode_state.catcode_max = h; + } + sa_destroy_tree(lmt_catcode_state.catcode_heads[h]); + lmt_catcode_state.catcode_heads[h] = NULL; + tex_set_cat_code(h, '\r', end_line_cmd, 1); + tex_set_cat_code(h, ' ', spacer_cmd, 1); + tex_set_cat_code(h, '\\', escape_cmd, 1); + tex_set_cat_code(h, '%', comment_cmd, 1); + tex_set_cat_code(h, 127, invalid_char_cmd, 1); + tex_set_cat_code(h, 0, ignore_cmd, 1); + tex_set_cat_code(h, 0xFEFF, ignore_cmd, 1); + for (int k = 'A'; k <= 'Z'; k++) { + tex_set_cat_code(h, k, letter_cmd, 1); + tex_set_cat_code(h, k + 'a' - 'A', letter_cmd, 1); + } + lmt_catcode_state.catcode_valid[h] = 1; +} + +static void tex_aux_free_catcodes(void) +{ + for (int k = 0; k <= lmt_catcode_state.catcode_max; k++) { + if (lmt_catcode_state.catcode_valid[k]) { + sa_destroy_tree(lmt_catcode_state.catcode_heads[k]); + } + } + lmt_catcode_state.catcode_heads = sa_free_array(lmt_catcode_state.catcode_heads); + 
lmt_catcode_state.catcode_valid = sa_free_array(lmt_catcode_state.catcode_valid); +} + +/*tex + + The lowercase mapping codes are also stored in a tree. Let's keep them close for cache hits, + maybe also with hjcodes. + +*/ + +# define LCCODESTACK 8 +# define LCCODEDEFAULT 0 + +# define UCCODESTACK 8 +# define UCCODEDEFAULT 0 + +# define SFCODESTACK 8 +# define SFCODEDEFAULT 1000 + +# define HCCODESTACK 8 +# define HCCODEDEFAULT 0 + +# define HMCODESTACK 8 +# define HMCODEDEFAULT 0 + +typedef struct luscode_state_info { + sa_tree uccode_head; + sa_tree lccode_head; + sa_tree sfcode_head; + sa_tree hccode_head; + sa_tree hmcode_head; +} luscode_state_info; + +static luscode_state_info lmt_luscode_state = { + .uccode_head = NULL, + .lccode_head = NULL, + .sfcode_head = NULL, + .hccode_head = NULL, + .hmcode_head = NULL +}; + +void tex_set_lc_code(int n, halfword v, int gl) +{ + sa_tree_item item; + item.int_value = v; + sa_set_item_4(lmt_luscode_state.lccode_head, n, item, gl); +} + +halfword tex_get_lc_code(int n) +{ + return sa_return_item_4(lmt_luscode_state.lccode_head, n); +} + +static void tex_aux_unsave_lccodes(int gl) +{ + sa_restore_stack(lmt_luscode_state.lccode_head, gl); +} + +static void tex_aux_initialize_lccodes(void) +{ + sa_tree_item item; + item.int_value = LCCODEDEFAULT; + lmt_luscode_state.lccode_head = sa_new_tree(LCCODESTACK, 4, item); +} + +static void tex_aux_dump_lccodes(dumpstream f) +{ + sa_dump_tree(f, lmt_luscode_state.lccode_head); +} + +static void tex_aux_undump_lccodes(dumpstream f) +{ + lmt_luscode_state.lccode_head = sa_undump_tree(f); +} + +static void tex_aux_free_lccodes(void) +{ + sa_destroy_tree(lmt_luscode_state.lccode_head); +} + +/*tex + + And the uppercase mapping codes are again stored in a tree. 
+ +*/ + +void tex_set_uc_code(int n, halfword v, int gl) +{ + sa_tree_item item; + item.int_value = v; + sa_set_item_4(lmt_luscode_state.uccode_head, n, item, gl); +} + +halfword tex_get_uc_code(int n) +{ + return sa_return_item_4(lmt_luscode_state.uccode_head, n); +} + +static void tex_aux_unsave_uccodes(int gl) +{ + sa_restore_stack(lmt_luscode_state.uccode_head, gl); +} + +static void tex_aux_initialize_uccodes(void) +{ + sa_tree_item item = { .int_value = UCCODEDEFAULT }; + lmt_luscode_state.uccode_head = sa_new_tree(UCCODESTACK, 4, item); +} + +static void tex_aux_dump_uccodes(dumpstream f) +{ + sa_dump_tree(f,lmt_luscode_state.uccode_head); +} + +static void tex_aux_undump_uccodes(dumpstream f) +{ + lmt_luscode_state.uccode_head = sa_undump_tree(f); +} + +static void tex_aux_free_uccodes(void) +{ + sa_destroy_tree(lmt_luscode_state.uccode_head); +} + +/*tex + + By now it will be no surprise that the space factors get stored in a tree. + +*/ + +void tex_set_sf_code(int n, halfword v, int gl) +{ + sa_tree_item item; + item.int_value = v; + sa_set_item_4(lmt_luscode_state.sfcode_head, n, item, gl); +} + +halfword tex_get_sf_code(int n) +{ + return sa_return_item_4(lmt_luscode_state.sfcode_head, n); +} + +static void tex_aux_unsave_sfcodes(int gl) +{ + sa_restore_stack(lmt_luscode_state.sfcode_head, gl); +} + +static void tex_aux_initialize_sfcodes(void) +{ + sa_tree_item item = { .int_value = SFCODEDEFAULT }; + lmt_luscode_state.sfcode_head = sa_new_tree(SFCODESTACK, 4, item); +} + +static void tex_aux_dump_sfcodes(dumpstream f) +{ + sa_dump_tree(f, lmt_luscode_state.sfcode_head); +} + +static void tex_aux_undump_sfcodes(dumpstream f) +{ + lmt_luscode_state.sfcode_head = sa_undump_tree(f); +} + +static void tex_aux_free_sfcodes(void) +{ + sa_destroy_tree(lmt_luscode_state.sfcode_head); +} + +/*tex + + Finaly the hyphen character codes, a rather small sparse array. 
+ +*/ + +void tex_set_hc_code(int n, halfword v, int gl) +{ + sa_tree_item item; + item.int_value = v; + sa_set_item_4(lmt_luscode_state.hccode_head, n, item, gl); +} + +halfword tex_get_hc_code(int n) +{ + return sa_return_item_4(lmt_luscode_state.hccode_head, n); +} + +static void tex_aux_unsave_hccodes(int gl) +{ + sa_restore_stack(lmt_luscode_state.hccode_head, gl); +} + +static void tex_aux_initialize_hccodes(void) +{ + sa_tree_item item = { .int_value = HCCODEDEFAULT }; + lmt_luscode_state.hccode_head = sa_new_tree(HCCODESTACK, 4, item); +} + +static void tex_aux_dump_hccodes(dumpstream f) +{ + sa_dump_tree(f, lmt_luscode_state.hccode_head); +} + +static void tex_aux_undump_hccodes(dumpstream f) +{ + lmt_luscode_state.hccode_head = sa_undump_tree(f); +} + +static void tex_aux_free_hccodes(void) +{ + sa_destroy_tree(lmt_luscode_state.hccode_head); +} + +/*tex + The same is true for math hyphenation but here we have a small options set. +*/ + +void tex_set_hm_code(int n, halfword v, int gl) +{ + sa_set_item_1(lmt_luscode_state.hmcode_head, n, v, gl); +} + +halfword tex_get_hm_code(int n) +{ + return sa_return_item_1(lmt_luscode_state.hmcode_head, n); +} + +static void tex_aux_unsave_hmcodes(int gl) +{ + sa_restore_stack(lmt_luscode_state.hmcode_head, gl); +} + +static void tex_aux_initialize_hmcodes(void) +{ + sa_tree_item item = { .int_value = HMCODEDEFAULT }; + lmt_luscode_state.hmcode_head = sa_new_tree(HMCODESTACK, 1, item); +} + +static void tex_aux_dump_hmcodes(dumpstream f) +{ + sa_dump_tree(f, lmt_luscode_state.hmcode_head); +} + +static void tex_aux_undump_hmcodes(dumpstream f) +{ + lmt_luscode_state.hmcode_head = sa_undump_tree(f); +} + +static void tex_aux_free_hmcodes(void) +{ + sa_destroy_tree(lmt_luscode_state.hmcode_head); +} + +/*tex + + The hyphenation codes are indeed stored in a tree and are used instead of lowercase codes when + deciding what characters to take into acccount when hyphenating. They are bound to upto + |HJCODE_MAX| languages. 
In the end I decided to put the hash pointer in the language record + so that we can do better lean memory management. Actually, the hjcode handling already was more + efficient than in \LUATEX\ because I kept track of usage and allocated (dumped) only the + languages that were used. A typical example of nicely cleaned up code that in the end was + ditched but that happens often (and of course goes unnoticed). Actually, in \CONTEXT\ we don't + dump language info at all, so I might as wel drop language dumping, just like fonts. + +*/ + +# define HJCODESTACK 8 +# define HJCODEDEFAULT 0 + +void tex_set_hj_code(int h, int n, halfword v, int gl) +{ + if (h >= 0 && h <= lmt_language_state.language_data.top) { + sa_tree_item item = { .int_value = HJCODEDEFAULT }; + sa_tree tree = lmt_language_state.languages[h]->hjcode_head; + if (! tree) { + tree = sa_new_tree(HJCODESTACK, 4, item); + lmt_language_state.languages[h]->hjcode_head = tree; + } + if (tree) { + item.int_value = (int) v; + sa_set_item_4(tree, n, item, gl); + } + } +} + +/*tex We just return the lccodes when nothing is set. */ + +halfword tex_get_hj_code(int h, int n) +{ + if (h >= 0 && h <= lmt_language_state.language_data.top) { + sa_tree tree = lmt_language_state.languages[h]->hjcode_head; + if (! 
tree) { + tree = lmt_luscode_state.lccode_head; + } + return sa_return_item_4(tree, n); + } else { + return 0; + } +} + +void tex_dump_language_hj_codes(dumpstream f, int h) +{ + if (h >= 0 && h <= lmt_language_state.language_data.top) { + sa_tree tree = lmt_language_state.languages[h]->hjcode_head; + if (tree) { + dump_via_int(f, 1); + sa_dump_tree(f, tree); + } else { + dump_via_int(f, 0); + } + } else { + /* error */ + } +} + +void tex_undump_language_hj_codes(dumpstream f, int h) +{ + if (h >= 0 && h <= lmt_language_state.language_data.top) { + int x; + undump_int(f, x); + if (x) { + sa_free_array(lmt_language_state.languages[h]->hjcode_head); + lmt_language_state.languages[h]->hjcode_head = sa_undump_tree(f); + } else { + lmt_language_state.languages[h]->hjcode_head = NULL; + } + } else { + /* error */ + } +} + +void tex_hj_codes_from_lc_codes(int h) +{ + if (h >= 0 && h <= lmt_language_state.language_data.top) { + sa_tree tree = lmt_language_state.languages[h]->hjcode_head; + if (tree) { + sa_destroy_tree(tree); + } + tree = sa_copy_tree(lmt_luscode_state.lccode_head); + lmt_language_state.languages[h]->hjcode_head = tree ? tree : NULL; + } +} + +/*tex The public management functions. 
*/ + +void tex_unsave_text_codes(int grouplevel) +{ + tex_aux_unsave_lccodes(grouplevel); + tex_aux_unsave_uccodes(grouplevel); + tex_aux_unsave_sfcodes(grouplevel); + tex_aux_unsave_hccodes(grouplevel); + tex_aux_unsave_hmcodes(grouplevel); +} + +void tex_initialize_text_codes(void) +{ + tex_aux_initialize_catcodes(); + tex_aux_initialize_lccodes(); + tex_aux_initialize_uccodes(); + tex_aux_initialize_sfcodes(); + tex_aux_initialize_hccodes(); + tex_aux_initialize_hmcodes(); + /* initializehjcodes(); */ +} + +void tex_free_text_codes(void) +{ + tex_aux_free_catcodes(); + tex_aux_free_lccodes(); + tex_aux_free_uccodes(); + tex_aux_free_sfcodes(); + tex_aux_free_hccodes(); + tex_aux_free_hmcodes(); + /* freehjcodes(); */ +} + +void tex_dump_text_codes(dumpstream f) +{ + tex_aux_dump_catcodes(f); + tex_aux_dump_lccodes(f); + tex_aux_dump_uccodes(f); + tex_aux_dump_sfcodes(f); + tex_aux_dump_hccodes(f); + tex_aux_dump_hmcodes(f); + /* dumphjcodes(f); */ +} + +void tex_undump_text_codes(dumpstream f) +{ + tex_aux_undump_catcodes(f); + tex_aux_undump_lccodes(f); + tex_aux_undump_uccodes(f); + tex_aux_undump_sfcodes(f); + tex_aux_undump_hccodes(f); + tex_aux_undump_hmcodes(f); + /* undumphjcodes(f); */ +} + +void tex_initialize_xx_codes(void) +{ + /*tex We're compatible. */ + for (int u = 'A'; u <= 'Z'; u++) { + int l = u + 32; + tex_set_lc_code(u, l, level_one); + tex_set_lc_code(l, l, level_one); + tex_set_uc_code(u, u, level_one); + tex_set_uc_code(l, u, level_one); + tex_set_sf_code(u, 999, level_one); + } + /*tex A good start but not compatible. */ + /* set_hc_code(0x002D, 0x002D, level_one); */ + /* set_hc_code(0x2010, 0x2010, level_one); */ +}
\ No newline at end of file diff --git a/source/luametatex/source/tex/textextcodes.h b/source/luametatex/source/tex/textextcodes.h new file mode 100644 index 000000000..476f0f03e --- /dev/null +++ b/source/luametatex/source/tex/textextcodes.h @@ -0,0 +1,49 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_TEXTCODES_H +# define LMT_TEXTCODES_H + +/*tex + For practical reasons we handle the hmcodes here although they are used in + math only. We could have used the hc codes as there will be no overlap. +*/ + +extern void tex_set_cat_code (int h, int n, halfword v, int gl); +extern halfword tex_get_cat_code (int h, int n); +extern int tex_valid_catcode_table (int h); +extern void tex_unsave_cat_codes (int h, int gl); +extern void tex_copy_cat_codes (int from, int to); +extern void tex_initialize_cat_codes (int h); +/* void tex_set_cat_code_table_default (int h, int dflt); */ +/* int tex_get_cat_code_table_default (int h); */ + +extern void tex_set_lc_code (int n, halfword v, int gl); +extern halfword tex_get_lc_code (int n); +extern void tex_set_uc_code (int n, halfword v, int gl); +extern halfword tex_get_uc_code (int n); +extern void tex_set_sf_code (int n, halfword v, int gl); +extern halfword tex_get_sf_code (int n); +extern void tex_set_hc_code (int n, halfword v, int gl); +extern halfword tex_get_hc_code (int n); +extern void tex_set_hm_code (int n, halfword v, int gl); +extern halfword tex_get_hm_code (int n); +extern void tex_set_hj_code (int l, int n, halfword v, int gl); +extern halfword tex_get_hj_code (int l, int n); +extern void tex_initialize_xx_codes (void); + +extern void tex_hj_codes_from_lc_codes (int h); + +extern void tex_initialize_text_codes (void); +extern void tex_unsave_text_codes (int grouplevel); + +extern void tex_dump_text_codes (dumpstream f); +extern void tex_undump_text_codes (dumpstream f); + +extern void tex_dump_language_hj_codes (dumpstream f, int h); +extern void tex_undump_language_hj_codes (dumpstream f, 
int h); + +extern void tex_free_text_codes (void); + +# endif diff --git a/source/luametatex/source/tex/textoken.c b/source/luametatex/source/tex/textoken.c new file mode 100644 index 000000000..0d2415233 --- /dev/null +++ b/source/luametatex/source/tex/textoken.c @@ -0,0 +1,3511 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex Todo: move some helpers to other places. */ + +inline static int tex_aux_the_cat_code(halfword b) +{ + return (lmt_input_state.cur_input.cattable == default_catcode_table_preset) ? + tex_get_cat_code(cat_code_table_par, b) + : ( (lmt_input_state.cur_input.cattable > -0xFF) ? + tex_get_cat_code(lmt_input_state.cur_input.cattable, b) + : ( + - lmt_input_state.cur_input.cattable - 0xFF + ) ) ; +} + +/*tex + + The \TEX\ system does nearly all of its own memory allocation, so that it can readily be + transported into environments that do not have automatic facilities for strings, garbage + collection, etc., and so that it can be in control of what error messages the user receives. + The dynamic storage requirements of \TEX\ are handled by providing two large arrays called + |fixmem| and |varmem| in which consecutive blocks of words are used as nodes by the \TEX\ + routines. + + Pointer variables are indices into this array, or into another array called |eqtb| that + will be explained later. A pointer variable might also be a special flag that lies outside + the bounds of |mem|, so we allow pointers to assume any |halfword| value. The minimum + halfword value represents a null pointer. \TEX\ does not assume that |mem[null]| exists. + + Locations in |fixmem| are used for storing one-word records; a conventional |AVAIL| stack is + used for allocation in this array. + + One can make an argument to switch to standard \CCODE\ allocation but the current approach is + very efficient in memory usage and performence so we stay with it. 
On the average memory + consumption of \TEX| is not that large, definitely not compared to other programs that deal + with text. + + The big dynamic storage area is named |fixmem| where the smallest location of one|-|word + memory in use is |fix_mem_min| and the largest location of one|-|word memory in use is + |fix_mem_max|. + + The |dyn_used| variable keeps track of how much memory is in use. The head of the list of + available one|-|word nodes is registered in |avail|. The last one-|word node used in |mem| + is |fix_mem_end|. + + All these variables are packed in the structure |token_memory_state|. + +*/ + +token_memory_state_info lmt_token_memory_state = { + .tokens = NULL, + .tokens_data = { + .minimum = min_token_size, + .maximum = max_token_size, + .size = siz_token_size, + .step = stp_token_size, + .allocated = 0, + .itemsize = sizeof(memoryword), + .top = 0, + .ptr = 0, /* used to register usage */ + .initial = 0, + .offset = 0, + }, + .available = 0, + .padding = 0, +}; + +/*tex + + Token data has its own memory space. Again we have some state variables: |temp_token_head| is + the head of a (temporary) list of some kind as are |hold_token_head| and |omit_template|. A + permanently empty list is available in |null_list| and the head of the token list built by + |scan_keyword| is registered in |backup_head|. All these variables are packed in the structure + |token_data| but some have been moved to a more relevant state (so omit and hold are now in the + alignment state). + +*/ + +token_state_info lmt_token_state = { + .null_list = null, + .in_lua_escape = 0, + .force_eof = 0, + .luacstrings = 0, + .par_loc = null, + .par_token = null, + /* .line_par_loc = null, */ /* removed because not really used and useful */ + /* .line_par_token = null, */ /* idem */ + .buffer = NULL, + .bufloc = 0, + .bufmax = 0, + .padding = 0, +}; + +/*tex Some properties are dumped in the format so these are aet already! 
*/ + +# define reserved_token_mem_slots 2 // play safe for slight overuns + +void tex_initialize_token_mem(void) +{ + memoryword *tokens = NULL; + int size = 0; + if (lmt_main_state.run_state == initializing_state) { + size = lmt_token_memory_state.tokens_data.minimum; + } else { + size = lmt_token_memory_state.tokens_data.allocated; + lmt_token_memory_state.tokens_data.initial = lmt_token_memory_state.tokens_data.ptr; + } + if (size > 0) { + tokens = aux_allocate_clear_array(sizeof(memoryword), size, reserved_token_mem_slots); + } + if (tokens) { + lmt_token_memory_state.tokens = tokens; + lmt_token_memory_state.tokens_data.allocated = size; + } else { + tex_overflow_error("tokens", size); + } +} + +static void tex_aux_bump_token_memory(void) +{ + /*tex We need to manage the big dynamic storage area. */ + int size = lmt_token_memory_state.tokens_data.allocated + lmt_token_memory_state.tokens_data.step; + if (size > lmt_token_memory_state.tokens_data.size) { + lmt_run_memory_callback("token", 0); + tex_show_runaway(); + tex_overflow_error("token memory size", lmt_token_memory_state.tokens_data.allocated); + } else { + memoryword *tokens = aux_reallocate_array(lmt_token_memory_state.tokens, sizeof(memoryword), size, reserved_token_mem_slots); + lmt_run_memory_callback("token", tokens ? 1 : 0); + if (tokens) { + lmt_token_memory_state.tokens = tokens; + } else { + /*tex If memory is exhausted, display possible runaway text. 
*/ + tex_show_runaway(); + tex_overflow_error("token memory size", lmt_token_memory_state.tokens_data.allocated); + } + } + memset((void *) (lmt_token_memory_state.tokens + lmt_token_memory_state.tokens_data.allocated + 1), 0, ((size_t) lmt_token_memory_state.tokens_data.step + reserved_token_mem_slots) * sizeof(memoryword)); + lmt_token_memory_state.tokens_data.allocated = size; +} + +void tex_initialize_tokens(void) +{ + lmt_token_memory_state.available = null; + lmt_token_memory_state.tokens_data.top = 0; + lmt_token_state.null_list = tex_get_available_token(null); + lmt_token_state.in_lua_escape = 0; +} + +/*tex + Experiment. It saves some 512K on the \CONTEXT\ format of October 2020. It makes me wonder if I + should spend some time on optimizing token lists (kind of cisc commands as we're currently kind + of risc). +*/ + +void tex_compact_tokens(void) +{ + int nc = 0; + // memoryword *target = allocate_array(sizeof(memoryword), (size_t) token_memory_state.tokens_data.allocated, 0); + memoryword *target = aux_allocate_clear_array(sizeof(memoryword), (size_t) lmt_token_memory_state.tokens_data.allocated, 0); + halfword *mapper = aux_allocate_array(sizeof(halfword), (size_t) lmt_token_memory_state.tokens_data.allocated, 0); + int nofluacmds = 0; + if (target && mapper) { + // memset((void *) target, 0, ((size_t) token_memory_state.tokens_data.allocated) * sizeof(memoryword)); + memset((void *) mapper, -1, ((size_t) lmt_token_memory_state.tokens_data.allocated) * sizeof(halfword)); + memoryword *tokens = lmt_token_memory_state.tokens; + /* also reset available */ + for (int cs = 0; cs < (eqtb_size + lmt_hash_state.hash_data.ptr); cs++) { + switch (eq_type(cs)) { + case call_cmd: + case protected_call_cmd: + case semi_protected_call_cmd: + case tolerant_call_cmd: + case tolerant_protected_call_cmd: + case tolerant_semi_protected_call_cmd: + case internal_toks_reference_cmd: + case register_toks_reference_cmd: + { + halfword v = eq_value(cs); /* ref count token*/ + 
if (v) { + if (mapper[v] < 0) { + // printf("before =>"); { halfword tt = v; while (tt) { printf("%7d ",tt); tt = token_link(tt); } } printf("\n"); + halfword t = v; + nc++; + mapper[v] = nc; /* new ref count token index */ + while (1) { + target[nc].half1 = tokens[t].half1; /* info cq. ref count */ + t = tokens[t].half0; + if (t) { + nc++; + target[nc-1].half0 = nc; /* link to next */ + } else { + target[nc].half0 = null; /* link to next */ + break; + } + } + // printf("after =>"); { halfword tt = mapper[v]; while (tt) { printf("%7d ",tt); tt = target[tt].half0; } } printf("\n"); + } + eq_value(cs) = mapper[v]; + } + break; + } + case lua_value_cmd: + case lua_call_cmd: + case lua_local_call_cmd: + { + ++nofluacmds; + break; + } + } + } + // print(dump_state.format_identifier); + tex_print_format("tokenlist compacted from %i to %i entries, ", lmt_token_memory_state.tokens_data.top, nc); + if (nofluacmds) { + /*tex + We just mention them because when these are aliased the macro package needs to make + sure that after loading that happens again because registered funciton references + can have changed between format generation and run! + */ + tex_print_format("%i potentially aliased lua call/value entries, ", nofluacmds); + } + lmt_token_memory_state.tokens_data.top = nc; + lmt_token_memory_state.tokens_data.ptr = nc; + aux_deallocate_array(lmt_token_memory_state.tokens); + lmt_token_memory_state.tokens = target; + lmt_token_memory_state.available = null; + } else { + tex_overflow_error("token compaction size", lmt_token_memory_state.tokens_data.allocated); + } +} + + +/*tex + + The function |get_avail| returns a pointer (index) to a new one word node whose |link| field is + |null| (which is just 0). However, \TEX\ will halt if there is no more room left. + + If the available space list is empty, i.e., if |avail = null|, we try first to increase + |fix_mem_end|. If that cannot be done, i.e., if |fix_mem_end = fix_mem_max|, we try to reallocate + array |fixmem|. 
If, that doesn't work, we have to quit. Users can configure \TEX\ to use a lot of + memory but in some scenarios limitations make sense. + + Remark: we can have a pool of chunks where we get from or just allocate per token (as we have lots + of them that is slow). But then format loading becomes much slower as we need to recreate the + linked list. A no go. In todays terms \TEX\ memory usage is low anyway. + + The freed tokens are kept in a linked list. First we check if we can quickly get one of these. If + that fails, we try to get one from the available pool. If that fails too, we enlarge the pool and + try again. We keep track of the used number of tokens. We also make sure that the tokens links to + nothing. + + One problem is of course that tokens can be scattered over memory. We could have some sorter that + occasionally kicks in but it doesn't pay off. Normally definitions (in the format) are in sequence + but a normal run \unknown\ it would be interesting to know if this impacts the cache. + +*/ + +halfword tex_get_available_token(halfword t) +{ + halfword p = lmt_token_memory_state.available; + if (p) { + lmt_token_memory_state.available = token_link(p); + } else if (lmt_token_memory_state.tokens_data.top < lmt_token_memory_state.tokens_data.allocated) { + p = ++lmt_token_memory_state.tokens_data.top; + } else { + tex_aux_bump_token_memory(); + p = ++lmt_token_memory_state.tokens_data.top; + } + ++lmt_token_memory_state.tokens_data.ptr; + token_link(p) = null; + token_info(p) = t; + return p; +} + +/*tex + + Because we only have forward links, a freed token ends up at the head of the list of available + tokens. 
+ +*/ + +void tex_put_available_token(halfword p) +{ + token_link(p) = lmt_token_memory_state.available; + lmt_token_memory_state.available = p; + --lmt_token_memory_state.tokens_data.ptr; +} + +halfword tex_store_new_token(halfword p, halfword t) +{ + halfword q = tex_get_available_token(t); + token_link(p) = q; + return q; +} + +/*tex + + The procedure |flush_list (p)| frees an entire linked list of oneword nodes that starts at + position |p|. It makes list of single word nodes available. The second variant in principle + is faster but in practice this goes unnoticed. Of course there is a little price to pay for + keeping track of memory usage. + +*/ + +void tex_flush_token_list(halfword head) +{ + if (head) { + halfword current = head; + halfword tail; + int i = 0; + do { + ++i; + tail = current; + current = token_link(tail); + } while (current); + lmt_token_memory_state.tokens_data.ptr -= i; + token_link(tail) = lmt_token_memory_state.available; + lmt_token_memory_state.available = head; + } +} + +void tex_flush_token_list_head_tail(halfword head, halfword tail, int n) +{ + if (head) { + lmt_token_memory_state.tokens_data.ptr -= n; + token_link(tail) = lmt_token_memory_state.available; + lmt_token_memory_state.available = head; + } +} + +void tex_add_token_reference(halfword p) +{ + if (get_token_reference(p) < max_token_reference) { + add_token_reference(p); + } else { + tex_overflow_error("reference count", max_token_reference); + } +} + +void tex_increment_token_reference(halfword p, int n) +{ + if ((get_token_reference(p) + n) < max_token_reference) { + inc_token_reference(p,n); + } else { + tex_overflow_error("reference count", max_token_reference); + } +} + +void tex_delete_token_reference(halfword p) +{ + if (p) { + if (get_token_reference(p)) { + sub_token_reference(p); + } else { + tex_flush_token_list(p); + } + } +} + +/*tex + + A \TEX\ token is either a character or a control sequence, and it is represented internally in + one of two ways: + + 
\startitemize[n] + \startitem + A character whose ASCII code number is |c| and whose command code is |m| is represented + as the number $2^{21}m+c$; the command code is in the range |1 <= m <= 14|. + \stopitem + \startitem + A control sequence whose |eqtb| address is |p| is represented as the number + |cs_token_flag+p|. Here |cs_token_flag = t =| $2^{25}-1$ is larger than $2^{21}m+c$, yet + it is small enough that |cs_token_flag + p < max_halfword|; thus, a token fits + comfortably in a halfword. + \stopitem + \stopitemize + + A token |t| represents a |left_brace| command if and only if |t < left_brace_limit|; it + represents a |right_brace| command if and only if we have |left_brace_limit <= t < + right_brace_limit|; and it represents a |match| or |end_match| command if and only if + |match_token <= t <= end_match_token|. The following definitions take care of these + token-oriented constants and a few others. + + A token list is a singly linked list of one-word nodes in |mem|, where each word contains a token + and a link. Macro definitions, output routine definitions, marks, |\write| texts, and a few other + things are remembered by \TEX\ in the form of token lists, usually preceded by a node with a + reference count in its |token_ref_count| field. The token stored in location |p| is called + |info(p)|. + + Three special commands appear in the token lists of macro definitions. When |m = match|, it means + that \TEX\ should scan a parameter for the current macro; when |m = end_match|, it means that + parameter matching should end and \TEX\ should start reading the macro text; and when |m = + out_param|, it means that \TEX\ should insert parameter number |c| into the text at this point. + + The enclosing |\char'173| and |\char'175| characters of a macro definition are omitted, but the + final right brace of an output routine is included at the end of its token list. + + Here is an example macro definition that illustrates these conventions. 
After \TEX\ processes
    the text:

    \starttyping
    \def\mac a#1#2 \b {#1\-a ##1#2 \#2\}
    \stoptyping

    The definition of |\mac| is represented as a token list containing:

    \starttyping
    (reference count) letter a match # match # spacer \b end_match
    out_param1 \- letter a spacer, mac_param # other_char 1
    out_param2 spacer out_param 2
    \stoptyping

    The procedure |scan_toks| builds such token lists, and |macro_call| does the parameter matching.

    Examples such as |\def \m {\def \m {a} b}| explain why reference counts would be needed even if
    \TEX\ had no |\let| operation: When the token list for |\m| is being read, the redefinition of
    |\m| changes the |eqtb| entry before the token list has been fully consumed, so we dare not
    simply destroy a token list when its control sequence is being redefined.

    If the parameter-matching part of a definition ends with |#{}|, the corresponding token list
    will have |{| just before the |end_match| and also at the very end. The first |{| is used to
    delimit the parameter; the second one keeps the first from disappearing.

    The |print_meaning| subroutine displays |cur_cmd| and |cur_chr| in symbolic form, including the
    expansion of a macro or mark.

    The |code| argument selects the variant (|meaning_code|, |meaning_full_code| or
    |meaning_asis_code|). The token that is described is the current one, so the function reads
    the globals |cur_cmd|, |cur_chr| and |cur_cs|. Nothing is returned, everything is printed.

*/

void tex_print_meaning(halfword code)
{
    /*tex

    This would make sense but some macro packages don't like it:

    \starttyping
    if (cur_cmd == math_given_cmd) {
        cur_cmd = math_xgiven_cmd ;
    }
    \stoptyping

    Eventually we might just do it that way. We also can have |\meaningonly| that omits the
    |macro:| and arguments.
    */
    int untraced = is_untraced(eq_flag(cur_cs));
    /*tex The flags (prefixes) are only shown for traced control sequences. */
    if (! untraced) {
        switch (code) {
            case meaning_code:
            case meaning_full_code:
            case meaning_asis_code:
                tex_print_cmd_flags(cur_cs, cur_cmd, (code == meaning_full_code || code == meaning_asis_code), code == meaning_asis_code);
                break;
        }
    }
    switch (cur_cmd) {
        case call_cmd:
        case protected_call_cmd:
        case semi_protected_call_cmd:
        case tolerant_call_cmd:
        case tolerant_protected_call_cmd:
        case tolerant_semi_protected_call_cmd:
            if (untraced) {
                /*tex An untraced macro only shows its name. */
                tex_print_cs(cur_cs);
                return;
            } else {
                switch (code) {
                    case meaning_code:
                    case meaning_full_code:
                        /*tex This gives |macro:| followed by the token list. */
                        tex_print_str("macro");
                        goto FOLLOWUP;
                    case meaning_asis_code:
                        // tex_print_format("%e%C %S ", def_cmd, def_code, cur_cs);
                        tex_print_cmd_chr(def_cmd, def_code);
                        tex_print_char(' ');
                        tex_print_cs(cur_cs);
                        tex_print_char(' ');
                        if (cur_chr && token_link(cur_chr)) {
                            /*tex
                                With parameters the first call (|asis| set) prints the parameter
                                part and returns the token after |end_match|, which is where the
                                body starts. Without parameters the body starts right after the
                                reference count.
                            */
                            halfword body = get_token_parameters(cur_chr) ? tex_show_token_list(token_link(cur_chr), null, default_token_show_max, 1) : token_link(cur_chr);
                            tex_print_char('{');
                            if (body) {
                                tex_show_token_list(body, null, default_token_show_max, 0);
                            }
                            tex_print_char('}');
                        }
                        return;
                }
                /*tex Any other |code| skips the |macro:| prefix and only shows the list. */
                goto DETAILS;
            }
        case get_mark_cmd:
            tex_print_cmd_chr((singleword) cur_cmd, cur_chr);
            tex_print_char(':');
            tex_print_nlp();
            tex_token_show(tex_get_some_mark(cur_chr, 0), default_token_show_max);
            return;
        case lua_value_cmd:
        case lua_call_cmd:
        case lua_local_call_cmd:
        case lua_protected_call_cmd:
            if (untraced) {
                tex_print_cs(cur_cs);
                return;
            } else {
                goto DEFAULT;
            }
        case if_test_cmd:
            /*tex Tests beyond the last built in one are shown by name only. */
            if (cur_chr > last_if_test_code) {
                tex_print_cs(cur_cs);
                return;
            } else {
                goto DEFAULT;
            }
        default:
        DEFAULT:
            tex_print_cmd_chr((singleword) cur_cmd, cur_chr);
            if (cur_cmd < call_cmd) {
                return;
            } else {
                /* all kind of reference cmds */
                break;
            }
    }
  FOLLOWUP:
    tex_print_char(':');
  DETAILS:
    /*tex Here |cur_chr| is taken to point to the reference count of a token list. */
    tex_print_nlp();
    tex_token_show(cur_chr, default_token_show_max);
}

/*tex

    The procedure
|show_token_list|, which prints a symbolic form of the token list that starts at + a given node |p|, illustrates these conventions. The token list being displayed should not begin + with a reference count. However, the procedure is intended to be robust, so that if the memory + links are awry or if |p| is not really a pointer to a token list, nothing catastrophic will + happen. + + An additional parameter |q| is also given; this parameter is either null or it points to a node + in the token list where a certain magic computation takes place that will be explained later. + Basically, |q| is non-null when we are printing the two-line context information at the time of + an error message; |q| marks the place corresponding to where the second line should begin. + + For example, if |p| points to the node containing the first |a| in the token list above, then + |show_token_list| will print the string + + \starttyping + a#1#2 \b ->#1-a ##1#2 #2 + \stoptyping + + and if |q| points to the node containing the second |a|, the magic computation will be performed + just before the second |a| is printed. + + The generation will stop, and |\ETC.| will be printed, if the length of printing exceeds a given + limit~|l|. Anomalous entries are printed in the form of control sequences that are not followed + by a blank space, e.g., |\BAD.|; this cannot be confused with actual control sequences because a + real control sequence named |BAD| would come out |\BAD |. + + In \LUAMETATEX\ we have some more node types and token types so we also have additional tracing. + Because there is some more granularity in for instance nodes (subtypes) more detail is reported. 
+ +*/ + +static const char *tex_aux_special_cmd_string(halfword cmd, halfword chr, const char *unknown) +{ + switch (cmd) { + case node_cmd : return "[[special cmd: node pointer]]"; + case lua_protected_call_cmd : return "[[special cmd: lua protected call]]"; + case lua_value_cmd : return "[[special cmd: lua value call]]"; + case iterator_value_cmd : return "[[special cmd: iterator value]]"; + case lua_call_cmd : return "[[special cmd: lua call]]"; + case lua_local_call_cmd : return "[[special cmd: lua local call]]"; + case begin_local_cmd : return "[[special cmd: begin local call]]"; + case end_local_cmd : return "[[special cmd: end local call]]"; + // case prefix_cmd : return "[[special cmd: enforced]]"; + case prefix_cmd : return "\\always"; + default : printf("[[unknown cmd: (%i,%i)]\n", cmd, chr); return unknown; + } +} + +halfword nn = 0; + +halfword tex_show_token_list(halfword p, halfword q, int l, int asis) +{ + if (p) { + /*tex the highest parameter number, as an \ASCII\ digit */ + unsigned char n = '0'; + int min = 0; + int max = lmt_token_memory_state.tokens_data.top; + lmt_print_state.tally = 0; +// if (l <= 0) { + l = extreme_token_show_max; +// } + while (p && (lmt_print_state.tally < l)) { + if (p == q) { + /*tex Do magic computation. We only end up here in context showing. */ + tex_set_trick_count(); + } + /*tex Display token |p|, and |return| if there are problems. */ + if (p < min || p > max) { + tex_print_str(error_string_clobbered(41)); + return null; + } else if (token_info(p) >= cs_token_flag) { + // if (! ((print_state.inhibit_par_tokens) && (token_info(p) == token_state.par_token))) { + tex_print_cs_checked(token_info(p) - cs_token_flag); + // } + } else if (token_info(p) < 0) { + tex_print_str(error_string_bad(42)); + } else if (token_info(p) == 0) { + tex_print_str(error_string_bad(44)); + } else { + int cmd = token_cmd(token_info(p)); + int chr = token_chr(token_info(p)); + /* + Display the token (|cmd|,|chr|). 
The procedure usually \quote {learns} the character + code used for macro parameters by seeing one in a |match| command before it runs + into any |out_param| commands. + + */ + switch (cmd) { + case left_brace_cmd: + case right_brace_cmd: + case math_shift_cmd: + case alignment_tab_cmd: + case superscript_cmd: + case subscript_cmd: + case spacer_cmd: + case letter_cmd: + case other_char_cmd: + case ignore_cmd: /* new */ + tex_print_tex_str(chr); + break; + case parameter_cmd: + if (! lmt_token_state.in_lua_escape && (lmt_expand_state.cs_name_level == 0)) { + tex_print_tex_str(chr); + } + tex_print_tex_str(chr); + break; + case parameter_reference_cmd: + tex_print_tex_str(match_visualizer); + if (chr <= 9) { + tex_print_char(chr + '0'); + } else { + tex_print_char('!'); + return null; + } + break; + case match_cmd: + tex_print_char(match_visualizer); + if (is_valid_match_ref(chr)) { + ++n; + } + tex_print_char(chr ? chr : '0'); + if (n > '9') { + /*tex Can this happen at all? */ + return null; + } else { + break; + } + case end_match_cmd: + if (asis) { + return token_link(p); + } else if (chr == 0) { + tex_print_str("->"); + } + break; + case ignore_something_cmd: + break; + case set_font_cmd: + tex_print_format("[font->%s]", font_original(cur_val)); + break; + case end_paragraph_cmd: + tex_print_format("%e%s", "par "); + break; + default: + tex_print_str(tex_aux_special_cmd_string(cmd, chr, error_string_bad(43))); + break; + } + } + p = token_link(p); + } + if (p) { + tex_print_str_esc("ETC."); + } + } + return p; +} + +/* +# define do_buffer_to_unichar(a,b) do { \ + a = (halfword)str2uni(fileio_state.io_buffer+b); \ + b += utf8_size(a); \ +} while (0) +*/ + +inline halfword get_unichar_from_buffer(int *b) +{ + halfword a = (halfword) ((const unsigned char) *(lmt_fileio_state.io_buffer + *b)); + if (a <= 0x80) { + *b += 1; + } else { + a = (halfword) aux_str2uni(lmt_fileio_state.io_buffer + *b); + *b += utf8_size(a); + } + return a; +} + +/*tex + + Here's the way 
we sometimes want to display a token list, given a pointer to its reference count; + the pointer may be null. + +*/ + +void tex_token_show(halfword p, int max) +{ + if (p && token_link(p)) { + tex_show_token_list(token_link(p), null, max, 0); + } +} + +/*tex + + The next function, |delete_token_ref|, is called when a pointer to a token list's reference + count is being removed. This means that the token list should disappear if the reference count + was |null|, otherwise the count should be decreased by one. Variable |p| points to the reference + count of a token list that is losing one reference. + +*/ + +int tex_get_char_cat_code(int c) +{ + return tex_aux_the_cat_code(c); +} + +static void tex_aux_invalid_character_error(void) +{ + tex_handle_error( + normal_error_type, + "Text line contains an invalid character", + "A funny symbol that I can't read has just been input. Continue, and I'll forget\n" + "that it ever happened." + ); +} + +static int tex_aux_process_sup_mark(void); + +static int tex_aux_scan_control_sequence(void); + +typedef enum next_line_retval { + next_line_ok, + next_line_return, + next_line_restart +} next_line_retval; + +static next_line_retval tex_aux_next_line(void); + +/*tex + + In case you are getting bored, here is a slightly less trivial routine: Given a string of + lowercase letters, like |pt| or |plus| or |width|, the |scan_keyword| routine checks to see + whether the next tokens of input match this string. The match must be exact, except that + ppercase letters will match their lowercase counterparts; uppercase equivalents are determined + by subtracting |"a" - "A"|, rather than using the |uc_code| table, since \TEX\ uses this + routine only for its own limited set of keywords. + + If a match is found, the characters are effectively removed from the input and |true| is + returned. Otherwise |false| is returned, and the input is left essentially unchanged (except + for the fact that some macros may have been expanded, etc.). 
+ + In \LUATEX\ and its follow up we have more keywords and for instance when scanning a box + specification that is noticeable because the |scan_keyword| function is a little inefficient + in the sense that when there is no match, it will push back what got read so far. So there is + token allocation, pushing a level etc involved. Keep in mind that expansion happens here so what + gets pushing back is not always literally pushing back what we started with. + + In \LUAMETATEX\ we now have a bit different approach. The |scan_mandate_keyword| follows up on + |scan_character| so we have a two step approach. We could actually pass a list of valid keywords + but that would make for a complex function with no real benefits. + +*/ + +halfword tex_scan_character(const char *s, int left_brace, int skip_space, int skip_relax) +{ + halfword save_cur_cs = cur_cs; +// (void) skip_space; /* some day */ + while (1) { + tex_get_x_token(); + switch (cur_cmd) { + case spacer_cmd: + if (skip_space) { + break; + } else { + goto DONE; + } + break; + case relax_cmd: + if (skip_relax) { + break; + } else { + goto DONE; + } + case letter_cmd: + case other_char_cmd: + if (cur_chr <= 'z' && strchr(s, cur_chr)) { + cur_cs = save_cur_cs; + return cur_chr; + } else { + goto DONE; + } + case left_brace_cmd: + if (left_brace) { + cur_cs = save_cur_cs; + return '{'; + } else { + goto DONE; + } + default: + goto DONE; + } + } + DONE: + tex_back_input(cur_tok); + cur_cs = save_cur_cs; + return 0; +} + +void tex_aux_show_keyword_error(const char *s) +{ + tex_handle_error( + normal_error_type, + "Valid keyword expected, likely '%s'", + s, + "You started a keyword but it seems to be an invalid one. The first character(s)\n" + "might give you a clue. You might want to quit unwanted lookahead with \\relax." + ); +} + +/*tex + Scanning an optional keyword starts at the beginning. 
This means that we can also (for instance) + have a minus or plus sign which means that we have a different loop than with the alternative + that already checked the first character. +*/ + +int tex_scan_optional_keyword(const char *s) +{ + halfword save_cur_cs = cur_cs; + int done = 0; + const char *p = s; + while (*p) { + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: + case other_char_cmd: + if ((cur_chr == *p) || (cur_chr == *p - 'a' + 'A')) { + if (*(++p)) { + done = 1; + } else { + cur_cs = save_cur_cs; + return 1; + } + } else if (done) { + goto BAD_NEWS; + } else { + // can be a minus or so ! as in \advance\foo -10 + tex_back_input(cur_tok); + cur_cs = save_cur_cs; + return 1; + } + break; + case spacer_cmd: /* normally spaces are not pushed back */ + if (done) { + goto BAD_NEWS; + } else { + break; + } + // fall through + default: + tex_back_input(cur_tok); + if (done) { + /* unless we accept partial keywords */ + goto BAD_NEWS; + } else { + cur_cs = save_cur_cs; + return 0; + } + } + } + BAD_NEWS: + tex_aux_show_keyword_error(s); + cur_cs = save_cur_cs; + return 0; +} + +/*tex + Here we know that the first character(s) matched so we are in the middle of a keyword already + which means a different loop than the previous one. 
+*/ + +int tex_scan_mandate_keyword(const char *s, int offset) +{ + halfword save_cur_cs = cur_cs; + int done = 0; + // int done = offset > 0; + const char *p = s + offset; /* offset always > 0 so no issue with +/- */ + while (*p) { + tex_get_x_token(); + switch (cur_cmd) { + case letter_cmd: + case other_char_cmd: + if ((cur_chr == *p) || (cur_chr == *p - 'a' + 'A')) { + if (*(++p)) { + done = 1; + } else { + cur_cs = save_cur_cs; + return 1; + } + } else { + goto BAD_NEWS; + } + break; + // case spacer_cmd: /* normally spaces are not pushed back */ + // case relax_cmd: /* normally not, should be option */ + // if (done) { + // back_input(cur_tok); + // goto BAD_NEWS; + // } else { + // break; + // } + // default: + // goto BAD_NEWS; + case spacer_cmd: /* normally spaces are not pushed back */ + if (done) { + goto BAD_NEWS; + } else { + break; + } + // fall through + default: + tex_back_input(cur_tok); + /* unless we accept partial keywords */ + goto BAD_NEWS; + } + } + BAD_NEWS: + tex_aux_show_keyword_error(s); + cur_cs = save_cur_cs; + return 0; +} + +/* + This is the original scanner with push|-|back. It's a matter of choice: we are more restricted + on the one hand and more loose on the other. +*/ + +int tex_scan_keyword(const char *s) +{ + if (*s) { + halfword h = null; + halfword p = null; + halfword save_cur_cs = cur_cs; + int n = 0; + while (*s) { + /*tex Recursion is possible here! */ + tex_get_x_token(); + if ((cur_cmd == letter_cmd || cur_cmd == other_char_cmd) && ((cur_chr == *s) || (cur_chr == *s - 'a' + 'A'))) { + p = tex_store_new_token(p, cur_tok); + if (! 
h) { + h = p; + } + n++; + s++; + } else if ((p != h) || (cur_cmd != spacer_cmd)) { + tex_back_input(cur_tok); + if (h) { + tex_begin_backed_up_list(h); + } + cur_cs = save_cur_cs; + return 0; + } + } + if (h) { + tex_flush_token_list_head_tail(h, p, n); + } + cur_cs = save_cur_cs; + return 1; + } else { + /*tex but not with newtokenlib zero keyword simply doesn't match */ + return 0 ; + } +} + +int tex_scan_keyword_case_sensitive(const char *s) +{ + if (*s) { + halfword h = null; + halfword p = null; + halfword save_cur_cs = cur_cs; + int n = 0; + while (*s) { + tex_get_x_token(); + if ((cur_cmd == letter_cmd || cur_cmd == other_char_cmd) && (cur_chr == *s)) { + p = tex_store_new_token(p, cur_tok); + if (! h) { + h = p; + } + n++; + s++; + } else if ((p != h) || (cur_cmd != spacer_cmd)) { + tex_back_input(cur_tok); + if (h) { + tex_begin_backed_up_list(h); + } + cur_cs = save_cur_cs; + return 0; + } + } + if (h) { + tex_flush_token_list_head_tail(h, p, n); + } + cur_cs = save_cur_cs; + return 1; + } else { + return 0 ; + } +} + +/*tex + + We can not return |undefined_control_sequence| under some conditions (inside |shift_case|, + for example). This needs thinking. + +*/ + +halfword tex_active_to_cs(int c, int force) +{ + halfword cs = -1; + if (c > 0) { + /*tex This is not that efficient: we can make a helper that doesn't use an alloc. */ + char utfbytes[8] = { '\xEF', '\xBF', '\xBF', 0 }; + aux_uni2string((char *) &utfbytes[3], c); + cs = tex_string_locate(utfbytes, (size_t) utf8_size(c) + 3, force); + } + if (cs < 0) { + cs = tex_string_locate("\xEF\xBF\xBF", 4, force); /*tex Including the zero sentinel. */ + } + return cs; +} + +/*tex + + The heart of \TEX's input mechanism is the |get_next| procedure, which we shall develop in the + next few sections of the program. Perhaps we shouldn't actually call it the \quote {heart}, + however, because it really acts as \TEX's eyes and mouth, reading the source files and + gobbling them up. 
And it also helps \TEX\ to regurgitate stored token lists that are to be + processed again. + + The main duty of |get_next| is to input one token and to set |cur_cmd| and |cur_chr| to that + token's command code and modifier. Furthermore, if the input token is a control sequence, the + |eqtb| location of that control sequence is stored in |cur_cs|; otherwise |cur_cs| is set to + zero. + + Underlying this simple description is a certain amount of complexity because of all the cases + that need to be handled. However, the inner loop of |get_next| is reasonably short and fast. + + When |get_next| is asked to get the next token of a |\read| line, it sets |cur_cmd = cur_chr + = cur_cs = 0| in the case that no more tokens appear on that line. (There might not be any + tokens at all, if the |end_line_char| has |ignore| as its catcode.) + + The value of |par_loc| is the |eqtb| address of |\par|. This quantity is needed because a + blank line of input is supposed to be exactly equivalent to the appearance of |\par|; we must + set |cur_cs := par_loc| when detecting a blank line. + + Parts of |get_next| are executed more often than any other instructions of \TEX. The global + variable |force_eof| is normally |false|; it is set |true| by an |\endinput| command. + |luacstrings| is the number of lua print statements waiting to be input, it is changed by + |lmt_token_call|. + + If the user has set the |pausing| parameter to some positive value, and if nonstop mode has + not been selected, each line of input is displayed on the terminal and the transcript file, + followed by |=>|. \TEX\ waits for a response. If the response is simply |carriage_return|, + the line is accepted as it stands, otherwise the line typed is used instead of the line in the + file. 
 + + We no longer need the following: + +*/ + +// void firm_up_the_line(void) +// { +// ilimit = fileio_state.io_last; +// } + +/*tex + + The other variant gives less clutter in tracing cache usage when profiling and for some files + (like the manual) also a bit of a speedup. Splitting the switch gives 10 times less Bim + in valgrind! See the \LUATEX\ source for that code. + + The big switch changes the state if necessary, and |goto switch| if the current character + should be ignored, or |goto reswitch| if the current character changes to another. + + The n-way switch accomplishes the scanning quickly, assuming that a decent \CCODE\ compiler + has translated the code. Note that the numeric values for |mid_line|, |skip_blanks|, and + |new_line| are spaced apart from each other by |max_char_code+1|, so we can add a character's + command code to the state to get a single number that characterizes both. + + Remark: checking performance indicated that this switch was the cause of many branch prediction + errors but changing it to: + + \starttyping + c = istate + cur_cmd; + if (c == (mid_line_state + letter_cmd) || c == (mid_line_state + other_char_cmd)) { + return 1; + } else if (c >= new_line_state) { + switch (c) { + } + } else if (c >= skip_blanks_state) { + switch (c) { + } + } else if (c >= mid_line_state) { + switch (c) { + } + } else { + istate = mid_line_state; + return 1; + } + \stoptyping + + This gives as many prediction errors. So, we can indeed assume that the compiler does the right + job, or that there is simply no other way. + + When a line is finished a space is emitted. When a character of type |spacer| gets through, its + character code is changed to |\ =040|. This means that the \ASCII\ codes for tab and space, and + for the space inserted at the end of a line, will be treated alike when macro parameters are + being matched. We do this since such characters are indistinguishable on most computer terminal + displays. 

*/

/*

    c = istate + cur_cmd;
    if (c == (mid_line_state + letter_cmd) || c == (mid_line_state + other_char_cmd)) {
        return 1;
    } else if (c >= new_line_state) {
        ....
    }

*/

/*tex

    This trick has been dropped when the wrapup mechanism had proven to be useful. The idea was
    to backport this to \LUATEX\ but some other \PDFTEX\ compatible parstuff made it there and
    backporting par related features becomes too messy.

    \starttyping
    lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
    cur_cs = lmt_token_state.line_par_loc;
    cur_cmd = eq_type(cur_cs);
    if (cur_cmd == undefined_cs_cmd) {
        cur_cs = lmt_token_state.par_loc;
        cur_cmd = eq_type(cur_cs);
    }
    cur_chr = eq_value(cur_cs);
    \stoptyping

*/

/*tex
    Get the next token from the current line of a file: one character is fetched from the buffer,
    its category code is combined with the scanner state, and |cur_cmd|, |cur_chr| (and for
    control sequences and active characters |cur_cs|) are set. When the line is exhausted the
    next line is fetched. The function returns 1 when it is done and 0 after an invalid character
    or when |tex_aux_next_line| asks for a restart (presumably the caller then starts all over;
    confirm in |get_next|).
*/

static int tex_aux_get_next_file(void)
{
  SWITCH:
    if (lmt_input_state.cur_input.loc <= lmt_input_state.cur_input.limit) {
        /*tex current line not yet finished */
        cur_chr = get_unichar_from_buffer(&lmt_input_state.cur_input.loc);
      RESWITCH:
        if (lmt_input_state.cur_input.cattable == no_catcode_table_preset) {
            /* happens seldom: detokenized line */
            /* NOTE(review): 10 and 12 presumably are |spacer_cmd| and |other_char_cmd|; confirm. */
            cur_cmd = cur_chr == ' ' ? 10 : 12;
        } else {
            cur_cmd = tex_aux_the_cat_code(cur_chr);
        }
        switch (lmt_input_state.cur_input.state + cur_cmd) {
            case mid_line_state    + ignore_cmd:
            case skip_blanks_state + ignore_cmd:
            case new_line_state    + ignore_cmd:
            case skip_blanks_state + spacer_cmd:
            case new_line_state    + spacer_cmd:
                /*tex Cases where character is ignored. */
                goto SWITCH;
            case mid_line_state    + escape_cmd:
            case new_line_state    + escape_cmd:
            case skip_blanks_state + escape_cmd:
                /*tex Scan a control sequence. */
                lmt_input_state.cur_input.state = (unsigned char) tex_aux_scan_control_sequence();
                break;
            case mid_line_state    + active_char_cmd:
            case new_line_state    + active_char_cmd:
            case skip_blanks_state + active_char_cmd:
                /*tex Process an active-character. */
                cur_cs = tex_active_to_cs(cur_chr, ! lmt_hash_state.no_new_cs);
                cur_cmd = eq_type(cur_cs);
                cur_chr = eq_value(cur_cs);
                lmt_input_state.cur_input.state = mid_line_state;
                break;
            case mid_line_state    + superscript_cmd:
            case new_line_state    + superscript_cmd:
            case skip_blanks_state + superscript_cmd:
                /*tex We need to check for multiple ^:
                    (0) always check for ^^ ^^^^ ^^^^^^^
                    (1) only check in text mode
                    (*) never
                */
                if (sup_mark_mode_par) {
                    if (sup_mark_mode_par == 1 && cur_mode != mmode && tex_aux_process_sup_mark()) {
                        goto RESWITCH;
                    }
                } else if (tex_aux_process_sup_mark()) {
                    goto RESWITCH;
                } else {
                    /*tex
                        We provide prescripts and shifted script in math mode and avoid fancy |^|
                        processing in text mode (which is what we do in \CONTEXT).
                    */
                }
                /*tex Here the character is a plain superscript token. */
                lmt_input_state.cur_input.state = mid_line_state;
                break;
            case mid_line_state    + invalid_char_cmd:
            case new_line_state    + invalid_char_cmd:
            case skip_blanks_state + invalid_char_cmd:
                /*tex Decry the invalid character and |goto restart|. */
                tex_aux_invalid_character_error();
                /*tex Because state may be |token_list| now: */
                return 0;
            case mid_line_state + spacer_cmd:
                /*tex Enter |skip_blanks| state, emit a space. */
                lmt_input_state.cur_input.state = skip_blanks_state;
                cur_chr = ' ';
                break;
            case mid_line_state + end_line_cmd:
                /*tex Finish the line. See note above about dropped |\linepar|. */
                lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
                cur_cmd = spacer_cmd;
                cur_chr = ' ';
                break;
            case skip_blanks_state + end_line_cmd:
            case mid_line_state    + comment_cmd:
            case new_line_state    + comment_cmd:
            case skip_blanks_state + comment_cmd:
                /*tex Finish line, |goto switch|; */
                lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
                goto SWITCH;
            case new_line_state + end_line_cmd:
                /*tex Unless the \quote {go on} option is set, the rest of the line is dropped. */
                if (! auto_paragraph_mode(auto_paragraph_go_on)) {
                    lmt_input_state.cur_input.loc = lmt_input_state.cur_input.limit + 1;
                }
                /*tex Finish line, emit a |\par|; */
                if (auto_paragraph_mode(auto_paragraph_text)) {
                    cur_cs = null;
                    cur_cmd = end_paragraph_cmd;
                    cur_chr = new_line_end_paragraph_code;
                 // cur_chr = normal_end_paragraph_code;
                } else {
                    cur_cs = lmt_token_state.par_loc;
                    cur_cmd = eq_type(cur_cs);
                    cur_chr = eq_value(cur_cs);
                }
                break;
            case skip_blanks_state + left_brace_cmd:
            case new_line_state    + left_brace_cmd:
                lmt_input_state.cur_input.state = mid_line_state;
                lmt_input_state.align_state++;
                break;
            case mid_line_state + left_brace_cmd:
                lmt_input_state.align_state++;
                break;
            case skip_blanks_state + right_brace_cmd:
            case new_line_state    + right_brace_cmd:
                lmt_input_state.cur_input.state = mid_line_state;
                lmt_input_state.align_state--;
                break;
            case mid_line_state + right_brace_cmd:
                lmt_input_state.align_state--;
                break;
            case mid_line_state + math_shift_cmd:
            case mid_line_state + alignment_tab_cmd:
            case mid_line_state + parameter_cmd:
            case mid_line_state + subscript_cmd:
            case mid_line_state + letter_cmd:
            case mid_line_state + other_char_cmd:
                /*tex Already in the middle of a line, so nothing changes. */
                break;
            /*
            case skip_blanks_state + math_shift_cmd:
            case skip_blanks_state + tab_mark_cmd:
            case skip_blanks_state + mac_param_cmd:
            case skip_blanks_state + sub_mark_cmd:
            case skip_blanks_state + letter_cmd:
            case skip_blanks_state + other_char_cmd:
            case new_line_state    + math_shift_cmd:
            case new_line_state    + tab_mark_cmd:
            case new_line_state    + mac_param_cmd:
            case new_line_state    + sub_mark_cmd:
            case new_line_state    + letter_cmd:
            case new_line_state    + other_char_cmd:
            */
            default:
                lmt_input_state.cur_input.state = mid_line_state;
                break;
        }
    } else {
        if (! io_token_input(lmt_input_state.cur_input.name)) {
            lmt_input_state.cur_input.state = new_line_state;
        }
        /*tex

            Move to next line of file, or |goto restart| if there is no next line, or |return| if a
            |\read| line has finished.

        */
        do {
            next_line_retval r = tex_aux_next_line();
            if (r == next_line_restart) {
                /*tex This happens more often. */
                return 0;
            } else if (r == next_line_return) {
                return 1;
            }
        } while (0);
        /* check_interrupt(); */
        goto SWITCH;
    }
    return 1;
}

/*tex

    Notice that a code like |^^8| becomes |x| if not followed by a hex digit. We only support a
    limited set:

    \starttyping
    ^^^^^^XXXXXX
    ^^^^XXXX
    ^^XX ^^<char>
    \stoptyping

    Only digits and lowercase |a| upto |f| count as hex digits. The helpers below expect
    characters that passed the |is_hex| test.

*/

# define is_hex(a) ((a >= '0' && a <= '9') || (a >= 'a' && a <= 'f'))

inline static halfword tex_aux_two_hex_to_cur_chr(int c1, int c2)
{
    return
        0x10 * (c1 <= '9' ? c1 - '0' : c1 - 'a' + 10)
      + 0x01 * (c2 <= '9' ? c2 - '0' : c2 - 'a' + 10);
}

inline static halfword tex_aux_four_hex_to_cur_chr(int c1, int c2,int c3, int c4)
{
    return
        0x1000 * (c1 <= '9' ? c1 - '0' : c1 - 'a' + 10)
      + 0x0100 * (c2 <= '9' ? c2 - '0' : c2 - 'a' + 10)
      + 0x0010 * (c3 <= '9' ? c3 - '0' : c3 - 'a' + 10)
      + 0x0001 * (c4 <= '9' ? c4 - '0' : c4 - 'a' + 10);
}

inline static halfword tex_aux_six_hex_to_cur_chr(int c1, int c2, int c3, int c4, int c5, int c6)
{
    return
        0x100000 * (c1 <= '9' ? c1 - '0' : c1 - 'a' + 10)
      + 0x010000 * (c2 <= '9' ? c2 - '0' : c2 - 'a' + 10)
      + 0x001000 * (c3 <= '9' ? c3 - '0' : c3 - 'a' + 10)
      + 0x000100 * (c4 <= '9' ? c4 - '0' : c4 - 'a' + 10)
      + 0x000010 * (c5 <= '9' ? c5 - '0' : c5 - 'a' + 10)
      + 0x000001 * (c6 <= '9' ? c6 - '0' : c6 - 'a' + 10);

}

/*tex
    We get here with a superscript character in |cur_chr| and the buffer position right after it.
    When the buffer continues with the same character we have one of the |^^| notations: the
    position is moved past it, |cur_chr| gets the resulting character and 1 is returned so that
    the caller can look at the new character. Otherwise nothing changes and 0 is returned.

    After one of the errors below we fall through to the single character case.
*/

static int tex_aux_process_sup_mark(void)
{
    if (cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc]) {
        if (lmt_input_state.cur_input.loc < lmt_input_state.cur_input.limit) {
            /*tex Two more of them means that we have at least four in a row. */
            if ((cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 1]) && (cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 2])) {
                /*tex And again two more makes six. */
                if ((cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 3]) && (cur_chr == lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 4])) {
                    if ((lmt_input_state.cur_input.loc + 10) <= lmt_input_state.cur_input.limit) {
                        /*tex |^^^^^^XXXXXX| */
                        int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  5];
                        int c2 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  6];
                        int c3 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  7];
                        int c4 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  8];
                        int c5 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc +  9];
                        int c6 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 10];
                        if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) {
                            lmt_input_state.cur_input.loc += 11;
                            cur_chr = tex_aux_six_hex_to_cur_chr(c1, c2, c3, c4, c5, c6);
                            return 1;
                        } else {
                            tex_handle_error(
                                normal_error_type,
                                "^^^^^^ needs six hex digits",
                                NULL
                            );
                        }
                    } else {
                        tex_handle_error(
                            normal_error_type,
                            "^^^^^^ needs six hex digits, end of input",
                            NULL
                        );
                    }
                } else if ((lmt_input_state.cur_input.loc + 6) <= lmt_input_state.cur_input.limit) {
                    /*tex |^^^^XXXX| */
                    int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 3];
                    int c2 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 4];
                    int c3 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 5];
                    int c4 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 6];
                    if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) {
                        lmt_input_state.cur_input.loc += 7;
                        cur_chr = tex_aux_four_hex_to_cur_chr(c1, c2, c3, c4);
                        return 1;
                    } else {
                        tex_handle_error(
                            normal_error_type,
                            "^^^^ needs four hex digits",
                            NULL
                        );
                    }
                } else {
                    tex_handle_error(
                        normal_error_type,
                        "^^^^ needs four hex digits, end of input",
                        NULL
                    );
                }
            } else if ((lmt_input_state.cur_input.loc + 2) <= lmt_input_state.cur_input.limit) {
                /*tex |^^XX| */
                int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 1];
                int c2 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 2];
                if (is_hex(c1) && is_hex(c2)) {
                    lmt_input_state.cur_input.loc += 3;
                    cur_chr = tex_aux_two_hex_to_cur_chr(c1, c2);
                    return 1;
                }
            }
            /*tex The single character case: */
            {
                int c1 = lmt_fileio_state.io_buffer[lmt_input_state.cur_input.loc + 1];
                if (c1 < 0200) {
                    lmt_input_state.cur_input.loc = lmt_input_state.cur_input.loc + 2;
                 // if (is_hex(c1) && (iloc <= ilimit)) {
                 //     int c2 = fileio_state.io_buffer[iloc];
                 //     if (is_hex(c2)) {
                 //         ++iloc;
                 //         cur_chr = two_hex_to_cur_chr(c1, c2);
                 //         return 1;
                 //     }
                 // }
                 // /*tex The somewhat odd cases, often special control characters: */
                    /*tex The character moves 64 slots up or down, depending on where it sits. */
                    cur_chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100);
                    return 1;
                }
            }
        }
    }
    return 0;
}

/*tex

    Control sequence names are scanned only when they appear in some line of a file. Once they have
    been scanned the first time, their |eqtb| location serves as a unique identification, so \TEX\
    doesn't need to refer to the original name any more except when it prints the equivalent in
    symbolic form.

    The program that scans a control sequence has been written carefully in order to avoid the
    blowups that might otherwise occur if a malicious user tried something like |\catcode'15 = 0|.
    The algorithm might look at |buffer[ilimit + 1]|, but it never looks at |buffer[ilimit + 2]|.
+ + If expanded characters like |^^A| or |^^df| appear in or just following a control sequence name, + they are converted to single characters in the buffer and the process is repeated, slowly but + surely. + +*/ + +/*tex + + Whenever we reach the following piece of code, we will have |cur_chr = buffer[k - 1]| and |k <= + ilimit + 1| and |cat = get_cat_code(cat_code_table, cur_chr)|. If an expanded code like |^^A| or + |^^df| appears in |buffer[(k - 1) .. (k + 1)]| or |buffer[(k - 1) .. (k + 2)]|, we will store + the corresponding code in |buffer[k - 1]| and shift the rest of the buffer left two or three + places. + +*/ + +static int tex_aux_check_expanded_code(int *kk, halfword *chr) +{ + if (sup_mark_mode_par > 1 || (sup_mark_mode_par == 1 && cur_mode == mmode)) { + return 0; + } else { + int k = *kk; + /* chr is the ^ character or an equivalent one */ + if (lmt_fileio_state.io_buffer[k] == *chr && k < lmt_input_state.cur_input.limit) { + int d = 1; + int l; + if ((*chr == lmt_fileio_state.io_buffer[k + 1]) && (*chr == lmt_fileio_state.io_buffer[k + 2])) { + if ((*chr == lmt_fileio_state.io_buffer[k + 3]) && (*chr == lmt_fileio_state.io_buffer[k + 4])) { + if ((k + 10) <= lmt_input_state.cur_input.limit) { + int c1 = lmt_fileio_state.io_buffer[k + 6 - 1]; + int c2 = lmt_fileio_state.io_buffer[k + 6 ]; + int c3 = lmt_fileio_state.io_buffer[k + 6 + 1]; + int c4 = lmt_fileio_state.io_buffer[k + 6 + 2]; + int c5 = lmt_fileio_state.io_buffer[k + 6 + 3]; + int c6 = lmt_fileio_state.io_buffer[k + 6 + 4]; + if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4) && is_hex(c5) && is_hex(c6)) { + d = 6; + *chr = tex_aux_six_hex_to_cur_chr(c1, c2, c3, c4, c5, c6); + } else { + tex_handle_error( + normal_error_type, + "^^^^^^ needs six hex digits", + NULL + ); + } + } else { + tex_handle_error( + normal_error_type, + "^^^^^^ needs six hex digits, end of input", + NULL + ); + } + } else if ((k + 6) <= lmt_input_state.cur_input.limit) { + int c1 = lmt_fileio_state.io_buffer[k 
+ 4 - 1]; + int c2 = lmt_fileio_state.io_buffer[k + 4 ]; + int c3 = lmt_fileio_state.io_buffer[k + 4 + 1]; + int c4 = lmt_fileio_state.io_buffer[k + 4 + 2]; + if (is_hex(c1) && is_hex(c2) && is_hex(c3) && is_hex(c4)) { + d = 4; + *chr = tex_aux_four_hex_to_cur_chr(c1, c2, c3, c4); + } else { + tex_handle_error( + normal_error_type, + "^^^^ needs four hex digits", + NULL + ); + } + } else { + tex_handle_error( + normal_error_type, + "^^^^ needs four hex digits, end of input", + NULL + ); + } + } else { + int c1 = lmt_fileio_state.io_buffer[k + 1]; + if (c1 < 0200) { /* really ? */ + d = 1; + if (is_hex(c1) && (k + 2) <= lmt_input_state.cur_input.limit) { + int c2 = lmt_fileio_state.io_buffer[k + 2]; + if (is_hex(c2)) { + d = 2; + *chr = tex_aux_two_hex_to_cur_chr(c1, c2); + } else { + *chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100); + } + } else { + *chr = (c1 < 0100 ? c1 + 0100 : c1 - 0100); + } + } + } + if (d > 2) { + d = 2 * d - 1; + } else { + d++; + } + if (*chr <= 0x7F) { + lmt_fileio_state.io_buffer[k - 1] = (unsigned char) *chr; + } else if (*chr <= 0x7FF) { + lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0xC0 + *chr / 0x40); + k++; + d--; + lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + *chr % 0x40); + } else if (*chr <= 0xFFFF) { + lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0xE0 + *chr / 0x1000); + k++; + d--; + lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + (*chr % 0x1000) / 0x40); + k++; + d--; + lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + (*chr % 0x1000) % 0x40); + } else { + lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0xF0 + *chr / 0x40000); + k++; + d--; + lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + (*chr % 0x40000) / 0x1000); + k++; + d--; + lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + ((*chr % 0x40000) % 0x1000) / 0x40); + k++; + d--; + lmt_fileio_state.io_buffer[k - 1] = (unsigned char) (0x80 + ((*chr % 0x40000) % 0x1000) % 0x40); + } + l = k; + 
lmt_input_state.cur_input.limit -= d; + while (l <= lmt_input_state.cur_input.limit) { + lmt_fileio_state.io_buffer[l] = lmt_fileio_state.io_buffer[l + d]; + l++; + } + *kk = k; + cur_chr = *chr; /* hm */ + return 1; + } else { + return 0; + } + } +} + +static int tex_aux_scan_control_sequence(void) +{ + int state = mid_line_state; + if (lmt_input_state.cur_input.loc > lmt_input_state.cur_input.limit) { + /*tex |state| is irrelevant in this case. */ + cur_cs = null_cs; + } else { + /*tex |cat_code(cur_chr)|, usually: */ + while (1) { + int loc = lmt_input_state.cur_input.loc; + halfword chr = get_unichar_from_buffer(&loc); + halfword cat = tex_aux_the_cat_code(chr); + if (cat != letter_cmd || loc > lmt_input_state.cur_input.limit) { + if (cat == spacer_cmd) { + state = skip_blanks_state; + } else { + state = mid_line_state; + if (cat == superscript_cmd && tex_aux_check_expanded_code(&loc, &chr)) { + continue; + } + } + // state = cat == spacer_cmd ? skip_blanks_state : mid_line_state; + // /*tex If an expanded \unknown */ + // if (cat == sup_mark_cmd && check_expanded_code(&loc, chr)) { + // continue; + // } + } else { + state = skip_blanks_state; + do { + chr = get_unichar_from_buffer(&loc); + cat = tex_aux_the_cat_code(chr); + } while (cat == letter_cmd && loc <= lmt_input_state.cur_input.limit); + /*tex If an expanded \unknown */ + if (cat == superscript_cmd && tex_aux_check_expanded_code(&loc, &chr)) { + continue; + } else if (cat != letter_cmd) { + /*tex Backtrack one character which can be \UTF. */ + if (chr <= 0x7F) { + loc -= 1; /* in most cases */ + } else if (chr > 0xFFFF) { + loc -= 4; + } else if (chr > 0x7FF) { + loc -= 3; + } else /* if (cur_chr > 0x7F) */ { + loc -= 2; + } + /*tex Now |k| points to first nonletter. */ + } + } + cur_cs = tex_id_locate(lmt_input_state.cur_input.loc, loc - lmt_input_state.cur_input.loc, ! 
lmt_hash_state.no_new_cs); + lmt_input_state.cur_input.loc = loc; + break; + } + } + cur_cmd = eq_type(cur_cs); + cur_chr = eq_value(cur_cs); + return state; +} + +/*tex + + All of the easy branches of |get_next| have now been taken care of. There is one more branch. + Conversely, the |file_warning| procedure is invoked when a file ends and some groups entered or + conditionals started while reading from that file are still incomplete. + +*/ + +static void tex_aux_file_warning(void) +{ + halfword cond_ptr = lmt_save_state.save_stack_data.ptr; /*tex saved value of |save_ptr| or |cond_ptr| */ + int cur_if = cur_group; /*tex saved value of |cur_group| or |cur_if| */ + int cur_unless = 0; + int if_step = 0; + int if_unless = 0; + int if_limit = cur_level; /*tex saved value of |cur_level| or |if_limit| */ + int if_line = 0; /*tex saved value of |if_line| */ + lmt_save_state.save_stack_data.ptr = cur_boundary; + while (lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].group != lmt_save_state.save_stack_data.ptr) { + --cur_level; + tex_print_nlp(); + tex_print_format("Warning: end of file when %G is incomplete", 1); + cur_group = save_level(lmt_save_state.save_stack_data.ptr); + lmt_save_state.save_stack_data.ptr = save_value(lmt_save_state.save_stack_data.ptr); + } + /*tex Restore old values. 
*/ + lmt_save_state.save_stack_data.ptr = cond_ptr; + cur_level = (quarterword) if_limit; + cur_group = (quarterword) cur_if; + cond_ptr = lmt_condition_state.cond_ptr; + cur_if = lmt_condition_state.cur_if; + cur_unless = lmt_condition_state.cur_unless; + if_step = lmt_condition_state.if_step; + if_unless = lmt_condition_state.if_unless; + if_limit = lmt_condition_state.if_limit; + if_line = lmt_condition_state.if_line; + while (lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].if_ptr != lmt_condition_state.cond_ptr) { + /* todo, more info */ + tex_print_nlp(); + tex_print_format("Warning: end of file when %C", if_test_cmd, lmt_condition_state.cur_if); + if (lmt_condition_state.if_limit == fi_code) { + tex_print_str_esc("else"); + } + if (lmt_condition_state.if_line) { + tex_print_format(" entered on line %i", lmt_condition_state.if_line); + } + tex_print_str(" is incomplete"); + lmt_condition_state.cur_if = if_limit_subtype(lmt_condition_state.cond_ptr); + lmt_condition_state.cur_unless = if_limit_unless(lmt_condition_state.cond_ptr); + lmt_condition_state.if_step = if_limit_step(lmt_condition_state.cond_ptr); + lmt_condition_state.if_unless = if_limit_stepunless(lmt_condition_state.cond_ptr); + lmt_condition_state.if_limit = if_limit_type(lmt_condition_state.cond_ptr); + lmt_condition_state.if_line = if_limit_line(lmt_condition_state.cond_ptr); + lmt_condition_state.cond_ptr = node_next(lmt_condition_state.cond_ptr); + } + /*tex restore old values */ + lmt_condition_state.cond_ptr = cond_ptr; + lmt_condition_state.cur_if = cur_if; + lmt_condition_state.cur_unless = cur_unless; + lmt_condition_state.if_step = if_step; + lmt_condition_state.if_unless = if_unless; + lmt_condition_state.if_limit = if_limit; + lmt_condition_state.if_line = if_line; + tex_print_nlp(); + if (tracing_nesting_par > 1) { + tex_show_context(); + } + if (lmt_error_state.history == spotless) { + lmt_error_state.history = warning_issued; + } +} + +static void 
tex_aux_check_validity(void) +{ + switch (lmt_input_state.scanner_status) { + case scanner_is_normal: + break; + case scanner_is_skipping: + tex_handle_error( + condition_error_type, + "The file ended while I was skipping conditional text.", + "This kind of error happens when you say '\\if...' and forget the\n" + "matching '\\fi'. It can also be that you use '\\orelse' or '\\orunless\n'" + "in the wrong way. Or maybe a forbidden control sequence was encountered." + ); + break; + case scanner_is_defining: + tex_handle_error(runaway_error_type, "The file ended when scanning a definition.", NULL); + break; + case scanner_is_matching: + tex_handle_error(runaway_error_type, "The file ended when scanning an argument.", NULL); + break; + case scanner_is_tolerant: + break; + case scanner_is_aligning: + tex_handle_error(runaway_error_type, "The file ended when scanning an alignment preamble.", NULL); + break; + case scanner_is_absorbing: + tex_handle_error(runaway_error_type, "The file ended when absorbing something.", NULL); + break; + } +} + +static next_line_retval tex_aux_next_line(void) +{ + if (lmt_input_state.cur_input.name > io_initial_input_code) { + /*tex Read next line of file into |buffer|, or |goto restart| if the file has ended. */ + unsigned inhibit_eol = 0; + ++lmt_input_state.input_line; + lmt_fileio_state.io_first = lmt_input_state.cur_input.start; + if (! lmt_token_state.force_eof) { + unsigned force_eol = 0; + switch (lmt_input_state.cur_input.name) { + case io_lua_input_code: + { + halfword n = null; + int cattable = 0; + int partial = 0; + int finalline = 0; + int t = lmt_cstring_input(&n, &cattable, &partial, &finalline); + switch (t) { + case eof_tex_input: + lmt_token_state.force_eof = 1; + break; + case string_tex_input: + /*tex string */ + lmt_input_state.cur_input.limit = lmt_fileio_state.io_last; /*tex Was |firm_up_the_line();|. 
*/ + lmt_input_state.cur_input.cattable = (short) cattable; + lmt_input_state.cur_input.partial = (signed char) partial; + if (finalline || partial || cattable == no_catcode_table_preset) { + inhibit_eol = 1; + } + if (! partial) { + lmt_input_state.cur_input.state = new_line_state; + } + break; + case token_tex_input: + /*tex token */ + if (n >= cs_token_flag && eq_type(n - cs_token_flag) == input_cmd && eq_value(n - cs_token_flag) == end_of_input_code && lmt_input_state.cur_input.index > 0) { + tex_end_file_reading(); + } + tex_back_input(n); + return next_line_restart; + case token_list_tex_input: + /*tex token */ + tex_begin_backed_up_list(n); + return next_line_restart; + case node_tex_input: + /*tex node */ + if (node_token_overflow(n)) { + tex_back_input(token_val(ignore_cmd, node_token_lsb(n))); + tex_reinsert_token(token_val(node_cmd, node_token_msb(n))); + return next_line_restart; + } else { + /*tex |0x10FFFF == 1114111| */ + tex_back_input(token_val(node_cmd, n)); + return next_line_restart; + } + default: + lmt_token_state.force_eof = 1; + break; + } + break; + } + case io_token_input_code: + case io_token_eof_input_code: + { + /* can be simplified but room for extensions now */ + halfword n = null; + int cattable = 0; + int partial = 0; + int finalline = 0; + int t = lmt_cstring_input(&n, &cattable, &partial, &finalline); + switch (t) { + case eof_tex_input: + lmt_token_state.force_eof = 1; + if (lmt_input_state.cur_input.name == io_token_eof_input_code && every_eof_par) { + force_eol = 1; + } + break; + case string_tex_input: + /*tex string */ + lmt_input_state.cur_input.limit = lmt_fileio_state.io_last; /*tex Was |firm_up_the_line();|. */ + lmt_input_state.cur_input.cattable = (short) cattable; + lmt_input_state.cur_input.partial = (signed char) partial; + inhibit_eol = lmt_input_state.cur_input.name != io_token_eof_input_code; + if (! 
partial) { + lmt_input_state.cur_input.state = new_line_state; + } + break; + default: + lmt_token_state.force_eof = 1; + break; + } + break; + } + case io_tex_macro_code: + /* what */ + default: + if (tex_lua_input_ln()) { + /*tex Not end of file, set |ilimit|. */ + lmt_input_state.cur_input.limit = lmt_fileio_state.io_last; /*tex Was |firm_up_the_line();|. */ + lmt_input_state.cur_input.cattable = default_catcode_table_preset; + } else if (every_eof_par && (! lmt_input_state.in_stack[lmt_input_state.cur_input.index].end_of_file_seen)) { + force_eol = 1; + } else { + tex_aux_check_validity(); + lmt_token_state.force_eof = 1; + } + break; + } + if (force_eol) { + lmt_input_state.cur_input.limit = lmt_fileio_state.io_first - 1; + /* tex Fake one empty line. */ + lmt_input_state.in_stack[lmt_input_state.cur_input.index].end_of_file_seen = 1; + tex_begin_token_list(every_eof_par, every_eof_text); + return next_line_restart; + } + } + if (lmt_token_state.force_eof) { + if (tracing_nesting_par > 0) { + if ((lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].group != cur_boundary) || (lmt_input_state.in_stack[lmt_input_state.in_stack_data.ptr].if_ptr != lmt_condition_state.cond_ptr)) { + if (! io_token_input(lmt_input_state.cur_input.name)) { + /*tex Give warning for some unfinished groups and/or conditionals. */ + tex_aux_file_warning(); + } + } + } + if (io_file_input(lmt_input_state.cur_input.name)) { + tex_report_stop_file(); + --lmt_input_state.open_files; + } + lmt_token_state.force_eof = 0; + tex_end_file_reading(); + return next_line_restart; + } else { + if (inhibit_eol || end_line_char_inactive) { + lmt_input_state.cur_input.limit--; + } else { + lmt_fileio_state.io_buffer[lmt_input_state.cur_input.limit] = (unsigned char) end_line_char_par; + } + lmt_fileio_state.io_first = lmt_input_state.cur_input.limit + 1; + lmt_input_state.cur_input.loc = lmt_input_state.cur_input.start; + /*tex We're ready to read. 
*/ + } + } else if (lmt_input_state.input_stack_data.ptr > 0) { + cur_cmd = 0; + cur_chr = 0; + return next_line_return; + } else { + /*tex A somewhat weird check: */ + switch (lmt_print_state.selector) { + case no_print_selector_code: + case terminal_selector_code: + tex_open_log_file(); + break; + } + tex_handle_error(eof_error_type, "end of file encountered", NULL); + /*tex Just in case it is not handled in a callback: */ + if (lmt_error_state.interaction > nonstop_mode) { + tex_fatal_error("aborting job"); + } + } + return next_line_ok; +} + +/*tex + Let's consider now what happens when |get_next| is looking at a token list. +*/ + +static int tex_aux_get_next_tokenlist(void) +{ + halfword t = token_info(lmt_input_state.cur_input.loc); + /*tex Move to next. */ + lmt_input_state.cur_input.loc = token_link(lmt_input_state.cur_input.loc); + if (t >= cs_token_flag) { + /*tex A control sequence token */ + cur_cs = t - cs_token_flag; + cur_cmd = eq_type(cur_cs); + if (cur_cmd == deep_frozen_dont_expand_cmd) { + /*tex + + Get the next token, suppressing expansion. The present point in the program is + reached only when the |expand| routine has inserted a special marker into the + input. In this special case, |token_info(iloc)| is known to be a control sequence + token, and |token_link(iloc) = null|. + + */ + cur_cs = token_info(lmt_input_state.cur_input.loc) - cs_token_flag; + lmt_input_state.cur_input.loc = null; + cur_cmd = eq_type(cur_cs); + if (cur_cmd > max_command_cmd) { + cur_cmd = relax_cmd; + // cur_chr = no_expand_flag; + cur_chr = no_expand_relax_code; + return 1; + } + } + cur_chr = eq_value(cur_cs); + } else { + cur_cmd = token_cmd(t); + cur_chr = token_chr(t); + switch (cur_cmd) { + case left_brace_cmd: + lmt_input_state.align_state++; + break; + case right_brace_cmd: + lmt_input_state.align_state--; + break; + case parameter_reference_cmd: + /*tex Insert macro parameter and |goto restart|. 
*/ + tex_begin_parameter_list(lmt_input_state.parameter_stack[lmt_input_state.cur_input.parameter_start + cur_chr - 1]); + return 0; + } + } + return 1; +} + +/*tex + + Now we're ready to take the plunge into |get_next| itself. Parts of this routine are executed + more often than any other instructions of \TEX. This sets |cur_cmd|, |cur_chr|, |cur_cs| to + next token. + + Handling alignments is interwoven because there we switch between constructing cells and rows + (node lists) based on templates that are token lists. This is why in several places we find + checks for |align_state|. + +*/ + +void tex_get_next(void) +{ + while (1) { + cur_cs = 0; + if (lmt_input_state.cur_input.state != token_list_state) { + /*tex Input from external file, |goto restart| if no input found. */ + if (! tex_aux_get_next_file()) { + continue; + } else { + /*tex Check align state later on! */ + } + } else if (! lmt_input_state.cur_input.loc) { + /*tex List exhausted, resume previous level. */ + tex_end_token_list(); + continue; + } else if (! tex_aux_get_next_tokenlist()) { + /*tex Parameter needs to be expanded. */ + continue; + } + if ((lmt_input_state.align_state == 0) && (cur_cmd == alignment_tab_cmd || cur_cmd == alignment_cmd)) { + /*tex If an alignment entry has just ended, take appropriate action. */ + tex_insert_alignment_template(); + continue; + } else { + break; + } + } +} + +/*tex + + Since |get_next| is used so frequently in \TEX, it is convenient to define three related + procedures that do a little more: + + \startitemize + \startitem + |get_token| not only sets |cur_cmd| and |cur_chr|, it also sets |cur_tok|, a packed + halfword version of the current token. 
+ \stopitem + \startitem + |get_x_token|, meaning \quote {get an expanded token}, is like |get_token|, but if the + current token turns out to be a user-defined control sequence (i.e., a macro call), or + a conditional, or something like |\topmark| or |\expandafter| or |\csname|, it is + eliminated from the input by beginning the expansion of the macro or the evaluation of + the conditional. + \stopitem + \startitem + |x_token| is like |get_x_token| except that it assumes that |get_next| has already been + called. + \stopitem + \stopitemize + + In fact, these three procedures account for almost every use of |get_next|. No new control + sequences will be defined except during a call of |get_token|, or when |\csname| compresses a + token list, because |no_new_control_sequence| is always |true| at other times. + + This sets |cur_cmd|, |cur_chr|, |cur_tok|. For convenience we also return the token because in + some places we store it and then some direct assignment looks a bit nicer. + +*/ + +halfword tex_get_token(void) +{ + lmt_hash_state.no_new_cs = 0; + tex_get_next(); + lmt_hash_state.no_new_cs = 1; + cur_tok = cur_cs ? cs_token_flag + cur_cs : token_val(cur_cmd, cur_chr); + return cur_tok; +} + +/*tex This changes the string |s| to a token list. */ + +halfword tex_string_to_toks(const char *ss) +{ + const char *s = ss; + const char *se = ss + strlen(s); + /*tex tail of the token list */ + halfword h = null; + halfword p = null; + /*tex new node being added to the token list via |store_new_token| */ + while (s < se) { + halfword t = (halfword) aux_str2uni((const unsigned char *) s); + s += utf8_size(t); + if (t == ' ') { + t = space_token; + } else { + t += other_token; + } + p = tex_store_new_token(p, t); + if (! h) { + h = p; + } + } + return h; +} + +/*tex + + The token lists for macros and for other things like |\mark| and |\output| and |\write| are + produced by a procedure called |scan_toks|. 
+ + Before we get into the details of |scan_toks|, let's consider a much simpler task, that of + converting the current string into a token list. The |str_toks| function does this; it + classifies spaces as type |spacer| and everything else as type |other_char|. + + The token list created by |str_toks| begins at |link(temp_token_head)| and ends at the value + |p| that is returned. If |p = temp_token_head|, the list is empty. + + |lua_str_toks| is almost identical, but it also escapes the three symbols that \LUA\ considers + special while scanning a literal string. + + This changes the string |str_pool[b .. pool_ptr]| to a token list: + +*/ + +static halfword lmt_str_toks(lstring b) /* returns head */ +{ + /*tex index into string */ + unsigned char *k = (unsigned char *) b.s; + /*tex tail of the token list */ + halfword h = null; + halfword p = null; + while (k < (unsigned char *) b.s + b.l) { + /*tex token being appended */ + halfword t = aux_str2uni(k); + k += utf8_size(t); + if (t == ' ') { + t = space_token; + } else { + if ((t == '\\') || (t == '"') || (t == '\'') || (t == 10) || (t == 13)) { + p = tex_store_new_token(p, escape_token); + if (t == 10) { + t = 'n'; + } else if (t == 13) { + t = 'r'; + } + } + t += other_token; + } + p = tex_store_new_token(p, t); + if (! h) { + h = p; + } + } + return h; +} + +/*tex + + Incidentally, the main reason for wanting |str_toks| is the function |the_toks|, which has + similar input/output characteristics. This changes the string |str_pool[b .. pool_ptr]| to a + token list: + +*/ + +halfword tex_str_toks(lstring s, halfword *tail) +{ + halfword h = null; + halfword p = null; + if (s.s) { + unsigned char *k = s.s; + unsigned char *l = k + s.l; + while (k < l) { + halfword t = aux_str2uni(k); + if (t == ' ') { + k += 1; + t = space_token; + } else { + k += utf8_size(t); + t += other_token; + } + p = tex_store_new_token(p, t); + if (! 
h) { + h = p; + } + } + } + if (tail) { + *tail = null; + } + return h; +} + +halfword tex_cur_str_toks(halfword *tail) +{ + halfword h = null; + halfword p = null; + unsigned char *k = (unsigned char *) lmt_string_pool_state.string_temp; + if (k) { + unsigned char *l = k + lmt_string_pool_state.string_temp_top; + /*tex tail of the token list */ + while (k < l) { + /*tex token being appended */ + halfword t = aux_str2uni(k); + if (t == ' ') { + k += 1; + t = space_token; + } else { + k += utf8_size(t); + t += other_token; + } + p = tex_store_new_token(p, t); + if (! h) { + h = p; + } + } + } + tex_reset_cur_string(); + if (tail) { + *tail = p; + } + return h; +} + +/*tex + + Most of the converter is similar to the one I made for macro so at some point I can make a + helper; also todo: there is no need to go through the pool. + +*/ + +/*tex Change the string |str_pool[b..pool_ptr]| to a token list. */ + +halfword tex_str_scan_toks(int ct, lstring ls) +{ + /*tex index into string */ + unsigned char *k = ls.s; + unsigned char *l = k + ls.l; + /*tex tail of the token list */ + halfword h = null; + halfword p = null; + while (k < l) { + int cc; + /*tex token being appended */ + halfword t = aux_str2uni(k); + k += utf8_size(t); + cc = tex_get_cat_code(ct, t); + if (cc == 0) { + /*tex We have a potential control sequence so we check for it. */ + int lname = 0 ; + int s = 0 ; + int c = 0 ; + unsigned char *name = k ; + while (k < l) { + t = (halfword) aux_str2uni((const unsigned char *) k); + s = utf8_size(t); + c = tex_get_cat_code(ct,t); + if (c == 11) { + k += s ; + lname += s ; + } else if (c == 10) { + /*tex We ignore a trailing space like normal scanning does. */ + k += s ; + break ; + } else { + break ; + } + } + if (s > 0) { + /*tex We have a potential |\cs|. */ + halfword cs = tex_string_locate((const char *) name, lname, 0); + if (cs == undefined_control_sequence) { + /*tex Let's play safe and backtrack. 
*/ + t += cc * (1<<21); + k = name ; + } else { + t = cs_token_flag + cs; + } + } else { + /*tex + Just a character with some meaning, so |\unknown| becomes effectively + |\unknown| assuming that |\\| has some useful meaning of course. + */ + t += cc * (1<<21); + k = name ; + } + } else { + /*tex + Whatever token, so for instance $x^2$ just works given a \TEX\ catcode regime. + */ + t += cc * (1<<21); + } + p = tex_store_new_token(p, t); + if (! h) { + h = p; + } + } + return h; +} + +/* these two can be combined, then we can avoid the h check */ + +static void tex_aux_set_toks_register(halfword loc, singleword cmd, halfword t, int g) +{ + halfword ref = get_reference_token(); + set_token_link(ref, t); + tex_define((g > 0) ? global_flag_bit : 0, loc, cmd == internal_toks_cmd ? internal_toks_reference_cmd : register_toks_reference_cmd, ref); +} + +static void tex_aux_append_copied_toks_list(halfword loc, singleword cmd, int g, halfword s, halfword t) +{ + halfword ref = get_reference_token(); + halfword p = ref; + while (s) { + p = tex_store_new_token(p, token_info(s)); + s = token_link(s); + } + while (t) { + p = tex_store_new_token(p, token_info(t)); + t = token_link(t); + } + tex_define((g > 0) ? global_flag_bit : 0, loc, cmd == internal_toks_cmd ? 
internal_toks_reference_cmd : register_toks_reference_cmd, ref); +} + +/*tex Public helper: */ + +halfword tex_copy_token_list(halfword h1, halfword *t) +{ + halfword h2 = tex_store_new_token(null, token_info(h1)); + halfword t1 = token_link(h1); + halfword t2 = h2; + while (t1) { + t2 = tex_store_new_token(t2, token_info(t1)); + t1 = token_link(t1); + } + if (t) { + *t = t2; + } + return h2; +} + +/*tex + + At some point I decided to implement the following primitives: + + \starttabulate[|T||T||] + \NC 0 \NC \type {toksapp} \NC 1 \NC \type {etoksapp} \NC \NR + \NC 2 \NC \type {tokspre} \NC 3 \NC \type {etokspre} \NC \NR + \NC 4 \NC \type {gtoksapp} \NC 5 \NC \type {xtoksapp} \NC \NR + \NC 6 \NC \type {gtokspre} \NC 7 \NC \type {xtokspre} \NC \NR + \stoptabulate + + These append and prepend tokens to token lists. In \CONTEXT\ we always had macros doing something + like that. It was only a few years later that I ran again into an article that Taco and I wrote + in 1999 in the NTG Maps about an extension to \ETEX\ (called eetex). The first revelation was + that I had completely forgotten about it, which can be explained by the two decade time-lap. The + second was that Taco actually added that to the program at that time, so I could have used (parts + of) that code. Anyway, among the other proposed (and implemented) features were manipulating + lists and ways to output packed data to the \DVI\ files (numbers packed into 1 upto 4 bytes). + Maybe some day I'll have a go at lists, although with todays computers there is not that much to + gain. Also, \CONTEXT\ progressed to different internals so the urge is no longer there. The also + discussed \SGML\ mode also in no longer that relevant given that we have \LUA. + + If we want to handle macros too we really need to distinguish between toks and macros with + |cur_chr| above, but not now. We can't expand, and have to use |get_r_token| or so. I don't need + it anyway. 
+ + \starttyping + get_r_token(); + if (cur_cmd == call_cmd) { + nt = cur_cs; + target = equiv(nt); + } else { + // some error message + } + \stoptyping +*/ + +# define immediate_permitted(loc,target) ((eq_level(loc) == cur_level) && (get_token_reference(target) == 0)) + +void tex_run_combine_the_toks(void) +{ + halfword source = null; + halfword target = null; + halfword append, expand, global; + halfword nt, ns; + singleword cmd; + /* */ + switch (cur_chr) { + case expanded_toks_code: append = 0; global = 0; expand = 1; break; + case append_toks_code: append = 1; global = 0; expand = 0; break; + case append_expanded_toks_code: append = 1; global = 0; expand = 1; break; + case prepend_toks_code: append = 2; global = 0; expand = 0; break; + case prepend_expanded_toks_code: append = 2; global = 0; expand = 1; break; + case global_expanded_toks_code: append = 0; global = 1; expand = 1; break; + case global_append_toks_code: append = 1; global = 1; expand = 0; break; + case global_append_expanded_toks_code: append = 1; global = 1; expand = 1; break; + case global_prepend_toks_code: append = 2; global = 1; expand = 0; break; + case global_prepend_expanded_toks_code: append = 2; global = 1; expand = 1; break; + default: append = 0; global = 0; expand = 0; break; + } + /*tex The target. */ + tex_get_x_token(); + if (cur_cmd == register_toks_cmd || cur_cmd == internal_toks_cmd) { + nt = eq_value(cur_cs); + cmd = (singleword) cur_cmd; + } else { + /*tex Maybe a number. */ + tex_back_input(cur_tok); + nt = register_toks_location(tex_scan_toks_register_number()); + cmd = register_toks_cmd; + } + target = eq_value(nt); + /*tex The source. */ + do { + tex_get_x_token(); + } while (cur_cmd == spacer_cmd); + if (cur_cmd == left_brace_cmd) { + source = expand ? tex_scan_toks_expand(1, NULL, 0) : tex_scan_toks_normal(1, NULL); + /*tex The action. */ + if (source) { + if (target) { + halfword s = token_link(source); + if (s) { + halfword t = token_link(target); + if (! 
t) { + /*tex Can this happen? */ + set_token_link(target, s); + token_link(source) = null; + } else { + switch (append) { + case 0: + goto ASSIGN_1; + case 1: + /*append */ + if (immediate_permitted(nt,target)) { + halfword p = t; + while (token_link(p)) { + p = token_link(p); + } + token_link(p) = s; + token_link(source) = null; + } else { + tex_aux_append_copied_toks_list(nt, cmd, global, t, s); + } + break; + case 2: + /* prepend */ + if (immediate_permitted(nt,target)) { + halfword p = s; + while (token_link(p)) { + p = token_link(p); + } + token_link(source) = null; + set_token_link(p, t); + set_token_link(target, s); + } else { + tex_aux_append_copied_toks_list(nt, cmd, global, s, t); + } + break; + } + } + } + } else { + ASSIGN_1: + tex_aux_set_toks_register(nt, cmd, token_link(source), global); + token_link(source) = null; + } + tex_flush_token_list(source); + } + } else { + if (cur_cmd == register_toks_cmd) { + ns = register_toks_number(eq_value(cur_cs)); + } else if (cur_cmd == internal_toks_cmd) { + ns = internal_toks_number(eq_value(cur_cs)); + } else { + ns = tex_scan_toks_register_number(); + } + /*tex The action. */ + source = toks_register(ns); + if (source) { + if (target) { + halfword s = token_link(source); + halfword t = token_link(target); + switch (append) { + case 0: + /*assign */ + goto ASSIGN_2; + case 1: + /*append */ + if (immediate_permitted(nt, target)) { + halfword p = t; + while (token_link(p)) { + p = token_link(p); + } + while (s) { + p = tex_store_new_token(p, token_info(s)); + s = token_link(s); + } + } else { + tex_aux_append_copied_toks_list(nt, cmd, global, t, s); + } + break; + case 2: + if (immediate_permitted(nt, target)) { + halfword h = null; + halfword p = null; + while (s) { + p = tex_store_new_token(p, token_info(s)); + if (! 
h) { + h = p; + } + s = token_link(s); + } + set_token_link(p, t); + set_token_link(target, h); + } else { + tex_aux_append_copied_toks_list(nt, cmd, global, s, t); + } + break; + } + } else { + ASSIGN_2: + // set_toks_register(nt, source, global); + tex_add_token_reference(source); + eq_value(nt) = source; + } + } + } +} + +/*tex + + This routine, used in the next one, prints the job name, possibly modified by the + |process_jobname| callback. + +*/ + +static void tex_aux_print_job_name(void) +{ + if (lmt_fileio_state.job_name) { + /*tex \CCODE\ strings for jobname before and after processing. */ + char *s = lmt_fileio_state.job_name; + int callback_id = lmt_callback_defined(process_jobname_callback); + if (callback_id > 0) { + char *ss; + int lua_retval = lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "S->S", s, &ss); + if (lua_retval && ss) { + s = ss; + } + } + tex_print_str(s); + } +} + +/*tex + + The procedure |run_convert_tokens| uses |str_toks| to insert the token list for |convert| + functions into the scanner; |\outer| control sequences are allowed to follow |\string| and + |\meaning|. + +*/ + +/*tex Codes not really needed but cleaner when testing */ + +# define push_selector { \ + saved_selector = lmt_print_state.selector; \ + lmt_print_state.selector = new_string_selector_code; \ +} + +# define pop_selector { \ + lmt_print_state.selector = saved_selector; \ +} + +void tex_run_convert_tokens(halfword code) +{ + /*tex Scan the argument for command |c|. */ + switch (code) { + /*tex + The |number_code| is quite popular. Beware, when used with a lua none function, a zero + is injected. We could intercept it at the cost of messy code, but on the other hand, + nothing guarantees that the call returns a number so this side effect can be defended + as a recovery measure. 
+ */ + case number_code: + { + int saved_selector; + halfword v = tex_scan_int(0, NULL); + push_selector; + tex_print_int(v); + pop_selector; + break; + } + case to_integer_code: + case to_hexadecimal_code: + { + int saved_selector; + halfword v = tex_scan_int(0, NULL); + tex_get_x_token(); /* maybe not x here */ + if (cur_cmd != relax_cmd) { + tex_back_input(cur_tok); + } + push_selector; + if (code == to_integer_code) { + tex_print_int(v); + } else { + tex_print_hex(v); + } + pop_selector; + break; + } + case to_scaled_code: + case to_sparse_scaled_code: + case to_dimension_code: + case to_sparse_dimension_code: + { + int saved_selector; + halfword v = tex_scan_dimen(0, 0, 0, 0, NULL); + tex_get_x_token(); /* maybe not x here */ + if (cur_cmd != relax_cmd) { + tex_back_input(cur_tok); + } + push_selector; + switch (code) { + case to_sparse_dimension_code: + case to_sparse_scaled_code: + tex_print_sparse_dimension(v, no_unit); + break; + default: + tex_print_dimension(v, no_unit); + break; + } + switch (code) { + case to_dimension_code: + case to_sparse_dimension_code: + tex_print_unit(pt_unit); + break; + } + pop_selector; + break; + } + case to_mathstyle_code: + { + int saved_selector; + halfword v = tex_scan_math_style_identifier(1, 0); + push_selector; + tex_print_int(v); + pop_selector; + break; + } + case lua_function_code: + { + halfword v = tex_scan_int(0, NULL); + if (v > 0) { + strnumber u = tex_save_cur_string(); + lmt_token_state.luacstrings = 0; + lmt_function_call(v, 0); + tex_restore_cur_string(u); + if (lmt_token_state.luacstrings > 0) { + tex_lua_string_start(); + } + } else { + tex_normal_error("luafunction", "invalid number"); + } + return; + } + case lua_bytecode_code: + { + halfword v = tex_scan_int(0, NULL); + if (v < 0 || v > 65535) { + tex_normal_error("luabytecode", "invalid number"); + } else { + strnumber u = tex_save_cur_string(); + lmt_token_state.luacstrings = 0; + lmt_bytecode_call(v); + tex_restore_cur_string(u); + if 
(lmt_token_state.luacstrings > 0) { + tex_lua_string_start(); + } + } + return; + } + case lua_code: + { + full_scanner_status saved_full_status = tex_save_full_scanner_status(); + strnumber u = tex_save_cur_string(); + halfword s = tex_scan_toks_expand(0, NULL, 0); + tex_unsave_full_scanner_status(saved_full_status); + lmt_token_state.luacstrings = 0; + lmt_token_call(s); + tex_delete_token_reference(s); /* boils down to flush_list */ + tex_restore_cur_string(u); + if (lmt_token_state.luacstrings > 0) { + tex_lua_string_start(); + } + /*tex No further action. */ + return; + } + case expanded_code: + case semi_expanded_code: + { + full_scanner_status saved_full_status = tex_save_full_scanner_status(); + strnumber u = tex_save_cur_string(); + halfword s = tex_scan_toks_expand(0, NULL, code == semi_expanded_code); + tex_unsave_full_scanner_status(saved_full_status); + if (token_link(s)) { + tex_begin_inserted_list(token_link(s)); + token_link(s) = null; + } + tex_put_available_token(s); + tex_restore_cur_string(u); + /*tex No further action. */ + return; + } + /* case immediate_assignment_code: */ + /* case immediate_assigned_code: */ + /*tex + These two were an on-the-road-to-bachotex brain-wave. A first variant did more in + sequence till a relax or spacer was seen. These commands permits for instance setting + counters in full expansion. However, as we have the more powerful local control + mechanisms available these two commands have been dropped in \LUAMETATEX. Performance + wise there is not that much to gain from |\immediateassigned| and it's even somewhat + limited. So, they're gone now. Actually, one can also use the local control feature in + an |\edef|, which {\em is} rather efficient, so we're good anyway. The upgraded code + can be found in the archive. 
+ */ + case string_code: + { + int saved_selector; + int saved_scanner_status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_normal; + tex_get_token(); + lmt_input_state.scanner_status = saved_scanner_status; + push_selector; + if (cur_cs) { + tex_print_cs(cur_cs); + } else { + tex_print_tex_str(cur_chr); + } + pop_selector; + break; + } + case cs_string_code: + { + int saved_selector; + int saved_scanner_status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_normal; + tex_get_token(); + lmt_input_state.scanner_status = saved_scanner_status; + push_selector; + if (cur_cs) { + tex_print_cs_name(cur_cs); + } else { + tex_print_tex_str(cur_chr); + } + pop_selector; + break; + } + case detokenized_code: + { + int saved_selector; + int saved_scanner_status = lmt_input_state.scanner_status; + halfword t = null; + lmt_input_state.scanner_status = scanner_is_normal; + tex_get_token(); + lmt_input_state.scanner_status = saved_scanner_status; + t = tex_get_available_token(cur_tok); + push_selector; + tex_show_token_list(t, null, extreme_token_show_max, 0); + tex_put_available_token(t); + pop_selector; + break; + } + case roman_numeral_code: + { + int saved_selector; + halfword v = tex_scan_int(0, NULL); + push_selector; + tex_print_roman_int(v); + pop_selector; + break; + } + case meaning_code: + case meaning_full_code: + case meaning_less_code: + case meaning_asis_code: + { + int saved_selector; + int saved_scanner_status = lmt_input_state.scanner_status; + lmt_input_state.scanner_status = scanner_is_normal; + tex_get_token(); + lmt_input_state.scanner_status = saved_scanner_status; + push_selector; + tex_print_meaning(code); + pop_selector; + break; + } + case uchar_code: + { + int saved_selector; + int chr = tex_scan_char_number(0); + push_selector; + tex_print_tex_str(chr); + pop_selector; + break; + } + case lua_escape_string_code: + { + lstring escstr; + int l = 0; + int e = lmt_token_state.in_lua_escape; 
+ full_scanner_status saved_full_status = tex_save_full_scanner_status(); + halfword result = tex_scan_toks_expand(0, NULL, 0); + lmt_token_state.in_lua_escape = 1; + escstr.s = (unsigned char *) tex_tokenlist_to_tstring(result, 0, &l, 0, 0, 0); + escstr.l = (unsigned) l; + lmt_token_state.in_lua_escape = e; + tex_delete_token_reference(result); /* boils down to flush_list */ + tex_unsave_full_scanner_status(saved_full_status); + if (escstr.l) { + result = lmt_str_toks(escstr); + tex_begin_inserted_list(result); + } + return; + } + case font_name_code: + { + int saved_selector; + halfword fnt = tex_scan_font_identifier(NULL); + push_selector; + tex_print_font(fnt); + pop_selector; + break; + } + case font_specification_code: + { + int saved_selector; + halfword fnt = tex_scan_font_identifier(NULL); + push_selector; + tex_append_string((const unsigned char *) font_original(fnt), (unsigned) strlen(font_original(fnt))); + pop_selector; + break; + } + case job_name_code: + { + int saved_selector; + if (! lmt_fileio_state.job_name) { + tex_open_log_file(); + } + push_selector; + tex_aux_print_job_name(); + pop_selector; + break; + } + case format_name_code: + { + int saved_selector; + if (! lmt_fileio_state.job_name) { + tex_open_log_file(); + } + push_selector; + tex_print_tex_str(lmt_dump_state.format_name); + pop_selector; + break; + } + case luatex_banner_code: + { + int saved_selector; + push_selector; + tex_print_str(lmt_engine_state.luatex_banner); + pop_selector; + break; + } + default: + tex_confusion("convert tokens"); + break; + } + { + halfword head = tex_cur_str_toks(NULL); + tex_begin_inserted_list(head); + } +} + +/*tex + The boolean |in_lua_escape| is keeping track of the lua string escape state. 
+*/ + +strnumber tex_the_convert_string(halfword c, int i) +{ + int saved_selector = lmt_print_state.selector; + strnumber ret = 0; + int done = 1 ; + lmt_print_state.selector = new_string_selector_code; + switch (c) { + case number_code: + case to_integer_code: + tex_print_int(i); + break; + case to_hexadecimal_code: + tex_print_hex(i); + break; + case to_scaled_code: + tex_print_dimension(i, no_unit); + break; + case to_sparse_scaled_code: + tex_print_sparse_dimension(i, no_unit); + break; + case to_dimension_code: + tex_print_dimension(i, pt_unit); + break; + case to_sparse_dimension_code: + tex_print_sparse_dimension(i, pt_unit); + break; + /* case to_mathstyle_code: */ + /* case lua_function_code: */ + /* case lua_code: */ + /* case expanded_code: */ + /* case string_code: */ + /* case cs_string_code: */ + case roman_numeral_code: + tex_print_roman_int(i); + break; + /* case meaning_code: */ + case uchar_code: + tex_print_tex_str(i); + break; + /* case lua_escape_string_code: */ + case font_name_code: + tex_print_font(i); + break; + case font_specification_code: + tex_print_str(font_original(i)); + break; + /* case left_margin_kern_code: */ + /* case right_margin_kern_code: */ + /* case math_char_class_code: */ + /* case math_char_fam_code: */ + /* case math_char_slot_code: */ + /* case insert_ht_code: */ + case job_name_code: + tex_aux_print_job_name(); + break; + case format_name_code: + tex_print_tex_str(lmt_dump_state.format_name); + break; + case luatex_banner_code: + tex_print_str(lmt_engine_state.luatex_banner); + break; + case font_identifier_code: + tex_print_font_identifier(i); + break; + default: + done = 0; + break; + } + if (done) { + ret = tex_make_string(); + } + lmt_print_state.selector = saved_selector; + return ret; +} + +/*tex Return a string from tokens list: */ + +strnumber tex_tokens_to_string(halfword p) +{ + if (lmt_print_state.selector == new_string_selector_code) { + tex_normal_error("tokens", "tokens_to_string() called while selector 
= new_string"); + return get_nullstr(); + } else { + int saved_selector = lmt_print_state.selector; + lmt_print_state.selector = new_string_selector_code; + tex_token_show(p, extreme_token_show_max); + lmt_print_state.selector = saved_selector; + return tex_make_string(); + } +} + +/*tex + + The actual token conversion in this function is now functionally equivalent to |show_token_list|, + except that it always prints the whole token list. Often the result is not that large, for + instance |\directlua| is seldom large. However, this converter is also used for patterns + and exceptions where size is mnore an issue. For that reason we used to have three variants, + one of which (experimentally) used a buffer. At some point, in the manual we were talking of + millions of allocations but times have changed. + + Macros were used to inline the appending code (in the thre variants), but in the end I decided + to just merge all into one function, with a bit more overhead because we need to optionally + skip a macro preamble. + + Values like 512 and 128 also work ok. There is not much to gain in optimization here. We used + to have 3 mostly overlapping functions, one of which used a buffer. We can probably use a + larger default buffer size and larger step and only free when we think it's too large. + +*/ + +# define default_buffer_size 512 /*tex This used to be 256 */ +# define default_buffer_step 4096 /*tex When we're larger, we always are much larger. 
*/ + +// todo: check ret + +static void tex_aux_make_room_in_buffer(int a) +{ + if (lmt_token_state.bufloc + a + 1 > lmt_token_state.bufmax) { + char *tmp = aux_reallocate_array(lmt_token_state.buffer, sizeof(unsigned char), lmt_token_state.bufmax + default_buffer_step, 1); + if (tmp) { + lmt_token_state.bufmax += default_buffer_step; + } else { + // error + } + lmt_token_state.buffer = tmp; + } +} + +static void tex_aux_append_uchar_to_buffer(int s) +{ + tex_aux_make_room_in_buffer(4); + if (s <= 0x7F) { + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (s); + } else if (s <= 0x7FF) { + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0xC0 + (s / 0x40)); + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + (s % 0x40)); + } else if (s <= 0xFFFF) { + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0xE0 + (s / 0x1000)); + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + ((s % 0x1000) / 0x40)); + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + ((s % 0x1000) % 0x40)); + } else if (s >= 0x110000) { + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (s - 0x11000); + } else { + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0xF0 + (s / 0x40000)); + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + ((s % 0x40000) / 0x1000)); + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + (((s % 0x40000) % 0x1000) / 0x40)); + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (0x80 + (((s % 0x40000) % 0x1000) % 0x40)); + } +} + +static void tex_aux_append_char_to_buffer(int c) +{ + tex_aux_make_room_in_buffer(1); + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (c); +} + +/*tex Only errors and unknowns. */ + +static void tex_aux_append_str_to_buffer(const char *s) +{ + const char *v = s; + tex_aux_make_room_in_buffer((int) strlen(v)); + /*tex Using memcpy will inline and give a larger binary ... and we seldom need this. 
*/ + while (*v) { + lmt_token_state.buffer[lmt_token_state.bufloc++] = (char) (*v); + v++; + } +} + +/*tex Only bogus csnames. */ + +static void tex_aux_append_esc_to_buffer(const char *s) +{ + int e = escape_char_par; + if (e > 0 && e < cs_offset_value) { + tex_aux_append_uchar_to_buffer(e); + } + tex_aux_append_str_to_buffer(s); +} + +# define is_cat_letter(a) (tex_aux_the_cat_code(aux_str2uni(str_string((a)))) == letter_cmd) + +/* make two versions: macro and not */ + +char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamble, int nospace, int strip) +{ + if (pp) { + /*tex We need to go beyond the reference. */ + int p = token_link(pp); + if (p) { + if (lmt_token_state.bufmax > default_buffer_size) { + /* Let's start fresh and small. */ + aux_deallocate_array(lmt_token_state.buffer); + lmt_token_state.buffer = aux_allocate_clear_array(sizeof(unsigned char), default_buffer_size, 1); + lmt_token_state.bufmax = default_buffer_size; + } else if (! lmt_token_state.buffer) { + /* Let's start. */ + lmt_token_state.buffer = aux_allocate_clear_array(sizeof(unsigned char), default_buffer_size, 1); + lmt_token_state.bufmax = default_buffer_size; + } + lmt_token_state.bufloc = 0; + int e = escape_char_par; /*tex The serialization of the escape, normally a backlash. */ + int n = '0'; /*tex The character after |#|, so |#0| upto |#9| */ + int min = 0; + int max = lmt_token_memory_state.tokens_data.top; + int skip = 0; + if (skippreamble) { + skip = get_token_parameters(pp); + } + while (p) { + if (p < min || p > max) { + tex_aux_append_str_to_buffer(error_string_clobbered(31)); + break; + } else { + int infop = token_info(p); + if (infop < 0) { + /* unlikely, will go after checking */ + tex_aux_append_str_to_buffer(error_string_bad(32)); + } else if (infop < cs_token_flag) { + /*tex We nearly always end up here because otherwise we have an error. 
*/ + int cmd = token_cmd(infop); + int chr = token_chr(infop); + switch (cmd) { + case left_brace_cmd: + case right_brace_cmd: + case math_shift_cmd: + case alignment_tab_cmd: + case superscript_cmd: + case subscript_cmd: + case spacer_cmd: + case letter_cmd: + case other_char_cmd: + if (! skip) { + tex_aux_append_uchar_to_buffer(chr); + } + break; + case parameter_cmd: + if (! skip) { + if (! nospace && (! lmt_token_state.in_lua_escape && (lmt_expand_state.cs_name_level == 0))) { + tex_aux_append_uchar_to_buffer(chr); + } + tex_aux_append_uchar_to_buffer(chr); + } + break; + case parameter_reference_cmd: + if (! skip) { + tex_aux_append_char_to_buffer(match_visualizer); + if (chr <= 9) { + tex_aux_append_char_to_buffer(chr + '0'); + } else { + tex_aux_append_char_to_buffer('!'); + goto EXIT; + } + } else { + if (chr > 9) { + goto EXIT; + } + } + break; + case match_cmd: + if (! skip) { + tex_aux_append_char_to_buffer(match_visualizer); + } + if (is_valid_match_ref(chr)) { + ++n; + } + if (! skip) { + tex_aux_append_char_to_buffer(chr ? chr : '0'); + } + if (n > '9') { + goto EXIT; + } + break; + case end_match_cmd: + if (chr == 0) { + if (! skip) { + tex_aux_append_char_to_buffer('-'); + tex_aux_append_char_to_buffer('>'); + } + skip = 0 ; + } + break; + /* + case string_cmd: + c = c + cs_offset_value; + do_make_room((int) str_length(c)); + for (int i = 0; i < str_length(c); i++) { + token_state.buffer[token_state.bufloc++] = str_string(c)[i]; + } + break; + */ + case end_paragraph_cmd: + if (! inhibit_par && (auto_paragraph_mode(auto_paragraph_text))) { + tex_aux_append_esc_to_buffer("par"); + } + break; + default: + tex_aux_append_str_to_buffer(tex_aux_special_cmd_string(cmd, chr, error_string_bad(33))); + break; + } + } else if (! 
(inhibit_par && infop == lmt_token_state.par_token)) { + int q = infop - cs_token_flag; + if (q < hash_base) { + if (q == null_cs) { + tex_aux_append_esc_to_buffer("csname"); + tex_aux_append_esc_to_buffer("endcsname"); + } else { + tex_aux_append_str_to_buffer(error_string_impossible(34)); + } + } else if (eqtb_out_of_range(q)) { + tex_aux_append_str_to_buffer(error_string_impossible(35)); + } else { + strnumber txt = cs_text(q); + if (txt < 0 || txt >= lmt_string_pool_state.string_pool_data.ptr) { + tex_aux_append_str_to_buffer(error_string_nonexistent(36)); + } else { + char *sh = tex_makecstring(txt); + char *s = sh; + if (tex_is_active_cs(txt)) { + s = s + 3; + while (*s) { + tex_aux_append_char_to_buffer(*s); + s++; + } + } else { + if (e >= 0 && e < 0x110000) { + tex_aux_append_uchar_to_buffer(e); + } + while (*s) { + tex_aux_append_char_to_buffer(*s); + s++; + } + if ((! nospace) && ((! tex_single_letter(txt)) || is_cat_letter(txt))) { + tex_aux_append_char_to_buffer(' '); + } + } + lmt_memory_free(sh); + } + } + } + p = token_link(p); + } + } + EXIT: + if (strip && lmt_token_state.bufloc > 1) { + if (lmt_token_state.buffer[lmt_token_state.bufloc-1] == strip) { + lmt_token_state.bufloc -= 1; + } + if (lmt_token_state.bufloc > 1 && lmt_token_state.buffer[0] == strip) { + memcpy(&lmt_token_state.buffer[0], &lmt_token_state.buffer[1], lmt_token_state.bufloc-1); + lmt_token_state.bufloc -= 1; + } + } + lmt_token_state.buffer[lmt_token_state.bufloc] = '\0'; + if (siz) { + *siz = lmt_token_state.bufloc; + } + return lmt_token_state.buffer; + } + } + if (siz) { + *siz = 0; + } + return NULL; +} + +/*tex + + The \LUA\ interface needs some extra functions. The functions themselves are quite boring, but + they are handy because otherwise this internal stuff has to be accessed from \CCODE\ directly, + where lots of the defines are not available. + +*/ + +halfword tex_get_tex_dimen_register (int j, int internal) { return internal ? 
dimen_parameter(j) : dimen_register(j) ; } +halfword tex_get_tex_skip_register (int j, int internal) { return internal ? glue_parameter(j) : skip_register(j) ; } +halfword tex_get_tex_mu_skip_register (int j, int internal) { return internal ? mu_glue_parameter(j) : mu_skip_register(j); } +halfword tex_get_tex_count_register (int j, int internal) { return internal ? count_parameter(j) : count_register(j) ; } +halfword tex_get_tex_attribute_register (int j, int internal) { return internal ? attribute_parameter(j) : attribute_register(j) ; } +halfword tex_get_tex_box_register (int j, int internal) { return internal ? box_parameter(j) : box_register(j) ; } + +void tex_set_tex_dimen_register(int j, halfword v, int flags, int internal) +{ + if (global_defs_par) { + flags = add_global_flag(flags); + } + if (internal) { + tex_assign_internal_dimen_value(flags, internal_dimen_location(j), v); + } else { + tex_word_define(flags, register_dimen_location(j), v); + } +} + +void tex_set_tex_skip_register(int j, halfword v, int flags, int internal) +{ + if (global_defs_par) { + flags = add_global_flag(flags); + } + if (internal) { + tex_assign_internal_skip_value(flags, internal_glue_location(j), v); + } else { + tex_word_define(flags, register_glue_location(j), v); + } +} + +void tex_set_tex_mu_skip_register(int j, halfword v, int flags, int internal) +{ + if (global_defs_par) { + flags = add_global_flag(flags); + } + tex_word_define(flags, internal ? 
internal_mu_glue_location(j) : register_mu_glue_location(j), v); +} + +void tex_set_tex_count_register(int j, halfword v, int flags, int internal) +{ + if (global_defs_par) { + flags = add_global_flag(flags); + } + if (internal) { + tex_assign_internal_int_value(flags, internal_int_location(j), v); + } else { + tex_word_define(flags, register_int_location(j), v); + } +} + +void tex_set_tex_attribute_register(int j, halfword v, int flags, int internal) +{ + if (global_defs_par) { + flags = add_global_flag(flags); + } + if (j > lmt_node_memory_state.max_used_attribute) { + lmt_node_memory_state.max_used_attribute = j; + } + change_attribute_register(flags, register_attribute_location(j), v); + tex_word_define(flags, internal ? internal_attribute_location(j) : register_attribute_location(j), v); +} + +void tex_set_tex_box_register(int j, halfword v, int flags, int internal) +{ + if (global_defs_par) { + flags = add_global_flag(flags); + } + if (internal) { + tex_define(flags, internal_box_location(j), internal_box_reference_cmd, v); + } else { + tex_define(flags, register_box_location(j), register_box_reference_cmd, v); + } +} + +void tex_set_tex_toks_register(int j, lstring s, int flags, int internal) +{ + halfword ref = get_reference_token(); + halfword head = tex_str_toks(s, NULL); + set_token_link(ref, head); + if (global_defs_par) { + flags = add_global_flag(flags); + } + if (internal) { + tex_define(flags, internal_toks_location(j), internal_toks_reference_cmd, ref); + } else { + tex_define(flags, register_toks_location(j), register_toks_reference_cmd, ref); + } +} + +void tex_scan_tex_toks_register(int j, int c, lstring s, int flags, int internal) +{ + halfword ref = get_reference_token(); + halfword head = tex_str_scan_toks(c, s); + set_token_link(ref, head); + if (global_defs_par) { + flags = add_global_flag(flags); + } + if (internal) { + tex_define(flags, internal_toks_location(j), internal_toks_reference_cmd, ref); + } else { + tex_define(flags, 
register_toks_location(j), register_toks_reference_cmd, ref); + } +} + +int tex_get_tex_toks_register(int j, int internal) +{ + halfword t = internal ? toks_parameter(j) : toks_register(j); + if (t) { + return tex_tokens_to_string(t); + } else { + return get_nullstr(); + } +} + +/* Options: (0) error when undefined [bad], (1) create [but undefined], (2) ignore [discard] */ + +halfword tex_parse_str_to_tok(halfword head, halfword *tail, halfword ct, const char *str, size_t lstr, int option) +{ + halfword p = null; + if (! head) { + head = get_reference_token(); + } + p = (tail && *tail) ? *tail : head; + if (lstr > 0) { + const char *se = str + lstr; + while (str < se) { + /*tex hh: |str2uni| could return len too (also elsewhere) */ + halfword u = (halfword) aux_str2uni((const unsigned char *) str); + halfword t = null; + halfword cc = tex_get_cat_code(ct, u); + str += utf8_size(u); + /*tex + This is a relating simple converter; if more is needed one can just use + |tex.print| with a regular |\def| or |\gdef| and feed the string into the + regular scanner. + */ + switch (cc) { + case escape_cmd: + { + /*tex We have a potential control sequence so we check for it. */ + int lname = 0; + const char *name = str; + while (str < se) { + halfword u = (halfword) aux_str2uni((const unsigned char *) str); + int s = utf8_size(u); + int c = tex_get_cat_code(ct, u); + if (c == letter_cmd) { + str += s; + lname += s; + } else if (c == spacer_cmd) { + /*tex We ignore a trailing space like normal scanning does. */ + if (lname == 0) { + // if (u == 32) { + lname += s; + } + str += s; + break ; + } else { + if (lname == 0) { + lname += s; + str += s; + } + break ; + } + } + if (lname > 0) { + /*tex We have a potential |\cs|. */ + halfword cs = tex_string_locate(name, lname, option == 1 ? 1 : 0); /* 1 == create */ + if (cs == undefined_control_sequence) { + if (option == 2) { + /*tex We ignore unknown commands. 
*/ + // t = null; + } else { + /*tex We play safe and backtrack, as we have option 0, but never used anyway. */ + t = u + (cc * (1<<21)); + str = name; + } + } else { + /* We end up here when option is 1. */ + t = cs_token_flag + cs; + } + } else { + /*tex + Just a character with some meaning, so |\unknown| becomes effectively + |\unknown| assuming that |\\| has some useful meaning of course. + */ + t = u + (cc * (1 << 21)); + str = name; + } + break; + } + case comment_cmd: + goto DONE; + case ignore_cmd: + break; + case spacer_cmd: + /* t = u + (cc * (1<<21)); */ + t = token_val(spacer_cmd, ' '); + break; + default: + /*tex + Whatever token, so for instance $x^2$ just works given a tex catcode regime. + */ + t = u + (cc * (1<<21)); + break; + } + if (t) { + p = tex_store_new_token(p, t); + } + } + } + DONE: + if (tail) { + *tail = p; + } + return head; +} + +/*tex So far for the helpers. */ + +void tex_dump_token_mem(dumpstream f) +{ + /*tex + It doesn't pay off to prune the available list. We save less than 10K if we do this and + it assumes a sequence at the end. It doesn't help that the list is in reverse order so + we just dump the lot. But we do check the allocated size. We cheat a bit in reducing + the ptr so that we can set the the initial counter on loading. 
+ */ + halfword p = lmt_token_memory_state.available; + halfword u = lmt_token_memory_state.tokens_data.top + 1; + while (p) { + --u; + p = token_link(p); + } + lmt_token_memory_state.tokens_data.ptr = u; + dump_int(f, lmt_token_state.null_list); /* the only one left */ + dump_int(f, lmt_token_memory_state.tokens_data.allocated); + dump_int(f, lmt_token_memory_state.tokens_data.top); + dump_int(f, lmt_token_memory_state.tokens_data.ptr); + dump_int(f, lmt_token_memory_state.available); + dump_things(f, lmt_token_memory_state.tokens[0], lmt_token_memory_state.tokens_data.top + 1); +} + +void tex_undump_token_mem(dumpstream f) +{ + undump_int(f, lmt_token_state.null_list); /* the only one left */ + undump_int(f, lmt_token_memory_state.tokens_data.allocated); + undump_int(f, lmt_token_memory_state.tokens_data.top); + undump_int(f, lmt_token_memory_state.tokens_data.ptr); + undump_int(f, lmt_token_memory_state.available); + tex_initialize_token_mem(); + undump_things(f, lmt_token_memory_state.tokens[0], lmt_token_memory_state.tokens_data.top + 1); +} diff --git a/source/luametatex/source/tex/textoken.h b/source/luametatex/source/tex/textoken.h new file mode 100644 index 000000000..1996f351c --- /dev/null +++ b/source/luametatex/source/tex/textoken.h @@ -0,0 +1,399 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_TEXTOKEN_H +# define LMT_TEXTOKEN_H + +# include "luametatex.h" + +/*tex + + These are constants that can be added to a chr value and then give a token with the right cmd + and chr combination, whichs is then equivalent to |token_val (cmd, chr)|. The cmd results from + shifting right 21 bits. The following tokens therefore should match the order of the (first + bunch) of cmd codes! + + \TEX\ stores the specific match character which defaults to |#|. When tokens get serialized the + machinery starts with |match_chr = '#'| but overloads that by the last stored variant. So the + last (!) 
seen |match_chr| in the macro preamble determines what gets used in showing the body. + One could argue that this is a buglet but I more see it as a side effect. In practice there is + never a mix of such characters used. Anyway, one could as well use the first seen in the + preamble and use that for the rest because consistency is better than confusion. Even better is + to just always use |#| and store the numbers in preamble match tokens, which opens up + possibilities (for strict or tolerant matching, skipping spaces, optional delimiters and even + more arguments). + +*/ + +//define cs_token_flag 0x1FFFFFFF + +# define node_token_max 0x0FFFFF +# define node_token_flag 0x100000 +# define node_token_lsb(sum) (sum & 0x0000FFFF) +# define node_token_msb(sum) (((sum & 0xFFFF0000) >> 16) + node_token_flag) +# define node_token_sum(msb,lsb) (((msb & 0x0000FFFF) << 16) + lsb) +# define node_token_overflow(sum) (sum > node_token_max) +# define node_token_flagged(sum) (sum > node_token_flag) + +/*tex + Instead of |fixmem| we use |tokens| because it is dynamic anyway and we then better match variables + that deal with managing that. Most was already hidden in a few files anyway. 
+*/ + +typedef struct token_memory_state_info { + memoryword *tokens; /*tex |memoryword *volatile fixmem;| */ + memory_data tokens_data; + halfword available; + int padding; +} token_memory_state_info; + +extern token_memory_state_info lmt_token_memory_state; + +typedef enum read_states { + reading_normal, /*tex we're going ahead */ + reading_just_opened, /*tex newly opened, first line not yet read */ + reading_closed, /*tex not open, or at end of file */ +} read_states; + +typedef enum lua_input_types { + unset_lua_input, + string_lua_input, + packed_lua_input, + token_lua_input, + token_list_lua_input, + node_lua_input, +} lua_input_types; + +typedef enum tex_input_types { + eof_tex_input, + string_tex_input, + token_tex_input, + token_list_tex_input, + node_tex_input, +} tex_input_types; + +typedef enum catcode_table_presets { + default_catcode_table_preset = -1, + no_catcode_table_preset = -2, +} catcode_table_presets; + +/*tex +* + There are a few temporary head pointers, one is |temp_token_head|. This one we keep because + when we expand, we can run into situations where we need that pointer. But, |backup_head| is + a real temporary one: we can replace that with local variables. Okay, it is kind of kept in + the format file but if it ends up there we're in some kind of troubles anyway. So, + |backup_head| is now local and |temp_token_head| only global when we are scanning; in cases + where we serialize tokens lists it has been replaced by local variables (and the related + functions now keep track of head and tail). This makes sense because in \LUAMETATEX\ we often + go between \TEX\ and \LUA\ and this keeps it kind of simple. This also makes clear when we + are scanning (the global head is used) and doing something simple with a list. The same is + true for |match_token_head| thatmoved to the expand state. The |backup_head| variable is gone + because we now use locals. 
+ +*/ + +typedef struct token_state_info { + halfword null_list; /*tex permanently empty list */ + int in_lua_escape; + int force_eof; + int luacstrings; + /*tex These are pseudo constants, their value depends on the number of primitives etc. */ + halfword par_loc; + halfword par_token; + /* halfword line_par_loc; */ /*tex See note in textoken.c|. */ + /* halfword line_par_token; */ /*tex See note in textoken.c|. */ + /* */ + char *buffer; + int bufloc; + int bufmax; + int padding; +} token_state_info; + +extern token_state_info lmt_token_state; + +// # define max_token_reference 0x7FFF /* we can bump to 0xFFFF when we go unsigned here */ +// +//define token_reference(a) token_memory_state.tokens[a].half1 +// +// #define get_token_parameters(a) lmt_token_memory_state.tokens[a].quart2 +// #define get_token_reference(a) lmt_token_memory_state.tokens[a].quart3 +// +// #define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].quart2 = (b) +// +// #define add_token_reference(a) lmt_token_memory_state.tokens[a].quart3 += 1 +// #define sub_token_reference(a) lmt_token_memory_state.tokens[a].quart3 -= 1 +// #define inc_token_reference(a,b) lmt_token_memory_state.tokens[a].quart3 += (quarterword) (b) +// #define dec_token_reference(a,b) lmt_token_memory_state.tokens[a].quart3 -= (quarterword) (b) + +# define max_token_reference 0x0FFFFFFF + +# define get_token_parameters(a) (lmt_token_memory_state.tokens[a].hulf1 >> 28) +# define get_token_reference(a) (lmt_token_memory_state.tokens[a].hulf1 & 0x0FFFFFFF) + +# define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28) /* normally the variable is still zero here */ + +# define add_token_reference(a) lmt_token_memory_state.tokens[a].hulf1 += 1 /* we are way off the parameter count */ +# define sub_token_reference(a) lmt_token_memory_state.tokens[a].hulf1 -= 1 /* we are way off the parameter count */ +# define inc_token_reference(a,b) lmt_token_memory_state.tokens[a].hulf1 += (b) /* we are 
way off the parameter count */ +# define dec_token_reference(a,b) lmt_token_memory_state.tokens[a].hulf1 -= (b) /* we are way off the parameter count */ + +/* */ + +# define token_info(a) lmt_token_memory_state.tokens[a].half1 +# define token_link(a) lmt_token_memory_state.tokens[a].half0 +# define get_token_info(a) lmt_token_memory_state.tokens[a].half1 +# define get_token_link(a) lmt_token_memory_state.tokens[a].half0 +# define set_token_info(a,b) lmt_token_memory_state.tokens[a].half1 = (b) +# define set_token_link(a,b) lmt_token_memory_state.tokens[a].half0 = (b) + +# define token_cmd(A) ((A) >> cs_offset_bits) +# define token_chr(A) ((A) & cs_offset_max) +# define token_val(A,B) (((A) << cs_offset_bits) + (B)) + +/*tex + Sometimes we add a value directly. Instead we could use |token_val| on the spot but then we + also need different range checkers. We use numbers because we don't have the cmd codes defined + yet when we're here. so we can't use for instance |token_val (spacer_cmd, 20)| yet. 
+*/ + +# define left_brace_token token_val( 1, 0) // token_val(left_brace_cmd,0) +# define right_brace_token token_val( 2, 0) // token_val(right_brace_cmd,0) +# define math_shift_token token_val( 3, 0) // token_val(math_shift_cmd,0) +# define alignment_token token_val( 4, 0) +# define superscript_token token_val( 7, 0) +# define subscript_token token_val( 8, 0) +# define ignore_token token_val( 9, 0) // token_val(ignore_cmd,0) +# define space_token token_val(10,32) // token_val(spacer_cmd,32) +# define letter_token token_val(11, 0) // token_val(letter_cmd,0) +# define other_token token_val(12, 0) // token_val(other_char_cmd,0) +# define active_token token_val(13, 0) + +# define match_token token_val(19,0) // token_val(match_cmd,0) +# define end_match_token token_val(20,0) // token_val(end_match_cmd,0) + +# define left_brace_limit right_brace_token +# define right_brace_limit math_shift_token + +# define octal_token (other_token + '\'') /*tex apostrophe, indicates an octal constant */ +# define hex_token (other_token + '"') /*tex double quote, indicates a hex constant */ +# define alpha_token (other_token + '`') /*tex reverse apostrophe, precedes alpha constants */ +# define point_token (other_token + '.') /*tex decimal point */ +# define continental_point_token (other_token + ',') /*tex decimal point, Eurostyle */ +# define period_token (other_token + '.') /*tex decimal point */ +# define comma_token (other_token + ',') /*tex decimal comma */ +# define plus_token (other_token + '+') +# define minus_token (other_token + '-') +# define slash_token (other_token + '/') +# define asterisk_token (other_token + '*') +# define colon_token (other_token + ':') +# define semi_colon_token (other_token + ';') +# define equal_token (other_token + '=') +# define less_token (other_token + '<') +# define more_token (other_token + '>') +# define exclamation_token_o (other_token + '!') +# define exclamation_token_l (letter_token + '!') +# define underscore_token (other_token + '_') 
+# define underscore_token_o (other_token + '_') +# define underscore_token_l (letter_token + '_') +# define circumflex_token (other_token + '^') +# define circumflex_token_o (other_token + '^') +# define circumflex_token_l (letter_token + '^') +# define escape_token (other_token + '\\') +# define left_parent_token (other_token + '(') +# define right_parent_token (other_token + ')') +# define zero_token (other_token + '0') /*tex zero, the smallest digit */ +# define five_token (other_token + '5') +# define seven_token (other_token + '7') +# define nine_token (other_token + '9') /*tex nine, the largest digit */ + +# define a_token_l (letter_token + 'a') /*tex the smallest special hex digit */ +# define a_token_o (other_token + 'a') + +# define b_token_l (letter_token + 'b') /*tex a special hex digit */ +# define b_token_o (other_token + 'b') + +# define d_token_l (letter_token + 'd') +# define d_token_o (other_token + 'd') + +# define e_token_l (letter_token + 'e') +# define e_token_o (other_token + 'e') + +# define f_token_l (letter_token + 'f') /*tex the largest special hex digit */ +# define f_token_o (other_token + 'f') + +# define i_token_l (letter_token + 'i') +# define i_token_o (other_token + 'i') + +# define l_token_l (letter_token + 'l') +# define l_token_o (other_token + 'l') + +# define m_token_l (letter_token + 'm') +# define m_token_o (other_token + 'm') + +# define n_token_l (letter_token + 'n') +# define n_token_o (other_token + 'n') + +# define o_token_l (letter_token + 'o') +# define o_token_o (other_token + 'o') + +# define p_token_l (letter_token + 'p') +# define p_token_o (other_token + 'p') + +# define r_token_l (letter_token + 'r') +# define r_token_o (other_token + 'r') + +# define s_token_l (letter_token + 's') +# define s_token_o (other_token + 's') + +# define t_token_l (letter_token + 't') +# define t_token_o (other_token + 't') + +# define u_token_l (letter_token + 'u') +# define u_token_o (other_token + 'u') + +# define
x_token_l (letter_token + 'x') +# define x_token_o (other_token + 'x') + +# define A_token_l (letter_token + 'A') /*tex the smallest special hex digit */ +# define A_token_o (other_token + 'A') + +# define E_token_l (letter_token + 'E') +# define E_token_o (other_token + 'E') + +# define F_token_l (letter_token + 'F') /*tex the largest special hex digit */ +# define F_token_o (other_token + 'F') + +# define P_token_l (letter_token + 'P') /*tex not a hex digit; NOTE(review): the previous remark here was a copy of the one on |F_token_l| */ +# define P_token_o (other_token + 'P') + +# define X_token_l (letter_token + 'X') +# define X_token_o (other_token + 'X') + +# define at_token_l (letter_token + '@') +# define at_token_o (other_token + '@') + +# define match_visualizer '#' +# define match_spacer '*' /* ignore spaces */ +# define match_bracekeeper '+' /* keep the braces */ +# define match_thrasher '-' /* discard and don't count the argument */ +# define match_par_spacer '.' /* ignore pars and spaces */ +# define match_keep_spacer ',' /* push back space when no match */ +# define match_pruner '/' /* remove leading and trailing spaces and pars */ +# define match_continuator ':' /* pick up scanning here */ +# define match_quitter ';' /* quit scanning */ +# define match_mandate '=' /* braces are mandatory */ +# define match_spacekeeper '^' /* keep leading spaces */ +# define match_mandate_keep '_' /* braces are mandatory and kept */ +# define match_par_command '@' /* par delimiter, only internal */ + +# define spacer_match_token (match_token + match_spacer) +# define keep_match_token (match_token + match_bracekeeper) +# define thrash_match_token (match_token + match_thrasher) +# define par_spacer_match_token (match_token + match_par_spacer) +# define keep_spacer_match_token (match_token + match_keep_spacer) +# define prune_match_token (match_token + match_pruner) +# define continue_match_token (match_token + match_continuator) +# define quit_match_token (match_token + match_quitter) +# define mandate_match_token (match_token +
match_mandate) +# define leading_match_token (match_token + match_spacekeeper) +# define mandate_keep_match_token (match_token + match_mandate_keep) +# define par_command_match_token (match_token + match_par_command) + +# define is_valid_match_ref(r) (r != thrash_match_token && r != spacer_match_token && r != keep_spacer_match_token && r != continue_match_token && r != quit_match_token) + +/*tex + Managing the head of the list of available one-word nodes. The |get_avail| function has been + given a more verbose name. It gets from the pool and should not be confused with |get_token| + which reads from the input or token list. The |free_avail| function got renamed to + |put_available_token| so we have some symmetry here. +*/ + +extern void tex_compact_tokens (void); +extern void tex_initialize_tokens (void); +extern void tex_initialize_token_mem (void); +extern halfword tex_get_available_token (halfword t); +extern void tex_put_available_token (halfword p); +extern halfword tex_store_new_token (halfword p, halfword t); +extern void tex_delete_token_reference (halfword p); +extern void tex_add_token_reference (halfword p); +extern void tex_increment_token_reference (halfword p, int n); + +# define get_reference_token() tex_get_available_token(null) + +/*tex + + The |no_expand_flag| is a special character value that is inserted by |get_next| if it wants to + suppress expansion. 
+ +*/ + +# define no_expand_flag special_char /* no_expand_relax_code */ + +/*tex A few special values: */ + +# define default_token_show_min 32 +# define default_token_show_max 2500 +# define extreme_token_show_max 0x3FFFFFFF + +/*tex All kind of helpers: */ + +extern void tex_dump_token_mem (dumpstream f); +extern void tex_undump_token_mem (dumpstream f); +extern void tex_print_meaning (halfword code); +extern void tex_flush_token_list (halfword p); +extern void tex_flush_token_list_head_tail (halfword h, halfword t, int n); +extern halfword tex_show_token_list (halfword p, halfword q, int l, int asis); /* Here |l| will go away. */ +extern void tex_token_show (halfword p, int max); +/* void tex_add_token_ref (halfword p); */ +/* void tex_delete_token_ref (halfword p); */ +extern void tex_get_next (void); +extern halfword tex_scan_character (const char *s, int left_brace, int skip_space, int skip_relax); +extern int tex_scan_optional_keyword (const char *s); +extern int tex_scan_mandate_keyword (const char *s, int offset); +extern void tex_aux_show_keyword_error (const char *s); +extern int tex_scan_keyword (const char *s); +extern int tex_scan_keyword_case_sensitive (const char *s); +extern halfword tex_active_to_cs (int c, int force); +extern halfword tex_string_to_toks (const char *s); +extern int tex_get_char_cat_code (int c); +extern halfword tex_get_token (void); +extern halfword tex_str_toks (lstring s, halfword *tail); /* returns head */ +extern halfword tex_cur_str_toks (halfword *tail); /* returns head */ +extern halfword tex_str_scan_toks (int c, lstring b); /* returns head */ +extern void tex_run_combine_the_toks (void); +extern void tex_run_convert_tokens (halfword code); +extern strnumber tex_the_convert_string (halfword c, int i); +extern strnumber tex_tokens_to_string (halfword p); +/* char *tex_tokenlist_to_cstring (int p, int inhibit_par, int *siz); */ +extern char *tex_tokenlist_to_tstring (int p, int inhibit_par, int *siz, int skip, int 
nospace, int strip); + +extern halfword tex_get_tex_dimen_register (int j, int internal); +extern halfword tex_get_tex_skip_register (int j, int internal); +extern halfword tex_get_tex_mu_skip_register (int j, int internal); +extern halfword tex_get_tex_count_register (int j, int internal); +extern halfword tex_get_tex_attribute_register (int j, int internal); +extern halfword tex_get_tex_box_register (int j, int internal); +extern halfword tex_get_tex_toks_register (int j, int internal); + +extern void tex_set_tex_dimen_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_skip_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_mu_skip_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_count_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_attribute_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_box_register (int j, halfword v, int flags, int internal); + +extern void tex_set_tex_toks_register (int j, lstring s, int flags, int internal); +extern void tex_scan_tex_toks_register (int j, int c, lstring s, int flags, int internal); + +extern halfword tex_copy_token_list (halfword h, halfword *t); + +extern halfword tex_parse_str_to_tok (halfword head, halfword *tail, halfword ct, const char *str, size_t lstr, int option); + +/*tex Returns nonzero when |t| lies in the inclusive range |0 .. lmt_token_memory_state.tokens_data.top|, and zero otherwise. NOTE(review): this is a plain |inline| (no |static|) definition in a header, which under C99/C11 rules relies on an external definition being provided by one translation unit; confirm that one exists. */ inline int tex_valid_token(int t) +{ + return ((t >= 0) && (t <= (int) lmt_token_memory_state.tokens_data.top)); +} + +# endif diff --git a/source/luametatex/source/tex/textypes.c b/source/luametatex/source/tex/textypes.c new file mode 100644 index 000000000..2b67f5308 --- /dev/null +++ b/source/luametatex/source/tex/textypes.c @@ -0,0 +1,46 @@ +/* + See license.txt in the root of this project.
+*/ + +# include "luametatex.h" + +/*tex Writes the compile time limits to the format stream |f| in a fixed order; |tex_undump_constants| checks them in that same order, so both lists have to be kept in sync. NOTE(review): |max_n_of_em_glue_registers| and |max_n_of_ex_glue_registers| are not part of this list; confirm that this is intended. */ void tex_dump_constants(dumpstream f) +{ + dump_via_int(f, max_n_of_toks_registers); + dump_via_int(f, max_n_of_box_registers); + dump_via_int(f, max_n_of_int_registers); + dump_via_int(f, max_n_of_dimen_registers); + dump_via_int(f, max_n_of_attribute_registers); + dump_via_int(f, max_n_of_glue_registers); + dump_via_int(f, max_n_of_mu_glue_registers); + dump_via_int(f, max_n_of_bytecodes); + dump_via_int(f, max_n_of_math_families); + dump_via_int(f, max_n_of_math_classes); + dump_via_int(f, max_n_of_catcode_tables); + dump_via_int(f, max_n_of_box_indices); +} + +/*tex Fetches the next integer from the stream |f| (|undump_int| presumably stores it in |x|) and reports a mismatch with the expected value |c| through |tex_fatal_undump_error|. */ inline static void tex_aux_check_constant(dumpstream f, int c) +{ + int x; + undump_int(f, x); + if (x != c) { + tex_fatal_undump_error("inconsistent constant"); + } +} + +/*tex Compares each dumped limit with the value compiled into this binary, in the order in which |tex_dump_constants| wrote them. */ void tex_undump_constants(dumpstream f) +{ + tex_aux_check_constant(f, max_n_of_toks_registers); + tex_aux_check_constant(f, max_n_of_box_registers); + tex_aux_check_constant(f, max_n_of_int_registers); + tex_aux_check_constant(f, max_n_of_dimen_registers); + tex_aux_check_constant(f, max_n_of_attribute_registers); + tex_aux_check_constant(f, max_n_of_glue_registers); + tex_aux_check_constant(f, max_n_of_mu_glue_registers); + tex_aux_check_constant(f, max_n_of_bytecodes); + tex_aux_check_constant(f, max_n_of_math_families); + tex_aux_check_constant(f, max_n_of_math_classes); + tex_aux_check_constant(f, max_n_of_catcode_tables); + tex_aux_check_constant(f, max_n_of_box_indices); +} diff --git a/source/luametatex/source/tex/textypes.h b/source/luametatex/source/tex/textypes.h new file mode 100644 index 000000000..3eebccbf1 --- /dev/null +++ b/source/luametatex/source/tex/textypes.h @@ -0,0 +1,699 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_TEXTYPES_H +# define LMT_TEXTYPES_H + +# include <stdio.h> + +# define LMT_TOSTRING_INDEED(s) #s +# define LMT_TOSTRING(s) LMT_TOSTRING_INDEED(s) + +/*tex + + Here is the comment from the engine(s) that we started with.
Keep in mind that \TEX\ originates + on other architectures and that it was written in \PASCAL. + + In order to make efficient use of storage space, \TEX\ bases its major data structures on a + |memoryword|, which contains either a (signed) integer, possibly scaled, or a (signed) + |glue_ratio|, or a small number of fields that are one half or one quarter of the size used for + storing integers. More details about how we pack data in a memory word can be found in the + |memoryword| files. + + If |x| is a variable of type |memoryword|, it contains up to four fields that can be referred + to as follows (\LUATEX\ differs a bit here but the principles remain): + + \starttabulate + \NC |x.int| \NC an |integer| \NC \NR + \NC |x.sc| \NC a |scaled| integer \NC \NR + \NC |x.gr| \NC a |glueratio| \NC \NR + \NC |x.hh.lh|, |x.hh.rh| \NC two halfword fields \NC \NR + \NC |x.hh.b0|, |x.hh.b1| \NC two quarterword fields \NC \NR + \NC |x.qqqq.b0| \unknown\ |x.qqqq.b3| \NC four quarterword fields \NC \NR + \stoptabulate + + This is somewhat cumbersome to write, and not very readable either, but macros will be used to + make the notation shorter and more transparent. The |memoryword| file gives a formal definition + of |memoryword| and its subsidiary types, using packed variant records. \TEX\ makes no + assumptions about the relative positions of the fields within a word. + + We are assuming 32-bit integers, a halfword must contain at least 32 bits, and a quarterword + must contain at least 16 bits. + + The present implementation tries to accommodate as many variations as possible, so it makes few + assumptions. If integers having the subrange |min_quarterword .. max_quarterword| can be packed + into a quarterword, and if integers having the subrange |min_halfword .. max_halfword| can be + packed into a halfword, everything should work satisfactorily.
+ + It is usually most efficient to have |min_quarterword = min_halfword = 0|, so one should try to + achieve this unless it causes a severe problem. The values defined here are recommended for most + 32-bit computers. + + We cannot use the full range of 32 bits in a halfword, because we have to allow negative values + for potential backend tricks like web2c's dynamic allocation, and parshapes pointers have to be + able to store at least twice the value |max_halfword| (see below). Therefore, |max_halfword| is + $2^{30}-1$. + + Via the intermediate step of \WEBC\ we went from \PASCAL\ to \CCODE. As in the meantime we also + live in a 64 bit world the above model has been adapted a bit but the principles and names remain. + + A |halfword| is a 32 bit integer and a |quarterword| a 16 bit one. The |scaled| type is used for + scaled integers but it's just another name for |halfword| or |int|. The code sometimes uses an + |int| instead of |scaled| or |halfword| (which might get fixed). By using the old type names we + sort of get an indication what we're dealing with. + + If we ever bump scaled to 64 bit we need to redo some code that now assumes that a scaled and + halfword are the same size (as in values). Instead we can then decide to go 64 bit for both. + + The |internal_font_number| type is now also a |halfword| so it's no longer used as such. + + We now use 64 bit memory words split into whatever pieces we need. This also means that we can use + a double as glueratio which saves us some casting. + + In principle we can widen up the engine to use long instead of int because it is relatively easy + to adapt the nodes but it will take much more memory and we gain nothing. I might (re)introduce + the pointer as type instead of halfword just for clarity but the mixed usage doesn't really make + it better. It's more about perception. I will do that when I have reason to check some code and + am in edit mode.
+ +*/ + +typedef int strnumber; +typedef int halfword; +typedef unsigned short quarterword; /*tex It really is an unsigned one! But \MPLIB| had it signed. */ +typedef unsigned char singleword; +typedef int scaled; +typedef double glueratio; /*tex This looks better in our (tex specific) syntax highlighting. */ +typedef int pointer; /*tex Maybe I'll replace halfwords that act as pointer some day. */ +typedef FILE *dumpstream; + +/* glueratio glue_ratio; */ /*tex one-word representation of a glue expansion factor */ +/* unsigned char glue_ord; */ /*tex infinity to the 0, 1, 2, 3, or 4 power */ +/* unsigned short group_code; */ /*tex |save_level| for a level boundary */ + +/*tex + + The documentation refers to pointers and halfwords and scaled and all are in fact just integers. + Okay, one can wonder about negative pointers but we never reach the limits so we're okay wrr + wraparound. At some point we might just replace all by int as some of the helpers already do + that. For now we keep halfword and scaled but we removed (the not so often used) pointers + because they were already mixed with halfwords in similar usage. + + So, again we use constants that reflect the original naming and also the related comments. + + Here are some more constants. Others definitions can be font alongside where they make most + sense. For instance, these are used all over the place: |null|, |normal|, etc. However, over + time, with all these extensions it was not used consistently. So, I replaced the usage of + |normal| by more explicit identifiers, also because we have more subtypes in this engine. But + we kept most constants (but most in enums)! + + Characters of text that have been converted to \TEX's internal form are said to be of type + |unsigned char|, which is a subrange of the integers. We are assuming that our runtime system + is able to read and write \UTF-8. + + If constants in this file change, one also must change the format identifier! 
+ +*/ + +typedef struct scaledwhd { + scaled wd; + scaled ht; + scaled dp; + scaled ic; /* padding anyway */ +} scaledwhd; + +extern halfword tex_badness( + scaled t, + scaled s +); + +/*tex + We could use the 4 leftmost bits in tokens for [protected frozen tolerant permanent] flags but + it would mean way more shifting and checking so we don't to that. However, we already use + one nibble for the cstokenflag: 0x1FFFFFFF so we actually have no room. We also have a signed + unsigned issue because halfwords are integers so quite a bit needs to be adapted if we use all + 32 bits. We have between 128 and 256 cmd codes so we need one byte for that. We also have to + deal with the max utf / unicode values. +*/ + +# define cs_offset_bits 21 +# define cs_offset_value 0x00200000 // ((1 << STRING_OFFSET_BITS) - 1) +# define cs_offset_max 0x001FFFFF +# define cs_token_flag 0x1FFFFFFF + +# define max_cardinal 0xFFFFFFFF +# define min_cardinal 0 +# define max_integer 0x7FFFFFFF /*tex aka |infinity| */ +# define min_integer -0x7FFFFFFF /*tex aka |min_infinity| */ +# define max_dimen 0x3FFFFFFF +# define min_dimen -0x3FFFFFFF +# define min_data_value 0 +# define max_data_value cs_offset_max +# define max_half_value 32767 /*tex For instance sf codes.*/ + +# define one_bp 65781 + +# define infinity 017777777777 /*tex the largest positive value that \TEX\ knows */ +# define min_infinity -0x7FFFFFFF +# define awful_bad 07777777777 /*tex more than a billion demerits |0x3FFFFFFF| */ +# define infinite_bad 10000 /*tex infinitely bad value */ +# define infinite_penalty infinite_bad /*tex infinite penalty value */ +# define eject_penalty -infinite_penalty /*tex negatively infinite penalty value */ +# define deplorable 100000 /*tex more than |inf_bad|, but less than |awful_bad| */ +# define large_width_excess 7230584 +# define small_stretchability 1663497 +# define loose_criterium 99 +# define semi_loose_criterium 12 /* same as |decent_criterium| */ +# define decent_criterium 12 +# define 
semi_tight_criterium 12 /* same as |decent_criterium| */ + +# define default_rule 26214 /*tex 0.4pt */ +# define ignore_depth -65536000 /*tex The magic dimension value to mean \quote {ignore me}. */ + +# define min_quarterword 0 /*tex The smallest allowable value in a |quarterword|. */ +# define max_quarterword 65535 /*tex The largest allowable value in a |quarterword|. */ + +# define min_halfword -0x3FFFFFFF /*tex The smallest allowable value in a |halfword|. */ +# define max_halfword 0x3FFFFFFF /*tex The largest allowable value in a |halfword|. */ + +# define null_flag -0x40000000 +# define zero_glue 0 +# define unity 0200000 /*tex $2^{16}$, represents 1.00000 */ +# define two 0400000 /*tex $2^{17}$, represents 2.00000 */ +# define null 0 +# define null_font 0 + +# define unused_attribute_value -0x7FFFFFFF /*tex as low as it goes */ +# define unused_state_value 0 /*tex 0 .. 0xFFFF */ +# define unused_script_value 0 /*tex 0 .. 0xFFFF */ +# define unused_scale_value 1000 + +# define unused_math_style 0xFF +# define unused_math_family 0xFF + +# define preset_rule_thickness 010000000000 /*tex denotes |unset_rule_thickness|: |0x40000000|. */ + +# define max_char_code 15 /*tex largest catcode for individual characters */ +# define min_space_factor 0 /*tex watch out: |\spacefactor| cannot be zero but the sf code can!*/ +# define max_space_factor 077777 +# define default_space_factor 1000 +# define default_tolerance 10000 +# define default_hangafter 1 +# define default_deadcycles 25 +# define default_pre_display_gap 2000 +# define default_eqno_gap_step 1000 + +# define default_output_box 255 + +/*tex + For practical reasons all these registers were max'd to 64K but that really makes no sense for + e.g. glue and mu glue and even attributes. Imagine using more than 8K attributes: we get long + linked lists, slow lookup, lots of copying, need plenty node memory. These large ranges also + demand more memory as we need these eqtb entries. 
So, when I was pondering specific ex and em + glue (behaving like mu glue in math) I realized that we can do that at no cost at all: we just + make some register ranges smaller. Keep in mind that we already have cheap integer, dimension, + and glue shortcuts that can be used instead of registers for storing constant values. + + large : 7 * 64 = 448 3.584 Kb + medium : 4 * 64 + 2 * 32 + 1 * 16 = 336 2.688 Kb + small : 4 * 32 + 3 * 8 = 152 1.216 Kb + + The memory saving is not that large but keep in mind that we have these huge eqtb arrays and + registers are accessed frequently so the more we have in the CPU cache the better. (We already + use less than in \LUATEX\ because we got rid of some parallel array so there it would have more + impact). + +*/ + +# if 1 + + # define max_toks_register_index 0xFFFF /* 0xFFFF 0xFFFF 0x7FFF */ /* 64 64 32 */ + # define max_box_register_index 0xFFFF /* 0xFFFF 0xFFFF 0x7FFF */ /* 64 64 32 */ + # define max_int_register_index 0xFFFF /* 0xFFFF 0xFFFF 0x7FFF */ /* 64 64 32 */ + # define max_dimen_register_index 0xFFFF /* 0xFFFF 0xFFFF 0x7FFF */ /* 64 64 32 */ + # define max_attribute_register_index 0xFFFF /* 0xFFFF 0x7FFF 0x1FFF */ /* 64 32 8 */ + # define max_glue_register_index 0xFFFF /* 0xFFFF 0x7FFF 0x3FFF */ /* 64 32 8 */ + # define max_mu_glue_register_index 0xFFFF /* 0xFFFF 0x3FFF 0x1FFF */ /* 64 16 8 */ + # define max_em_glue_register_index 0xFFFF /* 0xFFFF 0x3FFF 0x1FFF */ /* 64 16 8 */ + # define max_ex_glue_register_index 0xFFFF /* 0xFFFF 0x3FFF 0x1FFF */ /* 64 16 8 */ + +# else + + # define max_toks_register_index 0x7FFF + # define max_box_register_index 0x7FFF + # define max_int_register_index 0x7FFF + # define max_dimen_register_index 0x7FFF + # define max_attribute_register_index 0x1FFF + # define max_glue_register_index 0x3FFF + # define max_mu_glue_register_index 0x1FFF + # define max_em_glue_register_index 0x1FFF + # define max_ex_glue_register_index 0x1FFF + +# endif + +# define max_n_of_toks_registers 
(max_toks_register_index + 1) +# define max_n_of_box_registers (max_box_register_index + 1) +# define max_n_of_int_registers (max_int_register_index + 1) +# define max_n_of_dimen_registers (max_dimen_register_index + 1) +# define max_n_of_attribute_registers (max_attribute_register_index + 1) +# define max_n_of_glue_registers (max_glue_register_index + 1) +# define max_n_of_mu_glue_registers (max_mu_glue_register_index + 1) +# define max_n_of_em_glue_registers (max_em_glue_register_index + 1) +# define max_n_of_ex_glue_registers (max_ex_glue_register_index + 1) + +# define max_n_of_bytecodes 65536 /* dynamic */ +# define max_n_of_math_families 64 +# define max_n_of_math_classes 64 +# define max_n_of_catcode_tables 256 +# define max_n_of_box_indices max_halfword + +# define max_character_code 0x10FFFF /*tex 1114111, the largest allowed character number; must be |< max_halfword| */ +//define max_math_character_code 0x0FFFFF /*tex 1048575, for now this is plenty, otherwise we need to store differently */ +# define max_math_character_code max_character_code /*tex part gets clipped when we convert to a number */ +# define max_function_reference cs_offset_max +# define min_iterator_value -0xFFFFF /* When we decide to generalize it might become 0xFFFF0 with */ +# define max_iterator_value 0xFFFFF /* 0x0000F being a classifier so that we save cmd's */ +# define max_category_code 15 +# define max_newline_character 127 /*tex Th is is an old constraint but there is no reason to change it. */ +# define max_box_axis 255 +# define max_size_of_word 1024 /*tex More than enough (esp. since this can end up on the stack. */ +# define min_limited_scale 0 /*tex Zero is a signal too. 
*/ +# define max_limited_scale 1000 + +# define max_mark_index (max_n_of_marks - 1) +# define max_insert_index (max_n_of_inserts - 1) +# define max_box_index (max_n_of_box_indices - 1) +# define max_bytecode_index (max_n_of_bytecodes - 1) +# define max_math_family_index (max_n_of_math_families - 1) +# define max_math_class_code (max_n_of_math_classes - 1) +# define max_math_property 0xFFFF +# define max_math_group 0xFFFF +# define max_math_index max_character_code +# define max_math_discretionary 0xFF + +# define ascii_space 32 +# define ascii_max 127 + +/*tex + + This is very math specific: we used to pack info into an unsigned 32 bit integer: class, family + and character. We now use node for that (which also opend up the possibility to store more + info) but in case of a zero family we can also decide to use the older method of packing packing + a number: |FF+10FFFF| but the gain (at least on \CONTEXT) is litle: around 10K so here we only + mention it as consideration. We can consider anyway to omit the class part when we need a + numeric representation, although we don't really need (or like) that kind of abuse. 
+ +*/ + +# define math_class_bits 6 +# define math_family_bits 6 +# define math_character_bits 20 + +# define math_class_part(a) ((a >> 26) & 0x3F) +# define math_family_part(a) ((a >> 20) & 0x3F) +# define math_character_part(a) (a & 0xFFFFF) + +# define math_old_class_part(a) ((a >> 12) & 0x0F) +# define math_old_family_part(a) ((a >> 8) & 0x0F) +# define math_old_character_part(a) (a & 0xFF) + +# define math_old_class_mask(a) (a & 0x0F) +# define math_old_family_mask(a) (a & 0x0F) +# define math_old_character_mask(a) (a & 0xFF) + +# define math_packed_character(c,f,v) (((c & 0x3F) << 26) + ((f & 0x3F) << 20) + (v & 0xFFFFF)) +# define math_old_packed_character(c,f,v) (((c & 0x0F) << 12) + ((f & 0x0F) << 8) + (v & 0x000FF)) + +# define rule_font_fam_offset 0xFFFFFF + +/*tex We put these here for consistency: */ + +# define too_big_char (max_character_code + 1) /*tex 1114112, |biggest_char + 1| */ +# define special_char (max_character_code + 2) /*tex 1114113, |biggest_char + 2| */ +# define number_chars (max_character_code + 3) /*tex 1114114, |biggest_char + 3| */ + +/*tex + + As mentioned, because we're now in \CCODE\ we use a bit simplified memory mode. We don't do any + byte swapping related to endian properties as we don't share formats between architectures + anyway. A memory word is 64 bits and interpreted in several ways. So the memoryword is a bit + different. We also use the opportunity to squeeze eight characters into the word. + + halfword : 32 bit integer (2) + quarterword : 16 bit integer (4) + singlechar : 8 bit unsigned char (8) + int : 32 bit integer (2) + glue : 64 bit double (1) + + The names below still reflect the original \TEX\ names but we have simplified the model a bit. + Watch out: we still make |B0| and |B1| overlap |LH| which for instance is needed when a we + store the size of a node in the type and subtype field. The same is true for the overlapping + |CINT|s! Don't change this without also checking the macros elsewhere. 
+
+    \starttyping
+    typedef union memoryword {
+        struct {
+            halfword H0, H1;
+        } h;
+        struct {
+            quarterword B0, B1, B2, B3;
+        } q;
+        struct {
+            unsigned char C0, C1, C2, C3, C4, C5, C6, C7;
+        } s;
+        struct {
+            glueratio GLUE;
+        } g;
+    } memoryword;
+    \stoptyping
+
+    The dual 32 bit model suits tokens well and for nodes is only needed because we store a double but
+    when we'd store a 32 bit float instead (which is cf tex) we could use a smaller single 32 bit word.
+
+    On the other hand, it might even make sense for nodes to move to a quad 32 bit variant because it
+    makes smaller node identifiers which might remove some limits. But as many nodes have an odd size
+    we will waste more memory. Of course for nodes we can at some point decide to go full dynamic and
+    use a pointer table but then we need to abstract the embedded subnodes (in disc and insert) first.
+
+    It is a bit tricky if we want to use a [8][8][16][32], [16][16][32] or similar mixing because of
+    endianness, which is why we use a more stepwise definition of memoryword. This mixed scheme permits
+    packing more data in a node.
+
+*/
+
+// typedef union memoryword {
+//     halfword      H[2];  /* 2 * 32 bit */
+//     unsigned int  U[2];
+//     quarterword   Q[4];  /* 4 * 16 bit */
+//     unsigned char C[8];  /* 8 *  8 bit */
+//     glueratio     GLUE;  /* 1 * 64 bit */
+//     long long     L;
+//     double        D;
+//     void         *P;     /* 1 * 64 bit or 32 bit */
+// } memoryword;
+
+/*tex
+    One quarterword that can alternatively be addressed as two |singleword|s (that type is
+    declared elsewhere, presumably 8 bit each).
+*/
+
+typedef union memorysplit {
+    quarterword Q;
+    singleword  S[2];
+} memorysplit;
+
+/*tex
+    One half of a memory word: a signed halfword, the same storage read as an unsigned integer,
+    or two |memorysplit| slots.
+*/
+
+typedef union memoryalias {
+    halfword     H;
+    unsigned int U;
+    /* quarterword Q[2]; */
+    /* singleword  S[4]; */
+    memorysplit  X[2];
+} memoryalias;
+
+/*tex
+    The memory word itself: two |memoryalias| halves that share their storage with eight bytes,
+    a glue ratio, a long long, a double and a pointer.
+*/
+
+typedef union memoryword {
+    /* halfword     H[2]; */
+    /* unsigned int U[2]; */
+    /* quarterword  Q[4]; */
+    memoryalias   A[2];
+    unsigned char C[8];
+    glueratio     GLUE;
+    long long     L;
+    double        D;
+    void         *P;
+} memoryword;
+
+/*tex
+
+    These symbolic names will be used in the definitions of tokens and nodes, the core data
+    structures of the \TEX\ machinery.
In some cases halves and quarters overlap.
+
+*/
+
+/*tex The two halves of a memory word, as signed halfwords. */
+
+# define half0 A[0].H
+# define half1 A[1].H
+
+/*tex The same two halves, read as unsigned integers. */
+
+# define hulf0 A[0].U
+# define hulf1 A[1].U
+
+// # define quart00 A[0].Q[0]
+// # define quart01 A[0].Q[1]
+// # define quart10 A[1].Q[0]
+// # define quart11 A[1].Q[1]
+
+/*tex The first digit selects the half, the second digit the quarterword within that half. */
+
+# define quart00 A[0].X[0].Q
+# define quart01 A[0].X[1].Q
+# define quart10 A[1].X[0].Q
+# define quart11 A[1].X[1].Q
+
+// # define single00 A[0].S[0]
+// # define single01 A[0].S[1]
+// # define single02 A[0].S[2]
+// # define single03 A[0].S[3]
+// # define single10 A[1].S[0]
+// # define single11 A[1].S[1]
+// # define single12 A[1].S[2]
+// # define single13 A[1].S[3]
+
+/*tex The first digit selects the half, the second digit one of the four singlewords in it. */
+
+# define single00 A[0].X[0].S[0]
+# define single01 A[0].X[0].S[1]
+# define single02 A[0].X[1].S[0]
+# define single03 A[0].X[1].S[1]
+# define single10 A[1].X[0].S[0]
+# define single11 A[1].X[0].S[1]
+# define single12 A[1].X[1].S[0]
+# define single13 A[1].X[1].S[1]
+
+/*tex These use the whole memory word. */
+
+# define glue0   GLUE
+# define long0   L
+# define double0 D
+
+/*tex
+
+    We're coming from \PASCAL\ which has a boolean type, while in \CCODE\ an |int| is used. However,
+    as we often have callbacks and a connection with the \LUA\ end using |boolean|, |true| and
+    |false| is often somewhat inconsistent. For that reason we now use |int| instead. It also prevents
+    interference with a different definition of |boolean|, something that we ran into a few times in
+    the past with external code.
+
+    There were not that many explicit booleans used anyway so better be consistent in using integers
+    than have an inconsistent mix.
+
+*/
+
+/*tex
+
+    The following parameters can be changed at compile time to extend or reduce \TEX's capacity.
+    They may have different values in |INITEX| and in production versions of \TEX. Some values can
+    be adapted at runtime. We start with those that influence memory management. Anyhow, some day
+    I will collect some statistics from runs and come up with (probably) lower defaults.
+
+*/
+
+/*tex These do a stepwise allocation.
*/
+
+/*tex
+    NOTE(review): judging by the names and the inline remarks, |max_| is the upper limit, |min_|
+    the lower limit, |siz_| presumably the default size and |stp_| the step used when a table
+    grows; confirm against the code that consumes these values.
+*/
+
+/*tex The buffer is way too large ... only lines ... we could start out smaller */
+
+/*define magic_maximum 2097151 */ /* (max string) Because we step 500K we will always be below this. */
+//define magic_maximum 2000000 /* Looks nicer and we never need the real maximum anyway. */
+# define magic_maximum cs_offset_value /* Looks nicer and we never need the real maximum anyway. */
+
+# define max_hash_size magic_maximum /* This is one of these magic numbers. */
+# define min_hash_size 150000 /* A reasonable default. */
+# define siz_hash_size 250000
+# define stp_hash_size 100000 /* Often we have enough. */
+
+# define max_pool_size magic_maximum /* stringsize ! */
+# define min_pool_size 150000
+# define siz_pool_size 500000
+# define stp_pool_size 100000
+
+# define max_body_size 100000000 /* poolsize */
+# define min_body_size 10000000
+# define siz_body_size 20000000
+# define stp_body_size 1000000
+
+# define max_node_size 100000000 /* Currently these are the memory words! */
+//define siz_node_size 5000000
+# define siz_node_size 25000000
+# define min_node_size 2000000 /* Currently these are the memory words! */
+# define stp_node_size 500000 /* Currently these are the memory words! */
+
+# define max_token_size 10000000 /* If needed we can go much larger. */
+# define siz_token_size 10000000
+# define min_token_size 1000000 /* The original 10000 is a bit cheap. */
+# define stp_token_size 250000
+
+# define max_buffer_size 100000000 /* Let's be generous */
+# define siz_buffer_size 10000000
+# define min_buffer_size 1000000 /* We often need quite a bit. */
+# define stp_buffer_size 1000000 /* We use this step when we increase the table. */
+
+# define max_nest_size 10000 /* The table will grow dynamically but the file system might have limitations. */
+# define min_nest_size 1000 /* Quite a bit more than the old default 50. */
+# define siz_nest_size 10000 /* Quite a bit more than the old default 50.
*/
+# define stp_nest_size 1000 /* We use this step when we increase the table. */
+
+# define max_in_open 2000 /* The table will grow dynamically but the file system might have limitations. */
+# define min_in_open 500 /* This used to be 100, but who knows what users load. */
+# define siz_in_open 2000 /* This used to be 100, but who knows what users load. */
+# define stp_in_open 250 /* We use this step when we increase the table. */
+
+# define max_parameter_size 100000 /* This should be plenty and if not there probably is an issue in the macro package. */
+# define min_parameter_size 20000 /* The original value of 60 is definitely not enough when we nest macro calls. */
+# define siz_parameter_size 100000 /* The original value of 60 is definitely not enough when we nest macro calls. */
+# define stp_parameter_size 10000 /* We use this step when we increase the table. */
+
+# define max_save_size 500000 /* The table will grow dynamically. */
+# define min_save_size 100000 /* The original value was 5000, which is not that large for today's usage. */
+# define siz_save_size 500000 /* The original value was 5000, which is not that large for today's usage. */
+# define stp_save_size 10000 /* We use this step when we increase the table. */
+
+# define max_stack_size 100000 /* The table will grow dynamically. */
+# define min_stack_size 10000 /* The original value was 500, okay long ago, but not now. */
+# define siz_stack_size 100000 /* The original value was 500, okay long ago, but not now. */
+# define stp_stack_size 10000 /* We use this step when we increase the table. */
+
+/*tex The remaining groups only define a maximum, a minimum and a step; there is no |siz_| entry. */
+
+# define max_mark_size 10000 /*tex The 64K was ridiculous (5 64K arrays of halfword). */
+# define min_mark_size 50
+# define stp_mark_size 50
+
+# define max_insert_size 500
+# define min_insert_size 10
+# define stp_insert_size 10
+
+# define max_font_size 100000 /* We're now no longer hooked into the eqtb (saved 500+ K in the format too).
*/
+# define min_font_size 250
+# define stp_font_size 250
+
+# define max_language_size 10000 /* We could bump this (as we merged the hj codes) but it makes no sense. */
+# define min_language_size 250
+# define stp_language_size 250
+
+/*tex
+
+    These are used in the code, so when we want them to adapt, which is needed when we make them
+    configurable, we need to change this. For now each count is simply an alias for the matching
+    maximum size.
+
+*/
+
+# define max_n_of_marks     max_mark_size
+# define max_n_of_inserts   max_insert_size
+# define max_n_of_fonts     max_font_size
+# define max_n_of_languages max_language_size
+
+/*tex
+
+    The following settings are not related to memory management. Some day I will probably change
+    the error half stuff. There is already an indent related frozen setting here.
+
+*/
+
+# define max_expand_depth 1000000 /* Just a number, no allocation. */
+# define min_expand_depth 10000
+
+# define max_error_line 255 /* This also determines the size of a (static) array. */
+# define min_error_line 132 /* Good old \TEX\ uses a value of 79. */
+
+# define max_half_error_line 255
+# define min_half_error_line 80 /* Good old \TEX\ uses a value of 50.
*/ + +# define memory_data_unset -1 + +typedef struct memory_data { + int ptr; /* the current pointer */ + int top; /* the maximum used pointer */ + int size; /* the used (optionally user asked) value */ + int allocated; /* the currently allocated amount */ + int step; /* the step used for growing */ + int minimum; /* the default mininum allocated, also the step */ + int maximum; /* the maximum possible */ + int itemsize; /* the itemsize */ + int initial; + int offset; /* offset of ptr and top */ +} memory_data; + +typedef struct limits_data { + int size; /* the used (optionally user asked) value */ + int minimum; /* the default mininum allocated */ + int maximum; /* the maximum possible */ + int top; /* the maximum used */ +} limits_data; + +extern void tex_dump_constants (dumpstream f); +extern void tex_undump_constants (dumpstream f); + +/*tex + +This is an experimental feature, different approaces to the main command dispatcher: + +\starttabulate[|l|l|l|l|l|l] +\BC n \BC method \BC [vhm]mode \BC binary \BC manual \BC comment \NC \NR +\ML +\NC 0 \NC jump table \NC cmd offsets \NC 2.691.584 \NC 10.719 \NC original method, selector: (cmd + mode) \NC \NR +\NC 1 \NC case with modes \NC sequential \NC 2.697.216 \NC 10.638 \NC nicer modes, we can delegate more to runners \NC \NR +\NC 2 \NC flat case \NC cmd offsets \NC 2.695.168 \NC 10.562 \NC variant on original \NC \NR +\stoptabulate + +The second method can be codes differently where we can delegate more to runners (that then can get +called with a mode argument). Maybe for a next iteration. Concerning performance: the differences +can be neglected (no differences on the test suite) because the bottleneck in \CONTEXT\ is at the +\LUA\ end. + +I occasionally test the variants. The last test showed that mode 1 gives a bit larger binary. There +is no real difference in performance. + +*/ + +# define main_control_mode 1 + +/*tex For the moment here. 
*/
+
+/*tex
+    The values that play a role when a paragraph is broken into lines, collected in one record.
+    Only the horizontal size and the hanging indentation are typed as |scaled|, all other fields
+    are halfwords. NOTE(review): presumably this record is filled from the current paragraph
+    parameters and then handed to the line breaker; confirm against the callers.
+*/
+
+typedef struct line_break_properties {
+    halfword initial_par;
+    halfword display_math;
+    halfword tracing_paragraphs;
+    halfword paragraph_dir;
+    halfword parfill_left_skip;
+    halfword parfill_right_skip;
+    halfword parinit_left_skip;
+    halfword parinit_right_skip;
+    halfword par_left_skip;
+    halfword par_right_skip;
+    halfword pretolerance;
+    halfword tolerance;
+    halfword emergency_stretch;
+    halfword looseness;
+    halfword adjust_spacing;
+    halfword protrude_chars;
+    halfword adj_demerits;
+    halfword line_penalty;
+    halfword last_line_fit;
+    halfword double_hyphen_demerits;
+    halfword final_hyphen_demerits;
+    scaled   hsize;
+    halfword left_skip;
+    halfword right_skip;
+    scaled   hang_indent;
+    halfword hang_after;
+    halfword par_shape;
+    halfword inter_line_penalty;
+    halfword inter_line_penalties;
+    halfword club_penalty;
+    halfword club_penalties;
+    halfword widow_penalty;
+    halfword widow_penalties;
+    halfword display_widow_penalty;
+    halfword display_widow_penalties;
+    halfword orphan_penalty;
+    halfword orphan_penalties;
+    halfword broken_penalty;
+    halfword baseline_skip;
+    halfword line_skip;
+    halfword line_skip_limit;
+    halfword adjust_spacing_step;
+    halfword adjust_spacing_shrink;
+    halfword adjust_spacing_stretch;
+    halfword hyphenation_mode;
+    halfword shaping_penalties_mode;
+    halfword shaping_penalty;
+    halfword padding;
+} line_break_properties;
+
+# endif
+
|