summaryrefslogtreecommitdiff
path: root/source/luametatex/source/tex/texexpand.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/tex/texexpand.c')
-rw-r--r--source/luametatex/source/tex/texexpand.c1411
1 files changed, 1411 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/texexpand.c b/source/luametatex/source/tex/texexpand.c
new file mode 100644
index 000000000..25dcccdf3
--- /dev/null
+++ b/source/luametatex/source/tex/texexpand.c
@@ -0,0 +1,1411 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# include "luametatex.h"
+
+/*tex
+
+ Only a dozen or so command codes |> max_command| can possibly be returned by |get_next|; in
+ increasing order, they are |undefined_cs|, |expand_after|, |no_expand|, |input|, |if_test|,
+ |fi_or_else|, |cs_name|, |convert|, |the|, |get_mark|, |call|, |long_call|, |outer_call|,
+ |long_outer_call|, and |end_template|.
+
+ Sometimes, recursive calls to the following |expand| routine may cause exhaustion of the
+ run-time calling stack, resulting in forced execution stops by the operating system. To
+ diminish the chance of this happening, a counter is used to keep track of the recursion depth,
+ in conjunction with a constant called |expand_depth|.
+
+ Note that this does not catch all possible infinite recursion loops, just the ones that
+ exhaust the application calling stack. The actual maximum value of |expand_depth| is outside
+ of our control, but the initial setting of |100| should be enough to prevent problems.
+
+*/
+
+expand_state_info lmt_expand_state = { /*tex The global bookkeeping of the expansion machinery in this file. */
+ .limits = {
+ .minimum = min_expand_depth,
+ .maximum = max_expand_depth,
+ .size = min_expand_depth, /*tex |tex_expand_current_token| reports an overflow once |depth| reaches this value */
+ .top = 0, /*tex bumped by |tex_expand_current_token| whenever |depth| exceeds it, so it follows the deepest level seen */
+ },
+ .depth = 0, /*tex incremented on entry to and decremented on exit from |tex_expand_current_token| */
+ .cs_name_level = 0, /*tex incremented while |tex_is_valid_csname| and |tex_aux_get_cs_name| collect a name */
+ .arguments = 0,
+ .match_token_head = null, /*tex allocated by |tex_initialize_expansion|, released by |tex_cleanup_expansion| */
+ .padding = 0,
+};
+
+ static void tex_aux_macro_call (halfword cs, halfword cmd, halfword chr); /*tex defined near the end of this file; used for the call commands */
+inline static void tex_aux_manufacture_csname (void); /*tex handles |cs_name_code| in |tex_expand_current_token| */
+inline static void tex_aux_manufacture_csname_use (void); /*tex handles |begin_cs_name_code| */
+inline static void tex_aux_manufacture_csname_future (void); /*tex handles |future_cs_name_code| */
+inline static void tex_aux_inject_last_tested_cs (void); /*tex handles |last_named_cs_code| */
+
+/*tex
+
+ We no longer store |match_token_head| in the format file. It is a bit cleaner to just
+ initialize it at startup and free it again when we clean up.
+
+*/
+
+void tex_initialize_expansion(void)
+{
+    /*tex
+        Grab one token from the pool (requested with |null| as argument) and remember it in
+        the expansion state as |match_token_head|.
+    */
+    halfword head = tex_get_available_token(null);
+    lmt_expand_state.match_token_head = head;
+}
+
+/*tex
+    Release the token that |tex_initialize_expansion| stored in |match_token_head|. The field is
+    reset to |null| (its initial value) afterwards, so that |tex_expand_match_token_head| can
+    never hand out a token that has already been returned to the pool. A second cleanup, or a
+    cleanup without a preceding initialization, is a no-op.
+*/
+
+void tex_cleanup_expansion(void)
+{
+    halfword head = lmt_expand_state.match_token_head;
+    if (head) {
+        tex_put_available_token(head);
+        lmt_expand_state.match_token_head = null;
+    }
+}
+
+halfword tex_expand_match_token_head(void)
+{
+    /*tex Accessor for the head token that is kept in the expansion state. */
+    halfword head = lmt_expand_state.match_token_head;
+    return head;
+}
+
+/*tex
+
+ The |expand| subroutine is used when |cur_cmd > max_command|. It removes a \quote {call} or a
+ conditional or one of the other special operations just listed. It follows that |expand| might
+ invoke itself recursively. In all cases, |expand| destroys the current token, but it sets things
+ up so that the next |get_next| will deliver the appropriate next token. The value of |cur_tok|
+ need not be known when |expand| is called.
+
+ Since several of the basic scanning routines communicate via global variables, their values are
+ saved as local variables of |expand| so that recursive calls don't invalidate them.
+
+*/
+
+inline static void tex_aux_expand_after(void)
+{
+    /*tex
+        This is what \TEX\ calls |\expandafter|: fetch two tokens, expand the second one when it
+        is expandable (otherwise just push it back) and finally push the first one back in
+        front of the result.
+    */
+    halfword skipped = tex_get_token();
+    halfword target = tex_get_token();
+    if (cur_cmd <= max_command_cmd) {
+        /*tex Nothing to expand, so the second token goes back untouched. */
+        tex_back_input(target);
+    } else {
+        tex_expand_current_token();
+    }
+    tex_back_input(skipped);
+}
+
+inline static void tex_aux_expand_toks_after(void)
+{
+    /*tex
+        A variant of |tex_aux_expand_after| where the part that is skipped is a scanned token
+        list instead of a single token. After the token that follows it has been expanded (or
+        pushed back), the list body is fed back to the input and its head token is released.
+    */
+    halfword list = tex_scan_toks_normal(0, NULL);
+    halfword next = tex_get_token();
+    if (cur_cmd <= max_command_cmd) {
+        tex_back_input(next);
+    } else {
+        tex_expand_current_token();
+    }
+    tex_begin_backed_up_list(token_link(list));
+    tex_put_available_token(list);
+}
+
+/*tex
+ Here we deal with stuff not in the big switch. Where that is discussed there is mentioning of
+ it all being a bit messy, also due to the fact that that switch (or actually a lookup table)
+ also uses the mode for determining what to do. We see no reason to change this model.
+*/
+
+/*tex
+    Expand the token that is currently described by |cur_cmd|, |cur_chr| and |cur_cs|.
+
+    The recursion depth is tracked in |lmt_expand_state.depth|: an overflow error is issued when
+    it reaches |limits.size| and |limits.top| follows the deepest level seen so far. The globals
+    |cur_val| and |cur_val_level| are saved on entry and restored on exit so that (recursive)
+    expansion does not clobber a value that a caller is in the middle of scanning.
+
+    Commands below |first_call_cmd| are expandable primitives and are handled by the switch,
+    commands up to |last_call_cmd| are macro calls that go to |tex_aux_macro_call|, and what
+    remains is the end template command.
+
+    In the |lua_local_call_cmd| branch a failing |lua_pcall| leaves its error object on the \LUA\
+    stack; after it has been reported we pop it. The string saved by |tex_save_cur_string| is
+    restored on the error path too, so save and restore are always paired.
+*/
+
+void tex_expand_current_token(void)
+{
+    ++lmt_expand_state.depth;
+    if (lmt_expand_state.depth > lmt_expand_state.limits.top) {
+        if (lmt_expand_state.depth >= lmt_expand_state.limits.size) {
+            tex_overflow_error("expansion depth", lmt_expand_state.limits.size);
+        } else {
+            lmt_expand_state.limits.top += 1;
+        }
+    }
+    /*tex We're okay. */
+    {
+        halfword saved_cur_val = cur_val;
+        halfword saved_cur_val_level = cur_val_level;
+     // halfword saved_head = token_link(token_data.backup_head);
+        if (cur_cmd < first_call_cmd) {
+            /*tex Expand a nonmacro. */
+            if (tracing_commands_par > 1) {
+                tex_show_cmd_chr(cur_cmd, cur_chr);
+            }
+            switch (cur_cmd) {
+                case expand_after_cmd:
+                    {
+                        int mode = cur_chr;
+                        switch (mode) {
+                            case expand_after_code:
+                                tex_aux_expand_after();
+                                break;
+                            /*
+                            case expand_after_3_code:
+                                tex_aux_expand_after();
+                                // fall-through
+                            case expand_after_2_code:
+                                tex_aux_expand_after();
+                                tex_aux_expand_after();
+                                break;
+                            */
+                            case expand_unless_code:
+                                tex_conditional_unless();
+                                break;
+                            case future_expand_code:
+                                /*tex
+                                    This is an experiment: |\futureexpand| (2) which takes |\check \yes
+                                    \nop| as arguments. It's not faster, but gives less tracing noise
+                                    than a macro. The variant |\futureexpandis| (3) alternative doesn't
+                                    inject the gobbles space(s).
+                                */
+                                tex_get_token();
+                                {
+                                    halfword spa = null;
+                                    halfword chr = cur_chr;
+                                    halfword cmd = cur_cmd;
+                                    halfword yes = tex_get_token(); /* when match */
+                                    halfword nop = tex_get_token(); /* when no match */
+                                    while (1) {
+                                        halfword t = tex_get_token();
+                                        if (cur_cmd == spacer_cmd) {
+                                            spa = t;
+                                        } else {
+                                            tex_back_input(t);
+                                            break;
+                                        }
+                                    }
+                                    /*tex The value 1 means: same input level. */
+                                    if (cur_cmd == cmd && cur_chr == chr) {
+                                        tex_reinsert_token(yes);
+                                    } else {
+                                        /*tex Only in the no match case the gobbled space comes back. */
+                                        if (spa) {
+                                            tex_reinsert_token(space_token);
+                                        }
+                                        tex_reinsert_token(nop);
+                                    }
+                                }
+                                break;
+                            case future_expand_is_code:
+                                tex_get_token();
+                                {
+                                    halfword chr = cur_chr;
+                                    halfword cmd = cur_cmd;
+                                    halfword yes = tex_get_token(); /* when match */
+                                    halfword nop = tex_get_token(); /* when no match */
+                                    while (1) {
+                                        halfword t = tex_get_token();
+                                        if (cur_cmd != spacer_cmd) {
+                                            tex_back_input(t);
+                                            break;
+                                        }
+                                    }
+                                    tex_reinsert_token((cur_cmd == cmd && cur_chr == chr) ? yes : nop);
+                                }
+                                break;
+                            case future_expand_is_ap_code:
+                                tex_get_token();
+                                {
+                                    halfword chr = cur_chr;
+                                    halfword cmd = cur_cmd;
+                                    halfword yes = tex_get_token(); /* when match */
+                                    halfword nop = tex_get_token(); /* when no match */
+                                    while (1) {
+                                        halfword t = tex_get_token();
+                                        if (cur_cmd != spacer_cmd && cur_cmd != end_paragraph_cmd) {
+                                            tex_back_input(t);
+                                            break;
+                                        }
+                                    }
+                                    /*tex We stay at the same input level. */
+                                    tex_reinsert_token((cur_cmd == cmd && cur_chr == chr) ? yes : nop);
+                                }
+                                break;
+                            case expand_after_spaces_code:
+                                {
+                                    /* maybe two variants: after_spaces and after_par like in the ignores */
+                                    halfword t1 = tex_get_token();
+                                    while (1) {
+                                        halfword t2 = tex_get_token();
+                                        if (cur_cmd != spacer_cmd) {
+                                            tex_back_input(t2);
+                                            break;
+                                        }
+                                    }
+                                    tex_reinsert_token(t1);
+                                    break;
+                                }
+                            case expand_after_pars_code:
+                                {
+                                    halfword t1 = tex_get_token();
+                                    while (1) {
+                                        halfword t2 = tex_get_token();
+                                        if (cur_cmd != spacer_cmd && cur_cmd != end_paragraph_cmd) {
+                                            tex_back_input(t2);
+                                            break;
+                                        }
+                                    }
+                                    tex_reinsert_token(t1);
+                                    break;
+                                }
+                            case expand_token_code:
+                                {
+                                    /* we can share code with lmtokenlib .. todo */
+                                    halfword cat = tex_scan_category_code();
+                                    halfword chr = tex_scan_char_number(0);
+                                    /* too fragile:
+                                        halfword tok = null;
+                                        switch (cat) {
+                                            case letter_cmd:
+                                            case other_char_cmd:
+                                            case ignore_cmd:
+                                            case spacer_cmd:
+                                                tok = token_val(cat, chr);
+                                                break;
+                                            case active_char_cmd:
+                                                {
+                                                    halfword cs = tex_active_to_cs(chr, ! lmt_hash_state.no_new_cs);
+                                                    if (cs) {
+                                                        chr = eq_value(cs);
+                                                        tok = cs_token_flag + cs;
+                                                        break;
+                                                    }
+                                                }
+                                            default:
+                                                tok = token_val(other_char_cmd, chr);
+                                                break;
+                                        }
+                                    */
+                                    /*tex Anything but these four categories becomes an other character. */
+                                    switch (cat) {
+                                        case letter_cmd:
+                                        case other_char_cmd:
+                                        case ignore_cmd:
+                                        case spacer_cmd:
+                                            break;
+                                        default:
+                                            cat = other_char_cmd;
+                                            break;
+                                    }
+                                    tex_back_input(token_val(cat, chr));
+                                    break;
+                                }
+                            case expand_cs_token_code:
+                                {
+                                    tex_get_token();
+                                    if (cur_tok >= cs_token_flag) {
+                                        /*tex A control sequence that means a character becomes that character. */
+                                        halfword cmd = eq_type(cur_cs);
+                                        switch (cmd) {
+                                            case left_brace_cmd:
+                                            case right_brace_cmd:
+                                            case math_shift_cmd:
+                                            case alignment_tab_cmd:
+                                            case superscript_cmd:
+                                            case subscript_cmd:
+                                            case spacer_cmd:
+                                            case letter_cmd:
+                                            case other_char_cmd:
+                                                cur_tok = token_val(cmd, eq_value(cur_cs));
+                                                break;
+                                        }
+                                    }
+                                    tex_back_input(cur_tok);
+                                    break;
+                                }
+                            case expand_code:
+                                {
+                                    tex_get_token();
+                                    if (cur_cmd >= first_call_cmd && cur_cmd <= last_call_cmd) {
+                                        tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
+                                    } else {
+                                        /* Use expand_current_token so that protected lua call are dealt with too? */
+                                        tex_back_input(cur_tok);
+                                    }
+                                    break;
+                                }
+                            case semi_expand_code:
+                                {
+                                    tex_get_token();
+                                    if (is_semi_protected_cmd(cur_cmd)) {
+                                        tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
+                                    } else {
+                                        /* Use expand_current_token so that protected lua call are dealt with too? */
+                                        tex_back_input(cur_tok);
+                                    }
+                                    break;
+                                }
+                            case expand_after_toks_code:
+                                {
+                                    tex_aux_expand_toks_after();
+                                    break;
+                                }
+                            /*
+                            case expand_after_fi:
+                                {
+                                    conditional_after_fi();
+                                    break;
+                                }
+                            */
+                        }
+                    }
+                    break;
+                case cs_name_cmd:
+                    /*tex Manufacture a control sequence name. */
+                    switch (cur_chr) {
+                        case cs_name_code:
+                            tex_aux_manufacture_csname();
+                            break;
+                        case last_named_cs_code:
+                            tex_aux_inject_last_tested_cs();
+                            break;
+                        case begin_cs_name_code:
+                            tex_aux_manufacture_csname_use();
+                            break;
+                        case future_cs_name_code:
+                            tex_aux_manufacture_csname_future();
+                            break;
+                    }
+                    break;
+                case no_expand_cmd:
+                    {
+                        /*tex
+                            Suppress expansion of the next token. The implementation of |\noexpand|
+                            is a bit trickier, because it is necessary to insert a special
+                            |dont_expand| marker into \TEX's reading mechanism. This special marker
+                            is processed by |get_next|, but it does not slow down the inner loop.
+
+                            Since |\outer| macros might arise here, we must also clear the
+                            |scanner_status| temporarily.
+                        */
+                        halfword t;
+                        halfword save_scanner_status = lmt_input_state.scanner_status;
+                        lmt_input_state.scanner_status = scanner_is_normal;
+                        t = tex_get_token();
+                        lmt_input_state.scanner_status = save_scanner_status;
+                        tex_back_input(t);
+                        /*tex Now |start| and |loc| point to the backed-up token |t|. */
+                        if (t >= cs_token_flag) {
+                            halfword p = tex_get_available_token(deep_frozen_dont_expand_token);
+                            set_token_link(p, lmt_input_state.cur_input.loc);
+                            lmt_input_state.cur_input.start = p;
+                            lmt_input_state.cur_input.loc = p;
+                        }
+                    }
+                    break;
+                case if_test_cmd:
+                    if (cur_chr < first_real_if_test_code) {
+                        tex_conditional_fi_or_else();
+                    } else if (cur_chr != if_condition_code) {
+                        tex_conditional_if(cur_chr, 0);
+                    } else {
+                        /*tex The |\ifcondition| primitive is a no-op unless we're in skipping mode. */
+                    }
+                    break;
+                case the_cmd:
+                    {
+                        halfword h = tex_the_toks(cur_chr, NULL);
+                        tex_begin_inserted_list(h);
+                        break;
+                    }
+                case lua_call_cmd:
+                    if (cur_chr > 0) {
+                        strnumber u = tex_save_cur_string();
+                        lmt_token_state.luacstrings = 0;
+                        lmt_function_call(cur_chr, 0);
+                        tex_restore_cur_string(u);
+                        if (lmt_token_state.luacstrings > 0) {
+                            tex_lua_string_start();
+                        }
+                    } else {
+                        tex_normal_error("luacall", "invalid number");
+                    }
+                    break;
+                case lua_local_call_cmd:
+                    if (cur_chr > 0) {
+                        lua_State *L = lmt_lua_state.lua_instance;
+                        strnumber u = tex_save_cur_string();
+                        int failed;
+                        lmt_token_state.luacstrings = 0;
+                        /* todo: use a private table as we can overflow, unless we register early */
+                        lua_rawgeti(L, LUA_REGISTRYINDEX, cur_chr);
+                        failed = lua_pcall(L, 0, 0, 0);
+                        if (failed) {
+                            tex_formatted_warning("luacall", "local call error: %s", lua_tostring(L, -1));
+                            /*tex The error object is still on the stack, so we get rid of it. */
+                            lua_pop(L, 1);
+                        }
+                        /*tex Restoring is needed in both cases, it pairs with the save above. */
+                        tex_restore_cur_string(u);
+                        if (! failed && lmt_token_state.luacstrings > 0) {
+                            tex_lua_string_start();
+                        }
+                    } else {
+                        tex_normal_error("luacall", "invalid number");
+                    }
+                    break;
+                case begin_local_cmd:
+                    tex_begin_local_control();
+                    break;
+                case convert_cmd:
+                    tex_run_convert_tokens(cur_chr);
+                    break;
+                case input_cmd:
+                    /*tex Initiate or terminate input from a file */
+                    switch (cur_chr) {
+                        case normal_input_code:
+                            if (lmt_fileio_state.name_in_progress) {
+                                tex_insert_relax_and_cur_cs();
+                            } else {
+                                tex_start_input(tex_read_file_name(0, NULL, texinput_extension));
+                            }
+                            break;
+                        case end_of_input_code:
+                            lmt_token_state.force_eof = 1;
+                            break;
+                        case quit_loop_code:
+                            lmt_main_control_state.quit_loop = 1;
+                            break;
+                        case token_input_code:
+                            tex_tex_string_start(io_token_eof_input_code, cat_code_table_par);
+                            break;
+                        case tex_token_input_code:
+                            tex_tex_string_start(io_token_input_code, cat_code_table_par);
+                            break;
+                        case tokenized_code:
+                        case retokenized_code:
+                            {
+                                /*tex
+                                    This variant complements the other expandable primitives but
+                                    also supports an optional keyword, who knows when that comes in
+                                    handy; what goes in is detokenized anyway. For now it is an
+                                    undocumented feature. It is likely that there is a |cct| passed
+                                    so we don't need to optimize. If needed we can make a version
+                                    where this is mandate.
+                                */
+                                int cattable = (cur_chr == retokenized_code || tex_scan_optional_keyword("catcodetable")) ? tex_scan_int(0, NULL) : cat_code_table_par;
+                                full_scanner_status saved_full_status = tex_save_full_scanner_status();
+                                strnumber u = tex_save_cur_string();
+                                halfword s = tex_scan_toks_expand(0, NULL, 0);
+                                tex_unsave_full_scanner_status(saved_full_status);
+                                if (token_link(s)) {
+                                    tex_begin_inserted_list(tex_wrapped_token_list(s));
+                                    tex_tex_string_start(io_token_input_code, cattable);
+                                }
+                                tex_put_available_token(s);
+                                tex_restore_cur_string(u);
+                            }
+                            break;
+                        default:
+                            break;
+                    }
+                    break;
+                case get_mark_cmd:
+                    {
+                        /*tex Insert the appropriate mark text into the scanner. */
+                        halfword num = 0;
+                        halfword code = cur_chr;
+                        switch (code) {
+                            case top_marks_code:
+                            case first_marks_code:
+                            case bot_marks_code:
+                            case split_first_marks_code:
+                            case split_bot_marks_code:
+                            case current_marks_code:
+                                num = tex_scan_mark_number();
+                                break;
+                        }
+                        if (tex_valid_mark(num)) {
+                            halfword ptr = tex_get_some_mark(code, num);
+                            if (ptr) {
+                                tex_begin_token_list(ptr, mark_text);
+                            }
+                        }
+                        break;
+                    }
+                /*
+                case string_cmd:
+                    {
+                        halfword head = str_toks(str_lstring(cs_offset_value + cur_chr), NULL);
+                        begin_inserted_list(head);
+                        break;
+                    }
+                */
+                default:
+                    /* Maybe ... or maybe an option */
+                 // if (lmt_expand_state.cs_name_level == 0) {
+                        /*tex Complain about an undefined macro */
+                        tex_handle_error(
+                            normal_error_type,
+                            "Undefined control sequence %m", cur_cs,
+                            "The control sequence at the end of the top line of your error message was never\n"
+                            "\\def'ed. You can just continue as I'll forget about whatever was undefined."
+                        );
+                 // }
+                    break;
+            }
+        } else if (cur_cmd <= last_call_cmd) {
+            tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
+        } else {
+            /*tex
+                Insert a token containing |frozen_endv|. An |end_template| command is effectively
+                changed to an |endv| command by the following code. (The reason for this is discussed
+                below; the |frozen_end_template| at the end of the template has passed the
+                |check_outer_validity| test, so its mission of error detection has been accomplished.)
+            */
+            tex_back_input(deep_frozen_end_template_2_token);
+        }
+        cur_val = saved_cur_val;
+        cur_val_level = saved_cur_val_level;
+     // set_token_link(token_data.backup_head, saved_head);
+    }
+    --lmt_expand_state.depth;
+}
+
+static void tex_aux_complain_missing_csname(void)
+{ /*tex Issues the \quote {Missing \endcsname inserted} error; |tex_aux_get_cs_name| calls this when collecting the name stopped on something other than |\endcsname|. */
+ tex_handle_error(
+ back_error_type, /*tex NOTE(review): presumably this error type also pushes the offending token back, which is what \quote {to be read again} in the help text refers to; confirm in the error handler. */
+ "Missing \\endcsname inserted",
+ "The control sequence marked <to be read again> should not appear between \\csname\n"
+ "and \\endcsname."
+ );
+}
+
+/*tex
+    Append the \UTF-8 encoding of character |c| to buffer |b|, starting at index |m|, and return
+    the index just after the last byte written. One to four bytes are stored; the caller has to
+    make sure that there is room for them.
+*/
+
+inline static int tex_aux_uni_to_buffer(unsigned char *b, int m, int c)
+{
+    unsigned char *o = b + m;
+    if (c <= 0x7F) {
+        /*tex One byte, the value goes in as it is. */
+        *o++ = (unsigned char) c;
+    } else if (c <= 0x7FF) {
+        /*tex Two bytes: five bits after the lead marker, then six payload bits. */
+        *o++ = (unsigned char) (0xC0 + (c >> 6));
+        *o++ = (unsigned char) (0x80 + (c & 0x3F));
+    } else if (c <= 0xFFFF) {
+        /*tex Three bytes: four bits after the lead marker, then twice six payload bits. */
+        *o++ = (unsigned char) (0xE0 + (c >> 12));
+        *o++ = (unsigned char) (0x80 + ((c >> 6) & 0x3F));
+        *o++ = (unsigned char) (0x80 + (c & 0x3F));
+    } else {
+        /*tex Four bytes: the rest goes after the lead marker, then three times six bits. */
+        *o++ = (unsigned char) (0xF0 + (c >> 18));
+        *o++ = (unsigned char) (0x80 + ((c >> 12) & 0x3F));
+        *o++ = (unsigned char) (0x80 + ((c >> 6) & 0x3F));
+        *o++ = (unsigned char) (0x80 + (c & 0x3F));
+    }
+    return (int) (o - b);
+}
+
+/*tex
+ We also quit on a protected macro call, which is different from \LUATEX\ (and \PDFTEX) but makes
+ much sense. It also avoids long token lists that (should) never match anyway.
+*/
+
+
+/*tex
+    Collect the tokens that make up a control sequence name. Character tokens are appended
+    after |*p| (which is moved to the new tail) and counted in |*n|. Unprotected macro calls
+    and other expandable commands are expanded on the way. We return 1 when |\endcsname| has
+    been seen and 0 when something else stopped us; in that case the offending token is what
+    |cur_cmd|, |cur_chr| and |cur_cs| describe.
+*/
+
+static int tex_aux_collect_cs_tokens(halfword *p, int *n)
+{
+    for (;;) {
+        tex_get_next();
+        switch (cur_cmd) {
+            case left_brace_cmd:
+            case right_brace_cmd:
+            case math_shift_cmd:
+            case alignment_tab_cmd:
+            case parameter_cmd:
+            case superscript_cmd:
+            case subscript_cmd:
+            case spacer_cmd:
+            case letter_cmd:
+            case other_char_cmd:
+                /*tex These become part of the name; end of line, ignored, active, comment and invalid ones don't. */
+                *p = tex_store_new_token(*p, token_val(cur_cmd, cur_chr));
+                ++(*n);
+                break;
+            case call_cmd:
+            case tolerant_call_cmd:
+                tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
+                break;
+            case end_cs_name_cmd:
+                return 1;
+            default:
+                if (cur_cmd <= max_command_cmd || cur_cmd >= first_call_cmd) {
+                    /*tex Neither a character nor something that we are willing to expand here. */
+                    return 0;
+                }
+                tex_expand_current_token();
+                break;
+        }
+    }
+}
+
+/*tex
+    Scan up to |\endcsname| and check if the collected name is a known and defined control
+    sequence, without ever creating a new one. The result of the lookup (or |null_cs|) ends up
+    in |cur_cs| and in |lmt_scanner_state.last_cs_name|; the return value is 1 only when the
+    name was found and its |eq_type| is not |undefined_cs_cmd|.
+*/
+
+int tex_is_valid_csname(void)
+{
+    halfword found = null_cs;
+    int valid = 0;
+    int count = 0;
+    halfword head = tex_get_available_token(null);
+    halfword tail = head;
+    lmt_expand_state.cs_name_level += 1;
+    if (tex_aux_collect_cs_tokens(&tail, &count)) {
+        if (count) {
+            /*tex Put the name in the buffer (at most four bytes per character) and look it up. */
+            int first = lmt_fileio_state.io_first;
+            if (tex_room_in_buffer(first + count * 4)) {
+                int used = first;
+                for (halfword node = token_link(head); node; node = token_link(node)) {
+                    used = tex_aux_uni_to_buffer(lmt_fileio_state.io_buffer, used, token_chr(token_info(node)));
+                }
+                /*tex Don't create a new cs! */
+                found = tex_id_locate(first, used - first, 0);
+                valid = (found != undefined_control_sequence) && (eq_type(found) != undefined_cs_cmd);
+            }
+        }
+    } else {
+        /*tex Something unexpected stopped the collector, so we skip to |\endcsname|. */
+        do {
+            tex_get_x_or_protected(); /* we skip unprotected ! */
+        } while (cur_cmd != end_cs_name_cmd);
+    }
+    tex_flush_token_list_head_tail(head, tail, count + 1);
+    lmt_scanner_state.last_cs_name = found;
+    lmt_expand_state.cs_name_level -= 1;
+    cur_cs = found;
+    return valid;
+}
+
+/*tex
+    Scan up to |\endcsname| and turn the collected characters into a control sequence, which
+    is returned and also left in |cur_cs| and |lmt_scanner_state.last_cs_name|. An empty name
+    gives |null_cs|. When the collector stops on something other than |\endcsname| we complain
+    and |cur_cs| is not set here, so it keeps whatever the scanner left in it.
+
+    The temporary list consists of the head token plus |n| collected tokens, so |n + 1| tokens
+    are handed back when we flush, just as in |tex_is_valid_csname|.
+*/
+
+inline static halfword tex_aux_get_cs_name(void)
+{
+    halfword h = tex_get_available_token(null); /* hm */
+    halfword p = h;
+    int n = 0;
+    lmt_expand_state.cs_name_level += 1;
+    if (tex_aux_collect_cs_tokens(&p, &n)) {
+        /*tex Look up the characters of list |h| in the hash table, and set |cur_cs|. */
+        int siz;
+        char *s = tex_tokenlist_to_tstring(h, 1, &siz, 0, 0, 0);
+        cur_cs = (siz > 0) ? tex_string_locate(s, siz, 1) : null_cs;
+    } else {
+        tex_aux_complain_missing_csname();
+    }
+    lmt_scanner_state.last_cs_name = cur_cs;
+    lmt_expand_state.cs_name_level -= 1;
+    tex_flush_token_list_head_tail(h, p, n + 1);
+    return cur_cs;
+}
+
+inline static void tex_aux_manufacture_csname(void)
+{
+    /*tex
+        This is |\csname|: get the control sequence, give it the meaning of |\relax| when it
+        is still undefined, and push it back into the input.
+    */
+    halfword cs = tex_aux_get_cs_name();
+    int undefined = eq_type(cs) == undefined_cs_cmd;
+    if (undefined) {
+        /*tex The |save_stack| might change! */
+        tex_eq_define(cs, relax_cmd, relax_code);
+    }
+    /*tex The control sequence will now match |\relax| */
+    tex_back_input(cs + cs_token_flag);
+}
+
+inline static void tex_aux_manufacture_csname_use(void)
+{
+    /*tex
+        Only a name that is already defined is pushed back into the input. Otherwise nothing
+        is injected and we just register the frozen |\relax| token as last name.
+    */
+    if (! tex_is_valid_csname()) {
+        lmt_scanner_state.last_cs_name = deep_frozen_relax_token;
+        return;
+    }
+    tex_back_input(cur_cs + cs_token_flag);
+}
+
+inline static void tex_aux_manufacture_csname_future(void)
+{
+    /*tex
+        We first pick up one token and then scan the name. A defined name is pushed back into
+        the input, otherwise the token that we picked up goes back instead.
+    */
+    halfword fallback = tex_get_token();
+    if (! tex_is_valid_csname()) {
+        lmt_scanner_state.last_cs_name = deep_frozen_relax_token;
+        tex_back_input(fallback);
+    } else {
+        tex_back_input(cur_cs + cs_token_flag);
+    }
+}
+
+halfword tex_create_csname(void)
+{
+    /*tex
+        Like |\csname| but nothing is pushed back: the caller gets the control sequence itself
+        (not the token, so without |cs_token_flag| added). An undefined one becomes |\relax|.
+    */
+    halfword cs = tex_aux_get_cs_name();
+    if (eq_type(cs) != undefined_cs_cmd) {
+        return cs;
+    }
+    tex_eq_define(cs, relax_cmd, relax_code);
+    return cs;
+}
+
+inline static void tex_aux_inject_last_tested_cs(void)
+{
+    /*tex Push back the last registered name as a token, unless that is |null_cs|. */
+    halfword last = lmt_scanner_state.last_cs_name;
+    if (last == null_cs) {
+        return;
+    }
+    tex_back_input(last + cs_token_flag);
+}
+
+/*tex
+
+ Sometimes the expansion looks too far ahead, so we want to insert a harmless |\relax| into the
+ user's input.
+*/
+
+void tex_insert_relax_and_cur_cs(void)
+{
+    /*tex
+        The current control sequence is pushed back first and then the frozen |\relax| is
+        reinserted. Finally the current input level is flagged as |inserted_text|.
+    */
+    halfword current = cs_token_flag + cur_cs;
+    tex_back_input(current);
+    tex_reinsert_token(deep_frozen_relax_token);
+    lmt_input_state.cur_input.token_type = inserted_text;
+}
+
+/*tex
+
+ Here is a recursive procedure that is \TEX's usual way to get the next token of input. It has
+ been slightly optimized to take account of common cases.
+
+*/
+
+halfword tex_get_x_token(void)
+{
+    /*tex
+        We fetch tokens and expand them till we run into something that is not expandable.
+        This sets |cur_cmd|, |cur_chr| and |cur_tok|, and the last one is also returned.
+    */
+    int expanding = 1;
+    while (expanding) {
+        tex_get_next();
+        if (cur_cmd <= max_command_cmd) {
+            expanding = 0;
+        } else if (cur_cmd < first_call_cmd) {
+            tex_expand_current_token();
+        } else if (cur_cmd <= last_call_cmd) {
+            tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
+        } else {
+            /*tex What is left is reported as the frozen end template. */
+            cur_cs = deep_frozen_cs_end_template_2_code;
+            cur_cmd = end_template_cmd;
+            /*tex Now |cur_chr = token_state.null_list|. */
+            expanding = 0;
+        }
+    }
+    cur_tok = cur_cs ? cs_token_flag + cur_cs : token_val(cur_cmd, cur_chr);
+    return cur_tok;
+}
+
+/*tex
+
+ The |get_x_token| procedure is equivalent to two consecutive procedure calls: |get_next; x_token|.
+ In other words, |x_token| is |get_x_token| without the initial |get_next|.
+
+*/
+
+void tex_x_token(void)
+{
+    /*tex
+        Here the first token has already been fetched by the caller. As long as the current
+        one is expandable we expand it and fetch the next one; after that |cur_tok| is set.
+    */
+    for (; cur_cmd > max_command_cmd; tex_get_next()) {
+        tex_expand_current_token();
+    }
+    cur_tok = cur_cs ? cs_token_flag + cur_cs : token_val(cur_cmd, cur_chr);
+}
+
+/*tex
+
+ A control sequence that has been |\def|'ed by the user is expanded by \TEX's |macro_call|
+ procedure. Here we also need to deal with marks, but these are discussed elsewhere.
+
+ So let's consider |macro_call| itself, which is invoked when \TEX\ is scanning a control
+ sequence whose |cur_cmd| is either |call|, |long_call|, |outer_call|, or |long_outer_call|. The
+ control sequence definition appears in the token list whose reference count is in location
+ |cur_chr| of |mem|.
+
+ The global variable |long_state| will be set to |call| or to |long_call|, depending on whether
+ or not the control sequence disallows |\par| in its parameters. The |get_next| routine will set
+ |long_state| to |outer_call| and emit |\par|, if a file ends or if an |\outer| control sequence
+ occurs in the midst of an argument.
+
+ The parameters, if any, must be scanned before the macro is expanded. Parameters are token
+ lists without reference counts. They are placed on an auxiliary stack called |pstack| while
+ they are being scanned, since the |param_stack| may be losing entries during the matching
+ process. (Note that |param_stack| can't be gaining entries, since |macro_call| is the only
+ routine that puts anything onto |param_stack|, and it is not recursive.)
+
+ After parameter scanning is complete, the parameters are moved to the |param_stack|. Then the
+ macro body is fed to the scanner; in other words, |macro_call| places the defined text of the
+ control sequence at the top of \TEX's input stack, so that |get_next| will proceed to read it
+ next.
+
+ The global variable |cur_cs| contains the |eqtb| address of the control sequence being expanded,
+ when |macro_call| begins. If this control sequence has not been declared |\long|, i.e., if its
+ command code in the |eq_type| field is not |long_call| or |long_outer_call|, its parameters are
+ not allowed to contain the control sequence |\par|. If an illegal |\par| appears, the macro call
+ is aborted, and the |\par| will be rescanned.
+
+ Beware: we cannot use |cur_cmd| here because for instance |\bgroup| can be part of an argument
+ without there being an |\egroup|. We really need to check raw brace tokens (|{}|) here when we
+ pick up an argument!
+
+ */
+
+/*tex
+
+ In \LUAMETATEX\ we have an extended argument definition system. The approach is still the same
+ and the additional code kind of fits in. There is a bit more testing going on but the overhead
+ is kept at a minimum so performance is not hit. Macro packages like \CONTEXT\ spend a lot of
+ time expanding and the extra overhead of the extensions is compensated by some gain in using
+ them. However, the most important motive is in readability of macro code on the one hand and
+ the wish for less tracing (due to all this multi-step processing) on the other. It suits me
+ well. This is definitely a case of |goto| abuse.
+
+*/
+
+/*tex
+    Strip leading and trailing spaces and pars from token list |h|, where a par is either an
+    |end_paragraph_cmd| token or the current |par_token|. Leading ones are released one by one
+    as we walk the list; for trailing ones we remember the token before the first one of the
+    last run and cut the list there. The (possibly new, possibly |null|) head is returned.
+*/
+
+static halfword tex_aux_prune_list(halfword h)
+{
+    halfword t = h;
+    halfword p = null;
+    int done = 0;
+    /*tex The token after which a trailing run starts; it's a token so we use a |halfword|. */
+    halfword last = null;
+    while (t) {
+        halfword l = token_link(t);
+        halfword i = token_info(t);
+        halfword c = token_cmd(i);
+        if (c != spacer_cmd && c != end_paragraph_cmd && i != lmt_token_state.par_token) { // c != 0xFF
+            /*tex Real content, so a run that we were tracking was not a trailing one. */
+            done = 1;
+            last = null;
+        } else if (done) {
+            if (! last) {
+                last = p; /* before space */
+            }
+        } else {
+            /*tex We're still in the leading run: drop the token and move the head. */
+            h = l;
+            tex_put_available_token(t);
+        }
+        p = t;
+        t = l;
+    }
+    if (last) {
+        halfword l = token_link(last);
+        set_token_link(last, null);
+        tex_flush_token_list(l);
+    }
+    return h;
+}
+
+int tex_get_parameter_count(void)
+{
+    /*tex
+        Count the consecutive non-null entries on the parameter stack, starting at the first
+        parameter of the current input level. We stop at the first empty slot or at the top of
+        the stack.
+    */
+    int n = 0;
+    int i = lmt_input_state.cur_input.parameter_start;
+    while (i < lmt_input_state.parameter_stack_data.ptr && lmt_input_state.parameter_stack[i]) {
+        ++n;
+        ++i;
+    }
+    return n;
+}
+
+static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)
+{
+ int tracing = tracing_macros_par > 0;
+ if (tracing) {
+ /*tex
+ Setting |\tracingmacros| to 2 means that elsewhere marks etc are shown so in fact a bit
+ more detail. However, as we turn that on anyway, using a value of 3 is not that weird
+ for less info here. Introducing an extra parameter makes no sense.
+ */
+ tex_begin_diagnostic();
+ tex_print_cs_checked(cs);
+ if (is_untraced(eq_flag(cs))) {
+ tracing = 0;
+ } else {
+ if (! get_token_parameters(chr)) {
+ tex_print_str("->");
+ } else {
+ /* maybe move the preamble scanner to here */
+ }
+ tex_token_show(chr, default_token_show_max);
+ }
+ tex_end_diagnostic();
+ }
+ if (get_token_parameters(chr)) {
+ halfword matchpointer = token_link(chr);
+ halfword matchtoken = token_info(matchpointer);
+ int save_scanner_status = lmt_input_state.scanner_status;
+ halfword save_warning_index = lmt_input_state.warning_index;
+ int nofscanned = 0;
+ int nofarguments = 0;
+ halfword pstack[9]; /* We could go for 15 if we accept |#A-#F|. */
+ /*tex
+ Scan the parameters and make |link(r)| point to the macro body; but |return| if an
+ illegal |\par| is detected.
+
+ At this point, the reader will find it advisable to review the explanation of token
+ list format that was presented earlier, since many aspects of that format are of
+ importance chiefly in the |macro_call| routine.
+
+ The token list might begin with a string of compulsory tokens before the first
+ |match| or |end_match|. In that case the macro name is supposed to be followed by
+ those tokens; the following program will set |s=null| to represent this restriction.
+ Otherwise |s| will be set to the first token of a string that will delimit the next
+ parameter.
+ */
+ int tolerant = is_tolerant_cmd(cmd);
+ /*tex the number of tokens or groups (usually) */
+ halfword count = 0;
+ /*tex one step before the last |right_brace| token */
+ halfword rightbrace = null;
+ /*tex the state, currently the character used in parameter */
+ int match = 0;
+ int thrash = 0;
+ int quitting = 0;
+ int last = 0;
+ /*tex current node in parameter token list being built */
+ halfword p = null;
+ /*tex backup pointer for parameter matching */
+ halfword s = null;
+ int spacer = 0;
+ /*tex
+ One day I will check the next code for too many tests, no that much branching that it.
+ The numbers in |#n| are match tokens except the last one, which is has a different
+ token info.
+ */
+ lmt_input_state.warning_index = cs;
+ lmt_input_state.scanner_status = tolerant ? scanner_is_tolerant : scanner_is_matching;
+ /* */
+ do {
+ /*tex
+ So, can we use a local head here? After all, there is no expansion going on here,
+ so no need to access |temp_token_head|. On the other hand, it's also used as a
+ signal, so not now.
+ */
+ RESTART:
+ set_token_link(lmt_expand_state.match_token_head, null);
+ AGAIN:
+ spacer = 0;
+ LATER:
+ if (matchtoken < match_token || matchtoken >= end_match_token) {
+ s = null;
+ } else {
+ switch (matchtoken) {
+ case spacer_match_token:
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ do {
+ tex_get_token();
+ } while (cur_cmd == spacer_cmd);
+ last = 1;
+ goto AGAIN;
+ case mandate_match_token:
+ match = match_mandate;
+ goto MANDATE;
+ case mandate_keep_match_token:
+ match = match_bracekeeper;
+ MANDATE:
+ if (last) {
+ last = 0;
+ } else {
+ tex_get_token();
+ last = 1;
+ }
+ if (cur_tok < left_brace_limit) {
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ s = matchpointer;
+ p = lmt_expand_state.match_token_head;
+ count = 0;
+ last = 0;
+ goto GROUPED;
+ } else {
+ if (tolerant) {
+ last = 0;
+ nofarguments = nofscanned;
+ tex_back_input(cur_tok);
+ goto QUITTING;
+ } else {
+ last = 0;
+ tex_back_input(cur_tok);
+ }
+ s = null;
+ goto BAD;
+ }
+ break;
+ case thrash_match_token:
+ match = 0;
+ thrash = 1;
+ break;
+ case leading_match_token:
+ match = match_spacekeeper;
+ break;
+ case prune_match_token:
+ match = match_pruner;
+ break;
+ case continue_match_token:
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ goto AGAIN;
+ case quit_match_token:
+ match = match_quitter;
+ if (tolerant) {
+ last = 0;
+ nofarguments = nofscanned;
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ goto QUITTING;
+ } else {
+ break;
+ }
+ case par_spacer_match_token:
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ do {
+ /* discard as we go */
+ tex_get_token();
+ } while (cur_cmd == spacer_cmd || cur_cmd == end_paragraph_cmd);
+ last = 1;
+ goto AGAIN;
+ case keep_spacer_match_token:
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ do {
+ tex_get_token();
+ if (cur_cmd == spacer_cmd) {
+ spacer = 1;
+ } else {
+ break;
+ }
+ } while (1);
+ last = 1;
+ goto LATER;
+ case par_command_match_token:
+ /* this discards till the next par token */
+ do {
+ tex_get_token();
+ } while (cur_cmd != end_paragraph_cmd);
+ goto DELIMITER;
+ default:
+ match = matchtoken - match_token;
+ break;
+ }
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ s = matchpointer;
+ p = lmt_expand_state.match_token_head;
+ count = 0;
+ }
+ /*tex
+ Scan a parameter until its delimiter string has been found; or, if |s = null|,
+ simply scan the delimiter string. If |info(r)| is a |match| or |end_match|
+ command, it cannot be equal to any token found by |get_token|. Therefore an
+ undelimited parameter --- i.e., a |match| that is immediately followed by
+ |match| or |end_match| --- will always fail the test |cur_tok=info(r)| in the
+ following algorithm.
+ */
+ CONTINUE:
+ /*tex Set |cur_tok| to the next token of input. */
+ if (last) {
+ last = 0;
+ } else {
+ tex_get_token();
+ }
+ /* is token_cmd reliable here? */
+ if (! count && token_cmd(matchtoken) == ignore_cmd) {
+ if (cur_cmd < ignore_cmd || cur_cmd > other_char_cmd || cur_chr != token_chr(matchtoken)) {
+ /*tex We could optimize this but it doesn't pay off now. */
+ tex_back_input(cur_tok);
+ }
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ if (s) {
+ s = matchpointer;
+ }
+ goto AGAIN;
+ }
+ if (cur_tok == matchtoken) {
+ /*tex
+ When we end up here we have a match on a delimiter. Advance |r|; |goto found|
+ if the parameter delimiter has been fully matched, otherwise |goto continue|.
+ A slightly subtle point arises here: When the parameter delimiter ends with
+ |#|, the token list will have a left brace both before and after the
+ |end_match|. Only one of these should affect the |align_state|, but both will
+ be scanned, so we must make a correction.
+ */
+ DELIMITER:
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ if (matchtoken >= match_token && matchtoken <= end_match_token) {
+ if (cur_tok < left_brace_limit) {
+ --lmt_input_state.align_state;
+ }
+ goto FOUND;
+ } else {
+ goto CONTINUE;
+ }
+ } else if (cur_cmd == ignore_something_cmd && cur_chr == ignore_argument_code) {
+ quitting = count ? 1 : count ? 2 : 3;
+ goto FOUND;
+ }
+ /*tex
+ Contribute the recently matched tokens to the current parameter, and |goto continue|
+ if a partial match is still in effect; but abort if |s = null|.
+
+ When the following code becomes active, we have matched tokens from |s| to the
+ predecessor of |r|, and we have found that |cur_tok <> info(r)|. An interesting
+ situation now presents itself: If the parameter is to be delimited by a string such
+ as |ab|, and if we have scanned |aa|, we want to contribute one |a| to the current
+ parameter and resume looking for a |b|. The program must account for such partial
+ matches and for others that can be quite complex. But most of the time we have
+ |s = r| and nothing needs to be done.
+
+ Incidentally, it is possible for |\par| tokens to sneak in to certain parameters of
+ non-|\long| macros. For example, consider a case like |\def\a#1\par!{...}| where
+ the first |\par| is not followed by an exclamation point. In such situations it
+ does not seem appropriate to prohibit the |\par|, so \TEX\ keeps quiet about this
+ bending of the rules.
+ */
+ if (s != matchpointer) {
+ BAD:
+ if (tolerant) {
+ quitting = nofscanned ? 1 : count ? 2 : 3;
+ tex_back_input(cur_tok);
+ // last = 0;
+ goto FOUND;
+ } else if (s) {
+ /*tex cycle pointer for backup recovery */
+ halfword t = s;
+ do {
+ halfword u, v;
+ if (match) {
+ p = tex_store_new_token(p, token_info(t));
+ }
+ ++count;
+ u = token_link(t);
+ v = s;
+ while (1) {
+ if (u == matchpointer) {
+ if (cur_tok != token_info(v)) {
+ break;
+ } else {
+ matchpointer = token_link(v);
+ matchtoken = token_info(matchpointer);
+ goto CONTINUE;
+ }
+ }
+ if (token_info(u) != token_info(v)) {
+ break;
+ } else {
+ u = token_link(u);
+ v = token_link(v);
+ }
+ }
+ t = token_link(t);
+ } while (t != matchpointer);
+ matchpointer = s;
+ matchtoken = token_info(matchpointer);
+ /*tex At this point, no tokens are recently matched. */
+ } else {
+ tex_handle_error(
+ normal_error_type,
+ "Use of %S doesn't match its definition",
+ lmt_input_state.warning_index,
+ "If you say, e.g., '\\def\\a1{...}', then you must always put '1' after '\\a',\n"
+ "since control sequence names are made up of letters only. The macro here has not\n"
+ "been followed by the required stuff, so I'm ignoring it."
+ );
+ goto EXIT;
+ }
+ }
+ GROUPED:
+ if (cur_tok < left_brace_limit) {
+ /*tex Contribute an entire group to the current parameter. */
+ int unbalance = 0;
+ while (1) {
+ if (match) {
+ p = tex_store_new_token(p, cur_tok);
+ }
+ if (last) {
+ last = 0;
+ } else {
+ tex_get_token();
+ }
+ if (cur_tok < right_brace_limit) {
+ if (cur_tok < left_brace_limit) {
+ ++unbalance;
+ } else if (unbalance) {
+ --unbalance;
+ } else {
+ break;
+ }
+ }
+ }
+ rightbrace = p;
+ if (match) {
+ p = tex_store_new_token(p, cur_tok);
+ }
+ } else if (cur_tok < right_brace_limit) {
+ /*tex Report an extra right brace and |goto continue|. */
+ tex_back_input(cur_tok);
+ /* moved up: */
+ ++lmt_input_state.align_state;
+ tex_insert_paragraph_token();
+ /* till here */
+ tex_handle_error(
+ insert_error_type,
+ "Argument of %S has an extra }",
+ lmt_input_state.warning_index,
+ "I've run across a '}' that doesn't seem to match anything. For example,\n"
+ "'\\def\\a#1{...}' and '\\a}' would produce this error. The '\\par' that I've just\n"
+ "inserted will cause me to report a runaway argument that might be the root of the\n"
+ "problem." );
+ goto CONTINUE;
+ /*tex A white lie; the |\par| won't always trigger a runaway. */
+ } else {
+ /*tex
+ Store the current token, but |goto continue| if it is a blank space that would
+ become an undelimited parameter.
+ */
+ if (cur_tok == space_token && matchtoken <= end_match_token && matchtoken >= match_token && matchtoken != leading_match_token) {
+ goto CONTINUE;
+ }
+ if (match) {
+ p = tex_store_new_token(p, cur_tok);
+ }
+ }
+ ++count;
+ if (matchtoken > end_match_token || matchtoken < match_token) {
+ goto CONTINUE;
+ }
+ FOUND:
+ if (s) {
+ /*
+ Tidy up the parameter just scanned, and tuck it away. If the parameter consists
+ of a single group enclosed in braces, we must strip off the enclosing braces.
+ That's why |rightbrace| was introduced. Actually, in most cases |count == 1|.
+ */
+ if (! thrash) {
+ if (token_info(p) < right_brace_limit && count == 1 && p != lmt_expand_state.match_token_head && match != match_bracekeeper) {
+ set_token_link(rightbrace, null);
+ tex_put_available_token(p);
+ p = token_link(lmt_expand_state.match_token_head);
+ pstack[nofscanned] = token_link(p);
+ tex_put_available_token(p);
+ } else {
+ pstack[nofscanned] = token_link(lmt_expand_state.match_token_head);
+ }
+ if (match == match_pruner) {
+ pstack[nofscanned] = tex_aux_prune_list(pstack[nofscanned]);
+ }
+ ++nofscanned;
+ if (tracing) {
+ tex_begin_diagnostic();
+ tex_print_format("%c%i<-", match_visualizer, nofscanned);
+ tex_show_token_list(pstack[nofscanned - 1], null, default_token_show_max, 0);
+ tex_end_diagnostic();
+ }
+ } else {
+ thrash = 0;
+ }
+ }
+ /*tex
+ Now |info(r)| is a token whose command code is either |match| or |end_match|.
+ */
+ if (quitting) {
+ nofarguments = quitting == 3 ? 0 : quitting == 2 && count == 0 ? 0 : nofscanned;
+ QUITTING:
+ if (spacer) {
+ tex_back_input(space_token); /* experiment */
+ }
+ while (1) {
+ switch (matchtoken) {
+ case end_match_token:
+ goto QUITDONE;
+ case spacer_match_token:
+ case thrash_match_token:
+ case par_spacer_match_token:
+ case keep_spacer_match_token:
+ goto NEXTMATCH;
+ case mandate_match_token:
+ case leading_match_token:
+ pstack[nofscanned] = null;
+ break;
+ case mandate_keep_match_token:
+ p = tex_store_new_token(null, left_brace_token);
+ pstack[nofscanned] = p;
+ p = tex_store_new_token(p, right_brace_token);
+ break;
+ case continue_match_token:
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ quitting = 0;
+ goto RESTART;
+ case quit_match_token:
+ if (quitting) {
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ quitting = 0;
+ goto RESTART;
+ } else {
+ goto NEXTMATCH;
+ }
+ default:
+ if (matchtoken >= match_token && matchtoken < end_match_token) {
+ pstack[nofscanned] = null;
+ break;
+ } else {
+ goto NEXTMATCH;
+ }
+ }
+ nofscanned++;
+ if (tracing) {
+ tex_begin_diagnostic();
+ tex_print_format("%c%i--", match_visualizer, nofscanned);
+ tex_end_diagnostic();
+ }
+ NEXTMATCH:
+ matchpointer = token_link(matchpointer);
+ matchtoken = token_info(matchpointer);
+ }
+ }
+ } while (matchtoken != end_match_token);
+ nofarguments = nofscanned;
+ QUITDONE:
+ matchpointer = token_link(matchpointer);
+ /*tex
+ Feed the macro body and its parameters to the scanner. Before we put a new token list on the
+ input stack, it is wise to clean off all token lists that have recently been depleted. Then
+ a user macro that ends with a call to itself will not require unbounded stack space.
+ */
+ tex_cleanup_input_state();
+ /*tex
+ We don't really start a list, it's more housekeeping. The starting point is the body and
+ the later set |loc| reflects that.
+ */
+ tex_begin_macro_list(chr);
+ /*tex
+ Beware: here the |name| is used for symbolic locations but also for macro indices but these
+ are way above the symbolic |token_types| that we use. Better would be to have a dedicated
+ variable but let's not open up a can of worms now. We can't use |warning_index| combined
+ with a symbolic name either. We're at |end_match_token| now so we need to advance.
+ */
+ lmt_input_state.cur_input.name = cs;
+ lmt_input_state.cur_input.loc = matchpointer;
+ /*tex
+ This comes last, after the cleanup and the start of the macro list.
+ */
+ if (nofscanned) {
+ tex_copy_pstack_to_param_stack(&pstack[0], nofscanned);
+ }
+ EXIT:
+ lmt_expand_state.arguments = nofarguments;
+ lmt_input_state.scanner_status = save_scanner_status;
+ lmt_input_state.warning_index = save_warning_index;
+ } else {
+ tex_cleanup_input_state();
+ if (token_link(chr)) {
+ tex_begin_macro_list(chr);
+ lmt_expand_state.arguments = 0;
+ lmt_input_state.cur_input.name = lmt_input_state.warning_index;
+ lmt_input_state.cur_input.loc = token_link(chr);
+ } else {
+ /* We ignore empty bodies but it doesn't gain us that much. */
+ }
+ }
+}