diff options
Diffstat (limited to 'source')
28 files changed, 551 insertions, 340 deletions
diff --git a/source/luametatex/source/lua/lmtinterface.h b/source/luametatex/source/lua/lmtinterface.h index 67c3f56d5..c60a78266 100644 --- a/source/luametatex/source/lua/lmtinterface.h +++ b/source/luametatex/source/lua/lmtinterface.h @@ -562,6 +562,7 @@ make_lua_key(L, condition);\ make_lua_key(L, conditional);\ make_lua_key(L, conditionalmathskip);\ make_lua_key(L, connectoroverlapmin);\ +make_lua_key(L, constant);\ make_lua_key(L, container);\ make_lua_key(L, contributehead);\ make_lua_key(L, convert);\ @@ -1127,6 +1128,7 @@ make_lua_key(L, properties);\ make_lua_key(L, proportional);\ make_lua_key(L, protected);\ make_lua_key(L, protected_call);\ +make_lua_key(L, semi_protected_call);\ make_lua_key(L, protrudechars);\ make_lua_key(L, protrusion);\ make_lua_key(L, ptr);\ @@ -1387,6 +1389,7 @@ make_lua_key(L, tolerance);\ make_lua_key(L, tolerant);\ make_lua_key(L, tolerant_call);\ make_lua_key(L, tolerant_protected_call);\ +make_lua_key(L, tolerant_semi_protected_call);\ make_lua_key(L, top);\ make_lua_key(L, topaccent);\ make_lua_key(L, topaccentvariant);\ diff --git a/source/luametatex/source/lua/lmtstatuslib.c b/source/luametatex/source/lua/lmtstatuslib.c index cf665ede2..841ddeec0 100644 --- a/source/luametatex/source/lua/lmtstatuslib.c +++ b/source/luametatex/source/lua/lmtstatuslib.c @@ -254,8 +254,8 @@ static int statslib_getconstants(lua_State *L) lua_set_integer_by_key(L, "no_catcode_table", no_catcode_table_preset); lua_set_integer_by_key(L, "default_catcode_table", default_catcode_table_preset); - lua_set_cardinal_by_key(L, "max_cardinal", max_cardinal); - lua_set_cardinal_by_key(L, "min_cardinal", min_cardinal); + lua_set_cardinal_by_key(L,"max_cardinal", max_cardinal); + lua_set_cardinal_by_key(L,"min_cardinal", min_cardinal); lua_set_integer_by_key(L, "max_integer", max_integer); lua_set_integer_by_key(L, "min_integer", min_integer); lua_set_integer_by_key(L, "max_dimen", max_dimen); @@ -268,7 +268,7 @@ static int statslib_getconstants(lua_State 
*L) lua_set_integer_by_key(L, "one_bp", one_bp); - lua_set_integer_by_key(L, "infinity", infinity); + lua_set_integer_by_key(L, "infinity", max_infinity); lua_set_integer_by_key(L, "min_infinity", min_infinity); lua_set_integer_by_key(L, "awful_bad", awful_bad); lua_set_integer_by_key(L, "infinite_bad", infinite_bad); diff --git a/source/luametatex/source/lua/lmttexlib.c b/source/luametatex/source/lua/lmttexlib.c index 7d9395eb7..0d84eebdd 100644 --- a/source/luametatex/source/lua/lmttexlib.c +++ b/source/luametatex/source/lua/lmttexlib.c @@ -903,7 +903,7 @@ static const char *texlib_aux_scan_integer_part(lua_State *L, const char *ss, in DONE: if (overflow) { luaL_error(L, "number too big"); - result = infinity; + result = max_integer; } else if (vacuous) { luaL_error(L, "missing number, treated as zero") ; } @@ -1246,6 +1246,9 @@ int lmt_check_for_flags(lua_State *L, int slot, int *flags, int prefixes, int nu } else if (lua_key_eq(str, value)) { slot += 1; *flags = add_value_flag(*flags); + } else if (lua_key_eq(str, constant)) { + slot += 1; + *flags = add_constant_flag(*flags); } else if (lua_key_eq(str, conditional) || lua_key_eq(str, condition)) { /* condition will go, conditional stays */ slot += 1; @@ -2690,7 +2693,7 @@ static int texlib_aux_scan_internal(lua_State *L, int cmd, int code, int values) default: { int texstr = tex_the_scanned_result(); - char *str = tex_to_cstring(texstr); + const char *str = tex_to_cstring(texstr); if (str) { lua_pushstring(L, str); } else { @@ -3582,7 +3585,7 @@ static int texlib_enableprimitives(lua_State *L) for (int cs = 0; cs < prim_size; cs++) { strnumber s = get_prim_text(cs); if (s > 0) { - char *prm = tex_to_cstring(s); + const char *prm = tex_to_cstring(s); texlib_aux_enableprimitive(pre, lpre, prm); } } @@ -4116,7 +4119,7 @@ static int texlib_runlocal(lua_State *L) } else { halfword ref = eq_value(cs); halfword head = token_link(ref); - if (head && get_token_parameters(ref)) { + if (head && get_token_preamble(ref)) { 
tex_local_control_message("macro takes arguments and is ignored"); return 0; } else { @@ -4605,7 +4608,7 @@ static int texlib_setdimensionvalue(lua_State *L) static int texlib_aux_getvalue(lua_State *L, halfword level, halfword cs) { halfword chr = eq_value(cs); - if (chr && ! get_token_parameters(chr)) { + if (chr && ! get_token_preamble(chr)) { /* or get_token_parameters as we don't want trailing # */ halfword value = 0; tex_begin_inserted_list(tex_get_available_token(cs_token_flag + cs)); if (tex_scan_tex_value(level, &value)) { diff --git a/source/luametatex/source/lua/lmttokenlib.c b/source/luametatex/source/lua/lmttokenlib.c index 97ca1b144..41ee6c485 100644 --- a/source/luametatex/source/lua/lmttokenlib.c +++ b/source/luametatex/source/lua/lmttokenlib.c @@ -213,10 +213,10 @@ void lmt_tokenlib_initialize(void) /* lmt_interface.command_names[string_cmd] = (command_item) { .id = string_cmd, .lua = lua_key_index(string), .name = lua_key(string), .kind = regular_command_item, .min = ignore_entry, .max = max_integer, .base = 0, .fixedvalue = 0 }; */ lmt_interface.command_names[call_cmd] = (command_item) { .id = call_cmd, .lua = lua_key_index(call), .name = lua_key(call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[protected_call_cmd] = (command_item) { .id = protected_call_cmd, .lua = lua_key_index(protected_call), .name = lua_key(protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; - lmt_interface.command_names[semi_protected_call_cmd] = (command_item) { .id = semi_protected_call_cmd, .lua = lua_key_index(protected_call), .name = lua_key(protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; + lmt_interface.command_names[semi_protected_call_cmd] = (command_item) { .id = semi_protected_call_cmd, .lua = 
lua_key_index(semi_protected_call), .name = lua_key(protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[tolerant_call_cmd] = (command_item) { .id = tolerant_call_cmd, .lua = lua_key_index(tolerant_call), .name = lua_key(tolerant_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[tolerant_protected_call_cmd] = (command_item) { .id = tolerant_protected_call_cmd, .lua = lua_key_index(tolerant_protected_call), .name = lua_key(tolerant_protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; - lmt_interface.command_names[tolerant_semi_protected_call_cmd] = (command_item) { .id = tolerant_semi_protected_call_cmd, .lua = lua_key_index(tolerant_protected_call), .name = lua_key(tolerant_protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; + lmt_interface.command_names[tolerant_semi_protected_call_cmd] = (command_item) { .id = tolerant_semi_protected_call_cmd, .lua = lua_key_index(tolerant_semi_protected_call), .name = lua_key(tolerant_protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[deep_frozen_end_template_cmd] = (command_item) { .id = deep_frozen_end_template_cmd, .lua = lua_key_index(deep_frozen_cs_end_template), .name = lua_key(deep_frozen_cs_end_template), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[deep_frozen_dont_expand_cmd] = (command_item) { .id = deep_frozen_dont_expand_cmd, .lua = lua_key_index(deep_frozen_cs_dont_expand), .name = lua_key(deep_frozen_cs_dont_expand), .kind = token_command_item, .min = ignore_entry, .max = 
ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[internal_glue_reference_cmd] = (command_item) { .id = internal_glue_reference_cmd, .lua = lua_key_index(internal_glue_reference), .name = lua_key(internal_glue_reference), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; @@ -468,8 +468,9 @@ halfword lmt_token_list_from_lua(lua_State *L, int slot) if (s[i] == ascii_space) { tok = token_val(spacer_cmd, s[i]); } else { - int k = (int) aux_str2uni((const unsigned char *) (s + i)); - i = i + (size_t) (utf8_size(k)) - 1; + int kl; + int k = (int) aux_str2uni_len((const unsigned char *) (s + i), &kl); + i = i + kl - 1; tok = token_val(other_char_cmd, k); } p = tex_store_new_token(p, tok); @@ -737,15 +738,15 @@ static void tokenlib_aux_to_token(lua_State *L, int i, int m, int *head, int *ta const unsigned char *p = (const unsigned char *) s; size_t n = aux_utf8len(s, l); for (size_t j = 0; j < n; j++) { - int ch = *p; - halfword x = tex_get_available_token(tokenlib_aux_to_token_val(aux_str2uni(p))); + int xl; + halfword x = tex_get_available_token(tokenlib_aux_to_token_val(aux_str2uni_len(p, &xl))); if (*head) { token_link(*tail) = x; } else { *head = x; } *tail = x; - p += utf8_size(ch); + p += xl; } break; } @@ -2370,7 +2371,7 @@ static int tokenlib_getprimitives(lua_State *L) while (cs < prim_size) { strnumber s = get_prim_text(cs); if (s > 0 && (get_prim_origin(cs) != no_command)) { - char *ss = tex_to_cstring(s); + const char *ss = tex_to_cstring(s); int cmd = prim_eq_type(cs); int chr = prim_equiv(cs); if (! 
raw) { @@ -2668,6 +2669,19 @@ inline static int tokenlib_get_parameters(lua_State *L) return 0; } +inline static int tokenlib_get_constant(lua_State *L) +{ + lua_token *n = tokenlib_aux_check_istoken(L, 1); + halfword tok = token_info(n->token); + int result = 0; + if (tok >= cs_token_flag && is_call_cmd(eq_type(tok - cs_token_flag))) { + halfword v = eq_value(tok - cs_token_flag); + result = v && get_token_reference(v) == max_token_reference; + } + lua_pushboolean(L, result); + return 1; +} + static int tokenlib_getfield(lua_State *L) { const char *s = lua_tostring(L, 2); @@ -2711,6 +2725,8 @@ static int tokenlib_getfield(lua_State *L) return tokenlib_get_flags(L); } else if (lua_key_eq(s, parameters)) { return tokenlib_get_parameters(L); + } else if (lua_key_eq(s, constant)) { + return tokenlib_get_constant(L); } else { lua_pushnil(L); } @@ -3229,11 +3245,17 @@ static int tokenlib_set_macro(lua_State *L) /* todo: protected */ slot = lmt_check_for_flags(L, slot, &flags, 1, 1); } if (tex_define_permitted(cs, flags)) { /* we check before we allocate */ - halfword h = get_reference_token(); - halfword t = h; + halfword h; if (lstr > 0) { + h = get_reference_token(); /*tex Options: 1=create (will trigger an error), 2=ignore. */ - tex_parse_str_to_tok(h, &t, ct, str, lstr, lua_toboolean(L, slot++) ? 2 : 1); + tex_parse_str_to_tok(h, null, ct, str, lstr, lua_toboolean(L, slot++) ? 
2 : 1); + if (is_constant(flags)) { + set_token_reference(h, max_token_reference); + } + } else { + h = lmt_token_state.empty; + // tex_add_token_reference(h); } tex_define(flags, cs, tex_flags_to_cmd(flags), h); } @@ -3388,7 +3410,7 @@ static int tokenlib_set_char(lua_State *L) /* also in texlib */ /* a weird place, these should be in tex */ -static int tokenlib_set_constant(lua_State *L, singleword cmd, halfword min, halfword max) +static int tokenlib_set_constant_value(lua_State *L, singleword cmd, halfword min, halfword max) { int top = lua_gettop(L); if (top >= 2) { @@ -3409,7 +3431,7 @@ static int tokenlib_set_constant(lua_State *L, singleword cmd, halfword min, hal return 0; } -static int tokenlib_get_constant(lua_State *L, halfword cmd) +static int tokenlib_get_constant_value(lua_State *L, halfword cmd) { if (lua_type(L, 1) == LUA_TSTRING) { size_t l; @@ -3428,32 +3450,32 @@ static int tokenlib_get_constant(lua_State *L, halfword cmd) static int tokenlib_set_integer(lua_State *L) { - return tokenlib_set_constant(L, integer_cmd, min_integer, max_integer); + return tokenlib_set_constant_value(L, integer_cmd, min_integer, max_integer); } static int tokenlib_set_dimension(lua_State *L) { - return tokenlib_set_constant(L, dimension_cmd, min_dimen, max_dimen); + return tokenlib_set_constant_value(L, dimension_cmd, min_dimen, max_dimen); } // static int tokenlib_set_gluespec(lua_State *L) // { -// return tokenlib_set_constant(L, gluespec_cmd, min_dimen, max_dimen); +// return tokenlib_set_constant_value(L, gluespec_cmd, min_dimen, max_dimen); // } static int tokenlib_get_integer(lua_State *L) { - return tokenlib_get_constant(L, integer_cmd); + return tokenlib_get_constant_value(L, integer_cmd); } static int tokenlib_get_dimension(lua_State *L) { - return tokenlib_get_constant(L, dimension_cmd); + return tokenlib_get_constant_value(L, dimension_cmd); } // static int tokenlib_get_gluespec(lua_State *L) // { -// return tokenlib_get_constant(L, gluespec_cmd); +// 
return tokenlib_get_constant_value(L, gluespec_cmd); // } /* @@ -3575,6 +3597,7 @@ static const struct luaL_Reg tokenlib_function_list[] = { { "getinstance", tokenlib_get_instance }, { "getflags", tokenlib_get_flags }, { "getparameters", tokenlib_get_parameters }, + { "getconstant", tokenlib_get_constant }, { "getmacro", tokenlib_get_macro }, { "getmeaning", tokenlib_get_meaning }, { "getcmdchrcs", tokenlib_get_cmdchrcs }, @@ -3721,11 +3744,13 @@ void lmt_local_call(int slot) lua_settop(L, stacktop); } -int lmt_function_call_by_class(int slot, int property, halfword *value) +/*tex We replaced |class| by |category because of g++ issues. */ + +int lmt_function_call_by_category(int slot, int property, halfword *value) { lua_State *L = lmt_lua_state.lua_instance; int stacktop = lua_gettop(L); - int class = lua_value_none_code; + int category = lua_value_none_code; lua_pushcfunction(L, lmt_traceback); lua_rawgeti(L, LUA_REGISTRYINDEX, lmt_lua_state.function_table_id); if (lua_rawgeti(L, -1, slot) == LUA_TFUNCTION) { @@ -3744,9 +3769,9 @@ int lmt_function_call_by_class(int slot, int property, halfword *value) lmt_error(L, "function call", slot, i == LUA_ERRRUN ? 
0 : 1); } else { if (lua_type(L, -2) == LUA_TNUMBER) { - class = lmt_tointeger(L, -2); + category = lmt_tointeger(L, -2); } - switch (class) { + switch (category) { case lua_value_none_code: { break; @@ -3798,7 +3823,7 @@ int lmt_function_call_by_class(int slot, int property, halfword *value) case lua_value_float_code: case lua_value_string_code: { - class = lua_value_none_code; + category = lua_value_none_code; break; } case lua_value_boolean_code: @@ -3816,14 +3841,14 @@ int lmt_function_call_by_class(int slot, int property, halfword *value) break; default: { - class = lua_value_none_code; + category = lua_value_none_code; break; } } } } lua_settop(L, stacktop); - return class; + return category; } /* some day maybe an alternative too diff --git a/source/luametatex/source/lua/lmttokenlib.h b/source/luametatex/source/lua/lmttokenlib.h index 450c6173a..bfc3ed6f2 100644 --- a/source/luametatex/source/lua/lmttokenlib.h +++ b/source/luametatex/source/lua/lmttokenlib.h @@ -33,7 +33,7 @@ extern halfword lmt_token_list_from_lua (lua_State *L, int slot); extern halfword lmt_token_code_from_lua (lua_State *L, int slot); extern void lmt_function_call (int slot, int prefix); -extern int lmt_function_call_by_class (int slot, int property, halfword *value); +extern int lmt_function_call_by_category (int slot, int property, halfword *value); extern void lmt_token_call (int p); extern void lmt_local_call (int slot); diff --git a/source/luametatex/source/luametatex.h b/source/luametatex/source/luametatex.h index 07921d53a..973b405ae 100644 --- a/source/luametatex/source/luametatex.h +++ b/source/luametatex/source/luametatex.h @@ -89,7 +89,7 @@ # define luametatex_version 210 # define luametatex_revision 04 # define luametatex_version_string "2.10.04" -# define luametatex_development_id 20221208 +# define luametatex_development_id 20221214 # define luametatex_name_camelcase "LuaMetaTeX" # define luametatex_name_lowercase "luametatex" diff --git 
a/source/luametatex/source/tex/texcommands.c b/source/luametatex/source/tex/texcommands.c index c7ec0a2f4..0ad91e420 100644 --- a/source/luametatex/source/tex/texcommands.c +++ b/source/luametatex/source/tex/texcommands.c @@ -774,10 +774,12 @@ void tex_initialize_commands(void) tex_primitive(tex_command, "def", def_cmd, def_code, 0); tex_primitive(tex_command, "xdef", def_cmd, global_expanded_def_code, 0); tex_primitive(tex_command, "gdef", def_cmd, global_def_code, 0); + tex_primitive(luatex_command, "cdef", def_cmd, constant_def_code, 0); tex_primitive(luatex_command, "edefcsname", def_cmd, expanded_def_csname_code, 0); tex_primitive(luatex_command, "defcsname", def_cmd, def_csname_code, 0); tex_primitive(luatex_command, "xdefcsname", def_cmd, global_expanded_def_csname_code, 0); tex_primitive(luatex_command, "gdefcsname", def_cmd, global_def_csname_code, 0); + tex_primitive(luatex_command, "cdefcsname", def_cmd, constant_def_csname_code, 0); tex_primitive(tex_command, "scriptfont", define_family_cmd, script_size, 0); tex_primitive(tex_command, "scriptscriptfont", define_family_cmd, script_script_size, 0); @@ -961,6 +963,7 @@ void tex_initialize_commands(void) tex_primitive(luatex_command, "semiprotected", prefix_cmd, semiprotected_code, 0); tex_primitive(luatex_command, "enforced", prefix_cmd, enforced_code, 0); tex_primitive(luatex_command, "inherited", prefix_cmd, inherited_code, 0); + tex_primitive(luatex_command, "constant", prefix_cmd, constant_code, 0); tex_primitive(tex_command, "long", prefix_cmd, long_code, 0); tex_primitive(tex_command, "outer", prefix_cmd, outer_code, 0); @@ -1303,10 +1306,16 @@ void tex_initialize_commands(void) cs_text(deep_frozen_cs_protection_code) = tex_maketexstring("inaccessible"); cs_text(deep_frozen_cs_end_write_code) = tex_maketexstring("endwrite"); - set_eq_level(deep_frozen_cs_end_write_code, level_one); set_eq_type(deep_frozen_cs_end_write_code, call_cmd); set_eq_flag(deep_frozen_cs_end_write_code, 0); 
set_eq_value(deep_frozen_cs_end_write_code, null); + set_eq_level(deep_frozen_cs_end_write_code, level_one); + + /*tex The empty list reference should be reassigned after compacting! */ + + lmt_token_state.empty = get_reference_token(); + // tex_add_token_reference(lmt_token_state.empty); + set_token_reference(lmt_token_state.empty, max_token_reference); lmt_string_pool_state.reserved = lmt_string_pool_state.string_pool_data.ptr; lmt_hash_state.no_new_cs = 1; diff --git a/source/luametatex/source/tex/texcommands.h b/source/luametatex/source/tex/texcommands.h index 55de1dce6..8df61a4db 100644 --- a/source/luametatex/source/tex/texcommands.h +++ b/source/luametatex/source/tex/texcommands.h @@ -797,6 +797,11 @@ typedef enum local_control_codes { bits for this but we don't have enough. Now, because frozen macros can be unfrozen we can indeed have a prefix that bypasses the check. Explicit (re)definitions are then up to the user. + Constant macros are special in the sense that we set the reference count to the maximum. This is + then a signal that we have an expanded macro with a meaning that we can immediately copy into + the expanded token list, as in csname construction. This saves some memory access and token + allocation. 
+ */ typedef enum prefix_codes { @@ -820,6 +825,7 @@ typedef enum prefix_codes { enforced_code, always_code, inherited_code, + constant_code, long_code, outer_code, } prefix_codes; @@ -859,9 +865,11 @@ typedef enum def_codes { def_csname_code, global_expanded_def_csname_code, global_def_csname_code, + constant_def_code, + constant_def_csname_code, } def_codes; -# define last_def_code global_def_csname_code +# define last_def_code constant_def_csname_code typedef enum let_codes { global_let_code, diff --git a/source/luametatex/source/tex/texconditional.c b/source/luametatex/source/tex/texconditional.c index 2197e9065..925e9fac9 100644 --- a/source/luametatex/source/tex/texconditional.c +++ b/source/luametatex/source/tex/texconditional.c @@ -555,9 +555,7 @@ void tex_conditional_if(halfword code, int unless) } goto RESULT; case if_zero_int_code: - { - result = tex_scan_int(0, NULL) == 0; - } + result = tex_scan_int(0, NULL) == 0; goto RESULT; case if_abs_dim_code: case if_dim_code: @@ -587,15 +585,10 @@ void tex_conditional_if(halfword code, int unless) } goto RESULT; case if_zero_dim_code: - { - result = tex_scan_dimen(0, 0, 0, 0, NULL) == 0; - } + result = tex_scan_dimen(0, 0, 0, 0, NULL) == 0; goto RESULT; case if_odd_code: - { - halfword v = tex_scan_int(0, NULL); - result = odd(v); - } + result = odd(tex_scan_int(0, NULL)); goto RESULT; case if_vmode_code: result = abs(cur_list.mode) == vmode; @@ -970,8 +963,7 @@ void tex_conditional_if(halfword code, int unless) halfword t = token_info(lmt_input_state.cur_input.loc); lmt_input_state.cur_input.loc = token_link(lmt_input_state.cur_input.loc); if (t < cs_token_flag && token_cmd(t) == parameter_reference_cmd) { - // result = token_info(input_state.parameter_stack[input_state.cur_input.parameter_start + token_chr(t) - 1]) != null ? 1 : 2; - result = lmt_input_state.parameter_stack[lmt_input_state.cur_input.parameter_start + token_chr(t) - 1] != null ? 
1 : 2; + result = lmt_input_state.parameter_stack[lmt_input_state.cur_input.parameter_start + token_chr(t) - 1] != null ? 1 : 2; } } goto CASE; @@ -1103,19 +1095,19 @@ void tex_conditional_if(halfword code, int unless) // } default: { - int class; + int category; strnumber u = tex_save_cur_string(); int save_scanner_status = lmt_input_state.scanner_status; lmt_input_state.scanner_status = scanner_is_normal; lmt_token_state.luacstrings = 0; - class = lmt_function_call_by_class(code - last_if_test_code, 0, &result); + category = lmt_function_call_by_category(code - last_if_test_code, 0, &result); tex_restore_cur_string(u); lmt_input_state.scanner_status = save_scanner_status; if (lmt_token_state.luacstrings > 0) { tex_lua_string_start(); /* bad */ } - switch (class) { + switch (category) { case lua_value_integer_code: case lua_value_cardinal_code: case lua_value_dimension_code: diff --git a/source/luametatex/source/tex/texdumpdata.h b/source/luametatex/source/tex/texdumpdata.h index 02514bdf3..4f3450ef4 100644 --- a/source/luametatex/source/tex/texdumpdata.h +++ b/source/luametatex/source/tex/texdumpdata.h @@ -55,7 +55,7 @@ */ -# define luametatex_format_fingerprint 678 +# define luametatex_format_fingerprint 679 /* These end up in the string pool. */ diff --git a/source/luametatex/source/tex/texequivalents.c b/source/luametatex/source/tex/texequivalents.c index bdf21446e..223e46e15 100644 --- a/source/luametatex/source/tex/texequivalents.c +++ b/source/luametatex/source/tex/texequivalents.c @@ -243,6 +243,7 @@ void tex_dump_equivalents_mem(dumpstream f) /*tex A special register. */ dump_int(f, lmt_token_state.par_loc); /* dump_int(f, lmt_token_state.line_par_loc); */ /*tex See note in textoken.c|. 
*/ + dump_int(f, lmt_token_state.empty); } void tex_undump_equivalents_mem(dumpstream f) @@ -284,6 +285,7 @@ void tex_undump_equivalents_mem(dumpstream f) /* } else { */ /* tex_fatal_undump_error("lineparloc"); */ /* } */ + undump_int(f, lmt_token_state.empty); return; } diff --git a/source/luametatex/source/tex/texequivalents.h b/source/luametatex/source/tex/texequivalents.h index 336c9e206..c84b90226 100644 --- a/source/luametatex/source/tex/texequivalents.h +++ b/source/luametatex/source/tex/texequivalents.h @@ -1155,6 +1155,7 @@ typedef enum flag_bit { value_flag_bit = 0x08000, semiprotected_flag_bit = 0x10000, inherited_flag_bit = 0x20000, + constant_flag_bit = 0x40000, } flag_bits; /*tex Flags: */ @@ -1180,6 +1181,7 @@ typedef enum flag_bit { # define add_conditional_flag(a) ((a) | conditional_flag_bit) # define add_value_flag(a) ((a) | value_flag_bit) # define add_inherited_flag(a) ((a) | inherited_flag_bit) +# define add_constant_flag(a) ((a) | constant_flag_bit) # define remove_flag(a,b) ((a) & ~(b)) @@ -1220,6 +1222,7 @@ typedef enum flag_bit { # define is_conditional(a) (((a) & conditional_flag_bit) == conditional_flag_bit) # define is_value(a) (((a) & value_flag_bit) == value_flag_bit) # define is_inherited(a) (((a) & inherited_flag_bit) == inherited_flag_bit) +# define is_constant(a) (((a) & constant_flag_bit) == constant_flag_bit) # define is_expandable(cmd) (cmd > max_command_cmd) diff --git a/source/luametatex/source/tex/texexpand.c b/source/luametatex/source/tex/texexpand.c index 8a2fa79a0..f257f8b0f 100644 --- a/source/luametatex/source/tex/texexpand.c +++ b/source/luametatex/source/tex/texexpand.c @@ -88,7 +88,8 @@ inline static void tex_aux_expand_after(void) if (cur_cmd > max_command_cmd) { tex_expand_current_token(); } else { - tex_back_input(t2); + tex_back_input(t2); + /* token_link(t1) = t2; */ /* no gain, rarely happens */ } tex_back_input(t1); } @@ -615,7 +616,6 @@ inline static int tex_aux_uni_to_buffer(unsigned char *b, int m, int c) 
much sense. It also long token lists that never (should) match anyway. */ - static int tex_aux_collect_cs_tokens(halfword *p, int *n) { while (1) { @@ -650,7 +650,17 @@ static int tex_aux_collect_cs_tokens(halfword *p, int *n) */ case call_cmd: case tolerant_call_cmd: - tex_aux_macro_call(cur_cs, cur_cmd, cur_chr); + if (get_token_reference(cur_chr) == max_token_reference) { // ! get_token_parameters(cur_chr)) { + /* we avoid the macro stack and expansion and we don't trace either */ + halfword h = token_link(cur_chr); + while (h) { + *p = tex_store_new_token(*p, token_info(h)); + *n += 1; + h = token_link(h); + } + } else { + tex_aux_macro_call(cur_cs, cur_cmd, cur_chr); + } break; case end_cs_name_cmd: return 1; @@ -677,7 +687,7 @@ int tex_is_valid_csname(void) tex_get_x_or_protected(); /* we skip unprotected ! */ } while (cur_cmd != end_cs_name_cmd); goto FINISH; - /* no real gain: */ + /* no real gain as we hardly ever end up here */ // while (1) { // tex_get_token(); // if (cur_cmd == end_cs_name_cmd) { @@ -941,6 +951,16 @@ int tex_get_parameter_count(void) return n; } +/*tex + We can avoid the copy of parameters to the stack but it complicates the code because we also need + to clean up the previous set of parameters etc. It's not worth the effort. However, there are + plenty of optimizations compared to the original. Some are measurable on an average run, others + are more likely to increase performance when thousands of successive runs happen in e.g. a virtual + environment where threads fight for memory access and cpu cache. And because \CONTEXT\ is us used + that way we keep looking into ways to gain performance, but not at the cost of dirty hacks (that + I tried out of curiosity but rejected in the end). 
+*/ + static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr) { int tracing = tracing_macros_par > 0; @@ -955,7 +975,7 @@ static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr) if (is_untraced(eq_flag(cs))) { tracing = 0; } else { - if (! get_token_parameters(chr)) { + if (! get_token_preamble(chr)) { tex_print_str("->"); } else { /* maybe move the preamble scanner to here */ @@ -964,14 +984,14 @@ static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr) } tex_end_diagnostic(); } - if (get_token_parameters(chr)) { + if (get_token_preamble(chr)) { halfword matchpointer = token_link(chr); halfword matchtoken = token_info(matchpointer); int save_scanner_status = lmt_input_state.scanner_status; halfword save_warning_index = lmt_input_state.warning_index; int nofscanned = 0; int nofarguments = 0; - halfword pstack[9]; /* We could go for 15 if we accept |#A-#F|. */ + halfword pstack[max_match_count]; /*tex Scan the parameters and make |link(r)| point to the macro body; but |return| if an illegal |\par| is detected. @@ -1334,7 +1354,7 @@ static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr) ++nofscanned; if (tracing) { tex_begin_diagnostic(); - tex_print_format("%c%i<-", match_visualizer, nofscanned); + tex_print_format("%c%c<-", match_visualizer, '0' + nofscanned + (nofscanned > 9 ? gap_match_count : 0)); tex_show_token_list(pstack[nofscanned - 1], null, default_token_show_max, 0); tex_end_diagnostic(); } diff --git a/source/luametatex/source/tex/texinputstack.c b/source/luametatex/source/tex/texinputstack.c index e73451226..52262e486 100644 --- a/source/luametatex/source/tex/texinputstack.c +++ b/source/luametatex/source/tex/texinputstack.c @@ -62,9 +62,15 @@ input_file_state_info input_file_state = { .line = 0, }; -#define reserved_input_stack_slots 2 -#define reserved_in_stack_slots 2 -#define reserved_param_stack_slots 10 /*tex We play safe and always keep 10 in reserve (we have 9 max anyway). 
*/ +/*tex + We play safe and always keep a few batches of parameter slots in reserve so that we + are unlikely to overrun. +*/ + +# define reserved_input_stack_slots 2 +# define reserved_in_stack_slots 2 +//define reserved_param_stack_slots 32 +# define reserved_param_stack_slots (2 * max_match_count) void tex_initialize_input_state(void) { @@ -793,7 +799,7 @@ void tex_end_token_list(void) case macro_text: { tex_delete_token_reference(lmt_input_state.cur_input.start); - if (get_token_parameters(lmt_input_state.cur_input.start)) { + if (get_token_preamble(lmt_input_state.cur_input.start)) { /*tex Parameters must be flushed: */ int ptr = lmt_input_state.parameter_stack_data.ptr; int start = lmt_input_state.cur_input.parameter_start; @@ -850,10 +856,17 @@ void tex_cleanup_input_state(void) ptr = lmt_input_state.parameter_stack_data.ptr; start = lmt_input_state.cur_input.parameter_start; while (ptr > start) { - --ptr; - if (lmt_input_state.parameter_stack[ptr]) { + if (lmt_input_state.parameter_stack[--ptr]) { tex_flush_token_list(lmt_input_state.parameter_stack[ptr]); } + // halfword p = lmt_input_state.parameter_stack[--ptr]; + // if (p) { + // if (! 
token_link(p)) { + // tex_put_available_token(p); /* very little gain on average */ + // } else { + // tex_flush_token_list(p); + // } + // } } lmt_input_state.parameter_stack_data.ptr = start; break; diff --git a/source/luametatex/source/tex/texlanguage.c b/source/luametatex/source/tex/texlanguage.c index 0fcd3b243..200ffbd1e 100644 --- a/source/luametatex/source/tex/texlanguage.c +++ b/source/luametatex/source/tex/texlanguage.c @@ -1279,8 +1279,9 @@ static int tex_aux_still_okay(halfword f, halfword l, halfword r, int n, const c tex_normal_warning("language", "the hyphenated word contains non-glyphs, skipping"); return 0; } else { - halfword c = (halfword) aux_str2uni((const unsigned char *) utf8original); - utf8original += utf8_size(c); + int cl; + halfword c = (halfword) aux_str2uni_len((const unsigned char *) utf8original, &cl); + utf8original += cl; if (! (c && c == glyph_character(f))) { tex_normal_warning("language", "the hyphenated word contains different characters, skipping"); return 0; diff --git a/source/luametatex/source/tex/texmaincontrol.c b/source/luametatex/source/tex/texmaincontrol.c index 24729d8cb..dbb52ab15 100644 --- a/source/luametatex/source/tex/texmaincontrol.c +++ b/source/luametatex/source/tex/texmaincontrol.c @@ -814,10 +814,10 @@ typedef enum saved_localbox_items { static void tex_aux_scan_local_box(int code) { quarterword options = 0; - halfword class = 0; - tex_scan_local_boxes_keys(&options, &class); + halfword index = 0; + tex_scan_local_boxes_keys(&options, &index); tex_set_saved_record(saved_localbox_item_location, local_box_location_save_type, 0, code); - tex_set_saved_record(saved_localbox_item_index, local_box_index_save_type, 0, class); + tex_set_saved_record(saved_localbox_item_index, local_box_index_save_type, 0, index); tex_set_saved_record(saved_localbox_item_options, local_box_options_save_type, 0, options); lmt_save_state.save_stack_data.ptr += saved_localbox_n_of_items; tex_new_save_level(local_box_group); @@ -894,23 
+894,6 @@ static void tex_aux_finish_local_box(void) } } -// static void tex_aux_run_leader(void) { -// switch (cur_chr) { -// case a_leaders_code: -// tex_aux_scan_box(a_leaders_flag, 0, 0); -// break; -// case c_leaders_code: -// tex_aux_scan_box(c_leaders_flag, 0, 0); -// break; -// case x_leaders_code: -// tex_aux_scan_box(x_leaders_flag, 0, 0); -// break; -// case g_leaders_code: -// tex_aux_scan_box(g_leaders_flag, 0, 0); -// break; -// } -// } - static int leader_flags[] = { a_leaders_flag, c_leaders_flag, @@ -1244,6 +1227,10 @@ static void tex_aux_run_text_boundary(void) { case protrusion_boundary: boundary_data(n) = tex_scan_int(0, NULL); break; + case page_boundary: + /* or maybe force vmode */ + tex_scan_int(0, NULL); + break; default: break; } @@ -1260,6 +1247,7 @@ static void tex_aux_run_math_boundary(void) { break; } case protrusion_boundary: + case page_boundary: tex_scan_int(0, NULL); break; } @@ -4479,7 +4467,7 @@ static void tex_aux_set_define_font(int a) static void tex_aux_set_def(int a, int force) { - halfword expand = 0; + int expand = 0; switch (cur_chr) { case expanded_def_code: expand = 1; @@ -4505,6 +4493,15 @@ static void tex_aux_set_def(int a, int force) cur_cs = tex_create_csname(); a = add_global_flag(a); goto DONE; + case constant_def_code: + expand = 2; + a = add_constant_flag(a); + break; + case constant_def_csname_code: + expand = 2; + cur_cs = tex_create_csname(); + a = add_constant_flag(a); + goto DONE; } tex_get_r_token(); DONE: @@ -4513,7 +4510,13 @@ static void tex_aux_set_def(int a, int force) } if (force || tex_define_permitted(cur_cs, a)) { halfword p = cur_cs; - halfword t = expand ? tex_scan_macro_expand() : tex_scan_macro_normal(); + halfword t = expand == 2 ? tex_scan_toks_expand(0, null, 1) : (expand ? tex_scan_macro_expand() : tex_scan_macro_normal()); + if (is_constant(a)) { + /* todo: check if already defined or just accept a leak */ + set_token_reference(t, max_token_reference); + } else if (! 
token_link(t)) { + t = lmt_token_state.empty; /* maybe in tex_define */ + } tex_define(a, p, tex_flags_to_cmd(a), t); } } @@ -4674,7 +4677,14 @@ static void tex_aux_set_let(int a, int force) a = add_global_flag(a); } if (force || tex_define_permitted(cur_cs, a)) { - tex_define(a, cur_cs, tex_flags_to_cmd(a), get_reference_token()); + /*tex + The commented line permits plenty empty definitions, a |\let| can run out of + ref count so maybe some day \unknown + */ + // halfword empty = get_reference_token(); + halfword empty = lmt_token_state.empty; + // tex_add_token_reference(empty); + tex_define(a, cur_cs, tex_flags_to_cmd(a), empty); } return; default: @@ -4711,7 +4721,7 @@ static void tex_aux_set_let(int a, int force) } tex_define_inherit(a, p, (singleword) newf, (singleword) cmd, cur_chr); } else { - tex_define(a, p, (singleword) cur_cmd, cur_chr); + tex_define(a, p, (singleword) cur_cmd, cur_chr); } } @@ -4929,18 +4939,18 @@ static void tex_aux_set_math_parameter(int a) case math_parameter_let_spacing: case math_parameter_let_atom_rule: { - halfword class = tex_scan_math_class_number(0); + halfword mathclass = tex_scan_math_class_number(0); halfword display = tex_scan_math_class_number(1); halfword text = tex_scan_math_class_number(0); halfword script = tex_scan_math_class_number(0); halfword scriptscript = tex_scan_math_class_number(0); - if (valid_math_class_code(class)) { + if (valid_math_class_code(mathclass)) { switch (code) { case math_parameter_let_spacing: - code = internal_int_location(first_math_class_code + class); + code = internal_int_location(first_math_class_code + mathclass); break; case math_parameter_let_atom_rule: - code = internal_int_location(first_math_atom_code + class); + code = internal_int_location(first_math_atom_code + mathclass); break; } value = (display << 24) + (text << 16) + (script << 8) + scriptscript; @@ -4959,20 +4969,20 @@ static void tex_aux_set_math_parameter(int a) case math_parameter_copy_atom_rule: case 
math_parameter_copy_parent: { - halfword class = tex_scan_math_class_number(0); + halfword mathclass = tex_scan_math_class_number(0); halfword parent = tex_scan_math_class_number(1); - if (valid_math_class_code(class) && valid_math_class_code(parent)) { + if (valid_math_class_code(mathclass) && valid_math_class_code(parent)) { switch (code) { case math_parameter_copy_spacing: - code = internal_int_location(first_math_class_code + class); + code = internal_int_location(first_math_class_code + mathclass); value = count_parameter(first_math_class_code + parent); break; case math_parameter_copy_atom_rule: - code = internal_int_location(first_math_atom_code + class); + code = internal_int_location(first_math_atom_code + mathclass); value = count_parameter(first_math_atom_code + parent); break; case math_parameter_copy_parent: - code = internal_int_location(first_math_parent_code + class); + code = internal_int_location(first_math_parent_code + mathclass); value = count_parameter(first_math_parent_code + parent); break; } @@ -4991,21 +5001,21 @@ static void tex_aux_set_math_parameter(int a) case math_parameter_set_display_pre_penalty: case math_parameter_set_display_post_penalty: { - halfword class = tex_scan_math_class_number(0); + halfword mathclass = tex_scan_math_class_number(0); halfword penalty = tex_scan_int(1, NULL); - if (valid_math_class_code(class)) { + if (valid_math_class_code(mathclass)) { switch (code) { case math_parameter_set_pre_penalty: - code = internal_int_location(first_math_pre_penalty_code + class); + code = internal_int_location(first_math_pre_penalty_code + mathclass); break; case math_parameter_set_post_penalty: - code = internal_int_location(first_math_post_penalty_code + class); + code = internal_int_location(first_math_post_penalty_code + mathclass); break; case math_parameter_set_display_pre_penalty: - code = internal_int_location(first_math_display_pre_penalty_code + class); + code = 
internal_int_location(first_math_display_pre_penalty_code + mathclass); break; case math_parameter_set_display_post_penalty: - code = internal_int_location(first_math_display_post_penalty_code + class); + code = internal_int_location(first_math_display_post_penalty_code + mathclass); break; } tex_word_define(a, code, penalty); @@ -5021,13 +5031,13 @@ static void tex_aux_set_math_parameter(int a) } case math_parameter_let_parent: { - halfword class = tex_scan_math_class_number(0); + halfword mathclass = tex_scan_math_class_number(0); halfword pre = tex_scan_math_class_number(1); halfword post = tex_scan_math_class_number(0); halfword options = tex_scan_math_class_number(0); halfword reserved = tex_scan_math_class_number(0); - if (valid_math_class_code(class)) { - code = internal_int_location(first_math_parent_code + class); + if (valid_math_class_code(mathclass)) { + code = internal_int_location(first_math_parent_code + mathclass); value = (reserved << 24) + (options << 16) + (pre << 8) + post; tex_word_define(a, code, value); // tex_assign_internal_int_value(a, code, value); @@ -5052,9 +5062,9 @@ static void tex_aux_set_math_parameter(int a) } case math_parameter_options: { - halfword class = tex_scan_math_class_number(0); - if (valid_math_class_code(class)) { - code = internal_int_location(first_math_options_code + class); + halfword mathclass = tex_scan_math_class_number(0); + if (valid_math_class_code(mathclass)) { + code = internal_int_location(first_math_options_code + mathclass); value = tex_scan_int(1, NULL); tex_word_define(a, code, value); // tex_assign_internal_int_value(a, code, value); @@ -5409,6 +5419,7 @@ void tex_run_prefixed_command(void) case always_code: flags = add_aliased_flag (flags); force = 1; break; /*tex This one is special */ case inherited_code: flags = add_inherited_flag (flags); break; + case constant_code: flags = add_constant_flag (flags); break; default: goto PICKUP; } @@ -5956,7 +5967,7 @@ static void tex_aux_run_message(void) 
strnumber s = tex_aux_scan_string(); if (error_help_par) { strnumber helpinfo = tex_tokens_to_string(error_help_par); - char *h = tex_to_cstring(helpinfo); + const char *h = tex_to_cstring(helpinfo); tex_handle_error( normal_error_type, "%T", diff --git a/source/luametatex/source/tex/texmarks.c b/source/luametatex/source/tex/texmarks.c index 01e002fbd..c967beb4b 100644 --- a/source/luametatex/source/tex/texmarks.c +++ b/source/luametatex/source/tex/texmarks.c @@ -21,8 +21,6 @@ Watch out: zero is always valid and the good old single mark! - Todo: class -> index - */ mark_state_info lmt_mark_state = { @@ -115,23 +113,23 @@ int tex_valid_mark(halfword m) { return m < lmt_mark_state.mark_data.top; } -halfword tex_new_mark(quarterword subtype, halfword class, halfword ptr) +halfword tex_new_mark(quarterword subtype, halfword index, halfword ptr) { halfword mark = tex_new_node(mark_node, subtype); - mark_index(mark) = class; + mark_index(mark) = index; mark_ptr(mark) = ptr; if (lmt_mark_state.min_used < 0) { - lmt_mark_state.min_used = class; - lmt_mark_state.max_used = class; + lmt_mark_state.min_used = index; + lmt_mark_state.max_used = index; } else { - if (class < lmt_mark_state.min_used) { - lmt_mark_state.min_used = class; + if (index < lmt_mark_state.min_used) { + lmt_mark_state.min_used = index; } - if (class > lmt_mark_state.max_used) { - lmt_mark_state.max_used = class; + if (index > lmt_mark_state.max_used) { + lmt_mark_state.max_used = index; } } - tex_set_mark(class, current_marks_code, ptr); + tex_set_mark(index, current_marks_code, ptr); return mark; } @@ -315,16 +313,16 @@ int tex_has_mark(halfword m) void tex_run_mark(void) { - halfword class = 0; + halfword index = 0; halfword code = cur_chr; switch (code) { case set_marks_code: case clear_marks_code: case flush_marks_code: - class = tex_scan_mark_number(); + index = tex_scan_mark_number(); break; } - if (tex_valid_mark(class)) { + if (tex_valid_mark(index)) { quarterword subtype = set_mark_value_code; 
halfword ptr = null; switch (code) { @@ -333,13 +331,13 @@ void tex_run_mark(void) ptr = tex_scan_toks_expand(0, NULL, 0); break; case clear_marks_code: - tex_wipe_mark(class); + tex_wipe_mark(index); return; case flush_marks_code: subtype = reset_mark_value_code; break; } - tex_tail_append(tex_new_mark(subtype, class, ptr)); + tex_tail_append(tex_new_mark(subtype, index, ptr)); } else { /* error already issued */ } diff --git a/source/luametatex/source/tex/texmarks.h b/source/luametatex/source/tex/texmarks.h index e787fa9d0..9ce819f07 100644 --- a/source/luametatex/source/tex/texmarks.h +++ b/source/luametatex/source/tex/texmarks.h @@ -50,7 +50,7 @@ extern void tex_reset_mark (halfword m); extern void tex_wipe_mark (halfword m); extern void tex_delete_mark (halfword m, int what); extern halfword tex_get_some_mark (halfword chr, halfword val); -extern halfword tex_new_mark (quarterword subtype, halfword cls, halfword ptr); +extern halfword tex_new_mark (quarterword subtype, halfword index, halfword ptr); extern void tex_update_top_marks (void); extern void tex_update_first_and_bot_mark (halfword m); extern void tex_update_first_marks (void); diff --git a/source/luametatex/source/tex/texmath.c b/source/luametatex/source/tex/texmath.c index 327e8e6a3..216ba553b 100644 --- a/source/luametatex/source/tex/texmath.c +++ b/source/luametatex/source/tex/texmath.c @@ -1929,9 +1929,9 @@ static void tex_aux_append_math_accent(mathcodeval mval, mathdictval dval) */ -static void tex_aux_append_math_fence(halfword fence, quarterword class) +static void tex_aux_append_math_fence(halfword fence, quarterword mathclass) { - switch (class) { + switch (mathclass) { case open_noad_subtype: { tex_aux_push_math(math_fence_group, cur_list.math_style); @@ -1968,7 +1968,7 @@ static void tex_aux_append_math_fence(halfword fence, quarterword class) } } -static void tex_aux_append_math_fence_val(mathcodeval mval, mathdictval dval, quarterword class) +static void 
tex_aux_append_math_fence_val(mathcodeval mval, mathdictval dval, quarterword mathclass) { halfword fence = tex_new_node(fence_noad, middle_fence_side); halfword delimiter = tex_new_node(delimiter_node, mval.class_value); @@ -1981,10 +1981,10 @@ static void tex_aux_append_math_fence_val(mathcodeval mval, mathdictval dval, qu set_noad_classes(fence, mval.class_value); /* todo : share the next three with the regular fences */ noad_options(fence) |= noad_option_no_check; - if (class == middle_noad_subtype && cur_group != math_fence_group) { + if (mathclass == middle_noad_subtype && cur_group != math_fence_group) { tex_aux_append_math_fence_val(tex_no_math_code(), tex_no_dict_code(), open_noad_subtype); } - tex_aux_append_math_fence(fence, class); + tex_aux_append_math_fence(fence, mathclass); } static void tex_aux_append_math_char(mathcodeval mval, mathdictval dval, int automatic) @@ -2147,9 +2147,9 @@ int tex_scan_math_code_val(halfword code, mathcodeval *mval, mathdictval *dval) case math_class_number_code: { halfword family = cur_fam_par; - halfword class = tex_scan_int(0, NULL); + halfword mathclass = tex_scan_int(0, NULL); tex_scan_math_cmd_val(mval, dval); - mval->class_value = (short) class; + mval->class_value = (short) mathclass; mval->family_value = (short) family; } break; @@ -2518,7 +2518,7 @@ void tex_run_math_modifier(void) */ -static void tex_aux_scan_delimiter(halfword target, int code, int class) +static void tex_aux_scan_delimiter(halfword target, int code, int mathclass) { delcodeval dval = tex_no_del_code(); mathcodeval mval = tex_no_math_code(); @@ -2584,8 +2584,8 @@ static void tex_aux_scan_delimiter(halfword target, int code, int class) goto REALDELIMITER; } FAKEDELIMITER: - if (class != unset_noad_class) { - mval.class_value = (short) class; + if (mathclass != unset_noad_class) { + mval.class_value = (short) mathclass; } dval.small = mval; dval.large = mval; @@ -3451,7 +3451,7 @@ void tex_run_math_fraction(void) halfword userstyle = -1; 
halfword attrlist = null; fullword options = 0; - halfword class = fraction_noad_subtype; + halfword mathclass = fraction_noad_subtype; halfword rulethickness = preset_rule_thickness; int ruledone = 0; fraction_h_factor(fraction) = 1000; @@ -3581,7 +3581,7 @@ void tex_run_math_fraction(void) if (tex_scan_mandate_keyword("class", 1)) { halfword c = (quarterword) tex_scan_math_class_number(0); if (valid_math_class_code(c)) { - class = c; + mathclass = c; } } break; @@ -3673,7 +3673,7 @@ void tex_run_math_fraction(void) } fraction_rule_thickness(fraction) = rulethickness; noad_options(fraction) = options; - set_noad_main_class(fraction, class); + set_noad_main_class(fraction, mathclass); if (attrlist) { tex_attach_attribute_list_attribute(fraction, attrlist); } @@ -5183,16 +5183,16 @@ void tex_reset_all_styles(halfword level) } } -inline static halfword tex_aux_math_class_default(halfword class) { - return (class << 24) + (class << 16) + (class << 8) + class; +inline static halfword tex_aux_math_class_default(halfword mathclass) { + return (mathclass << 24) + (mathclass << 16) + (mathclass << 8) + mathclass; } -inline static void tex_set_math_class_default(halfword class, halfword parent, halfword options) +inline static void tex_set_math_class_default(halfword mathclass, halfword parent, halfword options) { - tex_word_define(0, internal_int_location(first_math_class_code + class), tex_aux_math_class_default(parent)); - tex_word_define(0, internal_int_location(first_math_atom_code + class), tex_aux_math_class_default(class)); - tex_word_define(0, internal_int_location(first_math_options_code + class), options); - tex_word_define(0, internal_int_location(first_math_parent_code + class), tex_aux_math_class_default(class)); + tex_word_define(0, internal_int_location(first_math_class_code + mathclass), tex_aux_math_class_default(parent)); + tex_word_define(0, internal_int_location(first_math_atom_code + mathclass), tex_aux_math_class_default(mathclass)); + 
tex_word_define(0, internal_int_location(first_math_options_code + mathclass), options); + tex_word_define(0, internal_int_location(first_math_parent_code + mathclass), tex_aux_math_class_default(mathclass)); } static void tex_aux_set_math_atom_rule(halfword left, halfword right, halfword newleft, halfword newright) @@ -5203,13 +5203,13 @@ static void tex_aux_set_math_atom_rule(halfword left, halfword right, halfword n void tex_initialize_math_spacing(void) { - for (int class = 0; class <= max_math_class_code; class++) { - tex_set_math_class_default(class, class, no_class_options); + for (int mathclass = 0; mathclass <= max_math_class_code; mathclass++) { + tex_set_math_class_default(mathclass, mathclass, no_class_options); /*tex We do this here as there is no real need for yet another initializer. */ - tex_word_define(0, internal_int_location(first_math_pre_penalty_code + class), infinite_penalty); - tex_word_define(0, internal_int_location(first_math_post_penalty_code + class), infinite_penalty); - tex_word_define(0, internal_int_location(first_math_display_pre_penalty_code + class), infinite_penalty); - tex_word_define(0, internal_int_location(first_math_display_post_penalty_code + class), infinite_penalty); + tex_word_define(0, internal_int_location(first_math_pre_penalty_code + mathclass), infinite_penalty); + tex_word_define(0, internal_int_location(first_math_post_penalty_code + mathclass), infinite_penalty); + tex_word_define(0, internal_int_location(first_math_display_pre_penalty_code + mathclass), infinite_penalty); + tex_word_define(0, internal_int_location(first_math_display_post_penalty_code + mathclass), infinite_penalty); } tex_reset_all_styles(level_one); diff --git a/source/luametatex/source/tex/texmlist.c b/source/luametatex/source/tex/texmlist.c index 1d4cbacd8..b9453875e 100644 --- a/source/luametatex/source/tex/texmlist.c +++ b/source/luametatex/source/tex/texmlist.c @@ -477,11 +477,11 @@ static void tex_aux_trace_kerns(halfword kern, const 
char *what, const char *det } } -static halfword tex_aux_math_insert_font_kern(halfword current, scaled amount, halfword template, const char *trace) +static halfword tex_aux_math_insert_font_kern(halfword current, scaled amount, halfword attributetemplate, const char *trace) { /*tex Maybe |math_font_kern|, also to prevent expansion. */ halfword kern = tex_new_kern_node(amount, font_kern_subtype); - tex_attach_attribute_list_copy(kern, template ? template : current); + tex_attach_attribute_list_copy(kern, attributetemplate ? attributetemplate : current); if (node_next(current)) { tex_couple_nodes(kern, node_next(current)); } @@ -490,11 +490,11 @@ static halfword tex_aux_math_insert_font_kern(halfword current, scaled amount, h return kern; } -static halfword tex_aux_math_insert_italic_kern(halfword current, scaled amount, halfword template, const char *trace) +static halfword tex_aux_math_insert_italic_kern(halfword current, scaled amount, halfword attributetemplate, const char *trace) { /*tex Maybe |math_italic_kern|. */ halfword kern = tex_new_kern_node(amount, italic_kern_subtype); - tex_attach_attribute_list_copy(kern, template ? template : current); + tex_attach_attribute_list_copy(kern, attributetemplate ? attributetemplate : current); if (node_next(current)) { tex_couple_nodes(kern, node_next(current)); } @@ -1666,7 +1666,7 @@ inline static void tex_aux_calculate_glue(scaled m, scaled *f, scaled *n) /*tex integer part of |m| */ *n = tex_x_over_n_r(m, unity, f); /*tex the new glue specification */ - if (f < 0) { + if (*f < 0) { --n; f += unity; } @@ -5540,9 +5540,9 @@ if (! 
stack && has_noad_option_exact(target)) { } } -inline static int tex_aux_fallback_math_spacing_class(halfword style, halfword class) +inline static int tex_aux_fallback_math_spacing_class(halfword style, halfword mathclass) { - unsigned parent = (unsigned) count_parameter(first_math_class_code + class); + unsigned parent = (unsigned) count_parameter(first_math_class_code + mathclass); switch (style) { case display_style: case cramped_display_style: return (parent >> 24) & 0xFF; case text_style: case cramped_text_style: return (parent >> 16) & 0xFF; @@ -5673,9 +5673,9 @@ static halfword tex_aux_math_spacing_glue(halfword ltype, halfword rtype, halfwo } } -inline static int tex_aux_fallback_math_ruling_class(halfword style, halfword class) +inline static int tex_aux_fallback_math_ruling_class(halfword style, halfword mathclass) { - unsigned parent = (unsigned) count_parameter(first_math_atom_code + class); + unsigned parent = (unsigned) count_parameter(first_math_atom_code + mathclass); switch (style) { case display_style: case cramped_display_style: return (parent >> 24) & 0xFF; case text_style: case cramped_text_style: return (parent >> 16) & 0xFF; @@ -6838,6 +6838,8 @@ static void tex_mlist_to_hlist_finalize_list(mliststate *state) Apply some logic. The hard coded pairwise comparison is replaced by a generic one because we can have more classes. For a while spacing and pairing was under a mode control but that made no sense. We start with the begin class. + + Setting |state->beginclass| still fragile ... todo. 
*/ recent_class_overload = get_noad_right_class(current); if (current_type == simple_noad && state->beginclass == unset_noad_class) { @@ -6853,6 +6855,9 @@ static void tex_mlist_to_hlist_finalize_list(mliststate *state) current = node_next(current); goto WIPE; } + if (recent_subtype == math_begin_class) { + state->beginclass = current_subtype; + } /*tex This is a special case where a sign starts something marked as (like) numeric, in which there will be different spacing applied. diff --git a/source/luametatex/source/tex/texprinting.c b/source/luametatex/source/tex/texprinting.c index bb021047e..86fa47e28 100644 --- a/source/luametatex/source/tex/texprinting.c +++ b/source/luametatex/source/tex/texprinting.c @@ -352,7 +352,7 @@ void tex_print_str(const char *s) lmt_string_to_buffer(s); return; default: - break; + return; } if (terminal || logfile) { int len = (int) strlen(s); diff --git a/source/luametatex/source/tex/texscanning.c b/source/luametatex/source/tex/texscanning.c index 15e887a71..e4354bba1 100644 --- a/source/luametatex/source/tex/texscanning.c +++ b/source/luametatex/source/tex/texscanning.c @@ -225,12 +225,12 @@ inline static void tex_aux_downgrade_cur_val(int level, int succeeded, int negat static void tex_aux_set_cur_val_by_lua_value_cmd(halfword index, halfword property) { - int class = lua_value_none_code; + int category = lua_value_none_code; halfword value = 0; /* can also be scaled */ strnumber u = tex_save_cur_string(); lmt_token_state.luacstrings = 0; - class = lmt_function_call_by_class(index, property, &value); - switch (class) { + category = lmt_function_call_by_category(index, property, &value); + switch (category) { case lua_value_none_code: cur_val_level = no_val_level; break; @@ -1380,20 +1380,20 @@ static halfword tex_aux_scan_something_internal(halfword cmd, halfword chr, int case math_parameter_set_display_pre_penalty: case math_parameter_set_display_post_penalty: { - halfword class = tex_scan_math_class_number(0); - if 
(valid_math_class_code(class)) { + halfword mathclass = tex_scan_math_class_number(0); + if (valid_math_class_code(mathclass)) { switch (chr) { case math_parameter_set_pre_penalty: - cur_val = count_parameter(first_math_pre_penalty_code + class); + cur_val = count_parameter(first_math_pre_penalty_code + mathclass); break; case math_parameter_set_post_penalty: - cur_val = count_parameter(first_math_post_penalty_code + class); + cur_val = count_parameter(first_math_post_penalty_code + mathclass); break; case math_parameter_set_display_pre_penalty: - cur_val = count_parameter(first_math_display_pre_penalty_code + class); + cur_val = count_parameter(first_math_display_pre_penalty_code + mathclass); break; case math_parameter_set_display_post_penalty: - cur_val = count_parameter(first_math_display_post_penalty_code + class); + cur_val = count_parameter(first_math_display_post_penalty_code + mathclass); break; } } else { @@ -1411,9 +1411,9 @@ static halfword tex_aux_scan_something_internal(halfword cmd, halfword chr, int } case math_parameter_options: { - halfword class = tex_scan_math_class_number(0); - if (valid_math_class_code(class)) { - cur_val = count_parameter(first_math_options_code + class); + halfword mathclass = tex_scan_math_class_number(0); + if (valid_math_class_code(mathclass)) { + cur_val = count_parameter(first_math_options_code + mathclass); } else { cur_val = 0; } @@ -1890,6 +1890,20 @@ static void tex_aux_improper_constant_error(void) */ + +static void tex_aux_scan_int_no_number() +{ + /*tex Express astonishment that no number was here. No longer a goto because g++ doesn't like it. 
*/ + if (lmt_error_state.intercept) { + lmt_error_state.last_intercept = 1 ; + if (cur_cmd != spacer_cmd) { + tex_back_input(cur_tok); + } + } else { + tex_aux_missing_number_error(); + } +} + halfword tex_scan_int(int optional_equal, int *radix) { int negative = 0; @@ -1959,7 +1973,7 @@ halfword tex_scan_int(int optional_equal, int *radix) result = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0); if (cur_val_level != int_val_level) { result = 0; - goto NONUMBER; + tex_aux_scan_int_no_number(); } } else if (cur_cmd == math_style_cmd) { /* A pity that we need to check this way in |scan_int|. */ @@ -1970,7 +1984,7 @@ halfword tex_scan_int(int optional_equal, int *radix) result = cur_chr; } else { result = 0; - goto NONUMBER; + tex_aux_scan_int_no_number(); } } else { /*tex has an error message been issued? */ @@ -1997,7 +2011,7 @@ halfword tex_scan_int(int optional_equal, int *radix) if (ok_so_far) { result = result * 8 + d; if (result > max_integer) { - result = infinity; + result = max_integer; if (lmt_error_state.intercept) { vacuous = 1; goto DONE; @@ -2031,7 +2045,7 @@ halfword tex_scan_int(int optional_equal, int *radix) if (ok_so_far) { result = result * 16 + d; if (result > max_integer) { - result = infinity; + result = max_integer; if (lmt_error_state.intercept) { vacuous = 1; goto DONE; @@ -2060,7 +2074,7 @@ halfword tex_scan_int(int optional_equal, int *radix) if (ok_so_far) { result = result * 10 + d; if (result > max_integer) { - result = infinity; + result = max_integer; if (lmt_error_state.intercept) { vacuous = 1; goto DONE; @@ -2077,16 +2091,7 @@ halfword tex_scan_int(int optional_equal, int *radix) } DONE: if (vacuous) { - NONUMBER: - /*tex Express astonishment that no number was here */ - if (lmt_error_state.intercept) { - lmt_error_state.last_intercept = 1 ; - if (cur_cmd != spacer_cmd) { - tex_back_input(cur_tok); - } - } else { - tex_aux_missing_number_error(); - } + tex_aux_scan_int_no_number(); } else { 
tex_push_back(cur_tok, cur_cmd, cur_chr); } @@ -3289,7 +3294,7 @@ halfword tex_scan_font_identifier(halfword *spec) if (tex_is_valid_font(fnt)) { return fnt; } else { - goto BAD; + break; /* to error */ } } case internal_int_cmd: @@ -3301,7 +3306,7 @@ halfword tex_scan_font_identifier(halfword *spec) return fnt; } } - goto BAD; + break; /* to error */ } default: { @@ -3312,19 +3317,17 @@ halfword tex_scan_font_identifier(halfword *spec) if (tex_is_valid_font((halfword) fnt)) { return (halfword) fnt; } - } else { - /*tex Fall through to a font error message. */ } - BAD: - tex_handle_error( - back_error_type, - "Missing or invalid font identifier (or equivalent) or integer (register or otherwise)", - "I was looking for a control sequence whose current meaning has been defined by\n" - "\\font or a valid font id number." - ); - return null_font; + break; /* to error */ } } + tex_handle_error( + back_error_type, + "Missing or invalid font identifier (or equivalent) or integer (register or otherwise)", + "I was looking for a control sequence whose current meaning has been defined by\n" + "\\font or a valid font id number." 
+ ); + return null_font; } /*tex @@ -3612,9 +3615,10 @@ inline static int tex_aux_valid_macro_preamble(halfword *p, int *counter, halfwo *hash_brace = cur_tok; *p = tex_store_new_token(*p, cur_tok); *p = tex_store_new_token(*p, end_match_token); - set_token_parameters(h, *counter - zero_token + 1); + set_token_preamble(h, 1); + set_token_parameters(h, *counter - zero_token); return 1; - } else if (*counter == nine_token) { + } else if (*counter == F_token_l) { tex_aux_too_many_parameters_error(); } else { switch (cur_tok) { @@ -3669,7 +3673,13 @@ inline static int tex_aux_valid_macro_preamble(halfword *p, int *counter, halfwo default: ++*counter; if (cur_tok != *counter) { - tex_aux_parameters_order_error(); + if (cur_tok >= A_token_l && cur_tok <= F_token_l) { + *counter += gap_match_count; + cur_tok += match_token - letter_token; + break; + } else { + tex_aux_parameters_order_error(); + } } cur_tok += match_token - other_token; break; @@ -3682,7 +3692,8 @@ inline static int tex_aux_valid_macro_preamble(halfword *p, int *counter, halfwo } if (h != *p) { *p = tex_store_new_token(*p, end_match_token); - set_token_parameters(h, *counter - zero_token + 1); + set_token_preamble(h, 1); + set_token_parameters(h, *counter - zero_token); } if (cur_cmd == right_brace_cmd) { ++lmt_input_state.align_state; @@ -3721,8 +3732,12 @@ halfword tex_scan_macro_normal(void) if (cur_cmd == parameter_cmd) { /*tex Keep the |#|. */ } else if (cur_tok <= zero_token || cur_tok > counter) { - tex_aux_illegal_parameter_in_body_error(); - cur_tok = s; + if (cur_tok >= A_token_l && cur_tok <= F_token_l) { + cur_tok = token_val(parameter_reference_cmd, cur_chr - '0' - gap_match_count); + } else { + tex_aux_illegal_parameter_in_body_error(); + cur_tok = s; + } } else { cur_tok = token_val(parameter_reference_cmd, cur_chr - '0'); } @@ -3799,8 +3814,12 @@ halfword tex_scan_macro_expand(void) if (cur_cmd == parameter_cmd) { /*tex Keep the |#|. 
*/ } else if (cur_tok <= zero_token || cur_tok > counter) { - tex_aux_illegal_parameter_in_body_error(); - cur_tok = s; + if (cur_tok >= A_token_l && cur_tok <= F_token_l) { + cur_tok = token_val(parameter_reference_cmd, cur_chr - '0' - gap_match_count); + } else { + tex_aux_illegal_parameter_in_body_error(); + cur_tok = s; + } } else { cur_tok = token_val(parameter_reference_cmd, cur_chr - '0'); } @@ -4245,7 +4264,7 @@ static void tex_aux_scan_expr(halfword level) switch (level) { case int_val_level: case attr_val_level: - if ((factor > infinity) || (factor < -infinity)) { + if ((factor > max_integer) || (factor < min_integer)) { lmt_scanner_state.arithmic_error = 1; factor = 0; } @@ -4264,7 +4283,7 @@ static void tex_aux_scan_expr(halfword level) } break; default: - if ((state > expression_subtract) && ((factor > infinity) || (factor < -infinity))) { + if ((state > expression_subtract) && ((factor > max_integer) || (factor < min_integer))) { lmt_scanner_state.arithmic_error = 1; factor = 0; } @@ -4326,7 +4345,7 @@ static void tex_aux_scan_expr(halfword level) switch (level) { case int_val_level: case attr_val_level: - term = tex_fract(term, numerator, factor, infinity); + term = tex_fract(term, numerator, factor, max_integer); break; case dimen_val_level: term = tex_fract(term, numerator, factor, max_dimen); @@ -4363,7 +4382,7 @@ static void tex_aux_scan_expr(halfword level) switch (level) { case int_val_level: case attr_val_level: - expression = tex_aux_add_or_sub(expression, term, infinity, result); + expression = tex_aux_add_or_sub(expression, term, max_integer, result); break; case dimen_val_level: expression = tex_aux_add_or_sub(expression, term, max_dimen, result); @@ -4850,7 +4869,7 @@ static halfword tex_scan_bit_int(int *radix) result = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0); if (cur_val_level != int_val_level) { result = 0; - goto NONUMBER; + tex_aux_missing_number_error(); } } else if (cur_cmd == math_style_cmd) { result 
= (cur_chr == yet_unset_math_style) ? tex_scan_math_style_identifier(0, 0) : cur_chr; @@ -4859,7 +4878,7 @@ static halfword tex_scan_bit_int(int *radix) result = cur_chr; } else { result = 0; - goto NONUMBER; + tex_aux_missing_number_error(); } } else { int vacuous = 1; @@ -4882,7 +4901,7 @@ static halfword tex_scan_bit_int(int *radix) if (ok_so_far) { result = result * 8 + d; if (result > max_integer) { - result = infinity; + result = max_integer; tex_aux_number_to_big_error(); ok_so_far = 0; } @@ -4911,7 +4930,7 @@ static halfword tex_scan_bit_int(int *radix) if (ok_so_far) { result = result * 16 + d; if (result > max_integer) { - result = infinity; + result = max_integer; tex_aux_number_to_big_error(); ok_so_far = 0; } @@ -4935,7 +4954,7 @@ static halfword tex_scan_bit_int(int *radix) if (ok_so_far) { result = result * 10 + d; if (result > max_integer) { - result = infinity; + result = max_integer; tex_aux_number_to_big_error(); ok_so_far = 0; } @@ -4947,7 +4966,6 @@ static halfword tex_scan_bit_int(int *radix) } DONE: if (vacuous) { - NONUMBER: tex_aux_missing_number_error(); } else { tex_push_back(cur_tok, cur_cmd, cur_chr); @@ -5562,10 +5580,10 @@ static void tex_aux_scan_expression(int level) break; } } - if (v < -infinity) { - v = -infinity; - } else if (v > infinity) { - v = infinity; + if (v < min_integer) { + v = min_integer; + } else if (v > max_integer) { + v = max_integer; } expression_entry(stack.tail) = v; break; diff --git a/source/luametatex/source/tex/texstringpool.h b/source/luametatex/source/tex/texstringpool.h index a15b9fad5..f053a642a 100644 --- a/source/luametatex/source/tex/texstringpool.h +++ b/source/luametatex/source/tex/texstringpool.h @@ -78,35 +78,33 @@ extern string_pool_info lmt_string_pool_state; inline static void tex_flush_char(void) { --lmt_string_pool_state.string_temp_top; } -extern strnumber tex_make_string (void); -extern strnumber tex_push_string (const unsigned char *s, int l); -extern char *tex_take_string (int *len); 
-extern int tex_str_eq_buf (strnumber s, int k, int n); -extern int tex_str_eq_str (strnumber s, strnumber t); -extern int tex_str_eq_cstr (strnumber s, const char *, size_t); -extern int tex_get_strings_started (void); -extern void tex_reset_cur_string (void); -/* strnumber tex_search_string (strnumber search); */ -/* int tex_used_strings (void); */ -extern strnumber tex_maketexstring (const char *s); -extern strnumber tex_maketexlstring (const char *s, size_t); -extern void tex_append_char (unsigned char c); -extern void tex_append_string (const unsigned char *s, unsigned l); -extern char *tex_makecstring (int s, int *allocated); -extern char *tex_makeclstring (int s, size_t *len); -extern void tex_dump_string_pool (dumpstream f); -extern void tex_undump_string_pool (dumpstream f); -extern void tex_initialize_string_pool (void); -extern void tex_initialize_string_mem (void); -extern void tex_flush_str (strnumber s); -extern strnumber tex_save_cur_string (void); -extern void tex_restore_cur_string (strnumber u); - -/* void tex_increment_pool_string (int n); */ -/* void tex_decrement_pool_string (int n); */ - -extern void tex_compact_string_pool (void); - -inline static char *tex_to_cstring (int s) { return str_length(s) > 0 ? 
(char *) str_string(s) : ""; } +extern strnumber tex_make_string (void); +extern strnumber tex_push_string (const unsigned char *s, int l); +extern char *tex_take_string (int *len); +extern int tex_str_eq_buf (strnumber s, int k, int n); +extern int tex_str_eq_str (strnumber s, strnumber t); +extern int tex_str_eq_cstr (strnumber s, const char *, size_t); +extern int tex_get_strings_started (void); +extern void tex_reset_cur_string (void); +/* strnumber tex_search_string (strnumber search); */ +/* int tex_used_strings (void); */ +extern strnumber tex_maketexstring (const char *s); +extern strnumber tex_maketexlstring (const char *s, size_t); +extern void tex_append_char (unsigned char c); +extern void tex_append_string (const unsigned char *s, unsigned l); +extern char *tex_makecstring (int s, int *allocated); +extern char *tex_makeclstring (int s, size_t *len); +extern void tex_dump_string_pool (dumpstream f); +extern void tex_undump_string_pool (dumpstream f); +extern void tex_initialize_string_pool (void); +extern void tex_initialize_string_mem (void); +extern void tex_flush_str (strnumber s); +extern strnumber tex_save_cur_string (void); +extern void tex_restore_cur_string (strnumber u); +extern void tex_compact_string_pool (void); +/* void tex_increment_pool_string (int n); */ +/* void tex_decrement_pool_string (int n); */ + +inline static const char *tex_to_cstring (int s) { return str_length(s) > 0 ? (char *) str_string(s) : ""; } # endif diff --git a/source/luametatex/source/tex/textoken.c b/source/luametatex/source/tex/textoken.c index b46e6de85..f820e51d7 100644 --- a/source/luametatex/source/tex/textoken.c +++ b/source/luametatex/source/tex/textoken.c @@ -92,7 +92,7 @@ token_state_info lmt_token_state = { .buffer = NULL, .bufloc = 0, .bufmax = 0, - .padding = 0, + .empty = null, }; /*tex Some properties are dumped in the format so these are aet already! 
*/ @@ -212,6 +212,7 @@ void tex_compact_tokens(void) } } } + lmt_token_state.empty = mapper[lmt_token_state.empty]; // print(dump_state.format_identifier); tex_print_format("tokenlist compacted from %i to %i entries, ", lmt_token_memory_state.tokens_data.top, nc); if (nofluacmds) { @@ -335,27 +336,41 @@ void tex_add_token_reference(halfword p) { if (get_token_reference(p) < max_token_reference) { add_token_reference(p); - } else { - tex_overflow_error("reference count", max_token_reference); + // } else { + // tex_overflow_error("reference count", max_token_reference); } } void tex_increment_token_reference(halfword p, int n) { if ((get_token_reference(p) + n) < max_token_reference) { - inc_token_reference(p,n); - } else { - tex_overflow_error("reference count", max_token_reference); + inc_token_reference(p, n); + } else { + inc_token_reference(p, max_token_reference - get_token_reference(p)); + // } else { + // tex_overflow_error("reference count", max_token_reference); } } +// void tex_delete_token_reference(halfword p) +// { +// if (p) { +// if (get_token_reference(p)) { +// sub_token_reference(p); +// } else { +// tex_flush_token_list(p); +// } +// } +// } + void tex_delete_token_reference(halfword p) { if (p) { - if (get_token_reference(p)) { - sub_token_reference(p); - } else { + halfword r = get_token_reference(p); + if (! r) { tex_flush_token_list(p); + } if(r < max_token_reference) { + sub_token_reference(p); } } } @@ -465,6 +480,9 @@ void tex_print_meaning(halfword code) tex_print_cs(cur_cs); return; } else { + if (cur_chr && get_token_reference(cur_chr) == max_token_reference) { + tex_print_str("constant "); + } switch (code) { case meaning_code: case meaning_full_code: @@ -477,7 +495,7 @@ void tex_print_meaning(halfword code) tex_print_cs(cur_cs); tex_print_char(' '); if (cur_chr && token_link(cur_chr)) { - halfword body = get_token_parameters(cur_chr) ? 
tex_show_token_list(token_link(cur_chr), null, default_token_show_max, 1) : token_link(cur_chr); + halfword body = get_token_preamble(cur_chr) ? tex_show_token_list(token_link(cur_chr), null, default_token_show_max, 1) : token_link(cur_chr); tex_print_char('{'); if (body) { tex_show_token_list(body, null, default_token_show_max, 0); @@ -582,7 +600,7 @@ halfword tex_show_token_list(halfword p, halfword q, int l, int asis) { if (p) { /*tex the highest parameter number, as an \ASCII\ digit */ - unsigned char n = '0'; + unsigned char n = 0; int min = 0; int max = lmt_token_memory_state.tokens_data.top; lmt_print_state.tally = 0; @@ -639,6 +657,8 @@ halfword tex_show_token_list(halfword p, halfword q, int l, int asis) tex_print_tex_str(match_visualizer); if (chr <= 9) { tex_print_char(chr + '0'); + } else if (chr <= max_match_count) { + tex_print_char(chr + '0' + gap_match_count); } else { tex_print_char('!'); return null; @@ -650,7 +670,7 @@ halfword tex_show_token_list(halfword p, halfword q, int l, int asis) ++n; } tex_print_char(chr ? chr : '0'); - if (n > '9') { + if (n > max_match_count) { /*tex Can this happen at all? */ return null; } else { @@ -698,8 +718,9 @@ inline static halfword get_unichar_from_buffer(int *b) if (a <= 0x80) { *b += 1; } else { - a = (halfword) aux_str2uni(lmt_fileio_state.io_buffer + *b); - *b += utf8_size(a); + int al; + a = (halfword) aux_str2uni_len(lmt_fileio_state.io_buffer + *b, &al); + *b += al; } return a; } @@ -892,7 +913,7 @@ int tex_scan_optional_keyword(const char *s) /*tex Here we know that the first character(s) matched so we are in the middle of a keyword already - which means a different loop than the previous one. + which means a different loop than the previous one. 
*/ int tex_scan_mandate_keyword(const char *s, int offset) @@ -2111,8 +2132,9 @@ halfword tex_string_to_toks(const char *ss) halfword p = null; /*tex new node being added to the token list via |store_new_token| */ while (s < se) { - halfword t = (halfword) aux_str2uni((const unsigned char *) s); - s += utf8_size(t); + int tl; + halfword t = (halfword) aux_str2uni_len((const unsigned char *) s, &tl); + s += tl; if (t == ' ') { t = space_token; } else { @@ -2148,8 +2170,9 @@ static halfword lmt_str_toks(lstring b) /* returns head */ halfword head = null; halfword tail = head; while (k < (unsigned char *) b.s + b.l) { - halfword t = aux_str2uni(k); - k += utf8_size(t); + int tl; + halfword t = aux_str2uni_len(k, &tl); + k += tl; if (t == ' ') { t = space_token; } else { @@ -2190,14 +2213,14 @@ halfword tex_str_toks(lstring s, halfword *tail) unsigned char *k = s.s; unsigned char *l = k + s.l; while (k < l) { - halfword t = aux_str2uni(k); + int tl; + halfword t = aux_str2uni_len(k, &tl); if (t == ' ') { - k += 1; t = space_token; } else { - k += utf8_size(t); t += other_token; } + k += tl; p = tex_store_new_token(p, t); if (! h) { h = p; @@ -2220,14 +2243,14 @@ halfword tex_cur_str_toks(halfword *tail) /*tex tail of the token list */ while (k < l) { /*tex token being appended */ - halfword t = aux_str2uni(k); + int tl; + halfword t = aux_str2uni_len(k, &tl); if (t == ' ') { - k += 1; t = space_token; } else { - k += utf8_size(t); t += other_token; } + k += tl; p = tex_store_new_token(p, t); if (! h) { h = p; @@ -2261,8 +2284,9 @@ halfword tex_str_scan_toks(int ct, lstring ls) while (k < l) { int cc; /*tex token being appended */ - halfword t = aux_str2uni(k); - k += utf8_size(t); + int lt; + halfword t = aux_str2uni_len(k, &lt); + k += lt; cc = tex_get_cat_code(ct, t); if (cc == 0) { /*tex We have a potential control sequence so we check for it.
*/ @@ -2271,8 +2295,7 @@ halfword tex_str_scan_toks(int ct, lstring ls) int c = 0 ; unsigned char *name = k ; while (k < l) { - t = (halfword) aux_str2uni((const unsigned char *) k); - s = utf8_size(t); + t = (halfword) aux_str2uni_len((const unsigned char *) k, &s); c = tex_get_cat_code(ct,t); if (c == 11) { k += s ; @@ -3131,7 +3154,7 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb int p = token_link(pp); if (p) { int e = escape_char_par; /*tex The serialization of the escape, normally a backlash. */ - int n = '0'; /*tex The character after |#|, so |#0| upto |#9| */ + int n = 0; /*tex The character after |#|, so |#0| upto |#9| */ int min = 0; int max = lmt_token_memory_state.tokens_data.top; int skip = 0; @@ -3149,7 +3172,7 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb } lmt_token_state.bufloc = 0; if (skippreamble) { - skip = get_token_parameters(pp); + skip = get_token_preamble(pp); } while (p) { if (p < min || p > max) { @@ -3192,12 +3215,14 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb tex_aux_append_char_to_buffer(match_visualizer); if (chr <= 9) { tex_aux_append_char_to_buffer(chr + '0'); + } else if (chr <= max_match_count) { + tex_aux_append_char_to_buffer(chr + '0' + gap_match_count); } else { - tex_aux_append_char_to_buffer('!'); + tex_aux_append_char_to_buffer('!'); goto EXIT; } } else { - if (chr > 9) { + if (chr > max_match_count) { goto EXIT; } } @@ -3210,9 +3235,14 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb ++n; } if (! skip) { - tex_aux_append_char_to_buffer(chr ? chr : '0'); + // tex_aux_append_char_to_buffer(chr ? 
chr : '0'); + if (chr <= 9) { + tex_aux_append_char_to_buffer(chr + '0'); + } else if (chr <= max_match_count) { + tex_aux_append_char_to_buffer(chr + '0' + gap_match_count); + } } - if (n > '9') { + if (n > max_match_count) { goto EXIT; } break; @@ -3457,14 +3487,14 @@ halfword tex_parse_str_to_tok(halfword head, halfword *tail, halfword ct, const const char *se = str + lstr; while (str < se) { /*tex hh: |str2uni| could return len too (also elsewhere) */ - halfword u = (halfword) aux_str2uni((const unsigned char *) str); + int ul; + halfword u = (halfword) aux_str2uni_len((const unsigned char *) str, &ul); halfword t = null; halfword cc = tex_get_cat_code(ct, u); - str += utf8_size(u); + str += ul; /*tex - This is a relating simple converter; if more is needed one can just use - |tex.print| with a regular |\def| or |\gdef| and feed the string into the - regular scanner. + This is a relative simple converter; if more is needed one can just use |tex.print| + with a regular |\def| or |\gdef| and feed the string into the regular scanner. 
*/ switch (cc) { case escape_cmd: @@ -3473,8 +3503,8 @@ halfword tex_parse_str_to_tok(halfword head, halfword *tail, halfword ct, const int lname = 0; const char *name = str; while (str < se) { - halfword u = (halfword) aux_str2uni((const unsigned char *) str); - int s = utf8_size(u); + int s; + halfword u = (halfword) aux_str2uni_len((const unsigned char *) str, &s); int c = tex_get_cat_code(ct, u); if (c == letter_cmd) { str += s; diff --git a/source/luametatex/source/tex/textoken.h b/source/luametatex/source/tex/textoken.h index da2d01f7c..68632792b 100644 --- a/source/luametatex/source/tex/textoken.h +++ b/source/luametatex/source/tex/textoken.h @@ -108,32 +108,39 @@ typedef struct token_state_info { char *buffer; int bufloc; int bufmax; - int padding; + int empty; } token_state_info; extern token_state_info lmt_token_state; -// # define max_token_reference 0x7FFF /* we can bump to 0xFFFF when we go unsigned here */ -// -//define token_reference(a) token_memory_state.tokens[a].half1 -// -// #define get_token_parameters(a) lmt_token_memory_state.tokens[a].quart2 -// #define get_token_reference(a) lmt_token_memory_state.tokens[a].quart3 -// -// #define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].quart2 = (b) -// -// #define add_token_reference(a) lmt_token_memory_state.tokens[a].quart3 += 1 -// #define sub_token_reference(a) lmt_token_memory_state.tokens[a].quart3 -= 1 -// #define inc_token_reference(a,b) lmt_token_memory_state.tokens[a].quart3 += (quarterword) (b) -// #define dec_token_reference(a,b) lmt_token_memory_state.tokens[a].quart3 -= (quarterword) (b) +/*tex + + We now can have 15 parameters but if needed we can go higher. However, we then also need to + cache more and change the |preamble| and |count| to some funny bit ranges. If needed we can + bump the reference count maximum but quite likely one already has run out of something else + already.
+ + \starttyping + preamble = 0xF0000000 : 1 when we have one, including trailing # + count = 0x0F000000 + reference = 0x00FFFFFF + \stoptyping + +*/ + +# define max_match_count 15 +# define gap_match_count 7 -# define max_token_reference 0x0FFFFFFF +# define max_token_reference 0x00FFFFFF -# define get_token_parameters(a) (lmt_token_memory_state.tokens[a].hulf1 >> 28) -# define get_token_reference(a) (lmt_token_memory_state.tokens[a].hulf1 & 0x0FFFFFFF) +# define get_token_preamble(a) ((lmt_token_memory_state.tokens[a].hulf1 >> 28) & 0xF) +# define get_token_parameters(a) ((lmt_token_memory_state.tokens[a].hulf1 >> 24) & 0xF) +# define get_token_reference(a) ((lmt_token_memory_state.tokens[a].hulf1 ) & max_token_reference) -# define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28) /* normally the variable is still zero here */ +# define set_token_preamble(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28) /* normally the variable is still zero here */ +# define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 24) /* normally the variable is still zero here */ +# define set_token_reference(a,b) lmt_token_memory_state.tokens[a].hulf1 += (b) # define add_token_reference(a) lmt_token_memory_state.tokens[a].hulf1 += 1 /* we are way off the parameter count */ # define sub_token_reference(a) lmt_token_memory_state.tokens[a].hulf1 -= 1 /* we are way off the parameter count */ # define inc_token_reference(a,b) lmt_token_memory_state.tokens[a].hulf1 += (b) /* we are way off the parameter count */ diff --git a/source/luametatex/source/tex/textypes.h b/source/luametatex/source/tex/textypes.h index a09409522..c2cd57e64 100644 --- a/source/luametatex/source/tex/textypes.h +++ b/source/luametatex/source/tex/textypes.h @@ -155,7 +155,7 @@ extern halfword tex_badness( # define one_bp 65781 -# define infinity 017777777777 /*tex the largest positive value that \TEX\ knows */ +# define max_infinity 0x7FFFFFFF /*tex the 
largest positive value that \TEX\ knows */ # define min_infinity -0x7FFFFFFF # define awful_bad 07777777777 /*tex more than a billion demerits |0x3FFFFFFF| */ # define infinite_bad 10000 /*tex infinitely bad value */ diff --git a/source/luametatex/source/utilities/auxunistring.c b/source/luametatex/source/utilities/auxunistring.c index 9fe5531d6..746fde4ad 100644 --- a/source/luametatex/source/utilities/auxunistring.c +++ b/source/luametatex/source/utilities/auxunistring.c @@ -11,36 +11,100 @@ */ -unsigned aux_str2uni(const unsigned char *k) +// unsigned xaux_str2uni(const unsigned char *k) +// { +// const unsigned char *text = k; +// int ch = *text++; +// if (ch < 0x80) { +// return (unsigned) ch; +// } else if (ch <= 0xbf) { +// return 0xFFFD; +// } else if (ch <= 0xdf) { +// if (text[0] >= 0x80 && text[0] < 0xc0) { +// return (unsigned) (((ch & 0x1f) << 6) | (text[0] & 0x3f)); +// } +// } else if (ch <= 0xef) { +// if (text[0] >= 0x80 && text[0] < 0xc0 && text[1] >= 0x80 && text[1] < 0xc0) { +// return (unsigned) (((ch & 0xf) << 12) | ((text[0] & 0x3f) << 6) | (text[1] & 0x3f)); +// } +// } else if (ch <= 0xf7) { +// if (text[0] < 0x80 || text[1] < 0x80 || text[2] < 0x80 || +// text[0] >= 0xc0 || text[1] >= 0xc0 || text[2] >= 0xc0) { +// return 0xFFFD; +// } else { +// int w1 = (((ch & 0x7) << 2) | ((text[0] & 0x30) >> 4)) - 1; +// int w2 = ((text[1] & 0xf) << 6) | (text[2] & 0x3f); +// w1 = (w1 << 6) | ((text[0] & 0xf) << 2) | ((text[1] & 0x30) >> 4); +// return (unsigned) (w1 * 0x400 + w2 + 0x10000); +// } +// } +// return 0xFFFD; +// } + +unsigned aux_str2uni(const unsigned char *text) +{ + if (text[0] < 0x80) { + return (unsigned) text[0]; + } else if (text[0] <= 0xbf) { + return 0xFFFD; + } else if (text[0] <= 0xdf) { + if (text[1] >= 0x80 && text[1] < 0xc0) { + return (unsigned) (((text[0] & 0x1f) << 6) | (text[1] & 0x3f)); + } + } else if (text[0] <= 0xef) { + if (text[1] >= 0x80 && text[1] < 0xc0 && text[2] >= 0x80 && text[2] < 0xc0) { + return 
(unsigned) (((text[0] & 0xf) << 12) | ((text[1] & 0x3f) << 6) | (text[2] & 0x3f)); + } + } else if (text[0] <= 0xf7) { + if (text[1] < 0x80 || text[2] < 0x80 || text[3] < 0x80 || + text[1] >= 0xc0 || text[2] >= 0xc0 || text[3] >= 0xc0) { + return 0xFFFD; + } else { + int w1 = (((text[0] & 0x7) << 2) | ((text[1] & 0x30) >> 4)) - 1; + int w2 = ((text[2] & 0xf) << 6) | (text[3] & 0x3f); + w1 = (w1 << 6) | ((text[1] & 0xf) << 2) | ((text[2] & 0x30) >> 4); + return (unsigned) (w1 * 0x400 + w2 + 0x10000); + } + } + return 0xFFFD; +} + +unsigned aux_str2uni_len(const unsigned char *text, int *len) { - const unsigned char *text = k; - int ch = *text++; - if (ch < 0x80) { - return (unsigned) ch; - } else if (ch <= 0xbf) { + if (text[0] < 0x80) { + *len = 1; + return (unsigned) text[0]; + } else if (text[0] <= 0xbf) { + *len = 1; return 0xFFFD; - } else if (ch <= 0xdf) { - if (text[0] >= 0x80 && text[0] < 0xc0) { - return (unsigned) (((ch & 0x1f) << 6) | (text[0] & 0x3f)); + } else if (text[0] <= 0xdf) { + if (text[1] >= 0x80 && text[1] < 0xc0) { + *len = 2; + return (unsigned) (((text[0] & 0x1f) << 6) | (text[1] & 0x3f)); } - } else if (ch <= 0xef) { - if (text[0] >= 0x80 && text[0] < 0xc0 && text[1] >= 0x80 && text[1] < 0xc0) { - return (unsigned) (((ch & 0xf) << 12) | ((text[0] & 0x3f) << 6) | (text[1] & 0x3f)); + } else if (text[0] <= 0xef) { + if (text[1] >= 0x80 && text[1] < 0xc0 && text[2] >= 0x80 && text[2] < 0xc0) { + *len = 3; + return (unsigned) (((text[0] & 0xf) << 12) | ((text[1] & 0x3f) << 6) | (text[2] & 0x3f)); } - } else if (ch <= 0xf7) { - if (text[0] < 0x80 || text[1] < 0x80 || text[2] < 0x80 || - text[0] >= 0xc0 || text[1] >= 0xc0 || text[2] >= 0xc0) { + } else if (text[0] <= 0xf7) { + if (text[1] < 0x80 || text[2] < 0x80 || text[3] < 0x80 || + text[1] >= 0xc0 || text[2] >= 0xc0 || text[3] >= 0xc0) { + *len = 4; return 0xFFFD; } else { - int w1 = (((ch & 0x7) << 2) | ((text[0] & 0x30) >> 4)) - 1; - int w2 = ((text[1] & 0xf) << 6) | (text[2] & 0x3f); - w1 
= (w1 << 6) | ((text[0] & 0xf) << 2) | ((text[1] & 0x30) >> 4); + *len = 4; + int w1 = (((text[0] & 0x7) << 2) | ((text[1] & 0x30) >> 4)) - 1; + int w2 = ((text[2] & 0xf) << 6) | (text[3] & 0x3f); + w1 = (w1 << 6) | ((text[1] & 0xf) << 2) | ((text[2] & 0x30) >> 4); return (unsigned) (w1 * 0x400 + w2 + 0x10000); } } + *len = 1; return 0xFFFD; } + unsigned char *aux_uni2str(unsigned unic) { unsigned char *buf = lmt_memory_malloc(5); diff --git a/source/luametatex/source/utilities/auxunistring.h b/source/luametatex/source/utilities/auxunistring.h index 4c5ee3639..92f46d91c 100644 --- a/source/luametatex/source/utilities/auxunistring.h +++ b/source/luametatex/source/utilities/auxunistring.h @@ -6,7 +6,8 @@ # define LMT_UTILITIES_UNISTRING_H extern unsigned char *aux_uni2str (unsigned); -extern unsigned aux_str2uni (const unsigned char *); +extern unsigned aux_str2uni (const unsigned char *text); +extern unsigned aux_str2uni_len (const unsigned char *text, int *len); extern char *aux_uni2string (char *utf8_text, unsigned ch); extern unsigned aux_splitutf2uni (unsigned int *ubuf, const char *utf8buf); extern size_t aux_utf8len (const char *text, size_t size); |