diff options
| author | Hans Hagen <pragma@wxs.nl> | 2022-12-15 19:13:44 +0100 | 
|---|---|---|
| committer | Context Git Mirror Bot <phg@phi-gamma.net> | 2022-12-15 19:13:44 +0100 | 
| commit | 8f472c629a996f1c08281198210354a253d7f56b (patch) | |
| tree | 26f5fa0bda520d7ea9e87947df6978b9085a7d58 /source/luametatex | |
| parent | 3bdc9b9072bba774cd5c604fe185d39ddbdc911e (diff) | |
| download | context-8f472c629a996f1c08281198210354a253d7f56b.tar.gz | |
2022-12-15 17:52:00
Diffstat (limited to 'source/luametatex')
28 files changed, 551 insertions, 340 deletions
diff --git a/source/luametatex/source/lua/lmtinterface.h b/source/luametatex/source/lua/lmtinterface.h index 67c3f56d5..c60a78266 100644 --- a/source/luametatex/source/lua/lmtinterface.h +++ b/source/luametatex/source/lua/lmtinterface.h @@ -562,6 +562,7 @@ make_lua_key(L, condition);\  make_lua_key(L, conditional);\  make_lua_key(L, conditionalmathskip);\  make_lua_key(L, connectoroverlapmin);\ +make_lua_key(L, constant);\  make_lua_key(L, container);\  make_lua_key(L, contributehead);\  make_lua_key(L, convert);\ @@ -1127,6 +1128,7 @@ make_lua_key(L, properties);\  make_lua_key(L, proportional);\  make_lua_key(L, protected);\  make_lua_key(L, protected_call);\ +make_lua_key(L, semi_protected_call);\  make_lua_key(L, protrudechars);\  make_lua_key(L, protrusion);\  make_lua_key(L, ptr);\ @@ -1387,6 +1389,7 @@ make_lua_key(L, tolerance);\  make_lua_key(L, tolerant);\  make_lua_key(L, tolerant_call);\  make_lua_key(L, tolerant_protected_call);\ +make_lua_key(L, tolerant_semi_protected_call);\  make_lua_key(L, top);\  make_lua_key(L, topaccent);\  make_lua_key(L, topaccentvariant);\ diff --git a/source/luametatex/source/lua/lmtstatuslib.c b/source/luametatex/source/lua/lmtstatuslib.c index cf665ede2..841ddeec0 100644 --- a/source/luametatex/source/lua/lmtstatuslib.c +++ b/source/luametatex/source/lua/lmtstatuslib.c @@ -254,8 +254,8 @@ static int statslib_getconstants(lua_State *L)      lua_set_integer_by_key(L, "no_catcode_table",             no_catcode_table_preset);      lua_set_integer_by_key(L, "default_catcode_table",        default_catcode_table_preset); -    lua_set_cardinal_by_key(L, "max_cardinal",                 max_cardinal); -    lua_set_cardinal_by_key(L, "min_cardinal",                 min_cardinal); +    lua_set_cardinal_by_key(L,"max_cardinal",                 max_cardinal); +    lua_set_cardinal_by_key(L,"min_cardinal",                 min_cardinal);      lua_set_integer_by_key(L, "max_integer",                  max_integer);      
lua_set_integer_by_key(L, "min_integer",                  min_integer);      lua_set_integer_by_key(L, "max_dimen",                    max_dimen); @@ -268,7 +268,7 @@ static int statslib_getconstants(lua_State *L)      lua_set_integer_by_key(L, "one_bp",                       one_bp); -    lua_set_integer_by_key(L, "infinity",                     infinity); +    lua_set_integer_by_key(L, "infinity",                     max_infinity);      lua_set_integer_by_key(L, "min_infinity",                 min_infinity);      lua_set_integer_by_key(L, "awful_bad",                    awful_bad);      lua_set_integer_by_key(L, "infinite_bad",                 infinite_bad); diff --git a/source/luametatex/source/lua/lmttexlib.c b/source/luametatex/source/lua/lmttexlib.c index 7d9395eb7..0d84eebdd 100644 --- a/source/luametatex/source/lua/lmttexlib.c +++ b/source/luametatex/source/lua/lmttexlib.c @@ -903,7 +903,7 @@ static const char *texlib_aux_scan_integer_part(lua_State *L, const char *ss, in    DONE:      if (overflow) {          luaL_error(L, "number too big"); -        result = infinity; +        result = max_integer;      } else if (vacuous) {          luaL_error(L, "missing number, treated as zero") ;      } @@ -1246,6 +1246,9 @@ int lmt_check_for_flags(lua_State *L, int slot, int *flags, int prefixes, int nu                          } else if (lua_key_eq(str, value)) {                              slot += 1;                              *flags = add_value_flag(*flags); +                        } else if (lua_key_eq(str, constant)) { +                            slot += 1; +                            *flags = add_constant_flag(*flags);                          } else if (lua_key_eq(str, conditional) || lua_key_eq(str, condition)) {                              /* condition will go, conditional stays */                              slot += 1; @@ -2690,7 +2693,7 @@ static int texlib_aux_scan_internal(lua_State *L, int cmd, int code, int values)          default:             
 {                  int texstr = tex_the_scanned_result(); -                char *str = tex_to_cstring(texstr); +                const char *str = tex_to_cstring(texstr);                  if (str) {                      lua_pushstring(L, str);                  } else { @@ -3582,7 +3585,7 @@ static int texlib_enableprimitives(lua_State *L)                      for (int cs = 0; cs < prim_size; cs++) {                          strnumber s = get_prim_text(cs);                          if (s > 0) { -                            char *prm = tex_to_cstring(s); +                            const char *prm = tex_to_cstring(s);                              texlib_aux_enableprimitive(pre, lpre, prm);                          }                      } @@ -4116,7 +4119,7 @@ static int texlib_runlocal(lua_State *L)                  } else {                      halfword ref = eq_value(cs);                      halfword head = token_link(ref); -                    if (head && get_token_parameters(ref)) { +                    if (head && get_token_preamble(ref)) {                          tex_local_control_message("macro takes arguments and is ignored");                          return 0;                      } else { @@ -4605,7 +4608,7 @@ static int texlib_setdimensionvalue(lua_State *L)  static int texlib_aux_getvalue(lua_State *L, halfword level, halfword cs)  {      halfword chr = eq_value(cs); -    if (chr && ! get_token_parameters(chr)) { +    if (chr && ! 
get_token_preamble(chr)) { /* or get_token_parameters as we don't want trailing # */          halfword value = 0;          tex_begin_inserted_list(tex_get_available_token(cs_token_flag + cs));          if (tex_scan_tex_value(level, &value)) { diff --git a/source/luametatex/source/lua/lmttokenlib.c b/source/luametatex/source/lua/lmttokenlib.c index 97ca1b144..41ee6c485 100644 --- a/source/luametatex/source/lua/lmttokenlib.c +++ b/source/luametatex/source/lua/lmttokenlib.c @@ -213,10 +213,10 @@ void lmt_tokenlib_initialize(void)   /* lmt_interface.command_names[string_cmd]                       = (command_item) { .id = string_cmd,                         .lua = lua_key_index(string),                       .name = lua_key(string),                       .kind = regular_command_item,   .min = ignore_entry,              .max = max_integer,                  .base = 0,                       .fixedvalue = 0            }; */      lmt_interface.command_names[call_cmd]                         = (command_item) { .id = call_cmd,                           .lua = lua_key_index(call),                         .name = lua_key(call),                         .kind = token_command_item,     .min = ignore_entry,              .max = ignore_entry,                 .base = ignore_entry,            .fixedvalue = 0            };      lmt_interface.command_names[protected_call_cmd]               = (command_item) { .id = protected_call_cmd,                 .lua = lua_key_index(protected_call),               .name = lua_key(protected_call),               .kind = token_command_item,     .min = ignore_entry,              .max = ignore_entry,                 .base = ignore_entry,            .fixedvalue = 0            }; -    lmt_interface.command_names[semi_protected_call_cmd]          = (command_item) { .id = semi_protected_call_cmd,            .lua = lua_key_index(protected_call),               .name = lua_key(protected_call),               .kind = token_command_item,     .min = ignore_entry,      
        .max = ignore_entry,                 .base = ignore_entry,            .fixedvalue = 0            }; +    lmt_interface.command_names[semi_protected_call_cmd]          = (command_item) { .id = semi_protected_call_cmd,            .lua = lua_key_index(semi_protected_call),          .name = lua_key(protected_call),               .kind = token_command_item,     .min = ignore_entry,              .max = ignore_entry,                 .base = ignore_entry,            .fixedvalue = 0            };      lmt_interface.command_names[tolerant_call_cmd]                = (command_item) { .id = tolerant_call_cmd,                  .lua = lua_key_index(tolerant_call),                .name = lua_key(tolerant_call),                .kind = token_command_item,     .min = ignore_entry,              .max = ignore_entry,                 .base = ignore_entry,            .fixedvalue = 0            };      lmt_interface.command_names[tolerant_protected_call_cmd]      = (command_item) { .id = tolerant_protected_call_cmd,        .lua = lua_key_index(tolerant_protected_call),      .name = lua_key(tolerant_protected_call),      .kind = token_command_item,     .min = ignore_entry,              .max = ignore_entry,                 .base = ignore_entry,            .fixedvalue = 0            }; -    lmt_interface.command_names[tolerant_semi_protected_call_cmd] = (command_item) { .id = tolerant_semi_protected_call_cmd,   .lua = lua_key_index(tolerant_protected_call),      .name = lua_key(tolerant_protected_call),      .kind = token_command_item,     .min = ignore_entry,              .max = ignore_entry,                 .base = ignore_entry,            .fixedvalue = 0            }; +    lmt_interface.command_names[tolerant_semi_protected_call_cmd] = (command_item) { .id = tolerant_semi_protected_call_cmd,   .lua = lua_key_index(tolerant_semi_protected_call), .name = lua_key(tolerant_protected_call),      .kind = token_command_item,     .min = ignore_entry,              .max = ignore_entry,       
          .base = ignore_entry,            .fixedvalue = 0            };      lmt_interface.command_names[deep_frozen_end_template_cmd]     = (command_item) { .id = deep_frozen_end_template_cmd,       .lua = lua_key_index(deep_frozen_cs_end_template),  .name = lua_key(deep_frozen_cs_end_template),  .kind = token_command_item,     .min = ignore_entry,              .max = ignore_entry,                 .base = ignore_entry,            .fixedvalue = 0            };      lmt_interface.command_names[deep_frozen_dont_expand_cmd]      = (command_item) { .id = deep_frozen_dont_expand_cmd,        .lua = lua_key_index(deep_frozen_cs_dont_expand),   .name = lua_key(deep_frozen_cs_dont_expand),   .kind = token_command_item,     .min = ignore_entry,              .max = ignore_entry,                 .base = ignore_entry,            .fixedvalue = 0            };      lmt_interface.command_names[internal_glue_reference_cmd]      = (command_item) { .id = internal_glue_reference_cmd,        .lua = lua_key_index(internal_glue_reference),      .name = lua_key(internal_glue_reference),      .kind = token_command_item,     .min = ignore_entry,              .max = ignore_entry,                 .base = ignore_entry,            .fixedvalue = 0            }; @@ -468,8 +468,9 @@ halfword lmt_token_list_from_lua(lua_State *L, int slot)                      if (s[i] == ascii_space) {                          tok = token_val(spacer_cmd, s[i]);                      } else { -                        int k = (int) aux_str2uni((const unsigned char *) (s + i)); -                        i = i + (size_t) (utf8_size(k)) - 1; +                        int kl;  +                        int k = (int) aux_str2uni_len((const unsigned char *) (s + i), &kl); +                        i = i + kl - 1;                          tok = token_val(other_char_cmd, k);                      }                      p = tex_store_new_token(p, tok); @@ -737,15 +738,15 @@ static void tokenlib_aux_to_token(lua_State *L, int i, 
int m, int *head, int *ta                  const unsigned char *p = (const unsigned char *) s;                  size_t n = aux_utf8len(s, l);                  for (size_t j = 0; j < n; j++) { -                    int ch = *p; -                    halfword x = tex_get_available_token(tokenlib_aux_to_token_val(aux_str2uni(p))); +                    int xl;  +                    halfword x = tex_get_available_token(tokenlib_aux_to_token_val(aux_str2uni_len(p, &xl)));                      if (*head) {                          token_link(*tail) = x;                      } else {                          *head = x;                      }                      *tail = x; -                    p += utf8_size(ch); +                    p += xl;                  }                  break;              } @@ -2370,7 +2371,7 @@ static int tokenlib_getprimitives(lua_State *L)      while (cs < prim_size) {          strnumber s = get_prim_text(cs);          if (s > 0 && (get_prim_origin(cs) != no_command)) { -            char *ss = tex_to_cstring(s); +            const char *ss = tex_to_cstring(s);              int cmd = prim_eq_type(cs);              int chr = prim_equiv(cs);              if (! 
raw) { @@ -2668,6 +2669,19 @@ inline static int tokenlib_get_parameters(lua_State *L)      return 0;  } +inline static int tokenlib_get_constant(lua_State *L) +{ +    lua_token *n = tokenlib_aux_check_istoken(L, 1); +    halfword tok = token_info(n->token); +    int result = 0; +    if (tok >= cs_token_flag && is_call_cmd(eq_type(tok - cs_token_flag))) { +        halfword v = eq_value(tok - cs_token_flag); +        result = v && get_token_reference(v) == max_token_reference; +    } +    lua_pushboolean(L, result); +    return 1; +} +  static int tokenlib_getfield(lua_State *L)  {      const char *s = lua_tostring(L, 2); @@ -2711,6 +2725,8 @@ static int tokenlib_getfield(lua_State *L)          return tokenlib_get_flags(L);      } else if (lua_key_eq(s, parameters)) {          return tokenlib_get_parameters(L); +    } else if (lua_key_eq(s, constant)) { +        return tokenlib_get_constant(L);      } else {          lua_pushnil(L);      } @@ -3229,11 +3245,17 @@ static int tokenlib_set_macro(lua_State *L) /* todo: protected */                  slot = lmt_check_for_flags(L, slot, &flags, 1, 1);              }              if (tex_define_permitted(cs, flags)) { /* we check before we allocate */ -                halfword h = get_reference_token(); -                halfword t = h; +                halfword h;                  if (lstr > 0) { +                    h = get_reference_token();                      /*tex Options: 1=create (will trigger an error), 2=ignore. */ -                    tex_parse_str_to_tok(h, &t, ct, str, lstr, lua_toboolean(L, slot++) ? 2 : 1); +                    tex_parse_str_to_tok(h, null, ct, str, lstr, lua_toboolean(L, slot++) ? 
2 : 1); +                    if (is_constant(flags)) { +                        set_token_reference(h, max_token_reference); +                    } +                } else {  +                    h = lmt_token_state.empty; +                 // tex_add_token_reference(h);                  }                  tex_define(flags, cs, tex_flags_to_cmd(flags), h);              } @@ -3388,7 +3410,7 @@ static int tokenlib_set_char(lua_State *L) /* also in texlib */  /* a weird place, these should be in tex */ -static int tokenlib_set_constant(lua_State *L, singleword cmd, halfword min, halfword max) +static int tokenlib_set_constant_value(lua_State *L, singleword cmd, halfword min, halfword max)  {      int top = lua_gettop(L);      if (top >= 2) { @@ -3409,7 +3431,7 @@ static int tokenlib_set_constant(lua_State *L, singleword cmd, halfword min, hal      return 0;  } -static int tokenlib_get_constant(lua_State *L, halfword cmd) +static int tokenlib_get_constant_value(lua_State *L, halfword cmd)  {      if (lua_type(L, 1) == LUA_TSTRING) {          size_t l; @@ -3428,32 +3450,32 @@ static int tokenlib_get_constant(lua_State *L, halfword cmd)  static int tokenlib_set_integer(lua_State *L)  { -    return tokenlib_set_constant(L, integer_cmd, min_integer, max_integer); +    return tokenlib_set_constant_value(L, integer_cmd, min_integer, max_integer);  }  static int tokenlib_set_dimension(lua_State *L)  { -    return tokenlib_set_constant(L, dimension_cmd, min_dimen, max_dimen); +    return tokenlib_set_constant_value(L, dimension_cmd, min_dimen, max_dimen);  }  // static int tokenlib_set_gluespec(lua_State *L)  // { -//     return tokenlib_set_constant(L, gluespec_cmd, min_dimen, max_dimen); +//     return tokenlib_set_constant_value(L, gluespec_cmd, min_dimen, max_dimen);  // }  static int tokenlib_get_integer(lua_State *L)  { -    return tokenlib_get_constant(L, integer_cmd); +    return tokenlib_get_constant_value(L, integer_cmd);  }  static int 
tokenlib_get_dimension(lua_State *L)  { -    return tokenlib_get_constant(L, dimension_cmd); +    return tokenlib_get_constant_value(L, dimension_cmd);  }  // static int tokenlib_get_gluespec(lua_State *L)  // { -//     return tokenlib_get_constant(L, gluespec_cmd); +//     return tokenlib_get_constant_value(L, gluespec_cmd);  // }  /* @@ -3575,6 +3597,7 @@ static const struct luaL_Reg tokenlib_function_list[] = {      { "getinstance",         tokenlib_get_instance          },      { "getflags",            tokenlib_get_flags             },      { "getparameters",       tokenlib_get_parameters        }, +    { "getconstant",         tokenlib_get_constant          },      { "getmacro",            tokenlib_get_macro             },      { "getmeaning",          tokenlib_get_meaning           },      { "getcmdchrcs",         tokenlib_get_cmdchrcs          }, @@ -3721,11 +3744,13 @@ void lmt_local_call(int slot)      lua_settop(L, stacktop);  } -int lmt_function_call_by_class(int slot, int property, halfword *value) +/*tex We replaced |class| by |category because of g++ issues. */ + +int lmt_function_call_by_category(int slot, int property, halfword *value)  {      lua_State *L = lmt_lua_state.lua_instance;      int stacktop = lua_gettop(L); -    int class = lua_value_none_code; +    int category = lua_value_none_code;      lua_pushcfunction(L, lmt_traceback);      lua_rawgeti(L, LUA_REGISTRYINDEX, lmt_lua_state.function_table_id);      if (lua_rawgeti(L, -1, slot) == LUA_TFUNCTION) { @@ -3744,9 +3769,9 @@ int lmt_function_call_by_class(int slot, int property, halfword *value)              lmt_error(L, "function call", slot, i == LUA_ERRRUN ? 
0 : 1);          } else {              if (lua_type(L, -2) == LUA_TNUMBER) { -                class = lmt_tointeger(L, -2); +                category = lmt_tointeger(L, -2);              } -            switch (class) { +            switch (category) {                  case lua_value_none_code:                      {                          break; @@ -3798,7 +3823,7 @@ int lmt_function_call_by_class(int slot, int property, halfword *value)                  case lua_value_float_code:                  case lua_value_string_code:                      { -                        class = lua_value_none_code; +                        category = lua_value_none_code;                          break;                      }                  case lua_value_boolean_code: @@ -3816,14 +3841,14 @@ int lmt_function_call_by_class(int slot, int property, halfword *value)                          break;                  default:                      { -                        class = lua_value_none_code; +                        category = lua_value_none_code;                          break;                      }              }          }      }      lua_settop(L, stacktop); -    return class; +    return category;  }  /* some day maybe an alternative too diff --git a/source/luametatex/source/lua/lmttokenlib.h b/source/luametatex/source/lua/lmttokenlib.h index 450c6173a..bfc3ed6f2 100644 --- a/source/luametatex/source/lua/lmttokenlib.h +++ b/source/luametatex/source/lua/lmttokenlib.h @@ -33,7 +33,7 @@ extern halfword lmt_token_list_from_lua       (lua_State *L, int slot);  extern halfword lmt_token_code_from_lua       (lua_State *L, int slot);  extern void     lmt_function_call             (int slot, int prefix); -extern int      lmt_function_call_by_class    (int slot, int property, halfword *value); +extern int      lmt_function_call_by_category (int slot, int property, halfword *value);  extern void     lmt_token_call                (int p);  extern void     lmt_local_call          
      (int slot); diff --git a/source/luametatex/source/luametatex.h b/source/luametatex/source/luametatex.h index 07921d53a..973b405ae 100644 --- a/source/luametatex/source/luametatex.h +++ b/source/luametatex/source/luametatex.h @@ -89,7 +89,7 @@  # define luametatex_version          210  # define luametatex_revision         04  # define luametatex_version_string   "2.10.04" -# define luametatex_development_id   20221208 +# define luametatex_development_id   20221214  # define luametatex_name_camelcase   "LuaMetaTeX"  # define luametatex_name_lowercase   "luametatex" diff --git a/source/luametatex/source/tex/texcommands.c b/source/luametatex/source/tex/texcommands.c index c7ec0a2f4..0ad91e420 100644 --- a/source/luametatex/source/tex/texcommands.c +++ b/source/luametatex/source/tex/texcommands.c @@ -774,10 +774,12 @@ void tex_initialize_commands(void)          tex_primitive(tex_command,    "def",                            def_cmd,                def_code,                                 0);          tex_primitive(tex_command,    "xdef",                           def_cmd,                global_expanded_def_code,                 0);          tex_primitive(tex_command,    "gdef",                           def_cmd,                global_def_code,                          0); +        tex_primitive(luatex_command, "cdef",                           def_cmd,                constant_def_code,                        0);          tex_primitive(luatex_command, "edefcsname",                     def_cmd,                expanded_def_csname_code,                 0);          tex_primitive(luatex_command, "defcsname",                      def_cmd,                def_csname_code,                          0);          tex_primitive(luatex_command, "xdefcsname",                     def_cmd,                global_expanded_def_csname_code,          0);          tex_primitive(luatex_command, "gdefcsname",                     def_cmd,                global_def_csname_code,             
      0); +        tex_primitive(luatex_command, "cdefcsname",                     def_cmd,                constant_def_csname_code,                 0);          tex_primitive(tex_command,    "scriptfont",                     define_family_cmd,      script_size,                              0);          tex_primitive(tex_command,    "scriptscriptfont",               define_family_cmd,      script_script_size,                       0); @@ -961,6 +963,7 @@ void tex_initialize_commands(void)          tex_primitive(luatex_command, "semiprotected",                  prefix_cmd,             semiprotected_code,                       0);          tex_primitive(luatex_command, "enforced",                       prefix_cmd,             enforced_code,                            0);          tex_primitive(luatex_command, "inherited",                      prefix_cmd,             inherited_code,                           0); +        tex_primitive(luatex_command, "constant",                       prefix_cmd,             constant_code,                            0);          tex_primitive(tex_command,    "long",                           prefix_cmd,             long_code,                                0);          tex_primitive(tex_command,    "outer",                          prefix_cmd,             outer_code,                               0); @@ -1303,10 +1306,16 @@ void tex_initialize_commands(void)          cs_text(deep_frozen_cs_protection_code) = tex_maketexstring("inaccessible");          cs_text(deep_frozen_cs_end_write_code) = tex_maketexstring("endwrite"); -        set_eq_level(deep_frozen_cs_end_write_code, level_one);          set_eq_type(deep_frozen_cs_end_write_code, call_cmd);          set_eq_flag(deep_frozen_cs_end_write_code, 0);          set_eq_value(deep_frozen_cs_end_write_code, null); +        set_eq_level(deep_frozen_cs_end_write_code, level_one); + +        /*tex The empty list reference should be reassigned after compacting! 
*/ + +        lmt_token_state.empty = get_reference_token(); +     // tex_add_token_reference(lmt_token_state.empty); +        set_token_reference(lmt_token_state.empty, max_token_reference);          lmt_string_pool_state.reserved = lmt_string_pool_state.string_pool_data.ptr;          lmt_hash_state.no_new_cs = 1; diff --git a/source/luametatex/source/tex/texcommands.h b/source/luametatex/source/tex/texcommands.h index 55de1dce6..8df61a4db 100644 --- a/source/luametatex/source/tex/texcommands.h +++ b/source/luametatex/source/tex/texcommands.h @@ -797,6 +797,11 @@ typedef enum local_control_codes {      bits for this but we don't have enough. Now, because frozen macros can be unfrozen we can      indeed have a prefix that bypasses the check. Explicit (re)definitions are then up to the user. +    Constant macros are special in the sense that we set the reference count to the maximum. This is  +    then a signal that we have an expanded macro with a meaning that we can immediately copy into  +    the expanded token list, as in csname construction. This saves some memory access and token  +    allocation.  
+  */  typedef enum prefix_codes { @@ -820,6 +825,7 @@ typedef enum prefix_codes {      enforced_code,      always_code,      inherited_code, +    constant_code,      long_code,      outer_code,  } prefix_codes; @@ -859,9 +865,11 @@ typedef enum def_codes {      def_csname_code,      global_expanded_def_csname_code,      global_def_csname_code, +    constant_def_code, +    constant_def_csname_code,  } def_codes; -# define last_def_code global_def_csname_code +# define last_def_code constant_def_csname_code  typedef enum let_codes {      global_let_code, diff --git a/source/luametatex/source/tex/texconditional.c b/source/luametatex/source/tex/texconditional.c index 2197e9065..925e9fac9 100644 --- a/source/luametatex/source/tex/texconditional.c +++ b/source/luametatex/source/tex/texconditional.c @@ -555,9 +555,7 @@ void tex_conditional_if(halfword code, int unless)              }              goto RESULT;          case if_zero_int_code: -            { -                result = tex_scan_int(0, NULL) == 0; -            } +            result = tex_scan_int(0, NULL) == 0;              goto RESULT;          case if_abs_dim_code:          case if_dim_code: @@ -587,15 +585,10 @@ void tex_conditional_if(halfword code, int unless)              }              goto RESULT;          case if_zero_dim_code: -            { -                result = tex_scan_dimen(0, 0, 0, 0, NULL) == 0; -            } +            result = tex_scan_dimen(0, 0, 0, 0, NULL) == 0;              goto RESULT;          case if_odd_code: -            { -                halfword v = tex_scan_int(0, NULL); -                result = odd(v); -            } +            result = odd(tex_scan_int(0, NULL));              goto RESULT;          case if_vmode_code:              result = abs(cur_list.mode) == vmode; @@ -970,8 +963,7 @@ void tex_conditional_if(halfword code, int unless)                      halfword t = token_info(lmt_input_state.cur_input.loc);                      lmt_input_state.cur_input.loc = 
token_link(lmt_input_state.cur_input.loc);                      if (t < cs_token_flag && token_cmd(t) == parameter_reference_cmd) { -                      // result = token_info(input_state.parameter_stack[input_state.cur_input.parameter_start + token_chr(t) - 1]) != null ? 1 : 2; -                         result = lmt_input_state.parameter_stack[lmt_input_state.cur_input.parameter_start + token_chr(t) - 1] != null ? 1 : 2; +                        result = lmt_input_state.parameter_stack[lmt_input_state.cur_input.parameter_start + token_chr(t) - 1] != null ? 1 : 2;                      }                  }                  goto CASE; @@ -1103,19 +1095,19 @@ void tex_conditional_if(halfword code, int unless)       //     }          default:              { -                int class; +                int category;                  strnumber u = tex_save_cur_string();                  int save_scanner_status = lmt_input_state.scanner_status;                  lmt_input_state.scanner_status = scanner_is_normal;                  lmt_token_state.luacstrings = 0; -                class = lmt_function_call_by_class(code - last_if_test_code, 0, &result); +                category = lmt_function_call_by_category(code - last_if_test_code, 0, &result);                  tex_restore_cur_string(u);                  lmt_input_state.scanner_status = save_scanner_status;                  if (lmt_token_state.luacstrings > 0) {                      tex_lua_string_start();                      /* bad */                  } -                switch (class) { +                switch (category) {                      case lua_value_integer_code:                      case lua_value_cardinal_code:                      case lua_value_dimension_code: diff --git a/source/luametatex/source/tex/texdumpdata.h b/source/luametatex/source/tex/texdumpdata.h index 02514bdf3..4f3450ef4 100644 --- a/source/luametatex/source/tex/texdumpdata.h +++ b/source/luametatex/source/tex/texdumpdata.h @@ -55,7 +55,7 
@@  */ -# define luametatex_format_fingerprint 678 +# define luametatex_format_fingerprint 679  /* These end up in the string pool. */ diff --git a/source/luametatex/source/tex/texequivalents.c b/source/luametatex/source/tex/texequivalents.c index bdf21446e..223e46e15 100644 --- a/source/luametatex/source/tex/texequivalents.c +++ b/source/luametatex/source/tex/texequivalents.c @@ -243,6 +243,7 @@ void tex_dump_equivalents_mem(dumpstream f)      /*tex A special register. */      dump_int(f, lmt_token_state.par_loc);   /* dump_int(f, lmt_token_state.line_par_loc); */ /*tex See note in textoken.c|. */ +    dump_int(f, lmt_token_state.empty);  }  void tex_undump_equivalents_mem(dumpstream f) @@ -284,6 +285,7 @@ void tex_undump_equivalents_mem(dumpstream f)   /* } else { */   /*     tex_fatal_undump_error("lineparloc"); */   /* } */ +    undump_int(f, lmt_token_state.empty);      return;  } diff --git a/source/luametatex/source/tex/texequivalents.h b/source/luametatex/source/tex/texequivalents.h index 336c9e206..c84b90226 100644 --- a/source/luametatex/source/tex/texequivalents.h +++ b/source/luametatex/source/tex/texequivalents.h @@ -1155,6 +1155,7 @@ typedef enum flag_bit {      value_flag_bit         = 0x08000,      semiprotected_flag_bit = 0x10000,      inherited_flag_bit     = 0x20000, +    constant_flag_bit      = 0x40000,  } flag_bits;  /*tex Flags: */ @@ -1180,6 +1181,7 @@ typedef enum flag_bit {  # define add_conditional_flag(a)    ((a) | conditional_flag_bit)  # define add_value_flag(a)          ((a) | value_flag_bit)  # define add_inherited_flag(a)      ((a) | inherited_flag_bit) +# define add_constant_flag(a)       ((a) | constant_flag_bit)  # define remove_flag(a,b)           ((a) & ~(b)) @@ -1220,6 +1222,7 @@ typedef enum flag_bit {  # define is_conditional(a)          (((a) & conditional_flag_bit)   == conditional_flag_bit)  # define is_value(a)                (((a) & value_flag_bit)         == value_flag_bit)  # define is_inherited(a)            (((a) & 
inherited_flag_bit)     == inherited_flag_bit) +# define is_constant(a)             (((a) & constant_flag_bit)      == constant_flag_bit)  # define is_expandable(cmd)         (cmd > max_command_cmd) diff --git a/source/luametatex/source/tex/texexpand.c b/source/luametatex/source/tex/texexpand.c index 8a2fa79a0..f257f8b0f 100644 --- a/source/luametatex/source/tex/texexpand.c +++ b/source/luametatex/source/tex/texexpand.c @@ -88,7 +88,8 @@ inline static void tex_aux_expand_after(void)      if (cur_cmd > max_command_cmd) {          tex_expand_current_token();      } else { -        tex_back_input(t2); +         tex_back_input(t2); +      /* token_link(t1) = t2; */ /* no gain, rarely happens */      }      tex_back_input(t1);  } @@ -615,7 +616,6 @@ inline static int tex_aux_uni_to_buffer(unsigned char *b, int m, int c)      much sense. It also long token lists that never (should) match anyway.  */ -  static int tex_aux_collect_cs_tokens(halfword *p, int *n)  {      while (1) { @@ -650,7 +650,17 @@ static int tex_aux_collect_cs_tokens(halfword *p, int *n)              */              case call_cmd:              case tolerant_call_cmd: -                tex_aux_macro_call(cur_cs, cur_cmd, cur_chr); +                if (get_token_reference(cur_chr) == max_token_reference) { // ! get_token_parameters(cur_chr)) { +                    /* we avoid the macro stack and expansion and we don't trace either */ +                    halfword h = token_link(cur_chr); +                    while (h) { +                        *p = tex_store_new_token(*p, token_info(h)); +                        *n += 1; +                        h = token_link(h); +                    } +                } else { +                    tex_aux_macro_call(cur_cs, cur_cmd, cur_chr); +                }                  break;              case end_cs_name_cmd:                  return 1; @@ -677,7 +687,7 @@ int tex_is_valid_csname(void)              tex_get_x_or_protected(); /* we skip unprotected ! 
*/          } while (cur_cmd != end_cs_name_cmd);          goto FINISH; -        /* no real gain: */ +        /* no real gain as we hardly ever end up here */       // while (1) {       //     tex_get_token();       //     if (cur_cmd == end_cs_name_cmd) { @@ -941,6 +951,16 @@ int tex_get_parameter_count(void)      return n;  } +/*tex  +    We can avoid the copy of parameters to the stack but it complicates the code because we also need  +    to clean up the previous set of parameters etc. It's not worth the effort. However, there are  +    plenty of optimizations compared to the original. Some are measurable on an average run, others +    are more likely to increase performance when thousands of successive runs happen in e.g. a virtual  +    environment where threads fight for memory access and cpu cache. And because \CONTEXT\ is us used  +    that way we keep looking into ways to gain performance, but not at the cost of dirty hacks (that  +    I tried out of curiosity but rejected in the end).  +*/ +  static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)  {      int tracing = tracing_macros_par > 0; @@ -955,7 +975,7 @@ static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)          if (is_untraced(eq_flag(cs))) {              tracing = 0;          } else { -            if (! get_token_parameters(chr)) { +            if (! 
get_token_preamble(chr)) {                  tex_print_str("->");              } else {                  /* maybe move the preamble scanner to here */ @@ -964,14 +984,14 @@ static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)          }          tex_end_diagnostic();      } -    if (get_token_parameters(chr)) { +    if (get_token_preamble(chr)) {          halfword matchpointer = token_link(chr);          halfword matchtoken = token_info(matchpointer);          int save_scanner_status = lmt_input_state.scanner_status;          halfword save_warning_index = lmt_input_state.warning_index;          int nofscanned = 0;          int nofarguments = 0; -        halfword pstack[9]; /* We could go for 15 if we accept |#A-#F|. */ +        halfword pstack[max_match_count];           /*tex              Scan the parameters and make |link(r)| point to the macro body; but |return| if an              illegal |\par| is detected. @@ -1334,7 +1354,7 @@ static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)                      ++nofscanned;                      if (tracing) {                          tex_begin_diagnostic(); -                        tex_print_format("%c%i<-", match_visualizer, nofscanned); +                        tex_print_format("%c%c<-", match_visualizer, '0' + nofscanned + (nofscanned > 9 ? 
gap_match_count : 0));                          tex_show_token_list(pstack[nofscanned - 1], null, default_token_show_max, 0);                          tex_end_diagnostic();                      } diff --git a/source/luametatex/source/tex/texinputstack.c b/source/luametatex/source/tex/texinputstack.c index e73451226..52262e486 100644 --- a/source/luametatex/source/tex/texinputstack.c +++ b/source/luametatex/source/tex/texinputstack.c @@ -62,9 +62,15 @@ input_file_state_info input_file_state = {      .line        = 0,  }; -#define reserved_input_stack_slots  2 -#define reserved_in_stack_slots     2 -#define reserved_param_stack_slots 10 /*tex We play safe and always keep 10 in reserve (we have 9 max anyway). */ +/*tex  +    We play safe and always keep a few batches of parameter slots in reserve so that we  +    are unlikely to overrun. +*/ + +# define reserved_input_stack_slots  2 +# define reserved_in_stack_slots     2 +//define reserved_param_stack_slots 32                     +# define reserved_param_stack_slots (2 * max_match_count)   void tex_initialize_input_state(void)  { @@ -793,7 +799,7 @@ void tex_end_token_list(void)          case macro_text:              {                  tex_delete_token_reference(lmt_input_state.cur_input.start); -                if (get_token_parameters(lmt_input_state.cur_input.start)) { +                if (get_token_preamble(lmt_input_state.cur_input.start)) {                      /*tex Parameters must be flushed: */                      int ptr = lmt_input_state.parameter_stack_data.ptr;                      int start = lmt_input_state.cur_input.parameter_start; @@ -850,10 +856,17 @@ void tex_cleanup_input_state(void)                      ptr = lmt_input_state.parameter_stack_data.ptr;                      start = lmt_input_state.cur_input.parameter_start;                      while (ptr > start) { -                        --ptr; -                        if (lmt_input_state.parameter_stack[ptr]) { +                        if 
(lmt_input_state.parameter_stack[--ptr]) {                              tex_flush_token_list(lmt_input_state.parameter_stack[ptr]);                          } +                     // halfword p = lmt_input_state.parameter_stack[--ptr]; +                     // if (p) { +                     //     if (! token_link(p)) { +                     //         tex_put_available_token(p); /* very little gain on average */ +                     //     } else {  +                     //         tex_flush_token_list(p); +                     //     } +                     // }                      }                      lmt_input_state.parameter_stack_data.ptr = start;                      break; diff --git a/source/luametatex/source/tex/texlanguage.c b/source/luametatex/source/tex/texlanguage.c index 0fcd3b243..200ffbd1e 100644 --- a/source/luametatex/source/tex/texlanguage.c +++ b/source/luametatex/source/tex/texlanguage.c @@ -1279,8 +1279,9 @@ static int tex_aux_still_okay(halfword f, halfword l, halfword r, int n, const c                  tex_normal_warning("language", "the hyphenated word contains non-glyphs, skipping");                  return 0;              } else { -                halfword c = (halfword) aux_str2uni((const unsigned char *) utf8original); -                utf8original += utf8_size(c); +                int cl;  +                halfword c = (halfword) aux_str2uni_len((const unsigned char *) utf8original, &cl); +                utf8original += cl;                  if (! 
(c && c == glyph_character(f))) {                      tex_normal_warning("language", "the hyphenated word contains different characters, skipping");                      return 0; diff --git a/source/luametatex/source/tex/texmaincontrol.c b/source/luametatex/source/tex/texmaincontrol.c index 24729d8cb..dbb52ab15 100644 --- a/source/luametatex/source/tex/texmaincontrol.c +++ b/source/luametatex/source/tex/texmaincontrol.c @@ -814,10 +814,10 @@ typedef enum saved_localbox_items {  static void tex_aux_scan_local_box(int code) {      quarterword options = 0; -    halfword class = 0; -    tex_scan_local_boxes_keys(&options, &class); +    halfword index = 0; +    tex_scan_local_boxes_keys(&options, &index);      tex_set_saved_record(saved_localbox_item_location, local_box_location_save_type, 0, code); -    tex_set_saved_record(saved_localbox_item_index, local_box_index_save_type, 0, class); +    tex_set_saved_record(saved_localbox_item_index, local_box_index_save_type, 0, index);      tex_set_saved_record(saved_localbox_item_options, local_box_options_save_type, 0, options);      lmt_save_state.save_stack_data.ptr += saved_localbox_n_of_items;      tex_new_save_level(local_box_group); @@ -894,23 +894,6 @@ static void tex_aux_finish_local_box(void)      }  } -// static void tex_aux_run_leader(void) { -//     switch (cur_chr) { -//         case a_leaders_code: -//             tex_aux_scan_box(a_leaders_flag, 0, 0); -//             break; -//         case c_leaders_code: -//             tex_aux_scan_box(c_leaders_flag, 0, 0); -//             break; -//         case x_leaders_code: -//             tex_aux_scan_box(x_leaders_flag, 0, 0); -//             break; -//         case g_leaders_code: -//             tex_aux_scan_box(g_leaders_flag, 0, 0); -//             break; -//     } -// } -  static int leader_flags[] = {      a_leaders_flag,      c_leaders_flag, @@ -1244,6 +1227,10 @@ static void tex_aux_run_text_boundary(void) {          case protrusion_boundary:              
boundary_data(n) = tex_scan_int(0, NULL);              break; +        case page_boundary: +            /* or maybe force vmode */ +            tex_scan_int(0, NULL); +            break;          default:              break;      } @@ -1260,6 +1247,7 @@ static void tex_aux_run_math_boundary(void) {                  break;              }          case protrusion_boundary: +        case page_boundary:              tex_scan_int(0, NULL);              break;      } @@ -4479,7 +4467,7 @@ static void tex_aux_set_define_font(int a)  static void tex_aux_set_def(int a, int force)  { -    halfword expand = 0; +    int expand = 0;      switch (cur_chr) {          case expanded_def_code:              expand = 1; @@ -4505,6 +4493,15 @@ static void tex_aux_set_def(int a, int force)              cur_cs = tex_create_csname();              a = add_global_flag(a);              goto DONE; +        case constant_def_code: +            expand = 2; +            a = add_constant_flag(a); +            break; +        case constant_def_csname_code: +            expand = 2; +            cur_cs = tex_create_csname(); +            a = add_constant_flag(a); +            goto DONE;      }      tex_get_r_token();    DONE: @@ -4513,7 +4510,13 @@ static void tex_aux_set_def(int a, int force)      }      if (force || tex_define_permitted(cur_cs, a)) {          halfword p = cur_cs; -        halfword t = expand ? tex_scan_macro_expand() : tex_scan_macro_normal(); +        halfword t = expand == 2 ? tex_scan_toks_expand(0, null, 1) : (expand ? tex_scan_macro_expand() : tex_scan_macro_normal()); +        if (is_constant(a)) { +            /* todo: check if already defined or just accept a leak */ +            set_token_reference(t, max_token_reference); +        } else if (! 
token_link(t)) {  +            t = lmt_token_state.empty; /* maybe in tex_define */ +        }          tex_define(a, p, tex_flags_to_cmd(a), t);      }  } @@ -4674,7 +4677,14 @@ static void tex_aux_set_let(int a, int force)                  a = add_global_flag(a);              }              if (force || tex_define_permitted(cur_cs, a)) { -                tex_define(a, cur_cs, tex_flags_to_cmd(a), get_reference_token()); +                /*tex  +                    The commented line permits plenty empty definitions, a |\let| can run out of  +                    ref count so maybe some day \unknown  +                */ +             // halfword empty = get_reference_token(); +                halfword empty = lmt_token_state.empty; +             // tex_add_token_reference(empty); +                tex_define(a, cur_cs, tex_flags_to_cmd(a), empty);              }              return;          default: @@ -4711,7 +4721,7 @@ static void tex_aux_set_let(int a, int force)          }          tex_define_inherit(a, p, (singleword) newf, (singleword) cmd, cur_chr);      } else { -        tex_define(a, p, (singleword) cur_cmd, cur_chr); +        tex_define(a, p, (singleword) cur_cmd, cur_chr);       }  } @@ -4929,18 +4939,18 @@ static void tex_aux_set_math_parameter(int a)          case math_parameter_let_spacing:          case math_parameter_let_atom_rule:              { -                halfword class = tex_scan_math_class_number(0); +                halfword mathclass = tex_scan_math_class_number(0);                  halfword display = tex_scan_math_class_number(1);                  halfword text = tex_scan_math_class_number(0);                  halfword script = tex_scan_math_class_number(0);                  halfword scriptscript = tex_scan_math_class_number(0); -                if (valid_math_class_code(class)) { +                if (valid_math_class_code(mathclass)) {                      switch (code) {                          case math_parameter_let_spacing: -      
                      code = internal_int_location(first_math_class_code + class); +                            code = internal_int_location(first_math_class_code + mathclass);                              break;                          case math_parameter_let_atom_rule: -                            code = internal_int_location(first_math_atom_code + class); +                            code = internal_int_location(first_math_atom_code + mathclass);                              break;                      }                      value = (display << 24) + (text << 16) + (script << 8) + scriptscript; @@ -4959,20 +4969,20 @@ static void tex_aux_set_math_parameter(int a)          case math_parameter_copy_atom_rule:          case math_parameter_copy_parent:              { -                halfword class = tex_scan_math_class_number(0); +                halfword mathclass = tex_scan_math_class_number(0);                  halfword parent = tex_scan_math_class_number(1); -                if (valid_math_class_code(class) && valid_math_class_code(parent)) { +                if (valid_math_class_code(mathclass) && valid_math_class_code(parent)) {                      switch (code) {                          case math_parameter_copy_spacing: -                            code = internal_int_location(first_math_class_code + class); +                            code = internal_int_location(first_math_class_code + mathclass);                              value = count_parameter(first_math_class_code + parent);                              break;                          case math_parameter_copy_atom_rule: -                            code = internal_int_location(first_math_atom_code + class); +                            code = internal_int_location(first_math_atom_code + mathclass);                              value = count_parameter(first_math_atom_code + parent);                              break;                          case math_parameter_copy_parent: -                     
       code = internal_int_location(first_math_parent_code + class); +                            code = internal_int_location(first_math_parent_code + mathclass);                              value = count_parameter(first_math_parent_code + parent);                              break;                      } @@ -4991,21 +5001,21 @@ static void tex_aux_set_math_parameter(int a)          case math_parameter_set_display_pre_penalty:          case math_parameter_set_display_post_penalty:              { -                halfword class = tex_scan_math_class_number(0); +                halfword mathclass = tex_scan_math_class_number(0);                  halfword penalty = tex_scan_int(1, NULL); -                if (valid_math_class_code(class)) { +                if (valid_math_class_code(mathclass)) {                      switch (code) {                          case math_parameter_set_pre_penalty: -                            code = internal_int_location(first_math_pre_penalty_code + class); +                            code = internal_int_location(first_math_pre_penalty_code + mathclass);                              break;                          case math_parameter_set_post_penalty: -                            code = internal_int_location(first_math_post_penalty_code + class); +                            code = internal_int_location(first_math_post_penalty_code + mathclass);                              break;                          case math_parameter_set_display_pre_penalty: -                            code = internal_int_location(first_math_display_pre_penalty_code + class); +                            code = internal_int_location(first_math_display_pre_penalty_code + mathclass);                              break;                          case math_parameter_set_display_post_penalty: -                            code = internal_int_location(first_math_display_post_penalty_code + class); +                            code = 
internal_int_location(first_math_display_post_penalty_code + mathclass);                              break;                      }                      tex_word_define(a, code, penalty); @@ -5021,13 +5031,13 @@ static void tex_aux_set_math_parameter(int a)              }          case math_parameter_let_parent:              { -                halfword class = tex_scan_math_class_number(0); +                halfword mathclass = tex_scan_math_class_number(0);                  halfword pre = tex_scan_math_class_number(1);                  halfword post = tex_scan_math_class_number(0);                  halfword options = tex_scan_math_class_number(0);                  halfword reserved = tex_scan_math_class_number(0); -                if (valid_math_class_code(class)) { -                    code = internal_int_location(first_math_parent_code + class); +                if (valid_math_class_code(mathclass)) { +                    code = internal_int_location(first_math_parent_code + mathclass);                      value = (reserved << 24) + (options << 16) + (pre << 8) + post;                      tex_word_define(a, code, value);                   // tex_assign_internal_int_value(a, code, value); @@ -5052,9 +5062,9 @@ static void tex_aux_set_math_parameter(int a)              }          case math_parameter_options:              { -                halfword class = tex_scan_math_class_number(0); -                if (valid_math_class_code(class)) { -                    code = internal_int_location(first_math_options_code + class); +                halfword mathclass = tex_scan_math_class_number(0); +                if (valid_math_class_code(mathclass)) { +                    code = internal_int_location(first_math_options_code + mathclass);                      value = tex_scan_int(1, NULL);                      tex_word_define(a, code, value);                   // tex_assign_internal_int_value(a, code, value); @@ -5409,6 +5419,7 @@ void tex_run_prefixed_command(void)     
         case always_code:        flags = add_aliased_flag      (flags); force = 1; break;              /*tex This one is special */              case inherited_code:     flags = add_inherited_flag    (flags); break; +            case constant_code:      flags = add_constant_flag     (flags); break;              default:                  goto PICKUP;          } @@ -5956,7 +5967,7 @@ static void tex_aux_run_message(void)                  strnumber s = tex_aux_scan_string();                  if (error_help_par) {                      strnumber helpinfo = tex_tokens_to_string(error_help_par); -                    char *h = tex_to_cstring(helpinfo); +                    const char *h = tex_to_cstring(helpinfo);                      tex_handle_error(                          normal_error_type,                          "%T", diff --git a/source/luametatex/source/tex/texmarks.c b/source/luametatex/source/tex/texmarks.c index 01e002fbd..c967beb4b 100644 --- a/source/luametatex/source/tex/texmarks.c +++ b/source/luametatex/source/tex/texmarks.c @@ -21,8 +21,6 @@      Watch out: zero is always valid and the good old single mark! 
-    Todo: class -> index -  */  mark_state_info lmt_mark_state = { @@ -115,23 +113,23 @@ int tex_valid_mark(halfword m) {      return m < lmt_mark_state.mark_data.top;  } -halfword tex_new_mark(quarterword subtype, halfword class, halfword ptr) +halfword tex_new_mark(quarterword subtype, halfword index, halfword ptr)  {      halfword mark = tex_new_node(mark_node, subtype); -    mark_index(mark) = class; +    mark_index(mark) = index;      mark_ptr(mark) = ptr;      if (lmt_mark_state.min_used < 0) { -        lmt_mark_state.min_used = class; -        lmt_mark_state.max_used = class; +        lmt_mark_state.min_used = index; +        lmt_mark_state.max_used = index;      } else { -        if (class < lmt_mark_state.min_used) { -            lmt_mark_state.min_used = class; +        if (index < lmt_mark_state.min_used) { +            lmt_mark_state.min_used = index;          } -        if (class > lmt_mark_state.max_used) { -            lmt_mark_state.max_used = class; +        if (index > lmt_mark_state.max_used) { +            lmt_mark_state.max_used = index;          }      } -    tex_set_mark(class, current_marks_code, ptr); +    tex_set_mark(index, current_marks_code, ptr);      return mark;  } @@ -315,16 +313,16 @@ int tex_has_mark(halfword m)  void tex_run_mark(void)  { -    halfword class = 0; +    halfword index = 0;      halfword code = cur_chr;      switch (code) {          case set_marks_code:          case clear_marks_code:          case flush_marks_code: -            class = tex_scan_mark_number(); +            index = tex_scan_mark_number();              break;      } -    if (tex_valid_mark(class)) { +    if (tex_valid_mark(index)) {          quarterword subtype = set_mark_value_code;          halfword ptr = null;          switch (code) { @@ -333,13 +331,13 @@ void tex_run_mark(void)                  ptr = tex_scan_toks_expand(0, NULL, 0);                  break;              case clear_marks_code: -                tex_wipe_mark(class); +              
  tex_wipe_mark(index);                  return;              case flush_marks_code:                  subtype = reset_mark_value_code;                  break;          } -        tex_tail_append(tex_new_mark(subtype, class, ptr)); +        tex_tail_append(tex_new_mark(subtype, index, ptr));      } else {          /* error already issued */      } diff --git a/source/luametatex/source/tex/texmarks.h b/source/luametatex/source/tex/texmarks.h index e787fa9d0..9ce819f07 100644 --- a/source/luametatex/source/tex/texmarks.h +++ b/source/luametatex/source/tex/texmarks.h @@ -50,7 +50,7 @@ extern void     tex_reset_mark                (halfword m);  extern void     tex_wipe_mark                 (halfword m);  extern void     tex_delete_mark               (halfword m, int what);  extern halfword tex_get_some_mark             (halfword chr, halfword val); -extern halfword tex_new_mark                  (quarterword subtype, halfword cls, halfword ptr); +extern halfword tex_new_mark                  (quarterword subtype, halfword index, halfword ptr);  extern void     tex_update_top_marks          (void);  extern void     tex_update_first_and_bot_mark (halfword m);  extern void     tex_update_first_marks        (void); diff --git a/source/luametatex/source/tex/texmath.c b/source/luametatex/source/tex/texmath.c index 327e8e6a3..216ba553b 100644 --- a/source/luametatex/source/tex/texmath.c +++ b/source/luametatex/source/tex/texmath.c @@ -1929,9 +1929,9 @@ static void tex_aux_append_math_accent(mathcodeval mval, mathdictval dval)  */ -static void tex_aux_append_math_fence(halfword fence, quarterword class) +static void tex_aux_append_math_fence(halfword fence, quarterword mathclass)  { -    switch (class) { +    switch (mathclass) {          case open_noad_subtype:              {                  tex_aux_push_math(math_fence_group, cur_list.math_style); @@ -1968,7 +1968,7 @@ static void tex_aux_append_math_fence(halfword fence, quarterword class)      }  } -static void 
tex_aux_append_math_fence_val(mathcodeval mval, mathdictval dval, quarterword class) +static void tex_aux_append_math_fence_val(mathcodeval mval, mathdictval dval, quarterword mathclass)  {      halfword fence = tex_new_node(fence_noad, middle_fence_side);      halfword delimiter = tex_new_node(delimiter_node, mval.class_value); @@ -1981,10 +1981,10 @@ static void tex_aux_append_math_fence_val(mathcodeval mval, mathdictval dval, qu      set_noad_classes(fence, mval.class_value);      /* todo : share the next three with the regular fences */      noad_options(fence) |= noad_option_no_check; -    if (class == middle_noad_subtype && cur_group != math_fence_group) {  +    if (mathclass == middle_noad_subtype && cur_group != math_fence_group) {           tex_aux_append_math_fence_val(tex_no_math_code(), tex_no_dict_code(), open_noad_subtype);      } -    tex_aux_append_math_fence(fence, class); +    tex_aux_append_math_fence(fence, mathclass);  }  static void tex_aux_append_math_char(mathcodeval mval, mathdictval dval, int automatic) @@ -2147,9 +2147,9 @@ int tex_scan_math_code_val(halfword code, mathcodeval *mval, mathdictval *dval)          case math_class_number_code:              {                  halfword family = cur_fam_par; -                halfword class  = tex_scan_int(0, NULL); +                halfword mathclass  = tex_scan_int(0, NULL);                  tex_scan_math_cmd_val(mval, dval); -                mval->class_value = (short) class; +                mval->class_value = (short) mathclass;                  mval->family_value = (short) family;              }              break; @@ -2518,7 +2518,7 @@ void tex_run_math_modifier(void)  */ -static void tex_aux_scan_delimiter(halfword target, int code, int class) +static void tex_aux_scan_delimiter(halfword target, int code, int mathclass)  {      delcodeval dval = tex_no_del_code();      mathcodeval mval = tex_no_math_code(); @@ -2584,8 +2584,8 @@ static void tex_aux_scan_delimiter(halfword target, int 
code, int class)              goto REALDELIMITER;      }    FAKEDELIMITER: -    if (class != unset_noad_class) { -        mval.class_value = (short) class;  +    if (mathclass != unset_noad_class) { +        mval.class_value = (short) mathclass;       }      dval.small = mval;      dval.large = mval; @@ -3451,7 +3451,7 @@ void tex_run_math_fraction(void)          halfword userstyle = -1;          halfword attrlist = null;          fullword options = 0; -        halfword class = fraction_noad_subtype; +        halfword mathclass = fraction_noad_subtype;          halfword rulethickness = preset_rule_thickness;          int ruledone = 0;          fraction_h_factor(fraction) = 1000; @@ -3581,7 +3581,7 @@ void tex_run_math_fraction(void)                              if (tex_scan_mandate_keyword("class", 1)) {                                  halfword c = (quarterword) tex_scan_math_class_number(0);                                  if (valid_math_class_code(c)) { -                                    class = c; +                                    mathclass = c;                                  }                              }                              break; @@ -3673,7 +3673,7 @@ void tex_run_math_fraction(void)          }          fraction_rule_thickness(fraction) = rulethickness;          noad_options(fraction) = options; -        set_noad_main_class(fraction, class); +        set_noad_main_class(fraction, mathclass);          if (attrlist) {              tex_attach_attribute_list_attribute(fraction, attrlist);          } @@ -5183,16 +5183,16 @@ void tex_reset_all_styles(halfword level)      }  } -inline static halfword tex_aux_math_class_default(halfword class) { -    return (class << 24) + (class << 16) + (class << 8) + class; +inline static halfword tex_aux_math_class_default(halfword mathclass) { +    return (mathclass << 24) + (mathclass << 16) + (mathclass << 8) + mathclass;  } -inline static void tex_set_math_class_default(halfword class, halfword parent, 
halfword options) +inline static void tex_set_math_class_default(halfword mathclass, halfword parent, halfword options)  { -    tex_word_define(0, internal_int_location(first_math_class_code   + class), tex_aux_math_class_default(parent)); -    tex_word_define(0, internal_int_location(first_math_atom_code    + class), tex_aux_math_class_default(class)); -    tex_word_define(0, internal_int_location(first_math_options_code + class), options); -    tex_word_define(0, internal_int_location(first_math_parent_code  + class), tex_aux_math_class_default(class)); +    tex_word_define(0, internal_int_location(first_math_class_code   + mathclass), tex_aux_math_class_default(parent)); +    tex_word_define(0, internal_int_location(first_math_atom_code    + mathclass), tex_aux_math_class_default(mathclass)); +    tex_word_define(0, internal_int_location(first_math_options_code + mathclass), options); +    tex_word_define(0, internal_int_location(first_math_parent_code  + mathclass), tex_aux_math_class_default(mathclass));  }  static void tex_aux_set_math_atom_rule(halfword left, halfword right, halfword newleft, halfword newright) @@ -5203,13 +5203,13 @@ static void tex_aux_set_math_atom_rule(halfword left, halfword right, halfword n  void tex_initialize_math_spacing(void)  { -    for (int class = 0; class <= max_math_class_code; class++) { -        tex_set_math_class_default(class, class, no_class_options); +    for (int mathclass = 0; mathclass <= max_math_class_code; mathclass++) { +        tex_set_math_class_default(mathclass, mathclass, no_class_options);          /*tex We do this here as there is no real need for yet another initializer. 
*/ -        tex_word_define(0, internal_int_location(first_math_pre_penalty_code  + class), infinite_penalty); -        tex_word_define(0, internal_int_location(first_math_post_penalty_code + class), infinite_penalty); -        tex_word_define(0, internal_int_location(first_math_display_pre_penalty_code  + class), infinite_penalty); -        tex_word_define(0, internal_int_location(first_math_display_post_penalty_code + class), infinite_penalty); +        tex_word_define(0, internal_int_location(first_math_pre_penalty_code  + mathclass), infinite_penalty); +        tex_word_define(0, internal_int_location(first_math_post_penalty_code + mathclass), infinite_penalty); +        tex_word_define(0, internal_int_location(first_math_display_pre_penalty_code  + mathclass), infinite_penalty); +        tex_word_define(0, internal_int_location(first_math_display_post_penalty_code + mathclass), infinite_penalty);      }      tex_reset_all_styles(level_one); diff --git a/source/luametatex/source/tex/texmlist.c b/source/luametatex/source/tex/texmlist.c index 1d4cbacd8..b9453875e 100644 --- a/source/luametatex/source/tex/texmlist.c +++ b/source/luametatex/source/tex/texmlist.c @@ -477,11 +477,11 @@ static void tex_aux_trace_kerns(halfword kern, const char *what, const char *det      }  } -static halfword tex_aux_math_insert_font_kern(halfword current, scaled amount, halfword template, const char *trace) +static halfword tex_aux_math_insert_font_kern(halfword current, scaled amount, halfword attributetemplate, const char *trace)  {      /*tex Maybe |math_font_kern|, also to prevent expansion. */      halfword kern = tex_new_kern_node(amount, font_kern_subtype); -    tex_attach_attribute_list_copy(kern, template ? template : current); +    tex_attach_attribute_list_copy(kern, attributetemplate ? 
attributetemplate : current);      if (node_next(current)) {          tex_couple_nodes(kern, node_next(current));      } @@ -490,11 +490,11 @@ static halfword tex_aux_math_insert_font_kern(halfword current, scaled amount, h      return kern;   } -static halfword tex_aux_math_insert_italic_kern(halfword current, scaled amount, halfword template, const char *trace) +static halfword tex_aux_math_insert_italic_kern(halfword current, scaled amount, halfword attributetemplate, const char *trace)  {      /*tex Maybe |math_italic_kern|. */      halfword kern = tex_new_kern_node(amount, italic_kern_subtype); -    tex_attach_attribute_list_copy(kern, template ? template : current); +    tex_attach_attribute_list_copy(kern, attributetemplate ? attributetemplate : current);      if (node_next(current)) {          tex_couple_nodes(kern, node_next(current));      } @@ -1666,7 +1666,7 @@ inline static void tex_aux_calculate_glue(scaled m, scaled *f, scaled *n)      /*tex integer part of |m| */      *n = tex_x_over_n_r(m, unity, f);      /*tex the new glue specification */ -    if (f < 0) { +    if (*f < 0) {          --n;          f += unity;      } @@ -5540,9 +5540,9 @@ if (! 
stack && has_noad_option_exact(target)) {      }  } -inline static int tex_aux_fallback_math_spacing_class(halfword style, halfword class) +inline static int tex_aux_fallback_math_spacing_class(halfword style, halfword mathclass)  { -    unsigned parent = (unsigned) count_parameter(first_math_class_code + class); +    unsigned parent = (unsigned) count_parameter(first_math_class_code + mathclass);      switch (style) {          case display_style:       case cramped_display_style:       return (parent >> 24) & 0xFF;          case text_style:          case cramped_text_style:          return (parent >> 16) & 0xFF; @@ -5673,9 +5673,9 @@ static halfword tex_aux_math_spacing_glue(halfword ltype, halfword rtype, halfwo      }  } -inline static int tex_aux_fallback_math_ruling_class(halfword style, halfword class) +inline static int tex_aux_fallback_math_ruling_class(halfword style, halfword mathclass)  { -    unsigned parent = (unsigned) count_parameter(first_math_atom_code + class); +    unsigned parent = (unsigned) count_parameter(first_math_atom_code + mathclass);      switch (style) {          case display_style:       case cramped_display_style:       return (parent >> 24) & 0xFF;          case text_style:          case cramped_text_style:          return (parent >> 16) & 0xFF; @@ -6838,6 +6838,8 @@ static void tex_mlist_to_hlist_finalize_list(mliststate *state)              Apply some logic. The hard coded pairwise comparison is replaced by a generic one              because we can have more classes. For a while spacing and pairing was under a mode              control but that made no sense. We start with the begin class.   + +            Setting |state->beginclass| still fragile ... todo.           
*/          recent_class_overload = get_noad_right_class(current);          if (current_type == simple_noad && state->beginclass == unset_noad_class) { @@ -6853,6 +6855,9 @@ static void tex_mlist_to_hlist_finalize_list(mliststate *state)              current = node_next(current);              goto WIPE;          } +        if (recent_subtype == math_begin_class) { +            state->beginclass = current_subtype; +        }          /*tex               This is a special case where a sign starts something marked as (like) numeric, in               which there will be different spacing applied.  diff --git a/source/luametatex/source/tex/texprinting.c b/source/luametatex/source/tex/texprinting.c index bb021047e..86fa47e28 100644 --- a/source/luametatex/source/tex/texprinting.c +++ b/source/luametatex/source/tex/texprinting.c @@ -352,7 +352,7 @@ void tex_print_str(const char *s)              lmt_string_to_buffer(s);              return;          default: -            break; +            return;      }      if (terminal || logfile) {          int len = (int) strlen(s); diff --git a/source/luametatex/source/tex/texscanning.c b/source/luametatex/source/tex/texscanning.c index 15e887a71..e4354bba1 100644 --- a/source/luametatex/source/tex/texscanning.c +++ b/source/luametatex/source/tex/texscanning.c @@ -225,12 +225,12 @@ inline static void tex_aux_downgrade_cur_val(int level, int succeeded, int negat  static void tex_aux_set_cur_val_by_lua_value_cmd(halfword index, halfword property)  { -    int class = lua_value_none_code; +    int category = lua_value_none_code;      halfword value = 0; /* can also be scaled */      strnumber u = tex_save_cur_string();      lmt_token_state.luacstrings = 0; -    class = lmt_function_call_by_class(index, property, &value); -    switch (class) { +    category = lmt_function_call_by_category(index, property, &value); +    switch (category) {          case lua_value_none_code:              cur_val_level = no_val_level;              break; @@ 
-1380,20 +1380,20 @@ static halfword tex_aux_scan_something_internal(halfword cmd, halfword chr, int                      case math_parameter_set_display_pre_penalty:                      case math_parameter_set_display_post_penalty:                          { -                            halfword class = tex_scan_math_class_number(0); -                            if (valid_math_class_code(class)) { +                            halfword mathclass = tex_scan_math_class_number(0); +                            if (valid_math_class_code(mathclass)) {                                  switch (chr) {                                      case math_parameter_set_pre_penalty: -                                        cur_val = count_parameter(first_math_pre_penalty_code + class); +                                        cur_val = count_parameter(first_math_pre_penalty_code + mathclass);                                          break;                                      case math_parameter_set_post_penalty: -                                        cur_val = count_parameter(first_math_post_penalty_code + class); +                                        cur_val = count_parameter(first_math_post_penalty_code + mathclass);                                          break;                                      case math_parameter_set_display_pre_penalty: -                                        cur_val = count_parameter(first_math_display_pre_penalty_code + class); +                                        cur_val = count_parameter(first_math_display_pre_penalty_code + mathclass);                                          break;                                      case math_parameter_set_display_post_penalty: -                                        cur_val = count_parameter(first_math_display_post_penalty_code + class); +                                        cur_val = count_parameter(first_math_display_post_penalty_code + mathclass);                                          break;  
                                }                              } else { @@ -1411,9 +1411,9 @@ static halfword tex_aux_scan_something_internal(halfword cmd, halfword chr, int                          }                      case math_parameter_options:                          { -                            halfword class = tex_scan_math_class_number(0); -                            if (valid_math_class_code(class)) { -                                cur_val = count_parameter(first_math_options_code + class); +                            halfword mathclass = tex_scan_math_class_number(0); +                            if (valid_math_class_code(mathclass)) { +                                cur_val = count_parameter(first_math_options_code + mathclass);                              } else {                                  cur_val = 0;                              } @@ -1890,6 +1890,20 @@ static void tex_aux_improper_constant_error(void)  */ +  +static void tex_aux_scan_int_no_number()  +{ +    /*tex Express astonishment that no number was here. Mo longer a goto because g++ doesn't like it. */ +    if (lmt_error_state.intercept) { +        lmt_error_state.last_intercept = 1 ; +        if (cur_cmd != spacer_cmd) { +            tex_back_input(cur_tok); +        } +    } else { +        tex_aux_missing_number_error(); +    } +} +  halfword tex_scan_int(int optional_equal, int *radix)  {      int negative = 0; @@ -1959,7 +1973,7 @@ halfword tex_scan_int(int optional_equal, int *radix)          result = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0);          if (cur_val_level != int_val_level) {              result = 0; -            goto NONUMBER; +            tex_aux_scan_int_no_number();          }      } else if (cur_cmd == math_style_cmd) {          /* A pity that we need to check this way in |scan_int|. 
*/ @@ -1970,7 +1984,7 @@ halfword tex_scan_int(int optional_equal, int *radix)              result = cur_chr;          } else {              result = 0; -            goto NONUMBER; +            tex_aux_scan_int_no_number();          }      } else {          /*tex has an error message been issued? */ @@ -1997,7 +2011,7 @@ halfword tex_scan_int(int optional_equal, int *radix)                          if (ok_so_far) {                              result = result * 8 + d;                              if (result > max_integer) { -                                result = infinity; +                                result = max_integer;                                  if (lmt_error_state.intercept) {                                      vacuous = 1;                                      goto DONE; @@ -2031,7 +2045,7 @@ halfword tex_scan_int(int optional_equal, int *radix)                          if (ok_so_far) {                              result = result * 16 + d;                              if (result > max_integer) { -                                result = infinity; +                                result = max_integer;                                  if (lmt_error_state.intercept) {                                      vacuous = 1;                                      goto DONE; @@ -2060,7 +2074,7 @@ halfword tex_scan_int(int optional_equal, int *radix)                          if (ok_so_far) {                              result = result * 10 + d;                              if (result > max_integer) { -                                result = infinity; +                                result = max_integer;                                  if (lmt_error_state.intercept) {                                      vacuous = 1;                                      goto DONE; @@ -2077,16 +2091,7 @@ halfword tex_scan_int(int optional_equal, int *radix)          }        DONE:          if (vacuous) { -            NONUMBER: -            /*tex Express astonishment that no 
number was here */ -            if (lmt_error_state.intercept) { -                lmt_error_state.last_intercept = 1 ; -                if (cur_cmd != spacer_cmd) { -                    tex_back_input(cur_tok); -                } -            } else { -                tex_aux_missing_number_error(); -            } +            tex_aux_scan_int_no_number();          } else {              tex_push_back(cur_tok, cur_cmd, cur_chr);          } @@ -3289,7 +3294,7 @@ halfword tex_scan_font_identifier(halfword *spec)                  if (tex_is_valid_font(fnt)) {                      return fnt;                  } else { -                    goto BAD; +                    break; /* to error */                  }              }          case internal_int_cmd: @@ -3301,7 +3306,7 @@ halfword tex_scan_font_identifier(halfword *spec)                          return fnt;                      }                  } -                goto BAD; +                break; /* to error */              }          default:              { @@ -3312,19 +3317,17 @@ halfword tex_scan_font_identifier(halfword *spec)                      if (tex_is_valid_font((halfword) fnt)) {                          return (halfword) fnt;                      } -                } else { -                    /*tex Fall through to a font error message. */                  } -              BAD: -                tex_handle_error( -                    back_error_type, -                    "Missing or invalid font identifier (or equivalent) or integer (register or otherwise)", -                    "I was looking for a control sequence whose current meaning has been defined by\n" -                    "\\font or a valid font id number." 
-                ); -                return null_font; +                break; /* to error */              }      } +    tex_handle_error( +        back_error_type, +        "Missing or invalid font identifier (or equivalent) or integer (register or otherwise)", +        "I was looking for a control sequence whose current meaning has been defined by\n" +        "\\font or a valid font id number." +    ); +    return null_font;  }  /*tex @@ -3612,9 +3615,10 @@ inline static int tex_aux_valid_macro_preamble(halfword *p, int *counter, halfwo                  *hash_brace = cur_tok;                  *p = tex_store_new_token(*p, cur_tok);                  *p = tex_store_new_token(*p, end_match_token); -                set_token_parameters(h, *counter - zero_token + 1); +                set_token_preamble(h, 1); +                set_token_parameters(h, *counter - zero_token);                  return 1; -            } else if (*counter == nine_token) { +            } else if (*counter == F_token_l) {                  tex_aux_too_many_parameters_error();              } else {                  switch (cur_tok) { @@ -3669,7 +3673,13 @@ inline static int tex_aux_valid_macro_preamble(halfword *p, int *counter, halfwo                      default:                          ++*counter;                          if (cur_tok != *counter) { -                            tex_aux_parameters_order_error(); +                            if (cur_tok >= A_token_l && cur_tok <= F_token_l) { +                                *counter += gap_match_count; +                                cur_tok += match_token - letter_token; +                                break; +                            } else {  +                                tex_aux_parameters_order_error(); +                            }                          }                          cur_tok += match_token - other_token;                          break; @@ -3682,7 +3692,8 @@ inline static int tex_aux_valid_macro_preamble(halfword 
*p, int *counter, halfwo      }      if (h != *p) {          *p = tex_store_new_token(*p, end_match_token); -        set_token_parameters(h, *counter - zero_token + 1); +        set_token_preamble(h, 1); +        set_token_parameters(h, *counter - zero_token);      }      if (cur_cmd == right_brace_cmd) {          ++lmt_input_state.align_state; @@ -3721,8 +3732,12 @@ halfword tex_scan_macro_normal(void)                  if (cur_cmd == parameter_cmd) {                      /*tex Keep the |#|. */                  } else if (cur_tok <= zero_token || cur_tok > counter) { -                    tex_aux_illegal_parameter_in_body_error(); -                    cur_tok = s; +                    if (cur_tok >= A_token_l && cur_tok <= F_token_l) { +                        cur_tok = token_val(parameter_reference_cmd, cur_chr - '0' - gap_match_count); +                    } else { +                        tex_aux_illegal_parameter_in_body_error(); +                        cur_tok = s; +                    }                  } else {                      cur_tok = token_val(parameter_reference_cmd, cur_chr - '0');                  } @@ -3799,8 +3814,12 @@ halfword tex_scan_macro_expand(void)                          if (cur_cmd == parameter_cmd) {                              /*tex Keep the |#|. 
*/                          } else if (cur_tok <= zero_token || cur_tok > counter) { -                            tex_aux_illegal_parameter_in_body_error(); -                            cur_tok = s; +                            if (cur_tok >= A_token_l && cur_tok <= F_token_l) { +                                cur_tok = token_val(parameter_reference_cmd, cur_chr - '0' - gap_match_count); +                            } else { +                                tex_aux_illegal_parameter_in_body_error(); +                                cur_tok = s; +                            }                          } else {                              cur_tok = token_val(parameter_reference_cmd, cur_chr - '0');                          } @@ -4245,7 +4264,7 @@ static void tex_aux_scan_expr(halfword level)      switch (level) {          case int_val_level:          case attr_val_level: -            if ((factor > infinity) || (factor < -infinity)) { +            if ((factor > max_integer) || (factor < min_integer)) {                  lmt_scanner_state.arithmic_error = 1;                  factor = 0;              } @@ -4264,7 +4283,7 @@ static void tex_aux_scan_expr(halfword level)              }              break;          default: -            if ((state > expression_subtract) && ((factor > infinity) || (factor < -infinity))) { +            if ((state > expression_subtract) && ((factor > max_integer) || (factor < min_integer))) {                  lmt_scanner_state.arithmic_error = 1;                  factor = 0;              } @@ -4326,7 +4345,7 @@ static void tex_aux_scan_expr(halfword level)              switch (level) {                  case int_val_level:                  case attr_val_level: -                    term = tex_fract(term, numerator, factor, infinity); +                    term = tex_fract(term, numerator, factor, max_integer);                      break;                  case dimen_val_level:                      term = tex_fract(term, numerator, factor, 
max_dimen); @@ -4363,7 +4382,7 @@ static void tex_aux_scan_expr(halfword level)              switch (level) {                  case int_val_level:                  case attr_val_level: -                    expression = tex_aux_add_or_sub(expression, term, infinity, result); +                    expression = tex_aux_add_or_sub(expression, term, max_integer, result);                      break;                  case dimen_val_level:                      expression = tex_aux_add_or_sub(expression, term, max_dimen, result); @@ -4850,7 +4869,7 @@ static halfword tex_scan_bit_int(int *radix)          result = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0);          if (cur_val_level != int_val_level) {              result = 0; -            goto NONUMBER; +            tex_aux_missing_number_error();          }      } else if (cur_cmd == math_style_cmd) {          result = (cur_chr == yet_unset_math_style) ? tex_scan_math_style_identifier(0, 0) : cur_chr; @@ -4859,7 +4878,7 @@ static halfword tex_scan_bit_int(int *radix)              result = cur_chr;          } else {              result = 0; -            goto NONUMBER; +            tex_aux_missing_number_error();          }      } else {          int vacuous = 1; @@ -4882,7 +4901,7 @@ static halfword tex_scan_bit_int(int *radix)                          if (ok_so_far) {                              result = result * 8 + d;                              if (result > max_integer) { -                                result = infinity; +                                result = max_integer;                                  tex_aux_number_to_big_error();                                  ok_so_far = 0;                              } @@ -4911,7 +4930,7 @@ static halfword tex_scan_bit_int(int *radix)                          if (ok_so_far) {                              result = result * 16 + d;                              if (result > max_integer) { -                                result = infinity; +     
                           result = max_integer;                                  tex_aux_number_to_big_error();                                  ok_so_far = 0;                              } @@ -4935,7 +4954,7 @@ static halfword tex_scan_bit_int(int *radix)                          if (ok_so_far) {                              result = result * 10 + d;                              if (result > max_integer) { -                                result = infinity; +                                result = max_integer;                                  tex_aux_number_to_big_error();                                  ok_so_far = 0;                              } @@ -4947,7 +4966,6 @@ static halfword tex_scan_bit_int(int *radix)          }        DONE:          if (vacuous) { -          NONUMBER:              tex_aux_missing_number_error();          } else {              tex_push_back(cur_tok, cur_cmd, cur_chr); @@ -5562,10 +5580,10 @@ static void tex_aux_scan_expression(int level)                                      break;                              }                          } -                        if (v < -infinity) { -                            v = -infinity; -                        } else if (v > infinity) { -                            v = infinity; +                        if (v < min_integer) { +                            v = min_integer; +                        } else if (v > max_integer) { +                            v = max_integer;                          }                          expression_entry(stack.tail) = v;                          break; diff --git a/source/luametatex/source/tex/texstringpool.h b/source/luametatex/source/tex/texstringpool.h index a15b9fad5..f053a642a 100644 --- a/source/luametatex/source/tex/texstringpool.h +++ b/source/luametatex/source/tex/texstringpool.h @@ -78,35 +78,33 @@ extern string_pool_info lmt_string_pool_state;  inline static void  tex_flush_char(void)       { --lmt_string_pool_state.string_temp_top; } -extern 
strnumber    tex_make_string            (void); -extern strnumber    tex_push_string            (const unsigned char *s, int l); -extern char        *tex_take_string            (int *len); -extern int          tex_str_eq_buf             (strnumber s, int k, int n); -extern int          tex_str_eq_str             (strnumber s, strnumber t); -extern int          tex_str_eq_cstr            (strnumber s, const char *, size_t); -extern int          tex_get_strings_started    (void); -extern void         tex_reset_cur_string       (void); -/*     strnumber    tex_search_string          (strnumber search); */ -/*     int          tex_used_strings           (void); */ -extern strnumber    tex_maketexstring          (const char *s); -extern strnumber    tex_maketexlstring         (const char *s, size_t); -extern void         tex_append_char            (unsigned char c); -extern void         tex_append_string          (const unsigned char *s, unsigned l); -extern char        *tex_makecstring            (int s, int *allocated); -extern char        *tex_makeclstring           (int s, size_t *len); -extern void         tex_dump_string_pool       (dumpstream f); -extern void         tex_undump_string_pool     (dumpstream f); -extern void         tex_initialize_string_pool (void); -extern void         tex_initialize_string_mem  (void); -extern void         tex_flush_str              (strnumber s); -extern strnumber    tex_save_cur_string        (void); -extern void         tex_restore_cur_string     (strnumber u); -                    -/*     void         tex_increment_pool_string  (int n); */ -/*     void         tex_decrement_pool_string  (int n); */ -                    -extern void         tex_compact_string_pool    (void); -                   -inline static char *tex_to_cstring             (int s) { return str_length(s) > 0 ? 
(char *) str_string(s) : ""; } +extern strnumber  tex_make_string            (void); +extern strnumber  tex_push_string            (const unsigned char *s, int l); +extern char      *tex_take_string            (int *len); +extern int        tex_str_eq_buf             (strnumber s, int k, int n); +extern int        tex_str_eq_str             (strnumber s, strnumber t); +extern int        tex_str_eq_cstr            (strnumber s, const char *, size_t); +extern int        tex_get_strings_started    (void); +extern void       tex_reset_cur_string       (void); +/*     strnumber  tex_search_string          (strnumber search); */ +/*     int        tex_used_strings           (void); */ +extern strnumber  tex_maketexstring          (const char *s); +extern strnumber  tex_maketexlstring         (const char *s, size_t); +extern void       tex_append_char            (unsigned char c); +extern void       tex_append_string          (const unsigned char *s, unsigned l); +extern char      *tex_makecstring            (int s, int *allocated); +extern char      *tex_makeclstring           (int s, size_t *len); +extern void       tex_dump_string_pool       (dumpstream f); +extern void       tex_undump_string_pool     (dumpstream f); +extern void       tex_initialize_string_pool (void); +extern void       tex_initialize_string_mem  (void); +extern void       tex_flush_str              (strnumber s); +extern strnumber  tex_save_cur_string        (void); +extern void       tex_restore_cur_string     (strnumber u); +extern void       tex_compact_string_pool    (void); +/*     void       tex_increment_pool_string  (int n); */ +/*     void       tex_decrement_pool_string  (int n); */ +                                    +inline static const char *tex_to_cstring (int s) { return str_length(s) > 0 ? 
(char *) str_string(s) : ""; }  # endif diff --git a/source/luametatex/source/tex/textoken.c b/source/luametatex/source/tex/textoken.c index b46e6de85..f820e51d7 100644 --- a/source/luametatex/source/tex/textoken.c +++ b/source/luametatex/source/tex/textoken.c @@ -92,7 +92,7 @@ token_state_info lmt_token_state = {      .buffer         = NULL,      .bufloc         = 0,      .bufmax         = 0, -    .padding        = 0, +    .empty          = null,   };  /*tex Some properties are dumped in the format so these are aet already! */ @@ -212,6 +212,7 @@ void tex_compact_tokens(void)                      }              }          } +        lmt_token_state.empty = mapper[lmt_token_state.empty];       // print(dump_state.format_identifier);          tex_print_format("tokenlist compacted from %i to %i entries, ", lmt_token_memory_state.tokens_data.top, nc);          if (nofluacmds) { @@ -335,27 +336,41 @@ void tex_add_token_reference(halfword p)  {      if (get_token_reference(p) < max_token_reference) {          add_token_reference(p); -    } else { -        tex_overflow_error("reference count", max_token_reference); + //   } else { + //       tex_overflow_error("reference count", max_token_reference);      }  }  void tex_increment_token_reference(halfword p, int n)  {      if ((get_token_reference(p) + n) < max_token_reference) { -        inc_token_reference(p,n); -    } else { -        tex_overflow_error("reference count", max_token_reference); +        inc_token_reference(p, n); +    } else {  +        inc_token_reference(p, max_token_reference - get_token_reference(p)); + // } else { + //     tex_overflow_error("reference count", max_token_reference);      }  } +// void tex_delete_token_reference(halfword p) +// { +//     if (p) { +//         if (get_token_reference(p)) { +//             sub_token_reference(p); +//         } else { +//             tex_flush_token_list(p); +//         } +//     } +// } +  void tex_delete_token_reference(halfword p)  {      if (p) { -    
    if (get_token_reference(p)) { -            sub_token_reference(p); -        } else { +        halfword r = get_token_reference(p); +        if (! r) {              tex_flush_token_list(p); +        } if(r < max_token_reference) { +            sub_token_reference(p);          }      }  } @@ -465,6 +480,9 @@ void tex_print_meaning(halfword code)                  tex_print_cs(cur_cs);                  return;              } else { +                if (cur_chr && get_token_reference(cur_chr) == max_token_reference) { +                    tex_print_str("constant "); +                }                  switch (code) {                      case meaning_code:                      case meaning_full_code: @@ -477,7 +495,7 @@ void tex_print_meaning(halfword code)                          tex_print_cs(cur_cs);                          tex_print_char(' ');                          if (cur_chr && token_link(cur_chr)) { -                            halfword body = get_token_parameters(cur_chr) ? tex_show_token_list(token_link(cur_chr), null, default_token_show_max, 1) : token_link(cur_chr); +                            halfword body = get_token_preamble(cur_chr) ? 
tex_show_token_list(token_link(cur_chr), null, default_token_show_max, 1) : token_link(cur_chr);                              tex_print_char('{');                              if (body) {                                  tex_show_token_list(body, null, default_token_show_max, 0); @@ -582,7 +600,7 @@ halfword tex_show_token_list(halfword p, halfword q, int l, int asis)  {      if (p) {          /*tex the highest parameter number, as an \ASCII\ digit */ -        unsigned char n = '0'; +        unsigned char n = 0;          int min = 0;          int max = lmt_token_memory_state.tokens_data.top;          lmt_print_state.tally = 0; @@ -639,6 +657,8 @@ halfword tex_show_token_list(halfword p, halfword q, int l, int asis)                          tex_print_tex_str(match_visualizer);                          if (chr <= 9) {                              tex_print_char(chr + '0'); +                        } else if (chr <= max_match_count) { +                            tex_print_char(chr + '0' + gap_match_count);                          } else {                              tex_print_char('!');                              return null; @@ -650,7 +670,7 @@ halfword tex_show_token_list(halfword p, halfword q, int l, int asis)                              ++n;                          }                          tex_print_char(chr ? chr : '0'); -                        if (n > '9') { +                        if (n > max_match_count) {                              /*tex Can this happen at all? 
*/                              return null;                          } else { @@ -698,8 +718,9 @@ inline static halfword get_unichar_from_buffer(int *b)      if (a <= 0x80) {          *b += 1;      } else { -        a = (halfword) aux_str2uni(lmt_fileio_state.io_buffer + *b); -        *b += utf8_size(a); +        int al;  +        a = (halfword) aux_str2uni_len(lmt_fileio_state.io_buffer + *b, &al); +        *b += al;      }      return a;  } @@ -892,7 +913,7 @@ int tex_scan_optional_keyword(const char *s)  /*tex      Here we know that the first character(s) matched so we are in the middle of a keyword already -    which means a different loop than the previous one. +    which means a different loop than the previous one.   */  int tex_scan_mandate_keyword(const char *s, int offset) @@ -2111,8 +2132,9 @@ halfword tex_string_to_toks(const char *ss)      halfword p = null;      /*tex new node being added to the token list via |store_new_token| */      while (s < se) { -        halfword t = (halfword) aux_str2uni((const unsigned char *) s); -        s += utf8_size(t); +        int tl;  +        halfword t = (halfword) aux_str2uni_len((const unsigned char *) s, &tl); +        s += tl;          if (t == ' ') {              t = space_token;          } else { @@ -2148,8 +2170,9 @@ static halfword lmt_str_toks(lstring b) /* returns head */      halfword head = null;      halfword tail = head;      while (k < (unsigned char *) b.s + b.l) { -        halfword t = aux_str2uni(k); -        k += utf8_size(t); +        int tl;  +        halfword t = aux_str2uni_len(k, &tl); +        k += tl;          if (t == ' ') {              t = space_token;          } else { @@ -2190,14 +2213,14 @@ halfword tex_str_toks(lstring s, halfword *tail)          unsigned char *k = s.s;          unsigned char *l = k + s.l;          while (k < l) { -            halfword t = aux_str2uni(k); +            int tl; +            halfword t = aux_str2uni_len(k, &tl);              if (t == ' ') { -          
      k += 1;                  t = space_token;              } else { -                k += utf8_size(t);                  t += other_token;              } +            k += tl;              p = tex_store_new_token(p, t);              if (! h) {                  h = p; @@ -2220,14 +2243,14 @@ halfword tex_cur_str_toks(halfword *tail)          /*tex tail of the token list */          while (k < l) {              /*tex token being appended */ -            halfword t = aux_str2uni(k); +            int tl; +            halfword t = aux_str2uni_len(k, &tl);              if (t == ' ') { -                k += 1;                  t = space_token;              } else { -                k += utf8_size(t);                  t += other_token;              } +            k += tl;              p = tex_store_new_token(p, t);              if (! h) {                  h = p; @@ -2261,8 +2284,9 @@ halfword tex_str_scan_toks(int ct, lstring ls)      while (k < l) {          int cc;          /*tex token being appended */ -        halfword t = aux_str2uni(k); -        k += utf8_size(t); +        int lt; +        halfword t = aux_str2uni_len(k, &lt); +        k += lt;          cc = tex_get_cat_code(ct, t);          if (cc == 0) {              /*tex We have a potential control sequence so we check for it. 
*/ @@ -2271,8 +2295,7 @@ halfword tex_str_scan_toks(int ct, lstring ls)              int c = 0 ;              unsigned char *name = k ;              while (k < l) { -                t = (halfword) aux_str2uni((const unsigned char *) k); -                s = utf8_size(t); +                t = (halfword) aux_str2uni_len((const unsigned char *) k, &s);                  c = tex_get_cat_code(ct,t);                  if (c == 11) {                      k += s ; @@ -3131,7 +3154,7 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb          int p = token_link(pp);          if (p) {              int e = escape_char_par;  /*tex The serialization of the escape, normally a backlash. */ -            int n = '0';              /*tex The character after |#|, so |#0| upto |#9| */ +            int n = 0;                /*tex The character after |#|, so |#0| upto |#9| */              int min = 0;              int max = lmt_token_memory_state.tokens_data.top;              int skip = 0; @@ -3149,7 +3172,7 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb              }              lmt_token_state.bufloc = 0;              if (skippreamble) { -                skip = get_token_parameters(pp); +                skip = get_token_preamble(pp);              }              while (p) {                  if (p < min || p > max) { @@ -3192,12 +3215,14 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb                                      tex_aux_append_char_to_buffer(match_visualizer);                                      if (chr <= 9) {                                          tex_aux_append_char_to_buffer(chr + '0'); +                                    } else if (chr <= max_match_count) { +                                        tex_aux_append_char_to_buffer(chr + '0' + gap_match_count);                                      } else { -                                        
tex_aux_append_char_to_buffer('!'); +                                        tex_aux_append_char_to_buffer('!');                                           goto EXIT;                                      }                                  } else { -                                    if (chr > 9) { +                                    if (chr > max_match_count) {                                          goto EXIT;                                      }                                  } @@ -3210,9 +3235,14 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb                                      ++n;                                  }                                  if (! skip) { -                                    tex_aux_append_char_to_buffer(chr ? chr : '0'); +                                 // tex_aux_append_char_to_buffer(chr ? chr : '0'); +                                    if (chr <= 9) { +                                        tex_aux_append_char_to_buffer(chr + '0'); +                                    } else if (chr <= max_match_count) { +                                        tex_aux_append_char_to_buffer(chr + '0' + gap_match_count); +                                    }                                  } -                                if (n > '9') { +                                if (n > max_match_count) {                                      goto EXIT;                                  }                                  break; @@ -3457,14 +3487,14 @@ halfword tex_parse_str_to_tok(halfword head, halfword *tail, halfword ct, const          const char *se = str + lstr;          while (str < se) {              /*tex hh: |str2uni| could return len too (also elsewhere) */ -            halfword u = (halfword) aux_str2uni((const unsigned char *) str); +            int ul; +            halfword u = (halfword) aux_str2uni_len((const unsigned char *) str, &ul);              halfword t = null;              halfword cc = 
tex_get_cat_code(ct, u); -            str += utf8_size(u); +            str += ul;              /*tex -                This is a relating simple converter; if more is needed one can just use -                |tex.print| with a regular |\def| or |\gdef| and feed the string into the -                regular scanner. +                This is a relative simple converter; if more is needed one can just use |tex.print| +                with a regular |\def| or |\gdef| and feed the string into the regular scanner.              */              switch (cc) {                  case escape_cmd: @@ -3473,8 +3503,8 @@ halfword tex_parse_str_to_tok(halfword head, halfword *tail, halfword ct, const                          int lname = 0;                          const char *name  = str;                          while (str < se) { -                            halfword u = (halfword) aux_str2uni((const unsigned char *) str); -                            int s = utf8_size(u); +                            int s;  +                            halfword u = (halfword) aux_str2uni_len((const unsigned char *) str, &s);                              int c = tex_get_cat_code(ct, u);                              if (c == letter_cmd) {                                  str += s; diff --git a/source/luametatex/source/tex/textoken.h b/source/luametatex/source/tex/textoken.h index da2d01f7c..68632792b 100644 --- a/source/luametatex/source/tex/textoken.h +++ b/source/luametatex/source/tex/textoken.h @@ -108,32 +108,39 @@ typedef struct token_state_info {      char     *buffer;      int       bufloc;      int       bufmax; -    int       padding; +    int       empty;  } token_state_info;  extern token_state_info lmt_token_state; -// # define max_token_reference 0x7FFF /* we can bump to 0xFFFF when we go unsigned here */ -// -//define token_reference(a)  token_memory_state.tokens[a].half1 -// -// #define get_token_parameters(a) lmt_token_memory_state.tokens[a].quart2 -// #define 
get_token_reference(a)  lmt_token_memory_state.tokens[a].quart3 -// -// #define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].quart2  = (b) -// -// #define add_token_reference(a)    lmt_token_memory_state.tokens[a].quart3 += 1 -// #define sub_token_reference(a)    lmt_token_memory_state.tokens[a].quart3 -= 1 -// #define inc_token_reference(a,b)  lmt_token_memory_state.tokens[a].quart3 += (quarterword) (b) -// #define dec_token_reference(a,b)  lmt_token_memory_state.tokens[a].quart3 -= (quarterword) (b) +/*tex + +    We now can have 15 paremeters but if needed we can go higher. However, we then also need to  +    cache more and change the |preamble| and |count| to some funny bit ranges. If needed we can  +    bump the reference count maximum but quite likely one already has run out of something else +    already.    + +    \starttyping +    preamble  = 0xF0000000 : 1 when we have one, including trailing # +    count     = 0x0F000000 +    reference = 0x00FFFFFF +    \stoptyping + +*/ + +# define max_match_count 15 +# define gap_match_count  7 -# define max_token_reference 0x0FFFFFFF +# define max_token_reference 0x00FFFFFF -# define get_token_parameters(a) (lmt_token_memory_state.tokens[a].hulf1 >> 28) -# define get_token_reference(a)  (lmt_token_memory_state.tokens[a].hulf1 & 0x0FFFFFFF) +# define get_token_preamble(a)   ((lmt_token_memory_state.tokens[a].hulf1 >> 28) & 0xF) +# define get_token_parameters(a) ((lmt_token_memory_state.tokens[a].hulf1 >> 24) & 0xF) +# define get_token_reference(a)  ((lmt_token_memory_state.tokens[a].hulf1      ) & max_token_reference) -# define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28)  /* normally the variable is still zero here */ +# define set_token_preamble(a,b)   lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28)  /* normally the variable is still zero here */ +# define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 24)  /* normally the variable is 
still zero here */ +# define set_token_reference(a,b)  lmt_token_memory_state.tokens[a].hulf1 += (b)  # define add_token_reference(a)    lmt_token_memory_state.tokens[a].hulf1 += 1            /* we are way off the parameter count */  # define sub_token_reference(a)    lmt_token_memory_state.tokens[a].hulf1 -= 1            /* we are way off the parameter count */  # define inc_token_reference(a,b)  lmt_token_memory_state.tokens[a].hulf1 += (b)          /* we are way off the parameter count */ diff --git a/source/luametatex/source/tex/textypes.h b/source/luametatex/source/tex/textypes.h index a09409522..c2cd57e64 100644 --- a/source/luametatex/source/tex/textypes.h +++ b/source/luametatex/source/tex/textypes.h @@ -155,7 +155,7 @@ extern halfword tex_badness(  # define one_bp                                 65781 -# define infinity                        017777777777 /*tex the largest positive value that \TEX\ knows */ +# define max_infinity                      0x7FFFFFFF /*tex the largest positive value that \TEX\ knows */  # define min_infinity                     -0x7FFFFFFF  # define awful_bad                        07777777777 /*tex more than a billion demerits |0x3FFFFFFF| */   # define infinite_bad                           10000 /*tex infinitely bad value */ diff --git a/source/luametatex/source/utilities/auxunistring.c b/source/luametatex/source/utilities/auxunistring.c index 9fe5531d6..746fde4ad 100644 --- a/source/luametatex/source/utilities/auxunistring.c +++ b/source/luametatex/source/utilities/auxunistring.c @@ -11,36 +11,100 @@  */ -unsigned aux_str2uni(const unsigned char *k) +// unsigned xaux_str2uni(const unsigned char *k) +// { +//     const unsigned char *text = k; +//     int ch = *text++; +//     if (ch < 0x80) { +//         return (unsigned) ch; +//     } else if (ch <= 0xbf) { +//         return 0xFFFD; +//     } else if (ch <= 0xdf) { +//         if (text[0] >= 0x80 && text[0] < 0xc0) { +//             return (unsigned) (((ch & 0x1f) << 6) | 
(text[0] & 0x3f)); +//         } +//     } else if (ch <= 0xef) { +//         if (text[0] >= 0x80 && text[0] < 0xc0 && text[1] >= 0x80 && text[1] < 0xc0) { +//             return (unsigned) (((ch & 0xf) << 12) | ((text[0] & 0x3f) << 6) | (text[1] & 0x3f)); +//         } +//     } else if (ch <= 0xf7) { +//         if (text[0] <  0x80 || text[1] <  0x80 || text[2] <  0x80 || +//             text[0] >= 0xc0 || text[1] >= 0xc0 || text[2] >= 0xc0) { +//             return 0xFFFD; +//         } else { +//             int w1 = (((ch & 0x7) << 2) | ((text[0] & 0x30) >> 4)) - 1; +//             int w2 = ((text[1] & 0xf) << 6) | (text[2] & 0x3f); +//             w1 = (w1 << 6) | ((text[0] & 0xf) << 2) | ((text[1] & 0x30) >> 4); +//             return (unsigned) (w1 * 0x400 + w2 + 0x10000); +//         } +//     } +//     return 0xFFFD; +// } + +unsigned aux_str2uni(const unsigned char *text) +{ +    if (text[0] < 0x80) { +        return (unsigned) text[0]; +    } else if (text[0] <= 0xbf) { +        return 0xFFFD; +    } else if (text[0] <= 0xdf) { +        if (text[1] >= 0x80 && text[1] < 0xc0) { +            return (unsigned) (((text[0] & 0x1f) << 6) | (text[1] & 0x3f)); +        } +    } else if (text[0] <= 0xef) { +        if (text[1] >= 0x80 && text[1] < 0xc0 && text[2] >= 0x80 && text[2] < 0xc0) { +            return (unsigned) (((text[0] & 0xf) << 12) | ((text[1] & 0x3f) << 6) | (text[2] & 0x3f)); +        } +    } else if (text[0] <= 0xf7) { +        if (text[1] <  0x80 || text[2] <  0x80 || text[3] <  0x80 || +            text[1] >= 0xc0 || text[2] >= 0xc0 || text[3] >= 0xc0) { +            return 0xFFFD; +        } else { +            int w1 = (((text[0] & 0x7) << 2) | ((text[1] & 0x30) >> 4)) - 1; +            int w2 = ((text[2] & 0xf) << 6) | (text[3] & 0x3f); +            w1 = (w1 << 6) | ((text[1] & 0xf) << 2) | ((text[2] & 0x30) >> 4); +            return (unsigned) (w1 * 0x400 + w2 + 0x10000); +        } +    } +    return 0xFFFD; +} + +unsigned 
aux_str2uni_len(const unsigned char *text, int *len)  { -    const unsigned char *text = k; -    int ch = *text++; -    if (ch < 0x80) { -        return (unsigned) ch; -    } else if (ch <= 0xbf) { +    if (text[0] < 0x80) { +        *len = 1; +        return (unsigned) text[0]; +    } else if (text[0] <= 0xbf) { +        *len = 1;          return 0xFFFD; -    } else if (ch <= 0xdf) { -        if (text[0] >= 0x80 && text[0] < 0xc0) { -            return (unsigned) (((ch & 0x1f) << 6) | (text[0] & 0x3f)); +    } else if (text[0] <= 0xdf) { +        if (text[1] >= 0x80 && text[1] < 0xc0) { +            *len = 2; +            return (unsigned) (((text[0] & 0x1f) << 6) | (text[1] & 0x3f));          } -    } else if (ch <= 0xef) { -        if (text[0] >= 0x80 && text[0] < 0xc0 && text[1] >= 0x80 && text[1] < 0xc0) { -            return (unsigned) (((ch & 0xf) << 12) | ((text[0] & 0x3f) << 6) | (text[1] & 0x3f)); +    } else if (text[0] <= 0xef) { +        if (text[1] >= 0x80 && text[1] < 0xc0 && text[2] >= 0x80 && text[2] < 0xc0) { +            *len = 3; +            return (unsigned) (((text[0] & 0xf) << 12) | ((text[1] & 0x3f) << 6) | (text[2] & 0x3f));          } -    } else if (ch <= 0xf7) { -        if (text[0] <  0x80 || text[1] <  0x80 || text[2] <  0x80 || -            text[0] >= 0xc0 || text[1] >= 0xc0 || text[2] >= 0xc0) { +    } else if (text[0] <= 0xf7) { +        if (text[1] <  0x80 || text[2] <  0x80 || text[3] <  0x80 || +            text[1] >= 0xc0 || text[2] >= 0xc0 || text[3] >= 0xc0) { +            *len = 4;              return 0xFFFD;          } else { -            int w1 = (((ch & 0x7) << 2) | ((text[0] & 0x30) >> 4)) - 1; -            int w2 = ((text[1] & 0xf) << 6) | (text[2] & 0x3f); -            w1 = (w1 << 6) | ((text[0] & 0xf) << 2) | ((text[1] & 0x30) >> 4); +            *len = 4; +            int w1 = (((text[0] & 0x7) << 2) | ((text[1] & 0x30) >> 4)) - 1; +            int w2 = ((text[2] & 0xf) << 6) | (text[3] & 0x3f); +            w1 = (w1 
<< 6) | ((text[1] & 0xf) << 2) | ((text[2] & 0x30) >> 4);              return (unsigned) (w1 * 0x400 + w2 + 0x10000);          }      } +    *len = 1;      return 0xFFFD;  } +  unsigned char *aux_uni2str(unsigned unic)  {      unsigned char *buf = lmt_memory_malloc(5); diff --git a/source/luametatex/source/utilities/auxunistring.h b/source/luametatex/source/utilities/auxunistring.h index 4c5ee3639..92f46d91c 100644 --- a/source/luametatex/source/utilities/auxunistring.h +++ b/source/luametatex/source/utilities/auxunistring.h @@ -6,7 +6,8 @@  # define LMT_UTILITIES_UNISTRING_H  extern unsigned char *aux_uni2str      (unsigned); -extern unsigned       aux_str2uni      (const unsigned char *); +extern unsigned       aux_str2uni      (const unsigned char *text); +extern unsigned       aux_str2uni_len  (const unsigned char *text, int *len);  extern char          *aux_uni2string   (char *utf8_text, unsigned ch);  extern unsigned       aux_splitutf2uni (unsigned int *ubuf, const char *utf8buf);  extern size_t         aux_utf8len      (const char *text, size_t size);  | 
