summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
authorHans Hagen <pragma@wxs.nl>2022-12-15 19:13:44 +0100
committerContext Git Mirror Bot <phg@phi-gamma.net>2022-12-15 19:13:44 +0100
commit8f472c629a996f1c08281198210354a253d7f56b (patch)
tree26f5fa0bda520d7ea9e87947df6978b9085a7d58 /source
parent3bdc9b9072bba774cd5c604fe185d39ddbdc911e (diff)
downloadcontext-8f472c629a996f1c08281198210354a253d7f56b.tar.gz
2022-12-15 17:52:00
Diffstat (limited to 'source')
-rw-r--r--source/luametatex/source/lua/lmtinterface.h3
-rw-r--r--source/luametatex/source/lua/lmtstatuslib.c6
-rw-r--r--source/luametatex/source/lua/lmttexlib.c13
-rw-r--r--source/luametatex/source/lua/lmttokenlib.c77
-rw-r--r--source/luametatex/source/lua/lmttokenlib.h2
-rw-r--r--source/luametatex/source/luametatex.h2
-rw-r--r--source/luametatex/source/tex/texcommands.c11
-rw-r--r--source/luametatex/source/tex/texcommands.h10
-rw-r--r--source/luametatex/source/tex/texconditional.c22
-rw-r--r--source/luametatex/source/tex/texdumpdata.h2
-rw-r--r--source/luametatex/source/tex/texequivalents.c2
-rw-r--r--source/luametatex/source/tex/texequivalents.h3
-rw-r--r--source/luametatex/source/tex/texexpand.c36
-rw-r--r--source/luametatex/source/tex/texinputstack.c25
-rw-r--r--source/luametatex/source/tex/texlanguage.c5
-rw-r--r--source/luametatex/source/tex/texmaincontrol.c103
-rw-r--r--source/luametatex/source/tex/texmarks.c30
-rw-r--r--source/luametatex/source/tex/texmarks.h2
-rw-r--r--source/luametatex/source/tex/texmath.c52
-rw-r--r--source/luametatex/source/tex/texmlist.c23
-rw-r--r--source/luametatex/source/tex/texprinting.c2
-rw-r--r--source/luametatex/source/tex/texscanning.c140
-rw-r--r--source/luametatex/source/tex/texstringpool.h58
-rw-r--r--source/luametatex/source/tex/textoken.c114
-rw-r--r--source/luametatex/source/tex/textoken.h43
-rw-r--r--source/luametatex/source/tex/textypes.h2
-rw-r--r--source/luametatex/source/utilities/auxunistring.c100
-rw-r--r--source/luametatex/source/utilities/auxunistring.h3
28 files changed, 551 insertions, 340 deletions
diff --git a/source/luametatex/source/lua/lmtinterface.h b/source/luametatex/source/lua/lmtinterface.h
index 67c3f56d5..c60a78266 100644
--- a/source/luametatex/source/lua/lmtinterface.h
+++ b/source/luametatex/source/lua/lmtinterface.h
@@ -562,6 +562,7 @@ make_lua_key(L, condition);\
make_lua_key(L, conditional);\
make_lua_key(L, conditionalmathskip);\
make_lua_key(L, connectoroverlapmin);\
+make_lua_key(L, constant);\
make_lua_key(L, container);\
make_lua_key(L, contributehead);\
make_lua_key(L, convert);\
@@ -1127,6 +1128,7 @@ make_lua_key(L, properties);\
make_lua_key(L, proportional);\
make_lua_key(L, protected);\
make_lua_key(L, protected_call);\
+make_lua_key(L, semi_protected_call);\
make_lua_key(L, protrudechars);\
make_lua_key(L, protrusion);\
make_lua_key(L, ptr);\
@@ -1387,6 +1389,7 @@ make_lua_key(L, tolerance);\
make_lua_key(L, tolerant);\
make_lua_key(L, tolerant_call);\
make_lua_key(L, tolerant_protected_call);\
+make_lua_key(L, tolerant_semi_protected_call);\
make_lua_key(L, top);\
make_lua_key(L, topaccent);\
make_lua_key(L, topaccentvariant);\
diff --git a/source/luametatex/source/lua/lmtstatuslib.c b/source/luametatex/source/lua/lmtstatuslib.c
index cf665ede2..841ddeec0 100644
--- a/source/luametatex/source/lua/lmtstatuslib.c
+++ b/source/luametatex/source/lua/lmtstatuslib.c
@@ -254,8 +254,8 @@ static int statslib_getconstants(lua_State *L)
lua_set_integer_by_key(L, "no_catcode_table", no_catcode_table_preset);
lua_set_integer_by_key(L, "default_catcode_table", default_catcode_table_preset);
- lua_set_cardinal_by_key(L, "max_cardinal", max_cardinal);
- lua_set_cardinal_by_key(L, "min_cardinal", min_cardinal);
+ lua_set_cardinal_by_key(L,"max_cardinal", max_cardinal);
+ lua_set_cardinal_by_key(L,"min_cardinal", min_cardinal);
lua_set_integer_by_key(L, "max_integer", max_integer);
lua_set_integer_by_key(L, "min_integer", min_integer);
lua_set_integer_by_key(L, "max_dimen", max_dimen);
@@ -268,7 +268,7 @@ static int statslib_getconstants(lua_State *L)
lua_set_integer_by_key(L, "one_bp", one_bp);
- lua_set_integer_by_key(L, "infinity", infinity);
+ lua_set_integer_by_key(L, "infinity", max_infinity);
lua_set_integer_by_key(L, "min_infinity", min_infinity);
lua_set_integer_by_key(L, "awful_bad", awful_bad);
lua_set_integer_by_key(L, "infinite_bad", infinite_bad);
diff --git a/source/luametatex/source/lua/lmttexlib.c b/source/luametatex/source/lua/lmttexlib.c
index 7d9395eb7..0d84eebdd 100644
--- a/source/luametatex/source/lua/lmttexlib.c
+++ b/source/luametatex/source/lua/lmttexlib.c
@@ -903,7 +903,7 @@ static const char *texlib_aux_scan_integer_part(lua_State *L, const char *ss, in
DONE:
if (overflow) {
luaL_error(L, "number too big");
- result = infinity;
+ result = max_integer;
} else if (vacuous) {
luaL_error(L, "missing number, treated as zero") ;
}
@@ -1246,6 +1246,9 @@ int lmt_check_for_flags(lua_State *L, int slot, int *flags, int prefixes, int nu
} else if (lua_key_eq(str, value)) {
slot += 1;
*flags = add_value_flag(*flags);
+ } else if (lua_key_eq(str, constant)) {
+ slot += 1;
+ *flags = add_constant_flag(*flags);
} else if (lua_key_eq(str, conditional) || lua_key_eq(str, condition)) {
/* condition will go, conditional stays */
slot += 1;
@@ -2690,7 +2693,7 @@ static int texlib_aux_scan_internal(lua_State *L, int cmd, int code, int values)
default:
{
int texstr = tex_the_scanned_result();
- char *str = tex_to_cstring(texstr);
+ const char *str = tex_to_cstring(texstr);
if (str) {
lua_pushstring(L, str);
} else {
@@ -3582,7 +3585,7 @@ static int texlib_enableprimitives(lua_State *L)
for (int cs = 0; cs < prim_size; cs++) {
strnumber s = get_prim_text(cs);
if (s > 0) {
- char *prm = tex_to_cstring(s);
+ const char *prm = tex_to_cstring(s);
texlib_aux_enableprimitive(pre, lpre, prm);
}
}
@@ -4116,7 +4119,7 @@ static int texlib_runlocal(lua_State *L)
} else {
halfword ref = eq_value(cs);
halfword head = token_link(ref);
- if (head && get_token_parameters(ref)) {
+ if (head && get_token_preamble(ref)) {
tex_local_control_message("macro takes arguments and is ignored");
return 0;
} else {
@@ -4605,7 +4608,7 @@ static int texlib_setdimensionvalue(lua_State *L)
static int texlib_aux_getvalue(lua_State *L, halfword level, halfword cs)
{
halfword chr = eq_value(cs);
- if (chr && ! get_token_parameters(chr)) {
+ if (chr && ! get_token_preamble(chr)) { /* or get_token_parameters as we don't want trailing # */
halfword value = 0;
tex_begin_inserted_list(tex_get_available_token(cs_token_flag + cs));
if (tex_scan_tex_value(level, &value)) {
diff --git a/source/luametatex/source/lua/lmttokenlib.c b/source/luametatex/source/lua/lmttokenlib.c
index 97ca1b144..41ee6c485 100644
--- a/source/luametatex/source/lua/lmttokenlib.c
+++ b/source/luametatex/source/lua/lmttokenlib.c
@@ -213,10 +213,10 @@ void lmt_tokenlib_initialize(void)
/* lmt_interface.command_names[string_cmd] = (command_item) { .id = string_cmd, .lua = lua_key_index(string), .name = lua_key(string), .kind = regular_command_item, .min = ignore_entry, .max = max_integer, .base = 0, .fixedvalue = 0 }; */
lmt_interface.command_names[call_cmd] = (command_item) { .id = call_cmd, .lua = lua_key_index(call), .name = lua_key(call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 };
lmt_interface.command_names[protected_call_cmd] = (command_item) { .id = protected_call_cmd, .lua = lua_key_index(protected_call), .name = lua_key(protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 };
- lmt_interface.command_names[semi_protected_call_cmd] = (command_item) { .id = semi_protected_call_cmd, .lua = lua_key_index(protected_call), .name = lua_key(protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 };
+ lmt_interface.command_names[semi_protected_call_cmd] = (command_item) { .id = semi_protected_call_cmd, .lua = lua_key_index(semi_protected_call), .name = lua_key(semi_protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; /*tex |name| uses the same key as |lua| so the entry reports its own name. */
lmt_interface.command_names[tolerant_call_cmd] = (command_item) { .id = tolerant_call_cmd, .lua = lua_key_index(tolerant_call), .name = lua_key(tolerant_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 };
lmt_interface.command_names[tolerant_protected_call_cmd] = (command_item) { .id = tolerant_protected_call_cmd, .lua = lua_key_index(tolerant_protected_call), .name = lua_key(tolerant_protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 };
- lmt_interface.command_names[tolerant_semi_protected_call_cmd] = (command_item) { .id = tolerant_semi_protected_call_cmd, .lua = lua_key_index(tolerant_protected_call), .name = lua_key(tolerant_protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 };
+ lmt_interface.command_names[tolerant_semi_protected_call_cmd] = (command_item) { .id = tolerant_semi_protected_call_cmd, .lua = lua_key_index(tolerant_semi_protected_call), .name = lua_key(tolerant_semi_protected_call), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; /*tex |name| uses the same key as |lua| so the entry reports its own name. */
lmt_interface.command_names[deep_frozen_end_template_cmd] = (command_item) { .id = deep_frozen_end_template_cmd, .lua = lua_key_index(deep_frozen_cs_end_template), .name = lua_key(deep_frozen_cs_end_template), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 };
lmt_interface.command_names[deep_frozen_dont_expand_cmd] = (command_item) { .id = deep_frozen_dont_expand_cmd, .lua = lua_key_index(deep_frozen_cs_dont_expand), .name = lua_key(deep_frozen_cs_dont_expand), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 };
lmt_interface.command_names[internal_glue_reference_cmd] = (command_item) { .id = internal_glue_reference_cmd, .lua = lua_key_index(internal_glue_reference), .name = lua_key(internal_glue_reference), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 };
@@ -468,8 +468,9 @@ halfword lmt_token_list_from_lua(lua_State *L, int slot)
if (s[i] == ascii_space) {
tok = token_val(spacer_cmd, s[i]);
} else {
- int k = (int) aux_str2uni((const unsigned char *) (s + i));
- i = i + (size_t) (utf8_size(k)) - 1;
+ int kl;
+ int k = (int) aux_str2uni_len((const unsigned char *) (s + i), &kl);
+ i = i + kl - 1;
tok = token_val(other_char_cmd, k);
}
p = tex_store_new_token(p, tok);
@@ -737,15 +738,15 @@ static void tokenlib_aux_to_token(lua_State *L, int i, int m, int *head, int *ta
const unsigned char *p = (const unsigned char *) s;
size_t n = aux_utf8len(s, l);
for (size_t j = 0; j < n; j++) {
- int ch = *p;
- halfword x = tex_get_available_token(tokenlib_aux_to_token_val(aux_str2uni(p)));
+ int xl;
+ halfword x = tex_get_available_token(tokenlib_aux_to_token_val(aux_str2uni_len(p, &xl)));
if (*head) {
token_link(*tail) = x;
} else {
*head = x;
}
*tail = x;
- p += utf8_size(ch);
+ p += xl;
}
break;
}
@@ -2370,7 +2371,7 @@ static int tokenlib_getprimitives(lua_State *L)
while (cs < prim_size) {
strnumber s = get_prim_text(cs);
if (s > 0 && (get_prim_origin(cs) != no_command)) {
- char *ss = tex_to_cstring(s);
+ const char *ss = tex_to_cstring(s);
int cmd = prim_eq_type(cs);
int chr = prim_equiv(cs);
if (! raw) {
@@ -2668,6 +2669,19 @@ inline static int tokenlib_get_parameters(lua_State *L)
return 0;
}
+inline static int tokenlib_get_constant(lua_State *L) /*tex Pushes one boolean telling if the token in slot 1 refers to a constant macro. */
+{
+ lua_token *n = tokenlib_aux_check_istoken(L, 1); /*tex The token object is taken from stack slot 1. */
+ halfword tok = token_info(n->token);
+ int result = 0; /*tex We report false unless all checks below succeed. */
+ if (tok >= cs_token_flag && is_call_cmd(eq_type(tok - cs_token_flag))) { /*tex Only entries at or above |cs_token_flag| whose type is a call command are inspected. */
+ halfword v = eq_value(tok - cs_token_flag);
+ result = v && get_token_reference(v) == max_token_reference; /*tex A non null list whose reference count is the maximum is what marks a constant macro. */
+ }
+ lua_pushboolean(L, result);
+ return 1; /*tex One value is returned to \LUA. */
+}
+
static int tokenlib_getfield(lua_State *L)
{
const char *s = lua_tostring(L, 2);
@@ -2711,6 +2725,8 @@ static int tokenlib_getfield(lua_State *L)
return tokenlib_get_flags(L);
} else if (lua_key_eq(s, parameters)) {
return tokenlib_get_parameters(L);
+ } else if (lua_key_eq(s, constant)) {
+ return tokenlib_get_constant(L);
} else {
lua_pushnil(L);
}
@@ -3229,11 +3245,17 @@ static int tokenlib_set_macro(lua_State *L) /* todo: protected */
slot = lmt_check_for_flags(L, slot, &flags, 1, 1);
}
if (tex_define_permitted(cs, flags)) { /* we check before we allocate */
- halfword h = get_reference_token();
- halfword t = h;
+ halfword h;
if (lstr > 0) {
+ h = get_reference_token();
/*tex Options: 1=create (will trigger an error), 2=ignore. */
- tex_parse_str_to_tok(h, &t, ct, str, lstr, lua_toboolean(L, slot++) ? 2 : 1);
+ tex_parse_str_to_tok(h, null, ct, str, lstr, lua_toboolean(L, slot++) ? 2 : 1);
+ if (is_constant(flags)) {
+ set_token_reference(h, max_token_reference);
+ }
+ } else {
+ h = lmt_token_state.empty;
+ // tex_add_token_reference(h);
}
tex_define(flags, cs, tex_flags_to_cmd(flags), h);
}
@@ -3388,7 +3410,7 @@ static int tokenlib_set_char(lua_State *L) /* also in texlib */
/* a weird place, these should be in tex */
-static int tokenlib_set_constant(lua_State *L, singleword cmd, halfword min, halfword max)
+static int tokenlib_set_constant_value(lua_State *L, singleword cmd, halfword min, halfword max)
{
int top = lua_gettop(L);
if (top >= 2) {
@@ -3409,7 +3431,7 @@ static int tokenlib_set_constant(lua_State *L, singleword cmd, halfword min, hal
return 0;
}
-static int tokenlib_get_constant(lua_State *L, halfword cmd)
+static int tokenlib_get_constant_value(lua_State *L, halfword cmd)
{
if (lua_type(L, 1) == LUA_TSTRING) {
size_t l;
@@ -3428,32 +3450,32 @@ static int tokenlib_get_constant(lua_State *L, halfword cmd)
static int tokenlib_set_integer(lua_State *L)
{
- return tokenlib_set_constant(L, integer_cmd, min_integer, max_integer);
+ return tokenlib_set_constant_value(L, integer_cmd, min_integer, max_integer);
}
static int tokenlib_set_dimension(lua_State *L)
{
- return tokenlib_set_constant(L, dimension_cmd, min_dimen, max_dimen);
+ return tokenlib_set_constant_value(L, dimension_cmd, min_dimen, max_dimen);
}
// static int tokenlib_set_gluespec(lua_State *L)
// {
-// return tokenlib_set_constant(L, gluespec_cmd, min_dimen, max_dimen);
+// return tokenlib_set_constant_value(L, gluespec_cmd, min_dimen, max_dimen);
// }
static int tokenlib_get_integer(lua_State *L)
{
- return tokenlib_get_constant(L, integer_cmd);
+ return tokenlib_get_constant_value(L, integer_cmd);
}
static int tokenlib_get_dimension(lua_State *L)
{
- return tokenlib_get_constant(L, dimension_cmd);
+ return tokenlib_get_constant_value(L, dimension_cmd);
}
// static int tokenlib_get_gluespec(lua_State *L)
// {
-// return tokenlib_get_constant(L, gluespec_cmd);
+// return tokenlib_get_constant_value(L, gluespec_cmd);
// }
/*
@@ -3575,6 +3597,7 @@ static const struct luaL_Reg tokenlib_function_list[] = {
{ "getinstance", tokenlib_get_instance },
{ "getflags", tokenlib_get_flags },
{ "getparameters", tokenlib_get_parameters },
+ { "getconstant", tokenlib_get_constant },
{ "getmacro", tokenlib_get_macro },
{ "getmeaning", tokenlib_get_meaning },
{ "getcmdchrcs", tokenlib_get_cmdchrcs },
@@ -3721,11 +3744,13 @@ void lmt_local_call(int slot)
lua_settop(L, stacktop);
}
-int lmt_function_call_by_class(int slot, int property, halfword *value)
+/*tex We replaced |class| by |category| because of g++ issues. */
+
+int lmt_function_call_by_category(int slot, int property, halfword *value)
{
lua_State *L = lmt_lua_state.lua_instance;
int stacktop = lua_gettop(L);
- int class = lua_value_none_code;
+ int category = lua_value_none_code;
lua_pushcfunction(L, lmt_traceback);
lua_rawgeti(L, LUA_REGISTRYINDEX, lmt_lua_state.function_table_id);
if (lua_rawgeti(L, -1, slot) == LUA_TFUNCTION) {
@@ -3744,9 +3769,9 @@ int lmt_function_call_by_class(int slot, int property, halfword *value)
lmt_error(L, "function call", slot, i == LUA_ERRRUN ? 0 : 1);
} else {
if (lua_type(L, -2) == LUA_TNUMBER) {
- class = lmt_tointeger(L, -2);
+ category = lmt_tointeger(L, -2);
}
- switch (class) {
+ switch (category) {
case lua_value_none_code:
{
break;
@@ -3798,7 +3823,7 @@ int lmt_function_call_by_class(int slot, int property, halfword *value)
case lua_value_float_code:
case lua_value_string_code:
{
- class = lua_value_none_code;
+ category = lua_value_none_code;
break;
}
case lua_value_boolean_code:
@@ -3816,14 +3841,14 @@ int lmt_function_call_by_class(int slot, int property, halfword *value)
break;
default:
{
- class = lua_value_none_code;
+ category = lua_value_none_code;
break;
}
}
}
}
lua_settop(L, stacktop);
- return class;
+ return category;
}
/* some day maybe an alternative too
diff --git a/source/luametatex/source/lua/lmttokenlib.h b/source/luametatex/source/lua/lmttokenlib.h
index 450c6173a..bfc3ed6f2 100644
--- a/source/luametatex/source/lua/lmttokenlib.h
+++ b/source/luametatex/source/lua/lmttokenlib.h
@@ -33,7 +33,7 @@ extern halfword lmt_token_list_from_lua (lua_State *L, int slot);
extern halfword lmt_token_code_from_lua (lua_State *L, int slot);
extern void lmt_function_call (int slot, int prefix);
-extern int lmt_function_call_by_class (int slot, int property, halfword *value);
+extern int lmt_function_call_by_category (int slot, int property, halfword *value);
extern void lmt_token_call (int p);
extern void lmt_local_call (int slot);
diff --git a/source/luametatex/source/luametatex.h b/source/luametatex/source/luametatex.h
index 07921d53a..973b405ae 100644
--- a/source/luametatex/source/luametatex.h
+++ b/source/luametatex/source/luametatex.h
@@ -89,7 +89,7 @@
# define luametatex_version 210
# define luametatex_revision 04
# define luametatex_version_string "2.10.04"
-# define luametatex_development_id 20221208
+# define luametatex_development_id 20221214
# define luametatex_name_camelcase "LuaMetaTeX"
# define luametatex_name_lowercase "luametatex"
diff --git a/source/luametatex/source/tex/texcommands.c b/source/luametatex/source/tex/texcommands.c
index c7ec0a2f4..0ad91e420 100644
--- a/source/luametatex/source/tex/texcommands.c
+++ b/source/luametatex/source/tex/texcommands.c
@@ -774,10 +774,12 @@ void tex_initialize_commands(void)
tex_primitive(tex_command, "def", def_cmd, def_code, 0);
tex_primitive(tex_command, "xdef", def_cmd, global_expanded_def_code, 0);
tex_primitive(tex_command, "gdef", def_cmd, global_def_code, 0);
+ tex_primitive(luatex_command, "cdef", def_cmd, constant_def_code, 0);
tex_primitive(luatex_command, "edefcsname", def_cmd, expanded_def_csname_code, 0);
tex_primitive(luatex_command, "defcsname", def_cmd, def_csname_code, 0);
tex_primitive(luatex_command, "xdefcsname", def_cmd, global_expanded_def_csname_code, 0);
tex_primitive(luatex_command, "gdefcsname", def_cmd, global_def_csname_code, 0);
+ tex_primitive(luatex_command, "cdefcsname", def_cmd, constant_def_csname_code, 0);
tex_primitive(tex_command, "scriptfont", define_family_cmd, script_size, 0);
tex_primitive(tex_command, "scriptscriptfont", define_family_cmd, script_script_size, 0);
@@ -961,6 +963,7 @@ void tex_initialize_commands(void)
tex_primitive(luatex_command, "semiprotected", prefix_cmd, semiprotected_code, 0);
tex_primitive(luatex_command, "enforced", prefix_cmd, enforced_code, 0);
tex_primitive(luatex_command, "inherited", prefix_cmd, inherited_code, 0);
+ tex_primitive(luatex_command, "constant", prefix_cmd, constant_code, 0);
tex_primitive(tex_command, "long", prefix_cmd, long_code, 0);
tex_primitive(tex_command, "outer", prefix_cmd, outer_code, 0);
@@ -1303,10 +1306,16 @@ void tex_initialize_commands(void)
cs_text(deep_frozen_cs_protection_code) = tex_maketexstring("inaccessible");
cs_text(deep_frozen_cs_end_write_code) = tex_maketexstring("endwrite");
- set_eq_level(deep_frozen_cs_end_write_code, level_one);
set_eq_type(deep_frozen_cs_end_write_code, call_cmd);
set_eq_flag(deep_frozen_cs_end_write_code, 0);
set_eq_value(deep_frozen_cs_end_write_code, null);
+ set_eq_level(deep_frozen_cs_end_write_code, level_one);
+
+ /*tex The empty list reference should be reassigned after compacting! */
+
+ lmt_token_state.empty = get_reference_token();
+ // tex_add_token_reference(lmt_token_state.empty);
+ set_token_reference(lmt_token_state.empty, max_token_reference);
lmt_string_pool_state.reserved = lmt_string_pool_state.string_pool_data.ptr;
lmt_hash_state.no_new_cs = 1;
diff --git a/source/luametatex/source/tex/texcommands.h b/source/luametatex/source/tex/texcommands.h
index 55de1dce6..8df61a4db 100644
--- a/source/luametatex/source/tex/texcommands.h
+++ b/source/luametatex/source/tex/texcommands.h
@@ -797,6 +797,11 @@ typedef enum local_control_codes {
bits for this but we don't have enough. Now, because frozen macros can be unfrozen we can
indeed have a prefix that bypasses the check. Explicit (re)definitions are then up to the user.
+ Constant macros are special in the sense that we set the reference count to the maximum. This is
+ then a signal that we have an expanded macro with a meaning that we can immediately copy into
+ the expanded token list, as in csname construction. This saves some memory access and token
+ allocation.
+
*/
typedef enum prefix_codes {
@@ -820,6 +825,7 @@ typedef enum prefix_codes {
enforced_code,
always_code,
inherited_code,
+ constant_code,
long_code,
outer_code,
} prefix_codes;
@@ -859,9 +865,11 @@ typedef enum def_codes {
def_csname_code,
global_expanded_def_csname_code,
global_def_csname_code,
+ constant_def_code,
+ constant_def_csname_code,
} def_codes;
-# define last_def_code global_def_csname_code
+# define last_def_code constant_def_csname_code
typedef enum let_codes {
global_let_code,
diff --git a/source/luametatex/source/tex/texconditional.c b/source/luametatex/source/tex/texconditional.c
index 2197e9065..925e9fac9 100644
--- a/source/luametatex/source/tex/texconditional.c
+++ b/source/luametatex/source/tex/texconditional.c
@@ -555,9 +555,7 @@ void tex_conditional_if(halfword code, int unless)
}
goto RESULT;
case if_zero_int_code:
- {
- result = tex_scan_int(0, NULL) == 0;
- }
+ result = tex_scan_int(0, NULL) == 0;
goto RESULT;
case if_abs_dim_code:
case if_dim_code:
@@ -587,15 +585,10 @@ void tex_conditional_if(halfword code, int unless)
}
goto RESULT;
case if_zero_dim_code:
- {
- result = tex_scan_dimen(0, 0, 0, 0, NULL) == 0;
- }
+ result = tex_scan_dimen(0, 0, 0, 0, NULL) == 0;
goto RESULT;
case if_odd_code:
- {
- halfword v = tex_scan_int(0, NULL);
- result = odd(v);
- }
+ result = odd(tex_scan_int(0, NULL));
goto RESULT;
case if_vmode_code:
result = abs(cur_list.mode) == vmode;
@@ -970,8 +963,7 @@ void tex_conditional_if(halfword code, int unless)
halfword t = token_info(lmt_input_state.cur_input.loc);
lmt_input_state.cur_input.loc = token_link(lmt_input_state.cur_input.loc);
if (t < cs_token_flag && token_cmd(t) == parameter_reference_cmd) {
- // result = token_info(input_state.parameter_stack[input_state.cur_input.parameter_start + token_chr(t) - 1]) != null ? 1 : 2;
- result = lmt_input_state.parameter_stack[lmt_input_state.cur_input.parameter_start + token_chr(t) - 1] != null ? 1 : 2;
+ result = lmt_input_state.parameter_stack[lmt_input_state.cur_input.parameter_start + token_chr(t) - 1] != null ? 1 : 2;
}
}
goto CASE;
@@ -1103,19 +1095,19 @@ void tex_conditional_if(halfword code, int unless)
// }
default:
{
- int class;
+ int category;
strnumber u = tex_save_cur_string();
int save_scanner_status = lmt_input_state.scanner_status;
lmt_input_state.scanner_status = scanner_is_normal;
lmt_token_state.luacstrings = 0;
- class = lmt_function_call_by_class(code - last_if_test_code, 0, &result);
+ category = lmt_function_call_by_category(code - last_if_test_code, 0, &result);
tex_restore_cur_string(u);
lmt_input_state.scanner_status = save_scanner_status;
if (lmt_token_state.luacstrings > 0) {
tex_lua_string_start();
/* bad */
}
- switch (class) {
+ switch (category) {
case lua_value_integer_code:
case lua_value_cardinal_code:
case lua_value_dimension_code:
diff --git a/source/luametatex/source/tex/texdumpdata.h b/source/luametatex/source/tex/texdumpdata.h
index 02514bdf3..4f3450ef4 100644
--- a/source/luametatex/source/tex/texdumpdata.h
+++ b/source/luametatex/source/tex/texdumpdata.h
@@ -55,7 +55,7 @@
*/
-# define luametatex_format_fingerprint 678
+# define luametatex_format_fingerprint 679
/* These end up in the string pool. */
diff --git a/source/luametatex/source/tex/texequivalents.c b/source/luametatex/source/tex/texequivalents.c
index bdf21446e..223e46e15 100644
--- a/source/luametatex/source/tex/texequivalents.c
+++ b/source/luametatex/source/tex/texequivalents.c
@@ -243,6 +243,7 @@ void tex_dump_equivalents_mem(dumpstream f)
/*tex A special register. */
dump_int(f, lmt_token_state.par_loc);
/* dump_int(f, lmt_token_state.line_par_loc); */ /*tex See note in |textoken.c|. */
+ dump_int(f, lmt_token_state.empty);
}
void tex_undump_equivalents_mem(dumpstream f)
@@ -284,6 +285,7 @@ void tex_undump_equivalents_mem(dumpstream f)
/* } else { */
/* tex_fatal_undump_error("lineparloc"); */
/* } */
+ undump_int(f, lmt_token_state.empty);
return;
}
diff --git a/source/luametatex/source/tex/texequivalents.h b/source/luametatex/source/tex/texequivalents.h
index 336c9e206..c84b90226 100644
--- a/source/luametatex/source/tex/texequivalents.h
+++ b/source/luametatex/source/tex/texequivalents.h
@@ -1155,6 +1155,7 @@ typedef enum flag_bit {
value_flag_bit = 0x08000,
semiprotected_flag_bit = 0x10000,
inherited_flag_bit = 0x20000,
+ constant_flag_bit = 0x40000,
} flag_bits;
/*tex Flags: */
@@ -1180,6 +1181,7 @@ typedef enum flag_bit {
# define add_conditional_flag(a) ((a) | conditional_flag_bit)
# define add_value_flag(a) ((a) | value_flag_bit)
# define add_inherited_flag(a) ((a) | inherited_flag_bit)
+# define add_constant_flag(a) ((a) | constant_flag_bit)
# define remove_flag(a,b) ((a) & ~(b))
@@ -1220,6 +1222,7 @@ typedef enum flag_bit {
# define is_conditional(a) (((a) & conditional_flag_bit) == conditional_flag_bit)
# define is_value(a) (((a) & value_flag_bit) == value_flag_bit)
# define is_inherited(a) (((a) & inherited_flag_bit) == inherited_flag_bit)
+# define is_constant(a) (((a) & constant_flag_bit) == constant_flag_bit)
# define is_expandable(cmd) (cmd > max_command_cmd)
diff --git a/source/luametatex/source/tex/texexpand.c b/source/luametatex/source/tex/texexpand.c
index 8a2fa79a0..f257f8b0f 100644
--- a/source/luametatex/source/tex/texexpand.c
+++ b/source/luametatex/source/tex/texexpand.c
@@ -88,7 +88,8 @@ inline static void tex_aux_expand_after(void)
if (cur_cmd > max_command_cmd) {
tex_expand_current_token();
} else {
- tex_back_input(t2);
+ tex_back_input(t2);
+ /* token_link(t1) = t2; */ /* no gain, rarely happens */
}
tex_back_input(t1);
}
@@ -615,7 +616,6 @@ inline static int tex_aux_uni_to_buffer(unsigned char *b, int m, int c)
much sense. It also long token lists that never (should) match anyway.
*/
-
static int tex_aux_collect_cs_tokens(halfword *p, int *n)
{
while (1) {
@@ -650,7 +650,17 @@ static int tex_aux_collect_cs_tokens(halfword *p, int *n)
*/
case call_cmd:
case tolerant_call_cmd:
- tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
+ if (get_token_reference(cur_chr) == max_token_reference) { // ! get_token_parameters(cur_chr)) {
+ /* we avoid the macro stack and expansion and we don't trace either */
+ halfword h = token_link(cur_chr);
+ while (h) {
+ *p = tex_store_new_token(*p, token_info(h));
+ *n += 1;
+ h = token_link(h);
+ }
+ } else {
+ tex_aux_macro_call(cur_cs, cur_cmd, cur_chr);
+ }
break;
case end_cs_name_cmd:
return 1;
@@ -677,7 +687,7 @@ int tex_is_valid_csname(void)
tex_get_x_or_protected(); /* we skip unprotected ! */
} while (cur_cmd != end_cs_name_cmd);
goto FINISH;
- /* no real gain: */
+ /* no real gain as we hardly ever end up here */
// while (1) {
// tex_get_token();
// if (cur_cmd == end_cs_name_cmd) {
@@ -941,6 +951,16 @@ int tex_get_parameter_count(void)
return n;
}
+/*tex
+ We can avoid the copy of parameters to the stack but it complicates the code because we also need
+ to clean up the previous set of parameters etc. It's not worth the effort. However, there are
+ plenty of optimizations compared to the original. Some are measurable on an average run, others
+ are more likely to increase performance when thousands of successive runs happen in e.g. a virtual
+ environment where threads fight for memory access and cpu cache. And because \CONTEXT\ is used
+ that way we keep looking into ways to gain performance, but not at the cost of dirty hacks (that
+ I tried out of curiosity but rejected in the end).
+*/
+
static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)
{
int tracing = tracing_macros_par > 0;
@@ -955,7 +975,7 @@ static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)
if (is_untraced(eq_flag(cs))) {
tracing = 0;
} else {
- if (! get_token_parameters(chr)) {
+ if (! get_token_preamble(chr)) {
tex_print_str("->");
} else {
/* maybe move the preamble scanner to here */
@@ -964,14 +984,14 @@ static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)
}
tex_end_diagnostic();
}
- if (get_token_parameters(chr)) {
+ if (get_token_preamble(chr)) {
halfword matchpointer = token_link(chr);
halfword matchtoken = token_info(matchpointer);
int save_scanner_status = lmt_input_state.scanner_status;
halfword save_warning_index = lmt_input_state.warning_index;
int nofscanned = 0;
int nofarguments = 0;
- halfword pstack[9]; /* We could go for 15 if we accept |#A-#F|. */
+ halfword pstack[max_match_count];
/*tex
Scan the parameters and make |link(r)| point to the macro body; but |return| if an
illegal |\par| is detected.
@@ -1334,7 +1354,7 @@ static void tex_aux_macro_call(halfword cs, halfword cmd, halfword chr)
++nofscanned;
if (tracing) {
tex_begin_diagnostic();
- tex_print_format("%c%i<-", match_visualizer, nofscanned);
+ tex_print_format("%c%c<-", match_visualizer, '0' + nofscanned + (nofscanned > 9 ? gap_match_count : 0));
tex_show_token_list(pstack[nofscanned - 1], null, default_token_show_max, 0);
tex_end_diagnostic();
}
diff --git a/source/luametatex/source/tex/texinputstack.c b/source/luametatex/source/tex/texinputstack.c
index e73451226..52262e486 100644
--- a/source/luametatex/source/tex/texinputstack.c
+++ b/source/luametatex/source/tex/texinputstack.c
@@ -62,9 +62,15 @@ input_file_state_info input_file_state = {
.line = 0,
};
-#define reserved_input_stack_slots 2
-#define reserved_in_stack_slots 2
-#define reserved_param_stack_slots 10 /*tex We play safe and always keep 10 in reserve (we have 9 max anyway). */
+/*tex
+ We play safe and always keep a few batches of parameter slots in reserve so that we
+ are unlikely to overrun.
+*/
+
+# define reserved_input_stack_slots 2
+# define reserved_in_stack_slots 2
+//define reserved_param_stack_slots 32
+# define reserved_param_stack_slots (2 * max_match_count)
void tex_initialize_input_state(void)
{
@@ -793,7 +799,7 @@ void tex_end_token_list(void)
case macro_text:
{
tex_delete_token_reference(lmt_input_state.cur_input.start);
- if (get_token_parameters(lmt_input_state.cur_input.start)) {
+ if (get_token_preamble(lmt_input_state.cur_input.start)) {
/*tex Parameters must be flushed: */
int ptr = lmt_input_state.parameter_stack_data.ptr;
int start = lmt_input_state.cur_input.parameter_start;
@@ -850,10 +856,17 @@ void tex_cleanup_input_state(void)
ptr = lmt_input_state.parameter_stack_data.ptr;
start = lmt_input_state.cur_input.parameter_start;
while (ptr > start) {
- --ptr;
- if (lmt_input_state.parameter_stack[ptr]) {
+ if (lmt_input_state.parameter_stack[--ptr]) {
tex_flush_token_list(lmt_input_state.parameter_stack[ptr]);
}
+ // halfword p = lmt_input_state.parameter_stack[--ptr];
+ // if (p) {
+ // if (! token_link(p)) {
+ // tex_put_available_token(p); /* very little gain on average */
+ // } else {
+ // tex_flush_token_list(p);
+ // }
+ // }
}
lmt_input_state.parameter_stack_data.ptr = start;
break;
diff --git a/source/luametatex/source/tex/texlanguage.c b/source/luametatex/source/tex/texlanguage.c
index 0fcd3b243..200ffbd1e 100644
--- a/source/luametatex/source/tex/texlanguage.c
+++ b/source/luametatex/source/tex/texlanguage.c
@@ -1279,8 +1279,9 @@ static int tex_aux_still_okay(halfword f, halfword l, halfword r, int n, const c
tex_normal_warning("language", "the hyphenated word contains non-glyphs, skipping");
return 0;
} else {
- halfword c = (halfword) aux_str2uni((const unsigned char *) utf8original);
- utf8original += utf8_size(c);
+ int cl;
+ halfword c = (halfword) aux_str2uni_len((const unsigned char *) utf8original, &cl);
+ utf8original += cl;
if (! (c && c == glyph_character(f))) {
tex_normal_warning("language", "the hyphenated word contains different characters, skipping");
return 0;
diff --git a/source/luametatex/source/tex/texmaincontrol.c b/source/luametatex/source/tex/texmaincontrol.c
index 24729d8cb..dbb52ab15 100644
--- a/source/luametatex/source/tex/texmaincontrol.c
+++ b/source/luametatex/source/tex/texmaincontrol.c
@@ -814,10 +814,10 @@ typedef enum saved_localbox_items {
static void tex_aux_scan_local_box(int code) {
quarterword options = 0;
- halfword class = 0;
- tex_scan_local_boxes_keys(&options, &class);
+ halfword index = 0;
+ tex_scan_local_boxes_keys(&options, &index);
tex_set_saved_record(saved_localbox_item_location, local_box_location_save_type, 0, code);
- tex_set_saved_record(saved_localbox_item_index, local_box_index_save_type, 0, class);
+ tex_set_saved_record(saved_localbox_item_index, local_box_index_save_type, 0, index);
tex_set_saved_record(saved_localbox_item_options, local_box_options_save_type, 0, options);
lmt_save_state.save_stack_data.ptr += saved_localbox_n_of_items;
tex_new_save_level(local_box_group);
@@ -894,23 +894,6 @@ static void tex_aux_finish_local_box(void)
}
}
-// static void tex_aux_run_leader(void) {
-// switch (cur_chr) {
-// case a_leaders_code:
-// tex_aux_scan_box(a_leaders_flag, 0, 0);
-// break;
-// case c_leaders_code:
-// tex_aux_scan_box(c_leaders_flag, 0, 0);
-// break;
-// case x_leaders_code:
-// tex_aux_scan_box(x_leaders_flag, 0, 0);
-// break;
-// case g_leaders_code:
-// tex_aux_scan_box(g_leaders_flag, 0, 0);
-// break;
-// }
-// }
-
static int leader_flags[] = {
a_leaders_flag,
c_leaders_flag,
@@ -1244,6 +1227,10 @@ static void tex_aux_run_text_boundary(void) {
case protrusion_boundary:
boundary_data(n) = tex_scan_int(0, NULL);
break;
+ case page_boundary:
+ /* or maybe force vmode */
+ tex_scan_int(0, NULL);
+ break;
default:
break;
}
@@ -1260,6 +1247,7 @@ static void tex_aux_run_math_boundary(void) {
break;
}
case protrusion_boundary:
+ case page_boundary:
tex_scan_int(0, NULL);
break;
}
@@ -4479,7 +4467,7 @@ static void tex_aux_set_define_font(int a)
static void tex_aux_set_def(int a, int force)
{
- halfword expand = 0;
+ int expand = 0;
switch (cur_chr) {
case expanded_def_code:
expand = 1;
@@ -4505,6 +4493,15 @@ static void tex_aux_set_def(int a, int force)
cur_cs = tex_create_csname();
a = add_global_flag(a);
goto DONE;
+ case constant_def_code:
+ expand = 2;
+ a = add_constant_flag(a);
+ break;
+ case constant_def_csname_code:
+ expand = 2;
+ cur_cs = tex_create_csname();
+ a = add_constant_flag(a);
+ goto DONE;
}
tex_get_r_token();
DONE:
@@ -4513,7 +4510,13 @@ static void tex_aux_set_def(int a, int force)
}
if (force || tex_define_permitted(cur_cs, a)) {
halfword p = cur_cs;
- halfword t = expand ? tex_scan_macro_expand() : tex_scan_macro_normal();
+ halfword t = expand == 2 ? tex_scan_toks_expand(0, null, 1) : (expand ? tex_scan_macro_expand() : tex_scan_macro_normal());
+ if (is_constant(a)) {
+ /* todo: check if already defined or just accept a leak */
+ set_token_reference(t, max_token_reference);
+ } else if (! token_link(t)) {
+ t = lmt_token_state.empty; /* maybe in tex_define */
+ }
tex_define(a, p, tex_flags_to_cmd(a), t);
}
}
@@ -4674,7 +4677,14 @@ static void tex_aux_set_let(int a, int force)
a = add_global_flag(a);
}
if (force || tex_define_permitted(cur_cs, a)) {
- tex_define(a, cur_cs, tex_flags_to_cmd(a), get_reference_token());
+ /*tex
+ The commented line permits plenty empty definitions, a |\let| can run out of
+ ref count so maybe some day \unknown
+ */
+ // halfword empty = get_reference_token();
+ halfword empty = lmt_token_state.empty;
+ // tex_add_token_reference(empty);
+ tex_define(a, cur_cs, tex_flags_to_cmd(a), empty);
}
return;
default:
@@ -4711,7 +4721,7 @@ static void tex_aux_set_let(int a, int force)
}
tex_define_inherit(a, p, (singleword) newf, (singleword) cmd, cur_chr);
} else {
- tex_define(a, p, (singleword) cur_cmd, cur_chr);
+ tex_define(a, p, (singleword) cur_cmd, cur_chr);
}
}
@@ -4929,18 +4939,18 @@ static void tex_aux_set_math_parameter(int a)
case math_parameter_let_spacing:
case math_parameter_let_atom_rule:
{
- halfword class = tex_scan_math_class_number(0);
+ halfword mathclass = tex_scan_math_class_number(0);
halfword display = tex_scan_math_class_number(1);
halfword text = tex_scan_math_class_number(0);
halfword script = tex_scan_math_class_number(0);
halfword scriptscript = tex_scan_math_class_number(0);
- if (valid_math_class_code(class)) {
+ if (valid_math_class_code(mathclass)) {
switch (code) {
case math_parameter_let_spacing:
- code = internal_int_location(first_math_class_code + class);
+ code = internal_int_location(first_math_class_code + mathclass);
break;
case math_parameter_let_atom_rule:
- code = internal_int_location(first_math_atom_code + class);
+ code = internal_int_location(first_math_atom_code + mathclass);
break;
}
value = (display << 24) + (text << 16) + (script << 8) + scriptscript;
@@ -4959,20 +4969,20 @@ static void tex_aux_set_math_parameter(int a)
case math_parameter_copy_atom_rule:
case math_parameter_copy_parent:
{
- halfword class = tex_scan_math_class_number(0);
+ halfword mathclass = tex_scan_math_class_number(0);
halfword parent = tex_scan_math_class_number(1);
- if (valid_math_class_code(class) && valid_math_class_code(parent)) {
+ if (valid_math_class_code(mathclass) && valid_math_class_code(parent)) {
switch (code) {
case math_parameter_copy_spacing:
- code = internal_int_location(first_math_class_code + class);
+ code = internal_int_location(first_math_class_code + mathclass);
value = count_parameter(first_math_class_code + parent);
break;
case math_parameter_copy_atom_rule:
- code = internal_int_location(first_math_atom_code + class);
+ code = internal_int_location(first_math_atom_code + mathclass);
value = count_parameter(first_math_atom_code + parent);
break;
case math_parameter_copy_parent:
- code = internal_int_location(first_math_parent_code + class);
+ code = internal_int_location(first_math_parent_code + mathclass);
value = count_parameter(first_math_parent_code + parent);
break;
}
@@ -4991,21 +5001,21 @@ static void tex_aux_set_math_parameter(int a)
case math_parameter_set_display_pre_penalty:
case math_parameter_set_display_post_penalty:
{
- halfword class = tex_scan_math_class_number(0);
+ halfword mathclass = tex_scan_math_class_number(0);
halfword penalty = tex_scan_int(1, NULL);
- if (valid_math_class_code(class)) {
+ if (valid_math_class_code(mathclass)) {
switch (code) {
case math_parameter_set_pre_penalty:
- code = internal_int_location(first_math_pre_penalty_code + class);
+ code = internal_int_location(first_math_pre_penalty_code + mathclass);
break;
case math_parameter_set_post_penalty:
- code = internal_int_location(first_math_post_penalty_code + class);
+ code = internal_int_location(first_math_post_penalty_code + mathclass);
break;
case math_parameter_set_display_pre_penalty:
- code = internal_int_location(first_math_display_pre_penalty_code + class);
+ code = internal_int_location(first_math_display_pre_penalty_code + mathclass);
break;
case math_parameter_set_display_post_penalty:
- code = internal_int_location(first_math_display_post_penalty_code + class);
+ code = internal_int_location(first_math_display_post_penalty_code + mathclass);
break;
}
tex_word_define(a, code, penalty);
@@ -5021,13 +5031,13 @@ static void tex_aux_set_math_parameter(int a)
}
case math_parameter_let_parent:
{
- halfword class = tex_scan_math_class_number(0);
+ halfword mathclass = tex_scan_math_class_number(0);
halfword pre = tex_scan_math_class_number(1);
halfword post = tex_scan_math_class_number(0);
halfword options = tex_scan_math_class_number(0);
halfword reserved = tex_scan_math_class_number(0);
- if (valid_math_class_code(class)) {
- code = internal_int_location(first_math_parent_code + class);
+ if (valid_math_class_code(mathclass)) {
+ code = internal_int_location(first_math_parent_code + mathclass);
value = (reserved << 24) + (options << 16) + (pre << 8) + post;
tex_word_define(a, code, value);
// tex_assign_internal_int_value(a, code, value);
@@ -5052,9 +5062,9 @@ static void tex_aux_set_math_parameter(int a)
}
case math_parameter_options:
{
- halfword class = tex_scan_math_class_number(0);
- if (valid_math_class_code(class)) {
- code = internal_int_location(first_math_options_code + class);
+ halfword mathclass = tex_scan_math_class_number(0);
+ if (valid_math_class_code(mathclass)) {
+ code = internal_int_location(first_math_options_code + mathclass);
value = tex_scan_int(1, NULL);
tex_word_define(a, code, value);
// tex_assign_internal_int_value(a, code, value);
@@ -5409,6 +5419,7 @@ void tex_run_prefixed_command(void)
case always_code: flags = add_aliased_flag (flags); force = 1; break;
/*tex This one is special */
case inherited_code: flags = add_inherited_flag (flags); break;
+ case constant_code: flags = add_constant_flag (flags); break;
default:
goto PICKUP;
}
@@ -5956,7 +5967,7 @@ static void tex_aux_run_message(void)
strnumber s = tex_aux_scan_string();
if (error_help_par) {
strnumber helpinfo = tex_tokens_to_string(error_help_par);
- char *h = tex_to_cstring(helpinfo);
+ const char *h = tex_to_cstring(helpinfo);
tex_handle_error(
normal_error_type,
"%T",
diff --git a/source/luametatex/source/tex/texmarks.c b/source/luametatex/source/tex/texmarks.c
index 01e002fbd..c967beb4b 100644
--- a/source/luametatex/source/tex/texmarks.c
+++ b/source/luametatex/source/tex/texmarks.c
@@ -21,8 +21,6 @@
Watch out: zero is always valid and the good old single mark!
- Todo: class -> index
-
*/
mark_state_info lmt_mark_state = {
@@ -115,23 +113,23 @@ int tex_valid_mark(halfword m) {
return m < lmt_mark_state.mark_data.top;
}
-halfword tex_new_mark(quarterword subtype, halfword class, halfword ptr)
+halfword tex_new_mark(quarterword subtype, halfword index, halfword ptr)
{
halfword mark = tex_new_node(mark_node, subtype);
- mark_index(mark) = class;
+ mark_index(mark) = index;
mark_ptr(mark) = ptr;
if (lmt_mark_state.min_used < 0) {
- lmt_mark_state.min_used = class;
- lmt_mark_state.max_used = class;
+ lmt_mark_state.min_used = index;
+ lmt_mark_state.max_used = index;
} else {
- if (class < lmt_mark_state.min_used) {
- lmt_mark_state.min_used = class;
+ if (index < lmt_mark_state.min_used) {
+ lmt_mark_state.min_used = index;
}
- if (class > lmt_mark_state.max_used) {
- lmt_mark_state.max_used = class;
+ if (index > lmt_mark_state.max_used) {
+ lmt_mark_state.max_used = index;
}
}
- tex_set_mark(class, current_marks_code, ptr);
+ tex_set_mark(index, current_marks_code, ptr);
return mark;
}
@@ -315,16 +313,16 @@ int tex_has_mark(halfword m)
void tex_run_mark(void)
{
- halfword class = 0;
+ halfword index = 0;
halfword code = cur_chr;
switch (code) {
case set_marks_code:
case clear_marks_code:
case flush_marks_code:
- class = tex_scan_mark_number();
+ index = tex_scan_mark_number();
break;
}
- if (tex_valid_mark(class)) {
+ if (tex_valid_mark(index)) {
quarterword subtype = set_mark_value_code;
halfword ptr = null;
switch (code) {
@@ -333,13 +331,13 @@ void tex_run_mark(void)
ptr = tex_scan_toks_expand(0, NULL, 0);
break;
case clear_marks_code:
- tex_wipe_mark(class);
+ tex_wipe_mark(index);
return;
case flush_marks_code:
subtype = reset_mark_value_code;
break;
}
- tex_tail_append(tex_new_mark(subtype, class, ptr));
+ tex_tail_append(tex_new_mark(subtype, index, ptr));
} else {
/* error already issued */
}
diff --git a/source/luametatex/source/tex/texmarks.h b/source/luametatex/source/tex/texmarks.h
index e787fa9d0..9ce819f07 100644
--- a/source/luametatex/source/tex/texmarks.h
+++ b/source/luametatex/source/tex/texmarks.h
@@ -50,7 +50,7 @@ extern void tex_reset_mark (halfword m);
extern void tex_wipe_mark (halfword m);
extern void tex_delete_mark (halfword m, int what);
extern halfword tex_get_some_mark (halfword chr, halfword val);
-extern halfword tex_new_mark (quarterword subtype, halfword cls, halfword ptr);
+extern halfword tex_new_mark (quarterword subtype, halfword index, halfword ptr);
extern void tex_update_top_marks (void);
extern void tex_update_first_and_bot_mark (halfword m);
extern void tex_update_first_marks (void);
diff --git a/source/luametatex/source/tex/texmath.c b/source/luametatex/source/tex/texmath.c
index 327e8e6a3..216ba553b 100644
--- a/source/luametatex/source/tex/texmath.c
+++ b/source/luametatex/source/tex/texmath.c
@@ -1929,9 +1929,9 @@ static void tex_aux_append_math_accent(mathcodeval mval, mathdictval dval)
*/
-static void tex_aux_append_math_fence(halfword fence, quarterword class)
+static void tex_aux_append_math_fence(halfword fence, quarterword mathclass)
{
- switch (class) {
+ switch (mathclass) {
case open_noad_subtype:
{
tex_aux_push_math(math_fence_group, cur_list.math_style);
@@ -1968,7 +1968,7 @@ static void tex_aux_append_math_fence(halfword fence, quarterword class)
}
}
-static void tex_aux_append_math_fence_val(mathcodeval mval, mathdictval dval, quarterword class)
+static void tex_aux_append_math_fence_val(mathcodeval mval, mathdictval dval, quarterword mathclass)
{
halfword fence = tex_new_node(fence_noad, middle_fence_side);
halfword delimiter = tex_new_node(delimiter_node, mval.class_value);
@@ -1981,10 +1981,10 @@ static void tex_aux_append_math_fence_val(mathcodeval mval, mathdictval dval, qu
set_noad_classes(fence, mval.class_value);
/* todo : share the next three with the regular fences */
noad_options(fence) |= noad_option_no_check;
- if (class == middle_noad_subtype && cur_group != math_fence_group) {
+ if (mathclass == middle_noad_subtype && cur_group != math_fence_group) {
tex_aux_append_math_fence_val(tex_no_math_code(), tex_no_dict_code(), open_noad_subtype);
}
- tex_aux_append_math_fence(fence, class);
+ tex_aux_append_math_fence(fence, mathclass);
}
static void tex_aux_append_math_char(mathcodeval mval, mathdictval dval, int automatic)
@@ -2147,9 +2147,9 @@ int tex_scan_math_code_val(halfword code, mathcodeval *mval, mathdictval *dval)
case math_class_number_code:
{
halfword family = cur_fam_par;
- halfword class = tex_scan_int(0, NULL);
+ halfword mathclass = tex_scan_int(0, NULL);
tex_scan_math_cmd_val(mval, dval);
- mval->class_value = (short) class;
+ mval->class_value = (short) mathclass;
mval->family_value = (short) family;
}
break;
@@ -2518,7 +2518,7 @@ void tex_run_math_modifier(void)
*/
-static void tex_aux_scan_delimiter(halfword target, int code, int class)
+static void tex_aux_scan_delimiter(halfword target, int code, int mathclass)
{
delcodeval dval = tex_no_del_code();
mathcodeval mval = tex_no_math_code();
@@ -2584,8 +2584,8 @@ static void tex_aux_scan_delimiter(halfword target, int code, int class)
goto REALDELIMITER;
}
FAKEDELIMITER:
- if (class != unset_noad_class) {
- mval.class_value = (short) class;
+ if (mathclass != unset_noad_class) {
+ mval.class_value = (short) mathclass;
}
dval.small = mval;
dval.large = mval;
@@ -3451,7 +3451,7 @@ void tex_run_math_fraction(void)
halfword userstyle = -1;
halfword attrlist = null;
fullword options = 0;
- halfword class = fraction_noad_subtype;
+ halfword mathclass = fraction_noad_subtype;
halfword rulethickness = preset_rule_thickness;
int ruledone = 0;
fraction_h_factor(fraction) = 1000;
@@ -3581,7 +3581,7 @@ void tex_run_math_fraction(void)
if (tex_scan_mandate_keyword("class", 1)) {
halfword c = (quarterword) tex_scan_math_class_number(0);
if (valid_math_class_code(c)) {
- class = c;
+ mathclass = c;
}
}
break;
@@ -3673,7 +3673,7 @@ void tex_run_math_fraction(void)
}
fraction_rule_thickness(fraction) = rulethickness;
noad_options(fraction) = options;
- set_noad_main_class(fraction, class);
+ set_noad_main_class(fraction, mathclass);
if (attrlist) {
tex_attach_attribute_list_attribute(fraction, attrlist);
}
@@ -5183,16 +5183,16 @@ void tex_reset_all_styles(halfword level)
}
}
-inline static halfword tex_aux_math_class_default(halfword class) {
- return (class << 24) + (class << 16) + (class << 8) + class;
+inline static halfword tex_aux_math_class_default(halfword mathclass) {
+ return (mathclass << 24) + (mathclass << 16) + (mathclass << 8) + mathclass;
}
-inline static void tex_set_math_class_default(halfword class, halfword parent, halfword options)
+inline static void tex_set_math_class_default(halfword mathclass, halfword parent, halfword options)
{
- tex_word_define(0, internal_int_location(first_math_class_code + class), tex_aux_math_class_default(parent));
- tex_word_define(0, internal_int_location(first_math_atom_code + class), tex_aux_math_class_default(class));
- tex_word_define(0, internal_int_location(first_math_options_code + class), options);
- tex_word_define(0, internal_int_location(first_math_parent_code + class), tex_aux_math_class_default(class));
+ tex_word_define(0, internal_int_location(first_math_class_code + mathclass), tex_aux_math_class_default(parent));
+ tex_word_define(0, internal_int_location(first_math_atom_code + mathclass), tex_aux_math_class_default(mathclass));
+ tex_word_define(0, internal_int_location(first_math_options_code + mathclass), options);
+ tex_word_define(0, internal_int_location(first_math_parent_code + mathclass), tex_aux_math_class_default(mathclass));
}
static void tex_aux_set_math_atom_rule(halfword left, halfword right, halfword newleft, halfword newright)
@@ -5203,13 +5203,13 @@ static void tex_aux_set_math_atom_rule(halfword left, halfword right, halfword n
void tex_initialize_math_spacing(void)
{
- for (int class = 0; class <= max_math_class_code; class++) {
- tex_set_math_class_default(class, class, no_class_options);
+ for (int mathclass = 0; mathclass <= max_math_class_code; mathclass++) {
+ tex_set_math_class_default(mathclass, mathclass, no_class_options);
/*tex We do this here as there is no real need for yet another initializer. */
- tex_word_define(0, internal_int_location(first_math_pre_penalty_code + class), infinite_penalty);
- tex_word_define(0, internal_int_location(first_math_post_penalty_code + class), infinite_penalty);
- tex_word_define(0, internal_int_location(first_math_display_pre_penalty_code + class), infinite_penalty);
- tex_word_define(0, internal_int_location(first_math_display_post_penalty_code + class), infinite_penalty);
+ tex_word_define(0, internal_int_location(first_math_pre_penalty_code + mathclass), infinite_penalty);
+ tex_word_define(0, internal_int_location(first_math_post_penalty_code + mathclass), infinite_penalty);
+ tex_word_define(0, internal_int_location(first_math_display_pre_penalty_code + mathclass), infinite_penalty);
+ tex_word_define(0, internal_int_location(first_math_display_post_penalty_code + mathclass), infinite_penalty);
}
tex_reset_all_styles(level_one);
diff --git a/source/luametatex/source/tex/texmlist.c b/source/luametatex/source/tex/texmlist.c
index 1d4cbacd8..b9453875e 100644
--- a/source/luametatex/source/tex/texmlist.c
+++ b/source/luametatex/source/tex/texmlist.c
@@ -477,11 +477,11 @@ static void tex_aux_trace_kerns(halfword kern, const char *what, const char *det
}
}
-static halfword tex_aux_math_insert_font_kern(halfword current, scaled amount, halfword template, const char *trace)
+static halfword tex_aux_math_insert_font_kern(halfword current, scaled amount, halfword attributetemplate, const char *trace)
{
/*tex Maybe |math_font_kern|, also to prevent expansion. */
halfword kern = tex_new_kern_node(amount, font_kern_subtype);
- tex_attach_attribute_list_copy(kern, template ? template : current);
+ tex_attach_attribute_list_copy(kern, attributetemplate ? attributetemplate : current);
if (node_next(current)) {
tex_couple_nodes(kern, node_next(current));
}
@@ -490,11 +490,11 @@ static halfword tex_aux_math_insert_font_kern(halfword current, scaled amount, h
return kern;
}
-static halfword tex_aux_math_insert_italic_kern(halfword current, scaled amount, halfword template, const char *trace)
+static halfword tex_aux_math_insert_italic_kern(halfword current, scaled amount, halfword attributetemplate, const char *trace)
{
/*tex Maybe |math_italic_kern|. */
halfword kern = tex_new_kern_node(amount, italic_kern_subtype);
- tex_attach_attribute_list_copy(kern, template ? template : current);
+ tex_attach_attribute_list_copy(kern, attributetemplate ? attributetemplate : current);
if (node_next(current)) {
tex_couple_nodes(kern, node_next(current));
}
@@ -1666,7 +1666,7 @@ inline static void tex_aux_calculate_glue(scaled m, scaled *f, scaled *n)
/*tex integer part of |m| */
*n = tex_x_over_n_r(m, unity, f);
/*tex the new glue specification */
- if (f < 0) {
+ if (*f < 0) {
--n;
f += unity;
}
@@ -5540,9 +5540,9 @@ if (! stack && has_noad_option_exact(target)) {
}
}
-inline static int tex_aux_fallback_math_spacing_class(halfword style, halfword class)
+inline static int tex_aux_fallback_math_spacing_class(halfword style, halfword mathclass)
{
- unsigned parent = (unsigned) count_parameter(first_math_class_code + class);
+ unsigned parent = (unsigned) count_parameter(first_math_class_code + mathclass);
switch (style) {
case display_style: case cramped_display_style: return (parent >> 24) & 0xFF;
case text_style: case cramped_text_style: return (parent >> 16) & 0xFF;
@@ -5673,9 +5673,9 @@ static halfword tex_aux_math_spacing_glue(halfword ltype, halfword rtype, halfwo
}
}
-inline static int tex_aux_fallback_math_ruling_class(halfword style, halfword class)
+inline static int tex_aux_fallback_math_ruling_class(halfword style, halfword mathclass)
{
- unsigned parent = (unsigned) count_parameter(first_math_atom_code + class);
+ unsigned parent = (unsigned) count_parameter(first_math_atom_code + mathclass);
switch (style) {
case display_style: case cramped_display_style: return (parent >> 24) & 0xFF;
case text_style: case cramped_text_style: return (parent >> 16) & 0xFF;
@@ -6838,6 +6838,8 @@ static void tex_mlist_to_hlist_finalize_list(mliststate *state)
Apply some logic. The hard coded pairwise comparison is replaced by a generic one
because we can have more classes. For a while spacing and pairing was under a mode
control but that made no sense. We start with the begin class.
+
+ Setting |state->beginclass| is still fragile ... todo.
*/
recent_class_overload = get_noad_right_class(current);
if (current_type == simple_noad && state->beginclass == unset_noad_class) {
@@ -6853,6 +6855,9 @@ static void tex_mlist_to_hlist_finalize_list(mliststate *state)
current = node_next(current);
goto WIPE;
}
+ if (recent_subtype == math_begin_class) {
+ state->beginclass = current_subtype;
+ }
/*tex
This is a special case where a sign starts something marked as (like) numeric, in
which there will be different spacing applied.
diff --git a/source/luametatex/source/tex/texprinting.c b/source/luametatex/source/tex/texprinting.c
index bb021047e..86fa47e28 100644
--- a/source/luametatex/source/tex/texprinting.c
+++ b/source/luametatex/source/tex/texprinting.c
@@ -352,7 +352,7 @@ void tex_print_str(const char *s)
lmt_string_to_buffer(s);
return;
default:
- break;
+ return;
}
if (terminal || logfile) {
int len = (int) strlen(s);
diff --git a/source/luametatex/source/tex/texscanning.c b/source/luametatex/source/tex/texscanning.c
index 15e887a71..e4354bba1 100644
--- a/source/luametatex/source/tex/texscanning.c
+++ b/source/luametatex/source/tex/texscanning.c
@@ -225,12 +225,12 @@ inline static void tex_aux_downgrade_cur_val(int level, int succeeded, int negat
static void tex_aux_set_cur_val_by_lua_value_cmd(halfword index, halfword property)
{
- int class = lua_value_none_code;
+ int category = lua_value_none_code;
halfword value = 0; /* can also be scaled */
strnumber u = tex_save_cur_string();
lmt_token_state.luacstrings = 0;
- class = lmt_function_call_by_class(index, property, &value);
- switch (class) {
+ category = lmt_function_call_by_category(index, property, &value);
+ switch (category) {
case lua_value_none_code:
cur_val_level = no_val_level;
break;
@@ -1380,20 +1380,20 @@ static halfword tex_aux_scan_something_internal(halfword cmd, halfword chr, int
case math_parameter_set_display_pre_penalty:
case math_parameter_set_display_post_penalty:
{
- halfword class = tex_scan_math_class_number(0);
- if (valid_math_class_code(class)) {
+ halfword mathclass = tex_scan_math_class_number(0);
+ if (valid_math_class_code(mathclass)) {
switch (chr) {
case math_parameter_set_pre_penalty:
- cur_val = count_parameter(first_math_pre_penalty_code + class);
+ cur_val = count_parameter(first_math_pre_penalty_code + mathclass);
break;
case math_parameter_set_post_penalty:
- cur_val = count_parameter(first_math_post_penalty_code + class);
+ cur_val = count_parameter(first_math_post_penalty_code + mathclass);
break;
case math_parameter_set_display_pre_penalty:
- cur_val = count_parameter(first_math_display_pre_penalty_code + class);
+ cur_val = count_parameter(first_math_display_pre_penalty_code + mathclass);
break;
case math_parameter_set_display_post_penalty:
- cur_val = count_parameter(first_math_display_post_penalty_code + class);
+ cur_val = count_parameter(first_math_display_post_penalty_code + mathclass);
break;
}
} else {
@@ -1411,9 +1411,9 @@ static halfword tex_aux_scan_something_internal(halfword cmd, halfword chr, int
}
case math_parameter_options:
{
- halfword class = tex_scan_math_class_number(0);
- if (valid_math_class_code(class)) {
- cur_val = count_parameter(first_math_options_code + class);
+ halfword mathclass = tex_scan_math_class_number(0);
+ if (valid_math_class_code(mathclass)) {
+ cur_val = count_parameter(first_math_options_code + mathclass);
} else {
cur_val = 0;
}
@@ -1890,6 +1890,20 @@ static void tex_aux_improper_constant_error(void)
*/
+
+static void tex_aux_scan_int_no_number(void)
+{
+    /*tex Express astonishment that no number was here: in intercept mode we only set |last_intercept| and hand a non-space |cur_tok| to |tex_back_input|, otherwise we issue the missing number error. This is no longer a goto target because g++ doesn't like that. */
+    if (lmt_error_state.intercept) {
+        lmt_error_state.last_intercept = 1;
+        if (cur_cmd != spacer_cmd) {
+            tex_back_input(cur_tok);
+        }
+    } else {
+        tex_aux_missing_number_error();
+    }
+}
+
halfword tex_scan_int(int optional_equal, int *radix)
{
int negative = 0;
@@ -1959,7 +1973,7 @@ halfword tex_scan_int(int optional_equal, int *radix)
result = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0);
if (cur_val_level != int_val_level) {
result = 0;
- goto NONUMBER;
+ tex_aux_scan_int_no_number();
}
} else if (cur_cmd == math_style_cmd) {
/* A pity that we need to check this way in |scan_int|. */
@@ -1970,7 +1984,7 @@ halfword tex_scan_int(int optional_equal, int *radix)
result = cur_chr;
} else {
result = 0;
- goto NONUMBER;
+ tex_aux_scan_int_no_number();
}
} else {
/*tex has an error message been issued? */
@@ -1997,7 +2011,7 @@ halfword tex_scan_int(int optional_equal, int *radix)
if (ok_so_far) {
result = result * 8 + d;
if (result > max_integer) {
- result = infinity;
+ result = max_integer;
if (lmt_error_state.intercept) {
vacuous = 1;
goto DONE;
@@ -2031,7 +2045,7 @@ halfword tex_scan_int(int optional_equal, int *radix)
if (ok_so_far) {
result = result * 16 + d;
if (result > max_integer) {
- result = infinity;
+ result = max_integer;
if (lmt_error_state.intercept) {
vacuous = 1;
goto DONE;
@@ -2060,7 +2074,7 @@ halfword tex_scan_int(int optional_equal, int *radix)
if (ok_so_far) {
result = result * 10 + d;
if (result > max_integer) {
- result = infinity;
+ result = max_integer;
if (lmt_error_state.intercept) {
vacuous = 1;
goto DONE;
@@ -2077,16 +2091,7 @@ halfword tex_scan_int(int optional_equal, int *radix)
}
DONE:
if (vacuous) {
- NONUMBER:
- /*tex Express astonishment that no number was here */
- if (lmt_error_state.intercept) {
- lmt_error_state.last_intercept = 1 ;
- if (cur_cmd != spacer_cmd) {
- tex_back_input(cur_tok);
- }
- } else {
- tex_aux_missing_number_error();
- }
+ tex_aux_scan_int_no_number();
} else {
tex_push_back(cur_tok, cur_cmd, cur_chr);
}
@@ -3289,7 +3294,7 @@ halfword tex_scan_font_identifier(halfword *spec)
if (tex_is_valid_font(fnt)) {
return fnt;
} else {
- goto BAD;
+ break; /* to error */
}
}
case internal_int_cmd:
@@ -3301,7 +3306,7 @@ halfword tex_scan_font_identifier(halfword *spec)
return fnt;
}
}
- goto BAD;
+ break; /* to error */
}
default:
{
@@ -3312,19 +3317,17 @@ halfword tex_scan_font_identifier(halfword *spec)
if (tex_is_valid_font((halfword) fnt)) {
return (halfword) fnt;
}
- } else {
- /*tex Fall through to a font error message. */
}
- BAD:
- tex_handle_error(
- back_error_type,
- "Missing or invalid font identifier (or equivalent) or integer (register or otherwise)",
- "I was looking for a control sequence whose current meaning has been defined by\n"
- "\\font or a valid font id number."
- );
- return null_font;
+ break; /* to error */
}
}
+ tex_handle_error(
+ back_error_type,
+ "Missing or invalid font identifier (or equivalent) or integer (register or otherwise)",
+ "I was looking for a control sequence whose current meaning has been defined by\n"
+ "\\font or a valid font id number."
+ );
+ return null_font;
}
/*tex
@@ -3612,9 +3615,10 @@ inline static int tex_aux_valid_macro_preamble(halfword *p, int *counter, halfwo
*hash_brace = cur_tok;
*p = tex_store_new_token(*p, cur_tok);
*p = tex_store_new_token(*p, end_match_token);
- set_token_parameters(h, *counter - zero_token + 1);
+ set_token_preamble(h, 1);
+ set_token_parameters(h, *counter - zero_token);
return 1;
- } else if (*counter == nine_token) {
+ } else if (*counter == F_token_l) {
tex_aux_too_many_parameters_error();
} else {
switch (cur_tok) {
@@ -3669,7 +3673,13 @@ inline static int tex_aux_valid_macro_preamble(halfword *p, int *counter, halfwo
default:
++*counter;
if (cur_tok != *counter) {
- tex_aux_parameters_order_error();
+ if (cur_tok >= A_token_l && cur_tok <= F_token_l) {
+ *counter += gap_match_count;
+ cur_tok += match_token - letter_token;
+ break;
+ } else {
+ tex_aux_parameters_order_error();
+ }
}
cur_tok += match_token - other_token;
break;
@@ -3682,7 +3692,8 @@ inline static int tex_aux_valid_macro_preamble(halfword *p, int *counter, halfwo
}
if (h != *p) {
*p = tex_store_new_token(*p, end_match_token);
- set_token_parameters(h, *counter - zero_token + 1);
+ set_token_preamble(h, 1);
+ set_token_parameters(h, *counter - zero_token);
}
if (cur_cmd == right_brace_cmd) {
++lmt_input_state.align_state;
@@ -3721,8 +3732,12 @@ halfword tex_scan_macro_normal(void)
if (cur_cmd == parameter_cmd) {
/*tex Keep the |#|. */
} else if (cur_tok <= zero_token || cur_tok > counter) {
- tex_aux_illegal_parameter_in_body_error();
- cur_tok = s;
+ if (cur_tok >= A_token_l && cur_tok <= F_token_l) {
+ cur_tok = token_val(parameter_reference_cmd, cur_chr - '0' - gap_match_count);
+ } else {
+ tex_aux_illegal_parameter_in_body_error();
+ cur_tok = s;
+ }
} else {
cur_tok = token_val(parameter_reference_cmd, cur_chr - '0');
}
@@ -3799,8 +3814,12 @@ halfword tex_scan_macro_expand(void)
if (cur_cmd == parameter_cmd) {
/*tex Keep the |#|. */
} else if (cur_tok <= zero_token || cur_tok > counter) {
- tex_aux_illegal_parameter_in_body_error();
- cur_tok = s;
+ if (cur_tok >= A_token_l && cur_tok <= F_token_l) {
+ cur_tok = token_val(parameter_reference_cmd, cur_chr - '0' - gap_match_count);
+ } else {
+ tex_aux_illegal_parameter_in_body_error();
+ cur_tok = s;
+ }
} else {
cur_tok = token_val(parameter_reference_cmd, cur_chr - '0');
}
@@ -4245,7 +4264,7 @@ static void tex_aux_scan_expr(halfword level)
switch (level) {
case int_val_level:
case attr_val_level:
- if ((factor > infinity) || (factor < -infinity)) {
+ if ((factor > max_integer) || (factor < min_integer)) {
lmt_scanner_state.arithmic_error = 1;
factor = 0;
}
@@ -4264,7 +4283,7 @@ static void tex_aux_scan_expr(halfword level)
}
break;
default:
- if ((state > expression_subtract) && ((factor > infinity) || (factor < -infinity))) {
+ if ((state > expression_subtract) && ((factor > max_integer) || (factor < min_integer))) {
lmt_scanner_state.arithmic_error = 1;
factor = 0;
}
@@ -4326,7 +4345,7 @@ static void tex_aux_scan_expr(halfword level)
switch (level) {
case int_val_level:
case attr_val_level:
- term = tex_fract(term, numerator, factor, infinity);
+ term = tex_fract(term, numerator, factor, max_integer);
break;
case dimen_val_level:
term = tex_fract(term, numerator, factor, max_dimen);
@@ -4363,7 +4382,7 @@ static void tex_aux_scan_expr(halfword level)
switch (level) {
case int_val_level:
case attr_val_level:
- expression = tex_aux_add_or_sub(expression, term, infinity, result);
+ expression = tex_aux_add_or_sub(expression, term, max_integer, result);
break;
case dimen_val_level:
expression = tex_aux_add_or_sub(expression, term, max_dimen, result);
@@ -4850,7 +4869,7 @@ static halfword tex_scan_bit_int(int *radix)
result = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0);
if (cur_val_level != int_val_level) {
result = 0;
- goto NONUMBER;
+ tex_aux_missing_number_error();
}
} else if (cur_cmd == math_style_cmd) {
result = (cur_chr == yet_unset_math_style) ? tex_scan_math_style_identifier(0, 0) : cur_chr;
@@ -4859,7 +4878,7 @@ static halfword tex_scan_bit_int(int *radix)
result = cur_chr;
} else {
result = 0;
- goto NONUMBER;
+ tex_aux_missing_number_error();
}
} else {
int vacuous = 1;
@@ -4882,7 +4901,7 @@ static halfword tex_scan_bit_int(int *radix)
if (ok_so_far) {
result = result * 8 + d;
if (result > max_integer) {
- result = infinity;
+ result = max_integer;
tex_aux_number_to_big_error();
ok_so_far = 0;
}
@@ -4911,7 +4930,7 @@ static halfword tex_scan_bit_int(int *radix)
if (ok_so_far) {
result = result * 16 + d;
if (result > max_integer) {
- result = infinity;
+ result = max_integer;
tex_aux_number_to_big_error();
ok_so_far = 0;
}
@@ -4935,7 +4954,7 @@ static halfword tex_scan_bit_int(int *radix)
if (ok_so_far) {
result = result * 10 + d;
if (result > max_integer) {
- result = infinity;
+ result = max_integer;
tex_aux_number_to_big_error();
ok_so_far = 0;
}
@@ -4947,7 +4966,6 @@ static halfword tex_scan_bit_int(int *radix)
}
DONE:
if (vacuous) {
- NONUMBER:
tex_aux_missing_number_error();
} else {
tex_push_back(cur_tok, cur_cmd, cur_chr);
@@ -5562,10 +5580,10 @@ static void tex_aux_scan_expression(int level)
break;
}
}
- if (v < -infinity) {
- v = -infinity;
- } else if (v > infinity) {
- v = infinity;
+ if (v < min_integer) {
+ v = min_integer;
+ } else if (v > max_integer) {
+ v = max_integer;
}
expression_entry(stack.tail) = v;
break;
diff --git a/source/luametatex/source/tex/texstringpool.h b/source/luametatex/source/tex/texstringpool.h
index a15b9fad5..f053a642a 100644
--- a/source/luametatex/source/tex/texstringpool.h
+++ b/source/luametatex/source/tex/texstringpool.h
@@ -78,35 +78,33 @@ extern string_pool_info lmt_string_pool_state;
inline static void tex_flush_char(void) { --lmt_string_pool_state.string_temp_top; }
-extern strnumber tex_make_string (void);
-extern strnumber tex_push_string (const unsigned char *s, int l);
-extern char *tex_take_string (int *len);
-extern int tex_str_eq_buf (strnumber s, int k, int n);
-extern int tex_str_eq_str (strnumber s, strnumber t);
-extern int tex_str_eq_cstr (strnumber s, const char *, size_t);
-extern int tex_get_strings_started (void);
-extern void tex_reset_cur_string (void);
-/* strnumber tex_search_string (strnumber search); */
-/* int tex_used_strings (void); */
-extern strnumber tex_maketexstring (const char *s);
-extern strnumber tex_maketexlstring (const char *s, size_t);
-extern void tex_append_char (unsigned char c);
-extern void tex_append_string (const unsigned char *s, unsigned l);
-extern char *tex_makecstring (int s, int *allocated);
-extern char *tex_makeclstring (int s, size_t *len);
-extern void tex_dump_string_pool (dumpstream f);
-extern void tex_undump_string_pool (dumpstream f);
-extern void tex_initialize_string_pool (void);
-extern void tex_initialize_string_mem (void);
-extern void tex_flush_str (strnumber s);
-extern strnumber tex_save_cur_string (void);
-extern void tex_restore_cur_string (strnumber u);
-
-/* void tex_increment_pool_string (int n); */
-/* void tex_decrement_pool_string (int n); */
-
-extern void tex_compact_string_pool (void);
-
-inline static char *tex_to_cstring (int s) { return str_length(s) > 0 ? (char *) str_string(s) : ""; }
+extern strnumber tex_make_string (void);
+extern strnumber tex_push_string (const unsigned char *s, int l);
+extern char *tex_take_string (int *len);
+extern int tex_str_eq_buf (strnumber s, int k, int n);
+extern int tex_str_eq_str (strnumber s, strnumber t);
+extern int tex_str_eq_cstr (strnumber s, const char *, size_t);
+extern int tex_get_strings_started (void);
+extern void tex_reset_cur_string (void);
+/* strnumber tex_search_string (strnumber search); */
+/* int tex_used_strings (void); */
+extern strnumber tex_maketexstring (const char *s);
+extern strnumber tex_maketexlstring (const char *s, size_t);
+extern void tex_append_char (unsigned char c);
+extern void tex_append_string (const unsigned char *s, unsigned l);
+extern char *tex_makecstring (int s, int *allocated);
+extern char *tex_makeclstring (int s, size_t *len);
+extern void tex_dump_string_pool (dumpstream f);
+extern void tex_undump_string_pool (dumpstream f);
+extern void tex_initialize_string_pool (void);
+extern void tex_initialize_string_mem (void);
+extern void tex_flush_str (strnumber s);
+extern strnumber tex_save_cur_string (void);
+extern void tex_restore_cur_string (strnumber u);
+extern void tex_compact_string_pool (void);
+/* void tex_increment_pool_string (int n); */
+/* void tex_decrement_pool_string (int n); */
+
+inline static const char *tex_to_cstring (int s) { return str_length(s) > 0 ? (char *) str_string(s) : ""; }
# endif
diff --git a/source/luametatex/source/tex/textoken.c b/source/luametatex/source/tex/textoken.c
index b46e6de85..f820e51d7 100644
--- a/source/luametatex/source/tex/textoken.c
+++ b/source/luametatex/source/tex/textoken.c
@@ -92,7 +92,7 @@ token_state_info lmt_token_state = {
.buffer = NULL,
.bufloc = 0,
.bufmax = 0,
- .padding = 0,
+ .empty = null,
};
/*tex Some properties are dumped in the format so these are set already! */
@@ -212,6 +212,7 @@ void tex_compact_tokens(void)
}
}
}
+ lmt_token_state.empty = mapper[lmt_token_state.empty];
// print(dump_state.format_identifier);
tex_print_format("tokenlist compacted from %i to %i entries, ", lmt_token_memory_state.tokens_data.top, nc);
if (nofluacmds) {
@@ -335,27 +336,41 @@ void tex_add_token_reference(halfword p)
{
if (get_token_reference(p) < max_token_reference) {
add_token_reference(p);
- } else {
- tex_overflow_error("reference count", max_token_reference);
+ // } else {
+ // tex_overflow_error("reference count", max_token_reference);
}
}
void tex_increment_token_reference(halfword p, int n)
{
if ((get_token_reference(p) + n) < max_token_reference) {
- inc_token_reference(p,n);
- } else {
- tex_overflow_error("reference count", max_token_reference);
+ inc_token_reference(p, n); /*tex there is room: bump the count by |n| */
+ } else { /*tex otherwise clip: add just enough to land exactly on |max_token_reference| */
+ inc_token_reference(p, max_token_reference - get_token_reference(p));
+ // } else {
+ // tex_overflow_error("reference count", max_token_reference);
}
}
+// void tex_delete_token_reference(halfword p)
+// {
+// if (p) {
+// if (get_token_reference(p)) {
+// sub_token_reference(p);
+// } else {
+// tex_flush_token_list(p);
+// }
+// }
+// }
+
void tex_delete_token_reference(halfword p)
{
if (p) {
- if (get_token_reference(p)) {
- sub_token_reference(p);
- } else {
+ halfword r = get_token_reference(p); /*tex read the count once, before anything is freed */
+ if (! r) { /*tex the count is zero: the list itself goes */
tex_flush_token_list(p);
+ } else if (r < max_token_reference) { /*tex a count that sits at the maximum is left alone; the |else| keeps us from decrementing a list that was just flushed */
+ sub_token_reference(p);
}
}
}
@@ -465,6 +480,9 @@ void tex_print_meaning(halfword code)
tex_print_cs(cur_cs);
return;
} else {
+ if (cur_chr && get_token_reference(cur_chr) == max_token_reference) {
+ tex_print_str("constant ");
+ }
switch (code) {
case meaning_code:
case meaning_full_code:
@@ -477,7 +495,7 @@ void tex_print_meaning(halfword code)
tex_print_cs(cur_cs);
tex_print_char(' ');
if (cur_chr && token_link(cur_chr)) {
- halfword body = get_token_parameters(cur_chr) ? tex_show_token_list(token_link(cur_chr), null, default_token_show_max, 1) : token_link(cur_chr);
+ halfword body = get_token_preamble(cur_chr) ? tex_show_token_list(token_link(cur_chr), null, default_token_show_max, 1) : token_link(cur_chr);
tex_print_char('{');
if (body) {
tex_show_token_list(body, null, default_token_show_max, 0);
@@ -582,7 +600,7 @@ halfword tex_show_token_list(halfword p, halfword q, int l, int asis)
{
if (p) {
/*tex the highest parameter number seen so far (a plain count now, no longer an \ASCII\ digit) */
- unsigned char n = '0';
+ unsigned char n = 0;
int min = 0;
int max = lmt_token_memory_state.tokens_data.top;
lmt_print_state.tally = 0;
@@ -639,6 +657,8 @@ halfword tex_show_token_list(halfword p, halfword q, int l, int asis)
tex_print_tex_str(match_visualizer);
if (chr <= 9) {
tex_print_char(chr + '0');
+ } else if (chr <= max_match_count) {
+ tex_print_char(chr + '0' + gap_match_count);
} else {
tex_print_char('!');
return null;
@@ -650,7 +670,7 @@ halfword tex_show_token_list(halfword p, halfword q, int l, int asis)
++n;
}
tex_print_char(chr ? chr : '0');
- if (n > '9') {
+ if (n > max_match_count) {
/*tex Can this happen at all? */
return null;
} else {
@@ -698,8 +718,9 @@ inline static halfword get_unichar_from_buffer(int *b)
if (a <= 0x80) {
*b += 1;
} else {
- a = (halfword) aux_str2uni(lmt_fileio_state.io_buffer + *b);
- *b += utf8_size(a);
+ int al;
+ a = (halfword) aux_str2uni_len(lmt_fileio_state.io_buffer + *b, &al);
+ *b += al;
}
return a;
}
@@ -892,7 +913,7 @@ int tex_scan_optional_keyword(const char *s)
/*tex
Here we know that the first character(s) matched so we are in the middle of a keyword already
- which means a different loop than the previous one.
+ which means a different loop than the previous one.
*/
int tex_scan_mandate_keyword(const char *s, int offset)
@@ -2111,8 +2132,9 @@ halfword tex_string_to_toks(const char *ss)
halfword p = null;
/*tex new node being added to the token list via |store_new_token| */
while (s < se) {
- halfword t = (halfword) aux_str2uni((const unsigned char *) s);
- s += utf8_size(t);
+ int tl;
+ halfword t = (halfword) aux_str2uni_len((const unsigned char *) s, &tl);
+ s += tl;
if (t == ' ') {
t = space_token;
} else {
@@ -2148,8 +2170,9 @@ static halfword lmt_str_toks(lstring b) /* returns head */
halfword head = null;
halfword tail = head;
while (k < (unsigned char *) b.s + b.l) {
- halfword t = aux_str2uni(k);
- k += utf8_size(t);
+ int tl;
+ halfword t = aux_str2uni_len(k, &tl);
+ k += tl;
if (t == ' ') {
t = space_token;
} else {
@@ -2190,14 +2213,14 @@ halfword tex_str_toks(lstring s, halfword *tail)
unsigned char *k = s.s;
unsigned char *l = k + s.l;
while (k < l) {
- halfword t = aux_str2uni(k);
+ int tl;
+ halfword t = aux_str2uni_len(k, &tl);
if (t == ' ') {
- k += 1;
t = space_token;
} else {
- k += utf8_size(t);
t += other_token;
}
+ k += tl;
p = tex_store_new_token(p, t);
if (! h) {
h = p;
@@ -2220,14 +2243,14 @@ halfword tex_cur_str_toks(halfword *tail)
/*tex tail of the token list */
while (k < l) {
/*tex token being appended */
- halfword t = aux_str2uni(k);
+ int tl;
+ halfword t = aux_str2uni_len(k, &tl);
if (t == ' ') {
- k += 1;
t = space_token;
} else {
- k += utf8_size(t);
t += other_token;
}
+ k += tl;
p = tex_store_new_token(p, t);
if (! h) {
h = p;
@@ -2261,8 +2284,9 @@ halfword tex_str_scan_toks(int ct, lstring ls)
while (k < l) {
int cc;
/*tex token being appended */
- halfword t = aux_str2uni(k);
- k += utf8_size(t);
+ int lt;
+ halfword t = aux_str2uni_len(k, &lt);
+ k += lt;
cc = tex_get_cat_code(ct, t);
if (cc == 0) {
/*tex We have a potential control sequence so we check for it. */
@@ -2271,8 +2295,7 @@ halfword tex_str_scan_toks(int ct, lstring ls)
int c = 0 ;
unsigned char *name = k ;
while (k < l) {
- t = (halfword) aux_str2uni((const unsigned char *) k);
- s = utf8_size(t);
+ t = (halfword) aux_str2uni_len((const unsigned char *) k, &s);
c = tex_get_cat_code(ct,t);
if (c == 11) {
k += s ;
@@ -3131,7 +3154,7 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb
int p = token_link(pp);
if (p) {
int e = escape_char_par; /*tex The serialization of the escape, normally a backlash. */
- int n = '0'; /*tex The character after |#|, so |#0| upto |#9| */
+ int n = 0; /*tex The character after |#|, so |#0| upto |#9| */
int min = 0;
int max = lmt_token_memory_state.tokens_data.top;
int skip = 0;
@@ -3149,7 +3172,7 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb
}
lmt_token_state.bufloc = 0;
if (skippreamble) {
- skip = get_token_parameters(pp);
+ skip = get_token_preamble(pp);
}
while (p) {
if (p < min || p > max) {
@@ -3192,12 +3215,14 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb
tex_aux_append_char_to_buffer(match_visualizer);
if (chr <= 9) {
tex_aux_append_char_to_buffer(chr + '0');
+ } else if (chr <= max_match_count) {
+ tex_aux_append_char_to_buffer(chr + '0' + gap_match_count);
} else {
- tex_aux_append_char_to_buffer('!');
+ tex_aux_append_char_to_buffer('!');
goto EXIT;
}
} else {
- if (chr > 9) {
+ if (chr > max_match_count) {
goto EXIT;
}
}
@@ -3210,9 +3235,14 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb
++n;
}
if (! skip) {
- tex_aux_append_char_to_buffer(chr ? chr : '0');
+ // tex_aux_append_char_to_buffer(chr ? chr : '0');
+ if (chr <= 9) {
+ tex_aux_append_char_to_buffer(chr + '0');
+ } else if (chr <= max_match_count) {
+ tex_aux_append_char_to_buffer(chr + '0' + gap_match_count);
+ }
}
- if (n > '9') {
+ if (n > max_match_count) {
goto EXIT;
}
break;
@@ -3457,14 +3487,14 @@ halfword tex_parse_str_to_tok(halfword head, halfword *tail, halfword ct, const
const char *se = str + lstr;
while (str < se) {
/*tex hh: |str2uni| could return len too (also elsewhere) */
- halfword u = (halfword) aux_str2uni((const unsigned char *) str);
+ int ul;
+ halfword u = (halfword) aux_str2uni_len((const unsigned char *) str, &ul);
halfword t = null;
halfword cc = tex_get_cat_code(ct, u);
- str += utf8_size(u);
+ str += ul;
/*tex
- This is a relating simple converter; if more is needed one can just use
- |tex.print| with a regular |\def| or |\gdef| and feed the string into the
- regular scanner.
+ This is a relatively simple converter; if more is needed one can just use |tex.print|
+ with a regular |\def| or |\gdef| and feed the string into the regular scanner.
*/
switch (cc) {
case escape_cmd:
@@ -3473,8 +3503,8 @@ halfword tex_parse_str_to_tok(halfword head, halfword *tail, halfword ct, const
int lname = 0;
const char *name = str;
while (str < se) {
- halfword u = (halfword) aux_str2uni((const unsigned char *) str);
- int s = utf8_size(u);
+ int s;
+ halfword u = (halfword) aux_str2uni_len((const unsigned char *) str, &s);
int c = tex_get_cat_code(ct, u);
if (c == letter_cmd) {
str += s;
diff --git a/source/luametatex/source/tex/textoken.h b/source/luametatex/source/tex/textoken.h
index da2d01f7c..68632792b 100644
--- a/source/luametatex/source/tex/textoken.h
+++ b/source/luametatex/source/tex/textoken.h
@@ -108,32 +108,39 @@ typedef struct token_state_info {
char *buffer;
int bufloc;
int bufmax;
- int padding;
+ int empty;
} token_state_info;
extern token_state_info lmt_token_state;
-// # define max_token_reference 0x7FFF /* we can bump to 0xFFFF when we go unsigned here */
-//
-//define token_reference(a) token_memory_state.tokens[a].half1
-//
-// #define get_token_parameters(a) lmt_token_memory_state.tokens[a].quart2
-// #define get_token_reference(a) lmt_token_memory_state.tokens[a].quart3
-//
-// #define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].quart2 = (b)
-//
-// #define add_token_reference(a) lmt_token_memory_state.tokens[a].quart3 += 1
-// #define sub_token_reference(a) lmt_token_memory_state.tokens[a].quart3 -= 1
-// #define inc_token_reference(a,b) lmt_token_memory_state.tokens[a].quart3 += (quarterword) (b)
-// #define dec_token_reference(a,b) lmt_token_memory_state.tokens[a].quart3 -= (quarterword) (b)
+/*tex
+
+ We now can have 15 parameters but if needed we can go higher. However, we then also need to
+ cache more and change the |preamble| and |count| to some funny bit ranges. If needed we can
+ bump the reference count maximum but quite likely one has run out of something else by
+ then.
+
+ \starttyping
+ preamble = 0xF0000000 : 1 when we have one, including trailing #
+ count = 0x0F000000
+ reference = 0x00FFFFFF
+ \stoptyping
+
+*/
+
+# define max_match_count 15
+# define gap_match_count 7
-# define max_token_reference 0x0FFFFFFF
+# define max_token_reference 0x00FFFFFF
-# define get_token_parameters(a) (lmt_token_memory_state.tokens[a].hulf1 >> 28)
-# define get_token_reference(a) (lmt_token_memory_state.tokens[a].hulf1 & 0x0FFFFFFF)
+# define get_token_preamble(a) ((lmt_token_memory_state.tokens[a].hulf1 >> 28) & 0xF)
+# define get_token_parameters(a) ((lmt_token_memory_state.tokens[a].hulf1 >> 24) & 0xF)
+# define get_token_reference(a) ((lmt_token_memory_state.tokens[a].hulf1 ) & max_token_reference)
-# define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28) /* normally the variable is still zero here */
+# define set_token_preamble(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28) /* normally the variable is still zero here */
+# define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 24) /* normally the variable is still zero here */
+# define set_token_reference(a,b) lmt_token_memory_state.tokens[a].hulf1 += (b)
# define add_token_reference(a) lmt_token_memory_state.tokens[a].hulf1 += 1 /* we are way off the parameter count */
# define sub_token_reference(a) lmt_token_memory_state.tokens[a].hulf1 -= 1 /* we are way off the parameter count */
# define inc_token_reference(a,b) lmt_token_memory_state.tokens[a].hulf1 += (b) /* we are way off the parameter count */
diff --git a/source/luametatex/source/tex/textypes.h b/source/luametatex/source/tex/textypes.h
index a09409522..c2cd57e64 100644
--- a/source/luametatex/source/tex/textypes.h
+++ b/source/luametatex/source/tex/textypes.h
@@ -155,7 +155,7 @@ extern halfword tex_badness(
# define one_bp 65781
-# define infinity 017777777777 /*tex the largest positive value that \TEX\ knows */
+# define max_infinity 0x7FFFFFFF /*tex the largest positive value that \TEX\ knows */
# define min_infinity -0x7FFFFFFF
# define awful_bad 07777777777 /*tex more than a billion demerits |0x3FFFFFFF| */
# define infinite_bad 10000 /*tex infinitely bad value */
diff --git a/source/luametatex/source/utilities/auxunistring.c b/source/luametatex/source/utilities/auxunistring.c
index 9fe5531d6..746fde4ad 100644
--- a/source/luametatex/source/utilities/auxunistring.c
+++ b/source/luametatex/source/utilities/auxunistring.c
@@ -11,36 +11,100 @@
*/
-unsigned aux_str2uni(const unsigned char *k)
+// unsigned xaux_str2uni(const unsigned char *k)
+// {
+// const unsigned char *text = k;
+// int ch = *text++;
+// if (ch < 0x80) {
+// return (unsigned) ch;
+// } else if (ch <= 0xbf) {
+// return 0xFFFD;
+// } else if (ch <= 0xdf) {
+// if (text[0] >= 0x80 && text[0] < 0xc0) {
+// return (unsigned) (((ch & 0x1f) << 6) | (text[0] & 0x3f));
+// }
+// } else if (ch <= 0xef) {
+// if (text[0] >= 0x80 && text[0] < 0xc0 && text[1] >= 0x80 && text[1] < 0xc0) {
+// return (unsigned) (((ch & 0xf) << 12) | ((text[0] & 0x3f) << 6) | (text[1] & 0x3f));
+// }
+// } else if (ch <= 0xf7) {
+// if (text[0] < 0x80 || text[1] < 0x80 || text[2] < 0x80 ||
+// text[0] >= 0xc0 || text[1] >= 0xc0 || text[2] >= 0xc0) {
+// return 0xFFFD;
+// } else {
+// int w1 = (((ch & 0x7) << 2) | ((text[0] & 0x30) >> 4)) - 1;
+// int w2 = ((text[1] & 0xf) << 6) | (text[2] & 0x3f);
+// w1 = (w1 << 6) | ((text[0] & 0xf) << 2) | ((text[1] & 0x30) >> 4);
+// return (unsigned) (w1 * 0x400 + w2 + 0x10000);
+// }
+// }
+// return 0xFFFD;
+// }
+
+unsigned aux_str2uni(const unsigned char *text)
+{
+ if (text[0] < 0x80) { /* one byte: plain ASCII, returned as is */
+ return (unsigned) text[0];
+ } else if (text[0] <= 0xbf) { /* 0x80..0xbf is a continuation byte, so it cannot start a sequence */
+ return 0xFFFD;
+ } else if (text[0] <= 0xdf) { /* lead byte of a two byte sequence */
+ if (text[1] >= 0x80 && text[1] < 0xc0) {
+ return (unsigned) (((text[0] & 0x1f) << 6) | (text[1] & 0x3f));
+ }
+ } else if (text[0] <= 0xef) { /* lead byte of a three byte sequence */
+ if (text[1] >= 0x80 && text[1] < 0xc0 && text[2] >= 0x80 && text[2] < 0xc0) {
+ return (unsigned) (((text[0] & 0xf) << 12) | ((text[1] & 0x3f) << 6) | (text[2] & 0x3f));
+ }
+ } else if (text[0] <= 0xf7) { /* lead byte of a four byte sequence */
+ if (text[1] < 0x80 || text[2] < 0x80 || text[3] < 0x80 ||
+ text[1] >= 0xc0 || text[2] >= 0xc0 || text[3] >= 0xc0) {
+ return 0xFFFD;
+ } else {
+ int w1 = (((text[0] & 0x7) << 2) | ((text[1] & 0x30) >> 4)) - 1;
+ int w2 = ((text[2] & 0xf) << 6) | (text[3] & 0x3f);
+ w1 = (w1 << 6) | ((text[1] & 0xf) << 2) | ((text[2] & 0x30) >> 4);
+ return (unsigned) (w1 * 0x400 + w2 + 0x10000);
+ }
+ }
+ return 0xFFFD; /* bad continuation byte in a two or three byte sequence, or a lead byte above 0xf7 */
+}
+
+unsigned aux_str2uni_len(const unsigned char *text, int *len)
{
- const unsigned char *text = k;
- int ch = *text++;
- if (ch < 0x80) {
- return (unsigned) ch;
- } else if (ch <= 0xbf) {
+ if (text[0] < 0x80) { /* one byte: plain ASCII */
+ *len = 1;
+ return (unsigned) text[0];
+ } else if (text[0] <= 0xbf) { /* stray continuation byte: replacement character, step one byte */
+ *len = 1;
return 0xFFFD;
- } else if (ch <= 0xdf) {
- if (text[0] >= 0x80 && text[0] < 0xc0) {
- return (unsigned) (((ch & 0x1f) << 6) | (text[0] & 0x3f));
+ } else if (text[0] <= 0xdf) { /* lead byte of a two byte sequence */
+ if (text[1] >= 0x80 && text[1] < 0xc0) {
+ *len = 2;
+ return (unsigned) (((text[0] & 0x1f) << 6) | (text[1] & 0x3f));
}
- } else if (ch <= 0xef) {
- if (text[0] >= 0x80 && text[0] < 0xc0 && text[1] >= 0x80 && text[1] < 0xc0) {
- return (unsigned) (((ch & 0xf) << 12) | ((text[0] & 0x3f) << 6) | (text[1] & 0x3f));
+ } else if (text[0] <= 0xef) { /* lead byte of a three byte sequence */
+ if (text[1] >= 0x80 && text[1] < 0xc0 && text[2] >= 0x80 && text[2] < 0xc0) {
+ *len = 3;
+ return (unsigned) (((text[0] & 0xf) << 12) | ((text[1] & 0x3f) << 6) | (text[2] & 0x3f));
}
- } else if (ch <= 0xf7) {
- if (text[0] < 0x80 || text[1] < 0x80 || text[2] < 0x80 ||
- text[0] >= 0xc0 || text[1] >= 0xc0 || text[2] >= 0xc0) {
+ } else if (text[0] <= 0xf7) { /* lead byte of a four byte sequence */
+ if (text[1] < 0x80 || text[2] < 0x80 || text[3] < 0x80 ||
+ text[1] >= 0xc0 || text[2] >= 0xc0 || text[3] >= 0xc0) {
+ *len = 1; /* malformed: step over the lead byte only, like the other error paths, so following valid bytes (or the end of the input) are not skipped */
return 0xFFFD;
} else {
- int w1 = (((ch & 0x7) << 2) | ((text[0] & 0x30) >> 4)) - 1;
- int w2 = ((text[1] & 0xf) << 6) | (text[2] & 0x3f);
- w1 = (w1 << 6) | ((text[0] & 0xf) << 2) | ((text[1] & 0x30) >> 4);
+ *len = 4;
+ int w1 = (((text[0] & 0x7) << 2) | ((text[1] & 0x30) >> 4)) - 1;
+ int w2 = ((text[2] & 0xf) << 6) | (text[3] & 0x3f);
+ w1 = (w1 << 6) | ((text[1] & 0xf) << 2) | ((text[2] & 0x30) >> 4);
return (unsigned) (w1 * 0x400 + w2 + 0x10000);
}
}
+ *len = 1; /* bad continuation byte in a two or three byte sequence, or a lead byte above 0xf7 */
return 0xFFFD;
}
+
unsigned char *aux_uni2str(unsigned unic)
{
unsigned char *buf = lmt_memory_malloc(5);
diff --git a/source/luametatex/source/utilities/auxunistring.h b/source/luametatex/source/utilities/auxunistring.h
index 4c5ee3639..92f46d91c 100644
--- a/source/luametatex/source/utilities/auxunistring.h
+++ b/source/luametatex/source/utilities/auxunistring.h
@@ -6,7 +6,8 @@
# define LMT_UTILITIES_UNISTRING_H
extern unsigned char *aux_uni2str (unsigned);
-extern unsigned aux_str2uni (const unsigned char *);
+extern unsigned aux_str2uni (const unsigned char *text);
+extern unsigned aux_str2uni_len (const unsigned char *text, int *len);
extern char *aux_uni2string (char *utf8_text, unsigned ch);
extern unsigned aux_splitutf2uni (unsigned int *ubuf, const char *utf8buf);
extern size_t aux_utf8len (const char *text, size_t size);