diff options
Diffstat (limited to 'source/luametatex/source/tex/textoken.h')
-rw-r--r-- | source/luametatex/source/tex/textoken.h | 39 |
1 files changed, 38 insertions, 1 deletions
diff --git a/source/luametatex/source/tex/textoken.h b/source/luametatex/source/tex/textoken.h index ad67dfcb5..843304405 100644 --- a/source/luametatex/source/tex/textoken.h +++ b/source/luametatex/source/tex/textoken.h @@ -390,6 +390,43 @@ extern halfword tex_copy_token_list (halfword h, halfword *t); extern halfword tex_parse_str_to_tok (halfword head, halfword *tail, halfword ct, const char *str, size_t lstr, int option); -inline int tex_valid_token (int t) { return ((t >= 0) && (t <= (int) lmt_token_memory_state.tokens_data.top)); } +inline static int tex_valid_token (int t) { return ((t >= 0) && (t <= (int) lmt_token_memory_state.tokens_data.top)); } + +/*tex + + This is also a sort of documentation. Active characters are stored in the hash using a prefix + which assumes that users don't use that one. So far we've seen no clashes which is due to the + fact that the namespace prefix U+FFFF is an invalid \UNICODE\ character and it's kind of hard + to get that one into the input anyway. + + The replacement character U+FFFD is a kind of fallback when we run into some troubles or when + a control sequence is expected (and undefined is unacceptable). + + U+FFFD REPLACEMENT CHARACTER + U+FFFE NOT A CHARACTER + U+FFFF NOT A CHARACTER + + I experimented with a namespace character (catcodetable id) as fourth character but there are + some unwanted side effects, for instance in testing an active character as separator (in + arguments) so that code was eventually removed. I might come back to this one day (active + characters in the catcode regime namespace). 
+ +*/ + +# define utf_fffd_string "\xEF\xBF\xBD" /* U+FFFD : 65533 */ + +# define active_character_namespace "\xEF\xBF\xBF" /* U+FFFF : 65535 */ + +# define active_character_first '\xEF' +# define active_character_second '\xBF' +# define active_character_third '\xBF' + +# define active_first 0xEF +# define active_second 0xBF +# define active_third 0xBF + +# define active_character_unknown "\xEF\xBF\xBD" /* utf_fffd_string */ + +# define active_cs_value(A) aux_str2uni(str_string(A)+3) # endif |