summaryrefslogtreecommitdiff
path: root/source/luametatex/source/tex/textoken.h
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/tex/textoken.h')
-rw-r--r--source/luametatex/source/tex/textoken.h39
1 files changed, 38 insertions, 1 deletions
diff --git a/source/luametatex/source/tex/textoken.h b/source/luametatex/source/tex/textoken.h
index ad67dfcb5..843304405 100644
--- a/source/luametatex/source/tex/textoken.h
+++ b/source/luametatex/source/tex/textoken.h
@@ -390,6 +390,43 @@ extern halfword tex_copy_token_list (halfword h, halfword *t);
extern halfword tex_parse_str_to_tok (halfword head, halfword *tail, halfword ct, const char *str, size_t lstr, int option);
-inline int tex_valid_token (int t) { return ((t >= 0) && (t <= (int) lmt_token_memory_state.tokens_data.top)); }
+inline static int tex_valid_token (int t) { return ((t >= 0) && (t <= (int) lmt_token_memory_state.tokens_data.top)); } /* 1 when 0 <= t <= tokens_data.top (upper bound inclusive), else 0; |static| gives every including file its own internal copy */
+
+/*tex
+
+ This is also a sort of documentation. Active characters are stored in the hash using a prefix
+ which assumes that users don't use that one. So far we've seen no clashes which is due to the
+ fact that the namespace prefix U+FFFF is an invalid \UNICODE\ character and it's kind of hard
+ to get that one into the input anyway.
+
+ The replacement character U+FFFD is a kind of fallback when we run into some troubles or when
+ a control sequence is expected (and undefined is unacceptable).
+
+ U+FFFD REPLACEMENT CHARACTER
+ U+FFFE NOT A CHARACTER
+ U+FFFF NOT A CHARACTER
+
+ I experimented with a namespace character (catcodetable id) as fourth character but there are
+ some unwanted side effects, for instance in testing an active character as separator (in
+ arguments) so that code was eventually removed. I might come back to this one day (active
+ characters in the catcode regime namespace).
+
+*/
+
+# define utf_fffd_string "\xEF\xBF\xBD" /* U+FFFD : 65533 */
+
+# define active_character_namespace "\xEF\xBF\xBF" /* U+FFFF : 65535 */
+
+# define active_character_first '\xEF' /* first byte of the namespace as a character constant; negative where plain |char| is signed */
+# define active_character_second '\xBF' /* second byte, same caveat about |char| signedness */
+# define active_character_third '\xBF' /* third byte, same caveat about |char| signedness */
+
+# define active_first 0xEF /* the same first byte as a plain integer constant (239), independent of |char| signedness */
+# define active_second 0xBF /* second byte as a plain integer constant (191) */
+# define active_third 0xBF /* third byte as a plain integer constant (191) */
+
+# define active_character_unknown "\xEF\xBF\xBD" /* utf_fffd_string */
+
+# define active_cs_value(A) aux_str2uni(str_string(A)+3) /* hands |aux_str2uni| the string of |A| advanced by 3 bytes, the length of |active_character_namespace|; assumes |A| names an active character stored with that prefix, TODO confirm at call sites */
# endif