summaryrefslogtreecommitdiff
path: root/source/luametatex/source/tex/textoken.h
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/tex/textoken.h')
-rw-r--r--source/luametatex/source/tex/textoken.h399
1 files changed, 399 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/textoken.h b/source/luametatex/source/tex/textoken.h
new file mode 100644
index 000000000..1996f351c
--- /dev/null
+++ b/source/luametatex/source/tex/textoken.h
@@ -0,0 +1,399 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# ifndef LMT_TEXTOKEN_H
+# define LMT_TEXTOKEN_H
+
+# include "luametatex.h"
+
+/*tex
+
+ These are constants that can be added to a chr value and then give a token with the right cmd
+ and chr combination, which is then equivalent to |token_val (cmd, chr)|. The cmd results from
+ shifting right 21 bits. The following tokens therefore should match the order of the (first
+ bunch of) cmd codes!
+
+ \TEX\ stores the specific match character which defaults to |#|. When tokens get serialized the
+ machinery starts with |match_chr = '#'| but overloads that by the last stored variant. So the
+ last (!) seen |match_chr| in the macro preamble determines what gets used in showing the body.
+ One could argue that this is a buglet but I more see it as a side effect. In practice there is
+ never a mix of such characters used. Anyway, one could as well use the first seen in the
+ preamble and use that for the rest because consistency is better than confusion. Even better is
+ to just always use |#| and store the numbers in preamble match tokens, which opens up
+ possibilities (for strict or tolerant matching, skipping spaces, optional delimiters and even
+ more arguments).
+
+*/
+
+//define cs_token_flag 0x1FFFFFFF
+
+/*tex
+ A value larger than |node_token_max| can be split into two parts: |node_token_lsb| gives the
+ lower 16 bits and |node_token_msb| gives the upper 16 bits with |node_token_flag| added. The
+ |node_token_sum| macro masks the upper part with |0xFFFF| (which drops the flag again) and
+ combines it with the lower part. The arguments are parenthesized so that passing an expression
+ like |a \| b| doesn't fall victim to operator precedence.
+*/
+
+# define node_token_max 0x0FFFFF
+# define node_token_flag 0x100000
+# define node_token_lsb(sum) ((sum) & 0x0000FFFF)
+# define node_token_msb(sum) ((((sum) & 0xFFFF0000) >> 16) + node_token_flag)
+# define node_token_sum(msb,lsb) ((((msb) & 0x0000FFFF) << 16) + (lsb))
+# define node_token_overflow(sum) ((sum) > node_token_max)
+# define node_token_flagged(sum) ((sum) > node_token_flag)
+
+/*tex
+ Instead of |fixmem| we use |tokens| because it is dynamic anyway and we then better match variables
+ that deal with managing that. Most was already hidden in a few files anyway.
+*/
+
+typedef struct token_memory_state_info {
+ memoryword *tokens; /*tex |memoryword *volatile fixmem;| */
+ memory_data tokens_data; /*tex bookkeeping for |tokens|; its |top| field is the upper bound checked in |tex_valid_token| */
+ halfword available; /*tex presumably the head of the list of available (free) tokens; to be confirmed in |textoken.c| */
+ int padding; /*tex not used by anything in this header; presumably only there for alignment */
+} token_memory_state_info;
+
+/*tex The one instance of the token memory state; all token accessor macros in this file go through it. */
+extern token_memory_state_info lmt_token_memory_state;
+
+/*tex The states that reading can be in; no explicit values are given so they count up from zero. */
+typedef enum read_states {
+ reading_normal, /*tex we're going ahead */
+ reading_just_opened, /*tex newly opened, first line not yet read */
+ reading_closed, /*tex not open, or at end of file */
+} read_states;
+
+/*tex
+ The kinds of input that can be tagged as coming from \LUA. The values count up from zero, so
+ |unset_lua_input| is zero. NOTE(review): the meaning of each kind is only visible from its name
+ here; the code that produces and consumes them is not in this header.
+*/
+typedef enum lua_input_types {
+ unset_lua_input,
+ string_lua_input,
+ packed_lua_input,
+ token_lua_input,
+ token_list_lua_input,
+ node_lua_input,
+} lua_input_types;
+
+/*tex
+ The kinds of input seen from the \TEX\ end. The values count up from zero, so |eof_tex_input|
+ is zero. Watch out: the numbers don't run parallel to |lua_input_types|, which has an extra
+ |packed_lua_input| entry, so values of the two enumerations can't be mixed.
+*/
+typedef enum tex_input_types {
+ eof_tex_input,
+ string_tex_input,
+ token_tex_input,
+ token_list_tex_input,
+ node_tex_input,
+} tex_input_types;
+
+/*tex
+ Two special catcode table values. Both are negative; presumably that keeps them apart from
+ regular table numbers (to be confirmed where catcode tables are looked up).
+*/
+typedef enum catcode_table_presets {
+ default_catcode_table_preset = -1,
+ no_catcode_table_preset = -2,
+} catcode_table_presets;
+
+/*tex
+
+ There are a few temporary head pointers, one is |temp_token_head|. This one we keep because
+ when we expand, we can run into situations where we need that pointer. But, |backup_head| is
+ a real temporary one: we can replace that with local variables. Okay, it is kind of kept in
+ the format file but if it ends up there we're in some kind of troubles anyway. So,
+ |backup_head| is now local and |temp_token_head| only global when we are scanning; in cases
+ where we serialize tokens lists it has been replaced by local variables (and the related
+ functions now keep track of head and tail). This makes sense because in \LUAMETATEX\ we often
+ go between \TEX\ and \LUA\ and this keeps it kind of simple. This also makes clear when we
+ are scanning (the global head is used) and doing something simple with a list. The same is
+ true for |match_token_head| that moved to the expand state. The |backup_head| variable is gone
+ because we now use locals.
+
+*/
+
+/*tex
+ The global state of the token scanner. NOTE(review): apart from |null_list| and the pseudo
+ constants the purpose of the fields is not documented in this header; the remarks below are
+ derived from the names only and need to be confirmed in |textoken.c|.
+*/
+typedef struct token_state_info {
+ halfword null_list; /*tex permanently empty list */
+ int in_lua_escape; /*tex presumably set while a \LUA\ escape is being handled */
+ int force_eof; /*tex presumably a request to end the current input file */
+ int luacstrings; /*tex presumably counts pending strings that were passed from \LUA */
+ /*tex These are pseudo constants, their value depends on the number of primitives etc. */
+ halfword par_loc;
+ halfword par_token;
+ /* halfword line_par_loc; */ /*tex See note in |textoken.c|. */
+ /* halfword line_par_token; */ /*tex See note in |textoken.c|. */
+ /* */
+ char *buffer; /*tex a character buffer; |bufloc| and |bufmax| presumably are the position in and the size of it */
+ int bufloc;
+ int bufmax;
+ int padding; /*tex not used by anything in this header; presumably only there for alignment */
+} token_state_info;
+
+/*tex The one instance of the token scanner state. */
+extern token_state_info lmt_token_state;
+
+// # define max_token_reference 0x7FFF /* we can bump to 0xFFFF when we go unsigned here */
+//
+//define token_reference(a) token_memory_state.tokens[a].half1
+//
+// #define get_token_parameters(a) lmt_token_memory_state.tokens[a].quart2
+// #define get_token_reference(a) lmt_token_memory_state.tokens[a].quart3
+//
+// #define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].quart2 = (b)
+//
+// #define add_token_reference(a) lmt_token_memory_state.tokens[a].quart3 += 1
+// #define sub_token_reference(a) lmt_token_memory_state.tokens[a].quart3 -= 1
+// #define inc_token_reference(a,b) lmt_token_memory_state.tokens[a].quart3 += (quarterword) (b)
+// #define dec_token_reference(a,b) lmt_token_memory_state.tokens[a].quart3 -= (quarterword) (b)
+
+/*tex
+ The |hulf1| field of the token at index |a| packs two values: the upper four bits (what is left
+ after shifting right by 28) hold the parameter value and the lower 28 bits (the mask
+ |0x0FFFFFFF|, which equals |max_token_reference|) hold the reference count.
+
+ Watch out: |set_token_parameters| adds to the field instead of assigning, so it only gives the
+ right result when the upper bits are still zero. The add, sub, inc and dec macros work on the
+ field as a whole and therefore rely on the count never leaving the lower 28 bits.
+
+ NOTE(review): in |(b) << 28| a signed |int| value larger than 7 overflows; the type of |hulf1|
+ and the range of |b| are not visible in this header, so check the callers.
+*/
+# define max_token_reference 0x0FFFFFFF
+
+# define get_token_parameters(a) (lmt_token_memory_state.tokens[a].hulf1 >> 28)
+# define get_token_reference(a) (lmt_token_memory_state.tokens[a].hulf1 & 0x0FFFFFFF)
+
+# define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28) /* normally the variable is still zero here */
+
+# define add_token_reference(a) lmt_token_memory_state.tokens[a].hulf1 += 1 /* we are way off the parameter count */
+# define sub_token_reference(a) lmt_token_memory_state.tokens[a].hulf1 -= 1 /* we are way off the parameter count */
+# define inc_token_reference(a,b) lmt_token_memory_state.tokens[a].hulf1 += (b) /* we are way off the parameter count */
+# define dec_token_reference(a,b) lmt_token_memory_state.tokens[a].hulf1 -= (b) /* we are way off the parameter count */
+
+/* */
+
+/*tex
+ Accessors for the two halves of the token at index |a|: the info lives in |half1| and the link
+ in |half0|. The |token_*| and |get_token_*| variants expand to the same lvalue, so both can be
+ read and assigned to; the |set_token_*| variants expand to an assignment.
+*/
+# define token_info(a) lmt_token_memory_state.tokens[a].half1
+# define token_link(a) lmt_token_memory_state.tokens[a].half0
+# define get_token_info(a) lmt_token_memory_state.tokens[a].half1
+# define get_token_link(a) lmt_token_memory_state.tokens[a].half0
+# define set_token_info(a,b) lmt_token_memory_state.tokens[a].half1 = (b)
+# define set_token_link(a,b) lmt_token_memory_state.tokens[a].half0 = (b)
+
+/*tex
+ A token combines a cmd and a chr: |token_val| shifts the cmd left by |cs_offset_bits| and adds
+ the chr, |token_cmd| shifts it back and |token_chr| masks with |cs_offset_max|. Both constants
+ are defined elsewhere. The round trip only works when the chr fits in |cs_offset_max|, because
+ |token_val| adds and doesn't mask.
+*/
+# define token_cmd(A) ((A) >> cs_offset_bits)
+# define token_chr(A) ((A) & cs_offset_max)
+# define token_val(A,B) (((A) << cs_offset_bits) + (B))
+
+/*tex
+ Sometimes we add a value directly. Instead we could use |token_val| on the spot but then we
+ also need different range checkers. We use numbers because we don't have the cmd codes defined
+ yet when we're here, so we can't use for instance |token_val (spacer_cmd, 20)| yet.
+*/
+
+/*tex
+ The first argument of each |token_val| is a hard coded cmd number that has to stay in sync with
+ the cmd codes defined elsewhere (the trailing comments name the cmd code where known). All use
+ chr zero, except |space_token| which uses 32 (an \ASCII\ space).
+*/
+# define left_brace_token token_val( 1, 0) // token_val(left_brace_cmd,0)
+# define right_brace_token token_val( 2, 0) // token_val(right_brace_cmd,0)
+# define math_shift_token token_val( 3, 0) // token_val(math_shift_cmd,0)
+# define alignment_token token_val( 4, 0)
+# define superscript_token token_val( 7, 0)
+# define subscript_token token_val( 8, 0)
+# define ignore_token token_val( 9, 0) // token_val(ignore_cmd,0)
+# define space_token token_val(10,32) // token_val(spacer_cmd,32)
+# define letter_token token_val(11, 0) // token_val(letter_cmd,0)
+# define other_token token_val(12, 0) // token_val(other_char_cmd,0)
+# define active_token token_val(13, 0)
+
+# define match_token token_val(19,0) // token_val(match_cmd,0)
+# define end_match_token token_val(20,0) // token_val(end_match_cmd,0)
+
+/*tex
+ Each limit is the first token value of the next cmd, so presumably these are meant as exclusive
+ upper bounds when testing for a left or right brace (to be confirmed at the call sites).
+*/
+# define left_brace_limit right_brace_token
+# define right_brace_limit math_shift_token
+
+/*tex
+ Tokens for specific characters: |other_token| (or |letter_token| for the |_l| variants) plus
+ the character code. Some values have more than one name: |point_token| equals |period_token|,
+ |continental_point_token| equals |comma_token|, and |underscore_token| and |circumflex_token|
+ equal their |_o| variants.
+*/
+# define octal_token (other_token + '\'') /*tex apostrophe, indicates an octal constant */
+# define hex_token (other_token + '"') /*tex double quote, indicates a hex constant */
+# define alpha_token (other_token + '`') /*tex reverse apostrophe, precedes alpha constants */
+# define point_token (other_token + '.') /*tex decimal point */
+# define continental_point_token (other_token + ',') /*tex decimal point, Eurostyle */
+# define period_token (other_token + '.') /*tex decimal point */
+# define comma_token (other_token + ',') /*tex decimal comma */
+# define plus_token (other_token + '+')
+# define minus_token (other_token + '-')
+# define slash_token (other_token + '/')
+# define asterisk_token (other_token + '*')
+# define colon_token (other_token + ':')
+# define semi_colon_token (other_token + ';')
+# define equal_token (other_token + '=')
+# define less_token (other_token + '<')
+# define more_token (other_token + '>')
+# define exclamation_token_o (other_token + '!')
+# define exclamation_token_l (letter_token + '!')
+# define underscore_token (other_token + '_')
+# define underscore_token_o (other_token + '_')
+# define underscore_token_l (letter_token + '_')
+# define circumflex_token (other_token + '^')
+# define circumflex_token_o (other_token + '^')
+# define circumflex_token_l (letter_token + '^')
+# define escape_token (other_token + '\\')
+# define left_parent_token (other_token + '(')
+# define right_parent_token (other_token + ')')
+# define zero_token (other_token + '0') /*tex zero, the smallest digit */
+# define five_token (other_token + '5')
+# define seven_token (other_token + '7')
+# define nine_token (other_token + '9') /*tex nine, the largest digit */
+
+/*tex
+ Tokens for specific letters (and the at sign). Each character comes in two variants: |_l| is
+ the character added to |letter_token| and |_o| is the character added to |other_token|.
+*/
+# define a_token_l (letter_token + 'a') /*tex the smallest special hex digit */
+# define a_token_o (other_token + 'a')
+
+# define b_token_l (letter_token + 'b')
+# define b_token_o (other_token + 'b')
+
+# define d_token_l (letter_token + 'd')
+# define d_token_o (other_token + 'd')
+
+# define e_token_l (letter_token + 'e')
+# define e_token_o (other_token + 'e')
+
+# define f_token_l (letter_token + 'f') /*tex the largest special hex digit */
+# define f_token_o (other_token + 'f')
+
+# define i_token_l (letter_token + 'i')
+# define i_token_o (other_token + 'i')
+
+# define l_token_l (letter_token + 'l')
+# define l_token_o (other_token + 'l')
+
+# define m_token_l (letter_token + 'm')
+# define m_token_o (other_token + 'm')
+
+# define n_token_l (letter_token + 'n')
+# define n_token_o (other_token + 'n')
+
+# define o_token_l (letter_token + 'o')
+# define o_token_o (other_token + 'o')
+
+# define p_token_l (letter_token + 'p')
+# define p_token_o (other_token + 'p')
+
+# define r_token_l (letter_token + 'r')
+# define r_token_o (other_token + 'r')
+
+# define s_token_l (letter_token + 's')
+# define s_token_o (other_token + 's')
+
+# define t_token_l (letter_token + 't')
+# define t_token_o (other_token + 't')
+
+# define u_token_l (letter_token + 'u')
+# define u_token_o (other_token + 'u')
+
+# define x_token_l (letter_token + 'x')
+# define x_token_o (other_token + 'x')
+
+# define A_token_l (letter_token + 'A') /*tex the smallest special hex digit */
+# define A_token_o (other_token + 'A')
+
+# define E_token_l (letter_token + 'E')
+# define E_token_o (other_token + 'E')
+
+# define F_token_l (letter_token + 'F') /*tex the largest special hex digit */
+# define F_token_o (other_token + 'F')
+
+# define P_token_l (letter_token + 'P')
+# define P_token_o (other_token + 'P')
+
+# define X_token_l (letter_token + 'X')
+# define X_token_o (other_token + 'X')
+
+# define at_token_l (letter_token + '@')
+# define at_token_o (other_token + '@')
+
+/*tex
+ The characters that identify the variants of a match token; they are added to |match_token|
+ in the |*_match_token| definitions. The |match_visualizer| is the |#| that is the default match
+ character.
+*/
+# define match_visualizer '#'
+# define match_spacer '*' /* ignore spaces */
+# define match_bracekeeper '+' /* keep the braces */
+# define match_thrasher '-' /* discard and don't count the argument */
+# define match_par_spacer '.' /* ignore pars and spaces */
+# define match_keep_spacer ',' /* push back space when no match */
+# define match_pruner '/' /* remove leading and trailing spaces and pars */
+# define match_continuator ':' /* pick up scanning here */
+# define match_quitter ';' /* quit scanning */
+# define match_mandate '=' /* braces are mandate */
+# define match_spacekeeper '^' /* keep leading spaces */
+# define match_mandate_keep '_' /* braces are mandate and kept */
+# define match_par_command '@' /* par delimiter, only internal */
+
+/*tex
+ The complete match tokens: |match_token| with one of the match characters as chr. There is no
+ token here for |match_visualizer|.
+*/
+# define spacer_match_token (match_token + match_spacer)
+# define keep_match_token (match_token + match_bracekeeper)
+# define thrash_match_token (match_token + match_thrasher)
+# define par_spacer_match_token (match_token + match_par_spacer)
+# define keep_spacer_match_token (match_token + match_keep_spacer)
+# define prune_match_token (match_token + match_pruner)
+# define continue_match_token (match_token + match_continuator)
+# define quit_match_token (match_token + match_quitter)
+# define mandate_match_token (match_token + match_mandate)
+# define leading_match_token (match_token + match_spacekeeper)
+# define mandate_keep_match_token (match_token + match_mandate_keep)
+# define par_command_match_token (match_token + match_par_command)
+
+/*tex
+ This is true unless |r| is one of the thrash, spacer, keep spacer, continue or quit match
+ tokens. The argument is parenthesized so that an expression can be passed; it is evaluated up
+ to five times, so it should have no side effects.
+*/
+# define is_valid_match_ref(r) ((r) != thrash_match_token && (r) != spacer_match_token && (r) != keep_spacer_match_token && (r) != continue_match_token && (r) != quit_match_token)
+
+/*tex
+ Managing the head of the list of available one-word nodes. The |get_avail| function has been
+ given a more verbose name. It gets from the pool and should not be confused with |get_token|
+ which reads from the input or token list. The |free_avail| function got renamed to
+ |put_available_token| so we have some symmetry here.
+*/
+
+/*tex
+ The functions that manage the token memory. NOTE(review): the definitions are in |textoken.c|;
+ the meaning of the arguments (for instance |t| in |tex_get_available_token| and |p| and |t| in
+ |tex_store_new_token|) can't be derived from this header.
+*/
+extern void tex_compact_tokens (void);
+extern void tex_initialize_tokens (void);
+extern void tex_initialize_token_mem (void);
+extern halfword tex_get_available_token (halfword t);
+extern void tex_put_available_token (halfword p);
+extern halfword tex_store_new_token (halfword p, halfword t);
+extern void tex_delete_token_reference (halfword p);
+extern void tex_add_token_reference (halfword p);
+extern void tex_increment_token_reference (halfword p, int n);
+
+/*tex A shortcut: this gets a token from the pool with |null| passed as argument. */
+# define get_reference_token() tex_get_available_token(null)
+
+/*tex
+
+ The |no_expand_flag| is a special character value that is inserted by |get_next| if it wants to
+ suppress expansion.
+
+*/
+
+# define no_expand_flag special_char /* no_expand_relax_code */ /*tex an alias for |special_char|, which is defined elsewhere */
+
+/*tex A few special values: */
+
+/*tex
+ NOTE(review): going by the names these are the smallest, default and largest limits for showing
+ token lists; where they are applied is not visible in this header.
+*/
+# define default_token_show_min 32
+# define default_token_show_max 2500
+# define extreme_token_show_max 0x3FFFFFFF
+
+/*tex All kind of helpers: */
+
+/*tex
+ Dumping and undumping the token memory, showing and flushing token lists, scanning keywords
+ and converting between strings and token lists. NOTE(review): only the prototypes are visible
+ here; see |textoken.c| for what the arguments and return values mean. The commented prototypes
+ are kept as they were.
+*/
+extern void tex_dump_token_mem (dumpstream f);
+extern void tex_undump_token_mem (dumpstream f);
+extern void tex_print_meaning (halfword code);
+extern void tex_flush_token_list (halfword p);
+extern void tex_flush_token_list_head_tail (halfword h, halfword t, int n);
+extern halfword tex_show_token_list (halfword p, halfword q, int l, int asis); /* Here |l| will go away. */
+extern void tex_token_show (halfword p, int max);
+/* void tex_add_token_ref (halfword p); */
+/* void tex_delete_token_ref (halfword p); */
+extern void tex_get_next (void);
+extern halfword tex_scan_character (const char *s, int left_brace, int skip_space, int skip_relax);
+extern int tex_scan_optional_keyword (const char *s);
+extern int tex_scan_mandate_keyword (const char *s, int offset);
+extern void tex_aux_show_keyword_error (const char *s);
+extern int tex_scan_keyword (const char *s);
+extern int tex_scan_keyword_case_sensitive (const char *s);
+extern halfword tex_active_to_cs (int c, int force);
+extern halfword tex_string_to_toks (const char *s);
+extern int tex_get_char_cat_code (int c);
+extern halfword tex_get_token (void);
+extern halfword tex_str_toks (lstring s, halfword *tail); /* returns head */
+extern halfword tex_cur_str_toks (halfword *tail); /* returns head */
+extern halfword tex_str_scan_toks (int c, lstring b); /* returns head */
+extern void tex_run_combine_the_toks (void);
+extern void tex_run_convert_tokens (halfword code);
+extern strnumber tex_the_convert_string (halfword c, int i);
+extern strnumber tex_tokens_to_string (halfword p);
+/* char *tex_tokenlist_to_cstring (int p, int inhibit_par, int *siz); */
+extern char *tex_tokenlist_to_tstring (int p, int inhibit_par, int *siz, int skip, int nospace, int strip);
+
+/*tex
+ Getters and setters for the registers, one pair per register type. They all take the register
+ number |j| and an |internal| argument; the setters also take the value and |flags|. Watch out:
+ the toks setter takes an |lstring| instead of a |halfword| and there is an additional scanning
+ variant for toks. NOTE(review): what |internal| and |flags| stand for is not visible in this
+ header.
+*/
+extern halfword tex_get_tex_dimen_register (int j, int internal);
+extern halfword tex_get_tex_skip_register (int j, int internal);
+extern halfword tex_get_tex_mu_skip_register (int j, int internal);
+extern halfword tex_get_tex_count_register (int j, int internal);
+extern halfword tex_get_tex_attribute_register (int j, int internal);
+extern halfword tex_get_tex_box_register (int j, int internal);
+extern halfword tex_get_tex_toks_register (int j, int internal);
+
+extern void tex_set_tex_dimen_register (int j, halfword v, int flags, int internal);
+extern void tex_set_tex_skip_register (int j, halfword v, int flags, int internal);
+extern void tex_set_tex_mu_skip_register (int j, halfword v, int flags, int internal);
+extern void tex_set_tex_count_register (int j, halfword v, int flags, int internal);
+extern void tex_set_tex_attribute_register (int j, halfword v, int flags, int internal);
+extern void tex_set_tex_box_register (int j, halfword v, int flags, int internal);
+
+extern void tex_set_tex_toks_register (int j, lstring s, int flags, int internal);
+extern void tex_scan_tex_toks_register (int j, int c, lstring s, int flags, int internal);
+
+/*tex
+ Copying a token list and turning a string of |lstr| bytes into tokens. NOTE(review): going by
+ the signatures both return a head and pass back a tail via the pointer argument; the meaning of
+ |ct| and |option| has to be looked up in |textoken.c|.
+*/
+extern halfword tex_copy_token_list (halfword h, halfword *t);
+
+extern halfword tex_parse_str_to_tok (halfword head, halfword *tail, halfword ct, const char *str, size_t lstr, int option);
+
+/*tex
+ This checks if |t| can be used as index in the token memory: it has to be non negative and not
+ larger than the current |top| in |tokens_data|; the result is 1 when valid and 0 otherwise. We
+ use |static inline| because a plain |inline| definition in a header gives no external definition
+ in C99 and later, so a call that doesn't get inlined (for instance in a non optimized build)
+ could otherwise end up as an unresolved symbol when linking.
+*/
+static inline int tex_valid_token(int t)
+{
+ return ((t >= 0) && (t <= (int) lmt_token_memory_state.tokens_data.top));
+}
+
+# endif