diff options
Diffstat (limited to 'source/luametatex/source/tex/textoken.h')
-rw-r--r-- | source/luametatex/source/tex/textoken.h | 399 |
1 files changed, 399 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/textoken.h b/source/luametatex/source/tex/textoken.h new file mode 100644 index 000000000..1996f351c --- /dev/null +++ b/source/luametatex/source/tex/textoken.h @@ -0,0 +1,399 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_TEXTOKEN_H +# define LMT_TEXTOKEN_H + +# include "luametatex.h" + +/*tex + + These are constants that can be added to a chr value and then give a token with the right cmd + and chr combination, whichs is then equivalent to |token_val (cmd, chr)|. The cmd results from + shifting right 21 bits. The following tokens therefore should match the order of the (first + bunch) of cmd codes! + + \TEX\ stores the specific match character which defaults to |#|. When tokens get serialized the + machinery starts with |match_chr = '#'| but overloads that by the last stored variant. So the + last (!) seen |match_chr| in the macro preamble determines what gets used in showing the body. + One could argue that this is a buglet but I more see it as a side effect. In practice there is + never a mix of such characters used. Anyway, one could as well use the first seen in the + preamble and use that for the rest because consistency is better than confusion. Even better is + to just always use |#| and store the numbers in preamble match tokens, which opens up + possibilities (for strict or tolerant matching, skipping spaces, optional delimiters and even + more arguments). + +*/ + +//define cs_token_flag 0x1FFFFFFF + +# define node_token_max 0x0FFFFF +# define node_token_flag 0x100000 +# define node_token_lsb(sum) (sum & 0x0000FFFF) +# define node_token_msb(sum) (((sum & 0xFFFF0000) >> 16) + node_token_flag) +# define node_token_sum(msb,lsb) (((msb & 0x0000FFFF) << 16) + lsb) +# define node_token_overflow(sum) (sum > node_token_max) +# define node_token_flagged(sum) (sum > node_token_flag) + +/*tex + Instead of |fixmem| we use |tokens| because it is dynamic anyway and we then better match variables + that deal with managing that. Most was already hidden in a few files anyway. +*/ + +typedef struct token_memory_state_info { + memoryword *tokens; /*tex |memoryword *volatile fixmem;| */ + memory_data tokens_data; + halfword available; + int padding; +} token_memory_state_info; + +extern token_memory_state_info lmt_token_memory_state; + +typedef enum read_states { + reading_normal, /*tex we're going ahead */ + reading_just_opened, /*tex newly opened, first line not yet read */ + reading_closed, /*tex not open, or at end of file */ +} read_states; + +typedef enum lua_input_types { + unset_lua_input, + string_lua_input, + packed_lua_input, + token_lua_input, + token_list_lua_input, + node_lua_input, +} lua_input_types; + +typedef enum tex_input_types { + eof_tex_input, + string_tex_input, + token_tex_input, + token_list_tex_input, + node_tex_input, +} tex_input_types; + +typedef enum catcode_table_presets { + default_catcode_table_preset = -1, + no_catcode_table_preset = -2, +} catcode_table_presets; + +/*tex +* + There are a few temporary head pointers, one is |temp_token_head|. This one we keep because + when we expand, we can run into situations where we need that pointer. But, |backup_head| is + a real temporary one: we can replace that with local variables. Okay, it is kind of kept in + the format file but if it ends up there we're in some kind of troubles anyway. So, + |backup_head| is now local and |temp_token_head| only global when we are scanning; in cases + where we serialize tokens lists it has been replaced by local variables (and the related + functions now keep track of head and tail). This makes sense because in \LUAMETATEX\ we often + go between \TEX\ and \LUA\ and this keeps it kind of simple. This also makes clear when we + are scanning (the global head is used) and doing something simple with a list. The same is + true for |match_token_head| thatmoved to the expand state. The |backup_head| variable is gone + because we now use locals. + +*/ + +typedef struct token_state_info { + halfword null_list; /*tex permanently empty list */ + int in_lua_escape; + int force_eof; + int luacstrings; + /*tex These are pseudo constants, their value depends on the number of primitives etc. */ + halfword par_loc; + halfword par_token; + /* halfword line_par_loc; */ /*tex See note in textoken.c|. */ + /* halfword line_par_token; */ /*tex See note in textoken.c|. */ + /* */ + char *buffer; + int bufloc; + int bufmax; + int padding; +} token_state_info; + +extern token_state_info lmt_token_state; + +// # define max_token_reference 0x7FFF /* we can bump to 0xFFFF when we go unsigned here */ +// +//define token_reference(a) token_memory_state.tokens[a].half1 +// +// #define get_token_parameters(a) lmt_token_memory_state.tokens[a].quart2 +// #define get_token_reference(a) lmt_token_memory_state.tokens[a].quart3 +// +// #define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].quart2 = (b) +// +// #define add_token_reference(a) lmt_token_memory_state.tokens[a].quart3 += 1 +// #define sub_token_reference(a) lmt_token_memory_state.tokens[a].quart3 -= 1 +// #define inc_token_reference(a,b) lmt_token_memory_state.tokens[a].quart3 += (quarterword) (b) +// #define dec_token_reference(a,b) lmt_token_memory_state.tokens[a].quart3 -= (quarterword) (b) + +# define max_token_reference 0x0FFFFFFF + +# define get_token_parameters(a) (lmt_token_memory_state.tokens[a].hulf1 >> 28) +# define get_token_reference(a) (lmt_token_memory_state.tokens[a].hulf1 & 0x0FFFFFFF) + +# define set_token_parameters(a,b) lmt_token_memory_state.tokens[a].hulf1 += ((b) << 28) /* normally the variable is still zero here */ + +# define add_token_reference(a) lmt_token_memory_state.tokens[a].hulf1 += 1 /* we are way off the parameter count */ +# define sub_token_reference(a) lmt_token_memory_state.tokens[a].hulf1 -= 1 /* we are way off the parameter count */ +# define inc_token_reference(a,b) lmt_token_memory_state.tokens[a].hulf1 += (b) /* we are way off the parameter count */ +# define dec_token_reference(a,b) lmt_token_memory_state.tokens[a].hulf1 -= (b) /* we are way off the parameter count */ + +/* */ + +# define token_info(a) lmt_token_memory_state.tokens[a].half1 +# define token_link(a) lmt_token_memory_state.tokens[a].half0 +# define get_token_info(a) lmt_token_memory_state.tokens[a].half1 +# define get_token_link(a) lmt_token_memory_state.tokens[a].half0 +# define set_token_info(a,b) lmt_token_memory_state.tokens[a].half1 = (b) +# define set_token_link(a,b) lmt_token_memory_state.tokens[a].half0 = (b) + +# define token_cmd(A) ((A) >> cs_offset_bits) +# define token_chr(A) ((A) & cs_offset_max) +# define token_val(A,B) (((A) << cs_offset_bits) + (B)) + +/*tex + Sometimes we add a value directly. Instead we could use |token_val| on the spot but then we + also need different range checkers. We use numbers because we don't have the cmd codes defined + yet when we're here. so we can't use for instance |token_val (spacer_cmd, 20)| yet. +*/ + +# define left_brace_token token_val( 1, 0) // token_val(left_brace_cmd,0) +# define right_brace_token token_val( 2, 0) // token_val(right_brace_cmd,0) +# define math_shift_token token_val( 3, 0) // token_val(math_shift_cmd,0) +# define alignment_token token_val( 4, 0) +# define superscript_token token_val( 7, 0) +# define subscript_token token_val( 8, 0) +# define ignore_token token_val( 9, 0) // token_val(ignore_cmd,0) +# define space_token token_val(10,32) // token_val(spacer_cmd,32) +# define letter_token token_val(11, 0) // token_val(letter_cmd,0) +# define other_token token_val(12, 0) // token_val(other_char_cmd,0) +# define active_token token_val(13, 0) + +# define match_token token_val(19,0) // token_val(match_cmd,0) +# define end_match_token token_val(20,0) // token_val(end_match_cmd,0) + +# define left_brace_limit right_brace_token +# define right_brace_limit math_shift_token + +# define octal_token (other_token + '\'') /*tex apostrophe, indicates an octal constant */ +# define hex_token (other_token + '"') /*tex double quote, indicates a hex constant */ +# define alpha_token (other_token + '`') /*tex reverse apostrophe, precedes alpha constants */ +# define point_token (other_token + '.') /*tex decimal point */ +# define continental_point_token (other_token + ',') /*tex decimal point, Eurostyle */ +# define period_token (other_token + '.') /*tex decimal point */ +# define comma_token (other_token + ',') /*tex decimal comma */ +# define plus_token (other_token + '+') +# define minus_token (other_token + '-') +# define slash_token (other_token + '/') +# define asterisk_token (other_token + '*') +# define colon_token (other_token + ':') +# define semi_colon_token (other_token + ';') +# define equal_token (other_token + '=') +# define less_token (other_token + '<') +# define more_token (other_token + '>') +# define exclamation_token_o (other_token + '!') +# define exclamation_token_l (letter_token + '!') +# define underscore_token (other_token + '_') +# define underscore_token_o (other_token + '_') +# define underscore_token_l (letter_token + '_') +# define circumflex_token (other_token + '^') +# define circumflex_token_o (other_token + '^') +# define circumflex_token_l (letter_token + '^') +# define escape_token (other_token + '\\') +# define left_parent_token (other_token + '(') +# define right_parent_token (other_token + ')') +# define zero_token (other_token + '0') /*tex zero, the smallest digit */ +# define five_token (other_token + '5') +# define seven_token (other_token + '7') +# define nine_token (other_token + '9') /*tex zero, the smallest digit */ + +# define a_token_l (letter_token + 'a') /*tex the smallest special hex digit */ +# define a_token_o (other_token + 'a') + +# define b_token_l (letter_token + 'b') /*tex the smallest special hex digit */ +# define b_token_o (other_token + 'b') + +# define d_token_l (letter_token + 'd') +# define d_token_o (other_token + 'd') + +# define e_token_l (letter_token + 'e') +# define e_token_o (other_token + 'e') + +# define f_token_l (letter_token + 'f') /*tex the largest special hex digit */ +# define f_token_o (other_token + 'f') + +# define i_token_l (letter_token + 'i') +# define i_token_o (other_token + 'i') + +# define l_token_l (letter_token + 'l') +# define l_token_o (other_token + 'l') + +# define m_token_l (letter_token + 'm') +# define m_token_o (other_token + 'm') + +# define n_token_l (letter_token + 'n') +# define n_token_o (other_token + 'n') + +# define o_token_l (letter_token + 'o') +# define o_token_o (other_token + 'o') + +# define p_token_l (letter_token + 'p') +# define p_token_o (other_token + 'p') + +# define r_token_l (letter_token + 'r') +# define r_token_o (other_token + 'r') + +# define s_token_l (letter_token + 's') +# define s_token_o (other_token + 's') + +# define t_token_l (letter_token + 't') +# define t_token_o (other_token + 't') + +# define u_token_l (letter_token + 'u') +# define u_token_o (other_token + 'u') + +# define x_token_l (letter_token + 'x') +# define x_token_o (other_token + 'x') + +# define A_token_l (letter_token + 'A') /*tex the smallest special hex digit */ +# define A_token_o (other_token + 'A') + +# define E_token_l (letter_token + 'E') +# define E_token_o (other_token + 'E') + +# define F_token_l (letter_token + 'F') /*tex the largest special hex digit */ +# define F_token_o (other_token + 'F') + +# define P_token_l (letter_token + 'P') /*tex the largest special hex digit */ +# define P_token_o (other_token + 'P') + +# define X_token_l (letter_token + 'X') +# define X_token_o (other_token + 'X') + +# define at_token_l (letter_token + '@') +# define at_token_o (other_token + '@') + +# define match_visualizer '#' +# define match_spacer '*' /* ignore spaces */ +# define match_bracekeeper '+' /* keep the braces */ +# define match_thrasher '-' /* discard and don't count the argument */ +# define match_par_spacer '.' /* ignore pars and spaces */ +# define match_keep_spacer ',' /* push back space when no match */ +# define match_pruner '/' /* remove leading and trailing spaces and pars */ +# define match_continuator ':' /* pick up scanning here */ +# define match_quitter ';' /* quit scanning */ +# define match_mandate '=' /* braces are mandate */ +# define match_spacekeeper '^' /* keep leading spaces */ +# define match_mandate_keep '_' /* braces are mandate and kept */ +# define match_par_command '@' /* par delimiter, only internal */ + +# define spacer_match_token (match_token + match_spacer) +# define keep_match_token (match_token + match_bracekeeper) +# define thrash_match_token (match_token + match_thrasher) +# define par_spacer_match_token (match_token + match_par_spacer) +# define keep_spacer_match_token (match_token + match_keep_spacer) +# define prune_match_token (match_token + match_pruner) +# define continue_match_token (match_token + match_continuator) +# define quit_match_token (match_token + match_quitter) +# define mandate_match_token (match_token + match_mandate) +# define leading_match_token (match_token + match_spacekeeper) +# define mandate_keep_match_token (match_token + match_mandate_keep) +# define par_command_match_token (match_token + match_par_command) + +# define is_valid_match_ref(r) (r != thrash_match_token && r != spacer_match_token && r != keep_spacer_match_token && r != continue_match_token && r != quit_match_token) + +/*tex + Managing the head of the list of available one-word nodes. The |get_avail| function has been + given a more verbose name. It gets from the pool and should not be confused with |get_token| + which reads from the input or token list. The |free_avail| function got renamed to + |put_available_token| so we have some symmetry here. +*/ + +extern void tex_compact_tokens (void); +extern void tex_initialize_tokens (void); +extern void tex_initialize_token_mem (void); +extern halfword tex_get_available_token (halfword t); +extern void tex_put_available_token (halfword p); +extern halfword tex_store_new_token (halfword p, halfword t); +extern void tex_delete_token_reference (halfword p); +extern void tex_add_token_reference (halfword p); +extern void tex_increment_token_reference (halfword p, int n); + +# define get_reference_token() tex_get_available_token(null) + +/*tex + + The |no_expand_flag| is a special character value that is inserted by |get_next| if it wants to + suppress expansion. + +*/ + +# define no_expand_flag special_char /* no_expand_relax_code */ + +/*tex A few special values: */ + +# define default_token_show_min 32 +# define default_token_show_max 2500 +# define extreme_token_show_max 0x3FFFFFFF + +/*tex All kind of helpers: */ + +extern void tex_dump_token_mem (dumpstream f); +extern void tex_undump_token_mem (dumpstream f); +extern void tex_print_meaning (halfword code); +extern void tex_flush_token_list (halfword p); +extern void tex_flush_token_list_head_tail (halfword h, halfword t, int n); +extern halfword tex_show_token_list (halfword p, halfword q, int l, int asis); /* Here |l| will go away. */ +extern void tex_token_show (halfword p, int max); +/* void tex_add_token_ref (halfword p); */ +/* void tex_delete_token_ref (halfword p); */ +extern void tex_get_next (void); +extern halfword tex_scan_character (const char *s, int left_brace, int skip_space, int skip_relax); +extern int tex_scan_optional_keyword (const char *s); +extern int tex_scan_mandate_keyword (const char *s, int offset); +extern void tex_aux_show_keyword_error (const char *s); +extern int tex_scan_keyword (const char *s); +extern int tex_scan_keyword_case_sensitive (const char *s); +extern halfword tex_active_to_cs (int c, int force); +extern halfword tex_string_to_toks (const char *s); +extern int tex_get_char_cat_code (int c); +extern halfword tex_get_token (void); +extern halfword tex_str_toks (lstring s, halfword *tail); /* returns head */ +extern halfword tex_cur_str_toks (halfword *tail); /* returns head */ +extern halfword tex_str_scan_toks (int c, lstring b); /* returns head */ +extern void tex_run_combine_the_toks (void); +extern void tex_run_convert_tokens (halfword code); +extern strnumber tex_the_convert_string (halfword c, int i); +extern strnumber tex_tokens_to_string (halfword p); +/* char *tex_tokenlist_to_cstring (int p, int inhibit_par, int *siz); */ +extern char *tex_tokenlist_to_tstring (int p, int inhibit_par, int *siz, int skip, int nospace, int strip); + +extern halfword tex_get_tex_dimen_register (int j, int internal); +extern halfword tex_get_tex_skip_register (int j, int internal); +extern halfword tex_get_tex_mu_skip_register (int j, int internal); +extern halfword tex_get_tex_count_register (int j, int internal); +extern halfword tex_get_tex_attribute_register (int j, int internal); +extern halfword tex_get_tex_box_register (int j, int internal); +extern halfword tex_get_tex_toks_register (int j, int internal); + +extern void tex_set_tex_dimen_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_skip_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_mu_skip_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_count_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_attribute_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_box_register (int j, halfword v, int flags, int internal); + +extern void tex_set_tex_toks_register (int j, lstring s, int flags, int internal); +extern void tex_scan_tex_toks_register (int j, int c, lstring s, int flags, int internal); + +extern halfword tex_copy_token_list (halfword h, halfword *t); + +extern halfword tex_parse_str_to_tok (halfword head, halfword *tail, halfword ct, const char *str, size_t lstr, int option); + +inline int tex_valid_token(int t) +{ + return ((t >= 0) && (t <= (int) lmt_token_memory_state.tokens_data.top)); +} + +# endif |