diff options
Diffstat (limited to 'source/luametatex/source/tex/texinputstack.h')
-rw-r--r-- | source/luametatex/source/tex/texinputstack.h | 452 |
1 files changed, 452 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/texinputstack.h b/source/luametatex/source/tex/texinputstack.h new file mode 100644 index 000000000..7ae677d56 --- /dev/null +++ b/source/luametatex/source/tex/texinputstack.h @@ -0,0 +1,452 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_INPUTSTACK_H +# define LMT_INPUTSTACK_H + +/*tex + + The state of \TEX's input mechanism appears in the input stack, whose entries are records with + six fields, called |state|, |index|, |start|, |loc|, |limit|, and |name|. + +*/ + +/* todo: there is no need to be sparse here */ + +typedef struct in_state_record { + halfword start; + halfword loc; + unsigned short state; + union { unsigned short index; unsigned short token_type; }; /*tex: So, no macro but name. */ + union { halfword limit; halfword parameter_start; }; /*tex: So, no macro but name. */ + halfword name; + signed short cattable; /*tex The category table used by the current line (see |textoken.c|). */ + unsigned short partial; /*tex Is the current line partial (see |textoken.c|)? */ + int state_file; /*tex Here we stack the tag of the current file. */ + int state_line; /*tex Not used. */ +} in_state_record; + +typedef struct input_stack_record { + halfword input_file_callback_id; + halfword line; + halfword end_of_file_seen; + halfword group; + halfword if_ptr; + halfword padding; + char *full_source_filename; +} input_stack_record; + +// todo: better names for in_state_record and input_stack_record ... now mixed up + +typedef struct input_state_info { + in_state_record *input_stack; + memory_data input_stack_data; + input_stack_record *in_stack; + memory_data in_stack_data; + halfword *parameter_stack; + memory_data parameter_stack_data; + in_state_record cur_input; /*tex The \quote {top} input state. Why not just pointing. */ + int input_line; + int scanner_status; + halfword def_ref; /*tex Has to be set for error recovery etc. */ + int align_state; + int base_ptr; + halfword warning_index; + int open_files; + int padding; +} input_state_info; + +extern input_state_info lmt_input_state; + +typedef struct input_file_state_info { + int forced_file; + int forced_line; + halfword mode; + halfword line; +} input_file_state_info; + +extern input_file_state_info input_file_state; + +static inline int input_file_value(void) +{ + return input_file_state.forced_file ? input_file_state.forced_file : lmt_input_state.cur_input.state_file; +} + +static inline int input_line_value(void) +{ + return input_file_state.forced_line ? input_file_state.forced_line : (input_file_state.line ? input_file_state.line : lmt_input_state.input_line); +} + +/*tex + + In \LUAMETATEX\ the io model was stepwise changed a bit, mostly in the \LUA\ feedback area. + Support for nodes, tokens, short and long string were improved. Around 2.06.17 specification + nodes became dynamic and that left the pseudo files as only variable node type. By removing + variable nodes we can avoid some code in node management so getting rid of pseudo files made + sense. The token scan macros used these but now use a lightweight varian tof the \LUA\ scanner, + which we had anyway. The only complication is the |\everyeof| of |\scantokens|. Also, tracing + (if at all) is now different but these three scanners are seldom used and were introduced in + \ETEX\ (|scantokens|), \LUATEX\ (|\scantextokens|) and \LUAMETATEX\ (|tokenized|). The new + approach also gives more room for future extensions. + + All this has been a very stepwise process, because we know that there are users who use \LMTX\ + in production and small steps are easier to test. Experiments mostly happen in parts of the + code that is less critital ... after all \LUAMETATEX\ is also an experimental engine ... but + io related code changes are kind of critital. + + Just to remember wahat we came from: the first 15 were reserved read channels but that is now + delegated to \LUA, so we had an offset of 16 in: + +*/ + +typedef enum io_codes { + io_initial_input_code, + io_lua_input_code, + io_token_input_code, + io_token_eof_input_code, + io_tex_macro_code, + io_file_input_code, +} io_codes; + +/* +* + Now, these |io_codes| are used in the name field but that field can also be a way larger number, + i.e.\ the string index of the file. That also assumes that the first used index is above the last + io_code. It can be the warning index too, just for the sake of an error context message. So: + symbolic (small) number, tex string being the filename, and macro name. But, because we also + have that information in other places (partly as side effect of luafication) a simpler model is + used now where we use a few dedicates codes. It also means that we no longer store the filename + in the string pool. + +*/ + +# define io_token_input(c) (c >= io_lua_input_code && c <= io_token_eof_input_code) +# define io_file_input(c) (c >= io_file_input_code) + +/*tex + + Let's look more closely now at the control variables (|state|, |index|, |start|, |loc|, |limit|, + |name|), assuming that \TEX\ is reading a line of characters that have been input from some file + or from the user's terminal. There is an array called |buffer| that acts as a stack of all lines + of characters that are currently being read from files, including all lines on subsidiary levels + of the input stack that are not yet completed. \TEX\ will return to the other lines when it is + finished with the present input file. + + (Incidentally, on a machine with byte-oriented addressing, it might be appropriate to combine + |buffer| with the |str_pool| array, letting the buffer entries grow downward from the top of the + string pool and checking that these two tables don't bump into each other.) + + The line we are currently working on begins in position |start| of the buffer; the next character + we are about to read is |buffer[loc]|; and |limit| is the location of the last character present. + If |loc > limit|, the line has been completely read. Usually |buffer[limit]| is the + |end_line_char|, denoting the end of a line, but this is not true if the current line is an + insertion that was entered on the user's terminal in response to an error message. + + The |name| variable is a string number that designates the name of the current file, if we are + reading a text file. It is zero if we are reading from the terminal; it is |n+1| if we are reading + from input stream |n|, where |0 <= n <= 16|. (Input stream 16 stands for an invalid stream number; + in such cases the input is actually from the terminal, under control of the procedure |read_toks|.) + Finally |18 <= name <=20| indicates that we are reading a pseudo file created by the |\scantokens| + or |\scantextokens| command. A larger value is reserved for input coming from \LUA. + + The |state| variable has one of three values, when we are scanning such files: + + \startitemize + \startitem + |mid_line| is the normal state. + \stopitem + \startitem + |skip_blanks| is like |mid_line|, but blanks are ignored. + \stopitem + \startitem + |new_line| is the state at the beginning of a line. + \stopitem + \stopitemize + + These state values are assigned numeric codes so that if we add the state code to the next + character's command code, we get distinct values. For example, |mid_line + spacer| stands for the + case that a blank space character occurs in the middle of a line when it is not being ignored; + after this case is processed, the next value of |state| will be |skip_blanks|. + + As with other constants, we only add some prefix or suffix but keep the normal name as much as + possible, so that the original documentation still applies. + +*/ + +typedef enum state_codes { + token_list_state = 0, + /*tex when scanning a line of characters */ + mid_line_state = 1, + /*tex when ignoring blanks */ + skip_blanks_state = 2 + max_char_code, + /*tex at the start of a line */ + new_line_state = 3 + max_char_code + max_char_code, +} state_codes; + +/*tex + + Additional information about the current line is available via the |index| variable, which + counts how many lines of characters are present in the buffer below the current level. We + have |index = 0| when reading from the terminal and prompting the user for each line; then if + the user types, e.g., |\input paper|, we will have |index = 1| while reading the file + |paper.tex|. However, it does not follow that |index| is the same as the input stack pointer, + since many of the levels on the input stack may come from token lists. For example, the + instruction |\input paper| might occur in a token list. + + The global variable |in_open| is equal to the |index| value of the highest \quote {non token + list} level. Thus, the number of partially read lines in the buffer is |in_open + 1|, and we + have |in_open = index| when we are not reading a token list. + + If we are not currently reading from the terminal, or from an input stream, we are reading from + the file variable |input_file [index]|. We use the notation |terminal_input| as a convenient + abbreviation for |name = 0|, and |cur_file| as an abbreviation for |input_file [index]|. + + The global variable |line| contains the line number in the topmost open file, for use in error + messages. If we are not reading from the terminal, |line_stack [index]| holds the line number + or the enclosing level, so that |line| can be restored when the current file has been read. + Line numbers should never be negative, since the negative of the current line number is used to + identify the user's output routine in the |mode_line| field of the semantic nest entries. + + If more information about the input state is needed, it can be included in small arrays like + those shown here. For example, the current page or segment number in the input file might be + put into a variable |page|, maintained for enclosing levels in ||page_stack:array [1 .. + max_input_open] of integer| by analogy with |line_stack|. + + Users of \TEX\ sometimes forget to balance left and right braces properly, and one of the ways + \TEX\ tries to spot such errors is by considering an input file as broken into subfiles by + control sequences that are declared to be |\outer|. + + A variable called |scanner_status| tells \TEX\ whether or not to complain when a subfile ends. + This variable has six possible values: + + \startitemize + + \startitem + |normal|, means that a subfile can safely end here without incident. + \stopitem + + \startitem + |skipping|, means that a subfile can safely end here, but not a file, because we're reading + past some conditional text that was not selected. + \stopitem + + \startitem + |defining|, means that a subfile shouldn't end now because a macro is being defined. + \stopitem + + \startitem + |matching|, means that a subfile shouldn't end now because a macro is being used and we are + searching for the end of its arguments. + \stopitem + + \startitem + |aligning|, means that a subfile shouldn't end now because we are not finished with the + preamble of an |\halign| or |\valign|. + \stopitem + + \startitem + |absorbing|, means that a subfile shouldn't end now because we are reading a balanced token + list for |\message|, |\write|, etc. + \stopitem + + \stopitemize + + If the |scanner_status| is not |normal|, the variable |warning_index| points to the |eqtb| + location for the relevant control sequence name to print in an error message. + +*/ + +typedef enum scanner_states { + scanner_is_normal, /*tex passing conditional text */ + scanner_is_skipping, /*tex passing conditional text */ + scanner_is_defining, /*tex reading a macro definition */ + scanner_is_matching, /*tex reading macro arguments */ + scanner_is_tolerant, /*tex reading tolerant macro arguments */ + scanner_is_aligning, /*tex reading an alignment preamble */ + scanner_is_absorbing, /*tex reading a balanced text */ +} scanner_states; + +extern void tex_show_runaway(void); /*tex This is only used when running out of token memory. */ + +/*tex + + However, the discussion about input state really applies only to the case that we are inputting + from a file. There is another important case, namely when we are currently getting input from a + token list. In this case |state = token_list|, and the conventions about the other state + variables are + different: + + \startitemize + + \startitem + |loc| is a pointer to the current node in the token list, i.e., the node that will be read + next. If |loc=null|, the token list has been fully read. + \stopitem + + \startitem + |start| points to the first node of the token list; this node may or may not contain a + reference count, depending on the type of token list involved. + \stopitem + + \startitem + |token_type|, which takes the place of |index| in the discussion above, is a code number + that explains what kind of token list is being scanned. + \stopitem + + \startitem + |name| points to the |eqtb| address of the control sequence being expanded, if the current + token list is a macro. + \stopitem + + \startitem + |param_start|, which takes the place of |limit|, tells where the parameters of the current + macro begin in the |param_stack|, if the current token list is a macro. + \stopitem + + \stopitemize + + The |token_type| can take several values, depending on where the current token list came from: + + \startitemize + + \startitem + |parameter|, if a parameter is being scanned; + \stopitem + + \startitem + |u_template|, if the |u_j| part of an alignment template is being scanned; + \stopitem + + \startitem + |v_template|, if the |v_j| part of an alignment template is being scanned; + \stopitem + + \startitem + |backed_up|, if the token list being scanned has been inserted as \quotation {to be read + again}. + \stopitem + + \startitem + |inserted|, if the token list being scanned has been inserted as the text expansion of a + |\count| or similar variable; + \stopitem + + \startitem + |macro|, if a user-defined control sequence is being scanned; + \stopitem + + \startitem + |output_text|, if an |\output| routine is being scanned; + \stopitem + + \startitem + |every_par_text|, if the text of |\everypar| is being scanned; + \stopitem + + \startitem + |every_math_text|, if the text of |\everymath| is being scanned; + \stopitem + + \startitem + |every_display_text|, if the text of \everydisplay| is being scanned; + \stopitem + + \startitem + |every_hbox_text|, if the text of |\everyhbox| is being scanned; + \stopitem + + \startitem + |every_vbox_text|, if the text of |\everyvbox| is being scanned; + \stopitem + + \startitem + |every_job_text|, if the text of |\everyjob| is being scanned; + \stopitem + + \startitem + |every_cr_text|, if the text of |\everycr| is being scanned; + \stopitem + + \startitem + |mark_text|, if the text of a |\mark| is being scanned; + \stopitem + + \startitem + |write_text|, if the text of a |\write| is being scanned. + \stopitem + + \stopitemize + + The codes for |output_text|, |every_par_text|, etc., are equal to a constant plus the + corresponding codes for token list parameters |output_routine_loc|, |every_par_loc|, etc. + + The token list begins with a reference count if and only if |token_type >= macro|. + + Since \ETEX's additional token list parameters precede |toks_base|, the corresponding token + types must precede |write_text|. However, in \LUAMETATEX\ we delegate all the read and write + primitives to \LUA\ so that model has been simplified. + +*/ + +/* #define token_type input_state.cur_input.token_type */ /*tex type of current token list */ +/* #define param_start input_state.cur_input.param_start */ /*tex base of macro parameters in |param_stack| */ + +typedef enum token_types { + parameter_text, /*tex parameter */ + template_pre_text, /*tex |u_j| template */ + template_post_text, /*tex |v_j| template */ + backed_up_text, /*tex text to be reread */ + inserted_text, /*tex inserted texts */ + macro_text, /*tex defined control sequences */ + output_text, /*tex output routines */ + every_par_text, /*tex |\everypar| */ + every_math_text, /*tex |\everymath| */ + every_display_text, /*tex |\everydisplay| */ + every_hbox_text, /*tex |\everyhbox| */ + every_vbox_text, /*tex |\everyvbox| */ + every_math_atom_text, /*tex |\everymathatom| */ + every_job_text, /*tex |\everyjob| */ + every_cr_text, /*tex |\everycr| */ + every_tab_text, /*tex |\everytab| */ + error_help_text, + every_before_par_text, /*tex |\everybeforeeof| */ + every_eof_text, /*tex |\everyeof| */ + end_of_group_text, + mark_text, /*tex |\topmark|, etc. */ + loop_text, + end_paragraph_text, /*tex |\everyendpar| */ + write_text, /*tex |\write| */ + local_text, + local_loop_text, +} token_types; + +extern void tex_initialize_input_state (void); +/* int tex_room_on_param_stack (void); */ +/* int tex_room_on_in_stack (void); */ +/* int tex_room_on_input_stack (void); */ +extern void tex_copy_pstack_to_param_stack (halfword *pstack, int n); +extern void tex_show_context (void); +extern void tex_show_validity (void); +extern void tex_set_trick_count (void); +extern void tex_begin_token_list (halfword t, quarterword kind); /* include some tracing */ +extern void tex_begin_parameter_list (halfword t); /* less inlining code */ +extern void tex_begin_backed_up_list (halfword t); /* less inlining code */ +extern void tex_begin_inserted_list (halfword t); /* less inlining code */ +extern void tex_begin_macro_list (halfword t); /* less inlining code */ +extern void tex_end_token_list (void); +extern void tex_cleanup_input_state (void); +extern void tex_back_input (halfword t); +extern void tex_reinsert_token (halfword t); +extern void tex_insert_input (halfword h); +extern void tex_append_input (halfword h); +extern void tex_begin_file_reading (void); +extern void tex_end_file_reading (void); +extern void tex_initialize_inputstack (void); +extern void tex_lua_string_start (void); +extern void tex_tex_string_start (int iotype, int cattable); +extern void tex_any_string_start (char *s); +extern halfword tex_wrapped_token_list (halfword h); +extern const char *tex_current_input_file_name (void); + +# endif |