summaryrefslogtreecommitdiff
path: root/source/luametatex/source/tex/texprinting.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/tex/texprinting.c')
-rw-r--r--source/luametatex/source/tex/texprinting.c1460
1 files changed, 1460 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/texprinting.c b/source/luametatex/source/tex/texprinting.c
new file mode 100644
index 000000000..005c2a3c8
--- /dev/null
+++ b/source/luametatex/source/tex/texprinting.c
@@ -0,0 +1,1460 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# include "luametatex.h"
+
+/*tex
+ The global printer state. Every member starts out as zero (or |NULL| for the pointers); the
+ selector, the offsets, the tally and the trick buffer bookkeeping are read and updated by the
+ print routines below.
+*/
+print_state_info lmt_print_state = {
+ .logfile = NULL,
+ .loggable_info = NULL,
+ .selector = 0,
+ .tally = 0,
+ .terminal_offset = 0,
+ .logfile_offset = 0,
+ .new_string_line = 0,
+ .trick_buffer = { 0 },
+ .trick_count = 0,
+ .first_count = 0,
+ .saved_selector = 0,
+ .font_in_short_display = 0,
+ .saved_logfile = NULL,
+ .saved_logfile_offset = 0,
+};
+
+/*tex
+
+ During the development of \LUAMETATEX\ reporting has been stepwise upgraded, for instance with more
+ abstract print functions and a formatter. Much more detail is shown and additional tracing options
+ have been added (like for marks, inserts, adjust, math, etc.). The format of the traditional messages
+ was mostly kept (sometimes under parameter control using a higher tracing value) but after reading
+ the nth ridiculous comment about logging in \LUATEX\ related to \CONTEXT\ I decided that it no
+ longer made sense to offer compatibility because it will never satisfy everyone and we want to move
+ on, so per spring 2022 we will see even further normalization and log compatibility options get (are)
+ dropped. If there are inconsistencies left, assume they will be dealt with. It's all about being able
+ to recognize what gets logged. If someone longs for the old reporting, there are plenty alternative
+ engines available.
+
+ [where: ...] : all kind of tracing
+ {...} : more traditional tex tracing
+ <...> : if tracing (maybe)
+
+*/
+
+/*tex
+
+ Messages that are sent to a user's terminal and to the transcript-log file are produced by
+ several |print| procedures. These procedures will direct their output to a variety of places,
+ based on the setting of the global variable |selector|, which has the following possible values:
+
+ \startitemize
+
+ \startitem
+ |term_and_log|, the normal setting, prints on the terminal and on the transcript file.
+ \stopitem
+
+ \startitem
+ |log_only|, prints only on the transcript file.
+ \stopitem
+
+ \startitem
+ |term_only|, prints only on the terminal.
+ \stopitem
+
+ \startitem
+ |no_print|, doesn't print at all. This is used only in rare cases before the transcript
+ file is open.
+ \stopitem
+
+ \startitem
+ |pseudo|, puts output into a cyclic buffer that is used by the |show_context| routine; when
+ we get to that routine we shall discuss the reasoning behind this curious mode.
+ \stopitem
+
+ \startitem
+ |new_string|, appends the output to the current string in the string pool.
+ \stopitem
+
+ \startitem
+ 0 to 15, prints on one of the sixteen files for |\write| output.
+ \stopitem
+
+ \stopitemize
+
+ The symbolic names |term_and_log|, etc., have been assigned numeric codes that satisfy the
+ convenient relations |no_print + 1 = term_only|, |no_print + 2 = log_only|, |term_only + 2 =
+ log_only + 1 = term_and_log|.
+
+ Three additional global variables, |tally| and |term_offset| and |file_offset|, record the
+ number of characters that have been printed since they were most recently cleared to zero. We
+ use |tally| to record the length of (possibly very long) stretches of printing; |term_offset|
+ and |file_offset|, on the other hand, keep track of how many characters have appeared so far on
+ the current line that has been output to the terminal or to the transcript file, respectively.
+
+ The state structure collects: |new_string_line| and |escape_controls|, the transcript handle of
+ a \TEX\ session: |log_file|, the target of a message: |selector|, the digits in a number being
+ output |dig[23]|, the number of characters recently printed |tally|, the number of characters
+ on the current terminal line |term_offset|, the number of characters on the current file line
+ |file_offset|, the circular buffer for pseudoprinting |trick_buf|, the threshold for
+ pseudoprinting (explained later) |trick_count|, another variable for pseudoprinting
+ |first_count|, a blocker for minor adjustments to |show_token_list| namely |inhibit_par_tokens|.
+
+ To end a line of text output, we call |print_ln|:
+
+*/
+
+void tex_print_ln(void)
+{
+    /*tex
+        Ends the current line on whatever destination the selector points at. The terminal and
+        log offsets are reset when those channels are written; |tally| is not affected. The
+        |no_print| and |pseudo| selectors (and unknown ones) produce nothing.
+    */
+    const int selector = lmt_print_state.selector;
+    const int to_terminal = (selector == terminal_selector_code) || (selector == terminal_and_logfile_selector_code);
+    const int to_logfile = (selector == logfile_selector_code) || (selector == terminal_and_logfile_selector_code);
+    if (to_terminal) {
+        fputc('\n', stdout);
+    }
+    if (to_logfile) {
+        fputc('\n', lmt_print_state.logfile);
+    }
+    if (to_terminal) {
+        lmt_print_state.terminal_offset = 0;
+    }
+    if (to_logfile) {
+        lmt_print_state.logfile_offset = 0;
+    }
+    if (selector == new_string_selector_code) {
+        /*tex When building a string a line end is only emitted when a replacement character is set. */
+        if (lmt_print_state.new_string_line > 0) {
+            tex_print_char(lmt_print_state.new_string_line);
+        }
+    } else if (selector == luabuffer_selector_code) {
+        lmt_newline_to_buffer();
+    }
+}
+
+
+/*tex
+
+ The |print_char| procedure sends one byte to the desired destination. All printing comes through
+ |print_ln| or |print_char|, except for the case of |print_str| (see below).
+
+ The checking of the line length is an inheritance from previous engines and we dropped it here.
+ It doesn't make much sense nowadays. The same is true for escaping.
+
+ Incrementing the tally ... only needed in pseudo mode :
+
+*/
+
+// void tex_print_char(int s)
+// {
+// if (s < 0 || s > 255) {
+// tex_formatted_warning("print", "weird character %i", s);
+// } else if (s == new_line_char_par && (lmt_print_state.selector < pseudo_selector_code)) {
+// tex_print_ln();
+// } else {
+// switch (lmt_print_state.selector) {
+// case no_print_selector_code:
+// break;
+// case terminal_selector_code:
+// fputc(s, stdout);
+// ++lmt_print_state.terminal_offset;
+// break;
+// case logfile_selector_code:
+// fputc(s, lmt_print_state.logfile);
+// ++lmt_print_state.logfile_offset;
+// break;
+// case terminal_and_logfile_selector_code:
+// fputc(s, stdout);
+// fputc(s, lmt_print_state.logfile);
+// ++lmt_print_state.terminal_offset;
+// ++lmt_print_state.logfile_offset;
+// break;
+// case pseudo_selector_code:
+// if (lmt_print_state.tally < lmt_print_state.trick_count) {
+// lmt_print_state.trick_buffer[lmt_print_state.tally % lmt_error_state.line_limits.size] = (unsigned char) s;
+// }
+// ++lmt_print_state.tally;
+// break;
+// case new_string_selector_code:
+// tex_append_char((unsigned char) s);
+// break;
+// case luabuffer_selector_code:
+// lmt_char_to_buffer((char) s);
+// break;
+// default:
+// break;
+// }
+// }
+// }
+
+void tex_print_char(int s)
+{
+    /*tex
+        Sends the single byte |s| to the destination chosen by the selector. Values outside
+        |0..255| only trigger a warning. On the terminal and in the log the current newline
+        character is turned into a real line end and resets the line offset.
+    */
+    if (s < 0 || s > 255) {
+        tex_formatted_warning("print", "weird character %i", s);
+        return;
+    }
+    switch (lmt_print_state.selector) {
+        case terminal_selector_code:
+        case logfile_selector_code:
+        case terminal_and_logfile_selector_code:
+            {
+                const int to_terminal = lmt_print_state.selector != logfile_selector_code;
+                const int to_logfile = lmt_print_state.selector != terminal_selector_code;
+                if (s == new_line_char_par) {
+                    if (to_terminal) {
+                        fputc('\n', stdout);
+                    }
+                    if (to_logfile) {
+                        fputc('\n', lmt_print_state.logfile);
+                    }
+                    if (to_terminal) {
+                        lmt_print_state.terminal_offset = 0;
+                    }
+                    if (to_logfile) {
+                        lmt_print_state.logfile_offset = 0;
+                    }
+                } else {
+                    if (to_terminal) {
+                        fputc(s, stdout);
+                    }
+                    if (to_logfile) {
+                        fputc(s, lmt_print_state.logfile);
+                    }
+                    if (to_terminal) {
+                        ++lmt_print_state.terminal_offset;
+                    }
+                    if (to_logfile) {
+                        ++lmt_print_state.logfile_offset;
+                    }
+                }
+            }
+            break;
+        case pseudo_selector_code:
+            /*tex Only the first |trick_count| characters are stored but all of them are counted. */
+            if (lmt_print_state.tally < lmt_print_state.trick_count) {
+                lmt_print_state.trick_buffer[lmt_print_state.tally % lmt_error_state.line_limits.size] = (unsigned char) s;
+            }
+            ++lmt_print_state.tally;
+            break;
+        case new_string_selector_code:
+            tex_append_char((unsigned char) s);
+            break;
+        case luabuffer_selector_code:
+            lmt_char_to_buffer((char) s);
+            break;
+        default:
+            /*tex This includes |no_print_selector_code|. */
+            break;
+    }
+}
+
+/*tex
+
+ An entire string is output by calling |print|. Note that if we are outputting the single
+ standard \ASCII\ character |c|, we could call |print("c")|, since |"c" = 99| is the number of a
+ single-character string, as explained above. But |print_char("c")| is quicker, so \TEX\ goes
+ directly to the |print_char| routine when it knows that this is safe. (The present
+ implementation assumes that it is always safe to print a visible \ASCII\ character.)
+
+ The first 256 entries above the 17th unicode plane are used for a special trick: when \TEX\ has
+ to print items in that range, it will instead print the character that results from subtracting
+ 0x110000 from that value. This allows byte-oriented output to things like |\specials|.
+
+ This feature will disappear.
+
+*/
+
+static void tex_aux_uprint(int s)
+{
+    /*tex
+        Prints the code point |s| byte by byte as \UTF-8. The newline character becomes a real
+        line end on the terminal and log selectors. Values from |0x110000| upward are the raw
+        byte trick: |s - 0x110000| is printed as a single byte when it fits. A shortcut for
+        internal strings (selectors above |pseudo|) was considered but stays disabled.
+    */
+    if (s == new_line_char_par && lmt_print_state.selector < pseudo_selector_code) {
+        tex_print_ln();
+    } else if (s <= 0x7F) {
+        /*tex one byte (negative values end up here too and get reported by |tex_print_char|) */
+        tex_print_char(s);
+    } else if (s <= 0x7FF) {
+        /*tex two bytes */
+        tex_print_char(0xC0 + (s >> 6));
+        tex_print_char(0x80 + (s & 0x3F));
+    } else if (s <= 0xFFFF) {
+        /*tex three bytes */
+        tex_print_char(0xE0 + (s >> 12));
+        tex_print_char(0x80 + ((s >> 6) & 0x3F));
+        tex_print_char(0x80 + (s & 0x3F));
+    } else if (s < 0x110000) {
+        /*tex four bytes */
+        tex_print_char(0xF0 + (s >> 18));
+        tex_print_char(0x80 + ((s >> 12) & 0x3F));
+        tex_print_char(0x80 + ((s >> 6) & 0x3F));
+        tex_print_char(0x80 + (s & 0x3F));
+    } else {
+        /*tex a raw byte above the unicode range */
+        int c = s - 0x110000;
+        if (c < 256) {
+            tex_print_char(c);
+        } else {
+            tex_formatted_warning("print", "bad raw byte to print (c=%d), skipped",c);
+        }
+    }
+}
+
+static void tex_aux_lprint(lstring *ss) {
+    /*tex
+        Prints the |ss->l| bytes at |ss->s| one at a time. A four byte sequence that starts with
+        |0xF4 0x90| (the \UTF-8 encoding of values from |0x110000| upward) is collapsed into the
+        single raw byte that it stands for.
+    */
+    /*tex current character code position */
+    unsigned char *j = ss->s;
+    unsigned char *l = j + ss->l;
+    while (j < l) {
+        /*tex We don't bother checking the last two bytes explicitly */
+        /* 0x110000 in utf-8: 0xF4 0x90 0x80 0x80 */
+        /*tex
+            We compare the remaining length instead of computing |l - 4|: that also accepts a
+            sequence that ends the string and it never forms a pointer before the buffer when
+            the string is shorter than four bytes.
+        */
+        if ((l - j >= 4) && (*j == 0xF4) && (*(j + 1) == 0x90)) {
+            int c = (*(j + 2) - 128) * 64 + (*(j + 3) - 128);
+            /*tex An out of range result is reported by |tex_print_char| itself. */
+            tex_print_char(c);
+            j = j + 4;
+        } else {
+            tex_print_char(*j);
+            ++j;
+        }
+    }
+}
+
+void tex_print_tex_str(int s)
+{
+    /*tex
+        Prints the \TEX\ string with number |s|. Numbers below |cs_offset_value| are printed as
+        the character with that code, others are looked up in the string pool. Invalid numbers
+        only result in a warning.
+    */
+    if (s >= lmt_string_pool_state.string_pool_data.ptr) {
+        tex_normal_warning("print", "bad string pointer");
+        return;
+    }
+    if (s >= cs_offset_value) {
+        if (lmt_print_state.selector == new_string_selector_code) {
+            /*tex When we build a string the bytes can be appended in one go. */
+            tex_append_string(str_string(s), (unsigned) str_length(s));
+        } else {
+            tex_aux_lprint(&str_lstring(s));
+        }
+        return;
+    }
+    if (s >= 0) {
+        tex_aux_uprint(s);
+    } else {
+        tex_normal_warning("print", "bad string offset");
+    }
+}
+
+/*tex
+
+ The procedure |print_nl| is like |print|, but it makes sure that the string appears at the
+ beginning of a new line.
+
+*/
+
+void tex_print_nlp(void)
+{
+    /*tex
+        Makes sure that what follows starts on a fresh line: on the terminal and in the log a
+        line end is only written when something is already on the current line. When a
+        |new_string_line| character is set, that character is printed instead, whatever the
+        selector is.
+    */
+    if (lmt_print_state.new_string_line > 0) {
+        tex_print_char(lmt_print_state.new_string_line);
+        return;
+    } else {
+        const int selector = lmt_print_state.selector;
+        const int to_terminal = (selector == terminal_selector_code) || (selector == terminal_and_logfile_selector_code);
+        const int to_logfile = (selector == logfile_selector_code) || (selector == terminal_and_logfile_selector_code);
+        if (to_terminal && lmt_print_state.terminal_offset > 0) {
+            fputc('\n', stdout);
+            lmt_print_state.terminal_offset = 0;
+        }
+        if (to_logfile && lmt_print_state.logfile_offset > 0) {
+            fputc('\n', lmt_print_state.logfile);
+            lmt_print_state.logfile_offset = 0;
+        }
+        if (selector == luabuffer_selector_code) {
+            lmt_newline_to_buffer();
+        }
+    }
+}
+
+/*tex
+
+ The |char *| versions of the same procedures. |print_str| is different because it uses
+ buffering, which works well because most of the output actually comes through |print_str|.
+
+*/
+
+void tex_print_str(const char *s)
+{
+    /*tex
+        Prints the zero terminated string |s| in one go. On the terminal and in the log the line
+        offset is reset when |s| ends with a newline, otherwise it is advanced by the length of
+        |s|. The log is skipped as long as it has not been opened.
+    */
+    int to_terminal = 0;
+    int to_logfile = 0;
+    switch (lmt_print_state.selector) {
+        case terminal_selector_code:
+            to_terminal = 1;
+            break;
+        case logfile_selector_code:
+            to_logfile = 1;
+            break;
+        case terminal_and_logfile_selector_code:
+            to_terminal = 1;
+            to_logfile = 1;
+            break;
+        case pseudo_selector_code:
+            /*tex Here we stop as soon as the trick buffer has seen |trick_count| characters. */
+            for (; (*s) && (lmt_print_state.tally < lmt_print_state.trick_count); lmt_print_state.tally++) {
+                lmt_print_state.trick_buffer[lmt_print_state.tally % lmt_error_state.line_limits.size] = (unsigned char) *s++;
+            }
+            return;
+        case new_string_selector_code:
+            tex_append_string((const unsigned char *) s, (unsigned) strlen(s));
+            return;
+        case luabuffer_selector_code:
+            lmt_string_to_buffer(s);
+            return;
+        default:
+            /*tex This covers |no_print_selector_code| and unknown selectors. */
+            return;
+    }
+    {
+        const int length = (int) strlen(s);
+        if (length > 0) {
+            const int ends_line = s[length - 1] == '\n';
+            if (to_logfile && lmt_fileio_state.log_opened) {
+                fputs(s, lmt_print_state.logfile);
+                lmt_print_state.logfile_offset = ends_line ? 0 : lmt_print_state.logfile_offset + length;
+            }
+            if (to_terminal) {
+                fputs(s, stdout);
+                lmt_print_state.terminal_offset = ends_line ? 0 : lmt_print_state.terminal_offset + length;
+            }
+        }
+    }
+}
+
+/*tex
+
+ Here is the very first thing that \TEX\ prints: a headline that identifies the version number
+ and format package. The |term_offset| variable is temporarily incorrect, but the discrepancy is
+ not serious since we assume that the banner and format identifier together will occupy at most
+ |max_print_line| character positions. Well, we dropped that check in this variant.
+
+ Maybe we should drop printing the format identifier.
+
+*/
+
+void tex_print_banner(void)
+{
+    /*tex The engine banner and the format identifier go on one terminal line; offsets are not updated. */
+    printf("%s %s\n", lmt_engine_state.luatex_banner, str_string(lmt_dump_state.format_identifier));
+}
+
+void tex_print_log_banner(void)
+{
+    /*tex
+        The first line of the log: engine, format, time stamp and the file names involved. A
+        month above 12 is shown as zero, |time_par| is split by 60 into two numbers and unset
+        names show up as a dash. No line end is written here.
+    */
+    const int month = month_par > 12 ? 0 : month_par;
+    const int hours = time_par / 60;
+    const int minutes = time_par % 60;
+    fprintf(
+        lmt_print_state.logfile,
+        "engine: %s, format id: %s, time stamp: %d-%d-%d %d:%d, startup file: %s, job name: %s, dump name: %s",
+        lmt_engine_state.luatex_banner,
+        str_string(lmt_dump_state.format_identifier),
+        year_par, month, day_par, hours, minutes,
+        lmt_engine_state.startup_filename ? lmt_engine_state.startup_filename : "-",
+        lmt_engine_state.startup_jobname ? lmt_engine_state.startup_jobname : "-",
+        lmt_engine_state.dump_name ? lmt_engine_state.dump_name : "-"
+    );
+}
+
+/*tex Writes just the engine banner to the terminal, without a line end and bypassing the selector. */
+void tex_print_version_banner(void)
+{
+ fputs(lmt_engine_state.luatex_banner, stdout);
+}
+
+/*tex
+
+ The procedure |print_esc| prints a string that is preceded by the user's escape character
+ (which is usually a backslash).
+
+*/
+
+void tex_print_tex_str_esc(strnumber s)
+{
+    /*tex
+        Prints the current escape character (when it is a valid code point) followed by the
+        \TEX\ string |s|; string number zero is skipped.
+    */
+    const int escape = escape_char_par;
+    const int printable = (escape >= 0) && (escape < 0x110000);
+    if (printable) {
+        tex_print_tex_str(escape);
+    }
+    if (s != 0) {
+        tex_print_tex_str(s);
+    }
+}
+
+/*tex This prints escape character, then |s|. */
+
+void tex_print_str_esc(const char *s)
+{
+    /*tex
+        Prints the current escape character (when it is a valid code point) followed by the
+        \CCODE\ string |s|; a |NULL| string is skipped.
+    */
+    const int escape = escape_char_par;
+    const int printable = (escape >= 0) && (escape < 0x110000);
+    if (printable) {
+        tex_print_tex_str(escape);
+    }
+    if (s != NULL) {
+        tex_print_str(s);
+    }
+}
+
+/*tex
+ An array of digits in the range |0..15| is printed by |print_the_digs|. These digits are in the
+ reverse order: |dig[k-1]|$\,\ldots\,$|dig[0]|!
+*/
+
+// inline static void tex_print_decimal_digits(const unsigned char *digits, int k)
+// {
+// while (k-- > 0) {
+// tex_print_char('0' + digits[k]);
+// }
+// }
+
+// inline static void tex_print_hexadecimal_digits(const unsigned char *digits, int k)
+// {
+// while (k-- > 0) {
+// if (digits[k] < 10) {
+// tex_print_char('0' + digits[k]);
+// } else {
+// tex_print_char('A' - 10 + digits[k]);
+// }
+// }
+// }
+
+/*tex
+
+ The following procedure, which prints out the decimal representation of a given integer |n|,
+ has been written carefully so that it works properly if |n = 0| or if |(-n)| would cause
+ overflow. It does not apply |mod| or |div| to negative arguments, since such operations are not
+ implemented consistently by all \PASCAL\ compilers.
+
+*/
+
+// void tex_print_int(int n)
+// {
+// /*tex In the end a 0..9 fast path works out best. */
+// if (n >= 0 && n <= 9) {
+// tex_print_char('0' + n);
+// } else {
+// /*tex index to current digit; we assume that $|n|<10^{23}$ */
+// int k = 0;
+// unsigned char digits[24];
+// if (n < 0) {
+// tex_print_char('-');
+// n = -n;
+// }
+// do {
+// digits[k] = (unsigned char) (n % 10);
+// n = n / 10;
+// ++k;
+// } while (n != 0);
+// tex_print_decimal_digits(digits, k);
+// }
+// }
+
+void tex_print_int(int n)
+{
+    /*tex
+        Prints the decimal representation of |n|, with a leading minus sign for negative
+        values. The digits are collected in reverse order and then flushed.
+    */
+    /*tex In the end a 0..9 fast path works out best; using |sprintf| is slower. */
+    if (n >= 0 && n <= 9) {
+        tex_print_char('0' + n);
+    } else {
+        int k = 0;
+        unsigned char digits[24];
+        /*tex
+            We work with the unsigned magnitude: negating the most negative integer as a signed
+            value overflows, while the unsigned negation below is well defined.
+        */
+        unsigned int u = (unsigned int) n;
+        if (n < 0) {
+            tex_print_char('-');
+            u = 0u - u;
+        }
+        do {
+            digits[k] = (unsigned char) ('0' + (u % 10));
+            u = u / 10;
+            ++k;
+        } while (u != 0);
+        while (k-- > 0) {
+            tex_print_char(digits[k]);
+        }
+    }
+}
+
+/*tex
+
+ Conversely, here is a procedure analogous to |print_int|. If the output of this procedure is
+ subsequently read by \TEX\ and converted by the |round_decimals| routine above, it turns out
+ that the original value will be reproduced exactly; the \quote {simplest} such decimal number
+ is output, but there is always at least one digit following the decimal point.
+
+ The invariant relation in the |repeat| loop is that a sequence of decimal digits yet to be
+ printed will yield the original number if and only if they form a fraction~$f$ in the range $s
+ - \delta \L10 \cdot 2^{16} f < s$. We can stop if and only if $f = 0$ satisfies this condition;
+ the loop will terminate before $s$ can possibly become zero.
+
+ The next one prints a scaled real, rounded to five digits.
+
+*/
+
+/*tex
+ Prints the scaled value |s| as a decimal number with at least one digit after the period,
+ followed by the unit unless |unit| equals |no_unit|. Zero is printed as |0.0|.
+*/
+void tex_print_dimension(scaled s, int unit)
+{
+ if (s == 0) {
+ tex_print_str("0.0"); /* really .. just 0 is not ok for some applications */
+ } else {
+ /*tex The amount of allowable inaccuracy: */
+ scaled delta = 10;
+ /*tex The buffer is zeroed, so the collected fraction is always terminated. */
+ char buffer[20] = { 0 } ;
+ int i = 0;
+ if (s < 0) {
+ /*tex Print the sign, if negative. */
+ tex_print_char('-');
+ /*tex NOTE(review): this negation assumes that |s| is never the most negative integer. */
+ s = -s;
+ }
+ /*tex Print the integer part. */
+ tex_print_int(s / unity);
+ /*tex The period and the fraction digits are collected and printed in one go. */
+ buffer[i++] = '.';
+ s = 10 * (s % unity) + 5;
+ do {
+ if (delta > unity) {
+ /*tex Round the last digit; mind that |0100000| is an octal number (32768). */
+ s = s + 0100000 - 50000;
+ }
+ buffer[i++] = (unsigned char) ('0' + (s / unity));
+ s = 10 * (s % unity);
+ delta *= 10;
+ } while (s > delta);
+ /*tex No explicit terminator is needed because of the zeroed buffer. */
+ tex_print_str(buffer);
+ }
+ if (unit != no_unit) {
+ tex_print_unit(unit);
+ }
+}
+
+/*tex
+ A variant of |tex_print_dimension| that leaves out what is not needed: zero becomes |0|,
+ exactly |unity| becomes |1| and a fraction that consists of a single zero digit is dropped
+ together with the period. The unit is added unless |unit| equals |no_unit|.
+*/
+void tex_print_sparse_dimension(scaled s, int unit)
+{
+ if (s == 0) {
+ tex_print_char('0');
+ } else if (s == unity) {
+ tex_print_char('1');
+ } else {
+ /*tex The amount of allowable inaccuracy: */
+ scaled delta = 10;
+ /*tex This buffer is not zeroed, the terminator gets added below before printing. */
+ char buffer[20];
+ int i = 0;
+ if (s < 0) {
+ /*tex Print the sign, if negative. */
+ tex_print_char('-');
+ /*tex So we trust it here while in printing int we mess around. */
+ s = -s;
+ }
+ /*tex Print the integer part. */
+ tex_print_int(s / unity);
+ s = 10 * (s % unity) + 5;
+ do {
+ if (delta > unity) {
+ /*tex Round the last digit; mind that |0100000| is an octal number (32768). */
+ s = s + 0100000 - 50000;
+ }
+ buffer[i++] = (unsigned char) ('0' + (s / unity));
+ s = 10 * (s % unity);
+ delta *= 10;
+ } while (s > delta);
+ if (i == 1 && buffer[i-1] == '0') {
+ /*tex A fraction of just one zero digit is not shown at all. */
+ } else {
+ buffer[i++] = '\0';
+ tex_print_char('.');
+ tex_print_str(buffer);
+ }
+ }
+ if (unit != no_unit) {
+ tex_print_unit(unit);
+ }
+}
+
+/*tex
+
+ Hexadecimal printing of nonnegative integers is accomplished by |print_hex|. We have a few
+ variants. Because we have bitsets that can give upto |0xFFFFFFFF| we treat the given integer
+ as an unsigned.
+*/
+
+// void tex_print_hex(int n)
+// {
+// /*tex index to current digit; we assume that $0\L n<16^{22}$ */
+// int k = 0 ;
+// unsigned char digits[24];
+// do {
+// digits[k] = n % 16;
+// n = n / 16;
+// ++k;
+// } while (n != 0);
+// tex_print_hexadecimal_digits(digits, k);
+// }
+
+void tex_print_hex(int sn)
+{
+    /*tex
+        Prints |sn| in uppercase hexadecimal without a prefix or padding. The value is treated
+        as unsigned, so a negative argument shows up as its bit pattern and there is never a
+        sign to print (the former sign check on the unsigned value could not trigger).
+    */
+    unsigned int n = (unsigned int) sn;
+    int k = 0;
+    unsigned char digits[24];
+    /*tex The digits are collected in reverse order. */
+    do {
+        unsigned char d = (unsigned char) (n % 16);
+        if (d < 10) {
+            digits[k] = '0' + d;
+        } else {
+            digits[k] = 'A' - 10 + d;
+        }
+        n = n / 16;
+        ++k;
+    } while (n != 0);
+    while (k-- > 0) {
+        tex_print_char(digits[k]);
+    }
+}
+
+/*tex Prints |n| in hexadecimal, preceded by a double quote (the way \TEX\ denotes hex numbers). */
+void tex_print_qhex(int n)
+{
+ tex_print_char('"');
+ tex_print_hex(n);
+}
+
+void tex_print_uhex(int n)
+{
+    /*tex
+        Prints |n| as |U+| followed by at least four uppercase hexadecimal digits. Because
+        |tex_print_hex| treats its argument as unsigned we do the same when we decide on the
+        padding, otherwise a negative value would get zeros in front of eight digits.
+    */
+    unsigned int u = (unsigned int) n;
+    tex_print_str("U+");
+    if (u < 0x10) {
+        tex_print_char('0');
+    }
+    if (u < 0x100) {
+        tex_print_char('0');
+    }
+    if (u < 0x1000) {
+        tex_print_char('0');
+    }
+    tex_print_hex(n);
+}
+
+/*tex
+
+ Roman numerals are produced by the |print_roman_int| routine. Readers who like puzzles might
+ enjoy trying to figure out how this tricky code works; therefore no explanation will be given.
+ Notice that 1990 yields |mcmxc|, not |mxm|.
+
+*/
+
+/*tex Prints |n| as a lowercase roman numeral; zero and negative values print nothing. */
+void tex_print_roman_int(int n)
+{
+ /*tex Letters interleaved with the digits by which the current value |v| gets divided. */
+ char mystery[] = "m2d5c2l5x2v5i";
+ /*tex |j| points at the letter that goes with the value |v|. */
+ char *j = (char *) mystery;
+ int v = 1000;
+ while (1) {
+ while (n >= v) {
+ tex_print_char(*j);
+ n = n - v;
+ }
+ if (n <= 0) {
+ /*tex nonpositive input produces no output */
+ return;
+ } else {
+ /*tex |k| points at the candidate prefix letter that has value |u|. */
+ char *k = j + 2;
+ int u = v / (*(k - 1) - '0');
+ if (*(k - 1) == '2') {
+ k = k + 2;
+ u = u / (*(k - 1) - '0');
+ }
+ if (n + u >= v) {
+ /*tex Print the prefix and add its value so that the loop above prints |*j| next. */
+ tex_print_char(*k);
+ n = n + u;
+ } else {
+ /*tex Move on to the next smaller letter. */
+ j = j + 2;
+ v = v / (*(j - 1) - '0');
+ }
+ }
+ }
+}
+
+/*tex
+
+ The |print| subroutine will not print a string that is still being created. The following
+ procedure will.
+
+*/
+
+void tex_print_current_string(void)
+{
+    /*tex
+        Prints the |string_temp_top| bytes of the string that is currently being built. The
+        index is advanced by the loop only: doing it in the subscript as well would skip every
+        other character.
+    */
+    for (int j = 0; j < lmt_string_pool_state.string_temp_top; j++) {
+        tex_print_char(lmt_string_pool_state.string_temp[j]);
+    }
+}
+
+/*tex
+
+ The procedure |print_cs| prints the name of a control sequence, given a pointer to its address
+ in |eqtb|. A space is printed after the name unless it is a single nonletter or an active
+ character. This procedure might be invoked with invalid data, so it is \quote {extra robust}.
+ The individual characters must be printed one at a time using |print|, since they may be
+ unprintable.
+
+*/
+
+void tex_print_cs_checked(halfword p)
+{
+    /*tex
+        Prints the control sequence at |p| after checking that |p| and its text make sense;
+        bad values result in an error string instead. A space follows the name, except after a
+        single character name whose character is not a letter and after an active character.
+    */
+    if (p == null_cs) {
+        tex_print_str_esc("csname");
+        tex_print_str_esc("endcsname");
+        tex_print_char(' ');
+        return;
+    }
+    if (p < hash_base) {
+        tex_print_str(error_string_impossible(11));
+        return;
+    }
+    if (p == undefined_control_sequence) {
+        tex_print_str_esc("undefined");
+        tex_print_char(' ');
+        return;
+    }
+    if (eqtb_out_of_range(p)) {
+        tex_print_str(error_string_impossible(12));
+        return;
+    }
+    {
+        strnumber text = cs_text(p);
+        if (text < 0 || text >= lmt_string_pool_state.string_pool_data.ptr) {
+            tex_print_str(error_string_nonexistent(13));
+        } else if (tex_is_active_cs(text)) {
+            tex_print_tex_str(active_cs_value(text));
+        } else {
+            tex_print_tex_str_esc(text);
+            if (! tex_single_letter(text) || (tex_get_cat_code(cat_code_table_par, aux_str2uni(str_string(text))) == letter_cmd)) {
+                tex_print_char(' ');
+            }
+        }
+    }
+}
+
+/*tex
+
+ Here is a similar procedure; it avoids the error checks, and it never prints a space after the
+ control sequence. The other one doesn't even print the bogus cs.
+
+*/
+
+void tex_print_cs(halfword p)
+{
+    /*tex
+        Prints the control sequence at |p| without checks and without a trailing space. An
+        active character is printed as that character, anything else with the escape character
+        in front.
+    */
+    if (p != null_cs) {
+        strnumber text = cs_text(p);
+        if (! tex_is_active_cs(text)) {
+            tex_print_tex_str_esc(text);
+        } else {
+            tex_print_tex_str(active_cs_value(text));
+        }
+    } else {
+        tex_print_str_esc("csname");
+        tex_print_str_esc("endcsname");
+    }
+}
+
+void tex_print_cs_name(halfword p)
+{
+    /*tex Prints just the name of the control sequence at |p|, so no escape; |null_cs| gives nothing. */
+    if (p == null_cs) {
+        return;
+    } else {
+        strnumber text = cs_text(p);
+        tex_print_tex_str(tex_is_active_cs(text) ? active_cs_value(text) : text);
+    }
+}
+
+/*tex
+
+ Then there is a subroutine that prints glue stretch and shrink, possibly followed by the name
+ of finite units:
+
+*/
+
+void tex_print_glue(scaled d, int order, int unit)
+{
+    /*tex
+        Prints the stretch or shrink amount |d| followed by |fi|, |fil|, |fill| or |filll| for
+        an infinite order, by the unit for the normal order, and by |foul| for an order that is
+        out of range.
+    */
+    tex_print_dimension(d, no_unit);
+    if ((order < normal_glue_order) || (order > filll_glue_order)) {
+        tex_print_str("foul");
+    } else if (order == normal_glue_order) {
+        tex_print_unit(unit);
+    } else {
+        tex_print_str("fi");
+        /*tex One |l| for each step above |fi|. */
+        for (int level = order; level > fi_glue_order; level--) {
+            tex_print_char('l');
+        }
+    }
+}
+
+/*tex The next subroutine prints a whole glue specification. */
+
+void tex_print_unit(int unit)
+{
+    /*tex Prints |pt| for |pt_unit|, nothing for |no_unit| and |mu| for anything else. */
+    if (unit == no_unit) {
+        return;
+    } else if (unit == pt_unit) {
+        tex_print_str("pt");
+    } else {
+        tex_print_str("mu");
+    }
+}
+
+void tex_print_spec(int p, int unit)
+{
+    /*tex
+        Prints the glue specification at |p|: the amount, followed by the stretch and shrink
+        components when these are nonzero. A negative |p| shows up as an asterisk and a zero
+        |p| as a zero dimension.
+    */
+    if (p < 0) {
+        tex_print_char('*');
+        return;
+    }
+    if (p == 0) {
+        tex_print_dimension(0, unit);
+        return;
+    }
+    tex_print_dimension(glue_amount(p), unit);
+    if (glue_stretch(p) != 0) {
+        tex_print_str(" plus ");
+        tex_print_glue(glue_stretch(p), glue_stretch_order(p), unit);
+    }
+    if (glue_shrink(p) != 0) {
+        tex_print_str(" minus ");
+        tex_print_glue(glue_shrink(p), glue_shrink_order(p), unit);
+    }
+}
+
+void tex_print_fontspec(int p)
+{
+    /*tex
+        Prints the font identifier of the font specification at |p|, followed by each of the
+        scale, xscale and yscale values that is actually set (that is, differs from
+        |unused_scale_value|).
+    */
+    tex_print_int(font_spec_identifier(p));
+    if (! (font_spec_scale(p) == unused_scale_value)) {
+        tex_print_str(" scale ");
+        tex_print_int(font_spec_scale(p));
+    }
+    if (! (font_spec_x_scale(p) == unused_scale_value)) {
+        tex_print_str(" xscale ");
+        tex_print_int(font_spec_x_scale(p));
+    }
+    if (! (font_spec_y_scale(p) == unused_scale_value)) {
+        tex_print_str(" yscale ");
+        tex_print_int(font_spec_y_scale(p));
+    }
+}
+
+/*tex Math characters: */
+
+void tex_print_mathspec(int p)
+{
+    /*tex Shows the math code value that the math specification at |p| holds, if there is one. */
+    if (! p) {
+        tex_print_str("[invalid mathspec]");
+        return;
+    } else {
+        mathcodeval value = tex_get_math_spec(p);
+        tex_show_mathcode_value(value, node_subtype(p));
+    }
+}
+
+/*tex
+
+ We can reinforce our knowledge of the data structures just introduced by considering two
+ procedures that display a list in symbolic form. The first of these, called |short_display|, is
+ used in \quotation {overfull box} messages to give the top-level description of a list. The
+ other one, called |show_node_list|, prints a detailed description of exactly what is in the
+ data structure.
+
+ The philosophy of |short_display| is to ignore the fine points about exactly what is inside
+ boxes, except that ligatures and discretionary breaks are expanded. As a result,
+ |short_display| is a recursive procedure, but the recursion is never more than one level deep.
+
+ A global variable |font_in_short_display| keeps track of the font code that is assumed to be
+ present when |short_display| begins; deviations from this font will be printed.
+
+ Boxes, rules, inserts, whatsits, marks, and things in general that are sort of \quote
+ {complicated} are indicated only by printing |[]|.
+
+ We print a bit more than original \TEX. A value of 0 or 1 or any large value will behave the
+ same as before. The reason for this extension is that a |name| not always makes sense.
+
+ \starttyping
+ 0 \foo xyz
+ 1 \foo (bar)
+ 2 <bar> xyz
+ 3 <bar @ ..> xyz
+ 4 <id>
+ 5 <id: bar>
+ 6 <id: bar @ ..> xyz
+ \stoptyping
+
+*/
+
+void tex_print_char_identifier(halfword c) // todo: use string_print_format
+{
+    /*tex
+        Prints the code point |c| as six hexadecimal digits. Private use characters and
+        surrogates get a |0x| prefix and nothing else; other characters get a |U+| prefix and
+        are followed by a space and the character itself. Values outside |0..0x10FFFF| print
+        nothing. Negative values have to be rejected here: formatted as unsigned they need
+        eight digits, which used to overflow the buffer.
+    */
+    if (c >= 0 && c <= 0x10FFFF) {
+        /*tex The prefix, six digits and the terminator take nine bytes. */
+        char b[16];
+        if ( (c >= 0x00E000 && c <= 0x00F8FF) || (c >= 0x0F0000 && c <= 0x0FFFFF) ||
+             (c >= 0x100000 && c <= 0x10FFFF) || (c >= 0x00D800 && c <= 0x00DFFF) ) {
+            snprintf(b, sizeof(b), "0x%06X", (unsigned int) c);
+            tex_print_str(b);
+        } else {
+            snprintf(b, sizeof(b), "U+%06X", (unsigned int) c);
+            tex_print_str(b);
+            tex_print_char(' ');
+            tex_print_tex_str(c);
+        }
+    }
+}
+
+void tex_print_font_identifier(halfword f)
+{
+    /*tex
+        |< >| is less likely to clash with text parenthesis. The variants that used to depend
+        on |tracing_fonts_par| are gone: a valid font is always shown in the most verbose form
+        (identifier, name and size) and an invalid one as |<*>|.
+    */
+    if (! tex_is_valid_font(f)) {
+        tex_print_str("<*>");
+        return;
+    }
+    tex_print_format("<%i: %s @ %D>", f, font_name(f), font_size(f), pt_unit);
+}
+
+void tex_print_font_specifier(halfword e)
+{
+    /*tex
+        Shows the font specification |e| as its font identifier and three scale values, or as
+        |<*>| when |e| is zero or refers to an invalid font.
+    */
+    if (! e || ! tex_is_valid_font(font_spec_identifier(e))) {
+        tex_print_str("<*>");
+        return;
+    }
+    tex_print_format("<%i: %i %i %i>", font_spec_identifier(e), font_spec_scale(e), font_spec_x_scale(e), font_spec_y_scale(e));
+}
+
+void tex_print_font(halfword f)
+{
+    /*tex
+        Prints |nullfont| for font zero, |nofont| for an invalid font and otherwise the name of
+        the font followed by its size. Checking the size against the designsize is rather
+        meaningless nowadays, so the size is always shown. We could even make this whole blob
+        a callback.
+    */
+    if (f == 0) {
+        tex_print_str("nullfont");
+        return;
+    }
+    if (! tex_is_valid_font(f)) {
+        tex_print_str("nofont");
+        return;
+    }
+    tex_print_str(font_name(f));
+    tex_print_format(" at %D", font_size(f), pt_unit);
+}
+
+/*tex This prints highlights of list |p|. */
+
+void tex_short_display(halfword p)
+{
+    /*tex After the level prefix comes a short rendering of list |p|, or a note that it is empty. */
+    tex_print_levels();
+    if (! p) {
+        tex_print_str("empty list");
+        return;
+    }
+    tex_print_short_node_contents(p);
+}
+
+/*tex This prints token list data in braces. */
+
+void tex_print_token_list(const char *s, halfword p)
+{
+    /*tex
+        Prints the level prefix, two periods, the optional label |s| and then token list |p|
+        between braces. A |p| that lies outside the token memory is reported as clobbered.
+    */
+    const int top = (int) lmt_token_memory_state.tokens_data.top;
+    tex_print_levels();
+    tex_print_str("..");
+    if (s != NULL) {
+        tex_print_str(s);
+        tex_print_char(' ');
+    }
+    tex_print_char('{');
+    if ((p < 0) || (p > top)) {
+        tex_print_str(error_string_clobbered(21));
+    } else {
+        tex_show_token_list(p, null, default_token_show_max, 0);
+    }
+    tex_print_char('}');
+}
+
+/*tex This prints dimensions of a rule node. */
+
+void tex_print_rule_dimen(scaled d)
+{
+    /*tex A rule dimension that equals |null_flag| is shown as an asterisk, others in points. */
+    if (d != null_flag) {
+        tex_print_dimension(d, pt_unit);
+        return;
+    }
+    tex_print_char('*');
+}
+
+/*tex
+
+ Since boxes can be inside of boxes, |show_node_list| is inherently recursive, up to a given
+ maximum number of levels. The history of nesting is indicated by the current string, which
+ will be printed at the beginning of each line; the length of this string, namely |cur_length|,
+ is the depth of nesting.
+
+ A global variable called |depth_threshold| is used to record the maximum depth of nesting for
+ which |show_node_list| will show information. If we have |depth_threshold = 0|, for example,
+ only the top level information will be given and no sublists will be traversed. Another global
+ variable, called |breadth_max|, tells the maximum number of items to show at each level;
+ |breadth_max| had better be positive, or you won't see anything.
+
+ The maximum nesting depth in box displays is kept in |depth_threshold| and the maximum number
+ of items shown at the same list level in |breadth_max|.
+
+ The recursive machinery is started by calling |show_box|. Assign the values |depth_threshold :=
+ show_box_depth| and |breadth_max := show_box_breadth|
+
+*/
+
+/*tex Shows the node list at |p|, limited by the show box depth and breadth parameters, and ends the line. */
+void tex_show_box(halfword p)
+{
+ /*tex the show starts at |p| */
+ tex_show_node_list(p, show_box_depth_par, show_box_breadth_par);
+ tex_print_ln();
+}
+
+/*tex
+
+ \TEX\ is occasionally supposed to print diagnostic information that goes only into the
+ transcript file, unless |tracing_online| is positive. Here are two routines that adjust the
+ destination of print commands:
+
+*/
+
+void tex_begin_diagnostic(void)
+{
+    /*tex Remember the current destination; |tex_end_diagnostic| puts it back. */
+    lmt_print_state.saved_selector = lmt_print_state.selector;
+    if (lmt_print_state.selector == terminal_and_logfile_selector_code) {
+        if (tracing_online_par <= 0) {
+            /*tex No online tracing: send the diagnostic to the log file only. */
+            lmt_print_state.selector = logfile_selector_code;
+            /*tex A spotless run is downgraded to one where a warning was issued. */
+            if (lmt_error_state.history == spotless) {
+                lmt_error_state.history = warning_issued;
+            }
+        }
+    }
+    tex_print_levels();
+}
+
+/*tex
+
+    Finish a diagnostic: call |tex_print_nlp| while the diagnostic selector is still active and
+    only then put back the selector that |tex_begin_diagnostic| saved.
+
+*/
+
+void tex_end_diagnostic(void)
+{
+    /*tex This has to happen before the selector is switched back. */
+    tex_print_nlp();
+    /*tex Return to the destination that was active before the diagnostic started. */
+    lmt_print_state.selector = lmt_print_state.saved_selector;
+}
+
+/*tex
+
+    Print a single space when the offset that belongs to the current selector is not odd. The
+    terminal selector looks at |terminal_offset|, both log file selectors look at |logfile_offset|,
+    and every other selector (the \LUA\ buffer among them) prints nothing.
+
+*/
+
+static void tex_print_padding(void)
+{
+    int pad = 0;
+    if (lmt_print_state.selector == terminal_selector_code) {
+        pad = ! odd(lmt_print_state.terminal_offset);
+    } else if (lmt_print_state.selector == logfile_selector_code
+            || lmt_print_state.selector == terminal_and_logfile_selector_code) {
+        pad = ! odd(lmt_print_state.logfile_offset);
+    }
+    if (pad) {
+        tex_print_char(' ');
+    }
+}
+
+/*tex
+
+    Call |tex_print_nlp| and then, when |tracing_levels| is positive, print a prefix made of the
+    selected counters, each followed by a colon: the current group level, the input stack pointer
+    and the current catcode table. A space follows when at least one counter was printed and
+    |tex_print_padding| is called in any case.
+
+*/
+
+void tex_print_levels(void)
+{
+    /*tex The parameter is read before anything gets printed. */
+    int levels = tracing_levels_par;
+    tex_print_nlp();
+    if (levels <= 0) {
+        return;
+    } else {
+        int show_group = (levels & 0x01) == tracing_levels_group;
+        int show_input = (levels & 0x02) == tracing_levels_input;
+        int show_catcodes = (levels & 0x04) == tracing_levels_catcodes;
+        int shown = 0;
+        if (show_group) {
+            tex_print_int(cur_level);
+            tex_print_char(':');
+            shown = 1;
+        }
+        if (show_input) {
+            tex_print_int(lmt_input_state.input_stack_data.ptr);
+            tex_print_char(':');
+            shown = 1;
+        }
+        if (show_catcodes) {
+            tex_print_int(cat_code_table_par);
+            tex_print_char(':');
+            shown = 1;
+        }
+        if (shown) {
+            tex_print_char(' ');
+        }
+        tex_print_padding();
+    }
+}
+
+/* maybe %GROUP% where we scan up to [UPPER][%], so %G and %GR are also ok
+
+ shared with error messages, so at some point we will merge:
+
+ %c int char
+ %s *char string
+ %q *char 'string'
+ %i int integer
+ %e backslash (tex escape)
+ %C int int symbolic representation of cmd chr
+ %E *char \cs
+ %S int tex cs string
+ %M int mode
+ %T int tex string
+ %% percent
+
+ specific for print (I need to identify the rest)
+
+ ! %U int unicode
+ ! %D int dimension
+
+ ! %B int badness
+ ! %G int group
+
+ ! %L int (if) linenumber
+
+*/
+
+/*tex
+
+    A minimal formatter for the print routines. Characters in |format| are printed as they are,
+    except for a percent sign, which starts a directive. These are the directives that are handled
+    here together with the arguments that they take from the variable argument list:
+
+    %c  int          |tex_print_char|
+    %e  -            |tex_print_str_esc| with |NULL|
+    %i  int          |tex_print_int|
+    %l  -            |tex_print_levels|
+    %n  int          |tex_print_extended_subtype| with a |null| node
+    %m  int          |tex_print_cs_checked|
+    %s  char *       |tex_print_str|
+    %q  char *       |tex_print_str| between single quotes
+    %x  int          |tex_print_qhex|
+    %B  int          badness, |awful_bad| is shown as an asterisk
+    %C  int int      |tex_print_cmd_chr| (command, character)
+    %D  scaled int   |tex_print_dimension| (value, unit)
+    %E  char *       |tex_print_str_esc|
+    %G  int          |tex_print_group|
+    %F  int          |tex_print_font_identifier|
+    %L  int          " entered on line <n>", nothing when the value is zero
+    %M  int          the string that |tex_string_mode| returns
+    %P  int (6x)     total, stretch, fil, fill, filll and shrink: the total followed by the first
+                     nonzero stretch component (" plus ...") and a nonzero shrink (" minus ...")
+    %S  int          |tex_print_cs|
+    %T  int          |tex_print_tex_str|
+    %U  int          |tex_print_uhex|
+    %%  -            a percent sign
+
+    An unknown directive is skipped and takes no argument. A percent sign at the very end of the
+    format ends the formatting.
+
+*/
+
+extern void tex_print_format(const char *format, ...)
+{
+    va_list args;
+    va_start(args, format); /* hm, weird, no number */
+    while (1) {
+        int chr = *format++;
+        switch (chr) {
+            case '\0':
+                goto DONE;
+            case '%':
+                {
+                    chr = *format++;
+                    switch (chr) {
+                        case '\0':
+                            goto DONE;
+                        case 'c':
+                            tex_print_char(va_arg(args, int));
+                            break;
+                        case 'e':
+                            tex_print_str_esc(NULL);
+                            break;
+                        case 'i':
+                            tex_print_int(va_arg(args, int));
+                            break;
+                        case 'l':
+                            tex_print_levels();
+                            break;
+                        case 'n':
+                            tex_print_extended_subtype(null, (quarterword) va_arg(args, int));
+                            break;
+                        case 'm':
+                            tex_print_cs_checked(va_arg(args, int));
+                            break;
+                        case 's':
+                            tex_print_str(va_arg(args, char *));
+                            break;
+                        case 'q':
+                            tex_print_char('\'');
+                            tex_print_str(va_arg(args, char *));
+                            tex_print_char('\'');
+                            break;
+                        case 'x':
+                            tex_print_qhex(va_arg(args, int));
+                            break;
+                        /*
+                        case 'u':
+                            tex_print_unit(va_arg(args, int));
+                            break;
+                        */
+                        case 'B': /* badness */
+                            {
+                                scaled b = va_arg(args, halfword);
+                                if (b == awful_bad) {
+                                    tex_print_char('*');
+                                } else {
+                                    tex_print_int(b);
+                                }
+                                break;
+                            }
+                        case 'C':
+                            {
+                                int cmd = va_arg(args, int);
+                                int val = va_arg(args, int);
+                                tex_print_cmd_chr((singleword) cmd, val); /* inlining doesn't work */
+                                break;
+                            }
+                        case 'D': /* dimension */
+                            {
+                                scaled s = va_arg(args, scaled);
+                                int u = va_arg(args, int);
+                                tex_print_dimension(s, u);
+                                break;
+                            }
+                        case 'E':
+                            tex_print_str_esc(va_arg(args, char *));
+                            break;
+                        case 'G':
+                            {
+                                halfword g = va_arg(args, int);
+                                tex_print_group(g);
+                                break;
+                            }
+                        case 'F':
+                            {
+                                halfword i = va_arg(args, int);
+                                tex_print_font_identifier(i);
+                                break;
+                            }
+                        case 'L':
+                            {
+                                /* typically used for if line */
+                                halfword line = va_arg(args, int);
+                                if (line) {
+                                    tex_print_str(" entered on line ");
+                                    tex_print_int(line);
+                                }
+                                break;
+                            }
+                        case 'M':
+                            {
+                                halfword mode = va_arg(args, int);
+                                tex_print_str(tex_string_mode(mode));
+                                break;
+                            }
+                        case 'P':
+                            {
+                                /* all six arguments are always fetched, also when they are zero */
+                                scaled total = va_arg(args, int);
+                                scaled stretch = va_arg(args, int);
+                                scaled filstretch = va_arg(args, int);
+                                scaled fillstretch = va_arg(args, int);
+                                scaled filllstretch = va_arg(args, int);
+                                scaled shrink = va_arg(args, int);
+                                tex_print_dimension(total, pt_unit);
+                                /* only the first nonzero stretch component is reported */
+                                if (stretch) {
+                                    tex_print_str(" plus ");
+                                    tex_print_dimension(stretch, pt_unit);
+                                } else if (filstretch) {
+                                    tex_print_str(" plus ");
+                                    tex_print_dimension(filstretch, no_unit);
+                                    tex_print_str(" fil");
+                                } else if (fillstretch) {
+                                    tex_print_str(" plus ");
+                                    tex_print_dimension(fillstretch, no_unit);
+                                    tex_print_str(" fill");
+                                } else if (filllstretch) {
+                                    tex_print_str(" plus ");
+                                    /* this has to be the filll amount: |fillstretch| is zero here */
+                                    tex_print_dimension(filllstretch, no_unit);
+                                    tex_print_str(" filll");
+                                }
+                                if (shrink) {
+                                    tex_print_str(" minus ");
+                                    tex_print_dimension(shrink, pt_unit);
+                                }
+                                break;
+                            }
+                        case 'S':
+                            {
+                                halfword cs = va_arg(args, int);
+                                tex_print_cs(cs);
+                                break;
+                            }
+                        case 'T':
+                            {
+                                strnumber s = va_arg(args, int);
+                                tex_print_tex_str(s);
+                                break;
+                            }
+                        case 'U':
+                            {
+                                halfword c = va_arg(args, int);
+                                tex_print_uhex(c);
+                                break;
+                            }
+                        case '%':
+                            tex_print_char('%');
+                            break;
+                     // case '[':
+                     //     tex_begin_diagnostic();
+                     //     tex_print_char('[');
+                     //     break;
+                     // case ']':
+                     //     tex_print_char(']');
+                     //     tex_end_diagnostic();
+                     //     break;
+                        default:
+                            /* ignore bad one */
+                            break;
+                    }
+                }
+                break;
+            default:
+                tex_print_char(chr); /* todo: utf */
+                break;
+        }
+    }
+  DONE:
+    va_end(args);
+}
+
+/*tex
+
+ Group codes were introduced in \ETEX\ but have been extended in the meantime in \LUATEX\ and
+ later again in \LUAMETATEX. We might have (even) more granularity in the future.
+
+ Todo: combine this with an array of struct(id,name,lua) ... a rainy day + stack of new cd's job.
+
+*/
+
+/*tex
+
+    Print the name of the current group (|cur_group|). Unless this is the bottom level group the
+    name is followed by " group" and, when |tex_saved_line_at_level| gives a nonzero line, by that
+    line number. A nonzero |e| selects the wording " entered at line " instead of " at line ".
+
+*/
+
+void tex_print_group(int e)
+{
+    /*tex The line is fetched before anything is printed. */
+    int line = tex_saved_line_at_level();
+    tex_print_str(lmt_interface.group_code_values[cur_group].name);
+    if (cur_group == bottom_level_group) {
+        return;
+    }
+    tex_print_str(" group");
+    if (line) {
+        if (e) {
+            tex_print_str(" entered at line ");
+        } else {
+            tex_print_str(" at line ");
+        }
+        tex_print_int(line);
+    }
+}
+
+/*tex
+
+    Print the message |s| between parentheses, with a call to |tex_print_nlp| before the opening
+    and after the closing parenthesis.
+
+*/
+
+void tex_print_message(const char *s)
+{
+    tex_print_nlp();
+    /*tex The message itself is wrapped as |(s)|. */
+    tex_print_char('(');
+    tex_print_str(s);
+    tex_print_char(')');
+    tex_print_nlp();
+}