From 89f7bbac9616406b3990b8608c17c542f5fb476d Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Mon, 24 Aug 2020 20:03:53 +0200 Subject: 2020-08-24 19:49:00 --- .../documents/general/manuals/luametatex.pdf | Bin 1231680 -> 1219876 bytes .../manuals/luametatex/luametatex-differences.tex | 33 +++-- .../manuals/luametatex/luametatex-enhancements.tex | 145 +++++++++------------ .../manuals/luametatex/luametatex-introduction.tex | 18 ++- .../manuals/luametatex/luametatex-languages.tex | 81 ++++++------ .../general/manuals/luametatex/luametatex-lua.tex | 25 +++- .../luametatex/luametatex-modifications.tex | 107 ++++++++++----- .../manuals/luametatex/luametatex-preamble.tex | 48 +++---- .../manuals/luametatex/luametatex-style.tex | 2 +- .../general/manuals/luametatex/luametatex-tex.tex | 3 +- .../general/manuals/luametatex/luametatex.tex | 2 +- .../sources/general/manuals/mk/mk-memory.tex | 8 +- 12 files changed, 252 insertions(+), 220 deletions(-) (limited to 'doc') diff --git a/doc/context/documents/general/manuals/luametatex.pdf b/doc/context/documents/general/manuals/luametatex.pdf index 072f63507..36b1c54c1 100644 Binary files a/doc/context/documents/general/manuals/luametatex.pdf and b/doc/context/documents/general/manuals/luametatex.pdf differ diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-differences.tex b/doc/context/sources/general/manuals/luametatex/luametatex-differences.tex index a4b5fe6da..482371cd9 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-differences.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-differences.tex @@ -121,7 +121,8 @@ In the \type {node}, \type {tex} and \type {status} library we no longer have helpers and variables that relate to the backend. The \LUAMETATEX\ engine is in principle \DVI\ and \PDF\ unaware. There are only generic whatsit nodes that can be used for some management related tasks. For instance you can use them to -implement user nodes. +implement user nodes. 
More extensive status information is provided in the +overhauled status library. The margin kern nodes are gone and we now use regular kern nodes for them. As a consequence there are two extra subtypes indicating the injected left or right @@ -139,11 +140,16 @@ find_write_file find_format_file open_data_file The callbacks related to errors are changed: \starttyping -intercept_tex_error intercept_lua_error, +intercept_tex_error intercept_lua_error show_error_message show_warning_message \stoptyping -and a new ont \type {if_end_of_file} has been added to the repertoire. +There is a hook that gets called when one of the fundamental memory structures +gets reallocated. + +\starttyping +trace_memory +\stoptyping The (job) management hooks are kept: @@ -163,18 +169,9 @@ show_whatsit Being the core of extensibility, the typesetting callbacks of course stayed. This is what we ended up with: -% \ctxlua{inspect(table.sortedkeys(callbacks.list))} - -\starttyping -find_log_file, find_format_file, open_data_file, if_end_of_file, process_jobname, -start_run, stop_run, define_font, pre_output_filter, buildpage_filter, -hpack_filter, vpack_filter, hyphenate, ligaturing, kerning, pre_linebreak_filter, -linebreak_filter, post_linebreak_filter, append_to_vlist_filter, mlist_to_hlist, -pre_dump, start_file, stop_file, intercept_tex_error, intercept_lua_error, -show_error_message, show_warning_message, hpack_quality, vpack_quality, -insert_local_par, contribute_filter, build_page_insert, wrapup_run, new_graf, -make_extensible, show_whatsit -\stoptyping +\startalign[flushleft,nothyphenated] +\tt \cldcontext{table.concat(table.sortedkeys(callbacks.list), ", ")} +\stopalign As in \LUATEX\ font loading happens with the following callback. This time it really needs to be set because there is no built|-|in font loader. 
@@ -186,7 +183,7 @@ define_font There are all kinds of subtle differences in the implementation, for instance we no longer intercept \type {*} and \type {&} as these were already replaced long ago in \TEX\ engines by command line options. Talking of options, only a few are -left. +left. All input goes via \LUA, even the console. We took our time for reaching a stable state in \LUATEX. Among the reasons is the fact that most was experimented with in \CONTEXT. It took many man|-|years to @@ -237,7 +234,7 @@ if luatex and luametatex then luatex = table.tohash(luatex) luametatex = table.tohash(luametatex) - context.page() + -- context.page() context("The following primitives are available in \\LUATEX\\ but not in \\LUAMETATEX.") context("Some of these are emulated in \\CONTEXT.") @@ -252,7 +249,7 @@ if luatex and luametatex then context.stopcolumns() - context.page() + -- context.page() context("The following primitives are available in \\LUAMETATEX\\ only.") context("At some point in time some might be added to \\LUATEX.") diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-enhancements.tex b/doc/context/sources/general/manuals/luametatex/luametatex-enhancements.tex index 03dfc1bad..a233bf630 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-enhancements.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-enhancements.tex @@ -83,9 +83,9 @@ There are three primitives to test the version of \LUATEX\ (and \LUAMETATEX): \BC explanation \NC \NR \TB \NC \lpr {luatexbanner} \NC \VersionHack{\luatexbanner} - \NC the banner reported on the command line \NC \NR + \NC the banner reported on the console \NC \NR \NC \lpr {luatexversion} \NC \the\luatexversion - \NC a combination of major and minor number \NC \NR + \NC major and minor number combined \NC \NR \NC \lpr {luatexrevision} \NC \the\luatexrevision \NC the revision number \NC \NR \LL @@ -178,7 +178,9 @@ the time comes to print a character $c>=1{,}114{,}112$, \LUATEX\ 
will actually print the single byte corresponding to $c$ minus 1{,}114{,}112. Contrary to other \TEX\ engines, the output to the terminal is as|-|is so there -is no escaping with \type {^^}. We operate in a \UTF\ universe. +is no escaping with \type {^^}. We operate in a \UTF\ universe. Because we +operate in a \CCODE\ universe, zero characters are special but because we also +live in a \UNICODE\ galaxy that is no real problem. \stopsubsection @@ -400,6 +402,12 @@ mode, but in \LUAMETATEX\ there is no error message and the box the height and depth are equally divided. Of course in text mode there is no math axis related offset applied. +It is possible to change or add to the attributes assigned to a box: + +\starttyping +\boxattr 0 123 456 +\stoptyping + \stopsubsection \stopsection @@ -475,20 +483,22 @@ will result in Note that the expansion of \prm {directlua} is a sequence of characters, not of tokens, contrary to all \TEX\ commands. So formally speaking its expansion is -null, but it places material on a pseudo-file to be immediately read by \TEX, as -\ETEX's \prm {scantokens}. For a description of print functions look at \in -{section} [sec:luaprint]. +null, but it collects material in a new level on the input stack to be +immediately read by \TEX\ after the \LUA\ call has finished. It is a bit like +\ETEX's \prm {scantokens}, which now uses the same mechanism. For a description +of print functions look at \in {section} [sec:luaprint]. Because the \syntax {} is a chunk, the normal \LUA\ error handling is triggered if there is a problem in the included code. The \LUA\ error messages -should be clear enough, but the contextual information is still pretty bad. -Often, you will only see the line number of the right brace at the end of the -code. +should be clear enough, but the contextual information is often suboptimal +because it can come from deep down, and \TEX\ has no knowledge about what you do +in \LUA. 
Often, you will only see the line number of the right brace at the end +of the code. While on the subject of errors: some of the things you can do inside \LUA\ code can break up \LUAMETATEX\ pretty bad. If you are not careful while working with -the node list interface, you may even end up with assertion errors from within -the \TEX\ portion of the executable. +the node list interface, you may even end up with errors or even crashes from +within the \TEX\ portion of the executable. \stopsubsection @@ -565,53 +575,6 @@ tokens and assume that the function is available when that token expands. On the other hand, as we have tested this functionality in relative complex situations normal usage should not give problems. -There are another three (still experimental) primitives that behave like \lpr -{luafunction} but they expect the function to return an integer, dimension (also -an integer) or a gluespec node. The return values gets injected into the input. - -\starttyping -\luacountfunction 997 123 -\luadimenfunction 998 123pt -\luaskipfunction 999 123pt plus 10pt minus 20pt -\stoptyping - -Examples of function 997 in the above lines are: - -\starttyping -function() return token.scan_int() end -function() return 1234 end -\stoptyping - -This itself is not spectacular so there is more. These functions can be called in -two modes: either \TEX\ is expecting a value, or it is not and just expanding the -call. - -\starttyping -local n = 0 -function(slot,scanning) - if scanning then - return n - else - n = token.scan_int() - end -end -\stoptyping - -So, assuming that the function is in slot 997, you can do this: - -\starttyping -\luacountfunction 997 123 -\count100=\luacountfunction 997 -\stoptyping - -After which \type {\count 100} has the value \type {123}. 
- -% Also experimental (I need to play with this a bit more when I have time): -% -% The \type {token.set_lua} function already accepts some strings as optional -% arguments (\type {protected} and \type {global}) and now also handles \type -% {count}, \type {dimen} and \type {skip}. - \stopsubsection \startsubsection[title={\lpr {luabytecode} and \lpr {luabytecodecall}}] @@ -663,14 +626,14 @@ contents is stored and retrieved from the format file. \startsubsection[title={\lpr {catcodetable}}] -\startsyntax -\catcodetable <15-bit number> -\stopsyntax - The primitive \lpr {catcodetable} switches to a different catcode table. Such a table has to be previously created using one of the two primitives below, or it has to be zero. Table zero is initialized by \INITEX. +\startsyntax +\catcodetable <15-bit number> +\stopsyntax + \stopsubsection \startsubsection[title={\lpr {initcatcodetable}}] @@ -710,11 +673,9 @@ initial values are: \lpr {savecatcodetable} copies the current set of catcodes to a new table with the requested number. The definitions in this new table are all treated as if -they were made in the outermost level. - -The new table is allocated globally: it will not go away after the current group -has ended. If the supplied number is the currently active table, an error is -raised. +they were made in the outermost level. Again, the new table is allocated globally: +it will not go away after the current group has ended. If the supplied number is +the currently active table, an error is raised. \stopsubsection @@ -722,7 +683,7 @@ raised. \startsection[title={Tokens, commands and strings}] -\startsubsection[title={\lpr {scantextokens}}] +\startsubsection[title={\lpr {scantextokens} and \lpr {tokenized}}] \topicindex {tokens+scanning} @@ -732,8 +693,7 @@ differences are: \startitemize \startitem - The last (and usually only) line does not have a \prm {endlinechar} - appended. + The last (and usually only) line does not have a \prm {endlinechar} appended. 
\stopitem \startitem \lpr {scantextokens} never raises an EOF error, and it does not execute @@ -746,6 +706,18 @@ differences are: \stopitem \stopitemize +The implementation in \LUAMETATEX\ is different in the sense that it uses the same +methods as printing from \LUA\ to \TEX\ does. Therefore, in addition to the two +commands we also have this expandable command: + +\startsyntax +\tokenized {...} +\tokenized catcodetable {...} +\stopsyntax + +The \ETEX\ command \type {\tracingscantokens} has been dropped in the process as +that was interwoven with the old code. + \stopsubsection \startsubsection[title={\lpr {toksapp}, \lpr {tokspre}, \lpr {etoksapp}, \lpr {etokspre}, @@ -805,7 +777,7 @@ that it saves a few tokens and can make code a bit more readable. This primitive complements the \ETEX\ mark primitives and clears a mark class completely, resetting all three connected mark texts to empty. It is an -immediate command. +immediate command (no synchronization node is used). \startsyntax \clearmarks <16-bit number> @@ -1102,8 +1074,6 @@ valid style identifier (a primitive identifier or number). The \type \stopsubsection -\stopsubsection - \startsubsection[title={\lpr {ifempty}}] This primitive tests for the following token (control sequence) having no @@ -1219,7 +1189,7 @@ we use \type {\unless} to negate the result. \stopsubsection -\startsubsection[title={\lpr {orelse}}] +\startsubsection[title={\lpr {orelse} and \lpr {orunless}}] Sometimes you have successive tests that, when laid out in the source lead to deep trees. The \type {\ifcase} test is an exception. Experiments with \type @@ -1285,6 +1255,12 @@ Of course it is only useful at the right level, so you might end up with cases l \fi \stoptyping +The \lpr {orunless} variant negates the next test, just like \prm {unless}. 
In +some cases these commands look at the next token to see if it is an if|-|test so +a following negation will not work (read: making that work would complicate the +code and hurt efficiency too). Side note: interesting is that in \CONTEXT\ we +hardly use this kind of negation. + \stopsubsection \startsubsection[title={\lpr {ifprotected}, \lpr {frozen}, \lpr {iffrozen} and \lpr {ifusercmd}}] @@ -1315,13 +1291,6 @@ the acceptable range is from 0 to 65535. \stopsubsection -\startsubsection[title={\prm {vpack}, \prm {hpack} and \prm {tpack}}] - -These three primitives are like \prm {vbox}, \prm {hbox} and \prm {vtop} -but don't apply the related callbacks. - -\stopsubsection - \startsubsection[title={\prm {vsplit}}] \topicindex {splitting} @@ -1332,7 +1301,7 @@ a split of the given size but result has the natural dimensions then. \stopsubsection -\startsubsection[title={Images and reused box objects},reference=sec:imagedandforms] +\startsubsection[title={Images and reused box objects},reference=sec:imagesandforms] In original \TEX\ image support is dealt with via specials. It's not a native feature of the engine. All that \TEX\ cares about is dimensions, so in practice @@ -1409,10 +1378,12 @@ packages. \startsubsection[title={\lpr {hpack}, \lpr {vpack} and \lpr {tpack}}] -These three primitives are the equivalents of \type {\hbox}, \type {\vbox} and -\type {\vtop} but they don't trigger the packaging related callbacks. Of course +These three primitives are the equivalents of \prm {hbox}, \prm {vbox} and +\prm {vtop} but they don't trigger the packaging related callbacks. Of course one never know if content needs a treatment so using them should be done with -care. +care. Apart from accepting more keywords (and therefore options) the normal +box behave the same as before. The \prm {vcenter} builder also works in text +mode. 
\stopsubsection @@ -1864,6 +1835,10 @@ shows that is't okay, they will become official, so we just mention them: \type {\boxdirection}, \type {\boxattr}, \type {\boxorientation}, \type {\boxxoffset}, \type {\boxyoffset}, \type {\boxxmove}, \type {\boxymove} and \type {\boxtotal}. +{\em This is still somewhat experimental and will be documented in more detail +when I've used it more in \CONTEXT\ and the specification is frozen. This might +take some time (and user input).} + \stopsubsection \stopsection diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-introduction.tex b/doc/context/sources/general/manuals/luametatex/luametatex-introduction.tex index 774f3d8d9..2afdf75bd 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-introduction.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-introduction.tex @@ -47,7 +47,8 @@ further) adapted. It also discusses the (main) differences. Some of the new primitives or functions that show up in \LUAMETATEX\ might show up in \LUATEX\ at some point, others might not, so don't take this manual as reference for \LUATEX ! For now it is an experimental engine in which we can change things at will but -with \CONTEXT\ in tandem so that this macro package will keep working. +with \CONTEXT\ in tandem so that this macro package will keep working. Often you +can find examples of usage in \CONTEXT\ related documents and the source code. For \CONTEXT\ users the \LUAMETATEX\ engine will become the default. The \CONTEXT\ variant for this engine is tagged \LMTX. The pair can be used in @@ -55,7 +56,7 @@ production, just as with \LUATEX\ and \MKIV. In fact, most users will probably not really notice the difference. In some cases there will be a drop in performance, due to more work being delegated to \LUA, but on the average performance will be better, also due to some changes below the hood of the -engine. +engine. Memory consumption is also less. 
As this follow up is closely related to \CONTEXT\ development, and because we expect stock \LUATEX\ to be used outside the \CONTEXT\ proper, there will be no @@ -100,10 +101,15 @@ Hans Hagen \vfilll -{\bf remark:} \LUAMETATEX\ development is mostly done by Hans Hagen and Alan -Braslau, who love playing with the three languages involved. And as usual Mojca -Miklavec make sure all compiles well on the buildbot infrastructure. Testing is -done by \CONTEXT\ developers and users. Many thanks for their patience! +{\bf remark:} \LUAMETATEX\ development is mostly done by Hans Hagen and in +adapting the macros to the new features Wolfgang Schuster, who knows the code +inside||out, is instrumental. In the initial phase Alan Braslau, who loves +playing with the three languages, did extensive testing and compiled for several +platforms. Later Mojca Miklavec makes sure all compiles well on the buildbot +infrastructure. After the first release more users got involved in testing. Many +thanks for their patience! The development also triggered upgrading of the wiki +support infrastructure where Taco Hoekwater and Paul Mazaitis have teamed up. So, +progress all around. {\bf remark:} When there are non|-|intrusive features that also make sense in \LUATEX, these will be applied in the experimental branch first, so that there is diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-languages.tex b/doc/context/sources/general/manuals/luametatex/luametatex-languages.tex index 0a3ecd6bf..85701bdc3 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-languages.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-languages.tex @@ -192,15 +192,15 @@ The start and end of a sequence of characters is signalled by a \nod {glue}, \no {penalty}, \nod {kern} or \nod {boundary} node. But by default also a \nod {hlist}, \nod {vlist}, \nod {rule}, \nod {dir}, \nod {whatsit}, \nod {ins}, and \nod {adjust} node indicate a start or end. 
You can omit the last set from the -test by setting \lpr {hyphenationbounds} to a non|-|zero value: +test by setting flags in \lpr {hyphenationmode}: \starttabulate[|c|l|] -\DB value \BC behaviour \NC \NR +\DB value \BC behaviour \NC \NR \TB -\NC \type{0} \NC not strict \NC \NR -\NC \type{1} \NC strict start \NC \NR -\NC \type{2} \NC strict end \NC \NR -\NC \type{3} \NC strict start and strict end \NC \NR +\NC \NC not strict \NC \NR +\NC \type{64} \NC strict start \NC \NR +\NC \type{128} \NC strict end \NC \NR +\NC \type{192} \NC strict start and strict end \NC \NR \LL \stoptabulate @@ -210,11 +210,11 @@ The word start is determined as follows: \DB node \BC behaviour \NC \NR \TB \BC boundary \NC yes when wordboundary \NC \NR -\BC hlist \NC when hyphenationbounds 1 or 3 \NC \NR -\BC vlist \NC when hyphenationbounds 1 or 3 \NC \NR -\BC rule \NC when hyphenationbounds 1 or 3 \NC \NR -\BC dir \NC when hyphenationbounds 1 or 3 \NC \NR -\BC whatsit \NC when hyphenationbounds 1 or 3 \NC \NR +\BC hlist \NC when the start bit is set \NC \NR +\BC vlist \NC when the start bit is set \NC \NR +\BC rule \NC when the start bit is set \NC \NR +\BC dir \NC when the start bit is set \NC \NR +\BC whatsit \NC when the start bit is set \NC \NR \BC glue \NC yes \NC \NR \BC math \NC skipped \NC \NR \BC glyph \NC exhyphenchar (one only) : yes (so no -- ---) \NC \NR @@ -232,13 +232,13 @@ The word end is determined as follows: \BC glue \NC yes \NC \NR \BC penalty \NC yes \NC \NR \BC kern \NC yes when not italic (for some historic reason) \NC \NR -\BC hlist \NC when hyphenationbounds 2 or 3 \NC \NR -\BC vlist \NC when hyphenationbounds 2 or 3 \NC \NR -\BC rule \NC when hyphenationbounds 2 or 3 \NC \NR -\BC dir \NC when hyphenationbounds 2 or 3 \NC \NR -\BC whatsit \NC when hyphenationbounds 2 or 3 \NC \NR -\BC ins \NC when hyphenationbounds 2 or 3 \NC \NR -\BC adjust \NC when hyphenationbounds 2 or 3 \NC \NR +\BC hlist \NC when the end bit is set \NC \NR +\BC vlist \NC when the end bit is 
set \NC \NR +\BC rule \NC when the end bit is set \NC \NR +\BC dir \NC when the end bit is set \NC \NR +\BC whatsit \NC when the end bit is set \NC \NR +\BC ins \NC when the end bit is set \NC \NR +\BC adjust \NC when the end bit is set \NC \NR \LL \stoptabulate @@ -256,52 +256,52 @@ min values to 1 and make sure that the words hyphenate at each character. \hbox to 2cm {% \vtop {% \hsize 1pt - \hyphenationbounds#1 + \advance\hyphenationmode#1\relax #2 \par}}} \startplacefigure[reference=hb:1,title={\type{one}}] \startcombination[4*1] - {\SomeTest{0}{one}} {\type{0}} - {\SomeTest{1}{one}} {\type{1}} - {\SomeTest{2}{one}} {\type{2}} - {\SomeTest{3}{one}} {\type{3}} + {\SomeTest {0}{one}} {\type {0}} + {\SomeTest {64}{one}} {\type {64}} + {\SomeTest{128}{one}} {\type{128}} + {\SomeTest{192}{one}} {\type{192}} \stopcombination \stopplacefigure \startplacefigure[reference=hb:2,title={\type{one\null two}}] \startcombination[4*1] - {\SomeTest{0}{one\null two}} {\type{0}} - {\SomeTest{1}{one\null two}} {\type{1}} - {\SomeTest{2}{one\null two}} {\type{2}} - {\SomeTest{3}{one\null two}} {\type{3}} + {\SomeTest {0}{one\null two}} {\type {0}} + {\SomeTest {64}{one\null two}} {\type {64}} + {\SomeTest{128}{one\null two}} {\type{128}} + {\SomeTest{192}{one\null two}} {\type{192}} \stopcombination \stopplacefigure \startplacefigure[reference=hb:3,title={\type{\null one\null two}}] \startcombination[4*1] - {\SomeTest{0}{\null one\null two}} {\type{0}} - {\SomeTest{1}{\null one\null two}} {\type{1}} - {\SomeTest{2}{\null one\null two}} {\type{2}} - {\SomeTest{3}{\null one\null two}} {\type{3}} + {\SomeTest {0}{\null one\null two}} {\type {0}} + {\SomeTest {64}{\null one\null two}} {\type {64}} + {\SomeTest{128}{\null one\null two}} {\type{128}} + {\SomeTest{192}{\null one\null two}} {\type{192}} \stopcombination \stopplacefigure \startplacefigure[reference=hb:4,title={\type{one\null two\null}}] \startcombination[4*1] - {\SomeTest{0}{one\null two\null}} {\type{0}} - 
{\SomeTest{1}{one\null two\null}} {\type{1}} - {\SomeTest{2}{one\null two\null}} {\type{2}} - {\SomeTest{3}{one\null two\null}} {\type{3}} + {\SomeTest {0}{one\null two\null}} {\type {0}} + {\SomeTest {64}{one\null two\null}} {\type {64}} + {\SomeTest{128}{one\null two\null}} {\type{128}} + {\SomeTest{192}{one\null two\null}} {\type{192}} \stopcombination \stopplacefigure \startplacefigure[reference=hb:5,title={\type{\null one\null two\null}}] \startcombination[4*1] - {\SomeTest{0}{\null one\null two\null}} {\type{0}} - {\SomeTest{1}{\null one\null two\null}} {\type{1}} - {\SomeTest{2}{\null one\null two\null}} {\type{2}} - {\SomeTest{3}{\null one\null two\null}} {\type{3}} + {\SomeTest {0}{\null one\null two\null}} {\type {0}} + {\SomeTest {64}{\null one\null two\null}} {\type {64}} + {\SomeTest{128}{\null one\null two\null}} {\type{128}} + {\SomeTest{192}{\null one\null two\null}} {\type{192}} \stopcombination \stopplacefigure @@ -312,7 +312,8 @@ some control over aspects of the hyphenation and yet another one concerns automatic hyphens (e.g.\ \type {-} characters in the input). Hyphenation and discretionary injection is driven by a mode parameter which is -a bitset made from the following values. +a bitset made from the following values, some of which we saw in the previous +examples. \starttabulate[|||] \NC \number \normalhyphenationmodecode \NC honour (normal) \prm{discretionary}'s \NC \NR diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-lua.tex b/doc/context/sources/general/manuals/luametatex/luametatex-lua.tex index a126a95dc..803820de1 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-lua.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-lua.tex @@ -31,9 +31,10 @@ In this mode, it will set \LUA's \type {arg[0]} to the found script name, pushin preceding options in negative values and the rest of the command line in the positive values, just like the \LUA\ interpreter does. 
-\LUAMETATEX\ will exit immediately after executing the specified \LUA\ script and is, -in effect, a somewhat bulky stand alone \LUA\ interpreter with a bunch of extra -preloaded libraries. +\LUAMETATEX\ will exit immediately after executing the specified \LUA\ script and +is, in effect, a somewhat bulky stand alone \LUA\ interpreter with a bunch of +extra preloaded libraries. But we really want to keep the binary small, if +possible below the 3MB which is okay for a script engine. When no argument is given, \LUAMETATEX\ will look for a \LUA\ file with the same name as the binary and run that one when present. This makes it possible to use @@ -105,7 +106,7 @@ values from the \type {texconfig} table at the end of script execution (see the description of the \type {texconfig} table later on in this document for more details on which ones exactly). -So let's summarize this. The handling of when is called jobname is a bit complex. +So let's summarize this. The handling of what is called jobname is a bit complex. There can be explicit names set on the command line but when not set they can be taken from the \type {texconfig} table. @@ -117,7 +118,9 @@ taken from the \type {texconfig} table. These names are initialized according to \type {--luaonly} or the first filename seen in the list of options. Special treatment of \type {&} and \type {*} as well -as interactive startup is gone. +as interactive startup is gone but we still enter \TEX\ via a forced \type {\input} +into the input buffer. \footnote {This might change at some point into an explicit +loading triggered via \LUA.} When we are in \TEX\ mode at some point the engine needs a filename, for instance for opening a log file. 
At that moment the set jobname becomes the internal one @@ -133,6 +136,12 @@ access etc.\ internally uses the current code page but to the user is exposed as % engine_state .startup_jobname : handles by option parser % environment_state.input_name : temporary interceptor +There is an extra option \type{--permitloadlib} that needs to be given when you +load external libraries via \LUA. Although you could manage this via \LUA\ itself +in a startup script, the reason for having this as an option is the wish for +security (at some point that became a demand for \LUATEX), so this might give an +extra feeling of protection. + \stopsubsection \stopsection @@ -199,8 +208,10 @@ some care you can deal with \UNICODE\ just fine. There are some more libraries present. These are discussed on a later chapter. For instance we embed \type {luasocket} but contrary to \LUATEX\ don't embed the related \LUA\ code. An adapted version of \type {luafilesystem} is also included. -There is a more extensive math library and there are libraries that deal with -encryption and compression. +There are more extensive math libraries and there are libraries that deal with +encryption and compression. At some point some of these might become so called +optional libraries (read: the handful that we provide interfaces for but that get +loaded on demand). \stopsection diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-modifications.tex b/doc/context/sources/general/manuals/luametatex/luametatex-modifications.tex index 6448f2b01..9827884ad 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-modifications.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-modifications.tex @@ -16,14 +16,15 @@ The first version of \LUATEX, made by Hartmut after we discussed the possibility of an extension language, only had a few extra primitives and it was largely the same as \PDFTEX. It was presented to the public in 2005. 
As part of the Oriental -\TEX\ project, Taco merged substantial parts of \ALEPH\ into the code and some -more primitives were added. Then we started more fundamental experiments. After -many years, when the engine had become more stable, the decision was made to -clean up the rather hybrid nature of the program. This means that some primitives -were promoted to core primitives, often with a different name, and that others -were removed. This also made it possible to start cleaning up the code base. In -\in {chapter} [enhancements] we discuss some new primitives, here we will cover -most of the adapted ones. +\TEX\ project, Taco merged some parts of \ALEPH\ into the code and some more +primitives were added. Then we started more fundamental experiments. After many +years, when the engine had become more stable, the decision was made to clean up +the rather hybrid nature of the program. This means that some primitives were +promoted to core primitives, often with a different name, and that others were +removed. This also made it possible to start cleaning up the code base, which +showed decades of stepwise additions to original \TEX. In \in {chapter} +[enhancements] we discuss some new primitives, here we will cover most of the +adapted ones. During more than a decade stepwise new functionality was added and after 10 years the more of less stable version 1.0 was presented. But we continued and after @@ -50,10 +51,10 @@ most still comes from original Knuthian \TEX. But we divert a bit. \startitemize \startitem - The current code base is written in \CCODE, not \PASCAL. The original \CWEB\ + The current code base is written in \CCODE, not \PASCAL. The original \WEB\ documentation is kept when possible and not wrapped in tagged comments. 
As a consequence instead of one large file plus change files, we now have multiple - files organized in categories like \type {tex}, \type {luaf}, \type + files organized in categories like \type {tex}, \type {lua}, \type {languages}, \type {fonts}, \type {libraries}, etc. There are some artifacts of the conversion to \CCODE, but these got (and get) removed stepwise. The documentation, which actually comes from the mix of engines (via so called @@ -61,8 +62,8 @@ most still comes from original Knuthian \TEX. But we divert a bit. close as possible to the original so that the documentation of the fundamentals behind \TEX\ by Don Knuth still applies. However, because we use \CCODE, some documentation is a bit off. Also, most global variables are now - collected in structures, but the original names were kept. There are lots of - so called macros too. + collected in structures, but the original names and level of abstraction were + mostly kept. On the other hand, opening up had its impact on the code. \stopitem \startitem @@ -74,14 +75,20 @@ most still comes from original Knuthian \TEX. But we divert a bit. wherever we like. There are various options to control discretionary injection and related penalties are now integrated in these nodes. Language information is now bound to glyphs. The number of languages in \LUAMETATEX\ - is smaller than in \LUATEX. + is smaller than in \LUATEX. Control over discretionaries is more granular and + now managed by less variables. \stopitem \startitem There is no pool file, all strings are embedded during compilation. This also removed some memory constraints. We kept token and node memory management because it is convenient and efficient but parts were reimplemented in order - to remove some constraints. Token memory management is largely the same. + to remove some constraints. Token memory management is largely the same. 
All + the other large memory structures, like those related to nesting, the save + stack, input levels, the hash table and table of equivalents, etc. now all + start out small and are enlarged when needed, where maxima are controlled in + the usual way. In principle the initial memory footprint is smaller while at + the same time we can go real large. \stopitem \startitem @@ -125,6 +132,12 @@ most still comes from original Knuthian \TEX. But we divert a bit. backend. \stopitem +\startitem + The math style related primitives can use numbers as well as symbolic names. + There is some more (control over) math anyway, which is a side effect of + supporting \OPENTYPE\ math. +\stopitem + \startitem When detailed logging is enabled more detail is output with respect to what nodes are involved. This is a side effect of the core nodes having more @@ -171,6 +184,20 @@ features, but with a few small adaptations. the mixed flat & sparse model from \ETEX. \stopitem +\startitem + Because we have more nodes, conditionals, etc.\ the \ETEX\ status related + variables are adapted to \LUAMETATEX: we use different \quote {constants}, + but that should be no problem because any sane macro package uses + abstraction. +\stopitem + +\startitem + The \type {\scantokens} primitive is now using the same mechanism as \LUA\ + print|-|to|-|\TEX\ uses, which simplifies the code. There is a little + performance hit but it will not be noticed in \CONTEXT, because we never use + this primitive. +\stopitem + \startitem Because we don't use change files on top of original \TEX, the integration of \ETEX\ functionality is bit more natural, code wise. @@ -292,7 +319,8 @@ Here is a summary of inherited functionality: \startitem Glues {\it immediately after} direction change commands are not legal - breakpoints. There is a bit more sanity testing for the direction state. + breakpoints. There is a bit more sanity testing for the direction state. This + can be configured. 
\stopitem \startitem @@ -303,7 +331,7 @@ Here is a summary of inherited functionality: \startitem There are no direction related primitives for page and body directions. The paragraph, text and math directions are specified using primitives that - take a number. + take a number. The three letter codes are dropped. \stopitem \stopitemize @@ -334,7 +362,10 @@ The single internal memory heap that traditional \TEX\ used for tokens and nodes is split into two separate arrays. Each of these will grow dynamically when needed. Internally a token or node is an index into these arrays. This permits for an efficient implementation and is also responsible for the performance of -the core. The original documentation in \TEX\ The Program mostly applies! +the core. All other data structures are mostly the same but managed dynamically +too. Because we operate in a 64 bit world, the parallel table of equivalents +needed for managing levels, is gone. Anyhow, the original documentation in \TEX\ +The Program mostly applies! \stopsubsection @@ -352,10 +383,6 @@ assignments don't show up when using the \ETEX\ tracing routines \prm {tracingassigns} and \prm {tracingrestores} but we don't see that as a real limitation. It also saves a lot of clutter. -A side|-|effect of the current implementation is that \prm {global} is now more -expensive in terms of processing than non|-|global assignments but not many users -will notice that. - The glyph ids within a font are also managed by means of a sparse array as glyph ids can go up to index $2^{21}-1$ but these are never accessed directly so again users will not notice this. @@ -367,26 +394,33 @@ users will not notice this. \topicindex {csnames} Single|-|character commands are no longer treated specially in the internals, -they are stored in the hash just like the multiletter csnames. +they are stored in the hash just like the multiletter control sequences. This is +a side effect of going \UNICODE\ and \UTF. 
Where using 256 slots in an array adds +no burden, supporting the whole \UNICODE\ range is a waste of space. Therefore, +also active characters are internally implemented as a special type of +multi|-|letter control sequences that uses a prefix that is otherwise impossible +to obtain. The code that displays control sequences explicitly checks if the length is one when it has to decide whether or not to add a trailing space. -Active characters are internally implemented as a special type of multi|-|letter -control sequences that uses a prefix that is otherwise impossible to obtain. - \stopsubsection \startsubsection[title=Binary file reading] \topicindex {files+binary} -All of the internal code is changed in such a way that if one of the \type -{read_xxx_file} callbacks is not set, then the file is read by a \CCODE\ function -using basically the same convention as the callback: a single read into a buffer -big enough to hold the entire file contents. While this uses more memory than the -previous code (that mostly used \type {getc} calls), it can be quite a bit faster -(depending on your \IO\ subsystem). So far we never had issues with this approach. +All input now goes via \LUA: files loaded with \type {\input} as well as files +that are opened with \type {\openin}. Actually the latter has to be implemented +in terms of macros and \LUA\ calls. This also means that compared to \LUATEX\ +the internal handling of input has been changed but users won't notice that. + +Setting a callback is expected now. Although reading input natively using \type +{getc} calls is more efficient, we now fetch lines from \LUA, put them in a +buffer and then pick successive bytes (keep in mind that we read \UTF) from that. +The performance is quite ok, also because \LUA\ is fast, today's operating systems +cache, and storage media have become very fast. Also, \TEX\ is spending more time +messing around with what it has input than actually reading input. 
\stopsubsection @@ -419,9 +453,9 @@ more details anyway. The information that goes into the log file can be different from \LUATEX, and might even differ a bit more in the future. The main reason is that inside the engine we have more granularity, which for instance means that we output subtype -related information when nodes are printed. Of course we could have offered a -compatibility mode but it serves no purpose. Over time there have been many -subtle changes to control logs in the \TEX\ ecosystems so another one is +and attribute related information when nodes are printed. Of course we could have +offered a compatibility mode but it serves no purpose. Over time there have been +many subtle changes to control logs in the \TEX\ ecosystems so another one is bearable. In a similar fashion, there is a bit different behaviour when \TEX\ expects @@ -429,7 +463,10 @@ input, which in turn is a side effect of removing the interception of \type {*} and \type {&} which made for cleaner code (quite a bit had accumulated as side effect of continuous adaptations in the \TEX\ ecosystems). There was already code that was never executed, simply as side effect of the way \LUATEX\ initializes -itself (one needs to enable classes of primitives for instance). +itself (one needs to enable classes of primitives for instance). Keep in mind +that over time system dependencies have been handled with \TEX\ change files, the +\WEBC\ infrastructure, \KPSE\ features, compilation variables and flags, etc. In +\LUAMETATEX\ we try to minimize all that. 
\stopsubsection diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-preamble.tex b/doc/context/sources/general/manuals/luametatex/luametatex-preamble.tex index 8f1400c9f..9a2fe5690 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-preamble.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-preamble.tex @@ -47,19 +47,21 @@ If you still decide to read on, then it's good to know what nodes are, so we do quick introduction here. If you input this text: \starttyping -Hi There +Hi There ... \stoptyping eventually we will get a linked lists of nodes, which in \ASCII\ art looks like: \starttyping -H <=> i <=> [glue] <=> T <=> h <=> e <=> r <=> e +H <=> i <=> [glue] <=> T <=> h <=> e <=> r <=> e ... \stoptyping -When we have a paragraph, we actually get something: +When we have a paragraph, we actually get something like this, where a \type +{localpar} node stores some metadata and is followed by a \type {hlist} flagged +as indent box: \starttyping -[localpar] <=> H <=> i <=> [glue] <=> T <=> h <=> e <=> r <=> e <=> [glue] +[localpar] <=> [hlist] <=> H <=> i <=> [glue] <=> T <=> h <=> e <=> r <=> e ... \stoptyping Each character becomes a so called glyph node, a record with properties like the @@ -69,14 +71,14 @@ back to a previous node or next node, given that these exist. Sometimes multiple characters are represented by one glyphs, so one can also get: \starttyping -[localpar] <=> H <=> i <=> [glue] <=> Th <=> e <=> r <=> e <=> [glue] +[localpar] <=> [hlist] <=> H <=> i <=> [glue] <=> Th <=> e <=> r <=> e ... \stoptyping And maybe some characters get positioned relative to each other, so we might see: \starttyping -[localpar] <=> H <=> [kern] <=> i <=> [glue] <=> Th <=> e <=> r <=> e <=> [glue] +[localpar] <=> [hlist] <=> H <=> [kern] <=> i <=> [glue] <=> Th <=> e <=> r <=> e ... \stoptyping It's also good to know beforehand that \TEX\ is basically centered around @@ -96,6 +98,8 @@ means these hooks. 
The \TEX\ engine itself is pretty well optimized but when you kick in much \LUA\ code, you will notice that performance drops. Don't blame and bother the authors with performance issues. In \CONTEXT\ over 50\% of the time can be spent in \LUA, but so far we didn't get many complaints about efficiency. +Adding more callbacks makes no sense, also because at some point the performance +hit gets too large. There are plenty of ways to achieve one's goals. Where plain \TEX\ is basically a basic framework for writing a specific style, macro packages like \CONTEXT\ and \LATEX\ provide the user a whole lot of @@ -108,7 +112,7 @@ mess around too much, the engine eventually might bark and quit. It can even crash, because testing everywhere for what users can do wrong is no real option. When you read about nodes in the following chapters it's good to keep in mind -their commands that relate to then. Here are a few: +what commands relate to them. Here are a few: \starttabulate[|l|l|p|] \DB command \BC node \BC explanation \NC \NR @@ -137,23 +141,23 @@ all terms used in the next chapters. Don't worry, they lose their magic once you use \TEX\ a lot. You have access to most of the internals and when not, at least it is possible to query some state we're in or level we're at. -When we talk about packing it can mean two things. When \TEX\ has consumed some -tokens that represent text the next can happen. When the text is put into a so -called \type {\hbox} it (normally) first gets hyphenated, next ligatures are -build, and finally kerns are added. Each of that stages can be overloaded using -\LUA\ code. When these three stages are finished, the dimension of the content is -calculated and the box gets its width, height and depth. What happens with the -box depends on what macros do with it. +When we talk about pack(ag)ing it can mean two things. When \TEX\ has consumed +some tokens that represent text. 
When the text is put into a so called \type +{\hbox} it (normally) first gets hyphenated (even in an horizontal list), next +ligatures are build, and finally kerns are added. Each of these stages can be +overloaded using \LUA\ code. When these three stages are finished, the dimension +of the content is calculated and the box gets its width, height and depth. What +happens with the box depends on what macros do with it. The other thing that can happen is that the text starts a new paragraph. In that -case some (directional) information is put in front, indentation is prepended and -some skip appended at the end. Again the three stages are applied but this time, -afterwards, the long line is broken into lines and the result is either added to -the content of a box or to the main vertical list (the running text so to say). -This is called par building. At some point \TEX\ decides that enough is enough -and it will trigger the page builder. So, building is another concept we will -encounter. Another example of a builder is the one that turns an intermediate -math list into something typeset. +case some information is stored in a leading \type {localpar} node. Then +indentation is appended and the paragraph ends with some glue. Again the three +stages are applied but this time, afterwards, the long line is broken into lines +and the result is either added to the content of a box or to the main vertical +list (the running text so to say). This is called par building. At some point +\TEX\ decides that enough is enough and it will trigger the page builder. So, +building is another concept we will encounter. Another example of a builder is +the one that turns an intermediate math list into something typeset. Wrapping something in a box is called packing. Adding something to a list is described in terms of contributing. 
The more complicated processes are wrapped diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-style.tex b/doc/context/sources/general/manuals/luametatex/luametatex-style.tex index a07cbe9dd..4d3ef36e3 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-style.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-style.tex @@ -136,7 +136,7 @@ \definecolor[othercolor][r=.5,g=.5] \writestatus{luametatex manual}{} -\writestatus{luametatex manual}{defining lucodaot} \usebodyfont [lucidaot] +\writestatus{luametatex manual}{defining lucidaot} \usebodyfont [lucidaot] \writestatus{luametatex manual}{defining pagella} \usebodyfont [pagella] \writestatus{luametatex manual}{defining cambria} \usebodyfont [cambria] \writestatus{luametatex manual}{defining modern} \usebodyfont [modern] diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-tex.tex b/doc/context/sources/general/manuals/luametatex/luametatex-tex.tex index 648ee5eef..28cf20840 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-tex.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-tex.tex @@ -173,11 +173,12 @@ but you {\it can\/} use \type {pairs} on \type {info}, of course. If you do not need the full list, you can also ask for a single item by using its name as an index into \type {status}. 
The current list is: +{\em This table is under reconstruction!} + \starttabulate[|l|p|] \DB key \BC explanation \NC \NR \TB \NC \type{banner} \NC terminal display banner \NC \NR -\NC \type{best_page_break} \NC the current best break (a node) \NC \NR \NC \type{buf_size} \NC current allocated size of the line buffer \NC \NR \NC \type{callbacks} \NC total number of executed callbacks so far \NC \NR \NC \type{cs_count} \NC number of control sequences \NC \NR diff --git a/doc/context/sources/general/manuals/luametatex/luametatex.tex b/doc/context/sources/general/manuals/luametatex/luametatex.tex index 6192c8e94..b58b108a2 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex.tex @@ -57,7 +57,7 @@ \startdocument [manual=LuaMeta\TeX, - % status=experimental, + %status=experimental, version=\cldcontext{status.luatex_verbose}] \component luametatex-titlepage diff --git a/doc/context/sources/general/manuals/mk/mk-memory.tex b/doc/context/sources/general/manuals/mk/mk-memory.tex index 4c16e613e..f8259c7db 100644 --- a/doc/context/sources/general/manuals/mk/mk-memory.tex +++ b/doc/context/sources/general/manuals/mk/mk-memory.tex @@ -131,10 +131,10 @@ If you want to run such tests yourself, you need to load a module at startup: The graphics can be generated with: \starttyping -\def\ShowUsage {optional filename} -\def\ShowNamedUsage {optional filename}{red graphic}{blue graphic} -\def\ShowMemoryUsage{optional filename} -\def\ShowNodeUsage {optional filename} +\ShowUsage {optional filename} +\ShowNamedUsage {optional filename}{red graphic}{blue graphic} +\ShowMemoryUsage{optional filename} +\ShowNodeUsage {optional filename} \stoptyping (This interface may change.) -- cgit v1.2.3