diff options
Diffstat (limited to 'doc')
35 files changed, 3922 insertions, 0 deletions
diff --git a/doc/context/documents/general/qrcs/setup-cs.pdf b/doc/context/documents/general/qrcs/setup-cs.pdf Binary files differindex 44eeccf0a..234e60862 100644 --- a/doc/context/documents/general/qrcs/setup-cs.pdf +++ b/doc/context/documents/general/qrcs/setup-cs.pdf diff --git a/doc/context/documents/general/qrcs/setup-de.pdf b/doc/context/documents/general/qrcs/setup-de.pdf Binary files differindex af03ab079..7338085d2 100644 --- a/doc/context/documents/general/qrcs/setup-de.pdf +++ b/doc/context/documents/general/qrcs/setup-de.pdf diff --git a/doc/context/documents/general/qrcs/setup-en.pdf b/doc/context/documents/general/qrcs/setup-en.pdf Binary files differindex 613e7b795..fa852c437 100644 --- a/doc/context/documents/general/qrcs/setup-en.pdf +++ b/doc/context/documents/general/qrcs/setup-en.pdf diff --git a/doc/context/documents/general/qrcs/setup-fr.pdf b/doc/context/documents/general/qrcs/setup-fr.pdf Binary files differindex fe19f0714..771a15749 100644 --- a/doc/context/documents/general/qrcs/setup-fr.pdf +++ b/doc/context/documents/general/qrcs/setup-fr.pdf diff --git a/doc/context/documents/general/qrcs/setup-it.pdf b/doc/context/documents/general/qrcs/setup-it.pdf Binary files differindex 19d6116b0..c7cfba0aa 100644 --- a/doc/context/documents/general/qrcs/setup-it.pdf +++ b/doc/context/documents/general/qrcs/setup-it.pdf diff --git a/doc/context/documents/general/qrcs/setup-mapping-cs.pdf b/doc/context/documents/general/qrcs/setup-mapping-cs.pdf Binary files differindex 41d8fcaa8..fa480a30c 100644 --- a/doc/context/documents/general/qrcs/setup-mapping-cs.pdf +++ b/doc/context/documents/general/qrcs/setup-mapping-cs.pdf diff --git a/doc/context/documents/general/qrcs/setup-mapping-de.pdf b/doc/context/documents/general/qrcs/setup-mapping-de.pdf Binary files differindex 504af5ad3..6265f0c38 100644 --- a/doc/context/documents/general/qrcs/setup-mapping-de.pdf +++ b/doc/context/documents/general/qrcs/setup-mapping-de.pdf diff --git 
a/doc/context/documents/general/qrcs/setup-mapping-en.pdf b/doc/context/documents/general/qrcs/setup-mapping-en.pdf Binary files differindex 57e964371..4f571a1b6 100644 --- a/doc/context/documents/general/qrcs/setup-mapping-en.pdf +++ b/doc/context/documents/general/qrcs/setup-mapping-en.pdf diff --git a/doc/context/documents/general/qrcs/setup-mapping-fr.pdf b/doc/context/documents/general/qrcs/setup-mapping-fr.pdf Binary files differindex 8b9c80e88..d0b205ef9 100644 --- a/doc/context/documents/general/qrcs/setup-mapping-fr.pdf +++ b/doc/context/documents/general/qrcs/setup-mapping-fr.pdf diff --git a/doc/context/documents/general/qrcs/setup-mapping-it.pdf b/doc/context/documents/general/qrcs/setup-mapping-it.pdf Binary files differindex c9d24184f..dca532c4d 100644 --- a/doc/context/documents/general/qrcs/setup-mapping-it.pdf +++ b/doc/context/documents/general/qrcs/setup-mapping-it.pdf diff --git a/doc/context/documents/general/qrcs/setup-mapping-nl.pdf b/doc/context/documents/general/qrcs/setup-mapping-nl.pdf Binary files differindex 7a464a5df..7b9004ebc 100644 --- a/doc/context/documents/general/qrcs/setup-mapping-nl.pdf +++ b/doc/context/documents/general/qrcs/setup-mapping-nl.pdf diff --git a/doc/context/documents/general/qrcs/setup-mapping-ro.pdf b/doc/context/documents/general/qrcs/setup-mapping-ro.pdf Binary files differindex 2c41c2576..b54a444d6 100644 --- a/doc/context/documents/general/qrcs/setup-mapping-ro.pdf +++ b/doc/context/documents/general/qrcs/setup-mapping-ro.pdf diff --git a/doc/context/documents/general/qrcs/setup-nl.pdf b/doc/context/documents/general/qrcs/setup-nl.pdf Binary files differindex 10704318e..9f3c9574c 100644 --- a/doc/context/documents/general/qrcs/setup-nl.pdf +++ b/doc/context/documents/general/qrcs/setup-nl.pdf diff --git a/doc/context/documents/general/qrcs/setup-ro.pdf b/doc/context/documents/general/qrcs/setup-ro.pdf Binary files differindex 0c29df23f..5e4a2c7f5 100644 --- a/doc/context/documents/general/qrcs/setup-ro.pdf 
+++ b/doc/context/documents/general/qrcs/setup-ro.pdf diff --git a/doc/context/sources/general/manuals/followingup/followingup-bitmaps.tex b/doc/context/sources/general/manuals/followingup/followingup-bitmaps.tex new file mode 100644 index 000000000..cf74c0cad --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-bitmaps.tex @@ -0,0 +1,189 @@ +% language=us + +\startcomponent followingup-bitmaps + +\environment followingup-style + +\startchapter[title={Bitmap images}] + +\startsection[title={Introduction}] + +In \TEX\ image inclusion is traditionally handled by specials. Think of a signal +added someplace in the page stream that says: + +\starttyping +\special{image: foo.png 2000 3000} +\stoptyping + +Here the number for instance indicate a scale factor to be divided by 1000. +Because \TEX\ has no floating point numbers, normally one uses an integer and the +magic multiplier 1000 representing 1.000. Such a special is called a \quote +{whatsit} and is one reason why \TEX\ is so flexible and adaptive. + +In \PDFTEX\ instead of a \type {\special} the command \type {\pdfximage} and its +companions are used. In \LUATEX\ this concept has been generalized to \type +{\useimageresource} which internally is not a so called whatsit (an extension +node) but a special kind of rule. This makes for nicer code as now we don't need +to check if a certain whatsit node is actually one with dimensions, while rules +already are part of calculating box dimensions, so no extra overhead in checking +for whatsits is added. In retrospect this was one of the more interesting +conceptual changes in \LUATEX. + +In \LUAMETATEX\ we don't have such primitives but we do have these special rule +nodes; we're talking of subtypes and the frontend doesn't look at those details. +Depending on what the backend needs one can easily define a scanner that +implements a primitive. We already did that in \CONTEXT. 
More important is that +inclusion is not handled by the engine simply because there is no backend. This +means that we need to do it ourselves. There are two steps involved in this that +we will discuss below. + +\stopsection + +\startsection[title={Identifying}] + +There is only a handful of image formats that makes sense in a typesetting +workflow. Because \PDF\ inclusion is supported (but not discussed here) one can +actually take any format as long as it converts to \PDF, and tools like graphic +magic do a decent job on that. \footnote {Although one really needs to check a +converted image. When we moved to pplib, I found out that lots of converted +images in a project had invalid \PDF\ objects, but apart from a warning nothing +bad resulted from this because those objects were not used.} The main bitmap +formats that we care about are \JPEG, \JPEG2000, and \PNG. We could deal with +\JBIG\ files but I never encountered them so let's forget about them for now. + +One of the problems with a built|-|in analyzer (and embedder) is that it can +crash or just abort the engine. The main reason is that when the used libraries +run into some issue, the engine is not always able to recover from it: a +converter just aborts which then cleans up (potentially messed up) memory. In +\LUATEX\ we also abort, simply because we have no clue to what extent further on +the libraries are still working as expected. We play safe. For the average user +this is quite ok as it signals that an image has to be fixed. + +In a workflow that runs unattended on a server and where users push images to a +resource tree, there is a good chance that a \TEX\ job fails because of some +problem with images. A crash is not really an option then. This is one reason why +converting bitmaps to \PDF\ makes much sense. Another reason is that some color +profiling might be involved. Runtime manipulations make no sense, unless there is +only one typesetting run. 
+ +Because in \LMTX\ we do the analyzing ourselves \footnote {Actually, in \MKIV\ +this was also possible but not widely advertised, but we now exclusively keep +this for \LMTX.} we can recover much easier. The main reason is of course that +because we use \LUA, memory management and garbage collection happens pretty well +controlled. And crashing \LUA\ code can easily be intercepted by a \type {pcall}. + +Most (extensible) file formats are based on tables that get accessed from an +index of names and offsets into the file. This means that filtering for instance +metadata like dimensions and resolutions is no big deal (we always did that). I +can extend analyzing when needed without a substantial change in the engine that +can affect other macro packages. And \LUA\ is fast enough (and often faster) for +such tasks. + +\stopsection + +\startsection[title={Embedding}] + +Once identified the frontend can use that information for scaling and (if needed) +reuse of the same image. Embedding of the image resource happens when a page is +shipped out. For \JPEG\ images this is actually quite simple: we only need to +create a dictionary with the right information and push the bitmap itself into +the associated stream. + +For \PNG\ images it's a bit different. Unfortunately \PDF\ only supports certain +formats, for instance masks are separated and transparency needs to be resolved. +This means that there are two routes: either pass the bitmap blob to the stream, +or convert it to a suitable format supported by \PDF. In \LUATEX\ that is +normally done by the backend code, which uses a library for this. It is a typical +example of a dependency of something much larger than actually needed. In +\LUATEX\ the original poppler library used for filtering objects from a \PDF\ +file as well as the \PNG\ library also have tons of code on board that relates to +manipulating (writing) data. But we don't need those features. As a side note: +this is something rather general. 
You decide to use a small library for a simple +task only to find out after a decade that it has grown a lot offering features +and having extra dependencies that you really don't want. Even worse: you end up +with constant updates due to security (read: bug) fixes. + +Passing the \PNG\ blob unchanged in itself to the \PDF\ file is trivial, but +massaging it into an acceptable form when it doesn't suit the \PDF\ specification +takes a bit more code. In fact, \PDF\ does not really support \PNG\ as format, +but it supports \PNG\ compression (aka filters). + +Trying to support more complex \PNG\ files is a nice way to test if you can +transform a public specification into a program as for instance happens with +\PDF, \OPENTYPE, and font embedding in \CONTEXT. So this again was a nice +exercise in coding. After a while I was able to process the \PNG\ test suite +using \LUA. Optimizing the code came with understanding the specification. +However, for large images, especially interlaced ones, runtime was definitely not +to be ignored. It all depended on the tasks at hand: + +\startitemize + +\startitem + A \PNG\ blob is compressed with \ZIP\ compression, so first it needs to be + decompressed. This takes a bit of time (and in the process we found out that + the \type {zlib} library used in \LUATEX\ had a bug that surfaced when a + mostly zero byte image was uncompressed and we can then hit a filled up + buffer condition). +\stopitem + +\startitem + The resulting uncompressed stream is itself compressed with a so called + filter. Each row starts with a filter byte that indicates how to convert + bytes into other bytes. The most commonly used methods are deltas with + preceding pixels and/or pixels on a previous row. When done the filter bytes + can go away. +\stopitem + +\startitem + Sometimes an image uses 1, 2 or 4 bits per pixel, in which case the rows + need to be expanded. 
This can involve a multiplication factor per pixel (it + can also be an index in a palette). +\stopitem + +\startitem + An image can be interlaced which means that there are seven parts of the + image that stepwise build up the whole. In professional workflows with high + res images interlacing makes no sense as transfer over the internet is not an + issue and the overhead due to reassembling the image and the potentially + larger file size (due to independent compression of the seven parts) are not + what we want either. +\stopitem + +\startitem + There can be an image mask that needs to be separated from the main blob. A + single byte gray scale image then has two bytes per pixel, and a double byte + pixel has four bytes of information. An \RGB\ image has three bytes per pixel + plus an alpha byte, and in the case of double byte pixels we get eight bytes + per pixel. +\stopitem + +\startitem + Finally the resulting blob has to be compressed again. The current amount of + time involved in that suggests that there is room for improvement. +\stopitem + +\stopitemize + +The process is controlled by the number of rows and columns, the number of bytes per +pixel (one or two) and the color space which effectively means one or three +bytes. These numbers get fed into the filter, deinterlacer, expander and|/|or +mask separator. In order to speed up the embedding these basic operations can be +assisted by helpers written in \CCODE. Because \LUA\ is quite good with +strings, we pass strings and get back strings. So, most of the logic stays at the +\LUA\ end. + +\stopsection + +\startsection[title=Conclusion] + +Going for a library|-|less solution for bitmap inclusion is quite doable and in +most cases as efficient. Because we have a pure \LUA\ implementation for testing +and an optimized variant for production, we can experiment as we like. A positive +side effect is that we can more robustly intercept bad images and inject a +placeholder instead. 
+ +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-cleanup.tex b/doc/context/sources/general/manuals/followingup/followingup-cleanup.tex new file mode 100644 index 000000000..7dcb3b3b1 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-cleanup.tex @@ -0,0 +1,332 @@ +% language=us + +% Youtube: TheLucs play with Jacob Collier // Don't stop til you get enough + +\startcomponent followingup-cleanup + +\environment followingup-style + +\logo [ALGOL] {Algol} +\logo [FORTRAN] {FORTRAN} +\logo [SPSS] {SPSS} +\logo [DEC] {DEC} +\logo [VAX] {VAX} +\logo [AMIGA] {Amiga} + +\startchapter[title={Cleanup}] + +\startsection[title={Introduction}] + +Original \TEX\ is a literate program, which means that code and documentation are +mixed. This mix, called a \WEB, is split into a source file and a \TEX\ file and +both parts are processed independently into a program (binary) and a typeset +document. The evolution of \TEX\ went through stages but in the end a \PASCAL\ +\WEB\ file was the result. This fact has led to the more or less standard \WEBC\ +compilation infrastructure which is the basis for \TEXLIVE. + +% My programming experience started with programming a micro processor kit (using +% an 1802 processor), but at the university I went from \ALGOL\ to \PASCAL\ (okay, +% I also remember lots of \SPSS\ kind|-|of|-|\FORTRAN\ programming). The \PASCAL\ +% was the one provided on \DEC\ and \VAX\ machines and it was a bit beyond standard +% \PASCAL. Later I did quite some programming in \MODULA 2 in (for a while an +% \AMIGA) but mostly on personal computers. The reason that I mention this is that +% it still determines the way I look at programs. For instance that code goes +% through a couple of stepwise improvements (and that it can always be done +% better). That you need to keep an eye on memory consumption (can be a nice +% challenge). 
That a properly formatted source code is important (at least for me). +% +% When into \PASCAL, I ran into the \TEX\ series and as it looked familiar it ended +% up on my bookshelf. However, I could not really get an idea what it was about, +% simply because I had no access to the \TEX\ program. But the magic stayed with +% me. The fact that \LUA\ resembles \PASCAL, made it a good candidate for extending +% \TEX\ (there were other reasons as well). When decades later, after using \TEX\ +% in practice, I ended up looking at the source, it was the \LUATEX\ source. + +So, \TEX\ is a woven program and this is also true for the starting point of +\LUATEX: \PDFTEX. But, because we wanted to open up the internals, and because +\LUA\ is written in \CCODE, already in an early stage Taco decided to start from +the \CCODE\ translated from \PASCAL. A permanent conversion was achieved using +additional scripts and the original documentation stayed in the source. The one +large file was split into more logical smaller parts and combined with snippets +from \ALEPH . + +After we released version 1.0 I went through the documentation parts of the code +and normalized that a bit. The at that moment still sort of simple \WEB\ files +became regular \CCODE\ files, and the idea was (and is) that at some point it +should be possible to process the documentation (using \CONTEXT). + +Over time the \CCODE\ code evolved and functions ended up in places that +made most sense at that moment. After the previously described stripping process, +I decided to go through the files and see if a bit of reshuffling made sense, +mostly because that would make documenting easier. (I'm not literate enough to +turn it into a proper literate program.) It was also a good moment to get rid of +unused code (not that much) and unused macros (some more than expected). It also +made sense to change a few names (for instance to avoid potential future clashes +with \type {lua_} core functions). 
However, all this takes quite some careful +checking and compilation runs, so I expect that after this first cleanup, for +quite some time stepwise improvements can happen (especially in adding comments). +\footnote {This is and will be an ongoing effort. It probably doesn't show, but +getting the code base in the state it is in now, took quite some time. It +probably won't take away complaints and nagging but I've decided no longer to pay +attention to those on the sideline.} \footnote {In the end not much \PDFTEX\ and +\ALEPH\ code is present in \LUAMETATEX , but these were useful intermediate +steps. No matter how lean \LUAMETATEX\ becomes, I have a weak spot for \PDFTEX\ +as it always served us well and without it \TEX\ would be less present today.} + +One of the things that I keep in mind when doing this, is that we use \LUA. This +component compiles on most relevant platforms and as such we can assume that +\LUAMETATEX\ also should (and can be) made a bit less dependent on old mechanisms +that are used in stock \LUATEX. For instance, we don't come from \PASCAL\ any +longer but there are traces of that transition still present. We also don't use +specific operating system features, and those that we use are also used in \LUA. +And, as we try to share code we can also delegate some (more) to \LUA. For +instance file related code is not dependent on other components in the \TEX\ +infrastructure, but maybe at some point the runtime loadable \KPSE\ library can +kick in. So, basically the idea is to sort of go bare bone first and later see +how with the help of \LUA\ we can get bring some back. For the record: this is +not needed for \CONTEXT\ as it already has this interface to \TDS. \footnote +{This has been removed from my agenda.} + +\stopsection + +\startsection[title={Motivation}] + +The \LUATEX\ project started as an experiment of adding \LUA\ to \PDFTEX, which +was done by Hartmut and in order to avoid confusion we named it \LUATEX. 
When we +figured out that this had possibilities we decided to go further and Taco +took the challenge to rework the code base. Part of that work was sponsored by +Idris' Oriental \TEX\ project. I have fond memories of the intensive and rapid +development cycles: online discussions, binaries going my direction, +experimental \CONTEXT\ code going the other way. When at some point we had reached a sort of +stable state, read: usage in \CONTEXT\ had become crucial, a +steady further development started, where Taco redid \METAPOST\ into \MPLIB, +funded by user groups. At some point Luigi took over from Taco the task of +integration of components (also into \TEX Live), introduced \LUAJIT\ into the +binary, conducted the (again partially funded) swiglib project, followed by +support for \FFI. A while later I myself started messing around in the code base +directly and continued extending the engine and \LUA\ interfaces. + +I could work on this because I have quite some freedom at the place where I work. +We use (part of) \CONTEXT\ for some projects and especially in dealing with \XML\ +we could benefit from \LUATEX. It must be said that (long running) projects like +these never pay off (on the contrary, they cost a lot in terms of money and +energy) so it's quite safe to conclude that \LUATEX\ development is to a large +extent a (many man years) work of love for the subject. I guess that no sane +company will do (permit) such a thing. It is also for that reason that I keep +spending time on it, and as a simplification of the code base was always one of +my dreams, this is what I spend my time on now. After all, \LUATEX\ is just +juggling bytes and as it is written in \CCODE, and has no graphical user +interface or complex dependencies, it should be possible to have a relatively +simple setup in terms of code files and compilation. Of course this is also made +possible by the fact that I can use \LUA. 
It's also why I decided to +\quotation {Just do it}, and then \quotation {Let's see where I end up}. No +matter how it turns out, it makes a good vehicle for further development and +years of fun. + +\stopsection + +\startsection[title={Files}] + +After a decade of adding and moving around code it's about time to reorganize the +code a bit, but we do so without deviating too much from the original setup. For +instance we started out with a small number of \LUA\ interface macros and these +were collected in a few files, and defined in one \type {h} file, but it made +sense to have header files alongside the libraries that implement helpers. This +is a rather tedious job but with music videos or video casts on a second screen +it is bearable. + +When I reached a state where we only needed the \LUATEX\ files plus the minimal +set of libraries I tried to get rid of directories in the source tree that were +placeholders, but with \type {automake} files, like those for \PDFTEX\ and +\XETEX. After a couple of attempts I gave up on that because the build setup is +rather hard coded for checking them. Also, there were some (puzzling) +dependencies in the configuring on \OMEGA\ files as well as some \DVI\ related +tools. So, that bit is for later to sort out. \footnote {Of course later the +decision was made to forget about using \type {autotools} and go for an as simple +as possible \type {cmake} solution.} + +\stopsection + +\startsection[title={Command line arguments}] + +As we need to set up a backend and deal with font loading in \LUA, we can as well +delegate some of the command line handling to \LUA\ as well. Therefore, only +a limited set of options is dealt with: those that determine the startup and \LUA\ +behavior. In principle we can even get rid of all and always use a startup script +but for now it makes sense to not deviate too much from a regular \TEX\ run. + +At the time of this writing some code is still in place that is a candidate for +removal. 
For instance, using the \type {&} to define a format file has long been +replaced by \type {--fmt}. There are sentimental reasons for keeping it but at +the same time we need to realize that shells use these special characters too. A +for me unknown (or forgotten) feature of prefixing a jobname with a \type {*} +will be removed as it makes no sense. There is some \MSWINDOWS\ specific last +resort code that probably will go too, unless I can figure out why it is needed +in the first place. \footnote {Intercepting these symbols has been dropped in +favor of the command line flags.} + +Now left with a very simple set of command line options it also makes sense to +use a simple option analyzer, so that was a next step as it rid us of a +dependency and produces less code. + +So, the option parser has now been replaced by a simple variant that is more in +tune with what will happen when you deal with options in \LUA: no magic. One +problem is that \TEX's first input file is moved from the command line to the +input buffer and an interactive session is emulated. As mentioned before, there +is some extra \type {&}, \type {*} and \type {\\} parsing involved. One can +wonder if this still makes sense in a situation where one has to specify a format +and \LUA\ file (using \type {--fmt} and \type {--ini}) so that might as well be +redone a bit some day. \footnote {In the end only these explicit command line +options were supported.} + +\stopsection + +\startsection[title={Platforms}] + +When going through the code I noticed conditional sections for long obsolete +platforms: \type {amiga}, \type {dos} and \type {djgpp}, \type {os/2}, \type +{aix}, \type {solaris}, etc. Also, with 64 bit becoming the standard, it makes +sense to assume that users will use a modern 64 bit platform (intel or arm combined +with \MSWINDOWS\ or some popular \UNIX\ variant). 
We don't need large and complex +code management for obscure platforms and architectures simply because we want to +prove that \LUAMETATEX\ runs everywhere. With respect to \MSWINDOWS\ we use a +cross compiler (\type {mingw}) as reference but native compilation should be no +big deal eventually. We can cross that bridge when we have a simplified +compilation set up. Right now it doesn't make sense to waste time on a native +\MICROSOFT\ compilation as it would also pollute the code with conditional +sections. We'll see what happens when I'm bored. \footnote {In the meantime no +effort is made to let the source compile otherwise than with the cross compiler. +Best is to keep the code as clean as possible with respect to conditional code +sections. So don't bother me with patches.} + +\stopsection + +\startsection[title={Stubs}] + +A \CONTEXT\ run is managed by \MTXRUN\ in combination with a specific script + +\starttyping +mtxrun --script context +\stoptyping + +On windows, we use a stub because using a \type {cmd} file creates an indirectness +that is not seen as executable and therefore in other command files needs to +be called in a special way to guarantee continuation. So, there we have a small +binary: + +\starttyping +mtxrun.exe ... +\stoptyping + +that will call: + +\starttyping +luatex --luaonly mtxrun.lua ... +\stoptyping + +And when the stub has a different name than \type {mtxrun}, say: + +\starttyping +context.exe ... +\stoptyping + +it effectively becomes: + +\starttyping +luatex --luaonly mtxrun.lua --script context ... +\stoptyping + +Because the stripped down version assumes some kind of initializations anyway a +small extension made it possible to use \LUAMETATEX\ as stub too. So, when we +rename \type {luametatex.exe} to \type {mtxrun.exe} (on \UNIX\ we don't use a +suffix) it will start up as \LUA\ interpreter when it finds a script with the +name \type {mtxrun.lua} in the same path. 
When we rename it to \type +{context.exe} it will search for \type {context.lua} and all that that script has +to do is this: + +\starttyping +arg[0] = "mtxrun" + +table.insert(arg,1,"mtx-context") +table.insert(arg,1,"--script") + +dofile(os.selfpath .. "/" .. "mtxrun.lua") +\stoptyping + +So, it basically becomes a call to \type {mtxrun}, but we stay in \LUAMETATEX. +Because we want an isolated run this will launch \LUAMETATEX\ again with the +right command line arguments. This sounds inefficient but because we have a small +binary this is no real issue, and as that run is isolated, it cannot influence +the caller. The overhead is really small: on my somewhat older laptop it's .2 +seconds, but we had that management overhead already for decades, so no one +bothers about it. On all platforms using symbolic links works ok too. + +\stopsection + +\startsection[title={Global variables}] + +There are quite a few global variables and functions in the code base, but in the +process of opening up I got rid of some. The cleanup turned some more into +locals which saved executable bytes (keep in mind that we also use the engine as +\LUA\ interpreter so, the smaller, the more friendly). \footnote {Later the +global variables were collected in so called \CCODE\ structs.} This is work +in progress. + +\stopsection + +\startsection[title={Memory usage}] + +By going over all the code a couple of times, I was able to decrease the amount +of used memory a bit as well as avoid some memory allocations. This has no +consequences for performance but is nicer when multiple runs at the same time +(e.g.\ on virtual machines) have to compete for resources. \footnote {I will +probably have to spend some more time on this in order to reach a state that I'm +satisfied with.} + +\stopsection + +\startsection[title={\METAPOST}] + +The current code base doesn't have that many files. 
We can imagine that, when +\LUA\ can be compiled on a platform, that compiling \LUAMETATEX\ is also not that +complicated. However, the rather complex build infrastructure demonstrates the +opposite. One of the complications is that \MPLIB\ is coded in \CWEB\ and that +needs some juggling to get \CCODE. The process has quite some dependencies. There +are some upstream patches needed, but for now occasionally checking with the +upstream sources used for compiling \MPLIB\ in \LUATEX\ works okay. \footnote +{Later I decided to cleanup the \MPLIB\ code: unused font related code was +removed, the \POSTSCRIPT\ backend was untangled, the translation from \CWEB\ to +\CCODE\ got done by a \LUA\ script, aspects like error reporting and \IO\ were +redone, and in the end some new extensions were added. Some of that might trickle +back to the original, as long as it doesn't harm compatibility; after all +\METAPOST\ (the program) is standardized and considered functionally stable.} + +As \LUAMETATEX\ is also used for experiments we use a copy of the \LUA\ library +interface. That way we don't interfere with the stable \LUATEX\ situation. When +we play with extensions, we can always decide to backport them, once they are +found useful and in good working order. But, as that interface was just \CCODE\ +this was trivial. + +\stopsection + +\startsection[title={Files}] + +In a relatively late stage I decided to cleanup some of the filename handling. +First I got rid of the \type {area}, \type {name} and \type {ext} decomposition +and optional recomposition. In the original engine that goes through the string +pool and although there is some recovery in the end, with many files and fonts +being used, the pool can get exhausted. For instance when you have hundreds of +thousands of \typ {\font \foo = bar} kind of definitions, each definition wipes +out the previous entry in the hash, but its font name is kept in the string pool. 
+I got rid of that side effect by reusing strings but in the end decided to avoid +the pool altogether. It was then a small step to also do that for other +filenames. In the process I also decided that it made no sense to keep the code +around that reads a filename from the console: we now just quit. Restarting the +program with a proper filename is no big deal today. I might do some more cleanup +there. In the end we can best use a callback for handling input from the console. + +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-compilation.tex b/doc/context/sources/general/manuals/followingup/followingup-compilation.tex new file mode 100644 index 000000000..a0e67d4be --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-compilation.tex @@ -0,0 +1,84 @@ +% language=us + +\startcomponent followingup-compilation + +\environment followingup-style + +\startchapter[title={Compilation}] + +Compiling \LUATEX\ is possible because after all it's what I do on my machine. +The \LUATEX\ source tree is part of a larger infrastructure: \TEX Live. Managing +that one is work for specialists and the current build system is the work of +experts over a quite long period of time. When you only compile \LUATEX\ it goes +unnoticed that there are many dependencies, some of which are actually unrelated +to \LUATEX\ itself but are a side effect of the complexity of the build +structure. + +When going from \LUATEX\ to \LUAMETATEX\ many dependencies were removed and I +eventually ended up with a simpler setup. The source tree went down to less than +30 MB and zipped to around 4 MB. That makes it possible to consider adding the +code to the regular \CONTEXT\ distribution. + +One reason for doing that is that one keeps the current version of the engine +packaged with the current version of \CONTEXT. But a more important one is that +it fulfils a demand. 
Some time ago we were asked by some teachers participating +in a (basically free) math method for technical education what guarantees there +are that the tools used are available forever. Now, even with \LUAMETATEX\ one +has to set up a compiler but it is much easier than installing the whole \TEX +Live infrastructure for that. A third reason is that it gives me a comfortable +feeling that I myself can compile it anywhere as can \CONTEXT\ users who want to +do that. + +The source tree traditionally has libs in a separate directory (lua, luajit, zlib +and zziplib). However, it is more practical to have them alongside our normal +source. These are relative small collections of files that never change so there +is no reason not to do it. \footnote {If I ever decide to add more libraries, +only the minimal interfaces needed will be provided, but at this moment there are +no such plans.} + +Another assumption we're going to make is that we use 64 bit binaries. There is +no need to support obsolete platforms either. As a start we make sure it compiles +on the platforms used by \CONTEXT\ users. Basically we make a kind of utility. +For now I can compile the \WINDOWS\ 32 bit binaries that my colleague needs in +half a minute anyway, but in the long run we will settle for 64 bits. + +I spent about a week figuring out why the compilation is so complex (by +selectively removing components). At some point compilation on \OSX\ stopped +working. When the minimum was reached I decided to abandon the automake tool +chain and see if \type {cmake} could be used (after all, Mojca challenged that). +In retrospect I should have done that sooner because in a day I could get all +relevant platforms working. Flattening the source tree was a next step and so +there is no way back now. What baffled me (and Alan, who at some point joined in +testing \OSX) is the speed of compilation. 
My pretty old laptop needed about half +a minute to get the job done and even on a raspberry pi with only a flash card +just a few minutes were needed. At that point, as we could remove more make +related files, the compressed 11 MB archive (\type {tar.xz}) shrank to just over +2~MB. Interesting is that compiling \MPLIB\ takes most time, and when one compiles +in parallel (on more cores) that one finishes last. + +For the record: I do all this on a laptop running \MSWINDOWS\ 10 using the Linux +subsystem. When that came around, Luigi made me a working setup for cross +compilation but in the meantime with GCC 8.2 all works out of the box. I edit the +files at the \MSWINDOWS\ end (using \SCITE), compile at the \LINUX\ end, and test +everything on \MSWINDOWS. It is a pretty convenient setup. + +When compilation got faster it also became more convenient to do some more code +reshuffling. This time I decided to pack the global variables into structures, +more or less organized the way the header files were organized. It gives a bit +more verbosity but also has the side effect that (at least in principle) the +\CPU\ cache can perform better because neighboring variables are often cached as +part of the deal. Now it might be imagination, but in the process I did notice +that mid March processing the manual went down to below 11.7 seconds while before +it stayed around 12.1 seconds. Of course this is not that relevant currently, but +it might make a difference on less capable processors (as in a low power setup). +It anyway didn't hurt. + +In the meantime some of the constants used in the program got prefixes or +suffixes to make them more unique and for instance the use of \type {normal} as +equivalent for zero was made a bit more distinctive as we now have more subtypes. +That is: all the subtypes were collected in enumerations instead of \CCODE\ +defines. Back to the basics. 
+ +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-contents.tex b/doc/context/sources/general/manuals/followingup/followingup-contents.tex new file mode 100644 index 000000000..bd6af6d81 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-contents.tex @@ -0,0 +1,11 @@ +\startcomponent followingup-titlepage + +\environment followingup-style + +\starttitle[title={Table of contents}] + + \placelist[chapter] + +\stoptitle + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-directions.tex b/doc/context/sources/general/manuals/followingup/followingup-directions.tex new file mode 100644 index 000000000..244baff90 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-directions.tex @@ -0,0 +1,778 @@ +% language=us + +\startcomponent followingup-directions + +\environment followingup-style + +\startchapter[title={Directions}] + +\startsection[title={Introduction}] + +In \LUATEX\ the directional model taken from \OMEGA\ has been upgraded a bit. For +instance in addition to the \type {\*dir} commands we have \type {\*direction} +commands that take a number instead of a keyword. This is a bit more efficient and +consistent as using these keywords was kind of un|-|\TEX. Internally direction +related nodes (text directions) are not whatsits but first class nodes. We also +use a subtype that indicates the push or pop state. + +The \LUATEX\ directional model provides four directions which is a subset of the +many that \OMEGA\ provided, indicated by three letters, like \type {TRT} and +\type {LTT}. In the beginning we had them all fixed\footnote {This was done by +Hartmut by rigorously checking all possible combinations.} and thereby implemented +but being in doubt about their usefulness we dropped most of them, just four were +kept. However, in practice only right|-|to|-|left makes sense. 
Going from top to +bottom in Japanese or Mongolian can also involve glyph rotation, which actually +is not implemented in the engine at all. Spacing and inter|-|character breaks +have to be implemented and in the end one has to combine the results into a +page body. So, in practice you end up with juggling node list and macro magic in +the page builder. The \type {LTL} (number~2) and \type {RTT} (number~3) +directions are not used for serious work. Therefore, in \LUAMETATEX\ the model +has been adapted. In the end, it was not entirely clear anyway what the three +letters were indicating in each direction property (page, body, par, text, math) +as most had no real meaning. + +As a side note: if you leave the (not really working well) vertical directions +out of the picture, directional typesetting is not that hard to deal with and has +hardly any consequences for the code. This is because horizontal dimensions are +not affected by direction, only the final ship out is: when a run (wrapped in an +hbox) goes the other way, the backend effectively has to skip the width and then +with each component goes back. Not much more is involved. This means that a +bidirectional engine is rather simple. The complications are more in the way a +macro package deals with it, in relation to the input as well as the layout. The +backend has to do the real work. \footnote {Of course when one hooks in \LUA\ +code taking care of direction can be needed!} + +\stopsection + +\startsection[title=Two directions] + +We now have only two directions left: the default left|-|to|-|right (l2r) and +right|-|to|-|left (r2l). They work the same as before and in the backend we can +get rid of the fuzzy parallel and rotation (which actually was just stacking +nodes) heuristics. + +Reducing the lot to two directions simplifies some code in the engine. This is +because when calculating dimensions a change in horizontal direction doesn't +influence the width, height and depth in an orthogonal way. 
Because there are no +longer top|-|down items we don't need to swap the height and or depth with the +width. This also means that we don't need to keep much track of direction +changes. Technically an hpack doesn't need to know its own direction and we can +set it to any value afterwards if we want because the calculation are not +influenced by it; so that also simplified matters. + +The \type {\bodydir} and \type {\pagedir} already didn't make much sense, and in +\CONTEXT\ we actually intercepted them, so now they are removed. The body +direction is always left|-|to|-|right and the page direction was only consulted +in the backend code which we no longer have. Another side effect of going with +only two directions is that rules no longer need to carry the direction property: +there is no flipping of width with height and depth needed. + +\stopsection + +\startsection[title=Four orientations] + +Instead of the top|-|bottom variants we now have four orientations plus a bunch +of anchoring options. Of course one could use the backend save, restore and +matrix whatsits but a natural feature makes more sense. Let's start with what +happens normally: + +\startbuffer[1] +This is a \LUAMETATEX\ goodie. +\stopbuffer + +\startbuffer[2] +\hbox orientation 2{This is a \LUAMETATEX\ goodie.} +\stopbuffer + +\startbuffer[3] +This is a \hbox orientation 2{\LUAMETATEX} goodie. +\stopbuffer + +\startbuffer[4] +\hbox orientation 2{This is a \hbox orientation 002{\LUAMETATEX} goodie.} +\stopbuffer + +\blank{\showstruts\strut}\quad\ruledhbox{\inlinebuffer[1]}\quad{\showstruts\strut}\blank + +This line has height and depth. We can rotate this sentence by 180 degrees around +the baseline in which case the depth and height are flipped. 
+ +\blank{\showstruts\strut}\quad\ruledhbox{\inlinebuffer[2]}\quad{\showstruts\strut}\blank + +or we flip part: + +\blank{\showstruts\strut}\quad\ruledhbox{\inlinebuffer[3]}\quad{\showstruts\strut}\blank + +or flip nested: + +\blank{\showstruts\strut}\quad\ruledhbox{\inlinebuffer[4]}\quad{\showstruts\strut}\blank + +but we're talking boxes, so the above examples are defined as: + +\typebuffer[1,2,3,4] + +The \type {orientation} keyword does the magic here. There are four such +orientations with zero being the default. We saw that two rotates over 180 +degrees, so one and three are left for up and down. + +\startbuffer[5] +\hbox orientation 0 {\TEX} and +\hbox orientation 1 {\TEX} and +\hbox orientation 2 {\TEX} and +\hbox orientation 3 {\TEX} +\stopbuffer + +\blank{\showstruts\strut}\quad\ruledhbox{\inlinebuffer[5]}\quad{\showstruts\strut}\blank + +This is coded as: + +\typebuffer[5] + +The landscape and seascape variants both sit on top of the baseline while the +flipped variant has its depth swapped with the height. Although this would be +enough a bit more control is possible. The number is actually a three digit hex +number: + +\starttyping +0x<X><Y><O> +\stoptyping + +or in \TEX\ syntax + +\starttyping +"<X><Y><O> +\stoptyping + +We saw that the last digit regulates the orientation. The first and second one +deal with anchoring horizontally and vertically. The vertical options of the +horizontal variants anchor on the baseline, lower corner, upper corner or center. 
+ +\startbuffer[6] +\hbox orientation "002 {\TEX} and +\hbox orientation "012 {\TEX} and +\hbox orientation "022 {\TEX} and +\hbox orientation "032 {\TEX} +\stopbuffer + +\typebuffer[6] + +\blank{\showstruts\strut}\quad\ruledhbox{\inlinebuffer[6]}\quad{\showstruts\strut}\blank + +\startbuffer[7] +\hbox orientation "002 {\TEX} and +\hbox orientation "102 {\TEX} and +\hbox orientation "202 {\TEX} and +\hbox orientation "302 {\TEX} and +\hbox orientation "402 {\TEX} +\stopbuffer + +The horizontal options of the horizontal variants anchor in the center, left, right, +halfway left and halfway right. + +\typebuffer[7] + +\blank{\showstruts\strut}\quad\ruledhbox{\inlinebuffer[7]}\quad{\showstruts\strut}\blank + +All combinations will be shown on the next pages, so we suffice with telling that +for the vertical variants we can vertically anchor on the baseline, top, bottom +or center, while horizontally we center, hang left or right, halfway left or +right, and in addition align on the (rotated) baseline left or right. + +The orientation has consequences for the dimensions so they are dealt with in the +expected way in constructing lines, paragraphs and pages, but the anchoring is +virtual. As a bonus, we have two extra variants for orientation zero: on top of +baseline or below, with dimensions taken into account. 
+ +\startbuffer[8] +\hbox orientation "000 {\TEX} and +\hbox orientation "004 {\TEX} and +\hbox orientation "005 {\TEX} +\stopbuffer + +\typebuffer[8] + +\blank{\showstruts\strut}\quad\ruledhbox{\inlinebuffer[8]}\quad{\showstruts\strut}\blank + +\definecolor[brcolorh][r=1,t=.5,a=1] +\definecolor[brcolord][b=1,t=.5,a=1] +\definecolor[brcolorm][g=1,t=.5,a=1] + +\starttexdefinition ShowAnchor + \blackrule[width=2pt,height=1pt,depth=1pt,color=darkgray] +\stoptexdefinition + +\starttexdefinition DemoRule#1#2#3 + \ShowAnchor + \ruledhbox { + \hbox orientation "#1#2#3 { + \blackrule[height=6mm,depth=0mm,width=8mm,color=brcolorh]\kern-8mm\relax + \blackrule[height=0mm,depth=3mm,width=8mm,color=brcolord]\kern-8mm\relax + \blackrule[height=2mm,depth=-1mm,width=8mm,color=brcolorm] + } + } + \ShowAnchor +\stoptexdefinition + +\starttexdefinition DemoText#1#2#3 + \ShowAnchor + \ruledhbox{\hbox orientation "#1#2#3 {\red\TEX}} + \ShowAnchor +\stoptexdefinition + +\starttexdefinition DemoSet #1#2 + \startcombination[nx=4,ny=7,width=10cm] + {#200#1}{\ttxx 0x00#1} {#201#1}{\ttxx 0x01#1} {#202#1}{\ttxx 0x02#1} {#203#1}{\ttxx 0x03#1} + {#210#1}{\ttxx 0x10#1} {#211#1}{\ttxx 0x11#1} {#212#1}{\ttxx 0x12#1} {#213#1}{\ttxx 0x13#1} + {#220#1}{\ttxx 0x20#1} {#221#1}{\ttxx 0x21#1} {#222#1}{\ttxx 0x22#1} {#223#1}{\ttxx 0x23#1} + {#230#1}{\ttxx 0x30#1} {#231#1}{\ttxx 0x31#1} {#232#1}{\ttxx 0x32#1} {#233#1}{\ttxx 0x33#1} + {#240#1}{\ttxx 0x40#1} {#241#1}{\ttxx 0x41#1} {#242#1}{\ttxx 0x42#1} {#243#1}{\ttxx 0x43#1} + {#250#1}{\ttxx 0x50#1} {#251#1}{\ttxx 0x51#1} {#252#1}{\ttxx 0x52#1} {#253#1}{\ttxx 0x53#1} + {#260#1}{\ttxx 0x60#1} {#261#1}{\ttxx 0x61#1} {#262#1}{\ttxx 0x62#1} {#263#1}{\ttxx 0x63#1} + \stopcombination +\stoptexdefinition + +\startplacefigure[title={orientation 0}]\scale[width=\textwidth]{\framed[offset=1cm]{\DemoSet0\DemoRule}}\stopplacefigure +\startplacefigure[title={orientation 1}]\scale[width=\textwidth]{\framed[offset=1cm]{\DemoSet1\DemoRule}}\stopplacefigure 
+\startplacefigure[title={orientation 2}]\scale[width=\textwidth]{\framed[offset=1cm]{\DemoSet2\DemoRule}}\stopplacefigure +\startplacefigure[title={orientation 3}]\scale[width=\textwidth]{\framed[offset=1cm]{\DemoSet3\DemoRule}}\stopplacefigure + +\startplacefigure[title={orientation 0}]\scale[width=\textwidth]{\framed[offset=1cm]{\DemoSet0\DemoText}}\stopplacefigure +\startplacefigure[title={orientation 1}]\scale[width=\textwidth]{\framed[offset=1cm]{\DemoSet1\DemoText}}\stopplacefigure +\startplacefigure[title={orientation 2}]\scale[width=\textwidth]{\framed[offset=1cm]{\DemoSet2\DemoText}}\stopplacefigure +\startplacefigure[title={orientation 3}]\scale[width=\textwidth]{\framed[offset=1cm]{\DemoSet3\DemoText}}\stopplacefigure + +% \page + +The anchoring can look somewhat confusing but you need to keep in mind that it is +normally only used in very controlled circumstances and not in running text. +Wrapped in macros users don't see the details. We're talking boxes here, so for +instance: + +\startbuffer +test\quad +\hbox orientation 3 \bgroup + \strut test\hbox orientation "002 \bgroup\strut test\egroup test% +\egroup \quad +\hbox orientation 3 \bgroup + \strut test\hbox orientation "002 \bgroup\strut test\egroup test% +\egroup \quad +\hbox orientation 3 \bgroup + \strut test\hbox orientation "012 \bgroup\strut test\egroup test% +\egroup \quad +\hbox orientation 3 \bgroup + \strut test\hbox orientation "022 \bgroup\strut test\egroup test% +\egroup \quad +\hbox orientation 3 \bgroup + \strut test\hbox orientation "032 \bgroup\strut test\egroup test% +\egroup \quad +\hbox orientation 3 \bgroup + \strut test\hbox orientation "042 \bgroup\strut test\egroup test% +\egroup +\quad test +\stopbuffer + +\typebuffer + +gives: + +\startlinecorrection[blank] +\ruledhbox\bgroup \showcolorstruts \showboxes \inlinebuffer \egroup +\stoplinecorrection + +\stopsection + +\startsection[title={Right|-|to|-|left typesetting}] + +Another aspect to keep in mind when we transform is the 
already mentioned +right|-|to|-|left direction. We show some examples where we do things like this: + +\starttyping +\hbox{\hbox + orientation #1 + {\strut abcd}} +\hbox{\hbox + orientation #1 + to 15mm + {\strut abcd}} +\hbox{\hbox + orientation #1 + direction 1 + {\righttoleft\strut abcd}} +\hbox{\hbox + orientation #1 + direction 1 + to 15mm {\righttoleft\strut abcd}} +\stoptyping + +\starttexdefinition TestH #1 + \dontcomplain + \setbox\scratchbox\hbox{abcd}% + x\ruledhbox{\hbox orientation #1 to \wd\scratchbox {\strut abcd}}x\quad + x\ruledhbox{\hbox orientation #1 to 15mm {\strut abcd}}x\quad + x\ruledhbox{\hbox orientation #1 direction 1 to \wd\scratchbox {\righttoleft\strut abcd}}x\quad + x\ruledhbox{\hbox orientation #1 direction 1 to 15mm {\righttoleft\strut abcd}}x% +\stoptexdefinition + +\starttexdefinition TestV #1 + \dontcomplain + \setbox\scratchbox\hbox{abcd}% + x\ruledvbox{\vbox orientation #1 {\hsize \wd\scratchbox\strut abcd}}x\quad + x\ruledvbox{\vbox orientation #1 {\hsize 15mm\strut abcd}}x\quad + x\ruledvbox{\vbox orientation #1 {\righttoleft\hsize \wd\scratchbox\strut abcd}}x\quad + x\ruledvbox{\vbox orientation #1 {\righttoleft\hsize 15mm\strut abcd}}x% +\stoptexdefinition + +\startplacefigure[title={Horizontal boxes.}] + \startcombination[nx=2,ny=2] + {\TestH 0} {orientation 0} + {\TestH 2} {orientation 2} + {\TestH 1} {orientation 1} + {\TestH 3} {orientation 3} + \stopcombination +\stopplacefigure + +\startplacefigure[title={Vertical boxes.}] + \startcombination[nx=2,ny=2] + {\TestV 0} {orientation 0} + {\TestV 2} {orientation 2} + {\TestV 1} {orientation 1} + {\TestV 3} {orientation 3} + \stopcombination +\stopplacefigure + +\stopsection + +\startsection[title={Vertical typesetting}] + +I'm no expert on vertical typesetting and have no application for it either. But +from what I've seen vertically positioned glyphs are normally used in rather +straightforward situations. 
Here I will just give some examples of how +transformations can be used to achieve certain effects. It is no big deal to make +macros or use \LUA\ to apply magic to node lists but it is beyond this description +to discuss that. + +Before we fine tune this example we have to discuss another feature. When a \typ +{orientation} keyword is given optionally \type {xoffset} and \type {yoffset} can +be specified. These offsets are {\em not} taken into account when calculating +dimensions. This is different from the offsets (at the \LUA\ end) used in glyphs +because there the vertical offset is taken into account. Here are some examples +of offsets in packaged lists: + +\startbuffer +\hbox + {test 1} +\hbox + orientation 0 + yoffset 15pt + xoffset 150pt + {test} +\vbox + orientation 0 + {\hbox{test}} +\vbox + orientation 0 + yoffset -5pt + xoffset 130pt + {\hbox{test}} +\vbox + orientation 0 + yoffset 2pt + {\hbox{test}} +\stopbuffer + +\typebuffer + +\startlinecorrection[blank] + \start \showboxes \bfd \getbuffer \stop +\stoplinecorrection + +In order to demonstrate some hacking, we first define a font that supports +Chinese glyphs: + +\startbuffer +\definefont[NotoCJK][NotoSansCJKtc-Regular*default @ 24pt] +\stopbuffer + +\typebuffer \getbuffer + +We put some text in a horizontal box; it doesn't show up in verbatim but you +get the idea nevertheless: + +\startbuffer +\hbox{\NotoCJK 通用规范汉字表} +\stopbuffer + +\typebuffer + +\startlinecorrection[blank] + \start \showboxes \getbuffer \stop +\stoplinecorrection + +Let's now rotate this line of text: + +\startbuffer[1] +\hbox orientation 1 {\NotoCJK 通用规范汉字表} +\stopbuffer + +\typebuffer[1] + +The result is shown in a while. Because we also need to rotate the glyphs we +deconstruct the box. 
+ +\startbuffer[2] +\hbox orientation 1 \bgroup \NotoCJK % + \vbox {\hbox {通}}% + \vbox {\hbox {用}}% + \vbox {\hbox {规}}% + \vbox {\hbox {test}}% + \vbox {\hbox {范}}% + \vbox {\hbox {汉}}% + \vbox {\hbox {字}}% + \vbox {\hbox {表}}% +\egroup +\stopbuffer + +\typebuffer[2] + +Next we rotate the glyphs. + +\startbuffer[3] +\hbox orientation 1 \bgroup \NotoCJK % + \vbox orientation 3 {\hbox {通}}% + \vbox orientation 3 {\hbox {用}}% + \vbox orientation 3 {\hbox {规}}% + \vbox orientation 0 {\hbox {test}}% + \vbox orientation 3 {\hbox {范}}% + \vbox orientation 3 {\hbox {汉}}% + \vbox orientation 3 {\hbox {字}}% + \vbox orientation 3 {\hbox {表}}% +\egroup +\stopbuffer + +\typebuffer[3] + +This still looks bad so we kick in some offsets and glue: + +\startbuffer[4] +\dontleavehmode\hbox orientation 1 \bgroup \NotoCJK + \vbox + orientation 0 yoffset -.1ex + {\hbox orientation 3 {通}}\hskip.2ex + \vbox + orientation 0 yoffset -.1ex + {\hbox orientation 3 {用}}\hskip.2ex + \vbox + orientation 0 yoffset -.1ex + {\hbox orientation 3 {规}}\hskip.6ex + \vbox + {\hbox {test}}\hskip.2ex + \vbox + orientation 0 yoffset -.1ex + {\hbox orientation 3 {范}}\hskip.2ex + \vbox + orientation 0 yoffset -.1ex + {\hbox orientation 3 {汉}}\hskip.2ex + \vbox + orientation 0 yoffset -.1ex + {\hbox orientation 3 {字}}\hskip.2ex + \vbox + orientation 0 yoffset -.1ex + {\hbox orientation 3 {表}}\hskip.2ex +\egroup +\stopbuffer + +\typebuffer[4] + +Now we're ready to compare the results: + +\startlinecorrection[blank] + \startcombination[9*1] + {\showboxes \getbuffer[1]} {1} + {\showboxes \getbuffer[2]} {2} + {\showboxes \getbuffer[3]} {3} + {\showboxes \getbuffer[4]} {4} + {\quad}{} + {\getbuffer[1]} {1} + {\getbuffer[2]} {2} + {\getbuffer[3]} {3} + {\getbuffer[4]} {4} + \stopcombination +\stoplinecorrection + +This could of course also be done with traditional kerns, raising and|/|or +lowering and messing around with dimensions. 
It's just that when manipulating +such rather complex constructs a little help (and efficiency) makes a difference, +also at the \LUA\ end. Of course one can argue the result but all is +programmable in the end. + +\stopsection + +\startsection[title={Considerations}] + +Just in case you wonder if using these offsets is better than using normal kerning +and shifting, in practice offsets are not more efficient. Let's compare the +alternatives. We go from most to least efficient. + +\starttyping +\setbox\scratchbox\hpack{} +\boxxoffset\scratchbox\scratchdimen +\boxyoffset\scratchbox\scratchdimen +\stoptyping + +This sets the offsets and by setting them we also trigger the transform. Scanning +is fast and so is setting them. One million times expanding this takes (as +reference) 0.73 seconds on my current machine. + +\starttyping +\setbox\scratchbox\hpack + orientation \zerocount + xoffset \scratchdimen + yoffset \scratchdimen + {} +\stoptyping + +This takes a bit more time, 1.11 seconds, because the keywords have to be scanned +which happens on a token by token base. + +\starttyping +\setbox\scratchbox\hpack{} +\scratchheight\ht\scratchbox +\scratchdepth\dp\scratchbox +\setbox\scratchbox\hpack + {\kern\scratchdimen + \raise\scratchdimen\box\scratchbox + \kern\scratchdimen} +\ht\scratchbox\scratchheight +\dp\scratchbox\scratchdepth +\stoptyping + +Now we're up to 1.69 seconds for the million expansions. Not only do we have some +parsing going on, but we also have assignments and extra packing, which means +calculations taking place. + +\starttyping +\setbox\scratchbox\hpack{} +\scratchwidth\wd\scratchbox +\scratchheight\ht\scratchbox +\scratchdepth\dp\scratchbox +\setbox\scratchbox\hpack + {\kern\scratchdimen + \raise\scratchdimen\box\scratchbox} +\wd\scratchbox\scratchwidth +\ht\scratchbox\scratchheight +\dp\scratchbox\scratchdepth +\stoptyping + +This variant is about as fast, as I measured 1.72 seconds. 
So, compared to the +0.73 seconds for the first variant, is this better? Does it help when we look at +our existing macros and adapt them? + +Normally we don't have an empty box and normally we use \type {\hbox} because we +want the content to be processed. And a million times building a list and +processing content (which means runs over the list) will make the differences +in timing become noise. Add to that garbage collection (in \LUA) and memory +management (in \TEX) and it even becomes unpredictable. Seeing differences of +a factor two in such timings is no exception. + +Another aspect is the parsing. When these commands are wrapped in macros we're +talking expanding tokens which is pretty fast. When it comes from the input file +a conversion to tokens has to happen too. And we will never see millions of such +sequences in a source file. + +The backend also plays a role. Handling a kern or shift is more efficient than +analyzing transforms (and offsets) especially in a \LUA\ variant. But on the +other hand, we don't have an extra wrapping in a box so that actually saves work. + +So, before a \CONTEXT\ user thinks \quotation {Let's update macros and change +policy.}, just consider staying with proven good old \TEX\ approaches. These +features are mostly meant for efficient low level manipulations as discussed in +relation to for instance handling scripts. In the rather large \CONTEXT\ code +base there are really only a few places where it will make code look nicer, but +there I don't expect an impact on performance. + +\stopsection + +\startsection[title={Integration}] + +How these mechanisms are used depends on ones needs and the macro package used. +It makes no sense to cook up generic solutions because integration in a macro +package is too different. But anyhow we'll give an example of some (definitely +non optimized) \LUA\ magic. 
+ +\startbuffer +\startluacode +local glyph_id = node.id("glyph") +local fontdata = fonts.hashes.identifiers -- assumes generic font loader + +local function is_vertical(c) + -- more ranges matter but this will do here + return c >= 0x04E00 and c <= 0x09FFF +end + +function document.go_vertical(boxnumber) + local box = tex.getbox(boxnumber) + local n = box.list + while n do + if n.id == glyph_id and is_vertical(n.char) then + local o = .2 * fontdata[n.font].parameters.xheight + local prev, next = n.prev, n.next + n.next, n.prev = nil, nil + local l = nodes.new("hlist") + l.list = n + local w, h, d = n.width, n.height, n.depth + if prev then + prev.next, l.prev = l, prev + else + box.list = l + end + if next then + l.next, next.prev = next, l + end + l.width, l.height, l.depth = h + d + o, w, 0 + l.orientation = 0x003 + l.xoffset, l.yoffset = o/2, -o/2 + l.hoffset, l.doffset = h, d - o + n = next + else + n = n.next + end + end +end +\stopluacode +\stopbuffer + +\typebuffer \getbuffer + +We will use some other magic that we won't discuss here which relates to handling +scripts. For Hangul one needs to inject breakpoints and if needed also glue +between characters. The script environment does this. We also need to bump the +interline spacing. First we define a regular text helper and an auxiliary box. + +\startbuffer[1] +\unexpanded\def\stripe#1% + {\hbox orientation 0 yoffset .2\exheight{\strut #1}} + +\newbox\MyVerticalBox +\stopbuffer + +\typebuffer[1] + +Next we fill that box with some mix of text (I have no clue what, as I just +copied it from some web page). 
+ +\startbuffer[2a] +\setbox\MyVerticalBox\hbox \bgroup + \NotoCJK + \startscript[hangul]% + \dorecurse{20}{通用规范汉字表 \stripe{test #1} }% + \unskip % remove last space + \stopscript +\egroup +\stopbuffer + +\typebuffer[2a] + +We then apply the \LUA\ magic to the result: + +\startbuffer[3a] +\ctxlua{document.go_vertical(\number\MyVerticalBox)} +\stopbuffer + +\typebuffer[3a] + +and finally assemble the result: + +\startbuffer[4a] +\ruledvbox orientation 1 to \textwidth \bgroup + \setupinterlinespace[40pt] + \hsize .95\textheight + \unhbox\MyVerticalBox + \vfill +\egroup +\stopbuffer + +\typebuffer[4a] + +The result is shown in \in {figure} [fig:verticalmagic-1]. Of course this +approach is not that user friendly but it just serves as an example. In \CONTEXT\ we +can follow a different route. First we define a new font feature. It is probably +clear that we need some code elsewhere that does something useful with this +information, but I will not show this as it is rather \CONTEXT\ dependent. 
+ +\startbuffer[2b] +\definefontfeature + [vertical] + [vertical={% + orientation=3,% + down=.1,% + right=.1,% + ranges={% + cjkcompatibility,% + cjkcompatibilityforms,% + cjkcompatibilityideographs,% + cjkcompatibilityideographssupplement,% + cjkradicalssupplement,% + % cjkstrokes,% + cjksymbolsandpunctuation,% + cjkunifiedideographs,% + cjkunifiedideographsextensiona,% + cjkunifiedideographsextensionb,% + cjkunifiedideographsextensionc,% + cjkunifiedideographsextensiond,% + cjkunifiedideographsextensione,% + cjkunifiedideographsextensionf,% + }% + }] +\stopbuffer + +\typebuffer[2b] + +We apply this feature to a font: + +\startbuffer[3b] +\definefont + [NotoCJKvertical] + [NotoSansCJKtc-Regular*default,vertical @ 24pt] +\stopbuffer + +\typebuffer[3b] + +\startbuffer[4b] +\setbox\MyVerticalBox\hbox\bgroup + \NotoCJKvertical + \startscript[hangul]% + \dorecurse{20}{通用规范汉字表 \stripe{test #1} }% + \unskip + \stopscript +\egroup +\stopbuffer + +\typebuffer[4b] + +\startbuffer[5b] +\ruledvbox orientation 1 to \textwidth \bgroup + \setupinterlinespace[40pt] + \hsize .95\textheight + \unhbox\MyVerticalBox + \vfill +\egroup +\stopbuffer + +\typebuffer[5b] + +The result is shown in \in {figure} [fig:verticalmagic-2]. Again this approach is +not that user friendly but it already is a bit easier. 
+ +\startplacefigure[reference=fig:verticalmagic-1,title={Some vertical magic using manipulations.}] + \getbuffer[1,2a,3a,4a] +\stopplacefigure + +\startplacefigure[reference=fig:verticalmagic-2,title={Some vertical magic using fonts.}] + \getbuffer[1,2b,3b,4b,5b] +\stopplacefigure + +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-evolution.tex b/doc/context/sources/general/manuals/followingup/followingup-evolution.tex new file mode 100644 index 000000000..730f4cc1b --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-evolution.tex @@ -0,0 +1,373 @@ +% language=us + +\startcomponent followingup-evolution + +\environment followingup-style + +% Yes, music is still evolving in qualitative ways ... +% +% Home Is - Jacob Collier with VOCES8 +% +% and as long as there's interesting new music to run into I keep +% doing these kinds of things. + +\startchapter[title={Evolution}] + +\startsection[title={Introduction}] + +The original idea behind \TEX\ is that of a relatively small kernel with (either +or not system dependent) extensions. One such extension is the \DVI\ backend, and +later \PDFTEX\ added a \PDF\ backend. Other extensions are \quote {writing to +files} and \quote {writing to the output medium} using so called specials. This +extension mechanism permits \TEX\ to support, for instance, color and image +inclusion. + +The \LUATEX\ project started from \PDFTEX, including its extensions like font +expansion, and combined that with (bi|)|directional typesetting from the, at that +moment, stable \OMEGA\ variant \ALEPH. During the more than a decade development +we integrated expansion in a more efficient way and limited directions to the +four that made sense. The assumption that \UNICODE\ has the future led to \UTF8 +being used all over the place. + +The \LUATEX\ variant opens up the internals using the \LUA\ extension language. 
+The idea was (and still is) that instead of adding more and more hard coded +solutions, one can use \LUA\ to do it on demand. So, for instance \OPENTYPE\ +fonts are supported by providing a font file reader but the implementation of +features is up to \LUA. From \PDFTEX\ the graphic inclusions were inherited but +an image and \PDF\ reading library provided a few more possibilities, for +instance for querying properties. An important integral part of \LUATEX\ is the +\METAPOST\ library, but apart from that one, the number of libraries is kept at a +minimum. That way we're free of dependencies and compilation hassles. + +With version 1.0 the functionality became official and with version 1.1 the +functionality became more or less frozen. The main reason for this is that +further extensions would violate the principle of using \LUA\ instead of hard +coding solutions. Another reason is that at some point you have to provide a +stable machinery for macro packages so that backward as well as forward +compatibility over a longer period is possible. Also, because one can use \TEX\ +in (unattended) workflows sudden changes become undesirable. + +\stopsection + +\startsection[title={What next?}] + +Does it stop here? We have reached a reasonably stable state with \CONTEXT\ +\MKIV\ and can basically do what we want to do. However, during the more than a +decade development of this \MKII\ follow up, the idea surfaced that we can go +more minimal in the engine. Basically we can go back to where \TEX\ started: a +core plus extension mechanism. What does that mean? First of all, there is the +very efficient frontend: scanning macros, expanding them and constructing node +lists, all within a powerful grouping mechanism. There is no reason to reconsider +that. The core of the interface is also well documented, for instance in the +\TEX\ book. We added some primitives to \LUATEX, but most of them are of no real +importance to users; they make more sense to macro package writers. 
+ +Original \TEX\ has a \DVI\ backend which is a simple representation of a page: +characters and rules positioned on some grid. A separate program has to convert +that into something for a printer. There is a basic extension mechanism that +permits injection of so called specials that get passed to the external program +so that for instance an image can be included. Given that \LUATEX\ is mostly used +to generate \PDF, using so called wide fonts in a \UNICODE\ universe, a \DVI\ +backend is not that useful. In fact, one can then better use the faster \PDFTEX\ +program or just \ETEX\ or \TEX: use the best tool available for the job. + +The backend however can be left out and can be implemented in \LUA\ instead. In +fact, most of the backend related code in \CONTEXT\ doesn't really use the +\LUATEX\ backend features at all. The backend is only used to convert the page +stream to a \PDF\ content stream, include images, include fonts and manage low +level objects. Everything specific to \PDF\ is already done in \LUA. Of course +this has a performance penalty but given the overhead already present in +\CONTEXT\ it is bearable. + +Alongside the frontend the \METAPOST\ library plays an important role in +\CONTEXT: integration between \TEX, \METAPOST\ and \LUA\ is pretty tight and a +unique property of \CONTEXT. But, for instance the font reader library is no +longer used. Also the interfacing to the \TEX\ Directory Structure was done in +\LUA, originally for performance reasons as it reduced startup time by more than +a second. For some of the frontend code (like hyphenation and par building) we +can kick in \LUA\ variants too but there is not much to gain there. (I know that +some users use them with success.) + +So, traditional \TEX\ can be summarized as: + +\starttyping +tex core + dvi backend + tex extensions +\stoptyping + +where the extension interface provides a few goodies.
If we would have to summarize +\LUATEX\ we could say: + +\starttyping +tex core + dvi & pdf backend + tex extensions + lua callbacks +\stoptyping + +The core interprets the input and does the typesetting. In order to be able to +typeset \TEX\ only needs the dimensions of characters and information about +spacing (which in principle are sort of independent); in math mode a few more +properties are needed, like snippets that make large symbols. In text mode +ligature and kerning information can be used too. However, in \LUATEX, where +normally \OPENTYPE\ fonts are used, that information is provided from \LUA. This +means that one can also think of: + +\starttyping +tex core + basic font data + tex extensions + lua callbacks +\stoptyping + +Compared to regular \TEX\ this is not that different, and it's what \CONTEXT\ can +do with. So, it will be no surprise that when I wondered what \LUATEX\ 2.0 could +be that a more minimalistic approach was considered: back to the basics. + +\stopsection + +\startsection[title={Roadmap}] + +Before I continue it is good to mention the following. One of the burdens that +\CONTEXT\ users (and developers) carry is that the outside world likes putting +labels on \CONTEXT, like \quotation {A macro package depending on \PDFTEX} in a +time that we supported \DVI\ at the same level using a more or less generic +driver model. The same is true for \MKIV, e.g.\ \quotation {\CONTEXT\ uses a lot +of \LUA\ and moves away from \TEX} while in fact we provide a hybrid tool: you +can use \TEX\ input (which most users do) but also \LUA\ (which can be handy) or +\XML\ (which some publishers demand and definitely seems to be used by some +\CONTEXT\ power users). A special one is \quotation {\CONTEXT\ is kind of plain +\TEX, so you have to program all yourself.} Reality is that \CONTEXT\ is an +integrated system, where \TEX\ and \METAPOST\ work together to provide a lot of +integrated functionality.
Because of \LUATEX\ development and the relation +between an updated engine and the beta version of \CONTEXT, the impression can be +that we have an unstable system. This strategy of parallel adaptation is the only +way to really test if things work as expected. Because we have a rather fast +update cycle normally users don't suffer that much from it. + +The core of whatever we follow up with is and remains \TEX, just because I like +it. So, when I talk about a small core, I actually still talk about \TEX. The +main reason is that it's way easier (and readable) to code some solutions in this +hybrid fashion. A pure \LUA\ solution is no fun, maybe even a pain, and I have no +use for it, but a pure \TEX\ solution can be cumbersome too. And \TEX\ input is +just very convenient and for that one needs a \TEX\ interpreter. I would already +have dropped out when \TEX\ was not part of the game: an intriguing, puzzling and +powerful toy. And \METAPOST\ and \LUA\ add even more fun. So, I settle for a mix +between three interesting languages. And, because I seldom run into professional +demand for \LUATEX\ related support (or high end, high performance rendering), +the fun factor has always been the driving force. + +All that said, for practical reasons, when we explore a follow up in the +perspective of \CONTEXT, we will use the working title \LUAMETATEX\ instead. +\LUAMETATEX\ has the current \LUATEX\ frontend, some \LUA\ libraries, but no +backend. Gone are the font reader, image inclusion, \DVI\ and \PDF\ backend +(including font inclusion) and the interface to the \TDS. Can that work? As +mentioned, the font reader was already not used in \CONTEXT\ for quite a while. An +alternative page stream builder was also in good working condition in \CONTEXT\ +when \LUATEX\ 1.08 was released and around \LUATEX\ 1.09 image inclusion was +replaced (\PDF\ inclusion was already accompanied for a while by a \LUA\ +variant).
Currently (fall 2018) \CONTEXT\ is able to completely construct the +\PDF\ file which also meant font inclusion. However, it didn't make much sense to +release that code yet because after all, there was minimal gain when using it +with a full blown \LUATEX. Also, switching to this variant involved some runtime +adaption of code which might confuse users. But above all, it needed more +testing, and releasing something before an upcoming \TEX Live code freeze is a +bad idea. + +During \LUATEX\ development a few times we got suggestions for additional +features but merely looking at them already made clear that what works for +someone in a particular case, can introduce side effects that make (for instance) +\CONTEXT\ fail. And, how many folks keep \CONTEXT\ in mind? So, when \LUATEX\ +goes into maintenance mode, specific distributions could accept patches outside +our control, which has the danger that a binary (suggesting to be \LUATEX) +doesn't work with \CONTEXT. Of course we cannot change something ourselves either +without looking around. And I'm not even bringing possible negative side effects +on performance into the discussion here. + +When developing \LUATEX\ some ideas were dropped or delayed and these can now be +explored without the danger of messing up the stable version. It has always been +relatively easy to adapt \CONTEXT\ to changes so an (at least for now) +experimental follow up can be dealt with too, but this time the concept of \quote +{experimental} is really bound to \CONTEXT. When something is found useful (or +can be improved) it can always (after testing it for a while) be fed back into +\LUATEX, as long as it doesn't break something. I'll decide on that later. + +In the documentation of \TEX, when discussing the extension mechanism, Donald +Knuth says: + +\startquotation +The goal of a \TEX\ extender should be to minimize alterations to the standard +parts of the program, and to avoid them completely if possible. 
He or she should +also be quite sure that there's no easy way to accomplish the desired goals with +the standard features that \TEX\ already has. \quotation {Think thrice before +extending}, because that may save a lot of work, and it will also keep +incompatible extensions of \TEX\ from proliferating. +\stopquotation + +With the in the next chapters discussed reduction of backend and some frontend +code, combined with hooks that can trigger callbacks, we try to come close to +this objective. Now, the last sentence of this quote relates to stability and +this is also a reason why we enter this new thread: the smaller the core is, the +less subjected we are to change. Think of this: I haven't used \CONTEXT\ \MKII\ +in over a decade. A \PDFTEX\ format still gets generated but I have no clue if +the engine has been changed in ways that make some code behave differently (it +could also be the ecosystem related to that engine), but I assume it's still +behaving the same. The same has to become true for stock \LUATEX\ and \MKIV\ and +for \CONTEXT\ it can even become more true with \LUAMETATEX. We'll see. + +\stopsection + +\startsection[title={Experiments}] + +This (still sort of) prototype of what \LUAMETATEX\ could be boils down to a much +smaller binary, and not that much more \LUA\ code on top of what we already have. +There are no longer dependencies on third party code, apart from \LUA\ (\type +{pplib} is tuned for \LUATEX\ and permanent part of the code base). Performance +wise the backend of the experimental version makes a run up to 5\% slower than +when using a native backend (on processing the \LUATEX\ manual) but history has +taught us that we can gain some of that back in due time. Performance also depends +a bit on the properties of the document. Interesting is that better control over +the output showed that \PDF\ output of the mentioned manual was a bit smaller +(but that might change).
\footnote {In the meantime the experimental version can +process the \LUATEX\ manual 5\endash10\% faster and the result is still smaller.} + +The experiments actually started already years ago with no longer using the font +loader. It sort of went this way: + +\startitemize +\startitem + Stepwise \CONTEXT\ functionality started using a combination of \TEX\ and + \LUA\ code and we got an idea of what was needed. The most demanding part + was support for fonts. +\stopitem +\startitem + Font handling was done in \LUA\ because it's flexible which is what \TEX ies + are accustomed to. The \OPENTYPE\ and \PDF\ standards would not be called + standards if some implementation was impossible and so far we're ok. (Some + more script support will be provided in future versions.) +\stopitem +\startitem + We stopped using the fontforge font loader but use one written in \LUA\ + instead. One reason for this was that when variable fonts showed up we wanted + to support it in \CONTEXT\ right from the start (not that there has been much + demand). The same is true for fonts using color (like emoji). Also, fighting + the built|-|in \FONTFORGE\ heuristics was hard. +\stopitem +\startitem + The (large and dependent on \CPLUSPLUS) poppler library used for \PDF\ + embedding has been replaced by a small lightweight library in pure \CCODE. + This was triggered at a chat during a bacho\TEX\ meeting. +\stopitem +\startitem + The hard coded \PDF\ inclusion can be swapped with a \LUA\ based one so that + we can for instance filter the page stream. We already had a hybrid solution + in \CONTEXT\ anyway for other reasons (merging annotations, layers, + bookmarks, etc.). +\stopitem +\startitem + The page stream constructor got replaced (shipout and xforms) by a \LUA\ variant, + but I decided not to make that an independent option in stock \LUATEX\ with + \CONTEXT\ \MKIV, although for a while I had the option \type {--lmtx} for + activating that experimental code.
+\stopitem +\startitem + Then of course bitmap image inclusion had to be done by \LUA\ code, in order + to see if we can get rid of another external dependency as some of these + libraries get frequent updates while in practice we only use a very small + subset of functionality. Indeed this was possible. \footnote {I have a pure + \LUA\ parser for \PDF\ too, so at some point that might get included in the + \CONTEXT\ code base.} +\stopitem +\startitem + With some effort (deciphering specs and such) the font inclusion could also + be done by \LUA. This was made possible by the fact that we already had + support for variable fonts. More tricks are possible and will be explored. +\stopitem +\startitem + Finally the \PDF\ file construction and \PDF\ object management had to be + implemented. This was actually the easiest part. +\stopitem +\stopitemize + +Performance wise the \LUA\ font loader is faster than the built in one. The same +is true for \PDF\ inclusion but in practice that is unnoticeable. Bitmap +inclusion is currently slower for interlaced images (seldom used in print) and +just as efficient for other types. The page stream constructor is definitely +slower but this is compensated by the faster font inclusion and \PDF\ file +construction. Of course it all depends on the kind of content, but these are the +observations as of fall 2018. Anyway, they were enough reason to continue this +experiment. + +One thing to keep in mind is that the smaller the binary and the less code paths +we have, the better future performance might be. Computers are not becoming much +faster for single thread processes like \TEX, so the less we jump around code +space (memory) the better it probably is for \CPU\ caching (as caches are not +growing much either).
+ +\stopsection + +\startsection[title={Conclusion}] + +Normally when writing this kind of code I make sure that I can enable such new +mechanisms on top of others but at some point one has to decide how to really +integrate them. For instance, we can do font inclusion independent of \PDF\ +generation or page stream construction independent of \PDF\ generation and|/|or +font inclusion but in the end that doesn't make sense and makes the code base a +bit of a mess. So, this is how it will go. + +Stock \LUATEX\ with \MKIV\ will use the normal backend but probably there might +be an option to overload the built|-|in image inclusion so that one can avoid the +abortion of a run in case of problematic images. Complete \PDF\ file +construction, which then also includes page stream construction, font embedding +and object management might be available as option for \MKIV\ with \LUATEX\ 1.10 +(for a while) but will be default when using \LUAMETATEX. When we move on \LMTX\ +support might evolve in more sophisticated trickery. \footnote {A few months +later I decided that this made no sense, and that it was cleaner to just leave +that approach for \LMTX\ only. So, now both engines use different code +exclusively.} + +Once tested a bit in real documents experimental code will end up in the +distribution. That code can then be turned into production code (read: cleaned up +and reshuffled a bit). We can streamline the engine code base: strip the +components that are not needed any more, remove some obsolete features, optimize +the code, strip some functions from \LUA\ libraries, rename some helpers, and +finally add some documentation. There are some plans to extend \METAPOST\ so also +things can get added. 
Concerning the \LUA\ interface it means that \type +{slunicode} is removed, the embedded socket related \LUA\ code goes external (but +the library stays), the font loader gets removed, the \type {img} library goes +away, no longer \PNG\ libraries are embedded, synctex is stripped out (but the +fields in nodes stay or get extended). \footnote {Much later I also decided to +remove the zip file reader library.} The resulting binary will be much smaller +and the code base more independent and smaller too. In the process \LUAJIT\ +support might be dropped as well, simply because it no longer is in sync with +stock \LUA, but that also depends on how complex long term maintenance becomes. +\footnote {As we will see in following chapters, indeed support for \LUAJIT\ has +been dropped while \LUA\ got upgraded to 5.4.} + +Because such a stripped down binary is no longer what got presented as \LUATEX\ +version~1, it will basically become \LUATEX\ version 2, but then we have the +problem that its binary name clashes with the original. This is why it will be +run as \typ {luametatex}. For \CONTEXT\ it's not that relevant as it will run on +both \LUATEX\ 1.10 and its lean and mean successor. I might also provide a plain +\TEX\ (read: generic) version but that is to be decided because it probably +doesn't make much sense to spend time on it. As usual we will test this within +the \CONTEXT\ beta program. The good thing is that it doesn't interact with +\LUATEX, so that other macro packages are not affected. Another side effect can +be that we uncover issues with \LUATEX\ 1.10 and that we can experiment with some +improvements that we feed back into the parent. + +At the \CONTEXT\ end of this there are some plans to extend the export, maybe +improve already present \PDF\ tagging (if found useful), add some more input +(xml) manipulations, and maybe extend (virtual) font handling a bit, now that we +no longer are bound to the currently used packet model. 
Contrary to what one +might expect this is not really dependent on the engine. + +How do we proceed? As with the transition from \MKII\ to \MKIV, it will all +happen stepwise. This means that for a while the code base will be a bit hybrid +but at some point it might be partially split to make things cleaner, not that I +expect many fundamental differences (certainly not in the front|-|end). This +dualistic approach means more work but also makes that we keep a working +\CONTEXT. We also need to keep an eye on for instance generic commands as used in +tikz: we can't drop them so we emulate them (so far with success). At the time of +this writing, begin November 2018, the \CONTEXT\ test suite can be processed in +\LMTX\ mode without problems so I'm confident that it will work out ok. The next +chapter describes the results of how we did the above in more detail. + +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-feedback.tex b/doc/context/sources/general/manuals/followingup/followingup-feedback.tex new file mode 100644 index 000000000..d77ef302c --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-feedback.tex @@ -0,0 +1,306 @@ +% language=us + +\startcomponent followingup-feedback + +\environment followingup-style + +\logo [AMD] {AMD} +\logo [INTEL] {Intel} + +\startchapter[title={Feedback}] + +\startsection[title={Introduction}] + +As \LUATEX\ 1.10 is basically frozen in terms of functionality not much can or +will be added. But it made sense that some of the (small) improvements that were +made in \LUAMETATEX\ got fed back to \LUATEX\ (or will be at some point). Because +we are also experimenting, there can be a delay. \footnote {Later chapters +mention a few more possible extensions.} + +Of course the question is \quotation {Should we feedback (retrofit) at all?}. I'm +still not sure about it.
There should be a good reason to do it because it can +harm stability of the original. At some point \CONTEXT\ can default to the follow +up in which case testing the original becomes more difficult for specific +features. I never ran into (useful) demands for extensions so retrofit can have a +low priority. + +Another factor is that when distributions start adding stuff to stock \LUATEX\ on +top of what is our default (after all isn't that what happens with open source +projects), it makes not much sense to look back and retrofit new functionality, +because there is not much chance that we will use such a variant ourselves and we +could introduce errors in the process. Providing bloatware is not our objective. + +Related to this is the question if we should always go into \LMTX\ mode and I'm +no longer sure if we shouldn't do that. We can use plain \TEX\ with the regular +\LUATEX\ backend and just forget about some generic framework. The danger of it +backfiring is just too large. It is a waste of time and will keep us back. + +One reason for a dual mode is that it made possible some timings in order to +determine bottlenecks. I did some rough tests and that is enough to get the +picture. Take this document: + +\starttyping +\starttext + \dorecurse + {1000} + {\samplefile{sapolsky} {\bf\samplefile{sapolsky}}\par} +\stoptext +\stoptyping + +Using regular \LUATEX\ this takes on an \INTEL\ i7-3840 mobile processor about +9.3 seconds while \LUAMETATEX\ needs 11.2 seconds, so we lose time. This is +because we have only text so the native backend wins on piping out the page +stream. On my domotica fitlet with a low power \AMD\ A10 processor running +\LINUX\ the runtime goes from 25.4 seconds to 27.8 seconds, so again a slow down. + +But this is not a typical document! Add a bit more spice and the numbers reverse. +For processing the \LUATEX\ manual stock \LUATEX\ takes 12.6 seconds on the +\INTEL\ and \LUAMETATEX\ needs 12.4 seconds.
On the \AMD\ runtime goes from 35.1 +seconds down to 32.8 seconds. So here we win some. + +These are rough timings and a few weeks later we got these timings on the \INTEL: +\footnote {On the more modern gaming laptop of a nephew we measured half these +numbers.} + +\starttabulate[|l|c|c|c|] +\BC engine \BC backend \BC runtime \BC \LUAJIT\ vm \BC \NR +\HL +\NC \LUATEX\ 1.10 \NC normal \NC 12.4 \NC 9.9 \NC \NR +\NC \LUATEX\ 1.10 \NC lmtx \NC 12.7 \NC 9.8 \NC \NR +\NC \LUAMETATEX\ 2.00 \NC lmtx \NC 12.2 \NC 9.3 \NC \NR +\stoptabulate + +Because we have more \LUA\ code active, we pay a price with \LMTX\ but not on +\LUAMETATEX\ (as of now, later we will see a performance bump). The gain when +using the \LUAJIT\ virtual machine is more noticeable. And, there is probably +some more to gain. In case you wonder why this matters: think of the low power +\AMD\ processor. When we have to replace computers we can consider using low +power ones, with weaker processors, less memory, and less cache. For the record: +I use cross compiled \MINGW\ binaries on windows (they are quite a bit faster +than native windows binaries). And the binaries are less than 3MB (small files +and less resources are nice when running on remote file systems). + +This all indicates that we have no real reason to stick to a mixed approach: if we +want we can just switch to always \LMTX\ and never look back. + +\stopsection + +\startsection[title={Expressions}] + +When writing an article that involved using a \type {\numexpr} it struck me that +we should have a proper integer division. Just compare these: + +\startbuffer[1] +\the\numexpr 13/2\relax +\stopbuffer + +\startbuffer[2] +\scratchcounter13 \divide\scratchcounter 2 \the\scratchcounter +\stopbuffer + +\typebuffer[1] + +and + +\typebuffer[2] + +This gives {\bf \inlinebuffer[1]} and {\bf \inlinebuffer[2]}. We now also have: + +\startbuffer[3] +\the\numexpr 13:2\relax +\stopbuffer + +\typebuffer[3] + +which gives {\bf \inlinebuffer[3]}.
I considered using a double slash (as in +\LUA) but using a colon is easier. Of course those who make that an active +character are probably toast. This is an easy patch but it's hard to predict +possible side effects outside \CONTEXT. + +\stopsection + +\startsection[title={Looking ahead}] + +Sometimes you want to look ahead and act upon the presence of a specific +character. Implementing that in pure \TEX\ primitives is no big deal, but +especially when you want to ignore leading spaces it leads to rather verbose code +when tracing is enabled. Out of curiosity I played with a primitive that can help +us out. Although there is also a performance gain, in practice one will not +notice that unless such a feature is used millions of times, but in that case the +gain is noise compared to the rest of the run. + +\startbuffer +\def\foo{\futureexpand/\fooyes\foonop} +\def\fooyes/#1/#2{[#1:#2]} +\def\foonop #1{(#1)} + +\foo/yes/{one}\quad +\foo {two} +\stopbuffer + +\typebuffer + +We either expand \type {\fooyes} or \type {\foonop}, depending on the presence +of a \type {/} after \type {\foo}. So, the result is: + +{\tttf\getbuffer} + +The next examples demonstrate two variants. The second one doesn't inject spaces +back into the stream. + +\startbuffer +\def\f<{\futureexpand/\y\n} +\def\y/#1/{#1} +\def\n {} + +(\f</yes/>)\quad +(\f< >) +\stopbuffer + +\typebuffer + +Watch the space in the \type {\n} case. + +{\tttf\getbuffer} + +\startbuffer +\def\f<{\futureexpandis/\y\n} +\def\y/#1/{#1} +\def\n {} + +(\f</yes/>)\quad +(\f< >) +\stopbuffer + +\typebuffer + +This time the space is not injected (\type{is} is short for ignore spaces). + +{\tttf\getbuffer} + +I will probably use this one in \CONTEXT, but as said, not for performance +reasons but because it reduces code and therefore tracing. \footnote {In the +\CONTEXT\ code base there are several places where less code takes precedence +over efficiency.
But in all cases extensive tests were done to see if it made a +dent in practical performance.} + +\stopsection + +\startsection[title={Checking numbers and dimensions}] + +The \CONTEXT\ user interface often uses parameters that take keywords as well as +a number or dimension. In practice it has never been an issue to check for that but +there are a few cases where we'd like to be a bit more flexible. + +\startbuffer +\doifelsenumber{123999999999999999}YN +\doifelsenumber {123}YN +\doifelsenumber {A}YN +\doifelsenumber {\char123}YN +\doifelsenumber {\toks123}YN +\doifelsenumber{123\scratchcounter}YN + +\doifelsedimension{123999999999999999pt}YN +\doifelsedimension {123pt}YN +\doifelsedimension {A}YN +\doifelsedimension {\char123}YN +\doifelsedimension {\toks123}YN +\doifelsedimension {123\scratchdimen}YN +\stopbuffer + +\typebuffer + +This typesets: + +\startpacked +\getbuffer +\stoppacked + +Especially the \type {123\scratch...} checking is tricky. For that reason we now +have two new built|-|in checkers. Again, performance is not one of the arguments, +because these checks are not much faster than what we have already, they are just +a bit more robust for weird cases. A possible use of the primitives is: + +\starttyping +\ifdimen123\or + yes +\else % or \or + no +\fi +\stoptyping + +and: + +\starttyping +\ifnumber123\or + yes +\else % or \or + no +\fi +\stoptyping + +When a valid number or dimension is gobbled, the value pushed in the branches +is~1, and when an error is encountered the value~2 is pushed. Deep down we have +just an \type {\ifcase} and by not using the value zero we nicely skip the +invalid code. It might look a bit weird but we need a sentinel for the number +(and the \type {\or} serves as such), without introducing strange new constructs. +We'll see if we keep it (as testing must prove its usefulness).
+ +\stopsection + +\stopsection + +\startsection[title={Comparing tokens}] + +The following code compares (for instance) two strings: + +\starttyping +\def\thisorthat#1#2% + {\edef\one{#1} + \edef\two{#2} + \ifx\one\two + this% + \else + that% + \fi} + +\thisorthat{foo}{bar} +\stoptyping + +but this looks a bit cleaner (in a trace): + +\starttyping +\def\thisorthat#1#2% + {\iftok{#1}{#2}% + this% + \else + that% + \fi} + +\thisorthat{foo}{bar} +\stoptyping + +It's not that much faster (unless one uses it a real lot) as similar things have +to happen to get the test to work. But the nice things of this checker is that it +works with token registers and macros too. But in order use it in relevant places +in \CONTEXT\ I would have to adapt quite some code. This could actually be a +reason for a \MKIV\ freeze and \LMTX\ code base (as with \MKII). The question is: +does it pay off? + +\stopsection + +\stopchapter + +\stopcomponent + +% \newbox\mybox \setbox\mybox\ruledhbox{\blackrule[width=3cm,height=1cm]} + +% \dontleavehmode\copy\mybox\quad \ruledhbox{\copy\mybox} \blank[3*line] +% \boxxmove\mybox 10pt +% \dontleavehmode\copy\mybox\quad \ruledhbox{\copy\mybox} \blank[3*line] +% \wd\mybox\dimexpr\wd\mybox+10pt\relax +% \dontleavehmode\copy\mybox\quad \ruledhbox{\copy\mybox} \blank[3*line] +% \boxymove\mybox 10pt +% \dontleavehmode\copy\mybox\quad \ruledhbox{\copy\mybox} \blank[3*line] +% \ht\mybox\dimexpr\ht\mybox+10pt\relax +% \dontleavehmode\copy\mybox\quad \ruledhbox{\copy\mybox} \blank[3*line] diff --git a/doc/context/sources/general/manuals/followingup/followingup-introduction.tex b/doc/context/sources/general/manuals/followingup/followingup-introduction.tex new file mode 100644 index 000000000..c4a5920bd --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-introduction.tex @@ -0,0 +1,104 @@ +% language=us + +\startcomponent followingup-introduction + +\environment followingup-style + +\startchapter[title={Introduction}] + +This 
document, the fifth in a series, describes the follow up project on +\CONTEXT\ \MKIV\ & \LUATEX\ which carries the working title \CONTEXT\ \LMTX. This +four letter acronym represents \LUA, \METAPOST\ and \TEX, and if you want you can +see the last character representing \XML, as that has been an integral part of +\CONTEXT\ for a long time. But the \quote {x} can also be found in \quote +{experimental}, \quote {extreme}, \quote {experience} and \quote {extravagant}, +so take your choice. + +Of course \CONTEXT\ is and will be a typesetting system using the \TEX\ language +and typesetting core, but a rather substantial amount of the functionality is a +hybrid of \TEX\ macros and \LUA\ code. The built|-|in graphic support is driven +by \METAPOST, but there we also use \LUA\ as an extension language. The \LUA\ +machinery is used for alternative input and handling data too. The same is true +for \XML, \SQL, \CSV, \JSON, etc. + +The output from \CONTEXT\ is normally \PDF\ and \MKIV\ doesn't even enable \DVI\ +output. Mid 2018 I started experimenting with a backend that no longer used the +one provided by the engine. After all, we only used page stream building, font +embedding and bitmap inclusion and all other features were always done in \LUA. +The experiments also concerned a \METAPOST\ and \LUA\ backend. Those familiar +with \CONTEXT\ know that there is already an export feature which till now runs +in parallel with the \CONTEXT\ \PDF\ backend (it started as a kind of joke but in +the end was seen as relevant and kept and maybe at some point I will rewrite that +code). + +The idea behind \CONTEXT\ \LMTX\ is that we will use a minimalist engine. Being +minimalist also means that probably only \CONTEXT\ will use it and therefore no +other package will be affected by further experiments, although at some point a +sort of general low level layer might be provided. The frontend is mostly the +same as \LUATEX\ 1.1 but the backend and related code is gone and|/|or different.
+Libraries have been (and are being) cleaned up and reorganized too. At least for a +while, \CONTEXT\ will work on \LUATEX\ 1.1 (stable) as well as its (experimental) +follow up, where the follow up will evolve over a few years and be tested in the +usual \CONTEXT\ (garden) beta setting. The next chapters will explain this in +more detail. + +Just to be clear I repeat: \LUATEX\ 1.1 will be supported by \CONTEXT\ and +maintained as usual, including binaries generated on the \CONTEXT\ garden. We've +invested many years in it and it serves its purpose well, but our experiments +will happen in its follow up, so that it doesn't affect stable workflows. Of +course there have been (and probably are) bugs in \LUATEX\ but the engine could +be used pretty well right from the start with \CONTEXT. The same will be true for +the follow up. + +One of the ideas of the follow up is to provide a combination of a stable engine +independent of libraries with a relatively simple compilation setup and a macro +package that has proven to exploit a mix of \TEX, \METAPOST\ and \LUA. As a side +effect I can explore some postponed ideas. Of course there can be valid reasons +to move to the successor sooner. In that case we might create a stable snapshot +of \MKIV\ as was done with \MKII. As to be expected in \CONTEXT, the user +interfaces won't change nor will the functionality, but there will be two code +paths, one for \MKIV\ and one for \LMTX. There will also be new functionality in +\CONTEXT\ that is only available in \LMTX. So, eventually we expect all users to +migrate. + +In the beginning of december 2018 most of the work was done and users involved in +development could start testing. By the end of the year a reasonable stable state +was reached. In 2019 the code base was further overhauled and libraries got +upgraded. The code base became smaller and compilation easier, smoother and much +faster.
Eventually the source code (now some 11MB uncompressed and 3MB +compressed) will be part of the \CONTEXT\ distribution, so that we have a +complete package (also in the archival sense). + +The next chapters discuss the process and choices that were made. The chapters +were written in order so later chapters can amend earlier ones. Consider it a +history, and one cannot cheat by patching history. In some cases footnotes were +added to earlier chapters when writing later ones. It's not a manual! Reported +typos (for sure there are many) will be fixed but changes in later versions of +the follow up discussed here will not end up in this document. + +This document is dedicated to Wolfgang Schuster, who has been instrumental in the +transition from \MKII\ to \MKIV, and often baffles me with his knowledge of the +(even obscure) bits of the \CONTEXT\ internals. Without him checking the code +base, fundamental changes like those that are and might get introduced in this +follow up are impossible. + +I want to thank Alan Braslau who accompanies me on this journey and patiently +compiles the lot for some platforms. He, Thomas Schmitz and Aditya Mahajan are +examples of power users who also are early adopters of something new like this +and are willing to take the risks. And of course there is Mojca Miklavec without +whose enthusiasm and optimism developments like this would never take place. In +the meantime Luigi Scarso made sure that the (frozen) \LUATEX\ code base served +existing users. It is hard to tell how users experience the transition: there are +not that many issues reported which can be a good or bad sign. We will see.
+ +\blank[2*big] + +\startlines +Hans Hagen +PRAGMA ADE, Hasselt NL +August 2018\enspace\endash\enspace May 2019 +\stoplines + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-logging.tex b/doc/context/sources/general/manuals/followingup/followingup-logging.tex new file mode 100644 index 000000000..e82df3c04 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-logging.tex @@ -0,0 +1,58 @@ +% language=us + +\startcomponent followingup-logging + +\environment followingup-style + +\startchapter[title={Logging}] + +\startsection[title={Introduction}] + +In \CONTEXT\ we have quite some logging enabled by default and even more when +you enable trackers. Most logging is done with \LUA, which is quite efficient. +Information from the \TEX\ machinery follows a different path and one reason +for that is that it often happens on a character (or small strings) basis. + +The runtime of a job is, in spite of what one may expect, also dependent on the +speed of the console: what fonts are used (there can be font features being +applied), is the output buffered, and with what delays, how large is the history, +etc. When more complex fonts arrived I found out that on \OSX\ generating a +format was impacted by seconds. When on \MSWINDOWS\ the normal console was used +its character|-|by|-|character flushing made it sluggish, and on \LINUX\ it +depended on the font, kind of console, delays, etc. Lucky me, the \SCITE\ editors +log pane beats them all. \footnote {I use the \LINUX\ subsystem on \MSWINDOWS\ +for cross compiling \LUATEX, and with the advent of that subsystem the regular +console was also rewritten so most of the delays are gone now.} + +At the \TEX\ end a few decades of coding has made the system also complex. 
+\footnote {Interfaces like that are only partly defined by \TEX\ and left to the +implementation.} Each string goes through a mechanism that checks which line +ending to apply and where to cut off lines exceeding a preset maximum length, +where \LUATEX\ also needs to take \UTF\ into account. Some characters can +(optionally) be escaped with \type {^^} and occasionally the line length gets +reset by explicit newline commands. + +In \CONTEXT\ already for a long time we always used an (at least) 10K line length +and disabled output escaping. We have consoles that can handle long lines and +live in an \UTF\ world so escaping makes no sense. And, when \OPENTYPE\ features +get applied random line breaks can interfere badly. Just in case one wonders what +happens with so called \type{null} characters: as all goes through \CCODE\ +anyway, such a character just terminates a string. Therefore the line length +limitations have been removed and the line|-|ending substitution has been optimized. In +principle this gives simpler code and less overhead. + +The log is not always compatible with \LUATEX. For instance we output more details +about node lists. This is natural because we have more subtypes and these can +provide additional information (clues) when debugging \TEX\ code. + +In \LUATEX\ the error handling is already such that some can be delegated to +\LUA, and later I will look into more isolation. But, error handling is quite +interwoven in the code and I don't want to mess up the original concept too much.
+\footnote {Indeed the error handling was redone in such a way that we now have an +even better isolation.} + +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-lua.tex b/doc/context/sources/general/manuals/followingup/followingup-lua.tex new file mode 100644 index 000000000..f0f3350a3 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-lua.tex @@ -0,0 +1,151 @@ +% language=us + +\startcomponent followingup-lua + +\environment followingup-style + +\startchapter[title={\LUA}] + +\startsection[title={Move to 5.4}] + +Another experiment concerned testing \LUA\ 5.4 which looks like a minor update in +terms of new functionality but has some consequences. By now the old module model +is even more deprecated and compatibility mode no longer makes much sense. As a +consequence we now need to adapt the way libraries are loaded (and we use global +ones) and a few other low level calls had to be adapted. This is no real issue +and once that was done, I found out that the bit32 module was even more obsolete +so I decided to get rid of it. We already have a bit32 replacement in \CONTEXT\ +so I had to enable that. As \CONTEXT\ doesn't need compatibility mode it was no +problem to drop that too. + +The biggest changes in 5.4 are under the hood: some optimized byte code and a new +generational garbage collector. I did a few runs and a 12.4 seconds run on the +manual now dropped to around 12.1 and given that we spend (probably) more than +half the time in \LUA\ that means some 5\% gain in performance. This is still +more than the 9.6 seconds that \LUAJIT\ needs but it looks like every \LUA\ +release gains a bit and I'm pretty sure that there is more to gain. 
\footnote {In +the meantime there are experiments in 5.4 with \type {<const>} directives which +might have advantages.} + +An interesting experiment was to disable the automatic string to number +conversion when a number is expected but a string is given. So far I only had to +adapt two lines of code in the by now considerable amount of \LUA\ code +that comes with \CONTEXT. + +\stopsection + +\startsection[title={No more \LUAJIT}] + +One thing I had to consider was the future of \LUAJIT. This project is sort of +stalled and will not follow \LUA\ development. Now, to some extent we can deal +with this but with the faster \LUA\ 5.4 around the corner, the limitations of +\LUAJIT\ with respect to loading large tables, as well as the fact that we need a +patched hash function to get an advantage over regular \LUA\ anyway, it makes +sense to drop it in \LUAMETATEX. After discussing this with Alan, who crunched +numbers in order to make impressive graphics with \METAPOST, we came to the +conclusion that we should not overestimate the benefits. There is still a gain +but removing the need to support both could also make it possible to improve +existing code (although one should not expect too much from that; it's more a +matter of convenience for me). Also, for as long as we have \LUAJITTEX\ that is +still an option when one has to squeeze out every second. + +A valid question is if ditching \LUAJIT\ will harm users. The answer to this +depends on the kind of documents that you process. Given decent programming, you +can gain quite a bit of runtime, but on the average the difference is not that +large. There is for instance always the overhead of callbacks and crossing the so +called \CCODE\ boundary that has an impact. + +\stopsection + +\startsection[title={Performance}] + +At the time of writing this Thomas Schmitz was wondering if there was a +significant difference in runtime between the table mechanisms and especially +natural tables and extreme tables.
Some test demonstrated that extreme tables +were best for his case. That case concerned generating about 400 pages of tables +from \XML\ files, including some juggling of data in \LUA. The bottleneck in that +document can be roughly simulated with the following test. We assume one pass +over the table but in practice there are upto four, but only the last one has +frames. So, the test concerns 80.000 (400 pages with 40 rows of 5 columns) calls +to \type {\framed}. + +% 400 pages : 5 cells * 40 rows = 80000 framed + +\starttyping +1 \hpack{\framed {oeps}} +2 \hpack{\framed[frame=off] {oeps}} +3 \setupframed[frame=off] \hpack{\framed {oeps}} +4 \hpack{\framed[frame=on] {oeps}} +5 \setupframed[frame=on] \hpack{\framed {oeps}} +6 \hpack{\framed[frame=closed]{oeps}} +7 \setupframed[frame=closed] \hpack{\framed {oeps}} +\stoptyping + +\starttabulate[|c|c|c|c|] +\HL +\BC sample \BC luatex & mkiv \BC luajittex & mkiv \BC luametatex & lmtx \BC \NR +\HL +\NC 1 \NC 17.3 \NC 16.8 \NC 13.5 \NC \NR +\NC 2 \NC 17.8 \NC 17.2 \NC 14.0 \NC \NR +\NC 3 \NC 17.3 \NC 16.8 \NC 13.3 \NC \NR +\NC 4 \NC 17.9 \NC 17.4 \NC 13.7 \NC \NR +\NC 5 \NC 17.4 \NC 17.1 \NC 13.3 \NC \NR +\NC 6 \NC 17.4 \NC 16.8 \NC 12.9 \NC \NR +\NC 7 \NC 16.4 \NC 16.0 \NC 12.6 \NC \NR +\HL +\stoptabulate + +Even if we add the usual .1 second interval around these values it will be clear +that we gain enough not to worry about the loss of \LUAJIT, also because the gain +is not in the \LUA\ part only. A nice consequence of this is that when we replace +the \CPU's in a server with low power ones that perform 25\% less, we can +compensate that by using \LMTX. \footnote {There's still room for improvement, +because mid July 2019 we're at 12.9, 13.2, 12.9, 13.5, 13.0, 12.5 and 12.2 +seconds or less. But don't expect too many miracles.} + +When wrapping this up, the \LUATEX\ manual processed with \LMTX\ took slightly +less than 11.9 seconds, compared to a normal run of 12.6 seconds, so we're +gaining some there too. 
And just after I wrote this we went down to 11.7 seconds +by (as experiment) changing the \LUA\ virtual machine dispatcher, so there is +still some to gain. In the energy saving fitlet with small amd processor +processing the manual with stock \LUATEX\ takes about 37 seconds, but 33.5 with +\LMTX\ so here also we're not worse off. + +\stopsection + +\startsection[title={Modules}] + +Right from the start \LUATEX\ had some extra libraries linked in: \type {md5} +(for hashing), \type {lfs} (for accessing file properties), \type {slunicode} +(for basic \UTF\ handling), \type {gzip} and \type {zlib} (for zipping files and +streams), \type {zip} (for accessing zip files) and \type {socket} (for +communicating other than with files). + +In \LUAMETATEX\ the not so useful \type {slunicode} library was removed pretty +early but the others stayed around. The more backend specific \type {img} and +\type {pdf} libraries went away too, as did the (already not used) \type +{fontloader} library. The \type {kpse} library is also gone, as we do those +things in \LUA. The \type {epdf} library was kept. A couple of libraries were +added, like \type {sha2}, \type {basexx}, and \type {flate}, plus a few handy +helper libraries that are still experimental and therefore not mentioned here. + +The \type {flate} library is also an experiment but will replace the \type {gzip} +and \type {zlib} libraries. Currently these use \type {libz} but \type +{libdeflate} will be the low level replacement once it supports streams and is +already used for \type {flate}. The \type {md5} library has been redone using +utility code from \type {pplib}, as \type {sha2} does. The \type {basexx} library also +falls back on utility code from \type {pplib} (that code is actually +independent). + +The \type {lfs} code has been replaced by a variant that omits features not +common to the platforms and with an iterator that permits much faster directory +scans and has a few more helpers.
It is not compatible but we kept the name +because of legacy usage. I might strip the socket code to what is actually used, +but on the other hand: don't touch what works well. The original code doesn't +change that much anyway. + +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-mp.tex b/doc/context/sources/general/manuals/followingup/followingup-mp.tex new file mode 100644 index 000000000..22e8e8356 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-mp.tex @@ -0,0 +1,166 @@ +% language=us + +\registerctxluafile{mlib-scn}{} + +\startcomponent followingup-mp + +\environment followingup-style + +\startchapter[title={\METAPOST}] + +\startsection[title={Introduction}] + +Relatively late in the followup I started wondering about what to do with \MPLIB. +Alan Braslau is working on the \type {luapost} module and we discuss handy +extensions written in \LUA\ and \METAPOST\ code but who knows what more is +needed. Some ideas were put on delay but it looked like a good moment to pick up +on them. One problem is that when we play with the \MPLIB\ code itself in +\LUAMETATEX, the question is how to keep in sync with the official library. In +this chapter I'll discuss both: keeping up with the official code, and keeping +ahead with ideas. + +\stopsection + +\startsection[title={The code base}] + +The \MPLIB\ code is written in \CWEB\ and lives in files with the suffix \type +{w}. These files need to be converted to \type {c} and \type {h} files, something +that is done with the \type {ctangle} program. To avoid that dependency I just +took the \CCODE\ files from \LUATEX, but I had to apply a few patches (to get rid +of dependencies). Now, it is a fact that \METAPOST\ doesn't really develop fast and +in principle a diff could identify the changes easily. So, why shouldn't I also +start experimenting with \MPLIB\ itself in the follow up? 
It's easy to merge +future changes (in both directions). + +The first thing I wrote was a \type {w-to-c} script. This was not that hard given +that I already had written lexers. After a first prototype worked out well, I +redid the code a bit (so that in the future I can also implement support for +change files for instance). A complication was that I found out that the regular +\CWEB\ converter messes around a bit with the code. So, I had to write another +script to mimic that to the level that I could compare the results. For +example, spaces are removed before and after operators and all leading space gets +removed too. When I got the same output I could get rid of that code and output +what I want. For instance I'd like to keep the spacing the same because compilers +can warn about some issues, like missing \type {;} and misleading indentation in +simple \type {if} and \type {while} constructs where braces are omitted. +\footnote {This is no problem in for instance \PASCAL\ where we always have a +\type {begin} and \type {end}.} One can argue that this is not important, but if +not, then why enable warnings at all. I had to fix half a dozen places in the +\type {w} file to make the compiler happy, so the price was small. + +Once I had a more or less instantaneous conversion \footnote {Conversion of the +\type {w} files involved took just over half a second at that time, currently it +takes just over a quarter of a second, on a relatively old machine that is.} I +got the same feeling as with the rest of the code: experimenting became +convenient due to the fast edit|-|compile cycle. So, with all this covered I could +do what I always had wanted to do: remove traces of the backends (including the +full \POSTSCRIPT\ one), because they are actually to be plug|-|ins, and also get +rid of internal font handling, which is bound to \TYPEONE\ (rendering) and small +size \TFM\ (generating).
With respect to that export: I wonder if anyone used +that these days because even the Gust font project always had their own tool +chain alongside \METAPOST. I could also void the hacks needed to trick the +library into not being dependent on \type {png.h} and \type {zlib.h} headers, for +which I had to use dummies. \footnote {The converter can load a file with patches +to be applied but by now there are no patches.} + +It took a few days scripting the converter (most time went into getting identical +output in order to check the converter which was later dropped), a few days +stripping unused code, another day cleaning up the remaining code and then I +could start playing with some new extensions. The binary has shrunk by 200KB +and the whole \LUAMETATEX\ code base in compressed \type {tar.xz} format is now +below 1.8MB while before it was above 2MB. Not that it matters much, but it was +a nice side effect. \footnote {Size matters as we want the code to end up in the +\CONTEXT\ distribution. It might grow a bit as side effect of adding some more +features to \MPLIB.} + +What new extensions would show up was still open. Because Alan and I play with +scanners it made sense to look into that. Error handling and logging has also +been on my radar for a while. In the process some more code might be dropped, but +actually the current version is still useable as library for a stand alone +program, given that one reconstructs the \POSTSCRIPT\ driver from the dropped +code (not that much work). Some configuration options are missing then but that +could be provided as extensions (after all we can have change files.) On the +other hand, wrapping code in \CONTEXT, like: + +\starttyping +\starttext +\startMPpage + ........ +\stopMPpage +\startMPpage + ........ +\stopMPpage +\stoptext +\stoptyping + +will give a \PDF\ file that can be converted to all kinds of formats, and the +advantage is that one has full font support.
There is already a script in the +distribution that does this anyway. + +\stopsection + +\startsection[title={Communication}] + +The first experiment concerns a change in the interfacing between the \METAPOST\ +and \LUA\ end. In the original library all file \IO\ is handled by the library +itself. The filenames can be resolved via a callback. Once an instance is +initialized, snippets of code are passed to the instance via the \type {execute} +call. Log, terminal and error information is collected and returned as part of +the return value (a table). This means that reporting back to the user has a +delay: it can be shown {\em after} all code in the buffer has been processed. The +code given as argument to \type {execute} is passed to the engine as (fake) +terminal input, which nicely fits in the concept of interactive input, which +already is part of the \METAPOST\ concept. + +In our follow up variant all file \IO\ goes via \LUA. This means that we have a +bit more control over matters. In \CONTEXT\ we now can use the usual file +handling code. One defines an \type {open_file} callback that returns a table +with possible methods \type {close}, \type {reader} and \type {writer}, as in +similar \LUATEX\ callbacks. A special file, with the name \type {terminal} is +used for terminal communication. Now, when the \type {execute} command is +handled, the string that gets passed ends up in the terminal, so the file handler +has to deal with it: the string gets written to the handle, and the handle has to +return it as lines on request. In \CONTEXT\ we directly feed the to be executed +code into the terminal cache. + +It's all experimental and subject to changes but as we keep \CONTEXT\ \LMTX\ and +\LUAMETATEX\ in sync, this is no problem. Users will not use these low level +interfaces directly. It might take a few years to settle on this. + +The reports that come from the \METAPOST\ engine are now passed on to the \type +{run_logger} callback. 
That one gets a target and a string passed. Where the +original library can output stuff twice, once for the log and once for the +console, in the new situation it gets output once, with the target being +terminal, log file or both. The nice thing about this callback is that there is no +delay: the messages come as the code is processed. + +We combine this logging with the new \type {halt_on_error} flag, which makes the +engine abort after one error. This mechanism will be improved as we go. The +interaction option \type {silent} hides some of the less useful messages. + +The overall efficiency of the library doesn't suffer from these changes, and in +some cases it can perform even better. Anyhow, the user experience is much better +with synchronous reports. + +Although not strictly related to \IO, we already have extended the library with +the option to support \UTF-8, which is handy for special symbols, as for instance +used in the \type {luapost} library. + +\stopsection + +\startsection[title={Scanning}] + +Another extension is more fundamental in the sense that it can affect the way +users see \METAFUN: extending the user interface. It is again an example of why +having an independent code base has benefits: we can do such experiments for a +long time, before we decide that (and how) it can end up in the parent (of course +the same is true for the mentioned \IO\ features). I will not discuss these +features here. For now it is enough to know that it gets applied in \CONTEXT\ and +will provide a convenient additional interface. Once it is stable I'll wrap it up +in writing.
+ +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-performance.tex b/doc/context/sources/general/manuals/followingup/followingup-performance.tex new file mode 100644 index 000000000..40eb1971d --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-performance.tex @@ -0,0 +1,107 @@ +% language=us + +\startcomponent followingup-performance + +\environment followingup-style + +\startchapter[title={Performance}] + +\startsection[title={Introduction}] + +Those who've read the other documents describing the development of \LUATEX, know +that performance is always on my radar. A decent performance is a must for a +useable workflow, especially because typesetting is a multi|-|pass process. +\footnote {I'm often baffled by reports of (non|-|\CONTEXT) \LUATEX\ users about +the performance of \LUATEX. It seems easier to blame an engine than ones own +macros or setup and most of those tests make no sense anyway. Believe it or not, +but if performance of \CONTEXT\ \MKIV\ was much worse than \MKII\ (using \PDFTEX\ +or \XETEX) it would have backfired and the project would never have taken of. +Just think of this: would Hans really use \LUATEX\ and continue with development +if it were that slow?} One page reference changing from two digits to three +digits can influence whatever follows and we're not only talking of a different +page break, even a change in line breaks can have consequences. The core engine +cannot be made much faster. When the (single core) run has the whole cpu +available not much can be gained. But multiple processes are run at the same +time, the cache has to be shared and misses can become an issue. So, efficiency +of code is still important. Occasionally a (tiny) improvement can be made, but +only the accumulation of such improvements can make a dent. 
The feeling is that +over time \LUATEX\ has not become slower but we keep an eye on possible other +improvements. The memory footprint is also something to keep an eye on. \footnote +{Of course this is all becoming less relevant now that having e.g. a browser open +in the background will set you back with a constant 5\endash10\% cpu load and +slowly accumulating gigabyte memory usage. That actually was something I had to +keep in mind when running \LUAMETATEX\ benchmarks.} + +The more we delegate to \LUA, the less we can benefit from for instance \CPU\ +improvements: in that case the \LUA\ virtual machine is the bottleneck. And there +is not much we can do about that. This also means that when we delegate more to +\LUA\ we sacrifice performance. Sometimes things can be done more efficient in +\LUA, but those are often tasks that are not performed frequently. That said, I'm +convinced most of the \CONTEXT\ code is quite efficient and not much can be +gained. + +The biggest change in \LUAMETATEX\ is the backend. We gain some efficiency in +terms of speed, performance and output in some cases, while in other cases we +loose a bit. On the average the small performance hit is bearable. Because +\CONTEXT\ users don't complain about performance I think that I have some slack +here. + +\stopsection + +\startsection[title={An example}] + +There are a few places where \LUATEX\ looks ahead to check something and goes back +when the condition is not met. Take these: + +\starttyping +\hbox {...} +\hbox to 10cm {...} +\hrule width 10cm height 10cm \relax +\dimen0 =10cm +\dimen0 10cm +\mydimen 10cm +\toks0 {...} +\toks0 \toks2 +\stoptyping + +Spaces and sometimes \type {\relax} after the trigger (\type {\hbox}, \type +{\dimen}, etc.) are skipped and in some case there can be an optional \type {=} +sign. So, there are quite some cases where there is first a check for an optional +equal which itself can be preceded by optional spaces. 
When there is no equal +sign the last seen token is pushed back into the scanner which effectively means +that a temporary token is allocated, and a one token list is pushed on the input +stack. Then scanning goes on. The same can happen with the open brace in case of +a token list assignment: it gets pushed back and the content scanner checks it +again. In the case of keywords something similar takes place, because here +\LUATEX\ checks explicitly for e.g.\ \type {width}, and when it is not found again +it pushes back consumed tokens and checks for the \type {width}. In the case of +the specifiers of the box we don't need to check at all when we have an opening +brace. In the follow up, when the \type {orientation} keyword was added, and the +\type {dir} and \type {bdir} were replaced by \type {direction} a little bit more +was optimized. + +In \LUATEX\ this code comes from \PDFTEX\ which takes it from \TEX, but in both +cases some code side effects occur from the transition from \PASCAL\ to \CCODE. +But, in \LUATEX\ we stick to the \CCODE, so we can try to get rid of these +artifacts. During the last years, especially when additional keywords were +introduced (for instance for attributes) already some optimization took place. In +the follow up again some optimizations were applied, for instance quite often we +can combine the check for an equal sign with skipping the spaces. + +The gain is not spectacular but as all small bits add up eventually it is +measurable in a complex run. What definitely is true, is that we avoid some +memory access which in turn might pay back when multiple runs happen in parallel. + +Of course one can argue that such optimizations are to be avoided but as long as +they don't obscure the code, it's okay. After all, just as one optimizes for +instance a compression algorithm or search routine, there is no reason not to +mildly optimize some of the critical code in \LUATEX.
And in \CONTEXT\ we have +plenty of opportunities to check if that works out well. At some point some might +be retrofit into \LUATEX\ 1.2 (or later). \footnote {But it makes less sense now +that there are variants popping up that might depend on the stable base.} + +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-rejected.tex b/doc/context/sources/general/manuals/followingup/followingup-rejected.tex new file mode 100644 index 000000000..f357c0ae5 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-rejected.tex @@ -0,0 +1,83 @@ +% language=us + +\startcomponent followingup-rejected + +\environment followingup-style + +\startchapter[title={Rejected}] + +\startsection[title={Introduction}] + +During the development of \LUATEX\ some extensions were considered but rejected +after some experiments. I already forgot about some that were tried the last +decade. I will not discuss what has been added already to \LUATEX. + +\stopsection + +\startsection[title={Conditionals}] + +The \LUATEX\ manual describes a few conditional primitives that were added. One +thing I played with was a native definer, think of \type {\idef} but in the end +rejected it, because in practice it was seldom needed. Another useful one would +be \type {\ifnothing} but the current implementation of \type {\ifx} is already +pretty efficient so there is nothing to gain here. Another rejected one is \type +{\ifxcase} which takes a token and compares that with a sequence, like + +\starttyping +\ifxcase\foo\alpha +\or\beta +\or\gamma +\else +\fi +\stoptyping + +As this was never available, in \CONTEXT\ already different strategies were +followed so I could only find a few places where this could make code more +readable. 
But who knows, I might change my mind when I split the code base and can +adapt code accordingly although it doesn't make much sense for the more high +level modules because it would only affect a few lines and maintaining duplicate +files is no fun. \footnote {But playing with extensions that make for better code +{\em is} fun.} + +\stopsection + +\startsection[title={Dimensions}] + +A primitive that returns the height plus depth would make sense (\type {hd}) but +one can easily define one and the gain can be neglected. So, for now this has +been rejected. Also, one can use the token scanners to implement that kind of +primitives but of course that then does have a penalty in terms of performance. +\footnote {Okay, in the end I decided to just add a primitive for this, but only +as part of a larger set of box related primitives.} + +\stopsection + +\startsection[title={The something}] + +I played a bit with intercepting \type {\the} so that we could define commands +that also respond to this expander. It didn't work out well because full +expansion happens, even with protected macros: + +\starttyping +\protected\def\foo{...] \the\foo +\stoptyping + +We just have to accept this and it's no big deal. + +\stopsection + +\startsection[title={Primitives}] + +Occasionally I'm wondering if we should have a way to flag primitives and macros +as being frozen but in the end it might not pay off. At some point I decided that +at least the \type {\primitive} and \type {\ifprimitive} could go away as they +are not really working as expected. It's better to have nothing than something +bad. Also, we can easily clone the whole set of primitives in a new namespace +with \LUA\ if we want. 
\footnote {But \unknown\ in the end we got something else +back.} + +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-retrospect.tex b/doc/context/sources/general/manuals/followingup/followingup-retrospect.tex new file mode 100644 index 000000000..b99185b77 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-retrospect.tex @@ -0,0 +1,188 @@ +% language=us + +\startcomponent followingup-retrospect + +\environment followingup-style + +\startchapter[title={Retrospect}] + +% \startsection[title={Introduction}] +% \stopsection + +At some point in a new development, and \LUAMETATEX\ feels like that, there comes +a moment when you need to make a decision. In this case the question is if we +need to make hybrid \MKIV\ and \LMTX\ files or do the same as with the transition +from \MKII\ to \MKIV: use two variants. For \TEX\ files a conditional section has +only overhead in the format generation as skipped code doesn't end up in the +format. With conditional \LUA\ code it's different: the ignored section is still +present in byte code. But even for \TEX\ code a conditional section is not +entirely invisible: encountered control sequences are still creating (bogus) hash +entries. So the question is: do we go lean and mean and do we omit historic +non|-|\LMTX\ code? + +A comparison with the transition from \MKII\ is actually relevant. For instance +right from the start \CONTEXT\ had an abstract backend layer, and support for +engines and output formats was loaded on demand. There was never any specific +code in the core. With \MKIV\ we changed the model but there is still some +abstraction. + +In \MKII\ we also had to deal with encodings and that has consequences for +font handling, language support and input encodings. In \MKIV\ all that changed: +internal all is \UTF, as is normally the input (but we can still use encodings), +and fonts are always mapped to \UNICODE. 
+ +Anyhow, much that made sense for \MKII\ was no longer relevant for \MKIV: code +could be dropped. But some mechanisms were reimplemented using \LUA: code was +added. The user interface stayed the same but \MKIV\ uses a conceptually +different approach deep down. Therefore the code base was split in \MKII\ and +\MKIV\ files but this transition was made stepwise. + +So should the same happen with \LMTX ? There is not that much that needs to be +added to \MKIV\ in terms of functionality. In the end, for the \TEX\ code the +differences are not that substantial, so there we can consider loading different +files. The files involved are rather stable so there is not much danger of +functionality between \MKIV\ and \LMTX\ getting out of sync. The same is true for +the \LUA\ files, although synchronization is probably more an issue there. + +Another option is to always assume that \LUAMETATEX\ is used. For testing regular +\LUATEX\ (patches) we can just use a 2019 stable \CONTEXT. But in order for users +to benefit from developments we then expect them all to move on to \LMTX. Using a +frozen 2019 version with upcoming \LUATEX\ is no big deal as we've done the same +with \MKII\ and that worked out okay. + +When we started with \CONTEXT\ development in the previous century we were doing +pretty weird things. I remember getting comments that what we did made no sense +because it was not what \TEX\ was meant for and some even suggested that it +disrupted the picture. Highly structured input, a clear separation (and +abstraction) of front and backend, inheritance and user defined styling, +integrated support for \XML, embedded \METAPOST, advanced interactive documents, +handling of fonts and encodings, the list is long. Occasionally some of the things +that came with \CONTEXT\ were ridiculed, like the fact that a script was used to +manage the (multiple) run(s), but in the end, look at how many scripts are around +now. 
Some even wondered why we used \TEX\ at all because \TEX\ was meant for +typesetting math. And who needs \XML\ let alone \MATHML ? Or interactive \PDF\ +features? Much in \CONTEXT\ and its management got smoother over time and the +\LUAMETATEX\ engine fits nicely into this evolution. It's hard to keep the +cutting edge but at least we have the instruments. + +During \BACHOTEX\ 2019 (end of April, beginning of May) this project was +presented the first time outside the \CONTEXT\ community. During that meeting +Mojca Miklavec, one of the driving forces behind \CONTEXT, upgraded the compile +farm that already was used to compile (intermediate versions of) \LUATEX\ and +\TEXLIVE\ to also compile \type {pplib} (handy for development) and \LUAMETATEX. +This permits us to fine|-|tune the \type {cmake} setup which is still work in +progress. And, also further improvements take place in the code base itself. + +One of the properties of open source is that one can build upon an existing code +base, so when at \BACHOTEX\ Arthur announced that he was going to make a merge of +\XETEX\ (which he maintains) and \LUATEX\ no one was surprised. But it could be a +strong argument for a rather strict code freeze: spin|-|offs need stability. I've +been told that there are now several projects where more libraries (like +Harfbuzz) get integrated. Those cases don't influence the parent but here +stability of the original also is expected, unless of course additional features +go in these engines, which itself creates instability, but that's another matter. +One could actually argue that the arrival of variants defeats the argument that +stability is important: if a macro package uses new features, it needs to adapt, +and naturally (temporary) issues might show up. Such are the dynamics of todays +software development. 
History in general shows that not that much is persistent +(or even accumulative) and programs are probably the least, so maybe the whole +stability aspect has lost its relevance. \footnote {In a similar way as that the +argument \quotation {Publishers want this or that, so we as \TEX\ community need +to provide it.} is no longer that relevant because publishing is now more a +business model than vocation.} Of course \LUAMETATEX\ is also a follow up, but +one of the ideas behind it was that I could use it as platform for (independent) +experiments that could result in code being put into \LUATEX. Also, the changes +have a limited impact: only \CONTEXT\ will be affected. \footnote {So maybe, in +the end, stability boils down to \quotation {The engine behaves the same and the +\CONTEXT\ that comes with it exploits its features as good as possible}.} + +It is not feasible to make \CONTEXT\ work with all kinds of engines that in +practice are not used by its users. For instance, after \XETEX\ showed up it went +through several iterations of font rendering, so we never really spent time on +the low level features that it provided (there was no demand anyway). One cannot +simply claim that one method is better than another that replaces it and expect +constant adaptation (probably for the sake of a few potential users). There +simply is no \quote {best} engine and no \quote {perfect} solution. Another +aspect is that when we would adapt \CONTEXT\ to \LUATEX\ variants the +dependencies on specific functionality that itself depends on the outside world +is kind of unavoidable. Especially languages and fonts are fluid and for the +average user there is not that much difference in that department. Should we +really complicate matters for a few (potential) users? In \CONTEXT\ support like +that is added on demand, driven by specific needs of users who use \TEX\ for a +reason and are willing to test. 
+ +There's enough huge and complex software around that demonstrates what happens +when programs are extended, keep growing, their code base becoming more complex. +Such a process doesn't really fit in my ideas for \TEX. We positioned 1.10 +as long term stable, with the option to add a few handy things in the long run. +For sure there are niches to fill and it is a fact that the \TEX\ community can +deal with variants of engines: just look at the different \CJK\ engines around, +with prefixes like \type {p}, \type {up}, \type {ep}, etc. But the question is, +where does that put further \LUATEX\ development? And, more important, what +consequences does it have for the \CONTEXT\ code base? + +The reason I mention this is that I had in mind to eventually backport features +that work out well in \LUAMETATEX. I also mentioned that in order to support +stock \LUATEX\ it made no sense to split the \CONTEXT\ code base. After all, a +few conditional sections could deal with the difference between \LUATEX\ and +\LUAMETATEX: some differences could be temporary anyway. But, given recent +developments it actually made sense to split the code base: why spend time on +backporting when the engine user base is spread over different spinoffs. I can +better just assume \CONTEXT\ to exclusively use \LUAMETATEX\ and that other macro +packages use (one or more) \LUATEX\ variants. I can then keep the generic code up +to date and maybe occasionally add some proven stable features. It is also no big +deal to keep the minimum subset needed for (plain) font handling compatible, +assuming \LUATEX\ compatibility, as in the end that engine is the benchmark, +especially when I strip it a bit from features not needed outside \CONTEXT. + +Thoughts like this show how fragile plans and predictions are: within a year one +has to adapt ideas and assumptions. 
But it also proves that \LUAMETATEX\ was a +good choice for \CONTEXT, especially because it is bound to \CONTEXT\ +development, which keeps the users independent and isolated from developments that +don't mind that much the (side) effects on \CONTEXT. + +% \footnote {I mentioned stability a few times, but this aspect is somewhat vague: +% often I see complaints about \LUATEX, or comparisons with other engines, that +% have nothing to do with the engine per se, but more with misunderstanding and|/| +% assumptions, strange usage, maybe or even likely bad user code, comparing apples +% and pears, etc. The term \type {bug} is very popular and often a preferred +% qualifications, and it sounds even more impressive when it's qualified as a bug +% one. I guess that a more tight coupling between specific engines and macro +% packages at least that aspect becomes cleaner.} + +Around the \CONTEXT\ meeting (or maybe a bit later) we hope to have the new +installation infrastructure stable too (currently it is also experimental). By +that time it will also be clear how we will proceed with the \LMTX\ project. In +the meantime I have decided to put \LUAMETATEX\ specific files alongside the +\MKIV\ files, simply because I always need to be able to run stock \LUATEX. In order +to show the close relationship these files are flagged as \MKXL, so we bump from +\quote {Mark Four} to \quote {Mark Forty}. The suffixes \type {mkiv}, \type +{mkvi} and \type {mpiv} get company from \type {mkxl}, \type {mklx} and \type +{mpxl}. Depending on backporting features, files can come and go. I'm not yet +sure about the \LUA\ files but the \type {lmt} suffix is already reserved for +future use. \footnote {This is because \LUA\ 5.4 introduces some new syntax +elements and where we can get away with the difference between 5.2 (\LUAJITTEX) +and 5.3 (\LUATEX) such a syntax change is more drastic.} All this is also driven +by (user) demand. + +Consider this (and these thoughts) a snapshot. 
There will be the usual reports on +experiments and developments. And in due time there will also be a manual for +\LUAMETATEX. \footnote {In fact it already lives on my machine but I'm not +ready yet for the usual complaints about manuals, so I'm not in that much of a +hurry.} And yes, at some point I have to make up my mind with respect to +backporting features that have proven to be useful. + +% \footnote {Actually, it seems to come with the Internet: folks whining on whatever +% platform about lack of documentation (most of the \CONTEXT\ distribution actually +% is documentation and quite some articles are, have been, and will be written) or +% possible bug (always huge, even if no bug at all) without exposing much actual +% research or knowledge about these matters. Write, post and shout before thinking +% it through, increase the number hits on your profile. It's for sure a way to make +% something end up at the bottom of my to do list, if at all. A valid response +% could be: whatever did you contribute to the community that I myself (or +% \CONTEXT\ users) can benefit from. Quite likely: nothing (or little)! It looks +% like even the normally friendly \TEX\ community sometimes gets infected by this.} + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-stripping.tex b/doc/context/sources/general/manuals/followingup/followingup-stripping.tex new file mode 100644 index 000000000..69af6376c --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-stripping.tex @@ -0,0 +1,369 @@ +% language=us + +% 2,777,600 / 11,561,471 cont-en.fmt + +% Hooverphonic - Live at the Ancienne Belgique (Geike Arnaert) + +\startcomponent followingup-stripping + +\environment followingup-style + +\startchapter[title={Stripping}] + +\startsection[title={Introduction}] + +Normally I need a couple of iterations to reach the implementation that I like +(an average of three rewrites is rather normal). 
So, I sat down and started +stripping the engine and did so a few times in order to get an idea of how to +proceed. One drawback of going public too soon (and we ran into that with +\LUATEX) is that as soon as there are more users, one gets stuck into the +situation that a different approach is not really possible. This is why from now +on experimental is really experimental, even if that means: it works ok in +\CONTEXT\ (even for production) but we can change interfaces to be better, e.g.\ +more consistent (although we're also stuck with existing \TEX\ terminology). +Anyway, let's proceed. + +\stopsection + +\startsection[title={The binary}] + +In 2014 the \LUATEX\ binary was some 10.9 MB large. The version 1.09 binary of +October 2018 was about 6.8MB, and the reduction was due to removing the bitmap +generation from \MPLIB\ as well as replacing poppler by pplib. As an exercise I +decided to see how easy it was to make a small version suitable for \CONTEXT\ +\LMTX, and as expected the binary shrunk to below 3MB (plus a \LUA\ and \KPSE\ +dll). This is a reasonable size given what is still present. + +There is hardly any file related code left because in practice the backend used +the most different file types. That also meant that we could remove \KPSE\ +related code and keep all that in the library part. In principle one can load +that library and hook it into the few callbacks that relate to loading files. +Once we're stable I'll probably write some code for that. \footnote {In the +meantime I think it makes not much sense to do that.} Launching the binary with a +startup script can deal with all matters needed, because the command line +arguments are available. + +We could actually go even smaller by removing the built|-|in \TFM\ and \VF\ +readers. For instance it made not much sense to read and store information that +is never used anyway, like virtual font data: as long as the backend has access +to what it needs it's fine. 
By removing unused code and stripping no longer used +fields in the internal font tables (which is also good for memory consumption), +and cleaning up a bit here and there the experimental binary ended up at a bit +above 2.5MB (plus a \LUA\ dll). \footnote {Mid January we were just below 2.7 MB +with a static, all inclusive, binary. In March the static ended up at 2.9 MB on +\MSWINDOWS\ and 2.6 MB in \UNIX.} + +\stopsection + +\startsection[title={Functionality}] + +There is no real reason to change much in the functionality of the frontend but +as we have no backend now, some primitives are gone. These have to be implemented +as part of creating a backend. + +\starttyping +\dviextension \dvivariable \dvifeedback +\pdfextension \pdfvariable \pdffeedback +\stoptyping + +The already obsolete related dimensions are also removed: + +\starttyping +\pageleftoffset \pagerightoffset +\pagetopoffset \pagebottomoffset +\stoptyping + +And we no longer need the page dimensions because they are just registers that +are normally used in the backend. So, we got rid of: + +\starttyping +\pageheight +\pagewidth +\stoptyping + +Some font related inheritances from \PDFTEX\ have also been dropped: + +\starttyping +\letterspacefont +\copyfont +\expandglyphsinfont +\ignoreligaturesinfont +\tagcode +\stoptyping + +Internally all backend whatsits are gone, but generic \type {literal}, \type +{save}, \type {restore} and \type {setmatrix} nodes can still be created. Under +consideration is to let them be so called user nodes but for testing it made +sense to keep them around for a while. \footnote {Don't take this as a reference: +later we will see that more was changed.} + +The resource relates primitives are backend dependent so the primitives have been +removed. As with other backend related primitives, their arguments depend on the +implementation. 
So, no more: + +\starttyping +\saveboxresource +\useboxresource +\lastsavedboxresourceindex +\stoptyping + +and: + +\starttyping +\saveimageresource +\useimageresource +\lastsavedimageresourceindex +\lastsavedimageresourcepages +\stoptyping + +Of course the rule nodes subtypes are still there, so the typesetting machinery +will handle them fine. It is no big deal to define a pseudo|-|primitive that +provides the functionality at the \TEX\ level. + +The position related primitives are also backend dependent so again they were +removed. \footnote {There was some sentimental element in this. Long ago, even +before \PDFTEX\ showed up, \CONTEXT\ already had a positional mechanism. It +worked by using specials in combination with a program that calculated the +positions from the \DVI\ file. At some point that functionality was integrated +into \PDFTEX. For me it always was a nice example of demonstrating that +complaints like \quotation {\TEX\ is limited because we don't know the position +of an element in the text.} make no sense: \TEX\ can do more than one thinks, +given that one thinks the right way.} + +\starttyping +\savepos +\lastxpos +\lastypos +\stoptyping + +We could have kept \type {\savepos} but better is to be consistent. We no longer +need these: + +\starttyping +\outputmode +\draftmode +\synctex +\stoptyping + +These could go because we no longer have a backend and if one needs it it's easy +to define a meaningful variable and listen to that. + +The \type {\shipout} primitive does no ship out but just flushes the content of +the box, if that hasn't happened already. + +Because we have \LUA\ on board, and because we can now use the token scanners to +implement features, we no longer need the hard coded randomizer extensions. In +fact, also the \METAPOST\ should now use the \LUA\ randomizer, so that we are +consistent. 
Anyway, removed are: + +\starttyping +\randomseed +\setrandomseed +\normaldeviate +\uniformdeviate +\stoptyping + +plus the helpers in the \type {tex} library. + +\stopsection + +\startsection[title={Fonts}] + +Fonts are sort of special. We need the data at the \LUA\ end in order to process +\OPENTYPE\ fonts and the backend code needs the virtual commands. The par builder +also needs to access font properties, as does the math renderer, but there is no +real reason to carry virtual font information around (which involves packing and +unpacking virtual packets). So, in the end it made much sense to also delegate +the \TFM\ and \VF\ loading to \LUA\ as well. And, as a consequence dumping and +undumping font information could go away too, which is okay, as we didn't preload +fonts in \CONTEXT\ anyway. The saving in binary bytes is not impressive but +keeping unused code around neither. In principle we can get rid of the internal +representation if we fetch relevant data from the \LUA\ tables but that might be +unwise from the perspective of performance. By removing the no longer needed +fields the memory footprint became somewhat smaller and font loading (passing +from \LUA\ to \TEX) more efficient. + +\stopsection + +\startsection[title={File IO}] + +What came next? A program like \LUATEX\ interacts with its environment and one of +the nice things about \TEX\ is that it has a standard ecosystem, organized as the +\quotation {\TEX\ Directory Structure}. There is a library that interfaces with +this structure: \KPSE, but in \CONTEXT\ \MKIV\ we implement its functionality in +\LUA. The primary reason for this was performance. When we started with \LUATEX\ +the startup on my machine (\MSWINDOWS) and a few servers (\LINUX) of a \TEX\ +engine took seconds and most of that was due to loading the rather large file +databases, because a \TEX\ Live installation was a gigabyte adventure. 
With the +\LUA\ variant I could bring that down to milliseconds, because I could pre|-|hash +the database and limit it to files relevant for \CONTEXT\ (still a lot, as fonts +made up most). Nowadays we have \SSD\ disks and plenty of memory for caching, so +these things are less urgent, but on network shares it still matters. + +So, as we don't use \KPSE, we can remove that library. By doing that we simplify +compilation a lot as then all dependencies are in the engine's source tree, and +we're no longer dependent on updates. One can argue that we then sacrifice too +much, but already for a decade we don't use it and the \LUA\ variant does the job +well within the \TDS\ ecosystem. Also, in our by now stripped down engine, there +is not that much lookup going on anyway: we're already in \LUA\ when we do fonts. +But on the other hand, some generic usage could benefit from the library to be +present, so we face a choice. The choice is made even more difficult by the fact +that we can remove all kind of tweaks once we delegate for instance control over +command execution to \LUA\ completely. But, we might provide \KPSE\ as loadable +\LUA\ module so that when needed one can use a stub to start the program with a +\LUA\ script that as first action loads this library that then can take care of +further file management. As command line arguments are available in \LUA, one can +also implement the relevant extra switches (and even more if needed). + +Now, the interesting thing is that because we have a \LUA\ interface to \KPSE\ we +can actually drop some hard coded solutions. This means that we can have a binary +without \KPSE, in which case one has to cook up callbacks that do what this +library does. But in a version with \KPSE\ embedded one also has to define some +file related callbacks although they can be rather simple. By keeping a handful +of file related callbacks the code base could be simplified a lot. 
In the process +the recorder option went away (not that we ever used it). It is relatively easy +to support this in the \quote {find} related callbacks and one has to deal with +other files (like images and fonts) also, so keeping this feature was a cheat +anyway. + +At this point it is important to notice that while we're dropping some command +line options, they can still be passed and intercepted at the \LUA\ end. So, +providing compatible (or alternative solution) is no big deal. For instance, +execution of (shell) programs is a \LUA\ activity and can be managed from there. + +\stopsection + +\startsection[title={Callbacks}] + +Callbacks can be organized in groups. First there are those related to +\IO. We only have to deal with a few types: all kinds of \TEX\ files (data +files), format files and \LUA\ modules (but these too are on the list of +potentially dropped files as this can be programmed in \LUA). + +\starttyping +find_write_file +find_data_file open_data_file read_data_file +find_format_file find_lua_file find_clua_file +\stoptyping + +The callbacks related to errors stay: \footnote {Some more error handling was +added later, as was intercepting user input related to it.} + +\starttyping +show_error_hook show_lua_error_hook, +show_error_message show_warning_message +\stoptyping + +% We kept the buffer handlers but dropped the output handler later anyway, so we +% have left: +% +% \starttyping +% process_input_buffer +% \stoptyping + +The management hooks were kept (but the edit one might go): \footnote {And +indeed, that one went away.} + +\starttyping +process_jobname +call_edit +start_run stop_run wrapup_run +pre_dump +start_file stop_file +\stoptyping + +Of course the typesetting callbacks remain too as they are the backbone of the +opening up: + +\starttyping +buildpage_filter hpack_filter vpack_filter +hyphenate ligaturing kerning +pre_output_filter contribute_filter build_page_insert +pre_linebreak_filter linebreak_filter post_linebreak_filter 
+insert_local_par append_to_vlist_filter new_graf +hpack_quality vpack_quality +mlist_to_hlist make_extensible +\stoptyping + +Finally we mention one of the important callbacks: + +\starttyping +define_font +\stoptyping + +Without that one defined not much will happen with respect to typesetting. I +could actually remove the \type {\font} primitive but that would be a bit weird +as other font related commands stay. Also, it's one of the fundamental frontend +primitives, so removal was never really considered. + +\stopsection + +\startsection[title={Bits and pieces}] + +In the process some helpers and status queries were removed. From the summary +above you can deduce that this concerns images, backend, and file management. +Also not used variables (some inherited from the past and predecessors) were +removed. These and other changes are the reason why there is a separate manual +for \LUAMETATEX. \footnote {Relatively late in the project I decided to be more +selective in what got initialized in \LUA\ only mode.} + +One of my objectives was to see how lean and mean the code base could be. But +even if we don't use that many files, the rather complex build system makes that +we need to have (make and configure) files in the tree that are not really used +but even then omitting them aborts a build. I played a bit with that but the +problem is that it needs to be dealt with upstream in order to prevent repetitive +work. So, this is something to sort out later. Eventually it would be nice to be +able to compile with a minimal set of source files, also because other programs +(all kind of \TEX\ variants) that are checked for but not compiled depend on +libraries that we don't need (and therefore want) to have in the stripped down +source tree. 
\footnote {In the end, the source tree was redesigned completely.} + +For now we also brought down the number of catcode tables (to 256) \footnote {As +with math families, and if more tables are needed one should wonder about the +\TEX\ code used.}, and the number of languages (to 8192) \footnote {This is +already a lot and because languages are loaded run time, we can go much lower +than this.} as that saves some initially allocated memory. + +\stopsection + +\startsection[title={What's next}] + +Basically the experiment ends here. A next step is to create a stable code base, +make compilation easy and consider the way the code is packaged. Then some +cleanup can take place. Also, as it's a window to the outside world, \type {ffi} +support will move to the code base and be integral to \LUAMETATEX. And of course +the decision about \LUAJIT\ support has to be made some day soon. The same is +true for \LUA\ 5.4: in \LUATEX\ for now we stick to 5.3 but experimenting with +5.4 in \LUAMETATEX\ can't harm us. \footnote {The choice has been made: +\LUAMETATEX\ will not have a \LUAJIT\ based companion.} + +To what extent the \CONTEXT\ code base will have special files for \LMTX\ is +yet to be decided, but we have some ideas about new features that might make that +desirable from the perspective of maintenance. The main question is: do I want to +have hybrid files or clean files for each variant (stock \MKIV\ and \LMTX). + +For the record: at the time of wrapping this up, processing the \LUATEX\ manual +of 294 pages took 13.5 seconds using stock \LUATEX\ while using the stripped down +binary, where \LUA\ takes over some tasks, took 13.9 seconds. \footnote {In the +meantime we're down to around 11.6MB. These are all rough numbers and mostly +indicate relative speeds at some point.} The \LUAJITTEX\ variant needed 10.9 and +10.8 seconds. 
So, there is no real reason to not explore this route, although +\unknown\ the \PDF\ file size shrinks from 1.48MB to 1.18MB (and optionally we +can squeeze out more) but one can wonder if I didn't make big mistakes. It is +good to realize that there is not much performance to gain in the engine simply +because most code is already pretty well optimized. The same is true for the +\CONTEXT\ code: there might be a few places where we can squeeze out a few +milliseconds but probably it will go unnoticed. + +On the todo list went removal of \type {\primitive} which we never use (need) and +the possible introduction of a way to protect primitives and macros against +redefinition, but on the other hand, it might impact performance and be not worth +the trouble. In the end it is a macro package issue anyway and we never really +ran into users redefining primitives. \footnote {Indeed this primitive has been +removed.} + +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-stubs.tex b/doc/context/sources/general/manuals/followingup/followingup-stubs.tex new file mode 100644 index 000000000..69d02adc7 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-stubs.tex @@ -0,0 +1,262 @@ +% language=us + +\startcomponent followingup-stubs + +\environment followingup-style + +\startchapter[title={Stubs}] + +\startsection[title={Bare bone}] + +The most barebone way to process a \CONTEXT\ file is something like: + +\starttyping +luametatex + --fmt="<cache path to>/luametatex/cont-en" + --lua="<cache path to>/luametatex/cont-en.lui" + --jobname="article" + "cont-yes.mkiv" +\stoptyping + +We pass extra options, like: + +\starttyping + --c:autopdf + --c:currentrun=1 + --c:fulljobname="./article.tex" + --c:input="./article.tex" + --c:kindofrun=1 + --c:maxnofruns=9 + --c:texmfbinpath="c:/data/develop/tex-context/tex/texmf-win64/bin" +\stoptyping + +but for what we are going to discuss here it 
doesn't really matter. The main point is +that we use a \LUA\ startup file. That one has a minimal amount of code so that the +format can be loaded as we like it. For instance we need to start up with initial +memory settings. + +The file \type {cont-yes} sets up the way processing content happens. This can be the +\type {jobname} file but also something different. It is enough to know that this +startup is quite controlled. + +I will explore a different approach to format loading but for now this is how it +goes. After all, we need to be compatible with \LUATEX\ and normal \MKIV\ runs, at +least for now. + +\stopsection + +\startsection[title={Management (some history)}] + +In \CONTEXT\ we always had a script: \type {texexec}, originally a \MODULA2 +program, later a \PERL\ script, then a \RUBY\ script but now we have \type +{mtxrun}, a \LUA\ script. All take care of making sure that the file is +processed enough times to get the cross references, tables of contents, indexes, +multi|-|pass data stable. It also makes it possible to avoid using these special +binaries (or links) that trick the engine into thinking it is bound to a format: +we never had \type {pdfcontext} or \type {luacontext}, just one \type {context}. +Actually, because we have multiple user interfaces, we would have needed many +stubs instead. Getting this approach accepted was not easy but in the meantime +I've seen management scripts for other packages being mentioned occasionally. + +The same is true for scripts: for a long time \CONTEXT\ came with quite some +scripts but when an average \TEX\ distribution started growing, including many +other scripts, we abandoned this approach and stuck to one management script that +also launched auxiliary scripts. That way we could be sure that there were no +clashes in names. If you look at a full \TEX\ installation you see many stubs to +scripts and more keep coming. 
How that can work out well without unexpected side +effects (name clashes) is not entirely clear to me, as a modern computer can have +large bin paths. Just imagine that all large programs (or ecosystems) would +introduce hundreds of new \quote {binaries}. + +Anyway, in the end a \CONTEXT\ installation using \MKIV\ only needs \type {mtxrun} +and as bonus \type {context}. The above call is triggered by: + +\starttyping +mtxrun --autogenerate --script context --autopdf article.tex +\stoptyping + +from the editor. Here we create formats when none is found, and start or activate +the \PDF\ viewer afterwards, so more minimal is: + +\starttyping +mtxrun --script context article.tex +\stoptyping + +Normally there is also a \type {context} stub so this also works: + +\starttyping +context article.tex +\stoptyping + +\stopsection + +\startsection[title={The launch process (more history)}] + +In \MKII, when we use \PDFTEX, the actual launch of these scripts is somewhat +complex and a bit different per platform. But, on all platforms \KPSE\ does the +lookup of the script. Already long ago I found out that this startup overhead +could amount to seconds on a complete \TEX Live installation (imagine running +over a network) which is why eventually we came up with the minimals. The reason +is that the file databases have to be loaded: first for looking up, then for the +stub that also needs that information and finally by the actual program. There +were no \SSD's then. + +The first hurdle we took was to combine the lookup and the runner. Of course this +is sort of out of our control because an installer can decide to still use a +lookup approach but at least on \MSWINDOWS\ this was achieved quite easily. Sort +of: + +\starttyping +texexec -> [lookup] --> + texexec.pl -> [lookup] -> + pdftex + formats -> + [lookup] -> processing +\stoptyping + +The first lookup can be avoided by some fast relative lookup, but for more +complex management the second one is always there. 
Over time this mechanism +became more sophisticated, for instance we use caching, could work over sockets +using a \KPSE\ server, etc. + +When \LUATEX\ came around, it was already decided early that it also would serve +as script engine for the \CONTEXT\ runner, this time \type {mtxrun}. The way this +works differs per platform. On \WINDOWS\ there is a small binary, say \type +{runner.exe}. It gets two copies: \type {mtxrun.exe} and \type {context.exe}. If +you find more copies on your system, something might be wrong with your +installation. + +\starttyping +mtxrun.exe -> loads mtxrun.lua in same path +context.exe -> idem but runs with --script=context +\stoptyping + +The \type {mtxrun.lua} script will load its file database which is very efficient +and fast. It will then load the given script and execute it. In the case of \type +{context.exe} the \type {mtx-context.lua} script is loaded, which lives in the +normal place in the \TEX\ tree (alongside other scripts). + +So, a minimal amount of programs and scripts is then: + +\starttyping +texmf-win64/bin/luatex.exe +texmf-win64/bin/mtxrun.exe +texmf-win64/bin/mtxrun.lua +texmf-win64/bin/context.exe +\stoptyping + +with (we also need the font manager): + +\starttyping +texmf-context/scripts/context/lua/mtx-context.lua +texmf-context/scripts/context/lua/mtx-fonts.lua +\stoptyping + +But \unknown\ there is a catch here: \LUATEX\ has to be started in script mode in +order to process \type {mtxrun}. So, in fact we see this in distributions. + +\starttyping +texmf-win64/bin/luatex.exe +texmf-win64/bin/texlua.exe +texmf-win64/bin/mtxrun.exe +texmf-win64/bin/mtxrun.lua +texmf-win64/bin/context.exe +\stoptyping + +The \type {texlua} program is just a copy of \type {luatex} that by its name +knows that it is supposed to run scripts and not process \TEX\ files. The setup +can be different using dynamic libraries (more files but a shared engine part) +but the principles are the same. 
Nowadays the stub doesn't need the \type +{texlua.exe} binary any more, so this is the real setup: + +\starttyping +texmf-win64/bin/luatex.exe large program +texmf-win64/bin/mtxrun.exe small program +texmf-win64/bin/mtxrun.lua large lua file +texmf-win64/bin/context.exe small program +\stoptyping + +Just for the record: we cannot really use batch files here because we need to +know the original command, and when run from a script that is normally not known. +It works to some extent but for instance when started indirectly from an editor +it can fail, depending on how that editor is calling programs. Therefore the stub +is the most robust method. + +On a \UNIX\ system the situation differs: + +\starttyping +texmf-linux-64/bin/luatex large program +texmf-linux-64/bin/texlua symlink to luatex +texmf-linux-64/bin/mtxrun large lua file +texmf-linux-64/bin/context shell script that starts mtxrun +\stoptyping + +Here \type {mtxrun.lua} is renamed to \type {mtxrun} with a shebang line that +triggers loading by \type {texlua} which is a symlink to \type {luatex} because +shebang lines don't support the \type {--texlua} argument. As on windows, this +is not really pretty. + +\stopsection + +\startsection[title={The \LMTX\ way (the present)}] + +Now when we move to \LMTX\ we need to make sure that the method that we choose is +acceptable for distributions but also nicely consistent over platforms. We only +have one binary \type {luametatex} with all messy logic removed and no second +face like \type {metaluatex}. When it is copied to another instance (or linked) +it will load the script with its own name when it finds one. 
So on \WINDOWS\ we +now have: + +\starttyping +texmf-win64/bin/luametatex.exe medium program +texmf-win64/bin/mtxrun.exe copy (or link) of luametatex +texmf-win64/bin/mtxrun.lua large lua file +texmf-win64/bin/context.exe copy (or link) of luametatex +texmf-win64/bin/context.lua small lua file +\stoptyping + +and in \UNIX: + +\starttyping +texmf-linux-64/bin/luametatex medium program +texmf-linux-64/bin/mtxrun copy (or link) of luametatex +texmf-linux-64/bin/mtxrun.lua large lua file +texmf-linux-64/bin/context copy (or link) of luametatex +texmf-linux-64/bin/context.lua small lua file +\stoptyping + +So, \type {luametatex[.exe]}, \type {mtxrun[.exe]} and \type {context[.exe]} are +all the same. On both platforms there is \type {mtxrun.lua} (with suffix) and on +both we also use the same runner approach. The \type {context.lua} script is +really small and just sets the script command line argument before loading \type +{mtxrun.lua} from the same path. In the case of copied binaries: keep in mind +that the three copies together are not (much) larger than the \type {luatex} and +\type {texlua} pair (especially when you take additional libraries into account). + +The disadvantage of using copies is that one can forget to copy with an update, +but the fact that one can use them might be easier for installers. It's up to +those who create the installers. + +One complication is that the \type {mtxrun.lua} script has to deal with the old +and the new setup. But, when we release we will assume that one used either +\LUATEX\ or \LUAMETATEX, not some mix. As \type {mtxrun} and \type {context} know +what got it started they will then trigger the right engine, unless one passes +\typ {--engine=luatex}. In that case the \LUAMETATEX\ launcher will trigger a +\LUATEX\ run. But a mixed installation is unlikely to happen. 
+ +\stopsection + +\startsection[title={Why not \unknown}] + +Technically we could use one call for both the runner and \TEX\ processor but +when multiple runs are needed this would demand an internal engine reset as well +as macro package reset while keeping some (multi|-|pass) data around. A way +in|-|between could be to spawn the next run. In the end the gain would be minimal +(we have now .2 seconds overhead per total run, which can trigger multiple +passes, due to the management script), so basically we can neglect it. (Triggering +the viewer takes more time.) + +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-style.tex b/doc/context/sources/general/manuals/followingup/followingup-style.tex new file mode 100644 index 000000000..68b52043c --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-style.tex @@ -0,0 +1,66 @@ +% \enablelmtx +% \nopdfcompression + +\startenvironment followingup-style + +\usemodule[abbreviations-smallcaps] + +\logo [LUAMETATEX] {LuaMeta\TeXsuffix} + +\setupbodyfont[plex] % not that ok for titling + +\setuplayout + [width=middle, + height=middle, + header=0pt, + footer=1cm, + footerdistance=5mm, + backspace=2cm, + cutspace=15mm, + topspace=2cm, + bottomspace=1cm, + style=bold, + color=maincolor] + +\setuppagenumbering + [alternative=doublesided] + +\setupwhitespace + [big] + +\setupfootertexts + [][{\getmarking[chapter]\quad\pagenumber}] + [{\pagenumber\quad\getmarking[chapter]}][] + +\definecolor + [maincolor] + [darkblue] + +\setuphead + [chapter] + [style=\bfc, + color=maincolor] + +\setuphead + [section] + [style=\bfa, + color=maincolor] + +\setuphead + [subsection] + [style=\bf, + color=maincolor] + +\setupalign + [tolerant,stretch] + +\setuptyping + [color=maincolor] + +\setuptype + [color=maincolor] + +\setupitemize + [color=maincolor] + +\stopenvironment diff --git 
a/doc/context/sources/general/manuals/followingup/followingup-tex.tex 
b/doc/context/sources/general/manuals/followingup/followingup-tex.tex new file mode 100644 index 000000000..5524baf53 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-tex.tex @@ -0,0 +1,125 @@ +% language=us + +\startcomponent followingup-tex + +\environment followingup-style + +\startchapter[title={\TEX}] + +\startsection[title={Prefixes}] + +The fact that we merged \ETEX, a bit of \PDFTEX\ and some of \ALEPH\ into +\LUATEX, already makes it a non|-|standard \TEX\ engine. In \LUAMETATEX\ we go a +bit further. Completely outsourcing the backend has the side effect that some +(extension related) primitives have to be implemented explicitly. The fact that +\LUA\ is integrated has consequences for, for instance, initialization. +Defaulting to \UTF-8 input makes it different too. And delegating many font +matters to \LUA\ also doesn't make it behave like good old \TEX. + +Here I discuss another difference. One can argue that this definitely makes it +less \TEX, but in practice this is not that problematic. We're talking prefixes +here. Traditional \TEX\ has only three prefixes: + +\startitemize[n] +\startitem + \type {\global}: when used, it will make the next definition a global one. + The \type {\globaldefs} parameter can be used to force global or local + definitions. +\stopitem +\startitem + \type {\long}: when applied, this will make a macro bark on a \type {\par} + (or its equivalent) when grabbing an argument. In \LUATEX\ this check can be + disabled. \footnote {In a similar fashion barking about a \type {\par} in + math mode can be disabled. Such warnings made much sense when a \TEX\ run + took much time and was triggered and traced on relatively slow output devices.} +\stopitem +\startitem + \type {\outer}: when applied the macro can only be used at the outer level. +\stopitem +\stopitemize + +Multiple prefixes can be given and their effects accumulate. 
The \ETEX\ extension +adds another one: + +\startitemize[continue] +\startitem + \type {\protected}: this will make a macro unexpandable inside an \type + {\edef}, an \type {\xdef} or token list serialization. +\stopitem +\stopitemize + +In \CONTEXT\ we never use(d) \type {\outer} and I can't even think of a useful +application in a large macro package. In \MKII\ most interface macros are defined +as \type {\long}, and because in \MKIV\ we block the complaints, we don't need +this prefix either. On the other hand, many macros are defined \type +{\protected}. \footnote {Or in \CONTEXT\ speak, they are defined as \typ +{\unexpanded}, because we already had \typ {\protected} as well as \typ +{\unexpanded} before these were introduced as primitives.} + +When you look at the implementation, \type {\long} and \type {\outer} are +properties of the so called command code: we have normal, long, outer and long +outer macros, and each has a unique command code. For some reason \type +{\protected} is not implemented with command codes, which would have doubled the +number to eight, but as special token injected in front of the macro preamble. +Using a command code would have made more sense as there is no real speed penalty +in that, while the special token indicating that a macro (body) is protected now +has to be intercepted in some cases. + +Anyhow, already for a while I wondered if I should drop \type {\long} and \type +{\outer} (making them no|-|ops). I also had on my agenda to promote \type +{\protected} to a normal command code. And, already for a long time I wanted to +play with a new prefix: \footnote {This is a typical example of a feature that I +like playing with, before deciding if it will stay (as such).} + +\startitemize[continue] +\startitem + \type {\frozen}: this will protect a macro (for now only a macro) against + redefinition, which provides a bit of protection for a user. 
+\stopitem +\stopitemize + +Promoting \type {\protected} brings the set of call commands from four to eight, +and a \type {\frozen} property would bump it to sixteen. This is still okay, but +in some places it would involve mode testing. However, dropping \type {\long} and +\type {\outer} would not only keep the set small (just four) but also rid it of +some tests. There is no performance penalty either (even a bit of gain in case of +many protected macros as we no longer need to skip the special signal token) and +it even saves some memory (but not that much). + +As a bonus there are a few more conditionals: \type {\ifprotected}, \type +{\iffrozen}, and, very experimental, \type {\ifusercmd}, which can be used to +check if something is user defined (often not a primitive). These probably only +make sense for diagnostic purposes. + +In the end, the implementation was not that hard. In the process I also removed +the \type {\suppress...} parameters so \type {\par} no longer plays havoc. If this +new prefix \type {\frozen} stays or will affect more definitions, we'll see. + +\stopsection + +\startsection[title={Conditionals}] + +Another domain where there have been some extensions is conditions. In a previous +chapter I mentioned \type {\iftok} already. As this is not a manual I will not go +into details about other new conditionals. For instance we have a few that can be +used to check for valid dimensions and numbers. This can lead to a bit cleaner +code, although for instance in \CONTEXT\ we always used support macros for this. +We seldom needed more than we had but when interfacing with \METAPOST\ it helps a +little. + +Another, maybe interesting one is \type {\ifcondition} which when \TEX\ is in +jump over branches mode is seen as a valid \type {\if<cmd>} token but when it +comes to expansion the following macro determines a true or false state. 
A second +nice experiment is \type {\orelse} which is to be followed by a valid \type +{\if<cmd>} token and makes for less nesting which sometimes looks nicer and also +has some advantages. + +I might wrap up these and other extensions in articles once they are considered +stable and useful. But first I'll test them in real situations, which in practice +means that \CONTEXT\ users will test them, probably without noticing. + +\stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-titlepage.tex b/doc/context/sources/general/manuals/followingup/followingup-titlepage.tex new file mode 100644 index 000000000..1256c049a --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-titlepage.tex @@ -0,0 +1,58 @@ +\startcomponent followingup-titlepage + +\environment followingup-style + +\startluacode + function document.graphic() + local min, max, random, round = math.min, math.max, math.random, math.round + + local width = 210 + local height = 297 + + local bitmap = graphics.bitmaps.new(width,height,"rgb",1) + local data = bitmap.data + + for i=1,height do + local d = data[i] + for j=1,width do + -- d[j] = { 0, 0, random(100,200) } + d[j] = { 0, 0, random(128,255) } + end + end + + graphics.bitmaps.tocontext(bitmap,"210bp","297bp") + end +\stopluacode + +\startuseMPgraphic{graphic} + StartPage ; + % fill Page + % withcolor "maincolor" ; + draw textext.urt("\bf \WORD{following up}") + rotated 90 + ysized (PaperHeight-10mm) + shifted lrcorner Page + shifted (-10mm,5mm) + withcolor "middlegray" ; + draw textext.lft("\bf\strut\ConTeXt") + ysized 4.5cm + shifted lrcorner Page + shifted (-50mm,70mm) + withcolor "white" ; + draw textext.lft("\bf\strut lm\kern-.1ex tx") + ysized 4cm + shifted lrcorner Page + shifted (-50mm,37.5mm) + withcolor "white" ; + StopPage ; +\stopuseMPgraphic + +\startpagemakeup[pagestate=stop,doublesdided=no] + \startoverlay + % {luametatex} % for searching + 
{\scale[width=\paperwidth]{\ctxlua{document.graphic()}}} + {\useMPgraphic{graphic}} + \stopoverlay +\stoppagemakeup + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup-whatsits.tex b/doc/context/sources/general/manuals/followingup/followingup-whatsits.tex new file mode 100644 index 000000000..64c817573 --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup-whatsits.tex @@ -0,0 +1,78 @@ +% language=us + +\startcomponent followingup-whatsits + +\environment followingup-style + +\startchapter[title={Whatsits}] + +% \startsection[title={...}] + +Whatsits provide the natural extension mechanism for \TEX. In \PDFTEX\ there are +plenty such whatsits, for instance for \PDF\ annotations. In \LUATEX\ this +mechanism was reorganized so that the code was better isolated. In the first +versions of \LUAMETATEX\ only a handful was left. Stepwise some were removed and +in the end we could stick to only one general whatsit because one can implement +the few needed to be compatible with \TEX. 
+ +We started out with this set of whatsits: + +\starttabulate[|B|p|] +\NC open \NC open a file for writing (delayed) \NC \NR +\NC write \NC write to an open file (or terminal otherwise) \NC \NR +\NC close \NC close an opened file \NC \NR +\NC special \NC write some literal \PDF\ code to the output file \NC \NR +\NC user \NC store and retrieve data in a node \NC \NR +\NC latelua \NC execute code delayed (in the backend) \NC \NR +\NC literal \NC write some literal \PDF\ code to the output file, controlled by a mode \NC \NR +\NC save \NC push the transformation state \NC \NR +\NC restore \NC pop the transformation state \NC \NR +\NC matrix \NC apply a transformation (\type {rx sx sy ry}) \NC \NR +\NC savepos \NC register a position to be queried afterwards (\type {x y}) \NC \NR +\stoptabulate + +The \type {\openout}, \type {\write} and \type {\closeout} primitives relate to +the first three but they can be prefixed with \type {\immediate} in which case +they don't end up as whatsits but are applied directly. The \type {special} is +actually meant for \DVI\ while the \type {\(pdf)literal} is for \PDF\ output. The +first four are available in regular \TEX. + +The last four are dealt with exclusively in the backend and by removing the +backend they basically became no|-|ops. I kept them for a while but in the end +decided to kick them out. Instead a generic whatsit was introduced that could be +used as signal with the same function. That simple whatsit only has a subtype +(and of course optionally attributes). And, as \CONTEXT\ has its own backend, we +can intercept them as we like. The saving in code is not spectacular but keeping +it around (basically doing nothing) neither. The impact on \CONTEXT\ was not that +large because for instance saving positions is done differently and +transformations are encapsulated in a few helpers that could easily be adapted. 
+ +From there it was a small step to also remove the literal whatsit, so then we had +five whatsits left, plus the generic one. I then entered sentimental mode: should +we keep the first four or not. Of course we want to be \TEX\ compatible but we +can remove the code and provide a compatible replacement using macros and our own +simple whatsit nodes. That keeps all the housekeeping at the \LUA\ end, +simplifies the \CCODE, and we're still \TEX. + +Of course, once we remove these and only have the delayed \LUA\ whatsit and user +whatsits left, we can as well replace these too. In \LUATEX\ user nodes are +actually not dealt with in the backend. One can create them at the \LUA\ end and +query them in callbacks. The \TEX\ machinery just ignores them, like any whatsit. +In retrospect they could have been first class nodes, but making them whatsits +was wise because that way they can be ignored consistently when needed. + +So, in the end all we need is a simple whatsit. As I removed the subtypes +stepwise there was an intermediate mix of code to recognize simple whatsits from +core whatsits but that distinction went away. Doing this kind of refactoring is +best done stepwise because that way I can compile some large documents and see if +things break. As a consequence again some code could be simplified as we +basically no longer have extensions. Of course at the \CONTEXT\ end the removed +primitives had to be added but that didn't take much effort. The binary shrunk +some 30K but (a small amount of) \LUA\ code was added to provide a compatible +functionality (not that we use it). 
+ +% \stopsection + +\stopchapter + +\stopcomponent diff --git a/doc/context/sources/general/manuals/followingup/followingup.tex b/doc/context/sources/general/manuals/followingup/followingup.tex new file mode 100644 index 000000000..a9aaaf71c --- /dev/null +++ b/doc/context/sources/general/manuals/followingup/followingup.tex @@ -0,0 +1,34 @@ +\environment followingup-style + +\dontcomplain + +\startdocument + + \component followingup-titlepage + + \startfrontmatter + \component followingup-contents + \stopfrontmatter + + \startbodymatter + \component followingup-introduction + \component followingup-evolution + \component followingup-stripping + \component followingup-bitmaps + \component followingup-logging + \component followingup-directions + \component followingup-performance + \component followingup-cleanup + \component followingup-rejected + \component followingup-whatsits + \component followingup-feedback + \component followingup-lua + \component followingup-compilation + \component followingup-stubs + \component followingup-mp + \component followingup-tex + \component followingup-retrospect + \stopbodymatter + +\stopdocument + |