% language=us runpath=texruns:manuals/luametatex

\environment luametatex-style

\startcomponent luametatex-modifications

\startchapter[reference=modifications,title={The original engines}]

\startsection[title=The merged engines]

\startsubsection[title=The rationale]

\topicindex {engines}
\topicindex {history}

The first version of \LUATEX, made by Hartmut after we discussed the possibility
of an extension language, only had a few extra primitives and it was largely the
same as \PDFTEX. It was presented to the public in 2005. As part of the Oriental
\TEX\ project, Taco merged some parts of \ALEPH\ into the code and some more
primitives were added. Then we started more fundamental experiments. After many
years, when the engine had become more stable, the decision was made to clean up
the rather hybrid nature of the program. This means that some primitives were
promoted to core primitives, often with a different name, and that others were
removed. This also made it possible to start cleaning up the code base, which
showed decades of stepwise additions to original \TEX. In \in {chapter}
[enhancements] we discuss some new primitives, here we will cover most of the
adapted ones.

During more than a decade stepwise new functionality was added and after 10 years
the more or less stable version 1.0 was presented. But we continued and after
some 15 years the \LUAMETATEX\ follow up entered its first testing stage. But
before details about the engine are discussed in successive chapters, we first
summarize where we started from. Keep in mind that in \LUAMETATEX\ we have a bit
less than in \LUATEX, so this section differs from the one in the \LUATEX\
manual.

Besides the expected changes caused by new functionality, there are a number of
not|-|so|-|expected changes. These are sometimes a side|-|effect of a new
(conflicting) feature, or, more often than not, a change necessary to clean up
the internal interfaces. These will also be mentioned.

Again we stress that {\em this is not a \TEX\ manual, nor a tutorial}. If you are
unfamiliar with \TEX\ first play a little with a macro package, take a look at
the \TEX\ book, make yourself familiar with the concepts and macro language. That
will likely take days and not hours. Also, many of the new concepts introduced in
\LUATEX\ and \LUAMETATEX\ are explained in documents that come with the \CONTEXT\
distribution, articles and presentations. It doesn't pay off to repeat that here,
especially not in a time when users often search instead of read from cover to
cover.

Occasionally there are extensions to \PDFTEX\ and \LUATEX\ but these are unlikely
to end up in \LUAMETATEX. If needed one can add functionality using \LUA. Another
reason is that, because of the way we handle files and generate output, being
compatible would only harm the engine. We have some fundamental extensions that
overcome limitations anyway. One area where there are significant changes is in
logging: at some point it no longer made sense to be compatible (with \LUATEX)
because we carry around more information.

\stopsubsection

\startsubsection[title={Original \TEX}]

\topicindex {\TEX}

Of course it all starts with traditional \TEX. Even if we started with the
\PDFTEX\ code base, most still comes from original Knuthian \TEX. But we divert a
bit.

\startitemize

\startitem
    The current code base is written in \CCODE, not \PASCAL. The original \WEB\
    documentation is kept when possible and not wrapped in tagged comments. As a
    consequence instead of one large file plus change files, we now have multiple
    files organized in categories like \type {tex}, \type {lua}, \type
    {languages}, \type {fonts}, \type {libraries}, etc. There are some artifacts
    of the conversion to \CCODE, but these got (and get) removed stepwise. The
    documentation, which actually comes from the mix of engines (via so called
    change files), is a mix of what authors of the engines wove into the source,
    and most is of course from Don Knuth's original. In \LUAMETATEX\ we try to
    stay as close as possible to the original so that the documentation of the
    fundamentals behind \TEX\ by Don Knuth still applies. However, because we use
    \CCODE, some documentation is a bit off. Also, most global variables are now
    collected in structures, but the original names and level of abstraction were
    mostly kept. On the other hand, opening up had its impact on the code, so
    that makes some documentation a bit off too. Adapting that all will take time.
\stopitem

\startitem
    See \in {chapter} [languages] for quite some changes related to paragraph
    building, language handling and hyphenation. Because we have independent runs
    over the node list for hyphenation, kerning, ligature building, plus
    callbacks that also can tweak the list, adding a brace group in the middle of
    a word (like in \type {of{}fice}) does not prevent ligature creation. In
    fact, preventing kerns and ligatures can now be done with glyph options so
    that we don't depend on side effects of the engine. Hyphenation,
    ligature building and kerning have been split so that we can hook in
    alternative or extra code wherever we like. There are various options to
    control discretionary injection and related penalties are now integrated in
    these nodes. Language information is now bound to glyphs. The number of
    languages in \LUAMETATEX\ is smaller than in \LUATEX. Control over
    discretionaries is more granular and now managed by fewer variables. Although
    \LUAMETATEX\ behaves pretty much like you expect from \TEX, due to the many
    possibilities it is unlikely that you get identical output.
\stopitem

\startitem
    There is no pool file, all strings are embedded during compilation. This also
    removed some memory constraints. We kept token and node memory management
    because it is convenient and efficient but parts were reimplemented in order
    to remove some constraints. Token and node memory management is a bit more
    efficient which was needed because we carry around more information. All the
    other large memory structures, like those related to nesting, the save stack,
    input levels, the hash table and table of equivalents, etc. now all start out
    small and are enlarged when needed, where maxima are controlled in the usual
    way. In principle the initial memory footprint is smaller while at the same
    time we can go real large. Because we have wide memory words some data
    (arrays) used for housekeeping could be reorganized a bit.
\stopitem

\startitem
    The macro (definition and expansion) parsers are extended and we can have more
    detailed argument parsing. This has been done in a way that keeps compatibility.
\stopitem

\startitem
    The specifier \type {plus 1 fillll} does not generate an error. The extra
    \quote {l} is simply typeset.
\stopitem

\startitem
    The upper limit to \prm {endlinechar} and \prm {newlinechar} is 127.
\stopitem

\startitem
    Because the backend is not built|-|in, the magnification (\tex {mag})
    primitive is gone. A \tex {shipout} command just discards the content of the
    given box. The write related primitives have to be implemented in the used
    macro package using \LUA. None of the \PDFTEX\ derived primitives is present.
\stopitem

\startitem
    Because there is no font loader, a \LUA\ variant is free to either support or
    not the \OMEGA\ \type {ofm} file format. As there are hardly any such fonts
    it probably makes no sense. There is plenty of control over the way glyphs
    get treated and scaling of fonts and glyphs is also more dynamic.
\stopitem

\startitem
    There is more control over some (formerly hard|-|coded) math properties. In
    fact, there is a whole extra bit of math related code because we need to deal
    with \OPENTYPE\ fonts. The math processing has been adapted to the new
    (dynamic) font and glyph scaling features. Because there is more granular
    control, for instance because there are more classes, the engine has to be
    set up differently. This is also true for features that control how for
    instance math fonts are processed. An intermediate, improved, variant of the
    \LUATEX\ dual code path approach has been sacrificed in the process.
\stopitem

\startitem
    Math atoms and constructs like fractions, fences, radicals and accents have
    all been extended. The new variants accept all kinds of keywords that control
    the rendering. As a direct consequence noads (and nodes in general) are much
    bigger in terms of memory usage. For now we keep the old commands available
    but that might change when we see no eight bit fonts being used.
\stopitem

\startitem
    The \prm {outer} and \prm {long} prefixes are silently ignored but other
    prefixes have been added. It is permitted to use \prm {par} in math and
    there are more such convenience options.
\stopitem

\startitem
    The lack of a backend means that some primitives related to it are not
    implemented. This is no big deal because it is possible to use the scanner
    library to implement them as needed, which depends on the macro package and
    backend.
\stopitem

\startitem
    The math style related primitives can use numbers as well as symbolic names.
    There is some more (control over) math anyway, which is a side effect of
    supporting \OPENTYPE\ math.
\stopitem

\stopitemize

There is much more to say here but at least this gives an idea of what you end up
with if you move from traditional \TEX\ to \LUAMETATEX: a more complex but also
more flexible system.

\stopsubsection

\startsubsection[title={Goodies from \ETEX}]

\topicindex {\ETEX}

Being the de|-|facto standard extension of course we provide the \ETEX\ features,
but only those that make sense. We used version 2.2 which is basically the only
version that was ever released.

\startitemize

\startitem
    The \ETEX\ functionality is always present and enabled so the prepended
    asterisk or \type {-etex} switch for \INITEX\ is not needed.
\stopitem

\startitem
    The \TEXXET\ extension is not present, so the primitives \type
    {\TeXXeTstate}, \type {\beginR}, \type {\beginL}, \type {\endR} and \type
    {\endL} are missing. Instead we used the \OMEGA|/|\ALEPH\ approach to
    directionality as starting point, albeit it has been changed quite a bit, so
    that we're probably not that far from \TEXXET. In the end right to left
    typesetting mostly boils down to marking regions in the node list and reverse
    flushing these in the backend. The main addition that \OMEGA\ brought was the
    initial paragraph node that stores the direction.
\stopitem

\startitem
    Some of the tracing information that is output by \ETEX's \prm
    {tracingassigns} and \prm {tracingrestores} is not there. Where \ETEX\ added
    some tracing, \LUAMETATEX\ adds much more and also permits to set details.
    Tracing is not compatible, if only because we have more complex nodes and do
    more in all kinds of mechanisms.
\stopitem

\startitem
    Register management in \LUAMETATEX\ uses the \OMEGA|/|\ALEPH\ model, so the
    maximum value is 65535 and the implementation uses a flat array instead of
    the mixed flat & sparse model from \ETEX.
\stopitem

\startitem
    Because we have more nodes, conditionals, etc.\ the \ETEX\ status related
    variables are adapted to \LUAMETATEX: we use different \quote {constants},
    but that should be no problem because any sane macro package uses
    abstraction. All these properties can be queried via \LUA.
\stopitem

\startitem
    The \prm {scantokens} primitive is now using the same mechanism as \LUA\
    print|-|to|-|\TEX\ uses, which simplifies the code. There is a little
    performance hit but it will not be noticed in \CONTEXT, because we never use
    this primitive.
\stopitem

\startitem
    The \ETEX\ engine provides \prm {protected} and although we have that too,
    the implementation is different. Users should not notice that.
\stopitem

\startitem
    Because we don't use change files on top of original \TEX, the integration of
    \ETEX\ functionality is a bit more natural, code wise.
\stopitem

\startitem
    The \tex {readline} primitive has to be implemented in \LUA. This is a side
    effect of delegating all file \IO.
\stopitem

\startitem
    Most of the code is rewritten but the original primitives are still tagged as
    coming from \ETEX.
\stopitem

\stopitemize

\stopsubsection

\startsubsection[title={Bits of \PDFTEX}]

\topicindex {\PDFTEX}

Because we want to produce \PDF\ the most natural starting point was the popular
\PDFTEX\ program, so we took version 1.40. We inherit the stable features,
dropped most of the experimental code and promoted some functionality to core
\LUATEX\ functionality which in turn triggered renaming primitives. However, as
the backend was dropped, not that much from \PDFTEX\ is present any more.
Basically all we now inherit from \PDFTEX\ is expansion and protrusion but even
that has been adapted. So don't expect \LUAMETATEX\ to be compatible.

\startitemize

\startitem
    The experimental primitives \prm {ifabsnum} and \prm {ifabsdim} have been
    promoted to core primitives and became part of the much larger repertoire
    of \LUAMETATEX\ conditionals. The primitive \prm {ifincsname} is also
    inherited but has a different implementation.
\stopitem

\startitem
    Of course \prm {quitvmode} has become a core primitive too.
\stopitem

\startitem
    As the hz (expansion) and protrusion mechanisms are part of the core the
    related primitives \prm {lpcode}, \prm {rpcode}, \prm {efcode}, \prm
    {leftmarginkern}, \prm {rightmarginkern} are promoted to core primitives. The
    two commands \prm {protrudechars} and \prm {adjustspacing} control these
    processes. The protrusion and kern related primitives are now dimensions
    while expansion is still one of these 1000 based scales.
\stopitem

\startitem
    In \LUAMETATEX\ three extra primitives can be used to overload the font
    specific settings: \prm {adjustspacingstep} (max: 100), \prm
    {adjustspacingstretch} (max: 1000) and \prm {adjustspacingshrink} (max: 500).
\stopitem

\startitem
    The hz optimization code has been redone so that we no longer need to create
    extra font instances. The front- and backend have been decoupled and the
    glyph and kern nodes carry the used values. In \LUATEX\ that made a more
    efficient generation of \PDF\ code possible. It also resulted in much cleaner
    code. The backend code is gone, but of course the information is still
    carried around. Performance in \LUAMETATEX\ should be a bit better than in
    \PDFTEX\ although of course its 32 bit machinery is in general slower than
    the eight bit \PDFTEX.
\stopitem

\startitem
    When \prm {adjustspacing} has value~2, hz optimization will be applied to
    glyphs and kerns. When the value is~3, only glyphs will be treated. A value
    smaller than~2 disables this feature.
\stopitem

\startitem
    When \prm {protrudechars} has a value larger than zero characters at the edge
    of a line can be made to hang out. A value of~2 will take the protrusion into
    account when breaking a paragraph into lines. A value of~3 will try to deal
    with right|-|to|-|left rendering; this is a still experimental feature.
\stopitem

\startitem
    The pixel multiplier dimension \prm {pxdimen} has been inherited as core
    primitive.
\stopitem

\startitem
    The primitive \prm {tracingfonts} is now a core primitive but doesn't relate
    to the backend.
\stopitem

\startitem
    The image inclusion code was already different in \LUATEX\ and is gone in
    \LUAMETATEX\ which has no backend. One can implement the same abstraction
    layer (aka resources) using \LUA.
\stopitem

\stopitemize

Even if not that much is present from \PDFTEX\ in \LUAMETATEX\ we still see it as
its ancestor. After all, without \PDFTEX\ the \TEX\ community would not be where
it is now. We still use it as reference when we check something (that we
changed).

\stopsubsection

\startsubsection[title=Directionality from \ALEPH]

\topicindex {\ALEPH}

In \LUATEX\ we took the 32 bit aspects of \ALEPH\ RC4, the stable version of
\OMEGA\ that also integrated \ETEX. In \LUATEX\ we also took much of the
directional mechanisms and merged it into the \PDFTEX\ code base as starting
point for further development. Then we simplified directionality, fixed it and
opened it up. In \LUAMETATEX\ not that much of this is left. We only have two
horizontal directions. Instead of vertical directions we introduce an orientation
model bound to boxes. We kept the initial par node, local boxes (that also use
par nodes) and directional nodes.

The already reduced|-|to|-|four set of directions now only has two members:
left|-|to|-|right and right|-|to|-|left. They don't do much as it is the backend
that has to deal with them. When paragraphs are constructed a change in
horizontal direction is irrelevant for calculating the dimensions. So, basically
most that we do is registering state and passing that on till the backend can do
something with it.

Here is a summary of inherited functionality:

\startitemize

\startitem
    The \type {^^} notation has been extended: after \type {^^^^} four
    hexadecimal characters are expected and after \type {^^^^^^} six hexadecimal
    characters have to be given. The original \TEX\ interpretation is still valid
    for the \type {^^} case but the four and six variants do no backtracking,
    i.e.\ when they are not followed by the right number of hexadecimal digits
    they issue an error message. Because \type {^^^} is a normal \TEX\ case, we
    don't support the odd number of \type {^^^^^} either. This kind of parsing
    can be disabled in \LUAMETATEX.
\stopitem

\startitem
    Glues {\it immediately after} direction change commands are not legal
    breakpoints. There is a bit more sanity testing for the direction state. This
    can be configured.
\stopitem

\startitem
    The placement of math formula numbers is direction aware and adapts
    accordingly. Boxes carry directional information but rules don't.
\stopitem

\startitem
    There are no direction related primitives for page and body directions. The
    paragraph, text and math directions are specified using primitives that
    take a number. The three letter codes are dropped.
\stopitem

\startitem
    The local box mechanism has been extended and redone which permits a more
    generalized and robust usage.
\stopitem

\stopitemize

Most of the directional work is actually up to the backend. As \OMEGA\ never had
a \PDF\ backend, \LUATEX\ took care of the many directions. We now only have
two directions so the backend code that has to be provided can be relatively
simple. The biggest complication is in handling fonts and synchronizing the glyph
streams. Much is also macro package (and usage) dependent.

\stopsubsection

\startsubsection[title={No longer \WEBC}]

\topicindex {\WEBC}

The \LUAMETATEX\ codebase is not dependent on the \WEBC\ framework. The
interaction with the file system and \TDS\ is up to \LUA. There still might be
traces but eventually the code base should be lean and mean. The \METAPOST\
library is coded in \CWEB\ and in order to be independent from related tools,
conversion to \CCODE\ is done with a \LUA\ script run by, surprise, \LUAMETATEX.

The biggest consequence of this is that there are no dependencies, also not on
ever changing libraries that we occasionally see break compilation of \LUATEX.
Even on older machines (say 2013\endash2020) compilation should be sub minute.
The amount of platform specific code is minimal.

\stopsubsection

\startsubsection[title={The follow up on \LUATEX}]

\topicindex {\LUATEX}

This engine is a follow up on \LUATEX, that became more or less frozen after
version 1.10, so that is the version we started from. Apart from reorganizing the
code base, simplifying the build, limiting dependencies etc.\ this project also
adds new functionality and removes some as well. The main differences are
discussed in a separate section. The basic ideas remain the same but the engine
is not downward compatible. This is why we have \CONTEXT\ \MKIV\ for \LUATEX\ and
\CONTEXT\ \LMTX\ for \LUAMETATEX .

There is no \LUAJIT\ version of \LUAMETATEX, simply because there is not that
much gain in the average run (at least not in \CONTEXT). Depending on the kind of
documents, complexity of macro code and usage of \LUA, the \LUAMETATEX\ engine
can be up to 30\percent\ faster than \LUATEX\ anyway.

\stopsubsection

\stopsection

\startsection[title=Implementation notes]

\startsubsection[title=Memory allocation]

\topicindex {memory}

The single internal memory heap that traditional \TEX\ used for tokens and nodes
is split into two separate arrays. Each of these will grow dynamically when
needed. Internally a token or node is an index into these arrays. This permits
for an efficient implementation and is also responsible for the performance of
the core. All other data structures are mostly the same but managed dynamically
too. Because we operate in a 64 bit world, the parallel table of equivalents
needed for managing levels, is gone. Anyhow, the original documentation in \TEX\
The Program mostly applies!

\stopsubsection

\startsubsection[title=Sparse arrays]

The \prm {mathcode}, \prm {delcode}, \prm {catcode}, \prm {sfcode}, \prm {lccode}
and \prm {uccode} (and the new \prm {hjcode}) tables are now sparse arrays that
are implemented in~\CCODE. They are no longer part of the \TEX\ \quote
{equivalence table} and because each had 1.1 million entries with a few memory
words each, this makes a major difference in memory usage. Performance is not
really hurt by this.

The \prm {catcode}, \prm {sfcode}, \prm {lccode}, \prm {uccode} and \prm {hjcode}
assignments don't show up when using the \ETEX\ tracing routines \prm
{tracingassigns} and \prm {tracingrestores} but we don't see that as a real
limitation. It also saves a lot of clutter.

The glyph ids within a font are also managed by means of a sparse array as glyph
ids can go up to index $2^{21}-1$ but these are never accessed directly so again
users will not notice this.

\stopsubsection

\startsubsection[title=Simple single|-|character csnames]

\topicindex {csnames}

Single|-|character commands are no longer treated specially in the internals,
they are stored in the hash just like the multiletter control sequences. This is
a side effect of going \UNICODE\ and \UTF. Where using 256 slots in an array adds
no burden, supporting the whole \UNICODE\ range is a waste of space. Therefore,
also active characters are internally implemented as a special type of
multi|-|letter control sequences that uses a prefix that is otherwise impossible
to obtain.

The code that displays control sequences explicitly checks if the length is one
when it has to decide whether or not to add a trailing space.

\stopsubsection

\startsubsection[title=Binary file reading]

\topicindex {files+binary}

All input now goes via \LUA: files loaded with \type {\input} as well as files
that are opened with \type {\openin}. Actually the latter has to be implemented
in terms of macros and \LUA\ calls. This also means that compared to \LUATEX\
the internal handling of input has been changed but users won't notice that.

Setting a callback is expected now. Although reading input natively using \type
{getc} calls is more efficient, we now fetch lines from \LUA, put them in a
buffer and then pick successive bytes (keep in mind that we read \UTF) from that.
The performance is quite ok, also because \LUA\ is fast, today's operating systems
cache, and storage media have become very fast. Also, \TEX\ is spending more time
messing around with what it has input than actually reading input.

\stopsubsection

\startsubsection[title=Tabs and spaces]

\topicindex {space}
\topicindex {newline}

We conform to the way other \TEX\ engines handle trailing tabs and spaces. For
decades trailing tabs and spaces (before a newline) were removed from the input
but this behaviour was changed in September 2017 to only handle spaces. We are
aware that this can introduce compatibility issues in existing workflows but
because we don't want too many differences with upstream \TEXLIVE\ we just follow
up on that patch (which is a functional one and not really a fix). It is up to
macro package maintainers to deal with possible compatibility issues and in
\LUAMETATEX\ they can do so via the callbacks that deal with reading from files.

The previous behaviour was a known side effect and (as that kind of input
normally comes from generated sources) it was normally dealt with by adding a
comment token to the line in case the spaces and|/|or tabs were intentional and
to be kept. We are aware of the fact that this contradicts some of our other
choices but consistency with other engines prevails here. We still stick to our view that at
the log level we can be (and might be) more incompatible. We already expose some
more details anyway.

\stopsubsection

\startsubsection[title=Logging]

When detailed logging is enabled more detail is output with respect to what nodes
are involved. This is a side effect of the core nodes having more detailed
subtype information. The benefit of more detail outweighs any wish to be byte
compatible in the logging. One can always write additional logging in \LUA.

The information that goes into the log file can be different from \LUATEX, and
might even differ a bit more in the future. The main reason is that inside the
engine we have more granularity, which for instance means that we output subtype
and attribute related information when nodes are printed. Of course we could have
offered a compatibility mode but it serves no purpose. Over time there have been
many subtle changes to control logs in the \TEX\ ecosystems so another one is
bearable.

In a similar fashion, there is a bit different behaviour when \TEX\ expects
input, which in turn is a side effect of removing the interception of \type {*}
and \type {&} which made for cleaner code (quite a bit had accumulated as side
effect of continuous adaptations in the \TEX\ ecosystems). There was already code
that was never executed, simply as side effect of the way \LUATEX\ initializes
itself (one needs to enable classes of primitives for instance). Keep in mind
that over time system dependencies have been handled with \TEX\ change files, the
\WEBC\ infrastructure, \KPSE\ features, compilation variables and flags, etc. In
\LUAMETATEX\ we try to minimize all that.

When it became unavoidable that we output more detail, it also became clear that
it made no sense to stay log and trace compatible. Some is controlled by
parameters in order to stay close to the original, but \CONTEXT\ is configured such
that we benefit from the new possibilities. Examples are that in addition to
\prm {meaning} we have \prm {meaningfull} that also exposes macro properties,
and \prm {meaningless} that only exposes the body. The \prm {untraced} prefix
will suppress some in the log, and we set \prm {tracinglevels} to 3 in order to
get details about the input and grouping level. When there's less shown than
expected keep in mind that \LUAMETATEX\ has a somewhat optimized saving and
restoring of meanings so less can happen which is reflected in tracing. When node
lists are serialized (as with \prm {showbox}) some nodes, like discretionaries
report more detail. The compact serializer, used for instance to signal overfull
boxes, also shows a bit more detail with respect to non|-|content nodes. In math
more is shown if only because we have more control and additional mechanisms.

\stopsubsection

\startsubsection[title=Parsing]

Token parsers have been upgraded for the sake of \LUA, \prm {csname} handling
has been extended, macro definitions can be more flexible so the related code was
adapted, more conditionals also brought some changes. But we build upon the
(reorganized) \TEX\ foundation so the basics can definitely be recognized.

Because of interfacing in \LUA\ the internal token and node organization has
been normalized (read: we cannot cheat because all is kind of visible). On
the one hand this can come with a performance penalty but that is more than
compensated by extensions, optimized parsers and such. Still the fact that we
are \UTF\ based (32 bit) makes the machinery slower than the 8~bit original.
The reworked \LUAMETATEX\ engine is substantially faster than the \LUATEX\
predecessor.

The handling of conditionals has been adapted so that we can have flatter
branches (\prm {orelse} cum suis). This again has some consequences for
parsing. Because parsing alignments is rather interwoven in general parsing and
expansion the handling of related primitives has been slightly adapted (also for
the sake of \LUA\ interfacing) and dealing with \prm {noalign} situations is a
bit more convenient.

These are just a few of the adaptations and most of this happened stepwise with
testing in the \CONTEXT\ code base. It will be clear that \LUAMETATEX\ is a quite
different extension to the original. You're warned.

\stopsubsection

\startsubsection[title=Changes in keyword scanning]

\topicindex {keywords}

Some primitives accept (optional) keywords and in \LUAMETATEX\ there are more
keywords than in \LUATEX. Scanning can trigger error messages and lookahead side
effects and in \LUAMETATEX\ these can be different. This is no big deal because
errors are still errors.

\stopsubsection

\startsection[reference=differences,title={Differences with \LUATEX}]

\startsubsection[title=Dropped primitives]

\LUAMETATEX\ is a leaner and meaner \LUATEX. This means that substantial parts and
dependencies are gone: quite some font code, all backend code with related frontend
code and of course image and font inclusion. There is also new functionality which
makes for less lean but in the end we still have less, also in terms of dependencies.
This chapter will discuss what is gone. We start with the primitives that were dropped.

\starttabulate[|l|pl|]
\BC fonts       \NC \type {\letterspacefont}
                    \type {\copyfont}
                    \type {\expandglyphsinfont}
                    \type {\ignoreligaturesinfont}
                    \type {\tagcode}
                    \type {\leftghost}
                    \type {\rightghost}
                \NC \NR
\BC backend     \NC \type {\dviextension}
                    \type {\dvivariable}
                    \type {\dvifeedback}
                    \type {\pdfextension}
                    \type {\pdfvariable}
                    \type {\pdffeedback}
                    \type {\draftmode}
                    \type {\outputmode}
                \NC \NR
\BC dimensions  \NC \type {\pageleftoffset}
                    \type {\pagerightoffset}
                    \type {\pagetopoffset}
                    \type {\pagebottomoffset}
                    \type {\pageheight}
                    \type {\pagewidth}
                \NC \NR
\BC resources   \NC \type {\saveboxresource}
                    \type {\useboxresource}
                    \type {\lastsavedboxresourceindex}
                    \type {\saveimageresource}
                    \type {\useimageresource}
                    \type {\lastsavedimageresourceindex}
                    \type {\lastsavedimageresourcepages}
                \NC \NR
\BC positioning \NC \type {\savepos}
                    \type {\lastxpos}
                    \type {\lastypos}
                \NC \NR
\BC directions  \NC \type {\textdir}
                    \type {\linedir}
                    \type {\mathdir}
                    \type {\pardir}
                    \type {\pagedir}
                    \type {\bodydir}
                    \type {\pagedirection}
                    \type {\bodydirection}
                \NC \NR
\BC randomizer  \NC \type {\randomseed}
                    \type {\setrandomseed}
                    \type {\normaldeviate}
                    \type {\uniformdeviate}
                \NC \NR
\BC utilities   \NC \type {\synctex}
                \NC \NR
\BC extensions  \NC \type {\latelua}
                    \type {\lateluafunction}
                    \type {\openout}
                    \type {\write}
                    \type {\closeout}
                    \type {\openin}
                    \type {\read}
                    \type {\readline}
                    \type {\closein}
                    \type {\ifeof}
                \NC \NR
\BC control     \NC \type {\suppressfontnotfounderror}
                    \type {\suppresslongerror}
                    \type {\suppressprimitiveerror}
                    \type {\suppressmathparerror}
                    \type {\suppressifcsnameerror}
                    \type {\suppressoutererror}
                    \type {\mathoption}
                \NC \NR
\BC system      \NC \type {\primitive}
                    \type {\ifprimitive}
                    \type {\formatname}
                \NC \NR
\BC ignored     \NC \type {\long}
                    \type {\outer}
                    \type {\mag}
                \NC \NR
\stoptabulate

The math machinery has been overhauled stepwise. In the process detailed control
has been added but later some of that got removed or replaced. The engine now
assumes that \OPENTYPE\ fonts are used but you do need to set up the engine
properly, something that has to be done with respect to fonts anyway. By enabling
and|/|disabling certain features you can emulate the traditional engine. Font
parameters no longer are taken from the traditional parameters when they are not
set. We just assume properly passed so called math constants and quite a few new
ones have been added.

The resources and positioning primitives are actually useful but can be defined
as macros that (via \LUA) inject nodes in the input that suit the macro package
and backend. The three||letter direction primitives are gone and the numeric
variants are now leading. There is no need for page and body related directions
and they don't work well in \LUATEX\ anyway. We only have two directions left.
Because we can hook in \LUA\ functions that get information about what is expected
(consumer or provider) there are plenty of possibilities for adding functionality
using this scripting language.

The primitive related extensions were not that useful and reliable so they have
been removed. There are some new variants that will be discussed later. The \prm
{outer} and \prm {long} prefixes are gone as they don't make much sense
nowadays and them becoming dummies opened the way to something new: control
sequence properties that permit protection against as well as controlled
overloading of definitions. I don't think that (\CONTEXT) users will notice these
prefixes being gone. The definition and parsing related \type {\suppress..}
features are now default and can't be changed so related primitives are gone.

The \prm {shipout} primitive does not ship out but just erases the content of
the box unless of course that has happened already in another way. A macro
package should implement its own backend and related shipout. Talking of backend,
the extension primitives that relate to backends can be implemented as part of a
backend design using generic whatsits. There is only one type of whatsit now. In
fact we're now closer to original \TEX\ with respect to the extensions.

The \type {img} library has been removed as it's rather bound to the backend. The
\type {slunicode} library is also gone. There are some helpers in the string
library that can be used instead and one can write additional \LUA\ code if
needed. There is no longer a \type {pdf} backend library but we have an up to
date \PDF\ parsing library on board.

In the \type {node}, \type {tex} and \type {status} library we no longer have
helpers and variables that relate to the backend. The \LUAMETATEX\ engine is in
principle \DVI\ and \PDF\ unaware. There are, as mentioned, only generic whatsit
nodes that can be used for some management related tasks. For instance you can
use them to implement user nodes. More extensive status information is provided
in the overhauled status library. All libraries have additional functionality and
names of functions have been normalized (for as far as possible).

The margin kern nodes are gone and we now use regular kern nodes for them. As a
consequence there are two extra subtypes indicating the injected left or right
kern. The glyph field served no real purpose so there was no reason for a special
kind of node.

The \KPSE\ library is no longer built|-|in, but one can use an external \KPSE\
library, assuming that it is present on the system, because the engine has a so
called optional library interface to it. Because there is no backend, quite some
file related callbacks could go away. The following file related callbacks
remained (till now):

\starttyping
find_write_file find_format_file open_data_file
\stoptyping

The callbacks related to errors are changed:

\starttyping
intercept_tex_error intercept_lua_error
show_error_message show_warning_message
\stoptyping

There is a hook that gets called when one of the fundamental memory structures
gets reallocated.

\starttyping
trace_memory
\stoptyping

When you use the overload protect mechanisms, a callback can be plugged in to handle
exceptions:

\starttyping
handle_overload
\stoptyping

The (job) management hooks are kept:

\starttyping
process_jobname
start_run stop_run wrapup_run
pre_dump
start_file stop_file
\stoptyping

Because we use a more generic whatsit model, there is a new callback:

\starttyping
show_whatsit
\stoptyping

Because tracing boxes now reports a lot more information, we have a plug in for
detail:

\starttyping
get_attribute
\stoptyping

Being the core of extensibility, the typesetting callbacks of course stayed. This
is what we ended up with:

\startalign[flushleft,nothyphenated]
\tt \cldcontext{table.concat(table.sortedkeys(callbacks.list), ", ")}
\stopalign

As in \LUATEX\ font loading happens with the following callback. This time it
really needs to be set because there is no built|-|in font loader.

\starttyping
define_font
\stoptyping

There are all kinds of subtle differences in the implementation, for instance we
no longer intercept \type {*} and \type {&} as these were already replaced long
ago in \TEX\ engines by command line options. Talking of options, only a few are
left. All input goes via \LUA, even the console. One can program a terminal if
needed.

We took our time for reaching a stable state in \LUATEX. Among the reasons is the
fact that most was experimented with in \CONTEXT, which we can adapt to the
engine as we go. It took many years to decide what to keep and how to do things.
Of course there are places where things can be improved but that most likely only
happens in \LUAMETATEX. Contrary to what is sometimes suggested, the
\LUATEX|-|\CONTEXT\ \MKIV\ combination (assuming matched versions) has been quite
stable. It made no sense otherwise. Most \CONTEXT\ functionality didn't change
much at the user level. Of course there have been issues, as is natural with
everything new and beta, but we have a fast update cycle.

The same is true for \LUAMETATEX\ and \CONTEXT\ \LMTX: it can be used for
production as usual and in practice \CONTEXT\ users tend to use the beta
releases, which proves this. Of course, if you use low level features that are
experimental you're on your own. Also, as with \LUATEX\ it might take many years
before a long term stable is defined. The good news is that, when the source code
has become part of the \CONTEXT\ distribution, there is always a properly
working, more or less long term stable, snapshot.

The error reporting subsystem has been redone quite a bit but is still
fundamentally the same. We don't really assume interactive usage but if someone
uses it, it might be noticed that it is not possible to backtrack or inject
something. Of course it is no big deal to implement all that in \LUA\ if needed.
It removes a system dependency and makes for a bit cleaner code. In \CONTEXT\ we
quit on an error simply because one has to fix source anyway and runs are fast
enough. Logging provides more detail and new primitives can be used to prevent
clutter in tracing (the more complex a macro package becomes, the more extreme
tracing becomes).

\stopsubsection

\startsubsection[title=New primitives]

There are new primitives as well as some extensions to existing primitive
functionality. These are described in the following chapters but there might be
hidden treasures in the binary. If you locate them, don't automatically assume
them to stay, some might be part of experiments! There are for instance a few
csname related definers, we have integer and dimension constants, the macro
argument parser can be brought in tolerant mode, the repertoire of conditionals
has been extended, some internals can be controlled (think of normalization of
lines, hyphenation etc.), and macros can be protected against user overload. Not
all is discussed in detail in this manual but there are introductions in the
\CONTEXT\ distribution that explain them. But the \TEX\ kernel is of course
omnipresent.

\startluacode

-- The LuaMetaTeX primitives are asked from the running engine, the LuaTeX ones
-- are read from a table that was saved to disk.
local luametatex = tex.primitives()
local luatex     = table.load("luatex-primitives.lua")

-- When there is no saved table we write a tiny document that saves the list
-- of primitives, process it with LuaTeX and try to load the result again.
if not luatex then
    local source = "\\starttext \\ctxlua {table.save(tex.jobname .. '.lua',tex.primitives())} \\stoptext"

    io.savedata("luatex-primitives.tex", source)
    os.execute("context --luatex --once luatex-primitives")

    luatex = table.load("luatex-primitives.lua")
end


if luatex and luametatex then

    -- Names that were found in the index registers. An entry that does not
    -- match the pattern is registered under the empty string.
    local documented = { }

    local function collect(index)
        if not index then
            return
        end
        local entries = index.entries
        for i=1,#entries do
            local name = string.match(entries[i].list[1][1],"\\tex%s*{(.-)}")
            documented[name or ""] = true
        end
     -- inspect(documented)
    end

    local collected = structures.registers.collected

    collect(collected and collected.texindex)
    collect(collected and collected.luatexindex)

    -- From here on both lists are hashes so that we can do fast lookups.
    luatex     = table.tohash(luatex)
    luametatex = table.tohash(luametatex)

    -- Typesets one primitive name followed by a line break.
    local function show(name)
        context.type(name)
        context.crlf()
    end

 -- context.page()

    context("The following primitives are available in \\LUATEX\\ but not in \\LUAMETATEX.  ")
    context("Some of these are emulated in \\CONTEXT.")

    context.blank()
    context.startcolumns { n = 2 }
        for name in table.sortedhash(luatex) do
            if not luametatex[name] then
                if not documented[name] then
                    context.dontleavehmode()
                end
                show(name)
            end
        end
    context.stopcolumns()
    context.blank()

 -- context.page()

    context("The following primitives are available in \\LUAMETATEX\\ only. In the meantime ")
    context("the \\LUAMETATEX\\ code base is so different from \\LUATEX\\ that backporting ")
    context("is no longer reasonable.")

    context.blank()
    context.startcolumns { n = 2 }
        for name in table.sortedhash(luametatex) do
            if not luatex[name] then
                if not documented[name] then
                    -- Primitives without an index entry get a marker in the margin.
                    context.dontleavehmode()
                    context.llap("\\infofont[todo] ")
                end
                show(name)
            end
        end
    context.stopcolumns()
    context.blank()

end

\stopluacode

\stopsubsection

\startsubsection[title=Changed function names]

As part of a bit more consistency some function names also changed. Names with an
\type {_} got that removed (as that was the minority). It's easy to provide a
back mapping if needed (just alias the functions).

{\em Todo: only mention the \LUATEX\ ones.}

\starttabulate[|l|l|l|l|]
\DB library  \BC old name          \BC new name         \BC comment \NC \NR
\TB
\NC language \NC clear_patterns    \NC clearpatterns    \NC \NR
\NC          \NC clear_hyphenation \NC clearhyphenation \NC \NR
\NC mplib    \NC italcor           \NC italic           \NC \NR
\NC          \NC pen_info          \NC peninfo          \NC \NR
\NC          \NC solve_path        \NC solvepath        \NC \NR
\NC texio    \NC write_nl          \NC writenl          \NC old name stays \NC \NR
\NC node     \NC protect_glyph     \NC protectglyph     \NC \NR
\NC          \NC protect_glyphs    \NC protectglyphs    \NC \NR
\NC          \NC unprotect_glyph   \NC unprotectglyph   \NC \NR
\NC          \NC unprotect_glyphs  \NC unprotectglyphs  \NC \NR
\NC          \NC end_of_math       \NC endofmath        \NC \NR
\NC          \NC mlist_to_hlist    \NC mlisttohlist     \NC \NR
\NC          \NC effective_glue    \NC effectiveglue    \NC \NR
\NC          \NC has_glyph         \NC hasglyph         \NC \NR
\NC          \NC first_glyph       \NC firstglyph       \NC \NR
\NC          \NC has_field         \NC hasfield         \NC \NR
\NC          \NC copy_list         \NC copylist         \NC \NR
\NC          \NC flush_node        \NC flushnode        \NC \NR
\NC          \NC flush_list        \NC flushlist        \NC \NR
\NC          \NC insert_before     \NC insertbefore     \NC \NR
\NC          \NC insert_after      \NC insertafter      \NC \NR
\NC          \NC last_node         \NC lastnode         \NC \NR
\NC          \NC is_zero_glue      \NC iszeroglue       \NC \NR
\NC          \NC make_extensible   \NC makeextensible   \NC \NR
\NC          \NC uses_font         \NC usesfont         \NC \NR
\NC          \NC is_char           \NC ischar           \NC \NR
\NC          \NC is_direct         \NC isdirect         \NC \NR
\NC          \NC is_glyph          \NC isglyph          \NC \NR
\NC          \NC is_node           \NC isnode           \NC \NR
\NC token    \NC scan_keyword      \NC scankeyword      \NC \NR
\NC          \NC scan_keywordcs    \NC scankeywordcs    \NC \NR
\NC          \NC scan_int          \NC scanint          \NC \NR
\NC          \NC scan_real         \NC scanreal         \NC \NR
\NC          \NC scan_float        \NC scanfloat        \NC \NR
\NC          \NC scan_dimen        \NC scandimen        \NC \NR
\NC          \NC scan_glue         \NC scanglue         \NC \NR
\NC          \NC scan_toks         \NC scantoks         \NC \NR
\NC          \NC scan_code         \NC scancode         \NC \NR
\NC          \NC scan_string       \NC scanstring       \NC \NR
\NC          \NC scan_argument     \NC scanargument     \NC \NR
\NC          \NC scan_word         \NC scanword         \NC \NR
\NC          \NC scan_csname       \NC scancsname       \NC \NR
\NC          \NC scan_list         \NC scanlist         \NC \NR
\NC          \NC scan_box          \NC scanbox          \NC \NR
\LL
\stoptabulate

It's all part of trying to make the code base consistent but it is sometimes a
bit annoying. However, that's why we develop this engine independent of the
\LUATEX\ code base. It's anyway a change that has been on my todo list for quite
a while because those inconsistencies annoyed me. It might take some years to
get all done.

\stopsubsection

\stopsection

\stopchapter

\stopcomponent