From 9191d12efe40ce045f76b695fc5c02fa6a1a7d6a Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Fri, 9 Apr 2021 20:43:45 +0200 Subject: 2021-04-09 19:56:00 --- .../documents/general/manuals/luametatex.pdf | Bin 1385220 -> 1387741 bytes doc/context/scripts/mkiv/mtx-fonts.html | 2 + doc/context/scripts/mkiv/mtx-fonts.xml | 4 + .../general/manuals/languages/languages-mkiv.tex | 1 + .../manuals/languages/languages-options.tex | 424 +++++++++++++++++++++ .../manuals/luametatex/luametatex-callbacks.tex | 6 +- .../manuals/luametatex/luametatex-languages.tex | 13 + .../manuals/luametatex/luametatex-registers.tex | 6 +- .../general/manuals/luametatex/luametatex.tex | 2 + 9 files changed, 451 insertions(+), 7 deletions(-) create mode 100644 doc/context/sources/general/manuals/languages/languages-options.tex (limited to 'doc') diff --git a/doc/context/documents/general/manuals/luametatex.pdf b/doc/context/documents/general/manuals/luametatex.pdf index 5c2e3aea1..99b706c84 100644 Binary files a/doc/context/documents/general/manuals/luametatex.pdf and b/doc/context/documents/general/manuals/luametatex.pdf differ diff --git a/doc/context/scripts/mkiv/mtx-fonts.html b/doc/context/scripts/mkiv/mtx-fonts.html index b175139c8..f3b7cfbf5 100644 --- a/doc/context/scripts/mkiv/mtx-fonts.html +++ b/doc/context/scripts/mkiv/mtx-fonts.html @@ -61,6 +61,8 @@

Examples

mtxrun --script font --list somename (== --pattern=*somename*) +

mtxrun --script font --list --file filename +
mtxrun --script font --list --name --pattern=*somefile*

mtxrun --script font --list --name somename
mtxrun --script font --list --name --pattern=*somename*

mtxrun --script font --list --spec somename diff --git a/doc/context/scripts/mkiv/mtx-fonts.xml b/doc/context/scripts/mkiv/mtx-fonts.xml index d001f4f9a..acb595a25 100644 --- a/doc/context/scripts/mkiv/mtx-fonts.xml +++ b/doc/context/scripts/mkiv/mtx-fonts.xml @@ -38,6 +38,10 @@ mtxrun --script font --list somename (== --pattern=*somename*) + + mtxrun --script font --list --file filename + mtxrun --script font --list --name --pattern=*somefile* + mtxrun --script font --list --name somename mtxrun --script font --list --name --pattern=*somename* diff --git a/doc/context/sources/general/manuals/languages/languages-mkiv.tex b/doc/context/sources/general/manuals/languages/languages-mkiv.tex index a65c6d532..4bfaa9c0b 100644 --- a/doc/context/sources/general/manuals/languages/languages-mkiv.tex +++ b/doc/context/sources/general/manuals/languages/languages-mkiv.tex @@ -56,6 +56,7 @@ \component languages-typesetting \component languages-goodies \component languages-sorting + \component languages-options \stopbodymatter \startappendices diff --git a/doc/context/sources/general/manuals/languages/languages-options.tex b/doc/context/sources/general/manuals/languages/languages-options.tex new file mode 100644 index 000000000..e2e5a61c3 --- /dev/null +++ b/doc/context/sources/general/manuals/languages/languages-options.tex @@ -0,0 +1,424 @@ +% language=uk + +\startcomponent languages-options + +\environment languages-environment + +\startchapter[title=Options][color=darkblue] + +\startsection[title=Introduction] + +Hyphenation of words is controlled by so called patterns. They take a word and +try to match parts with a pattern that describes where a hyphen can be injected. +Preferred and discouraged injection points accumulate to a score that in the end +determines where so called discretionary nodes get injected in the list of +glyphs that make a word. The patterns are language specific. 
+ +This mechanism is agnostic when it comes to the characters involved: they are +just numbers. However, when in a next step font features like ligature building +and kerning are applied we also have to deal with language specific properties +(and meanings). Often a ligature at the boundary of a composed word can make +reading confusing and has to be avoided. Some of that can be controlled by the +font when it implements language specific features but because that approach is +not based on a dictionary it is more about playing safe and prevention than about +quality. + +In the next sections a mechanism is discussed that also uses patterns. This time +it is about controlling fonts as well as how hyphenation patterns are applied. +This process kicks in before hyphenation is applied but it definitely has to be +seen as part of that same process. It is integrated in the hyphenation machinery and +acts as a preprocessor with the possibility to feed back and move forward. The +implementation is such that when it's not used there is no performance penalty. +\footnote {There are by now plenty of alternative approaches to these problems +but after some discussion about the pros and cons of each this new mechanism was +made. I admit that the fun factor played a role. It is also one of the things we +can do in \LUAMETATEX\ without worrying about a possible negative impact on +\LUATEX\ users other than \CONTEXT .} + +There are several predefined operations that are characterized by keywords and +shortcuts and collected in an option list that is part of a language goodie file. +Examples can be found in the distribution in files with the suffix \type {llg} +(\LUA\ language goodie). The framework of such a file is: + +\starttyping +return { + name = "whatever", + version = "1.00", + comment = "Goodies for experiments and demo.", + author = "Hans Hagen", + copyright = "ConTeXt development team", + options = { + { ... }, + ........ + { ... 
}, + } +} +\stoptyping + +These options will eventually result in patterns that are bound to words, +think of: + +\starttabulate[|T||||] +\NC effe \NC \type {ef|fe} \NC \type {..|..} \NC inhibit ligature \NC \NR +\NC foobar \NC \type {foo=bar} \NC \type {...=...} \NC inhibit kerning \NC \NR +\NC somemore \NC \type {some+more} \NC \type {....+....} \NC compound word \NC \NR +\stoptabulate + +The whole repertoire is: + +\starttabulate[||T|] +\NC \type {a|b} \NC a:norightligature, b:noleftligature \NC \NR +\NC \type {a=b} \NC a:norightkern, b:noleftkern \NC \NR +\NC \type {ab} \NC a:norightkern \NC \NR +\NC \type {a+b} \NC a:compound:b \NC \NR +\stoptabulate + +Later we will see how some can be combined. An option can be defined using entries +in a subtable: + +\starttabulate[|T|||] +\NC patterns \NC hash \NC \type {[snippet] = "replacement pattern"} \NC \NR +\NC words \NC string \NC string of words, separated by whitespace \NC \NR +\NC prefixes \NC string \NC snippets that combine with words (at the start) \NC \NR +\NC suffixes \NC string \NC snippets that combine with words (at the end) \NC \NR +\NC matches \NC array or number \NC a number or table indicating which match matters \NC \NR +\NC actions \NC hash \NC \type {[character] = "action(s)"} \NC \NR +\NC characters \NC string \NC permitted characters (additional hjcodes) \NC \NR +\NC return \NC integer \NC what to do next \NC \NR +\stoptabulate + +The default return value is~2 but there are some more: + +\starttabulate[|T||] +\NC 0 \NC go to the next (valid) word \NC \NR +\NC 1 \NC restart \NC \NR +\NC 2 \NC exceptions and after that patterns \NC \NR +\NC 3 \NC patterns \NC \NR +\stoptabulate + +There are some safeguards built in that force a restart. For instance when a word +is replaced a restart is enforced unless we skip the word. A restart will not +permit a second replacement (after all we need to avoid endless loops). 
+ +In a multi|-|line word list, lines that start with a comment trigger: \LUA's +double dash or the usual \TEX\ percent sign. + +\stopsection + +\startsection[title=Inhibiting] + +The next definition replaces \type {ff} by \type {f|f} in the words given and +eventually blocks a ligature. + +\starttyping +{ + patterns = { + ff = "f|f", + }, + words = [[ + effe + ]], +} +\stoptyping + +Some fonts provide the \type {ij} ligature or do some special kerning between +these characters (something Dutch). Because it depends on the font logic if a +dedicated replacement or kerning is used this is an example where we do this: + +\starttyping +{ + patterns = { + ij = "i|j", + }, + actions = { + ["|"] = "nokern noligature", + }, + words = [[ + ijverig + -- fijn -- to ligature fi or ij, that's the question + ]], +} +\stoptyping + +A more extensive definition is the following. Here we explicitly define that only +the first match in a word gets treated. Here we not only block ligatures but also +kerns. + +\starttyping +{ + patterns = { + ff = "f|f", + }, + matches = { 1 }, + actions = { + ["|"] = "noligature nokern" + }, + words = [[ + effe + effeffe + ]], +} +\stoptyping + +You can also omit the pattern when you inject specifiers yourself: + +\starttyping +{ + actions = { + ["|"] = "noligature nokern" + }, + words = [[ + ef|fe + ef|fef|fe + ]], +} +\stoptyping + +You can also use different shortcuts: + +\starttyping +{ + actions = { + ["1"] = "noligature", + ["2"] = "nokern" + }, + words = [[ + ef1fe + ef1fef2fe + ]], +} +\stoptyping + +Although I cannot come up with a nice example, there can be reasons for +inhibiting kerns. Here we inhibit kerns left of the upcoming character: + +\starttyping +{ + patterns = { + fo = "f eof = % function () % \stopfunctioncall -% -% \stopsection + +\stopsection \startsection[title={Data processing callbacks}][library=callback] @@ -719,7 +719,7 @@ font structure. 
Setting this callback to \type {false} is pointless as it will prevent font loading completely but will nevertheless generate errors. -\subsection{\cbk {show+whatsit}} +\subsection{\cbk {show_whatsit}} \topicindex{callbacks+whatsits} diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-languages.tex b/doc/context/sources/general/manuals/luametatex/luametatex-languages.tex index 77c2d93d8..4681f6bea 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-languages.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-languages.tex @@ -337,6 +337,7 @@ examples. \topicindex {main loop} \topicindex {hyphenation} +\topicindex {hyphenation+tracing} In \LUATEX's main loop, almost all input characters that are to be typeset are converted into \nod {glyph} node records with subtype \quote {character}, but @@ -447,6 +448,18 @@ The usage of these penalties is controlled by the \lpr {hyphenationmode} flags \number\explicitpenaltyhyphenationmodecode\space and when these are not set \prm {exhyphenpenalty} is used. +You can use the \lpr {tracinghyphenation} variable to get a bit more information +about what happens. 
+ +\starttabulate[|lT|l|] +\DB value \BC effect \NC\NR +\TB +\NC 1 \NC report redundant pattern (happens by default in \LUATEX) \NC\NR +\NC 2 \NC report words that reach the hyphenator and got treated \NC\NR +\NC 3 \NC show the result of a hyphenated word (a node list) \NC\NR +\LL +\stoptabulate + \stopsection \startsection[title={Loading patterns and exceptions},reference=patternsexceptions] diff --git a/doc/context/sources/general/manuals/luametatex/luametatex-registers.tex b/doc/context/sources/general/manuals/luametatex/luametatex-registers.tex index f230a4500..6d33ed3f4 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex-registers.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex-registers.tex @@ -12,8 +12,7 @@ This register contains the primitives that are mentioned in the manual. There are of course many more primitives. The \LUATEX\ primitives are typeset in - bold. The primitives from \PDFTEX\ are not supported that way but mentioned - anyway. + bold. \placeregister[primitiveindex][indicator=no] @@ -28,8 +27,7 @@ \startchapter[title=Nodes] This register contains the nodes that are known to \LUATEX. The primary nodes - are in bold, whatsits that are determined by their subtype are normal. The - names prefixed by \type {pdf_} are backend specific. + are in bold, whatsits that are determined by their subtype are normal. 
\placeregister[nodeindex] diff --git a/doc/context/sources/general/manuals/luametatex/luametatex.tex b/doc/context/sources/general/manuals/luametatex/luametatex.tex index 1327ea3a1..a46e595ca 100644 --- a/doc/context/sources/general/manuals/luametatex/luametatex.tex +++ b/doc/context/sources/general/manuals/luametatex/luametatex.tex @@ -78,6 +78,8 @@ % 290 pages, 10.8 sec, 292M lua, 99M tex, 158 instances % 290 pages, 9.5 sec, 149M lua, 35M tex, 30 instances +% with mimalloc and msvc we get a better native performance than crosscompiled + \enableexperiments[fonts.compact] % \enabledirectives[fonts.injections.method=advance] % tricky ... not all xoffsets are advance robust -- cgit v1.2.3