summaryrefslogtreecommitdiff
path: root/doc/context/sources/general/manuals/languages
diff options
context:
space:
mode:
authorContext Git Mirror Bot <phg42.2a@gmail.com>2016-02-24 12:15:08 +0100
committerContext Git Mirror Bot <phg42.2a@gmail.com>2016-02-24 12:15:08 +0100
commit33e29b1ba0661b0f58605749528362a0e12eae52 (patch)
treed9c1ceebec1f4869ffa0bdeafbeef390cb916525 /doc/context/sources/general/manuals/languages
parenta3170089e0016cfc0489c433d9113d94b9ce3d67 (diff)
downloadcontext-33e29b1ba0661b0f58605749528362a0e12eae52.tar.gz
2016-02-24 11:22:00
Diffstat (limited to 'doc/context/sources/general/manuals/languages')
-rw-r--r--doc/context/sources/general/manuals/languages/languages-appendix.tex48
-rw-r--r--doc/context/sources/general/manuals/languages/languages-back.tex14
-rw-r--r--doc/context/sources/general/manuals/languages/languages-basics.tex348
-rw-r--r--doc/context/sources/general/manuals/languages/languages-contents.tex13
-rw-r--r--doc/context/sources/general/manuals/languages/languages-cover.tex133
-rw-r--r--doc/context/sources/general/manuals/languages/languages-environment.tex347
-rw-r--r--doc/context/sources/general/manuals/languages/languages-goodies.tex138
-rw-r--r--doc/context/sources/general/manuals/languages/languages-hyphenation.tex810
-rw-r--r--doc/context/sources/general/manuals/languages/languages-introduction.tex69
-rw-r--r--doc/context/sources/general/manuals/languages/languages-labels.tex141
-rw-r--r--doc/context/sources/general/manuals/languages/languages-mkiv.tex38
-rw-r--r--doc/context/sources/general/manuals/languages/languages-numbering.tex295
-rw-r--r--doc/context/sources/general/manuals/languages/languages-sorting.tex235
13 files changed, 2629 insertions, 0 deletions
diff --git a/doc/context/sources/general/manuals/languages/languages-appendix.tex b/doc/context/sources/general/manuals/languages/languages-appendix.tex
new file mode 100644
index 000000000..d8f33e44a
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-appendix.tex
@@ -0,0 +1,48 @@
+% language=uk
+
+\startcomponent languages-appendix
+
+\environment languages-environment
+
+\startchapter[title=Appendix][color=darkgray]
+
+\startsection[title=The language files]
+
+ Todo.
+
+\stopsection
+
+\startsection[title=The \type {mtx-patterns} script]
+
+ Todo.
+
+\stopsection
+
+\startsection[title=Installed sorters]
+
+\startbuffer
+\usemodule[s-languages-sorting]
+
+\showinstalledsorting
+\stopbuffer
+
+\typebuffer \blank[2*line] \getbuffer
+
+\stopsection
+
+\startsection[title=Verbose counters]
+
+\startbuffer
+\usemodule[s-languages-counters]
+
+\showverbosecounters[language={en,es}]
+\stopbuffer
+
+\typebuffer \blank[2*line] \getbuffer
+
+\stopsection
+
+\stopchapter
+
+\stopcomponent
+
diff --git a/doc/context/sources/general/manuals/languages/languages-back.tex b/doc/context/sources/general/manuals/languages/languages-back.tex
new file mode 100644
index 000000000..aadcc3aa4
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-back.tex
@@ -0,0 +1,14 @@
+% \doifmodeelse {simple} {
+%
+% \page
+% \page[empty,left]
+%
+% \startMPpage
+% DrawCoverPage("back") ;
+% \stopMPpage
+%
+% } {
+%
+% % not needed as it's part of the cover page
+%
+% }
diff --git a/doc/context/sources/general/manuals/languages/languages-basics.tex b/doc/context/sources/general/manuals/languages/languages-basics.tex
new file mode 100644
index 000000000..39ce840f3
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-basics.tex
@@ -0,0 +1,348 @@
+% language=uk
+
+\environment languages-environment
+
+\startcomponent languages-basics
+
+\startchapter[title=Some basics][color=darkyellow]
+
+\startsection[title={Introduction}]
+
+In this chapter we will see how we can toggle between languages. A first
+introduction to patterns will be given. Some details of how to control the
+hyphenation with specific patterns will be given in a later chapter.
+
+\stopsection
+
+\startsection[title={Available languages}]
+
+When you use the English version of \CONTEXT\ you will default to US English as
+main language. This means that hyphenation will be US specific, which by the way
+is different from the rules in GB. All labels that are generated by the system
+are also in English. Languages can often be accessed by names like \type
+{english} or \type {dutch} although it is quite common to use the short tags like
+\type {en} and \type {nl}. Because we want to be as compatible as possible with
+\MKII, there are quite some synonyms. The following table lists the languages
+for which support is built|-|in.\footnote {More languages can be defined. It is
+up to users to provide the information.}
+
+\startbuffer
+\usemodule[languages-system]
+
+\loadinstalledlanguages
+\showinstalledlanguages
+\stopbuffer
+
+\getbuffer
+
+\noindentation You can call up such a table with the following commands:
+
+\typebuffer
+
+\noindentation Instead you can run \type {context --global languages-system.mkiv}.
+
+As you can see, many languages have hyphenation patterns but for Japanese,
+Korean, Chinese as well as Arabic languages they make no sense. The patterns are
+loaded on demand. The number is the internal number that is used in the engine; a
+user never has to use that number. Numbers $<1$ are used to disable hyphenation.
+The file tag is used to locate and load a specification. Such files have names
+like \type {lang-nl.lua}.
+
+Some languages share the same hyphenation patterns but can have demands that
+differ, like labels or quotes. The characters shown in the table are those found
+in the pattern files. The number of patterns differs a lot between languages.
+This relates to the systematics behind them. Some languages use word stems, others
+base their hyphenation on syllables. Some languages have inflections, which adds to
+the complexity while others can combine words in ways that demand special care
+for word boundaries. Of course a low or high number can signal a low quality as
+well, but most pattern collections are assembled over many years and updated when
+for instance spelling rules change. I think that we can safely say that most patterns
+are quite stable and of good quality.
+
+\stopsection
+
+\startsection[title=Switching]
+
+The document language is set with
+
+\starttyping
+\mainlanguage[en]
+\stoptyping
+
+but when you want to apply the proper hyphenation rules to an embedded language
+you can use:
+
+\starttyping
+\language[en]
+\stoptyping
+
+or just:
+
+\starttyping
+\en
+\stoptyping
+
+The main language determines what labels show up, how numbering happens, in what
+way dates get formatted, etc. Normally the \typ {\mainlanguage} command comes
+before the \typ {\starttext} command.
+
+\stopsection
+
+\startsection[title=Hyphenation]
+
+In \LUATEX\ each character that gets typeset not only carries a font id and character
+code, but also a language number. You can switch language whenever you want and
+the change will be carried with the characters. Switching within a word doesn't make
+sense but it is permitted:
+
+\starttabulate[|||T|]
+\NC 1 \NC \type{\de incrediblykompliziert} \NC \hyphenatedword{\de incrediblykompliziert} \NC \NR
+\NC 2 \NC \type{\en incrediblykompliziert} \NC \hyphenatedword{\en incrediblykompliziert} \NC \NR
+\NC 3 \NC \type{\en incredibly\de kompliziert} \NC \hyphenatedword{\en incredibly\de kompliziert} \NC \NR
+\NC 4 \NC \type{\en incredibly\de\-kompliziert} \NC \hyphenatedword{\en incredibly\de\-kompliziert} \NC \NR
+\NC 5 \NC \type{\en incredibly\de-kompliziert} \NC \hyphenatedword{\en incredibly\de-kompliziert} \NC \NR
+\stoptabulate
+
+In line 4 we have a \type {\-} between the two words, and in the last
+line just a \type {-}. If you look closely you will notice that the snippets
+can be quite small. If we typeset a word with a 1mm text width we get this:
+
+\blank \noindentation \start \en \hsize 1mm incredibly \par \stop \blank
+
+If you are familiar with the details of hyphenation, you know that the number of
+characters at the end and beginning of a word is controlled by the two variables
+\typ {\lefthyphenmin} and \typ {\righthyphenmin}. However, these only influence
+the hyphenation process. What bits and pieces eventually end up on a line is
+determined by the par builder and there the \type {\hsize} matters. In practice
+you will not run into these situations, unless you have extremely long words and a
+narrow column.
+
+Hyphenation normally is limited to regular characters that make up the alphabet of
+a language. It is insensitive to capitalization as the following text shows:
+
+\blank
+
+\startnarrower \noindentation
+\hyphenatedword {This time the musical distraction while developing code came
+from watching youtube performances of Cory Henry (also known from Snarky Puppy,
+a conglomerate of excellent players). Just search the web for his name with \quote
+{Stevie Wonder and Michael Jackson Tribute}. There is no keyboard he can't play.
+Another interesting keyboard player is Sun Rai (a short name for Rai
+Thistlethwayte, just google for \quote {The Beatles, Come Together, Live Piano
+Acoustic with Loop Pedal}, or do a combined search with \quote {Matt
+Chamberlain}). Okay, and talking of keyboards, let's not forget Vika Yermolyeva
+(vkgoeswild) as she's one of a kind too on the web. And then there is Jacob
+Collier, in one word: incredible (or hyphenated the Dutch way {\nl incredible},
+let me repeat that in French {\fr incredible}).} \footnote {Get me right, there
+are of course many more fantastic musicians.}
+\stopnarrower
+
+\blank
+
+\noindentation Of course, names are often short and don't need to be hyphenated
+(or the left and right settings prohibit it). Another complication with names is
+that they can come from another language so we either need to switch language
+temporarily or we need to add an exception (more about that later).
+
+\stopsection
+
+\startsection[title=Primitives]
+
+In traditional \TEX\ the language is not a property of a character but is
+triggered by a signal in the (so called) list. Think of:
+
+\starttyping
+<language 1>this is <language 2>nederlands<language 1> mixed with english
+\stoptyping
+
+This number is set by the primitive \typ {\language}. Language triggers are
+injected into the list depending on the value of this number. There is also a \typ
+{\setlanguage} primitive that can inject triggers without setting the \typ
+{\language} number. Because in \LUATEX\ the state is kept with the character
+you don't need to worry about the subtle differences here.
+
+In \CONTEXT\ the \typ {\language} and \typ {\setlanguage} commands are overloaded
+by a more advanced switch macro. You cannot assume that they work as explained in
+general manuals about \TEX. Currently you can still assign a number but that
+might change. Just consider the language to be an abstraction and don't mess with
+this number. Both commands not only change the current language but also do
+specific initializations when needed.
+
+What characters get involved in hyphenation is historically determined by the so
+called \type {\lccode} values. Each character can have such a value which maps
+an uppercase to a lowercase character. This concept has been extended in \ETEX\
+where it binds to a pattern set (language). However, in \CONTEXT\ the user never
+has to worry about such details.
+
+% The \type {\patterns} primitive is
+% The \type {\hyphenation} primitive is
+
+In traditional hyphenation a word will not be hyphenated if the sum of \typ
+{\lefthyphenmin} and \typ {\righthyphenmin} exceeds 62. This limitation is not
+present in the to be presented \LUA\ variant of this routine as there is no
+good reason for this limitation other than implementation constraints.
+
+\stopsection
+
+\startsection[title=Control]
+
+We already mentioned \typ {\lefthyphenmin} and \typ {\righthyphenmin}. These
+two variables control the area in a word that is subjected to hyphenation.
+Setting these values is a matter of taste but making them too small can result in
+bad hyphenation when the patterns are made with the assumptions that certain
+minima are used. Using a \typ {\lefthyphenmin} of 2 while the patterns are made
+with a value of 3 in mind is a bad idea.
+
+\startlinecorrection[blank]
+\startluacode
+-- Build a table that shows how the word "interesting" hyphenates for each
+-- combination of lefthyphenmin (rows) and righthyphenmin (columns).
+local maximum = 5 -- both parameters run from 1 up to this value
+
+-- Typeset one table cell that holds a number in the mono font.
+local function numbercell(n)
+    context.bTD()
+        context.mono(n)
+    context.eTD()
+end
+
+context.bTABLE { option = "stretch", align= "middle" }
+    -- first header row: the two parameter names
+    context.bTR()
+        context.bTD { ny = 2, align = "middle,lohi", style = "monobold" }
+            context.verbatim("\\lefthyphenmin")
+        context.eTD()
+        context.bTD { nx = maximum, style = "monobold" }
+            context.verbatim("\\righthyphenmin")
+        context.eTD()
+    context.eTR()
+    -- second header row: the righthyphenmin values
+    context.bTR()
+        for rmin=1,maximum do
+            numbercell(rmin)
+        end
+    context.eTR()
+    -- body: one row per lefthyphenmin value, one cell per righthyphenmin value
+    for lmin=1,maximum do
+        context.bTR()
+            numbercell(lmin)
+            for rmin=1,maximum do
+                context.bTD()
+                    context("\\lefthyphenmin %s \\righthyphenmin %s \\hyphenatedword{interesting}",lmin,rmin)
+                context.eTD()
+            end
+        context.eTR()
+    end
+context.eTABLE()
+\stopluacode
+\stoplinecorrection
+
+When \TEX\ breaks a paragraph into lines it will try to do so without hyphenation.
+When that fails (read: when the badness becomes too high) a next effort will take
+hyphenation into account. \footnote {Because in \LUATEX\ we always hyphenate
+there is no real gain in trying not to hyphenate. Because in traditional \TEX\
+hyphenation happens on the fly a pass without hyphenating makes more sense.} When
+the badness is still too high, an optional emergency pass can be made but only
+when the tolerances are set to permit this. In \CONTEXT\ you can try these
+settings when you get too many over- or underfull boxes reported on the console.
+
+\starttyping
+\setupalign[tolerant]
+\setupalign[verytolerant]
+\setupalign[verytolerant,stretch]
+\stoptyping
+
+Personally I tend to use the last setting, especially in automated flows. After
+all, \TEX\ will not apply stretch unless it's really needed.
+
+The two \typ {\*hyphenmin} parameters can be set any time and the current value
+is stored with each character. They can also be set with the language which we
+will see later.
+
+When \TEX\ hyphenates words it has to decide where a word starts and ends. In
+traditional \TEX\ a word normally starts at a character that falls within the
+scope of the hyphenator. It ends when a box (hlist or vlist) is seen, but also
+at a rule, discretionary, accent (forget about this in \CONTEXT) or math. An
+example will be given in the chapter that discusses the \LUA\ alternative.
+
+\stopsection
+
+\startsection[title=Installing]
+
+ todo
+
+\stopsection
+
+\startsection[title=Modes]
+
+Languages are one of the mechanisms where you can access the current state. There are
+for instance two (official) macros that contain the current (main) language:
+
+\startbuffer
+\starttabulate[||Tc|]
+\HL
+\NC \bf macro \NC \bf value \NC \NR
+\HL
+\NC \type {\currentmainlanguage} \NC \currentmainlanguage \NC \NR
+\NC \type {\currentlanguage} \NC \currentlanguage \NC \NR
+\HL
+\stoptabulate
+\stopbuffer
+
+\getbuffer
+
+When we have set \type {\language[nl]} we get this:
+
+\start \nl \getbuffer \stop
+
+If you write a style that needs to adapt to a language you can use modes. There
+are several ways to do this:
+
+\startbuffer
+\language[nl]
+
+\startmode[**en]
+ \color[darkred]{main english}
+\stopmode
+
+\startmode[*en]
+ \color[darkred]{local english}
+\stopmode
+
+\startmode[**nl]
+ \color[darkblue]{main dutch}
+\stopmode
+
+\startmode[*nl]
+ \color[darkblue]{local dutch}
+\stopmode
+
+\startmodeset
+ [*en] {\color[darkgreen]{english set}}
+ [*nl] {\color[darkgreen]{dutch set}}
+\stopmodeset
+\stopbuffer
+
+\typebuffer
+
+\noindentation This typesets:
+
+\blank \start \setupindenting[no] \getbuffer \stop \blank
+
+When you use setups you can use the following trick:
+
+\startbuffer
+\language[nl]
+
+\startsetups language:en
+ \color[darkorange]{something english}
+\stopsetups
+
+\startsetups language:nl
+ \color[darkorange]{something dutch}
+\stopsetups
+
+\setups[language:\currentlanguage]
+\stopbuffer
+
+\typebuffer
+
+\noindentation As expected we get:
+
+\blank \start \setupindenting[no] \getbuffer \stop \blank
+
+\stopsection
+
+\stopchapter
+
+\stopcomponent
diff --git a/doc/context/sources/general/manuals/languages/languages-contents.tex b/doc/context/sources/general/manuals/languages/languages-contents.tex
new file mode 100644
index 000000000..10fadb56b
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-contents.tex
@@ -0,0 +1,13 @@
+% language=uk
+
+\startcomponent languages-contents
+
+\environment languages-environment
+
+\starttitle[title=Contents][color=darkgray]
+
+\placelist[chapter][criterium=text]
+
+\stoptitle
+
+\stopcomponent
diff --git a/doc/context/sources/general/manuals/languages/languages-cover.tex b/doc/context/sources/general/manuals/languages/languages-cover.tex
new file mode 100644
index 000000000..75a033b5f
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-cover.tex
@@ -0,0 +1,133 @@
+% language=uk
+
+\startcomponent languages-cover
+
+\environment languages-environment
+
+\startbuffer[abstract]
+
+This book explains how we support languages (and
+scripts) in \CONTEXT\ \MKIV\ and \LUATEX. Some of
+the mechanisms discussed are generic and not
+\CONTEXT\ specific. We discuss the way languages are
+dealt with in the engine, hyphenation, standard
+features and additional goodies. Tracing and the
+extensibility of code are also discussed.
+
+\stopbuffer
+
+\startsetups document:abstract
+ \framed [
+ foregroundstyle=bold,
+ foregroundcolor=white,
+ width=7.5cm,
+ align={normal,tolerant},
+ frame=off,
+ strut=no,
+ ] {
+ \getbuffer[abstract]
+ }
+\stopsetups
+
+\startMPextensions
+
+    % DrawCoverPage(what) draws the complete cover: background, spine, the
+    % coloured triangles on the front and back page and all texts. When what
+    % is "front" or "back" the result is clipped to FrontPage or BackPage; for
+    % any other value the whole cover is kept and drawboundary is applied to
+    % CoverPage.
+
+    def DrawCoverPage (expr what) =
+
+        begingroup ;
+
+        save SpineWidth ; numeric SpineWidth ; SpineWidth := 8mm ;
+        save PaperBleed ; numeric PaperBleed ; PaperBleed := 2mm ; % todo
+
+        StartCover ;
+
+        % all scratch variables are saved so that nothing leaks out of the
+        % group (height and offset are assigned below as well)
+
+        save width, height, size, anchor, offset ;
+
+        numeric width, height, size ; pair anchor, offset ;
+
+        color ColorVariant[] ;
+
+        % ColorVariant[1] := \MPcolor{darkred} ;
+        % ColorVariant[2] := \MPcolor{darkgreen} ;
+        % ColorVariant[3] := \MPcolor{darkblue} ;
+        % ColorVariant[4] := \MPcolor{darkyellow} ;
+
+        ColorVariant[1] := \MPcolor{darkmagenta} ;
+        ColorVariant[2] := \MPcolor{darkorange} ;
+        ColorVariant[3] := \MPcolor{darkyellow} ;
+        ColorVariant[4] := \MPcolor{darkcyan} ;
+
+        % background and the two halves of the spine
+
+        fill CoverPage enlarged PaperBleed withcolor \MPcolor{darkgray} ;
+
+        fill Spine bottomenlarged -.5CoverHeight withcolor ColorVariant[3] ;
+        fill Spine topenlarged    -.5CoverHeight withcolor ColorVariant[2] ;
+
+        width  := FrontPageWidth ;
+        height := FrontPageHeight ;
+        size   := 4 * width / 3 ;
+        offset := (-1cm,1cm) ;
+        anchor := .5[lrcorner CoverPage,urcorner CoverPage] ;
+
+        % four triangles on the front page, one anchored at each corner
+
+        fill anchored.llft(lltriangle scaled size, urcorner FrontPage) withcolor ColorVariant[1] ;
+        fill anchored.urt (urtriangle scaled size, llcorner FrontPage) withcolor ColorVariant[2] ;
+        fill anchored.lrt (lrtriangle scaled size, ulcorner FrontPage) withcolor ColorVariant[3] ;
+        fill anchored.ulft(ultriangle scaled size, lrcorner FrontPage) withcolor ColorVariant[4] ;
+
+        % and the same on the back page, with a different color assignment
+
+        fill anchored.lrt (lrtriangle scaled size, ulcorner BackPage) withcolor ColorVariant[1] ;
+        fill anchored.llft(lltriangle scaled size, urcorner BackPage) withcolor ColorVariant[3] ;
+        fill anchored.ulft(ultriangle scaled size, lrcorner BackPage) withcolor ColorVariant[2] ;
+        fill anchored.urt (urtriangle scaled size, llcorner BackPage) withcolor ColorVariant[4] ;
+
+        % title, subtitle, author and company, rotated by 45 degrees and
+        % positioned relative to the middle of the right edge of the cover
+
+        draw thetextext.llft(textext("\bf Languages in \ConTeXt\hskip-.1em") rotated 45 ysized .350height, anchor shifted (6*offset+offset)) withcolor white ;
+        draw thetextext.llft(textext("\bf explaining luatex and mkiv") rotated 45 ysized .275height, anchor shifted (5*offset+offset)) withcolor white ;
+        draw thetextext.llft(textext("\bf Hans Hagen") rotated 45 ysized .200height, anchor shifted (2*offset+offset)) withcolor white ;
+        draw thetextext.llft(textext("\bf PRAGMA ADE") rotated 45 ysized .200height, anchor shifted (1*offset+offset)) withcolor white ;
+
+        % for the moment
+
+        draw thetextext.top(textext("\bf work in progress") xsized 4cm, lrcorner Page shifted (-3cm,1cm)) withcolor white ;
+
+        % till here
+
+        width := BackPageWidth ;
+
+        % the abstract (setups document:abstract) in the middle of the back page
+
+        draw thetextext(textext("\bf\setups[document:abstract]") xsized .65width rotated 45 , center BackPage) withcolor white ;
+
+        % spine texts: title hanging from the top, author standing on the bottom
+
+        anchor := .5[ulcorner Spine,urcorner Spine] shifted (0,-1cm);
+
+        draw thetextext.bot(textext("\bf Languages in \ConTeXt\hskip-.1em") rotated 90 xsized .8SpineWidth, anchor) withcolor white ;
+
+        anchor := .5[llcorner Spine,lrcorner Spine] shifted (0,1cm);
+
+        draw thetextext.top(textext("\bf Hans Hagen") rotated 90 xsized .8SpineWidth, anchor) withcolor white ;
+
+        StopCover ;
+
+        % reduce the result to the requested part of the cover
+
+        if what = "front" :
+            clip currentpicture to FrontPage ;
+        elseif what = "back" :
+            clip currentpicture to BackPage ;
+        else :
+            drawboundary CoverPage ;
+        fi ;
+
+        endgroup ;
+
+    enddef ;
+
+\stopMPextensions
+
+\doifmodeelse {simple} {
+
+ \startMPpage
+ DrawCoverPage("front") ;
+ \stopMPpage
+
+} {
+
+ \startMPpage
+ DrawCoverPage("cover") ;
+ \stopMPpage
+
+}
+
+\stopcomponent
diff --git a/doc/context/sources/general/manuals/languages/languages-environment.tex b/doc/context/sources/general/manuals/languages/languages-environment.tex
new file mode 100644
index 000000000..ff529a8d1
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-environment.tex
@@ -0,0 +1,347 @@
+\startenvironment languages-environment
+
+\usemodule[visual]
+\usemodule[simulate]
+
+\dontcomplain
+
+\definepapersize
+ [book]
+ [width=18cm,
+ height=24cm]
+
+\setuppapersize
+ [book]
+
+\startmode[oversized]
+
+ \setuppapersize
+ [book]
+ [oversized]
+
+ \setuplayout
+ [marking=on]
+
+\stopmode
+
+\setuplayout
+ [location=middle,
+ topspace=1.5cm,
+ bottomspace=2cm,
+ backspace=2cm,
+ cutspace=2cm,
+ header=0cm,
+ footer=0cm,
+ margindistance=.25cm,
+ margin=1.25cm,
+ width=middle,
+ height=middle]
+
+\setuplayout
+ [style=\ss]
+
+\usetypescript
+ [dejavu-condensed]
+
+\setupbodyfont
+ [ipaex,9pt]
+
+\setupbodyfont
+ [dejavu,9pt]
+
+\definetyping
+ [narrowtyping]
+ [typing]
+ [bodyfont=dejavu-condensed]
+
+% This is way too ugly for a manual:
+%
+% \setuptyping
+% [indentnext=no]
+%
+% \setupindenting
+% [medium,yes]
+
+\setupwhitespace
+ [big]
+
+\defineoverlay
+ [page]
+ [\useMPgraphic{page}]
+
+\setupbackgrounds
+ [page]
+ [background=page]
+
+\setuphead
+ [chapter,section,subsection]
+ [color=\namedstructureuservariable{chapter}{color}]
+
+\setuphead
+ [chapter]
+ [style=\bfc]
+
+\setuphead
+ [section]
+ [style=\bfb]
+
+\setuphead
+ [subsection]
+ [style=\bf,
+ before=\blank,
+ after=\blank]
+
+\startsetups document:chapter:inside
+ \definecolor[maincolor][1.0(\namedheadparameter{chapter}{color})] % this expands the color
+ \definecolor[halfcolor][0.5(maincolor,white)]
+\stopsetups
+
+\setuphead
+ [chapter]
+ [insidesection=\setup{document:chapter:inside}]
+
+\setuppagenumbering
+ [alternative=doublesided,
+ location=]
+
+\setuplist
+ [aligntitle=yes]
+
+\setuplist
+ [chapter]
+ [pagenumber=no,
+ style=bold,
+ before={\blank\startcolor[\structurelistuservariable{color}]},
+ after={\placelist[section]\stopcolor}]
+
+\setuplist
+ [chapter,section]
+ [width=3em]
+
+\startuseMPgraphic{page}
+
+    % Page background graphic. It draws a filled triangle with the page
+    % number in the top corner and a short rule with the chapter title near
+    % the bottom corner; both sit at the right side when OnRightPage is true
+    % and at the left side otherwise.
+
+    StartPage ;
+
+    linecap := butt ;
+
+    % the triangle in the top corner, moved 2.5mm inwards and downwards
+
+    if OnRightPage :
+
+        path p ; p := (
+            urcorner Page shifted (-2cm,0) --
+            urcorner Page --
+            urcorner Page shifted (0,-2cm) -- cycle
+        ) shifted (-2.5mm,-2.5mm) ;
+
+    else :
+
+        path p ; p := (
+            ulcorner Page shifted (2cm,0) --
+            ulcorner Page --
+            ulcorner Page shifted (0,-2cm) -- cycle
+        ) shifted (2.5mm,-2.5mm) ;
+
+    fi ;
+
+    fill p
+        withpen pencircle scaled 1mm
+      % withcolor \MPcolor{\namedstructureuservariable{chapter}{color}} ;
+        withcolor \MPcolor{maincolor} ;
+
+    % the page number goes inside the triangle
+
+    draw thetextext("\bf\userpagenumber",
+        if OnRightPage : urcorner p shifted (-5mm,-5mm) else : ulcorner p shifted (5mm,-5mm) fi)
+        withcolor white ;
+
+    % the 5cm rule near the bottom corner; each branch ends its own
+    % assignment so that all four branches in this graphic look the same
+
+    if OnRightPage :
+
+        path p ; p := (
+            lrcorner Page shifted (-5cm,0) --
+            lrcorner Page
+        ) shifted (-5mm,10mm) ;
+
+    else :
+
+        path p ; p := (
+            llcorner Page shifted (5cm,0) --
+            llcorner Page
+        ) shifted (5mm,10mm) ;
+
+    fi ;
+
+    draw p
+        withpen pencircle scaled 1mm
+      % withcolor \MPcolor{\namedstructureuservariable{chapter}{color}} ;
+        withcolor \MPcolor{maincolor} ;
+
+    % the chapter title below the rule, in the color given with the chapter
+
+    draw if OnRightPage :
+        thetextext.lft("\bf\getspecificstructuretitle{chapter}",lrcorner Page shifted (-5mm,5mm))
+    else :
+        thetextext.rt ("\bf\getspecificstructuretitle{chapter}",llcorner Page shifted ( 5mm,5mm))
+    fi withcolor \MPcolor{\namedstructureuservariable{chapter}{color}} ;
+  % fi withcolor \MPcolor{maincolor} ;
+
+    StopPage ;
+\stopuseMPgraphic
+
+\definecolor[orange] [r=1,g=.6,b=.1]
+
+\definecolor[middlegray] [s=.75]
+\definecolor[darkgray] [s=.25]
+\definecolor[darkred] [r=.5]
+\definecolor[darkgreen] [g=.5]
+\definecolor[darkblue] [b=.5]
+
+\definecolor[darkyellow] [.5(red,green)]
+\definecolor[darkmagenta][.5(red,blue)]
+\definecolor[darkcyan] [.5(green,blue)]
+
+\definecolor[darkorange] [.5(orange)]
+
+\definecolor [mix-1] [r=1,g=.75,b=.25] \definecolor [darkmix-1][.5(mix-1)]
+\definecolor [mix-2] [r=1,g=.25,b=.75] \definecolor [darkmix-2][.5(mix-2)]
+\definecolor [mix-3] [r=.75,g=1,b=.25] \definecolor [darkmix-3][.5(mix-3)]
+\definecolor [mix-4] [r=.75,g=.25,b=1] \definecolor [darkmix-4][.5(mix-4)]
+\definecolor [mix-5] [r=.25,g=1,b=.75] \definecolor [darkmix-5][.5(mix-5)]
+\definecolor [mix-6] [r=.25,g=.75,b=1] \definecolor [darkmix-6][.5(mix-6)]
+
+% \blackrule[height=1cm,width=10cm,color=mix-1] \blackrule[height=1cm,width=10cm,color=darkmix-1]
+% \blackrule[height=1cm,width=10cm,color=mix-2] \blackrule[height=1cm,width=10cm,color=darkmix-2]
+% \blackrule[height=1cm,width=10cm,color=mix-3] \blackrule[height=1cm,width=10cm,color=darkmix-3]
+% \blackrule[height=1cm,width=10cm,color=mix-4] \blackrule[height=1cm,width=10cm,color=darkmix-4]
+% \blackrule[height=1cm,width=10cm,color=mix-5] \blackrule[height=1cm,width=10cm,color=darkmix-5]
+% \blackrule[height=1cm,width=10cm,color=mix-6] \blackrule[height=1cm,width=10cm,color=darkmix-6]
+
+\definecolor[maincolor] [darkgray]
+
+% modules
+
+\usemodule
+ [abr-01]
+
+\setupsorting
+ [logo]
+ [style=]
+
+\usemodule [chart]
+\usemodule [nodechart]
+
+% \usemodule [s] [fonts-tables]
+% \usemodule [s] [fonts-missing]
+% \usemodule [s] [fonts-vectors]
+% \usemodule [s] [fonts-features]
+
+\usemodule [s] [languages-words]
+\usemodule [s] [languages-hyphenation]
+\usemodule [s] [languages-frequencies]
+\usemodule [s] [languages-sorting]
+\usemodule [s] [languages-counters]
+\usemodule [s] [languages-system]
+
+\usemodule [s] [math-extensibles]
+
+\setupFLOWchart
+ [width=6em,
+ height=3em,
+ offset=-2em, % maybe default : todo: hoffset and voffset
+ dx=2em,
+ dy=2em]
+
+\setupFLOWlines
+ [color=maincolor]
+
+\setupFLOWshapes
+ [rulecolor=maincolor]
+
+% tables
+
+\setuptabulate
+ [rulethickness=.5mm,
+ rulecolor=maincolor]
+
+% special commands
+
+% First variant of the "reference" graphic: it fills the diamond that runs
+% through the midpoints of the four edges of OverlayBox, using the color that
+% is passed as variable.
+%
+% NOTE(review): a graphic with the same name and variable is defined again
+% right below; presumably the later definition is the one that takes effect
+% and this one is only kept for reference -- confirm before removing either.
+
+\startuniqueMPgraphic{reference}{color}
+ fill
+ .5[llcorner OverlayBox,ulcorner OverlayBox] --
+ .5[ulcorner OverlayBox,urcorner OverlayBox] --
+ .5[urcorner OverlayBox,lrcorner OverlayBox] --
+ .5[lrcorner OverlayBox,llcorner OverlayBox] -- cycle
+ withcolor \MPvar{color} ;
+\stopuniqueMPgraphic
+
+% Second variant of the "reference" graphic: OverlayBox shrunk by 1mm, with an
+% extra point below the middle of the bottom edge (shifted down by 1ExHeight),
+% filled and stroked with a 1mm pen in the given color. The bounding box is
+% then set to OverlayBox enlarged by 1ExHeight.
+
+\startuniqueMPgraphic{reference}{color}
+ path p ; p := OverlayBox enlarged -1mm ;
+ filldraw
+ ulcorner p --
+ urcorner p --
+ lrcorner p --
+ .5[lrcorner p,llcorner p] shifted (0,-1ExHeight) --
+ llcorner p -- cycle
+ withpen pencircle scaled 1mm
+ withcolor \MPvar{color} ;
+ setbounds currentpicture to OverlayBox enlarged 1ExHeight ;
+\stopuniqueMPgraphic
+
+\defineoverlay[reference][\uniqueMPgraphic{reference}{color=maincolor,}]
+
+\definecolor[maincolor][red]
+
+\definemargindata
+ [appendixdata]
+% [inner]
+ [outer]
+ [stack=yes]
+
+\definemarginframed
+ [appendixdata]
+ [width=3.5em,
+ height=\lineheight, % we know what goes in there
+ align=middle,
+ offset=overlay,
+ foregroundcolor=white,
+ background=reference]
+
+% todo:
+%
+% \appendixdata[reference=bla]{}
+%
+% whole data
+
+% interaction
+
+\setupinteraction
+ [state=start,
+ click=no,
+ color=,
+ contrastcolor=,
+ style=]
+
+% extra fonts (shared among chapters)
+
+\definefontfeature[demo-onum][kern=yes,onum=yes]
+%definefontfeature[demo-lnum][kern=yes,lnum=yes]
+\definefontfeature[demo-tnum][kern=yes,tnum=yes]
+\definefontfeature[demo-pnum][kern=yes,pnum=yes]
+\definefontfeature[demo-zero][zero=yes]
+\definefontfeature[demo-none][]
+
+\definefont[DemoOnumLM][file:lmroman10-regular*demo-onum at 42pt]
+%definefont[DemoLnumLM][file:lmroman10-regular*demo-lnum at 42pt]
+\definefont[DemoTnumLM][file:lmroman10-regular*demo-tnum at 42pt]
+\definefont[DemoPnumLM][file:lmroman10-regular*demo-pnum at 42pt]
+
+\definefont[DemoZeroLM][file:lmroman10-regular*demo-zero at 42pt]
+\definefont[DemoNoneLM][file:lmroman10-regular*demo-none at 42pt]
+
+\definefont[DemoZeroLT][file:lmtypewriter10-regular*demo-zero at 42pt]
+\definefont[DemoNoneLT][file:lmtypewriter10-regular*demo-none at 42pt]
+
+\definestartstop
+ [notabene]
+ [before=\blank,
+ after=\blank,
+ style=\em]
+
+\setuphyphenation
+ [method=default]
+
+\stopenvironment
diff --git a/doc/context/sources/general/manuals/languages/languages-goodies.tex b/doc/context/sources/general/manuals/languages/languages-goodies.tex
new file mode 100644
index 000000000..fbbfafc34
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-goodies.tex
@@ -0,0 +1,138 @@
+% language=uk
+
+\environment languages-environment
+
+\startcomponent languages-goodies
+
+\startchapter[title=Goodies][color=darkorange]
+
+\startsection[title=Introduction]
+
+There are some features that will only be used in rare cases. They were often
+implemented as an experiment but found useful enough to keep around.
+
+\stopsection
+
+\startsection[title=Spell checking]
+
+There are some means to check the spelling of words in your document but get it
+right: \CONTEXT\ is not a spell|-|checker. These features were added in order to
+be able to do some quick checking of documents written by multiple authors. There
+are currently three options and we only show some simple examples.
+
+First you need to load word lists. These can be plain text files with just words
+separated by spacing:
+
+\starttyping[color=maincolor]
+foobar foo-bar foo=bar foo{}{}{}bar foo{}{}{bar}
+\stoptyping
+
+All these words become \type {foobar} which means that one can use words with
+discretionary specifications. A text list is loaded with:
+
+\startbuffer
+\loadspellchecklist[en][t:/manuals/lua/words-en.txt]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+Instead you can load a \LUA\ file with words. Here we use the same structure that
+we use for the spell checker provided for \SCITE:
+
+\starttyping[color=maincolor]
+return {
+ max = 9,
+ min = 6,
+ n = 2,
+ words = {
+ ["barfoo"] = "Barfoo",
+ ["foobarred"] = "foobarred",
+ }
+}
+\stoptyping
+
+We use the same load command (you can also load bytecode files with suffix \type
+{luc} this way):
+
+\startbuffer
+\loadspellchecklist[nl][t:/scite/data/context/lexers/data/spell-nl.lua]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+Usage boils down to enabling the checker. If needed we can add more methods. The
+first method colors the known and unknown words. Words shorter than the
+threshold of 4 will be skipped.
+
+\startbuffer
+\setupspellchecking[state=start,method=1]
+\en Is this written right or is this wromg?\par % m -> n error
+\nl Is dit goed geschreven of niet?\par
+\setupspellchecking[state=stop]
+\stopbuffer
+
+\typebuffer \startpacked \getbuffer \stoppacked
+
+You can change the colors:
+
+\starttyping
+\definecolor[word:yes] [g=.75]
+\definecolor[word:no] [r=.75]
+\stoptyping
+
+The second method doesn't show anything but produces a file (\type
+{jobname.words}) with used words. The \type {found} value of \type {list} is used
+as key in the produced table.
+
+\startbuffer
+\setupspellchecking[state=start,method=2,list=found]
+\en Is this written right or is this wrong?\par
+\nl Is dit goed geschreven of niet?\par
+\setupspellchecking[state=stop]
+\stopbuffer
+
+\typebuffer \startpacked \getbuffer \stoppacked
+
+The produced table is:
+
+\typefile{\jobname.words}
+
+The result can be traced with a module:
+
+\startbuffer
+\usemodule[s-languages-words]
+
+\showwords
+\stopbuffer
+
+\typebuffer
+
+This shows up as:
+
+\getbuffer
+
+The third mechanism colors languages differently. We only defined a few colors:
+
+\starttyping
+\definecolor[word:en] [b=.75]
+\definecolor[word:de] [r=.75]
+\definecolor[word:nl] [g=.75]
+\definecolor[word:unknown][r=.75,g=.75]
+\stoptyping
+
+but you can of course define a color for your favourite language in a similar way.
+
+\startbuffer
+\setupspellchecking[state=start,method=3]
+\en Is this written right or is this wrong?\par
+\nl Is dit goed geschreven of niet?\par
+\setupspellchecking[state=stop]
+\stopbuffer
+
+\typebuffer \startpacked \getbuffer \stoppacked
+
+\stopsection
+
+\stopchapter
+
+\stopcomponent
diff --git a/doc/context/sources/general/manuals/languages/languages-hyphenation.tex b/doc/context/sources/general/manuals/languages/languages-hyphenation.tex
new file mode 100644
index 000000000..48e6eb385
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-hyphenation.tex
@@ -0,0 +1,810 @@
+% language=uk
+
+\environment languages-environment
+
+\startcomponent languages-hyphenation
+
+\startchapter[title=Hyphenation][color=darkmagenta]
+
+\startsection[title=How it works]
+
+Proper hyphenation is one of the strong points of \TEX. Hyphenation in \TEX\ is
+done using so called hyphenation patterns. Making these patterns is an art
+and most users (including me) happily use whatever is available. Patterns can be
+created automatically using \type {patgen} but often manual tweaking is needed
+too. A pattern looks as follows:
+
+\starttyping
+pat1tern
+\stoptyping
+
+This means as much as: you can split the word \type {pattern} in two pieces, with
+a hyphen between the two \type {t}'s. Actually it will also split the word \type
+{patterns} because the hyphenation mechanism looks at substrings. When no number
+between characters in a pattern is given, a zero is assumed. This means as much
+as {\em undefined}. An even number inhibits hyphenation, an odd number permits
+it. The larger the number (weight), the more influence it has. A more restricted
+pattern is:
+
+\starttyping
+.pat1tern.
+\stoptyping
+
+Here the periods set the word boundaries. The pattern dictionary for us
+english has smaller patterns and the next trace shows how these are applied.
+
+\starthyphenation[traditional]
+\showhyphenationtrace[en][pattern]
+\stophyphenation
+
+The effective hyphenation of a word is determined by several factors:
+
+\startitemize[packed]
+\startitem the current language, each language can have different patterns \stopitem
+\startitem the characters, as some characters might block hyphenation \stopitem
+\startitem the settings of \type {\lefthyphenmin} and \type {\righthyphenmin} \stopitem
+\stopitemize
+
+A place where a word can be hyphenated is called a discretionary. When \TEX\
+analyzes a stream, it will inject discretionary nodes into that stream.
+
+\starttyping
+pat\discretionary{-}{}{}tern.
+\stoptyping
+
+In traditional \TEX\ hyphenation, ligature building and kerning are tightly
+interwoven which is quite effective. However, there was also a strong
+relationship between the current font and hyphenation. This is a side effect of
+traditional \TEX\ having at most 256 characters in a font and the fact that the
+used character is in fact a reference to a slot in a font. There a character in the
+input initially ends up as a character node and eventually becomes a glyph node.
+For instance two characters \type {fi} can become a ligature glyph representing
+this combination.
+
+In \LUATEX\ the hyphenation, ligature building and kerning stages are separated
+and can be overloaded. In \CONTEXT\ all three can be replaced by code written in
+\LUA. Because normally hyphenation happens before font logic is applied, there is
+no relationship with font encoding. I wrote the first \LUA\ version of the
+hyphenator on a rainy weekend and the result was not that bad so it was presented
+at the 2014 \CONTEXT\ meeting. After some polishing I decided to add this routine
+to the standard \MKIV\ repertoire which then involved some proper interfacing.
+
+You can enable the \LUA\ variant with the following command:
+
+\starttyping
+\setuphyphenation[method=traditional]
+\stoptyping
+
+We call this method \type {traditional} because in principle we can have
+many more methods and this one is (supposed to be) mostly compatible to the
+built-in method. This is a global setting. You can switch back with:
+
+\starttyping
+\setuphyphenation[method=default]
+\stoptyping
+
+In the next sections we will see how we can provide alternatives within the
+traditional method. These alternatives can be set local and therefore can operate
+over a limited range of characters.
+
+One complication in interfacing is that \TEX\ has grouping (which permits local
+settings) and we want to limit some of the above functionality using groups. At
+the same time hyphenation is a paragraph related action so we need to enable the
+hyphenation related code at a global level (or at least make sure that it gets
+exercised by forcing a \type {\par}). That means that the alternative
+hyphenator has to be quite compatible so that we could just enable it for a whole
+document. This can have an impact on performance but in practice that can be
+neglected. In \LUATEX\ the \LUA\ variant is 4~times slower than the built-in one,
+in \LUAJITTEX\ it's 3~times slower. But the good news is that the amount of time
+spent in the hyphenator is relatively small compared to other manipulations and
+macro expansion. The additional time needed for loading and preparing the
+patterns into a more \LUA\ specific format can be neglected.
+
+You can check how words get hyphenated using the patterns management script:
+
+\starttyping
+>mtxrun --script patterns --hyphenate language
+
+hyphenator |
+hyphenator | . l a n g u a g e . . l a n g u a g e .
+hyphenator | 0a2n0 0 0 2 0 0 0 0 0 0
+hyphenator | 2a0n0g0 0 2 2 0 0 0 0 0 0
+hyphenator | 0n1g0u0 0 2 2 1 0 0 0 0 0
+hyphenator | 0g0u4a0 0 2 2 1 0 4 0 0 0
+hyphenator | 2g0e0.0 0 2 2 1 0 4 2 0 0
+hyphenator | .0l2a2n1g0u4a2g0e0. . l a n-g u a g e .
+hyphenator |
+mtx-patterns | us 3 3 : language : lan-guage
+\stoptyping
+
+\stopsection
+
+\startsection[title=The last words]
+
+Mid 2014 we had to upgrade a style for a \PDF\ assembly service: chapters from
+(technical) school books are combined into arbitrary new books. There are some
+nasty aspects with this flow: for instance, all section numbers in a chapter are
+replaced by new numbers and this also involves figure and table prefixes.
+It boils down to splitting up books, analyzing the typeset content and
+preparing it for replacements. The structure is described in \XML\ files so that
+we can generate tables of contents. The reason for not generating from \XML\
+sources is that the publisher doesn't have a \XML\ workflow and that books
+already were available. Also, books from several series are combined and even
+within a series structure (and rendering) differs.
+
+What has this to do with hyphenation? Writing a style for such a flow always
+results in a more complex one than estimated and as usual it's in the details.
+The original style was written in \MKII\ and used some box juggling to achieve
+reasonable results but in \MKIV\ we can do better.
+
+Each chapter has a title and books get titles and subtitles as well. The titles
+are typeset each time a new book is composed. This happens within some layout
+constraints. Think of constraints like these:
+
+\startitemize[packed]
+\startitem the title goes on top of a shape that doesn't permit much overflow \stopitem
+\startitem there can be very long words (not uncommon in Dutch or German) \stopitem
+\startitem a short word or hyphenated part should not end up on the last line \stopitem
+\startitem the left and right hyphenation minima are at least four \stopitem
+\stopitemize
+
+The last requirement is a compromise because in most cases publishers seem to
+want ragged right not hyphenated rendering (at least in Dutch schoolbooks). The
+arguments for this are quite weak and probably originate in fear of bad rendering
+given past experiences. It's this kind of situations that drive the development
+of the more obscure features that ship with \CONTEXT\ and a (partial) solution
+for this specific case will be given later.
+
+If you look at thousands of titles and turn these into (small) paragraphs \TEX\
+does a pretty good job. It's the few exceptions that we need to catch. The next
+examples demonstrate such an extreme case.
+
+\startbuffer[example]
+\dorecurse{5} { % dejavu
+ \startlinecorrection[blank]
+ \bTABLE
+ \bTR
+ \bTD[align=middle,width=2em,foregroundstyle=bold]
+ #1
+ \eTD
+ \bTD[align={verytolerant,flushleft},width=15em,offset=1ex]
+ \hsize \dimexpr11\emwidth-#1\dimexpr.5\emwidth\relax
+ \dontcomplain
+ \lefthyphenmin=4\righthyphenmin=4
+ \blackrule[color=darkyellow,width=\hsize,height=-3pt,depth=5pt]\par
+ \begstrut\getbuffer[long]\endstrut\par
+ \eTD
+ \bTD[align={verytolerant,flushleft},width=15em,offset=1ex]
+ \sethyphenationfeatures[demo]
+ \hsize \dimexpr11\emwidth-#1\dimexpr.5\emwidth\relax
+ \dontcomplain
+ \blackrule[color=darkyellow,width=\hsize,height=-3pt,depth=5pt]\par
+ \begstrut\getbuffer[long]\endstrut\par
+ \eTD
+ \eTR
+ \eTABLE
+ \stoplinecorrection
+}
+\stopbuffer
+
+\definehyphenationfeatures
+ [demo]
+ [rightwords=1,
+ lefthyphenmin=4,
+ righthyphenmin=4]
+
+\startbuffer[long]
+a verylongword and then anevenlongerword
+\stopbuffer
+
+\starthyphenation[traditional]
+ \enabletrackers[hyphenator.visualize]
+ \getbuffer[example]\par
+ \disabletrackers[hyphenator.visualize]
+\stophyphenation
+
+Of course in practice there needs to be some reasonable width and when we pose
+these limits the longest possible word should fit into the allocated space. In
+these examples the rule shows the width. In the right columns we see a red
+colored word and that one will not get hyphenated.
+
+\stopsection
+
+\startsection[title=Explicit hyphens]
+
+Another special case that we needed to handle were (compound) words with explicit
+hyphens. Because often data comes from \XML\ files we can not really control the
+typesetting as in a \TEX\ document where the author sees what gets done. So here
+we need a way to turn these hyphens into proper hyphenation directives and at the
+same time permit the words to be hyphenated.
+
+\definehyphenationfeatures
+ [demo]
+ [hyphens=yes,
+ lefthyphenmin=4,
+ righthyphenmin=4]
+
+\startbuffer[long]
+a very-long-word and then an-even-longer-word
+\stopbuffer
+
+\starthyphenation[traditional]
+ \enabletrackers[hyphenator.visualize]
+ \getbuffer[example]\par
+ \disabletrackers[hyphenator.visualize]
+\stophyphenation
+
+\stopsection
+
+\startsection[title=Extended patterns]
+
+As with more opened up mechanisms, in \MKIV\ we can extend functionality. As an
+example I have implemented the extensions discussed in the article by László
+Németh in the Proceedings of Euro\TEX\ 2006: {\em Hyphenation in OpenOffice.org}
+(TUGboat, Volume 27, 2006). The syntax for these extensions is somewhat ugly and
+involves optional offsets and ranges. \footnote {I'm not sure if there were ever
+patterns released that used this syntax.}
+
+\startbuffer
+\registerhyphenationpattern[nl][e1ë/e=e]
+\registerhyphenationpattern[nl][a9atje./a=t,1,3]
+\registerhyphenationpattern[en][eigh1tee/t=t,5,1]
+\registerhyphenationpattern[de][c1k/k=k]
+\registerhyphenationpattern[de][schif1f/ff=f,5,2]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+These patterns result in the following hyphenations:
+
+\starthyphenation[traditional]
+ \switchtobodyfont[big]
+ \starttabulate[|||]
+ \NC reëel \NC \language[nl]\hyphenatedcoloredword{reëel} \NC \NR
+ \NC omaatje \NC \language[nl]\hyphenatedcoloredword{omaatje} \NC \NR
+ \NC eighteen \NC \language[en]\hyphenatedcoloredword{eighteen} \NC \NR
+ \NC Zucker \NC \language[de]\hyphenatedcoloredword{Zucker} \NC \NR
+ \NC Schiffahrt \NC \language[de]\hyphenatedcoloredword{Schiffahrt} \NC \NR
+ \stoptabulate
+\stophyphenation
+
+In a specification, the \type {.} indicates a word boundary and numbers indicate
+the weight of a breakpoint. The optional extended specification comes after the
+\type {/}. The values separated by a \type {=} are the pre and post sequences:
+these end up at the end of the current line and beginning of the next one. The
+optional numbers are the start position and length. These default to~1 and~2, so
+in the first example they identify \type {eë} (the weights don't count).
+
+There is a pitfall here. When the language already has patterns that for
+instance prohibit a hyphen between \type {e} and \type {ë}, like \type{e2ë}, we
+need to make sure that we give our new one a higher priority, which is why we
+used a \type{e9ë}.
+
+This feature is somewhat experimental and can be improved. Here is a more \LUA-ish
+way of setting such patterns:
+
+\starttyping
+local registerpattern =
+ languages.hyphenators.traditional.registerpattern
+
+registerpattern("nl","e1ë", {
+ start = 1,
+ length = 2,
+ before = "e",
+ after = "e",
+} )
+
+registerpattern("nl","a9atje./a=t,1,3")
+\stoptyping
+
+Just adding extra patterns to an existing set without much testing is not wise. For
+instance we could add these to the dutch dictionary:
+
+\starttyping
+\registerhyphenationpattern[nl][e3ë/e=e]
+\registerhyphenationpattern[nl][o3ë/o=e]
+\registerhyphenationpattern[nl][e3ï/e=i]
+\registerhyphenationpattern[nl][i3ë/i=e]
+\registerhyphenationpattern[nl][a5atje./a=t,1,3]
+\registerhyphenationpattern[nl][toma8at5je]
+\stoptyping
+
+That would work well for words like
+
+\starttyping
+coëfficiënt
+geïntroduceerd
+copiëren
+omaatje
+tomaatje
+\stoptyping
+
+However, the last word only goes right because we explicitly added a pattern
+for it. One reason is that the existing patterns already contain rules to
+prevent weird hyphenations. The same is true for the accented characters. So,
+consider these examples and coordinate additional patterns with other users
+so that errors can be identified.
+
+\stopsection
+
+\startsection[title=Exceptions]
+
+We have a variant on the \TEX\ primitive \type {\hyphenation}, the official way
+to register a specific way to hyphenate a word.
+
+\startbuffer
+\registerhyphenationexception[aaaaa-bbbbb]
+aaaaabbbbb \par
+\stopbuffer
+
+\typebuffer
+
+\noindentation This code is self explaining and results in:
+
+\blank
+
+\starthyphenation[traditional]
+\setupindenting[no]\hsize 1mm \lefthyphenmin 1 \righthyphenmin 1 \getbuffer
+\stophyphenation
+
+\noindentation There can be multiple hyphens and even multiple words in such a
+specification:
+
+\startbuffer
+\registerhyphenationexception[aaaaa-bbbbb cc-ccc-ddd-dd]
+aaaaabbbbb \par
+cccccddddd \par
+\stopbuffer
+
+\typebuffer
+
+\noindentation We get:
+
+\blank
+
+\starthyphenation[traditional]
+\setupindenting[no]\hsize 1mm \lefthyphenmin 1 \righthyphenmin 1 \getbuffer
+\stophyphenation
+
+
+\stopsection
+
+\startsection[title=Boundaries]
+
+A box, rule, math or discretionary will end a word and prohibit hyphenation
+of that word. Take this example:
+
+\startbuffer[demo]
+whatever \par
+whatever\hbox{!} \par
+\vl whatever\vl \par
+whatever$x$ \par
+whatever-whatever \par
+\stopbuffer
+
+\typebuffer[demo]
+
+These lines will hyphenate differently and in traditional \TEX\ you need to
+insert penalties and|/|or glue to get around it. In the \LUA\ variant we can
+enable that limitation.
+
+\startbuffer
+\definehyphenationfeatures
+ [strict]
+ [rightedge=tex]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+Here we show the three variants: traditional \TEX\ and \LUA\ with and without
+strict settings.
+
+\starttabulate[|p|p|p|]
+\HL
+\NC \ttbf \hbox to 11em{default\hss}
+\NC \ttbf \hbox to 11em{traditional\hss}
+\NC \ttbf \hbox to 11em{traditional strict\hss}
+\NC \NR
+\HL
+\NC \starthyphenation[default] \hsize1mm \getbuffer[demo] \stophyphenation
+\NC \starthyphenation[traditional] \hsize1mm \getbuffer[demo] \stophyphenation
+\NC \starthyphenation[traditional] \sethyphenationfeatures[strict]
+ \hsize1mm \getbuffer[demo] \stophyphenation
+\NC \NR
+\HL
+\stoptabulate
+
+By default \CONTEXT\ is configured to hyphenate words that start with an
+uppercase character. This behaviour is controlled in \TEX\ by the \typ {\uchyph}
+variable. A positive value will enable this and a zero or negative one disables it.
+
+\starttabulate[|p|p|p|p|]
+\HL
+\NC \ttbf \hbox to 8em{default 0\hss}
+\NC \ttbf \hbox to 8em{traditional 0\hss}
+\NC \ttbf \hbox to 8em{default 1\hss}
+\NC \ttbf \hbox to 8em{traditional 1\hss}
+\NC \NR
+\HL
+\NC \starthyphenation[default] \hsize1mm \uchyph\zerocount TEXified \dontcomplain \stophyphenation
+\NC \starthyphenation[traditional] \hsize1mm \uchyph\zerocount TEXified \dontcomplain \stophyphenation
+\NC \starthyphenation[default] \hsize1mm \uchyph\plusone TEXified \dontcomplain \stophyphenation
+\NC \starthyphenation[traditional] \hsize1mm \uchyph\plusone TEXified \dontcomplain \stophyphenation
+\NC \NR
+\HL
+\stoptabulate
+
+The \LUA\ variant behaves the same as the built-in implementation (that of course
+remains the reference).
+
+\stopsection
+
+\startsection[title=Plug-ins]
+
+The default hyphenator is similar to the built-in one, with a couple of
+extensions as mentioned. However, you can plug in your own code, given that it
+does return a proper hyphenation result. One reason for providing this plug is
+that there are users who want to play with hyphenators based on a different
+logic. In \CONTEXT\ we already have some methods to deal with languages that
+(for instance) have no spaces but split on words or syllables. A more tight
+integration with the hyphenator can have advantages so I will explore these
+options when there is demand.
+
+A result table indicates where we can break a word. If we have a four character
+word and can break after the second character, the result looks like this:
+
+\starttyping
+result = { false, true, false, false }
+\stoptyping
+
+Instead of \type {true} we can also have a table that has entries like the
+extensions discussed in a previous section. Let's give an example of a
+plug-in.
+
+\startbuffer
+\startluacode
+ local subset = {
+ a = true,
+ e = true,
+ i = true,
+ o = true,
+ u = true,
+ y = true,
+ }
+
+ languages.hyphenators.traditional.installmethod("test",
+ function(dictionary,word,n)
+ local t = { }
+ for i=1,#word do
+ local w = word[i]
+ if subset[w] then
+ t[i] = {
+ before = "<" .. w,
+ after = w .. ">",
+ left = false,
+ right = false,
+ }
+ else
+ t[i] = false
+ end
+ end
+ return t
+ end
+ )
+\stopluacode
+\stopbuffer
+
+\typebuffer \getbuffer
+
+Here we hyphenate on vowels and surround them by angle brackets when
+split over lines. This alternative is installed as follows:
+
+\startbuffer
+\definehyphenationfeatures
+ [demo]
+ [alternative=test]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+We can now use it as follows:
+
+\starttyping
+\setuphyphenation[method=traditional]
+\sethyphenationfeatures[demo]
+\stoptyping
+
+When applied to the tufte example we get:
+
+\startbuffer[demo]
+\starthyphenation[traditional]
+ \setuptolerance[tolerant]
+ \sethyphenationfeatures[demo]
+ \noindentation % \dontleavehmode
+ \input tufte\relax
+\stophyphenation
+\stopbuffer
+
+\blank \startnarrower \getbuffer[demo] \stopnarrower \blank
+
+A more realistic (but not perfect) example is the following:
+
+\startbuffer
+\startluacode
+ local packslashes = false
+
+ local specials = {
+ ["!"] = "before", ["?"] = "before",
+ ['"'] = "before", ["'"] = "before",
+ ["/"] = "before", ["\\"] = "before",
+ ["#"] = "before",
+ ["$"] = "before",
+ ["%"] = "before",
+ ["&"] = "before",
+ ["*"] = "before",
+ ["+"] = "before", ["-"] = "before",
+ [","] = "before", ["."] = "before",
+ [":"] = "before", [";"] = "before",
+ ["<"] = "before", [">"] = "before",
+ ["="] = "before",
+ ["@"] = "before",
+ ["("] = "before",
+ ["["] = "before",
+ ["{"] = "before",
+ ["^"] = "before", ["_"] = "before",
+ ["`"] = "before",
+ ["|"] = "before",
+ ["~"] = "before",
+ --
+ [")"] = "after",
+ ["]"] = "after",
+ ["}"] = "after",
+ }
+
+ languages.hyphenators.traditional.installmethod("url",
+ function(dictionary,word,n)
+ local t = { }
+ local p = nil
+ for i=1,#word do
+ local w = word[i]
+ local s = specials[w]
+ if s == "after" then
+ s = {
+ start = 1,
+ length = 1,
+ after = w,
+ left = false,
+ right = false,
+ }
+ specials[w] = s
+ elseif s == "before" then
+ s = {
+ start = 1,
+ length = 1,
+ before = w,
+ left = false,
+ right = false,
+ }
+ specials[w] = s
+ end
+ if not s then
+ s = false
+ elseif w == p and w == "/" then
+ t[i-1] = false
+ end
+ t[i] = s
+ if packslashes then
+ p = w
+ end
+ end
+ return t
+ end
+ )
+\stopluacode
+\stopbuffer
+
+\typebuffer \getbuffer
+
+Again we define a plug:
+
+\startbuffer
+\definehyphenationfeatures
+ [url]
+ [characters=all,
+ alternative=url]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+So, we only break a line after symbols.
+
+\startlinecorrection[blank]
+ \starthyphenation[traditional]
+ \tt
+ \sethyphenationfeatures[url]
+ \scale[width=\hsize]{\hyphenatedcoloredword{http://www.pragma-ade.nl}}
+ \stophyphenation
+\stoplinecorrection
+
+\noindentation A quick test can look as follows:
+
+\startbuffer
+\starthyphenation[traditional]
+ \sethyphenationfeatures[url]
+ \tt
+ \dontcomplain
+ \hsize 1mm
+ http://www.pragma-ade.nl
+\stophyphenation
+\stopbuffer
+
+\typebuffer
+
+Or:
+
+\getbuffer
+
+\stopsection
+
+\startsection[title=Blocking ligatures]
+
+Yet another predefined feature is the ability to block a ligature. In
+traditional \TEX\ this can be done by putting a \type {{}} between
+the characters, although that effect can get lost when the text is
+manipulated. The natural way to do this in a \UNICODE\ environment
+is to use the special characters \type {zwj} and \type {zwnj}.
+
+We use the following example lines:
+
+\startbuffer[sample]
+supereffective \blank
+superef\zwnj fective
+\stopbuffer
+
+\typebuffer[sample]
+
+\noindentation and define two featuresets:
+
+\startbuffer
+\definehyphenationfeatures
+ [demo-1]
+ [characters=\zwnj\zwj,
+ joiners=yes]
+
+\definehyphenationfeatures
+ [demo-2]
+ [joiners=no]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+\noindentation We limit the width to 1mm and get:
+
+\startlinecorrection[blank]
+\bTABLE[option=stretch,offset=.5ex]
+ \bTR
+ \bTD \tx
+ \type{method=default}
+ \eTD
+ \bTD \tx
+ \type{method=traditional}
+ \eTD
+ \bTD \tx
+ \type{method=traditional}\par
+ \type{featureset=demo-1}
+ \eTD
+ \bTD \tx
+ \type{method=traditional}\par
+ \type{featureset=demo-2}
+ \eTD
+ \eTR
+ \bTR
+ \bTD
+ \hsize 1mm \dontcomplain
+ \starthyphenation[default]
+ \getbuffer[sample]
+ \stophyphenation
+ \eTD
+ \bTD
+ \hsize 1mm \dontcomplain
+ \starthyphenation[traditional]
+ \getbuffer[sample]
+ \stophyphenation
+ \eTD
+ \bTD
+ \hsize 1mm \dontcomplain
+ \starthyphenation[traditional]
+ \sethyphenationfeatures[demo-1]
+ \getbuffer[sample]
+ \stophyphenation
+ \eTD
+ \bTD
+ \hsize 1mm \dontcomplain
+ \starthyphenation[traditional]
+ \sethyphenationfeatures[demo-2]
+ \getbuffer[sample]
+ \stophyphenation
+ \eTD
+ \eTR
+\eTABLE
+\stoplinecorrection
+
+\stopsection
+
+\startsection[title=Special characters]
+
+The \type {characters} example can be used (to some extent) to do the
+same as the breakpoints mechanism (compounds).
+
+\startbuffer
+\definehyphenationfeatures
+ [demo-3]
+ [characters={()[]}]
+\stopbuffer
+
+\typebuffer \blank \getbuffer \blank
+
+\startbuffer[demo]
+\starthyphenation[traditional]
+ \sethyphenationfeatures[demo-3]
+ \dontcomplain
+ \hsize 1mm \noindentation
+ we use (super)special(ized) patterns
+\stophyphenation
+\stopbuffer
+
+\typebuffer[demo] \blank \getbuffer[demo] \blank
+
+We can make this more clever by adding patterns:
+
+\startbuffer
+\registerhyphenationpattern[en][)9]
+\registerhyphenationpattern[en][9(]
+\stopbuffer
+
+\typebuffer \blank \getbuffer \blank
+
+\noindentation This gives:
+
+\blank \getbuffer[demo] \blank
+
+\noindentation A detailed trace shows that these patterns get applied:
+
+\starthyphenation[traditional]
+ \ttx
+ \showhyphenationtrace[en][(super)special(ized)]
+\stophyphenation
+
+\unregisterhyphenationpattern[en][)9]
+\unregisterhyphenationpattern[en][9(]
+
+\noindentation The somewhat weird hyphens at the edges will in practice not show
+up because there is always one regular character there.
+
+\stopsection
+
+\startsection[title=Tracing]
+
+Among the tracing options (low level trackers) there is one for pattern developers:
+
+\startbuffer
+\usemodule[s-languages-hyphenation]
+
+\startcomparepatterns[de,nl,en,fr]
+ \input zapf \quad (\showcomparepatternslegend)
+\stopcomparepatterns
+\stopbuffer
+
+\typebuffer
+
+The different hyphenation points are shown with colored bars. Some valid points
+might not be shown because the font engine can collapse successive
+discretionaries.
+
+\getbuffer
+
+\stopsection
+
+\stopchapter
+
+\stopcomponent
diff --git a/doc/context/sources/general/manuals/languages/languages-introduction.tex b/doc/context/sources/general/manuals/languages/languages-introduction.tex
new file mode 100644
index 000000000..25bbb1a90
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-introduction.tex
@@ -0,0 +1,69 @@
+% language=uk
+
+\startcomponent languages-introduction
+
+\environment languages-environment
+
+\startchapter[title=Introduction][color=darkgray]
+
+This document describes an important property of the \TEX\ typesetting system and
+\CONTEXT\ in particular: the ability to deal with different languages at the same
+time. With languages we refer to natural languages. So, we're not going to
+discuss the \TEX\ language itself, not \METAPOST, nor \LUA.
+
+The original application of \TEX\ was English that uses the Latin script. The
+fonts that came with \TEX\ were suitable for that usage. When lines became too
+long they could be hyphenated using so called hyphenation patterns. Due to the
+implementation for many years there was a close relationship between fonts and
+hyphenation. Although at some point many more languages and scripts were
+supported, it was only when the \UNICODE\ aware variants showed up that
+hyphenation and fonts were decoupled. This makes it much easier to mix
+languages that use different scripts. Although Greek, Cyrillic, Arabic, Chinese,
+Japanese, Korean and other languages have been supported for a while using
+(sometimes dirty) tricks, we now have cleaner implementations.
+
+We can hyphenate words in all languages (and scripts) that have a need for it,
+that is, split it at the end of a line and add a symbol before and|/|or after the
+break. The way words are broken into parts is called hyphenation and so called
+patterns are used to achieve that goal. The way these patterns are constructed
+and applied was part of the research related to \TEX\ development. The method
+used is also applied in other programs and is probably one of the few popular
+ways to deal with hyphenation. There have been ideas about extensions that cover
+the demands of certain languages but so far nothing better has shown up. In the
+end \TEX\ does a pretty decent job and more advanced tricks don't necessarily
+lead to better results.
+
+Hyphenation is driven by a language number and that's about it. This means that
+one cannot claim that \TEX\ in its raw form supports languages, other than that
+it can hyphenate and use fonts that provide the glyphs. It's up to a macro package
+to wrap this into a mechanism that provides the user an interface. So, when we
+speak about language support, hyphenation is only one aspect. Labels, like the
+\type {figure} in {\em figure~1.2} need to adapt to the main document language.
+When dates are shown they can be language specific. Scientific units and math
+function names can also be subjected to translation. Registers and other lists
+have to be sorted according to specific rules. Spacing can differ per language.
+
+In this manual we will cover some of the functionality in \CONTEXT\ \MKIV\ that
+relates to languages (and scripts). This manual is a complement to other manuals,
+articles and documentation. Here we mostly focus on the language aspects. Some of
+the content (or maybe most) might look alien and complex to you. This is because
+one purpose of this manual is to provide a place to wrap up some aspects of
+\CONTEXT. If you're not interested in that, just stick to the more general
+manuals that also cover language aspects.
+
+\startnotabene
+ This document is still under construction. The functionality discussed here
+ will stay and more might show up. Of course there are errors, and they're all
+ mine. The text is not checked for spelling errors. Feel free to let me know
+ what should get added.
+\stopnotabene
+
+\startlines
+Hans Hagen
+PRAGMA ADE, Hasselt NL
+2013 \emdash\ 2016
+\stoplines
+
+\stopchapter
+
+\stopcomponent
diff --git a/doc/context/sources/general/manuals/languages/languages-labels.tex b/doc/context/sources/general/manuals/languages/languages-labels.tex
new file mode 100644
index 000000000..3d1c00c04
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-labels.tex
@@ -0,0 +1,141 @@
+% language=uk
+
+\environment languages-environment
+
+\startcomponent languages-labels
+
+\startchapter[title=Labels][color=darkcyan]
+
+\startsection[title=Introduction]
+
+When we started using \TEX, I naturally started with plain \TEX. But it didn't
+take long before we tried \LATEX. Because our documents were in Dutch one of the
+first fights with this package was to get rid of the english labels. Because
+rather soon we decided to cook up an alternative package, a decent label
+mechanism was one of the first things to show up. And as soon as multiple
+language typesetting gets into view, such a mechanism becomes one of those
+language dependent features. In this chapter the basics will be covered.
+
+\stopsection
+
+\startsection[title=Defining labels]
+
+Before we define a label we need to define a label class. You probably seldom
+need that but this is how it's done:
+
+\startbuffer
+\definelabelclass [mylabel]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+There are some classes predefined:
+
+\starttabulate[|lB|l|]
+ \NC head \NC (complete) titles like \headtext {chapter} and \headtext {figure} \NC \NR
+ \NC label \NC in||text labels like \labeltext {chapter} and \labeltext {figure} \NC \NR
+ \NC mathlabel \NC function names like \mathlabeltext{sin} and \mathlabeltext{cos} \NC \NR
+ \NC taglabel \NC labels used for tagging purposes in the backend \NC \NR
+ \NC btxlabel \NC labels used in typesetting bibliographic items \NC \NR
+\stoptabulate
+
+The physical units mechanism also uses labels: unit, operator, prefix and suffix.
+All these labels are defined per language with a fall back on english.
+
+Given that we have defined class \type {mylabel}, a label itself is set like
+this:
+
+\startbuffer
+\setupmylabeltext
+ [en]
+ [first={<after first},
+ second={{before second>},{<after second}}]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+The first argument (the language) is optional. In the next section we will see
+how these labels are used. A lot of labels are predefined, in \MKIV\ this happens
+in the file \type {lang-txt.lua}. There is no need to adapt this file as you can
+always add labels run time.
+
+\stopsection
+
+\startsection[title=Using labels]
+
+How a label is called depends on the way it needs to be used. In any case the
+main language set determines the language of the label. So, when in a Dutch text
+we temporarily switch to German, the Dutch labels are used.
+
+\startbuffer
+\starttabulate[||||]
+ \NC \bf command \NC \ttbf first \NC \ttbf {second} \NC \NR
+ \HL
+ \NC \type {\leftmylabeltext {tag}} \NC \leftmylabeltext {first} \NC \leftmylabeltext {second} \NC \NR
+ \NC \type {\rightmylabeltext{tag}} \NC \rightmylabeltext{first} \NC \rightmylabeltext{second} \NC \NR
+ \NC \type {\mylabeltext {tag}} \NC \mylabeltext {first} \NC \mylabeltext {second} \NC \NR
+ \NC \type {\mylabeltexts {tag}{text}} \NC \mylabeltexts {first}{text} \NC \mylabeltexts {second}{text} \NC \NR
+\stoptabulate
+\stopbuffer
+
+\getbuffer
+
+\stopsection
+
+\startsection[title=Hooks]
+
+Some mechanisms have label support built in, most notably section
+heads and numbered items, like figure captions.
+
+\startbuffer
+\definehead
+ [myhead]
+ [subsection]
+
+\setuphead
+ [myhead]
+ [bodypartlabel=bodypartmyhead]
+
+\setuplabeltext
+ [en]
+ [bodypartmyhead=My Head: ]
+
+\myhead{Welcome}
+\stopbuffer
+
+\typebuffer \getbuffer
+
+The head text label class can be used as follows:
+
+\startbuffer
+\setupheadtext
+ [SomeHead=Just A Title]
+
+\subsection
+ [title=\headtext{SomeHead}]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+A label will obey the style settings, as in:
+
+\startbuffer
+\definehead
+ [MyFancyHead]
+ [subsection]
+ [style={\bs\setcharactercasing[Words]}]
+
+\setupheadtext
+ [SomeHead=just another title]
+
+\MyFancyHead
+ [title=\headtext{SomeHead}]
+\stopbuffer
+
+\typebuffer \getbuffer
+
+\stopsection
+
+\stopchapter
+
+\stopcomponent
diff --git a/doc/context/sources/general/manuals/languages/languages-mkiv.tex b/doc/context/sources/general/manuals/languages/languages-mkiv.tex
new file mode 100644
index 000000000..dbf9875e1
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-mkiv.tex
@@ -0,0 +1,38 @@
+\enablemode[simple] % ,oversized
+
+\startproduct languages-mkiv
+
+\environment languages-environment
+
+\setupbackgrounds[page][background=]
+
+\component languages-cover
+
+% \startcovermatter
+
+\page[dummy] \setupbackgrounds[page][background=page] \resetuserpagenumber
+
+% \stopcovermatter
+
+\startfrontmatter
+ \component languages-contents
+ \component languages-introduction
+\stopfrontmatter
+
+\startbodymatter
+ \component languages-basics
+ \component languages-hyphenation
+ \component languages-labels
+ \component languages-numbering
+ \component languages-typesetting
+ \component languages-goodies
+ \component languages-sorting
+\stopbodymatter
+
+\startappendices
+ \component languages-appendix
+\stopappendices
+
+\component languages-back
+
+\stopproduct
diff --git a/doc/context/sources/general/manuals/languages/languages-numbering.tex b/doc/context/sources/general/manuals/languages/languages-numbering.tex
new file mode 100644
index 000000000..510f51034
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-numbering.tex
@@ -0,0 +1,295 @@
+% language=uk
+
+\environment languages-environment
+
+\startcomponent languages-numbering
+
+\startchapter[title=Numbering][color=darkgreen]
+
+\startsection[title=Introduction]
+
+Numbering is complex and in \CONTEXT\ it's not easy either. This is because we
+not only have 1, 2, 3 \unknown\ but also sub numbers like 1a, 1b, 1c \unknown\
+or 1.a, 1.b, 1.c \unknown\ There can be many levels, different separators, final
+symbols. As we're talking languages we only discuss conversion here: the
+mechanism that turns a number into for instance a letter. It happens that the
+mapping from a number onto a letter is language dependent. The next lines show
+the English, Spanish and Slovenian alphabetic numbers:
+
+\blank
+\startpacked
+\startcolor[maincolor]
+\noindentation\dontleavehmode
+ {\ttbf\mainlanguage[en]\dorecurse{28}
+ {\hbox to 1.5em{\convertnumber{alphabetic}{#1}\hss}}}\par
+\noindentation\dontleavehmode
+ {\ttbf\mainlanguage[es]\dorecurse{28}
+ {\hbox to 1.5em{\convertnumber{alphabetic}{#1}\hss}}}\par
+\noindentation\dontleavehmode
+ {\ttbf\mainlanguage[sl]\dorecurse{28}
+ {\hbox to 1.5em{\convertnumber{alphabetic}{#1}\hss}}}\par
+\stopcolor
+\stoppacked
+\blank
+
+You convert a number into a letter with:
+
+\starttyping
+\convertnumber{alphabetic}{15}
+\stoptyping
+
+There is also \type {\uconvertnumber} which does not expand unless typesetting
+is going on. Normally you don't need to bother about this.
+
+The \type {alphabetic} converter adapts to the current main language. When a
+language has no special alphabet, the regular 26 characters are used.
+
+A converter can also convert to a roman numeral, a language specific ordered
+list, a day or month, an ordinal string and again there can be a language
+specific conversion. The general conversion macro takes a conversion name and
+a number. When a conversion can be set (for instance in an itemized list, or in
+section numbering) you can use these names. You can define additional
+converters if needed, as long as the converter can handle a number.
+
+\starttyping
+\defineconversion [alphabetic] [\alphabeticnumerals]
+\stoptyping
+
+Here \type {\alphabeticnumerals} is a converter. If you look into the source of
+\CONTEXT\ you will see that many converters are calling out to \LUA, where we
+have implemented those specific conversions. The following table has long and
+short names. The short ones are historic.
+
+\starttabulate
+\FL
+\NC month \NC \type {\monthlong} \NC \NR
+\NC month:mnem \NC \type {\monthshort} \NC \NR
+\ML
+\NC character \NC \type {\character} \NC \NR
+\NC Character \NC \type {\Character} \NC \NR
+\NC characters \NC \type {\characters} \NC \NR
+\NC Characters \NC \type {\Characters} \NC \NR
+\ML
+\NC AK \NC \type {\smallcappedcharacters} \NC \NR
+\NC KA \NC \type {\smallcappedcharacters} \NC \NR
+\ML
+\NC alphabetic a \NC \type {\alphabeticnumerals} \NC \NR
+\NC Alphabetic A \NC \type {\Alphabeticnumerals} \NC \NR
+\ML
+\NC number numbers n \NC \type {\numbers} \NC \NR
+\NC Numbers N \NC \type {\Numbers} \NC \NR
+\NC mediaeval m \NC \type {\mediaeval} \NC \NR
+\ML
+\NC word words \NC \type {\verbosenumber} \NC \NR
+\NC Word Words \NC \type {\VerboseNumber} \NC \NR
+\ML
+\NC ordinal \NC \type {\ordinalnumber} \NC \NR
+\NC Ordinal \NC \type {\Ordinalnumber} \NC \NR
+\ML
+\NC romannumerals i r \NC \type {\romannumerals} \NC \NR
+\NC Romannumerals I R \NC \type {\Romannumerals} \NC \NR
+\ML
+\NC o \NC \type {\oldstylenumerals} \NC \NR
+\NC O \NC \type {\oldstylenumerals} \NC \NR
+\NC or \NC \type {\oldstyleromannumerals} \NC \NR
+\ML
+\NC KR \NC \type {\smallcappedromannumerals} \NC \NR
+\NC RK \NC \type {\smallcappedromannumerals} \NC \NR
+\ML
+\NC greek g \NC \type {\greeknumerals} \NC \NR
+\NC Greek G \NC \type {\Greeknumerals} \NC \NR
+\NC mathgreek \NC \type {\mathgreek} \NC \NR
+\ML
+\NC abjadnumerals \NC \type {\abjadnumerals} \NC \NR
+\NC abjadnodotnumerals \NC \type {\abjadnodotnumerals} \NC \NR
+\NC abjadnaivenumerals \NC \type {\abjadnaivenumerals} \NC \NR
+\ML
+\NC thainumerals \NC \type {\thainumerals} \NC \NR
+\NC devanagarinumerals \NC \type {\devanagarinumerals} \NC \NR
+\NC gurmurkhinumerals \NC \type {\gurmurkhinumerals} \NC \NR
+\NC gujaratinumerals \NC \type {\gujaratinumerals} \NC \NR
+\NC tibetannumerals \NC \type {\tibetannumerals} \NC \NR
+\NC greeknumerals \NC \type {\greeknumerals} \NC \NR
+\NC Greeknumerals \NC \type {\Greeknumerals} \NC \NR
+\NC arabicnumerals \NC \type {\arabicnumerals} \NC \NR
+\NC persiannumerals \NC \type {\persiannumerals} \NC \NR
+\NC arabicexnumerals \NC \type {\arabicexnumerals} \NC \NR
+\NC arabicdecimals \NC \type {\arabicdecimals} \NC \NR
+\NC persiandecimals \NC \type {\persiandecimals} \NC \NR
+\ML
+\NC koreannumerals kr \NC \type {\koreannumerals} \NC \NR
+\NC koreanparenthesisnumerals kr-p \NC \type {\koreanparenthesisnumerals} \NC \NR
+\NC koreancirclenumerals kr-c \NC \type {\koreancirclenumerals} \NC \NR
+\ML
+\NC chinesenumerals cn \NC \type {\chinesenumerals} \NC \NR
+\NC chinesecapnumerals cn-c \NC \type {\chinesecapnumerals} \NC \NR
+\NC chineseallnumerals cn-a \NC \type {\chineseallnumerals} \NC \NR
+\ML
+\NC sloveniannumerals \NC \type {\sloveniannumerals} \NC \NR
+\NC slovenianNumerals \NC \type {\slovenianNumerals} \NC \NR
+\ML
+\NC spanishnumerals \NC \type {\spanishnumerals} \NC \NR
+\NC spanishNumerals \NC \type {\spanishNumerals} \NC \NR
+\LL
+\stoptabulate
+
+The \type {alphabetic} and \type {Alphabetic} converters adapt to Slovenian and
+Spanish as do their small capped alternatives. There are more general helpers for it
+too:
+
+\starttyping
+\languagecharacters{number}
+\languageCharacters{number}
+\stoptyping
+
+Also language related is the \type {\continuednumber} macro. Here we see an
+application:
+
+\startbuffer
+1 \continuednumber{1}
+1, 2 \continuednumber{2}
+1, 2, 3 \continuednumber{3}
+\stopbuffer
+
+\typebuffer
+
+\noindentation What renders as:
+
+\startlines[color=maincolor]
+\getbuffer
+\stoplines
+
+Such a macro is typically used in combination with counters and it just typesets
+a label text depending on the value being non|-|zero.
+
+\startbuffer
+\setuplabeltext[en][continued={and so on}]
+1, 2, 3 \continuednumber{3}
+1, 2, 3 \convertnumber{continued}{3}
+\stopbuffer
+
+\typebuffer
+
+\noindentation This gives:
+
+\startlines[color=maincolor]
+\getbuffer
+\stoplines
+
+In the rare case that you want to check if a conversion is defined you can use
+
+\starttyping
+\doifelseconversiondefined{name}{true}{false}
+\stoptyping
+
+So,
+
+\startbuffer
+\doifelseconversiondefined{characters}{we can convert}{forget about it}
+\stopbuffer
+
+\typebuffer
+
+\noindentation Gives:
+
+\startlines[color=maincolor]
+\getbuffer
+\stoplines
+
+There are also some non language related converters that we mention here for
+completeness:
+
+\blank
+\noindentation\type {set 0}: \startcolor[maincolor]\dorecurse{20}{\convertnumber{set 0}{#1} }\stopcolor\par
+\noindentation\type {set 1}: \startcolor[maincolor]\dorecurse{20}{\convertnumber{set 1}{#1} }\stopcolor\par
+\noindentation\type {set 2}: \startcolor[maincolor]\dorecurse{20}{\convertnumber{set 2}{#1} }\stopcolor\par
+\noindentation\type {set 3}: \startcolor[maincolor]\dorecurse{20}{\convertnumber{set 3}{#1} }\stopcolor\par
+\blank
+
+When a set overruns we start again at the first element.
+
+\noindentation The ordinal converter produces output like \color [maincolor]
+{\convertnumber {ordinal}{123}} and \color [maincolor] {\convertnumber
+{ordinal}{654}}. The corresponding string renderer is \type {\highordinalstr}.
+
+% quite limited currently so not documented here:
+%
+% \wordtonumber{two}{3}
+% \wordtonumber{fivethousand}{unknown}
+
+\stopsection
+
+\startsection[title=Dates]
+
+Dates are also language dependent. The following macros take a number and return
+the name of the month or day.
+
+\starttabulate
+\NC \type {\monthlong } \NC \monthlong {10} \NC \NR
+\NC \type {\monthshort} \NC \monthshort{10} \NC \NR
+\NC \type {\MONTH } \NC \MONTH {10} \NC \NR
+\NC \type {\MONTHLONG } \NC \MONTHLONG {10} \NC \NR
+\NC \type {\MONTHSHORT} \NC \MONTHSHORT{10} \NC \NR
+\NC \type {\weekday } \NC \weekday {5} \NC \NR
+\NC \type {\WEEKDAY } \NC \WEEKDAY {5} \NC \NR
+\stoptabulate
+
+The current date can be typeset with \type {\currentdate} and a
+specific date with \type {\date}, for instance:
+
+\startbuffer
+\currentdate[weekday,day,month,year]
+\currentdate[WEEKDAY,day,MONTH,year]
+\date[d=12,m=12,y=1998][weekday]
+\date[d=12,m=12,y=1998]
+\stopbuffer
+
+\typebuffer
+
+\startlines[color=maincolor]
+\getbuffer
+\stoplines
+
+\noindentation Possible elements of the specification are:
+
+\starttabulate
+\FL
+\NC + ord \NC ordinal suffix \NC \NR
+\NC ++ highord \NC high ordinal suffix \NC \NR
+\ML
+\NC mnem: \NC mnemonic prefix \NC \NR
+\ML
+\NC Y y year \NC year 4 digits \NC \NR
+\NC yy \NC year 2 digits \NC \NR
+\ML
+\NC M \NC month 1 or 2 digits \NC \NR
+\NC mm \NC month 2 digits \NC \NR
+\ML
+\NC D \NC day 1 or 2 digits \NC \NR
+\NC dd \NC day 2 digits \NC \NR
+\ML
+\NC W \NC 1 digit \NC \NR
+\ML
+\NC month m \NC language dependent (can be mnemonic) \NC \NR
+\NC day d \NC language dependent \NC \NR
+\NC weekday w \NC language dependent \NC \NR
+\ML
+\NC MONTH \NC month uppercased \NC \NR
+\NC WEEKDAY \NC weekday uppercased \NC \NR
+\ML
+\NC referral \NC YYYYMMDD \NC \NR
+\ML
+\NC space \\ \NC space \NC \NR
+\NC <word> \NC word \NC \NR
+\LL
+\stoptabulate
+
+\stopsection
+
+% \startsection[title=Counters]
+%
+% \stopsection
+
+\stopchapter
+
+\stopcomponent
diff --git a/doc/context/sources/general/manuals/languages/languages-sorting.tex b/doc/context/sources/general/manuals/languages/languages-sorting.tex
new file mode 100644
index 000000000..abf7b292c
--- /dev/null
+++ b/doc/context/sources/general/manuals/languages/languages-sorting.tex
@@ -0,0 +1,235 @@
+% language=uk
+
+\environment languages-environment
+
+\startcomponent languages-sorting
+
+\startchapter[title=Sorting][color=darkblue]
+
+\startsection[title=Introduction]
+
+Sorting is complex, not so much for English, Dutch, German, etc. only texts but
+there are languages and scripts that are more demanding. There are several
+complications:
+
+\startitemize
+
+ \startitem
+ There can be characters that have accents, like à, á, â, ã, ä
+ \unknown\ that have a base shape a and in an index these often end up
+ close to each other. The order can differ per language.
+ \stopitem
+
+ \startitem
+ There are upper and lowercase words and there can be different
+ expectations to them being mixed or separated.
+ \stopitem
+ \startitem
+ Some scripts have characters that are combinations, like Æ, and
+ one might want to see them as one character or two, in which the
+ second one obeys the sorting order. The shape can dominate here.
+ \stopitem
+ \startitem
+ Some scripts, like Japanese, are a combination of several scripts
+ and sorting then depends on normalization.
+ \stopitem
+ \startitem
+ When there are many glyphs, like in Chinese, the order can depend
+ on the complexity of the glyph and when we're lucky that order is
+ reflected in the numeric character order.
+ \stopitem
+\stopitemize
+
+Often the rules are somewhat strict and one can doubt if the same rules would
+have been imposed if computers had been developed earlier. Given discussions one
+can doubt if the rules are really consistent or just there because someone (or a
+group) with influence set the standard (not so much different from grammar). So,
+if we deal with sorting, we do that in such a way that users can (to some extent)
+influence the outcome. After all, one important aspect of typesetting and
+organizing content is that the user gets the feeling of control and a diversion
+from a standard can be part of that. The reader will often not notice these
+details. In the next sections we will explore the way sorting is done in
+\CONTEXT. The method evolved over a few decades. In \MKII\ sorting happened
+between runs and it was just part of the processing of a document that users
+never really saw in action. Sorting just happened and few users will have noticed
+that we moved from a \MODULA\ program to a \PERL\ script and ended up with a
+\RUBY\ script. In fact, there is a \LUA\ replacement but it never got tested well
+because we moved on to \MKIV. There all happens inside the engine using \LUA.
+Some principles stayed the same but we are more flexible now.
+
+\stopsection
+
+\startsection[title=How it works]
+
+How does sorting work out? Take these words:
+
+\startlines
+abracadabra
+abräcàdábra
+àbracádabrä
+ábracadàbra
+äbrácadabrà
+\stoplines
+
+As long as they end up in an order where the reader can find them, we're okay.
+After all we're pretty good in pattern recognition.
+
+There are probably many ways to implement a sorter but the one we use is more or
+less a follow up on the one we had for over a decade and was the result of an
+evolution based on user demand. It boils down to cleaning up the string in such a
+way that it can be split into meaningful characters. One can argue that we should
+use some kind of standardized sorting method but the problem is that we always have
+to deal with for instance embedded \TEX\ commands and mixed content, for instance
+numbers. And users using the same language can have different opinions about the
+rules too.
+
+A word (or sequence of words) is split into characters. Because there can be
+\TEX\ commands in there some cleanup happens beforehand. After that we create
+several lists with numbers that will be compared when sorting two entries.
+
+\startluacode
+
+-- local ignoredoffset = sorters.constants.ignoredoffset
+-- local replacementoffset = sorters.constants.replacementoffset
+-- local digitsoffset = sorters.constants.digitsoffset
+-- local digitsmaximum = sorters.constants.digitsmaximum
+
+-- Local shortcuts for the helpers used by the demonstration code below.
+
+local context = context
+
+local utfchar = utf.char
+local utfbyte = utf.byte
+local concat = table.concat
+local gsub = string.gsub
+local formatters = string.formatters
+
+-- Entry formatters: string entries are passed through as they are, all other
+-- entries (expected to be numbers) are shown as "x" followed by at least two
+-- uppercase hexadecimal digits.
+local f_char = formatters["%s"]
+local f_byte = formatters["x%02X"]
+
+-- Descriptions that are typeset next to each key of a split result.
+local meaning = {
+ ch = "raw character",
+ mm = "minus mapping",
+ zm = "zero mapping",
+ pm = "plus mapping",
+ mc = "lowercase - 1",
+ zc = "lowercase",
+ pc = "lowercase + 1",
+ uc = "unicode",
+}
+
+-- Typeset one tabulate row for the list stored under 'key' in the split result
+-- 's': first the key, then its description, then all entries joined by "~".
+-- String entries are formatted with f_char, all other entries with f_byte. When
+-- 'bodyfont' is given the third cell switches to that bodyfont before the
+-- entries are typeset.
+local function show(s,key,bodyfont)
+    local list = s[key]
+    local cells = { }
+    for index=1,#list do
+        local entry = list[index]
+        local fmt = type(entry) == "string" and f_char or f_byte
+        cells[index] = fmt(entry)
+    end
+    local joined = concat(cells,"~")
+    -- first cell: the key
+    context.NC()
+    context.maincolor()
+    context(key)
+    -- second cell: what the key stands for
+    context.NC()
+    context.maincolor()
+    context(meaning[key])
+    -- third cell: the formatted entries
+    context.NC()
+    if bodyfont then
+        context.switchtobodyfont{bodyfont}
+    end
+    context(joined)
+    context.NC()
+    context.NR()
+end
+
+-- Typeset a table that shows how the sorter splits 'str'. The sorter language
+-- is set to 'language' (or "en" when none is given) before splitting. The
+-- header row shows the language and the string itself; the rows below show
+-- each list of the split result. When 'bodyfont' is given it is used for the
+-- string in the header and for the raw character row.
+function document.ShowSortSplit(str,language,bodyfont)
+    sorters.setlanguage(language or "en")
+    local split = sorters.splitters.utf(str)
+    context.starttabulate{ "|Tl|Tlj2|Tp|" }
+    context.FL()
+    -- header row: empty cell, language, string
+    context.NC()
+    context.NC()
+    context.maincolor()
+    context(language)
+    context.NC()
+    if bodyfont then
+        context.switchtobodyfont{bodyfont}
+    end
+    context.maincolor()
+    context(str)
+    context.NC()
+    context.NR()
+    context.ML()
+    -- only the raw character row is passed the bodyfont
+    show(split,"ch",bodyfont)
+    for _, key in ipairs { "uc", "zc", "mc", "pc", "zm", "mm", "pm" } do
+        show(split,key)
+    end
+    context.LL()
+    context.stoptabulate()
+end
+
+\stopluacode
+
+We can best demonstrate this with a few examples. As usual an English language
+example is trivial.
+
+\ctxlua{document.ShowSortSplit("abracadabra","en")}
+
+When we add an uppercase character we get a slightly different outcome:
+
+\ctxlua{document.ShowSortSplit("Abracadabra","en")}
+
+Some characters will be split, like \type {æ}:
+
+\ctxlua{document.ShowSortSplit("æsop","en")}
+
+It gets more complex when language specific demands kick in. Compare an English, German
+and Austrian split:
+
+\ctxlua{document.ShowSortSplit("Abräcàdábra","en")}
+\ctxlua{document.ShowSortSplit("Abräcàdábra","de")}
+\ctxlua{document.ShowSortSplit("Abräcàdábra","de-at")}
+
+The way a character gets replaced, like \type {ä} into \type {ae}, is defined in
+\type {sort-lan.lua} using \LUA\ tables. We will not explain all the obscure
+details here; most of the work is already done, so users are not bothered by
+these definitions. And new ones can often be made by copying and adapting an
+existing one.
+
+The sorting itself is specified by a sequence:
+
+\starttabulate[|TlCT{maincolor}|Tl|]
+\NC default \NC zc,pc,zm,pm,uc \NC \NR
+\NC before \NC mm,mc,uc \NC \NR
+\NC after \NC pm,mc,uc \NC \NR
+\NC first \NC pc,mm,uc \NC \NR
+\NC last \NC mc,mm,uc \NC \NR
+\stoptabulate
+
+The raw character is what we get after the (language specific) replacement has
+been applied and the unicodes are used when comparing. Lowercasing is done using
+the \UNICODE\ lowercase code, but one can define language specific ones too. The
+plus and minus variants can be used to force lowercase before or after uppercase.
+The mapping is based on an alphabet specification so this can differ per language
+and again we also provide plus and minus values that depend on case. When a
+character has no case we use shapes instead. For instance, the shape of \type
+{à} is \type {a}. Digits are treated specially and currently get an offset so that
+they end up last in the sort order.
+
+\defineregister[jindex]
+
+\startbuffer
+ぱあ \jindex{ぱあ}
+ぱー \jindex{ぱー}
+ぱぁ \jindex{ぱぁ}
+\stopbuffer
+
+{\switchtobodyfont[ipaex]\startlines\typebuffer\stoplines}
+
+This three entry index\jindex{ぱあ}\jindex{ぱー}\jindex{ぱぁ} should be sorted in the order:
+{\switchtobodyfont[ipaex]\ruledhbox{ぱー}\enspace\ruledhbox{ぱぁ}\enspace\ruledhbox{ぱあ}}.
+
+{\mainlanguage[jp]\switchtobodyfont[ipaex]\placeregister[jindex][language=jp,n=1,method=default]}
+{\mainlanguage[jp]\switchtobodyfont[ipaex]\placeregister[jindex][language=jp,n=1,method=zm]}
+
+\ctxlua{document.ShowSortSplit("ぱあ","jp","ipaex")}
+\ctxlua{document.ShowSortSplit("ぱー","jp","ipaex")}
+\ctxlua{document.ShowSortSplit("ぱぁ","jp","ipaex")}
+
+{\em To be continued!}
+
+\stopsection
+
+% ぱー $\prec$ ぱぁ $\prec$ ぱあ
+
+\stopchapter
+
+\stopcomponent