summaryrefslogtreecommitdiff
path: root/tex/context/base/mkii/enco-ini.mkii
diff options
context:
space:
mode:
authorContext Git Mirror Bot <phg42.2a@gmail.com>2016-01-12 17:15:07 +0100
committerContext Git Mirror Bot <phg42.2a@gmail.com>2016-01-12 17:15:07 +0100
commit8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkii/enco-ini.mkii
parentf5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
downloadcontext-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz
2016-01-12 16:26:00
Diffstat (limited to 'tex/context/base/mkii/enco-ini.mkii')
-rw-r--r--tex/context/base/mkii/enco-ini.mkii1137
1 files changed, 1137 insertions, 0 deletions
diff --git a/tex/context/base/mkii/enco-ini.mkii b/tex/context/base/mkii/enco-ini.mkii
new file mode 100644
index 000000000..8603f2420
--- /dev/null
+++ b/tex/context/base/mkii/enco-ini.mkii
@@ -0,0 +1,1137 @@
+%D \module
+%D [ file=enco-ini,
+%D version=2007.02.19, % 2000.12.27, % 1998.12.03,
+%D title=\CONTEXT\ Encoding Macros,
+%D subtitle=Initialization,
+%D author=Hans Hagen,
+%D date=\currentdate,
+%D copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
+%C
+%C This module is part of the \CONTEXT\ macro||package and is
+%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
+%C details.
+
+%D Quite some code will be moved to the mk files once we're ready
+%D for it.
+
+%D This module is a reimplementation of the module that handled
+%D composed characters and non \ASCII\ characters. The changes
+%D are not that fundamental, and mainly concern moving
+%D definitions of specific glyphs and accents to other files as
+%D well as moving plain handling of accents to this module
+%D instead of overloading plain \TEX\ commands.
+
+%D Patterns are kind of mixed with font encodings and
+%D mappings. Alas.
+
+% Fallback: when no pattern synchronizer has been defined yet, make
+% \synchronizepatterns a no-op so the calls in this file are harmless.
+\ifx\synchronizepatterns\undefined \let\synchronizepatterns\relax \fi
+
+%D While dealing with input (the text source) and output (the
+%D glyphs), encoding comes into view. To summarize a few:
+%D
+%D \startitemize
+%D \item Bytes in the input file are mapped to an internal
+%D representation. An~\type {a} often stays an~\type {a},
+%D but~\type {\"e} can become either one code or become
+%D two codes (ending in overlapping glyphs).
+%D \item Characters can be made active and mapped onto another
+%D character.
+%D \item When changing case, characters are mapped onto
+%D themselves, their case||counterpart or a reasonable
+%D alternative, like~\"e onto~e.
+%D \item Single character representations in a \DVI\ file can
+%D be mapped onto one or more characters, either or not
+%D in more than one font file (virtual fonts).
+%D \item In the final format, fonts collections can be
+%D partially embedded, thereby losing the one||to||one
+%D relation between several instances of one font.
+%D \item For special purposes, individual characters should be
+%D mapped onto a dedicated encoding vector, for instance
+%D \PDF\ document encoding.
+%D \stopitemize
+%D
+%D These and other kind of mappings are to be dealt with, and
+%D the exact way of dealing often depends on the language to be
+%D typeset.
+
+\writestatus{loading}{ConTeXt Encoding Macros / Initialization}
+
+\unprotect
+
+%D First we define a few local or not yet initialized constants.
+
+% Name prefixes: \@map@ and \@cas@ are combined with \charactermapping to
+% build the names of the per-mapping token registers (see \setmappingtoks
+% and \checkmappingtoks). \@fha@ is not referenced in the visible part of
+% this file.
+\def\@map@{@m@ap@} % mapping prefix
+\def\@fha@{@f@ha@} % font prefix
+\def\@cas@{@c@as@} % casecom prefix
+
+% Fall back to \s!en when no current language has been defined yet.
+\ifx\currentlanguage\undefined \let\currentlanguage\s!en \fi
+
+%D \macros
+%D {setupencoding}
+%D
+%D The following setup command is used to tune encoding
+%D handling.
+
+% User command: hands its bracketed argument to \dosetupencoding by way of
+% \dosingleargument.
+\def\setupencoding
+ {\dosingleargument\dosetupencoding}
+
+% Passes the settings in #1 to \getparameters under the \??ec prefix and
+% then (re)defines \defaultencoding: it becomes the value of \@@ecdefault,
+% or \s!default when \@@ecdefault is empty. The \edef freezes the result at
+% setup time.
+\def\dosetupencoding[#1]%
+ {\getparameters[\??ec][#1]%
+ \edef\defaultencoding
+ {\ifx\@@ecdefault\empty\s!default\else\@@ecdefault\fi}}
+
+%D \macros
+%D {useencoding}
+%D
+%D Encodings things are defined in separate files and are
+%D loaded only once, using:
+%D
+%D \showsetup{useencoding}
+
+% Loads the definition file of encoding #1 at most once. The control
+% sequence named \c!file\f!encodingprefix#1 serves as the "already loaded"
+% flag: it is set (to \empty) before the file is read, so a second request
+% for the same encoding does nothing.
+\def\douseencoding#1%
+ {\doifundefined{\c!file\f!encodingprefix#1}%
+ {\letvalue{\c!file\f!encodingprefix#1}\empty
+ \makeshortfilename[\truefilename{\f!encodingprefix#1}]%
+ \startreadingfile
+ % the two branches report message 2 resp. 3 of \m!encodings
+ % (presumably "loaded" / "not found" -- confirm against \readsysfile)
+ \readsysfile{\shortfilename.mkii}
+ {\showmessage\m!encodings2{#1}}
+ {\showmessage\m!encodings3{#1}}%
+ \stopreadingfile}}
+
+% User command: applies \douseencoding to every entry of the comma
+% separated list #1.
+\def\useencoding[#1]%
+ {\processcommalist[#1]\douseencoding}
+
+%D \macros
+%D {startmapping,enablemapping}
+%D
+%D In order to process patterns, convert from lower to
+%D uppercase and vice versa and some more, we provide a
+%D mechanism to define mappings. The first real application
+%D of this command was:
+%D
+%D \starttyping
+%D \startmapping [something]
+%D \definecasemap 165 181 165
+%D \definecasemap 171 187 171
+%D ...
+%D \defineuppercasecom \i {I}
+%D \defineuppercasecom \l \L
+%D \definelowercasecom \AE \ae
+%D ...
+%D \stopmapping
+%D \stoptyping
+%D
+%D So, character 165 becomes 181 in lowercase and 165 in
+%D uppercase. A mapping is activated with \type {\enablemapping}.
+
+% Opens a group and selects mapping #1 (kept as @#1@ in \charactermapping).
+% It makes sure the token registers of that mapping exist, aliases them to
+% \mappingtoks and \casecomtoks, and replays what was stored so far, so that
+% definitions that follow extend the existing state. The group is closed
+% again by \stopsavingmappingtoks.
+\def\startsavingmappingtoks#1%
+ {\bgroup
+ \edef\charactermapping{@#1@}%
+ \checkmappingtoks
+ \setmappingtoks
+ \the\mappingtoks}
+
+% Takes a snapshot of the codes that are in effect at this point and stores
+% it (globally) in \mappingtoks. For every code 0..255 the register gets
+% explicit \lccode and \uccode assignments, an \sfcode assignment when the
+% space factor differs from \plusthousand, and for codes above 127 a
+% preceding \settoletterunlessactive. After that the group opened by
+% \startsavingmappingtoks is closed, \enabledmapping is cleared and the
+% mapping named by \currentmapping is enabled again.
+\def\stopsavingmappingtoks
+ {\global\mappingtoks\emptytoks
+ \dostepwiserecurse{0}{255}\plusone
+ % \@@expanded = what was collected so far + the entries for this code;
+ % \the and \recurselevel are expanded here, \lccode and friends are not
+ {\edef\@@expanded
+ {\the\mappingtoks
+ \ifnum\recurselevel>127
+ \noexpand\settoletterunlessactive{\recurselevel}%
+ \fi
+ \lccode\recurselevel\ifnum\lccode\recurselevel=\zerocount\zerocount\else\space\the\lccode\recurselevel\space\fi
+ \uccode\recurselevel\ifnum\uccode\recurselevel=\zerocount\zerocount\else\space\the\uccode\recurselevel\space\fi
+ \ifnum\sfcode\recurselevel=\plusthousand\else\sfcode\recurselevel=\the\sfcode\recurselevel\space\fi
+ }%
+ \global\mappingtoks\expandafter{\@@expanded}}%
+ \egroup
+ % emptying \enabledmapping forces \enablemapping to really run
+ \let\enabledmapping\empty
+ \enablemapping[\currentmapping]}
+
+% User level environment: the definitions between \startmapping[name] and
+% \stopmapping are collected into the token registers of mapping "name".
+\def\startmapping[#1]%
+ {\startsavingmappingtoks{#1}}
+
+\def\stopmapping
+ {\stopsavingmappingtoks}
+
+% An empty start/stop pair: replays mapping #1 and saves it again, which
+% rewrites the register in the normalized form that \stopsavingmappingtoks
+% produces.
+\def\optimizemapping[#1]%
+ {\startsavingmappingtoks{#1}%
+ % nothing, just an automatic cleanup
+ \stopsavingmappingtoks
+ % we need to resync
+ %\let\enabledmapping\relax
+ }%\enablemapping[\currentmapping]}
+
+% Makes \mappingtoks and \casecomtoks aliases (by \let) of the two token
+% registers that belong to the mapping named in \charactermapping. The
+% \@EA's (presumably \expandafter) build the register name first.
+\def\setmappingtoks
+ {\@EA\let\@EA\mappingtoks\csname\@map@\charactermapping\endcsname
+ \@EA\let\@EA\casecomtoks\csname\@cas@\charactermapping\endcsname}
+
+% Allocates the two per-mapping token registers with \newtoks, each only
+% when a control sequence of that name is not yet defined.
+\def\checkmappingtoks
+ {\ifundefined{\@map@\charactermapping}%
+ \expandafter\newtoks\csname\@map@\charactermapping\endcsname
+ \fi
+ \ifundefined{\@cas@\charactermapping}%
+ \expandafter\newtoks\csname\@cas@\charactermapping\endcsname
+ \fi}
+
+% \definecasemap <code> <lower> <upper>
+%
+% Sets \lccode<code>=<lower> and \uccode<code>=<upper>. All three arguments
+% are delimited by a space. When the second argument is the word "to", the
+% call is read as "<first> to <last>" and handed to \presetcaserange
+% instead, which maps every code in that range onto itself.
+\def\definecasemap #1 #2 #3 % code lower upper
+ {\doifelse{#2}{to}
+ {\presetcaserange{#1}{#3}}
+ {\lccode#1=#2\relax
+ \uccode#1=#3\relax}%
+ \ignorespaces}
+
+%D Saves a few tokens
+
+% \definecaseswap <lower> <upper>
+%
+% Declares a lower/upper pair in one go: each code keeps itself in its own
+% case (\lccode of the lower, \uccode of the upper) and maps onto its
+% partner in the other case.
+\def\definecaseswap #1 #2 % lower upper
+ {\lccode#1=#1\relax
+ \uccode#2=#2\relax
+ \lccode#2=#1\relax
+ \uccode#1=#2\relax
+ \ignorespaces}
+
+% \definecaseself <code>
+%
+% The code is its own lowercase and its own uppercase.
+\def\definecaseself #1 % lower=upper=self
+ {\lccode#1=#1\relax
+ \uccode#1=#1\relax
+ \ignorespaces}
+
+%D Watch the \type {\definecasemap 127 to 255} option!
+%D Dedicated to Taco there is also:
+
+% \definecasemaps <from> to <to> lc <lcdelta> uc <ucdelta>
+%
+% For every code in the range: \lccode = code + <lcdelta> and
+% \uccode = code + <ucdelta>. The sums are computed in \scratchcounter.
+\def\definecasemaps #1 to #2 lc #3 uc #4 % from to lc+ uc+
+ {\dostepwiserecurse{#1}{#2}\plusone
+ {\scratchcounter\recurselevel\advance\scratchcounter#3\lccode\recurselevel=\scratchcounter
+ \scratchcounter\recurselevel\advance\scratchcounter#4\uccode\recurselevel=\scratchcounter}%
+ \ignorespaces}
+
+%D This can be used like:
+%D
+%D \starttyping
+%D \definecasemaps 128 to 156 lc 32 uc 0
+%D \definecasemaps 160 to 188 lc -32 uc 0
+%D \definecasemaps 160 to 188 lc -32 uc 0
+%D \definecasemaps 192 to 255 lc 32 uc 0
+%D \stoptyping
+%D
+%D and saves a lot of typing (copying).
+
+% \resetcaserange <from> to <to>
+%
+% Sets \lccode and \uccode of every code in the range to zero. The second
+% argument is delimited by the space that the end of the first line of the
+% definition contributes to the parameter text.
+\def\resetcaserange #1 to #2
+ {\dostepwiserecurse{#1}{#2}\plusone
+ {\lccode\recurselevel\zerocount
+ \uccode\recurselevel\zerocount}%
+ \ignorespaces}
+
+% \presetcaserange{<from>}{<to>}
+%
+% Maps every code in the range onto itself, for lowercase as well as
+% uppercase. Takes two undelimited arguments; it is called this way by
+% \definecasemap.
+\def\presetcaserange#1#2% could be pre-expanded
+ {\dostepwiserecurse{#1}{#2}\plusone
+ {\lccode\recurselevel=\recurselevel
+ \uccode\recurselevel=\recurselevel}%
+ \ignorespaces}
+
+% The \setcase... commands make the same \lccode/\uccode assignments as
+% their \definecase... counterparts, but first pass each code they define
+% to \settoletterunlessactive, and they do not end with \ignorespaces.
+
+% \setcasemap <code> <lower> <upper>
+\def\setcasemap #1 #2 #3 %
+ {\settoletterunlessactive{#1}%
+ \lccode #1=#2
+ \uccode #1=#3 }
+
+% \setcaseswap <lower> <upper>
+\def\setcaseswap #1 #2 %
+ {\settoletterunlessactive{#1}%
+ \settoletterunlessactive{#2}%
+ \lccode #1=#1
+ \uccode #2=#2
+ \lccode #2=#1
+ \uccode #1=#2 }
+
+% \setcaseself <code>
+\def\setcaseself #1 %
+ {\settoletterunlessactive{#1}%
+ \lccode #1=#1
+ \uccode #1=#1 }
+
+% \definespacemap <code> <sfcode>
+%
+% Sets the space factor code of a character.
+\def\definespacemap #1 #2 % code sfcode
+ {\sfcode#1=#2%
+ \ignorespaces}
+
+% \setspacemap <code> <sfcode>
+%
+% Same assignment, but the code is first passed to
+% \settootherunlessactive (note: "other", where the \setcase... commands
+% use "letter").
+\def\setspacemap #1 #2 %
+ {\settootherunlessactive{#1}%
+ %\lccode #1=\zerocount
+ %\uccode #1=\zerocount
+ \sfcode #1=#2 }
+
+% \defineuppercasecom \command {replacement}
+%
+% Appends "\setuppercasecom\command{replacement}" to the (global) casecom
+% register of the mapping being defined; nothing is redefined at this
+% point.
+\def\defineuppercasecom#1#2%
+ {\global\casecomtoks\expandafter{\the\casecomtoks\setuppercasecom#1{#2}}%
+ \ignorespaces}
+
+% The lowercase counterpart.
+\def\definelowercasecom#1#2%
+ {\global\casecomtoks\expandafter{\the\casecomtoks\setlowercasecom#1{#2}}%
+ \ignorespaces}
+
+% By default the stored entries do nothing (both arguments are gobbled).
+% Further on in this file \everyuppercase and \everylowercase make the
+% matching one equal to \setcasecom.
+\let\setuppercasecom\gobbletwoarguments
+\let\setlowercasecom\gobbletwoarguments
+
+% Basic version; a later definition in this file replaces it by one that
+% also looks at \casecommode.
+\def\setcasecom#1#2{\def#1{#2}}
+
+% \enabledmapping remembers which mapping was activated last (in the @name@
+% form) so that enabling the same mapping again can be skipped.
+\let\enabledmapping\empty % indirect, needed to handle default too
+
+% \enablemapping[name]
+%
+% Makes "name" the current mapping. Unless it is the one that is already
+% enabled, the stored token list of the mapping is executed (when such a
+% register exists), the mapping is recorded in \enabledmapping and
+% \enablelanguagespecifics is called for the current language.
+% \synchronizepatterns is called in all cases.
+\def\enablemapping[#1]%
+ {\edef\charactermapping{@#1@}%
+ \ifx\enabledmapping\charactermapping \else
+ \doifdefined{\@map@\charactermapping}
+ {%\expandafter\showthe\csname\@map@\charactermapping\endcsname\endcsname
+ \the\csname\@map@\charactermapping\endcsname}%
+ % == \the\executeifdefined{\@map@\charactermapping}\emptytoks
+ \edef\enabledmapping{\charactermapping}%
+ \enablelanguagespecifics[\currentlanguage]% new
+ % \edef\enabledmapping{\charactermapping\currentlanguage}% can be comma list
+ \fi
+ \synchronizepatterns}
+
+% on behalf of font switching:
+
+% \fastenablemapping{name}
+%
+% Variant of \enablemapping with a braced argument. It tests for the
+% register with a plain \ifx...\relax check and does not call
+% \synchronizepatterns.
+\def\fastenablemapping#1%
+ {\edef\charactermapping{@#1@}%
+ \ifx\enabledmapping\charactermapping \else
+ \@EA\ifx\csname\@map@\charactermapping\endcsname\relax\else
+ \the\csname\@map@\charactermapping\endcsname
+ \fi
+ % == \the\executeifdefined{\@map@\charactermapping}\emptytoks
+ \let\enabledmapping\charactermapping
+ \enablelanguagespecifics[\currentlanguage]% to faster
+ \fi}
+
+%D This macro will be implemented in \type {lang-ini.tex}.
+
+% Placeholder that ignores its bracketed argument; only installed when no
+% real implementation has been defined before this file is read.
+\ifx\enablelanguagespecifics\undefined
+ \def\enablelanguagespecifics[#1]{}
+\fi
+
+%D Further on we have to take some precautions when dealing
+%D with special characters like~\type{~}, \type{_}
+%D and~\type{^}, so let us define ourselves some handy macros
+%D first.
+
+% Gives ~, _ and ^ the category code \@@letter so that they can be read as
+% ordinary characters in the definitions that follow. Before doing so, the
+% current category codes are frozen (by \edef) into
+% \unprotectfontcharacters, which restores them when called. The \noexpand
+% keeps a possibly active ~, _ or ^ from expanding inside the \edef.
+\def\protectfontcharacters
+ {\edef\unprotectfontcharacters
+ {\catcode`\noexpand ~=\the\catcode`~\relax
+ \catcode`\noexpand _=\the\catcode`_\relax
+ \catcode`\noexpand ^=\the\catcode`^\relax}%
+ \catcode`~=\@@letter
+ \catcode`_=\@@letter
+ \catcode`^=\@@letter\relax}
+
+%D The completeness of the Computer Modern Roman typefaces
+%D makes clear how incomplete other faces are. To honour 7~bit
+%D \ASCII, these fonts were designed using only the first 127
+%D values of the 256 ones that can be presented by one byte.
+%D Nowadays 8~bit character codings are more common, mainly
+%D because they permit us to predefine some composed
+%D characters, which are needed in most european languages.
+%D
+%D Supporting more than the standard \TEX\ encoding vector
+%D |<|which in itself is far from standard and differs per
+%D font|>| puts a burden on the fonts mechanism. The \CONTEXT\
+%D mechanism is far from complete, but can handle several
+%D schemes at once. The main problem lies in the accented
+%D characters and ligatures like~ff, although handling
+%D ligatures is not the responsibility of this module.
+%D
+%D By default, we use \PLAIN\ \TEX's approach of placing
+%D accents. All other schemes sooner or later give problems
+%D when \DVI||files are distributed across
+%D machines and platforms. Nevertheless, we have to take care
+%D of different encoding vectors, which tell us where to find
+%D the characters we need. This means that all kind of
+%D character placement macro's like \type{\"} and \type{\ae}
+%D have to be implemented and adapted in a way that suits
+%D these vectors.
+%D
+%D The main difference between different vectors is the way
+%D accents are ordered and/or the availability of prebuilt
+%D accented characters. Accented characters can for instance be
+%D called for by sequences like \type{\"e}. Here the \type{\"}
+%D is defined as:
+%D
+%D \starttyping
+%D \def\"#1{{\accent"7F #1}}
+%D \stoptyping
+%D
+%D This macro places the accent \accent"7F {} on top of an~e,
+%D which gives \"e. Some fonts however can have prebuilt accents and
+%D use a more direct approach like
+%D
+%D \starttyping
+%D \def\"#1{\if#1e\char 235\else ... \fi}
+%D \stoptyping
+%D
+%D The latter approach is not used in \CONTEXT, because we
+%D store relevant combinations of accents and characters in
+%D individual macros.
+
+%D We define character substitutes and commands with definition
+%D commands like:
+%D
+%D \starttyping
+%D \startcoding[texnansi]
+%D
+%D \defineaccent " a 228
+%D \defineaccent ^ e 234
+%D \defineaccent ' {\dotlessi} 237
+%D
+%D \definecharacter ae 230
+%D \definecharacter oe 156
+%D
+%D \definecommand b \texnansiencodedb
+%D \definecommand c \texnansiencodedc
+%D
+%D \stopcoding
+%D \stoptyping
+%D
+%D The last argument of \type{\defineaccent} and
+%D \type{\definecharacter} tells \TEX\ the position of the
+%D accented character in the encoding vector. In order to
+%D accomplish this, we tag each implementation with the character
+%D coding identifier. We therefore need two auxiliary variables
+%D \type{\characterencoding} and \type{\nocharacterencoding}. These
+%D contain the current and default encoding vectors and both
+%D default to the \PLAIN\ one.
+
+% Initial state: current encoding, fallback encoding and current mapping
+% all start out as the default one. Names are stored between @'s; see
+% \dopureencodingname for the way back.
+\edef\characterencoding {@\s!default @}
+\edef\nocharacterencoding {@\s!default @}
+\edef\charactermapping {@\s!default @}
+
+% todo, else \d j == \dj, print file and check
+
+% Extra prefixes used in the names of the stored accent, command and
+% character definitions. All three are empty at the moment; the commented
+% values show alternatives that would keep the three name spaces apart.
+\def\accentprefix {}%{*}
+\def\commandprefix {}%{=}
+\def\characterprefix{}%{-}
+
+%D \macros
+%D {startcoding, reducetocoding}
+%D
+%D Before we can redefine accents and special characters, we
+%D have to tell \CONTEXT\ what encoding is in force. The next
+%D command is responsible for doing this and also takes care of
+%D the definition of the recoding commands. We use the \type
+%D {\start}||\type {\stop}||commands for definitions and the
+%D \type {\reduceto}||command for local switching to
+%D simplified commands.
+
+% etex : \ifcsname
+
+% Simplified accent handler, installed by \reducetocoding: when the current
+% encoding has a definition for accent #1 on character #2, use it,
+% otherwise just yield the bare character #2. The fallback encoding is not
+% consulted.
+\def\justhandleaccent#1#2% \empty makes #2={} save % no \unexpanded
+ {\ifundefined{\accentprefix\characterencoding#1\string#2\empty}%
+ #2%
+ \else
+ \csname\accentprefix\characterencoding#1\string#2\empty\endcsname
+ \fi}
+
+% Simplified command handler: use the definition of #1 in the current
+% encoding when there is one, otherwise yield the text #1 itself.
+\def\justhandlecommand#1% % no \unexpanded, otherwise pdfdoc will fail
+ {\ifundefined{\commandprefix\characterencoding#1}% as well as hyph patterns
+ #1%
+ \else
+ \csname\commandprefix\characterencoding#1\endcsname
+ \fi}
+
+% \enableencoding[main][fallback]
+%
+% Both arguments are optional. A given first argument becomes the current
+% encoding. The fallback encoding is always set again: to the second
+% argument when given, otherwise to \s!default. Finally the patterns are
+% synchronized.
+\def\enableencoding
+ {\dodoubleempty\doenableencoding}
+
+\def\doenableencoding[#1][#2]% main fallback
+ {\iffirstargument\edef\characterencoding{@#1@}\fi
+ \edef\nocharacterencoding{@\ifsecondargument#2\else\s!default\fi @}%
+ \synchronizepatterns}
+
+% Pre-expanded default name, used as fallback by \fastenableencoding.
+\edef\xnocharacterencoding{@\s!default @}
+
+% \fastenableencoding{name}
+%
+% Braced-argument variant: sets the current encoding, resets the fallback
+% to the default and does not synchronize patterns.
+\def\fastenableencoding#1%
+ {\edef\characterencoding{@#1@}%
+ \let\nocharacterencoding\xnocharacterencoding}
+
+% \startencoding[encoding][regime] ... \stopencoding
+%
+% Environment for definition files. The current values of five macros are
+% pushed and popped again by \stopencoding. Inside the environment:
+% - accent and command handlers are replaced by the \donthandle... ones;
+% - \characterencoding is set to @encoding@;
+% - with an empty second argument \doautosetregime gobbles its two
+%   arguments, otherwise \currentregime is set to that argument.
+\def\startencoding
+ {\dodoubleempty\dostartencoding}
+
+\def\dostartencoding[#1][#2]% encoding regime
+ {%\showmessage\m!encodings1{#1}%
+ \pushmacro\characterencoding
+ \pushmacro\currentregime
+ \pushmacro\dohandleaccent % still needed?
+ \pushmacro\dohandlecommand % still needed?
+ \pushmacro\doautosetregime
+ \let\dohandleaccent\donthandleaccent % still needed?
+ \let\dohandlecommand\donthandlecommand % still needed?
+ %let\definesortkey\savesortkey
+ \edef\characterencoding{@#1@}%
+ \doifelsenothing{#2}%
+ {\let\doautosetregime\gobbletwoarguments}
+ {\def\currentregime{#2}}}
+
+% Restores everything that \dostartencoding pushed (in reverse order).
+\def\stopencoding
+ {\popmacro\doautosetregime
+ \popmacro\dohandlecommand % still needed?
+ \popmacro\dohandleaccent % still needed?
+ \popmacro\currentregime
+ \popmacro\characterencoding}
+
+% probably obsolete (hm, not yet)
+
+% \reducetocoding[name]
+%
+% Does nothing for an empty argument. Otherwise it installs the simplified
+% \justhandle... handlers, enables encoding "name" and reapplies the
+% language specifics. None of this is undone here, so the caller is
+% expected to supply the grouping.
+\def\reducetocoding[#1]% use grouped!
+ {\doifsomething{#1}
+ {\let\dohandleaccent \justhandleaccent
+ \let\dohandlecommand\justhandlecommand
+ \enableencoding[#1]%
+ \enablelanguagespecifics[\currentlanguage]}}
+
+% The "coding" names are synonyms of the "encoding" ones. Two of them are
+% copies made with \let; \stopcoding is a macro that calls \stopencoding
+% and therefore follows later redefinitions of it.
+\let\startcoding \startencoding
+\def\stopcoding {\stopencoding}
+\let\enablecoding \enableencoding
+
+%D The use of these macros is not limited to font
+%D definition files, but may also be used when loading
+%D patterns.
+
+%D \macros
+%D {definesortkey,flushsortkeys,flushsortkey}
+%D
+%D Yet another definition concerns sorting of indexes and
+%D lists.
+%D
+%D \starttyping
+%D \definesortkey {\'e} {e} {a} {\'e}
+%D \stoptyping
+%D
+%D The first argument denotes the string to be treated. The
+%D second argument is the raw replacement, while the third
+%D argument determines the sort order given the replacement.
+%D The last argument is used as entry in the index (a, b, etc).
+%D
+%D The keys can be flushed using \type {\flushsortkeys}
+%D which in turn results in a sequence of calls to \type
+%D {\flushsortkey}, a macro taking 4~arguments.
+%D
+%D This mechanism is currently being tested and subjected to
+%D changes! Obsolete:
+
+% Stubs for the obsolete sort key mechanism: the two defining commands
+% swallow their four arguments, the two flush commands do nothing.
+\let\definesortkey\gobblefourarguments
+\let\savesortkey \gobblefourarguments
+\let\flushsortkeys\relax
+\let\flushsortkey \relax
+
+%D \macros
+%D {defineaccent, definecharacter, definecommand}
+%D
+%D The actual definition of accents, special characters and
+%D commands is done with the next three commands.
+
+% \defineaccent <accent> <character> <code or tokens>
+%
+% Done in three steps because of category codes: ~, _ and ^ are made
+% letters only while the first two (space delimited) arguments are read;
+% the original codes are restored before the third one is picked up.
+\def\defineaccent
+ {\protectfontcharacters
+ \dodefineaccent}
+
+\def\dodefineaccent#1 #2 %
+ {\unprotectfontcharacters
+ \dododefineaccent#1 #2 }
+
+% Makes the control sequence named #1 call \dohandleaccent{#1} and stores
+% the result for the pair (accent #1, character #2) under the current
+% encoding: "\char<number> " when #3 is a number, otherwise #3 itself.
+\def\dododefineaccent#1 #2 #3 %
+ {\setvalue{#1}{\dohandleaccent{#1}}%
+ \doifnumberelse{\string#3}
+ {\setvalue{\accentprefix\characterencoding#1\string#2}{\char#3 }} % space added
+ {\setvalue{\accentprefix\characterencoding#1\string#2}{#3}}}
+
+% Default accent handler; #1 is the accent, #2 the character. The first
+% match in this order is used:
+% 1. the composed character in the current encoding;
+% 2. the composed character in the fallback encoding;
+% 3. the accent command of the current encoding, applied to #2;
+% 4. the accent command of the fallback encoding, applied to #2. This last
+%    step is taken without an existence test (the test and the
+%    \donormaltextaccent fallback are commented out).
+\def\dohandleaccent#1#2%
+ {\ifcsname\accentprefix\characterencoding#1\string#2\empty\endcsname
+ \csname\accentprefix\characterencoding#1\string#2\empty\endcsname
+ \else\ifcsname\accentprefix\nocharacterencoding#1\string#2\empty\endcsname
+ \csname\accentprefix\nocharacterencoding#1\string#2\empty\endcsname
+ \else\ifcsname\accentprefix\characterencoding#1\endcsname
+ \csname\accentprefix\characterencoding#1\endcsname{#2}%
+ \else%\ifcsname\accentprefix\nocharacterencoding#1\endcsname
+ \csname\accentprefix\nocharacterencoding#1\endcsname{#2}%
+% \else
+% \donormaltextaccent{#1}{#2}%
+ \fi\fi\fi}%\fi}
+
+% Replacement for \char while patterns are loaded (see
+% \startpatternloading): takes the number up to the next space and passes
+% it to \rawcharacter.
+\def\patternchar#1 {\rawcharacter{#1}} % space is part of character definition !
+
+% \ifx \enablepatterntokens\undefined
+% \def\handlepatterntoken#1]{\csname#1\endcsname}
+% \fi
+
+% we need to postpone catcode changes, e.g. hr patterns
+% have \catcode" -> which fails when " is letter
+
+% Settings shared by \patterns and \hyphenation: either make [ an active
+% character that calls \handlepatterntoken or, when available, call
+% \enablepatterntokens; let \dochar produce direct characters; and give
+% a few non-letters an \lccode equal to themselves.
+% NOTE(review): the definition of \handlepatterntoken just above is
+% commented out, so it has to come from elsewhere -- confirm.
+\def\pathypsettings
+ {\ifx \enablepatterntokens\undefined
+ \defineactivecharacter [ {\handlepatterntoken}%
+ \else
+ \enablepatterntokens
+ \fi
+ \let\dochar\thechr
+ \lccode16=16 % brrr, extra quote in ec (turkish)
+ \lccode17=17 % brrr, extra quote in ec (turkish)
+ \lccode`\-=`\-
+ \lccode`\'=`\'
+ \lccode`\"=`\"
+ \relax}
+
+% Wrappers that apply the settings before calling the saved primitives.
+% This \hyphenation is replaced by the grouped definition that follows
+% further on in this file.
+\def\patterns {\pathypsettings\normalpatterns }
+\def\hyphenation{\pathypsettings\normalhyphenation}
+
+%D Because we don't want to use the second command grouped, we
+%D (re)define it as follows:
+
+% Keeps the effect of \pathypsettings local without asking the user for a
+% group: a group is opened, \hyphenation is locally redefined as the
+% finishing step, the settings are applied and the braced argument is read
+% into \scratchtoks. After that assignment the finisher feeds the tokens
+% to \normalhyphenation and closes the group.
+\def\hyphenation
+ {\begingroup\def\hyphenation{\normalhyphenation{\the\scratchtoks}\endgroup}%
+ \pathypsettings\afterassignment\hyphenation\scratchtoks=}
+
+%D This is not needed for patterns because they are loaded grouped
+%D anyway and it saves us an assignment. Can go ... no longer
+%D shared patterns.
+
+% \startpatternloading{filename}{encoding}{mapping}
+%
+% Prepares for reading a pattern file inside a group: \everyjob and
+% \message are disabled, \char becomes \patternchar, and the given encoding
+% and mapping are enabled (each defaults to "ec" when empty). When the
+% filename contains \f!languageprefix and \enablepatternxml is defined,
+% that macro is called. Finally \dohandleaccent is set to
+% \normaldohandleaccent (defined elsewhere).
+\def\startpatternloading#1#2#3% % we should use \everypatternloading
+ {\startreadingfile
+ \bgroup
+ % let's get rid of interfering stuff
+ \let\everyjob\scratchtoks
+ \let\message \gobbleoneargument
+ % we want direct characters
+ \let\char\patternchar
+ \doifelsenothing{#2}{\enableencoding[ec]}{\enableencoding[#2]}%
+ \doifelsenothing{#3}{\enablemapping [ec]}{\enablemapping [#3]}%
+ \expanded{\doifinstring{\f!languageprefix}{#1}}
+ {\ifx \enablepatternxml\undefined \else
+ \enablepatternxml
+ \fi}%
+ \let\dohandleaccent\normaldohandleaccent}
+
+% Closes the group and the file reading state opened by
+% \startpatternloading.
+\def\stoppatternloading
+ {\egroup
+ \stopreadingfile}
+
+% Three ways to turn a stored character reference into output:
+% \thechr: expandable, the number is ended by a space;
+% \numchr: \unexpanded, the number is ended by \relax;
+% \strchr: \unexpanded, #1 is a name that is called as control sequence.
+ \def\thechr#1{\char#1 } % just in case \relax interferes
+\unexpanded\def\numchr#1{\char#1\relax}
+\unexpanded\def\strchr#1{\csname#1\endcsname}
+
+% \dochar is the hook stored by \definecharacter; default is \numchr.
+\let\dochar\numchr
+
+% Temporarily switch \dochar to the expandable \thechr.
+\def\startdirectcharacters {\pushmacro\dochar \let\dochar\thechr}
+\def\stopdirectcharacters {\popmacro \dochar}
+
+% \definecharacter <name> <code or tokens>
+%
+% Unless a control sequence with that name already exists, it is defined
+% as \dohandlecharacter{<name>}. The encoding specific meaning is stored
+% as \dochar{<code>} when the second argument is a number (in which case
+% \doautosetregime is called with name and code as well), and as the given
+% tokens otherwise.
+\def\definecharacter#1 #2 %
+ {\ifundefined{#1}\setvalue{#1}{\dohandlecharacter{#1}}\fi
+ \doifnumberelse{\string#2}
+ {\setvalue{\characterprefix\characterencoding\string#1}{\dochar{#2}}%
+ \doautosetregime{#1}{#2}}
+ {\setvalue{\characterprefix\characterencoding\string#1}{#2}}}
+
+% Calls the definition of character #1 in the current encoding when there
+% is one, and the one in the fallback encoding otherwise.
+\def\dohandlecharacter#1%
+ {\csname\characterprefix\ifcsname\characterprefix\characterencoding#1\endcsname
+ \characterencoding\else\nocharacterencoding\fi#1\endcsname}
+
+% \fallbackpatternchar{x} % makes no sense, duplicate patterns
+
+% Calls the fallback encoding definition of a character; #1 is a control
+% sequence, of which \strippedcsname provides the name.
+\def\defaultcharacter#1%
+ {\csname\characterprefix\nocharacterencoding\strippedcsname#1\endcsname}
+
+%D Instead of numbers, a command may be entered.
+
+% \definecommand <name> <tokens>
+%
+% (Re)defines the control sequence <name> as \dohandlecommand{<name>} and
+% stores <tokens> as its meaning in the current encoding.
+\def\definecommand#1 #2 %
+ {\setvalue{\string#1}{\dohandlecommand{#1}}%
+ %\redefinecommand #1 % just to be sure
+ \setvalue{\commandprefix\characterencoding\string#1}{#2}}
+
+%D Here we see that redefining accents and characters is more
+%D or less the same as redefining commands. We also could have
+%D said:
+%D
+%D \starttyping
+%D \def\defineaccent#1 #2 {\definecommand#1\string#2 \char}
+%D \def\definecharacter#1 {\definecommand#1 \char}
+%D \stoptyping
+
+%D \macros
+%D {defineaccentcommand}
+%D
+%D When needed, one can overload the default positions of the
+%D accents. The \PLAIN\ \TEX\ defaults are:
+%D
+%D \starttyping
+%D \defineaccentcommand ` 18
+%D \defineaccentcommand ' 19
+%D \defineaccentcommand v 20
+%D \defineaccentcommand u 21
+%D \defineaccentcommand = 22
+%D \defineaccentcommand ^ 94
+%D \defineaccentcommand . 95
+%D \defineaccentcommand H 125 % "7D
+%D \defineaccentcommand ~ 126 % "7E
+%D \defineaccentcommand " 127 % "7F
+%D \stoptyping
+
+% \defineaccentcommand <accent> <code or command>
+%
+% Defines, for the current encoding, the one-argument macro that puts the
+% accent on its argument: "{\accent<code> #1}" when the second argument is
+% a number, "{<command>#1}" otherwise. The characters ~, _ and ^ are
+% letters while the arguments are read and are restored afterwards.
+\def\defineaccentcommand
+ {\protectfontcharacters
+ \dodefineaccentcommand}
+
+\def\dodefineaccentcommand#1 #2 % \string toegevoegd
+ {\doifnumberelse{\string#2}
+ {\setvalue{\accentprefix\characterencoding\string#1}##1{{\accent#2 ##1}}}
+ {\setvalue{\accentprefix\characterencoding\string#1}##1{{#2##1}}}%
+ \unprotectfontcharacters}
+
+%D We don't have to define them for the default \PLAIN\ case.
+%D Commands may be used instead of character codes.
+
+%D \macros
+%D {redefinecommand}
+%D
+%D Redefinition of encoding dependent commands like \type{\b}
+%D and \type{\c} can be triggered by:
+%D
+%D \starttyping
+%D \redefinecommand b % something math
+%D \redefinecommand c % something math
+%D \stoptyping
+%D
+%D Handling of characters is easier than handling accents
+%D because here we don't have to take care of arguments. We
+%D just call for the right glyph in the right place.
+%D
+%D The \type{\next} construction permits handling of commands
+%D that take arguments. This means that we can use this
+%D command to redefine accent handling commands too
+%D (although today the next is not needed any longer in test
+%D macros).
+
+% \redefinecommand <name>
+%
+% Makes the control sequence <name> dispatch through \dohandlecommand
+% without storing a new encoding specific meaning.
+\def\redefinecommand#1 %
+ {% no \unexpanded, else pdfdoc fails
+ \setvalue{\string#1}{\dohandlecommand{#1}}}%
+
+% Default command handler: calls the meaning of #1 in the current encoding
+% when it exists, and the one in the fallback encoding otherwise.
+\def\dohandlecommand#1%
+ {\csname\commandprefix
+ \ifcsname\commandprefix\characterencoding#1\endcsname
+ \characterencoding
+ \else
+ \nocharacterencoding
+ \fi
+ #1\endcsname}
+
+%D \macros
+%D {currentencoding, currentmapping}
+%D
+%D When we show 'm, we don't want to see the protection
+%D measures.
+
+% Current encoding and mapping names without the surrounding @'s.
+\def\currentencoding{\@EA\dopureencodingname\characterencoding}
+\def\currentmapping {\@EA\dopureencodingname\charactermapping }
+
+% Strips the two @ delimiters from a name of the form @name@.
+\def\dopureencodingname @#1@{#1}
+
+% Same, for a name that is held in macro #1 (which is expanded once).
+\def\pureencodingname#1{\@EA\dopureencodingname#1}
+
+%D \macros
+%D {showaccents, showcharacters,
+%D showcharacterbounds, showhyphenations}
+%D
+%D Encoding is a tricky business. Therefore we provide a
+%D few macros that show most of the characters involved. The
+%D next two tables show the result of \type {\showaccents}.
+%D
+%D \placetable
+%D {The special glyphs in default encoding.}
+%D {\showaccents}
+%D
+%D \placetable
+%D {The special glyphs in texnansi encoding.}
+%D {\switchtobodyfont[lbr]\showaccents}
+%D
+%D The command
+%D
+%D \starttyping
+%D \showhyphenations{doordefini\"eren}
+%D \stoptyping
+%D
+%D can be used to check the correct loading of hyphenation
+%D patterns.
+
+% The show commands are registered with \fetchruntimecommand, all with the
+% same file name, \f!encodingprefix\s!run (presumably that file is only
+% read when one of them is first used -- confirm against
+% \fetchruntimecommand).
+\fetchruntimecommand \showaccents {\f!encodingprefix\s!run}
+\fetchruntimecommand \showcharacters {\f!encodingprefix\s!run}
+\fetchruntimecommand \showcharacterbounds {\f!encodingprefix\s!run}
+\fetchruntimecommand \showhyphenations {\f!encodingprefix\s!run}
+\fetchruntimecommand \showmapping {\f!encodingprefix\s!run}
+
+%D \macros
+%D {everyuppercase, EveryUppercase,
+%D everylowercase, EveryLowercase}
+%D
+%D When we want to uppercase strings of characters, we have to
+%D take care of those characters that have a special meaning or
+%D are only accessible by means of macros. The next hack was
+%D introduced when Tobias Burnus started translating head and
+%D label texts into spanish and italian. The first application
+%D of this token register therefore can be found in the module
+%D that deals with these texts.
+
+% Hooks that \douppercase (and presumably its lowercase counterpart,
+% defined elsewhere) executes before changing case.
+\newevery \everyuppercase \EveryUppercase
+\newevery \everylowercase \EveryLowercase
+
+%D This magic trick takes care of mapping from lower to
+%D upper case and reverse.
+
+% Executes the casecom register of the current mapping, or nothing when
+% that register does not exist.
+\def\reloadmapping{\the\executeifdefined{\@cas@\charactermapping}\emptytoks}
+
+% When case is changed, the stored \setuppercasecom resp. \setlowercasecom
+% entries become real definitions ...
+\appendtoks\let\setuppercasecom\setcasecom\to\everyuppercase
+\appendtoks\let\setlowercasecom\setcasecom\to\everylowercase
+
+% ... and are then executed.
+\appendtoks\reloadmapping\to\everyuppercase % slow, will be sped up
+\appendtoks\reloadmapping\to\everylowercase % slow, will be sped up
+
+% \everyULmap collects "\remapcase \upper \lower" triplets (see
+% \defineULcharacter); \remapcase is given a direction just before the
+% list is executed.
+\newtoks\everyULmap
+
+\appendtoks\let\remapcase\remapuppercase\the\everyULmap\to\everyuppercase
+\appendtoks\let\remapcase\remaplowercase\the\everyULmap\to\everylowercase
+
+\let\remapcase\gobbletwoarguments
+
+% Uppercase: the second control sequence gets the meaning of the first.
+% Lowercase: the first gets the meaning of the second, which is exactly
+% what \let does, hence the shortcut that overrides the definition.
+\def\remapuppercase#1#2{\let#2#1} % more efficient:
+\def\remaplowercase#1#2{\let#1#2} \let\remaplowercase\let
+
+% \defineLCcharacter <name1> <name2>
+%
+% Appends "\let\name1\name2" to \everylowercase, so that \name1 behaves as
+% \name2 while lowercasing.
+\def\defineLCcharacter #1 #2 %
+ {\appendtoks\let\to\everylowercase
+ \@EA\appendtoks\csname#1\endcsname\to\everylowercase
+ \@EA\appendtoks\csname#2\endcsname\to\everylowercase}
+
+% \defineUCcharacter <name1> <name2>
+%
+% The same for \everyuppercase.
+\def\defineUCcharacter #1 #2 %
+ {\appendtoks\let\to\everyuppercase
+ \@EA\appendtoks\csname#1\endcsname\to\everyuppercase
+ \@EA\appendtoks\csname#2\endcsname\to\everyuppercase}
+
+% \defineULcharacter <name1> <name2>
+%
+% Appends "\remapcase\name1\name2" to \everyULmap. Given the definitions
+% of \remapuppercase and \remaplowercase, \name2 gets the meaning of
+% \name1 when uppercasing and \name1 that of \name2 when lowercasing.
+\def\defineULcharacter #1 #2 %
+ {\appendtoks\remapcase\to\everyULmap
+ \@EA\appendtoks\csname#1\endcsname\to\everyULmap
+ \@EA\appendtoks\csname#2\endcsname\to\everyULmap}
+
+% slightly faster with \smallcapped's but far more hash and stringspace
+%
+% \newif\ifuppercase \appendtoks\uppercasetrue\to\everyuppercase
+% \newif\iflowercase \appendtoks\lowercasetrue\to\everylowercase
+%
+% \def\defineULcharacter #1 #2 %
+% {\def\!!stringa{@#1}\@EA\letvalue\@EA\!!stringa\csname#1\endcsname
+% \def\!!stringa{@#2}\@EA\letvalue\@EA\!!stringa\csname#2\endcsname
+% \setvalue{#1}{\getvalue{@\ifuppercase#2\else#1\fi}}%
+% \setvalue{#2}{\getvalue{@\iflowercase#1\else#2\fi}}}
+
+% 2 = tricky, since expanding \definedfont[lowcasename] ... goes wrong
+
+% \uppercasemode selects the branch taken in \douppercase; \casecommode
+% controls what a command defined by \setcasecom does when it is expanded.
+\chardef\uppercasemode\plusthree % 0=ignore 1=normal 2=expand 3=auto
+\chardef\casecommode \plusone % 0=noexpand 1=expand
+
+% Replaces the earlier basic \setcasecom: with \casecommode zero the
+% defined command yields itself unexpanded, otherwise the replacement #2.
+\def\setcasecom #1#2{\def#1{\ifcase\casecommode\noexpand#1\else#2\fi}}
+
+% \def\OEPS{whatever}
+%
+% \startmapping[ec]
+% \defineuppercasecom \oeps {\getvalue{OEPS}}
+% \stopmapping
+%
+% \WORD{xx \oeps}
+
+% \douppercase{text}
+%
+% Typesets the text in uppercase inside a group. Nested calls just pass
+% their argument on. After the \everyuppercase hooks have run, the branch
+% is chosen by \uppercasemode:
+% 0: the text is left as it is;
+% 1: plain \uppercase, no expansion;
+% 2: the text is first expanded with the casecom commands kept intact
+%    (\casecommode zero), then wrapped in \uppercase and executed with
+%    \casecommode set back to one;
+% other values: plain \uppercase as well.
+\def\douppercase#1%
+ {\bgroup
+ \let\douppercase\firstofoneargument
+ \the\everyuppercase % currently also checks uppercasemode
+ \let\dochar\rawcharacter
+ \ifcase\uppercasemode
+ #1%
+ \or % No expansion here, otherwise \getvalue problems! Default!!!
+ %\edef\next{#1}% keep this to prevent roll back
+ %\uppercase\expandafter{\next}% keep this to prevent roll back
+ \uppercase{#1}%
+ \or
+ \chardef\casecommode\zerocount
+ \let\docasecom\firstoftwoarguments
+ \edef\ascii{#1}%
+ \edef\ascii{\expandafter\uppercase\expandafter{\ascii}}% needed when in regime
+ \chardef\casecommode\plusone
+ \ascii
+ \else
+ % mode three may trigger setting 2 elsewhere (e.g. regime test)
+ \uppercase{#1}%
+ \fi
+ \egroup}
+
+% Put in front of \everyuppercase: when a regime other than the default
+% one is active and the mode is still "auto" (3), switch to the expanding
+% mode (2). This happens inside the group of \douppercase.
+\prependtoksonce
+ \doifnot\currentregime\s!default
+ {\ifnum\uppercasemode=\plusthree \chardef\uppercasemode\plustwo \fi}%
+\to \everyuppercase
+
+%D \macros
+%D {everysanitize, EverySanitize}
+%D
+%D Whenever we are sanitizing strings, like we sometimes do
+%D when we deal with specials, the next token register can be
+%D called.
+
+\newevery \everysanitize \EverySanitize
+
+%D \macros
+%D {defineuclass,defineudigit,udigit}
+%D
+%D The next few macros are experimental and needed for unicoded
+%D chinese characters.
+
+% \defineuclass <class> <high> <low>
+%
+% Stores <class> under the name "uc" followed by the number
+% <high>*256+<low>.
+\def\defineuclass #1 #2 #3 %
+ {\setvalue{uc\the\numexpr#2*256+#3\relax}{#1}}
+
+% \defineudigit <digit> <high> <low>
+%
+% Stores \uchar{<high>}{<low>} for <digit>, tagged with the current
+% encoding.
+\def\defineudigit #1 #2 #3 {\setvalue{\characterencoding uc#1}{\uchar{#2}{#3}}}
+
+%D It may look strange, but for the moment, we want the encoding
+%D to be part of the digit specification. This may change!
+
+% \udigit{encoding}{digit}: fetches what \defineudigit stored.
+\unexpanded\def\udigit#1#2{\getvalue{@#1@uc\number#2}}
+
+%D \macros
+%D {uchar, octuchar, hexuchar}
+
+% Fallback for \uchar when no real implementation has been loaded: it just
+% shows the two numbers as "(high,low)".
+\ifx\uchar\undefined \def\uchar#1#2{(\number#1,\number#2)} \fi
+
+% \octuchar{ooo}{ooo} and \hexuchar{HH}{HH} call \uchar with both arguments
+% turned into TeX number constants: a ' prefix makes an octal constant, a
+% " prefix a hexadecimal one. (A backtick would instead denote the
+% character code of the token that follows, which is wrong for digit
+% strings.)
+\def\octuchar#1#2{\uchar{'#1}{'#2}}
+\def\hexuchar#1#2{\uchar{"#1}{"#2}}
+
+%D Basics and fallbacks.
+
+% When \ignoreaccenttrue is in effect, built accents are dropped and only
+% the base character is typeset.
+\newif\ifignoreaccent
+
+\let\textaccent \accent
+\let\normaltextaccent\textaccent
+
+% ** we will explicitly embrace the two arguments, since in definitions
+% this may not be the case, and we don't want faulty expansions like
+% "\dobuildtextaccent \char 18 a" but "\dobuildtextaccent {\char 18}{a}"
+% instead
+
+\def\buildmathaccent#1%
+ {\mathaccent#1 }
+
+% \buildtextaccent{accent}{character}
+%
+% Expandable dispatcher: picks one of the two \unexpanded workers below and
+% passes both arguments on in braces.
+\def\buildtextaccent#1#2% **
+ {\ifignoreaccent
+ \expandafter\nobuildtextaccent
+ \else
+ \expandafter\dobuildtextaccent
+ \fi{#1}{#2}}
+
+% Accent ignored: just the character.
+\unexpanded\def\nobuildtextaccent#1#2%
+ {#2}
+
+% Inside a group \char temporarily means \normalaccent while #1 is
+% processed, so that an accent given as "\char <n>" acts as an accent
+% command (presumably \normalaccent is the saved \accent primitive --
+% confirm); for #2 it is set to \normalchar.
+\unexpanded\def\dobuildtextaccent#1#2%
+ {{\let\char\normalaccent#1\let\char\normalchar#2}}
+
+% some fake ones, name will change into build
+
+% \bottomaccent{down}{right}{slantcorrection}{accent}{char}
+%
+% Puts an accent below a character by means of a two row \halign in a
+% \vtop: the character on top, below it the accent between \hidewidth's.
+% #1 is used as \lineskip between the rows, #2 is a factor applied to the
+% width of the accent (shift to the right), #3 a factor applied to
+% \slantperpoint (shift to the left). The accent box is put in a \vbox of
+% fixed height .2ex.
+\unexpanded\def\bottomaccent#1#2#3#4#5% down right slantcorrection accent char
+ {\dontleavehmode % why this align mess
+ \vtop
+ {\forgetall
+ \baselineskip\zeropoint
+ \lineskip#1%
+ \everycr\emptytoks
+ \tabskip\zeropoint
+ \lineskiplimit\zeropoint
+ \setbox0\hbox{#4}%
+ \halign
+ {##\crcr\hbox{#5}\crcr
+ \hidewidth
+ \hskip#2\wd0
+ \hskip-#3\slantperpoint % in plain 1ex * dimenless value
+ \vbox to .2ex{\box0\vss}\hidewidth
+ \crcr}}}
+
+% Ready made bottom accents; each still expects the character as argument.
+\def\buildtextmacron {\bottomaccent{.25ex}{0}{15}{\textmacron}}
+\def\buildtextbottomdot{\bottomaccent{.25ex}{0}{5}{\textbottomdot}}
+\def\buildtextcedilla {\bottomaccent{0ex}{0}{5}{\textcedilla}}
+\def\buildtextogonek {\bottomaccent{-.1ex}{.5}{0}{\textogonek}}
+
+%D A collectors item:
+
+\def\buildtextbottomcomma{\bottomaccent{.15ex}{0}{5}{\tx,}}
+
+%D Rarely needed but there:
+
+% \topaccent{raise}{right}{slantcorrection}{accent}{char}
+%
+% Overlays an accent on a character in a box that has the width of the
+% character: the character is centered first, then we skip back over its
+% width and center the accent, raised by #1 and shifted by #2 times the
+% accent width minus #3 times \slantperpoint.
+\unexpanded\def\topaccent#1#2#3#4#5% down right slantcorrection accent char
+ {\dontleavehmode
+ \bgroup
+ \setbox0\hbox{#4}%
+ \setbox2\hbox{#5}%
+ \hbox to \wd2 \bgroup
+ \hss\copy2\hss
+ \hskip-\wd2
+ \hss\hskip#2\wd0\hskip-#3\slantperpoint\raise#1\hbox{#4}\hss
+ \egroup
+ \egroup}
+
+\def\buildtextgrave{\topaccent{0pt}{0}{15}{\textgrave}} % e.g.
+
+% \definecharacter schwa {\hbox{\rotate[rotation=180,location=high]{\hbox{e}}}}
+% \definecharacter schwagrave {\buildtextgrave\schwa}
+
+% math stuff, will change
+
+% \definemathaccent <name> {definition}
+%
+% Defines the control sequence <name> and keeps a second copy under
+% "normalmathaccent<name>", so that the math meaning stays available when
+% <name> itself is redefined.
+\def\definemathaccent#1 #2%
+ {\setvalue{\string#1}{#2}%
+ \setvalue{normalmathaccent\string#1}{#2}}
+
+% Calls the copy saved by \definemathaccent.
+\def\donormalmathaccent#1%
+ {\getvalue{normalmathaccent\string#1}}
+
+%D Some precautions:
+
+% Placeholder that swallows its space delimited argument; only installed
+% when no real definition exists yet.
+\ifx\usepdffontresource\undefined
+ \def\usepdffontresource #1 {} % this will be defined elsewhere
+\fi
+
+\def\donthandleaccent #1{\expandafter\string\csname#1\endcsname\space}
+\def\donthandlecommand #1{\expandafter\string\csname#1\endcsname\space}
+\def\donthandlecharacter #1{\expandafter\string\csname#1\endcsname\space}
+
+% The name #1 is handed over to \strchr (defined elsewhere). The three
+% variants share one definition.
+\def\stringifyhandleaccent#1{\strchr{#1}}
+\let\stringifyhandlecommand  =\stringifyhandleaccent
+\let\stringifyhandlecharacter=\stringifyhandleaccent
+
+% The control sequence with name #1 is built first and then prefixed with
+% \noexpand, so in an expansion (like an \edef) it is kept as control sequence
+% instead of being expanded. The three variants share one definition.
+\def\keephandleaccent#1{\expandafter\noexpand\csname#1\endcsname}
+\let\keephandlecommand  =\keephandleaccent
+\let\keephandlecharacter=\keephandleaccent
+
+% Plain execution: the control sequence with name #1 is built and left in the
+% input, so it gets expanded or executed. The three variants share one
+% definition.
+\def\handleaccent#1{\csname#1\endcsname}
+\let\handlecommand  =\handleaccent
+\let\handlecharacter=\handleaccent
+
+% Switch: all three \dohandle... hooks become their \donthandle... variant
+% (the named control sequence is stringified and followed by a space).
+\def\dontexpandencoding
+ {\let\dohandlecharacter=\donthandlecharacter
+ \let\dohandlecommand=\donthandlecommand
+ \let\dohandleaccent=\donthandleaccent}
+
+% Switch: all three \dohandle... hooks become their \keephandle... variant
+% (the named control sequence is kept unexpanded).
+\def\keepencodedtokens
+ {\let\dohandlecharacter=\keephandlecharacter
+ \let\dohandlecommand=\keephandlecommand
+ \let\dohandleaccent=\keephandleaccent}
+
+% Switch: only the character hook becomes its \keephandle... variant. The
+% accent and command hooks are not touched, these assignments are disabled:
+%
+% \let\dohandleaccent \keephandleaccent
+% \let\dohandlecommand \keephandlecommand
+\def\literateencodedtokens{\let\dohandlecharacter=\keephandlecharacter}
+
+% Switch: only the character hook becomes its \stringifyhandle... variant. The
+% accent and command hooks are not touched, these assignments are disabled:
+%
+% \let\dohandleaccent \stringifyhandleaccent
+% \let\dohandlecommand \stringifyhandlecommand
+\def\stringifyencodedtokens{\let\dohandlecharacter=\stringifyhandlecharacter}
+
+% Same body as the \handle... macros but defined with the \unexpanded prefix;
+% presumably this makes them protected, so that they stay as they are inside
+% an \edef (to be confirmed against the definition of \unexpanded). The three
+% variants share one definition.
+\unexpanded\def\uhandleaccent#1{\csname#1\endcsname}
+\let\uhandlecommand  =\uhandleaccent
+\let\uhandlecharacter=\uhandleaccent
+
+% Switch: the three \dohandle... hooks become macros that expand to the
+% corresponding \uhandle... macro. They are redefined with \def and not \let
+% to the \uhandle... macros, so expanding a hook yields the \uhandle... token.
+\def\dontexpandencodedtokens
+ {\def\dohandlecharacter{\uhandlecharacter}%
+ \def\dohandlecommand{\uhandlecommand}%
+ \def\dohandleaccent{\uhandleaccent}}
+
+% This used to be \def\convertencodedtokens{\dontexpandencoding} but nowadays
+% the stringifying variant is used. It is a \def, so the meaning that
+% \stringifyencodedtokens has at the time of usage applies.
+
+\def\convertencodedtokens{\stringifyencodedtokens}
+
+% test case:
+%
+% \enableregime[cp1250]
+% \mainlanguage[cz]
+%
+% \starttext
+%
+% \title{Žluťoučký kůň úpěl}
+% \placelist[chapter][criterium=all]
+%
+% \startbuffer
+% <chapter>
+% <title>Žluťoučký kůň úpěl</title>
+% </chapter>
+% \stopbuffer
+%
+% \defineXMLenvironment
+% [chapter]
+% {\defineXMLsave[title]}
+% {\expanded{\chapter{\XMLflush{title}}}}
+% \processXMLbuffer
+%
+% \setuphead[chapter][expansion=yes]
+% \defineXMLenvironment
+% [chapter]
+% {\defineXMLsave[title]}
+% {\chapter{\XMLflush{title}}}
+% \processXMLbuffer
+%
+% \stoptext
+
+%D Still valid? To be checked:
+
+% The tokens are stringified as they come: an accent command together with
+% its argument, or just the command or character. The command and character
+% variants share one definition.
+\def\doignoreaccent#1#2{\string#1\string#2}%
+\def\doignorecommand#1{\string#1}
+\let\doignorecharacter=\doignorecommand
+
+% Switch: all three \dohandle... hooks become their \doignore... variant
+% (the tokens that follow the hook are stringified).
+\def\ignoreencoding
+ {\let\dohandlecharacter=\doignorecharacter
+ \let\dohandlecommand=\doignorecommand
+ \let\dohandleaccent=\doignoreaccent}
+
+% \ignoreencoding is appended to the token list \everycleanupfeatures.
+\appendtoks \ignoreencoding \to \everycleanupfeatures
+
+% \keepencodedtokens is appended to the token list \everysafeexpanded.
+\appendtoks \keepencodedtokens \to \everysafeexpanded
+
+%D Now we will not redefine any more, so:
+
+% Snapshots of the accent and character hooks as they are at this point.
+% NOTE(review): no copy of \dohandlecommand is saved here; confirm that this
+% is intended.
+\let\normaldohandlecharacter=\dohandlecharacter
+\let\normaldohandleaccent   =\dohandleaccent
+
+% The one character accent commands known from plain TeX. Each of them maps
+% onto \buildtextaccent followed by the text accent glyph to use; presumably
+% \definecommand (defined elsewhere) binds the definition to the command with
+% the given one character name.
+\definecommand ` {\buildtextaccent\textgrave}
+\definecommand ' {\buildtextaccent\textacute}
+\definecommand r {\buildtextaccent\textring}
+\definecommand v {\buildtextaccent\textcaron}
+\definecommand u {\buildtextaccent\textbreve}
+\definecommand = {\buildtextaccent\textmacron}
+\definecommand ^ {\buildtextaccent\textcircumflex}
+\definecommand . {\buildtextaccent\textdotaccent}
+\definecommand H {\buildtextaccent\texthungarumlaut}
+\definecommand ~ {\buildtextaccent\texttilde}
+\definecommand " {\buildtextaccent\textdiaeresis}
+
+% The accents that go below the character; these map onto the \buildtext...
+% macros that are defined in terms of \bottomaccent earlier in this file.
+\definecommand c {\buildtextcedilla}
+\definecommand b {\buildtextmacron}
+\definecommand d {\buildtextbottomdot}
+\definecommand k {\buildtextogonek}
+
+% The math accents: each name maps onto \buildmathaccent followed by the
+% matching \math... accent. As \definemathaccent stores each definition twice,
+% a copy is also available under normalmathaccent<name>.
+\definemathaccent acute {\buildmathaccent\mathacute}
+\definemathaccent grave {\buildmathaccent\mathgrave}
+\definemathaccent ddot {\buildmathaccent\mathddot}
+\definemathaccent tilde {\buildmathaccent\mathtilde}
+\definemathaccent bar {\buildmathaccent\mathbar}
+\definemathaccent breve {\buildmathaccent\mathbreve}
+\definemathaccent check {\buildmathaccent\mathcheck}
+\definemathaccent hat {\buildmathaccent\mathhat}
+\definemathaccent vec {\buildmathaccent\mathvec}
+\definemathaccent dot {\buildmathaccent\mathdot}
+\definemathaccent widetilde {\buildmathaccent\mathwidetilde}
+\definemathaccent widehat {\buildmathaccent\mathwidehat}
+
+% The generic (not font encoding specific) definitions, one \useencoding call
+% per set; presumably each tag refers to a file with encoding definitions
+% (to be confirmed against the definition of \useencoding).
+\useencoding[def] % defaults (partly simplified)
+\useencoding[acc] % accent commands
+\useencoding[raw] % simplified (incomplete)
+\useencoding[com] % a few commands
+\useencoding[cas] % case mapping, not needed in mkiv
+\useencoding[mis] % a few commands
+
+%D We preload several encodings:
+
+% When the engine is xetex only the default encoding is set up (as default).
+% For all other engines a list of encodings and three fallback sets is used
+% first and ec becomes the default encoding.
+\ifnum\texengine=\xetexengine
+ \setupencoding[\s!default=\s!default]
+\else
+ \useencoding[ans,il2,ec,tbo,pdf,pol,qx,t5,l7x,cyr,agr] % pol and il2 will go away, not needed in mkiv, uc removed
+ \useencoding[032,033,037] % fallbacks for some unicode chars
+ \setupencoding[\s!default=ec] % was: [\s!default=\s!default]
+\fi
+
+\protect \endinput