%D \module
%D   [       file=enco-ini,
%D        version=2007.02.19, % 2000.12.27, % 1998.12.03,
%D          title=\CONTEXT\ Encoding Macros,
%D       subtitle=Initialization,
%D         author=Hans Hagen,
%D           date=\currentdate,
%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
%C
%C This module is part of the \CONTEXT\ macro||package and is
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.

%D Quite some code will be moved to the mk files once we're ready
%D for it.

%D This module is a reimplementation of the module that handled
%D composed characters and non \ASCII\ characters. The changes
%D are not that fundamental, and mainly concern moving
%D definitions of specific glyphs and accents to other files as
%D well as moving plain handling of accents to this module
%D instead of overloading plain \TEX\ commands.

%D Patterns are kind of mixed with font encodings and
%D mappings. Alas.

% Fallback: when \synchronizepatterns has not been defined yet, make it
% a no-op (\relax) so that the macros in this module can call it
% unconditionally.

\ifx\synchronizepatterns\undefined
    \let\synchronizepatterns\relax
\fi

%D While dealing with input (the text source) and output (the
%D glyphs), encoding comes into view. To summarize a few:
%D
%D \startitemize
%D \item  Bytes in the input file are mapped to an internal
%D        representation. An~\type {a} often stays an~\type {a},
%D        but~\type {\"e} can become either one code or become
%D        two codes (ending in overlapping glyphs).
%D \item  Characters can be made active and mapped onto another
%D        character.
%D \item  When changing case, characters are mapped onto
%D        themselves, their case||counterpart or a reasonable
%D        alternative, like~\"e onto~e.
%D \item  Single character representations in a \DVI\ file can
%D        be mapped onto one or more characters, either or not
%D        in more than one font file (virtual fonts).
%D \item  In the final format, font collections can be
%D        partially embedded, thereby losing the one||to||one
%D        relation between several instances of one font.
%D \item  For special purposes, individual characters should be
%D        mapped onto a dedicated encoding vector, for instance
%D        \PDF\ document encoding.
%D \stopitemize
%D
%D These and other kinds of mappings are to be dealt with, and
%D the exact way of dealing often depends on the language to be
%D typeset.

\writestatus{loading}{ConTeXt Encoding Macros / Initialization}

\unprotect

%D First we define a few local or not yet initialized constants.

\def\@map@{@m@ap@} % mapping prefix
\def\@fha@{@f@ha@} % font prefix
\def\@cas@{@c@as@} % casecom prefix

% Default the current language to \s!en when no language has been set yet.

\ifx\currentlanguage\undefined \let\currentlanguage\s!en \fi

%D \macros
%D   {setupencoding}
%D
%D The following setup command is used to tune encoding
%D handling.

\def\setupencoding
  {\dosingleargument\dosetupencoding}

% Stores the key/value settings in the \??ec namespace and derives
% \defaultencoding from them: \s!default when \@@ecdefault is empty,
% otherwise the value of \@@ecdefault.

\def\dosetupencoding[#1]%
  {\getparameters[\??ec][#1]%
   \edef\defaultencoding
     {\ifx\@@ecdefault\empty\s!default\else\@@ecdefault\fi}}

%D \macros
%D   {useencoding}
%D
%D Encoding things are defined in separate files and are
%D loaded only once, using:
%D
%D \showsetup{useencoding}

% Loads one encoding file. The macro named \c!file\f!encodingprefix#1
% serves as the "already loaded" flag: it is set (to \empty) before the
% file is read, so a second request for the same encoding does nothing.
% The file that is read is \shortfilename.mkii; messages 2 and 3 of
% \m!encodings are the two branches handed to \readsysfile.

\def\douseencoding#1%
  {\doifundefined{\c!file\f!encodingprefix#1}%
     {\letvalue{\c!file\f!encodingprefix#1}\empty
      \makeshortfilename[\truefilename{\f!encodingprefix#1}]%
      \startreadingfile
      \readsysfile{\shortfilename.mkii}
        {\showmessage\m!encodings2{#1}}
        {\showmessage\m!encodings3{#1}}%
      \stopreadingfile}}

% User interface: a comma separated list of encoding names.

\def\useencoding[#1]%
  {\processcommalist[#1]\douseencoding}

%D \macros
%D   {startmapping,enablemapping}
%D
%D In order to process patterns, convert from lower to
%D uppercase and vice versa and some more, we provide a
%D mechanism to define mappings. The first real application
%D of this command was:
%D
%D \starttyping
%D \startmapping [something]
%D   \definecasemap 165 181 165
%D   \definecasemap 171 187 171
%D   ...
%D   \defineuppercasecom \i  {I}
%D   \defineuppercasecom \l  \L
%D   \definelowercasecom \AE \ae
%D   ...
%D \stopmapping
%D \stoptyping
%D
%D The arguments of \type {\definecasemap} are the character code,
%D its lowercase code and its uppercase code, in that order.
%D So, character 165 becomes 181 in lowercase and 165 in
%D uppercase.
%D A mapping is activated with \type {\enablemapping}.

% Opens a group, selects the mapping @#1@, makes sure its two token
% registers exist (\checkmappingtoks), aliases them to \mappingtoks and
% \casecomtoks (\setmappingtoks) and then executes the assignments that
% were saved before, so that new definitions build on the existing state.

\def\startsavingmappingtoks#1%
  {\bgroup
   \edef\charactermapping{@#1@}%
   \checkmappingtoks
   \setmappingtoks
   \the\mappingtoks}

% Rebuilds \mappingtoks from scratch as an explicit list of assignments
% for the codes 0..255, taken from the code tables that are in effect
% inside the group:
%
% - for codes above 127 a \settoletterunlessactive call is stored
% - the \lccode and \uccode of every code are stored
% - the \sfcode is stored only when it differs from \plusthousand
%
% Afterwards the group is closed, \enabledmapping is reset so that the
% following \enablemapping cannot be skipped, and the current mapping is
% enabled again.

\def\stopsavingmappingtoks
  {\global\mappingtoks\emptytoks
   \dostepwiserecurse{0}{255}\plusone
     {\edef\@@expanded
        {\the\mappingtoks
         \ifnum\recurselevel>127
           \noexpand\settoletterunlessactive{\recurselevel}%
         \fi
         \lccode\recurselevel\ifnum\lccode\recurselevel=\zerocount\zerocount\else\space\the\lccode\recurselevel\space\fi
         \uccode\recurselevel\ifnum\uccode\recurselevel=\zerocount\zerocount\else\space\the\uccode\recurselevel\space\fi
         \ifnum\sfcode\recurselevel=\plusthousand\else\sfcode\recurselevel=\the\sfcode\recurselevel\space\fi
         }%
      \global\mappingtoks\expandafter{\@@expanded}}%
   \egroup
   \let\enabledmapping\empty
   \enablemapping[\currentmapping]}

\def\startmapping[#1]%
  {\startsavingmappingtoks{#1}}

\def\stopmapping
  {\stopsavingmappingtoks}

% Replays and immediately re-saves mapping #1, which normalizes the
% stored token list.

\def\optimizemapping[#1]%
  {\startsavingmappingtoks{#1}%
   % nothing, just an automatic cleanup
   \stopsavingmappingtoks
   % we need to resync
  %\let\enabledmapping\relax
  }%\enablemapping[\currentmapping]}

% Makes \mappingtoks and \casecomtoks aliases of the two token registers
% that belong to the current \charactermapping.

\def\setmappingtoks
  {\@EA\let\@EA\mappingtoks\csname\@map@\charactermapping\endcsname
   \@EA\let\@EA\casecomtoks\csname\@cas@\charactermapping\endcsname}

% Allocates the two token registers of the current \charactermapping
% when they do not exist yet.

\def\checkmappingtoks
  {\ifundefined{\@map@\charactermapping}%
     \expandafter\newtoks\csname\@map@\charactermapping\endcsname
   \fi
   \ifundefined{\@cas@\charactermapping}%
     \expandafter\newtoks\csname\@cas@\charactermapping\endcsname
   \fi}

% The three arguments are space delimited. When the second one is the
% word "to", the call is treated as a range (#1 to #3) and handed to
% \presetcaserange.

\def\definecasemap #1 #2 #3 % code lower upper
  {\doifelse{#2}{to}
     {\presetcaserange{#1}{#3}}
     {\lccode#1=#2\relax
      \uccode#1=#3\relax}%
   \ignorespaces}

%D Saves a few tokens

\def\definecaseswap #1 #2 % lower upper
  {\lccode#1=#1\relax
   \uccode#2=#2\relax
   \lccode#2=#1\relax
   \uccode#1=#2\relax
   \ignorespaces}

\def\definecaseself #1 % lower=upper=self
  {\lccode#1=#1\relax
   \uccode#1=#1\relax
   \ignorespaces}

%D Watch the \type {\definecasemap 127 to 255} option!
%D Dedicated to Taco there is also:

% For every code in the range #1..#2 the lowercase code becomes
% code+#3 and the uppercase code becomes code+#4.

\def\definecasemaps #1 to #2 lc #3 uc #4 % from to lc+ uc+
  {\dostepwiserecurse{#1}{#2}\plusone
     {\scratchcounter\recurselevel\advance\scratchcounter#3\lccode\recurselevel=\scratchcounter
      \scratchcounter\recurselevel\advance\scratchcounter#4\uccode\recurselevel=\scratchcounter}%
   \ignorespaces}

%D This can be used like:
%D
%D \starttyping
%D \definecasemaps 128 to 156 lc 32 uc 0
%D \definecasemaps 160 to 188 lc -32 uc 0
%D \definecasemaps 160 to 188 lc -32 uc 0
%D \definecasemaps 192 to 255 lc 32 uc 0
%D \stoptyping
%D
%D and saves a lot of typing (copying).

% Sets both case codes of every character in the range to zero.

\def\resetcaserange #1 to #2
  {\dostepwiserecurse{#1}{#2}\plusone
     {\lccode\recurselevel\zerocount
      \uccode\recurselevel\zerocount}%
   \ignorespaces}

% Maps every character in the range onto itself for both cases.

\def\presetcaserange#1#2% could be pre-expanded
  {\dostepwiserecurse{#1}{#2}\plusone
     {\lccode\recurselevel=\recurselevel
      \uccode\recurselevel=\recurselevel}%
   \ignorespaces}

% The \set... variants below mirror the \define... ones, but first call
% \settoletterunlessactive (or \settootherunlessactive for the space
% map) on the codes involved. The space after each number in the bodies
% terminates that number.

\def\setcasemap #1 #2 #3 %
  {\settoletterunlessactive{#1}%
   \lccode #1=#2
   \uccode #1=#3
   }

\def\setcaseswap #1 #2 %
  {\settoletterunlessactive{#1}%
   \settoletterunlessactive{#2}%
   \lccode #1=#1
   \uccode #2=#2
   \lccode #2=#1
   \uccode #1=#2
   }

\def\setcaseself #1 %
  {\settoletterunlessactive{#1}%
   \lccode #1=#1
   \uccode #1=#1
   }

\def\definespacemap #1 #2 % code sfcode
  {\sfcode#1=#2%
   \ignorespaces}

\def\setspacemap #1 #2 %
  {\settootherunlessactive{#1}%
  %\lccode #1=\zerocount
  %\uccode #1=\zerocount
   \sfcode #1=#2
   }

% Case dependent commands are collected in \casecomtoks as
% \setuppercasecom / \setlowercasecom calls. Both are defined here to
% gobble their two arguments.

\def\defineuppercasecom#1#2%
  {\global\casecomtoks\expandafter{\the\casecomtoks\setuppercasecom#1{#2}}%
   \ignorespaces}

\def\definelowercasecom#1#2%
  {\global\casecomtoks\expandafter{\the\casecomtoks\setlowercasecom#1{#2}}%
   \ignorespaces}

\let\setuppercasecom\gobbletwoarguments
\let\setlowercasecom\gobbletwoarguments

\def\setcasecom#1#2{\def#1{#2}}

\let\enabledmapping\empty % indirect, needed to handle default too

% Activates mapping #1. Nothing is replayed when that mapping is already
% the enabled one; otherwise the saved token list is executed (when it
% exists), the mapping is remembered in \enabledmapping and the language
% specific settings are applied. \synchronizepatterns is called in
% either case.

\def\enablemapping[#1]%
  {\edef\charactermapping{@#1@}%
   \ifx\enabledmapping\charactermapping \else
     \doifdefined{\@map@\charactermapping}
       {%\expandafter\showthe\csname\@map@\charactermapping\endcsname\endcsname
        \the\csname\@map@\charactermapping\endcsname}%
     % == \the\executeifdefined{\@map@\charactermapping}\emptytoks
     \edef\enabledmapping{\charactermapping}%
     \enablelanguagespecifics[\currentlanguage]% new
   % \edef\enabledmapping{\charactermapping\currentlanguage}% can be comma list
   \fi
   \synchronizepatterns}

% on behalf of font switching:

% Same idea as \enablemapping, but with a brace delimited argument, a
% direct \csname test instead of \doifdefined, and without the call to
% \synchronizepatterns.

\def\fastenablemapping#1%
  {\edef\charactermapping{@#1@}%
   \ifx\enabledmapping\charactermapping \else
     \@EA\ifx\csname\@map@\charactermapping\endcsname\relax\else
       \the\csname\@map@\charactermapping\endcsname
     \fi
     % == \the\executeifdefined{\@map@\charactermapping}\emptytoks
     \let\enabledmapping\charactermapping
     \enablelanguagespecifics[\currentlanguage]% to faster
   \fi}

%D This macro will be implemented in \type {lang-ini.tex}.

\ifx\enablelanguagespecifics\undefined \def\enablelanguagespecifics[#1]{} \fi

%D Further on we have to take some precautions when dealing
%D with special characters like~\type{~}, \type{_}
%D and~\type{^}, so let us define ourselves some handy macros
%D first.

% Saves the current catcodes of ~, _ and ^ in \unprotectfontcharacters
% (calling that macro restores them) and then gives all three
% the catcode \@@letter.

\def\protectfontcharacters
  {\edef\unprotectfontcharacters
     {\catcode`\noexpand ~=\the\catcode`~\relax
      \catcode`\noexpand _=\the\catcode`_\relax
      \catcode`\noexpand ^=\the\catcode`^\relax}%
   \catcode`~=\@@letter
   \catcode`_=\@@letter
   \catcode`^=\@@letter\relax}

%D The completeness of the Computer Modern Roman typefaces
%D makes clear how incomplete other faces are. To honour 7~bit
%D \ASCII, these fonts were designed using only the first 127
%D values of the 256 ones that can be presented by one byte.
%D Nowadays 8~bit character codings are more common, mainly
%D because they permit us to predefine some composed
%D characters, which are needed in most European languages.
%D
%D Supporting more than the standard \TEX\ encoding vector
%D |<|which in itself is far from standard and differs per
%D font|>| puts a burden on the fonts mechanism.
%D The \CONTEXT\
%D mechanism is far from complete, but can handle several
%D schemes at once. The main problem lies in the accented
%D characters and ligatures like~ff, although handling
%D ligatures is not the responsibility of this module.
%D
%D By default, we use \PLAIN\ \TEX's approach of placing
%D accents. All other schemes sooner or later give problems
%D when \DVI||files are distributed across
%D machines and platforms. Nevertheless, we have to take care
%D of different encoding vectors, which tell us where to find
%D the characters we need. This means that all kinds of
%D character placement macros like \type{\"} and \type{\ae}
%D have to be implemented and adapted in a way that suits
%D these vectors.
%D
%D The main difference between different vectors is the way
%D accents are ordered and/or the availability of prebuilt
%D accented characters. Accented characters can for instance be
%D called for by sequences like \type{\"e}. Here the \type{\"}
%D is defined as:
%D
%D \starttyping
%D \def\"#1{{\accent"7F #1}}
%D \stoptyping
%D
%D This macro places the accent \accent"7F {} on top of an~e,
%D which gives \"e. Some fonts however can have prebuilt accents and
%D use a more direct approach like
%D
%D \starttyping
%D \def\"#1{\if#1e\char 235\else ... \fi}
%D \stoptyping
%D
%D The latter approach is not used in \CONTEXT, because we
%D store relevant combinations of accents and characters in
%D individual macros.

%D We define character substitutes and commands with definition
%D commands like:
%D
%D \starttyping
%D \startcoding[texnansi]
%D
%D \defineaccent " a 228
%D \defineaccent ^ e 234
%D \defineaccent ' {\dotlessi} 237
%D
%D \definecharacter ae 230
%D \definecharacter oe 156
%D
%D \definecommand b \texnansiencodedb
%D \definecommand c \texnansiencodedc
%D
%D \stopcoding
%D \stoptyping
%D
%D The last argument of \type{\defineaccent} and
%D \type{\definecharacter} tells \TEX\ the position of the
%D accented character in the encoding vector.
%D In order to
%D accomplish this, we tag each implementation with the character
%D coding identifier. We therefore need two auxiliary variables
%D \type{\characterencoding} and \type{\nocharacterencoding}. These
%D contain the current and default encoding vectors and both
%D default to the \PLAIN\ one.

% The identifiers are stored wrapped in @...@.

\edef\characterencoding {@\s!default @}
\edef\nocharacterencoding {@\s!default @}
\edef\charactermapping {@\s!default @}

% todo, else \d j == \dj, print file and check

\def\accentprefix {}%{*}
\def\commandprefix {}%{=}
\def\characterprefix{}%{-}

%D \macros
%D   {startcoding, reducetocoding}
%D
%D Before we can redefine accents and special characters, we
%D have to tell \CONTEXT\ what encoding is in force. The next
%D command is responsible for doing this and also takes care of
%D the definition of the recoding commands. We use the \type
%D {\start}||\type {\stop}||commands for definitions and the
%D \type {\reduceto}||command for local switching to
%D simplified commands.

% etex : \ifcsname

% Simplified accent handler: when the current encoding has no entry for
% the accent/character combination, only the character #2 is passed on.

\def\justhandleaccent#1#2% \empty makes #2={} safe % no \unexpanded
  {\ifundefined{\accentprefix\characterencoding#1\string#2\empty}%
     #2%
   \else
     \csname\accentprefix\characterencoding#1\string#2\empty\endcsname
   \fi}

% Simplified command handler: when the current encoding has no entry
% for #1, the name #1 itself is passed on.

\def\justhandlecommand#1% % no \unexpanded, otherwise pdfdoc will fail
  {\ifundefined{\commandprefix\characterencoding#1}% as well as hyph patterns
     #1%
   \else
     \csname\commandprefix\characterencoding#1\endcsname
   \fi}

\def\enableencoding
  {\dodoubleempty\doenableencoding}

% #1 is the main encoding; \characterencoding is only changed when #1 is
% given. #2 is the fallback; without it \s!default is used.

\def\doenableencoding[#1][#2]% main fallback
  {\iffirstargument\edef\characterencoding{@#1@}\fi
   \edef\nocharacterencoding{@\ifsecondargument#2\else\s!default\fi @}%
   \synchronizepatterns}

\edef\xnocharacterencoding{@\s!default @}

% Variant with a brace delimited argument that always uses the default
% fallback and does not call \synchronizepatterns.

\def\fastenableencoding#1%
  {\edef\characterencoding{@#1@}%
   \let\nocharacterencoding\xnocharacterencoding}

\def\startencoding
  {\dodoubleempty\dostartencoding}

% Saves the encoding related state with \pushmacro (restored by
% \stopencoding), switches the accent and command handlers to the
% \donthandle... variants and selects encoding #1. With an empty #2
% \doautosetregime is disabled, otherwise #2 becomes \currentregime.

\def\dostartencoding[#1][#2]% encoding regime
  {%\showmessage\m!encodings1{#1}%
   \pushmacro\characterencoding
   \pushmacro\currentregime
   \pushmacro\dohandleaccent  % still needed?
   \pushmacro\dohandlecommand % still needed?
   \pushmacro\doautosetregime
   \let\dohandleaccent\donthandleaccent   % still needed?
   \let\dohandlecommand\donthandlecommand % still needed?
  %let\definesortkey\savesortkey
   \edef\characterencoding{@#1@}%
   \doifelsenothing{#2}%
     {\let\doautosetregime\gobbletwoarguments}
     {\def\currentregime{#2}}}

\def\stopencoding
  {\popmacro\doautosetregime
   \popmacro\dohandlecommand % still needed?
   \popmacro\dohandleaccent  % still needed?
   \popmacro\currentregime
   \popmacro\characterencoding}

% probably obsolete (hm, not yet)

% Switches to the simplified handlers defined above and enables
% encoding #1; nothing happens when #1 is empty. Nothing is restored
% afterwards, hence the advice to use it grouped.

\def\reducetocoding[#1]% use grouped!
  {\doifsomething{#1}
     {\let\dohandleaccent \justhandleaccent
      \let\dohandlecommand\justhandlecommand
      \enableencoding[#1]%
      \enablelanguagespecifics[\currentlanguage]}}

\let\startcoding \startencoding
\def\stopcoding {\stopencoding}
\let\enablecoding \enableencoding

%D The use of these macros is not limited to font
%D definition files, but may also be used when loading
%D patterns.

%D \macros
%D   {definesortkey,flushsortkeys,flushsortkey}
%D
%D Yet another definition concerns sorting of indexes and
%D lists.
%D
%D \starttyping
%D \definesortkey {\'e} {e} {a} {\'e}
%D \stoptyping
%D
%D The first argument denotes the string to be treated. The
%D second argument is the raw replacement, while the third
%D argument determines the sort order given the replacement.
%D The last argument is used as entry in the index (a, b, etc).
%D
%D The keys can be flushed using \type {\flushsortkeys}
%D which in turn results in a sequence of calls to \type
%D {\flushsortkey}, a macro taking 4~arguments.
%D
%D This mechanism is currently being tested and subjected to
%D changes!
%D Obsolete:

\let\definesortkey\gobblefourarguments
\let\savesortkey \gobblefourarguments
\let\flushsortkeys\relax
\let\flushsortkey \relax

%D \macros
%D   {defineaccent, definecharacter, definecommand}
%D
%D The actual definition of accents, special characters and
%D commands is done with the next three commands.

% The definition is split in three steps so that the first two (space
% delimited) arguments are read while ~, _ and ^ are letters; the
% original catcodes are restored before the third argument is read.

\def\defineaccent
  {\protectfontcharacters
   \dodefineaccent}

\def\dodefineaccent#1 #2 %
  {\unprotectfontcharacters
   \dododefineaccent#1 #2 }

% #1 = accent, #2 = character, #3 = slot number or replacement tokens.
% The accent command itself is (re)defined as \dohandleaccent{#1}; the
% combination is stored under the current encoding, as \char#3 when #3
% is a number and as #3 otherwise.

\def\dododefineaccent#1 #2 #3 %
  {\setvalue{#1}{\dohandleaccent{#1}}%
   \doifnumberelse{\string#3}
     {\setvalue{\accentprefix\characterencoding#1\string#2}{\char#3 }} % space added
     {\setvalue{\accentprefix\characterencoding#1\string#2}{#3}}}

% Lookup order for accent #1 applied to #2:
%
% 1. the combination in the current encoding
% 2. the combination in the fallback encoding
% 3. the accent command of the current encoding, applied to #2
% 4. the accent command of the fallback encoding, applied to #2

\def\dohandleaccent#1#2%
  {\ifcsname\accentprefix\characterencoding#1\string#2\empty\endcsname
     \csname\accentprefix\characterencoding#1\string#2\empty\endcsname
   \else\ifcsname\accentprefix\nocharacterencoding#1\string#2\empty\endcsname
     \csname\accentprefix\nocharacterencoding#1\string#2\empty\endcsname
   \else\ifcsname\accentprefix\characterencoding#1\endcsname
     \csname\accentprefix\characterencoding#1\endcsname{#2}%
   \else%\ifcsname\accentprefix\nocharacterencoding#1\endcsname
     \csname\accentprefix\nocharacterencoding#1\endcsname{#2}%
 % \else
 %   \donormaltextaccent{#1}{#2}%
   \fi\fi\fi}%\fi}

\def\patternchar#1 {\rawcharacter{#1}} % space is part of character definition !

% \ifx \enablepatterntokens\undefined
%   \def\handlepatterntoken#1]{\csname#1\endcsname}
% \fi

% we need to postpone catcode changes, e.g. hr patterns
% have \catcode" -> which fails when " is letter

% Settings shared by \patterns and \hyphenation: pattern token handling,
% direct characters (\dochar becomes \thechr) and lowercase codes for
% the codes 16 and 17 and for - ' and ".

\def\pathypsettings
  {\ifx \enablepatterntokens\undefined
     \defineactivecharacter [ {\handlepatterntoken}%
   \else
     \enablepatterntokens
   \fi
   \let\dochar\thechr
   \lccode16=16 % brrr, extra quote in ec (turkish)
   \lccode17=17 % brrr, extra quote in ec (turkish)
   \lccode`\-=`\-
   \lccode`\'=`\'
   \lccode`\"=`\"
   \relax}

\def\patterns {\pathypsettings\normalpatterns }
\def\hyphenation{\pathypsettings\normalhyphenation}

%D Because we don't want to use the second command grouped, we
%D (re)define it as follows:

% The argument is first collected in \scratchtoks inside a group; after
% that assignment the locally redefined \hyphenation passes it to
% \normalhyphenation and closes the group again.

\def\hyphenation
  {\begingroup\def\hyphenation{\normalhyphenation{\the\scratchtoks}\endgroup}%
   \pathypsettings\afterassignment\hyphenation\scratchtoks=}

%D This is not needed for patterns because they are loaded grouped
%D anyway and it saves us an assignment. Can go ... no longer
%D shared patterns.

% #2 is the encoding and #3 the mapping to enable; ec is used for
% either one when the argument is empty. The group opened here is
% closed by \stoppatternloading.

\def\startpatternloading#1#2#3% % we should use \everypatternloading
  {\startreadingfile
   \bgroup
   % let's get rid of interfering stuff
   \let\everyjob\scratchtoks
   \let\message \gobbleoneargument
   % we want direct characters
   \let\char\patternchar
   \doifelsenothing{#2}{\enableencoding[ec]}{\enableencoding[#2]}%
   \doifelsenothing{#3}{\enablemapping [ec]}{\enablemapping [#3]}%
   \expanded{\doifinstring{\f!languageprefix}{#1}}
     {\ifx \enablepatternxml\undefined \else \enablepatternxml \fi}%
   \let\dohandleaccent\normaldohandleaccent}

\def\stoppatternloading
  {\egroup
   \stopreadingfile}

\def\thechr#1{\char#1 } % just in case \relax interferes

\unexpanded\def\numchr#1{\char#1\relax}
\unexpanded\def\strchr#1{\csname#1\endcsname}

\let\dochar\numchr

\def\startdirectcharacters
  {\pushmacro\dochar
   \let\dochar\thechr}

\def\stopdirectcharacters
  {\popmacro \dochar}

% #1 = character name, #2 = slot number or replacement tokens. The
% command named #1 is only defined (as \dohandlecharacter{#1}) when it
% does not exist yet. A numeric #2 is stored as \dochar{#2} and also
% reported to \doautosetregime; anything else is stored as is.

\def\definecharacter#1 #2 %
  {\ifundefined{#1}\setvalue{#1}{\dohandlecharacter{#1}}\fi
   \doifnumberelse{\string#2}
     {\setvalue{\characterprefix\characterencoding\string#1}{\dochar{#2}}%
      \doautosetregime{#1}{#2}}
     {\setvalue{\characterprefix\characterencoding\string#1}{#2}}}
% Expands to the definition of character #1 in the current encoding
% when that exists, and to the one in the fallback encoding otherwise.

\def\dohandlecharacter#1%
  {\csname\characterprefix\ifcsname\characterprefix\characterencoding#1\endcsname
     \characterencoding\else\nocharacterencoding\fi#1\endcsname}

% \def\fallbackpatternchar{x} % makes no sense, duplicate patterns

% Takes a control sequence and expands to its definition in the
% fallback encoding.

\def\defaultcharacter#1%
  {\csname\characterprefix\nocharacterencoding\strippedcsname#1\endcsname}

%D Instead of numbers, a command may be entered.

% #1 = command name, #2 = its implementation in the current encoding.
% The command itself always becomes \dohandlecommand{#1}.

\def\definecommand#1 #2 %
  {\setvalue{\string#1}{\dohandlecommand{#1}}%
  %\redefinecommand #1 % just to be sure
   \setvalue{\commandprefix\characterencoding\string#1}{#2}}

%D Here we see that redefining accents and characters is more
%D or less the same as redefining commands. We also could have
%D said:
%D
%D \starttyping
%D \def\defineaccent#1 #2 {\definecommand#1\string#2 \char}
%D \def\definecharacter#1 {\definecommand#1 \char}
%D \stoptyping

%D \macros
%D   {defineaccentcommand}
%D
%D When needed, one can overload the default positions of the
%D accents. The \PLAIN\ \TEX\ defaults are:
%D
%D \starttyping
%D \defineaccentcommand ` 18
%D \defineaccentcommand ' 19
%D \defineaccentcommand v 20
%D \defineaccentcommand u 21
%D \defineaccentcommand = 22
%D \defineaccentcommand ^ 94
%D \defineaccentcommand . 95
%D \defineaccentcommand H 125 % "7D
%D \defineaccentcommand ~ 126 % "7E
%D \defineaccentcommand " 127 % "7F
%D \stoptyping

\def\defineaccentcommand
  {\protectfontcharacters
   \dodefineaccentcommand}

% #1 = accent, #2 = slot number or command. The stored macro takes the
% character as its argument: with a numeric #2 the primitive \accent is
% used, otherwise #2 is applied to the character. The catcodes changed
% by \protectfontcharacters are restored at the end.

\def\dodefineaccentcommand#1 #2 % \string added
  {\doifnumberelse{\string#2}
     {\setvalue{\accentprefix\characterencoding\string#1}##1{{\accent#2 ##1}}}
     {\setvalue{\accentprefix\characterencoding\string#1}##1{{#2##1}}}%
   \unprotectfontcharacters}

%D We don't have to define them for the default \PLAIN\ case.
%D Commands may be used instead of character codes.
%D \macros
%D   {redefinecommand}
%D
%D Redefinition of encoding dependent commands like \type{\b}
%D and \type{\c} can be triggered by:
%D
%D \starttyping
%D \redefinecommand b % something math
%D \redefinecommand c % something math
%D \stoptyping
%D
%D Handling of characters is easier than handling accents
%D because here we don't have to take care of arguments. We
%D just call for the right glyph in the right place.
%D
%D The \type{\next} construction permits handling of commands
%D that take arguments. This means that we can use this
%D command to redefine accent handling commands too
%D (although today the next is not needed any longer in test
%D macros).

% Makes the command named #1 (space delimited) dispatch through
% \dohandlecommand again.

\def\redefinecommand#1 %
  {% no \unexpanded, else pdfdoc fails
   \setvalue{\string#1}{\dohandlecommand{#1}}}%

% Expands to the implementation of command #1 in the current encoding
% when that exists, and to the one in the fallback encoding otherwise.

\def\dohandlecommand#1%
  {\csname\commandprefix
     \ifcsname\commandprefix\characterencoding#1\endcsname
       \characterencoding
     \else
       \nocharacterencoding
     \fi
   #1\endcsname}

%D \macros
%D   {currentencoding, currentmapping}
%D
%D When we show 'm, we don't want to see the protection
%D measures.

% \dopureencodingname strips the @...@ wrapper from a stored name.

\def\currentencoding{\@EA\dopureencodingname\characterencoding}
\def\currentmapping {\@EA\dopureencodingname\charactermapping }

\def\dopureencodingname @#1@{#1}

\def\pureencodingname#1{\@EA\dopureencodingname#1}

%D \macros
%D   {showaccents, showcharacters,
%D    showcharacterbounds, showhyphenations}
%D
%D Encoding is a tricky business. Therefore we provide
%D a few macros that show most of the characters involved. The
%D next two tables show the result of \type {\showaccents}.
%D
%D \placetable
%D   {The special glyphs in default encoding.}
%D   {\showaccents}
%D
%D \placetable
%D   {The special glyphs in texnansi encoding.}
%D   {\switchtobodyfont[lbr]\showaccents}
%D
%D The command
%D
%D \starttyping
%D \showhyphenations{doordefini\"eren}
%D \stoptyping
%D
%D can be used to check the correct loading of hyphenation
%D patterns.
\fetchruntimecommand \showaccents {\f!encodingprefix\s!run}
\fetchruntimecommand \showcharacters {\f!encodingprefix\s!run}
\fetchruntimecommand \showcharacterbounds {\f!encodingprefix\s!run}
\fetchruntimecommand \showhyphenations {\f!encodingprefix\s!run}
\fetchruntimecommand \showmapping {\f!encodingprefix\s!run}

%D \macros
%D   {everyuppercase, EveryUppercase,
%D    everylowercase, EveryLowercase}
%D
%D When we want to uppercase strings of characters, we have to
%D take care of those characters that have a special meaning or
%D are only accessible by means of macros. The next hack was
%D introduced when Tobias Burnus started translating head and
%D label texts into spanish and italian. The first application
%D of this token register therefore can be found in the module
%D that deals with these texts.

\newevery \everyuppercase \EveryUppercase
\newevery \everylowercase \EveryLowercase

%D This magic trick takes care of mapping from lower to
%D upper case and reverse.

% Executes the case command list of the current mapping, when there is one.

\def\reloadmapping{\the\executeifdefined{\@cas@\charactermapping}\emptytoks}

% While changing case, the matching \set...casecom becomes \setcasecom
% (it gobbles its arguments otherwise) and the case command list is
% executed again, so that the stored definitions take effect.

\appendtoks\let\setuppercasecom\setcasecom\to\everyuppercase
\appendtoks\let\setlowercasecom\setcasecom\to\everylowercase

\appendtoks\reloadmapping\to\everyuppercase % slow, will be sped up
\appendtoks\reloadmapping\to\everylowercase % slow, will be sped up

\newtoks\everyULmap

% \everyULmap holds \remapcase<first><second> triplets; the direction
% is chosen by what \remapcase is bound to when the list is executed.

\appendtoks\let\remapcase\remapuppercase\the\everyULmap\to\everyuppercase
\appendtoks\let\remapcase\remaplowercase\the\everyULmap\to\everylowercase

\let\remapcase\gobbletwoarguments

\def\remapuppercase#1#2{\let#2#1}

% more efficient:

% NOTE(review): the \let below immediately supersedes this definition;
% both make \remaplowercase#1#2 act as \let#1#2.

\def\remaplowercase#1#2{\let#1#2}

\let\remaplowercase\let

% The arguments of the next three macros are command names without
% backslash. \defineLCcharacter lets the command named #1 be the one
% named #2 in lowercase mode, \defineUCcharacter does the same in
% uppercase mode, and \defineULcharacter registers the pair for both
% directions (uppercase: #2 becomes #1, lowercase: #1 becomes #2).

\def\defineLCcharacter #1 #2 %
  {\appendtoks\let\to\everylowercase
   \@EA\appendtoks\csname#1\endcsname\to\everylowercase
   \@EA\appendtoks\csname#2\endcsname\to\everylowercase}

\def\defineUCcharacter #1 #2 %
  {\appendtoks\let\to\everyuppercase
   \@EA\appendtoks\csname#1\endcsname\to\everyuppercase
   \@EA\appendtoks\csname#2\endcsname\to\everyuppercase}

\def\defineULcharacter #1 #2 %
  {\appendtoks\remapcase\to\everyULmap
   \@EA\appendtoks\csname#1\endcsname\to\everyULmap
   \@EA\appendtoks\csname#2\endcsname\to\everyULmap}

% slightly faster with \smallcapped's but far more hash and stringspace
%
% \newif\ifuppercase \appendtoks\uppercasetrue\to\everyuppercase
% \newif\iflowercase \appendtoks\lowercasetrue\to\everylowercase
%
% \def\defineULcharacter #1 #2 %
%   {\def\!!stringa{@#1}\@EA\letvalue\@EA\!!stringa\csname#1\endcsname
%    \def\!!stringa{@#2}\@EA\letvalue\@EA\!!stringa\csname#2\endcsname
%    \setvalue{#1}{\getvalue{@\ifuppercase#2\else#1\fi}}%
%    \setvalue{#2}{\getvalue{@\iflowercase#1\else#2\fi}}}

% 2 = tricky, since expanding \definedfont[lowcasename] ... goes wrong

\chardef\uppercasemode\plusthree % 0=ignore 1=normal 2=expand 3=auto
\chardef\casecommode \plusone % 0=noexpand 1=expand

% This version replaces the earlier \setcasecom: the defined command
% stays unexpanded while \casecommode is zero.

\def\setcasecom #1#2{\def#1{\ifcase\casecommode\noexpand#1\else#2\fi}}

% \def\OEPS{whatever}
%
% \startmapping[ec]
%   \defineuppercasecom \oeps {\getvalue{OEPS}}
% \stopmapping
%
% \WORD{xx \oeps}

% Uppercases #1 inside a group, after executing \everyuppercase.
% Nested \douppercase calls just pass on their argument. The branch
% taken depends on \uppercasemode:
%
% 0    #1 is passed on untouched
% 1    \uppercase is applied without expanding #1 first
% 2    #1 is expanded (with the case commands kept unexpanded),
%      uppercased, and then executed with the case commands active
% else \uppercase is applied without expanding #1 first

\def\douppercase#1%
  {\bgroup
   \let\douppercase\firstofoneargument
   \the\everyuppercase % currently also checks uppercasemode
   \let\dochar\rawcharacter
   \ifcase\uppercasemode
     #1%
   \or
     % No expansion here, otherwise \getvalue problems! Default!!!
     %\edef\next{#1}% keep this to prevent roll back
     %\uppercase\expandafter{\next}% keep this to prevent roll back
     \uppercase{#1}%
   \or
     \chardef\casecommode\zerocount
     \let\docasecom\firstoftwoarguments
     \edef\ascii{#1}%
     \edef\ascii{\expandafter\uppercase\expandafter{\ascii}}% needed when in regime
     \chardef\casecommode\plusone
     \ascii
   \else % mode three may trigger setting 2 elsewhere (e.g. regime test)
     \uppercase{#1}%
   \fi
   \egroup}

% In auto mode (3) the expanding mode (2) is selected as soon as the
% current regime is not the default one.

\prependtoksonce
  \doifnot\currentregime\s!default
    {\ifnum\uppercasemode=\plusthree
       \chardef\uppercasemode\plustwo
     \fi}%
\to \everyuppercase

%D \macros
%D   {everysanitize, EverySanitize}
%D
%D Whenever we are sanitizing strings, like we sometimes do
%D when we deal with specials, the next token register can be
%D called.

\newevery \everysanitize \EverySanitize

%D \macros
%D   {defineuclass,defineudigit,udigit}
%D
%D The next few macros are experimental and needed for unicoded
%D chinese characters.

% Stores class #1 under the number #2*256+#3.

\def\defineuclass #1 #2 #3 %
  {\setvalue{uc\the\numexpr#2*256+#3\relax}{#1}}

% Stores digit #1 of the current encoding as \uchar{#2}{#3}.

\def\defineudigit #1 #2 #3
  {\setvalue{\characterencoding uc#1}{\uchar{#2}{#3}}}

%D It may look strange, but for the moment, we want the encoding
%D to be part of the digit specification. This may change!

\unexpanded\def\udigit#1#2{\getvalue{@#1@uc\number#2}}

%D \macros
%D   {uchar, octuchar, hexuchar}

\ifx\uchar\undefined
  \def\uchar#1#2{(\number#1,\number#2)}
\fi

% Octal constants are introduced by ' and hexadecimal ones by ". A
% backtick would denote the code of the next character instead, so it
% must not be used here.

\def\octuchar#1#2{\uchar{'#1}{'#2}}
\def\hexuchar#1#2{\uchar{"#1}{"#2}}

%D Basics and fallbacks.
\newif\ifignoreaccent

\let\textaccent \accent
\let\normaltextaccent\textaccent

% ** we will explicitly embrace the two arguments, since in definitions
% this may not be the case, and we don't want faulty expansions like
% "\dobuildtextaccent \char 18 a" but "\dobuildtextaccent {\char 18}{a}"
% instead

\def\buildmathaccent#1%
  {\mathaccent#1 }

% Places accent #1 on character #2. When \ifignoreaccent is true, only
% the character is typeset.

\def\buildtextaccent#1#2% **
  {\ifignoreaccent
     \expandafter\nobuildtextaccent
   \else
     \expandafter\dobuildtextaccent
   \fi{#1}{#2}}

\unexpanded\def\nobuildtextaccent#1#2%
  {#2}

% \char is temporarily \normalaccent while #1 is processed and
% \normalchar again for #2; the extra braces keep this local.

\unexpanded\def\dobuildtextaccent#1#2%
  {{\let\char\normalaccent#1\let\char\normalchar#2}}

% some fake ones, name will change into build

% Puts accent #4 below character #5 by means of a \vtop with an
% alignment: #1 is used as \lineskip, #2 is a multiplier for the
% width of the accent and #3 a multiplier for \slantperpoint.

\unexpanded\def\bottomaccent#1#2#3#4#5% down right slantcorrection accent char
  {\dontleavehmode % why this align mess
   \vtop
     {\forgetall
      \baselineskip\zeropoint
      \lineskip#1%
      \everycr\emptytoks
      \tabskip\zeropoint
      \lineskiplimit\zeropoint
      \setbox0\hbox{#4}%
      \halign
        {##\crcr\hbox{#5}\crcr
         \hidewidth
         \hskip#2\wd0
         \hskip-#3\slantperpoint % in plain 1ex * dimenless value
         \vbox to .2ex{\box0\vss}\hidewidth
         \crcr}}}

\def\buildtextmacron {\bottomaccent{.25ex}{0}{15}{\textmacron}}
\def\buildtextbottomdot{\bottomaccent{.25ex}{0}{5}{\textbottomdot}}
\def\buildtextcedilla {\bottomaccent{0ex}{0}{5}{\textcedilla}}
\def\buildtextogonek {\bottomaccent{-.1ex}{.5}{0}{\textogonek}}

%D A collectors item:

\def\buildtextbottomcomma{\bottomaccent{.15ex}{0}{5}{\tx,}}

%D Rarely needed but there:

% Overlays accent #4, raised by #1, on character #5 in a box that has
% the width of the character; #2 and #3 are used as in \bottomaccent.

\unexpanded\def\topaccent#1#2#3#4#5% down right slantcorrection accent char
  {\dontleavehmode
   \bgroup
     \setbox0\hbox{#4}%
     \setbox2\hbox{#5}%
     \hbox to \wd2 \bgroup
       \hss\copy2\hss
       \hskip-\wd2
       \hss\hskip#2\wd0\hskip-#3\slantperpoint\raise#1\hbox{#4}\hss
     \egroup
   \egroup}

\def\buildtextgrave{\topaccent{0pt}{0}{15}{\textgrave}}

% e.g.
% \definecharacter schwa      {\hbox{\rotate[rotation=180,location=high]{\hbox{e}}}}
% \definecharacter schwagrave {\buildtextgrave\schwa}

% math stuff, will change

% Defines the command named #1 as #2 and keeps a copy under the name
% normalmathaccent#1, which \donormalmathaccent expands to.

\def\definemathaccent#1 #2%
  {\setvalue{\string#1}{#2}%
   \setvalue{normalmathaccent\string#1}{#2}}

\def\donormalmathaccent#1%
  {\getvalue{normalmathaccent\string#1}}

%D Some precautions:

\ifx\usepdffontresource\undefined
  \def\usepdffontresource #1 {} % this will be defined elsewhere
\fi

% Four families of handlers that all take the name of the command:
%
% \donthandle...     the command as a string, followed by a space
% \stringifyhandle...  \strchr{name}
% \keephandle...     the command itself, protected by \noexpand
% \handle...         the command itself

\def\donthandleaccent #1{\expandafter\string\csname#1\endcsname\space}
\def\donthandlecommand #1{\expandafter\string\csname#1\endcsname\space}
\def\donthandlecharacter #1{\expandafter\string\csname#1\endcsname\space}

\def\stringifyhandleaccent #1{\strchr{#1}}
\def\stringifyhandlecommand #1{\strchr{#1}}
\def\stringifyhandlecharacter#1{\strchr{#1}}

\def\keephandleaccent #1{\expandafter\noexpand\csname#1\endcsname}
\def\keephandlecommand #1{\expandafter\noexpand\csname#1\endcsname}
\def\keephandlecharacter #1{\expandafter\noexpand\csname#1\endcsname}

\def\handleaccent #1{\csname#1\endcsname}
\def\handlecommand #1{\csname#1\endcsname}
\def\handlecharacter #1{\csname#1\endcsname}

% The switches below install one of these families as the active
% \dohandle... macros. The literate and stringify switches only change
% the character handler.

\def\dontexpandencoding
  {\let\dohandleaccent \donthandleaccent
   \let\dohandlecommand \donthandlecommand
   \let\dohandlecharacter\donthandlecharacter}

\def\keepencodedtokens
  {\let\dohandleaccent \keephandleaccent
   \let\dohandlecommand \keephandlecommand
   \let\dohandlecharacter\keephandlecharacter}

\def\literateencodedtokens
  {% \let\dohandleaccent \keephandleaccent
   % \let\dohandlecommand \keephandlecommand
   \let\dohandlecharacter\keephandlecharacter}

\def\stringifyencodedtokens
  {% \let\dohandleaccent \stringifyhandleaccent
   % \let\dohandlecommand \stringifyhandlecommand
   \let\dohandlecharacter\stringifyhandlecharacter}

\unexpanded\def\uhandleaccent #1{\csname#1\endcsname}
\unexpanded\def\uhandlecommand #1{\csname#1\endcsname}
\unexpanded\def\uhandlecharacter#1{\csname#1\endcsname}

\def\dontexpandencodedtokens
  {\def\dohandleaccent {\uhandleaccent}%
   \def\dohandlecommand {\uhandlecommand}%
   \def\dohandlecharacter{\uhandlecharacter}}

% no longer: \def\convertencodedtokens{\dontexpandencoding} but:

\def\convertencodedtokens{\stringifyencodedtokens}

% test case:
%
% NOTE(review): the non-ASCII sample text in this test case has been
% damaged by an encoding conversion (presumably cp1250 text, and the
% opening <chapter> and <title> tags seem to be lost as well); restore
% it from a pristine copy of the file.
%
% \enableregime[cp1250]
% \mainlanguage[cz]
%
% \starttext
%
% \title{Ϭuޯu餭 kon졺p
% \placelist[chapter][criterium=all]
%
% \startbuffer
%
% Ϭuޯu餭 kon졺p󛱴itle>
% </chapter>
% \stopbuffer
%
% \defineXMLenvironment
%   [chapter]
%   {\defineXMLsave[title]}
%   {\expanded{\chapter{\XMLflush{title}}}}
% \processXMLbuffer
%
% \setuphead[chapter][expansion=yes]
% \defineXMLenvironment
%   [chapter]
%   {\defineXMLsave[title]}
%   {\chapter{\XMLflush{title}}}
% \processXMLbuffer
%
% \stoptext

%D Still valid? To be checked:

% Handlers that reduce their arguments to strings.

\def\doignoreaccent #1#2{\string#1\string#2}%
\def\doignorecommand #1{\string#1}
\def\doignorecharacter#1{\string#1}

\def\ignoreencoding
  {\let\dohandleaccent \doignoreaccent
   \let\dohandlecommand \doignorecommand
   \let\dohandlecharacter\doignorecharacter}

\appendtoks \ignoreencoding \to \everycleanupfeatures
\appendtoks \keepencodedtokens \to \everysafeexpanded

%D Now we will not redefine any more, so:

\let\normaldohandleaccent \dohandleaccent
\let\normaldohandlecharacter\dohandlecharacter

% The default implementations of the accent commands. Each line has to
% end with a space or line end, because the second argument of
% \definecommand is space delimited.

\definecommand ` {\buildtextaccent\textgrave}
\definecommand ' {\buildtextaccent\textacute}
\definecommand r {\buildtextaccent\textring}
\definecommand v {\buildtextaccent\textcaron}
\definecommand u {\buildtextaccent\textbreve}
\definecommand = {\buildtextaccent\textmacron}
\definecommand ^ {\buildtextaccent\textcircumflex}
\definecommand . {\buildtextaccent\textdotaccent}
\definecommand H {\buildtextaccent\texthungarumlaut}
\definecommand ~ {\buildtextaccent\texttilde}
\definecommand " {\buildtextaccent\textdiaeresis}
\definecommand c {\buildtextcedilla}
\definecommand b {\buildtextmacron}
\definecommand d {\buildtextbottomdot}
\definecommand k {\buildtextogonek}

\definemathaccent acute {\buildmathaccent\mathacute}
\definemathaccent grave {\buildmathaccent\mathgrave}
\definemathaccent ddot {\buildmathaccent\mathddot}
\definemathaccent tilde {\buildmathaccent\mathtilde}
\definemathaccent bar {\buildmathaccent\mathbar}
\definemathaccent breve {\buildmathaccent\mathbreve}
\definemathaccent check {\buildmathaccent\mathcheck}
\definemathaccent hat {\buildmathaccent\mathhat}
\definemathaccent vec {\buildmathaccent\mathvec}
\definemathaccent dot {\buildmathaccent\mathdot}
\definemathaccent widetilde {\buildmathaccent\mathwidetilde}
\definemathaccent widehat {\buildmathaccent\mathwidehat}

\useencoding[def] % defaults (partly simplified)
\useencoding[acc] % accent commands
\useencoding[raw] % simplified (incomplete)
\useencoding[com] % a few commands
\useencoding[cas] % case mapping, not needed in mkiv
\useencoding[mis] % a few commands

%D We preload several encodings:

% No further encodings are loaded when the engine is XeTeX; otherwise
% the list below is loaded and ec becomes the default encoding.

\ifnum\texengine=\xetexengine
    \setupencoding[\s!default=\s!default]
\else
    \useencoding[ans,il2,ec,tbo,pdf,pol,qx,t5,l7x,cyr,agr] % pol and il2 will go away, not needed in mkiv, uc removed
    \useencoding[032,033,037] % fallbacks for some unicode chars
    \setupencoding[\s!default=ec] % was: [\s!default=\s!default]
\fi

\protect \endinput