2016-01-12 16:26:00

author: Context Git Mirror Bot <phg42.2a@gmail.com> 2016-01-12 17:15:07 +0100
committer: Context Git Mirror Bot <phg42.2a@gmail.com> 2016-01-12 17:15:07 +0100
commit: 8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree: 94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkii/font-uni.mkii
parent: f5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
download: context-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz
1 files changed, 444 insertions, 0 deletions
diff --git a/tex/context/base/mkii/font-uni.mkii b/tex/context/base/mkii/font-uni.mkii
new file mode 100644
index 000000000..77eb680e2
--- /dev/null
+++ b/tex/context/base/mkii/font-uni.mkii
@@ -0,0 +1,444 @@
+%D \module
+%D   [       file=font-uni,
+%D        version=1999.10.10,
+%D          title=\CONTEXT\ Font Macros,
+%D       subtitle=\UNICODE,
+%D         author=Hans Hagen,
+%D           date=\currentdate,
+%D      copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
+%C
+%C This module is part of the \CONTEXT\ macro||package and is
+%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
+%C details.
+
+\writestatus{loading}{ConTeXt Font Macros / Unicode}
+
+%D In \XETEX, unicode support is straightforward, so we
+%D simply output a \type {\char} with a 16||bit number.
+
+% XeTeX shortcut: \uchar{#1}{#2} (and its alias \uc) emit a single
+% \char with code #1 * \pluscclvi + #2. The \expandafter closes the
+% conditional before \endinput takes effect, so nothing below this
+% block is loaded under XeTeX.
+\ifnum\texengine=\xetexengine
+    \unexpanded\def\uchar#1#2{\char\numexpr#2+#1*\pluscclvi\relax}
+    \let\uc\uchar
+    \expandafter \endinput
+\fi
+
+%D Now comes the more traditional 8 bit \TEX\ hackery.
+
+%D I wrote this module when Wang Lei asked me how to use
+%D Chinese in \CONTEXT. From the samples he sent me, I deduced
+%D that some mixture of one and two byte encoding was used,
+%D which he confirmed. \TEX\ normally does not use the
+%D characters $>127$, so as long as the two byte characters
+%D have a first character with code $>127$, we can use active
+%D characters to handle them. In an optimistic mood, I called
+%D this module the \UNICODE\ font module. In the module that
+%D handles Chinese, we will see that some more interpretation
+%D is involved, which is why the macros handling those
+%D characters look ahead.
+
+\unprotect
+
+%D \macros
+%D   {handleunicodeflowglyph, uchar,
+%D    handleunicodeglyph, insertunicodeglyph,
+%D    unicodeposition, unicodeone, unicodetwo}
+%D
+%D For the moment \UNICODE\ support is rather primitive but
+%D nevertheless effective. The reference to \UNICODE\ is not
+%D entirely correct, since in many cases one will use \quote
+%D {older} mappings, but in principle, \UNICODE\ can be
+%D supported.
+%D
+%D We expect each character to come as two eight bit
+%D characters. Those doubles are handled by making all
+%D characters in the range $>127$ active, so that they can
+%D pick up the next one, and act upon both their values.
+%D Internally only numbers are used. A first implementation
+%D simply internally prefixed the second part of the \UNICODE\
+%D pair with \type {\string} or \type {\char}, but this was
+%D not that handy when it came to testing those values.
+%D Because in principle we are dealing with an encoding, the
+%D making active is handled in \type {enco-uni}.
+%D
+%D There are two commands to handle unicode characters:
+%D
+%D \starttyping
+%D \handleunicodeflowglyph{number}{character}
+%D \uchar{number}{number}
+%D \stoptyping
+%D
+%D The first one can be assigned to an active character, the
+%D second one can be used to directly access a glyph. Both
+%D commands call \type {\handleunicodeglyph} that in turn
+%D calls \type {\insertunicodeglyph}. Both can be overruled
+%D in specialized modules. The low level command \type
+%D {\unicodeglyph} can best be left untouched, which is not
+%D so much a problem because there is a hook into this macro:
+%D \type {\unicodecharcommand}.
+%D
+%D In most cases one will redefine \type {\handleunicodeglyph}
+%D in such a way that it identifies special situations first,
+%D takes some actions next, calls \type {\insertunicodeglyph},
+%D if needed with \type {\unicodecharcommand} changed, and
+%D finally does some finishing:
+%D
+%D \starttyping
+%D \def\handleunicodeglyph
+%D   {take actions based on \unicodeone-two-position cq. \nextutoken
+%D    redefine \unicodecharcommand if needed
+%D    expand \insertunicodeglyph
+%D    take some final actions}
+%D \stoptyping
+
+% Count register that the handlers below set to
+% 256 * \unicodeone + \unicodetwo before \handleunicodeglyph is called.
+\newcount\unicodeposition
+
+%D The multistep approach is needed to pick up the second
+%D token, since this token can have any value and any
+%D catcode.
+
+% the \relax trick prevents eating up the space (needed for
+% korean)
+
+% #1: number of the first byte, #2: the second byte as a token.
+% Opens a group (closed in \dodohandleunicodeflowglyph), stores #1 in
+% \unicodeone and \chardef's \nexttoken to the character code of the
+% \string'ed #2. \afterassignment then continues with
+% \dohandleunicodeflowglyph, which absorbs the trailing \relax.
+\def\handleunicodeflowglyph#1#2%
+  {\begingroup
+   \edef\unicodeone{#1}%
+   \@EA\afterassignment\@EA\dohandleunicodeflowglyph % two redundant ea's
+     \@EA\chardef\@EA\nexttoken\@EA`\string#2\relax}
+
+% Delimited by the \relax that \handleunicodeflowglyph leaves behind.
+% Stores the token that follows the pair in \nextutoken (without
+% removing it) and continues with \dodohandleunicodeflowglyph.
+\def\dohandleunicodeflowglyph\relax
+  {\futurelet\nextutoken\dodohandleunicodeflowglyph}
+
+% Turns the \chardef'd \nexttoken into the number \unicodetwo, computes
+% \unicodeposition, calls the glyph handler and closes the group that
+% was opened in \handleunicodeflowglyph.
+\def\dodohandleunicodeflowglyph % todo tex (or maybe no longer)
+  {\edef\unicodetwo{\the\nexttoken}%
+   \unicodeposition\numexpr\unicodeone*256+\unicodetwo\relax
+   \handleunicodeglyph
+   \endgroup}
+
+% \uchar{plane}{char}: typeset one glyph addressed by two numbers.
+% The pair is stored inside a group; the shared tail
+% \dohandleucflowglyph then sets \unicodeposition, calls
+% \handleunicodeglyph and closes the group again.
+\unexpanded\def\uchar#1#2% use as standalone glyph
+  {\begingroup
+   \edef\unicodeone{#1}\edef\unicodetwo{#2}%
+   \dohandleucflowglyph}
+
+% Default: no token has been looked ahead.
+\let\nextutoken\relax
+
+% Like \uchar{#1}{#2}, but the token that follows the call is first
+% stored in \nextutoken so that a handler can inspect it. Afterwards
+% \nextutoken is reset to \relax.
+\unexpanded\def\lookaheaduchar#1#2%
+  {\def\dolookaheaduchar{\uchar{#1}{#2}\let\nextutoken\relax}%
+   \futurelet\nextutoken\dolookaheaduchar}
+
+% Tail of \uc: derive \unicodeposition from \unicodeone and
+% \unicodetwo, call the glyph handler and close the group that the
+% caller opened.
+\def\dohandleucflowglyph
+  {\unicodeposition\numexpr\unicodeone*256+\unicodetwo\relax
+   \handleunicodeglyph
+   \endgroup}
+
+% \uc{plane}{char}: variant of \uchar that looks ahead. The token
+% following the pair is stored in \nextutoken (inside the group) before
+% \dohandleucflowglyph handles the glyph and closes the group.
+\unexpanded\def\uc#1#2% used in tricky situations
+  {\begingroup
+   \edef\unicodeone{#1}%
+   \edef\unicodetwo{#2}%
+   \futurelet\nextutoken\dohandleucflowglyph}
+
+% Default insertion: hand the current pair to \unicodeglyph.
+\def\insertunicodeglyph
+  {\unicodeglyph\unicodeone\unicodetwo}
+
+% Until another handler is installed (\enableunicodefont does so),
+% handling a glyph is the same as inserting it.
+\let\handleunicodeglyph\insertunicodeglyph
+
+%D One can use the \type {\unicodeposition} in the macros
+%D that handle pre and post material.
+
+%D \macros
+%D   {unicodestyle, unicodecharcommand}
+%D
+%D Each character pair will become one glyph. Because \TEX\
+%D cannot handle fonts with more than 256 characters, we use
+%D \TFM\ files for each range. The first character of the pair
+%D is appended to the name of a font, and the second is used to
+%D access the glyph in that font. This means that a particular
+%D font is split up in subfonts with names in the range:
+%D
+%D \starttyping
+%D <filename>80 ... <filename>ff
+%D \stoptyping
+%D
+%D The \type {<filename>} as well as the composed name are
+%D mapped ones. The next macros take care of this mapping.
+%D Let us assume that the next mapping has taken place,
+%D
+%D \starttyping
+%D \definefontsynonym [UnicodeRegular] [gbsong]
+%D \stoptyping
+%D
+%D Let us also assume that we are dealing with the range \type
+%D {b1}. Given that a font name results from:
+%D
+%D \starttyping
+%D \truefontname{\truefontname{UnicodeRegular}b1}
+%D \stoptyping
+%D
+%D we get \type {gbsongb1}. The outer \type {\truefontname}
+%D takes care of additional mapping, so when we say:
+%D
+%D \starttyping
+%D \definefontsynonym [gbsongb1] [gbsong-b1]
+%D \stoptyping
+%D
+%D the filename used will be \type {gbsong-b1}. From the next
+%D definition it will be clear that other fontshapes are also
+%D supported. The prefix \type {Unicode} is mapped!
+%D
+%D The command \type {\unicodecharcommand} can be used to
+%D handle special cases. At that moment \type {1em} is known.
+
+% Font synonym for the current style: the mapped name of \s!Unicode
+% followed by \fontstylesuffix.
+\def\unicodestyle
+  {\truefontname\s!Unicode\fontstylesuffix}
+
+% Hook that \unicodeglyph wraps around the \char; the default is
+% \firstofoneargument (presumably the identity, defined elsewhere).
+\let\unicodecharcommand\firstofoneargument
+
+% #1, #2: the pair to typeset (the callers in this file pass
+% \unicodeone and \unicodetwo). Everything happens inside a group:
+% - the current uchar mapping is applied to #1 and #2 (the default
+%   mapping sets \unicodeone and \unicodetwo),
+% - \bodyfontsize is multiplied by \unicodescale,
+% - the subfont that belongs to \unicodeone is loaded as \unicodefont
+%   at \currentfontscale times \bodyfontsize,
+% - \unicodestrut is inserted and \char\unicodetwo is typeset in
+%   \unicodefont, wrapped in the \unicodecharcommand hook.
+\unexpanded\def\unicodeglyph#1#2% watch the double mapping
+  {\begingroup
+   \getvalue{@@\currentucharmapping\strippedcsname\uchar}{#1}{#2}% map to a hex font range
+   \bodyfontsize\unicodescale\bodyfontsize
+   % readable:
+   % \doifelsefontsynonym{\unicodestyle\unicodeone}
+   %   {\font\unicodefont=\truefontname{\unicodestyle\unicodeone}
+   %       at \currentfontscale\bodyfontsize}
+   %   {\font\unicodefont=\truefontname{\truefontname\unicodestyle\unicodeone}
+   %       at \currentfontscale\bodyfontsize}%
+   % unreadable but more efficient:
+   \font\unicodefont=\truefontname{\doifelsefontsynonym{\unicodestyle
+     \unicodeone}\empty\truefontname\unicodestyle\unicodeone}
+      at \currentfontscale\bodyfontsize
+   \unicodestrut % off by default
+   \unicodefont\unicodecharcommand{\char\unicodetwo\relax}%
+   \endgroup}
+
+%D This handler is used by default, for instance in:
+%D
+%D \starttyping
+%D \defineunicodefont [MySwitch] [MyFont] % [strut=no,command=\insertunicodeglyph]
+%D
+%D \definefontsynonym [MyFontRegular40] [Sans]
+%D \definefontsynonym [MyFontBold40]    [SansBold]
+%D
+%D {\MySwitch     \uchar{"40}{`a}}
+%D {\MySwitch \bf \uchar{"40}{`a}}
+%D \stoptyping
+%D
+%D \starttyping
+%D \definefontsynonym [MyFontRegular] [Sans]
+%D \definefontsynonym [MyFontBold]    [SansBold]
+%D \stoptyping
+%D
+%D This is also possible, but in that case the number is appended to the raw
+%D font name!
+
+%D \macros
+%D   {currentucharmapping,defineucharmapping}
+%D
+%D A (plane,char) pair can be remapped using a uchar mapping
+%D function. The default mapping is to convert the plane to a
+%D lowercase hexadecimal number, and leave the number
+%D untouched. The current remapping is kept in a macro.
+
+% Name of the mapping in force; \unicodeglyph uses it to select the
+% mapping macro.
+\let\currentucharmapping\s!default
+
+% \defineucharmapping{name}#1#2{...}: define the mapping macro for
+% <name>. Only the macro name is built here; the parameter text and
+% the body follow in the input and are picked up by \setvalue.
+\def\defineucharmapping#1%
+  {\setvalue{@@#1\strippedcsname\uchar}}
+
+% Default mapping: \unicodeone becomes \lchexnumbers{#1} (the plane as
+% a lowercase hexadecimal number, according to the documentation
+% above), \unicodetwo is #2 unchanged.
+\defineucharmapping{\s!default}#1#2%
+  {\edef\unicodeone{\lchexnumbers{#1}}\edef\unicodetwo{#2}}
+
+%D An example of a remapping is the following:
+%D
+%D \starttyping
+%D \defineucharmapping{GBK}#1#2%
+%D   {\unicodeposition=#1
+%D    \advance\unicodeposition -129
+%D    \multiply\unicodeposition 190
+%D    \advance\unicodeposition #2
+%D    \advance\unicodeposition-\ifnum#2>127 65\else64\fi
+%D    \dorepositionunicode}
+%D \stoptyping
+%D
+%D This maps the GBK vector onto a compact GBK one. The
+%D auxiliary macro is defined here as a goody.
+
+% Split \unicodeposition into a subfont number and a slot, assuming
+% that \dosetdivision and \dosetmodulo store the quotient and the
+% remainder in their last argument (both are defined elsewhere):
+% \unicodeone = (position div 256) + 1, with a leading zero below 10,
+% \unicodetwo = position mod 256. \scratchcounter is overwritten.
+\def\dorepositionunicode
+  {\dosetdivision\unicodeposition{256}\scratchcounter
+   \advance\scratchcounter \plusone
+   \edef\unicodeone{\ifnum\scratchcounter<10 0\fi\the\scratchcounter}%
+   \dosetmodulo\unicodeposition{256}\scratchcounter
+   \edef\unicodetwo{\the\scratchcounter}}
+
+%D \macros
+%D   {setunicodestrut, setunicodescale, nextutoken,
+%D    handleunicodeglyph, insertunicodeglyph}
+%D
+%D A careful analysis of the previous macros shows that the
+%D process of mapping comes down to:
+%D
+%D \startitemize[packed,n]
+%D \item  taking care of preceding material (and spacing)
+%D \item  defining the font at \type {\currentfontscale} $\times$
+%D       \type {\unicodescale} $\times$ \type {\bodyfontsize}
+%D \item  inserting a \type {\unicodestrut}
+%D \item  inserting the character (glyph)
+%D \item  executing some actions afterwards
+%D \stopitemize
+%D
+%D The actions before and after placing the glyph are up to
+%D the user supplied handler. This handler (\type
+%D {\handleunicodeglyph}) must, at a certain moment, insert
+%D the glyph using \type {\insertunicodeglyph}.
+
+% \setunicodescale{factor}: set the extra factor by which
+% \unicodeglyph multiplies \bodyfontsize.
+\def\setunicodescale#1%
+  {\def\unicodescale{#1}}
+
+% Define \unicodestrut as an invisible rule: zero width, #1 times
+% \strutht high and #2 times \strutdp deep. The closing \relax ends
+% the rule specification.
+\def\dosetunicodestrut#1#2% height factor, depth factor
+  {\def\unicodestrut{\vrule
+     \!!height#1\strutht
+     \!!depth #2\strutdp
+     \!!width \zeropoint
+   \relax}}
+
+% \setunicodestrut{height factor}{depth factor}: install a strut when
+% either the scaled height (#1 times \strutht) or the scaled depth
+% (#2 times \strutdp) is positive; otherwise \unicodestrut is made
+% empty. The depth test must look at #2: testing #1 there made a
+% strut with zero height but nonzero depth disappear.
+\def\setunicodestrut#1#2% height depth
+  {\ifdim#1\strutht>\zeropoint
+     \dosetunicodestrut{#1}{#2}%
+   \else\ifdim#2\strutdp>\zeropoint
+     \dosetunicodestrut{#1}{#2}%
+   \else
+     \let\unicodestrut\empty
+   \fi\fi}
+
+% Switch the strut off: \unicodestrut becomes \empty.
+\def\resetunicodestrut
+  {\let\unicodestrut\empty}
+
+%D The additional scaling and strut default to:
+
+% Initial values; the strut that is set here is switched off again
+% right below.
+\setunicodescale{1}
+\setunicodestrut{1}{1}
+
+%D But it is better not to have a strut added by default:
+
+\resetunicodestrut
+
+%D The actual code for the additional actions as well as
+%D specific spacing is handled outside these routines. The
+%D character after the two that are under treatment is
+%D available in \type {\nextutoken}.
+
+%D \macros
+%D   {defineunicodefont, setupunicodefont}
+%D
+%D Apart from this rather low level implementation, we also
+%D provide a more user friendly alternative. Given that one
+%D has defined:
+%D
+%D \starttyping
+%D \defineunicodefont
+%D   [SimChi] [SimplifiedChinese]
+%D   [\c!scale=0.85,
+%D    \c!height=1.25,
+%D    \c!depth=1.00,
+%D    \c!interlinespace=yes,
+%D    \c!conversion=\chinesenumber,
+%D    \c!command=\handlechineseunicodeglyph]
+%D \stoptyping
+%D
+%D Together with:
+%D
+%D \starttyping
+%D \definefontsynonym [SimplifiedChineseRegular] [gbsong]
+%D \definefontsynonym [SimplifiedChineseSlanted] [gbsongsl]
+%D \stoptyping
+%D
+%D we can now switch to Simplified Chinese by saying \type
+%D {\SimChi}. Some values can be changed afterwards with
+%D
+%D \starttyping
+%D \setupunicodefont[SimChi][...=...]
+%D \stoptyping
+%D
+%D Specific initializations can be assigned to \type
+%D {commands}.
+
+% \defineunicodefont[name][file][settings or parent]: user interface;
+% the three bracketed arguments are collected via \dotripleempty and
+% passed on to \dodefineunicodefont.
+\def\defineunicodefont
+  {\dotripleempty\dodefineunicodefont}
+
+% #1: name of the font switch, #2: font name prefix, #3: key=value
+% settings or the name of another unicode font.
+% - #3 holds assignments, or is empty: it is handed to \setupunicodefont;
+% - otherwise the listed parameters are copied from the font named #3.
+% When #2 is empty, the switch #1 only expands to a warning text.
+% Otherwise #2 is stored as the file parameter, and when the Bold synonym
+% of #2 is undefined, Bold, Slanted and Italic are mapped onto Regular
+% and BoldSlanted/BoldItalic onto Slanted/Italic. Finally the command
+% named #1 is defined as a protected call of \enableunicodefont{#1}.
+\def\dodefineunicodefont[#1][#2][#3]%
+  {\doifassignmentelse{#3}
+     {\setupunicodefont[#1][#3]}
+     {\doifelsenothing{#3}
+        {\setupunicodefont[#1][#3]}
+        {\copyparameters
+           [\??uc#1][\??uc#3]
+           [\c!height,\c!depth,\c!scale,\c!commands,\c!strut,
+            \c!interlinespace,\c!command,\c!conversion]}}%
+   \doifelsenothing{#2}
+     {\setvalue{#1}{[uc font #1 undefined]}}
+     {\setvalue{\??uc#1\c!file}{#2}%
+      \doifundefined{\??ff#2\s!Bold}
+        {\definefontsynonym[#2\s!Bold]       [#2\s!Regular]%
+         \definefontsynonym[#2\s!Slanted]    [#2\s!Regular]%
+         \definefontsynonym[#2\s!Italic]     [#2\s!Regular]%
+         \definefontsynonym[#2\s!BoldSlanted][#2\s!Slanted]%
+         \definefontsynonym[#2\s!BoldItalic] [#2\s!Italic]}%
+      \unexpanded\setvalue{#1}{\enableunicodefont{#1}}}}
+
+% \setupunicodefont[name][settings]: user interface; the two bracketed
+% arguments are collected via \dodoubleempty and passed on to
+% \dosetupunicodefont.
+\def\setupunicodefont
+  {\dodoubleempty\dosetupunicodefont}
+
+% #1: name of the unicode font, #2: key=value settings.
+% When #1 has no command parameter yet, it first inherits the listed
+% parameters from the default set; after that the given settings are
+% applied. The list holds parameter keys, so the strut entry is spelled
+% \c!strut: that is the key \dodefineunicodefont copies and
+% \enableunicodefont tests (\v! names are values, not keys).
+\def\dosetupunicodefont[#1][#2]% also predefines
+  {\doifundefined{\??uc#1\c!command}
+     {\copyparameters
+        [\??uc#1][\??uc\s!default]
+        [\c!height,\c!depth,\c!scale,\c!commands,\c!strut,
+         \c!interlinespace,\c!command,\c!conversion]}%
+   \getparameters[\??uc#1][#2]}
+
+% Activate the unicode font #1:
+% - point the Unicode font synonym to the file parameter of #1,
+% - bind scale, height, depth, number conversion and the glyph handler
+%   to the corresponding parameters of #1,
+% - enable the unicode regime unless \currentregime is utf,
+% - call \setupinterlinespace when interlinespace is set to yes,
+% - set the strut from height and depth when strut is set to yes,
+%   otherwise reset it,
+% - finally run whatever is stored in the commands parameter.
+\def\enableunicodefont#1%
+  {\definefontsynonym[\s!Unicode][\getvalue{\??uc#1\c!file}]%
+   \def\unicodescale             {\getvalue{\??uc#1\c!scale}}%
+   \def\unicodeheight            {\getvalue{\??uc#1\c!height}}%
+   \def\unicodedepth             {\getvalue{\??uc#1\c!depth}}%
+   \def\unicodedigits            {\getvalue{\??uc#1\c!conversion}}%
+   \def\handleunicodeglyph       {\getvalue{\??uc#1\c!command}}%
+   \doifnot\currentregime{utf}{\enableregime[unicode]}%
+   % the following \relax's are really needed
+   \doifvalue{\??uc#1\c!interlinespace}\v!yes
+      \setupinterlinespace\relax
+   \doifelsevalue{\??uc#1\c!strut}\v!yes
+      {\setunicodestrut\unicodeheight\unicodedepth}
+      {\resetunicodestrut}%
+   \getvalue{\??uc#1\c!commands}\relax}
+
+%D \macros
+%D   {unicodedigits}
+%D
+%D For convenience we also predefine a number conversion
+%D macro:
+
+% \unicodedigits is plain \number until \enableunicodefont binds it to
+% the conversion parameter of a unicode font.
+\let\unicodedigits\number
+
+%D Because we cannot be sure of the presence of all font
+%D styles, we remap some by default.
+
+% Bold, Slanted and Italic fall back on Regular; BoldSlanted and
+% BoldItalic fall back on Slanted and Italic.
+\definefontsynonym [\s!Unicode\s!Bold]        [\s!Unicode\s!Regular]
+\definefontsynonym [\s!Unicode\s!Slanted]     [\s!Unicode\s!Regular]
+\definefontsynonym [\s!Unicode\s!Italic]      [\s!Unicode\s!Regular]
+\definefontsynonym [\s!Unicode\s!BoldSlanted] [\s!Unicode\s!Slanted]
+\definefontsynonym [\s!Unicode\s!BoldItalic]  [\s!Unicode\s!Italic]
+
+% The default parameter set, from which \dosetupunicodefont lets every
+% new unicode font inherit: factors of 1 for height, depth and scale,
+% no strut, no interline space change, plain glyph insertion and
+% \number as conversion.
+\setupunicodefont
+  [\s!default]
+  [\c!height=1,
+   \c!depth=1,
+   \c!scale=1,
+   \c!strut=\v!no,
+   \c!interlinespace=\v!no,
+   \c!command=\insertunicodeglyph,
+   \c!conversion=\number]
+
+\protect \endinput
author	Context Git Mirror Bot <phg42.2a@gmail.com>	2016-01-12 17:15:07 +0100
committer	Context Git Mirror Bot <phg42.2a@gmail.com>	2016-01-12 17:15:07 +0100
commit	8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree	94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkii/font-uni.mkii
parent	f5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
download	context-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz