summaryrefslogtreecommitdiff
path: root/tex/context/base/mkii/font-uni.mkii
diff options
context:
space:
mode:
authorContext Git Mirror Bot <phg42.2a@gmail.com>2016-01-12 17:15:07 +0100
committerContext Git Mirror Bot <phg42.2a@gmail.com>2016-01-12 17:15:07 +0100
commit8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkii/font-uni.mkii
parentf5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
downloadcontext-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz
2016-01-12 16:26:00
Diffstat (limited to 'tex/context/base/mkii/font-uni.mkii')
-rw-r--r--tex/context/base/mkii/font-uni.mkii444
1 files changed, 444 insertions, 0 deletions
diff --git a/tex/context/base/mkii/font-uni.mkii b/tex/context/base/mkii/font-uni.mkii
new file mode 100644
index 000000000..77eb680e2
--- /dev/null
+++ b/tex/context/base/mkii/font-uni.mkii
@@ -0,0 +1,444 @@
+%D \module
+%D [ file=font-uni,
+%D version=1999.10.10,
+%D title=\CONTEXT\ Font Macros,
+%D subtitle=\UNICODE,
+%D author=Hans Hagen,
+%D date=\currentdate,
+%D copyright={PRAGMA ADE \& \CONTEXT\ Development Team}]
+%C
+%C This module is part of the \CONTEXT\ macro||package and is
+%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
+%C details.
+
+\writestatus{loading}{ConTeXt Font Macros / Unicode}
+
+%D In \XETEX, unicode support is straightforward, so we
+%D simply output a \type {\char} with a 16||bit number.
+
+\ifnum\texengine=\xetexengine
+ \unexpanded\def\uchar#1#2{\char\numexpr#2+#1*\pluscclvi\relax}
+ \let\uc\uchar
+ \expandafter \endinput
+\fi
+
+%D Now comes the more traditional 8 bit \TEX\ hackery.
+
+%D I wrote this module when Wang Lei asked me how to use
+%D Chinese in \CONTEXT. From the samples he sent me, I deduced
+%D that some mixture of one and two byte encoding was used,
+%D which he confirmed. Since \TEX\ normally does not use the
+%D characters $>127$, so as long as the two byte characters
+%D have a first character with code $>127$, we can use active
+%D characters to handle them. In an optimistic mood, I called
+%D this module the \UNICODE\ font module. In the module that
+%D handles Chinese, we will see that some more interpretation
+%D is involved, which is why the macros handling those
+%D characters look ahead.
+
+\unprotect
+
+%D \macros
+%D {handleunicodeflowglyph, uchar,
+%D handleunicodeglyph, insertunicodeglyph,
+%D unicodeposition, unicodeone, unicodetwo}
+%D
+%D For the moment \UNICODE\ support is rather primitive but
+%D nevertheless effective. The reference to \UNICODE\ is not
+%D entirely correct, since in many cases one will use \quote
+%D {older} mappings, but in principle, \UNICODE\ can be
+%D supported.
+%D
+%D We expect each character to come as two eight bit
+%D characters. Those doubles are handled by making all
+%D characters in the range $>127$ active, so that they can
+%D pick up the next one, and act upon both their values.
+%D Internally only numbers are used. A first implementation
+%D simply internally prefixed the second part of the \UNICODE\
+%D pair with \type {\string} or \type {\char}, but this was
+%D not that handy when it came to testing those values.
+%D Because in principle we are dealing with an encoding, the
+%D making active is handled in \type {enco-uni}.
+%D
+%D There are two commands to handle unicode characters:
+%D
+%D \starttyping
+%D \handleunicodeflowglyph{number}{character}
+%D \uchar{number}{number}
+%D \stoptyping
+%D
+%D The first one can be assigned to an active character, the
+%D second one can be used to directly access a glyph. Both
+%D command call \type {\handleunicodeglyph} that in turn
+%D calls \type {\insertunicodeglyph}. Both can be overruled
+%D in specialized modules. The low level command \type
+%D {\unicodeglyph} can best be left untouched, which is not
+%D so much a problem because there is a hook into this macro:
+%D \type {\unicodecharcommand}.
+%D
+%D In most cases one will redefine \type {\handleunicodeglyph}
+%D in such a way that it identifies special situations first,
+%D takes some actions next, calls \type {\insertunicodeglyph},
+%D if needed with \type {\unicodecharcommand} changed, and
+%D finally does some finishing:
+%D
+%D \starttyping
+%D \def\handleunicodeglyph
+%D {take actions based on \unicodeone-two-position cq. \nextutoken
+%D redefine \unicodecharcommand if needed
+%D expand \insertunicodeglyph
+%D take some final actions}
+%D \stoptyping
+
+\newcount\unicodeposition
+
+%D The multistep approach is needed to pick up the second
+%D token, since this token can have any value and any
+%D catcode.
+
+% the \relax trick prevents eating up the space (needed for
+% korean
+
+\def\handleunicodeflowglyph#1#2%
+ {\begingroup
+ \edef\unicodeone{#1}%
+ \@EA\afterassignment\@EA\dohandleunicodeflowglyph % two redundant ea's
+ \@EA\chardef\@EA\nexttoken\@EA`\string#2\relax}
+
+\def\dohandleunicodeflowglyph\relax
+ {\futurelet\nextutoken\dodohandleunicodeflowglyph}
+
+\def\dodohandleunicodeflowglyph % todo tex (or maybe no longer)
+ {\edef\unicodetwo{\the\nexttoken}%
+ \unicodeposition\numexpr\unicodeone*256+\unicodetwo\relax
+ \handleunicodeglyph
+ \endgroup}
+
+\unexpanded\def\uchar#1#2% use as standalone glyph
+ {\begingroup
+ \edef\unicodeone{#1}%
+ \edef\unicodetwo{#2}%
+ \unicodeposition\numexpr\unicodeone*256+\unicodetwo\relax
+ \handleunicodeglyph
+ \endgroup}
+
+\let\nextutoken\relax
+
+\unexpanded\def\lookaheaduchar#1#2%
+ {\def\dolookaheaduchar{\uchar{#1}{#2}\let\nextutoken\relax}%
+ \futurelet\nextutoken\dolookaheaduchar}
+
+\def\dohandleucflowglyph
+ {\unicodeposition\numexpr\unicodeone*256+\unicodetwo\relax
+ \handleunicodeglyph
+ \endgroup}
+
+\unexpanded\def\uc#1#2% used in tricky situations
+ {\begingroup
+ \edef\unicodeone{#1}%
+ \edef\unicodetwo{#2}%
+ \futurelet\nextutoken\dohandleucflowglyph}
+
+\def\insertunicodeglyph
+ {\unicodeglyph\unicodeone\unicodetwo}
+
+\let\handleunicodeglyph\insertunicodeglyph
+
+%D One can use the \type {\unicodeposition} in the macros
+%D that handle pre and post material.
+
+%D \macros
+%D {unicodestyle, unicodecharcommand}
+%D
+%D Each character pair will become one glyph. Because \TEX\
+%D cannot handle fonts with more that 256 characters, we use
+%D \TFM\ files for each range. The first character of the pair
+%D is appended to the name of a font, and the second is used to
+%D access the glyph in that font. This means that a particular
+%D font is split up in subfonts with names in the range:
+%D
+%D \starttyping
+%D <filename>80 ... <filename>ff
+%D \stoptyping
+%D
+%D The \type {<filename>} as well as the composed name are
+%D mapped ones. The next macros take care of this mapping.
+%D Let us assume that the next mapping has taken place,
+%D
+%D \starttyping
+%D \definefontsynonym [UnicodeRegular] [gbsong]
+%D \stoptyping
+%D
+%D Let us also assume that we are dealing with the range \type
+%D {b1}. Given that a font name results from:
+%D
+%D \starttyping
+%D \truefontname{\truefontname{UnicodeRegular}b1}
+%D \stoptyping
+%D
+%D we get \type {gbsongb1}. The outer \type {\truefontname}
+%D takes care of additional mapping, so when we say:
+%D
+%D \starttyping
+%D \definefontsynonym [gbsongb1] [gbsong-b1]
+%D \stoptyping
+%D
+%D the filename used will be \type {gbsong-b1}. From the next
+%D definition it will be clear that other fontshapes are also
+%D supported. The prefix \type {Unicode} is mapped!
+%D
+%D The command \type {\unicodecharcommand} can be used to
+%D handle special cases. At that moment \type {1em} is known.
+
+\def\unicodestyle
+ {\truefontname\s!Unicode\fontstylesuffix}
+
+\let\unicodecharcommand\firstofoneargument
+
+\unexpanded\def\unicodeglyph#1#2% watch the double mapping
+ {\begingroup
+ \getvalue{@@\currentucharmapping\strippedcsname\uchar}{#1}{#2}% map to a to hex font range
+ \bodyfontsize\unicodescale\bodyfontsize
+ % readable:
+ % \doifelsefontsynonym{\unicodestyle\unicodeone}
+ % {\font\unicodefont=\truefontname{\unicodestyle\unicodeone}
+ % at \currentfontscale\bodyfontsize}
+ % {\font\unicodefont=\truefontname{\truefontname\unicodestyle\unicodeone}
+ % at \currentfontscale\bodyfontsize}%
+ % unreadable but more efficient:
+ \font\unicodefont=\truefontname{\doifelsefontsynonym{\unicodestyle
+ \unicodeone}\empty\truefontname\unicodestyle\unicodeone}
+ at \currentfontscale\bodyfontsize
+ \unicodestrut % off by default
+ \unicodefont\unicodecharcommand{\char\unicodetwo\relax}%
+ \endgroup}
+
+%D This handler is used by default, for instance in:
+%D
+%D \starttyping
+%D \defineunicodefont [MySwitch] [MyFont] % [strut=no,command=\insertunicodeglyph]
+%D
+%D \definefontsynonym [MyFontRegular40] [Sans]
+%D \definefontsynonym [MyFontBold40] [SansBold]
+%D
+%D {\MySwitch \uchar{"40}{`a}}
+%D {\MySwitch \bf \uchar{"40}{`a}}
+%D \stoptyping
+%D
+%D \starttyping
+%D \definefontsynonym [MyFontRegular] [Sans]
+%D \definefontsynonym [MyFontBold] [SansBold]
+%D \stoptyping
+%D
+%D Is also possible, but in that case the number is appended to the raw font
+%D name!
+
+%D \macros
+%D {currentucharmapping,defineucharmapping}
+%D
+%D A (plane,char) pair can be remapped using a uchar mapping
+%D function. The default mapping is to convert the plane to a
+%D lowercase hexadecimal number, and leave the number
+%D untouched. The current remapping is kept in a macro.
+
+\let\currentucharmapping\s!default
+
+\def\defineucharmapping#1%
+ {\setvalue{@@#1\strippedcsname\uchar}}
+
+\defineucharmapping{\s!default}#1#2%
+ {\edef\unicodeone{\lchexnumbers{#1}}\edef\unicodetwo{#2}}
+
+%D An example of a remapping is the following:
+%D
+%D \starttyping
+%D \defineucharmapping{GBK}#1#2%
+%D {\unicodeposition=#1
+%D \advance\unicodeposition -129
+%D \multiply\unicodeposition 190
+%D \advance\unicodeposition #2
+%D \advance\unicodeposition-\ifnum#2>127 65\else64\fi
+%D \dorepositionunicode}
+%D \stoptyping
+%D
+%D This maps the GBK vector onto a compact GBK one. The
+%D auxiliary macro is defined here as a goody.
+
+\def\dorepositionunicode
+ {\dosetdivision\unicodeposition{256}\scratchcounter
+ \advance\scratchcounter \plusone
+ \edef\unicodeone{\ifnum\scratchcounter<10 0\fi\the\scratchcounter}%
+ \dosetmodulo\unicodeposition{256}\scratchcounter
+ \edef\unicodetwo{\the\scratchcounter}}
+
+%D \macros
+%D {setunicodestrut, setunicodescale, nextutoken,
+%D handleunicodeglyph, insertunicodeglyph}
+%D
+%D A careful analysis of the previous macros, learns that the
+%D process of mapping comes down to:
+%D
+%D \startitemize[packed,n]
+%D \item taking care of preceding material (and spacing)
+%D \item defining the font at \type {\currentfontscale} $\times$
+%D \type {\unicodescale} $\times$ \type {\bodyfontsize}
+%D \item inserting a \type {\unicodestrut}
+%D \item inserting the character (glyph)
+%D \item executing some actions afterwards
+%D \stopitemize
+%D
+%D The actions before and after placing the glyph, is up to
+%D the user supplied handler. This handler (\type
+%D {\handleunicodeglpyh}) must, at a certain moment, insert
+%D the glyph using \type {\insertunicodeglyph}
+
+\def\setunicodescale#1%
+ {\def\unicodescale{#1}}
+
+\def\dosetunicodestrut#1#2% height depth
+ {\def\unicodestrut
+ {\vrule
+ \!!width \zeropoint
+ \!!height#1\strutht
+ \!!depth #2\strutdp
+ \relax}}
+
+\def\setunicodestrut#1#2% height depth
+ {\ifdim#1\strutht>\zeropoint
+ \dosetunicodestrut{#1}{#2}%
+ \else\ifdim#1\strutdp>\zeropoint
+ \dosetunicodestrut{#1}{#2}%
+ \else
+ \let\unicodestrut\empty
+ \fi\fi}
+
+\def\resetunicodestrut
+ {\let\unicodestrut\empty}
+
+%D The additional scaling and strut default to:
+
+\setunicodescale{1}
+\setunicodestrut{1}{1}
+
+%D But better is not to have a strut added by default:
+
+\resetunicodestrut
+
+%D The actual code for the additional actions as well as
+%D specific spacing is handled outside these routines. The
+%D character after the two that are under treatment is
+%D available in \type {\nextutoken}.
+
+%D \macros
+%D {defineunicodefont, setupunicodefont}
+%D
+%D Apart from this rather low level implementation, we also
+%D provide a more user friendly alternative. Given that one
+%D has defined:
+%D
+%D \starttyping
+%D \defineunicodefont
+%D [SimChi] [SimplifiedChinese]
+%D [\c!scale=0.85,
+%D \c!height=1.25,
+%D \c!depth=1.00,
+%D \c!interlinespaceinterlinie=yes,
+%D \c!conversion=\chinesenumber,
+%D \c!command=\handlechineseunicodeglyph]
+%D \stoptyping
+%D
+%D Together with:
+%D
+%D \starttyping
+%D \definefontsynonym [SimplifiedChineseRegular] [gbsong]
+%D \definefontsynonym [SimplifiedChineseSlanted] [gbsongsl]
+%D \stoptyping
+%D
+%D we can now switch to Simplified Chinese by saying \type
+%D {SimChi}. Some values can be changed afterwards with
+%D
+%D \starttyping
+%D \setupunicodefont[SimChi][...=...]
+%D \stoptyping
+%D
+%D Specific initializations can be assigned to \type
+%D {commands}.
+
+\def\defineunicodefont
+ {\dotripleempty\dodefineunicodefont}
+
+\def\dodefineunicodefont[#1][#2][#3]%
+ {\doifassignmentelse{#3}
+ {\setupunicodefont[#1][#3]}
+ {\doifelsenothing{#3}
+ {\setupunicodefont[#1][#3]}
+ {\copyparameters
+ [\??uc#1][\??uc#3]
+ [\c!height,\c!depth,\c!scale,\c!commands,\c!strut,
+ \c!interlinespace,\c!command,\c!conversion]}}%
+ \doifelsenothing{#2}
+ {\setvalue{#1}{[uc font #1 undefined]}}
+ {\setvalue{\??uc#1\c!file}{#2}%
+ \doifundefined{\??ff#2\s!Bold}
+ {\definefontsynonym[#2\s!Bold] [#2\s!Regular]%
+ \definefontsynonym[#2\s!Slanted] [#2\s!Regular]%
+ \definefontsynonym[#2\s!Italic] [#2\s!Regular]%
+ \definefontsynonym[#2\s!BoldSlanted][#2\s!Slanted]%
+ \definefontsynonym[#2\s!BoldItalic] [#2\s!Italic]}%
+ \unexpanded\setvalue{#1}{\enableunicodefont{#1}}}}
+
+\def\setupunicodefont
+ {\dodoubleempty\dosetupunicodefont}
+
+\def\dosetupunicodefont[#1][#2]% also predefines
+ {\doifundefined{\??uc#1\c!command}
+ {\copyparameters
+ [\??uc#1][\??uc\s!default]
+ [\c!height,\c!depth,\c!scale,\c!commands,\v!strut,
+ \c!interlinespace,\c!command,\c!conversion]}%
+ \getparameters[\??uc#1][#2]}
+
+\def\enableunicodefont#1%
+ {\definefontsynonym[\s!Unicode][\getvalue{\??uc#1\c!file}]%
+ \def\unicodescale {\getvalue{\??uc#1\c!scale}}%
+ \def\unicodeheight {\getvalue{\??uc#1\c!height}}%
+ \def\unicodedepth {\getvalue{\??uc#1\c!depth}}%
+ \def\unicodedigits {\getvalue{\??uc#1\c!conversion}}%
+ \def\handleunicodeglyph {\getvalue{\??uc#1\c!command}}%
+ \doifnot\currentregime{utf}{\enableregime[unicode]}%
+ % the following \relax's are realy needed
+ \doifvalue{\??uc#1\c!interlinespace}\v!yes
+ \setupinterlinespace\relax
+ \doifelsevalue{\??uc#1\c!strut}\v!yes
+ {\setunicodestrut\unicodeheight\unicodedepth}
+ {\resetunicodestrut}%
+ \getvalue{\??uc#1\c!commands}\relax}
+
+%D \macros
+%D {unicodedigits}
+%D
+%D For convenience we also predefine a number conversion
+%D macro:
+
+\let\unicodedigits\number
+
+%D Because we cannot be sure of the pressence of all font
+%D styles, we remap some by default.
+
+\definefontsynonym [\s!Unicode\s!Bold] [\s!Unicode\s!Regular]
+\definefontsynonym [\s!Unicode\s!Slanted] [\s!Unicode\s!Regular]
+\definefontsynonym [\s!Unicode\s!Italic] [\s!Unicode\s!Regular]
+\definefontsynonym [\s!Unicode\s!BoldSlanted] [\s!Unicode\s!Slanted]
+\definefontsynonym [\s!Unicode\s!BoldItalic] [\s!Unicode\s!Italic]
+
+\setupunicodefont
+ [\s!default]
+ [\c!height=1,
+ \c!depth=1,
+ \c!scale=1,
+ \c!strut=\v!no,
+ \c!interlinespace=\v!no,
+ \c!command=\insertunicodeglyph,
+ \c!conversion=\number]
+
+\protect \endinput