diff options
Diffstat (limited to 'tex/context/base/mkii/lang-frq.mkii')
-rw-r--r-- | tex/context/base/mkii/lang-frq.mkii | 207 |
1 files changed, 207 insertions, 0 deletions
diff --git a/tex/context/base/mkii/lang-frq.mkii b/tex/context/base/mkii/lang-frq.mkii new file mode 100644 index 000000000..afeb5329c --- /dev/null +++ b/tex/context/base/mkii/lang-frq.mkii @@ -0,0 +1,207 @@ +%D \module +%D [ file=lang-frq, +%D version=2004.01.15, +%D title=\CONTEXT\ Language Macros, +%D subtitle=Frequency Tables, +%D author=Hans Hagen, +%D date=\currentdate, +%D copyright={PRAGMA ADE \& \CONTEXT\ Development Team}] +%C +%C This module is part of the \CONTEXT\ macro||package and is +%C therefore copyrighted by \PRAGMA. See mreadme.pdf for +%C details. + +\writestatus{loading}{ConTeXt Language Macros / Frequency Tables} + +\unprotect + +\ifx\s!en\undefined \def\v!en{en} \fi +\ifx\??lg\undefined \def\??lg{@@lg} \fi + +%M \usemodule[layout] + +%D \macros +%D {charwidthmethod} +%D +%D This module implements a method for determining the width of an +%D average character in a language. It uses the dimensions of the +%D current fonts. +%D +%D \def\ShwChrWd#1#2#3% +%D {\chardef\charwidthmethod#1\relax +%D \mainlanguage[#2#3]\the\dimexpr(\averagecharwidth)} +%D +%D \starttabulate[|c|c|c|c|c|c|] +%D \HL +%D \NC \NC\bf0=amount\NC\bf1=.5em\NC2=ex\NC\bf3=frequency\NC\bf4=list\NC\NR +%D \HL +%D \NC\bf en\NC\ShwChrWd0en\NC\ShwChrWd1en\NC\ShwChrWd2en\NC\ShwChrWd3en\NC\ShwChrWd4en\NC\NR +%D \NC\bf nl\NC\ShwChrWd0nl\NC\ShwChrWd1nl\NC\ShwChrWd2nl\NC\ShwChrWd3nl\NC\ShwChrWd4nl\NC\NR +%D \NC\bf de\NC\ShwChrWd0de\NC\ShwChrWd1de\NC\ShwChrWd2de\NC\ShwChrWd3de\NC\ShwChrWd4de\NC\NR +%D \HL +%D \stoptabulate +%D +%D Method~1 ignores the widths and assumes that each character has a +%D width of .5em, which is true for most monospaced fonts. Method~2 +%D takes the x as starting point, and assumes that it's height kind of +%D matches its width. Method~3 is the best one, and determines the +%D average width based on the language specific character table. +%D Method~4 is a mixture between the first two methods: character +%D specific widths applied to an equal distribution. Method~0 reports +%D the total count, which normally is~100. + +\chardef\charwidthmethod=3 % 0=amount 1=em 2=ex 3=frequency 4=flattened >4=ex + +%D \macros +%D {charwidthlanguage} +%D +%D The language used for the calculations is defined as: + +\def\charwidthlanguage{\currentmainlanguage} + +%D \macros +%D {charfreq} +%D +%D This method comes into action in the following macro: + +\def\charfreq#1 #2 % character fraction + {+(\ifcase\charwidthmethod + #2\dimexpr100\onepoint\relax + \or + #2\dimexpr.5em\relax % \emwidth/2 + \or + #2\dimexpr\exheight\relax + \or + #2\fontcharwd\font`#1% + \or + \dimexpr100\fontcharwd\font`#1/\charactertsize\charwidthlanguage\relax % ugly hack + \else + #2\dimexpr\exheight\relax + \fi)} + +%D \macros +%D {startcharactertable} +%D +%D A frequency table is defined with the following macro. The \type +%D {charfreq} macro is used in this table. + +\def\startcharactertable[#1]#2\stopcharactertable % \dimexpr has fuzzy lookahead + {\startnointerference + \long\setgvalue{\??lg:w:#1}{#2}% the width vector + \scratchcounter\zerocount \def\charfreq##1 ##2 {\advance\scratchcounter\plusone} #2% + \long\setxvalue{\??lg:c:#1}{\the\scratchcounter}% the character count + \stopnointerference} + +%D \macros +%D {charactertable,charactertsize} +%D +%D The table content as well as the number of entries can be fetched with +%D the following two macros. The architecture of the table and calling +%D macro permits a fully expandable application. + +\def\charactertable#1% + {\csname\??lg:w:\ifcsname\??lg:w:#1\endcsname#1\else\s!en\fi\endcsname} + +\def\charactertsize#1% + {\csname\??lg:c:\ifcsname\??lg:c:#1\endcsname#1\else\s!en\fi\endcsname} + +%D Although it is of hardly any use, you can inherit a character table: +%D +%D \starttyping +%D \startcharactertable[cz] \charactertable{en} \stopcharactertable +%D \stoptyping +%D +%D We define a default vector with 100\% x's. + +\startcharactertable[en] 100 x \stopcharactertable % kind of default + +%D \macros +%D {averagecharwidth} +%D +%D This macro reports the average width for the current main +%D language (\the \dimexpr (\averagecharwidth)). + +\def\averagecharwidth{\dimexpr((\zeropoint\charactertable\charwidthlanguage)/100)} + +\def\showcharfreq + {\hbox\bgroup + \charwidthlanguage:% + \dostepwiserecurse041% + {\chardef\charwidthmethod\recurselevel\relax + \enspace\recurselevel/\the\dimexpr(\averagecharwidth)}% + \egroup} + +%D Just for fun, we show a few frequency tables as graphic (\in {figure} +%D [fig:charfreq]). +%D +%D \startbuffer +%D \definepalet [charfreq] [en=darkred, nl=darkgreen, de=darkblue] +%D +%D \def\charfreq#1 #2 % +%D {\startMPdrawing +%D interim linejoin := butt ; +%D a := ASCII "#1" ; +%D if (a >= (ASCII "a")) and (a <= (ASCII "z")) : +%D draw ((0,#2*.25cm)--origin--(0,#2*.5cm)) +%D shifted (a*4mm+o,0) +%D withpen pencircle scaled .5mm +%D withcolor c; +%D fi ; +%D \stopMPdrawing} +%D +%D \resetMPdrawing +%D \startMPdrawing +%D numeric a, o ; a := o := 0 ; +%D color c ; c := .5white ; +%D string s ; s := "" ; +%D \stopMPdrawing +%D +%D \startMPdrawing o := 0mm ; c := \MPcolor{charfreq:en} ; \stopMPdrawing +%D \charactertable{en} +%D +%D \startMPdrawing o := 1mm ; c := \MPcolor{charfreq:nl} ; \stopMPdrawing +%D \charactertable{nl} +%D +%D \startMPdrawing o := 2mm ; c := \MPcolor{charfreq:de} ; \stopMPdrawing +%D \charactertable{de} +%D +%D \startMPdrawing +%D for a := ASCII "a" upto ASCII "z" : +%D draw textext.bot("\strut\tttf " & char a) shifted (a*4mm+1mm,-1mm) ; +%D endfor ; +%D \stopMPdrawing +%D +%D \MPdrawingdonetrue \getMPdrawing \resetMPdrawing +%D \stopbuffer +%D +%D \placefigure +%D [here] +%D [fig:charfreq] +%D {The character distributions for English, Dutch and German.} +%D {\getbuffer} +%D +%D A few samples of usage of this mechanism are shown below: +%D +%D \startbuffer +%D {\mainlanguage[en]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank} +%D {\mainlanguage[nl]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank} +%D {\mainlanguage[de]\hsize65\averagecharwidth\mainlanguage[en]\input ward \blank} +%D \stopbuffer +%D +%D \typebuffer \getbuffer +%D +%D Although the widthts differ, the consequenes for breaking the paragraph +%D into lines are minimal. + +%D \macros +%D {freezeaveragecharacterwidth} +%D +%D This macro can be used to make sure that the width does not change during a +%D page break when another font is used. + +\let\normalaveragecharacterwidth\averagecharacterwidth + +\def\freezeaveragecharacterwidth % global + {\xdef\averagecharacterwidth{\dimexpr(\the\normalaveragecharacterwidth)}} + +\protect \endinput |