From d2f628a96f94ce8e83ac33f461e091f3f56f62fb Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Tue, 15 Feb 2011 16:11:00 +0100 Subject: beta 2011.02.15 16:11 --- tex/context/base/char-def.lua | 68 ++++---- tex/context/base/char-ini.lua | 233 ++++++++++++++++++++-------- tex/context/base/cont-new.mkii | 2 +- tex/context/base/cont-new.mkiv | 2 +- tex/context/base/context.mkii | 2 +- tex/context/base/context.mkiv | 2 +- tex/context/base/node-mig.lua | 2 + tex/context/base/pack-rul.mkiv | 68 -------- tex/context/base/page-mar.mkiv | 2 + tex/context/base/spac-ver.mkiv | 2 +- tex/context/base/status-files.pdf | Bin 23236 -> 23241 bytes tex/context/base/task-ini.lua | 6 +- tex/context/base/unic-ini.lua | 19 +++ tex/context/base/unic-ini.mkiv | 14 +- tex/generic/context/luatex-fonts-merged.lua | 2 +- 15 files changed, 249 insertions(+), 175 deletions(-) create mode 100644 tex/context/base/unic-ini.lua (limited to 'tex') diff --git a/tex/context/base/char-def.lua b/tex/context/base/char-def.lua index 9549cab68..f53f870b7 100644 --- a/tex/context/base/char-def.lua +++ b/tex/context/base/char-def.lua @@ -83980,15 +83980,14 @@ characters.data={ specials={ "square", 0x0067, 0x0061, 0x006C }, unicodeslot=0x33FF, }, - [0x3400]={ - category="lo", - cjkwd="w", - description="", - direction="l", - linebreak="id", ---unicodeslot=0x3400, - range={ first=0x3400, last=0x4DB5 }, - }, +--~ [0x3400]={ +--~ category="lo", +--~ cjkwd="w", +--~ description="", +--~ direction="l", +--~ linebreak="id", +--~ range={ first=0x3400, last=0x4DB5 }, +--~ }, [0x4DC0]={ category="so", description="HEXAGRAM FOR THE CREATIVE HEAVEN", @@ -84437,15 +84436,14 @@ characters.data={ linebreak="al", unicodeslot=0x4DFF, }, - [0x4E00]={ - category="lo", - cjkwd="w", - description="", - direction="l", - linebreak="id", ---unicodeslot=0x4E00, - range={ first=0x4E00, last=0x9FBB }, - }, +--~ [0x4E00]={ +--~ category="lo", +--~ cjkwd="w", +--~ description="", +--~ direction="l", +--~ linebreak="id", +--~ range={ first=0x4E00, last=0x9FBB }, +--~ }, [0xA000]={ category="lo", cjkwd="w", @@ -95109,15 +95107,14 @@ characters.data={ linebreak="ex", unicodeslot=0xA877, }, - [0xAC00]={ - category="lo", - cjkwd="w", - description="", - direction="l", - linebreak="h2", ---unicodeslot=0xAC00, - range={ first=0xAC00, last=0xD7A3 }, - }, +--~ [0xAC00]={ +--~ category="lo", +--~ cjkwd="w", +--~ description="", +--~ direction="l", +--~ linebreak="h2", +--~ range={ first=0xAC00, last=0xD7A3 }, +--~ }, [0xD800]={ category="cs", description="", @@ -133710,15 +133707,14 @@ characters.data={ specials={ "font", 0x0039 }, unicodeslot=0x1D7FF, }, - [0x20000]={ - category="lo", - cjkwd="w", - description="", - direction="l", - linebreak="id", ---unicodeslot=0x20000, - range={ first=0x20000, last=0x2A6D6 }, - }, +--~ [0x20000]={ +--~ category="lo", +--~ cjkwd="w", +--~ description="", +--~ direction="l", +--~ linebreak="id", +--~ range={ first=0x20000, last=0x2A6D6 }, +--~ }, [0x2F800]={ category="lo", cjkwd="w", diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index 798e79e18..3f7750d86 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -6,12 +6,14 @@ if not modules then modules = { } end modules ['char-ini'] = { license = "see context related readme files" } +-- todo: make two files, one for format generation, one for format use + local tex = tex local utf = unicode.utf8 local utfchar, utfbyte, utfvalues = utf.char, utf.byte, string.utfvalues local concat, unpack = table.concat, table.unpack -local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset +local next, tonumber, type, rawget, rawset, setmetatable = next, tonumber, type, rawget, rawset, setmetatable local texsprint, texprint = tex.sprint, tex.print local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.match, string.gmatch local texsetlccode, texsetuccode, texsetsfcode, texsetcatcode = tex.setlccode, tex.setuccode, tex.setsfcode, tex.setcatcode @@ -47,21 +49,6 @@ else os.exit() end -if not characters.ranges then - local ranges, r = allocate { }, 0 - characters.ranges = ranges - for k, v in next, data do - if v.range then - r = r + 1 - ranges[r] = v - end - end -end - -storage.register("characters/ranges",characters.ranges,"characters.ranges") - -local ranges = characters.ranges - --[[ldx--

This converts a string (if given) into a number.

--ldx]]-- @@ -105,7 +92,144 @@ local private = { description = "PRIVATE SLOT", } -local extenders = { } +-- Hangul Syllable + +local hangul_syllable_metatable = { + __index = { + category = "lo", + cjkwd = "w", + description = "", + direction = "l", + linebreak = "h2", + } +} + +local hangul_syllable_extender = function(k,v) + local shcode = -- for the moment we misuse the shcode .. in fact we should have the components + k < 0xAC00 and k -- original + or k > 0xD7AF and k -- original + or k >= 0xD558 and 0x314E -- 하 => ㅎ + or k >= 0xD30C and 0x314D -- 파 => ㅍ + or k >= 0xD0C0 and 0x314C -- 타 => ㅌ + or k >= 0xCE74 and 0x314B -- 카 => ㅋ + or k >= 0xCC28 and 0x314A -- 차 => ㅊ + or k >= 0xC790 and 0x3148 -- 자 => ㅈ + or k >= 0xC544 and 0x3147 -- 아 => ㅇ + or k >= 0xC0AC and 0x3145 -- 사 => ㅅ + or k >= 0xBC14 and 0x3142 -- 바 => ㅂ + or k >= 0xB9C8 and 0x3141 -- 마 => ㅁ + or k >= 0xB77C and 0x3139 -- 라 => ㄹ + or k >= 0xB2E4 and 0x3137 -- 다 => ㄷ + or k >= 0xB098 and 0x3134 -- 나 => ㄴ + or k >= 0xAC00 and 0x3131 -- 가 => ㄱ -- was 0xAC20 + or k -- can't happen + local t = { + shcode = shcode, + unicodeslot = k, + } + setmetatable(t,hangul_syllable_metatable) + return t +end + +local hangul_syllable_range = { + first = 0xAC00, + last = 0xD7A3, + extender = hangul_syllable_extender, +} + +setmetatable(hangul_syllable_range, hangul_syllable_metatable) + +-- CJK Ideograph + +local cjk_ideograph_metatable = { + __index = { + category = "lo", + cjkwd = "w", + description = "", + direction = "l", + linebreak = "id", + } +} + +local cjk_ideograph_extender = function(k,v) + local t = { + -- shcode = shcode, + unicodeslot = k, + } + setmetatable(t,cjk_ideograph_metatable) + return t +end + +local cjk_ideograph_range = { + first = 0x4E00, + last = 0x9FBB, + extender = cjk_ideograph_extender, +} + +-- CJK Ideograph Extension A + +local cjk_ideograph_extension_a_metatable = { + __index = { + category = "lo", + cjkwd = "w", + description = "", + direction = "l", + linebreak = "id", + } +} + +local cjk_ideograph_extension_a_extender = function(k,v) + local t = { + -- shcode = shcode, + unicodeslot = k, + } + setmetatable(t,cjk_ideograph_extension_a_metatable) + return t +end + +local cjk_ideograph_extension_a_range = { + first = 0x3400, + last = 0x4DB5, + extender = cjk_ideograph_extension_a_extender, +} + +-- CJK Ideograph Extension B + +local cjk_ideograph_extension_b_metatable = { + __index = { + category = "lo", + cjkwd = "w", + description = "", + direction = "l", + linebreak = "id", + } +} + +local cjk_ideograph_extension_b_extender = function(k,v) + local t = { + -- shcode = shcode, + unicodeslot = k, + } + setmetatable(t,cjk_ideograph_extension_b_metatable) + return t +end + +local cjk_ideograph_extension_b_range = { + first = 0x20000, + last = 0x2A6D6, + extender = cjk_ideograph_extension_b_extender, +} + +-- Ranges + +local ranges = { + hangul_syllable_range, + cjk_ideograph_range, + cjk_ideograph_extension_a_range, + cjk_ideograph_extension_b_range, +} + +-- setmetatablekey(data, "__index", function(t,k) if type(k) == "string" then @@ -123,11 +247,9 @@ setmetatablekey(data, "__index", function(t,k) end if k < 0xF0000 then for r=1,#ranges do - local rr = ranges[r].range - local first, last = rr.first, rr.last - if k >= first and k <= last then - local v = t[first] - local extender = extenders[v.description] + local rr = ranges[r] + if k >= rr.first and k <= rr.last then + local extender = rr.extender if extender then v = extender(k,v) end @@ -139,45 +261,6 @@ setmetatablekey(data, "__index", function(t,k) return private -- handy for when we loop over characters in fonts and check for a property end ) -local metatables = { } - -extenders[""] = function(k,v) - local shcode = -- for the moment we misuse the shcode .. in fact we should have the components - k < 0xAC00 and k -- original - or k > 0xD7AF and k -- original - or k >= 0xD558 and 0x314E -- 하 => ㅎ - or k >= 0xD30C and 0x314D -- 파 => ㅍ - or k >= 0xD0C0 and 0x314C -- 타 => ㅌ - or k >= 0xCE74 and 0x314B -- 카 => ㅋ - or k >= 0xCC28 and 0x314A -- 차 => ㅊ - or k >= 0xC790 and 0x3148 -- 자 => ㅈ - or k >= 0xC544 and 0x3147 -- 아 => ㅇ - or k >= 0xC0AC and 0x3145 -- 사 => ㅅ - or k >= 0xBC14 and 0x3142 -- 바 => ㅂ - or k >= 0xB9C8 and 0x3141 -- 마 => ㅁ - or k >= 0xB77C and 0x3139 -- 라 => ㄹ - or k >= 0xB2E4 and 0x3137 -- 다 => ㄷ - or k >= 0xB098 and 0x3134 -- 나 => ㄴ - or k >= 0xAC00 and 0x3131 -- 가 => ㄱ -- was 0xAC20 - or k -- can't happen - local t = { - -- category = "lo", - -- cjkwd = "w", - -- description = "", - -- direction = "l", - -- linebreak = "h2", - shcode = shcode, - unicodeslot = k, - } - local m = metatables[v] - if not m then - m = { __index = v } - metatables[v] = m - end - setmetatable(t,m) - return t -end - --~ setmetatable(data,{ __index = function(t,k) return "" end }) -- quite old, obsolete characters.blocks = allocate { @@ -760,6 +843,7 @@ setmetatable(ucchars, { __index = function(t,u) if u then local c = data[u] c = setmetatable(shchars, { __index = function(t,u) if u then local c = data[u] c = c and c.shcode c = c and utfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end } ) characters.specialchars = allocate() local specialchars = characters.specialchars -- lazy table +characters.descriptions = allocate() local descriptions = characters.descriptions -- lazy table setmetatable(specialchars, { __index = function(t,u) if u then @@ -788,6 +872,33 @@ setmetatable(specialchars, { __index = function(t,u) end end } ) +setmetatable(descriptions, { __index = function(t,k) + -- 0.05 - 0.10 sec + for u, c in next, data do + local d = c.description + if d then + d = gsub(d," ","") + d = lower(d) + t[d] = u + end + end + local d = rawget(t,k) + if not d then + t[k] = k + end + return d +end } ) + +function characters.unicodechar(asked) + local n = tonumber(asked) + if n then + return n + elseif type(asked) == "string" then + asked = gsub(asked," ","") + return descriptions[asked] + end +end + function characters.lower(str) local new, n = { }, 0 for u in utfvalues(str) do diff --git a/tex/context/base/cont-new.mkii b/tex/context/base/cont-new.mkii index 9dcdf261e..46e16e7c8 100644 --- a/tex/context/base/cont-new.mkii +++ b/tex/context/base/cont-new.mkii @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2011.02.14 19:48} +\newcontextversion{2011.02.15 16:11} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index 17d3a9959..57ce69dd0 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2011.02.14 19:48} +\newcontextversion{2011.02.15 16:11} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/context.mkii b/tex/context/base/context.mkii index 8f888a30c..e17d70eb5 100644 --- a/tex/context/base/context.mkii +++ b/tex/context/base/context.mkii @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2011.02.14 19:48} +\edef\contextversion{2011.02.15 16:11} %D For those who want to use this: diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index 2791cd0f0..0046482a5 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2011.02.14 19:48} +\edef\contextversion{2011.02.15 16:11} %D For those who want to use this: diff --git a/tex/context/base/node-mig.lua b/tex/context/base/node-mig.lua index e93887694..620cb3c0e 100644 --- a/tex/context/base/node-mig.lua +++ b/tex/context/base/node-mig.lua @@ -104,6 +104,8 @@ function nodes.handlers.migrate(head,where) end end +-- for the moment this way, this will disappear + experiments.register("marks.migrate", function(v) if v then tasks.enableaction("mvlbuilders", "nodes.handlers.migrate") diff --git a/tex/context/base/pack-rul.mkiv b/tex/context/base/pack-rul.mkiv index ccd3ebb83..b1aa05494 100644 --- a/tex/context/base/pack-rul.mkiv +++ b/tex/context/base/pack-rul.mkiv @@ -3374,74 +3374,6 @@ %D %D \typebuffer -% \unexpanded\def\defineframedcontent -% {\dodoubleempty\dodefineframedcontent} -% -% \def\dodefineframedcontent[#1][#2]% -% {\presetlocalframed[\??fc#1]% -% \getparameters[\??fc#1] -% [\c!leftoffset=\zeropoint, -% \c!rightoffset=\getvalue{\??fc#1\c!leftoffset}, -% \c!topoffset=\zeropoint, -% \c!bottomoffset=\getvalue{\??fc#1\c!topoffset}, -% \c!strut=\v!no, -% \c!offset=\v!overlay, -% \c!linecorrection=\v!no, -% \c!left=, -% \c!right=, -% #2]} -% -% \unexpanded\def\setupframedcontent -% {\dodoubleempty\dosetupframedcontent} -% -% \def\dosetupframedcontent[#1][#2]% -% {\def\docommand##1{\getparameters[\??fc##1][#2]}% -% \processcommacommand[#1]\docommand} -% -% \unexpanded\def\startframedcontent[#1]% -% {\bgroup -% \doifelse{#1}\v!off -% {\let\stopframedcontent\egroup} -% {\ifcsname\??fc#1\c!frame\endcsname -% \dostartframedcontent{#1}% -% \fi}} -% -% \def\dostartframedcontent#1% -% {\unexpanded\def\stopframedcontent{\dostopframedcontent{#1}}% -% \setbox\framebox\hbox\bgroup -% \setlocalhsize -% \hsize\localhsize -% \advance\hsize\dimexpr-\getvalue{\??fc#1\c!leftoffset}-\getvalue{\??fc#1\c!rightoffset} \relax -% \advance\vsize\dimexpr-\getvalue{\??fc#1\c!topoffset} -\getvalue{\??fc#1\c!bottomoffset}\relax -% \hskip\getvalue{\??fc#1\c!leftoffset}% -% \vbox\bgroup -% \vskip\getvalue{\??fc#1\c!topoffset}% -% \vbox\bgroup -% \forgetall -% \blank[\v!disable]} -% -% \def\dostopframedcontent#1% -% {\removelastskip -% \egroup -% \vskip\getvalue{\??fc#1\c!bottomoffset}% -% \egroup -% \hskip\getvalue{\??fc#1\c!rightoffset}% -% \egroup -% \doifvalue{\??fc#1\c!width}\v!fit -% {\letvalue{\??fc#1\c!width}\v!fixed}% no shapebox -% \ifinsidefloat -% \donefalse -% \else -% \doifelsevalue{\??fc#1\c!linecorrection}\v!yes\donetrue\donefalse -% \fi -% % plaats ? -% \ifdone\startlinecorrection\fi -% \getvalue{\??fc#1\c!left}% new -% \localframed[\??fc#1]{\box\framebox}% -% \getvalue{\??fc#1\c!right}% new -% \ifdone\stoplinecorrection\fi -% \egroup} - \def\framedcontentparameter #1{\csname\doframedcontentparameter{\??fc\@@framedcontent}#1\endcsname} \def\doframedcontentparameter #1#2{\ifcsname#1#2\endcsname#1#2\else\expandafter\doframedcontentparentparameter\csname#1\s!parent\endcsname#2\fi} \def\doframedcontentparentparameter#1#2{\ifx#1\relax\s!empty\else\doframedcontentparameter#1#2\fi} diff --git a/tex/context/base/page-mar.mkiv b/tex/context/base/page-mar.mkiv index 0868d62ad..1ae76f2cd 100644 --- a/tex/context/base/page-mar.mkiv +++ b/tex/context/base/page-mar.mkiv @@ -271,8 +271,10 @@ \v!no=>\let\margincontentalign\v!normal, \v!inner=>\let\margincontentalign#1, \v!outer=>\let\margincontentalign#2, + \v!flushright=>\let\margincontentalign\v!left, \v!left=>\let\margincontentalign\v!left, \v!middle=>\let\margincontentalign\v!middle, + \v!flushleft=>\let\margincontentalign\v!right, \v!right=>\let\margincontentalign\v!right]% \doifvaluesomething{\??im\margincontenttag\c!align} % watch {} around set {\edef\margincontentalign{{\getvalue{\??im\margincontenttag\c!align},\margincontentalign}}}% diff --git a/tex/context/base/spac-ver.mkiv b/tex/context/base/spac-ver.mkiv index 1da6d6d7c..9690c9638 100644 --- a/tex/context/base/spac-ver.mkiv +++ b/tex/context/base/spac-ver.mkiv @@ -1788,7 +1788,7 @@ % used both -\def\doinhibitblank{\vspacing[\v!disable]} % can be made faster +\def\doinhibitblank{\vspacing[\v!disable]} % can be made faster (keep this command, used in styles) \def\inhibitblank {\vspacing[\v!disable]} % can be made faster \let\setupblank \setupvspacing diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf index e9fde1cd5..f818f6229 100644 Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ diff --git a/tex/context/base/task-ini.lua b/tex/context/base/task-ini.lua index ca7023569..d3f58b5b4 100644 --- a/tex/context/base/task-ini.lua +++ b/tex/context/base/task-ini.lua @@ -27,7 +27,7 @@ tasks.appendaction("processors", "words", "languages.words.check") tasks.appendaction("processors", "fonts", "builders.paragraphs.solutions.splitters.split") -- experimental tasks.appendaction("processors", "fonts", "nodes.handlers.characters") -- maybe todo -tasks.appendaction("processors", "fonts", "nodes.injections.handler") -- maybe todo +tasks.appendaction("processors", "fonts", "nodes.injections.handler") -- maybe todo tasks.appendaction("processors", "fonts", "nodes.handlers.protectglyphs", nil, "nohead") -- maybe todo tasks.appendaction("processors", "fonts", "builders.kernel.ligaturing") -- always on tasks.appendaction("processors", "fonts", "builders.kernel.kerning") -- always on @@ -70,9 +70,9 @@ tasks.appendaction("finalizers", "fonts", "builders.paragraphs.solutions -- rather new tasks.appendaction("mvlbuilders", "normalizers", "nodes.handlers.migrate") -- -tasks.appendaction("mvlbuilders", "normalizers", "builders.vspacing.pagehandler") -- last ! +tasks.appendaction("mvlbuilders", "normalizers", "builders.vspacing.pagehandler") -- last ! -tasks.appendaction("vboxbuilders", "normalizers", "builders.vspacing.vboxhandler") -- +tasks.appendaction("vboxbuilders", "normalizers", "builders.vspacing.vboxhandler") -- -- speedup: only kick in when used diff --git a/tex/context/base/unic-ini.lua b/tex/context/base/unic-ini.lua new file mode 100644 index 000000000..cca1f0617 --- /dev/null +++ b/tex/context/base/unic-ini.lua @@ -0,0 +1,19 @@ +if not modules then modules = { } end modules ['unic-ini'] = { + version = 1.001, + comment = "companion to unic-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local utfchar = utf.char + +-- Beware, initializing unicodechar happens at first usage and takes +-- 0.05 -- 0.1 second (lots of function calls). + +function commands.unicodechar(asked) + local n = characters.unicodechar(asked) + if n then + context(utfchar(n)) + end +end diff --git a/tex/context/base/unic-ini.mkiv b/tex/context/base/unic-ini.mkiv index 0b7f19153..62fd82242 100644 --- a/tex/context/base/unic-ini.mkiv +++ b/tex/context/base/unic-ini.mkiv @@ -13,15 +13,27 @@ \writestatus{loading}{ConTeXt Unicode Support / Initialization} +\registerctxluafile{unic-ini}{1.001} + \unprotect \let\keeputfcharacters\relax % used in xtag \newconstant\utfunicodetracer % used in xtag -\def\unicodechar #1{\char\numexpr#1\relax} % no lookahead +%def\unicodechar #1{\char\numexpr#1\relax} % no lookahead \def\unicodenumber #1{\the \numexpr#1\relax} % no lookahead \def\unicodehexnumber#1{\cldcontext{number.toevenhex(\number#1))}} +%D Better is: +%D +%D \startbuffer +%D \unicodechar{left square bracket}okay\unicodechar{right square bracket} +%D \stopbuffer +%D +%D \typebuffer \getbuffer + +\def\unicodechar #1{\ctxcommand{unicodechar("#1")}} + \unexpanded\def\unknownchar{{\hbox{\vrule\!!width.5em\!!height1ex\!!depth\zeropoint}}} \ifx\zwnbsp\undefined \let\zwnbsp\relax \fi % zerowidthnonbreakablespace diff --git a/tex/generic/context/luatex-fonts-merged.lua b/tex/generic/context/luatex-fonts-merged.lua index bc825a306..dc44eecfb 100644 --- a/tex/generic/context/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 02/14/11 19:48:13 +-- merge date : 02/15/11 16:11:51 do -- begin closure to overcome local limits and interference -- cgit v1.2.3