diff options
author | Hans Hagen <pragma@wxs.nl> | 2011-02-15 16:11:00 +0100 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2011-02-15 16:11:00 +0100 |
commit | d2f628a96f94ce8e83ac33f461e091f3f56f62fb (patch) | |
tree | a97f36d61ed3382278aea0729c22ed245d21b5e7 /tex/context/base/char-ini.lua | |
parent | 84b147aa9d550cbe549d68b00143201f4b68dddc (diff) | |
download | context-d2f628a96f94ce8e83ac33f461e091f3f56f62fb.tar.gz |
beta 2011.02.15 16:11
Diffstat (limited to 'tex/context/base/char-ini.lua')
-rw-r--r-- | tex/context/base/char-ini.lua | 233 |
1 files changed, 172 insertions, 61 deletions
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index 798e79e18..3f7750d86 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -6,12 +6,14 @@ if not modules then modules = { } end modules ['char-ini'] = {
     license = "see context related readme files"
 }
 
+-- todo: make two files, one for format generation, one for format use
+
 local tex = tex
 local utf = unicode.utf8
 
 local utfchar, utfbyte, utfvalues = utf.char, utf.byte, string.utfvalues
 local concat, unpack = table.concat, table.unpack
-local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset
+local next, tonumber, type, rawget, rawset, setmetatable = next, tonumber, type, rawget, rawset, setmetatable
 local texsprint, texprint = tex.sprint, tex.print
 local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.match, string.gmatch
 local texsetlccode, texsetuccode, texsetsfcode, texsetcatcode = tex.setlccode, tex.setuccode, tex.setsfcode, tex.setcatcode
@@ -47,21 +49,6 @@ else
     os.exit()
 end
 
-if not characters.ranges then
-    local ranges, r = allocate { }, 0
-    characters.ranges = ranges
-    for k, v in next, data do
-        if v.range then
-            r = r + 1
-            ranges[r] = v
-        end
-    end
-end
-
-storage.register("characters/ranges",characters.ranges,"characters.ranges")
-
-local ranges = characters.ranges
-
 --[[ldx--
 <p>This converts a string (if given) into a number.</p>
 --ldx]]--
@@ -105,7 +92,144 @@ local private = {
     description = "PRIVATE SLOT",
 }
 
-local extenders = { }
+-- Hangul Syllable
+
+local hangul_syllable_metatable = {
+    __index = {
+        category = "lo",
+        cjkwd = "w",
+        description = "<Hangul Syllable>",
+        direction = "l",
+        linebreak = "h2",
+    }
+}
+
+local hangul_syllable_extender = function(k,v)
+    local shcode = -- for the moment we misuse the shcode .. in fact we should have the components
+           k < 0xAC00 and k -- original
+        or k > 0xD7AF and k -- original
+        or k >= 0xD558 and 0x314E -- 하 => ㅎ
+        or k >= 0xD30C and 0x314D -- 파 => ㅍ
+        or k >= 0xD0C0 and 0x314C -- 타 => ㅌ
+        or k >= 0xCE74 and 0x314B -- 카 => ㅋ
+        or k >= 0xCC28 and 0x314A -- 차 => ㅊ
+        or k >= 0xC790 and 0x3148 -- 자 => ㅈ
+        or k >= 0xC544 and 0x3147 -- 아 => ㅇ
+        or k >= 0xC0AC and 0x3145 -- 사 => ㅅ
+        or k >= 0xBC14 and 0x3142 -- 바 => ㅂ
+        or k >= 0xB9C8 and 0x3141 -- 마 => ㅁ
+        or k >= 0xB77C and 0x3139 -- 라 => ㄹ
+        or k >= 0xB2E4 and 0x3137 -- 다 => ㄷ
+        or k >= 0xB098 and 0x3134 -- 나 => ㄴ
+        or k >= 0xAC00 and 0x3131 -- 가 => ㄱ -- was 0xAC20
+        or k -- can't happen
+    local t = {
+        shcode = shcode,
+        unicodeslot = k,
+    }
+    setmetatable(t,hangul_syllable_metatable)
+    return t
+end
+
+local hangul_syllable_range = {
+    first = 0xAC00,
+    last = 0xD7A3,
+    extender = hangul_syllable_extender,
+}
+
+setmetatable(hangul_syllable_range, hangul_syllable_metatable)
+
+-- CJK Ideograph
+
+local cjk_ideograph_metatable = {
+    __index = {
+        category = "lo",
+        cjkwd = "w",
+        description = "<CJK Ideograph>",
+        direction = "l",
+        linebreak = "id",
+    }
+}
+
+local cjk_ideograph_extender = function(k,v)
+    local t = {
+        -- shcode = shcode,
+        unicodeslot = k,
+    }
+    setmetatable(t,cjk_ideograph_metatable)
+    return t
+end
+
+local cjk_ideograph_range = {
+    first = 0x4E00,
+    last = 0x9FBB,
+    extender = cjk_ideograph_extender,
+}
+
+-- CJK Ideograph Extension A
+
+local cjk_ideograph_extension_a_metatable = {
+    __index = {
+        category = "lo",
+        cjkwd = "w",
+        description = "<CJK Ideograph Extension A>",
+        direction = "l",
+        linebreak = "id",
+    }
+}
+
+local cjk_ideograph_extension_a_extender = function(k,v)
+    local t = {
+        -- shcode = shcode,
+        unicodeslot = k,
+    }
+    setmetatable(t,cjk_ideograph_extension_a_metatable)
+    return t
+end
+
+local cjk_ideograph_extension_a_range = {
+    first = 0x3400,
+    last = 0x4DB5,
+    extender = cjk_ideograph_extension_a_extender,
+}
+
+-- CJK Ideograph Extension B
+
+local cjk_ideograph_extension_b_metatable = {
+    __index = {
+        category = "lo",
+        cjkwd = "w",
+        description = "<CJK Ideograph Extension B>",
+        direction = "l",
+        linebreak = "id",
+    }
+}
+
+local cjk_ideograph_extension_b_extender = function(k,v)
+    local t = {
+        -- shcode = shcode,
+        unicodeslot = k,
+    }
+    setmetatable(t,cjk_ideograph_extension_b_metatable)
+    return t
+end
+
+local cjk_ideograph_extension_b_range = {
+    first = 0x20000,
+    last = 0x2A6D6,
+    extender = cjk_ideograph_extension_b_extender,
+}
+
+-- Ranges
+
+local ranges = {
+    hangul_syllable_range,
+    cjk_ideograph_range,
+    cjk_ideograph_extension_a_range,
+    cjk_ideograph_extension_b_range,
+}
+
+--
 
 setmetatablekey(data, "__index", function(t,k)
     if type(k) == "string" then
@@ -123,11 +247,9 @@ setmetatablekey(data, "__index", function(t,k)
     end
     if k < 0xF0000 then
         for r=1,#ranges do
-            local rr = ranges[r].range
-            local first, last = rr.first, rr.last
-            if k >= first and k <= last then
-                local v = t[first]
-                local extender = extenders[v.description]
+            local rr = ranges[r]
+            if k >= rr.first and k <= rr.last then
+                local extender = rr.extender
                 if extender then
                     v = extender(k,v)
                 end
@@ -139,45 +261,6 @@ setmetatablekey(data, "__index", function(t,k)
     return private -- handy for when we loop over characters in fonts and check for a property
 end )
 
-local metatables = { }
-
-extenders["<Hangul Syllable>"] = function(k,v)
-    local shcode = -- for the moment we misuse the shcode .. in fact we should have the components
-           k < 0xAC00 and k -- original
-        or k > 0xD7AF and k -- original
-        or k >= 0xD558 and 0x314E -- 하 => ㅎ
-        or k >= 0xD30C and 0x314D -- 파 => ㅍ
-        or k >= 0xD0C0 and 0x314C -- 타 => ㅌ
-        or k >= 0xCE74 and 0x314B -- 카 => ㅋ
-        or k >= 0xCC28 and 0x314A -- 차 => ㅊ
-        or k >= 0xC790 and 0x3148 -- 자 => ㅈ
-        or k >= 0xC544 and 0x3147 -- 아 => ㅇ
-        or k >= 0xC0AC and 0x3145 -- 사 => ㅅ
-        or k >= 0xBC14 and 0x3142 -- 바 => ㅂ
-        or k >= 0xB9C8 and 0x3141 -- 마 => ㅁ
-        or k >= 0xB77C and 0x3139 -- 라 => ㄹ
-        or k >= 0xB2E4 and 0x3137 -- 다 => ㄷ
-        or k >= 0xB098 and 0x3134 -- 나 => ㄴ
-        or k >= 0xAC00 and 0x3131 -- 가 => ㄱ -- was 0xAC20
-        or k -- can't happen
-    local t = {
-        -- category = "lo",
-        -- cjkwd = "w",
-        -- description = "<Hangul Syllable>",
-        -- direction = "l",
-        -- linebreak = "h2",
-        shcode = shcode,
-        unicodeslot = k,
-    }
-    local m = metatables[v]
-    if not m then
-        m = { __index = v }
-        metatables[v] = m
-    end
-    setmetatable(t,m)
-    return t
-end
-
 --~ setmetatable(data,{ __index = function(t,k) return "" end }) -- quite old, obsolete
 
 characters.blocks = allocate {
@@ -760,6 +843,7 @@ setmetatable(ucchars, { __index = function(t,u) if u then local c = data[u] c =
 setmetatable(shchars, { __index = function(t,u) if u then local c = data[u] c = c and c.shcode c = c and utfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end } )
 
 characters.specialchars = allocate() local specialchars = characters.specialchars -- lazy table
+characters.descriptions = allocate() local descriptions = characters.descriptions -- lazy table
 
 setmetatable(specialchars, { __index = function(t,u)
     if u then
@@ -788,6 +872,33 @@ setmetatable(specialchars, { __index = function(t,u)
     end
 end } )
 
+setmetatable(descriptions, { __index = function(t,k)
+    -- 0.05 - 0.10 sec
+    for u, c in next, data do
+        local d = c.description
+        if d then
+            d = gsub(d," ","")
+            d = lower(d)
+            t[d] = u
+        end
+    end
+    local d = rawget(t,k)
+    if not d then
+        t[k] = k
+    end
+    return d
+end } )
+
+function characters.unicodechar(asked)
+    local n = tonumber(asked)
+    if n then
+        return n
+    elseif type(asked) == "string" then
+        asked = gsub(asked," ","")
+        return descriptions[asked]
+    end
+end
+
 function characters.lower(str)
     local new, n = { }, 0
     for u in utfvalues(str) do