summaryrefslogtreecommitdiff
path: root/tex/context/base/char-ini.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/char-ini.lua')
-rw-r--r-- tex/context/base/char-ini.lua 233
1 files changed, 172 insertions, 61 deletions
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index 798e79e18..3f7750d86 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -6,12 +6,14 @@ if not modules then modules = { } end modules ['char-ini'] = {
license = "see context related readme files"
}
+-- todo: make two files, one for format generation, one for format use
+
local tex = tex
local utf = unicode.utf8
local utfchar, utfbyte, utfvalues = utf.char, utf.byte, string.utfvalues
local concat, unpack = table.concat, table.unpack
-local next, tonumber, type, rawget, rawset = next, tonumber, type, rawget, rawset
+local next, tonumber, type, rawget, rawset, setmetatable = next, tonumber, type, rawget, rawset, setmetatable
local texsprint, texprint = tex.sprint, tex.print
local format, lower, gsub, match, gmatch = string.format, string.lower, string.gsub, string.match, string.gmatch -- one value per name; a duplicated string.match used to shift gmatch onto string.match
local texsetlccode, texsetuccode, texsetsfcode, texsetcatcode = tex.setlccode, tex.setuccode, tex.setsfcode, tex.setcatcode
@@ -47,21 +49,6 @@ else
os.exit()
end
-if not characters.ranges then
- local ranges, r = allocate { }, 0
- characters.ranges = ranges
- for k, v in next, data do
- if v.range then
- r = r + 1
- ranges[r] = v
- end
- end
-end
-
-storage.register("characters/ranges",characters.ranges,"characters.ranges")
-
-local ranges = characters.ranges
-
--[[ldx--
<p>This converts a string (if given) into a number.</p>
--ldx]]--
@@ -105,7 +92,144 @@ local private = {
description = "PRIVATE SLOT",
}
-local extenders = { }
+-- Hangul Syllable: shared default properties; per-character tables built by the extender fall back to these via __index
+
+local hangul_syllable_metatable = {
+ __index = {
+ category = "lo",
+ cjkwd = "w",
+ description = "<Hangul Syllable>",
+ direction = "l",
+ linebreak = "h2",
+ }
+}
+
+local hangul_syllable_extender = function(k,v) -- k: code point (number); v is accepted but not used in this body; returns a new entry table
+ local shcode = -- for the moment we misuse the shcode .. in fact we should have the components; thresholds are tested from high to low, first hit wins
+ k < 0xAC00 and k -- original
+ or k > 0xD7AF and k -- original (NOTE(review): hangul_syllable_range.last is 0xD7A3, not 0xD7AF -- confirm which bound is meant)
+ or k >= 0xD558 and 0x314E -- 하 => ㅎ
+ or k >= 0xD30C and 0x314D -- 파 => ㅍ
+ or k >= 0xD0C0 and 0x314C -- 타 => ㅌ
+ or k >= 0xCE74 and 0x314B -- 카 => ㅋ
+ or k >= 0xCC28 and 0x314A -- 차 => ㅊ
+ or k >= 0xC790 and 0x3148 -- 자 => ㅈ
+ or k >= 0xC544 and 0x3147 -- 아 => ㅇ
+ or k >= 0xC0AC and 0x3145 -- 사 => ㅅ
+ or k >= 0xBC14 and 0x3142 -- 바 => ㅂ
+ or k >= 0xB9C8 and 0x3141 -- 마 => ㅁ
+ or k >= 0xB77C and 0x3139 -- 라 => ㄹ
+ or k >= 0xB2E4 and 0x3137 -- 다 => ㄷ
+ or k >= 0xB098 and 0x3134 -- 나 => ㄴ
+ or k >= 0xAC00 and 0x3131 -- 가 => ㄱ -- was 0xAC20
+ or k -- can't happen
+ local t = { -- only the per-character fields are stored in the entry itself
+ shcode = shcode,
+ unicodeslot = k,
+ }
+ setmetatable(t,hangul_syllable_metatable) -- category, cjkwd, description, direction and linebreak resolve through __index
+ return t
+end
+
+local hangul_syllable_range = { -- range descriptor: first/last are compared inclusively by the range loop; extender builds the entry
+ first = 0xAC00,
+ last = 0xD7A3,
+ extender = hangul_syllable_extender,
+}
+
+setmetatable(hangul_syllable_range, hangul_syllable_metatable) -- the range table itself also falls back to the shared default properties
+
+-- CJK Ideograph: shared default properties; per-character tables built by the extender fall back to these via __index
+
+local cjk_ideograph_metatable = {
+ __index = {
+ category = "lo",
+ cjkwd = "w",
+ description = "<CJK Ideograph>",
+ direction = "l",
+ linebreak = "id",
+ }
+}
+
+local cjk_ideograph_extender = function(k,v) -- k: code point (number); v is accepted but not used in this body; returns a new entry table
+ local t = { -- only the code point is stored; no shcode is set for ideographs
+ -- shcode = shcode,
+ unicodeslot = k,
+ }
+ setmetatable(t,cjk_ideograph_metatable) -- remaining properties resolve through __index
+ return t
+end
+
+local cjk_ideograph_range = { -- range descriptor; NOTE(review): no setmetatable call follows, unlike hangul_syllable_range -- confirm intended
+ first = 0x4E00,
+ last = 0x9FBB,
+ extender = cjk_ideograph_extender,
+}
+
+-- CJK Ideograph Extension A: shared default properties; per-character tables built by the extender fall back to these via __index
+
+local cjk_ideograph_extension_a_metatable = {
+ __index = {
+ category = "lo",
+ cjkwd = "w",
+ description = "<CJK Ideograph Extension A>",
+ direction = "l",
+ linebreak = "id",
+ }
+}
+
+local cjk_ideograph_extension_a_extender = function(k,v) -- k: code point (number); v is accepted but not used in this body; returns a new entry table
+ local t = { -- only the code point is stored; no shcode is set for ideographs
+ -- shcode = shcode,
+ unicodeslot = k,
+ }
+ setmetatable(t,cjk_ideograph_extension_a_metatable) -- remaining properties resolve through __index
+ return t
+end
+
+local cjk_ideograph_extension_a_range = { -- range descriptor; NOTE(review): no setmetatable call follows, unlike hangul_syllable_range -- confirm intended
+ first = 0x3400,
+ last = 0x4DB5,
+ extender = cjk_ideograph_extension_a_extender,
+}
+
+-- CJK Ideograph Extension B: shared default properties; per-character tables built by the extender fall back to these via __index
+
+local cjk_ideograph_extension_b_metatable = {
+ __index = {
+ category = "lo",
+ cjkwd = "w",
+ description = "<CJK Ideograph Extension B>",
+ direction = "l",
+ linebreak = "id",
+ }
+}
+
+local cjk_ideograph_extension_b_extender = function(k,v) -- k: code point (number); v is accepted but not used in this body; returns a new entry table
+ local t = { -- only the code point is stored; no shcode is set for ideographs
+ -- shcode = shcode,
+ unicodeslot = k,
+ }
+ setmetatable(t,cjk_ideograph_extension_b_metatable) -- remaining properties resolve through __index
+ return t
+end
+
+local cjk_ideograph_extension_b_range = { -- range descriptor; NOTE(review): no setmetatable call follows, unlike hangul_syllable_range -- confirm intended
+ first = 0x20000,
+ last = 0x2A6D6,
+ extender = cjk_ideograph_extension_b_extender,
+}
+
+-- Ranges: list of range descriptors, walked from index 1 upward by the lookup loop that tests first/last
+
+local ranges = {
+ hangul_syllable_range,
+ cjk_ideograph_range,
+ cjk_ideograph_extension_a_range,
+ cjk_ideograph_extension_b_range,
+}
+
+--
setmetatablekey(data, "__index", function(t,k)
if type(k) == "string" then
@@ -123,11 +247,9 @@ setmetatablekey(data, "__index", function(t,k)
end
if k < 0xF0000 then
for r=1,#ranges do
- local rr = ranges[r].range
- local first, last = rr.first, rr.last
- if k >= first and k <= last then
- local v = t[first]
- local extender = extenders[v.description]
+ local rr = ranges[r]
+ if k >= rr.first and k <= rr.last then
+ local extender = rr.extender
if extender then
v = extender(k,v)
end
@@ -139,45 +261,6 @@ setmetatablekey(data, "__index", function(t,k)
return private -- handy for when we loop over characters in fonts and check for a property
end )
-local metatables = { }
-
-extenders["<Hangul Syllable>"] = function(k,v)
- local shcode = -- for the moment we misuse the shcode .. in fact we should have the components
- k < 0xAC00 and k -- original
- or k > 0xD7AF and k -- original
- or k >= 0xD558 and 0x314E -- 하 => ㅎ
- or k >= 0xD30C and 0x314D -- 파 => ㅍ
- or k >= 0xD0C0 and 0x314C -- 타 => ㅌ
- or k >= 0xCE74 and 0x314B -- 카 => ㅋ
- or k >= 0xCC28 and 0x314A -- 차 => ㅊ
- or k >= 0xC790 and 0x3148 -- 자 => ㅈ
- or k >= 0xC544 and 0x3147 -- 아 => ㅇ
- or k >= 0xC0AC and 0x3145 -- 사 => ㅅ
- or k >= 0xBC14 and 0x3142 -- 바 => ㅂ
- or k >= 0xB9C8 and 0x3141 -- 마 => ㅁ
- or k >= 0xB77C and 0x3139 -- 라 => ㄹ
- or k >= 0xB2E4 and 0x3137 -- 다 => ㄷ
- or k >= 0xB098 and 0x3134 -- 나 => ㄴ
- or k >= 0xAC00 and 0x3131 -- 가 => ㄱ -- was 0xAC20
- or k -- can't happen
- local t = {
- -- category = "lo",
- -- cjkwd = "w",
- -- description = "<Hangul Syllable>",
- -- direction = "l",
- -- linebreak = "h2",
- shcode = shcode,
- unicodeslot = k,
- }
- local m = metatables[v]
- if not m then
- m = { __index = v }
- metatables[v] = m
- end
- setmetatable(t,m)
- return t
-end
-
--~ setmetatable(data,{ __index = function(t,k) return "" end }) -- quite old, obsolete
characters.blocks = allocate {
@@ -760,6 +843,7 @@ setmetatable(ucchars, { __index = function(t,u) if u then local c = data[u] c =
setmetatable(shchars, { __index = function(t,u) if u then local c = data[u] c = c and c.shcode c = c and utfstring(c) or (type(u) == "number" and utfchar(u)) or u t[u] = c return c end end } )
characters.specialchars = allocate() local specialchars = characters.specialchars -- lazy table
+characters.descriptions = allocate() local descriptions = characters.descriptions -- lazy table
setmetatable(specialchars, { __index = function(t,u)
if u then
@@ -788,6 +872,33 @@ setmetatable(specialchars, { __index = function(t,u)
end
end } )
+setmetatable(descriptions, { __index = function(t,k) -- runs on a missing key: fills t from every description in data, then answers for k
+ -- 0.05 - 0.10 sec
+ for u, c in next, data do
+ local d = c.description
+ if d then
+ d = gsub(d," ","") -- key form: description with spaces removed ...
+ d = lower(d) -- ... and lowercased
+ t[d] = u -- normalized description maps to the key u under which data holds the entry
+ end
+ end
+ local d = rawget(t,k) -- rawget so this lookup cannot re-enter __index
+ if not d then
+ t[k] = k -- NOTE(review): the miss is cached as k itself, so this call returns nil but a later t[k] yields k -- confirm intended
+ end
+ return d
+end } )
+
+function characters.unicodechar(asked) -- returns tonumber(asked) when numeric, else the descriptions lookup for a string; nothing for other types
+ local n = tonumber(asked)
+ if n then
+ return n
+ elseif type(asked) == "string" then
+ asked = gsub(asked," ","") -- spaces are stripped the same way the descriptions keys are built
+ return descriptions[asked] -- NOTE(review): descriptions keys are lowercased but asked is not lowercased here -- confirm callers pass lowercase names
+ end
+end
+
function characters.lower(str)
local new, n = { }, 0
for u in utfvalues(str) do