From c4b239b7d8f3e478e14d17ef0d9127c596f99210 Mon Sep 17 00:00:00 2001
From: Hans Hagen
Date: Wed, 16 Feb 2011 19:30:00 +0100
Subject: beta 2011.02.16 19:30

---
 tex/context/base/char-cjk.lua     | 318 ++++++++++++++++++++++++++++++++++++++
 tex/context/base/char-ini.lua     | 186 +----------------------
 tex/context/base/char-ini.mkiv    |   1 +
 tex/context/base/status-files.pdf | Bin 23246 -> 23234 bytes
 4 files changed, 321 insertions(+), 184 deletions(-)
 create mode 100644 tex/context/base/char-cjk.lua

diff --git a/tex/context/base/char-cjk.lua b/tex/context/base/char-cjk.lua
new file mode 100644
index 000000000..bce1df615
--- /dev/null
+++ b/tex/context/base/char-cjk.lua
@@ -0,0 +1,318 @@
+if not modules then modules = { } end modules ['char-cjk'] = {
+    version   = 1.001,
+    comment   = "companion to char-ini.mkiv",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files"
+}
+
+local setmetatable, rawget = setmetatable, rawget
+local insert = table.insert
+local floor = math.floor
+local format = string.format
+local utfchar = utf.char
+
+local ranges = characters.ranges
+
+-- Hangul Syllable
+
+-- The following conversion is taken from unicode.org/reports/tr15/tr15-23.html#Hangul
+-- but adapted to our needs
+
+local SBase = 0xAC00
+
+local LBase, LCount = 0x1100, 19
+local VBase, VCount = 0x1161, 21
+local TBase, TCount = 0x11A7, 28
+
+local NCount = VCount * TCount
+local SCount = LCount * NCount
+
+-- Jamo short names, indexed from zero so that they line up with the L, V and T
+-- indices computed below; T index 0 stands for "no trailing consonant".
+
+local L_TABLE = { [0] =
+    "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
+    "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
+}
+
+local V_TABLE = { [0] =
+    "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
+    "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
+    "YU", "EU", "YI", "I"
+}
+
+local T_TABLE = { [0] =
+    "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
+    "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
+    "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
+}
+
+-- Conjoining jamo to compatibility jamo. The trailing consonants start at
+-- TBase + 1 (0x11A8) because TBase itself only marks the absence of a trailing
+-- consonant and is never returned by decomposed.
+
+local remapped = { -- this will be merged into char-def.lua
+    [0x1100] = 0x3131, -- G
+    [0x1101] = 0x3132, -- GG
+    [0x1102] = 0x3134, -- N
+    [0x1103] = 0x3137, -- D
+    [0x1104] = 0x3138, -- DD
+    [0x1105] = 0x3139, -- R
+    [0x111A] = 0x3140, -- LH
+    [0x1106] = 0x3141, -- M
+    [0x1107] = 0x3142, -- B
+    [0x1108] = 0x3143, -- BB
+    [0x1121] = 0x3144, -- BS
+    [0x1109] = 0x3145, -- S
+    [0x110A] = 0x3146, -- SS
+    [0x110B] = 0x3147, -- (IEUNG)
+    [0x110C] = 0x3148, -- J
+    [0x110D] = 0x3149, -- JJ
+    [0x110E] = 0x314A, -- C
+    [0x110F] = 0x314B, -- K
+    [0x1110] = 0x314C, -- T
+    [0x1111] = 0x314D, -- P
+    [0x1112] = 0x314E, -- H
+
+    [0x1161] = 0x314F, -- A
+    [0x1162] = 0x3150, -- AE
+    [0x1163] = 0x3151, -- YA
+    [0x1164] = 0x3152, -- YAE
+    [0x1165] = 0x3153, -- EO
+    [0x1166] = 0x3154, -- E
+    [0x1167] = 0x3155, -- YEO
+    [0x1168] = 0x3156, -- YE
+    [0x1169] = 0x3157, -- O
+    [0x116A] = 0x3158, -- WA
+    [0x116B] = 0x3159, -- WAE
+    [0x116C] = 0x315A, -- OE
+    [0x116D] = 0x315B, -- YO
+    [0x116E] = 0x315C, -- U
+    [0x116F] = 0x315D, -- WEO
+    [0x1170] = 0x315E, -- WE
+    [0x1171] = 0x315F, -- WI
+    [0x1172] = 0x3160, -- YU
+    [0x1173] = 0x3161, -- EU
+    [0x1174] = 0x3162, -- YI
+    [0x1175] = 0x3163, -- I
+
+    [0x11A8] = 0x3131, -- G
+    [0x11A9] = 0x3132, -- GG
+    [0x11AA] = 0x3133, -- GS
+    [0x11AB] = 0x3134, -- N
+    [0x11AC] = 0x3135, -- NJ
+    [0x11AD] = 0x3136, -- NH
+    [0x11AE] = 0x3137, -- D
+    [0x11AF] = 0x3139, -- L
+    [0x11B0] = 0x313A, -- LG
+    [0x11B1] = 0x313B, -- LM
+    [0x11B2] = 0x313C, -- LB
+    [0x11B3] = 0x313D, -- LS
+    [0x11B4] = 0x313E, -- LT
+    [0x11B5] = 0x313F, -- LP
+    [0x11B6] = 0x3140, -- LH
+    [0x11B7] = 0x3141, -- M
+    [0x11B8] = 0x3142, -- B
+    [0x11B9] = 0x3144, -- BS
+    [0x11BA] = 0x3145, -- S
+    [0x11BB] = 0x3146, -- SS
+    [0x11BC] = 0x3147, -- NG
+    [0x11BD] = 0x3148, -- J
+    [0x11BE] = 0x314A, -- C
+    [0x11BF] = 0x314B, -- K
+    [0x11C0] = 0x314C, -- T
+    [0x11C1] = 0x314D, -- P
+    [0x11C2] = 0x314E, -- H
+}
+
+-- Returns the leading consonant and vowel (and, when present, the trailing
+-- consonant) of a precomposed syllable, or nothing when the code point is not
+-- in the SBase .. SBase + SCount - 1 range.
+
+local function decomposed(unicode)
+    local SIndex = unicode - SBase
+    if SIndex >= 0 and SIndex < SCount then
+        local L = LBase + floor(SIndex / NCount)
+        local V = VBase + floor((SIndex % NCount) / TCount)
+        local T = TBase + SIndex % TCount
+        if T ~= TBase then
+            return L, V, T
+        else
+            return L, V
+        end
+    end
+end
+
+-- Returns "HANGUL SYLLABLE " followed by the concatenated jamo short names, or
+-- nil when the code point is not a precomposed syllable.
+
+local function description(unicode)
+    local SIndex = unicode - SBase
+    if SIndex >= 0 and SIndex < SCount then
+        local LIndex = floor(SIndex / NCount)
+        local VIndex = floor((SIndex % NCount) / TCount)
+        local TIndex = SIndex % TCount
+        return format("HANGUL SYLLABLE %s%s%s",L_TABLE[LIndex],V_TABLE[VIndex],T_TABLE[TIndex])
+    end
+end
+
+characters.hangul = {
+    decomposed  = decomposed,
+    description = description,
+    remapped    = remapped,
+}
+
+-- so far
+
+local hangul_syllable_basetable = {
+    category    = "lo",
+    cjkwd       = "w",
+    description = "",
+    direction   = "l",
+    linebreak   = "h2",
+}
+
+-- The index function is shared by the character tables made by the extender
+-- and by the range table itself. The latter has no unicodeslot, so that field
+-- is fetched with rawget (a plain t.unicodeslot would call this function again
+-- and recurse till the stack overflows) and the computed fields are skipped.
+
+local hangul_syllable_metatable = {
+    __index = function(t,k)
+        local u = rawget(t,"unicodeslot")
+        if u then
+            if k == "fscode" then
+                local fscode -- firstsplitcode, stays nil outside the block
+                if u >= 0xAC00 and u <= 0xD7AF then
+                    fscode =
+                           u >= 0xD558 and 0x314E -- 하 => ㅎ
+                        or u >= 0xD30C and 0x314D -- 파 => ㅍ
+                        or u >= 0xD0C0 and 0x314C -- 타 => ㅌ
+                        or u >= 0xCE74 and 0x314B -- 카 => ㅋ
+                        or u >= 0xCC28 and 0x314A -- 차 => ㅊ
+                        or u >= 0xC790 and 0x3148 -- 자 => ㅈ
+                        or u >= 0xC544 and 0x3147 -- 아 => ㅇ
+                        or u >= 0xC0AC and 0x3145 -- 사 => ㅅ
+                        or u >= 0xBC14 and 0x3142 -- 바 => ㅂ
+                        or u >= 0xB9C8 and 0x3141 -- 마 => ㅁ
+                        or u >= 0xB77C and 0x3139 -- 라 => ㄹ
+                        or u >= 0xB2E4 and 0x3137 -- 다 => ㄷ
+                        or u >= 0xB098 and 0x3134 -- 나 => ㄴ
+                        or                 0x3131 -- 가 => ㄱ -- was 0xAC20
+                    t[k] = fscode -- store, so we end up here only once per character
+                end
+                return fscode
+            elseif k == "specials" then
+                return { "char", decomposed(u) }
+            elseif k == "description" then
+                return description(u)
+            end
+        end
+        return hangul_syllable_basetable[k] -- no store
+    end
+}
+
+local hangul_syllable_extender = function(k,v)
+    local t = {
+        unicodeslot = k,
+    }
+    setmetatable(t,hangul_syllable_metatable)
+    return t
+end
+
+local hangul_syllable_range = {
+    first    = 0xAC00, -- SBase
+    last     = 0xD7A3, -- SBase + SCount - 1
+    extender = hangul_syllable_extender,
+}
+
+setmetatable(hangul_syllable_range, hangul_syllable_metatable) -- mind: the range itself has no unicodeslot
+
+-- CJK Ideograph
+
+local cjk_ideograph_metatable = {
+    __index = {
+        category    = "lo",
+        cjkwd       = "w",
+        description = "",
+        direction   = "l",
+        linebreak   = "id",
+    }
+}
+
+local cjk_ideograph_extender = function(k,v) -- v is not used
+    local t = {
+     -- shcode      = shcode,
+        unicodeslot = k,
+    }
+    setmetatable(t,cjk_ideograph_metatable)
+    return t
+end
+
+local cjk_ideograph_range = {
+    first    = 0x4E00,
+    last     = 0x9FBB, -- NOTE(review): confirm this upper bound against the char-def data in use
+    extender = cjk_ideograph_extender,
+}
+
+-- CJK Ideograph Extension A
+
+local cjk_ideograph_extension_a_metatable = { -- same fields as cjk_ideograph_metatable
+    __index = {
+        category    = "lo",
+        cjkwd       = "w",
+        description = "",
+        direction   = "l",
+        linebreak   = "id",
+    }
+}
+
+local cjk_ideograph_extension_a_extender = function(k,v) -- v is not used
+    local t = {
+     -- shcode      = shcode,
+        unicodeslot = k,
+    }
+    setmetatable(t,cjk_ideograph_extension_a_metatable)
+    return t
+end
+
+local cjk_ideograph_extension_a_range = {
+    first    = 0x3400,
+    last     = 0x4DB5,
+    extender = cjk_ideograph_extension_a_extender,
+}
+
+-- CJK Ideograph Extension B
+
+local cjk_ideograph_extension_b_metatable = { -- same fields as cjk_ideograph_metatable
+    __index = {
+        category    = "lo",
+        cjkwd       = "w",
+        description = "",
+        direction   = "l",
+        linebreak   = "id",
+    }
+}
+
+local cjk_ideograph_extension_b_extender = function(k,v) -- v is not used
+    local t = {
+     -- shcode      = shcode,
+        unicodeslot = k,
+    }
+    setmetatable(t,cjk_ideograph_extension_b_metatable)
+    return t
+end
+
+local cjk_ideograph_extension_b_range = {
+    first    = 0x20000,
+    last     = 0x2A6D6,
+    extender = cjk_ideograph_extension_b_extender,
+}
+
+-- Ranges (appended to characters.ranges, the table that char-ini.lua allocates)
+
+insert(ranges, hangul_syllable_range)
+insert(ranges, cjk_ideograph_range)
+insert(ranges, cjk_ideograph_extension_a_range)
+insert(ranges, cjk_ideograph_extension_b_range)
diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua
index 0f26d8f5b..6b6900ecd 100644
--- a/tex/context/base/char-ini.lua
+++ b/tex/context/base/char-ini.lua
@@ -92,190 +92,8 @@ local private = {
     description = "PRIVATE SLOT",
 }
 
--- Hangul Syllable
-
---~ local hangul_syllable_metatable = {
---~     __index = {
---~         category    = "lo",
---~         cjkwd       = "w",
---~         description = "",
---~         direction   = "l",
---~         linebreak   = "h2",
---~     }
---~ }
-
---~ local hangul_syllable_extender = function(k,v)
---~     local fscode = -- firstsplitcode
---~            k  < 0xAC00 and k      -- original
---~         or k  > 0xD7AF and k      -- original
---~         or k >= 0xD558 and 0x314E -- 하 => ㅎ
---~         or k >= 0xD30C and 0x314D -- 파 => ㅍ
---~         or k >= 0xD0C0 and 0x314C -- 타 => ㅌ
---~         or k >= 0xCE74 and 0x314B -- 카 => ㅋ
---~         or k >= 0xCC28 and 0x314A -- 차 => ㅊ
---~         or k >= 0xC790 and 0x3148 -- 자 => ㅈ
---~         or k >= 0xC544 and 0x3147 -- 아 => ㅇ
---~         or k >= 0xC0AC and 0x3145 -- 사 => ㅅ
---~         or k >= 0xBC14 and 0x3142 -- 바 => ㅂ
---~         or k >= 0xB9C8 and 0x3141 -- 마 => ㅁ
---~         or k >= 0xB77C and 0x3139 -- 라 => ㄹ
---~         or k >= 0xB2E4 and 0x3137 -- 다 => ㄷ
---~         or k >= 0xB098 and 0x3134 -- 나 => ㄴ
---~         or k >= 0xAC00 and 0x3131 -- 가 => ㄱ -- was 0xAC20
---~         or k                      -- can't happen
---~     local t = {
---~         fscode      = fscode,
---~         unicodeslot = k,
---~     }
---~     setmetatable(t,hangul_syllable_metatable)
---~     return t
---~ end
-
-local hangul_syllable_basetable = {
-    category    = "lo",
-    cjkwd       = "w",
-    description = "",
-    direction   = "l",
-    linebreak   = "h2",
-}
-
-local hangul_syllable_metatable = {
-    __index = function(t,k)
-        if k == "fscode" then
-            local u = t.unicodeslot
-            local fscode = -- firstsplitcode
-                   u  < 0xAC00 and nil    -- original
-                or u  > 0xD7AF and nil    -- original
-                or u >= 0xD558 and 0x314E -- 하 => ㅎ
-                or u >= 0xD30C and 0x314D -- 파 => ㅍ
-                or u >= 0xD0C0 and 0x314C -- 타 => ㅌ
-                or u >= 0xCE74 and 0x314B -- 카 => ㅋ
-                or u >= 0xCC28 and 0x314A -- 차 => ㅊ
-                or u >= 0xC790 and 0x3148 -- 자 => ㅈ
-                or u >= 0xC544 and 0x3147 -- 아 => ㅇ
-                or u >= 0xC0AC and 0x3145 -- 사 => ㅅ
-                or u >= 0xBC14 and 0x3142 -- 바 => ㅂ
-                or u >= 0xB9C8 and 0x3141 -- 마 => ㅁ
-                or u >= 0xB77C and 0x3139 -- 라 => ㄹ
-                or u >= 0xB2E4 and 0x3137 -- 다 => ㄷ
-                or u >= 0xB098 and 0x3134 -- 나 => ㄴ
-                or u >= 0xAC00 and 0x3131 -- 가 => ㄱ -- was 0xAC20
-                or                 nil    -- can't happen
-            t[k] = fscode
-            return fscode
-        else
-            return hangul_syllable_basetable[k]-- no store
-        end
-    end
-}
-
-local hangul_syllable_extender = function(k,v)
-    local t = {
-        unicodeslot = k,
-    }
-    setmetatable(t,hangul_syllable_metatable)
-    return t
-end
-
-local hangul_syllable_range = {
-    first    = 0xAC00,
-    last     = 0xD7A3,
-    extender = hangul_syllable_extender,
-}
-
-setmetatable(hangul_syllable_range, hangul_syllable_metatable)
-
--- CJK Ideograph
-
-local cjk_ideograph_metatable = {
-    __index = {
-        category    = "lo",
-        cjkwd       = "w",
-        description = "",
-        direction   = "l",
-        linebreak   = "id",
-    }
-}
-
-local cjk_ideograph_extender = function(k,v)
-    local t = {
-     -- shcode      = shcode,
-        unicodeslot = k,
-    }
-    setmetatable(t,cjk_ideograph_metatable)
-    return t
-end
-
-local cjk_ideograph_range = {
-    first    = 0x4E00,
-    last     = 0x9FBB,
-    extender = cjk_ideograph_extender,
-}
-
--- CJK Ideograph Extension A
-
-local cjk_ideograph_extension_a_metatable = {
-    __index = {
-        category    = "lo",
-        cjkwd       = "w",
-        description = "",
-        direction   = "l",
-        linebreak   = "id",
-    }
-}
-
-local cjk_ideograph_extension_a_extender = function(k,v)
-    local t = {
-     -- shcode      = shcode,
-        unicodeslot = k,
-    }
-    setmetatable(t,cjk_ideograph_extension_a_metatable)
-    return t
-end
-
-local cjk_ideograph_extension_a_range = {
-    first    = 0x3400,
-    last     = 0x4DB5,
-    extender = cjk_ideograph_extension_a_extender,
-}
-
--- CJK Ideograph Extension B
-
-local cjk_ideograph_extension_b_metatable = {
-    __index = {
-        category    = "lo",
-        cjkwd       = "w",
-        description = "",
-        direction   = "l",
-        linebreak   = "id",
-    }
-}
-
-local cjk_ideograph_extension_b_extender = function(k,v)
-    local t = {
-     -- shcode      = shcode,
-        unicodeslot = k,
-    }
-    setmetatable(t,cjk_ideograph_extension_b_metatable)
-    return t
-end
-
-local cjk_ideograph_extension_b_range = {
-    first    = 0x20000,
-    last     = 0x2A6D6,
-    extender = cjk_ideograph_extension_b_extender,
-}
-
--- Ranges
-
-local ranges = {
-    hangul_syllable_range,
-    cjk_ideograph_range,
-    cjk_ideograph_extension_a_range,
-    cjk_ideograph_extension_b_range,
-}
-
---
-
+local ranges = allocate() -- starts empty, char-cjk.lua inserts its ranges
+characters.ranges = ranges -- exported so that other files can add ranges
 
 setmetatablekey(data, "__index", function(t,k)
     if type(k) == "string" then
diff --git a/tex/context/base/char-ini.mkiv b/tex/context/base/char-ini.mkiv
index cf27fdc14..2cb225988 100644
--- a/tex/context/base/char-ini.mkiv
+++ b/tex/context/base/char-ini.mkiv
@@ -15,6 +15,7 @@
 
 \registerctxluafile{char-def}{1.001} % let's load this one first
 \registerctxluafile{char-ini}{1.001}
+\registerctxluafile{char-cjk}{1.001} % after char-ini as it needs characters.ranges
 \registerctxluafile{char-map}{1.001} % maybe we will load this someplace else
 \registerctxluafile{char-tex}{1.001}
 
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
index 9f2f8c774..c70c006c4 100644
Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ
--
cgit v1.2.3