From 3d13a5b42d3ea3d2935738ef29d2f08694064257 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Mon, 4 Nov 2013 15:51:00 +0100 Subject: beta 2013.11.04 15:51 --- tex/context/base/char-cjk.lua | 365 +++++++++++++++++++++ tex/context/base/cont-new.mkiv | 2 +- tex/context/base/context-version.pdf | Bin 4107 -> 4110 bytes tex/context/base/context.mkiv | 2 +- tex/context/base/status-files.pdf | Bin 24560 -> 24553 bytes tex/context/base/status-lua.pdf | Bin 225257 -> 225210 bytes tex/context/base/strc-ref.lua | 4 +- tex/generic/context/luatex/luatex-fonts-merged.lua | 2 +- 8 files changed, 370 insertions(+), 5 deletions(-) create mode 100644 tex/context/base/char-cjk.lua diff --git a/tex/context/base/char-cjk.lua b/tex/context/base/char-cjk.lua new file mode 100644 index 000000000..3d7de1423 --- /dev/null +++ b/tex/context/base/char-cjk.lua @@ -0,0 +1,365 @@ +if not modules then modules = { } end modules ['char-cjk'] = { + version = 1.001, + comment = "companion to char-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local setmetatable = setmetatable +local insert = table.insert +local floor = math.floor +local format = string.format +local utfchar = utf.char + +local ranges = characters.ranges +local allocate = utilities.storage.allocate + +-- Hangul Syllable + +-- The following conversion is taken from unicode.org/reports/tr15/tr15-23.html#Hangul +-- but adapted to our needs. + +-- local SBase = 0xAC00 +-- +-- local LBase, LCount = 0x1100, 19 +-- local VBase, VCount = 0x1161, 21 +-- local TBase, TCount = 0x11A7, 28 +-- +-- local NCount = VCount * TCount +-- local SCount = LCount * NCount +-- +-- local function decomposed(unicode) +-- local SIndex = unicode - SBase +-- if SIndex >= 0 and SIndex < SCount then +-- local lead_consonant = LBase + floor( SIndex / NCount) +-- local medial_vowel = VBase + floor((SIndex % NCount) / TCount) +-- local tail_consonant = TBase + SIndex % TCount +-- if tail_consonant ~= TBase then +-- return lead_consonant, medial_vowel, tail_consonant +-- else +-- return lead_consonant, medial_vowel +-- end +-- end +-- end +-- +-- Lua will optimize the inline constants so the next variant is +-- 10% faster. In practice this will go unnoticed, but it's also less +-- code, so let's do it. Pushing the constant section into the +-- function body saves 5%. + +local function decomposed(unicode) + local index = unicode - 0xAC00 + if index >= 0 and index < 19 * 21 * 28 then + local lead_consonant = 0x1100 + floor( index / (21 * 28)) + local medial_vowel = 0x1161 + floor((index % (21 * 28)) / 28) + local tail_consonant = 0x11A7 + index % 28 + if tail_consonant ~= 0x11A7 then + return lead_consonant, medial_vowel, tail_consonant + else + return lead_consonant, medial_vowel + end + end +end + +local lead_consonants = { [0] = + "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", + "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H" +} + +local medial_vowels = { [0] = + "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", + "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI", + "YU", "EU", "YI", "I" +} + +local tail_consonants = { [0] = + "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", + "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", + "S", "SS", "NG", "J", "C", "K", "T", "P", "H" +} + +-- local function description(unicode) +-- local index = unicode - 0xAC00 +-- if index >= 0 and index < 19 * 21 * 28 then +-- local lead_consonant = floor( index / NCount) +-- local medial_vowel = floor((index % NCount) / TCount) +-- local tail_consonant = index % TCount +-- return format( +-- "HANGUL SYLLABLE %s%s%s", +-- lead_consonants[lead_consonant], +-- medial_vowels [medial_vowel ], +-- tail_consonants[tail_consonant] +-- ) +-- end +-- end + +local function description(unicode) + local index = unicode - 0xAC00 + if index >= 0 and index < 19 * 21 * 28 then + local lead_consonant = floor( index / (21 * 28)) + local medial_vowel = floor((index % (21 * 28)) / 28) + local tail_consonant = index % 28 + return format( + "HANGUL SYLLABLE %s%s%s", + lead_consonants[lead_consonant], + medial_vowels [medial_vowel ], + tail_consonants[tail_consonant] + ) + end +end + +-- so far + +-- We have a [lead consonant,medial vowel,tail consonant] where the last one +-- is optional. For sort ranges we need the first one but some are collapsed. +-- Beware, we map to modern so the font should support it. + +local function leadconsonant(unicode) + return + -- unicode < 0xAC00 and nil -- original + -- unicode > 0xD7AF and nil or -- original + unicode >= 0xD558 and 0x314E or -- 하 => ㅎ + unicode >= 0xD30C and 0x314D or -- 파 => ㅍ + unicode >= 0xD0C0 and 0x314C or -- 타 => ㅌ + unicode >= 0xCE74 and 0x314B or -- 카 => ㅋ + unicode >= 0xCC28 and 0x314A or -- 차 => ㅊ + unicode >= 0xC790 and 0x3148 or -- 자 => ㅈ + unicode >= 0xC544 and 0x3147 or -- 아 => ㅇ + unicode >= 0xC0AC and 0x3145 or -- 사 => ㅅ + unicode >= 0xBC14 and 0x3142 or -- 바 => ㅂ + unicode >= 0xB9C8 and 0x3141 or -- 마 => ㅁ + unicode >= 0xB77C and 0x3139 or -- 라 => ㄹ + unicode >= 0xB2E4 and 0x3137 or -- 다 => ㄷ + unicode >= 0xB098 and 0x3134 or -- 나 => ㄴ + unicode >= 0xAC00 and 0x3131 or -- 가 => ㄱ + nil -- can't happen +end + +local remapped = { -- this might be merged into char-def.lua + [0x1100] = 0x3131, -- G + [0x1101] = 0x3132, -- GG + [0x1102] = 0x3134, -- N + [0x1103] = 0x3137, -- D + [0x1104] = 0x3138, -- DD + [0x1105] = 0x3139, -- R + -- [0X111A] = 0x3140, -- LH used for last sound + [0x1106] = 0x3141, -- M + [0x1107] = 0x3142, -- B + [0x1108] = 0x3143, -- BB + -- [0x1121] = 0x3144, -- BS used for last sound + [0x1109] = 0x3145, -- S + [0x110A] = 0x3146, -- SS + [0x110B] = 0x3147, -- (IEUNG) no sound but has form + [0x110C] = 0x3148, -- J + [0x110D] = 0x3149, -- JJ + [0x110E] = 0x314A, -- C + [0x110F] = 0x314B, -- K + [0x1110] = 0x314C, -- T + [0x1111] = 0x314D, -- P + [0x1112] = 0x314E, -- H + + [0x1161] = 0x314F, -- A + [0x1162] = 0x3150, -- AE + [0x1163] = 0x3151, -- YA + [0x1164] = 0x3152, -- YAE + [0x1165] = 0x3153, -- EO + [0x1166] = 0x3154, -- E + [0x1167] = 0x3155, -- YEO + [0x1168] = 0x3156, -- YE + [0x1169] = 0x3157, -- O + [0x116A] = 0x3158, -- WA + [0x116B] = 0x3159, -- WAE + [0x116C] = 0x315A, -- OE + [0x116D] = 0x315B, -- YO + [0x116E] = 0x315C, -- U + [0x116F] = 0x315D, -- WEO + [0x1170] = 0x315E, -- WE + [0x1171] = 0x315F, -- WI + [0x1172] = 0x3160, -- YU + [0x1173] = 0x3161, -- EU + [0x1174] = 0x3162, -- YI + [0x1175] = 0x3163, -- I + + [0x11A8] = 0x3131, -- G + [0x11A9] = 0x3132, -- GG + [0x11AA] = 0x3133, -- GS + [0x11AB] = 0x3134, -- N + [0x11AC] = 0x3135, -- NJ + [0x11AD] = 0x3136, -- NH + [0x11AE] = 0x3137, -- D + [0x11AF] = 0x3139, -- L + [0x11B0] = 0x313A, -- LG + [0x11B1] = 0x313B, -- LM + [0x11B2] = 0x313C, -- LB + [0x11B3] = 0x313D, -- LS + [0x11B4] = 0x313E, -- LT + [0x11B5] = 0x313F, -- LP + [0x11B6] = 0x3140, -- LH + [0x11B7] = 0x3141, -- M + [0x11B8] = 0x3142, -- B + [0x11B9] = 0x3144, -- BS + [0x11BA] = 0x3145, -- S + [0x11BB] = 0x3146, -- SS + [0x11BC] = 0x3147, -- NG + [0x11BD] = 0x3148, -- J + [0x11BE] = 0x314A, -- C + [0x11BF] = 0x314B, -- K + [0x11C0] = 0x314C, -- T + [0x11C1] = 0x314D, -- P + [0x11C2] = 0x314E, -- H +} + +characters.hangul = allocate { + decomposed = decomposed, + description = description, + leadconsonant = leadconsonant, + remapped = remapped, +} + +-- so far + +local hangul_syllable_basetable = { + category = "lo", + cjkwd = "w", + description = "", + direction = "l", + linebreak = "h2", +} + +local hangul_syllable_metatable = { + __index = function(t,k) + local u = t.unicodeslot + if k == "fscode" or k == "leadconsonant" then + return leadconsonant(u) + elseif k == "decomposed" then + return { decomposed(u) } + elseif k == "specials" then + return { "char", decomposed(u) } + elseif k == "description" then + return description(u) + else + return hangul_syllable_basetable[k] + end + end +} + +function characters.remap_hangul_syllabe(t) + local tt = type(t) + if tt == "number" then + return remapped[t] or t + elseif tt == "table" then + local r = { } + for i=1,#t do + local ti = t[i] + r[i] = remapped[ti] or ti + end + return r + else + return t + end +end + +local hangul_syllable_extender = function(k,v) + local t = { + unicodeslot = k, + } + setmetatable(t,hangul_syllable_metatable) + return t +end + +local hangul_syllable_range = { + first = 0xAC00, + last = 0xD7A3, + extender = hangul_syllable_extender, +} + +setmetatable(hangul_syllable_range, hangul_syllable_metatable) + +-- CJK Ideograph + +local cjk_ideograph_metatable = { + __index = { + category = "lo", + cjkwd = "w", + description = "", + direction = "l", + linebreak = "id", + } +} + +local cjk_ideograph_extender = function(k,v) + local t = { + -- shcode = shcode, + unicodeslot = k, + } + setmetatable(t,cjk_ideograph_metatable) + return t +end + +local cjk_ideograph_range = { + first = 0x4E00, + last = 0x9FBB, + extender = cjk_ideograph_extender, +} + +-- CJK Ideograph Extension A + +local cjk_ideograph_extension_a_metatable = { + __index = { + category = "lo", + cjkwd = "w", + description = "", + direction = "l", + linebreak = "id", + } +} + +local cjk_ideograph_extension_a_extender = function(k,v) + local t = { + -- shcode = shcode, + unicodeslot = k, + } + setmetatable(t,cjk_ideograph_extension_a_metatable) + return t +end + +local cjk_ideograph_extension_a_range = { + first = 0x3400, + last = 0x4DB5, + extender = cjk_ideograph_extension_a_extender, +} + +-- CJK Ideograph Extension B + +local cjk_ideograph_extension_b_metatable = { + __index = { + category = "lo", + cjkwd = "w", + description = "", + direction = "l", + linebreak = "id", + } +} + +local cjk_ideograph_extension_b_extender = function(k,v) + local t = { + -- shcode = shcode, + unicodeslot = k, + } + setmetatable(t,cjk_ideograph_extension_b_metatable) + return t +end + +local cjk_ideograph_extension_b_range = { + first = 0x20000, + last = 0x2A6D6, + extender = cjk_ideograph_extension_b_extender, +} + +-- Ranges + +insert(ranges, hangul_syllable_range) +insert(ranges, cjk_ideograph_range) +insert(ranges, cjk_ideograph_extension_a_range) +insert(ranges, cjk_ideograph_extension_b_range) diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index 778340da1..0451545c2 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2013.11.04 14:10} +\newcontextversion{2013.11.04 15:51} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf index 34c46400d..7c50913e6 100644 Binary files a/tex/context/base/context-version.pdf and b/tex/context/base/context-version.pdf differ diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index 1318b2ab1..479522660 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -25,7 +25,7 @@ %D up and the dependencies are more consistent. \edef\contextformat {\jobname} -\edef\contextversion{2013.11.04 14:10} +\edef\contextversion{2013.11.04 15:51} \edef\contextkind {beta} %D For those who want to use this: diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf index 29b73a56e..aad647724 100644 Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf index 929760a24..ce12b5bc1 100644 Binary files a/tex/context/base/status-lua.pdf and b/tex/context/base/status-lua.pdf differ diff --git a/tex/context/base/strc-ref.lua b/tex/context/base/strc-ref.lua index 62559ebb3..938af1ad7 100644 --- a/tex/context/base/strc-ref.lua +++ b/tex/context/base/strc-ref.lua @@ -418,9 +418,9 @@ local function register_from_lists(collected,derived,pages,sections) -- g[s] = g[s] or t -- first wins -- end local function action(s) --- if trace_referencing then + if trace_referencing then report_references("list entry %a provides %a reference %a on realpage %a",i,kind,s,realpage) --- end + end c[s] = c[s] or t -- share them d[s] = d[s] or t -- share them g[s] = g[s] or t -- first wins diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 7ebdb0666..a3407f015 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 11/04/13 14:10:07 +-- merge date : 11/04/13 15:51:09 do -- begin closure to overcome local limits and interference -- cgit v1.2.3