summaryrefslogtreecommitdiff
path: root/tex/context/base/char-cjk.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/char-cjk.lua')
-rw-r--r--tex/context/base/char-cjk.lua170
1 files changed, 109 insertions, 61 deletions
diff --git a/tex/context/base/char-cjk.lua b/tex/context/base/char-cjk.lua
index bce1df615..75daf2e01 100644
--- a/tex/context/base/char-cjk.lua
+++ b/tex/context/base/char-cjk.lua
@@ -17,36 +17,125 @@ local ranges = characters.ranges
-- Hangul Syllable
-- The following conversion is taken from unicode.org/reports/tr15/tr15-23.html#Hangul
--- but adapted to our needs
+-- but adapted to our needs.
+
+-- local SBase = 0xAC00
+--
+-- local LBase, LCount = 0x1100, 19
+-- local VBase, VCount = 0x1161, 21
+-- local TBase, TCount = 0x11A7, 28
+--
+-- local NCount = VCount * TCount
+-- local SCount = LCount * NCount
+--
+-- local function decomposed(unicode)
+-- local SIndex = unicode - SBase
+-- if SIndex >= 0 and SIndex < SCount then
+-- local lead_consonant = LBase + floor( SIndex / NCount)
+-- local medial_vowel = VBase + floor((SIndex % NCount) / TCount)
+-- local tail_consonant = TBase + SIndex % TCount
+-- if tail_consonant ~= TBase then
+-- return lead_consonant, medial_vowel, tail_consonant
+-- else
+-- return lead_consonant, medial_vowel
+-- end
+-- end
+-- end
+--
+-- Lua will optimize the inline constants so the next variant is
+-- 10% faster. In practice this will go unnoticed, but it's also less
+-- code, so let's do it. Pushing the constant section into the
+-- function body saves 5%.
-local SBase = 0xAC00
-
-local LBase, LCount = 0x1100, 19
-local VBase, VCount = 0x1161, 21
-local TBase, TCount = 0x11A7, 28
-
-local NCount = VCount * TCount
-local SCount = LCount * NCount
+-- Splits a precomposed Hangul syllable into its conjoining jamo.
+--
+-- unicode : code point of the syllable (number)
+--
+-- Returns the lead consonant (counted from 0x1100) and the medial vowel
+-- (counted from 0x1161), followed by the tail consonant (counted from
+-- 0x11A7) when the syllable has one. Nothing is returned when the code
+-- point is not one of the 19 * 21 * 28 syllables that start at 0xAC00.
+--
+-- NOTE(review): floor is not defined in the visible part of this patch;
+-- presumably it is math.floor localized earlier in the file -- confirm.
+local function decomposed(unicode)
+ local index = unicode - 0xAC00
+ if index >= 0 and index < 19 * 21 * 28 then
+ local lead_consonant = 0x1100 + floor( index / (21 * 28))
+ local medial_vowel = 0x1161 + floor((index % (21 * 28)) / 28)
+ local tail_consonant = 0x11A7 + index % 28
+ -- a remainder of zero means that the syllable has no tail consonant
+ if tail_consonant ~= 0x11A7 then
+ return lead_consonant, medial_vowel, tail_consonant
+ else
+ return lead_consonant, medial_vowel
+ end
+ end
+end
-local L_TABLE = { [0] =
+-- Short names of the 19 lead consonants. The [0] key makes the list zero
+-- based, so an entry can be fetched with the plain quotient computed from
+-- the syllable index.
+local lead_consonants = { [0] =
"G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
"S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
}
-local V_TABLE = { [0] =
+-- Short names of the 21 medial vowels, zero based as well.
+local medial_vowels = { [0] =
"A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
"WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
"YU", "EU", "YI", "I"
}
-local T_TABLE = { [0] =
+-- Short names of the 28 tail consonants, zero based as well. Entry zero is
+-- the empty string, which stands for a syllable without tail consonant.
+local tail_consonants = { [0] =
"", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
"LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
"S", "SS", "NG", "J", "C", "K", "T", "P", "H"
}
+-- local function description(unicode)
+-- local index = unicode - 0xAC00
+-- if index >= 0 and index < 19 * 21 * 28 then
+-- local lead_consonant = floor( index / NCount)
+-- local medial_vowel = floor((index % NCount) / TCount)
+-- local tail_consonant = index % TCount
+-- return format(
+-- "HANGUL SYLLABLE %s%s%s",
+-- lead_consonants[lead_consonant],
+-- medial_vowels [medial_vowel ],
+-- tail_consonants[tail_consonant]
+-- )
+-- end
+-- end
+
+-- Builds the descriptive name of a precomposed Hangul syllable.
+--
+-- unicode : code point of the syllable (number)
+--
+-- Returns "HANGUL SYLLABLE " followed by the short names of the lead
+-- consonant, the medial vowel and the tail consonant, as found in the
+-- lead_consonants, medial_vowels and tail_consonants tables. Nothing is
+-- returned when the code point is not one of the 19 * 21 * 28 syllables
+-- that start at 0xAC00.
+--
+-- NOTE(review): floor and format are not defined in the visible part of
+-- this patch; presumably they are math.floor and string.format localized
+-- earlier in the file -- confirm.
+local function description(unicode)
+ local index = unicode - 0xAC00
+ if index >= 0 and index < 19 * 21 * 28 then
+ -- these are zero based indices into the three name tables
+ local lead_consonant = floor( index / (21 * 28))
+ local medial_vowel = floor((index % (21 * 28)) / 28)
+ local tail_consonant = index % 28
+ return format(
+ "HANGUL SYLLABLE %s%s%s",
+ lead_consonants[lead_consonant],
+ medial_vowels [medial_vowel ],
+ tail_consonants[tail_consonant]
+ )
+ end
+end
+
+-- so far
-local remapped = { -- this will be merged into char-def.lua
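+-- A syllable is a [lead consonant, medial vowel, tail consonant] triplet in
+-- which the tail consonant is optional. For sort ranges we only need the lead
+-- consonant, and the doubled ones (GG, DD, BB, SS, JJ) are collapsed onto
+-- their single counterparts. Beware: we map to the modern jamo in the range
+-- 0x3131 .. 0x314E, so the font should support these.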
+
+-- Maps a Hangul syllable onto the jamo (0x3131 .. 0x314E) that is used as
+-- its first split code.
+--
+-- unicode : code point of the syllable (number)
+--
+-- The tests run from the highest threshold downwards and the first one that
+-- matches wins. A code point below 0xAC00 matches nothing, so the result is
+-- then nil.
+--
+-- NOTE(review): there is no upper bound, so every code point from 0xD558
+-- upwards gives 0x314E, also when it lies beyond the last syllable
+-- (0xAC00 + 19 * 21 * 28 - 1 = 0xD7A3). That is harmless as long as only
+-- syllables are passed, but this function is also exported in
+-- characters.hangul -- confirm that callers never pass anything else.
+local function leadconsonant(unicode)
+ return
+ -- unicode < 0xAC00 and nil -- original
+ -- unicode > 0xD7AF and nil or -- original
+ unicode >= 0xD558 and 0x314E or -- 하 => ㅎ
+ unicode >= 0xD30C and 0x314D or -- 파 => ㅍ
+ unicode >= 0xD0C0 and 0x314C or -- 타 => ㅌ
+ unicode >= 0xCE74 and 0x314B or -- 카 => ㅋ
+ unicode >= 0xCC28 and 0x314A or -- 차 => ㅊ
+ unicode >= 0xC790 and 0x3148 or -- 자 => ㅈ
+ unicode >= 0xC544 and 0x3147 or -- 아 => ㅇ
+ unicode >= 0xC0AC and 0x3145 or -- 사 => ㅅ
+ unicode >= 0xBC14 and 0x3142 or -- 바 => ㅂ
+ unicode >= 0xB9C8 and 0x3141 or -- 마 => ㅁ
+ unicode >= 0xB77C and 0x3139 or -- 라 => ㄹ
+ unicode >= 0xB2E4 and 0x3137 or -- 다 => ㄷ
+ unicode >= 0xB098 and 0x3134 or -- 나 => ㄴ
+ unicode >= 0xAC00 and 0x3131 or -- 가 => ㄱ
+ nil -- can't happen
+end
+
+local remapped = { -- this might be merged into char-def.lua
[0x1100] = 0x3131, -- G
[0x1101] = 0x3132, -- GG
[0x1102] = 0x3134, -- N
@@ -120,34 +209,11 @@ local remapped = { -- this will be merged into char-def.lua
[0x11C1] = 0x314E, -- H
}
-local function decomposed(unicode)
- local SIndex = unicode - SBase
- if SIndex >= 0 and SIndex < SCount then
- local L = LBase + floor(SIndex / NCount)
- local V = VBase + floor((SIndex % NCount) / TCount)
- local T = TBase + SIndex % TCount
- if T ~= TBase then
- return L, V, T
- else
- return L, V
- end
- end
-end
-
-local function description(unicode)
- local SIndex = unicode - SBase
- if SIndex >= 0 and SIndex < SCount then
- local LIndex = floor(SIndex / NCount)
- local VIndex = floor((SIndex % NCount) / TCount)
- local TIndex = SIndex % TCount
- return format("HANGUL SYLLABLE %s%s%s",L_TABLE[LIndex],V_TABLE[VIndex],T_TABLE[TIndex])
- end
-end
-
+-- The Hangul helpers that are made available to other modules; as of this
+-- change the lead consonant mapper is one of them.
characters.hangul = {
- decomposed = decomposed,
- description = description,
- remapped = remapped,
+ decomposed = decomposed,
+ description = description,
+ leadconsonant = leadconsonant,
+ remapped = remapped,
}
-- so far
@@ -164,32 +230,14 @@ local hangul_syllable_metatable = {
+-- Resolves the fields that are not stored in a syllable entry. The keys
+-- "fscode", "specials" and "description" are computed from the unicodeslot
+-- field of the entry; any other key is looked up in
+-- hangul_syllable_basetable. After this change nothing is written back into
+-- the entry (the removed code stored the computed fscode).
__index = function(t,k)
 local u = t.unicodeslot
 if k == "fscode" then
- local fscode = -- firstsplitcode
- u < 0xAC00 and nil -- original
- or u > 0xD7AF and nil -- original
- or u >= 0xD558 and 0x314E -- 하 => ㅎ
- or u >= 0xD30C and 0x314D -- 파 => ㅍ
- or u >= 0xD0C0 and 0x314C -- 타 => ㅌ
- or u >= 0xCE74 and 0x314B -- 카 => ㅋ
- or u >= 0xCC28 and 0x314A -- 차 => ㅊ
- or u >= 0xC790 and 0x3148 -- 자 => ㅈ
- or u >= 0xC544 and 0x3147 -- 아 => ㅇ
- or u >= 0xC0AC and 0x3145 -- 사 => ㅅ
- or u >= 0xBC14 and 0x3142 -- 바 => ㅂ
- or u >= 0xB9C8 and 0x3141 -- 마 => ㅁ
- or u >= 0xB77C and 0x3139 -- 라 => ㄹ
- or u >= 0xB2E4 and 0x3137 -- 다 => ㄷ
- or u >= 0xB098 and 0x3134 -- 나 => ㄴ
- or u >= 0xAC00 and 0x3131 -- 가 => ㄱ -- was 0xAC20
- or nil -- can't happen
- t[k] = fscode
- return fscode
+ -- no need to cache this as we normally use fscodes
+ return leadconsonant(u)
 elseif k == "specials" then
+ -- a fresh table per lookup: "char" followed by the two or three jamo
 return { "char", decomposed(u) }
 elseif k == "description" then
 return description(u)
 else
- return hangul_syllable_basetable[k]-- no store
+ return hangul_syllable_basetable[k]
 end
 end
}