summary | refs | log | tree | commit | diff
path: root/scripts/context/lua/mtx-unicode.lua
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/context/lua/mtx-unicode.lua')
-rw-r--r-- scripts/context/lua/mtx-unicode.lua 79
1 files changed, 74 insertions, 5 deletions
diff --git a/scripts/context/lua/mtx-unicode.lua b/scripts/context/lua/mtx-unicode.lua
index 060e563c6..0979239db 100644
--- a/scripts/context/lua/mtx-unicode.lua
+++ b/scripts/context/lua/mtx-unicode.lua
@@ -143,6 +143,7 @@ function scripts.unicode.update()
local eastasianwidth = texttables.eastasianwidth
local standardizedvariants = texttables.standardizedvariants
local arabicshaping = texttables.arabicshaping
+ local casefolding = texttables.casefolding
local index = texttables.index
local characterdata = characters.data
--
@@ -164,6 +165,8 @@ function scripts.unicode.update()
local cjkwd = ed and lower(ed[2] or "n")
local mirror = bd and tonumber(bd[2],16)
local arabic = nil
+ local lccode = false
+ local uccode = false
descriptions[description] = unicode
if sparse and direction == "l" then
direction = nil
@@ -203,6 +206,37 @@ function scripts.unicode.update()
if not combining or combining == 0 then
combining = nil
end
+ -- derive lccode/uccode from the casefolding entry for this code point (if there is one)
+ local cf = casefolding[unicode]
+ if cf and tonumber(cf[1],16) == unicode then
+     local how = cf[2] -- status field: "C", "S" and "F" are handled, anything else is skipped
+     if how == "C" or how == "S" then
+         local fold = tonumber(cf[3],16) -- parsed as a single hex code point
+         if fold == unicode then
+             -- print("SKIPPING",description)
+         elseif category == "ll" then
+             uccode = fold
+         elseif category == "lu" then
+             lccode = fold
+         end
+     elseif how == "F" then
+         -- we can use the first
+         local folding = { } -- one number per whitespace separated hex value in cf[3]
+         for s in gmatch(cf[3],"%S+") do
+             folding[#folding+1] = tonumber(s,16)
+         end
+         if category == "ll" then
+             uccode = folding
+         elseif category == "lu" then -- was "ul", which never matches; the C/S branch tests "lu"
+             lccode = folding
+         end
+     else
+         -- we skip these
+         -- print(description)
+         -- inspect(cf)
+     end
+ end
+ --
if not char then
report("%U : adding entry %a",unicode,description)
char = {
@@ -218,9 +252,34 @@ function scripts.unicode.update()
specials = specials,
arabic = arabic,
combining = combining,
+ uccode = uccode,
+ lccode = lccode,
}
characterdata[unicode] = char
else
+ -- we have more case mapping (e.g. cherokee): update an existing entry only when the value differs
+ if lccode then
+     if type(lccode) == "table" then
+         -- tables compare by reference, so contents are checked with are_equal
+         -- NOTE(review): are_equal is not defined in the visible hunks; confirm it is in scope
+         if type(char.lccode) ~= "table" or not are_equal(lccode,char.lccode) then
+             report("%U : setting lccode to % t, %a",unicode,lccode,description)
+             char.lccode = lccode
+         end
+     elseif char.lccode ~= lccode then
+         report("%U : setting lccode to %a, %a",unicode,lccode,description) -- two values, two %a (as in the uccode report)
+         char.lccode = lccode
+     end
+ end
+ if uccode then
+     if type(uccode) == "table" then
+         if type(char.uccode) ~= "table" or not are_equal(uccode,char.uccode) then
+             report("%U : setting uccode to % t, %a",unicode,uccode,description)
+             char.uccode = uccode
+         end
+     elseif char.uccode ~= uccode then
+         report("%U : setting uccode to %a, %a",unicode,uccode,description)
+         char.uccode = uccode
+     end
+ end
if direction then
if char.direction ~= direction then
report("%U : setting direction to %a, %a",unicode,direction,description)
@@ -306,7 +365,7 @@ function scripts.unicode.update()
elseif not find(comment,"check special") then
char.comment = comment .. ", check special"
end
- report("%U : check specials % + t, %a",unicode,t,description)
+ -- report("%U : check specials % + t, %a",unicode,t,description)
end
end
end
@@ -397,7 +456,7 @@ function scripts.unicode.update()
mark = descriptions["SOLIDUS"] -- SLASH
end
if base and mark then
- report("adding extra char special for %a",description)
+ -- report("adding extra char special for %a",description)
data.specials = { "with", base, mark }
data.comment = nil
end
@@ -429,7 +488,7 @@ function scripts.unicode.update()
end
for unicode, ud in table.sortedpairs(characterdata) do
if not rawget(ud,"category") and rawget(ud,"variants") then
- report("stripping %U (variant, takes from metacharacter)",unicode)
+ -- report("stripping %U (variant, takes from metacharacter)",unicode)
characterdata[unicode] = nil
end
end
@@ -521,6 +580,7 @@ function scripts.unicode.load()
eastasianwidth = resolvers.findfile("eastasianwidth.txt") or "",
standardizedvariants = resolvers.findfile("standardizedvariants.txt") or "",
arabicshaping = resolvers.findfile("arabicshaping.txt") or "",
+ casefolding = resolvers.findfile("casefolding.txt") or "",
index = resolvers.findfile("index.txt") or "",
}
--
@@ -531,6 +591,7 @@ function scripts.unicode.load()
eastasianwidth = textfiles.eastasianwidth ~= "" and io.loaddata(textfiles.eastasianwidth) or "",
standardizedvariants = textfiles.standardizedvariants ~= "" and io.loaddata(textfiles.standardizedvariants) or "",
arabicshaping = textfiles.arabicshaping ~= "" and io.loaddata(textfiles.arabicshaping) or "",
+ casefolding = textfiles.casefolding ~= "" and io.loaddata(textfiles.casefolding) or "",
index = textfiles.index ~= "" and io.loaddata(textfiles.index) or "",
}
texttables = {
@@ -540,6 +601,7 @@ function scripts.unicode.load()
eastasianwidth = splitdefinition(textdata.eastasianwidth,true),
standardizedvariants = splitdefinition(textdata.standardizedvariants,false),
arabicshaping = splitdefinition(textdata.arabicshaping,true),
+ casefolding = splitdefinition(textdata.casefolding,true),
index = splitindex(textdata.index),
}
--
@@ -563,11 +625,18 @@ end
-- [0xFE01]="centered form",
-- }
+-- local variants_style={
+-- [0xFE00]="chancery style",
+-- [0xFE01]="roundhand style",
+-- }
+
function scripts.unicode.save(filename) -- writes preamble followed by the serialized characters.data to filename
if preamble then -- nothing is saved when preamble is not set
local data = table.serialize(characters.data,"characters.data", { hexify = true, noquotes = true })
- data = gsub(data,"%{%s+%[0xFE0E%]=\"text style\",%s+%[0xFE0F%]=\"emoji style\",%s+%}","variants_emoji")
+ data = gsub(data,"%{%s+%[0xFE0E%]=\"text style\",%s+%[0xFE0F%]=\"emoji style\",%s+%}", "variants_emoji") -- each literal subtable in the serialized text is replaced by a bare name
data = gsub(data,"%{%s+%[0xFE00%]=\"corner%-justified form\",%s+%[0xFE01%]=\"centered form\",%s+%}","variants_forms")
+ data = gsub(data,"%{%s+%[0xFE00%]=\"chancery style\",%s+%[0xFE01%]=\"roundhand style\",%s+%}", "variants_style")
+ data = gsub(data,"%{%s+%[0xFE00%]=\"dotted form\",%s+%}", "variants_dotted") -- NOTE(review): presumably the variants_* names are defined in preamble; variants_dotted is not visible here, confirm
io.savedata(filename,preamble .. data)
end
end
@@ -764,7 +833,7 @@ else
scripts.unicode.save("char-def-new.lua")
scripts.unicode.emoji("char-emj-new.lua")
report("saved file %a","char-def-new.lua")
- report("saved file %a (current 12.0, check for updates, see above!)","char-emj-new.lua")
+ report("saved file %a (current 14.0, check for updates, see above!)","char-emj-new.lua")
else
report("nothing to do")
end