diff options
Diffstat (limited to 'scripts/context/lua/mtx-unicode.lua')
-rw-r--r-- | scripts/context/lua/mtx-unicode.lua | 79 |
1 files changed, 74 insertions, 5 deletions
diff --git a/scripts/context/lua/mtx-unicode.lua b/scripts/context/lua/mtx-unicode.lua index 060e563c6..0979239db 100644 --- a/scripts/context/lua/mtx-unicode.lua +++ b/scripts/context/lua/mtx-unicode.lua @@ -143,6 +143,7 @@ function scripts.unicode.update() local eastasianwidth = texttables.eastasianwidth local standardizedvariants = texttables.standardizedvariants local arabicshaping = texttables.arabicshaping + local casefolding = texttables.casefolding local index = texttables.index local characterdata = characters.data -- @@ -164,6 +165,8 @@ function scripts.unicode.update() local cjkwd = ed and lower(ed[2] or "n") local mirror = bd and tonumber(bd[2],16) local arabic = nil + local lccode = false + local uccode = false descriptions[description] = unicode if sparse and direction == "l" then direction = nil @@ -203,6 +206,37 @@ function scripts.unicode.update() if not combining or combining == 0 then combining = nil end + -- + local cf = casefolding[unicode] + if cf and tonumber(cf[1],16) == unicode then + local how = cf[2] + if how == "C" or how == "S" then + local fold = tonumber(cf[3],16) + if fold == unicode then + -- print("SKIPPING",description) + elseif category == "ll" then + uccode = fold + elseif category == "lu" then + lccode = fold + end + elseif how == "F" then + -- we can use the first + local folding = { } + for s in gmatch(cf[3],"%S+") do + folding[#folding+1] = tonumber(s,16) + end + if category == "ll" then + uccode = folding + elseif category == "ul" then + lccode = folding + end + else + -- we skip these + -- print(description) + -- inspect(cf) + end + end + -- if not char then report("%U : adding entry %a",unicode,description) char = { @@ -218,9 +252,34 @@ function scripts.unicode.update() specials = specials, arabic = arabic, combining = combining, + uccode = uccode, + lccode = lccode, } characterdata[unicode] = char else + -- we have more case mapping (e.g. cherokee) + if lccode then + if type(lccode) == "table" then + if type(char.lccode) ~= "table" or not are_equal(lccode,char.lccode) then + report("%U : setting lccode to % t, %a",unicode,lccode,description) + char.lccode = lccode + end + elseif char.lccode ~= lccode then + report("%U : setting lccode to %a, %a, %a",unicode,lccode,description) + char.lccode = lccode + end + end + if uccode then + if type(uccode) == "table" then + if type(char.uccode) ~= "table" or not are_equal(uccode,char.uccode) then + report("%U : setting uccode to % t, %a",unicode,uccode,description) + char.uccode = uccode + end + elseif char.uccode ~= uccode then + report("%U : setting uccode to %a, %a",unicode,uccode,description) + char.uccode = uccode + end + end if direction then if char.direction ~= direction then report("%U : setting direction to %a, %a",unicode,direction,description) @@ -306,7 +365,7 @@ function scripts.unicode.update() elseif not find(comment,"check special") then char.comment = comment .. ", check special" end - report("%U : check specials % + t, %a",unicode,t,description) + -- report("%U : check specials % + t, %a",unicode,t,description) end end end @@ -397,7 +456,7 @@ function scripts.unicode.update() mark = descriptions["SOLIDUS"] -- SLASH end if base and mark then - report("adding extra char special for %a",description) + -- report("adding extra char special for %a",description) data.specials = { "with", base, mark } data.comment = nil end @@ -429,7 +488,7 @@ function scripts.unicode.update() end for unicode, ud in table.sortedpairs(characterdata) do if not rawget(ud,"category") and rawget(ud,"variants") then - report("stripping %U (variant, takes from metacharacter)",unicode) + -- report("stripping %U (variant, takes from metacharacter)",unicode) characterdata[unicode] = nil end end @@ -521,6 +580,7 @@ function scripts.unicode.load() eastasianwidth = resolvers.findfile("eastasianwidth.txt") or "", standardizedvariants = resolvers.findfile("standardizedvariants.txt") or "", arabicshaping = resolvers.findfile("arabicshaping.txt") or "", + casefolding = resolvers.findfile("casefolding.txt") or "", index = resolvers.findfile("index.txt") or "", } -- @@ -531,6 +591,7 @@ function scripts.unicode.load() eastasianwidth = textfiles.eastasianwidth ~= "" and io.loaddata(textfiles.eastasianwidth) or "", standardizedvariants = textfiles.standardizedvariants ~= "" and io.loaddata(textfiles.standardizedvariants) or "", arabicshaping = textfiles.arabicshaping ~= "" and io.loaddata(textfiles.arabicshaping) or "", + casefolding = textfiles.casefolding ~= "" and io.loaddata(textfiles.casefolding) or "", index = textfiles.index ~= "" and io.loaddata(textfiles.index) or "", } texttables = { @@ -540,6 +601,7 @@ function scripts.unicode.load() eastasianwidth = splitdefinition(textdata.eastasianwidth,true), standardizedvariants = splitdefinition(textdata.standardizedvariants,false), arabicshaping = splitdefinition(textdata.arabicshaping,true), + casefolding = splitdefinition(textdata.casefolding,true), index = splitindex(textdata.index), } -- @@ -563,11 +625,18 @@ end -- [0xFE01]="centered form", -- } +-- local variants_style={ +-- [0xFE00]="chancery style", +-- [0xFE01]="roundhand style", +-- } + function scripts.unicode.save(filename) if preamble then local data = table.serialize(characters.data,"characters.data", { hexify = true, noquotes = true }) - data = gsub(data,"%{%s+%[0xFE0E%]=\"text style\",%s+%[0xFE0F%]=\"emoji style\",%s+%}","variants_emoji") + data = gsub(data,"%{%s+%[0xFE0E%]=\"text style\",%s+%[0xFE0F%]=\"emoji style\",%s+%}", "variants_emoji") data = gsub(data,"%{%s+%[0xFE00%]=\"corner%-justified form\",%s+%[0xFE01%]=\"centered form\",%s+%}","variants_forms") + data = gsub(data,"%{%s+%[0xFE00%]=\"chancery style\",%s+%[0xFE01%]=\"roundhand style\",%s+%}", "variants_style") + data = gsub(data,"%{%s+%[0xFE00%]=\"dotted form\",%s+%}", "variants_dotted") io.savedata(filename,preamble .. data) end end @@ -764,7 +833,7 @@ else scripts.unicode.save("char-def-new.lua") scripts.unicode.emoji("char-emj-new.lua") report("saved file %a","char-def-new.lua") - report("saved file %a (current 12.0, check for updates, see above!)","char-emj-new.lua") + report("saved file %a (current 14.0, check for updates, see above!)","char-emj-new.lua") else report("nothing to do") end |