diff options
author | Hans Hagen <pragma@wxs.nl> | 2017-04-27 01:41:25 +0200 |
---|---|---|
committer | Context Git Mirror Bot <phg42.2a@gmail.com> | 2017-04-27 01:41:25 +0200 |
commit | 762562da00d0ec1d50e6d3c2a701156ab42e6d71 (patch) | |
tree | 2dc664caa951963a393b0a2a9c9c85d1db6b8e10 /scripts/context/lua/mtx-unicode.lua | |
parent | f2a20e191bf71094aa21d37dee2ecd2f804dbc56 (diff) | |
download | context-762562da00d0ec1d50e6d3c2a701156ab42e6d71.tar.gz |
2017-04-27 01:06:00
Diffstat (limited to 'scripts/context/lua/mtx-unicode.lua')
-rw-r--r-- | scripts/context/lua/mtx-unicode.lua | 98 |
1 files changed, 93 insertions, 5 deletions
-- Excerpt of scripts/context/lua/mtx-unicode.lua as it reads after the patch:
-- the file-level aliases the patch touches, followed by the emoji extractor
-- that the patch adds.

local split, splitlines, strip = string.split, string.splitlines, string.strip
local are_equal = table.are_equal
local tonumber, tostring, rawget = tonumber, tostring, rawget
local lpegmatch = lpeg.match
local P, C, S, R, Cs, Ct, Cg, Cf, Cc = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cg, lpeg.Cf, lpeg.Cc
local formatters = string.formatters
local utfchar = utf.char

local report = application.report

local sparse = false

local split_space_table = lpeg.tsplitat(" ")
local split_space_two   = lpeg.splitat (" ")
local split_range_two   = lpeg.splitat ("..")
local split_colon_table = lpeg.tsplitat(P(" ")^0 * P(";") * P(" ")^0)

do

    local space     = P(" ")
    local spaces    = space^0
    local semicolon = P(";")
    local hash      = P("#")
    local newline   = S("\n\r")
    local eol       = newline + P(-1) -- a line also ends at the end of the data

    -- One uppercase hexadecimal code point, captured as a number, followed by
    -- optional spaces.
    local unicode    = C(R("09","AF")^1) / function(n) return tonumber(n,16) end
                     * spaces
    -- All code points of one sequence, collected in an array.
    local components = Ct (unicode^1)

    -- Earlier variant, kept for reference (it matched "Emoji_ZWJ_Sequence" lines):
    --
    -- local rubish_a    = semicolon
    --                   * spaces
    --                   * P("Emoji_ZWJ_Sequence")
    --                   * spaces
    --                   * semicolon
    --                   * spaces
    -- local description = C((1 - (spaces * (hash+newline)))^1)
    -- local rubish_b    = (1-newline)^0
    --                   * newline^1
    --
    -- local pattern_1 = Ct ( (
    --     Cf ( Ct("") *
    --         Cg (Cc("components") * components)
    --       * rubish_a
    --       * Cg (Cc("description") * description )
    --       * rubish_b
    --     , rawset)
    --   + P(1) )^1 )

    -- "; fully-qualified # " or "; non-fully-qualified # "; the prefix is
    -- optional, hence ^-1 (at most once) instead of ^0 (any number of times).
    local rubish_a    = semicolon
                      * spaces
                      * P("non-")^-1 * P("fully-qualified")
                      * spaces
                      * hash
                      * spaces
    -- The first space-delimited token after the hash mark.
    local textstring  = C((1 - space)^1)
                      * spaces
    -- The rest of the line without trailing spaces, lowercased. There is no
    -- explicit capture, so the whole match is what gets passed to string.lower.
    local description = ((1 - (spaces * eol))^1) / string.lower
    -- Whatever is left on the line plus the line ending(s); the end of the data
    -- is accepted too, so a last line without newline is not lost.
    local rubish_b    = (1-newline)^0
                      * (newline^1 + P(-1))

    -- Scans the whole text: each position either starts a record, which is
    -- folded into a table with the fields components, textstring and
    -- description, or one character is skipped.
    local pattern_2 = Ct ( (
        Cf ( Ct("") *
            Cg (Cc("components")  * components)
          * rubish_a
          * Cg (Cc("textstring")  * textstring)
          * Cg (Cc("description") * description)
          * rubish_b
        , rawset)
      + P(1) )^1 )

    -- Builds a table that maps lowercased emoji descriptions to arrays of code
    -- points, using the records found in "emoji-test.txt" (located with
    -- resolvers.findfile), and saves its serialization in 'filename'.
    --
    -- When an existing "char-emj.lua" can be found and loaded, everything that
    -- precedes its first "return " is kept in front of the new table.
    --
    -- filename : name of the file to write
    --
    -- Nothing is returned; when the input cannot be found or loaded a message is
    -- reported and no file is written.
    --
    -- NOTE(review): records that share a description (for instance a qualified
    -- and a non-qualified variant, if the data has both) end up under the same
    -- key, so the last one wins; confirm that this is intended.
    function scripts.unicode.emoji(filename)

        local name = resolvers.findfile("emoji-test.txt") or ""
        if name == "" then
            report("no emoji-test.txt found")
            return
        end
        local l = io.loaddata(name)
        if not l or l == "" then
            report("emoji-test.txt is empty or unreadable")
            return
        end
        local t = lpegmatch(pattern_2,l) or { }

        -- not called 'hash' so that the pattern with that name is not shadowed
        local emoji = { }

        -- "#" and "*" in a description are spelled out in the key
        local replace = lpeg.replacer {
            ["#"] = "hash",
            ["*"] = "asterisk"
        }

        for i=1,#t do
            local v = t[i]
            local d = v.description
            local k = lpegmatch(replace,d) or d
            emoji[k] = v.components
        end

        local new     = table.serialize(emoji,"return", { hexify = true })
        local oldname = resolvers.findfile("char-emj.lua") or ""
        local old     = oldname ~= "" and io.loaddata(oldname) or nil
        if old and old ~= "" then
            -- A function is used as replacement so that a "%" in the serialized
            -- data is taken literally, and the match count is checked so that
            -- an old file without "return " cannot replace the new data.
            local merged, n = string.gsub(old,"^(.-)return .*$",function(preamble)
                return preamble .. new
            end)
            if n > 0 then
                new = merged
            end
        end
        io.savedata(filename,new)
    end

end

-- The action part of the script calls this, after scripts.unicode.save, as:
--
--   scripts.unicode.emoji("char-emj-new.lua")