summaryrefslogtreecommitdiff
path: root/scripts/context/lua/mtx-unicode.lua
diff options
context:
space:
mode:
authorHans Hagen <pragma@wxs.nl>2017-04-27 01:41:25 +0200
committerContext Git Mirror Bot <phg42.2a@gmail.com>2017-04-27 01:41:25 +0200
commit762562da00d0ec1d50e6d3c2a701156ab42e6d71 (patch)
tree2dc664caa951963a393b0a2a9c9c85d1db6b8e10 /scripts/context/lua/mtx-unicode.lua
parentf2a20e191bf71094aa21d37dee2ecd2f804dbc56 (diff)
downloadcontext-762562da00d0ec1d50e6d3c2a701156ab42e6d71.tar.gz
2017-04-27 01:06:00
Diffstat (limited to 'scripts/context/lua/mtx-unicode.lua')
-rw-r--r--scripts/context/lua/mtx-unicode.lua98
1 files changed, 93 insertions, 5 deletions
diff --git a/scripts/context/lua/mtx-unicode.lua b/scripts/context/lua/mtx-unicode.lua
index 418df9261..557e70b79 100644
--- a/scripts/context/lua/mtx-unicode.lua
+++ b/scripts/context/lua/mtx-unicode.lua
@@ -52,7 +52,9 @@ local split, splitlines, strip = string.split, string.splitlines, string.strip
local are_equal = table.are_equal
local tonumber, tostring, rawget = tonumber, tostring, rawget
local lpegmatch = lpeg.match
+local P, C, S, R, Cs, Ct, Cg, Cf, Cc = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cg, lpeg.Cf, lpeg.Cc
local formatters = string.formatters
+local utfchar = utf.char
local report = application.report
@@ -73,7 +75,7 @@ local sparse = false
local split_space_table = lpeg.tsplitat(" ")
local split_space_two = lpeg.splitat (" ")
local split_range_two = lpeg.splitat ("..")
-local split_colon_table = lpeg.tsplitat(lpeg.P(" ")^0 * lpeg.P(";") * lpeg.P(" ")^0)
+local split_colon_table = lpeg.tsplitat(P(" ")^0 * P(";") * P(" ")^0)
local skipped = {
[0x002C6] = true, -- MODIFIER LETTER CIRCUMFLEX ACCENT
@@ -541,10 +543,10 @@ function scripts.unicode.extras() -- old code
index[k] = nil
end
end
--- for k, v in next, data do
--- v.synonym = nil
--- v.synonyms = nil
--- end
+ -- for k, v in next, data do
+ -- v.synonym = nil
+ -- v.synonyms = nil
+ -- end
for k, v in table.sortedhash(index) do
local d = data[v]
if d and d.description ~= upper(k) then
@@ -571,6 +573,91 @@ function scripts.unicode.extras() -- old code
end
end
+do
+
+ local space = P(" ")
+ local spaces = space^0
+ local semicolon = P(";")
+ local hash = P("#")
+ local newline = S("\n\r")
+
+ local unicode = Cs(R("09","AF")^1)/function(n) return tonumber(n,16) end
+ * spaces
+ local components = Ct (unicode^1)
+
+ -- local rubish_a = semicolon
+ -- * spaces
+ -- * P("Emoji_ZWJ_Sequence")
+ -- * spaces
+ -- * semicolon
+ -- * spaces
+ -- local description = C((1 - (spaces * (hash+newline)))^1)
+ -- local rubish_b = (1-newline)^0
+ -- * newline^1
+ --
+ -- local pattern_1 = Ct ( (
+ -- Cf ( Ct("") *
+ -- Cg (Cc("components") * components)
+ -- * rubish_a
+ -- * Cg (Cc("description") * description )
+ -- * rubish_b
+ -- , rawset)
+ -- + P(1) )^1 )
+
+ local rubish_a = semicolon
+ * spaces
+ * P("non-")^0 * P("fully-qualified")
+ * spaces
+ * hash
+ * spaces
+ local textstring = C((1 - space)^1)
+ * spaces
+ local description = ((1 - (spaces * newline))^1) / string.lower
+ local rubish_b = (1-newline)^0
+ * newline^1
+
+ local pattern_2 = Ct ( (
+ Cf ( Ct("") *
+ Cg (Cc("components") * components)
+ * rubish_a
+ * Cg (Cc("textstring") * textstring)
+ * Cg (Cc("description") * description )
+ * rubish_b
+ , rawset)
+ + P(1) )^1 )
+
+ function scripts.unicode.emoji(filename)
+
+ local name = resolvers.findfile("emoji-test.txt") or ""
+ if name == "" then
+ return
+ end
+ local l = io.loaddata(name)
+ local t = lpegmatch(pattern_2,l)
+
+ local hash = { }
+
+ local replace = lpeg.replacer {
+ ["#"] = "hash",
+ ["*"] = "asterisk"
+ }
+
+ for i=1,#t do
+ local v = t[i]
+ local d = v.description
+ local k = lpegmatch(replace,d) or d
+ hash[k] = v.components
+ end
+ local new = table.serialize(hash,"return", { hexify = true })
+ local old = io.loaddata(resolvers.findfile("char-emj.lua"))
+ if old and old ~= "" then
+ new = gsub(old,"^(.-)return .*$","%1" .. new)
+ end
+ io.savedata(filename,new)
+ end
+
+end
+
-- the action
local filename = environment.files[1]
@@ -583,6 +670,7 @@ else
scripts.unicode.update()
scripts.unicode.extras()
scripts.unicode.save("char-def-new.lua")
+ scripts.unicode.emoji("char-emj-new.lua")
else
report("nothing to do")
end