-- tex/context/base/mkiv/util-jsn.lua
--
-- Post-patch state of the hunks "@@ -20,155 +20,391 @@" and "@@ -176,13 +412,6 @@"
-- (index acbf16090..68c6a712e). The module header that precedes line 20 of the
-- original file is not part of these hunks and is therefore not reproduced here.

local P, V, R, S, C, Cc, Cs, Ct, Cf, Cg = lpeg.P, lpeg.V, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cf, lpeg.Cg
local lpegmatch = lpeg.match
local format, gsub = string.format, string.gsub
local formatters = string.formatters
local utfchar = utf.char
local concat, sortedkeys = table.concat, table.sortedkeys

local tonumber, tostring, rawset, type, next = tonumber, tostring, rawset, type, next

local json      = utilities.json or { }
utilities.json  = json

-- Decoder: json.tolua(str) and json.load(filename).

do

    -- \\ \/ \b \f \n \r \t \uHHHH

    local lbrace     = P("{")
    local rbrace     = P("}")
    local lparent    = P("[")
    local rparent    = P("]")
    local comma      = P(",")
    local colon      = P(":")
    local dquote     = P('"')

    local whitespace = lpeg.patterns.whitespace
    local optionalws = whitespace^0

    -- Characters that follow a backslash and map onto a control character. Lua
    -- reads "\ddd" as a *decimal* code, so backspace (8) and formfeed (12) are
    -- spelled with their symbolic escapes here.

    local escapes = {
        ["b"] = "\b",
        ["f"] = "\f",
        ["n"] = "\n",
        ["r"] = "\r",
        ["t"] = "\t",
    }

    -- todo: also handle larger utf16

    local escape_un = P("\\u")/"" * (C(R("09","AF","af")^-4) / function(s)
        return utfchar(tonumber(s,16))
    end)

    -- When the character after the backslash is not in the table the table
    -- capture has no value, so the substitution keeps the character itself.

    local escape_bs = P([[\]]) / "" * (P(1) / escapes)

    local jstring = dquote * Cs((escape_un + escape_bs + (1-dquote))^0) * dquote
    local jtrue   = P("true")  * Cc(true)
    local jfalse  = P("false") * Cc(false)
    local jnull   = P("null")  * Cc(nil)
    local jnumber = (1-whitespace-rparent-rbrace-comma)^1 / tonumber

    local key     = jstring

    local jsonconverter = { "value",
        hash  = lbrace * Cf(Ct("") * (V("pair") * (comma * V("pair"))^0 + optionalws),rawset) * rbrace,
        pair  = Cg(optionalws * key * optionalws * colon * V("value")),
        array = Ct(lparent * (V("value") * (comma * V("value"))^0 + optionalws) * rparent),
     -- value = optionalws * (jstring + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber + #rparent) * optionalws,
        value = optionalws * (jstring + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws,
    }

    -- local jsonconverter = { "value",
    --     hash   = lbrace * Cf(Ct("") * (V("pair") * (comma * V("pair"))^0 + optionalws),rawset) * rbrace,
    --     pair   = Cg(optionalws * V("string") * optionalws * colon * V("value")),
    --     array  = Ct(lparent * (V("value") * (comma * V("value"))^0 + optionalws) * rparent),
    --     string = jstring,
    --     value  = optionalws * (V("string") + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws,
    -- }

    -- lpeg.print(jsonconverter) -- size 181

    -- Converts a JSON string into a Lua value; the result is whatever the
    -- grammar match returns (nil when the string does not match).

    function json.tolua(str)
        return lpegmatch(jsonconverter,str)
    end

    -- Loads a file with io.loaddata and converts its content; returns nothing
    -- when no data could be loaded.

    function json.load(filename)
        local data = io.loaddata(filename)
        if data then
            return lpegmatch(jsonconverter,data)
        end
    end

end

-- Encoder: json.tostring(value,pretty) and json.tojson(value).

do

    -- It's pretty bad that JSON doesn't allow the trailing comma ... it's a
    -- typical example of a spec that then forces all generators to check for
    -- this. It's a way to make sure programmers keep jobs.

    local escaper

    local f_start_hash      = formatters[         '%w{'            ]
    local f_start_array     = formatters[         '%w['            ]
    local f_start_hash_new  = formatters[ "\n" .. '%w{'            ]
    local f_start_array_new = formatters[ "\n" .. '%w['            ]
    local f_start_hash_key  = formatters[ "\n" .. '%w"%s" : {'     ]
    local f_start_array_key = formatters[ "\n" .. '%w"%s" : ['     ]

    local f_stop_hash       = formatters[ "\n" .. '%w}'            ]
    local f_stop_array      = formatters[ "\n" .. '%w]'            ]

    local f_key_val_seq     = formatters[ "\n" .. '%w"%s" : %s'    ]
    local f_key_val_str     = formatters[ "\n" .. '%w"%s" : "%s"'  ]
    local f_key_val_num     = f_key_val_seq
    local f_key_val_yes     = formatters[ "\n" .. '%w"%s" : true'  ]
    local f_key_val_nop     = formatters[ "\n" .. '%w"%s" : false' ]

    local f_val_num         = formatters[ "\n" .. '%w%s'           ]
    local f_val_str         = formatters[ "\n" .. '%w"%s"'         ]
    local f_val_yes         = formatters[ "\n" .. '%wtrue'         ]
    local f_val_nop         = formatters[ "\n" .. '%wfalse'        ]
    local f_val_seq         = f_val_num

    -- no empty tables because unknown if table or hash

    -- The output buffer and its fill level are shared by all serializers in
    -- this closure; jsontostring resets them for every call.

    local t = { }
    local n = 0

    -- Builds the substitution pattern on first use: backslash, double quote and
    -- the control characters 0x00-0x1F are replaced by their \uXXXX form.

    local function checkescaper()
        if not escaper then
            local escapes = {
                ["\\"] = "\\u005C",
                ["\""] = "\\u0022",
            }
            for i=0,0x1F do
                escapes[utfchar(i)] = format("\\u%04X",i)
            end
            escaper = Cs( (
                (R('\0\x1F') + S('\"\\')) / escapes
              + P(1)
            )^1 )
        end
    end

    -- Returns the one-line form "[ a, b, c ]" when tt is a non-empty sequence
    -- without nested tables, and false otherwise. The shared buffer is used as
    -- scratch space and its fill level is restored before returning.

    local function is_simple_table(tt) -- also used in util-tab so maybe public
        local l = #tt
        if l > 0 then
            for i=1,l do
                if type(tt[i]) == "table" then
                    return false
                end
            end
            local nn = n
            n = n + 1 ; t[n] = "[ "
            for i=1,l do
                if i > 1 then
                    n = n + 1 ; t[n] = ", "
                end
                local v  = tt[i]
                local tv = type(v)
                if tv == "number" then
                    n = n + 1 ; t[n] = v
                elseif tv == "string" then
                    n = n + 1 ; t[n] = '"'
                    n = n + 1 ; t[n] = lpegmatch(escaper,v) or v
                    n = n + 1 ; t[n] = '"'
                elseif tv == "boolean" then
                    n = n + 1 ; t[n] = v and "true" or "false"
                else
                    n = n + 1 ; t[n] = tostring(v)
                end
            end
            n = n + 1 ; t[n] = " ]"
            local s = concat(t,"",nn+1,n)
            n = nn
            return s
        end
        return false
    end

    -- Pretty printer. Appends the indented form of root to the shared buffer.
    --
    -- root  : table to serialize (nothing happens when it is false or nil)
    -- name  : key under which root lives in its parent, nil for array elements
    -- depth : current indentation level
    -- level : nesting level, 0 for the outermost table
    -- size  : #root as seen by the caller; a positive size selects array output

    local function tojsonpp(root,name,depth,level,size)
        if root then
            local indexed = size > 0
            n = n + 1
            if level == 0 then
                if indexed then
                    t[n] = f_start_array(depth)
                else
                    t[n] = f_start_hash(depth)
                end
            elseif name then
                local tn = type(name)
                if tn == "string" then
                    name = lpegmatch(escaper,name) or name
                elseif tn ~= "number" then
                    name = tostring(name)
                end
                if indexed then
                    t[n] = f_start_array_key(depth,name)
                else
                    t[n] = f_start_hash_key(depth,name)
                end
            else
                if indexed then
                    t[n] = f_start_array_new(depth)
                else
                    t[n] = f_start_hash_new(depth)
                end
            end
            depth = depth + 1
            -- In both loops the separator is written tentatively and taken back
            -- when the entry produces no output (empty tables, unsupported
            -- types), so that no dangling commas end up in the result.
            local done = false
            if indexed then
                for i=1,size do
                    local mark = n
                    if done then
                        n = n + 1 ; t[n] = ","
                    end
                    local start = n
                    local v  = root[i]
                    local tv = type(v)
                    if tv == "number" then
                        n = n + 1 ; t[n] = f_val_num(depth,v)
                    elseif tv == "string" then
                        v = lpegmatch(escaper,v) or v
                        n = n + 1 ; t[n] = f_val_str(depth,v)
                    elseif tv == "table" then
                        if next(v) then
                            local st = is_simple_table(v)
                            if st then
                                n = n + 1 ; t[n] = f_val_seq(depth,st)
                            else
                                -- array elements have no name; the real size
                                -- decides between array and hash output
                                tojsonpp(v,nil,depth,level+1,#v)
                            end
                        end
                    elseif tv == "boolean" then
                        n = n + 1
                        if v then
                            t[n] = f_val_yes(depth)
                        else
                            t[n] = f_val_nop(depth)
                        end
                    end
                    if n > start then
                        done = true
                    else
                        n = mark
                    end
                end
            elseif next(root) then
                local sk = sortedkeys(root)
                for i=1,#sk do
                    local mark = n
                    if done then
                        n = n + 1 ; t[n] = ","
                    end
                    local start = n
                    local k  = sk[i]
                    local v  = root[k]
                    local tv = type(v)
                    local tk = type(k)
                    -- only string and number keys are written for plain values
                    local key
                    if tk == "string" then
                        key = lpegmatch(escaper,k) or k
                    elseif tk == "number" then
                        key = k
                    end
                    -- the formatter follows the type of the value, not of the key
                    if tv == "number" then
                        if key then
                            n = n + 1 ; t[n] = f_key_val_num(depth,key,v)
                        end
                    elseif tv == "string" then
                        if key then
                            v = lpegmatch(escaper,v) or v
                            n = n + 1 ; t[n] = f_key_val_str(depth,key,v)
                        end
                    elseif tv == "table" then
                        local l = #v
                        if l > 0 then
                            local st = is_simple_table(v)
                            if not st then
                                -- the raw key is passed on: tojsonpp escapes it
                                tojsonpp(v,k,depth,level+1,l)
                            elseif key then
                                n = n + 1 ; t[n] = f_key_val_seq(depth,key,st)
                            end
                        elseif next(v) then
                            tojsonpp(v,k,depth,level+1,0)
                        end
                    elseif tv == "boolean" then
                        if key then
                            n = n + 1
                            if v then
                                t[n] = f_key_val_yes(depth,key)
                            else
                                t[n] = f_key_val_nop(depth,key)
                            end
                        end
                    end
                    if n > start then
                        done = true
                    else
                        n = mark
                    end
                end
            end
            n = n + 1
            if indexed then
                t[n] = f_stop_array(depth-1)
            else
                t[n] = f_stop_hash(depth-1)
            end
        end
    end

    -- Compact serializer. Appends value to the shared buffer starting after
    -- slot n; the parameter n shadows the shared fill level, so the caller has
    -- to pick up the returned pair.

    local function tojson(value,n)
        local kind = type(value)
        if kind == "table" then
            local done = false
            local size = #value
            if size == 0 then
                for k, v in next, value do
                    if done then
                     -- n = n + 1 ; t[n] = ","
                        n = n + 1 ; t[n] = ',"'
                    else
                     -- n = n + 1 ; t[n] = "{"
                        n = n + 1 ; t[n] = '{"'
                        done = true
                    end
                    n = n + 1 ; t[n] = lpegmatch(escaper,k) or k
                    n = n + 1 ; t[n] = '":'
                    t, n = tojson(v,n)
                end
                if done then
                    n = n + 1 ; t[n] = "}"
                else
                    n = n + 1 ; t[n] = "{}"
                end
            elseif size == 1 then
                -- we can optimize for non tables
                n = n + 1 ; t[n] = "["
                t, n = tojson(value[1],n)
                n = n + 1 ; t[n] = "]"
            else
                for i=1,size do
                    if done then
                        n = n + 1 ; t[n] = ","
                    else
                        n = n + 1 ; t[n] = "["
                        done = true
                    end
                    t, n = tojson(value[i],n)
                end
                n = n + 1 ; t[n] = "]"
            end
        elseif kind == "string" then
            n = n + 1 ; t[n] = '"'
            n = n + 1 ; t[n] = lpegmatch(escaper,value) or value
            n = n + 1 ; t[n] = '"'
        elseif kind == "number" then
            n = n + 1 ; t[n] = value
        elseif kind == "boolean" then
            n = n + 1 ; t[n] = tostring(value)
        end
        return t, n
    end

    -- escaping keys can become an option

    -- Serializes value. Tables are written as JSON, indented when pretty is
    -- true and compact otherwise. Strings and numbers are returned escaped but
    -- unquoted; any other value goes through tostring.

    local function jsontostring(value,pretty)
        -- todo optimize for non table
        local kind = type(value)
        if kind == "table" then
            checkescaper()
            -- local to the closure (saves wrapping and local functions)
            t = { }
            n = 0
            if pretty then
                tojsonpp(value,nil,0,0,#value)
            else
                -- tojson tracks the fill level in its own parameter
                t, n = tojson(value,0)
            end
            local result = concat(t,"",1,n)
            t = nil
            n = 0
            return result
        elseif kind == "string" or kind == "number" then
            -- the escaper may not exist yet when no table was serialized before
            checkescaper()
            return lpegmatch(escaper,value) or value
        else
            return tostring(value)
        end
    end

    json.tostring = jsontostring

    -- Pretty printed variant of json.tostring.

    function json.tojson(value)
        return jsontostring(value,true)
    end

end

-- local tmp = [[ { "t\nt t" : "foo bar", "a" : true, "b" : [ 123 , 456E-10, { "a" : true, "b" : [ 123 , 456 ] } ] } ]]
-- tmp = json.tolua(tmp)
-- inspect(tmp)
-- tmp = json.tostring(tmp,true)
-- inspect(tmp)
-- tmp = json.tolua(tmp)
-- inspect(tmp)
-- inspect(tmp)
-- inspect(json.tostring(true))

-- local s = [[\foo"bar"]]
-- local j = json.tostring { s = s }
-- local l = json.tolua(j)