diff options
author | Hans Hagen <pragma@wxs.nl> | 2013-03-10 14:36:00 +0100 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2013-03-10 14:36:00 +0100 |
commit | 748be39b9f88d15159ab8879ff8e9b88e4b8718a (patch) | |
tree | 6bef5e420c41621c8113e0179edac8135264ab52 /tex/context/base/util-str.lua | |
parent | 7b01e5c00c1538ceaabe8a1b818eeec0d5c69da9 (diff) | |
download | context-748be39b9f88d15159ab8879ff8e9b88e4b8718a.tar.gz |
beta 2013.03.10 14:36
Diffstat (limited to 'tex/context/base/util-str.lua')
-rw-r--r-- | tex/context/base/util-str.lua | 332 |
1 files changed, 252 insertions, 80 deletions
diff --git a/tex/context/base/util-str.lua b/tex/context/base/util-str.lua index a4889d252..959955867 100644 --- a/tex/context/base/util-str.lua +++ b/tex/context/base/util-str.lua @@ -10,14 +10,18 @@ utilities = utilities or {} utilities.strings = utilities.strings or { } local strings = utilities.strings -local load = load local format, gsub, rep, sub = string.format, string.gsub, string.rep, string.sub +local load, dump = load, string.dump local concat = table.concat -local P, V, C, S, R, Ct, Cs, Cp, Carg = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg +local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc local patterns, lpegmatch = lpeg.patterns, lpeg.match local utfchar, utfbyte = utf.char, utf.byte -local setmetatableindex = table.setmetatableindex --- +----- loadstripped = utilities.lua.loadstripped +----- setmetatableindex = table.setmetatableindex + +local loadstripped = _LUAVERSION < 5.2 and load or function(str) + return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stripped load +end local stripper = patterns.stripzeros @@ -65,7 +69,7 @@ function strings.newrepeater(str,offset) return t end t = { } - setmetatableindex(t, function(t,k) + setmetatable(t, { __index = function(t,k) if not k then return "" end @@ -73,7 +77,7 @@ function strings.newrepeater(str,offset) local s = n > 0 and rep(str,n) or "" t[k] = s return s - end) + end }) s[offset] = t return t end @@ -85,6 +89,8 @@ local extra, tab, start = 0, 0, 4, 0 local nspaces = strings.newrepeater(" ") +string.nspaces = nspaces + local pattern = Carg(1) / function(t) extra, tab, start = 0, t or 7, 1 @@ -144,6 +150,13 @@ end -- Work in progress. Interesting is that compared to the built-in this -- is faster in luatex than in luajittex where we have a comparable speed. 
+-- It only makes sense to use the formatter when a (somewhat) complex format +-- is used a lot. Each formatter is a function so there is some overhead +-- and not all formatted output is worth that overhead. Keep in mind that +-- there is an extra function call involved. In principle we end up with a +-- string concatenation so one could inline such a sequence but often at the +-- cost of less readability. So, it's a sort of (visual) compromise. Of course +-- there is the benefit of more variants. local n = 0 @@ -155,6 +168,20 @@ local n = 0 -- print(...,...,...) -- 1,1,1,2,3 -- end +local template_shortcuts = [[ +local tostring = tostring +local format = string.format +local concat = table.concat +local signed = number.signed +local points = number.points +local basepoints = number.basepoints +local utfchar = utf.char +local utfbyte = utf.byte +local lpegmatch = lpeg.match +local xmlescape = lpeg.patterns.xmlescape +local spaces = string.nspaces +]] + local prefix_any = C((S("+- .") + R("09"))^0) local prefix_tab = C((1-R("az","AZ","09","%%"))^0) @@ -163,23 +190,37 @@ local prefix_tab = C((1-R("az","AZ","09","%%"))^0) local format_s = function(f) n = n + 1 if f and f ~= "" then - return format("format('%%%ss',(select(%s,...)))",f,n) + return format("format('%%%ss',a%s)",f,n) + else + return format("a%s",n) + end +end + +local format_S = function(f) -- can be optimized + n = n + 1 + if f and f ~= "" then + return format("format('%%%ss',tostring(a%s))",f,n) else - return format("(select(%s,...))",n) + return format("tostring(a%s)",n) end end local format_q = function() n = n + 1 - return format("format('%%q',(select(%s,...)))",n) -- maybe an own lpeg + return format("format('%%q',a%s)",n) -- maybe an own lpeg +end + +local format_Q = function() -- can be optimized + n = n + 1 + return format("format('%%q',tostring(a%s))",n) end local format_i = function(f) n = n + 1 if f and f ~= "" then - return 
format("format('%%%si',a%s)",f,n) else - return format("(select(%s,...))",n) + return format("a%s",n) end end @@ -196,79 +237,79 @@ end local format_I = function(f) n = n + 1 if f and f ~= "" then - return format("format('%%s%%%si',signed((select(%s,...))))",f,n) + return format("format('%%s%%%si',signed(a%s))",f,n) else - return format("format('%%s%%i',signed((select(%s,...))))",n) + return format("format('%%s%%i',signed(a%s))",n) end end local format_f = function(f) n = n + 1 - return format("format('%%%sf',(select(%s,...)))",f,n) + return format("format('%%%sf',a%s)",f,n) end local format_g = function(f) n = n + 1 - return format("format('%%%sg',(select(%s,...)))",f,n) + return format("format('%%%sg',a%s)",f,n) end local format_G = function(f) n = n + 1 - return format("format('%%%sG',(select(%s,...)))",f,n) + return format("format('%%%sG',a%s)",f,n) end local format_e = function(f) n = n + 1 - return format("format('%%%se',(select(%s,...)))",f,n) + return format("format('%%%se',a%s)",f,n) end local format_E = function(f) n = n + 1 - return format("format('%%%sE',(select(%s,...)))",f,n) + return format("format('%%%sE',a%s)",f,n) end local format_x = function(f) n = n + 1 - return format("format('%%%sx',(select(%s,...)))",f,n) + return format("format('%%%sx',a%s)",f,n) end local format_X = function(f) n = n + 1 - return format("format('%%%sX',(select(%s,...)))",f,n) + return format("format('%%%sX',a%s)",f,n) end local format_o = function(f) n = n + 1 - return format("format('%%%so',(select(%s,...)))",f,n) + return format("format('%%%so',a%s)",f,n) end local format_c = function() n = n + 1 - return format("utfchar((select(%s,...)))",n) + return format("utfchar(a%s)",n) end local format_r = function(f) n = n + 1 - return format("format('%%%s.0f',(select(%s,...)))",f,n) + return format("format('%%%s.0f',a%s)",f,n) end -local format_v = function(f) +local format_h = function(f) n = n + 1 if f == "-" then f = sub(f,2) - return 
format("format('%%%sx',utfbyte((select(%s,...))))",f == "" and "05" or f,n) + return format("format('%%%sx',utfbyte(a%s))",f == "" and "05" or f,n) else - return format("format('0x%%%sx',utfbyte((select(%s,...))))",f == "" and "05" or f,n) + return format("format('0x%%%sx',utfbyte(a%s))",f == "" and "05" or f,n) end end -local format_V = function(f) +local format_H = function(f) n = n + 1 if f == "-" then f = sub(f,2) - return format("format('%%%sX',utfbyte((select(%s,...))))",f == "" and "05" or f,n) + return format("format('%%%sX',utfbyte(a%s))",f == "" and "05" or f,n) else - return format("format('0x%%%sX',utfbyte((select(%s,...))))",f == "" and "05" or f,n) + return format("format('0x%%%sX',utfbyte(a%s))",f == "" and "05" or f,n) end end @@ -276,9 +317,9 @@ local format_u = function(f) n = n + 1 if f == "-" then f = sub(f,2) - return format("format('%%%sx',utfbyte((select(%s,...))))",f == "" and "05" or f,n) + return format("format('%%%sx',utfbyte(a%s))",f == "" and "05" or f,n) else - return format("format('u+%%%sx',utfbyte((select(%s,...))))",f == "" and "05" or f,n) + return format("format('u+%%%sx',utfbyte(a%s))",f == "" and "05" or f,n) end end @@ -286,57 +327,117 @@ local format_U = function(f) n = n + 1 if f == "-" then f = sub(f,2) - return format("format('%%%sX',utfbyte((select(%s,...))))",f == "" and "05" or f,n) + return format("format('%%%sX',utfbyte(a%s))",f == "" and "05" or f,n) else - return format("format('U+%%%sX',utfbyte((select(%s,...))))",f == "" and "05" or f,n) + return format("format('U+%%%sX',utfbyte(a%s))",f == "" and "05" or f,n) end end local format_p = function() n = n + 1 - return format("points((select(%s,...)))",n) + return format("points(a%s)",n) end local format_b = function() n = n + 1 - return format("basepoints((select(%s,...)))",n) + return format("basepoints(a%s)",n) end local format_t = function(f) n = n + 1 if f and f ~= "" then - return format("concat((select(%s,...)),%q)",n,f) + return format("concat(a%s,%q)",n,f) 
else - return format("concat((select(%s,...)))",n) + return format("concat(a%s)",n) end end local format_l = function() n = n + 1 - return format("(select(%s,...) and 'true' or 'false')",n) + return format("(a%s and 'true' or 'false')",n) +end + +local format_L = function() + n = n + 1 + return format("(a%s and 'TRUE' or 'FALSE')",n) +end + +local format_N = function() -- strips leading zeros + n = n + 1 + return format("tostring(tonumber(a%s) or a%s)",n,n) end local format_a = function(s) return format("%q",s) end -local builder = Ct { "start", - start = (P("%") * ( - V("s") + V("q") - + V("i") + V("d") - + V("f") + V("g") + V("G") + V("e") + V("E") - + V("x") + V("X") + V("o") - -- - + V("c") - -- - + V("r") - + V("v") + V("V") + V("u") + V("U") - + V("p") + V("b") - + V("t") - + V("l") - + V("I") - ) - + V("a") +local format_w = function(f) -- handy when doing depth related indent + n = n + 1 + f = tonumber(f) + if f then + return format("spaces[%s+tonumber(a%s)]",f,n) + else + return format("spaces[tonumber(a%s)]",n) + end +end + +local format_W = function(f) -- handy when doing depth related indent + return format("spaces[%s]",tonumber(f) or 0) +end + +local extensions = { } + +local format_extension = function(name) + n = n + 1 + local extension = extensions[name] or "tostring(%s)" + return format(extension,format("a%s",n)) +end + +function addextension(name,template,shortcuts) + extensions[name] = template + if shortcuts then + template_shortcuts = shortcuts .. "\n" .. 
template_shortcuts -- so we can't overload + end +end + +lpeg.patterns.xmlescape = Cs((P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;" + P('"')/"&quot;" + P(1))^0) +lpeg.patterns.texescape = Cs((C(S("#$%\\{}"))/"\\%1" + P(1))^0) + +addextension("xml",[[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]]) +addextension("tex",[[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]]) + +local builder = Cs { "start", + start = ( + ( + P("%") / "" + * ( + V("!") -- new + + V("s") + V("q") + + V("i") + V("d") + + V("f") + V("g") + V("G") + V("e") + V("E") + + V("x") + V("X") + V("o") + -- + + V("c") + + V("S") -- new + + V("Q") -- new + + V("N") -- new + -- + + V("r") + + V("h") + V("H") + V("u") + V("U") + + V("p") + V("b") + + V("t") + + V("l") + V("L") + + V("I") + + V("h") -- new + + V("w") -- new + + V("W") -- new + -- + + V("a") -- ignores probably messed up % + ) + + V("a") + ) +-- * (P(-1) + Cc("..")) + * (P(-1) + Carg(1)) )^0, -- ["s"] = (prefix_any * P("s")) / format_s, -- %s => regular %s (string) @@ -352,61 +453,132 @@ local builder = Ct { "start", ["X"] = (prefix_any * P("X")) / format_X, -- %X => regular %X (HEXADECIMAL) ["o"] = (prefix_any * P("o")) / format_o, -- %o => regular %o (octal) -- + ["S"] = (prefix_any * P("S")) / format_S, -- %S => %s (tostring) + ["Q"] = (prefix_any * P("Q")) / format_S, -- %Q => %q (tostring) + ["N"] = (prefix_any * P("N")) / format_N, -- %N => tonumber (strips leading zeros) ["c"] = (prefix_any * P("c")) / format_c, -- %c => utf character (extension to regular) -- ["r"] = (prefix_any * P("r")) / format_r, -- %r => round - ["v"] = (prefix_any * P("v")) / format_v, -- %v => 0x0a1b2 (when - no 0x) - ["V"] = (prefix_any * P("V")) / format_V, -- %V => 0x0A1B2 (when - no 0x) + ["h"] = (prefix_any * P("h")) / format_h, -- %h => 0x0a1b2 (when - no 0x) was v + ["H"] = (prefix_any * P("H")) / format_H, -- %H => 0x0A1B2 (when - no 0x) was V ["u"] = (prefix_any * P("u")) / format_u, -- %u => u+0a1b2 (when - no u+) 
["U"] = (prefix_any * P("U")) / format_U, -- %U => U+0A1B2 (when - no U+) ["p"] = (prefix_any * P("p")) / format_p, -- %p => 12.345pt / maybe: P (and more units) ["b"] = (prefix_any * P("b")) / format_b, -- %b => 12.342bp / maybe: B (and more units) ["t"] = (prefix_tab * P("t")) / format_t, -- %t => concat ["l"] = (prefix_tab * P("l")) / format_l, -- %l => boolean + ["L"] = (prefix_tab * P("L")) / format_L, -- %L => BOOLEAN ["I"] = (prefix_any * P("I")) / format_I, -- %I => signed integer -- - ["a"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_a, -- %a => text (including %%) + ["w"] = (prefix_any * P("w")) / format_w, -- %w => n spaces (optional prefix is added) + ["W"] = (prefix_any * P("W")) / format_W, -- %W => mandate prefix, no specifier + -- + ["a"] = Cs(((1-P("%"))^1 + P("%%")/"%%%%")^1) / format_a, -- rest (including %%) + -- + -- ["!"] = P("!xml!") / format_xml, -- %!xml! => hypertext escaped " < > & + ["!"] = P("!") * C((1-P("!"))^1) * P("!") / format_extension, } -- we can be clever and only alias what is needed -local template = [[ -local format = string.format -local concat = table.concat -local signed = number.signed -local points = number.points -local basepoints = number.basepoints -local utfchar = utf.char -local utfbyte = utf.byte -return function(...) - return %s -end +local direct = Cs ( + P("%")/"" + * Cc([[local format = string.format return function(str) return format("%]]) + * C(S("+- .") + R("09"))^0 * S("sqidfgGeExXo") + * Cc([[",str) end]]) + * P(-1) + ) + +local template = [[ +%s +return function(%s) return %s end ]] +local arguments = { "a1" } -- faster than previously used (select(n,...)) + +setmetatable(arguments, { __index = + function(t,k) + local v = t[k-1] .. ",a" .. 
k + t[k] = v + return v + end +}) + local function make(t,str) - n = 0 - local p = lpegmatch(builder,str) --- inspect(p) - local c = format(template,concat(p,"..")) --- inspect(c) - formatter = load(c)() - t[str] = formatter - return formatter + local f + local p = lpegmatch(direct,str) + if p then + f = loadstripped(p)() + else + n = 0 + p = lpegmatch(builder,str,1,"..") -- after this we know n + if n > 0 then + p = format(template,template_shortcuts,arguments[n],p) + -- print("builder>",p) + f = loadstripped(p)() + else + f = function() return str end + end + end + t[str] = f + return f +end + +local function use(t,fmt,...) + return t[fmt](...) end local formatters = string.formatters or { } string.formatters = formatters -setmetatableindex(formatters,make) +setmetatable(formatters, { __index = make, __call = use }) -function string.makeformatter(str) +-- -- yes or no: +-- +-- local function make(t,str) +-- local f +-- local p = lpegmatch(direct,str) +-- if p then +-- f = loadstripped(p)() +-- else +-- n = 0 +-- p = lpegmatch(builder,str,1,",") -- after this we know n +-- if n > 0 then +-- p = format(template,template_shortcuts,arguments[n],p) +-- f = loadstripped(p)() +-- else +-- f = function() return str end +-- end +-- end +-- t[str] = f +-- return f +-- end +-- +-- local formatteds = string.formatteds or { } +-- string.formatteds = formatteds +-- +-- setmetatable(formatteds, { __index = make, __call = use }) + +-- + +-- print(formatters["hans %N and %N done"](123,"0123")) +-- local test = formatters["1%%23%4w56%s78 %p %!xml! test and %!tex! more %s"] +-- print(#string.dump(test)) +-- print(test(2,123,99999,"abc&def","# and $","okay","!!!")) +-- local test = formatters["%s"] +-- print(#string.dump(test)) +-- print(test("okay")) + +function string.makeformatter(str) -- redundant return formatters[str] end -function string.formatter(str,...) +function string.formatter(str,...) -- redundant return formatters[str](...) 
end +string.addformatter = addextension + -- local p1 = "%s test %f done %p and %c and %V or %+t or %%" -- local p2 = "%s test %f done %s and %s and 0x%05X or %s or %%" -- |