summaryrefslogtreecommitdiff
path: root/tex/context/base/util-str.lua
diff options
context:
space:
mode:
authorHans Hagen <pragma@wxs.nl>2013-03-10 14:36:00 +0100
committerHans Hagen <pragma@wxs.nl>2013-03-10 14:36:00 +0100
commit748be39b9f88d15159ab8879ff8e9b88e4b8718a (patch)
tree6bef5e420c41621c8113e0179edac8135264ab52 /tex/context/base/util-str.lua
parent7b01e5c00c1538ceaabe8a1b818eeec0d5c69da9 (diff)
downloadcontext-748be39b9f88d15159ab8879ff8e9b88e4b8718a.tar.gz
beta 2013.03.10 14:36
Diffstat (limited to 'tex/context/base/util-str.lua')
-rw-r--r--tex/context/base/util-str.lua332
1 files changed, 252 insertions, 80 deletions
diff --git a/tex/context/base/util-str.lua b/tex/context/base/util-str.lua
index a4889d252..959955867 100644
--- a/tex/context/base/util-str.lua
+++ b/tex/context/base/util-str.lua
@@ -10,14 +10,18 @@ utilities = utilities or {}
utilities.strings = utilities.strings or { }
local strings = utilities.strings
-local load = load
local format, gsub, rep, sub = string.format, string.gsub, string.rep, string.sub
+local load, dump = load, string.dump
local concat = table.concat
-local P, V, C, S, R, Ct, Cs, Cp, Carg = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg
+local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc
local patterns, lpegmatch = lpeg.patterns, lpeg.match
local utfchar, utfbyte = utf.char, utf.byte
-local setmetatableindex = table.setmetatableindex
---
+----- loadstripped = utilities.lua.loadstripped
+----- setmetatableindex = table.setmetatableindex
+
+local loadstripped = _LUAVERSION < 5.2 and load or function(str) -- plain load below 5.2, otherwise compile, dump (second argument true) and load again
+ return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stripped load
+end
local stripper = patterns.stripzeros
@@ -65,7 +69,7 @@ function strings.newrepeater(str,offset)
return t
end
t = { }
- setmetatableindex(t, function(t,k)
+ setmetatable(t, { __index = function(t,k)
if not k then
return ""
end
@@ -73,7 +77,7 @@ function strings.newrepeater(str,offset)
local s = n > 0 and rep(str,n) or ""
t[k] = s
return s
- end)
+ end })
s[offset] = t
return t
end
@@ -85,6 +89,8 @@ local extra, tab, start = 0, 0, 4, 0
local nspaces = strings.newrepeater(" ")
+string.nspaces = nspaces
+
local pattern =
Carg(1) / function(t)
extra, tab, start = 0, t or 7, 1
@@ -144,6 +150,13 @@ end
-- Work in progress. Interesting is that compared to the built-in this
-- is faster in luatex than in luajittex where we have a comparable speed.
+-- It only makes sense to use the formatter when a (somewhat) complex format
+-- is used a lot. Each formatter is a function so there is some overhead
+-- and not all formatted output is worth that overhead. Keep in mind that
+-- there is an extra function call involved. In principle we end up with a
+-- string concatenation so one could inline such a sequence but often at the
+-- cost of less readability. So, it's a sort of (visual) compromise. Of course
+-- there is the benefit of more variants.
local n = 0
@@ -155,6 +168,20 @@ local n = 0
-- print(...,...,...) -- 1,1,1,2,3
-- end
+local template_shortcuts = [[
+local tostring = tostring
+local format = string.format
+local concat = table.concat
+local signed = number.signed
+local points = number.points
+local basepoints = number.basepoints
+local utfchar = utf.char
+local utfbyte = utf.byte
+local lpegmatch = lpeg.match
+local xmlescape = lpeg.patterns.xmlescape
+local spaces = string.nspaces
+]] -- preamble of local aliases that is put in front of the code of each generated formatter
+
local prefix_any = C((S("+- .") + R("09"))^0)
local prefix_tab = C((1-R("az","AZ","09","%%"))^0)
@@ -163,23 +190,37 @@ local prefix_tab = C((1-R("az","AZ","09","%%"))^0)
local format_s = function(f)
n = n + 1
if f and f ~= "" then
- return format("format('%%%ss',(select(%s,...)))",f,n)
+ return format("format('%%%ss',a%s)",f,n)
+ else
+ return format("a%s",n)
+ end
+end
+
+local format_S = function(f) -- can be optimized
+ n = n + 1
+ if f and f ~= "" then
+ return format("format('%%%ss',tostring(a%s))",f,n)
else
- return format("(select(%s,...))",n)
+ return format("tostring(a%s)",n)
end
end
local format_q = function()
n = n + 1
- return format("format('%%q',(select(%s,...)))",n) -- maybe an own lpeg
+ return format("format('%%q',a%s)",n) -- maybe an own lpeg
+end
+
+local format_Q = function() -- emits code that applies %q to tostring(argument); can be optimized
+ n = n + 1
+ return format("format('%%q',tostring(a%s))",n)
end
local format_i = function(f)
n = n + 1
if f and f ~= "" then
- return format("format('%%%si',(select(%s,...)))",f,n)
+ return format("format('%%%si',a%s)",f,n)
else
- return format("(select(%s,...))",n)
+ return format("a%s",n)
end
end
@@ -196,79 +237,79 @@ end
local format_I = function(f)
n = n + 1
if f and f ~= "" then
- return format("format('%%s%%%si',signed((select(%s,...))))",f,n)
+ return format("format('%%s%%%si',signed(a%s))",f,n)
else
- return format("format('%%s%%i',signed((select(%s,...))))",n)
+ return format("format('%%s%%i',signed(a%s))",n)
end
end
local format_f = function(f)
n = n + 1
- return format("format('%%%sf',(select(%s,...)))",f,n)
+ return format("format('%%%sf',a%s)",f,n)
end
local format_g = function(f)
n = n + 1
- return format("format('%%%sg',(select(%s,...)))",f,n)
+ return format("format('%%%sg',a%s)",f,n)
end
local format_G = function(f)
n = n + 1
- return format("format('%%%sG',(select(%s,...)))",f,n)
+ return format("format('%%%sG',a%s)",f,n)
end
local format_e = function(f)
n = n + 1
- return format("format('%%%se',(select(%s,...)))",f,n)
+ return format("format('%%%se',a%s)",f,n)
end
local format_E = function(f)
n = n + 1
- return format("format('%%%sE',(select(%s,...)))",f,n)
+ return format("format('%%%sE',a%s)",f,n)
end
local format_x = function(f)
n = n + 1
- return format("format('%%%sx',(select(%s,...)))",f,n)
+ return format("format('%%%sx',a%s)",f,n)
end
local format_X = function(f)
n = n + 1
- return format("format('%%%sX',(select(%s,...)))",f,n)
+ return format("format('%%%sX',a%s)",f,n)
end
local format_o = function(f)
n = n + 1
- return format("format('%%%so',(select(%s,...)))",f,n)
+ return format("format('%%%so',a%s)",f,n)
end
local format_c = function()
n = n + 1
- return format("utfchar((select(%s,...)))",n)
+ return format("utfchar(a%s)",n)
end
local format_r = function(f)
n = n + 1
- return format("format('%%%s.0f',(select(%s,...)))",f,n)
+ return format("format('%%%s.0f',a%s)",f,n)
end
-local format_v = function(f)
+local format_h = function(f)
n = n + 1
if f == "-" then
f = sub(f,2)
- return format("format('%%%sx',utfbyte((select(%s,...))))",f == "" and "05" or f,n)
+ return format("format('%%%sx',utfbyte(a%s))",f == "" and "05" or f,n)
else
- return format("format('0x%%%sx',utfbyte((select(%s,...))))",f == "" and "05" or f,n)
+ return format("format('0x%%%sx',utfbyte(a%s))",f == "" and "05" or f,n)
end
end
-local format_V = function(f)
+local format_H = function(f)
n = n + 1
if f == "-" then
f = sub(f,2)
- return format("format('%%%sX',utfbyte((select(%s,...))))",f == "" and "05" or f,n)
+ return format("format('%%%sX',utfbyte(a%s))",f == "" and "05" or f,n)
else
- return format("format('0x%%%sX',utfbyte((select(%s,...))))",f == "" and "05" or f,n)
+ return format("format('0x%%%sX',utfbyte(a%s))",f == "" and "05" or f,n)
end
end
@@ -276,9 +317,9 @@ local format_u = function(f)
n = n + 1
if f == "-" then
f = sub(f,2)
- return format("format('%%%sx',utfbyte((select(%s,...))))",f == "" and "05" or f,n)
+ return format("format('%%%sx',utfbyte(a%s))",f == "" and "05" or f,n)
else
- return format("format('u+%%%sx',utfbyte((select(%s,...))))",f == "" and "05" or f,n)
+ return format("format('u+%%%sx',utfbyte(a%s))",f == "" and "05" or f,n)
end
end
@@ -286,57 +327,117 @@ local format_U = function(f)
n = n + 1
if f == "-" then
f = sub(f,2)
- return format("format('%%%sX',utfbyte((select(%s,...))))",f == "" and "05" or f,n)
+ return format("format('%%%sX',utfbyte(a%s))",f == "" and "05" or f,n)
else
- return format("format('U+%%%sX',utfbyte((select(%s,...))))",f == "" and "05" or f,n)
+ return format("format('U+%%%sX',utfbyte(a%s))",f == "" and "05" or f,n)
end
end
local format_p = function()
n = n + 1
- return format("points((select(%s,...)))",n)
+ return format("points(a%s)",n)
end
local format_b = function()
n = n + 1
- return format("basepoints((select(%s,...)))",n)
+ return format("basepoints(a%s)",n)
end
local format_t = function(f)
n = n + 1
if f and f ~= "" then
- return format("concat((select(%s,...)),%q)",n,f)
+ return format("concat(a%s,%q)",n,f)
else
- return format("concat((select(%s,...)))",n)
+ return format("concat(a%s)",n)
end
end
local format_l = function()
n = n + 1
- return format("(select(%s,...) and 'true' or 'false')",n)
+ return format("(a%s and 'true' or 'false')",n)
+end
+
+local format_L = function() -- emits code that yields 'TRUE' or 'FALSE' depending on the truth value of the argument
+ n = n + 1
+ return format("(a%s and 'TRUE' or 'FALSE')",n)
+end
+
+local format_N = function() -- strips leading zeros; an argument that tonumber rejects is passed to tostring as it is
+ n = n + 1
+ return format("tostring(tonumber(a%s) or a%s)",n,n)
end
local format_a = function(s)
return format("%q",s)
end
-local builder = Ct { "start",
- start = (P("%") * (
- V("s") + V("q")
- + V("i") + V("d")
- + V("f") + V("g") + V("G") + V("e") + V("E")
- + V("x") + V("X") + V("o")
- --
- + V("c")
- --
- + V("r")
- + V("v") + V("V") + V("u") + V("U")
- + V("p") + V("b")
- + V("t")
- + V("l")
- + V("I")
- )
- + V("a")
+local format_w = function(f) -- handy when doing depth related indent: the argument (plus a numeric prefix, if given) is the number of spaces
+ n = n + 1
+ f = tonumber(f)
+ if f then
+ return format("spaces[%s+tonumber(a%s)]",f,n)
+ else
+ return format("spaces[tonumber(a%s)]",n)
+ end
+end
+
+local format_W = function(f) -- fixed indent: the prefix is the number of spaces (0 when absent), no argument is consumed
+ return format("spaces[%s]",tonumber(f) or 0)
+end
+
+local extensions = { }
+
+local format_extension = function(name) -- an unknown extension name falls back to tostring(argument)
+ n = n + 1
+ local extension = extensions[name] or "tostring(%s)"
+ return format(extension,format("a%s",n))
+end
+
+-- Registers a formatter extension that is triggered by %!name! in a format
+-- string. The template is a string.format pattern in which the single %s is
+-- replaced by the name of the argument. The optional shortcuts (lua code) are
+-- prepended to the preamble used for formatters that are built afterwards.
+-- This function is local on purpose: it is exported as string.addformatter
+-- and should not leak into the global namespace.
+local function addextension(name,template,shortcuts)
+ extensions[name] = template
+ if shortcuts then
+ template_shortcuts = shortcuts .. "\n" .. template_shortcuts -- so we can't overload
+ end
+end
+
+lpeg.patterns.xmlescape = Cs((P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;" + P('"')/"&quot;" + P(1))^0) -- replaces < > & " by their xml entities
+lpeg.patterns.texescape = Cs((C(S("#$%\\{}"))/"\\%1" + P(1))^0) -- puts a backslash in front of # $ % \ { }
+
+addextension("xml",[[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]]) -- used as %!xml!
+addextension("tex",[[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]]) -- used as %!tex!
+
+local builder = Cs { "start",
+ start = (
+ (
+ P("%") / ""
+ * (
+ V("!") -- new
+ + V("s") + V("q")
+ + V("i") + V("d")
+ + V("f") + V("g") + V("G") + V("e") + V("E")
+ + V("x") + V("X") + V("o")
+ --
+ + V("c")
+ + V("S") -- new
+ + V("Q") -- new
+ + V("N") -- new
+ --
+ + V("r")
+ + V("h") + V("H") + V("u") + V("U")
+ + V("p") + V("b")
+ + V("t")
+ + V("l") + V("L")
+ + V("I")
+ + V("h") -- new
+ + V("w") -- new
+ + V("W") -- new
+ --
+ + V("a") -- ignores probably messed up %
+ )
+ + V("a")
+ )
+-- * (P(-1) + Cc(".."))
+ * (P(-1) + Carg(1))
)^0,
--
["s"] = (prefix_any * P("s")) / format_s, -- %s => regular %s (string)
@@ -352,61 +453,132 @@ local builder = Ct { "start",
["X"] = (prefix_any * P("X")) / format_X, -- %X => regular %X (HEXADECIMAL)
["o"] = (prefix_any * P("o")) / format_o, -- %o => regular %o (octal)
--
+ ["S"] = (prefix_any * P("S")) / format_S, -- %S => %s (tostring)
+ ["Q"] = (prefix_any * P("Q")) / format_Q, -- %Q => %q (tostring)
+ ["N"] = (prefix_any * P("N")) / format_N, -- %N => tonumber (strips leading zeros)
["c"] = (prefix_any * P("c")) / format_c, -- %c => utf character (extension to regular)
--
["r"] = (prefix_any * P("r")) / format_r, -- %r => round
- ["v"] = (prefix_any * P("v")) / format_v, -- %v => 0x0a1b2 (when - no 0x)
- ["V"] = (prefix_any * P("V")) / format_V, -- %V => 0x0A1B2 (when - no 0x)
+ ["h"] = (prefix_any * P("h")) / format_h, -- %h => 0x0a1b2 (when - no 0x) was v
+ ["H"] = (prefix_any * P("H")) / format_H, -- %H => 0x0A1B2 (when - no 0x) was V
["u"] = (prefix_any * P("u")) / format_u, -- %u => u+0a1b2 (when - no u+)
["U"] = (prefix_any * P("U")) / format_U, -- %U => U+0A1B2 (when - no U+)
["p"] = (prefix_any * P("p")) / format_p, -- %p => 12.345pt / maybe: P (and more units)
["b"] = (prefix_any * P("b")) / format_b, -- %b => 12.342bp / maybe: B (and more units)
["t"] = (prefix_tab * P("t")) / format_t, -- %t => concat
["l"] = (prefix_tab * P("l")) / format_l, -- %l => boolean
+ ["L"] = (prefix_tab * P("L")) / format_L, -- %L => BOOLEAN
["I"] = (prefix_any * P("I")) / format_I, -- %I => signed integer
--
- ["a"] = Cs(((1-P("%"))^1 + P("%%")/"%%")^1) / format_a, -- %a => text (including %%)
+ ["w"] = (prefix_any * P("w")) / format_w, -- %w => n spaces (optional prefix is added)
+ ["W"] = (prefix_any * P("W")) / format_W, -- %W => mandate prefix, no specifier
+ --
+ ["a"] = Cs(((1-P("%"))^1 + P("%%")/"%%%%")^1) / format_a, -- rest (including %%)
+ --
+ -- ["!"] = P("!xml!") / format_xml, -- %!xml! => hypertext escaped " < > &
+ ["!"] = P("!") * C((1-P("!"))^1) * P("!") / format_extension,
}
-- we can be clever and only alias what is needed
-local template = [[
-local format = string.format
-local concat = table.concat
-local signed = number.signed
-local points = number.points
-local basepoints = number.basepoints
-local utfchar = utf.char
-local utfbyte = utf.byte
-return function(...)
- return %s
-end
+local direct = Cs ( -- fast path: a format that is just one standard directive becomes a one argument string.format wrapper
+ P("%")/""
+ * Cc([[local format = string.format return function(str) return format("%]])
+ * C(S("+- .") + R("09"))^0 * S("sqidfgGeExXo")
+ * Cc([[",str) end]])
+ * P(-1)
+ )
+
+local template = [[
+%s
+return function(%s) return %s end
]] -- filled in by make with: the shortcuts preamble, the argument list and the result expression
+local arguments = { "a1" } -- faster than previously used (select(n,...))
+
+setmetatable(arguments, { __index = -- arguments[k] is built on demand as "a1,a2,...,ak" and then cached
+ function(t,k)
+ local v = t[k-1] .. ",a" .. k
+ t[k] = v
+ return v
+ end
+})
+
local function make(t,str)
- n = 0
- local p = lpegmatch(builder,str)
--- inspect(p)
- local c = format(template,concat(p,".."))
--- inspect(c)
- formatter = load(c)()
- t[str] = formatter
- return formatter
+ local f
+ local p = lpegmatch(direct,str)
+ if p then
+ f = loadstripped(p)()
+ else
+ n = 0
+ p = lpegmatch(builder,str,1,"..") -- after this we know n
+ if n > 0 then
+ p = format(template,template_shortcuts,arguments[n],p)
+ -- print("builder>",p)
+ f = loadstripped(p)()
+ else
+ f = function() return str end
+ end
+ end
+ t[str] = f
+ return f
+end
+
+local function use(t,fmt,...) -- looks up (or builds) the formatter for fmt in t and applies it to the remaining arguments
+ return t[fmt](...)
end
local formatters = string.formatters or { }
string.formatters = formatters
-setmetatableindex(formatters,make)
+setmetatable(formatters, { __index = make, __call = use })
-function string.makeformatter(str)
+-- -- yes or no:
+--
+-- local function make(t,str)
+-- local f
+-- local p = lpegmatch(direct,str)
+-- if p then
+-- f = loadstripped(p)()
+-- else
+-- n = 0
+-- p = lpegmatch(builder,str,1,",") -- after this we know n
+-- if n > 0 then
+-- p = format(template,template_shortcuts,arguments[n],p)
+-- f = loadstripped(p)()
+-- else
+-- f = function() return str end
+-- end
+-- end
+-- t[str] = f
+-- return f
+-- end
+--
+-- local formatteds = string.formatteds or { }
+-- string.formatteds = formatteds
+--
+-- setmetatable(formatteds, { __index = make, __call = use })
+
+--
+
+-- print(formatters["hans %N and %N done"](123,"0123"))
+-- local test = formatters["1%%23%4w56%s78 %p %!xml! test and %!tex! more %s"]
+-- print(#string.dump(test))
+-- print(test(2,123,99999,"abc&def","# and $","okay","!!!"))
+-- local test = formatters["%s"]
+-- print(#string.dump(test))
+-- print(test("okay"))
+
+function string.makeformatter(str) -- redundant
return formatters[str]
end
-function string.formatter(str,...)
+function string.formatter(str,...) -- redundant
return formatters[str](...)
end
+string.addformatter = addextension
+
-- local p1 = "%s test %f done %p and %c and %V or %+t or %%"
-- local p2 = "%s test %f done %s and %s and 0x%05X or %s or %%"
--