summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lualibs-lpeg.lua26
-rw-r--r--lualibs-table.lua4
-rw-r--r--lualibs-url.lua10
-rw-r--r--lualibs-util-str.lua139
4 files changed, 132 insertions, 47 deletions
diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua
index 399b3ad..6d3acd7 100644
--- a/lualibs-lpeg.lua
+++ b/lualibs-lpeg.lua
@@ -6,6 +6,10 @@ if not modules then modules = { } end modules ['l-lpeg'] = {
license = "see context related readme files"
}
+-- lpeg 12 vs lpeg 10: slower compilation, similar parsing speed (i need to check
+-- if i can use new features like capture / 2 and .B (at first sight the xml
+-- parser is some 5% slower)
+
-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1)
-- move utf -> l-unicode
@@ -15,14 +19,15 @@ lpeg = require("lpeg")
-- The latest lpeg doesn't have print any more, and even the new ones are not
-- available by default (only when debug mode is enabled), which is a pitty as
--- as it helps bailign down bottlenecks. Performance seems comparable, although
+-- as it helps nailign down bottlenecks. Performance seems comparable: some 10%
+-- slower pattern compilation, same parsing speed, although,
--
-- local p = lpeg.C(lpeg.P(1)^0 * lpeg.P(-1))
--- local a = string.rep("123",10)
+-- local a = string.rep("123",100)
-- lpeg.match(p,a)
--
--- is nearly 20% slower and also still suboptimal (i.e. a match that runs from
--- begin to end, one of the cases where string matchers win).
+-- seems slower and is also still suboptimal (i.e. a match that runs from begin
+-- to end, one of the cases where string matchers win).
if not lpeg.print then function lpeg.print(...) print(lpeg.pcode(...)) end end
@@ -74,7 +79,9 @@ local lpegtype, lpegmatch, lpegprint = lpeg.type, lpeg.match, lpeg.print
-- let's start with an inspector:
-setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end)
+if setinspector then
+ setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end)
+end
-- Beware, we predefine a bunch of patterns here and one reason for doing so
-- is that we get consistent behaviour in some of the visualizers.
@@ -469,7 +476,7 @@ end
-- local pattern1 = P(1-P(pattern))^0 * P(pattern) : test for not nil
-- local pattern2 = (P(pattern) * Cc(true) + P(1))^0 : test for true (could be faster, but not much)
-function lpeg.finder(lst,makefunction) -- beware: slower than find with 'patternless finds'
+function lpeg.finder(lst,makefunction,isutf) -- beware: slower than find with 'patternless finds'
local pattern
if type(lst) == "table" then
pattern = P(false)
@@ -485,7 +492,12 @@ function lpeg.finder(lst,makefunction) -- beware: slower than find with 'pattern
else
pattern = P(lst)
end
- pattern = (1-pattern)^0 * pattern
+ if isutf then
+-- pattern = ((utf8char or 1)-pattern)^0 * pattern
+ pattern = ((utf8char or 1)-pattern)^0 * pattern
+ else
+ pattern = (1-pattern)^0 * pattern
+ end
if makefunction then
return function(str)
return lpegmatch(pattern,str)
diff --git a/lualibs-table.lua b/lualibs-table.lua
index f361f3d..c318c57 100644
--- a/lualibs-table.lua
+++ b/lualibs-table.lua
@@ -1006,7 +1006,9 @@ function table.print(t,...)
end
end
-setinspector(function(v) if type(v) == "table" then serialize(print,v,"table") return true end end)
+if setinspector then
+ setinspector(function(v) if type(v) == "table" then serialize(print,v,"table") return true end end)
+end
-- -- -- obsolete but we keep them for a while and might comment them later -- -- --
diff --git a/lualibs-url.lua b/lualibs-url.lua
index 7b7910f..7bb7312 100644
--- a/lualibs-url.lua
+++ b/lualibs-url.lua
@@ -26,6 +26,8 @@ local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replac
-- | ___________|____________ |
-- / \ / \ |
-- urn:example:animal:ferret:nose interpretable as extension
+--
+-- also nice: http://url.spec.whatwg.org/ (maybe some day ...)
url = url or { }
local url = url
@@ -43,7 +45,7 @@ local hexdigit = R("09","AF","af")
local plus = P("+")
local nothing = Cc("")
local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar
-local escaped = (plus / " ") + escapedchar
+local escaped = (plus / " ") + escapedchar -- so no loc://foo++.tex
local noslash = P("/") / ""
@@ -189,7 +191,11 @@ local function hashed(str) -- not yet ok (/test?test)
return s
end
--- inspect(hashed("template://test"))
+-- inspect(hashed("template:///test"))
+-- inspect(hashed("template:///test++.whatever"))
+-- inspect(hashed("template:///test%2B%2B.whatever"))
+-- inspect(hashed("template:///test%x.whatever"))
+-- inspect(hashed("tem%2Bplate:///test%x.whatever"))
-- Here we assume:
--
diff --git a/lualibs-util-str.lua b/lualibs-util-str.lua
index af8b165..f04f0e5 100644
--- a/lualibs-util-str.lua
+++ b/lualibs-util-str.lua
@@ -20,8 +20,16 @@ local utfchar, utfbyte = utf.char, utf.byte
----- loadstripped = utilities.lua.loadstripped
----- setmetatableindex = table.setmetatableindex
-local loadstripped = _LUAVERSION < 5.2 and load or function(str)
- return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stipped load
+-- local loadstripped = _LUAVERSION < 5.2 and load or function(str)
+-- return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stipped load
+-- end
+
+local loadstripped = function(str,shortcuts)
+ if shortcuts then
+ return load(dump(load(str),true),nil,nil,shortcuts)
+ else
+ return load(dump(load(str),true))
+ end
end
-- todo: make a special namespace for the formatter
@@ -291,33 +299,69 @@ function number.sparseexponent(f,n)
return tostring(n)
end
-local preamble = [[
-local type = type
-local tostring = tostring
-local tonumber = tonumber
-local format = string.format
-local concat = table.concat
-local signed = number.signed
-local points = number.points
-local basepoints = number.basepoints
-local utfchar = utf.char
-local utfbyte = utf.byte
-local lpegmatch = lpeg.match
-local nspaces = string.nspaces
-local tracedchar = string.tracedchar
-local autosingle = string.autosingle
-local autodouble = string.autodouble
-local sequenced = table.sequenced
-local formattednumber = number.formatted
-local sparseexponent = number.sparseexponent
-]]
-
local template = [[
%s
%s
return function(%s) return %s end
]]
+-- local environment = {
+-- lpeg = lpeg,
+-- type = type,
+-- string = string,
+-- number = number,
+-- table = table,
+-- utf = utf,
+-- }
+--
+-- local preamble = [[
+-- local type = type
+-- local tostring = tostring
+-- local tonumber = tonumber
+-- local format = string.format
+-- local concat = table.concat
+-- local signed = number.signed
+-- local points = number.points
+-- local basepoints = number.basepoints
+-- local utfchar = utf.char
+-- local utfbyte = utf.byte
+-- local lpegmatch = lpeg.match
+-- local nspaces = string.nspaces
+-- local tracedchar = string.tracedchar
+-- local autosingle = string.autosingle
+-- local autodouble = string.autodouble
+-- local sequenced = table.sequenced
+-- local formattednumber = number.formatted
+-- local sparseexponent = number.sparseexponent
+-- ]]
+
+local environment = {
+ global = global or _G,
+ lpeg = lpeg,
+ type = type,
+ tostring = tostring,
+ tonumber = tonumber,
+ format = string.format,
+ concat = table.concat,
+ signed = number.signed,
+ points = number.points,
+ basepoints = number.basepoints,
+ utfchar = utf.char,
+ utfbyte = utf.byte,
+ lpegmatch = lpeg.match,
+ nspaces = string.nspaces,
+ tracedchar = string.tracedchar,
+ autosingle = string.autosingle,
+ autodouble = string.autodouble,
+ sequenced = table.sequenced,
+ formattednumber = number.formatted,
+ sparseexponent = number.sparseexponent,
+}
+
+local preamble = ""
+
+-- -- --
+
local arguments = { "a1" } -- faster than previously used (select(n,...))
setmetatable(arguments, { __index =
@@ -740,28 +784,37 @@ local builder = Cs { "start",
-- we can be clever and only alias what is needed
+-- local direct = Cs (
+-- P("%")/""
+-- * Cc([[local format = string.format return function(str) return format("%]])
+-- * (S("+- .") + R("09"))^0
+-- * S("sqidfgGeExXo")
+-- * Cc([[",str) end]])
+-- * P(-1)
+-- )
+
local direct = Cs (
- P("%")/""
- * Cc([[local format = string.format return function(str) return format("%]])
- * (S("+- .") + R("09"))^0
- * S("sqidfgGeExXo")
- * Cc([[",str) end]])
- * P(-1)
- )
+ P("%")
+ * (S("+- .") + R("09"))^0
+ * S("sqidfgGeExXo")
+ * P(-1) / [[local format = string.format return function(str) return format("%0",str) end]]
+)
local function make(t,str)
local f
local p
local p = lpegmatch(direct,str)
if p then
+ -- f = loadstripped(p)()
+ -- print("builder 1 >",p)
f = loadstripped(p)()
else
n = 0
p = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n
if n > 0 then
p = format(template,preamble,t._preamble_,arguments[n],p)
--- print("builder>",p)
- f = loadstripped(p)()
+ -- print("builder 2 >",p)
+ f = loadstripped(p,t._environment_)() -- t._environment is not populated (was experiment)
else
f = function() return str end
end
@@ -817,7 +870,11 @@ strings.formatters = { }
-- clear that table when a threshold is reached
function strings.formatters.new()
- local t = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter" }
+ local e = { } -- better make a copy as we can overload
+ for k, v in next, environment do
+ e[k] = v
+ end
+ local t = { _extensions_ = { }, _preamble_ = "", _environment_ = e, _type_ = "formatter" }
setmetatable(t, { __index = make, __call = use })
return t
end
@@ -838,8 +895,12 @@ string.formatter = function(str,...) return formatters[str](...) end -- someti
local function add(t,name,template,preamble)
if type(t) == "table" and t._type_ == "formatter" then
t._extensions_[name] = template or "%s"
- if preamble then
+ if type(preamble) == "string" then
t._preamble_ = preamble .. "\n" .. t._preamble_ -- so no overload !
+ elseif type(preamble) == "table" then
+ for k, v in next, preamble do
+ t._environment_[k] = v
+ end
end
end
end
@@ -856,9 +917,13 @@ patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n"
-- escaping by lpeg is faster for strings without quotes, slower on a string with quotes, but
-- faster again when other q-escapables are found (the ones we don't need to escape)
-add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]])
-add(formatters,"tex", [[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]])
-add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patterns.luaescape]])
+-- add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]])
+-- add(formatters,"tex", [[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]])
+-- add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patterns.luaescape]])
+
+add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],{ xmlescape = lpeg.patterns.xmlescape })
+add(formatters,"tex", [[lpegmatch(texescape,%s)]],{ texescape = lpeg.patterns.texescape })
+add(formatters,"lua", [[lpegmatch(luaescape,%s)]],{ luaescape = lpeg.patterns.luaescape })
-- -- yes or no:
--