From 43ffeea64e686e25807959575d07162bc8bb02b6 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 2 Feb 2014 10:08:08 +0100 Subject: sync with Context as of 2014-02-02 --- lualibs-lpeg.lua | 26 +++++++--- lualibs-table.lua | 4 +- lualibs-url.lua | 10 +++- lualibs-util-str.lua | 139 +++++++++++++++++++++++++++++++++++++-------------- 4 files changed, 132 insertions(+), 47 deletions(-) diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua index 399b3ad..6d3acd7 100644 --- a/lualibs-lpeg.lua +++ b/lualibs-lpeg.lua @@ -6,6 +6,10 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } +-- lpeg 12 vs lpeg 10: slower compilation, similar parsing speed (i need to check +-- if i can use new features like capture / 2 and .B (at first sight the xml +-- parser is some 5% slower) + -- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) -- move utf -> l-unicode @@ -15,14 +19,15 @@ lpeg = require("lpeg") -- The latest lpeg doesn't have print any more, and even the new ones are not -- available by default (only when debug mode is enabled), which is a pitty as --- as it helps bailign down bottlenecks. Performance seems comparable, although +-- as it helps nailign down bottlenecks. Performance seems comparable: some 10% +-- slower pattern compilation, same parsing speed, although, -- -- local p = lpeg.C(lpeg.P(1)^0 * lpeg.P(-1)) --- local a = string.rep("123",10) +-- local a = string.rep("123",100) -- lpeg.match(p,a) -- --- is nearly 20% slower and also still suboptimal (i.e. a match that runs from --- begin to end, one of the cases where string matchers win). +-- seems slower and is also still suboptimal (i.e. a match that runs from begin +-- to end, one of the cases where string matchers win). if not lpeg.print then function lpeg.print(...) print(lpeg.pcode(...)) end end @@ -74,7 +79,9 @@ local lpegtype, lpegmatch, lpegprint = lpeg.type, lpeg.match, lpeg.print -- let's start with an inspector: -setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end) +if setinspector then + setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end) +end -- Beware, we predefine a bunch of patterns here and one reason for doing so -- is that we get consistent behaviour in some of the visualizers. @@ -469,7 +476,7 @@ end -- local pattern1 = P(1-P(pattern))^0 * P(pattern) : test for not nil -- local pattern2 = (P(pattern) * Cc(true) + P(1))^0 : test for true (could be faster, but not much) -function lpeg.finder(lst,makefunction) -- beware: slower than find with 'patternless finds' +function lpeg.finder(lst,makefunction,isutf) -- beware: slower than find with 'patternless finds' local pattern if type(lst) == "table" then pattern = P(false) @@ -485,7 +492,12 @@ function lpeg.finder(lst,makefunction) -- beware: slower than find with 'pattern else pattern = P(lst) end - pattern = (1-pattern)^0 * pattern + if isutf then +-- pattern = ((utf8char or 1)-pattern)^0 * pattern + pattern = ((utf8char or 1)-pattern)^0 * pattern + else + pattern = (1-pattern)^0 * pattern + end if makefunction then return function(str) return lpegmatch(pattern,str) diff --git a/lualibs-table.lua b/lualibs-table.lua index f361f3d..c318c57 100644 --- a/lualibs-table.lua +++ b/lualibs-table.lua @@ -1006,7 +1006,9 @@ function table.print(t,...) end end -setinspector(function(v) if type(v) == "table" then serialize(print,v,"table") return true end end) +if setinspector then + setinspector(function(v) if type(v) == "table" then serialize(print,v,"table") return true end end) +end -- -- -- obsolete but we keep them for a while and might comment them later -- -- -- diff --git a/lualibs-url.lua b/lualibs-url.lua index 7b7910f..7bb7312 100644 --- a/lualibs-url.lua +++ b/lualibs-url.lua @@ -26,6 +26,8 @@ local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replac -- | ___________|____________ | -- / \ / \ | -- urn:example:animal:ferret:nose interpretable as extension +-- +-- also nice: http://url.spec.whatwg.org/ (maybe some day ...) url = url or { } local url = url @@ -43,7 +45,7 @@ local hexdigit = R("09","AF","af") local plus = P("+") local nothing = Cc("") local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar -local escaped = (plus / " ") + escapedchar +local escaped = (plus / " ") + escapedchar -- so no loc://foo++.tex local noslash = P("/") / "" @@ -189,7 +191,11 @@ local function hashed(str) -- not yet ok (/test?test) return s end --- inspect(hashed("template://test")) +-- inspect(hashed("template:///test")) +-- inspect(hashed("template:///test++.whatever")) +-- inspect(hashed("template:///test%2B%2B.whatever")) +-- inspect(hashed("template:///test%x.whatever")) +-- inspect(hashed("tem%2Bplate:///test%x.whatever")) -- Here we assume: -- diff --git a/lualibs-util-str.lua b/lualibs-util-str.lua index af8b165..f04f0e5 100644 --- a/lualibs-util-str.lua +++ b/lualibs-util-str.lua @@ -20,8 +20,16 @@ local utfchar, utfbyte = utf.char, utf.byte ----- loadstripped = utilities.lua.loadstripped ----- setmetatableindex = table.setmetatableindex -local loadstripped = _LUAVERSION < 5.2 and load or function(str) - return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stipped load +-- local loadstripped = _LUAVERSION < 5.2 and load or function(str) +-- return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stipped load +-- end + +local loadstripped = function(str,shortcuts) + if shortcuts then + return load(dump(load(str),true),nil,nil,shortcuts) + else + return load(dump(load(str),true)) + end end -- todo: make a special namespace for the formatter @@ -291,33 +299,69 @@ function number.sparseexponent(f,n) return tostring(n) end -local preamble = [[ -local type = type -local tostring = tostring -local tonumber = tonumber -local format = string.format -local concat = table.concat -local signed = number.signed -local points = number.points -local basepoints = number.basepoints -local utfchar = utf.char -local utfbyte = utf.byte -local lpegmatch = lpeg.match -local nspaces = string.nspaces -local tracedchar = string.tracedchar -local autosingle = string.autosingle -local autodouble = string.autodouble -local sequenced = table.sequenced -local formattednumber = number.formatted -local sparseexponent = number.sparseexponent -]] - local template = [[ %s %s return function(%s) return %s end ]] +-- local environment = { +-- lpeg = lpeg, +-- type = type, +-- string = string, +-- number = number, +-- table = table, +-- utf = utf, +-- } +-- +-- local preamble = [[ +-- local type = type +-- local tostring = tostring +-- local tonumber = tonumber +-- local format = string.format +-- local concat = table.concat +-- local signed = number.signed +-- local points = number.points +-- local basepoints = number.basepoints +-- local utfchar = utf.char +-- local utfbyte = utf.byte +-- local lpegmatch = lpeg.match +-- local nspaces = string.nspaces +-- local tracedchar = string.tracedchar +-- local autosingle = string.autosingle +-- local autodouble = string.autodouble +-- local sequenced = table.sequenced +-- local formattednumber = number.formatted +-- local sparseexponent = number.sparseexponent +-- ]] + +local environment = { + global = global or _G, + lpeg = lpeg, + type = type, + tostring = tostring, + tonumber = tonumber, + format = string.format, + concat = table.concat, + signed = number.signed, + points = number.points, + basepoints = number.basepoints, + utfchar = utf.char, + utfbyte = utf.byte, + lpegmatch = lpeg.match, + nspaces = string.nspaces, + tracedchar = string.tracedchar, + autosingle = string.autosingle, + autodouble = string.autodouble, + sequenced = table.sequenced, + formattednumber = number.formatted, + sparseexponent = number.sparseexponent, +} + +local preamble = "" + +-- -- -- + local arguments = { "a1" } -- faster than previously used (select(n,...)) setmetatable(arguments, { __index = @@ -740,28 +784,37 @@ local builder = Cs { "start", -- we can be clever and only alias what is needed +-- local direct = Cs ( +-- P("%")/"" +-- * Cc([[local format = string.format return function(str) return format("%]]) +-- * (S("+- .") + R("09"))^0 +-- * S("sqidfgGeExXo") +-- * Cc([[",str) end]]) +-- * P(-1) +-- ) + local direct = Cs ( - P("%")/"" - * Cc([[local format = string.format return function(str) return format("%]]) - * (S("+- .") + R("09"))^0 - * S("sqidfgGeExXo") - * Cc([[",str) end]]) - * P(-1) - ) + P("%") + * (S("+- .") + R("09"))^0 + * S("sqidfgGeExXo") + * P(-1) / [[local format = string.format return function(str) return format("%0",str) end]] +) local function make(t,str) local f local p local p = lpegmatch(direct,str) if p then + -- f = loadstripped(p)() + -- print("builder 1 >",p) f = loadstripped(p)() else n = 0 p = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n if n > 0 then p = format(template,preamble,t._preamble_,arguments[n],p) --- print("builder>",p) - f = loadstripped(p)() + -- print("builder 2 >",p) + f = loadstripped(p,t._environment_)() -- t._environment is not populated (was experiment) else f = function() return str end end @@ -817,7 +870,11 @@ strings.formatters = { } -- clear that table when a threshold is reached function strings.formatters.new() - local t = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter" } + local e = { } -- better make a copy as we can overload + for k, v in next, environment do + e[k] = v + end + local t = { _extensions_ = { }, _preamble_ = "", _environment_ = e, _type_ = "formatter" } setmetatable(t, { __index = make, __call = use }) return t end @@ -838,8 +895,12 @@ string.formatter = function(str,...) return formatters[str](...) end -- someti local function add(t,name,template,preamble) if type(t) == "table" and t._type_ == "formatter" then t._extensions_[name] = template or "%s" - if preamble then + if type(preamble) == "string" then t._preamble_ = preamble .. "\n" .. t._preamble_ -- so no overload ! + elseif type(preamble) == "table" then + for k, v in next, preamble do + t._environment_[k] = v + end end end end @@ -856,9 +917,13 @@ patterns.luaquoted = Cs(Cc('"') * ((1-S('"\n'))^1 + P('"')/'\\"' + P('\n')/'\\n" -- escaping by lpeg is faster for strings without quotes, slower on a string with quotes, but -- faster again when other q-escapables are found (the ones we don't need to escape) -add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]]) -add(formatters,"tex", [[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]]) -add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patterns.luaescape]]) +-- add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]]) +-- add(formatters,"tex", [[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]]) +-- add(formatters,"lua", [[lpegmatch(luaescape,%s)]],[[local luaescape = lpeg.patterns.luaescape]]) + +add(formatters,"xml", [[lpegmatch(xmlescape,%s)]],{ xmlescape = lpeg.patterns.xmlescape }) +add(formatters,"tex", [[lpegmatch(texescape,%s)]],{ texescape = lpeg.patterns.texescape }) +add(formatters,"lua", [[lpegmatch(luaescape,%s)]],{ luaescape = lpeg.patterns.luaescape }) -- -- yes or no: -- -- cgit v1.2.3