summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/lxml-lpt.lua
diff options
context:
space:
mode:
authorContext Git Mirror Bot <phg42.2a@gmail.com>2016-01-12 17:15:07 +0100
committerContext Git Mirror Bot <phg42.2a@gmail.com>2016-01-12 17:15:07 +0100
commit8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkiv/lxml-lpt.lua
parentf5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
downloadcontext-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz
2016-01-12 16:26:00
Diffstat (limited to 'tex/context/base/mkiv/lxml-lpt.lua')
-rw-r--r--tex/context/base/mkiv/lxml-lpt.lua1479
1 files changed, 1479 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/lxml-lpt.lua b/tex/context/base/mkiv/lxml-lpt.lua
new file mode 100644
index 000000000..6df89be20
--- /dev/null
+++ b/tex/context/base/mkiv/lxml-lpt.lua
@@ -0,0 +1,1479 @@
+if not modules then modules = { } end modules ['lxml-lpt'] = {
+ version = 1.001,
+ comment = "this module is the basis for the lxml-* ones",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- e.ni is only valid after a filter run
+-- todo: B/C/[get first match]
+
+local concat, remove, insert = table.concat, table.remove, table.insert
+local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select
+local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
+local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
+
+local setmetatableindex = table.setmetatableindex
+local formatters = string.formatters -- no need (yet) as paths are cached anyway
+
+-- beware, this is not xpath ... e.g. position is different (currently) and
+-- we have reverse-sibling as reversed preceding sibling
+
+--[[ldx--
+<p>This module can be used stand alone but also inside <l n='mkiv'/> in
+which case it hooks into the tracker code. Therefore we provide a few
+functions that set the tracers. Here we overload a previously defined
+function.</p>
+<p>If I can get in the mood I will make a variant that is XSLT compliant
+but I wonder if it makes sense.</P>
+--ldx]]--
+
+--[[ldx--
+<p>Expecially the lpath code is experimental, we will support some of xpath, but
+only things that make sense for us; as compensation it is possible to hook in your
+own functions. Apart from preprocessing content for <l n='context'/> we also need
+this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
+files.</p>
+
+<typing>
+a/b/c /*/c
+a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
+a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
+</typing>
+--ldx]]--
+
+local trace_lpath = false if trackers then trackers.register("xml.path", function(v) trace_lpath = v end) end
+local trace_lparse = false if trackers then trackers.register("xml.parse", function(v) trace_lparse = v end) end
+local trace_lprofile = false if trackers then trackers.register("xml.profile", function(v) trace_lpath = v trace_lparse = v trace_lprofile = v end) end
+
+local report_lpath = logs.reporter("xml","lpath")
+
+--[[ldx--
+<p>We've now arrived at an interesting part: accessing the tree using a subset
+of <l n='xpath'/> and since we're not compatible we call it <l n='lpath'/>. We
+will explain more about its usage in other documents.</p>
+--ldx]]--
+
+local xml = xml
+
+local lpathcalls = 0 function xml.lpathcalls () return lpathcalls end
+local lpathcached = 0 function xml.lpathcached() return lpathcached end
+
+xml.functions = xml.functions or { } -- internal
+local functions = xml.functions
+
+xml.expressions = xml.expressions or { } -- in expressions
+local expressions = xml.expressions
+
+xml.finalizers = xml.finalizers or { } -- fast do-with ... (with return value other than collection)
+local finalizers = xml.finalizers
+
+xml.specialhandler = xml.specialhandler or { }
+local specialhandler = xml.specialhandler
+
+lpegpatterns.xml = lpegpatterns.xml or { }
+local xmlpatterns = lpegpatterns.xml
+
+finalizers.xml = finalizers.xml or { }
+finalizers.tex = finalizers.tex or { }
+
+local function fallback (t, name)
+ local fn = finalizers[name]
+ if fn then
+ t[name] = fn
+ else
+ report_lpath("unknown sub finalizer %a",name)
+ fn = function() end
+ end
+ return fn
+end
+
+setmetatableindex(finalizers.xml, fallback)
+setmetatableindex(finalizers.tex, fallback)
+
+xml.defaultprotocol = "xml"
+
+-- as xsl does not follow xpath completely here we will also
+-- be more liberal especially with regards to the use of | and
+-- the rootpath:
+--
+-- test : all 'test' under current
+-- /test : 'test' relative to current
+-- a|b|c : set of names
+-- (a|b|c) : idem
+-- ! : not
+--
+-- after all, we're not doing transformations but filtering. in
+-- addition we provide filter functions (last bit)
+--
+-- todo: optimizer
+--
+-- .. : parent
+-- * : all kids
+-- / : anchor here
+-- // : /**/
+-- ** : all in between
+--
+-- so far we had (more practical as we don't transform)
+--
+-- {/test} : kids 'test' under current node
+-- {test} : any kid with tag 'test'
+-- {//test} : same as above
+
+-- evaluator (needs to be redone, for the moment copied)
+
+-- todo: apply_axis(list,notable) and collection vs single
+
+local apply_axis = { }
+
+apply_axis['root'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ local rt = ll
+ while ll do
+ ll = ll.__p__
+ if ll then
+ rt = ll
+ end
+ end
+ collected[l] = rt
+ end
+ return collected
+end
+
+apply_axis['self'] = function(list)
+--~ local collected = { }
+--~ for l=1,#list do
+--~ collected[l] = list[l]
+--~ end
+--~ return collected
+ return list
+end
+
+apply_axis['child'] = function(list)
+ local collected, c = { }, 0
+ for l=1,#list do
+ local ll = list[l]
+ local dt = ll.dt
+ if dt then -- weird that this is needed
+ local en = 0
+ for k=1,#dt do
+ local dk = dt[k]
+ if dk.tg then
+ c = c + 1
+ collected[c] = dk
+ dk.ni = k -- refresh
+ en = en + 1
+ dk.ei = en
+ end
+ end
+ ll.en = en
+ end
+ end
+ return collected
+end
+
+local function collect(list,collected,c)
+ local dt = list.dt
+ if dt then
+ local en = 0
+ for k=1,#dt do
+ local dk = dt[k]
+ if dk.tg then
+ c = c + 1
+ collected[c] = dk
+ dk.ni = k -- refresh
+ en = en + 1
+ dk.ei = en
+ c = collect(dk,collected,c)
+ end
+ end
+ list.en = en
+ end
+ return c
+end
+
+apply_axis['descendant'] = function(list)
+ local collected, c = { }, 0
+ for l=1,#list do
+ c = collect(list[l],collected,c)
+ end
+ return collected
+end
+
+local function collect(list,collected,c)
+ local dt = list.dt
+ if dt then
+ local en = 0
+ for k=1,#dt do
+ local dk = dt[k]
+ if dk.tg then
+ c = c + 1
+ collected[c] = dk
+ dk.ni = k -- refresh
+ en = en + 1
+ dk.ei = en
+ c = collect(dk,collected,c)
+ end
+ end
+ list.en = en
+ end
+ return c
+end
+apply_axis['descendant-or-self'] = function(list)
+ local collected, c = { }, 0
+ for l=1,#list do
+ local ll = list[l]
+ if ll.special ~= true then -- catch double root
+ c = c + 1
+ collected[c] = ll
+ end
+ c = collect(ll,collected,c)
+ end
+ return collected
+end
+
+apply_axis['ancestor'] = function(list)
+ local collected, c = { }, 0
+ for l=1,#list do
+ local ll = list[l]
+ while ll do
+ ll = ll.__p__
+ if ll then
+ c = c + 1
+ collected[c] = ll
+ end
+ end
+ end
+ return collected
+end
+
+apply_axis['ancestor-or-self'] = function(list)
+ local collected, c = { }, 0
+ for l=1,#list do
+ local ll = list[l]
+ c = c + 1
+ collected[c] = ll
+ while ll do
+ ll = ll.__p__
+ if ll then
+ c = c + 1
+ collected[c] = ll
+ end
+ end
+ end
+ return collected
+end
+
+apply_axis['parent'] = function(list)
+ local collected, c = { }, 0
+ for l=1,#list do
+ local pl = list[l].__p__
+ if pl then
+ c = c + 1
+ collected[c] = pl
+ end
+ end
+ return collected
+end
+
+apply_axis['attribute'] = function(list)
+ return { }
+end
+
+apply_axis['namespace'] = function(list)
+ return { }
+end
+
+apply_axis['following'] = function(list) -- incomplete
+--~ local collected, c = { }, 0
+--~ for l=1,#list do
+--~ local ll = list[l]
+--~ local p = ll.__p__
+--~ local d = p.dt
+--~ for i=ll.ni+1,#d do
+--~ local di = d[i]
+--~ if type(di) == "table" then
+--~ c = c + 1
+--~ collected[c] = di
+--~ break
+--~ end
+--~ end
+--~ end
+--~ return collected
+ return { }
+end
+
+apply_axis['preceding'] = function(list) -- incomplete
+--~ local collected, c = { }, 0
+--~ for l=1,#list do
+--~ local ll = list[l]
+--~ local p = ll.__p__
+--~ local d = p.dt
+--~ for i=ll.ni-1,1,-1 do
+--~ local di = d[i]
+--~ if type(di) == "table" then
+--~ c = c + 1
+--~ collected[c] = di
+--~ break
+--~ end
+--~ end
+--~ end
+--~ return collected
+ return { }
+end
+
+apply_axis['following-sibling'] = function(list)
+ local collected, c = { }, 0
+ for l=1,#list do
+ local ll = list[l]
+ local p = ll.__p__
+ local d = p.dt
+ for i=ll.ni+1,#d do
+ local di = d[i]
+ if type(di) == "table" then
+ c = c + 1
+ collected[c] = di
+ end
+ end
+ end
+ return collected
+end
+
+apply_axis['preceding-sibling'] = function(list)
+ local collected, c = { }, 0
+ for l=1,#list do
+ local ll = list[l]
+ local p = ll.__p__
+ local d = p.dt
+ for i=1,ll.ni-1 do
+ local di = d[i]
+ if type(di) == "table" then
+ c = c + 1
+ collected[c] = di
+ end
+ end
+ end
+ return collected
+end
+
+apply_axis['reverse-sibling'] = function(list) -- reverse preceding
+ local collected, c = { }, 0
+ for l=1,#list do
+ local ll = list[l]
+ local p = ll.__p__
+ local d = p.dt
+ for i=ll.ni-1,1,-1 do
+ local di = d[i]
+ if type(di) == "table" then
+ c = c + 1
+ collected[c] = di
+ end
+ end
+ end
+ return collected
+end
+
+apply_axis['auto-descendant-or-self'] = apply_axis['descendant-or-self']
+apply_axis['auto-descendant'] = apply_axis['descendant']
+apply_axis['auto-child'] = apply_axis['child']
+apply_axis['auto-self'] = apply_axis['self']
+apply_axis['initial-child'] = apply_axis['child']
+
+local function apply_nodes(list,directive,nodes)
+ -- todo: nodes[1] etc ... negated node name in set ... when needed
+ -- ... currently ignored
+ local maxn = #nodes
+ if maxn == 3 then --optimized loop
+ local nns, ntg = nodes[2], nodes[3]
+ if not nns and not ntg then -- wildcard
+ if directive then
+ return list
+ else
+ return { }
+ end
+ else
+ local collected, c, m, p = { }, 0, 0, nil
+ if not nns then -- only check tag
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ if directive then
+ if ntg == ltg then
+ local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
+ c = c + 1
+ collected[c], ll.mi = ll, m
+ end
+ elseif ntg ~= ltg then
+ local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
+ c = c + 1
+ collected[c], ll.mi = ll, m
+ end
+ end
+ end
+ elseif not ntg then -- only check namespace
+ for l=1,#list do
+ local ll = list[l]
+ local lns = ll.rn or ll.ns
+ if lns then
+ if directive then
+ if lns == nns then
+ local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
+ c = c + 1
+ collected[c], ll.mi = ll, m
+ end
+ elseif lns ~= nns then
+ local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
+ c = c + 1
+ collected[c], ll.mi = ll, m
+ end
+ end
+ end
+ else -- check both
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ local lns = ll.rn or ll.ns
+ local ok = ltg == ntg and lns == nns
+ if directive then
+ if ok then
+ local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
+ c = c + 1
+ collected[c], ll.mi = ll, m
+ end
+ elseif not ok then
+ local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
+ c = c + 1
+ collected[c], ll.mi = ll, m
+ end
+ end
+ end
+ end
+ return collected
+ end
+ else
+ local collected, c, m, p = { }, 0, 0, nil
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ local lns = ll.rn or ll.ns
+ local ok = false
+ for n=1,maxn,3 do
+ local nns, ntg = nodes[n+1], nodes[n+2]
+ ok = (not ntg or ltg == ntg) and (not nns or lns == nns)
+ if ok then
+ break
+ end
+ end
+ if directive then
+ if ok then
+ local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
+ c = c + 1
+ collected[c], ll.mi = ll, m
+ end
+ elseif not ok then
+ local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
+ c = c + 1
+ collected[c], ll.mi = ll, m
+ end
+ end
+ end
+ return collected
+ end
+end
+
+local quit_expression = false
+
+local function apply_expression(list,expression,order)
+ local collected, c = { }, 0
+ quit_expression = false
+ for l=1,#list do
+ local ll = list[l]
+ if expression(list,ll,l,order) then -- nasty, order alleen valid als n=1
+ c = c + 1
+ collected[c] = ll
+ end
+ if quit_expression then
+ break
+ end
+ end
+ return collected
+end
+
+-- this one can be made faster but there are not that many conversions so it doesn't
+-- really pay of
+
+local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb
+
+local spaces = S(" \n\r\t\f")^0
+local lp_space = S(" \n\r\t\f")
+local lp_any = P(1)
+local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==")
+local lp_doequal = P("=") / "=="
+local lp_or = P("|") / " or "
+local lp_and = P("&") / " and "
+
+local lp_builtin = P (
+ P("text") / "(ll.dt[1] or '')" + -- fragile
+ P("content") / "ll.dt" +
+ -- P("name") / "(ll.ns~='' and ll.ns..':'..ll.tg)" +
+ P("name") / "((ll.ns~='' and ll.ns..':'..ll.tg) or ll.tg)" +
+ P("tag") / "ll.tg" +
+ P("position") / "l" + -- is element in finalizer
+ P("firstindex") / "1" +
+ P("lastindex") / "(#ll.__p__.dt or 1)" +
+ P("firstelement") / "1" +
+ P("lastelement") / "(ll.__p__.en or 1)" +
+ P("first") / "1" +
+ P("last") / "#list" +
+ P("rootposition") / "order" +
+ P("order") / "order" +
+ P("element") / "(ll.ei or 1)" +
+ P("index") / "(ll.ni or 1)" +
+ P("match") / "(ll.mi or 1)" +
+ -- P("namespace") / "ll.ns" +
+ P("ns") / "ll.ns"
+ ) * ((spaces * P("(") * spaces * P(")"))/"")
+
+-- for the moment we keep namespaces with attributes
+
+local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * ((R("az","AZ") + S("-_:"))^1) * Cc("'])")
+
+----- lp_fastpos_p = (P("+")^0 * R("09")^1 * P(-1)) / function(s) return "l==" .. s end
+----- lp_fastpos_n = (P("-") * R("09")^1 * P(-1)) / function(s) return "(" .. s .. "<0 and (#list+".. s .. "==l))" end
+
+local lp_fastpos_p = P("+")^0 * R("09")^1 * P(-1) / "l==%0"
+local lp_fastpos_n = P("-") * R("09")^1 * P(-1) / "(%0<0 and (#list+%0==l))"
+local lp_fastpos = lp_fastpos_n + lp_fastpos_p
+
+local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false")
+
+-- local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling
+-- return t .. "("
+-- end
+
+-- local lp_lua_function = (R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / "%0("
+local lp_lua_function = Cs((R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(")) / "%0"
+
+local lp_function = C(R("az","AZ","__")^1) * P("(") / function(t) -- todo: better . handling
+ if expressions[t] then
+ return "expr." .. t .. "("
+ else
+ return "expr.error("
+ end
+end
+
+local lparent = P("(")
+local rparent = P(")")
+local noparent = 1 - (lparent+rparent)
+local nested = P{lparent * (noparent + V(1))^0 * rparent}
+local value = P(lparent * C((noparent + nested)^0) * rparent) -- P{"("*C(((1-S("()"))+V(1))^0)*")"}
+
+local lp_child = Cc("expr.child(ll,'") * R("az","AZ","--","__")^1 * Cc("')")
+local lp_number = S("+-") * R("09")^1
+local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'")
+local lp_content = (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"'))
+
+local cleaner
+
+local lp_special = (C(P("name")+P("text")+P("tag")+P("count")+P("child"))) * value / function(t,s)
+ if expressions[t] then
+ s = s and s ~= "" and lpegmatch(cleaner,s)
+ if s and s ~= "" then
+ return "expr." .. t .. "(ll," .. s ..")"
+ else
+ return "expr." .. t .. "(ll)"
+ end
+ else
+ return "expr.error(" .. t .. ")"
+ end
+end
+
+local content =
+ lp_builtin +
+ lp_attribute +
+ lp_special +
+ lp_noequal + lp_doequal +
+ lp_or + lp_and +
+ lp_reserved +
+ lp_lua_function + lp_function +
+ lp_content + -- too fragile
+ lp_child +
+ lp_any
+
+local converter = Cs (
+ lp_fastpos + (P { lparent * (V(1))^0 * rparent + content } )^0
+)
+
+cleaner = Cs ( (
+ -- lp_fastpos +
+ lp_reserved +
+ lp_number +
+ lp_string +
+1 )^1 )
+
+local template_e = [[
+ local expr = xml.expressions
+ return function(list,ll,l,order)
+ return %s
+ end
+]]
+
+local template_f_y = [[
+ local finalizer = xml.finalizers['%s']['%s']
+ return function(collection)
+ return finalizer(collection,%s)
+ end
+]]
+
+local template_f_n = [[
+ return xml.finalizers['%s']['%s']
+]]
+
+--
+
+local register_self = { kind = "axis", axis = "self" } -- , apply = apply_axis["self"] }
+local register_parent = { kind = "axis", axis = "parent" } -- , apply = apply_axis["parent"] }
+local register_descendant = { kind = "axis", axis = "descendant" } -- , apply = apply_axis["descendant"] }
+local register_child = { kind = "axis", axis = "child" } -- , apply = apply_axis["child"] }
+local register_descendant_or_self = { kind = "axis", axis = "descendant-or-self" } -- , apply = apply_axis["descendant-or-self"] }
+local register_root = { kind = "axis", axis = "root" } -- , apply = apply_axis["root"] }
+local register_ancestor = { kind = "axis", axis = "ancestor" } -- , apply = apply_axis["ancestor"] }
+local register_ancestor_or_self = { kind = "axis", axis = "ancestor-or-self" } -- , apply = apply_axis["ancestor-or-self"] }
+local register_attribute = { kind = "axis", axis = "attribute" } -- , apply = apply_axis["attribute"] }
+local register_namespace = { kind = "axis", axis = "namespace" } -- , apply = apply_axis["namespace"] }
+local register_following = { kind = "axis", axis = "following" } -- , apply = apply_axis["following"] }
+local register_following_sibling = { kind = "axis", axis = "following-sibling" } -- , apply = apply_axis["following-sibling"] }
+local register_preceding = { kind = "axis", axis = "preceding" } -- , apply = apply_axis["preceding"] }
+local register_preceding_sibling = { kind = "axis", axis = "preceding-sibling" } -- , apply = apply_axis["preceding-sibling"] }
+local register_reverse_sibling = { kind = "axis", axis = "reverse-sibling" } -- , apply = apply_axis["reverse-sibling"] }
+
+local register_auto_descendant_or_self = { kind = "axis", axis = "auto-descendant-or-self" } -- , apply = apply_axis["auto-descendant-or-self"] }
+local register_auto_descendant = { kind = "axis", axis = "auto-descendant" } -- , apply = apply_axis["auto-descendant"] }
+local register_auto_self = { kind = "axis", axis = "auto-self" } -- , apply = apply_axis["auto-self"] }
+local register_auto_child = { kind = "axis", axis = "auto-child" } -- , apply = apply_axis["auto-child"] }
+
+local register_initial_child = { kind = "axis", axis = "initial-child" } -- , apply = apply_axis["initial-child"] }
+
+local register_all_nodes = { kind = "nodes", nodetest = true, nodes = { true, false, false } }
+
+local skip = { }
+
+local function errorrunner_e(str,cnv)
+ if not skip[str] then
+ report_lpath("error in expression: %s => %s",str,cnv)
+ skip[str] = cnv or str
+ end
+ return false
+end
+
+local function errorrunner_f(str,arg)
+ report_lpath("error in finalizer: %s(%s)",str,arg or "")
+ return false
+end
+
+local function register_nodes(nodetest,nodes)
+ return { kind = "nodes", nodetest = nodetest, nodes = nodes }
+end
+
+local function register_expression(expression)
+ local converted = lpegmatch(converter,expression)
+ local runner = load(format(template_e,converted))
+ runner = (runner and runner()) or function() errorrunner_e(expression,converted) end
+ return { kind = "expression", expression = expression, converted = converted, evaluator = runner }
+end
+
+local function register_finalizer(protocol,name,arguments)
+ local runner
+ if arguments and arguments ~= "" then
+ runner = load(format(template_f_y,protocol or xml.defaultprotocol,name,arguments))
+ else
+ runner = load(format(template_f_n,protocol or xml.defaultprotocol,name))
+ end
+ runner = (runner and runner()) or function() errorrunner_f(name,arguments) end
+ return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner }
+end
+
+local expression = P { "ex",
+ ex = "[" * C((V("sq") + V("dq") + (1 - S("[]")) + V("ex"))^0) * "]",
+ sq = "'" * (1 - S("'"))^0 * "'",
+ dq = '"' * (1 - S('"'))^0 * '"',
+}
+
+local arguments = P { "ar",
+ ar = "(" * Cs((V("sq") + V("dq") + V("nq") + P(1-P(")")))^0) * ")",
+ nq = ((1 - S("),'\""))^1) / function(s) return format("%q",s) end,
+ sq = P("'") * (1 - P("'"))^0 * P("'"),
+ dq = P('"') * (1 - P('"'))^0 * P('"'),
+}
+
+-- todo: better arg parser
+
+local function register_error(str)
+ return { kind = "error", error = format("unparsed: %s",str) }
+end
+
+-- there is a difference in * and /*/ and so we need to catch a few special cases
+
+local special_1 = P("*") * Cc(register_auto_descendant) * Cc(register_all_nodes) -- last one not needed
+local special_2 = P("/") * Cc(register_auto_self)
+local special_3 = P("") * Cc(register_auto_self)
+
+local no_nextcolon = P(-1) + #(1-P(":")) -- newer lpeg needs the P(-1)
+local no_nextlparent = P(-1) + #(1-P("(")) -- newer lpeg needs the P(-1)
+
+local pathparser = Ct { "patterns", -- can be made a bit faster by moving some patterns outside
+
+ patterns = spaces * V("protocol") * spaces * (
+ ( V("special") * spaces * P(-1) ) +
+ ( V("initial") * spaces * V("step") * spaces * (P("/") * spaces * V("step") * spaces)^0 )
+ ),
+
+ protocol = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"),
+
+ -- the / is needed for // as descendant or self is somewhat special
+ -- step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
+ step = ((V("shortcuts") + P("/") + V("axis")) * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
+
+ axis = V("descendant") + V("child") + V("parent") + V("self") + V("root") + V("ancestor") +
+ V("descendant_or_self") + V("following_sibling") + V("following") +
+ V("reverse_sibling") + V("preceding_sibling") + V("preceding") + V("ancestor_or_self") +
+ #(1-P(-1)) * Cc(register_auto_child),
+
+ special = special_1 + special_2 + special_3,
+
+ initial = (P("/") * spaces * Cc(register_initial_child))^-1,
+
+ error = (P(1)^1) / register_error,
+
+ shortcuts_a = V("s_descendant_or_self") + V("s_descendant") + V("s_child") + V("s_parent") + V("s_self") + V("s_root") + V("s_ancestor"),
+
+ shortcuts = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0,
+
+ s_descendant_or_self = (P("***/") + P("/")) * Cc(register_descendant_or_self), --- *** is a bonus
+ s_descendant = P("**") * Cc(register_descendant),
+ s_child = P("*") * no_nextcolon * Cc(register_child ),
+ s_parent = P("..") * Cc(register_parent ),
+ s_self = P("." ) * Cc(register_self ),
+ s_root = P("^^") * Cc(register_root ),
+ s_ancestor = P("^") * Cc(register_ancestor ),
+
+ descendant = P("descendant::") * Cc(register_descendant ),
+ child = P("child::") * Cc(register_child ),
+ parent = P("parent::") * Cc(register_parent ),
+ self = P("self::") * Cc(register_self ),
+ root = P('root::') * Cc(register_root ),
+ ancestor = P('ancestor::') * Cc(register_ancestor ),
+ descendant_or_self = P('descendant-or-self::') * Cc(register_descendant_or_self ),
+ ancestor_or_self = P('ancestor-or-self::') * Cc(register_ancestor_or_self ),
+ -- attribute = P('attribute::') * Cc(register_attribute ),
+ -- namespace = P('namespace::') * Cc(register_namespace ),
+ following = P('following::') * Cc(register_following ),
+ following_sibling = P('following-sibling::') * Cc(register_following_sibling ),
+ preceding = P('preceding::') * Cc(register_preceding ),
+ preceding_sibling = P('preceding-sibling::') * Cc(register_preceding_sibling ),
+ reverse_sibling = P('reverse-sibling::') * Cc(register_reverse_sibling ),
+
+ nodes = (V("nodefunction") * spaces * P("(") * V("nodeset") * P(")") + V("nodetest") * V("nodeset")) / register_nodes,
+
+ expressions = expression / register_expression,
+
+ letters = R("az")^1,
+ name = (1-S("/[]()|:*!"))^1, -- make inline
+ negate = P("!") * Cc(false),
+
+ nodefunction = V("negate") + P("not") * Cc(false) + Cc(true),
+ nodetest = V("negate") + Cc(true),
+ nodename = (V("negate") + Cc(true)) * spaces * ((V("wildnodename") * P(":") * V("wildnodename")) + (Cc(false) * V("wildnodename"))),
+ wildnodename = (C(V("name")) + P("*") * Cc(false)) * no_nextlparent,
+ nodeset = spaces * Ct(V("nodename") * (spaces * P("|") * spaces * V("nodename"))^0) * spaces,
+
+ finalizer = (Cb("protocol") * P("/")^-1 * C(V("name")) * arguments * P(-1)) / register_finalizer,
+
+}
+
+xmlpatterns.pathparser = pathparser
+
+local cache = { }
+
+local function nodesettostring(set,nodetest)
+ local t = { }
+ for i=1,#set,3 do
+ local directive, ns, tg = set[i], set[i+1], set[i+2]
+ if not ns or ns == "" then ns = "*" end
+ if not tg or tg == "" then tg = "*" end
+ tg = (tg == "@rt@" and "[root]") or format("%s:%s",ns,tg)
+ t[#t+1] = (directive and tg) or format("not(%s)",tg)
+ end
+ if nodetest == false then
+ return format("not(%s)",concat(t,"|"))
+ else
+ return concat(t,"|")
+ end
+end
+
+local function tagstostring(list)
+ if #list == 0 then
+ return "no elements"
+ else
+ local t = { }
+ for i=1, #list do
+ local li = list[i]
+ local ns, tg = li.ns, li.tg
+ if not ns or ns == "" then ns = "*" end
+ if not tg or tg == "" then tg = "*" end
+ t[i] = (tg == "@rt@" and "[root]") or format("%s:%s",ns,tg)
+ end
+ return concat(t," ")
+ end
+end
+
+xml.nodesettostring = nodesettostring
+
+local lpath -- we have a harmless kind of circular reference
+
+local function lshow(parsed)
+ if type(parsed) == "string" then
+ parsed = lpath(parsed)
+ end
+ report_lpath("%s://%s => %s",parsed.protocol or xml.defaultprotocol,parsed.pattern,
+ table.serialize(parsed,false))
+end
+
+xml.lshow = lshow
+
+local function add_comment(p,str)
+ local pc = p.comment
+ if not pc then
+ p.comment = { str }
+ else
+ pc[#pc+1] = str
+ end
+end
+
+lpath = function (pattern) -- the gain of caching is rather minimal
+ lpathcalls = lpathcalls + 1
+ if type(pattern) == "table" then
+ return pattern
+ else
+ local parsed = cache[pattern]
+ if parsed then
+ lpathcached = lpathcached + 1
+ else
+ parsed = lpegmatch(pathparser,pattern)
+ if parsed then
+ parsed.pattern = pattern
+ local np = #parsed
+ if np == 0 then
+ parsed = { pattern = pattern, register_self, state = "parsing error" }
+ report_lpath("parsing error in pattern: %s",pattern)
+ lshow(parsed)
+ else
+ -- we could have done this with a more complex parser but this
+ -- is cleaner
+ local pi = parsed[1]
+ if pi.axis == "auto-child" then
+ if false then
+ add_comment(parsed, "auto-child replaced by auto-descendant-or-self")
+ parsed[1] = register_auto_descendant_or_self
+ else
+ add_comment(parsed, "auto-child replaced by auto-descendant")
+ parsed[1] = register_auto_descendant
+ end
+ elseif pi.axis == "initial-child" and np > 1 and parsed[2].axis then
+ add_comment(parsed, "initial-child removed") -- we could also make it a auto-self
+ remove(parsed,1)
+ end
+ local np = #parsed -- can have changed
+ if np > 1 then
+ local pnp = parsed[np]
+ if pnp.kind == "nodes" and pnp.nodetest == true then
+ local nodes = pnp.nodes
+ if nodes[1] == true and nodes[2] == false and nodes[3] == false then
+ add_comment(parsed, "redundant final wildcard filter removed")
+ remove(parsed,np)
+ end
+ end
+ end
+ end
+ else
+ parsed = { pattern = pattern }
+ end
+ cache[pattern] = parsed
+ if trace_lparse and not trace_lprofile then
+ lshow(parsed)
+ end
+ end
+ return parsed
+ end
+end
+
+xml.lpath = lpath
+
+-- we can move all calls inline and then merge the trace back
+-- technically we can combine axis and the next nodes which is
+-- what we did before but this a bit cleaner (but slower too)
+-- but interesting is that it's not that much faster when we
+-- go inline
+--
+-- beware: we need to return a collection even when we filter
+-- else the (simple) cache gets messed up
+
+-- caching found lookups saves not that much (max .1 sec on a 8 sec run)
+-- and it also messes up finalizers
+
+-- watch out: when there is a finalizer, it's always called as there
+-- can be cases that a finalizer returns (or does) something in case
+-- there is no match; an example of this is count()
+
+local profiled = { } xml.profiled = profiled
+
+local function profiled_apply(list,parsed,nofparsed,order)
+ local p = profiled[parsed.pattern]
+ if p then
+ p.tested = p.tested + 1
+ else
+ p = { tested = 1, matched = 0, finalized = 0 }
+ profiled[parsed.pattern] = p
+ end
+ local collected = list
+ for i=1,nofparsed do
+ local pi = parsed[i]
+ local kind = pi.kind
+ if kind == "axis" then
+ collected = apply_axis[pi.axis](collected)
+ elseif kind == "nodes" then
+ collected = apply_nodes(collected,pi.nodetest,pi.nodes)
+ elseif kind == "expression" then
+ collected = apply_expression(collected,pi.evaluator,order)
+ elseif kind == "finalizer" then
+ collected = pi.finalizer(collected) -- no check on # here
+ p.matched = p.matched + 1
+ p.finalized = p.finalized + 1
+ return collected
+ end
+ if not collected or #collected == 0 then
+ local pn = i < nofparsed and parsed[nofparsed]
+ if pn and pn.kind == "finalizer" then
+ collected = pn.finalizer(collected)
+ p.finalized = p.finalized + 1
+ return collected
+ end
+ return nil
+ end
+ end
+ if collected then
+ p.matched = p.matched + 1
+ end
+ return collected
+end
+
+local function traced_apply(list,parsed,nofparsed,order)
+ if trace_lparse then
+ lshow(parsed)
+ end
+ report_lpath("collecting: %s",parsed.pattern)
+ report_lpath("root tags : %s",tagstostring(list))
+ report_lpath("order : %s",order or "unset")
+ local collected = list
+ for i=1,nofparsed do
+ local pi = parsed[i]
+ local kind = pi.kind
+ if kind == "axis" then
+ collected = apply_axis[pi.axis](collected)
+ report_lpath("% 10i : ax : %s",(collected and #collected) or 0,pi.axis)
+ elseif kind == "nodes" then
+ collected = apply_nodes(collected,pi.nodetest,pi.nodes)
+ report_lpath("% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest))
+ elseif kind == "expression" then
+ collected = apply_expression(collected,pi.evaluator,order)
+ report_lpath("% 10i : ex : %s -> %s",(collected and #collected) or 0,pi.expression,pi.converted)
+ elseif kind == "finalizer" then
+ collected = pi.finalizer(collected)
+ report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
+ return collected
+ end
+ if not collected or #collected == 0 then
+ local pn = i < nofparsed and parsed[nofparsed]
+ if pn and pn.kind == "finalizer" then
+ collected = pn.finalizer(collected)
+ report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pn.name,pn.arguments or "")
+ return collected
+ end
+ return nil
+ end
+ end
+ return collected
+end
+
+local function normal_apply(list,parsed,nofparsed,order)
+ local collected = list
+ for i=1,nofparsed do
+ local pi = parsed[i]
+ local kind = pi.kind
+ if kind == "axis" then
+ local axis = pi.axis
+ if axis ~= "self" then
+ collected = apply_axis[axis](collected)
+ end
+ elseif kind == "nodes" then
+ collected = apply_nodes(collected,pi.nodetest,pi.nodes)
+ elseif kind == "expression" then
+ collected = apply_expression(collected,pi.evaluator,order)
+ elseif kind == "finalizer" then
+ return pi.finalizer(collected)
+ end
+ if not collected or #collected == 0 then
+ local pf = i < nofparsed and parsed[nofparsed].finalizer
+ if pf then
+ return pf(collected) -- can be anything
+ end
+ return nil
+ end
+ end
+ return collected
+end
+
+local function applylpath(list,pattern)
+ if not list then
+ return
+ end
+ local parsed = cache[pattern]
+ if parsed then
+ lpathcalls = lpathcalls + 1
+ lpathcached = lpathcached + 1
+ elseif type(pattern) == "table" then
+ lpathcalls = lpathcalls + 1
+ parsed = pattern
+ else
+ parsed = lpath(pattern) or pattern
+ end
+ if not parsed then
+ return
+ end
+ local nofparsed = #parsed
+ if nofparsed == 0 then
+ return -- something is wrong
+ end
+ if not trace_lpath then
+ return normal_apply ({ list },parsed,nofparsed,list.mi)
+ elseif trace_lprofile then
+ return profiled_apply({ list },parsed,nofparsed,list.mi)
+ else
+ return traced_apply ({ list },parsed,nofparsed,list.mi)
+ end
+end
+
+xml.applylpath = applylpath -- takes a table as first argment, which is what xml.filter will do
+
+--[[ldx--
+<p>This is the main filter function. It returns whatever is asked for.</p>
+--ldx]]--
+
+function xml.filter(root,pattern) -- no longer funny attribute handling here
+ return applylpath(root,pattern)
+end
+
+-- internal (parsed)
+
+expressions.child = function(e,pattern)
+ return applylpath(e,pattern) -- todo: cache
+end
+
+expressions.count = function(e,pattern) -- what if pattern == empty or nil
+ local collected = applylpath(e,pattern) -- todo: cache
+ return pattern and (collected and #collected) or 0
+end
+
+-- external
+
+-- expressions.oneof = function(s,...)
+-- local t = {...}
+-- for i=1,#t do
+-- if s == t[i] then
+-- return true
+-- end
+-- end
+-- return false
+-- end
+
+expressions.oneof = function(s,...)
+ for i=1,select("#",...) do
+ if s == select(i,...) then
+ return true
+ end
+ end
+ return false
+end
+
+expressions.error = function(str)
+ xml.errorhandler(format("unknown function in lpath expression: %s",tostring(str or "?")))
+ return false
+end
+
+expressions.undefined = function(s)
+ return s == nil
+end
+
+expressions.quit = function(s)
+ if s or s == nil then
+ quit_expression = true
+ end
+ return true
+end
+
+expressions.print = function(...)
+ print(...)
+ return true
+end
+
+expressions.find = find
+expressions.upper = upper
+expressions.lower = lower
+expressions.number = tonumber
+expressions.boolean = toboolean
+
+function expressions.contains(str,pattern)
+ local t = type(str)
+ if t == "string" then
+ if find(str,pattern) then
+ return true
+ end
+ elseif t == "table" then
+ for i=1,#str do
+ local d = str[i]
+ if type(d) == "string" and find(d,pattern) then
+ return true
+ end
+ end
+ end
+ return false
+end
+
+function xml.expressions.idstring(str)
+ return type(str) == "string" and gsub(str,"^#","") or ""
+end
+
+-- user interface
+
+local function traverse(root,pattern,handle)
+ -- report_lpath("use 'xml.selection' instead for pattern: %s",pattern)
+ local collected = applylpath(root,pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ local r = e.__p__
+ handle(r,r.dt,e.ni)
+ end
+ end
+end
+
+local function selection(root,pattern,handle)
+ local collected = applylpath(root,pattern)
+ if collected then
+ if handle then
+ for c=1,#collected do
+ handle(collected[c])
+ end
+ else
+ return collected
+ end
+ end
+end
+
+xml.traverse = traverse -- old method, r, d, k
+xml.selection = selection -- new method, simple handle
+
+--~ function xml.cachedpatterns()
+--~ return cache
+--~ end
+
+-- generic function finalizer (independant namespace)
+
+local function dofunction(collected,fnc,...)
+ if collected then
+ local f = functions[fnc]
+ if f then
+ for c=1,#collected do
+ f(collected[c],...)
+ end
+ else
+ report_lpath("unknown function %a",fnc)
+ end
+ end
+end
+
+finalizers.xml["function"] = dofunction
+finalizers.tex["function"] = dofunction
+
+-- functions
+
+expressions.text = function(e,n)
+ local rdt = e.__p__.dt
+ return rdt and rdt[n] or ""
+end
+
+expressions.name = function(e,n) -- ns + tg
+ local found = false
+ n = tonumber(n) or 0
+ if n == 0 then
+ found = type(e) == "table" and e
+ elseif n < 0 then
+ local d, k = e.__p__.dt, e.ni
+ for i=k-1,1,-1 do
+ local di = d[i]
+ if type(di) == "table" then
+ if n == -1 then
+ found = di
+ break
+ else
+ n = n + 1
+ end
+ end
+ end
+ else
+ local d, k = e.__p__.dt, e.ni
+ for i=k+1,#d,1 do
+ local di = d[i]
+ if type(di) == "table" then
+ if n == 1 then
+ found = di
+ break
+ else
+ n = n - 1
+ end
+ end
+ end
+ end
+ if found then
+ local ns, tg = found.rn or found.ns or "", found.tg
+ if ns ~= "" then
+ return ns .. ":" .. tg
+ else
+ return tg
+ end
+ else
+ return ""
+ end
+end
+
+expressions.tag = function(e,n) -- only tg
+ if not e then
+ return ""
+ else
+ local found = false
+ n = tonumber(n) or 0
+ if n == 0 then
+ found = (type(e) == "table") and e -- seems to fail
+ elseif n < 0 then
+ local d, k = e.__p__.dt, e.ni
+ for i=k-1,1,-1 do
+ local di = d[i]
+ if type(di) == "table" then
+ if n == -1 then
+ found = di
+ break
+ else
+ n = n + 1
+ end
+ end
+ end
+ else
+ local d, k = e.__p__.dt, e.ni
+ for i=k+1,#d,1 do
+ local di = d[i]
+ if type(di) == "table" then
+ if n == 1 then
+ found = di
+ break
+ else
+ n = n - 1
+ end
+ end
+ end
+ end
+ return (found and found.tg) or ""
+ end
+end
+
+--[[ldx--
+<p>Often using an iterators looks nicer in the code than passing handler
+functions. The <l n='lua'/> book describes how to use coroutines for that
+purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits
+code like:</p>
+
+<typing>
+for r, d, k in xml.elements(xml.load('text.xml'),"title") do
+ print(d[k]) -- old method
+end
+for e in xml.collected(xml.load('text.xml'),"title") do
+ print(e) -- new one
+end
+</typing>
+--ldx]]--
+
+-- local wrap, yield = coroutine.wrap, coroutine.yield
+-- local dummy = function() end
+--
+-- function xml.elements(root,pattern,reverse) -- r, d, k
+-- local collected = applylpath(root,pattern)
+-- if collected then
+-- if reverse then
+-- return wrap(function() for c=#collected,1,-1 do
+-- local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
+-- end end)
+-- else
+-- return wrap(function() for c=1,#collected do
+-- local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
+-- end end)
+-- end
+-- end
+-- return wrap(dummy)
+-- end
+--
+-- function xml.collected(root,pattern,reverse) -- e
+-- local collected = applylpath(root,pattern)
+-- if collected then
+-- if reverse then
+-- return wrap(function() for c=#collected,1,-1 do yield(collected[c]) end end)
+-- else
+-- return wrap(function() for c=1,#collected do yield(collected[c]) end end)
+-- end
+-- end
+-- return wrap(dummy)
+-- end
+
+-- faster:
+
+local dummy = function() end
+
+function xml.elements(root,pattern,reverse) -- r, d, k
+ local collected = applylpath(root,pattern)
+ if not collected then
+ return dummy
+ end
+ local n = #collected
+ if n == 0 then
+ return dummy
+ end
+ if reverse then
+ local c = n + 1
+ return function()
+ if c > 1 then
+ c = c - 1
+ local e = collected[c]
+ local r = e.__p__
+ return r, r.dt, e.ni
+ end
+ end
+ else
+ local c = 0
+ return function()
+ if c < n then
+ c = c + 1
+ local e = collected[c]
+ local r = e.__p__
+ return r, r.dt, e.ni
+ end
+ end
+ end
+end
+
+function xml.collected(root,pattern,reverse) -- e
+ local collected = applylpath(root,pattern)
+ if not collected then
+ return dummy
+ end
+ local n = #collected
+ if n == 0 then
+ return dummy
+ end
+ if reverse then
+ local c = n + 1
+ return function()
+ if c > 1 then
+ c = c - 1
+ return collected[c]
+ end
+ end
+ else
+ local c = 0
+ return function()
+ if c < n then
+ c = c + 1
+ return collected[c]
+ end
+ end
+ end
+end
+
+-- handy
+
+function xml.inspect(collection,pattern)
+ pattern = pattern or "."
+ for e in xml.collected(collection,pattern or ".") do
+ report_lpath("pattern: %s\n\n%s\n",pattern,xml.tostring(e))
+ end
+end
+
+-- texy (see xfdf):
+
+local function split(e) -- todo: use helpers / lpeg
+ local dt = e.dt
+ if dt then
+ for i=1,#dt do
+ local dti = dt[i]
+ if type(dti) == "string" then
+ dti = gsub(dti,"^[\n\r]*(.-)[\n\r]*","%1")
+ dti = gsub(dti,"[\n\r]+","\n\n")
+ dt[i] = dti
+ else
+ split(dti)
+ end
+ end
+ end
+ return e
+end
+
+function xml.finalizers.paragraphs(c)
+ for i=1,#c do
+ split(c[i])
+ end
+ return c
+end
+
+-- local lpegmatch = lpeg.match
+-- local w = lpeg.patterns.whitespace
+-- local p = w^0 * lpeg.Cf(lpeg.Ct("") * lpeg.Cg(lpeg.C((1-w)^1) * lpeg.Cc(true) * w^0)^1,rawset)
+
+-- function xml.functions.classes(e) -- cache
+-- local class = e.at.class
+-- if class then
+-- return lpegmatch(p,class)
+-- else
+-- return { }
+-- end
+-- end
+
+-- local gmatch = string.gmatch
+
+-- function xml.functions.hasclass(e,c,class)
+-- class = class and e.at[class] or e.at.class
+-- if class and class ~= "" then
+-- if class == c then
+-- return true
+-- else
+-- for s in gmatch(class,"%S+") do
+-- if s == c then
+-- return true
+-- end
+-- end
+-- end
+-- end
+-- return false
+-- end