diff options
Diffstat (limited to 'tex/context/base/lxml-lpt.lua')
-rw-r--r-- | tex/context/base/lxml-lpt.lua | 2932 |
1 files changed, 1466 insertions, 1466 deletions
diff --git a/tex/context/base/lxml-lpt.lua b/tex/context/base/lxml-lpt.lua index 2f57ced5b..51ab321b9 100644 --- a/tex/context/base/lxml-lpt.lua +++ b/tex/context/base/lxml-lpt.lua @@ -1,1466 +1,1466 @@ -if not modules then modules = { } end modules ['lxml-lpt'] = {
- version = 1.001,
- comment = "this module is the basis for the lxml-* ones",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
--- e.ni is only valid after a filter run
--- todo: B/C/[get first match]
-
-local concat, remove, insert = table.concat, table.remove, table.insert
-local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select
-local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
-local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
-
-local setmetatableindex = table.setmetatableindex
-local formatters = string.formatters -- no need (yet) as paths are cached anyway
-
--- beware, this is not xpath ... e.g. position is different (currently) and
--- we have reverse-sibling as reversed preceding sibling
-
---[[ldx--
-<p>This module can be used stand alone but also inside <l n='mkiv'/> in
-which case it hooks into the tracker code. Therefore we provide a few
-functions that set the tracers. Here we overload a previously defined
-function.</p>
-<p>If I can get in the mood I will make a variant that is XSLT compliant
-but I wonder if it makes sense.</P>
---ldx]]--
-
---[[ldx--
-<p>Expecially the lpath code is experimental, we will support some of xpath, but
-only things that make sense for us; as compensation it is possible to hook in your
-own functions. Apart from preprocessing content for <l n='context'/> we also need
-this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
-files.</p>
-
-<typing>
-a/b/c /*/c
-a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
-a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
-</typing>
---ldx]]--
-
-local trace_lpath = false if trackers then trackers.register("xml.path", function(v) trace_lpath = v end) end
-local trace_lparse = false if trackers then trackers.register("xml.parse", function(v) trace_lparse = v end) end
-local trace_lprofile = false if trackers then trackers.register("xml.profile", function(v) trace_lpath = v trace_lparse = v trace_lprofile = v end) end
-
-local report_lpath = logs.reporter("xml","lpath")
-
---[[ldx--
-<p>We've now arrived at an interesting part: accessing the tree using a subset
-of <l n='xpath'/> and since we're not compatible we call it <l n='lpath'/>. We
-will explain more about its usage in other documents.</p>
---ldx]]--
-
-local xml = xml
-
-local lpathcalls = 0 function xml.lpathcalls () return lpathcalls end
-local lpathcached = 0 function xml.lpathcached() return lpathcached end
-
-xml.functions = xml.functions or { } -- internal
-local functions = xml.functions
-
-xml.expressions = xml.expressions or { } -- in expressions
-local expressions = xml.expressions
-
-xml.finalizers = xml.finalizers or { } -- fast do-with ... (with return value other than collection)
-local finalizers = xml.finalizers
-
-xml.specialhandler = xml.specialhandler or { }
-local specialhandler = xml.specialhandler
-
-lpegpatterns.xml = lpegpatterns.xml or { }
-local xmlpatterns = lpegpatterns.xml
-
-finalizers.xml = finalizers.xml or { }
-finalizers.tex = finalizers.tex or { }
-
-local function fallback (t, name)
- local fn = finalizers[name]
- if fn then
- t[name] = fn
- else
- report_lpath("unknown sub finalizer %a",name)
- fn = function() end
- end
- return fn
-end
-
-setmetatableindex(finalizers.xml, fallback)
-setmetatableindex(finalizers.tex, fallback)
-
-xml.defaultprotocol = "xml"
-
--- as xsl does not follow xpath completely here we will also
--- be more liberal especially with regards to the use of | and
--- the rootpath:
---
--- test : all 'test' under current
--- /test : 'test' relative to current
--- a|b|c : set of names
--- (a|b|c) : idem
--- ! : not
---
--- after all, we're not doing transformations but filtering. in
--- addition we provide filter functions (last bit)
---
--- todo: optimizer
---
--- .. : parent
--- * : all kids
--- / : anchor here
--- // : /**/
--- ** : all in between
---
--- so far we had (more practical as we don't transform)
---
--- {/test} : kids 'test' under current node
--- {test} : any kid with tag 'test'
--- {//test} : same as above
-
--- evaluator (needs to be redone, for the moment copied)
-
--- todo: apply_axis(list,notable) and collection vs single
-
-local apply_axis = { }
-
-apply_axis['root'] = function(list)
- local collected = { }
- for l=1,#list do
- local ll = list[l]
- local rt = ll
- while ll do
- ll = ll.__p__
- if ll then
- rt = ll
- end
- end
- collected[l] = rt
- end
- return collected
-end
-
-apply_axis['self'] = function(list)
---~ local collected = { }
---~ for l=1,#list do
---~ collected[l] = list[l]
---~ end
---~ return collected
- return list
-end
-
-apply_axis['child'] = function(list)
- local collected, c = { }, 0
- for l=1,#list do
- local ll = list[l]
- local dt = ll.dt
- if dt then -- weird that this is needed
- local en = 0
- for k=1,#dt do
- local dk = dt[k]
- if dk.tg then
- c = c + 1
- collected[c] = dk
- dk.ni = k -- refresh
- en = en + 1
- dk.ei = en
- end
- end
- ll.en = en
- end
- end
- return collected
-end
-
-local function collect(list,collected,c)
- local dt = list.dt
- if dt then
- local en = 0
- for k=1,#dt do
- local dk = dt[k]
- if dk.tg then
- c = c + 1
- collected[c] = dk
- dk.ni = k -- refresh
- en = en + 1
- dk.ei = en
- c = collect(dk,collected,c)
- end
- end
- list.en = en
- end
- return c
-end
-
-apply_axis['descendant'] = function(list)
- local collected, c = { }, 0
- for l=1,#list do
- c = collect(list[l],collected,c)
- end
- return collected
-end
-
-local function collect(list,collected,c)
- local dt = list.dt
- if dt then
- local en = 0
- for k=1,#dt do
- local dk = dt[k]
- if dk.tg then
- c = c + 1
- collected[c] = dk
- dk.ni = k -- refresh
- en = en + 1
- dk.ei = en
- c = collect(dk,collected,c)
- end
- end
- list.en = en
- end
- return c
-end
-apply_axis['descendant-or-self'] = function(list)
- local collected, c = { }, 0
- for l=1,#list do
- local ll = list[l]
- if ll.special ~= true then -- catch double root
- c = c + 1
- collected[c] = ll
- end
- c = collect(ll,collected,c)
- end
- return collected
-end
-
-apply_axis['ancestor'] = function(list)
- local collected, c = { }, 0
- for l=1,#list do
- local ll = list[l]
- while ll do
- ll = ll.__p__
- if ll then
- c = c + 1
- collected[c] = ll
- end
- end
- end
- return collected
-end
-
-apply_axis['ancestor-or-self'] = function(list)
- local collected, c = { }, 0
- for l=1,#list do
- local ll = list[l]
- c = c + 1
- collected[c] = ll
- while ll do
- ll = ll.__p__
- if ll then
- c = c + 1
- collected[c] = ll
- end
- end
- end
- return collected
-end
-
-apply_axis['parent'] = function(list)
- local collected, c = { }, 0
- for l=1,#list do
- local pl = list[l].__p__
- if pl then
- c = c + 1
- collected[c] = pl
- end
- end
- return collected
-end
-
-apply_axis['attribute'] = function(list)
- return { }
-end
-
-apply_axis['namespace'] = function(list)
- return { }
-end
-
-apply_axis['following'] = function(list) -- incomplete
---~ local collected, c = { }, 0
---~ for l=1,#list do
---~ local ll = list[l]
---~ local p = ll.__p__
---~ local d = p.dt
---~ for i=ll.ni+1,#d do
---~ local di = d[i]
---~ if type(di) == "table" then
---~ c = c + 1
---~ collected[c] = di
---~ break
---~ end
---~ end
---~ end
---~ return collected
- return { }
-end
-
-apply_axis['preceding'] = function(list) -- incomplete
---~ local collected, c = { }, 0
---~ for l=1,#list do
---~ local ll = list[l]
---~ local p = ll.__p__
---~ local d = p.dt
---~ for i=ll.ni-1,1,-1 do
---~ local di = d[i]
---~ if type(di) == "table" then
---~ c = c + 1
---~ collected[c] = di
---~ break
---~ end
---~ end
---~ end
---~ return collected
- return { }
-end
-
-apply_axis['following-sibling'] = function(list)
- local collected, c = { }, 0
- for l=1,#list do
- local ll = list[l]
- local p = ll.__p__
- local d = p.dt
- for i=ll.ni+1,#d do
- local di = d[i]
- if type(di) == "table" then
- c = c + 1
- collected[c] = di
- end
- end
- end
- return collected
-end
-
-apply_axis['preceding-sibling'] = function(list)
- local collected, c = { }, 0
- for l=1,#list do
- local ll = list[l]
- local p = ll.__p__
- local d = p.dt
- for i=1,ll.ni-1 do
- local di = d[i]
- if type(di) == "table" then
- c = c + 1
- collected[c] = di
- end
- end
- end
- return collected
-end
-
-apply_axis['reverse-sibling'] = function(list) -- reverse preceding
- local collected, c = { }, 0
- for l=1,#list do
- local ll = list[l]
- local p = ll.__p__
- local d = p.dt
- for i=ll.ni-1,1,-1 do
- local di = d[i]
- if type(di) == "table" then
- c = c + 1
- collected[c] = di
- end
- end
- end
- return collected
-end
-
-apply_axis['auto-descendant-or-self'] = apply_axis['descendant-or-self']
-apply_axis['auto-descendant'] = apply_axis['descendant']
-apply_axis['auto-child'] = apply_axis['child']
-apply_axis['auto-self'] = apply_axis['self']
-apply_axis['initial-child'] = apply_axis['child']
-
-local function apply_nodes(list,directive,nodes)
- -- todo: nodes[1] etc ... negated node name in set ... when needed
- -- ... currently ignored
- local maxn = #nodes
- if maxn == 3 then --optimized loop
- local nns, ntg = nodes[2], nodes[3]
- if not nns and not ntg then -- wildcard
- if directive then
- return list
- else
- return { }
- end
- else
- local collected, c, m, p = { }, 0, 0, nil
- if not nns then -- only check tag
- for l=1,#list do
- local ll = list[l]
- local ltg = ll.tg
- if ltg then
- if directive then
- if ntg == ltg then
- local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
- c = c + 1
- collected[c], ll.mi = ll, m
- end
- elseif ntg ~= ltg then
- local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
- c = c + 1
- collected[c], ll.mi = ll, m
- end
- end
- end
- elseif not ntg then -- only check namespace
- for l=1,#list do
- local ll = list[l]
- local lns = ll.rn or ll.ns
- if lns then
- if directive then
- if lns == nns then
- local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
- c = c + 1
- collected[c], ll.mi = ll, m
- end
- elseif lns ~= nns then
- local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
- c = c + 1
- collected[c], ll.mi = ll, m
- end
- end
- end
- else -- check both
- for l=1,#list do
- local ll = list[l]
- local ltg = ll.tg
- if ltg then
- local lns = ll.rn or ll.ns
- local ok = ltg == ntg and lns == nns
- if directive then
- if ok then
- local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
- c = c + 1
- collected[c], ll.mi = ll, m
- end
- elseif not ok then
- local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
- c = c + 1
- collected[c], ll.mi = ll, m
- end
- end
- end
- end
- return collected
- end
- else
- local collected, c, m, p = { }, 0, 0, nil
- for l=1,#list do
- local ll = list[l]
- local ltg = ll.tg
- if ltg then
- local lns = ll.rn or ll.ns
- local ok = false
- for n=1,maxn,3 do
- local nns, ntg = nodes[n+1], nodes[n+2]
- ok = (not ntg or ltg == ntg) and (not nns or lns == nns)
- if ok then
- break
- end
- end
- if directive then
- if ok then
- local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
- c = c + 1
- collected[c], ll.mi = ll, m
- end
- elseif not ok then
- local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end
- c = c + 1
- collected[c], ll.mi = ll, m
- end
- end
- end
- return collected
- end
-end
-
-local quit_expression = false
-
-local function apply_expression(list,expression,order)
- local collected, c = { }, 0
- quit_expression = false
- for l=1,#list do
- local ll = list[l]
- if expression(list,ll,l,order) then -- nasty, order alleen valid als n=1
- c = c + 1
- collected[c] = ll
- end
- if quit_expression then
- break
- end
- end
- return collected
-end
-
-local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb
-
-local spaces = S(" \n\r\t\f")^0
-local lp_space = S(" \n\r\t\f")
-local lp_any = P(1)
-local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==")
-local lp_doequal = P("=") / "=="
-local lp_or = P("|") / " or "
-local lp_and = P("&") / " and "
-
-local lp_builtin = P (
- P("text") / "(ll.dt[1] or '')" + -- fragile
- P("content") / "ll.dt" +
- -- P("name") / "(ll.ns~='' and ll.ns..':'..ll.tg)" +
- P("name") / "((ll.ns~='' and ll.ns..':'..ll.tg) or ll.tg)" +
- P("tag") / "ll.tg" +
- P("position") / "l" + -- is element in finalizer
- P("firstindex") / "1" +
- P("lastindex") / "(#ll.__p__.dt or 1)" +
- P("firstelement") / "1" +
- P("lastelement") / "(ll.__p__.en or 1)" +
- P("first") / "1" +
- P("last") / "#list" +
- P("rootposition") / "order" +
- P("order") / "order" +
- P("element") / "(ll.ei or 1)" +
- P("index") / "(ll.ni or 1)" +
- P("match") / "(ll.mi or 1)" +
- -- P("namespace") / "ll.ns" +
- P("ns") / "ll.ns"
- ) * ((spaces * P("(") * spaces * P(")"))/"")
-
--- for the moment we keep namespaces with attributes
-
-local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * ((R("az","AZ") + S("-_:"))^1) * Cc("'])")
-
--- lp_fastpos_p = (P("+")^0 * R("09")^1 * P(-1)) / function(s) return "l==" .. s end
--- lp_fastpos_n = (P("-") * R("09")^1 * P(-1)) / function(s) return "(" .. s .. "<0 and (#list+".. s .. "==l))" end
-
-lp_fastpos_p = P("+")^0 * R("09")^1 * P(-1) / "l==%0"
-lp_fastpos_n = P("-") * R("09")^1 * P(-1) / "(%0<0 and (#list+%0==l))"
-
-local lp_fastpos = lp_fastpos_n + lp_fastpos_p
-
-local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false")
-
--- local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling
--- return t .. "("
--- end
-
--- local lp_lua_function = (R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / "%0("
-local lp_lua_function = Cs((R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(")) / "%0"
-
-local lp_function = C(R("az","AZ","__")^1) * P("(") / function(t) -- todo: better . handling
- if expressions[t] then
- return "expr." .. t .. "("
- else
- return "expr.error("
- end
-end
-
-local lparent = P("(")
-local rparent = P(")")
-local noparent = 1 - (lparent+rparent)
-local nested = P{lparent * (noparent + V(1))^0 * rparent}
-local value = P(lparent * C((noparent + nested)^0) * rparent) -- P{"("*C(((1-S("()"))+V(1))^0)*")"}
-
-local lp_child = Cc("expr.child(ll,'") * R("az","AZ","--","__")^1 * Cc("')")
-local lp_number = S("+-") * R("09")^1
-local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'")
-local lp_content = (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"'))
-
-local cleaner
-
-local lp_special = (C(P("name")+P("text")+P("tag")+P("count")+P("child"))) * value / function(t,s)
- if expressions[t] then
- s = s and s ~= "" and lpegmatch(cleaner,s)
- if s and s ~= "" then
- return "expr." .. t .. "(ll," .. s ..")"
- else
- return "expr." .. t .. "(ll)"
- end
- else
- return "expr.error(" .. t .. ")"
- end
-end
-
-local content =
- lp_builtin +
- lp_attribute +
- lp_special +
- lp_noequal + lp_doequal +
- lp_or + lp_and +
- lp_reserved +
- lp_lua_function + lp_function +
- lp_content + -- too fragile
- lp_child +
- lp_any
-
-local converter = Cs (
- lp_fastpos + (P { lparent * (V(1))^0 * rparent + content } )^0
-)
-
-cleaner = Cs ( (
- -- lp_fastpos +
- lp_reserved +
- lp_number +
- lp_string +
-1 )^1 )
-
-local template_e = [[
- local expr = xml.expressions
- return function(list,ll,l,order)
- return %s
- end
-]]
-
-local template_f_y = [[
- local finalizer = xml.finalizers['%s']['%s']
- return function(collection)
- return finalizer(collection,%s)
- end
-]]
-
-local template_f_n = [[
- return xml.finalizers['%s']['%s']
-]]
-
---
-
-local register_self = { kind = "axis", axis = "self" } -- , apply = apply_axis["self"] }
-local register_parent = { kind = "axis", axis = "parent" } -- , apply = apply_axis["parent"] }
-local register_descendant = { kind = "axis", axis = "descendant" } -- , apply = apply_axis["descendant"] }
-local register_child = { kind = "axis", axis = "child" } -- , apply = apply_axis["child"] }
-local register_descendant_or_self = { kind = "axis", axis = "descendant-or-self" } -- , apply = apply_axis["descendant-or-self"] }
-local register_root = { kind = "axis", axis = "root" } -- , apply = apply_axis["root"] }
-local register_ancestor = { kind = "axis", axis = "ancestor" } -- , apply = apply_axis["ancestor"] }
-local register_ancestor_or_self = { kind = "axis", axis = "ancestor-or-self" } -- , apply = apply_axis["ancestor-or-self"] }
-local register_attribute = { kind = "axis", axis = "attribute" } -- , apply = apply_axis["attribute"] }
-local register_namespace = { kind = "axis", axis = "namespace" } -- , apply = apply_axis["namespace"] }
-local register_following = { kind = "axis", axis = "following" } -- , apply = apply_axis["following"] }
-local register_following_sibling = { kind = "axis", axis = "following-sibling" } -- , apply = apply_axis["following-sibling"] }
-local register_preceding = { kind = "axis", axis = "preceding" } -- , apply = apply_axis["preceding"] }
-local register_preceding_sibling = { kind = "axis", axis = "preceding-sibling" } -- , apply = apply_axis["preceding-sibling"] }
-local register_reverse_sibling = { kind = "axis", axis = "reverse-sibling" } -- , apply = apply_axis["reverse-sibling"] }
-
-local register_auto_descendant_or_self = { kind = "axis", axis = "auto-descendant-or-self" } -- , apply = apply_axis["auto-descendant-or-self"] }
-local register_auto_descendant = { kind = "axis", axis = "auto-descendant" } -- , apply = apply_axis["auto-descendant"] }
-local register_auto_self = { kind = "axis", axis = "auto-self" } -- , apply = apply_axis["auto-self"] }
-local register_auto_child = { kind = "axis", axis = "auto-child" } -- , apply = apply_axis["auto-child"] }
-
-local register_initial_child = { kind = "axis", axis = "initial-child" } -- , apply = apply_axis["initial-child"] }
-
-local register_all_nodes = { kind = "nodes", nodetest = true, nodes = { true, false, false } }
-
-local skip = { }
-
-local function errorrunner_e(str,cnv)
- if not skip[str] then
- report_lpath("error in expression: %s => %s",str,cnv)
- skip[str] = cnv or str
- end
- return false
-end
-
-local function errorrunner_f(str,arg)
- report_lpath("error in finalizer: %s(%s)",str,arg or "")
- return false
-end
-
-local function register_nodes(nodetest,nodes)
- return { kind = "nodes", nodetest = nodetest, nodes = nodes }
-end
-
-local function register_expression(expression)
- local converted = lpegmatch(converter,expression)
- local runner = load(format(template_e,converted))
- runner = (runner and runner()) or function() errorrunner_e(expression,converted) end
- return { kind = "expression", expression = expression, converted = converted, evaluator = runner }
-end
-
-local function register_finalizer(protocol,name,arguments)
- local runner
- if arguments and arguments ~= "" then
- runner = load(format(template_f_y,protocol or xml.defaultprotocol,name,arguments))
- else
- runner = load(format(template_f_n,protocol or xml.defaultprotocol,name))
- end
- runner = (runner and runner()) or function() errorrunner_f(name,arguments) end
- return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner }
-end
-
-local expression = P { "ex",
- ex = "[" * C((V("sq") + V("dq") + (1 - S("[]")) + V("ex"))^0) * "]",
- sq = "'" * (1 - S("'"))^0 * "'",
- dq = '"' * (1 - S('"'))^0 * '"',
-}
-
-local arguments = P { "ar",
- ar = "(" * Cs((V("sq") + V("dq") + V("nq") + P(1-P(")")))^0) * ")",
- nq = ((1 - S("),'\""))^1) / function(s) return format("%q",s) end,
- sq = P("'") * (1 - P("'"))^0 * P("'"),
- dq = P('"') * (1 - P('"'))^0 * P('"'),
-}
-
--- todo: better arg parser
-
-local function register_error(str)
- return { kind = "error", error = format("unparsed: %s",str) }
-end
-
--- there is a difference in * and /*/ and so we need to catch a few special cases
-
-local special_1 = P("*") * Cc(register_auto_descendant) * Cc(register_all_nodes) -- last one not needed
-local special_2 = P("/") * Cc(register_auto_self)
-local special_3 = P("") * Cc(register_auto_self)
-
-local no_nextcolon = P(-1) + #(1-P(":")) -- newer lpeg needs the P(-1)
-local no_nextlparent = P(-1) + #(1-P("(")) -- newer lpeg needs the P(-1)
-
-local pathparser = Ct { "patterns", -- can be made a bit faster by moving some patterns outside
-
- patterns = spaces * V("protocol") * spaces * (
- ( V("special") * spaces * P(-1) ) +
- ( V("initial") * spaces * V("step") * spaces * (P("/") * spaces * V("step") * spaces)^0 )
- ),
-
- protocol = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"),
-
- -- the / is needed for // as descendant or self is somewhat special
- -- step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
- step = ((V("shortcuts") + P("/") + V("axis")) * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
-
- axis = V("descendant") + V("child") + V("parent") + V("self") + V("root") + V("ancestor") +
- V("descendant_or_self") + V("following_sibling") + V("following") +
- V("reverse_sibling") + V("preceding_sibling") + V("preceding") + V("ancestor_or_self") +
- #(1-P(-1)) * Cc(register_auto_child),
-
- special = special_1 + special_2 + special_3,
-
- initial = (P("/") * spaces * Cc(register_initial_child))^-1,
-
- error = (P(1)^1) / register_error,
-
- shortcuts_a = V("s_descendant_or_self") + V("s_descendant") + V("s_child") + V("s_parent") + V("s_self") + V("s_root") + V("s_ancestor"),
-
- shortcuts = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0,
-
- s_descendant_or_self = (P("***/") + P("/")) * Cc(register_descendant_or_self), --- *** is a bonus
- s_descendant = P("**") * Cc(register_descendant),
- s_child = P("*") * no_nextcolon * Cc(register_child ),
- s_parent = P("..") * Cc(register_parent ),
- s_self = P("." ) * Cc(register_self ),
- s_root = P("^^") * Cc(register_root ),
- s_ancestor = P("^") * Cc(register_ancestor ),
-
- descendant = P("descendant::") * Cc(register_descendant ),
- child = P("child::") * Cc(register_child ),
- parent = P("parent::") * Cc(register_parent ),
- self = P("self::") * Cc(register_self ),
- root = P('root::') * Cc(register_root ),
- ancestor = P('ancestor::') * Cc(register_ancestor ),
- descendant_or_self = P('descendant-or-self::') * Cc(register_descendant_or_self ),
- ancestor_or_self = P('ancestor-or-self::') * Cc(register_ancestor_or_self ),
- -- attribute = P('attribute::') * Cc(register_attribute ),
- -- namespace = P('namespace::') * Cc(register_namespace ),
- following = P('following::') * Cc(register_following ),
- following_sibling = P('following-sibling::') * Cc(register_following_sibling ),
- preceding = P('preceding::') * Cc(register_preceding ),
- preceding_sibling = P('preceding-sibling::') * Cc(register_preceding_sibling ),
- reverse_sibling = P('reverse-sibling::') * Cc(register_reverse_sibling ),
-
- nodes = (V("nodefunction") * spaces * P("(") * V("nodeset") * P(")") + V("nodetest") * V("nodeset")) / register_nodes,
-
- expressions = expression / register_expression,
-
- letters = R("az")^1,
- name = (1-S("/[]()|:*!"))^1, -- make inline
- negate = P("!") * Cc(false),
-
- nodefunction = V("negate") + P("not") * Cc(false) + Cc(true),
- nodetest = V("negate") + Cc(true),
- nodename = (V("negate") + Cc(true)) * spaces * ((V("wildnodename") * P(":") * V("wildnodename")) + (Cc(false) * V("wildnodename"))),
- wildnodename = (C(V("name")) + P("*") * Cc(false)) * no_nextlparent,
- nodeset = spaces * Ct(V("nodename") * (spaces * P("|") * spaces * V("nodename"))^0) * spaces,
-
- finalizer = (Cb("protocol") * P("/")^-1 * C(V("name")) * arguments * P(-1)) / register_finalizer,
-
-}
-
-xmlpatterns.pathparser = pathparser
-
-local cache = { }
-
-local function nodesettostring(set,nodetest)
- local t = { }
- for i=1,#set,3 do
- local directive, ns, tg = set[i], set[i+1], set[i+2]
- if not ns or ns == "" then ns = "*" end
- if not tg or tg == "" then tg = "*" end
- tg = (tg == "@rt@" and "[root]") or format("%s:%s",ns,tg)
- t[i] = (directive and tg) or format("not(%s)",tg)
- end
- if nodetest == false then
- return format("not(%s)",concat(t,"|"))
- else
- return concat(t,"|")
- end
-end
-
-local function tagstostring(list)
- if #list == 0 then
- return "no elements"
- else
- local t = { }
- for i=1, #list do
- local li = list[i]
- local ns, tg = li.ns, li.tg
- if not ns or ns == "" then ns = "*" end
- if not tg or tg == "" then tg = "*" end
- t[i] = (tg == "@rt@" and "[root]") or format("%s:%s",ns,tg)
- end
- return concat(t," ")
- end
-end
-
-xml.nodesettostring = nodesettostring
-
-local lpath -- we have a harmless kind of circular reference
-
-local lshowoptions = { functions = false }
-
-local function lshow(parsed)
- if type(parsed) == "string" then
- parsed = lpath(parsed)
- end
- report_lpath("%s://%s => %s",parsed.protocol or xml.defaultprotocol,parsed.pattern,
- table.serialize(parsed,false,lshowoptions))
-end
-
-xml.lshow = lshow
-
-local function add_comment(p,str)
- local pc = p.comment
- if not pc then
- p.comment = { str }
- else
- pc[#pc+1] = str
- end
-end
-
-lpath = function (pattern) -- the gain of caching is rather minimal
- lpathcalls = lpathcalls + 1
- if type(pattern) == "table" then
- return pattern
- else
- local parsed = cache[pattern]
- if parsed then
- lpathcached = lpathcached + 1
- else
- parsed = lpegmatch(pathparser,pattern)
- if parsed then
- parsed.pattern = pattern
- local np = #parsed
- if np == 0 then
- parsed = { pattern = pattern, register_self, state = "parsing error" }
- report_lpath("parsing error in pattern: %s",pattern)
- lshow(parsed)
- else
- -- we could have done this with a more complex parser but this
- -- is cleaner
- local pi = parsed[1]
- if pi.axis == "auto-child" then
- if false then
- add_comment(parsed, "auto-child replaced by auto-descendant-or-self")
- parsed[1] = register_auto_descendant_or_self
- else
- add_comment(parsed, "auto-child replaced by auto-descendant")
- parsed[1] = register_auto_descendant
- end
- elseif pi.axis == "initial-child" and np > 1 and parsed[2].axis then
- add_comment(parsed, "initial-child removed") -- we could also make it a auto-self
- remove(parsed,1)
- end
- local np = #parsed -- can have changed
- if np > 1 then
- local pnp = parsed[np]
- if pnp.kind == "nodes" and pnp.nodetest == true then
- local nodes = pnp.nodes
- if nodes[1] == true and nodes[2] == false and nodes[3] == false then
- add_comment(parsed, "redundant final wildcard filter removed")
- remove(parsed,np)
- end
- end
- end
- end
- else
- parsed = { pattern = pattern }
- end
- cache[pattern] = parsed
- if trace_lparse and not trace_lprofile then
- lshow(parsed)
- end
- end
- return parsed
- end
-end
-
-xml.lpath = lpath
-
--- we can move all calls inline and then merge the trace back
--- technically we can combine axis and the next nodes which is
--- what we did before but this a bit cleaner (but slower too)
--- but interesting is that it's not that much faster when we
--- go inline
---
--- beware: we need to return a collection even when we filter
--- else the (simple) cache gets messed up
-
--- caching found lookups saves not that much (max .1 sec on a 8 sec run)
--- and it also messes up finalizers
-
--- watch out: when there is a finalizer, it's always called as there
--- can be cases that a finalizer returns (or does) something in case
--- there is no match; an example of this is count()
-
-local profiled = { } xml.profiled = profiled
-
-local function profiled_apply(list,parsed,nofparsed,order)
- local p = profiled[parsed.pattern]
- if p then
- p.tested = p.tested + 1
- else
- p = { tested = 1, matched = 0, finalized = 0 }
- profiled[parsed.pattern] = p
- end
- local collected = list
- for i=1,nofparsed do
- local pi = parsed[i]
- local kind = pi.kind
- if kind == "axis" then
- collected = apply_axis[pi.axis](collected)
- elseif kind == "nodes" then
- collected = apply_nodes(collected,pi.nodetest,pi.nodes)
- elseif kind == "expression" then
- collected = apply_expression(collected,pi.evaluator,order)
- elseif kind == "finalizer" then
- collected = pi.finalizer(collected) -- no check on # here
- p.matched = p.matched + 1
- p.finalized = p.finalized + 1
- return collected
- end
- if not collected or #collected == 0 then
- local pn = i < nofparsed and parsed[nofparsed]
- if pn and pn.kind == "finalizer" then
- collected = pn.finalizer(collected)
- p.finalized = p.finalized + 1
- return collected
- end
- return nil
- end
- end
- if collected then
- p.matched = p.matched + 1
- end
- return collected
-end
-
-local function traced_apply(list,parsed,nofparsed,order)
- if trace_lparse then
- lshow(parsed)
- end
- report_lpath("collecting: %s",parsed.pattern)
- report_lpath("root tags : %s",tagstostring(list))
- report_lpath("order : %s",order or "unset")
- local collected = list
- for i=1,nofparsed do
- local pi = parsed[i]
- local kind = pi.kind
- if kind == "axis" then
- collected = apply_axis[pi.axis](collected)
- report_lpath("% 10i : ax : %s",(collected and #collected) or 0,pi.axis)
- elseif kind == "nodes" then
- collected = apply_nodes(collected,pi.nodetest,pi.nodes)
- report_lpath("% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest))
- elseif kind == "expression" then
- collected = apply_expression(collected,pi.evaluator,order)
- report_lpath("% 10i : ex : %s -> %s",(collected and #collected) or 0,pi.expression,pi.converted)
- elseif kind == "finalizer" then
- collected = pi.finalizer(collected)
- report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
- return collected
- end
- if not collected or #collected == 0 then
- local pn = i < nofparsed and parsed[nofparsed]
- if pn and pn.kind == "finalizer" then
- collected = pn.finalizer(collected)
- report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pn.name,pn.arguments or "")
- return collected
- end
- return nil
- end
- end
- return collected
-end
-
-local function normal_apply(list,parsed,nofparsed,order)
- local collected = list
- for i=1,nofparsed do
- local pi = parsed[i]
- local kind = pi.kind
- if kind == "axis" then
- local axis = pi.axis
- if axis ~= "self" then
- collected = apply_axis[axis](collected)
- end
- elseif kind == "nodes" then
- collected = apply_nodes(collected,pi.nodetest,pi.nodes)
- elseif kind == "expression" then
- collected = apply_expression(collected,pi.evaluator,order)
- elseif kind == "finalizer" then
- return pi.finalizer(collected)
- end
- if not collected or #collected == 0 then
- local pf = i < nofparsed and parsed[nofparsed].finalizer
- if pf then
- return pf(collected) -- can be anything
- end
- return nil
- end
- end
- return collected
-end
-
---~ local function applylpath(list,pattern)
---~ -- we avoid an extra call
---~ local parsed = cache[pattern]
---~ if parsed then
---~ lpathcalls = lpathcalls + 1
---~ lpathcached = lpathcached + 1
---~ elseif type(pattern) == "table" then
---~ lpathcalls = lpathcalls + 1
---~ parsed = pattern
---~ else
---~ parsed = lpath(pattern) or pattern
---~ end
---~ if not parsed then
---~ return
---~ end
---~ local nofparsed = #parsed
---~ if nofparsed == 0 then
---~ return -- something is wrong
---~ end
---~ local one = list[1] -- we could have a third argument: isroot and list or list[1] or whatever we like ... todo
---~ if not one then
---~ return -- something is wrong
---~ elseif not trace_lpath then
---~ return normal_apply(list,parsed,nofparsed,one.mi)
---~ elseif trace_lprofile then
---~ return profiled_apply(list,parsed,nofparsed,one.mi)
---~ else
---~ return traced_apply(list,parsed,nofparsed,one.mi)
---~ end
---~ end
-
-local function applylpath(list,pattern)
- if not list then
- return
- end
- local parsed = cache[pattern]
- if parsed then
- lpathcalls = lpathcalls + 1
- lpathcached = lpathcached + 1
- elseif type(pattern) == "table" then
- lpathcalls = lpathcalls + 1
- parsed = pattern
- else
- parsed = lpath(pattern) or pattern
- end
- if not parsed then
- return
- end
- local nofparsed = #parsed
- if nofparsed == 0 then
- return -- something is wrong
- end
- if not trace_lpath then
- return normal_apply ({ list },parsed,nofparsed,list.mi)
- elseif trace_lprofile then
- return profiled_apply({ list },parsed,nofparsed,list.mi)
- else
- return traced_apply ({ list },parsed,nofparsed,list.mi)
- end
-end
-
-xml.applylpath = applylpath -- takes a table as first argment, which is what xml.filter will do
-
---[[ldx--
-<p>This is the main filter function. It returns whatever is asked for.</p>
---ldx]]--
-
-function xml.filter(root,pattern) -- no longer funny attribute handling here
- return applylpath(root,pattern)
-end
-
--- internal (parsed)
-
-expressions.child = function(e,pattern)
- return applylpath(e,pattern) -- todo: cache
-end
-
-expressions.count = function(e,pattern) -- what if pattern == empty or nil
- local collected = applylpath(e,pattern) -- todo: cache
- return pattern and (collected and #collected) or 0
-end
-
--- external
-
--- expressions.oneof = function(s,...)
--- local t = {...}
--- for i=1,#t do
--- if s == t[i] then
--- return true
--- end
--- end
--- return false
--- end
-
-expressions.oneof = function(s,...)
- for i=1,select("#",...) do
- if s == select(i,...) then
- return true
- end
- end
- return false
-end
-
-expressions.error = function(str)
- xml.errorhandler(format("unknown function in lpath expression: %s",tostring(str or "?")))
- return false
-end
-
-expressions.undefined = function(s)
- return s == nil
-end
-
-expressions.quit = function(s)
- if s or s == nil then
- quit_expression = true
- end
- return true
-end
-
-expressions.print = function(...)
- print(...)
- return true
-end
-
-expressions.contains = find
-expressions.find = find
-expressions.upper = upper
-expressions.lower = lower
-expressions.number = tonumber
-expressions.boolean = toboolean
-
-function expressions.contains(str,pattern)
- local t = type(str)
- if t == "string" then
- if find(str,pattern) then
- return true
- end
- elseif t == "table" then
- for i=1,#str do
- local d = str[i]
- if type(d) == "string" and find(d,pattern) then
- return true
- end
- end
- end
- return false
-end
-
--- user interface
-
-local function traverse(root,pattern,handle)
- -- report_lpath("use 'xml.selection' instead for pattern: %s",pattern)
- local collected = applylpath(root,pattern)
- if collected then
- for c=1,#collected do
- local e = collected[c]
- local r = e.__p__
- handle(r,r.dt,e.ni)
- end
- end
-end
-
-local function selection(root,pattern,handle)
- local collected = applylpath(root,pattern)
- if collected then
- if handle then
- for c=1,#collected do
- handle(collected[c])
- end
- else
- return collected
- end
- end
-end
-
-xml.traverse = traverse -- old method, r, d, k
-xml.selection = selection -- new method, simple handle
-
---~ function xml.cachedpatterns()
---~ return cache
---~ end
-
--- generic function finalizer (independant namespace)
-
-local function dofunction(collected,fnc,...)
- if collected then
- local f = functions[fnc]
- if f then
- for c=1,#collected do
- f(collected[c],...)
- end
- else
- report_lpath("unknown function %a",fnc)
- end
- end
-end
-
-finalizers.xml["function"] = dofunction
-finalizers.tex["function"] = dofunction
-
--- functions
-
-expressions.text = function(e,n)
- local rdt = e.__p__.dt
- return rdt and rdt[n] or ""
-end
-
-expressions.name = function(e,n) -- ns + tg
- local found = false
- n = tonumber(n) or 0
- if n == 0 then
- found = type(e) == "table" and e
- elseif n < 0 then
- local d, k = e.__p__.dt, e.ni
- for i=k-1,1,-1 do
- local di = d[i]
- if type(di) == "table" then
- if n == -1 then
- found = di
- break
- else
- n = n + 1
- end
- end
- end
- else
- local d, k = e.__p__.dt, e.ni
- for i=k+1,#d,1 do
- local di = d[i]
- if type(di) == "table" then
- if n == 1 then
- found = di
- break
- else
- n = n - 1
- end
- end
- end
- end
- if found then
- local ns, tg = found.rn or found.ns or "", found.tg
- if ns ~= "" then
- return ns .. ":" .. tg
- else
- return tg
- end
- else
- return ""
- end
-end
-
-expressions.tag = function(e,n) -- only tg
- if not e then
- return ""
- else
- local found = false
- n = tonumber(n) or 0
- if n == 0 then
- found = (type(e) == "table") and e -- seems to fail
- elseif n < 0 then
- local d, k = e.__p__.dt, e.ni
- for i=k-1,1,-1 do
- local di = d[i]
- if type(di) == "table" then
- if n == -1 then
- found = di
- break
- else
- n = n + 1
- end
- end
- end
- else
- local d, k = e.__p__.dt, e.ni
- for i=k+1,#d,1 do
- local di = d[i]
- if type(di) == "table" then
- if n == 1 then
- found = di
- break
- else
- n = n - 1
- end
- end
- end
- end
- return (found and found.tg) or ""
- end
-end
-
---[[ldx--
-<p>Often using an iterators looks nicer in the code than passing handler
-functions. The <l n='lua'/> book describes how to use coroutines for that
-purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits
-code like:</p>
-
-<typing>
-for r, d, k in xml.elements(xml.load('text.xml'),"title") do
- print(d[k]) -- old method
-end
-for e in xml.collected(xml.load('text.xml'),"title") do
- print(e) -- new one
-end
-</typing>
---ldx]]--
-
--- local wrap, yield = coroutine.wrap, coroutine.yield
--- local dummy = function() end
---
--- function xml.elements(root,pattern,reverse) -- r, d, k
--- local collected = applylpath(root,pattern)
--- if collected then
--- if reverse then
--- return wrap(function() for c=#collected,1,-1 do
--- local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
--- end end)
--- else
--- return wrap(function() for c=1,#collected do
--- local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
--- end end)
--- end
--- end
--- return wrap(dummy)
--- end
---
--- function xml.collected(root,pattern,reverse) -- e
--- local collected = applylpath(root,pattern)
--- if collected then
--- if reverse then
--- return wrap(function() for c=#collected,1,-1 do yield(collected[c]) end end)
--- else
--- return wrap(function() for c=1,#collected do yield(collected[c]) end end)
--- end
--- end
--- return wrap(dummy)
--- end
-
--- faster:
-
-local dummy = function() end
-
-function xml.elements(root,pattern,reverse) -- r, d, k
- local collected = applylpath(root,pattern)
- if not collected then
- return dummy
- elseif reverse then
- local c = #collected + 1
- return function()
- if c > 1 then
- c = c - 1
- local e = collected[c]
- local r = e.__p__
- return r, r.dt, e.ni
- end
- end
- else
- local n, c = #collected, 0
- return function()
- if c < n then
- c = c + 1
- local e = collected[c]
- local r = e.__p__
- return r, r.dt, e.ni
- end
- end
- end
-end
-
-function xml.collected(root,pattern,reverse) -- e
- local collected = applylpath(root,pattern)
- if not collected then
- return dummy
- elseif reverse then
- local c = #collected + 1
- return function()
- if c > 1 then
- c = c - 1
- return collected[c]
- end
- end
- else
- local n, c = #collected, 0
- return function()
- if c < n then
- c = c + 1
- return collected[c]
- end
- end
- end
-end
-
--- handy
-
-function xml.inspect(collection,pattern)
- pattern = pattern or "."
- for e in xml.collected(collection,pattern or ".") do
- report_lpath("pattern: %s\n\n%s\n",pattern,xml.tostring(e))
- end
-end
-
--- texy (see xfdf):
-
-local function split(e)
- local dt = e.dt
- if dt then
- for i=1,#dt do
- local dti = dt[i]
- if type(dti) == "string" then
- dti = gsub(dti,"^[\n\r]*(.-)[\n\r]*","%1")
- dti = gsub(dti,"[\n\r]+","\n\n")
- dt[i] = dti
- else
- split(dti)
- end
- end
- end
- return e
-end
-
-function xml.finalizers.paragraphs(c)
- for i=1,#c do
- split(c[i])
- end
- return c
-end
+if not modules then modules = { } end modules ['lxml-lpt'] = { + version = 1.001, + comment = "this module is the basis for the lxml-* ones", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- e.ni is only valid after a filter run +-- todo: B/C/[get first match] + +local concat, remove, insert = table.concat, table.remove, table.insert +local type, next, tonumber, tostring, setmetatable, load, select = type, next, tonumber, tostring, setmetatable, load, select +local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep +local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns + +local setmetatableindex = table.setmetatableindex +local formatters = string.formatters -- no need (yet) as paths are cached anyway + +-- beware, this is not xpath ... e.g. position is different (currently) and +-- we have reverse-sibling as reversed preceding sibling + +--[[ldx-- +<p>This module can be used stand alone but also inside <l n='mkiv'/> in +which case it hooks into the tracker code. Therefore we provide a few +functions that set the tracers. Here we overload a previously defined +function.</p> +<p>If I can get in the mood I will make a variant that is XSLT compliant +but I wonder if it makes sense.</P> +--ldx]]-- + +--[[ldx-- +<p>Expecially the lpath code is experimental, we will support some of xpath, but +only things that make sense for us; as compensation it is possible to hook in your +own functions. Apart from preprocessing content for <l n='context'/> we also need +this module for process management, like handling <l n='ctx'/> and <l n='rlx'/> +files.</p> + +<typing> +a/b/c /*/c +a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n) +a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n) +</typing> +--ldx]]-- + +local trace_lpath = false if trackers then trackers.register("xml.path", function(v) trace_lpath = v end) end +local trace_lparse = false if trackers then trackers.register("xml.parse", function(v) trace_lparse = v end) end +local trace_lprofile = false if trackers then trackers.register("xml.profile", function(v) trace_lpath = v trace_lparse = v trace_lprofile = v end) end + +local report_lpath = logs.reporter("xml","lpath") + +--[[ldx-- +<p>We've now arrived at an interesting part: accessing the tree using a subset +of <l n='xpath'/> and since we're not compatible we call it <l n='lpath'/>. We +will explain more about its usage in other documents.</p> +--ldx]]-- + +local xml = xml + +local lpathcalls = 0 function xml.lpathcalls () return lpathcalls end +local lpathcached = 0 function xml.lpathcached() return lpathcached end + +xml.functions = xml.functions or { } -- internal +local functions = xml.functions + +xml.expressions = xml.expressions or { } -- in expressions +local expressions = xml.expressions + +xml.finalizers = xml.finalizers or { } -- fast do-with ... (with return value other than collection) +local finalizers = xml.finalizers + +xml.specialhandler = xml.specialhandler or { } +local specialhandler = xml.specialhandler + +lpegpatterns.xml = lpegpatterns.xml or { } +local xmlpatterns = lpegpatterns.xml + +finalizers.xml = finalizers.xml or { } +finalizers.tex = finalizers.tex or { } + +local function fallback (t, name) + local fn = finalizers[name] + if fn then + t[name] = fn + else + report_lpath("unknown sub finalizer %a",name) + fn = function() end + end + return fn +end + +setmetatableindex(finalizers.xml, fallback) +setmetatableindex(finalizers.tex, fallback) + +xml.defaultprotocol = "xml" + +-- as xsl does not follow xpath completely here we will also +-- be more liberal especially with regards to the use of | and +-- the rootpath: +-- +-- test : all 'test' under current +-- /test : 'test' relative to current +-- a|b|c : set of names +-- (a|b|c) : idem +-- ! : not +-- +-- after all, we're not doing transformations but filtering. in +-- addition we provide filter functions (last bit) +-- +-- todo: optimizer +-- +-- .. : parent +-- * : all kids +-- / : anchor here +-- // : /**/ +-- ** : all in between +-- +-- so far we had (more practical as we don't transform) +-- +-- {/test} : kids 'test' under current node +-- {test} : any kid with tag 'test' +-- {//test} : same as above + +-- evaluator (needs to be redone, for the moment copied) + +-- todo: apply_axis(list,notable) and collection vs single + +local apply_axis = { } + +apply_axis['root'] = function(list) + local collected = { } + for l=1,#list do + local ll = list[l] + local rt = ll + while ll do + ll = ll.__p__ + if ll then + rt = ll + end + end + collected[l] = rt + end + return collected +end + +apply_axis['self'] = function(list) +--~ local collected = { } +--~ for l=1,#list do +--~ collected[l] = list[l] +--~ end +--~ return collected + return list +end + +apply_axis['child'] = function(list) + local collected, c = { }, 0 + for l=1,#list do + local ll = list[l] + local dt = ll.dt + if dt then -- weird that this is needed + local en = 0 + for k=1,#dt do + local dk = dt[k] + if dk.tg then + c = c + 1 + collected[c] = dk + dk.ni = k -- refresh + en = en + 1 + dk.ei = en + end + end + ll.en = en + end + end + return collected +end + +local function collect(list,collected,c) + local dt = list.dt + if dt then + local en = 0 + for k=1,#dt do + local dk = dt[k] + if dk.tg then + c = c + 1 + collected[c] = dk + dk.ni = k -- refresh + en = en + 1 + dk.ei = en + c = collect(dk,collected,c) + end + end + list.en = en + end + return c +end + +apply_axis['descendant'] = function(list) + local collected, c = { }, 0 + for l=1,#list do + c = collect(list[l],collected,c) + end + return collected +end + +local function collect(list,collected,c) + local dt = list.dt + if dt then + local en = 0 + for k=1,#dt do + local dk = dt[k] + if dk.tg then + c = c + 1 + collected[c] = dk + dk.ni = k -- refresh + en = en + 1 + dk.ei = en + c = collect(dk,collected,c) + end + end + list.en = en + end + return c +end +apply_axis['descendant-or-self'] = function(list) + local collected, c = { }, 0 + for l=1,#list do + local ll = list[l] + if ll.special ~= true then -- catch double root + c = c + 1 + collected[c] = ll + end + c = collect(ll,collected,c) + end + return collected +end + +apply_axis['ancestor'] = function(list) + local collected, c = { }, 0 + for l=1,#list do + local ll = list[l] + while ll do + ll = ll.__p__ + if ll then + c = c + 1 + collected[c] = ll + end + end + end + return collected +end + +apply_axis['ancestor-or-self'] = function(list) + local collected, c = { }, 0 + for l=1,#list do + local ll = list[l] + c = c + 1 + collected[c] = ll + while ll do + ll = ll.__p__ + if ll then + c = c + 1 + collected[c] = ll + end + end + end + return collected +end + +apply_axis['parent'] = function(list) + local collected, c = { }, 0 + for l=1,#list do + local pl = list[l].__p__ + if pl then + c = c + 1 + collected[c] = pl + end + end + return collected +end + +apply_axis['attribute'] = function(list) + return { } +end + +apply_axis['namespace'] = function(list) + return { } +end + +apply_axis['following'] = function(list) -- incomplete +--~ local collected, c = { }, 0 +--~ for l=1,#list do +--~ local ll = list[l] +--~ local p = ll.__p__ +--~ local d = p.dt +--~ for i=ll.ni+1,#d do +--~ local di = d[i] +--~ if type(di) == "table" then +--~ c = c + 1 +--~ collected[c] = di +--~ break +--~ end +--~ end +--~ end +--~ return collected + return { } +end + +apply_axis['preceding'] = function(list) -- incomplete +--~ local collected, c = { }, 0 +--~ for l=1,#list do +--~ local ll = list[l] +--~ local p = ll.__p__ +--~ local d = p.dt +--~ for i=ll.ni-1,1,-1 do +--~ local di = d[i] +--~ if type(di) == "table" then +--~ c = c + 1 +--~ collected[c] = di +--~ break +--~ end +--~ end +--~ end +--~ return collected + return { } +end + +apply_axis['following-sibling'] = function(list) + local collected, c = { }, 0 + for l=1,#list do + local ll = list[l] + local p = ll.__p__ + local d = p.dt + for i=ll.ni+1,#d do + local di = d[i] + if type(di) == "table" then + c = c + 1 + collected[c] = di + end + end + end + return collected +end + +apply_axis['preceding-sibling'] = function(list) + local collected, c = { }, 0 + for l=1,#list do + local ll = list[l] + local p = ll.__p__ + local d = p.dt + for i=1,ll.ni-1 do + local di = d[i] + if type(di) == "table" then + c = c + 1 + collected[c] = di + end + end + end + return collected +end + +apply_axis['reverse-sibling'] = function(list) -- reverse preceding + local collected, c = { }, 0 + for l=1,#list do + local ll = list[l] + local p = ll.__p__ + local d = p.dt + for i=ll.ni-1,1,-1 do + local di = d[i] + if type(di) == "table" then + c = c + 1 + collected[c] = di + end + end + end + return collected +end + +apply_axis['auto-descendant-or-self'] = apply_axis['descendant-or-self'] +apply_axis['auto-descendant'] = apply_axis['descendant'] +apply_axis['auto-child'] = apply_axis['child'] +apply_axis['auto-self'] = apply_axis['self'] +apply_axis['initial-child'] = apply_axis['child'] + +local function apply_nodes(list,directive,nodes) + -- todo: nodes[1] etc ... negated node name in set ... when needed + -- ... currently ignored + local maxn = #nodes + if maxn == 3 then --optimized loop + local nns, ntg = nodes[2], nodes[3] + if not nns and not ntg then -- wildcard + if directive then + return list + else + return { } + end + else + local collected, c, m, p = { }, 0, 0, nil + if not nns then -- only check tag + for l=1,#list do + local ll = list[l] + local ltg = ll.tg + if ltg then + if directive then + if ntg == ltg then + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + c = c + 1 + collected[c], ll.mi = ll, m + end + elseif ntg ~= ltg then + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + c = c + 1 + collected[c], ll.mi = ll, m + end + end + end + elseif not ntg then -- only check namespace + for l=1,#list do + local ll = list[l] + local lns = ll.rn or ll.ns + if lns then + if directive then + if lns == nns then + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + c = c + 1 + collected[c], ll.mi = ll, m + end + elseif lns ~= nns then + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + c = c + 1 + collected[c], ll.mi = ll, m + end + end + end + else -- check both + for l=1,#list do + local ll = list[l] + local ltg = ll.tg + if ltg then + local lns = ll.rn or ll.ns + local ok = ltg == ntg and lns == nns + if directive then + if ok then + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + c = c + 1 + collected[c], ll.mi = ll, m + end + elseif not ok then + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + c = c + 1 + collected[c], ll.mi = ll, m + end + end + end + end + return collected + end + else + local collected, c, m, p = { }, 0, 0, nil + for l=1,#list do + local ll = list[l] + local ltg = ll.tg + if ltg then + local lns = ll.rn or ll.ns + local ok = false + for n=1,maxn,3 do + local nns, ntg = nodes[n+1], nodes[n+2] + ok = (not ntg or ltg == ntg) and (not nns or lns == nns) + if ok then + break + end + end + if directive then + if ok then + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + c = c + 1 + collected[c], ll.mi = ll, m + end + elseif not ok then + local llp = ll.__p__ ; if llp ~= p then p, m = llp, 1 else m = m + 1 end + c = c + 1 + collected[c], ll.mi = ll, m + end + end + end + return collected + end +end + +local quit_expression = false + +local function apply_expression(list,expression,order) + local collected, c = { }, 0 + quit_expression = false + for l=1,#list do + local ll = list[l] + if expression(list,ll,l,order) then -- nasty, order alleen valid als n=1 + c = c + 1 + collected[c] = ll + end + if quit_expression then + break + end + end + return collected +end + +local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb + +local spaces = S(" \n\r\t\f")^0 +local lp_space = S(" \n\r\t\f") +local lp_any = P(1) +local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==") +local lp_doequal = P("=") / "==" +local lp_or = P("|") / " or " +local lp_and = P("&") / " and " + +local lp_builtin = P ( + P("text") / "(ll.dt[1] or '')" + -- fragile + P("content") / "ll.dt" + + -- P("name") / "(ll.ns~='' and ll.ns..':'..ll.tg)" + + P("name") / "((ll.ns~='' and ll.ns..':'..ll.tg) or ll.tg)" + + P("tag") / "ll.tg" + + P("position") / "l" + -- is element in finalizer + P("firstindex") / "1" + + P("lastindex") / "(#ll.__p__.dt or 1)" + + P("firstelement") / "1" + + P("lastelement") / "(ll.__p__.en or 1)" + + P("first") / "1" + + P("last") / "#list" + + P("rootposition") / "order" + + P("order") / "order" + + P("element") / "(ll.ei or 1)" + + P("index") / "(ll.ni or 1)" + + P("match") / "(ll.mi or 1)" + + -- P("namespace") / "ll.ns" + + P("ns") / "ll.ns" + ) * ((spaces * P("(") * spaces * P(")"))/"") + +-- for the moment we keep namespaces with attributes + +local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("(ll.at and ll.at['") * ((R("az","AZ") + S("-_:"))^1) * Cc("'])") + +-- lp_fastpos_p = (P("+")^0 * R("09")^1 * P(-1)) / function(s) return "l==" .. s end +-- lp_fastpos_n = (P("-") * R("09")^1 * P(-1)) / function(s) return "(" .. s .. "<0 and (#list+".. s .. "==l))" end + +lp_fastpos_p = P("+")^0 * R("09")^1 * P(-1) / "l==%0" +lp_fastpos_n = P("-") * R("09")^1 * P(-1) / "(%0<0 and (#list+%0==l))" + +local lp_fastpos = lp_fastpos_n + lp_fastpos_p + +local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false") + +-- local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling +-- return t .. "(" +-- end + +-- local lp_lua_function = (R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / "%0(" +local lp_lua_function = Cs((R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(")) / "%0" + +local lp_function = C(R("az","AZ","__")^1) * P("(") / function(t) -- todo: better . handling + if expressions[t] then + return "expr." .. t .. "(" + else + return "expr.error(" + end +end + +local lparent = P("(") +local rparent = P(")") +local noparent = 1 - (lparent+rparent) +local nested = P{lparent * (noparent + V(1))^0 * rparent} +local value = P(lparent * C((noparent + nested)^0) * rparent) -- P{"("*C(((1-S("()"))+V(1))^0)*")"} + +local lp_child = Cc("expr.child(ll,'") * R("az","AZ","--","__")^1 * Cc("')") +local lp_number = S("+-") * R("09")^1 +local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'") +local lp_content = (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"')) + +local cleaner + +local lp_special = (C(P("name")+P("text")+P("tag")+P("count")+P("child"))) * value / function(t,s) + if expressions[t] then + s = s and s ~= "" and lpegmatch(cleaner,s) + if s and s ~= "" then + return "expr." .. t .. "(ll," .. s ..")" + else + return "expr." .. t .. "(ll)" + end + else + return "expr.error(" .. t .. ")" + end +end + +local content = + lp_builtin + + lp_attribute + + lp_special + + lp_noequal + lp_doequal + + lp_or + lp_and + + lp_reserved + + lp_lua_function + lp_function + + lp_content + -- too fragile + lp_child + + lp_any + +local converter = Cs ( + lp_fastpos + (P { lparent * (V(1))^0 * rparent + content } )^0 +) + +cleaner = Cs ( ( + -- lp_fastpos + + lp_reserved + + lp_number + + lp_string + +1 )^1 ) + +local template_e = [[ + local expr = xml.expressions + return function(list,ll,l,order) + return %s + end +]] + +local template_f_y = [[ + local finalizer = xml.finalizers['%s']['%s'] + return function(collection) + return finalizer(collection,%s) + end +]] + +local template_f_n = [[ + return xml.finalizers['%s']['%s'] +]] + +-- + +local register_self = { kind = "axis", axis = "self" } -- , apply = apply_axis["self"] } +local register_parent = { kind = "axis", axis = "parent" } -- , apply = apply_axis["parent"] } +local register_descendant = { kind = "axis", axis = "descendant" } -- , apply = apply_axis["descendant"] } +local register_child = { kind = "axis", axis = "child" } -- , apply = apply_axis["child"] } +local register_descendant_or_self = { kind = "axis", axis = "descendant-or-self" } -- , apply = apply_axis["descendant-or-self"] } +local register_root = { kind = "axis", axis = "root" } -- , apply = apply_axis["root"] } +local register_ancestor = { kind = "axis", axis = "ancestor" } -- , apply = apply_axis["ancestor"] } +local register_ancestor_or_self = { kind = "axis", axis = "ancestor-or-self" } -- , apply = apply_axis["ancestor-or-self"] } +local register_attribute = { kind = "axis", axis = "attribute" } -- , apply = apply_axis["attribute"] } +local register_namespace = { kind = "axis", axis = "namespace" } -- , apply = apply_axis["namespace"] } +local register_following = { kind = "axis", axis = "following" } -- , apply = apply_axis["following"] } +local register_following_sibling = { kind = "axis", axis = "following-sibling" } -- , apply = apply_axis["following-sibling"] } +local register_preceding = { kind = "axis", axis = "preceding" } -- , apply = apply_axis["preceding"] } +local register_preceding_sibling = { kind = "axis", axis = "preceding-sibling" } -- , apply = apply_axis["preceding-sibling"] } +local register_reverse_sibling = { kind = "axis", axis = "reverse-sibling" } -- , apply = apply_axis["reverse-sibling"] } + +local register_auto_descendant_or_self = { kind = "axis", axis = "auto-descendant-or-self" } -- , apply = apply_axis["auto-descendant-or-self"] } +local register_auto_descendant = { kind = "axis", axis = "auto-descendant" } -- , apply = apply_axis["auto-descendant"] } +local register_auto_self = { kind = "axis", axis = "auto-self" } -- , apply = apply_axis["auto-self"] } +local register_auto_child = { kind = "axis", axis = "auto-child" } -- , apply = apply_axis["auto-child"] } + +local register_initial_child = { kind = "axis", axis = "initial-child" } -- , apply = apply_axis["initial-child"] } + +local register_all_nodes = { kind = "nodes", nodetest = true, nodes = { true, false, false } } + +local skip = { } + +local function errorrunner_e(str,cnv) + if not skip[str] then + report_lpath("error in expression: %s => %s",str,cnv) + skip[str] = cnv or str + end + return false +end + +local function errorrunner_f(str,arg) + report_lpath("error in finalizer: %s(%s)",str,arg or "") + return false +end + +local function register_nodes(nodetest,nodes) + return { kind = "nodes", nodetest = nodetest, nodes = nodes } +end + +local function register_expression(expression) + local converted = lpegmatch(converter,expression) + local runner = load(format(template_e,converted)) + runner = (runner and runner()) or function() errorrunner_e(expression,converted) end + return { kind = "expression", expression = expression, converted = converted, evaluator = runner } +end + +local function register_finalizer(protocol,name,arguments) + local runner + if arguments and arguments ~= "" then + runner = load(format(template_f_y,protocol or xml.defaultprotocol,name,arguments)) + else + runner = load(format(template_f_n,protocol or xml.defaultprotocol,name)) + end + runner = (runner and runner()) or function() errorrunner_f(name,arguments) end + return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner } +end + +local expression = P { "ex", + ex = "[" * C((V("sq") + V("dq") + (1 - S("[]")) + V("ex"))^0) * "]", + sq = "'" * (1 - S("'"))^0 * "'", + dq = '"' * (1 - S('"'))^0 * '"', +} + +local arguments = P { "ar", + ar = "(" * Cs((V("sq") + V("dq") + V("nq") + P(1-P(")")))^0) * ")", + nq = ((1 - S("),'\""))^1) / function(s) return format("%q",s) end, + sq = P("'") * (1 - P("'"))^0 * P("'"), + dq = P('"') * (1 - P('"'))^0 * P('"'), +} + +-- todo: better arg parser + +local function register_error(str) + return { kind = "error", error = format("unparsed: %s",str) } +end + +-- there is a difference in * and /*/ and so we need to catch a few special cases + +local special_1 = P("*") * Cc(register_auto_descendant) * Cc(register_all_nodes) -- last one not needed +local special_2 = P("/") * Cc(register_auto_self) +local special_3 = P("") * Cc(register_auto_self) + +local no_nextcolon = P(-1) + #(1-P(":")) -- newer lpeg needs the P(-1) +local no_nextlparent = P(-1) + #(1-P("(")) -- newer lpeg needs the P(-1) + +local pathparser = Ct { "patterns", -- can be made a bit faster by moving some patterns outside + + patterns = spaces * V("protocol") * spaces * ( + ( V("special") * spaces * P(-1) ) + + ( V("initial") * spaces * V("step") * spaces * (P("/") * spaces * V("step") * spaces)^0 ) + ), + + protocol = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"), + + -- the / is needed for // as descendant or self is somewhat special + -- step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, + step = ((V("shortcuts") + P("/") + V("axis")) * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0, + + axis = V("descendant") + V("child") + V("parent") + V("self") + V("root") + V("ancestor") + + V("descendant_or_self") + V("following_sibling") + V("following") + + V("reverse_sibling") + V("preceding_sibling") + V("preceding") + V("ancestor_or_self") + + #(1-P(-1)) * Cc(register_auto_child), + + special = special_1 + special_2 + special_3, + + initial = (P("/") * spaces * Cc(register_initial_child))^-1, + + error = (P(1)^1) / register_error, + + shortcuts_a = V("s_descendant_or_self") + V("s_descendant") + V("s_child") + V("s_parent") + V("s_self") + V("s_root") + V("s_ancestor"), + + shortcuts = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0, + + s_descendant_or_self = (P("***/") + P("/")) * Cc(register_descendant_or_self), --- *** is a bonus + s_descendant = P("**") * Cc(register_descendant), + s_child = P("*") * no_nextcolon * Cc(register_child ), + s_parent = P("..") * Cc(register_parent ), + s_self = P("." ) * Cc(register_self ), + s_root = P("^^") * Cc(register_root ), + s_ancestor = P("^") * Cc(register_ancestor ), + + descendant = P("descendant::") * Cc(register_descendant ), + child = P("child::") * Cc(register_child ), + parent = P("parent::") * Cc(register_parent ), + self = P("self::") * Cc(register_self ), + root = P('root::') * Cc(register_root ), + ancestor = P('ancestor::') * Cc(register_ancestor ), + descendant_or_self = P('descendant-or-self::') * Cc(register_descendant_or_self ), + ancestor_or_self = P('ancestor-or-self::') * Cc(register_ancestor_or_self ), + -- attribute = P('attribute::') * Cc(register_attribute ), + -- namespace = P('namespace::') * Cc(register_namespace ), + following = P('following::') * Cc(register_following ), + following_sibling = P('following-sibling::') * Cc(register_following_sibling ), + preceding = P('preceding::') * Cc(register_preceding ), + preceding_sibling = P('preceding-sibling::') * Cc(register_preceding_sibling ), + reverse_sibling = P('reverse-sibling::') * Cc(register_reverse_sibling ), + + nodes = (V("nodefunction") * spaces * P("(") * V("nodeset") * P(")") + V("nodetest") * V("nodeset")) / register_nodes, + + expressions = expression / register_expression, + + letters = R("az")^1, + name = (1-S("/[]()|:*!"))^1, -- make inline + negate = P("!") * Cc(false), + + nodefunction = V("negate") + P("not") * Cc(false) + Cc(true), + nodetest = V("negate") + Cc(true), + nodename = (V("negate") + Cc(true)) * spaces * ((V("wildnodename") * P(":") * V("wildnodename")) + (Cc(false) * V("wildnodename"))), + wildnodename = (C(V("name")) + P("*") * Cc(false)) * no_nextlparent, + nodeset = spaces * Ct(V("nodename") * (spaces * P("|") * spaces * V("nodename"))^0) * spaces, + + finalizer = (Cb("protocol") * P("/")^-1 * C(V("name")) * arguments * P(-1)) / register_finalizer, + +} + +xmlpatterns.pathparser = pathparser + +local cache = { } + +local function nodesettostring(set,nodetest) + local t = { } + for i=1,#set,3 do + local directive, ns, tg = set[i], set[i+1], set[i+2] + if not ns or ns == "" then ns = "*" end + if not tg or tg == "" then tg = "*" end + tg = (tg == "@rt@" and "[root]") or format("%s:%s",ns,tg) + t[i] = (directive and tg) or format("not(%s)",tg) + end + if nodetest == false then + return format("not(%s)",concat(t,"|")) + else + return concat(t,"|") + end +end + +local function tagstostring(list) + if #list == 0 then + return "no elements" + else + local t = { } + for i=1, #list do + local li = list[i] + local ns, tg = li.ns, li.tg + if not ns or ns == "" then ns = "*" end + if not tg or tg == "" then tg = "*" end + t[i] = (tg == "@rt@" and "[root]") or format("%s:%s",ns,tg) + end + return concat(t," ") + end +end + +xml.nodesettostring = nodesettostring + +local lpath -- we have a harmless kind of circular reference + +local lshowoptions = { functions = false } + +local function lshow(parsed) + if type(parsed) == "string" then + parsed = lpath(parsed) + end + report_lpath("%s://%s => %s",parsed.protocol or xml.defaultprotocol,parsed.pattern, + table.serialize(parsed,false,lshowoptions)) +end + +xml.lshow = lshow + +local function add_comment(p,str) + local pc = p.comment + if not pc then + p.comment = { str } + else + pc[#pc+1] = str + end +end + +lpath = function (pattern) -- the gain of caching is rather minimal + lpathcalls = lpathcalls + 1 + if type(pattern) == "table" then + return pattern + else + local parsed = cache[pattern] + if parsed then + lpathcached = lpathcached + 1 + else + parsed = lpegmatch(pathparser,pattern) + if parsed then + parsed.pattern = pattern + local np = #parsed + if np == 0 then + parsed = { pattern = pattern, register_self, state = "parsing error" } + report_lpath("parsing error in pattern: %s",pattern) + lshow(parsed) + else + -- we could have done this with a more complex parser but this + -- is cleaner + local pi = parsed[1] + if pi.axis == "auto-child" then + if false then + add_comment(parsed, "auto-child replaced by auto-descendant-or-self") + parsed[1] = register_auto_descendant_or_self + else + add_comment(parsed, "auto-child replaced by auto-descendant") + parsed[1] = register_auto_descendant + end + elseif pi.axis == "initial-child" and np > 1 and parsed[2].axis then + add_comment(parsed, "initial-child removed") -- we could also make it a auto-self + remove(parsed,1) + end + local np = #parsed -- can have changed + if np > 1 then + local pnp = parsed[np] + if pnp.kind == "nodes" and pnp.nodetest == true then + local nodes = pnp.nodes + if nodes[1] == true and nodes[2] == false and nodes[3] == false then + add_comment(parsed, "redundant final wildcard filter removed") + remove(parsed,np) + end + end + end + end + else + parsed = { pattern = pattern } + end + cache[pattern] = parsed + if trace_lparse and not trace_lprofile then + lshow(parsed) + end + end + return parsed + end +end + +xml.lpath = lpath + +-- we can move all calls inline and then merge the trace back +-- technically we can combine axis and the next nodes which is +-- what we did before but this a bit cleaner (but slower too) +-- but interesting is that it's not that much faster when we +-- go inline +-- +-- beware: we need to return a collection even when we filter +-- else the (simple) cache gets messed up + +-- caching found lookups saves not that much (max .1 sec on a 8 sec run) +-- and it also messes up finalizers + +-- watch out: when there is a finalizer, it's always called as there +-- can be cases that a finalizer returns (or does) something in case +-- there is no match; an example of this is count() + +local profiled = { } xml.profiled = profiled + +local function profiled_apply(list,parsed,nofparsed,order) + local p = profiled[parsed.pattern] + if p then + p.tested = p.tested + 1 + else + p = { tested = 1, matched = 0, finalized = 0 } + profiled[parsed.pattern] = p + end + local collected = list + for i=1,nofparsed do + local pi = parsed[i] + local kind = pi.kind + if kind == "axis" then + collected = apply_axis[pi.axis](collected) + elseif kind == "nodes" then + collected = apply_nodes(collected,pi.nodetest,pi.nodes) + elseif kind == "expression" then + collected = apply_expression(collected,pi.evaluator,order) + elseif kind == "finalizer" then + collected = pi.finalizer(collected) -- no check on # here + p.matched = p.matched + 1 + p.finalized = p.finalized + 1 + return collected + end + if not collected or #collected == 0 then + local pn = i < nofparsed and parsed[nofparsed] + if pn and pn.kind == "finalizer" then + collected = pn.finalizer(collected) + p.finalized = p.finalized + 1 + return collected + end + return nil + end + end + if collected then + p.matched = p.matched + 1 + end + return collected +end + +local function traced_apply(list,parsed,nofparsed,order) + if trace_lparse then + lshow(parsed) + end + report_lpath("collecting: %s",parsed.pattern) + report_lpath("root tags : %s",tagstostring(list)) + report_lpath("order : %s",order or "unset") + local collected = list + for i=1,nofparsed do + local pi = parsed[i] + local kind = pi.kind + if kind == "axis" then + collected = apply_axis[pi.axis](collected) + report_lpath("% 10i : ax : %s",(collected and #collected) or 0,pi.axis) + elseif kind == "nodes" then + collected = apply_nodes(collected,pi.nodetest,pi.nodes) + report_lpath("% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest)) + elseif kind == "expression" then + collected = apply_expression(collected,pi.evaluator,order) + report_lpath("% 10i : ex : %s -> %s",(collected and #collected) or 0,pi.expression,pi.converted) + elseif kind == "finalizer" then + collected = pi.finalizer(collected) + report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "") + return collected + end + if not collected or #collected == 0 then + local pn = i < nofparsed and parsed[nofparsed] + if pn and pn.kind == "finalizer" then + collected = pn.finalizer(collected) + report_lpath("% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pn.name,pn.arguments or "") + return collected + end + return nil + end + end + return collected +end + +local function normal_apply(list,parsed,nofparsed,order) + local collected = list + for i=1,nofparsed do + local pi = parsed[i] + local kind = pi.kind + if kind == "axis" then + local axis = pi.axis + if axis ~= "self" then + collected = apply_axis[axis](collected) + end + elseif kind == "nodes" then + collected = apply_nodes(collected,pi.nodetest,pi.nodes) + elseif kind == "expression" then + collected = apply_expression(collected,pi.evaluator,order) + elseif kind == "finalizer" then + return pi.finalizer(collected) + end + if not collected or #collected == 0 then + local pf = i < nofparsed and parsed[nofparsed].finalizer + if pf then + return pf(collected) -- can be anything + end + return nil + end + end + return collected +end + +--~ local function applylpath(list,pattern) +--~ -- we avoid an extra call +--~ local parsed = cache[pattern] +--~ if parsed then +--~ lpathcalls = lpathcalls + 1 +--~ lpathcached = lpathcached + 1 +--~ elseif type(pattern) == "table" then +--~ lpathcalls = lpathcalls + 1 +--~ parsed = pattern +--~ else +--~ parsed = lpath(pattern) or pattern +--~ end +--~ if not parsed then +--~ return +--~ end +--~ local nofparsed = #parsed +--~ if nofparsed == 0 then +--~ return -- something is wrong +--~ end +--~ local one = list[1] -- we could have a third argument: isroot and list or list[1] or whatever we like ... todo +--~ if not one then +--~ return -- something is wrong +--~ elseif not trace_lpath then +--~ return normal_apply(list,parsed,nofparsed,one.mi) +--~ elseif trace_lprofile then +--~ return profiled_apply(list,parsed,nofparsed,one.mi) +--~ else +--~ return traced_apply(list,parsed,nofparsed,one.mi) +--~ end +--~ end + +local function applylpath(list,pattern) + if not list then + return + end + local parsed = cache[pattern] + if parsed then + lpathcalls = lpathcalls + 1 + lpathcached = lpathcached + 1 + elseif type(pattern) == "table" then + lpathcalls = lpathcalls + 1 + parsed = pattern + else + parsed = lpath(pattern) or pattern + end + if not parsed then + return + end + local nofparsed = #parsed + if nofparsed == 0 then + return -- something is wrong + end + if not trace_lpath then + return normal_apply ({ list },parsed,nofparsed,list.mi) + elseif trace_lprofile then + return profiled_apply({ list },parsed,nofparsed,list.mi) + else + return traced_apply ({ list },parsed,nofparsed,list.mi) + end +end + +xml.applylpath = applylpath -- takes a table as first argment, which is what xml.filter will do + +--[[ldx-- +<p>This is the main filter function. It returns whatever is asked for.</p> +--ldx]]-- + +function xml.filter(root,pattern) -- no longer funny attribute handling here + return applylpath(root,pattern) +end + +-- internal (parsed) + +expressions.child = function(e,pattern) + return applylpath(e,pattern) -- todo: cache +end + +expressions.count = function(e,pattern) -- what if pattern == empty or nil + local collected = applylpath(e,pattern) -- todo: cache + return pattern and (collected and #collected) or 0 +end + +-- external + +-- expressions.oneof = function(s,...) +-- local t = {...} +-- for i=1,#t do +-- if s == t[i] then +-- return true +-- end +-- end +-- return false +-- end + +expressions.oneof = function(s,...) + for i=1,select("#",...) do + if s == select(i,...) then + return true + end + end + return false +end + +expressions.error = function(str) + xml.errorhandler(format("unknown function in lpath expression: %s",tostring(str or "?"))) + return false +end + +expressions.undefined = function(s) + return s == nil +end + +expressions.quit = function(s) + if s or s == nil then + quit_expression = true + end + return true +end + +expressions.print = function(...) + print(...) + return true +end + +expressions.contains = find +expressions.find = find +expressions.upper = upper +expressions.lower = lower +expressions.number = tonumber +expressions.boolean = toboolean + +function expressions.contains(str,pattern) + local t = type(str) + if t == "string" then + if find(str,pattern) then + return true + end + elseif t == "table" then + for i=1,#str do + local d = str[i] + if type(d) == "string" and find(d,pattern) then + return true + end + end + end + return false +end + +-- user interface + +local function traverse(root,pattern,handle) + -- report_lpath("use 'xml.selection' instead for pattern: %s",pattern) + local collected = applylpath(root,pattern) + if collected then + for c=1,#collected do + local e = collected[c] + local r = e.__p__ + handle(r,r.dt,e.ni) + end + end +end + +local function selection(root,pattern,handle) + local collected = applylpath(root,pattern) + if collected then + if handle then + for c=1,#collected do + handle(collected[c]) + end + else + return collected + end + end +end + +xml.traverse = traverse -- old method, r, d, k +xml.selection = selection -- new method, simple handle + +--~ function xml.cachedpatterns() +--~ return cache +--~ end + +-- generic function finalizer (independant namespace) + +local function dofunction(collected,fnc,...) + if collected then + local f = functions[fnc] + if f then + for c=1,#collected do + f(collected[c],...) + end + else + report_lpath("unknown function %a",fnc) + end + end +end + +finalizers.xml["function"] = dofunction +finalizers.tex["function"] = dofunction + +-- functions + +expressions.text = function(e,n) + local rdt = e.__p__.dt + return rdt and rdt[n] or "" +end + +expressions.name = function(e,n) -- ns + tg + local found = false + n = tonumber(n) or 0 + if n == 0 then + found = type(e) == "table" and e + elseif n < 0 then + local d, k = e.__p__.dt, e.ni + for i=k-1,1,-1 do + local di = d[i] + if type(di) == "table" then + if n == -1 then + found = di + break + else + n = n + 1 + end + end + end + else + local d, k = e.__p__.dt, e.ni + for i=k+1,#d,1 do + local di = d[i] + if type(di) == "table" then + if n == 1 then + found = di + break + else + n = n - 1 + end + end + end + end + if found then + local ns, tg = found.rn or found.ns or "", found.tg + if ns ~= "" then + return ns .. ":" .. tg + else + return tg + end + else + return "" + end +end + +expressions.tag = function(e,n) -- only tg + if not e then + return "" + else + local found = false + n = tonumber(n) or 0 + if n == 0 then + found = (type(e) == "table") and e -- seems to fail + elseif n < 0 then + local d, k = e.__p__.dt, e.ni + for i=k-1,1,-1 do + local di = d[i] + if type(di) == "table" then + if n == -1 then + found = di + break + else + n = n + 1 + end + end + end + else + local d, k = e.__p__.dt, e.ni + for i=k+1,#d,1 do + local di = d[i] + if type(di) == "table" then + if n == 1 then + found = di + break + else + n = n - 1 + end + end + end + end + return (found and found.tg) or "" + end +end + +--[[ldx-- +<p>Often using an iterators looks nicer in the code than passing handler +functions. The <l n='lua'/> book describes how to use coroutines for that +purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits +code like:</p> + +<typing> +for r, d, k in xml.elements(xml.load('text.xml'),"title") do + print(d[k]) -- old method +end +for e in xml.collected(xml.load('text.xml'),"title") do + print(e) -- new one +end +</typing> +--ldx]]-- + +-- local wrap, yield = coroutine.wrap, coroutine.yield +-- local dummy = function() end +-- +-- function xml.elements(root,pattern,reverse) -- r, d, k +-- local collected = applylpath(root,pattern) +-- if collected then +-- if reverse then +-- return wrap(function() for c=#collected,1,-1 do +-- local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni) +-- end end) +-- else +-- return wrap(function() for c=1,#collected do +-- local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni) +-- end end) +-- end +-- end +-- return wrap(dummy) +-- end +-- +-- function xml.collected(root,pattern,reverse) -- e +-- local collected = applylpath(root,pattern) +-- if collected then +-- if reverse then +-- return wrap(function() for c=#collected,1,-1 do yield(collected[c]) end end) +-- else +-- return wrap(function() for c=1,#collected do yield(collected[c]) end end) +-- end +-- end +-- return wrap(dummy) +-- end + +-- faster: + +local dummy = function() end + +function xml.elements(root,pattern,reverse) -- r, d, k + local collected = applylpath(root,pattern) + if not collected then + return dummy + elseif reverse then + local c = #collected + 1 + return function() + if c > 1 then + c = c - 1 + local e = collected[c] + local r = e.__p__ + return r, r.dt, e.ni + end + end + else + local n, c = #collected, 0 + return function() + if c < n then + c = c + 1 + local e = collected[c] + local r = e.__p__ + return r, r.dt, e.ni + end + end + end +end + +function xml.collected(root,pattern,reverse) -- e + local collected = applylpath(root,pattern) + if not collected then + return dummy + elseif reverse then + local c = #collected + 1 + return function() + if c > 1 then + c = c - 1 + return collected[c] + end + end + else + local n, c = #collected, 0 + return function() + if c < n then + c = c + 1 + return collected[c] + end + end + end +end + +-- handy + +function xml.inspect(collection,pattern) + pattern = pattern or "." + for e in xml.collected(collection,pattern or ".") do + report_lpath("pattern: %s\n\n%s\n",pattern,xml.tostring(e)) + end +end + +-- texy (see xfdf): + +local function split(e) + local dt = e.dt + if dt then + for i=1,#dt do + local dti = dt[i] + if type(dti) == "string" then + dti = gsub(dti,"^[\n\r]*(.-)[\n\r]*","%1") + dti = gsub(dti,"[\n\r]+","\n\n") + dt[i] = dti + else + split(dti) + end + end + end + return e +end + +function xml.finalizers.paragraphs(c) + for i=1,#c do + split(c[i]) + end + return c +end |