summaryrefslogtreecommitdiff
path: root/scripts/context/lua/mtxrun.lua
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/context/lua/mtxrun.lua')
-rw-r--r--scripts/context/lua/mtxrun.lua2348
1 files changed, 1187 insertions, 1161 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 2a3a496a3..baad28e84 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -168,7 +168,7 @@ end
--~ split = lpeg.Ct(c*(p*c)^0)
--~ splitters[separator] = split
--~ end
---~ return lpeg.match(split,self)
+--~ return lpeg.match(split,self) -- split:match(self)
--~ else
--~ return { }
--~ end
@@ -325,7 +325,7 @@ end
--~ return self .. self.rep(chr or " ",n-#self)
--~ end
-function string:padd(n,chr)
+function string:rpadd(n,chr)
local m = n-#self
if m > 0 then
return self .. self.rep(chr or " ",m)
@@ -334,6 +334,17 @@ function string:padd(n,chr)
end
end
+function string:lpadd(n,chr) -- left-pads self to a width of n with chr (a space when chr is not given)
+ local m = n-#self -- number of padding characters still needed
+ if m > 0 then
+ return self.rep(chr or " ",m) .. self
+ else
+ return self -- already n bytes or longer: returned unchanged
+ end
+end
+
+string.padd = string.rpadd
+
function is_number(str)
return str:find("^[%-%+]?[%d]-%.?[%d+]$") == 1
end
@@ -530,6 +541,8 @@ end
do
+ -- one of my first exercises in lua ...
+
-- 34.055.092 32.403.326 arabtype.tma
-- 1.620.614 1.513.863 lmroman10-italic.tma
-- 1.325.585 1.233.044 lmroman10-regular.tma
@@ -889,6 +902,25 @@ function table.tohash(t)
return h
end
+function table.contains(t, v) -- true when v equals (==) one of t[1] .. t[#t]; false otherwise, also when t is nil
+ if t then
+ for i=1, #t do -- only the indexed part is scanned
+ if t[i] == v then
+ return true
+ end
+ end
+ end
+ return false
+end
+
+function table.count(t) -- returns the number of key/value pairs in t (indexed as well as hashed)
+ local n, e = 0, next(t)
+ while e ~= nil do -- compare with nil: false is a valid key and must not end the traversal
+ n, e = n + 1, next(t,e)
+ end
+ return n
+end
+
--~ function table.are_equal(a,b)
--~ return table.serialize(a) == table.serialize(b)
--~ end
@@ -1387,12 +1419,20 @@ function boolean.tonumber(b)
if b then return 1 else return 0 end
end
-function toboolean(str)
- if type(str) == "string" then
- return str == "true" or str == "yes" or str == "on" or str == "1"
- elseif type(str) == "number" then
- return tonumber(str) ~= 0
- elseif type(str) == "nil" then
+function toboolean(str,tolerant)
+ if tolerant then
+ if type(str) == "string" then
+ return str == "true" or str == "yes" or str == "on" or str == "1"
+ elseif type(str) == "number" then
+ return tonumber(str) ~= 0
+ elseif type(str) == "nil" then
+ return false
+ else
+ return str
+ end
+ elseif str == "true" then
+ return true
+ elseif str == "false" then
return false
else
return str
@@ -1427,13 +1467,14 @@ if not modules then modules = { } end modules ['l-xml'] = {
license = "see context related readme files"
}
--- todo: ns, tg = s:match("^(.-):?([^:]+)$")
+-- RJ: key=value ... lpeg.Ca(lpeg.Cc({}) * (pattern-producing-key-and-value / rawset)^0)
--[[ldx--
<p>The parser used here is inspired by the variant discussed in the lua book, but
handles comment and processing instructions, has a different structure, provides
parent access; a first version used different tricky but was less optimized to we
-went this route.</p>
+went this route. First we had a find based parser, now we have an <l n='lpeg'/> based one.
+The find based parser can be found in l-xml-edu.lua along with other older code.</p>
<p>Expecially the lpath code is experimental, we will support some of xpath, but
only things that make sense for us; as compensation it is possible to hook in your
@@ -1442,7 +1483,7 @@ this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
files.</p>
<typing>
-a/b/c /*/c (todo: a/b/(pattern)/d)
+a/b/c /*/c
a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
</typing>
@@ -1457,48 +1498,86 @@ tex = tex or { }
xml.trace_lpath = false
xml.trace_print = false
+xml.trace_remap = false
--[[ldx--
-<p>First a hack to enable namespace resolving.</p>
+<p>First a hack to enable namespace resolving. A namespace is characterized by
+a <l n='url'/>. The following function associates a namespace prefix with a
+pattern. We use <l n='lpeg'/>, which in this case is more than twice as fast as a
+find based solution where we loop over an array of patterns. Less code and
+much cleaner.</p>
--ldx]]--
+xml.xmlns = { }
+
do
- xml.xmlns = { }
+ local parser = lpeg.P(false) -- printing shows that this has no side effects
+
+ --[[ldx--
+ <p>The next function associates a namespace prefix with an <l n='url'/>. This
+ normally happens independent of parsing.</p>
- local data = { }
+ <typing>
+ xml.registerns("mml","mathml")
+ </typing>
+ --ldx]]--
- function xml.registerns(namespace,pattern)
- data[#data+1] = { namespace:lower(), pattern:lower() }
+ function xml.registerns(namespace, pattern) -- pattern: a string (lowercased, matched literally) or an lpeg pattern; it is searched for anywhere in the url, as the find based version did
+ parser = parser + lpeg.P { lpeg.C(lpeg.P((type(pattern) == "string" and pattern:lower()) or pattern)) / namespace + 1 * lpeg.V(1) }
 end
+ --[[ldx--
+ <p>The next function also registers a namespace, but this time we map a
+ given namespace prefix onto a registered one, using the given
+ <l n='url'/>. This is used for attributes like <t>xmlns:m</t>.</p>
+
+ <typing>
+ xml.checkns("m","http://www.w3.org/mathml")
+ </typing>
+ --ldx]]--
+
function xml.checkns(namespace,url)
- url = url:lower()
- for i=1,#data do
- local d = data[i]
- if url:find(d[2]) then
- if namespace ~= d[1] then
- xml.xmlns[namespace] = d[1]
- end
- end
+ local ns = parser:match(url:lower())
+ if ns and namespace ~= ns then
+ xml.xmlns[namespace] = ns
end
end
+ --[[ldx--
+ <p>Next we provide a way to turn an <l n='url'/> into a registered
+ namespace. This is used for the <t>xmlns</t> attribute.</p>
+
+ <typing>
+ resolvedns = xml.resolvens("http://www.w3.org/mathml")
+ </typing>
+
+ This returns <t>mml</t>.
+ --ldx]]--
+
function xml.resolvens(url)
- url = url:lower()
- for i=1,#data do
- local d = data[i]
- if url:find(d[2]) then
- return d[1]
- end
- end
- return ""
+ return parser:match(url:lower()) or ""
end
+ --[[ldx--
+ <p>A namespace in an element can be remapped onto the registered
+ one efficiently by using the <t>xml.xmlns</t> table.</p>
+ --ldx]]--
+
end
--[[ldx--
-<p>Next comes the loader. The dreadful doctype comes in many disguises:</p>
+<p>This version uses <l n='lpeg'/>. We follow the same approach as before, stack and top and
+such. This version is about twice as fast which is mostly due to the fact that
+we don't have to prepare the stream for cdata, doctype etc etc. This variant is
+dedicated to Luigi Scarso, who challenged me with 40 megabyte <l n='xml'/> files that
+took 12.5 seconds to load (1.5 for file io and the rest for tree building). With
+the <l n='lpeg'/> implementation we got that down to less than 7.3 seconds. Loading the 14
+<l n='context'/> interface definition files (2.6 meg) went down from 1.05 seconds to 0.55.</p>
+
+<p>Next comes the parser. The rather messy doctype definition comes in many
+disguises so it is no surprise that later on we have to dedicate quite some
+<l n='lpeg'/> code to it.</p>
<typing>
<!DOCTYPE Something PUBLIC "... ..." "..." [ ... ] >
@@ -1508,320 +1587,466 @@ end
<!DOCTYPE Something [ ... ] >
<!DOCTYPE Something >
</typing>
+
+<p>The code may look a bit complex but this is mostly due to the fact that we
+resolve namespaces and attach metatables. There is only one public function:</p>
+
+<typing>
+local x = xml.convert(somestring)
+</typing>
+
+<p>An optional second boolean argument tells this function not to create a root
+element.</p>
--ldx]]--
do
- -- Loading 12 cont-*.xml and keys-*.xml files totaling to 2.62 MBytes takes 1.1 sec
- -- on a windows vista laptop with dual core 7600 (2.3 Ghz), which is not that bad.
- -- Of this half time is spent on doctype etc parsing.
-
- local doctype_patterns = {
- "<!DOCTYPE%s+(.-%s+PUBLIC%s+%b\"\"%s+%b\"\"%s+%b[])%s*>",
- "<!DOCTYPE%s+(.-%s+PUBLIC%s+%b\"\"%s+%b\"\")%s*>",
- "<!DOCTYPE%s+(.-%s+SYSTEM%s+%b\"\"%s+%b[])%s*>",
- "<!DOCTYPE%s+(.-%s+SYSTEM%s+%b\"\")%s*>",
- "<!DOCTYPE%s+(.-%s%b[])%s*>",
- "<!DOCTYPE%s+(.-)%s*>"
- }
+ local remove, nsremap = table.remove, xml.xmlns
- -- We assume no "<" which is the lunatic part of the xml spec
- -- especially since ">" is permitted; otherwise we need a char
- -- by char parser ... more something for later ... normally
- -- entities will be used anyway.
+ local stack, top, dt, at, xmlns, errorstr = {}, {}, {}, {}, {}, nil
- -- data = data:gsub(nothing done) is still a copy so we find first
+ local mt = { __tostring = xml.text }
- local function prepare(data,text)
- -- pack (for backward compatibility)
- if type(data) == "table" then
- data = table.concat(data,"")
- end
- -- CDATA
- if data:find("<%!%[CDATA%[") then
- data = data:gsub("<%!%[CDATA%[(.-)%]%]>", function(txt)
- text[#text+1] = txt or ""
- return string.format("<@cd@>%s</@cd@>",#text)
- end)
- end
- -- DOCTYPE
- if data:find("<!DOCTYPE ") then
- data = data:gsub("^(.-)(<[^%!%?])", function(a,b)
- if a:find("<!DOCTYPE ") then
- for _,v in ipairs(doctype_patterns) do
- a = a:gsub(v, function(d)
- text[#text+1] = d or ""
- return string.format("<@dd@>%s</@dd@>",#text)
- end)
- end
- end
- return a .. b
- end,1)
+ local function add_attribute(namespace,tag,value) -- attribute callback: stores the attribute in the pending table 'at'
+ if tag == "xmlns" then -- default namespace: push the registered prefix that belongs to this url
+ xmlns[#xmlns+1] = xml.resolvens(value)
+ at[tag] = value
+ elseif namespace == "xmlns" then -- xmlns:prefix='url': let xml.checkns remap the prefix when the url is registered
+ xml.checkns(tag,value)
+ at["xmlns:" .. tag] = value
+ else
+ at[tag] = value
 end
- -- comment / does not catch doctype
- data = data:gsub("<%!%-%-(.-)%-%->", function(txt)
- text[#text+1] = txt or ""
- return string.format("<@cm@>%s</@cm@>",#text)
- end)
- -- processing instructions / altijd 1
- data = data:gsub("<%?(.-)%?>", function(txt)
- text[#text+1] = txt or ""
- return string.format("<@pi@>%s</@pi@>",#text)
- end)
- return data, text
 end
+ local function add_begin(spacing, namespace, tag) -- begin tag callback: creates a node and makes it the current one
+ if #spacing > 0 then -- whitespace in front of the tag is kept as a text entry
+ dt[#dt+1] = spacing
+ end
+ local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace -- no prefix: innermost default namespace, else the remapped prefix, else the prefix itself
+ top = { ns=namespace or "", nr=resolved, tg=tag, at=at, dt={}, __p__ = stack[#stack] } -- NOTE(review): stored as 'nr' while xml.serialize reads e.rn; confirm which key is intended
+ setmetatable(top, mt)
+ dt = top.dt -- following content is collected in the new node
+ stack[#stack+1] = top
+ at = { } -- fresh attribute table for the next tag
+ end
+ local function add_end(spacing, namespace, tag) -- end tag callback: closes the current node and appends it to its parent
+ if #spacing > 0 then
+ dt[#dt+1] = spacing
+ end
+ local toclose = remove(stack)
+ top = stack[#stack]
+ if #stack < 1 then
+ errorstr = string.format("nothing to close with %s", tag)
+ elseif toclose.tg ~= tag then -- no namespace check
+ errorstr = string.format("unable to close %s with %s", toclose.tg, tag)
+ end
+ dt = top.dt
+ dt[#dt+1] = toclose
+ if toclose.at and toclose.at.xmlns then -- look at the closed element's own attributes; the pending 'at' table is always empty at an end tag
+ remove(xmlns)
+ end
+ end
+ local function add_empty(spacing, namespace, tag) -- empty element callback (<a/>): appends a childless node to the current one
+ if #spacing > 0 then
+ dt[#dt+1] = spacing
+ end
+ local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace
+ top = stack[#stack]
+ setmetatable(top, mt)
+ dt = top.dt
+ dt[#dt+1] = { ns=namespace or "", nr=resolved, tg=tag, at=at, dt={}, __p__ = top }
+ if at.xmlns then -- must be tested before 'at' is reset, otherwise the default namespace pushed by this element is never popped
+ remove(xmlns)
+ end
+ at = { }
+ end
+ local function add_text(text)
+ dt[#dt+1] = text
+ end
+ local function add_special(what, spacing, text) -- appends a special node (tg = what, e.g. "@pi@", "@cm@", "@cd@", "@dd@") holding text in dt[1]
+ if #spacing > 0 then -- whitespace in front of the construct is kept as a text entry
+ dt[#dt+1] = spacing
+ end
+ top = stack[#stack]
+ setmetatable(top, mt)
+ dt[#dt+1] = { special=true, ns="", tg=what, dt={text} } -- marked special so that serializers can tell it from an element
+ end
+ local function set_message(txt) -- trailer callback: reports text that follows the root element
+ errorstr = "garbage at the end of the file: " .. txt:gsub("([ \n\r\t]*)","") -- whitespace is stripped; only the first result of gsub takes part in the concatenation
+ end
+
+ local space = lpeg.S(' \r\n\t')
+ local open = lpeg.P('<')
+ local close = lpeg.P('>')
+ local squote = lpeg.S("'")
+ local dquote = lpeg.S('"')
+ local equal = lpeg.P('=')
+ local slash = lpeg.P('/')
+ local colon = lpeg.P(':')
+ local valid = lpeg.R('az', 'AZ', '09') + lpeg.S('_-.')
+ local name_yes = lpeg.C(valid^1) * colon * lpeg.C(valid^1)
+ local name_nop = lpeg.C(lpeg.P(true)) * lpeg.C(valid^1)
+ local name = name_yes + name_nop
+
+ local utfbom = lpeg.P('\000\000\254\255') + lpeg.P('\255\254\000\000') +
+ lpeg.P('\255\254') + lpeg.P('\254\255') + lpeg.P('\239\187\191') -- no capture
+
+ local spacing = lpeg.C(space^0)
+ local justtext = lpeg.C((1-open)^1)
+ local somespace = space^1
+ local optionalspace = space^0
+
+ local value = (squote * lpeg.C((1 - squote)^0) * squote) + (dquote * lpeg.C((1 - dquote)^0) * dquote)
+ local attribute = (somespace * name * optionalspace * equal * optionalspace * value) / add_attribute
+ local attributes = attribute^0
+
+ local text = justtext / add_text
+ local balanced = lpeg.P { "[" * ((1 - lpeg.S"[]") + lpeg.V(1))^0 * "]" } -- taken from lpeg manual, () example
+
+ local emptyelement = (spacing * open * name * attributes * optionalspace * slash * close) / add_empty
+ local beginelement = (spacing * open * name * attributes * optionalspace * close) / add_begin
+ local endelement = (spacing * open * slash * name * optionalspace * close) / add_end
+
+ local begincomment = open * lpeg.P("!--")
+ local endcomment = lpeg.P("--") * close
+ local begininstruction = open * lpeg.P("?")
+ local endinstruction = lpeg.P("?") * close
+ local begincdata = open * lpeg.P("![CDATA[")
+ local endcdata = lpeg.P("]]") * close
+
+ local someinstruction = lpeg.C((1 - endinstruction)^0)
+ local somecomment = lpeg.C((1 - endcomment )^0)
+ local somecdata = lpeg.C((1 - endcdata )^0)
+
+ local begindoctype = open * lpeg.P("!DOCTYPE")
+ local enddoctype = close
+ local publicdoctype = lpeg.P("PUBLIC") * somespace * value * somespace * value * somespace * balanced^0
+ local systemdoctype = lpeg.P("SYSTEM") * somespace * value * somespace * balanced^0
+ local simpledoctype = (1-close)^1 * balanced^0
+ local somedoctype = lpeg.C((somespace * lpeg.P(publicdoctype + systemdoctype + simpledoctype) * optionalspace)^0)
+
+ local instruction = (spacing * begininstruction * someinstruction * endinstruction) / function(...) add_special("@pi@",...) end
+ local comment = (spacing * begincomment * somecomment * endcomment ) / function(...) add_special("@cm@",...) end
+ local cdata = (spacing * begincdata * somecdata * endcdata ) / function(...) add_special("@cd@",...) end
+ local doctype = (spacing * begindoctype * somedoctype * enddoctype ) / function(...) add_special("@dd@",...) end
+
+ -- nicer but slower:
+ --
+ -- local instruction = (lpeg.Cc("@pi@") * spacing * begininstruction * someinstruction * endinstruction) / add_special
+ -- local comment = (lpeg.Cc("@cm@") * spacing * begincomment * somecomment * endcomment ) / add_special
+ -- local cdata = (lpeg.Cc("@cd@") * spacing * begincdata * somecdata * endcdata ) / add_special
+ -- local doctype = (lpeg.Cc("@dd@") * spacing * begindoctype * somedoctype * enddoctype ) / add_special
+
+ local trailer = space^0 * (justtext/set_message)^0
- -- maybe we will move the @tg@ stuff to a dedicated key, say 'st'; this will speed up
- -- serializing and testing
+ -- comment + emptyelement + text + cdata + instruction + lpeg.V("parent"), -- 6.5 seconds on 40 MB database file
+ -- text + comment + emptyelement + cdata + instruction + lpeg.V("parent"), -- 5.8
+ -- text + lpeg.V("parent") + emptyelement + comment + cdata + instruction, -- 5.5
- function xml.convert(data,no_root,collapse)
- local crap = { }
- data, crap = prepare(data, crap)
- local nsremap = xml.xmlns
- local remove = table.remove
- local stack, top = {}, {}
- local i, j, errorstr = 1, 1, nil
+
+ local grammar = lpeg.P { "preamble",
+ preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * lpeg.V("parent") * trailer,
+ parent = beginelement * lpeg.V("children")^0 * endelement,
+ children = text + lpeg.V("parent") + emptyelement + comment + cdata + instruction,
+ }
+
+ function xml.convert(data, no_root) -- no collapse any more
+ stack, top, at, xmlns, errorstr, result = {}, {}, {}, {}, nil, nil
stack[#stack+1] = top
top.dt = { }
- local dt = top.dt
- local id = 0
- local namespaces = { }
- local mt = { __tostring = xml.text }
- while true do
- local ni, first, attributes, last, fulltag
- ni, j, first, fulltag, attributes, last = data:find("<(/-)([^%s%>/]+)%s*([^>]-)%s*(/-)>", j)
- if not ni then break end
- local namespace, tag = fulltag:match("^(.-):(.+)$")
- if attributes ~= "" then
- local t = {}
- for ns, tag, _, value in attributes:gmatch("(%w-):?(%w+)=([\"\'])(.-)%3") do
- if tag == "xmlns" then -- not ok yet
- namespaces[#stack] = xml.resolvens(value)
- elseif ns == "" then
- t[tag] = value
- elseif ns == "xmlns" then
- xml.checkns(tag,value)
- else
- t[tag] = value
- end
- end
- attributes = t
- else
- attributes = { }
- end
- if namespace then -- realtime remapping
- namespace = nsremap[namespace] or namespace
- else
- namespace, tag = namespaces[#stack] or "", fulltag
- end
- local text = data:sub(i, ni-1)
- if text == "" or (collapse and text:find("^%s*$")) then
- -- no need for empty text nodes, beware, also packs <a>x y z</a>
- -- so is not that useful unless used with empty elements
- else
- dt[#dt+1] = text
- end
- if first == "/" then
- -- end tag
- local toclose = remove(stack) -- remove top
- top = stack[#stack]
- namespaces[#stack] = nil
- if #stack < 1 then
- errorstr = string.format("nothing to close with %s", tag)
- break
- elseif toclose.tg ~= tag then -- no namespace check
- errorstr = string.format("unable to close %s with %s", toclose.tg, tag)
- break
- end
- if tag:find("^@..@$") then
- dt[1] = crap[tonumber(dt[1])] or ""
- end
- dt = top.dt
- dt[#dt+1] = toclose
- elseif last == "/" then
- -- empty element tag
- dt[#dt+1] = { ns = namespace, tg = tag, dt = { }, at = attributes, __p__ = top }
- -- setmetatable(top, { __tostring = xml.text })
- setmetatable(top, mt)
- else
- -- begin tag
- top = { ns = namespace, tg = tag, dt = { }, at = attributes, __p__ = stack[#stack] }
- -- setmetatable(top, { __tostring = xml.text })
- setmetatable(top, mt)
- dt = top.dt
- stack[#stack+1] = top
- end
- i = j + 1
- end
- if not errorstr then
- local text = data:sub(i)
- if dt and not text:find("^%s*$") then
- dt[#dt+1] = text
- end
- if #stack > 1 then
- errorstr = string.format("unclosed %s", stack[#stack].tg)
- end
+ dt = top.dt
+ if not data or data == "" then
+ errorstr = "empty xml file"
+ elseif not grammar:match(data) then
+ errorstr = "invalid xml file"
end
if errorstr then
- stack = { { tg = "error", dt = { errorstr } } }
- -- setmetatable(stack, { __tostring = xml.text })
+ result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={} } } }
setmetatable(stack, mt)
- end
- if no_root then
- return stack[1]
+ if xml.error_handler then xml.error_handler("load",errorstr) end
else
- local t = { ns = "", tg = '@rt@', dt = stack[1].dt }
- -- setmetatable(t, { __tostring = xml.text })
- setmetatable(t, mt)
- for k,v in ipairs(t.dt) do
- if type(v) == "table" and v.tg ~= "@pi@" and v.tg ~= "@dd@" and v.tg ~= "@cm@" then
- t.ri = k -- rootindex
+ result = stack[1]
+ end
+ if not no_root then
+ result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={} }
+ setmetatable(result, mt)
+ for k,v in ipairs(result.dt) do
+ if type(v) == "table" and not v.special then -- always table -)
+ result.ri = k -- rootindex
break
end
end
- return t
end
+ return result
end
- function xml.copy(old,tables,parent) -- fast one
- tables = tables or { }
- if old then
- local new = { }
- if not table[old] then
- table[old] = new
- end
- for i,v in pairs(old) do
- -- new[i] = (type(v) == "table" and (table[v] or xml.copy(v, tables, table))) or v
- if type(v) == "table" then
- new[i] = table[v] or xml.copy(v, tables, table)
- else
- new[i] = v
- end
- end
- local mt = getmetatable(old)
- if mt then
- setmetatable(new,mt)
- end
- return new
- else
- return { }
- end
+ --[[ldx--
+ <p>Packaging data in an xml like table is done with the following
+ function. Maybe it will go away (when not used).</p>
+ --ldx]]--
+
+ function xml.package(tag,attributes,data)
+ local ns, tg = tag:match("^(.-):?([^:]+)$")
+ local t = { ns = ns, tg = tg, dt = data or "", at = attributes or {} }
+ setmetatable(t, mt)
+ return t
end
+ xml.error_handler = (logs and logs.report) or print
+
end
-function xml.load(filename,collapse)
+--[[ldx--
+<p>We cannot load an <l n='lpeg'/> from a filehandle so we need to load
+the whole file first. The function accepts a string representing
+a filename or a file handle.</p>
+--ldx]]--
+
+function xml.load(filename)
if type(filename) == "string" then
- local root, f = { }, io.open(filename,'r') -- no longer 'rb'
+ local root, f = { }, io.open(filename,'r')
if f then
- root = xml.convert(f:read("*all"),false,collapse)
+ root = xml.convert(f:read("*all"))
f:close()
+ else
+ -- if we want an error: root = xml.convert("")
end
- return root
+ return root -- no nil but an empty table if it fails
else
- return xml.convert(filename:read("*all"),false,collapse)
+ return xml.convert(filename:read("*all"))
end
end
-function xml.root(root)
- return (root.ri and root.dt[root.ri]) or root
+--[[ldx--
+<p>When we inject new elements, we need to convert strings to
+valid trees, which is what the next function does.</p>
+--ldx]]--
+
+function xml.toxml(data)
+ if type(data) == "string" then
+ local root = { xml.convert(data,true) }
+ return (#root > 1 and root) or root[1]
+ else
+ return data
+ end
end
-function xml.toxml(data,collapse)
- local t = { xml.convert(data,true,collapse) }
- if #t > 1 then
- return t
+--[[ldx--
+<p>For copying a tree we use a dedicated function instead of the
+generic table copier. Since we know what we're dealing with we
+can speed up things a bit. The second argument is not to be used!</p>
+--ldx]]--
+
+function xml.copy(old,tables)
+ if old then
+ tables = tables or { }
+ local new = { }
+ if not tables[old] then
+ tables[old] = new
+ end
+ for k,v in pairs(old) do
+ new[k] = (type(v) == "table" and (tables[v] or xml.copy(v, tables))) or v
+ end
+ local mt = getmetatable(old)
+ if mt then
+ setmetatable(new,mt)
+ end
+ return new
else
- return t[1]
+ return { }
end
end
-function xml.serialize(e, handle, textconverter, attributeconverter)
- handle = handle or (tex and tex.sprint) or io.write
- if not e then
- -- quit
- elseif e.command and xml.command then -- test for command == "" ?
- xml.command(e)
- elseif e.tg then
- local format, serialize = string.format, xml.serialize
- local ens, etg, eat, edt = e.ns, e.tg, e.at, e.dt
- -- no spaces, so no flush needed (check)
- if etg == "@pi@" then
- handle(format("<?%s?>",edt[1]))
- elseif etg == "@cm@" then
- handle(format("<!--%s-->",edt[1]))
- elseif etg == "@cd@" then
- handle(format("<![CDATA[%s]]>",edt[1]))
- elseif etg == "@dd@" then
- handle(format("<!DOCTYPE %s>",edt[1]))
- elseif etg == "@rt@" then
- serialize(edt,handle,textconverter,attributeconverter)
+--[[ldx--
+<p>In <l n='context'/> serializing the tree or parts of the tree is a major
+activity which is why the following function is pretty optimized resulting
+in a few more lines of code than needed. The variant that uses the formatting
+function for all components is about 15% slower than the concatenating
+alternative.</p>
+--ldx]]--
+
+do
+
+ -- todo: add <?xml version='1.0' standalone='yes'?> when not present
+
+ local fallbackhandle = (tex and tex.sprint) or io.write
+
+ function xml.serialize(e, handle, textconverter, attributeconverter, specialconverter, nocommands)
+ if not e then
+ -- quit
+ elseif not nocommands and e.command and xml.command then
+ xml.command(e)
else
- local ats = eat and next(eat) and { }
- if ats then
- if attributeconverter then
- for k,v in pairs(eat) do
- ats[#ats+1] = format('%s=%q',k,attributeconverter(v))
+ handle = handle or fallbackhandle
+ local etg = e.tg
+ if etg then
+ -- local format = string.format
+ if e.special then
+ local edt = e.dt
+ local spc = specialconverter and specialconverter[etg]
+ if spc then
+ local result = spc(edt[1])
+ if result then
+ handle(result)
+ else
+ -- no need to handle any further
+ end
+ elseif etg == "@pi@" then
+ -- handle(format("<?%s?>",edt[1]))
+ handle("<?" .. edt[1] .. "?>") -- maybe table.join(edt)
+ elseif etg == "@cm@" then
+ -- handle(format("<!--%s-->",edt[1]))
+ handle("<!--" .. edt[1] .. "-->")
+ elseif etg == "@cd@" then
+ -- handle(format("<![CDATA[%s]]>",edt[1]))
+ handle("<![CDATA[" .. edt[1] .. "]]>")
+ elseif etg == "@dd@" then
+ -- handle(format("<!DOCTYPE %s>",edt[1]))
+ handle("<!DOCTYPE " .. edt[1] .. ">")
+ elseif etg == "@rt@" then
+ xml.serialize(edt,handle,textconverter,attributeconverter,specialconverter,nocommands)
end
else
- for k,v in pairs(eat) do
- ats[#ats+1] = format('%s=%q',k,v)
- end
- end
- end
- if ens ~= "" then
- if edt and #edt > 0 then
+ local ens, eat, edt, ern = e.ns, e.at, e.dt, e.rn
+ local ats = eat and next(eat) and { }
if ats then
- handle(format("<%s:%s %s>",ens,etg,table.concat(ats," ")))
- else
- handle(format("<%s:%s>",ens,etg))
+ local format = string.format
+ if attributeconverter then
+ for k,v in pairs(eat) do
+ ats[#ats+1] = format('%s=%q',k,attributeconverter(v))
+ end
+ else
+ for k,v in pairs(eat) do
+ ats[#ats+1] = format('%s=%q',k,v)
+ end
+ end
end
- for i=1,#edt do
- serialize(edt[i],handle,textconverter,attributeconverter)
+ if ern and xml.trace_remap then
+ if ats then
+ ats[#ats+1] = string.format("xmlns:remapped='%s'",ern)
+ else
+ ats = { string.format("xmlns:remapped='%s'",ern) }
+ end
end
- handle(format("</%s:%s>",ens,etg))
- else
- if ats then
- handle(format("<%s:%s %s/>",ens,etg,table.concat(ats," ")))
+ if ens ~= "" then -- element with a namespace prefix: <ns:tag ...>
+ if edt and #edt > 0 then
+ if ats then
+ -- handle(format("<%s:%s %s>",ens,etg,table.concat(ats," ")))
+ handle("<" .. ens .. ":" .. etg .. " " .. table.concat(ats," ") .. ">")
+ else
+ -- handle(format("<%s:%s>",ens,etg))
+ handle("<" .. ens .. ":" .. etg .. ">")
+ end
+ local serialize = xml.serialize
+ for i=1,#edt do
+ local e = edt[i]
+ if type(e) == "string" then
+ if textconverter then
+ handle(textconverter(e))
+ else
+ handle(e)
+ end
+ else
+ serialize(e,handle,textconverter,attributeconverter,specialconverter,nocommands)
+ end
+ end
+ -- handle(format("</%s:%s>",ens,etg))
+ handle("</" .. ens .. ":" .. etg .. ">")
+ else
+ if ats then
+ -- handle(format("<%s:%s %s/>",ens,etg,table.concat(ats," ")))
+ handle("<" .. ens .. ":" .. etg .. " " .. table.concat(ats," ") .. "/>") -- same output as the format call above: no stray %, a space before the attributes
+ else
+ -- handle(format("<%s:%s/>",ens,etg))
+ handle("<" .. ens .. ":" .. etg .. "/>") -- same output as the format call above: the tag name belongs here
+ end
+ end
 else
- handle(format("<%s:%s/>",ens,etg))
+ if edt and #edt > 0 then
+ if ats then
+ -- handle(format("<%s %s>",etg,table.concat(ats," ")))
+ handle("<" .. etg .. " " .. table.concat(ats," ") .. ">")
+ else
+ -- handle(format("<%s>",etg))
+ handle("<" .. etg .. ">")
+ end
+ local serialize = xml.serialize
+ for i=1,#edt do
+ serialize(edt[i],handle,textconverter,attributeconverter,specialconverter,nocommands)
+ end
+ -- handle(format("</%s>",etg))
+ handle("</" .. etg .. ">")
+ else
+ if ats then
+ -- handle(format("<%s %s/>",etg,table.concat(ats," ")))
+ handle("<" .. etg .. " " .. table.concat(ats," ") .. "/>") -- a space must separate the tag from its attributes
+ else
+ -- handle(format("<%s/>",etg))
+ handle("<" .. etg .. "/>")
+ end
+ end
+ end
end
- else
- if edt and #edt > 0 then
- if ats then
- handle(format("<%s %s>",etg,table.concat(ats," ")))
- else
- handle(format("<%s>",etg))
- end
- for i=1,#edt do
- serialize(edt[i],handle,textconverter,attributeconverter)
- end
- handle(format("</%s>",etg))
+ elseif type(e) == "string" then
+ if textconverter then
+ handle(textconverter(e))
else
- if ats then
- handle(format("<%s %s/>",etg,table.concat(ats," ")))
- else
- handle(format("<%s/>",etg))
- end
+ handle(e)
+ end
+ else
+ local serialize = xml.serialize
+ for i=1,#e do
+ serialize(e[i],handle,textconverter,attributeconverter,specialconverter,nocommands)
end
end
end
- elseif type(e) == "string" then
- if textconverter then
- handle(textconverter(e))
- else
- handle(e)
- end
- else
- for i=1,#e do
- xml.serialize(e[i],handle,textconverter,attributeconverter)
+ end
+
+ function xml.checkbom(root) -- makes sure a tree with a root index starts with an <?xml version=... ?> declaration
+ if root.ri then
+ local dt, found = root.dt, false
+ for k,v in ipairs(dt) do
+ if type(v) == "table" and v.special and v.tg == "@pi@" and v.dt[1] and v.dt[1]:find("xml.*version=") then -- instructions carry tg "@pi@" and keep their text in dt[1]
+ found = true
+ break
+ end
+ end
+ if not found then
+ table.insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } )
+ table.insert(dt, 2, "\n" )
+ end
+ end
+ end
+
end
-function xml.string(e,handle) -- weird one that may become obsolete
- if e.tg then
+--[[ldx--
+<p>At the cost of some 25% runtime overhead you can first convert the tree to a string
+and then handle the lot.</p>
+--ldx]]--
+
+function xml.tostring(root) -- 25% overhead due to collecting; returns the serialized tree as one string
+ if root then
+ if type(root) == 'string' then
+ return root -- strings are passed through untouched
+ elseif next(root) then -- only non-empty tables are serialized
+ local result = { }
+ xml.serialize(root,function(s) result[#result+1] = s end) -- the handle collects every piece that gets flushed
+ return table.concat(result,"")
+ end
+end
+ return "" -- nil, false and empty tables end up here
+end
+
+--[[ldx--
+<p>The next function operates on the content only and needs a handle function
+that accepts a string.</p>
+--ldx]]--
+
+function xml.string(e,handle)
+ if not handle or (e.special and e.tg ~= "@rt@") then
+ -- nothing
+ elseif e.tg then
local edt = e.dt
if edt then
for i=1,#edt do
@@ -1833,6 +2058,21 @@ function xml.string(e,handle) -- weird one that may become obsolete
end
end
+--[[ldx--
+<p>How you deal with saving data depends on your preferences. For a 40 MB database
+file the timings on a 2.3 GHz Core Duo are as follows (time in seconds):</p>
+
+<lines>
+1.3 : load data from file to string
+6.1 : convert string into tree
+5.3 : saving in file using xmlsave
+6.8 : converting to string using xml.tostring
+3.6 : saving converted string in file
+</lines>
+
+<p>The save function is given below.</p>
+--ldx]]--
+
function xml.save(root,name)
local f = io.open(name,"w")
if f then
@@ -1841,535 +2081,67 @@ function xml.save(root,name)
end
end
-function xml.stringify(root)
- if root then
- if type(root) == 'string' then
- return root
- elseif next(root) then
- local result = { }
- xml.serialize(root,function(s) result[#result+1] = s end)
- return table.concat(result,"")
- end
- end
- return ""
-end
-
-xml.tostring = xml.stringify
-
-do
-
- -- print
-
- local newline = lpeg.P("\n")
- local space = lpeg.P(" ")
- local content = lpeg.C((1-newline)^1)
-
- if tex then
-
- -- taco: we need a kind of raw print into tex, i.e. embedded \n's become lineendings
- -- for tex and an empty line a par; could be a c-wrapper around existing stuff; i
- -- played a lot with tex.print but that does not work ok (should be obeylines save)
-
- local buffer = {}
-
- local function cprint(s)
- buffer[#buffer+1] = s
- end
- local function nprint( )
- if #buffer > 0 then
- if xml.trace_print then
- texio.write_nl(string.format("tex.print : [[[%s]]]", table.join(buffer)))
- end
- tex.print(table.join(buffer))
- buffer = {}
- else
- if xml.trace_print then
- texio.write_nl(string.format("tex.print : [[[%s]]]", ""))
- end
- tex.print("")
- end
- end
- local function fprint()
- if #buffer > 0 then
- if xml.trace_print then
- texio.write_nl(string.format("tex.sprint: [[[%s]]]", table.join(buffer)))
- end
- tex.sprint(table.join(buffer))
- buffer = { }
- end
- end
-
- local line_n = newline / nprint
- local line_c = content / cprint
- local capture = (line_n + line_c)^0
-
- local function sprint(root)
- if not root then
- -- quit
- elseif type(root) == 'string' then
- lpeg.match(capture,root)
- elseif next(root) then
- xml.serialize(root, sprint, nil, nil, true)
- end
- end
-
- function xml.sprint(root)
- buffer = {}
- sprint(root)
- if #buffer > 0 then
- nprint()
- end
- end
-
- xml.sflush = fprint
-
- else
-
- function xml.sprint(root)
- if not root then
- -- quit
- elseif type(root) == 'string' then
- print(root)
- elseif next(root) then
- xml.serialize(root, xml.sprint, nil, nil, true)
- end
- end
-
- end
-
- function xml.tprint(root)
- if type(root) == "table" then
- for i=1,#root do
- xml.sprint(root[i])
- end
- elseif type(root) == "string" then
- xml.sprint(root)
- end
- end
-
- -- lines (looks hackery, but we cannot pass variables in capture functions)
-
- local buffer, flush = {}, nil
-
- local function cprint(s)
- buffer[#buffer+1] = s
- end
- local function nprint()
- flush()
- end
-
- local line_n = newline / nprint
- local line_c = content / cprint
- local capture = (line_n + line_c)^0
-
- function lines(root)
- if not root then
- -- quit
- elseif type(root) == 'string' then
- lpeg.match(capture,root)
- elseif next(root) then
- xml.serialize(root, lines)
- end
- end
-
- function xml.lines(root)
- local result = { }
- flush = function()
- result[#result+1] = table.join(buffer)
- buffer = { }
- end
- buffer = {}
- lines(root)
- if #buffer > 0 then
- result[#result+1] = table.join(buffer)
- end
- return result
- end
+--[[ldx--
+<p>A few helpers:</p>
+--ldx]]--
+function xml.body(root)
+ return (root.ri and root.dt[root.ri]) or root
end
function xml.text(root)
- return (root and xml.stringify(root)) or ""
+ return (root and xml.tostring(root)) or ""
end
function xml.content(root)
return (root and root.dt and xml.tostring(root.dt)) or ""
end
-function xml.body(t) -- removes initial pi
- if t and t.dt and t.tg == "@rt@" then
- for k,v in ipairs(t.dt) do
- if type(v) == "table" and v.tg ~= "@pi@" then
- return v
- end
- end
- end
- return t
-end
+--[[ldx--
+<p>The next helper erases an element but keeps the table as it is,
+and since empty strings are not serialized (effectively) it does
+no harm. Copying the table would take more time. Usage:</p>
--- call: e[k] = xml.empty() or xml.empty(e,k)
+<typing>
+dt[k] = xml.empty() or xml.empty(dt,k)
+</typing>
+--ldx]]--
-function xml.empty(e,k) -- erases an element but keeps the table intact
- if e and k then
- e[k] = ""
- return e[k]
+function xml.empty(dt,k)
+ if dt and k then
+ dt[k] = ""
+ return dt[k]
else
return ""
end
end
--- call: e[k] = xml.assign(t) or xml.assign(e,k,t)
+--[[ldx--
+<p>The next helper assigns a tree (or string). Usage:</p>
+
+<typing>
+dt[k] = xml.assign(root) or xml.assign(dt,k,root)
+</typing>
+--ldx]]--
-function xml.assign(e,k,t) -- assigns xml tree / more testing will be done
- if e and k then
- if type(t) == "table" then
- e[k] = xml.body(t)
- else
- e[k] = t -- no parsing
- end
- return e[k]
+function xml.assign(dt,k,root)
+ if dt and k then
+ dt[k] = (type(root) == "table" and xml.body(root)) or root
+ return dt[k]
else
- return xml.body(t)
+ return xml.body(root)
end
end
--- 0=nomatch 1=match 2=wildcard 3=ancestor
-
--- "tag"
--- "tag1/tag2/tag3"
--- "*/tag1/tag2/tag3"
--- "/tag1/tag2/tag3"
--- "/tag1/tag2|tag3"
--- "tag[@att='value']
--- "tag1|tag2[@att='value']
-
-function xml.tag(e)
- return e.tg or ""
-end
-
-function xml.att(e,a)
- return (e.at and e.at[a]) or ""
-end
-
-xml.attribute = xml.att
-
---~ local cache = { }
-
---~ local function f_fault ( ) return 0 end
---~ local function f_wildcard( ) return 2 end
---~ local function f_result (b) if b then return 1 else return 0 end end
-
---~ function xml.lpath(str) --maybe @rt@ special
---~ if not str or str == "" then
---~ str = "*"
---~ end
---~ local m = cache[str]
---~ if not m then
---~ -- todo: text()
---~ if type(str) == "table" then
---~ if xml.trace_lpath then print("lpath", "table" , "inherit") end
---~ m = str
---~ elseif str == "/" then
---~ if xml.trace_lpath then print("lpath", "/", "root") end
---~ m = false
---~ elseif str == "*" then
---~ if xml.trace_lpath then print("lpath", "no string or *", "wildcard") end
---~ m = true
---~ else
---~ str = str:gsub("^//","") -- any
---~ if str == "" then
---~ if xml.trace_lpath then print("lpath", "//", "wildcard") end
---~ m = true
---~ else
---~ m = { }
---~ if not str:find("^/") then
---~ m[1] = 2
---~ end
---~ for v in str:gmatch("([^/]+)") do
---~ if v == "" or v == "*" then
---~ if #m > 0 then -- when not, then we get problems with root being second (after <?xml ...?> (we could start at dt[2])
---~ if xml.trace_lpath then print("lpath", "empty or *", "wildcard") end
---~ m[#m+1] = 2
---~ end
---~ elseif v == ".." then
---~ if xml.trace_lpath then print("lpath", "..", "ancestor") end
---~ m[#m+1] = 3
---~ else
---~ local a, b = v:match("^(.+)::(.-)$")
---~ if a and b then
---~ if a == "ancestor" then
---~ if xml.trace_lpath then print("lpath", a, "ancestor") end
---~ m[#m+1] = 3
---~ -- todo: b
---~ elseif a == "pi" then
---~ if xml.trace_lpath then print("lpath", a, "processing instruction") end
---~ local expr = "^" .. b .. " "
---~ m[#m+1] = function(e)
---~ if e.tg == '@pi@' and e.dt[1]:find(expr) then
---~ return 6
---~ else
---~ return 0
---~ end
---~ end
---~ end
---~ else
---~ local n, a, t = v:match("^(.-)%[@(.-)=(.-)%]$")
---~ if n and a and t then
---~ -- todo: namespace, negate
---~ -- t = t:gsub("^\'(.*)\'$", "%1")
---~ -- t = t:gsub("^\"(.*)\"$", "%1")
---~ -- t = t:sub(2,-2) -- "" or '' mandate
---~ t = t:gsub("^([\'\"])(.-)%1$", "%2")
---~ if n:find("|") then
---~ local tt = n:split("|")
---~ if xml.trace_lpath then print("lpath", "match", t, n) end
---~ m[#m+1] = function(e,i)
---~ for i=1,#tt do
---~ if e.at and e.tg == tt[i] and e.at[a] == t then return 1 end
---~ end
---~ return 0
---~ end
---~ else
---~ if xml.trace_lpath then print("lpath", "match", t, n) end
---~ m[#m+1] = function(e)
---~ if e.at and e.ns == s and e.tg == n and e.at[a] == t then
---~ return 1
---~ else
---~ return 0
---~ end
---~ end
---~ end
---~ else -- todo, better tracing (string.format, ook negate etc)
---~ local negate = v:sub(1,1) == '^'
---~ if negate then v = v:sub(2) end
---~ if v:find("|") then
---~ local t = { }
---~ for s in v:gmatch("([^|]+)") do
---~ local ns, tg = s:match("^(.-):(.+)$")
---~ if tg == "*" then
---~ if xml.trace_lpath then print("lpath", "or wildcard", ns, tg) end
---~ t[#t+1] = function(e) return e.ns == ns end
---~ elseif tg then
---~ if xml.trace_lpath then print("lpath", "or match", ns, tg) end
---~ t[#t+1] = function(e) return e.ns == ns and e.tg == tg end
---~ else
---~ if xml.trace_lpath then print("lpath", "or match", s) end
---~ t[#t+1] = function(e) return e.ns == "" and e.tg == s end
---~ end
---~ end
---~ if negate then
---~ m[#m+1] = function(e)
---~ for i=1,#t do if t[i](e) then return 0 end end return 1
---~ end
---~ else
---~ m[#m+1] = function(e)
---~ for i=1,#t do if t[i](e) then return 1 end end return 0
---~ end
---~ end
---~ else
---~ if xml.trace_lpath then print("lpath", "match", v) end
---~ local ns, tg = v:match("^(.-):(.+)$")
---~ if not tg then ns, tg = "", v end
---~ if tg == "*" then
---~ if ns ~= "" then
---~ m[#m+1] = function(e)
---~ if ns == e.ns then return 1 else return 0 end
---~ end
---~ end
---~ elseif negate then
---~ m[#m+1] = function(e)
---~ if ns == e.ns and tg == e.tg then return 0 else return 1 end
---~ end
---~ else
---~ m[#m+1] = function(e)
---~ if ns == e.ns and tg == e.tg then return 1 else return 0 end
---~ end
---~ end
---~ end
---~ end
---~ end
---~ end
---~ end
---~ end
---~ end
---~ if xml.trace_lpath then
---~ print("# lpath criteria:", (type(m) == "table" and #m) or "none")
---~ end
---~ cache[str] = m
---~ end
---~ return m
---~ end
-
---~ -- if handle returns true, then quit
-
---~ function xml.traverse(root,pattern,handle,reverse,index,wildcard)
---~ if not root then -- error
---~ return false
---~ elseif pattern == false then -- root
---~ handle(root,root.dt,root.ri)
---~ return false
---~ elseif pattern == true then -- wildcard
---~ local traverse = xml.traverse
---~ local rootdt = root.dt
---~ if rootdt then
---~ local start, stop, step = 1, #rootdt, 1
---~ if reverse then
---~ start, stop, step = stop, start, -1
---~ end
---~ for k=start,stop,step do
---~ if handle(root,rootdt,root.ri or k) then return false end
---~ if not traverse(rootdt[k],true,handle,reverse) then return false end
---~ end
---~ end
---~ return false
---~ elseif root and root.dt then
---~ index = index or 1
---~ local match = pattern[index] or f_wildcard
---~ local traverse = xml.traverse
---~ local rootdt = root.dt
---~ local start, stop, step = 1, #rootdt, 1
---~ if reverse and index == #pattern then -- maybe no index test here / error?
---~ start, stop, step = stop, start, -1
---~ end
---~ for k=start,stop,step do
---~ local e = rootdt[k]
---~ if e.tg then
---~ local m = (type(match) == "function" and match(e,root)) or match
---~ if m == 1 then -- match
---~ if index < #pattern then
---~ if not traverse(e,pattern,handle,reverse,index+1) then return false end
---~ else
---~ if handle(root,rootdt,root.ri or k) then
---~ return false
---~ end
---~ -- tricky, where do we pick up, is this ok now
---~ if pattern[1] == 2 then -- start again with new root (we need a way to inhibit this)
---~ if not traverse(e,pattern,handle,reverse,1) then return false end
---~ end
---~ end
---~ elseif m == 2 then -- wildcard
---~ if index < #pattern then
---~ -- <parent><a><b></b><c></c></a></parent> : "a" (true) "/a" (true) "b" (true) "/b" (false)
---~ -- not good yet, we need to pick up any prev level which is 2
---~ local p = pattern[2]
---~ if index == 1 and p then
---~ local mm = (type(p) == "function" and p(e,root)) or p -- pattern[2](e,root)
---~ if mm == 1 then
---~ if #pattern == 2 then
---~ if handle(root,rootdt,k) then
---~ return false
---~ end
---~ -- hack
---~ if pattern[1] == 2 then -- start again with new root (we need a way to inhibit this)
---~ if not traverse(e,pattern,handle,reverse,1) then return false end
---~ end
---~ else
---~ if not traverse(e,pattern,handle,reverse,3) then return false end
---~ end
---~ else
---~ if not traverse(e,pattern,handle,reverse,index+1,true) then return false end
---~ end
---~ else
---~ if not traverse(e,pattern,handle,reverse,index+1,true) then return false end
---~ end
---~ elseif handle(root,rootdt,k) then
---~ return false
---~ end
---~ elseif m == 3 then -- ancestor
---~ local ep = e.__p__
---~ if index < #pattern then
---~ if not traverse(ep,pattern,handle,reverse,index+1) then return false end
---~ elseif handle(root,rootdt,k) then
---~ return false
---~ end
---~ elseif m == 4 then -- just root
---~ if handle(root,rootdt,k) then
---~ return false
---~ end
---~ elseif m == 6 then -- pi
---~ if handle(root,rootdt,k) then
---~ return false
---~ end
---~ elseif wildcard then -- maybe two kind of wildcards: * ** //
---~ if not traverse(e,pattern,handle,reverse,index,wildcard) then return false end
---~ end
---~ end
---~ end
---~ end
---~ return true
---~ end
-
---~ Y a/b
---~ Y /a/b
---~ Y a/*/b
---~ Y a//b
---~ Y child::
---~ Y .//
---~ Y ..
---~ N id("tag")
---~ Y parent::
---~ Y child::
---~ N preceding-sibling:: (same name)
---~ N following-sibling:: (same name)
---~ N preceding-sibling-of-self:: (same name)
---~ N following-sibling-or-self:: (same name)
---~ Y ancestor::
---~ N descendent::
---~ N preceding::
---~ N following::
---~ N self::node()
---~ N node() == alles
---~ N a[position()=5]
---~ Y a[5]
---~ Y a[-5]
---~ N a[first()]
---~ N a[last()]
---~ Y a/(b|c|d)/e/f
---~ N (c/d|e)
---~ Y a/b[@bla]
---~ Y a/b[@bla='oeps']
---~ Y a/b[@bla=='oeps']
---~ Y a/b[@bla<>'oeps']
---~ Y a/b[@bla!='oeps']
---~ Y a/b/@bla
-
---~ Y ^/a/c (root)
---~ Y ^^/a/c (docroot)
---~ Y root::a/c (docroot)
-
---~ no wild card functions (yet)
-
---~ s = "/a//b/*/(c|d|e)/(f|g)/h[4]/h/child::i/j/(a/b)/p[-1]/q[4]/ancestor::q/r/../s/./t[@bla='true']/k"
-
--- // == /**/
--- / = ^ (root)
+--[[ldx--
+<p>We've now arrived at an interesting part: accessing the tree using a subset
+of <l n='xpath'/> and since we're not compatible we call it <l n='lpath'/>. We
+will explain more about its usage in other documents.</p>
+--ldx]]--
do
- function analyze(str)
- if not str then
- return ""
- else
- local tmp, result, map, key = { }, { }, { }, str
- str = str:gsub("(%b[])", function(s) tmp[#tmp+1] = s return '[['..#tmp..']]' end)
- str = str:gsub("(%b())", function(s) tmp[#tmp+1] = s return '[['..#tmp..']]' end)
- str = str:gsub("(%^+)([^/])", "%1/%2")
- str = str:gsub("//+", "/**/")
- str = str:gsub(".*root::", "^/")
- str = str:gsub("child::", "")
- str = str:gsub("ancestor::", "../")
- str = str:gsub("self::", "./")
- str = str:gsub("^/", "^/")
- for s in str:gmatch("([^/]+)") do
- s = s:gsub("%[%[(%d+)%]%]",function(n) return tmp[tonumber(n)] end)
- result[#result+1] = s
- end
- cache[key] = result
- return result
- end
- end
-
- actions = {
+ local actions = {
[10] = "stay",
[11] = "parent",
[12] = "subtree root",
@@ -2381,112 +2153,168 @@ do
[21] = "match one of",
[22] = "match and attribute eq",
[23] = "match and attribute ne",
- [23] = "match and attribute present",
+ [24] = "match one of and attribute eq",
+ [25] = "match one of and attribute ne",
+ [27] = "has attribute",
+ [28] = "has value",
+ [29] = "fast match",
[30] = "select",
[40] = "processing instruction",
}
- function compose(result)
- if not result or #result == 0 then
+ local map = { }
+
+ local space = lpeg.S(' \r\n\t')
+ local squote = lpeg.S("'")
+ local dquote = lpeg.S('"')
+ local lparent = lpeg.P('(')
+ local rparent = lpeg.P(')')
+ local atsign = lpeg.P('@')
+ local lbracket = lpeg.P('[')
+ local rbracket = lpeg.P(']')
+ local exclam = lpeg.P('!')
+ local period = lpeg.P('.')
+ local eq = lpeg.P('==') + lpeg.P('=')
+ local ne = lpeg.P('<>') + lpeg.P('!=')
+ local star = lpeg.P('*')
+ local slash = lpeg.P('/')
+ local colon = lpeg.P(':')
+ local bar = lpeg.P('|')
+ local hat = lpeg.P('^')
+ local valid = lpeg.R('az', 'AZ', '09') + lpeg.S('_-')
+ local name_yes = lpeg.C(valid^1) * colon * lpeg.C(valid^1)
+ local name_nop = lpeg.C(lpeg.P(true)) * lpeg.C(valid^1)
+ local name = name_yes + name_nop
+ local number = lpeg.C((lpeg.S('+-')^0 * lpeg.R('09')^1)) / tonumber
+ local names = (bar^0 * name)^1
+ local morenames = name * (bar^0 * name)^1
+ local instructiontag = lpeg.P('pi::')
+ local spacing = lpeg.C(space^0)
+ local somespace = space^1
+ local optionalspace = space^0
+ local text = lpeg.C(valid^0)
+ local value = (squote * lpeg.C((1 - squote)^0) * squote) + (dquote * lpeg.C((1 - dquote)^0) * dquote)
+ local empty = 1-slash
+
+ local is_eq = lbracket * atsign * name * eq * value * rbracket
+ local is_ne = lbracket * atsign * name * ne * value * rbracket
+ local is_attribute = lbracket * atsign * name * rbracket
+ local is_value = lbracket * value * rbracket
+ local is_number = lbracket * number * rbracket
+
+ local is_one = name
+ local is_none = exclam * name
+ local is_one_of = ((lparent * names * rparent) + morenames)
+ local is_none_of = exclam * ((lparent * names * rparent) + morenames)
+
+ local stay = (period )
+ local parent = (period * period ) / function( ) map[#map+1] = { 11 } end
+ local subtreeroot = (slash + hat ) / function( ) map[#map+1] = { 12 } end
+ local documentroot = (hat * hat ) / function( ) map[#map+1] = { 13 } end
+ local any = (star ) / function( ) map[#map+1] = { 14 } end
+ local many = (star * star ) / function( ) map[#map+1] = { 15 } end
+ local initial = (hat * hat * hat ) / function( ) map[#map+1] = { 16 } end
+
+ local match = (is_one ) / function(...) map[#map+1] = { 20, true , ... } end
+ local match_one_of = (is_one_of ) / function(...) map[#map+1] = { 21, true , ... } end
+ local dont_match = (is_none ) / function(...) map[#map+1] = { 20, false, ... } end
+ local dont_match_one_of = (is_none_of ) / function(...) map[#map+1] = { 21, false, ... } end
+
+ local match_and_eq = (is_one * is_eq ) / function(...) map[#map+1] = { 22, true , ... } end
+ local match_and_ne = (is_one * is_ne ) / function(...) map[#map+1] = { 23, true , ... } end
+ local dont_match_and_eq = (is_none * is_eq ) / function(...) map[#map+1] = { 22, false, ... } end
+ local dont_match_and_ne = (is_none * is_ne ) / function(...) map[#map+1] = { 23, false, ... } end
+
+ local match_one_of_and_eq = (is_one_of * is_eq ) / function(...) map[#map+1] = { 24, true , ... } end
+ local match_one_of_and_ne = (is_one_of * is_ne ) / function(...) map[#map+1] = { 25, true , ... } end
+ local dont_match_one_of_and_eq = (is_none_of * is_eq ) / function(...) map[#map+1] = { 24, false, ... } end
+ local dont_match_one_of_and_ne = (is_none_of * is_ne ) / function(...) map[#map+1] = { 25, false, ... } end
+
+ local has_attribute = (is_one * is_attribute) / function(...) map[#map+1] = { 27, true , ... } end
+ local has_value = (is_one * is_value ) / function(...) map[#map+1] = { 28, true , ... } end
+ local dont_has_attribute = (is_none * is_attribute) / function(...) map[#map+1] = { 27, false, ... } end
+ local dont_has_value = (is_none * is_value ) / function(...) map[#map+1] = { 28, false, ... } end
+ local position = (is_one * is_number ) / function(...) map[#map+1] = { 30, true, ... } end
+ local dont_position = (is_none * is_number ) / function(...) map[#map+1] = { 30, false, ... } end
+
+ local instruction = (instructiontag * text ) / function(...) map[#map+1] = { 40, ... } end
+ local nothing = (empty ) / function( ) map[#map+1] = { 15 } end -- 15 ?
+ local crap = (1-slash)^1
+
+ -- a few ugly goodies:
+
+ local docroottag = lpeg.P('^^') / function( ) map[#map+1] = { 12 } end
+ local subroottag = lpeg.P('^') / function( ) map[#map+1] = { 13 } end
+ local roottag = lpeg.P('root::') / function( ) map[#map+1] = { 12 } end
+ local parenttag = lpeg.P('parent::') / function( ) map[#map+1] = { 11 } end
+ local childtag = lpeg.P('child::')
+ local selftag = lpeg.P('self::')
+
+ -- there will be more and order will be optimized
+
+ local selector = (
+ instruction +
+ many + any +
+ parent + stay +
+ dont_position + position +
+ dont_match_one_of_and_eq + dont_match_one_of_and_ne +
+ match_one_of_and_eq + match_one_of_and_ne +
+ dont_match_and_eq + dont_match_and_ne +
+ match_and_eq + match_and_ne +
+ has_attribute + has_value +
+ dont_match_one_of + match_one_of +
+ dont_match + match +
+ crap + empty
+ )
+
+ local grammar = lpeg.P { "startup",
+ startup = (initial + documentroot + subtreeroot + roottag + docroottag + subroottag)^0 * lpeg.V("followup"),
+ followup = ((slash + parenttag + childtag + selftag)^0 * selector)^1,
+ }
+
+ function compose(str)
+ if not str or str == "" then
-- wildcard
return true
- elseif #result == 1 then
- local r = result[1][1]
- if r == "14" or r == "15" then
- -- wildcard
+ elseif str == '/' then
+ -- root
+ return false
+ else
+ map = { }
+ grammar:match(str)
+ if #map == 0 then
return true
- elseif r == "12" then
- -- root
- return false
- end
- end
- local map = { }
- for r=1,#result do
- local ri = result[r]
- if ri == "." then
- -- skip
- elseif ri == ".." then
- map[#map+1] = { 11 }
- elseif ri == "^" then
- map[#map+1] = { 12 }
- elseif ri == "^^" then
- map[#map+1] = { 13 }
- elseif ri == "*" then
- map[#map+1] = { 14 }
- elseif ri == "**" then
- map[#map+1] = { 15 }
else
- local m = ri:match("^%((.*)%)$") -- (a|b|c)
- if m or ri:find('|') then
- m = m or ri
- if m:find("[%[%]%(%)%/]") then -- []()/
- -- error
- else
- local t = { 21 }
- for s in m:gmatch("([^|])") do
- local ns, tg = s:match("^(.-):?([^:]+)$")
- t[#t+1] = ns
- t[#t+1] = tg
- end
- map[#map+1] = t
- end
- else
- local s, f = ri:match("^(.-)%[%s*(.+)%s*%]$") --aaa[bbb]
- if s and f then
- local ns, tg = s:match("^(.-):?([^:]+)$")
- local at, op, vl = f:match("^@(.-)([!=<>]?)([^!=<>]+)$") -- [@a=='b']
- if op and op ~= "" then
- if op == '=' or op == '==' then
- map[#map+1] = { 22, ns, tg, at, (vl:gsub("^([\'\"])(.*)%1$", "%2")) }
- elseif op == '<>' or op == '!=' then
- map[#map+1] = { 23, ns, tg, at, (vl:gsub("^([\'\"])(.*)%1$", "%2")) }
- else
- -- error
- end
- elseif f:find("^([%-%+%d]+)$")then
- map[#map+1] = { 30, ns, tg, tonumber(f) }
- elseif vl ~= "" then
- map[#map+1] = { 24, ns, tg, vl }
- end
- else
- local pi = ri:match("^pi::(.-)$")
- if pi then
- map[#map+1] = { 40, pi }
- else
- map[#map+1] = { 20, ri:match("^(.-):?([^:]+)$") }
- end
+ local m = map[1][1]
+ if #map == 1 then
+ if m == 14 or m == 15 then
+ -- wildcard
+ return true
+ elseif m == 12 then
+ -- root
+ return false
end
+ elseif #map == 2 and m == 12 and map[2][1] == 20 then
+ return { { 29, map[2][2], map[2][3] } }
end
+ if m ~= 11 and m ~= 12 and m ~= 13 and m ~= 14 and m ~= 15 and m ~= 16 then
+ table.insert(map, 1, { 16 })
+ end
+ return map
end
end
- -- if we have a symbol, we can prepend that to the string, which is faster
- local mm = map[1] or { }
- local r = mm[1] or 0
- if #map == 1 then
- if r == 14 or r == 15 then
- -- wildcard
- return true
- elseif r == 12 then
- -- root
- return false
- end
- end
- if r ~= 11 and r ~= 12 and r ~= 13 and r ~= 14 and r ~= 15 then
- table.insert(map, 1, { 16 })
- end
- return map
end
- cache = { }
+ local cache = { }
- function xml.lpath(pattern)
+ function xml.lpath(pattern,trace)
if type(pattern) == "string" then
local result = cache[pattern]
if not result then
- result = compose(analyze(pattern))
+ result = compose(pattern)
cache[pattern] = result
end
- if xml.trace_lpath then
+ if trace or xml.trace_lpath then
xml.lshow(result)
end
return result
@@ -2495,23 +2323,58 @@ do
end
end
- function xml.lshow(pattern)
+ local fallbackreport = (texio and texio.write) or io.write
+
+ function xml.lshow(pattern,report)
+ report = report or fallbackreport
local lp = xml.lpath(pattern)
if lp == false then
- print("root")
+ report(" -: root\n")
elseif lp == true then
- print("wildcard")
+ report(" -: wildcard\n")
else
- if type(pattern) ~= "table" then
- print("pattern: " .. tostring(pattern))
+ if type(pattern) == "string" then
+ report(string.format("pattern: %s\n",pattern))
end
for k,v in ipairs(lp) do
- print(k,actions[v[1]],table.join(v," ",2))
+ if #v > 1 then
+ local t = { }
+ for i=2,#v do
+ local vv = v[i]
+ if type(vv) == "string" then
+ t[#t+1] = (vv ~= "" and vv) or "#"
+ elseif type(vv) == "boolean" then
+ t[#t+1] = (vv and "==") or "<>"
+ end
+ end
+ report(string.format("%2i: %s %s -> %s\n", k,v[1],actions[v[1]],table.join(t," ")))
+ else
+ report(string.format("%2i: %s %s\n", k,v[1],actions[v[1]]))
+ end
end
end
end
- function xml.traverse(root,pattern,handle,reverse,index,wildcard)
+end
+
+--[[ldx--
+<p>An <l n='lpath'/> is converted to a table with instructions for traversing the
+tree. However, simple cases are signaled by booleans. Because we don't know in
+advance what we want to do with the found element the handle gets three arguments:</p>
+
+<lines>
+<t>r</t> : the root element of the data table
+<t>d</t> : the data table of the result
+<t>k</t> : the index in the data table of the result
+</lines>
+
+<p> Access to the root and data table makes it possible to construct insert and delete
+functions.</p>
+--ldx]]--
+
+do
+
+ function xml.traverse(root,pattern,handle,reverse,index,parent,wildcard)
if not root then -- error
return false
elseif pattern == false then -- root
@@ -2531,103 +2394,172 @@ do
end
end
return false
- elseif root and root.dt then
+ elseif root.dt then
index = index or 1
local action = pattern[index]
local command = action[1]
- if (command == 16 or command == 12) and index == 1 then -- initial
- wildcard = true
- index = index + 1
- action = pattern[index]
- command = action[1]
- end
- local traverse = xml.traverse
- local rootdt = root.dt
- local start, stop, step, n, dn = 1, #rootdt, 1, 0, 1
- if command == 30 then
- if action[4] < 0 then
- start, stop, step = stop, start, -1
- dn = -1
+ if command == 29 then -- fast case /oeps
+ local rootdt = root.dt
+ for k=1,#rootdt do
+ local e = rootdt[k]
+ local ns, tg = e.rn or e.ns, e.tg
+ if ns == action[2] and tg == action[3] then
+ if handle(root,rootdt,k) then return false end
+ end
end
- elseif reverse and index == #pattern then
- start, stop, step = stop, start, -1
- end
- for k=start,stop,step do
- local e = rootdt[k]
- local ns, tg = e.ns, e.tg
- if tg then
+ elseif command == 11 then -- parent
+ local ep = root.__p__ or parent
+ if index < #pattern then
+ if not xml.traverse(ep,pattern,handle,reverse,index+1,root) then return false end
+ elseif handle(root,rootdt,k) then
+ return false
+ end
+ else
+ if (command == 16 or command == 12) and index == 1 then -- initial
+ wildcard = true
+ index = index + 1
+ action = pattern[index]
+ command = action and action[1] or 0 -- something is wrong
+ end
+ if command == 11 then -- parent
+ local ep = root.__p__ or parent
+ if index < #pattern then
+ if not xml.traverse(ep,pattern,handle,reverse,index+1,root) then return false end
+ elseif handle(root,rootdt,k) then
+ return false
+ end
+ else
+ local traverse = xml.traverse
+ local rootdt = root.dt
+ local start, stop, step, n, dn = 1, #rootdt, 1, 0, 1
if command == 30 then
- if ns == action[2] and tg == action[3] then
- n = n + dn
- if n == action[4] then
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1) then return false end
- end
- break
- end
- elseif wildcard then
- if not traverse(e,pattern,handle,reverse,index,true) then return false end
+ if action[5] < 0 then
+ start, stop, step = stop, start, -1
+ dn = -1
end
- else
- local matched = false
- if command == 20 then -- match
- matched = ns == action[2] and tg == action[3]
- elseif command == 21 then -- match one of
- for i=2,#action,2 do
- if ns == action[i] and tg == action[i+1] then
- matched = true
- break
+ elseif reverse and index == #pattern then
+ start, stop, step = stop, start, -1
+ end
+ for k=start,stop,step do
+ local e = rootdt[k]
+ local ns, tg = e.rn or e.ns, e.tg
+ if tg then
+ if command == 30 then
+ local matched = ns == action[3] and tg == action[4]
+ if action[2] then matched = not matched end
+ if matched then
+ n = n + dn
+ if n == action[5] then
+ if index == #pattern then
+ if handle(root,rootdt,root.ri or k) then return false end
+ else
+ if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
+ end
+ break
+ end
+ elseif wildcard then
+ if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
end
- end
- elseif command == 22 then -- eq
- matched = ns == action[2] and tg == action[3] and e.at[action[4]] == action[5]
- elseif command == 23 then -- ne
- matched = ns == action[2] and tg == action[3] and e.at[action[4]] ~= action[5]
- elseif command == 24 then -- present
- matched = ns == action[2] and tg == action[3] and e.at[action[4]]
- end
- if matched then -- combine tg test and at test
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1) then return false end
- end
- elseif command == 14 then -- any
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1) then return false end
- end
- elseif command == 15 then -- many
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
else
- if not traverse(e,pattern,handle,reverse,index+1,true) then return false end
- end
- elseif command == 11 then -- parent
- local ep = e.__p__
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,index+1) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
- break
- elseif command == 40 and tg == "@pi@" then -- pi
- local pi = action[2]
- if pi ~= "" then
- local pt = e.dt[1]
- if pt and pt:find(pi) then
- if handle(root,rootdt,k) then
+ local matched, multiple = false, false
+ if command == 20 then -- match
+ matched = ns == action[2] and tg == action[3]
+ if action[2] then matched = not matched end
+ elseif command == 21 then -- match one of
+ multiple = true
+ for i=2,#action,2 do
+ if ns == action[i] and tg == action[i+1] then matched = true break end
+ end
+ if action[2] then matched = not matched end
+ elseif command == 22 then -- eq
+ matched = ns == action[3] and tg == action[4]
+ if action[2] then matched = not matched end
+ matched = matched and e.at[action[6]] == action[7]
+ elseif command == 23 then -- ne
+ matched = ns == action[3] and tg == action[4]
+ if action[2] then matched = not matched end
+ matched = mached and e.at[action[6]] ~= action[7]
+ elseif command == 24 then -- one of eq
+ multiple = true
+ for i=3,#action-2,2 do
+ if ns == action[i] and tg == action[i+1] then matched = true break end
+ end
+ if action[2] then matched = not matched end
+ matched = matched and e.at[action[#action-1]] == action[#action]
+ elseif command == 25 then -- one of ne
+ multiple = true
+ for i=3,#action-2,2 do
+ if ns == action[i] and tg == action[i+1] then matched = true break end
+ end
+ if action[2] then matched = not matched end
+ matched = matched and e.at[action[#action-1]] ~= action[#action]
+ elseif command == 27 then -- has attribute
+ local ans = action[3]
+ matched = ns == action[3] and tg == action[4]
+ if action[2] then matched = not matched end
+ matched = matched and e.at[action[5]]
+ elseif command == 28 then -- has value
+ local edt = e.dt
+ matched = ns == action[3] and tg == action[4]
+ if action[2] then matched = not matched end
+ matched = matched and edt and edt[1] == action[5]
+ end
+ if matched then -- combine tg test and at test
+ if index == #pattern then
+ if handle(root,rootdt,root.ri or k) then return false end
+ if wildcard and multiple then
+ if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
+ end
+ else
+ if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
+ end
+ elseif command == 14 then -- any
+ if index == #pattern then
+ if handle(root,rootdt,root.ri or k) then return false end
+ else
+ if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
+ end
+ elseif command == 15 then -- many
+ if index == #pattern then
+ if handle(root,rootdt,root.ri or k) then return false end
+ else
+ if not traverse(e,pattern,handle,reverse,index+1,root,true) then return false end
+ end
+ -- not here : 11
+ elseif command == 11 then -- parent
+ local ep = e.__p__ or parent
+ if index < #pattern then
+ if not traverse(ep,pattern,handle,reverse,root,index+1) then return false end
+ elseif handle(root,rootdt,k) then
+ return false
+ end
+ elseif command == 40 and e.special and tg == "@pi@" then -- pi
+ local pi = action[2]
+ if pi ~= "" then
+ local pt = e.dt[1]
+ if pt and pt:find(pi) then
+ if handle(root,rootdt,k) then
+ return false
+ end
+ end
+ elseif handle(root,rootdt,k) then
return false
end
+ elseif wildcard then
+ if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
end
- elseif handle(root,rootdt,k) then
- return false
end
- elseif wildcard then
- if not traverse(e,pattern,handle,reverse,index,true) then return false end
+ else
+ -- not here : 11
+ if command == 11 then -- parent
+ local ep = e.__p__ or parent
+ if index < #pattern then
+ if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end
+ elseif handle(root,rootdt,k) then
+ return false
+ end
+ break -- else loop
+ end
end
end
end
@@ -2636,15 +2568,71 @@ do
return true
end
+end
+
+--[[ldx--
+<p>Next come all kinds of locators and manipulators. The most generic function here
+is <t>xml.filter(root,pattern)</t>. All registered functions in the filters namespace
+can be part of a search path, as in:</p>
+
+<typing>
+local r, d, k = xml.filter(root,"/a/b/c/position(4)")
+</typing>
+--ldx]]--
+
+do
+
local traverse, lpath, convert = xml.traverse, xml.lpath, xml.convert
xml.filters = { }
+ --[[ldx--
+ <p>For splitting the filter function from the path specification, we can
+ use string matching or lpeg matching. Here the difference in speed is
+ negligible but the lpeg variant is more robust.</p>
+ --ldx]]--
+
+ -- function xml.filter(root,pattern)
+ -- local pat, fun, arg = pattern:match("^(.+)/(.-)%((.*)%)$")
+ -- if fun then
+ -- return (xml.filters[fun] or xml.filters.default)(root,pat,arg)
+ -- else
+ -- pat, arg = pattern:match("^(.+)/@(.-)$")
+ -- if arg then
+ -- return xml.filters.attributes(root,pat,arg)
+ -- else
+ -- return xml.filters.default(root,pattern)
+ -- end
+ -- end
+ -- end
+
+ -- not faster but hipper ... although ... i can't get rid of the trailing / in the path
+
+ local name = (lpeg.R("az","AZ")+lpeg.R("_-"))^1
+ local path = lpeg.C(((1-lpeg.P('/'))^0 * lpeg.P('/'))^1)
+ local argument = lpeg.P { "(" * lpeg.C(((1 - lpeg.S("()")) + lpeg.V(1))^0) * ")" }
+ local action = lpeg.Cc(1) * path * lpeg.C(name) * argument
+ local attribute = lpeg.Cc(2) * path * lpeg.P('@') * lpeg.C(name)
+
+ local parser = action + attribute
+
+ function xml.filter(root,pattern)
+ local kind, a, b, c = parser:match(pattern)
+ if kind == 1 then
+ return (xml.filters[b] or xml.filters.default)(root,a,c)
+ elseif kind == 2 then
+ return xml.filters.attributes(root,a,b)
+ else
+ return xml.filters.default(root,pattern)
+ end
+ end
+
function xml.filters.default(root,pattern)
local rt, dt, dk
traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end)
return dt and dt[dk], rt, dt, dk
end
+
function xml.filters.reverse(root,pattern)
local rt, dt, dk
traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end, 'reverse')
@@ -2698,17 +2686,14 @@ do
traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk, i = r, d, k, i-1 return i == 0 end, reverse)
if i == 0 then
return dt and dt[dk], rt, dt, dk
- else
- return nil, nil, nil, nil
end
- else
- return nil, nil, nil, nil
end
+ return nil, nil, nil, nil
end
function xml.filters.attributes(root,pattern,arguments)
local rt, dt, dk
traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end)
- local ekat = dt and dt[dk] and dt[dk].at
+ local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at)
if ekat then
if arguments then
return ekat[arguments] or "", rt, dt, dk
@@ -2722,69 +2707,33 @@ do
function xml.filters.attribute(root,pattern,arguments)
local rt, dt, dk
traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end)
- local ekat = dt and dt[dk] and dt[dk].at
+ local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at)
return (ekat and ekat[arguments]) or ""
end
function xml.filters.text(root,pattern,arguments)
- local ek, dt, dk, rt = xml.filters.index(root,pattern,arguments)
- return (ek and ek.dt) or "", rt, dt, dk
- end
-
- function xml.filter(root,pattern)
- local pat, fun, arg = pattern:match("^(.+)/(.-)%((.*)%)$")
- if fun then
- return (xml.filters[fun] or xml.filters.default)(root,pat,arg)
- else
- pat, arg = pattern:match("^(.+)/@(.-)$")
- if arg then
- return xml.filters.attributes(root,pat,arg)
+ local dtk, rt, dt, dk = xml.filters.index(root,pattern,arguments)
+ if dtk then
+ local dtkdt = dtk.dt
+ if #dtkdt == 1 and type(dtkdt[1]) == "string" then
+ return dtkdt[1], rt, dt, dk
else
- return xml.filters.default(root,pattern)
+ return xml.tostring(dtkdt), rt, dt, dk
end
+ else
+ return "", rt, dt, dk
end
end
- xml.filters.position = xml.filters.index
-
- -- these may go away
-
- xml.index_element = xml.filters.index
- xml.count_elements = xml.filters.count
- xml.first_element = xml.filters.first
- xml.last_element = xml.filters.last
- xml.index_text = xml.filters.text
- xml.first_text = function (root,pattern) return xml.filters.text(root,pattern, 1) end
- xml.last_text = function (root,pattern) return xml.filters.text(root,pattern,-1) end
-
- -- so far
-
- function xml.get_text(root,pattern,reverse)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end, reverse)
- local ek = dt and dt[dk]
- return (ek and ek.dt) or "", rt, dt, dk
- end
-
- function xml.each_element(root, pattern, handle, reverse)
- local ok
- traverse(root, lpath(pattern), function(r,d,k) ok = true handle(r,d,k) end, reverse)
- return ok
- end
-
- function xml.get_element(root,pattern,reverse)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end, reverse)
- return dt and dt[dk], rt, dt, dk
- end
+ --[[ldx--
+ <p>The following functions collect elements and texts.</p>
+ --ldx]]--
- -- these may change
-
- function xml.all_elements(root, pattern, ignorespaces) -- ok?
+ function xml.collect_elements(root, pattern, ignorespaces)
local rr, dd = { }, { }
traverse(root, lpath(pattern), function(r,d,k)
local dk = d and d[k]
if dk then
- if ignorespaces and type(dk) == "string" and dk:find("^[\s\n]*$") then
+ if ignorespaces and type(dk) == "string" and dk:find("^%s*$") then
-- ignore
else
local n = #rr+1
@@ -2795,8 +2744,8 @@ do
return dd, rr
end
- function xml.all_texts(root, pattern, flatten) -- crap
- local t, r = { }, { }
+ function xml.collect_texts(root, pattern, flatten)
+ local t = { } -- no r collector
traverse(root, lpath(pattern), function(r,d,k)
if d then
local ek = d[k]
@@ -2813,10 +2762,76 @@ do
else
t[#t+1] = ""
end
- r[#r+1] = r
end)
- return t, r
+ return t
+ end
+
+ --[[ldx--
+ <p>Often using an iterator looks nicer in the code than passing handler
+ functions. The <l n='lua'/> book describes how to use coroutines for that
+ purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits
+ code like:</p>
+
+ <typing>
+ for r, d, k in xml.elements(xml.load('text.xml'),"title") do
+ print(d[k])
end
+ </typing>
+
+ <p>Which will print all the titles in the document. The iterator variant takes
+ 1.5 times the runtime of the function variant which is due to the overhead in
+ creating the wrapper. So, instead of:</p>
+
+ <typing>
+ function xml.filters.first(root,pattern)
+ for rt,dt,dk in xml.elements(root,pattern) do
+ return dt and dt[dk], rt, dt, dk
+ end
+ return nil, nil, nil, nil
+ end
+ </typing>
+
+ <p>We use the function variants in the filters.</p>
+ --ldx]]--
+
+ -- Iterator variant of traverse: returns a wrapped coroutine that hands
+ -- coroutine.yield to traverse as the handler, so every call of the
+ -- returned function produces the arguments of the next handler call.
+ function xml.elements(root,pattern,reverse)
+     local function walk()
+         traverse(root, lpath(pattern), coroutine.yield, reverse)
+     end
+     return coroutine.wrap(walk)
+ end
+
+ -- Call handle(r,d,k) for every match that traverse reports for the
+ -- pattern. Returns true when the handler was called at least once and
+ -- nil otherwise.
+ function xml.each_element(root, pattern, handle, reverse)
+     local seen
+     local function visit(r,d,k)
+         seen = true
+         handle(r,d,k)
+     end
+     traverse(root, lpath(pattern), visit, reverse)
+     return seen
+ end
+
+ -- For every match of the pattern, call handle(child) on each entry of
+ -- the matched element's dt table that has a tg field.
+ function xml.process_elements(root, pattern, handle)
+     local function visit(r,d,k)
+         local children = d[k].dt
+         if not children then
+             return
+         end
+         for i=1,#children do
+             local child = children[i]
+             if child.tg then
+                 handle(child)
+             end
+         end
+     end
+     traverse(root, lpath(pattern), visit)
+ end
+
+ -- For every match of the pattern, pass the element's attribute table
+ -- (a fresh empty table when it has none) to handle. Afterwards the
+ -- table is stored back in the at field, or the field is cleared when
+ -- the table ended up empty.
+ function xml.process_attributes(root, pattern, handle)
+     local function visit(r,d,k)
+         local element = d[k]
+         local attributes = element.at or { }
+         handle(attributes)
+         if not next(attributes) then
+             element.at = nil
+         else
+             element.at = attributes
+         end
+     end
+     traverse(root, lpath(pattern), visit)
+ end
+
+ --[[ldx--
+ <p>We've now arrived at the functions that manipulate the tree.</p>
+ --ldx]]--
function xml.inject_element(root, pattern, element, prepend)
if root and element then
@@ -2868,7 +2883,7 @@ do
function xml.insert_element(root, pattern, element, before) -- todo: element als functie
if root and element then
if pattern == "/" then
- xml.inject_element(root, pattern, element, before) -- todo: element als functie
+ xml.inject_element(root, pattern, element, before)
else
local matches, collect = { }, nil
if type(element) == "string" then
@@ -2898,8 +2913,6 @@ do
end
end
- -- first, last, each
-
xml.insert_element_after = xml.insert_element
xml.insert_element_before = function(r,p,e) xml.insert_element(r,p,e,true) end
xml.inject_element_after = xml.inject_element
@@ -2930,24 +2943,47 @@ do
end
end
- function xml.process(root, pattern, handle)
- traverse(root, lpath(pattern), function(r,d,k)
- if d[k].dt then
- for k,v in ipairs(d[k].dt) do
- if v.tg then handle(v) end
+ function xml.include(xmldata,element,attribute,pathlist,collapse)
+ element = element or 'ctx:include'
+ attribute = attribute or 'name'
+ pathlist = pathlist or { '.' }
+ -- todo, check op ri
+ local function include(r,d,k)
+ local ek = d[k]
+ local name = (ek.at and ek.at[attribute]) or ""
+ if name ~= "" then
+ -- maybe file lookup in tree
+ local fullname
+ for _, path in ipairs(pathlist) do
+ if path == '.' then
+ fullname = name
+ else
+ fullname = file.join(path,name)
+ end
+ local f = io.open(fullname)
+ if f then
+ xml.assign(d,k,xml.load(f,collapse))
+ f:close()
+ break
+ else
+ xml.empty(d,k)
+ end
end
+ else
+ xml.empty(d,k)
end
- end)
+ end
+ while xml.each_element(xmldata, element, include) do end
end
- function xml.strip(root, pattern)
+ function xml.strip_whitespace(root, pattern)
traverse(root, lpath(pattern), function(r,d,k)
local dkdt = d[k].dt
- if dkdt then
+ if dkdt then -- can be optimized
local t = { }
for i=1,#dkdt do
local str = dkdt[i]
- if type(str) == "string" and str:find("^[\032\010\012\013]*$") then
+ if type(str) == "string" and str:find("^[ \n\r\t]*$") then
-- stripped
else
t[#t+1] = str
@@ -2958,8 +2994,6 @@ do
end)
end
- --
-
function xml.rename_space(root, oldspace, newspace) -- fast variant
local ndt = #root.dt
local rename = xml.rename_space
@@ -2968,6 +3002,9 @@ do
if type(e) == "table" then
if e.ns == oldspace then
e.ns = newspace
+ if e.rn then
+ e.rn = newspace
+ end
end
local edt = e.dt
if edt then
@@ -2987,83 +3024,30 @@ do
d[k].ns = newns
end)
end
-
- -- function xml.process_attributes(root, pattern, handle)
- -- traverse(root, lpath(pattern), function(e,k) handle(e[k].at) end)
- -- end
-
- function xml.process_attributes(root, pattern, handle)
+ function xml.check_namespace(root, pattern, newns)
traverse(root, lpath(pattern), function(r,d,k)
- local ek = d[k]
- local a = ek.at or { }
- handle(a)
- if next(a) then
- ek.at = a
- else
- ek.at = nil
+ local dk = d[k]
+ if (not dk.rn or dk.rn == "") and dk.ns == "" then
+ dk.rn = newns
end
end)
end
-
- function xml.package(tag,attributes,data)
- local n, t = tag:match("^(.-):(.+)$")
- if attributes then
- return { ns = n or "", tg = t or tag, dt = data or "", at = attributes }
- else
- return { ns = n or "", tg = t or tag, dt = data or "" }
- end
- end
-
- -- some special functions, handy for the manual:
-
- function xml.gsub(t,old,new)
- if t.dt then
- for k,v in ipairs(t.dt) do
- if type(v) == "string" then
- t.dt[k] = v:gsub(old,new)
- else
- xml.gsub(v,old,new)
- end
- end
- end
- end
-
- function xml.strip_leading_spaces(ek, e, k) -- cosmetic, for manual
- if e and k and e[k-1] and type(e[k-1]) == "string" then
- local s = e[k-1]:match("\n(%s+)")
- xml.gsub(ek,"\n"..string.rep(" ",#s),"\n")
- end
- end
-
- function xml.serialize_path(root,lpath,handle)
- local ek, e, k = xml.first_element(root,lpath)
- ek = xml.copy(ek)
- xml.strip_leading_spaces(ek,e,k)
- xml.serialize(ek,handle)
- end
-
- -- http://www.lua.org/pil/9.3.html (or of course the book)
- --
- -- it's nice to have an iterator but it comes with some extra overhead
- --
- -- for r, d, k in xml.elements(xml.load('text.xml'),"title") do print(d[k]) end
-
- function xml.elements(root,pattern,reverse)
- return coroutine.wrap(function() traverse(root, lpath(pattern), coroutine.yield, reverse) end)
+ function xml.remap_name(root, pattern, newtg, newns, newrn)
+ traverse(root, lpath(pattern), function(r,d,k)
+ local dk = d[k]
+ dk.tg = newtg
+ dk.ns = newns
+ dk.rn = newrn
+ end)
end
- -- the iterator variant needs 1.5 times the runtime of the function variant
- --
- -- function xml.filters.first(root,pattern)
- -- for rt,dt,dk in xml.elements(root,pattern)
- -- return dt and dt[dk], rt, dt, dk
- -- end
- -- return nil, nil, nil, nil
- -- end
+end
- -- todo xml.gmatch for text
+--[[ldx--
+<p>Here are a few synonyms.</p>
+--ldx]]--
-end
+xml.filters.position = xml.filters.index
xml.count = xml.filters.count
xml.index = xml.filters.index
@@ -3072,7 +3056,10 @@ xml.first = xml.filters.first
xml.last = xml.filters.last
xml.each = xml.each_element
-xml.all = xml.all_elements
+xml.process = xml.process_elements -- plural: the function is defined as xml.process_elements
+xml.strip = xml.strip_whitespace
+xml.collect = xml.collect_elements
+xml.all = xml.collect_elements
xml.insert = xml.insert_element_after
xml.inject = xml.inject_element_after
@@ -3081,39 +3068,38 @@ xml.before = xml.insert_element_before
xml.delete = xml.delete_element
xml.replace = xml.replace_element
--- a few helpers, the may move to lxml modules
+--[[ldx--
+<p>The following helper functions best belong to the <t>lxml-ini</t>
+module. Some are here because we need them in the <t>mk</t>
+document and other manuals, others came up when playing with
+this module. Since this module is also used in <l n='mtxrun'/> we've
+put them here instead of loading more modules there than needed.</p>
+--ldx]]--
-function xml.include(xmldata,element,attribute,pathlist,collapse)
- element = element or 'ctx:include'
- attribute = attribute or 'name'
- pathlist = pathlist or { '.' }
- -- todo, check op ri
- local function include(r,d,k)
- local ek = d[k]
- local name = (ek.at and ek.at[attribute]) or ""
- if name ~= "" then
- -- maybe file lookup in tree
- local fullname
- for _, path in ipairs(pathlist) do
- if path == '.' then
- fullname = name
- else
- fullname = file.join(path,name)
- end
- local f = io.open(fullname)
- if f then
- xml.assign(d,k,xml.load(f,collapse))
- f:close()
- break
- else
- xml.empty(d,k)
- end
+function xml.gsub(t,old,new)
+ if t.dt then
+ for k,v in ipairs(t.dt) do
+ if type(v) == "string" then
+ t.dt[k] = v:gsub(old,new)
+ else
+ xml.gsub(v,old,new)
end
- else
- xml.empty(d,k)
end
end
- while xml.each(xmldata, element, include) do end
+end
+
+-- Cosmetic helper for the manual: when the entry before position k in d
+-- is a string containing a newline followed by whitespace, remove a run
+-- of spaces of that same length after every newline in the text of dk
+-- (via xml.gsub). Does nothing when there is no such preceding string.
+function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual
+    if d and k then
+        local previous = d[k-1]
+        if type(previous) == "string" then
+            local s = previous:match("\n(%s+)")
+            -- no newline plus whitespace in the preceding text: nothing to strip
+            if s then
+                xml.gsub(dk,"\n"..string.rep(" ",#s),"\n")
+            end
+        end
+    end
+end
+
+function xml.serialize_path(root,lpath,handle)
+ local dk, r, d, k = xml.first(root,lpath)
+ dk = xml.copy(dk)
+ xml.strip_leading_spaces(dk,d,k)
+ xml.serialize(dk,handle)
end
xml.escapes = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
@@ -3124,22 +3110,37 @@ function xml.unescaped(str) return str:gsub("(&.-;)", xml.unescapes) end
function xml.cleansed (str) return str:gsub("<.->" , '' ) end -- "%b<>"
function xml.join(t,separator,lastseparator)
- local result = { }
- for k,v in pairs(t) do
- result[k] = xml.tostring(v)
- end
- if lastseparator then
- return table.join(result,separator,1,#result-1) .. lastseparator .. result[#result]
+ if #t > 0 then
+ local result = { }
+ for k,v in pairs(t) do
+ result[k] = xml.tostring(v)
+ end
+ if lastseparator then
+ return table.join(result,separator or "",1,#result-1) .. (lastseparator or "") .. result[#result]
+ else
+ return table.join(result,separator)
+ end
else
- return table.join(result,separator)
+ return ""
end
end
-do if utf then
+--[[ldx--
+<p>We provide (at least here) two entity handlers. The more extensive
+resolver consults a hash first, tries to convert to <l n='utf'/> next,
+and finally calls a handler when defined. When this all fails, the
+original entity is returned.</p>
+--ldx]]--
+
+do if unicode and unicode.utf8 then
+
+ xml.entities = xml.entities or { } -- xml.entities.handler == function
+
+ local char = unicode.utf8.char
local function toutf(s)
- return utf.char(tonumber(s,16))
+ return char(tonumber(s,16))
end
function xml.utfize(root)
@@ -3147,25 +3148,50 @@ do if utf then
for k=1,#d do
local dk = d[k]
if type(dk) == "string" then
- d[k] = dk:gsub("&#x(.-);",toutf)
+ -- test prevents copying if no match
+ if dk:find("&#x.-;") then
+ d[k] = dk:gsub("&#x(.-);",toutf)
+ end
else
xml.utfize(dk)
end
end
end
-else
- function xml.utfize()
- print("entity to utf conversion is not available")
+
+ local entities = xml.entities
+
+ -- Replacement function for gsub: e is the entity name without "&" and
+ -- ";". The lookup order is: the entities hash, then a hexadecimal
+ -- character reference ("#x..."), then entities.handler when set. When
+ -- none of these gives a result the original "&name;" text is returned.
+ local function resolve(e)
+     -- keep the name in e; the looked up value gets its own local
+     local replacement = entities[e]
+     if replacement then
+         return replacement
+     end
+     if e:find("^#x") then
+         local code = tonumber(e:sub(3),16)
+         -- invalid hex digits fall through to the handler
+         if code then
+             return char(code)
+         end
+     end
+     local h = entities.handler
+     return (h and h(e)) or "&" .. e .. ";"
end
-end end
+ -- Walk the dt table of root and replace every "&name;" in its string
+ -- entries by the result of resolve. Non-string entries are processed
+ -- recursively with this same function, so nested elements get the full
+ -- entity resolution and not only the hexadecimal conversion of
+ -- xml.utfize.
+ function xml.resolve_entities(root)
+     local d = root.dt
+     if d then
+         for k=1,#d do
+             local dk = d[k]
+             if type(dk) == "string" then
+                 -- test prevents copying if no match
+                 if dk:find("&.-;") then
+                     d[k] = dk:gsub("&(.-);",resolve)
+                 end
+             else
+                 xml.resolve_entities(dk)
+             end
+         end
+     end
+ end
---- examples
+end end
---~ for _, e in ipairs(xml.filters.elements(ctxrunner.xmldata,"ctx:message")) do
---~ print(">>>",xml.tostring(e.dt))
---~ end
+--~ xml.lshow("/../../../a/(b|c)[@d='e']/f")
+--~ xml.lshow("/../../../a/!(b|c)[@d='e']/f")
+--~ xml.lshow("/../../../a/!b[@d!='e']/f")
-- filename : l-utils.lua