diff options
Diffstat (limited to 'scripts/context/lua/mtxrun.lua')
-rw-r--r-- | scripts/context/lua/mtxrun.lua | 2348 |
1 files changed, 1187 insertions, 1161 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index 2a3a496a3..baad28e84 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -168,7 +168,7 @@ end --~ split = lpeg.Ct(c*(p*c)^0) --~ splitters[separator] = split --~ end ---~ return lpeg.match(split,self) +--~ return lpeg.match(split,self) -- split:match(self) --~ else --~ return { } --~ end @@ -325,7 +325,7 @@ end --~ return self .. self.rep(chr or " ",n-#self) --~ end -function string:padd(n,chr) +function string:rpadd(n,chr) local m = n-#self if m > 0 then return self .. self.rep(chr or " ",m) @@ -334,6 +334,17 @@ function string:padd(n,chr) end end +function string:lpadd(n,chr) + local m = n-#self + if m > 0 then + return self.rep(chr or " ",m) .. self + else + return self + end +end + +string.padd = string.rpadd + function is_number(str) return str:find("^[%-%+]?[%d]-%.?[%d+]$") == 1 end @@ -530,6 +541,8 @@ end do + -- one of my first exercises in lua ... + -- 34.055.092 32.403.326 arabtype.tma -- 1.620.614 1.513.863 lmroman10-italic.tma -- 1.325.585 1.233.044 lmroman10-regular.tma @@ -889,6 +902,25 @@ function table.tohash(t) return h end +function table.contains(t, v) + if t then + for i=1, #t do + if t[i] == v then + return true + end + end + end + return false +end + +function table.count(t) + local n, e = 0, next(t) + while e do + n, e = n + 1, next(t,e) + end + return n +end + --~ function table.are_equal(a,b) --~ return table.serialize(a) == table.serialize(b) --~ end @@ -1387,12 +1419,20 @@ function boolean.tonumber(b) if b then return 1 else return 0 end end -function toboolean(str) - if type(str) == "string" then - return str == "true" or str == "yes" or str == "on" or str == "1" - elseif type(str) == "number" then - return tonumber(str) ~= 0 - elseif type(str) == "nil" then +function toboolean(str,tolerant) + if tolerant then + if type(str) == "string" then + return str == "true" or str == "yes" or str == "on" or str == "1" + elseif 
type(str) == "number" then + return tonumber(str) ~= 0 + elseif type(str) == "nil" then + return false + else + return str + end + elseif str == "true" then + return true + elseif str == "false" then return false else return str @@ -1427,13 +1467,14 @@ if not modules then modules = { } end modules ['l-xml'] = { license = "see context related readme files" } --- todo: ns, tg = s:match("^(.-):?([^:]+)$") +-- RJ: key=value ... lpeg.Ca(lpeg.Cc({}) * (pattern-producing-key-and-value / rawset)^0) --[[ldx-- <p>The parser used here is inspired by the variant discussed in the lua book, but handles comment and processing instructions, has a different structure, provides parent access; a first version used different tricky but was less optimized to we -went this route.</p> +went this route. First we had a find based parser, now we have an <l n='lpeg'/> based one. +The find based parser can be found in l-xml-edu.lua along with other older code.</p> <p>Expecially the lpath code is experimental, we will support some of xpath, but only things that make sense for us; as compensation it is possible to hook in your @@ -1442,7 +1483,7 @@ this module for process management, like handling <l n='ctx'/> and <l n='rlx'/> files.</p> <typing> -a/b/c /*/c (todo: a/b/(pattern)/d) +a/b/c /*/c a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n) a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n) </typing> @@ -1457,48 +1498,86 @@ tex = tex or { } xml.trace_lpath = false xml.trace_print = false +xml.trace_remap = false --[[ldx-- -<p>First a hack to enable namespace resolving.</p> +<p>First a hack to enable namespace resolving. A namespace is characterized by +a <l n='url'/>. The following function associates a namespace prefix with a +pattern. We use <l n='lpeg'/>, which in this case is more than twice as fast as a +find based solution where we loop over an array of patterns. 
Less code and +much cleaner.</p> --ldx]]-- +xml.xmlns = { } + do - xml.xmlns = { } + local parser = lpeg.P(false) -- printing shows that this has no side effects + + --[[ldx-- + <p>The next function associates a namespace prefix with an <l n='url'/>. This + normally happens independent of parsing.</p> - local data = { } + <typing> + xml.registerns("mml","mathml") + </typing> + --ldx]]-- - function xml.registerns(namespace,pattern) - data[#data+1] = { namespace:lower(), pattern:lower() } + function xml.registerns(namespace, pattern) -- pattern can be an lpeg + parser = parser + lpeg.C(lpeg.P(pattern:lower())) / namespace end + --[[ldx-- + <p>The next function also registers a namespace, but this time we map a + given namespace prefix onto a registered one, using the given + <l n='url'/>. This used for attributes like <t>xmlns:m</t>.</p> + + <typing> + xml.checkns("m","http://www.w3.org/mathml") + </typing> + --ldx]]-- + function xml.checkns(namespace,url) - url = url:lower() - for i=1,#data do - local d = data[i] - if url:find(d[2]) then - if namespace ~= d[1] then - xml.xmlns[namespace] = d[1] - end - end + local ns = parser:match(url:lower()) + if ns and namespace ~= ns then + xml.xmlns[namespace] = ns end end + --[[ldx-- + <p>Next we provide a way to turn an <l n='url'/> into a registered + namespace. This used for the <t>xmlns</t> attribute.</p> + + <typing> + resolvedns = xml.resolvens("http://www.w3.org/mathml") + </typing> + + This returns <t>mml</t>. + --ldx]]-- + function xml.resolvens(url) - url = url:lower() - for i=1,#data do - local d = data[i] - if url:find(d[2]) then - return d[1] - end - end - return "" + return parser:match(url:lower()) or "" end + --[[ldx-- + <p>A namespace in an element can be remapped onto the registered + one efficiently by using the <t>xml.xmlns</t> table.</p> + --ldx]]-- + end --[[ldx-- -<p>Next comes the loader. The dreadful doctype comes in many disguises:</p> +<p>This version uses <l n='lpeg'/>. 
We follow the same approach as before, stack and top and +such. This version is about twice as fast which is mostly due to the fact that +we don't have to prepare the stream for cdata, doctype etc etc. This variant is +is dedicated to Luigi Scarso, who challenged me with 40 megabyte <l n='xml'/> files that +took 12.5 seconds to load (1.5 for file io and the rest for tree building). With +the <l n='lpeg'/> implementation we got that down to less 7.3 seconds. Loading the 14 +<l n='context'/> interface definition files (2.6 meg) went down from 1.05 seconds to 0.55.</p> + +<p>Next comes the parser. The rather messy doctype definition comes in many +disguises so it is no surprice that later on have to dedicate quite some +<l n='lpeg'/> code to it.</p> <typing> <!DOCTYPE Something PUBLIC "... ..." "..." [ ... ] > @@ -1508,320 +1587,466 @@ end <!DOCTYPE Something [ ... ] > <!DOCTYPE Something > </typing> + +<p>The code may look a bit complex but this is mostly due to the fact that we +resolve namespaces and attach metatables. There is only one public function:</p> + +<typing> +local x = xml.convert(somestring) +</typing> + +<p>An optional second boolean argument tells this function not to create a root +element.</p> --ldx]]-- do - -- Loading 12 cont-*.xml and keys-*.xml files totaling to 2.62 MBytes takes 1.1 sec - -- on a windows vista laptop with dual core 7600 (2.3 Ghz), which is not that bad. - -- Of this half time is spent on doctype etc parsing. - - local doctype_patterns = { - "<!DOCTYPE%s+(.-%s+PUBLIC%s+%b\"\"%s+%b\"\"%s+%b[])%s*>", - "<!DOCTYPE%s+(.-%s+PUBLIC%s+%b\"\"%s+%b\"\")%s*>", - "<!DOCTYPE%s+(.-%s+SYSTEM%s+%b\"\"%s+%b[])%s*>", - "<!DOCTYPE%s+(.-%s+SYSTEM%s+%b\"\")%s*>", - "<!DOCTYPE%s+(.-%s%b[])%s*>", - "<!DOCTYPE%s+(.-)%s*>" - } + local remove, nsremap = table.remove, xml.xmlns - -- We assume no "<" which is the lunatic part of the xml spec - -- especially since ">" is permitted; otherwise we need a char - -- by char parser ... 
more something for later ... normally - -- entities will be used anyway. + local stack, top, dt, at, xmlns, errorstr = {}, {}, {}, {}, {}, nil - -- data = data:gsub(nothing done) is still a copy so we find first + local mt = { __tostring = xml.text } - local function prepare(data,text) - -- pack (for backward compatibility) - if type(data) == "table" then - data = table.concat(data,"") - end - -- CDATA - if data:find("<%!%[CDATA%[") then - data = data:gsub("<%!%[CDATA%[(.-)%]%]>", function(txt) - text[#text+1] = txt or "" - return string.format("<@cd@>%s</@cd@>",#text) - end) - end - -- DOCTYPE - if data:find("<!DOCTYPE ") then - data = data:gsub("^(.-)(<[^%!%?])", function(a,b) - if a:find("<!DOCTYPE ") then - for _,v in ipairs(doctype_patterns) do - a = a:gsub(v, function(d) - text[#text+1] = d or "" - return string.format("<@dd@>%s</@dd@>",#text) - end) - end - end - return a .. b - end,1) + local function add_attribute(namespace,tag,value) + if tag == "xmlns" then + xmlns[#xmlns+1] = xml.resolvens(value) + at[tag] = value + elseif ns == "xmlns" then + xml.checkns(tag,value) + at["xmlns:" .. 
tag] = value + else + at[tag] = value end - -- comment / does not catch doctype - data = data:gsub("<%!%-%-(.-)%-%->", function(txt) - text[#text+1] = txt or "" - return string.format("<@cm@>%s</@cm@>",#text) - end) - -- processing instructions / altijd 1 - data = data:gsub("<%?(.-)%?>", function(txt) - text[#text+1] = txt or "" - return string.format("<@pi@>%s</@pi@>",#text) - end) - return data, text end + local function add_begin(spacing, namespace, tag) + if #spacing > 0 then + dt[#dt+1] = spacing + end + local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace + top = { ns=namespace or "", nr=resolved, tg=tag, at=at, dt={}, __p__ = stack[#stack] } + setmetatable(top, mt) + dt = top.dt + stack[#stack+1] = top + at = { } + end + local function add_end(spacing, namespace, tag) + if #spacing > 0 then + dt[#dt+1] = spacing + end + local toclose = remove(stack) + top = stack[#stack] + if #stack < 1 then + errorstr = string.format("nothing to close with %s", tag) + elseif toclose.tg ~= tag then -- no namespace check + errorstr = string.format("unable to close %s with %s", toclose.tg, tag) + end + dt = top.dt + dt[#dt+1] = toclose + if at.xmlns then + remove(xmlns) + end + end + local function add_empty(spacing, namespace, tag) + if #spacing > 0 then + dt[#dt+1] = spacing + end + local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace + top = stack[#stack] + setmetatable(top, mt) + dt = top.dt + dt[#dt+1] = { ns=namespace or "", nr=resolved, tg=tag, at=at, dt={}, __p__ = top } + at = { } + if at.xmlns then + remove(xmlns) + end + end + local function add_text(text) + dt[#dt+1] = text + end + local function add_special(what, spacing, text) + if #spacing > 0 then + dt[#dt+1] = spacing + end + top = stack[#stack] + setmetatable(top, mt) + dt[#dt+1] = { special=true, ns="", tg=what, dt={text} } + end + local function set_message(txt) + errorstr = "garbage at the end of the file: " .. 
txt:gsub("([ \n\r\t]*)","") + end + + local space = lpeg.S(' \r\n\t') + local open = lpeg.P('<') + local close = lpeg.P('>') + local squote = lpeg.S("'") + local dquote = lpeg.S('"') + local equal = lpeg.P('=') + local slash = lpeg.P('/') + local colon = lpeg.P(':') + local valid = lpeg.R('az', 'AZ', '09') + lpeg.S('_-.') + local name_yes = lpeg.C(valid^1) * colon * lpeg.C(valid^1) + local name_nop = lpeg.C(lpeg.P(true)) * lpeg.C(valid^1) + local name = name_yes + name_nop + + local utfbom = lpeg.P('\000\000\254\255') + lpeg.P('\255\254\000\000') + + lpeg.P('\255\254') + lpeg.P('\254\255') + lpeg.P('\239\187\191') -- no capture + + local spacing = lpeg.C(space^0) + local justtext = lpeg.C((1-open)^1) + local somespace = space^1 + local optionalspace = space^0 + + local value = (squote * lpeg.C((1 - squote)^0) * squote) + (dquote * lpeg.C((1 - dquote)^0) * dquote) + local attribute = (somespace * name * optionalspace * equal * optionalspace * value) / add_attribute + local attributes = attribute^0 + + local text = justtext / add_text + local balanced = lpeg.P { "[" * ((1 - lpeg.S"[]") + lpeg.V(1))^0 * "]" } -- taken from lpeg manual, () example + + local emptyelement = (spacing * open * name * attributes * optionalspace * slash * close) / add_empty + local beginelement = (spacing * open * name * attributes * optionalspace * close) / add_begin + local endelement = (spacing * open * slash * name * optionalspace * close) / add_end + + local begincomment = open * lpeg.P("!--") + local endcomment = lpeg.P("--") * close + local begininstruction = open * lpeg.P("?") + local endinstruction = lpeg.P("?") * close + local begincdata = open * lpeg.P("![CDATA[") + local endcdata = lpeg.P("]]") * close + + local someinstruction = lpeg.C((1 - endinstruction)^0) + local somecomment = lpeg.C((1 - endcomment )^0) + local somecdata = lpeg.C((1 - endcdata )^0) + + local begindoctype = open * lpeg.P("!DOCTYPE") + local enddoctype = close + local publicdoctype = lpeg.P("PUBLIC") * 
somespace * value * somespace * value * somespace * balanced^0 + local systemdoctype = lpeg.P("SYSTEM") * somespace * value * somespace * balanced^0 + local simpledoctype = (1-close)^1 * balanced^0 + local somedoctype = lpeg.C((somespace * lpeg.P(publicdoctype + systemdoctype + simpledoctype) * optionalspace)^0) + + local instruction = (spacing * begininstruction * someinstruction * endinstruction) / function(...) add_special("@pi@",...) end + local comment = (spacing * begincomment * somecomment * endcomment ) / function(...) add_special("@cm@",...) end + local cdata = (spacing * begincdata * somecdata * endcdata ) / function(...) add_special("@cd@",...) end + local doctype = (spacing * begindoctype * somedoctype * enddoctype ) / function(...) add_special("@dd@",...) end + + -- nicer but slower: + -- + -- local instruction = (lpeg.Cc("@pi@") * spacing * begininstruction * someinstruction * endinstruction) / add_special + -- local comment = (lpeg.Cc("@cm@") * spacing * begincomment * somecomment * endcomment ) / add_special + -- local cdata = (lpeg.Cc("@cd@") * spacing * begincdata * somecdata * endcdata ) / add_special + -- local doctype = (lpeg.Cc("@dd@") * spacing * begindoctype * somedoctype * enddoctype ) / add_special + + local trailer = space^0 * (justtext/set_message)^0 - -- maybe we will move the @tg@ stuff to a dedicated key, say 'st'; this will speed up - -- serializing and testing + -- comment + emptyelement + text + cdata + instruction + lpeg.V("parent"), -- 6.5 seconds on 40 MB database file + -- text + comment + emptyelement + cdata + instruction + lpeg.V("parent"), -- 5.8 + -- text + lpeg.V("parent") + emptyelement + comment + cdata + instruction, -- 5.5 - function xml.convert(data,no_root,collapse) - local crap = { } - data, crap = prepare(data, crap) - local nsremap = xml.xmlns - local remove = table.remove - local stack, top = {}, {} - local i, j, errorstr = 1, 1, nil + + local grammar = lpeg.P { "preamble", + preamble = utfbom^0 * instruction^0 
* (doctype + comment + instruction)^0 * lpeg.V("parent") * trailer, + parent = beginelement * lpeg.V("children")^0 * endelement, + children = text + lpeg.V("parent") + emptyelement + comment + cdata + instruction, + } + + function xml.convert(data, no_root) -- no collapse any more + stack, top, at, xmlns, errorstr, result = {}, {}, {}, {}, nil, nil stack[#stack+1] = top top.dt = { } - local dt = top.dt - local id = 0 - local namespaces = { } - local mt = { __tostring = xml.text } - while true do - local ni, first, attributes, last, fulltag - ni, j, first, fulltag, attributes, last = data:find("<(/-)([^%s%>/]+)%s*([^>]-)%s*(/-)>", j) - if not ni then break end - local namespace, tag = fulltag:match("^(.-):(.+)$") - if attributes ~= "" then - local t = {} - for ns, tag, _, value in attributes:gmatch("(%w-):?(%w+)=([\"\'])(.-)%3") do - if tag == "xmlns" then -- not ok yet - namespaces[#stack] = xml.resolvens(value) - elseif ns == "" then - t[tag] = value - elseif ns == "xmlns" then - xml.checkns(tag,value) - else - t[tag] = value - end - end - attributes = t - else - attributes = { } - end - if namespace then -- realtime remapping - namespace = nsremap[namespace] or namespace - else - namespace, tag = namespaces[#stack] or "", fulltag - end - local text = data:sub(i, ni-1) - if text == "" or (collapse and text:find("^%s*$")) then - -- no need for empty text nodes, beware, also packs <a>x y z</a> - -- so is not that useful unless used with empty elements - else - dt[#dt+1] = text - end - if first == "/" then - -- end tag - local toclose = remove(stack) -- remove top - top = stack[#stack] - namespaces[#stack] = nil - if #stack < 1 then - errorstr = string.format("nothing to close with %s", tag) - break - elseif toclose.tg ~= tag then -- no namespace check - errorstr = string.format("unable to close %s with %s", toclose.tg, tag) - break - end - if tag:find("^@..@$") then - dt[1] = crap[tonumber(dt[1])] or "" - end - dt = top.dt - dt[#dt+1] = toclose - elseif last == "/" 
then - -- empty element tag - dt[#dt+1] = { ns = namespace, tg = tag, dt = { }, at = attributes, __p__ = top } - -- setmetatable(top, { __tostring = xml.text }) - setmetatable(top, mt) - else - -- begin tag - top = { ns = namespace, tg = tag, dt = { }, at = attributes, __p__ = stack[#stack] } - -- setmetatable(top, { __tostring = xml.text }) - setmetatable(top, mt) - dt = top.dt - stack[#stack+1] = top - end - i = j + 1 - end - if not errorstr then - local text = data:sub(i) - if dt and not text:find("^%s*$") then - dt[#dt+1] = text - end - if #stack > 1 then - errorstr = string.format("unclosed %s", stack[#stack].tg) - end + dt = top.dt + if not data or data == "" then + errorstr = "empty xml file" + elseif not grammar:match(data) then + errorstr = "invalid xml file" end if errorstr then - stack = { { tg = "error", dt = { errorstr } } } - -- setmetatable(stack, { __tostring = xml.text }) + result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={} } } } setmetatable(stack, mt) - end - if no_root then - return stack[1] + if xml.error_handler then xml.error_handler("load",errorstr) end else - local t = { ns = "", tg = '@rt@', dt = stack[1].dt } - -- setmetatable(t, { __tostring = xml.text }) - setmetatable(t, mt) - for k,v in ipairs(t.dt) do - if type(v) == "table" and v.tg ~= "@pi@" and v.tg ~= "@dd@" and v.tg ~= "@cm@" then - t.ri = k -- rootindex + result = stack[1] + end + if not no_root then + result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={} } + setmetatable(result, mt) + for k,v in ipairs(result.dt) do + if type(v) == "table" and not v.special then -- always table -) + result.ri = k -- rootindex break end end - return t end + return result end - function xml.copy(old,tables,parent) -- fast one - tables = tables or { } - if old then - local new = { } - if not table[old] then - table[old] = new - end - for i,v in pairs(old) do - -- new[i] = (type(v) == "table" and (table[v] or xml.copy(v, tables, table))) or v - if type(v) == 
"table" then - new[i] = table[v] or xml.copy(v, tables, table) - else - new[i] = v - end - end - local mt = getmetatable(old) - if mt then - setmetatable(new,mt) - end - return new - else - return { } - end + --[[ldx-- + <p>Packaging data in an xml like table is done with the following + function. Maybe it will go away (when not used).</p> + --ldx]]-- + + function xml.package(tag,attributes,data) + local ns, tg = tag:match("^(.-):?([^:]+)$") + local t = { ns = ns, tg = tg, dt = data or "", at = attributes or {} } + setmetatable(t, mt) + return t end + xml.error_handler = (logs and logs.report) or print + end -function xml.load(filename,collapse) +--[[ldx-- +<p>We cannot load an <l n='lpeg'/> from a filehandle so we need to load +the whole file first. The function accepts a string representing +a filename or a file handle.</p> +--ldx]]-- + +function xml.load(filename) if type(filename) == "string" then - local root, f = { }, io.open(filename,'r') -- no longer 'rb' + local root, f = { }, io.open(filename,'r') if f then - root = xml.convert(f:read("*all"),false,collapse) + root = xml.convert(f:read("*all")) f:close() + else + -- if we want an error: root = xml.convert("") end - return root + return root -- no nil but an empty table if it fails else - return xml.convert(filename:read("*all"),false,collapse) + return xml.convert(filename:read("*all")) end end -function xml.root(root) - return (root.ri and root.dt[root.ri]) or root +--[[ldx-- +<p>When we inject new elements, we need to convert strings to +valid trees, which is what the next function does.</p> +--ldx]]-- + +function xml.toxml(data) + if type(data) == "string" then + local root = { xml.convert(data,true) } + return (#root > 1 and root) or root[1] + else + return data + end end -function xml.toxml(data,collapse) - local t = { xml.convert(data,true,collapse) } - if #t > 1 then - return t +--[[ldx-- +<p>For copying a tree we use a dedicated function instead of the +generic table copier. 
Since we know what we're dealing with we +can speed up things a bit. The second argument is not to be used!</p> +--ldx]]-- + +function xml.copy(old,tables) + if old then + tables = tables or { } + local new = { } + if not tables[old] then + tables[old] = new + end + for k,v in pairs(old) do + new[k] = (type(v) == "table" and (tables[v] or xml.copy(v, tables))) or v + end + local mt = getmetatable(old) + if mt then + setmetatable(new,mt) + end + return new else - return t[1] + return { } end end -function xml.serialize(e, handle, textconverter, attributeconverter) - handle = handle or (tex and tex.sprint) or io.write - if not e then - -- quit - elseif e.command and xml.command then -- test for command == "" ? - xml.command(e) - elseif e.tg then - local format, serialize = string.format, xml.serialize - local ens, etg, eat, edt = e.ns, e.tg, e.at, e.dt - -- no spaces, so no flush needed (check) - if etg == "@pi@" then - handle(format("<?%s?>",edt[1])) - elseif etg == "@cm@" then - handle(format("<!--%s-->",edt[1])) - elseif etg == "@cd@" then - handle(format("<![CDATA[%s]]>",edt[1])) - elseif etg == "@dd@" then - handle(format("<!DOCTYPE %s>",edt[1])) - elseif etg == "@rt@" then - serialize(edt,handle,textconverter,attributeconverter) +--[[ldx-- +<p>In <l n='context'/> serializing the tree or parts of the tree is a major +actitivity which is why the following function is pretty optimized resulting +in a few more lines of code than needed. 
The variant that uses the formatting +function for all components is about 15% slower than the concatinating +alternative.</p> +--ldx]]-- + +do + + -- todo: add <?xml version='1.0' standalone='yes'?> when not present + + local fallbackhandle = (tex and tex.sprint) or io.write + + function xml.serialize(e, handle, textconverter, attributeconverter, specialconverter, nocommands) + if not e then + -- quit + elseif not nocommands and e.command and xml.command then + xml.command(e) else - local ats = eat and next(eat) and { } - if ats then - if attributeconverter then - for k,v in pairs(eat) do - ats[#ats+1] = format('%s=%q',k,attributeconverter(v)) + handle = handle or fallbackhandle + local etg = e.tg + if etg then + -- local format = string.format + if e.special then + local edt = e.dt + local spc = specialconverter and specialconverter[etg] + if spc then + local result = spc(edt[1]) + if result then + handle(result) + else + -- no need to handle any further + end + elseif etg == "@pi@" then + -- handle(format("<?%s?>",edt[1])) + handle("<?" .. edt[1] .. "?>") -- maybe table.join(edt) + elseif etg == "@cm@" then + -- handle(format("<!--%s-->",edt[1])) + handle("<!--" .. edt[1] .. "-->") + elseif etg == "@cd@" then + -- handle(format("<![CDATA[%s]]>",edt[1])) + handle("<![CDATA[" .. edt[1] .. "]]>") + elseif etg == "@dd@" then + -- handle(format("<!DOCTYPE %s>",edt[1])) + handle("<!DOCTYPE " .. edt[1] .. 
">") + elseif etg == "@rt@" then + xml.serialize(edt,handle,textconverter,attributeconverter,specialconverter,nocommands) end else - for k,v in pairs(eat) do - ats[#ats+1] = format('%s=%q',k,v) - end - end - end - if ens ~= "" then - if edt and #edt > 0 then + local ens, eat, edt, ern = e.ns, e.at, e.dt, e.rn + local ats = eat and next(eat) and { } if ats then - handle(format("<%s:%s %s>",ens,etg,table.concat(ats," "))) - else - handle(format("<%s:%s>",ens,etg)) + local format = string.format + if attributeconverter then + for k,v in pairs(eat) do + ats[#ats+1] = format('%s=%q',k,attributeconverter(v)) + end + else + for k,v in pairs(eat) do + ats[#ats+1] = format('%s=%q',k,v) + end + end end - for i=1,#edt do - serialize(edt[i],handle,textconverter,attributeconverter) + if ern and xml.trace_remap then + if ats then + ats[#ats+1] = string.format("xmlns:remapped='%s'",ern) + else + ats = { string.format("xmlns:remapped='%s'",ern) } + end end - handle(format("</%s:%s>",ens,etg)) - else - if ats then - handle(format("<%s:%s %s/>",ens,etg,table.concat(ats," "))) + if ens ~= "" then + if edt and #edt > 0 then + if ats then + -- handle(format("<%s:%s %s>",ens,etg,table.concat(ats," "))) + handle("<" .. ens .. ":" .. etg .. " " .. table.concat(ats," ") .. ">") + else + -- handle(format("<%s:%s>",ens,etg)) + handle("<" .. ens .. ":" .. etg .. ">") + end + local serialize = xml.serialize + for i=1,#edt do + local e = edt[i] + if type(e) == "string" then + if textconverter then + handle(textconverter(e)) + else + handle(e) + end + else + serialize(e,handle,textconverter,attributeconverter,specialconverter,nocommands) + end + end + -- handle(format("</%s:%s>",ens,etg)) + handle("</" .. ens .. ":" .. etg .. ">") + else + if ats then + -- handle(format("<%s:%s %s/>",ens,etg,table.concat(ats," "))) + handle("<%" .. ens .. ":" .. etg .. table.concat(ats," ") .. "/>") + else + -- handle(format("<%s:%s/>",ens,etg)) + handle("<%" .. ens .. ":" .. 
"/>") + end + end else - handle(format("<%s:%s/>",ens,etg)) + if edt and #edt > 0 then + if ats then + -- handle(format("<%s %s>",etg,table.concat(ats," "))) + handle("<" .. etg .. " " .. table.concat(ats," ") .. ">") + else + -- handle(format("<%s>",etg)) + handle("<" .. etg .. ">") + end + local serialize = xml.serialize + for i=1,#edt do + serialize(edt[i],handle,textconverter,attributeconverter,specialconverter,nocommands) + end + -- handle(format("</%s>",etg)) + handle("</" .. etg .. ">") + else + if ats then + -- handle(format("<%s %s/>",etg,table.concat(ats," "))) + handle("<" .. etg .. table.concat(ats," ") .. "/>") + else + -- handle(format("<%s/>",etg)) + handle("<" .. etg .. "/>") + end + end end end - else - if edt and #edt > 0 then - if ats then - handle(format("<%s %s>",etg,table.concat(ats," "))) - else - handle(format("<%s>",etg)) - end - for i=1,#edt do - serialize(edt[i],handle,textconverter,attributeconverter) - end - handle(format("</%s>",etg)) + elseif type(e) == "string" then + if textconverter then + handle(textconverter(e)) else - if ats then - handle(format("<%s %s/>",etg,table.concat(ats," "))) - else - handle(format("<%s/>",etg)) - end + handle(e) + end + else + local serialize = xml.serialize + for i=1,#e do + serialize(e[i],handle,textconverter,attributeconverter,specialconverter,nocommands) end end end - elseif type(e) == "string" then - if textconverter then - handle(textconverter(e)) - else - handle(e) - end - else - for i=1,#e do - xml.serialize(e[i],handle,textconverter,attributeconverter) + end + + function xml.checkbom(root) + if root.ri then + local dt, found = root.dt, false + for k,v in ipairs(dt) do + if type(v) == "table" and v.special and v.tg == "@pi" and v.dt:find("xml.*version=") then + found = true + break + end + end + if not found then + table.insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } ) + table.insert(dt, 2, "\n" ) + end end end + end -function xml.string(e,handle) 
-- weird one that may become obsolete - if e.tg then +--[[ldx-- +<p>At the cost of some 25% runtime overhead you can first convert the tree to a string +and then handle the lot.</p> +--ldx]]-- + +function xml.tostring(root) -- 25% overhead due to collecting + if root then + if type(root) == 'string' then + return root + elseif next(root) then + local result = { } + xml.serialize(root,function(s) result[#result+1] = s end) + return table.concat(result,"") + end +end + return "" +end + +--[[ldx-- +<p>The next function operated on the content only and needs a handle function +that accepts a string.</p> +--ldx]]-- + +function xml.string(e,handle) + if not handle or (e.special and e.tg ~= "@rt@") then + -- nothing + elseif e.tg then local edt = e.dt if edt then for i=1,#edt do @@ -1833,6 +2058,21 @@ function xml.string(e,handle) -- weird one that may become obsolete end end +--[[ldx-- +<p>How you deal with saving data depends on your preferences. For a 40 MB database +file the timing on a 2.3 Core Duo are as follows (time in seconds):</p> + +<lines> +1.3 : load data from file to string +6.1 : convert string into tree +5.3 : saving in file using xmlsave +6.8 : converting to string using xml.tostring +3.6 : saving converted string in file +</lines> + +<p>The save function is given below.</p> +--ldx]]-- + function xml.save(root,name) local f = io.open(name,"w") if f then @@ -1841,535 +2081,67 @@ function xml.save(root,name) end end -function xml.stringify(root) - if root then - if type(root) == 'string' then - return root - elseif next(root) then - local result = { } - xml.serialize(root,function(s) result[#result+1] = s end) - return table.concat(result,"") - end - end - return "" -end - -xml.tostring = xml.stringify - -do - - -- print - - local newline = lpeg.P("\n") - local space = lpeg.P(" ") - local content = lpeg.C((1-newline)^1) - - if tex then - - -- taco: we need a kind of raw print into tex, i.e. 
embedded \n's become lineendings - -- for tex and an empty line a par; could be a c-wrapper around existing stuff; i - -- played a lot with tex.print but that does not work ok (should be obeylines save) - - local buffer = {} - - local function cprint(s) - buffer[#buffer+1] = s - end - local function nprint( ) - if #buffer > 0 then - if xml.trace_print then - texio.write_nl(string.format("tex.print : [[[%s]]]", table.join(buffer))) - end - tex.print(table.join(buffer)) - buffer = {} - else - if xml.trace_print then - texio.write_nl(string.format("tex.print : [[[%s]]]", "")) - end - tex.print("") - end - end - local function fprint() - if #buffer > 0 then - if xml.trace_print then - texio.write_nl(string.format("tex.sprint: [[[%s]]]", table.join(buffer))) - end - tex.sprint(table.join(buffer)) - buffer = { } - end - end - - local line_n = newline / nprint - local line_c = content / cprint - local capture = (line_n + line_c)^0 - - local function sprint(root) - if not root then - -- quit - elseif type(root) == 'string' then - lpeg.match(capture,root) - elseif next(root) then - xml.serialize(root, sprint, nil, nil, true) - end - end - - function xml.sprint(root) - buffer = {} - sprint(root) - if #buffer > 0 then - nprint() - end - end - - xml.sflush = fprint - - else - - function xml.sprint(root) - if not root then - -- quit - elseif type(root) == 'string' then - print(root) - elseif next(root) then - xml.serialize(root, xml.sprint, nil, nil, true) - end - end - - end - - function xml.tprint(root) - if type(root) == "table" then - for i=1,#root do - xml.sprint(root[i]) - end - elseif type(root) == "string" then - xml.sprint(root) - end - end - - -- lines (looks hackery, but we cannot pass variables in capture functions) - - local buffer, flush = {}, nil - - local function cprint(s) - buffer[#buffer+1] = s - end - local function nprint() - flush() - end - - local line_n = newline / nprint - local line_c = content / cprint - local capture = (line_n + line_c)^0 - - 
function lines(root) - if not root then - -- quit - elseif type(root) == 'string' then - lpeg.match(capture,root) - elseif next(root) then - xml.serialize(root, lines) - end - end - - function xml.lines(root) - local result = { } - flush = function() - result[#result+1] = table.join(buffer) - buffer = { } - end - buffer = {} - lines(root) - if #buffer > 0 then - result[#result+1] = table.join(buffer) - end - return result - end +--[[ldx-- +<p>A few helpers:</p> +--ldx]]-- +function xml.body(root) + return (root.ri and root.dt[root.ri]) or root end function xml.text(root) - return (root and xml.stringify(root)) or "" + return (root and xml.tostring(root)) or "" end function xml.content(root) return (root and root.dt and xml.tostring(root.dt)) or "" end -function xml.body(t) -- removes initial pi - if t and t.dt and t.tg == "@rt@" then - for k,v in ipairs(t.dt) do - if type(v) == "table" and v.tg ~= "@pi@" then - return v - end - end - end - return t -end +--[[ldx-- +<p>The next helper erases an element but keeps the table as it is, +and since empty strings are not serialized (effectively) it does +not harm. Copying the table would take more time. Usage:</p> --- call: e[k] = xml.empty() or xml.empty(e,k) +<typing> +dt[k] = xml.empty() or xml.empty(dt,k) +</typing> +--ldx]]-- -function xml.empty(e,k) -- erases an element but keeps the table intact - if e and k then - e[k] = "" - return e[k] +function xml.empty(dt,k) + if dt and k then + dt[k] = "" + return dt[k] else return "" end end --- call: e[k] = xml.assign(t) or xml.assign(e,k,t) +--[[ldx-- +<p>The next helper assigns a tree (or string). 
Usage:</p> + +<typing> +dt[k] = xml.assign(root) or xml.assign(dt,k,root) +</typing> +--ldx]]-- -function xml.assign(e,k,t) -- assigns xml tree / more testing will be done - if e and k then - if type(t) == "table" then - e[k] = xml.body(t) - else - e[k] = t -- no parsing - end - return e[k] +function xml.assign(dt,k,root) + if dt and k then + dt[k] = (type(root) == "table" and xml.body(root)) or root + return dt[k] else - return xml.body(t) + return xml.body(root) end end --- 0=nomatch 1=match 2=wildcard 3=ancestor - --- "tag" --- "tag1/tag2/tag3" --- "*/tag1/tag2/tag3" --- "/tag1/tag2/tag3" --- "/tag1/tag2|tag3" --- "tag[@att='value'] --- "tag1|tag2[@att='value'] - -function xml.tag(e) - return e.tg or "" -end - -function xml.att(e,a) - return (e.at and e.at[a]) or "" -end - -xml.attribute = xml.att - ---~ local cache = { } - ---~ local function f_fault ( ) return 0 end ---~ local function f_wildcard( ) return 2 end ---~ local function f_result (b) if b then return 1 else return 0 end end - ---~ function xml.lpath(str) --maybe @rt@ special ---~ if not str or str == "" then ---~ str = "*" ---~ end ---~ local m = cache[str] ---~ if not m then ---~ -- todo: text() ---~ if type(str) == "table" then ---~ if xml.trace_lpath then print("lpath", "table" , "inherit") end ---~ m = str ---~ elseif str == "/" then ---~ if xml.trace_lpath then print("lpath", "/", "root") end ---~ m = false ---~ elseif str == "*" then ---~ if xml.trace_lpath then print("lpath", "no string or *", "wildcard") end ---~ m = true ---~ else ---~ str = str:gsub("^//","") -- any ---~ if str == "" then ---~ if xml.trace_lpath then print("lpath", "//", "wildcard") end ---~ m = true ---~ else ---~ m = { } ---~ if not str:find("^/") then ---~ m[1] = 2 ---~ end ---~ for v in str:gmatch("([^/]+)") do ---~ if v == "" or v == "*" then ---~ if #m > 0 then -- when not, then we get problems with root being second (after <?xml ...?> (we could start at dt[2]) ---~ if xml.trace_lpath then print("lpath", "empty or 
*", "wildcard") end ---~ m[#m+1] = 2 ---~ end ---~ elseif v == ".." then ---~ if xml.trace_lpath then print("lpath", "..", "ancestor") end ---~ m[#m+1] = 3 ---~ else ---~ local a, b = v:match("^(.+)::(.-)$") ---~ if a and b then ---~ if a == "ancestor" then ---~ if xml.trace_lpath then print("lpath", a, "ancestor") end ---~ m[#m+1] = 3 ---~ -- todo: b ---~ elseif a == "pi" then ---~ if xml.trace_lpath then print("lpath", a, "processing instruction") end ---~ local expr = "^" .. b .. " " ---~ m[#m+1] = function(e) ---~ if e.tg == '@pi@' and e.dt[1]:find(expr) then ---~ return 6 ---~ else ---~ return 0 ---~ end ---~ end ---~ end ---~ else ---~ local n, a, t = v:match("^(.-)%[@(.-)=(.-)%]$") ---~ if n and a and t then ---~ -- todo: namespace, negate ---~ -- t = t:gsub("^\'(.*)\'$", "%1") ---~ -- t = t:gsub("^\"(.*)\"$", "%1") ---~ -- t = t:sub(2,-2) -- "" or '' mandate ---~ t = t:gsub("^([\'\"])(.-)%1$", "%2") ---~ if n:find("|") then ---~ local tt = n:split("|") ---~ if xml.trace_lpath then print("lpath", "match", t, n) end ---~ m[#m+1] = function(e,i) ---~ for i=1,#tt do ---~ if e.at and e.tg == tt[i] and e.at[a] == t then return 1 end ---~ end ---~ return 0 ---~ end ---~ else ---~ if xml.trace_lpath then print("lpath", "match", t, n) end ---~ m[#m+1] = function(e) ---~ if e.at and e.ns == s and e.tg == n and e.at[a] == t then ---~ return 1 ---~ else ---~ return 0 ---~ end ---~ end ---~ end ---~ else -- todo, better tracing (string.format, ook negate etc) ---~ local negate = v:sub(1,1) == '^' ---~ if negate then v = v:sub(2) end ---~ if v:find("|") then ---~ local t = { } ---~ for s in v:gmatch("([^|]+)") do ---~ local ns, tg = s:match("^(.-):(.+)$") ---~ if tg == "*" then ---~ if xml.trace_lpath then print("lpath", "or wildcard", ns, tg) end ---~ t[#t+1] = function(e) return e.ns == ns end ---~ elseif tg then ---~ if xml.trace_lpath then print("lpath", "or match", ns, tg) end ---~ t[#t+1] = function(e) return e.ns == ns and e.tg == tg end ---~ else ---~ if 
xml.trace_lpath then print("lpath", "or match", s) end ---~ t[#t+1] = function(e) return e.ns == "" and e.tg == s end ---~ end ---~ end ---~ if negate then ---~ m[#m+1] = function(e) ---~ for i=1,#t do if t[i](e) then return 0 end end return 1 ---~ end ---~ else ---~ m[#m+1] = function(e) ---~ for i=1,#t do if t[i](e) then return 1 end end return 0 ---~ end ---~ end ---~ else ---~ if xml.trace_lpath then print("lpath", "match", v) end ---~ local ns, tg = v:match("^(.-):(.+)$") ---~ if not tg then ns, tg = "", v end ---~ if tg == "*" then ---~ if ns ~= "" then ---~ m[#m+1] = function(e) ---~ if ns == e.ns then return 1 else return 0 end ---~ end ---~ end ---~ elseif negate then ---~ m[#m+1] = function(e) ---~ if ns == e.ns and tg == e.tg then return 0 else return 1 end ---~ end ---~ else ---~ m[#m+1] = function(e) ---~ if ns == e.ns and tg == e.tg then return 1 else return 0 end ---~ end ---~ end ---~ end ---~ end ---~ end ---~ end ---~ end ---~ end ---~ end ---~ if xml.trace_lpath then ---~ print("# lpath criteria:", (type(m) == "table" and #m) or "none") ---~ end ---~ cache[str] = m ---~ end ---~ return m ---~ end - ---~ -- if handle returns true, then quit - ---~ function xml.traverse(root,pattern,handle,reverse,index,wildcard) ---~ if not root then -- error ---~ return false ---~ elseif pattern == false then -- root ---~ handle(root,root.dt,root.ri) ---~ return false ---~ elseif pattern == true then -- wildcard ---~ local traverse = xml.traverse ---~ local rootdt = root.dt ---~ if rootdt then ---~ local start, stop, step = 1, #rootdt, 1 ---~ if reverse then ---~ start, stop, step = stop, start, -1 ---~ end ---~ for k=start,stop,step do ---~ if handle(root,rootdt,root.ri or k) then return false end ---~ if not traverse(rootdt[k],true,handle,reverse) then return false end ---~ end ---~ end ---~ return false ---~ elseif root and root.dt then ---~ index = index or 1 ---~ local match = pattern[index] or f_wildcard ---~ local traverse = xml.traverse ---~ local rootdt 
= root.dt ---~ local start, stop, step = 1, #rootdt, 1 ---~ if reverse and index == #pattern then -- maybe no index test here / error? ---~ start, stop, step = stop, start, -1 ---~ end ---~ for k=start,stop,step do ---~ local e = rootdt[k] ---~ if e.tg then ---~ local m = (type(match) == "function" and match(e,root)) or match ---~ if m == 1 then -- match ---~ if index < #pattern then ---~ if not traverse(e,pattern,handle,reverse,index+1) then return false end ---~ else ---~ if handle(root,rootdt,root.ri or k) then ---~ return false ---~ end ---~ -- tricky, where do we pick up, is this ok now ---~ if pattern[1] == 2 then -- start again with new root (we need a way to inhibit this) ---~ if not traverse(e,pattern,handle,reverse,1) then return false end ---~ end ---~ end ---~ elseif m == 2 then -- wildcard ---~ if index < #pattern then ---~ -- <parent><a><b></b><c></c></a></parent> : "a" (true) "/a" (true) "b" (true) "/b" (false) ---~ -- not good yet, we need to pick up any prev level which is 2 ---~ local p = pattern[2] ---~ if index == 1 and p then ---~ local mm = (type(p) == "function" and p(e,root)) or p -- pattern[2](e,root) ---~ if mm == 1 then ---~ if #pattern == 2 then ---~ if handle(root,rootdt,k) then ---~ return false ---~ end ---~ -- hack ---~ if pattern[1] == 2 then -- start again with new root (we need a way to inhibit this) ---~ if not traverse(e,pattern,handle,reverse,1) then return false end ---~ end ---~ else ---~ if not traverse(e,pattern,handle,reverse,3) then return false end ---~ end ---~ else ---~ if not traverse(e,pattern,handle,reverse,index+1,true) then return false end ---~ end ---~ else ---~ if not traverse(e,pattern,handle,reverse,index+1,true) then return false end ---~ end ---~ elseif handle(root,rootdt,k) then ---~ return false ---~ end ---~ elseif m == 3 then -- ancestor ---~ local ep = e.__p__ ---~ if index < #pattern then ---~ if not traverse(ep,pattern,handle,reverse,index+1) then return false end ---~ elseif handle(root,rootdt,k) 
then ---~ return false ---~ end ---~ elseif m == 4 then -- just root ---~ if handle(root,rootdt,k) then ---~ return false ---~ end ---~ elseif m == 6 then -- pi ---~ if handle(root,rootdt,k) then ---~ return false ---~ end ---~ elseif wildcard then -- maybe two kind of wildcards: * ** // ---~ if not traverse(e,pattern,handle,reverse,index,wildcard) then return false end ---~ end ---~ end ---~ end ---~ end ---~ return true ---~ end - ---~ Y a/b ---~ Y /a/b ---~ Y a/*/b ---~ Y a//b ---~ Y child:: ---~ Y .// ---~ Y .. ---~ N id("tag") ---~ Y parent:: ---~ Y child:: ---~ N preceding-sibling:: (same name) ---~ N following-sibling:: (same name) ---~ N preceding-sibling-of-self:: (same name) ---~ N following-sibling-or-self:: (same name) ---~ Y ancestor:: ---~ N descendent:: ---~ N preceding:: ---~ N following:: ---~ N self::node() ---~ N node() == alles ---~ N a[position()=5] ---~ Y a[5] ---~ Y a[-5] ---~ N a[first()] ---~ N a[last()] ---~ Y a/(b|c|d)/e/f ---~ N (c/d|e) ---~ Y a/b[@bla] ---~ Y a/b[@bla='oeps'] ---~ Y a/b[@bla=='oeps'] ---~ Y a/b[@bla<>'oeps'] ---~ Y a/b[@bla!='oeps'] ---~ Y a/b/@bla - ---~ Y ^/a/c (root) ---~ Y ^^/a/c (docroot) ---~ Y root::a/c (docroot) - ---~ no wild card functions (yet) - ---~ s = "/a//b/*/(c|d|e)/(f|g)/h[4]/h/child::i/j/(a/b)/p[-1]/q[4]/ancestor::q/r/../s/./t[@bla='true']/k" - --- // == /**/ --- / = ^ (root) +--[[ldx-- +<p>We've now arrived at an interesting part: accessing the tree using a subset +of <l n='xpath'/> and since we're not compatible we call it <l n='lpath'/>. 
We +will explain more about its usage in other documents.</p> +--ldx]]-- do - function analyze(str) - if not str then - return "" - else - local tmp, result, map, key = { }, { }, { }, str - str = str:gsub("(%b[])", function(s) tmp[#tmp+1] = s return '[['..#tmp..']]' end) - str = str:gsub("(%b())", function(s) tmp[#tmp+1] = s return '[['..#tmp..']]' end) - str = str:gsub("(%^+)([^/])", "%1/%2") - str = str:gsub("//+", "/**/") - str = str:gsub(".*root::", "^/") - str = str:gsub("child::", "") - str = str:gsub("ancestor::", "../") - str = str:gsub("self::", "./") - str = str:gsub("^/", "^/") - for s in str:gmatch("([^/]+)") do - s = s:gsub("%[%[(%d+)%]%]",function(n) return tmp[tonumber(n)] end) - result[#result+1] = s - end - cache[key] = result - return result - end - end - - actions = { + local actions = { [10] = "stay", [11] = "parent", [12] = "subtree root", @@ -2381,112 +2153,168 @@ do [21] = "match one of", [22] = "match and attribute eq", [23] = "match and attribute ne", - [23] = "match and attribute present", + [24] = "match one of and attribute eq", + [25] = "match one of and attribute ne", + [27] = "has attribute", + [28] = "has value", + [29] = "fast match", [30] = "select", [40] = "processing instruction", } - function compose(result) - if not result or #result == 0 then + local map = { } + + local space = lpeg.S(' \r\n\t') + local squote = lpeg.S("'") + local dquote = lpeg.S('"') + local lparent = lpeg.P('(') + local rparent = lpeg.P(')') + local atsign = lpeg.P('@') + local lbracket = lpeg.P('[') + local rbracket = lpeg.P(']') + local exclam = lpeg.P('!') + local period = lpeg.P('.') + local eq = lpeg.P('==') + lpeg.P('=') + local ne = lpeg.P('<>') + lpeg.P('!=') + local star = lpeg.P('*') + local slash = lpeg.P('/') + local colon = lpeg.P(':') + local bar = lpeg.P('|') + local hat = lpeg.P('^') + local valid = lpeg.R('az', 'AZ', '09') + lpeg.S('_-') + local name_yes = lpeg.C(valid^1) * colon * lpeg.C(valid^1) + local name_nop = lpeg.C(lpeg.P(true)) * 
lpeg.C(valid^1) + local name = name_yes + name_nop + local number = lpeg.C((lpeg.S('+-')^0 * lpeg.R('09')^1)) / tonumber + local names = (bar^0 * name)^1 + local morenames = name * (bar^0 * name)^1 + local instructiontag = lpeg.P('pi::') + local spacing = lpeg.C(space^0) + local somespace = space^1 + local optionalspace = space^0 + local text = lpeg.C(valid^0) + local value = (squote * lpeg.C((1 - squote)^0) * squote) + (dquote * lpeg.C((1 - dquote)^0) * dquote) + local empty = 1-slash + + local is_eq = lbracket * atsign * name * eq * value * rbracket + local is_ne = lbracket * atsign * name * ne * value * rbracket + local is_attribute = lbracket * atsign * name * rbracket + local is_value = lbracket * value * rbracket + local is_number = lbracket * number * rbracket + + local is_one = name + local is_none = exclam * name + local is_one_of = ((lparent * names * rparent) + morenames) + local is_none_of = exclam * ((lparent * names * rparent) + morenames) + + local stay = (period ) + local parent = (period * period ) / function( ) map[#map+1] = { 11 } end + local subtreeroot = (slash + hat ) / function( ) map[#map+1] = { 12 } end + local documentroot = (hat * hat ) / function( ) map[#map+1] = { 13 } end + local any = (star ) / function( ) map[#map+1] = { 14 } end + local many = (star * star ) / function( ) map[#map+1] = { 15 } end + local initial = (hat * hat * hat ) / function( ) map[#map+1] = { 16 } end + + local match = (is_one ) / function(...) map[#map+1] = { 20, true , ... } end + local match_one_of = (is_one_of ) / function(...) map[#map+1] = { 21, true , ... } end + local dont_match = (is_none ) / function(...) map[#map+1] = { 20, false, ... } end + local dont_match_one_of = (is_none_of ) / function(...) map[#map+1] = { 21, false, ... } end + + local match_and_eq = (is_one * is_eq ) / function(...) map[#map+1] = { 22, true , ... } end + local match_and_ne = (is_one * is_ne ) / function(...) map[#map+1] = { 23, true , ... 
} end + local dont_match_and_eq = (is_none * is_eq ) / function(...) map[#map+1] = { 22, false, ... } end + local dont_match_and_ne = (is_none * is_ne ) / function(...) map[#map+1] = { 23, false, ... } end + + local match_one_of_and_eq = (is_one_of * is_eq ) / function(...) map[#map+1] = { 24, true , ... } end + local match_one_of_and_ne = (is_one_of * is_ne ) / function(...) map[#map+1] = { 25, true , ... } end + local dont_match_one_of_and_eq = (is_none_of * is_eq ) / function(...) map[#map+1] = { 24, false, ... } end + local dont_match_one_of_and_ne = (is_none_of * is_ne ) / function(...) map[#map+1] = { 25, false, ... } end + + local has_attribute = (is_one * is_attribute) / function(...) map[#map+1] = { 27, true , ... } end + local has_value = (is_one * is_value ) / function(...) map[#map+1] = { 28, true , ... } end + local dont_has_attribute = (is_none * is_attribute) / function(...) map[#map+1] = { 27, false, ... } end + local dont_has_value = (is_none * is_value ) / function(...) map[#map+1] = { 28, false, ... } end + local position = (is_one * is_number ) / function(...) map[#map+1] = { 30, true, ... } end + local dont_position = (is_none * is_number ) / function(...) map[#map+1] = { 30, false, ... } end + + local instruction = (instructiontag * text ) / function(...) map[#map+1] = { 40, ... } end + local nothing = (empty ) / function( ) map[#map+1] = { 15 } end -- 15 ? 
+ local crap = (1-slash)^1 + + -- a few ugly goodies: + + local docroottag = lpeg.P('^^') / function( ) map[#map+1] = { 12 } end + local subroottag = lpeg.P('^') / function( ) map[#map+1] = { 13 } end + local roottag = lpeg.P('root::') / function( ) map[#map+1] = { 12 } end + local parenttag = lpeg.P('parent::') / function( ) map[#map+1] = { 11 } end + local childtag = lpeg.P('child::') + local selftag = lpeg.P('self::') + + -- there will be more and order will be optimized + + local selector = ( + instruction + + many + any + + parent + stay + + dont_position + position + + dont_match_one_of_and_eq + dont_match_one_of_and_ne + + match_one_of_and_eq + match_one_of_and_ne + + dont_match_and_eq + dont_match_and_ne + + match_and_eq + match_and_ne + + has_attribute + has_value + + dont_match_one_of + match_one_of + + dont_match + match + + crap + empty + ) + + local grammar = lpeg.P { "startup", + startup = (initial + documentroot + subtreeroot + roottag + docroottag + subroottag)^0 * lpeg.V("followup"), + followup = ((slash + parenttag + childtag + selftag)^0 * selector)^1, + } + + function compose(str) + if not str or str == "" then -- wildcard return true - elseif #result == 1 then - local r = result[1][1] - if r == "14" or r == "15" then - -- wildcard + elseif str == '/' then + -- root + return false + else + map = { } + grammar:match(str) + if #map == 0 then return true - elseif r == "12" then - -- root - return false - end - end - local map = { } - for r=1,#result do - local ri = result[r] - if ri == "." then - -- skip - elseif ri == ".." 
then - map[#map+1] = { 11 } - elseif ri == "^" then - map[#map+1] = { 12 } - elseif ri == "^^" then - map[#map+1] = { 13 } - elseif ri == "*" then - map[#map+1] = { 14 } - elseif ri == "**" then - map[#map+1] = { 15 } else - local m = ri:match("^%((.*)%)$") -- (a|b|c) - if m or ri:find('|') then - m = m or ri - if m:find("[%[%]%(%)%/]") then -- []()/ - -- error - else - local t = { 21 } - for s in m:gmatch("([^|])") do - local ns, tg = s:match("^(.-):?([^:]+)$") - t[#t+1] = ns - t[#t+1] = tg - end - map[#map+1] = t - end - else - local s, f = ri:match("^(.-)%[%s*(.+)%s*%]$") --aaa[bbb] - if s and f then - local ns, tg = s:match("^(.-):?([^:]+)$") - local at, op, vl = f:match("^@(.-)([!=<>]?)([^!=<>]+)$") -- [@a=='b'] - if op and op ~= "" then - if op == '=' or op == '==' then - map[#map+1] = { 22, ns, tg, at, (vl:gsub("^([\'\"])(.*)%1$", "%2")) } - elseif op == '<>' or op == '!=' then - map[#map+1] = { 23, ns, tg, at, (vl:gsub("^([\'\"])(.*)%1$", "%2")) } - else - -- error - end - elseif f:find("^([%-%+%d]+)$")then - map[#map+1] = { 30, ns, tg, tonumber(f) } - elseif vl ~= "" then - map[#map+1] = { 24, ns, tg, vl } - end - else - local pi = ri:match("^pi::(.-)$") - if pi then - map[#map+1] = { 40, pi } - else - map[#map+1] = { 20, ri:match("^(.-):?([^:]+)$") } - end + local m = map[1][1] + if #map == 1 then + if m == 14 or m == 15 then + -- wildcard + return true + elseif m == 12 then + -- root + return false end + elseif #map == 2 and m == 12 and map[2][1] == 20 then + return { { 29, map[2][2], map[2][3] } } end + if m ~= 11 and m ~= 12 and m ~= 13 and m ~= 14 and m ~= 15 and m ~= 16 then + table.insert(map, 1, { 16 }) + end + return map end end - -- if we have a symbol, we can prepend that to the string, which is faster - local mm = map[1] or { } - local r = mm[1] or 0 - if #map == 1 then - if r == 14 or r == 15 then - -- wildcard - return true - elseif r == 12 then - -- root - return false - end - end - if r ~= 11 and r ~= 12 and r ~= 13 and r ~= 14 and r ~= 15 
then - table.insert(map, 1, { 16 }) - end - return map end - cache = { } + local cache = { } - function xml.lpath(pattern) + function xml.lpath(pattern,trace) if type(pattern) == "string" then local result = cache[pattern] if not result then - result = compose(analyze(pattern)) + result = compose(pattern) cache[pattern] = result end - if xml.trace_lpath then + if trace or xml.trace_lpath then xml.lshow(result) end return result @@ -2495,23 +2323,58 @@ do end end - function xml.lshow(pattern) + local fallbackreport = (texio and texio.write) or io.write + + function xml.lshow(pattern,report) + report = report or fallbackreport local lp = xml.lpath(pattern) if lp == false then - print("root") + report(" -: root\n") elseif lp == true then - print("wildcard") + report(" -: wildcard\n") else - if type(pattern) ~= "table" then - print("pattern: " .. tostring(pattern)) + if type(pattern) == "string" then + report(string.format("pattern: %s\n",pattern)) end for k,v in ipairs(lp) do - print(k,actions[v[1]],table.join(v," ",2)) + if #v > 1 then + local t = { } + for i=2,#v do + local vv = v[i] + if type(vv) == "string" then + t[#t+1] = (vv ~= "" and vv) or "#" + elseif type(vv) == "boolean" then + t[#t+1] = (vv and "==") or "<>" + end + end + report(string.format("%2i: %s %s -> %s\n", k,v[1],actions[v[1]],table.join(t," "))) + else + report(string.format("%2i: %s %s\n", k,v[1],actions[v[1]])) + end end end end - function xml.traverse(root,pattern,handle,reverse,index,wildcard) +end + +--[[ldx-- +<p>An <l n='lpath'/> is converted to a table with instructions for traversing the +tree. However, simple cases are signaled by booleans. 
Because we don't know in +advance what we want to do with the found element the handle gets three arguments:</p> + +<lines> +<t>r</t> : the root element of the data table +<t>d</t> : the data table of the result +<t>t</t> : the index in the data table of the result +</lines> + +<p> Access to the root and data table makes it possible to construct insert and delete +functions.</p> +--ldx]]-- + +do + + function xml.traverse(root,pattern,handle,reverse,index,parent,wildcard) if not root then -- error return false elseif pattern == false then -- root @@ -2531,103 +2394,172 @@ do end end return false - elseif root and root.dt then + elseif root.dt then index = index or 1 local action = pattern[index] local command = action[1] - if (command == 16 or command == 12) and index == 1 then -- initial - wildcard = true - index = index + 1 - action = pattern[index] - command = action[1] - end - local traverse = xml.traverse - local rootdt = root.dt - local start, stop, step, n, dn = 1, #rootdt, 1, 0, 1 - if command == 30 then - if action[4] < 0 then - start, stop, step = stop, start, -1 - dn = -1 + if command == 29 then -- fast case /oeps + local rootdt = root.dt + for k=1,#rootdt do + local e = rootdt[k] + local ns, tg = e.rn or e.ns, e.tg + if ns == action[2] and tg == action[3] then + if handle(root,rootdt,k) then return false end + end end - elseif reverse and index == #pattern then - start, stop, step = stop, start, -1 - end - for k=start,stop,step do - local e = rootdt[k] - local ns, tg = e.ns, e.tg - if tg then + elseif command == 11 then -- parent + local ep = root.__p__ or parent + if index < #pattern then + if not xml.traverse(ep,pattern,handle,reverse,index+1,root) then return false end + elseif handle(root,rootdt,k) then + return false + end + else + if (command == 16 or command == 12) and index == 1 then -- initial + wildcard = true + index = index + 1 + action = pattern[index] + command = action and action[1] or 0 -- something is wrong + end + if command == 11 then 
-- parent + local ep = root.__p__ or parent + if index < #pattern then + if not xml.traverse(ep,pattern,handle,reverse,index+1,root) then return false end + elseif handle(root,rootdt,k) then + return false + end + else + local traverse = xml.traverse + local rootdt = root.dt + local start, stop, step, n, dn = 1, #rootdt, 1, 0, 1 if command == 30 then - if ns == action[2] and tg == action[3] then - n = n + dn - if n == action[4] then - if index == #pattern then - if handle(root,rootdt,root.ri or k) then return false end - else - if not traverse(e,pattern,handle,reverse,index+1) then return false end - end - break - end - elseif wildcard then - if not traverse(e,pattern,handle,reverse,index,true) then return false end + if action[5] < 0 then + start, stop, step = stop, start, -1 + dn = -1 end - else - local matched = false - if command == 20 then -- match - matched = ns == action[2] and tg == action[3] - elseif command == 21 then -- match one of - for i=2,#action,2 do - if ns == action[i] and tg == action[i+1] then - matched = true - break + elseif reverse and index == #pattern then + start, stop, step = stop, start, -1 + end + for k=start,stop,step do + local e = rootdt[k] + local ns, tg = e.rn or e.ns, e.tg + if tg then + if command == 30 then + local matched = ns == action[3] and tg == action[4] + if action[2] then matched = not matched end + if matched then + n = n + dn + if n == action[5] then + if index == #pattern then + if handle(root,rootdt,root.ri or k) then return false end + else + if not traverse(e,pattern,handle,reverse,index+1,root) then return false end + end + break + end + elseif wildcard then + if not traverse(e,pattern,handle,reverse,index,root,true) then return false end end - end - elseif command == 22 then -- eq - matched = ns == action[2] and tg == action[3] and e.at[action[4]] == action[5] - elseif command == 23 then -- ne - matched = ns == action[2] and tg == action[3] and e.at[action[4]] ~= action[5] - elseif command == 24 then -- present - 
matched = ns == action[2] and tg == action[3] and e.at[action[4]] - end - if matched then -- combine tg test and at test - if index == #pattern then - if handle(root,rootdt,root.ri or k) then return false end - else - if not traverse(e,pattern,handle,reverse,index+1) then return false end - end - elseif command == 14 then -- any - if index == #pattern then - if handle(root,rootdt,root.ri or k) then return false end - else - if not traverse(e,pattern,handle,reverse,index+1) then return false end - end - elseif command == 15 then -- many - if index == #pattern then - if handle(root,rootdt,root.ri or k) then return false end else - if not traverse(e,pattern,handle,reverse,index+1,true) then return false end - end - elseif command == 11 then -- parent - local ep = e.__p__ - if index < #pattern then - if not traverse(ep,pattern,handle,reverse,index+1) then return false end - elseif handle(root,rootdt,k) then - return false - end - break - elseif command == 40 and tg == "@pi@" then -- pi - local pi = action[2] - if pi ~= "" then - local pt = e.dt[1] - if pt and pt:find(pi) then - if handle(root,rootdt,k) then + local matched, multiple = false, false + if command == 20 then -- match + matched = ns == action[2] and tg == action[3] + if action[2] then matched = not matched end + elseif command == 21 then -- match one of + multiple = true + for i=2,#action,2 do + if ns == action[i] and tg == action[i+1] then matched = true break end + end + if action[2] then matched = not matched end + elseif command == 22 then -- eq + matched = ns == action[3] and tg == action[4] + if action[2] then matched = not matched end + matched = matched and e.at[action[6]] == action[7] + elseif command == 23 then -- ne + matched = ns == action[3] and tg == action[4] + if action[2] then matched = not matched end + matched = mached and e.at[action[6]] ~= action[7] + elseif command == 24 then -- one of eq + multiple = true + for i=3,#action-2,2 do + if ns == action[i] and tg == action[i+1] then matched 
= true break end + end + if action[2] then matched = not matched end + matched = matched and e.at[action[#action-1]] == action[#action] + elseif command == 25 then -- one of ne + multiple = true + for i=3,#action-2,2 do + if ns == action[i] and tg == action[i+1] then matched = true break end + end + if action[2] then matched = not matched end + matched = matched and e.at[action[#action-1]] ~= action[#action] + elseif command == 27 then -- has attribute + local ans = action[3] + matched = ns == action[3] and tg == action[4] + if action[2] then matched = not matched end + matched = matched and e.at[action[5]] + elseif command == 28 then -- has value + local edt = e.dt + matched = ns == action[3] and tg == action[4] + if action[2] then matched = not matched end + matched = matched and edt and edt[1] == action[5] + end + if matched then -- combine tg test and at test + if index == #pattern then + if handle(root,rootdt,root.ri or k) then return false end + if wildcard and multiple then + if not traverse(e,pattern,handle,reverse,index,root,true) then return false end + end + else + if not traverse(e,pattern,handle,reverse,index+1,root) then return false end + end + elseif command == 14 then -- any + if index == #pattern then + if handle(root,rootdt,root.ri or k) then return false end + else + if not traverse(e,pattern,handle,reverse,index+1,root) then return false end + end + elseif command == 15 then -- many + if index == #pattern then + if handle(root,rootdt,root.ri or k) then return false end + else + if not traverse(e,pattern,handle,reverse,index+1,root,true) then return false end + end + -- not here : 11 + elseif command == 11 then -- parent + local ep = e.__p__ or parent + if index < #pattern then + if not traverse(ep,pattern,handle,reverse,root,index+1) then return false end + elseif handle(root,rootdt,k) then + return false + end + elseif command == 40 and e.special and tg == "@pi@" then -- pi + local pi = action[2] + if pi ~= "" then + local pt = e.dt[1] + if pt 
and pt:find(pi) then + if handle(root,rootdt,k) then + return false + end + end + elseif handle(root,rootdt,k) then return false end + elseif wildcard then + if not traverse(e,pattern,handle,reverse,index,root,true) then return false end end - elseif handle(root,rootdt,k) then - return false end - elseif wildcard then - if not traverse(e,pattern,handle,reverse,index,true) then return false end + else + -- not here : 11 + if command == 11 then -- parent + local ep = e.__p__ or parent + if index < #pattern then + if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end + elseif handle(root,rootdt,k) then + return false + end + break -- else loop + end end end end @@ -2636,15 +2568,71 @@ do return true end +end + +--[[ldx-- +<p>Next come all kinds of locators and manipulators. The most generic function here +is <t>xml.filter(root,pattern)</t>. All functions registered in the filters namespace +can be part of a search path, as in:</p> + +<typing> +local r, d, k = xml.filter(root,"/a/b/c/position(4)") +</typing> +--ldx]]-- + +do + local traverse, lpath, convert = xml.traverse, xml.lpath, xml.convert xml.filters = { } + --[[ldx-- + <p>For splitting the filter function from the path specification, we can + use string matching or lpeg matching. Here the difference in speed is + negligible but the lpeg variant is more robust.</p> + --ldx]]-- + + -- function xml.filter(root,pattern) + -- local pat, fun, arg = pattern:match("^(.+)/(.-)%((.*)%)$") + -- if fun then + -- return (xml.filters[fun] or xml.filters.default)(root,pat,arg) + -- else + -- pat, arg = pattern:match("^(.+)/@(.-)$") + -- if arg then + -- return xml.filters.attributes(root,pat,arg) + -- else + -- return xml.filters.default(root,pattern) + -- end + -- end + -- end + + -- not faster but hipper ... although ... 
i can't get rid of the trailing / in the path + + local name = (lpeg.R("az","AZ")+lpeg.R("_-"))^1 + local path = lpeg.C(((1-lpeg.P('/'))^0 * lpeg.P('/'))^1) + local argument = lpeg.P { "(" * lpeg.C(((1 - lpeg.S("()")) + lpeg.V(1))^0) * ")" } + local action = lpeg.Cc(1) * path * lpeg.C(name) * argument + local attribute = lpeg.Cc(2) * path * lpeg.P('@') * lpeg.C(name) + + local parser = action + attribute + + function xml.filter(root,pattern) + local kind, a, b, c = parser:match(pattern) + if kind == 1 then + return (xml.filters[b] or xml.filters.default)(root,a,c) + elseif kind == 2 then + return xml.filters.attributes(root,a,b) + else + return xml.filters.default(root,pattern) + end + end + function xml.filters.default(root,pattern) local rt, dt, dk traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end) return dt and dt[dk], rt, dt, dk end + function xml.filters.reverse(root,pattern) local rt, dt, dk traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end, 'reverse') @@ -2698,17 +2686,14 @@ do traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk, i = r, d, k, i-1 return i == 0 end, reverse) if i == 0 then return dt and dt[dk], rt, dt, dk - else - return nil, nil, nil, nil end - else - return nil, nil, nil, nil end + return nil, nil, nil, nil end function xml.filters.attributes(root,pattern,arguments) local rt, dt, dk traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end) - local ekat = dt and dt[dk] and dt[dk].at + local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at) if ekat then if arguments then return ekat[arguments] or "", rt, dt, dk @@ -2722,69 +2707,33 @@ do function xml.filters.attribute(root,pattern,arguments) local rt, dt, dk traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end) - local ekat = dt and dt[dk] and dt[dk].at + local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at) return (ekat and ekat[arguments]) or "" end 
function xml.filters.text(root,pattern,arguments) - local ek, dt, dk, rt = xml.filters.index(root,pattern,arguments) - return (ek and ek.dt) or "", rt, dt, dk - end - - function xml.filter(root,pattern) - local pat, fun, arg = pattern:match("^(.+)/(.-)%((.*)%)$") - if fun then - return (xml.filters[fun] or xml.filters.default)(root,pat,arg) - else - pat, arg = pattern:match("^(.+)/@(.-)$") - if arg then - return xml.filters.attributes(root,pat,arg) + local dtk, rt, dt, dk = xml.filters.index(root,pattern,arguments) + if dtk then + local dtkdt = dtk.dt + if #dtkdt == 1 and type(dtkdt[1]) == "string" then + return dtkdt[1], rt, dt, dk else - return xml.filters.default(root,pattern) + return xml.tostring(dtkdt), rt, dt, dk end + else + return "", rt, dt, dk end end - xml.filters.position = xml.filters.index - - -- these may go away - - xml.index_element = xml.filters.index - xml.count_elements = xml.filters.count - xml.first_element = xml.filters.first - xml.last_element = xml.filters.last - xml.index_text = xml.filters.text - xml.first_text = function (root,pattern) return xml.filters.text(root,pattern, 1) end - xml.last_text = function (root,pattern) return xml.filters.text(root,pattern,-1) end - - -- so far - - function xml.get_text(root,pattern,reverse) - local rt, dt, dk - traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end, reverse) - local ek = dt and dt[dk] - return (ek and ek.dt) or "", rt, dt, dk - end - - function xml.each_element(root, pattern, handle, reverse) - local ok - traverse(root, lpath(pattern), function(r,d,k) ok = true handle(r,d,k) end, reverse) - return ok - end - - function xml.get_element(root,pattern,reverse) - local rt, dt, dk - traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end, reverse) - return dt and dt[dk], rt, dt, dk - end + --[[ldx-- + <p>The following functions collect elements and texts.</p> + --ldx]]-- - -- these may change - - function xml.all_elements(root, 
pattern, ignorespaces) -- ok? + function xml.collect_elements(root, pattern, ignorespaces) local rr, dd = { }, { } traverse(root, lpath(pattern), function(r,d,k) local dk = d and d[k] if dk then - if ignorespaces and type(dk) == "string" and dk:find("^[\s\n]*$") then + if ignorespaces and type(dk) == "string" and dk:find("^%s*$") then -- ignore else local n = #rr+1 @@ -2795,8 +2744,8 @@ do return dd, rr end - function xml.all_texts(root, pattern, flatten) -- crap - local t, r = { }, { } + function xml.collect_texts(root, pattern, flatten) + local t = { } -- no r collector traverse(root, lpath(pattern), function(r,d,k) if d then local ek = d[k] @@ -2813,10 +2762,76 @@ do else t[#t+1] = "" end - r[#r+1] = r end) - return t, r + return t + end + + --[[ldx-- + <p>Often using an iterators looks nicer in the code than passing handler + functions. The <l n='lua'/> book describes how to use coroutines for that + purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits + code like:</p> + + <typing> + for r, d, k in xml.elements(xml.load('text.xml'),"title") do + print(d[k]) end + </typing> + + <p>Which will print all the titles in the document. The iterator variant takes + 1.5 times the runtime of the function variant which si due to the overhead in + creating the wrapper. 
So, instead of:</p> + + <typing> + function xml.filters.first(root,pattern) + for rt,dt,dk in xml.elements(root,pattern) + return dt and dt[dk], rt, dt, dk + end + return nil, nil, nil, nil + end + </typing> + + <p>We use the function variants in the filters.</p> + --ldx]]-- + + function xml.elements(root,pattern,reverse) + return coroutine.wrap(function() traverse(root, lpath(pattern), coroutine.yield, reverse) end) + end + + function xml.each_element(root, pattern, handle, reverse) + local ok + traverse(root, lpath(pattern), function(r,d,k) ok = true handle(r,d,k) end, reverse) + return ok + end + + function xml.process_elements(root, pattern, handle) + traverse(root, lpath(pattern), function(r,d,k) + local dkdt = d[k].dt + if dkdt then + for i=1,#dkdt do + local v = dkdt[i] + if v.tg then handle(v) end + end + end + end) + end + + function xml.process_attributes(root, pattern, handle) + traverse(root, lpath(pattern), function(r,d,k) + local ek = d[k] + local a = ek.at or { } + handle(a) + if next(a) then + ek.at = a + else + ek.at = nil + end + end) + end + + --[[ldx-- + <p>We've now arrives at the functions that manipulate the tree.</p> + --ldx]]-- function xml.inject_element(root, pattern, element, prepend) if root and element then @@ -2868,7 +2883,7 @@ do function xml.insert_element(root, pattern, element, before) -- todo: element als functie if root and element then if pattern == "/" then - xml.inject_element(root, pattern, element, before) -- todo: element als functie + xml.inject_element(root, pattern, element, before) else local matches, collect = { }, nil if type(element) == "string" then @@ -2898,8 +2913,6 @@ do end end - -- first, last, each - xml.insert_element_after = xml.insert_element xml.insert_element_before = function(r,p,e) xml.insert_element(r,p,e,true) end xml.inject_element_after = xml.inject_element @@ -2930,24 +2943,47 @@ do end end - function xml.process(root, pattern, handle) - traverse(root, lpath(pattern), function(r,d,k) - if d[k].dt 
then - for k,v in ipairs(d[k].dt) do - if v.tg then handle(v) end + function xml.include(xmldata,element,attribute,pathlist,collapse) + element = element or 'ctx:include' + attribute = attribute or 'name' + pathlist = pathlist or { '.' } + -- todo, check op ri + local function include(r,d,k) + local ek = d[k] + local name = (ek.at and ek.at[attribute]) or "" + if name ~= "" then + -- maybe file lookup in tree + local fullname + for _, path in ipairs(pathlist) do + if path == '.' then + fullname = name + else + fullname = file.join(path,name) + end + local f = io.open(fullname) + if f then + xml.assign(d,k,xml.load(f,collapse)) + f:close() + break + else + xml.empty(d,k) + end end + else + xml.empty(d,k) end - end) + end + while xml.each_element(xmldata, element, include) do end end - function xml.strip(root, pattern) + function xml.strip_whitespace(root, pattern) traverse(root, lpath(pattern), function(r,d,k) local dkdt = d[k].dt - if dkdt then + if dkdt then -- can be optimized local t = { } for i=1,#dkdt do local str = dkdt[i] - if type(str) == "string" and str:find("^[\032\010\012\013]*$") then + if type(str) == "string" and str:find("^[ \n\r\t]*$") then -- stripped else t[#t+1] = str @@ -2958,8 +2994,6 @@ do end) end - -- - function xml.rename_space(root, oldspace, newspace) -- fast variant local ndt = #root.dt local rename = xml.rename_space @@ -2968,6 +3002,9 @@ do if type(e) == "table" then if e.ns == oldspace then e.ns = newspace + if e.rn then + e.rn = newspace + end end local edt = e.dt if edt then @@ -2987,83 +3024,30 @@ do d[k].ns = newns end) end - - -- function xml.process_attributes(root, pattern, handle) - -- traverse(root, lpath(pattern), function(e,k) handle(e[k].at) end) - -- end - - function xml.process_attributes(root, pattern, handle) + function xml.check_namespace(root, pattern, newns) traverse(root, lpath(pattern), function(r,d,k) - local ek = d[k] - local a = ek.at or { } - handle(a) - if next(a) then - ek.at = a - else - ek.at = nil + 
local dk = d[k] + if (not dk.rn or dk.rn == "") and dk.ns == "" then + dk.rn = newns end end) end - - function xml.package(tag,attributes,data) - local n, t = tag:match("^(.-):(.+)$") - if attributes then - return { ns = n or "", tg = t or tag, dt = data or "", at = attributes } - else - return { ns = n or "", tg = t or tag, dt = data or "" } - end - end - - -- some special functions, handy for the manual: - - function xml.gsub(t,old,new) - if t.dt then - for k,v in ipairs(t.dt) do - if type(v) == "string" then - t.dt[k] = v:gsub(old,new) - else - xml.gsub(v,old,new) - end - end - end - end - - function xml.strip_leading_spaces(ek, e, k) -- cosmetic, for manual - if e and k and e[k-1] and type(e[k-1]) == "string" then - local s = e[k-1]:match("\n(%s+)") - xml.gsub(ek,"\n"..string.rep(" ",#s),"\n") - end - end - - function xml.serialize_path(root,lpath,handle) - local ek, e, k = xml.first_element(root,lpath) - ek = xml.copy(ek) - xml.strip_leading_spaces(ek,e,k) - xml.serialize(ek,handle) - end - - -- http://www.lua.org/pil/9.3.html (or of course the book) - -- - -- it's nice to have an iterator but it comes with some extra overhead - -- - -- for r, d, k in xml.elements(xml.load('text.xml'),"title") do print(d[k]) end - - function xml.elements(root,pattern,reverse) - return coroutine.wrap(function() traverse(root, lpath(pattern), coroutine.yield, reverse) end) + function xml.remap_name(root, pattern, newtg, newns, newrn) + traverse(root, lpath(pattern), function(r,d,k) + local dk = d[k] + dk.tg = newtg + dk.ns = newns + dk.rn = newrn + end) end - -- the iterator variant needs 1.5 times the runtime of the function variant - -- - -- function xml.filters.first(root,pattern) - -- for rt,dt,dk in xml.elements(root,pattern) - -- return dt and dt[dk], rt, dt, dk - -- end - -- return nil, nil, nil, nil - -- end +end - -- todo xml.gmatch for text +--[[ldx-- +<p>Here are a few synonyms.</p> +--ldx]]-- -end +xml.filters.position = xml.filters.index xml.count = 
xml.filters.count xml.index = xml.filters.index @@ -3072,7 +3056,10 @@ xml.first = xml.filters.first xml.last = xml.filters.last xml.each = xml.each_element -xml.all = xml.all_elements +xml.process = xml.process_element +xml.strip = xml.strip_whitespace +xml.collect = xml.collect_elements +xml.all = xml.collect_elements xml.insert = xml.insert_element_after xml.inject = xml.inject_element_after @@ -3081,39 +3068,38 @@ xml.before = xml.insert_element_before xml.delete = xml.delete_element xml.replace = xml.replace_element --- a few helpers, the may move to lxml modules +--[[ldx-- +<p>The following helper functions best belong to the <t>lmxl-ini</t> +module. Some are here because we need then in the <t>mk</t> +document and other manuals, others came up when playing with +this module. Since this module is also used in <l n='mtxrun'/> we've +put them here instead of loading mode modules there then needed.</p> +--ldx]]-- -function xml.include(xmldata,element,attribute,pathlist,collapse) - element = element or 'ctx:include' - attribute = attribute or 'name' - pathlist = pathlist or { '.' } - -- todo, check op ri - local function include(r,d,k) - local ek = d[k] - local name = (ek.at and ek.at[attribute]) or "" - if name ~= "" then - -- maybe file lookup in tree - local fullname - for _, path in ipairs(pathlist) do - if path == '.' 
then - fullname = name - else - fullname = file.join(path,name) - end - local f = io.open(fullname) - if f then - xml.assign(d,k,xml.load(f,collapse)) - f:close() - break - else - xml.empty(d,k) - end +function xml.gsub(t,old,new) + if t.dt then + for k,v in ipairs(t.dt) do + if type(v) == "string" then + t.dt[k] = v:gsub(old,new) + else + xml.gsub(v,old,new) end - else - xml.empty(d,k) end end - while xml.each(xmldata, element, include) do end +end + +function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual + if d and k and d[k-1] and type(d[k-1]) == "string" then + local s = d[k-1]:match("\n(%s+)") + xml.gsub(dk,"\n"..string.rep(" ",#s),"\n") + end +end + +function xml.serialize_path(root,lpath,handle) + local dk, r, d, k = xml.first(root,lpath) + dk = xml.copy(dk) + xml.strip_leading_spaces(dk,d,k) + xml.serialize(dk,handle) end xml.escapes = { ['&'] = '&', ['<'] = '<', ['>'] = '>', ['"'] = '"' } @@ -3124,22 +3110,37 @@ function xml.unescaped(str) return str:gsub("(&.-;)", xml.unescapes) end function xml.cleansed (str) return str:gsub("<.->" , '' ) end -- "%b<>" function xml.join(t,separator,lastseparator) - local result = { } - for k,v in pairs(t) do - result[k] = xml.tostring(v) - end - if lastseparator then - return table.join(result,separator,1,#result-1) .. lastseparator .. result[#result] + if #t > 0 then + local result = { } + for k,v in pairs(t) do + result[k] = xml.tostring(v) + end + if lastseparator then + return table.join(result,separator or "",1,#result-1) .. (lastseparator or "") .. result[#result] + else + return table.join(result,separator) + end else - return table.join(result,separator) + return "" end end -do if utf then +--[[ldx-- +<p>We provide (at least here) two entity handlers. The more extensive +resolver consults a hash first, tries to convert to <l n='utf'/> next, +and finaly calls a handler when defines. 
When this all fails, the +original entity is returned.</p> +--ldx]]-- + +do if unicode and unicode.utf8 then + + xml.entities = xml.entities or { } -- xml.entities.handler == function + + local char = unicode.utf8.char local function toutf(s) - return utf.char(tonumber(s,16)) + return char(tonumber(s,16)) end function xml.utfize(root) @@ -3147,25 +3148,50 @@ do if utf then for k=1,#d do local dk = d[k] if type(dk) == "string" then - d[k] = dk:gsub("&#x(.-);",toutf) + -- test prevents copying if no match + if dk:find("&#x.-;") then + d[k] = dk:gsub("&#x(.-);",toutf) + end else xml.utfize(dk) end end end -else - function xml.utfize() - print("entity to utf conversion is not available") + + local entities = xml.entities + + local function resolve(e) + local e = entities[e] + if e then + return e + elseif e:find("#x") then + return char(tonumber(s:sub(3),16)) + else + local h = entities.handler + return (h and h(e)) or "&" .. e .. ";" + end end -end end + function xml.resolve_entities(root) + local d = root.dt + for k=1,#d do + local dk = d[k] + if type(dk) == "string" then + if dk:find("&.-;") then + d[k] = dk:gsub("&(.-);",resolve) + end + else + xml.utfize(dk) + end + end + end ---- examples +end end ---~ for _, e in ipairs(xml.filters.elements(ctxrunner.xmldata,"ctx:message")) do ---~ print(">>>",xml.tostring(e.dt)) ---~ end +--~ xml.lshow("/../../../a/(b|c)[@d='e']/f") +--~ xml.lshow("/../../../a/!(b|c)[@d='e']/f") +--~ xml.lshow("/../../../a/!b[@d!='e']/f") -- filename : l-utils.lua |