if not modules then modules = { } end modules ['lxml-tab'] = { version = 1.001, comment = "this module is the basis for the lxml-* ones", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } -- this module needs a cleanup: check latest lpeg, passing args, (sub)grammar, etc etc -- stripping spaces from e.g. cont-en.xml saves .2 sec runtime so it's not worth the -- trouble -- todo: when serializing optionally remap named entities to hex (if known in char-ent.lua) -- maybe when letter -> utf, else name .. then we need an option to the serializer .. a bit -- of work so we delay this till we cleanup local trace_entities = false trackers.register("xml.entities", function(v) trace_entities = v end) local report_xml = logs and logs.reporter("xml","core") or function(...) print(string.format(...)) end --[[ldx--

The parser used here is inspired by the variant discussed in the lua book, but handles comment and processing instructions, has a different structure, provides parent access; a first version used different trickery but was less optimized to we went this route. First we had a find based parser, now we have an based one. The find based parser can be found in l-xml-edu.lua along with other older code.

Beware, the interface may change. For instance at, ns, tg, dt may get more verbose names. Once the code is stable we will also remove some tracing and optimize the code.

I might even decide to reimplement the parser using the latest trickery as the current variant was written when showed up and it's easier now to build tables in one go.

--ldx]]-- if lpeg.setmaxstack then lpeg.setmaxstack(1000) end -- deeply nested xml files xml = xml or { } local xml = xml --~ local xml = xml local concat, remove, insert = table.concat, table.remove, table.insert local type, next, setmetatable, getmetatable, tonumber, rawset = type, next, setmetatable, getmetatable, tonumber, rawset local lower, find, match, gsub = string.lower, string.find, string.match, string.gsub local utfchar = utf.char local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns local P, S, R, C, V, C, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.C, lpeg.Cs local formatters = string.formatters --[[ldx--

First a hack to enable namespace resolving. A namespace is characterized by a . The following function associates a namespace prefix with a pattern. We use , which in this case is more than twice as fast as a find based solution where we loop over an array of patterns. Less code and much cleaner.

--ldx]]-- xml.xmlns = xml.xmlns or { } local check = P(false) local parse = check --[[ldx--

The next function associates a namespace prefix with an . This normally happens independent of parsing.

xml.registerns("mml","mathml") --ldx]]-- function xml.registerns(namespace, pattern) -- pattern can be an lpeg check = check + C(P(lower(pattern))) / namespace parse = P { P(check) + 1 * V(1) } end --[[ldx--

The next function also registers a namespace, but this time we map a given namespace prefix onto a registered one, using the given . This used for attributes like xmlns:m.

xml.checkns("m","http://www.w3.org/mathml") --ldx]]-- function xml.checkns(namespace,url) local ns = lpegmatch(parse,lower(url)) if ns and namespace ~= ns then xml.xmlns[namespace] = ns end end --[[ldx--

Next we provide a way to turn an into a registered namespace. This used for the xmlns attribute.

resolvedns = xml.resolvens("http://www.w3.org/mathml") This returns mml. --ldx]]-- function xml.resolvens(url) return lpegmatch(parse,lower(url)) or "" end --[[ldx--

A namespace in an element can be remapped onto the registered one efficiently by using the xml.xmlns table.

--ldx]]-- --[[ldx--

This version uses . We follow the same approach as before, stack and top and such. This version is about twice as fast which is mostly due to the fact that we don't have to prepare the stream for cdata, doctype etc etc. This variant is is dedicated to Luigi Scarso, who challenged me with 40 megabyte files that took 12.5 seconds to load (1.5 for file io and the rest for tree building). With the implementation we got that down to less 7.3 seconds. Loading the 14 interface definition files (2.6 meg) went down from 1.05 seconds to 0.55.

Next comes the parser. The rather messy doctype definition comes in many disguises so it is no surprice that later on have to dedicate quite some code to it.

The code may look a bit complex but this is mostly due to the fact that we resolve namespaces and attach metatables. There is only one public function:

local x = xml.convert(somestring)

An optional second boolean argument tells this function not to create a root element.

Valid entities are:

--ldx]]-- -- not just one big nested table capture (lpeg overflow) local nsremap, resolvens = xml.xmlns, xml.resolvens local stack = { } local top = { } local dt = { } local at = { } local xmlns = { } local errorstr = nil local entities = { } local strip = false local cleanup = false local utfize = false local resolve_predefined = false local unify_predefined = false local dcache = { } local hcache = { } local acache = { } local mt = { } local function initialize_mt(root) mt = { __index = root } -- will be redefined later end function xml.setproperty(root,k,v) getmetatable(root).__index[k] = v end function xml.checkerror(top,toclose) return "" -- can be set end local function add_attribute(namespace,tag,value) if cleanup and #value > 0 then value = cleanup(value) -- new end if tag == "xmlns" then xmlns[#xmlns+1] = resolvens(value) at[tag] = value elseif namespace == "" then at[tag] = value elseif namespace == "xmlns" then xml.checkns(tag,value) at["xmlns:" .. tag] = value else -- for the moment this way: at[namespace .. ":" .. tag] = value end end local function add_empty(spacing, namespace, tag) if #spacing > 0 then dt[#dt+1] = spacing end local resolved = namespace == "" and xmlns[#xmlns] or nsremap[namespace] or namespace top = stack[#stack] dt = top.dt local t = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = top } dt[#dt+1] = t setmetatable(t, mt) if at.xmlns then remove(xmlns) end at = { } end local function add_begin(spacing, namespace, tag) if #spacing > 0 then dt[#dt+1] = spacing end local resolved = namespace == "" and xmlns[#xmlns] or nsremap[namespace] or namespace top = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = stack[#stack] } setmetatable(top, mt) dt = top.dt stack[#stack+1] = top at = { } end local function add_end(spacing, namespace, tag) if #spacing > 0 then dt[#dt+1] = spacing end local toclose = remove(stack) top = stack[#stack] if #stack < 1 then errorstr = formatters["unable to close %s %s"](tag,xml.checkerror(top,toclose) or "") report_xml(errorstr) elseif toclose.tg ~= tag then -- no namespace check errorstr = formatters["unable to close %s with %s %s"](toclose.tg,tag,xml.checkerror(top,toclose) or "") report_xml(errorstr) end dt = top.dt dt[#dt+1] = toclose -- dt[0] = top -- nasty circular reference when serializing table if toclose.at.xmlns then remove(xmlns) end end -- local function add_text(text) -- if cleanup and #text > 0 then -- dt[#dt+1] = cleanup(text) -- else -- dt[#dt+1] = text -- end -- end local function add_text(text) local n = #dt if cleanup and #text > 0 then if n > 0 then local s = dt[n] if type(s) == "string" then dt[n] = s .. cleanup(text) else dt[n+1] = cleanup(text) end else dt[1] = cleanup(text) end else if n > 0 then local s = dt[n] if type(s) == "string" then dt[n] = s .. text else dt[n+1] = text end else dt[1] = text end end end local function add_special(what, spacing, text) if #spacing > 0 then dt[#dt+1] = spacing end if strip and (what == "@cm@" or what == "@dt@") then -- forget it else dt[#dt+1] = { special=true, ns="", tg=what, dt={ text } } end end local function set_message(txt) errorstr = "garbage at the end of the file: " .. gsub(txt,"([ \n\r\t]*)","") end local reported_attribute_errors = { } local function attribute_value_error(str) if not reported_attribute_errors[str] then report_xml("invalid attribute value %a",str) reported_attribute_errors[str] = true at._error_ = str end return str end local function attribute_specification_error(str) if not reported_attribute_errors[str] then report_xml("invalid attribute specification %a",str) reported_attribute_errors[str] = true at._error_ = str end return str end local badentity = "&error;" local badentity = "&" xml.placeholders = { unknown_dec_entity = function(str) return str == "" and badentity or formatters["&%s;"](str) end, unknown_hex_entity = function(str) return formatters["&#x%s;"](str) end, unknown_any_entity = function(str) return formatters["&#x%s;"](str) end, } local placeholders = xml.placeholders local function fromhex(s) local n = tonumber(s,16) if n then return utfchar(n) else return formatters["h:%s"](s), true end end local function fromdec(s) local n = tonumber(s) if n then return utfchar(n) else return formatters["d:%s"](s), true end end -- one level expansion (simple case), no checking done local p_rest = (1-P(";"))^0 local p_many = P(1)^0 local p_char = lpegpatterns.utf8character local parsedentity = P("&") * (P("#x")*(p_rest/fromhex) + P("#")*(p_rest/fromdec)) * P(";") * P(-1) + (P("#x")*(p_many/fromhex) + P("#")*(p_many/fromdec)) -- parsing in the xml file local predefined_unified = { [38] = "&", [42] = """, [47] = "'", [74] = "<", [76] = ">", } local predefined_simplified = { [38] = "&", amp = "&", [42] = '"', quot = '"', [47] = "'", apos = "'", [74] = "<", lt = "<", [76] = ">", gt = ">", } local nofprivates = 0xF0000 -- shared but seldom used local privates_u = { -- unescaped [ [[&]] ] = "&", [ [["]] ] = """, [ [[']] ] = "'", [ [[<]] ] = "<", [ [[>]] ] = ">", } local privates_p = { } local privates_n = { -- keeps track of defined ones } -- -- local escaped = utf.remapper(privates_u) -- can't be used as it freezes -- -- local unprivatized = utf.remapper(privates_p) -- can't be used as it freezes -- -- local p_privates_u = false -- local p_privates_p = false -- -- table.setmetatablenewindex(privates_u,function(t,k,v) rawset(t,k,v) p_privates_u = false end) -- table.setmetatablenewindex(privates_p,function(t,k,v) rawset(t,k,v) p_privates_p = false end) -- -- local function escaped(str) -- if not str or str == "" then -- return "" -- else -- if not p_privates_u then -- p_privates_u = Cs((lpeg.utfchartabletopattern(privates_u)/privates_u + p_char)^0) -- end -- return lpegmatch(p_privates_u,str) -- end -- end -- -- local function unprivatized(str) -- if not str or str == "" then -- return "" -- else -- if not p_privates_p then -- p_privates_p = Cs((lpeg.utfchartabletopattern(privates_p)/privates_p + p_char)^0) -- end -- return lpegmatch(p_privates_p,str) -- end -- end local escaped = utf.remapper(privates_u,"dynamic") local unprivatized = utf.remapper(privates_p,"dynamic") xml.unprivatized = unprivatized local function unescaped(s) local p = privates_n[s] if not p then nofprivates = nofprivates + 1 p = utfchar(nofprivates) privates_n[s] = p s = "&" .. s .. ";" -- todo: use char-ent to map to hex privates_u[p] = s privates_p[p] = s end return p end xml.privatetoken = unescaped xml.privatecodes = privates_n local function handle_hex_entity(str) local h = hcache[str] if not h then local n = tonumber(str,16) h = unify_predefined and predefined_unified[n] if h then if trace_entities then report_xml("utfize, converting hex entity &#x%s; into %a",str,h) end elseif utfize then h = (n and utfchar(n)) or xml.unknown_hex_entity(str) or "" if not n then report_xml("utfize, ignoring hex entity &#x%s;",str) elseif trace_entities then report_xml("utfize, converting hex entity &#x%s; into %a",str,h) end else if trace_entities then report_xml("found entity &#x%s;",str) end h = "&#x" .. str .. ";" end hcache[str] = h end return h end local function handle_dec_entity(str) local d = dcache[str] if not d then local n = tonumber(str) d = unify_predefined and predefined_unified[n] if d then if trace_entities then report_xml("utfize, converting dec entity &#%s; into %a",str,d) end elseif utfize then d = (n and utfchar(n)) or placeholders.unknown_dec_entity(str) or "" if not n then report_xml("utfize, ignoring dec entity &#%s;",str) elseif trace_entities then report_xml("utfize, converting dec entity &#%s; into %a",str,d) end else if trace_entities then report_xml("found entity &#%s;",str) end d = "&#" .. str .. ";" end dcache[str] = d end return d end xml.parsedentitylpeg = parsedentity local function handle_any_entity(str) if resolve then local a = acache[str] -- per instance ! todo if not a then a = resolve_predefined and predefined_simplified[str] if a then if trace_entities then report_xml("resolving entity &%s; to predefined %a",str,a) end else if type(resolve) == "function" then a = resolve(str) or entities[str] else a = entities[str] end if a then if type(a) == "function" then if trace_entities then report_xml("expanding entity &%s; to function call",str) end a = a(str) or "" end a = lpegmatch(parsedentity,a) or a -- for nested if trace_entities then report_xml("resolving entity &%s; to internal %a",str,a) end else local unknown_any_entity = placeholders.unknown_any_entity if unknown_any_entity then a = unknown_any_entity(str) or "" end if a then if trace_entities then report_xml("resolving entity &%s; to external %s",str,a) end else if trace_entities then report_xml("keeping entity &%s;",str) end if str == "" then a = badentity else a = "&" .. str .. ";" end end end end acache[str] = a elseif trace_entities then if not acache[str] then report_xml("converting entity &%s; to %a",str,a) acache[str] = a end end return a else local a = acache[str] if not a then a = resolve_predefined and predefined_simplified[str] if a then -- one of the predefined acache[str] = a if trace_entities then report_xml("entity &%s; becomes %a",str,a) end elseif str == "" then if trace_entities then report_xml("invalid entity &%s;",str) end a = badentity acache[str] = a else if trace_entities then report_xml("entity &%s; is made private",str) end -- a = "&" .. str .. ";" a = unescaped(str) acache[str] = a end end return a end end -- local function handle_end_entity(chr) -- report_xml("error in entity, %a found instead of %a",chr,";") -- end local function handle_end_entity(str) report_xml("error in entity, %a found without ending %a",str,";") return str end local function handle_crap_error(chr) report_xml("error in parsing, unexpected %a found ",chr) add_text(chr) return chr end local space = S(' \r\n\t') local open = P('<') local close = P('>') local squote = S("'") local dquote = S('"') local equal = P('=') local slash = P('/') local colon = P(':') local semicolon = P(';') local ampersand = P('&') local valid = R('az', 'AZ', '09') + S('_-.') local name_yes = C(valid^1) * colon * C(valid^1) local name_nop = C(P(true)) * C(valid^1) local name = name_yes + name_nop local utfbom = lpegpatterns.utfbom -- no capture local spacing = C(space^0) ----- entitycontent = (1-open-semicolon)^0 local anyentitycontent = (1-open-semicolon-space-close-ampersand)^0 local hexentitycontent = R("AF","af","09")^0 local decentitycontent = R("09")^0 local parsedentity = P("#")/"" * ( P("x")/"" * (hexentitycontent/handle_hex_entity) + (decentitycontent/handle_dec_entity) ) + (anyentitycontent/handle_any_entity) ----- entity = ampersand/"" * parsedentity * ( (semicolon/"") + #(P(1)/handle_end_entity)) local entity = (ampersand/"") * parsedentity * (semicolon/"") + ampersand * (anyentitycontent / handle_end_entity) local text_unparsed = C((1-open)^1) local text_parsed = Cs(((1-open-ampersand)^1 + entity)^1) local somespace = space^1 local optionalspace = space^0 ----- value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote) -- ampersand and < also invalid in value local value = (squote * Cs((entity + (1 - squote))^0) * squote) + (dquote * Cs((entity + (1 - dquote))^0) * dquote) -- ampersand and < also invalid in value local endofattributes = slash * close + close -- recovery of flacky html local whatever = space * name * optionalspace * equal ----- wrongvalue = C(P(1-whatever-close)^1 + P(1-close)^1) / attribute_value_error ----- wrongvalue = C(P(1-whatever-endofattributes)^1 + P(1-endofattributes)^1) / attribute_value_error ----- wrongvalue = C(P(1-space-endofattributes)^1) / attribute_value_error local wrongvalue = Cs(P(entity + (1-space-endofattributes))^1) / attribute_value_error local attributevalue = value + wrongvalue local attribute = (somespace * name * optionalspace * equal * optionalspace * attributevalue) / add_attribute ----- attributes = (attribute)^0 local attributes = (attribute + somespace^-1 * (((1-endofattributes)^1)/attribute_specification_error))^0 local parsedtext = text_parsed / add_text local unparsedtext = text_unparsed / add_text local balanced = P { "[" * ((1 - S"[]") + V(1))^0 * "]" } -- taken from lpeg manual, () example local emptyelement = (spacing * open * name * attributes * optionalspace * slash * close) / add_empty local beginelement = (spacing * open * name * attributes * optionalspace * close) / add_begin local endelement = (spacing * open * slash * name * optionalspace * close) / add_end -- todo: combine the opens in: local begincomment = open * P("!--") local endcomment = P("--") * close local begininstruction = open * P("?") local endinstruction = P("?") * close local begincdata = open * P("![CDATA[") local endcdata = P("]]") * close local someinstruction = C((1 - endinstruction)^0) local somecomment = C((1 - endcomment )^0) local somecdata = C((1 - endcdata )^0) local function normalentity(k,v ) entities[k] = v end local function systementity(k,v,n) entities[k] = v end local function publicentity(k,v,n) entities[k] = v end -- todo: separate dtd parser local begindoctype = open * P("!DOCTYPE") local enddoctype = close local beginset = P("[") local endset = P("]") local doctypename = C((1-somespace-close)^0) local elementdoctype = optionalspace * P(" & cleanup = settings.text_cleanup entities = settings.entities or { } -- if utfize == nil then settings.utfize_entities = true utfize = true end if resolve_predefined == nil then settings.resolve_predefined_entities = true resolve_predefined = true end -- stack, top, at, xmlns, errorstr = { }, { }, { }, { }, nil acache, hcache, dcache = { }, { }, { } -- not stored reported_attribute_errors = { } if settings.parent_root then mt = getmetatable(settings.parent_root) else initialize_mt(top) end stack[#stack+1] = top top.dt = { } dt = top.dt if not data or data == "" then errorstr = "empty xml file" elseif utfize or resolve then if lpegmatch(grammar_parsed_text,data) then -- errorstr = "" can be set! else errorstr = "invalid xml file - parsed text" end elseif type(data) == "string" then if lpegmatch(grammar_unparsed_text,data) then errorstr = "" else errorstr = "invalid xml file - unparsed text" end else errorstr = "invalid xml file - no text at all" end local result if errorstr and errorstr ~= "" then result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={ }, er = true } } } setmetatable(result, mt) setmetatable(result.dt[1], mt) setmetatable(stack, mt) local errorhandler = settings.error_handler if errorhandler == false then -- no error message else errorhandler = errorhandler or xml.errorhandler if errorhandler then local currentresource = settings.currentresource if currentresource and currentresource ~= "" then xml.errorhandler(formatters["load error in [%s]: %s"](currentresource,errorstr)) else xml.errorhandler(formatters["load error: %s"](errorstr)) end end end else result = stack[1] end if not settings.no_root then result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={ }, entities = entities, settings = settings } setmetatable(result, mt) local rdt = result.dt for k=1,#rdt do local v = rdt[k] if type(v) == "table" and not v.special then -- always table -) result.ri = k -- rootindex v.__p__ = result -- new, experiment, else we cannot go back to settings, we need to test this ! break end end end if errorstr and errorstr ~= "" then result.error = true else errorstr = nil end result.statistics = { errormessage = errorstr, entities = { decimals = dcache, hexadecimals = hcache, names = acache, } } strip, utfize, resolve, resolve_predefined = nil, nil, nil, nil unify_predefined, cleanup, entities = nil, nil, nil stack, top, at, xmlns, errorstr = nil, nil, nil, nil, nil acache, hcache, dcache = nil, nil, nil reported_attribute_errors, mt, errorhandler = nil, nil, nil return result end -- Because we can have a crash (stack issues) with faulty xml, we wrap this one -- in a protector: local function xmlconvert(data,settings) local ok, result = pcall(function() return _xmlconvert_(data,settings) end) if ok then return result else return _xmlconvert_("",settings) end end xml.convert = xmlconvert function xml.inheritedconvert(data,xmldata) -- xmldata is parent local settings = xmldata.settings if settings then settings.parent_root = xmldata -- to be tested end -- settings.no_root = true local xc = xmlconvert(data,settings) -- hm, we might need to locate settings -- xc.settings = nil -- xc.entities = nil -- xc.special = nil -- xc.ri = nil -- print(xc.tg) return xc end --[[ldx--

Packaging data in an xml like table is done with the following function. Maybe it will go away (when not used).

--ldx]]-- function xml.is_valid(root) return root and root.dt and root.dt[1] and type(root.dt[1]) == "table" and not root.dt[1].er end function xml.package(tag,attributes,data) local ns, tg = match(tag,"^(.-):?([^:]+)$") local t = { ns = ns, tg = tg, dt = data or "", at = attributes or {} } setmetatable(t, mt) return t end function xml.is_valid(root) return root and not root.error end xml.errorhandler = report_xml --[[ldx--

We cannot load an from a filehandle so we need to load the whole file first. The function accepts a string representing a filename or a file handle.

--ldx]]-- function xml.load(filename,settings) local data = "" if type(filename) == "string" then -- local data = io.loaddata(filename) - -todo: check type in io.loaddata local f = io.open(filename,'r') -- why not 'rb' if f then data = f:read("*all") -- io.readall(f) ... only makes sense for large files f:close() end elseif filename then -- filehandle data = filename:read("*all") -- io.readall(f) ... only makes sense for large files end if settings then settings.currentresource = filename local result = xmlconvert(data,settings) settings.currentresource = nil return result else return xmlconvert(data,{ currentresource = filename }) end end --[[ldx--

When we inject new elements, we need to convert strings to valid trees, which is what the next function does.

--ldx]]-- local no_root = { no_root = true } function xml.toxml(data) if type(data) == "string" then local root = { xmlconvert(data,no_root) } return (#root > 1 and root) or root[1] else return data end end --[[ldx--

For copying a tree we use a dedicated function instead of the generic table copier. Since we know what we're dealing with we can speed up things a bit. The second argument is not to be used!

--ldx]]-- local function copy(old,tables) if old then tables = tables or { } local new = { } if not tables[old] then tables[old] = new end for k,v in next, old do new[k] = (type(v) == "table" and (tables[v] or copy(v, tables))) or v end local mt = getmetatable(old) if mt then setmetatable(new,mt) end return new else return { } end end xml.copy = copy --[[ldx--

In serializing the tree or parts of the tree is a major actitivity which is why the following function is pretty optimized resulting in a few more lines of code than needed. The variant that uses the formatting function for all components is about 15% slower than the concatinating alternative.

--ldx]]-- -- todo: add when not present function xml.checkbom(root) -- can be made faster if root.ri then local dt = root.dt for k=1,#dt do local v = dt[k] if type(v) == "table" and v.special and v.tg == "@pi@" and find(v.dt[1],"xml.*version=") then return end end insert(dt, 1, { special = true, ns = "", tg = "@pi@", dt = { "xml version='1.0' standalone='yes'" } } ) insert(dt, 2, "\n" ) end end --[[ldx--

At the cost of some 25% runtime overhead you can first convert the tree to a string and then handle the lot.

--ldx]]-- -- new experimental reorganized serialize local f_attribute = formatters['%s=%q'] local function verbose_element(e,handlers,escape) -- options local handle = handlers.handle local serialize = handlers.serialize local ens, etg, eat, edt, ern = e.ns, e.tg, e.at, e.dt, e.rn local ats = eat and next(eat) and { } if ats then local n = 0 for k,v in next, eat do n = n + 1 ats[n] = f_attribute(k,escaped(v)) end end if ern and trace_entities and ern ~= ens then ens = ern end if ens ~= "" then if edt and #edt > 0 then if ats then handle("<",ens,":",etg," ",concat(ats," "),">") else handle("<",ens,":",etg,">") end for i=1,#edt do local e = edt[i] if type(e) == "string" then handle(escaped(e)) else serialize(e,handlers) end end handle("") else if ats then handle("<",ens,":",etg," ",concat(ats," "),"/>") else handle("<",ens,":",etg,"/>") end end else if edt and #edt > 0 then if ats then handle("<",etg," ",concat(ats," "),">") else handle("<",etg,">") end for i=1,#edt do local e = edt[i] if type(e) == "string" then handle(escaped(e)) -- option: hexify escaped entities else serialize(e,handlers) end end handle("") else if ats then handle("<",etg," ",concat(ats," "),"/>") else handle("<",etg,"/>") end end end end local function verbose_pi(e,handlers) handlers.handle("") end local function verbose_comment(e,handlers) handlers.handle("") end local function verbose_cdata(e,handlers) handlers.handle("") end local function verbose_doctype(e,handlers) handlers.handle("") end local function verbose_root(e,handlers) handlers.serialize(e.dt,handlers) end local function verbose_text(e,handlers) handlers.handle(escaped(e)) end local function verbose_document(e,handlers) local serialize = handlers.serialize local functions = handlers.functions for i=1,#e do local ei = e[i] if type(ei) == "string" then functions["@tx@"](ei,handlers) else serialize(ei,handlers) end end end local function serialize(e,handlers,...) if e then local initialize = handlers.initialize local finalize = handlers.finalize local functions = handlers.functions if initialize then local state = initialize(...) if not state == true then return state end end local etg = e.tg if etg then (functions[etg] or functions["@el@"])(e,handlers) -- elseif type(e) == "string" then -- functions["@tx@"](e,handlers) else functions["@dc@"](e,handlers) -- dc ? end if finalize then return finalize() end end end local function xserialize(e,handlers) local functions = handlers.functions local etg = e.tg if etg then (functions[etg] or functions["@el@"])(e,handlers) -- elseif type(e) == "string" then -- functions["@tx@"](e,handlers) else functions["@dc@"](e,handlers) end end local handlers = { } local function newhandlers(settings) local t = table.copy(handlers[settings and settings.parent or "verbose"] or { }) -- merge if settings then for k,v in next, settings do if type(v) == "table" then local tk = t[k] if not tk then tk = { } t[k] = tk end for kk,vv in next, v do tk[kk] = vv end else t[k] = v end end if settings.name then handlers[settings.name] = t end end utilities.storage.mark(t) return t end local nofunction = function() end function xml.sethandlersfunction(handler,name,fnc) handler.functions[name] = fnc or nofunction end function xml.gethandlersfunction(handler,name) return handler.functions[name] end function xml.gethandlers(name) return handlers[name] end newhandlers { name = "verbose", initialize = false, -- faster than nil and mt lookup finalize = false, -- faster than nil and mt lookup serialize = xserialize, handle = print, functions = { ["@dc@"] = verbose_document, ["@dt@"] = verbose_doctype, ["@rt@"] = verbose_root, ["@el@"] = verbose_element, ["@pi@"] = verbose_pi, ["@cm@"] = verbose_comment, ["@cd@"] = verbose_cdata, ["@tx@"] = verbose_text, } } --[[ldx--

How you deal with saving data depends on your preferences. For a 40 MB database file the timing on a 2.3 Core Duo are as follows (time in seconds):

1.3 : load data from file to string 6.1 : convert string into tree 5.3 : saving in file using xmlsave 6.8 : converting to string using xml.tostring 3.6 : saving converted string in file

Beware, these were timing with the old routine but measurements will not be that much different I guess.

--ldx]]-- -- maybe this will move to lxml-xml local result local xmlfilehandler = newhandlers { name = "file", initialize = function(name) result = io.open(name,"wb") return result end, finalize = function() result:close() return true end, handle = function(...) result:write(...) end, } -- no checking on writeability here but not faster either -- -- local xmlfilehandler = newhandlers { -- initialize = function(name) -- io.output(name,"wb") -- return true -- end, -- finalize = function() -- io.close() -- return true -- end, -- handle = io.write, -- } function xml.save(root,name) serialize(root,xmlfilehandler,name) end local result local xmlstringhandler = newhandlers { name = "string", initialize = function() result = { } return result end, finalize = function() return concat(result) end, handle = function(...) result[#result+1] = concat { ... } end, } local function xmltostring(root) -- 25% overhead due to collecting if not root then return "" elseif type(root) == "string" then return root else -- if next(root) then -- next is faster than type (and >0 test) return serialize(root,xmlstringhandler) or "" end end local function __tostring(root) -- inline return (root and xmltostring(root)) or "" end initialize_mt = function(root) -- redefinition mt = { __tostring = __tostring, __index = root } end xml.defaulthandlers = handlers xml.newhandlers = newhandlers xml.serialize = serialize xml.tostring = xmltostring --[[ldx--

The next function operated on the content only and needs a handle function that accepts a string.

--ldx]]-- local function xmlstring(e,handle) if not handle or (e.special and e.tg ~= "@rt@") then -- nothing elseif e.tg then local edt = e.dt if edt then for i=1,#edt do xmlstring(edt[i],handle) end end else handle(e) end end xml.string = xmlstring --[[ldx--

A few helpers:

--ldx]]-- --~ xmlsetproperty(root,"settings",settings) function xml.settings(e) while e do local s = e.settings if s then return s else e = e.__p__ end end return nil end function xml.root(e) local r = e while e do e = e.__p__ if e then r = e end end return r end function xml.parent(root) return root.__p__ end function xml.body(root) return root.ri and root.dt[root.ri] or root -- not ok yet end function xml.name(root) if not root then return "" end local ns = root.ns local tg = root.tg if ns == "" then return tg else return ns .. ":" .. tg end end --[[ldx--

The next helper erases an element but keeps the table as it is, and since empty strings are not serialized (effectively) it does not harm. Copying the table would take more time. Usage:

--ldx]]-- function xml.erase(dt,k) if dt then if k then dt[k] = "" else for k=1,#dt do dt[1] = { "" } end end end end --[[ldx--

The next helper assigns a tree (or string). Usage:

dt[k] = xml.assign(root) or xml.assign(dt,k,root) --ldx]]-- function xml.assign(dt,k,root) if dt and k then dt[k] = type(root) == "table" and xml.body(root) or root return dt[k] else return xml.body(root) end end -- the following helpers may move --[[ldx--

The next helper assigns a tree (or string). Usage:

xml.tocdata(e) xml.tocdata(e,"error") --ldx]]-- function xml.tocdata(e,wrapper) -- a few more in the aux module local whatever = type(e) == "table" and xmltostring(e.dt) or e or "" if wrapper then whatever = formatters["<%s>%s"](wrapper,whatever,wrapper) end local t = { special = true, ns = "", tg = "@cd@", at = { }, rn = "", dt = { whatever }, __p__ = e } setmetatable(t,getmetatable(e)) e.dt = { t } end function xml.makestandalone(root) if root.ri then local dt = root.dt for k=1,#dt do local v = dt[k] if type(v) == "table" and v.special and v.tg == "@pi@" then local txt = v.dt[1] if find(txt,"xml.*version=") then v.dt[1] = txt .. " standalone='yes'" break end end end end return root end function xml.kind(e) local dt = e and e.dt if dt then local n = #dt if n == 1 then local d = dt[1] if d.special then local tg = d.tg if tg == "@cd@" then return "cdata" elseif tg == "@cm" then return "comment" elseif tg == "@pi@" then return "instruction" elseif tg == "@dt@" then return "declaration" end elseif type(d) == "string" then return "text" end return "element" elseif n > 0 then return "mixed" end end return "empty" end