diff options
author | Hans Hagen <pragma@wxs.nl> | 2009-10-16 16:13:00 +0200 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2009-10-16 16:13:00 +0200 |
commit | 7f9b179ad5be5000f67192f283d20e7120402bd9 (patch) | |
tree | 18f83a8cbfe7fed1c2a6939fb4b2cf10473abbbe /tex/context/base/lxml-tab.lua | |
parent | c878054f6360d50885dbdab96643a8f3ac61c46c (diff) | |
download | context-7f9b179ad5be5000f67192f283d20e7120402bd9.tar.gz |
beta 2009.10.16 16:13
Diffstat (limited to 'tex/context/base/lxml-tab.lua')
-rw-r--r-- | tex/context/base/lxml-tab.lua | 762 |
1 files changed, 467 insertions, 295 deletions
diff --git a/tex/context/base/lxml-tab.lua b/tex/context/base/lxml-tab.lua index b49bf0ecb..faafa4462 100644 --- a/tex/context/base/lxml-tab.lua +++ b/tex/context/base/lxml-tab.lua @@ -10,6 +10,8 @@ if not modules then modules = { } end modules ['lxml-tab'] = { -- stripping spaces from e.g. cont-en.xml saves .2 sec runtime so it's not worth the -- trouble +local trace_entities = false trackers.register("xml.entities", function(v) trace_entities = v end) + --[[ldx-- <p>The parser used here is inspired by the variant discussed in the lua book, but handles comment and processing instructions, has a different structure, provides @@ -17,18 +19,6 @@ parent access; a first version used different trickery but was less optimized to went this route. First we had a find based parser, now we have an <l n='lpeg'/> based one. The find based parser can be found in l-xml-edu.lua along with other older code.</p> -<p>Expecially the lpath code is experimental, we will support some of xpath, but -only things that make sense for us; as compensation it is possible to hook in your -own functions. Apart from preprocessing content for <l n='context'/> we also need -this module for process management, like handling <l n='ctx'/> and <l n='rlx'/> -files.</p> - -<typing> -a/b/c /*/c -a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n) -a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n) -</typing> - <p>Beware, the interface may change. For instance at, ns, tg, dt may get more verbose names. Once the code is stable we will also remove some tracing and optimize the code.</p> @@ -39,26 +29,9 @@ xml = xml or { } --~ local xml = xml local concat, remove, insert = table.concat, table.remove, table.insert -local type, next, setmetatable, getmetatable = type, next, setmetatable, getmetatable +local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber local format, lower, find = string.format, string.lower, string.find - ---[[ldx-- -<p>This module can be used stand alone but also inside <l n='mkiv'/> in -which case it hooks into the tracker code. Therefore we provide a few -functions that set the tracers.</p> ---ldx]]-- - -local trace_remap = false - -if trackers then - trackers.register("xml.remap", function(v) trace_remap = v end) -end - -function xml.settrace(str,value) - if str == "remap" then - trace_remap = value or false - end -end +local utfchar = unicode.utf8.char --[[ldx-- <p>First a hack to enable namespace resolving. A namespace is characterized by @@ -165,17 +138,16 @@ element.</p> </typing> --ldx]]-- -xml.strip_cm_and_dt = false -- an extra global flag, in case we have many includes - -- not just one big nested table capture (lpeg overflow) local nsremap, resolvens = xml.xmlns, xml.resolvens local stack, top, dt, at, xmlns, errorstr, entities = {}, {}, {}, {}, {}, nil, {} +local strip, cleanup, utfize, resolve = false, false, false, false -local mt = { __tostring = xml.text } +local mt = { } -function initialize_mt(root) +function initialize_mt(root) -- we will make a xml.new that then sets the mt as field mt = { __tostring = xml.text, __index = root } end @@ -187,13 +159,6 @@ function xml.check_error(top,toclose) return "" end -local strip = false -local cleanup = false - -function xml.set_text_cleanup(fnc) - cleanup = fnc -end - local function add_attribute(namespace,tag,value) if cleanup and #value > 0 then value = cleanup(value) -- new @@ -209,6 +174,22 @@ local function add_attribute(namespace,tag,value) end end +local function add_empty(spacing, namespace, tag) + if #spacing > 0 then + dt[#dt+1] = spacing + end + local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace + top = stack[#stack] + dt = top.dt + local t = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = top } + dt[#dt+1] = t + setmetatable(t, mt) + if at.xmlns then + remove(xmlns) + end + at = { } +end + local function add_begin(spacing, namespace, tag) if #spacing > 0 then dt[#dt+1] = spacing @@ -234,28 +215,12 @@ local function add_end(spacing, namespace, tag) end dt = top.dt dt[#dt+1] = toclose - dt[0] = top + -- dt[0] = top -- nasty circular reference when serializing table if toclose.at.xmlns then remove(xmlns) end end -local function add_empty(spacing, namespace, tag) - if #spacing > 0 then - dt[#dt+1] = spacing - end - local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace - top = stack[#stack] - dt = top.dt - local t = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = top } - dt[#dt+1] = t - setmetatable(t, mt) - if at.xmlns then - remove(xmlns) - end - at = { } -end - local function add_text(text) if cleanup and #text > 0 then dt[#dt+1] = cleanup(text) @@ -279,7 +244,109 @@ local function set_message(txt) errorstr = "garbage at the end of the file: " .. gsub(txt,"([ \n\r\t]*)","") end -local P, S, R, C, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V +local reported_attribute_errors = { } + +local function attribute_value_error(str) + if not reported_attribute_errors[str] then + logs.report("xml","invalid attribute value: %q",str) + reported_attribute_errors[str] = true + at._error_ = str + end + return str +end +local function attribute_specification_error(str) + if not reported_attribute_errors[str] then + logs.report("xml","invalid attribute specification: %q",str) + reported_attribute_errors[str] = true + at._error_ = str + end + return str +end + +local dcache, hcache, acache = { }, { }, { } + +function xml.unknown_dec_entity_format(str) return format("&%s;", str) end +function xml.unknown_hex_entity_format(str) return format("&#x%s;",str) end +function xml.unknown_any_entity_format(str) return format("&%s;", str) end + +local function handle_hex_entity(str) + local h = hcache[str] + if not h then + if utfize then + local n = tonumber(str,16) + h = (n and utfchar(n)) or xml.unknown_hex_entity_format(str) or "" + if not n then + logs.report("xml","utfize, ignoring hex entity &#x%s;",str) + elseif trace_entities then + logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,c) + end + else + if trace_entities then + logs.report("xml","found entity &#x%s;",str) + end + h = "&#" .. str .. ";" + end + hcache[str] = h + end + return h +end +local function handle_dec_entity(str) + local d = dcache[str] + if not d then + if utfize then + local n = tonumber(str) + d = (n and utfchar(n)) or xml.unknown_dec_entity_format(str) or "" + if not n then + logs.report("xml","utfize, ignoring dec entity &#%s;",str) + elseif trace_entities then + logs.report("xml","utfize, converting dec entity &#%s; into %s",str,c) + end + else + if trace_entities then + logs.report("xml","found entity &#%s;",str) + end + d = "&" .. str .. ";" + end + dcache[str] = d + end + return d +end +local function handle_any_entity(str) + if resolve then + local a = entities[str] -- per instance ! + if not a then + a = acache[str] + if not a then + if trace_entities then + logs.report("xml","ignoring entity &%s;",str) + else + -- can be defined in a global mapper and intercepted elsewhere + -- as happens in lxml-tex.lua + end + a = xml.unknown_any_entity_format(str) or "" + acache[str] = a + end + elseif trace_entities then + if not acache[str] then + logs.report("xml","converting entity &%s; into %s",str,r) + acache[str] = a + end + end + return a + else + local a = acache[str] + if not a then + if trace_entities then + logs.report("xml","found entity &%s;",str) + end + a = "&" .. str .. ";" + acache[str] = a + end + return a + end +end + +local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs local space = S(' \r\n\t') local open = P('<') @@ -289,6 +356,7 @@ local dquote = S('"') local equal = P('=') local slash = P('/') local colon = P(':') +local semicolon = P(';') local ampersand = P('&') local valid = R('az', 'AZ', '09') + S('_-.') local name_yes = C(valid^1) * colon * C(valid^1) @@ -299,15 +367,36 @@ local utfbom = P('\000\000\254\255') + P('\255\254\000\000') + P('\255\254') + P('\254\255') + P('\239\187\191') -- no capture local spacing = C(space^0) -local justtext = C((1-open)^1) + +local entitycontent = (1-open-semicolon)^0 +local entity = ampersand/"" * ( + P("#")/"" * ( + P("x")/"" * (entitycontent/handle_hex_entity) + + (entitycontent/handle_dec_entity) + ) + (entitycontent/handle_any_entity) + ) * (semicolon/"") + +local text_unparsed = C((1-open)^1) +local text_parsed = Cs(((1-open-ampersand)^1 + entity)^1) + local somespace = space^1 local optionalspace = space^0 local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote) -- ampersand and < also invalid in value -local attribute = (somespace * name * optionalspace * equal * optionalspace * value) / add_attribute -local attributes = attribute^0 -local text = justtext / add_text +local whatever = space * name * optionalspace * equal +local wrongvalue = C(P(1-whatever-close)^1 + P(1-close)^1) / attribute_value_error + +local attributevalue = value + wrongvalue + +local attribute = (somespace * name * optionalspace * equal * optionalspace * attributevalue) / add_attribute +----- attributes = (attribute)^0 + +local endofattributes = slash * close + close -- recovery of flacky html +local attributes = (attribute + somespace^-1 * (((1-endofattributes)^1)/attribute_specification_error))^0 + +local parsedtext = text_parsed / add_text +local unparsedtext = text_unparsed / add_text local balanced = P { "[" * ((1 - S"[]") + V(1))^0 * "]" } -- taken from lpeg manual, () example local emptyelement = (spacing * open * name * attributes * optionalspace * slash * close) / add_empty @@ -360,25 +449,34 @@ local doctype = (spacing * begindoctype * somedoctype * enddoct -- local cdata = (lpeg.Cc("@cd@") * spacing * begincdata * somecdata * endcdata ) / add_special -- local doctype = (lpeg.Cc("@dt@") * spacing * begindoctype * somedoctype * enddoctype ) / add_special -local trailer = space^0 * (justtext/set_message)^0 +local trailer = space^0 * (text_unparsed/set_message)^0 -- comment + emptyelement + text + cdata + instruction + V("parent"), -- 6.5 seconds on 40 MB database file -- text + comment + emptyelement + cdata + instruction + V("parent"), -- 5.8 -- text + V("parent") + emptyelement + comment + cdata + instruction, -- 5.5 -local grammar = P { "preamble", +local grammar_parsed_text = P { "preamble", preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * V("parent") * trailer, parent = beginelement * V("children")^0 * endelement, - children = text + V("parent") + emptyelement + comment + cdata + instruction, + children = parsedtext + V("parent") + emptyelement + comment + cdata + instruction, } --- todo: xml.new + properties like entities and strip and such (store in root) +local grammar_unparsed_text = P { "preamble", + preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * V("parent") * trailer, + parent = beginelement * V("children")^0 * endelement, + children = unparsedtext + V("parent") + emptyelement + comment + cdata + instruction, +} -function xml.convert(data, no_root, strip_cm_and_dt, given_entities, parent_root) -- maybe use table met k/v (given_entities may disapear) - strip = strip_cm_and_dt or xml.strip_cm_and_dt - stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, given_entities or {} - if parent_root then - mt = getmetatable(parent_root) +local function xmlconvert(data, settings) + settings = settings or { } -- no_root strip_cm_and_dt given_entities parent_root error_handler + strip = settings.strip_cm_and_dt + utfize = settings.utfize_entities + resolve = settings.resolve_entities + cleanup = settings.text_cleanup + stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, settings.entities or {} + reported_attribute_errors = { } + if settings.parent_root then + mt = getmetatable(settings.parent_root) else initialize_mt(top) end @@ -387,20 +485,36 @@ function xml.convert(data, no_root, strip_cm_and_dt, given_entities, parent_root dt = top.dt if not data or data == "" then errorstr = "empty xml file" - elseif not grammar:match(data) then - errorstr = "invalid xml file" + elseif utfize or resolve then + if grammar_parsed_text:match(data) then + errorstr = "" + else + errorstr = "invalid xml file - parsed text" + end else - errorstr = "" + if grammar_unparsed_text:match(data) then + errorstr = "" + else + errorstr = "invalid xml file - unparsed text" + end end if errorstr and errorstr ~= "" then - result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={}, er = true } }, error = true } + result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={}, er = true } } } setmetatable(stack, mt) - if xml.error_handler then xml.error_handler("load",errorstr) end + local error_handler = settings.error_handler + if error_handler == false then + -- no error message + else + error_handler = error_handler or xml.error_handler + if error_handler then + xml.error_handler("load",errorstr) + end + end else result = stack[1] end - if not no_root then - result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={}, entities = entities } + if not settings.no_root then + result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={}, entities = entities, settings = settings } setmetatable(result, mt) local rdt = result.dt for k=1,#rdt do @@ -411,9 +525,14 @@ function xml.convert(data, no_root, strip_cm_and_dt, given_entities, parent_root end end end + if errorstr and errorstr ~= "" then + result.error = true + end return result end +xml.convert = xmlconvert + --[[ldx-- <p>Packaging data in an xml like table is done with the following function. Maybe it will go away (when not used).</p> @@ -446,16 +565,16 @@ function xml.load(filename) if type(filename) == "string" then local f = io.open(filename,'r') if f then - local root = xml.convert(f:read("*all")) + local root = xmlconvert(f:read("*all")) f:close() return root else - return xml.convert("") + return xmlconvert("") end elseif filename then -- filehandle - return xml.convert(filename:read("*all")) + return xmlconvert(filename:read("*all")) else - return xml.convert("") + return xmlconvert("") end end @@ -464,9 +583,11 @@ end valid trees, which is what the next function does.</p> --ldx]]-- +local no_root = { no_root = true } + function xml.toxml(data) if type(data) == "string" then - local root = { xml.convert(data,true) } + local root = { xmlconvert(data,no_root) } return (#root > 1 and root) or root[1] else return data @@ -511,222 +632,305 @@ alternative.</p> -- todo: add <?xml version='1.0' standalone='yes'?> when not present -local fallbackhandle = (tex and tex.sprint) or io.write - -local serializer - -function xml.setserializer(f) - serializer = f -end - -local function serialize(e, handle, textconverter, attributeconverter, specialconverter, nocommands) - if not e then - return - elseif not nocommands then - local ec = e.command - if ec ~= nil then -- we can have all kind of types - if e.special then - local etg, edt = e.tg, e.dt - local spc = specialconverter and specialconverter[etg] - if spc then - local result = spc(edt[1]) - if result then - handle(result) - return - else - -- no need to handle any further - end - end - end - if serializer then - serializer(e,ec) - return +function xml.checkbom(root) -- can be made faster + if root.ri then + local dt, found = root.dt, false + for k=1,#dt do + local v = dt[k] + if type(v) == "table" and v.special and v.tg == "@pi" and find(v.dt,"xml.*version=") then + found = true + break end end + if not found then + insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } ) + insert(dt, 2, "\n" ) + end end - handle = handle or fallbackhandle - local etg = e.tg - if etg then - if e.special then - local edt = e.dt - local spc = specialconverter and specialconverter[etg] - if spc then - local result = spc(edt[1]) - if result then - handle(result) +end + +--[[ldx-- +<p>At the cost of some 25% runtime overhead you can first convert the tree to a string +and then handle the lot.</p> +--ldx]]-- + +-- new experimental reorganized serialize + +local function verbose_element(e,handlers) + local handle = handlers.handle + local serialize = handlers.serialize + local ens, etg, eat, edt, ern = e.ns, e.tg, e.at, e.dt, e.rn + local ats = eat and next(eat) and { } + if ats then + for k,v in next, eat do + ats[#ats+1] = format('%s=%q',k,v) + end + end + if ern and trace_remap and ern ~= ens then + ens = ern + end + if ens ~= "" then + if edt and #edt > 0 then + if ats then + handle("<",ens,":",etg," ",concat(ats," "),">") + else + handle("<",ens,":",etg,">") + end + for i=1,#edt do + local e = edt[i] + if type(e) == "string" then + handle(e) else - -- no need to handle any further + serialize(e,handlers) end - elseif etg == "@pi@" then - -- handle(format("<?%s?>",edt[1])) - handle("<?" .. edt[1] .. "?>") - elseif etg == "@cm@" then - -- handle(format("<!--%s-->",edt[1])) - handle("<!--" .. edt[1] .. "-->") - elseif etg == "@cd@" then - -- handle(format("<![CDATA[%s]]>",edt[1])) - handle("<![CDATA[" .. edt[1] .. "]]>") - elseif etg == "@dt@" then - -- handle(format("<!DOCTYPE %s>",edt[1])) - handle("<!DOCTYPE " .. edt[1] .. ">") - elseif etg == "@rt@" then - serialize(edt,handle,textconverter,attributeconverter,specialconverter,nocommands) end + handle("</",ens,":",etg,">") else - local ens, eat, edt, ern = e.ns, e.at, e.dt, e.rn - local ats = eat and next(eat) and { } -- type test maybe faster if ats then - if attributeconverter then - for k,v in next, eat do - ats[#ats+1] = format('%s=%q',k,attributeconverter(v)) - end - else - for k,v in next, eat do - ats[#ats+1] = format('%s=%q',k,v) - end - end + handle("<",ens,":",etg," ",concat(ats," "),"/>") + else + handle("<",ens,":",etg,"/>") end - if ern and trace_remap and ern ~= ens then - ens = ern + end + else + if edt and #edt > 0 then + if ats then + handle("<",etg," ",concat(ats," "),">") + else + handle("<",etg,">") end - if ens ~= "" then - if edt and #edt > 0 then - if ats then - -- handle(format("<%s:%s %s>",ens,etg,concat(ats," "))) - handle("<" .. ens .. ":" .. etg .. " " .. concat(ats," ") .. ">") - else - -- handle(format("<%s:%s>",ens,etg)) - handle("<" .. ens .. ":" .. etg .. ">") - end - for i=1,#edt do - local e = edt[i] - if type(e) == "string" then - if textconverter then - handle(textconverter(e)) - else - handle(e) - end - else - serialize(e,handle,textconverter,attributeconverter,specialconverter,nocommands) - end - end - -- handle(format("</%s:%s>",ens,etg)) - handle("</" .. ens .. ":" .. etg .. ">") + for i=1,#edt do + local ei = edt[i] + if type(ei) == "string" then + handle(ei) else - if ats then - -- handle(format("<%s:%s %s/>",ens,etg,concat(ats," "))) - handle("<" .. ens .. ":" .. etg .. " " .. concat(ats," ") .. "/>") - else - -- handle(format("<%s:%s/>",ens,etg)) - handle("<" .. ens .. ":" .. etg .. "/>") - end + serialize(ei,handlers) end + end + handle("</",etg,">") + else + if ats then + handle("<",etg," ",concat(ats," "),"/>") else - if edt and #edt > 0 then - if ats then - -- handle(format("<%s %s>",etg,concat(ats," "))) - handle("<" .. etg .. " " .. concat(ats," ") .. ">") - else - -- handle(format("<%s>",etg)) - handle("<" .. etg .. ">") - end - for i=1,#edt do - local ei = edt[i] - if type(ei) == "string" then - if textconverter then - handle(textconverter(ei)) - else - handle(ei) - end - else - serialize(ei,handle,textconverter,attributeconverter,specialconverter,nocommands) - end - end - -- handle(format("</%s>",etg)) - handle("</" .. etg .. ">") - else - if ats then - -- handle(format("<%s %s/>",etg,concat(ats," "))) - handle("<" .. etg .. " " .. concat(ats," ") .. "/>") - else - -- handle(format("<%s/>",etg)) - handle("<" .. etg .. "/>") - end - end + handle("<",etg,"/>") end end - elseif type(e) == "string" then - if textconverter then - handle(textconverter(e)) + end +end + +local function verbose_pi(e,handlers) + handlers.handle("<?",e.dt[1],"?>") +end + +local function verbose_comment(e,handlers) + handlers.handle("<!--",e.dt[1],"-->") +end + +local function verbose_cdata(e,handlers) + handlers.handle("<![CDATA[", e.dt[1],"]]>") +end + +local function verbose_doctype(e,handlers) + handlers.handle("<!DOCTYPE ",e.dt[1],">") +end + +local function verbose_root(e,handlers) + handlers.serialize(e.dt,handlers) +end + +local function verbose_text(e,handlers) + handlers.handle(e) +end + +local function verbose_document(e,handlers) + local serialize = handlers.serialize + local functions = handlers.functions + for i=1,#e do + local ei = e[i] + if type(ei) == "string" then + functions["@tx@"](ei,handlers) else - handle(e) + serialize(ei,handlers) end - else - for i=1,#e do - local ei = e[i] - if type(ei) == "string" then - if textconverter then - handle(textconverter(ei)) - else - handle(ei) - end - else - serialize(ei,handle,textconverter,attributeconverter,specialconverter,nocommands) - end + end +end + +local function serialize(e,handlers,...) + local initialize = handlers.initialize + local finalize = handlers.finalize + local functions = handlers.functions + if initialize then + local state = initialize(...) + if not state == true then + return state end end + local etg = e.tg + if etg then + (functions[etg] or functions["@el@"])(e,handlers) + -- elseif type(e) == "string" then + -- functions["@tx@"](e,handlers) + else + functions["@dc@"](e,handlers) + end + if finalize then + return finalize() + end end -xml.serialize = serialize +local function xserialize(e,handlers) + local functions = handlers.functions + local etg = e.tg + if etg then + (functions[etg] or functions["@el@"])(e,handlers) + -- elseif type(e) == "string" then + -- functions["@tx@"](e,handlers) + else + functions["@dc@"](e,handlers) + end +end -function xml.checkbom(root) -- can be made faster - if root.ri then - local dt, found = root.dt, false - for k=1,#dt do - local v = dt[k] - if type(v) == "table" and v.special and v.tg == "@pi" and find(v.dt,"xml.*version=") then - found = true - break +local handlers = { } + +local function newhandlers(settings) + local t = table.copy(handlers.verbose or { }) -- merge + if settings then + for k,v in next, settings do + if type(v) == "table" then + tk = t[k] if not tk then tk = { } t[k] = tk end + for kk,vv in next, v do + tk[kk] = vv + end + else + t[k] = v end end - if not found then - insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } ) - insert(dt, 2, "\n" ) + if settings.name then + handlers[settings.name] = t end end + return t +end + +local nofunction = function() end + +function xml.sethandlersfunction(handler,name,fnc) + handler.functions[name] = fnc or nofunction +end + +function xml.gethandlersfunction(handler,name) + return handler.functions[name] end +function xml.gethandlers(name) + return handlers[name] +end + +newhandlers { + name = "verbose", + initialize = false, -- faster than nil and mt lookup + finalize = false, -- faster than nil and mt lookup + serialize = xserialize, + handle = print, + functions = { + ["@dc@"] = verbose_document, + ["@dt@"] = verbose_doctype, + ["@rt@"] = verbose_root, + ["@el@"] = verbose_element, + ["@pi@"] = verbose_pi, + ["@cm@"] = verbose_comment, + ["@cd@"] = verbose_cdata, + ["@tx@"] = verbose_text, + } +} + --[[ldx-- -<p>At the cost of some 25% runtime overhead you can first convert the tree to a string -and then handle the lot.</p> +<p>How you deal with saving data depends on your preferences. For a 40 MB database +file the timing on a 2.3 Core Duo are as follows (time in seconds):</p> + +<lines> +1.3 : load data from file to string +6.1 : convert string into tree +5.3 : saving in file using xmlsave +6.8 : converting to string using xml.tostring +3.6 : saving converted string in file +</lines> + +<p>Beware, these were timing with the old routine but measurements will not be that +much different I guess.</p> --ldx]]-- -function xml.tostring(root) -- 25% overhead due to collecting +-- maybe this will move to lxml-xml + +local result + +local xmlfilehandler = newhandlers { + name = "file", + initialize = function(name) result = io.open(name,"wb") return result end, + finalize = function() result:close() return true end, + handle = function(...) result:write(...) end, +} + +-- no checking on writeability here but not faster either +-- +-- local xmlfilehandler = newhandlers { +-- initialize = function(name) io.output(name,"wb") return true end, +-- finalize = function() io.close() return true end, +-- handle = io.write, +-- } + + +function xml.save(root,name) + serialize(root,xmlfilehandler,name) +end + +local result + +local xmlstringhandler = newhandlers { + name = "string", + initialize = function() result = { } return result end, + finalize = function() return concat(result) end, + handle = function(...) result[#result+1] = concat { ... } end +} + +local function xmltostring(root) -- 25% overhead due to collecting if root then if type(root) == 'string' then return root - elseif next(root) then -- next is faster than type (and >0 test) - local result = { } - serialize(root,function(s) result[#result+1] = s end) -- brrr, slow (direct printing is faster) - return concat(result,"") + else -- if next(root) then -- next is faster than type (and >0 test) + return serialize(root,xmlstringhandler) or "" end end return "" end +local function xmltext(root) -- inline + return (root and xmltostring(root)) or "" +end + +function initialize_mt(root) + mt = { __tostring = xmltext, __index = root } +end + +xml.defaulthandlers = handlers +xml.newhandlers = newhandlers +xml.serialize = serialize +xml.tostring = xmltostring +xml.text = xmltext + --[[ldx-- <p>The next function operated on the content only and needs a handle function that accepts a string.</p> --ldx]]-- -function xml.string(e,handle) +local function xmlstring(e,handle) if not handle or (e.special and e.tg ~= "@rt@") then -- nothing elseif e.tg then local edt = e.dt if edt then for i=1,#edt do - xml.string(edt[i],handle) + xmlstring(edt[i],handle) end end else @@ -734,33 +938,16 @@ function xml.string(e,handle) end end ---[[ldx-- -<p>How you deal with saving data depends on your preferences. For a 40 MB database -file the timing on a 2.3 Core Duo are as follows (time in seconds):</p> - -<lines> -1.3 : load data from file to string -6.1 : convert string into tree -5.3 : saving in file using xmlsave -6.8 : converting to string using xml.tostring -3.6 : saving converted string in file -</lines> - -<p>The save function is given below.</p> ---ldx]]-- - -function xml.save(root,name) - local f = io.open(name,"w") - if f then - xml.serialize(root,function(s) f:write(s) end) - f:close() - end -end +xml.string = xmlstring --[[ldx-- <p>A few helpers:</p> --ldx]]-- +function xml.parent(root) + return root.__p__ +end + function xml.body(root) return (root.ri and root.dt[root.ri]) or root end @@ -773,34 +960,19 @@ function xml.content(root) -- bugged return (root and root.dt and xml.tostring(root.dt)) or "" end -function xml.isempty(root, pattern) - if pattern == "" or pattern == "*" then - pattern = nil - end - if pattern then - -- todo - return false - else - return not root or not root.dt or #root.dt == 0 or root.dt == "" - end -end - --[[ldx-- <p>The next helper erases an element but keeps the table as it is, and since empty strings are not serialized (effectively) it does not harm. Copying the table would take more time. Usage:</p> - -<typing> -dt[k] = xml.empty() or xml.empty(dt,k) -</typing> --ldx]]-- -function xml.empty(dt,k) - if dt and k then - dt[k] = "" - return dt[k] - else - return "" +function xml.erase(dt,k) + if dt then + if k then + dt[k] = "" + else for k=1,#dt do + dt[1] = { "" } + end end end end |