summaryrefslogtreecommitdiff
path: root/tex/context/base/lxml-tab.lua
diff options
context:
space:
mode:
authorHans Hagen <pragma@wxs.nl>2009-10-16 16:13:00 +0200
committerHans Hagen <pragma@wxs.nl>2009-10-16 16:13:00 +0200
commit7f9b179ad5be5000f67192f283d20e7120402bd9 (patch)
tree18f83a8cbfe7fed1c2a6939fb4b2cf10473abbbe /tex/context/base/lxml-tab.lua
parentc878054f6360d50885dbdab96643a8f3ac61c46c (diff)
downloadcontext-7f9b179ad5be5000f67192f283d20e7120402bd9.tar.gz
beta 2009.10.16 16:13
Diffstat (limited to 'tex/context/base/lxml-tab.lua')
-rw-r--r--tex/context/base/lxml-tab.lua762
1 files changed, 467 insertions, 295 deletions
diff --git a/tex/context/base/lxml-tab.lua b/tex/context/base/lxml-tab.lua
index b49bf0ecb..faafa4462 100644
--- a/tex/context/base/lxml-tab.lua
+++ b/tex/context/base/lxml-tab.lua
@@ -10,6 +10,8 @@ if not modules then modules = { } end modules ['lxml-tab'] = {
-- stripping spaces from e.g. cont-en.xml saves .2 sec runtime so it's not worth the
-- trouble
+local trace_entities = false trackers.register("xml.entities", function(v) trace_entities = v end)
+
--[[ldx--
<p>The parser used here is inspired by the variant discussed in the lua book, but
handles comment and processing instructions, has a different structure, provides
@@ -17,18 +19,6 @@ parent access; a first version used different trickery but was less optimized to
went this route. First we had a find based parser, now we have an <l n='lpeg'/> based one.
The find based parser can be found in l-xml-edu.lua along with other older code.</p>
-<p>Expecially the lpath code is experimental, we will support some of xpath, but
-only things that make sense for us; as compensation it is possible to hook in your
-own functions. Apart from preprocessing content for <l n='context'/> we also need
-this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
-files.</p>
-
-<typing>
-a/b/c /*/c
-a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
-a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
-</typing>
-
<p>Beware, the interface may change. For instance at, ns, tg, dt may get more
verbose names. Once the code is stable we will also remove some tracing and
optimize the code.</p>
@@ -39,26 +29,9 @@ xml = xml or { }
--~ local xml = xml
local concat, remove, insert = table.concat, table.remove, table.insert
-local type, next, setmetatable, getmetatable = type, next, setmetatable, getmetatable
+local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber
local format, lower, find = string.format, string.lower, string.find
-
---[[ldx--
-<p>This module can be used stand alone but also inside <l n='mkiv'/> in
-which case it hooks into the tracker code. Therefore we provide a few
-functions that set the tracers.</p>
---ldx]]--
-
-local trace_remap = false
-
-if trackers then
- trackers.register("xml.remap", function(v) trace_remap = v end)
-end
-
-function xml.settrace(str,value)
- if str == "remap" then
- trace_remap = value or false
- end
-end
+local utfchar = unicode.utf8.char
--[[ldx--
<p>First a hack to enable namespace resolving. A namespace is characterized by
@@ -165,17 +138,16 @@ element.</p>
</typing>
--ldx]]--
-xml.strip_cm_and_dt = false -- an extra global flag, in case we have many includes
-
-- not just one big nested table capture (lpeg overflow)
local nsremap, resolvens = xml.xmlns, xml.resolvens
local stack, top, dt, at, xmlns, errorstr, entities = {}, {}, {}, {}, {}, nil, {}
+local strip, cleanup, utfize, resolve = false, false, false, false
-local mt = { __tostring = xml.text }
+local mt = { }
-function initialize_mt(root)
+function initialize_mt(root) -- we will make a xml.new that then sets the mt as field
mt = { __tostring = xml.text, __index = root }
end
@@ -187,13 +159,6 @@ function xml.check_error(top,toclose)
return ""
end
-local strip = false
-local cleanup = false
-
-function xml.set_text_cleanup(fnc)
- cleanup = fnc
-end
-
local function add_attribute(namespace,tag,value)
if cleanup and #value > 0 then
value = cleanup(value) -- new
@@ -209,6 +174,22 @@ local function add_attribute(namespace,tag,value)
end
end
+local function add_empty(spacing, namespace, tag) -- grammar action for an empty element (<tag/>): stores it in the dt of the element on top of the stack
+ if #spacing > 0 then
+ dt[#dt+1] = spacing -- whitespace that preceded the tag is kept as a text entry in the list dt points at right now
+ end
+ local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace -- no prefix: last entry of xmlns; otherwise remapped name or the prefix itself
+ top = stack[#stack]
+ dt = top.dt -- from here on dt is the child list of the stack top
+ local t = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = top } -- __p__ points back at the parent table
+ dt[#dt+1] = t
+ setmetatable(t, mt)
+ if at.xmlns then
+ remove(xmlns) -- drops the last xmlns entry when this element carried an xmlns attribute
+ end
+ at = { } -- the collected attribute table now belongs to t, so start a new one
+end
+
local function add_begin(spacing, namespace, tag)
if #spacing > 0 then
dt[#dt+1] = spacing
@@ -234,28 +215,12 @@ local function add_end(spacing, namespace, tag)
end
dt = top.dt
dt[#dt+1] = toclose
- dt[0] = top
+ -- dt[0] = top -- nasty circular reference when serializing table
if toclose.at.xmlns then
remove(xmlns)
end
end
-local function add_empty(spacing, namespace, tag)
- if #spacing > 0 then
- dt[#dt+1] = spacing
- end
- local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace
- top = stack[#stack]
- dt = top.dt
- local t = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = top }
- dt[#dt+1] = t
- setmetatable(t, mt)
- if at.xmlns then
- remove(xmlns)
- end
- at = { }
-end
-
local function add_text(text)
if cleanup and #text > 0 then
dt[#dt+1] = cleanup(text)
@@ -279,7 +244,109 @@ local function set_message(txt)
errorstr = "garbage at the end of the file: " .. gsub(txt,"([ \n\r\t]*)","")
end
-local P, S, R, C, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V
+local reported_attribute_errors = { }
+
+-- Grammar action for an attribute value that is not properly quoted. Each
+-- distinct string is reported once; the first time it is also stored in the
+-- _error_ field of the attribute table being built. The string itself is
+-- always passed on unchanged as the attribute value.
+local function attribute_value_error(str)
+    if reported_attribute_errors[str] then
+        return str
+    end
+    logs.report("xml","invalid attribute value: %q",str)
+    reported_attribute_errors[str] = true
+    at._error_ = str
+    return str
+end
+-- Grammar action for text inside a tag that does not form a valid
+-- name=value pair. Each distinct string is reported once; the first time it
+-- is also stored in the _error_ field of the attribute table being built.
+-- The offending string is returned unchanged.
+local function attribute_specification_error(str)
+    if reported_attribute_errors[str] then
+        return str
+    end
+    logs.report("xml","invalid attribute specification: %q",str)
+    reported_attribute_errors[str] = true
+    at._error_ = str
+    return str
+end
+
+local dcache, hcache, acache = { }, { }, { }
+
+function xml.unknown_dec_entity_format(str) return format("&%s;", str) end -- fallback text for a decimal reference that cannot be converted; NOTE(review): the result has no '#', confirm that is intended
+function xml.unknown_hex_entity_format(str) return format("&#x%s;",str) end -- fallback text for a hexadecimal reference that cannot be converted
+function xml.unknown_any_entity_format(str) return format("&%s;", str) end -- fallback text for a named entity that is not known; all three are public so they can be overloaded
+
+-- Grammar action for a hexadecimal character reference. The grammar has
+-- already swallowed the leading "&#x" and the trailing ";", so str holds
+-- only the hex digits.
+--
+-- With utfize enabled the reference becomes the matching utf character (or
+-- the text produced by xml.unknown_hex_entity_format when str is not a valid
+-- hex number). Otherwise the reference is passed through unchanged, which
+-- means the "&#x" prefix has to be put back here.
+--
+-- Results are cached in hcache, keyed on str only: a value cached under one
+-- utfize setting is also returned when a later conversion uses the other.
+local function handle_hex_entity(str)
+    local h = hcache[str]
+    if not h then
+        if utfize then
+            local n = tonumber(str,16)
+            h = (n and utfchar(n)) or xml.unknown_hex_entity_format(str) or ""
+            if not n then
+                logs.report("xml","utfize, ignoring hex entity &#x%s;",str)
+            elseif trace_entities then
+                -- report the converted character (was an undefined variable)
+                logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,h)
+            end
+        else
+            if trace_entities then
+                logs.report("xml","found entity &#x%s;",str)
+            end
+            -- restore the complete reference, including the 'x' marker
+            h = "&#x" .. str .. ";"
+        end
+        hcache[str] = h
+    end
+    return h
+end
+-- Grammar action for a decimal character reference. The grammar has already
+-- swallowed the leading "&#" and the trailing ";", so str holds only the
+-- digits.
+--
+-- With utfize enabled the reference becomes the matching utf character (or
+-- the text produced by xml.unknown_dec_entity_format when str is not a
+-- number). Otherwise the reference is passed through unchanged, which means
+-- the "&#" prefix has to be put back here; without the '#' a numeric
+-- reference would silently turn into a named entity.
+--
+-- Results are cached in dcache, keyed on str only: a value cached under one
+-- utfize setting is also returned when a later conversion uses the other.
+local function handle_dec_entity(str)
+    local d = dcache[str]
+    if not d then
+        if utfize then
+            local n = tonumber(str)
+            d = (n and utfchar(n)) or xml.unknown_dec_entity_format(str) or ""
+            if not n then
+                logs.report("xml","utfize, ignoring dec entity &#%s;",str)
+            elseif trace_entities then
+                -- report the converted character (was an undefined variable)
+                logs.report("xml","utfize, converting dec entity &#%s; into %s",str,d)
+            end
+        else
+            if trace_entities then
+                logs.report("xml","found entity &#%s;",str)
+            end
+            -- restore the complete reference, including the '#' marker
+            d = "&#" .. str .. ";"
+        end
+        dcache[str] = d
+    end
+    return d
+end
+-- Grammar action for a named entity reference. The grammar has already
+-- swallowed the "&" and the ";", so str is the bare entity name.
+--
+-- With resolve enabled the name is looked up in the entities table of the
+-- current conversion first and in the shared acache next; an unknown name
+-- yields the text produced by xml.unknown_any_entity_format, which is then
+-- cached. Without resolve the reference is passed through as "&name;".
+--
+-- When tracing is on, acache doubles as a "reported already" marker for
+-- names found in the per-conversion entities table.
+local function handle_any_entity(str)
+    if resolve then
+        local a = entities[str] -- per instance !
+        if not a then
+            a = acache[str]
+            if not a then
+                if trace_entities then
+                    logs.report("xml","ignoring entity &%s;",str)
+                else
+                    -- can be defined in a global mapper and intercepted elsewhere
+                    -- as happens in lxml-tex.lua
+                end
+                a = xml.unknown_any_entity_format(str) or ""
+                acache[str] = a
+            end
+        elseif trace_entities then
+            if not acache[str] then
+                -- report the replacement text (was an undefined variable)
+                logs.report("xml","converting entity &%s; into %s",str,a)
+                acache[str] = a
+            end
+        end
+        return a
+    else
+        local a = acache[str]
+        if not a then
+            if trace_entities then
+                logs.report("xml","found entity &%s;",str)
+            end
+            a = "&" .. str .. ";"
+            acache[str] = a
+        end
+        return a
+    end
+end
+
+local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs
local space = S(' \r\n\t')
local open = P('<')
@@ -289,6 +356,7 @@ local dquote = S('"')
local equal = P('=')
local slash = P('/')
local colon = P(':')
+local semicolon = P(';')
local ampersand = P('&')
local valid = R('az', 'AZ', '09') + S('_-.')
local name_yes = C(valid^1) * colon * C(valid^1)
@@ -299,15 +367,36 @@ local utfbom = P('\000\000\254\255') + P('\255\254\000\000') +
P('\255\254') + P('\254\255') + P('\239\187\191') -- no capture
local spacing = C(space^0)
-local justtext = C((1-open)^1)
+
+local entitycontent = (1-open-semicolon)^0
+local entity = ampersand/"" * (
+ P("#")/"" * (
+ P("x")/"" * (entitycontent/handle_hex_entity) +
+ (entitycontent/handle_dec_entity)
+ ) + (entitycontent/handle_any_entity)
+ ) * (semicolon/"")
+
+local text_unparsed = C((1-open)^1)
+local text_parsed = Cs(((1-open-ampersand)^1 + entity)^1)
+
local somespace = space^1
local optionalspace = space^0
local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote) -- ampersand and < also invalid in value
-local attribute = (somespace * name * optionalspace * equal * optionalspace * value) / add_attribute
-local attributes = attribute^0
-local text = justtext / add_text
+local whatever = space * name * optionalspace * equal
+local wrongvalue = C(P(1-whatever-close)^1 + P(1-close)^1) / attribute_value_error
+
+local attributevalue = value + wrongvalue
+
+local attribute = (somespace * name * optionalspace * equal * optionalspace * attributevalue) / add_attribute
+----- attributes = (attribute)^0
+
+local endofattributes = slash * close + close -- recovery of flacky html
+local attributes = (attribute + somespace^-1 * (((1-endofattributes)^1)/attribute_specification_error))^0
+
+local parsedtext = text_parsed / add_text
+local unparsedtext = text_unparsed / add_text
local balanced = P { "[" * ((1 - S"[]") + V(1))^0 * "]" } -- taken from lpeg manual, () example
local emptyelement = (spacing * open * name * attributes * optionalspace * slash * close) / add_empty
@@ -360,25 +449,34 @@ local doctype = (spacing * begindoctype * somedoctype * enddoct
-- local cdata = (lpeg.Cc("@cd@") * spacing * begincdata * somecdata * endcdata ) / add_special
-- local doctype = (lpeg.Cc("@dt@") * spacing * begindoctype * somedoctype * enddoctype ) / add_special
-local trailer = space^0 * (justtext/set_message)^0
+local trailer = space^0 * (text_unparsed/set_message)^0
-- comment + emptyelement + text + cdata + instruction + V("parent"), -- 6.5 seconds on 40 MB database file
-- text + comment + emptyelement + cdata + instruction + V("parent"), -- 5.8
-- text + V("parent") + emptyelement + comment + cdata + instruction, -- 5.5
-local grammar = P { "preamble",
+local grammar_parsed_text = P { "preamble",
preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * V("parent") * trailer,
parent = beginelement * V("children")^0 * endelement,
- children = text + V("parent") + emptyelement + comment + cdata + instruction,
+ children = parsedtext + V("parent") + emptyelement + comment + cdata + instruction,
}
--- todo: xml.new + properties like entities and strip and such (store in root)
+local grammar_unparsed_text = P { "preamble",
+ preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * V("parent") * trailer,
+ parent = beginelement * V("children")^0 * endelement,
+ children = unparsedtext + V("parent") + emptyelement + comment + cdata + instruction,
+}
-function xml.convert(data, no_root, strip_cm_and_dt, given_entities, parent_root) -- maybe use table met k/v (given_entities may disapear)
- strip = strip_cm_and_dt or xml.strip_cm_and_dt
- stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, given_entities or {}
- if parent_root then
- mt = getmetatable(parent_root)
+local function xmlconvert(data, settings)
+ settings = settings or { } -- no_root strip_cm_and_dt given_entities parent_root error_handler
+ strip = settings.strip_cm_and_dt
+ utfize = settings.utfize_entities
+ resolve = settings.resolve_entities
+ cleanup = settings.text_cleanup
+ stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, settings.entities or {}
+ reported_attribute_errors = { }
+ if settings.parent_root then
+ mt = getmetatable(settings.parent_root)
else
initialize_mt(top)
end
@@ -387,20 +485,36 @@ function xml.convert(data, no_root, strip_cm_and_dt, given_entities, parent_root
dt = top.dt
if not data or data == "" then
errorstr = "empty xml file"
- elseif not grammar:match(data) then
- errorstr = "invalid xml file"
+ elseif utfize or resolve then
+ if grammar_parsed_text:match(data) then
+ errorstr = ""
+ else
+ errorstr = "invalid xml file - parsed text"
+ end
else
- errorstr = ""
+ if grammar_unparsed_text:match(data) then
+ errorstr = ""
+ else
+ errorstr = "invalid xml file - unparsed text"
+ end
end
if errorstr and errorstr ~= "" then
- result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={}, er = true } }, error = true }
+ result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={}, er = true } } }
setmetatable(stack, mt)
- if xml.error_handler then xml.error_handler("load",errorstr) end
+ local error_handler = settings.error_handler
+ if error_handler == false then
+ -- no error message
+ else
+ error_handler = error_handler or xml.error_handler
+ if error_handler then
+ xml.error_handler("load",errorstr)
+ end
+ end
else
result = stack[1]
end
- if not no_root then
- result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={}, entities = entities }
+ if not settings.no_root then
+ result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={}, entities = entities, settings = settings }
setmetatable(result, mt)
local rdt = result.dt
for k=1,#rdt do
@@ -411,9 +525,14 @@ function xml.convert(data, no_root, strip_cm_and_dt, given_entities, parent_root
end
end
end
+ if errorstr and errorstr ~= "" then
+ result.error = true
+ end
return result
end
+xml.convert = xmlconvert
+
--[[ldx--
<p>Packaging data in an xml like table is done with the following
function. Maybe it will go away (when not used).</p>
@@ -446,16 +565,16 @@ function xml.load(filename)
if type(filename) == "string" then
local f = io.open(filename,'r')
if f then
- local root = xml.convert(f:read("*all"))
+ local root = xmlconvert(f:read("*all"))
f:close()
return root
else
- return xml.convert("")
+ return xmlconvert("")
end
elseif filename then -- filehandle
- return xml.convert(filename:read("*all"))
+ return xmlconvert(filename:read("*all"))
else
- return xml.convert("")
+ return xmlconvert("")
end
end
@@ -464,9 +583,11 @@ end
valid trees, which is what the next function does.</p>
--ldx]]--
+local no_root = { no_root = true }
+
function xml.toxml(data)
if type(data) == "string" then
- local root = { xml.convert(data,true) }
+ local root = { xmlconvert(data,no_root) }
return (#root > 1 and root) or root[1]
else
return data
@@ -511,222 +632,305 @@ alternative.</p>
-- todo: add <?xml version='1.0' standalone='yes'?> when not present
-local fallbackhandle = (tex and tex.sprint) or io.write
-
-local serializer
-
-function xml.setserializer(f)
- serializer = f
-end
-
-local function serialize(e, handle, textconverter, attributeconverter, specialconverter, nocommands)
- if not e then
- return
- elseif not nocommands then
- local ec = e.command
- if ec ~= nil then -- we can have all kind of types
- if e.special then
- local etg, edt = e.tg, e.dt
- local spc = specialconverter and specialconverter[etg]
- if spc then
- local result = spc(edt[1])
- if result then
- handle(result)
- return
- else
- -- no need to handle any further
- end
- end
- end
- if serializer then
- serializer(e,ec)
- return
+-- Despite its name this makes sure that a tree starts with an xml declaration:
+-- when root.ri is set and none of the top level entries is a processing
+-- instruction that mentions an xml version, a default declaration plus a
+-- newline is inserted at the front of root.dt.
+-- The test used to compare against "@pi" (nodes carry "@pi@") and to search
+-- the dt table itself instead of its first entry, so an existing declaration
+-- was never recognized and a second one was always added.
+function xml.checkbom(root) -- can be made faster
+ if root.ri then
+ local dt, found = root.dt, false
+ for k=1,#dt do
+ local v = dt[k]
+ if type(v) == "table" and v.special and v.tg == "@pi@" and v.dt and find(v.dt[1] or "","xml.*version=") then
+ found = true
+ break
end
end
+ if not found then
+ insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } )
+ insert(dt, 2, "\n" )
+ end
end
- handle = handle or fallbackhandle
- local etg = e.tg
- if etg then
- if e.special then
- local edt = e.dt
- local spc = specialconverter and specialconverter[etg]
- if spc then
- local result = spc(edt[1])
- if result then
- handle(result)
+end
+
+--[[ldx--
+<p>At the cost of some 25% runtime overhead you can first convert the tree to a string
+and then handle the lot.</p>
+--ldx]]--
+
+-- new experimental reorganized serialize
+
+local function verbose_element(e,handlers)
+ local handle = handlers.handle
+ local serialize = handlers.serialize
+ local ens, etg, eat, edt, ern = e.ns, e.tg, e.at, e.dt, e.rn
+ local ats = eat and next(eat) and { }
+ if ats then
+ for k,v in next, eat do
+ ats[#ats+1] = format('%s=%q',k,v)
+ end
+ end
+ if ern and trace_remap and ern ~= ens then
+ ens = ern
+ end
+ if ens ~= "" then
+ if edt and #edt > 0 then
+ if ats then
+ handle("<",ens,":",etg," ",concat(ats," "),">")
+ else
+ handle("<",ens,":",etg,">")
+ end
+ for i=1,#edt do
+ local e = edt[i]
+ if type(e) == "string" then
+ handle(e)
else
- -- no need to handle any further
+ serialize(e,handlers)
end
- elseif etg == "@pi@" then
- -- handle(format("<?%s?>",edt[1]))
- handle("<?" .. edt[1] .. "?>")
- elseif etg == "@cm@" then
- -- handle(format("<!--%s-->",edt[1]))
- handle("<!--" .. edt[1] .. "-->")
- elseif etg == "@cd@" then
- -- handle(format("<![CDATA[%s]]>",edt[1]))
- handle("<![CDATA[" .. edt[1] .. "]]>")
- elseif etg == "@dt@" then
- -- handle(format("<!DOCTYPE %s>",edt[1]))
- handle("<!DOCTYPE " .. edt[1] .. ">")
- elseif etg == "@rt@" then
- serialize(edt,handle,textconverter,attributeconverter,specialconverter,nocommands)
end
+ handle("</",ens,":",etg,">")
else
- local ens, eat, edt, ern = e.ns, e.at, e.dt, e.rn
- local ats = eat and next(eat) and { } -- type test maybe faster
if ats then
- if attributeconverter then
- for k,v in next, eat do
- ats[#ats+1] = format('%s=%q',k,attributeconverter(v))
- end
- else
- for k,v in next, eat do
- ats[#ats+1] = format('%s=%q',k,v)
- end
- end
+ handle("<",ens,":",etg," ",concat(ats," "),"/>")
+ else
+ handle("<",ens,":",etg,"/>")
end
- if ern and trace_remap and ern ~= ens then
- ens = ern
+ end
+ else
+ if edt and #edt > 0 then
+ if ats then
+ handle("<",etg," ",concat(ats," "),">")
+ else
+ handle("<",etg,">")
end
- if ens ~= "" then
- if edt and #edt > 0 then
- if ats then
- -- handle(format("<%s:%s %s>",ens,etg,concat(ats," ")))
- handle("<" .. ens .. ":" .. etg .. " " .. concat(ats," ") .. ">")
- else
- -- handle(format("<%s:%s>",ens,etg))
- handle("<" .. ens .. ":" .. etg .. ">")
- end
- for i=1,#edt do
- local e = edt[i]
- if type(e) == "string" then
- if textconverter then
- handle(textconverter(e))
- else
- handle(e)
- end
- else
- serialize(e,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
- end
- -- handle(format("</%s:%s>",ens,etg))
- handle("</" .. ens .. ":" .. etg .. ">")
+ for i=1,#edt do
+ local ei = edt[i]
+ if type(ei) == "string" then
+ handle(ei)
else
- if ats then
- -- handle(format("<%s:%s %s/>",ens,etg,concat(ats," ")))
- handle("<" .. ens .. ":" .. etg .. " " .. concat(ats," ") .. "/>")
- else
- -- handle(format("<%s:%s/>",ens,etg))
- handle("<" .. ens .. ":" .. etg .. "/>")
- end
+ serialize(ei,handlers)
end
+ end
+ handle("</",etg,">")
+ else
+ if ats then
+ handle("<",etg," ",concat(ats," "),"/>")
else
- if edt and #edt > 0 then
- if ats then
- -- handle(format("<%s %s>",etg,concat(ats," ")))
- handle("<" .. etg .. " " .. concat(ats," ") .. ">")
- else
- -- handle(format("<%s>",etg))
- handle("<" .. etg .. ">")
- end
- for i=1,#edt do
- local ei = edt[i]
- if type(ei) == "string" then
- if textconverter then
- handle(textconverter(ei))
- else
- handle(ei)
- end
- else
- serialize(ei,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
- end
- -- handle(format("</%s>",etg))
- handle("</" .. etg .. ">")
- else
- if ats then
- -- handle(format("<%s %s/>",etg,concat(ats," ")))
- handle("<" .. etg .. " " .. concat(ats," ") .. "/>")
- else
- -- handle(format("<%s/>",etg))
- handle("<" .. etg .. "/>")
- end
- end
+ handle("<",etg,"/>")
end
end
- elseif type(e) == "string" then
- if textconverter then
- handle(textconverter(e))
+ end
+end
+
+-- Serializes a processing instruction node as <?...?>; the raw content is
+-- the first entry of the node's dt.
+local function verbose_pi(e,handlers)
+    local handle = handlers.handle
+    handle("<?",e.dt[1],"?>")
+end
+
+-- Serializes a comment node as <!--...-->; the comment text is the first
+-- entry of the node's dt.
+local function verbose_comment(e,handlers)
+    local handle = handlers.handle
+    handle("<!--",e.dt[1],"-->")
+end
+
+-- Serializes a cdata node as <![CDATA[...]]>; the payload is the first
+-- entry of the node's dt.
+local function verbose_cdata(e,handlers)
+    local handle = handlers.handle
+    handle("<![CDATA[",e.dt[1],"]]>")
+end
+
+-- Serializes a doctype node as <!DOCTYPE ...>; the declaration body is the
+-- first entry of the node's dt.
+local function verbose_doctype(e,handlers)
+    local handle = handlers.handle
+    handle("<!DOCTYPE ",e.dt[1],">")
+end
+
+-- Serializes a root node: the node itself produces no output, its dt list is
+-- handed to the serializer of the handler set.
+local function verbose_root(e,handlers)
+    local serialize = handlers.serialize
+    serialize(e.dt,handlers)
+end
+
+-- Serializes a text entry: e is passed to the output handle as it is.
+local function verbose_text(e,handlers)
+    local handle = handlers.handle
+    handle(e)
+end
+
+local function verbose_document(e,handlers)
+ local serialize = handlers.serialize
+ local functions = handlers.functions
+ for i=1,#e do
+ local ei = e[i]
+ if type(ei) == "string" then
+ functions["@tx@"](ei,handlers)
else
- handle(e)
+ serialize(ei,handlers)
end
- else
- for i=1,#e do
- local ei = e[i]
- if type(ei) == "string" then
- if textconverter then
- handle(textconverter(ei))
- else
- handle(ei)
- end
- else
- serialize(ei,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
+ end
+end
+
+-- Entry point for serializing e with a handler set. The extra arguments go
+-- to handlers.initialize (when set); a false or nil result from it aborts the
+-- run and is returned as it is. After that e is dispatched on its tg field
+-- (unknown tags fall back to "@el@", tables without tg go to "@dc@") and the
+-- result of handlers.finalize (when set) is returned.
+local function serialize(e,handlers,...)
+ local initialize = handlers.initialize
+ local finalize = handlers.finalize
+ local functions = handlers.functions
+ if initialize then
+ local state = initialize(...)
+ if not state then -- was 'not state == true', which parses as '(not state) == true': the same test
+ return state
end
end
+ local etg = e.tg
+ if etg then
+ (functions[etg] or functions["@el@"])(e,handlers)
+ -- elseif type(e) == "string" then
+ -- functions["@tx@"](e,handlers)
+ else
+ functions["@dc@"](e,handlers)
+ end
+ if finalize then
+ return finalize()
+ end
end
-xml.serialize = serialize
+-- Dispatcher used for nested calls: it picks the function that belongs to
+-- the tg field of e (unknown tags fall back to "@el@", tables without a tg
+-- are treated as a document, "@dc@") and calls it. There is no initialize or
+-- finalize step here.
+local function xserialize(e,handlers)
+    local functions = handlers.functions
+    local tag = e.tg
+    local action
+    if tag then
+        action = functions[tag] or functions["@el@"]
+    else
+        action = functions["@dc@"]
+    end
+    action(e,handlers)
+end
-function xml.checkbom(root) -- can be made faster
- if root.ri then
- local dt, found = root.dt, false
- for k=1,#dt do
- local v = dt[k]
- if type(v) == "table" and v.special and v.tg == "@pi" and find(v.dt,"xml.*version=") then
- found = true
- break
+local handlers = { }
+
+-- Creates a handler set: it starts from a copy of the "verbose" set (or an
+-- empty table when that one is not defined yet) and merges settings into it.
+-- Table values (like the functions table) are merged key by key, all other
+-- values simply overwrite. When settings.name is given the result is also
+-- registered under that name in handlers. The new set is returned.
+local function newhandlers(settings)
+ local t = table.copy(handlers.verbose or { }) -- merge
+ if settings then
+ for k,v in next, settings do
+ if type(v) == "table" then
+ local tk = t[k] if not tk then tk = { } t[k] = tk end -- local: tk used to leak as a global
+ for kk,vv in next, v do
+ tk[kk] = vv
+ end
+ else
+ t[k] = v
end
end
- if not found then
- insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } )
- insert(dt, 2, "\n" )
+ if settings.name then
+ handlers[settings.name] = t
end
end
+ return t
+end
+
+local nofunction = function() end
+
+-- Installs fnc as the function for name in the given handler set; leaving
+-- out fnc installs a function that does nothing.
+function xml.sethandlersfunction(handler,name,fnc)
+    if not fnc then
+        fnc = nofunction
+    end
+    handler.functions[name] = fnc
+end
+
+function xml.gethandlersfunction(handler,name)
+ return handler.functions[name]
end
+function xml.gethandlers(name)
+ return handlers[name]
+end
+
+newhandlers {
+ name = "verbose",
+ initialize = false, -- faster than nil and mt lookup
+ finalize = false, -- faster than nil and mt lookup
+ serialize = xserialize,
+ handle = print,
+ functions = {
+ ["@dc@"] = verbose_document,
+ ["@dt@"] = verbose_doctype,
+ ["@rt@"] = verbose_root,
+ ["@el@"] = verbose_element,
+ ["@pi@"] = verbose_pi,
+ ["@cm@"] = verbose_comment,
+ ["@cd@"] = verbose_cdata,
+ ["@tx@"] = verbose_text,
+ }
+}
+
--[[ldx--
-<p>At the cost of some 25% runtime overhead you can first convert the tree to a string
-and then handle the lot.</p>
+<p>How you deal with saving data depends on your preferences. For a 40 MB database
+file the timings on a 2.3 Core Duo are as follows (time in seconds):</p>
+
+<lines>
+1.3 : load data from file to string
+6.1 : convert string into tree
+5.3 : saving in file using xmlsave
+6.8 : converting to string using xml.tostring
+3.6 : saving converted string in file
+</lines>
+
+<p>Beware, these timings were measured with the old routine, but I guess that the
+measurements will not be that much different.</p>
--ldx]]--
-function xml.tostring(root) -- 25% overhead due to collecting
+-- maybe this will move to lxml-xml
+
+local result
+
+local xmlfilehandler = newhandlers { -- handler set registered as "file"; it writes through the file kept in the local 'result'
+ name = "file",
+ initialize = function(name) result = io.open(name,"wb") return result end, -- returns nil when the file cannot be opened
+ finalize = function() result:close() return true end,
+ handle = function(...) result:write(...) end, -- all pieces are written in one call
+}
+
+-- no checking on writeability here but not faster either
+--
+-- local xmlfilehandler = newhandlers {
+-- initialize = function(name) io.output(name,"wb") return true end,
+-- finalize = function() io.close() return true end,
+-- handle = io.write,
+-- }
+
+
+function xml.save(root,name)
+ serialize(root,xmlfilehandler,name)
+end
+
+local result
+
+local xmlstringhandler = newhandlers { -- handler set registered as "string"; it collects the output in the local table 'result'
+ name = "string",
+ initialize = function() result = { } return result end, -- a fresh collector for each run
+ finalize = function() return concat(result) end, -- the collected pieces joined into one string
+ handle = function(...) result[#result+1] = concat { ... } end -- the pieces of one call are joined first and stored as one entry
+}
+
+local function xmltostring(root) -- 25% overhead due to collecting
if root then
if type(root) == 'string' then
return root
- elseif next(root) then -- next is faster than type (and >0 test)
- local result = { }
- serialize(root,function(s) result[#result+1] = s end) -- brrr, slow (direct printing is faster)
- return concat(result,"")
+ else -- if next(root) then -- next is faster than type (and >0 test)
+ return serialize(root,xmlstringhandler) or ""
end
end
return ""
end
+-- Returns the serialized form of root, or an empty string when there is no
+-- root (or nothing comes back from the serializer).
+local function xmltext(root) -- inline
+    if root then
+        local str = xmltostring(root)
+        if str then
+            return str
+        end
+    end
+    return ""
+end
+
+function initialize_mt(root)
+ mt = { __tostring = xmltext, __index = root }
+end
+
+xml.defaulthandlers = handlers
+xml.newhandlers = newhandlers
+xml.serialize = serialize
+xml.tostring = xmltostring
+xml.text = xmltext
+
--[[ldx--
<p>The next function operates on the content only and needs a handle function
that accepts a string.</p>
--ldx]]--
-function xml.string(e,handle)
+local function xmlstring(e,handle)
if not handle or (e.special and e.tg ~= "@rt@") then
-- nothing
elseif e.tg then
local edt = e.dt
if edt then
for i=1,#edt do
- xml.string(edt[i],handle)
+ xmlstring(edt[i],handle)
end
end
else
@@ -734,33 +938,16 @@ function xml.string(e,handle)
end
end
---[[ldx--
-<p>How you deal with saving data depends on your preferences. For a 40 MB database
-file the timing on a 2.3 Core Duo are as follows (time in seconds):</p>
-
-<lines>
-1.3 : load data from file to string
-6.1 : convert string into tree
-5.3 : saving in file using xmlsave
-6.8 : converting to string using xml.tostring
-3.6 : saving converted string in file
-</lines>
-
-<p>The save function is given below.</p>
---ldx]]--
-
-function xml.save(root,name)
- local f = io.open(name,"w")
- if f then
- xml.serialize(root,function(s) f:write(s) end)
- f:close()
- end
-end
+xml.string = xmlstring
--[[ldx--
<p>A few helpers:</p>
--ldx]]--
+function xml.parent(root)
+ return root.__p__
+end
+
function xml.body(root)
return (root.ri and root.dt[root.ri]) or root
end
@@ -773,34 +960,19 @@ function xml.content(root) -- bugged
return (root and root.dt and xml.tostring(root.dt)) or ""
end
-function xml.isempty(root, pattern)
- if pattern == "" or pattern == "*" then
- pattern = nil
- end
- if pattern then
- -- todo
- return false
- else
- return not root or not root.dt or #root.dt == 0 or root.dt == ""
- end
-end
-
--[[ldx--
<p>The next helper erases an element but keeps the table as it is,
and since empty strings are not serialized (effectively) it does
not harm. Copying the table would take more time. Usage:</p>
-
-<typing>
-dt[k] = xml.empty() or xml.empty(dt,k)
-</typing>
--ldx]]--
-function xml.empty(dt,k)
- if dt and k then
- dt[k] = ""
- return dt[k]
- else
- return ""
+function xml.erase(dt,k)
+ if dt then
+ if k then
+ dt[k] = ""
+ else for k=1,#dt do
+ dt[1] = { "" }
+ end end
end
end