summaryrefslogtreecommitdiff
path: root/tex/context/base/lxml-tab.lua
diff options
context:
space:
mode:
authorHans Hagen <pragma@wxs.nl>2010-04-23 18:34:00 +0200
committerHans Hagen <pragma@wxs.nl>2010-04-23 18:34:00 +0200
commitde436da91fb6ac8bf2c4f729c86289bc7914676a (patch)
treeb20a5268cbb5ddd32ced269d069280436e591d8e /tex/context/base/lxml-tab.lua
parent5d4880ac91b0e605b3392d15dd7572489b92333e (diff)
downloadcontext-de436da91fb6ac8bf2c4f729c86289bc7914676a.tar.gz
beta 2010.04.23 18:34
Diffstat (limited to 'tex/context/base/lxml-tab.lua')
-rw-r--r--tex/context/base/lxml-tab.lua125
1 files changed, 72 insertions, 53 deletions
diff --git a/tex/context/base/lxml-tab.lua b/tex/context/base/lxml-tab.lua
index 08466665e..bc1963eaf 100644
--- a/tex/context/base/lxml-tab.lua
+++ b/tex/context/base/lxml-tab.lua
@@ -145,7 +145,7 @@ element.</p>
local nsremap, resolvens = xml.xmlns, xml.resolvens
local stack, top, dt, at, xmlns, errorstr, entities = { }, { }, { }, { }, { }, nil, { }
-local strip, cleanup, utfize, resolve, resolve_predefined = false, false, false, false, false
+local strip, cleanup, utfize, resolve, resolve_predefined, unify_predefined = false, false, false, false, false, false
local dcache, hcache, acache = { }, { }, { }
local mt = { }
@@ -273,22 +273,72 @@ function xml.unknown_dec_entity_format(str) return (str == "" and "&error;") or
function xml.unknown_hex_entity_format(str) return format("&#x%s;",str) end
function xml.unknown_any_entity_format(str) return format("&#x%s;",str) end
+-- Parse the digits of a hexadecimal character reference. A parsable value is
+-- handed to utfchar; otherwise "h:<digits>" is returned together with true.
+local function fromhex(s)
+    local codepoint = tonumber(s,16)
+    if not codepoint then
+        return format("h:%s",s), true
+    end
+    return utfchar(codepoint)
+end
+
+-- Parse the digits of a decimal character reference. A parsable value is
+-- handed to utfchar; otherwise "d:<digits>" is returned together with true.
+local function fromdec(s)
+    local codepoint = tonumber(s)
+    if not codepoint then
+        return format("d:%s",s), true
+    end
+    return utfchar(codepoint)
+end
+
+-- one level expansion (simple case), no checking done
+
+-- rest: zero or more characters, stopping before the next ";"
+-- many: zero or more characters of any kind, so whatever is left
+-- (P is presumably lpeg.P; it is defined outside this fragment -- confirm)
+local rest = (1-P(";"))^0
+local many = P(1)^0
+
+-- Two spellings are accepted, tried in this order:
+--   1. "&#x...;" or "&#...;" as the complete subject (P(-1) demands the end)
+--   2. "#x..." or "#..." without the surrounding "&" and ";"
+-- "#x" is always tried before "#" because "#" is a prefix of "#x". The
+-- matched digits are passed to fromhex or fromdec respectively.
+local parsedentity =
+ P("&") * (P("#x")*(rest/fromhex) + P("#")*(rest/fromdec)) * P(";") * P(-1) +
+ (P("#x")*(many/fromhex) + P("#")*(many/fromdec))
+
+-- parsing in the xml file
+
+-- Maps the numeric value of a character reference (the result of tonumber on
+-- its digits) onto the matching predefined xml entity. The keys are the
+-- decimal code points of & " ' < > which are 38, 34, 39, 60 and 62; the
+-- octal spellings 42, 47, 74 and 76 would hit * / J L instead.
+local predefined_unified = {
+    [38] = "&amp;",
+    [34] = "&quot;",
+    [39] = "&apos;",
+    [60] = "&lt;",
+    [62] = "&gt;",
+}
+
+-- Maps a predefined xml entity onto the literal character it stands for. An
+-- entity can be looked up by name (amp, quot, apos, lt, gt) or by the decimal
+-- code point of the character (38, 34, 39, 60, 62).
+local predefined_simplified = {
+    [38] = "&", amp  = "&",
+    [34] = '"', quot = '"',
+    [39] = "'", apos = "'",
+    [60] = "<", lt   = "<",
+    [62] = ">", gt   = ">",
+}
+
local function handle_hex_entity(str)
local h = hcache[str]
if not h then
- if utfize then
- local n = tonumber(str,16)
+ local n = tonumber(str,16)
+ h = unify_predefined and predefined_unified[n]
+ if h then
+ if trace_entities then
+ logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,h)
+ end
+ elseif utfize then
h = (n and utfchar(n)) or xml.unknown_hex_entity_format(str) or ""
if not n then
logs.report("xml","utfize, ignoring hex entity &#x%s;",str)
elseif trace_entities then
- logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,c)
+ logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,h)
end
else
if trace_entities then
logs.report("xml","found entity &#x%s;",str)
end
- h = "&#c" .. str .. ";"
+ h = "&#x" .. str .. ";"
end
hcache[str] = h
end
@@ -298,13 +348,18 @@ end
local function handle_dec_entity(str)
+ -- str holds the digits of a decimal reference; dcache[str] is consulted first and the result is returned as d
local d = dcache[str]
if not d then
- if utfize then
- local n = tonumber(str)
+ local n = tonumber(str)
+ d = unify_predefined and predefined_unified[n]
+ if d then
+ if trace_entities then
+ logs.report("xml","utfize, converting dec entity &#%s; into %s",str,d)
+ end
+ elseif utfize then
d = (n and utfchar(n)) or xml.unknown_dec_entity_format(str) or ""
if not n then
logs.report("xml","utfize, ignoring dec entity &#%s;",str)
elseif trace_entities then
- logs.report("xml","utfize, converting dec entity &#%s; into %s",str,c)
+ logs.report("xml","utfize, converting dec entity &#%s; into %s",str,d)
end
else
if trace_entities then
@@ -317,48 +372,13 @@ local function handle_dec_entity(str)
return d
end
--- one level expansion (simple case)
-
-local function fromhex(s)
- local n = tonumber(s,16)
- if n then
- return utfchar(n)
- else
- return format("h:%s",s), true
- end
-end
-
-local function fromdec(s)
- local n = tonumber(s)
- if n then
- return utfchar(n)
- else
- return format("d:%s",s), true
- end
-end
-
-local rest = (1-P(";"))^0
-local many = P(1)^0
-
-local parsedentity =
- P("&") * (P("#x")*(rest/fromhex) + P("#")*(rest/fromdec)) * P(";") * P(-1) +
- (P("#x")*(many/fromhex) + P("#")*(many/fromdec))
-
xml.parsedentitylpeg = parsedentity
-local predefined = {
- amp = "&",
- lt = "<",
- gt = ">",
- quot = '"',
- apos = "'",
-}
-
local function handle_any_entity(str)
if resolve then
local a = acache[str] -- per instance ! todo
if not a then
- a = resolve_predefined and predefined[str]
+ a = resolve_predefined and predefined_simplified[str]
if a then
-- one of the predefined
elseif type(resolve) == "function" then
@@ -404,7 +424,7 @@ local function handle_any_entity(str)
if trace_entities then
logs.report("xml","found entity &%s;",str)
end
- a = resolve_predefined and predefined[str]
+ a = resolve_predefined and predefined_simplified[str]
if a then
-- one of the predefined
acache[str] = a
@@ -554,6 +574,7 @@ local function xmlconvert(data, settings)
utfize = settings.utfize_entities
resolve = settings.resolve_entities
resolve_predefined = settings.resolve_predefined_entities -- in case we have escaped entities
+ unify_predefined = settings.unify_predefined_entities -- &#038; -> &amp;
cleanup = settings.text_cleanup
stack, top, at, xmlns, errorstr, result, entities = { }, { }, { }, { }, nil, nil, settings.entities or { }
acache, hcache, dcache = { }, { }, { } -- not stored
@@ -660,21 +681,19 @@ the whole file first. The function accepts a string representing
a filename or a file handle.</p>
--ldx]]--
-function xml.load(filename)
+function xml.load(filename,settings)
+ -- filename is either a path (string) or an open file handle; settings is
+ -- passed on to xmlconvert untouched. When the file cannot be opened, or
+ -- filename is nil or false, the empty string is converted instead.
+ local data = ""
if type(filename) == "string" then
+ -- local data = io.loaddata(filename) -- todo: check type in io.loaddata
local f = io.open(filename,'r')
if f then
- local root = xmlconvert(f:read("*all"))
+ data = f:read("*all")
f:close()
- return root
- else
- return xmlconvert("")
end
elseif filename then -- filehandle
- return xmlconvert(filename:read("*all"))
- else
- return xmlconvert("")
+ data = filename:read("*all")
end
+ return xmlconvert(data,settings)
end
--[[ldx--