summaryrefslogtreecommitdiff
path: root/scripts/context/lua/mtxrun.lua
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/context/lua/mtxrun.lua')
-rw-r--r--scripts/context/lua/mtxrun.lua281
1 files changed, 197 insertions, 84 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 3c72f59d1..cefa3192b 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -3950,7 +3950,7 @@ element.</p>
local nsremap, resolvens = xml.xmlns, xml.resolvens
local stack, top, dt, at, xmlns, errorstr, entities = { }, { }, { }, { }, { }, nil, { }
-local strip, cleanup, utfize, resolve, resolve_predefined = false, false, false, false, false
+local strip, cleanup, utfize, resolve, resolve_predefined, unify_predefined = false, false, false, false, false, false
local dcache, hcache, acache = { }, { }, { }
local mt = { }
@@ -4078,22 +4078,72 @@ function xml.unknown_dec_entity_format(str) return (str == "" and "&error;") or
function xml.unknown_hex_entity_format(str) return format("&#x%s;",str) end
function xml.unknown_any_entity_format(str) return format("&#x%s;",str) end
+local function fromhex(s)
+ local n = tonumber(s,16)
+ if n then
+ return utfchar(n)
+ else
+ return format("h:%s",s), true
+ end
+end
+
+local function fromdec(s)
+ local n = tonumber(s)
+ if n then
+ return utfchar(n)
+ else
+ return format("d:%s",s), true
+ end
+end
+
+-- one level expansion (simple case), no checking done
+
+local rest = (1-P(";"))^0
+local many = P(1)^0
+
+local parsedentity =
+ P("&") * (P("#x")*(rest/fromhex) + P("#")*(rest/fromdec)) * P(";") * P(-1) +
+ (P("#x")*(many/fromhex) + P("#")*(many/fromdec))
+
+-- parsing in the xml file
+
+local predefined_unified = {
+ [38] = "&amp;",
+ [42] = "&quot;",
+ [47] = "&apos;",
+ [74] = "&lt;",
+ [76] = "&gr;",
+}
+
+local predefined_simplified = {
+ [38] = "&", amp = "&",
+ [42] = '"', quot = '"',
+ [47] = "'", apos = "'",
+ [74] = "<", lt = "<",
+ [76] = ">", gt = ">",
+}
+
local function handle_hex_entity(str)
local h = hcache[str]
if not h then
- if utfize then
- local n = tonumber(str,16)
+ local n = tonumber(str,16)
+ h = unify_predefined and predefined_unified[n]
+ if h then
+ if trace_entities then
+ logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,h)
+ end
+ elseif utfize then
h = (n and utfchar(n)) or xml.unknown_hex_entity_format(str) or ""
if not n then
logs.report("xml","utfize, ignoring hex entity &#x%s;",str)
elseif trace_entities then
- logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,c)
+ logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,h)
end
else
if trace_entities then
logs.report("xml","found entity &#x%s;",str)
end
- h = "&#c" .. str .. ";"
+ h = "&#x" .. str .. ";"
end
hcache[str] = h
end
@@ -4103,13 +4153,18 @@ end
local function handle_dec_entity(str)
local d = dcache[str]
if not d then
- if utfize then
- local n = tonumber(str)
+ local n = tonumber(str)
+ d = unify_predefined and predefined_unified[n]
+ if d then
+ if trace_entities then
+ logs.report("xml","utfize, converting dec entity &#%s; into %s",str,d)
+ end
+ elseif utfize then
d = (n and utfchar(n)) or xml.unknown_dec_entity_format(str) or ""
if not n then
logs.report("xml","utfize, ignoring dec entity &#%s;",str)
elseif trace_entities then
- logs.report("xml","utfize, converting dec entity &#%s; into %s",str,c)
+ logs.report("xml","utfize, converting dec entity &#%s; into %s",str,h)
end
else
if trace_entities then
@@ -4122,48 +4177,13 @@ local function handle_dec_entity(str)
return d
end
--- one level expansion (simple case)
-
-local function fromhex(s)
- local n = tonumber(s,16)
- if n then
- return utfchar(n)
- else
- return format("h:%s",s), true
- end
-end
-
-local function fromdec(s)
- local n = tonumber(s)
- if n then
- return utfchar(n)
- else
- return format("d:%s",s), true
- end
-end
-
-local rest = (1-P(";"))^0
-local many = P(1)^0
-
-local parsedentity =
- P("&") * (P("#x")*(rest/fromhex) + P("#")*(rest/fromdec)) * P(";") * P(-1) +
- (P("#x")*(many/fromhex) + P("#")*(many/fromdec))
-
xml.parsedentitylpeg = parsedentity
-local predefined = {
- amp = "&",
- lt = "<",
- gt = ">",
- quot = '"',
- apos = "'",
-}
-
local function handle_any_entity(str)
if resolve then
local a = acache[str] -- per instance ! todo
if not a then
- a = resolve_predefined and predefined[str]
+ a = resolve_predefined and predefined_simplified[str]
if a then
-- one of the predefined
elseif type(resolve) == "function" then
@@ -4209,7 +4229,7 @@ local function handle_any_entity(str)
if trace_entities then
logs.report("xml","found entity &%s;",str)
end
- a = resolve_predefined and predefined[str]
+ a = resolve_predefined and predefined_simplified[str]
if a then
-- one of the predefined
acache[str] = a
@@ -4359,6 +4379,7 @@ local function xmlconvert(data, settings)
utfize = settings.utfize_entities
resolve = settings.resolve_entities
resolve_predefined = settings.resolve_predefined_entities -- in case we have escaped entities
+ unify_predefined = settings.unify_predefined_entities -- &#038; -> &amp;
cleanup = settings.text_cleanup
stack, top, at, xmlns, errorstr, result, entities = { }, { }, { }, { }, nil, nil, settings.entities or { }
acache, hcache, dcache = { }, { }, { } -- not stored
@@ -4465,21 +4486,19 @@ the whole file first. The function accepts a string representing
a filename or a file handle.</p>
--ldx]]--
-function xml.load(filename)
+function xml.load(filename,settings)
+ local data = ""
if type(filename) == "string" then
+ -- local data = io.loaddata(filename) - -todo: check type in io.loaddata
local f = io.open(filename,'r')
if f then
- local root = xmlconvert(f:read("*all"))
+ data = f:read("*all")
f:close()
- return root
- else
- return xmlconvert("")
end
elseif filename then -- filehandle
- return xmlconvert(filename:read("*all"))
- else
- return xmlconvert("")
+ data = filename:read("*all")
end
+ return xmlconvert(data,settings)
end
--[[ldx--
@@ -5109,17 +5128,17 @@ apply_axis['child'] = function(list)
for l=1,#list do
local ll = list[l]
local dt = ll.dt
-local en = 0
+ local en = 0
for k=1,#dt do
local dk = dt[k]
if dk.tg then
collected[#collected+1] = dk
dk.ni = k -- refresh
-en = en + 1
-dk.ei = en
+ en = en + 1
+ dk.ei = en
end
end
-ll.en = en
+ ll.en = en
end
return collected
end
@@ -5127,18 +5146,18 @@ end
local function collect(list,collected)
local dt = list.dt
if dt then
-local en = 0
+ local en = 0
for k=1,#dt do
local dk = dt[k]
if dk.tg then
collected[#collected+1] = dk
dk.ni = k -- refresh
-en = en + 1
-dk.ei = en
+ en = en + 1
+ dk.ei = en
collect(dk,collected)
end
end
-list.en = en
+ list.en = en
end
end
apply_axis['descendant'] = function(list)
@@ -5152,18 +5171,18 @@ end
local function collect(list,collected)
local dt = list.dt
if dt then
-local en = 0
+ local en = 0
for k=1,#dt do
local dk = dt[k]
if dk.tg then
collected[#collected+1] = dk
dk.ni = k -- refresh
-en = en + 1
-dk.ei = en
+ en = en + 1
+ dk.ei = en
collect(dk,collected)
end
end
-list.en = en
+ list.en = en
end
end
apply_axis['descendant-or-self'] = function(list)
@@ -5800,17 +5819,17 @@ parse_pattern = function (pattern) -- the gain of caching is rather minimal
add_comment(parsed, "initial-child removed") -- we could also make it a auto-self
remove(parsed,1)
end
-local np = #parsed -- can have changed
-if np > 1 then
- local pnp = parsed[np]
- if pnp.kind == "nodes" and pnp.nodetest == true then
- local nodes = pnp.nodes
- if nodes[1] == true and nodes[2] == false and nodes[3] == false then
- add_comment(parsed, "redundant final wildcard filter removed")
- remove(parsed,np)
- end
- end
-end
+ local np = #parsed -- can have changed
+ if np > 1 then
+ local pnp = parsed[np]
+ if pnp.kind == "nodes" and pnp.nodetest == true then
+ local nodes = pnp.nodes
+ if nodes[1] == true and nodes[2] == false and nodes[3] == false then
+ add_comment(parsed, "redundant final wildcard filter removed")
+ remove(parsed,np)
+ end
+ end
+ end
end
else
parsed = { pattern = pattern }
@@ -5836,6 +5855,10 @@ end
-- caching found lookups saves not that much (max .1 sec on a 8 sec run)
-- and it also messes up finalizers
+-- watch out: when there is a finalizer, it's always called as there
+-- can be cases that a finalizer returns (or does) something in case
+-- there is no match; an example of this is count()
+
local profiled = { } xml.profiled = profiled
local function profiled_apply(list,parsed,nofparsed,order)
@@ -5863,6 +5886,12 @@ local function profiled_apply(list,parsed,nofparsed,order)
return collected
end
if not collected or #collected == 0 then
+ local pn = i < nofparsed and parsed[nofparsed]
+ if pn and pn.kind == "finalizer" then
+ collected = pn.finalizer(collected)
+ p.finalized = p.finalized + 1
+ return collected
+ end
return nil
end
end
@@ -5894,10 +5923,16 @@ local function traced_apply(list,parsed,nofparsed,order)
logs.report("lpath", "% 10i : ex : %s -> %s",(collected and #collected) or 0,pi.expression,pi.converted)
elseif kind == "finalizer" then
collected = pi.finalizer(collected)
- logs.report("lpath", "% 10i : fi : %s : %s(%s)",(collected and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
+ logs.report("lpath", "% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
return collected
end
if not collected or #collected == 0 then
+ local pn = i < nofparsed and parsed[nofparsed]
+ if pn and pn.kind == "finalizer" then
+ collected = pn.finalizer(collected)
+ logs.report("lpath", "% 10i : fi : %s : %s(%s)",(type(collected) == "table" and #collected) or 0,parsed.protocol or xml.defaultprotocol,pn.name,pn.arguments or "")
+ return collected
+ end
return nil
end
end
@@ -5922,6 +5957,10 @@ local function normal_apply(list,parsed,nofparsed,order)
return pi.finalizer(collected)
end
if not collected or #collected == 0 then
+ local pf = i < nofparsed and parsed[nofparsed].finalizer
+ if pf then
+ return pf(collected) -- can be anything
+ end
return nil
end
end
@@ -6698,7 +6737,7 @@ function xml.strip_whitespace(root, pattern, nolines) -- strips all leading and
end
end
else
---~ str.ni = i
+ --~ str.ni = i
t[#t+1] = str
end
end
@@ -6708,6 +6747,78 @@ function xml.strip_whitespace(root, pattern, nolines) -- strips all leading and
end
end
+function xml.strip_whitespace(root, pattern, nolines, anywhere) -- strips all leading and trailing spacing
+ local collected = xmlparseapply({ root },pattern) -- beware, indices no longer are valid now
+ if collected then
+ for i=1,#collected do
+ local e = collected[i]
+ local edt = e.dt
+ if edt then
+ if anywhere then
+ local t = { }
+ for e=1,#edt do
+ local str = edt[e]
+ if type(str) ~= "string" then
+ t[#t+1] = str
+ elseif str ~= "" then
+ -- todo: lpeg for each case
+ if nolines then
+ str = gsub(str,"%s+"," ")
+ end
+ str = gsub(str,"^%s*(.-)%s*$","%1")
+ if str ~= "" then
+ t[#t+1] = str
+ end
+ end
+ end
+ e.dt = t
+ else
+ -- we can assume a regular sparse xml table with no successive strings
+ -- otherwise we should use a while loop
+ if #edt > 0 then
+ -- strip front
+ local str = edt[1]
+ if type(str) ~= "string" then
+ -- nothing
+ elseif str == "" then
+ remove(edt,1)
+ else
+ if nolines then
+ str = gsub(str,"%s+"," ")
+ end
+ str = gsub(str,"^%s+","")
+ if str == "" then
+ remove(edt,1)
+ else
+ edt[1] = str
+ end
+ end
+ end
+ if #edt > 1 then
+ -- strip end
+ local str = edt[#edt]
+ if type(str) ~= "string" then
+ -- nothing
+ elseif str == "" then
+ remove(edt)
+ else
+ if nolines then
+ str = gsub(str,"%s+"," ")
+ end
+ str = gsub(str,"%s+$","")
+ if str == "" then
+ remove(edt)
+ else
+ edt[#edt] = str
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+end
+
local function rename_space(root, oldspace, newspace) -- fast variant
local ndt = #root.dt
for i=1,ndt or 0 do
@@ -10623,11 +10734,13 @@ function statistics.check_fmt_status(texname)
local luv = dofile(luvname)
if luv and luv.sourcefile then
local sourcehash = md5.hex(io.loaddata(resolvers.find_file(luv.sourcefile)) or "unknown")
- if luv.enginebanner and luv.enginebanner ~= enginebanner then
- return "engine mismatch"
+ local luvbanner = luv.enginebanner or "?"
+ if luvbanner ~= enginebanner then
+ return string.format("engine mismatch (luv:%s <> bin:%s)",luvbanner,enginebanner)
end
- if luv.sourcehash and luv.sourcehash ~= sourcehash then
- return "source mismatch"
+ local luvhash = luv.sourcehash or "?"
+ if luvhash ~= sourcehash then
+ return string.format("source mismatch (luv:%s <> bin:%s)",luvhash,sourcehash)
end
else
return "invalid status file"
@@ -10982,7 +11095,7 @@ local _path_, libpaths, _cpath_, clibpaths
function package.libpaths()
if not _path_ or package.path ~= _path_ then
_path_ = package.path
- libpaths = file.split_path(_path_)
+ libpaths = file.split_path(_path_,";")
end
return libpaths
end
@@ -10990,7 +11103,7 @@ end
function package.clibpaths()
if not _cpath_ or package.cpath ~= _cpath_ then
_cpath_ = package.cpath
- clibpaths = file.split_path(_cpath_)
+ clibpaths = file.split_path(_cpath_,";")
end
return clibpaths
end