summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fonts/map/pdftex/context/original-public-lm.map78
-rw-r--r--scripts/context/lua/luatools.lua152
-rw-r--r--scripts/context/lua/mtx-context.lua64
-rw-r--r--scripts/context/lua/mtx-update.lua3
-rw-r--r--scripts/context/lua/mtxrun.lua4266
-rw-r--r--scripts/context/stubs/mswin/luatools.lua152
-rw-r--r--scripts/context/stubs/mswin/mtxrun.lua4266
-rwxr-xr-xscripts/context/stubs/unix/luatools152
-rwxr-xr-xscripts/context/stubs/unix/mtxrun4266
-rw-r--r--tex/context/base/cont-new.tex2
-rw-r--r--tex/context/base/context.mkiv1
-rw-r--r--tex/context/base/context.tex2
-rw-r--r--tex/context/base/font-ctx.lua38
-rw-r--r--tex/context/base/font-def.lua7
-rw-r--r--tex/context/base/font-gds.lua349
-rw-r--r--tex/context/base/font-gds.mkiv79
-rw-r--r--tex/context/base/font-ini.mkiv4
-rw-r--r--tex/context/base/font-mis.lua2
-rw-r--r--tex/context/base/font-otf.lua24
-rw-r--r--tex/context/base/font-otn.lua20
-rw-r--r--tex/context/base/font-pat.lua17
-rw-r--r--tex/context/base/font-tfm.lua2
-rw-r--r--tex/context/base/l-table.lua20
-rw-r--r--tex/context/base/luat-dum.lua5
-rw-r--r--tex/context/base/lxml-aux.lua2
-rw-r--r--tex/context/base/lxml-ctx.lua127
-rw-r--r--tex/context/base/lxml-ctx.mkiv64
-rw-r--r--tex/context/base/lxml-ent.lua1
-rw-r--r--tex/context/base/lxml-lpt.lua149
-rw-r--r--tex/context/base/lxml-tex.lua22
-rw-r--r--tex/context/base/lxml-xml.lua10
-rw-r--r--tex/context/base/m-directives.tex5
-rw-r--r--tex/context/base/m-trackers.tex (renamed from tex/context/base/m-track.tex)0
-rw-r--r--tex/context/base/node-inj.lua1
-rw-r--r--tex/context/base/trac-deb.lua10
-rw-r--r--tex/context/base/trac-deb.mkiv5
-rw-r--r--tex/context/base/trac-inf.lua1
-rw-r--r--tex/context/base/trac-tra.lua200
-rw-r--r--tex/generic/context/luatex-fonts-merged.lua81
39 files changed, 8757 insertions, 5892 deletions
diff --git a/fonts/map/pdftex/context/original-public-lm.map b/fonts/map/pdftex/context/original-public-lm.map
index 3d5ada70d..83fccd0aa 100644
--- a/fonts/map/pdftex/context/original-public-lm.map
+++ b/fonts/map/pdftex/context/original-public-lm.map
@@ -1,7 +1,7 @@
% LM replacement, thanks to Jacko
% official
-%
+
% cmb10 LMRomanDemi10-Regular <lm-rep-cmrm.enc <lmb10.pfb
% cmbx10 LMRoman10-Bold <lm-rep-cmrm.enc <lmbx10.pfb
% cmbx12 LMRoman12-Bold <lm-rep-cmrm.enc <lmbx12.pfb
@@ -126,45 +126,57 @@ cmtt8 CMTT8 <lm-rep-cmtt.enc <lmtt8.pfb
cmtt9 CMTT9 <lm-rep-cmtt.enc <lmtt9.pfb
cmvtt10 CMVTT10 <lm-rep-cmrm.enc <lmvtt10.pfb
+% Math
+
+cmex10 LMMathExtension10-Regular "enclmmathex ReEncodeFont" <lm-mathex.enc <lmex10.pfb
+cmex9 LMMathExtension10-Regular "enclmmathex ReEncodeFont" <lm-mathex.enc <lmex10.pfb
+cmex8 LMMathExtension10-Regular "enclmmathex ReEncodeFont" <lm-mathex.enc <lmex10.pfb
+cmex7 LMMathExtension10-Regular "enclmmathex ReEncodeFont" <lm-mathex.enc <lmex10.pfb
+
+cmmi5 LMMathItalic5-Italic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmi5.pfb
+cmmi6 LMMathItalic6-Italic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmi6.pfb
+cmmi7 LMMathItalic7-Italic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmi7.pfb
+cmmi8 LMMathItalic8-Italic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmi8.pfb
+cmmi9 LMMathItalic9-Italic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmi9.pfb
+cmmi10 LMMathItalic10-Italic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmi10.pfb
+cmmi12 LMMathItalic12-Italic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmi12.pfb
+
+cmmib5 LMMathItalic5-BoldItalic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmib5.pfb
+cmmib6 LMMathItalic6-BoldItalic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmib6.pfb
+cmmib7 LMMathItalic7-BoldItalic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmib7.pfb
+cmmib8 LMMathItalic8-BoldItalic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmib8.pfb
+cmmib9 LMMathItalic9-BoldItalic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmib9.pfb
+cmmib10 LMMathItalic10-BoldItalic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmib10.pfb
+cmmib12 LMMathItalic12-BoldItalic "enclmmathit ReEncodeFont" <lm-mathit.enc <lmmib12.pfb
+
+cmsy5 LMMathSymbols5-Italic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmsy5.pfb
+cmsy6 LMMathSymbols6-Italic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmsy6.pfb
+cmsy7 LMMathSymbols7-Italic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmsy7.pfb
+cmsy8 LMMathSymbols8-Italic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmsy8.pfb
+cmsy9 LMMathSymbols9-Italic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmsy9.pfb
+cmsy10 LMMathSymbols10-Italic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmsy10.pfb
+
+cmbsy5 LMMathSymbols5-BoldItalic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmbsy5.pfb
+cmbsy6 LMMathSymbols6-BoldItalic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmbsy6.pfb
+cmbsy7 LMMathSymbols7-BoldItalic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmbsy7.pfb
+cmbsy8 LMMathSymbols8-BoldItalic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmbsy8.pfb
+cmbsy9 LMMathSymbols9-BoldItalic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmbsy9.pfb
+cmbsy10 LMMathSymbols10-BoldItalic "enclmmathsy ReEncodeFont" <lm-mathsy.enc <lmbsy10.pfb
+
+
% original
-cmbsy10 CMBSY10 <cmbsy10.pfb
-cmbsy5 CMBSY5 <cmbsy5.pfb
-cmbsy6 CMBSY7 <cmbsy7.pfb
-cmbsy7 CMBSY7 <cmbsy7.pfb
-cmbsy8 CMBSY7 <cmbsy7.pfb
-cmbsy9 CMBSY10 <cmbsy10.pfb
cmdunh10 CMDUNH10 <cmdunh10.pfb
-cmex10 CMEX10 <cmex10.pfb
-cmex7 CMEX10 <cmex10.pfb
-cmex8 CMEX10 <cmex10.pfb
-cmex9 CMEX10 <cmex10.pfb
-cmff10 CMFF10 <cmff10.pfb
-cmfi10 CMFI10 <cmfi10.pfb
-cmfib8 CMFIB8 <cmfib8.pfb
-cmmi10 CMMI10 <cmmi10.pfb
-cmmi12 CMMI12 <cmmi12.pfb
-cmmi5 CMMI5 <cmmi5.pfb
-cmmi6 CMMI6 <cmmi6.pfb
-cmmi7 CMMI7 <cmmi7.pfb
-cmmi8 CMMI8 <cmmi8.pfb
-cmmi9 CMMI9 <cmmi9.pfb
-cmmib10 CMMIB10 <cmmib10.pfb
-cmmib5 CMMIB5 <cmmib5.pfb
-cmmib6 CMMIB7 <cmmib7.pfb
-cmmib7 CMMIB7 <cmmib7.pfb
-cmmib8 CMMIB7 <cmmib7.pfb
-cmmib9 CMMIB10 <cmmib10.pfb
-cmsy10 CMSY10 <cmsy10.pfb
-cmsy5 CMSY5 <cmsy5.pfb
-cmsy6 CMSY6 <cmsy6.pfb
-cmsy7 CMSY7 <cmsy7.pfb
-cmsy8 CMSY8 <cmsy8.pfb
-cmsy9 CMSY9 <cmsy9.pfb
+
cmtex10 CMTEX10 <cmtex10.pfb
cmtex8 CMTEX8 <cmtex8.pfb
cmtex9 CMTEX9 <cmtex9.pfb
+cmff10 CMFF10 <cmff10.pfb
+cmfi10 CMFI10 <cmfi10.pfb
+cmfib8 CMFIB8 <cmfib8.pfb
+
+
% a weird one, not used in context
cmu10 LMRoman10-Italic "-0.25 SlantFont" <lm-rep-cmit.enc <lmri10.pfb
diff --git a/scripts/context/lua/luatools.lua b/scripts/context/lua/luatools.lua
index a8cfbd5b0..2bc943210 100644
--- a/scripts/context/lua/luatools.lua
+++ b/scripts/context/lua/luatools.lua
@@ -230,6 +230,16 @@ function string:pattesc()
return (gsub(self,".",patterns_escapes))
end
+-- user level wildcards: "-" and "." are taken literally, "*" matches anything
+local simple_escapes = {
+    ["*"] = ".*",
+    ["."] = "%.",
+    ["-"] = "%-",
+}
+
+-- turns a simple wildcard string into a lua pattern; characters that are
+-- not in the table above are left untouched
+function string:simpleesc()
+    local escaped = gsub(self,".",simple_escapes)
+    return escaped
+end
+
function string:tohash()
local t = { }
for s in gmatch(self,"([^, ]+)") do -- lpeg
@@ -279,6 +289,12 @@ function string:compactlong() -- strips newlines and leading spaces
return self
end
+-- removes leading whitespace and reduces each run of line breaks (plus the
+-- spaces that follow it) to a single newline
+function string:striplong()
+    local stripped = gsub(self,"^%s*","")
+    stripped = gsub(stripped,"[\n\r]+ *","\n")
+    return stripped
+end
+
end -- of closure
@@ -387,6 +403,18 @@ function string:split(separator)
return c:match(self)
end
+--~ function lpeg.L(list,pp)
+--~ local p = pp
+--~ for l=1,#list do
+--~ if p then
+--~ p = p + lpeg.P(list[l])
+--~ else
+--~ p = lpeg.P(list[l])
+--~ end
+--~ end
+--~ return p
+--~ end
+
end -- of closure
@@ -420,6 +448,14 @@ function table.strip(tab)
return lst
end
+-- returns a new list with all keys of t, in traversal (unspecified) order
+function table.keys(t)
+    local keys, n = { }, 0
+    for key in next, t do
+        n = n + 1
+        keys[n] = key
+    end
+    return keys
+end
+
local function compare(a,b)
return (tostring(a) < tostring(b))
end
@@ -1192,21 +1228,35 @@ function table.reverse(t)
return tt
end
---~ function table.keys(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return k
---~ end
+-- moves (or adds) extra so that it sits right before the first entry equal
+-- to value; when value is not present extra becomes the first entry
+function table.insert_before_value(t,value,extra)
+    -- drop existing occurrences first; walk backwards because remove shifts
+    -- the tail down and a forward loop would skip the entry after each hit
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i,extra)
+            return
+        end
+    end
+    insert(t,1,extra)
+end
---~ function table.keys_as_string(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return concat(k,"")
---~ end
+-- moves (or adds) extra so that it sits right after the first entry equal
+-- to value; when value is not present extra is appended
+function table.insert_after_value(t,value,extra)
+    -- drop existing occurrences first; walk backwards because remove shifts
+    -- the tail down and a forward loop would skip the entry after each hit
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i+1,extra)
+            return
+        end
+    end
+    insert(t,#t+1,extra)
+end
end -- of closure
@@ -1413,7 +1463,7 @@ if not modules then modules = { } end modules ['l-number'] = {
license = "see context related readme files"
}
-local format = string.format
+local format, floor, insert = string.format, math.floor, table.insert -- floor was misspelled as foor
number = number or { }
@@ -1449,7 +1499,18 @@ function number.toset(n)
return one:match(tostring(n))
end
-
+-- returns the positions of the bits that are set in n, highest position
+-- first; positions count from 1, or from 0 when zero is true
+function number.bits(n,zero)
+    local t, i = { }, (zero and 0) or 1
+    while n > 0 do
+        local m = n % 2
+        if m > 0 then
+            insert(t,1,i)
+        end
+        -- math.floor is used directly so that this function does not depend
+        -- on a local alias being declared correctly elsewhere in the module
+        n = math.floor(n/2)
+        i = i + 1
+    end
+    return t
+end
end -- of closure
@@ -1914,11 +1975,11 @@ local rootbased = lpeg.P("/") + letter*lpeg.P(":")
-- ./name ../name /name c: :// name/name
function file.is_qualified_path(filename)
- return qualified:match(filename)
+ return qualified:match(filename) ~= nil
end
function file.is_rootbased_path(filename)
- return rootbased:match(filename)
+ return rootbased:match(filename) ~= nil
end
local slash = lpeg.S("\\/")
@@ -3134,6 +3195,24 @@ function aux.accesstable(target)
return t
end
+-- as we use this a lot ...
+
+--~ function aux.cachefunction(action,weak)
+--~ local cache = { }
+--~ if weak then
+--~ setmetatable(cache, { __mode = "kv" } )
+--~ end
+--~ local function reminder(str)
+--~ local found = cache[str]
+--~ if not found then
+--~ found = action(str)
+--~ cache[str] = found
+--~ end
+--~ return found
+--~ end
+--~ return reminder, cache
+--~ end
+
end -- of closure
@@ -3156,7 +3235,7 @@ debugger = debugger or { }
local counters = { }
local names = { }
local getinfo = debug.getinfo
-local format, find, lower, gmatch = string.format, string.find, string.lower, string.gmatch
+local format, find, lower, gmatch, gsub = string.format, string.find, string.lower, string.gmatch, string.gsub
-- one
@@ -3290,7 +3369,7 @@ local data, done = { }, { }
local function set(what,value)
if type(what) == "string" then
- what = aux.settings_to_array(what)
+ what = aux.settings_to_array(what) -- inefficient but ok
end
for i=1,#what do
local w = what[i]
@@ -3315,6 +3394,19 @@ local function reset()
end
end
+local function enable(what) -- plain variant: hands what to set with value true
+ set(what,true)
+end
+
+-- plain variant: a missing or empty specification switches everything off
+local function disable(what)
+    if what and what ~= "" then
+        set(what,false)
+    else
+        done = { }
+        reset()
+    end
+end
+
function trackers.register(what,...)
what = lower(what)
local w = data[what]
@@ -3333,20 +3425,20 @@ function trackers.register(what,...)
end
function trackers.enable(what)
- done = { }
- set(what,true)
+    -- the plain setter is swapped in while we run and restored afterwards
+    -- (presumably so that nested calls do not escape again or reset done)
+    local e = trackers.enable
+    trackers.enable, done = enable, { }
+    -- only strings are wildcard specifications; a list is passed on as is
+    enable(type(what) == "string" and string.simpleesc(what) or what)
+    trackers.enable, done = e, { }
end
function trackers.disable(what)
- done = { }
- if not what or what == "" then
- trackers.reset(what)
- else
- set(what,false)
- end
+    -- the plain setter is swapped in while we run and restored afterwards
+    -- (presumably so that nested calls do not escape again or reset done)
+    local e = trackers.disable
+    trackers.disable, done = disable, { }
+    -- only strings are escaped: nil must reach disable() untouched because
+    -- it means "switch everything off", and a list is passed on as is
+    disable(type(what) == "string" and string.simpleesc(what) or what)
+    trackers.disable, done = e, { }
end
-function trackers.reset(what)
+function trackers.reset()
done = { }
reset()
end
@@ -3423,7 +3515,7 @@ function environment.initialize_arguments(arg)
environment.arguments, environment.files, environment.sortedflags = arguments, files, nil
for index, argument in pairs(arg) do
if index > 0 then
- local flag, value = argument:match("^%-+(.+)=(.-)$")
+ local flag, value = argument:match("^%-+(.-)=(.-)$")
if flag then
arguments[flag] = string.unquote(value or "")
else
diff --git a/scripts/context/lua/mtx-context.lua b/scripts/context/lua/mtx-context.lua
index 418387fce..cf26c4e51 100644
--- a/scripts/context/lua/mtx-context.lua
+++ b/scripts/context/lua/mtx-context.lua
@@ -131,7 +131,6 @@ do
if f then
f:write("<?xml version='1.0' standalone='yes'?>\n\n")
f:write(string.format("<ctx:preplist local='%s'>\n",yn(ctxdata.runlocal)))
---~ for name, value in pairs(ctxdata.prepfiles) do
for _, name in ipairs(table.sortedkeys(ctxdata.prepfiles)) do
f:write(string.format("\t<ctx:prepfile done='%s'>%s</ctx:prepfile>\n",yn(ctxdata.prepfiles[name]),name))
end
@@ -189,8 +188,8 @@ do
end
end
-usedname = resolvers.find_file(ctxdata.ctxname,"tex")
-found = usedname ~= ""
+ usedname = resolvers.find_file(ctxdata.ctxname,"tex")
+ found = usedname ~= ""
if not found and defaultname and defaultname ~= "" and lfs.isfile(defaultname) then
usedname, found = defaultname, true
@@ -225,36 +224,35 @@ found = usedname ~= ""
logs.simple("ctx comment: %s", xml.tostring(message))
end
- xml.each(ctxdata.xmldata,"ctx:value[@name='job']", function(ek,e,k)
+ for r, e, k in xml.elements(ctxdata.xmldata,"ctx:value[@name='job']") do
e[k] = ctxdata.variables['job'] or ""
- end)
+ end
local commands = { }
- xml.each(ctxdata.xmldata,"/ctx:job/ctx:preprocess/ctx:processors/ctx:processor", function(r,d,k)
- local ek = d[k]
- commands[ek.at and ek.at['name'] or "unknown"] = ek
- end)
+ for e in xml.collected(ctxdata.xmldata,"/ctx:job/ctx:preprocess/ctx:processors/ctx:processor") do
+ commands[e.at and e.at['name'] or "unknown"] = e
+ end
local suffix = xml.filter(ctxdata.xmldata,"/ctx:job/ctx:preprocess/attribute(suffix)") or ctxdata.suffix
local runlocal = xml.filter(ctxdata.xmldata,"/ctx:job/ctx:preprocess/ctx:processors/attribute(local)")
runlocal = toboolean(runlocal)
- for _, files in ipairs(xml.filters.elements(ctxdata.xmldata,"/ctx:job/ctx:preprocess/ctx:files")) do
- for _, pattern in ipairs(xml.filters.elements(files,"ctx:file")) do
+ for files in xml.collected(ctxdata.xmldata,"/ctx:job/ctx:preprocess/ctx:files") do
+ for pattern in xml.collected(files,"ctx:file") do
preprocessor = pattern.at['processor'] or ""
if preprocessor ~= "" then
ctxdata.variables['old'] = ctxdata.jobname
- xml.each(ctxdata.xmldata,"ctx:value", function(r,d,k)
+ for r, d, k in xml.elements(ctxdata.xmldata,"ctx:value") do
local ek = d[k]
local ekat = ek.at['name']
if ekat == 'old' then
d[k] = ctxrunner.substitute(ctxdata.variables[ekat] or "")
end
- end)
+ end
pattern = ctxrunner.justtext(xml.tostring(pattern))
@@ -293,21 +291,21 @@ found = usedname ~= ""
if ctxdata.runlocal then
newfile = file.basename(newfile)
end
- xml.each(command,"ctx:old", function(r,d,k)
+ for r, d, k in xml.elements(command,"ctx:old") do
d[k] = ctxrunner.substitute(oldfile)
- end)
- xml.each(command,"ctx:new", function(r,d,k)
+ end
+ for r, d, k in xml.elements(command,"ctx:new") do
d[k] = ctxrunner.substitute(newfile)
- end)
+ end
ctxdata.variables['old'] = oldfile
ctxdata.variables['new'] = newfile
- xml.each(command,"ctx:value", function(r,d,k)
+ for r, d, k in xml.elements(command,"ctx:value") do
local ek = d[k]
local ekat = ek.at and ek.at['name']
if ekat then
d[k] = ctxrunner.substitute(ctxdata.variables[ekat] or "")
end
- end)
+ end
-- potential optimization: when mtxrun run internal
command = xml.text(command)
command = ctxrunner.justtext(command) -- command is still xml element here
@@ -444,6 +442,12 @@ function scripts.context.multipass.makeoptionfile(jobname,ctxdata,kindofrun,curr
if type(environment.argument("track")) == "string" then
setvalue ("track" , "\\enabletrackers[%s]")
end
+ if type(environment.argument("trackers")) == "string" then
+ setvalue ("trackers" , "\\enabletrackers[%s]")
+ end
+ if type(environment.argument("directives")) == "string" then
+ setvalue ("directives", "\\enabledirectives[%s]")
+ end
setfixed ("timing" , "\\usemodule[timing]")
setfixed ("batchmode" , "\\batchmode")
setfixed ("nonstopmode" , "\\nonstopmode")
@@ -1211,8 +1215,15 @@ end
-- todo: we need to do a dummy run
-function scripts.context.track()
- environment.files = { "m-track" }
+function scripts.context.trackers() -- processes the m-trackers file in a single run
+ environment.files = { "m-trackers" }
+ scripts.context.multipass.nofruns = 1
+ scripts.context.run()
+ -- maybe filter from log
+end
+
+function scripts.context.directives()
+ environment.files = { "m-directives" }
scripts.context.multipass.nofruns = 1
scripts.context.run()
-- maybe filter from log
@@ -1403,7 +1414,8 @@ expert options:
--nostats omit runtime statistics at the end of the run
--update update context from website (not to be confused with contextgarden)
--profile profile job (use: mtxrun --script profile --analyse)
---track show/set tracker variables
+--trackers show/set tracker variables
+--directives show/set directive variables
--timing generate timing and statistics overview
--extra=name process extra (mtx-context-<name> in distribution)
--tracefiles show some extra info when locating files (at the tex end)
@@ -1462,8 +1474,12 @@ elseif environment.argument("extra") then
scripts.context.extra()
elseif environment.argument("help") then
logs.help(messages.help)
-elseif environment.argument("track") and type(environment.argument("track")) == "boolean" then
- scripts.context.track()
+elseif environment.argument("trackers") and type(environment.argument("trackers")) == "boolean" then
+ scripts.context.trackers()
+elseif environment.argument("directives") and type(environment.argument("directives")) == "boolean" then
+ scripts.context.directives()
+elseif environment.argument("track") and type(environment.argument("track")) == "boolean" then -- for old times sake, will go
+ scripts.context.trackers()
elseif environment.files[1] then
-- scripts.context.timed(scripts.context.run)
scripts.context.timed(scripts.context.autoctx)
diff --git a/scripts/context/lua/mtx-update.lua b/scripts/context/lua/mtx-update.lua
index ef05f087d..1d2e0672a 100644
--- a/scripts/context/lua/mtx-update.lua
+++ b/scripts/context/lua/mtx-update.lua
@@ -69,7 +69,6 @@ scripts.update.base = {
{ "context/img/", "texmf-context" },
{ "misc/setuptex/", "." },
{ "misc/web2c", "texmf" },
- { "bin/common/luatex/", "texmf-<platform>" },
{ "bin/common/<platform>/", "texmf-<platform>" },
{ "bin/context/<platform>/", "texmf-<platform>" },
{ "bin/metapost/<platform>/", "texmf-<platform>" },
@@ -87,10 +86,12 @@ scripts.update.engines = {
["xetex"] = {
{ "base/xetex/", "texmf" },
{ "fonts/new/", "texmf" },
+ { "bin/luatex/<platform>/", "texmf-<platform>" }, -- tools
{ "bin/xetex/<platform>/", "texmf-<platform>" },
},
["pdftex"] = {
{ "fonts/old/", "texmf" },
+ { "bin/luatex/<platform>/", "texmf-<platform>" }, -- tools
{ "bin/pdftex/<platform>/", "texmf-<platform>" },
},
["all"] = {
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 865994073..8bc88c900 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -239,6 +239,16 @@ function string:pattesc()
return (gsub(self,".",patterns_escapes))
end
+-- user level wildcards: "-" and "." are taken literally, "*" matches anything
+local simple_escapes = {
+    ["*"] = ".*",
+    ["."] = "%.",
+    ["-"] = "%-",
+}
+
+-- turns a simple wildcard string into a lua pattern; characters that are
+-- not in the table above are left untouched
+function string:simpleesc()
+    local escaped = gsub(self,".",simple_escapes)
+    return escaped
+end
+
function string:tohash()
local t = { }
for s in gmatch(self,"([^, ]+)") do -- lpeg
@@ -288,6 +298,12 @@ function string:compactlong() -- strips newlines and leading spaces
return self
end
+-- removes leading whitespace and reduces each run of line breaks (plus the
+-- spaces that follow it) to a single newline
+function string:striplong()
+    local stripped = gsub(self,"^%s*","")
+    stripped = gsub(stripped,"[\n\r]+ *","\n")
+    return stripped
+end
+
end -- of closure
@@ -396,6 +412,18 @@ function string:split(separator)
return c:match(self)
end
+--~ function lpeg.L(list,pp)
+--~ local p = pp
+--~ for l=1,#list do
+--~ if p then
+--~ p = p + lpeg.P(list[l])
+--~ else
+--~ p = lpeg.P(list[l])
+--~ end
+--~ end
+--~ return p
+--~ end
+
end -- of closure
@@ -429,6 +457,14 @@ function table.strip(tab)
return lst
end
+-- returns a new list with all keys of t, in traversal (unspecified) order
+function table.keys(t)
+    local keys, n = { }, 0
+    for key in next, t do
+        n = n + 1
+        keys[n] = key
+    end
+    return keys
+end
+
local function compare(a,b)
return (tostring(a) < tostring(b))
end
@@ -1009,7 +1045,7 @@ function table.tofile(filename,root,name,reduce,noquotes,hexify)
end
end
-local function flatten(t,f,complete)
+local function flatten(t,f,complete) -- is this used? maybe a variant with next, ...
for i=1,#t do
local v = t[i]
if type(v) == "table" then
@@ -1038,6 +1074,24 @@ end
table.flatten_one_level = table.unnest
+-- a better one:
+
+-- copies all non-table values of t and of its nested tables into one
+-- table, keeping the key each value had in its own (sub)table; when two
+-- values share a key the one visited last wins
+local function flattened(t,f)
+    f = f or { }
+    for k, v in next, t do
+        if type(v) ~= "table" then
+            f[k] = v
+        else
+            flattened(v,f)
+        end
+    end
+    return f
+end
+
+table.flattened = flattened
+
-- the next three may disappear
function table.remove_value(t,value) -- todo: n
@@ -1201,21 +1255,35 @@ function table.reverse(t)
return tt
end
---~ function table.keys(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return k
---~ end
+-- moves (or adds) extra so that it sits right before the first entry equal
+-- to value; when value is not present extra becomes the first entry
+function table.insert_before_value(t,value,extra)
+    -- drop existing occurrences first; walk backwards because remove shifts
+    -- the tail down and a forward loop would skip the entry after each hit
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i,extra)
+            return
+        end
+    end
+    insert(t,1,extra)
+end
---~ function table.keys_as_string(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return concat(k,"")
---~ end
+-- moves (or adds) extra so that it sits right after the first entry equal
+-- to value; when value is not present extra is appended
+function table.insert_after_value(t,value,extra)
+    -- drop existing occurrences first; walk backwards because remove shifts
+    -- the tail down and a forward loop would skip the entry after each hit
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i+1,extra)
+            return
+        end
+    end
+    insert(t,#t+1,extra)
+end
end -- of closure
@@ -1422,7 +1490,7 @@ if not modules then modules = { } end modules ['l-number'] = {
license = "see context related readme files"
}
-local format = string.format
+local format, floor, insert = string.format, math.floor, table.insert -- floor was misspelled as foor
number = number or { }
@@ -1458,7 +1526,18 @@ function number.toset(n)
return one:match(tostring(n))
end
-
+-- returns the positions of the bits that are set in n, highest position
+-- first; positions count from 1, or from 0 when zero is true
+function number.bits(n,zero)
+    local t, i = { }, (zero and 0) or 1
+    while n > 0 do
+        local m = n % 2
+        if m > 0 then
+            insert(t,1,i)
+        end
+        -- math.floor is used directly so that this function does not depend
+        -- on a local alias being declared correctly elsewhere in the module
+        n = math.floor(n/2)
+        i = i + 1
+    end
+    return t
+end
end -- of closure
@@ -1923,11 +2002,11 @@ local rootbased = lpeg.P("/") + letter*lpeg.P(":")
-- ./name ../name /name c: :// name/name
function file.is_qualified_path(filename)
- return qualified:match(filename)
+ return qualified:match(filename) ~= nil
end
function file.is_rootbased_path(filename)
- return rootbased:match(filename)
+ return rootbased:match(filename) ~= nil
end
local slash = lpeg.S("\\/")
@@ -2854,6 +2933,406 @@ function aux.accesstable(target)
return t
end
+-- as we use this a lot ...
+
+--~ function aux.cachefunction(action,weak)
+--~ local cache = { }
+--~ if weak then
+--~ setmetatable(cache, { __mode = "kv" } )
+--~ end
+--~ local function reminder(str)
+--~ local found = cache[str]
+--~ if not found then
+--~ found = action(str)
+--~ cache[str] = found
+--~ end
+--~ return found
+--~ end
+--~ return reminder, cache
+--~ end
+
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['trac-tra'] = {
+ version = 1.001,
+ comment = "companion to trac-tra.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- the <anonymous> tag is kind of generic and used for functions that are not
+-- bound to a variable, like node.new, node.copy etc (contrary to for instance
+-- node.has_attribute which is bound to a has_attribute local variable in mkiv)
+
+local getinfo = debug.getinfo
+local type, next = type, next
+local concat = table.concat
+local format, find, lower, gmatch, gsub = string.format, string.find, string.lower, string.gmatch, string.gsub
+
+debugger = debugger or { }
+
+local counters = { }
+local names = { }
+
+-- one
+
+-- debug hook: counts how often each function is seen and remembers the
+-- source/name info of a function the first time it shows up
+local function hook()
+    local func = getinfo(2,"f").func
+    local info = getinfo(2,"Sn")
+    -- if info.what == "C" and info.name then print (info.namewhat .. ': ' .. info.name) end
+    if not func then
+        return
+    end
+    local seen = counters[func]
+    if seen ~= nil then
+        counters[func] = seen + 1
+    else
+        counters[func] = 1
+        names[func] = info
+    end
+end
+-- builds a printable label for a counted function from the info stored
+-- by the hook; functions without stored info are reported as "unknown"
+local function getname(func)
+    local info = names[func]
+    if not info then
+        return "unknown"
+    end
+    if info.what == "C" then
+        return info.name or '<anonymous>'
+    end
+    -- source short_src linedefined what name namewhat nups func
+    local label = info.name or info.namewhat or info.what
+    if not label or label == "" then
+        label = "?"
+    end
+    return format("%s : %s : %s", info.short_src or "unknown source", info.linedefined or "--", label)
+end
+-- reports one line per counted function whose count exceeds threshold
+-- (default 0), skipping "for generator" entries, followed by a summary
+-- line; printer defaults to texio.write and then to print
+function debugger.showstats(printer,threshold)
+    printer = printer or texio.write or print
+    threshold = threshold or 0
+    local shown, all, nofentries = 0, 0, 0
+    printer("\n") -- ugly but ok
+    -- table.sort(counters)
+    for func, count in pairs(counters) do
+        nofentries = nofentries + 1
+        all = all + count
+        if count > threshold then
+            local name = getname(func)
+            if not find(name,"for generator") then
+                printer(format("%8i %s", count, name))
+                shown = shown + count
+            end
+        end
+    end
+    printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", nofentries, shown, all, threshold))
+end
+
+-- two
+
+--~ local function hook()
+--~ local n = getinfo(2)
+--~ if n.what=="C" and not n.name then
+--~ local f = tostring(debug.traceback())
+--~ local cf = counters[f]
+--~ if cf == nil then
+--~ counters[f] = 1
+--~ names[f] = n
+--~ else
+--~ counters[f] = cf + 1
+--~ end
+--~ end
+--~ end
+--~ function debugger.showstats(printer,threshold)
+--~ printer = printer or texio.write or print
+--~ threshold = threshold or 0
+--~ local total, grandtotal, functions = 0, 0, 0
+--~ printer("\n") -- ugly but ok
+--~ -- table.sort(counters)
+--~ for func, count in pairs(counters) do
+--~ if count > threshold then
+--~ printer(format("%8i %s", count, func))
+--~ total = total + count
+--~ end
+--~ grandtotal = grandtotal + count
+--~ functions = functions + 1
+--~ end
+--~ printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", functions, total, grandtotal, threshold))
+--~ end
+
+-- rest
+
+-- writes the statistics to filename; nothing happens when the file
+-- cannot be opened for writing
+function debugger.savestats(filename,threshold)
+    local handle = io.open(filename,'w')
+    if not handle then
+        return
+    end
+    local function write(str)
+        handle:write(str)
+    end
+    debugger.showstats(write,threshold)
+    handle:close()
+end
+
+function debugger.enable() -- installs the counting hook for call events (mask "c")
+ debug.sethook(hook,"c")
+end
+
+function debugger.disable() -- removes the hook; the collected counters are kept
+ debug.sethook()
+--~ counters[debug.getinfo(2,"f").func] = nil
+end
+
+-- tells if call tracing is requested by MTX.TRACE.CALLS or MTX_TRACE_CALLS;
+-- the first call replaces this function by one returning the cached answer
+function debugger.tracing()
+    local n = tonumber(os.env['MTX.TRACE.CALLS']) or tonumber(os.env['MTX_TRACE_CALLS']) or 0
+    local enabled = n > 0
+    function debugger.tracing()
+        return enabled
+    end
+    return enabled
+end
+
+--~ debugger.enable()
+
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+
+--~ debugger.disable()
+
+--~ print("")
+--~ debugger.showstats()
+--~ print("")
+--~ debugger.showstats(print,3)
+
+setters = setters or { }
+setters.data = setters.data or { }
+
+-- calls, with value, all functions registered under a name that matches
+-- one of the patterns in what (a list, or a string that is first split
+-- by aux.settings_to_array); every name is handled at most once per run
+local function set(t,what,value)
+    local data, done = t.data, t.done
+    if type(what) == "string" then
+        what = aux.settings_to_array(what) -- inefficient but ok
+    end
+    for i=1,#what do
+        local pattern = what[i]
+        for name, actions in next, data do
+            -- names already done are skipped: prevents recursion due to wildcards
+            if not done[name] and find(name,pattern) then
+                done[name] = true
+                for n=1,#actions do
+                    actions[n](value)
+                end
+            end
+        end
+    end
+end
+
+-- calls every registered function with false
+local function reset(t)
+    for _, actions in next, t.data do
+        for n=1,#actions do
+            actions[n](false)
+        end
+    end
+end
+
+local function enable(t,what) -- plain variant: hands what to set with value true
+ set(t,what,true)
+end
+
+-- plain variant: a missing or empty specification switches everything
+-- off, otherwise only the matching entries are set to false
+local function disable(t,what)
+    if not what or what == "" then
+        t.done = { }
+        reset(t)
+    else
+        set(t,what,false)
+    end
+end
+
+-- registers one or more actions under the (lowercased) name what; an
+-- action is either a function that gets the new value, or a string naming
+-- other entries that then get the same value; other types are ignored
+function setters.register(t,what,...)
+    local data = t.data
+    what = lower(what)
+    local w = data[what]
+    if not w then
+        w = { }
+        data[what] = w
+    end
+    for _, fnc in next, { ... } do
+        local typ = type(fnc)
+        if typ == "function" then
+            w[#w+1] = fnc
+        elseif typ == "string" then
+            -- set takes (t,what,value); the former fourth argument was an
+            -- undefined global and has been dropped
+            w[#w+1] = function(value) set(t,fnc,value) end
+        end
+    end
+end
+
+-- enables the entries of t that match what, a simple wildcard string
+-- (see string.simpleesc) or a list of lua patterns
+function setters.enable(t,what)
+    local e = t.enable
+    -- while we run, t.enable is replaced by a plain variant that neither
+    -- escapes again nor resets t.done; it is wrapped because callers
+    -- invoke it as t.enable(what) while the local enable expects (t,what)
+    t.enable, t.done = function(...) enable(t,...) end, { }
+    -- only strings are wildcard specifications; a list is passed on as is
+    enable(t,type(what) == "string" and string.simpleesc(what) or what)
+    t.enable, t.done = e, { }
+end
+
+-- disables the entries of t that match what, a simple wildcard string
+-- (see string.simpleesc) or a list of lua patterns; without what (or with
+-- an empty string) everything is switched off
+function setters.disable(t,what)
+    local e = t.disable
+    -- while we run, t.disable is replaced by a plain variant that neither
+    -- escapes again nor resets t.done; it is wrapped because callers
+    -- invoke it as t.disable(what) while the local disable expects (t,what)
+    t.disable, t.done = function(...) disable(t,...) end, { }
+    -- only strings are escaped: nil must reach disable() untouched because
+    -- it means "switch everything off", and a list is passed on as is
+    disable(t,type(what) == "string" and string.simpleesc(what) or what)
+    t.disable, t.done = e, { }
+end
+
+function setters.reset(t) -- clears the done bookkeeping and calls every registered function with false
+ t.done = { }
+ reset(t)
+end
+
+-- returns two sorted lists of registered names: first the regular (user)
+-- ones, then the ones that start with an asterisk (system)
+function setters.list(t) -- pattern
+    local user, system = { }, { }
+    local sorted = table.sortedkeys(t.data)
+    for i=1,#sorted do
+        local name = sorted[i]
+        local target = find(name,"^%*") and system or user
+        target[#target+1] = name
+    end
+    return user, system
+end
+
+function setters.show(t) -- writes the registered names with commands.writestatus, framed by two empty status lines
+ commands.writestatus("","")
+ for k,v in ipairs(setters.list(t)) do -- list returns (user, system); ipairs only walks the first, so only user names are shown
+ commands.writestatus(t.name,v)
+ end
+ commands.writestatus("","")
+end
+
+-- we could have used a bit of oo and the trackers:enable syntax but
+-- there is already a lot of code around using the singular tracker
+
+-- creates a named setter object (used for trackers and directives) whose
+-- functions are called with a dot (t.enable(...)) and forward to the
+-- shared setters.* implementations; the object is also stored in
+-- setters.data under its name
+function setters.new(name)
+    local t -- declared before the constructor so that the closures can refer to it
+    t = {
+        data     = { },
+        name     = name,
+        enable   = function(...) setters.enable  (t,...) end,
+        disable  = function(...) setters.disable (t,...) end,
+        register = function(...) setters.register(t,...) end,
+        -- reset used to be part of the trackers interface, so it is kept
+        reset    = function() setters.reset(t) end,
+        -- list has results (user, system) that must be passed on
+        list     = function(...) return setters.list(t,...) end,
+        show     = function(...) setters.show    (t,...) end,
+    }
+    setters.data[name] = t
+    return t
+end
+
+trackers = setters.new("trackers")
+directives = setters.new("directives")
+
+-- nice trick: we overload two of the directives related functions with variants that
+-- do tracing (itself using a tracker) .. proof of concept
+
+local trace_directives = false  trackers.register("system.directives", function(v) trace_directives = v end)
+
+local e = directives.enable -- the original functions, called by the reporting variants below
+local d = directives.disable
+
+function directives.enable(...) -- reports the arguments with commands.writestatus, then enables as before
+ commands.writestatus("directives","enabling: %s",concat({...}," "))
+ e(...)
+end
+
+function directives.disable(...) -- reports the arguments with commands.writestatus, then disables as before
+ commands.writestatus("directives","disabling: %s",concat({...}," "))
+ d(...)
+end
+
+--~ -- old code:
+--
+--~ trackers = trackers or { }
+--~ local data, done = { }, { }
+--~ local function set(what,value)
+--~ if type(what) == "string" then
+--~ what = aux.settings_to_array(what) -- inefficient but ok
+--~ end
+--~ for i=1,#what do
+--~ local w = what[i]
+--~ for d, f in next, data do
+--~ if done[d] then
+--~ -- prevent recursion due to wildcards
+--~ elseif find(d,w) then
+--~ done[d] = true
+--~ for i=1,#f do
+--~ f[i](value)
+--~ end
+--~ end
+--~ end
+--~ end
+--~ end
+--~ local function reset()
+--~ for d, f in next, data do
+--~ for i=1,#f do
+--~ f[i](false)
+--~ end
+--~ end
+--~ end
+--~ local function enable(what)
+--~ set(what,true)
+--~ end
+--~ local function disable(what)
+--~ if not what or what == "" then
+--~ done = { }
+--~ reset()
+--~ else
+--~ set(what,false)
+--~ end
+--~ end
+--~ function trackers.register(what,...)
+--~ what = lower(what)
+--~ local w = data[what]
+--~ if not w then
+--~ w = { }
+--~ data[what] = w
+--~ end
+--~ for _, fnc in next, { ... } do
+--~ local typ = type(fnc)
+--~ if typ == "function" then
+--~ w[#w+1] = fnc
+--~ elseif typ == "string" then
+--~ w[#w+1] = function(value) set(fnc,value,nesting) end
+--~ end
+--~ end
+--~ end
+--~ function trackers.enable(what)
+--~ local e = trackers.enable
+--~ trackers.enable, done = enable, { }
+--~ enable(string.simpleesc(what))
+--~ trackers.enable, done = e, { }
+--~ end
+--~ function trackers.disable(what)
+--~ local e = trackers.disable
+--~ trackers.disable, done = disable, { }
+--~ disable(string.simpleesc(what))
+--~ trackers.disable, done = e, { }
+--~ end
+--~ function trackers.reset()
+--~ done = { }
+--~ reset()
+--~ end
+--~ function trackers.list() -- pattern
+--~ local list = table.sortedkeys(data)
+--~ local user, system = { }, { }
+--~ for l=1,#list do
+--~ local what = list[l]
+--~ if find(what,"^%*") then
+--~ system[#system+1] = what
+--~ else
+--~ user[#user+1] = what
+--~ end
+--~ end
+--~ return user, system
+--~ end
+
end -- of closure
@@ -2867,6 +3346,12 @@ if not modules then modules = { } end modules ['lxml-tab'] = {
license = "see context related readme files"
}
+-- this module needs a cleanup: check latest lpeg, passing args, (sub)grammar, etc etc
+-- stripping spaces from e.g. cont-en.xml saves .2 sec runtime so it's not worth the
+-- trouble
+
+local trace_entities = false trackers.register("xml.entities", function(v) trace_entities = v end)
+
--[[ldx--
<p>The parser used here is inspired by the variant discussed in the lua book, but
handles comment and processing instructions, has a different structure, provides
@@ -2874,18 +3359,6 @@ parent access; a first version used different trickery but was less optimized to
went this route. First we had a find based parser, now we have an <l n='lpeg'/> based one.
The find based parser can be found in l-xml-edu.lua along with other older code.</p>
-<p>Expecially the lpath code is experimental, we will support some of xpath, but
-only things that make sense for us; as compensation it is possible to hook in your
-own functions. Apart from preprocessing content for <l n='context'/> we also need
-this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
-files.</p>
-
-<typing>
-a/b/c /*/c
-a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
-a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
-</typing>
-
<p>Beware, the interface may change. For instance at, ns, tg, dt may get more
verbose names. Once the code is stable we will also remove some tracing and
optimize the code.</p>
@@ -2896,26 +3369,9 @@ xml = xml or { }
--~ local xml = xml
local concat, remove, insert = table.concat, table.remove, table.insert
-local type, next, setmetatable = type, next, setmetatable
+local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber
local format, lower, find = string.format, string.lower, string.find
-
---[[ldx--
-<p>This module can be used stand alone but also inside <l n='mkiv'/> in
-which case it hooks into the tracker code. Therefore we provide a few
-functions that set the tracers.</p>
---ldx]]--
-
-local trace_remap = false
-
-if trackers then
- trackers.register("xml.remap", function(v) trace_remap = v end)
-end
-
-function xml.settrace(str,value)
- if str == "remap" then
- trace_remap = value or false
- end
-end
+local utfchar = unicode.utf8.char
--[[ldx--
<p>First a hack to enable namespace resolving. A namespace is characterized by
@@ -3022,25 +3478,25 @@ element.</p>
</typing>
--ldx]]--
-xml.strip_cm_and_dt = false -- an extra global flag, in case we have many includes
-
-- not just one big nested table capture (lpeg overflow)
local nsremap, resolvens = xml.xmlns, xml.resolvens
local stack, top, dt, at, xmlns, errorstr, entities = {}, {}, {}, {}, {}, nil, {}
+local strip, cleanup, utfize, resolve = false, false, false, false
-local mt = { __tostring = xml.text }
+local mt = { }
-function xml.check_error(top,toclose)
- return ""
+function initialize_mt(root) -- we will make a xml.new that then sets the mt as field
+ mt = { __tostring = xml.text, __index = root }
end
-local strip = false
-local cleanup = false
+-- Store the key/value pair k/v in the table that serves as __index of root's
+-- metatable, so the value is found through the metatable rather than in root
+-- itself. Fails when root has no metatable or the metatable has no __index table.
+function xml.setproperty(root,k,v)
+ getmetatable(root).__index[k] = v
+end
-function xml.set_text_cleanup(fnc)
- cleanup = fnc
+function xml.check_error(top,toclose)
+ return ""
end
local function add_attribute(namespace,tag,value)
@@ -3058,6 +3514,22 @@ local function add_attribute(namespace,tag,value)
end
end
+-- Parser action for a self-closing element: creates an element node with an
+-- empty content table, appends it to the content of the element on top of the
+-- stack and starts a fresh attribute table for the next element.
+local function add_empty(spacing, namespace, tag)
+    if #spacing > 0 then
+        -- whitespace in front of the tag goes into the content table that is current
+        dt[#dt+1] = spacing
+    end
+    local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace
+    top = stack[#stack]
+    dt = top.dt
+    local element = {
+        ns    = namespace or "",
+        rn    = resolved,
+        tg    = tag,
+        at    = at,     -- attributes collected so far for this element
+        dt    = { },
+        __p__ = top,    -- parent link
+    }
+    setmetatable(element, mt)
+    dt[#dt+1] = element
+    if at.xmlns then
+        -- presumably undoes the namespace push done when the xmlns attribute was seen
+        remove(xmlns)
+    end
+    at = { }
+end
+
local function add_begin(spacing, namespace, tag)
if #spacing > 0 then
dt[#dt+1] = spacing
@@ -3083,28 +3555,12 @@ local function add_end(spacing, namespace, tag)
end
dt = top.dt
dt[#dt+1] = toclose
- dt[0] = top
+ -- dt[0] = top -- nasty circular reference when serializing table
if toclose.at.xmlns then
remove(xmlns)
end
end
-local function add_empty(spacing, namespace, tag)
- if #spacing > 0 then
- dt[#dt+1] = spacing
- end
- local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace
- top = stack[#stack]
- dt = top.dt
- local t = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = top }
- dt[#dt+1] = t
- setmetatable(t, mt)
- if at.xmlns then
- remove(xmlns)
- end
- at = { }
-end
-
local function add_text(text)
if cleanup and #text > 0 then
dt[#dt+1] = cleanup(text)
@@ -3128,7 +3584,109 @@ local function set_message(txt)
errorstr = "garbage at the end of the file: " .. gsub(txt,"([ \n\r\t]*)","")
end
-local P, S, R, C, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V
+local reported_attribute_errors = { }
+
+-- Capture handler for a malformed attribute value: each distinct string is
+-- reported once and recorded in the _error_ field of the attribute table that is
+-- being collected; the string itself is always returned unchanged.
+local function attribute_value_error(str)
+    if reported_attribute_errors[str] then
+        return str
+    end
+    logs.report("xml","invalid attribute value: %q",str)
+    reported_attribute_errors[str] = true
+    at._error_ = str
+    return str
+end
+-- Capture handler for an unparsable attribute specification: each distinct
+-- string is reported once and recorded in the _error_ field of the attribute
+-- table that is being collected; the string itself is always returned unchanged.
+local function attribute_specification_error(str)
+    if reported_attribute_errors[str] then
+        return str
+    end
+    logs.report("xml","invalid attribute specification: %q",str)
+    reported_attribute_errors[str] = true
+    at._error_ = str
+    return str
+end
+
+local dcache, hcache, acache = { }, { }, { }
+
+-- Fallback formatters used by the entity handlers when an entity cannot be
+-- converted; they are fields of xml and are looked up at call time, so they can
+-- be replaced. Each receives the entity payload without the leading "&", "#" or
+-- "x" and without the trailing ";".
+-- NOTE(review): the decimal variant rebuilds "&NNN;" rather than "&#NNN;", so the
+-- "#" of a numeric reference is lost - confirm whether that is intended.
+function xml.unknown_dec_entity_format(str) return format("&%s;", str) end
+function xml.unknown_hex_entity_format(str) return format("&#x%s;",str) end
+function xml.unknown_any_entity_format(str) return format("&%s;", str) end
+
+-- Replacement function for the payload of a hexadecimal character reference
+-- (the HHHH of &#xHHHH;, the grammar has already dropped "&#x" and ";").
+-- With utfize set the payload is converted to the utf character, falling back to
+-- xml.unknown_hex_entity_format when it is not a valid hex number; otherwise the
+-- reference is rebuilt verbatim. Results are cached in hcache.
+-- NOTE(review): hcache is shared by both modes, so a value cached while utfize
+-- was set is also returned when it is not (and vice versa) - confirm.
+local function handle_hex_entity(str)
+    local h = hcache[str]
+    if not h then
+        if utfize then
+            local n = tonumber(str,16)
+            h = (n and utfchar(n)) or xml.unknown_hex_entity_format(str) or ""
+            if not n then
+                logs.report("xml","utfize, ignoring hex entity &#x%s;",str)
+            elseif trace_entities then
+                -- report the converted value (this used to pass an undefined variable)
+                logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,h)
+            end
+        else
+            if trace_entities then
+                logs.report("xml","found entity &#x%s;",str)
+            end
+            -- keep the "x", otherwise the hex reference would turn into a decimal one
+            h = "&#x" .. str .. ";"
+        end
+        hcache[str] = h
+    end
+    return h
+end
+-- Replacement function for the payload of a decimal character reference (the
+-- NNN of &#NNN;, the grammar has already dropped "&#" and ";").
+-- With utfize set the payload is converted to the utf character, falling back to
+-- xml.unknown_dec_entity_format when it is not a number; otherwise the reference
+-- is rebuilt verbatim. Results are cached in dcache.
+-- NOTE(review): dcache is shared by both modes, so a value cached while utfize
+-- was set is also returned when it is not (and vice versa) - confirm.
+local function handle_dec_entity(str)
+    local d = dcache[str]
+    if not d then
+        if utfize then
+            local n = tonumber(str)
+            d = (n and utfchar(n)) or xml.unknown_dec_entity_format(str) or ""
+            if not n then
+                logs.report("xml","utfize, ignoring dec entity &#%s;",str)
+            elseif trace_entities then
+                -- report the converted value (this used to pass an undefined variable)
+                logs.report("xml","utfize, converting dec entity &#%s; into %s",str,d)
+            end
+        else
+            if trace_entities then
+                logs.report("xml","found entity &#%s;",str)
+            end
+            -- keep the "#", otherwise &#123; would come back as the named entity &123;
+            d = "&#" .. str .. ";"
+        end
+        dcache[str] = d
+    end
+    return d
+end
+-- names of per-instance entities whose conversion has already been traced
+local reported_entities = { }
+
+-- Replacement function for the name of a named entity (the NAME of &NAME;).
+-- With resolve set the name is looked up in the entities of the document being
+-- converted, then in the shared acache, and finally formatted with
+-- xml.unknown_any_entity_format; without resolve the reference is rebuilt
+-- verbatim. Tracing only reports: it no longer copies per-document values into
+-- the shared acache, so switching the tracker on does not change later results.
+local function handle_any_entity(str)
+    if resolve then
+        local a = entities[str] -- per instance !
+        if not a then
+            a = acache[str]
+            if not a then
+                if trace_entities then
+                    logs.report("xml","ignoring entity &%s;",str)
+                else
+                    -- can be defined in a global mapper and intercepted elsewhere
+                    -- as happens in lxml-tex.lua
+                end
+                a = xml.unknown_any_entity_format(str) or ""
+                acache[str] = a
+            end
+        elseif trace_entities and not reported_entities[str] then
+            -- report the resolved value once (this used to pass an undefined variable)
+            logs.report("xml","converting entity &%s; into %s",str,a)
+            reported_entities[str] = true
+        end
+        return a
+    else
+        local a = acache[str]
+        if not a then
+            if trace_entities then
+                logs.report("xml","found entity &%s;",str)
+            end
+            a = "&" .. str .. ";"
+            acache[str] = a
+        end
+        return a
+    end
+end
+
+local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs
local space = S(' \r\n\t')
local open = P('<')
@@ -3138,6 +3696,8 @@ local dquote = S('"')
local equal = P('=')
local slash = P('/')
local colon = P(':')
+local semicolon = P(';')
+local ampersand = P('&')
local valid = R('az', 'AZ', '09') + S('_-.')
local name_yes = C(valid^1) * colon * C(valid^1)
local name_nop = C(P(true)) * C(valid^1)
@@ -3147,15 +3707,36 @@ local utfbom = P('\000\000\254\255') + P('\255\254\000\000') +
P('\255\254') + P('\254\255') + P('\239\187\191') -- no capture
local spacing = C(space^0)
-local justtext = C((1-open)^1)
+
+local entitycontent = (1-open-semicolon)^0
+local entity = ampersand/"" * (
+ P("#")/"" * (
+ P("x")/"" * (entitycontent/handle_hex_entity) +
+ (entitycontent/handle_dec_entity)
+ ) + (entitycontent/handle_any_entity)
+ ) * (semicolon/"")
+
+local text_unparsed = C((1-open)^1)
+local text_parsed = Cs(((1-open-ampersand)^1 + entity)^1)
+
local somespace = space^1
local optionalspace = space^0
-local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote)
-local attribute = (somespace * name * optionalspace * equal * optionalspace * value) / add_attribute
-local attributes = attribute^0
+local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote) -- ampersand and < also invalid in value
+
+local whatever = space * name * optionalspace * equal
+local wrongvalue = C(P(1-whatever-close)^1 + P(1-close)^1) / attribute_value_error
+
+local attributevalue = value + wrongvalue
+
+local attribute = (somespace * name * optionalspace * equal * optionalspace * attributevalue) / add_attribute
+----- attributes = (attribute)^0
+
+local endofattributes = slash * close + close -- recovery of flacky html
+local attributes = (attribute + somespace^-1 * (((1-endofattributes)^1)/attribute_specification_error))^0
-local text = justtext / add_text
+local parsedtext = text_parsed / add_text
+local unparsedtext = text_unparsed / add_text
local balanced = P { "[" * ((1 - S"[]") + V(1))^0 * "]" } -- taken from lpeg manual, () example
local emptyelement = (spacing * open * name * attributes * optionalspace * slash * close) / add_empty
@@ -3208,42 +3789,72 @@ local doctype = (spacing * begindoctype * somedoctype * enddoct
-- local cdata = (lpeg.Cc("@cd@") * spacing * begincdata * somecdata * endcdata ) / add_special
-- local doctype = (lpeg.Cc("@dt@") * spacing * begindoctype * somedoctype * enddoctype ) / add_special
-local trailer = space^0 * (justtext/set_message)^0
+local trailer = space^0 * (text_unparsed/set_message)^0
-- comment + emptyelement + text + cdata + instruction + V("parent"), -- 6.5 seconds on 40 MB database file
-- text + comment + emptyelement + cdata + instruction + V("parent"), -- 5.8
-- text + V("parent") + emptyelement + comment + cdata + instruction, -- 5.5
-local grammar = P { "preamble",
+local grammar_parsed_text = P { "preamble",
preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * V("parent") * trailer,
parent = beginelement * V("children")^0 * endelement,
- children = text + V("parent") + emptyelement + comment + cdata + instruction,
+ children = parsedtext + V("parent") + emptyelement + comment + cdata + instruction,
}
--- todo: xml.new + properties like entities and strip and such (store in root)
+local grammar_unparsed_text = P { "preamble",
+ preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * V("parent") * trailer,
+ parent = beginelement * V("children")^0 * endelement,
+ children = unparsedtext + V("parent") + emptyelement + comment + cdata + instruction,
+}
-function xml.convert(data, no_root, strip_cm_and_dt, given_entities) -- maybe use table met k/v (given_entities may disapear)
- strip = strip_cm_and_dt or xml.strip_cm_and_dt
- stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, given_entities or {}
+local function xmlconvert(data, settings)
+ settings = settings or { } -- no_root strip_cm_and_dt given_entities parent_root error_handler
+ strip = settings.strip_cm_and_dt
+ utfize = settings.utfize_entities
+ resolve = settings.resolve_entities
+ cleanup = settings.text_cleanup
+ stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, settings.entities or {}
+ reported_attribute_errors = { }
+ if settings.parent_root then
+ mt = getmetatable(settings.parent_root)
+ else
+ initialize_mt(top)
+ end
stack[#stack+1] = top
top.dt = { }
dt = top.dt
if not data or data == "" then
errorstr = "empty xml file"
- elseif not grammar:match(data) then
- errorstr = "invalid xml file"
+ elseif utfize or resolve then
+ if grammar_parsed_text:match(data) then
+ errorstr = ""
+ else
+ errorstr = "invalid xml file - parsed text"
+ end
else
- errorstr = ""
+ if grammar_unparsed_text:match(data) then
+ errorstr = ""
+ else
+ errorstr = "invalid xml file - unparsed text"
+ end
end
if errorstr and errorstr ~= "" then
- result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={}, er = true } }, error = true }
+ result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={}, er = true } } }
setmetatable(stack, mt)
- if xml.error_handler then xml.error_handler("load",errorstr) end
+ -- settings.error_handler == false silences reporting; a handler given in the
+ -- settings takes precedence over the global xml.error_handler and is the one
+ -- that gets called (the global one used to be called unconditionally)
+ local error_handler = settings.error_handler
+ if error_handler == false then
+ -- no error message
+ else
+ error_handler = error_handler or xml.error_handler
+ if error_handler then
+ error_handler("load",errorstr)
+ end
+ end
else
result = stack[1]
end
- if not no_root then
- result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={}, entities = entities }
+ if not settings.no_root then
+ result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={}, entities = entities, settings = settings }
setmetatable(result, mt)
local rdt = result.dt
for k=1,#rdt do
@@ -3254,9 +3865,14 @@ function xml.convert(data, no_root, strip_cm_and_dt, given_entities) -- maybe us
end
end
end
+ if errorstr and errorstr ~= "" then
+ result.error = true
+ end
return result
end
+xml.convert = xmlconvert
+
--[[ldx--
<p>Packaging data in an xml like table is done with the following
function. Maybe it will go away (when not used).</p>
@@ -3289,16 +3905,16 @@ function xml.load(filename)
if type(filename) == "string" then
local f = io.open(filename,'r')
if f then
- local root = xml.convert(f:read("*all"))
+ local root = xmlconvert(f:read("*all"))
f:close()
return root
else
- return xml.convert("")
+ return xmlconvert("")
end
elseif filename then -- filehandle
- return xml.convert(filename:read("*all"))
+ return xmlconvert(filename:read("*all"))
else
- return xml.convert("")
+ return xmlconvert("")
end
end
@@ -3307,9 +3923,11 @@ end
valid trees, which is what the next function does.</p>
--ldx]]--
+local no_root = { no_root = true }
+
function xml.toxml(data)
if type(data) == "string" then
- local root = { xml.convert(data,true) }
+ local root = { xmlconvert(data,no_root) }
return (#root > 1 and root) or root[1]
else
return data
@@ -3354,217 +3972,305 @@ alternative.</p>
-- todo: add <?xml version='1.0' standalone='yes'?> when not present
-local fallbackhandle = (tex and tex.sprint) or io.write
-
-local function serialize(e, handle, textconverter, attributeconverter, specialconverter, nocommands)
- if not e then
- return
- elseif not nocommands then
- local ec = e.command
- if ec ~= nil then -- we can have all kind of types
- if e.special then
- local etg, edt = e.tg, e.dt
- local spc = specialconverter and specialconverter[etg]
- if spc then
- local result = spc(edt[1])
- if result then
- handle(result)
- return
- else
- -- no need to handle any further
- end
- end
- end
- local xc = xml.command
- if xc then
- xc(e,ec)
- return
+-- Make sure that a tree with a root element index (root.ri) starts with an xml
+-- declaration: when none of the top level nodes is a processing instruction
+-- that matches "xml.*version=", a default declaration plus a newline is
+-- inserted at the front of root.dt. Processing instructions carry the tag
+-- "@pi@" and keep their text in dt[1], so that is what gets tested.
+function xml.checkbom(root) -- can be made faster
+    if root.ri then
+        local dt, found = root.dt, false
+        for k=1,#dt do
+            local v = dt[k]
+            local pi = type(v) == "table" and v.special and v.tg == "@pi@" and v.dt and v.dt[1]
+            if type(pi) == "string" and find(pi,"xml.*version=") then
+                found = true
+                break
             end
         end
+        if not found then
+            insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } )
+            insert(dt, 2, "\n" )
+        end
     end
- handle = handle or fallbackhandle
- local etg = e.tg
- if etg then
- if e.special then
- local edt = e.dt
- local spc = specialconverter and specialconverter[etg]
- if spc then
- local result = spc(edt[1])
- if result then
- handle(result)
+end
+
+--[[ldx--
+<p>At the cost of some 25% runtime overhead you can first convert the tree to a string
+and then handle the lot.</p>
+--ldx]]--
+
+-- new experimental reorganized serialize
+
+local function verbose_element(e,handlers)
+ local handle = handlers.handle
+ local serialize = handlers.serialize
+ local ens, etg, eat, edt, ern = e.ns, e.tg, e.at, e.dt, e.rn
+ local ats = eat and next(eat) and { }
+ if ats then
+ for k,v in next, eat do
+ ats[#ats+1] = format('%s=%q',k,v)
+ end
+ end
+ if ern and trace_remap and ern ~= ens then
+ ens = ern
+ end
+ if ens ~= "" then
+ if edt and #edt > 0 then
+ if ats then
+ handle("<",ens,":",etg," ",concat(ats," "),">")
+ else
+ handle("<",ens,":",etg,">")
+ end
+ for i=1,#edt do
+ local e = edt[i]
+ if type(e) == "string" then
+ handle(e)
else
- -- no need to handle any further
+ serialize(e,handlers)
end
- elseif etg == "@pi@" then
- -- handle(format("<?%s?>",edt[1]))
- handle("<?" .. edt[1] .. "?>")
- elseif etg == "@cm@" then
- -- handle(format("<!--%s-->",edt[1]))
- handle("<!--" .. edt[1] .. "-->")
- elseif etg == "@cd@" then
- -- handle(format("<![CDATA[%s]]>",edt[1]))
- handle("<![CDATA[" .. edt[1] .. "]]>")
- elseif etg == "@dt@" then
- -- handle(format("<!DOCTYPE %s>",edt[1]))
- handle("<!DOCTYPE " .. edt[1] .. ">")
- elseif etg == "@rt@" then
- serialize(edt,handle,textconverter,attributeconverter,specialconverter,nocommands)
end
+ handle("</",ens,":",etg,">")
else
- local ens, eat, edt, ern = e.ns, e.at, e.dt, e.rn
- local ats = eat and next(eat) and { } -- type test maybe faster
if ats then
- if attributeconverter then
- for k,v in next, eat do
- ats[#ats+1] = format('%s=%q',k,attributeconverter(v))
- end
- else
- for k,v in next, eat do
- ats[#ats+1] = format('%s=%q',k,v)
- end
- end
+ handle("<",ens,":",etg," ",concat(ats," "),"/>")
+ else
+ handle("<",ens,":",etg,"/>")
end
- if ern and trace_remap and ern ~= ens then
- ens = ern
+ end
+ else
+ if edt and #edt > 0 then
+ if ats then
+ handle("<",etg," ",concat(ats," "),">")
+ else
+ handle("<",etg,">")
end
- if ens ~= "" then
- if edt and #edt > 0 then
- if ats then
- -- handle(format("<%s:%s %s>",ens,etg,concat(ats," ")))
- handle("<" .. ens .. ":" .. etg .. " " .. concat(ats," ") .. ">")
- else
- -- handle(format("<%s:%s>",ens,etg))
- handle("<" .. ens .. ":" .. etg .. ">")
- end
- for i=1,#edt do
- local e = edt[i]
- if type(e) == "string" then
- if textconverter then
- handle(textconverter(e))
- else
- handle(e)
- end
- else
- serialize(e,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
- end
- -- handle(format("</%s:%s>",ens,etg))
- handle("</" .. ens .. ":" .. etg .. ">")
+ for i=1,#edt do
+ local ei = edt[i]
+ if type(ei) == "string" then
+ handle(ei)
else
- if ats then
- -- handle(format("<%s:%s %s/>",ens,etg,concat(ats," ")))
- handle("<" .. ens .. ":" .. etg .. " " .. concat(ats," ") .. "/>")
- else
- -- handle(format("<%s:%s/>",ens,etg))
- handle("<" .. ens .. ":" .. etg .. "/>")
- end
+ serialize(ei,handlers)
end
+ end
+ handle("</",etg,">")
+ else
+ if ats then
+ handle("<",etg," ",concat(ats," "),"/>")
else
- if edt and #edt > 0 then
- if ats then
- -- handle(format("<%s %s>",etg,concat(ats," ")))
- handle("<" .. etg .. " " .. concat(ats," ") .. ">")
- else
- -- handle(format("<%s>",etg))
- handle("<" .. etg .. ">")
- end
- for i=1,#edt do
- local ei = edt[i]
- if type(ei) == "string" then
- if textconverter then
- handle(textconverter(ei))
- else
- handle(ei)
- end
- else
- serialize(ei,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
- end
- -- handle(format("</%s>",etg))
- handle("</" .. etg .. ">")
- else
- if ats then
- -- handle(format("<%s %s/>",etg,concat(ats," ")))
- handle("<" .. etg .. " " .. concat(ats," ") .. "/>")
- else
- -- handle(format("<%s/>",etg))
- handle("<" .. etg .. "/>")
- end
- end
+ handle("<",etg,"/>")
end
end
- elseif type(e) == "string" then
- if textconverter then
- handle(textconverter(e))
+ end
+end
+
+-- Pass a processing instruction to the handle function as "<?", the first
+-- entry of the node's content table and "?>".
+local function verbose_pi(e,handlers)
+    local handle = handlers.handle
+    handle("<?",e.dt[1],"?>")
+end
+
+-- Pass a comment to the handle function as "<!--", the first entry of the
+-- node's content table and "-->".
+local function verbose_comment(e,handlers)
+    local handle = handlers.handle
+    handle("<!--",e.dt[1],"-->")
+end
+
+-- Pass a cdata section to the handle function as "<![CDATA[", the first entry
+-- of the node's content table and "]]>".
+local function verbose_cdata(e,handlers)
+    local handle = handlers.handle
+    handle("<![CDATA[",e.dt[1],"]]>")
+end
+
+-- Pass a doctype declaration to the handle function as "<!DOCTYPE ", the first
+-- entry of the node's content table and ">".
+local function verbose_doctype(e,handlers)
+    local handle = handlers.handle
+    handle("<!DOCTYPE ",e.dt[1],">")
+end
+
+-- A root node has no markup of its own: its content table is handed to the
+-- serialize function of the handler set.
+local function verbose_root(e,handlers)
+    local serialize = handlers.serialize
+    serialize(e.dt,handlers)
+end
+
+-- Text is passed to the handle function as it is.
+local function verbose_text(e,handlers)
+    local handle = handlers.handle
+    handle(e)
+end
+
+local function verbose_document(e,handlers)
+ local serialize = handlers.serialize
+ local functions = handlers.functions
+ for i=1,#e do
+ local ei = e[i]
+ if type(ei) == "string" then
+ functions["@tx@"](ei,handlers)
else
- handle(e)
+ serialize(ei,handlers)
end
- else
- for i=1,#e do
- local ei = e[i]
- if type(ei) == "string" then
- if textconverter then
- handle(textconverter(ei))
- else
- handle(ei)
- end
- else
- serialize(ei,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
+ end
+end
+
+-- Entry point for serializing: runs the optional initialize function of the
+-- handler set with the extra arguments, dispatches e on its tag (elements
+-- without a specific function use "@el@", tables without a tag use "@dc@") and
+-- returns whatever the optional finalize function returns.
+-- When initialize returns nil or false nothing is serialized and that value is
+-- returned.
+local function serialize(e,handlers,...)
+    local initialize = handlers.initialize
+    local finalize = handlers.finalize
+    local functions = handlers.functions
+    if initialize then
+        local state = initialize(...)
+        -- this read "not state == true", which parses as "(not state) == true";
+        -- the plain test below is what that expression amounts to
+        if not state then
+            return state
         end
     end
+    local etg = e.tg
+    if etg then
+        (functions[etg] or functions["@el@"])(e,handlers)
+    -- elseif type(e) == "string" then
+    --     functions["@tx@"](e,handlers)
+    else
+        functions["@dc@"](e,handlers)
+    end
+    if finalize then
+        return finalize()
+    end
 end
-xml.serialize = serialize
+-- Dispatch a node without running initialize and finalize: a table with a tag
+-- goes to the function registered for that tag (or to "@el@" when there is
+-- none), a table without a tag goes to "@dc@". Strings are not handled here.
+local function xserialize(e,handlers)
+    local functions = handlers.functions
+    local tag = e.tg
+    local action
+    if not tag then
+        action = functions["@dc@"]
+    else
+        action = functions[tag] or functions["@el@"]
+    end
+    action(e,handlers)
+end
-function xml.checkbom(root) -- can be made faster
- if root.ri then
- local dt, found = root.dt, false
- for k=1,#dt do
- local v = dt[k]
- if type(v) == "table" and v.special and v.tg == "@pi" and find(v.dt,"xml.*version=") then
- found = true
- break
+local handlers = { }
+
+-- Create a handler set: start from a copy of the "verbose" set (or an empty
+-- table when that is not registered yet) and overlay the given settings. Table
+-- valued settings are merged key by key into the table already present under
+-- that name, other values replace the existing entry. When settings.name is
+-- given the result is also registered in handlers under that name. Returns the
+-- new set.
+-- NOTE(review): whether nested tables are shared with the "verbose" set depends
+-- on table.copy being deep or shallow - confirm.
+local function newhandlers(settings)
+    local t = table.copy(handlers.verbose or { }) -- merge
+    if settings then
+        for k,v in next, settings do
+            if type(v) == "table" then
+                local tk = t[k] -- this was assigned without local and leaked a global
+                if not tk then
+                    tk = { }
+                    t[k] = tk
+                end
+                for kk,vv in next, v do
+                    tk[kk] = vv
+                end
+            else
+                t[k] = v
+            end
         end
- if not found then
- insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } )
- insert(dt, 2, "\n" )
+        if settings.name then
+            handlers[settings.name] = t
+        end
     end
+    return t
+end
+
+local nofunction = function() end
+
+-- Register fnc under name in the functions table of a handler set; without a
+-- function a do-nothing function is registered instead.
+function xml.sethandlersfunction(handler,name,fnc)
+    local functions = handler.functions
+    if fnc then
+        functions[name] = fnc
+    else
+        functions[name] = nofunction
+    end
 end
+-- Return the function registered under name in a handler set (nil when there
+-- is none).
+function xml.gethandlersfunction(handler,name)
+    local functions = handler.functions
+    return functions[name]
+end
+
+-- Return the handler set that was registered under name (nil when unknown).
+function xml.gethandlers(name)
+    local found = handlers[name]
+    return found
+end
+
+-- Register the "verbose" handler set. It has no initialize or finalize step,
+-- recurses with xserialize, writes with print and maps each node tag to the
+-- matching verbose_* function ("@el@" for elements, "@dc@" for tables without
+-- a tag, "@tx@" for text).
+newhandlers {
+ name = "verbose",
+ initialize = false, -- faster than nil and mt lookup
+ finalize = false, -- faster than nil and mt lookup
+ serialize = xserialize,
+ handle = print,
+ functions = {
+ ["@dc@"] = verbose_document,
+ ["@dt@"] = verbose_doctype,
+ ["@rt@"] = verbose_root,
+ ["@el@"] = verbose_element,
+ ["@pi@"] = verbose_pi,
+ ["@cm@"] = verbose_comment,
+ ["@cd@"] = verbose_cdata,
+ ["@tx@"] = verbose_text,
+ }
+}
+
--[[ldx--
-<p>At the cost of some 25% runtime overhead you can first convert the tree to a string
-and then handle the lot.</p>
+<p>How you deal with saving data depends on your preferences. For a 40 MB database
+file the timing on a 2.3 Core Duo are as follows (time in seconds):</p>
+
+<lines>
+1.3 : load data from file to string
+6.1 : convert string into tree
+5.3 : saving in file using xmlsave
+6.8 : converting to string using xml.tostring
+3.6 : saving converted string in file
+</lines>
+
+<p>Beware, these were timings with the old routine but measurements will not be that
+much different I guess.</p>
--ldx]]--
-function xml.tostring(root) -- 25% overhead due to collecting
+-- maybe this will move to lxml-xml
+
+local result
+
+-- Handler set registered as "file": initialize opens the named file in binary
+-- write mode and keeps the handle in the result upvalue (it returns nil when
+-- the file cannot be opened), handle writes its arguments to that file and
+-- finalize closes it and returns true.
+local xmlfilehandler = newhandlers {
+ name = "file",
+ initialize = function(name) result = io.open(name,"wb") return result end,
+ finalize = function() result:close() return true end,
+ handle = function(...) result:write(...) end,
+}
+
+-- no checking on writeability here but not faster either
+--
+-- local xmlfilehandler = newhandlers {
+-- initialize = function(name) io.output(name,"wb") return true end,
+-- finalize = function() io.close() return true end,
+-- handle = io.write,
+-- }
+
+
+-- Serialize root with the file handler set; name is passed on to the
+-- initialize function of that set, which uses it as the file to write to.
+-- Nothing is returned.
+function xml.save(root,name)
+ serialize(root,xmlfilehandler,name)
+end
+
+local result
+
+-- Handler set registered as "string": initialize starts a new collector table
+-- in the result upvalue, handle appends the concatenation of its arguments as
+-- one entry and finalize returns the concatenated entries.
+-- NOTE(review): the collector is a single shared upvalue, so a serialization
+-- that starts while another one is running replaces it - confirm this cannot
+-- happen.
+local xmlstringhandler = newhandlers {
+ name = "string",
+ initialize = function() result = { } return result end,
+ finalize = function() return concat(result) end,
+ handle = function(...) result[#result+1] = concat { ... } end
+}
+
+local function xmltostring(root) -- 25% overhead due to collecting
if root then
if type(root) == 'string' then
return root
- elseif next(root) then -- next is faster than type (and >0 test)
- local result = { }
- serialize(root,function(s) result[#result+1] = s end) -- brrr, slow (direct printing is faster)
- return concat(result,"")
+ else -- if next(root) then -- next is faster than type (and >0 test)
+ return serialize(root,xmlstringhandler) or ""
end
end
return ""
end
+-- Return the serialized form of root, or an empty string when there is no root.
+local function xmltext(root) -- inline
+    if root then
+        return xmltostring(root)
+    end
+    return ""
+end
+
+-- Set mt to a fresh metatable that uses xmltext for tostring and root for
+-- lookups of missing keys.
+-- NOTE(review): this is defined without local, so it is a global, and it
+-- replaces the definition of the same name given earlier in this module.
+function initialize_mt(root)
+ mt = { __tostring = xmltext, __index = root }
+end
+
+xml.defaulthandlers = handlers
+xml.newhandlers = newhandlers
+xml.serialize = serialize
+xml.tostring = xmltostring
+xml.text = xmltext
+
--[[ldx--
<p>The next function operated on the content only and needs a handle function
that accepts a string.</p>
--ldx]]--
-function xml.string(e,handle)
+local function xmlstring(e,handle)
if not handle or (e.special and e.tg ~= "@rt@") then
-- nothing
elseif e.tg then
local edt = e.dt
if edt then
for i=1,#edt do
- xml.string(edt[i],handle)
+ xmlstring(edt[i],handle)
end
end
else
@@ -3572,33 +4278,16 @@ function xml.string(e,handle)
end
end
---[[ldx--
-<p>How you deal with saving data depends on your preferences. For a 40 MB database
-file the timing on a 2.3 Core Duo are as follows (time in seconds):</p>
-
-<lines>
-1.3 : load data from file to string
-6.1 : convert string into tree
-5.3 : saving in file using xmlsave
-6.8 : converting to string using xml.tostring
-3.6 : saving converted string in file
-</lines>
-
-<p>The save function is given below.</p>
---ldx]]--
-
-function xml.save(root,name)
- local f = io.open(name,"w")
- if f then
- xml.serialize(root,function(s) f:write(s) end)
- f:close()
- end
-end
+xml.string = xmlstring
--[[ldx--
<p>A few helpers:</p>
--ldx]]--
+-- Return the value of the __p__ field of root (the parser stores the parent
+-- element there); nil when the field is not set.
+function xml.parent(root)
+ return root.__p__
+end
+
function xml.body(root)
return (root.ri and root.dt[root.ri]) or root
end
@@ -3611,34 +4300,19 @@ function xml.content(root) -- bugged
return (root and root.dt and xml.tostring(root.dt)) or ""
end
-function xml.isempty(root, pattern)
- if pattern == "" or pattern == "*" then
- pattern = nil
- end
- if pattern then
- -- todo
- return false
- else
- return not root or not root.dt or #root.dt == 0 or root.dt == ""
- end
-end
-
--[[ldx--
<p>The next helper erases an element but keeps the table as it is,
and since empty strings are not serialized (effectively) it does
not harm. Copying the table would take more time. Usage:</p>
-
-<typing>
-dt[k] = xml.empty() or xml.empty(dt,k)
-</typing>
--ldx]]--
-function xml.empty(dt,k)
- if dt and k then
- dt[k] = ""
- return dt[k]
- else
- return ""
+-- Erase content in place by overwriting it with empty strings: with an index k
+-- only dt[k] is blanked, without one every entry of dt is blanked. The table
+-- keeps its length. Nothing happens when dt is nil.
+function xml.erase(dt,k)
+    if dt then
+        if k then
+            dt[k] = ""
+        else for i=1,#dt do
+            -- this used to assign { "" } to dt[1] on every pass, which left
+            -- all other entries untouched
+            dt[i] = ""
+        end end
     end
 end
@@ -3672,96 +4346,403 @@ if not modules then modules = { } end modules ['lxml-pth'] = {
license = "see context related readme files"
}
+-- e.ni is only valid after a filter run
+
local concat, remove, insert = table.concat, table.remove, table.insert
local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
-local format, lower, gmatch, gsub, find, rep = string.format, string.lower, string.gmatch, string.gsub, string.find, string.rep
+local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
--[[ldx--
<p>This module can be used stand alone but also inside <l n='mkiv'/> in
which case it hooks into the tracker code. Therefore we provide a few
functions that set the tracers. Here we overload a previously defined
function.</p>
+<p>If I can get in the mood I will make a variant that is XSLT compliant
+but I wonder if it makes sense.</p>
--ldx]]--
-local trace_lpath = false
-
-if trackers then
- trackers.register("xml.lpath", function(v) trace_lpath = v end)
-end
+--[[ldx--
+<p>Especially the lpath code is experimental, we will support some of xpath, but
+only things that make sense for us; as compensation it is possible to hook in your
+own functions. Apart from preprocessing content for <l n='context'/> we also need
+this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
+files.</p>
-local settrace = xml.settrace -- lxml-tab
+<typing>
+a/b/c /*/c
+a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
+a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
+</typing>
+--ldx]]--
-function xml.settrace(str,value)
- if str == "lpath" then
- trace_lpath = value or false
- else
- settrace(str,value) -- lxml-tab
- end
-end
+local trace_lpath = false if trackers then trackers.register("xml.path", function(v) trace_lpath = v end) end
+local trace_lparse = false if trackers then trackers.register("xml.parse", function(v) trace_lparse = v end) end
+local trace_lprofile = false if trackers then trackers.register("xml.profile", function(v) trace_lpath = v trace_lparse = v trace_lprofile = v end) end
--[[ldx--
-<p>We've now arrived at an intersting part: accessing the tree using a subset
+<p>We've now arrived at an interesting part: accessing the tree using a subset
of <l n='xpath'/> and since we're not compatible we call it <l n='lpath'/>. We
will explain more about its usage in other documents.</p>
--ldx]]--
-local lpathcalls = 0 -- statistics
-local lpathcached = 0 -- statistics
+local lpathcalls = 0 function xml.lpathcalls () return lpathcalls end -- statistics counter plus a getter that returns its current value
+local lpathcached = 0 function xml.lpathcached() return lpathcached end -- second statistics counter, exposed the same way
-xml.functions = xml.functions or { }
-xml.expressions = xml.expressions or { }
+xml.functions = xml.functions or { } -- internal
+xml.expressions = xml.expressions or { } -- in expressions
+xml.finalizers = xml.finalizers or { } -- fast do-with ... (with return value other than collection)
+xml.specialhandler = xml.specialhandler or { }
local functions = xml.functions
local expressions = xml.expressions
+local finalizers = xml.finalizers
-local actions = {
- [10] = "stay",
- [11] = "parent",
- [12] = "subtree root",
- [13] = "document root",
- [14] = "any",
- [15] = "many",
- [16] = "initial",
- [20] = "match",
- [21] = "match one of",
- [22] = "match and attribute eq",
- [23] = "match and attribute ne",
- [24] = "match one of and attribute eq",
- [25] = "match one of and attribute ne",
- [27] = "has attribute",
- [28] = "has value",
- [29] = "fast match",
- [30] = "select",
- [31] = "expression",
- [40] = "processing instruction",
-}
+finalizers.xml = finalizers.xml or { }
+finalizers.tex = finalizers.tex or { }
+
+local function fallback (t, name) -- lookup handler: resolves a missing key of t against the shared finalizers table
+ local fn = finalizers[name]
+ if fn then
+ t[name] = fn -- cache the hit in t so the next lookup of this name finds it directly
+ else
+ logs.report("xml","unknown sub finalizer '%s'",tostring(name))
+ fn = function() end -- unknown name: hand back a do-nothing function (not cached, so each miss is reported again)
+ end
+ return fn
+end
+
+setmetatable(finalizers.xml, { __index = fallback })
+setmetatable(finalizers.tex, { __index = fallback })
+
+xml.defaultprotocol = "xml"
+
+-- as xsl does not follow xpath completely here we will also
+-- be more liberal especially with regards to the use of | and
+-- the rootpath:
+--
+-- test : all 'test' under current
+-- /test : 'test' relative to current
+-- a|b|c : set of names
+-- (a|b|c) : idem
+-- ! : not
+--
+-- after all, we're not doing transformations but filtering. in
+-- addition we provide filter functions (last bit)
+--
+-- todo: optimizer
+--
+-- .. : parent
+-- * : all kids
+-- / : anchor here
+-- // : /**/
+-- ** : all in between
+--
+-- so far we had (more practical as we don't transform)
+--
+-- {/test} : kids 'test' under current node
+-- {test} : any kid with tag 'test'
+-- {//test} : same as above
--- a rather dumb lpeg
+-- evaluator (needs to be redone, for the moment copied)
-local P, S, R, C, V, Cc = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc
+-- todo: apply_axis(list,notable) and collection vs single
--- instead of using functions we just parse a few names which saves a call
--- later on
+local apply_axis = { }
-local lp_position = P("position()") / "ps"
-local lp_index = P("index()") / "id"
-local lp_text = P("text()") / "tx"
-local lp_name = P("name()") / "(ns~='' and ns..':'..tg)" -- "((rt.ns~='' and rt.ns..':'..rt.tg) or '')"
-local lp_tag = P("tag()") / "tg" -- (rt.tg or '')
-local lp_ns = P("ns()") / "ns" -- (rt.ns or '')
-local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==")
-local lp_doequal = P("=") / "=="
-local lp_attribute = P("@") / "" * Cc("(at['") * R("az","AZ","--","__")^1 * Cc("'] or '')")
+apply_axis['root'] = function(list) -- maps every entry of list to its topmost ancestor
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ local rt = ll -- an entry without a parent is its own root
+ while ll do
+ ll = ll.__p__ -- climb one level along the parent link
+ if ll then
+ rt = ll
+ end
+ end
+ collected[#collected+1] = rt -- one result per input entry; duplicates are not removed
+ end
+ return collected
+end
+
+apply_axis['self'] = function(list) -- identity axis
+--~ local collected = { }
+--~ for l=1,#list do
+--~ collected[#collected+1] = list[l]
+--~ end
+--~ return collected
+ return list -- the very same table is returned (no copy); the copying variant is kept commented out above
+end
-local lp_lua_function = C(R("az","AZ","--","__")^1 * (P(".") * R("az","AZ","--","__")^1)^1) * P("(") / function(t) -- todo: better . handling
+apply_axis['child'] = function(list) -- collects the direct tagged children of every entry in list
+ local collected = { }
+ for l=1,#list do
+ local dt = list[l].dt
+ for k=1,(dt and #dt) or 0 do -- guard: an entry without a dt table contributes nothing instead of raising an error on #dt
+ local dk = dt[k]
+ if dk.tg then -- only children that carry a tg field are collected
+ collected[#collected+1] = dk
+ dk.ni = k -- refresh
+ end
+ end
+ end
+ return collected
+end
+
+local function collect(list,collected) -- depth-first walk: appends every tagged descendant of one element to collected
+ local dt = list.dt -- despite the name, list is a single element here
+ if dt then
+ for k=1,#dt do
+ local dk = dt[k]
+ if dk.tg then -- children without a tg field are skipped and not descended into
+ collected[#collected+1] = dk
+ dk.ni = k -- refresh
+ collect(dk,collected) -- recurse after adding, so a parent precedes its own children
+ end
+ end
+ end
+end
+apply_axis['descendant'] = function(list) -- all tagged descendants of every entry; the entries themselves are not included
+ local collected = { }
+ for l=1,#list do
+ collect(list[l],collected)
+ end
+ return collected
+end
+
+local function collect(list,collected) -- NOTE(review): same body as the collect declared for the descendant axis; this local shadows it
+ local dt = list.dt
+ if dt then
+ for k=1,#dt do
+ local dk = dt[k]
+ if dk.tg then -- children without a tg field are skipped
+ collected[#collected+1] = dk
+ dk.ni = k -- refresh
+ collect(dk,collected) -- depth-first recursion
+ end
+ end
+ end
+end
+apply_axis['descendant-or-self'] = function(list) -- every entry followed by all of its tagged descendants
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+if ll.special ~= true then -- catch double root
+ collected[#collected+1] = ll
+end
+ collect(ll,collected) -- descendants are gathered even when the entry itself was left out
+ end
+ return collected
+end
+
+apply_axis['ancestor'] = function(list) -- for each entry, all ancestors reached via __p__, nearest first
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ while ll do
+ ll = ll.__p__
+ if ll then
+ collected[#collected+1] = ll -- no de-duplication: a shared ancestor is added once per entry below it
+ end
+ end
+ end
+ return collected
+end
+
+apply_axis['ancestor-or-self'] = function(list) -- each entry followed by its chain of ancestors
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ collected[#collected+1] = ll -- the entry itself comes first
+ while ll do
+ ll = ll.__p__
+ if ll then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ return collected
+end
+
+apply_axis['parent'] = function(list) -- the direct parent of every entry that has one
+ local collected = { }
+ for l=1,#list do
+ local pl = list[l].__p__
+ if pl then -- entries without a parent link are dropped
+ collected[#collected+1] = pl
+ end
+ end
+ return collected
+end
+
+apply_axis['attribute'] = function(list) -- stub axis
+ return { } -- ignores list and returns a fresh empty table
+end
+
+apply_axis['following'] = function(list) -- stub axis
+ return { } -- ignores list and returns a fresh empty table
+end
+
+apply_axis['following-sibling'] = function(list) -- stub axis
+ return { } -- ignores list and returns a fresh empty table
+end
+
+apply_axis['namespace'] = function(list) -- stub axis
+ return { } -- ignores list and returns a fresh empty table
+end
+
+apply_axis['preceding'] = function(list) -- stub axis
+ return { } -- ignores list and returns a fresh empty table
+end
+
+apply_axis['preceding-sibling'] = function(list) -- stub axis
+ return { } -- ignores list and returns a fresh empty table
+end
+
+apply_axis['auto-descendant-or-self'] = apply_axis['descendant-or-self']
+apply_axis['auto-descendant'] = apply_axis['descendant']
+apply_axis['auto-child'] = apply_axis['child']
+apply_axis['auto-self'] = apply_axis['self']
+apply_axis['initial-child'] = apply_axis['child']
+
+local function apply_nodes(list,directive,nodes) -- filters list by name tests; a true directive keeps matches, otherwise non-matches are kept
+ -- todo: nodes[1] etc ... negated node name in set ... when needed
+ -- ... currently ignored
+ local maxn = #nodes -- nodes is read as a flat array of triples; slots 2 and 3 of a triple hold namespace and tag
+ if maxn == 3 then --optimized loop
+ local nns, ntg = nodes[2], nodes[3] -- a false/nil slot acts as a wildcard for that part
+ if not nns and not ntg then -- wildcard
+ if directive then
+ return list -- everything matches: the input table itself is returned, not a copy
+ else
+ return { }
+ end
+ else
+ local collected = { }
+ if not nns then -- only check tag
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then -- entries without a tag are never collected, not even when negating
+ if directive then
+ if ntg == ltg then
+ collected[#collected+1] = ll
+ end
+ elseif ntg ~= ltg then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ elseif not ntg then -- only check namespace
+ for l=1,#list do
+ local ll = list[l]
+ local lns = ll.rn or ll.ns -- rn takes precedence over ns when both are set
+ if lns then
+ if directive then
+ if lns == nns then
+ collected[#collected+1] = ll
+ end
+ elseif lns ~= nns then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ else -- check both
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ local lns = ll.rn or ll.ns
+ local ok = ltg == ntg and lns == nns
+ if directive then
+ if ok then
+ collected[#collected+1] = ll
+ end
+ elseif not ok then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ end
+ return collected
+ end
+ else -- general case: a set of several name tests
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ local lns = ll.rn or ll.ns
+ local ok = false
+ for n=1,maxn,3 do -- step through the triples; the first slot of each triple is not consulted here
+ local nns, ntg = nodes[n+1], nodes[n+2]
+ ok = (not ntg or ltg == ntg) and (not nns or lns == nns)
+ if ok then
+ break -- one matching alternative is enough
+ end
+ end
+ if directive then
+ if ok then
+ collected[#collected+1] = ll
+ end
+ elseif not ok then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ return collected
+ end
+end
+
+local function apply_expression(list,expression,order) -- keeps the entries for which the compiled expression returns a true value
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ if expression(list,ll,l,order) then -- nasty, only valid when n=1
+ collected[#collected+1] = ll
+ end
+ end
+ return collected
+end
+
+local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb
+
+local spaces = S(" \n\r\t\f")^0
+
+local lp_space = S(" \n\r\t\f")
+local lp_any = P(1)
+
+local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==")
+local lp_doequal = P("=") / "=="
+local lp_or = P("|") / " or "
+local lp_and = P("&") / " and "
+
+local lp_builtin = P (
+ P("first") / "1" +
+ P("last") / "#list" +
+ P("position") / "l" +
+ P("rootposition") / "order" +
+ P("index") / "ll.ni" +
+ P("text") / "(ll.dt[1] or '')" +
+ P("name") / "(ll.ns~='' and ll.ns..':'..ll.tg)" +
+ P("tag") / "ll.tg" +
+ P("ns") / "ll.ns"
+ ) * ((spaces * P("(") * spaces * P(")"))/"")
+
+local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("ll.at['") * R("az","AZ","--","__")^1 * Cc("']")
+local lp_fastpos = ((R("09","--","++")^1 * P(-1)) / function(s) return "l==" .. s end)
+
+local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false")
+
+local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling
return t .. "("
end
-local lp_function = C(R("az","AZ","--","__")^1) * P("(") / function(t) -- todo: better . handling
+local lp_function = C(R("az","AZ","__")^1) * P("(") / function(t) -- todo: better . handling
if expressions[t] then
- return "expressions." .. t .. "("
+ return "expr." .. t .. "("
else
- return "expressions.error("
+ return "expr.error("
end
end
@@ -3771,337 +4752,527 @@ local noparent = 1 - (lparent+rparent)
local nested = lpeg.P{lparent * (noparent + lpeg.V(1))^0 * rparent}
local value = lpeg.P(lparent * lpeg.C((noparent + nested)^0) * rparent) -- lpeg.P{"("*C(((1-S("()"))+V(1))^0)*")"}
--- if we use a dedicated namespace then we don't need to pass rt and k
+local lp_child = Cc("expr.child(e,'") * R("az","AZ","--","__")^1 * Cc("')")
+local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'")
+local lp_content= (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"'))
+
+local cleaner
-local lp_special = (C(P("name")+P("text")+P("tag"))) * value / function(t,s)
+local lp_special = (C(P("name")+P("text")+P("tag")+P("count")+P("child"))) * value / function(t,s)
if expressions[t] then
- if s then
- return "expressions." .. t .. "(r,k," .. s ..")"
+ s = s and s ~= "" and cleaner:match(s)
+ if s and s ~= "" then
+ return "expr." .. t .. "(e," .. s ..")"
else
- return "expressions." .. t .. "(r,k)"
+ return "expr." .. t .. "(e)"
end
else
- return "expressions.error(" .. t .. ")"
+ return "expr.error(" .. t .. ")"
end
end
-local converter = lpeg.Cs ( (
- lp_position +
- lp_index +
- lp_text + lp_name + -- fast one
+local content =
+ lp_builtin +
+ lp_attribute +
lp_special +
lp_noequal + lp_doequal +
- lp_attribute +
- lp_lua_function +
- lp_function +
+ lp_or + lp_and +
+ lp_reserved +
+ lp_lua_function + lp_function +
+ lp_content + -- too fragile
+ lp_child +
+ lp_any
+
+local converter = lpeg.Cs (
+ lp_fastpos + (lpeg.P { lparent * (lpeg.V(1))^0 * rparent + content } )^0
+)
+
+cleaner = lpeg.Cs ( (
+--~ lp_fastpos +
+ lp_reserved +
+ lp_string +
1 )^1 )
--- expressions,root,rootdt,k,e,edt,ns,tg,idx,hsh[tg] or 1
+--~ expr
-local template = [[
- return function(expressions,r,d,k,e,dt,ns,tg,id,ps)
- local at, tx = e.at or { }, dt[1] or ""
+local template_e = [[
+ local expr = xml.expressions
+ return function(list,ll,l,root)
return %s
end
]]
-local function make_expression(str)
- str = converter:match(str)
- return str, loadstring(format(template,str))()
-end
-
-local map = { }
-
-local space = S(' \r\n\t')
-local squote = S("'")
-local dquote = S('"')
-local lparent = P('(')
-local rparent = P(')')
-local atsign = P('@')
-local lbracket = P('[')
-local rbracket = P(']')
-local exclam = P('!')
-local period = P('.')
-local eq = P('==') + P('=')
-local ne = P('<>') + P('!=')
-local star = P('*')
-local slash = P('/')
-local colon = P(':')
-local bar = P('|')
-local hat = P('^')
-local valid = R('az', 'AZ', '09') + S('_-')
-local name_yes = C(valid^1 + star) * colon * C(valid^1 + star) -- permits ns:* *:tg *:*
-local name_nop = Cc("*") * C(valid^1)
-local name = name_yes + name_nop
-local number = C((S('+-')^0 * R('09')^1)) / tonumber
-local names = (bar^0 * name)^1
-local morenames = name * (bar^0 * name)^1
-local instructiontag = P('pi::')
-local spacing = C(space^0)
-local somespace = space^1
-local optionalspace = space^0
-local text = C(valid^0)
-local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote)
-local empty = 1-slash
-
-local is_eq = lbracket * atsign * name * eq * value * rbracket
-local is_ne = lbracket * atsign * name * ne * value * rbracket
-local is_attribute = lbracket * atsign * name * rbracket
-local is_value = lbracket * value * rbracket
-local is_number = lbracket * number * rbracket
-
-local nobracket = 1-(lbracket+rbracket) -- must be improved
-local is_expression = lbracket * C(((C(nobracket^1))/make_expression)) * rbracket
-
-local is_expression = lbracket * (C(nobracket^1))/make_expression * rbracket
-
-local is_one = name
-local is_none = exclam * name
-local is_one_of = ((lparent * names * rparent) + morenames)
-local is_none_of = exclam * ((lparent * names * rparent) + morenames)
-
-local stay = (period )
-local parent = (period * period ) / function( ) map[#map+1] = { 11 } end
-local subtreeroot = (slash + hat ) / function( ) map[#map+1] = { 12 } end
-local documentroot = (hat * hat ) / function( ) map[#map+1] = { 13 } end
-local any = (star ) / function( ) map[#map+1] = { 14 } end
-local many = (star * star ) / function( ) map[#map+1] = { 15 } end
-local initial = (hat * hat * hat ) / function( ) map[#map+1] = { 16 } end
-
-local match = (is_one ) / function(...) map[#map+1] = { 20, true , ... } end
-local match_one_of = (is_one_of ) / function(...) map[#map+1] = { 21, true , ... } end
-local dont_match = (is_none ) / function(...) map[#map+1] = { 20, false, ... } end
-local dont_match_one_of = (is_none_of ) / function(...) map[#map+1] = { 21, false, ... } end
-
-local match_and_eq = (is_one * is_eq ) / function(...) map[#map+1] = { 22, true , ... } end
-local match_and_ne = (is_one * is_ne ) / function(...) map[#map+1] = { 23, true , ... } end
-local dont_match_and_eq = (is_none * is_eq ) / function(...) map[#map+1] = { 22, false, ... } end
-local dont_match_and_ne = (is_none * is_ne ) / function(...) map[#map+1] = { 23, false, ... } end
-
-local match_one_of_and_eq = (is_one_of * is_eq ) / function(...) map[#map+1] = { 24, true , ... } end
-local match_one_of_and_ne = (is_one_of * is_ne ) / function(...) map[#map+1] = { 25, true , ... } end
-local dont_match_one_of_and_eq = (is_none_of * is_eq ) / function(...) map[#map+1] = { 24, false, ... } end
-local dont_match_one_of_and_ne = (is_none_of * is_ne ) / function(...) map[#map+1] = { 25, false, ... } end
-
-local has_attribute = (is_one * is_attribute) / function(...) map[#map+1] = { 27, true , ... } end
-local has_value = (is_one * is_value ) / function(...) map[#map+1] = { 28, true , ... } end
-local dont_has_attribute = (is_none * is_attribute) / function(...) map[#map+1] = { 27, false, ... } end
-local dont_has_value = (is_none * is_value ) / function(...) map[#map+1] = { 28, false, ... } end
-local position = (is_one * is_number ) / function(...) map[#map+1] = { 30, true, ... } end
-local dont_position = (is_none * is_number ) / function(...) map[#map+1] = { 30, false, ... } end
-
-local expression = (is_one * is_expression)/ function(...) map[#map+1] = { 31, true, ... } end
-local dont_expression = (is_none * is_expression)/ function(...) map[#map+1] = { 31, false, ... } end
-
-local self_expression = ( is_expression) / function(...) if #map == 0 then map[#map+1] = { 11 } end
- map[#map+1] = { 31, true, "*", "*", ... } end
-local dont_self_expression = (exclam * is_expression) / function(...) if #map == 0 then map[#map+1] = { 11 } end
- map[#map+1] = { 31, false, "*", "*", ... } end
-
-local instruction = (instructiontag * text ) / function(...) map[#map+1] = { 40, ... } end
-local nothing = (empty ) / function( ) map[#map+1] = { 15 } end -- 15 ?
-local crap = (1-slash)^1
-
--- a few ugly goodies:
-
-local docroottag = P('^^') / function( ) map[#map+1] = { 12 } end
-local subroottag = P('^') / function( ) map[#map+1] = { 13 } end
-local roottag = P('root::') / function( ) map[#map+1] = { 12 } end
-local parenttag = P('parent::') / function( ) map[#map+1] = { 11 } end
-local childtag = P('child::')
-local selftag = P('self::')
-
--- there will be more and order will be optimized
-
-local selector = (
- instruction +
--- many + any + -- brrr, not here !
- parent + stay +
- dont_position + position +
- dont_match_one_of_and_eq + dont_match_one_of_and_ne +
- match_one_of_and_eq + match_one_of_and_ne +
- dont_match_and_eq + dont_match_and_ne +
- match_and_eq + match_and_ne +
- dont_expression + expression +
- dont_self_expression + self_expression +
- has_attribute + has_value +
- dont_match_one_of + match_one_of +
- dont_match + match +
- many + any +
- crap + empty
-)
+local template_f_y = [[
+ local finalizer = xml.finalizers['%s']['%s']
+ return function(collection)
+ return finalizer(collection,%s)
+ end
+]]
-local grammar = P { "startup",
- startup = (initial + documentroot + subtreeroot + roottag + docroottag + subroottag)^0 * V("followup"),
- followup = ((slash + parenttag + childtag + selftag)^0 * selector)^1,
-}
+local template_f_n = [[
+ return xml.finalizers['%s']['%s']
+]]
-local function compose(str)
- if not str or str == "" then
- -- wildcard
- return true
- elseif str == '/' then
- -- root
- return false
+--
+
+local function errorrunner_e(str,cnv) -- reports an expression that could not be turned into a function
+ logs.report("lpath","error in expression: %s => %s",str,cnv) -- str: original expression, cnv: its converted form
+ return false
+end
+local function errorrunner_f(str,arg) -- reports a finalizer that could not be turned into a function
+ logs.report("lpath","error in finalizer: %s(%s)",str,arg or "") -- a missing argument string is shown as empty
+ return false
+end
+
+local function register_nodes(nodetest,nodes) -- wraps a node test and its name set into a step record of kind "nodes"
+ return { kind = "nodes", nodetest = nodetest, nodes = nodes }
+end
+
+local function register_expression(expression) -- compiles a bracketed expression into a step record of kind "expression"
+ local converted = converter:match(expression) -- rewrite the expression text with the converter pattern
+ local runner = loadstring(format(template_e,converted)) -- wrap it in template_e and compile; nil when it does not compile
+ runner = (runner and runner()) or function() errorrunner_e(expression,converted) end -- fallback only reports; it returns nothing itself
+ return { kind = "expression", expression = expression, converted = converted, evaluator = runner }
+end
+
+local function register_finalizer(protocol,name,arguments) -- builds a step record of kind "finalizer" by compiling a small lookup chunk
+ local runner
+ if arguments and arguments ~= "" then
+ runner = loadstring(format(template_f_y,protocol or xml.defaultprotocol,name,arguments)) -- variant that passes the argument text after the collection
else
- map = { }
- grammar:match(str)
- if #map == 0 then
- return true
- else
- local m = map[1][1]
- if #map == 1 then
- if m == 14 or m == 15 then
- -- wildcard
- return true
- elseif m == 12 then
- -- root
- return false
- end
- elseif #map == 2 and m == 12 and map[2][1] == 20 then
- -- return { { 29, map[2][2], map[2][3], map[2][4], map[2][5] } }
- map[2][1] = 29
- return { map[2] }
- end
- if m ~= 11 and m ~= 12 and m ~= 13 and m ~= 14 and m ~= 15 and m ~= 16 then
- insert(map, 1, { 16 })
- end
- -- print(gsub(table.serialize(map),"[ \n]+"," "))
- return map
- end
+ runner = loadstring(format(template_f_n,protocol or xml.defaultprotocol,name)) -- variant without arguments: the chunk returns the finalizer itself
end
+ runner = (runner and runner()) or function() errorrunner_f(name,arguments) end -- fallback only reports; it returns nothing itself
+ return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner }
end
+local expression = P { "ex",
+ ex = "[" * C((V("sq") + V("dq") + (1 - S("[]")) + V("ex"))^0) * "]",
+ sq = "'" * (1 - S("'"))^0 * "'",
+ dq = '"' * (1 - S('"'))^0 * '"',
+}
+
+local arguments = P { "ar",
+ ar = "(" * Cs((V("sq") + V("dq") + V("nq") + P(1-P(")")))^0) * ")",
+ nq = ((1 - S("),'\""))^1) / function(s) return format("%q",s) end,
+ sq = P("'") * (1 - P("'"))^0 * P("'"),
+ dq = P('"') * (1 - P('"'))^0 * P('"'),
+}
+
+-- todo: better arg parser
+
+local register_self = { kind = "axis", axis = "self" } -- , apply = apply_axis["self"] }
+local register_parent = { kind = "axis", axis = "parent" } -- , apply = apply_axis["parent"] }
+local register_descendant = { kind = "axis", axis = "descendant" } -- , apply = apply_axis["descendant"] }
+local register_child = { kind = "axis", axis = "child" } -- , apply = apply_axis["child"] }
+local register_descendant_or_self = { kind = "axis", axis = "descendant-or-self" } -- , apply = apply_axis["descendant-or-self"] }
+local register_root = { kind = "axis", axis = "root" } -- , apply = apply_axis["root"] }
+local register_ancestor = { kind = "axis", axis = "ancestor" } -- , apply = apply_axis["ancestor"] }
+local register_ancestor_or_self = { kind = "axis", axis = "ancestor-or-self" } -- , apply = apply_axis["ancestor-or-self"] }
+local register_attribute = { kind = "axis", axis = "attribute" } -- , apply = apply_axis["attribute"] }
+local register_namespace = { kind = "axis", axis = "namespace" } -- , apply = apply_axis["namespace"] }
+local register_following = { kind = "axis", axis = "following" } -- , apply = apply_axis["following"] }
+local register_following_sibling = { kind = "axis", axis = "following-sibling" } -- , apply = apply_axis["following-sibling"] }
+local register_preceding = { kind = "axis", axis = "preceding" } -- , apply = apply_axis["preceding"] }
+local register_preceding_sibling = { kind = "axis", axis = "preceding-sibling" } -- , apply = apply_axis["preceding-sibling"] }
+
+local register_auto_descendant_or_self = { kind = "axis", axis = "auto-descendant-or-self" } -- , apply = apply_axis["auto-descendant-or-self"] }
+local register_auto_descendant = { kind = "axis", axis = "auto-descendant" } -- , apply = apply_axis["auto-descendant"] }
+local register_auto_self = { kind = "axis", axis = "auto-self" } -- , apply = apply_axis["auto-self"] }
+local register_auto_child = { kind = "axis", axis = "auto-child" } -- , apply = apply_axis["auto-child"] }
+
+local register_initial_child = { kind = "axis", axis = "initial-child" } -- , apply = apply_axis["initial-child"] }
+
+local register_all_nodes = { kind = "nodes", nodetest = true, nodes = { true, false, false } }
+
+local function register_error(str) -- wraps unparsed pattern text into a step record of kind "error"
+ return { kind = "error", comment = format("unparsed: %s",str) }
+end
+
+local parser = Ct { "patterns", -- can be made a bit faster by moving pattern outside
+
+ patterns = spaces * V("protocol") * spaces * V("initial") * spaces * V("step") * spaces *
+ (P("/") * spaces * V("step") * spaces)^0,
+
+ protocol = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"),
+
+ step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
+
+ axis = V("descendant") + V("child") + V("parent") + V("self") + V("root") + V("ancestor") +
+ V("descendant_or_self") + V("following") + V("following_sibling") +
+ V("preceding") + V("preceding_sibling") + V("ancestor_or_self") +
+ #(1-P(-1)) * Cc(register_auto_child),
+
+ initial = (P("/") * spaces * Cc(register_initial_child))^-1,
+
+ error = (P(1)^1) / register_error,
+
+ shortcuts_a = V("s_descendant_or_self") + V("s_descendant") + V("s_child") + V("s_parent") + V("s_self") + V("s_root") + V("s_ancestor"),
+
+ shortcuts = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0,
+
+ s_descendant_or_self = P("/") * Cc(register_descendant_or_self),
+ s_descendant = P("**") * Cc(register_descendant),
+ s_child = P("*") * Cc(register_child ),
+ s_parent = P("..") * Cc(register_parent ),
+ s_self = P("." ) * Cc(register_self ),
+ s_root = P("^^") * Cc(register_root ),
+ s_ancestor = P("^") * Cc(register_ancestor ),
+
+ descendant = P("descendant::") * Cc(register_descendant ),
+ child = P("child::") * Cc(register_child ),
+ parent = P("parent::") * Cc(register_parent ),
+ self = P("self::") * Cc(register_self ),
+ root = P('root::') * Cc(register_root ),
+ ancestor = P('ancestor::') * Cc(register_ancestor ),
+ descendant_or_self = P('descendant-or-self::') * Cc(register_descendant_or_self ),
+ ancestor_or_self = P('ancestor-or-self::') * Cc(register_ancestor_or_self ),
+ -- attribute = P('attribute::') * Cc(register_attribute ),
+ -- namespace = P('namespace::') * Cc(register_namespace ),
+ following = P('following::') * Cc(register_following ),
+ following_sibling = P('following-sibling::') * Cc(register_following_sibling ),
+ preceding = P('preceding::') * Cc(register_preceding ),
+ preceding_sibling = P('preceding-sibling::') * Cc(register_preceding_sibling ),
+
+ nodes = (V("nodefunction") * spaces * P("(") * V("nodeset") * P(")") + V("nodetest") * V("nodeset")) / register_nodes,
+
+ expressions = expression / register_expression,
+
+ letters = R("az")^1,
+ name = (1-lpeg.S("/[]()|:*!"))^1,
+ negate = P("!") * Cc(false),
+
+ nodefunction = V("negate") + P("not") * Cc(false) + Cc(true),
+ nodetest = V("negate") + Cc(true),
+ nodename = (V("negate") + Cc(true)) * spaces * ((V("wildnodename") * P(":") * V("wildnodename")) + (Cc(false) * V("wildnodename"))),
+ wildnodename = (C(V("name")) + P("*") * Cc(false)) * #(1-P("(")),
+ nodeset = spaces * Ct(V("nodename") * (spaces * P("|") * spaces * V("nodename"))^0) * spaces,
+
+ finalizer = (Cb("protocol") * P("/")^-1 * C(V("name")) * arguments * P(-1)) / register_finalizer,
+
+}
+
local cache = { }
-function xml.lpath(pattern,trace)
- lpathcalls = lpathcalls + 1
- if type(pattern) == "string" then
- local result = cache[pattern]
- if result == nil then -- can be false which is valid -)
- result = compose(pattern)
- cache[pattern] = result
- lpathcached = lpathcached + 1
- end
- if trace or trace_lpath then
- xml.lshow(result)
- end
- return result
+local function nodesettostring(set,nodetest) -- renders a flat set of (directive, ns, tg) triples as readable text
+ local t = { }
+ for i=1,#set,3 do
+ local directive, ns, tg = set[i], set[i+1], set[i+2]
+ if not ns or ns == "" then ns = "*" end -- a missing or empty part is shown as a wildcard
+ if not tg or tg == "" then tg = "*" end
+ tg = (tg == "@rt@" and "[root]") or format("%s:%s",ns,tg) -- the special "@rt@" tag is shown as [root]
+ t[#t+1] = (directive and tg) or format("not(%s)",tg) -- a false directive negates this single name
+ end
+ if nodetest == false then -- only an explicit false negates the whole set; nil does not
+ return format("not(%s)",concat(t,"|"))
else
- return pattern
+ return concat(t,"|")
end
end
-function xml.cached_patterns()
- return cache
+local function tagstostring(list) -- renders the ns:tg names of a list of elements, separated by spaces
+ if #list == 0 then
+ return "no elements"
+ else
+ local t = { }
+ for i=1, #list do
+ local li = list[i]
+ local ns, tg = li.ns, li.tg
+ if not ns or ns == "" then ns = "*" end -- a missing or empty part is shown as a wildcard
+ if not tg or tg == "" then tg = "*" end
+ t[#t+1] = (tg == "@rt@" and "[root]") or format("%s:%s",ns,tg) -- the special "@rt@" tag is shown as [root]
+ end
+ return concat(t," ")
+ end
end
--- we run out of locals (limited to 200)
---
--- local fallbackreport = (texio and texio.write) or io.write
-
-function xml.lshow(pattern,report)
--- report = report or fallbackreport
- report = report or (texio and texio.write) or io.write
- local lp = xml.lpath(pattern)
- if lp == false then
- report(" -: root\n")
- elseif lp == true then
- report(" -: wildcard\n")
+xml.nodesettostring = nodesettostring
+
+local parse_pattern -- forward declaration: lshow and parse_pattern call each other, so the name must be a local before lshow is compiled
+local function lshow(parsed) -- reports a parsed pattern through logs.report; a pattern string is parsed first
+ if type(parsed) == "string" then
+ parsed = parse_pattern(parsed) -- now resolves to the local declared above (it used to be an undefined global here)
+ end
+ local s = table.serialize_functions -- ugly
+ table.serialize_functions = false -- ugly
+ logs.report("lpath","%s://%s => %s",parsed.protocol or xml.defaultprotocol,parsed.pattern,table.serialize(parsed,false))
+ table.serialize_functions = s -- ugly
+end
+xml.lshow = lshow
+
+parse_pattern = function(pattern) -- the gain of caching is rather minimal
+ lpathcalls = lpathcalls + 1
+ if type(pattern) == "table" then
+ return pattern
else
- if type(pattern) == "string" then
- report(format("pattern: %s\n",pattern))
- end
- for k=1,#lp do
- local v = lp[k]
- if #v > 1 then
- local t = { }
- for i=2,#v do
- local vv = v[i]
- if type(vv) == "string" then
- t[#t+1] = (vv ~= "" and vv) or "#"
- elseif type(vv) == "boolean" then
- t[#t+1] = (vv and "==") or "<>"
+ local parsed = cache[pattern]
+ if parsed then
+ lpathcached = lpathcached + 1
+ else
+ parsed = parser:match(pattern)
+ if parsed then
+ parsed.pattern = pattern
+ local np = #parsed
+ if np == 0 then
+ parsed = { pattern = pattern, register_self, state = "parsing error" }
+ logs.report("lpath","parsing error in '%s'",pattern)
+ lshow(parsed)
+ else
+ -- we could have done this with a more complex parsed but this
+ -- is cleaner
+ local pi = parsed[1]
+ if pi.axis == "auto-child" then
+ parsed.comment = "auto-child replaced by auto-descendant-or-self"
+ parsed[1] = register_auto_descendant_or_self
+ --~ parsed.comment = "auto-child replaced by auto-descendant"
+ --~ parsed[1] = register_auto_descendant
+ elseif pi.axis == "initial-child" and np > 1 and parsed[2].axis then
+ parsed.comment = "initial-child removed" -- we could also make it an auto-self
+ remove(parsed,1)
+ end
end
- report(format("%2i: %s %s -> %s\n", k,v[1],actions[v[1]],concat(t," ")))
else
- report(format("%2i: %s %s\n", k,v[1],actions[v[1]]))
+ parsed = { pattern = pattern }
+ end
+ cache[pattern] = parsed
+ if trace_lparse and not trace_lprofile then
+ lshow(parsed)
end
end
+ return parsed
end
end
-function xml.xshow(e,...) -- also handy when report is given, use () to isolate first e
- local t = { ... }
--- local report = (type(t[#t]) == "function" and t[#t]) or fallbackreport
- local report = (type(t[#t]) == "function" and t[#t]) or (texio and texio.write) or io.write
- if e == nil then
- report("<!-- no element -->\n")
- elseif type(e) ~= "table" then
- report(tostring(e))
- elseif e.tg then
- report(tostring(e) .. "\n")
+-- we can move all calls inline and then merge the trace back
+-- technically we can combine axis and the next nodes which is
+-- what we did before but this is a bit cleaner (but slower too)
+-- but interesting is that it's not that much faster when we
+-- go inline
+--
+-- beware: we need to return a collection even when we filter
+-- else the (simple) cache gets messed up
+
+-- caching found lookups saves not that much (max .1 sec on a 8 sec run)
+-- and it also messes up finalizers
+
+local profiled = { } xml.profiled = profiled
+
+local function profiled_apply(list,parsed,nofparsed)
+ local p = profiled[parsed.pattern]
+ if p then
+ p.tested = p.tested + 1
else
- for i=1,#e do
- report(tostring(e[i]) .. "\n")
+ p = { tested = 1, matched = 0, finalized = 0 }
+ profiled[parsed.pattern] = p
+ end
+ local collected = list
+ for i=1,nofparsed do
+ local pi = parsed[i]
+ local kind = pi.kind
+ if kind == "axis" then
+ collected = apply_axis[pi.axis](collected)
+ elseif kind == "nodes" then
+ collected = apply_nodes(collected,pi.nodetest,pi.nodes)
+ elseif kind == "expression" then
+ collected = apply_expression(collected,pi.evaluator,i)
+ elseif kind == "finalizer" then
+ collected = pi.finalizer(collected)
+ p.matched = p.matched + 1
+ p.finalized = p.finalized + 1
+ return collected
+ end
+ if not collected or #collected == 0 then
+ return nil
end
end
+ if collected then
+ p.matched = p.matched + 1
+ end
+ return collected
+end
+
+-- Tracing variant of the step applier: runs the first nofparsed steps of
+-- parsed over list and, after every step, reports the size of the
+-- intermediate collection through logs.report (category "lpath").
+-- Returns the final collection, the value produced by a finalizer step, or
+-- nil as soon as a step leaves nothing to work on.
+local function traced_apply(list,parsed,nofparsed)
+ if trace_lparse then
+ lshow(parsed)
+ end
+ logs.report("lpath", "collecting : %s",parsed.pattern)
+ logs.report("lpath", " root tags : %s",tagstostring(list))
+ local collected = list
+ for i=1,nofparsed do
+ local pi = parsed[i]
+ local kind = pi.kind
+ if kind == "axis" then
+ collected = apply_axis[pi.axis](collected)
+ logs.report("lpath", "% 10i : ax : %s",(collected and #collected) or 0,pi.axis)
+ elseif kind == "nodes" then
+ collected = apply_nodes(collected,pi.nodetest,pi.nodes)
+ logs.report("lpath", "% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest))
+ elseif kind == "expression" then
+ collected = apply_expression(collected,pi.evaluator,i)
+ logs.report("lpath", "% 10i : ex : %s",(collected and #collected) or 0,pi.expression)
+ elseif kind == "finalizer" then
+ -- a finalizer ends the run: its result is reported and returned
+ collected = pi.finalizer(collected)
+ logs.report("lpath", "% 10i : fi : %s : %s(%s)",(collected and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
+ return collected
+ end
+ -- stop early when the step produced nothing
+ if not collected or #collected == 0 then
+ return nil
+ end
+ end
+ return collected
end
---[[ldx--
-<p>An <l n='lpath'/> is converted to a table with instructions for traversing the
-tree. Hoever, simple cases are signaled by booleans. Because we don't know in
-advance what we want to do with the found element the handle gets three arguments:</p>
-
-<lines>
-<t>r</t> : the root element of the data table
-<t>d</t> : the data table of the result
-<t>t</t> : the index in the data table of the result
-</lines>
+local function parse_apply(list,pattern)
+    -- Resolve pattern to its parsed step list. The cache is consulted here
+    -- directly so that a hit does not cost a call to parse_pattern; a
+    -- pattern that already is a table is used as is.
+    local steps = cache[pattern]
+    if steps then
+        lpathcalls = lpathcalls + 1
+        lpathcached = lpathcached + 1
+    elseif type(pattern) == "table" then
+        lpathcalls = lpathcalls + 1
+        steps = pattern
+    else
+        steps = parse_pattern(pattern) or pattern
+    end
+    if not steps then
+        return
+    end
+    local nofsteps = #steps
+    if nofsteps == 0 then
+        return -- something is wrong: there are no steps to apply
+    end
+    if trace_lpath then
+        -- profiling and tracing are handled by their own appliers
+        if trace_lprofile then
+            return profiled_apply(list,steps,nofsteps)
+        end
+        return traced_apply(list,steps,nofsteps)
+    end
+    -- normal apply, inline; the self axis is not applied here
+    local collected = list
+    for i=1,nofsteps do
+        local step = steps[i]
+        local kind = step.kind
+        if kind == "finalizer" then
+            -- a finalizer ends the run, whatever it returns is the result
+            return step.finalizer(collected)
+        elseif kind == "axis" then
+            local axis = step.axis
+            if axis ~= "self" then
+                collected = apply_axis[axis](collected)
+            end
+        elseif kind == "nodes" then
+            collected = apply_nodes(collected,step.nodetest,step.nodes)
+        elseif kind == "expression" then
+            collected = apply_expression(collected,step.evaluator,i)
+        end
+        if not collected or #collected == 0 then
+            return nil
+        end
+    end
+    return collected
+end
-<p> Access to the root and data table makes it possible to construct insert and delete
-functions.</p>
---ldx]]--
+-- internal (parsed)
-local functions = xml.functions
-local expressions = xml.expressions
+-- apply a (sub)pattern with the element itself as the only root
+expressions.child = function(e,pattern)
+    return parse_apply({ e },pattern) -- todo: cache
+end
+-- same lookup, but only the number of matches is returned (0 when none)
+expressions.count = function(e,pattern)
+    local matches = parse_apply({ e },pattern) -- todo: cache
+    if matches then
+        return #matches
+    end
+    return 0
+end
-expressions.contains = string.find
-expressions.find = string.find
-expressions.upper = string.upper
-expressions.lower = string.lower
-expressions.number = tonumber
-expressions.boolean = toboolean
+-- external
expressions.oneof = function(s,...) -- slow
local t = {...} for i=1,#t do if s == t[i] then return true end end return false
end
-
expressions.error = function(str)
- xml.error_handler("unknown function in lpath expression",str or "?")
+ xml.error_handler("unknown function in lpath expression",tostring(str or "?"))
return false
end
+expressions.undefined = function(s)
+ return s == nil
+end
-functions.text = function(root,k,n) -- unchecked, maybe one deeper
- local t = type(t)
- if t == "string" then
- return t
- else -- todo n
- local rdt = root.dt
- return (rdt and rdt[k]) or root[k] or ""
+expressions.contains = find
+expressions.find = find
+expressions.upper = upper
+expressions.lower = lower
+expressions.number = tonumber
+expressions.boolean = toboolean
+
+-- user interface
+
+local function traverse(root,pattern,handle)
+    -- old style interface: for every match the handle is called with the
+    -- parent element, the parent's data table and the index of the match
+    logs.report("xml","use 'xml.selection' instead for '%s'",pattern)
+    local matches = parse_apply({ root },pattern)
+    if not matches then
+        return
+    end
+    for i=1,#matches do
+        local element = matches[i]
+        local parent = element.__p__
+        handle(parent,parent.dt,element.ni)
+    end
+end
+
+local function selection(root,pattern,handle)
+    -- without a handle the matches themselves are returned; with a handle
+    -- each match is passed to it and nothing is returned
+    local matches = parse_apply({ root },pattern)
+    if not matches then
+        return
+    end
+    if not handle then
+        return matches
+    end
+    for i=1,#matches do
+        handle(matches[i])
     end
 end
-functions.name = function(d,k,n) -- ns + tg
+xml.parse_parser = parser
+xml.parse_pattern = parse_pattern
+xml.parse_apply = parse_apply
+xml.traverse = traverse -- old method, r, d, k
+xml.selection = selection -- new method, simple handle
+
+local lpath = parse_pattern
+
+xml.lpath = lpath
+
+function xml.cached_patterns()
+ return cache
+end
+
+-- generic function finalizer (independent namespace)
+
+local function dofunction(collected,fnc)
+    -- finalizer: look up fnc in the functions table and call it on every
+    -- match; an unknown name is reported and nothing is called
+    if not collected then
+        return
+    end
+    local action = functions[fnc]
+    if not action then
+        logs.report("xml","unknown function '%s'",fnc)
+        return
+    end
+    for i=1,#collected do
+        action(collected[i])
+    end
+end
+
+xml.finalizers.xml["function"] = dofunction
+xml.finalizers.tex["function"] = dofunction
+
+-- functions
+
+expressions.text = function(e,n)
+ local rdt = e.__p__.dt
+ return (rdt and rdt[n]) or ""
+end
+
+expressions.name = function(e,n) -- ns + tg
local found = false
- n = n or 0
- if not k then
- -- not found
- elseif n == 0 then
- local dk = d[k]
- found = dk and (type(dk) == "table") and dk
+ n = tonumber(n) or 0
+ if n == 0 then
+ found = type(e) == "table" and e
elseif n < 0 then
+ local d, k = e.__p__.dt, e.ni
for i=k-1,1,-1 do
local di = d[i]
if type(di) == "table" then
@@ -4114,6 +5285,7 @@ functions.name = function(d,k,n) -- ns + tg
end
end
else
+ local d, k = e.__p__.dt, e.ni
for i=k+1,#d,1 do
local di = d[i]
if type(di) == "table" then
@@ -4138,15 +5310,13 @@ functions.name = function(d,k,n) -- ns + tg
end
end
-functions.tag = function(d,k,n) -- only tg
+expressions.tag = function(e,n) -- only tg
local found = false
- n = n or 0
- if not k then
- -- not found
- elseif n == 0 then
- local dk = d[k]
- found = dk and (type(dk) == "table") and dk
+ n = tonumber(n) or 0
+ if n == 0 then
+ found = (type(e) == "table") and e -- seems to fail
elseif n < 0 then
+ local d, k = e.__p__.dt, e.ni
for i=k-1,1,-1 do
local di = d[i]
if type(di) == "table" then
@@ -4159,6 +5329,7 @@ functions.tag = function(d,k,n) -- only tg
end
end
else
+ local d, k = e.__p__.dt, e.ni
for i=k+1,#d,1 do
local di = d[i]
if type(di) == "table" then
@@ -4174,664 +5345,403 @@ functions.tag = function(d,k,n) -- only tg
return (found and found.tg) or ""
end
-expressions.text = functions.text
-expressions.name = functions.name
-expressions.tag = functions.tag
+--[[ldx--
+<p>This is the main filter function. It returns whatever is asked for.</p>
+--ldx]]--
-local function traverse(root,pattern,handle,reverse,index,parent,wildcard) -- multiple only for tags, not for namespaces
- if not root then -- error
- return false
- elseif pattern == false then -- root
- handle(root,root.dt,root.ri)
- return false
- elseif pattern == true then -- wildcard
- local rootdt = root.dt
- if rootdt then
- local start, stop, step = 1, #rootdt, 1
- if reverse then
- start, stop, step = stop, start, -1
- end
- for k=start,stop,step do
- if handle(root,rootdt,root.ri or k) then return false end
- if not traverse(rootdt[k],true,handle,reverse) then return false end
- end
+function xml.filter(root,pattern) -- no longer funny attribute handling here
+ return parse_apply({ root },pattern)
+end
+
+--[[ldx--
+<p>Often using an iterator looks nicer in the code than passing handler
+functions. The <l n='lua'/> book describes how to use coroutines for that
+purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits
+code like:</p>
+
+<typing>
+for r, d, k in xml.elements(xml.load('text.xml'),"title") do
+ print(d[k]) -- old method
+end
+for e in xml.collected(xml.load('text.xml'),"title") do
+ print(e) -- new one
+end
+</typing>
+--ldx]]--
+
+local wrap, yield = coroutine.wrap, coroutine.yield
+
+function xml.elements(root,pattern,reverse) -- r, d, k
+ local collected = parse_apply({ root },pattern)
+ if collected then
+ if reverse then
+ return wrap(function() for c=#collected,1,-1 do
+ local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
+ end end)
+ else
+ return wrap(function() for c=1,#collected do
+ local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
+ end end)
end
- return false
- elseif root.dt then
- index = index or 1
- local action = pattern[index]
- local command = action[1]
- if command == 29 then -- fast case /oeps
- local rootdt = root.dt
- for k=1,#rootdt do
- local e = rootdt[k]
- local tg = e.tg
- if e.tg then
- local ns = e.rn or e.ns
- local ns_a, tg_a = action[3], action[4]
- local matched = (ns_a == "*" or ns == ns_a) and (tg_a == "*" or tg == tg_a)
- if not action[2] then matched = not matched end
- if matched then
- if handle(root,rootdt,k) then return false end
- end
- end
- end
- elseif command == 11 then -- parent
- local ep = root.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
+ end
+ return wrap(function() end)
+end
+
+function xml.collected(root,pattern,reverse) -- e
+ local collected = parse_apply({ root },pattern)
+ if collected then
+ if reverse then
+ return wrap(function() for c=#collected,1,-1 do yield(collected[c]) end end)
else
- if (command == 16 or command == 12) and index == 1 then -- initial
- -- wildcard = true
- wildcard = command == 16 -- ok?
- index = index + 1
- action = pattern[index]
- command = action and action[1] or 0 -- something is wrong
- end
- if command == 11 then -- parent
- local ep = root.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
- else
- local rootdt = root.dt
- local start, stop, step, n, dn = 1, #rootdt, 1, 0, 1
- if command == 30 then
- if action[5] < 0 then
- start, stop, step = stop, start, -1
- dn = -1
- end
- elseif reverse and index == #pattern then
- start, stop, step = stop, start, -1
- end
- local idx = 0
- local hsh = { } -- this will slooow down the lot
- for k=start,stop,step do -- we used to have functions for all but a case is faster
- local e = rootdt[k]
- local ns, tg = e.rn or e.ns, e.tg
- if tg then
- -- we can optimize this for simple searches, but it probably does not pay off
- hsh[tg] = (hsh[tg] or 0) + 1
- idx = idx + 1
- if command == 30 then
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- if matched then
- n = n + dn
- if n == action[5] then
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
- end
- break
- end
- elseif wildcard then
- if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
- end
- else
- local matched, multiple = false, false
- if command == 20 then -- match
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- elseif command == 21 then -- match one of
- multiple = true
- for i=3,#action,2 do
- local ns_a, tg_a = action[i], action[i+1]
- if (ns_a == "*" or ns == ns_a) and (tg == "*" or tg == tg_a) then
- matched = true
- break
- end
- end
- if not action[2] then matched = not matched end
- elseif command == 22 then -- eq
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- matched = matched and e.at[action[6]] == action[7]
- elseif command == 23 then -- ne
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- matched = mached and e.at[action[6]] ~= action[7]
- elseif command == 24 then -- one of eq
- multiple = true
- for i=3,#action-2,2 do
- local ns_a, tg_a = action[i], action[i+1]
- if (ns_a == "*" or ns == ns_a) and (tg == "*" or tg == tg_a) then
- matched = true
- break
- end
- end
- if not action[2] then matched = not matched end
- matched = matched and e.at[action[#action-1]] == action[#action]
- elseif command == 25 then -- one of ne
- multiple = true
- for i=3,#action-2,2 do
- local ns_a, tg_a = action[i], action[i+1]
- if (ns_a == "*" or ns == ns_a) and (tg == "*" or tg == tg_a) then
- matched = true
- break
- end
- end
- if not action[2] then matched = not matched end
- matched = matched and e.at[action[#action-1]] ~= action[#action]
- elseif command == 27 then -- has attribute
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- matched = matched and e.at[action[5]]
- elseif command == 28 then -- has value
- local edt, ns_a, tg_a = e.dt, action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- matched = matched and edt and edt[1] == action[5]
- elseif command == 31 then
- local edt, ns_a, tg_a = e.dt, action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- if matched then
- matched = action[6](expressions,root,rootdt,k,e,edt,ns,tg,idx,hsh[tg] or 1)
- end
- end
- if matched then -- combine tg test and at test
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- if wildcard then
- if multiple then
- if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
- else
- -- maybe or multiple; anyhow, check on (section|title) vs just section and title in example in lxml
- if not traverse(e,pattern,handle,reverse,index,root) then return false end
- end
- end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
- end
- elseif command == 14 then -- any
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
- end
- elseif command == 15 then -- many
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root,true) then return false end
- end
- -- not here : 11
- elseif command == 11 then -- parent
- local ep = e.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,root,index+1) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
- elseif command == 40 and e.special and tg == "@pi@" then -- pi
- local pi = action[2]
- if pi ~= "" then
- local pt = e.dt[1]
- if pt and pt:find(pi) then
- if handle(root,rootdt,k) then
- return false
- end
- end
- elseif handle(root,rootdt,k) then
- return false
- end
- elseif wildcard then
- if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
- end
- end
- else
- -- not here : 11
- if command == 11 then -- parent
- local ep = e.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
- break -- else loop
- end
- end
- end
- end
+ return wrap(function() for c=1,#collected do yield(collected[c]) end end)
end
end
- return true
+ return wrap(function() end)
end
-xml.traverse = traverse
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['lxml-ent'] = {
+ version = 1.001,
+ comment = "this module is the basis for the lxml-* ones",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local type, next = type, next
+local texsprint, ctxcatcodes = tex.sprint, tex.ctxcatcodes
+local utf = unicode.utf8
+local utfupper = utf.upper
--[[ldx--
-<p>Next come all kind of locators and manipulators. The most generic function here
-is <t>xml.filter(root,pattern)</t>. All registers functions in the filters namespace
-can be path of a search path, as in:</p>
+<p>We provide (at least here) two entity handlers. The more extensive
+resolver consults a hash first, tries to convert to <l n='utf'/> next,
+and finally calls a handler when one is defined. When this all fails, the
+original entity is returned.</p>
-<typing>
-local r, d, k = xml.filter(root,"/a/b/c/position(4)"
-</typing>
+<p>We do things different now but it's still somewhat experimental</p>
--ldx]]--
-local traverse, lpath, convert = xml.traverse, xml.lpath, xml.convert
+xml.entities = xml.entities or { } -- xml.entity_handler == function
-xml.filters = { }
+-- experimental, this will be done differently
-function xml.filters.default(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end)
- return dt and dt[dk], rt, dt, dk
+function xml.merge_entities(root)
+    -- copy the entities of this document into the shared xml.entities
+    -- table; an entry with the same name is overwritten
+    local shared = xml.entities
+    local own = root.entities
+    if not own then
+        return
+    end
+    for name, value in next, own do
+        shared[name] = value
+    end
 end
-function xml.filters.attributes(root,pattern,arguments)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end)
- local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at)
- if ekat then
- if arguments then
- return ekat[arguments] or "", rt, dt, dk
+function xml.resolved_entity(str)
+ local e = xml.entities[str]
+ if e then
+ local te = type(e)
+ if te == "function" then
+ e(str)
else
- return ekat, rt, dt, dk
+ texsprint(ctxcatcodes,e)
end
else
- return { }, rt, dt, dk
+ texsprint(ctxcatcodes,"\\xmle{",str,"}{",utfupper(str),"}") -- we need to use our own upper
end
end
-function xml.filters.reverse(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end, 'reverse')
- return dt and dt[dk], rt, dt, dk
-end
+xml.entities.amp = function() tex.write("&") end
+xml.entities.lt = function() tex.write("<") end
+xml.entities.gt = function() tex.write(">") end
-function xml.filters.count(root,pattern,everything)
- local n = 0
- traverse(root, lpath(pattern), function(r,d,t)
- if everything or type(d[t]) == "table" then
- n = n + 1
- end
- end)
- return n
-end
-function xml.filters.elements(root, pattern) -- == all
- local t = { }
- traverse(root, lpath(pattern), function(r,d,k)
- local e = d[k]
- if e then
- t[#t+1] = e
- end
- end)
- return t
-end
+end -- of closure
-function xml.filters.texts(root, pattern)
- local t = { }
- traverse(root, lpath(pattern), function(r,d,k)
- local e = d[k]
- if e and e.dt then
- t[#t+1] = e.dt
- end
- end)
- return t
-end
+do -- create closure to overcome 200 locals limit
-function xml.filters.first(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end)
- return dt and dt[dk], rt, dt, dk
-end
+if not modules then modules = { } end modules ['lxml-mis'] = {
+ version = 1.001,
+ comment = "this module is the basis for the lxml-* ones",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
-function xml.filters.last(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end, 'reverse')
- return dt and dt[dk], rt, dt, dk
-end
+local concat = table.concat
+local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
+local format, gsub = string.format, string.gsub
-function xml.filters.index(root,pattern,arguments)
- local rt, dt, dk, reverse, i = nil, nil, nil, false, tonumber(arguments or '1') or 1
- if i and i ~= 0 then
- if i < 0 then
- reverse, i = true, -i
- end
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk, i = r, d, k, i-1 return i == 0 end, reverse)
- if i == 0 then
- return dt and dt[dk], rt, dt, dk
+--[[ldx--
+<p>The following helper functions best belong to the <t>lxml-ini</t>
+module. Some are here because we need them in the <t>mk</t>
+document and other manuals, others came up when playing with
+this module. Since this module is also used in <l n='mtxrun'/> we've
+put them here instead of loading more modules there than needed.</p>
+--ldx]]--
+
+
+local function xmlgsub(t,old,new)
+ local dt = t.dt
+ if dt then
+ for k=1,#dt do
+ local v = dt[k]
+ if type(v) == "string" then
+ dt[k] = gsub(v,old,new)
+ else
+ xmlgsub(v,old,new)
+ end
end
end
- return nil, nil, nil, nil
end
-function xml.filters.attribute(root,pattern,arguments)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end)
- local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at)
- -- return (ekat and (ekat[arguments] or ekat[gsub(arguments,"^([\"\'])(.*)%1$","%2")])) or ""
- return (ekat and (ekat[arguments] or (find(arguments,"^[\'\"]") and ekat[sub(arguments,2,-2)]))) or ""
-end
+-- NOTE(review): the original statement assigned the local xmlgsub to itself,
+-- which has no effect; presumably the helper was meant to be exported, so
+-- it is made available as xml.gsub -- confirm that this name is wanted
+xml.gsub = xmlgsub
-function xml.filters.text(root,pattern,arguments) -- ?? why index, tostring slow
- local dtk, rt, dt, dk = xml.filters.index(root,pattern,arguments)
- if dtk then -- n
- local dtkdt = dtk.dt
- if not dtkdt then
- return "", rt, dt, dk
- elseif #dtkdt == 1 and type(dtkdt[1]) == "string" then
- return dtkdt[1], rt, dt, dk
- else
- return xml.tostring(dtkdt), rt, dt, dk
+function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual
+    -- Takes the whitespace that follows the first newline in the string
+    -- preceding dk in d (that is d[k-1]) and replaces, in all text inside
+    -- dk, a newline followed by that many spaces with a plain newline.
+    -- Nothing happens when d or k is missing or d[k-1] is not a string.
+    if d and k then
+        local dkm = d[k-1]
+        if dkm and type(dkm) == "string" then
+            -- string.match and string.rep are called by their full names:
+            -- this closure defines no locals called match or rep
+            local s = string.match(dkm,"\n(%s+)")
+            if s then -- no newline plus whitespace found: nothing to strip
+                xmlgsub(dk,"\n"..string.rep(" ",#s),"\n")
+            end
         end
-    else
-        return "", rt, dt, dk
     end
 end
-function xml.filters.tag(root,pattern,n)
- local tag = ""
- traverse(root, lpath(pattern), function(r,d,k)
- tag = xml.functions.tag(d,k,n and tonumber(n))
- return true
- end)
- return tag
-end
+--~ xml.escapes = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
+--~ xml.unescapes = { } for k,v in pairs(xml.escapes) do xml.unescapes[v] = k end
-function xml.filters.name(root,pattern,n)
- local tag = ""
- traverse(root, lpath(pattern), function(r,d,k)
- tag = xml.functions.name(d,k,n and tonumber(n))
- return true
- end)
- return tag
-end
+--~ function xml.escaped (str) return (gsub(str,"(.)" , xml.escapes )) end
+--~ function xml.unescaped(str) return (gsub(str,"(&.-;)", xml.unescapes)) end
+--~ function xml.cleansed (str) return (gsub(str,"<.->" , '' )) end -- "%b<>"
---[[ldx--
-<p>For splitting the filter function from the path specification, we can
-use string matching or lpeg matching. Here the difference in speed is
-neglectable but the lpeg variant is more robust.</p>
---ldx]]--
+local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, lpeg.Cs
--- not faster but hipper ... although ... i can't get rid of the trailing / in the path
+-- 100 * 2500 * "oeps< oeps> oeps&" : gsub:lpeg|lpeg|lpeg
+--
+-- 1021:0335:0287:0247
-local P, S, R, C, V, Cc = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc
+-- 10 * 1000 * "oeps< oeps> oeps& asfjhalskfjh alskfjh alskfjh alskfjh ;al J;LSFDJ"
+--
+-- 1559:0257:0288:0190 (last one suggested by roberto)
-local slash = P('/')
-local name = (R("az","AZ","--","__"))^1
-local path = C(((1-slash)^0 * slash)^1)
-local argument = P { "(" * C(((1 - S("()")) + V(1))^0) * ")" }
-local action = Cc(1) * path * C(name) * argument
-local attribute = Cc(2) * path * P('@') * C(name)
-local direct = Cc(3) * Cc("../*") * slash^0 * C(name) * argument
+-- escaped = Cs((S("<&>") / xml.escapes + 1)^0)
+-- escaped = Cs((S("<")/"&lt;" + S(">")/"&gt;" + S("&")/"&amp;" + 1)^0)
+local normal = (1 - S("<&>"))^0
+local special = P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;"
+local escaped = Cs(normal * (special * normal)^0)
-local parser = direct + action + attribute
+-- 100 * 1000 * "oeps&lt; oeps&gt; oeps&amp;" : gsub:lpeg == 0153:0280:0151:0080 (last one by roberto)
-local filters = xml.filters
-local attribute_filter = xml.filters.attributes
-local default_filter = xml.filters.default
+local normal = (1 - S"&")^0
+local special = P("&lt;")/"<" + P("&gt;")/">" + P("&amp;")/"&"
+local unescaped = Cs(normal * (special * normal)^0)
--- todo: also hash, could be gc'd
+-- 100 * 5000 * "oeps <oeps bla='oeps' foo='bar'> oeps </oeps> oeps " : gsub:lpeg == 623:501 msec (short tags, less difference)
-function xml.filter(root,pattern)
- local kind, a, b, c = parser:match(pattern)
- if kind == 1 or kind == 3 then
- return (filters[b] or default_filter)(root,a,c)
- elseif kind == 2 then
- return attribute_filter(root,a,b)
- else
- return default_filter(root,pattern)
- end
-end
+local cleansed = Cs(((P("<") * (1-P(">"))^0 * P(">"))/"" + 1)^0)
---~ slightly faster, but first we need a proper test file
---~
---~ local hash = { }
---~
---~ function xml.filter(root,pattern)
---~ local h = hash[pattern]
---~ if not h then
---~ local kind, a, b, c = parser:match(pattern)
---~ if kind == 1 then
---~ h = { kind, filters[b] or default_filter, a, b, c }
---~ elseif kind == 2 then
---~ h = { kind, attribute_filter, a, b, c }
---~ else
---~ h = { kind, default_filter, a, b, c }
---~ end
---~ hash[pattern] = h
---~ end
---~ local kind = h[1]
---~ if kind == 1 then
---~ return h[2](root,h[2],h[4])
---~ elseif kind == 2 then
---~ return h[2](root,h[2],h[3])
---~ else
---~ return h[2](root,pattern)
---~ end
---~ end
+xml.escaped_pattern = escaped
+xml.unescaped_pattern = unescaped
+xml.cleansed_pattern = cleansed
---[[ldx--
-<p>The following functions collect elements and texts.</p>
---ldx]]--
+function xml.escaped (str) return escaped :match(str) end
+function xml.unescaped(str) return unescaped:match(str) end
+function xml.cleansed (str) return cleansed :match(str) end
--- still somewhat bugged
-function xml.collect_elements(root, pattern, ignorespaces)
- local rr, dd = { }, { }
- traverse(root, lpath(pattern), function(r,d,k)
- local dk = d and d[k]
- if dk then
- if ignorespaces and type(dk) == "string" and dk:find("[^%S]") then
- -- ignore
- else
- local n = #rr+1
- rr[n], dd[n] = r, dk
- end
- end
- end)
- return dd, rr
-end
+end -- of closure
-function xml.collect_texts(root, pattern, flatten)
- local t = { } -- no r collector
- traverse(root, lpath(pattern), function(r,d,k)
- if d then
- local ek = d[k]
- local tx = ek and ek.dt
- if flatten then
- if tx then
- t[#t+1] = xml.tostring(tx) or ""
- else
- t[#t+1] = ""
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['lxml-aux'] = {
+ version = 1.001,
+ comment = "this module is the basis for the lxml-* ones",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- not all functions here make sense anymore but we keep them for
+-- compatibility reasons
+
+local xmlparseapply, xmlconvert, xmlcopy = xml.parse_apply, xml.convert, xml.copy
+
+local type = type
+local insert, remove = table.insert, table.remove
+local gmatch, gsub = string.gmatch, string.gsub
+
+local function withelements(e,handle,depth)
+ if e and handle then
+ local edt = e.dt
+ if edt then
+ depth = depth or 0
+ for i=1,#edt do
+ local e = edt[i]
+ if type(e) == "table" then
+ handle(e,depth)
+ withelements(e,handle,depth+1)
end
- else
- t[#t+1] = tx or ""
end
- else
- t[#t+1] = ""
end
- end)
- return t
+ end
end
-function xml.collect_tags(root, pattern, nonamespace)
- local t = { }
- xml.traverse(root, xml.lpath(pattern), function(r,d,k)
- local dk = d and d[k]
- if dk and type(dk) == "table" then
- local ns, tg = e.ns, e.tg
- if nonamespace then
- t[#t+1] = tg -- if needed we can return an extra table
- elseif ns == "" then
- t[#t+1] = tg
- else
- t[#t+1] = ns .. ":" .. tg
+xml.withelements = withelements
+
+function xml.withelement(e,n,handle) -- slow
+ if e and n ~= 0 and handle then
+ local edt = e.dt
+ if edt then
+ if n > 0 then
+ for i=1,#edt do
+ local ei = edt[i]
+ if type(ei) == "table" then
+ if n == 1 then
+ handle(ei)
+ return
+ else
+ n = n - 1
+ end
+ end
+ end
+ elseif n < 0 then
+ for i=#edt,1,-1 do
+ local ei = edt[i]
+ if type(ei) == "table" then
+ if n == -1 then
+ handle(ei)
+ return
+ else
+ n = n + 1
+ end
+ end
+ end
end
end
- end)
- return #t > 0 and {}
+ end
end
---[[ldx--
-<p>Often using an iterators looks nicer in the code than passing handler
-functions. The <l n='lua'/> book describes how to use coroutines for that
-purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits
-code like:</p>
+xml.elements_only = xml.collected
-<typing>
-for r, d, k in xml.elements(xml.load('text.xml'),"title") do
- print(d[k])
+function xml.each_element(root, pattern, handle, reverse)
+    -- calls handle for every match (back to front when reverse is set) and
+    -- returns the matches; nothing is returned when there are none
+    local matches = xmlparseapply({ root },pattern)
+    if not matches then
+        return
+    end
+    local first, last, step = 1, #matches, 1
+    if reverse then
+        first, last, step = last, 1, -1
+    end
+    for i=first,last,step do
+        handle(matches[i])
+    end
+    return matches
 end
-</typing>
-<p>Which will print all the titles in the document. The iterator variant takes
-1.5 times the runtime of the function variant which is due to the overhead in
-creating the wrapper. So, instead of:</p>
+xml.process_elements = xml.each_element
-<typing>
-function xml.filters.first(root,pattern)
- for rt,dt,dk in xml.elements(root,pattern)
- return dt and dt[dk], rt, dt, dk
+function xml.process_attributes(root, pattern, handle)
+ local collected = xmlparseapply({ root },pattern)
+ if collected and handle then
+ for c=1,#collected do
+ handle(collected[c].at)
+ end
end
- return nil, nil, nil, nil
+ return collected
end
-</typing>
-<p>We use the function variants in the filters.</p>
+--[[ldx--
+<p>The following functions collect elements and texts.</p>
--ldx]]--
-local wrap, yield = coroutine.wrap, coroutine.yield
+-- are these still needed -> lxml-cmp.lua
-function xml.elements(root,pattern,reverse)
- return wrap(function() traverse(root, lpath(pattern), yield, reverse) end)
+function xml.collect_elements(root, pattern)
+ return xmlparseapply({ root },pattern)
end
-function xml.elements_only(root,pattern,reverse)
- return wrap(function() traverse(root, lpath(pattern), function(r,d,k) yield(d[k]) end, reverse) end)
-end
-
-function xml.each_element(root, pattern, handle, reverse)
- local ok
- traverse(root, lpath(pattern), function(r,d,k) ok = true handle(r,d,k) end, reverse)
- return ok
-end
-
-function xml.process_elements(root, pattern, handle)
- traverse(root, lpath(pattern), function(r,d,k)
- local dkdt = d[k].dt
- if dkdt then
- for i=1,#dkdt do
- local v = dkdt[i]
- if v.tg then handle(v) end
- end
+function xml.collect_texts(root, pattern, flatten) -- todo: variant with handle
+ local collected = xmlparseapply({ root },pattern)
+ if collected and flatten then
+ local xmltostring = xml.tostring
+ for c=1,#collected do
+ collected[c] = xmltostring(collected[c].dt)
end
- end)
+ end
+ return collected or { }
end
-function xml.process_attributes(root, pattern, handle)
- traverse(root, lpath(pattern), function(r,d,k)
- local ek = d[k]
- local a = ek.at or { }
- handle(a)
- if next(a) then -- next is faster than type (and >0 test)
- ek.at = a
- else
- ek.at = nil
+function xml.collect_tags(root, pattern, nonamespace)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ local t = { }
+ for c=1,#collected do
+ local e = collected[c]
+ local ns, tg = e.ns, e.tg
+ if nonamespace then
+ t[#t+1] = tg
+ elseif ns == "" then
+ t[#t+1] = tg
+ else
+ t[#t+1] = ns .. ":" .. tg
+ end
end
- end)
+ return t
+ end
end
--[[ldx--
<p>We've now arrives at the functions that manipulate the tree.</p>
--ldx]]--
+local no_root = { no_root = true }
+
function xml.inject_element(root, pattern, element, prepend)
if root and element then
- local matches, collect = { }, nil
if type(element) == "string" then
- element = convert(element,true)
+ element = xmlconvert(element,no_root)
end
if element then
- collect = function(r,d,k) matches[#matches+1] = { r, d, k, element } end
- traverse(root, lpath(pattern), collect)
- for i=1,#matches do
- local m = matches[i]
- local r, d, k, element, edt = m[1], m[2], m[3], m[4], nil
- if element.ri then
- element = element.dt[element.ri].dt
- else
- element = element.dt
- end
- if r.ri then
- edt = r.dt[r.ri].dt
- else
- edt = d and d[k] and d[k].dt
- end
- if edt then
- local be, af
- if prepend then
- be, af = xml.copy(element), edt
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ local r = e.__p__
+ local d = r.dt
+ local k = e.ni
+ if element.ri then
+ element = element.dt[element.ri].dt
else
- be, af = edt, xml.copy(element)
- end
- for i=1,#af do
- be[#be+1] = af[i]
+ element = element.dt
end
+ local edt
if r.ri then
- r.dt[r.ri].dt = be
+ edt = r.dt[r.ri].dt
else
- d[k].dt = be
+ edt = d and d[k] and d[k].dt
+ end
+ if edt then
+ local be, af
+ if prepend then
+ be, af = xmlcopy(element), edt
+ else
+ be, af = edt, xmlcopy(element)
+ end
+ for i=1,#af do
+ be[#be+1] = af[i]
+ end
+ if r.ri then
+ r.dt[r.ri].dt = be
+ else
+ d[k].dt = be
+ end
+ else
+ -- r.dt = element.dt -- todo
end
- else
- -- r.dt = element.dt -- todo
end
end
end
@@ -4847,32 +5757,31 @@ function xml.insert_element(root, pattern, element, before) -- todo: element als
else
local matches, collect = { }, nil
if type(element) == "string" then
- element = convert(element,true)
+ element = xmlconvert(element,true)
end
if element and element.ri then
element = element.dt[element.ri]
end
if element then
- collect = function(r,d,k) matches[#matches+1] = { r, d, k, element } end
- traverse(root, lpath(pattern), collect)
- for i=#matches,1,-1 do
- local m = matches[i]
- local r, d, k, element = m[1], m[2], m[3], m[4]
- if not before then k = k + 1 end
- if element.tg then
- insert(d,k,element) -- untested
---~ elseif element.dt then
---~ for _,v in ipairs(element.dt) do -- i added
---~ insert(d,k,v)
---~ k = k + 1
---~ end
---~ end
- else
- local edt = element.dt
- if edt then
- for i=1,#edt do
- insert(d,k,edt[i])
- k = k + 1
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ local r = e.__p__
+ local d = r.dt
+ local k = e.ni
+ if not before then
+ k = k + 1
+ end
+ if element.tg then
+ insert(d,k,element) -- untested
+ else
+ local edt = element.dt
+ if edt then
+ for i=1,#edt do
+ insert(d,k,edt[i])
+ k = k + 1
+ end
end
end
end
@@ -4888,105 +5797,114 @@ xml.inject_element_after = xml.inject_element
xml.inject_element_before = function(r,p,e) xml.inject_element(r,p,e,true) end
function xml.delete_element(root, pattern)
- local matches, deleted = { }, { }
- local collect = function(r,d,k) matches[#matches+1] = { r, d, k } end
- traverse(root, lpath(pattern), collect)
- for i=#matches,1,-1 do
- local m = matches[i]
- deleted[#deleted+1] = remove(m[2],m[3])
+ local collected = xmlparseapply({ root },pattern) -- matching elements, or nil when nothing matches
+ if collected then
+ for c=#collected,1,-1 do -- backwards (as the replaced code did) so a removal never shifts the ni of a pending match
+ local e = collected[c]
+ remove(e.__p__.dt,e.ni) -- detach the element from its parent's data table
+ e.ni = nil -- the element no longer has a position in a parent
+ end
end
- return deleted
+ return collected -- the detached elements (was the undefined global 'collection', i.e. always nil)
end
function xml.replace_element(root, pattern, element)
if type(element) == "string" then
- element = convert(element,true)
+ element = xmlconvert(element,true)
end
if element and element.ri then
element = element.dt[element.ri]
end
if element then
- traverse(root, lpath(pattern), function(rm, d, k)
- d[k] = element.dt -- maybe not clever enough
- end)
- end
-end
-
-local function load_data(name) -- == io.loaddata
- local f, data = io.open(name), ""
- if f then
- data = f:read("*all",'b') -- 'b' ?
- f:close()
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ e.__p__.dt[e.ni] = element.dt -- maybe not clever enough
+ end
+ end
end
- return data
end
-function xml.include(xmldata,pattern,attribute,recursive,loaddata)
+local function include(xmldata,pattern,attribute,recursive,loaddata)
-- parse="text" (default: xml), encoding="" (todo)
-- attribute = attribute or 'href'
pattern = pattern or 'include'
- loaddata = loaddata or load_data
- local function include(r,d,k)
- local ek, name = d[k], nil
- if not attribute or attribute == "" then
+ loaddata = loaddata or io.loaddata
+ local collected = xmlparseapply({ xmldata },pattern)
+ if collected then
+ for c=1,#collected do
+ local ek = collected[c]
+ local name = nil
local ekdt = ek.dt
- name = (type(ekdt) == "table" and ekdt[1]) or ekdt
- end
- if not name then
- if ek.at then
+ local ekat = ek.at
+ local epdt = ek.__p__.dt
+ if not attribute or attribute == "" then
+ name = (type(ekdt) == "table" and ekdt[1]) or ekdt -- ckeck, probably always tab or str
+ end
+ if not name then
for a in gmatch(attribute or "href","([^|]+)") do
- name = ek.at[a]
+ name = ekat[a]
if name then break end
end
end
- end
- local data = (name and name ~= "" and loaddata(name)) or ""
- if data == "" then
- xml.empty(d,k)
- elseif ek.at["parse"] == "text" then -- for the moment hard coded
- d[k] = xml.escaped(data)
- else
- local xi = xml.convert(data)
- if not xi then
- xml.empty(d,k)
+ local data = (name and name ~= "" and loaddata(name)) or ""
+ if data == "" then
+ epdt[ek.ni] = "" -- xml.empty(d,k)
+ elseif ekat["parse"] == "text" then
+ -- for the moment hard coded
+ epdt[ek.ni] = xml.escaped(data) -- d[k] = xml.escaped(data)
else
- if recursive then
- xml.include(xi,pattern,attribute,recursive,loaddata)
+ local settings = xmldata.settings
+ settings.parent_root = xmldata -- to be tested
+ local xi = xmlconvert(data,settings)
+ if not xi then
+ epdt[ek.ni] = "" -- xml.empty(d,k)
+ else
+ if recursive then
+ include(xi,pattern,attribute,recursive,loaddata)
+ end
+ epdt[ek.ni] = xml.body(xi) -- xml.assign(d,k,xi)
end
- xml.assign(d,k,xi)
end
end
end
- xml.each_element(xmldata, pattern, include)
end
+xml.include = include
+
function xml.strip_whitespace(root, pattern, nolines) -- strips all leading and trailing space !
- traverse(root, lpath(pattern), function(r,d,k)
- local dkdt = d[k].dt
- if dkdt then -- can be optimized
- local t = { }
- for i=1,#dkdt do
- local str = dkdt[i]
- if type(str) == "string" then
- if str == "" then
- -- stripped
- else
- if nolines then
- str = gsub(str,"[ \n\r\t]+"," ")
- end
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for i=1,#collected do
+ local e = collected[i]
+ local edt = e.dt
+ if edt then
+ local t = { }
+ for i=1,#edt do
+ local str = edt[i]
+ if type(str) == "string" then
if str == "" then
-- stripped
else
- t[#t+1] = str
+ if nolines then
+ str = gsub(str,"[ \n\r\t]+"," ")
+ end
+ if str == "" then
+ -- stripped
+ else
+ t[#t+1] = str
+ end
end
+ else
+--~ str.ni = i
+ t[#t+1] = str
end
- else
- t[#t+1] = str
end
+ e.dt = t
end
- d[k].dt = t
end
- end)
+ end
end
local function rename_space(root, oldspace, newspace) -- fast variant
@@ -5011,59 +5929,49 @@ end
xml.rename_space = rename_space
function xml.remap_tag(root, pattern, newtg)
- traverse(root, lpath(pattern), function(r,d,k)
- d[k].tg = newtg
- end)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ collected[c].tg = newtg
+ end
+ end
end
+
function xml.remap_namespace(root, pattern, newns)
- traverse(root, lpath(pattern), function(r,d,k)
- d[k].ns = newns
- end)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ collected[c].ns = newns
+ end
+ end
end
+
function xml.check_namespace(root, pattern, newns)
- traverse(root, lpath(pattern), function(r,d,k)
- local dk = d[k]
- if (not dk.rn or dk.rn == "") and dk.ns == "" then
- dk.rn = newns
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ if (not e.rn or e.rn == "") and e.ns == "" then
+ e.rn = newns
+ end
end
- end)
-end
-function xml.remap_name(root, pattern, newtg, newns, newrn)
- traverse(root, lpath(pattern), function(r,d,k)
- local dk = d[k]
- dk.tg = newtg
- dk.ns = newns
- dk.rn = newrn
- end)
+ end
end
-function xml.filters.found(root,pattern,check_content)
- local found = false
- traverse(root, lpath(pattern), function(r,d,k)
- if check_content then
- local dk = d and d[k]
- found = dk and dk.dt and next(dk.dt) and true
- else
- found = true
+function xml.remap_name(root, pattern, newtg, newns, newrn)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ e.tg, e.ns, e.rn = newtg, newns, newrn
end
- return true
- end)
- return found
+ end
end
--[[ldx--
<p>Here are a few synonyms.</p>
--ldx]]--
-xml.filters.position = xml.filters.index
-
-xml.count = xml.filters.count
-xml.index = xml.filters.index
-xml.position = xml.filters.index
-xml.first = xml.filters.first
-xml.last = xml.filters.last
-xml.found = xml.filters.found
-
xml.each = xml.each_element
xml.process = xml.process_element
xml.strip = xml.strip_whitespace
@@ -5077,155 +5985,12 @@ xml.before = xml.insert_element_before
xml.delete = xml.delete_element
xml.replace = xml.replace_element
---[[ldx--
-<p>The following helper functions best belong to the <t>lmxl-ini</t>
-module. Some are here because we need then in the <t>mk</t>
-document and other manuals, others came up when playing with
-this module. Since this module is also used in <l n='mtxrun'/> we've
-put them here instead of loading mode modules there then needed.</p>
---ldx]]--
-
-function xml.gsub(t,old,new)
- local dt = t.dt
- if dt then
- for k=1,#dt do
- local v = dt[k]
- if type(v) == "string" then
- dt[k] = gsub(v,old,new)
- else
- xml.gsub(v,old,new)
- end
- end
- end
-end
-
-function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual
- if d and k and d[k-1] and type(d[k-1]) == "string" then
- local s = d[k-1]:match("\n(%s+)")
- xml.gsub(dk,"\n"..rep(" ",#s),"\n")
- end
-end
-
-function xml.serialize_path(root,lpath,handle)
- local dk, r, d, k = xml.first(root,lpath)
- dk = xml.copy(dk)
- xml.strip_leading_spaces(dk,d,k)
- xml.serialize(dk,handle)
-end
-
---~ xml.escapes = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
---~ xml.unescapes = { } for k,v in pairs(xml.escapes) do xml.unescapes[v] = k end
-
---~ function xml.escaped (str) return (gsub(str,"(.)" , xml.escapes )) end
---~ function xml.unescaped(str) return (gsub(str,"(&.-;)", xml.unescapes)) end
---~ function xml.cleansed (str) return (gsub(str,"<.->" , '' )) end -- "%b<>"
-
-local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, lpeg.Cs
-
--- 100 * 2500 * "oeps< oeps> oeps&" : gsub:lpeg|lpeg|lpeg
---
--- 1021:0335:0287:0247
-
--- 10 * 1000 * "oeps< oeps> oeps& asfjhalskfjh alskfjh alskfjh alskfjh ;al J;LSFDJ"
---
--- 1559:0257:0288:0190 (last one suggested by roberto)
-
--- escaped = Cs((S("<&>") / xml.escapes + 1)^0)
--- escaped = Cs((S("<")/"&lt;" + S(">")/"&gt;" + S("&")/"&amp;" + 1)^0)
-local normal = (1 - S("<&>"))^0
-local special = P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;"
-local escaped = Cs(normal * (special * normal)^0)
-
--- 100 * 1000 * "oeps&lt; oeps&gt; oeps&amp;" : gsub:lpeg == 0153:0280:0151:0080 (last one by roberto)
-
--- unescaped = Cs((S("&lt;")/"<" + S("&gt;")/">" + S("&amp;")/"&" + 1)^0)
--- unescaped = Cs((((P("&")/"") * (P("lt")/"<" + P("gt")/">" + P("amp")/"&") * (P(";")/"")) + 1)^0)
-local normal = (1 - S"&")^0
-local special = P("&lt;")/"<" + P("&gt;")/">" + P("&amp;")/"&"
-local unescaped = Cs(normal * (special * normal)^0)
-
--- 100 * 5000 * "oeps <oeps bla='oeps' foo='bar'> oeps </oeps> oeps " : gsub:lpeg == 623:501 msec (short tags, less difference)
-
-local cleansed = Cs(((P("<") * (1-P(">"))^0 * P(">"))/"" + 1)^0)
-
-function xml.escaped (str) return escaped :match(str) end
-function xml.unescaped(str) return unescaped:match(str) end
-function xml.cleansed (str) return cleansed :match(str) end
-
-function xml.join(t,separator,lastseparator)
- if #t > 0 then
- local result = { }
- for k,v in pairs(t) do
- result[k] = xml.tostring(v)
- end
- if lastseparator then
- return concat(result,separator or "",1,#result-1) .. (lastseparator or "") .. result[#result]
- else
- return concat(result,separator)
- end
- else
- return ""
- end
-end
-
-function xml.statistics()
- return {
- lpathcalls = lpathcalls,
- lpathcached = lpathcached,
- }
-end
-
--- xml.set_text_cleanup(xml.show_text_entities)
--- xml.set_text_cleanup(xml.resolve_text_entities)
-
---~ xml.lshow("/../../../a/(b|c)[@d='e']/f")
---~ xml.lshow("/../../../a/!(b|c)[@d='e']/f")
---~ xml.lshow("/../../../a/!b[@d!='e']/f")
-
---~ x = xml.convert([[
---~ <a>
---~ <b n='01'>01</b>
---~ <b n='02'>02</b>
---~ <b n='03'>03</b>
---~ <b n='04'>OK</b>
---~ <b n='05'>05</b>
---~ <b n='06'>06</b>
---~ <b n='07'>ALSO OK</b>
---~ </a>
---~ ]])
-
---~ xml.settrace("lpath",true)
-
---~ xml.xshow(xml.first(x,"b[position() > 2 and position() < 5 and text() == 'ok']"))
---~ xml.xshow(xml.first(x,"b[position() > 2 and position() < 5 and text() == upper('ok')]"))
---~ xml.xshow(xml.first(x,"b[@n=='03' or @n=='08']"))
---~ xml.xshow(xml.all (x,"b[number(@n)>2 and number(@n)<6]"))
---~ xml.xshow(xml.first(x,"b[find(text(),'ALSO')]"))
-
---~ str = [[
---~ <?xml version="1.0" encoding="utf-8"?>
---~ <story line='mojca'>
---~ <windows>my secret</mouse>
---~ </story>
---~ ]]
-
---~ x = xml.convert([[
---~ <a><b n='01'>01</b><b n='02'>02</b><x>xx</x><b n='03'>03</b><b n='04'>OK</b></a>
---~ ]])
---~ xml.xshow(xml.first(x,"b[tag(2) == 'x']"))
---~ xml.xshow(xml.first(x,"b[tag(1) == 'x']"))
---~ xml.xshow(xml.first(x,"b[tag(-1) == 'x']"))
---~ xml.xshow(xml.first(x,"b[tag(-2) == 'x']"))
-
---~ print(xml.filter(x,"b/tag(2)"))
---~ print(xml.filter(x,"b/tag(1)"))
-
end -- of closure
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['lxml-ent'] = {
+if not modules then modules = { } end modules ['lxml-xml'] = {
version = 1.001,
comment = "this module is the basis for the lxml-* ones",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -5233,457 +5998,249 @@ if not modules then modules = { } end modules ['lxml-ent'] = {
license = "see context related readme files"
}
-local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
-local format, gsub, find = string.format, string.gsub, string.find
-local utfchar = unicode.utf8.char
-
---[[ldx--
-<p>We provide (at least here) two entity handlers. The more extensive
-resolver consults a hash first, tries to convert to <l n='utf'/> next,
-and finaly calls a handler when defines. When this all fails, the
-original entity is returned.</p>
---ldx]]--
+local finalizers = xml.finalizers.xml
+local xmlfilter = xml.filter -- we could inline this one for speed
+local xmltostring = xml.tostring
+local xmlserialize = xml.serialize
-xml.entities = xml.entities or { } -- xml.entity_handler == function
-
-function xml.entity_handler(e)
- return format("[%s]",e)
+local function first(collected)
+ return collected and collected[1]
end
-local function toutf(s)
- return utfchar(tonumber(s,16))
+local function last(collected)
+ return collected and collected[#collected]
end
-local function utfize(root)
- local d = root.dt
- for k=1,#d do
- local dk = d[k]
- if type(dk) == "string" then
- -- test prevents copying if no match
- if find(dk,"&#x.-;") then
- d[k] = gsub(dk,"&#x(.-);",toutf)
- end
- else
- utfize(dk)
- end
- end
+local function all(collected)
+ return collected
end
-xml.utfize = utfize
-
-local function resolve(e) -- hex encoded always first, just to avoid mkii fallbacks
- if find(e,"^#x") then
- return utfchar(tonumber(e:sub(3),16))
- elseif find(e,"^#") then
- return utfchar(tonumber(e:sub(2)))
- else
- local ee = xml.entities[e] -- we cannot shortcut this one (is reloaded)
- if ee then
- return ee
- else
- local h = xml.entity_handler
- return (h and h(e)) or "&" .. e .. ";"
+local function reverse(collected)
+ if collected then
+ local reversed = { }
+ for c=#collected,1,-1 do
+ reversed[#reversed+1] = collected[c]
end
+ return reversed
end
end
-local function resolve_entities(root)
- if not root.special or root.tg == "@rt@" then
- local d = root.dt
- for k=1,#d do
- local dk = d[k]
- if type(dk) == "string" then
- if find(dk,"&.-;") then
- d[k] = gsub(dk,"&(.-);",resolve)
- end
- else
- resolve_entities(dk)
- end
- end
- end
+local function attribute(collected,name)
+ local at = collected and collected[1].at
+ return at and at[name]
end
-xml.resolve_entities = resolve_entities
+local function att(id,name)
+ local at = id.at
+ return at and at[name]
+end
-function xml.utfize_text(str)
- if find(str,"&#") then
- return (gsub(str,"&#x(.-);",toutf))
- else
- return str
- end
+local function count(collected)
+ return (collected and #collected) or 0
end
-function xml.resolve_text_entities(str) -- maybe an lpeg. maybe resolve inline
- if find(str,"&") then
- return (gsub(str,"&(.-);",resolve))
- else
- return str
+local function position(collected,n)
+ if collected then
+ n = tonumber(n) or 0
+ if n < 0 then
+ return collected[#collected + n + 1]
+ else
+ return collected[n]
+ end
end
end
-function xml.show_text_entities(str)
- if find(str,"&") then
- return (gsub(str,"&(.-);","[%1]"))
- else
- return str
+local function index(collected)
+ if collected then
+ return collected[1].ni
end
end
--- experimental, this will be done differently
-
-function xml.merge_entities(root)
- local documententities = root.entities
- local allentities = xml.entities
- if documententities then
- for k, v in next, documententities do
- allentities[k] = v
+local function attributes(collected,arguments)
+ if collected then
+ local at = collected[1].at
+ if arguments then
+ return at[arguments]
+ elseif next(at) then
+ return at -- all of them
end
end
end
-
-end -- of closure
-
-do -- create closure to overcome 200 locals limit
-
-if not modules then modules = { } end modules ['lxml-mis'] = {
- version = 1.001,
- comment = "this module is the basis for the lxml-* ones",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
-local concat = table.concat
-local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
-local format, gsub = string.format, string.gsub
-
---[[ldx--
-<p>The following helper functions best belong to the <t>lmxl-ini</t>
-module. Some are here because we need then in the <t>mk</t>
-document and other manuals, others came up when playing with
-this module. Since this module is also used in <l n='mtxrun'/> we've
-put them here instead of loading mode modules there then needed.</p>
---ldx]]--
-
-function xml.gsub(t,old,new)
- local dt = t.dt
- if dt then
- for k=1,#dt do
- local v = dt[k]
- if type(v) == "string" then
- dt[k] = gsub(v,old,new)
+local function chainattribute(collected,arguments) -- todo: optional levels
+ if collected then
+ local e = collected[1]
+ while e do
+ local at = e.at
+ if at then
+ local a = at[arguments]
+ if a then
+ return a
+ end
else
- xml.gsub(v,old,new)
+ break -- error
end
+ e = e.__p__
end
end
+ return ""
end
-function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual
- if d and k and d[k-1] and type(d[k-1]) == "string" then
- local s = d[k-1]:match("\n(%s+)")
- xml.gsub(dk,"\n"..string.rep(" ",#s),"\n")
+local function text(collected)
+ if collected then
+ return xmltostring(collected[1]) -- only first as we cannot concat function
+ else
+ return ""
end
end
-function xml.serialize_path(root,lpath,handle)
- local dk, r, d, k = xml.first(root,lpath)
- dk = xml.copy(dk)
- xml.strip_leading_spaces(dk,d,k)
- xml.serialize(dk,handle)
-end
-
---~ xml.escapes = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
---~ xml.unescapes = { } for k,v in pairs(xml.escapes) do xml.unescapes[v] = k end
-
---~ function xml.escaped (str) return (gsub(str,"(.)" , xml.escapes )) end
---~ function xml.unescaped(str) return (gsub(str,"(&.-;)", xml.unescapes)) end
---~ function xml.cleansed (str) return (gsub(str,"<.->" , '' )) end -- "%b<>"
-
-local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, lpeg.Cs
-
--- 100 * 2500 * "oeps< oeps> oeps&" : gsub:lpeg|lpeg|lpeg
---
--- 1021:0335:0287:0247
-
--- 10 * 1000 * "oeps< oeps> oeps& asfjhalskfjh alskfjh alskfjh alskfjh ;al J;LSFDJ"
---
--- 1559:0257:0288:0190 (last one suggested by roberto)
-
--- escaped = Cs((S("<&>") / xml.escapes + 1)^0)
--- escaped = Cs((S("<")/"&lt;" + S(">")/"&gt;" + S("&")/"&amp;" + 1)^0)
-local normal = (1 - S("<&>"))^0
-local special = P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;"
-local escaped = Cs(normal * (special * normal)^0)
-
--- 100 * 1000 * "oeps&lt; oeps&gt; oeps&amp;" : gsub:lpeg == 0153:0280:0151:0080 (last one by roberto)
-
--- unescaped = Cs((S("&lt;")/"<" + S("&gt;")/">" + S("&amp;")/"&" + 1)^0)
--- unescaped = Cs((((P("&")/"") * (P("lt")/"<" + P("gt")/">" + P("amp")/"&") * (P(";")/"")) + 1)^0)
-local normal = (1 - S"&")^0
-local special = P("&lt;")/"<" + P("&gt;")/">" + P("&amp;")/"&"
-local unescaped = Cs(normal * (special * normal)^0)
-
--- 100 * 5000 * "oeps <oeps bla='oeps' foo='bar'> oeps </oeps> oeps " : gsub:lpeg == 623:501 msec (short tags, less difference)
-
-local cleansed = Cs(((P("<") * (1-P(">"))^0 * P(">"))/"" + 1)^0)
-
-xml.escaped_pattern = escaped
-xml.unescaped_pattern = unescaped
-xml.cleansed_pattern = cleansed
-
-function xml.escaped (str) return escaped :match(str) end
-function xml.unescaped(str) return unescaped:match(str) end
-function xml.cleansed (str) return cleansed :match(str) end
-
-function xml.join(t,separator,lastseparator)
- if #t > 0 then
- local result = { }
- for k,v in pairs(t) do
- result[k] = xml.tostring(v)
- end
- if lastseparator then
- return concat(result,separator or "",1,#result-1) .. (lastseparator or "") .. result[#result]
- else
- return concat(result,separator)
+local function texts(collected) -- finalizer: returns a list with the dt table of every collected element that has one
+ if collected then
+ local t = { }
+ for c=1,#collected do
+ local e = collected[c] -- was 'collection[c]': an undefined global, so this line raised an error
+ if e and e.dt then
+ t[#t+1] = e.dt
+ end
end
- else
- return ""
+ return t
end
end
-
-end -- of closure
-
-do -- create closure to overcome 200 locals limit
-
-if not modules then modules = { } end modules ['trac-tra'] = {
- version = 1.001,
- comment = "companion to trac-tra.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
--- the <anonymous> tag is kind of generic and used for functions that are not
--- bound to a variable, like node.new, node.copy etc (contrary to for instance
--- node.has_attribute which is bound to a has_attribute local variable in mkiv)
-
-debugger = debugger or { }
-
-local counters = { }
-local names = { }
-local getinfo = debug.getinfo
-local format, find, lower, gmatch = string.format, string.find, string.lower, string.gmatch
-
--- one
-
-local function hook()
- local f = getinfo(2,"f").func
- local n = getinfo(2,"Sn")
--- if n.what == "C" and n.name then print (n.namewhat .. ': ' .. n.name) end
- if f then
- local cf = counters[f]
- if cf == nil then
- counters[f] = 1
- names[f] = n
+local function tag(collected,n) -- finalizer: tag of the n-th match; 0 or nil gives the first, negative n counts from the end
+ if collected then
+ local c
+ if n == 0 or not n then
+ c = collected[1]
+ elseif n > 0 then -- was 'n > 1', which sent n == 1 to the from-the-end branch
+ c = collected[n]
else
- counters[f] = cf + 1
+ c = collected[#collected+n+1] -- n is negative here: -1 is the last match (same rule as the position finalizer)
end
+ return c and c.tg
end
end
-local function getname(func)
- local n = names[func]
- if n then
- if n.what == "C" then
- return n.name or '<anonymous>'
+
+local function name(collected,n) -- finalizer: 'ns:tg' (or just tg when ns is empty) of the n-th match; negative n counts from the end
+ if collected then
+ local c
+ if n == 0 or not n then
+ c = collected[1]
+ elseif n > 0 then -- was 'n > 1', which sent n == 1 to the from-the-end branch
+ c = collected[n]
else
- -- source short_src linedefined what name namewhat nups func
- local name = n.name or n.namewhat or n.what
- if not name or name == "" then name = "?" end
- return format("%s : %s : %s", n.short_src or "unknown source", n.linedefined or "--", name)
+ c = collected[#collected+n+1] -- n is negative here: -1 is the last match (same rule as the position finalizer)
end
- else
- return "unknown"
- end
-end
-function debugger.showstats(printer,threshold)
- printer = printer or texio.write or print
- threshold = threshold or 0
- local total, grandtotal, functions = 0, 0, 0
- printer("\n") -- ugly but ok
- -- table.sort(counters)
- for func, count in pairs(counters) do
- if count > threshold then
- local name = getname(func)
- if not name:find("for generator") then
- printer(format("%8i %s", count, name))
- total = total + count
+ if c then
+ if c.ns == "" then
+ return c.tg
+ else
+ return c.ns .. ":" .. c.tg
end
end
- grandtotal = grandtotal + count
- functions = functions + 1
end
- printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", functions, total, grandtotal, threshold))
end
--- two
-
---~ local function hook()
---~ local n = getinfo(2)
---~ if n.what=="C" and not n.name then
---~ local f = tostring(debug.traceback())
---~ local cf = counters[f]
---~ if cf == nil then
---~ counters[f] = 1
---~ names[f] = n
---~ else
---~ counters[f] = cf + 1
---~ end
---~ end
---~ end
---~ function debugger.showstats(printer,threshold)
---~ printer = printer or texio.write or print
---~ threshold = threshold or 0
---~ local total, grandtotal, functions = 0, 0, 0
---~ printer("\n") -- ugly but ok
---~ -- table.sort(counters)
---~ for func, count in pairs(counters) do
---~ if count > threshold then
---~ printer(format("%8i %s", count, func))
---~ total = total + count
---~ end
---~ grandtotal = grandtotal + count
---~ functions = functions + 1
---~ end
---~ printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", functions, total, grandtotal, threshold))
---~ end
-
--- rest
-
-function debugger.savestats(filename,threshold)
- local f = io.open(filename,'w')
- if f then
- debugger.showstats(function(str) f:write(str) end,threshold)
- f:close()
+local function tags(collected,nonamespace)
+ if collected then
+ local t = { }
+ for c=1,#collected do
+ local e = collected[c]
+ local ns, tg = e.ns, e.tg
+ if nonamespace or ns == "" then
+ t[#t+1] = tg
+ else
+ t[#t+1] = ns .. ":" .. tg
+ end
+ end
+ return t
end
end
-function debugger.enable()
- debug.sethook(hook,"c")
-end
-
-function debugger.disable()
- debug.sethook()
---~ counters[debug.getinfo(2,"f").func] = nil
-end
-
-function debugger.tracing()
- local n = tonumber(os.env['MTX.TRACE.CALLS']) or tonumber(os.env['MTX_TRACE_CALLS']) or 0
- if n > 0 then
- function debugger.tracing() return true end ; return true
- else
- function debugger.tracing() return false end ; return false
+local function empty(collected)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ if e then
+ local edt = e.dt
+ if edt then
+ local n = #edt
+ if n == 1 then
+ local edk = edt[1]
+ local typ = type(edk)
+ if typ == "table" then
+ return false
+ elseif edk ~= "" then -- maybe an extra tester for spacing only
+ return false
+ end
+ elseif n > 1 then
+ return false
+ end
+ end
+ end
+ end
end
+ return true
end
---~ debugger.enable()
-
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
-
---~ debugger.disable()
-
---~ print("")
---~ debugger.showstats()
---~ print("")
---~ debugger.showstats(print,3)
+finalizers.first = first
+finalizers.last = last
+finalizers.all = all
+finalizers.reverse = reverse
+finalizers.elements = all
+finalizers.default = all
+finalizers.attribute = attribute
+finalizers.att = att
+finalizers.count = count
+finalizers.position = position
+finalizers.index = index
+finalizers.attributes = attributes
+finalizers.chainattribute = chainattribute
+finalizers.text = text
+finalizers.texts = texts
+finalizers.tag = tag
+finalizers.name = name
+finalizers.tags = tags
+finalizers.empty = empty
-trackers = trackers or { }
+-- shortcuts -- we could support xmlfilter(id,pattern,first)
-local data, done = { }, { }
+function xml.first(id,pattern)
+ return first(xmlfilter(id,pattern))
+end
-local function set(what,value)
- if type(what) == "string" then
- what = aux.settings_to_array(what)
- end
- for i=1,#what do
- local w = what[i]
- for d, f in next, data do
- if done[d] then
- -- prevent recursion due to wildcards
- elseif find(d,w) then
- done[d] = true
- for i=1,#f do
- f[i](value)
- end
- end
- end
- end
+function xml.last(id,pattern)
+ return last(xmlfilter(id,pattern))
end
-local function reset()
- for d, f in next, data do
- for i=1,#f do
- f[i](false)
- end
- end
+function xml.count(id,pattern)
+ return count(xmlfilter(id,pattern))
end
-function trackers.register(what,...)
- what = lower(what)
- local w = data[what]
- if not w then
- w = { }
- data[what] = w
- end
- for _, fnc in next, { ... } do
- local typ = type(fnc)
- if typ == "function" then
- w[#w+1] = fnc
- elseif typ == "string" then
- w[#w+1] = function(value) set(fnc,value,nesting) end
- end
- end
+function xml.attribute(id,pattern,a,default)
+ return attribute(xmlfilter(id,pattern),a,default)
end
-function trackers.enable(what)
- done = { }
- set(what,true)
+function xml.text(id,pattern)
+ return text(xmlfilter(id,pattern))
end
-function trackers.disable(what)
- done = { }
- if not what or what == "" then
- trackers.reset(what)
- else
- set(what,false)
- end
+function xml.raw(id,pattern)
+ return xmlserialize(xmlfilter(id,pattern))
end
-function trackers.reset(what)
- done = { }
- reset()
+function xml.position(id,pattern,n)
+ return position(xmlfilter(id,pattern),n)
end
-function trackers.list() -- pattern
- local list = table.sortedkeys(data)
- local user, system = { }, { }
- for l=1,#list do
- local what = list[l]
- if find(what,"^%*") then
- system[#system+1] = what
- else
- user[#user+1] = what
- end
- end
- return user, system
+function xml.empty(id,pattern)
+ return empty(xmlfilter(id,pattern))
end
+xml.all = xml.filter
+xml.index = xml.position
+xml.found = xml.filter
+
end -- of closure
@@ -6135,6 +6692,7 @@ function statistics.timed(action,report)
end
+
end -- of closure
do -- create closure to overcome 200 locals limit
@@ -9814,11 +10372,13 @@ own.libs = { -- todo: check which ones are really needed
'l-utils.lua',
'l-aux.lua',
-- 'l-xml.lua',
+ 'trac-tra.lua',
'lxml-tab.lua',
- 'lxml-pth.lua',
+ 'lxml-lpt.lua',
'lxml-ent.lua',
'lxml-mis.lua',
- 'trac-tra.lua',
+ 'lxml-aux.lua',
+ 'lxml-xml.lua',
'luat-env.lua',
'trac-inf.lua',
'trac-log.lua',
@@ -9889,7 +10449,7 @@ if not resolvers then
os.exit()
end
-logs.setprogram('MTXrun',"TDS Runner Tool 1.22",environment.arguments["verbose"] or false)
+logs.setprogram('MTXrun',"TDS Runner Tool 1.23",environment.arguments["verbose"] or false)
local instance = resolvers.reset()
diff --git a/scripts/context/stubs/mswin/luatools.lua b/scripts/context/stubs/mswin/luatools.lua
index a8cfbd5b0..2bc943210 100644
--- a/scripts/context/stubs/mswin/luatools.lua
+++ b/scripts/context/stubs/mswin/luatools.lua
@@ -230,6 +230,16 @@ function string:pattesc()
return (gsub(self,".",patterns_escapes))
end
+local simple_escapes = {
+ ["-"] = "%-",
+ ["."] = "%.",
+ ["*"] = ".*",
+}
+
+function string:simpleesc()
+ return (gsub(self,".",simple_escapes))
+end
+
function string:tohash()
local t = { }
for s in gmatch(self,"([^, ]+)") do -- lpeg
@@ -279,6 +289,12 @@ function string:compactlong() -- strips newlines and leading spaces
return self
end
+function string:striplong() -- strips newlines and leading spaces
+ self = gsub(self,"^%s*","")
+ self = gsub(self,"[\n\r]+ *","\n")
+ return self
+end
+
end -- of closure
@@ -387,6 +403,18 @@ function string:split(separator)
return c:match(self)
end
+--~ function lpeg.L(list,pp)
+--~ local p = pp
+--~ for l=1,#list do
+--~ if p then
+--~ p = p + lpeg.P(list[l])
+--~ else
+--~ p = lpeg.P(list[l])
+--~ end
+--~ end
+--~ return p
+--~ end
+
end -- of closure
@@ -420,6 +448,14 @@ function table.strip(tab)
return lst
end
+-- Returns a new list with all keys of t, in the order in which next delivers
+-- them (so sort the result when the order matters).
+function table.keys(t)
+    local keys, n = { }, 0
+    for key in next, t do
+        n = n + 1
+        keys[n] = key
+    end
+    return keys
+end
+
local function compare(a,b)
return (tostring(a) < tostring(b))
end
@@ -1192,21 +1228,35 @@ function table.reverse(t)
return tt
end
---~ function table.keys(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return k
---~ end
+-- Makes extra sit directly in front of the first entry equal to value; when
+-- value is not found extra becomes the first entry. Copies of extra that are
+-- already in t are removed first. That loop runs backwards: going forward
+-- while removing would skip the entry that slides into the freed slot, so
+-- adjacent copies survived.
+function table.insert_before_value(t,value,extra)
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i,extra)
+            return
+        end
+    end
+    insert(t,1,extra)
+end
---~ function table.keys_as_string(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return concat(k,"")
---~ end
+-- Makes extra sit directly behind the first entry equal to value; when value
+-- is not found extra is appended. Copies of extra that are already in t are
+-- removed first. That loop runs backwards: going forward while removing would
+-- skip the entry that slides into the freed slot, so adjacent copies survived.
+function table.insert_after_value(t,value,extra)
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i+1,extra)
+            return
+        end
+    end
+    insert(t,#t+1,extra)
+end
end -- of closure
@@ -1413,7 +1463,7 @@ if not modules then modules = { } end modules ['l-number'] = {
license = "see context related readme files"
}
-local format = string.format
+local format, floor, insert = string.format, math.floor, table.insert -- was "foor", which left the floor used by number.bits undefined
number = number or { }
@@ -1449,7 +1499,18 @@ function number.toset(n)
return one:match(tostring(n))
end
-
+-- Returns the positions of the bits that are set in n, highest position first.
+-- Positions are counted from 1, or from 0 when zero is given.
+function number.bits(n,zero)
+    local positions = { }
+    local position = 1
+    if zero then
+        position = 0
+    end
+    while n > 0 do
+        if n % 2 > 0 then
+            insert(positions,1,position)
+        end
+        n = floor(n/2)
+        position = position + 1
+    end
+    return positions
+end
end -- of closure
@@ -1914,11 +1975,11 @@ local rootbased = lpeg.P("/") + letter*lpeg.P(":")
-- ./name ../name /name c: :// name/name
function file.is_qualified_path(filename)
- return qualified:match(filename)
+ return qualified:match(filename) ~= nil
end
function file.is_rootbased_path(filename)
- return rootbased:match(filename)
+ return rootbased:match(filename) ~= nil
end
local slash = lpeg.S("\\/")
@@ -3134,6 +3195,24 @@ function aux.accesstable(target)
return t
end
+-- as we use this a lot ...
+
+--~ function aux.cachefunction(action,weak)
+--~ local cache = { }
+--~ if weak then
+--~ setmetatable(cache, { __mode = "kv" } )
+--~ end
+--~ local function reminder(str)
+--~ local found = cache[str]
+--~ if not found then
+--~ found = action(str)
+--~ cache[str] = found
+--~ end
+--~ return found
+--~ end
+--~ return reminder, cache
+--~ end
+
end -- of closure
@@ -3156,7 +3235,7 @@ debugger = debugger or { }
local counters = { }
local names = { }
local getinfo = debug.getinfo
-local format, find, lower, gmatch = string.format, string.find, string.lower, string.gmatch
+local format, find, lower, gmatch, gsub = string.format, string.find, string.lower, string.gmatch, string.gsub
-- one
@@ -3290,7 +3369,7 @@ local data, done = { }, { }
local function set(what,value)
if type(what) == "string" then
- what = aux.settings_to_array(what)
+ what = aux.settings_to_array(what) -- inefficient but ok
end
for i=1,#what do
local w = what[i]
@@ -3315,6 +3394,19 @@ local function reset()
end
end
+-- Hands what over to set with the value true.
+local function enable(what)
+    return set(what,true)
+end
+
+-- With a non-empty specification what is handed over to set with the value
+-- false; without one (nil or "") done is cleared and reset is called.
+local function disable(what)
+    if what and what ~= "" then
+        set(what,false)
+    else
+        done = { }
+        reset()
+    end
+end
+
function trackers.register(what,...)
what = lower(what)
local w = data[what]
@@ -3333,20 +3425,20 @@ function trackers.register(what,...)
end
function trackers.enable(what) -- escapes what with string.simpleesc and passes it to the local enable
- done = { }
- set(what,true)
+ local e = trackers.enable -- remember the public entry point
+ trackers.enable, done = enable, { } -- while the setters run trackers.enable is the plain local enable
+ enable(string.simpleesc(what))
+ trackers.enable, done = e, { } -- put the entry point back and clear done again
end
function trackers.disable(what)
- done = { }
- if not what or what == "" then
- trackers.reset(what)
- else
- set(what,false)
- end
+ -- escapes what and passes it to the local disable; while the setters run
+ -- trackers.disable is that plain local variant, afterwards it is put back
+ local e = trackers.disable
+ trackers.disable, done = disable, { }
+ disable(what and string.simpleesc(what)) -- no argument still means "reset all", so don't feed nil to simpleesc
+ trackers.disable, done = e, { }
end
-function trackers.reset(what)
+function trackers.reset() -- clears done and delegates to the local reset; takes no argument
done = { }
reset()
end
@@ -3423,7 +3515,7 @@ function environment.initialize_arguments(arg)
environment.arguments, environment.files, environment.sortedflags = arguments, files, nil
for index, argument in pairs(arg) do
if index > 0 then
- local flag, value = argument:match("^%-+(.+)=(.-)$")
+ local flag, value = argument:match("^%-+(.-)=(.-)$")
if flag then
arguments[flag] = string.unquote(value or "")
else
diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua
index 865994073..8bc88c900 100644
--- a/scripts/context/stubs/mswin/mtxrun.lua
+++ b/scripts/context/stubs/mswin/mtxrun.lua
@@ -239,6 +239,16 @@ function string:pattesc()
return (gsub(self,".",patterns_escapes))
end
+-- Per-character replacements for string.simpleesc: "-" and "." get a "%" in
+-- front, "*" becomes ".*"; characters without an entry are left as they are.
+local simple_escapes = { ["*"] = ".*", ["."] = "%.", ["-"] = "%-" }
+
+-- Returns self with every character looked up in simple_escapes.
+function string:simpleesc()
+    local escaped = gsub(self,".",simple_escapes)
+    return escaped
+end
+
function string:tohash()
local t = { }
for s in gmatch(self,"([^, ]+)") do -- lpeg
@@ -288,6 +298,12 @@ function string:compactlong() -- strips newlines and leading spaces
return self
end
+-- Drops leading whitespace and replaces every run of newline characters plus
+-- the spaces that follow it by a single "\n".
+function string:striplong()
+    local stripped = gsub(self,"^%s*","")
+    return (gsub(stripped,"[\n\r]+ *","\n"))
+end
+
end -- of closure
@@ -396,6 +412,18 @@ function string:split(separator)
return c:match(self)
end
+--~ function lpeg.L(list,pp)
+--~ local p = pp
+--~ for l=1,#list do
+--~ if p then
+--~ p = p + lpeg.P(list[l])
+--~ else
+--~ p = lpeg.P(list[l])
+--~ end
+--~ end
+--~ return p
+--~ end
+
end -- of closure
@@ -429,6 +457,14 @@ function table.strip(tab)
return lst
end
+-- Returns a new list with all keys of t, in the order in which next delivers
+-- them (so sort the result when the order matters).
+function table.keys(t)
+    local keys, n = { }, 0
+    for key in next, t do
+        n = n + 1
+        keys[n] = key
+    end
+    return keys
+end
+
local function compare(a,b)
return (tostring(a) < tostring(b))
end
@@ -1009,7 +1045,7 @@ function table.tofile(filename,root,name,reduce,noquotes,hexify)
end
end
-local function flatten(t,f,complete)
+local function flatten(t,f,complete) -- is this used? meybe a variant with next, ...
for i=1,#t do
local v = t[i]
if type(v) == "table" then
@@ -1038,6 +1074,24 @@ end
table.flatten_one_level = table.unnest
+-- a better one:
+
+-- Copies every non-table value of t, and of tables nested in t at any depth,
+-- into f (created when not given) under the key it had; when a key shows up
+-- more than once the value visited last wins.
+local function flattened(t,f)
+    f = f or { }
+    for k, v in next, t do
+        if type(v) ~= "table" then
+            f[k] = v
+        else
+            flattened(v,f)
+        end
+    end
+    return f
+end
+
+table.flattened = flattened
+
-- the next three may disappear
function table.remove_value(t,value) -- todo: n
@@ -1201,21 +1255,35 @@ function table.reverse(t)
return tt
end
---~ function table.keys(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return k
---~ end
+-- Makes extra sit directly in front of the first entry equal to value; when
+-- value is not found extra becomes the first entry. Copies of extra that are
+-- already in t are removed first. That loop runs backwards: going forward
+-- while removing would skip the entry that slides into the freed slot, so
+-- adjacent copies survived.
+function table.insert_before_value(t,value,extra)
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i,extra)
+            return
+        end
+    end
+    insert(t,1,extra)
+end
---~ function table.keys_as_string(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return concat(k,"")
---~ end
+-- Makes extra sit directly behind the first entry equal to value; when value
+-- is not found extra is appended. Copies of extra that are already in t are
+-- removed first. That loop runs backwards: going forward while removing would
+-- skip the entry that slides into the freed slot, so adjacent copies survived.
+function table.insert_after_value(t,value,extra)
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i+1,extra)
+            return
+        end
+    end
+    insert(t,#t+1,extra)
+end
end -- of closure
@@ -1422,7 +1490,7 @@ if not modules then modules = { } end modules ['l-number'] = {
license = "see context related readme files"
}
-local format = string.format
+local format, floor, insert = string.format, math.floor, table.insert -- was "foor", which left the floor used by number.bits undefined
number = number or { }
@@ -1458,7 +1526,18 @@ function number.toset(n)
return one:match(tostring(n))
end
-
+-- Returns the positions of the bits that are set in n, highest position first.
+-- Positions are counted from 1, or from 0 when zero is given.
+function number.bits(n,zero)
+    local positions = { }
+    local position = 1
+    if zero then
+        position = 0
+    end
+    while n > 0 do
+        if n % 2 > 0 then
+            insert(positions,1,position)
+        end
+        n = floor(n/2)
+        position = position + 1
+    end
+    return positions
+end
end -- of closure
@@ -1923,11 +2002,11 @@ local rootbased = lpeg.P("/") + letter*lpeg.P(":")
-- ./name ../name /name c: :// name/name
function file.is_qualified_path(filename)
- return qualified:match(filename)
+ return qualified:match(filename) ~= nil
end
function file.is_rootbased_path(filename)
- return rootbased:match(filename)
+ return rootbased:match(filename) ~= nil
end
local slash = lpeg.S("\\/")
@@ -2854,6 +2933,406 @@ function aux.accesstable(target)
return t
end
+-- as we use this a lot ...
+
+--~ function aux.cachefunction(action,weak)
+--~ local cache = { }
+--~ if weak then
+--~ setmetatable(cache, { __mode = "kv" } )
+--~ end
+--~ local function reminder(str)
+--~ local found = cache[str]
+--~ if not found then
+--~ found = action(str)
+--~ cache[str] = found
+--~ end
+--~ return found
+--~ end
+--~ return reminder, cache
+--~ end
+
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['trac-tra'] = {
+ version = 1.001,
+ comment = "companion to trac-tra.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- the <anonymous> tag is kind of generic and used for functions that are not
+-- bound to a variable, like node.new, node.copy etc (contrary to for instance
+-- node.has_attribute which is bound to a has_attribute local variable in mkiv)
+
+local getinfo = debug.getinfo
+local type, next = type, next
+local concat = table.concat
+local format, find, lower, gmatch, gsub = string.format, string.find, string.lower, string.gmatch, string.gsub
+
+debugger = debugger or { }
+
+local counters = { }
+local names = { }
+
+-- one
+
+local function hook() -- installed by debugger.enable as a call hook: counts how often each function is entered
+ local f = getinfo(2,"f").func -- level 2 is the function that triggered the hook
+ local n = getinfo(2,"Sn")
+-- if n.what == "C" and n.name then print (n.namewhat .. ': ' .. n.name) end
+ if f then
+ local cf = counters[f]
+ if cf == nil then
+ counters[f] = 1
+ names[f] = n -- keep the info of the first call so that getname can describe the function
+ else
+ counters[f] = cf + 1
+ end
+ end
+end
+-- Describes a counted function: "unknown" when no info was stored for it, the
+-- plain name (or '<anonymous>') for C functions, and for other functions
+-- "source : line : name" with fallbacks for the parts that are missing.
+local function getname(func)
+    local n = names[func]
+    if not n then
+        return "unknown"
+    end
+    if n.what == "C" then
+        return n.name or '<anonymous>'
+    end
+    -- source short_src linedefined what name namewhat nups func
+    local name = n.name or n.namewhat or n.what
+    if not name or name == "" then
+        name = "?"
+    end
+    return format("%s : %s : %s", n.short_src or "unknown source", n.linedefined or "--", name)
+end
+-- Sends one line per counted function with a count above threshold (default 0)
+-- to printer (default texio.write, else print), followed by a summary line.
+-- Functions whose description contains "for generator" are not listed and not
+-- added to the total, but they are part of the grand total and function count.
+function debugger.showstats(printer,threshold)
+    printer = printer or texio.write or print
+    threshold = threshold or 0
+    local listed, everything, nofunctions = 0, 0, 0
+    printer("\n") -- ugly but ok
+    for func, count in pairs(counters) do
+        nofunctions = nofunctions + 1
+        everything = everything + count
+        if count > threshold then
+            local name = getname(func)
+            if not find(name,"for generator") then
+                printer(format("%8i %s", count, name))
+                listed = listed + count
+            end
+        end
+    end
+    printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", nofunctions, listed, everything, threshold))
+end
+
+-- two
+
+--~ local function hook()
+--~ local n = getinfo(2)
+--~ if n.what=="C" and not n.name then
+--~ local f = tostring(debug.traceback())
+--~ local cf = counters[f]
+--~ if cf == nil then
+--~ counters[f] = 1
+--~ names[f] = n
+--~ else
+--~ counters[f] = cf + 1
+--~ end
+--~ end
+--~ end
+--~ function debugger.showstats(printer,threshold)
+--~ printer = printer or texio.write or print
+--~ threshold = threshold or 0
+--~ local total, grandtotal, functions = 0, 0, 0
+--~ printer("\n") -- ugly but ok
+--~ -- table.sort(counters)
+--~ for func, count in pairs(counters) do
+--~ if count > threshold then
+--~ printer(format("%8i %s", count, func))
+--~ total = total + count
+--~ end
+--~ grandtotal = grandtotal + count
+--~ functions = functions + 1
+--~ end
+--~ printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", functions, total, grandtotal, threshold))
+--~ end
+
+-- rest
+
+-- Writes what debugger.showstats produces to the file filename; nothing
+-- happens when the file cannot be opened for writing.
+function debugger.savestats(filename,threshold)
+    local f = io.open(filename,'w')
+    if not f then
+        return
+    end
+    local function tofile(str)
+        f:write(str)
+    end
+    debugger.showstats(tofile,threshold)
+    f:close()
+end
+
+-- Installs hook as call hook, so that from now on calls are counted.
+function debugger.enable()
+    local mask = "c"
+    debug.sethook(hook,mask)
+end
+
+-- Removes the hook again; the counters collected so far are kept.
+function debugger.disable()
+    debug.sethook()
+--~ counters[debug.getinfo(2,"f").func] = nil
+end
+
+-- Tells if call tracing is asked for: true when MTX.TRACE.CALLS (or else
+-- MTX_TRACE_CALLS) in os.env is a number larger than zero. The first call
+-- replaces this function by one that just returns the outcome.
+function debugger.tracing()
+    local n = tonumber(os.env['MTX.TRACE.CALLS']) or tonumber(os.env['MTX_TRACE_CALLS']) or 0
+    local active = n > 0
+    function debugger.tracing()
+        return active
+    end
+    return active
+end
+
+--~ debugger.enable()
+
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+
+--~ debugger.disable()
+
+--~ print("")
+--~ debugger.showstats()
+--~ print("")
+--~ debugger.showstats(print,3)
+
+setters = setters or { }
+setters.data = setters.data or { }
+
+-- Calls, with value, all functions registered in t.data under a name that
+-- matches one of the patterns in what (a list, or a string that is split by
+-- aux.settings_to_array). A name that is flagged in t.done is skipped, which
+-- prevents recursion due to wildcards.
+local function set(t,what,value)
+    local data, done = t.data, t.done
+    if type(what) == "string" then
+        what = aux.settings_to_array(what) -- inefficient but ok
+    end
+    for i=1,#what do
+        local pattern = what[i]
+        for name, functions in next, data do
+            if not done[name] and find(name,pattern) then
+                done[name] = true
+                for j=1,#functions do
+                    functions[j](value)
+                end
+            end
+        end
+    end
+end
+
+-- Calls every function registered in t.data with false.
+local function reset(t)
+    for _, functions in next, t.data do
+        for j=1,#functions do
+            local fnc = functions[j]
+            fnc(false)
+        end
+    end
+end
+
+-- Hands what over to set with the value true.
+local function enable(t,what)
+    return set(t,what,true)
+end
+
+-- With a non-empty specification what is handed over to set with the value
+-- false; without one (nil or "") t.done is cleared and everything is reset.
+-- (The unused local copy of t.data is gone.)
+local function disable(t,what)
+    if not what or what == "" then
+        t.done = { }
+        reset(t)
+    else
+        set(t,what,false)
+    end
+end
+
+-- Registers the extra arguments under the lowercased name what in t.data.
+-- A function is stored as it is; a string is stored as a function that passes
+-- the value it gets on to set with that string as specification. Arguments of
+-- another type are ignored.
+function setters.register(t,what,...)
+    local data = t.data
+    what = lower(what)
+    local w = data[what]
+    if not w then
+        w = { }
+        data[what] = w
+    end
+    for _, fnc in next, { ... } do
+        local typ = type(fnc)
+        if typ == "function" then
+            w[#w+1] = fnc
+        elseif typ == "string" then
+            -- set takes three arguments; the fourth one was an undefined global
+            w[#w+1] = function(value) set(t,fnc,value) end
+        end
+    end
+end
+
+-- Enables everything in t that matches what (escaped with string.simpleesc).
+-- While the setters run, t.enable is replaced by a variant that does not
+-- clear t.done, so that a nested t.enable(...) cannot trigger a name twice.
+-- That variant has to bind t itself: t.enable is called without t, so plugging
+-- in the local enable(t,what) directly made a nested call fail.
+function setters.enable(t,what)
+    local e = t.enable
+    t.enable, t.done = function(w) enable(t,w) end, { }
+    enable(t,string.simpleesc(what))
+    t.enable, t.done = e, { }
+end
+
+-- Disables everything in t that matches what (escaped with string.simpleesc);
+-- without what (nil or "") everything is reset, so nil is not fed to
+-- simpleesc. While the setters run, t.disable is replaced by a variant that
+-- does not clear t.done; it binds t itself because t.disable is called
+-- without t (the local disable(t,what) expects t as first argument).
+function setters.disable(t,what)
+    local e = t.disable
+    t.disable, t.done = function(w) disable(t,w) end, { }
+    disable(t,what and string.simpleesc(what))
+    t.disable, t.done = e, { }
+end
+
+-- Starts with an empty t.done and calls all registered functions with false.
+function setters.reset(t)
+    local none = { }
+    t.done = none
+    reset(t)
+end
+
+-- Returns two lists with the sorted names registered in t: first the names
+-- that do not start with a "*", then the ones that do.
+function setters.list(t) -- pattern
+    local user, system = { }, { }
+    local names = table.sortedkeys(t.data)
+    for i=1,#names do
+        local name = names[i]
+        local target = find(name,"^%*") and system or user
+        target[#target+1] = name
+    end
+    return user, system
+end
+
+-- Reports the names in the first list that setters.list returns, one status
+-- line each under the name of t, between two empty status lines.
+function setters.show(t)
+    local report = commands.writestatus
+    report("","")
+    local user = setters.list(t)
+    for i=1,#user do
+        commands.writestatus(t.name,user[i])
+    end
+    commands.writestatus("","")
+end
+
+-- we could have used a bit of oo and the trackers:enable syntax but
+-- there is already a lot of code around using the singular tracker
+
+-- Creates a setter namespace called name, stores it in setters.data and
+-- returns it. Its functions are the setters.* ones with the new table filled
+-- in as first argument; they pass on what those return (before, the two lists
+-- of list got lost on the way).
+function setters.new(name)
+    local t
+    t = {
+        data     = { },
+        name     = name,
+        enable   = function(...) return setters.enable  (t,...) end,
+        disable  = function(...) return setters.disable (t,...) end,
+        register = function(...) return setters.register(t,...) end,
+        list     = function(...) return setters.list    (t,...) end,
+        show     = function(...) return setters.show    (t,...) end,
+    }
+    setters.data[name] = t
+    return t
+end
+
+trackers = setters.new("trackers")
+directives = setters.new("directives")
+
+-- nice trick: we overload two of the directives related functions with variants that
+-- do tracing (itself using a tracker) .. proof of concept
+
+local trace_directives = false trackers.register("system.directives", function(v) trace_directives = v end) -- one declaration is enough (the line had it twice)
+
+local e = directives.enable
+local d = directives.disable
+
+-- Variant of directives.enable that reports what gets enabled when the
+-- system.directives tracker is on, and then calls the original. Without the
+-- test the message showed up always, and commands was needed even when
+-- nobody asked for tracing.
+function directives.enable(...)
+    if trace_directives then
+        commands.writestatus("directives","enabling: %s",concat({...}," "))
+    end
+    e(...)
+end
+
+-- Variant of directives.disable that reports what gets disabled when the
+-- system.directives tracker is on, and then calls the original. Without the
+-- test the message showed up always, and commands was needed even when
+-- nobody asked for tracing.
+function directives.disable(...)
+    if trace_directives then
+        commands.writestatus("directives","disabling: %s",concat({...}," "))
+    end
+    d(...)
+end
+
+--~ -- old code:
+--
+--~ trackers = trackers or { }
+--~ local data, done = { }, { }
+--~ local function set(what,value)
+--~ if type(what) == "string" then
+--~ what = aux.settings_to_array(what) -- inefficient but ok
+--~ end
+--~ for i=1,#what do
+--~ local w = what[i]
+--~ for d, f in next, data do
+--~ if done[d] then
+--~ -- prevent recursion due to wildcards
+--~ elseif find(d,w) then
+--~ done[d] = true
+--~ for i=1,#f do
+--~ f[i](value)
+--~ end
+--~ end
+--~ end
+--~ end
+--~ end
+--~ local function reset()
+--~ for d, f in next, data do
+--~ for i=1,#f do
+--~ f[i](false)
+--~ end
+--~ end
+--~ end
+--~ local function enable(what)
+--~ set(what,true)
+--~ end
+--~ local function disable(what)
+--~ if not what or what == "" then
+--~ done = { }
+--~ reset()
+--~ else
+--~ set(what,false)
+--~ end
+--~ end
+--~ function trackers.register(what,...)
+--~ what = lower(what)
+--~ local w = data[what]
+--~ if not w then
+--~ w = { }
+--~ data[what] = w
+--~ end
+--~ for _, fnc in next, { ... } do
+--~ local typ = type(fnc)
+--~ if typ == "function" then
+--~ w[#w+1] = fnc
+--~ elseif typ == "string" then
+--~ w[#w+1] = function(value) set(fnc,value,nesting) end
+--~ end
+--~ end
+--~ end
+--~ function trackers.enable(what)
+--~ local e = trackers.enable
+--~ trackers.enable, done = enable, { }
+--~ enable(string.simpleesc(what))
+--~ trackers.enable, done = e, { }
+--~ end
+--~ function trackers.disable(what)
+--~ local e = trackers.disable
+--~ trackers.disable, done = disable, { }
+--~ disable(string.simpleesc(what))
+--~ trackers.disable, done = e, { }
+--~ end
+--~ function trackers.reset()
+--~ done = { }
+--~ reset()
+--~ end
+--~ function trackers.list() -- pattern
+--~ local list = table.sortedkeys(data)
+--~ local user, system = { }, { }
+--~ for l=1,#list do
+--~ local what = list[l]
+--~ if find(what,"^%*") then
+--~ system[#system+1] = what
+--~ else
+--~ user[#user+1] = what
+--~ end
+--~ end
+--~ return user, system
+--~ end
+
end -- of closure
@@ -2867,6 +3346,12 @@ if not modules then modules = { } end modules ['lxml-tab'] = {
license = "see context related readme files"
}
+-- this module needs a cleanup: check latest lpeg, passing args, (sub)grammar, etc etc
+-- stripping spaces from e.g. cont-en.xml saves .2 sec runtime so it's not worth the
+-- trouble
+
+local trace_entities = false trackers.register("xml.entities", function(v) trace_entities = v end)
+
--[[ldx--
<p>The parser used here is inspired by the variant discussed in the lua book, but
handles comment and processing instructions, has a different structure, provides
@@ -2874,18 +3359,6 @@ parent access; a first version used different trickery but was less optimized to
went this route. First we had a find based parser, now we have an <l n='lpeg'/> based one.
The find based parser can be found in l-xml-edu.lua along with other older code.</p>
-<p>Expecially the lpath code is experimental, we will support some of xpath, but
-only things that make sense for us; as compensation it is possible to hook in your
-own functions. Apart from preprocessing content for <l n='context'/> we also need
-this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
-files.</p>
-
-<typing>
-a/b/c /*/c
-a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
-a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
-</typing>
-
<p>Beware, the interface may change. For instance at, ns, tg, dt may get more
verbose names. Once the code is stable we will also remove some tracing and
optimize the code.</p>
@@ -2896,26 +3369,9 @@ xml = xml or { }
--~ local xml = xml
local concat, remove, insert = table.concat, table.remove, table.insert
-local type, next, setmetatable = type, next, setmetatable
+local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber
local format, lower, find = string.format, string.lower, string.find
-
---[[ldx--
-<p>This module can be used stand alone but also inside <l n='mkiv'/> in
-which case it hooks into the tracker code. Therefore we provide a few
-functions that set the tracers.</p>
---ldx]]--
-
-local trace_remap = false
-
-if trackers then
- trackers.register("xml.remap", function(v) trace_remap = v end)
-end
-
-function xml.settrace(str,value)
- if str == "remap" then
- trace_remap = value or false
- end
-end
+local utfchar = unicode.utf8.char
--[[ldx--
<p>First a hack to enable namespace resolving. A namespace is characterized by
@@ -3022,25 +3478,25 @@ element.</p>
</typing>
--ldx]]--
-xml.strip_cm_and_dt = false -- an extra global flag, in case we have many includes
-
-- not just one big nested table capture (lpeg overflow)
local nsremap, resolvens = xml.xmlns, xml.resolvens
local stack, top, dt, at, xmlns, errorstr, entities = {}, {}, {}, {}, {}, nil, {}
+local strip, cleanup, utfize, resolve = false, false, false, false
-local mt = { __tostring = xml.text }
+local mt = { }
-function xml.check_error(top,toclose)
- return ""
+-- Sets up a fresh metatable for the elements of the tree that is about to be
+-- built: tostring goes via xml.text and unknown fields are looked up in root.
+-- Local, as it is a helper of this module and not meant to become a global.
+local function initialize_mt(root) -- we will make a xml.new that then sets the mt as field
+    mt = { __tostring = xml.text, __index = root }
+end
-local strip = false
-local cleanup = false
+-- Stores v under k in the table that is the __index of root's metatable.
+function xml.setproperty(root,k,v)
+    local shared = getmetatable(root).__index
+    shared[k] = v
+end
-function xml.set_text_cleanup(fnc)
- cleanup = fnc
+function xml.check_error(top,toclose)
+ return ""
end
local function add_attribute(namespace,tag,value)
@@ -3058,6 +3514,22 @@ local function add_attribute(namespace,tag,value)
end
end
+local function add_empty(spacing, namespace, tag) -- action of the emptyelement pattern: adds an element without content to the element on top of the stack
+ if #spacing > 0 then
+ dt[#dt+1] = spacing -- the whitespace in front of the tag goes into the current content list
+ end
+ local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace
+ top = stack[#stack]
+ dt = top.dt
+ local t = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = top } -- at holds the attributes collected for this tag, __p__ refers to the parent
+ dt[#dt+1] = t
+ setmetatable(t, mt)
+ if at.xmlns then
+ remove(xmlns) -- presumably undoes a push done while the xmlns attribute was collected (not visible here)
+ end
+ at = { } -- start with a clean attribute table for the next tag
+end
+
local function add_begin(spacing, namespace, tag)
if #spacing > 0 then
dt[#dt+1] = spacing
@@ -3083,28 +3555,12 @@ local function add_end(spacing, namespace, tag)
end
dt = top.dt
dt[#dt+1] = toclose
- dt[0] = top
+ -- dt[0] = top -- nasty circular reference when serializing table
if toclose.at.xmlns then
remove(xmlns)
end
end
-local function add_empty(spacing, namespace, tag)
- if #spacing > 0 then
- dt[#dt+1] = spacing
- end
- local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace
- top = stack[#stack]
- dt = top.dt
- local t = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = top }
- dt[#dt+1] = t
- setmetatable(t, mt)
- if at.xmlns then
- remove(xmlns)
- end
- at = { }
-end
-
local function add_text(text)
if cleanup and #text > 0 then
dt[#dt+1] = cleanup(text)
@@ -3128,7 +3584,109 @@ local function set_message(txt)
errorstr = "garbage at the end of the file: " .. gsub(txt,"([ \n\r\t]*)","")
end
-local P, S, R, C, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V
+local reported_attribute_errors = { }
+
+-- Action for a malformed attribute value: the first time a given string is
+-- seen it is reported and stored as _error_ attribute; the string itself is
+-- always returned.
+local function attribute_value_error(str)
+    if reported_attribute_errors[str] then
+        return str
+    end
+    logs.report("xml","invalid attribute value: %q",str)
+    reported_attribute_errors[str] = true
+    at._error_ = str
+    return str
+end
+-- Action for a malformed attribute specification: the first time a given
+-- string is seen it is reported and stored as _error_ attribute; the string
+-- itself is always returned.
+local function attribute_specification_error(str)
+    if reported_attribute_errors[str] then
+        return str
+    end
+    logs.report("xml","invalid attribute specification: %q",str)
+    reported_attribute_errors[str] = true
+    at._error_ = str
+    return str
+end
+
+local dcache, hcache, acache = { }, { }, { }
+
+function xml.unknown_dec_entity_format(str) return format("&%s;", str) end -- fallback text used by handle_dec_entity when the digits are not a number
+function xml.unknown_hex_entity_format(str) return format("&#x%s;",str) end -- fallback text used by handle_hex_entity when the digits are not valid hex
+function xml.unknown_any_entity_format(str) return format("&%s;", str) end -- fallback text used by handle_any_entity for an entity it cannot resolve
+
+-- Action for the digits of a hexadecimal character reference (what sits
+-- between "&#x" and ";"); the outcome is cached per string in hcache.
+-- With utfize set the reference becomes the utf character, or the outcome of
+-- xml.unknown_hex_entity_format when the digits are not valid hex; otherwise
+-- the reference is put back as it was.
+local function handle_hex_entity(str)
+    local h = hcache[str]
+    if not h then
+        if utfize then
+            local n = tonumber(str,16)
+            h = (n and utfchar(n)) or xml.unknown_hex_entity_format(str) or ""
+            if not n then
+                logs.report("xml","utfize, ignoring hex entity &#x%s;",str)
+            elseif trace_entities then
+                -- report the outcome h (this used to pass an undefined variable)
+                logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,h)
+            end
+        else
+            if trace_entities then
+                logs.report("xml","found entity &#x%s;",str)
+            end
+            h = "&#x" .. str .. ";" -- with the x, else a decimal reference would come out
+        end
+        hcache[str] = h
+    end
+    return h
+end
+-- Action for the digits of a decimal character reference (what sits between
+-- "&#" and ";"); the outcome is cached per string in dcache.
+-- With utfize set the reference becomes the utf character, or the outcome of
+-- xml.unknown_dec_entity_format when the digits are not a number; otherwise
+-- the reference is put back as it was.
+local function handle_dec_entity(str)
+    local d = dcache[str]
+    if not d then
+        if utfize then
+            local n = tonumber(str)
+            d = (n and utfchar(n)) or xml.unknown_dec_entity_format(str) or ""
+            if not n then
+                logs.report("xml","utfize, ignoring dec entity &#%s;",str)
+            elseif trace_entities then
+                -- report the outcome d (this used to pass an undefined variable)
+                logs.report("xml","utfize, converting dec entity &#%s; into %s",str,d)
+            end
+        else
+            if trace_entities then
+                logs.report("xml","found entity &#%s;",str)
+            end
+            d = "&#" .. str .. ";" -- with the #, else a named entity would come out
+        end
+        dcache[str] = d
+    end
+    return d
+end
+-- Action for the name of a named entity (what sits between "&" and ";").
+-- With resolve set the name is looked up in the entities of the current
+-- conversion; an unknown one gets what xml.unknown_any_entity_format returns
+-- (cached in acache). Without resolve the entity is put back as it was (also
+-- cached in acache).
+local function handle_any_entity(str)
+    if resolve then
+        local a = entities[str] -- per instance !
+        if not a then
+            a = acache[str]
+            if not a then
+                if trace_entities then
+                    logs.report("xml","ignoring entity &%s;",str)
+                end
+                -- such an entity can be defined in a global mapper and intercepted
+                -- elsewhere as happens in lxml-tex.lua
+                a = xml.unknown_any_entity_format(str) or ""
+                acache[str] = a
+            end
+        elseif trace_entities then
+            if not acache[str] then
+                -- report the replacement a (this used to pass an undefined variable)
+                logs.report("xml","converting entity &%s; into %s",str,a)
+                acache[str] = a
+            end
+        end
+        return a
+    else
+        local a = acache[str]
+        if not a then
+            if trace_entities then
+                logs.report("xml","found entity &%s;",str)
+            end
+            a = "&" .. str .. ";"
+            acache[str] = a
+        end
+        return a
+    end
+end
+
+local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs
local space = S(' \r\n\t')
local open = P('<')
@@ -3138,6 +3696,8 @@ local dquote = S('"')
local equal = P('=')
local slash = P('/')
local colon = P(':')
+local semicolon = P(';')
+local ampersand = P('&')
local valid = R('az', 'AZ', '09') + S('_-.')
local name_yes = C(valid^1) * colon * C(valid^1)
local name_nop = C(P(true)) * C(valid^1)
@@ -3147,15 +3707,36 @@ local utfbom = P('\000\000\254\255') + P('\255\254\000\000') +
P('\255\254') + P('\254\255') + P('\239\187\191') -- no capture
local spacing = C(space^0)
-local justtext = C((1-open)^1)
+
+local entitycontent = (1-open-semicolon)^0
+local entity = ampersand/"" * (
+ P("#")/"" * (
+ P("x")/"" * (entitycontent/handle_hex_entity) +
+ (entitycontent/handle_dec_entity)
+ ) + (entitycontent/handle_any_entity)
+ ) * (semicolon/"")
+
+local text_unparsed = C((1-open)^1)
+local text_parsed = Cs(((1-open-ampersand)^1 + entity)^1)
+
local somespace = space^1
local optionalspace = space^0
-local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote)
-local attribute = (somespace * name * optionalspace * equal * optionalspace * value) / add_attribute
-local attributes = attribute^0
+local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote) -- ampersand and < also invalid in value
+
+local whatever = space * name * optionalspace * equal
+local wrongvalue = C(P(1-whatever-close)^1 + P(1-close)^1) / attribute_value_error
+
+local attributevalue = value + wrongvalue
+
+local attribute = (somespace * name * optionalspace * equal * optionalspace * attributevalue) / add_attribute
+----- attributes = (attribute)^0
+
+local endofattributes = slash * close + close -- recovery of flacky html
+local attributes = (attribute + somespace^-1 * (((1-endofattributes)^1)/attribute_specification_error))^0
-local text = justtext / add_text
+local parsedtext = text_parsed / add_text
+local unparsedtext = text_unparsed / add_text
local balanced = P { "[" * ((1 - S"[]") + V(1))^0 * "]" } -- taken from lpeg manual, () example
local emptyelement = (spacing * open * name * attributes * optionalspace * slash * close) / add_empty
@@ -3208,42 +3789,72 @@ local doctype = (spacing * begindoctype * somedoctype * enddoct
-- local cdata = (lpeg.Cc("@cd@") * spacing * begincdata * somecdata * endcdata ) / add_special
-- local doctype = (lpeg.Cc("@dt@") * spacing * begindoctype * somedoctype * enddoctype ) / add_special
-local trailer = space^0 * (justtext/set_message)^0
+local trailer = space^0 * (text_unparsed/set_message)^0
-- comment + emptyelement + text + cdata + instruction + V("parent"), -- 6.5 seconds on 40 MB database file
-- text + comment + emptyelement + cdata + instruction + V("parent"), -- 5.8
-- text + V("parent") + emptyelement + comment + cdata + instruction, -- 5.5
-local grammar = P { "preamble",
+local grammar_parsed_text = P { "preamble",
preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * V("parent") * trailer,
parent = beginelement * V("children")^0 * endelement,
- children = text + V("parent") + emptyelement + comment + cdata + instruction,
+ children = parsedtext + V("parent") + emptyelement + comment + cdata + instruction,
}
--- todo: xml.new + properties like entities and strip and such (store in root)
+local grammar_unparsed_text = P { "preamble",
+ preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * V("parent") * trailer,
+ parent = beginelement * V("children")^0 * endelement,
+ children = unparsedtext + V("parent") + emptyelement + comment + cdata + instruction,
+}
-function xml.convert(data, no_root, strip_cm_and_dt, given_entities) -- maybe use table met k/v (given_entities may disapear)
- strip = strip_cm_and_dt or xml.strip_cm_and_dt
- stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, given_entities or {}
+local function xmlconvert(data, settings)
+ settings = settings or { } -- no_root strip_cm_and_dt given_entities parent_root error_handler
+ strip = settings.strip_cm_and_dt
+ utfize = settings.utfize_entities
+ resolve = settings.resolve_entities
+ cleanup = settings.text_cleanup
+ stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, settings.entities or {}
+ reported_attribute_errors = { }
+ if settings.parent_root then
+ mt = getmetatable(settings.parent_root)
+ else
+ initialize_mt(top)
+ end
stack[#stack+1] = top
top.dt = { }
dt = top.dt
if not data or data == "" then
errorstr = "empty xml file"
- elseif not grammar:match(data) then
- errorstr = "invalid xml file"
+ elseif utfize or resolve then
+ if grammar_parsed_text:match(data) then
+ errorstr = ""
+ else
+ errorstr = "invalid xml file - parsed text"
+ end
else
- errorstr = ""
+ if grammar_unparsed_text:match(data) then
+ errorstr = ""
+ else
+ errorstr = "invalid xml file - unparsed text"
+ end
end
if errorstr and errorstr ~= "" then
- result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={}, er = true } }, error = true }
+ result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={}, er = true } } }
setmetatable(stack, mt)
- if xml.error_handler then xml.error_handler("load",errorstr) end
+ local error_handler = settings.error_handler
+ if error_handler == false then
+ -- no error message
+ else
+ error_handler = error_handler or xml.error_handler
+ if error_handler then
+ xml.error_handler("load",errorstr)
+ end
+ end
else
result = stack[1]
end
- if not no_root then
- result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={}, entities = entities }
+ if not settings.no_root then
+ result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={}, entities = entities, settings = settings }
setmetatable(result, mt)
local rdt = result.dt
for k=1,#rdt do
@@ -3254,9 +3865,14 @@ function xml.convert(data, no_root, strip_cm_and_dt, given_entities) -- maybe us
end
end
end
+ if errorstr and errorstr ~= "" then
+ result.error = true
+ end
return result
end
+xml.convert = xmlconvert
+
--[[ldx--
<p>Packaging data in an xml like table is done with the following
function. Maybe it will go away (when not used).</p>
@@ -3289,16 +3905,16 @@ function xml.load(filename)
if type(filename) == "string" then
local f = io.open(filename,'r')
if f then
- local root = xml.convert(f:read("*all"))
+ local root = xmlconvert(f:read("*all"))
f:close()
return root
else
- return xml.convert("")
+ return xmlconvert("")
end
elseif filename then -- filehandle
- return xml.convert(filename:read("*all"))
+ return xmlconvert(filename:read("*all"))
else
- return xml.convert("")
+ return xmlconvert("")
end
end
@@ -3307,9 +3923,11 @@ end
valid trees, which is what the next function does.</p>
--ldx]]--
+local no_root = { no_root = true }
+
function xml.toxml(data)
if type(data) == "string" then
- local root = { xml.convert(data,true) }
+ local root = { xmlconvert(data,no_root) }
return (#root > 1 and root) or root[1]
else
return data
@@ -3354,217 +3972,305 @@ alternative.</p>
-- todo: add <?xml version='1.0' standalone='yes'?> when not present
-local fallbackhandle = (tex and tex.sprint) or io.write
-
-local function serialize(e, handle, textconverter, attributeconverter, specialconverter, nocommands)
- if not e then
- return
- elseif not nocommands then
- local ec = e.command
- if ec ~= nil then -- we can have all kind of types
- if e.special then
- local etg, edt = e.tg, e.dt
- local spc = specialconverter and specialconverter[etg]
- if spc then
- local result = spc(edt[1])
- if result then
- handle(result)
- return
- else
- -- no need to handle any further
- end
- end
- end
- local xc = xml.command
- if xc then
- xc(e,ec)
- return
+-- Makes sure a loaded document starts with an <?xml ... ?> declaration.
+-- Only acts when root.ri is set: the top level entries of root.dt are scanned
+-- for a processing instruction (tg "@pi@") whose text matches "xml.*version="
+-- and when there is none a default declaration followed by a newline is
+-- inserted at the front of root.dt. The tree is changed in place and nothing
+-- is returned.
+function xml.checkbom(root) -- can be made faster
+    if root.ri then
+        local dt, found = root.dt, false
+        for k=1,#dt do
+            local v = dt[k]
+            -- processing instructions are tagged "@pi@" (see the insert below) and
+            -- keep their text in dt[1]; the former test on "@pi" never matched, so
+            -- a second declaration was always added
+            if type(v) == "table" and v.special and v.tg == "@pi@" and find(v.dt[1],"xml.*version=") then
+                found = true
+                break
            end
        end
+        if not found then
+            insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } )
+            insert(dt, 2, "\n" )
+        end
    end
- handle = handle or fallbackhandle
- local etg = e.tg
- if etg then
- if e.special then
- local edt = e.dt
- local spc = specialconverter and specialconverter[etg]
- if spc then
- local result = spc(edt[1])
- if result then
- handle(result)
+end
+
+--[[ldx--
+<p>At the cost of some 25% runtime overhead you can first convert the tree to a string
+and then handle the lot.</p>
+--ldx]]--
+
+-- new experimental reorganized serialize
+
+local function verbose_element(e,handlers)
+ local handle = handlers.handle
+ local serialize = handlers.serialize
+ local ens, etg, eat, edt, ern = e.ns, e.tg, e.at, e.dt, e.rn
+ local ats = eat and next(eat) and { }
+ if ats then
+ for k,v in next, eat do
+ ats[#ats+1] = format('%s=%q',k,v)
+ end
+ end
+ if ern and trace_remap and ern ~= ens then
+ ens = ern
+ end
+ if ens ~= "" then
+ if edt and #edt > 0 then
+ if ats then
+ handle("<",ens,":",etg," ",concat(ats," "),">")
+ else
+ handle("<",ens,":",etg,">")
+ end
+ for i=1,#edt do
+ local e = edt[i]
+ if type(e) == "string" then
+ handle(e)
else
- -- no need to handle any further
+ serialize(e,handlers)
end
- elseif etg == "@pi@" then
- -- handle(format("<?%s?>",edt[1]))
- handle("<?" .. edt[1] .. "?>")
- elseif etg == "@cm@" then
- -- handle(format("<!--%s-->",edt[1]))
- handle("<!--" .. edt[1] .. "-->")
- elseif etg == "@cd@" then
- -- handle(format("<![CDATA[%s]]>",edt[1]))
- handle("<![CDATA[" .. edt[1] .. "]]>")
- elseif etg == "@dt@" then
- -- handle(format("<!DOCTYPE %s>",edt[1]))
- handle("<!DOCTYPE " .. edt[1] .. ">")
- elseif etg == "@rt@" then
- serialize(edt,handle,textconverter,attributeconverter,specialconverter,nocommands)
end
+ handle("</",ens,":",etg,">")
else
- local ens, eat, edt, ern = e.ns, e.at, e.dt, e.rn
- local ats = eat and next(eat) and { } -- type test maybe faster
if ats then
- if attributeconverter then
- for k,v in next, eat do
- ats[#ats+1] = format('%s=%q',k,attributeconverter(v))
- end
- else
- for k,v in next, eat do
- ats[#ats+1] = format('%s=%q',k,v)
- end
- end
+ handle("<",ens,":",etg," ",concat(ats," "),"/>")
+ else
+ handle("<",ens,":",etg,"/>")
end
- if ern and trace_remap and ern ~= ens then
- ens = ern
+ end
+ else
+ if edt and #edt > 0 then
+ if ats then
+ handle("<",etg," ",concat(ats," "),">")
+ else
+ handle("<",etg,">")
end
- if ens ~= "" then
- if edt and #edt > 0 then
- if ats then
- -- handle(format("<%s:%s %s>",ens,etg,concat(ats," ")))
- handle("<" .. ens .. ":" .. etg .. " " .. concat(ats," ") .. ">")
- else
- -- handle(format("<%s:%s>",ens,etg))
- handle("<" .. ens .. ":" .. etg .. ">")
- end
- for i=1,#edt do
- local e = edt[i]
- if type(e) == "string" then
- if textconverter then
- handle(textconverter(e))
- else
- handle(e)
- end
- else
- serialize(e,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
- end
- -- handle(format("</%s:%s>",ens,etg))
- handle("</" .. ens .. ":" .. etg .. ">")
+ for i=1,#edt do
+ local ei = edt[i]
+ if type(ei) == "string" then
+ handle(ei)
else
- if ats then
- -- handle(format("<%s:%s %s/>",ens,etg,concat(ats," ")))
- handle("<" .. ens .. ":" .. etg .. " " .. concat(ats," ") .. "/>")
- else
- -- handle(format("<%s:%s/>",ens,etg))
- handle("<" .. ens .. ":" .. etg .. "/>")
- end
+ serialize(ei,handlers)
end
+ end
+ handle("</",etg,">")
+ else
+ if ats then
+ handle("<",etg," ",concat(ats," "),"/>")
else
- if edt and #edt > 0 then
- if ats then
- -- handle(format("<%s %s>",etg,concat(ats," ")))
- handle("<" .. etg .. " " .. concat(ats," ") .. ">")
- else
- -- handle(format("<%s>",etg))
- handle("<" .. etg .. ">")
- end
- for i=1,#edt do
- local ei = edt[i]
- if type(ei) == "string" then
- if textconverter then
- handle(textconverter(ei))
- else
- handle(ei)
- end
- else
- serialize(ei,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
- end
- -- handle(format("</%s>",etg))
- handle("</" .. etg .. ">")
- else
- if ats then
- -- handle(format("<%s %s/>",etg,concat(ats," ")))
- handle("<" .. etg .. " " .. concat(ats," ") .. "/>")
- else
- -- handle(format("<%s/>",etg))
- handle("<" .. etg .. "/>")
- end
- end
+ handle("<",etg,"/>")
end
end
- elseif type(e) == "string" then
- if textconverter then
- handle(textconverter(e))
+ end
+end
+
+-- Serializes a processing instruction: "<?", the text kept in e.dt[1] and "?>"
+-- are handed to handlers.handle in one call.
+local function verbose_pi(e,handlers)
+    local emit = handlers.handle
+    emit("<?",e.dt[1],"?>")
+end
+
+-- Serializes a comment: "<!--", the text kept in e.dt[1] and "-->" are handed
+-- to handlers.handle in one call.
+local function verbose_comment(e,handlers)
+    local emit = handlers.handle
+    emit("<!--",e.dt[1],"-->")
+end
+
+-- Serializes a cdata section: "<![CDATA[", the text kept in e.dt[1] and "]]>"
+-- are handed to handlers.handle in one call.
+local function verbose_cdata(e,handlers)
+    local emit = handlers.handle
+    emit("<![CDATA[",e.dt[1],"]]>")
+end
+
+-- Serializes a doctype declaration: "<!DOCTYPE ", the text kept in e.dt[1] and
+-- ">" are handed to handlers.handle in one call.
+local function verbose_doctype(e,handlers)
+    local emit = handlers.handle
+    emit("<!DOCTYPE ",e.dt[1],">")
+end
+
+-- Serializes a root node: its content list (e.dt) is passed on to the
+-- serializer of the handler set.
+local function verbose_root(e,handlers)
+    local serialize = handlers.serialize
+    serialize(e.dt,handlers)
+end
+
+-- Serializes a piece of text: the value is handed to handlers.handle as is.
+local function verbose_text(e,handlers)
+    local emit = handlers.handle
+    emit(e)
+end
+
+-- Serializes a plain list of nodes (a table without a tg field): strings go
+-- to the "@tx@" function of the handler set, everything else is passed to
+-- handlers.serialize.
+local function verbose_document(e,handlers)
+ local serialize = handlers.serialize
+ local functions = handlers.functions
+ for i=1,#e do
+ local ei = e[i]
+ if type(ei) == "string" then
+ functions["@tx@"](ei,handlers)
else
- handle(e)
+ serialize(ei,handlers)
end
- else
- for i=1,#e do
- local ei = e[i]
- if type(ei) == "string" then
- if textconverter then
- handle(textconverter(ei))
- else
- handle(ei)
- end
- else
- serialize(ei,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
+ end
+end
+
+-- Top level serializer: runs a tree (or a plain list of nodes) through a
+-- handler set.
+--
+-- e        : element table (has a tg field) or a list of nodes and strings
+-- handlers : table with initialize, finalize and functions entries
+-- ...      : passed unchanged to handlers.initialize
+--
+-- When initialize is set and returns nil or false (for instance because a
+-- file could not be opened) that value is returned and nothing is serialized.
+-- When finalize is set its result is returned, otherwise nothing is returned.
+local function serialize(e,handlers,...)
+    if not e then
+        return -- nothing to serialize, so don't run initialize either
+    end
+    local initialize = handlers.initialize
+    local finalize = handlers.finalize
+    local functions = handlers.functions
+    if initialize then
+        local state = initialize(...)
+        -- this used to read 'not state == true', which lua parses as
+        -- '(not state) == true': a plain test for a failed initialize
+        if not state then
+            return state
        end
    end
+    local etg = e.tg
+    if etg then
+        (functions[etg] or functions["@el@"])(e,handlers)
+    -- elseif type(e) == "string" then
+    --     functions["@tx@"](e,handlers)
+    else
+        functions["@dc@"](e,handlers)
+    end
+    if finalize then
+        return finalize()
+    end
end
-xml.serialize = serialize
+-- Nested serializer (no initialize or finalize step): dispatches on the tag of
+-- e. A node with a tag goes to the function registered for that tag, or to
+-- "@el@" when there is none; a node without a tag is treated as a document
+-- ("@dc@").
+local function xserialize(e,handlers)
+    local dispatch = handlers.functions
+    local tag = e.tg
+    if not tag then
+        dispatch["@dc@"](e,handlers)
+    else
+        local action = dispatch[tag] or dispatch["@el@"]
+        action(e,handlers)
+    end
+end
-function xml.checkbom(root) -- can be made faster
- if root.ri then
- local dt, found = root.dt, false
- for k=1,#dt do
- local v = dt[k]
- if type(v) == "table" and v.special and v.tg == "@pi" and find(v.dt,"xml.*version=") then
- found = true
- break
+local handlers = { }
+
+-- Creates a handler set: a copy of the "verbose" handlers (or an empty table
+-- when those are not defined yet) with the given settings merged in. Values
+-- that are tables (like 'functions') are merged key by key into the copy, all
+-- other values replace the inherited ones. When settings.name is given the
+-- result is also stored in the handlers registry under that name. Returns the
+-- new handler set.
+-- NOTE(review): table.copy is defined elsewhere; this assumes it copies nested
+-- tables too, otherwise the merge would also change the "verbose" set - confirm.
+local function newhandlers(settings)
+    local t = table.copy(handlers.verbose or { }) -- merge
+    if settings then
+        for k,v in next, settings do
+            if type(v) == "table" then
+                -- tk has to be a local, it used to end up in the global table
+                local tk = t[k] if not tk then tk = { } t[k] = tk end
+                for kk,vv in next, v do
+                    tk[kk] = vv
+                end
+            else
+                t[k] = v
            end
        end
- if not found then
- insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } )
- insert(dt, 2, "\n" )
+        if settings.name then
+            handlers[settings.name] = t
        end
    end
+    return t
+end
+
+local nofunction = function() end
+
+-- Registers fnc as the serializer for entries tagged 'name' in the given
+-- handler set; without fnc a do-nothing function is installed instead.
+function xml.sethandlersfunction(handler,name,fnc)
+    local registry = handler.functions
+    registry[name] = fnc or nofunction
end
+-- Returns the serializer registered for 'name' in the given handler set (nil
+-- when there is none).
+function xml.gethandlersfunction(handler,name)
+    local registry = handler.functions
+    return registry[name]
+end
+
+-- Looks up a handler set by the name it was registered under with newhandlers
+-- (nil when that name is unknown).
+function xml.gethandlers(name)
+    local found = handlers[name]
+    return found
+end
+
+newhandlers {
+ name = "verbose",
+ initialize = false, -- faster than nil and mt lookup
+ finalize = false, -- faster than nil and mt lookup
+ serialize = xserialize,
+ handle = print,
+ functions = {
+ ["@dc@"] = verbose_document,
+ ["@dt@"] = verbose_doctype,
+ ["@rt@"] = verbose_root,
+ ["@el@"] = verbose_element,
+ ["@pi@"] = verbose_pi,
+ ["@cm@"] = verbose_comment,
+ ["@cd@"] = verbose_cdata,
+ ["@tx@"] = verbose_text,
+ }
+}
+
--[[ldx--
-<p>At the cost of some 25% runtime overhead you can first convert the tree to a string
-and then handle the lot.</p>
+<p>How you deal with saving data depends on your preferences. For a 40 MB database
+file the timing on a 2.3 Core Duo are as follows (time in seconds):</p>
+
+<lines>
+1.3 : load data from file to string
+6.1 : convert string into tree
+5.3 : saving in file using xmlsave
+6.8 : converting to string using xml.tostring
+3.6 : saving converted string in file
+</lines>
+
+<p>Beware, these timings were measured with the old routine, but I guess the new
+measurements will not differ much.</p>
--ldx]]--
-function xml.tostring(root) -- 25% overhead due to collecting
+-- maybe this will move to lxml-xml
+
+local result
+
+local xmlfilehandler = newhandlers {
+ name = "file",
+ initialize = function(name) result = io.open(name,"wb") return result end,
+ finalize = function() result:close() return true end,
+ handle = function(...) result:write(...) end,
+}
+
+-- no checking on writeability here but not faster either
+--
+-- local xmlfilehandler = newhandlers {
+-- initialize = function(name) io.output(name,"wb") return true end,
+-- finalize = function() io.close() return true end,
+-- handle = io.write,
+-- }
+
+
+-- Serializes root into the file 'name' using the "file" handler set.
+-- Returns true when the file was written and nil when it could not be opened
+-- (the status used to be dropped, so a failed save went unnoticed).
+function xml.save(root,name)
+    return serialize(root,xmlfilehandler,name)
+end
+
+local result
+
+local xmlstringhandler = newhandlers {
+ name = "string",
+ initialize = function() result = { } return result end,
+ finalize = function() return concat(result) end,
+ handle = function(...) result[#result+1] = concat { ... } end
+}
+
+local function xmltostring(root) -- 25% overhead due to collecting
if root then
if type(root) == 'string' then
return root
- elseif next(root) then -- next is faster than type (and >0 test)
- local result = { }
- serialize(root,function(s) result[#result+1] = s end) -- brrr, slow (direct printing is faster)
- return concat(result,"")
+ else -- if next(root) then -- next is faster than type (and >0 test)
+ return serialize(root,xmlstringhandler) or ""
end
end
return ""
end
+local function xmltext(root) -- inline
+ return (root and xmltostring(root)) or ""
+end
+
+function initialize_mt(root)
+ mt = { __tostring = xmltext, __index = root }
+end
+
+xml.defaulthandlers = handlers
+xml.newhandlers = newhandlers
+xml.serialize = serialize
+xml.tostring = xmltostring
+xml.text = xmltext
+
--[[ldx--
<p>The next function operated on the content only and needs a handle function
that accepts a string.</p>
--ldx]]--
-function xml.string(e,handle)
+local function xmlstring(e,handle)
if not handle or (e.special and e.tg ~= "@rt@") then
-- nothing
elseif e.tg then
local edt = e.dt
if edt then
for i=1,#edt do
- xml.string(edt[i],handle)
+ xmlstring(edt[i],handle)
end
end
else
@@ -3572,33 +4278,16 @@ function xml.string(e,handle)
end
end
---[[ldx--
-<p>How you deal with saving data depends on your preferences. For a 40 MB database
-file the timing on a 2.3 Core Duo are as follows (time in seconds):</p>
-
-<lines>
-1.3 : load data from file to string
-6.1 : convert string into tree
-5.3 : saving in file using xmlsave
-6.8 : converting to string using xml.tostring
-3.6 : saving converted string in file
-</lines>
-
-<p>The save function is given below.</p>
---ldx]]--
-
-function xml.save(root,name)
- local f = io.open(name,"w")
- if f then
- xml.serialize(root,function(s) f:write(s) end)
- f:close()
- end
-end
+xml.string = xmlstring
--[[ldx--
<p>A few helpers:</p>
--ldx]]--
+function xml.parent(root)
+ return root.__p__
+end
+
function xml.body(root)
return (root.ri and root.dt[root.ri]) or root
end
@@ -3611,34 +4300,19 @@ function xml.content(root) -- bugged
return (root and root.dt and xml.tostring(root.dt)) or ""
end
-function xml.isempty(root, pattern)
- if pattern == "" or pattern == "*" then
- pattern = nil
- end
- if pattern then
- -- todo
- return false
- else
- return not root or not root.dt or #root.dt == 0 or root.dt == ""
- end
-end
-
--[[ldx--
<p>The next helper erases an element but keeps the table as it is,
and since empty strings are not serialized (effectively) it does
not harm. Copying the table would take more time. Usage:</p>
-
-<typing>
-dt[k] = xml.empty() or xml.empty(dt,k)
-</typing>
--ldx]]--
-function xml.empty(dt,k)
- if dt and k then
- dt[k] = ""
- return dt[k]
- else
- return ""
+-- Erases entries of a content list in place by replacing them with empty
+-- strings: only entry k when k is given, otherwise every entry. Does nothing
+-- when dt is nil and returns nothing.
+function xml.erase(dt,k)
+    if dt then
+        if k then
+            dt[k] = ""
+        else
+            -- the loop used to assign { "" } to dt[1] on every pass, so only the
+            -- first entry changed (and became a table instead of a string)
+            for i=1,#dt do
+                dt[i] = ""
+            end
+        end
    end
end
@@ -3672,96 +4346,403 @@ if not modules then modules = { } end modules ['lxml-pth'] = {
license = "see context related readme files"
}
+-- e.ni is only valid after a filter run
+
local concat, remove, insert = table.concat, table.remove, table.insert
local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
-local format, lower, gmatch, gsub, find, rep = string.format, string.lower, string.gmatch, string.gsub, string.find, string.rep
+local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
--[[ldx--
<p>This module can be used stand alone but also inside <l n='mkiv'/> in
which case it hooks into the tracker code. Therefore we provide a few
functions that set the tracers. Here we overload a previously defined
function.</p>
+<p>If I can get in the mood I will make a variant that is XSLT compliant,
+but I wonder if it makes sense.</p>
--ldx]]--
-local trace_lpath = false
-
-if trackers then
- trackers.register("xml.lpath", function(v) trace_lpath = v end)
-end
+--[[ldx--
+<p>Especially the lpath code is experimental; we will support some of xpath, but
+only things that make sense for us; as compensation it is possible to hook in your
+own functions. Apart from preprocessing content for <l n='context'/> we also need
+this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
+files.</p>
-local settrace = xml.settrace -- lxml-tab
+<typing>
+a/b/c /*/c
+a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
+a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
+</typing>
+--ldx]]--
-function xml.settrace(str,value)
- if str == "lpath" then
- trace_lpath = value or false
- else
- settrace(str,value) -- lxml-tab
- end
-end
+local trace_lpath = false if trackers then trackers.register("xml.path", function(v) trace_lpath = v end) end
+local trace_lparse = false if trackers then trackers.register("xml.parse", function(v) trace_lparse = v end) end
+local trace_lprofile = false if trackers then trackers.register("xml.profile", function(v) trace_lpath = v trace_lparse = v trace_lprofile = v end) end
--[[ldx--
-<p>We've now arrived at an intersting part: accessing the tree using a subset
+<p>We've now arrived at an interesting part: accessing the tree using a subset
of <l n='xpath'/> and since we're not compatible we call it <l n='lpath'/>. We
will explain more about its usage in other documents.</p>
--ldx]]--
-local lpathcalls = 0 -- statistics
-local lpathcached = 0 -- statistics
+local lpathcalls = 0 function xml.lpathcalls () return lpathcalls end
+local lpathcached = 0 function xml.lpathcached() return lpathcached end
-xml.functions = xml.functions or { }
-xml.expressions = xml.expressions or { }
+xml.functions = xml.functions or { } -- internal
+xml.expressions = xml.expressions or { } -- in expressions
+xml.finalizers = xml.finalizers or { } -- fast do-with ... (with return value other than collection)
+xml.specialhandler = xml.specialhandler or { }
local functions = xml.functions
local expressions = xml.expressions
+local finalizers = xml.finalizers
-local actions = {
- [10] = "stay",
- [11] = "parent",
- [12] = "subtree root",
- [13] = "document root",
- [14] = "any",
- [15] = "many",
- [16] = "initial",
- [20] = "match",
- [21] = "match one of",
- [22] = "match and attribute eq",
- [23] = "match and attribute ne",
- [24] = "match one of and attribute eq",
- [25] = "match one of and attribute ne",
- [27] = "has attribute",
- [28] = "has value",
- [29] = "fast match",
- [30] = "select",
- [31] = "expression",
- [40] = "processing instruction",
-}
+finalizers.xml = finalizers.xml or { }
+finalizers.tex = finalizers.tex or { }
+
+-- __index handler for the protocol specific finalizer tables: looks the name
+-- up in the shared finalizers table and caches a hit in t. An unknown name is
+-- reported and answered with a fresh do-nothing function that is not cached.
+local function fallback (t, name)
+    local fn = finalizers[name]
+    if not fn then
+        logs.report("xml","unknown sub finalizer '%s'",tostring(name))
+        return function() end
+    end
+    t[name] = fn
+    return fn
+end
+
+setmetatable(finalizers.xml, { __index = fallback })
+setmetatable(finalizers.tex, { __index = fallback })
+
+xml.defaultprotocol = "xml"
+
+-- as xsl does not follow xpath completely here we will also
+-- be more liberal especially with regards to the use of | and
+-- the rootpath:
+--
+-- test : all 'test' under current
+-- /test : 'test' relative to current
+-- a|b|c : set of names
+-- (a|b|c) : idem
+-- ! : not
+--
+-- after all, we're not doing transformations but filtering. in
+-- addition we provide filter functions (last bit)
+--
+-- todo: optimizer
+--
+-- .. : parent
+-- * : all kids
+-- / : anchor here
+-- // : /**/
+-- ** : all in between
+--
+-- so far we had (more practical as we don't transform)
+--
+-- {/test} : kids 'test' under current node
+-- {test} : any kid with tag 'test'
+-- {//test} : same as above
--- a rather dumb lpeg
+-- evaluator (needs to be redone, for the moment copied)
-local P, S, R, C, V, Cc = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc
+-- todo: apply_axis(list,notable) and collection vs single
--- instead of using functions we just parse a few names which saves a call
--- later on
+local apply_axis = { }
-local lp_position = P("position()") / "ps"
-local lp_index = P("index()") / "id"
-local lp_text = P("text()") / "tx"
-local lp_name = P("name()") / "(ns~='' and ns..':'..tg)" -- "((rt.ns~='' and rt.ns..':'..rt.tg) or '')"
-local lp_tag = P("tag()") / "tg" -- (rt.tg or '')
-local lp_ns = P("ns()") / "ns" -- (rt.ns or '')
-local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==")
-local lp_doequal = P("=") / "=="
-local lp_attribute = P("@") / "" * Cc("(at['") * R("az","AZ","--","__")^1 * Cc("'] or '')")
+-- Root axis: for every node in the list follow the __p__ links upwards and
+-- collect the topmost node that is reached (the node itself when it has no
+-- parent).
+apply_axis['root'] = function(list)
+    local roots = { }
+    for index=1,#list do
+        local top = list[index]
+        local up = top and top.__p__
+        while up do
+            top = up
+            up = up.__p__
+        end
+        roots[#roots+1] = top
+    end
+    return roots
+end
+
+apply_axis['self'] = function(list)
+--~ local collected = { }
+--~ for l=1,#list do
+--~ collected[#collected+1] = list[l]
+--~ end
+--~ return collected
+ return list
+end
-local lp_lua_function = C(R("az","AZ","--","__")^1 * (P(".") * R("az","AZ","--","__")^1)^1) * P("(") / function(t) -- todo: better . handling
+-- Child axis: returns all element children (entries of dt that have a tg) of
+-- the given nodes and refreshes their ni field (the index in the dt of the
+-- parent).
+apply_axis['child'] = function(list)
+    local collected = { }
+    for l=1,#list do
+        local dt = list[l].dt
+        if dt then -- a node can lack a dt, collect guards against this as well
+            for k=1,#dt do
+                local dk = dt[k]
+                if dk.tg then
+                    collected[#collected+1] = dk
+                    dk.ni = k -- refresh
+                end
+            end
+        end
+    end
+    return collected
+end
+
+local function collect(list,collected)
+ local dt = list.dt
+ if dt then
+ for k=1,#dt do
+ local dk = dt[k]
+ if dk.tg then
+ collected[#collected+1] = dk
+ dk.ni = k -- refresh
+ collect(dk,collected)
+ end
+ end
+ end
+end
+apply_axis['descendant'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ collect(list[l],collected)
+ end
+ return collected
+end
+
+local function collect(list,collected)
+ local dt = list.dt
+ if dt then
+ for k=1,#dt do
+ local dk = dt[k]
+ if dk.tg then
+ collected[#collected+1] = dk
+ dk.ni = k -- refresh
+ collect(dk,collected)
+ end
+ end
+ end
+end
+apply_axis['descendant-or-self'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+if ll.special ~= true then -- catch double root
+ collected[#collected+1] = ll
+end
+ collect(ll,collected)
+ end
+ return collected
+end
+
+apply_axis['ancestor'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ while ll do
+ ll = ll.__p__
+ if ll then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ return collected
+end
+
+apply_axis['ancestor-or-self'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ collected[#collected+1] = ll
+ while ll do
+ ll = ll.__p__
+ if ll then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ return collected
+end
+
+-- Parent axis: collects the __p__ of every node in the list, skipping nodes
+-- that have none.
+apply_axis['parent'] = function(list)
+    local parents, count = { }, 0
+    for index=1,#list do
+        local parent = list[index].__p__
+        if parent then
+            count = count + 1
+            parents[count] = parent
+        end
+    end
+    return parents
+end
+
+apply_axis['attribute'] = function(list)
+ return { }
+end
+
+apply_axis['following'] = function(list)
+ return { }
+end
+
+apply_axis['following-sibling'] = function(list)
+ return { }
+end
+
+apply_axis['namespace'] = function(list)
+ return { }
+end
+
+apply_axis['preceding'] = function(list)
+ return { }
+end
+
+apply_axis['preceding-sibling'] = function(list)
+ return { }
+end
+
+apply_axis['auto-descendant-or-self'] = apply_axis['descendant-or-self']
+apply_axis['auto-descendant'] = apply_axis['descendant']
+apply_axis['auto-child'] = apply_axis['child']
+apply_axis['auto-self'] = apply_axis['self']
+apply_axis['initial-child'] = apply_axis['child']
+
+-- Filters a list of nodes by namespace and tag.
+--
+-- list      : the nodes to test
+-- directive : when true the matching nodes are kept, otherwise the nodes that
+--             do not match are kept
+-- nodes     : flat list of triplets; of each triplet the second entry is the
+--             namespace and the third the tag to match (nil acts as wildcard),
+--             the first entry is not used here
+--
+-- Returns a new list, except for a single wildcard triplet with a true
+-- directive, in which case the given list itself is returned. The namespace
+-- of a node is its rn field when set and its ns field otherwise.
+local function apply_nodes(list,directive,nodes)
+ -- todo: nodes[1] etc ... negated node name in set ... when needed
+ -- ... currently ignored
+ local maxn = #nodes
+ if maxn == 3 then --optimized loop
+ local nns, ntg = nodes[2], nodes[3]
+ if not nns and not ntg then -- wildcard
+ if directive then
+ return list
+ else
+ return { }
+ end
+ else
+ local collected = { }
+ if not nns then -- only check tag
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ if directive then
+ if ntg == ltg then
+ collected[#collected+1] = ll
+ end
+ elseif ntg ~= ltg then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ elseif not ntg then -- only check namespace
+ for l=1,#list do
+ local ll = list[l]
+ local lns = ll.rn or ll.ns
+ if lns then
+ if directive then
+ if lns == nns then
+ collected[#collected+1] = ll
+ end
+ elseif lns ~= nns then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ else -- check both
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ local lns = ll.rn or ll.ns
+ local ok = ltg == ntg and lns == nns
+ if directive then
+ if ok then
+ collected[#collected+1] = ll
+ end
+ elseif not ok then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ end
+ return collected
+ end
+ else
+ -- several triplets: a node matches as soon as one of them matches
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ local lns = ll.rn or ll.ns
+ local ok = false
+ for n=1,maxn,3 do
+ local nns, ntg = nodes[n+1], nodes[n+2]
+ ok = (not ntg or ltg == ntg) and (not nns or lns == nns)
+ if ok then
+ break
+ end
+ end
+ if directive then
+ if ok then
+ collected[#collected+1] = ll
+ end
+ elseif not ok then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ return collected
+ end
+end
+
+-- Keeps the nodes for which the compiled expression holds. The expression is
+-- called with the whole list, the node, the position of the node in the list
+-- and the given order value; a new list is returned.
+local function apply_expression(list,expression,order)
+    local matched, count = { }, 0
+    for position=1,#list do
+        local candidate = list[position]
+        if expression(list,candidate,position,order) then -- nasty, only valid when n=1
+            count = count + 1
+            matched[count] = candidate
+        end
+    end
+    return matched
+end
+
+local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb
+
+local spaces = S(" \n\r\t\f")^0
+
+local lp_space = S(" \n\r\t\f")
+local lp_any = P(1)
+
+local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==")
+local lp_doequal = P("=") / "=="
+local lp_or = P("|") / " or "
+local lp_and = P("&") / " and "
+
+local lp_builtin = P (
+ P("first") / "1" +
+ P("last") / "#list" +
+ P("position") / "l" +
+ P("rootposition") / "order" +
+ P("index") / "ll.ni" +
+ P("text") / "(ll.dt[1] or '')" +
+ P("name") / "(ll.ns~='' and ll.ns..':'..ll.tg)" +
+ P("tag") / "ll.tg" +
+ P("ns") / "ll.ns"
+ ) * ((spaces * P("(") * spaces * P(")"))/"")
+
+local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("ll.at['") * R("az","AZ","--","__")^1 * Cc("']")
+local lp_fastpos = ((R("09","--","++")^1 * P(-1)) / function(s) return "l==" .. s end)
+
+local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false")
+
+local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling
return t .. "("
end
-local lp_function = C(R("az","AZ","--","__")^1) * P("(") / function(t) -- todo: better . handling
+local lp_function = C(R("az","AZ","__")^1) * P("(") / function(t) -- todo: better . handling
if expressions[t] then
- return "expressions." .. t .. "("
+ return "expr." .. t .. "("
else
- return "expressions.error("
+ return "expr.error("
end
end
@@ -3771,337 +4752,527 @@ local noparent = 1 - (lparent+rparent)
local nested = lpeg.P{lparent * (noparent + lpeg.V(1))^0 * rparent}
local value = lpeg.P(lparent * lpeg.C((noparent + nested)^0) * rparent) -- lpeg.P{"("*C(((1-S("()"))+V(1))^0)*")"}
--- if we use a dedicated namespace then we don't need to pass rt and k
+-- A bare name in an expression is rewritten to expr.child(ll,'name'). The
+-- current element is called 'll' in the compiled evaluator (see template_e),
+-- so that is what has to be passed; 'e' is not defined there and would be nil.
+local lp_child = Cc("expr.child(ll,'") * R("az","AZ","--","__")^1 * Cc("')")
+-- a bare name that gets quoted (used by the argument cleaner)
+local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'")
+-- a single or double quoted string, passed on untouched
+local lp_content= (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"'))
+
+-- forward declaration: the pattern is assigned further down but is already
+-- referenced by lp_special
+local cleaner
-local lp_special = (C(P("name")+P("text")+P("tag"))) * value / function(t,s)
+-- Rewrites name(..), text(..), tag(..), count(..) and child(..) into calls
+-- on the expression namespace. The current element is known as 'll' inside
+-- the compiled evaluator (see template_e), so that is passed as first
+-- argument; 'e' is not defined there and would always evaluate to nil.
+local lp_special = (C(P("name")+P("text")+P("tag")+P("count")+P("child"))) * value / function(t,s)
if expressions[t] then
- if s then
- return "expressions." .. t .. "(r,k," .. s ..")"
+ s = s and s ~= "" and cleaner:match(s)
+ if s and s ~= "" then
+ return "expr." .. t .. "(ll," .. s ..")"
else
- return "expressions." .. t .. "(r,k)"
+ return "expr." .. t .. "(ll)"
end
else
- return "expressions.error(" .. t .. ")"
+ return "expr.error(" .. t .. ")"
end
end
-local converter = lpeg.Cs ( (
- lp_position +
- lp_index +
- lp_text + lp_name + -- fast one
+local content =
+ lp_builtin +
+ lp_attribute +
lp_special +
lp_noequal + lp_doequal +
- lp_attribute +
- lp_lua_function +
- lp_function +
+ lp_or + lp_and +
+ lp_reserved +
+ lp_lua_function + lp_function +
+ lp_content + -- too fragile
+ lp_child +
+ lp_any
+
+local converter = lpeg.Cs (
+ lp_fastpos + (lpeg.P { lparent * (lpeg.V(1))^0 * rparent + content } )^0
+)
+
+cleaner = lpeg.Cs ( (
+--~ lp_fastpos +
+ lp_reserved +
+ lp_string +
1 )^1 )
--- expressions,root,rootdt,k,e,edt,ns,tg,idx,hsh[tg] or 1
+--~ expr
-local template = [[
- return function(expressions,r,d,k,e,dt,ns,tg,id,ps)
- local at, tx = e.at or { }, dt[1] or ""
+local template_e = [[
+ local expr = xml.expressions
+ return function(list,ll,l,root)
return %s
end
]]
-local function make_expression(str)
- str = converter:match(str)
- return str, loadstring(format(template,str))()
-end
-
-local map = { }
-
-local space = S(' \r\n\t')
-local squote = S("'")
-local dquote = S('"')
-local lparent = P('(')
-local rparent = P(')')
-local atsign = P('@')
-local lbracket = P('[')
-local rbracket = P(']')
-local exclam = P('!')
-local period = P('.')
-local eq = P('==') + P('=')
-local ne = P('<>') + P('!=')
-local star = P('*')
-local slash = P('/')
-local colon = P(':')
-local bar = P('|')
-local hat = P('^')
-local valid = R('az', 'AZ', '09') + S('_-')
-local name_yes = C(valid^1 + star) * colon * C(valid^1 + star) -- permits ns:* *:tg *:*
-local name_nop = Cc("*") * C(valid^1)
-local name = name_yes + name_nop
-local number = C((S('+-')^0 * R('09')^1)) / tonumber
-local names = (bar^0 * name)^1
-local morenames = name * (bar^0 * name)^1
-local instructiontag = P('pi::')
-local spacing = C(space^0)
-local somespace = space^1
-local optionalspace = space^0
-local text = C(valid^0)
-local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote)
-local empty = 1-slash
-
-local is_eq = lbracket * atsign * name * eq * value * rbracket
-local is_ne = lbracket * atsign * name * ne * value * rbracket
-local is_attribute = lbracket * atsign * name * rbracket
-local is_value = lbracket * value * rbracket
-local is_number = lbracket * number * rbracket
-
-local nobracket = 1-(lbracket+rbracket) -- must be improved
-local is_expression = lbracket * C(((C(nobracket^1))/make_expression)) * rbracket
-
-local is_expression = lbracket * (C(nobracket^1))/make_expression * rbracket
-
-local is_one = name
-local is_none = exclam * name
-local is_one_of = ((lparent * names * rparent) + morenames)
-local is_none_of = exclam * ((lparent * names * rparent) + morenames)
-
-local stay = (period )
-local parent = (period * period ) / function( ) map[#map+1] = { 11 } end
-local subtreeroot = (slash + hat ) / function( ) map[#map+1] = { 12 } end
-local documentroot = (hat * hat ) / function( ) map[#map+1] = { 13 } end
-local any = (star ) / function( ) map[#map+1] = { 14 } end
-local many = (star * star ) / function( ) map[#map+1] = { 15 } end
-local initial = (hat * hat * hat ) / function( ) map[#map+1] = { 16 } end
-
-local match = (is_one ) / function(...) map[#map+1] = { 20, true , ... } end
-local match_one_of = (is_one_of ) / function(...) map[#map+1] = { 21, true , ... } end
-local dont_match = (is_none ) / function(...) map[#map+1] = { 20, false, ... } end
-local dont_match_one_of = (is_none_of ) / function(...) map[#map+1] = { 21, false, ... } end
-
-local match_and_eq = (is_one * is_eq ) / function(...) map[#map+1] = { 22, true , ... } end
-local match_and_ne = (is_one * is_ne ) / function(...) map[#map+1] = { 23, true , ... } end
-local dont_match_and_eq = (is_none * is_eq ) / function(...) map[#map+1] = { 22, false, ... } end
-local dont_match_and_ne = (is_none * is_ne ) / function(...) map[#map+1] = { 23, false, ... } end
-
-local match_one_of_and_eq = (is_one_of * is_eq ) / function(...) map[#map+1] = { 24, true , ... } end
-local match_one_of_and_ne = (is_one_of * is_ne ) / function(...) map[#map+1] = { 25, true , ... } end
-local dont_match_one_of_and_eq = (is_none_of * is_eq ) / function(...) map[#map+1] = { 24, false, ... } end
-local dont_match_one_of_and_ne = (is_none_of * is_ne ) / function(...) map[#map+1] = { 25, false, ... } end
-
-local has_attribute = (is_one * is_attribute) / function(...) map[#map+1] = { 27, true , ... } end
-local has_value = (is_one * is_value ) / function(...) map[#map+1] = { 28, true , ... } end
-local dont_has_attribute = (is_none * is_attribute) / function(...) map[#map+1] = { 27, false, ... } end
-local dont_has_value = (is_none * is_value ) / function(...) map[#map+1] = { 28, false, ... } end
-local position = (is_one * is_number ) / function(...) map[#map+1] = { 30, true, ... } end
-local dont_position = (is_none * is_number ) / function(...) map[#map+1] = { 30, false, ... } end
-
-local expression = (is_one * is_expression)/ function(...) map[#map+1] = { 31, true, ... } end
-local dont_expression = (is_none * is_expression)/ function(...) map[#map+1] = { 31, false, ... } end
-
-local self_expression = ( is_expression) / function(...) if #map == 0 then map[#map+1] = { 11 } end
- map[#map+1] = { 31, true, "*", "*", ... } end
-local dont_self_expression = (exclam * is_expression) / function(...) if #map == 0 then map[#map+1] = { 11 } end
- map[#map+1] = { 31, false, "*", "*", ... } end
-
-local instruction = (instructiontag * text ) / function(...) map[#map+1] = { 40, ... } end
-local nothing = (empty ) / function( ) map[#map+1] = { 15 } end -- 15 ?
-local crap = (1-slash)^1
-
--- a few ugly goodies:
-
-local docroottag = P('^^') / function( ) map[#map+1] = { 12 } end
-local subroottag = P('^') / function( ) map[#map+1] = { 13 } end
-local roottag = P('root::') / function( ) map[#map+1] = { 12 } end
-local parenttag = P('parent::') / function( ) map[#map+1] = { 11 } end
-local childtag = P('child::')
-local selftag = P('self::')
-
--- there will be more and order will be optimized
-
-local selector = (
- instruction +
--- many + any + -- brrr, not here !
- parent + stay +
- dont_position + position +
- dont_match_one_of_and_eq + dont_match_one_of_and_ne +
- match_one_of_and_eq + match_one_of_and_ne +
- dont_match_and_eq + dont_match_and_ne +
- match_and_eq + match_and_ne +
- dont_expression + expression +
- dont_self_expression + self_expression +
- has_attribute + has_value +
- dont_match_one_of + match_one_of +
- dont_match + match +
- many + any +
- crap + empty
-)
+-- Chunk template for a finalizer that is called with arguments. The three
+-- %s slots are filled with the protocol, the finalizer name and the
+-- argument list (see register_finalizer).
+local template_f_y = [[
+ local finalizer = xml.finalizers['%s']['%s']
+ return function(collection)
+ return finalizer(collection,%s)
+ end
+]]
-local grammar = P { "startup",
- startup = (initial + documentroot + subtreeroot + roottag + docroottag + subroottag)^0 * V("followup"),
- followup = ((slash + parenttag + childtag + selftag)^0 * selector)^1,
-}
+-- Chunk template for a finalizer without arguments: the chunk returns the
+-- registered finalizer itself. The %s slots are protocol and name.
+local template_f_n = [[
+ return xml.finalizers['%s']['%s']
+]]
-local function compose(str)
- if not str or str == "" then
- -- wildcard
- return true
- elseif str == '/' then
- -- root
- return false
+--
+
+-- Reports an expression that could not be compiled: <str> is the original
+-- expression, <cnv> the converted Lua code. Always returns false.
+local function errorrunner_e(str,cnv)
+ logs.report("lpath","error in expression: %s => %s",str,cnv)
+ return false
+end
+-- Reports a finalizer that could not be set up: <str> is the finalizer
+-- name, <arg> its (optional) argument string. Always returns false.
+local function errorrunner_f(str,arg)
+    local shown = arg or ""
+    logs.report("lpath","error in finalizer: %s(%s)",str,shown)
+    return false
+end
+
+-- Builds the step record for a node test: <nodetest> is the overall
+-- (possibly negated) test flag and <nodes> the parsed node set.
+local function register_nodes(nodetest,nodes)
+    local step = { kind = "nodes" }
+    step.nodetest = nodetest
+    step.nodes = nodes
+    return step
+end
+
+-- Compiles one [...] predicate into a step record. The expression is first
+-- converted to Lua code and then loaded by means of template_e. When that
+-- fails the evaluator becomes a function that reports the problem and
+-- rejects the element by returning false.
+local function register_expression(expression)
+    local converted = converter:match(expression)
+    local chunk = loadstring(format(template_e,converted))
+    local runner = chunk and chunk()
+    if not runner then
+        -- pass on the false that errorrunner_e returns instead of dropping it
+        runner = function() return errorrunner_e(expression,converted) end
+    end
+    return { kind = "expression", expression = expression, converted = converted, evaluator = runner }
+end
+
+local function register_finalizer(protocol,name,arguments)
+ local runner
+ if arguments and arguments ~= "" then
+ runner = loadstring(format(template_f_y,protocol or xml.defaultprotocol,name,arguments))
else
- map = { }
- grammar:match(str)
- if #map == 0 then
- return true
- else
- local m = map[1][1]
- if #map == 1 then
- if m == 14 or m == 15 then
- -- wildcard
- return true
- elseif m == 12 then
- -- root
- return false
- end
- elseif #map == 2 and m == 12 and map[2][1] == 20 then
- -- return { { 29, map[2][2], map[2][3], map[2][4], map[2][5] } }
- map[2][1] = 29
- return { map[2] }
- end
- if m ~= 11 and m ~= 12 and m ~= 13 and m ~= 14 and m ~= 15 and m ~= 16 then
- insert(map, 1, { 16 })
- end
- -- print(gsub(table.serialize(map),"[ \n]+"," "))
- return map
- end
+ runner = loadstring(format(template_f_n,protocol or xml.defaultprotocol,name))
end
+ runner = (runner and runner()) or function() errorrunner_f(name,arguments) end
+ return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner }
end
+-- Matches one bracketed expression and captures what is between the outer
+-- brackets. Nested brackets are handled by recursion and brackets inside
+-- single or double quoted strings are skipped.
+local expression = P { "ex",
+ ex = "[" * C((V("sq") + V("dq") + (1 - S("[]")) + V("ex"))^0) * "]",
+ sq = "'" * (1 - S("'"))^0 * "'",
+ dq = '"' * (1 - S('"'))^0 * '"',
+}
+
+-- Matches the parenthesized argument list of a finalizer and returns its
+-- content. Quoted strings are kept as they are, unquoted runs (anything up
+-- to a comma, quote or closing parenthesis) are quoted with %q.
+local arguments = P { "ar",
+ ar = "(" * Cs((V("sq") + V("dq") + V("nq") + P(1-P(")")))^0) * ")",
+ nq = ((1 - S("),'\""))^1) / function(s) return format("%q",s) end,
+ sq = P("'") * (1 - P("'"))^0 * P("'"),
+ dq = P('"') * (1 - P('"'))^0 * P('"'),
+}
+
+-- todo: better arg parser
+
+-- Shared step records, one per axis. The parser inserts these tables as
+-- constant captures, so every parsed pattern refers to the same records.
+local register_self = { kind = "axis", axis = "self" } -- , apply = apply_axis["self"] }
+local register_parent = { kind = "axis", axis = "parent" } -- , apply = apply_axis["parent"] }
+local register_descendant = { kind = "axis", axis = "descendant" } -- , apply = apply_axis["descendant"] }
+local register_child = { kind = "axis", axis = "child" } -- , apply = apply_axis["child"] }
+local register_descendant_or_self = { kind = "axis", axis = "descendant-or-self" } -- , apply = apply_axis["descendant-or-self"] }
+local register_root = { kind = "axis", axis = "root" } -- , apply = apply_axis["root"] }
+local register_ancestor = { kind = "axis", axis = "ancestor" } -- , apply = apply_axis["ancestor"] }
+local register_ancestor_or_self = { kind = "axis", axis = "ancestor-or-self" } -- , apply = apply_axis["ancestor-or-self"] }
+local register_attribute = { kind = "axis", axis = "attribute" } -- , apply = apply_axis["attribute"] }
+local register_namespace = { kind = "axis", axis = "namespace" } -- , apply = apply_axis["namespace"] }
+local register_following = { kind = "axis", axis = "following" } -- , apply = apply_axis["following"] }
+local register_following_sibling = { kind = "axis", axis = "following-sibling" } -- , apply = apply_axis["following-sibling"] }
+local register_preceding = { kind = "axis", axis = "preceding" } -- , apply = apply_axis["preceding"] }
+local register_preceding_sibling = { kind = "axis", axis = "preceding-sibling" } -- , apply = apply_axis["preceding-sibling"] }
+
+-- axes that are not written by the user but inserted by the parser
+local register_auto_descendant_or_self = { kind = "axis", axis = "auto-descendant-or-self" } -- , apply = apply_axis["auto-descendant-or-self"] }
+local register_auto_descendant = { kind = "axis", axis = "auto-descendant" } -- , apply = apply_axis["auto-descendant"] }
+local register_auto_self = { kind = "axis", axis = "auto-self" } -- , apply = apply_axis["auto-self"] }
+local register_auto_child = { kind = "axis", axis = "auto-child" } -- , apply = apply_axis["auto-child"] }
+
+-- inserted for a pattern that starts with a slash
+local register_initial_child = { kind = "axis", axis = "initial-child" } -- , apply = apply_axis["initial-child"] }
+
+-- a node test record in the shape that register_nodes produces
+local register_all_nodes = { kind = "nodes", nodetest = true, nodes = { true, false, false } }
+
+-- Builds the step record for a part of the pattern that could not be parsed.
+local function register_error(str)
+    local comment = format("unparsed: %s",str)
+    return { kind = "error", comment = comment }
+end
+
+-- The lpath grammar. A match yields a table with one step record per axis,
+-- node test, expression or finalizer, in the order in which they occur; the
+-- (optional) protocol is stored in the named field "protocol". The rules
+-- use ordered choice, so the order of the alternatives matters.
+local parser = Ct { "patterns", -- can be made a bit faster by moving pattern outside
+
+ patterns = spaces * V("protocol") * spaces * V("initial") * spaces * V("step") * spaces *
+ (P("/") * spaces * V("step") * spaces)^0,
+
+ protocol = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"),
+
+ step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
+
+ axis = V("descendant") + V("child") + V("parent") + V("self") + V("root") + V("ancestor") +
+ V("descendant_or_self") + V("following") + V("following_sibling") +
+ V("preceding") + V("preceding_sibling") + V("ancestor_or_self") +
+ #(1-P(-1)) * Cc(register_auto_child),
+
+ initial = (P("/") * spaces * Cc(register_initial_child))^-1,
+
+ error = (P(1)^1) / register_error,
+
+ shortcuts_a = V("s_descendant_or_self") + V("s_descendant") + V("s_child") + V("s_parent") + V("s_self") + V("s_root") + V("s_ancestor"),
+
+ shortcuts = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0,
+
+ s_descendant_or_self = P("/") * Cc(register_descendant_or_self),
+ s_descendant = P("**") * Cc(register_descendant),
+ s_child = P("*") * Cc(register_child ),
+ s_parent = P("..") * Cc(register_parent ),
+ s_self = P("." ) * Cc(register_self ),
+ s_root = P("^^") * Cc(register_root ),
+ s_ancestor = P("^") * Cc(register_ancestor ),
+
+ descendant = P("descendant::") * Cc(register_descendant ),
+ child = P("child::") * Cc(register_child ),
+ parent = P("parent::") * Cc(register_parent ),
+ self = P("self::") * Cc(register_self ),
+ root = P('root::') * Cc(register_root ),
+ ancestor = P('ancestor::') * Cc(register_ancestor ),
+ descendant_or_self = P('descendant-or-self::') * Cc(register_descendant_or_self ),
+ ancestor_or_self = P('ancestor-or-self::') * Cc(register_ancestor_or_self ),
+ -- attribute = P('attribute::') * Cc(register_attribute ),
+ -- namespace = P('namespace::') * Cc(register_namespace ),
+ following = P('following::') * Cc(register_following ),
+ following_sibling = P('following-sibling::') * Cc(register_following_sibling ),
+ preceding = P('preceding::') * Cc(register_preceding ),
+ preceding_sibling = P('preceding-sibling::') * Cc(register_preceding_sibling ),
+
+ nodes = (V("nodefunction") * spaces * P("(") * V("nodeset") * P(")") + V("nodetest") * V("nodeset")) / register_nodes,
+
+ expressions = expression / register_expression,
+
+ letters = R("az")^1,
+ name = (1-lpeg.S("/[]()|:*!"))^1,
+ negate = P("!") * Cc(false),
+
+ nodefunction = V("negate") + P("not") * Cc(false) + Cc(true),
+ nodetest = V("negate") + Cc(true),
+ nodename = (V("negate") + Cc(true)) * spaces * ((V("wildnodename") * P(":") * V("wildnodename")) + (Cc(false) * V("wildnodename"))),
+ wildnodename = (C(V("name")) + P("*") * Cc(false)) * #(1-P("(")),
+ nodeset = spaces * Ct(V("nodename") * (spaces * P("|") * spaces * V("nodename"))^0) * spaces,
+
+ finalizer = (Cb("protocol") * P("/")^-1 * C(V("name")) * arguments * P(-1)) / register_finalizer,
+
+}
+
local cache = { }
-function xml.lpath(pattern,trace)
- lpathcalls = lpathcalls + 1
- if type(pattern) == "string" then
- local result = cache[pattern]
- if result == nil then -- can be false which is valid -)
- result = compose(pattern)
- cache[pattern] = result
- lpathcached = lpathcached + 1
- end
- if trace or trace_lpath then
- xml.lshow(result)
- end
- return result
+local function nodesettostring(set,nodetest)
+ local t = { }
+ for i=1,#set,3 do
+ local directive, ns, tg = set[i], set[i+1], set[i+2]
+ if not ns or ns == "" then ns = "*" end
+ if not tg or tg == "" then tg = "*" end
+ tg = (tg == "@rt@" and "[root]") or format("%s:%s",ns,tg)
+ t[#t+1] = (directive and tg) or format("not(%s)",tg)
+ end
+ if nodetest == false then
+ return format("not(%s)",concat(t,"|"))
else
- return pattern
+ return concat(t,"|")
end
end
-function xml.cached_patterns()
- return cache
+-- Describes the elements in <list> as a space separated string of
+-- "namespace:tag" pairs, with "*" for a missing part and "[root]" for the
+-- root node; an empty list gives "no elements".
+local function tagstostring(list)
+    local count = #list
+    if count == 0 then
+        return "no elements"
+    end
+    local names = { }
+    for index=1,count do
+        local element = list[index]
+        local ns, tg = element.ns, element.tg
+        if not ns or ns == "" then
+            ns = "*"
+        end
+        if not tg or tg == "" then
+            tg = "*"
+        end
+        if tg == "@rt@" then
+            names[index] = "[root]"
+        else
+            names[index] = format("%s:%s",ns,tg)
+        end
+    end
+    return concat(names," ")
end
--- we run out of locals (limited to 200)
---
--- local fallbackreport = (texio and texio.write) or io.write
-
-function xml.lshow(pattern,report)
--- report = report or fallbackreport
- report = report or (texio and texio.write) or io.write
- local lp = xml.lpath(pattern)
- if lp == false then
- report(" -: root\n")
- elseif lp == true then
- report(" -: wildcard\n")
+xml.nodesettostring = nodesettostring
+
+-- Reports a parsed pattern in serialized form. A string is parsed first.
+local function lshow(parsed)
+    if type(parsed) == "string" then
+        -- the local parse_pattern is only declared after this function, so
+        -- it is not visible here; go through the xml table, which has been
+        -- set by the time this function gets called
+        parsed = xml.parse_pattern(parsed)
+    end
+    -- functions are temporarily left out of the serialization
+    local s = table.serialize_functions -- ugly
+    table.serialize_functions = false -- ugly
+    logs.report("lpath","%s://%s => %s",parsed.protocol or xml.defaultprotocol,parsed.pattern,table.serialize(parsed,false))
+    table.serialize_functions = s -- ugly
+end
+
+xml.lshow = lshow
+
+local function parse_pattern(pattern) -- the gain of caching is rather minimal
+ lpathcalls = lpathcalls + 1
+ if type(pattern) == "table" then
+ return pattern
else
- if type(pattern) == "string" then
- report(format("pattern: %s\n",pattern))
- end
- for k=1,#lp do
- local v = lp[k]
- if #v > 1 then
- local t = { }
- for i=2,#v do
- local vv = v[i]
- if type(vv) == "string" then
- t[#t+1] = (vv ~= "" and vv) or "#"
- elseif type(vv) == "boolean" then
- t[#t+1] = (vv and "==") or "<>"
+ local parsed = cache[pattern]
+ if parsed then
+ lpathcached = lpathcached + 1
+ else
+ parsed = parser:match(pattern)
+ if parsed then
+ parsed.pattern = pattern
+ local np = #parsed
+ if np == 0 then
+ parsed = { pattern = pattern, register_self, state = "parsing error" }
+ logs.report("lpath","parsing error in '%s'",pattern)
+ lshow(parsed)
+ else
+ -- we could have done this with a more complex parsed but this
+ -- is cleaner
+ local pi = parsed[1]
+ if pi.axis == "auto-child" then
+ parsed.comment = "auto-child replaced by auto-descendant-or-self"
+ parsed[1] = register_auto_descendant_or_self
+ --~ parsed.comment = "auto-child replaced by auto-descendant"
+ --~ parsed[1] = register_auto_descendant
+ elseif pi.axis == "initial-child" and np > 1 and parsed[2].axis then
+ parsed.comment = "initial-child removed" -- we could also make it a auto-self
+ remove(parsed,1)
end
end
- report(format("%2i: %s %s -> %s\n", k,v[1],actions[v[1]],concat(t," ")))
else
- report(format("%2i: %s %s\n", k,v[1],actions[v[1]]))
+ parsed = { pattern = pattern }
+ end
+ cache[pattern] = parsed
+ if trace_lparse and not trace_lprofile then
+ lshow(parsed)
end
end
+ return parsed
end
end
-function xml.xshow(e,...) -- also handy when report is given, use () to isolate first e
- local t = { ... }
--- local report = (type(t[#t]) == "function" and t[#t]) or fallbackreport
- local report = (type(t[#t]) == "function" and t[#t]) or (texio and texio.write) or io.write
- if e == nil then
- report("<!-- no element -->\n")
- elseif type(e) ~= "table" then
- report(tostring(e))
- elseif e.tg then
- report(tostring(e) .. "\n")
+-- we can move all calls inline and then merge the trace back
+-- technically we can combine axis and the next nodes which is
+-- what we did before but this is a bit cleaner (but slower too)
+-- but interesting is that it's not that much faster when we
+-- go inline
+--
+-- beware: we need to return a collection even when we filter
+-- else the (simple) cache gets messed up
+
+-- caching found lookups saves not that much (max .1 sec on a 8 sec run)
+-- and it also messes up finalizers
+
+local profiled = { } xml.profiled = profiled
+
+local function profiled_apply(list,parsed,nofparsed)
+ local p = profiled[parsed.pattern]
+ if p then
+ p.tested = p.tested + 1
else
- for i=1,#e do
- report(tostring(e[i]) .. "\n")
+ p = { tested = 1, matched = 0, finalized = 0 }
+ profiled[parsed.pattern] = p
+ end
+ local collected = list
+ for i=1,nofparsed do
+ local pi = parsed[i]
+ local kind = pi.kind
+ if kind == "axis" then
+ collected = apply_axis[pi.axis](collected)
+ elseif kind == "nodes" then
+ collected = apply_nodes(collected,pi.nodetest,pi.nodes)
+ elseif kind == "expression" then
+ collected = apply_expression(collected,pi.evaluator,i)
+ elseif kind == "finalizer" then
+ collected = pi.finalizer(collected)
+ p.matched = p.matched + 1
+ p.finalized = p.finalized + 1
+ return collected
+ end
+ if not collected or #collected == 0 then
+ return nil
end
end
+ if collected then
+ p.matched = p.matched + 1
+ end
+ return collected
+end
+
+-- Same as the normal apply loop, but reports the number of collected
+-- elements after every step. Returns the collection, the result of a
+-- finalizer, or nil once a step leaves nothing behind.
+local function traced_apply(list,parsed,nofparsed)
+    if trace_lparse then
+        lshow(parsed)
+    end
+    logs.report("lpath", "collecting : %s",parsed.pattern)
+    logs.report("lpath", " root tags : %s",tagstostring(list))
+    local result = list
+    for step=1,nofparsed do
+        local action = parsed[step]
+        local kind = action.kind
+        if kind == "finalizer" then
+            -- a finalizer always ends the run
+            result = action.finalizer(result)
+            logs.report("lpath", "% 10i : fi : %s : %s(%s)",(result and #result) or 0,parsed.protocol or xml.defaultprotocol,action.name,action.arguments or "")
+            return result
+        elseif kind == "axis" then
+            result = apply_axis[action.axis](result)
+            logs.report("lpath", "% 10i : ax : %s",(result and #result) or 0,action.axis)
+        elseif kind == "nodes" then
+            result = apply_nodes(result,action.nodetest,action.nodes)
+            logs.report("lpath", "% 10i : ns : %s",(result and #result) or 0,nodesettostring(action.nodes,action.nodetest))
+        elseif kind == "expression" then
+            result = apply_expression(result,action.evaluator,step)
+            logs.report("lpath", "% 10i : ex : %s",(result and #result) or 0,action.expression)
+        end
+        if not result or #result == 0 then
+            return nil
+        end
+    end
+    return result
end
---[[ldx--
-<p>An <l n='lpath'/> is converted to a table with instructions for traversing the
-tree. Hoever, simple cases are signaled by booleans. Because we don't know in
-advance what we want to do with the found element the handle gets three arguments:</p>
-
-<lines>
-<t>r</t> : the root element of the data table
-<t>d</t> : the data table of the result
-<t>t</t> : the index in the data table of the result
-</lines>
+-- Resolves <pattern> (a string or an already parsed table) and runs the
+-- resulting steps over <list>. Returns the final collection, whatever a
+-- finalizer returns, or nil as soon as a step leaves nothing to work on.
+local function parse_apply(list,pattern)
+    -- the cache is consulted here so that a hit avoids an extra call
+    local parsed = cache[pattern]
+    if parsed then
+        lpathcalls = lpathcalls + 1
+        lpathcached = lpathcached + 1
+    elseif type(pattern) == "table" then
+        lpathcalls = lpathcalls + 1
+        parsed = pattern
+    else
+        parsed = parse_pattern(pattern) or pattern
+    end
+    if not parsed then
+        return
+    end
+    local nofparsed = #parsed
+    if nofparsed == 0 then
+        -- something is wrong
+        return
+    end
+    if trace_lpath then
+        if trace_lprofile then
+            return profiled_apply(list,parsed,nofparsed)
+        end
+        return traced_apply(list,parsed,nofparsed)
+    end
+    -- normal apply, inline, no self
+    local result = list
+    for step=1,nofparsed do
+        local action = parsed[step]
+        local kind = action.kind
+        if kind == "finalizer" then
+            return action.finalizer(result)
+        elseif kind == "axis" then
+            local axis = action.axis
+            if axis ~= "self" then
+                result = apply_axis[axis](result)
+            end
+        elseif kind == "nodes" then
+            result = apply_nodes(result,action.nodetest,action.nodes)
+        elseif kind == "expression" then
+            result = apply_expression(result,action.evaluator,step)
+        end
+        if not result or #result == 0 then
+            return nil
+        end
+    end
+    return result
+end
-<p> Access to the root and data table makes it possible to construct insert and delete
-functions.</p>
---ldx]]--
+-- internal (parsed)
-local functions = xml.functions
-local expressions = xml.expressions
+-- Applies <pattern> with element <e> as the only start node and returns
+-- the result of that (a collection or nil).
+expressions.child = function(e,pattern)
+    local start = { e }
+    return parse_apply(start,pattern) -- todo: cache
+end
+-- Returns the number of elements that <pattern> selects when applied to
+-- element <e>; zero when nothing is found.
+expressions.count = function(e,pattern)
+    local found = parse_apply({ e },pattern) -- todo: cache
+    if found then
+        return #found
+    end
+    return 0
+end
-expressions.contains = string.find
-expressions.find = string.find
-expressions.upper = string.upper
-expressions.lower = string.lower
-expressions.number = tonumber
-expressions.boolean = toboolean
+-- external
expressions.oneof = function(s,...) -- slow
local t = {...} for i=1,#t do if s == t[i] then return true end end return false
end
-
expressions.error = function(str)
- xml.error_handler("unknown function in lpath expression",str or "?")
+ xml.error_handler("unknown function in lpath expression",tostring(str or "?"))
return false
end
+-- True only when <s> is nil (false does not count as undefined).
+expressions.undefined = function(s)
+    if s == nil then
+        return true
+    end
+    return false
+end
-functions.text = function(root,k,n) -- unchecked, maybe one deeper
- local t = type(t)
- if t == "string" then
- return t
- else -- todo n
- local rdt = root.dt
- return (rdt and rdt[k]) or root[k] or ""
+-- Helpers that can be called by name from within an expression.
+-- NOTE(review): find, upper and lower are presumably the string library
+-- functions localized earlier in the file, and toboolean a global helper;
+-- confirm against the top of the module.
+expressions.contains = find
+expressions.find = find
+expressions.upper = upper
+expressions.lower = lower
+expressions.number = tonumber
+expressions.boolean = toboolean
+
+-- user interface
+
+-- Old style traversal: <handle> is called for every match with the parent
+-- element, the data table of that parent and the index of the match in it.
+-- Every call reports a hint to use xml.selection instead.
+local function traverse(root,pattern,handle)
+    logs.report("xml","use 'xml.selection' instead for '%s'",pattern)
+    local found = parse_apply({ root },pattern)
+    if not found then
+        return
+    end
+    for index=1,#found do
+        local element = found[index]
+        local parent = element.__p__
+        handle(parent,parent.dt,element.ni)
+    end
+end
+
+-- New style selection: with a <handle> every matching element is passed
+-- to it (and nothing is returned), without one the collection itself is
+-- returned. Nothing is returned when there are no matches.
+local function selection(root,pattern,handle)
+    local found = parse_apply({ root },pattern)
+    if found then
+        if not handle then
+            return found
+        end
+        for index=1,#found do
+            handle(found[index])
+        end
end
end
-functions.name = function(d,k,n) -- ns + tg
+-- Public names for the parser, the pattern parser and the apply function.
+xml.parse_parser = parser
+xml.parse_pattern = parse_pattern
+xml.parse_apply = parse_apply
+xml.traverse = traverse -- old method, r, d, k
+xml.selection = selection -- new method, simple handle
+
+-- xml.lpath is another name for the pattern parser
+local lpath = parse_pattern
+
+xml.lpath = lpath
+
+-- Returns the table in which parsed patterns are cached (the table itself,
+-- not a copy).
+function xml.cached_patterns()
+ return cache
+end
+
+-- generic function finalizer (independent namespace)
+
+-- Finalizer that calls the function registered under the name <fnc> for
+-- every collected element. An unknown name is reported; nothing is returned.
+local function dofunction(collected,fnc)
+    if not collected then
+        return
+    end
+    local action = functions[fnc]
+    if not action then
+        logs.report("xml","unknown function '%s'",fnc)
+        return
+    end
+    for index=1,#collected do
+        action(collected[index])
+    end
+end
+
+-- the same finalizer is registered for both the xml and the tex protocol
+xml.finalizers.xml["function"] = dofunction
+xml.finalizers.tex["function"] = dofunction
+
+-- functions
+
+-- Returns entry <n> of the data table of the parent of <e>, or an empty
+-- string when there is no such table or entry.
+expressions.text = function(e,n)
+    local siblings = e.__p__.dt
+    if siblings then
+        local entry = siblings[n]
+        if entry then
+            return entry
+        end
+    end
+    return ""
+end
+
+expressions.name = function(e,n) -- ns + tg
local found = false
- n = n or 0
- if not k then
- -- not found
- elseif n == 0 then
- local dk = d[k]
- found = dk and (type(dk) == "table") and dk
+ n = tonumber(n) or 0
+ if n == 0 then
+ found = type(e) == "table" and e
elseif n < 0 then
+ local d, k = e.__p__.dt, e.ni
for i=k-1,1,-1 do
local di = d[i]
if type(di) == "table" then
@@ -4114,6 +5285,7 @@ functions.name = function(d,k,n) -- ns + tg
end
end
else
+ local d, k = e.__p__.dt, e.ni
for i=k+1,#d,1 do
local di = d[i]
if type(di) == "table" then
@@ -4138,15 +5310,13 @@ functions.name = function(d,k,n) -- ns + tg
end
end
-functions.tag = function(d,k,n) -- only tg
+expressions.tag = function(e,n) -- only tg
local found = false
- n = n or 0
- if not k then
- -- not found
- elseif n == 0 then
- local dk = d[k]
- found = dk and (type(dk) == "table") and dk
+ n = tonumber(n) or 0
+ if n == 0 then
+ found = (type(e) == "table") and e -- seems to fail
elseif n < 0 then
+ local d, k = e.__p__.dt, e.ni
for i=k-1,1,-1 do
local di = d[i]
if type(di) == "table" then
@@ -4159,6 +5329,7 @@ functions.tag = function(d,k,n) -- only tg
end
end
else
+ local d, k = e.__p__.dt, e.ni
for i=k+1,#d,1 do
local di = d[i]
if type(di) == "table" then
@@ -4174,664 +5345,403 @@ functions.tag = function(d,k,n) -- only tg
return (found and found.tg) or ""
end
-expressions.text = functions.text
-expressions.name = functions.name
-expressions.tag = functions.tag
+--[[ldx--
+<p>This is the main filter function. It returns whatever is asked for.</p>
+--ldx]]--
-local function traverse(root,pattern,handle,reverse,index,parent,wildcard) -- multiple only for tags, not for namespaces
- if not root then -- error
- return false
- elseif pattern == false then -- root
- handle(root,root.dt,root.ri)
- return false
- elseif pattern == true then -- wildcard
- local rootdt = root.dt
- if rootdt then
- local start, stop, step = 1, #rootdt, 1
- if reverse then
- start, stop, step = stop, start, -1
- end
- for k=start,stop,step do
- if handle(root,rootdt,root.ri or k) then return false end
- if not traverse(rootdt[k],true,handle,reverse) then return false end
- end
+function xml.filter(root,pattern) -- no longer funny attribute handling here
+ return parse_apply({ root },pattern)
+end
+
+--[[ldx--
+<p>Often using iterators looks nicer in the code than passing handler
+functions. The <l n='lua'/> book describes how to use coroutines for that
+purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits
+code like:</p>
+
+<typing>
+for r, d, k in xml.elements(xml.load('text.xml'),"title") do
+ print(d[k]) -- old method
+end
+for e in xml.collected(xml.load('text.xml'),"title") do
+ print(e) -- new one
+end
+</typing>
+--ldx]]--
+
+local wrap, yield = coroutine.wrap, coroutine.yield
+
+function xml.elements(root,pattern,reverse) -- r, d, k
+ local collected = parse_apply({ root },pattern)
+ if collected then
+ if reverse then
+ return wrap(function() for c=#collected,1,-1 do
+ local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
+ end end)
+ else
+ return wrap(function() for c=1,#collected do
+ local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
+ end end)
end
- return false
- elseif root.dt then
- index = index or 1
- local action = pattern[index]
- local command = action[1]
- if command == 29 then -- fast case /oeps
- local rootdt = root.dt
- for k=1,#rootdt do
- local e = rootdt[k]
- local tg = e.tg
- if e.tg then
- local ns = e.rn or e.ns
- local ns_a, tg_a = action[3], action[4]
- local matched = (ns_a == "*" or ns == ns_a) and (tg_a == "*" or tg == tg_a)
- if not action[2] then matched = not matched end
- if matched then
- if handle(root,rootdt,k) then return false end
- end
- end
- end
- elseif command == 11 then -- parent
- local ep = root.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
+ end
+ return wrap(function() end)
+end
+
+function xml.collected(root,pattern,reverse) -- e
+ local collected = parse_apply({ root },pattern)
+ if collected then
+ if reverse then
+ return wrap(function() for c=#collected,1,-1 do yield(collected[c]) end end)
else
- if (command == 16 or command == 12) and index == 1 then -- initial
- -- wildcard = true
- wildcard = command == 16 -- ok?
- index = index + 1
- action = pattern[index]
- command = action and action[1] or 0 -- something is wrong
- end
- if command == 11 then -- parent
- local ep = root.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
- else
- local rootdt = root.dt
- local start, stop, step, n, dn = 1, #rootdt, 1, 0, 1
- if command == 30 then
- if action[5] < 0 then
- start, stop, step = stop, start, -1
- dn = -1
- end
- elseif reverse and index == #pattern then
- start, stop, step = stop, start, -1
- end
- local idx = 0
- local hsh = { } -- this will slooow down the lot
- for k=start,stop,step do -- we used to have functions for all but a case is faster
- local e = rootdt[k]
- local ns, tg = e.rn or e.ns, e.tg
- if tg then
- -- we can optimize this for simple searches, but it probably does not pay off
- hsh[tg] = (hsh[tg] or 0) + 1
- idx = idx + 1
- if command == 30 then
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- if matched then
- n = n + dn
- if n == action[5] then
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
- end
- break
- end
- elseif wildcard then
- if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
- end
- else
- local matched, multiple = false, false
- if command == 20 then -- match
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- elseif command == 21 then -- match one of
- multiple = true
- for i=3,#action,2 do
- local ns_a, tg_a = action[i], action[i+1]
- if (ns_a == "*" or ns == ns_a) and (tg == "*" or tg == tg_a) then
- matched = true
- break
- end
- end
- if not action[2] then matched = not matched end
- elseif command == 22 then -- eq
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- matched = matched and e.at[action[6]] == action[7]
- elseif command == 23 then -- ne
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- matched = mached and e.at[action[6]] ~= action[7]
- elseif command == 24 then -- one of eq
- multiple = true
- for i=3,#action-2,2 do
- local ns_a, tg_a = action[i], action[i+1]
- if (ns_a == "*" or ns == ns_a) and (tg == "*" or tg == tg_a) then
- matched = true
- break
- end
- end
- if not action[2] then matched = not matched end
- matched = matched and e.at[action[#action-1]] == action[#action]
- elseif command == 25 then -- one of ne
- multiple = true
- for i=3,#action-2,2 do
- local ns_a, tg_a = action[i], action[i+1]
- if (ns_a == "*" or ns == ns_a) and (tg == "*" or tg == tg_a) then
- matched = true
- break
- end
- end
- if not action[2] then matched = not matched end
- matched = matched and e.at[action[#action-1]] ~= action[#action]
- elseif command == 27 then -- has attribute
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- matched = matched and e.at[action[5]]
- elseif command == 28 then -- has value
- local edt, ns_a, tg_a = e.dt, action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- matched = matched and edt and edt[1] == action[5]
- elseif command == 31 then
- local edt, ns_a, tg_a = e.dt, action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- if matched then
- matched = action[6](expressions,root,rootdt,k,e,edt,ns,tg,idx,hsh[tg] or 1)
- end
- end
- if matched then -- combine tg test and at test
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- if wildcard then
- if multiple then
- if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
- else
- -- maybe or multiple; anyhow, check on (section|title) vs just section and title in example in lxml
- if not traverse(e,pattern,handle,reverse,index,root) then return false end
- end
- end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
- end
- elseif command == 14 then -- any
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
- end
- elseif command == 15 then -- many
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root,true) then return false end
- end
- -- not here : 11
- elseif command == 11 then -- parent
- local ep = e.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,root,index+1) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
- elseif command == 40 and e.special and tg == "@pi@" then -- pi
- local pi = action[2]
- if pi ~= "" then
- local pt = e.dt[1]
- if pt and pt:find(pi) then
- if handle(root,rootdt,k) then
- return false
- end
- end
- elseif handle(root,rootdt,k) then
- return false
- end
- elseif wildcard then
- if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
- end
- end
- else
- -- not here : 11
- if command == 11 then -- parent
- local ep = e.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
- break -- else loop
- end
- end
- end
- end
+ return wrap(function() for c=1,#collected do yield(collected[c]) end end)
end
end
- return true
+ return wrap(function() end)
end
-xml.traverse = traverse
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['lxml-ent'] = {
+ version = 1.001,
+ comment = "this module is the basis for the lxml-* ones",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local type, next = type, next
+local texsprint, ctxcatcodes = tex.sprint, tex.ctxcatcodes
+local utf = unicode.utf8
+local utfupper = utf.upper
--[[ldx--
-<p>Next come all kind of locators and manipulators. The most generic function here
-is <t>xml.filter(root,pattern)</t>. All registers functions in the filters namespace
-can be path of a search path, as in:</p>
+<p>We provide (at least here) two entity handlers. The more extensive
+resolver consults a hash first, tries to convert to <l n='utf'/> next,
+and finally calls a handler when defined. When this all fails, the
+original entity is returned.</p>
-<typing>
-local r, d, k = xml.filter(root,"/a/b/c/position(4)"
-</typing>
+<p>We do things different now but it's still somewhat experimental</p>
--ldx]]--
-local traverse, lpath, convert = xml.traverse, xml.lpath, xml.convert
+xml.entities = xml.entities or { } -- xml.entity_handler == function
-xml.filters = { }
+-- experimental, this will be done differently
-function xml.filters.default(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end)
- return dt and dt[dk], rt, dt, dk
+function xml.merge_entities(root) -- copy root.entities into the shared xml.entities table
+ local perdocument = root.entities
+ if perdocument then
+ local shared = xml.entities
+ for name, value in next, perdocument do
+ shared[name] = value -- a document entry replaces a shared one of the same name
+ end
+ end
end
-function xml.filters.attributes(root,pattern,arguments)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end)
- local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at)
- if ekat then
- if arguments then
- return ekat[arguments] or "", rt, dt, dk
+function xml.resolved_entity(str) -- pass the replacement for entity name str on to TeX
+ local e = xml.entities[str]
+ if e then
+ local te = type(e)
+ if te == "function" then
+ e(str) -- function entries are called with the name; their result is ignored
else
- return ekat, rt, dt, dk
+ texsprint(ctxcatcodes,e) -- any other entry is printed as-is under ctxcatcodes
end
else
- return { }, rt, dt, dk
+ texsprint(ctxcatcodes,"\\xmle{",str,"}{",utfupper(str),"}") -- we need to use our own upper
end
end
-function xml.filters.reverse(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end, 'reverse')
- return dt and dt[dk], rt, dt, dk
-end
+xml.entities.amp = function() tex.write("&") end
+xml.entities.lt = function() tex.write("<") end
+xml.entities.gt = function() tex.write(">") end
-function xml.filters.count(root,pattern,everything)
- local n = 0
- traverse(root, lpath(pattern), function(r,d,t)
- if everything or type(d[t]) == "table" then
- n = n + 1
- end
- end)
- return n
-end
-function xml.filters.elements(root, pattern) -- == all
- local t = { }
- traverse(root, lpath(pattern), function(r,d,k)
- local e = d[k]
- if e then
- t[#t+1] = e
- end
- end)
- return t
-end
+end -- of closure
-function xml.filters.texts(root, pattern)
- local t = { }
- traverse(root, lpath(pattern), function(r,d,k)
- local e = d[k]
- if e and e.dt then
- t[#t+1] = e.dt
- end
- end)
- return t
-end
+do -- create closure to overcome 200 locals limit
-function xml.filters.first(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end)
- return dt and dt[dk], rt, dt, dk
-end
+if not modules then modules = { } end modules ['lxml-mis'] = {
+ version = 1.001,
+ comment = "this module is the basis for the lxml-* ones",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
-function xml.filters.last(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end, 'reverse')
- return dt and dt[dk], rt, dt, dk
-end
+local concat = table.concat
+local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
+local format, gsub = string.format, string.gsub
-function xml.filters.index(root,pattern,arguments)
- local rt, dt, dk, reverse, i = nil, nil, nil, false, tonumber(arguments or '1') or 1
- if i and i ~= 0 then
- if i < 0 then
- reverse, i = true, -i
- end
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk, i = r, d, k, i-1 return i == 0 end, reverse)
- if i == 0 then
- return dt and dt[dk], rt, dt, dk
+--[[ldx--
+<p>The following helper functions best belong to the <t>lxml-ini</t>
+module. Some are here because we need them in the <t>mk</t>
+document and other manuals, others came up when playing with
+this module. Since this module is also used in <l n='mtxrun'/> we've
+put them here instead of loading more modules there than needed.</p>
+--ldx]]--
+
+
+local function xmlgsub(t,old,new) -- gsub(old,new) on every string found in t.dt, recursively
+ local dt = t.dt
+ if dt then
+ for k=1,#dt do
+ local v = dt[k]
+ if type(v) == "string" then
+ dt[k] = gsub(v,old,new) -- only the string result is stored, the count is dropped
+ else
+ xmlgsub(v,old,new) -- non-strings are descended into (presumably element tables)
+ end
end
end
- return nil, nil, nil, nil
end
-function xml.filters.attribute(root,pattern,arguments)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end)
- local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at)
- -- return (ekat and (ekat[arguments] or ekat[gsub(arguments,"^([\"\'])(.*)%1$","%2")])) or ""
- return (ekat and (ekat[arguments] or (find(arguments,"^[\'\"]") and ekat[sub(arguments,2,-2)]))) or ""
-end
+xmlgsub = xmlgsub
-function xml.filters.text(root,pattern,arguments) -- ?? why index, tostring slow
- local dtk, rt, dt, dk = xml.filters.index(root,pattern,arguments)
- if dtk then -- n
- local dtkdt = dtk.dt
- if not dtkdt then
- return "", rt, dt, dk
- elseif #dtkdt == 1 and type(dtkdt[1]) == "string" then
- return dtkdt[1], rt, dt, dk
- else
- return xml.tostring(dtkdt), rt, dt, dk
+function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual: unindent dk by the indentation found in the string d[k-1]
+ if d and k then
+ local dkm = d[k-1]
+ if dkm and type(dkm) == "string" then
+ local s = string.match(dkm,"\n(%s+)") -- string.*: this closure has no local 'match' or 'rep'
+ if s then xmlgsub(dk,"\n"..string.rep(" ",#s),"\n") end -- no newline plus whitespace found: nothing to strip
end
- else
- return "", rt, dt, dk
end
end
-function xml.filters.tag(root,pattern,n)
- local tag = ""
- traverse(root, lpath(pattern), function(r,d,k)
- tag = xml.functions.tag(d,k,n and tonumber(n))
- return true
- end)
- return tag
-end
+--~ xml.escapes = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
+--~ xml.unescapes = { } for k,v in pairs(xml.escapes) do xml.unescapes[v] = k end
-function xml.filters.name(root,pattern,n)
- local tag = ""
- traverse(root, lpath(pattern), function(r,d,k)
- tag = xml.functions.name(d,k,n and tonumber(n))
- return true
- end)
- return tag
-end
+--~ function xml.escaped (str) return (gsub(str,"(.)" , xml.escapes )) end
+--~ function xml.unescaped(str) return (gsub(str,"(&.-;)", xml.unescapes)) end
+--~ function xml.cleansed (str) return (gsub(str,"<.->" , '' )) end -- "%b<>"
---[[ldx--
-<p>For splitting the filter function from the path specification, we can
-use string matching or lpeg matching. Here the difference in speed is
-neglectable but the lpeg variant is more robust.</p>
---ldx]]--
+local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, lpeg.Cs
--- not faster but hipper ... although ... i can't get rid of the trailing / in the path
+-- 100 * 2500 * "oeps< oeps> oeps&" : gsub:lpeg|lpeg|lpeg
+--
+-- 1021:0335:0287:0247
-local P, S, R, C, V, Cc = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc
+-- 10 * 1000 * "oeps< oeps> oeps& asfjhalskfjh alskfjh alskfjh alskfjh ;al J;LSFDJ"
+--
+-- 1559:0257:0288:0190 (last one suggested by roberto)
-local slash = P('/')
-local name = (R("az","AZ","--","__"))^1
-local path = C(((1-slash)^0 * slash)^1)
-local argument = P { "(" * C(((1 - S("()")) + V(1))^0) * ")" }
-local action = Cc(1) * path * C(name) * argument
-local attribute = Cc(2) * path * P('@') * C(name)
-local direct = Cc(3) * Cc("../*") * slash^0 * C(name) * argument
+-- escaped = Cs((S("<&>") / xml.escapes + 1)^0)
+-- escaped = Cs((S("<")/"&lt;" + S(">")/"&gt;" + S("&")/"&amp;" + 1)^0)
+local normal = (1 - S("<&>"))^0
+local special = P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;"
+local escaped = Cs(normal * (special * normal)^0)
-local parser = direct + action + attribute
+-- 100 * 1000 * "oeps&lt; oeps&gt; oeps&amp;" : gsub:lpeg == 0153:0280:0151:0080 (last one by roberto)
-local filters = xml.filters
-local attribute_filter = xml.filters.attributes
-local default_filter = xml.filters.default
+local normal = (1 - S"&")^0
+local special = P("&lt;")/"<" + P("&gt;")/">" + P("&amp;")/"&"
+local unescaped = Cs(normal * (special * normal)^0)
--- todo: also hash, could be gc'd
+-- 100 * 5000 * "oeps <oeps bla='oeps' foo='bar'> oeps </oeps> oeps " : gsub:lpeg == 623:501 msec (short tags, less difference)
-function xml.filter(root,pattern)
- local kind, a, b, c = parser:match(pattern)
- if kind == 1 or kind == 3 then
- return (filters[b] or default_filter)(root,a,c)
- elseif kind == 2 then
- return attribute_filter(root,a,b)
- else
- return default_filter(root,pattern)
- end
-end
+local cleansed = Cs(((P("<") * (1-P(">"))^0 * P(">"))/"" + 1)^0)
---~ slightly faster, but first we need a proper test file
---~
---~ local hash = { }
---~
---~ function xml.filter(root,pattern)
---~ local h = hash[pattern]
---~ if not h then
---~ local kind, a, b, c = parser:match(pattern)
---~ if kind == 1 then
---~ h = { kind, filters[b] or default_filter, a, b, c }
---~ elseif kind == 2 then
---~ h = { kind, attribute_filter, a, b, c }
---~ else
---~ h = { kind, default_filter, a, b, c }
---~ end
---~ hash[pattern] = h
---~ end
---~ local kind = h[1]
---~ if kind == 1 then
---~ return h[2](root,h[2],h[4])
---~ elseif kind == 2 then
---~ return h[2](root,h[2],h[3])
---~ else
---~ return h[2](root,pattern)
---~ end
---~ end
+xml.escaped_pattern = escaped
+xml.unescaped_pattern = unescaped
+xml.cleansed_pattern = cleansed
---[[ldx--
-<p>The following functions collect elements and texts.</p>
---ldx]]--
+function xml.escaped (str) return escaped :match(str) end
+function xml.unescaped(str) return unescaped:match(str) end
+function xml.cleansed (str) return cleansed :match(str) end
--- still somewhat bugged
-function xml.collect_elements(root, pattern, ignorespaces)
- local rr, dd = { }, { }
- traverse(root, lpath(pattern), function(r,d,k)
- local dk = d and d[k]
- if dk then
- if ignorespaces and type(dk) == "string" and dk:find("[^%S]") then
- -- ignore
- else
- local n = #rr+1
- rr[n], dd[n] = r, dk
- end
- end
- end)
- return dd, rr
-end
+end -- of closure
-function xml.collect_texts(root, pattern, flatten)
- local t = { } -- no r collector
- traverse(root, lpath(pattern), function(r,d,k)
- if d then
- local ek = d[k]
- local tx = ek and ek.dt
- if flatten then
- if tx then
- t[#t+1] = xml.tostring(tx) or ""
- else
- t[#t+1] = ""
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['lxml-aux'] = {
+ version = 1.001,
+ comment = "this module is the basis for the lxml-* ones",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- not all functions here make sense anymore but we keep them for
+-- compatibility reasons
+
+local xmlparseapply, xmlconvert, xmlcopy = xml.parse_apply, xml.convert, xml.copy
+
+local type = type
+local insert, remove = table.insert, table.remove
+local gmatch, gsub = string.gmatch, string.gsub
+
+local function withelements(e,handle,depth) -- calls handle(child,depth) for every table in e.dt, descending after each call
+ if e and handle then
+ local edt = e.dt
+ if edt then
+ depth = depth or 0 -- the direct children of the start element are at depth 0
+ for i=1,#edt do
+ local e = edt[i] -- shadows the parameter: from here on e is the child
+ if type(e) == "table" then
+ handle(e,depth)
+ withelements(e,handle,depth+1)
end
- else
- t[#t+1] = tx or ""
end
- else
- t[#t+1] = ""
end
- end)
- return t
+ end
end
-function xml.collect_tags(root, pattern, nonamespace)
- local t = { }
- xml.traverse(root, xml.lpath(pattern), function(r,d,k)
- local dk = d and d[k]
- if dk and type(dk) == "table" then
- local ns, tg = e.ns, e.tg
- if nonamespace then
- t[#t+1] = tg -- if needed we can return an extra table
- elseif ns == "" then
- t[#t+1] = tg
- else
- t[#t+1] = ns .. ":" .. tg
+xml.withelements = withelements
+
+function xml.withelement(e,n,handle) -- slow; calls handle on the n-th table child of e (n < 0 counts from the end)
+ if e and n ~= 0 and handle then
+ local edt = e.dt
+ if edt then
+ if n > 0 then -- n is compared numerically, so it has to be a number
+ for i=1,#edt do
+ local ei = edt[i]
+ if type(ei) == "table" then -- strings in dt are not counted
+ if n == 1 then
+ handle(ei)
+ return
+ else
+ n = n - 1 -- n counts down the table children still to skip
+ end
+ end
+ end
+ elseif n < 0 then
+ for i=#edt,1,-1 do -- same scan, starting at the last child
+ local ei = edt[i]
+ if type(ei) == "table" then
+ if n == -1 then
+ handle(ei)
+ return
+ else
+ n = n + 1
+ end
+ end
+ end
end
end
- end)
- return #t > 0 and {}
+ end
end
---[[ldx--
-<p>Often using an iterators looks nicer in the code than passing handler
-functions. The <l n='lua'/> book describes how to use coroutines for that
-purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits
-code like:</p>
+xml.elements_only = xml.collected
-<typing>
-for r, d, k in xml.elements(xml.load('text.xml'),"title") do
- print(d[k])
+-- calls handle(e) for each match of pattern under root, last match first when
+-- reverse is set; returns the list of matches, or nothing when there is none
+function xml.each_element(root, pattern, handle, reverse)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ local first, last, step = 1, #collected, 1
+ if reverse then
+ first, last, step = last, 1, -1
+ end
+ for c=first,last,step do
+ handle(collected[c])
+ end
+ return collected
+ end
end
-</typing>
-<p>Which will print all the titles in the document. The iterator variant takes
-1.5 times the runtime of the function variant which is due to the overhead in
-creating the wrapper. So, instead of:</p>
+xml.process_elements = xml.each_element
-<typing>
-function xml.filters.first(root,pattern)
- for rt,dt,dk in xml.elements(root,pattern)
- return dt and dt[dk], rt, dt, dk
+function xml.process_attributes(root, pattern, handle) -- calls handle(e.at) for every match e
+ local collected = xmlparseapply({ root },pattern)
+ if collected and handle then
+ for c=1,#collected do
+ handle(collected[c].at) -- passed on as-is, no empty table is substituted for a missing at
+ end
end
- return nil, nil, nil, nil
+ return collected -- the matches themselves, also when no handle was given
end
-</typing>
-<p>We use the function variants in the filters.</p>
+--[[ldx--
+<p>The following functions collect elements and texts.</p>
--ldx]]--
-local wrap, yield = coroutine.wrap, coroutine.yield
+-- are these still needed -> lxml-cmp.lua
-function xml.elements(root,pattern,reverse)
- return wrap(function() traverse(root, lpath(pattern), yield, reverse) end)
+function xml.collect_elements(root, pattern) -- plain wrapper: wraps root in a list for parse_apply
+ return xmlparseapply({ root },pattern) -- result is returned untouched
end
-function xml.elements_only(root,pattern,reverse)
- return wrap(function() traverse(root, lpath(pattern), function(r,d,k) yield(d[k]) end, reverse) end)
-end
-
-function xml.each_element(root, pattern, handle, reverse)
- local ok
- traverse(root, lpath(pattern), function(r,d,k) ok = true handle(r,d,k) end, reverse)
- return ok
-end
-
-function xml.process_elements(root, pattern, handle)
- traverse(root, lpath(pattern), function(r,d,k)
- local dkdt = d[k].dt
- if dkdt then
- for i=1,#dkdt do
- local v = dkdt[i]
- if v.tg then handle(v) end
- end
+function xml.collect_texts(root, pattern, flatten) -- todo: variant with handle
+ local collected = xmlparseapply({ root },pattern)
+ if collected and flatten then -- without flatten the matched elements are returned as they are
+ local xmltostring = xml.tostring
+ for c=1,#collected do
+ collected[c] = xmltostring(collected[c].dt) -- in place: the element is replaced by its serialized dt
end
- end)
+ end
+ return collected or { } -- always a table, empty when nothing matched
end
-function xml.process_attributes(root, pattern, handle)
- traverse(root, lpath(pattern), function(r,d,k)
- local ek = d[k]
- local a = ek.at or { }
- handle(a)
- if next(a) then -- next is faster than type (and >0 test)
- ek.at = a
- else
- ek.at = nil
+function xml.collect_tags(root, pattern, nonamespace) -- tag names of all matches, as ns:tg unless nonamespace
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ local tags = { }
+ for c=1,#collected do
+ local e = collected[c]
+ local ns, tg = e.ns, e.tg
+ if nonamespace or ns == "" then
+ -- bare tag: on request, or because the namespace is empty
+ tags[#tags+1] = tg
+ else
+ -- qualified name
+ tags[#tags+1] = ns .. ":" .. tg
+ end
end
- end)
+ return tags
+ end
end
--[[ldx--
<p>We've now arrives at the functions that manipulate the tree.</p>
--ldx]]--
+local no_root = { no_root = true }
+
function xml.inject_element(root, pattern, element, prepend)
if root and element then
- local matches, collect = { }, nil
if type(element) == "string" then
- element = convert(element,true)
+ element = xmlconvert(element,no_root)
end
if element then
- collect = function(r,d,k) matches[#matches+1] = { r, d, k, element } end
- traverse(root, lpath(pattern), collect)
- for i=1,#matches do
- local m = matches[i]
- local r, d, k, element, edt = m[1], m[2], m[3], m[4], nil
- if element.ri then
- element = element.dt[element.ri].dt
- else
- element = element.dt
- end
- if r.ri then
- edt = r.dt[r.ri].dt
- else
- edt = d and d[k] and d[k].dt
- end
- if edt then
- local be, af
- if prepend then
- be, af = xml.copy(element), edt
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ local r = e.__p__
+ local d = r.dt
+ local k = e.ni
+ if element.ri then
+ element = element.dt[element.ri].dt
else
- be, af = edt, xml.copy(element)
- end
- for i=1,#af do
- be[#be+1] = af[i]
+ element = element.dt
end
+ local edt
if r.ri then
- r.dt[r.ri].dt = be
+ edt = r.dt[r.ri].dt
else
- d[k].dt = be
+ edt = d and d[k] and d[k].dt
+ end
+ if edt then
+ local be, af
+ if prepend then
+ be, af = xmlcopy(element), edt
+ else
+ be, af = edt, xmlcopy(element)
+ end
+ for i=1,#af do
+ be[#be+1] = af[i]
+ end
+ if r.ri then
+ r.dt[r.ri].dt = be
+ else
+ d[k].dt = be
+ end
+ else
+ -- r.dt = element.dt -- todo
end
- else
- -- r.dt = element.dt -- todo
end
end
end
@@ -4847,32 +5757,31 @@ function xml.insert_element(root, pattern, element, before) -- todo: element als
else
local matches, collect = { }, nil
if type(element) == "string" then
- element = convert(element,true)
+ element = xmlconvert(element,true)
end
if element and element.ri then
element = element.dt[element.ri]
end
if element then
- collect = function(r,d,k) matches[#matches+1] = { r, d, k, element } end
- traverse(root, lpath(pattern), collect)
- for i=#matches,1,-1 do
- local m = matches[i]
- local r, d, k, element = m[1], m[2], m[3], m[4]
- if not before then k = k + 1 end
- if element.tg then
- insert(d,k,element) -- untested
---~ elseif element.dt then
---~ for _,v in ipairs(element.dt) do -- i added
---~ insert(d,k,v)
---~ k = k + 1
---~ end
---~ end
- else
- local edt = element.dt
- if edt then
- for i=1,#edt do
- insert(d,k,edt[i])
- k = k + 1
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ local r = e.__p__
+ local d = r.dt
+ local k = e.ni
+ if not before then
+ k = k + 1
+ end
+ if element.tg then
+ insert(d,k,element) -- untested
+ else
+ local edt = element.dt
+ if edt then
+ for i=1,#edt do
+ insert(d,k,edt[i])
+ k = k + 1
+ end
end
end
end
@@ -4888,105 +5797,114 @@ xml.inject_element_after = xml.inject_element
xml.inject_element_before = function(r,p,e) xml.inject_element(r,p,e,true) end
function xml.delete_element(root, pattern)
- local matches, deleted = { }, { }
- local collect = function(r,d,k) matches[#matches+1] = { r, d, k } end
- traverse(root, lpath(pattern), collect)
- for i=#matches,1,-1 do
- local m = matches[i]
- deleted[#deleted+1] = remove(m[2],m[3])
+ local collected = xmlparseapply({ root },pattern) -- the elements to remove; may be nil when nothing matches
+ if collected then
+ for c=#collected,1,-1 do -- back to front, as before: a removal shifts later siblings, so their stored .ni would be stale
+ local e = collected[c]
+ remove(e.__p__.dt,e.ni)
+ e.ni = nil -- detached: it no longer has an index in a parent
+ end
end
- return deleted
+ return collected -- the removed elements (was 'collection', an undefined global, so always nil)
end
function xml.replace_element(root, pattern, element)
if type(element) == "string" then
- element = convert(element,true)
+ element = xmlconvert(element,true)
end
if element and element.ri then
element = element.dt[element.ri]
end
if element then
- traverse(root, lpath(pattern), function(rm, d, k)
- d[k] = element.dt -- maybe not clever enough
- end)
- end
-end
-
-local function load_data(name) -- == io.loaddata
- local f, data = io.open(name), ""
- if f then
- data = f:read("*all",'b') -- 'b' ?
- f:close()
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ e.__p__.dt[e.ni] = element.dt -- maybe not clever enough
+ end
+ end
end
- return data
end
-function xml.include(xmldata,pattern,attribute,recursive,loaddata)
+local function include(xmldata,pattern,attribute,recursive,loaddata)
-- parse="text" (default: xml), encoding="" (todo)
-- attribute = attribute or 'href'
pattern = pattern or 'include'
- loaddata = loaddata or load_data
- local function include(r,d,k)
- local ek, name = d[k], nil
- if not attribute or attribute == "" then
+ loaddata = loaddata or io.loaddata
+ local collected = xmlparseapply({ xmldata },pattern)
+ if collected then
+ for c=1,#collected do
+ local ek = collected[c]
+ local name = nil
local ekdt = ek.dt
- name = (type(ekdt) == "table" and ekdt[1]) or ekdt
- end
- if not name then
- if ek.at then
+ local ekat = ek.at
+ local epdt = ek.__p__.dt
+ if not attribute or attribute == "" then
+ name = (type(ekdt) == "table" and ekdt[1]) or ekdt -- ckeck, probably always tab or str
+ end
+ if not name then
for a in gmatch(attribute or "href","([^|]+)") do
- name = ek.at[a]
+ name = ekat[a]
if name then break end
end
end
- end
- local data = (name and name ~= "" and loaddata(name)) or ""
- if data == "" then
- xml.empty(d,k)
- elseif ek.at["parse"] == "text" then -- for the moment hard coded
- d[k] = xml.escaped(data)
- else
- local xi = xml.convert(data)
- if not xi then
- xml.empty(d,k)
+ local data = (name and name ~= "" and loaddata(name)) or ""
+ if data == "" then
+ epdt[ek.ni] = "" -- xml.empty(d,k)
+ elseif ekat["parse"] == "text" then
+ -- for the moment hard coded
+ epdt[ek.ni] = xml.escaped(data) -- d[k] = xml.escaped(data)
else
- if recursive then
- xml.include(xi,pattern,attribute,recursive,loaddata)
+ local settings = xmldata.settings
+ settings.parent_root = xmldata -- to be tested
+ local xi = xmlconvert(data,settings)
+ if not xi then
+ epdt[ek.ni] = "" -- xml.empty(d,k)
+ else
+ if recursive then
+ include(xi,pattern,attribute,recursive,loaddata)
+ end
+ epdt[ek.ni] = xml.body(xi) -- xml.assign(d,k,xi)
end
- xml.assign(d,k,xi)
end
end
end
- xml.each_element(xmldata, pattern, include)
end
+xml.include = include
+
function xml.strip_whitespace(root, pattern, nolines) -- strips all leading and trailing space !
- traverse(root, lpath(pattern), function(r,d,k)
- local dkdt = d[k].dt
- if dkdt then -- can be optimized
- local t = { }
- for i=1,#dkdt do
- local str = dkdt[i]
- if type(str) == "string" then
- if str == "" then
- -- stripped
- else
- if nolines then
- str = gsub(str,"[ \n\r\t]+"," ")
- end
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for i=1,#collected do
+ local e = collected[i]
+ local edt = e.dt
+ if edt then
+ local t = { }
+ for i=1,#edt do
+ local str = edt[i]
+ if type(str) == "string" then
if str == "" then
-- stripped
else
- t[#t+1] = str
+ if nolines then
+ str = gsub(str,"[ \n\r\t]+"," ")
+ end
+ if str == "" then
+ -- stripped
+ else
+ t[#t+1] = str
+ end
end
+ else
+--~ str.ni = i
+ t[#t+1] = str
end
- else
- t[#t+1] = str
end
+ e.dt = t
end
- d[k].dt = t
end
- end)
+ end
end
local function rename_space(root, oldspace, newspace) -- fast variant
@@ -5011,59 +5929,49 @@ end
xml.rename_space = rename_space
function xml.remap_tag(root, pattern, newtg)
- traverse(root, lpath(pattern), function(r,d,k)
- d[k].tg = newtg
- end)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ collected[c].tg = newtg
+ end
+ end
end
+
function xml.remap_namespace(root, pattern, newns)
- traverse(root, lpath(pattern), function(r,d,k)
- d[k].ns = newns
- end)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ collected[c].ns = newns
+ end
+ end
end
+
function xml.check_namespace(root, pattern, newns)
- traverse(root, lpath(pattern), function(r,d,k)
- local dk = d[k]
- if (not dk.rn or dk.rn == "") and dk.ns == "" then
- dk.rn = newns
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ if (not e.rn or e.rn == "") and e.ns == "" then
+ e.rn = newns
+ end
end
- end)
-end
-function xml.remap_name(root, pattern, newtg, newns, newrn)
- traverse(root, lpath(pattern), function(r,d,k)
- local dk = d[k]
- dk.tg = newtg
- dk.ns = newns
- dk.rn = newrn
- end)
+ end
end
-function xml.filters.found(root,pattern,check_content)
- local found = false
- traverse(root, lpath(pattern), function(r,d,k)
- if check_content then
- local dk = d and d[k]
- found = dk and dk.dt and next(dk.dt) and true
- else
- found = true
+function xml.remap_name(root, pattern, newtg, newns, newrn)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ e.tg, e.ns, e.rn = newtg, newns, newrn
end
- return true
- end)
- return found
+ end
end
--[[ldx--
<p>Here are a few synonyms.</p>
--ldx]]--
-xml.filters.position = xml.filters.index
-
-xml.count = xml.filters.count
-xml.index = xml.filters.index
-xml.position = xml.filters.index
-xml.first = xml.filters.first
-xml.last = xml.filters.last
-xml.found = xml.filters.found
-
xml.each = xml.each_element
xml.process = xml.process_element
xml.strip = xml.strip_whitespace
@@ -5077,155 +5985,12 @@ xml.before = xml.insert_element_before
xml.delete = xml.delete_element
xml.replace = xml.replace_element
---[[ldx--
-<p>The following helper functions best belong to the <t>lmxl-ini</t>
-module. Some are here because we need then in the <t>mk</t>
-document and other manuals, others came up when playing with
-this module. Since this module is also used in <l n='mtxrun'/> we've
-put them here instead of loading mode modules there then needed.</p>
---ldx]]--
-
-function xml.gsub(t,old,new)
- local dt = t.dt
- if dt then
- for k=1,#dt do
- local v = dt[k]
- if type(v) == "string" then
- dt[k] = gsub(v,old,new)
- else
- xml.gsub(v,old,new)
- end
- end
- end
-end
-
-function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual
- if d and k and d[k-1] and type(d[k-1]) == "string" then
- local s = d[k-1]:match("\n(%s+)")
- xml.gsub(dk,"\n"..rep(" ",#s),"\n")
- end
-end
-
-function xml.serialize_path(root,lpath,handle)
- local dk, r, d, k = xml.first(root,lpath)
- dk = xml.copy(dk)
- xml.strip_leading_spaces(dk,d,k)
- xml.serialize(dk,handle)
-end
-
---~ xml.escapes = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
---~ xml.unescapes = { } for k,v in pairs(xml.escapes) do xml.unescapes[v] = k end
-
---~ function xml.escaped (str) return (gsub(str,"(.)" , xml.escapes )) end
---~ function xml.unescaped(str) return (gsub(str,"(&.-;)", xml.unescapes)) end
---~ function xml.cleansed (str) return (gsub(str,"<.->" , '' )) end -- "%b<>"
-
-local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, lpeg.Cs
-
--- 100 * 2500 * "oeps< oeps> oeps&" : gsub:lpeg|lpeg|lpeg
---
--- 1021:0335:0287:0247
-
--- 10 * 1000 * "oeps< oeps> oeps& asfjhalskfjh alskfjh alskfjh alskfjh ;al J;LSFDJ"
---
--- 1559:0257:0288:0190 (last one suggested by roberto)
-
--- escaped = Cs((S("<&>") / xml.escapes + 1)^0)
--- escaped = Cs((S("<")/"&lt;" + S(">")/"&gt;" + S("&")/"&amp;" + 1)^0)
-local normal = (1 - S("<&>"))^0
-local special = P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;"
-local escaped = Cs(normal * (special * normal)^0)
-
--- 100 * 1000 * "oeps&lt; oeps&gt; oeps&amp;" : gsub:lpeg == 0153:0280:0151:0080 (last one by roberto)
-
--- unescaped = Cs((S("&lt;")/"<" + S("&gt;")/">" + S("&amp;")/"&" + 1)^0)
--- unescaped = Cs((((P("&")/"") * (P("lt")/"<" + P("gt")/">" + P("amp")/"&") * (P(";")/"")) + 1)^0)
-local normal = (1 - S"&")^0
-local special = P("&lt;")/"<" + P("&gt;")/">" + P("&amp;")/"&"
-local unescaped = Cs(normal * (special * normal)^0)
-
--- 100 * 5000 * "oeps <oeps bla='oeps' foo='bar'> oeps </oeps> oeps " : gsub:lpeg == 623:501 msec (short tags, less difference)
-
-local cleansed = Cs(((P("<") * (1-P(">"))^0 * P(">"))/"" + 1)^0)
-
-function xml.escaped (str) return escaped :match(str) end
-function xml.unescaped(str) return unescaped:match(str) end
-function xml.cleansed (str) return cleansed :match(str) end
-
-function xml.join(t,separator,lastseparator)
- if #t > 0 then
- local result = { }
- for k,v in pairs(t) do
- result[k] = xml.tostring(v)
- end
- if lastseparator then
- return concat(result,separator or "",1,#result-1) .. (lastseparator or "") .. result[#result]
- else
- return concat(result,separator)
- end
- else
- return ""
- end
-end
-
-function xml.statistics()
- return {
- lpathcalls = lpathcalls,
- lpathcached = lpathcached,
- }
-end
-
--- xml.set_text_cleanup(xml.show_text_entities)
--- xml.set_text_cleanup(xml.resolve_text_entities)
-
---~ xml.lshow("/../../../a/(b|c)[@d='e']/f")
---~ xml.lshow("/../../../a/!(b|c)[@d='e']/f")
---~ xml.lshow("/../../../a/!b[@d!='e']/f")
-
---~ x = xml.convert([[
---~ <a>
---~ <b n='01'>01</b>
---~ <b n='02'>02</b>
---~ <b n='03'>03</b>
---~ <b n='04'>OK</b>
---~ <b n='05'>05</b>
---~ <b n='06'>06</b>
---~ <b n='07'>ALSO OK</b>
---~ </a>
---~ ]])
-
---~ xml.settrace("lpath",true)
-
---~ xml.xshow(xml.first(x,"b[position() > 2 and position() < 5 and text() == 'ok']"))
---~ xml.xshow(xml.first(x,"b[position() > 2 and position() < 5 and text() == upper('ok')]"))
---~ xml.xshow(xml.first(x,"b[@n=='03' or @n=='08']"))
---~ xml.xshow(xml.all (x,"b[number(@n)>2 and number(@n)<6]"))
---~ xml.xshow(xml.first(x,"b[find(text(),'ALSO')]"))
-
---~ str = [[
---~ <?xml version="1.0" encoding="utf-8"?>
---~ <story line='mojca'>
---~ <windows>my secret</mouse>
---~ </story>
---~ ]]
-
---~ x = xml.convert([[
---~ <a><b n='01'>01</b><b n='02'>02</b><x>xx</x><b n='03'>03</b><b n='04'>OK</b></a>
---~ ]])
---~ xml.xshow(xml.first(x,"b[tag(2) == 'x']"))
---~ xml.xshow(xml.first(x,"b[tag(1) == 'x']"))
---~ xml.xshow(xml.first(x,"b[tag(-1) == 'x']"))
---~ xml.xshow(xml.first(x,"b[tag(-2) == 'x']"))
-
---~ print(xml.filter(x,"b/tag(2)"))
---~ print(xml.filter(x,"b/tag(1)"))
-
end -- of closure
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['lxml-ent'] = {
+if not modules then modules = { } end modules ['lxml-xml'] = {
version = 1.001,
comment = "this module is the basis for the lxml-* ones",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -5233,457 +5998,249 @@ if not modules then modules = { } end modules ['lxml-ent'] = {
license = "see context related readme files"
}
-local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
-local format, gsub, find = string.format, string.gsub, string.find
-local utfchar = unicode.utf8.char
-
---[[ldx--
-<p>We provide (at least here) two entity handlers. The more extensive
-resolver consults a hash first, tries to convert to <l n='utf'/> next,
-and finaly calls a handler when defines. When this all fails, the
-original entity is returned.</p>
---ldx]]--
+local finalizers = xml.finalizers.xml
+local xmlfilter = xml.filter -- we could inline this one for speed
+local xmltostring = xml.tostring
+local xmlserialize = xml.serialize
-xml.entities = xml.entities or { } -- xml.entity_handler == function
-
-function xml.entity_handler(e)
- return format("[%s]",e)
+local function first(collected)
+ return collected and collected[1]
end
-local function toutf(s)
- return utfchar(tonumber(s,16))
+local function last(collected)
+ return collected and collected[#collected]
end
-local function utfize(root)
- local d = root.dt
- for k=1,#d do
- local dk = d[k]
- if type(dk) == "string" then
- -- test prevents copying if no match
- if find(dk,"&#x.-;") then
- d[k] = gsub(dk,"&#x(.-);",toutf)
- end
- else
- utfize(dk)
- end
- end
+local function all(collected)
+ return collected
end
-xml.utfize = utfize
-
-local function resolve(e) -- hex encoded always first, just to avoid mkii fallbacks
- if find(e,"^#x") then
- return utfchar(tonumber(e:sub(3),16))
- elseif find(e,"^#") then
- return utfchar(tonumber(e:sub(2)))
- else
- local ee = xml.entities[e] -- we cannot shortcut this one (is reloaded)
- if ee then
- return ee
- else
- local h = xml.entity_handler
- return (h and h(e)) or "&" .. e .. ";"
+local function reverse(collected)
+ if collected then
+ local reversed = { }
+ for c=#collected,1,-1 do
+ reversed[#reversed+1] = collected[c]
end
+ return reversed
end
end
-local function resolve_entities(root)
- if not root.special or root.tg == "@rt@" then
- local d = root.dt
- for k=1,#d do
- local dk = d[k]
- if type(dk) == "string" then
- if find(dk,"&.-;") then
- d[k] = gsub(dk,"&(.-);",resolve)
- end
- else
- resolve_entities(dk)
- end
- end
- end
+local function attribute(collected,name)
+ local at = collected and collected[1].at
+ return at and at[name]
end
-xml.resolve_entities = resolve_entities
+local function att(id,name)
+ local at = id.at
+ return at and at[name]
+end
-function xml.utfize_text(str)
- if find(str,"&#") then
- return (gsub(str,"&#x(.-);",toutf))
- else
- return str
- end
+local function count(collected)
+ return (collected and #collected) or 0
end
-function xml.resolve_text_entities(str) -- maybe an lpeg. maybe resolve inline
- if find(str,"&") then
- return (gsub(str,"&(.-);",resolve))
- else
- return str
+local function position(collected,n)
+ if collected then
+ n = tonumber(n) or 0
+ if n < 0 then
+ return collected[#collected + n + 1]
+ else
+ return collected[n]
+ end
end
end
-function xml.show_text_entities(str)
- if find(str,"&") then
- return (gsub(str,"&(.-);","[%1]"))
- else
- return str
+local function index(collected)
+ if collected then
+ return collected[1].ni
end
end
--- experimental, this will be done differently
-
-function xml.merge_entities(root)
- local documententities = root.entities
- local allentities = xml.entities
- if documententities then
- for k, v in next, documententities do
- allentities[k] = v
+local function attributes(collected,arguments)
+ if collected then
+ local at = collected[1].at
+ if arguments then
+ return at[arguments]
+ elseif next(at) then
+ return at -- all of them
end
end
end
-
-end -- of closure
-
-do -- create closure to overcome 200 locals limit
-
-if not modules then modules = { } end modules ['lxml-mis'] = {
- version = 1.001,
- comment = "this module is the basis for the lxml-* ones",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
-local concat = table.concat
-local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
-local format, gsub = string.format, string.gsub
-
---[[ldx--
-<p>The following helper functions best belong to the <t>lmxl-ini</t>
-module. Some are here because we need then in the <t>mk</t>
-document and other manuals, others came up when playing with
-this module. Since this module is also used in <l n='mtxrun'/> we've
-put them here instead of loading mode modules there then needed.</p>
---ldx]]--
-
-function xml.gsub(t,old,new)
- local dt = t.dt
- if dt then
- for k=1,#dt do
- local v = dt[k]
- if type(v) == "string" then
- dt[k] = gsub(v,old,new)
+local function chainattribute(collected,arguments) -- todo: optional levels
+ if collected then
+ local e = collected[1]
+ while e do
+ local at = e.at
+ if at then
+ local a = at[arguments]
+ if a then
+ return a
+ end
else
- xml.gsub(v,old,new)
+ break -- error
end
+ e = e.__p__
end
end
+ return ""
end
-function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual
- if d and k and d[k-1] and type(d[k-1]) == "string" then
- local s = d[k-1]:match("\n(%s+)")
- xml.gsub(dk,"\n"..string.rep(" ",#s),"\n")
+local function text(collected)
+ if collected then
+ return xmltostring(collected[1]) -- only first as we cannot concat function
+ else
+ return ""
end
end
-function xml.serialize_path(root,lpath,handle)
- local dk, r, d, k = xml.first(root,lpath)
- dk = xml.copy(dk)
- xml.strip_leading_spaces(dk,d,k)
- xml.serialize(dk,handle)
-end
-
---~ xml.escapes = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
---~ xml.unescapes = { } for k,v in pairs(xml.escapes) do xml.unescapes[v] = k end
-
---~ function xml.escaped (str) return (gsub(str,"(.)" , xml.escapes )) end
---~ function xml.unescaped(str) return (gsub(str,"(&.-;)", xml.unescapes)) end
---~ function xml.cleansed (str) return (gsub(str,"<.->" , '' )) end -- "%b<>"
-
-local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, lpeg.Cs
-
--- 100 * 2500 * "oeps< oeps> oeps&" : gsub:lpeg|lpeg|lpeg
---
--- 1021:0335:0287:0247
-
--- 10 * 1000 * "oeps< oeps> oeps& asfjhalskfjh alskfjh alskfjh alskfjh ;al J;LSFDJ"
---
--- 1559:0257:0288:0190 (last one suggested by roberto)
-
--- escaped = Cs((S("<&>") / xml.escapes + 1)^0)
--- escaped = Cs((S("<")/"&lt;" + S(">")/"&gt;" + S("&")/"&amp;" + 1)^0)
-local normal = (1 - S("<&>"))^0
-local special = P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;"
-local escaped = Cs(normal * (special * normal)^0)
-
--- 100 * 1000 * "oeps&lt; oeps&gt; oeps&amp;" : gsub:lpeg == 0153:0280:0151:0080 (last one by roberto)
-
--- unescaped = Cs((S("&lt;")/"<" + S("&gt;")/">" + S("&amp;")/"&" + 1)^0)
--- unescaped = Cs((((P("&")/"") * (P("lt")/"<" + P("gt")/">" + P("amp")/"&") * (P(";")/"")) + 1)^0)
-local normal = (1 - S"&")^0
-local special = P("&lt;")/"<" + P("&gt;")/">" + P("&amp;")/"&"
-local unescaped = Cs(normal * (special * normal)^0)
-
--- 100 * 5000 * "oeps <oeps bla='oeps' foo='bar'> oeps </oeps> oeps " : gsub:lpeg == 623:501 msec (short tags, less difference)
-
-local cleansed = Cs(((P("<") * (1-P(">"))^0 * P(">"))/"" + 1)^0)
-
-xml.escaped_pattern = escaped
-xml.unescaped_pattern = unescaped
-xml.cleansed_pattern = cleansed
-
-function xml.escaped (str) return escaped :match(str) end
-function xml.unescaped(str) return unescaped:match(str) end
-function xml.cleansed (str) return cleansed :match(str) end
-
-function xml.join(t,separator,lastseparator)
- if #t > 0 then
- local result = { }
- for k,v in pairs(t) do
- result[k] = xml.tostring(v)
- end
- if lastseparator then
- return concat(result,separator or "",1,#result-1) .. (lastseparator or "") .. result[#result]
- else
- return concat(result,separator)
+-- Collect the content tables (dt) of all matched elements; elements
+-- without a dt are skipped. Returns nil when nothing was collected.
+local function texts(collected)
+ if collected then
+ local t = { }
+ for c=1,#collected do
+ local e = collected[c] -- was 'collection', an undefined global
+ if e and e.dt then
+ t[#t+1] = e.dt
+ end
+ end
- else
- return ""
+ return t
end
end
-
-end -- of closure
-
-do -- create closure to overcome 200 locals limit
-
-if not modules then modules = { } end modules ['trac-tra'] = {
- version = 1.001,
- comment = "companion to trac-tra.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
--- the <anonymous> tag is kind of generic and used for functions that are not
--- bound to a variable, like node.new, node.copy etc (contrary to for instance
--- node.has_attribute which is bound to a has_attribute local variable in mkiv)
-
-debugger = debugger or { }
-
-local counters = { }
-local names = { }
-local getinfo = debug.getinfo
-local format, find, lower, gmatch = string.format, string.find, string.lower, string.gmatch
-
--- one
-
-local function hook()
- local f = getinfo(2,"f").func
- local n = getinfo(2,"Sn")
--- if n.what == "C" and n.name then print (n.namewhat .. ': ' .. n.name) end
- if f then
- local cf = counters[f]
- if cf == nil then
- counters[f] = 1
- names[f] = n
+-- Return the tag of one collected element: the first when n is nil or 0,
+-- the n-th when n is positive, counted from the end when n is negative
+-- (-1 is the last one), the same convention as the position finalizer.
+local function tag(collected,n)
+ if collected then
+ local c
+ if n == 0 or not n then
+ c = collected[1]
+ elseif n > 0 then -- was n > 1, which sent n == 1 to the from-the-end branch
+ c = collected[n]
else
- counters[f] = cf + 1
+ c = collected[#collected+n+1] -- n < 0 here; #collected-n+1 indexed past the end
end
+ return c and c.tg
end
end
-local function getname(func)
- local n = names[func]
- if n then
- if n.what == "C" then
- return n.name or '<anonymous>'
+
+-- Return the (namespace prefixed) tag of one collected element: the first
+-- when n is nil or 0, the n-th when n is positive, counted from the end
+-- when n is negative (-1 is the last one). An empty namespace gives the
+-- bare tag, otherwise the result is "ns:tg".
+local function name(collected,n)
+ if collected then
+ local c
+ if n == 0 or not n then
+ c = collected[1]
+ elseif n > 0 then -- was n > 1, which sent n == 1 to the from-the-end branch
+ c = collected[n]
else
- -- source short_src linedefined what name namewhat nups func
- local name = n.name or n.namewhat or n.what
- if not name or name == "" then name = "?" end
- return format("%s : %s : %s", n.short_src or "unknown source", n.linedefined or "--", name)
+ c = collected[#collected+n+1] -- n < 0 here; #collected-n+1 indexed past the end
end
- else
- return "unknown"
- end
-end
-function debugger.showstats(printer,threshold)
- printer = printer or texio.write or print
- threshold = threshold or 0
- local total, grandtotal, functions = 0, 0, 0
- printer("\n") -- ugly but ok
- -- table.sort(counters)
- for func, count in pairs(counters) do
- if count > threshold then
- local name = getname(func)
- if not name:find("for generator") then
- printer(format("%8i %s", count, name))
- total = total + count
+ if c then
+ if c.ns == "" then
+ return c.tg
+ else
+ return c.ns .. ":" .. c.tg
end
end
- grandtotal = grandtotal + count
- functions = functions + 1
end
- printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", functions, total, grandtotal, threshold))
end
--- two
-
---~ local function hook()
---~ local n = getinfo(2)
---~ if n.what=="C" and not n.name then
---~ local f = tostring(debug.traceback())
---~ local cf = counters[f]
---~ if cf == nil then
---~ counters[f] = 1
---~ names[f] = n
---~ else
---~ counters[f] = cf + 1
---~ end
---~ end
---~ end
---~ function debugger.showstats(printer,threshold)
---~ printer = printer or texio.write or print
---~ threshold = threshold or 0
---~ local total, grandtotal, functions = 0, 0, 0
---~ printer("\n") -- ugly but ok
---~ -- table.sort(counters)
---~ for func, count in pairs(counters) do
---~ if count > threshold then
---~ printer(format("%8i %s", count, func))
---~ total = total + count
---~ end
---~ grandtotal = grandtotal + count
---~ functions = functions + 1
---~ end
---~ printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", functions, total, grandtotal, threshold))
---~ end
-
--- rest
-
-function debugger.savestats(filename,threshold)
- local f = io.open(filename,'w')
- if f then
- debugger.showstats(function(str) f:write(str) end,threshold)
- f:close()
+local function tags(collected,nonamespace)
+ if collected then
+ local t = { }
+ for c=1,#collected do
+ local e = collected[c]
+ local ns, tg = e.ns, e.tg
+ if nonamespace or ns == "" then
+ t[#t+1] = tg
+ else
+ t[#t+1] = ns .. ":" .. tg
+ end
+ end
+ return t
end
end
-function debugger.enable()
- debug.sethook(hook,"c")
-end
-
-function debugger.disable()
- debug.sethook()
---~ counters[debug.getinfo(2,"f").func] = nil
-end
-
-function debugger.tracing()
- local n = tonumber(os.env['MTX.TRACE.CALLS']) or tonumber(os.env['MTX_TRACE_CALLS']) or 0
- if n > 0 then
- function debugger.tracing() return true end ; return true
- else
- function debugger.tracing() return false end ; return false
+local function empty(collected)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ if e then
+ local edt = e.dt
+ if edt then
+ local n = #edt
+ if n == 1 then
+ local edk = edt[1]
+ local typ = type(edk)
+ if typ == "table" then
+ return false
+ elseif edk ~= "" then -- maybe an extra tester for spacing only
+ return false
+ end
+ elseif n > 1 then
+ return false
+ end
+ end
+ end
+ end
end
+ return true
end
---~ debugger.enable()
-
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
-
---~ debugger.disable()
-
---~ print("")
---~ debugger.showstats()
---~ print("")
---~ debugger.showstats(print,3)
+finalizers.first = first
+finalizers.last = last
+finalizers.all = all
+finalizers.reverse = reverse
+finalizers.elements = all
+finalizers.default = all
+finalizers.attribute = attribute
+finalizers.att = att
+finalizers.count = count
+finalizers.position = position
+finalizers.index = index
+finalizers.attributes = attributes
+finalizers.chainattribute = chainattribute
+finalizers.text = text
+finalizers.texts = texts
+finalizers.tag = tag
+finalizers.name = name
+finalizers.tags = tags
+finalizers.empty = empty
-trackers = trackers or { }
+-- shortcuts -- we could support xmlfilter(id,pattern,first)
-local data, done = { }, { }
+function xml.first(id,pattern)
+ return first(xmlfilter(id,pattern))
+end
-local function set(what,value)
- if type(what) == "string" then
- what = aux.settings_to_array(what)
- end
- for i=1,#what do
- local w = what[i]
- for d, f in next, data do
- if done[d] then
- -- prevent recursion due to wildcards
- elseif find(d,w) then
- done[d] = true
- for i=1,#f do
- f[i](value)
- end
- end
- end
- end
+function xml.last(id,pattern)
+ return last(xmlfilter(id,pattern))
end
-local function reset()
- for d, f in next, data do
- for i=1,#f do
- f[i](false)
- end
- end
+function xml.count(id,pattern)
+ return count(xmlfilter(id,pattern))
end
-function trackers.register(what,...)
- what = lower(what)
- local w = data[what]
- if not w then
- w = { }
- data[what] = w
- end
- for _, fnc in next, { ... } do
- local typ = type(fnc)
- if typ == "function" then
- w[#w+1] = fnc
- elseif typ == "string" then
- w[#w+1] = function(value) set(fnc,value,nesting) end
- end
- end
+function xml.attribute(id,pattern,a,default)
+ return attribute(xmlfilter(id,pattern),a,default)
end
-function trackers.enable(what)
- done = { }
- set(what,true)
+function xml.text(id,pattern)
+ return text(xmlfilter(id,pattern))
end
-function trackers.disable(what)
- done = { }
- if not what or what == "" then
- trackers.reset(what)
- else
- set(what,false)
- end
+function xml.raw(id,pattern)
+ return xmlserialize(xmlfilter(id,pattern))
end
-function trackers.reset(what)
- done = { }
- reset()
+function xml.position(id,pattern,n)
+ return position(xmlfilter(id,pattern),n)
end
-function trackers.list() -- pattern
- local list = table.sortedkeys(data)
- local user, system = { }, { }
- for l=1,#list do
- local what = list[l]
- if find(what,"^%*") then
- system[#system+1] = what
- else
- user[#user+1] = what
- end
- end
- return user, system
+function xml.empty(id,pattern)
+ return empty(xmlfilter(id,pattern))
end
+xml.all = xml.filter
+xml.index = xml.position
+xml.found = xml.filter
+
end -- of closure
@@ -6135,6 +6692,7 @@ function statistics.timed(action,report)
end
+
end -- of closure
do -- create closure to overcome 200 locals limit
@@ -9814,11 +10372,13 @@ own.libs = { -- todo: check which ones are really needed
'l-utils.lua',
'l-aux.lua',
-- 'l-xml.lua',
+ 'trac-tra.lua',
'lxml-tab.lua',
- 'lxml-pth.lua',
+ 'lxml-lpt.lua',
'lxml-ent.lua',
'lxml-mis.lua',
- 'trac-tra.lua',
+ 'lxml-aux.lua',
+ 'lxml-xml.lua',
'luat-env.lua',
'trac-inf.lua',
'trac-log.lua',
@@ -9889,7 +10449,7 @@ if not resolvers then
os.exit()
end
-logs.setprogram('MTXrun',"TDS Runner Tool 1.22",environment.arguments["verbose"] or false)
+logs.setprogram('MTXrun',"TDS Runner Tool 1.23",environment.arguments["verbose"] or false)
local instance = resolvers.reset()
diff --git a/scripts/context/stubs/unix/luatools b/scripts/context/stubs/unix/luatools
index a8cfbd5b0..2bc943210 100755
--- a/scripts/context/stubs/unix/luatools
+++ b/scripts/context/stubs/unix/luatools
@@ -230,6 +230,16 @@ function string:pattesc()
return (gsub(self,".",patterns_escapes))
end
+local simple_escapes = {
+ ["-"] = "%-",
+ ["."] = "%.",
+ ["*"] = ".*",
+}
+
+function string:simpleesc()
+ return (gsub(self,".",simple_escapes))
+end
+
function string:tohash()
local t = { }
for s in gmatch(self,"([^, ]+)") do -- lpeg
@@ -279,6 +289,12 @@ function string:compactlong() -- strips newlines and leading spaces
return self
end
+function string:striplong() -- strips newlines and leading spaces
+ self = gsub(self,"^%s*","")
+ self = gsub(self,"[\n\r]+ *","\n")
+ return self
+end
+
end -- of closure
@@ -387,6 +403,18 @@ function string:split(separator)
return c:match(self)
end
+--~ function lpeg.L(list,pp)
+--~ local p = pp
+--~ for l=1,#list do
+--~ if p then
+--~ p = p + lpeg.P(list[l])
+--~ else
+--~ p = lpeg.P(list[l])
+--~ end
+--~ end
+--~ return p
+--~ end
+
end -- of closure
@@ -420,6 +448,14 @@ function table.strip(tab)
return lst
end
+function table.keys(t)
+ local k = { }
+ for key,_ in next, t do
+ k[#k+1] = key
+ end
+ return k
+end
+
local function compare(a,b)
return (tostring(a) < tostring(b))
end
@@ -1192,21 +1228,35 @@ function table.reverse(t)
return tt
end
---~ function table.keys(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return k
---~ end
+-- Move (or add) 'extra' so that it sits directly before the first 'value'
+-- in the array t; when 'value' is absent, 'extra' becomes the first entry.
+function table.insert_before_value(t,value,extra)
+ for i=#t,1,-1 do -- downward, so removing an entry never skips its neighbour
+ if t[i] == extra then
+ remove(t,i)
+ end
+ end
+ for i=1,#t do
+ if t[i] == value then
+ insert(t,i,extra)
+ return
+ end
+ end
+ insert(t,1,extra)
+end
---~ function table.keys_as_string(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return concat(k,"")
---~ end
+-- Move (or add) 'extra' so that it sits directly after the first 'value'
+-- in the array t; when 'value' is absent, 'extra' is appended.
+function table.insert_after_value(t,value,extra)
+ for i=#t,1,-1 do -- downward, so removing an entry never skips its neighbour
+ if t[i] == extra then
+ remove(t,i)
+ end
+ end
+ for i=1,#t do
+ if t[i] == value then
+ insert(t,i+1,extra)
+ return
+ end
+ end
+ insert(t,#t+1,extra)
+end
end -- of closure
@@ -1413,7 +1463,7 @@ if not modules then modules = { } end modules ['l-number'] = {
license = "see context related readme files"
}
-local format = string.format
+-- floor and insert are needed by number.bits in this module ('foor' was a typo
+-- that left floor an undefined global)
+local format, floor, insert = string.format, math.floor, table.insert
number = number or { }
@@ -1449,7 +1499,18 @@ function number.toset(n)
return one:match(tostring(n))
end
-
+function number.bits(n,zero)
+ local t, i = { }, (zero and 0) or 1
+ while n > 0 do
+ local m = n % 2
+ if m > 0 then
+ insert(t,1,i)
+ end
+ n = floor(n/2)
+ i = i + 1
+ end
+ return t
+end
end -- of closure
@@ -1914,11 +1975,11 @@ local rootbased = lpeg.P("/") + letter*lpeg.P(":")
-- ./name ../name /name c: :// name/name
function file.is_qualified_path(filename)
- return qualified:match(filename)
+ return qualified:match(filename) ~= nil
end
function file.is_rootbased_path(filename)
- return rootbased:match(filename)
+ return rootbased:match(filename) ~= nil
end
local slash = lpeg.S("\\/")
@@ -3134,6 +3195,24 @@ function aux.accesstable(target)
return t
end
+-- as we use this a lot ...
+
+--~ function aux.cachefunction(action,weak)
+--~ local cache = { }
+--~ if weak then
+--~ setmetatable(cache, { __mode = "kv" } )
+--~ end
+--~ local function reminder(str)
+--~ local found = cache[str]
+--~ if not found then
+--~ found = action(str)
+--~ cache[str] = found
+--~ end
+--~ return found
+--~ end
+--~ return reminder, cache
+--~ end
+
end -- of closure
@@ -3156,7 +3235,7 @@ debugger = debugger or { }
local counters = { }
local names = { }
local getinfo = debug.getinfo
-local format, find, lower, gmatch = string.format, string.find, string.lower, string.gmatch
+local format, find, lower, gmatch, gsub = string.format, string.find, string.lower, string.gmatch, string.gsub
-- one
@@ -3290,7 +3369,7 @@ local data, done = { }, { }
local function set(what,value)
if type(what) == "string" then
- what = aux.settings_to_array(what)
+ what = aux.settings_to_array(what) -- inefficient but ok
end
for i=1,#what do
local w = what[i]
@@ -3315,6 +3394,19 @@ local function reset()
end
end
+local function enable(what)
+ set(what,true)
+end
+
+local function disable(what)
+ if not what or what == "" then
+ done = { }
+ reset()
+ else
+ set(what,false)
+ end
+end
+
function trackers.register(what,...)
what = lower(what)
local w = data[what]
@@ -3333,20 +3425,20 @@ function trackers.register(what,...)
end
function trackers.enable(what)
- done = { }
- set(what,true)
+ local e = trackers.enable
+ trackers.enable, done = enable, { }
+ enable(string.simpleesc(what))
+ trackers.enable, done = e, { }
end
function trackers.disable(what)
- done = { }
- if not what or what == "" then
- trackers.reset(what)
- else
- set(what,false)
- end
+ -- while the callbacks run, trackers.disable is the plain local disable
+ -- (presumably so that nested calls share 'done' and are not escaped twice)
+ local e = trackers.disable
+ trackers.disable, done = disable, { }
+ -- a nil 'what' means "disable everything" and is handled by the local
+ -- disable; it must not be fed to string.simpleesc (gsub fails on nil)
+ disable(what and string.simpleesc(what))
+ trackers.disable, done = e, { }
end
-function trackers.reset(what)
+function trackers.reset()
done = { }
reset()
end
@@ -3423,7 +3515,7 @@ function environment.initialize_arguments(arg)
environment.arguments, environment.files, environment.sortedflags = arguments, files, nil
for index, argument in pairs(arg) do
if index > 0 then
- local flag, value = argument:match("^%-+(.+)=(.-)$")
+ local flag, value = argument:match("^%-+(.-)=(.-)$")
if flag then
arguments[flag] = string.unquote(value or "")
else
diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun
index 865994073..8bc88c900 100755
--- a/scripts/context/stubs/unix/mtxrun
+++ b/scripts/context/stubs/unix/mtxrun
@@ -239,6 +239,16 @@ function string:pattesc()
return (gsub(self,".",patterns_escapes))
end
+local simple_escapes = {
+ ["-"] = "%-",
+ ["."] = "%.",
+ ["*"] = ".*",
+}
+
+function string:simpleesc()
+ return (gsub(self,".",simple_escapes))
+end
+
function string:tohash()
local t = { }
for s in gmatch(self,"([^, ]+)") do -- lpeg
@@ -288,6 +298,12 @@ function string:compactlong() -- strips newlines and leading spaces
return self
end
+function string:striplong() -- strips newlines and leading spaces
+ self = gsub(self,"^%s*","")
+ self = gsub(self,"[\n\r]+ *","\n")
+ return self
+end
+
end -- of closure
@@ -396,6 +412,18 @@ function string:split(separator)
return c:match(self)
end
+--~ function lpeg.L(list,pp)
+--~ local p = pp
+--~ for l=1,#list do
+--~ if p then
+--~ p = p + lpeg.P(list[l])
+--~ else
+--~ p = lpeg.P(list[l])
+--~ end
+--~ end
+--~ return p
+--~ end
+
end -- of closure
@@ -429,6 +457,14 @@ function table.strip(tab)
return lst
end
+function table.keys(t)
+ local k = { }
+ for key,_ in next, t do
+ k[#k+1] = key
+ end
+ return k
+end
+
local function compare(a,b)
return (tostring(a) < tostring(b))
end
@@ -1009,7 +1045,7 @@ function table.tofile(filename,root,name,reduce,noquotes,hexify)
end
end
-local function flatten(t,f,complete)
+local function flatten(t,f,complete) -- is this used? meybe a variant with next, ...
for i=1,#t do
local v = t[i]
if type(v) == "table" then
@@ -1038,6 +1074,24 @@ end
table.flatten_one_level = table.unnest
+-- a better one:
+
+local function flattened(t,f)
+ if not f then
+ f = { }
+ end
+ for k, v in next, t do
+ if type(v) == "table" then
+ flattened(v,f)
+ else
+ f[k] = v
+ end
+ end
+ return f
+end
+
+table.flattened = flattened
+
-- the next three may disappear
function table.remove_value(t,value) -- todo: n
@@ -1201,21 +1255,35 @@ function table.reverse(t)
return tt
end
---~ function table.keys(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return k
---~ end
+-- Makes sure that extra occurs exactly once in the array t, directly before
+-- the first occurrence of value. When value is not present, extra is put at
+-- the front of t.
+function table.insert_before_value(t,value,extra)
+    -- first drop existing copies of extra; walk downwards because remove()
+    -- shifts the later entries and a forward loop would skip the entry that
+    -- moves into the slot that was just vacated (adjacent duplicates)
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i,extra)
+            return
+        end
+    end
+    insert(t,1,extra)
+end
---~ function table.keys_as_string(t)
---~ local k = { }
---~ for k,_ in next, t do
---~ k[#k+1] = k
---~ end
---~ return concat(k,"")
---~ end
+-- Makes sure that extra occurs exactly once in the array t, directly after
+-- the first occurrence of value. When value is not present, extra is appended.
+function table.insert_after_value(t,value,extra)
+    -- first drop existing copies of extra; walk downwards because remove()
+    -- shifts the later entries and a forward loop would skip the entry that
+    -- moves into the slot that was just vacated (adjacent duplicates)
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i+1,extra)
+            return
+        end
+    end
+    insert(t,#t+1,extra)
+end
end -- of closure
@@ -1422,7 +1490,7 @@ if not modules then modules = { } end modules ['l-number'] = {
license = "see context related readme files"
}
-local format = string.format
+local format, floor, insert = string.format, math.floor, table.insert -- floor and insert are needed by number.bits
number = number or { }
@@ -1458,7 +1526,18 @@ function number.toset(n)
return one:match(tostring(n))
end
-
+function number.bits(n,zero) -- returns the positions of the bits that are set in n, highest position first; positions count from 0 when zero is true, otherwise from 1
+ local t, i = { }, (zero and 0) or 1
+ while n > 0 do
+ local m = n % 2
+ if m > 0 then
+ insert(t,1,i) -- prepend, so the result ends up in descending order
+ end
+ n = floor(n/2)
+ i = i + 1
+ end
+ return t
+end
end -- of closure
@@ -1923,11 +2002,11 @@ local rootbased = lpeg.P("/") + letter*lpeg.P(":")
-- ./name ../name /name c: :// name/name
function file.is_qualified_path(filename)
- return qualified:match(filename)
+ return qualified:match(filename) ~= nil
end
function file.is_rootbased_path(filename)
- return rootbased:match(filename)
+ return rootbased:match(filename) ~= nil
end
local slash = lpeg.S("\\/")
@@ -2854,6 +2933,406 @@ function aux.accesstable(target)
return t
end
+-- as we use this a lot ...
+
+--~ function aux.cachefunction(action,weak)
+--~ local cache = { }
+--~ if weak then
+--~ setmetatable(cache, { __mode = "kv" } )
+--~ end
+--~ local function reminder(str)
+--~ local found = cache[str]
+--~ if not found then
+--~ found = action(str)
+--~ cache[str] = found
+--~ end
+--~ return found
+--~ end
+--~ return reminder, cache
+--~ end
+
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['trac-tra'] = {
+ version = 1.001,
+ comment = "companion to trac-tra.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- the <anonymous> tag is kind of generic and used for functions that are not
+-- bound to a variable, like node.new, node.copy etc (contrary to for instance
+-- node.has_attribute which is bound to a has_attribute local variable in mkiv)
+
+local getinfo = debug.getinfo
+local type, next = type, next
+local concat = table.concat
+local format, find, lower, gmatch, gsub = string.format, string.find, string.lower, string.gmatch, string.gsub
+
+debugger = debugger or { }
+
+local counters = { }
+local names = { }
+
+-- one
+
+local function hook() -- debug hook: counts calls per function in counters and stores the name/source info in names on first sight
+ local f = getinfo(2,"f").func -- inside a hook, level 2 is the function that triggered it
+ local n = getinfo(2,"Sn")
+-- if n.what == "C" and n.name then print (n.namewhat .. ': ' .. n.name) end
+ if f then
+ local cf = counters[f]
+ if cf == nil then
+ counters[f] = 1
+ names[f] = n
+ else
+ counters[f] = cf + 1
+ end
+ end
+end
+local function getname(func) -- builds a printable label for func from the info that hook stored in names
+ local n = names[func]
+ if n then
+ if n.what == "C" then
+ return n.name or '<anonymous>'
+ else
+ -- source short_src linedefined what name namewhat nups func
+ local name = n.name or n.namewhat or n.what
+ if not name or name == "" then name = "?" end
+ return format("%s : %s : %s", n.short_src or "unknown source", n.linedefined or "--", name)
+ end
+ else
+ return "unknown" -- func has no entry in names
+ end
+end
+-- Reports the collected call counts: one line per function whose count is
+-- above threshold (default 0), followed by a summary line. Entries whose
+-- label contains "for generator" are not listed and not added to total, but
+-- they do count for the grand total. printer receives each string and
+-- defaults to texio.write when texio is available, otherwise to print.
+function debugger.showstats(printer,threshold)
+    -- guard the lookup: indexing texio directly raises an error when the
+    -- texio library is not present
+    printer = printer or (texio and texio.write) or print
+    threshold = threshold or 0
+    local total, grandtotal, functions = 0, 0, 0
+    printer("\n") -- ugly but ok
+    -- table.sort(counters)
+    for func, count in pairs(counters) do
+        if count > threshold then
+            local name = getname(func)
+            if not name:find("for generator") then
+                printer(format("%8i %s", count, name))
+                total = total + count
+            end
+        end
+        grandtotal = grandtotal + count
+        functions = functions + 1
+    end
+    printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", functions, total, grandtotal, threshold))
+end
+
+-- two
+
+--~ local function hook()
+--~ local n = getinfo(2)
+--~ if n.what=="C" and not n.name then
+--~ local f = tostring(debug.traceback())
+--~ local cf = counters[f]
+--~ if cf == nil then
+--~ counters[f] = 1
+--~ names[f] = n
+--~ else
+--~ counters[f] = cf + 1
+--~ end
+--~ end
+--~ end
+--~ function debugger.showstats(printer,threshold)
+--~ printer = printer or texio.write or print
+--~ threshold = threshold or 0
+--~ local total, grandtotal, functions = 0, 0, 0
+--~ printer("\n") -- ugly but ok
+--~ -- table.sort(counters)
+--~ for func, count in pairs(counters) do
+--~ if count > threshold then
+--~ printer(format("%8i %s", count, func))
+--~ total = total + count
+--~ end
+--~ grandtotal = grandtotal + count
+--~ functions = functions + 1
+--~ end
+--~ printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", functions, total, grandtotal, threshold))
+--~ end
+
+-- rest
+
+function debugger.savestats(filename,threshold) -- writes the showstats report to filename; does nothing when the file cannot be opened
+ local f = io.open(filename,'w')
+ if f then
+ debugger.showstats(function(str) f:write(str) end,threshold) -- the printer passed here writes into the file
+ f:close()
+ end
+end
+
+function debugger.enable() -- starts counting: installs hook for call events
+ debug.sethook(hook,"c")
+end
+
+function debugger.disable() -- stops counting; the collected counters are kept
+ debug.sethook() -- called without arguments this turns the hook off
+--~ counters[debug.getinfo(2,"f").func] = nil
+end
+
+function debugger.tracing() -- true when MTX.TRACE.CALLS or MTX_TRACE_CALLS in os.env holds a number above zero
+ local n = tonumber(os.env['MTX.TRACE.CALLS']) or tonumber(os.env['MTX_TRACE_CALLS']) or 0
+ if n > 0 then
+ function debugger.tracing() return true end ; return true -- replaces itself, so later calls skip the lookup
+ else
+ function debugger.tracing() return false end ; return false
+ end
+end
+
+--~ debugger.enable()
+
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+--~ print(math.sin(1*.5))
+
+--~ debugger.disable()
+
+--~ print("")
+--~ debugger.showstats()
+--~ print("")
+--~ debugger.showstats(print,3)
+
+setters = setters or { }
+setters.data = setters.data or { }
+
+local function set(t,what,value) -- calls, with value, every function registered under a key of t.data that matches one of the patterns in what
+ local data, done = t.data, t.done
+ if type(what) == "string" then
+ what = aux.settings_to_array(what) -- inefficient but ok
+ end
+ for i=1,#what do
+ local w = what[i]
+ for d, f in next, data do
+ if done[d] then
+ -- prevent recursion due to wildcards
+ elseif find(d,w) then -- w is used as a Lua pattern, not as a plain string
+ done[d] = true
+ for i=1,#f do -- this i shadows the index of the outer loop
+ f[i](value)
+ end
+ end
+ end
+ end
+end
+
+local function reset(t) -- calls every function registered in t.data with false
+ for d, f in next, t.data do
+ for i=1,#f do
+ f[i](false)
+ end
+ end
+end
+
+local function enable(t,what) -- switches on whatever in t matches what (a pattern string or an array of patterns)
+ set(t,what,true)
+end
+
+-- Switches off whatever in t matches what. Without what (nil or an empty
+-- string) the done set is cleared and every registered function is called
+-- with false.
+local function disable(t,what)
+    if not what or what == "" then
+        t.done = { }
+        reset(t)
+    else
+        set(t,what,false)
+    end
+end
+
+-- Registers one or more actions under the (lowercased) name what in t.data.
+-- A function is stored as is; a string is stored as a function that applies
+-- the received value to the setters matching that string.
+function setters.register(t,what,...)
+    local data = t.data
+    what = lower(what)
+    local w = data[what]
+    if not w then
+        w = { }
+        data[what] = w
+    end
+    for _, fnc in next, { ... } do
+        local typ = type(fnc)
+        if typ == "function" then
+            w[#w+1] = fnc
+        elseif typ == "string" then
+            -- set takes three arguments; the former fourth one was an
+            -- undefined global and has been dropped
+            w[#w+1] = function(value) set(t,fnc,value) end
+        end
+    end
+end
+
+function setters.enable(t,what) -- enables the setters of t matching what, where what uses the simple wildcard syntax handled by string.simpleesc
+ local e = t.enable
+ t.enable, t.done = enable, { } -- temporarily install the plain enable and start with an empty done set
+ enable(t,string.simpleesc(what))
+ t.enable, t.done = e, { } -- put the previous t.enable back and clear done again
+end
+
+-- Disables the setters of t matching what (simple wildcard syntax); calling
+-- it without what, or with an empty string, switches everything off.
+function setters.disable(t,what)
+    local e = t.disable
+    t.disable, t.done = disable, { } -- temporarily install the plain disable
+    -- only escape a real value: string.simpleesc(nil) raises an error, which
+    -- made the "no argument" branch of disable unreachable from here
+    disable(t,what and string.simpleesc(what))
+    t.disable, t.done = e, { }
+end
+
+function setters.reset(t) -- clears the done set and calls every registered function of t with false
+ t.done = { }
+ reset(t)
+end
+
+function setters.list(t) -- returns two sorted arrays of registered names: user ones and system ones (those starting with *)
+ local list = table.sortedkeys(t.data)
+ local user, system = { }, { }
+ for l=1,#list do
+ local what = list[l]
+ if find(what,"^%*") then -- a leading literal * marks a system entry
+ system[#system+1] = what
+ else
+ user[#user+1] = what
+ end
+ end
+ return user, system
+end
+
+function setters.show(t) -- writes the names of the user setters of t with commands.writestatus, framed by two empty status lines
+ commands.writestatus("","")
+ for k,v in ipairs(setters.list(t)) do -- ipairs only looks at the first result, so the system list is not shown
+ commands.writestatus(t.name,v)
+ end
+ commands.writestatus("","")
+end
+
+-- we could have used a bit of oo and the trackers:enable syntax but
+-- there is already a lot of code around using the singular tracker
+
+-- Creates a new setter collection, stores it in setters.data under name and
+-- returns it. The function fields are closures over the collection, so they
+-- are called without passing the collection itself, e.g. t.enable("x").
+function setters.new(name)
+    local t
+    t = {
+        data     = { },
+        name     = name,
+        enable   = function(...) setters.enable  (t,...) end,
+        disable  = function(...) setters.disable (t,...) end,
+        register = function(...) setters.register(t,...) end,
+        -- setters.list produces results (user and system names); pass them on
+        list     = function(...) return setters.list(t,...) end,
+        show     = function(...) setters.show    (t,...) end,
+    }
+    setters.data[name] = t
+    return t
+end
+
+trackers = setters.new("trackers") -- global collection, also reachable as setters.data.trackers
+directives = setters.new("directives") -- global collection, also reachable as setters.data.directives
+
+-- nice trick: we overload two of the directives related functions with variants that
+-- do tracing (itself using a tracker) .. proof of concept
+
+-- flag that is flipped by the system.directives tracker
+local trace_directives = false trackers.register("system.directives", function(v) trace_directives = v end)
+
+-- keep the original implementations so that the traced variants can delegate
+local e = directives.enable
+local d = directives.disable
+
+-- Traced variant of directives.enable: reports the request only while the
+-- system.directives tracker is on and then calls the original function.
+function directives.enable(...)
+    if trace_directives then
+        commands.writestatus("directives","enabling: %s",concat({...}," "))
+    end
+    e(...)
+end
+
+-- Traced variant of directives.disable, see directives.enable.
+function directives.disable(...)
+    if trace_directives then
+        commands.writestatus("directives","disabling: %s",concat({...}," "))
+    end
+    d(...)
+end
+
+--~ -- old code:
+--
+--~ trackers = trackers or { }
+--~ local data, done = { }, { }
+--~ local function set(what,value)
+--~ if type(what) == "string" then
+--~ what = aux.settings_to_array(what) -- inefficient but ok
+--~ end
+--~ for i=1,#what do
+--~ local w = what[i]
+--~ for d, f in next, data do
+--~ if done[d] then
+--~ -- prevent recursion due to wildcards
+--~ elseif find(d,w) then
+--~ done[d] = true
+--~ for i=1,#f do
+--~ f[i](value)
+--~ end
+--~ end
+--~ end
+--~ end
+--~ end
+--~ local function reset()
+--~ for d, f in next, data do
+--~ for i=1,#f do
+--~ f[i](false)
+--~ end
+--~ end
+--~ end
+--~ local function enable(what)
+--~ set(what,true)
+--~ end
+--~ local function disable(what)
+--~ if not what or what == "" then
+--~ done = { }
+--~ reset()
+--~ else
+--~ set(what,false)
+--~ end
+--~ end
+--~ function trackers.register(what,...)
+--~ what = lower(what)
+--~ local w = data[what]
+--~ if not w then
+--~ w = { }
+--~ data[what] = w
+--~ end
+--~ for _, fnc in next, { ... } do
+--~ local typ = type(fnc)
+--~ if typ == "function" then
+--~ w[#w+1] = fnc
+--~ elseif typ == "string" then
+--~ w[#w+1] = function(value) set(fnc,value,nesting) end
+--~ end
+--~ end
+--~ end
+--~ function trackers.enable(what)
+--~ local e = trackers.enable
+--~ trackers.enable, done = enable, { }
+--~ enable(string.simpleesc(what))
+--~ trackers.enable, done = e, { }
+--~ end
+--~ function trackers.disable(what)
+--~ local e = trackers.disable
+--~ trackers.disable, done = disable, { }
+--~ disable(string.simpleesc(what))
+--~ trackers.disable, done = e, { }
+--~ end
+--~ function trackers.reset()
+--~ done = { }
+--~ reset()
+--~ end
+--~ function trackers.list() -- pattern
+--~ local list = table.sortedkeys(data)
+--~ local user, system = { }, { }
+--~ for l=1,#list do
+--~ local what = list[l]
+--~ if find(what,"^%*") then
+--~ system[#system+1] = what
+--~ else
+--~ user[#user+1] = what
+--~ end
+--~ end
+--~ return user, system
+--~ end
+
end -- of closure
@@ -2867,6 +3346,12 @@ if not modules then modules = { } end modules ['lxml-tab'] = {
license = "see context related readme files"
}
+-- this module needs a cleanup: check latest lpeg, passing args, (sub)grammar, etc etc
+-- stripping spaces from e.g. cont-en.xml saves .2 sec runtime so it's not worth the
+-- trouble
+
+local trace_entities = false trackers.register("xml.entities", function(v) trace_entities = v end)
+
--[[ldx--
<p>The parser used here is inspired by the variant discussed in the lua book, but
handles comment and processing instructions, has a different structure, provides
@@ -2874,18 +3359,6 @@ parent access; a first version used different trickery but was less optimized to
went this route. First we had a find based parser, now we have an <l n='lpeg'/> based one.
The find based parser can be found in l-xml-edu.lua along with other older code.</p>
-<p>Expecially the lpath code is experimental, we will support some of xpath, but
-only things that make sense for us; as compensation it is possible to hook in your
-own functions. Apart from preprocessing content for <l n='context'/> we also need
-this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
-files.</p>
-
-<typing>
-a/b/c /*/c
-a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
-a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
-</typing>
-
<p>Beware, the interface may change. For instance at, ns, tg, dt may get more
verbose names. Once the code is stable we will also remove some tracing and
optimize the code.</p>
@@ -2896,26 +3369,9 @@ xml = xml or { }
--~ local xml = xml
local concat, remove, insert = table.concat, table.remove, table.insert
-local type, next, setmetatable = type, next, setmetatable
+local type, next, setmetatable, getmetatable, tonumber = type, next, setmetatable, getmetatable, tonumber
local format, lower, find = string.format, string.lower, string.find
-
---[[ldx--
-<p>This module can be used stand alone but also inside <l n='mkiv'/> in
-which case it hooks into the tracker code. Therefore we provide a few
-functions that set the tracers.</p>
---ldx]]--
-
-local trace_remap = false
-
-if trackers then
- trackers.register("xml.remap", function(v) trace_remap = v end)
-end
-
-function xml.settrace(str,value)
- if str == "remap" then
- trace_remap = value or false
- end
-end
+local utfchar = unicode.utf8.char
--[[ldx--
<p>First a hack to enable namespace resolving. A namespace is characterized by
@@ -3022,25 +3478,25 @@ element.</p>
</typing>
--ldx]]--
-xml.strip_cm_and_dt = false -- an extra global flag, in case we have many includes
-
-- not just one big nested table capture (lpeg overflow)
local nsremap, resolvens = xml.xmlns, xml.resolvens
local stack, top, dt, at, xmlns, errorstr, entities = {}, {}, {}, {}, {}, nil, {}
+local strip, cleanup, utfize, resolve = false, false, false, false
-local mt = { __tostring = xml.text }
+local mt = { }
-function xml.check_error(top,toclose)
- return ""
+local function initialize_mt(root) -- we will make a xml.new that then sets the mt as field; local, so the helper no longer leaks into the global namespace
+ mt = { __tostring = xml.text, __index = root } -- elements created afterwards fall back on root for missing fields
end
-local strip = false
-local cleanup = false
+function xml.setproperty(root,k,v) -- stores v under key k in the table that serves as __index of root's metatable
+ getmetatable(root).__index[k] = v
+end
-function xml.set_text_cleanup(fnc)
- cleanup = fnc
+function xml.check_error(top,toclose)
+ return ""
end
local function add_attribute(namespace,tag,value)
@@ -3058,6 +3514,22 @@ local function add_attribute(namespace,tag,value)
end
end
+local function add_empty(spacing, namespace, tag) -- parser action for an empty element: appends a childless element table to the current parent
+ if #spacing > 0 then
+ dt[#dt+1] = spacing -- keep the whitespace in front of the tag as a text entry
+ end
+ local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace
+ top = stack[#stack]
+ dt = top.dt
+ local t = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = top }
+ dt[#dt+1] = t
+ setmetatable(t, mt)
+ if at.xmlns then
+ remove(xmlns)
+ end
+ at = { } -- start with a fresh attribute table for the next element
+end
+
local function add_begin(spacing, namespace, tag)
if #spacing > 0 then
dt[#dt+1] = spacing
@@ -3083,28 +3555,12 @@ local function add_end(spacing, namespace, tag)
end
dt = top.dt
dt[#dt+1] = toclose
- dt[0] = top
+ -- dt[0] = top -- nasty circular reference when serializing table
if toclose.at.xmlns then
remove(xmlns)
end
end
-local function add_empty(spacing, namespace, tag)
- if #spacing > 0 then
- dt[#dt+1] = spacing
- end
- local resolved = (namespace == "" and xmlns[#xmlns]) or nsremap[namespace] or namespace
- top = stack[#stack]
- dt = top.dt
- local t = { ns=namespace or "", rn=resolved, tg=tag, at=at, dt={}, __p__ = top }
- dt[#dt+1] = t
- setmetatable(t, mt)
- if at.xmlns then
- remove(xmlns)
- end
- at = { }
-end
-
local function add_text(text)
if cleanup and #text > 0 then
dt[#dt+1] = cleanup(text)
@@ -3128,7 +3584,109 @@ local function set_message(txt)
errorstr = "garbage at the end of the file: " .. gsub(txt,"([ \n\r\t]*)","")
end
-local P, S, R, C, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V
+local reported_attribute_errors = { }
+
+local function attribute_value_error(str) -- reports an invalid attribute value once per distinct string and returns str unchanged
+ if not reported_attribute_errors[str] then
+ logs.report("xml","invalid attribute value: %q",str)
+ reported_attribute_errors[str] = true
+ at._error_ = str -- only set on the first report of this string
+ end
+ return str
+end
+local function attribute_specification_error(str) -- reports an invalid attribute specification once per distinct string and returns str unchanged
+ if not reported_attribute_errors[str] then
+ logs.report("xml","invalid attribute specification: %q",str)
+ reported_attribute_errors[str] = true
+ at._error_ = str -- only set on the first report of this string
+ end
+ return str
+end
+
+local dcache, hcache, acache = { }, { }, { }
+
+function xml.unknown_dec_entity_format(str) return format("&%s;", str) end -- NOTE(review): yields &str; without the # of a decimal entity - confirm that this is intended
+function xml.unknown_hex_entity_format(str) return format("&#x%s;",str) end -- fallback text for a hex entity that cannot be converted
+function xml.unknown_any_entity_format(str) return format("&%s;", str) end -- fallback text for a named entity that cannot be resolved
+
+-- Replacement for a hexadecimal character entity; str is the text between
+-- "&#x" and ";". With utfize set the entity becomes the utf character (or the
+-- unknown_hex_entity_format fallback when str is not a hex number), otherwise
+-- the entity is rebuilt as it was. Results are cached in hcache.
+local function handle_hex_entity(str)
+    local h = hcache[str]
+    if not h then
+        if utfize then
+            local n = tonumber(str,16)
+            h = (n and utfchar(n)) or xml.unknown_hex_entity_format(str) or ""
+            if not n then
+                logs.report("xml","utfize, ignoring hex entity &#x%s;",str)
+            elseif trace_entities then
+                -- report the converted value (was an undefined variable)
+                logs.report("xml","utfize, converting hex entity &#x%s; into %s",str,h)
+            end
+        else
+            if trace_entities then
+                logs.report("xml","found entity &#x%s;",str)
+            end
+            -- keep the x, otherwise the hex digits turn into a decimal entity
+            h = "&#x" .. str .. ";"
+        end
+        hcache[str] = h
+    end
+    return h
+end
+-- Replacement for a decimal character entity; str is the text between "&#"
+-- and ";". With utfize set the entity becomes the utf character (or the
+-- unknown_dec_entity_format fallback when str is not a number), otherwise the
+-- entity is rebuilt as it was. Results are cached in dcache.
+local function handle_dec_entity(str)
+    local d = dcache[str]
+    if not d then
+        if utfize then
+            local n = tonumber(str)
+            d = (n and utfchar(n)) or xml.unknown_dec_entity_format(str) or ""
+            if not n then
+                logs.report("xml","utfize, ignoring dec entity &#%s;",str)
+            elseif trace_entities then
+                -- report the converted value (was an undefined variable)
+                logs.report("xml","utfize, converting dec entity &#%s; into %s",str,d)
+            end
+        else
+            if trace_entities then
+                logs.report("xml","found entity &#%s;",str)
+            end
+            -- keep the #, otherwise the number turns into a named entity
+            d = "&#" .. str .. ";"
+        end
+        dcache[str] = d
+    end
+    return d
+end
+-- Replacement for a named entity; str is the text between "&" and ";". With
+-- resolve set the entity is looked up in the entities of the current
+-- conversion and falls back on unknown_any_entity_format, otherwise the
+-- entity is rebuilt as it was. Fallbacks and rebuilt entities are cached in
+-- acache.
+local function handle_any_entity(str)
+    if resolve then
+        local a = entities[str] -- per instance !
+        if not a then
+            a = acache[str]
+            if not a then
+                if trace_entities then
+                    logs.report("xml","ignoring entity &%s;",str)
+                else
+                    -- can be defined in a global mapper and intercepted elsewhere
+                    -- as happens in lxml-tex.lua
+                end
+                a = xml.unknown_any_entity_format(str) or ""
+                acache[str] = a
+            end
+        elseif trace_entities then
+            if not acache[str] then
+                -- report the resolved value (was an undefined variable)
+                logs.report("xml","converting entity &%s; into %s",str,a)
+                acache[str] = a -- also makes sure we report each entity only once
+            end
+        end
+        return a
+    else
+        local a = acache[str]
+        if not a then
+            if trace_entities then
+                logs.report("xml","found entity &%s;",str)
+            end
+            a = "&" .. str .. ";"
+            acache[str] = a
+        end
+        return a
+    end
+end
+
+local P, S, R, C, V, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cs
local space = S(' \r\n\t')
local open = P('<')
@@ -3138,6 +3696,8 @@ local dquote = S('"')
local equal = P('=')
local slash = P('/')
local colon = P(':')
+local semicolon = P(';')
+local ampersand = P('&')
local valid = R('az', 'AZ', '09') + S('_-.')
local name_yes = C(valid^1) * colon * C(valid^1)
local name_nop = C(P(true)) * C(valid^1)
@@ -3147,15 +3707,36 @@ local utfbom = P('\000\000\254\255') + P('\255\254\000\000') +
P('\255\254') + P('\254\255') + P('\239\187\191') -- no capture
local spacing = C(space^0)
-local justtext = C((1-open)^1)
+
+local entitycontent = (1-open-semicolon)^0
+local entity = ampersand/"" * (
+ P("#")/"" * (
+ P("x")/"" * (entitycontent/handle_hex_entity) +
+ (entitycontent/handle_dec_entity)
+ ) + (entitycontent/handle_any_entity)
+ ) * (semicolon/"")
+
+local text_unparsed = C((1-open)^1)
+local text_parsed = Cs(((1-open-ampersand)^1 + entity)^1)
+
local somespace = space^1
local optionalspace = space^0
-local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote)
-local attribute = (somespace * name * optionalspace * equal * optionalspace * value) / add_attribute
-local attributes = attribute^0
+local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote) -- ampersand and < also invalid in value
+
+local whatever = space * name * optionalspace * equal
+local wrongvalue = C(P(1-whatever-close)^1 + P(1-close)^1) / attribute_value_error
+
+local attributevalue = value + wrongvalue
+
+local attribute = (somespace * name * optionalspace * equal * optionalspace * attributevalue) / add_attribute
+----- attributes = (attribute)^0
+
+local endofattributes = slash * close + close -- recovery of flacky html
+local attributes = (attribute + somespace^-1 * (((1-endofattributes)^1)/attribute_specification_error))^0
-local text = justtext / add_text
+local parsedtext = text_parsed / add_text
+local unparsedtext = text_unparsed / add_text
local balanced = P { "[" * ((1 - S"[]") + V(1))^0 * "]" } -- taken from lpeg manual, () example
local emptyelement = (spacing * open * name * attributes * optionalspace * slash * close) / add_empty
@@ -3208,42 +3789,72 @@ local doctype = (spacing * begindoctype * somedoctype * enddoct
-- local cdata = (lpeg.Cc("@cd@") * spacing * begincdata * somecdata * endcdata ) / add_special
-- local doctype = (lpeg.Cc("@dt@") * spacing * begindoctype * somedoctype * enddoctype ) / add_special
-local trailer = space^0 * (justtext/set_message)^0
+local trailer = space^0 * (text_unparsed/set_message)^0
-- comment + emptyelement + text + cdata + instruction + V("parent"), -- 6.5 seconds on 40 MB database file
-- text + comment + emptyelement + cdata + instruction + V("parent"), -- 5.8
-- text + V("parent") + emptyelement + comment + cdata + instruction, -- 5.5
-local grammar = P { "preamble",
+local grammar_parsed_text = P { "preamble",
preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * V("parent") * trailer,
parent = beginelement * V("children")^0 * endelement,
- children = text + V("parent") + emptyelement + comment + cdata + instruction,
+ children = parsedtext + V("parent") + emptyelement + comment + cdata + instruction,
}
--- todo: xml.new + properties like entities and strip and such (store in root)
+local grammar_unparsed_text = P { "preamble",
+ preamble = utfbom^0 * instruction^0 * (doctype + comment + instruction)^0 * V("parent") * trailer,
+ parent = beginelement * V("children")^0 * endelement,
+ children = unparsedtext + V("parent") + emptyelement + comment + cdata + instruction,
+}
-function xml.convert(data, no_root, strip_cm_and_dt, given_entities) -- maybe use table met k/v (given_entities may disapear)
- strip = strip_cm_and_dt or xml.strip_cm_and_dt
- stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, given_entities or {}
+local function xmlconvert(data, settings)
+ settings = settings or { } -- no_root strip_cm_and_dt given_entities parent_root error_handler
+ strip = settings.strip_cm_and_dt
+ utfize = settings.utfize_entities
+ resolve = settings.resolve_entities
+ cleanup = settings.text_cleanup
+ stack, top, at, xmlns, errorstr, result, entities = {}, {}, {}, {}, nil, nil, settings.entities or {}
+ reported_attribute_errors = { }
+ if settings.parent_root then
+ mt = getmetatable(settings.parent_root)
+ else
+ initialize_mt(top)
+ end
stack[#stack+1] = top
top.dt = { }
dt = top.dt
if not data or data == "" then
errorstr = "empty xml file"
- elseif not grammar:match(data) then
- errorstr = "invalid xml file"
+ elseif utfize or resolve then
+ if grammar_parsed_text:match(data) then
+ errorstr = ""
+ else
+ errorstr = "invalid xml file - parsed text"
+ end
else
- errorstr = ""
+ if grammar_unparsed_text:match(data) then
+ errorstr = ""
+ else
+ errorstr = "invalid xml file - unparsed text"
+ end
end
if errorstr and errorstr ~= "" then
- result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={}, er = true } }, error = true }
+ result = { dt = { { ns = "", tg = "error", dt = { errorstr }, at={}, er = true } } }
setmetatable(stack, mt)
- if xml.error_handler then xml.error_handler("load",errorstr) end
+        -- settings.error_handler == false silences the report, a function
+        -- replaces the default xml.error_handler
+        local error_handler = settings.error_handler
+        if error_handler == false then
+            -- no error message
+        else
+            error_handler = error_handler or xml.error_handler
+            if error_handler then
+                -- call the handler that was selected above, not always the default
+                error_handler("load",errorstr)
+            end
+        end
else
result = stack[1]
end
- if not no_root then
- result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={}, entities = entities }
+ if not settings.no_root then
+ result = { special = true, ns = "", tg = '@rt@', dt = result.dt, at={}, entities = entities, settings = settings }
setmetatable(result, mt)
local rdt = result.dt
for k=1,#rdt do
@@ -3254,9 +3865,14 @@ function xml.convert(data, no_root, strip_cm_and_dt, given_entities) -- maybe us
end
end
end
+ if errorstr and errorstr ~= "" then
+ result.error = true
+ end
return result
end
+xml.convert = xmlconvert
+
--[[ldx--
<p>Packaging data in an xml like table is done with the following
function. Maybe it will go away (when not used).</p>
@@ -3289,16 +3905,16 @@ function xml.load(filename)
if type(filename) == "string" then
local f = io.open(filename,'r')
if f then
- local root = xml.convert(f:read("*all"))
+ local root = xmlconvert(f:read("*all"))
f:close()
return root
else
- return xml.convert("")
+ return xmlconvert("")
end
elseif filename then -- filehandle
- return xml.convert(filename:read("*all"))
+ return xmlconvert(filename:read("*all"))
else
- return xml.convert("")
+ return xmlconvert("")
end
end
@@ -3307,9 +3923,11 @@ end
valid trees, which is what the next function does.</p>
--ldx]]--
+local no_root = { no_root = true }
+
function xml.toxml(data)
if type(data) == "string" then
- local root = { xml.convert(data,true) }
+ local root = { xmlconvert(data,no_root) }
return (#root > 1 and root) or root[1]
else
return data
@@ -3354,217 +3972,305 @@ alternative.</p>
-- todo: add <?xml version='1.0' standalone='yes'?> when not present
-local fallbackhandle = (tex and tex.sprint) or io.write
-
-local function serialize(e, handle, textconverter, attributeconverter, specialconverter, nocommands)
- if not e then
- return
- elseif not nocommands then
- local ec = e.command
- if ec ~= nil then -- we can have all kind of types
- if e.special then
- local etg, edt = e.tg, e.dt
- local spc = specialconverter and specialconverter[etg]
- if spc then
- local result = spc(edt[1])
- if result then
- handle(result)
- return
- else
- -- no need to handle any further
- end
- end
- end
- local xc = xml.command
- if xc then
- xc(e,ec)
- return
+function xml.checkbom(root) -- can be made faster; when root.ri is set, makes sure the tree starts with an xml declaration
+ if root.ri then
+ local dt, found = root.dt, false
+ for k=1,#dt do
+ local v = dt[k]
+ if type(v) == "table" and v.special and v.tg == "@pi@" and find(v.dt[1] or "","xml.*version=") then -- the tag is @pi@ and the text lives in dt[1]
+ found = true
+ break
end
end
+ if not found then
+ insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } )
+ insert(dt, 2, "\n" )
+ end
end
- handle = handle or fallbackhandle
- local etg = e.tg
- if etg then
- if e.special then
- local edt = e.dt
- local spc = specialconverter and specialconverter[etg]
- if spc then
- local result = spc(edt[1])
- if result then
- handle(result)
+end
+
+--[[ldx--
+<p>At the cost of some 25% runtime overhead you can first convert the tree to a string
+and then handle the lot.</p>
+--ldx]]--
+
+-- new experimental reorganized serialize
+
+local function verbose_element(e,handlers)
+ local handle = handlers.handle
+ local serialize = handlers.serialize
+ local ens, etg, eat, edt, ern = e.ns, e.tg, e.at, e.dt, e.rn
+ local ats = eat and next(eat) and { }
+ if ats then
+ for k,v in next, eat do
+ ats[#ats+1] = format('%s=%q',k,v)
+ end
+ end
+ if ern and trace_remap and ern ~= ens then
+ ens = ern
+ end
+ if ens ~= "" then
+ if edt and #edt > 0 then
+ if ats then
+ handle("<",ens,":",etg," ",concat(ats," "),">")
+ else
+ handle("<",ens,":",etg,">")
+ end
+ for i=1,#edt do
+ local e = edt[i]
+ if type(e) == "string" then
+ handle(e)
else
- -- no need to handle any further
+ serialize(e,handlers)
end
- elseif etg == "@pi@" then
- -- handle(format("<?%s?>",edt[1]))
- handle("<?" .. edt[1] .. "?>")
- elseif etg == "@cm@" then
- -- handle(format("<!--%s-->",edt[1]))
- handle("<!--" .. edt[1] .. "-->")
- elseif etg == "@cd@" then
- -- handle(format("<![CDATA[%s]]>",edt[1]))
- handle("<![CDATA[" .. edt[1] .. "]]>")
- elseif etg == "@dt@" then
- -- handle(format("<!DOCTYPE %s>",edt[1]))
- handle("<!DOCTYPE " .. edt[1] .. ">")
- elseif etg == "@rt@" then
- serialize(edt,handle,textconverter,attributeconverter,specialconverter,nocommands)
end
+ handle("</",ens,":",etg,">")
else
- local ens, eat, edt, ern = e.ns, e.at, e.dt, e.rn
- local ats = eat and next(eat) and { } -- type test maybe faster
if ats then
- if attributeconverter then
- for k,v in next, eat do
- ats[#ats+1] = format('%s=%q',k,attributeconverter(v))
- end
- else
- for k,v in next, eat do
- ats[#ats+1] = format('%s=%q',k,v)
- end
- end
+ handle("<",ens,":",etg," ",concat(ats," "),"/>")
+ else
+ handle("<",ens,":",etg,"/>")
end
- if ern and trace_remap and ern ~= ens then
- ens = ern
+ end
+ else
+ if edt and #edt > 0 then
+ if ats then
+ handle("<",etg," ",concat(ats," "),">")
+ else
+ handle("<",etg,">")
end
- if ens ~= "" then
- if edt and #edt > 0 then
- if ats then
- -- handle(format("<%s:%s %s>",ens,etg,concat(ats," ")))
- handle("<" .. ens .. ":" .. etg .. " " .. concat(ats," ") .. ">")
- else
- -- handle(format("<%s:%s>",ens,etg))
- handle("<" .. ens .. ":" .. etg .. ">")
- end
- for i=1,#edt do
- local e = edt[i]
- if type(e) == "string" then
- if textconverter then
- handle(textconverter(e))
- else
- handle(e)
- end
- else
- serialize(e,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
- end
- -- handle(format("</%s:%s>",ens,etg))
- handle("</" .. ens .. ":" .. etg .. ">")
+ for i=1,#edt do
+ local ei = edt[i]
+ if type(ei) == "string" then
+ handle(ei)
else
- if ats then
- -- handle(format("<%s:%s %s/>",ens,etg,concat(ats," ")))
- handle("<" .. ens .. ":" .. etg .. " " .. concat(ats," ") .. "/>")
- else
- -- handle(format("<%s:%s/>",ens,etg))
- handle("<" .. ens .. ":" .. etg .. "/>")
- end
+ serialize(ei,handlers)
end
+ end
+ handle("</",etg,">")
+ else
+ if ats then
+ handle("<",etg," ",concat(ats," "),"/>")
else
- if edt and #edt > 0 then
- if ats then
- -- handle(format("<%s %s>",etg,concat(ats," ")))
- handle("<" .. etg .. " " .. concat(ats," ") .. ">")
- else
- -- handle(format("<%s>",etg))
- handle("<" .. etg .. ">")
- end
- for i=1,#edt do
- local ei = edt[i]
- if type(ei) == "string" then
- if textconverter then
- handle(textconverter(ei))
- else
- handle(ei)
- end
- else
- serialize(ei,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
- end
- -- handle(format("</%s>",etg))
- handle("</" .. etg .. ">")
- else
- if ats then
- -- handle(format("<%s %s/>",etg,concat(ats," ")))
- handle("<" .. etg .. " " .. concat(ats," ") .. "/>")
- else
- -- handle(format("<%s/>",etg))
- handle("<" .. etg .. "/>")
- end
- end
+ handle("<",etg,"/>")
end
end
- elseif type(e) == "string" then
- if textconverter then
- handle(textconverter(e))
+ end
+end
+
+local function verbose_pi(e,handlers) -- emits a processing instruction: the first entry of e.dt wrapped in <? and ?>
+ handlers.handle("<?",e.dt[1],"?>")
+end
+
+local function verbose_comment(e,handlers) -- emits a comment: the first entry of e.dt wrapped in <!-- and -->
+ handlers.handle("<!--",e.dt[1],"-->")
+end
+
+local function verbose_cdata(e,handlers) -- emits a cdata section: the first entry of e.dt wrapped in <![CDATA[ and ]]>
+ handlers.handle("<![CDATA[", e.dt[1],"]]>")
+end
+
+local function verbose_doctype(e,handlers) -- emits a doctype declaration built from the first entry of e.dt
+ handlers.handle("<!DOCTYPE ",e.dt[1],">")
+end
+
+local function verbose_root(e,handlers) -- a root has no markup of its own: only its content list e.dt is serialized
+ handlers.serialize(e.dt,handlers)
+end
+
+local function verbose_text(e,handlers) -- passes e on to the handle function unchanged
+ handlers.handle(e)
+end
+
+local function verbose_document(e,handlers)
+ local serialize = handlers.serialize
+ local functions = handlers.functions
+ for i=1,#e do
+ local ei = e[i]
+ if type(ei) == "string" then
+ functions["@tx@"](ei,handlers)
else
- handle(e)
+ serialize(ei,handlers)
end
- else
- for i=1,#e do
- local ei = e[i]
- if type(ei) == "string" then
- if textconverter then
- handle(textconverter(ei))
- else
- handle(ei)
- end
- else
- serialize(ei,handle,textconverter,attributeconverter,specialconverter,nocommands)
- end
+ end
+end
+
+local function serialize(e,handlers,...) -- entry point: optional initialize(...), dispatch on e.tg, optional finalize()
+ local initialize = handlers.initialize
+ local finalize = handlers.finalize
+ local functions = handlers.functions
+ if initialize then
+ local state = initialize(...)
+ if not state then -- same outcome as the former 'not state == true', which parses as '(not state) == true'
+ return state
end
end
+ local etg = e.tg
+ if etg then
+ (functions[etg] or functions["@el@"])(e,handlers)
+ -- elseif type(e) == "string" then
+ -- functions["@tx@"](e,handlers)
+ else
+ functions["@dc@"](e,handlers)
+ end
+ if finalize then
+ return finalize()
+ end
end
-xml.serialize = serialize
+local function xserialize(e,handlers)
+ local functions = handlers.functions
+ local etg = e.tg
+ if etg then
+ (functions[etg] or functions["@el@"])(e,handlers)
+ -- elseif type(e) == "string" then
+ -- functions["@tx@"](e,handlers)
+ else
+ functions["@dc@"](e,handlers)
+ end
+end
-function xml.checkbom(root) -- can be made faster
- if root.ri then
- local dt, found = root.dt, false
- for k=1,#dt do
- local v = dt[k]
- if type(v) == "table" and v.special and v.tg == "@pi" and find(v.dt,"xml.*version=") then
- found = true
- break
+local handlers = { }
+
+local function newhandlers(settings) -- builds a handler set from a copy of handlers.verbose plus the given overrides
+ local t = table.copy(handlers.verbose or { }) -- merge
+ if settings then
+ for k,v in next, settings do
+ if type(v) == "table" then
+ local tk = t[k] if not tk then tk = { } t[k] = tk end -- tk is local now, it used to leak into the global namespace
+ for kk,vv in next, v do
+ tk[kk] = vv
+ end
+ else
+ t[k] = v
end
end
- if not found then
- insert(dt, 1, { special=true, ns="", tg="@pi@", dt = { "xml version='1.0' standalone='yes'"} } )
- insert(dt, 2, "\n" )
+ if settings.name then
+ handlers[settings.name] = t -- registered so that xml.gethandlers(name) can return it
end
end
+ return t
+end
+
+local nofunction = function() end
+
+function xml.sethandlersfunction(handler,name,fnc)
+ handler.functions[name] = fnc or nofunction
end
+function xml.gethandlersfunction(handler,name)
+ return handler.functions[name]
+end
+
+function xml.gethandlers(name)
+ return handlers[name]
+end
+
+newhandlers {
+ name = "verbose",
+ initialize = false, -- faster than nil and mt lookup
+ finalize = false, -- faster than nil and mt lookup
+ serialize = xserialize,
+ handle = print,
+ functions = {
+ ["@dc@"] = verbose_document,
+ ["@dt@"] = verbose_doctype,
+ ["@rt@"] = verbose_root,
+ ["@el@"] = verbose_element,
+ ["@pi@"] = verbose_pi,
+ ["@cm@"] = verbose_comment,
+ ["@cd@"] = verbose_cdata,
+ ["@tx@"] = verbose_text,
+ }
+}
+
--[[ldx--
-<p>At the cost of some 25% runtime overhead you can first convert the tree to a string
-and then handle the lot.</p>
+<p>How you deal with saving data depends on your preferences. For a 40 MB database
+file the timings on a 2.3 Core Duo are as follows (time in seconds):</p>
+
+<lines>
+1.3 : load data from file to string
+6.1 : convert string into tree
+5.3 : saving in file using xmlsave
+6.8 : converting to string using xml.tostring
+3.6 : saving converted string in file
+</lines>
+
+<p>Beware, these timings were measured with the old routine but the results will not be that
+much different I guess.</p>
--ldx]]--
-function xml.tostring(root) -- 25% overhead due to collecting
+-- maybe this will move to lxml-xml
+
+local result
+
+local xmlfilehandler = newhandlers {
+ name = "file",
+ initialize = function(name) result = io.open(name,"wb") return result end,
+ finalize = function() result:close() return true end,
+ handle = function(...) result:write(...) end,
+}
+
+-- no checking on writeability here but not faster either
+--
+-- local xmlfilehandler = newhandlers {
+-- initialize = function(name) io.output(name,"wb") return true end,
+-- finalize = function() io.close() return true end,
+-- handle = io.write,
+-- }
+
+
+function xml.save(root,name)
+ serialize(root,xmlfilehandler,name)
+end
+
+local result
+
+local xmlstringhandler = newhandlers {
+ name = "string",
+ initialize = function() result = { } return result end,
+ finalize = function() return concat(result) end,
+ handle = function(...) result[#result+1] = concat { ... } end
+}
+
+local function xmltostring(root) -- 25% overhead due to collecting
if root then
if type(root) == 'string' then
return root
- elseif next(root) then -- next is faster than type (and >0 test)
- local result = { }
- serialize(root,function(s) result[#result+1] = s end) -- brrr, slow (direct printing is faster)
- return concat(result,"")
+ else -- if next(root) then -- next is faster than type (and >0 test)
+ return serialize(root,xmlstringhandler) or ""
end
end
return ""
end
+local function xmltext(root) -- inline
+ return (root and xmltostring(root)) or ""
+end
+
+function initialize_mt(root)
+ mt = { __tostring = xmltext, __index = root }
+end
+
+xml.defaulthandlers = handlers
+xml.newhandlers = newhandlers
+xml.serialize = serialize
+xml.tostring = xmltostring
+xml.text = xmltext
+
--[[ldx--
<p>The next function operated on the content only and needs a handle function
that accepts a string.</p>
--ldx]]--
-function xml.string(e,handle)
+local function xmlstring(e,handle)
if not handle or (e.special and e.tg ~= "@rt@") then
-- nothing
elseif e.tg then
local edt = e.dt
if edt then
for i=1,#edt do
- xml.string(edt[i],handle)
+ xmlstring(edt[i],handle)
end
end
else
@@ -3572,33 +4278,16 @@ function xml.string(e,handle)
end
end
---[[ldx--
-<p>How you deal with saving data depends on your preferences. For a 40 MB database
-file the timing on a 2.3 Core Duo are as follows (time in seconds):</p>
-
-<lines>
-1.3 : load data from file to string
-6.1 : convert string into tree
-5.3 : saving in file using xmlsave
-6.8 : converting to string using xml.tostring
-3.6 : saving converted string in file
-</lines>
-
-<p>The save function is given below.</p>
---ldx]]--
-
-function xml.save(root,name)
- local f = io.open(name,"w")
- if f then
- xml.serialize(root,function(s) f:write(s) end)
- f:close()
- end
-end
+xml.string = xmlstring
--[[ldx--
<p>A few helpers:</p>
--ldx]]--
+function xml.parent(root)
+ return root.__p__
+end
+
function xml.body(root)
return (root.ri and root.dt[root.ri]) or root
end
@@ -3611,34 +4300,19 @@ function xml.content(root) -- bugged
return (root and root.dt and xml.tostring(root.dt)) or ""
end
-function xml.isempty(root, pattern)
- if pattern == "" or pattern == "*" then
- pattern = nil
- end
- if pattern then
- -- todo
- return false
- else
- return not root or not root.dt or #root.dt == 0 or root.dt == ""
- end
-end
-
--[[ldx--
<p>The next helper erases an element but keeps the table as it is,
and since empty strings are not serialized (effectively) it does
not harm. Copying the table would take more time. Usage:</p>
-
-<typing>
-dt[k] = xml.empty() or xml.empty(dt,k)
-</typing>
--ldx]]--
-function xml.empty(dt,k)
- if dt and k then
- dt[k] = ""
- return dt[k]
- else
- return ""
+function xml.erase(dt,k) -- blanks dt[k], or overwrites every entry of dt when no k is given
+ if dt then
+ if k then
+ dt[k] = ""
+ else for k=1,#dt do
+ dt[k] = { "" } -- was dt[1]: only the first slot got overwritten, all other entries stayed intact
+ end end
end
end
@@ -3672,96 +4346,403 @@ if not modules then modules = { } end modules ['lxml-pth'] = {
license = "see context related readme files"
}
+-- e.ni is only valid after a filter run
+
local concat, remove, insert = table.concat, table.remove, table.insert
local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
-local format, lower, gmatch, gsub, find, rep = string.format, string.lower, string.gmatch, string.gsub, string.find, string.rep
+local format, upper, lower, gmatch, gsub, find, rep = string.format, string.upper, string.lower, string.gmatch, string.gsub, string.find, string.rep
--[[ldx--
<p>This module can be used stand alone but also inside <l n='mkiv'/> in
which case it hooks into the tracker code. Therefore we provide a few
functions that set the tracers. Here we overload a previously defined
function.</p>
+<p>If I can get in the mood I will make a variant that is XSLT compliant
+but I wonder if it makes sense.</p>
--ldx]]--
-local trace_lpath = false
-
-if trackers then
- trackers.register("xml.lpath", function(v) trace_lpath = v end)
-end
+--[[ldx--
+<p>Especially the lpath code is experimental, we will support some of xpath, but
+only things that make sense for us; as compensation it is possible to hook in your
+own functions. Apart from preprocessing content for <l n='context'/> we also need
+this module for process management, like handling <l n='ctx'/> and <l n='rlx'/>
+files.</p>
-local settrace = xml.settrace -- lxml-tab
+<typing>
+a/b/c /*/c
+a/b/c/first() a/b/c/last() a/b/c/index(n) a/b/c/index(-n)
+a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
+</typing>
+--ldx]]--
-function xml.settrace(str,value)
- if str == "lpath" then
- trace_lpath = value or false
- else
- settrace(str,value) -- lxml-tab
- end
-end
+local trace_lpath = false if trackers then trackers.register("xml.path", function(v) trace_lpath = v end) end
+local trace_lparse = false if trackers then trackers.register("xml.parse", function(v) trace_lparse = v end) end
+local trace_lprofile = false if trackers then trackers.register("xml.profile", function(v) trace_lpath = v trace_lparse = v trace_lprofile = v end) end
--[[ldx--
-<p>We've now arrived at an intersting part: accessing the tree using a subset
+<p>We've now arrived at an interesting part: accessing the tree using a subset
of <l n='xpath'/> and since we're not compatible we call it <l n='lpath'/>. We
will explain more about its usage in other documents.</p>
--ldx]]--
-local lpathcalls = 0 -- statistics
-local lpathcached = 0 -- statistics
+local lpathcalls = 0 function xml.lpathcalls () return lpathcalls end
+local lpathcached = 0 function xml.lpathcached() return lpathcached end
-xml.functions = xml.functions or { }
-xml.expressions = xml.expressions or { }
+xml.functions = xml.functions or { } -- internal
+xml.expressions = xml.expressions or { } -- in expressions
+xml.finalizers = xml.finalizers or { } -- fast do-with ... (with return value other than collection)
+xml.specialhandler = xml.specialhandler or { }
local functions = xml.functions
local expressions = xml.expressions
+local finalizers = xml.finalizers
-local actions = {
- [10] = "stay",
- [11] = "parent",
- [12] = "subtree root",
- [13] = "document root",
- [14] = "any",
- [15] = "many",
- [16] = "initial",
- [20] = "match",
- [21] = "match one of",
- [22] = "match and attribute eq",
- [23] = "match and attribute ne",
- [24] = "match one of and attribute eq",
- [25] = "match one of and attribute ne",
- [27] = "has attribute",
- [28] = "has value",
- [29] = "fast match",
- [30] = "select",
- [31] = "expression",
- [40] = "processing instruction",
-}
+finalizers.xml = finalizers.xml or { }
+finalizers.tex = finalizers.tex or { }
+
+local function fallback (t, name)
+ local fn = finalizers[name]
+ if fn then
+ t[name] = fn
+ else
+ logs.report("xml","unknown sub finalizer '%s'",tostring(name))
+ fn = function() end
+ end
+ return fn
+end
+
+setmetatable(finalizers.xml, { __index = fallback })
+setmetatable(finalizers.tex, { __index = fallback })
+
+xml.defaultprotocol = "xml"
+
+-- as xsl does not follow xpath completely here we will also
+-- be more liberal especially with regards to the use of | and
+-- the rootpath:
+--
+-- test : all 'test' under current
+-- /test : 'test' relative to current
+-- a|b|c : set of names
+-- (a|b|c) : idem
+-- ! : not
+--
+-- after all, we're not doing transformations but filtering. in
+-- addition we provide filter functions (last bit)
+--
+-- todo: optimizer
+--
+-- .. : parent
+-- * : all kids
+-- / : anchor here
+-- // : /**/
+-- ** : all in between
+--
+-- so far we had (more practical as we don't transform)
+--
+-- {/test} : kids 'test' under current node
+-- {test} : any kid with tag 'test'
+-- {//test} : same as above
--- a rather dumb lpeg
+-- evaluator (needs to be redone, for the moment copied)
-local P, S, R, C, V, Cc = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc
+-- todo: apply_axis(list,notable) and collection vs single
--- instead of using functions we just parse a few names which saves a call
--- later on
+local apply_axis = { }
-local lp_position = P("position()") / "ps"
-local lp_index = P("index()") / "id"
-local lp_text = P("text()") / "tx"
-local lp_name = P("name()") / "(ns~='' and ns..':'..tg)" -- "((rt.ns~='' and rt.ns..':'..rt.tg) or '')"
-local lp_tag = P("tag()") / "tg" -- (rt.tg or '')
-local lp_ns = P("ns()") / "ns" -- (rt.ns or '')
-local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==")
-local lp_doequal = P("=") / "=="
-local lp_attribute = P("@") / "" * Cc("(at['") * R("az","AZ","--","__")^1 * Cc("'] or '')")
+apply_axis['root'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ local rt = ll
+ while ll do
+ ll = ll.__p__
+ if ll then
+ rt = ll
+ end
+ end
+ collected[#collected+1] = rt
+ end
+ return collected
+end
+
+apply_axis['self'] = function(list)
+--~ local collected = { }
+--~ for l=1,#list do
+--~ collected[#collected+1] = list[l]
+--~ end
+--~ return collected
+ return list
+end
-local lp_lua_function = C(R("az","AZ","--","__")^1 * (P(".") * R("az","AZ","--","__")^1)^1) * P("(") / function(t) -- todo: better . handling
+apply_axis['child'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ local dt = list[l].dt
+ for k=1,#dt do
+ local dk = dt[k]
+ if dk.tg then
+ collected[#collected+1] = dk
+ dk.ni = k -- refresh
+ end
+ end
+ end
+ return collected
+end
+
+local function collect(list,collected)
+ local dt = list.dt
+ if dt then
+ for k=1,#dt do
+ local dk = dt[k]
+ if dk.tg then
+ collected[#collected+1] = dk
+ dk.ni = k -- refresh
+ collect(dk,collected)
+ end
+ end
+ end
+end
+apply_axis['descendant'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ collect(list[l],collected)
+ end
+ return collected
+end
+
+local function collect(list,collected)
+ local dt = list.dt
+ if dt then
+ for k=1,#dt do
+ local dk = dt[k]
+ if dk.tg then
+ collected[#collected+1] = dk
+ dk.ni = k -- refresh
+ collect(dk,collected)
+ end
+ end
+ end
+end
+apply_axis['descendant-or-self'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+if ll.special ~= true then -- catch double root
+ collected[#collected+1] = ll
+end
+ collect(ll,collected)
+ end
+ return collected
+end
+
+apply_axis['ancestor'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ while ll do
+ ll = ll.__p__
+ if ll then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ return collected
+end
+
+apply_axis['ancestor-or-self'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ collected[#collected+1] = ll
+ while ll do
+ ll = ll.__p__
+ if ll then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ return collected
+end
+
+apply_axis['parent'] = function(list)
+ local collected = { }
+ for l=1,#list do
+ local pl = list[l].__p__
+ if pl then
+ collected[#collected+1] = pl
+ end
+ end
+ return collected
+end
+
+apply_axis['attribute'] = function(list)
+ return { }
+end
+
+apply_axis['following'] = function(list)
+ return { }
+end
+
+apply_axis['following-sibling'] = function(list)
+ return { }
+end
+
+apply_axis['namespace'] = function(list)
+ return { }
+end
+
+apply_axis['preceding'] = function(list)
+ return { }
+end
+
+apply_axis['preceding-sibling'] = function(list)
+ return { }
+end
+
+apply_axis['auto-descendant-or-self'] = apply_axis['descendant-or-self']
+apply_axis['auto-descendant'] = apply_axis['descendant']
+apply_axis['auto-child'] = apply_axis['child']
+apply_axis['auto-self'] = apply_axis['self']
+apply_axis['initial-child'] = apply_axis['child']
+
+local function apply_nodes(list,directive,nodes)
+ -- todo: nodes[1] etc ... negated node name in set ... when needed
+ -- ... currently ignored
+ local maxn = #nodes
+ if maxn == 3 then --optimized loop
+ local nns, ntg = nodes[2], nodes[3]
+ if not nns and not ntg then -- wildcard
+ if directive then
+ return list
+ else
+ return { }
+ end
+ else
+ local collected = { }
+ if not nns then -- only check tag
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ if directive then
+ if ntg == ltg then
+ collected[#collected+1] = ll
+ end
+ elseif ntg ~= ltg then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ elseif not ntg then -- only check namespace
+ for l=1,#list do
+ local ll = list[l]
+ local lns = ll.rn or ll.ns
+ if lns then
+ if directive then
+ if lns == nns then
+ collected[#collected+1] = ll
+ end
+ elseif lns ~= nns then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ else -- check both
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ local lns = ll.rn or ll.ns
+ local ok = ltg == ntg and lns == nns
+ if directive then
+ if ok then
+ collected[#collected+1] = ll
+ end
+ elseif not ok then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ end
+ return collected
+ end
+ else
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ local ltg = ll.tg
+ if ltg then
+ local lns = ll.rn or ll.ns
+ local ok = false
+ for n=1,maxn,3 do
+ local nns, ntg = nodes[n+1], nodes[n+2]
+ ok = (not ntg or ltg == ntg) and (not nns or lns == nns)
+ if ok then
+ break
+ end
+ end
+ if directive then
+ if ok then
+ collected[#collected+1] = ll
+ end
+ elseif not ok then
+ collected[#collected+1] = ll
+ end
+ end
+ end
+ return collected
+ end
+end
+
+local function apply_expression(list,expression,order) -- keeps the entries of list for which expression yields a true value
+ local collected = { }
+ for l=1,#list do
+ local ll = list[l]
+ if expression(list,ll,l,order) then -- nasty, only valid when n=1
+ collected[#collected+1] = ll
+ end
+ end
+ return collected
+end
+
+local P, V, C, Cs, Cc, Ct, R, S, Cg, Cb = lpeg.P, lpeg.V, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cb
+
+local spaces = S(" \n\r\t\f")^0
+
+local lp_space = S(" \n\r\t\f")
+local lp_any = P(1)
+
+local lp_noequal = P("!=") / "~=" + P("<=") + P(">=") + P("==")
+local lp_doequal = P("=") / "=="
+local lp_or = P("|") / " or "
+local lp_and = P("&") / " and "
+
+local lp_builtin = P (
+ P("first") / "1" +
+ P("last") / "#list" +
+ P("position") / "l" +
+ P("rootposition") / "order" +
+ P("index") / "ll.ni" +
+ P("text") / "(ll.dt[1] or '')" +
+ P("name") / "(ll.ns~='' and ll.ns..':'..ll.tg)" +
+ P("tag") / "ll.tg" +
+ P("ns") / "ll.ns"
+ ) * ((spaces * P("(") * spaces * P(")"))/"")
+
+local lp_attribute = (P("@") + P("attribute::")) / "" * Cc("ll.at['") * R("az","AZ","--","__")^1 * Cc("']")
+local lp_fastpos = ((R("09","--","++")^1 * P(-1)) / function(s) return "l==" .. s end)
+
+local lp_reserved = C("and") + C("or") + C("not") + C("div") + C("mod") + C("true") + C("false")
+
+local lp_lua_function = C(R("az","AZ","__")^1 * (P(".") * R("az","AZ","__")^1)^1) * ("(") / function(t) -- todo: better . handling
return t .. "("
end
-local lp_function = C(R("az","AZ","--","__")^1) * P("(") / function(t) -- todo: better . handling
+local lp_function = C(R("az","AZ","__")^1) * P("(") / function(t) -- todo: better . handling
if expressions[t] then
- return "expressions." .. t .. "("
+ return "expr." .. t .. "("
else
- return "expressions.error("
+ return "expr.error("
end
end
@@ -3771,337 +4752,527 @@ local noparent = 1 - (lparent+rparent)
local nested = lpeg.P{lparent * (noparent + lpeg.V(1))^0 * rparent}
local value = lpeg.P(lparent * lpeg.C((noparent + nested)^0) * rparent) -- lpeg.P{"("*C(((1-S("()"))+V(1))^0)*")"}
--- if we use a dedicated namespace then we don't need to pass rt and k
+local lp_child = Cc("expr.child(e,'") * R("az","AZ","--","__")^1 * Cc("')")
+local lp_string = Cc("'") * R("az","AZ","--","__")^1 * Cc("'")
+local lp_content= (P("'") * (1-P("'"))^0 * P("'") + P('"') * (1-P('"'))^0 * P('"'))
+
+local cleaner
-local lp_special = (C(P("name")+P("text")+P("tag"))) * value / function(t,s)
+local lp_special = (C(P("name")+P("text")+P("tag")+P("count")+P("child"))) * value / function(t,s)
if expressions[t] then
- if s then
- return "expressions." .. t .. "(r,k," .. s ..")"
+ s = s and s ~= "" and cleaner:match(s)
+ if s and s ~= "" then
+ return "expr." .. t .. "(e," .. s ..")"
else
- return "expressions." .. t .. "(r,k)"
+ return "expr." .. t .. "(e)"
end
else
- return "expressions.error(" .. t .. ")"
+ return "expr.error(" .. t .. ")"
end
end
-local converter = lpeg.Cs ( (
- lp_position +
- lp_index +
- lp_text + lp_name + -- fast one
+local content =
+ lp_builtin +
+ lp_attribute +
lp_special +
lp_noequal + lp_doequal +
- lp_attribute +
- lp_lua_function +
- lp_function +
+ lp_or + lp_and +
+ lp_reserved +
+ lp_lua_function + lp_function +
+ lp_content + -- too fragile
+ lp_child +
+ lp_any
+
+local converter = lpeg.Cs (
+ lp_fastpos + (lpeg.P { lparent * (lpeg.V(1))^0 * rparent + content } )^0
+)
+
+cleaner = lpeg.Cs ( (
+--~ lp_fastpos +
+ lp_reserved +
+ lp_string +
1 )^1 )
--- expressions,root,rootdt,k,e,edt,ns,tg,idx,hsh[tg] or 1
+--~ expr
-local template = [[
- return function(expressions,r,d,k,e,dt,ns,tg,id,ps)
- local at, tx = e.at or { }, dt[1] or ""
+local template_e = [[
+ local expr = xml.expressions
+ return function(list,ll,l,root)
return %s
end
]]
-local function make_expression(str)
- str = converter:match(str)
- return str, loadstring(format(template,str))()
-end
-
-local map = { }
-
-local space = S(' \r\n\t')
-local squote = S("'")
-local dquote = S('"')
-local lparent = P('(')
-local rparent = P(')')
-local atsign = P('@')
-local lbracket = P('[')
-local rbracket = P(']')
-local exclam = P('!')
-local period = P('.')
-local eq = P('==') + P('=')
-local ne = P('<>') + P('!=')
-local star = P('*')
-local slash = P('/')
-local colon = P(':')
-local bar = P('|')
-local hat = P('^')
-local valid = R('az', 'AZ', '09') + S('_-')
-local name_yes = C(valid^1 + star) * colon * C(valid^1 + star) -- permits ns:* *:tg *:*
-local name_nop = Cc("*") * C(valid^1)
-local name = name_yes + name_nop
-local number = C((S('+-')^0 * R('09')^1)) / tonumber
-local names = (bar^0 * name)^1
-local morenames = name * (bar^0 * name)^1
-local instructiontag = P('pi::')
-local spacing = C(space^0)
-local somespace = space^1
-local optionalspace = space^0
-local text = C(valid^0)
-local value = (squote * C((1 - squote)^0) * squote) + (dquote * C((1 - dquote)^0) * dquote)
-local empty = 1-slash
-
-local is_eq = lbracket * atsign * name * eq * value * rbracket
-local is_ne = lbracket * atsign * name * ne * value * rbracket
-local is_attribute = lbracket * atsign * name * rbracket
-local is_value = lbracket * value * rbracket
-local is_number = lbracket * number * rbracket
-
-local nobracket = 1-(lbracket+rbracket) -- must be improved
-local is_expression = lbracket * C(((C(nobracket^1))/make_expression)) * rbracket
-
-local is_expression = lbracket * (C(nobracket^1))/make_expression * rbracket
-
-local is_one = name
-local is_none = exclam * name
-local is_one_of = ((lparent * names * rparent) + morenames)
-local is_none_of = exclam * ((lparent * names * rparent) + morenames)
-
-local stay = (period )
-local parent = (period * period ) / function( ) map[#map+1] = { 11 } end
-local subtreeroot = (slash + hat ) / function( ) map[#map+1] = { 12 } end
-local documentroot = (hat * hat ) / function( ) map[#map+1] = { 13 } end
-local any = (star ) / function( ) map[#map+1] = { 14 } end
-local many = (star * star ) / function( ) map[#map+1] = { 15 } end
-local initial = (hat * hat * hat ) / function( ) map[#map+1] = { 16 } end
-
-local match = (is_one ) / function(...) map[#map+1] = { 20, true , ... } end
-local match_one_of = (is_one_of ) / function(...) map[#map+1] = { 21, true , ... } end
-local dont_match = (is_none ) / function(...) map[#map+1] = { 20, false, ... } end
-local dont_match_one_of = (is_none_of ) / function(...) map[#map+1] = { 21, false, ... } end
-
-local match_and_eq = (is_one * is_eq ) / function(...) map[#map+1] = { 22, true , ... } end
-local match_and_ne = (is_one * is_ne ) / function(...) map[#map+1] = { 23, true , ... } end
-local dont_match_and_eq = (is_none * is_eq ) / function(...) map[#map+1] = { 22, false, ... } end
-local dont_match_and_ne = (is_none * is_ne ) / function(...) map[#map+1] = { 23, false, ... } end
-
-local match_one_of_and_eq = (is_one_of * is_eq ) / function(...) map[#map+1] = { 24, true , ... } end
-local match_one_of_and_ne = (is_one_of * is_ne ) / function(...) map[#map+1] = { 25, true , ... } end
-local dont_match_one_of_and_eq = (is_none_of * is_eq ) / function(...) map[#map+1] = { 24, false, ... } end
-local dont_match_one_of_and_ne = (is_none_of * is_ne ) / function(...) map[#map+1] = { 25, false, ... } end
-
-local has_attribute = (is_one * is_attribute) / function(...) map[#map+1] = { 27, true , ... } end
-local has_value = (is_one * is_value ) / function(...) map[#map+1] = { 28, true , ... } end
-local dont_has_attribute = (is_none * is_attribute) / function(...) map[#map+1] = { 27, false, ... } end
-local dont_has_value = (is_none * is_value ) / function(...) map[#map+1] = { 28, false, ... } end
-local position = (is_one * is_number ) / function(...) map[#map+1] = { 30, true, ... } end
-local dont_position = (is_none * is_number ) / function(...) map[#map+1] = { 30, false, ... } end
-
-local expression = (is_one * is_expression)/ function(...) map[#map+1] = { 31, true, ... } end
-local dont_expression = (is_none * is_expression)/ function(...) map[#map+1] = { 31, false, ... } end
-
-local self_expression = ( is_expression) / function(...) if #map == 0 then map[#map+1] = { 11 } end
- map[#map+1] = { 31, true, "*", "*", ... } end
-local dont_self_expression = (exclam * is_expression) / function(...) if #map == 0 then map[#map+1] = { 11 } end
- map[#map+1] = { 31, false, "*", "*", ... } end
-
-local instruction = (instructiontag * text ) / function(...) map[#map+1] = { 40, ... } end
-local nothing = (empty ) / function( ) map[#map+1] = { 15 } end -- 15 ?
-local crap = (1-slash)^1
-
--- a few ugly goodies:
-
-local docroottag = P('^^') / function( ) map[#map+1] = { 12 } end
-local subroottag = P('^') / function( ) map[#map+1] = { 13 } end
-local roottag = P('root::') / function( ) map[#map+1] = { 12 } end
-local parenttag = P('parent::') / function( ) map[#map+1] = { 11 } end
-local childtag = P('child::')
-local selftag = P('self::')
-
--- there will be more and order will be optimized
-
-local selector = (
- instruction +
--- many + any + -- brrr, not here !
- parent + stay +
- dont_position + position +
- dont_match_one_of_and_eq + dont_match_one_of_and_ne +
- match_one_of_and_eq + match_one_of_and_ne +
- dont_match_and_eq + dont_match_and_ne +
- match_and_eq + match_and_ne +
- dont_expression + expression +
- dont_self_expression + self_expression +
- has_attribute + has_value +
- dont_match_one_of + match_one_of +
- dont_match + match +
- many + any +
- crap + empty
-)
+local template_f_y = [[
+ local finalizer = xml.finalizers['%s']['%s']
+ return function(collection)
+ return finalizer(collection,%s)
+ end
+]]
-local grammar = P { "startup",
- startup = (initial + documentroot + subtreeroot + roottag + docroottag + subroottag)^0 * V("followup"),
- followup = ((slash + parenttag + childtag + selftag)^0 * selector)^1,
-}
+local template_f_n = [[
+ return xml.finalizers['%s']['%s']
+]]
-local function compose(str)
- if not str or str == "" then
- -- wildcard
- return true
- elseif str == '/' then
- -- root
- return false
+--
+
+local function errorrunner_e(str,cnv)
+ logs.report("lpath","error in expression: %s => %s",str,cnv)
+ return false
+end
+local function errorrunner_f(str,arg)
+ logs.report("lpath","error in finalizer: %s(%s)",str,arg or "")
+ return false
+end
+
+local function register_nodes(nodetest,nodes)
+ return { kind = "nodes", nodetest = nodetest, nodes = nodes }
+end
+
+local function register_expression(expression) -- converts an lpath expression to Lua and compiles it into an evaluator
+ local converted = converter:match(expression)
+ local runner = loadstring(format(template_e,converted))
+ runner = (runner and runner()) or function() return errorrunner_e(expression,converted) end -- pass on the false that the error reporter returns
+ return { kind = "expression", expression = expression, converted = converted, evaluator = runner }
+end
+
+local function register_finalizer(protocol,name,arguments) -- compiles a finalizer lookup, with or without extra arguments
+ local runner
+ if arguments and arguments ~= "" then
+ runner = loadstring(format(template_f_y,protocol or xml.defaultprotocol,name,arguments))
else
- map = { }
- grammar:match(str)
- if #map == 0 then
- return true
- else
- local m = map[1][1]
- if #map == 1 then
- if m == 14 or m == 15 then
- -- wildcard
- return true
- elseif m == 12 then
- -- root
- return false
- end
- elseif #map == 2 and m == 12 and map[2][1] == 20 then
- -- return { { 29, map[2][2], map[2][3], map[2][4], map[2][5] } }
- map[2][1] = 29
- return { map[2] }
- end
- if m ~= 11 and m ~= 12 and m ~= 13 and m ~= 14 and m ~= 15 and m ~= 16 then
- insert(map, 1, { 16 })
- end
- -- print(gsub(table.serialize(map),"[ \n]+"," "))
- return map
- end
+ runner = loadstring(format(template_f_n,protocol or xml.defaultprotocol,name))
end
+ runner = (runner and runner()) or function() return errorrunner_f(name,arguments) end -- pass on the false that the error reporter returns
+ return { kind = "finalizer", name = name, arguments = arguments, finalizer = runner }
end
+local expression = P { "ex",
+ ex = "[" * C((V("sq") + V("dq") + (1 - S("[]")) + V("ex"))^0) * "]",
+ sq = "'" * (1 - S("'"))^0 * "'",
+ dq = '"' * (1 - S('"'))^0 * '"',
+}
+
+local arguments = P { "ar",
+ ar = "(" * Cs((V("sq") + V("dq") + V("nq") + P(1-P(")")))^0) * ")",
+ nq = ((1 - S("),'\""))^1) / function(s) return format("%q",s) end,
+ sq = P("'") * (1 - P("'"))^0 * P("'"),
+ dq = P('"') * (1 - P('"'))^0 * P('"'),
+}
+
+-- todo: better arg parser
+
+local register_self = { kind = "axis", axis = "self" } -- , apply = apply_axis["self"] }
+local register_parent = { kind = "axis", axis = "parent" } -- , apply = apply_axis["parent"] }
+local register_descendant = { kind = "axis", axis = "descendant" } -- , apply = apply_axis["descendant"] }
+local register_child = { kind = "axis", axis = "child" } -- , apply = apply_axis["child"] }
+local register_descendant_or_self = { kind = "axis", axis = "descendant-or-self" } -- , apply = apply_axis["descendant-or-self"] }
+local register_root = { kind = "axis", axis = "root" } -- , apply = apply_axis["root"] }
+local register_ancestor = { kind = "axis", axis = "ancestor" } -- , apply = apply_axis["ancestor"] }
+local register_ancestor_or_self = { kind = "axis", axis = "ancestor-or-self" } -- , apply = apply_axis["ancestor-or-self"] }
+local register_attribute = { kind = "axis", axis = "attribute" } -- , apply = apply_axis["attribute"] }
+local register_namespace = { kind = "axis", axis = "namespace" } -- , apply = apply_axis["namespace"] }
+local register_following = { kind = "axis", axis = "following" } -- , apply = apply_axis["following"] }
+local register_following_sibling = { kind = "axis", axis = "following-sibling" } -- , apply = apply_axis["following-sibling"] }
+local register_preceding = { kind = "axis", axis = "preceding" } -- , apply = apply_axis["preceding"] }
+local register_preceding_sibling = { kind = "axis", axis = "preceding-sibling" } -- , apply = apply_axis["preceding-sibling"] }
+
+local register_auto_descendant_or_self = { kind = "axis", axis = "auto-descendant-or-self" } -- , apply = apply_axis["auto-descendant-or-self"] }
+local register_auto_descendant = { kind = "axis", axis = "auto-descendant" } -- , apply = apply_axis["auto-descendant"] }
+local register_auto_self = { kind = "axis", axis = "auto-self" } -- , apply = apply_axis["auto-self"] }
+local register_auto_child = { kind = "axis", axis = "auto-child" } -- , apply = apply_axis["auto-child"] }
+
+local register_initial_child = { kind = "axis", axis = "initial-child" } -- , apply = apply_axis["initial-child"] }
+
+local register_all_nodes = { kind = "nodes", nodetest = true, nodes = { true, false, false } }
+
+-- Build the step that records the part of an lpath pattern that could
+-- not be parsed; the offending text is kept in the comment field.
+local function register_error(str)
+    local step = { kind = "error" }
+    step.comment = format("unparsed: %s",str)
+    return step
+end
+
+-- Grammar that splits an lpath pattern into a list of step tables. The
+-- grammar table is handed to Ct (presumably lpeg.Ct), so a successful match
+-- yields one table with the captured steps in its array part and the
+-- protocol (or nil) in the named field "protocol". Rule order inside the
+-- ordered choices matters: longer literals are tried before their prefixes.
+local parser = Ct { "patterns", -- can be made a bit faster by moving pattern outside
+
+ -- optional protocol, optional leading slash, then one or more steps separated by slashes
+ patterns = spaces * V("protocol") * spaces * V("initial") * spaces * V("step") * spaces *
+ (P("/") * spaces * V("step") * spaces)^0,
+
+ protocol = Cg(V("letters"),"protocol") * P("://") + Cg(Cc(nil),"protocol"),
+
+ -- a step is a shortcut, or an axis with optional node tests, or (as last resort) an error
+ step = (V("shortcuts") + V("axis") * spaces * V("nodes")^0 + V("error")) * spaces * V("expressions")^0 * spaces * V("finalizer")^0,
+
+ -- explicit axis names; when none matches but input remains, the auto-child
+ -- axis is produced without consuming anything (the #(...) is a lookahead)
+ axis = V("descendant") + V("child") + V("parent") + V("self") + V("root") + V("ancestor") +
+ V("descendant_or_self") + V("following") + V("following_sibling") +
+ V("preceding") + V("preceding_sibling") + V("ancestor_or_self") +
+ #(1-P(-1)) * Cc(register_auto_child),
+
+ initial = (P("/") * spaces * Cc(register_initial_child))^-1,
+
+ -- swallows the rest of the input and turns it into an error step
+ error = (P(1)^1) / register_error,
+
+ shortcuts_a = V("s_descendant_or_self") + V("s_descendant") + V("s_child") + V("s_parent") + V("s_self") + V("s_root") + V("s_ancestor"),
+
+ shortcuts = V("shortcuts_a") * (spaces * "/" * spaces * V("shortcuts_a"))^0,
+
+ -- symbolic shortcuts, each mapped onto the shared axis table it stands for
+ s_descendant_or_self = P("/") * Cc(register_descendant_or_self),
+ s_descendant = P("**") * Cc(register_descendant),
+ s_child = P("*") * Cc(register_child ),
+ s_parent = P("..") * Cc(register_parent ),
+ s_self = P("." ) * Cc(register_self ),
+ s_root = P("^^") * Cc(register_root ),
+ s_ancestor = P("^") * Cc(register_ancestor ),
+
+ -- spelled out axis names (the attribute and namespace axes are disabled)
+ descendant = P("descendant::") * Cc(register_descendant ),
+ child = P("child::") * Cc(register_child ),
+ parent = P("parent::") * Cc(register_parent ),
+ self = P("self::") * Cc(register_self ),
+ root = P('root::') * Cc(register_root ),
+ ancestor = P('ancestor::') * Cc(register_ancestor ),
+ descendant_or_self = P('descendant-or-self::') * Cc(register_descendant_or_self ),
+ ancestor_or_self = P('ancestor-or-self::') * Cc(register_ancestor_or_self ),
+ -- attribute = P('attribute::') * Cc(register_attribute ),
+ -- namespace = P('namespace::') * Cc(register_namespace ),
+ following = P('following::') * Cc(register_following ),
+ following_sibling = P('following-sibling::') * Cc(register_following_sibling ),
+ preceding = P('preceding::') * Cc(register_preceding ),
+ preceding_sibling = P('preceding-sibling::') * Cc(register_preceding_sibling ),
+
+ -- either "not(a|b)" / "(a|b)" style or a bare (optionally negated) node set
+ nodes = (V("nodefunction") * spaces * P("(") * V("nodeset") * P(")") + V("nodetest") * V("nodeset")) / register_nodes,
+
+ expressions = expression / register_expression,
+
+ letters = R("az")^1,
+ name = (1-lpeg.S("/[]()|:*!"))^1,
+ negate = P("!") * Cc(false),
+
+ -- each node name contributes three captures: directive, namespace, tag
+ -- (false stands for "no namespace given" or the "*" wildcard)
+ nodefunction = V("negate") + P("not") * Cc(false) + Cc(true),
+ nodetest = V("negate") + Cc(true),
+ nodename = (V("negate") + Cc(true)) * spaces * ((V("wildnodename") * P(":") * V("wildnodename")) + (Cc(false) * V("wildnodename"))),
+ wildnodename = (C(V("name")) + P("*") * Cc(false)) * #(1-P("(")),
+ nodeset = spaces * Ct(V("nodename") * (spaces * P("|") * spaces * V("nodename"))^0) * spaces,
+
+ -- a finalizer has to end the pattern (P(-1)); the protocol captured earlier is passed along
+ finalizer = (Cb("protocol") * P("/")^-1 * C(V("name")) * arguments * P(-1)) / register_finalizer,
+
+}
+
local cache = { }
-function xml.lpath(pattern,trace)
- lpathcalls = lpathcalls + 1
- if type(pattern) == "string" then
- local result = cache[pattern]
- if result == nil then -- can be false which is valid -)
- result = compose(pattern)
- cache[pattern] = result
- lpathcached = lpathcached + 1
- end
- if trace or trace_lpath then
- xml.lshow(result)
- end
- return result
+local function nodesettostring(set,nodetest)
+ local t = { }
+ for i=1,#set,3 do
+ local directive, ns, tg = set[i], set[i+1], set[i+2]
+ if not ns or ns == "" then ns = "*" end
+ if not tg or tg == "" then tg = "*" end
+ tg = (tg == "@rt@" and "[root]") or format("%s:%s",ns,tg)
+ t[#t+1] = (directive and tg) or format("not(%s)",tg)
+ end
+ if nodetest == false then
+ return format("not(%s)",concat(t,"|"))
else
- return pattern
+ return concat(t,"|")
end
end
-function xml.cached_patterns()
- return cache
+-- Describe a list of elements as space separated "ns:tg" names for trace
+-- output; empty parts are shown as "*" and the root marker as "[root]".
+local function tagstostring(list)
+    local count = #list
+    if count == 0 then
+        return "no elements"
+    end
+    local names = { }
+    for index=1,count do
+        local element = list[index]
+        local ns, tg = element.ns, element.tg
+        if not ns or ns == "" then ns = "*" end
+        if tg == "@rt@" then
+            names[index] = "[root]"
+        else
+            if not tg or tg == "" then tg = "*" end
+            names[index] = format("%s:%s",ns,tg)
+        end
+    end
+    return concat(names," ")
end
--- we run out of locals (limited to 200)
---
--- local fallbackreport = (texio and texio.write) or io.write
-
-function xml.lshow(pattern,report)
--- report = report or fallbackreport
- report = report or (texio and texio.write) or io.write
- local lp = xml.lpath(pattern)
- if lp == false then
- report(" -: root\n")
- elseif lp == true then
- report(" -: wildcard\n")
+xml.nodesettostring = nodesettostring
+
+-- Report a parsed lpath pattern (protocol, original pattern and the
+-- serialized step table) on the "lpath" log channel.
+--
+-- parsed : a parsed pattern table, or a pattern string that is parsed first
+--
+-- A string is parsed through xml.parse_pattern, which is looked up when the
+-- call happens. The local parse_pattern is only declared after this function,
+-- so a direct reference here would resolve to an undefined global and fail.
+local function lshow(parsed)
+    if type(parsed) == "string" then
+        parsed = xml.parse_pattern(parsed)
+    end
+    local s = table.serialize_functions -- ugly
+    table.serialize_functions = false -- ugly, switched off while dumping
+    logs.report("lpath","%s://%s => %s",parsed.protocol or xml.defaultprotocol,parsed.pattern,table.serialize(parsed,false))
+    table.serialize_functions = s -- ugly, restore the previous setting
+end
+
+xml.lshow = lshow
+
+local function parse_pattern(pattern) -- the gain of caching is rather minimal
+ lpathcalls = lpathcalls + 1
+ if type(pattern) == "table" then
+ return pattern
else
- if type(pattern) == "string" then
- report(format("pattern: %s\n",pattern))
- end
- for k=1,#lp do
- local v = lp[k]
- if #v > 1 then
- local t = { }
- for i=2,#v do
- local vv = v[i]
- if type(vv) == "string" then
- t[#t+1] = (vv ~= "" and vv) or "#"
- elseif type(vv) == "boolean" then
- t[#t+1] = (vv and "==") or "<>"
+ local parsed = cache[pattern]
+ if parsed then
+ lpathcached = lpathcached + 1
+ else
+ parsed = parser:match(pattern)
+ if parsed then
+ parsed.pattern = pattern
+ local np = #parsed
+ if np == 0 then
+ parsed = { pattern = pattern, register_self, state = "parsing error" }
+ logs.report("lpath","parsing error in '%s'",pattern)
+ lshow(parsed)
+ else
+ -- we could have done this with a more complex parsed but this
+ -- is cleaner
+ local pi = parsed[1]
+ if pi.axis == "auto-child" then
+ parsed.comment = "auto-child replaced by auto-descendant-or-self"
+ parsed[1] = register_auto_descendant_or_self
+ --~ parsed.comment = "auto-child replaced by auto-descendant"
+ --~ parsed[1] = register_auto_descendant
+ elseif pi.axis == "initial-child" and np > 1 and parsed[2].axis then
+ parsed.comment = "initial-child removed" -- we could also make it a auto-self
+ remove(parsed,1)
end
end
- report(format("%2i: %s %s -> %s\n", k,v[1],actions[v[1]],concat(t," ")))
else
- report(format("%2i: %s %s\n", k,v[1],actions[v[1]]))
+ parsed = { pattern = pattern }
+ end
+ cache[pattern] = parsed
+ if trace_lparse and not trace_lprofile then
+ lshow(parsed)
end
end
+ return parsed
end
end
-function xml.xshow(e,...) -- also handy when report is given, use () to isolate first e
- local t = { ... }
--- local report = (type(t[#t]) == "function" and t[#t]) or fallbackreport
- local report = (type(t[#t]) == "function" and t[#t]) or (texio and texio.write) or io.write
- if e == nil then
- report("<!-- no element -->\n")
- elseif type(e) ~= "table" then
- report(tostring(e))
- elseif e.tg then
- report(tostring(e) .. "\n")
+-- we can move all calls inline and then merge the trace back
+-- technically we can combine axis and the next nodes which is
+-- what we did before but this is a bit cleaner (but slower too)
+-- but interesting is that it's not that much faster when we
+-- go inline
+--
+-- beware: we need to return a collection even when we filter
+-- else the (simple) cache gets messed up
+
+-- caching found lookups saves not that much (max .1 sec on a 8 sec run)
+-- and it also messes up finalizers
+
+local profiled = { } xml.profiled = profiled
+
+local function profiled_apply(list,parsed,nofparsed)
+ local p = profiled[parsed.pattern]
+ if p then
+ p.tested = p.tested + 1
else
- for i=1,#e do
- report(tostring(e[i]) .. "\n")
+ p = { tested = 1, matched = 0, finalized = 0 }
+ profiled[parsed.pattern] = p
+ end
+ local collected = list
+ for i=1,nofparsed do
+ local pi = parsed[i]
+ local kind = pi.kind
+ if kind == "axis" then
+ collected = apply_axis[pi.axis](collected)
+ elseif kind == "nodes" then
+ collected = apply_nodes(collected,pi.nodetest,pi.nodes)
+ elseif kind == "expression" then
+ collected = apply_expression(collected,pi.evaluator,i)
+ elseif kind == "finalizer" then
+ collected = pi.finalizer(collected)
+ p.matched = p.matched + 1
+ p.finalized = p.finalized + 1
+ return collected
+ end
+ if not collected or #collected == 0 then
+ return nil
end
end
+ if collected then
+ p.matched = p.matched + 1
+ end
+ return collected
+end
+
+-- Tracing variant of the step loop: applies the first nofparsed steps of
+-- parsed to list and reports the size of the intermediate result after
+-- every step on the "lpath" log channel.
+--
+-- list      : the elements to start from
+-- parsed    : parsed pattern (array of step tables plus pattern/protocol fields)
+-- nofparsed : number of steps to apply
+--
+-- Returns the collected elements, whatever a finalizer step returns, or nil
+-- as soon as a step leaves nothing.
+local function traced_apply(list,parsed,nofparsed)
+ if trace_lparse then
+ lshow(parsed)
+ end
+ logs.report("lpath", "collecting : %s",parsed.pattern)
+ logs.report("lpath", " root tags : %s",tagstostring(list))
+ local collected = list
+ for i=1,nofparsed do
+ local pi = parsed[i]
+ local kind = pi.kind
+ if kind == "axis" then
+ collected = apply_axis[pi.axis](collected)
+ logs.report("lpath", "% 10i : ax : %s",(collected and #collected) or 0,pi.axis)
+ elseif kind == "nodes" then
+ collected = apply_nodes(collected,pi.nodetest,pi.nodes)
+ logs.report("lpath", "% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest))
+ elseif kind == "expression" then
+ collected = apply_expression(collected,pi.evaluator,i)
+ logs.report("lpath", "% 10i : ex : %s",(collected and #collected) or 0,pi.expression)
+ elseif kind == "finalizer" then
+ -- a finalizer ends the run: its result is reported and returned as is
+ collected = pi.finalizer(collected)
+ logs.report("lpath", "% 10i : fi : %s : %s(%s)",(collected and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
+ return collected
+ end
+ -- nothing left to work on, so the remaining steps are skipped
+ if not collected or #collected == 0 then
+ return nil
+ end
+ end
+ return collected
end
---[[ldx--
-<p>An <l n='lpath'/> is converted to a table with instructions for traversing the
-tree. Hoever, simple cases are signaled by booleans. Because we don't know in
-advance what we want to do with the found element the handle gets three arguments:</p>
-
-<lines>
-<t>r</t> : the root element of the data table
-<t>d</t> : the data table of the result
-<t>t</t> : the index in the data table of the result
-</lines>
+-- Resolve pattern (a string, looked up in the cache or parsed, or an already
+-- parsed table) and run its steps over list. Returns the collected elements,
+-- the result of a finalizer step, or nothing/nil when there is no match.
+-- With trace_lpath set the work is delegated to the profiling or tracing
+-- variant.
+local function parse_apply(list,pattern)
+    -- we avoid an extra call
+    local steps = cache[pattern]
+    if steps then
+        lpathcalls = lpathcalls + 1
+        lpathcached = lpathcached + 1
+    elseif type(pattern) == "table" then
+        lpathcalls = lpathcalls + 1
+        steps = pattern
+    else
+        steps = parse_pattern(pattern) or pattern
+    end
+    if not steps then
+        return
+    end
+    local nofsteps = #steps
+    if nofsteps == 0 then
+        -- something is wrong
+        return
+    end
+    if trace_lpath then
+        if trace_lprofile then
+            return profiled_apply(list,steps,nofsteps)
+        end
+        return traced_apply(list,steps,nofsteps)
+    end
+    -- normal apply, inline, no self
+    local current = list
+    for i=1,nofsteps do
+        local step = steps[i]
+        local kind = step.kind
+        if kind == "finalizer" then
+            return step.finalizer(current)
+        elseif kind == "expression" then
+            current = apply_expression(current,step.evaluator,i)
+        elseif kind == "nodes" then
+            current = apply_nodes(current,step.nodetest,step.nodes)
+        elseif kind == "axis" then
+            local axis = step.axis
+            if axis ~= "self" then
+                current = apply_axis[axis](current)
+            end
+        end
+        if not current or #current == 0 then
+            return nil
+        end
+    end
+    return current
+end
-<p> Access to the root and data table makes it possible to construct insert and delete
-functions.</p>
---ldx]]--
+-- internal (parsed)
-local functions = xml.functions
-local expressions = xml.expressions
+-- Run a nested lpath pattern with element e as the only starting point.
+expressions.child = function(e,pattern)
+    local roots = { e }
+    return parse_apply(roots,pattern) -- todo: cache
+end
+-- Number of entries a nested lpath pattern collects starting at e; 0 when
+-- nothing is found.
+expressions.count = function(e,pattern)
+    local found = parse_apply({ e },pattern) -- todo: cache
+    if found then
+        return #found
+    end
+    return 0
+end
-expressions.contains = string.find
-expressions.find = string.find
-expressions.upper = string.upper
-expressions.lower = string.lower
-expressions.number = tonumber
-expressions.boolean = toboolean
+-- external
expressions.oneof = function(s,...) -- slow
local t = {...} for i=1,#t do if s == t[i] then return true end end return false
end
-
expressions.error = function(str)
- xml.error_handler("unknown function in lpath expression",str or "?")
+ xml.error_handler("unknown function in lpath expression",tostring(str or "?"))
return false
end
+-- True only for nil; false is a defined value here.
+expressions.undefined = function(s)
+    if s == nil then
+        return true
+    end
+    return false
+end
-functions.text = function(root,k,n) -- unchecked, maybe one deeper
- local t = type(t)
- if t == "string" then
- return t
- else -- todo n
- local rdt = root.dt
- return (rdt and rdt[k]) or root[k] or ""
+expressions.contains = find
+expressions.find = find
+expressions.upper = upper
+expressions.lower = lower
+expressions.number = tonumber
+expressions.boolean = toboolean
+
+-- user interface
+
+-- Old style traversal: every match is passed to handle as (parent, parent's
+-- data table, index of the match). A log message points to xml.selection as
+-- the replacement on every call.
+local function traverse(root,pattern,handle)
+    logs.report("xml","use 'xml.selection' instead for '%s'",pattern)
+    local found = parse_apply({ root },pattern)
+    if not found then
+        return
+    end
+    for index=1,#found do
+        local element = found[index]
+        local parent = element.__p__
+        handle(parent,parent.dt,element.ni)
+    end
+end
+
+-- New style access to matches: collect what pattern selects below root.
+--
+-- With a handle every collected entry is passed to it (one argument) and
+-- nothing is returned; without a handle the collected table itself is
+-- returned. When nothing is collected the function returns nothing.
+local function selection(root,pattern,handle)
+ local collected = parse_apply({ root },pattern)
+ if collected then
+ if handle then
+ for c=1,#collected do
+ handle(collected[c])
+ end
+ else
+ return collected
+ end
end
end
-functions.name = function(d,k,n) -- ns + tg
+xml.parse_parser = parser
+xml.parse_pattern = parse_pattern
+xml.parse_apply = parse_apply
+xml.traverse = traverse -- old method, r, d, k
+xml.selection = selection -- new method, simple handle
+
+local lpath = parse_pattern
+
+xml.lpath = lpath
+
+-- Give access to the pattern cache itself (not a copy), keyed by the
+-- pattern that was parsed.
+function xml.cached_patterns()
+ return cache
+end
+
+-- generic function finalizer (independent namespace)
+
+-- Finalizer that calls the function registered under the name fnc on every
+-- collected entry; an unknown name is reported on the "xml" log channel.
+local function dofunction(collected,fnc)
+    if not collected then
+        return
+    end
+    local action = functions[fnc]
+    if not action then
+        logs.report("xml","unknown function '%s'",fnc)
+        return
+    end
+    for index=1,#collected do
+        action(collected[index])
+    end
+end
+
+xml.finalizers.xml["function"] = dofunction
+xml.finalizers.tex["function"] = dofunction
+
+-- functions
+
+-- Entry n of the data table of e's parent, or "" when there is no such
+-- table or entry.
+expressions.text = function(e,n)
+    local siblings = e.__p__.dt
+    if siblings then
+        local found = siblings[n]
+        if found then
+            return found
+        end
+    end
+    return ""
+end
+
+expressions.name = function(e,n) -- ns + tg
local found = false
- n = n or 0
- if not k then
- -- not found
- elseif n == 0 then
- local dk = d[k]
- found = dk and (type(dk) == "table") and dk
+ n = tonumber(n) or 0
+ if n == 0 then
+ found = type(e) == "table" and e
elseif n < 0 then
+ local d, k = e.__p__.dt, e.ni
for i=k-1,1,-1 do
local di = d[i]
if type(di) == "table" then
@@ -4114,6 +5285,7 @@ functions.name = function(d,k,n) -- ns + tg
end
end
else
+ local d, k = e.__p__.dt, e.ni
for i=k+1,#d,1 do
local di = d[i]
if type(di) == "table" then
@@ -4138,15 +5310,13 @@ functions.name = function(d,k,n) -- ns + tg
end
end
-functions.tag = function(d,k,n) -- only tg
+expressions.tag = function(e,n) -- only tg
local found = false
- n = n or 0
- if not k then
- -- not found
- elseif n == 0 then
- local dk = d[k]
- found = dk and (type(dk) == "table") and dk
+ n = tonumber(n) or 0
+ if n == 0 then
+ found = (type(e) == "table") and e -- seems to fail
elseif n < 0 then
+ local d, k = e.__p__.dt, e.ni
for i=k-1,1,-1 do
local di = d[i]
if type(di) == "table" then
@@ -4159,6 +5329,7 @@ functions.tag = function(d,k,n) -- only tg
end
end
else
+ local d, k = e.__p__.dt, e.ni
for i=k+1,#d,1 do
local di = d[i]
if type(di) == "table" then
@@ -4174,664 +5345,403 @@ functions.tag = function(d,k,n) -- only tg
return (found and found.tg) or ""
end
-expressions.text = functions.text
-expressions.name = functions.name
-expressions.tag = functions.tag
+--[[ldx--
+<p>This is the main filter function. It returns whatever is asked for.</p>
+--ldx]]--
-local function traverse(root,pattern,handle,reverse,index,parent,wildcard) -- multiple only for tags, not for namespaces
- if not root then -- error
- return false
- elseif pattern == false then -- root
- handle(root,root.dt,root.ri)
- return false
- elseif pattern == true then -- wildcard
- local rootdt = root.dt
- if rootdt then
- local start, stop, step = 1, #rootdt, 1
- if reverse then
- start, stop, step = stop, start, -1
- end
- for k=start,stop,step do
- if handle(root,rootdt,root.ri or k) then return false end
- if not traverse(rootdt[k],true,handle,reverse) then return false end
- end
+-- Apply pattern with root as the only starting element and hand back
+-- whatever the pattern (including a finalizer) produces.
+function xml.filter(root,pattern) -- no longer funny attribute handling here
+    local roots = { root }
+    return parse_apply(roots,pattern)
+end
+
+--[[ldx--
+<p>Often using iterators looks nicer in the code than passing handler
+functions. The <l n='lua'/> book describes how to use coroutines for that
+purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits
+code like:</p>
+
+<typing>
+for r, d, k in xml.elements(xml.load('text.xml'),"title") do
+ print(d[k]) -- old method
+end
+for e in xml.collected(xml.load('text.xml'),"title") do
+ print(e) -- new one
+end
+</typing>
+--ldx]]--
+
+local wrap, yield = coroutine.wrap, coroutine.yield
+
+function xml.elements(root,pattern,reverse) -- r, d, k
+ local collected = parse_apply({ root },pattern)
+ if collected then
+ if reverse then
+ return wrap(function() for c=#collected,1,-1 do
+ local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
+ end end)
+ else
+ return wrap(function() for c=1,#collected do
+ local e = collected[c] local r = e.__p__ yield(r,r.dt,e.ni)
+ end end)
end
- return false
- elseif root.dt then
- index = index or 1
- local action = pattern[index]
- local command = action[1]
- if command == 29 then -- fast case /oeps
- local rootdt = root.dt
- for k=1,#rootdt do
- local e = rootdt[k]
- local tg = e.tg
- if e.tg then
- local ns = e.rn or e.ns
- local ns_a, tg_a = action[3], action[4]
- local matched = (ns_a == "*" or ns == ns_a) and (tg_a == "*" or tg == tg_a)
- if not action[2] then matched = not matched end
- if matched then
- if handle(root,rootdt,k) then return false end
- end
- end
- end
- elseif command == 11 then -- parent
- local ep = root.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
+ end
+ return wrap(function() end)
+end
+
+function xml.collected(root,pattern,reverse) -- e
+ local collected = parse_apply({ root },pattern)
+ if collected then
+ if reverse then
+ return wrap(function() for c=#collected,1,-1 do yield(collected[c]) end end)
else
- if (command == 16 or command == 12) and index == 1 then -- initial
- -- wildcard = true
- wildcard = command == 16 -- ok?
- index = index + 1
- action = pattern[index]
- command = action and action[1] or 0 -- something is wrong
- end
- if command == 11 then -- parent
- local ep = root.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
- else
- local rootdt = root.dt
- local start, stop, step, n, dn = 1, #rootdt, 1, 0, 1
- if command == 30 then
- if action[5] < 0 then
- start, stop, step = stop, start, -1
- dn = -1
- end
- elseif reverse and index == #pattern then
- start, stop, step = stop, start, -1
- end
- local idx = 0
- local hsh = { } -- this will slooow down the lot
- for k=start,stop,step do -- we used to have functions for all but a case is faster
- local e = rootdt[k]
- local ns, tg = e.rn or e.ns, e.tg
- if tg then
- -- we can optimize this for simple searches, but it probably does not pay off
- hsh[tg] = (hsh[tg] or 0) + 1
- idx = idx + 1
- if command == 30 then
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- if matched then
- n = n + dn
- if n == action[5] then
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
- end
- break
- end
- elseif wildcard then
- if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
- end
- else
- local matched, multiple = false, false
- if command == 20 then -- match
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- elseif command == 21 then -- match one of
- multiple = true
- for i=3,#action,2 do
- local ns_a, tg_a = action[i], action[i+1]
- if (ns_a == "*" or ns == ns_a) and (tg == "*" or tg == tg_a) then
- matched = true
- break
- end
- end
- if not action[2] then matched = not matched end
- elseif command == 22 then -- eq
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- matched = matched and e.at[action[6]] == action[7]
- elseif command == 23 then -- ne
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- matched = mached and e.at[action[6]] ~= action[7]
- elseif command == 24 then -- one of eq
- multiple = true
- for i=3,#action-2,2 do
- local ns_a, tg_a = action[i], action[i+1]
- if (ns_a == "*" or ns == ns_a) and (tg == "*" or tg == tg_a) then
- matched = true
- break
- end
- end
- if not action[2] then matched = not matched end
- matched = matched and e.at[action[#action-1]] == action[#action]
- elseif command == 25 then -- one of ne
- multiple = true
- for i=3,#action-2,2 do
- local ns_a, tg_a = action[i], action[i+1]
- if (ns_a == "*" or ns == ns_a) and (tg == "*" or tg == tg_a) then
- matched = true
- break
- end
- end
- if not action[2] then matched = not matched end
- matched = matched and e.at[action[#action-1]] ~= action[#action]
- elseif command == 27 then -- has attribute
- local ns_a, tg_a = action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- matched = matched and e.at[action[5]]
- elseif command == 28 then -- has value
- local edt, ns_a, tg_a = e.dt, action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- matched = matched and edt and edt[1] == action[5]
- elseif command == 31 then
- local edt, ns_a, tg_a = e.dt, action[3], action[4]
- if tg == tg_a then
- matched = ns_a == "*" or ns == ns_a
- elseif tg_a == '*' then
- matched, multiple = ns_a == "*" or ns == ns_a, true
- else
- matched = false
- end
- if not action[2] then matched = not matched end
- if matched then
- matched = action[6](expressions,root,rootdt,k,e,edt,ns,tg,idx,hsh[tg] or 1)
- end
- end
- if matched then -- combine tg test and at test
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- if wildcard then
- if multiple then
- if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
- else
- -- maybe or multiple; anyhow, check on (section|title) vs just section and title in example in lxml
- if not traverse(e,pattern,handle,reverse,index,root) then return false end
- end
- end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
- end
- elseif command == 14 then -- any
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root) then return false end
- end
- elseif command == 15 then -- many
- if index == #pattern then
- if handle(root,rootdt,root.ri or k) then return false end
- else
- if not traverse(e,pattern,handle,reverse,index+1,root,true) then return false end
- end
- -- not here : 11
- elseif command == 11 then -- parent
- local ep = e.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,root,index+1) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
- elseif command == 40 and e.special and tg == "@pi@" then -- pi
- local pi = action[2]
- if pi ~= "" then
- local pt = e.dt[1]
- if pt and pt:find(pi) then
- if handle(root,rootdt,k) then
- return false
- end
- end
- elseif handle(root,rootdt,k) then
- return false
- end
- elseif wildcard then
- if not traverse(e,pattern,handle,reverse,index,root,true) then return false end
- end
- end
- else
- -- not here : 11
- if command == 11 then -- parent
- local ep = e.__p__ or parent
- if index < #pattern then
- if not traverse(ep,pattern,handle,reverse,index+1,root) then return false end
- elseif handle(root,rootdt,k) then
- return false
- end
- break -- else loop
- end
- end
- end
- end
+ return wrap(function() for c=1,#collected do yield(collected[c]) end end)
end
end
- return true
+ return wrap(function() end)
end
-xml.traverse = traverse
+
+end -- of closure
+
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['lxml-ent'] = {
+ version = 1.001,
+ comment = "this module is the basis for the lxml-* ones",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local type, next = type, next
+local texsprint, ctxcatcodes = tex.sprint, tex.ctxcatcodes
+local utf = unicode.utf8
+local utfupper = utf.upper
--[[ldx--
-<p>Next come all kind of locators and manipulators. The most generic function here
-is <t>xml.filter(root,pattern)</t>. All registers functions in the filters namespace
-can be path of a search path, as in:</p>
+<p>We provide (at least here) two entity handlers. The more extensive
+resolver consults a hash first, tries to convert to <l n='utf'/> next,
+and finally calls a handler when defined. When this all fails, the
+original entity is returned.</p>
-<typing>
-local r, d, k = xml.filter(root,"/a/b/c/position(4)"
-</typing>
+<p>We do things different now but it's still somewhat experimental</p>
--ldx]]--
-local traverse, lpath, convert = xml.traverse, xml.lpath, xml.convert
+xml.entities = xml.entities or { } -- xml.entity_handler == function
-xml.filters = { }
+-- experimental, this will be done differently
-function xml.filters.default(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end)
- return dt and dt[dk], rt, dt, dk
+-- Copy the entities of a document (root.entities, when present) into the
+-- shared xml.entities table; existing names are overwritten.
+function xml.merge_entities(root)
+    local source, target = root.entities, xml.entities
+    if not source then
+        return
+    end
+    for name, value in next, source do
+        target[name] = value
+    end
end
-function xml.filters.attributes(root,pattern,arguments)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end)
- local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at)
- if ekat then
- if arguments then
- return ekat[arguments] or "", rt, dt, dk
+function xml.resolved_entity(str)
+ local e = xml.entities[str]
+ if e then
+ local te = type(e)
+ if te == "function" then
+ e(str)
else
- return ekat, rt, dt, dk
+ texsprint(ctxcatcodes,e)
end
else
- return { }, rt, dt, dk
+ texsprint(ctxcatcodes,"\\xmle{",str,"}{",utfupper(str),"}") -- we need to use our own upper
end
end
-function xml.filters.reverse(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end, 'reverse')
- return dt and dt[dk], rt, dt, dk
-end
+xml.entities.amp = function() tex.write("&") end
+xml.entities.lt = function() tex.write("<") end
+xml.entities.gt = function() tex.write(">") end
-function xml.filters.count(root,pattern,everything)
- local n = 0
- traverse(root, lpath(pattern), function(r,d,t)
- if everything or type(d[t]) == "table" then
- n = n + 1
- end
- end)
- return n
-end
-function xml.filters.elements(root, pattern) -- == all
- local t = { }
- traverse(root, lpath(pattern), function(r,d,k)
- local e = d[k]
- if e then
- t[#t+1] = e
- end
- end)
- return t
-end
+end -- of closure
-function xml.filters.texts(root, pattern)
- local t = { }
- traverse(root, lpath(pattern), function(r,d,k)
- local e = d[k]
- if e and e.dt then
- t[#t+1] = e.dt
- end
- end)
- return t
-end
+do -- create closure to overcome 200 locals limit
-function xml.filters.first(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end)
- return dt and dt[dk], rt, dt, dk
-end
+if not modules then modules = { } end modules ['lxml-mis'] = {
+ version = 1.001,
+ comment = "this module is the basis for the lxml-* ones",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
-function xml.filters.last(root,pattern)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt,dt,dk = r,d,k return true end, 'reverse')
- return dt and dt[dk], rt, dt, dk
-end
+local concat = table.concat
+local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
+local format, gsub = string.format, string.gsub
-function xml.filters.index(root,pattern,arguments)
- local rt, dt, dk, reverse, i = nil, nil, nil, false, tonumber(arguments or '1') or 1
- if i and i ~= 0 then
- if i < 0 then
- reverse, i = true, -i
- end
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk, i = r, d, k, i-1 return i == 0 end, reverse)
- if i == 0 then
- return dt and dt[dk], rt, dt, dk
+--[[ldx--
+<p>The following helper functions best belong to the <t>lxml-ini</t>
+module. Some are here because we need them in the <t>mk</t>
+document and other manuals, others came up when playing with
+this module. Since this module is also used in <l n='mtxrun'/> we've
+put them here instead of loading more modules there than needed.</p>
+--ldx]]--
+
+
+local function xmlgsub(t,old,new)
+ local dt = t.dt
+ if dt then
+ for k=1,#dt do
+ local v = dt[k]
+ if type(v) == "string" then
+ dt[k] = gsub(v,old,new)
+ else
+ xmlgsub(v,old,new)
+ end
end
end
- return nil, nil, nil, nil
end
-function xml.filters.attribute(root,pattern,arguments)
- local rt, dt, dk
- traverse(root, lpath(pattern), function(r,d,k) rt, dt, dk = r, d, k return true end)
- local ekat = (dt and dt[dk] and dt[dk].at) or (rt and rt.at)
- -- return (ekat and (ekat[arguments] or ekat[gsub(arguments,"^([\"\'])(.*)%1$","%2")])) or ""
- return (ekat and (ekat[arguments] or (find(arguments,"^[\'\"]") and ekat[sub(arguments,2,-2)]))) or ""
-end
+xmlgsub = xmlgsub
-function xml.filters.text(root,pattern,arguments) -- ?? why index, tostring slow
- local dtk, rt, dt, dk = xml.filters.index(root,pattern,arguments)
- if dtk then -- n
- local dtkdt = dtk.dt
- if not dtkdt then
- return "", rt, dt, dk
- elseif #dtkdt == 1 and type(dtkdt[1]) == "string" then
- return dtkdt[1], rt, dt, dk
- else
- return xml.tostring(dtkdt), rt, dt, dk
+-- Remove the indentation found in the text that precedes element dk from
+-- every line inside dk.
+--
+-- dk : the element to clean up
+-- d  : the data table that holds dk
+-- k  : the index of dk in d
+--
+-- The string functions are reached through the string table because this
+-- closure only localizes format and gsub. Nothing happens when the preceding
+-- text has no newline followed by whitespace.
+function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual
+    if d and k then
+        local dkm = d[k-1]
+        if dkm and type(dkm) == "string" then
+            local s = string.match(dkm,"\n(%s+)")
+            if s then
+                xmlgsub(dk,"\n"..string.rep(" ",#s),"\n")
+            end
end
- else
- return "", rt, dt, dk
end
end
-function xml.filters.tag(root,pattern,n)
- local tag = ""
- traverse(root, lpath(pattern), function(r,d,k)
- tag = xml.functions.tag(d,k,n and tonumber(n))
- return true
- end)
- return tag
-end
+--~ xml.escapes = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
+--~ xml.unescapes = { } for k,v in pairs(xml.escapes) do xml.unescapes[v] = k end
-function xml.filters.name(root,pattern,n)
- local tag = ""
- traverse(root, lpath(pattern), function(r,d,k)
- tag = xml.functions.name(d,k,n and tonumber(n))
- return true
- end)
- return tag
-end
+--~ function xml.escaped (str) return (gsub(str,"(.)" , xml.escapes )) end
+--~ function xml.unescaped(str) return (gsub(str,"(&.-;)", xml.unescapes)) end
+--~ function xml.cleansed (str) return (gsub(str,"<.->" , '' )) end -- "%b<>"
---[[ldx--
-<p>For splitting the filter function from the path specification, we can
-use string matching or lpeg matching. Here the difference in speed is
-neglectable but the lpeg variant is more robust.</p>
---ldx]]--
+local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, lpeg.Cs
--- not faster but hipper ... although ... i can't get rid of the trailing / in the path
+-- 100 * 2500 * "oeps< oeps> oeps&" : gsub:lpeg|lpeg|lpeg
+--
+-- 1021:0335:0287:0247
-local P, S, R, C, V, Cc = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc
+-- 10 * 1000 * "oeps< oeps> oeps& asfjhalskfjh alskfjh alskfjh alskfjh ;al J;LSFDJ"
+--
+-- 1559:0257:0288:0190 (last one suggested by roberto)
-local slash = P('/')
-local name = (R("az","AZ","--","__"))^1
-local path = C(((1-slash)^0 * slash)^1)
-local argument = P { "(" * C(((1 - S("()")) + V(1))^0) * ")" }
-local action = Cc(1) * path * C(name) * argument
-local attribute = Cc(2) * path * P('@') * C(name)
-local direct = Cc(3) * Cc("../*") * slash^0 * C(name) * argument
+-- escaped = Cs((S("<&>") / xml.escapes + 1)^0)
+-- escaped = Cs((S("<")/"&lt;" + S(">")/"&gt;" + S("&")/"&amp;" + 1)^0)
+local normal = (1 - S("<&>"))^0
+local special = P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;"
+local escaped = Cs(normal * (special * normal)^0)
-local parser = direct + action + attribute
+-- 100 * 1000 * "oeps&lt; oeps&gt; oeps&amp;" : gsub:lpeg == 0153:0280:0151:0080 (last one by roberto)
-local filters = xml.filters
-local attribute_filter = xml.filters.attributes
-local default_filter = xml.filters.default
+local normal = (1 - S"&")^0
+local special = P("&lt;")/"<" + P("&gt;")/">" + P("&amp;")/"&"
+local unescaped = Cs(normal * (special * normal)^0)
--- todo: also hash, could be gc'd
+-- 100 * 5000 * "oeps <oeps bla='oeps' foo='bar'> oeps </oeps> oeps " : gsub:lpeg == 623:501 msec (short tags, less difference)
-function xml.filter(root,pattern)
- local kind, a, b, c = parser:match(pattern)
- if kind == 1 or kind == 3 then
- return (filters[b] or default_filter)(root,a,c)
- elseif kind == 2 then
- return attribute_filter(root,a,b)
- else
- return default_filter(root,pattern)
- end
-end
+local cleansed = Cs(((P("<") * (1-P(">"))^0 * P(">"))/"" + 1)^0)
---~ slightly faster, but first we need a proper test file
---~
---~ local hash = { }
---~
---~ function xml.filter(root,pattern)
---~ local h = hash[pattern]
---~ if not h then
---~ local kind, a, b, c = parser:match(pattern)
---~ if kind == 1 then
---~ h = { kind, filters[b] or default_filter, a, b, c }
---~ elseif kind == 2 then
---~ h = { kind, attribute_filter, a, b, c }
---~ else
---~ h = { kind, default_filter, a, b, c }
---~ end
---~ hash[pattern] = h
---~ end
---~ local kind = h[1]
---~ if kind == 1 then
---~ return h[2](root,h[2],h[4])
---~ elseif kind == 2 then
---~ return h[2](root,h[2],h[3])
---~ else
---~ return h[2](root,pattern)
---~ end
---~ end
+xml.escaped_pattern = escaped
+xml.unescaped_pattern = unescaped
+xml.cleansed_pattern = cleansed
---[[ldx--
-<p>The following functions collect elements and texts.</p>
---ldx]]--
+function xml.escaped (str) return escaped :match(str) end
+function xml.unescaped(str) return unescaped:match(str) end
+function xml.cleansed (str) return cleansed :match(str) end
--- still somewhat bugged
-function xml.collect_elements(root, pattern, ignorespaces)
- local rr, dd = { }, { }
- traverse(root, lpath(pattern), function(r,d,k)
- local dk = d and d[k]
- if dk then
- if ignorespaces and type(dk) == "string" and dk:find("[^%S]") then
- -- ignore
- else
- local n = #rr+1
- rr[n], dd[n] = r, dk
- end
- end
- end)
- return dd, rr
-end
+end -- of closure
-function xml.collect_texts(root, pattern, flatten)
- local t = { } -- no r collector
- traverse(root, lpath(pattern), function(r,d,k)
- if d then
- local ek = d[k]
- local tx = ek and ek.dt
- if flatten then
- if tx then
- t[#t+1] = xml.tostring(tx) or ""
- else
- t[#t+1] = ""
+do -- create closure to overcome 200 locals limit
+
+if not modules then modules = { } end modules ['lxml-aux'] = {
+ version = 1.001,
+ comment = "this module is the basis for the lxml-* ones",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- not all functions here make sense anymore but we keep them for
+-- compatibility reasons
+
+local xmlparseapply, xmlconvert, xmlcopy = xml.parse_apply, xml.convert, xml.copy
+
+local type = type
+local insert, remove = table.insert, table.remove
+local gmatch, gsub = string.gmatch, string.gsub
+
+local function withelements(e,handle,depth)
+ if e and handle then
+ local edt = e.dt
+ if edt then
+ depth = depth or 0
+ for i=1,#edt do
+ local e = edt[i]
+ if type(e) == "table" then
+ handle(e,depth)
+ withelements(e,handle,depth+1)
end
- else
- t[#t+1] = tx or ""
end
- else
- t[#t+1] = ""
end
- end)
- return t
+ end
end
-function xml.collect_tags(root, pattern, nonamespace)
- local t = { }
- xml.traverse(root, xml.lpath(pattern), function(r,d,k)
- local dk = d and d[k]
- if dk and type(dk) == "table" then
- local ns, tg = e.ns, e.tg
- if nonamespace then
- t[#t+1] = tg -- if needed we can return an extra table
- elseif ns == "" then
- t[#t+1] = tg
- else
- t[#t+1] = ns .. ":" .. tg
+xml.withelements = withelements
+
+function xml.withelement(e,n,handle) -- slow
+ if e and n ~= 0 and handle then
+ local edt = e.dt
+ if edt then
+ if n > 0 then
+ for i=1,#edt do
+ local ei = edt[i]
+ if type(ei) == "table" then
+ if n == 1 then
+ handle(ei)
+ return
+ else
+ n = n - 1
+ end
+ end
+ end
+ elseif n < 0 then
+ for i=#edt,1,-1 do
+ local ei = edt[i]
+ if type(ei) == "table" then
+ if n == -1 then
+ handle(ei)
+ return
+ else
+ n = n + 1
+ end
+ end
+ end
end
end
- end)
- return #t > 0 and {}
+ end
end
---[[ldx--
-<p>Often using an iterators looks nicer in the code than passing handler
-functions. The <l n='lua'/> book describes how to use coroutines for that
-purpose (<url href='http://www.lua.org/pil/9.3.html'/>). This permits
-code like:</p>
+xml.elements_only = xml.collected
-<typing>
-for r, d, k in xml.elements(xml.load('text.xml'),"title") do
- print(d[k])
+-- Calls handle on each entry that xmlparseapply({ root },pattern) collects,
+-- walking from last to first when reverse is set. Returns the collected list;
+-- when xmlparseapply yields nothing, handle is not called and nothing is
+-- returned.
+function xml.each_element(root, pattern, handle, reverse)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ if reverse then
+ for c=#collected,1,-1 do
+ handle(collected[c])
+ end
+ else
+ for c=1,#collected do
+ handle(collected[c])
+ end
+ end
+ return collected
+ end
end
-</typing>
-<p>Which will print all the titles in the document. The iterator variant takes
-1.5 times the runtime of the function variant which is due to the overhead in
-creating the wrapper. So, instead of:</p>
+xml.process_elements = xml.each_element
-<typing>
-function xml.filters.first(root,pattern)
- for rt,dt,dk in xml.elements(root,pattern)
- return dt and dt[dk], rt, dt, dk
+function xml.process_attributes(root, pattern, handle)
+ local collected = xmlparseapply({ root },pattern)
+ if collected and handle then
+ for c=1,#collected do
+ handle(collected[c].at)
+ end
end
- return nil, nil, nil, nil
+ return collected
end
-</typing>
-<p>We use the function variants in the filters.</p>
+--[[ldx--
+<p>The following functions collect elements and texts.</p>
--ldx]]--
-local wrap, yield = coroutine.wrap, coroutine.yield
+-- are these still needed -> lxml-cmp.lua
-function xml.elements(root,pattern,reverse)
- return wrap(function() traverse(root, lpath(pattern), yield, reverse) end)
+function xml.collect_elements(root, pattern)
+ return xmlparseapply({ root },pattern)
end
-function xml.elements_only(root,pattern,reverse)
- return wrap(function() traverse(root, lpath(pattern), function(r,d,k) yield(d[k]) end, reverse) end)
-end
-
-function xml.each_element(root, pattern, handle, reverse)
- local ok
- traverse(root, lpath(pattern), function(r,d,k) ok = true handle(r,d,k) end, reverse)
- return ok
-end
-
-function xml.process_elements(root, pattern, handle)
- traverse(root, lpath(pattern), function(r,d,k)
- local dkdt = d[k].dt
- if dkdt then
- for i=1,#dkdt do
- local v = dkdt[i]
- if v.tg then handle(v) end
- end
+function xml.collect_texts(root, pattern, flatten) -- todo: variant with handle
+ local collected = xmlparseapply({ root },pattern)
+ if collected and flatten then
+ local xmltostring = xml.tostring
+ for c=1,#collected do
+ collected[c] = xmltostring(collected[c].dt)
end
- end)
+ end
+ return collected or { }
end
-function xml.process_attributes(root, pattern, handle)
- traverse(root, lpath(pattern), function(r,d,k)
- local ek = d[k]
- local a = ek.at or { }
- handle(a)
- if next(a) then -- next is faster than type (and >0 test)
- ek.at = a
- else
- ek.at = nil
+function xml.collect_tags(root, pattern, nonamespace)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ local t = { }
+ for c=1,#collected do
+ local e = collected[c]
+ local ns, tg = e.ns, e.tg
+ if nonamespace then
+ t[#t+1] = tg
+ elseif ns == "" then
+ t[#t+1] = tg
+ else
+ t[#t+1] = ns .. ":" .. tg
+ end
end
- end)
+ return t
+ end
end
--[[ldx--
<p>We've now arrives at the functions that manipulate the tree.</p>
--ldx]]--
+local no_root = { no_root = true }
+
function xml.inject_element(root, pattern, element, prepend)
if root and element then
- local matches, collect = { }, nil
if type(element) == "string" then
- element = convert(element,true)
+ element = xmlconvert(element,no_root)
end
if element then
- collect = function(r,d,k) matches[#matches+1] = { r, d, k, element } end
- traverse(root, lpath(pattern), collect)
- for i=1,#matches do
- local m = matches[i]
- local r, d, k, element, edt = m[1], m[2], m[3], m[4], nil
- if element.ri then
- element = element.dt[element.ri].dt
- else
- element = element.dt
- end
- if r.ri then
- edt = r.dt[r.ri].dt
- else
- edt = d and d[k] and d[k].dt
- end
- if edt then
- local be, af
- if prepend then
- be, af = xml.copy(element), edt
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ local r = e.__p__
+ local d = r.dt
+ local k = e.ni
+ if element.ri then
+ element = element.dt[element.ri].dt
else
- be, af = edt, xml.copy(element)
- end
- for i=1,#af do
- be[#be+1] = af[i]
+ element = element.dt
end
+ local edt
if r.ri then
- r.dt[r.ri].dt = be
+ edt = r.dt[r.ri].dt
else
- d[k].dt = be
+ edt = d and d[k] and d[k].dt
+ end
+ if edt then
+ local be, af
+ if prepend then
+ be, af = xmlcopy(element), edt
+ else
+ be, af = edt, xmlcopy(element)
+ end
+ for i=1,#af do
+ be[#be+1] = af[i]
+ end
+ if r.ri then
+ r.dt[r.ri].dt = be
+ else
+ d[k].dt = be
+ end
+ else
+ -- r.dt = element.dt -- todo
end
- else
- -- r.dt = element.dt -- todo
end
end
end
@@ -4847,32 +5757,31 @@ function xml.insert_element(root, pattern, element, before) -- todo: element als
else
local matches, collect = { }, nil
if type(element) == "string" then
- element = convert(element,true)
+ element = xmlconvert(element,true)
end
if element and element.ri then
element = element.dt[element.ri]
end
if element then
- collect = function(r,d,k) matches[#matches+1] = { r, d, k, element } end
- traverse(root, lpath(pattern), collect)
- for i=#matches,1,-1 do
- local m = matches[i]
- local r, d, k, element = m[1], m[2], m[3], m[4]
- if not before then k = k + 1 end
- if element.tg then
- insert(d,k,element) -- untested
---~ elseif element.dt then
---~ for _,v in ipairs(element.dt) do -- i added
---~ insert(d,k,v)
---~ k = k + 1
---~ end
---~ end
- else
- local edt = element.dt
- if edt then
- for i=1,#edt do
- insert(d,k,edt[i])
- k = k + 1
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ local r = e.__p__
+ local d = r.dt
+ local k = e.ni
+ if not before then
+ k = k + 1
+ end
+ if element.tg then
+ insert(d,k,element) -- untested
+ else
+ local edt = element.dt
+ if edt then
+ for i=1,#edt do
+ insert(d,k,edt[i])
+ k = k + 1
+ end
end
end
end
@@ -4888,105 +5797,114 @@ xml.inject_element_after = xml.inject_element
xml.inject_element_before = function(r,p,e) xml.inject_element(r,p,e,true) end
+-- Removes every element that xmlparseapply({ root },pattern) collects from the
+-- dt table of its parent (e.__p__) at the stored position e.ni, then clears
+-- e.ni. Returns the list of removed elements (nil when nothing was collected).
function xml.delete_element(root, pattern)
- local matches, deleted = { }, { }
- local collect = function(r,d,k) matches[#matches+1] = { r, d, k } end
- traverse(root, lpath(pattern), collect)
- for i=#matches,1,-1 do
- local m = matches[i]
- deleted[#deleted+1] = remove(m[2],m[3])
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ -- walk backwards, as the replaced loop did: removing an entry shifts the
+ -- siblings after it, so their stored ni would be stale in a forward walk
+ -- (presumably collected is in document order -- TODO confirm)
+ for c=#collected,1,-1 do
+ local e = collected[c]
+ remove(e.__p__.dt,e.ni)
+ e.ni = nil
+ end
end
- return deleted
+ return collected
end
function xml.replace_element(root, pattern, element)
if type(element) == "string" then
- element = convert(element,true)
+ element = xmlconvert(element,true)
end
if element and element.ri then
element = element.dt[element.ri]
end
if element then
- traverse(root, lpath(pattern), function(rm, d, k)
- d[k] = element.dt -- maybe not clever enough
- end)
- end
-end
-
-local function load_data(name) -- == io.loaddata
- local f, data = io.open(name), ""
- if f then
- data = f:read("*all",'b') -- 'b' ?
- f:close()
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ e.__p__.dt[e.ni] = element.dt -- maybe not clever enough
+ end
+ end
end
- return data
end
-function xml.include(xmldata,pattern,attribute,recursive,loaddata)
+local function include(xmldata,pattern,attribute,recursive,loaddata)
-- parse="text" (default: xml), encoding="" (todo)
-- attribute = attribute or 'href'
pattern = pattern or 'include'
- loaddata = loaddata or load_data
- local function include(r,d,k)
- local ek, name = d[k], nil
- if not attribute or attribute == "" then
+ loaddata = loaddata or io.loaddata
+ local collected = xmlparseapply({ xmldata },pattern)
+ if collected then
+ for c=1,#collected do
+ local ek = collected[c]
+ local name = nil
local ekdt = ek.dt
- name = (type(ekdt) == "table" and ekdt[1]) or ekdt
- end
- if not name then
- if ek.at then
+ local ekat = ek.at
+ local epdt = ek.__p__.dt
+ if not attribute or attribute == "" then
+ name = (type(ekdt) == "table" and ekdt[1]) or ekdt -- ckeck, probably always tab or str
+ end
+ if not name then
for a in gmatch(attribute or "href","([^|]+)") do
- name = ek.at[a]
+ name = ekat[a]
if name then break end
end
end
- end
- local data = (name and name ~= "" and loaddata(name)) or ""
- if data == "" then
- xml.empty(d,k)
- elseif ek.at["parse"] == "text" then -- for the moment hard coded
- d[k] = xml.escaped(data)
- else
- local xi = xml.convert(data)
- if not xi then
- xml.empty(d,k)
+ local data = (name and name ~= "" and loaddata(name)) or ""
+ if data == "" then
+ epdt[ek.ni] = "" -- xml.empty(d,k)
+ elseif ekat["parse"] == "text" then
+ -- for the moment hard coded
+ epdt[ek.ni] = xml.escaped(data) -- d[k] = xml.escaped(data)
else
- if recursive then
- xml.include(xi,pattern,attribute,recursive,loaddata)
+ local settings = xmldata.settings
+ settings.parent_root = xmldata -- to be tested
+ local xi = xmlconvert(data,settings)
+ if not xi then
+ epdt[ek.ni] = "" -- xml.empty(d,k)
+ else
+ if recursive then
+ include(xi,pattern,attribute,recursive,loaddata)
+ end
+ epdt[ek.ni] = xml.body(xi) -- xml.assign(d,k,xi)
end
- xml.assign(d,k,xi)
end
end
end
- xml.each_element(xmldata, pattern, include)
end
+xml.include = include
+
function xml.strip_whitespace(root, pattern, nolines) -- strips all leading and trailing space !
- traverse(root, lpath(pattern), function(r,d,k)
- local dkdt = d[k].dt
- if dkdt then -- can be optimized
- local t = { }
- for i=1,#dkdt do
- local str = dkdt[i]
- if type(str) == "string" then
- if str == "" then
- -- stripped
- else
- if nolines then
- str = gsub(str,"[ \n\r\t]+"," ")
- end
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for i=1,#collected do
+ local e = collected[i]
+ local edt = e.dt
+ if edt then
+ local t = { }
+ for i=1,#edt do
+ local str = edt[i]
+ if type(str) == "string" then
if str == "" then
-- stripped
else
- t[#t+1] = str
+ if nolines then
+ str = gsub(str,"[ \n\r\t]+"," ")
+ end
+ if str == "" then
+ -- stripped
+ else
+ t[#t+1] = str
+ end
end
+ else
+--~ str.ni = i
+ t[#t+1] = str
end
- else
- t[#t+1] = str
end
+ e.dt = t
end
- d[k].dt = t
end
- end)
+ end
end
local function rename_space(root, oldspace, newspace) -- fast variant
@@ -5011,59 +5929,49 @@ end
xml.rename_space = rename_space
function xml.remap_tag(root, pattern, newtg)
- traverse(root, lpath(pattern), function(r,d,k)
- d[k].tg = newtg
- end)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ collected[c].tg = newtg
+ end
+ end
end
+
function xml.remap_namespace(root, pattern, newns)
- traverse(root, lpath(pattern), function(r,d,k)
- d[k].ns = newns
- end)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ collected[c].ns = newns
+ end
+ end
end
+
function xml.check_namespace(root, pattern, newns)
- traverse(root, lpath(pattern), function(r,d,k)
- local dk = d[k]
- if (not dk.rn or dk.rn == "") and dk.ns == "" then
- dk.rn = newns
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ if (not e.rn or e.rn == "") and e.ns == "" then
+ e.rn = newns
+ end
end
- end)
-end
-function xml.remap_name(root, pattern, newtg, newns, newrn)
- traverse(root, lpath(pattern), function(r,d,k)
- local dk = d[k]
- dk.tg = newtg
- dk.ns = newns
- dk.rn = newrn
- end)
+ end
end
-function xml.filters.found(root,pattern,check_content)
- local found = false
- traverse(root, lpath(pattern), function(r,d,k)
- if check_content then
- local dk = d and d[k]
- found = dk and dk.dt and next(dk.dt) and true
- else
- found = true
+function xml.remap_name(root, pattern, newtg, newns, newrn)
+ local collected = xmlparseapply({ root },pattern)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ e.tg, e.ns, e.rn = newtg, newns, newrn
end
- return true
- end)
- return found
+ end
end
--[[ldx--
<p>Here are a few synonyms.</p>
--ldx]]--
-xml.filters.position = xml.filters.index
-
-xml.count = xml.filters.count
-xml.index = xml.filters.index
-xml.position = xml.filters.index
-xml.first = xml.filters.first
-xml.last = xml.filters.last
-xml.found = xml.filters.found
-
xml.each = xml.each_element
xml.process = xml.process_element
xml.strip = xml.strip_whitespace
@@ -5077,155 +5985,12 @@ xml.before = xml.insert_element_before
xml.delete = xml.delete_element
xml.replace = xml.replace_element
---[[ldx--
-<p>The following helper functions best belong to the <t>lmxl-ini</t>
-module. Some are here because we need then in the <t>mk</t>
-document and other manuals, others came up when playing with
-this module. Since this module is also used in <l n='mtxrun'/> we've
-put them here instead of loading mode modules there then needed.</p>
---ldx]]--
-
-function xml.gsub(t,old,new)
- local dt = t.dt
- if dt then
- for k=1,#dt do
- local v = dt[k]
- if type(v) == "string" then
- dt[k] = gsub(v,old,new)
- else
- xml.gsub(v,old,new)
- end
- end
- end
-end
-
-function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual
- if d and k and d[k-1] and type(d[k-1]) == "string" then
- local s = d[k-1]:match("\n(%s+)")
- xml.gsub(dk,"\n"..rep(" ",#s),"\n")
- end
-end
-
-function xml.serialize_path(root,lpath,handle)
- local dk, r, d, k = xml.first(root,lpath)
- dk = xml.copy(dk)
- xml.strip_leading_spaces(dk,d,k)
- xml.serialize(dk,handle)
-end
-
---~ xml.escapes = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
---~ xml.unescapes = { } for k,v in pairs(xml.escapes) do xml.unescapes[v] = k end
-
---~ function xml.escaped (str) return (gsub(str,"(.)" , xml.escapes )) end
---~ function xml.unescaped(str) return (gsub(str,"(&.-;)", xml.unescapes)) end
---~ function xml.cleansed (str) return (gsub(str,"<.->" , '' )) end -- "%b<>"
-
-local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, lpeg.Cs
-
--- 100 * 2500 * "oeps< oeps> oeps&" : gsub:lpeg|lpeg|lpeg
---
--- 1021:0335:0287:0247
-
--- 10 * 1000 * "oeps< oeps> oeps& asfjhalskfjh alskfjh alskfjh alskfjh ;al J;LSFDJ"
---
--- 1559:0257:0288:0190 (last one suggested by roberto)
-
--- escaped = Cs((S("<&>") / xml.escapes + 1)^0)
--- escaped = Cs((S("<")/"&lt;" + S(">")/"&gt;" + S("&")/"&amp;" + 1)^0)
-local normal = (1 - S("<&>"))^0
-local special = P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;"
-local escaped = Cs(normal * (special * normal)^0)
-
--- 100 * 1000 * "oeps&lt; oeps&gt; oeps&amp;" : gsub:lpeg == 0153:0280:0151:0080 (last one by roberto)
-
--- unescaped = Cs((S("&lt;")/"<" + S("&gt;")/">" + S("&amp;")/"&" + 1)^0)
--- unescaped = Cs((((P("&")/"") * (P("lt")/"<" + P("gt")/">" + P("amp")/"&") * (P(";")/"")) + 1)^0)
-local normal = (1 - S"&")^0
-local special = P("&lt;")/"<" + P("&gt;")/">" + P("&amp;")/"&"
-local unescaped = Cs(normal * (special * normal)^0)
-
--- 100 * 5000 * "oeps <oeps bla='oeps' foo='bar'> oeps </oeps> oeps " : gsub:lpeg == 623:501 msec (short tags, less difference)
-
-local cleansed = Cs(((P("<") * (1-P(">"))^0 * P(">"))/"" + 1)^0)
-
-function xml.escaped (str) return escaped :match(str) end
-function xml.unescaped(str) return unescaped:match(str) end
-function xml.cleansed (str) return cleansed :match(str) end
-
-function xml.join(t,separator,lastseparator)
- if #t > 0 then
- local result = { }
- for k,v in pairs(t) do
- result[k] = xml.tostring(v)
- end
- if lastseparator then
- return concat(result,separator or "",1,#result-1) .. (lastseparator or "") .. result[#result]
- else
- return concat(result,separator)
- end
- else
- return ""
- end
-end
-
-function xml.statistics()
- return {
- lpathcalls = lpathcalls,
- lpathcached = lpathcached,
- }
-end
-
--- xml.set_text_cleanup(xml.show_text_entities)
--- xml.set_text_cleanup(xml.resolve_text_entities)
-
---~ xml.lshow("/../../../a/(b|c)[@d='e']/f")
---~ xml.lshow("/../../../a/!(b|c)[@d='e']/f")
---~ xml.lshow("/../../../a/!b[@d!='e']/f")
-
---~ x = xml.convert([[
---~ <a>
---~ <b n='01'>01</b>
---~ <b n='02'>02</b>
---~ <b n='03'>03</b>
---~ <b n='04'>OK</b>
---~ <b n='05'>05</b>
---~ <b n='06'>06</b>
---~ <b n='07'>ALSO OK</b>
---~ </a>
---~ ]])
-
---~ xml.settrace("lpath",true)
-
---~ xml.xshow(xml.first(x,"b[position() > 2 and position() < 5 and text() == 'ok']"))
---~ xml.xshow(xml.first(x,"b[position() > 2 and position() < 5 and text() == upper('ok')]"))
---~ xml.xshow(xml.first(x,"b[@n=='03' or @n=='08']"))
---~ xml.xshow(xml.all (x,"b[number(@n)>2 and number(@n)<6]"))
---~ xml.xshow(xml.first(x,"b[find(text(),'ALSO')]"))
-
---~ str = [[
---~ <?xml version="1.0" encoding="utf-8"?>
---~ <story line='mojca'>
---~ <windows>my secret</mouse>
---~ </story>
---~ ]]
-
---~ x = xml.convert([[
---~ <a><b n='01'>01</b><b n='02'>02</b><x>xx</x><b n='03'>03</b><b n='04'>OK</b></a>
---~ ]])
---~ xml.xshow(xml.first(x,"b[tag(2) == 'x']"))
---~ xml.xshow(xml.first(x,"b[tag(1) == 'x']"))
---~ xml.xshow(xml.first(x,"b[tag(-1) == 'x']"))
---~ xml.xshow(xml.first(x,"b[tag(-2) == 'x']"))
-
---~ print(xml.filter(x,"b/tag(2)"))
---~ print(xml.filter(x,"b/tag(1)"))
-
end -- of closure
do -- create closure to overcome 200 locals limit
-if not modules then modules = { } end modules ['lxml-ent'] = {
+if not modules then modules = { } end modules ['lxml-xml'] = {
version = 1.001,
comment = "this module is the basis for the lxml-* ones",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -5233,457 +5998,249 @@ if not modules then modules = { } end modules ['lxml-ent'] = {
license = "see context related readme files"
}
-local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
-local format, gsub, find = string.format, string.gsub, string.find
-local utfchar = unicode.utf8.char
-
---[[ldx--
-<p>We provide (at least here) two entity handlers. The more extensive
-resolver consults a hash first, tries to convert to <l n='utf'/> next,
-and finaly calls a handler when defines. When this all fails, the
-original entity is returned.</p>
---ldx]]--
+local finalizers = xml.finalizers.xml
+local xmlfilter = xml.filter -- we could inline this one for speed
+local xmltostring = xml.tostring
+local xmlserialize = xml.serialize
-xml.entities = xml.entities or { } -- xml.entity_handler == function
-
-function xml.entity_handler(e)
- return format("[%s]",e)
+local function first(collected)
+ return collected and collected[1]
end
-local function toutf(s)
- return utfchar(tonumber(s,16))
+local function last(collected)
+ return collected and collected[#collected]
end
-local function utfize(root)
- local d = root.dt
- for k=1,#d do
- local dk = d[k]
- if type(dk) == "string" then
- -- test prevents copying if no match
- if find(dk,"&#x.-;") then
- d[k] = gsub(dk,"&#x(.-);",toutf)
- end
- else
- utfize(dk)
- end
- end
+local function all(collected)
+ return collected
end
-xml.utfize = utfize
-
-local function resolve(e) -- hex encoded always first, just to avoid mkii fallbacks
- if find(e,"^#x") then
- return utfchar(tonumber(e:sub(3),16))
- elseif find(e,"^#") then
- return utfchar(tonumber(e:sub(2)))
- else
- local ee = xml.entities[e] -- we cannot shortcut this one (is reloaded)
- if ee then
- return ee
- else
- local h = xml.entity_handler
- return (h and h(e)) or "&" .. e .. ";"
+local function reverse(collected)
+ if collected then
+ local reversed = { }
+ for c=#collected,1,-1 do
+ reversed[#reversed+1] = collected[c]
end
+ return reversed
end
end
-local function resolve_entities(root)
- if not root.special or root.tg == "@rt@" then
- local d = root.dt
- for k=1,#d do
- local dk = d[k]
- if type(dk) == "string" then
- if find(dk,"&.-;") then
- d[k] = gsub(dk,"&(.-);",resolve)
- end
- else
- resolve_entities(dk)
- end
- end
- end
+local function attribute(collected,name)
+ local at = collected and collected[1].at
+ return at and at[name]
end
-xml.resolve_entities = resolve_entities
+local function att(id,name)
+ local at = id.at
+ return at and at[name]
+end
-function xml.utfize_text(str)
- if find(str,"&#") then
- return (gsub(str,"&#x(.-);",toutf))
- else
- return str
- end
+local function count(collected)
+ return (collected and #collected) or 0
end
-function xml.resolve_text_entities(str) -- maybe an lpeg. maybe resolve inline
- if find(str,"&") then
- return (gsub(str,"&(.-);",resolve))
- else
- return str
+local function position(collected,n)
+ if collected then
+ n = tonumber(n) or 0
+ if n < 0 then
+ return collected[#collected + n + 1]
+ else
+ return collected[n]
+ end
end
end
-function xml.show_text_entities(str)
- if find(str,"&") then
- return (gsub(str,"&(.-);","[%1]"))
- else
- return str
+local function index(collected)
+ if collected then
+ return collected[1].ni
end
end
--- experimental, this will be done differently
-
-function xml.merge_entities(root)
- local documententities = root.entities
- local allentities = xml.entities
- if documententities then
- for k, v in next, documententities do
- allentities[k] = v
+local function attributes(collected,arguments)
+ if collected then
+ local at = collected[1].at
+ if arguments then
+ return at[arguments]
+ elseif next(at) then
+ return at -- all of them
end
end
end
-
-end -- of closure
-
-do -- create closure to overcome 200 locals limit
-
-if not modules then modules = { } end modules ['lxml-mis'] = {
- version = 1.001,
- comment = "this module is the basis for the lxml-* ones",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
-local concat = table.concat
-local type, next, tonumber, tostring, setmetatable, loadstring = type, next, tonumber, tostring, setmetatable, loadstring
-local format, gsub = string.format, string.gsub
-
---[[ldx--
-<p>The following helper functions best belong to the <t>lmxl-ini</t>
-module. Some are here because we need then in the <t>mk</t>
-document and other manuals, others came up when playing with
-this module. Since this module is also used in <l n='mtxrun'/> we've
-put them here instead of loading mode modules there then needed.</p>
---ldx]]--
-
-function xml.gsub(t,old,new)
- local dt = t.dt
- if dt then
- for k=1,#dt do
- local v = dt[k]
- if type(v) == "string" then
- dt[k] = gsub(v,old,new)
+local function chainattribute(collected,arguments) -- todo: optional levels
+ if collected then
+ local e = collected[1]
+ while e do
+ local at = e.at
+ if at then
+ local a = at[arguments]
+ if a then
+ return a
+ end
else
- xml.gsub(v,old,new)
+ break -- error
end
+ e = e.__p__
end
end
+ return ""
end
-function xml.strip_leading_spaces(dk,d,k) -- cosmetic, for manual
- if d and k and d[k-1] and type(d[k-1]) == "string" then
- local s = d[k-1]:match("\n(%s+)")
- xml.gsub(dk,"\n"..string.rep(" ",#s),"\n")
+local function text(collected)
+ if collected then
+ return xmltostring(collected[1]) -- only first as we cannot concat function
+ else
+ return ""
end
end
-function xml.serialize_path(root,lpath,handle)
- local dk, r, d, k = xml.first(root,lpath)
- dk = xml.copy(dk)
- xml.strip_leading_spaces(dk,d,k)
- xml.serialize(dk,handle)
-end
-
---~ xml.escapes = { ['&'] = '&amp;', ['<'] = '&lt;', ['>'] = '&gt;', ['"'] = '&quot;' }
---~ xml.unescapes = { } for k,v in pairs(xml.escapes) do xml.unescapes[v] = k end
-
---~ function xml.escaped (str) return (gsub(str,"(.)" , xml.escapes )) end
---~ function xml.unescaped(str) return (gsub(str,"(&.-;)", xml.unescapes)) end
---~ function xml.cleansed (str) return (gsub(str,"<.->" , '' )) end -- "%b<>"
-
-local P, S, R, C, V, Cc, Cs = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.V, lpeg.Cc, lpeg.Cs
-
--- 100 * 2500 * "oeps< oeps> oeps&" : gsub:lpeg|lpeg|lpeg
---
--- 1021:0335:0287:0247
-
--- 10 * 1000 * "oeps< oeps> oeps& asfjhalskfjh alskfjh alskfjh alskfjh ;al J;LSFDJ"
---
--- 1559:0257:0288:0190 (last one suggested by roberto)
-
--- escaped = Cs((S("<&>") / xml.escapes + 1)^0)
--- escaped = Cs((S("<")/"&lt;" + S(">")/"&gt;" + S("&")/"&amp;" + 1)^0)
-local normal = (1 - S("<&>"))^0
-local special = P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;"
-local escaped = Cs(normal * (special * normal)^0)
-
--- 100 * 1000 * "oeps&lt; oeps&gt; oeps&amp;" : gsub:lpeg == 0153:0280:0151:0080 (last one by roberto)
-
--- unescaped = Cs((S("&lt;")/"<" + S("&gt;")/">" + S("&amp;")/"&" + 1)^0)
--- unescaped = Cs((((P("&")/"") * (P("lt")/"<" + P("gt")/">" + P("amp")/"&") * (P(";")/"")) + 1)^0)
-local normal = (1 - S"&")^0
-local special = P("&lt;")/"<" + P("&gt;")/">" + P("&amp;")/"&"
-local unescaped = Cs(normal * (special * normal)^0)
-
--- 100 * 5000 * "oeps <oeps bla='oeps' foo='bar'> oeps </oeps> oeps " : gsub:lpeg == 623:501 msec (short tags, less difference)
-
-local cleansed = Cs(((P("<") * (1-P(">"))^0 * P(">"))/"" + 1)^0)
-
-xml.escaped_pattern = escaped
-xml.unescaped_pattern = unescaped
-xml.cleansed_pattern = cleansed
-
-function xml.escaped (str) return escaped :match(str) end
-function xml.unescaped(str) return unescaped:match(str) end
-function xml.cleansed (str) return cleansed :match(str) end
-
-function xml.join(t,separator,lastseparator)
- if #t > 0 then
- local result = { }
- for k,v in pairs(t) do
- result[k] = xml.tostring(v)
- end
- if lastseparator then
- return concat(result,separator or "",1,#result-1) .. (lastseparator or "") .. result[#result]
- else
- return concat(result,separator)
+-- Finalizer: returns a list with the dt (content) table of every collected
+-- element that has one; returns nothing when collected is nil.
+local function texts(collected)
+ if collected then
+ local t = { }
+ for c=1,#collected do
+ local e = collected[c] -- index the argument, not an undefined global
+ if e and e.dt then
+ t[#t+1] = e.dt
+ end
end
- else
- return ""
+ return t
end
end
-
-end -- of closure
-
-do -- create closure to overcome 200 locals limit
-
-if not modules then modules = { } end modules ['trac-tra'] = {
- version = 1.001,
- comment = "companion to trac-tra.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
--- the <anonymous> tag is kind of generic and used for functions that are not
--- bound to a variable, like node.new, node.copy etc (contrary to for instance
--- node.has_attribute which is bound to a has_attribute local variable in mkiv)
-
-debugger = debugger or { }
-
-local counters = { }
-local names = { }
-local getinfo = debug.getinfo
-local format, find, lower, gmatch = string.format, string.find, string.lower, string.gmatch
-
--- one
-
-local function hook()
- local f = getinfo(2,"f").func
- local n = getinfo(2,"Sn")
--- if n.what == "C" and n.name then print (n.namewhat .. ': ' .. n.name) end
- if f then
- local cf = counters[f]
- if cf == nil then
- counters[f] = 1
- names[f] = n
+local function tag(collected,n) -- returns the tg field of one collected element, selected by n
+ if collected then
+ local c
+ if n == 0 or not n then -- no n (or 0): take the first element
+ c = collected[1]
+ elseif n > 1 then -- n > 1: take the n-th element
+ c = collected[n]
else
- counters[f] = cf + 1
+ c = collected[#collected-n+1] -- NOTE(review): n == 1 and n < 0 end up here; n == 1 picks the last element and n < 0 indexes past the end, confirm this is intended
end
+ return c and c.tg
end
end
-local function getname(func)
- local n = names[func]
- if n then
- if n.what == "C" then
- return n.name or '<anonymous>'
+
+local function name(collected,n) -- returns "ns:tg" (or just tg when ns is empty) of one collected element, selected by n
+ if collected then
+ local c
+ if n == 0 or not n then -- no n (or 0): take the first element
+ c = collected[1]
+ elseif n > 1 then -- n > 1: take the n-th element
+ c = collected[n]
else
- -- source short_src linedefined what name namewhat nups func
- local name = n.name or n.namewhat or n.what
- if not name or name == "" then name = "?" end
- return format("%s : %s : %s", n.short_src or "unknown source", n.linedefined or "--", name)
+ c = collected[#collected-n+1] -- NOTE(review): n == 1 and n < 0 end up here; n == 1 picks the last element and n < 0 indexes past the end, confirm this is intended
end
- else
- return "unknown"
- end
-end
-function debugger.showstats(printer,threshold)
- printer = printer or texio.write or print
- threshold = threshold or 0
- local total, grandtotal, functions = 0, 0, 0
- printer("\n") -- ugly but ok
- -- table.sort(counters)
- for func, count in pairs(counters) do
- if count > threshold then
- local name = getname(func)
- if not name:find("for generator") then
- printer(format("%8i %s", count, name))
- total = total + count
+ if c then
+ if c.ns == "" then
+ return c.tg
+ else
+ return c.ns .. ":" .. c.tg
end
end
- grandtotal = grandtotal + count
- functions = functions + 1
end
- printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", functions, total, grandtotal, threshold))
end
--- two
-
---~ local function hook()
---~ local n = getinfo(2)
---~ if n.what=="C" and not n.name then
---~ local f = tostring(debug.traceback())
---~ local cf = counters[f]
---~ if cf == nil then
---~ counters[f] = 1
---~ names[f] = n
---~ else
---~ counters[f] = cf + 1
---~ end
---~ end
---~ end
---~ function debugger.showstats(printer,threshold)
---~ printer = printer or texio.write or print
---~ threshold = threshold or 0
---~ local total, grandtotal, functions = 0, 0, 0
---~ printer("\n") -- ugly but ok
---~ -- table.sort(counters)
---~ for func, count in pairs(counters) do
---~ if count > threshold then
---~ printer(format("%8i %s", count, func))
---~ total = total + count
---~ end
---~ grandtotal = grandtotal + count
---~ functions = functions + 1
---~ end
---~ printer(format("functions: %s, total: %s, grand total: %s, threshold: %s\n", functions, total, grandtotal, threshold))
---~ end
-
--- rest
-
-function debugger.savestats(filename,threshold)
- local f = io.open(filename,'w')
- if f then
- debugger.showstats(function(str) f:write(str) end,threshold)
- f:close()
+local function tags(collected,nonamespace)
+ if collected then
+ local t = { }
+ for c=1,#collected do
+ local e = collected[c]
+ local ns, tg = e.ns, e.tg
+ if nonamespace or ns == "" then
+ t[#t+1] = tg
+ else
+ t[#t+1] = ns .. ":" .. tg
+ end
+ end
+ return t
end
end
-function debugger.enable()
- debug.sethook(hook,"c")
-end
-
-function debugger.disable()
- debug.sethook()
---~ counters[debug.getinfo(2,"f").func] = nil
-end
-
-function debugger.tracing()
- local n = tonumber(os.env['MTX.TRACE.CALLS']) or tonumber(os.env['MTX_TRACE_CALLS']) or 0
- if n > 0 then
- function debugger.tracing() return true end ; return true
- else
- function debugger.tracing() return false end ; return false
+local function empty(collected)
+ if collected then
+ for c=1,#collected do
+ local e = collected[c]
+ if e then
+ local edt = e.dt
+ if edt then
+ local n = #edt
+ if n == 1 then
+ local edk = edt[1]
+ local typ = type(edk)
+ if typ == "table" then
+ return false
+ elseif edk ~= "" then -- maybe an extra tester for spacing only
+ return false
+ end
+ elseif n > 1 then
+ return false
+ end
+ end
+ end
+ end
end
+ return true
end
---~ debugger.enable()
-
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
---~ print(math.sin(1*.5))
-
---~ debugger.disable()
-
---~ print("")
---~ debugger.showstats()
---~ print("")
---~ debugger.showstats(print,3)
+finalizers.first = first
+finalizers.last = last
+finalizers.all = all
+finalizers.reverse = reverse
+finalizers.elements = all
+finalizers.default = all
+finalizers.attribute = attribute
+finalizers.att = att
+finalizers.count = count
+finalizers.position = position
+finalizers.index = index
+finalizers.attributes = attributes
+finalizers.chainattribute = chainattribute
+finalizers.text = text
+finalizers.texts = texts
+finalizers.tag = tag
+finalizers.name = name
+finalizers.tags = tags
+finalizers.empty = empty
-trackers = trackers or { }
+-- shortcuts -- we could support xmlfilter(id,pattern,first)
-local data, done = { }, { }
+function xml.first(id,pattern)
+ return first(xmlfilter(id,pattern))
+end
-local function set(what,value)
- if type(what) == "string" then
- what = aux.settings_to_array(what)
- end
- for i=1,#what do
- local w = what[i]
- for d, f in next, data do
- if done[d] then
- -- prevent recursion due to wildcards
- elseif find(d,w) then
- done[d] = true
- for i=1,#f do
- f[i](value)
- end
- end
- end
- end
+function xml.last(id,pattern)
+ return last(xmlfilter(id,pattern))
end
-local function reset()
- for d, f in next, data do
- for i=1,#f do
- f[i](false)
- end
- end
+function xml.count(id,pattern)
+ return count(xmlfilter(id,pattern))
end
-function trackers.register(what,...)
- what = lower(what)
- local w = data[what]
- if not w then
- w = { }
- data[what] = w
- end
- for _, fnc in next, { ... } do
- local typ = type(fnc)
- if typ == "function" then
- w[#w+1] = fnc
- elseif typ == "string" then
- w[#w+1] = function(value) set(fnc,value,nesting) end
- end
- end
+function xml.attribute(id,pattern,a,default)
+ return attribute(xmlfilter(id,pattern),a,default)
end
-function trackers.enable(what)
- done = { }
- set(what,true)
+function xml.text(id,pattern)
+ return text(xmlfilter(id,pattern))
end
-function trackers.disable(what)
- done = { }
- if not what or what == "" then
- trackers.reset(what)
- else
- set(what,false)
- end
+function xml.raw(id,pattern)
+ return xmlserialize(xmlfilter(id,pattern))
end
-function trackers.reset(what)
- done = { }
- reset()
+function xml.position(id,pattern,n)
+ return position(xmlfilter(id,pattern),n)
end
-function trackers.list() -- pattern
- local list = table.sortedkeys(data)
- local user, system = { }, { }
- for l=1,#list do
- local what = list[l]
- if find(what,"^%*") then
- system[#system+1] = what
- else
- user[#user+1] = what
- end
- end
- return user, system
+function xml.empty(id,pattern)
+ return empty(xmlfilter(id,pattern))
end
+xml.all = xml.filter
+xml.index = xml.position
+xml.found = xml.filter
+
end -- of closure
@@ -6135,6 +6692,7 @@ function statistics.timed(action,report)
end
+
end -- of closure
do -- create closure to overcome 200 locals limit
@@ -9814,11 +10372,13 @@ own.libs = { -- todo: check which ones are really needed
'l-utils.lua',
'l-aux.lua',
-- 'l-xml.lua',
+ 'trac-tra.lua',
'lxml-tab.lua',
- 'lxml-pth.lua',
+ 'lxml-lpt.lua',
'lxml-ent.lua',
'lxml-mis.lua',
- 'trac-tra.lua',
+ 'lxml-aux.lua',
+ 'lxml-xml.lua',
'luat-env.lua',
'trac-inf.lua',
'trac-log.lua',
@@ -9889,7 +10449,7 @@ if not resolvers then
os.exit()
end
-logs.setprogram('MTXrun',"TDS Runner Tool 1.22",environment.arguments["verbose"] or false)
+logs.setprogram('MTXrun',"TDS Runner Tool 1.23",environment.arguments["verbose"] or false)
local instance = resolvers.reset()
diff --git a/tex/context/base/cont-new.tex b/tex/context/base/cont-new.tex
index 012a9c552..4798e31bc 100644
--- a/tex/context/base/cont-new.tex
+++ b/tex/context/base/cont-new.tex
@@ -11,7 +11,7 @@
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.
-\newcontextversion{2009.10.16 16:13}
+\newcontextversion{2009.10.18 15:20}
%D This file is loaded at runtime, thereby providing an
%D excellent place for hacks, patches, extensions and new
diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv
index bd2bbe504..1a060d04f 100644
--- a/tex/context/base/context.mkiv
+++ b/tex/context/base/context.mkiv
@@ -241,6 +241,7 @@
\loadmarkfile{font-tra}
\loadmarkfile{font-uni}
\loadmarkfile{font-col}
+\loadmarkfile{font-gds}
\loadmarkfile{typo-spa}
\loadmarkfile{typo-krn}
diff --git a/tex/context/base/context.tex b/tex/context/base/context.tex
index 3f87e7a12..da8ad455b 100644
--- a/tex/context/base/context.tex
+++ b/tex/context/base/context.tex
@@ -20,7 +20,7 @@
%D your styles an modules.
\edef\contextformat {\jobname}
-\edef\contextversion{2009.10.16 16:13}
+\edef\contextversion{2009.10.18 15:20}
%D For those who want to use this:
diff --git a/tex/context/base/font-ctx.lua b/tex/context/base/font-ctx.lua
index de1422454..f3b7879fb 100644
--- a/tex/context/base/font-ctx.lua
+++ b/tex/context/base/font-ctx.lua
@@ -10,7 +10,7 @@ if not modules then modules = { } end modules ['font-ctx'] = {
local texsprint, count = tex.sprint, tex.count
local format, concat, gmatch, match, find, lower = string.format, table.concat, string.gmatch, string.match, string.find, string.lower
-local tostring, next = tostring, next
+local tostring, next, type = tostring, next, type
local ctxcatcodes = tex.ctxcatcodes
@@ -68,22 +68,25 @@ local settings_to_hash = aux.settings_to_hash
local default_features = fonts.otf.features.default
local function preset_context(name,parent,features) -- currently otf only
+ if features == "" and find(parent,"=") then
+ features = parent
+ parent = ""
+ end
if features == "" then
- if find(parent,"=") then
- features = parent
- parent = ""
- end
+ features = { }
+ elseif type(features) == "string" then
+ features = normalize_meanings(settings_to_hash(features))
+ else
+ features = normalize_meanings(features)
end
- local number = (setups[name] and setups[name].number) or 0
- local t = (features == "" and { }) or normalize_meanings(settings_to_hash(features))
-- todo: synonyms, and not otf bound
if parent ~= "" then
for p in gmatch(parent,"[^, ]+") do
local s = setups[p]
if s then
for k,v in next, s do
- if t[k] == nil then
- t[k] = v
+ if features[k] == nil then
+ features[k] = v
end
end
end
@@ -93,25 +96,26 @@ local function preset_context(name,parent,features) -- currently otf only
-- we need to preset them (we hash the features and adding a default
-- setting during initialization may result in a different hash)
for k,v in next, triggers do
- if type(t[v]) == "nil" then
+ if features[v] == nil then -- not false !
local vv = default_features[v]
- if vv then t[v] = vv end
+ if vv then features[v] = vv end
end
end
-- sparse 'm so that we get a better hash and less test (experimental
-- optimization)
- local tt = { } -- maybe avoid tt
- for k,v in next, t do
- if v then tt[k] = v end
+ local t = { } -- can we avoid t ?
+ for k,v in next, features do
+ if v then t[k] = v end
end
-- needed for dynamic features
+ local number = (setups[name] and setups[name].number) or 0
if number == 0 then
number = #numbers + 1
numbers[number] = name
end
- tt.number = number
- setups[name] = tt
- return number
+ t.number = number
+ setups[name] = t
+ return number, t
end
local function context_number(name) -- will be replaced
diff --git a/tex/context/base/font-def.lua b/tex/context/base/font-def.lua
index 9c9a67178..bc09d0f2e 100644
--- a/tex/context/base/font-def.lua
+++ b/tex/context/base/font-def.lua
@@ -9,7 +9,8 @@ if not modules then modules = { } end modules ['font-def'] = {
local format, concat, gmatch, match, find, lower = string.format, table.concat, string.gmatch, string.match, string.find, string.lower
local tostring, next = tostring, next
-local trace_defining = false trackers.register("fonts.defining", function(v) trace_defining = v end)
+local trace_defining = false trackers .register("fonts.defining", function(v) trace_defining = v end)
+local directive_embedall = false directives.register("fonts.embedall", function(v) directive_embedall = v end)
trackers.register("fonts.loading", "fonts.defining", "otf.loading", "afm.loading", "tfm.loading")
trackers.register("fonts.all", "fonts.*", "otf.*", "afm.*", "tfm.*")
@@ -283,7 +284,9 @@ function tfm.read(specification)
end
end
if tfmtable then
- if tfmtable.filename and fonts.dontembed[tfmtable.filename] then
+ if directive_embedall then
+ tfmtable.embedding = "full"
+ elseif tfmtable.filename and fonts.dontembed[tfmtable.filename] then
tfmtable.embedding = "no"
else
tfmtable.embedding = "subset"
diff --git a/tex/context/base/font-gds.lua b/tex/context/base/font-gds.lua
new file mode 100644
index 000000000..95623df1c
--- /dev/null
+++ b/tex/context/base/font-gds.lua
@@ -0,0 +1,349 @@
+if not modules then modules = { } end modules ['font-gds'] = {
+ version = 1.000,
+ comment = "companion to font-gds.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local flattened= table.flattened
+local type, next = type, next
+local gmatch = string.gmatch
+
+-- goodies=name,colorscheme=,featureset=
+
+-- goodies
+
+fonts.goodies = fonts.goodies or { }
+fonts.goodies.data = fonts.goodies.data or { }
+fonts.goodies.list = fonts.goodies.list or { }
+
+local data = fonts.goodies.data
+local list = fonts.goodies.list
+
+local function getgoodies(filename) -- loads a goodie file once and caches the outcome in data (maybe a merge is better)
+ local goodies = data[filename] -- cached result, false when an earlier lookup failed
+ if goodies ~= nil then
+ -- found or tagged unfound
+ elseif type(filename) == "string" then
+ local fullname = resolvers.find_file(file.addsuffix(filename,"lfg")) or "" -- preferred suffix
+ if fullname == "" then
+ fullname = resolvers.find_file(file.addsuffix(filename,"lua")) or "" -- fallback suffix
+ end
+ if fullname == "" then
+ logs.report("fonts", "goodie file '%s.lfg' is not found",filename)
+ data[filename] = false -- signal for not found
+ else
+ goodies = dofile(fullname) or false -- a file that returns nothing is cached as false as well
+ data[filename] = goodies
+ end
+ end
+ return goodies -- nil right after a failed lookup or for a non-string filename, otherwise the cached table or false
+end
+
+function fonts.goodies.register(name,fnc) -- stores an initializer under name; a later registration with the same name replaces it
+ list[name] = fnc
+end
+
+fonts.goodies.get = getgoodies
+
+-- register goodies file
+
+local preset_context = fonts.define.specify.preset_context
+
+function fonts.initializers.common.goodies(tfmdata,value) -- value is a list of goodie file names separated by commas and/or spaces
+ local goodies = { }
+ for filename in gmatch(value,"[^, ]+") do
+ local ok = getgoodies(filename)
+ if ok then
+ goodies[#goodies+1] = ok
+ if not ok.initialized then -- run the registered initializers only once per loaded goodie table
+ for name, fnc in next, list do
+ fnc(ok,tfmdata)
+ end
+ ok.initialized = true
+ end
+ end
+ end
+ tfmdata.goodies = goodies -- shared ? (always a table, empty when no file could be loaded)
+end
+
+-- featuresets
+
+local function initialize(goodies,tfmdata) -- passes each featureset of a goodie table to preset_context under the name "<goodies.name>::<set name>"
+ local featuresets = goodies.featuresets
+ local goodiesname = goodies.name -- NOTE(review): the concatenation below raises an error when the goodie file has no name field
+ if featuresets then
+ for name,set in next, featuresets do
+ local ff = flattened(set)
+ local n, s = preset_context(goodiesname .. "::" .. name,"",ff)
+ featuresets[name] = s -- set (the raw definition is replaced by the second value returned by preset_context)
+ end
+ end
+end
+
+fonts.goodies.register("featureset",initialize)
+
+function fonts.initializers.common.featureset(tfmdata,set) -- merges the featureset called set (from tfmdata.goodies) into tfmdata.shared.features
+ local goodies = tfmdata.goodies -- shared ?
+ if goodies then
+ local features = tfmdata.shared.features
+ local what
+ for i=1,#goodies do
+ -- last one counts
+ local g = goodies[i]
+ what = (g.featuresets and g.featuresets[set]) or what
+ end
+ if what then
+ for feature, value in next, what do
+ if features[feature] == nil then -- only fill in what is not set yet, an explicit false is kept
+ features[feature] = value
+ end
+ end
+ tfmdata.mode = features.mode or tfmdata.mode -- a mode found in the features takes precedence
+ end
+ end
+end
+
+-- colorschemes
+
+fonts.goodies.colorschemes = fonts.goodies.colorschemes or { }
+fonts.goodies.colorschemes.data = fonts.goodies.colorschemes.data or { }
+
+local colorschemes = fonts.goodies.colorschemes
+
+function fonts.initializers.common.colorscheme(tfmdata,scheme) -- sets tfmdata.colorscheme to a unicode -> group index table, or to false
+ if type(scheme) == "string" then
+ local goodies = tfmdata.goodies
+ -- todo : check for already defined in shared
+ if goodies then
+ local what
+ for i=1,#goodies do
+ -- last one counts
+ local g = goodies[i]
+ what = (g.colorschemes and g.colorschemes[scheme]) or what
+ end
+ if what then
+ -- this is font bound but we can share them if needed
+ -- just as we could hash the conversions (per font)
+ local hash, reverse = tfmdata.luatex.unicodes, { }
+ for i=1,#what do -- i is the group index within the scheme
+ local w = what[i]
+ for j=1,#w do
+ local name = w[j]
+ local unicode = hash[name]
+ if unicode then -- names without an entry in the unicodes table are skipped
+ reverse[unicode] = i -- when a name occurs in more than one group the last group wins
+ end
+ end
+ end
+ tfmdata.colorscheme = reverse
+ return
+ end
+ end
+ end
+ tfmdata.colorscheme = false -- no string scheme, no goodies, or scheme not found in any goodie table
+end
+
+local fontdata = fonts.ids
+local fcs = fonts.color.set
+local has_attribute = node.has_attribute
+local traverse_id = node.traverse_id
+local a_colorscheme = attributes.private('colorscheme')
+local glyph = node.id("glyph")
+
+function fonts.goodies.colorschemes.coloring(head) -- walks the glyph nodes in head and colors those that carry the colorscheme attribute
+ local lastfont, lastscheme
+ for n in traverse_id(glyph,head) do
+ local a = has_attribute(n,a_colorscheme)
+ if a then
+ local f = n.font
+ if f ~= lastfont then -- look up the scheme only when the font changes
+ lastscheme, lastfont = fontdata[f].colorscheme, f
+ end
+ if lastscheme then
+ local sc = lastscheme[n.char]
+ if sc then
+ fcs(n,"colorscheme:"..a..":"..sc) -- slow (the color name is "colorscheme:<attribute value>:<group index>")
+ end
+ end
+ end
+ end
+end
+
+function fonts.goodies.colorschemes.enable() -- appends the coloring action to the "processors" task list
+ tasks.appendaction("processors","fonts","fonts.goodies.colorschemes.coloring")
+ function fonts.goodies.colorschemes.enable() end -- redefine as a no-op so that later calls do not append the action again
+end
+
+-- installation (collected to keep the overview)
+
+fonts.otf.tables.features['goodies'] = 'Goodies on top of built in features'
+fonts.otf.tables.features['featureset'] = 'Goodie Feature Set' -- key must match the name passed to fonts.otf.features.register below
+fonts.otf.tables.features['colorscheme'] = 'Goodie Color Scheme'
+
+fonts.otf.features.register('goodies')
+fonts.otf.features.register('featureset')
+fonts.otf.features.register('colorscheme')
+
+table.insert(fonts.triggers, 1, "goodies")
+table.insert(fonts.triggers, 2, "featureset") -- insert after
+table.insert(fonts.triggers, "colorscheme")
+
+fonts.initializers.base.otf.goodies = fonts.initializers.common.goodies
+fonts.initializers.node.otf.goodies = fonts.initializers.common.goodies
+
+fonts.initializers.base.otf.featureset = fonts.initializers.common.featureset
+fonts.initializers.node.otf.featureset = fonts.initializers.common.featureset
+
+fonts.initializers.base.otf.colorscheme = fonts.initializers.common.colorscheme
+fonts.initializers.node.otf.colorscheme = fonts.initializers.common.colorscheme
+
+-- The following file (husayni.lfg) is the experimental setup that we used
+-- for Idris font. For the moment we don't store this in the cache and quite
+-- probably these files sit in one of the paths:
+--
+-- tex/context/fonts/goodies
+-- tex/fonts/goodies/context
+-- tex/fonts/data/foundry/collection
+
+--~ local yes = "yes", "node"
+
+--~ local basics = {
+--~ analyze = yes,
+--~ mode = "node",
+--~ language = "dflt",
+--~ script = "arab",
+--~ }
+
+--~ local analysis = {
+--~ ccmp = yes,
+--~ init = yes, medi = yes, fina = yes,
+--~ }
+
+--~ local regular = {
+--~ rlig = yes, calt = yes, salt = yes, anum = yes,
+--~ ss01 = yes, ss03 = yes, ss07 = yes, ss10 = yes, ss12 = yes, ss15 = yes, ss16 = yes,
+--~ ss19 = yes, ss24 = yes, ss25 = yes, ss26 = yes, ss27 = yes, ss31 = yes, ss34 = yes,
+--~ ss35 = yes, ss36 = yes, ss37 = yes, ss38 = yes, ss41 = yes, ss42 = yes, ss43 = yes,
+--~ js16 = yes,
+--~ }
+
+--~ local positioning = {
+--~ kern = yes, curs = yes, mark = yes, mkmk = yes,
+--~ }
+
+--~ return {
+--~ name = "husayni",
+--~ version = "1.00",
+--~ comment = "Goodies that complement the Husayni font by Idris Samawi Hamid.",
+--~ author = "Idris Samawi Hamid and Hans Hagen",
+--~ featuresets = {
+--~ default = {
+--~ basics, analysis, regular, positioning, -- xxxx = yes, yyyy = 2,
+--~ },
+--~ },
+--~ stylistics = {
+--~ ss01 = "Allah, Muhammad",
+--~ ss02 = "ss01 + Allah_final",
+--~ ss03 = "level-1 stack over Jiim, initial entry only",
+--~ ss04 = "level-1 stack over Jiim, initial/medial entry",
+--~ ss05 = "multi-level Jiim stacking, initial/medial entry",
+--~ ss06 = "aesthetic Faa/Qaaf for FJ_mm, FJ_mf connection",
+--~ ss07 = "initial-entry stacking over Haa",
+--~ ss08 = "initial/medial stacking over Haa, minus HM_mf strings",
+--~ ss09 = "initial/medial Haa stacking plus HM_mf strings",
+--~ ss10 = "basic dipped Miim, initial-entry B_S-stack over Miim",
+--~ ss11 = "full dipped Miim, initial-entry B_S-stack over Miim",
+--~ ss12 = "XBM_im initial-medial entry B_S-stack over Miim",
+--~ ss13 = "full initial-medial entry B_S-stacked Miim",
+--~ ss14 = "initial entry, stacked Laam on Miim",
+--~ ss15 = "full stacked Laam-on-Miim",
+--~ ss16 = "initial entry, stacked Ayn-on-Miim",
+--~ ss17 = "full stacked Ayn-on-Miim",
+--~ ss18 = "LMJ_im already contained in ss03--05, may remove",
+--~ ss19 = "LM_im",
+--~ ss20 = "KLM_m, sloped Miim",
+--~ ss21 = "KLM_i_mm/LM_mm, sloped Miim",
+--~ ss22 = "filled sloped Miim",
+--~ ss23 = "LM_mm, non-sloped Miim",
+--~ ss24 = "BR_i_mf, BN_i_mf",
+--~ ss25 = "basic LH_im might merge with ss24",
+--~ ss26 = "full Yaa.final special strings: BY_if, BY_mf, LY_mf",
+--~ ss27 = "basic thin Miim.final",
+--~ ss28 = "full thin Miim.final to be moved to jsnn",
+--~ ss29 = "basic short Miim.final",
+--~ ss30 = "full short Miim.final to be moved to jsnn",
+--~ ss31 = "basic Raa.final strings: JR and SR",
+--~ ss32 = "basic Raa.final strings: JR, SR, and BR",
+--~ ss33 = "TtR to be moved to jsnn",
+--~ ss34 = "AyR style also available in jsnn",
+--~ ss35 = "full Kaaf contexts",
+--~ ss36 = "full Laam contexts",
+--~ ss37 = "Miim-Miim contexts",
+--~ ss38 = "basic dipped Haa, B_SH_mm",
+--~ ss39 = "full dipped Haa, B_S_LH_i_mm_Mf",
+--~ ss40 = "aesthetic dipped medial Haa",
+--~ ss41 = "high and low Baa strings",
+--~ ss42 = "diagonal entry",
+--~ ss43 = "initial alternates",
+--~ ss44 = "hooked final alif",
+--~ ss45 = "BMA_f",
+--~ ss46 = "BM_mm_alt, for JBM combinations",
+--~ ss47 = "Shaddah-<kasrah> combo",
+--~ ss48 = "Auto-sukuun",
+--~ ss49 = "No vowels",
+--~ ss50 = "Shaddah/MaaddahHamzah only",
+--~ ss51 = "No Skuun",
+--~ ss52 = "No Waslah",
+--~ ss53 = "No Waslah",
+--~ ss54 = "chopped finals",
+--~ ss55 = "idgham-tanwin",
+--~ js01 = "Raawide",
+--~ js02 = "Yaawide",
+--~ js03 = "Kaafwide",
+--~ js04 = "Nuunwide",
+--~ js05 = "Kaafwide Nuunwide Siinwide Baawide",
+--~ js06 = "final Haa wide",
+--~ js07 = "thin Miim",
+--~ js08 = "short Miim",
+--~ js09 = "wide Siin",
+--~ js10 = "thuluth-style initial Haa, final Miim, MRw_mf",
+--~ js11 = "level-1 stretching",
+--~ js12 = "level-2 stretching",
+--~ js13 = "level-3 stretching",
+--~ js14 = "final Alif",
+--~ js15 = "hooked final Alif",
+--~ js16 = "aesthetic medial Faa/Qaaf",
+--~ js17 = "fancy isol Haa after Daal, Raa, and Waaw",
+--~ js18 = "Laamwide, alternate substitution",
+--~ js19 = "level-4 stretching, only siin and Hhaa for basmalah",
+--~ js20 = "level-5 stretching, only siin and Hhaa for basmalah",
+--~ js21 = "Haa.final_alt2",
+--~ },
+--~ colorschemes = {
+--~ default = {
+--~ [1] = {
+--~ "Onedotabove", "Onedotbelow", "Twodotsabove", "Twodotsbelow", "Threedotsabove", "Twodotsabove.vrt", "Twodotsbelow.vrt", "Twodotsabove.KBA", "Threedotsabove.KBA", "Threedotsbelowinv",
+--~ },
+--~ [2] = {
+--~ "Fathah", "Dammah", "Kasrah", "FathahVertical", "DammahInverted", "KasrahVertical", "FathahVertical.alt1", "KasrahVertical.alt1", "FathahTanwiin", "DammahTanwiin", "KasrahTanwiin", "Shaddah", "Sukuun", "MaaddahHamzah", "Jazm", "Maaddah", "DammahTanwiin_alt2", "DammahTanwiin_alt1", "FathahTanwiin_alt1", "KasrahTanwiin_alt1", "Fathah.mkmk", "Dammah.mkmk", "Kasrah.mkmk", "FathahVertical.mkmk", "DammahInverted.mkmk", "KasrahVertical.mkmk", "FathahTanwiin.mkmk", "DammahTanwiin.mkmk", "KasrahTanwiin.mkmk", "DammahTanwiin_alt1.mkmk",
+--~ },
+--~ [3] = {
+--~ "Ttaa.waqf", "SsLY.waqf", "QLY.waqf", "Miim.waqf", "LA.waqf", "Jiim.waqf", "Threedotsabove.waqf", "Siin.waqf", "Ssaad.waqf", "Qaaf.waqf", "SsL.waqf", "QF.waqf", "SKTH.waqf", "WQFH.waqf", "Kaaf.waqf", "Ayn.ruku",
+--~ },
+--~ [4] = {
+--~ "Hamzah","Hamzahabove", "Hamzahbelow", "MaaddahHamzah.identity", "Waslah",
+--~ },
+--~ [5] = {
+--~ "Waawsmall", "Yaasmall", "FathahVertical.alt2", "Waawsmall.isol", "Yaasmall.isol", "FathahVertical.isol",
+--~ },
+--~ [6] = {
+--~ "Miim.nuun_high", "Siin.Ssaad", "Nuunsmall", "emptydot_low", "emptydot_high", "Sifr.fill", "Miim.nuun_low", "Nuun.tanwiin",
+--~ },
+--~ [7] = {
+--~ "Ayah", "Yaasmall", "Ayah.alt1", "Ayah.alt2", "Ayah.alt3", "Ayah2",
+--~ }
+--~ }
+--~ }
+--~ }
diff --git a/tex/context/base/font-gds.mkiv b/tex/context/base/font-gds.mkiv
new file mode 100644
index 000000000..afdede721
--- /dev/null
+++ b/tex/context/base/font-gds.mkiv
@@ -0,0 +1,79 @@
+%D \module
+%D [ file=font-gds,
+%D version=2009.10.14,
+%D title=\CONTEXT\ Font Support,
+%D subtitle=Colorschemes,
+%D author=Hans Hagen,
+%D date=\currentdate,
+%D copyright=PRAGMA]
+%C
+%C This module is part of the \CONTEXT\ macro||package and is
+%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
+%C details.
+
+\writestatus{loading}{ConTeXt Font Support / Colorschemes}
+
+\registerctxluafile{font-gds}{1.001}
+
+\unprotect
+
+% this will become colorgroups
+
+\definecolor[colorscheme:1:1][s=.75]
+\definecolor[colorscheme:1:2][r=.75]
+\definecolor[colorscheme:1:3][g=.75]
+\definecolor[colorscheme:1:4][b=.75]
+\definecolor[colorscheme:1:5][c=.75]
+\definecolor[colorscheme:1:6][m=.75]
+\definecolor[colorscheme:1:7][y=.75]
+
+\definecolor[colorscheme:2:7][s=.75]
+\definecolor[colorscheme:2:6][r=.75]
+\definecolor[colorscheme:2:5][g=.75]
+\definecolor[colorscheme:2:4][b=.75]
+\definecolor[colorscheme:2:3][c=.75]
+\definecolor[colorscheme:2:2][m=.75]
+\definecolor[colorscheme:2:1][y=.75]
+
+\definesystemattribute[colorscheme]
+
+\def\setfontcolorscheme
+ {\ctxlua{fonts.goodies.colorschemes.enable()}%
+ \xdef\setfontcolorscheme[##1]{\dosetattribute{colorscheme}{##1}}%
+ \setfontcolorscheme}
+
+\edef\resetfontcolorscheme{\doresetattribute{colorscheme}}
+
+\protect \endinput
+
+% \definefontfeature[husayni-colored][goodies=husayni,colorscheme=default,featureset=default]
+%
+% \definedfont[husayni*husayni-colored at 36pt]
+%
+% \starttext \pardir TRT \textdir TRT
+%
+% \setfontcolorscheme[1]
+%
+% اَلْحَمْ‍دُ لِلّٰهِ حَمْدَ مُعْتَرِفٍ بِحَمْدِهٖ، مُغْتَرِفٌ مِنْ بِحَارِ
+% مَجْدِهٖ، بِلِسَانِ ٱلثَّنَاۤءِ شَاكِرًا، وَلِحُسْنِ اٰلاۤئِهٖ نَاشِرًا؛
+% اَلَّذِيْ خَلَقَ ٱلْمَوْتَ وَٱلْحَيٰوةَ، وَٱلْخَيْرَ وَٱلشَّرَّ،
+% وَٱلنَّفْعَ وَٱلضَّرَّ، وَٱلسُّكُوْنَ وَٱلْحَرَكَةَ، وَٱلْأَرْوَاحَ
+% وَٱلْأَجْسَامَ، وَٱلذِّكْرَ وَٱلنِّسْيَانَ.
+%
+% \setfontcolorscheme[2]
+%
+% اَلْحَمْ‍دُ لِلّٰهِ حَمْدَ مُعْتَرِفٍ بِحَمْدِهٖ، مُغْتَرِفٌ مِنْ بِحَارِ
+% مَجْدِهٖ، بِلِسَانِ ٱلثَّنَاۤءِ شَاكِرًا، وَلِحُسْنِ اٰلاۤئِهٖ نَاشِرًا؛
+% اَلَّذِيْ خَلَقَ ٱلْمَوْتَ وَٱلْحَيٰوةَ، وَٱلْخَيْرَ وَٱلشَّرَّ،
+% وَٱلنَّفْعَ وَٱلضَّرَّ، وَٱلسُّكُوْنَ وَٱلْحَرَكَةَ، وَٱلْأَرْوَاحَ
+% وَٱلْأَجْسَامَ، وَٱلذِّكْرَ وَٱلنِّسْيَانَ.
+%
+% \resetfontcolorscheme
+%
+% اَلْحَمْ‍دُ لِلّٰهِ حَمْدَ مُعْتَرِفٍ بِحَمْدِهٖ، مُغْتَرِفٌ مِنْ بِحَارِ
+% مَجْدِهٖ، بِلِسَانِ ٱلثَّنَاۤءِ شَاكِرًا، وَلِحُسْنِ اٰلاۤئِهٖ نَاشِرًا؛
+% اَلَّذِيْ خَلَقَ ٱلْمَوْتَ وَٱلْحَيٰوةَ، وَٱلْخَيْرَ وَٱلشَّرَّ،
+% وَٱلنَّفْعَ وَٱلضَّرَّ، وَٱلسُّكُوْنَ وَٱلْحَرَكَةَ، وَٱلْأَرْوَاحَ
+% وَٱلْأَجْسَامَ، وَٱلذِّكْرَ وَٱلنِّسْيَانَ.
+%
+% \stoptext
diff --git a/tex/context/base/font-ini.mkiv b/tex/context/base/font-ini.mkiv
index 16ca08160..2b4dbdaf7 100644
--- a/tex/context/base/font-ini.mkiv
+++ b/tex/context/base/font-ini.mkiv
@@ -2719,8 +2719,8 @@
{\dotripleargument\dodefinefontfeature}
\def\dodefinefontfeature[#1][#2][#3]%
- {\global\expandafter\chardef\csname\??fq=#1\endcsname
- \ctxlua{tex.write(fonts.define.specify.preset_context("#1","#2","#3"))}\relax}
+ {\global\expandafter\chardef\csname\??fq=#1\endcsname % beware () needed as we get two values returned
+ \ctxlua{tex.write((fonts.define.specify.preset_context("#1","#2","#3")))}\relax}
\definefontfeature
[default]
diff --git a/tex/context/base/font-mis.lua b/tex/context/base/font-mis.lua
index a1b717217..6cdb076ac 100644
--- a/tex/context/base/font-mis.lua
+++ b/tex/context/base/font-mis.lua
@@ -11,7 +11,7 @@ local lower, strip = string.lower, string.strip
fonts.otf = fonts.otf or { }
-fonts.otf.version = fonts.otf.version or 2.631
+fonts.otf.version = fonts.otf.version or 2.633
fonts.otf.pack = true
fonts.otf.cache = containers.define("fonts", "otf", fonts.otf.version, true)
diff --git a/tex/context/base/font-otf.lua b/tex/context/base/font-otf.lua
index 59aff301b..653d3e95a 100644
--- a/tex/context/base/font-otf.lua
+++ b/tex/context/base/font-otf.lua
@@ -82,7 +82,7 @@ otf.features.default = otf.features.default or { }
otf.enhancers = otf.enhancers or { }
otf.glists = { "gsub", "gpos" }
-otf.version = 2.631 -- beware: also sync font-mis.lua
+otf.version = 2.633 -- beware: also sync font-mis.lua
otf.pack = true -- beware: also sync font-mis.lua
otf.syncspace = true
otf.notdef = false
@@ -202,7 +202,7 @@ local enhancers = {
"patch bugs",
"merge cid fonts", "prepare unicode", "cleanup ttf tables", "compact glyphs", "reverse coverage",
"cleanup aat", "enrich with features", "add some missing characters",
---~ "reorganize mark classes",
+ "reorganize mark classes",
"reorganize kerns", -- moved here
"flatten glyph lookups", "flatten anchor tables", "flatten feature tables",
"prepare luatex tables",
@@ -674,21 +674,15 @@ otf.enhancers["analyse subtables"] = function(data,filename)
end
local flags = gk.flags
if flags then
---~ gk.flags = { -- forcing false packs nicer
---~ (flags.ignorecombiningmarks and "mark") or false,
---~ (flags.ignoreligatures and "ligature") or false,
---~ (flags.ignorebaseglyphs and "base") or false,
---~ flags.r2l or false,
---~ }
gk.flags = { -- forcing false packs nicer
- ((flags.ignorecombiningmarks or flags.mark_class) and "mark") or false,
- ( flags.ignoreligatures and "ligature") or false,
- ( flags.ignorebaseglyphs and "base") or false,
- flags.r2l or false,
+ (flags.ignorecombiningmarks and "mark") or false,
+ (flags.ignoreligatures and "ligature") or false,
+ (flags.ignorebaseglyphs and "base") or false,
+ flags.r2l or false,
}
---~ if flags.mark_class then
---~ gk.markclass = luatex.markclasses[flags.mark_class]
---~ end
+ if flags.mark_class then
+ gk.markclass = luatex.markclasses[flags.mark_class]
+ end
end
end
end
diff --git a/tex/context/base/font-otn.lua b/tex/context/base/font-otn.lua
index 14837d2e1..880b52a49 100644
--- a/tex/context/base/font-otn.lua
+++ b/tex/context/base/font-otn.lua
@@ -1443,12 +1443,11 @@ function chainprocs.gpos_pair(start,stop,kind,chainname,currentcontext,cache,cur
local factor = tfmdata.factor
while snext and snext.id == glyph and snext.subtype<256 and snext.font == currentfont do
local nextchar = snext.char
-local krn = kerns[nextchar]
+ local krn = kerns[nextchar]
if not krn and marks[nextchar] then
prev = snext
snext = snext.next
else
---~ local krn = kerns[nextchar]
if not krn then
-- skip
elseif type(krn) == "table" then
@@ -1520,7 +1519,7 @@ local function normal_handle_contextchain(start,kind,chainname,contexts,sequence
local flags, done = sequence.flags, false
local skipmark, skipligature, skipbase = flags[1], flags[2], flags[3]
local someskip = skipmark or skipligature or skipbase -- could be stored in flags for a fast test (hm, flags could be false !)
- local markclass = sequence.markclass
+ local markclass = sequence.markclass -- todo, first we need a proper test
for k=1,#contexts do
local match, current, last = true, start, start
local ck = contexts[k]
@@ -1554,10 +1553,7 @@ local function normal_handle_contextchain(start,kind,chainname,contexts,sequence
local ccd = descriptions[char]
if ccd then
local class = ccd.class
---~ if class == skipmark or class == skipligature or class == skipbase or (markclass and not markclass[char]) then
- if class == skipmark or class == skipligature or class == skipbase then
---~ if someskip and (class == skipmark or class == skipligature or class == skipbase) then
- -- skip 'm
+ if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then
if trace_skips then
show_skip(kind,chainname,char,ck,class)
end
@@ -1601,10 +1597,7 @@ local function normal_handle_contextchain(start,kind,chainname,contexts,sequence
local ccd = descriptions[char]
if ccd then
local class = ccd.class
---~ if class == skipmark or class == skipligature or class == skipbase or (markclass and not markclass[char]) then
- if class == skipmark or class == skipligature or class == skipbase then
---~ if someskip and class == skipmark or class == skipligature or class == skipbase then
- -- skip 'm
+ if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then
if trace_skips then
show_skip(kind,chainname,char,ck,class)
end
@@ -1658,10 +1651,7 @@ local function normal_handle_contextchain(start,kind,chainname,contexts,sequence
local ccd = descriptions[char]
if ccd then
local class = ccd.class
---~ if class == skipmark or class == skipligature or class == skipbase or (markclass and not markclass[char]) then
- if class == skipmark or class == skipligature or class == skipbase then
---~ if someskip and class == skipmark or class == skipligature or class == skipbase then
- -- skip 'm
+ if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then
if trace_skips then
show_skip(kind,chainname,char,ck,class)
end
diff --git a/tex/context/base/font-pat.lua b/tex/context/base/font-pat.lua
index 4a97248c4..f2b86a48a 100644
--- a/tex/context/base/font-pat.lua
+++ b/tex/context/base/font-pat.lua
@@ -10,6 +10,8 @@ local match, lower = string.match, string.lower
local trace_loading = false trackers.register("otf.loading", function(v) trace_loading = v end)
+-- this will become a per font patch file
+--
-- older versions of latin modern didn't have the designsize set
-- so for them we get it from the name
@@ -92,3 +94,18 @@ local function patch(data,filename)
end
patches["palatino.*arabic"] = patch
+
+local function patch(data,filename)
+    -- enforce a lower bound of 2800 on the DisplayOperatorMinHeight math
+    -- parameter; a missing value counts as 0 and is therefore patched too
+    local mathdata = data.math
+    if not mathdata then
+        return
+    end
+    local height = mathdata.DisplayOperatorMinHeight or 0
+    if height < 2800 then
+        if trace_loading then
+            logs.report("load otf","patching DisplayOperatorMinHeight(%s -> 2800)",height)
+        end
+        mathdata.DisplayOperatorMinHeight = 2800
+    end
+end
+
+patches["cambria"] = patch
diff --git a/tex/context/base/font-tfm.lua b/tex/context/base/font-tfm.lua
index 905da59ce..73412f821 100644
--- a/tex/context/base/font-tfm.lua
+++ b/tex/context/base/font-tfm.lua
@@ -258,7 +258,9 @@ function tfm.do_scale(tfmtable, scaledpoints)
t.unicodes = tfmtable.unicodes
t.indices = tfmtable.indices
t.marks = tfmtable.marks
+t.goodies = tfmtable.goodies
t.colorscheme = tfmtable.colorscheme
+--~ t.embedding = tfmtable.embedding
t.descriptions = descriptions
if tfmtable.fonts then
t.fonts = table.fastcopy(tfmtable.fonts) -- hm also at the end
diff --git a/tex/context/base/l-table.lua b/tex/context/base/l-table.lua
index 9e0b12f7c..d7c2b0250 100644
--- a/tex/context/base/l-table.lua
+++ b/tex/context/base/l-table.lua
@@ -614,7 +614,7 @@ function table.tofile(filename,root,name,reduce,noquotes,hexify)
end
end
-local function flatten(t,f,complete)
+local function flatten(t,f,complete) -- is this used? meybe a variant with next, ...
for i=1,#t do
local v = t[i]
if type(v) == "table" then
@@ -643,6 +643,24 @@ end
table.flatten_one_level = table.unnest
+-- a better one:
+
+local function flattened(t,f)
+    -- copy every non-table value of t, and of the tables nested in it, into
+    -- f under the key it was found with; f is created when not given and is
+    -- returned (equal keys found at different levels overwrite each other)
+    f = f or { }
+    for key, value in next, t do
+        if type(value) ~= "table" then
+            f[key] = value
+        else
+            flattened(value,f)
+        end
+    end
+    return f
+end
+
+table.flattened = flattened
+
-- the next three may disappear
function table.remove_value(t,value) -- todo: n
diff --git a/tex/context/base/luat-dum.lua b/tex/context/base/luat-dum.lua
index 699a0feef..dd5ade7a9 100644
--- a/tex/context/base/luat-dum.lua
+++ b/tex/context/base/luat-dum.lua
@@ -13,6 +13,11 @@ statistics = {
starttiming = dummyfunction,
stoptiming = dummyfunction,
}
+directives = {
+ register = dummyfunction,
+ enable = dummyfunction,
+ disable = dummyfunction,
+}
trackers = {
register = dummyfunction,
enable = dummyfunction,
diff --git a/tex/context/base/lxml-aux.lua b/tex/context/base/lxml-aux.lua
index ccff8e90d..eb2f3bb85 100644
--- a/tex/context/base/lxml-aux.lua
+++ b/tex/context/base/lxml-aux.lua
@@ -111,7 +111,7 @@ function xml.collect_texts(root, pattern, flatten) -- todo: variant with handle
if collected and flatten then
local xmltostring = xml.tostring
for c=1,#collected do
- collected[c] = xmltostring(collected[c])
+ collected[c] = xmltostring(collected[c].dt)
end
end
return collected or { }
diff --git a/tex/context/base/lxml-ctx.lua b/tex/context/base/lxml-ctx.lua
new file mode 100644
index 000000000..0ac12ab3c
--- /dev/null
+++ b/tex/context/base/lxml-ctx.lua
@@ -0,0 +1,127 @@
+if not modules then modules = { } end modules ['lxml-ctx'] = {
+ version = 1.001,
+ comment = "companion to lxml-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- is this still used?
+
+xml.ctx = { }
+xml.ctx.enhancers = { }
+
+
+-- hashen
+
+function xml.ctx.enhancers.compound(root,lpath,before,tokens,after) -- todo lpeg
+    -- builds a three capture pattern (before)(tokens)(after) and lets
+    -- xml.enhance put a <compound token="..."/> element in place of the
+    -- middle capture; each part falls back to a default when not given
+    local word = "[%a%d][%a%d][%a%d]"
+    local pattern = "(" .. (before or word) .. ")(" .. (tokens or "[%/%-]") .. ")(" .. (after or word) .. ")"
+    local function action(a,b,c)
+        local token = string.format("%q",b)
+        return a .. "<compound token=" .. token .. "/>" .. c
+    end
+    xml.enhance(root,lpath,pattern,action) -- still present?
+end
+
+local loaded = { }
+
+local nodesettostring = xml.nodesettostring
+
+-- maybe use detokenize instead of \type
+
+function xml.ctx.tshow(specification)
+    -- Typesets a report on how an lpath pattern is parsed and, when
+    -- specification.xmlroot is set, on what the pattern matches in the buffer
+    -- with that name. Fields read from specification: pattern, xmlroot,
+    -- attribute, title (command used for the heading, default "type") and
+    -- warning (also show the parser comment). Does nothing without context.
+    if not context then
+        return
+    end
+    local pattern   = specification.pattern
+    local xmlroot   = specification.xmlroot
+    local attribute = specification.attribute
+    local xmlpattern = pattern
+    if not string.find(xmlpattern,"^[%a]+://") then
+        xmlpattern = "xml://" .. pattern
+    end
+    -- these two have to be local, otherwise they end up as globals
+    local parsed = xml.parse_pattern(xmlpattern)
+    local titlecommand = specification.title or "type"
+    if parsed.state then
+        context[titlecommand]("pattern: " .. pattern .. " (".. parsed.state .. ")")
+    else
+        context[titlecommand]("pattern: " .. pattern)
+    end
+    context.starttabulate({ "|Tr|Tl|Tp|" } )
+    if specification.warning and parsed.comment then
+        context.NC()
+        context("!")
+        context.NC()
+        context.rlap(parsed.comment)
+        context.NR()
+        context.TB()
+    end
+    -- one table row per parsed step: index, kind, details
+    for p=1,#parsed do
+        local pp = parsed[p]
+        local kind = pp.kind
+        context.NC()
+        context(p)
+        context.NC()
+        context(kind)
+        context.NC()
+        if kind == "axis" then
+            context(pp.axis)
+        elseif kind == "nodes" then
+            context(nodesettostring(pp.nodes,pp.nodetest))
+        elseif kind == "expression" then
+--~             context("%s => %s",pp.expression,pp.converted)
+            context(pp.expression)
+        elseif kind == "finalizer" then
+            -- arguments can be absent, the lpath tracer guards this the same way
+            context("%s(%s)",pp.name,pp.arguments or "")
+        elseif kind == "error" and pp.comment then
+            context(pp.comment)
+        end
+        context.NC()
+        context.NR()
+    end
+    context.stoptabulate()
+    if xmlroot and xmlroot ~= "" then
+        -- a buffer is converted only once and then kept in loaded
+        if not loaded[xmlroot] then
+            loaded[xmlroot] = { xml.convert(buffers.content(xmlroot) or "") }
+        end
+        local collected = xml.parse_apply(loaded[xmlroot],xmlpattern)
+        if collected then
+            -- type() always returns a string so no test on tc is needed
+            local tc = type(collected)
+            context.blank()
+            context.type("result : ")
+            if tc == "string" then
+                context.type(collected)
+            elseif tc == "table" then
+                if collected.tg then
+                    -- a single element, handle it as a list of one
+                    collected = { collected }
+                end
+                for c=1,#collected do
+                    local cc = collected[c]
+                    if attribute and attribute ~= "" then
+                        local ccat = cc.at
+                        local a = ccat and ccat[attribute]
+                        if a and a ~= "" then
+                            context.type(a)
+                            context.type(">")
+                        end
+                    end
+                    local ccns = cc.ns
+                    if ccns == "" then
+                        context.type(cc.tg)
+                    else
+                        context.type(ccns .. ":" .. cc.tg)
+                    end
+                    context.space()
+                end
+            else
+                context.type(tostring(tc))
+            end
+            context.blank()
+        end
+    end
+end
diff --git a/tex/context/base/lxml-ctx.mkiv b/tex/context/base/lxml-ctx.mkiv
new file mode 100644
index 000000000..44d95ba96
--- /dev/null
+++ b/tex/context/base/lxml-ctx.mkiv
@@ -0,0 +1,64 @@
+%D \module
+%D [ file=lxml-ctx,
+%D version=2007.08.17,
+%D title=\CONTEXT\ \XML\ Support,
+%D subtitle=Initialization,
+%D author=Hans Hagen,
+%D date=\currentdate,
+%D copyright={PRAGMA / Hans Hagen \& Ton Otten}]
+%C
+%C This module is part of the \CONTEXT\ macro||package and is
+%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
+%C details.
+
+%D Experimental. This might change! Also, it might become a module
+%D instead of core code.
+
+\writestatus{loading}{ConTeXt XML Support / Goodies}
+
+\registerctxluafile{lxml-ctx}{1.001}
+
+\unprotect
+
+% the letterbar is a messy hack and is needed for the tabulate
+
+\settrue \xmllshowbuffer
+\setfalse\xmllshowtitle
+\settrue \xmllshowwarning
+
+\definehead[lshowtitle][subsubsubsubsubject]
+\setuphead[lshowtitle][style=\tta]
+
+% \def\setuplxmlshow[#1]%
+% {\dodoubleargument\getparameters[\??xl]}
+
+% \xmllshow{pattern}: passes pattern #1 to the Lua function xml.ctx.tshow;
+% the title and warning fields are only passed when the conditionals
+% \xmllshowtitle and \xmllshowwarning are true. Inside the group | is let
+% to \letterbar.
+\def\xmllshow#1%
+ {\begingroup
+ \let|=\letterbar
+ \ctxlua{xml.ctx.tshow {
+ pattern = \!!bs#1\!!es,
+ \ifconditional\xmllshowtitle
+ title = "lshowtitle",
+ \fi
+ \ifconditional\xmllshowwarning
+ warning = true,
+ \fi
+ } }%
+ \endgroup}
+
+% \xmllshowbuffer{root}{pattern}{attribute}: passes pattern #2 to the Lua
+% function xml.ctx.tshow; #1 is passed as xmlroot and #3 as attribute, the
+% warning field depends on the conditional \xmllshowwarning. Inside the
+% group | is let to \letterbar.
+% NOTE(review): this macro has the same name as the conditional that is set
+% with \settrue \xmllshowbuffer earlier in this file and that is tested below
+% with \ifconditional; the definition here replaces that conditional, so
+% confirm that the test still does what is intended.
+\def\xmllshowbuffer#1#2#3%
+ {\begingroup
+ \let|=\letterbar
+ \ctxlua{xml.ctx.tshow {
+ pattern = \!!bs#2\!!es,
+ \ifconditional\xmllshowbuffer
+ xmlroot = "#1",
+ attribute = "#3",
+ \fi
+ \ifconditional\xmllshowwarning
+ warning = true,
+ \fi
+ } }%
+ \endgroup}
+
+\protect
diff --git a/tex/context/base/lxml-ent.lua b/tex/context/base/lxml-ent.lua
index 69c37e8f9..9003c9d83 100644
--- a/tex/context/base/lxml-ent.lua
+++ b/tex/context/base/lxml-ent.lua
@@ -8,6 +8,7 @@ if not modules then modules = { } end modules ['lxml-ent'] = {
local type, next = type, next
local texsprint, ctxcatcodes = tex.sprint, tex.ctxcatcodes
+local utf = unicode.utf8
local utfupper = utf.upper
--[[ldx--
diff --git a/tex/context/base/lxml-lpt.lua b/tex/context/base/lxml-lpt.lua
index 184d2f1ae..0f5407bda 100644
--- a/tex/context/base/lxml-lpt.lua
+++ b/tex/context/base/lxml-lpt.lua
@@ -35,8 +35,9 @@ a/b/c/text() a/b/c/text(1) a/b/c/text(-1) a/b/c/text(n)
</typing>
--ldx]]--
-local trace_lpath = false if trackers then trackers.register("xml.lpath", function(v) trace_lpath = v end) end
-local trace_lparse = false if trackers then trackers.register("xml.lparse", function(v) trace_lparse = v end) end
+local trace_lpath = false if trackers then trackers.register("xml.path", function(v) trace_lpath = v end) end
+local trace_lparse = false if trackers then trackers.register("xml.parse", function(v) trace_lparse = v end) end
+local trace_lprofile = false if trackers then trackers.register("xml.profile", function(v) trace_lpath = v trace_lparse = v trace_lprofile = v end) end
--[[ldx--
<p>We've now arrived at an interesting part: accessing the tree using a subset
@@ -694,7 +695,7 @@ local function parse_pattern(pattern) -- the gain of caching is rather minimal
parsed = { pattern = pattern }
end
cache[pattern] = parsed
- if trace_lparse then
+ if trace_lparse and not trace_lprofile then
lshow(parsed)
end
end
@@ -714,6 +715,73 @@ end
-- caching found lookups saves not that much (max .1 sec on a 8 sec run)
-- and it also messes up finalizers
+local profiled = { } xml.profiled = profiled
+
+local function profiled_apply(list,parsed,nofparsed)
+    -- same stepwise application as the regular lpath apply, but with
+    -- counters kept per pattern in profiled: tested (each call), matched
+    -- (something was left at the end or a finalizer ran) and finalized
+    local key = parsed.pattern
+    local stats = profiled[key]
+    if not stats then
+        stats = { tested = 0, matched = 0, finalized = 0 }
+        profiled[key] = stats
+    end
+    stats.tested = stats.tested + 1
+    local collected = list
+    for i=1,nofparsed do
+        local step = parsed[i]
+        local kind = step.kind
+        if kind == "finalizer" then
+            -- a finalizer ends the run and counts as match whatever it returns
+            collected = step.finalizer(collected)
+            stats.matched = stats.matched + 1
+            stats.finalized = stats.finalized + 1
+            return collected
+        elseif kind == "axis" then
+            collected = apply_axis[step.axis](collected)
+        elseif kind == "nodes" then
+            collected = apply_nodes(collected,step.nodetest,step.nodes)
+        elseif kind == "expression" then
+            collected = apply_expression(collected,step.evaluator,i)
+        end
+        if not collected or #collected == 0 then
+            return nil
+        end
+    end
+    if collected then
+        stats.matched = stats.matched + 1
+    end
+    return collected
+end
+
+-- Tracing variant of the lpath apply loop: applies the nofparsed steps of
+-- parsed to list one after another and reports the size of the intermediate
+-- result after each step. Returns the collected result, or nil as soon as a
+-- step leaves nothing; a finalizer step ends the run and its result is
+-- returned as is.
+local function traced_apply(list,parsed,nofparsed)
+ -- also show the parsed pattern when the parse tracker is enabled
+ if trace_lparse then
+ lshow(parsed)
+ end
+ logs.report("lpath", "collecting : %s",parsed.pattern)
+ logs.report("lpath", " root tags : %s",tagstostring(list))
+ local collected = list
+ for i=1,nofparsed do
+ local pi = parsed[i]
+ local kind = pi.kind
+ if kind == "axis" then
+ collected = apply_axis[pi.axis](collected)
+ logs.report("lpath", "% 10i : ax : %s",(collected and #collected) or 0,pi.axis)
+ elseif kind == "nodes" then
+ collected = apply_nodes(collected,pi.nodetest,pi.nodes)
+ logs.report("lpath", "% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest))
+ elseif kind == "expression" then
+ collected = apply_expression(collected,pi.evaluator,i)
+ logs.report("lpath", "% 10i : ex : %s",(collected and #collected) or 0,pi.expression)
+ elseif kind == "finalizer" then
+ collected = pi.finalizer(collected)
+ -- arguments can be absent, hence the empty string fallback
+ logs.report("lpath", "% 10i : fi : %s : %s(%s)",(collected and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
+ return collected
+ end
+ -- nothing left, so the remaining steps cannot match either
+ if not collected or #collected == 0 then
+ return nil
+ end
+ end
+ return collected
+end
+
local function parse_apply(list,pattern)
-- we avoid an extra call
local parsed = cache[pattern]
@@ -730,60 +798,35 @@ local function parse_apply(list,pattern)
return
end
local nofparsed = #parsed
- if nofparsed > 0 then
- if trace_lpath then
- if trace_lparse then
- lshow(parsed)
- end
- logs.report("lpath", "collecting : %s",pattern)
- logs.report("lpath", " root tags : %s",tagstostring(list))
- local collected = list
- for i=1,nofparsed do
- local pi = parsed[i]
- local kind = pi.kind
- if kind == "axis" then
- collected = apply_axis[pi.axis](collected)
- -- collected = pi.apply(collected)
- logs.report("lpath", "% 10i : ax : %s",(collected and #collected) or 0,pi.axis)
- elseif kind == "nodes" then
- collected = apply_nodes(collected,pi.nodetest,pi.nodes)
- logs.report("lpath", "% 10i : ns : %s",(collected and #collected) or 0,nodesettostring(pi.nodes,pi.nodetest))
- elseif kind == "expression" then
- collected = apply_expression(collected,pi.evaluator,i)
- logs.report("lpath", "% 10i : ex : %s",(collected and #collected) or 0,pi.expression)
- elseif kind == "finalizer" then
- collected = pi.finalizer(collected)
- logs.report("lpath", "% 10i : fi : %s : %s(%s)",(collected and #collected) or 0,parsed.protocol or xml.defaultprotocol,pi.name,pi.arguments or "")
- return collected
- end
- if not collected or #collected == 0 then
- return nil
+ if nofparsed == 0 then
+ -- something is wrong
+ elseif not trace_lpath then
+ -- normal apply, inline, no self
+ local collected = list
+ for i=1,nofparsed do
+ local pi = parsed[i]
+ local kind = pi.kind
+ if kind == "axis" then
+ local axis = pi.axis
+ if axis ~= "self" then
+ collected = apply_axis[axis](collected)
end
+ elseif kind == "nodes" then
+ collected = apply_nodes(collected,pi.nodetest,pi.nodes)
+ elseif kind == "expression" then
+ collected = apply_expression(collected,pi.evaluator,i)
+ elseif kind == "finalizer" then
+ return pi.finalizer(collected)
end
- return collected
- else
- local collected = list
- for i=1,nofparsed do
- local pi = parsed[i]
- local kind = pi.kind
- if kind == "axis" then
- local axis = pi.axis
- if axis ~= "self" then
- collected = apply_axis[axis](collected)
- end
- elseif kind == "nodes" then
- collected = apply_nodes(collected,pi.nodetest,pi.nodes)
- elseif kind == "expression" then
- collected = apply_expression(collected,pi.evaluator,i)
- elseif kind == "finalizer" then
- return pi.finalizer(collected)
- end
- if not collected or #collected == 0 then
- return nil
- end
+ if not collected or #collected == 0 then
+ return nil
end
- return collected
end
+ return collected
+ elseif trace_lprofile then
+ return profiled_apply(list,parsed,nofparsed)
+ else -- trace_lpath
+ return traced_apply(list,parsed,nofparsed)
end
end
diff --git a/tex/context/base/lxml-tex.lua b/tex/context/base/lxml-tex.lua
index 69f5b5116..0759a7277 100644
--- a/tex/context/base/lxml-tex.lua
+++ b/tex/context/base/lxml-tex.lua
@@ -1289,6 +1289,28 @@ statistics.register("lxml preparation time", function()
end
end)
+statistics.register("lxml lpath profile", function()
+    -- writes one line per profiled pattern (sorted by pattern) to the log
+    -- and returns a one line summary with the totals; returns nil when
+    -- nothing was profiled
+    local profiled = xml.profiled
+    if not (profiled and next(profiled)) then
+        return nil
+    end
+    local patterns = table.sortedkeys(profiled)
+    local tested, matched, finalized = 0, 0, 0
+    texio.write_nl("log","\nbegin of lxml profile\n")
+    texio.write_nl("log","\n tested matched finalized pattern\n\n")
+    for i=1,#patterns do
+        local pattern = patterns[i]
+        local counts = profiled[pattern]
+        local t, m, f = counts.tested, counts.matched, counts.finalized
+        tested = tested + t
+        matched = matched + m
+        finalized = finalized + f
+        texio.write_nl("log",format("%9i %9i %9i %s",t,m,f,pattern))
+    end
+    texio.write_nl("log","\nend of lxml profile\n")
+    return format("%s patterns, %s tested, %s matched, %s finalized (see log for details)",#patterns,tested,matched,finalized)
+end)
+
+
-- misc
function lxml.nonspace(id,pattern) -- slow, todo loop
diff --git a/tex/context/base/lxml-xml.lua b/tex/context/base/lxml-xml.lua
index 7ff3f5955..215635e78 100644
--- a/tex/context/base/lxml-xml.lua
+++ b/tex/context/base/lxml-xml.lua
@@ -173,7 +173,15 @@ local function empty(collected)
local edt = e.dt
if edt then
local n = #edt
- if (n > 2) or (n > 0 and edt[1] == "") then
+ if n == 1 then
+ local edk = edt[1]
+ local typ = type(edk)
+ if typ == "table" then
+ return false
+ elseif edk ~= "" then -- maybe an extra tester for spacing only
+ return false
+ end
+ elseif n > 1 then
return false
end
end
diff --git a/tex/context/base/m-directives.tex b/tex/context/base/m-directives.tex
new file mode 100644
index 000000000..c958f6cad
--- /dev/null
+++ b/tex/context/base/m-directives.tex
@@ -0,0 +1,5 @@
+\doifnotmode{mkiv} {\endinput}
+
+\starttext
+ \showdirectives
+\stoptext
diff --git a/tex/context/base/m-track.tex b/tex/context/base/m-trackers.tex
index cfcbbabff..cfcbbabff 100644
--- a/tex/context/base/m-track.tex
+++ b/tex/context/base/m-trackers.tex
diff --git a/tex/context/base/node-inj.lua b/tex/context/base/node-inj.lua
index 2befb0167..5829513d3 100644
--- a/tex/context/base/node-inj.lua
+++ b/tex/context/base/node-inj.lua
@@ -312,6 +312,7 @@ function nodes.inject_kerns(head,where,keep)
local k = wx[p]
if k then
n.xoffset = p.xoffset - d[1] - k[2]
+--~ n.xoffset = p.xoffset - k[2]
else
n.xoffset = p.xoffset - d[1]
end
diff --git a/tex/context/base/trac-deb.lua b/tex/context/base/trac-deb.lua
index cd006e68b..4cd324922 100644
--- a/tex/context/base/trac-deb.lua
+++ b/tex/context/base/trac-deb.lua
@@ -194,13 +194,3 @@ function tracers.register_dump_hash(delta)
end
main.register_stop_actions(1,function() tracers.dump_hash(nil,true) end) -- at front
end
-
--- trackers (maybe group the show by class)
-
-function trackers.show()
- commands.writestatus("","")
- for k,v in ipairs(trackers.list()) do
- commands.writestatus("tracker",v)
- end
- commands.writestatus("","")
-end
diff --git a/tex/context/base/trac-deb.mkiv b/tex/context/base/trac-deb.mkiv
index 870c452ad..24e17f486 100644
--- a/tex/context/base/trac-deb.mkiv
+++ b/tex/context/base/trac-deb.mkiv
@@ -41,3 +41,8 @@
\def\resettrackers {\ctxlua{trackers.reset()}}
\def\enabletrackers [#1]{\ctxlua{trackers.enable("#1")}}
\def\disabletrackers[#1]{\ctxlua{trackers.disable("#1")}}
+
+\def\showdirectives {\ctxlua{directives.show()}}
+%def\resetdirectives {\ctxlua{directives.reset()}} % would be weird to use
+\def\enabledirectives [#1]{\ctxlua{directives.enable("#1")}}
+\def\disabledirectives[#1]{\ctxlua{directives.disable("#1")}}
diff --git a/tex/context/base/trac-inf.lua b/tex/context/base/trac-inf.lua
index 4386c0c96..1a1977f3f 100644
--- a/tex/context/base/trac-inf.lua
+++ b/tex/context/base/trac-inf.lua
@@ -161,3 +161,4 @@ function statistics.timed(action,report)
statistics.stoptiming(timer)
report("total runtime: %s",statistics.elapsedtime(timer))
end
+
diff --git a/tex/context/base/trac-tra.lua b/tex/context/base/trac-tra.lua
index 56ca02dfb..3a8f14074 100644
--- a/tex/context/base/trac-tra.lua
+++ b/tex/context/base/trac-tra.lua
@@ -10,12 +10,15 @@ if not modules then modules = { } end modules ['trac-tra'] = {
-- bound to a variable, like node.new, node.copy etc (contrary to for instance
-- node.has_attribute which is bound to a has_attribute local variable in mkiv)
+local getinfo = debug.getinfo
+local type, next = type, next
+local concat = table.concat
+local format, find, lower, gmatch, gsub = string.format, string.find, string.lower, string.gmatch, string.gsub
+
debugger = debugger or { }
local counters = { }
local names = { }
-local getinfo = debug.getinfo
-local format, find, lower, gmatch, gsub = string.format, string.find, string.lower, string.gmatch, string.gsub
-- one
@@ -143,11 +146,11 @@ end
--~ print("")
--~ debugger.showstats(print,3)
-trackers = trackers or { }
-
-local data, done = { }, { }
+setters = setters or { }
+setters.data = setters.data or { }
-local function set(what,value)
+local function set(t,what,value)
+ local data, done = t.data, t.done
if type(what) == "string" then
what = aux.settings_to_array(what) -- inefficient but ok
end
@@ -166,28 +169,30 @@ local function set(what,value)
end
end
-local function reset()
- for d, f in next, data do
+local function reset(t)
+ for d, f in next, t.data do
for i=1,#f do
f[i](false)
end
end
end
-local function enable(what)
- set(what,true)
+local function enable(t,what)
+ set(t,what,true)
end
-local function disable(what)
+local function disable(t,what)
+ local data = t.data
if not what or what == "" then
- done = { }
- reset()
+ t.done = { }
+ reset(t)
else
- set(what,false)
+ set(t,what,false)
end
end
-function trackers.register(what,...)
+function setters.register(t,what,...)
+ local data = t.data
what = lower(what)
local w = data[what]
if not w then
@@ -199,32 +204,32 @@ function trackers.register(what,...)
if typ == "function" then
w[#w+1] = fnc
elseif typ == "string" then
- w[#w+1] = function(value) set(fnc,value,nesting) end
+ w[#w+1] = function(value) set(t,fnc,value,nesting) end
end
end
end
-function trackers.enable(what)
- local e = trackers.enable
- trackers.enable, done = enable, { }
- enable(string.simpleesc(what))
- trackers.enable, done = e, { }
+function setters.enable(t,what)
+    -- While the patterns are applied t.enable is temporarily replaced by a
+    -- direct variant and t.done starts out empty; afterwards the original
+    -- function is put back. The temporary function has to bind t itself: the
+    -- local enable expects the setter as first argument, while t.enable is
+    -- called with the pattern only, so a plain 'enable' here would treat the
+    -- pattern as the setter when a handler calls t.enable again.
+    local e = t.enable
+    t.enable, t.done = function(...) enable(t,...) end, { }
+    enable(t,string.simpleesc(what))
+    t.enable, t.done = e, { }
end
-function trackers.disable(what)
- local e = trackers.disable
- trackers.disable, done = disable, { }
- disable(string.simpleesc(what))
- trackers.disable, done = e, { }
+function setters.disable(t,what)
+    -- While the patterns are applied t.disable is temporarily replaced by a
+    -- direct variant and t.done starts out empty; afterwards the original
+    -- function is put back. The temporary function has to bind t itself: the
+    -- local disable expects the setter as first argument, while t.disable is
+    -- called with the pattern only, so a plain 'disable' here would treat the
+    -- pattern as the setter when a handler calls t.disable again.
+    local e = t.disable
+    t.disable, t.done = function(...) disable(t,...) end, { }
+    disable(t,string.simpleesc(what))
+    t.disable, t.done = e, { }
end
-function trackers.reset()
- done = { }
- reset()
+function setters.reset(t)
+ t.done = { }
+ reset(t)
end
-function trackers.list() -- pattern
- local list = table.sortedkeys(data)
+function setters.list(t) -- pattern
+ local list = table.sortedkeys(t.data)
local user, system = { }, { }
for l=1,#list do
local what = list[l]
@@ -236,3 +241,136 @@ function trackers.list() -- pattern
end
return user, system
end
+
+function setters.show(t)
+    -- writes the names registered with setter t to the status channel, one
+    -- per line and tagged with t.name, with an empty status line before and
+    -- after; only the first list returned by setters.list is shown
+    local writestatus = commands.writestatus
+    writestatus("","")
+    local names = setters.list(t)
+    for i=1,#names do
+        writestatus(t.name,names[i])
+    end
+    writestatus("","")
+end
+
+-- we could have used a bit of oo and the trackers:enable syntax but
+-- there is already a lot of code around using the singular tracker
+
+function setters.new(name)
+    -- Creates a setter with the given name, stores it in setters.data[name]
+    -- and returns it. The functions in the table are closures over the
+    -- instance so that existing code can keep using the dot syntax, as in
+    -- trackers.enable("...").
+    local t
+    t = {
+        data     = { },
+        done     = { }, -- indexed when patterns are applied, so never leave it nil
+        name     = name,
+        enable   = function(...) return setters.enable  (t,...) end,
+        disable  = function(...) return setters.disable (t,...) end,
+        register = function(...) return setters.register(t,...) end,
+        reset    = function(...) return setters.reset   (t,...) end, -- was missing while trackers.reset() is still called
+        list     = function(...) return setters.list    (t,...) end, -- without the return the lists got lost
+        show     = function(...) return setters.show    (t,...) end,
+    }
+    setters.data[name] = t
+    return t
+end
+
+trackers = setters.new("trackers")
+directives = setters.new("directives")
+
+-- nice trick: we overload two of the directives related functions with variants that
+-- do tracing (itself using a tracker) .. proof of concept
+
+local trace_directives = false trackers.register("system.directives", function(v) trace_directives = v end)
+
+-- the original functions, called by the overloads below
+local e = directives.enable
+local d = directives.disable
+
+-- report what gets enabled, but only when the system.directives tracker is on
+function directives.enable(...)
+    if trace_directives then
+        commands.writestatus("directives","enabling: %s",concat({...}," "))
+    end
+    e(...)
+end
+
+-- report what gets disabled, but only when the system.directives tracker is on
+function directives.disable(...)
+    if trace_directives then
+        commands.writestatus("directives","disabling: %s",concat({...}," "))
+    end
+    d(...)
+end
+
+--~ -- old code:
+--
+--~ trackers = trackers or { }
+--~ local data, done = { }, { }
+--~ local function set(what,value)
+--~ if type(what) == "string" then
+--~ what = aux.settings_to_array(what) -- inefficient but ok
+--~ end
+--~ for i=1,#what do
+--~ local w = what[i]
+--~ for d, f in next, data do
+--~ if done[d] then
+--~ -- prevent recursion due to wildcards
+--~ elseif find(d,w) then
+--~ done[d] = true
+--~ for i=1,#f do
+--~ f[i](value)
+--~ end
+--~ end
+--~ end
+--~ end
+--~ end
+--~ local function reset()
+--~ for d, f in next, data do
+--~ for i=1,#f do
+--~ f[i](false)
+--~ end
+--~ end
+--~ end
+--~ local function enable(what)
+--~ set(what,true)
+--~ end
+--~ local function disable(what)
+--~ if not what or what == "" then
+--~ done = { }
+--~ reset()
+--~ else
+--~ set(what,false)
+--~ end
+--~ end
+--~ function trackers.register(what,...)
+--~ what = lower(what)
+--~ local w = data[what]
+--~ if not w then
+--~ w = { }
+--~ data[what] = w
+--~ end
+--~ for _, fnc in next, { ... } do
+--~ local typ = type(fnc)
+--~ if typ == "function" then
+--~ w[#w+1] = fnc
+--~ elseif typ == "string" then
+--~ w[#w+1] = function(value) set(fnc,value,nesting) end
+--~ end
+--~ end
+--~ end
+--~ function trackers.enable(what)
+--~ local e = trackers.enable
+--~ trackers.enable, done = enable, { }
+--~ enable(string.simpleesc(what))
+--~ trackers.enable, done = e, { }
+--~ end
+--~ function trackers.disable(what)
+--~ local e = trackers.disable
+--~ trackers.disable, done = disable, { }
+--~ disable(string.simpleesc(what))
+--~ trackers.disable, done = e, { }
+--~ end
+--~ function trackers.reset()
+--~ done = { }
+--~ reset()
+--~ end
+--~ function trackers.list() -- pattern
+--~ local list = table.sortedkeys(data)
+--~ local user, system = { }, { }
+--~ for l=1,#list do
+--~ local what = list[l]
+--~ if find(what,"^%*") then
+--~ system[#system+1] = what
+--~ else
+--~ user[#user+1] = what
+--~ end
+--~ end
+--~ return user, system
+--~ end
diff --git a/tex/generic/context/luatex-fonts-merged.lua b/tex/generic/context/luatex-fonts-merged.lua
index a1775edb5..9c513633b 100644
--- a/tex/generic/context/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
-- merged file : c:/data/develop/context/texmf/tex/generic/context/luatex-fonts-merged.lua
-- parent file : c:/data/develop/context/texmf/tex/generic/context/luatex-fonts.lua
--- merge date : 10/16/09 16:21:21
+-- merge date : 10/18/09 15:26:25
do -- begin closure to overcome local limits and interference
@@ -1107,7 +1107,7 @@ function table.tofile(filename,root,name,reduce,noquotes,hexify)
end
end
-local function flatten(t,f,complete)
+local function flatten(t,f,complete) -- is this used? meybe a variant with next, ...
for i=1,#t do
local v = t[i]
if type(v) == "table" then
@@ -1136,6 +1136,24 @@ end
table.flatten_one_level = table.unnest
+-- a better one:
+
+local function flattened(t,f)
+    -- copy every non-table value of t, and of the tables nested in it, into
+    -- f under the key it was found with; f is created when not given and is
+    -- returned (equal keys found at different levels overwrite each other)
+    f = f or { }
+    for key, value in next, t do
+        if type(value) ~= "table" then
+            f[key] = value
+        else
+            flattened(value,f)
+        end
+    end
+    return f
+end
+
+table.flattened = flattened
+
-- the next three may disappear
function table.remove_value(t,value) -- todo: n
@@ -1802,6 +1820,11 @@ statistics = {
starttiming = dummyfunction,
stoptiming = dummyfunction,
}
+directives = {
+ register = dummyfunction,
+ enable = dummyfunction,
+ disable = dummyfunction,
+}
trackers = {
register = dummyfunction,
enable = dummyfunction,
@@ -2681,6 +2704,7 @@ function nodes.inject_kerns(head,where,keep)
local k = wx[p]
if k then
n.xoffset = p.xoffset - d[1] - k[2]
+--~ n.xoffset = p.xoffset - k[2]
else
n.xoffset = p.xoffset - d[1]
end
@@ -3465,7 +3489,9 @@ function tfm.do_scale(tfmtable, scaledpoints)
t.unicodes = tfmtable.unicodes
t.indices = tfmtable.indices
t.marks = tfmtable.marks
+t.goodies = tfmtable.goodies
t.colorscheme = tfmtable.colorscheme
+--~ t.embedding = tfmtable.embedding
t.descriptions = descriptions
if tfmtable.fonts then
t.fonts = table.fastcopy(tfmtable.fonts) -- hm also at the end
@@ -5229,7 +5255,7 @@ otf.features.default = otf.features.default or { }
otf.enhancers = otf.enhancers or { }
otf.glists = { "gsub", "gpos" }
-otf.version = 2.631 -- beware: also sync font-mis.lua
+otf.version = 2.633 -- beware: also sync font-mis.lua
otf.pack = true -- beware: also sync font-mis.lua
otf.syncspace = true
otf.notdef = false
@@ -5349,7 +5375,7 @@ local enhancers = {
"patch bugs",
"merge cid fonts", "prepare unicode", "cleanup ttf tables", "compact glyphs", "reverse coverage",
"cleanup aat", "enrich with features", "add some missing characters",
---~ "reorganize mark classes",
+ "reorganize mark classes",
"reorganize kerns", -- moved here
"flatten glyph lookups", "flatten anchor tables", "flatten feature tables",
"prepare luatex tables",
@@ -5821,21 +5847,15 @@ otf.enhancers["analyse subtables"] = function(data,filename)
end
local flags = gk.flags
if flags then
---~ gk.flags = { -- forcing false packs nicer
---~ (flags.ignorecombiningmarks and "mark") or false,
---~ (flags.ignoreligatures and "ligature") or false,
---~ (flags.ignorebaseglyphs and "base") or false,
---~ flags.r2l or false,
---~ }
gk.flags = { -- forcing false packs nicer
- ((flags.ignorecombiningmarks or flags.mark_class) and "mark") or false,
- ( flags.ignoreligatures and "ligature") or false,
- ( flags.ignorebaseglyphs and "base") or false,
- flags.r2l or false,
+ (flags.ignorecombiningmarks and "mark") or false,
+ (flags.ignoreligatures and "ligature") or false,
+ (flags.ignorebaseglyphs and "base") or false,
+ flags.r2l or false,
}
---~ if flags.mark_class then
---~ gk.markclass = luatex.markclasses[flags.mark_class]
---~ end
+ if flags.mark_class then
+ gk.markclass = luatex.markclasses[flags.mark_class]
+ end
end
end
end
@@ -8751,12 +8771,11 @@ function chainprocs.gpos_pair(start,stop,kind,chainname,currentcontext,cache,cur
local factor = tfmdata.factor
while snext and snext.id == glyph and snext.subtype<256 and snext.font == currentfont do
local nextchar = snext.char
-local krn = kerns[nextchar]
+ local krn = kerns[nextchar]
if not krn and marks[nextchar] then
prev = snext
snext = snext.next
else
---~ local krn = kerns[nextchar]
if not krn then
-- skip
elseif type(krn) == "table" then
@@ -8828,7 +8847,7 @@ local function normal_handle_contextchain(start,kind,chainname,contexts,sequence
local flags, done = sequence.flags, false
local skipmark, skipligature, skipbase = flags[1], flags[2], flags[3]
local someskip = skipmark or skipligature or skipbase -- could be stored in flags for a fast test (hm, flags could be false !)
- local markclass = sequence.markclass
+ local markclass = sequence.markclass -- todo, first we need a proper test
for k=1,#contexts do
local match, current, last = true, start, start
local ck = contexts[k]
@@ -8862,10 +8881,7 @@ local function normal_handle_contextchain(start,kind,chainname,contexts,sequence
local ccd = descriptions[char]
if ccd then
local class = ccd.class
---~ if class == skipmark or class == skipligature or class == skipbase or (markclass and not markclass[char]) then
- if class == skipmark or class == skipligature or class == skipbase then
---~ if someskip and (class == skipmark or class == skipligature or class == skipbase) then
- -- skip 'm
+ if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then
if trace_skips then
show_skip(kind,chainname,char,ck,class)
end
@@ -8909,10 +8925,7 @@ local function normal_handle_contextchain(start,kind,chainname,contexts,sequence
local ccd = descriptions[char]
if ccd then
local class = ccd.class
---~ if class == skipmark or class == skipligature or class == skipbase or (markclass and not markclass[char]) then
- if class == skipmark or class == skipligature or class == skipbase then
---~ if someskip and class == skipmark or class == skipligature or class == skipbase then
- -- skip 'm
+ if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then
if trace_skips then
show_skip(kind,chainname,char,ck,class)
end
@@ -8966,10 +8979,7 @@ local function normal_handle_contextchain(start,kind,chainname,contexts,sequence
local ccd = descriptions[char]
if ccd then
local class = ccd.class
---~ if class == skipmark or class == skipligature or class == skipbase or (markclass and not markclass[char]) then
- if class == skipmark or class == skipligature or class == skipbase then
---~ if someskip and class == skipmark or class == skipligature or class == skipbase then
- -- skip 'm
+ if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then
if trace_skips then
show_skip(kind,chainname,char,ck,class)
end
@@ -10418,7 +10428,8 @@ if not modules then modules = { } end modules ['font-def'] = {
local format, concat, gmatch, match, find, lower = string.format, table.concat, string.gmatch, string.match, string.find, string.lower
local tostring, next = tostring, next
-local trace_defining = false trackers.register("fonts.defining", function(v) trace_defining = v end)
+local trace_defining = false trackers .register("fonts.defining", function(v) trace_defining = v end)
+local directive_embedall = false directives.register("fonts.embedall", function(v) directive_embedall = v end)
trackers.register("fonts.loading", "fonts.defining", "otf.loading", "afm.loading", "tfm.loading")
trackers.register("fonts.all", "fonts.*", "otf.*", "afm.*", "tfm.*")
@@ -10692,7 +10703,9 @@ function tfm.read(specification)
end
end
if tfmtable then
- if tfmtable.filename and fonts.dontembed[tfmtable.filename] then
+ if directive_embedall then
+ tfmtable.embedding = "full"
+ elseif tfmtable.filename and fonts.dontembed[tfmtable.filename] then
tfmtable.embedding = "no"
else
tfmtable.embedding = "subset"