summaryrefslogtreecommitdiff
path: root/tex/context/base/lxml-tex.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/lxml-tex.lua')
-rw-r--r--tex/context/base/lxml-tex.lua614
1 files changed, 502 insertions, 112 deletions
diff --git a/tex/context/base/lxml-tex.lua b/tex/context/base/lxml-tex.lua
index 2cbdfc886..550a06a18 100644
--- a/tex/context/base/lxml-tex.lua
+++ b/tex/context/base/lxml-tex.lua
@@ -11,7 +11,7 @@ if not modules then modules = { } end modules ['lxml-tex'] = {
-- be an cldf-xml helper library.
local utfchar = utf.char
-local concat, insert, remove = table.concat, table.insert, table.remove
+local concat, insert, remove, sortedkeys = table.concat, table.insert, table.remove, table.sortedkeys
local format, sub, gsub, find, gmatch, match = string.format, string.sub, string.gsub, string.find, string.gmatch, string.match
local type, next, tonumber, tostring, select = type, next, tonumber, tostring, select
local lpegmatch = lpeg.match
@@ -19,40 +19,66 @@ local P, S, C, Cc = lpeg.P, lpeg.S, lpeg.C, lpeg.Cc
local tex, xml = tex, xml
local lowerchars, upperchars, lettered = characters.lower, characters.upper, characters.lettered
+local basename, dirname, joinfile = file.basename, file.dirname, file.join
lxml = lxml or { }
local lxml = lxml
-local catcodenumbers = catcodes.numbers
-local ctxcatcodes = catcodenumbers.ctxcatcodes -- todo: use different method
-local notcatcodes = catcodenumbers.notcatcodes -- todo: use different method
-
-local commands = commands
-local context = context
-local contextsprint = context.sprint -- with catcodes (here we use fast variants, but with option for tracing)
-
-local xmlelements, xmlcollected, xmlsetproperty = xml.elements, xml.collected, xml.setproperty
-local xmlwithelements = xml.withelements
-local xmlserialize, xmlcollect, xmltext, xmltostring = xml.serialize, xml.collect, xml.text, xml.tostring
-local xmlapplylpath = xml.applylpath
-local xmlunprivatized, xmlprivatetoken, xmlprivatecodes = xml.unprivatized, xml.privatetoken, xml.privatecodes
-
-local variables = (interfaces and interfaces.variables) or { }
-
-local insertbeforevalue, insertaftervalue = utilities.tables.insertbeforevalue, utilities.tables.insertaftervalue
-
-local starttiming, stoptiming = statistics.starttiming, statistics.stoptiming
-
-local trace_setups = false trackers.register("lxml.setups", function(v) trace_setups = v end)
-local trace_loading = false trackers.register("lxml.loading", function(v) trace_loading = v end)
-local trace_access = false trackers.register("lxml.access", function(v) trace_access = v end)
-local trace_comments = false trackers.register("lxml.comments", function(v) trace_comments = v end)
-local trace_entities = false trackers.register("xml.entities", function(v) trace_entities = v end)
-
-local report_lxml = logs.reporter("xml","tex")
-local report_xml = logs.reporter("xml","tex")
-
-local forceraw, rawroot = false, nil
+local catcodenumbers = catcodes.numbers
+local ctxcatcodes = catcodenumbers.ctxcatcodes -- todo: use different method
+local notcatcodes = catcodenumbers.notcatcodes -- todo: use different method
+
+local commands = commands
+local context = context
+local contextsprint = context.sprint -- with catcodes (here we use fast variants, but with option for tracing)
+
+local implement = interfaces.implement
+
+local xmlelements = xml.elements
+local xmlcollected = xml.collected
+local xmlsetproperty = xml.setproperty
+local xmlwithelements = xml.withelements
+local xmlserialize = xml.serialize
+local xmlcollect = xml.collect
+local xmltext = xml.text
+local xmltostring = xml.tostring
+local xmlapplylpath = xml.applylpath
+local xmlunprivatized = xml.unprivatized
+local xmlprivatetoken = xml.privatetoken
+local xmlprivatecodes = xml.privatecodes
+local xmlstripelement = xml.stripelement
+local xmlinclusion = xml.inclusion
+local xmlinclusions = xml.inclusions
+local xmlbadinclusions = xml.badinclusions
+local xmlcontent = xml.content
+
+local variables = interfaces and interfaces.variables or { }
+
+local settings_to_hash = utilities.parsers.settings_to_hash
+local settings_to_set = utilities.parsers.settings_to_set
+local options_to_hash = utilities.parsers.options_to_hash
+local options_to_array = utilities.parsers.options_to_array
+
+local insertbeforevalue = utilities.tables.insertbeforevalue
+local insertaftervalue = utilities.tables.insertaftervalue
+
+local resolveprefix = resolvers.resolve
+
+local starttiming = statistics.starttiming
+local stoptiming = statistics.stoptiming
+
+local trace_setups = false trackers.register("lxml.setups", function(v) trace_setups = v end)
+local trace_loading = false trackers.register("lxml.loading", function(v) trace_loading = v end)
+local trace_access = false trackers.register("lxml.access", function(v) trace_access = v end)
+local trace_comments = false trackers.register("lxml.comments", function(v) trace_comments = v end)
+local trace_entities = false trackers.register("xml.entities", function(v) trace_entities = v end)
+local trace_selectors = false trackers.register("lxml.selectors",function(v) trace_selectors = v end)
+
+local report_lxml = logs.reporter("lxml","tex")
+local report_xml = logs.reporter("xml","tex")
+
+-- state for raw passthrough; both must be file-level locals
+local forceraw       = false
+local rawroot        = nil
-- tex entities
--
@@ -62,7 +88,7 @@ lxml.entities = lxml.entities or { }
storage.register("lxml/entities",lxml.entities,"lxml.entities")
---~ xml.placeholders.unknown_any_entity = nil -- has to be per xml
+-- xml.placeholders.unknown_any_entity = nil -- has to be per xml
local xmlentities = xml.entities
local texentities = lxml.entities
@@ -351,7 +377,7 @@ end
function lxml.checkindex(name)
local root = getid(name)
- return (root and root.index) or 0
+ return root and root.index or 0
end
function lxml.withindex(name,n,command) -- will change as name is always there now
@@ -414,7 +440,7 @@ function lxml.convert(id,data,entities,compress,currentresource)
end
function lxml.load(id,filename,compress,entities)
- filename = commands.preparedfile(filename) -- not commands!
+ filename = ctxrunner.preparedfile(filename)
if trace_loading then
report_lxml("loading file %a as %a",filename,id)
end
@@ -433,16 +459,43 @@ function lxml.register(id,xmltable,filename)
return xmltable
end
-function lxml.include(id,pattern,attribute,recurse)
+-- recurse prepare rootpath resolve basename
+
+-- Default option sets for lxml.include. They are looked up by key
+-- (options.recurse, options.prepare, options.rootpath, ...), so they have to
+-- be hashes and not arrays of names.
+
+local options_true = { recurse = true, prepare = true, rootpath = true } -- options == true
+local options_nil  = {                 prepare = true, rootpath = true } -- options == nil/false
+
+function lxml.include(id,pattern,attribute,options)
starttiming(xml)
local root = getid(id)
- xml.include(root,pattern,attribute,recurse,function(filename)
+ if options == true then
+ -- downward compatible
+ options = options_true
+ elseif not options then
+ -- downward compatible
+ options = options_nil
+ else
+ options = settings_to_hash(options) or { }
+ end
+ xml.include(root,pattern,attribute,options.recurse,function(filename)
if filename then
- filename = commands.preparedfile(filename)
- if file.dirname(filename) == "" and root.filename then
- local dn = file.dirname(root.filename)
- if dn ~= "" then
- filename = file.join(dn,filename)
+ -- preprocessing
+ if options.prepare then
+ filename = commands.preparedfile(filename)
+ end
+ -- handy if we have a flattened structure
+ if options.basename then
+ filename = basename(filename)
+ end
+ if options.resolve then
+ filename = resolveprefix(filename) or filename
+ end
+ -- some protection
+ if options.rootpath then
+ if dirname(filename) == "" and root.filename then
+ local dn = dirname(root.filename)
+ if dn ~= "" then
+ filename = joinfile(dn,filename)
+ end
end
end
if trace_loading then
@@ -457,6 +510,31 @@ function lxml.include(id,pattern,attribute,recurse)
stoptiming(xml)
end
+-- Pipes the result of xml.inclusion for the tree registered as 'id' to TeX;
+-- 'default' is handed through untouched. Nothing is typeset when there is
+-- no result.
+
+function lxml.inclusion(id,default)
+    local found = xmlinclusion(getid(id),default)
+    if not found then
+        return
+    end
+    context(found)
+end
+
+-- Typesets the list returned by xml.inclusions as a comma separated string;
+-- 'sorted' is passed on as-is.
+
+function lxml.inclusions(id,sorted)
+    local list = xmlinclusions(getid(id),sorted)
+    if not list then
+        return
+    end
+    local joined = concat(list,",")
+    context(joined)
+end
+
+-- Same as lxml.inclusions but for the list returned by xml.badinclusions.
+
+function lxml.badinclusions(id,sorted)
+    local list = xmlbadinclusions(getid(id),sorted)
+    if not list then
+        return
+    end
+    local joined = concat(list,",")
+    context(joined)
+end
+
+-- Hands the tree registered as 'id' to xml.save together with 'name'.
+
+function lxml.save(id,name)
+    local root = getid(id)
+    xml.save(root,name)
+end
+
function xml.getbuffer(name,compress,entities) -- we need to make sure that commands are processed
if not name or name == "" then
name = tex.jobname
@@ -538,30 +616,49 @@ local function tex_element(e,handlers)
end
end
+-- <?context-directive foo ... ?>
+-- <?context-foo-directive ... ?>
+
local pihandlers = { } xml.pihandlers = pihandlers
-local category = P("context-") * C((1-P("-"))^1) * P("-directive")
local space = S(" \n\r")
local spaces = space^0
local class = C((1-space)^0)
local key = class
+local rest = C(P(1)^0)
local value = C(P(1-(space * -1))^0)
-
-local parser = category * spaces * class * spaces * key * spaces * value
-
-pihandlers[#pihandlers+1] = function(str)
- if str then
- local a, b, c, d = lpegmatch(parser,str)
- if d then
- contextsprint(ctxcatcodes,"\\xmlcontextdirective{",a,"}{",b,"}{",c,"}{",d,"}")
+local category = P("context-") * (
+ C((1-P("-"))^1) * P("-directive")
+ + P("directive") * spaces * key
+ )
+
+local c_parser = category * spaces * value -- rest
+local k_parser = class * spaces * key * spaces * rest --value
+
+implement {
+ name = "xmlinstalldirective",
+ arguments = { "string", "string" },
+ actions = function(name,csname)
+ if csname then
+ local keyvalueparser = k_parser / context[csname]
+ local keyvaluechecker = function(category,rest,e)
+ lpegmatch(keyvalueparser,rest)
+ end
+ pihandlers[name] = keyvaluechecker
end
end
-end
+}
local function tex_pi(e,handlers)
local str = e.dt[1]
- for i=1,#pihandlers do
- pihandlers[i](str)
+ if str and str ~= "" then
+ local category, rest = lpegmatch(c_parser,str)
+ if category and rest and #rest > 0 then
+ local handler = pihandlers[category]
+ if handler then
+ handler(category,rest,e)
+ end
+ end
end
end
@@ -915,16 +1012,18 @@ function lxml.setsetup(id,pattern,setup)
end
end
end
+ elseif setup == "-" then
+ for c=1,nc do
+ collected[c].command = false
+ end
+ elseif setup == "+" then
+ for c=1,nc do
+ collected[c].command = true
+ end
else
for c=1,nc do
local e = collected[c]
- if setup == "-" then
- e.command = false
- elseif setup == "+" then
- e.command = true
- else
- e.command = e.tg
- end
+ e.command = e.tg
end
end
elseif trace_setups then
@@ -967,16 +1066,18 @@ function lxml.setsetup(id,pattern,setup)
end
end
end
+ elseif b == "-" then
+ for c=1,nc do
+ collected[c].command = false
+ end
+ elseif b == "+" then
+ for c=1,nc do
+ collected[c].command = true
+ end
else
for c=1,nc do
local e = collected[c]
- if b == "-" then
- e.command = false
- elseif b == "+" then
- e.command = true
- else
- e.command = a .. e.tg
- end
+ e.command = a .. e.tg
end
end
elseif trace_setups then
@@ -1112,11 +1213,13 @@ local function command(collected,cmd,otherwise)
local e = collected[c]
local ix = e.ix
local name = e.name
- if not ix then
+ if name and not ix then
lxml.addindex(name,false,true)
ix = e.ix
end
- if wildcard then
+ if not ix or not name then
+ report_lxml("no valid node index for element %a using command %s",name or "?",cmd)
+ elseif wildcard then
contextsprint(ctxcatcodes,"\\xmlw{",(gsub(cmd,"%*",e.tg)),"}{",name,"::",ix,"}")
else
contextsprint(ctxcatcodes,"\\xmlw{",cmd,"}{",name,"::",ix,"}")
@@ -1186,7 +1289,7 @@ local function stripped(collected) -- tricky as we strip in place
local nc = #collected
if nc > 0 then
for c=1,nc do
- cprint(xml.stripelement(collected[c]))
+ cprint(xmlstripelement(collected[c]))
end
end
end
@@ -1311,10 +1414,11 @@ function texfinalizers.name(collected,n)
c = collected[nc-n+1]
end
if c then
- if c.ns == "" then
+ local ns = c.ns
+ if not ns or ns == "" then
contextsprint(ctxcatcodes,c.tg)
else
- contextsprint(ctxcatcodes,c.ns,":",c.tg)
+ contextsprint(ctxcatcodes,ns,":",c.tg)
end
end
end
@@ -1327,11 +1431,11 @@ function texfinalizers.tags(collected,nonamespace)
if nc > 0 then
for c=1,nc do
local e = collected[c]
- local ns, tg = e.ns, e.tg
- if nonamespace or ns == "" then
- contextsprint(ctxcatcodes,tg)
+ local ns = e.ns
+ if nonamespace or (not ns or ns == "") then
+ contextsprint(ctxcatcodes,e.tg)
else
- contextsprint(ctxcatcodes,ns,":",tg)
+ contextsprint(ctxcatcodes,ns,":",e.tg)
end
end
end
@@ -1341,11 +1445,10 @@ end
--
local function verbatim(id,before,after)
- local root = getid(id)
- if root then
- if before then contextsprint(ctxcatcodes,before,"[",root.tg or "?","]") end
- lxml.toverbatim(xmltostring(root.dt))
---~ lxml.toverbatim(xml.totext(root.dt))
+ local e = getid(id)
+ if e then
+ if before then contextsprint(ctxcatcodes,before,"[",e.tg or "?","]") end
+ lxml.toverbatim(xmltostring(e.dt)) -- lxml.toverbatim(xml.totext(e.dt))
if after then contextsprint(ctxcatcodes,after) end
end
end
@@ -1429,7 +1532,7 @@ end
lxml.content = text
function lxml.position(id,pattern,n)
- position(xmlapplylpath(getid(id),pattern),n)
+ position(xmlapplylpath(getid(id),pattern),tonumber(n))
end
function lxml.chainattribute(id,pattern,a,default)
@@ -1445,72 +1548,136 @@ function lxml.concat(id,pattern,separator,lastseparator,textonly)
end
function lxml.element(id,n)
- position(xmlapplylpath(getid(id),"/*"),n)
+ position(xmlapplylpath(getid(id),"/*"),tonumber(n)) -- tonumber handy
end
lxml.index = lxml.position
function lxml.pos(id)
- local root = getid(id)
- contextsprint(ctxcatcodes,(root and root.ni) or 0)
-end
+ local e = getid(id)
+ contextsprint(ctxcatcodes,e and e.ni or 0)
+end
+
+-- function lxml.att(id,a,default)
+-- local root = getid(id)
+-- if root then
+-- local at = root.at
+-- local str = (at and at[a]) or default
+-- if str and str ~= "" then
+-- contextsprint(notcatcodes,str)
+-- end
+-- elseif default then
+-- contextsprint(notcatcodes,default)
+-- end
+-- end
+--
+-- no need for an assignment so:
function lxml.att(id,a,default)
- local root = getid(id)
- if root then
- local at = root.at
- local str = (at and at[a]) or default
- if str and str ~= "" then
- contextsprint(notcatcodes,str)
+ local e = getid(id)
+ if e then
+ local at = e.at
+ if at then
+ -- normally always true
+ local str = at[a]
+ if not str then
+ if default and default ~= "" then
+ contextsprint(notcatcodes,default)
+ end
+ elseif str ~= "" then
+ contextsprint(notcatcodes,str)
+ else
+ -- explicit empty is valid
+ end
+ elseif default and default ~= "" then
+ contextsprint(notcatcodes,default)
end
- elseif default then
+ elseif default and default ~= "" then
contextsprint(notcatcodes,default)
end
end
+-- Flushes attribute 'a' of element 'id' with all leading '#' characters
+-- removed. Nothing is flushed when the element, its attribute table or the
+-- attribute is missing, or when the (stripped) value is empty.
+
+function lxml.refatt(id,a)
+    local e   = getid(id)
+    local at  = e and e.at
+    local str = at and at[a]
+    if not str or str == "" then
+        return
+    end
+    str = gsub(str,"^#+","")
+    if str ~= "" then
+        contextsprint(notcatcodes,str)
+    end
+end
+
function lxml.name(id) -- or remapped name? -> lxml.info, combine
- local r = getid(id)
- local ns = r.rn or r.ns or ""
- if ns ~= "" then
- contextsprint(ctxcatcodes,ns,":",r.tg)
- else
- contextsprint(ctxcatcodes,r.tg)
+ local e = getid(id)
+ if e then
+ local ns = e.rn or e.ns
+ if ns and ns ~= "" then
+ contextsprint(ctxcatcodes,ns,":",e.tg)
+ else
+ contextsprint(ctxcatcodes,e.tg)
+ end
end
end
function lxml.match(id) -- or remapped name? -> lxml.info, combine
- contextsprint(ctxcatcodes,getid(id).mi or 0)
+ local e = getid(id)
+ contextsprint(ctxcatcodes,e and e.mi or 0)
end
function lxml.tag(id) -- tag vs name -> also in l-xml tag->name
- contextsprint(ctxcatcodes,getid(id).tg or "")
+ local e = getid(id)
+ if e then
+ local tg = e.tg
+ if tg and tg ~= "" then
+ contextsprint(ctxcatcodes,tg)
+ end
+ end
end
function lxml.namespace(id) -- or remapped name?
- local root = getid(id)
- contextsprint(ctxcatcodes,root.rn or root.ns or "")
+ local e = getid(id)
+ if e then
+ local ns = e.rn or e.ns
+ if ns and ns ~= "" then
+ contextsprint(ctxcatcodes,ns)
+ end
+ end
end
function lxml.flush(id)
- id = getid(id)
- local dt = id and id.dt
- if dt then
- xmlsprint(dt)
+ local e = getid(id)
+ if e then
+ local dt = e.dt
+ if dt then
+ xmlsprint(dt)
+ end
end
end
function lxml.snippet(id,i)
local e = getid(id)
if e then
- local edt = e.dt
- if edt then
- xmlsprint(edt[i])
+ local dt = e.dt
+ if dt then
+ local dti = dt[i]
+ if dti then
+ xmlsprint(dti)
+ end
end
end
end
function lxml.direct(id)
- xmlsprint(getid(id))
+ local e = getid(id)
+ if e then
+ xmlsprint(e)
+ end
end
function lxml.command(id,pattern,cmd)
@@ -1562,7 +1729,20 @@ function lxml.doifelsetext (id,pattern) doifelse(not empty(getid(id),pattern)) e
-- special case: "*" and "" -> self else lpath lookup
---~ function lxml.doifelseempty(id,pattern) doifelse(isempty(getid(id),pattern ~= "" and pattern ~= nil)) end -- not yet done, pattern
+-- Emptiness test used by the doif(else)empty helpers.
+--
+-- Without a pattern (nil or "") the element itself is inspected: it counts
+-- as empty when it has no children or just one empty string as child. An
+-- unknown id or an element without a dt table is treated as empty too
+-- instead of raising an error. With a pattern the work is delegated to
+-- isempty.
+
+local function checkedempty(id,pattern)
+    local e = getid(id)
+    if pattern and pattern ~= "" then
+        -- reuse the lookup instead of calling getid a second time
+        return isempty(e,pattern)
+    end
+    local dt = e and e.dt
+    if not dt then
+        return true
+    end
+    local nt = #dt
+    return (nt == 0) or (nt == 1 and dt[1] == "")
+end
+
+-- conditionals on top of checkedempty
+
+function lxml.doifempty(id,pattern)
+    local state = checkedempty(id,pattern)
+    doif(state)
+end
+
+function lxml.doifnotempty(id,pattern)
+    local state = checkedempty(id,pattern)
+    doifnot(state)
+end
+
+function lxml.doifelseempty(id,pattern)
+    local state = checkedempty(id,pattern)
+    doifelse(state)
+end
-- status info
@@ -1690,3 +1870,213 @@ end
texfinalizers.upperall = xmlfinalizers.upperall
texfinalizers.lowerall = xmlfinalizers.lowerall
+
+-- Stores the first match of 'pattern' in the buffer 'name'.
+--
+-- With 'unescaped' set the match goes through xml.content (expanded
+-- entities), otherwise it is converted with tostring. When there is no match
+-- at all (no collection or an empty one) the buffer is erased, so the string
+-- "nil" never ends up in a buffer.
+
+function lxml.tobuffer(id,pattern,name,unescaped)
+    local collected = xmlapplylpath(getid(id),pattern)
+    local first     = collected and collected[1]
+    if first then
+        local data
+        if unescaped then
+            data = xmlcontent(first) -- expanded entities !
+        else
+            data = tostring(first)
+        end
+        buffers.assign(name,data)
+    else
+        buffers.erase(name)
+    end
+end
+
+-- relatively new:
+
+-- The set of enabled categories; nil means that nothing has been set (see
+-- xml.setinjectors and xml.resetinjectors).
+
+local permitted      = nil
+local ctx_xmlinjector = context.xmlinjector
+
+-- Handler for the "injector" processing instruction. The first option is the
+-- action, the remaining ones are categories:
+--
+-- * no categories        : always inject
+-- * first category is "*": always inject
+-- * otherwise            : inject (once) when some category is permitted or
+--                          when a later category is "*"
+
+xml.pihandlers["injector"] = function(category,rest,e)
+    local options = options_to_array(rest)
+    local action  = options[1]
+    if not action then
+        return
+    end
+    local n = #options
+    if n <= 1 or options[2] == "*" then
+        ctx_xmlinjector(action)
+        return
+    end
+    if not permitted then
+        return
+    end
+    for i=2,n do
+        local tag = options[i]
+        if tag == "*" or permitted[tag] then
+            ctx_xmlinjector(action)
+            return
+        end
+    end
+end
+
+-- Splits "context-<what><rest>" into <what> (up to the first whitespace) and
+-- <rest> (everything after it).
+
+local pattern = P("context-") * C((1-lpeg.patterns.whitespace)^1) * C(P(1)^1)
+
+-- Returns what, rest for the payload of a processing instruction, or nothing
+-- when the payload is not a string or does not match.
+
+local function splitselector(text)
+    if type(text) == "string" then
+        return lpegmatch(pattern,text)
+    end
+end
+
+-- Walks the tree registered as 'id' and handles two kinds of paired
+-- processing instructions among the children of each element:
+--
+-- * context-select begin ... / context-select end
+--   When none of the listed categories is permitted, the tag of every table
+--   node between begin and end is set to "@cm@".
+--
+-- * context-include begin ... / context-include end
+--   When one of the listed categories is permitted, every "@cm@" node in
+--   between is converted with xml.toelement and replaces the original node.
+--
+-- The permitted categories are managed with xml.setinjectors and
+-- xml.resetinjectors; when none are set nothing is permitted. An unknown id
+-- is silently ignored.
+
+function lxml.applyselectors(id)
+    local root = getid(id)
+    if not root then
+        return
+    end
+    -- permitted can be nil and 'next' doesn't accept that
+    local allowed = permitted or { }
+    local function filter(e)
+        local dt   = e.dt
+        local ndt  = #dt
+        local done = false
+        local i    = 1
+        while i <= ndt do
+            local dti = dt[i]
+            if type(dti) == "table" then
+                if dti.tg == "@pi@" then
+                    local what, rest = splitselector(dti.dt[1])
+                    if what == "select" then
+                        local categories = options_to_hash(rest)
+                        if categories["begin"] then
+                            local okay = false
+                            for k, v in next, allowed do
+                                if categories[k] then
+                                    okay = k
+                                    break
+                                end
+                            end
+                            if not trace_selectors then
+                                -- skip
+                            elseif okay then
+                                report_lxml("accepting selector: %s",okay)
+                            else
+                                categories.begin = false
+                                report_lxml("rejecting selector: % t",sortedkeys(categories))
+                            end
+                            -- the scan starts at the begin instruction itself
+                            for j=i,ndt do
+                                local dtj = dt[j]
+                                if type(dtj) == "table" then
+                                    local tg = dtj.tg
+                                    if tg == "@pi@" then
+                                        local what, rest = splitselector(dtj.dt[1])
+                                        if what == "select" then
+                                            local categories = options_to_hash(rest)
+                                            if categories["end"] then
+                                                -- continue after the end instruction
+                                                i = j
+                                                break
+                                            else
+                                                -- error
+                                            end
+                                        end
+                                    elseif not okay then
+                                        dtj.tg = "@cm@"
+                                    end
+                                else
+                                 -- dt[j] = "" -- okay ?
+                                end
+                            end
+                        end
+                    elseif what == "include" then
+                        local categories = options_to_hash(rest)
+                        if categories["begin"] then
+                            local okay = false
+                            for k, v in next, allowed do
+                                if categories[k] then
+                                    okay = k
+                                    break
+                                end
+                            end
+                            if not trace_selectors then
+                                -- skip
+                            elseif okay then
+                                report_lxml("accepting include: %s",okay)
+                            else
+                                categories.begin = false
+                                report_lxml("rejecting include: % t",sortedkeys(categories))
+                            end
+                            if okay then
+                                for j=i,ndt do
+                                    local dtj = dt[j]
+                                    if type(dtj) == "table" then
+                                        local tg = dtj.tg
+                                        if tg == "@cm@" then
+                                            local content = dtj.dt[1]
+                                            local element = xml.toelement(content,root)
+                                            -- keep the original node when the conversion gives nothing
+                                            if element then
+                                                dt[j] = element
+                                                element.__p__ = dt -- needs checking
+                                                done = true
+                                            end
+                                        elseif tg == "@pi@" then
+                                            local what, rest = splitselector(dtj.dt[1])
+                                            if what == "include" then
+                                                local categories = options_to_hash(rest)
+                                                if categories["end"] then
+                                                    -- continue after the end instruction
+                                                    i = j
+                                                    break
+                                                else
+                                                    -- error
+                                                end
+                                            end
+                                        end
+                                    end
+                                end
+                            end
+                        end
+                    else
+                        filter(dti)
+                    end
+                end
+                if done then
+                    -- probably not needed
+                    xml.reindex(dt)
+                end
+            end
+            i = i + 1
+        end
+    end
+    xmlwithelements(root,filter)
+end
+
+-- Enables the categories in 'set' (parsed with settings_to_set). The first
+-- call adopts the parsed set as-is, later calls add its keys to the
+-- already permitted ones.
+
+function xml.setinjectors(set)
+    local additions = settings_to_set(set)
+    if not permitted then
+        permitted = additions
+        return
+    end
+    for name in next, additions do
+        permitted[name] = true
+    end
+end
+
+-- Disables the categories in 'set'. Without a (non empty) set, or when
+-- nothing is permitted yet, all permissions are dropped.
+
+function xml.resetinjectors(set)
+    if not permitted or not set or set == "" then
+        permitted = nil
+        return
+    end
+    for name, flag in next, settings_to_set(set) do
+        if flag then
+            permitted[name] = nil
+        end
+    end
+end
+
+-- interfaces to the injector and selector functions; each one gets a single
+-- string argument
+
+implement {
+    name      = "xmlsetinjectors",
+    arguments = "string",
+    actions   = xml.setinjectors,
+}
+
+implement {
+    name      = "xmlresetinjectors",
+    arguments = "string",
+    actions   = xml.resetinjectors,
+}
+
+implement {
+    name      = "xmlapplyselectors",
+    arguments = "string",
+    actions   = lxml.applyselectors,
+}