summaryrefslogtreecommitdiff
path: root/tex/context/modules/mkiv/s-xml-analyzers.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/modules/mkiv/s-xml-analyzers.lua')
-rw-r--r--tex/context/modules/mkiv/s-xml-analyzers.lua189
1 files changed, 159 insertions, 30 deletions
diff --git a/tex/context/modules/mkiv/s-xml-analyzers.lua b/tex/context/modules/mkiv/s-xml-analyzers.lua
index c356d4c37..6e7f7f2ba 100644
--- a/tex/context/modules/mkiv/s-xml-analyzers.lua
+++ b/tex/context/modules/mkiv/s-xml-analyzers.lua
@@ -11,6 +11,8 @@ moduledata.xml.analyzers = moduledata.xml.analyzers or { }
local next, type = next, type
local utfvalues = string.utfvalues
+local formatters = string.formatters
+local setmetatableindex = table.setmetatableindex
local context = context
-- every local is bound to the context command that carries the same name
local NC, NR, HL, FL, LL, SL, TB = context.NC, context.NR, context.HL, context.FL, context.LL, context.SL, context.TB
local sortedhash, sortedkeys, concat, sequenced = table.sortedhash, table.sortedkeys, table.concat, table.sequenced
@@ -43,28 +45,29 @@ local function analyze(filename)
attr = { }
ents = { }
- table.setmetatableindex(tags,function(t,k)
+ local function att(t,k)
+ local v = setmetatableindex("number")
+ t[k] = v
+ return v
+ end
+
+ local function add(t,k)
local v = {
n = 0,
- attributes = { },
- children = { },
+ attributes = setmetatableindex(att),
+ children = setmetatableindex(add),
}
t[k] = v
return v
- end)
+ end
- table.setmetatableindex(char,function(t,k)
- t[k] = 0
- return 0
- end)
+ setmetatableindex(tags,add)
- table.setmetatableindex(attr,function(t,k)
- char[k] = char[k] or 0
- t[k] = 0
- return 0
- end)
+ setmetatableindex(ents,"number")
+ setmetatableindex(char,"number")
- table.setmetatableindex(ents,function(t,k)
+ setmetatableindex(attr,function(t,k)
+ char[k] = char[k] or 0
t[k] = 0
return 0
end)
@@ -85,24 +88,25 @@ local function analyze(filename)
local tg = e.tg
local tag = tags[tg]
tag.n = tag.n + 1
+ local children = parent and tags[parent].children[tg]
+ local childatt = children and children.attributes
+ if children then
+ children.n = children.n + 1
+ end
if at then
local attributes = tag.attributes
for k, v in next, at do
local a = attributes[k]
- if a then
- a[v] = (a[v] or 0) + 1
- else
- attributes[k] = { [v] = 1 }
+ a[v] = a[v] + 1
+ if childatt then
+ local a = childatt[k]
+ a[v] = a[v] + 1
end
for s in utfvalues(v) do
attr[s] = attr[s] + 1
end
end
end
- if parent then
- local children = tags[parent].children
- children[tg] = (children[tg] or 0) + 1
- end
if dt then
for i=1,#dt do
local d = dt[i]
@@ -119,7 +123,11 @@ local function analyze(filename)
end
for i=1,#filename do
- local root = xml.load(filename[i])
+ local name = filename[i]
+ local root = xml.load(name)
+ --
+ logs.report("xml analyze","loaded: %s",name)
+ --
collect(root)
--
local names = root.statistics.entities.names
@@ -128,10 +136,10 @@ local function analyze(filename)
end
end
- table.setmetatableindex(tags,nil)
- table.setmetatableindex(char,nil)
- table.setmetatableindex(attr,nil)
- table.setmetatableindex(ents,nil)
+ setmetatableindex(tags,nil)
+ setmetatableindex(char,nil)
+ setmetatableindex(attr,nil)
+ setmetatableindex(ents,nil)
end
@@ -153,16 +161,20 @@ function moduledata.xml.analyzers.structure(filename)
NC() context.bold("element") NC() context.darkred(name) NC() NR()
NC() context.bold("frequency") NC() context(data.n) NC() NR()
if next(children) then
- NC() context.bold("children") NC() context.puretext(sequenced(children)) NC() NR()
+ local t = { }
+ for k, v in next, children do
+ t[k] = v.n
+ end
+ NC() context.bold("children") NC() context.puretext(sequenced(t)) NC() NR()
end
if next(attributes) then
NC() context.bold("attributes") NC() context.puretext.darkgreen(concat(sortedkeys(attributes)," ")) NC() NR()
for attribute, values in sortedhash(attributes) do
local n = table.count(values)
if attribute == "id" or attribute == "xml:id" or n > maxnofattributes then
- NC() context(attribute) NC() context("%s different values",n) NC() NR()
+ NC() context("@%s",attribute) NC() context("%s different values",n) NC() NR()
else
- NC() context(attribute) NC() context.puretext(sequenced(values)) NC() NR()
+ NC() context("@%s",attribute) NC() context.puretext(sequenced(values)) NC() NR()
end
end
end
@@ -195,4 +207,121 @@ function moduledata.xml.analyzers.entities(filename)
context.stoptabulate()
end
+-- Formatters used by allsetups. The "_s" variants give the bare setup name, the
+-- "_n" variants a setup that only flushes its content, and the "_a" variants
+-- the same setup preceded by a "% @ ..." line that is filled from the table
+-- passed as last argument (allsetups passes the sorted attribute names). The
+-- parent variants take one name, the child variants a parent and a child name.
+
+local f_parent_s = formatters["xml:%s"]
+local f_parent_n = formatters["\\startxmlsetups xml:%s\n \\xmlflush{#1}\n\\stopxmlsetups"]
+local f_parent_a = formatters["\\startxmlsetups xml:%s\n %% @ % t\n \\xmlflush{#1}\n\\stopxmlsetups"]
+local f_child_s = formatters["xml:%s:%s"]
+local f_child_n = formatters["\\startxmlsetups xml:%s:%s\n \\xmlflush{#1}\n\\stopxmlsetups"]
+local f_child_a = formatters["\\startxmlsetups xml:%s:%s\n %% @ % t\n \\xmlflush{#1}\n\\stopxmlsetups"]
+
+-- Skeleton of the saved file; the three %s slots are: the analyzed filename(s),
+-- the list of setup names and the collected setup bodies. This is a long
+-- bracket string, so backslashes are literal and must not be doubled, and the
+-- opening \startxmlsetups has to pair with the closing \stopxmlsetups.
+
+local f_template = formatters [ [[
+%% file: %s
+
+%% Beware, these are all (first level) setups. If you have a complex document
+%% it often makes sense to use \xmlfilter or similar local filter options.
+
+%% presets
+
+\startxmlsetups xml:presets:all
+ \xmlsetsetups {#1} {
+ %s
+ }
+\stopxmlsetups
+
+%% setups
+
+%s
+]] ]
+
+-- Runs the analyzer on filename and saves a template file with one setup per
+-- element and one per parent:child combination found in the tags table.
+--
+-- filename : passed unchanged to analyze; when it is a table the entries are
+--            joined with " | " for the "file:" line of the template
+-- usedname : name of the file to write, defaults to "xml-analyze-template.tex"
+--
+-- Nothing is returned; the outcome of the save is reported in the log.
+
+function moduledata.xml.analyzers.allsetups(filename,usedname)
+    analyze(filename)
+    local result = { } -- the setup bodies, in sorted element order
+    local setups = { } -- the setup names that end up in the presets list
+    for name, data in sortedhash(tags) do
+        local children = data.children
+        local attributes = data.attributes
+        if next(attributes) then
+            result[#result+1] = f_parent_a(name,sortedkeys(attributes))
+        else
+            result[#result+1] = f_parent_n(name)
+        end
+        setups[#setups+1] = f_parent_s(name)
+        if next(children) then
+            for k, v in sortedhash(children) do
+                -- these are the attributes seen on this child below this parent
+                local attributes = v.attributes
+                if next(attributes) then
+                    result[#result+1] = f_child_a(name,k,sortedkeys(attributes))
+                else
+                    result[#result+1] = f_child_n(name,k)
+                end
+                setups[#setups+1] = f_child_s(name,k)
+            end
+        end
+    end
+    table.sort(setups)
+    --
+    if type(filename) == "table" then
+        filename = concat(filename," | ")
+    end
+    --
+    usedname = usedname or "xml-analyze-template.tex"
+    --
+    -- only claim success when the file could actually be written
+    if io.savedata(usedname,f_template(filename,concat(setups,"|\n "),concat(result,"\n\n"))) then
+        logs.report("xml analyze","presets saved in: %s",usedname)
+    else
+        logs.report("xml analyze","unable to save presets in: %s",usedname)
+    end
+end
+
+-- example:
+
+-- local t = { }
+-- local x = xml.load("music-collection.xml")
+-- for c in xml.collected(x,"//*") do
+-- if not c.special and not t[c.tg] then
+-- t[c.tg] = true
+-- end
+-- end
+-- inspect(table.sortedkeys(t))
+
+-- xml.finalizers.taglist = function(collected)
+-- local t = { }
+-- for i=1,#collected do
+-- local c = collected[i]
+-- if not c.special then
+-- local tg = c.tg
+-- if tg and not t[tg] then
+-- t[tg] = true
+-- end
+-- end
+-- end
+-- return t
+-- end
+-- local x = xml.load("music-collection.xml")
+-- inspect(table.sortedkeys(xml.applylpath(x,"//*/taglist()")))
+
+-- xml.finalizers.taglist = function(collected,parenttoo)
+-- local t = { }
+-- for i=1,#collected do
+-- local c = collected[i]
+-- if not c.special then
+-- local tg = c.tg
+-- if tg and not t[tg] then
+-- t[tg] = true
+-- end
+-- if parenttoo then
+-- local p = c.__p__
+-- if p and not p.special then
+-- local tg = p.tg .. ":" .. tg
+-- if tg and not t[tg] then
+-- t[tg] = true
+-- end
+-- end
+-- end
+-- end
+-- end
+-- return t
+-- end
+
+-- local x = xml.load("music-collection.xml")
+-- inspect(table.sortedkeys(xml.applylpath(x,"//*/taglist()")))
+-- local x = xml.load("music-collection.xml")
+-- inspect(table.sortedkeys(xml.applylpath(x,"//*/taglist(true)")))