summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/strc-tag.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkiv/strc-tag.lua')
-rw-r--r--tex/context/base/mkiv/strc-tag.lua516
1 files changed, 516 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/strc-tag.lua b/tex/context/base/mkiv/strc-tag.lua
new file mode 100644
index 000000000..8f2e18978
--- /dev/null
+++ b/tex/context/base/mkiv/strc-tag.lua
@@ -0,0 +1,516 @@
+if not modules then modules = { } end modules ['strc-tag'] = { -- creates the global modules table when absent and records this file's metadata in it
+ version = 1.001,
+ comment = "companion to strc-tag.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- This is rather experimental code. Tagging happens on the fly and there are two analysers
+-- involved: the pdf backend tagger and the exporter. They share data but there are subtle
+-- differences. Each tag carries a specification and these can be accessed by attribute (the
+-- end of the chain tag) or by a so-called fulltag, which is a tag name combined with a number.
+
+local type, next = type, next
+local insert, remove, unpack, concat, merge = table.insert, table.remove, table.unpack, table.concat, table.merge
+local find, topattern, format = string.find, string.topattern, string.format
+local lpegmatch, P, S, C, Cc = lpeg.match, lpeg.P, lpeg.S, lpeg.C, lpeg.Cc
+local texattribute = tex.attribute
+local allocate = utilities.storage.allocate
+local settings_to_hash = utilities.parsers.settings_to_hash
+local setmetatableindex = table.setmetatableindex
+
+local trace_tags = false trackers.register("structures.tags", function(v) trace_tags = v end)
+
+local report_tags = logs.reporter("structure","tags")
+
+local attributes = attributes
+local structures = structures
+local implement = interfaces.implement
+
+local a_tagged = attributes.private('tagged')
+
+local unsetvalue = attributes.unsetvalue
+local codeinjections = backends.codeinjections
+
+local taglist = allocate() -- access by attribute: attribute number -> specification
+local specifications = allocate() -- access by fulltag: "name>number" -> specification
+local labels = allocate() -- tags.start stores labels[tag] = tag
+local stack = { } -- attribute number per nesting level, indexed 1..stacksize
+local chain = { } -- complete tag ("name>number") per nesting level, indexed 1..stacksize
+local ids = { } -- running index per tag name, bumped by tags.start
+local enabled = false -- set once codeinjections.enabletags() has been called
+local tagcontext = { } -- tag name -> complete tag most recently started
+local tagpatterns = { } -- exported as tags.patterns; holds the splitter pattern
+local lasttags = { } -- tag name -> last index handed out by tags.start
+local stacksize = 0 -- current nesting depth; entries above it in stack/chain are stale
+local metadata = nil -- applied to the next element
+local documentdata = { } -- metadata attached to the "document>1" element
+
+local tags = structures.tags
+tags.taglist = taglist -- can best be hidden
+tags.labels = labels
+tags.patterns = tagpatterns
+tags.specifications = specifications
+
+-- Tags are internally stored as:
+--
+-- tag>number tag>number tag>number
+
+-- Splits a complete tag "name>number" into two captures: everything before the first ">"
+-- and everything after it (at least one character on each side).
+local p_nogreater = 1 - S(">")
+local p_splitter = C(p_nogreater^1) * P(">") * C(P(1)^1)
+
+tagpatterns.splitter = p_splitter
+
+local properties = allocate { -- per tag name: the pdf structure type and the nature ("display", "mixed" or "inline")
+
+ document = { pdf = "Div", nature = "display" },
+
+ division = { pdf = "Div", nature = "display" },
+ paragraph = { pdf = "P", nature = "mixed" },
+ p = { pdf = "P", nature = "mixed" },
+ construct = { pdf = "Span", nature = "inline" },
+ highlight = { pdf = "Span", nature = "inline" },
+
+ section = { pdf = "Sect", nature = "display" },
+ sectiontitle = { pdf = "H", nature = "mixed" },
+ sectionnumber = { pdf = "H", nature = "mixed" },
+ sectioncontent = { pdf = "Div", nature = "display" },
+
+ itemgroup = { pdf = "L", nature = "display" },
+ item = { pdf = "LI", nature = "display" },
+ itemtag = { pdf = "Lbl", nature = "mixed" },
+ itemcontent = { pdf = "LBody", nature = "mixed" },
+ itemhead = { pdf = "Div", nature = "display" },
+ itembody = { pdf = "Div", nature = "display" },
+
+ description = { pdf = "Div", nature = "display" },
+ descriptiontag = { pdf = "Div", nature = "mixed" },
+ descriptioncontent = { pdf = "Div", nature = "mixed" },
+ descriptionsymbol = { pdf = "Span", nature = "inline" }, -- note reference
+
+ verbatimblock = { pdf = "Code", nature = "display" },
+ verbatimlines = { pdf = "Code", nature = "display" },
+ verbatimline = { pdf = "Code", nature = "mixed" },
+ verbatim = { pdf = "Code", nature = "inline" },
+
+ lines = { pdf = "Code", nature = "display" },
+ line = { pdf = "Code", nature = "mixed" },
+
+ synonym = { pdf = "Span", nature = "inline" },
+ sorting = { pdf = "Span", nature = "inline" },
+
+ register = { pdf = "Div", nature = "display" },
+ registerlocation = { pdf = "Span", nature = "inline" },
+ registersection = { pdf = "Div", nature = "display" },
+ registertag = { pdf = "Span", nature = "mixed" },
+ registerentries = { pdf = "Div", nature = "display" },
+ registerentry = { pdf = "Div", nature = "display" },
+ registercontent = { pdf = "Span", nature = "mixed" },
+ registersee = { pdf = "Span", nature = "mixed" },
+ registerpages = { pdf = "Span", nature = "mixed" },
+ registerpage = { pdf = "Span", nature = "mixed" },
+ registerseparator = { pdf = "Span", nature = "inline" },
+ registerpagerange = { pdf = "Span", nature = "mixed" },
+
+ table = { pdf = "Table", nature = "display" },
+ tablerow = { pdf = "TR", nature = "display" },
+ tablecell = { pdf = "TD", nature = "mixed" },
+
+ tabulate = { pdf = "Table", nature = "display" },
+ tabulaterow = { pdf = "TR", nature = "display" },
+ tabulatecell = { pdf = "TD", nature = "mixed" },
+
+ list = { pdf = "TOC", nature = "display" },
+ listitem = { pdf = "TOCI", nature = "display" },
+ listtag = { pdf = "Lbl", nature = "mixed" },
+ listcontent = { pdf = "P", nature = "mixed" },
+ listdata = { pdf = "P", nature = "mixed" },
+ listpage = { pdf = "Reference", nature = "mixed" },
+
+ delimitedblock = { pdf = "BlockQuote", nature = "display" },
+ delimited = { pdf = "Quote", nature = "inline" },
+ delimitedcontent = { pdf = "Span", nature = "inline" },
+ delimitedsymbol = { pdf = "Span", nature = "inline" },
+ subsentence = { pdf = "Span", nature = "inline" },
+ subsentencecontent = { pdf = "Span", nature = "inline" },
+ subsentencesymbol = { pdf = "Span", nature = "inline" },
+
+ label = { pdf = "Span", nature = "mixed" },
+ number = { pdf = "Span", nature = "mixed" },
+
+ float = { pdf = "Div", nature = "display" }, -- Figure
+ floatcaption = { pdf = "Caption", nature = "mixed" },
+ floatlabel = { pdf = "Span", nature = "inline" },
+ floatnumber = { pdf = "Span", nature = "inline" },
+ floattext = { pdf = "Span", nature = "mixed" },
+ floatcontent = { pdf = "P", nature = "mixed" },
+
+ image = { pdf = "P", nature = "mixed" },
+ mpgraphic = { pdf = "P", nature = "mixed" },
+
+ formulaset = { pdf = "Div", nature = "display" },
+ formula = { pdf = "Div", nature = "display" }, -- Formula
+ formulacaption = { pdf = "Span", nature = "mixed" },
+ formulalabel = { pdf = "Span", nature = "mixed" },
+ formulanumber = { pdf = "Span", nature = "mixed" },
+ formulacontent = { pdf = "P", nature = "display" },
+ subformula = { pdf = "Div", nature = "display" },
+
+ link = { pdf = "Link", nature = "inline" },
+
+ margintextblock = { pdf = "Span", nature = "inline" },
+ margintext = { pdf = "Span", nature = "inline" },
+
+ math = { pdf = "Div", nature = "inline" }, -- no display
+ mn = { pdf = "Span", nature = "mixed" },
+ mi = { pdf = "Span", nature = "mixed" },
+ mo = { pdf = "Span", nature = "mixed" },
+ ms = { pdf = "Span", nature = "mixed" },
+ mrow = { pdf = "Span", nature = "display" },
+ msubsup = { pdf = "Span", nature = "display" },
+ msub = { pdf = "Span", nature = "display" },
+ msup = { pdf = "Span", nature = "display" },
+ merror = { pdf = "Span", nature = "mixed" },
+ munderover = { pdf = "Span", nature = "display" },
+ munder = { pdf = "Span", nature = "display" },
+ mover = { pdf = "Span", nature = "display" },
+ mtext = { pdf = "Span", nature = "mixed" },
+ mfrac = { pdf = "Span", nature = "display" },
+ mroot = { pdf = "Span", nature = "display" },
+ msqrt = { pdf = "Span", nature = "display" },
+ mfenced = { pdf = "Span", nature = "display" },
+ maction = { pdf = "Span", nature = "display" },
+
+ mstacker = { pdf = "Span", nature = "display" }, -- these are only internally used
+ mstackertop = { pdf = "Span", nature = "display" }, -- these are only internally used
+ mstackerbot = { pdf = "Span", nature = "display" }, -- these are only internally used
+ mstackermid = { pdf = "Span", nature = "display" }, -- these are only internally used
+
+ mtable = { pdf = "Table", nature = "display" }, -- might change
+ mtr = { pdf = "TR", nature = "display" }, -- might change
+ mtd = { pdf = "TD", nature = "display" }, -- might change
+
+ ignore = { pdf = "Span", nature = "mixed" }, -- used internally
+ private = { pdf = "Span", nature = "mixed" }, -- for users (like LS) when they need it
+ metadata = { pdf = "Div", nature = "display" },
+ metavariable = { pdf = "Span", nature = "mixed" },
+
+ mid = { pdf = "Span", nature = "inline" },
+ sub = { pdf = "Span", nature = "inline" },
+ sup = { pdf = "Span", nature = "inline" },
+ subsup = { pdf = "Span", nature = "inline" },
+
+ combination = { pdf = "Span", nature = "display" },
+ combinationpair = { pdf = "Span", nature = "display" },
+ combinationcontent = { pdf = "Span", nature = "mixed" },
+ combinationcaption = { pdf = "Span", nature = "mixed" },
+}
+
+tags.properties = properties -- exported; tags.setproperty and tags.setaspect write into this table
+
+-- Lazily filled cache of lookup patterns: on the first access of patterns[name] the
+-- string "^name>" is passed through topattern and the result is stored under that name.
+local patterns = setmetatableindex(function(cache,name)
+    local pattern = topattern("^" .. name .. ">")
+    cache[name] = pattern
+    return pattern
+end)
+
+-- Looks for the innermost element named 'tag' in the nesting chain that belongs to the
+-- current value of the tagged attribute.
+--
+-- tag     : plain tag name (without the ">number" suffix)
+-- returns : the matching complete tag ("name>number"), or false when the attribute is
+--           negative, has no specification, or no entry in the chain matches
+function tags.locatedtag(tag)
+    local current = texattribute[a_tagged]
+    if current < 0 then
+        -- enabled but not auto
+        return false -- handy as bogus index
+    end
+    local specification = taglist[current]
+    if not specification then
+        return false
+    end
+    local nesting = specification.taglist
+    local pattern = patterns[tag]
+    local index = #nesting
+    -- walk from the innermost entry outwards
+    while index >= 1 do
+        local fulltag = nesting[index]
+        if find(fulltag,pattern) then
+            return fulltag
+        end
+        index = index - 1
+    end
+    return false
+end
+
+-- Tests whether the current position is nested inside an element named 'str'.
+--
+-- The nesting chain is taken from the specification that belongs to the current value of
+-- the tagged attribute and is scanned from the innermost entry outwards.
+--
+-- str     : plain tag name (without the ">number" suffix)
+-- returns : true when an entry of the chain matches "^str>", false otherwise
+function structures.atlocation(str)
+    local specification = taglist[texattribute[a_tagged]]
+    if specification then
+        -- the chain lives in the specification; there is no free-standing 'list' variable
+        local list = specification.taglist
+        if list then
+            local pattern = patterns[str]
+            for i=#list,1,-1 do
+                if find(list[i],pattern) then
+                    return true
+                end
+            end
+        end
+    end
+    return false
+end
+
+-- Sets one property of a tag, creating the property entry when the tag is not known yet.
+--
+-- tag   : key into the properties table
+-- key   : property name (the predefined entries use "pdf" and "nature")
+-- value : new value for that property
+function tags.setproperty(tag,key,value)
+    local entry = properties[tag]
+    if not entry then
+        properties[tag] = { [key] = value }
+        return
+    end
+    entry[key] = value
+end
+
+-- Sets a property for the element that is currently open (the top of the chain); does
+-- nothing when no element is open.
+--
+-- NOTE(review): chain holds complete tags ("name>number") whereas the predefined entries
+-- in properties are keyed by plain tag names, so this normally creates a new entry under
+-- the complete tag -- confirm that this is what consumers expect.
+function tags.setaspect(key,value)
+    local current = chain[stacksize]
+    if not current then
+        return
+    end
+    local entry = properties[current]
+    if not entry then
+        properties[current] = { [key] = value }
+        return
+    end
+    entry[key] = value
+end
+
+-- Registers metadata given as a settings string (parsed with settings_to_hash).
+--
+-- While the chain has at most one entry the data is merged into the document metadata;
+-- otherwise it is collected in the pending 'metadata' table that tags.start attaches to
+-- the next element it creates.
+--
+-- NOTE(review): #chain is the deepest level reached so far, not the current depth
+-- (tags.stop only lowers stacksize) -- confirm that this is the intended test.
+function tags.registermetadata(data)
+    local settings = settings_to_hash(data)
+    if #chain <= 1 then
+        merge(documentdata,settings)
+    elseif metadata then
+        merge(metadata,settings)
+    else
+        metadata = settings
+    end
+end
+
+-- Opens a new element and makes it the current one.
+--
+-- tag           : plain tag name
+-- specification : optional table; it is completed in place with attribute, tagindex,
+--                 taglist and tagname, and its userdata/detail/parents are normalized
+-- returns       : the attribute number allocated for the element (also stored in the
+--                 tagged attribute)
+--
+-- Side effects: enables tagging in the backend on first use, bumps the per-tag counter,
+-- pushes the element on the stack/chain and registers the specification both by
+-- attribute number (taglist) and by complete tag (specifications).
+function tags.start(tag,specification)
+    if not enabled then
+        codeinjections.enabletags()
+        enabled = true
+    end
+    --
+    labels[tag] = tag -- can go away
+    --
+    local attribute = #taglist + 1
+    local tagindex = (ids[tag] or 0) + 1
+    --
+    local completetag = tag .. ">" .. tagindex
+    --
+    ids[tag] = tagindex
+    lasttags[tag] = tagindex
+    stacksize = stacksize + 1
+    --
+    chain[stacksize] = completetag
+    stack[stacksize] = attribute
+    tagcontext[tag] = completetag
+    --
+    local tagnesting = { unpack(chain,1,stacksize) } -- a copy so we can add actualtext
+    --
+    if specification then
+        specification.attribute = attribute
+        specification.tagindex = tagindex
+        specification.taglist = tagnesting
+        specification.tagname = tag
+        if metadata then
+            specification.metadata = metadata
+            metadata = nil
+        end
+        -- empty strings mean "not given": clear them, as is done for detail and parents;
+        -- a non-empty settings string is converted to a hash
+        local userdata = specification.userdata
+        if userdata == "" then
+            specification.userdata = nil
+        elseif type(userdata) == "string" then
+            specification.userdata = settings_to_hash(userdata)
+        end
+        local detail = specification.detail
+        if detail == "" then
+            specification.detail = nil
+        end
+        local parents = specification.parents
+        if parents == "" then
+            specification.parents = nil
+        end
+    else
+        specification = {
+            attribute = attribute,
+            tagindex = tagindex,
+            taglist = tagnesting,
+            tagname = tag,
+            metadata = metadata,
+        }
+        metadata = nil
+    end
+    --
+    taglist[attribute] = specification
+    specifications[completetag] = specification
+    --
+    if completetag == "document>1" then
+        -- the first document element always carries the document level metadata
+        specification.metadata = documentdata
+    end
+    --
+    texattribute[a_tagged] = attribute
+    return attribute
+end
+
+-- Reopens an element one level deeper than the current depth.
+--
+-- attribute : either the attribute number of a registered element, or a string that is
+--             pushed on the chain as is
+-- returns   : the attribute number that is now current
+--
+-- For a number the innermost complete tag of that element's chain is pushed. For any
+-- other value a fresh attribute number is allocated and a minimal entry holding only a
+-- copy of the chain is registered for it.
+function tags.restart(attribute)
+    stacksize = stacksize + 1
+    if type(attribute) ~= "number" then
+        chain[stacksize] = attribute -- a string
+        attribute = #taglist + 1
+        taglist[attribute] = { taglist = { unpack(chain,1,stacksize) } }
+    else
+        local nesting = taglist[attribute].taglist
+        chain[stacksize] = nesting[#nesting]
+    end
+    stack[stacksize] = attribute
+    texattribute[a_tagged] = attribute
+    return attribute
+end
+
+-- Closes the current element: lowers the depth by one (never below zero) and makes the
+-- enclosing element current again.
+--
+-- returns : the attribute number of the enclosing element, or unsetvalue when there is
+--           none; the same value is stored in the tagged attribute
+--
+-- The report is issued unconditionally (trace_tags is not consulted).
+-- NOTE(review): stack[0] is never set, so the report also fires when the outermost
+-- element is closed in a balanced way -- confirm that this is wanted.
+function tags.stop()
+    if stacksize > 0 then
+        stacksize = stacksize - 1
+    end
+    local enclosing = stack[stacksize]
+    if enclosing then
+        texattribute[a_tagged] = enclosing
+        return enclosing
+    end
+    local open = "none"
+    if stacksize > 0 then
+        open = concat(chain," ",1,stacksize)
+    end
+    report_tags("ignoring end tag, previous chain: %s",open)
+    texattribute[a_tagged] = unsetvalue
+    return unsetvalue
+end
+
+-- Returns the last index handed out for the given tag name, or "?" when that tag has
+-- not been started yet. The detail argument is accepted but not used.
+function tags.getid(tag,detail)
+    local id = ids[tag]
+    if id then
+        return id
+    end
+    return "?"
+end
+
+-- Returns the index of the most recently started element with this tag name, or nil
+-- when no such element has been started.
+function tags.last(tag)
+    local index = lasttags[tag] -- or false
+    return index
+end
+
+-- Returns a complete tag ("name>number"): for a non-empty tag name the one most recently
+-- started under that name, otherwise (nil or "") the element at the current depth.
+function tags.lastinchain(tag)
+    if not tag or tag == "" then
+        return chain[stacksize]
+    end
+    return tagcontext[tag]
+end
+
+-- Captures everything up to (not including) the first ">", i.e. the plain tag name of
+-- a complete tag.
+local strip = C((1-S(">"))^1)
+
+-- Returns the plain tag name of the element at the current depth; returns nothing when
+-- no element is open.
+function tags.elementtag()
+    local fulltag = chain[stacksize]
+    if not fulltag then
+        return
+    end
+    return lpegmatch(strip,fulltag)
+end
+
+-- Returns the part of a complete tag ("name>number") that precedes the first ">".
+function tags.strip(fulltag)
+    local name = lpegmatch(strip,fulltag)
+    return name
+end
+
+-- Adds user properties to an element.
+--
+-- tag  : plain tag name; the element most recently started under that name is used
+-- list : settings string, parsed with settings_to_hash
+--
+-- When list is nil or empty the first argument is taken to be the settings string and
+-- the element at the current depth is used instead. New keys are added to (and existing
+-- keys overwritten in) the element's userdata; when the element has no userdata yet the
+-- parsed table becomes its userdata. Nothing happens when no element can be found.
+function tags.setuserproperties(tag,list)
+    if list and list ~= "" then
+        tag = tagcontext[tag]
+    else
+        tag, list = chain[stacksize], tag
+    end
+    if not tag then -- a complete tag from here on
+        return
+    end
+    local settings = settings_to_hash(list)
+    local specification = specifications[tag]
+    if not specification then
+        -- error
+        return
+    end
+    local userdata = specification.userdata
+    if not userdata then
+        specification.userdata = settings
+        return
+    end
+    for key, value in next, settings do
+        userdata[key] = value
+    end
+end
+
+-- Dummy handler: hands back the head it was given together with false.
+function tags.handler(head) -- we need a dummy
+    local done = false
+    return head, done
+end
+
+-- Statistics line: the number of registered element chains and, when elements are still
+-- open, the open chain. Nothing is returned when tagging was never enabled.
+statistics.register("structure elements", function()
+    if not enabled then
+        return
+    end
+    local count = #taglist
+    if stacksize > 0 then
+        return format("%s element chains identified, open chain: %s ",count,concat(chain," => ",1,stacksize))
+    end
+    return format("%s element chains identified",count)
+end)
+
+-- Directive that enables tagging in the backend once; the directive value itself is
+-- not looked at.
+directives.register("backend.addtags", function(v)
+    if enabled then
+        return
+    end
+    codeinjections.enabletags()
+    enabled = true
+end)
+
+-- interface
+
+local starttag = tags.start
+
+-- starttag: opens an element given only its tag name
+implement {
+    name      = "starttag",
+    arguments = { "string" },
+    actions   = starttag,
+}
+
+-- stoptag: closes the current element
+implement {
+    name    = "stoptag",
+    actions = tags.stop,
+}
+
+-- starttag_u: tag name plus userdata
+implement {
+    name      = "starttag_u",
+    scope     = "private",
+    arguments = { "string", "string" },
+    actions   = function(tag,userdata)
+        starttag(tag,{ userdata = userdata })
+    end,
+}
+
+-- starttag_d: tag name plus detail
+implement {
+    name      = "starttag_d",
+    scope     = "private",
+    arguments = { "string", "string" },
+    actions   = function(tag,detail)
+        starttag(tag,{ detail = detail })
+    end,
+}
+
+-- starttag_c: tag name plus detail and parents
+implement {
+    name      = "starttag_c",
+    scope     = "private",
+    arguments = { "string", "string", "string" },
+    actions   = function(tag,detail,parents)
+        starttag(tag,{ detail = detail, parents = parents })
+    end,
+}
+
+-- settagaspect: key and value for the element that is currently open
+implement {
+    name      = "settagaspect",
+    arguments = { "string", "string" },
+    actions   = tags.setaspect,
+}
+
+-- settagproperty and its variants with a fixed key ("backend" or "nature")
+implement {
+    name      = "settagproperty",
+    arguments = { "string", "string", "string" },
+    actions   = tags.setproperty,
+}
+
+implement {
+    name      = "settagproperty_b",
+    scope     = "private",
+    arguments = { "string", "'backend'", "string" },
+    actions   = tags.setproperty,
+}
+
+implement {
+    name      = "settagproperty_n",
+    scope     = "private",
+    arguments = { "string", "'nature'", "string" },
+    actions   = tags.setproperty,
+}
+
+-- getelementtag: the result of tags.elementtag is handed on to context
+implement {
+    name    = "getelementtag",
+    actions = { tags.elementtag, context },
+}
+
+implement {
+    name      = "setelementuserproperties",
+    scope     = "private",
+    arguments = { "string", "string" },
+    actions   = tags.setuserproperties,
+}
+
+-- doifelseinelement: the result of structures.atlocation is handed on to commands.testcase
+implement {
+    name      = "doifelseinelement",
+    arguments = "string",
+    actions   = { structures.atlocation, commands.testcase },
+}
+
+implement {
+    name      = "settaggedmetadata",
+    arguments = "string",
+    actions   = tags.registermetadata,
+}