diff options
Diffstat (limited to 'tex/context/base/mkiv/strc-tag.lua')
-rw-r--r-- | tex/context/base/mkiv/strc-tag.lua | 516 |
1 files changed, 516 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/strc-tag.lua b/tex/context/base/mkiv/strc-tag.lua new file mode 100644 index 000000000..8f2e18978 --- /dev/null +++ b/tex/context/base/mkiv/strc-tag.lua @@ -0,0 +1,516 @@ +if not modules then modules = { } end modules ['strc-tag'] = { + version = 1.001, + comment = "companion to strc-tag.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- This is rather experimental code. Tagging happens on the fly and there are two analysers +-- involved: the pdf backend tagger and the exporter. They share data but there are subtle +-- differences. Each tag carries a specification and these can be accessed by attribute (the +-- end of the chain tag) or by so called fullname which is a tagname combined with a number. + +local type, next = type, next +local insert, remove, unpack, concat, merge = table.insert, table.remove, table.unpack, table.concat, table.merge +local find, topattern, format = string.find, string.topattern, string.format +local lpegmatch, P, S, C, Cc = lpeg.match, lpeg.P, lpeg.S, lpeg.C, lpeg.Cc +local texattribute = tex.attribute +local allocate = utilities.storage.allocate +local settings_to_hash = utilities.parsers.settings_to_hash +local setmetatableindex = table.setmetatableindex + +local trace_tags = false trackers.register("structures.tags", function(v) trace_tags = v end) + +local report_tags = logs.reporter("structure","tags") + +local attributes = attributes +local structures = structures +local implement = interfaces.implement + +local a_tagged = attributes.private('tagged') + +local unsetvalue = attributes.unsetvalue +local codeinjections = backends.codeinjections + +local taglist = allocate() -- access by attribute +local specifications = allocate() -- access by fulltag +local labels = allocate() +local stack = { } +local chain = { } +local ids = { } +local enabled = false +local tagcontext = { } +local tagpatterns = { } +local lasttags = { } +local stacksize = 0 +local metadata = nil -- applied to the next element +local documentdata = { } + +local tags = structures.tags +tags.taglist = taglist -- can best be hidden +tags.labels = labels +tags.patterns = tagpatterns +tags.specifications = specifications + +-- Tags are internally stored as: +-- +-- tag>number tag>number tag>number + +local p_splitter = C((1-S(">"))^1) * P(">") * C(P(1)^1) +tagpatterns.splitter = p_splitter + +local properties = allocate { + + document = { pdf = "Div", nature = "display" }, + + division = { pdf = "Div", nature = "display" }, + paragraph = { pdf = "P", nature = "mixed" }, + p = { pdf = "P", nature = "mixed" }, + construct = { pdf = "Span", nature = "inline" }, + highlight = { pdf = "Span", nature = "inline" }, + + section = { pdf = "Sect", nature = "display" }, + sectiontitle = { pdf = "H", nature = "mixed" }, + sectionnumber = { pdf = "H", nature = "mixed" }, + sectioncontent = { pdf = "Div", nature = "display" }, + + itemgroup = { pdf = "L", nature = "display" }, + item = { pdf = "LI", nature = "display" }, + itemtag = { pdf = "Lbl", nature = "mixed" }, + itemcontent = { pdf = "LBody", nature = "mixed" }, + itemhead = { pdf = "Div", nature = "display" }, + itembody = { pdf = "Div", nature = "display" }, + + description = { pdf = "Div", nature = "display" }, + descriptiontag = { pdf = "Div", nature = "mixed" }, + descriptioncontent = { pdf = "Div", nature = "mixed" }, + descriptionsymbol = { pdf = "Span", nature = "inline" }, -- note reference + + verbatimblock = { pdf = "Code", nature = "display" }, + verbatimlines = { pdf = "Code", nature = "display" }, + verbatimline = { pdf = "Code", nature = "mixed" }, + verbatim = { pdf = "Code", nature = "inline" }, + + lines = { pdf = "Code", nature = "display" }, + line = { pdf = "Code", nature = "mixed" }, + + synonym = { pdf = "Span", nature = "inline" }, + sorting = { pdf = "Span", nature = "inline" }, + + register = { pdf = "Div", nature = "display" }, + registerlocation = { pdf = "Span", nature = "inline" }, + registersection = { pdf = "Div", nature = "display" }, + registertag = { pdf = "Span", nature = "mixed" }, + registerentries = { pdf = "Div", nature = "display" }, + registerentry = { pdf = "Div", nature = "display" }, + registercontent = { pdf = "Span", nature = "mixed" }, + registersee = { pdf = "Span", nature = "mixed" }, + registerpages = { pdf = "Span", nature = "mixed" }, + registerpage = { pdf = "Span", nature = "mixed" }, + registerseparator = { pdf = "Span", nature = "inline" }, + registerpagerange = { pdf = "Span", nature = "mixed" }, + + table = { pdf = "Table", nature = "display" }, + tablerow = { pdf = "TR", nature = "display" }, + tablecell = { pdf = "TD", nature = "mixed" }, + + tabulate = { pdf = "Table", nature = "display" }, + tabulaterow = { pdf = "TR", nature = "display" }, + tabulatecell = { pdf = "TD", nature = "mixed" }, + + list = { pdf = "TOC", nature = "display" }, + listitem = { pdf = "TOCI", nature = "display" }, + listtag = { pdf = "Lbl", nature = "mixed" }, + listcontent = { pdf = "P", nature = "mixed" }, + listdata = { pdf = "P", nature = "mixed" }, + listpage = { pdf = "Reference", nature = "mixed" }, + + delimitedblock = { pdf = "BlockQuote", nature = "display" }, + delimited = { pdf = "Quote", nature = "inline" }, + delimitedcontent = { pdf = "Span", nature = "inline" }, + delimitedsymbol = { pdf = "Span", nature = "inline" }, + subsentence = { pdf = "Span", nature = "inline" }, + subsentencecontent = { pdf = "Span", nature = "inline" }, + subsentencesymbol = { pdf = "Span", nature = "inline" }, + + label = { pdf = "Span", nature = "mixed" }, + number = { pdf = "Span", nature = "mixed" }, + + float = { pdf = "Div", nature = "display" }, -- Figure + floatcaption = { pdf = "Caption", nature = "mixed" }, + floatlabel = { pdf = "Span", nature = "inline" }, + floatnumber = { pdf = "Span", nature = "inline" }, + floattext = { pdf = "Span", nature = "mixed" }, + floatcontent = { pdf = "P", nature = "mixed" }, + + image = { pdf = "P", nature = "mixed" }, + mpgraphic = { pdf = "P", nature = "mixed" }, + + formulaset = { pdf = "Div", nature = "display" }, + formula = { pdf = "Div", nature = "display" }, -- Formula + formulacaption = { pdf = "Span", nature = "mixed" }, + formulalabel = { pdf = "Span", nature = "mixed" }, + formulanumber = { pdf = "Span", nature = "mixed" }, + formulacontent = { pdf = "P", nature = "display" }, + subformula = { pdf = "Div", nature = "display" }, + + link = { pdf = "Link", nature = "inline" }, + + margintextblock = { pdf = "Span", nature = "inline" }, + margintext = { pdf = "Span", nature = "inline" }, + + math = { pdf = "Div", nature = "inline" }, -- no display + mn = { pdf = "Span", nature = "mixed" }, + mi = { pdf = "Span", nature = "mixed" }, + mo = { pdf = "Span", nature = "mixed" }, + ms = { pdf = "Span", nature = "mixed" }, + mrow = { pdf = "Span", nature = "display" }, + msubsup = { pdf = "Span", nature = "display" }, + msub = { pdf = "Span", nature = "display" }, + msup = { pdf = "Span", nature = "display" }, + merror = { pdf = "Span", nature = "mixed" }, + munderover = { pdf = "Span", nature = "display" }, + munder = { pdf = "Span", nature = "display" }, + mover = { pdf = "Span", nature = "display" }, + mtext = { pdf = "Span", nature = "mixed" }, + mfrac = { pdf = "Span", nature = "display" }, + mroot = { pdf = "Span", nature = "display" }, + msqrt = { pdf = "Span", nature = "display" }, + mfenced = { pdf = "Span", nature = "display" }, + maction = { pdf = "Span", nature = "display" }, + + mstacker = { pdf = "Span", nature = "display" }, -- these are only internally used + mstackertop = { pdf = "Span", nature = "display" }, -- these are only internally used + mstackerbot = { pdf = "Span", nature = "display" }, -- these are only internally used + mstackermid = { pdf = "Span", nature = "display" }, -- these are only internally used + + mtable = { pdf = "Table", nature = "display" }, -- might change + mtr = { pdf = "TR", nature = "display" }, -- might change + mtd = { pdf = "TD", nature = "display" }, -- might change + + ignore = { pdf = "Span", nature = "mixed" }, -- used internally + private = { pdf = "Span", nature = "mixed" }, -- for users (like LS) when they need it + metadata = { pdf = "Div", nature = "display" }, + metavariable = { pdf = "Span", nature = "mixed" }, + + mid = { pdf = "Span", nature = "inline" }, + sub = { pdf = "Span", nature = "inline" }, + sup = { pdf = "Span", nature = "inline" }, + subsup = { pdf = "Span", nature = "inline" }, + + combination = { pdf = "Span", nature = "display" }, + combinationpair = { pdf = "Span", nature = "display" }, + combinationcontent = { pdf = "Span", nature = "mixed" }, + combinationcaption = { pdf = "Span", nature = "mixed" }, +} + +tags.properties = properties + +local patterns = setmetatableindex(function(t,tag) + local v = topattern("^" .. tag .. ">") + t[tag] = v + return v +end) + +function tags.locatedtag(tag) + local attribute = texattribute[a_tagged] + if attribute >= 0 then + local specification = taglist[attribute] + if specification then + local taglist = specification.taglist + local pattern = patterns[tag] + for i=#taglist,1,-1 do + local t = taglist[i] + if find(t,pattern) then + return t + end + end + end + else + -- enabled but not auto + end + return false -- handy as bogus index +end + +function structures.atlocation(str) + local specification = taglist[texattribute[a_tagged]] + if specification then + if list then + local taglist = specification.taglist + local pattern = patterns[str] + for i=#list,1,-1 do + if find(list[i],pattern) then + return true + end + end + end + end +end + +function tags.setproperty(tag,key,value) + local p = properties[tag] + if p then + p[key] = value + else + properties[tag] = { [key] = value } + end +end + +function tags.setaspect(key,value) + local tag = chain[stacksize] + if tag then + local p = properties[tag] + if p then + p[key] = value + else + properties[tag] = { [key] = value } + end + end +end + +function tags.registermetadata(data) + local d = settings_to_hash(data) + if #chain > 1 then + if metadata then + merge(metadata,d) + else + metadata = d + end + else + merge(documentdata,d) + end +end + +function tags.start(tag,specification) + if not enabled then + codeinjections.enabletags() + enabled = true + end + -- + labels[tag] = tag -- can go away + -- + local attribute = #taglist + 1 + local tagindex = (ids[tag] or 0) + 1 + -- + local completetag = tag .. ">" .. tagindex + -- + ids[tag] = tagindex + lasttags[tag] = tagindex + stacksize = stacksize + 1 + -- + chain[stacksize] = completetag + stack[stacksize] = attribute + tagcontext[tag] = completetag + -- + local tagnesting = { unpack(chain,1,stacksize) } -- a copy so we can add actualtext + -- + if specification then + specification.attribute = attribute + specification.tagindex = tagindex + specification.taglist = tagnesting + specification.tagname = tag + if metadata then + specification.metadata = metadata + metadata = nil + end + local userdata = specification.userdata + if user ~= "" and type(userdata) == "string" then + specification.userdata = settings_to_hash(userdata) + end + local detail = specification.detail + if detail == "" then + specification.detail = nil + end + local parents = specification.parents + if parents == "" then + specification.parents = nil + end + else + specification = { + attribute = attribute, + tagindex = tagindex, + taglist = tagnesting, + tagname = tag, + metadata = metadata, + } + metadata = nil + end + -- + taglist[attribute] = specification + specifications[completetag] = specification + -- + if completetag == "document>1" then + specification.metadata = documentdata + end + -- + texattribute[a_tagged] = attribute + return attribute +end + +function tags.restart(attribute) + stacksize = stacksize + 1 + if type(attribute) == "number" then + local taglist = taglist[attribute].taglist + chain[stacksize] = taglist[#taglist] + else + chain[stacksize] = attribute -- a string + attribute = #taglist + 1 + taglist[attribute] = { taglist = { unpack(chain,1,stacksize) } } + end + stack[stacksize] = attribute + texattribute[a_tagged] = attribute + return attribute +end + +function tags.stop() + if stacksize > 0 then + stacksize = stacksize - 1 + end + local t = stack[stacksize] + if not t then + -- if trace_tags then + report_tags("ignoring end tag, previous chain: %s",stacksize > 0 and concat(chain," ",1,stacksize) or "none") + -- end + t = unsetvalue + end + texattribute[a_tagged] = t + return t +end + +function tags.getid(tag,detail) + return ids[tag] or "?" +end + +function tags.last(tag) + return lasttags[tag] -- or false +end + +function tags.lastinchain(tag) + if tag and tag ~= "" then + return tagcontext[tag] + else + return chain[stacksize] + end +end + +local strip = C((1-S(">"))^1) + +function tags.elementtag() + local fulltag = chain[stacksize] + if fulltag then + return lpegmatch(strip,fulltag) + end +end + +function tags.strip(fulltag) + return lpegmatch(strip,fulltag) +end + +function tags.setuserproperties(tag,list) + if not list or list == "" then + tag, list = chain[stacksize], tag + else + tag = tagcontext[tag] + end + if tag then -- an attribute now + local l = settings_to_hash(list) + local s = specifications[tag] + if s then + local u = s.userdata + if u then + for k, v in next, l do + u[k] = v + end + else + s.userdata = l + end + else + -- error + end + end +end + +function tags.handler(head) -- we need a dummy + return head, false +end + +statistics.register("structure elements", function() + if enabled then + if stacksize > 0 then + return format("%s element chains identified, open chain: %s ",#taglist,concat(chain," => ",1,stacksize)) + else + return format("%s element chains identified",#taglist) + end + end +end) + +directives.register("backend.addtags", function(v) + if not enabled then + codeinjections.enabletags() + enabled = true + end +end) + +-- interface + +local starttag = tags.start + +implement { + name = "starttag", + actions = starttag, + arguments = { "string" } +} + +implement { + name = "stoptag", + actions = tags.stop, +} + +implement { + name = "starttag_u", + scope = "private", + actions = function(tag,userdata) starttag(tag,{ userdata = userdata }) end, + arguments = { "string", "string" } +} + +implement { + name = "starttag_d", + scope = "private", + actions = function(tag,detail) starttag(tag,{ detail = detail }) end, + arguments = { "string", "string" } +} + +implement { + name = "starttag_c", + scope = "private", + actions = function(tag,detail,parents) starttag(tag,{ detail = detail, parents = parents }) end, + arguments = { "string", "string", "string" } +} + +implement { name = "settagaspect", actions = tags.setaspect, arguments = { "string", "string" } } + +implement { name = "settagproperty", actions = tags.setproperty, arguments = { "string", "string", "string" } } +implement { name = "settagproperty_b", actions = tags.setproperty, arguments = { "string", "'backend'", "string" }, scope = "private" } +implement { name = "settagproperty_n", actions = tags.setproperty, arguments = { "string", "'nature'", "string" }, scope = "private" } + +implement { name = "getelementtag", actions = { tags.elementtag, context } } + +implement { + name = "setelementuserproperties", + scope = "private", + actions = tags.setuserproperties, + arguments = { "string", "string" } +} + +implement { + name = "doifelseinelement", + actions = { structures.atlocation, commands.testcase }, + arguments = "string", +} + +implement { + name = "settaggedmetadata", + actions = tags.registermetadata, + arguments = "string" +} |