From f58a2cb5d22c5931581274db1f0ec85ac903b747 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Mon, 28 Oct 2019 20:03:50 +0100 Subject: 2019-10-28 18:22:00 --- tex/context/base/mkiv/lpdf-tag.lua | 158 +++++++++++++++++++++++-------------- 1 file changed, 99 insertions(+), 59 deletions(-) (limited to 'tex/context/base/mkiv/lpdf-tag.lua') diff --git a/tex/context/base/mkiv/lpdf-tag.lua b/tex/context/base/mkiv/lpdf-tag.lua index 0a2fe679e..d99e09243 100644 --- a/tex/context/base/mkiv/lpdf-tag.lua +++ b/tex/context/base/mkiv/lpdf-tag.lua @@ -6,11 +6,11 @@ if not modules then modules = { } end modules ['lpdf-tag'] = { license = "see context related readme files" } -local next = next -local format, match, concat = string.format, string.match, table.concat +local next, type = next, type +local format, match, gmatch = string.format, string.match, string.gmatch +local concat, sortedhash = table.concat, table.sortedhash local lpegmatch, P, S, C = lpeg.match, lpeg.P, lpeg.S, lpeg.C local settings_to_hash = utilities.parsers.settings_to_hash -local sortedhash = table.sortedhash local formatters = string.formatters local trace_tags = false trackers.register("structures.tags", function(v) trace_tags = v end) @@ -36,6 +36,7 @@ local pdfunicode = lpdf.unicode local pdfflushobject = lpdf.flushobject local pdfreserveobject = lpdf.reserveobject local pdfpagereference = lpdf.pagereference +local pdfmakenametree = lpdf.makenametree local addtocatalog = lpdf.addtocatalog local addtopageattributes = lpdf.addtopageattributes @@ -62,6 +63,7 @@ local getattr = nuts.getattr local getprev = nuts.getprev local getnext = nuts.getnext local getlist = nuts.getlist +local getchar = nuts.getchar local setlink = nuts.setlink local setlist = nuts.setlist @@ -75,7 +77,7 @@ local structure_kids -- delayed local structure_ref -- delayed local parent_ref -- delayed local root -- delayed -local names -- delayed +local names = { } local tree = { } local elements = { } @@ -90,29 +92,49 @@ local usedmapping = { } ----- tagsplitter = structurestags.patterns.splitter --- local embeddedtags = false -- true will id all, for tracing --- local f_tagid = formatters["%s-%04i"] --- local embeddedfilelist = pdfarray() -- /AF crap --- --- directives.register("structures.tags.embedmath",function(v) --- if not v then --- -- only enable --- elseif embeddedtags == true then --- -- already all tagged --- elseif embeddedtags then --- embeddedtags.math = true --- else --- embeddedtags = { math = true } --- end --- end) +local embeddedtags = false -- true will id all, for tracing, otherwise table +local f_tagid = formatters["%s-%04i"] +local embeddedfilelist = pdfarray() -- /AF crap --- function codeinjections.maptag(original,target,kind) --- mapping[original] = { target, kind or "inline" } --- end +-- for testing, not that it was ever used: + +directives.register("structures.tags.embed",function(v) + if type(v) == "string" then + if type(embeddedtags) ~= "table" then + embeddedtags = { } + end + for s in gmatch(v,"([^, ]+)") do + embeddedtags[s] = true + end + elseif v and not embeddedtags then + embeddedtags = true + end +end) + +-- for old times sake, not that it was ever used: + +directives.register("structures.tags.embedmath",function(v) + if not v then + -- only enable + elseif embeddedtags == true then + -- already all tagged + elseif embeddedtags then + embeddedtags.math = true + else + embeddedtags = { math = true } + end +end) + +function codeinjections.maptag(original,target,kind) + mapping[original] = { target, kind or "inline" } +end + +-- mostly the same as the annotations tree local function finishstructure() if root and #structure_kids > 0 then - local nums, n = pdfarray(), 0 + local nums = pdfarray() + local n = 0 for i=1,#tree do n = n + 1 ; nums[n] = i - 1 n = n + 1 ; nums[n] = pdfreference(pdfflushobject(tree[i])) @@ -120,17 +142,7 @@ local function finishstructure() local parenttree = pdfdictionary { Nums = nums } - -- we need to split names into smaller parts (e.g. alphabetic or so) - -- we already have code for that somewhere - if #names > 0 then - local kids = pdfdictionary { - Limits = pdfarray { names[1], names[#names-1] }, - Names = names, - } - local idtree = pdfdictionary { - Kids = pdfarray { pdfreference(pdfflushobject(kids)) }, - } - end + local idtree = pdfmakenametree(names) -- local rolemap = pdfdictionary() for k, v in next, usedmapping do @@ -142,7 +154,7 @@ local function finishstructure() Type = pdfconstant("StructTreeRoot"), K = pdfreference(pdfflushobject(structure_kids)), ParentTree = pdfreference(pdfflushobject(parent_ref,parenttree)), - IDTree = #names > 0 and pdfreference(pdfflushobject(idtree)) or nil, + IDTree = idtree, RoleMap = rolemap, -- sorted ? } pdfflushobject(structure_ref,structuretree) @@ -191,6 +203,9 @@ end local pdf_userproperties = pdfconstant("UserProperties") +-- /O /Table +-- /Headers [ ] + local function makeattribute(t) if t and next(t) then local properties = pdfarray() @@ -209,40 +224,65 @@ end local function makeelement(fulltag,parent) local specification = specifications[fulltag] - local tag = specification.tagname - if tag == "ignore" then + local tagname = specification.tagname + local tagnameused = tagname + local attributes = nil + if tagname == "ignore" then return false - elseif tag == "mstackertop" or tag == "mstackerbot" or tag == "mstackermid"then + elseif tagname == "mstackertop" or tagname == "mstackerbot" or tagname == "mstackermid"then -- TODO return true + elseif tagname == "tabulatecell" then + local d = structurestags.gettabulatecell(fulltag) + if d and d.kind == 1 then + tagnameused = "tabulateheadcell" + end + elseif tagname == "tablecell" then + -- will become a plugin model + local d = structurestags.gettablecell(fulltag) + if d then + if d.kind == 1 then + tagnameused = "tableheadcell" + end + local rows = d.rows or 1 + local cols = d.columns or 1 + if rows > 1 or cols > 1 then + attributes = pdfdictionary { + O = pdfconstant("Table"), + RowSpan = rows > 1 and rows or nil, + ColSpan = cols > 1 and cols or nil, + } + end + + end end -- local detail = specification.detail local userdata = specification.userdata -- - usedmapping[tag] = true + usedmapping[tagname] = true -- -- specification.attribute is unique -- local id = nil - -- local af = nil - -- if embeddedtags then - -- local tagname = specification.tagname - -- local tagindex = specification.tagindex - -- if embeddedtags == true or embeddedtags[tagname] then - -- id = f_tagid(tagname,tagindex) - -- af = job.fileobjreferences.collected[id] - -- if af then - -- local r = pdfreference(af) - -- af = pdfarray { r } - -- -- embeddedfilelist[#embeddedfilelist+1] = r - -- end - -- end - -- end + local af = nil + if embeddedtags then + local tagindex = specification.tagindex + if embeddedtags == true or embeddedtags[tagname] then + id = f_tagid(tagname,tagindex) + af = job.fileobjreferences.collected[id] + if af then + local r = pdfreference(af) + af = pdfarray { r } + -- embeddedfilelist[#embeddedfilelist+1] = r + end + end + end -- local k = pdfarray() local r = pdfreserveobject() - local t = usedlabels[tag] or tag + local t = usedlabels[tagnameused] or tagnameused + -- local a = nil local d = pdfdictionary { Type = pdf_struct_element, S = pdfconstant(t), @@ -251,16 +291,15 @@ local function makeelement(fulltag,parent) P = parent.pref, Pg = pageref, K = pdfreference(r), - A = a and makeattribute(a) or nil, + -- A = a and makeattribute(a) or nil, + A = attributes, -- Alt = " Who cares ", -- ActualText = " Hi Hans ", AF = af, } local s = pdfreference(pdfflushobject(d)) if id and names then - local size = #names - names[size+1] = id - names[size+2] = s + names[id] = s end local kids = parent.kids kids[#kids+1] = s @@ -336,11 +375,11 @@ function nodeinjections.addtags(head) root = { pref = pdfreference(structure_ref), kids = structure_kids } names = pdfarray() end - local function collectranges(head,list) for n, id in nextnode, head do if id == glyph_code then -- maybe also disc +if getchar(n) ~= 0 then local at = getattr(n,a_tagged) or false -- false: pagebody or so, so artifact -- if not at then -- range = nil @@ -352,6 +391,7 @@ function nodeinjections.addtags(head) elseif range then range[4] = n -- stop end +end elseif id == hlist_code or id == vlist_code then local at = getattr(n,a_image) if at then -- cgit v1.2.3