From 8d8d528d2ad52599f11250cfc567fea4f37f2a8b Mon Sep 17 00:00:00 2001 From: Context Git Mirror Bot Date: Tue, 12 Jan 2016 17:15:07 +0100 Subject: 2016-01-12 16:26:00 --- tex/context/base/mkiv/lpdf-tag.lua | 618 +++++++++++++++++++++++++++++++++++++ 1 file changed, 618 insertions(+) create mode 100644 tex/context/base/mkiv/lpdf-tag.lua (limited to 'tex/context/base/mkiv/lpdf-tag.lua') diff --git a/tex/context/base/mkiv/lpdf-tag.lua b/tex/context/base/mkiv/lpdf-tag.lua new file mode 100644 index 000000000..f20e4bfe8 --- /dev/null +++ b/tex/context/base/mkiv/lpdf-tag.lua @@ -0,0 +1,618 @@ +if not modules then modules = { } end modules ['lpdf-tag'] = { + version = 1.001, + comment = "companion to lpdf-tag.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local next = next +local format, match, concat = string.format, string.match, table.concat +local lpegmatch, P, S, C = lpeg.match, lpeg.P, lpeg.S, lpeg.C +local utfchar = utf.char +local settings_to_hash = utilities.parsers.settings_to_hash +local formatters = string.formatters + +local trace_tags = false trackers.register("structures.tags", function(v) trace_tags = v end) + +local report_tags = logs.reporter("backend","tags") + +local backends = backends +local lpdf = lpdf +local nodes = nodes + +local nodeinjections = backends.pdf.nodeinjections +local codeinjections = backends.pdf.codeinjections + +local tasks = nodes.tasks + +local pdfdictionary = lpdf.dictionary +local pdfarray = lpdf.array +local pdfboolean = lpdf.boolean +local pdfconstant = lpdf.constant +local pdfreference = lpdf.reference +local pdfunicode = lpdf.unicode +local pdfstring = lpdf.string +local pdfflushobject = lpdf.flushobject +local pdfreserveobject = lpdf.reserveobject +local pdfpagereference = lpdf.pagereference + +local addtocatalog = lpdf.addtocatalog +local addtopageattributes = lpdf.addtopageattributes + +local texgetcount = 
tex.getcount + +local nodecodes = nodes.nodecodes + +local hlist_code = nodecodes.hlist +local vlist_code = nodecodes.vlist +local glyph_code = nodecodes.glyph + +local a_tagged = attributes.private('tagged') +local a_image = attributes.private('image') + +local nuts = nodes.nuts +local tonut = nuts.tonut +local tonode = nuts.tonode + +local nodepool = nuts.pool +local pdfliteral = nodepool.pdfliteral + +local getid = nuts.getid +local getattr = nuts.getattr +local getprev = nuts.getprev +local getnext = nuts.getnext +local getlist = nuts.getlist + +local setfield = nuts.setfield +local setlink = nuts.setlink +local setlist = nuts.setlist + +local traverse_nodes = nuts.traverse +local tosequence = nuts.tosequence +local copy_node = nuts.copy +local slide_nodelist = nuts.slide +local insert_before = nuts.insert_before +local insert_after = nuts.insert_after + +local structure_stack = { } +local structure_kids = pdfarray() +local structure_ref = pdfreserveobject() +local parent_ref = pdfreserveobject() +local root = { pref = pdfreference(structure_ref), kids = structure_kids } +local tree = { } +local elements = { } +local names = pdfarray() + +local structurestags = structures.tags +local taglist = structurestags.taglist +local specifications = structurestags.specifications +local usedlabels = structurestags.labels +local properties = structurestags.properties +local lasttaginchain = structurestags.lastinchain +local usewithcare = structurestags.usewithcare + +local usedmapping = { } + +----- tagsplitter = structurestags.patterns.splitter + +-- local embeddedtags = false -- true will id all, for tracing +-- local f_tagid = formatters["%s-%04i"] +-- local embeddedfilelist = pdfarray() -- /AF crap +-- +-- directives.register("structures.tags.embedmath",function(v) +-- if not v then +-- -- only enable +-- elseif embeddedtags == true then +-- -- already all tagged +-- elseif embeddedtags then +-- embeddedtags.math = true +-- else +-- embeddedtags = { math = true } +-- 
--     end
-- end)

-- function codeinjections.maptag(original,target,kind)
--     mapping[original] = { target, kind or "inline" }
-- end

-- Document finalizer: writes the structure tree root, the parent tree, the
-- role map and the MarkInfo dictionary, and flushes the kids array of every
-- element that was created. Nothing is written when no element was ever
-- added to the root.

local function finishstructure()
    if #structure_kids > 0 then
        -- per page list of element references, keyed by the zero based
        -- page number (finishpage sets StructParents to pagenum-1)
        local nums, n = pdfarray(), 0
        for i=1,#tree do
            n = n + 1 ; nums[n] = i - 1
            n = n + 1 ; nums[n] = pdfreference(pdfflushobject(tree[i]))
        end
        local parenttree = pdfdictionary {
            Nums = nums
        }
        -- we need to split names into smaller parts (e.g. alphabetic or so)
        -- we already have code for that somewhere
        --
        -- idtree has to be visible where the root dictionary is assembled, so
        -- it is declared here and not inside the branch
        local idtree = nil
        if #names > 0 then
            -- names holds alternating id / reference pairs (see makeelement)
            -- so the last id sits at #names-1
            local kids = pdfdictionary {
                Limits = pdfarray { names[1], names[#names-1] },
                Names  = names,
            }
            idtree = pdfdictionary {
                Kids = pdfarray { pdfreference(pdfflushobject(kids)) },
            }
        end
        --
        local rolemap = pdfdictionary()
        for tag in next, usedmapping do
            local label    = usedlabels[tag] or tag
            local property = properties[label]
            rolemap[label] = pdfconstant(property and property.pdf or "Span") -- or "Div"
        end
        local structuretree = pdfdictionary {
            Type       = pdfconstant("StructTreeRoot"),
            K          = pdfreference(pdfflushobject(structure_kids)),
            ParentTree = pdfreference(pdfflushobject(parent_ref,parenttree)),
            IDTree     = idtree and pdfreference(pdfflushobject(idtree)) or nil,
            RoleMap    = rolemap,
        }
        pdfflushobject(structure_ref,structuretree)
        addtocatalog("StructTreeRoot",pdfreference(structure_ref))
        --
        local markinfo = pdfdictionary {
            Marked         = pdfboolean(true),
         -- UserProperties = pdfboolean(true),
         -- Suspects       = pdfboolean(true),
         -- AF             = #embeddedfilelist > 0 and pdfreference(pdfflushobject(embeddedfilelist)) or nil,
        }
        addtocatalog("MarkInfo",pdfreference(pdfflushobject(markinfo)))
        --
        -- the kids arrays kept growing till now, so they are flushed last,
        -- each in the object that was reserved for it in makeelement
        for fulltag, element in next, elements do
            pdfflushobject(element.knum,element.kids)
        end
    end
end

lpdf.registerdocumentfinalizer(finishstructure,"document structure")

-- per page state: index is the number of marked content sequences handed out
-- on the current page, list is the page related array of element references

local index, pageref, pagenum, list = 0, nil, 0, nil

local pdf_mcr            = pdfconstant("MCR")
local pdf_struct_element = pdfconstant("StructElem")

-- Resets the per page state and registers a fresh list for the current page.

local function initializepage()
    index         = 0
    pagenum       = texgetcount("realpageno")
    pageref       = pdfreference(pdfpagereference(pagenum))
    list          = pdfarray()
    tree[pagenum] = list -- we can flush after done, todo
end

-- Adds the (zero based) StructParents key to the page attributes.

local function finishpage()
    -- flush what can be flushed
    addtopageattributes("StructParents",pagenum-1)
end

-- here we can flush and free elements that are finished

local pdf_userproperties = pdfconstant("UserProperties")

-- Turns a non empty hash into a UserProperties attribute dictionary with one
-- N/V pair per entry; returns nothing for nil or an empty table.

local function makeattribute(t)
    if t and next(t) then
        local entries = pdfarray() -- not 'properties', that one is an upvalue
        for k, v in next, t do
            entries[#entries+1] = pdfdictionary {
                N = pdfunicode(k),
                V = pdfunicode(v),
            }
        end
        return pdfdictionary {
            O = pdf_userproperties,
            P = entries,
        }
    end
end

-- Creates the structure element for fulltag below parent and caches it in
-- elements. Returns false for an "ignore" tag, true for the not yet supported
-- mstacker tags (the caller skips those) and the element record otherwise.

local function makeelement(fulltag,parent)
    local specification = specifications[fulltag]
    local tag           = specification.tagname
    if tag == "ignore" then
        return false
    elseif tag == "mstackertop" or tag == "mstackerbot" or tag == "mstackermid"then
        -- TODO
        return true
    end
    --
    local detail   = specification.detail
    local userdata = specification.userdata
    --
    usedmapping[tag] = true
    --
    -- specification.attribute is unique
    --
    -- These three used to be partly undeclared (a, af) and therefore looked
    -- up as globals; they are explicit placeholders now so that a stray
    -- global cannot end up in the dictionary.
    --
    -- NOTE(review): userdata is fetched but never used; it is presumably the
    -- intended input for makeattribute, confirm before wiring it in.
    --
    local id         = nil
    local af         = nil
    local attributes = nil
    -- if embeddedtags then
    --     local tagname  = specification.tagname
    --     local tagindex = specification.tagindex
    --     if embeddedtags == true or embeddedtags[tagname] then
    --         id = f_tagid(tagname,tagindex)
    --         af = job.fileobjreferences.collected[id]
    --         if af then
    --             local r = pdfreference(af)
    --             af = pdfarray { r }
    --          -- embeddedfilelist[#embeddedfilelist+1] = r
    --         end
    --     end
    -- end
    --
    local k = pdfarray()         -- kids, filled while content gets added
    local r = pdfreserveobject() -- object that will hold the kids (flushed in finishstructure)
    local t = usedlabels[tag] or tag
    local d = pdfdictionary {
        Type       = pdf_struct_element,
        S          = pdfconstant(t),
        ID         = id,
        T          = detail and detail or nil,
        P          = parent.pref,
        Pg         = pageref,
        K          = pdfreference(r),
        A          = attributes and makeattribute(attributes) or nil,
     -- Alt        = " Who cares ",
     -- ActualText = " Hi Hans ",
        AF         = af,
    }
    local s = pdfreference(pdfflushobject(d))
    if id then
        -- alternating id / reference pairs, see the IDTree in finishstructure
        names[#names+1] = id
        names[#names+1] = s
    end
    local kids = parent.kids
    kids[#kids+1] = s
    local e = {
        tag  = t,
        pref = s,
        kids = k,
        knum = r,
        pnum = pagenum
    }
    elements[fulltag] = e
    return e
end

-- The marked content operator has to carry the MCID that is stored in the
-- structure tree: makecontent passes both the tag and that id.

local f_BDC = formatters["/%s <</MCID %s>> BDC"]

-- Registers the next marked content sequence of the current page as a kid of
-- parent and returns the literal that opens it. An image, or content on a page
-- other than the one where the parent was created, gets a marked content
-- reference dictionary; otherwise the bare id is stored.

local function makecontent(parent,id,specification)
    local tag  = parent.tag
    local kids = parent.kids
    local last = index -- zero based id of this sequence on the page
    if id == "image" then
        local tags  = specification.taglist -- not 'list', that one is the page list
        local data  = usewithcare.images[tags[#tags]]
        local label = data and data.label
        local d = pdfdictionary {
            Type = pdf_mcr,
            Pg   = pageref,
            MCID = last,
            Alt  = pdfunicode(label ~= "" and label or "image"),
        }
        kids[#kids+1] = d
    elseif pagenum == parent.pnum then
        kids[#kids+1] = last
    else
        local d = pdfdictionary {
            Type = pdf_mcr,
            Pg   = pageref,
            MCID = last,
        }
     -- kids[#kids+1] = pdfreference(pdfflushobject(d))
        kids[#kids+1] = d
    end
    --
    index = index + 1
    list[index] = parent.pref -- page related list
    --
    return f_BDC(tag,last)
end

-- Returns the literal that opens an artifact; the argument is not used.

local function makeignore(specification)
    return "/Artifact BMC"
end

-- no need to adapt head, as we always operate on lists

-- Shipout handler: collects runs of glyphs that share a tagged attribute
-- (and tagged image boxes), creates the structure elements that are still
-- missing and wraps each run in a BDC (or Artifact BMC) / EMC literal pair.
-- Returns the (node mode) head and true.

function nodeinjections.addtags(head)

    local last    = nil
    local ranges  = { }
    local range   = nil
    local nuthead = tonut(head)

    local function collectranges(current,list)
        for n in traverse_nodes(current) do
            local id = getid(n) -- 14: image, 8: literal (mp)
            if id == glyph_code then
                local at = getattr(n,a_tagged)
                if not at then
                    range = nil
                elseif last ~= at then
                    range = { at, "glyph", n, n, list } -- attr id start stop list
                    ranges[#ranges+1] = range
                    last = at
                elseif range then
                    range[4] = n -- stop
                end
            elseif id == hlist_code or id == vlist_code then
                if getattr(n,a_image) then
                    local at = getattr(n,a_tagged)
                    if not at then
                        range = nil
                    else
                        ranges[#ranges+1] = { at, "image", n, n, list } -- attr id start stop list
                    end
                    last = nil
                else
                    local nl = getlist(n)
                 -- slide_nodelist(nl) -- temporary hack till math gets slided (tracker item)
                    collectranges(nl,n)
                end
            end
        end
    end

    initializepage()

    collectranges(nuthead)

    if trace_tags then
        for i=1,#ranges do
            local range = ranges[i]
            local attr  = range[1]
            local start = range[3]
            local stop  = range[4]
            local tags  = taglist[attr]
            if tags then -- not ok ... only first lines
                report_tags("%s => %s : %05i % t",tosequence(start,start),tosequence(stop,stop),attr,tags.taglist)
            end
        end
    end

    local top    = nil -- tag chain of the previous range
    local noftop = 0

    for i=1,#ranges do
        local range         = ranges[i]
        local attr          = range[1]
        local id            = range[2]
        local start         = range[3]
        local stop          = range[4]
        local list          = range[5]
        local specification = taglist[attr]

        -- the tracer above already anticipates attributes without an entry
        -- so we skip those here instead of indexing nil

        if specification then

            local tags    = specification.taglist
            local noftags = #tags
            local common  = 0

            if top then
                -- length of the chain shared with the previous range
                local limit = noftags >= noftop and noftop or noftags
                for j=1,limit do
                    if top[j] == tags[j] then
                        common = j
                    else
                        break
                    end
                end
            end

            local parent  = common > 0 and elements[tags[common]] or root
            local ignore  = false
            local literal = nil

            for j=common+1,noftags do
                local tag     = tags[j]
                local element = elements[tag] or makeelement(tag,parent)
                if element == false then
                    -- ignore this one
                    parent = false
                    ignore = true
                    break
                elseif element == true then
                    -- skip this one
                else
                    parent = element
                end
            end

            if parent then
                literal = pdfliteral(makecontent(parent,id,specification))
            elseif ignore then
                literal = pdfliteral(makeignore(specification))
            end

            if literal then
                local before = getprev(start)
                if before then
                    setlink(before,literal)
                end
                setlink(literal,start)
                if list and getlist(list) == start then
                    -- the run opened the enclosing list so that one gets a new head
                    setlist(list,literal)
                end
                -- use insert instead:
                local closer = pdfliteral("EMC")
                local after  = getnext(stop) -- not 'next', that one is the iterator
                if after then
                    setlink(closer,after)
                end
                setlink(stop,closer)
            end

            top    = tags
            noftop = noftags

        end
    end

    finishpage()

    return tonode(nuthead), true

end

-- variant: more structure but funny collapsing in viewer

-- function nodeinjections.addtags(head)
--
--     local last, ranges, range = nil, { }, nil
--
--     local function collectranges(head,list)
--         for n in traverse_nodes(head) do
--             local id = getid(n) -- 14: image, 8: literal (mp)
--             if id == glyph_code then
--                 local at = getattr(n,a_tagged)
--                 if not at then
--                     range = nil
--                 elseif last ~= at then
--                     range = { at, "glyph", n, n, list } -- attr id start stop list
--                     ranges[#ranges+1] = range
--                     last = at
--                 elseif range then
--                     range[4] = n -- stop
--                 end
--             elseif id == hlist_code or id == vlist_code then
--                 local at = getattr(n,a_image)
--                 if at then
--                     local at = getattr(n,a_tagged)
--                     if not at then
--                         range = nil
--                     else
--                         ranges[#ranges+1] = { at, "image", n, n, list } -- attr id start stop list
--                     end
--                     last = nil
--                 else
--                     local nl = getlist(n)
--                  -- slide_nodelist(nl) -- temporary hack till math gets slided (tracker item)
--                     collectranges(nl,n)
--                 end
--             end
--         end
--     end
--
--     initializepage()
--
--     head = tonut(head)
--     collectranges(head)
--
--     if trace_tags then
--         for i=1,#ranges do
--             local range = ranges[i]
--             local attr  = range[1]
--             local id    = range[2]
--             local start = range[3]
--             local stop  = range[4]
--             local tags  = taglist[attr]
--             if tags then -- not ok ...
only first lines +-- report_tags("%s => %s : %05i % t",tosequence(start,start),tosequence(stop,stop),attr,tags.taglist) +-- end +-- end +-- end +-- +-- local top = nil +-- local noftop = 0 +-- local last = nil +-- +-- for i=1,#ranges do +-- local range = ranges[i] +-- local attr = range[1] +-- local id = range[2] +-- local start = range[3] +-- local stop = range[4] +-- local list = range[5] +-- local specification = taglist[attr] +-- local taglist = specification.taglist +-- local noftags = #taglist +-- local tag = nil +-- local common = 0 +-- -- local prev = root +-- +-- if top then +-- for i=1,noftags >= noftop and noftop or noftags do +-- if top[i] == taglist[i] then +-- common = i +-- else +-- break +-- end +-- end +-- end +-- +-- local result = { } +-- local r = noftop - common +-- if r > 0 then +-- for i=1,r do +-- result[i] = "EMC" +-- end +-- end +-- +-- local prev = common > 0 and elements[taglist[common]] or root +-- +-- for j=common+1,noftags do +-- local tag = taglist[j] +-- local prv = elements[tag] or makeelement(tag,prev) +-- -- if prv == false then +-- -- -- ignore this one +-- -- prev = false +-- -- break +-- -- elseif prv == true then +-- -- -- skip this one +-- -- else +-- prev = prv +-- r = r + 1 +-- result[r] = makecontent(prev,id) +-- -- end +-- end +-- +-- if r > 0 then +-- local literal = pdfliteral(concat(result,"\n")) +-- -- use insert instead: +-- local literal = pdfliteral(result) +-- local prev = getprev(start) +-- if prev then +-- setlink(prev,literal) +-- end +-- setlink(literal,start) +-- if list and getlist(list) == start then +-- setlist(list,literal) +-- end +-- end +-- +-- top = taglist +-- noftop = noftags +-- last = stop +-- +-- end +-- +-- if last and noftop > 0 then +-- local result = { } +-- for i=1,noftop do +-- result[i] = "EMC" +-- end +-- local literal = pdfliteral(concat(result,"\n")) +-- -- use insert instead: +-- local next = getnext(last) +-- if next then +-- setlink(literal,next) +-- end +-- setlink(last,literal) 
--     end
--
--     finishpage()
--
--     head = tonode(head)
--     return head, true
--
-- end

-- this belongs elsewhere (export is not pdf related)

-- Installs nodeinjections.addtags as structures.tags.handler and switches on
-- the shipout and math actions listed below. Neither argument is used.

function codeinjections.enabletags(tg,lb)
    local enableaction = tasks.enableaction
    -- the handler is installed before the action that carries its name is enabled
    structures.tags.handler = nodeinjections.addtags
    enableaction("shipouts","structures.tags.handler")
    enableaction("shipouts","nodes.handlers.accessibility")
    enableaction("math","noads.handlers.tags")
    -- maybe also textblock
    if not trace_tags then
        return
    end
    report_tags("enabling structure tags")
end

-- cgit v1.2.3