diff options
author | Hans Hagen <pragma@wxs.nl> | 2018-05-12 01:19:03 +0200 |
---|---|---|
committer | Context Git Mirror Bot <phg42.2a@gmail.com> | 2018-05-12 01:19:03 +0200 |
commit | 77e216e323271fb85d508b7206b13c980540b74b (patch) | |
tree | 5b4053c2bbe5190e28c0dce89653c7b13aea0642 /scripts/context/lua/mtx-pdf.lua | |
parent | d817aef76ab8b606c02bd0636661b634b43a68a6 (diff) | |
download | context-77e216e323271fb85d508b7206b13c980540b74b.tar.gz |
2018-05-12 00:16:00
Diffstat (limited to 'scripts/context/lua/mtx-pdf.lua')
-rw-r--r-- | scripts/context/lua/mtx-pdf.lua | 183 |
1 files changed, 111 insertions, 72 deletions
diff --git a/scripts/context/lua/mtx-pdf.lua b/scripts/context/lua/mtx-pdf.lua index 7e05a9f74..ca0d2ea6b 100644 --- a/scripts/context/lua/mtx-pdf.lua +++ b/scripts/context/lua/mtx-pdf.lua @@ -75,28 +75,30 @@ function scripts.pdf.info(filename) local pages = pdffile.pages local nofpages = pages.n -- no # yet. will be in 5.2 - report("filename > %s",filename) - report("pdf version > %s",catalog.Version) - report("number of pages > %s",nofpages) - report("title > %s",info.Title) - report("creator > %s",info.Creator) - report("producer > %s",info.Producer) - report("creation date > %s",info.CreationDate) + report("filename > %s",filename) + report("pdf version > %s",catalog.Version) + report("major version > %s",pdffile.majorversion or "?") + report("minor version > %s",pdffile.minorversion or "?") + report("number of pages > %s",nofpages) + report("title > %s",info.Title) + report("creator > %s",info.Creator) + report("producer > %s",info.Producer) + report("creation date > %s",info.CreationDate) report("modification date > %s",info.ModDate) local width, height, start for i=1, nofpages do local page = pages[i] - local bbox = page.CropBox or page.MediaBox + local bbox = page.CropBox or page.MediaBox or { 0, 0, 0, 0 } local w, h = bbox[4]-bbox[2],bbox[3]-bbox[1] if w ~= width or h ~= height then if start then - report("cropbox > pages: %s-%s, width: %s, height: %s",start,i-1,width,height) + report("cropbox > pages: %s-%s, width: %s, height: %s",start,i-1,width,height) end width, height, start = w, h, i end end - report("cropbox > pages: %s-%s, width: %s, height: %s",start,nofpages,width,height) + report("cropbox > pages: %s-%s, width: %s, height: %s",start,nofpages,width,height) end end @@ -117,17 +119,33 @@ function scripts.pdf.metadata(filename,pretty) end end +local expand = lpdf.epdf.expand + local function getfonts(pdffile) local usedfonts = { } - for i=1,pdffile.pages.n do - local page = pdffile.pages[i] - local fontlist = page.Resources.Font - if fontlist then - for k, v in next, lpdf.epdf.expand(fontlist) do - usedfonts[k] = lpdf.epdf.expand(v) + + local function collect(where,tag) + local resources = where.Resources + if resources then + local fontlist = resources.Font + if fontlist then + for k, v in next, expand(fontlist) do + usedfonts[tag and (tag .. "." .. k) or k] = expand(v,k) + end + end + local objects = resources.XObject + if objects then + for k, v in next, expand(objects) do + collect(v,tag and (tag .. "." .. k) or k) + end end end end + + for i=1,pdffile.pages.n do + collect(pdffile.pages[i]) + end + return usedfonts end @@ -135,7 +153,8 @@ local function getunicodes(font) local cid = font.ToUnicode if cid then cid = cid() - local counts = { } + local counts = { } + local indices = { } -- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do -- for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do -- print(a,b) @@ -151,18 +170,20 @@ local function getunicodes(font) for i=first,last do local c = i + offset counts[c] = counts[c] + 1 + indices[i] = true end end end for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do for old, new in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do + indices[old] = true for n in gmatch(new,"....") do local c = tonumber(n,16) counts[c] = counts[c] + 1 end end end - return counts + return counts, indices end end @@ -171,11 +192,17 @@ function scripts.pdf.fonts(filename) if pdffile then local usedfonts = getfonts(pdffile) local found = { } + local common = table.setmetatableindex("table") for k, v in table.sortedhash(usedfonts) do - local counts = getunicodes(v) - local codes = { } - local chars = { } - local freqs = { } + local basefont = v.BaseFont + local encoding = v.Encoding + local subtype = v.Subtype + local unicode = v.ToUnicode + local counts, + indices = getunicodes(v) + local codes = { } + local chars = { } + local freqs = { } if counts then codes = sortedkeys(counts) for i=1,#codes do @@ -184,30 +211,42 @@ function scripts.pdf.fonts(filename) chars[i] = c freqs[i] = format("U+%05X %s %s",k,counts[k] > 1 and "+" or " ", c) end + if basefont and unicode then + local b = gsub(basefont,"^.*%+","") + local c = common[b] + for k in next, indices do + c[k] = true + end + end for i=1,#codes do codes[i] = format("U+%05X",codes[i]) end end found[k] = { - basefont = v.BaseFont or "no basefont", - encoding = v.Encoding or "no encoding", - subtype = v.Subtype or "no subtype", - unicode = v.ToUnicode and "unicode" or "no unicode", + basefont = basefont or "no basefont", + encoding = encoding or "no encoding", + subtype = subtype or "no subtype", + unicode = tounicode and "unicode" or "no unicode", chars = chars, codes = codes, freqs = freqs, } end - if environment.argument("detail") then + if environment.argument("detail") or environment.argument("details") then for k, v in sortedhash(found) do - report("id : %s",k) - report("basefont : %s",v.basefont) - report("encoding : %s",v.encoding) - report("subtype : %s",v.subtype) - report("unicode : %s",v.unicode) - report("characters : %s", concat(v.chars," ")) - report("codepoints : %s", concat(v.codes," ")) + report("id : %s", k) + report("basefont : %s", v.basefont) + report("encoding : %s", v.encoding) + report("subtype : %s", v.subtype) + report("unicode : %s", v.unicode) + report("characters : % t", v.chars) + report("codepoints : % t", v.codes) + report("") + end + for k, v in sortedhash(common) do + report("basefont : %s",k) + report("indices : % t", sortedkeys(v)) report("") end else @@ -229,42 +268,42 @@ end -- this is a quick hack ... proof of concept .. will change (derived from luigi's example) ... -- i will make a ctx wrapper -local qpdf -- just call qpdf, no need for a lib here - -function scripts.pdf.linearize(filename) - qpdf = qpdf or swiglib("qpdf.core") - local oldfile = filename or environment.files[1] - if not oldfile then - return - end - file.addsuffix(oldfile,"pdf") - if not lfs.isfile(oldfile) then - return - end - local newfile = environment.files[2] - if not newfile or file.removesuffix(oldfile) == file.removesuffix(newfile)then - newfile = file.addsuffix(file.removesuffix(oldfile) .. "-linearized","pdf") - end - local password = environment.arguments.password - local instance = qpdf.qpdf_init() - if bit32.band(qpdf.qpdf_read(instance,oldfile,password),qpdf.QPDF_ERRORS) ~= 0 then - report("unable to open input file") - elseif bit32.band(qpdf.qpdf_init_write(instance,newfile),qpdf.QPDF_ERRORS) ~= 0 then - report("unable to open output file") - else - report("linearizing %a into %a",oldfile,newfile) - qpdf.qpdf_set_static_ID(instance,qpdf.QPDF_TRUE) - qpdf.qpdf_set_linearization(instance,qpdf.QPDF_TRUE) - qpdf.qpdf_write(instance) - end - while qpdf.qpdf_more_warnings(instance) ~= 0 do - report("warning: %s",qpdf.qpdf_get_error_full_text(instance,qpdf.qpdf_next_warning(qpdf))) - end - if qpdf.qpdf_has_error(instance) ~= 0 then - report("error: %s",qpdf.qpdf_get_error_full_text(instance,qpdf.qpdf_get_error(qpdf))) - end - qpdf.qpdf_cleanup_p(instance) -end +-- local qpdf -- just call qpdf, no need for a lib here +-- +-- function scripts.pdf.linearize(filename) +-- qpdf = qpdf or swiglib("qpdf.core") +-- local oldfile = filename or environment.files[1] +-- if not oldfile then +-- return +-- end +-- file.addsuffix(oldfile,"pdf") +-- if not lfs.isfile(oldfile) then +-- return +-- end +-- local newfile = environment.files[2] +-- if not newfile or file.removesuffix(oldfile) == file.removesuffix(newfile)then +-- newfile = file.addsuffix(file.removesuffix(oldfile) .. "-linearized","pdf") +-- end +-- local password = environment.arguments.password +-- local instance = qpdf.qpdf_init() +-- if bit32.band(qpdf.qpdf_read(instance,oldfile,password),qpdf.QPDF_ERRORS) ~= 0 then +-- report("unable to open input file") +-- elseif bit32.band(qpdf.qpdf_init_write(instance,newfile),qpdf.QPDF_ERRORS) ~= 0 then +-- report("unable to open output file") +-- else +-- report("linearizing %a into %a",oldfile,newfile) +-- qpdf.qpdf_set_static_ID(instance,qpdf.QPDF_TRUE) +-- qpdf.qpdf_set_linearization(instance,qpdf.QPDF_TRUE) +-- qpdf.qpdf_write(instance) +-- end +-- while qpdf.qpdf_more_warnings(instance) ~= 0 do +-- report("warning: %s",qpdf.qpdf_get_error_full_text(instance,qpdf.qpdf_next_warning(qpdf))) +-- end +-- if qpdf.qpdf_has_error(instance) ~= 0 then +-- report("error: %s",qpdf.qpdf_get_error_full_text(instance,qpdf.qpdf_get_error(qpdf))) +-- end +-- qpdf.qpdf_cleanup_p(instance) +-- end -- scripts.pdf.info("e:/tmp/oeps.pdf") -- scripts.pdf.metadata("e:/tmp/oeps.pdf") @@ -281,8 +320,8 @@ elseif environment.argument("metadata") then scripts.pdf.metadata(filename) elseif environment.argument("fonts") then scripts.pdf.fonts(filename) -elseif environment.argument("linearize") then - scripts.pdf.linearize(filename) +-- elseif environment.argument("linearize") then +-- scripts.pdf.linearize(filename) elseif environment.argument("exporthelp") then application.export(environment.argument("exporthelp"),filename) else |