-- Registration record for this script: creates the global 'modules' table
-- when it does not exist yet and stores version, authorship and license
-- information under the key 'mtx-pdf'.
if not modules then modules = { } end modules ['mtx-pdf'] = {
version = 1.001,
comment = "companion to mtxrun.lua",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
copyright = "PRAGMA ADE / ConTeXt Development Team",
license = "see context related readme files"
}
local tonumber = tonumber
local format, gmatch, gsub, match, find = string.format, string.gmatch, string.gsub, string.match, string.find
local utfchar = utf.char
local concat, insert, swapped = table.concat, table.insert, table.swapped
local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys
-- Help text that is handed to logs.application below.
-- NOTE(review): the text looks truncated (the dangling "(" after "show used
-- fonts" and the stray '"/>' fragments), presumably markup got lost somewhere;
-- confirm against the upstream script before relying on the help output.
local helpinfo = [[
mtx-pdf
ConTeXt PDF Helpers
0.10
show some info about the given file
show metadata xml blob
replace newlines in metadata
show used fonts (
show object"/>
show links"/>
mtxrun --script pdf --info foo.pdf
mtxrun --script pdf --metadata foo.pdf
mtxrun --script pdf --metadata --pretty foo.pdf
mtxrun --script pdf --stream=4 foo.pdf
]]
-- The application object provides the help, export and report functions that
-- are used further on in this file.
local application = logs.application {
name = "mtx-pdf",
banner = "ConTeXt PDF Helpers 0.10",
helpinfo = helpinfo,
}
-- Shortcut to the application specific logger.
local report = application.report
-- Pick the pdf reader files depending on what the running engine provides
-- (the 'pdfe' global and the CONTEXTLMTXMODE flag) and load them in order;
-- every file is located with the resolver in the "tex" category.
do
    local libraries
    if not pdfe then
        libraries = { "lpdf-epd.lua" }
    elseif CONTEXTLMTXMODE then
        libraries = { "util-dim.lua", "lpdf-ini.lmt", "lpdf-pde.lmt" }
    else
        libraries = { "lpdf-pde.lua" }
    end
    for i=1,#libraries do
        dofile(resolvers.findfile(libraries[i],"tex"))
    end
end

-- Namespace in which the script functions are stored.
scripts     = scripts     or { }
scripts.pdf = scripts.pdf or { }

-- Both --detail and --details enable the verbose variants of the reports.
local details = environment.argument("detail") or environment.argument("details")
-- Load a pdf file with lpdf.epdf.load.
--
-- filename : path of the file to open
--
-- Returns the loaded document; when no name is given, the file does not
-- exist or loading fails, a message is reported and nothing is returned.
local function loadpdffile(filename)
    if not filename or filename == "" then
        report("no filename given")
        return
    end
    if not lfs.isfile(filename) then
        report("unknown file '%s'",filename)
        return
    end
    local document = lpdf.epdf.load(filename)
    if not document then
        report("no valid pdf file '%s'",filename)
        return
    end
    return document
end
-- Report general properties of a pdf file: versions, page count, the entries
-- of the Info dictionary, page box dimensions (all boxes when details are
-- requested, otherwise only the CropBox) and the number of annotations,
-- destinations, javascripts, widgets and embedded files.
--
-- filename : path of the pdf file to inspect
function scripts.pdf.info(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local catalog  = pdffile.Catalog
    -- the Info dictionary is optional in a pdf file, so fall back to an
    -- empty table instead of failing on the first lookup
    local info     = pdffile.Info or { }
    local pages    = pdffile.pages
    local nofpages = pdffile.nofpages or 0
    local unset    = ""
    report("%-17s > %s","filename",         filename)
    report("%-17s > %s","pdf version",      catalog.Version      or unset)
    report("%-17s > %s","major version",    pdffile.majorversion or unset)
    report("%-17s > %s","minor version",    pdffile.minorversion or unset)
    report("%-17s > %s","number of pages",  nofpages)
    report("%-17s > %s","title",            info.Title           or unset)
    report("%-17s > %s","creator",          info.Creator         or unset)
    report("%-17s > %s","producer",         info.Producer        or unset)
    report("%-17s > %s","author",           info.Author          or unset)
    report("%-17s > %s","creation date",    info.CreationDate    or unset)
    report("%-17s > %s","modification date",info.ModDate         or unset)

    -- Report the dimensions of the given page box, one line per range of
    -- consecutive pages that share the same dimensions. A page that lacks
    -- the box falls back to its MediaBox.
    local function somebox(what)
        local box = string.lower(what)
        local width, height, start
        for i=1,nofpages do
            local page = pages[i]
            local bbox = page[what] or page.MediaBox or { 0, 0, 0, 0 }
            -- a rectangle is { llx, lly, urx, ury }: the width is the
            -- horizontal extent and the height the vertical one
            local w, h = bbox[3]-bbox[1], bbox[4]-bbox[2]
            if w ~= width or h ~= height then
                if start then
                    report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,i-1,width,height)
                end
                width, height, start = w, h, i
            end
        end
        -- there is no pending range when the file has no pages
        if start then
            report("%-17s > pages: %s-%s, width: %s, height: %s",box,start,nofpages,width,height)
        end
    end

    -- Report how many entries a list has, but only when there are any;
    -- for hashed lists the keys are counted.
    local function somecount(what,list,hashed)
        if list then
            local n = hashed and #sortedkeys(list) or #list
            if n > 0 then
                report("%-17s > %s",what,n)
            end
        end
    end

    if details then
        somebox("MediaBox")
        somebox("ArtBox")
        somebox("BleedBox")
        somebox("CropBox")
        somebox("TrimBox")
    else
        somebox("CropBox")
    end

    local annotations = 0
    for i=1,nofpages do
        local a = pages[i].Annots
        if a then
            annotations = annotations + #a
        end
    end
    if annotations > 0 then
        report("%-17s > %s","annotations",annotations)
    end

    somecount("destinations", pdffile.destinations, true)
    somecount("javascripts",  pdffile.javascripts,  true)
    somecount("widgets",      pdffile.widgets,      false)
    somecount("embeddedfiles",pdffile.embeddedfiles,true)
end
-- Report the metadata stream that the catalog of a pdf file refers to.
--
-- filename : path of the pdf file to inspect
-- pretty   : when set, carriage returns in the blob are replaced by newlines
function scripts.pdf.metadata(filename,pretty)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local stream = pdffile.Catalog.Metadata
    if not stream then
        report("no metadata")
        return
    end
    -- calling the entry gives the content of the stream
    local blob = stream()
    if pretty then
        blob = gsub(blob,"\r","\n")
    end
    report("metadata > \n\n%s\n",blob)
end
local expanded = lpdf.epdf.expanded

-- Collect the font resources of all pages of a document, including those of
-- the XObjects that the pages (and XObjects themselves) refer to.
--
-- pdffile : a document as returned by loadpdffile
--
-- Returns a table that maps resource names onto font dictionaries; fonts
-- found inside XObjects get a name prefixed with the dot separated names of
-- the XObjects that lead to them.
local function getfonts(pdffile)
    local usedfonts = { }

    local function collect(where,tag)
        local resources = where.Resources
        if not resources then
            return
        end
        -- names below an XObject are qualified with the path to that object
        local function qualified(name)
            if tag then
                return tag .. "." .. name
            end
            return name
        end
        local fontlist = resources.Font
        if fontlist then
            for name, font in expanded(fontlist) do
                usedfonts[qualified(name)] = font
            end
        end
        local objects = resources.XObject
        if objects then
            for name, object in expanded(objects) do
                collect(object,qualified(name))
            end
        end
    end

    local pages = pdffile.pages
    for i=1,pdffile.nofpages do
        collect(pages[i])
    end
    return usedfonts
end
-- Scan the ToUnicode stream of a font for bfrange and bfchar sections.
--
-- font : a font dictionary
--
-- Returns two tables: 'counts' maps each target code onto the number of
-- times it occurs as a target, 'indices' is a set of the source codes that
-- have a mapping. Nothing is returned when the font has no ToUnicode entry.
local function getunicodes(font)
    local stream = font.ToUnicode
    if not stream then
        return
    end
    local cmap    = stream()
    local counts  = { }
    local indices = { }
 -- for s in gmatch(cmap,"begincodespacerange%s*(.-)%s*endcodespacerange") do
 --     for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
 --         print(a,b)
 --     end
 -- end
    -- codes that have not been seen before start out at zero
    setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end)
    local function seen(code)
        counts[code] = counts[code] + 1
    end
    -- ranges: <first> <last> <target of first>
    for section in gmatch(cmap,"beginbfrange%s*(.-)%s*endbfrange") do
        for first, last, target in gmatch(section,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do
            local from  = tonumber(first,16)
            local upto  = tonumber(last,16)
            local delta = tonumber(target,16) - from
            for index=from,upto do
                seen(index + delta)
                indices[index] = true
            end
        end
    end
    -- single mappings: <source> <target>, the target is split in chunks of
    -- four hexadecimal digits that are counted separately
    for section in gmatch(cmap,"beginbfchar%s*(.-)%s*endbfchar") do
        for source, target in gmatch(section,"<([^>]+)>%s+<([^>]+)>") do
            indices[tonumber(source,16)] = true
            for chunk in gmatch(target,"....") do
                seen(tonumber(chunk,16))
            end
        end
    end
    return counts, indices
end
-- Report the fonts that are used in a pdf file.
--
-- Without details a table with one row per font resource is shown (id,
-- basefont, encoding, subtype, unicode and, when any font has a ToUnicode
-- mapping, the mapped characters). With details each font gets a multi-line
-- entry, followed by the source indices collected per basefont (with the
-- subset prefix up to the "+" removed).
--
-- filename : path of the pdf file to inspect
function scripts.pdf.fonts(filename)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local usedfonts = getfonts(pdffile)
    local found     = { }
    local common    = setmetatableindex("table")
    for id, font in sortedhash(usedfonts) do
        local basefont = font.BaseFont
        local encoding = font.Encoding
        local subtype  = font.Subtype
        local unicode  = font.ToUnicode
        local counts, indices = getunicodes(font)
        local codes = { }
        local chars = { }
        local freqs = { }
        local names = { }
        if counts then
            codes = sortedkeys(counts)
            for i=1,#codes do
                local code   = codes[i]
                -- a "+" marks codes that are the target of several mappings
                local marker = counts[code] > 1 and "+" or " "
                if code > 32 then
                    local c = utfchar(code)
                    chars[i] = c
                    freqs[i] = format("U+%05X %s %s",code,marker,c)
                else
                    chars[i] = code == 32 and "SPACE" or format("U+%03X",code)
                    freqs[i] = format("U+%05X %s --",code,marker)
                end
            end
            if basefont and unicode then
                local b = gsub(basefont,"^.*%+","")
                local c = common[b]
                for index in next, indices do
                    c[index] = true
                end
            end
            for i=1,#codes do
                codes[i] = format("U+%05X",codes[i])
            end
        end
        local d = encoding and encoding.Differences
        if d then
            -- only the names in the differences array are kept
            for i=1,#d do
                local di = d[i]
                if type(di) == "string" then
                    names[#names+1] = di
                end
            end
        end
        found[id] = {
            basefont = basefont or "no basefont",
            encoding = (d and "custom n=" .. #d) or "no encoding",
            subtype  = subtype or "no subtype",
            -- test the local ToUnicode entry; the undefined global
            -- 'tounicode' used before made every font show "no vector"
            unicode  = unicode and "unicode" or "no vector",
            chars    = chars,
            codes    = codes,
            freqs    = freqs,
            names    = names,
        }
    end
    if details then
        for id, v in sortedhash(found) do
            report("id         : %s", id)
            report("basefont   : %s", v.basefont)
            report("encoding   : % t", v.names)
            report("subtype    : %s", v.subtype)
            report("unicode    : %s", v.unicode)
            if #v.chars > 0 then
                report("characters : % t", v.chars)
            end
            if #v.codes > 0 then
                report("codepoints : % t", v.codes)
            end
            report("")
        end
        for basefont, indices in sortedhash(common) do
            report("basefont   : %s",basefont)
            report("indices    : % t", sortedkeys(indices))
            report("")
        end
    else
        -- the characters column is only added when at least one font has any
        local haschar = false
        for id, v in sortedhash(found) do
            if #v.chars > 0 then
                haschar = true
                break
            end
        end
        local results = { { "id", "basefont", "encoding", "subtype", "unicode", haschar and "characters" or nil } }
        for id, v in sortedhash(found) do
            results[#results+1] = { id, v.basefont, v.encoding, v.subtype, v.unicode, haschar and concat(v.chars," ") or nil }
        end
        utilities.formatters.formatcolumns(results)
        report(results[1])
        report("")
        for i=2,#results do
            report(results[i])
        end
        report("")
    end
end
-- Print the verbose representation of one object of a pdf file, or a message
-- when lpdf.epdf.verboseobject gives nothing for that number.
--
-- filename : path of the pdf file to inspect
-- n        : object number; nothing happens when it is not given
function scripts.pdf.object(filename,n)
    if not n then
        return
    end
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local verbose = lpdf.epdf.verboseobject(pdffile,n)
    print(verbose or "no object with number " .. n)
end
-- Report the link annotations and the destinations of a pdf file.
--
-- filename : path of the pdf file to inspect
-- asked    : optional page number; when given only the links on that page
--            and the destinations that point to it are shown, a number
--            outside the page range results in a message only
--
-- Destinations that cannot be mapped onto a page are listed under page 0.
function scripts.pdf.links(filename,asked)
    local pdffile = loadpdffile(filename)
    if not pdffile then
        return
    end
    local pages    = pdffile.pages
    local nofpages = pdffile.nofpages
    if asked and (asked < 1 or asked > nofpages) then
        report("")
        report("no page %i, last page %i",asked,nofpages)
        report("")
        return
    end
    -- maps a page entry back onto its page number
    local reverse = swapped(pages)

    -- Split a destination in a flag and a target. A string valued
    -- destination is a name: indexing a string with [1] yields nil in Lua,
    -- so the string itself is used. Anything else is indexed as before.
    local function target(D)
        if type(D) == "string" then
            return true, D
        else
            return false, D[1]
        end
    end

    local function show(pagenumber)
        local annots = pages[pagenumber].Annots
        if not annots then
            return
        end
        report("")
        report("annotations @ page %i",pagenumber)
        report("")
        for i=1,#annots do
            local annot = annots[i]
            local A = annot.Subtype == "Link" and annot.A
            if A then
                local S = A.S
                local D = A.D
                if not D then
                    -- nothing to report without a destination
                elseif S == "GoTo" then
                    local isname, D1 = target(D)
                    local R1 = nil
                    if not isname and D1 ~= nil then
                        R1 = reverse[D1]
                    end
                    if tonumber(R1) then
                        report("intern, page % 4i",R1 or 0)
                    else
                        report("intern, name %s",tostring(D1))
                    end
                elseif S == "GoToR" then
                    local F = A.F
                    if F then
                        local isname, D1 = target(D)
                        if not isname and tonumber(D1) then
                            -- the page index in the file counts from zero
                            report("extern, page % 4i, file %s",D1 + 1,F)
                        else
                            report("extern, page % 4i, file %s, name %s",0,F,D1)
                        end
                    end
                end
            end
        end
    end

    if asked then
        show(asked)
    else
        for pagenumber=1,nofpages do
            show(pagenumber)
        end
    end

    local destinations = pdffile.destinations
    if not destinations then
        return
    end
    if asked then
        report("")
        report("destinations to page %i",asked)
        report("")
        for name, destination in sortedhash(destinations) do
            local D = destination.D
            if D then
                local p = reverse[D[1]] or 0
                if p == asked then
                    report(name)
                end
            end
        end
    else
        report("")
        report("destinations")
        report("")
        local list = setmetatableindex("table")
        for name, destination in sortedhash(destinations) do
            local D = destination.D
            if D then
                -- same fallback as in the per page listing: without it an
                -- unresolved page breaks the format and the nil key makes
                -- the list lookup fail
                local p = reverse[D[1]] or 0
                report("tag %s, page % 4i",name,p)
                insert(list[p],name)
            end
        end
        for page, names in sortedhash(list) do
            report("")
            report("page %i, names % t",page,names)
        end
    end
end
-- scripts.pdf.info("e:/tmp/oeps.pdf")
-- scripts.pdf.metadata("e:/tmp/oeps.pdf")
-- scripts.pdf.fonts("e:/tmp/oeps.pdf")
-- scripts.pdf.linearize("e:/tmp/oeps.pdf")
-- Command line handling: the first action whose flag is given on the command
-- line is executed for the first file; without a file or a known flag the
-- help information is shown.
local filename = environment.files[1] or ""

do
    local argument = environment.argument
    -- the order of this list determines which flag wins
    local actions = {
        { "info",       function() scripts.pdf.info(filename) end },
        { "metadata",   function() scripts.pdf.metadata(filename,argument("pretty")) end },
        { "fonts",      function() scripts.pdf.fonts(filename) end },
        { "object",     function() scripts.pdf.object(filename,tonumber(argument("object"))) end },
        { "links",      function() scripts.pdf.links(filename,tonumber(argument("page"))) end },
        { "exporthelp", function() application.export(argument("exporthelp"),filename) end },
    }
    local action = nil
    if filename ~= "" then
        for i=1,#actions do
            local candidate = actions[i]
            if argument(candidate[1]) then
                action = candidate[2]
                break
            end
        end
    end
    if action then
        action()
    else
        application.help()
    end
end
-- a variant on an experiment by hartmut
--~ function downloadlinks(filename)
--~ local document = lpdf.epdf.load(filename)
--~ if document then
--~ local pages = document.pages
--~ for p = 1,#pages do
--~ local annotations = pages[p].Annots
--~ if annotations then
--~ for a=1,#annotations do
--~ local annotation = annotations[a]
--~ local uri = annotation.Subtype == "Link" and annotation.A and annotation.A.URI
--~ if uri and string.find(uri,"^http") then
--~ os.execute("wget " .. uri)
--~ end
--~ end
--~ end
--~ end
--~ end
--~ end