diff options
Diffstat (limited to 'scripts/context/lua/mtx-pdf.lua')
-rw-r--r-- | scripts/context/lua/mtx-pdf.lua | 598 |
1 files changed, 299 insertions, 299 deletions
diff --git a/scripts/context/lua/mtx-pdf.lua b/scripts/context/lua/mtx-pdf.lua index 551aa5b37..2ff22e07f 100644 --- a/scripts/context/lua/mtx-pdf.lua +++ b/scripts/context/lua/mtx-pdf.lua @@ -1,299 +1,299 @@ -if not modules then modules = { } end modules ['mtx-pdf'] = { - version = 1.001, - comment = "companion to mtxrun.lua", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} - -local tonumber = tonumber -local format, gmatch = string.format, string.gmatch -local utfchar = utf.char -local concat = table.concat -local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys - -local helpinfo = [[ -<?xml version="1.0"?> -<application> - <metadata> - <entry name="name">mtx-pdf</entry> - <entry name="detail">ConTeXt PDF Helpers</entry> - <entry name="version">0.10</entry> - </metadata> - <flags> - <category name="basic"> - <subcategory> - <flag name="info"><short>show some info about the given file</short></flag> - <flag name="metadata"><short>show metadata xml blob</short></flag> - <flag name="fonts"><short>show used fonts (<ref name="detail)"/></short></flag> - <flag name="linearize"><short>linearize given file</short></flag> - </subcategory> - </category> - </flags> -</application> -]] - -local application = logs.application { - name = "mtx-pdf", - banner = "ConTeXt PDF Helpers 0.10", - helpinfo = helpinfo, -} - -local report = application.report - -dofile(resolvers.findfile("lpdf-epd.lua","tex")) - -scripts = scripts or { } -scripts.pdf = scripts.pdf or { } - -local function loadpdffile(filename) - if not filename or filename == "" then - report("no filename given") - elseif not lfs.isfile(filename) then - report("unknown file '%s'",filename) - else - local pdffile = lpdf.epdf.load(filename) - if pdffile then - return pdffile - else - report("no valid pdf file '%s'",filename) - end - end -end - -function scripts.pdf.info(filename) - local pdffile = loadpdffile(filename) - if pdffile then - local catalog = pdffile.Catalog - local info = pdffile.Info - local pages = pdffile.pages - local nofpages = pages.n -- no # yet. will be in 5.2 - - report("filename > %s",filename) - report("pdf version > %s",catalog.Version) - report("number of pages > %s",nofpages) - report("title > %s",info.Title) - report("creator > %s",info.Creator) - report("producer > %s",info.Producer) - report("creation date > %s",info.CreationDate) - report("modification date > %s",info.ModDate) - - local width, height, start - for i=1, nofpages do - local page = pages[i] - local bbox = page.CropBox or page.MediaBox - local w, h = bbox[4]-bbox[2],bbox[3]-bbox[1] - if w ~= width or h ~= height then - if start then - report("cropbox > pages: %s-%s, width: %s, height: %s",start,i-1,width,height) - end - width, height, start = w, h, i - end - end - report("cropbox > pages: %s-%s, width: %s, height: %s",start,nofpages,width,height) - end -end - -function scripts.pdf.metadata(filename) - local pdffile = loadpdffile(filename) - if pdffile then - local catalog = pdffile.Catalog - local metadata = catalog.Metadata - if metadata then - report("metadata > \n\n%s\n",metadata()) - else - report("no metadata") - end - end -end - -local function getfonts(pdffile) - local usedfonts = { } - for i=1,pdffile.pages.n do - local page = pdffile.pages[i] - local fontlist = page.Resources.Font - for k, v in next, lpdf.epdf.expand(fontlist) do - usedfonts[k] = lpdf.epdf.expand(v) - end - end - return usedfonts -end - -local function getunicodes(font) - local cid = font.ToUnicode - if cid then - cid = cid() - local counts = { } - -- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do - -- for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do - -- print(a,b) - -- end - -- end - setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end) - for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do - for first, last, offset in gmatch(s,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do - first = tonumber(first,16) - last = tonumber(last,16) - offset = tonumber(offset,16) - offset = offset - first - for i=first,last do - local c = i + offset - counts[c] = counts[c] + 1 - end - end - end - for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do - for old, new in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do - for n in gmatch(new,"....") do - local c = tonumber(n,16) - counts[c] = counts[c] + 1 - end - end - end - return counts - end -end - -function scripts.pdf.fonts(filename) - local pdffile = loadpdffile(filename) - if pdffile then - local usedfonts = getfonts(pdffile) - local found = { } - for k, v in table.sortedhash(usedfonts) do - local counts = getunicodes(v) - local codes = { } - local chars = { } - local freqs = { } - if counts then - codes = sortedkeys(counts) - for i=1,#codes do - local k = codes[i] - local c = utfchar(k) - chars[i] = c - freqs[i] = format("U+%05X %s %s",k,counts[k] > 1 and "+" or " ", c) - end - for i=1,#codes do - codes[i] = format("U+%05X",codes[i]) - end - end - found[k] = { - basefont = v.BaseFont or "no basefont", - encoding = v.Encoding or "no encoding", - subtype = v.Subtype or "no subtype", - unicode = v.ToUnicode and "unicode" or "no unicode", - chars = chars, - codes = codes, - freqs = freqs, - } - end - - if environment.argument("detail") then - for k, v in sortedhash(found) do - report("id : %s",k) - report("basefont : %s",v.basefont) - report("encoding : %s",v.encoding) - report("subtype : %s",v.subtype) - report("unicode : %s",v.unicode) - report("characters : %s", concat(v.chars," ")) - report("codepoints : %s", concat(v.codes," ")) - report("") - end - else - local results = { { "id", "basefont", "encoding", "subtype", "unicode", "characters" } } - for k, v in sortedhash(found) do - results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, concat(v.chars," ") } - end - utilities.formatters.formatcolumns(results) - report(results[1]) - report("") - for i=2,#results do - report(results[i]) - end - report("") - end - end -end - --- this is a quick hack ... proof of concept .. will change (derived from luigi's example) ... --- i will make a ctx wrapper - -local qpdf - -function scripts.pdf.linearize(filename) - qpdf = qpdf or swiglib("qpdf.core") - local oldfile = filename or environment.files[1] - if not oldfile then - return - end - file.addsuffix(oldfile,"pdf") - if not lfs.isfile(oldfile) then - return - end - local newfile = environment.files[2] - if not newfile or file.removesuffix(oldfile) == file.removesuffix(newfile)then - newfile = file.addsuffix(file.removesuffix(oldfile) .. "-linearized","pdf") - end - local password = environment.arguments.password - local instance = qpdf.qpdf_init() - if bit32.band(qpdf.qpdf_read(instance,oldfile,password),qpdf.QPDF_ERRORS) ~= 0 then - report("unable to open input file") - elseif bit32.band(qpdf.qpdf_init_write(instance,newfile),qpdf.QPDF_ERRORS) ~= 0 then - report("unable to open output file") - else - report("linearizing %a into %a",oldfile,newfile) - qpdf.qpdf_set_static_ID(instance,qpdf.QPDF_TRUE) - qpdf.qpdf_set_linearization(instance,qpdf.QPDF_TRUE) - qpdf.qpdf_write(instance) - end - while qpdf.qpdf_more_warnings(instance) ~= 0 do - report("warning: %s",qpdf.qpdf_get_error_full_text(instance,qpdf.qpdf_next_warning(qpdf))) - end - if qpdf.qpdf_has_error(instance) ~= 0 then - report("error: %s",qpdf.qpdf_get_error_full_text(instance,qpdf.qpdf_get_error(qpdf))) - end - qpdf.qpdf_cleanup_p(instance) -end - --- scripts.pdf.info("e:/tmp/oeps.pdf") --- scripts.pdf.metadata("e:/tmp/oeps.pdf") --- scripts.pdf.fonts("e:/tmp/oeps.pdf") --- scripts.pdf.linearize("e:/tmp/oeps.pdf") - -local filename = environment.files[1] or "" - -if filename == "" then - application.help() -elseif environment.argument("info") then - scripts.pdf.info(filename) -elseif environment.argument("metadata") then - scripts.pdf.metadata(filename) -elseif environment.argument("fonts") then - scripts.pdf.fonts(filename) -elseif environment.argument("linearize") then - scripts.pdf.linearize(filename) -elseif environment.argument("exporthelp") then - application.export(environment.argument("exporthelp"),filename) -else - application.help() -end - --- a variant on an experiment by hartmut - ---~ function downloadlinks(filename) ---~ local document = lpdf.epdf.load(filename) ---~ if document then ---~ local pages = document.pages ---~ for p = 1,#pages do ---~ local annotations = pages[p].Annots ---~ if annotations then ---~ for a=1,#annotations do ---~ local annotation = annotations[a] ---~ local uri = annotation.Subtype == "Link" and annotation.A and annotation.A.URI ---~ if uri and string.find(uri,"^http") then ---~ os.execute("wget " .. uri) ---~ end ---~ end ---~ end ---~ end ---~ end ---~ end +if not modules then modules = { } end modules ['mtx-pdf'] = {
+ version = 1.001,
+ comment = "companion to mtxrun.lua",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local tonumber = tonumber
+local format, gmatch = string.format, string.gmatch
+local utfchar = utf.char
+local concat = table.concat
+local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys
+
+local helpinfo = [[
+<?xml version="1.0"?>
+<application>
+ <metadata>
+ <entry name="name">mtx-pdf</entry>
+ <entry name="detail">ConTeXt PDF Helpers</entry>
+ <entry name="version">0.10</entry>
+ </metadata>
+ <flags>
+ <category name="basic">
+ <subcategory>
+ <flag name="info"><short>show some info about the given file</short></flag>
+ <flag name="metadata"><short>show metadata xml blob</short></flag>
+ <flag name="fonts"><short>show used fonts (<ref name="detail)"/></short></flag>
+ <flag name="linearize"><short>linearize given file</short></flag>
+ </subcategory>
+ </category>
+ </flags>
+</application>
+]]
+
+local application = logs.application {
+ name = "mtx-pdf",
+ banner = "ConTeXt PDF Helpers 0.10",
+ helpinfo = helpinfo,
+}
+
+local report = application.report
+
+dofile(resolvers.findfile("lpdf-epd.lua","tex"))
+
+scripts = scripts or { }
+scripts.pdf = scripts.pdf or { }
+
+local function loadpdffile(filename)
+ if not filename or filename == "" then
+ report("no filename given")
+ elseif not lfs.isfile(filename) then
+ report("unknown file '%s'",filename)
+ else
+ local pdffile = lpdf.epdf.load(filename)
+ if pdffile then
+ return pdffile
+ else
+ report("no valid pdf file '%s'",filename)
+ end
+ end
+end
+
+function scripts.pdf.info(filename)
+ local pdffile = loadpdffile(filename)
+ if pdffile then
+ local catalog = pdffile.Catalog
+ local info = pdffile.Info
+ local pages = pdffile.pages
+ local nofpages = pages.n -- no # yet. will be in 5.2
+
+ report("filename > %s",filename)
+ report("pdf version > %s",catalog.Version)
+ report("number of pages > %s",nofpages)
+ report("title > %s",info.Title)
+ report("creator > %s",info.Creator)
+ report("producer > %s",info.Producer)
+ report("creation date > %s",info.CreationDate)
+ report("modification date > %s",info.ModDate)
+
+ local width, height, start
+ for i=1, nofpages do
+ local page = pages[i]
+ local bbox = page.CropBox or page.MediaBox
+ local w, h = bbox[4]-bbox[2],bbox[3]-bbox[1]
+ if w ~= width or h ~= height then
+ if start then
+ report("cropbox > pages: %s-%s, width: %s, height: %s",start,i-1,width,height)
+ end
+ width, height, start = w, h, i
+ end
+ end
+ report("cropbox > pages: %s-%s, width: %s, height: %s",start,nofpages,width,height)
+ end
+end
+
+function scripts.pdf.metadata(filename)
+ local pdffile = loadpdffile(filename)
+ if pdffile then
+ local catalog = pdffile.Catalog
+ local metadata = catalog.Metadata
+ if metadata then
+ report("metadata > \n\n%s\n",metadata())
+ else
+ report("no metadata")
+ end
+ end
+end
+
+local function getfonts(pdffile)
+ local usedfonts = { }
+ for i=1,pdffile.pages.n do
+ local page = pdffile.pages[i]
+ local fontlist = page.Resources.Font
+ for k, v in next, lpdf.epdf.expand(fontlist) do
+ usedfonts[k] = lpdf.epdf.expand(v)
+ end
+ end
+ return usedfonts
+end
+
+local function getunicodes(font)
+ local cid = font.ToUnicode
+ if cid then
+ cid = cid()
+ local counts = { }
+ -- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do
+ -- for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
+ -- print(a,b)
+ -- end
+ -- end
+ setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end)
+ for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do
+ for first, last, offset in gmatch(s,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do
+ first = tonumber(first,16)
+ last = tonumber(last,16)
+ offset = tonumber(offset,16)
+ offset = offset - first
+ for i=first,last do
+ local c = i + offset
+ counts[c] = counts[c] + 1
+ end
+ end
+ end
+ for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do
+ for old, new in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
+ for n in gmatch(new,"....") do
+ local c = tonumber(n,16)
+ counts[c] = counts[c] + 1
+ end
+ end
+ end
+ return counts
+ end
+end
+
+function scripts.pdf.fonts(filename)
+ local pdffile = loadpdffile(filename)
+ if pdffile then
+ local usedfonts = getfonts(pdffile)
+ local found = { }
+ for k, v in table.sortedhash(usedfonts) do
+ local counts = getunicodes(v)
+ local codes = { }
+ local chars = { }
+ local freqs = { }
+ if counts then
+ codes = sortedkeys(counts)
+ for i=1,#codes do
+ local k = codes[i]
+ local c = utfchar(k)
+ chars[i] = c
+ freqs[i] = format("U+%05X %s %s",k,counts[k] > 1 and "+" or " ", c)
+ end
+ for i=1,#codes do
+ codes[i] = format("U+%05X",codes[i])
+ end
+ end
+ found[k] = {
+ basefont = v.BaseFont or "no basefont",
+ encoding = v.Encoding or "no encoding",
+ subtype = v.Subtype or "no subtype",
+ unicode = v.ToUnicode and "unicode" or "no unicode",
+ chars = chars,
+ codes = codes,
+ freqs = freqs,
+ }
+ end
+
+ if environment.argument("detail") then
+ for k, v in sortedhash(found) do
+ report("id : %s",k)
+ report("basefont : %s",v.basefont)
+ report("encoding : %s",v.encoding)
+ report("subtype : %s",v.subtype)
+ report("unicode : %s",v.unicode)
+ report("characters : %s", concat(v.chars," "))
+ report("codepoints : %s", concat(v.codes," "))
+ report("")
+ end
+ else
+ local results = { { "id", "basefont", "encoding", "subtype", "unicode", "characters" } }
+ for k, v in sortedhash(found) do
+ results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, concat(v.chars," ") }
+ end
+ utilities.formatters.formatcolumns(results)
+ report(results[1])
+ report("")
+ for i=2,#results do
+ report(results[i])
+ end
+ report("")
+ end
+ end
+end
+
+-- this is a quick hack ... proof of concept .. will change (derived from luigi's example) ...
+-- i will make a ctx wrapper
+
+local qpdf
+
+function scripts.pdf.linearize(filename)
+ qpdf = qpdf or swiglib("qpdf.core")
+ local oldfile = filename or environment.files[1]
+ if not oldfile then
+ return
+ end
+ file.addsuffix(oldfile,"pdf")
+ if not lfs.isfile(oldfile) then
+ return
+ end
+ local newfile = environment.files[2]
+ if not newfile or file.removesuffix(oldfile) == file.removesuffix(newfile)then
+ newfile = file.addsuffix(file.removesuffix(oldfile) .. "-linearized","pdf")
+ end
+ local password = environment.arguments.password
+ local instance = qpdf.qpdf_init()
+ if bit32.band(qpdf.qpdf_read(instance,oldfile,password),qpdf.QPDF_ERRORS) ~= 0 then
+ report("unable to open input file")
+ elseif bit32.band(qpdf.qpdf_init_write(instance,newfile),qpdf.QPDF_ERRORS) ~= 0 then
+ report("unable to open output file")
+ else
+ report("linearizing %a into %a",oldfile,newfile)
+ qpdf.qpdf_set_static_ID(instance,qpdf.QPDF_TRUE)
+ qpdf.qpdf_set_linearization(instance,qpdf.QPDF_TRUE)
+ qpdf.qpdf_write(instance)
+ end
+ while qpdf.qpdf_more_warnings(instance) ~= 0 do
+ report("warning: %s",qpdf.qpdf_get_error_full_text(instance,qpdf.qpdf_next_warning(qpdf)))
+ end
+ if qpdf.qpdf_has_error(instance) ~= 0 then
+ report("error: %s",qpdf.qpdf_get_error_full_text(instance,qpdf.qpdf_get_error(qpdf)))
+ end
+ qpdf.qpdf_cleanup_p(instance)
+end
+
+-- scripts.pdf.info("e:/tmp/oeps.pdf")
+-- scripts.pdf.metadata("e:/tmp/oeps.pdf")
+-- scripts.pdf.fonts("e:/tmp/oeps.pdf")
+-- scripts.pdf.linearize("e:/tmp/oeps.pdf")
+
+local filename = environment.files[1] or ""
+
+if filename == "" then
+ application.help()
+elseif environment.argument("info") then
+ scripts.pdf.info(filename)
+elseif environment.argument("metadata") then
+ scripts.pdf.metadata(filename)
+elseif environment.argument("fonts") then
+ scripts.pdf.fonts(filename)
+elseif environment.argument("linearize") then
+ scripts.pdf.linearize(filename)
+elseif environment.argument("exporthelp") then
+ application.export(environment.argument("exporthelp"),filename)
+else
+ application.help()
+end
+
+-- a variant on an experiment by hartmut
+
+--~ function downloadlinks(filename)
+--~ local document = lpdf.epdf.load(filename)
+--~ if document then
+--~ local pages = document.pages
+--~ for p = 1,#pages do
+--~ local annotations = pages[p].Annots
+--~ if annotations then
+--~ for a=1,#annotations do
+--~ local annotation = annotations[a]
+--~ local uri = annotation.Subtype == "Link" and annotation.A and annotation.A.URI
+--~ if uri and string.find(uri,"^http") then
+--~ os.execute("wget " .. uri)
+--~ end
+--~ end
+--~ end
+--~ end
+--~ end
+--~ end
|