From c36e19abdfd15bf6cae6fa379c6ce51f3ef5332d Mon Sep 17 00:00:00 2001 From: Marius Date: Fri, 19 Oct 2012 01:21:22 +0300 Subject: beta 2012.10.19 00:06 --- scripts/context/lua/mtx-pdf.lua | 142 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 132 insertions(+), 10 deletions(-) (limited to 'scripts/context/lua/mtx-pdf.lua') diff --git a/scripts/context/lua/mtx-pdf.lua b/scripts/context/lua/mtx-pdf.lua index 5654b8bc4..f37ee006a 100644 --- a/scripts/context/lua/mtx-pdf.lua +++ b/scripts/context/lua/mtx-pdf.lua @@ -6,14 +6,21 @@ if not modules then modules = { } end modules ['mtx-pdf'] = { license = "see context related readme files" } +local tonumber = tonumber +local format, gmatch = string.format, string.gmatch +local utfchar = utf.char +local concat = table.concat +local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys + local helpinfo = [[ --info show some info about the given file --metadata show metadata xml blob +--fonts show used fonts (--detail) ]] local application = logs.application { name = "mtx-pdf", - banner = "ConTeXt PDF Helpers 0.01", + banner = "ConTeXt PDF Helpers 0.10", helpinfo = helpinfo, } @@ -39,9 +46,8 @@ local function loadpdffile(filename) end end -function scripts.pdf.info() - local filename = environment.files[1] - local pdffile = loadpdffile(filename) +function scripts.pdf.info(filename) + local pdffile = loadpdffile(filename) if pdffile then local catalog = pdffile.Catalog local info = pdffile.Info @@ -73,9 +79,8 @@ function scripts.pdf.info() end end -function scripts.pdf.metadata() - local filename = environment.files[1] - local pdffile = loadpdffile(filename) +function scripts.pdf.metadata(filename) + local pdffile = loadpdffile(filename) if pdffile then local catalog = pdffile.Catalog local metadata = catalog.Metadata @@ -87,10 +92,127 @@ function scripts.pdf.metadata() end end -if environment.argument("info") then - scripts.pdf.info() +local function getfonts(pdffile) + local usedfonts = { } + for i=1,pdffile.pages.n do + local page = pdffile.pages[i] + local fontlist = page.Resources.Font + for k, v in next, lpdf.epdf.expand(fontlist) do + usedfonts[k] = lpdf.epdf.expand(v) + end + end + return usedfonts +end + +local function getunicodes(font) + local cid = font.ToUnicode + if cid then + cid = cid() + local counts = { } + -- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do + -- for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do + -- print(a,b) + -- end + -- end + setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end) + for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do + for first, last, offset in gmatch(s,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do + first = tonumber(first,16) + last = tonumber(last,16) + offset = tonumber(offset,16) + offset = offset - first + for i=first,last do + local c = i + offset + counts[c] = counts[c] + 1 + end + end + end + for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do + for old, new in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do + for n in gmatch(new,"....") do + local c = tonumber(n,16) + counts[c] = counts[c] + 1 + end + end + end + return counts + end +end + +function scripts.pdf.fonts(filename) + local pdffile = loadpdffile(filename) + if pdffile then + local usedfonts = getfonts(pdffile) + local found = { } + for k, v in table.sortedhash(usedfonts) do + local counts = getunicodes(v) + local codes = { } + local chars = { } + local freqs = { } + if counts then + codes = sortedkeys(counts) + for i=1,#codes do + local k = codes[i] + local c = utfchar(k) + chars[i] = c + freqs[i] = format("U+%05X %s %s",k,counts[k] > 1 and "+" or " ", c) + end + for i=1,#codes do + codes[i] = format("U+%05X",codes[i]) + end + end + found[k] = { + basefont = v.BaseFont or "no basefont", + encoding = v.Encoding or "no encoding", + subtype = v.Subtype or "no subtype", + unicode = v.ToUnicode and "unicode" or "no unicode", + chars = chars, + codes = codes, + freqs = freqs, + } + end + + if environment.argument("detail") then + for k, v in sortedhash(found) do + report("id : %s",k) + report("basefont : %s",v.basefont) + report("encoding : %s",v.encoding) + report("subtype : %s",v.subtype) + report("unicode : %s",v.unicode) + report("characters : %s", concat(v.chars," ")) + report("codepoints : %s", concat(v.codes," ")) + report("") + end + else + local results = { { "id", "basefont", "encoding", "subtype", "unicode", "characters" } } + for k, v in sortedhash(found) do + results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, concat(v.chars," ") } + end + utilities.formatters.formatcolumns(results) + report(results[1]) + report("") + for i=2,#results do + report(results[i]) + end + report("") + end + end +end + +-- scripts.pdf.info("e:/tmp/oeps.pdf") +-- scripts.pdf.metadata("e:/tmp/oeps.pdf") +-- scripts.pdf.fonts("e:/tmp/oeps.pdf") + +local filename = environment.files[1] or "" + +if filename == "" then + application.help() +elseif environment.argument("info") then + scripts.pdf.info(filename) elseif environment.argument("metadata") then - scripts.pdf.metadata() + scripts.pdf.metadata(filename) +elseif environment.argument("fonts") then + scripts.pdf.fonts(filename) else application.help() end -- cgit v1.2.3