summaryrefslogtreecommitdiff
path: root/scripts/context/lua/mtx-pdf.lua
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/context/lua/mtx-pdf.lua')
-rw-r--r--scripts/context/lua/mtx-pdf.lua142
1 files changed, 132 insertions, 10 deletions
diff --git a/scripts/context/lua/mtx-pdf.lua b/scripts/context/lua/mtx-pdf.lua
index 5654b8bc4..f37ee006a 100644
--- a/scripts/context/lua/mtx-pdf.lua
+++ b/scripts/context/lua/mtx-pdf.lua
@@ -6,14 +6,21 @@ if not modules then modules = { } end modules ['mtx-pdf'] = {
license = "see context related readme files"
}
+local tonumber = tonumber
+local format, gmatch = string.format, string.gmatch
+local utfchar = utf.char
+local concat = table.concat
+local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys
+
local helpinfo = [[
--info show some info about the given file
--metadata show metadata xml blob
+--fonts show used fonts (--detail)
]]
local application = logs.application {
name = "mtx-pdf",
- banner = "ConTeXt PDF Helpers 0.01",
+ banner = "ConTeXt PDF Helpers 0.10",
helpinfo = helpinfo,
}
@@ -39,9 +46,8 @@ local function loadpdffile(filename)
end
end
-function scripts.pdf.info()
- local filename = environment.files[1]
- local pdffile = loadpdffile(filename)
+function scripts.pdf.info(filename)
+ local pdffile = loadpdffile(filename)
if pdffile then
local catalog = pdffile.Catalog
local info = pdffile.Info
@@ -73,9 +79,8 @@ function scripts.pdf.info()
end
end
-function scripts.pdf.metadata()
- local filename = environment.files[1]
- local pdffile = loadpdffile(filename)
+function scripts.pdf.metadata(filename)
+ local pdffile = loadpdffile(filename)
if pdffile then
local catalog = pdffile.Catalog
local metadata = catalog.Metadata
@@ -87,10 +92,127 @@ function scripts.pdf.metadata()
end
end
-if environment.argument("info") then
- scripts.pdf.info()
+local function getfonts(pdffile)
+ local usedfonts = { }
+ for i=1,pdffile.pages.n do
+ local page = pdffile.pages[i]
+ local fontlist = page.Resources.Font
+ for k, v in next, lpdf.epdf.expand(fontlist) do
+ usedfonts[k] = lpdf.epdf.expand(v)
+ end
+ end
+ return usedfonts
+end
+
+local function getunicodes(font)
+ local cid = font.ToUnicode
+ if cid then
+ cid = cid()
+ local counts = { }
+ -- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do
+ -- for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
+ -- print(a,b)
+ -- end
+ -- end
+ setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end)
+ for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do
+ for first, last, offset in gmatch(s,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do
+ first = tonumber(first,16)
+ last = tonumber(last,16)
+ offset = tonumber(offset,16)
+ offset = offset - first
+ for i=first,last do
+ local c = i + offset
+ counts[c] = counts[c] + 1
+ end
+ end
+ end
+ for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do
+ for old, new in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do
+ for n in gmatch(new,"....") do
+ local c = tonumber(n,16)
+ counts[c] = counts[c] + 1
+ end
+ end
+ end
+ return counts
+ end
+end
+
+function scripts.pdf.fonts(filename)
+ local pdffile = loadpdffile(filename)
+ if pdffile then
+ local usedfonts = getfonts(pdffile)
+ local found = { }
+ for k, v in table.sortedhash(usedfonts) do
+ local counts = getunicodes(v)
+ local codes = { }
+ local chars = { }
+ local freqs = { }
+ if counts then
+ codes = sortedkeys(counts)
+ for i=1,#codes do
+ local k = codes[i]
+ local c = utfchar(k)
+ chars[i] = c
+ freqs[i] = format("U+%05X %s %s",k,counts[k] > 1 and "+" or " ", c)
+ end
+ for i=1,#codes do
+ codes[i] = format("U+%05X",codes[i])
+ end
+ end
+ found[k] = {
+ basefont = v.BaseFont or "no basefont",
+ encoding = v.Encoding or "no encoding",
+ subtype = v.Subtype or "no subtype",
+ unicode = v.ToUnicode and "unicode" or "no unicode",
+ chars = chars,
+ codes = codes,
+ freqs = freqs,
+ }
+ end
+
+ if environment.argument("detail") then
+ for k, v in sortedhash(found) do
+ report("id : %s",k)
+ report("basefont : %s",v.basefont)
+ report("encoding : %s",v.encoding)
+ report("subtype : %s",v.subtype)
+ report("unicode : %s",v.unicode)
+ report("characters : %s", concat(v.chars," "))
+ report("codepoints : %s", concat(v.codes," "))
+ report("")
+ end
+ else
+ local results = { { "id", "basefont", "encoding", "subtype", "unicode", "characters" } }
+ for k, v in sortedhash(found) do
+ results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, concat(v.chars," ") }
+ end
+ utilities.formatters.formatcolumns(results)
+ report(results[1])
+ report("")
+ for i=2,#results do
+ report(results[i])
+ end
+ report("")
+ end
+ end
+end
+
+-- scripts.pdf.info("e:/tmp/oeps.pdf")
+-- scripts.pdf.metadata("e:/tmp/oeps.pdf")
+-- scripts.pdf.fonts("e:/tmp/oeps.pdf")
+
+local filename = environment.files[1] or ""
+
+if filename == "" then
+ application.help()
+elseif environment.argument("info") then
+ scripts.pdf.info(filename)
elseif environment.argument("metadata") then
- scripts.pdf.metadata()
+ scripts.pdf.metadata(filename)
+elseif environment.argument("fonts") then
+ scripts.pdf.fonts(filename)
else
application.help()
end