diff options
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/context/lua/mtx-pdf.lua | 142 | ||||
-rw-r--r-- | scripts/context/lua/mtxrun.lua | 115 | ||||
-rw-r--r-- | scripts/context/stubs/mswin/mtxrun.lua | 115 | ||||
-rw-r--r-- | scripts/context/stubs/unix/mtxrun | 115 |
4 files changed, 354 insertions, 133 deletions
diff --git a/scripts/context/lua/mtx-pdf.lua b/scripts/context/lua/mtx-pdf.lua index 5654b8bc4..f37ee006a 100644 --- a/scripts/context/lua/mtx-pdf.lua +++ b/scripts/context/lua/mtx-pdf.lua @@ -6,14 +6,21 @@ if not modules then modules = { } end modules ['mtx-pdf'] = { license = "see context related readme files" } +local tonumber = tonumber +local format, gmatch = string.format, string.gmatch +local utfchar = utf.char +local concat = table.concat +local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys + local helpinfo = [[ --info show some info about the given file --metadata show metadata xml blob +--fonts show used fonts (--detail) ]] local application = logs.application { name = "mtx-pdf", - banner = "ConTeXt PDF Helpers 0.01", + banner = "ConTeXt PDF Helpers 0.10", helpinfo = helpinfo, } @@ -39,9 +46,8 @@ local function loadpdffile(filename) end end -function scripts.pdf.info() - local filename = environment.files[1] - local pdffile = loadpdffile(filename) +function scripts.pdf.info(filename) + local pdffile = loadpdffile(filename) if pdffile then local catalog = pdffile.Catalog local info = pdffile.Info @@ -73,9 +79,8 @@ function scripts.pdf.info() end end -function scripts.pdf.metadata() - local filename = environment.files[1] - local pdffile = loadpdffile(filename) +function scripts.pdf.metadata(filename) + local pdffile = loadpdffile(filename) if pdffile then local catalog = pdffile.Catalog local metadata = catalog.Metadata @@ -87,10 +92,127 @@ function scripts.pdf.metadata() end end -if environment.argument("info") then - scripts.pdf.info() +local function getfonts(pdffile) + local usedfonts = { } + for i=1,pdffile.pages.n do + local page = pdffile.pages[i] + local fontlist = page.Resources.Font + for k, v in next, lpdf.epdf.expand(fontlist) do + usedfonts[k] = lpdf.epdf.expand(v) + end + end + return usedfonts +end + +local function getunicodes(font) + local cid = font.ToUnicode + if cid then + cid = cid() + local counts = { } + -- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do + -- for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do + -- print(a,b) + -- end + -- end + setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end) + for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do + for first, last, offset in gmatch(s,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do + first = tonumber(first,16) + last = tonumber(last,16) + offset = tonumber(offset,16) + offset = offset - first + for i=first,last do + local c = i + offset + counts[c] = counts[c] + 1 + end + end + end + for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do + for old, new in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do + for n in gmatch(new,"....") do + local c = tonumber(n,16) + counts[c] = counts[c] + 1 + end + end + end + return counts + end +end + +function scripts.pdf.fonts(filename) + local pdffile = loadpdffile(filename) + if pdffile then + local usedfonts = getfonts(pdffile) + local found = { } + for k, v in table.sortedhash(usedfonts) do + local counts = getunicodes(v) + local codes = { } + local chars = { } + local freqs = { } + if counts then + codes = sortedkeys(counts) + for i=1,#codes do + local k = codes[i] + local c = utfchar(k) + chars[i] = c + freqs[i] = format("U+%05X %s %s",k,counts[k] > 1 and "+" or " ", c) + end + for i=1,#codes do + codes[i] = format("U+%05X",codes[i]) + end + end + found[k] = { + basefont = v.BaseFont or "no basefont", + encoding = v.Encoding or "no encoding", + subtype = v.Subtype or "no subtype", + unicode = v.ToUnicode and "unicode" or "no unicode", + chars = chars, + codes = codes, + freqs = freqs, + } + end + + if environment.argument("detail") then + for k, v in sortedhash(found) do + report("id : %s",k) + report("basefont : %s",v.basefont) + report("encoding : %s",v.encoding) + report("subtype : %s",v.subtype) + report("unicode : %s",v.unicode) + report("characters : %s", concat(v.chars," ")) + report("codepoints : %s", concat(v.codes," ")) + report("") + end + else + local results = { { "id", "basefont", "encoding", "subtype", "unicode", "characters" } } + for k, v in sortedhash(found) do + results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, concat(v.chars," ") } + end + utilities.formatters.formatcolumns(results) + report(results[1]) + report("") + for i=2,#results do + report(results[i]) + end + report("") + end + end +end + +-- scripts.pdf.info("e:/tmp/oeps.pdf") +-- scripts.pdf.metadata("e:/tmp/oeps.pdf") +-- scripts.pdf.fonts("e:/tmp/oeps.pdf") + +local filename = environment.files[1] or "" + +if filename == "" then + application.help() +elseif environment.argument("info") then + scripts.pdf.info(filename) elseif environment.argument("metadata") then - scripts.pdf.metadata() + scripts.pdf.metadata(filename) +elseif environment.argument("fonts") then + scripts.pdf.fonts(filename) else application.help() end diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index 2090ec584..1eb0f5816 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -247,12 +247,16 @@ function table.strip(tab) end function table.keys(t) - local keys, k = { }, 0 - for key, _ in next, t do - k = k + 1 - keys[k] = key + if t then + local keys, k = { }, 0 + for key, _ in next, t do + k = k + 1 + keys[k] = key + end + return keys + else + return { } end - return keys end local function compare(a,b) @@ -265,41 +269,49 @@ local function compare(a,b) end local function sortedkeys(tab) - local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in next, tab do - s = s + 1 - srt[s] = key - if category == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - category = (category == 2 and 3) or 1 - elseif tkey == "number" then - category = (category == 1 and 3) or 2 + if tab then + local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + s = s + 1 + srt[s] = key + if category == 3 then + -- no further check else - category = 3 + local tkey = type(key) + if tkey == "string" then + category = (category == 2 and 3) or 1 + elseif tkey == "number" then + category = (category == 1 and 3) or 2 + else + category = 3 + end end end - end - if category == 0 or category == 3 then - sort(srt,compare) + if category == 0 or category == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt else - sort(srt) + return { } end - return srt end local function sortedhashkeys(tab) -- fast one - local srt, s = { }, 0 - for key,_ in next, tab do - if key then - s= s + 1 - srt[s] = key + if tab then + local srt, s = { }, 0 + for key,_ in next, tab do + if key then + s= s + 1 + srt[s] = key + end end + sort(srt) + return srt + else + return { } end - sort(srt) - return srt end table.sortedkeys = sortedkeys @@ -324,7 +336,7 @@ end table.sortedhash = sortedhash table.sortedpairs = sortedhash -function table.append(t, list) +function table.append(t,list) local n = #t for i=1,#list do n = n + 1 @@ -1186,7 +1198,7 @@ local report = texio and texio.write_nl or print -- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -local type = type +local type, next = type, next local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format -- Beware, we predefine a bunch of patterns here and one reason for doing so @@ -1247,6 +1259,10 @@ patterns.utf8char = utf8char patterns.validutf8 = validutf8char patterns.validutf8char = validutf8char +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer + patterns.digit = digit patterns.sign = sign patterns.cardinal = sign^0 * digit^1 @@ -1266,16 +1282,16 @@ patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = S("\n\r") -patterns.spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace patterns.newline = newline patterns.emptyline = newline^1 -patterns.nonspacer = 1 - patterns.spacer -patterns.whitespace = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace +patterns.nonspacer = 1 - spacer +patterns.nonwhitespace = 1 - whitespace patterns.equal = P("=") patterns.comma = P(",") -patterns.commaspacer = P(",") * patterns.spacer^0 +patterns.commaspacer = P(",") * spacer^0 patterns.period = P(".") patterns.colon = P(":") patterns.semicolon = P(";") @@ -1491,8 +1507,8 @@ end function lpeg.replacer(one,two) if type(one) == "table" then local no = #one + local p if no > 0 then - local p for i=1,no do local o = one[i] local pp = P(o[1]) / o[2] @@ -1502,8 +1518,17 @@ function lpeg.replacer(one,two) p = pp end end - return Cs((p + 1)^0) + else + for k, v in next, one do + local pp = P(k) / v + if p then + p = p + pp + else + p = pp + end + end end + return Cs((p + 1)^0) else two = two or "" return Cs((P(one)/two + 1)^0) @@ -1875,6 +1900,14 @@ function string.tformat(fmt,...) return format(lpegmatch(replacer,fmt),...) end +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) + return lpegmatch(pattern,str) +end + end -- of closure @@ -5699,7 +5732,7 @@ statistics.elapsedtime = elapsedtime statistics.elapsedindeed = elapsedindeed statistics.elapsedseconds = elapsedseconds --- general function +-- general function .. we might split this module function statistics.register(tag,fnc) if statistics.enable and type(fnc) == "function" then diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index 2090ec584..1eb0f5816 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -247,12 +247,16 @@ function table.strip(tab) end function table.keys(t) - local keys, k = { }, 0 - for key, _ in next, t do - k = k + 1 - keys[k] = key + if t then + local keys, k = { }, 0 + for key, _ in next, t do + k = k + 1 + keys[k] = key + end + return keys + else + return { } end - return keys end local function compare(a,b) @@ -265,41 +269,49 @@ local function compare(a,b) end local function sortedkeys(tab) - local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in next, tab do - s = s + 1 - srt[s] = key - if category == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - category = (category == 2 and 3) or 1 - elseif tkey == "number" then - category = (category == 1 and 3) or 2 + if tab then + local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + s = s + 1 + srt[s] = key + if category == 3 then + -- no further check else - category = 3 + local tkey = type(key) + if tkey == "string" then + category = (category == 2 and 3) or 1 + elseif tkey == "number" then + category = (category == 1 and 3) or 2 + else + category = 3 + end end end - end - if category == 0 or category == 3 then - sort(srt,compare) + if category == 0 or category == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt else - sort(srt) + return { } end - return srt end local function sortedhashkeys(tab) -- fast one - local srt, s = { }, 0 - for key,_ in next, tab do - if key then - s= s + 1 - srt[s] = key + if tab then + local srt, s = { }, 0 + for key,_ in next, tab do + if key then + s= s + 1 + srt[s] = key + end end + sort(srt) + return srt + else + return { } end - sort(srt) - return srt end table.sortedkeys = sortedkeys @@ -324,7 +336,7 @@ end table.sortedhash = sortedhash table.sortedpairs = sortedhash -function table.append(t, list) +function table.append(t,list) local n = #t for i=1,#list do n = n + 1 @@ -1186,7 +1198,7 @@ local report = texio and texio.write_nl or print -- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -local type = type +local type, next = type, next local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format -- Beware, we predefine a bunch of patterns here and one reason for doing so @@ -1247,6 +1259,10 @@ patterns.utf8char = utf8char patterns.validutf8 = validutf8char patterns.validutf8char = validutf8char +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer + patterns.digit = digit patterns.sign = sign patterns.cardinal = sign^0 * digit^1 @@ -1266,16 +1282,16 @@ patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = S("\n\r") -patterns.spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace patterns.newline = newline patterns.emptyline = newline^1 -patterns.nonspacer = 1 - patterns.spacer -patterns.whitespace = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace +patterns.nonspacer = 1 - spacer +patterns.nonwhitespace = 1 - whitespace patterns.equal = P("=") patterns.comma = P(",") -patterns.commaspacer = P(",") * patterns.spacer^0 +patterns.commaspacer = P(",") * spacer^0 patterns.period = P(".") patterns.colon = P(":") patterns.semicolon = P(";") @@ -1491,8 +1507,8 @@ end function lpeg.replacer(one,two) if type(one) == "table" then local no = #one + local p if no > 0 then - local p for i=1,no do local o = one[i] local pp = P(o[1]) / o[2] @@ -1502,8 +1518,17 @@ function lpeg.replacer(one,two) p = pp end end - return Cs((p + 1)^0) + else + for k, v in next, one do + local pp = P(k) / v + if p then + p = p + pp + else + p = pp + end + end end + return Cs((p + 1)^0) else two = two or "" return Cs((P(one)/two + 1)^0) @@ -1875,6 +1900,14 @@ function string.tformat(fmt,...) return format(lpegmatch(replacer,fmt),...) end +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) + return lpegmatch(pattern,str) +end + end -- of closure @@ -5699,7 +5732,7 @@ statistics.elapsedtime = elapsedtime statistics.elapsedindeed = elapsedindeed statistics.elapsedseconds = elapsedseconds --- general function +-- general function .. we might split this module function statistics.register(tag,fnc) if statistics.enable and type(fnc) == "function" then diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index 2090ec584..1eb0f5816 100644 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -247,12 +247,16 @@ function table.strip(tab) end function table.keys(t) - local keys, k = { }, 0 - for key, _ in next, t do - k = k + 1 - keys[k] = key + if t then + local keys, k = { }, 0 + for key, _ in next, t do + k = k + 1 + keys[k] = key + end + return keys + else + return { } end - return keys end local function compare(a,b) @@ -265,41 +269,49 @@ local function compare(a,b) end local function sortedkeys(tab) - local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in next, tab do - s = s + 1 - srt[s] = key - if category == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - category = (category == 2 and 3) or 1 - elseif tkey == "number" then - category = (category == 1 and 3) or 2 + if tab then + local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + s = s + 1 + srt[s] = key + if category == 3 then + -- no further check else - category = 3 + local tkey = type(key) + if tkey == "string" then + category = (category == 2 and 3) or 1 + elseif tkey == "number" then + category = (category == 1 and 3) or 2 + else + category = 3 + end end end - end - if category == 0 or category == 3 then - sort(srt,compare) + if category == 0 or category == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt else - sort(srt) + return { } end - return srt end local function sortedhashkeys(tab) -- fast one - local srt, s = { }, 0 - for key,_ in next, tab do - if key then - s= s + 1 - srt[s] = key + if tab then + local srt, s = { }, 0 + for key,_ in next, tab do + if key then + s= s + 1 + srt[s] = key + end end + sort(srt) + return srt + else + return { } end - sort(srt) - return srt end table.sortedkeys = sortedkeys @@ -324,7 +336,7 @@ end table.sortedhash = sortedhash table.sortedpairs = sortedhash -function table.append(t, list) +function table.append(t,list) local n = #t for i=1,#list do n = n + 1 @@ -1186,7 +1198,7 @@ local report = texio and texio.write_nl or print -- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -local type = type +local type, next = type, next local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format -- Beware, we predefine a bunch of patterns here and one reason for doing so @@ -1247,6 +1259,10 @@ patterns.utf8char = utf8char patterns.validutf8 = validutf8char patterns.validutf8char = validutf8char +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer + patterns.digit = digit patterns.sign = sign patterns.cardinal = sign^0 * digit^1 @@ -1266,16 +1282,16 @@ patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = S("\n\r") -patterns.spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace patterns.newline = newline patterns.emptyline = newline^1 -patterns.nonspacer = 1 - patterns.spacer -patterns.whitespace = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace +patterns.nonspacer = 1 - spacer +patterns.nonwhitespace = 1 - whitespace patterns.equal = P("=") patterns.comma = P(",") -patterns.commaspacer = P(",") * patterns.spacer^0 +patterns.commaspacer = P(",") * spacer^0 patterns.period = P(".") patterns.colon = P(":") patterns.semicolon = P(";") @@ -1491,8 +1507,8 @@ end function lpeg.replacer(one,two) if type(one) == "table" then local no = #one + local p if no > 0 then - local p for i=1,no do local o = one[i] local pp = P(o[1]) / o[2] @@ -1502,8 +1518,17 @@ function lpeg.replacer(one,two) p = pp end end - return Cs((p + 1)^0) + else + for k, v in next, one do + local pp = P(k) / v + if p then + p = p + pp + else + p = pp + end + end end + return Cs((p + 1)^0) else two = two or "" return Cs((P(one)/two + 1)^0) @@ -1875,6 +1900,14 @@ function string.tformat(fmt,...) return format(lpegmatch(replacer,fmt),...) end +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) + return lpegmatch(pattern,str) +end + end -- of closure @@ -5699,7 +5732,7 @@ statistics.elapsedtime = elapsedtime statistics.elapsedindeed = elapsedindeed statistics.elapsedseconds = elapsedseconds --- general function +-- general function .. we might split this module function statistics.register(tag,fnc) if statistics.enable and type(fnc) == "function" then |