diff options
Diffstat (limited to 'scripts')
| -rw-r--r-- | scripts/context/lua/mtx-pdf.lua | 142 | ||||
| -rw-r--r-- | scripts/context/lua/mtxrun.lua | 115 | ||||
| -rw-r--r-- | scripts/context/stubs/mswin/mtxrun.lua | 115 | ||||
| -rwxr-xr-x | scripts/context/stubs/unix/mtxrun | 115 | 
4 files changed, 354 insertions, 133 deletions
| diff --git a/scripts/context/lua/mtx-pdf.lua b/scripts/context/lua/mtx-pdf.lua index 5654b8bc4..f37ee006a 100644 --- a/scripts/context/lua/mtx-pdf.lua +++ b/scripts/context/lua/mtx-pdf.lua @@ -6,14 +6,21 @@ if not modules then modules = { } end modules ['mtx-pdf'] = {      license   = "see context related readme files"  } +local tonumber = tonumber +local format, gmatch = string.format, string.gmatch +local utfchar = utf.char +local concat = table.concat +local setmetatableindex, sortedhash, sortedkeys = table.setmetatableindex, table.sortedhash, table.sortedkeys +  local helpinfo = [[  --info                show some info about the given file  --metadata            show metadata xml blob +--fonts               show used fonts (--detail)  ]]  local application = logs.application {      name     = "mtx-pdf", -    banner   = "ConTeXt PDF Helpers 0.01", +    banner   = "ConTeXt PDF Helpers 0.10",      helpinfo = helpinfo,  } @@ -39,9 +46,8 @@ local function loadpdffile(filename)      end  end -function scripts.pdf.info() -    local filename = environment.files[1] -    local pdffile  = loadpdffile(filename) +function scripts.pdf.info(filename) +    local pdffile = loadpdffile(filename)      if pdffile then          local catalog  = pdffile.Catalog          local info     = pdffile.Info @@ -73,9 +79,8 @@ function scripts.pdf.info()      end  end -function scripts.pdf.metadata() -    local filename = environment.files[1] -    local pdffile  = loadpdffile(filename) +function scripts.pdf.metadata(filename) +    local pdffile = loadpdffile(filename)      if pdffile then          local catalog  = pdffile.Catalog          local metadata = catalog.Metadata @@ -87,10 +92,127 @@ function scripts.pdf.metadata()      end  end -if environment.argument("info") then -    scripts.pdf.info() +local function getfonts(pdffile) +    local usedfonts = { } +    for i=1,pdffile.pages.n do +        local page = pdffile.pages[i] +        local fontlist = page.Resources.Font +        for k, v in next, lpdf.epdf.expand(fontlist) do +            usedfonts[k] = lpdf.epdf.expand(v) +        end +    end +    return usedfonts +end + +local function getunicodes(font) +    local cid = font.ToUnicode +    if cid then +        cid = cid() +        local counts = { } +     -- for s in gmatch(cid,"begincodespacerange%s*(.-)%s*endcodespacerange") do +     --     for a, b in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do +     --         print(a,b) +     --     end +     -- end +        setmetatableindex(counts, function(t,k) t[k] = 0 return 0 end) +        for s in gmatch(cid,"beginbfrange%s*(.-)%s*endbfrange") do +            for first, last, offset in gmatch(s,"<([^>]+)>%s+<([^>]+)>%s+<([^>]+)>") do +                first  = tonumber(first,16) +                last   = tonumber(last,16) +                offset = tonumber(offset,16) +                offset = offset - first +                for i=first,last do +                    local c = i + offset +                    counts[c] = counts[c] + 1 +                end +            end +        end +        for s in gmatch(cid,"beginbfchar%s*(.-)%s*endbfchar") do +            for old, new in gmatch(s,"<([^>]+)>%s+<([^>]+)>") do +                for n in gmatch(new,"....") do +                    local c = tonumber(n,16) +                    counts[c] = counts[c] + 1 +                end +            end +        end +        return counts +    end +end + +function scripts.pdf.fonts(filename) +    local pdffile = loadpdffile(filename) +    if pdffile then +        local usedfonts = getfonts(pdffile) +        local found     = { } +        for k, v in table.sortedhash(usedfonts) do +            local counts = getunicodes(v) +            local codes = { } +            local chars = { } +            local freqs = { } +            if counts then +                codes = sortedkeys(counts) +                for i=1,#codes do +                    local k = codes[i] +                    local c = utfchar(k) +                    chars[i] = c +                    freqs[i] = format("U+%05X  %s  %s",k,counts[k] > 1 and "+" or " ", c) +                end +                for i=1,#codes do +                    codes[i] = format("U+%05X",codes[i]) +                end +            end +            found[k] = { +                basefont = v.BaseFont or "no basefont", +                encoding = v.Encoding or "no encoding", +                subtype  = v.Subtype or "no subtype", +                unicode  = v.ToUnicode and "unicode" or "no unicode", +                chars    = chars, +                codes    = codes, +                freqs    = freqs, +            } +        end + +        if environment.argument("detail") then +            for k, v in sortedhash(found) do +                report("id         : %s",k) +                report("basefont   : %s",v.basefont) +                report("encoding   : %s",v.encoding) +                report("subtype    : %s",v.subtype) +                report("unicode    : %s",v.unicode) +                report("characters : %s", concat(v.chars," ")) +                report("codepoints : %s", concat(v.codes," ")) +                report("") +            end +        else +            local results = { { "id", "basefont", "encoding", "subtype", "unicode", "characters" } } +            for k, v in sortedhash(found) do +                results[#results+1] = { k, v.basefont, v.encoding, v.subtype, v.unicode, concat(v.chars," ") } +            end +            utilities.formatters.formatcolumns(results) +            report(results[1]) +            report("") +            for i=2,#results do +                report(results[i]) +            end +            report("") +        end +    end +end + +-- scripts.pdf.info("e:/tmp/oeps.pdf") +-- scripts.pdf.metadata("e:/tmp/oeps.pdf") +-- scripts.pdf.fonts("e:/tmp/oeps.pdf") + +local filename = environment.files[1] or "" + +if filename == "" then +    application.help() +elseif environment.argument("info") then +    scripts.pdf.info(filename)  elseif environment.argument("metadata") then -    scripts.pdf.metadata() +    scripts.pdf.metadata(filename) +elseif environment.argument("fonts") then +    scripts.pdf.fonts(filename)  else      application.help()  end diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index 2090ec584..1eb0f5816 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -247,12 +247,16 @@ function table.strip(tab)  end  function table.keys(t) -    local keys, k = { }, 0 -    for key, _ in next, t do -        k = k + 1 -        keys[k] = key +    if t then +        local keys, k = { }, 0 +        for key, _ in next, t do +            k = k + 1 +            keys[k] = key +        end +        return keys +    else +        return { }      end -    return keys  end  local function compare(a,b) @@ -265,41 +269,49 @@ local function compare(a,b)  end  local function sortedkeys(tab) -    local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed -    for key,_ in next, tab do -        s = s + 1 -        srt[s] = key -        if category == 3 then -            -- no further check -        else -            local tkey = type(key) -            if tkey == "string" then -                category = (category == 2 and 3) or 1 -            elseif tkey == "number" then -                category = (category == 1 and 3) or 2 +    if tab then +        local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed +        for key,_ in next, tab do +            s = s + 1 +            srt[s] = key +            if category == 3 then +                -- no further check              else -                category = 3 +                local tkey = type(key) +                if tkey == "string" then +                    category = (category == 2 and 3) or 1 +                elseif tkey == "number" then +                    category = (category == 1 and 3) or 2 +                else +                    category = 3 +                end              end          end -    end -    if category == 0 or category == 3 then -        sort(srt,compare) +        if category == 0 or category == 3 then +            sort(srt,compare) +        else +            sort(srt) +        end +        return srt      else -        sort(srt) +        return { }      end -    return srt  end  local function sortedhashkeys(tab) -- fast one -    local srt, s = { }, 0 -    for key,_ in next, tab do -        if key then -            s= s + 1 -            srt[s] = key +    if tab then +        local srt, s = { }, 0 +        for key,_ in next, tab do +            if key then +                s= s + 1 +                srt[s] = key +            end          end +        sort(srt) +        return srt +    else +        return { }      end -    sort(srt) -    return srt  end  table.sortedkeys     = sortedkeys @@ -324,7 +336,7 @@ end  table.sortedhash  = sortedhash  table.sortedpairs = sortedhash -function table.append(t, list) +function table.append(t,list)      local n = #t      for i=1,#list do          n = n + 1 @@ -1186,7 +1198,7 @@ local report = texio and texio.write_nl or print  -- function lpeg.Cmt  (l) local p = lpcmt (l) report("LPEG Cmt =")  lpprint(l) return p end  -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -local type = type +local type, next = type, next  local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format  -- Beware, we predefine a bunch of patterns here and one reason for doing so @@ -1247,6 +1259,10 @@ patterns.utf8char      = utf8char  patterns.validutf8     = validutf8char  patterns.validutf8char = validutf8char +local eol              = S("\n\r") +local spacer           = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace       = eol + spacer +  patterns.digit         = digit  patterns.sign          = sign  patterns.cardinal      = sign^0 * digit^1 @@ -1266,16 +1282,16 @@ patterns.letter        = patterns.lowercase + patterns.uppercase  patterns.space         = space  patterns.tab           = P("\t")  patterns.spaceortab    = patterns.space + patterns.tab -patterns.eol           = S("\n\r") -patterns.spacer        = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +patterns.eol           = eol +patterns.spacer        = spacer +patterns.whitespace    = whitespace  patterns.newline       = newline  patterns.emptyline     = newline^1 -patterns.nonspacer     = 1 - patterns.spacer -patterns.whitespace    = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace +patterns.nonspacer     = 1 - spacer +patterns.nonwhitespace = 1 - whitespace  patterns.equal         = P("=")  patterns.comma         = P(",") -patterns.commaspacer   = P(",") * patterns.spacer^0 +patterns.commaspacer   = P(",") * spacer^0  patterns.period        = P(".")  patterns.colon         = P(":")  patterns.semicolon     = P(";") @@ -1491,8 +1507,8 @@ end  function lpeg.replacer(one,two)      if type(one) == "table" then          local no = #one +        local p          if no > 0 then -            local p              for i=1,no do                  local o = one[i]                  local pp = P(o[1]) / o[2] @@ -1502,8 +1518,17 @@ function lpeg.replacer(one,two)                      p = pp                  end              end -            return Cs((p + 1)^0) +        else +            for k, v in next, one do +                local pp = P(k) / v +                if p then +                    p = p + pp +                else +                    p = pp +                end +            end          end +        return Cs((p + 1)^0)      else          two = two or ""          return Cs((P(one)/two + 1)^0) @@ -1875,6 +1900,14 @@ function string.tformat(fmt,...)      return format(lpegmatch(replacer,fmt),...)  end +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) +    return lpegmatch(pattern,str) +end +  end -- of closure @@ -5699,7 +5732,7 @@ statistics.elapsedtime    = elapsedtime  statistics.elapsedindeed  = elapsedindeed  statistics.elapsedseconds = elapsedseconds --- general function +-- general function .. we might split this module  function statistics.register(tag,fnc)      if statistics.enable and type(fnc) == "function" then diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index 2090ec584..1eb0f5816 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -247,12 +247,16 @@ function table.strip(tab)  end  function table.keys(t) -    local keys, k = { }, 0 -    for key, _ in next, t do -        k = k + 1 -        keys[k] = key +    if t then +        local keys, k = { }, 0 +        for key, _ in next, t do +            k = k + 1 +            keys[k] = key +        end +        return keys +    else +        return { }      end -    return keys  end  local function compare(a,b) @@ -265,41 +269,49 @@ local function compare(a,b)  end  local function sortedkeys(tab) -    local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed -    for key,_ in next, tab do -        s = s + 1 -        srt[s] = key -        if category == 3 then -            -- no further check -        else -            local tkey = type(key) -            if tkey == "string" then -                category = (category == 2 and 3) or 1 -            elseif tkey == "number" then -                category = (category == 1 and 3) or 2 +    if tab then +        local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed +        for key,_ in next, tab do +            s = s + 1 +            srt[s] = key +            if category == 3 then +                -- no further check              else -                category = 3 +                local tkey = type(key) +                if tkey == "string" then +                    category = (category == 2 and 3) or 1 +                elseif tkey == "number" then +                    category = (category == 1 and 3) or 2 +                else +                    category = 3 +                end              end          end -    end -    if category == 0 or category == 3 then -        sort(srt,compare) +        if category == 0 or category == 3 then +            sort(srt,compare) +        else +            sort(srt) +        end +        return srt      else -        sort(srt) +        return { }      end -    return srt  end  local function sortedhashkeys(tab) -- fast one -    local srt, s = { }, 0 -    for key,_ in next, tab do -        if key then -            s= s + 1 -            srt[s] = key +    if tab then +        local srt, s = { }, 0 +        for key,_ in next, tab do +            if key then +                s= s + 1 +                srt[s] = key +            end          end +        sort(srt) +        return srt +    else +        return { }      end -    sort(srt) -    return srt  end  table.sortedkeys     = sortedkeys @@ -324,7 +336,7 @@ end  table.sortedhash  = sortedhash  table.sortedpairs = sortedhash -function table.append(t, list) +function table.append(t,list)      local n = #t      for i=1,#list do          n = n + 1 @@ -1186,7 +1198,7 @@ local report = texio and texio.write_nl or print  -- function lpeg.Cmt  (l) local p = lpcmt (l) report("LPEG Cmt =")  lpprint(l) return p end  -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -local type = type +local type, next = type, next  local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format  -- Beware, we predefine a bunch of patterns here and one reason for doing so @@ -1247,6 +1259,10 @@ patterns.utf8char      = utf8char  patterns.validutf8     = validutf8char  patterns.validutf8char = validutf8char +local eol              = S("\n\r") +local spacer           = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace       = eol + spacer +  patterns.digit         = digit  patterns.sign          = sign  patterns.cardinal      = sign^0 * digit^1 @@ -1266,16 +1282,16 @@ patterns.letter        = patterns.lowercase + patterns.uppercase  patterns.space         = space  patterns.tab           = P("\t")  patterns.spaceortab    = patterns.space + patterns.tab -patterns.eol           = S("\n\r") -patterns.spacer        = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +patterns.eol           = eol +patterns.spacer        = spacer +patterns.whitespace    = whitespace  patterns.newline       = newline  patterns.emptyline     = newline^1 -patterns.nonspacer     = 1 - patterns.spacer -patterns.whitespace    = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace +patterns.nonspacer     = 1 - spacer +patterns.nonwhitespace = 1 - whitespace  patterns.equal         = P("=")  patterns.comma         = P(",") -patterns.commaspacer   = P(",") * patterns.spacer^0 +patterns.commaspacer   = P(",") * spacer^0  patterns.period        = P(".")  patterns.colon         = P(":")  patterns.semicolon     = P(";") @@ -1491,8 +1507,8 @@ end  function lpeg.replacer(one,two)      if type(one) == "table" then          local no = #one +        local p          if no > 0 then -            local p              for i=1,no do                  local o = one[i]                  local pp = P(o[1]) / o[2] @@ -1502,8 +1518,17 @@ function lpeg.replacer(one,two)                      p = pp                  end              end -            return Cs((p + 1)^0) +        else +            for k, v in next, one do +                local pp = P(k) / v +                if p then +                    p = p + pp +                else +                    p = pp +                end +            end          end +        return Cs((p + 1)^0)      else          two = two or ""          return Cs((P(one)/two + 1)^0) @@ -1875,6 +1900,14 @@ function string.tformat(fmt,...)      return format(lpegmatch(replacer,fmt),...)  end +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) +    return lpegmatch(pattern,str) +end +  end -- of closure @@ -5699,7 +5732,7 @@ statistics.elapsedtime    = elapsedtime  statistics.elapsedindeed  = elapsedindeed  statistics.elapsedseconds = elapsedseconds --- general function +-- general function .. we might split this module  function statistics.register(tag,fnc)      if statistics.enable and type(fnc) == "function" then diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index 2090ec584..1eb0f5816 100755 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -247,12 +247,16 @@ function table.strip(tab)  end  function table.keys(t) -    local keys, k = { }, 0 -    for key, _ in next, t do -        k = k + 1 -        keys[k] = key +    if t then +        local keys, k = { }, 0 +        for key, _ in next, t do +            k = k + 1 +            keys[k] = key +        end +        return keys +    else +        return { }      end -    return keys  end  local function compare(a,b) @@ -265,41 +269,49 @@ local function compare(a,b)  end  local function sortedkeys(tab) -    local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed -    for key,_ in next, tab do -        s = s + 1 -        srt[s] = key -        if category == 3 then -            -- no further check -        else -            local tkey = type(key) -            if tkey == "string" then -                category = (category == 2 and 3) or 1 -            elseif tkey == "number" then -                category = (category == 1 and 3) or 2 +    if tab then +        local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed +        for key,_ in next, tab do +            s = s + 1 +            srt[s] = key +            if category == 3 then +                -- no further check              else -                category = 3 +                local tkey = type(key) +                if tkey == "string" then +                    category = (category == 2 and 3) or 1 +                elseif tkey == "number" then +                    category = (category == 1 and 3) or 2 +                else +                    category = 3 +                end              end          end -    end -    if category == 0 or category == 3 then -        sort(srt,compare) +        if category == 0 or category == 3 then +            sort(srt,compare) +        else +            sort(srt) +        end +        return srt      else -        sort(srt) +        return { }      end -    return srt  end  local function sortedhashkeys(tab) -- fast one -    local srt, s = { }, 0 -    for key,_ in next, tab do -        if key then -            s= s + 1 -            srt[s] = key +    if tab then +        local srt, s = { }, 0 +        for key,_ in next, tab do +            if key then +                s= s + 1 +                srt[s] = key +            end          end +        sort(srt) +        return srt +    else +        return { }      end -    sort(srt) -    return srt  end  table.sortedkeys     = sortedkeys @@ -324,7 +336,7 @@ end  table.sortedhash  = sortedhash  table.sortedpairs = sortedhash -function table.append(t, list) +function table.append(t,list)      local n = #t      for i=1,#list do          n = n + 1 @@ -1186,7 +1198,7 @@ local report = texio and texio.write_nl or print  -- function lpeg.Cmt  (l) local p = lpcmt (l) report("LPEG Cmt =")  lpprint(l) return p end  -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -local type = type +local type, next = type, next  local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format  -- Beware, we predefine a bunch of patterns here and one reason for doing so @@ -1247,6 +1259,10 @@ patterns.utf8char      = utf8char  patterns.validutf8     = validutf8char  patterns.validutf8char = validutf8char +local eol              = S("\n\r") +local spacer           = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace       = eol + spacer +  patterns.digit         = digit  patterns.sign          = sign  patterns.cardinal      = sign^0 * digit^1 @@ -1266,16 +1282,16 @@ patterns.letter        = patterns.lowercase + patterns.uppercase  patterns.space         = space  patterns.tab           = P("\t")  patterns.spaceortab    = patterns.space + patterns.tab -patterns.eol           = S("\n\r") -patterns.spacer        = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +patterns.eol           = eol +patterns.spacer        = spacer +patterns.whitespace    = whitespace  patterns.newline       = newline  patterns.emptyline     = newline^1 -patterns.nonspacer     = 1 - patterns.spacer -patterns.whitespace    = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace +patterns.nonspacer     = 1 - spacer +patterns.nonwhitespace = 1 - whitespace  patterns.equal         = P("=")  patterns.comma         = P(",") -patterns.commaspacer   = P(",") * patterns.spacer^0 +patterns.commaspacer   = P(",") * spacer^0  patterns.period        = P(".")  patterns.colon         = P(":")  patterns.semicolon     = P(";") @@ -1491,8 +1507,8 @@ end  function lpeg.replacer(one,two)      if type(one) == "table" then          local no = #one +        local p          if no > 0 then -            local p              for i=1,no do                  local o = one[i]                  local pp = P(o[1]) / o[2] @@ -1502,8 +1518,17 @@ function lpeg.replacer(one,two)                      p = pp                  end              end -            return Cs((p + 1)^0) +        else +            for k, v in next, one do +                local pp = P(k) / v +                if p then +                    p = p + pp +                else +                    p = pp +                end +            end          end +        return Cs((p + 1)^0)      else          two = two or ""          return Cs((P(one)/two + 1)^0) @@ -1875,6 +1900,14 @@ function string.tformat(fmt,...)      return format(lpegmatch(replacer,fmt),...)  end +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) +    return lpegmatch(pattern,str) +end +  end -- of closure @@ -5699,7 +5732,7 @@ statistics.elapsedtime    = elapsedtime  statistics.elapsedindeed  = elapsedindeed  statistics.elapsedseconds = elapsedseconds --- general function +-- general function .. we might split this module  function statistics.register(tag,fnc)      if statistics.enable and type(fnc) == "function" then | 
