From cd5ed2f3cb9052857cf0f35e2557f93f2b549270 Mon Sep 17 00:00:00 2001
From: Hans Hagen
Date: Thu, 21 Jul 2011 18:09:00 +0200
Subject: beta 2011.07.21 18:09

---
 scripts/context/lua/mtx-pdf.lua        | 127 +++++++++++++++++++++++++++++++++
 scripts/context/lua/mtxrun.lua         |  36 +++++++++-
 scripts/context/stubs/mswin/mtxrun.lua |  36 +++++++++-
 scripts/context/stubs/unix/mtxrun      |  36 +++++++++-
 4 files changed, 232 insertions(+), 3 deletions(-)
 create mode 100644 scripts/context/lua/mtx-pdf.lua

(limited to 'scripts')

diff --git a/scripts/context/lua/mtx-pdf.lua b/scripts/context/lua/mtx-pdf.lua
new file mode 100644
index 000000000..5654b8bc4
--- /dev/null
+++ b/scripts/context/lua/mtx-pdf.lua
@@ -0,0 +1,127 @@
+if not modules then modules = { } end modules ['mtx-pdf'] = {
+    version   = 1.001,
+    comment   = "companion to mtxrun.lua",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files"
+}
+
+local helpinfo = [[
+--info show some info about the given file
+--metadata show metadata xml blob
+]]
+
+local application = logs.application {
+    name     = "mtx-pdf",
+    banner   = "ConTeXt PDF Helpers 0.01",
+    helpinfo = helpinfo,
+}
+
+local report = application.report
+
+dofile(resolvers.findfile("lpdf-epd.lua","tex"))
+
+scripts     = scripts     or { }
+scripts.pdf = scripts.pdf or { }
+
+-- Returns what lpdf.epdf.load gives for the given filename. When no name is given,
+-- when the name is not a file, or when loading gives no result, a message is reported
+-- and nothing is returned.
+local function loadpdffile(filename)
+    if not filename or filename == "" then
+        report("no filename given")
+    elseif not lfs.isfile(filename) then
+        report("unknown file '%s'",filename)
+    else
+        local pdffile = lpdf.epdf.load(filename)
+        if pdffile then
+            return pdffile
+        else
+            report("no valid pdf file '%s'",filename)
+        end
+    end
+end
+
+-- Reports some properties of the first file given on the command line, followed by
+-- the ranges of successive pages that share the same crop (or else media) box size.
+function scripts.pdf.info()
+    local filename = environment.files[1]
+    local pdffile  = loadpdffile(filename)
+    if pdffile then
+        local catalog  = pdffile.Catalog
+        local info     = pdffile.Info or { } -- the info dictionary is optional in a pdf file
+        local pages    = pdffile.pages
+        local nofpages = pages.n -- no # yet. will be in 5.2
+
+        report("filename > %s",filename)
+        report("pdf version > %s",catalog.Version)
+        report("number of pages > %s",nofpages)
+        report("title > %s",info.Title)
+        report("creator > %s",info.Creator)
+        report("producer > %s",info.Producer)
+        report("creation date > %s",info.CreationDate)
+        report("modification date > %s",info.ModDate)
+
+        local width, height, start
+        for i=1, nofpages do
+            local page = pages[i]
+            -- a box is { llx, lly, urx, ury }; an empty one is used when the page has neither
+            local bbox = page.CropBox or page.MediaBox or { 0, 0, 0, 0 }
+            local w, h = bbox[3]-bbox[1], bbox[4]-bbox[2]
+            if w ~= width or h ~= height then
+                if start then
+                    report("cropbox > pages: %s-%s, width: %s, height: %s",start,i-1,width,height)
+                end
+                width, height, start = w, h, i
+            end
+        end
+        if start then -- no range is pending when the document has no pages
+            report("cropbox > pages: %s-%s, width: %s, height: %s",start,nofpages,width,height)
+        end
+    end
+end
+
+-- Reports the result of calling the Metadata entry of the catalog of the first file
+-- given on the command line, or reports that there is no such entry.
+function scripts.pdf.metadata()
+    local filename = environment.files[1]
+    local pdffile  = loadpdffile(filename)
+    if pdffile then
+        local catalog  = pdffile.Catalog
+        local metadata = catalog.Metadata
+        if metadata then
+            report("metadata > \n\n%s\n",metadata())
+        else
+            report("no metadata")
+        end
+    end
+end
+
+if environment.argument("info") then
+    scripts.pdf.info()
+elseif environment.argument("metadata") then
+    scripts.pdf.metadata()
+else
+    application.help()
+end
+
+-- a variant on an experiment by hartmut
+
+--~ function downloadlinks(filename)
+--~     local document = lpdf.epdf.load(filename)
+--~     if document then
+--~         local pages = document.pages
+--~         for p = 1,#pages do
+--~             local annotations = pages[p].Annots
+--~             if annotations then
+--~                 for a=1,#annotations do
+--~                     local annotation = annotations[a]
+--~                     local uri = annotation.Subtype == "Link" and annotation.A and annotation.A.URI
+--~                     if uri and string.find(uri,"^http") then
+--~                         os.execute("wget " ..
uri)
+--~                     end
+--~                 end
+--~             end
+--~         end
+--~     end
+--~ end
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 29665417e..158d11ecd 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -1350,7 +1350,7 @@ local utflinesplitter = utfbom^-1 * tsplitat(newline)
 patterns.utflinesplitter = utflinesplitter
 
 function string.utfsplitlines(str)
-    return match(utflinesplitter,str)
+    return match(utflinesplitter,str or "")
 end
 
 
@@ -3902,6 +3902,40 @@ end
 
 
 
+local P, C, R, Cs = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs
+
+-- utf-16 (after a bom) to utf-8; surrogates are matched on the raw high byte of a unit,
+-- which is the first byte in big endian and the second byte in little endian input
+local one     = P(1)
+local two     = C(1) * C(1)
+local lead, trail = R("\216\219"), R("\220\223") -- 0xD8-0xDB, 0xDC-0xDF (bytes, not utfchar)
+local four_be = C(lead) * C(1) * C(trail) * C(1)
+local four_le = C(1) * C(lead) * C(1) * C(trail)
+
+local function unit(hi,lo) -- 16 bit code unit from its high and low byte
+    return 0x100 * byte(hi) + byte(lo)
+end
+
+local function pair(a,b,c,d) -- code point from the high (ab) and low (cd) surrogate
+    return utfchar((unit(a,b)-0xD800)*0x400 + (unit(c,d)-0xDC00) + 0x10000)
+end
+
+local pattern =  P("\254\255") * Cs( (
+                    four_be / pair
+                  + two     / function(a,b) return utfchar(unit(a,b)) end
+                  + one
+                )^1 )
+              + P("\255\254") * Cs( (
+                    four_le / function(b,a,d,c) return pair(a,b,c,d) end
+                  + two     / function(b,a) return utfchar(unit(a,b)) end
+                  + one
+                )^1 )
+
+function string.toutf(s) -- utf-16 with a bom to utf-8, other input is returned as-is
+    return lpegmatch(pattern,s) or s -- todo: utf32
+end
+
+
 end -- of closure
 
 do -- create closure to overcome 200 locals limit
diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua
index 29665417e..158d11ecd 100644
--- a/scripts/context/stubs/mswin/mtxrun.lua
+++ b/scripts/context/stubs/mswin/mtxrun.lua
@@ -1350,7 +1350,7 @@ local utflinesplitter = utfbom^-1 * tsplitat(newline)
 patterns.utflinesplitter = utflinesplitter
 
 function string.utfsplitlines(str)
-    return match(utflinesplitter,str)
+    return match(utflinesplitter,str or "")
 end
 
 
@@ -3902,6 +3902,40 @@ end
 
 
 
+local P, C, R, Cs = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs
+
+-- utf-16 (after a bom) to utf-8; surrogates are matched on the raw high byte of a unit,
+-- which is the first byte in big endian and the second byte in little endian input
+local one     = P(1)
+local two     = C(1) * C(1)
+local lead, trail = R("\216\219"), R("\220\223") -- 0xD8-0xDB, 0xDC-0xDF (bytes, not utfchar)
+local four_be = C(lead) * C(1) * C(trail) * C(1)
+local four_le = C(1) * C(lead) * C(1) * C(trail)
+
+local function unit(hi,lo) -- 16 bit code unit from its high and low byte
+    return 0x100 * byte(hi) + byte(lo)
+end
+
+local function pair(a,b,c,d) -- code point from the high (ab) and low (cd) surrogate
+    return utfchar((unit(a,b)-0xD800)*0x400 + (unit(c,d)-0xDC00) + 0x10000)
+end
+
+local pattern =  P("\254\255") * Cs( (
+                    four_be / pair
+                  + two     / function(a,b) return utfchar(unit(a,b)) end
+                  + one
+                )^1 )
+              + P("\255\254") * Cs( (
+                    four_le / function(b,a,d,c) return pair(a,b,c,d) end
+                  + two     / function(b,a) return utfchar(unit(a,b)) end
+                  + one
+                )^1 )
+
+function string.toutf(s) -- utf-16 with a bom to utf-8, other input is returned as-is
+    return lpegmatch(pattern,s) or s -- todo: utf32
+end
+
+
 end -- of closure
 
 do -- create closure to overcome 200 locals limit
diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun
index 29665417e..158d11ecd 100755
--- a/scripts/context/stubs/unix/mtxrun
+++ b/scripts/context/stubs/unix/mtxrun
@@ -1350,7 +1350,7 @@ local utflinesplitter = utfbom^-1 * tsplitat(newline)
 patterns.utflinesplitter = utflinesplitter
 
 function string.utfsplitlines(str)
-    return match(utflinesplitter,str)
+    return match(utflinesplitter,str or "")
 end
 
 
@@ -3902,6 +3902,40 @@ end
 
 
 
+local P, C, R, Cs = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs
+
+-- utf-16 (after a bom) to utf-8; surrogates are matched on the raw high byte of a unit,
+-- which is the first byte in big endian and the second byte in little endian input
+local one     = P(1)
+local two     = C(1) * C(1)
+local lead, trail = R("\216\219"), R("\220\223") -- 0xD8-0xDB, 0xDC-0xDF (bytes, not utfchar)
+local four_be = C(lead) * C(1) * C(trail) * C(1)
+local four_le = C(1) * C(lead) * C(1) * C(trail)
+
+local function unit(hi,lo) -- 16 bit code unit from its high and low byte
+    return 0x100 * byte(hi) + byte(lo)
+end
+
+local function pair(a,b,c,d) -- code point from the high (ab) and low (cd) surrogate
+    return utfchar((unit(a,b)-0xD800)*0x400 + (unit(c,d)-0xDC00) + 0x10000)
+end
+
+local pattern =  P("\254\255") * Cs( (
+                    four_be / pair
+                  + two     / function(a,b) return utfchar(unit(a,b)) end
+                  + one
+                )^1 )
+              + P("\255\254") * Cs( (
+                    four_le / function(b,a,d,c) return pair(a,b,c,d) end
+                  + two     / function(b,a) return utfchar(unit(a,b)) end
+                  + one
+                )^1 )
+
+function string.toutf(s) -- result of matching s against pattern (bom marked utf-16), else s itself
+    return lpegmatch(pattern,s) or s -- todo: utf32
+end
+
+
 end -- of closure
 
 do -- create closure to overcome 200 locals limit
-- 
cgit v1.2.3