1 files changed, 0 insertions, 1313 deletions
diff --git a/tex/context/base/mkiv/lpdf-epd.lua b/tex/context/base/mkiv/lpdf-epd.lua
deleted file mode 100644
index 86aa6f294..000000000
--- a/tex/context/base/mkiv/lpdf-epd.lua
+++ /dev/null
@@ -1,1313 +0,0 @@
-if not modules then modules = { } end modules ['lpdf-epd'] = {
-    version   = 1.001,
-    comment   = "companion to lpdf-epa.mkiv",
-    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
-    copyright = "PRAGMA ADE / ConTeXt Development Team",
-    license   = "see context related readme files"
-}
-
--- This is an experimental layer around the epdf library. The reason for this layer is that
--- I want to be independent of the library (which implements a selection of what a file
--- provides) and also because I want an interface closer to Lua's table model while the API
--- stays close to the original xpdf library. Of course, after prototyping a solution, we can
--- optimize it using the low level epdf accessors. However, not all are accessible (this will
--- be fixed).
---
--- It will be handy when we have a __length and __next that can trigger the resolve till then
--- we will provide .n as #; maybe in Lua 5.3 or later.
---
--- As there can be references to the parent we cannot expand a tree. I played with some
--- expansion variants but it does not pay off; adding extra checks is not worth the trouble.
---
--- The document stays open. In order to free memory one has to explicitly unload the loaded
--- document.
---
--- We have much more checking than needed in the prepare functions because occasionally
--- we run into bugs in poppler or the epdf interface. It took us a while to realize that
--- there was a long standing gc issue that, on long runs that include many pages, could
--- crash the analyzer.
---
--- Normally a value is fetched by key, as in foo.Title but as it can be in pdfdoc encoding
--- a safer bet is foo("Title") which will return a decoded string (or the original if it
--- already was unicode).
-
-local setmetatable, rawset, rawget, type, next = setmetatable, rawset, rawget, type, next
-local tostring, tonumber, unpack = tostring, tonumber, unpack
-local lower, match, char, byte, find = string.lower, string.match, string.char, string.byte, string.find
-local abs = math.abs
-local concat = table.concat
-local toutf, toeight, utfchar = string.toutf, utf.toeight, utf.char
-local setmetatableindex = table.setmetatableindex
-
-local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
-local P, C, S, R, Ct, Cc, V, Carg, Cs, Cf, Cg = lpeg.P, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cc, lpeg.V, lpeg.Carg, lpeg.Cs, lpeg.Cf, lpeg.Cg
-
-local epdf      = epdf
-      lpdf      = lpdf or { }
-local lpdf      = lpdf
-local lpdf_epdf = { }
-lpdf.epdf       = lpdf_epdf
-
--- local getDict, getArray, getReal, getNum, getString, getBool, getName, getRef, getRefNum
--- local getType, getTypeName
--- local dictGetLength, dictGetVal, dictGetValNF, dictGetKey
--- local arrayGetLength, arrayGetNF, arrayGet
--- local streamReset, streamGetDict, streamGetChar
-
--- We use as little as possible and also not an object interface. After all, we
--- don't know how the library (and its api) evolves so we better can be prepared
--- for wrappers.
-
--- Fetch the tables that are stored in the Lua registry under the epdf type names. All
--- accessors used in this file are taken from these tables once, so that the rest of the
--- code only deals with plain local functions.
-local registry         = debug.getregistry()
-
-local object           = registry["epdf.Object"]
-local dictionary       = registry["epdf.Dict"]
-local array            = registry["epdf.Array"]
-local xref             = registry["epdf.XRef"]
-local catalog          = registry["epdf.Catalog"]
-local pdfdoc           = registry["epdf.PDFDoc"]
-
--- Each group of shortcuts is followed by a check that quits the run when one of them is
--- missing; the number passed to the message identifies the group that failed.
-if not (object and dictionary and array and xref and catalog and pdfdoc) then
-    logs.report("fatal error","invalid pdf inclusion library (%s)",1)
-    os.exit()
-end
-
-local openPDF          = epdf.open
-
-local getMajorVersion  = pdfdoc.getPDFMajorVersion
-local getMinorVersion  = pdfdoc.getPDFMinorVersion
-local getXRef          = pdfdoc.getXRef
-local getRawCatalog    = pdfdoc.getCatalog
-
-if not (openPDF and getMajorVersion and getMinorVersion and getXRef and getRawCatalog) then
-    logs.report("fatal error","invalid pdf inclusion library (%s)",2)
-    os.exit()
-end
-
-local getDict          = object.getDict
-local getArray         = object.getArray
-local getReal          = object.getReal
-local getInt           = object.getInt
-local getString        = object.getString
-local getBool          = object.getBool
-local getName          = object.getName
-local getRef           = object.getRef
-local getRefNum        = object.getRefNum
-
-local getType          = object.getType
-
-if not (getDict and getArray and getReal and getInt and getString and getBool and getName and getRef and getRefNum and getType) then
-    logs.report("fatal error","invalid pdf inclusion library (%s)",3)
-    os.exit()
-end
-
-local streamReset      = object.streamReset
-local streamGetDict    = object.streamGetDict
-local streamGetChar    = object.streamGetChar
-local streamGetAll     = object.streamGetAll
-
--- streamGetAll is not part of the check below; getstream tests for it before use.
--- NOTE(review): this check reports code 3, the same code as the previous check, so the
--- two failures cannot be told apart in the log.
-if not (streamReset and streamGetDict and streamGetChar) then
-    logs.report("fatal error","invalid pdf inclusion library (%s)",3)
-    os.exit()
-end
-
-local dictGetLength    = dictionary.getLength
-local dictGetVal       = dictionary.getVal
-local dictGetValNF     = dictionary.getValNF
-local dictGetKey       = dictionary.getKey
-
-if not (dictGetLength and dictGetVal and dictGetValNF and dictGetKey) then
-    logs.report("fatal error","invalid pdf inclusion library (%s)",4)
-    os.exit()
-end
-
-local arrayGetLength   = array.getLength
-local arrayGetNF       = array.getNF
-local arrayGet         = array.get
-
-if not (arrayGetLength and arrayGetNF and arrayGet) then
-    logs.report("fatal error","invalid pdf inclusion library (%s)",5)
-    os.exit()
-end
-
--- these are kind of weird as they can't be accessed by (root) object
-
-local getNumPages      = catalog.getNumPages
-local getPageRef       = catalog.getPageRef
-
-local fetch            = xref.fetch
-local getCatalog       = xref.getCatalog
-local getDocInfo       = xref.getDocInfo
-
-if not (getNumPages and getPageRef and fetch and getCatalog and getDocInfo) then
-    logs.report("fatal error","invalid pdf inclusion library (%s)",6)
-    os.exit()
-end
-
--- we're done with library shortcuts
-
--- Names of the object types, stored from index 0 on. The codes that getType returns are
--- compared with entries of this list (see null_object_code and ref_object_code below).
-local typenames = { [0] =
-    "boolean",
-    "integer",
-    "real",
-    "string",
-    "name",
-    "null",
-    "array",
-    "dictionary",
-    "stream",
-    "ref",
-    "cmd",
-    "error",
-    "eof",
-    "none",
-    "integer64",
-}
-
--- The reverse lookup (name to code); presumably table.swapped exchanges keys and values,
--- which is how the fields typenumbers.null and typenumbers.ref are used below.
-local typenumbers = table.swapped(typenames)
-
-local null_object_code = typenumbers.null
-local ref_object_code  = typenumbers.ref
-
-local report_epdf = logs.reporter("epdf")
-
--- Report the given message (all arguments are handed to report_epdf), announce that the
--- job is aborted and then quit the process with os.exit.
-local function fatal_error(...)
-    report_epdf(...)
-    report_epdf("aborting job in order to avoid crash")
-    os.exit()
-end
-
--- epdf is the built-in library
-
--- Return the lowercased first word of tostring(o), that is, everything up to the first
--- space or colon. When tostring(o) has no such word the string "?" is returned.
-function epdf.type(o)
-    local t = match(tostring(o),"[^ :]+")
-    -- lowercase only after the check: lower(nil) raises an error, which made the
-    -- fallback to "?" unreachable
-    return t and lower(t) or "?"
-end
-
-local checked_access
-
--- dictionaries (can be optimized: ... resolve and redefine when all locals set)
-
-local frompdfdoc = lpdf.frompdfdoc
-
--- get_flagged(t,f,k) fetches t[k] (which triggers the lazy resolve of t) and looks up
--- the flag that was recorded for that key in f. Without a flag the value is returned as
--- it is. When the lpdf object constructors are available the value is wrapped in the
--- constructor that matches the flag, otherwise only "rawtext" values are converted,
--- using lpdf.frompdfdoc.
-local get_flagged
-
-if lpdf.dictionary then
-
-    local pdfdictionary = lpdf.dictionary
-    local pdfarray      = lpdf.array
-    local pdfconstant   = lpdf.constant
-    local pdfstring     = lpdf.string
-    local pdfunicode    = lpdf.unicode
-
-    get_flagged = function(t,f,k)
-        local tk = t[k] -- triggers resolve
-        local fk = f[k]
-        if not fk then
-            return tk
-        elseif fk == "name" then
-            return pdfconstant(tk)
-        elseif fk == "array" then
-            return pdfarray(tk)
-        elseif fk == "dictionary" then
-            -- this branch used pdfarray, which left pdfdictionary unused
-            return pdfdictionary(tk)
-        elseif fk == "rawtext" then
-            return pdfstring(tk)
-        elseif fk == "unicode" then
-            return pdfunicode(tk)
-        else
-            return tk
-        end
-    end
-
-else
-
-    get_flagged = function(t,f,k)
-        local tk = t[k] -- triggers resolve
-        local fk = f[k]
-        if not fk then
-            return tk
-        elseif fk == "rawtext" then
-            return frompdfdoc(tk)
-        else
-            return tk
-        end
-    end
-
-end
-
--- Resolve all n entries of the library dictionary d into the Lua table t and return
--- t[k] afterwards.
---
--- document : the document table; its __cache__ and __xrefs__ tables are updated
--- d, n     : the library dictionary and its number of entries
--- t        : the table that gets filled
--- k        : the key that triggered the resolve
--- mt       : optional metatable that replaces the lazy one; without it only the
---            __index entry of the current metatable is removed
--- flags    : optional table that receives the flag of each directly stored value
---
--- Referenced objects are created once per object number and shared via the cache.
-local function prepare(document,d,t,n,k,mt,flags)
-    for i=1,n do
-        local v = dictGetVal(d,i)
-        if v then
-            local r = dictGetValNF(d,i)
-            local kind = getType(v)
-            if kind == null_object_code then
-                -- ignore
-            elseif kind then
-                local key = dictGetKey(d,i)
-                if r and getType(r) == ref_object_code then
-                    local objnum = getRefNum(r)
-                    local cached = document.__cache__[objnum]
-                    if not cached then
-                        cached = checked_access[kind](v,document,objnum,mt)
-                        if cached then
-                            document.__cache__[objnum] = cached
-                            document.__xrefs__[cached] = objnum
-                        end
-                    end
-                    t[key] = cached
-                else
-                    local v, flag = checked_access[kind](v,document)
-                    t[key] = v
-                    if flag and flags then
-                        flags[key] = flag -- flags
-                    end
-                end
-            else
-                -- the key has to be fetched here: the local of the branch above is not
-                -- in scope, so the message used to report an undefined global
-                report_epdf("warning: nil value for key %a in dictionary",dictGetKey(d,i))
-            end
-        else
-            fatal_error("error: invalid value at index %a in dictionary of %a",i,document.filename)
-        end
-    end
-    if mt then
-        setmetatable(t,mt)
-    else
-        getmetatable(t).__index = nil
-    end
-    return t[k]
-end
-
---     local function prepare(document,d,t,n,k,mt,flags)
---         for i=1,n do
---             local v = dictGetValNF(d,i)
---             if v then
---                 local key  = dictGetKey(d,i)
---                 local kind = getType(v)
---                 if kind == ref_object_code then
---                     local objnum = getRefNum(v)
---                     local cached = document.__cache__[objnum]
---                     if not cached then
---                         local v = dictGetVal(d,i)
---                         local kind = getType(v)
---                         cached = checked_access[kind](v,document,objnum,mt)
---                         if cached then
---                             document.__cache__[objnum] = cached
---                             document.__xrefs__[cached] = objnum
---                         end
---                     end
---                     t[key] = cached
---                 else
---                     local v, flag = checked_access[kind](v,document)
---                     t[key] = v
---                     if flag and flags then
---                         flags[key] = flag -- flags
---                     end
---                 end
---             end
---         end
---         if mt then
---             setmetatable(t,mt)
---         else
---             getmetatable(t).__index = nil
---         end
---         return t[k]
---     end
-
--- Wrap the library dictionary d in a table that resolves itself on first access.
--- Returns the table and the flag "dictionary", or nothing when d is missing or empty.
--- Indexing gives the raw value, calling the table with a key gives the flagged value
--- (see get_flagged).
-local function some_dictionary(d,document)
-    local n = d and dictGetLength(d) or 0
-    if n > 0 then
-        local t = { }
-        local f = { }
-        setmetatable(t, {
-            __index = function(t,k)
-                -- no replacement metatable here; f goes into the flags slot (it used
-                -- to be passed as an ignored eighth argument, so flags were never
-                -- recorded and the call interface returned unflagged values)
-                return prepare(document,d,t,n,k,nil,f)
-            end,
-            __call = function(t,k)
-                return get_flagged(t,f,k)
-            end,
-         -- __kind = function(k)
-         --     return f[k] or type(t[k])
-         -- end,
-        } )
-        return t, "dictionary"
-    end
-end
-
--- Turn a dictionary object into a table that resolves itself on first access. The
--- result is the table plus the flag "dictionary"; nothing is returned for a missing or
--- empty dictionary. The metatable mt is handed to prepare and replaces the lazy
--- metatable once the table is resolved. The argument r is accepted but not used here.
-local function get_dictionary(object,document,r,mt)
-    local d = getDict(object)
-    local n = d and dictGetLength(d) or 0
-    if not (n > 0) then
-        return
-    end
-    local flags = { }
-    local lazy  = { }
-    -- first access: fill the table and answer the request
-    function lazy.__index(self,key)
-        return prepare(document,d,self,n,key,mt,flags)
-    end
-    -- call interface: the value converted according to its flag
-    function lazy.__call(self,key)
-        return get_flagged(self,flags,key)
-    end
-    return setmetatable({ },lazy), "dictionary"
-end
-
--- arrays (can be optimized: ... resolve and redefine when all locals set)
-
--- Resolve all n entries of the library array a into the Lua table t. Afterwards the
--- __index and __len entries of the metatable of t are removed so that the table
--- behaves like a normal one. When k is given, t[k] is returned.
---
--- Referenced objects are created once per object number and shared via the cache of
--- the document.
-local function prepare(document,a,t,n,k)
-    for i=1,n do
-        local v = arrayGet(a,i)
-        if v then
-            local kind = getType(v)
-            if kind == null_object_code then
-                -- ignore
-            elseif kind then
-                local r = arrayGetNF(a,i)
-                if r and getType(r) == ref_object_code then
-                    local objnum = getRefNum(r)
-                    local cached = document.__cache__[objnum]
-                    if not cached then
-                        cached = checked_access[kind](v,document,objnum)
-                        -- an accessor can return nothing (for instance for an empty
-                        -- array or dictionary) and nil cannot be used as table key,
-                        -- so we only register real values, as the dictionary variant
-                        -- does
-                        if cached then
-                            document.__cache__[objnum] = cached
-                            document.__xrefs__[cached] = objnum
-                        end
-                    end
-                    t[i] = cached
-                else
-                    t[i] = checked_access[kind](v,document)
-                end
-            else
-                report_epdf("warning: nil value for index %a in array",i)
-            end
-        else
-            fatal_error("error: invalid value at index %a in array of %a",i,document.filename)
-        end
-    end
-    local m = getmetatable(t)
-    if m then
-        m.__index = nil
-        m.__len   = nil
-    end
-    if k then
-        return t[k]
-    end
-end
-
---     local function prepare(document,a,t,n,k)
---         for i=1,n do
---             local v = arrayGetNF(a,i)
---             if v then
---                 local kind = getType(v)
---                 if kind == ref_object_code then
---                     local objnum = getRefNum(v)
---                     local cached = document.__cache__[objnum]
---                     if not cached then
---                         local v = arrayGet(a,i)
---                         local kind = getType(v)
---                         cached = checked_access[kind](v,document,objnum)
---                         document.__cache__[objnum] = cached
---                         document.__xrefs__[cached] = objnum
---                     end
---                     t[i] = cached
---                 else
---                     t[i] = checked_access[kind](v,document)
---                 end
---             end
---         end
---         local m = getmetatable(t)
---         if m then
---             m.__index = nil
---             m.__len   = nil
---         end
---         if k then
---             return t[k]
---         end
---     end
-
--- Wrap the library array a in a table that resolves itself on first access. Returns
--- the table and the flag "array", or nothing when a is missing or empty. The number of
--- entries is available as field n.
-local function some_array(a,document)
-    local n = a and arrayGetLength(a) or 0
-    if n > 0 then
-        local t = { n = n }
-        -- the flag table was never declared here, so the call interface indexed an
-        -- undefined global and raised an error
-        local f = { }
-        setmetatable(t, {
-            __index = function(t,k)
-                return prepare(document,a,t,n,k)
-            end,
-            __len = function(t)
-                prepare(document,a,t,n)
-                return n
-            end,
-            __call = function(t,k)
-                return get_flagged(t,f,k)
-            end,
-         -- __kind = function(k)
-         --     return f[k] or type(t[k])
-         -- end,
-        } )
-        return t, "array"
-    end
-end
-
--- Turn an array object into a table that resolves itself on first access. Returns the
--- table and the flag "array", or nothing when the array is missing or empty. The
--- number of entries is available as field n.
-local function get_array(object,document)
-    local a = getArray(object)
-    local n = a and arrayGetLength(a) or 0
-    if n > 0 then
-        local t = { n = n }
-        local f = { }
-        setmetatable(t, {
-            __index = function(t,k)
-                -- the array variant of prepare takes five arguments; the extra ones
-                -- (built from the undeclared global _) were ignored and are gone
-                return prepare(document,a,t,n,k)
-            end,
-            __len = function(t)
-                prepare(document,a,t,n)
-                return n
-            end,
-            __call = function(t,k)
-                return get_flagged(t,f,k)
-            end,
-         -- __kind = function(k)
-         --     return f[k] or type(t[k])
-         -- end,
-        } )
-        return t, "array"
-    end
-end
-
--- todo: collect chunks
-
--- local function streamaccess(s,_,what)
---     if not what or what == "all" or what == "*all" then
---         local t, n = { }, 0
---         streamReset(s)
---         while true do
---             local c = streamGetChar(s)
---             if c < 0 then
---                 break
---             else
---                 n = n + 1
---                 t[n] = char(c)
---             end
---         end
---         return concat(t,"",1,n)
---     end
--- end
-
--- Return the content of stream s as one string. When the library provides
--- streamGetAll that function does the work, otherwise the stream is read byte by byte
--- with streamGetChar until a negative value comes back.
-local function getstream(s)
-    streamReset(s)
-    if streamGetAll then
-        return streamGetAll(s)
-    end
-    -- bytes are collected in b and converted in chunks of 2000, which keeps the
-    -- number of values passed to char within safe limits
-    local t, b, n = { }, { }, 0
-    while true do
-        local c = streamGetChar(s)
-        if c < 0 then
-            break
-        end
-        n = n + 1
-        b[n] = c
-        if n == 2000 then
-            t[#t+1] = char(unpack(b,1,n))
-            -- start with an empty chunk again; resetting to 1 kept the stale first
-            -- byte and so inserted one wrong byte per chunk
-            n = 0
-        end
-    end
-    if n > 0 then
-        t[#t+1] = char(unpack(b,1,n))
-    end
-    return concat(t)
-end
-
--- Call handler of a stream table: s is the stream object, the second argument (the
--- table that is called) is ignored. The content is returned when what is absent,
--- "all" or "*all"; any other request gives nothing.
-local function streamaccess(s,_,what)
-    local everything = (not what) or what == "all" or what == "*all"
-    if not everything then
-        return
-    end
-    return getstream(s)
-end
-
--- Turn a stream object into the table of its dictionary. Calling that table returns
--- the stream content (see streamaccess). Nothing is returned when d is missing.
-local function get_stream(d,document)
-    if d then
-        streamReset(d)
-        local s = some_dictionary(streamGetDict(d),document)
-        if not s then
-            -- some_dictionary returns nothing for a missing or empty dictionary; we
-            -- then use an empty table so that the content stays accessible instead
-            -- of failing on getmetatable(nil)
-            s = setmetatable({ },{ })
-        end
-        getmetatable(s).__call = function(...) return streamaccess(d,...) end
-        return s
-    end
-end
-
--- We need to convert the string from utf16 although there is no way to
--- check if we have a regular string starting with a bom. So, we have
--- a dilemma here: a pdf doc encoded string can be invalid utf.
-
--- <hex encoded>   : implicit 0 appended if odd
--- (byte encoded)  : \( \) \\ escaped
---
--- <FE><FF> : utf16be
---
--- \n \r \t \b \f \( \) \\ \NNN and \<newline> : append next line
---
--- the getString function gives back bytes so we don't need to worry about
--- the hex aspect.
-
--- Matches a utf16 big endian byte order mark followed by utf16 data and gives the utf8
--- equivalent.
-local u_pattern = lpegpatterns.utfbom_16_be * lpegpatterns.utf16_to_utf8_be
------ b_pattern = lpeg.patterns.hextobytes
-
--- Return the string held by object v and a flag that tells how it was encoded: a
--- string that starts with a utf16 bom is converted and flagged "unicode", anything
--- else is passed as it is and flagged "rawtext" (one might want to apply
--- lpdf.frompdfdoc to it). A missing or empty string gives "" without a flag.
-local function get_string(v)
-    local s = getString(v)
-    if s and s ~= "" then
-        local u = lpegmatch(u_pattern,s)
-        if u then
-            return u, "unicode"
-        end
-        -- converting hex to bytes here is too tricky (it fails on e.g. a reload of
-        -- url www.pragma-ade.com) so we don't try: lpegmatch(b_pattern,s)
-        return s, "rawtext"
-    end
-    return ""
-end
-
--- Return the name held by object v together with the flag "name".
-local function get_name(v)
-    return getName(v), "name"
-end
-
--- The accessor for null objects: whatever is passed, the result is nil.
-local function get_null()
-    return nil
-end
-
--- we have dual access: by typenumber and by typename
-
--- Quit with a message about an accessor that was asked for but does not exist; k is
--- the requested key. When document is a table with a fullname field that name is
--- mentioned too.
--- NOTE(review): the documents made in this file get a filename field, not fullname;
--- check which of the two is meant here.
-local function invalidaccess(k,document)
-    if type(document) == "table" then
-        local fullname = document.fullname
-        if fullname then
-            fatal_error("error, asking for key %a in checker of %a",k,fullname)
-            return
-        end
-    end
-    fatal_error("error, asking for key %a in checker",k)
-end
-
--- The table with accessors, indexed by type code and also by type name. Each accessor
--- is called as accessor(v,document,...) and returns the Lua value for object v. A key
--- without accessor gives a function that reports the invalid access and quits.
-checked_access = setmetatableindex(function(t,k)
-    return function(v,document)
-        invalidaccess(k,document)
-    end
-end)
-
-checked_access[typenumbers.boolean]    = getBool
-checked_access[typenumbers.integer]    = getInt
-checked_access[typenumbers.real]       = getReal
-checked_access[typenumbers.string]     = get_string     -- getString
-checked_access[typenumbers.name]       = get_name
-checked_access[typenumbers.null]       = get_null
-checked_access[typenumbers.array]      = get_array      -- d,document,r
-checked_access[typenumbers.dictionary] = get_dictionary -- d,document,r
-checked_access[typenumbers.stream]     = get_stream
-checked_access[typenumbers.ref]        = getRef
-
--- we have dual access: by typenumber and by typename
-
-for i=0,#typenames do
-    -- rawget is needed because a normal lookup always gives a function (the fallback
-    -- above), which made the test for a missing accessor useless; the replacement
-    -- now reports the problem itself instead of returning yet another function
-    local checker = rawget(checked_access,i)
-    if not checker then
-        checker = function(v,document)
-            invalidaccess(i,document)
-        end
-        checked_access[i] = checker
-    end
-    checked_access[typenames[i]] = checker
-end
-
--- Collect the entries of a name tree in one table. A node with a Names array adds its
--- key/value pairs to target, a node with Kids is followed recursively. The result is
--- target (made when needed), or nil when nothing was collected and no target was
--- given. The document argument is only passed on.
-local function getnames(document,n,target) -- direct
-    if n then
-        local Names = n.Names
-        if Names then
-            if not target then
-                target = { }
-            end
-            for i=1,Names.n,2 do
-                local key = Names[i]
-                if key ~= nil then -- nil can't be a key (null or unresolved entry)
-                    target[key] = Names[i+1]
-                end
-            end
-        else
-            local Kids = n.Kids
-            if Kids then
-                for i=1,Kids.n do
-                    target = getnames(document,Kids[i],target)
-                end
-            end
-        end
-    end
-    -- also return target for a missing node, otherwise one nil kid makes the caller
-    -- forget all that was collected before
-    return target
-end
-
--- Collect the leaves of a tree in a list. A node with Kids is followed recursively,
--- any other node is appended to target. The result is target (made when needed), or
--- nil when there is no node and no target. The document argument is only passed on.
-local function getkids(document,n,target) -- direct
-    if n then
-        local Kids = n.Kids
-        if Kids then
-            for i=1,Kids.n do
-                target = getkids(document,Kids[i],target)
-            end
-        elseif target then
-            target[#target+1] = n
-        else
-            target = { n }
-        end
-    end
-    -- also return target for a missing node, otherwise one nil kid makes the caller
-    -- forget all that was collected before
-    return target
-end
-
--- /OCProperties <<
---     /OCGs [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
---     /D <<
---         /Order [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
---         /ON    [ 15 0 R 17 0 R 19 0 R 21 0 R 23 0 R 25 0 R 27 0 R ]
---         /OFF   [ ]
---     >>
--- >>
-
--- Return the list with names of the optional content groups (layers) found in
--- Catalog.OCProperties.OCGs, with the number of groups in field n. A group that can't
--- be resolved leaves a hole in the list. Nothing is returned when the document has no
--- such properties.
-local function getlayers(document)
-    local root       = document.Catalog
-    local properties = root and root.OCProperties
-    if properties then
-        local layers = properties.OCGs
-        if layers then
-            local t = { }
-            local n = layers.n or 0
-            for i=1,n do
-                local layer = layers[i]
-                -- an entry can be missing (null or unresolved reference)
-                t[i] = layer and layer.Name
-            end
-            t.n = n
-            return t
-        end
-    end
-end
-
--- Return the StructTreeRoot entry of the catalog of the document, untouched.
-local function getstructure(document)
-    -- this might become a tree
-    return document.Catalog.StructTreeRoot
-end
-
--- This is the only messy helper. We can't access the root as any object (it seems)
--- so we need a few low level accessors. It's anyway sort of simple enough to deal
--- with but it won't win a beauty contest.
-
--- Build the list of pages of a document. Each page is a lazy dictionary that falls
--- back on Catalog.Pages for keys it doesn't have itself; it gets the fields number
--- and object and is registered in the cache under its object number. The list (with
--- the page count in field n) and the pdf version numbers are stored in the document;
--- the list is also returned.
-local function getpages(document,Catalog)
-    local __data__     = document.__data__
-    local __xrefs__    = document.__xrefs__
-    local __cache__    = document.__cache__
-    local __xref__     = document.__xref__
-    --
-    local rawcatalog   = getRawCatalog(__data__)
-    local nofpages     = getNumPages(rawcatalog)
-    --
-    local majorversion = getMajorVersion(__data__)
-    local minorversion = getMinorVersion(__data__)
-    --
-    local pages        = { }
-    local metatable    = { __index = Catalog.Pages } -- somewhat empty
-    --
-    for pagenumber=1,nofpages do
-        local pagereference = getPageRef(rawcatalog,pagenumber).num
-        local pageobject    = fetch(__xref__,pagereference,0)
-        local pagedata      = get_dictionary(pageobject,document,pagereference,metatable)
-        if pagedata then
-         -- rawset(pagedata,"number",pagenumber)
-            pagedata.number          = pagenumber
-            pagedata.object          = pageobject
-            pages[pagenumber]        = pagedata
-            __xrefs__[pagedata]      = pagereference
-            __cache__[pagereference] = pagedata
-        else
-            -- the message used an undefined variable instead of the page number
-            report_epdf("missing pagedata at slot %i",pagenumber)
-        end
-    end
-    --
-    pages.n = nofpages
-    --
-    document.pages        = pages
-    document.majorversion = majorversion
-    document.minorversion = minorversion
-    --
-    return pages
-end
-
--- The index handler of a document: it computes a derived field on first access,
--- stores it in the document and returns it. Known fields are pages, destinations,
--- javascripts, widgets, embeddedfiles, layers and structure; any other key gives nil.
-local function resolve(document,k)
-    local Catalog = document.Catalog
-    local Names   = Catalog.Names
-    -- fields that are collected from a name tree, with the key of that tree in Names
-    local trees = {
-        destinations  = "Dests",
-        javascripts   = "JS",
-        widgets       = "AcroForm",
-        embeddedfiles = "EmbeddedFiles",
-    }
-    local tree  = trees[k]
-    local entry = nil
-    if tree then
-        entry = getnames(document,Names and Names[tree])
-    elseif k == "pages" then
-        entry = getpages(document,Catalog)
-    elseif k == "layers" then
-        entry = getlayers(document)
-    elseif k == "structure" then
-        entry = getstructure(document)
-    end
-    document[k] = entry
-    return entry
-end
-
--- Documents that have been loaded, stored by filename and also by document.
-local loaded = { }
-
--- Open the pdf file with the given name and return its document table, or nil when the
--- file can't be opened. A document that was loaded before is returned directly. The
--- catalog and info dictionaries are available as Catalog and Info, the page count as
--- nofpages; other fields are resolved on demand (see resolve). The time spent here is
--- registered with the statistics module.
-function lpdf_epdf.load(filename)
-    local document = loaded[filename]
-    if document then
-        return document
-    end
-    statistics.starttiming(lpdf_epdf)
-    local __data__ = openPDF(filename) -- maybe resolvers.find_file
-    if not __data__ then
-        document = false
-    else
-        local __xref__ = getXRef(__data__)
-        document = {
-            __data__  = __data__,
-            __xref__  = __xref__,
-            __cache__ = { },
-            __xrefs__ = { },
-            __fonts__ = { },
-            filename  = filename,
-        }
-        document.Catalog  = some_dictionary(getDict(getCatalog(__xref__)),document)
-        document.Info     = some_dictionary(getDict(getDocInfo(__xref__)),document)
-        setmetatableindex(document,resolve)
-        document.nofpages = getNumPages(getRawCatalog(__data__))
-    end
-    loaded[filename] = document
-    loaded[document] = document
-    statistics.stoptiming(lpdf_epdf)
- -- print(statistics.elapsedtime(lpdf_epdf))
-    return document or nil
-end
-
--- Forget a loaded document so that it can be collected. The argument is the key under
--- which the document was registered; both registrations are removed. Nothing happens
--- when no document is known under that key.
-function lpdf_epdf.unload(filename)
-    local document = loaded[filename]
-    if not document then
-        return
-    end
-    loaded[document] = nil
-    loaded[filename] = nil
-end
-
--- for k, v in next, expand(t) do
-
--- Return t after asking a table for its field dummy, so that a table with an index
--- handler gets the chance to fill itself. Other values are returned untouched.
-local function expand(t)
-    if type(t) ~= "table" then
-        return t
-    end
-    local _ = t.dummy -- only the lookup matters
-    return t
-end
-
--- for k, v in expanded(t) do
-
--- Iterator variant of expand: a table is asked for its field dummy (so that a table
--- with an index handler can fill itself) and then next and t are returned, ready for
--- use in a generic for loop.
-local function expanded(t)
-    local istable = type(t) == "table"
-    if istable then
-        local _ = t.dummy -- only the lookup matters
-    end
-    return next, t
-end
-
-lpdf_epdf.expand   = expand
-lpdf_epdf.expanded = expanded
-
--- we could resolve the text stream in one pass if we directly handle the
--- font but why should we complicate things
-
--- The building blocks of the content stream parser.
--- NOTE(review): hexdigit is not referenced in the part of the file that we can see.
--- NOTE(review): numchar converts three or more digits after a backslash with a plain
--- tonumber, so as a decimal number; confirm if these escapes should be read as octal.
-local hexdigit  = R("09","AF")
-local numchar   = ( P("\\") * ( (R("09")^3/tonumber) + C(1) ) ) + C(1)
-local number    = lpegpatterns.number / tonumber
-local spaces    = lpegpatterns.whitespace^1
-local optspaces = lpegpatterns.whitespace^0
-local keyword   = P("/") * C(R("AZ","az","09")^1)
-local operator  = C((R("AZ","az")+P("'")+P('"'))^1)
-
--- The operands of an operator: keywords give a string (without the slash), numbers a
--- number, arrays a table, dictionaries a table that is built with rawset, and strings
--- a table { "hex", text } or { "dec", text } that tells how the text was given.
--- NOTE(review): V("unicode") is listed twice in the start rule; the second one looks
--- redundant.
-local grammar   = P { "start",
-    start      = (keyword + number + V("dictionary") + V("unicode") + V("string") + V("unicode")+ V("array") + spaces)^1,
- -- keyvalue   = (keyword * spaces * V("start") + spaces)^1,
-    keyvalue   = optspaces * Cf(Ct("") * Cg(keyword * optspaces * V("start") * optspaces)^1,rawset),
-    array      = P("[")  * Ct(V("start")^1) * P("]"),
-    dictionary = P("<<") *    V("keyvalue") * P(">>"),
-    unicode    = P("<")  * Ct(Cc("hex") * C((1-P(">"))^1))            * P(">"),
-    string     = P("(")  * Ct(Cc("dec") * C((V("string")+numchar)^1)) * P(")"), -- untested
-}
-
--- An operation is a table with the operands followed by the operator. The parser gives
--- a table with all operations; a character that doesn't fit is skipped.
-local operation = Ct(grammar^1 * operator)
-local parser    = Ct((operation + P(1))^1)
-
--- beginbfrange : <start> <stop> <firstcode>
---                <start> <stop> [ <firstsequence> <firstsequence> <firstsequence> ]
--- beginbfchar  : <code> <newcodes>
-
-local fromsixteen = lpdf.fromsixteen -- maybe inline the lpeg ... but not worth it
-
--- Handler for one bfchar entry: the hex string a is the code and becomes the (numeric)
--- key in t; the value is what fromsixteen makes of the hex string b.
-local function f_bfchar(t,a,b)
-    t[tonumber(a,16)] = fromsixteen(b)
-end
-
--- Handler for a bfrange entry with a single start value; not implemented yet, it only
--- prints its arguments and stores nothing in t.
-local function f_bfrange_1(t,a,b,c)
-    print("todo 1",a,b,c)
-    -- c is string
-    -- todo t[tonumber(a,16)] = fromsixteen(b)
-end
-
--- Handler for a bfrange entry with a list of values; not implemented yet, it only
--- prints its arguments and stores nothing in t.
-local function f_bfrange_2(t,a,b,c)
-    print("todo 2",a,b,c)
-    -- c is table
-    -- todo t[tonumber(a,16)] = fromsixteen(b)
-end
-
--- The table that gets filled is passed as extra argument to lpegmatch (Carg(1)) and is
--- also what a match on fromunicode returns. Text outside the beginbfchar and
--- beginbfrange sections is skipped character by character.
-local optionals   = spaces^0
-local hexstring   = optionals * P("<") * C((1-P(">"))^1) * P(">")
-local bfchar      = Carg(1) * hexstring * hexstring / f_bfchar
-local bfrange     = Carg(1) * hexstring * hexstring * hexstring / f_bfrange_1
-                  + Carg(1) * hexstring * hexstring * optionals * P("[") * Ct(hexstring^1) * optionals * P("]") / f_bfrange_2
-local fromunicode = (
-    P("beginbfchar" ) * bfchar ^1 * optionals * P("endbfchar" ) +
-    P("beginbfrange") * bfrange^1 * optionals * P("endbfrange") +
-    spaces +
-    P(1)
-)^1  * Carg(1)
-
--- Register the fonts of a resource dictionary in document.__fonts__ and return that
--- table. Each font that is not yet known gets an entry with a tounicode table, parsed
--- from its ToUnicode stream; the table is empty when there is no such stream or when
--- it can't be parsed.
-local function analyzefonts(document,resources) -- unfinished, see mtx-pdf for better code
-    local fonts    = document.__fonts__
-    local fontlist = resources and resources.Font
-    if fontlist then
-        for id, data in expanded(fontlist) do
-            if not fonts[id] then
-                --  a quick hack ... I will look into it more detail if I find a real
-                -- -application for it
-                --
-                -- not every font has a ToUnicode stream and only a stream (a table)
-                -- can be called, so we check before we fetch the content; calling a
-                -- missing entry used to raise an error
-                local stream    = type(data) == "table" and data.ToUnicode
-                local tounicode = type(stream) == "table" and stream() or nil
-                if tounicode then
-                    tounicode = lpegmatch(fromunicode,tounicode,1,{})
-                end
-                fonts[id] = {
-                    tounicode = type(tounicode) == "table" and tounicode or { }
-                }
-                setmetatableindex(fonts[id],"self")
-            end
-        end
-    end
-    return fonts
-end
-
--- State that is shared by the two converters below: more holds a pending high
--- surrogate (0 when there is none) and unic is the tounicode table of the current
--- font, which getpagecontent sets before it uses the converters.
-local more = 0
-local unic = nil -- cheaper than passing each time as Carg(1)
-
--- Convert one group of four hex digits: the code is looked up in unic and otherwise
--- turned into an utf character. A high surrogate is remembered and combined with the
--- group that follows.
--- NOTE(review): the branch that remembers a high surrogate returns nothing; check what
--- the substitution capture does then (it might keep the four hex digits in the result).
-local p_hex_to_utf = C(4) / function(s) -- needs checking !
-    local now = tonumber(s,16)
-    if more > 0 then
-        now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the usual way to combine an utf16 surrogate pair
-        more = 0
-        return unic[now] or utfchar(now)
-    elseif now >= 0xD800 and now <= 0xDBFF then
-        more = now
-     -- return ""
-    else
-        return unic[now] or utfchar(now)
-    end
-end
-
--- Convert one byte: the byte value is looked up in unic and otherwise turned into an
--- utf character.
-local p_dec_to_utf = C(1) / function(s) -- needs checking !
-    local now = byte(s)
-    return unic[now] or utfchar(now)
-end
-
--- The patterns that are actually used: they reset the surrogate state and then convert
--- as many groups (or bytes) as possible into one string.
-local p_hex_to_utf = P(true) / function() more = 0 end * Cs(p_hex_to_utf^1)
-local p_dec_to_utf = P(true) / function() more = 0 end * Cs(p_dec_to_utf^1)
-
--- Parse the content stream of a page and return the list of operations, each being a
--- table with operands followed by the operator. The strings of the text operators TJ,
--- Tj, ' and " are converted to utf, using the tounicode table of the font that was
--- selected by the last Tf. Nothing is returned when the page doesn't exist.
---
--- NOTE(review): a Contents entry that is an array of streams gives no content here,
--- as only a single stream is fetched.
-function lpdf_epdf.getpagecontent(document,pagenumber)
-
-    local page = document.pages[pagenumber]
-
-    if not page then
-        return
-    end
-
-    local fonts    = analyzefonts(document,page.Resources)
-
-    -- a page can lack content, and the parser gives nil for an empty string
-    local contents = page.Contents
-    local content  = type(contents) == "table" and contents() or ""
-    local list     = lpegmatch(parser,content) or { }
-
-    -- text can show up before a font is selected so we start with an empty mapping
-    unic = { }
-
-    for i=1,#list do
-        local entry    = list[i]
-        local size     = #entry
-        local operator = entry[size]
-        if operator == "Tf" then
-            -- a font that is not in the resources gets an empty mapping
-            local font = fonts[entry[1]]
-            unic = font and font.tounicode or { }
-        elseif operator == "TJ" then -- { array,  TJ }
-            local list = entry[1]
-            if type(list) == "table" then
-                for i=1,#list do
-                    local li = list[i]
-                    if type(li) == "table" then
-                        if li[1] == "hex" then
-                            list[i] = lpegmatch(p_hex_to_utf,li[2])
-                        else
-                            list[i] = lpegmatch(p_dec_to_utf,li[2])
-                        end
-                    else
-                        -- kern
-                    end
-                end
-            end
-        elseif operator == "Tj" or operator == "'" or operator == '"' then -- { string,  Tj } { string, ' } { n, m, string, " }
-            -- the string is the last operand; the conversion used to index an
-            -- undefined variable (li) instead of this table
-            local str = entry[size-1]
-            if type(str) == "table" then
-                if str[1] == "hex" then
-                    str[2] = lpegmatch(p_hex_to_utf,str[2])
-                else
-                    str[2] = lpegmatch(p_dec_to_utf,str[2])
-                end
-            end
-        end
-    end
-
-    unic = nil -- can be collected
-
-    return list
-
-end
-
--- This is also an experiment. When I really need it I can improve it, for instance
--- with proper position calculating. It might be useful for some search or so.
-
--- The soft hyphen check is a regular pattern match: the $ anchors the (utf
--- encoded) soft hyphen at the end of the string, so it must not be used in a
--- plain find.
-
-local softhyphen = utfchar(0xAD) .. "$"
-local linefactor = 1.3
-
--- Flattens a (converted) operator list as returned by getpagecontent into one
--- string.
---
--- document : not used here
--- list     : array of entries, each being operands followed by the operator
---
--- Strings shown by TJ and Tj are collected, a kern smaller than -50 in a TJ
--- array becomes a space and a vertical move (sixth operand of cm or Tm) of more
--- than linefactor times the last font size (second operand of Tf) starts a new
--- line, unless the last collected string ends with a soft hyphen.
-
-function lpdf_epdf.contenttotext(document,list) -- maybe signal fonts
-    local last_y = 0
-    local last_f = 0
-    local text   = { }
-    local last   = 0
-
-    for i=1,#list do
-        local entry    = list[i]
-        local size     = #entry
-        local operator = entry[size]
-        if operator == "Tf" then
-            last_f = tonumber(entry[2]) or last_f
-        elseif operator == "TJ" then
-            local array = entry[1]
-            for j=1,#array do
-                local item = array[j]
-                if type(item) == "string" then
-                    last = last + 1
-                    text[last] = item
-                elseif type(item) == "number" and item < -50 then
-                    last = last + 1
-                    text[last] = " "
-                end
-            end
-        elseif operator == "Tj" then
-            local str = entry[size-1]
-            if type(str) == "table" then
-                -- getpagecontent keeps this operand as { kind, data }
-                str = str[2]
-            end
-            if type(str) == "string" then
-                last = last + 1
-                text[last] = str
-            end
-        elseif operator == "cm" or operator == "Tm" then
-            local ty = tonumber(entry[6])
-            if ty then
-                if abs(last_y - ty) > linefactor*last_f then
-                    if last > 0 and not find(text[last],softhyphen) then
-                        last = last + 1
-                        text[last] = "\n"
-                    end
-                end
-                last_y = ty
-            end
-        end
-    end
-
-    return concat(text)
-end
-
--- Reports the marked content nesting of an operator list: each BDC is shown
--- with its first operand and the MCID field of its second operand and then
--- increases the level, each EMC decreases the level, and the strings shown by
--- TJ and Tj are reported at the current level. A number smaller than -50 in a
--- TJ array gives a line without text. Nothing is returned.
-
-function lpdf_epdf.getstructure(document,list) -- just a test
-    local level = 0
-    for index=1,#list do
-        local entry = list[index]
-        local last  = #entry
-        local what  = entry[last]
-        if what == "EMC" then
-            level = level - 1
-        elseif what == "BDC" then
-            local tag  = entry[1] or "?"
-            local mcid = entry[2].MCID or "?"
-            report_epdf("%w%s : %s",level,tag,mcid)
-            level = level + 1
-        elseif what == "Tj" then
-            report_epdf("%w > %s",level,entry[last-1])
-        elseif what == "TJ" then
-            local pieces = entry[1]
-            for p=1,#pieces do
-                local piece = pieces[p]
-                if type(piece) == "string" then
-                    report_epdf("%w > %s",level,piece)
-                elseif piece < -50 then
-                    report_epdf("%w >",level,piece)
-                end
-            end
-        end
-    end
-end
-
--- document.Catalog.StructTreeRoot.ParentTree.Nums[2][1].A.P[1])
-
--- helpers
-
--- function lpdf_epdf.getdestinationpage(document,name)
---     local destination = document.__data__:findDest(name)
---     return destination and destination.number
--- end
-
--- This is experimental code that we need for testing the transition from
--- poppler to a new lightweight library. Don't rely on this code to remain
--- as it is now. Interesting is that performance of this variant is the same
--- as the natural page includer.
-
-if img then do
-
-    local copydictionary          = nil -- forward declaration, assigned further down (used recursively by copyobject)
-    local copyarray               = nil -- forward declaration, assigned further down (used recursively by copyobject)
-
-    local ref_object_code         = typenumbers.ref -- these codes are compared with what getType returns
-    local boolean_object_code     = typenumbers.boolean
-    local integer_object_code     = typenumbers.integer
-    local real_object_code        = typenumbers.real
-    local string_object_code      = typenumbers.string
-    local name_object_code        = typenumbers.name
-    local null_object_code        = typenumbers.null
-    local array_object_code       = typenumbers.array
-    local dictionary_object_code  = typenumbers.dictionary
-    local stream_object_code      = typenumbers.stream
-    local cmd_object_code         = typenumbers.cmd
-
-    local pdfreserveobject        = lpdf.reserveobject -- shortcuts to the lpdf helpers used for writing the copied objects
-    local pdfflushobject          = lpdf.flushobject
-    local pdfflushstreamobject    = lpdf.flushstreamobject
-    local pdfreference            = lpdf.reference
-    local pdfconstant             = lpdf.constant
-    local pdfarray                = lpdf.array
-    local pdfdictionary           = lpdf.dictionary
-    local pdfunicode              = lpdf.unicode
-    local pdfstring               = lpdf.string
-    local pdfnull                 = lpdf.null
-
-    local report                  = logs.reporter("backend","xobjects") -- used for the 'not done' messages in copyobject
-
-    local factor                  = 65536 / (7200/7227) -- 1/number.dimenfactors.bp
-
-    local createimage             = images.create -- called by copypage with bbox, stream and attr
-
-    -- Returns a new four element table with the values of the given box
-    -- multiplied by 'factor'; the original box is left untouched.
-
-    local function scaledbbox(b)
-        local llx, lly, urx, ury = b[1], b[2], b[3], b[4]
-        return { llx*factor, lly*factor, urx*factor, ury*factor }
-    end
-
-    -- Copies one object from the loaded document into the document being made.
-    --
-    -- xref   : passed to fetch in order to get at indirect objects
-    -- copied : maps original object numbers onto already reserved new ones
-    -- kind   : the type code of the (resolved) value
-    -- r      : the unresolved value, used to detect a reference
-    -- v      : the resolved value
-    --
-    -- An indirect object is written once as a new object and a reference to it
-    -- is returned; direct values are returned as lpdf objects or Lua values. For
-    -- an unsupported kind a message is reported and nothing is returned.
-
-    local function copyobject(xref,copied,kind,r,v)
-        if kind == null_object_code then
-            return pdfnull()
-        elseif r and getType(r) == ref_object_code then
-            local objnum    = getRefNum(r)
-            local reference = copied[objnum]
-            if reference then
-             -- report("%s object %i is reused",kind,objnum)
-            else
-                reference = pdfreserveobject()
-                -- we register before we descend so that a reference back to
-                -- this object (for instance a parent) ends here
-                copied[objnum] = reference
-                if kind == array_object_code then
-                    -- the copiers return nothing for an empty container and we
-                    -- don't want to flush the string "nil" then
-                    local a = copyarray(xref,copied,fetch(xref,objnum,0)) or pdfarray()
-                    pdfflushobject(reference,tostring(a))
-                elseif kind == dictionary_object_code then
-                    local d = copydictionary(xref,copied,fetch(xref,objnum,0)) or pdfdictionary()
-                    pdfflushobject(reference,tostring(d))
-                elseif kind == stream_object_code then
-                    local f = fetch(xref,objnum,0)
-                    local d = copydictionary(xref,copied,false,streamGetDict(f)) or pdfdictionary()
-                    local s = getstream(f)
-                    -- the keys that describe the original encoding of the
-                    -- stream are dropped (presumably because getstream returns
-                    -- decoded data and the flusher adds its own, to be confirmed)
-                    d.Filter      = nil
-                    d.Length      = nil
-                    d.DecodeParms = nil
-                    d.DL          = nil
-                    --
-                    pdfflushstreamobject(s,d,true,reference)
-                else
-                    -- NOTE(review): the reserved object is not flushed here
-                    -- while we still return a reference to it
-                    report("reference not done: %s", kind)
-                end
-            end
-            return pdfreference(reference)
-        elseif kind == array_object_code then
-            return copyarray(xref,copied,v)
-        elseif kind == dictionary_object_code then
-            return copydictionary(xref,copied,v)
-        elseif kind == integer_object_code then
-            return getInt(v)
-        elseif kind == real_object_code then
-            return getReal(v)
-        elseif kind == name_object_code then
-            return pdfconstant(getName(v))
-        elseif kind == string_object_code then
-            local s = getString(v)
-            if not s or s == "" then
-                return ""
-            end
-            local u = lpegmatch(u_pattern,s)
-            if u then
-                return pdfunicode(s)
-            end
-            return pdfstring(s)
-        elseif kind == boolean_object_code then
-            return getBool(v)
-        elseif kind == stream_object_code then
-            -- hm ...
-            -- NOTE(review): the indirect branch calls getstream (lowercase),
-            -- check that getStream is the intended accessor here
-            return getStream(v)
-        else
-            report("object not done: %s", kind)
-        end
-    end
-
-    -- Copies the entries of an array object into a new pdfarray, passing both
-    -- the resolved and the unresolved entry to copyobject. Nothing is returned
-    -- when there is no array or when it is empty.
-
-    copyarray = function (xref,copied,object)
-        local source = getArray(object)
-        local count  = source and arrayGetLength(source) or 0
-        if count <= 0 then
-            return
-        end
-        local result = pdfarray()
-        for index=1,count do
-            local value = arrayGet(source,index)
-            if value then
-                local kind      = getType(value)
-                local reference = arrayGetNF(source,index)
-                result[index] = copyobject(xref,copied,kind,reference,value)
-            end
-        end
-        return result
-    end
-
-    -- Copies the key/value pairs of a dictionary into a new pdfdictionary. The
-    -- dictionary is either passed directly as fourth argument or taken from the
-    -- given object. Nothing is returned when there is no dictionary or when it
-    -- is empty.
-
-    copydictionary = function (xref,copied,object,d)
-        local source = d or getDict(object)
-        local count  = source and dictGetLength(source) or 0
-        if count <= 0 then
-            return
-        end
-        local result = pdfdictionary()
-        for index=1,count do
-            local value = dictGetVal(source,index)
-            if value then
-                local kind      = getType(value)
-                local name      = dictGetKey(source,index)
-                local reference = dictGetValNF(source,index)
-                result[name] = copyobject(xref,copied,kind,reference,value)
-            end
-        end
-        return result
-    end
-
-    -- Looks up the Resources entry in the dictionary of the page object and
-    -- returns its copy (as made by copyobject). Nothing is returned when the
-    -- page data has no object or when there is no such entry.
-    --
-    -- The value used to be tested before it was fetched, so the test never
-    -- succeeded and the resources were never copied.
-
-    local function copy_resources(pdfdoc,xref,copied,pagedata)
-        local object = pagedata.object
-        if object then
-            local d = getDict(object)
-            local n = d and dictGetLength(d) or 0
-            for i=1,n do
-                if dictGetKey(d,i) == "Resources" then
-                    local v = dictGetVal(d,i)
-                    if v then
-                        local kind = getType(v)
-                        local r    = dictGetValNF(d,i)
-                        return copyobject(xref,copied,kind,r,v)
-                    end
-                end
-            end
-        end
-    end
-
-    -- Loads a document and makes sure that it has a table for keeping track of
-    -- copied objects and that it knows its filename. Nothing is returned when
-    -- the document cannot be loaded.
-
-    local function openpdf(filename)
-        local pdfdoc = lpdf_epdf.load(filename)
-        if not pdfdoc then
-            return
-        end
-        local copied = pdfdoc.__copied__
-        if not copied then
-            copied = { }
-        end
-        pdfdoc.__copied__ = copied
-        pdfdoc.filename   = filename
-        return pdfdoc
-    end
-
-    -- Unloads the document by the filename that openpdf stored in it; a missing
-    -- document is silently ignored.
-
-    local function closepdf(pdfdoc)
-        if not pdfdoc then
-            return
-        end
-        lpdf_epdf.unload(pdfdoc.filename)
-    end
-
-    -- Returns a table with the filename, page number, number of pages and the
-    -- boxes of a page (the first one when no number is given). The crop box
-    -- defaults to the media box, the other boxes default to the crop box, and
-    -- boundingbox is the crop box as scaled by scaledbbox. Nothing is returned
-    -- when there is no document or no such page.
-
-    local function querypdf(pdfdoc,pagenumber)
-        if not pdfdoc then
-            return
-        end
-        pagenumber = pagenumber or 1
-        local root = pdfdoc.Catalog
-        local page = pdfdoc.pages[pagenumber]
-        if not page then
-            return
-        end
-        local mediabox = page.MediaBox or { 0, 0, 0, 0 }
-        local cropbox  = page.CropBox or mediabox
-        local result   = { }
-        result.filename    = pdfdoc.filename
-        result.pagenumber  = pagenumber
-        result.nofpages    = pdfdoc.nofpages
-        result.boundingbox = scaledbbox(cropbox)
-        result.cropbox     = cropbox
-        result.mediabox    = mediabox
-        result.bleedbox    = page.BleedBox or cropbox
-        result.trimbox     = page.TrimBox or cropbox
-        result.artbox      = page.ArtBox or cropbox
-        return result
-    end
-
-    -- Turns a page of a loaded document into an image: the page content becomes
-    -- the stream and a form xobject dictionary (with the crop box as BBox and a
-    -- copy of the page resources) becomes the attributes.
-    --
-    -- pdfdoc     : a document as returned by openpdf
-    -- pagenumber : defaults to the first page
-    -- attributes : optional extra entries for the xobject dictionary
-    --
-    -- Returns what createimage returns, or nothing when there is no document or
-    -- no such page.
-
-    local function copypage(pdfdoc,pagenumber,attributes)
-        if pdfdoc then
-            local root     = pdfdoc.Catalog
-            local page     = pdfdoc.pages[pagenumber or 1]
-            local pageinfo = page and querypdf(pdfdoc,pagenumber)
-            if not pageinfo then
-                -- no such page, we used to crash on indexing nil here
-                return
-            end
-            local contents = page.Contents
-            local xref     = pdfdoc.__xref__
-            local copied   = pdfdoc.__copied__
-            --
-            local xobject  = pdfdictionary {
-                Type           = pdfconstant("XObject"),
-                Subtype        = pdfconstant("Form"),
-             -- image attributes
-                FormType       = 1,
-                BBox           = pageinfo.cropbox,
-             -- Metadata       = copy(xref,copied,root,"Metadata"),
-             -- Group          = copy(xref,copied,page,"Group"),
-             -- LastModified   = copy(xref,copied,page,"LastModified"),
-             -- Metadata       = copy(xref,copied,page,"Metadata"),
-             -- PieceInfo      = copy(xref,copied,page,"PieceInfo"),
-                Resources      = copy_resources(pdfdoc,xref,copied,page),
-             -- SeparationInfo = copy(xref,copied,page,"SeparationInfo"),
-            }
-            if attributes then
-                -- the attributes belong in the dictionary that we pass on and
-                -- not in the page that we copy from (where they had no effect)
-                for k, v in next, expand(attributes) do
-                    xobject[k] = v -- maybe nested
-                end
-            end
-            return createimage {
-                bbox   = pageinfo.boundingbox,
-                stream = contents(),
-                attr   = xobject(),
-            }
-        end
-    end
-
-    -- todo: codeinjections
-
-    lpdf_epdf.image = { -- the public interface to the local functions defined above
-        open  = openpdf,  -- (filename) : loaded document or nothing
-        close = closepdf, -- (pdfdoc) : unloads by the stored filename
-        query = querypdf, -- (pdfdoc,pagenumber) : table with page boxes or nothing
-        copy  = copypage, -- (pdfdoc,pagenumber,attributes) : result of images.create
-    }
-
-end end