diff options
Diffstat (limited to 'tex/context/base/mkxl/data-hsh.lmt')
-rw-r--r-- | tex/context/base/mkxl/data-hsh.lmt | 382 |
1 files changed, 382 insertions, 0 deletions
diff --git a/tex/context/base/mkxl/data-hsh.lmt b/tex/context/base/mkxl/data-hsh.lmt new file mode 100644 index 000000000..0a2d94f81 --- /dev/null +++ b/tex/context/base/mkxl/data-hsh.lmt @@ -0,0 +1,382 @@ +-- only lmt because the backend code doesn't deal with it and it makes +-- no sense to waste time on that for mkiv + +if not modules then modules = { } end modules ['data-hsh'] = { + version = 0.002, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- todo: options +-- +-- lowercase +-- cleanupnames (normalize) +-- use database from project tree + +local type = type +local gsub = string.gsub +local addsuffix, basename, pathpart, filesuffix, filesize = file.addsuffix, file.basename, file.pathpart, file.suffix, file.size +local loadtable, savetable = table.load, table.save +local loaddata, savedata, open = io.loaddata, io.savedata, io.open + +local trace_hashed = false +local report_hashed = logs.reporter("resolvers","hashed") + +trackers.register("resolvers.locating", function(v) trace_hashed = v end) +trackers.register("resolvers.hashed", function(v) trace_hashed = v end) + +-- we can have a virtual file: open at the position, make sure read and seek don't +-- go beyond the boundaries + +local resolvers = resolvers +local finders = resolvers.finders +local openers = resolvers.openers +local loaders = resolvers.loaders + +local ordered = { } +local hashed = { } +local version = 0.002 + +-- local lowercase = characters.lower + +local function showstatus(database,metadata) + report_hashed("database %a, %i paths, %i names, %i unique blobs, %i compressed blobs", + database, metadata.nofpaths, metadata.nofnames, metadata.nofblobs, metadata.nofcompressed + ) +end + +local function validhashed(database) + local found = hashed[database] + if found then + return found + else + local metaname = addsuffix(database,"lua") + local dataname = addsuffix(database,"dat") + local metadata = loadtable(metaname) + if type(metadata) ~= "table" then + report_hashed("invalid database %a",metaname) + elseif metadata.version ~= version then + report_hashed("version mismatch in database %a",metaname) + elseif not lfs.isfile(dataname) then + report_hashed("missing data data file for %a",metaname) + else + return { + database = database, + metadata = metadata, + dataname = dataname, + } + end + end +end + +local function registerhashed(database) + if not hashed[database] then + local valid = validhashed(database) + if valid then + ordered[#ordered + 1] = valid + hashed[database] = ordered[#ordered] + showstatus(database,valid.metadata) + end + end +end + +local registerfilescheme do + + local findfile = finders.file + + local list = { } + local done = { } + local hash = { } + + registerfilescheme = function(name) + if not done[name] then + list[#list+1] = name + done[name] = true + end + end + + -- why does the finder not remember ? + + function finders.file(specification,filetype) + if type(specification) == "table" then + local original = specification.original + -- print(original) + if original then + local found = hash[original] + if found == nil then + for i=1,#list do + local scheme = list[i] + local found = finders[scheme](specification,filetype) + if found then + hash[original] = found + if trace_hashed then + report_hashed("found by auto scheme %s: %s",scheme,found) + end + return found + end + end + local found = findfile(specification,filetype) + if found then + hash[original] = found + if trace_hashed then + report_hashed("found by normal file scheme: %s",found) + end + return found + end + hash[original] = false + elseif found then + return found + end + return false + else + -- something is wrong here, maybe we should trace it (scheme can be "unknown") + end + end + -- again, something is wrong + return findfile(specification,filetype) + end + +end + +finders.helpers.validhashed = validhashed +finders.helpers.registerhashed = registerhashed +finders.helpers.registerfilescheme = registerfilescheme + +local function locate(found,path,name) + local files = found.metadata.files + local hashes = found.metadata.hashes + local fp = files[path] + local hash = fp and fp[name] + if hash and hashes[hash] then + return hash + end +end + +local function locatehash(filename,database) + if filename then + local name = basename(filename) + local path = pathpart(filename) + local hash = false + if database then + local found = hashed[database] + if found then + hash = locate(found,path,name), database, path, name + end + else + for i=1,#ordered do + local found = ordered[i] + hash = locate(found,path,name) + if hash then + database = found.database + break + end + end + end + if hash then + return { + hash = hash, + name = name, + path = path, + base = database, + } + end + end +end + +-- no caching yet, we don't always want the file and it's fast enough + +local function locateblob(filename,database) + local found = locatehash(filename,database) + if found then + local database = found.base + local data = hashed[database] + if data then + local metadata = data.metadata + local dataname = data.dataname + local hashes = metadata.hashes + local blobdata = hashes[found.hash] + if blobdata and dataname then + local position = blobdata.position + local f = open(dataname,"rb") + if f then + f:seek("set",position) + local blob = f:read(blobdata.datasize) + if blobdata.compress == "zip" then + blob = zlib.decompresssize(blob,blobdata.filesize) + end + return blob + end + end + end + end +end + +local finders = resolvers.finders +local notfound = finders.notfound + +function finders.hashed(specification) + local original = specification.original + local fullpath = specification.path + if fullpath then + local found = locatehash(fullpath) + if found then + if trace_hashed then + report_hashed("finder: file %a found",original) + end + return original + end + end + if trace_hashed then + report_hashed("finder: unknown file %a",original) + end + return notfound() +end + +local notfound = openers.notfound +local textopener = openers.helpers.textopener + +function openers.hashed(specification) + local original = specification.original + local fullpath = specification.path + if fullpath then + local found = locateblob(fullpath) + if found then + if trace_hashed then + report_hashed("finder: file %a found",original) + end + return textopener("hashed",original,found,"utf-8") + end + end + if trace_hashed then + report_hashed("finder: unknown file %a",original) + end + return notfound() +end + +local notfound = loaders.notfound + +function loaders.hashed(specification) + local original = specification.original + local fullpath = specification.path + if fullpath then + local found = locateblob(fullpath) + if found then + if trace_hashed then + report_hashed("finder: file %a found",original) + end + return true, found, found and #found or 0 + end + end + if trace_hashed then + report_hashed("finder: unknown file %a",original) + end + return notfound() +end + +-- this actually could end up in the generate namespace but it is not +-- really a 'generic' feature, more a module (at least for now) + +local calculatehash = sha2.HEX256 -- md5.HEX is not unique enough + +function resolvers.finders.helpers.createhashed(specification) + local database = specification.database + local patterns = specification.patterns + if not patterns then + local pattern = specification.pattern + if pattern then + patterns = { + { + pattern = pattern, + compress = specification.compress, + } + } + end + end + local datname = addsuffix(database,"dat") + local luaname = addsuffix(database,"lua") + local metadata = loadtable(luaname) + if type(metadata) ~= "table" then + metadata = false + elseif metadata.kind == "hashed" and metadata.version ~= version then + report_hashed("version mismatch, starting with new table") + metadata = false + end + if not metadata then + metadata = { + version = version, + kind = "hashed", + files = { }, + hashes = { }, + nofnames = 0, + nofpaths = 0, + nofblobs = 0, + nofcompressed = 0, + } + end + local files = metadata.files + local hashes = metadata.hashes + local nofpaths = metadata.nofpaths + local nofnames = metadata.nofnames + local nofblobs = metadata.nofblobs + local nofcompressed = metadata.nofcompressed + if type(patterns) == "table" then + for i=1,#patterns do + local pattern = patterns[i].pattern + if pattern then + local compress = patterns[i].compress + local list = dir.glob(pattern) + local total = #list + report_hashed("database %a, adding pattern %a, compression %l",database,pattern,compress) + for i=1,total do + local filename = list[i] + local name = basename(filename) + local path = pathpart(filename) + local data = loaddata(filename) + -- cleanup + path = gsub(path,"^[./]*","") + -- + if data then + local fp = files[path] + if not fp then + fp = { } + files[path] = fp + nofpaths = nofpaths + 1 + end + local ff = fp[name] + if not ff then + local hash = calculatehash(data) + if not hashes[hash] then + local size = #data + if compress then + data = zlib.compresssize(data,size) + nofcompressed = nofcompressed + 1 + end + local position = filesize(datname) + savedata(datname,data,"",true) + hashes[hash] = { + filesize = size, + datasize = #data, + compress = compress and "zip", + position = position, + } + nofblobs = nofblobs + 1 + end + fp[name] = hash + nofnames = nofnames + 1 + end + end + end + end + end + end + metadata.nofpaths = nofpaths + metadata.nofnames = nofnames + metadata.nofblobs = nofblobs + metadata.nofcompressed = nofcompressed + savetable(luaname, metadata) + showstatus(database,metadata) + return metadata +end + |