diff options
author | Hans Hagen <pragma@wxs.nl> | 2023-05-05 19:33:15 +0200 |
---|---|---|
committer | Context Git Mirror Bot <phg@phi-gamma.net> | 2023-05-05 19:33:15 +0200 |
commit | cc6d486ffa3fad4ffb698ad5ccbd5c5f4ebc6a00 (patch) | |
tree | 787c75ff830863d992f7301542a00be25b032dbf /tex/context/base/mkxl/data-sch.lmt | |
parent | a0e79ce94348b934dcefc38133cd8fed08bd38e3 (diff) | |
download | context-cc6d486ffa3fad4ffb698ad5ccbd5c5f4ebc6a00.tar.gz |
2023-05-05 18:39:00
Diffstat (limited to 'tex/context/base/mkxl/data-sch.lmt')
-rw-r--r-- | tex/context/base/mkxl/data-sch.lmt | 312 |
1 files changed, 312 insertions, 0 deletions
diff --git a/tex/context/base/mkxl/data-sch.lmt b/tex/context/base/mkxl/data-sch.lmt new file mode 100644 index 000000000..36f89040c --- /dev/null +++ b/tex/context/base/mkxl/data-sch.lmt @@ -0,0 +1,312 @@ +if not modules then modules = { } end modules ['data-sch'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local load, tonumber, require = load, tonumber, require +local gsub, format = string.gsub, string.format +local savedata = io.savedata +local sortedhash, concat = table.sortedhash, table.concat +local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders +local addsuffix, suffix, splitbase = file.addsuffix, file.suffix, file.splitbase +local md5hex = md5.hex +local removefile, renamefile, fileexists = os.remove, os.rename, io.exists + +-- todo: more locals + +local trace_schemes = false trackers.register("resolvers.schemes",function(v) trace_schemes = v end) +local report_schemes = logs.reporter("resolvers","schemes") + +local http = require("socket.http") +local ltn12 = require("ltn12") + +if mbox then mbox = nil end -- useless and even bugged (helper overwrites lib) + +local resolvers = resolvers +local schemes = resolvers.schemes or { } +resolvers.schemes = schemes + +local cleaners = { } +schemes.cleaners = cleaners + +local threshold = 24 * 60 * 60 +local inmemory = false +local uselibrary = false + +directives.register("schemes.threshold", function(v) threshold = tonumber(v) or threshold end) +directives.register("schemes.inmemory", function(v) inmemory = v end) +directives.register("schemes.uselibrary", function(v) uselibrary = v end) + +function cleaners.none(specification) + return specification.original +end + +-- function cleaners.strip(specification) +-- -- todo: only keep suffix periods, so after the last +-- return (gsub(specification.original,"[^%a%d%.]+","-")) -- so we keep periods +-- end + +function cleaners.strip(specification) -- keep suffixes + local path, name = splitbase(specification.original) + if path == "" then + return (gsub(name,"[^%a%d%.]+","-")) + else + return (gsub((gsub(path,"%.","-") .. "-" .. name),"[^%a%d%.]+","-")) + end +end + +function cleaners.md5(specification) + return addsuffix(md5hex(specification.original),suffix(specification.path)) +end + +local cleaner = cleaners.strip + +directives.register("schemes.cleanmethod", function(v) cleaner = cleaners[v] or cleaners.strip end) + +function resolvers.schemes.cleanname(specification) + local hash = cleaner(specification) + if trace_schemes then + report_schemes("hashing %a to %a",specification.original,hash) + end + return hash +end + +local cached = { } +local loaded = { } +local reused = { } +local thresholds = { } +local handlers = { } + +local function fetcher(report) + if uselibrary then + local curl = require("curl") or require("libs-imp-curl") -- we have curl preloaded + local fetch = curl and curl.fetch + if fetch then + return function(str) + local data, message = fetch { + url = str, + followlocation = true, + sslverifyhost = false, + sslverifypeer = false, + } + if not data then + report("some error: %s",message) + end + return data + end + end + end +end + +local runner = sandbox.registerrunner { + name = "to file curl resolver", + method = "execute", + program = "curl", + template = '--silent --insecure --create-dirs --output "%cachename%" "%original%"', + internal = function(specification) + local fetch = fetcher(specification.reporter) + return fetch and function(name,program,template,checkers,defaults,variables,reporter,finalized) + local data = fetch(variables.original) + savedata(variables.cachename,data or "") + end + end, + checkers = { + cachename = "cache", + original = "url", + } +} + +local memrunner = sandbox.registerrunner { + name = "in memory curl resolver", + method = "resultof", + program = "curl", + template = '--silent --insecure "%original%"', + internal = function(specification) + local fetch = fetcher(specification.reporter) + return fetch and function(name,program,template,checkers,defaults,variables,reporter,finalized) + return fetch(variables.original) or "" + end + end, + checkers = { + original = "url", + } +} + +local function fetch(specification) + local original = specification.original + local scheme = specification.scheme + local cleanname = schemes.cleanname(specification) + if inmemory then + statistics.starttiming(schemes) + local cachename = resolvers.savers.virtualname(cleanname) + local handler = handlers[scheme] + -- if handler and not uselibrary then + if handler then -- internal sockets are twice as fast as library + if trace_schemes then + report_schemes("fetching %a, protocol %a, method %a",original,scheme,"built-in") + end + logs.flush() + handler(specification,cachename) + else + if trace_schemes then + report_schemes("fetching %a, protocol %a, method %a",original,scheme,"curl") + end + logs.flush() + local result = memrunner { + original = original, + } + resolvers.savers.directvirtual(cachename,result,true) -- persistent + end + loaded[scheme] = loaded[scheme] + 1 + statistics.stoptiming(schemes) + return cachename + else + local cachename = caches.setfirstwritablefile(cleanname,"schemes") + if not cached[original] or threshold == 0 then + statistics.starttiming(schemes) + if threshold == 0 or not fileexists(cachename) or (os.difftime(os.time(),lfs.attributes(cachename).modification) > (thresholds[protocol] or threshold)) then + -- removefile(cachename) + cached[original] = cachename + local handler = handlers[scheme] + if handler then + if trace_schemes then + report_schemes("fetching %a, protocol %a, method %a",original,scheme,"built-in") + end + logs.flush() + handler(specification,cachename) + else + if trace_schemes then + report_schemes("fetching %a, protocol %a, method %a",original,scheme,"curl") + end + logs.flush() + runner { + original = original, + cachename = cachename, + } + end + end + if fileexists(cachename) then + cached[original] = cachename + if trace_schemes then + report_schemes("using cached %a, protocol %a, cachename %a",original,scheme,cachename) + end + else + cached[original] = "" + if trace_schemes then + report_schemes("using missing %a, protocol %a",original,scheme) + end + end + loaded[scheme] = loaded[scheme] + 1 + statistics.stoptiming(schemes) + else + if trace_schemes then + report_schemes("reusing %a, protocol %a",original,scheme) + end + reused[scheme] = reused[scheme] + 1 + end + return cached[original] + end +end + +local function finder(specification,filetype) + return resolvers.methodhandler("finders",fetch(specification),filetype) +end + +local opener = openers.file +local loader = loaders.file + +local function install(scheme,handler,newthreshold) + handlers [scheme] = handler + loaded [scheme] = 0 + reused [scheme] = 0 + finders [scheme] = finder + openers [scheme] = opener + loaders [scheme] = loader + thresholds[scheme] = newthreshold or threshold +end + +schemes.install = install + +local function http_handler(specification,cachename) + if inmemory then + local result = { } + local status, message = http.request { + url = specification.original, + sink = ltn12.sink.table(result) + } + resolvers.savers.directvirtual(cachename,concat(result),true) -- persistent + else + local tempname = cachename .. ".tmp" + local handle = io.open(tempname,"wb") + local status, message = http.request { + url = specification.original, + sink = ltn12.sink.file(handle) + } + if not status then + removefile(tempname) + else + removefile(cachename) + renamefile(tempname,cachename) + end + end + return cachename +end + +install('http',http_handler) +install('https') -- see pod +install('ftp') + +statistics.register("scheme handling time", function() + local l, r, nl, nr = { }, { }, 0, 0 + for k, v in sortedhash(loaded) do + if v > 0 then + nl = nl + 1 + l[nl] = k .. ":" .. v + end + end + for k, v in sortedhash(reused) do + if v > 0 then + nr = nr + 1 + r[nr] = k .. ":" .. v + end + end + local n = nl + nr + if n > 0 then + if nl == 0 then l = { "none" } end + if nr == 0 then r = { "none" } end + return format("%s seconds, %s processed, threshold %s seconds, loaded: %s, reused: %s", + statistics.elapsedtime(schemes), n, threshold, concat(l," "), concat(l," ")) + else + return nil + end +end) + +-- We provide a few more helpers: + +----- http = require("socket.http") +local httprequest = http.request +local toquery = url.toquery + +local function fetchstring(url,data) + local q = data and toquery(data) + if q then + url = url .. "?" .. q + end + local reply = httprequest(url) + return reply -- just one argument +end + +schemes.fetchstring = fetchstring + +function schemes.fetchtable(url,data) + local reply = fetchstring(url,data) + if reply then + local s = load("return " .. reply) + if s then + return s() + end + end +end |