summaryrefslogtreecommitdiff
path: root/tex/context/base/mkxl/data-sch.lmt
blob: 36f89040c99d167a7fe452a925f6875722fa30f4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
-- Module registration: the global modules table is created when it does not
-- exist yet and the details of this file are stored under the key 'data-sch'.
if not modules then modules = { } end modules ['data-sch'] = {
    version   = 1.001,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

local load, tonumber, require = load, tonumber, require
local gsub, format = string.gsub, string.format
local savedata = io.savedata
local sortedhash, concat = table.sortedhash, table.concat
local finders, openers, loaders = resolvers.finders, resolvers.openers, resolvers.loaders
local addsuffix, suffix, splitbase = file.addsuffix, file.suffix, file.splitbase
local md5hex = md5.hex
local removefile, renamefile, fileexists = os.remove, os.rename, io.exists

-- todo: more locals

-- Tracing is off by default and follows the "resolvers.schemes" tracker.
local trace_schemes  = false  trackers.register("resolvers.schemes",function(v) trace_schemes = v end)
local report_schemes = logs.reporter("resolvers","schemes")

-- The socket helpers are used by the built-in http handler and by the
-- fetchstring helper further on in this file.
local http           = require("socket.http")
local ltn12          = require("ltn12")

if mbox then mbox = nil end -- useless and even bugged (helper overwrites lib)

-- An already existing resolvers.schemes table is reused, otherwise a new one
-- is created and registered.
local resolvers      = resolvers
local schemes        = resolvers.schemes or { }
resolvers.schemes    = schemes

-- The cleaners turn a specification into the name used for caching.
local cleaners       = { }
schemes.cleaners     = cleaners

-- threshold  : compared with the age of a cached file (os.difftime) and shown
--              as seconds in the statistics; the default equals one day
-- inmemory   : when set, fetched data is stored under a virtual name instead
--              of in a cache file
-- uselibrary : when set, the curl runners try to use the curl library
local threshold      = 24 * 60 * 60
local inmemory       = false
local uselibrary     = false

-- A threshold value that is not a number leaves the current one untouched.
directives.register("schemes.threshold",  function(v) threshold = tonumber(v) or threshold end)
directives.register("schemes.inmemory",   function(v) inmemory = v end)
directives.register("schemes.uselibrary", function(v) uselibrary = v end)

-- Cleaner that does no cleaning at all: the original name of the specification
-- is used as it is.
function cleaners.none(specification)
    local unchanged = specification.original
    return unchanged
end

-- function cleaners.strip(specification)
--     -- todo: only keep suffix periods, so after the last
--     return (gsub(specification.original,"[^%a%d%.]+","-")) -- so we keep periods
-- end

-- Cleaner that flattens the original name into something that can be used as
-- a file name. The original is split by splitbase into a path and a name.
-- Periods in the path become dashes, so that only the periods in the name
-- part (the suffixes) survive, and after that every run of characters that is
-- not a letter, digit or period is collapsed into a single dash.
function cleaners.strip(specification) -- keep suffixes
    local path, name = splitbase(specification.original)
    local target = name
    if path ~= "" then
        local flattened = gsub(path,"%.","-")
        target = flattened .. "-" .. name
    end
    local cleaned = gsub(target,"[^%a%d%.]+","-")
    return cleaned
end

-- Cleaner that uses the md5 hex digest of the original name, with the suffix
-- of the path of the specification added to it.
function cleaners.md5(specification)
    local digest = md5hex(specification.original)
    return addsuffix(digest,suffix(specification.path))
end

-- The cleaner in use; stripping is the default.
local cleaner = cleaners.strip

-- The method can be set by name; an unknown name results in the strip cleaner.
directives.register("schemes.cleanmethod", function(v) cleaner = cleaners[v] or cleaners.strip end)

-- Apply the currently selected cleaner to a specification and return the
-- resulting name. The mapping is reported when tracing is enabled.
function resolvers.schemes.cleanname(specification)
    local cleaned = cleaner(specification)
    if not trace_schemes then
        return cleaned
    end
    report_schemes("hashing %a to %a",specification.original,cleaned)
    return cleaned
end

-- Bookkeeping tables:
--
-- cached     : original name to cache name (an empty string when fetching did
--              not result in a file)
-- loaded     : number of fetches per scheme
-- reused     : number of times an already fetched resource was reused, per scheme
-- thresholds : threshold per scheme
-- handlers   : built-in handler per scheme (when there is one)
--
-- The tables indexed by scheme are filled by the install function.
local cached     = { }
local loaded     = { }
local reused     = { }
local thresholds = { }
local handlers   = { }

-- Return a function that fetches an url with the curl library, or nothing when
-- the library is not to be used or provides no fetch function. The returned
-- function gets the url as string and returns the fetched data; when there is
-- no data the message coming from the library is passed to the given report
-- function. Redirects are followed and the ssl host and peer verification are
-- switched off.
local function fetcher(report)
    if not uselibrary then
        return
    end
    local library  = require("curl") or require("libs-imp-curl") -- we have curl preloaded
    local libfetch = library and library.fetch
    if not libfetch then
        return
    end
    return function(address)
        local content, problem = libfetch {
            url            = address,
            followlocation = true,
            sslverifyhost  = false,
            sslverifypeer  = false,
        }
        if content then
            return content
        end
        report("some error: %s",problem)
        return content
    end
end

-- Runner that fetches an url into a cache file by means of curl. The template
-- describes the arguments for the curl program. The internal entry provides an
-- alternative that uses the curl library (see fetcher): it saves whatever was
-- fetched, or an empty string when there is no data, under the cache name.
-- When no library fetcher is available it returns nothing usable.
local runner = sandbox.registerrunner {
    name     = "to file curl resolver",
    method   = "execute",
    program  = "curl",
    template = '--silent --insecure --create-dirs --output "%cachename%" "%original%"',
    checkers = {
        cachename = "cache",
        original  = "url",
    },
    internal = function(specification)
        local libraryfetch = fetcher(specification.reporter)
        if not libraryfetch then
            return libraryfetch
        end
        return function(name,program,template,checkers,defaults,variables,reporter,finalized)
            local content = libraryfetch(variables.original) or ""
            savedata(variables.cachename,content)
        end
    end,
}

-- Runner that fetches an url by means of curl and hands back the result
-- instead of writing a file. The internal entry provides an alternative that
-- uses the curl library (see fetcher) and returns the fetched data, or an
-- empty string when there is none. When no library fetcher is available it
-- returns nothing usable.
local memrunner = sandbox.registerrunner {
    name     = "in memory curl resolver",
    method   = "resultof",
    program  = "curl",
    template = '--silent --insecure "%original%"',
    checkers = {
        original = "url",
    },
    internal = function(specification)
        local libraryfetch = fetcher(specification.reporter)
        if not libraryfetch then
            return libraryfetch
        end
        return function(name,program,template,checkers,defaults,variables,reporter,finalized)
            local content = libraryfetch(variables.original)
            return content or ""
        end
    end,
}

-- Fetch the resource that a specification points to and return the name under
-- which the result can be found afterwards: a virtual name when data is kept
-- in memory, otherwise the name of a file in the "schemes" cache, or an empty
-- string when no such file came out of the fetch.
--
-- In the file based case a resource is fetched (again) when the global
-- threshold is zero, when there is no cached file, or when the cached file is
-- older than the threshold that applies to the scheme. A scheme that has a
-- built-in handler uses that one, other schemes are fetched by the curl
-- runners.

local function fetch(specification)
    local original  = specification.original
    local scheme    = specification.scheme
    local cleanname = schemes.cleanname(specification)
    if inmemory then
        statistics.starttiming(schemes)
        local cachename = resolvers.savers.virtualname(cleanname)
        local handler   = handlers[scheme]
     -- if handler and not uselibrary then
        if handler then -- internal sockets are twice as fast as library
            if trace_schemes then
                report_schemes("fetching %a, protocol %a, method %a",original,scheme,"built-in")
            end
            logs.flush()
            handler(specification,cachename)
        else
            if trace_schemes then
                report_schemes("fetching %a, protocol %a, method %a",original,scheme,"curl")
            end
            logs.flush()
            local result = memrunner {
                original = original,
            }
            resolvers.savers.directvirtual(cachename,result,true) -- persistent
        end
        loaded[scheme] = loaded[scheme] + 1
        statistics.stoptiming(schemes)
        return cachename
    else
        local cachename = caches.setfirstwritablefile(cleanname,"schemes")
        if not cached[original] or threshold == 0 then
            statistics.starttiming(schemes)
            -- A threshold given explicitly to install wins over the global
            -- one. This used to be indexed by an undefined 'protocol', so the
            -- per scheme value never kicked in.
            local maxage = thresholds[scheme] or threshold
            if threshold == 0 or not fileexists(cachename) or (os.difftime(os.time(),lfs.attributes(cachename).modification) > maxage) then
             -- removefile(cachename)
                cached[original] = cachename
                local handler = handlers[scheme]
                if handler then
                    if trace_schemes then
                        report_schemes("fetching %a, protocol %a, method %a",original,scheme,"built-in")
                    end
                    logs.flush()
                    handler(specification,cachename)
                else
                    if trace_schemes then
                        report_schemes("fetching %a, protocol %a, method %a",original,scheme,"curl")
                    end
                    logs.flush()
                    runner {
                        original  = original,
                        cachename = cachename,
                    }
                end
            end
            -- What counts in the end is if there is a file, freshly fetched or not.
            if fileexists(cachename) then
                cached[original] = cachename
                if trace_schemes then
                    report_schemes("using cached %a, protocol %a, cachename %a",original,scheme,cachename)
                end
            else
                cached[original] = ""
                if trace_schemes then
                    report_schemes("using missing %a, protocol %a",original,scheme)
                end
            end
            loaded[scheme] = loaded[scheme] + 1
            statistics.stoptiming(schemes)
        else
            if trace_schemes then
                report_schemes("reusing %a, protocol %a",original,scheme)
            end
            reused[scheme] = reused[scheme] + 1
        end
        return cached[original]
    end
end

-- The finder that all installed schemes share: the resource is fetched first
-- and the resulting name is then passed on to the finders method handler.

local function finder(specification,filetype)
    return resolvers.methodhandler("finders",fetch(specification),filetype)
end

-- Once fetched, a resource is opened and loaded the same way as a file.

local opener = openers.file
local loader = loaders.file

-- Install a scheme: the counters are reset and the shared finder, opener and
-- loader are registered for it. The handler is optional: without one the curl
-- runners do the fetching. The threshold is optional too: only an explicitly
-- given one is stored, so that schemes without one keep following the global
-- threshold, also when that one is changed later by the directive.

local function install(scheme,handler,newthreshold)
    handlers  [scheme] = handler
    loaded    [scheme] = 0
    reused    [scheme] = 0
    finders   [scheme] = finder
    openers   [scheme] = opener
    loaders   [scheme] = loader
    thresholds[scheme] = newthreshold
end

schemes.install = install

-- The built-in handler for http: the resource that the specification points
-- to is fetched with the socket library and the given cachename is returned.
--
-- In memory mode whatever was received is stored under the (virtual)
-- cachename. Otherwise the data goes to a temporary file first and that file
-- only replaces the cached one when the request succeeded, so a failed
-- request leaves an existing cached file untouched.

local function http_handler(specification,cachename)
    if inmemory then
        local result = { }
        http.request {
            url  = specification.original,
            sink = ltn12.sink.table(result)
        }
        resolvers.savers.directvirtual(cachename,concat(result),true) -- persistent
    else
        local tempname = cachename .. ".tmp"
        local handle   = io.open(tempname,"wb")
        if not handle then
            -- Without a file to write to the request can only fail, so we
            -- don't bother the network.
            if trace_schemes then
                report_schemes("unable to open %a for writing",tempname)
            end
            return cachename
        end
        local status, message = http.request {
            url  = specification.original,
            sink = ltn12.sink.file(handle)
        }
        -- The file sink closes the handle when it gets the end of the data
        -- signaled, which doesn't happen when a request fails early or when
        -- there is no body. An open handle is a leak and can also get in the
        -- way of removing or renaming the file.
        if io.type(handle) == "file" then
            handle:close()
        end
        if not status then
            if trace_schemes then
                report_schemes("fetching %a failed: %s",specification.original,tostring(message))
            end
            removefile(tempname)
        else
            removefile(cachename)
            renamefile(tempname,cachename)
        end
    end
    return cachename
end

-- Only http gets a built-in handler here. A scheme that is installed without
-- a handler is fetched by the curl runners.
install('http',http_handler)
install('https') -- see pod
install('ftp')

-- Report the time spent on handling schemes, the global threshold and the
-- counters per scheme, sorted by scheme and formatted as "scheme:count". Only
-- schemes with a count larger than zero are listed and "none" is shown for an
-- empty list. Nothing is reported when no scheme was used at all.

statistics.register("scheme handling time", function()
    local function collected(counters)
        local list, n = { }, 0
        for k, v in sortedhash(counters) do
            if v > 0 then
                n = n + 1
                list[n] = k .. ":" .. v
            end
        end
        return list, n
    end
    local l, nl = collected(loaded)
    local r, nr = collected(reused)
    local n = nl + nr
    if n > 0 then
        if nl == 0 then l = { "none" } end
        if nr == 0 then r = { "none" } end
        -- the last field used to show the loaded list a second time
        return format("%s seconds, %s processed, threshold %s seconds, loaded: %s, reused: %s",
            statistics.elapsedtime(schemes), n, threshold, concat(l," "), concat(r," "))
    else
        return nil
    end
end)

-- We provide a few more helpers:

----- http        = require("socket.http")
local httprequest = http.request
local toquery     = url.toquery

-- Fetch an url with the socket library and return what comes back. When data
-- is given it is converted by toquery and, when that gives a result, added to
-- the url after a question mark. Only the first value that the request
-- returns is passed on.

local function fetchstring(url,data)
    local target = url
    if data then
        local query = toquery(data)
        if query then
            target = url .. "?" .. query
        end
    end
    return (httprequest(target)) -- just one argument
end

schemes.fetchstring = fetchstring

-- Fetch an url (see fetchstring) and interpret the reply as a Lua expression:
-- the reply is prefixed by "return ", compiled and run, and whatever that
-- gives is returned. Nothing is returned when there is no reply or when the
-- reply doesn't compile.
--
-- NOTE(review): the reply comes from a remote server and is run as Lua code
-- with access to everything that is available here, so this should only be
-- used with trusted sources.

function schemes.fetchtable(url,data)
    local reply = fetchstring(url,data)
    if not reply then
        return
    end
    local chunk = load("return " .. reply)
    if not chunk then
        return
    end
    return chunk()
end