summaryrefslogtreecommitdiff
path: root/tex/context/base/mkxl/data-hsh.lmt
blob: 0a2d94f8190048b2323d080014ffee03f6695ee1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
-- only lmt because the backend code doesn't deal with it and it makes
-- no sense to waste time on that for mkiv

if not modules then modules = { } end modules ['data-hsh'] = {
    version   = 0.002,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- todo: options
--
-- lowercase
-- cleanupnames (normalize)
-- use database from project tree

local type = type
local gsub = string.gsub
local addsuffix, basename, pathpart, filesuffix, filesize = file.addsuffix, file.basename, file.pathpart, file.suffix, file.size
local loadtable, savetable = table.load, table.save
local loaddata, savedata, open = io.loaddata, io.savedata, io.open

-- Tracing is off by default. Both trackers registered below set the same
-- flag, so enabling either "resolvers.locating" or "resolvers.hashed" turns
-- on the extra reports in this module.
local trace_hashed  = false
local report_hashed = logs.reporter("resolvers","hashed")

trackers.register("resolvers.locating", function(v) trace_hashed = v end)
trackers.register("resolvers.hashed",   function(v) trace_hashed = v end)

-- we can have a virtual file: open at the position, make sure read and seek don't
-- go beyond the boundaries

-- local shortcuts to the resolver tables that get a "hashed" handler below
local resolvers = resolvers
local finders   = resolvers.finders
local openers   = resolvers.openers
local loaders   = resolvers.loaders

-- Registered databases: 'ordered' holds the entries in registration order
-- (used when a lookup does not name a database), 'hashed' maps a database
-- name onto the same entry.
local ordered = { }
local hashed  = { }
-- version that a metadata table must carry in order to be accepted
local version = 0.002

-- local lowercase = characters.lower

-- Reports the counters kept in the metadata table of a database: the number
-- of paths, names, unique blobs and compressed blobs.
local function showstatus(database,metadata)
    local paths      = metadata.nofpaths
    local names      = metadata.nofnames
    local blobs      = metadata.nofblobs
    local compressed = metadata.nofcompressed
    report_hashed(
        "database %a, %i paths, %i names, %i unique blobs, %i compressed blobs",
        database, paths, names, blobs, compressed
    )
end

-- Returns a usable entry for the given database or nothing when there is
-- none.
--
-- When the database has already been registered the registered entry is
-- returned. Otherwise the metadata file (suffix "lua") is loaded and checked:
-- it has to be a table, its version has to match the version of this module
-- and the accompanying data file (suffix "dat") has to exist. Each failed
-- check is reported and results in no return value.
--
-- database : name of the database (suffixes are added here)
-- returns  : table with fields 'database', 'metadata' and 'dataname'
local function validhashed(database)
    local found = hashed[database]
    if found then
        return found
    end
    local metaname = addsuffix(database,"lua")
    local dataname = addsuffix(database,"dat")
    local metadata = loadtable(metaname)
    if type(metadata) ~= "table" then
        report_hashed("invalid database %a",metaname)
    elseif metadata.version ~= version then
        report_hashed("version mismatch in database %a",metaname)
    elseif not lfs.isfile(dataname) then
        report_hashed("missing data file for %a",metaname)
    else
        -- not cached here: registering is up to registerhashed
        return {
            database = database,
            metadata = metadata,
            dataname = dataname,
        }
    end
end

-- Registers a database once: a valid database is appended to the ordered
-- list, made accessible by name and its status is reported. A database that
-- is already known, or that does not validate, is left alone.
local function registerhashed(database)
    if hashed[database] then
        return
    end
    local entry = validhashed(database)
    if not entry then
        return
    end
    ordered[#ordered + 1] = entry
    hashed[database]      = entry
    showstatus(database,entry.metadata)
end

-- This block defines registerfilescheme and wraps the regular file finder so
-- that registered schemes are tried first. The original finders.file is kept
-- in 'findfile' and used as fallback.
local registerfilescheme  do

    -- the finder that was installed before we replace it below
    local findfile = finders.file

    local list = { } -- scheme names in order of registration
    local done = { } -- scheme names already in 'list'
    local hash = { } -- lookup results keyed by specification.original

    -- Adds a scheme name to the list of schemes that finders.file tries
    -- first; a name is only added once.
    registerfilescheme = function(name)
        if not done[name] then
            list[#list+1] = name
            done[name]    = true
        end
    end

    -- why does the finder not remember ?

    -- Replacement for finders.file. For a table specification that has an
    -- 'original' field the result is cached in 'hash':
    --
    --   nil   : not looked up yet
    --   false : looked up before and not found
    --   other : the result of an earlier successful lookup
    --
    -- On a first lookup each registered scheme is called as
    -- finders[scheme](specification,filetype) and the first result that is
    -- not nil or false wins; after that the original file finder is tried.
    -- In all other cases the call is passed on to the original finder
    -- without caching.
    function finders.file(specification,filetype)
        if type(specification) == "table" then
            local original = specification.original
         -- print(original)
            if original then
                local found = hash[original]
                if found == nil then
                    for i=1,#list do
                        local scheme = list[i]
                        -- this local shadows the cached 'found' above
                        local found  = finders[scheme](specification,filetype)
                        if found then
                            hash[original] = found
                            if trace_hashed then
                                report_hashed("found by auto scheme %s: %s",scheme,found)
                            end
                            return found
                        end
                    end
                    -- no registered scheme delivered, so fall back on the original finder
                    local found = findfile(specification,filetype)
                    if found then
                        hash[original] = found
                        if trace_hashed then
                            report_hashed("found by normal file scheme: %s",found)
                        end
                        return found
                    end
                    -- remember the failure so that we don't search again
                    hash[original] = false
                elseif found then
                    return found
                end
                -- either a cached failure or a failure that just got cached
                return false
            else
                -- something is wrong here, maybe we should trace it (scheme can be "unknown")
            end
        end
        -- again, something is wrong
        return findfile(specification,filetype)
    end

end

-- make the local functions defined above available via finders.helpers
finders.helpers.validhashed        = validhashed
finders.helpers.registerhashed     = registerhashed
finders.helpers.registerfilescheme = registerfilescheme

-- Looks up the hash that is registered for the given name under the given
-- path in the metadata of a database entry. The hash is only returned when
-- it also has an entry in the hashes table; in all other cases nothing is
-- returned.
local function locate(found,path,name)
    local metadata = found.metadata
    local names    = metadata.files[path]
    if names then
        local hash = names[name]
        if hash and metadata.hashes[hash] then
            return hash
        end
    end
end

-- Splits a filename in a path and a name and looks up the hash of the blob
-- that is registered for it.
--
-- filename : the (full) name of the file to look up; without a filename
--            nothing is returned
-- database : optional; when given only that registered database is
--            consulted, otherwise the registered databases are tried in
--            order of registration and the first hit wins
--
-- returns  : on success a table with the fields 'hash', 'name', 'path' and
--            'base' (the database that delivered the hash), otherwise
--            nothing
local function locatehash(filename,database)
    if filename then
        local name = basename(filename)
        local path = pathpart(filename)
        local hash = false
        if database then
            local found = hashed[database]
            if found then
                -- the original had leftover values after this call; these
                -- were evaluated and then discarded so they are gone now
                hash = locate(found,path,name)
            end
        else
            for i=1,#ordered do
                local found = ordered[i]
                hash = locate(found,path,name)
                if hash then
                    -- remember which database delivered the hash
                    database = found.database
                    break
                end
            end
        end
        if hash then
            return {
                hash = hash,
                name = name,
                path = path,
                base = database,
            }
        end
    end
end

-- no caching yet, we don't always want the file and it's fast enough

-- Fetches the content that is registered for a file.
--
-- The hash is located first (see locatehash) and the blob description that
-- comes with it provides the position and size of the data in the data file
-- of the database. Blobs that are flagged as "zip" are decompressed to their
-- original size.
--
-- filename : the (full) name of the file to fetch
-- database : optional, limits the lookup to that registered database
--
-- returns  : the blob, or nothing when the file is unknown, the data file
--            can't be opened or the data can't be read
local function locateblob(filename,database)
    local found = locatehash(filename,database)
    if found then
        local data = hashed[found.base]
        if data then
            local dataname = data.dataname
            local blobdata = data.metadata.hashes[found.hash]
            if blobdata and dataname then
                local f = open(dataname,"rb")
                if f then
                    f:seek("set",blobdata.position)
                    local blob = f:read(blobdata.datasize)
                    -- we're done with the file so don't leave the handle to
                    -- the garbage collector
                    f:close()
                    -- a failed read gives nil which we should not feed into
                    -- the decompressor
                    if blob and blobdata.compress == "zip" then
                        blob = zlib.decompresssize(blob,blobdata.filesize)
                    end
                    return blob
                end
            end
        end
    end
end

local finders  = resolvers.finders
local notfound = finders.notfound

-- Finder for the "hashed" scheme: when the path of the specification is
-- known in one of the registered databases the original specification string
-- is returned, otherwise we return whatever the generic notfound handler of
-- the finders returns.
function finders.hashed(specification)
    local original = specification.original
    local fullpath = specification.path
    local known    = fullpath and locatehash(fullpath)
    if known then
        if trace_hashed then
            report_hashed("finder: file %a found",original)
        end
        return original
    end
    if trace_hashed then
        report_hashed("finder: unknown file %a",original)
    end
    return notfound()
end

local notfound   = openers.notfound
local textopener = openers.helpers.textopener

-- Opener for the "hashed" scheme: the blob that belongs to the path of the
-- specification is fetched from a registered database and wrapped by the
-- text opener helper (tagged "hashed", using "utf-8"). When there is no blob
-- we return whatever the generic notfound handler of the openers returns.
--
-- The trace messages carry the "opener:" prefix; they used to be reported as
-- coming from the finder, which was confusing when tracing.
function openers.hashed(specification)
    local original = specification.original
    local fullpath = specification.path
    if fullpath then
        local found = locateblob(fullpath)
        if found then
            if trace_hashed then
                report_hashed("opener: file %a found",original)
            end
            return textopener("hashed",original,found,"utf-8")
        end
    end
    if trace_hashed then
        report_hashed("opener: unknown file %a",original)
    end
    return notfound()
end

local notfound = loaders.notfound

-- Loader for the "hashed" scheme: the blob that belongs to the path of the
-- specification is fetched from a registered database.
--
-- returns : true, the blob and the length of the blob when there is one,
--           otherwise whatever the generic notfound handler of the loaders
--           returns
--
-- The trace messages carry the "loader:" prefix; they used to be reported as
-- coming from the finder, which was confusing when tracing.
function loaders.hashed(specification)
    local original = specification.original
    local fullpath = specification.path
    if fullpath then
        local found = locateblob(fullpath)
        if found then
            if trace_hashed then
                report_hashed("loader: file %a found",original)
            end
            -- found is known to be set here so we can take its length
            return true, found, #found
        end
    end
    if trace_hashed then
        report_hashed("loader: unknown file %a",original)
    end
    return notfound()
end

-- this actually could end up in the generate namespace but it is not
-- really a 'generic' feature, more a module (at least for now)

-- the hash of the content is the key under which a blob is stored
local calculatehash = sha2.HEX256 -- md5.HEX is not unique enough

-- Creates or extends a hashed database.
--
-- The specification table is read as follows:
--
--   database : name of the database; the suffixes "dat" (blobs) and "lua"
--              (metadata) are added here
--   patterns : list of tables with a 'pattern' field (passed to dir.glob)
--              and an optional 'compress' field
--   pattern  : used together with 'compress' as the only entry when no
--              'patterns' is given
--
-- Existing metadata is loaded and extended. Each file that matches is
-- registered under its (cleaned up) path and name with the hash of its
-- content, and content that is not yet known is added to the data file.
-- Names that are already registered are skipped without looking at the
-- content.
--
-- The metadata table is saved, its status is reported and it is returned.
--
-- NOTE(review): there is no check on 'database' being set; presumably
-- callers always provide it - confirm.
function resolvers.finders.helpers.createhashed(specification)
    local database = specification.database
    local patterns = specification.patterns
    if not patterns then
        -- a single pattern is turned into a list with one entry
        local pattern = specification.pattern
        if pattern then
            patterns = {
                {
                    pattern  = pattern,
                    compress = specification.compress,
                }
            }
        end
    end
    local datname  = addsuffix(database,"dat")
    local luaname  = addsuffix(database,"lua")
    local metadata = loadtable(luaname)
    -- NOTE(review): a table of which the kind is not "hashed" passes this
    -- test and is used as it is, while the code below expects the files and
    -- hashes tables and the counters to be present - confirm that this is
    -- intended.
    if type(metadata) ~= "table" then
        metadata = false
    elseif metadata.kind == "hashed" and metadata.version ~= version then
        report_hashed("version mismatch, starting with new table")
        metadata = false
    end
    if not metadata then
        -- start from scratch
        metadata = {
            version       = version,
            kind          = "hashed",
            files         = { },
            hashes        = { },
            nofnames      = 0,
            nofpaths      = 0,
            nofblobs      = 0,
            nofcompressed = 0,
        }
    end
    -- the counters are kept in locals and stored in the metadata at the end
    local files         = metadata.files
    local hashes        = metadata.hashes
    local nofpaths      = metadata.nofpaths
    local nofnames      = metadata.nofnames
    local nofblobs      = metadata.nofblobs
    local nofcompressed = metadata.nofcompressed
    if type(patterns) == "table" then
        for i=1,#patterns do
            local pattern = patterns[i].pattern
            if pattern then
                local compress = patterns[i].compress
                local list     = dir.glob(pattern)
                local total    = #list
                report_hashed("database %a, adding pattern %a, compression %l",database,pattern,compress)
                -- this loop variable shadows the one of the outer loop, which
                -- is harmless because the pattern entry is not accessed any more
                for i=1,total do
                    local filename = list[i]
                    local name     = basename(filename)
                    local path     = pathpart(filename)
                    local data     = loaddata(filename)
                    -- cleanup: strip leading periods and slashes from the path
                    path = gsub(path,"^[./]*","")
                    -- files that can't be loaded are silently skipped
                    if data then
                        local fp = files[path]
                        if not fp then
                            fp = { }
                            files[path] = fp
                            nofpaths = nofpaths + 1
                        end
                        local ff = fp[name]
                        if not ff then
                            local hash = calculatehash(data)
                            -- identical content is stored only once
                            if not hashes[hash] then
                                -- the size before compression, stored as filesize
                                local size = #data
                                if compress then
                                    data = zlib.compresssize(data,size)
                                    nofcompressed = nofcompressed + 1
                                end
                                -- The size of the data file is taken before the blob
                                -- is written; presumably savedata appends here (last
                                -- argument) so that this is the offset of the blob -
                                -- confirm against io.savedata.
                                local position = filesize(datname)
                                savedata(datname,data,"",true)
                                hashes[hash] = {
                                    filesize = size,
                                    datasize = #data,
                                    compress = compress and "zip",
                                    position = position,
                                }
                                nofblobs = nofblobs + 1
                            end
                            fp[name] = hash
                            nofnames = nofnames + 1
                        end
                    end
                end
            end
        end
    end
    metadata.nofpaths      = nofpaths
    metadata.nofnames      = nofnames
    metadata.nofblobs      = nofblobs
    metadata.nofcompressed = nofcompressed
    savetable(luaname, metadata)
    showstatus(database,metadata)
    return metadata
end