1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
|
-- only lmt because the backend code doesn't deal with it and it makes
-- no sense to waste time on that for mkiv

-- ConTeXt module registration bookkeeping.
if not modules then modules = { } end modules ['data-hsh'] = {
    version   = 0.002,
    comment   = "companion to luat-lib.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- todo: options
--
-- lowercase
-- cleanupnames (normalize)
-- use database from project tree

-- localize frequently used globals and project helpers
local type = type
local gsub = string.gsub
local addsuffix, basename, pathpart, filesuffix, filesize = file.addsuffix, file.basename, file.pathpart, file.suffix, file.size
local loadtable, savetable = table.load, table.save
local loaddata, savedata, open = io.loaddata, io.savedata, io.open

-- tracing: note that both trackers below toggle the same flag
local trace_hashed  = false
local report_hashed = logs.reporter("resolvers","hashed")

trackers.register("resolvers.locating", function(v) trace_hashed = v end)
trackers.register("resolvers.hashed",   function(v) trace_hashed = v end)

-- we can have a virtual file: open at the position, make sure read and seek don't
-- go beyond the boundaries

local resolvers = resolvers
local finders   = resolvers.finders
local openers   = resolvers.openers
local loaders   = resolvers.loaders

local ordered = { } -- registered database descriptors, in registration order
local hashed  = { } -- same descriptors, keyed by database name
local version = 0.002 -- must match metadata.version of an on-disk database

-- local lowercase = characters.lower
-- Report the size statistics (paths / names / blobs) of a database as
-- recorded in its metadata table.
local function showstatus(database,metadata)
    local m = metadata
    report_hashed(
        "database %a, %i paths, %i names, %i unique blobs, %i compressed blobs",
        database, m.nofpaths, m.nofnames, m.nofblobs, m.nofcompressed
    )
end
-- Validate a hashed database: the <database>.lua metadata file must load as
-- a table with a matching version and the companion <database>.dat blob file
-- must exist on disk. Returns a descriptor { database, metadata, dataname }
-- on success, nil otherwise (problems are reported). A database that was
-- registered before is returned from the cache directly.
local function validhashed(database)
    local found = hashed[database]
    if found then
        return found
    else
        local metaname = addsuffix(database,"lua")
        local dataname = addsuffix(database,"dat")
        local metadata = loadtable(metaname)
        if type(metadata) ~= "table" then
            report_hashed("invalid database %a",metaname)
        elseif metadata.version ~= version then
            report_hashed("version mismatch in database %a",metaname)
        elseif not lfs.isfile(dataname) then
            -- message had a duplicated word ("data data"); fixed
            report_hashed("missing data file for %a",metaname)
        else
            return {
                database = database,
                metadata = metadata,
                dataname = dataname,
            }
        end
    end
end
-- Register a database once; an invalid database is silently skipped here
-- (validhashed already reports the reason).
local function registerhashed(database)
    if hashed[database] then
        return
    end
    local valid = validhashed(database)
    if valid then
        ordered[#ordered+1] = valid
        hashed[database]   = valid
        showstatus(database,valid.metadata)
    end
end
-- Auto file schemes: schemes registered here are tried, in registration
-- order, before the normal file finder. Results are memoized per original
-- specification string; a miss is cached as false (not nil) so it is not
-- retried.
local registerfilescheme  do

    local findfile = finders.file -- the original finder, kept as fallback
    local list     = { } -- registered scheme names, in registration order
    local done     = { } -- set: scheme name -> true, guards double registration
    local hash     = { } -- cache: original specification -> found name | false

    -- Register a scheme name to be tried by the overloaded file finder.
    registerfilescheme = function(name)
        if not done[name] then
            list[#list+1] = name
            done[name] = true
        end
    end

    -- why does the finder not remember ?

    -- Overloaded file finder: cache lookup first, then the registered
    -- schemes, then the original file finder.
    function finders.file(specification,filetype)
        if type(specification) == "table" then
            local original = specification.original
            -- print(original)
            if original then
                local found = hash[original]
                if found == nil then
                    -- not cached yet: try each registered scheme (the inner
                    -- "found" locals deliberately shadow the cached one)
                    for i=1,#list do
                        local scheme = list[i]
                        local found = finders[scheme](specification,filetype)
                        if found then
                            hash[original] = found
                            if trace_hashed then
                                report_hashed("found by auto scheme %s: %s",scheme,found)
                            end
                            return found
                        end
                    end
                    -- fall back on the normal file finder
                    local found = findfile(specification,filetype)
                    if found then
                        hash[original] = found
                        if trace_hashed then
                            report_hashed("found by normal file scheme: %s",found)
                        end
                        return found
                    end
                    -- remember the miss as false so we don't retry
                    hash[original] = false
                elseif found then
                    -- cached hit
                    return found
                end
                -- cached miss
                return false
            else
                -- something is wrong here, maybe we should trace it (scheme can be "unknown")
            end
        end
        -- again, something is wrong
        return findfile(specification,filetype)
    end

end
-- Export the helpers so other resolver code can use them.
finders.helpers.validhashed        = validhashed
finders.helpers.registerhashed     = registerhashed
finders.helpers.registerfilescheme = registerfilescheme
-- Map a path/name pair to its blob hash in one database descriptor, but
-- only when that hash actually occurs in the hashes table; otherwise
-- return nothing.
local function locate(found,path,name)
    local metadata = found.metadata
    local entry    = metadata.files[path]
    if entry then
        local hash = entry[name]
        if hash and metadata.hashes[hash] then
            return hash
        end
    end
end
-- Resolve a filename to a descriptor { hash, name, path, base }. When a
-- database name is passed only that database is consulted; otherwise all
-- registered databases are searched in registration order and "base" tells
-- which one matched. Returns nil when nothing matches.
local function locatehash(filename,database)
    if filename then
        local name = basename(filename)
        local path = pathpart(filename)
        local hash = false
        if database then
            local found = hashed[database]
            if found then
                -- the original line carried stray trailing expressions
                -- (", database, path, name") that Lua evaluated and then
                -- discarded; they served no purpose and are removed
                hash = locate(found,path,name)
            end
        else
            for i=1,#ordered do
                local found = ordered[i]
                hash = locate(found,path,name)
                if hash then
                    database = found.database
                    break
                end
            end
        end
        if hash then
            return {
                hash = hash,
                name = name,
                path = path,
                base = database,
            }
        end
    end
end
-- no caching yet, we don't always want the file and it's fast enough

-- Fetch the raw blob for a file from its database's .dat container: seek to
-- the recorded position, read the stored number of bytes and decompress when
-- the blob was stored zipped. Returns the blob string, or nil when the file
-- is unknown or the container cannot be opened.
local function locateblob(filename,database)
    local found = locatehash(filename,database)
    if found then
        local database = found.base
        local data = hashed[database]
        if data then
            local metadata = data.metadata
            local dataname = data.dataname
            local hashes   = metadata.hashes
            local blobdata = hashes[found.hash]
            if blobdata and dataname then
                local position = blobdata.position
                local f = open(dataname,"rb")
                if f then
                    f:seek("set",position)
                    local blob = f:read(blobdata.datasize)
                    -- the handle was never closed in the original (leaked
                    -- until garbage collection); close it right after reading
                    f:close()
                    if blobdata.compress == "zip" then
                        blob = zlib.decompresssize(blob,blobdata.filesize)
                    end
                    return blob
                end
            end
        end
    end
end
local finders  = resolvers.finders
local notfound = finders.notfound

-- Finder for the "hashed" scheme: a file is considered found when any
-- registered database knows its path; the original specification string is
-- returned as the found name.
function finders.hashed(specification)
    local original = specification.original
    local fullpath = specification.path
    if fullpath and locatehash(fullpath) then
        if trace_hashed then
            report_hashed("finder: file %a found",original)
        end
        return original
    end
    if trace_hashed then
        report_hashed("finder: unknown file %a",original)
    end
    return notfound()
end
local notfound   = openers.notfound
local textopener = openers.helpers.textopener

-- Opener for the "hashed" scheme: pulls the blob out of the database and
-- hands it to the generic utf-8 text opener.
function openers.hashed(specification)
    local original = specification.original
    local fullpath = specification.path
    if fullpath then
        local found = locateblob(fullpath)
        if found then
            if trace_hashed then
                -- was mislabeled "finder:" (copy/paste slip)
                report_hashed("opener: file %a found",original)
            end
            return textopener("hashed",original,found,"utf-8")
        end
    end
    if trace_hashed then
        report_hashed("opener: unknown file %a",original)
    end
    return notfound()
end
local notfound = loaders.notfound

-- Loader for the "hashed" scheme: returns success flag, the blob string and
-- its size (the usual loader triplet).
function loaders.hashed(specification)
    local original = specification.original
    local fullpath = specification.path
    if fullpath then
        local found = locateblob(fullpath)
        if found then
            if trace_hashed then
                -- was mislabeled "finder:" (copy/paste slip)
                report_hashed("loader: file %a found",original)
            end
            -- "found" is known truthy here, so the original
            -- "found and #found or 0" guard was redundant
            return true, found, #found
        end
    end
    if trace_hashed then
        report_hashed("loader: unknown file %a",original)
    end
    return notfound()
end
-- this actually could end up in the generate namespace but it is not
-- really a 'generic' feature, more a module (at least for now)

local calculatehash = sha2.HEX256 -- md5.HEX is not unique enough

-- Create or extend a hashed database on disk. The specification provides:
--   database : basename of the <db>.lua (metadata) / <db>.dat (blobs) pair
--   patterns : list of { pattern = <glob>, compress = <boolean> } entries
--   pattern  : shorthand for a single pattern (paired with .compress)
-- File content is deduplicated by hash; each unique blob is appended to the
-- .dat file and described in metadata.hashes. The (saved) metadata table is
-- returned.
function resolvers.finders.helpers.createhashed(specification)
    local database = specification.database
    local patterns = specification.patterns
    if not patterns then
        -- single-pattern shorthand: promote it to a one-element list
        local pattern = specification.pattern
        if pattern then
            patterns = {
                {
                    pattern  = pattern,
                    compress = specification.compress,
                }
            }
        end
    end
    local datname  = addsuffix(database,"dat")
    local luaname  = addsuffix(database,"lua")
    local metadata = loadtable(luaname)
    if type(metadata) ~= "table" then
        metadata = false
    elseif metadata.kind == "hashed" and metadata.version ~= version then
        report_hashed("version mismatch, starting with new table")
        metadata = false
    end
    if not metadata then
        -- fresh metadata; NOTE(review): an existing .dat file is not
        -- truncated here, positions are taken from its current size --
        -- confirm that is intended when the metadata is restarted
        metadata = {
            version       = version,
            kind          = "hashed",
            files         = { },
            hashes        = { },
            nofnames      = 0,
            nofpaths      = 0,
            nofblobs      = 0,
            nofcompressed = 0,
        }
    end
    -- thread the counters through locals while scanning
    local files         = metadata.files
    local hashes        = metadata.hashes
    local nofpaths      = metadata.nofpaths
    local nofnames      = metadata.nofnames
    local nofblobs      = metadata.nofblobs
    local nofcompressed = metadata.nofcompressed
    if type(patterns) == "table" then
        for i=1,#patterns do
            local pattern = patterns[i].pattern
            if pattern then
                local compress = patterns[i].compress
                local list     = dir.glob(pattern)
                local total    = #list
                report_hashed("database %a, adding pattern %a, compression %l",database,pattern,compress)
                for i=1,total do -- note: shadows the outer pattern index
                    local filename = list[i]
                    local name     = basename(filename)
                    local path     = pathpart(filename)
                    local data     = loaddata(filename)
                    -- cleanup
                    -- strip leading "./" parts so stored paths are relative
                    path = gsub(path,"^[./]*","")
                    --
                    if data then
                        local fp = files[path]
                        if not fp then
                            fp = { }
                            files[path] = fp
                            nofpaths = nofpaths + 1
                        end
                        local ff = fp[name]
                        if not ff then
                            local hash = calculatehash(data)
                            if not hashes[hash] then
                                -- a new unique blob: append it to the .dat file
                                local size = #data
                                if compress then
                                    data = zlib.compresssize(data,size)
                                    nofcompressed = nofcompressed + 1
                                end
                                -- current end of the .dat file becomes the
                                -- blob offset (assumes savedata with these
                                -- arguments appends -- TODO confirm)
                                local position = filesize(datname)
                                savedata(datname,data,"",true)
                                hashes[hash] = {
                                    filesize = size,  -- uncompressed size
                                    datasize = #data, -- stored (possibly compressed) size
                                    compress = compress and "zip",
                                    position = position,
                                }
                                nofblobs = nofblobs + 1
                            end
                            fp[name] = hash
                            nofnames = nofnames + 1
                        end
                    end
                end
            end
        end
    end
    metadata.nofpaths      = nofpaths
    metadata.nofnames      = nofnames
    metadata.nofblobs      = nofblobs
    metadata.nofcompressed = nofcompressed
    savetable(luaname, metadata)
    showstatus(database,metadata)
    return metadata
end
|