2016-01-12 16:26:00

author: Context Git Mirror Bot <phg42.2a@gmail.com> 2016-01-12 17:15:07 +0100
committer: Context Git Mirror Bot <phg42.2a@gmail.com> 2016-01-12 17:15:07 +0100
commit: 8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree: 94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkiv/data-exp.lua
parent: f5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
download: context-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz
1 files changed, 552 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/data-exp.lua b/tex/context/base/mkiv/data-exp.lua
new file mode 100644
index 000000000..19ceb90c3
--- /dev/null
+++ b/tex/context/base/mkiv/data-exp.lua
@@ -0,0 +1,552 @@
+if not modules then modules = { } end modules ['data-exp'] = {
+    version   = 1.001,
+    comment   = "companion to luat-lib.mkiv",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+local format, find, gmatch, lower, char, sub = string.format, string.find, string.gmatch, string.lower, string.char, string.sub
+local concat, sort = table.concat, table.sort
+local sortedkeys = table.sortedkeys
+local lpegmatch, lpegpatterns = lpeg.match, lpeg.patterns
+local Ct, Cs, Cc, Carg, P, C, S = lpeg.Ct, lpeg.Cs, lpeg.Cc, lpeg.Carg, lpeg.P, lpeg.C, lpeg.S
+local type, next = type, next
+local isdir = lfs.isdir
+
+local ostype = os.type
+local collapsepath, joinpath, basename = file.collapsepath, file.join, file.basename
+
+local trace_locating   = false  trackers.register("resolvers.locating",   function(v) trace_locating   = v end)
+local trace_expansions = false  trackers.register("resolvers.expansions", function(v) trace_expansions = v end)
+local trace_globbing   = true   trackers.register("resolvers.globbing",   function(v) trace_globbing   = v end)
+
+local report_expansions = logs.reporter("resolvers","expansions")
+local report_globbing   = logs.reporter("resolvers","globbing")
+
+local resolvers     = resolvers
+local resolveprefix = resolvers.resolve
+
+-- As this bit of code is somewhat special it gets its own module. After
+-- all, when working on the main resolver code, I don't want to scroll
+-- past this every time. See data-obs.lua for the gsub variant.
+
+-- local function f_first(a,b)
+--     local t, n = { }, 0
+--     for s in gmatch(b,"[^,]+") do
+--         n = n + 1 ; t[n] = a .. s
+--     end
+--     return concat(t,",")
+-- end
+--
+-- local function f_second(a,b)
+--     local t, n = { }, 0
+--     for s in gmatch(a,"[^,]+") do
+--         n = n + 1 ; t[n] = s .. b
+--     end
+--     return concat(t,",")
+-- end
+
+-- kpsewhich --expand-braces '{a,b}{c,d}'
+-- ac:bc:ad:bd
+
+-- old  {a,b}{c,d} => ac ad bc bd
+--
+-- local function f_both(a,b)
+--     local t, n = { }, 0
+--     for sa in gmatch(a,"[^,]+") do
+--         for sb in gmatch(b,"[^,]+") do
+--             n = n + 1 ; t[n] = sa .. sb
+--         end
+--     end
+--     return concat(t,",")
+-- end
+--
+-- new  {a,b}{c,d} => ac bc ad bd
+
+local function f_both(a,b)
+    local t, n = { }, 0
+    for sb in gmatch(b,"[^,]+") do              -- and not sa
+        for sa in gmatch(a,"[^,]+") do          --         sb
+            n = n + 1 ; t[n] = sa .. sb
+        end
+    end
+    return concat(t,",")
+end
+
+local comma   = P(",")
+local nocomma = (1-comma)^1
+local docomma = comma^1/","
+local before  = Cs((nocomma * Carg(1) + docomma)^0)
+local after   = Cs((Carg(1) * nocomma + docomma)^0)
+local both    = Cs(((C(nocomma) * Carg(1))/function(a,b) return lpegmatch(before,b,1,a) end + docomma)^0)
+
+local function f_first (a,b) return lpegmatch(after, b,1,a) end
+local function f_second(a,b) return lpegmatch(before,a,1,b) end
+local function f_both  (a,b) return lpegmatch(both,  b,1,a) end
+
+-- print(f_first ("a",    "x,y,z"))
+-- print(f_second("a,b,c","x"))
+-- print(f_both  ("a,b,c","x,y,z"))
+
+local left  = P("{")
+local right = P("}")
+local var   = P((1 - S("{}" ))^0)
+local set   = P((1 - S("{},"))^0)
+local other = P(1)
+
+local l_first  = Cs( ( Cc("{") * (C(set) * left * C(var) * right / f_first) * Cc("}")               + other )^0 )
+local l_second = Cs( ( Cc("{") * (left * C(var) * right * C(set) / f_second) * Cc("}")              + other )^0 )
+local l_both   = Cs( ( Cc("{") * (left * C(var) * right * left * C(var) * right / f_both) * Cc("}") + other )^0 )
+local l_rest   = Cs( ( left * var * (left/"") * var * (right/"") * var * right                      + other )^0 )
+
+local stripper_1 = lpeg.stripper ("{}@")
+local replacer_1 = lpeg.replacer { { ",}", ",@}" }, { "{,", "{@," }, }
+
+local function splitpathexpr(str, newlist, validate) -- I couldn't resist lpegging it (nice exercise).
+    if trace_expansions then
+        report_expansions("expanding variable %a",str)
+    end
+    local t, ok, done = newlist or { }, false, false
+    local n = #t
+    str = lpegmatch(replacer_1,str)
+    repeat
+        local old = str
+        repeat
+            local old = str
+            str = lpegmatch(l_first, str)
+        until old == str
+        repeat
+            local old = str
+            str = lpegmatch(l_second,str)
+        until old == str
+        repeat
+            local old = str
+            str = lpegmatch(l_both,  str)
+        until old == str
+        repeat
+            local old = str
+            str = lpegmatch(l_rest,  str)
+        until old == str
+    until old == str -- or not find(str,"{",1,true)
+    str = lpegmatch(stripper_1,str)
+    if validate then
+        for s in gmatch(str,"[^,]+") do
+            s = validate(s)
+            if s then
+                n = n + 1
+                t[n] = s
+            end
+        end
+    else
+        for s in gmatch(str,"[^,]+") do
+            n = n + 1
+            t[n] = s
+        end
+    end
+    if trace_expansions then
+        for k=1,#t do
+            report_expansions("% 4i: %s",k,t[k])
+        end
+    end
+    return t
+end
+
+-- We could make the previous one public.
+
+local function validate(s)
+    s = collapsepath(s) -- already keeps the trailing / and //
+    return s ~= "" and not find(s,"^!*unset/*$") and s
+end
+
+resolvers.validatedpath = validate -- keeps the trailing //
+
+function resolvers.expandedpathfromlist(pathlist)
+    local newlist = { }
+    for k=1,#pathlist do
+        splitpathexpr(pathlist[k],newlist,validate)
+    end
+    return newlist
+end
+
+-- {a,b,c,d}
+-- a,b,c/{p,q,r},d
+-- a,b,c/{p,q,r}/d/{x,y,z}//
+-- a,b,c/{p,q/{x,y,z},r},d/{p,q,r}
+-- a,b,c/{p,q/{x,y,z},r},d/{p,q,r}
+-- a{b,c}{d,e}f
+-- {a,b,c,d}
+-- {a,b,c/{p,q,r},d}
+-- {a,b,c/{p,q,r}/d/{x,y,z}//}
+-- {a,b,c/{p,q/{x,y,z}},d/{p,q,r}}
+-- {a,b,c/{p,q/{x,y,z},w}v,d/{p,q,r}}
+-- {$SELFAUTODIR,$SELFAUTOPARENT}{,{/share,}/texmf{-local,.local,}/web2c}
+
+local usedhomedir = nil
+local donegation  = (P("!") /""     )^0
+local doslashes   = (P("\\")/"/" + 1)^0
+
+local function expandedhome()
+    if not usedhomedir then
+        usedhomedir = lpegmatch(Cs(donegation * doslashes),environment.homedir or "")
+        if usedhomedir == "~" or usedhomedir == "" or not isdir(usedhomedir) then
+            if trace_expansions then
+                report_expansions("no home dir set, ignoring dependent path using current path")
+            end
+            usedhomedir = "."
+        end
+    end
+    return usedhomedir
+end
+
+local dohome  = ((P("~")+P("$HOME")+P("%HOME%"))/expandedhome)^0
+local cleanup = Cs(donegation * dohome * doslashes)
+
+resolvers.cleanpath = function(str)
+    return str and lpegmatch(cleanup,str) or ""
+end
+
+-- print(resolvers.cleanpath(""))
+-- print(resolvers.cleanpath("!"))
+-- print(resolvers.cleanpath("~"))
+-- print(resolvers.cleanpath("~/test"))
+-- print(resolvers.cleanpath("!~/test"))
+-- print(resolvers.cleanpath("~/test~test"))
+
+-- This one strips quotes and funny tokens.
+
+-- we have several options here:
+--
+-- expandhome = P("~") / "$HOME"              : relocateble
+-- expandhome = P("~") / "home:"              : relocateble
+-- expandhome = P("~") / environment.homedir  : frozen but unexpanded
+-- expandhome = P("~") = dohome               : frozen and expanded
+
+local expandhome = P("~") / "$HOME"
+
+local dodouble = P('"') / "" * (expandhome + (1 - P('"')))^0 * P('"') / ""
+local dosingle = P("'") / "" * (expandhome + (1 - P("'")))^0 * P("'") / ""
+local dostring =               (expandhome +  1              )^0
+
+local stripper = Cs(
+    lpegpatterns.unspacer * (dosingle + dodouble + dostring) * lpegpatterns.unspacer
+)
+
+function resolvers.checkedvariable(str) -- assumes str is a string
+    return type(str) == "string" and lpegmatch(stripper,str) or str
+end
+
+-- The path splitter:
+
+-- A config (optionally) has the paths split in tables. Internally
+-- we join them and split them after the expansion has taken place. This
+-- is more convenient.
+
+local cache = { }
+
+----- splitter = lpeg.tsplitat(S(ostype == "windows" and ";" or ":;")) -- maybe add ,
+local splitter = lpeg.tsplitat(";") -- as we move towards urls, prefixes and use tables we no longer do :
+
+local backslashswapper = lpeg.replacer("\\","/")
+
+local function splitconfigurationpath(str) -- beware, this can be either a path or a { specification }
+    if str then
+        local found = cache[str]
+        if not found then
+            if str == "" then
+                found = { }
+            else
+                local split = lpegmatch(splitter,lpegmatch(backslashswapper,str)) -- can be combined
+                found = { }
+                local noffound = 0
+                for i=1,#split do
+                    local s = split[i]
+                    if not find(s,"^{*unset}*") then
+                        noffound = noffound + 1
+                        found[noffound] = s
+                    end
+                end
+                if trace_expansions then
+                    report_expansions("splitting path specification %a",str)
+                    for k=1,noffound do
+                        report_expansions("% 4i: %s",k,found[k])
+                    end
+                end
+                cache[str] = found
+            end
+        end
+        return found
+    end
+end
+
+resolvers.splitconfigurationpath = splitconfigurationpath
+
+function resolvers.splitpath(str)
+    if type(str) == 'table' then
+        return str
+    else
+        return splitconfigurationpath(str)
+    end
+end
+
+function resolvers.joinpath(str)
+    if type(str) == 'table' then
+        return joinpath(str)
+    else
+        return str
+    end
+end
+
+-- The next function scans directories and returns a hash where the
+-- entries are either strings or tables.
+--
+-- starting with . or .. etc or funny char
+--
+-- local l_forbidden = S("~`!#$%^&*()={}[]:;\"\'||\\/<>,?\n\r\t")
+-- local l_confusing = P(" ")
+-- local l_character = lpegpatterns.utf8
+-- local l_dangerous = P(".")
+--
+-- local l_normal = (l_character - l_forbidden - l_confusing - l_dangerous) * (l_character - l_forbidden - l_confusing^2)^0 * P(-1)
+-- ----- l_normal = l_normal * Cc(true) + Cc(false)
+--
+-- local function test(str)
+--     print(str,lpegmatch(l_normal,str))
+-- end
+-- test("ヒラギノ明朝 Pro W3")
+-- test("..ヒラギノ明朝 Pro W3")
+-- test(":ヒラギノ明朝 Pro W3;")
+-- test("ヒラギノ明朝 /Pro W3;")
+-- test("ヒラギノ明朝 Pro  W3")
+
+-- a lot of this caching can be stripped away when we have ssd's everywhere
+--
+-- we could cache all the (sub)paths here if needed
+
+local attributes, directory = lfs.attributes, lfs.dir
+
+local weird          = P(".")^1 + lpeg.anywhere(S("~`!#$%^&*()={}[]:;\"\'||<>,?\n\r\t"))
+local lessweird      = P(".")^1 + lpeg.anywhere(S("~`#$%^&*:;\"\'||<>,?\n\r\t"))
+local timer          = { }
+local scanned        = { }
+local nofscans       = 0
+local scancache      = { }
+local fullcache      = { }
+----- simplecache    = { }
+local nofsharedscans = 0
+
+-- So, we assume either a lowercase name or a mixed case one but only one such case
+-- as having Foo fOo foo FoO FOo etc on the system is braindead in any sane project.
+
+local function scan(files,remap,spec,path,n,m,r,onlyone,tolerant)
+    local full    = path == "" and spec or (spec .. path .. '/')
+    local dirs    = { }
+    local nofdirs = 0
+    local pattern = tolerant and lessweird or weird
+    for name in directory(full) do
+        if not lpegmatch(pattern,name) then
+            local mode = attributes(full..name,"mode")
+            if mode == "file" then
+                n = n + 1
+                local lower = lower(name)
+                local paths = files[lower]
+                if paths then
+                    if onlyone then
+                        -- forget about it
+                    else
+                        if type(paths) == "string" then
+                            files[lower] = { paths, path }
+                        else
+                            paths[#paths+1] = path
+                        end
+                        if name ~= lower then
+                            local rl = remap[lower]
+                            if not rl then
+                                remap[lower] = name
+                                r = r + 1
+                            elseif trace_globbing and rl ~= name then
+                                report_globbing("confusing filename, name: %a, lower: %a, already: %a",name,lower,rl)
+                            end
+                        end
+                    end
+                else -- probably unique anyway
+                    files[lower] = path
+                    if name ~= lower then
+                        local rl = remap[lower]
+                        if not rl then
+                            remap[lower] = name
+                            r = r + 1
+                        elseif trace_globbing and rl ~= name then
+                            report_globbing("confusing filename, name: %a, lower: %a, already: %a",name,lower,rl)
+                        end
+                    end
+                end
+            elseif mode == "directory" then
+                m = m + 1
+                nofdirs = nofdirs + 1
+                if path ~= "" then
+                    dirs[nofdirs] = path .. "/" .. name
+                else
+                    dirs[nofdirs] = name
+                end
+            end
+        end
+    end
+    if nofdirs > 0 then
+        sort(dirs)
+        for i=1,nofdirs do
+            files, remap, n, m, r = scan(files,remap,spec,dirs[i],n,m,r,onlyonce,tolerant)
+        end
+    end
+    scancache[sub(full,1,-2)] = files
+    return files, remap, n, m, r
+end
+
+function resolvers.scanfiles(path,branch,usecache,onlyonce,tolerant)
+    local realpath = resolveprefix(path)
+    if usecache then
+        local content = fullcache[realpath]
+        if content then
+            if trace_locating then
+                report_expansions("using cached scan of path %a, branch %a",path,branch or path)
+            end
+            nofsharedscans = nofsharedscans + 1
+            return content
+        end
+    end
+    --
+    statistics.starttiming(timer)
+    if trace_locating then
+        report_expansions("scanning path %a, branch %a",path,branch or path)
+    end
+    local content
+    if isdir(realpath) then
+        local files, remap, n, m, r = scan({ },{ },realpath .. '/',"",0,0,0,onlyonce,tolerant)
+        content = {
+            metadata = {
+                path        = path, -- can be selfautoparent:texmf-whatever
+                files       = n,
+                directories = m,
+                remappings  = r,
+            },
+            files = files,
+            remap = remap,
+        }
+        if trace_locating then
+            report_expansions("%s files found on %s directories with %s uppercase remappings",n,m,r)
+        end
+    else
+        content = {
+            metadata = {
+                path        = path, -- can be selfautoparent:texmf-whatever
+                files       = 0,
+                directories = 0,
+                remappings  = 0,
+            },
+            files = { },
+            remap = { },
+        }
+        if trace_locating then
+            report_expansions("invalid path %a",realpath)
+        end
+    end
+    if usecache then
+        scanned[#scanned+1] = realpath
+        fullcache[realpath] = content
+    end
+    nofscans = nofscans + 1
+    statistics.stoptiming(timer)
+    return content
+end
+
+function resolvers.simplescanfiles(path,branch,usecache)
+    return resolvers.scanfiles(path,branch,usecache,true,true) -- onlyonce
+end
+
+function resolvers.scandata()
+    table.sort(scanned)
+    return {
+        n      = nofscans,
+        shared = nofsharedscans,
+        time   = statistics.elapsedtime(timer),
+        paths  = scanned,
+    }
+end
+
+function resolvers.get_from_content(content,path,name) -- or (content,name)
+    if not content then
+        return
+    end
+    local files = content.files
+    if not files then
+        return
+    end
+    local remap = content.remap
+    if not remap then
+        return
+    end
+    if name then
+        -- this one resolves a remapped name
+        local used = lower(name)
+        return path, remap[used] or used
+    else
+        -- this one does a lookup and resolves a remapped name
+        local name = path
+        local used = lower(name)
+        local path = files[used]
+        if path then
+            return path, remap[used] or used
+        end
+    end
+end
+
+local nothing = function() end
+
+-- function resolvers.filtered_from_content(content,pattern)
+--     if content and type(pattern) == "string" then
+--         local pattern = lower(pattern)
+--         local files   = content.files
+--         local remap   = content.remap
+--         if files and remap then
+--             local n = next(files)
+--             local function iterator()
+--                 while n do
+--                     local k = n
+--                     n = next(files,k)
+--                     if find(k,pattern) then
+--                         return files[k], remap and remap[k] or k
+--                     end
+--                 end
+--             end
+--             return iterator
+--         end
+--     end
+--     return nothing
+-- end
+
+function resolvers.filtered_from_content(content,pattern)
+    if content and type(pattern) == "string" then
+        local pattern = lower(pattern)
+        local files   = content.files -- we could store the sorted list
+        local remap   = content.remap
+        if files and remap then
+            local f = sortedkeys(files)
+            local n = #f
+            local i = 0
+            local function iterator()
+                while i < n do
+                    i = i + 1
+                    local k = f[i]
+                    if find(k,pattern) then
+                        return files[k], remap and remap[k] or k
+                    end
+                end
+            end
+            return iterator
+        end
+    end
+    return nothing
+end
+
+-- inspect(resolvers.simplescanfiles("e:/temporary/mb-mp"))
+-- inspect(resolvers.scanfiles("e:/temporary/mb-mp"))
author	Context Git Mirror Bot <phg42.2a@gmail.com>	2016-01-12 17:15:07 +0100
committer	Context Git Mirror Bot <phg42.2a@gmail.com>	2016-01-12 17:15:07 +0100
commit	8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree	94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkiv/data-exp.lua
parent	f5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
download	context-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz