diff options
| -rw-r--r-- | NEWS | 5 | ||||
| -rw-r--r-- | README | 10 | ||||
| -rw-r--r-- | lualibs-aux.lua | 257 | ||||
| -rw-r--r-- | lualibs-boolean.lua | 66 | ||||
| -rw-r--r-- | lualibs-dir.lua | 314 | ||||
| -rw-r--r-- | lualibs-file.lua | 726 | ||||
| -rw-r--r-- | lualibs-function.lua | 11 | ||||
| -rw-r--r-- | lualibs-io.lua | 237 | ||||
| -rw-r--r-- | lualibs-lpeg.lua | 829 | ||||
| -rw-r--r-- | lualibs-lua.lua | 393 | ||||
| -rw-r--r-- | lualibs-math.lua | 27 | ||||
| -rw-r--r-- | lualibs-md5.lua | 91 | ||||
| -rw-r--r-- | lualibs-number.lua | 185 | ||||
| -rw-r--r-- | lualibs-os.lua | 267 | ||||
| -rw-r--r-- | lualibs-set.lua | 7 | ||||
| -rw-r--r-- | lualibs-string.lua | 399 | ||||
| -rw-r--r-- | lualibs-table.lua | 1215 | ||||
| -rw-r--r-- | lualibs-unicode.lua | 981 | ||||
| -rw-r--r-- | lualibs-url.lua | 338 | ||||
| -rw-r--r-- | lualibs-util-dim.lua (renamed from lualibs-dimen.lua) | 213 | ||||
| -rw-r--r-- | lualibs-util-jsn.lua | 145 | ||||
| -rw-r--r-- | lualibs-util-lua.lua | 351 | ||||
| -rw-r--r-- | lualibs-util-mrg.lua | 221 | ||||
| -rw-r--r-- | lualibs-util-sto.lua | 189 | ||||
| -rw-r--r-- | lualibs-util-str.lua | 766 | ||||
| -rw-r--r-- | lualibs-util-tab.lua | 493 | ||||
| -rw-r--r-- | lualibs-utils.lua | 176 | ||||
| -rw-r--r-- | lualibs.dtx | 85 | ||||
| -rw-r--r-- | lualibs.lua | 54 | 
29 files changed, 7109 insertions, 1942 deletions
| @@ -1,4 +1,9 @@                          History of the lualibs package +2012/10/19 v0.9/ +    * sync with ConTeXt beta 2012.10.17 +    * move some files to util-* prefix +    * add util-sto util-lua util-sto util-jsn +  2011/01/20 v0.96      * Fix computability with lfs in luatex 0.65 @@ -32,9 +32,7 @@ Manifest  Source files:      lualibs.dtx -    lualibs-aux.lua      lualibs-boolean.lua -    lualibs-dimen.lua      lualibs-dir.lua      lualibs-file.lua      lualibs-io.lua @@ -48,7 +46,13 @@ Source files:      lualibs-table.lua      lualibs-unicode.lua      lualibs-url.lua -    lualibs-utils.lua +    lualibs-util-dim.lua +    lualibs-util-jsn.lua +    lualibs-util-lua.lua +    lualibs-util-mrg.lua +    lualibs-util-sto.lua +    lualibs-util-str.lua +    lualibs-util-tab.lua      README      Makefile      NEWS diff --git a/lualibs-aux.lua b/lualibs-aux.lua deleted file mode 100644 index 7950a03..0000000 --- a/lualibs-aux.lua +++ /dev/null @@ -1,257 +0,0 @@ -if not modules then modules = { } end modules ['l-aux'] = { -    version   = 1.001, -    comment   = "companion to luat-lib.mkiv", -    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", -    copyright = "PRAGMA ADE / ConTeXt Development Team", -    license   = "see context related readme files" -} - --- for inline, no store split : for s in string.gmatch(str,",* *([^,]+)") do .. 
end - -aux = aux or { } - -local concat, format, gmatch = table.concat, string.format, string.gmatch -local tostring, type = tostring, type -local lpegmatch = lpeg.match - -local P, R, V = lpeg.P, lpeg.R, lpeg.V - -local escape, left, right = P("\\"), P('{'), P('}') - -lpeg.patterns.balanced = P { -    [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0, -    [2] = left * V(1) * right -} - -local space     = lpeg.P(' ') -local equal     = lpeg.P("=") -local comma     = lpeg.P(",") -local lbrace    = lpeg.P("{") -local rbrace    = lpeg.P("}") -local nobrace   = 1 - (lbrace+rbrace) -local nested    = lpeg.P { lbrace * (nobrace + lpeg.V(1))^0 * rbrace } -local spaces    = space^0 - -local value     = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0) - -local key       = lpeg.C((1-equal-comma)^1) -local pattern_a = (space+comma)^0 * (key * equal * value + key * lpeg.C("")) -local pattern_c = (space+comma)^0 * (key * equal * value) - -local key       = lpeg.C((1-space-equal-comma)^1) -local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + lpeg.C(""))) - --- "a=1, b=2, c=3, d={a{b,c}d}, e=12345, f=xx{a{b,c}d}xx, g={}" : outer {} removes, leading spaces ignored - -local hash = { } - -local function set(key,value) -- using Carg is slower here -    hash[key] = value -end - -local pattern_a_s = (pattern_a/set)^1 -local pattern_b_s = (pattern_b/set)^1 -local pattern_c_s = (pattern_c/set)^1 - -aux.settings_to_hash_pattern_a = pattern_a_s -aux.settings_to_hash_pattern_b = pattern_b_s -aux.settings_to_hash_pattern_c = pattern_c_s - -function aux.make_settings_to_hash_pattern(set,how) -    if how == "strict" then -        return (pattern_c/set)^1 -    elseif how == "tolerant" then -        return (pattern_b/set)^1 -    else -        return (pattern_a/set)^1 -    end -end - -function aux.settings_to_hash(str,existing) -    if str and str ~= "" then -        hash = existing or { } -        if 
moretolerant then -            lpegmatch(pattern_b_s,str) -        else -            lpegmatch(pattern_a_s,str) -        end -        return hash -    else -        return { } -    end -end - -function aux.settings_to_hash_tolerant(str,existing) -    if str and str ~= "" then -        hash = existing or { } -        lpegmatch(pattern_b_s,str) -        return hash -    else -        return { } -    end -end - -function aux.settings_to_hash_strict(str,existing) -    if str and str ~= "" then -        hash = existing or { } -        lpegmatch(pattern_c_s,str) -        return next(hash) and hash -    else -        return nil -    end -end - -local separator = comma * space^0 -local value     = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0) -local pattern   = lpeg.Ct(value*(separator*value)^0) - --- "aap, {noot}, mies" : outer {} removes, leading spaces ignored - -aux.settings_to_array_pattern = pattern - --- we could use a weak table as cache - -function aux.settings_to_array(str) -    if not str or str == "" then -        return { } -    else -        return lpegmatch(pattern,str) -    end -end - -local function set(t,v) -    t[#t+1] = v -end - -local value   = lpeg.P(lpeg.Carg(1)*value) / set -local pattern = value*(separator*value)^0 * lpeg.Carg(1) - -function aux.add_settings_to_array(t,str) -    return lpegmatch(pattern,str,nil,t) -end - -function aux.hash_to_string(h,separator,yes,no,strict,omit) -    if h then -        local t, s = { }, table.sortedkeys(h) -        omit = omit and table.tohash(omit) -        for i=1,#s do -            local key = s[i] -            if not omit or not omit[key] then -                local value = h[key] -                if type(value) == "boolean" then -                    if yes and no then -                        if value then -                            t[#t+1] = key .. '=' .. yes -                        elseif not strict then -                            t[#t+1] = key .. '=' .. 
no -                        end -                    elseif value or not strict then -                        t[#t+1] = key .. '=' .. tostring(value) -                    end -                else -                    t[#t+1] = key .. '=' .. value -                end -            end -        end -        return concat(t,separator or ",") -    else -        return "" -    end -end - -function aux.array_to_string(a,separator) -    if a then -        return concat(a,separator or ",") -    else -        return "" -    end -end - -function aux.settings_to_set(str,t) -    t = t or { } -    for s in gmatch(str,"%s*([^,]+)") do -        t[s] = true -    end -    return t -end - -local value     = lbrace * lpeg.C((nobrace + nested)^0) * rbrace -local pattern   = lpeg.Ct((space + value)^0) - -function aux.arguments_to_table(str) -    return lpegmatch(pattern,str) -end - --- temporary here - -function aux.getparameters(self,class,parentclass,settings) -    local sc = self[class] -    if not sc then -        sc = table.clone(self[parent]) -        self[class] = sc -    end -    aux.settings_to_hash(settings,sc) -end - --- temporary here - -local digit         = lpeg.R("09") -local period        = lpeg.P(".") -local zero          = lpeg.P("0") -local trailingzeros = zero^0 * -digit -- suggested by Roberto R -local case_1        = period * trailingzeros / "" -local case_2        = period * (digit - trailingzeros)^1 * (trailingzeros / "") -local number        = digit^1 * (case_1 + case_2) -local stripper      = lpeg.Cs((number + 1)^0) - ---~ local sample = "bla 11.00 bla 11 bla 0.1100 bla 1.00100 bla 0.00 bla 0.001 bla 1.1100 bla 0.100100100 bla 0.00100100100" ---~ collectgarbage("collect") ---~ str = string.rep(sample,10000) ---~ local ts = os.clock() ---~ lpegmatch(stripper,str) ---~ print(#str, os.clock()-ts, lpegmatch(stripper,sample)) - -lpeg.patterns.strip_zeros = stripper - -function aux.strip_zeros(str) -    return lpegmatch(stripper,str) -end - -function 
aux.definetable(target) -- defines undefined tables -    local composed, t = nil, { } -    for name in gmatch(target,"([^%.]+)") do -        if composed then -            composed = composed .. "." .. name -        else -            composed = name -        end -        t[#t+1] = format("%s = %s or { }",composed,composed) -    end -    return concat(t,"\n") -end - -function aux.accesstable(target) -    local t = _G -    for name in gmatch(target,"([^%.]+)") do -        t = t[name] -    end -    return t -end - --- as we use this a lot ... - ---~ function aux.cachefunction(action,weak) ---~     local cache = { } ---~     if weak then ---~         setmetatable(cache, { __mode = "kv" } ) ---~     end ---~     local function reminder(str) ---~         local found = cache[str] ---~         if not found then ---~             found = action(str) ---~             cache[str] = found ---~         end ---~         return found ---~     end ---~     return reminder, cache ---~ end diff --git a/lualibs-boolean.lua b/lualibs-boolean.lua index be7ec7d..f087f1a 100644 --- a/lualibs-boolean.lua +++ b/lualibs-boolean.lua @@ -6,36 +6,58 @@ if not modules then modules = { } end modules ['l-boolean'] = {      license   = "see context related readme files"  } -boolean = boolean or { } -  local type, tonumber = type, tonumber +boolean = boolean or { } +local boolean = boolean +  function boolean.tonumber(b) -    if b then return 1 else return 0 end +    if b then return 1 else return 0 end -- test and return or return  end -function toboolean(str,tolerant) -    if tolerant then -        local tstr = type(str) -        if tstr == "string" then -            return str == "true" or str == "yes" or str == "on" or str == "1" or str == "t" -        elseif tstr == "number" then -            return tonumber(str) ~= 0 -        elseif tstr == "nil" then -            return false -        else -            return str -        end +function toboolean(str,tolerant) -- global +    if  str == nil then 
+        return false +    elseif str == false then +        return false +    elseif str == true then +        return true      elseif str == "true" then          return true      elseif str == "false" then          return false +    elseif not tolerant then +        return false +    elseif str == 0 then +        return false +    elseif (tonumber(str) or 0) > 0 then +        return true      else -        return str +        return str == "yes" or str == "on" or str == "t"      end  end -function string.is_boolean(str) +string.toboolean = toboolean + +function string.booleanstring(str) +    if str == "0" then +        return false +    elseif str == "1" then +        return true +    elseif str == "" then +        return false +    elseif str == "false" then +        return false +    elseif str == "true" then +        return true +    elseif (tonumber(str) or 0) > 0 then +        return true +    else +        return str == "yes" or str == "on" or str == "t" +    end +end + +function string.is_boolean(str,default)      if type(str) == "string" then          if str == "true" or str == "yes" or str == "on" or str == "t" then              return true @@ -43,13 +65,5 @@ function string.is_boolean(str)              return false          end      end -    return nil -end - -function boolean.alwaystrue() -    return true -end - -function boolean.falsetrue() -    return false +    return default  end diff --git a/lualibs-dir.lua b/lualibs-dir.lua index 1b9bcbc..00cda38 100644 --- a/lualibs-dir.lua +++ b/lualibs-dir.lua @@ -6,35 +6,92 @@ if not modules then modules = { } end modules ['l-dir'] = {      license   = "see context related readme files"  } --- dir.expand_name will be merged with cleanpath and collapsepath +-- dir.expandname will be merged with cleanpath and collapsepath -local type = type +local type, select = type, select  local find, gmatch, match, gsub = string.find, string.gmatch, string.match, string.gsub +local concat, insert, remove = table.concat, 
table.insert, table.remove  local lpegmatch = lpeg.match +local P, S, R, C, Cc, Cs, Ct, Cv, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cv, lpeg.V +  dir = dir or { } +local dir = dir +local lfs = lfs + +local attributes = lfs.attributes +local walkdir    = lfs.dir +local isdir      = lfs.isdir +local isfile     = lfs.isfile +local currentdir = lfs.currentdir +local chdir      = lfs.chdir + +-- in case we load outside luatex + +if not isdir then +    function isdir(name) +        local a = attributes(name) +        return a and a.mode == "directory" +    end +    lfs.isdir = isdir +end + +if not isfile then +    function isfile(name) +        local a = attributes(name) +        return a and a.mode == "file" +    end +    lfs.isfile = isfile +end  -- handy  function dir.current() -    return (gsub(lfs.currentdir(),"\\","/")) +    return (gsub(currentdir(),"\\","/"))  end --- optimizing for no string.find (*) does not save time +-- optimizing for no find (*) does not save time + +--~ local function globpattern(path,patt,recurse,action) -- fails in recent luatex due to some change in lfs +--~     local ok, scanner +--~     if path == "/" then +--~         ok, scanner = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe +--~     else +--~         ok, scanner = xpcall(function() return walkdir(path)      end, function() end) -- kepler safe +--~     end +--~     if ok and type(scanner) == "function" then +--~         if not find(path,"/$") then path = path .. '/' end +--~         for name in scanner do +--~             local full = path .. 
name +--~             local mode = attributes(full,'mode') +--~             if mode == 'file' then +--~                 if find(full,patt) then +--~                     action(full) +--~                 end +--~             elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then +--~                 globpattern(full,patt,recurse,action) +--~             end +--~         end +--~     end +--~ end + +local lfsisdir = isdir + +local function isdir(path) +    path = gsub(path,"[/\\]+$","") +    return lfsisdir(path) +end -local attributes = lfs.attributes -local walkdir    = lfs.dir +lfs.isdir = isdir -local function glob_pattern(path,patt,recurse,action) -    local ok, scanner, dirobj +local function globpattern(path,patt,recurse,action)      if path == "/" then -        ok, scanner, dirobj = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe -    else -        ok, scanner, dirobj = xpcall(function() return walkdir(path)      end, function() end) -- kepler safe +        path = path .. "." +    elseif not find(path,"/$") then +        path = path .. '/'      end -    if ok and type(scanner) == "function" then -        if not find(path,"/$") then path = path .. '/' end -        for name in scanner, dirobj do +    if isdir(path) then -- lfs.isdir does not like trailing / +        for name in walkdir(path) do -- lfs.dir accepts trailing /              local full = path .. 
name              local mode = attributes(full,'mode')              if mode == 'file' then @@ -42,25 +99,25 @@ local function glob_pattern(path,patt,recurse,action)                      action(full)                  end              elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then -                glob_pattern(full,patt,recurse,action) +                globpattern(full,patt,recurse,action)              end          end      end  end -dir.glob_pattern = glob_pattern +dir.globpattern = globpattern -local function collect_pattern(path,patt,recurse,result) -    local ok, scanner, dirobj +local function collectpattern(path,patt,recurse,result) +    local ok, scanner      result = result or { }      if path == "/" then -        ok, scanner, dirobj = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe +        ok, scanner, first = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe      else -        ok, scanner, dirobj = xpcall(function() return walkdir(path)      end, function() end) -- kepler safe +        ok, scanner, first = xpcall(function() return walkdir(path)      end, function() end) -- kepler safe      end      if ok and type(scanner) == "function" then          if not find(path,"/$") then path = path .. '/' end -        for name in scanner, dirobj do +        for name in scanner, first do              local full = path .. 
name              local attr = attributes(full)              local mode = attr.mode @@ -69,7 +126,7 @@ local function collect_pattern(path,patt,recurse,result)                      result[name] = attr                  end              elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then -                attr.list = collect_pattern(full,patt,recurse) +                attr.list = collectpattern(full,patt,recurse)                  result[name] = attr              end          end @@ -77,9 +134,7 @@ local function collect_pattern(path,patt,recurse,result)      return result  end -dir.collect_pattern = collect_pattern - -local P, S, R, C, Cc, Cs, Ct, Cv, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cv, lpeg.V +dir.collectpattern = collectpattern  local pattern = Ct {      [1] = (C(P(".") + P("/")^1) + C(R("az","AZ") * P(":") * P("/")^0) + Cc("./")) * V(2) * V(3), @@ -103,16 +158,16 @@ local function glob(str,t)              for s=1,#str do                  glob(str[s],t)              end -        elseif lfs.isfile(str) then +        elseif isfile(str) then              t(str)          else -            local split = lpegmatch(pattern,str) +            local split = lpegmatch(pattern,str) -- we could use the file splitter              if split then                  local root, path, base = split[1], split[2], split[3]                  local recurse = find(base,"%*%*")                  local start = root .. path                  local result = lpegmatch(filter,start .. 
base) -                glob_pattern(start,result,recurse,t) +                globpattern(start,result,recurse,t)              end          end      else @@ -122,12 +177,15 @@ local function glob(str,t)                  glob(str[s],t)              end              return t -        elseif lfs.isfile(str) then -            local t = t or { } -            t[#t+1] = str -            return t +        elseif isfile(str) then +            if t then +                t[#t+1] = str +                return t +            else +                return { str } +            end          else -            local split = lpegmatch(pattern,str) +            local split = lpegmatch(pattern,str) -- we could use the file splitter              if split then                  local t = t or { }                  local action = action or function(name) t[#t+1] = name end @@ -135,7 +193,7 @@ local function glob(str,t)                  local recurse = find(base,"%*%*")                  local start = root .. path                  local result = lpegmatch(filter,start .. base) -                glob_pattern(start,result,recurse,action) +                globpattern(start,result,recurse,action)                  return t              else                  return { } @@ -154,10 +212,11 @@ dir.glob = glob  local function globfiles(path,recurse,func,files) -- func == pattern or function      if type(func) == "string" then -        local s = func -- alas, we need this indirect way +        local s = func          func = function(name) return find(name,s) end      end      files = files or { } +    local noffiles = #files      for name in walkdir(path) do          if find(name,"^%.") then              --- skip @@ -168,12 +227,9 @@ local function globfiles(path,recurse,func,files) -- func == pattern or function                      globfiles(path .. "/" .. 
name,recurse,func,files)                  end              elseif mode == "file" then -                if func then -                    if func(name) then -                        files[#files+1] = path .. "/" .. name -                    end -                else -                    files[#files+1] = path .. "/" .. name +                if not func or func(name) then +                    noffiles = noffiles + 1 +                    files[noffiles] = path .. "/" .. name                  end              end          end @@ -191,7 +247,7 @@ dir.globfiles = globfiles  -- print(dir.ls("*.tex"))  function dir.ls(pattern) -    return table.concat(glob(pattern),"\n") +    return concat(glob(pattern),"\n")  end  --~ mkdirs("temp") @@ -201,18 +257,20 @@ end  local make_indeed = true -- false -if string.find(os.getenv("PATH"),";") then -- os.type == "windows" +local onwindows = os.type == "windows" or find(os.getenv("PATH"),";") + +if onwindows then      function dir.mkdirs(...) -        local str, pth, t = "", "", { ... } -        for i=1,#t do -            local s = t[i] -            if s ~= "" then -                if str ~= "" then -                    str = str .. "/" .. s -                else -                    str = s -                end +        local str, pth = "", "" +        for i=1,select("#",...) do +            local s = select(i,...) +            if s == "" then +                -- skip +            elseif str == "" then +                str = s +            else +                str = str .. "/" .. s              end          end          local first, middle, last @@ -250,64 +308,32 @@ if string.find(os.getenv("PATH"),";") then -- os.type == "windows"              else                  pth = pth .. "/" .. 
s              end -            if make_indeed and not lfs.isdir(pth) then +            if make_indeed and not isdir(pth) then                  lfs.mkdir(pth)              end          end -        return pth, (lfs.isdir(pth) == true) +        return pth, (isdir(pth) == true)      end ---~         print(dir.mkdirs("","","a","c")) ---~         print(dir.mkdirs("a")) ---~         print(dir.mkdirs("a:")) ---~         print(dir.mkdirs("a:/b/c")) ---~         print(dir.mkdirs("a:b/c")) ---~         print(dir.mkdirs("a:/bbb/c")) ---~         print(dir.mkdirs("/a/b/c")) ---~         print(dir.mkdirs("/aaa/b/c")) ---~         print(dir.mkdirs("//a/b/c")) ---~         print(dir.mkdirs("///a/b/c")) ---~         print(dir.mkdirs("a/bbb//ccc/")) - -    function dir.expand_name(str) -- will be merged with cleanpath and collapsepath -        local first, nothing, last = match(str,"^(//)(//*)(.*)$") -        if first then -            first = dir.current() .. "/" -        end -        if not first then -            first, last = match(str,"^(//)/*(.*)$") -        end -        if not first then -            first, last = match(str,"^([a-zA-Z]:)(.*)$") -            if first and not find(last,"^/") then -                local d = lfs.currentdir() -                if lfs.chdir(first) then -                    first = dir.current() -                end -                lfs.chdir(d) -            end -        end -        if not first then -            first, last = dir.current(), str -        end -        last = gsub(last,"//","/") -        last = gsub(last,"/%./","/") -        last = gsub(last,"^/*","") -        first = gsub(first,"/*$","") -        if last == "" then -            return first -        else -            return first .. "/" .. 
last -        end -    end +    --~ print(dir.mkdirs("","","a","c")) +    --~ print(dir.mkdirs("a")) +    --~ print(dir.mkdirs("a:")) +    --~ print(dir.mkdirs("a:/b/c")) +    --~ print(dir.mkdirs("a:b/c")) +    --~ print(dir.mkdirs("a:/bbb/c")) +    --~ print(dir.mkdirs("/a/b/c")) +    --~ print(dir.mkdirs("/aaa/b/c")) +    --~ print(dir.mkdirs("//a/b/c")) +    --~ print(dir.mkdirs("///a/b/c")) +    --~ print(dir.mkdirs("a/bbb//ccc/"))  else      function dir.mkdirs(...) -        local str, pth, t = "", "", { ... } -        for i=1,#t do -            local s = t[i] -            if s ~= "" then +        local str, pth = "", "" +        for i=1,select("#",...) do +            local s = select(i,...) +            if s and s ~= "" then -- we catch nil and false                  if str ~= "" then                      str = str .. "/" .. s                  else @@ -325,7 +351,7 @@ else                  else                      pth = pth .. "/" .. s                  end -                if make_indeed and not first and not lfs.isdir(pth) then +                if make_indeed and not first and not isdir(pth) then                      lfs.mkdir(pth)                  end              end @@ -333,31 +359,91 @@ else              pth = "."              for s in gmatch(str,"[^/]+") do                  pth = pth .. "/" .. 
s -                if make_indeed and not lfs.isdir(pth) then +                if make_indeed and not isdir(pth) then                      lfs.mkdir(pth)                  end              end          end -        return pth, (lfs.isdir(pth) == true) +        return pth, (isdir(pth) == true) +    end + +    --~ print(dir.mkdirs("","","a","c")) +    --~ print(dir.mkdirs("a")) +    --~ print(dir.mkdirs("/a/b/c")) +    --~ print(dir.mkdirs("/aaa/b/c")) +    --~ print(dir.mkdirs("//a/b/c")) +    --~ print(dir.mkdirs("///a/b/c")) +    --~ print(dir.mkdirs("a/bbb//ccc/")) + +end + +dir.makedirs = dir.mkdirs + +-- we can only define it here as it uses dir.current + +if onwindows then + +    function dir.expandname(str) -- will be merged with cleanpath and collapsepath +        local first, nothing, last = match(str,"^(//)(//*)(.*)$") +        if first then +            first = dir.current() .. "/" -- dir.current sanitizes +        end +        if not first then +            first, last = match(str,"^(//)/*(.*)$") +        end +        if not first then +            first, last = match(str,"^([a-zA-Z]:)(.*)$") +            if first and not find(last,"^/") then +                local d = currentdir() +                if chdir(first) then +                    first = dir.current() +                end +                chdir(d) +            end +        end +        if not first then +            first, last = dir.current(), str +        end +        last = gsub(last,"//","/") +        last = gsub(last,"/%./","/") +        last = gsub(last,"^/*","") +        first = gsub(first,"/*$","") +        if last == "" or last == "." then +            return first +        else +            return first .. "/" .. 
last +        end      end ---~         print(dir.mkdirs("","","a","c")) ---~         print(dir.mkdirs("a")) ---~         print(dir.mkdirs("/a/b/c")) ---~         print(dir.mkdirs("/aaa/b/c")) ---~         print(dir.mkdirs("//a/b/c")) ---~         print(dir.mkdirs("///a/b/c")) ---~         print(dir.mkdirs("a/bbb//ccc/")) +else -    function dir.expand_name(str) -- will be merged with cleanpath and collapsepath +    function dir.expandname(str) -- will be merged with cleanpath and collapsepath          if not find(str,"^/") then -            str = lfs.currentdir() .. "/" .. str +            str = currentdir() .. "/" .. str          end          str = gsub(str,"//","/")          str = gsub(str,"/%./","/") +        str = gsub(str,"(.)/%.$","%1")          return str      end  end -dir.makedirs = dir.mkdirs +file.expandname = dir.expandname -- for convenience + +local stack = { } + +function dir.push(newdir) +    insert(stack,currentdir()) +    if newdir and newdir ~= "" then +        chdir(newdir) +    end +end + +function dir.pop() +    local d = remove(stack) +    if d then +        chdir(d) +    end +    return d +end diff --git a/lualibs-file.lua b/lualibs-file.lua index 2bfc070..af86f93 100644 --- a/lualibs-file.lua +++ b/lualibs-file.lua @@ -8,293 +8,519 @@ if not modules then modules = { } end modules ['l-file'] = {  -- needs a cleanup -file = file or { } +file       = file or { } +local file = file -local concat = table.concat -local find, gmatch, match, gsub, sub, char = string.find, string.gmatch, string.match, string.gsub, string.sub, string.char -local lpegmatch = lpeg.match - -function file.removesuffix(filename) -    return (gsub(filename,"%.[%a%d]+$","")) +if not lfs then +    lfs = optionalrequire("lfs")  end -function file.addsuffix(filename, suffix) -    if not suffix or suffix == "" then -        return filename -    elseif not find(filename,"%.[%a%d]+$") then -        return filename .. "." .. 
suffix -    else -        return filename +if not lfs then + +    lfs = { +        getcurrentdir = function() +            return "." +        end, +        attributes = function() +            return nil +        end, +        isfile = function(name) +            local f = io.open(name,'rb') +            if f then +                f:close() +                return true +            end +        end, +        isdir = function(name) +            print("you need to load lfs") +            return false +        end +    } + +elseif not lfs.isfile then + +    local attributes = lfs.attributes + +    function lfs.isdir(name) +        return attributes(name,"mode") == "directory" +    end + +    function lfs.isfile(name) +        return attributes(name,"mode") == "file"      end + + -- function lfs.isdir(name) + --     local a = attributes(name) + --     return a and a.mode == "directory" + -- end + + -- function lfs.isfile(name) + --     local a = attributes(name) + --     return a and a.mode == "file" + -- end +  end -function file.replacesuffix(filename, suffix) -    return (gsub(filename,"%.[%a%d]+$","")) .. "." .. 
suffix +local insert, concat = table.insert, table.concat +local match = string.match +local lpegmatch = lpeg.match +local getcurrentdir, attributes = lfs.currentdir, lfs.attributes +local checkedsplit = string.checkedsplit + +-- local patterns = file.patterns or { } +-- file.patterns  = patterns + +local P, R, S, C, Cs, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cp, lpeg.Cc, lpeg.Ct + +local colon     = P(":") +local period    = P(".") +local periods   = P("..") +local fwslash   = P("/") +local bwslash   = P("\\") +local slashes   = S("\\/") +local noperiod  = 1-period +local noslashes = 1-slashes +local name      = noperiod^1 +local suffix    = period/"" * (1-period-slashes)^1 * -1 + +----- pattern = C((noslashes^0 * slashes^1)^1) +local pattern = C((1 - (slashes^1 * noslashes^1 * -1))^1) * P(1) -- there must be a more efficient way + +local function pathpart(name,default) +    return name and lpegmatch(pattern,name) or default or ""  end -function file.dirname(name,default) -    return match(name,"^(.+)[/\\].-$") or (default or "") +local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1 + +local function basename(name) +    return name and lpegmatch(pattern,name) or name  end -function file.basename(name) -    return match(name,"^.+[/\\](.-)$") or name +-- print(pathpart("file")) +-- print(pathpart("dir/file")) +-- print(pathpart("/dir/file")) +-- print(basename("file")) +-- print(basename("dir/file")) +-- print(basename("/dir/file")) + +local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0 + +local function nameonly(name) +    return name and lpegmatch(pattern,name) or name  end -function file.nameonly(name) -    return (gsub(match(name,"^.+[/\\](.-)$") or name,"%..*$","")) +local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1 + +local function suffixonly(name) +    return name and lpegmatch(pattern,name) or ""  end -function file.extname(name,default) -    return 
match(name,"^.+%.([^/\\]-)$") or default or "" +file.pathpart   = pathpart +file.basename   = basename +file.nameonly   = nameonly +file.suffixonly = suffixonly +file.suffix     = suffixonly + +file.dirname    = pathpart   -- obsolete +file.extname    = suffixonly -- obsolete + +-- actually these are schemes + +local drive  = C(R("az","AZ")) * colon +local path   = C((noslashes^0 * slashes)^0) +local suffix = period * C(P(1-period)^0 * P(-1)) +local base   = C((1-suffix)^0) +local rest   = C(P(1)^0) + +drive  = drive  + Cc("") +path   = path   + Cc("") +base   = base   + Cc("") +suffix = suffix + Cc("") + +local pattern_a =   drive * path  *   base * suffix +local pattern_b =           path  *   base * suffix +local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra captures +local pattern_d =           path  *   rest + +function file.splitname(str,splitdrive) +    if not str then +        -- error +    elseif splitdrive then +        return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix +    else +        return lpegmatch(pattern_b,str) -- returns path, base, suffix +    end  end -file.suffix = file.extname - ---~ function file.join(...) ---~     local pth = concat({...},"/") ---~     pth = gsub(pth,"\\","/") ---~     local a, b = match(pth,"^(.*://)(.*)$") ---~     if a and b then ---~         return a .. gsub(b,"//+","/") ---~     end ---~     a, b = match(pth,"^(//)(.*)$") ---~     if a and b then ---~         return a .. gsub(b,"//+","/") ---~     end ---~     return (gsub(pth,"//+","/")) ---~ end - -local trick_1 = char(1) -local trick_2 = "^" .. trick_1 .. "/+" +function file.splitbase(str) +    return str and lpegmatch(pattern_d,str) -- returns path, base+suffix +end -function file.join(...) -    local lst = { ... 
} -    local a, b = lst[1], lst[2] -    if a == "" then -        lst[1] = trick_1 -    elseif b and find(a,"^/+$") and find(b,"^/") then -        lst[1] = "" -        lst[2] = gsub(b,"^/+","") +---- stripslash = C((1 - P("/")^1*P(-1))^0) + +function file.nametotable(str,splitdrive) +    if str then +        local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) +     -- if path ~= "" then +     --     path = lpegmatch(stripslash,path) -- unfortunate hack, maybe this becomes default +     -- end +        if splitdrive then +            return { +                path    = path, +                drive   = drive, +                subpath = subpath, +                name    = name, +                base    = base, +                suffix  = suffix, +            } +        else +            return { +                path    = path, +                name    = name, +                base    = base, +                suffix  = suffix, +            } +        end      end -    local pth = concat(lst,"/") -    pth = gsub(pth,"\\","/") -    local a, b = match(pth,"^(.*://)(.*)$") -    if a and b then -        return a .. 
gsub(b,"//+","/") +end + +-- print(file.splitname("file")) +-- print(file.splitname("dir/file")) +-- print(file.splitname("/dir/file")) +-- print(file.splitname("file")) +-- print(file.splitname("dir/file")) +-- print(file.splitname("/dir/file")) + +-- inspect(file.nametotable("file.ext")) +-- inspect(file.nametotable("dir/file.ext")) +-- inspect(file.nametotable("/dir/file.ext")) +-- inspect(file.nametotable("file.ext")) +-- inspect(file.nametotable("dir/file.ext")) +-- inspect(file.nametotable("/dir/file.ext")) + +----- pattern = Cs(((period * noperiod^1 * -1) / "" + 1)^1) +local pattern = Cs(((period * (1-period-slashes)^1 * -1) / "" + 1)^1) + +function file.removesuffix(name) +    return name and lpegmatch(pattern,name) +end + +-- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 +-- +-- function file.addsuffix(name, suffix) +--     local p = lpegmatch(pattern,name) +--     if p then +--         return name +--     else +--         return name .. "." .. suffix +--     end +-- end + +local suffix  = period/"" * (1-period-slashes)^1 * -1 +local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix) + +function file.addsuffix(filename,suffix,criterium) +    if not filename or not suffix or suffix == "" then +        return filename +    elseif criterium == true then +        return filename .. "." .. suffix +    elseif not criterium then +        local n, s = lpegmatch(pattern,filename) +        if not s or s == "" then +            return filename .. "." .. 
suffix +        else +            return filename +        end +    else +        local n, s = lpegmatch(pattern,filename) +        if s and s ~= "" then +            local t = type(criterium) +            if t == "table" then +                -- keep if in criterium +                for i=1,#criterium do +                    if s == criterium[i] then +                        return filename +                    end +                end +            elseif t == "string" then +                -- keep if criterium +                if s == criterium then +                    return filename +                end +            end +        end +        return (n or filename) .. "." .. suffix      end -    a, b = match(pth,"^(//)(.*)$") -    if a and b then -        return a .. gsub(b,"//+","/") +end + +-- print("1 " .. file.addsuffix("name","new")                   .. " -> name.new") +-- print("2 " .. file.addsuffix("name.old","new")               .. " -> name.old") +-- print("3 " .. file.addsuffix("name.old","new",true)          .. " -> name.old.new") +-- print("4 " .. file.addsuffix("name.old","new","new")         .. " -> name.new") +-- print("5 " .. file.addsuffix("name.old","new","old")         .. " -> name.old") +-- print("6 " .. file.addsuffix("name.old","new","foo")         .. " -> name.new") +-- print("7 " .. file.addsuffix("name.old","new",{"foo","bar"}) .. " -> name.new") +-- print("8 " .. file.addsuffix("name.old","new",{"old","bar"}) .. " -> name.old") + +local suffix  = period * (1-period-slashes)^1 * -1 +local pattern = Cs((1-suffix)^0) + +function file.replacesuffix(name,suffix) +    if name and suffix and suffix ~= "" then +        return lpegmatch(pattern,name) .. "." .. 
suffix +    else +        return name      end -    pth = gsub(pth,trick_2,"") -    return (gsub(pth,"//+","/"))  end ---~ print(file.join("//","/y")) ---~ print(file.join("/","/y")) ---~ print(file.join("","/y")) ---~ print(file.join("/x/","/y")) ---~ print(file.join("x/","/y")) ---~ print(file.join("http://","/y")) ---~ print(file.join("http://a","/y")) ---~ print(file.join("http:///a","/y")) ---~ print(file.join("//nas-1","/y")) - -function file.iswritable(name) -    local a = lfs.attributes(name) or lfs.attributes(file.dirname(name,".")) -    return a and sub(a.permissions,2,2) == "w" +-- + +local reslasher = lpeg.replacer(P("\\"),"/") + +function file.reslash(str) +    return str and lpegmatch(reslasher,str)  end -function file.isreadable(name) -    local a = lfs.attributes(name) -    return a and sub(a.permissions,1,1) == "r" +-- We should be able to use: +-- +-- local writable = P(1) * P("w") * Cc(true) +-- +-- function file.is_writable(name) +--     local a = attributes(name) or attributes(pathpart(name,".")) +--     return a and lpegmatch(writable,a.permissions) or false +-- end +-- +-- But after some testing Taco and I came up with the more robust +-- variant: + +function file.is_writable(name) +    if not name then +        -- error +    elseif lfs.isdir(name) then +        name = name .. 
"/m_t_x_t_e_s_t.tmp" +        local f = io.open(name,"wb") +        if f then +            f:close() +            os.remove(name) +            return true +        end +    elseif lfs.isfile(name) then +        local f = io.open(name,"ab") +        if f then +            f:close() +            return true +        end +    else +        local f = io.open(name,"ab") +        if f then +            f:close() +            os.remove(name) +            return true +        end +    end +    return false  end -file.is_readable = file.isreadable -file.is_writable = file.iswritable +local readable = P("r") * Cc(true) --- todo: lpeg +function file.is_readable(name) +    if name then +        local a = attributes(name) +        return a and lpegmatch(readable,a.permissions) or false +    else +        return false +    end +end ---~ function file.split_path(str) ---~     local t = { } ---~     str = gsub(str,"\\", "/") ---~     str = gsub(str,"(%a):([;/])", "%1\001%2") ---~     for name in gmatch(str,"([^;:]+)") do ---~         if name ~= "" then ---~             t[#t+1] = gsub(name,"\001",":") ---~         end ---~     end ---~     return t ---~ end +file.isreadable = file.is_readable -- depricated +file.iswritable = file.is_writable -- depricated -local checkedsplit = string.checkedsplit +function file.size(name) +    if name then +        local a = attributes(name) +        return a and a.size or 0 +    else +        return 0 +    end +end -function file.split_path(str,separator) -    str = gsub(str,"\\","/") -    return checkedsplit(str,separator or io.pathseparator) +function file.splitpath(str,separator) -- string .. 
reslash is a bonus (we could do a direct split) +    return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator)  end -function file.join_path(tab) -    return concat(tab,io.pathseparator) -- can have trailing // +function file.joinpath(tab,separator) -- table +    return tab and concat(tab,separator or io.pathseparator) -- can have trailing //  end --- we can hash them weakly - -function file.collapse_path(str) -    str = gsub(str,"\\","/") -    if find(str,"/") then -        str = gsub(str,"^%./",(gsub(lfs.currentdir(),"\\","/")) .. "/") -- ./xx in qualified -        str = gsub(str,"/%./","/") -        local n, m = 1, 1 -        while n > 0 or m > 0 do -            str, n = gsub(str,"[^/%.]+/%.%.$","") -            str, m = gsub(str,"[^/%.]+/%.%./","") +local stripper  = Cs(P(fwslash)^0/"" * reslasher) +local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon +local isroot    = fwslash^1 * -1 +local hasroot   = fwslash^1 + +local deslasher = lpeg.replacer(S("\\/")^1,"/") + +-- If we have a network or prefix then there is a chance that we end up with two +-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2) +-- split and rebuild as url. Of course we could assume no network paths (which +-- makes sense) and assume either mapped drives (windows) or mounts (unix) but +-- then we still have to deal with urls ... anyhow, multiple // are never a real +-- problem but just ugly. + +function file.join(...) +    local lst = { ... } +    local one = lst[1] +    if lpegmatch(isnetwork,one) then +        local two = lpegmatch(deslasher,concat(lst,"/",2)) +        return one .. "/" .. two +    elseif lpegmatch(isroot,one) then +        local two = lpegmatch(deslasher,concat(lst,"/",2)) +        if lpegmatch(hasroot,two) then +            return two +        else +            return "/" .. 
two          end -        str = gsub(str,"([^/])/$","%1") -    --  str = gsub(str,"^%./","") -- ./xx in qualified -        str = gsub(str,"/%.$","") +    elseif one == "" then +        return lpegmatch(stripper,concat(lst,"/",2)) +    else +        return lpegmatch(deslasher,concat(lst,"/"))      end -    if str == "" then str = "." end -    return str  end ---~ print(file.collapse_path("/a")) ---~ print(file.collapse_path("a/./b/..")) ---~ print(file.collapse_path("a/aa/../b/bb")) ---~ print(file.collapse_path("a/../..")) ---~ print(file.collapse_path("a/.././././b/..")) ---~ print(file.collapse_path("a/./././b/..")) ---~ print(file.collapse_path("a/b/c/../..")) +-- print(file.join("c:/whatever","name")) +-- print(file.join("//","/y")) +-- print(file.join("/","/y")) +-- print(file.join("","/y")) +-- print(file.join("/x/","/y")) +-- print(file.join("x/","/y")) +-- print(file.join("http://","/y")) +-- print(file.join("http://a","/y")) +-- print(file.join("http:///a","/y")) +-- print(file.join("//nas-1","/y")) + +-- The previous one fails on "a.b/c"  so Taco came up with a split based +-- variant. After some skyping we got it sort of compatible with the old +-- one. After that the anchoring to currentdir was added in a better way. +-- Of course there are some optimizations too. Finally we had to deal with +-- windows drive prefixes and things like sys://. Eventually gsubs and +-- finds were replaced by lpegs. + +local drivespec    = R("az","AZ")^1 * colon +local anchors      = fwslash + drivespec +local untouched    = periods + (1-period)^1 * P(-1) +local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * Ct(lpeg.splitat(S("/\\")^1)) +local absolute     = fwslash + +function file.collapsepath(str,anchor) +    if not str then +        return +    end +    if anchor and not lpegmatch(anchors,str) then +        str = getcurrentdir() .. "/" .. str +    end +    if str == "" or str =="." then +        return "." 
+    elseif lpegmatch(untouched,str) then +        return lpegmatch(reslasher,str) +    end +    local starter, oldelements = lpegmatch(splitstarter,str) +    local newelements = { } +    local i = #oldelements +    while i > 0 do +        local element = oldelements[i] +        if element == '.' then +            -- do nothing +        elseif element == '..' then +            local n = i - 1 +            while n > 0 do +                local element = oldelements[n] +                if element ~= '..' and element ~= '.' then +                    oldelements[n] = '.' +                    break +                else +                    n = n - 1 +                end +             end +            if n < 1 then +               insert(newelements,1,'..') +            end +        elseif element ~= "" then +            insert(newelements,1,element) +        end +        i = i - 1 +    end +    if #newelements == 0 then +        return starter or "." +    elseif starter then +        return starter .. concat(newelements, '/') +    elseif lpegmatch(absolute,str) then +        return "/" .. 
concat(newelements,'/') +    else +        return concat(newelements, '/') +    end +end -function file.robustname(str) -    return (gsub(str,"[^%a%d%/%-%.\\]+","-")) +-- local function test(str) +--    print(string.format("%-20s %-15s %-15s",str,file.collapsepath(str),file.collapsepath(str,true))) +-- end +-- test("a/b.c/d") test("b.c/d") test("b.c/..") +-- test("/") test("c:/..") test("sys://..") +-- test("") test("./") test(".") test("..") test("./..") test("../..") +-- test("a") test("./a") test("/a") test("a/../..") +-- test("a/./b/..") test("a/aa/../b/bb") test("a/.././././b/..") test("a/./././b/..") +-- test("a/b/c/../..") test("./a/b/c/../..") test("a/b/c/../..") + +local validchars = R("az","09","AZ","--","..") +local pattern_a  = lpeg.replacer(1-validchars) +local pattern_a  = Cs((validchars + P(1)/"-")^1) +local whatever   = P("-")^0 / "" +local pattern_b  = Cs(whatever * (1 - whatever * -1)^1) + +function file.robustname(str,strict) +    if str then +        str = lpegmatch(pattern_a,str) or str +        if strict then +            return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) +        else +            return str +        end +    end  end  file.readdata = io.loaddata  file.savedata = io.savedata  function file.copy(oldname,newname) -    file.savedata(newname,io.loaddata(oldname)) +    if oldname and newname then +        local data = io.loaddata(oldname) +        if data and data ~= "" then +            file.savedata(newname,data) +        end +    end  end --- lpeg variants, slightly faster, not always - ---~ local period    = lpeg.P(".") ---~ local slashes   = lpeg.S("\\/") ---~ local noperiod  = 1-period ---~ local noslashes = 1-slashes ---~ local name      = noperiod^1 - ---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.C(noperiod^1) * -1 - ---~ function file.extname(name) ---~     return lpegmatch(pattern,name) or "" ---~ end - ---~ local pattern = lpeg.Cs(((period * noperiod^1 * 
-1)/"" + 1)^1) - ---~ function file.removesuffix(name) ---~     return lpegmatch(pattern,name) ---~ end - ---~ local pattern = (noslashes^0 * slashes)^1 * lpeg.C(noslashes^1) * -1 - ---~ function file.basename(name) ---~     return lpegmatch(pattern,name) or name ---~ end - ---~ local pattern = (noslashes^0 * slashes)^1 * lpeg.Cp() * noslashes^1 * -1 - ---~ function file.dirname(name) ---~     local p = lpegmatch(pattern,name) ---~     if p then ---~         return sub(name,1,p-2) ---~     else ---~         return "" ---~     end ---~ end - ---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.Cp() * noperiod^1 * -1 - ---~ function file.addsuffix(name, suffix) ---~     local p = lpegmatch(pattern,name) ---~     if p then ---~         return name ---~     else ---~         return name .. "." .. suffix ---~     end ---~ end - ---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.Cp() * noperiod^1 * -1 - ---~ function file.replacesuffix(name,suffix) ---~     local p = lpegmatch(pattern,name) ---~     if p then ---~         return sub(name,1,p-2) .. "." .. suffix ---~     else ---~         return name .. "." .. 
suffix ---~     end ---~ end - ---~ local pattern = (noslashes^0 * slashes)^0 * lpeg.Cp() * ((noperiod^1 * period)^1 * lpeg.Cp() + lpeg.P(true)) * noperiod^1 * -1 - ---~ function file.nameonly(name) ---~     local a, b = lpegmatch(pattern,name) ---~     if b then ---~         return sub(name,a,b-2) ---~     elseif a then ---~         return sub(name,a) ---~     else ---~         return name ---~     end ---~ end - ---~ local test = file.extname ---~ local test = file.basename ---~ local test = file.dirname ---~ local test = file.addsuffix ---~ local test = file.replacesuffix ---~ local test = file.nameonly - ---~ print(1,test("./a/b/c/abd.def.xxx","!!!")) ---~ print(2,test("./../b/c/abd.def.xxx","!!!")) ---~ print(3,test("a/b/c/abd.def.xxx","!!!")) ---~ print(4,test("a/b/c/def.xxx","!!!")) ---~ print(5,test("a/b/c/def","!!!")) ---~ print(6,test("def","!!!")) ---~ print(7,test("def.xxx","!!!")) - ---~ local tim = os.clock() for i=1,250000 do local ext = test("abd.def.xxx","!!!") end print(os.clock()-tim) -  -- also rewrite previous -local letter    = lpeg.R("az","AZ") + lpeg.S("_-+") -local separator = lpeg.P("://") +local letter    = R("az","AZ") + S("_-+") +local separator = P("://") + +local qualified = period^0 * fwslash +                + letter   * colon +                + letter^1 * separator +                + letter^1 * fwslash +local rootbased = fwslash +                + letter * colon -local qualified = lpeg.P(".")^0 * lpeg.P("/") + letter*lpeg.P(":") + letter^1*separator + letter^1 * lpeg.P("/") -local rootbased = lpeg.P("/") + letter*lpeg.P(":") +lpeg.patterns.qualified = qualified +lpeg.patterns.rootbased = rootbased  -- ./name ../name  /name c: :// name/name  function file.is_qualified_path(filename) -    return lpegmatch(qualified,filename) ~= nil +    return filename and lpegmatch(qualified,filename) ~= nil  end  function file.is_rootbased_path(filename) -    return lpegmatch(rootbased,filename) ~= nil -end - -local slash  = lpeg.S("\\/") -local 
period = lpeg.P(".") -local drive  = lpeg.C(lpeg.R("az","AZ")) * lpeg.P(":") -local path   = lpeg.C(((1-slash)^0 * slash)^0) -local suffix = period * lpeg.C(lpeg.P(1-period)^0 * lpeg.P(-1)) -local base   = lpeg.C((1-suffix)^0) - -local pattern = (drive + lpeg.Cc("")) * (path + lpeg.Cc("")) * (base + lpeg.Cc("")) * (suffix + lpeg.Cc("")) - -function file.splitname(str) -- returns drive, path, base, suffix -    return lpegmatch(pattern,str) +    return filename and lpegmatch(rootbased,filename) ~= nil  end  -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end @@ -304,11 +530,35 @@ end  -- test { "/aa", "/aa/bb", "/aa/bb/cc", "/aa/bb/cc.dd", "/aa/bb/cc.dd.ee" }  -- test { "aa", "aa/bb", "aa/bb/cc", "aa/bb/cc.dd", "aa/bb/cc.dd.ee" } ---~ -- todo: ---~ ---~ if os.type == "windows" then ---~     local currentdir = lfs.currentdir ---~     function lfs.currentdir() ---~         return (gsub(currentdir(),"\\","/")) ---~     end ---~ end +-- -- maybe: +-- +-- if os.type == "windows" then +--     local currentdir = getcurrentdir +--     function getcurrentdir() +--         return lpegmatch(reslasher,currentdir()) +--     end +-- end + +-- for myself: + +function file.strip(name,dir) +    if name then +        local b, a = match(name,"^(.-)" .. dir .. "(.*)$") +        return a ~= "" and a or name +    end +end + +-- local debuglist = { +--     "pathpart", "basename", "nameonly", "suffixonly", "suffix", "dirname", "extname", +--     "addsuffix", "removesuffix", "replacesuffix", "join", +--     "strip","collapsepath", "joinpath", "splitpath", +-- } + +-- for i=1,#debuglist do +--     local name = debuglist[i] +--     local f = file[name] +--     file[name] = function(...) +--         print(name,f(...)) +--         return f(...) 
+--     end +-- end diff --git a/lualibs-function.lua b/lualibs-function.lua new file mode 100644 index 0000000..7ded8ce --- /dev/null +++ b/lualibs-function.lua @@ -0,0 +1,11 @@ +if not modules then modules = { } end modules ['l-functions'] = { +    version   = 1.001, +    comment   = "companion to luat-lib.mkiv", +    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", +    copyright = "PRAGMA ADE / ConTeXt Development Team", +    license   = "see context related readme files" +} + +functions = functions or { } + +function functions.dummy() end diff --git a/lualibs-io.lua b/lualibs-io.lua index 66e2793..06e1fb5 100644 --- a/lualibs-io.lua +++ b/lualibs-io.lua @@ -6,7 +6,11 @@ if not modules then modules = { } end modules ['l-io'] = {      license   = "see context related readme files"  } -local byte, find, gsub = string.byte, string.find, string.gsub +local io = io +local byte, find, gsub, format = string.byte, string.find, string.gsub, string.format +local concat = table.concat +local floor = math.floor +local type = type  if string.find(os.getenv("PATH"),";") then      io.fileseparator, io.pathseparator = "\\", ";" @@ -14,16 +18,53 @@ else      io.fileseparator, io.pathseparator = "/" , ":"  end -function io.loaddata(filename,textmode) +local function readall(f) +    return f:read("*all") +end + +-- The next one is up to 50% faster on large files and consumes less memory due +-- to fewer large intermediate allocations. This phenomenon was discussed on the +-- luatex dev list. 
+ +local function readall(f) +    local size = f:seek("end") +    if size == 0 then +        return "" +    elseif size < 1024*1024 then +        f:seek("set",0) +        return f:read('*all') +    else +        local done = f:seek("set",0) +        if size < 1024*1024 then +            step = 1024 * 1024 +        elseif size > 16*1024*1024 then +            step = 16*1024*1024 +        else +            step = floor(size/(1024*1024)) * 1024 * 1024 / 8 +        end +        local data = { } +        while true do +            local r = f:read(step) +            if not r then +                return concat(data) +            else +                data[#data+1] = r +            end +        end +    end +end + +io.readall = readall + +function io.loaddata(filename,textmode) -- return nil if empty      local f = io.open(filename,(textmode and 'r') or 'rb')      if f then -    --  collectgarbage("step") -- sometimes makes a big difference in mem consumption -        local data = f:read('*all') -    --  garbagecollector.check(data) +--       local data = f:read('*all') +        local data = readall(f)          f:close() -        return data -    else -        return nil +        if #data > 0 then +            return data +        end      end  end @@ -31,25 +72,67 @@ function io.savedata(filename,data,joiner)      local f = io.open(filename,"wb")      if f then          if type(data) == "table" then -            f:write(table.join(data,joiner or "")) +            f:write(concat(data,joiner or ""))          elseif type(data) == "function" then              data(f)          else              f:write(data or "")          end          f:close() +        io.flush()          return true      else          return false      end  end +-- we can also chunk this one if needed: io.lines(filename,chunksize,"*l") + +function io.loadlines(filename,n) -- return nil if empty +    local f = io.open(filename,'r') +    if not f then +        -- no file +    elseif n then +        local 
lines = { } +        for i=1,n do +            local line = f:read("*lines") +            if line then +                lines[#lines+1] = line +            else +                break +            end +        end +        f:close() +        lines = concat(lines,"\n") +        if #lines > 0 then +            return lines +        end +    else +        local line = f:read("*line") or "" +        f:close() +        if #line > 0 then +            return line +        end +    end +end + +function io.loadchunk(filename,n) +    local f = io.open(filename,'rb') +    if f then +        local data = f:read(n or 1024) +        f:close() +        if #data > 0 then +            return data +        end +    end +end +  function io.exists(filename)      local f = io.open(filename)      if f == nil then          return false      else -        assert(f:close()) +        f:close()          return true      end  end @@ -60,18 +143,29 @@ function io.size(filename)          return 0      else          local s = f:seek("end") -        assert(f:close()) +        f:close()          return s      end  end  function io.noflines(f) -    local n = 0 -    for _ in f:lines() do -        n = n + 1 +    if type(f) == "string" then +        local f = io.open(filename) +        if f then +            local n = f and io.noflines(f) or 0 +            f:close() +            return n +        else +            return 0 +        end +    else +        local n = 0 +        for _ in f:lines() do +            n = n + 1 +        end +        f:seek('set',0) +        return n      end -    f:seek('set',0) -    return n  end  local nextchar = { @@ -97,8 +191,6 @@ local nextchar = {  function io.characters(f,n)      if f then          return nextchar[n or 1], f -    else -        return nil, nil      end  end @@ -107,40 +199,42 @@ local nextbyte = {          local a, b, c, d = f:read(1,1,1,1)          if d then              return byte(a), byte(b), byte(c), byte(d) -        else -            return nil, 
nil, nil, nil +        end +    end, +    [3] = function(f) +        local a, b, c = f:read(1,1,1) +        if b then +            return byte(a), byte(b), byte(c)          end      end,      [2] = function(f)          local a, b = f:read(1,1)          if b then              return byte(a), byte(b) -        else -            return nil, nil          end      end,      [1] = function (f)          local a = f:read(1)          if a then              return byte(a) -        else -            return nil          end      end,      [-2] = function (f)          local a, b = f:read(1,1)          if b then              return byte(b), byte(a) -        else -            return nil, nil +        end +    end, +    [-3] = function(f) +        local a, b, c = f:read(1,1,1) +        if b then +            return byte(c), byte(b), byte(a)          end      end,      [-4] = function(f)          local a, b, c, d = f:read(1,1,1,1)          if d then              return byte(d), byte(c), byte(b), byte(a) -        else -            return nil, nil, nil, nil          end      end  } @@ -157,12 +251,13 @@ function io.ask(question,default,options)      while true do          io.write(question)          if options then -            io.write(string.format(" [%s]",table.concat(options,"|"))) +            io.write(format(" [%s]",concat(options,"|")))          end          if default then -            io.write(string.format(" [%s]",default)) +            io.write(format(" [%s]",default))          end -        io.write(string.format(" ")) +        io.write(format(" ")) +        io.flush()          local answer = io.read()          answer = gsub(answer,"^%s*(.*)%s*$","%1")          if answer == "" and default then @@ -185,3 +280,83 @@ function io.ask(question,default,options)          end      end  end + +local function readnumber(f,n,m) +    if m then +        f:seek("set",n) +        n = m +    end +    if n == 1 then +        return byte(f:read(1)) +    elseif n == 2 then +        local a, b 
= byte(f:read(2),1,2) +        return 256 * a + b +    elseif n == 3 then +        local a, b, c = byte(f:read(3),1,3) +        return 256*256 * a + 256 * b + c +    elseif n == 4 then +        local a, b, c, d = byte(f:read(4),1,4) +        return 256*256*256 * a + 256*256 * b + 256 * c + d +    elseif n == 8 then +        local a, b = readnumber(f,4), readnumber(f,4) +        return 256 * a + b +    elseif n == 12 then +        local a, b, c = readnumber(f,4), readnumber(f,4), readnumber(f,4) +        return 256*256 * a + 256 * b + c +    elseif n == -2 then +        local b, a = byte(f:read(2),1,2) +        return 256*a + b +    elseif n == -3 then +        local c, b, a = byte(f:read(3),1,3) +        return 256*256 * a + 256 * b + c +    elseif n == -4 then +        local d, c, b, a = byte(f:read(4),1,4) +        return 256*256*256 * a + 256*256 * b + 256*c + d +    elseif n == -8 then +        local h, g, f, e, d, c, b, a = byte(f:read(8),1,8) +        return 256*256*256*256*256*256*256 * a + +                   256*256*256*256*256*256 * b + +                       256*256*256*256*256 * c + +                           256*256*256*256 * d + +                               256*256*256 * e + +                                   256*256 * f + +                                       256 * g + +                                             h +    else +        return 0 +    end +end + +io.readnumber = readnumber + +function io.readstring(f,n,m) +    if m then +        f:seek("set",n) +        n = m +    end +    local str = gsub(f:read(n),"\000","") +    return str +end + +-- + +if not io.i_limiter then function io.i_limiter() end end -- dummy so we can test safely +if not io.o_limiter then function io.o_limiter() end end -- dummy so we can test safely + +-- This works quite ok: +-- +-- function io.piped(command,writer) +--     local pipe = io.popen(command) +--  -- for line in pipe:lines() do +--  --     print(line) +--  -- end +--     while true do +--         local 
line = pipe:read(1) +--         if not line then +--             break +--         elseif line ~= "\n" then +--             writer(line) +--         end +--     end +--     return pipe:close() -- ok, status, (error)code +-- end diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua index b107a8e..681ef09 100644 --- a/lualibs-lpeg.lua +++ b/lualibs-lpeg.lua @@ -6,30 +6,153 @@ if not modules then modules = { } end modules ['l-lpeg'] = {      license   = "see context related readme files"  } -local lpeg = require("lpeg") +-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) + +-- move utf    -> l-unicode +-- move string -> l-string or keep it here + +lpeg = require("lpeg") + +-- tracing (only used when we encounter a problem in integration of lpeg in luatex) + +-- some code will move to unicode and string + +-- local lpmatch = lpeg.match +-- local lpprint = lpeg.print +-- local lpp     = lpeg.P +-- local lpr     = lpeg.R +-- local lps     = lpeg.S +-- local lpc     = lpeg.C +-- local lpb     = lpeg.B +-- local lpv     = lpeg.V +-- local lpcf    = lpeg.Cf +-- local lpcb    = lpeg.Cb +-- local lpcg    = lpeg.Cg +-- local lpct    = lpeg.Ct +-- local lpcs    = lpeg.Cs +-- local lpcc    = lpeg.Cc +-- local lpcmt   = lpeg.Cmt +-- local lpcarg  = lpeg.Carg + +-- function lpeg.match(l,...) print("LPEG MATCH") lpprint(l) return lpmatch(l,...) 
end + +-- function lpeg.P    (l) local p = lpp   (l) print("LPEG P =")    lpprint(l) return p end +-- function lpeg.R    (l) local p = lpr   (l) print("LPEG R =")    lpprint(l) return p end +-- function lpeg.S    (l) local p = lps   (l) print("LPEG S =")    lpprint(l) return p end +-- function lpeg.C    (l) local p = lpc   (l) print("LPEG C =")    lpprint(l) return p end +-- function lpeg.B    (l) local p = lpb   (l) print("LPEG B =")    lpprint(l) return p end +-- function lpeg.V    (l) local p = lpv   (l) print("LPEG V =")    lpprint(l) return p end +-- function lpeg.Cf   (l) local p = lpcf  (l) print("LPEG Cf =")   lpprint(l) return p end +-- function lpeg.Cb   (l) local p = lpcb  (l) print("LPEG Cb =")   lpprint(l) return p end +-- function lpeg.Cg   (l) local p = lpcg  (l) print("LPEG Cg =")   lpprint(l) return p end +-- function lpeg.Ct   (l) local p = lpct  (l) print("LPEG Ct =")   lpprint(l) return p end +-- function lpeg.Cs   (l) local p = lpcs  (l) print("LPEG Cs =")   lpprint(l) return p end +-- function lpeg.Cc   (l) local p = lpcc  (l) print("LPEG Cc =")   lpprint(l) return p end +-- function lpeg.Cmt  (l) local p = lpcmt (l) print("LPEG Cmt =")  lpprint(l) return p end +-- function lpeg.Carg (l) local p = lpcarg(l) print("LPEG Carg =") lpprint(l) return p end + +local type, next, tostring = type, next, tostring +local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format +----- mod, div = math.mod, math.div +local floor = math.floor + +local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt +local lpegtype, lpegmatch, lpegprint = lpeg.type, lpeg.match, lpeg.print + +-- let's start with an inspector: + +setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end) + +-- Beware, we predefine a bunch of patterns here and one reason for doing so +-- is that we get consistent behaviour in some of the visualizers.  
lpeg.patterns  = lpeg.patterns or { } -- so that we can share  local patterns = lpeg.patterns -local P, R, S, Ct, C, Cs, Cc, V = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.V -local match = lpeg.match + +local anything         = P(1) +local endofstring      = P(-1) +local alwaysmatched    = P(true) + +patterns.anything      = anything +patterns.endofstring   = endofstring +patterns.beginofstring = alwaysmatched +patterns.alwaysmatched = alwaysmatched  local digit, sign      = R('09'), S('+-')  local cr, lf, crlf     = P("\r"), P("\n"), P("\r\n") -local utf8byte         = R("\128\191") +local newline          = crlf + S("\r\n") -- cr + lf +local escaped          = P("\\") * anything +local squote           = P("'") +local dquote           = P('"') +local space            = P(" ") + +-- byte order marks; the 32 bit ones are tried first because FF FE 00 00 starts with the 16 bit FF FE +local utfbom_32_be     = P('\000\000\254\255') -- 00 00 FE FF +local utfbom_32_le     = P('\255\254\000\000') -- FF FE 00 00 +local utfbom_16_be     = P('\254\255') -- FE FF +local utfbom_16_le     = P('\255\254') -- FF FE +local utfbom_8         = P('\239\187\191') -- EF BB BF +local utfbom           = utfbom_32_be + utfbom_32_le +                       + utfbom_16_be + utfbom_16_le +                       + utfbom_8 +local utftype          = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le  * Cc("utf-32-le") +                       + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le  * Cc("utf-16-le") +                       + utfbom_8     * Cc("utf-8")     + alwaysmatched * Cc("utf-8") -- assume utf8 +local utfoffset        = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4) +                       + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2) +                       + utfbom_8     * Cc(3) + Cc(0) + +local utf8next         = R("\128\191") -patterns.utf8byte      = utf8byte  patterns.utf8one       = R("\000\127") -patterns.utf8two       = R("\194\223") * utf8byte -patterns.utf8three     = R("\224\239") * utf8byte * utf8byte -patterns.utf8four      = R("\240\244") * utf8byte * utf8byte * utf8byte +patterns.utf8two       = R("\194\223") * 
utf8next +patterns.utf8three     = R("\224\239") * utf8next * utf8next +patterns.utf8four      = R("\240\244") * utf8next * utf8next * utf8next +patterns.utfbom        = utfbom +patterns.utftype       = utftype +patterns.utfoffset     = utfoffset + +local utf8char         = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four +local validutf8char    = utf8char^0 * endofstring * Cc(true) + Cc(false) + +local utf8character    = P(1) * R("\128\191")^0 -- unchecked but fast + +patterns.utf8          = utf8char +patterns.utf8char      = utf8char +patterns.utf8character = utf8character -- this one can be used in most cases so we might use that one +patterns.validutf8     = validutf8char +patterns.validutf8char = validutf8char + +local eol              = S("\n\r") +local spacer           = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace       = eol + spacer +local nonspacer        = 1 - spacer +local nonwhitespace    = 1 - whitespace + +patterns.eol           = eol +patterns.spacer        = spacer +patterns.whitespace    = whitespace +patterns.nonspacer     = nonspacer +patterns.nonwhitespace = nonwhitespace + +local stripper         = spacer^0 * C((spacer^0     * nonspacer^1)^0) -- from example by roberto + +----- collapser        = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0) +local collapser        = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) + +patterns.stripper      = stripper +patterns.collapser     = collapser  patterns.digit         = digit  patterns.sign          = sign  patterns.cardinal      = sign^0 * digit^1  patterns.integer       = sign^0 * digit^1 -patterns.float         = sign^0 * digit^0 * P('.') * digit^1 +patterns.unsigned      = digit^0 * P('.') * digit^1 +patterns.float         = sign^0 * patterns.unsigned +patterns.cunsigned     = digit^0 * P(',') * digit^1 +patterns.cfloat        = sign^0 * patterns.cunsigned  patterns.number        = 
patterns.float + patterns.integer +patterns.cnumber       = patterns.cfloat + patterns.integer  patterns.oct           = P("0") * R("07")^1  patterns.octal         = patterns.oct  patterns.HEX           = P("0x") * R("09","AF")^1 @@ -38,55 +161,75 @@ patterns.hexadecimal   = P("0x") * R("09","AF","af")^1  patterns.lowercase     = R("az")  patterns.uppercase     = R("AZ")  patterns.letter        = patterns.lowercase + patterns.uppercase -patterns.space         = S(" ") -patterns.eol           = S("\n\r") -patterns.spacer        = S(" \t\f\v")  -- + string.char(0xc2, 0xa0) if we want utf (cf mail roberto) -patterns.newline       = crlf + cr + lf -patterns.nonspace      = 1 - patterns.space -patterns.nonspacer     = 1 - patterns.spacer -patterns.whitespace    = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace -patterns.utf8          = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four -patterns.utfbom        = P('\000\000\254\255') + P('\255\254\000\000') + P('\255\254') + P('\254\255') + P('\239\187\191') +patterns.space         = space +patterns.tab           = P("\t") +patterns.spaceortab    = patterns.space + patterns.tab +patterns.newline       = newline +patterns.emptyline     = newline^1 +patterns.equal         = P("=") +patterns.comma         = P(",") +patterns.commaspacer   = P(",") * spacer^0 +patterns.period        = P(".") +patterns.colon         = P(":") +patterns.semicolon     = P(";") +patterns.underscore    = P("_") +patterns.escaped       = escaped +patterns.squote        = squote +patterns.dquote        = dquote +patterns.nosquote      = (escaped + (1-squote))^0 +patterns.nodquote      = (escaped + (1-dquote))^0 +patterns.unsingle      = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle +patterns.undouble      = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle +patterns.unquoted      = patterns.undouble + patterns.unsingle -- more 
often undouble +patterns.unspacer      = ((patterns.spacer^1)/"")^0 -function lpeg.anywhere(pattern) --slightly adapted from website -    return P { P(pattern) + 1 * V(1) } -- why so complex? -end +patterns.singlequoted  = squote * patterns.nosquote * squote +patterns.doublequoted  = dquote * patterns.nodquote * dquote +patterns.quoted        = patterns.doublequoted + patterns.singlequoted -function lpeg.splitter(pattern, action) -    return (((1-P(pattern))^1)/action+1)^0 +patterns.propername    = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1) + +patterns.somecontent   = (anything - newline - space)^1 -- (utf8char - newline - space)^1 +patterns.beginline     = #(1-newline) + +patterns.longtostring  = Cs(whitespace^0/"" * nonwhitespace^0 * ((whitespace^0/" " * (patterns.quoted + nonwhitespace)^1)^0)) + +local function anywhere(pattern) --slightly adapted from website +    return P { P(pattern) + 1 * V(1) }  end -local spacing  = patterns.spacer^0 * patterns.newline -- sort of strip -local empty    = spacing * Cc("") -local nonempty = Cs((1-spacing)^1) * spacing^-1 -local content  = (empty + nonempty)^1 +lpeg.anywhere = anywhere -local capture = Ct(content^0) +function lpeg.instringchecker(p) +    p = anywhere(p) +    return function(str) +        return lpegmatch(p,str) and true or false +    end +end -function string:splitlines() -    return match(capture,self) +function lpeg.splitter(pattern, action) +    return (((1-P(pattern))^1)/action+1)^0  end -patterns.textline = content +function lpeg.tsplitter(pattern, action) +    return Ct((((1-P(pattern))^1)/action+1)^0) +end ---~ local p = lpeg.splitat("->",false)  print(match(p,"oeps->what->more"))  -- oeps what more ---~ local p = lpeg.splitat("->",true)   print(match(p,"oeps->what->more"))  -- oeps what->more ---~ local p = lpeg.splitat("->",false)  print(match(p,"oeps"))              -- oeps ---~ local p = lpeg.splitat("->",true)   print(match(p,"oeps"))              -- oeps +-- probleem: separator can be 
lpeg and that does not hash too well, but +-- it's quite okay as the key is then not garbage collected -local splitters_s, splitters_m = { }, { } +local splitters_s, splitters_m, splitters_t = { }, { }, { }  local function splitat(separator,single)      local splitter = (single and splitters_s[separator]) or splitters_m[separator]      if not splitter then          separator = P(separator) +        local other = C((1 - separator)^0)          if single then -            local other, any = C((1 - separator)^0), P(1) +            local any = anything              splitter = other * (separator * C(any^0) + "") -- ?              splitters_s[separator] = splitter          else -            local other = C((1 - separator)^0)              splitter = other * (separator * other)^0              splitters_m[separator] = splitter          end @@ -94,29 +237,70 @@ local function splitat(separator,single)      return splitter  end -lpeg.splitat = splitat +local function tsplitat(separator) +    local splitter = splitters_t[separator] +    if not splitter then +        splitter = Ct(splitat(separator)) +        splitters_t[separator] = splitter +    end +    return splitter +end + +lpeg.splitat  = splitat +lpeg.tsplitat = tsplitat + +function string.splitup(str,separator) +    if not separator then +        separator = "," +    end +    return lpegmatch(splitters_m[separator] or splitat(separator),str) +end + +-- local p = splitat("->",false)  print(lpegmatch(p,"oeps->what->more"))  -- oeps what more +-- local p = splitat("->",true)   print(lpegmatch(p,"oeps->what->more"))  -- oeps what->more +-- local p = splitat("->",false)  print(lpegmatch(p,"oeps"))              -- oeps +-- local p = splitat("->",true)   print(lpegmatch(p,"oeps"))              -- oeps  local cache = { }  function lpeg.split(separator,str)      local c = cache[separator]      if not c then -        c = Ct(splitat(separator)) +        c = tsplitat(separator)          cache[separator] = c      end -    return 
match(c,str) +    return lpegmatch(c,str)  end -function string:split(separator) -    local c = cache[separator] -    if not c then -        c = Ct(splitat(separator)) -        cache[separator] = c +function string.split(str,separator) +    if separator then +        local c = cache[separator] +        if not c then +            c = tsplitat(separator) +            cache[separator] = c +        end +        return lpegmatch(c,str) +    else +        return { str }      end -    return match(c,self)  end -lpeg.splitters = cache +local spacing  = patterns.spacer^0 * newline -- sort of strip +local empty    = spacing * Cc("") +local nonempty = Cs((1-spacing)^1) * spacing^-1 +local content  = (empty + nonempty)^1 + +patterns.textline = content + +local linesplitter = tsplitat(newline) + +patterns.linesplitter = linesplitter + +function string.splitlines(str) +    return lpegmatch(linesplitter,str) +end + +--~ lpeg.splitters = cache -- no longer public  local cache = { } @@ -124,42 +308,545 @@ function lpeg.checkedsplit(separator,str)      local c = cache[separator]      if not c then          separator = P(separator) -        local other = C((1 - separator)^0) +        local other = C((1 - separator)^1)          c = Ct(separator^0 * other * (separator^1 * other)^0)          cache[separator] = c      end -    return match(c,str) +    return lpegmatch(c,str)  end -function string:checkedsplit(separator) +function string.checkedsplit(str,separator)      local c = cache[separator]      if not c then          separator = P(separator) -        local other = C((1 - separator)^0) +        local other = C((1 - separator)^1)          c = Ct(separator^0 * other * (separator^1 * other)^0)          cache[separator] = c      end -    return match(c,self) +    return lpegmatch(c,str) +end + +-- from roberto's site: + +local function f2(s) local c1, c2         = byte(s,1,2) return   c1 * 64 + c2                       -    12416 end +local function f3(s) local c1, c2, c3     = 
byte(s,1,3) return  (c1 * 64 + c2) * 64 + c3            -   925824 end +local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end + +local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 + +patterns.utf8byte = utf8byte + +--~ local str = " a b c d " + +--~ local s = lpeg.stripper(lpeg.R("az"))   print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper(lpeg.R("az"))     print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.stripper("ab")           print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper("ab")             print("["..lpegmatch(s,str).."]") + +local cache = { } + +function lpeg.stripper(str) +    if type(str) == "string" then +        local s = cache[str] +        if not s then +            s = Cs(((S(str)^1)/"" + 1)^0) +            cache[str] = s +        end +        return s +    else +        return Cs(((str^1)/"" + 1)^0) +    end +end + +local cache = { } + +function lpeg.keeper(str) +    if type(str) == "string" then +        local s = cache[str] +        if not s then +            s = Cs((((1-S(str))^1)/"" + 1)^0) +            cache[str] = s +        end +        return s +    else +        return Cs((((1-str)^1)/"" + 1)^0) +    end +end + +function lpeg.frontstripper(str) -- or pattern (yet undocumented) +    return (P(str) + P(true)) * Cs(anything^0) +end + +function lpeg.endstripper(str) -- or pattern (yet undocumented) +    return Cs((1 - P(str) * endofstring)^0) +end + +-- Just for fun I looked at the used bytecode and +-- p = (p and p + pp) or pp gets one more (testset). 
+ +-- todo: cache when string + +function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys +    local pattern +    local u = isutf and utf8char or 1 +    if type(one) == "table" then +        local no = #one +        local p = P(false) +        if no == 0 then +            for k, v in next, one do +                p = p + P(k) / v +            end +            pattern = Cs((p + u)^0) +        elseif no == 1 then +            local o = one[1] +            one, two = P(o[1]), o[2] +         -- pattern = Cs(((1-one)^1 + one/two)^0) +            pattern = Cs((one/two + u)^0) +        else +            for i=1,no do +                local o = one[i] +                p = p + P(o[1]) / o[2] +            end +            pattern = Cs((p + u)^0) +        end +    else +        pattern = Cs((P(one)/(two or "") + u)^0) +    end +    if makefunction then +        return function(str) +            return lpegmatch(pattern,str) +        end +    else +        return pattern +    end +end + +function lpeg.finder(lst,makefunction) +    local pattern +    if type(lst) == "table" then +        pattern = P(false) +        if #lst == 0 then +            for k, v in next, lst do +                pattern = pattern + P(k) -- ignore key, so we can use a replacer table +            end +        else +            for i=1,#lst do +                pattern = pattern + P(lst[i]) +            end +        end +    else +        pattern = P(lst) +    end +    pattern = (1-pattern)^0 * pattern +    if makefunction then +        return function(str) +            return lpegmatch(pattern,str) +        end +    else +        return pattern +    end +end + +-- print(lpeg.match(lpeg.replacer("e","a"),"test test")) +-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) +-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) + +local splitters_f, splitters_s = { }, { } + +function lpeg.firstofsplit(separator) -- always return value +    local splitter 
= splitters_f[separator] +    if not splitter then +        separator = P(separator) +        splitter = C((1 - separator)^0) +        splitters_f[separator] = splitter +    end +    return splitter +end + +function lpeg.secondofsplit(separator) -- nil if not split +    local splitter = splitters_s[separator] +    if not splitter then +        separator = P(separator) +        splitter = (1 - separator)^0 * separator * C(anything^0) +        splitters_s[separator] = splitter +    end +    return splitter +end + +function lpeg.balancer(left,right) +    left, right = P(left), P(right) +    return P { left * ((1 - left - right) + V(1))^0 * right } +end + +-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) +-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty +-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) +-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) +-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty +-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) +-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) +-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) + +-- -- slower: +-- +-- function lpeg.counter(pattern) +--     local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end  + lpeg.anything)^0 +--     return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end +-- end + +local nany = utf8char/"" + +function lpeg.counter(pattern) +    pattern = Cs((P(pattern)/" " + nany)^0) +    return function(str) +        return #lpegmatch(pattern,str) +    end +end + +-- utf extensies + +utf = utf or (unicode and unicode.utf8) or { } + +local utfcharacters = utf and utf.characters or string.utfcharacters +local utfgmatch     = utf and utf.gmatch +local utfchar       = utf and utf.char + +lpeg.UP = lpeg.P + +if utfcharacters then + +    function lpeg.US(str) +        local p = P(false) +        for uc in utfcharacters(str) do +            p = p + P(uc) +        end +        return p +    end + + +elseif utfgmatch then + +  
  function lpeg.US(str) +        local p = P(false) +        for uc in utfgmatch(str,".") do +            p = p + P(uc) +        end +        return p +    end + +else + +    function lpeg.US(str) +        local p = P(false) +        local f = function(uc) +            p = p + P(uc) +        end +        lpegmatch((utf8char/f)^0,str) +        return p +    end + +end + +local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture + +function lpeg.UR(str,more) +    local first, last +    if type(str) == "number" then +        first = str +        last = more or first +    else +        first, last = lpegmatch(range,str) +        if not last then +            return P(str) +        end +    end +    if first == last then +        return P(str) +    elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium +        local p = P(false) +        for i=first,last do +            p = p + P(utfchar(i)) +        end +        return p -- nil when invalid range +    else +        local f = function(b) +            return b >= first and b <= last +        end +        -- tricky, these nested captures +        return utf8byte / f -- nil when invalid range +    end  end ---~ function lpeg.append(list,pp) ---~     local p = pp ---~     for l=1,#list do ---~         if p then ---~             p = p + P(list[l]) ---~         else ---~             p = P(list[l]) ---~         end ---~     end ---~     return p ---~ end +-- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) + +-- lpeg.print(lpeg.R("ab","cd","gh")) +-- lpeg.print(lpeg.P("a","b","c")) +-- lpeg.print(lpeg.S("a","b","c")) + +-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) +-- print(lpeg.count("äáàa",lpeg.UP("áà"))) +-- print(lpeg.count("äáàa",lpeg.US("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR("aá"))) +-- print(lpeg.count("äáàa",lpeg.UR("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) + +function lpeg.is_lpeg(p) +    return p and lpegtype(p) 
== "pattern" +end + +function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order +    if type(list) ~= "table" then +        list = { list, ... } +    end + -- table.sort(list) -- longest match first +    local p = P(list[1]) +    for l=2,#list do +        p = p + P(list[l]) +    end +    return p +end + +-- For the moment here, but it might move to utilities. Beware, we need to +-- have the longest keyword first, so 'aaa' comes before 'aa' which is why we +-- loop back from the end cq. prepend. + +local sort = table.sort + +-- returns a shallow copy of the indexed part so that the sort in lpeg.append leaves the passed list untouched +local function copyindexed(old) +    local new = { } +    for i=1,#old do +        new[i] = old[i] -- copy the element, not the table itself +    end +    return new +end + +local function sortedkeys(tab) +    local keys, s = { }, 0 +    for key,_ in next, tab do +        s = s + 1 +        keys[s] = key +    end +    sort(keys) +    return keys +end + +function lpeg.append(list,pp,delayed,checked) +    local p = pp +    if #list > 0 then +        local keys = copyindexed(list) +        sort(keys) +        for i=#keys,1,-1 do +            local k = keys[i] +            if p then +                p = P(k) + p +            else +                p = P(k) +            end +        end +    elseif delayed then -- hm, it looks like the lpeg parser resolves anyway +        local keys = sortedkeys(list) +        if p then +            for i=1,#keys,1 do +                local k = keys[i] +                local v = list[k] +                p = P(k)/list + p +            end +        else +            for i=1,#keys do +                local k = keys[i] +                local v = list[k] +                if p then +                    p = P(k) + p +                else +                    p = P(k) +                end +            end +            if p then +                p = p / list +            end +        end +    elseif checked then +        -- problem: substitution gives a capture +        local keys = sortedkeys(list) +        for i=1,#keys do +        
    local k = keys[i] +            local v = list[k] +            if p then +                if k == v then +                    p = P(k) + p +                else +                    p = P(k)/v + p +                end +            else +                if k == v then +                    p = P(k) +                else +                    p = P(k)/v +                end +            end +        end +    else +        local keys = sortedkeys(list) +        for i=1,#keys do +            local k = keys[i] +            local v = list[k] +            if p then +                p = P(k)/v + p +            else +                p = P(k)/v +            end +        end +    end +    return p +end + +-- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true)) +-- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true)) + +-- function lpeg.exact_match(words,case_insensitive) +--     local pattern = concat(words) +--     if case_insensitive then +--         local pattern = S(upper(characters)) + S(lower(characters)) +--         local list = { } +--         for i=1,#words do +--             list[lower(words[i])] = true +--         end +--         return Cmt(pattern^1, function(_,i,s) +--             return list[lower(s)] and i +--         end) +--     else +--         local pattern = S(concat(words)) +--         local list = { } +--         for i=1,#words do +--             list[words[i]] = true +--         end +--         return Cmt(pattern^1, function(_,i,s) +--             return list[s] and i +--         end) +--     end +-- end + +-- experiment: + +local function make(t) +    local p +    local keys = sortedkeys(t) +    for i=1,#keys do +        local k = keys[i] +        local v = t[k] +        if not p then +            if next(v) then +                p = P(k) * make(v) +            else +                p = P(k) +            end +        else +            if next(v) then +                p = p + P(k) * make(v) +            
else +                p = p + P(k) +            end +        end +    end +    return p +end + +function lpeg.utfchartabletopattern(list) -- goes to util-lpg +    local tree = { } +    for i=1,#list do +        local t = tree +        for c in gmatch(list[i],".") do +            if not t[c] then +                t[c] = { } +            end +            t = t[c] +        end +    end +    return make(tree) +end + +-- inspect ( lpeg.utfchartabletopattern { +--     utfchar(0x00A0), -- nbsp +--     utfchar(0x2000), -- enquad +--     utfchar(0x2001), -- emquad +--     utfchar(0x2002), -- enspace +--     utfchar(0x2003), -- emspace +--     utfchar(0x2004), -- threeperemspace +--     utfchar(0x2005), -- fourperemspace +--     utfchar(0x2006), -- sixperemspace +--     utfchar(0x2007), -- figurespace +--     utfchar(0x2008), -- punctuationspace +--     utfchar(0x2009), -- breakablethinspace +--     utfchar(0x200A), -- hairspace +--     utfchar(0x200B), -- zerowidthspace +--     utfchar(0x202F), -- narrownobreakspace +--     utfchar(0x205F), -- math thinspace +-- } ) + +-- a few handy ones: +-- +-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3 + +patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol + +-- The next pattern^n variant is based on an approach suggested +-- by Roberto: constructing a big repetition in chunks. +-- +-- Being sparse is not needed, and only complicate matters and +-- the number of redundant entries is not that large. 
+ +local function nextstep(n,step,result) +    local m = n % step      -- mod(n,step) +    local d = floor(n/step) -- div(n,step) +    if d > 0 then +        local v = V(tostring(step)) +        local s = result.start +        for i=1,d do +            if s then +                s = v * s +            else +                s = v +            end +        end +        result.start = s +    end +    if step > 1 and result.start then +        local v = V(tostring(step/2)) +        result[tostring(step)] = v * v +    end +    if step > 0 then +        return nextstep(m,step/2,result) +    else +        return result +    end +end + +function lpeg.times(pattern,n) +    return P(nextstep(n,2^16,{ "start", ["1"] = pattern })) +end + +-- local p = lpeg.Cs((1 - lpeg.times(lpeg.P("AB"),25))^1) +-- local s = "12" .. string.rep("AB",20) .. "34" .. string.rep("AB",30) .. "56" +-- inspect(p) +-- print(lpeg.match(p,s)) + +-- moved here (before util-str) ---~ from roberto's site: +local digit         = R("09") +local period        = P(".") +local zero          = P("0") +local trailingzeros = zero^0 * -digit -- suggested by Roberto R +local case_1        = period * trailingzeros / "" +local case_2        = period * (digit - trailingzeros)^1 * (trailingzeros / "") +local number        = digit^1 * (case_1 + case_2) +local stripper      = Cs((number + 1)^0) -local f1 = string.byte +lpeg.patterns.stripzeros = stripper -local function f2(s) local c1, c2         = f1(s,1,2) return   c1 * 64 + c2                       -    12416 end -local function f3(s) local c1, c2, c3     = f1(s,1,3) return  (c1 * 64 + c2) * 64 + c3            -   925824 end -local function f4(s) local c1, c2, c3, c4 = f1(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end +-- local sample = "bla 11.00 bla 11 bla 0.1100 bla 1.00100 bla 0.00 bla 0.001 bla 1.1100 bla 0.100100100 bla 0.00100100100" +-- collectgarbage("collect") +-- str = string.rep(sample,10000) +-- local ts = os.clock() +-- 
lpegmatch(stripper,str) +-- print(#str, os.clock()-ts, lpegmatch(stripper,sample)) -patterns.utf8byte = patterns.utf8one/f1 + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 diff --git a/lualibs-lua.lua b/lualibs-lua.lua new file mode 100644 index 0000000..538c65d --- /dev/null +++ b/lualibs-lua.lua @@ -0,0 +1,393 @@ +if not modules then modules = { } end modules ['l-lua'] = { +    version   = 1.001, +    comment   = "companion to luat-lib.mkiv", +    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", +    copyright = "PRAGMA ADE / ConTeXt Development Team", +    license   = "see context related readme files" +} + +-- compatibility hacks ... try to avoid usage + +local major, minor = string.match(_VERSION,"^[^%d]+(%d+)%.(%d+).*$") + +_MAJORVERSION = tonumber(major) or 5 +_MINORVERSION = tonumber(minor) or 1 +_LUAVERSION   = _MAJORVERSION + _MINORVERSION/10 + +-- lpeg + +if not lpeg then +    lpeg = require("lpeg") +end + +-- basics: + +if loadstring then + +    local loadnormal = load + +    function load(first,...) +        if type(first) == "string" then +            return loadstring(first,...) +        else +            return loadnormal(first,...) +        end +    end + +else + +    loadstring = load + +end + +-- table: + +-- At some point it was announced that ipairs would be dropped, which makes +-- sense. As we already used the for loop and # in most places the impact on +-- ConTeXt was not that large; the remaining ipairs already have been replaced. +-- Hm, actually ipairs was retained, but we no longer use it anyway (nor +-- pairs). +-- +-- Just in case, we provide the fallbacks as discussed in Programming +-- in Lua (http://www.lua.org/pil/7.3.html): + +if not ipairs then + +    -- for k, v in ipairs(t) do                ... end +    -- for k=1,#t            do local v = t[k] ... 
end + +    local function iterate(a,i) +        i = i + 1 +        local v = a[i] +        if v ~= nil then +            return i, v --, nil +        end +    end + +    function ipairs(a) +        return iterate, a, 0 +    end + +end + +if not pairs then + +    -- for k, v in pairs(t) do ... end +    -- for k, v in next, t  do ... end + +    function pairs(t) +        return next, t -- , nil +    end + +end + +-- The unpack function has been moved to the table table, and for compatibility +-- reasons we provide both now. + +if not table.unpack then + +    table.unpack = _G.unpack + +elseif not unpack then + +    _G.unpack = table.unpack + +end + +-- package: + +-- if not package.seachers then +-- +--     package.searchers = package.loaders -- 5.2 +-- +-- elseif not package.loaders then +-- +--     package.loaders = package.searchers +-- +-- end + +if not package.loaders then -- brr, searchers is a special "loadlib function" userdata type + +    package.loaders = package.searchers + +end + +-- moved from util-deb to here: + +local print, select, tostring = print, select, tostring + +local inspectors = { } + +function setinspector(inspector) -- global function +    inspectors[#inspectors+1] = inspector +end + +function inspect(...) -- global function +    for s=1,select("#",...) do +        local value = select(s,...) +        local done = false +        for i=1,#inspectors do +            done = inspectors[i](value) +            if done then +                break +            end +        end +        if not done then +            print(tostring(value)) +        end +    end +end + +-- + +local dummy = function() end + +function optionalrequire(...) +    local ok, result = xpcall(require,dummy,...) +    if ok then +        return result +    end +end + +-- Code moved from data-lua and changed into a plug-in. + +-- We overload the regular loader. We do so because we operate mostly in +-- tds and use our own loader code. 
Alternatively we could use a more +-- extensive definition of package.path and package.cpath but even then +-- we're not done. Also, we now have better tracing. +-- +-- -- local mylib = require("libtest") +-- -- local mysql = require("luasql.mysql") + +local type = type +local gsub, format = string.gsub, string.format + +local package    = package +local searchers  = package.searchers or package.loaders + +local libpaths   = nil +local clibpaths  = nil +local libhash    = { } +local clibhash   = { } +local libextras  = { } +local clibextras = { } + +-- dummies + +local filejoin   = file and file.join        or function(path,name)   return path .. "/" .. name end +local isreadable = file and file.is_readable or function(name)        local f = io.open(name) if f then f:close() return true end end +local addsuffix  = file and file.addsuffix   or function(name,suffix) return name .. "." .. suffix end + +-- + +local function cleanpath(path) -- hm, don't we have a helper for this? +    return path +end + +local helpers    = package.helpers or { +    libpaths  = function() return { } end, +    clibpaths = function() return { } end, +    cleanpath = cleanpath, +    trace     = false, +    report    = function(...) print(format(...)) end, +} +package.helpers  = helpers + +local function getlibpaths() +    return libpaths or helpers.libpaths(libhash) +end + +local function getclibpaths() +    return clibpaths or helpers.clibpaths(clibhash) +end + +package.libpaths  = getlibpaths +package.clibpaths = getclibpaths + +local function addpath(what,paths,extras,hash,...) +    local pathlist  = { ... 
} +    local cleanpath = helpers.cleanpath +    local trace     = helpers.trace +    local report    = helpers.report +    -- +    local function add(path) +        local path = cleanpath(path) +        if not hash[path] then +            if trace then +                report("extra %s path: %s",what,path) +            end +            paths [#paths +1] = path +            extras[#extras+1] = path +        end +    end +    -- +    for p=1,#pathlist do +        local path = pathlist[p] +        if type(path) == "table" then +            for i=1,#path do +                add(path[i]) +            end +        else +            add(path) +        end +    end +    return paths, extras +end + +function package.extralibpath(...) +     libpaths,  libextras = addpath("lua", getlibpaths(), libextras, libhash,...) +end + +function package.extraclibpath(...) +    clibpaths, clibextras = addpath("lib",getclibpaths(),clibextras,clibhash,...) +end + +-- function package.extralibpath(...) +--     libpaths  = getlibpaths() +--     local pathlist  = { ... } +--     local cleanpath = helpers.cleanpath +--     local trace     = helpers.trace +--     local report    = helpers.report +--     -- +--     local function add(path) +--         local path = cleanpath(path) +--         if not libhash[path] then +--             if trace then +--                 report("extra lua path: %s",path) +--             end +--             libextras[#libextras+1] = path +--             libpaths [#libpaths +1] = path +--         end +--     end +--     -- +--     for p=1,#pathlist do +--         local path = pathlist[p] +--         if type(path) == "table" then +--             for i=1,#path do +--                 add(path[i]) +--             end +--         else +--             add(path) +--         end +--     end +-- end + +-- function package.extraclibpath(...) +--     clibpaths = getclibpaths() +--     local pathlist  = { ... 
} +--     local cleanpath = helpers.cleanpath +--     local trace     = helpers.trace +--     local report    = helpers.report +--     -- +--     local function add(path) +--         local path = cleanpath(path) +--         if not clibhash[path] then +--             if trace then +--                 report("extra lib path: %s",path) +--             end +--             clibextras[#clibextras+1] = path +--             clibpaths [#clibpaths +1] = path +--         end +--     end +--     -- +--     for p=1,#pathlist do +--         local path = pathlist[p] +--         if type(path) == "table" then +--             for i=1,#path do +--                 add(path[i]) +--             end +--         else +--             add(path) +--         end +--     end +-- end + +if not searchers[-2] then +    -- use package-path and package-cpath +    searchers[-2] = searchers[2] +end + +searchers[2] = function(name) +    return helpers.loaded(name) +end + +searchers[3] = nil -- get rid of the built in one + +local function loadedaslib(resolved,rawname) + -- local init = "luaopen_" .. 
string.match(rawname,".-([^%.]+)$") +    local init = "luaopen_"..gsub(rawname,"%.","_") +    if helpers.trace then +        helpers.report("calling loadlib with '%s' with init '%s'",resolved,init) +    end +    return package.loadlib(resolved,init) +end + +local function loadedbylua(name) +    if helpers.trace then +        helpers.report("locating '%s' using normal loader",name) +    end +    return true, searchers[-2](name) -- the original +end + +local function loadedbypath(name,rawname,paths,islib,what) +    local trace  = helpers.trace +    local report = helpers.report +    if trace then +        report("locating '%s' as '%s' on '%s' paths",rawname,name,what) +    end +    for p=1,#paths do +        local path = paths[p] +        local resolved = filejoin(path,name) +        if trace then -- mode detail +            report("checking for '%s' using '%s' path '%s'",name,what,path) +        end +        if isreadable(resolved) then +            if trace then +                report("lib '%s' located on '%s'",name,resolved) +            end +            if islib then +                return true, loadedaslib(resolved,rawname) +            else +                return true, loadfile(resolved) +            end +        end +    end +end + +local function notloaded(name) +    if helpers.trace then +        helpers.report("? 
unable to locate library '%s'",name) +    end +end + +helpers.loadedaslib  = loadedaslib +helpers.loadedbylua  = loadedbylua +helpers.loadedbypath = loadedbypath +helpers.notloaded    = notloaded + +-- alternatively we could set the package.searchers + +function helpers.loaded(name) +    local thename   = gsub(name,"%.","/") +    local luaname   = addsuffix(thename,"lua") +    local libname   = addsuffix(thename,os.libsuffix or "so") -- brrr +    local libpaths  = getlibpaths() +    local clibpaths = getclibpaths() +    local done, result = loadedbypath(luaname,name,libpaths,false,"lua") +    if done then +        return result +    end +    local done, result = loadedbypath(luaname,name,clibpaths,false,"lua") +    if done then +        return result +    end +    local done, result = loadedbypath(libname,name,clibpaths,true,"lib") +    if done then +        return result +    end +    local done, result = loadedbylua(name) +    if done then +        return result +    end +    return notloaded(name) +end diff --git a/lualibs-math.lua b/lualibs-math.lua index fc8db47..43f60b5 100644 --- a/lualibs-math.lua +++ b/lualibs-math.lua @@ -9,33 +9,26 @@ if not modules then modules = { } end modules ['l-math'] = {  local floor, sin, cos, tan = math.floor, math.sin, math.cos, math.tan  if not math.round then -    function math.round(x) -        return floor(x + 0.5) -    end +    function math.round(x) return floor(x + 0.5) end  end  if not math.div then -    function math.div(n,m) -        return floor(n/m) -    end +    function math.div(n,m) return floor(n/m) end  end  if not math.mod then -    function math.mod(n,m) -        return n % m -    end +    function math.mod(n,m) return n % m end  end  local pipi = 2*math.pi/360 -function math.sind(d) -    return sin(d*pipi) +if not math.sind then +    function math.sind(d) return sin(d*pipi) end +    function math.cosd(d) return cos(d*pipi) end +    function math.tand(d) return tan(d*pipi) end  end -function math.cosd(d) -    
return cos(d*pipi) -end - -function math.tand(d) -    return tan(d*pipi) +if not math.odd then +    function math.odd (n) return n % 2 ~= 0 end +    function math.even(n) return n % 2 == 0 end  end diff --git a/lualibs-md5.lua b/lualibs-md5.lua index 27955ef..8ac20a5 100644 --- a/lualibs-md5.lua +++ b/lualibs-md5.lua @@ -7,40 +7,85 @@ if not modules then modules = { } end modules ['l-md5'] = {  -- This also provides file checksums and checkers. +if not md5 then +    md5 = optionalrequire("md5") +end + +if not md5 then +    md5 = { +        sum     = function(str) print("error: md5 is not loaded (sum     ignored)") return str end, +        sumhexa = function(str) print("error: md5 is not loaded (sumhexa ignored)") return str end, +    } +end + +local md5, file = md5, file  local gsub, format, byte = string.gsub, string.format, string.byte +local md5sum = md5.sum  local function convert(str,fmt) -    return (gsub(md5.sum(str),".",function(chr) return format(fmt,byte(chr)) end)) +    return (gsub(md5sum(str),".",function(chr) return format(fmt,byte(chr)) end))  end  if not md5.HEX then function md5.HEX(str) return convert(str,"%02X") end end  if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end  if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end ---~ if not md5.HEX then ---~     local function remap(chr) return format("%02X",byte(chr)) end ---~     function md5.HEX(str) return (gsub(md5.sum(str),".",remap)) end ---~ end ---~ if not md5.hex then ---~     local function remap(chr) return format("%02x",byte(chr)) end ---~     function md5.hex(str) return (gsub(md5.sum(str),".",remap)) end ---~ end ---~ if not md5.dec then ---~     local function remap(chr) return format("%03i",byte(chr)) end ---~     function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end ---~ end +-- local P, Cs, lpegmatch = lpeg.P, lpeg.Cs,lpeg.match +-- +-- if not md5.HEX then +--     local function remap(chr) return format("%02X",byte(chr)) 
end +--     function md5.HEX(str) return (gsub(md5.sum(str),".",remap)) end +-- end +-- +-- if not md5.hex then +--     local function remap(chr) return format("%02x",byte(chr)) end +--     function md5.hex(str) return (gsub(md5.sum(str),".",remap)) end +-- end +-- +-- if not md5.dec then +--     local function remap(chr) return format("%03i",byte(chr)) end +--     function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end +-- end -file.needs_updating_threshold = 1 +-- if not md5.HEX then +--     local pattern_HEX = Cs( ( P(1) / function(chr) return format("%02X",byte(chr)) end)^0 ) +--     function md5.HEX(str) return lpegmatch(pattern_HEX,md5.sum(str)) end +-- end +-- +-- if not md5.hex then +--     local pattern_hex = Cs( ( P(1) / function(chr) return format("%02x",byte(chr)) end)^0 ) +--     function md5.hex(str) return lpegmatch(pattern_hex,md5.sum(str)) end +-- end +-- +-- if not md5.dec then +--     local pattern_dec = Cs( ( P(1) / function(chr) return format("%02i",byte(chr)) end)^0 ) +--     function md5.dec(str) return lpegmatch(pattern_dec,md5.sum(str)) end +-- end -function file.needs_updating(oldname,newname) -- size modification access change -    local oldtime = lfs.attributes(oldname, modification) -    local newtime = lfs.attributes(newname, modification) -    if newtime >= oldtime then -        return false -    elseif oldtime - newtime < file.needs_updating_threshold then -        return false +function file.needsupdating(oldname,newname,threshold) -- size modification access change +    local oldtime = lfs.attributes(oldname,"modification") +    if oldtime then +        local newtime = lfs.attributes(newname,"modification") +        if not newtime then +            return true -- no new file, so no updating needed +        elseif newtime >= oldtime then +            return false -- new file definitely needs updating +        elseif oldtime - newtime < (threshold or 1) then +            return false -- new file is probably still okay +       
 else +            return true -- new file has to be updated +        end      else -        return true +        return false -- no old file, so no updating needed +    end +end + +file.needs_updating = file.needsupdating + +function file.syncmtimes(oldname,newname) +    local oldtime = lfs.attributes(oldname,"modification") +    if oldtime and lfs.isfile(newname) then +        lfs.touch(newname,oldtime,oldtime)      end  end @@ -62,7 +107,7 @@ function file.loadchecksum(name)      return nil  end -function file.savechecksum(name, checksum) +function file.savechecksum(name,checksum)      if not checksum then checksum = file.checksum(name) end      if checksum then          io.savedata(name .. ".md5",checksum) diff --git a/lualibs-number.lua b/lualibs-number.lua index a1249f0..001ca31 100644 --- a/lualibs-number.lua +++ b/lualibs-number.lua @@ -6,16 +6,140 @@ if not modules then modules = { } end modules ['l-number'] = {      license   = "see context related readme files"  } -local tostring = tostring -local format, floor, insert, match = string.format, math.floor, table.insert, string.match +-- this module will be replaced when we have the bit library .. the number based sets +-- might go away + +local tostring, tonumber = tostring, tonumber +local format, floor, match, rep = string.format, math.floor, string.match, string.rep +local concat, insert = table.concat, table.insert  local lpegmatch = lpeg.match -number = number or { } +number       = number or { } +local number = number --- a,b,c,d,e,f = number.toset(100101) +if bit32 then -- I wonder if this is faster -function number.toset(n) -    return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)") +    local btest, bor = bit32.btest, bit32.bor + +    function number.bit(p) +        return 2 ^ (p - 1) -- 1-based indexing +    end + +    number.hasbit = btest +    number.setbit = bor + +    function number.setbit(x,p) -- why not bor? 
+        return btest(x,p) and x or x + p +    end + +    function number.clearbit(x,p) +        return btest(x,p) and x - p or x +    end + +else + +    -- http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html + +    function number.bit(p) +        return 2 ^ (p - 1) -- 1-based indexing +    end + +    function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... +        return x % (p + p) >= p +    end + +    function number.setbit(x, p) +        return (x % (p + p) >= p) and x or x + p +    end + +    function number.clearbit(x, p) +        return (x % (p + p) >= p) and x - p or x +    end + +end + +-- print(number.tobitstring(8)) +-- print(number.tobitstring(14)) +-- print(number.tobitstring(66)) +-- print(number.tobitstring(0x00)) +-- print(number.tobitstring(0xFF)) +-- print(number.tobitstring(46260767936,4)) + +if bit32 then + +    local bextract = bit32.extract + +    local t = { +        "0", "0", "0", "0", "0", "0", "0", "0", +        "0", "0", "0", "0", "0", "0", "0", "0", +        "0", "0", "0", "0", "0", "0", "0", "0", +        "0", "0", "0", "0", "0", "0", "0", "0", +    } + +    function number.tobitstring(b,m) +        -- if really needed we can speed this one up +        -- because small numbers need less extraction +        local n = 32 +        for i=0,31 do +            local v = bextract(b,i) +            local k = 32 - i +            if v == 1 then +                n = k +                t[k] = "1" +            else +                t[k] = "0" +            end +        end +        if m then +            m = 33 - m * 8 +            if m < 1 then +                m = 1 +            end +            return concat(t,"",m) +        elseif n < 8 then +            return concat(t) +        elseif n < 16 then +            return concat(t,"",9) +        elseif n < 24 then +            return concat(t,"",17) +        else +            return concat(t,"",25) +        end +    end + +else + +    function number.tobitstring(n,m) + 
       if n > 0 then +            local t = { } +            while n > 0 do +                insert(t,1,n % 2 > 0 and 1 or 0) +                n = floor(n/2) +            end +            local nn = 8 - #t % 8 +            if nn > 0 and nn < 8 then +                for i=1,nn do +                    insert(t,1,0) +                end +            end +            if m then +                m = m * 8 - #t +                if m > 0 then +                    insert(t,1,rep("0",m)) +                end +            end +            return concat(t) +        elseif m then +            rep("00000000",m) +        else +            return "00000000" +        end +    end + +end + +function number.valid(str,default) +    return tonumber(str) or default or nil  end  function number.toevenhex(n) @@ -27,32 +151,57 @@ function number.toevenhex(n)      end  end --- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5% --- on +-- a,b,c,d,e,f = number.toset(100101) +-- +-- function number.toset(n) +--     return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)") +-- end +-- +-- -- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5% +-- -- on  --  -- for i=1,1000000 do  --     local a,b,c,d,e,f,g,h = number.toset(12345678)  --     local a,b,c,d         = number.toset(1234)  --     local a,b,c           = number.toset(123) +--     local a,b,c           = number.toset("123")  -- end --- --- of course dedicated "(.)(.)(.)(.)" matches are even faster -local one = lpeg.C(1-lpeg.S(''))^1 +local one = lpeg.C(1-lpeg.S('')/tonumber)^1  function number.toset(n)      return lpegmatch(one,tostring(n))  end -function number.bits(n,zero) -    local t, i = { }, (zero and 0) or 1 -    while n > 0 do +-- function number.bits(n,zero) +--     local t, i = { }, (zero and 0) or 1 +--     while n > 0 do +--         local m = n % 2 +--         if m > 0 then +--             insert(t,1,i) +--         end +--         n = floor(n/2) +--         i = i + 1 +--     end +--   
  return t +-- end +-- +-- -- a bit faster + +local function bits(n,i,...) +    if n > 0 then          local m = n % 2 +        local n = floor(n/2)          if m > 0 then -            insert(t,1,i) +            return bits(n, i+1, i, ...) +        else +            return bits(n, i+1,    ...)          end -        n = floor(n/2) -        i = i + 1 +    else +        return ...      end -    return t +end + +function number.bits(n) +    return { bits(n,1) }  end diff --git a/lualibs-os.lua b/lualibs-os.lua index fba2cd3..42f3e48 100644 --- a/lualibs-os.lua +++ b/lualibs-os.lua @@ -6,32 +6,130 @@ if not modules then modules = { } end modules ['l-os'] = {      license   = "see context related readme files"  } +-- This file deals with some operating system issues. Please don't bother me +-- with the pros and cons of operating systems as they all have their flaws +-- and benefits. Bashing one of them won't help solving problems and fixing +-- bugs faster and is a waste of time and energy. +-- +-- path separators: / or \ ... we can use / everywhere +-- suffixes       : dll so exe <none> ... no big deal +-- quotes         : we can use "" in most cases +-- expansion      : unless "" are used * might give side effects +-- piping/threads : somewhat different for each os +-- locations      : specific user file locations and settings can change over time +-- +-- os.type     : windows | unix (new, we already guessed os.platform) +-- os.name     : windows | msdos | linux | macosx | solaris | .. 
| generic (new) +-- os.platform : extended os.name with architecture + +-- os.sleep() => socket.sleep() +-- math.randomseed(tonumber(string.sub(string.reverse(tostring(math.floor(socket.gettime()*10000))),1,6))) +  -- maybe build io.flush in os.execute -local find, format, gsub = string.find, string.format, string.gsub -local random, ceil = math.random, math.ceil +local os = os +local date, time = os.date, os.time +local find, format, gsub, upper, gmatch = string.find, string.format, string.gsub, string.upper, string.gmatch +local concat = table.concat +local random, ceil, randomseed = math.random, math.ceil, math.randomseed +local rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring = rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring + +-- The following code permits traversing the environment table, at least +-- in luatex. Internally all environment names are uppercase. + +-- The randomseed in Lua is not that random, although this depends on the operating system as well +-- as the binary (Luatex is normally okay). But to be sure we set the seed anyway. 
+ +math.initialseed = tonumber(string.sub(string.reverse(tostring(ceil(socket and socket.gettime()*10000 or time()))),1,6)) + +randomseed(math.initialseed) + +if not os.__getenv__ then + +    os.__getenv__ = os.getenv +    os.__setenv__ = os.setenv + +    if os.env then + +        local osgetenv  = os.getenv +        local ossetenv  = os.setenv +        local osenv     = os.env      local _ = osenv.PATH -- initialize the table + +        function os.setenv(k,v) +            if v == nil then +                v = "" +            end +            local K = upper(k) +            osenv[K] = v +            if type(v) == "table" then +                v = concat(v,";") -- path +            end +            ossetenv(K,v) +        end + +        function os.getenv(k) +            local K = upper(k) +            local v = osenv[K] or osenv[k] or osgetenv(K) or osgetenv(k) +            if v == "" then +                return nil +            else +                return v +            end +        end + +    else + +        local ossetenv  = os.setenv +        local osgetenv  = os.getenv +        local osenv     = { } + +        function os.setenv(k,v) +            if v == nil then +                v = "" +            end +            local K = upper(k) +            osenv[K] = v +        end + +        function os.getenv(k) +            local K = upper(k) +            local v = osenv[K] or osgetenv(K) or osgetenv(k) +            if v == "" then +                return nil +            else +                return v +            end +        end + +        local function __index(t,k) +            return os.getenv(k) +        end +        local function __newindex(t,k,v) +            os.setenv(k,v) +        end + +        os.env = { } + +        setmetatable(os.env, { __index = __index, __newindex = __newindex } ) + +    end -local execute, spawn, exec, ioflush = os.execute, os.spawn or os.execute, os.exec or os.execute, io.flush +end + +-- end of environment hack + +local 
execute, spawn, exec, iopopen, ioflush = os.execute, os.spawn or os.execute, os.exec or os.execute, io.popen, io.flush  function os.execute(...) ioflush() return execute(...) end  function os.spawn  (...) ioflush() return spawn  (...) end  function os.exec   (...) ioflush() return exec   (...) end +function io.popen  (...) ioflush() return iopopen(...) end  function os.resultof(command) -    ioflush() -- else messed up logging      local handle = io.popen(command,"r") -    if not handle then -    --  print("unknown command '".. command .. "' in os.resultof") -        return "" -    else -        return handle:read("*all") or "" -    end +    return handle and handle:read("*all") or ""  end ---~ os.type     : windows | unix (new, we already guessed os.platform) ---~ os.name     : windows | msdos | linux | macosx | solaris | .. | generic (new) ---~ os.platform : extended os.name with architecture -  if not io.fileseparator then      if find(os.getenv("PATH"),";") then          io.fileseparator, io.pathseparator, os.type = "\\", ";", os.type or "mswin" @@ -44,20 +142,22 @@ os.type = os.type or (io.pathseparator == ";"       and "windows") or "unix"  os.name = os.name or (os.type          == "windows" and "mswin"  ) or "linux"  if os.type == "windows" then -    os.libsuffix, os.binsuffix = 'dll', 'exe' +    os.libsuffix, os.binsuffix, os.binsuffixes = 'dll', 'exe', { 'exe', 'cmd', 'bat' }  else -    os.libsuffix, os.binsuffix = 'so', '' +    os.libsuffix, os.binsuffix, os.binsuffixes = 'so', '', { '' }  end +local launchers = { +    windows = "start %s", +    macosx  = "open %s", +    unix    = "$BROWSER %s &> /dev/null &", +} +  function os.launch(str) -    if os.type == "windows" then -        os.execute("start " .. str) -- os.spawn ? -    else -        os.execute(str .. " &")     -- os.spawn ? -    end +    os.execute(format(launchers[os.name] or launchers.unix,str))  end -if not os.times then +if not os.times then -- ?      
-- utime  = user time      -- stime  = system time      -- cutime = children user time @@ -89,36 +189,14 @@ end  -- no need for function anymore as we have more clever code and helpers now  -- this metatable trickery might as well disappear -os.resolvers = os.resolvers or { } +os.resolvers = os.resolvers or { } -- will become private  local resolvers = os.resolvers -local osmt = getmetatable(os) or { __index = function(t,k) t[k] = "unset" return "unset" end } -- maybe nil -local osix = osmt.__index - -osmt.__index = function(t,k) -    return (resolvers[k] or osix)(t,k) -end - -setmetatable(os,osmt) - -if not os.setenv then - -    -- we still store them but they won't be seen in -    -- child processes although we might pass them some day -    -- using command concatination - -    local env, getenv = { }, os.getenv - -    function os.setenv(k,v) -        env[k] = v -    end - -    function os.getenv(k) -        return env[k] or getenv(k) -    end - -end +setmetatable(os, { __index = function(t,k) +    local r = resolvers[k] +    return r and r(t,k) or nil -- no memoize +end })  -- we can use HOSTTYPE on some platforms @@ -159,7 +237,7 @@ elseif os.type == "windows" then  elseif name == "linux" then      function os.resolvers.platform(t,k) -        -- we sometims have HOSTTYPE set so let's check that first +        -- we sometimes have HOSTTYPE set so let's check that first          local platform, architecture = "", os.getenv("HOSTTYPE") or os.resultof("uname -m") or ""          if find(architecture,"x86_64") then              platform = "linux-64" @@ -237,10 +315,10 @@ elseif name == "freebsd" then  elseif name == "kfreebsd" then      function os.resolvers.platform(t,k) -        -- we sometims have HOSTTYPE set so let's check that first +        -- we sometimes have HOSTTYPE set so let's check that first          local platform, architecture = "", os.getenv("HOSTTYPE") or os.resultof("uname -m") or ""          if find(architecture,"x86_64") then -            
platform = "kfreebsd-64" +            platform = "kfreebsd-amd64"          else              platform = "kfreebsd-i386"          end @@ -288,7 +366,7 @@ end  local d  function os.timezone(delta) -    d = d or tonumber(tonumber(os.date("%H")-os.date("!%H"))) +    d = d or tonumber(tonumber(date("%H")-date("!%H")))      if delta then          if d > 0 then              return format("+%02i:00",d) @@ -299,3 +377,86 @@ function os.timezone(delta)          return 1      end  end + +local timeformat = format("%%s%s",os.timezone(true)) +local dateformat = "!%Y-%m-%d %H:%M:%S" + +function os.fulltime(t,default) +    t = tonumber(t) or 0 +    if t > 0 then +        -- valid time +    elseif default then +        return default +    else +        t = nil +    end +    return format(timeformat,date(dateformat,t)) +end + +local dateformat = "%Y-%m-%d %H:%M:%S" + +function os.localtime(t,default) +    t = tonumber(t) or 0 +    if t > 0 then +        -- valid time +    elseif default then +        return default +    else +        t = nil +    end +    return date(dateformat,t) +end + +function os.converttime(t,default) +    local t = tonumber(t) +    if t and t > 0 then +        return date(dateformat,t) +    else +        return default or "-" +    end +end + +local memory = { } + +local function which(filename) +    local fullname = memory[filename] +    if fullname == nil then +        local suffix = file.suffix(filename) +        local suffixes = suffix == "" and os.binsuffixes or { suffix } +        for directory in gmatch(os.getenv("PATH"),"[^" .. 
io.pathseparator .."]+") do +            local df = file.join(directory,filename) +            for i=1,#suffixes do +                local dfs = file.addsuffix(df,suffixes[i]) +                if io.exists(dfs) then +                    fullname = dfs +                    break +                end +            end +        end +        if not fullname then +            fullname = false +        end +        memory[filename] = fullname +    end +    return fullname +end + +os.which = which +os.where = which + +function os.today() +    return date("!*t") -- table with values +end + +function os.now() +    return date("!%Y-%m-%d %H:%M:%S") -- 2011-12-04 14:59:12 +end + +if not os.sleep and socket then +    os.sleep = socket.sleep +end + +-- print(os.which("inkscape.exe")) +-- print(os.which("inkscape")) +-- print(os.which("gs.exe")) +-- print(os.which("ps2pdf")) diff --git a/lualibs-set.lua b/lualibs-set.lua index f844d0b..2370f01 100644 --- a/lualibs-set.lua +++ b/lualibs-set.lua @@ -6,6 +6,8 @@ if not modules then modules = { } end modules ['l-set'] = {      license   = "see context related readme files"  } +-- This will become obsolete when we have the bitset library embedded. 
+  set = set or { }  local nums   = { } @@ -49,10 +51,11 @@ function set.tolist(n)      if n == 0 or not tabs[n] then          return ""      else -        local t = { } +        local t, n = { }, 0          for k, v in next, tabs[n] do              if v then -                t[#t+1] = k +                n = n + 1 +                t[n] = k              end          end          return concat(t," ") diff --git a/lualibs-string.lua b/lualibs-string.lua index 9856d52..77c076c 100644 --- a/lualibs-string.lua +++ b/lualibs-string.lua @@ -6,283 +6,200 @@ if not modules then modules = { } end modules ['l-string'] = {      license   = "see context related readme files"  } -local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower -local lpegmatch = lpeg.match - --- some functions may disappear as they are not used anywhere - -if not string.split then - -    -- this will be overloaded by a faster lpeg variant - -    function string:split(pattern) -        if #self > 0 then -            local t = { } -            for s in gmatch(self..pattern,"(.-)"..pattern) do -                t[#t+1] = s -            end -            return t -        else -            return { } -        end -    end - -end - -local chr_to_esc = { -    ["%"] = "%%", -    ["."] = "%.", -    ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", -    ["^"] = "%^", ["$"] = "%$", -    ["["] = "%[", ["]"] = "%]", -    ["("] = "%(", [")"] = "%)", -    ["{"] = "%{", ["}"] = "%}" -} - -string.chr_to_esc = chr_to_esc - -function string:esc() -- variant 2 -    return (gsub(self,"(.)",chr_to_esc)) -end - -function string:unquote() -    return (gsub(self,"^([\"\'])(.*)%1$","%2")) -end - ---~ function string:unquote() ---~     if find(self,"^[\'\"]") then ---~         return sub(self,2,-2) ---~     else ---~         return self ---~     end ---~ end - -function string:quote() -- we 
could use format("%q") -    return format("%q",self) -end - -function string:count(pattern) -- variant 3 +local string = string +local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs + +-- Some functions are already defined in l-lpeg and maybe some from here will +-- move there (unless we also expose caches). + +-- if not string.split then +-- +--     function string.split(str,pattern) +--         local t = { } +--         if #str > 0 then +--             local n = 1 +--             for s in gmatch(str..pattern,"(.-)"..pattern) do +--                 t[n] = s +--                 n = n + 1 +--             end +--         end +--         return t +--     end +-- +-- end + +-- function string.unquoted(str) +--     return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern +-- end + +local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote +               + patterns.dquote * C(patterns.nodquote) * patterns.dquote + +function string.unquoted(str) +    return lpegmatch(unquoted,str) or str +end + +-- print(string.unquoted("test")) +-- print(string.unquoted([["t\"est"]])) +-- print(string.unquoted([["t\"est"x]])) +-- print(string.unquoted("\'test\'")) +-- print(string.unquoted('"test"')) +-- print(string.unquoted('"test"')) + +function string.quoted(str) +    return format("%q",str) -- always double quote +end + +function string.count(str,pattern) -- variant 3      local n = 0 -    for _ in gmatch(self,pattern) do +    for _ in gmatch(str,pattern) do -- not for utf          n = n + 1      end      return n  end -function string:limit(n,sentinel) -    if #self > n then -        sentinel = sentinel or " ..." -        return sub(self,1,(n-#sentinel)) .. 
sentinel +function string.limit(str,n,sentinel) -- not utf proof +    if #str > n then +        sentinel = sentinel or "..." +        return sub(str,1,(n-#sentinel)) .. sentinel      else -        return self +        return str      end  end ---~ function string:strip() -- the .- is quite efficient ---~  -- return match(self,"^%s*(.-)%s*$") or "" ---~  -- return match(self,'^%s*(.*%S)') or '' -- posted on lua list ---~     return find(s,'^%s*$') and '' or match(s,'^%s*(.*%S)') ---~ end - -do -- roberto's variant: -    local space    = lpeg.S(" \t\v\n") -    local nospace  = 1 - space -    local stripper = space^0 * lpeg.C((space^0 * nospace^1)^0) -    function string.strip(str) -        return lpegmatch(stripper,str) or "" -    end -end - -function string:is_empty() -    return not find(self,"%S") -end +local stripper     = patterns.stripper +local collapser    = patterns.collapser +local longtostring = patterns.longtostring -function string:enhance(pattern,action) -    local ok, n = true, 0 -    while ok do -        ok = false -        self = gsub(self,pattern, function(...) -            ok, n = true, n + 1 -            return action(...) 
-        end) -    end -    return self, n +function string.strip(str) +    return lpegmatch(stripper,str) or ""  end -local chr_to_hex, hex_to_chr = { }, { } - -for i=0,255 do -    local c, h = char(i), format("%02X",i) -    chr_to_hex[c], hex_to_chr[h] = h, c +function string.collapsespaces(str) +    return lpegmatch(collapser,str) or ""  end -function string:to_hex() -    return (gsub(self or "","(.)",chr_to_hex)) +function string.longtostring(str) +    return lpegmatch(longtostring,str) or ""  end -function string:from_hex() -    return (gsub(self or "","(..)",hex_to_chr)) -end +-- function string.is_empty(str) +--     return not find(str,"%S") +-- end -if not string.characters then +local pattern = P(" ")^0 * P(-1) -    local function nextchar(str, index) -        index = index + 1 -        return (index <= #str) and index or nil, sub(str,index,index) -    end -    function string:characters() -        return nextchar, self, 0 -    end -    local function nextbyte(str, index) -        index = index + 1 -        return (index <= #str) and index or nil, byte(sub(str,index,index)) -    end -    function string:bytes() -        return nextbyte, self, 0 -    end - -end - --- we can use format for this (neg n) - -function string:rpadd(n,chr) -    local m = n-#self -    if m > 0 then -        return self .. rep(chr or " ",m) +function string.is_empty(str) +    if str == "" then +        return true      else -        return self -    end -end - -function string:lpadd(n,chr) -    local m = n-#self -    if m > 0 then -        return rep(chr or " ",m) .. 
self +        return lpegmatch(pattern,str) and true or false +    end +end + +-- if not string.escapedpattern then +-- +--     local patterns_escapes = { +--         ["%"] = "%%", +--         ["."] = "%.", +--         ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", +--         ["["] = "%[", ["]"] = "%]", +--         ["("] = "%(", [")"] = "%)", +--      -- ["{"] = "%{", ["}"] = "%}" +--      -- ["^"] = "%^", ["$"] = "%$", +--     } +-- +--     local simple_escapes = { +--         ["-"] = "%-", +--         ["."] = "%.", +--         ["?"] = ".", +--         ["*"] = ".*", +--     } +-- +--     function string.escapedpattern(str,simple) +--         return (gsub(str,".",simple and simple_escapes or patterns_escapes)) +--     end +-- +--     function string.topattern(str,lowercase,strict) +--         if str == "" then +--             return ".*" +--         else +--             str = gsub(str,".",simple_escapes) +--             if lowercase then +--                 str = lower(str) +--             end +--             if strict then +--                 return "^" .. str .. "$" +--             else +--                 return str +--             end +--         end +--     end +-- +-- end + +--- needs checking + +local anything     = patterns.anything +local allescapes   = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ? +local someescapes  = Cc("%") * S(".-+%()[]")   -- also {} and ^$ ? 
+local matchescapes = Cc(".") * S("*?")         -- wildcard and single match + +local pattern_a = Cs ( ( allescapes + anything )^0 ) +local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 ) +local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") ) + +function string.escapedpattern(str,simple) +    return lpegmatch(simple and pattern_b or pattern_a,str) +end + +function string.topattern(str,lowercase,strict) +    if str=="" or type(str) ~= "string" then +        return ".*" +    elseif strict then +        str = lpegmatch(pattern_c,str)      else -        return self +        str = lpegmatch(pattern_b,str)      end -end - -string.padd = string.rpadd - -function is_number(str) -- tonumber -    return find(str,"^[%-%+]?[%d]-%.?[%d+]$") == 1 -end - ---~ print(is_number("1")) ---~ print(is_number("1.1")) ---~ print(is_number(".1")) ---~ print(is_number("-0.1")) ---~ print(is_number("+0.1")) ---~ print(is_number("-.1")) ---~ print(is_number("+.1")) - -function string:split_settings() -- no {} handling, see l-aux for lpeg variant -    if find(self,"=") then -        local t = { } -        for k,v in gmatch(self,"(%a+)=([^%,]*)") do -            t[k] = v -        end -        return t +    if lowercase then +        return lower(str)      else -        return nil +        return str      end  end -local patterns_escapes = { -    ["-"] = "%-", -    ["."] = "%.", -    ["+"] = "%+", -    ["*"] = "%*", -    ["%"] = "%%", -    ["("] = "%)", -    [")"] = "%)", -    ["["] = "%[", -    ["]"] = "%]", -} +-- print(string.escapedpattern("12+34*.tex",false)) +-- print(string.escapedpattern("12+34*.tex",true)) +-- print(string.topattern     ("12+34*.tex",false,false)) +-- print(string.topattern     ("12+34*.tex",false,true)) -function string:pattesc() -    return (gsub(self,".",patterns_escapes)) +function string.valid(str,default) +    return (type(str) == "string" and str ~= "" and str) or default or nil  end -local simple_escapes = { -    
["-"] = "%-", -    ["."] = "%.", -    ["?"] = ".", -    ["*"] = ".*", -} +-- handy fallback -function string:simpleesc() -    return (gsub(self,".",simple_escapes)) -end +string.itself  = function(s) return s end -function string:tohash() -    local t = { } -    for s in gmatch(self,"([^, ]+)") do -- lpeg -        t[s] = true -    end -    return t -end +-- also handy (see utf variant) -local pattern = lpeg.Ct(lpeg.C(1)^0) +local pattern = Ct(C(1)^0) -- string and not utf ! -function string:totable() -    return lpegmatch(pattern,self) +function string.totable(str) +    return lpegmatch(pattern,str)  end ---~ local t = { ---~     "1234567123456712345671234567", ---~     "a\tb\tc", ---~     "aa\tbb\tcc", ---~     "aaa\tbbb\tccc", ---~     "aaaa\tbbbb\tcccc", ---~     "aaaaa\tbbbbb\tccccc", ---~     "aaaaaa\tbbbbbb\tcccccc", ---~ } ---~ for k,v do ---~     print(string.tabtospace(t[k])) ---~ end - -function string.tabtospace(str,tab) -    -- we don't handle embedded newlines -    while true do -        local s = find(str,"\t") -        if s then -            if not tab then tab = 7 end -- only when found -            local d = tab-(s-1) % tab -            if d > 0 then -                str = gsub(str,"\t",rep(" ",d),1) -            else -                str = gsub(str,"\t","",1) -            end -        else -            break -        end -    end -    return str -end +-- handy from within tex: -function string:compactlong() -- strips newlines and leading spaces -    self = gsub(self,"[\n\r]+ *","") -    self = gsub(self,"^ *","") -    return self -end +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! -function string:striplong() -- strips newlines and leading spaces -    self = gsub(self,"^%s*","") -    self = gsub(self,"[\n\r]+ *","\n") -    return self +function string.tformat(fmt,...) +    return format(lpegmatch(replacer,fmt),...)  
end -function string:topattern(lowercase,strict) -    if lowercase then -        self = lower(self) -    end -    self = gsub(self,".",simple_escapes) -    if self == "" then -        self = ".*" -    elseif strict then -        self = "^" .. self .. "$" -    end -    return self -end +-- obsolete names: + +string.quote   = string.quoted +string.unquote = string.unquoted diff --git a/lualibs-table.lua b/lualibs-table.lua index ee395d0..640bbbb 100644 --- a/lualibs-table.lua +++ b/lualibs-table.lua @@ -6,208 +6,267 @@ if not modules then modules = { } end modules ['l-table'] = {      license   = "see context related readme files"  } -table.join = table.concat - +local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select +local table, string = table, string  local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove -local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match +local format, lower, dump = string.format, string.lower, string.dump  local getmetatable, setmetatable = getmetatable, setmetatable -local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs - --- Starting with version 5.2 Lua no longer provide ipairs, which makes --- sense. As we already used the for loop and # in most places the --- impact on ConTeXt was not that large; the remaining ipairs already --- have been replaced. In a similar fashio we also hardly used pairs. --- --- Just in case, we provide the fallbacks as discussed in Programming --- in Lua (http://www.lua.org/pil/7.3.html): - -if not ipairs then - -    -- for k, v in ipairs(t) do                ... end -    -- for k=1,#t            do local v = t[k] ... 
end - -    local function iterate(a,i) -        i = i + 1 -        local v = a[i] -        if v ~= nil then -            return i, v --, nil -        end -    end - -    function ipairs(a) -        return iterate, a, 0 -    end - -end - -if not pairs then - -    -- for k, v in pairs(t) do ... end -    -- for k, v in next, t  do ... end - -    function pairs(t) -        return next, t -- , nil -    end - -end - --- Also, unpack has been moved to the table table, and for compatiility --- reasons we provide both now. - -if not table.unpack then -    table.unpack = _G.unpack -elseif not unpack then -    _G.unpack = table.unpack -end +local getinfo = debug.getinfo +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local floor = math.floor  -- extra functions, some might go (when not used) +local stripper = patterns.stripper +  function table.strip(tab) -    local lst = { } +    local lst, l = { }, 0      for i=1,#tab do -        local s = gsub(tab[i],"^%s*(.-)%s*$","%1") +        local s = lpegmatch(stripper,tab[i]) or ""          if s == "" then              -- skip this one          else -            lst[#lst+1] = s +            l = l + 1 +            lst[l] = s          end      end      return lst  end  function table.keys(t) -    local k = { } -    for key, _ in next, t do -        k[#k+1] = key +    if t then +        local keys, k = { }, 0 +        for key, _ in next, t do +            k = k + 1 +            keys[k] = key +        end +        return keys +    else +        return { }      end -    return k  end  local function compare(a,b) -    return (tostring(a) < tostring(b)) +    local ta, tb = type(a), type(b) -- needed, else 11 < 2 +    if ta == tb then +        return a < b +    else +        return tostring(a) < tostring(b) +    end  end  local function sortedkeys(tab) -    local srt, kind = { }, 0 -- 0=unknown 1=string, 2=number 3=mixed -    for key,_ in next, tab do -        srt[#srt+1] = key -        if kind == 3 then -            -- no further 
check -        else -            local tkey = type(key) -            if tkey == "string" then -            --  if kind == 2 then kind = 3 else kind = 1 end -                kind = (kind == 2 and 3) or 1 -            elseif tkey == "number" then -            --  if kind == 1 then kind = 3 else kind = 2 end -                kind = (kind == 1 and 3) or 2 +    if tab then +        local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed +        for key,_ in next, tab do +            s = s + 1 +            srt[s] = key +            if category == 3 then +                -- no further check              else -                kind = 3 +                local tkey = type(key) +                if tkey == "string" then +                    category = (category == 2 and 3) or 1 +                elseif tkey == "number" then +                    category = (category == 1 and 3) or 2 +                else +                    category = 3 +                end              end          end +        if category == 0 or category == 3 then +            sort(srt,compare) +        else +            sort(srt) +        end +        return srt +    else +        return { }      end -    if kind == 0 or kind == 3 then -        sort(srt,compare) +end + +local function sortedhashkeys(tab,cmp) -- fast one +    if tab then +        local srt, s = { }, 0 +        for key,_ in next, tab do +            if key then +                s= s + 1 +                srt[s] = key +            end +        end +        sort(srt,cmp) +        return srt      else -        sort(srt) +        return { }      end -    return srt  end -local function sortedhashkeys(tab) -- fast one -    local srt = { } -    for key,_ in next, tab do -        srt[#srt+1] = key +function table.allkeys(t) +    local keys = { } +    for k, v in next, t do +        for k, v in next, v do +            keys[k] = true +        end      end -    sort(srt) -    return srt +    return sortedkeys(keys)  end  
table.sortedkeys     = sortedkeys  table.sortedhashkeys = sortedhashkeys -function table.sortedhash(t) -    local s = sortedhashkeys(t) -- maybe just sortedkeys -    local n = 0 -    local function kv(s) -        n = n + 1 -        local k = s[n] -        return k, t[k] +local function nothing() end + +local function sortedhash(t,cmp) +    if t then +        local s +        if cmp then +            -- it would be nice if teh sort function would accept a third argument (or nicer, an optional first) +            s = sortedhashkeys(t,function(a,b) return cmp(t,a,b) end) +        else +            s = sortedkeys(t) -- the robust one +        end +        local n = 0 +        local function kv(s) +            n = n + 1 +            local k = s[n] +            return k, t[k] +        end +        return kv, s +    else +        return nothing      end -    return kv, s  end -table.sortedpairs = table.sortedhash +table.sortedhash  = sortedhash +table.sortedpairs = sortedhash -- obsolete -function table.append(t, list) -    for _,v in next, list do -        insert(t,v) +function table.append(t,list) +    local n = #t +    for i=1,#list do +        n = n + 1 +        t[n] = list[i]      end +    return t  end  function table.prepend(t, list) -    for k,v in next, list do -        insert(t,k,v) +    local nl = #list +    local nt = nl + #t +    for i=#t,1,-1 do +        t[nt] = t[i] +        nt = nt - 1      end +    for i=1,#list do +        t[i] = list[i] +    end +    return t  end +-- function table.merge(t, ...) -- first one is target +--     t = t or { } +--     local lst = { ... } +--     for i=1,#lst do +--         for k, v in next, lst[i] do +--             t[k] = v +--         end +--     end +--     return t +-- end +  function table.merge(t, ...) -- first one is target -    t = t or {} -    local lst = {...} -    for i=1,#lst do -        for k, v in next, lst[i] do +    t = t or { } +    for i=1,select("#",...) 
do +        for k, v in next, (select(i,...)) do              t[k] = v          end      end      return t  end +-- function table.merged(...) +--     local tmp, lst = { }, { ... } +--     for i=1,#lst do +--         for k, v in next, lst[i] do +--             tmp[k] = v +--         end +--     end +--     return tmp +-- end +  function table.merged(...) -    local tmp, lst = { }, {...} -    for i=1,#lst do -        for k, v in next, lst[i] do -            tmp[k] = v +    local t = { } +    for i=1,select("#",...) do +        for k, v in next, (select(i,...)) do +            t[k] = v          end      end -    return tmp +    return t  end +-- function table.imerge(t, ...) +--     local lst, nt = { ... }, #t +--     for i=1,#lst do +--         local nst = lst[i] +--         for j=1,#nst do +--             nt = nt + 1 +--             t[nt] = nst[j] +--         end +--     end +--     return t +-- end +  function table.imerge(t, ...) -    local lst = {...} -    for i=1,#lst do -        local nst = lst[i] +    local nt = #t +    for i=1,select("#",...) do +        local nst = select(i,...)          for j=1,#nst do -            t[#t+1] = nst[j] +            nt = nt + 1 +            t[nt] = nst[j]          end      end      return t  end +-- function table.imerged(...) +--     local tmp, ntmp, lst = { }, 0, {...} +--     for i=1,#lst do +--         local nst = lst[i] +--         for j=1,#nst do +--             ntmp = ntmp + 1 +--             tmp[ntmp] = nst[j] +--         end +--     end +--     return tmp +-- end +  function table.imerged(...) -    local tmp, lst = { }, {...} -    for i=1,#lst do -        local nst = lst[i] +    local tmp, ntmp = { }, 0 +    for i=1,select("#",...) do +        local nst = select(i,...)          
for j=1,#nst do -            tmp[#tmp+1] = nst[j] +            ntmp = ntmp + 1 +            tmp[ntmp] = nst[j]          end      end      return tmp  end -local function fastcopy(old) -- fast one +local function fastcopy(old,metatabletoo) -- fast one      if old then          local new = { } -        for k,v in next, old do +        for k, v in next, old do              if type(v) == "table" then -                new[k] = fastcopy(v) -- was just table.copy +                new[k] = fastcopy(v,metatabletoo) -- was just table.copy              else                  new[k] = v              end          end -        -- optional second arg -        local mt = getmetatable(old) -        if mt then -            setmetatable(new,mt) +        if metatabletoo then +            -- optional second arg +            local mt = getmetatable(old) +            if mt then +                setmetatable(new,mt) +            end          end          return new      else @@ -215,6 +274,8 @@ local function fastcopy(old) -- fast one      end  end +-- todo : copy without metatable +  local function copy(t, tables) -- taken from lua wiki, slightly adapted      tables = tables or { }      local tcopy = {} @@ -247,33 +308,14 @@ end  table.fastcopy = fastcopy  table.copy     = copy --- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack) - -function table.sub(t,i,j) -    return { unpack(t,i,j) } -end - -function table.replace(a,b) -    for k,v in next, b do -        a[k] = v +function table.derive(parent) -- for the moment not public +    local child = { } +    if parent then +        setmetatable(child,{ __index = parent })      end +    return child  end --- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice) - -function table.is_empty(t) -- obolete, use inline code instead -    return not t or not next(t) -end - -function table.one_entry(t) -- obolete, use inline code instead -    local n = next(t) -    return n 
and not next(t,n) -end - ---~ function table.starts_at(t) -- obsolete, not nice anyway ---~     return ipairs(t,1)(t,0) ---~ end -  function table.tohash(t,value)      local h = { }      if t then @@ -286,27 +328,19 @@ function table.tohash(t,value)  end  function table.fromhash(t) -    local h = { } +    local hsh, h = { }, 0      for k, v in next, t do -- no ipairs here -        if v then h[#h+1] = k end +        if v then +            h = h + 1 +            hsh[h] = k +        end      end -    return h +    return hsh  end ---~ print(table.serialize(t), "\n") ---~ print(table.serialize(t,"name"), "\n") ---~ print(table.serialize(t,false), "\n") ---~ print(table.serialize(t,true), "\n") ---~ print(table.serialize(t,"name",true), "\n") ---~ print(table.serialize(t,"name",true,true), "\n") - -table.serialize_functions = true -table.serialize_compact   = true -table.serialize_inline    = true -  local noquotes, hexify, handle, reduce, compact, inline, functions -local reserved = table.tohash { -- intercept a language flaw, no reserved words as key +local reserved = table.tohash { -- intercept a language inconvenience: no reserved words as key      'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', 'if',      'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true', 'until', 'while',  } @@ -318,20 +352,23 @@ local function simple_table(t)              n = n + 1          end          if n == #t then -            local tt = { } +            local tt, nt = { }, 0              for i=1,#t do                  local v = t[i]                  local tv = type(v)                  if tv == "number" then +                    nt = nt + 1                      if hexify then -                        tt[#tt+1] = format("0x%04X",v) +                        tt[nt] = format("0x%04X",v)                      else -                        tt[#tt+1] = tostring(v) -- tostring not needed +                        tt[nt] = tostring(v) -- tostring not 
needed                      end                  elseif tv == "boolean" then -                    tt[#tt+1] = tostring(v) +                    nt = nt + 1 +                    tt[nt] = tostring(v)                  elseif tv == "string" then -                    tt[#tt+1] = format("%q",v) +                    nt = nt + 1 +                    tt[nt] = format("%q",v)                  else                      tt = nil                      break @@ -352,36 +389,64 @@ end  -- problem: there no good number_to_string converter with the best resolution +-- probably using .. is faster than format +-- maybe split in a few cases (yes/no hexify) + +-- todo: %g faster on numbers than %s + +-- we can speed this up with repeaters and formatters (is indeed faster) + +local propername = patterns.propername -- was find(name,"^%a[%w%_]*$") + +local function dummy() end +  local function do_serialize(root,name,depth,level,indexed)      if level > 0 then          depth = depth .. " "          if indexed then              handle(format("%s{",depth)) -        elseif name then -        --~ handle(format("%s%s={",depth,key(name))) -            if type(name) == "number" then -- or find(k,"^%d+$") then +        else +            local tn = type(name) +            if tn == "number" then                  if hexify then                      handle(format("%s[0x%04X]={",depth,name))                  else                      handle(format("%s[%s]={",depth,name))                  end -            elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then -                handle(format("%s%s={",depth,name)) +            elseif tn == "string" then +                if noquotes and not reserved[name] and lpegmatch(propername,name) then +                    handle(format("%s%s={",depth,name)) +                else +                    handle(format("%s[%q]={",depth,name)) +                end +            elseif tn == "boolean" then +                
handle(format("%s[%s]={",depth,tostring(name)))              else -                handle(format("%s[%q]={",depth,name)) +                handle(format("%s{",depth))              end -        else -            handle(format("%s{",depth))          end      end      -- we could check for k (index) being number (cardinal)      if root and next(root) then -        local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) +     -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) +     -- if compact then +     --     -- NOT: for k=1,#root do (we need to quit at nil) +     --     for k,v in ipairs(root) do -- can we use next? +     --         if not first then first = k end +     --         last = last + 1 +     --     end +     -- end +        local first, last = nil, 0          if compact then -            -- NOT: for k=1,#root do (we need to quit at nil) -            for k,v in ipairs(root) do -- can we use next? 
-                if not first then first = k end -                last = last + 1 +            last = #root +            for k=1,last do +                if root[k] == nil then +                    last = k - 1 +                    break +                end +            end +            if last > 0 then +                first = 1              end          end          local sk = sortedkeys(root) @@ -391,8 +456,8 @@ local function do_serialize(root,name,depth,level,indexed)              --~ if v == root then                  -- circular              --~ else -            local t = type(v) -            if compact and first and type(k) == "number" and k >= first and k <= last then +            local t, tk = type(v), type(k) +            if compact and first and tk == "number" and k >= first and k <= last then                  if t == "number" then                      if hexify then                          handle(format("%s 0x%04X,",depth,v)) @@ -422,7 +487,7 @@ local function do_serialize(root,name,depth,level,indexed)                      handle(format("%s %s,",depth,tostring(v)))                  elseif t == "function" then                      if functions then -                        handle(format('%s loadstring(%q),',depth,dump(v))) +                        handle(format('%s load(%q),',depth,dump(v)))                      else                          handle(format('%s "function",',depth))                      end @@ -434,18 +499,19 @@ local function do_serialize(root,name,depth,level,indexed)                      handle(format("%s __p__=nil,",depth))                  end              elseif t == "number" then -            --~ if hexify then -            --~     handle(format("%s %s=0x%04X,",depth,key(k),v)) -            --~ else -            --~     handle(format("%s %s=%s,",depth,key(k),v)) -- %.99g -            --~ end -                if type(k) == "number" then -- or find(k,"^%d+$") then +                if tk == "number" then                      if 
hexify then                          handle(format("%s [0x%04X]=0x%04X,",depth,k,v))                      else                          handle(format("%s [%s]=%s,",depth,k,v)) -- %.99g                      end -                elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                elseif tk == "boolean" then +                    if hexify then +                        handle(format("%s [%s]=0x%04X,",depth,tostring(k),v)) +                    else +                        handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g +                    end +                elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                      if hexify then                          handle(format("%s %s=0x%04X,",depth,k,v))                      else @@ -460,27 +526,29 @@ local function do_serialize(root,name,depth,level,indexed)                  end              elseif t == "string" then                  if reduce and tonumber(v) then -                --~ handle(format("%s %s=%s,",depth,key(k),v)) -                    if type(k) == "number" then -- or find(k,"^%d+$") then +                    if tk == "number" then                          if hexify then                              handle(format("%s [0x%04X]=%s,",depth,k,v))                          else                              handle(format("%s [%s]=%s,",depth,k,v))                          end -                    elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                    elseif tk == "boolean" then +                        handle(format("%s [%s]=%s,",depth,tostring(k),v)) +                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                          handle(format("%s %s=%s,",depth,k,v))                      else                          handle(format("%s [%q]=%s,",depth,k,v))                      end                  else -                --~ handle(format("%s %s=%q,",depth,key(k),v)) -              
      if type(k) == "number" then -- or find(k,"^%d+$") then +                    if tk == "number" then                          if hexify then                              handle(format("%s [0x%04X]=%q,",depth,k,v))                          else                              handle(format("%s [%s]=%q,",depth,k,v))                          end -                    elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                    elseif tk == "boolean" then +                        handle(format("%s [%s]=%q,",depth,tostring(k),v)) +                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                          handle(format("%s %s=%q,",depth,k,v))                      else                          handle(format("%s [%q]=%q,",depth,k,v)) @@ -488,14 +556,15 @@ local function do_serialize(root,name,depth,level,indexed)                  end              elseif t == "table" then                  if not next(v) then -                    --~ handle(format("%s %s={},",depth,key(k))) -                    if type(k) == "number" then -- or find(k,"^%d+$") then +                    if tk == "number" then                          if hexify then                              handle(format("%s [0x%04X]={},",depth,k))                          else                              handle(format("%s [%s]={},",depth,k))                          end -                    elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                    elseif tk == "boolean" then +                        handle(format("%s [%s]={},",depth,tostring(k))) +                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                          handle(format("%s %s={},",depth,k))                      else                          handle(format("%s [%q]={},",depth,k)) @@ -503,14 +572,15 @@ local function do_serialize(root,name,depth,level,indexed)                  elseif inline then                      local st 
= simple_table(v)                      if st then -                    --~ handle(format("%s %s={ %s },",depth,key(k),concat(st,", "))) -                        if type(k) == "number" then -- or find(k,"^%d+$") then +                        if tk == "number" then                              if hexify then                                  handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", ")))                              else                                  handle(format("%s [%s]={ %s },",depth,k,concat(st,", ")))                              end -                        elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                        elseif tk == "boolean" then +                            handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) +                        elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                              handle(format("%s %s={ %s },",depth,k,concat(st,", ")))                          else                              handle(format("%s [%q]={ %s },",depth,k,concat(st,", "))) @@ -522,42 +592,47 @@ local function do_serialize(root,name,depth,level,indexed)                      do_serialize(v,k,depth,level+1)                  end              elseif t == "boolean" then -            --~ handle(format("%s %s=%s,",depth,key(k),tostring(v))) -                if type(k) == "number" then -- or find(k,"^%d+$") then +                if tk == "number" then                      if hexify then                          handle(format("%s [0x%04X]=%s,",depth,k,tostring(v)))                      else                          handle(format("%s [%s]=%s,",depth,k,tostring(v)))                      end -                elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                elseif tk == "boolean" then +                    handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) +                elseif noquotes and not reserved[k] and 
lpegmatch(propername,k) then                      handle(format("%s %s=%s,",depth,k,tostring(v)))                  else                      handle(format("%s [%q]=%s,",depth,k,tostring(v)))                  end              elseif t == "function" then                  if functions then -                    --~ handle(format('%s %s=loadstring(%q),',depth,key(k),dump(v))) -                    if type(k) == "number" then -- or find(k,"^%d+$") then +                    local f = getinfo(v).what == "C" and dump(dummy) or dump(v) +                 -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) +                    if tk == "number" then                          if hexify then -                            handle(format("%s [0x%04X]=loadstring(%q),",depth,k,dump(v))) +                            handle(format("%s [0x%04X]=load(%q),",depth,k,f))                          else -                            handle(format("%s [%s]=loadstring(%q),",depth,k,dump(v))) +                            handle(format("%s [%s]=load(%q),",depth,k,f))                          end -                    elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then -                        handle(format("%s %s=loadstring(%q),",depth,k,dump(v))) +                    elseif tk == "boolean" then +                        handle(format("%s [%s]=load(%q),",depth,tostring(k),f)) +                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +                        handle(format("%s %s=load(%q),",depth,k,f))                      else -                        handle(format("%s [%q]=loadstring(%q),",depth,k,dump(v))) +                        handle(format("%s [%q]=load(%q),",depth,k,f))                      end                  end              else -                --~ handle(format("%s %s=%q,",depth,key(k),tostring(v))) -                if type(k) == "number" then -- or find(k,"^%d+$") then +                if tk == "number" 
then                      if hexify then                          handle(format("%s [0x%04X]=%q,",depth,k,tostring(v)))                      else                          handle(format("%s [%s]=%q,",depth,k,tostring(v)))                      end -                elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                elseif tk == "boolean" then +                    handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) +                elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                      handle(format("%s %s=%q,",depth,k,tostring(v)))                  else                      handle(format("%s [%q]=%q,",depth,k,tostring(v))) @@ -574,15 +649,34 @@ end  -- replacing handle by a direct t[#t+1] = ... (plus test) is not much  -- faster (0.03 on 1.00 for zapfino.tma) -local function serialize(root,name,_handle,_reduce,_noquotes,_hexify) -    noquotes = _noquotes -    hexify = _hexify -    handle = _handle or print -    reduce = _reduce or false -    compact = table.serialize_compact -    inline  = compact and table.serialize_inline -    functions = table.serialize_functions +local function serialize(_handle,root,name,specification) -- handle wins      local tname = type(name) +    if type(specification) == "table" then +        noquotes  = specification.noquotes +        hexify    = specification.hexify +        handle    = _handle or specification.handle or print +        reduce    = specification.reduce or false +        functions = specification.functions +        compact   = specification.compact +        inline    = specification.inline and compact +        if functions == nil then +            functions = true +        end +        if compact == nil then +            compact = true +        end +        if inline == nil then +            inline = compact +        end +    else +        noquotes  = false +        hexify    = false +        handle    = _handle or print +        reduce    = false + 
       compact   = true +        inline    = true +        functions = true +    end      if tname == "string" then          if name == "return" then              handle("return {") @@ -604,33 +698,364 @@ local function serialize(root,name,_handle,_reduce,_noquotes,_hexify)      else          handle("t={")      end -    if root and next(root) then -        do_serialize(root,name,"",0,indexed) +    if root then +        -- The dummy access will initialize a table that has a delayed initialization +        -- using a metatable. (maybe explicitly test for metatable) +        if getmetatable(root) then -- todo: make this an option, maybe even per subtable +            local dummy = root._w_h_a_t_e_v_e_r_ +            root._w_h_a_t_e_v_e_r_ = nil +        end +        -- Let's forget about empty tables. +        if next(root) then +            do_serialize(root,name,"",0) +        end      end      handle("}")  end ---~ name: ---~ ---~ true     : return     { } ---~ false    :            { } ---~ nil      : t        = { } ---~ string   : string   = { } ---~ 'return' : return     { } ---~ number   : [number] = { } - -function table.serialize(root,name,reduce,noquotes,hexify) -    local t = { } +--    -- This is some 20% faster than using format (because formatters are much faster) but +--    -- of course, inlining the format using .. is then again faster .. anyway, as we do +--    -- some pretty printing as well there is not that much to gain unless we make a 'fast' +--    -- ugly variant as well. But, we would have to move the formatter to l-string then. 
+ +--    local formatters = string.formatters + +--    local function do_serialize(root,name,level,indexed) +--        if level > 0 then +--            if indexed then +--                handle(formatters["%w{"](level)) +--            else +--                local tn = type(name) +--                if tn == "number" then +--                    if hexify then +--                        handle(formatters["%w[%04H]={"](level,name)) +--                    else +--                        handle(formatters["%w[%s]={"](level,name)) +--                    end +--                elseif tn == "string" then +--                    if noquotes and not reserved[name] and lpegmatch(propername,name) then +--                        handle(formatters["%w%s={"](level,name)) +--                    else +--                        handle(formatters["%w[%q]={"](level,name)) +--                    end +--                elseif tn == "boolean" then +--                    handle(formatters["%w[%S]={"](level,name)) +--                else +--                    handle(formatters["%w{"](level)) +--                end +--            end +--        end +--        -- we could check for k (index) being number (cardinal) +--        if root and next(root) then +--         -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) +--         -- if compact then +--         --     -- NOT: for k=1,#root do (we need to quit at nil) +--         --     for k,v in ipairs(root) do -- can we use next? 
+--         --         if not first then first = k end +--         --         last = last + 1 +--         --     end +--         -- end +--            local first, last = nil, 0 +--            if compact then +--                last = #root +--                for k=1,last do +--                    if root[k] == nil then +--                        last = k - 1 +--                        break +--                    end +--                end +--                if last > 0 then +--                    first = 1 +--                end +--            end +--            local sk = sortedkeys(root) +--            for i=1,#sk do +--                local k = sk[i] +--                local v = root[k] +--                --~ if v == root then +--                    -- circular +--                --~ else +--                local t, tk = type(v), type(k) +--                if compact and first and tk == "number" and k >= first and k <= last then +--                    if t == "number" then +--                        if hexify then +--                            handle(formatters["%w %04H,"](level,v)) +--                        else +--                            handle(formatters["%w %s,"](level,v)) -- %.99g +--                        end +--                    elseif t == "string" then +--                        if reduce and tonumber(v) then +--                            handle(formatters["%w %s,"](level,v)) +--                        else +--                            handle(formatters["%w %q,"](level,v)) +--                        end +--                    elseif t == "table" then +--                        if not next(v) then +--                            handle(formatters["%w {},"](level)) +--                        elseif inline then -- and #t > 0 +--                            local st = simple_table(v) +--                            if st then +--                                handle(formatters["%w { %, t },"](level,st)) +--                            else +--    
                            do_serialize(v,k,level+1,true) +--                            end +--                        else +--                            do_serialize(v,k,level+1,true) +--                        end +--                    elseif t == "boolean" then +--                        handle(formatters["%w %S,"](level,v)) +--                    elseif t == "function" then +--                        if functions then +--                            handle(formatters['%w load(%q),'](level,dump(v))) +--                        else +--                            handle(formatters['%w "function",'](level)) +--                        end +--                    else +--                        handle(formatters["%w %Q,"](level,v)) +--                    end +--                elseif k == "__p__" then -- parent +--                    if false then +--                        handle(formatters["%w __p__=nil,"](level)) +--                    end +--                elseif t == "number" then +--                    if tk == "number" then +--                        if hexify then +--                            handle(formatters["%w [%04H]=%04H,"](level,k,v)) +--                        else +--                            handle(formatters["%w [%s]=%s,"](level,k,v)) -- %.99g +--                        end +--                    elseif tk == "boolean" then +--                        if hexify then +--                            handle(formatters["%w [%S]=%04H,"](level,k,v)) +--                        else +--                            handle(formatters["%w [%S]=%s,"](level,k,v)) -- %.99g +--                        end +--                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +--                        if hexify then +--                            handle(formatters["%w %s=%04H,"](level,k,v)) +--                        else +--                            handle(formatters["%w %s=%s,"](level,k,v)) -- %.99g +--                        end +--    
                else +--                        if hexify then +--                            handle(formatters["%w [%q]=%04H,"](level,k,v)) +--                        else +--                            handle(formatters["%w [%q]=%s,"](level,k,v)) -- %.99g +--                        end +--                    end +--                elseif t == "string" then +--                    if reduce and tonumber(v) then +--                        if tk == "number" then +--                            if hexify then +--                                handle(formatters["%w [%04H]=%s,"](level,k,v)) +--                            else +--                                handle(formatters["%w [%s]=%s,"](level,k,v)) +--                            end +--                        elseif tk == "boolean" then +--                            handle(formatters["%w [%S]=%s,"](level,k,v)) +--                        elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +--                            handle(formatters["%w %s=%s,"](level,k,v)) +--                        else +--                            handle(formatters["%w [%q]=%s,"](level,k,v)) +--                        end +--                    else +--                        if tk == "number" then +--                            if hexify then +--                                handle(formatters["%w [%04H]=%q,"](level,k,v)) +--                            else +--                                handle(formatters["%w [%s]=%q,"](level,k,v)) +--                            end +--                        elseif tk == "boolean" then +--                            handle(formatters["%w [%S]=%q,"](level,k,v)) +--                        elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +--                            handle(formatters["%w %s=%q,"](level,k,v)) +--                        else +--                            handle(formatters["%w [%q]=%q,"](level,k,v)) +--                        end +--                
    end +--                elseif t == "table" then +--                    if not next(v) then +--                        if tk == "number" then +--                            if hexify then +--                                handle(formatters["%w [%04H]={},"](level,k)) +--                            else +--                                handle(formatters["%w [%s]={},"](level,k)) +--                            end +--                        elseif tk == "boolean" then +--                            handle(formatters["%w [%S]={},"](level,k)) +--                        elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +--                            handle(formatters["%w %s={},"](level,k)) +--                        else +--                            handle(formatters["%w [%q]={},"](level,k)) +--                        end +--                    elseif inline then +--                        local st = simple_table(v) +--                        if st then +--                            if tk == "number" then +--                                if hexify then +--                                    handle(formatters["%w [%04H]={ %, t },"](level,k,st)) +--                                else +--                                    handle(formatters["%w [%s]={ %, t },"](level,k,st)) +--                                end +--                            elseif tk == "boolean" then +--                                handle(formatters["%w [%S]={ %, t },"](level,k,st)) +--                            elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +--                                handle(formatters["%w %s={ %, t },"](level,k,st)) +--                            else +--                                handle(formatters["%w [%q]={ %, t },"](level,k,st)) +--                            end +--                        else +--                            do_serialize(v,k,level+1) +--                        end +--                    else +--     
                   do_serialize(v,k,level+1) +--                    end +--                elseif t == "boolean" then +--                    if tk == "number" then +--                        if hexify then +--                            handle(formatters["%w [%04H]=%S,"](level,k,v)) +--                        else +--                            handle(formatters["%w [%s]=%S,"](level,k,v)) +--                        end +--                    elseif tk == "boolean" then +--                        handle(formatters["%w [%S]=%S,"](level,k,v)) +--                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +--                        handle(formatters["%w %s=%S,"](level,k,v)) +--                    else +--                        handle(formatters["%w [%q]=%S,"](level,k,v)) +--                    end +--                elseif t == "function" then +--                    if functions then +--                        local f = getinfo(v).what == "C" and dump(dummy) or dump(v) +--                     -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) 
end) or dump(v) +--                        if tk == "number" then +--                            if hexify then +--                                handle(formatters["%w [%04H]=load(%q),"](level,k,f)) +--                            else +--                                handle(formatters["%w [%s]=load(%q),"](level,k,f)) +--                            end +--                        elseif tk == "boolean" then +--                            handle(formatters["%w [%S]=load(%q),"](level,k,f)) +--                        elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +--                            handle(formatters["%w %s=load(%q),"](level,k,f)) +--                        else +--                            handle(formatters["%w [%q]=load(%q),"](level,k,f)) +--                        end +--                    end +--                else +--                    if tk == "number" then +--                        if hexify then +--                            handle(formatters["%w [%04H]=%Q,"](level,k,v)) +--                        else +--                            handle(formatters["%w [%s]=%Q,"](level,k,v)) +--                        end +--                    elseif tk == "boolean" then +--                        handle(formatters["%w [%S]=%Q,"](level,k,v)) +--                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +--                        handle(formatters["%w %s=%Q,"](level,k,v)) +--                    else +--                        handle(formatters["%w [%q]=%Q,"](level,k,v)) +--                    end +--                end +--                --~ end +--            end +--        end +--        if level > 0 then +--            handle(formatters["%w}"](level)) +--        end +--    end + +--    local function serialize(_handle,root,name,specification) -- handle wins +--        local tname = type(name) +--        if type(specification) == "table" then +--            noquotes  = specification.noquotes +--         
   hexify    = specification.hexify +--            handle    = _handle or specification.handle or print +--            reduce    = specification.reduce or false +--            functions = specification.functions +--            compact   = specification.compact +--            inline    = specification.inline and compact +--            if functions == nil then +--                functions = true +--            end +--            if compact == nil then +--                compact = true +--            end +--            if inline == nil then +--                inline = compact +--            end +--        else +--            noquotes  = false +--            hexify    = false +--            handle    = _handle or print +--            reduce    = false +--            compact   = true +--            inline    = true +--            functions = true +--        end +--        if tname == "string" then +--            if name == "return" then +--                handle("return {") +--            else +--                handle(name .. "={") +--            end +--        elseif tname == "number" then +--            if hexify then +--                handle(format("[0x%04X]={",name)) +--            else +--                handle("[" .. name .. "]={") +--            end +--        elseif tname == "boolean" then +--            if name then +--                handle("return {") +--            else +--                handle("{") +--            end +--        else +--            handle("t={") +--        end +--        if root then +--            -- The dummy access will initialize a table that has a delayed initialization +--            -- using a metatable. (maybe explicitly test for metatable) +--            if getmetatable(root) then -- todo: make this an option, maybe even per subtable +--                local dummy = root._w_h_a_t_e_v_e_r_ +--                root._w_h_a_t_e_v_e_r_ = nil +--            end +--            -- Let's forget about empty tables. 
+--            if next(root) then +--                do_serialize(root,name,0) +--            end +--        end +--        handle("}") +--    end + +-- name: +-- +-- true     : return     { } +-- false    :            { } +-- nil      : t        = { } +-- string   : string   = { } +-- "return" : return     { } +-- number   : [number] = { } + +function table.serialize(root,name,specification) +    local t, n = { }, 0      local function flush(s) -        t[#t+1] = s +        n = n + 1 +        t[n] = s      end -    serialize(root,name,flush,reduce,noquotes,hexify) +    serialize(flush,root,name,specification)      return concat(t,"\n")  end -function table.tohandle(handle,root,name,reduce,noquotes,hexify) -    serialize(root,name,handle,reduce,noquotes,hexify) -end +--   local a = { e = { 1,2,3,4,5,6}, a = 1, b = 2, c = "ccc", d = { a = 1, b = 2, c = "ccc", d = { a = 1, b = 2, c = "ccc" } } } +--   local t = os.clock() +--   for i=1,10000 do +--       table.serialize(a) +--   end +--   print(os.clock()-t,table.serialize(a)) + +table.tohandle = serialize  -- sometimes tables are real use (zapfino extra pro is some 85M) in which  -- case a stepwise serialization is nice; actually, we could consider: @@ -641,73 +1066,63 @@ end  --  -- so this is on the todo list -table.tofile_maxtab = 2*1024 +local maxtab = 2*1024 -function table.tofile(filename,root,name,reduce,noquotes,hexify) +function table.tofile(filename,root,name,specification)      local f = io.open(filename,'w')      if f then -        local maxtab = table.tofile_maxtab          if maxtab > 1 then -            local t = { } +            local t, n = { }, 0              local function flush(s) -                t[#t+1] = s -                if #t > maxtab then +                n = n + 1 +                t[n] = s +                if n > maxtab then                      f:write(concat(t,"\n"),"\n") -- hm, write(sometable) should be nice -                    t = { } +                    t, n = { }, 0 -- we could 
recycle t if needed                  end              end -            serialize(root,name,flush,reduce,noquotes,hexify) +            serialize(flush,root,name,specification)              f:write(concat(t,"\n"),"\n")          else              local function flush(s)                  f:write(s,"\n")              end -            serialize(root,name,flush,reduce,noquotes,hexify) +            serialize(flush,root,name,specification)          end          f:close() +        io.flush()      end  end -local function flatten(t,f,complete) -- is this used? meybe a variant with next, ... -    for i=1,#t do -        local v = t[i] -        if type(v) == "table" then -            if complete or type(v[1]) == "table" then -                flatten(v,f,complete) +local function flattened(t,f,depth) +    if f == nil then +        f = { } +        depth = 0xFFFF +    elseif tonumber(f) then +        -- assume that only two arguments are given +        depth = f +        f = { } +    elseif not depth then +        depth = 0xFFFF +    end +    for k, v in next, t do +        if type(k) ~= "number" then +            if depth > 0 and type(v) == "table" then +                flattened(v,f,depth-1)              else -                f[#f+1] = v +                f[k] = v              end -        else -            f[#f+1] = v          end      end -end - -function table.flatten(t) -    local f = { } -    flatten(t,f,true) -    return f -end - -function table.unnest(t) -- bad name -    local f = { } -    flatten(t,f,false) -    return f -end - -table.flatten_one_level = table.unnest - --- a better one: - -local function flattened(t,f) -    if not f then -        f = { } -    end -    for k, v in next, t do -        if type(v) == "table" then -            flattened(v,f) +    local n = #f +    for k=1,#t do +        local v = t[k] +        if depth > 0 and type(v) == "table" then +            flattened(v,f,depth-1) +            n = #f          else -            f[k] = v +            n = n 
+ 1 +            f[n] = v          end      end      return f @@ -715,49 +1130,27 @@ end  table.flattened = flattened --- the next three may disappear - -function table.remove_value(t,value) -- todo: n -    if value then -        for i=1,#t do -            if t[i] == value then -                remove(t,i) -                -- remove all, so no: return -            end -        end +local function unnest(t,f) -- only used in mk, for old times sake +    if not f then          -- and only relevant for token lists +        f = { }            -- this one can become obsolete      end -end - -function table.insert_before_value(t,value,str) -    if str then -        if value then -            for i=1,#t do -                if t[i] == value then -                    insert(t,i,str) -                    return -                end +    for i=1,#t do +        local v = t[i] +        if type(v) == "table" then +            if type(v[1]) == "table" then +                unnest(v,f) +            else +                f[#f+1] = v              end +        else +            f[#f+1] = v          end -        insert(t,1,str) -    elseif value then -        insert(t,1,value)      end +    return f  end -function table.insert_after_value(t,value,str) -    if str then -        if value then -            for i=1,#t do -                if t[i] == value then -                    insert(t,i+1,str) -                    return -                end -            end -        end -        t[#t+1] = str -    elseif value then -        t[#t+1] = value -    end +function table.unnest(t) -- bad name +    return unnest(t)  end  local function are_equal(a,b,n,m) -- indexed @@ -768,7 +1161,7 @@ local function are_equal(a,b,n,m) -- indexed              local ai, bi = a[i], b[i]              if ai==bi then                  -- same -            elseif type(ai)=="table" and type(bi)=="table" then +            elseif type(ai) == "table" and type(bi) == "table" then                  if not are_equal(ai,bi) 
then                      return false                  end @@ -784,7 +1177,7 @@ end  local function identical(a,b) -- assumes same structure      for ka, va in next, a do -        local vb = b[k] +        local vb = b[ka]          if va == vb then              -- same          elseif type(va) == "table" and  type(vb) == "table" then @@ -798,15 +1191,15 @@ local function identical(a,b) -- assumes same structure      return true  end -table.are_equal = are_equal  table.identical = identical +table.are_equal = are_equal  -- maybe also make a combined one -function table.compact(t) +function table.compact(t) -- remove empty tables, assumes subtables      if t then -        for k,v in next, t do -            if not next(v) then +        for k, v in next, t do +            if not next(v) then -- no type checking                  t[k] = nil              end          end @@ -825,86 +1218,148 @@ function table.contains(t, v)  end  function table.count(t) -    local n, e = 0, next(t) -    while e do -        n, e = n + 1, next(t,e) +    local n = 0 +    for k, v in next, t do +        n = n + 1      end      return n  end -function table.swapped(t) -    local s = { } +function table.swapped(t,s) -- hash +    local n = { } +    if s then +        for k, v in next, s do +            n[k] = v +        end +    end      for k, v in next, t do -        s[v] = k +        n[v] = k      end -    return s +    return n  end ---~ function table.are_equal(a,b) ---~     return table.serialize(a) == table.serialize(b) ---~ end - -function table.clone(t,p) -- t is optional or nil or table -    if not p then -        t, p = { }, t or { } -    elseif not t then -        t = { } +function table.mirrored(t) -- hash +    local n = { } +    for k, v in next, t do +        n[v] = k +        n[k] = v      end -    setmetatable(t, { __index = function(_,key) return p[key] end }) -- why not __index = p ? 
-    return t -end - -function table.hexed(t,seperator) -    local tt = { } -    for i=1,#t do tt[i] = format("0x%04X",t[i]) end -    return concat(tt,seperator or " ") +    return n  end -function table.reverse_hash(h) -    local r = { } -    for k,v in next, h do -        r[v] = lower(gsub(k," ","")) +function table.reversed(t) +    if t then +        local tt, tn = { }, #t +        if tn > 0 then +            local ttn = 0 +            for i=tn,1,-1 do +                ttn = ttn + 1 +                tt[ttn] = t[i] +            end +        end +        return tt      end -    return r  end  function table.reverse(t) -    local tt = { } -    if #t > 0 then -        for i=#t,1,-1 do -            tt[#tt+1] = t[i] +    if t then +        local n = #t +        for i=1,floor(n/2) do +            local j = n - i + 1 +            t[i], t[j] = t[j], t[i]          end +        return t      end -    return tt  end -function table.insert_before_value(t,value,extra) -    for i=1,#t do -        if t[i] == extra then -            remove(t,i) -        end +function table.sequenced(t,sep,simple) -- hash only +    if not t then +        return ""      end -    for i=1,#t do -        if t[i] == value then -            insert(t,i,extra) -            return +    local n = #t +    local s = { } +    if n > 0 then +        -- indexed +        for i=1,n do +            s[i] = tostring(t[i]) +        end +    else +        -- hashed +        n = 0 +        for k, v in sortedhash(t) do +            if simple then +                if v == true then +                    n = n + 1 +                    s[n] = k +                elseif v and v~= "" then +                    n = n + 1 +                    s[n] = k .. "=" .. tostring(v) +                end +            else +                n = n + 1 +                s[n] = k .. "=" .. 
tostring(v) +            end          end      end -    insert(t,1,extra) +    return concat(s,sep or " | ")  end -function table.insert_after_value(t,value,extra) -    for i=1,#t do -        if t[i] == extra then -            remove(t,i) -        end +function table.print(t,...) +    if type(t) ~= "table" then +        print(tostring(t)) +    else +        serialize(print,t,...)      end -    for i=1,#t do -        if t[i] == value then -            insert(t,i+1,extra) -            return +end + +setinspector(function(v) if type(v) == "table" then serialize(print,v,"table") return true end end) + +-- -- -- obsolete but we keep them for a while and might comment them later -- -- -- + +-- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack) + +function table.sub(t,i,j) +    return { unpack(t,i,j) } +end + +-- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice) + +function table.is_empty(t) +    return not t or not next(t) +end + +function table.has_one_entry(t) +    return t and not next(t,next(t)) +end + +-- new + +function table.loweredkeys(t) -- maybe utf +    local l = { } +    for k, v in next, t do +        l[lower(k)] = v +    end +    return l +end + +-- new, might move (maybe duplicate) + +function table.unique(old) +    local hash = { } +    local new = { } +    local n = 0 +    for i=1,#old do +        local oi = old[i] +        if not hash[oi] then +            n = n + 1 +            new[n] = oi +            hash[oi] = true          end      end -    insert(t,#t+1,extra) +    return new  end +function table.sorted(t,...) +    sort(t,...) 
+    return t -- still sorts in-place +end diff --git a/lualibs-unicode.lua b/lualibs-unicode.lua index 0c5a601..813ffd5 100644 --- a/lualibs-unicode.lua +++ b/lualibs-unicode.lua @@ -6,170 +6,746 @@ if not modules then modules = { } end modules ['l-unicode'] = {      license   = "see context related readme files"  } +-- this module will be reorganized + +-- todo: utf.sub replacement (used in syst-aux) + +-- we put these in the utf namespace: + +utf = utf or (unicode and unicode.utf8) or { } + +utf.characters = utf.characters or string.utfcharacters +utf.values     = utf.values     or string.utfvalues + +-- string.utfvalues +-- string.utfcharacters +-- string.characters +-- string.characterpairs +-- string.bytes +-- string.bytepairs + +local type = type +local char, byte, format, sub = string.char, string.byte, string.format, string.sub +local concat = table.concat +local P, C, R, Cs, Ct, Cmt, Cc, Carg, Cp = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Carg, lpeg.Cp +local lpegmatch, patterns = lpeg.match, lpeg.patterns + +local bytepairs     = string.bytepairs + +local finder        = lpeg.finder +local replacer      = lpeg.replacer + +local utfvalues     = utf.values +local utfgmatch     = utf.gmatch -- not always present + +local p_utftype     = patterns.utftype +local p_utfoffset   = patterns.utfoffset +local p_utf8char    = patterns.utf8char +local p_utf8byte    = patterns.utf8byte +local p_utfbom      = patterns.utfbom +local p_newline     = patterns.newline +local p_whitespace  = patterns.whitespace +  if not unicode then -    unicode = { utf8 = { } } +    unicode = { utf = utf } -- for a while + +end + +if not utf.char then      local floor, char = math.floor, string.char -    function unicode.utf8.utfchar(n) +    function utf.char(n)          if n < 0x80 then +            -- 0aaaaaaa : 0x80              return char(n)          elseif n < 0x800 then -            return char(0xC0 + floor(n/0x40))  .. 
char(0x80 + (n % 0x40)) +            -- 110bbbaa : 0xC0 : n >> 6 +            -- 10aaaaaa : 0x80 : n & 0x3F +            return char( +                0xC0 + floor(n/0x40), +                0x80 + (n % 0x40) +            )          elseif n < 0x10000 then -            return char(0xE0 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) -        elseif n < 0x40000 then -            return char(0xF0 + floor(n/0x40000)) .. char(0x80 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) -        else -- wrong: -          -- return char(0xF1 + floor(n/0x1000000)) .. char(0x80 + floor(n/0x40000)) .. char(0x80 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) -            return "?" +            -- 1110bbbb : 0xE0 :  n >> 12 +            -- 10bbbbaa : 0x80 : (n >>  6) & 0x3F +            -- 10aaaaaa : 0x80 :  n        & 0x3F +            return char( +                0xE0 + floor(n/0x1000), +                0x80 + (floor(n/0x40) % 0x40), +                0x80 + (n % 0x40) +            ) +        elseif n < 0x200000 then +            -- 11110ccc : 0xF0 :  n >> 18 +            -- 10ccbbbb : 0x80 : (n >> 12) & 0x3F +            -- 10bbbbaa : 0x80 : (n >>  6) & 0x3F +            -- 10aaaaaa : 0x80 :  n        & 0x3F +            -- dddd     : ccccc - 1 +            return char( +                0xF0 +  floor(n/0x40000), +                0x80 + (floor(n/0x1000) % 0x40), +                0x80 + (floor(n/0x40) % 0x40), +                0x80 + (n % 0x40) +            ) +        else +            return ""          end      end  end -utf = utf or unicode.utf8 +if not utf.byte then -local concat, utfchar, utfgsub = table.concat, utf.char, utf.gsub -local char, byte, find, bytepairs = string.char, string.byte, string.find, string.bytepairs +    local utf8byte = patterns.utf8byte --- 0  EF BB BF      UTF-8 --- 1  FF FE         UTF-16-little-endian --- 2  FE FF         UTF-16-big-endian 
--- 3  FF FE 00 00   UTF-32-little-endian --- 4  00 00 FE FF   UTF-32-big-endian +    function utf.byte(c) +        return lpegmatch(utf8byte,c) +    end -unicode.utfname = { -    [0] = 'utf-8', -    [1] = 'utf-16-le', -    [2] = 'utf-16-be', -    [3] = 'utf-32-le', -    [4] = 'utf-32-be' -} +end --- \000 fails in <= 5.0 but is valid in >=5.1 where %z is depricated +local utfchar, utfbyte = utf.char, utf.byte -function unicode.utftype(f) -    local str = f:read(4) -    if not str then -        f:seek('set') -        return 0 - -- elseif find(str,"^%z%z\254\255") then            -- depricated - -- elseif find(str,"^\000\000\254\255") then        -- not permitted and bugged -    elseif find(str,"\000\000\254\255",1,true) then  -- seems to work okay (TH) -        return 4 - -- elseif find(str,"^\255\254%z%z") then            -- depricated - -- elseif find(str,"^\255\254\000\000") then        -- not permitted and bugged -    elseif find(str,"\255\254\000\000",1,true) then  -- seems to work okay (TH) -        return 3 -    elseif find(str,"^\254\255") then -        f:seek('set',2) -        return 2 -    elseif find(str,"^\255\254") then -        f:seek('set',2) -        return 1 -    elseif find(str,"^\239\187\191") then -        f:seek('set',3) -        return 0 -    else -        f:seek('set') -        return 0 +-- As we want to get rid of the (unmaintained) utf library we implement our own +-- variants (in due time an independent module): + +function utf.filetype(data) +    return data and lpegmatch(p_utftype,data) or "unknown" +end + +local toentities = Cs ( +    ( +        patterns.utf8one +            + ( +                patterns.utf8two +              + patterns.utf8three +              + patterns.utf8four +            ) / function(s) local b = utfbyte(s) if b < 127 then return s else return format("&#%X;",b) end end +    )^0 +) + +patterns.toentities = toentities + +function utf.toentities(str) +    return lpegmatch(toentities,str) +end + +-- local utfchr = { } 
-- 60K -> 2.638 M extra mem but currently not called that often (on latin) +-- +-- setmetatable(utfchr, { __index = function(t,k) local v = utfchar(k) t[k] = v return v end } ) +-- +-- collectgarbage("collect") +-- local u = collectgarbage("count")*1024 +-- local t = os.clock() +-- for i=1,1000 do +--     for i=1,600 do +--         local a = utfchr[i] +--     end +-- end +-- print(os.clock()-t,collectgarbage("count")*1024-u) + +-- collectgarbage("collect") +-- local t = os.clock() +-- for i=1,1000 do +--     for i=1,600 do +--         local a = utfchar(i) +--     end +-- end +-- print(os.clock()-t,collectgarbage("count")*1024-u) + +-- local byte = string.byte +-- local utfchar = utf.char + +local one  = P(1) +local two  = C(1) * C(1) +local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) + +-- actually one of them is already utf ... sort of useless this one + +-- function utf.char(n) +--     if n < 0x80 then +--         return char(n) +--     elseif n < 0x800 then +--         return char( +--             0xC0 + floor(n/0x40), +--             0x80 + (n % 0x40) +--         ) +--     elseif n < 0x10000 then +--         return char( +--             0xE0 + floor(n/0x1000), +--             0x80 + (floor(n/0x40) % 0x40), +--             0x80 + (n % 0x40) +--         ) +--     elseif n < 0x40000 then +--         return char( +--             0xF0 + floor(n/0x40000), +--             0x80 + floor(n/0x1000), +--             0x80 + (floor(n/0x40) % 0x40), +--             0x80 + (n % 0x40) +--         ) +--     else +--      -- return char( +--      --     0xF1 + floor(n/0x1000000), +--      --     0x80 + floor(n/0x40000), +--      --     0x80 + floor(n/0x1000), +--      --     0x80 + (floor(n/0x40) % 0x40), +--      --     0x80 + (n % 0x40) +--      -- ) +--         return "?" 
+--     end +-- end +-- +-- merge into: + +local pattern = P("\254\255") * Cs( ( +                    four  / function(a,b,c,d) +                                local ab = 0xFF * byte(a) + byte(b) +                                local cd = 0xFF * byte(c) + byte(d) +                                return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) +                            end +                  + two   / function(a,b) +                                return utfchar(byte(a)*256 + byte(b)) +                            end +                  + one +                )^1 ) +              + P("\255\254") * Cs( ( +                    four  / function(b,a,d,c) +                                local ab = 0xFF * byte(a) + byte(b) +                                local cd = 0xFF * byte(c) + byte(d) +                                return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) +                            end +                  + two   / function(b,a) +                                return utfchar(byte(a)*256 + byte(b)) +                            end +                  + one +                )^1 ) + +function string.toutf(s) -- in string namespace +    return lpegmatch(pattern,s) or s -- todo: utf32 +end + +local validatedutf = Cs ( +    ( +        patterns.utf8one +      + patterns.utf8two +      + patterns.utf8three +      + patterns.utf8four +      + P(1) / "�" +    )^0 +) + +patterns.validatedutf = validatedutf + +function utf.is_valid(str) +    return type(str) == "string" and lpegmatch(validatedutf,str) or false +end + +if not utf.len then + +    -- -- alternative 1: 0.77 +    -- +    -- local utfcharcounter = utfbom^-1 * Cs((p_utf8char/'!')^0) +    -- +    -- function utf.len(str) +    --     return #lpegmatch(utfcharcounter,str or "") +    -- end +    -- +    -- -- alternative 2: 1.70 +    -- +    -- local n = 0 +    -- +    -- local utfcharcounter = utfbom^-1 * (p_utf8char/function() n = n + 1 end)^0 -- slow +    -- +    -- function 
utf.length(str) +    --     n = 0 +    --     lpegmatch(utfcharcounter,str or "") +    --     return n +    -- end +    -- +    -- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) + +    -- local n = 0 +    -- +    -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( Cp() * ( +    -- --     patterns.utf8one  ^1 * Cc(1) +    -- --   + patterns.utf8two  ^1 * Cc(2) +    -- --   + patterns.utf8three^1 * Cc(3) +    -- --   + patterns.utf8four ^1 * Cc(4) ) * Cp() / function(f,d,t) n = n + (t - f)/d end +    -- --  )^0 ) -- just as many captures as below +    -- +    -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( +    -- --     (Cmt(patterns.utf8one  ^1,function(_,_,s) n = n + #s   return true end)) +    -- --   + (Cmt(patterns.utf8two  ^1,function(_,_,s) n = n + #s/2 return true end)) +    -- --   + (Cmt(patterns.utf8three^1,function(_,_,s) n = n + #s/3 return true end)) +    -- --   + (Cmt(patterns.utf8four ^1,function(_,_,s) n = n + #s/4 return true end)) +    -- -- )^0 ) -- not interesting as it creates strings but sometimes faster +    -- +    -- -- The best so far: +    -- +    -- local utfcharcounter = utfbom^-1 * P ( ( +    --     Cp() * (patterns.utf8one  )^1 * Cp() / function(f,t) n = n +  t - f    end +    --   + Cp() * (patterns.utf8two  )^1 * Cp() / function(f,t) n = n + (t - f)/2 end +    --   + Cp() * (patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end +    --   + Cp() * (patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end +    -- )^0 ) + +    -- function utf.len(str) +    --     n = 0 +    --     lpegmatch(utfcharcounter,str or "") +    --     return n +    -- end + +    local n, f = 0, 1 + +    local utfcharcounter = patterns.utfbom^-1 * Cmt ( +        Cc(1) * patterns.utf8one  ^1 +      + Cc(2) * patterns.utf8two  ^1 +      + Cc(3) * patterns.utf8three^1 +      + Cc(4) * patterns.utf8four ^1, +        function(_,t,d) -- due to Cc no string captures, so faster +            n = n + (t - f)/d +     
       f = t +            return true +        end +    )^0 + +    function utf.len(str) +        n, f = 0, 1 +        lpegmatch(utfcharcounter,str or "") +        return n      end + +    -- -- these are quite a bit slower: + +    -- utfcharcounter = utfbom^-1 * (Cmt(P(1) * R("\128\191")^0, function() n = n + 1 return true end))^0 -- 50+ times slower +    -- utfcharcounter = utfbom^-1 * (Cmt(P(1), function() n = n + 1 return true end) * R("\128\191")^0)^0 -- 50- times slower +  end -function unicode.utf16_to_utf8(str, endian) -- maybe a gsub is faster or an lpeg -    local result, tmp, n, m, p = { }, { }, 0, 0, 0 -    -- lf | cr | crlf / (cr:13, lf:10) -    local function doit() -        if n == 10 then -            if p ~= 13 then -                result[#result+1] = concat(tmp) -                tmp = { } -                p = 0 -            end -        elseif n == 13 then -            result[#result+1] = concat(tmp) -            tmp = { } -            p = n +utf.length = utf.len + +if not utf.sub then + +    -- inefficient as lpeg just copies ^n + +    -- local function sub(str,start,stop) +    --     local pattern = p_utf8char^-(start-1) * C(p_utf8char^-(stop-start+1)) +    --     inspect(pattern) +    --     return lpegmatch(pattern,str) or "" +    -- end + +    -- local b, e, n, first, last = 0, 0, 0, 0, 0 +    -- +    -- local function slide(s,p) +    --     n = n + 1 +    --     if n == first then +    --         b = p +    --         if not last then +    --             return nil +    --         end +    --     end +    --     if n == last then +    --         e = p +    --         return nil +    --     else +    --         return p +    --     end +    -- end +    -- +    -- local pattern = Cmt(p_utf8char,slide)^0 +    -- +    -- function utf.sub(str,start,stop) -- todo: from the end +    --     if not start then +    --         return str +    --     end +    --     b, e, n, first, last = 0, 0, 0, start, stop +    --     lpegmatch(pattern,str) +    --  
   if not stop then +    --         return sub(str,b) +    --     else +    --         return sub(str,b,e-1) +    --     end +    -- end + +    -- print(utf.sub("Hans Hagen is my name")) +    -- print(utf.sub("Hans Hagen is my name",5)) +    -- print(utf.sub("Hans Hagen is my name",5,10)) + +    local utflength = utf.length + +    -- also negative indices, upto 10 times slower than a c variant + +    local b, e, n, first, last = 0, 0, 0, 0, 0 + +    local function slide_zero(s,p) +        n = n + 1 +        if n >= last then +            e = p - 1 +        else +            return p +        end +    end + +    local function slide_one(s,p) +        n = n + 1 +        if n == first then +            b = p +        end +        if n >= last then +            e = p - 1          else -            tmp[#tmp+1] = utfchar(n) -            p = 0 +            return p          end      end -    for l,r in bytepairs(str) do -        if r then -            if endian then -                n = l*256 + r + +    local function slide_two(s,p) +        n = n + 1 +        if n == first then +            b = p +        else +            return true +        end +    end + +    local pattern_zero = Cmt(p_utf8char,slide_zero)^0 +    local pattern_one  = Cmt(p_utf8char,slide_one )^0 +    local pattern_two  = Cmt(p_utf8char,slide_two )^0 + +    function utf.sub(str,start,stop) +        if not start then +            return str +        end +        if start == 0 then +            start = 1 +        end +        if not stop then +            if start < 0 then +                local l = utflength(str) -- we can inline this function if needed +                start = l + start              else -                n = r*256 + l +                start = start - 1              end -            if m > 0 then -                n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000 -                m = 0 -                doit() -            elseif n >= 0xD800 and n <= 0xDBFF then -                m = n +        
    b, n, first = 0, 0, start +            lpegmatch(pattern_two,str) +            if n >= first then +                return sub(str,b)              else -                doit() +                return "" +            end +        end +        if start < 0 or stop < 0 then +            local l = utf.length(str) +            if start < 0 then +                start = l + start +                if start <= 0 then +                    start = 1 +                else +                    start = start + 1 +                end +            end +            if stop < 0 then +                stop = l + stop +                if stop == 0 then +                    stop = 1 +                else +                    stop = stop + 1 +                end              end          end +        if start > stop then +            return "" +        elseif start > 1 then +            b, e, n, first, last = 0, 0, 0, start - 1, stop +            lpegmatch(pattern_one,str) +            if n >= first and e == 0 then +                e = #str +            end +            return sub(str,b,e) +        else +            b, e, n, last = 1, 0, 0, stop +            lpegmatch(pattern_zero,str) +            if e == 0 then +                e = #str +            end +            return sub(str,b,e) +        end      end -    if #tmp > 0 then -        result[#result+1] = concat(tmp) + +    -- local n = 100000 +    -- local str = string.rep("123456àáâãäå",100) +    -- +    -- for i=-15,15,1 do +    --     for j=-15,15,1 do +    --         if utf.xsub(str,i,j) ~= utf.sub(str,i,j) then +    --             print("error",i,j,"l>"..utf.xsub(str,i,j),"s>"..utf.sub(str,i,j)) +    --         end +    --     end +    --     if utf.xsub(str,i) ~= utf.sub(str,i) then +    --         print("error",i,"l>"..utf.xsub(str,i),"s>"..utf.sub(str,i)) +    --     end +    -- end + +    -- print(" 1, 7",utf.xsub(str, 1, 7),utf.sub(str, 1, 7)) +    -- print(" 0, 7",utf.xsub(str, 0, 7),utf.sub(str, 0, 7)) +    -- 
print(" 0, 9",utf.xsub(str, 0, 9),utf.sub(str, 0, 9)) +    -- print(" 4   ",utf.xsub(str, 4   ),utf.sub(str, 4   )) +    -- print(" 0   ",utf.xsub(str, 0   ),utf.sub(str, 0   )) +    -- print(" 0, 0",utf.xsub(str, 0, 0),utf.sub(str, 0, 0)) +    -- print(" 4, 4",utf.xsub(str, 4, 4),utf.sub(str, 4, 4)) +    -- print(" 4, 0",utf.xsub(str, 4, 0),utf.sub(str, 4, 0)) +    -- print("-3, 0",utf.xsub(str,-3, 0),utf.sub(str,-3, 0)) +    -- print(" 0,-3",utf.xsub(str, 0,-3),utf.sub(str, 0,-3)) +    -- print(" 5,-3",utf.xsub(str,-5,-3),utf.sub(str,-5,-3)) +    -- print("-3   ",utf.xsub(str,-3   ),utf.sub(str,-3   )) + +end + +-- a replacement for simple gsubs: + +function utf.remapper(mapping) +    local pattern = Cs((p_utf8char/mapping)^0) +    return function(str) +        if not str or str == "" then +            return "" +        else +            return lpegmatch(pattern,str) +        end +    end, pattern +end + +-- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" } +-- print(remap("abcd 1234 abcd")) + +-- + +function utf.replacer(t) -- no precheck, always string builder +    local r = replacer(t,false,false,true) +    return function(str) +        return lpegmatch(r,str)      end -    return result  end -function unicode.utf32_to_utf8(str, endian) -    local result = { } -    local tmp, n, m, p = { }, 0, -1, 0 -    -- lf | cr | crlf / (cr:13, lf:10) -    local function doit() -        if n == 10 then -            if p ~= 13 then -                result[#result+1] = concat(tmp) -                tmp = { } -                p = 0 -            end -        elseif n == 13 then -            result[#result+1] = concat(tmp) -            tmp = { } -            p = n +function utf.subtituter(t) -- with precheck and no building if no match +    local f = finder  (t) +    local r = replacer(t,false,false,true) +    return function(str) +        local i = lpegmatch(f,str) +        if not i then +            return str +        elseif i > #str then +            return 
str          else -            tmp[#tmp+1] = utfchar(n) -            p = 0 +         -- return sub(str,1,i-2) .. lpegmatch(r,str,i-1) -- slower +            return lpegmatch(r,str)          end      end -    for a,b in bytepairs(str) do -        if a and b then -            if m < 0 then -                if endian then -                    m = a*256*256*256 + b*256*256 +end + +-- inspect(utf.split("a b c d")) +-- inspect(utf.split("a b c d",true)) + +local utflinesplitter     = p_utfbom^-1 * lpeg.tsplitat(p_newline) +local utfcharsplitter_ows = p_utfbom^-1 * Ct(C(p_utf8char)^0) +local utfcharsplitter_iws = p_utfbom^-1 * Ct((p_whitespace^1 + C(p_utf8char))^0) +local utfcharsplitter_raw = Ct(C(p_utf8char)^0) + +patterns.utflinesplitter  = utflinesplitter + +function utf.splitlines(str) +    return lpegmatch(utflinesplitter,str or "") +end + +function utf.split(str,ignorewhitespace) -- new +    if ignorewhitespace then +        return lpegmatch(utfcharsplitter_iws,str or "") +    else +        return lpegmatch(utfcharsplitter_ows,str or "") +    end +end + +function utf.totable(str) -- keeps bom +    return lpegmatch(utfcharsplitter_raw,str) +end + +-- 0  EF BB BF      UTF-8 +-- 1  FF FE         UTF-16-little-endian +-- 2  FE FF         UTF-16-big-endian +-- 3  FF FE 00 00   UTF-32-little-endian +-- 4  00 00 FE FF   UTF-32-big-endian +-- +-- \000 fails in <= 5.0 but is valid in >=5.1 where %z is depricated + +-- utf.name = { +--     [0] = 'utf-8', +--     [1] = 'utf-16-le', +--     [2] = 'utf-16-be', +--     [3] = 'utf-32-le', +--     [4] = 'utf-32-be' +-- } +-- +-- function utf.magic(f) +--     local str = f:read(4) +--     if not str then +--         f:seek('set') +--         return 0 +--  -- elseif find(str,"^%z%z\254\255") then            -- depricated +--  -- elseif find(str,"^\000\000\254\255") then        -- not permitted and bugged +--     elseif find(str,"\000\000\254\255",1,true) then  -- seems to work okay (TH) +--         return 4 +--  -- elseif 
find(str,"^\255\254%z%z") then            -- depricated +--  -- elseif find(str,"^\255\254\000\000") then        -- not permitted and bugged +--     elseif find(str,"\255\254\000\000",1,true) then  -- seems to work okay (TH) +--         return 3 +--     elseif find(str,"^\254\255") then +--         f:seek('set',2) +--         return 2 +--     elseif find(str,"^\255\254") then +--         f:seek('set',2) +--         return 1 +--     elseif find(str,"^\239\187\191") then +--         f:seek('set',3) +--         return 0 +--     else +--         f:seek('set') +--         return 0 +--     end +-- end + +function utf.magic(f) -- not used +    local str = f:read(4) or "" +    local off = lpegmatch(p_utfoffset,str) +    if off < 4 then +        f:seek('set',off) +    end +    return lpegmatch(p_utftype,str) +end + +local function utf16_to_utf8_be(t) +    if type(t) == "string" then +        t = lpegmatch(utflinesplitter,t) +    end +    local result = { } -- we reuse result +    for i=1,#t do +        local r, more = 0, 0 +        for left, right in bytepairs(t[i]) do +            if right then +                local now = 256*left + right +                if more > 0 then +                    now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +                    more = 0 +                    r = r + 1 +                    result[r] = utfchar(now) +                elseif now >= 0xD800 and now <= 0xDBFF then +                    more = now                  else -                    m = b*256 + a +                    r = r + 1 +                    result[r] = utfchar(now)                  end -            else -                if endian then -                    n = m + a*256 + b +            end +        end +        t[i] = concat(result,"",1,r) -- we reused tmp, hence t +    end +    return t +end + +local function utf16_to_utf8_le(t) +    if type(t) == "string" then +        t = lpegmatch(utflinesplitter,t) +    end +    local result = { } -- 
we reuse result +    for i=1,#t do +        local r, more = 0, 0 +        for left, right in bytepairs(t[i]) do +            if right then +                local now = 256*right + left +                if more > 0 then +                    now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong +                    more = 0 +                    r = r + 1 +                    result[r] = utfchar(now) +                elseif now >= 0xD800 and now <= 0xDBFF then +                    more = now                  else -                    n = m + b*256*256*256 + a*256*256 +                    r = r + 1 +                    result[r] = utfchar(now)                  end -                m = -1 -                doit()              end -        else -            break          end +        t[i] = concat(result,"",1,r) -- we reused tmp, hence t +    end +    return t +end + +local function utf32_to_utf8_be(t) +    if type(t) == "string" then +        t = lpegmatch(utflinesplitter,t) +    end +    local result = { } -- we reuse result +    for i=1,#t do +        local r, more = 0, -1 +        for a,b in bytepairs(t[i]) do +            if a and b then +                if more < 0 then +                    more = 256*256*256*a + 256*256*b +                else +                    r = r + 1 +                    result[t] = utfchar(more + 256*a + b) +                    more = -1 +                end +            else +                break +            end +        end +        t[i] = concat(result,"",1,r)      end -    if #tmp > 0 then -        result[#result+1] = concat(tmp) +    return t +end + +local function utf32_to_utf8_le(t) +    if type(t) == "string" then +        t = lpegmatch(utflinesplitter,t)      end -    return result +    local result = { } -- we reuse result +    for i=1,#t do +        local r, more = 0, -1 +        for a,b in bytepairs(t[i]) do +            if a and b then +                if more < 0 then +                    more = 
256*b + a +                else +                    r = r + 1 +                    result[t] = utfchar(more + 256*256*256*b + 256*256*a) +                    more = -1 +                end +            else +                break +            end +        end +        t[i] = concat(result,"",1,r) +    end +    return t +end + +utf.utf32_to_utf8_be = utf32_to_utf8_be +utf.utf32_to_utf8_le = utf32_to_utf8_le +utf.utf16_to_utf8_be = utf16_to_utf8_be +utf.utf16_to_utf8_le = utf16_to_utf8_le + +function utf.utf8_to_utf8(t) +    return type(t) == "string" and lpegmatch(utflinesplitter,t) or t +end + +function utf.utf16_to_utf8(t,endian) +    return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t +end + +function utf.utf32_to_utf8(t,endian) +    return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t  end  local function little(c) -    local b = byte(c) -- b = c:byte() +    local b = byte(c)      if b < 0x10000 then          return char(b%256,b/256)      else @@ -190,10 +766,177 @@ local function big(c)      end  end -function unicode.utf8_to_utf16(str,littleendian) +-- function utf.utf8_to_utf16(str,littleendian) +--     if littleendian then +--         return char(255,254) .. utfgsub(str,".",little) +--     else +--         return char(254,255) .. utfgsub(str,".",big) +--     end +-- end + +local _, l_remap = utf.remapper(little) +local _, b_remap = utf.remapper(big) + +function utf.utf8_to_utf16(str,littleendian)      if littleendian then -        return char(255,254) .. utfgsub(str,".",little) +        return char(255,254) .. lpegmatch(l_remap,str)      else -        return char(254,255) .. utfgsub(str,".",big) +        return char(254,255) .. 
lpegmatch(b_remap,str) +    end +end + +-- function utf.tocodes(str,separator) -- can be sped up with an lpeg +--     local t, n = { }, 0 +--     for u in utfvalues(str) do +--         n = n + 1 +--         t[n] = format("0x%04X",u) +--     end +--     return concat(t,separator or " ") +-- end + +local pattern = Cs ( +    (p_utf8byte           / function(unicode          ) return format(  "0x%04X",          unicode) end) * +    (p_utf8byte * Carg(1) / function(unicode,separator) return format("%s0x%04X",separator,unicode) end)^0 +) + +function utf.tocodes(str,separator) +    return lpegmatch(pattern,str,1,separator or " ") +end + +function utf.ustring(s) +    return format("U+%05X",type(s) == "number" and s or utfbyte(s)) +end + +function utf.xstring(s) +    return format("0x%05X",type(s) == "number" and s or utfbyte(s)) +end + +-- + +local p_nany = p_utf8char / "" + +if utfgmatch then + +    function utf.count(str,what) +        if type(what) == "string" then +            local n = 0 +            for _ in utfgmatch(str,what) do +                n = n + 1 +            end +            return n +        else -- 4 times slower but still faster than / function +            return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str) +        end +    end + +else + +    local cache = { } + +    function utf.count(str,what) +        if type(what) == "string" then +            local p = cache[what] +            if not p then +                p = Cs((P(what)/" " + p_nany)^0) +                cache[p] = p +            end +            return #lpegmatch(p,str) +        else -- 4 times slower but still faster than / function +            return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str) +        end +    end + +end + +-- maybe also register as string.utf* + + +if not utf.characters then + +    -- New: this gmatch hack is taken from the Lua 5.2 book. It's about two times slower +    -- than the built-in string.utfcharacters. 
+ +    function utf.characters(str) +        return gmatch(str,".[\128-\191]*") +    end + +    string.utfcharacters = utf.characters + +end + +if not utf.values then + +    -- So, a logical next step is to check for the values variant. It over five times +    -- slower than the built-in string.utfvalues. I optimized it a bit for n=0,1. + +    ----- wrap, yield, gmatch = coroutine.wrap, coroutine.yield, string.gmatch +    local find =  string.find + +    local dummy = function() +        -- we share this one      end + +    -- function utf.values(str) +    --     local n = #str +    --     if n == 0 then +    --         return wrap(dummy) +    --     elseif n == 1 then +    --         return wrap(function() yield(utfbyte(str)) end) +    --     else +    --         return wrap(function() for s in gmatch(str,".[\128-\191]*") do +    --             yield(utfbyte(s)) +    --         end end) +    --     end +    -- end +    -- +    -- faster: + +    function utf.values(str) +        local n = #str +        if n == 0 then +            return dummy +        elseif n == 1 then +            return function() return utfbyte(str) end +        else +            local p = 1 +         -- local n = #str +            return function() +             -- if p <= n then -- slower than the last find +                    local b, e = find(str,".[\128-\191]*",p) +                    if b then +                        p = e + 1 +                        return utfbyte(sub(str,b,e)) +                    end +             -- end +            end +        end +    end + +    -- slower: +    -- +    -- local pattern = C(patterns.utf8character) * Cp() +    -- ----- pattern = patterns.utf8character/utfbyte * Cp() +    -- ----- pattern = patterns.utf8byte * Cp() +    -- +    -- function utf.values(str) -- one of the cases where a find is faster than an lpeg +    --     local n = #str +    --     if n == 0 then +    --         return dummy +    --     elseif n == 1 then +    --         return 
function() return utfbyte(str) end +    --     else +    --         local p = 1 +    --         return function() +    --             local s, e = lpegmatch(pattern,str,p) +    --             if e then +    --                 p = e +    --                 return utfbyte(s) +    --              -- return s +    --             end +    --         end +    --     end +    -- end + +    string.utfvalues = utf.values +  end diff --git a/lualibs-url.lua b/lualibs-url.lua index e3e6f81..4624a05 100644 --- a/lualibs-url.lua +++ b/lualibs-url.lua @@ -6,101 +6,292 @@ if not modules then modules = { } end modules ['l-url'] = {      license   = "see context related readme files"  } -local char, gmatch, gsub = string.char, string.gmatch, string.gsub +local char, format, byte = string.char, string.format, string.byte +local concat = table.concat  local tonumber, type = tonumber, type -local lpegmatch = lpeg.match +local P, C, R, S, Cs, Cc, Ct, Cf, Cg, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cg, lpeg.V +local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replacer --- from the spec (on the web): +-- from wikipedia:  -- ---     foo://example.com:8042/over/there?name=ferret#nose ---     \_/   \______________/\_________/ \_________/ \__/ ---      |           |            |            |        | ---   scheme     authority       path        query   fragment ---      |   _____________________|__ ---     / \ /                        \ ---     urn:example:animal:ferret:nose - -url = url or { } - -local function tochar(s) -    return char(tonumber(s,16)) -end +--   foo://username:password@example.com:8042/over/there/index.dtb?type=animal;name=narwhal#nose +--   \_/   \_______________/ \_________/ \__/            \___/ \_/ \______________________/ \__/ +--    |           |               |       |                |    |            |                | +--    |       userinfo         hostname  port              |    |          query      
    fragment +--    |    \________________________________/\_____________|____|/ +-- scheme                  |                          |    |    | +--    |                authority                    path   |    | +--    |                                                    |    | +--    |            path                       interpretable as filename +--    |   ___________|____________                              | +--   / \ /                        \                             | +--   urn:example:animal:ferret:nose               interpretable as extension + +url       = url or { } +local url = url + +local tochar      = function(s) return char(tonumber(s,16)) end + +local colon       = P(":") +local qmark       = P("?") +local hash        = P("#") +local slash       = P("/") +local percent     = P("%") +local endofstring = P(-1) -local colon, qmark, hash, slash, percent, endofstring = lpeg.P(":"), lpeg.P("?"), lpeg.P("#"), lpeg.P("/"), lpeg.P("%"), lpeg.P(-1) +local hexdigit    = R("09","AF","af") +local plus        = P("+") +local nothing     = Cc("") +local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar +local escaped     = (plus / " ") + escapedchar -local hexdigit  = lpeg.R("09","AF","af") -local plus      = lpeg.P("+") -local escaped   = (plus / " ") + (percent * lpeg.C(hexdigit * hexdigit) / tochar) +local noslash     = P("/") / ""  -- we assume schemes with more than 1 character (in order to avoid problems with windows disks) +-- we also assume that when we have a scheme, we also have an authority +-- +-- maybe we should already split the query (better for unescaping as = & can be part of a value + +local schemestr    = Cs((escaped+(1-colon-slash-qmark-hash))^2) +local authoritystr = Cs((escaped+(1-      slash-qmark-hash))^0) +local pathstr      = Cs((escaped+(1-            qmark-hash))^0) +----- querystr     = Cs((escaped+(1-                  hash))^0) +local querystr     = Cs((        (1-                  hash))^0) +local fragmentstr  = 
Cs((escaped+(1-           endofstring))^0) + +local scheme    =                 schemestr    * colon + nothing +local authority = slash * slash * authoritystr         + nothing +local path      = slash         * pathstr              + nothing +local query     = qmark         * querystr             + nothing +local fragment  = hash          * fragmentstr          + nothing + +local validurl  = scheme * authority * path * query * fragment +local parser    = Ct(validurl) -local scheme    =                 lpeg.Cs((escaped+(1-colon-slash-qmark-hash))^2) * colon + lpeg.Cc("") -local authority = slash * slash * lpeg.Cs((escaped+(1-      slash-qmark-hash))^0)         + lpeg.Cc("") -local path      = slash *         lpeg.Cs((escaped+(1-            qmark-hash))^0)         + lpeg.Cc("") -local query     = qmark         * lpeg.Cs((escaped+(1-                  hash))^0)         + lpeg.Cc("") -local fragment  = hash          * lpeg.Cs((escaped+(1-           endofstring))^0)         + lpeg.Cc("") +lpegpatterns.url         = validurl +lpegpatterns.urlsplitter = parser -local parser = lpeg.Ct(scheme * authority * path * query * fragment) +local escapes = { } + +setmetatable(escapes, { __index = function(t,k) +    local v = format("%%%02X",byte(k)) +    t[k] = v +    return v +end }) + +local escaper   = Cs((R("09","AZ","az")^1 + P(" ")/"%%20" + S("-./_")^1 + P(1) / escapes)^0) -- space happens most +local unescaper = Cs((escapedchar + 1)^0) + +lpegpatterns.urlunescaped = escapedchar +lpegpatterns.urlescaper   = escaper +lpegpatterns.urlunescaper = unescaper  -- todo: reconsider Ct as we can as well have five return values (saves a table)  -- so we can have two parsers, one with and one without -function url.split(str) +local function split(str)      return (type(str) == "string" and lpegmatch(parser,str)) or str  end --- todo: cache them +local isscheme = schemestr * colon * slash * slash -- this test also assumes authority -function url.hashed(str) -    local s = url.split(str) - 
   local somescheme = s[1] ~= "" -    return { -        scheme    = (somescheme and s[1]) or "file", -        authority = s[2], -        path      = s[3], -        query     = s[4], -        fragment  = s[5], -        original  = str, -        noscheme  = not somescheme, -    } +local function hasscheme(str) +    if str then +        local scheme = lpegmatch(isscheme,str) -- at least one character +        return scheme ~= "" and scheme or false +    else +        return false +    end  end -function url.hasscheme(str) -    return url.split(str)[1] ~= "" +--~ print(hasscheme("home:")) +--~ print(hasscheme("home://")) + +-- todo: cache them + +local rootletter       = R("az","AZ") +                       + S("_-+") +local separator        = P("://") +local qualified        = P(".")^0 * P("/") +                       + rootletter * P(":") +                       + rootletter^1 * separator +                       + rootletter^1 * P("/") +local rootbased        = P("/") +                       + rootletter * P(":") + +local barswapper       = replacer("|",":") +local backslashswapper = replacer("\\","/") + +-- queries: + +local equal = P("=") +local amp   = P("&") +local key   = Cs(((escapedchar+1)-equal            )^0) +local value = Cs(((escapedchar+1)-amp  -endofstring)^0) + +local splitquery = Cf ( Ct("") * P { "sequence", +    sequence = V("pair") * (amp * V("pair"))^0, +    pair     = Cg(key * equal * value), +}, rawset) + +-- hasher + +local function hashed(str) -- not yet ok (/test?test) +    if str == "" then +        return { +            scheme   = "invalid", +            original = str, +        } +    end +    local s = split(str) +    local rawscheme  = s[1] +    local rawquery   = s[4] +    local somescheme = rawscheme ~= "" +    local somequery  = rawquery  ~= "" +    if not somescheme and not somequery then +        s = { +            scheme    = "file", +            authority = "", +            path      = str, +            query     = "", +           
 fragment  = "", +            original  = str, +            noscheme  = true, +            filename  = str, +        } +    else -- not always a filename but handy anyway +        local authority, path, filename = s[2], s[3] +        if authority == "" then +            filename = path +        elseif path == "" then +            filename = "" +        else +            filename = authority .. "/" .. path +        end +        s = { +            scheme    = rawscheme, +            authority = authority, +            path      = path, +            query     = lpegmatch(unescaper,rawquery),  -- unescaped, but possible conflict with & and = +            queries   = lpegmatch(splitquery,rawquery), -- split first and then unescaped +            fragment  = s[5], +            original  = str, +            noscheme  = false, +            filename  = filename, +        } +    end +    return s  end -function url.addscheme(str,scheme) -    return (url.hasscheme(str) and str) or ((scheme or "file:///") .. str) +-- inspect(hashed("template://test")) + +-- Here we assume: +-- +-- files: ///  = relative +-- files: //// = absolute (!) 
+ +--~ table.print(hashed("file://c:/opt/tex/texmf-local")) -- c:/opt/tex/texmf-local +--~ table.print(hashed("file://opt/tex/texmf-local"   )) -- opt/tex/texmf-local +--~ table.print(hashed("file:///opt/tex/texmf-local"  )) -- opt/tex/texmf-local +--~ table.print(hashed("file:////opt/tex/texmf-local" )) -- /opt/tex/texmf-local +--~ table.print(hashed("file:///./opt/tex/texmf-local" )) -- ./opt/tex/texmf-local + +--~ table.print(hashed("c:/opt/tex/texmf-local"       )) -- c:/opt/tex/texmf-local +--~ table.print(hashed("opt/tex/texmf-local"          )) -- opt/tex/texmf-local +--~ table.print(hashed("/opt/tex/texmf-local"         )) -- /opt/tex/texmf-local + +url.split     = split +url.hasscheme = hasscheme +url.hashed    = hashed + +function url.addscheme(str,scheme) -- no authority +    if hasscheme(str) then +        return str +    elseif not scheme then +        return "file:///" .. str +    else +        return scheme .. ":///" .. str +    end  end -function url.construct(hash) -    local fullurl = hash.sheme .. "://".. hash.authority .. hash.path -    if hash.query then -        fullurl = fullurl .. "?".. hash.query +function url.construct(hash) -- dodo: we need to escape ! +    local fullurl, f = { }, 0 +    local scheme, authority, path, query, fragment = hash.scheme, hash.authority, hash.path, hash.query, hash.fragment +    if scheme and scheme ~= "" then +        f = f + 1 ; fullurl[f] = scheme .. "://" +    end +    if authority and authority ~= "" then +        f = f + 1 ; fullurl[f] = authority      end -    if hash.fragment then -        fullurl = fullurl .. "?".. hash.fragment +    if path and path ~= "" then +        f = f + 1 ; fullurl[f] = "/" .. path      end -    return fullurl +    if query and query ~= "" then +        f = f + 1 ; fullurl[f] = "?".. query +    end +    if fragment and fragment ~= "" then +        f = f + 1 ; fullurl[f] = "#".. 
fragment +    end +    return lpegmatch(escaper,concat(fullurl))  end +local pattern = Cs(noslash * R("az","AZ") * (S(":|")/":") * noslash * P(1)^0) +  function url.filename(filename) -    local t = url.hashed(filename) -    return (t.scheme == "file" and (gsub(t.path,"^/([a-zA-Z])([:|])/)","%1:"))) or filename +    local spec = hashed(filename) +    local path = spec.path +    return (spec.scheme == "file" and path and lpegmatch(pattern,path)) or filename +end + +-- print(url.filename("/c|/test")) +-- print(url.filename("/c/test")) + +local function escapestring(str) +    return lpegmatch(escaper,str)  end +url.escape = escapestring +  function url.query(str)      if type(str) == "string" then -        local t = { } -        for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do -            t[k] = v -        end -        return t +        return lpegmatch(splitquery,str) or ""      else          return str      end  end +function url.toquery(data) +    local td = type(data) +    if td == "string" then +        return #str and escape(data) or nil -- beware of double escaping +    elseif td == "table" then +        if next(data) then +            local t = { } +            for k, v in next, data do +                t[#t+1] = format("%s=%s",k,escapestring(v)) +            end +            return concat(t,"&") +        end +    else +        -- nil is a signal that no query +    end +end + +-- /test/ | /test | test/ | test => test + +local pattern = Cs(noslash^0 * (1 - noslash * P(-1))^0) + +function url.barepath(path) +    if not path or path == "" then +        return "" +    else +        return lpegmatch(pattern,path) +    end +end + +-- print(url.barepath("/test"),url.barepath("test/"),url.barepath("/test/"),url.barepath("test")) +-- print(url.barepath("/x/yz"),url.barepath("x/yz/"),url.barepath("/x/yz/"),url.barepath("x/yz")) +  --~ print(url.filename("file:///c:/oeps.txt"))  --~ print(url.filename("c:/oeps.txt"))  --~ print(url.filename("file:///oeps.txt")) @@ -108,12 
+299,30 @@ end  --~ print(url.filename("/oeps.txt"))  --~ from the spec on the web (sort of): ---~ ---~ function test(str) ---~     print(table.serialize(url.hashed(str))) + +--~ local function test(str) +--~     local t = url.hashed(str) +--~     t.constructed = url.construct(t) +--~     print(table.serialize(t))  --~ end ---~ ---~ test("%56pass%20words") + +--~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45")) +--~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45")) + +--~ test("sys:///./colo-rgb") + +--~ test("/data/site/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733/figuur-cow.jpg") +--~ test("file:///M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") +--~ test("M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") +--~ test("file:///q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") +--~ test("/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") + +--~ test("file:///cow%20with%20spaces") +--~ test("file:///cow%20with%20spaces.pdf") +--~ test("cow%20with%20spaces.pdf") +--~ test("some%20file") +--~ test("/etc/passwords") +--~ test("http://www.myself.com/some%20words.html")  --~ test("file:///c:/oeps.txt")  --~ test("file:///c|/oeps.txt")  --~ test("file:///etc/oeps.txt") @@ -127,8 +336,9 @@ end  --~ test("tel:+1-816-555-1212")  --~ test("telnet://192.0.2.16:80/")  --~ test("urn:oasis:names:specification:docbook:dtd:xml:4.1.2") ---~ test("/etc/passwords")  --~ test("http://www.pragma-ade.com/spaced%20name")  --~ test("zip:///oeps/oeps.zip#bla/bla.tex")  --~ test("zip:///oeps/oeps.zip?bla/bla.tex") + +--~ table.print(url.hashed("/test?test")) diff --git a/lualibs-dimen.lua b/lualibs-util-dim.lua index da5ab14..47b2706 100644 --- a/lualibs-dimen.lua +++ b/lualibs-util-dim.lua @@ -1,4 +1,4 @@ -if not modules then 
modules = { } end modules ['l-dimen'] = { +if not modules then modules = { } end modules ['util-dim'] = {      version   = 1.001,      comment   = "support for dimensions",      author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -16,14 +16,21 @@ table.</p>  --ldx]]--  local format, match, gsub, type, setmetatable = string.format, string.match, string.gsub, type, setmetatable -local P, S, R, Cc, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.Cc, lpeg.match +local P, S, R, Cc, C, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.Cc, lpeg.C, lpeg.match + +local allocate          = utilities.storage.allocate +local setmetatableindex = table.setmetatableindex +local formatters        = string.formatters + +--this might become another namespace  number = number or { } +local number = number  number.tonumberf = function(n) return match(format("%.20f",n),"(.-0?)0*$") end -- one zero too much but alas  number.tonumberg = function(n) return       format("%.20g",n)              end -local dimenfactors = { +local dimenfactors = allocate {      ["pt"] =             1/65536,      ["in"] = (  100/ 7227)/65536,      ["cm"] = (  254/ 7227)/65536, @@ -80,14 +87,18 @@ local dimenfactors = {  format (string) is implemented using this table.</p>  --ldx]]-- --- was: -local function todimen(n,unit,fmt) +local function numbertodimen(n,unit,fmt)      if type(n) == 'string' then          return n      else          unit = unit or 'pt' -        return format(fmt or "%s%s",n*dimenfactors[unit],unit) +        if not fmt then +            fmt = "%s%s" +        elseif fmt == true then +            fmt = "%0.5f%s" +        end +        return format(fmt,n*dimenfactors[unit],unit)       -- if fmt then       --     return format(fmt,n*dimenfactors[unit],unit)       -- else @@ -101,21 +112,21 @@ end  --ldx]]--  number.maxdimen     = 1073741823 -number.todimen      = todimen +number.todimen      = numbertodimen  number.dimenfactors = dimenfactors -function number.topoints      (n) return todimen(n,"pt") end 
-function number.toinches      (n) return todimen(n,"in") end -function number.tocentimeters (n) return todimen(n,"cm") end -function number.tomillimeters (n) return todimen(n,"mm") end -function number.toscaledpoints(n) return todimen(n,"sp") end -function number.toscaledpoints(n) return      n .. "sp"  end -function number.tobasepoints  (n) return todimen(n,"bp") end -function number.topicas       (n) return todimen(n "pc") end -function number.todidots      (n) return todimen(n,"dd") end -function number.tociceros     (n) return todimen(n,"cc") end -function number.tonewdidots   (n) return todimen(n,"nd") end -function number.tonewciceros  (n) return todimen(n,"nc") end +function number.topoints      (n,fmt) return numbertodimen(n,"pt",fmt) end +function number.toinches      (n,fmt) return numbertodimen(n,"in",fmt) end +function number.tocentimeters (n,fmt) return numbertodimen(n,"cm",fmt) end +function number.tomillimeters (n,fmt) return numbertodimen(n,"mm",fmt) end +function number.toscaledpoints(n,fmt) return numbertodimen(n,"sp",fmt) end +function number.toscaledpoints(n)     return            n .. "sp"      end +function number.tobasepoints  (n,fmt) return numbertodimen(n,"bp",fmt) end +function number.topicas       (n,fmt) return numbertodimen(n "pc",fmt) end +function number.todidots      (n,fmt) return numbertodimen(n,"dd",fmt) end +function number.tociceros     (n,fmt) return numbertodimen(n,"cc",fmt) end +function number.tonewdidots   (n,fmt) return numbertodimen(n,"nd",fmt) end +function number.tonewciceros  (n,fmt) return numbertodimen(n,"nc",fmt) end  --[[ldx--  <p>More interesting it to implement a (sort of) dimen datatype, one @@ -132,27 +143,40 @@ local dimenpair = amount/tonumber * (unit^1/dimenfactors + Cc(1)) -- tonumber is  lpeg.patterns.dimenpair = dimenpair +local splitter = amount/tonumber * C(unit^1) + +function number.splitdimen(str) +    return lpegmatch(splitter,str) +end +  --[[ldx--  <p>We use a metatable to intercept errors. 
When no key is found in  the table with factors, the metatable will be consulted for an  alternative index function.</p>  --ldx]]-- -local mt = { }  setmetatable(dimenfactors,mt) - -mt.__index = function(t,s) +setmetatableindex(dimenfactors, function(t,s)   -- error("wrong dimension: " .. (s or "?")) -- better a message      return false -end +end) -function string:todimen() -    if type(self) == "number" then -        return self -    else -        local value, unit = lpegmatch(dimenpair,self) -        return value/unit -    end -end +--[[ldx-- +<p>We redefine the following function later on, so we comment it +here (which saves us bytecodes.</p> +--ldx]]-- + +-- function string.todimen(str) +--     if type(str) == "number" then +--         return str +--     else +--         local value, unit = lpegmatch(dimenpair,str) +--         return value/unit +--     end +-- end +-- +-- local stringtodimen = string.todimen + +local stringtodimen -- assigned later (commenting saves bytecode)  local amount = S("+-")^0 * R("09")^0 * S(".,")^0 * R("09")^0  local unit   = P("pt") + P("cm") + P("mm") + P("sp") + P("bp") + P("in")  + @@ -160,7 +184,7 @@ local unit   = P("pt") + P("cm") + P("mm") + P("sp") + P("bp") + P("in")  +  local validdimen = amount * unit -lpeg.patterns.validdimen = pattern +lpeg.patterns.validdimen = validdimen  --[[ldx--  <p>This converter accepts calls like:</p> @@ -174,12 +198,6 @@ string.todimen("10pt")  string.todimen("10.0pt")  </typing> -<p>And of course the often more efficient:</p> - -<typing> -somestring:todimen("12.3cm") -</typing> -  <p>With this in place, we can now implement a proper datatype for dimensions, one  that permits us to do this:</p> @@ -197,28 +215,28 @@ local dimensions = { }  <p>The main (and globally) visible representation of a dimen is defined next: it is  a one-element table. The unit that is returned from the match is normally a number  (one of the previously defined factors) but we also accept functions. 
Later we will -see why.</p> +see why. This function is redefined later.</p>  --ldx]]-- -function dimen(a) -    if a then -        local ta= type(a) -        if ta == "string" then -            local value, unit = lpegmatch(pattern,a) -            if type(unit) == "function" then -                k = value/unit() -            else -                k = value/unit -            end -            a = k -        elseif ta == "table" then -            a = a[1] -        end -        return setmetatable({ a }, dimensions) -    else -        return setmetatable({ 0 }, dimensions) -    end -end +-- function dimen(a) +--     if a then +--         local ta= type(a) +--         if ta == "string" then +--             local value, unit = lpegmatch(pattern,a) +--             if type(unit) == "function" then +--                 k = value/unit() +--             else +--                 k = value/unit +--             end +--             a = k +--         elseif ta == "table" then +--             a = a[1] +--         end +--         return setmetatable({ a }, dimensions) +--     else +--         return setmetatable({ 0 }, dimensions) +--     end +-- end  --[[ldx--  <p>This function return a small hash with a metatable attached. 
It is @@ -228,35 +246,35 @@ shared some of the code but for reasons of speed we don't.</p>  function dimensions.__add(a, b)      local ta, tb = type(a), type(b) -    if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end -    if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end +    if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end +    if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end      return setmetatable({ a + b }, dimensions)  end  function dimensions.__sub(a, b)      local ta, tb = type(a), type(b) -    if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end -    if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end +    if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end +    if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end      return setmetatable({ a - b }, dimensions)  end  function dimensions.__mul(a, b)      local ta, tb = type(a), type(b) -    if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end -    if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end +    if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end +    if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end      return setmetatable({ a * b }, dimensions)  end  function dimensions.__div(a, b)      local ta, tb = type(a), type(b) -    if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end -    if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end +    if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end +    if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end      return setmetatable({ a / b }, dimensions)  end  function dimensions.__unm(a)      local ta = 
type(a) -    if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end +    if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end      return setmetatable({ - a }, dimensions)  end @@ -321,23 +339,9 @@ is loaded, the relevant tables that hold the functions needed may not  yet be available.</p>  --ldx]]-- -function dimensions.texify()  -- todo: % -    local fti, fc = fonts and fonts.ids and fonts.ids, font and font.current -    if fti and fc then -        dimenfactors["ex"] = function() return fti[fc()].ex_height end -        dimenfactors["em"] = function() return fti[fc()].quad      end -    else -        dimenfactors["ex"] = 1/65536* 4 --  4pt -        dimenfactors["em"] = 1/65536*10 -- 10pt -    end -end - ---[[ldx-- -<p>In order to set the defaults we call this function now. At some point -the macro package needs to make sure the function is called again.</p> ---ldx]]-- - -dimensions.texify() +   dimenfactors["ex"] =  4 * 1/65536 --   4pt +   dimenfactors["em"] = 10 * 1/65536 --  10pt +-- dimenfactors["%"]  =  4 * 1/65536 -- 400pt/100  --[[ldx--  <p>The previous code is rather efficient (also thanks to <l n='lpeg'/>) but we @@ -389,27 +393,40 @@ function dimen(a)      end  end -function string:todimen() -    if type(self) == "number" then -        return self +function string.todimen(str) -- maybe use tex.sp when available +    if type(str) == "number" then +        return str      else -        local k = known[self] +        local k = known[str]          if not k then -            local value, unit = lpegmatch(dimenpair,self) +            local value, unit = lpegmatch(dimenpair,str)              if value and unit then -                k = value/unit +                k = value/unit -- to be considered: round              else                  k = 0              end -            -- print(self,value,unit) -            known[self] = k +            -- print(str,value,unit) +            known[str] = k          end      
    return k      end  end +--~ local known = { } + +--~ function string.todimen(str) -- maybe use tex.sp +--~     local k = known[str] +--~     if not k then +--~         k = tex.sp(str) +--~         known[str] = k +--~     end +--~     return k +--~ end + +stringtodimen = string.todimen -- local variable defined earlier +  function number.toscaled(d) -    return format("0.5f",d/2^16) +    return format("%0.5f",d/2^16)  end  --[[ldx-- @@ -421,12 +438,12 @@ probably use a hash instead of a one-element table.</p>  <p>Goodie:s</p>  --ldx]]-- -function number.percent(n) -- will be cleaned up once luatex 0.30 is out -    local hsize = tex.hsize -    if type(hsize) == "string" then -        hsize = hsize:todimen() +function number.percent(n,d) -- will be cleaned up once luatex 0.30 is out +    d = d or tex.hsize +    if type(d) == "string" then +        d = stringtodimen(d)      end -    return (n/100) * hsize +    return (n/100) * d  end  number["%"] = number.percent diff --git a/lualibs-util-jsn.lua b/lualibs-util-jsn.lua new file mode 100644 index 0000000..7493f10 --- /dev/null +++ b/lualibs-util-jsn.lua @@ -0,0 +1,145 @@ +if not modules then modules = { } end modules ['util-jsn'] = { +    version   = 1.001, +    comment   = "companion to m-json.mkiv", +    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", +    copyright = "PRAGMA ADE / ConTeXt Development Team", +    license   = "see context related readme files" +} + +-- Of course we could make a nice complete parser with proper error messages but +-- as json is generated programmatically errors are systematic and we can assume +-- a correct stream. If not, we have some fatal error anyway. So, we can just rely +-- on strings being strings (apart from the unicode escape which is not in 5.1) and +-- as we first catch known types we just assume that anything else is a number. 
+ +local P, V, R, S, C, Cc, Cs, Ct, Cf, Cg = lpeg.P, lpeg.V, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cf, lpeg.Cg +local lpegmatch = lpeg.match +local format = string.format +local utfchar = utf.char +local concat = table.concat + +local tonumber, tostring, rawset, type = tonumber, tostring, rawset, type + +local json      = utilities.json or { } +utilities.json  = json + +-- moduledata      = moduledata or { } +-- moduledata.json = json + +-- \\ \/ \b \f \n \r \t \uHHHH + +local lbrace     = P("{") +local rbrace     = P("}") +local lparent    = P("[") +local rparent    = P("]") +local comma      = P(",") +local colon      = P(":") +local dquote     = P('"') + +local whitespace = lpeg.patterns.whitespace +local optionalws = whitespace^0 + +local escape     = C(P("\\u") / "0x" * S("09","AF","af")) / function(s) return utfchar(tonumber(s)) end +local jstring    = dquote * Cs((escape + (1-dquote))^0) * dquote +local jtrue      = P("true")  * Cc(true) +local jfalse     = P("false") * Cc(false) +local jnull      = P("null")  * Cc(nil) +local jnumber    = (1-whitespace-rparent-rbrace-comma)^1 / tonumber + +local key        = jstring + +local jsonconverter = { "value", +    object   = lbrace * Cf(Ct("") * V("pair") * (comma * V("pair"))^0,rawset) * rbrace, +    pair     = Cg(optionalws * key * optionalws * colon * V("value")), +    array    = Ct(lparent * V("value") * (comma * V("value"))^0 * rparent), +    value    = optionalws * (jstring + V("object") + V("array") + jtrue + jfalse + jnull + jnumber + #rparent) * optionalws, +} + +-- local jsonconverter = { "value", +--     object   = lbrace * Cf(Ct("") * V("pair") * (comma * V("pair"))^0,rawset) * rbrace, +--     pair     = Cg(optionalws * V("string") * optionalws * colon * V("value")), +--     array    = Ct(lparent * V("value") * (comma * V("value"))^0 * rparent), +--     string   = jstring, +--     value    = optionalws * (V("string") + V("object") + V("array") + jtrue + jfalse + jnull + jnumber) * 
optionalws, +-- } + +-- lpeg.print(jsonconverter) -- size 181 + +function json.tolua(str) +    return lpegmatch(jsonconverter,str) +end + +local function tojson(value,t) -- we could optimize #t +    local kind = type(value) +    if kind == "table" then +        local done = false +        local size = #value +        if size == 0 then +            for k, v in next, value do +                if done then +                    t[#t+1] = "," +                else +                    t[#t+1] = "{" +                    done = true +                end +                t[#t+1] = format("%q:",k) +                tojson(v,t) +            end +            if done then +                t[#t+1] = "}" +            else +                t[#t+1] = "{}" +            end +        elseif size == 1 then +            -- we can optimize for non tables +            t[#t+1] = "[" +            tojson(value[1],t) +            t[#t+1] = "]" +        else +            for i=1,size do +                if done then +                    t[#t+1] = "," +                else +                    t[#t+1] = "[" +                    done = true +                end +                tojson(value[i],t) +            end +            t[#t+1] = "]" +        end +    elseif kind == "string"  then +        t[#t+1] = format("%q",value) +    elseif kind == "number" then +        t[#t+1] = value +    elseif kind == "boolean" then +        t[#t+1] = tostring(value) +    end +    return t +end + +function json.tostring(value) +    -- todo optimize for non table +    local kind = type(value) +    if kind == "table" then +        return concat(tojson(value,{}),"") +    elseif kind == "string" or kind == "number" then +        return value +    else +        return tostring(value) +    end +end + +-- local tmp = [[ { "a" : true, "b" : [ 123 , 456E-10, { "a" : true, "b" : [ 123 , 456 ] } ] } ]] + +-- tmp = json.tolua(tmp) +-- inspect(tmp) +-- tmp = json.tostring(tmp) +-- inspect(tmp) +-- tmp = json.tolua(tmp) +-- 
inspect(tmp) +-- tmp = json.tostring(tmp) +-- inspect(tmp) + +-- inspect(json.tostring(true)) + +return json diff --git a/lualibs-util-lua.lua b/lualibs-util-lua.lua new file mode 100644 index 0000000..f3be9dc --- /dev/null +++ b/lualibs-util-lua.lua @@ -0,0 +1,351 @@ +if not modules then modules = { } end modules ['util-lua'] = { +    version   = 1.001, +    comment   = "companion to luat-lib.mkiv", +    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", +    comment   = "the strip code is written by Peter Cawley", +    copyright = "PRAGMA ADE / ConTeXt Development Team", +    license   = "see context related readme files" +} + +-- we will remove the 5.1 code some day soon + +local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format +local load, loadfile, type = load, loadfile, type + +utilities          = utilities or {} +utilities.lua      = utilities.lua or { } +local luautilities = utilities.lua + +local report_lua = logs.reporter("system","lua") + +local tracestripping           = false +local forcestupidcompile       = true  -- use internal bytecode compiler +luautilities.stripcode         = true  -- support stripping when asked for +luautilities.alwaysstripcode   = false -- saves 1 meg on 7 meg compressed format file (2012.08.12) +luautilities.nofstrippedchunks = 0 +luautilities.nofstrippedbytes  = 0 +local strippedchunks           = { } -- allocate() +luautilities.strippedchunks    = strippedchunks + +luautilities.suffixes = { +    tma = "tma", +    tmc = jit and "tmb" or "tmc", +    lua = "lua", +    luc = jit and "lub" or "luc", +    lui = "lui", +    luv = "luv", +    luj = "luj", +    tua = "tua", +    tuc = "tuc", +} + +-- environment.loadpreprocessedfile can be set to a preprocessor + +if jit or status.luatex_version >= 74 then + +    local function register(name) +        if tracestripping then +            report_lua("stripped bytecode from %a",name or "unknown") +        end +        
strippedchunks[#strippedchunks+1] = name +        luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1 +    end + +    local function stupidcompile(luafile,lucfile,strip) +        local code = io.loaddata(luafile) +        if code and code ~= "" then +            code = load(code) +            if code then +                code = dump(code,strip and luautilities.stripcode or luautilities.alwaysstripcode) +                if code and code ~= "" then +                    register(name) +                    io.savedata(lucfile,code) +                    return true, 0 +                end +            else +                report_lua("fatal error %a in file %a",1,luafile) +            end +        else +            report_lua("fatal error %a in file %a",2,luafile) +        end +        return false, 0 +    end + +    -- quite subtle ... doing this wrong incidentally can give more bytes + +    function luautilities.loadedluacode(fullname,forcestrip,name) +        -- quite subtle ... 
doing this wrong incidentally can give more bytes +        name = name or fullname +        local code = environment.loadpreprocessedfile and environment.loadpreprocessedfile(fullname) or loadfile(fullname) +        if code then +            code() +        end +        if forcestrip and luautilities.stripcode then +            if type(forcestrip) == "function" then +                forcestrip = forcestrip(fullname) +            end +            if forcestrip or luautilities.alwaysstripcode then +                register(name) +                return load(dump(code,true)), 0 +            else +                return code, 0 +            end +        elseif luautilities.alwaysstripcode then +            register(name) +            return load(dump(code,true)), 0 +        else +            return code, 0 +        end +    end + +    function luautilities.strippedloadstring(code,forcestrip,name) -- not executed +        if forcestrip and luautilities.stripcode or luautilities.alwaysstripcode then +            code = load(code) +            if not code then +                report_lua("fatal error %a in file %a",3,name) +            end +            register(name) +            code = dump(code,true) +        end +        return load(code), 0 +    end + +    function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true +        report_lua("compiling %a into %a",luafile,lucfile) +        os.remove(lucfile) +        local done = stupidcompile(luafile,lucfile,strip ~= false) +        if done then +            report_lua("dumping %a into %a stripped",luafile,lucfile) +            if cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then +                report_lua("removing %a",luafile) +                os.remove(luafile) +            end +        end +        return done +    end + +    function luautilities.loadstripped(...) +        local l = load(...) 
+        if l then +            return load(dump(l,true)) +        end +    end + +else + +    -- The next function was posted by Peter Cawley on the lua list and strips line +    -- number information etc. from the bytecode data blob. We only apply this trick +    -- when we store data tables. Stripping makes the compressed format file about +    -- 1MB smaller (and uncompressed we save at least 6MB). +    -- +    -- You can consider this feature an experiment, so it might disappear. There is +    -- no noticeable gain in runtime although the memory footprint should be somewhat +    -- smaller (and the file system has a bit less to deal with). +    -- +    -- Begin of borrowed code ... works for Lua 5.1 which LuaTeX currently uses ... + +    local function register(name,before,after) +        local delta = before - after +        if tracestripping then +            report_lua("bytecodes stripped from %a, # before %s, # after %s, delta %s",name,before,after,delta) +        end +        strippedchunks[#strippedchunks+1] = name +        luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1 +        luautilities.nofstrippedbytes  = luautilities.nofstrippedbytes  + delta +        return delta +    end + +    local strip_code_pc + +    if _MAJORVERSION == 5 and _MINORVERSION == 1 then + +        strip_code_pc = function(dump,name) +            local before = #dump +            local version, format, endian, int, size, ins, num = byte(dump,5,11) +            local subint +            if endian == 1 then +                subint = function(dump, i, l) +                    local val = 0 +                    for n = l, 1, -1 do +                        val = val * 256 + byte(dump,i + n - 1) +                    end +                    return val, i + l +                end +            else +                subint = function(dump, i, l) +                    local val = 0 +                    for n = 1, l, 1 do +                        val = val * 256 + 
byte(dump,i + n - 1) +                    end +                    return val, i + l +                end +            end +            local strip_function +            strip_function = function(dump) +                local count, offset = subint(dump, 1, size) +                local stripped, dirty = rep("\0", size), offset + count +                offset = offset + count + int * 2 + 4 +                offset = offset + int + subint(dump, offset, int) * ins +                count, offset = subint(dump, offset, int) +                for n = 1, count do +                    local t +                    t, offset = subint(dump, offset, 1) +                    if t == 1 then +                        offset = offset + 1 +                    elseif t == 4 then +                        offset = offset + size + subint(dump, offset, size) +                    elseif t == 3 then +                        offset = offset + num +                    end +                end +                count, offset = subint(dump, offset, int) +                stripped = stripped .. sub(dump,dirty, offset - 1) +                for n = 1, count do +                    local proto, off = strip_function(sub(dump,offset, -1)) +                    stripped, offset = stripped .. proto, offset + off - 1 +                end +                offset = offset + subint(dump, offset, int) * int + int +                count, offset = subint(dump, offset, int) +                for n = 1, count do +                    offset = offset + subint(dump, offset, size) + size + int * 2 +                end +                count, offset = subint(dump, offset, int) +                for n = 1, count do +                    offset = offset + subint(dump, offset, size) + size +                end +                stripped = stripped .. rep("\0", int * 3) +                return stripped, offset +            end +            dump = sub(dump,1,12) .. 
strip_function(sub(dump,13,-1)) +            local after = #dump +            local delta = register(name,before,after) +            return dump, delta +        end + +    else + +        strip_code_pc = function(dump,name) +            return dump, 0 +        end + +    end + +    -- ... end of borrowed code. + +    -- quite subtle ... doing this wrong incidentally can give more bytes + +    function luautilities.loadedluacode(fullname,forcestrip,name) +        -- quite subtle ... doing this wrong incidentally can give more bytes +        local code = environment.loadpreprocessedfile and environment.loadpreprocessedfile(fullname) or loadfile(fullname) -- call the same hook that was just tested for +        if code then +            code() +        end +        if forcestrip and luautilities.stripcode then +            if type(forcestrip) == "function" then +                forcestrip = forcestrip(fullname) +            end +            if forcestrip then +                local code, n = strip_code_pc(dump(code),name) +                return load(code), n +            elseif luautilities.alwaysstripcode then +                return load(strip_code_pc(dump(code),name)) +            else +                return code, 0 +            end +        elseif luautilities.alwaysstripcode then +            return load(strip_code_pc(dump(code),name)) +        else +            return code, 0 +        end +    end + +    function luautilities.strippedloadstring(code,forcestrip,name) -- not executed +        local n = 0 +        if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then +            code = load(code) +            if not code then +                report_lua("fatal error in file %a",name) +            end +            code, n = strip_code_pc(dump(code),name) +        end +        return load(code), n +    end + +    local function stupidcompile(luafile,lucfile,strip) +        local code = io.loaddata(luafile) +        local n = 0 +        if code and code ~= "" then +            code = 
load(code) +            if not code then +                report_lua("fatal error in file %a",luafile) +            end +            code = dump(code) +            if strip then +                code, n = strip_code_pc(code,luafile) -- strip_code_pc takes (dump,name): the file name is what gets reported +            end +            if code and code ~= "" then +                io.savedata(lucfile,code) +            end +        end +        return n +    end + +    local luac_normal = "texluac -o %q %q" +    local luac_strip  = "texluac -s -o %q %q" + +    function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true +        report_lua("compiling %a into %a",luafile,lucfile) +        os.remove(lucfile) +        local done = false +        if strip ~= false then +            strip = true +        end +        if forcestupidcompile then +            fallback = true +        elseif strip then +            done = os.spawn(format(luac_strip, lucfile,luafile)) == 0 +        else +            done = os.spawn(format(luac_normal,lucfile,luafile)) == 0 +        end +        if not done and fallback then +            local n = stupidcompile(luafile,lucfile,strip) +            if n > 0 then +                report_lua("%a dumped into %a (%i bytes stripped)",luafile,lucfile,n) +            else +                report_lua("%a dumped into %a (unstripped)",luafile,lucfile) +            end +            cleanup = false -- better see how bad it is +            done = true -- hm +        end +        if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then +            report_lua("removing %a",luafile) +            os.remove(luafile) +        end +        return done +    end + +    luautilities.loadstripped = loadstring + +end + +-- local getmetatable, type = getmetatable, type +-- +-- local types = { } +-- +-- function luautilities.registerdatatype(d,name) +--     types[getmetatable(d)] = name +-- end 
+-- +-- function luautilities.datatype(d) +--     local t = type(d) +--     if t == "userdata" then +--         local m = getmetatable(d) +--         return m and types[m] or "userdata" +--     else +--         return t +--     end +-- end +-- +-- luautilities.registerdatatype(lpeg.P("!"),"lpeg") +-- +-- print(luautilities.datatype(lpeg.P("oeps"))) diff --git a/lualibs-util-mrg.lua b/lualibs-util-mrg.lua new file mode 100644 index 0000000..78b23dc --- /dev/null +++ b/lualibs-util-mrg.lua @@ -0,0 +1,221 @@ +if not modules then modules = { } end modules ['util-mrg'] = { +    version   = 1.001, +    comment   = "companion to luat-lib.mkiv", +    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", +    copyright = "PRAGMA ADE / ConTeXt Development Team", +    license   = "see context related readme files" +} + +-- hm, quite unreadable + +local gsub, format = string.gsub, string.format +local concat = table.concat +local type, next = type, next + +local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt, Cb, Cg = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt, lpeg.Cb, lpeg.Cg +local lpegmatch, patterns = lpeg.match, lpeg.patterns + +utilities             = utilities or { } +local merger          = utilities.merger or { } +utilities.merger      = merger +merger.strip_comment  = true + +local report          = logs.reporter("system","merge") +utilities.report      = report + +local m_begin_merge   = "begin library merge" +local m_end_merge     = "end library merge" +local m_begin_closure = "do -- create closure to overcome 200 locals limit" +local m_end_closure   = "end -- of closure" + +local m_pattern = +    "%c+" .. +    "%-%-%s+" .. m_begin_merge .. +    "%c+(.-)%c+" .. +    "%-%-%s+" .. m_end_merge .. +    "%c+" + +local m_format = +    "\n\n-- " .. m_begin_merge .. +    "\n%s\n" .. +    "-- " .. m_end_merge .. "\n\n" + +local m_faked = +    "-- " .. "created merged file" .. "\n\n" .. +    "-- " .. m_begin_merge .. "\n\n" .. +    "-- " .. 
m_end_merge .. "\n\n" + +local m_report = [[ +-- used libraries    : %s +-- skipped libraries : %s +-- original bytes    : %s +-- stripped bytes    : %s +]] + +local m_preloaded = [[package.loaded[%q] = package.loaded[%q] or true]] + +local function self_fake() +    return m_faked +end + +local function self_nothing() +    return "" +end + +local function self_load(name) +    local data = io.loaddata(name) or "" +    if data == "" then +        report("unknown file %a",name) +    else +        report("inserting file %a",name) +    end +    return data or "" +end + +-- -- saves some 20K .. scite comments +-- data = gsub(data,"%-%-~[^\n\r]*[\r\n]","") +-- -- saves some 20K .. ldx comments +-- data = gsub(data,"%-%-%[%[ldx%-%-.-%-%-ldx%]%]%-%-","") + +local space      = patterns.space +local eol        = patterns.newline +local equals     = P("=")^0 +local open       = P("[") * Cg(equals,"init") * P("[") * P("\n")^-1 +local close      = P("]") * C(equals) * P("]") +local closeeq    = Cmt(close * Cb("init"), function(s,i,a,b) return a == b end) +local longstring = open * (1 - closeeq)^0 * close + +local quoted     = patterns.quoted +local emptyline  = space^0 * eol +local operator1  = P("<=") + P(">=") + P("~=") + P("..") + S("/^<>=*+%%") +local operator2  = S("*+/") +local operator3  = S("-") +local separator  = S(",;") + +local ignore  = (P("]") * space^1 * P("=") * space^1 * P("]")) / "]=[" + +                (P("=") * space^1 * P("{")) / "={" + +                (P("(") * space^1) / "(" + +                (P("{") * (space+eol)^1 * P("}")) / "{}" +local strings = quoted --  / function (s) print("<<"..s..">>") return s end +local longcmt = (emptyline^0 * P("--") * longstring * emptyline^0) / "" +local longstr = longstring +local comment = emptyline^0 * P("--") * P("-")^0 * (1-eol)^0 * emptyline^1 / "\n" +local pack    = ((eol+space)^0 / "") * operator1 * ((eol+space)^0 / "") + +                ((eol+space)^0 / "") * operator2 * ((space)^0 / "") + +                
((eol+space)^1 / "") * operator3 * ((space)^1 / "") + +                ((space)^0 / "") * separator * ((space)^0 / "") +local lines   = emptyline^2 / "\n" +local spaces  = (space * space) / " " +----- spaces  = ((space+eol)^1 ) / " " + +local compact = Cs ( ( +    ignore  + +    strings + +    longcmt + +    longstr + +    comment + +    pack    + +    lines   + +    spaces  + +    1 +)^1 ) + +local strip       = Cs((emptyline^2/"\n" + 1)^0) +local stripreturn = Cs((1-P("return") * space^1 * P(1-space-eol)^1 * (space+eol)^0 * P(-1))^1) + +function merger.compact(data) +    return lpegmatch(strip,lpegmatch(compact,data)) +end + +local function self_compact(data) +    local delta = 0 +    if merger.strip_comment then +        local before = #data +        data = lpegmatch(compact,data) +        data = lpegmatch(strip,data) -- also strips in longstrings ... alas +     -- data = string.strip(data) +        local after = #data +        delta = before - after +        report("original size %s, compacted to %s, stripped %s",before,after,delta) +        data = format("-- original size: %s, stripped down to: %s\n\n%s",before,after,data) +    end +    return lpegmatch(stripreturn,data) or data, delta +end + +local function self_save(name, data) +    if data ~= "" then +        io.savedata(name,data) +        report("saving %s with size %s",name,#data) +    end +end + +local function self_swap(data,code) +    return data ~= "" and (gsub(data,m_pattern, function() return format(m_format,code) end, 1)) or "" +end + +local function self_libs(libs,list) +    local result, f, frozen, foundpath = { }, nil, false, nil +    result[#result+1] = "\n" +    if type(libs) == 'string' then libs = { libs } end +    if type(list) == 'string' then list = { list } end +    for i=1,#libs do +        local lib = libs[i] +        for j=1,#list do +            local pth = gsub(list[j],"\\","/") -- file.clean_path +            report("checking library path %a",pth) +            local name = pth .. 
"/" .. lib +            if lfs.isfile(name) then +                foundpath = pth +            end +        end +        if foundpath then break end +    end +    if foundpath then +        report("using library path %a",foundpath) +        local right, wrong, original, stripped = { }, { }, 0, 0 +        for i=1,#libs do +            local lib = libs[i] +            local fullname = foundpath .. "/" .. lib +            if lfs.isfile(fullname) then +                report("using library %a",fullname) +                local preloaded = file.nameonly(lib) +                local data = io.loaddata(fullname,true) +                original = original + #data +                local data, delta = self_compact(data) +                right[#right+1] = lib +                result[#result+1] = m_begin_closure +                result[#result+1] = format(m_preloaded,preloaded,preloaded) +                result[#result+1] = data +                result[#result+1] = m_end_closure +                stripped = stripped + delta +            else +                report("skipping library %a",fullname) +                wrong[#wrong+1] = lib +            end +        end +        right = #right > 0 and concat(right," ") or "-" +        wrong = #wrong > 0 and concat(wrong," ") or "-" +        report("used libraries: %a",right) +        report("skipped libraries: %a",wrong) +        report("original bytes: %a",original) +        report("stripped bytes: %a",stripped) +        result[#result+1] = format(m_report,right,wrong,original,stripped) +    else +        report("no valid library path found") +    end +    return concat(result, "\n\n") +end + +function merger.selfcreate(libs,list,target) +    if target then +        self_save(target,self_swap(self_fake(),self_libs(libs,list))) +    end +end + +function merger.selfmerge(name,libs,list,target) +    self_save(target or name,self_swap(self_load(name),self_libs(libs,list))) +end + +function merger.selfclean(name) +    
self_save(name,self_swap(self_load(name),self_nothing())) +end diff --git a/lualibs-util-sto.lua b/lualibs-util-sto.lua new file mode 100644 index 0000000..191d6cd --- /dev/null +++ b/lualibs-util-sto.lua @@ -0,0 +1,189 @@ +if not modules then modules = { } end modules ['util-sto'] = { +    version   = 1.001, +    comment   = "companion to luat-lib.mkiv", +    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", +    copyright = "PRAGMA ADE / ConTeXt Development Team", +    license   = "see context related readme files" +} + +local setmetatable, getmetatable, type = setmetatable, getmetatable, type + +utilities         = utilities or { } +utilities.storage = utilities.storage or { } +local storage     = utilities.storage + +function storage.mark(t) +    if not t then +        print("\nfatal error: storage cannot be marked\n") +        os.exit() +        return +    end +    local m = getmetatable(t) +    if not m then +        m = { } +        setmetatable(t,m) +    end +    m.__storage__ = true +    return t +end + +function storage.allocate(t) +    t = t or { } +    local m = getmetatable(t) +    if not m then +        m = { } +        setmetatable(t,m) +    end +    m.__storage__ = true +    return t +end + +function storage.marked(t) +    local m = getmetatable(t) +    return m and m.__storage__ +end + +function storage.checked(t) -- returns t unchanged, aborts the run when t is nil or false +    if not t then +        print("\nfatal error: storage has not been allocated\n") -- no local reporter is defined in this module, so print as storage.mark does +        os.exit() +        return +    end +    return t +end + +-- function utilities.storage.delay(parent,name,filename) +--     local m = getmetatable(parent) +--     m.__list[name] = filename +-- end +-- +-- function utilities.storage.predefine(parent) +--     local list = { } +--     local m = getmetatable(parent) or { +--         __list = list, +--         __index = function(t,k) +--             local l = require(list[k]) +--             t[k] = l +--             return l +--         end +--     } +--     setmetatable(parent,m) +-- end +-- +-- bla 
= { } +-- utilities.storage.predefine(bla) +-- utilities.storage.delay(bla,"test","oepsoeps") +-- local t = bla.test +-- table.print(t) +-- print(t.a) + +function storage.setinitializer(data,initialize) +    local m = getmetatable(data) or { } +    m.__index = function(data,k) +        m.__index = nil -- so that we can access the entries during initializing +        initialize() +        return data[k] +    end +    setmetatable(data, m) +end + +local keyisvalue = { __index = function(t,k) +    t[k] = k +    return k +end } + +function storage.sparse(t) +    t = t or { } +    setmetatable(t,keyisvalue) +    return t +end + +-- table namespace ? + +local function f_empty ()                           return "" end -- t,k +local function f_self  (t,k) t[k] = k               return k  end +local function f_table (t,k) local v = { } t[k] = v return v  end +local function f_ignore()                                     end -- t,k,v + +local t_empty  = { __index    = f_empty  } +local t_self   = { __index    = f_self   } +local t_table  = { __index    = f_table  } +local t_ignore = { __newindex = f_ignore } + +function table.setmetatableindex(t,f) +    if type(t) ~= "table" then +        f, t = t, { } +    end +    local m = getmetatable(t) +    if m then +        if f == "empty" then +            m.__index = f_empty +        elseif f == "key" then +            m.__index = f_self +        elseif f == "table" then +            m.__index = f_table +        else +            m.__index = f +        end +    else +        if f == "empty" then +            setmetatable(t, t_empty) +        elseif f == "key" then +            setmetatable(t, t_self) +        elseif f == "table" then +            setmetatable(t, t_table) +        else +            setmetatable(t,{ __index = f }) +        end +    end +    return t +end + +function table.setmetatablenewindex(t,f) +    if type(t) ~= "table" then +        f, t = t, { } +    end +    local m = getmetatable(t) +    if m then +        
if f == "ignore" then +            m.__newindex = f_ignore +        else +            m.__newindex = f +        end +    else +        if f == "ignore" then +            setmetatable(t, t_ignore) +        else +            setmetatable(t,{ __newindex = f }) +        end +    end +    return t +end + +function table.setmetatablecall(t,f) +    if type(t) ~= "table" then +        f, t = t, { } +    end +    local m = getmetatable(t) +    if m then +        m.__call = f +    else +        setmetatable(t,{ __call = f }) +    end +    return t +end + +function table.setmetatablekey(t,key,value) +    local m = getmetatable(t) +    if not m then +        m = { } +        setmetatable(t,m) +    end +    m[key] = value +    return t +end + +function table.getmetatablekey(t,key,value) +    local m = getmetatable(t) +    return m and m[key] +end diff --git a/lualibs-util-str.lua b/lualibs-util-str.lua new file mode 100644 index 0000000..4890a11 --- /dev/null +++ b/lualibs-util-str.lua @@ -0,0 +1,766 @@ +if not modules then modules = { } end modules ['util-str'] = { +    version   = 1.001, +    comment   = "companion to luat-lib.mkiv", +    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", +    copyright = "PRAGMA ADE / ConTeXt Development Team", +    license   = "see context related readme files" +} + +utilities         = utilities or {} +utilities.strings = utilities.strings or { } +local strings     = utilities.strings + +local format, gsub, rep, sub = string.format, string.gsub, string.rep, string.sub +local load, dump = load, string.dump +local tonumber, type, tostring = tonumber, type, tostring +local unpack, concat = table.unpack, table.concat +local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc +local patterns, lpegmatch = lpeg.patterns, lpeg.match +local utfchar, utfbyte = utf.char, utf.byte +----- loadstripped = utilities.lua.loadstripped +----- setmetatableindex = table.setmetatableindex + 
+local loadstripped = _LUAVERSION < 5.2 and load or function(str) +    return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stripped load +end + +-- todo: make a special namespace for the formatter + +if not number then number = { } end -- temp hack for luatex-fonts + +local stripper = patterns.stripzeros + +local function points(n) -- formats n/65536 as a "...pt" string, "0pt" for nil or zero +    return (not n or n == 0) and "0pt" or lpegmatch(stripper,format("%.5fpt",n/65536)) +end + +local function basepoints(n) -- formats n*(7200/7227)/65536 as a "...bp" string, "0bp" for nil or zero +    return (not n or n == 0) and "0bp" or lpegmatch(stripper,format("%.5fbp", n*(7200/7227)/65536)) +end + +number.points     = points +number.basepoints = basepoints + +-- str = " \n \ntest  \n test\ntest " +-- print("["..string.gsub(string.collapsecrlf(str),"\n","+").."]") + +local rubish     = patterns.spaceortab^0 * patterns.newline +local anyrubish  = patterns.spaceortab + patterns.newline +local anything   = patterns.anything +local stripped   = (patterns.spaceortab^1 / "") * patterns.newline +local leading    = rubish^0 / "" +local trailing   = (anyrubish^1 * patterns.endofstring) / "" +local redundant  = rubish^3 / "\n" + +local pattern = Cs(leading * (trailing + redundant + stripped + anything)^0) + +function strings.collapsecrlf(str) +    return lpegmatch(pattern,str) +end + +-- The following functions might end up in another namespace. 
+ +local repeaters = { } -- watch how we also moved the -1 in depth-1 to the creator + +function strings.newrepeater(str,offset) +    offset = offset or 0 +    local s = repeaters[str] +    if not s then +        s = { } +        repeaters[str] = s +    end +    local t = s[offset] +    if t then +        return t +    end +    t = { } +    setmetatable(t, { __index = function(t,k) +        if not k then +            return "" +        end +        local n = k + offset +        local s = n > 0 and rep(str,n) or "" +        t[k] = s +        return s +    end }) +    s[offset] = t +    return t +end + +-- local dashes = strings.newrepeater("--",-1) +-- print(dashes[2],dashes[3],dashes[1]) + +local extra, tab, start = 0, 0, 4, 0 + +local nspaces = strings.newrepeater(" ") + +string.nspaces = nspaces + +local pattern = +    Carg(1) / function(t) +        extra, tab, start = 0, t or 7, 1 +    end +  * Cs(( +      Cp() * patterns.tab / function(position) +          local current = (position - start + 1) + extra +          local spaces = tab-(current-1) % tab +          if spaces > 0 then +              extra = extra + spaces - 1 +              return nspaces[spaces] -- rep(" ",spaces) +          else +              return "" +          end +      end +    + patterns.newline * Cp() / function(position) +          extra, start = 0, position +      end +    + patterns.anything +  )^1) + +function strings.tabtospace(str,tab) +    return lpegmatch(pattern,str,1,tab or 7) +end + +-- local t = { +--     "1234567123456712345671234567", +--     "\tb\tc", +--     "a\tb\tc", +--     "aa\tbb\tcc", +--     "aaa\tbbb\tccc", +--     "aaaa\tbbbb\tcccc", +--     "aaaaa\tbbbbb\tccccc", +--     "aaaaaa\tbbbbbb\tcccccc\n       aaaaaa\tbbbbbb\tcccccc", +--     "one\n	two\nxxx	three\nxx	four\nx	five\nsix", +-- } +-- for k=1,#t do +--     print(strings.tabtospace(t[k])) +-- end + +function strings.striplong(str) -- strips all leading spaces +    str = gsub(str,"^%s*","") +    str = 
gsub(str,"[\n\r]+ *","\n") +    return str +end + +-- local template = string.striplong([[ +--   aaaa +--   bb +--   cccccc +-- ]]) + +function strings.nice(str) +    str = gsub(str,"[:%-+_]+"," ") -- maybe more +    return str +end + +-- Work in progress. Interesting is that compared to the built-in this is faster in +-- luatex than in luajittex where we have a comparable speed. It only makes sense +-- to use the formatter when a (somewhat) complex format is used a lot. Each formatter +-- is a function so there is some overhead and not all formatted output is worth that +-- overhead. Keep in mind that there is an extra function call involved. In principle +-- we end up with a string concatenation so one could inline such a sequence but often +-- at the cost of less readability. So, it's a sort of (visual) compromise. Of course +-- there is the benefit of more variants. (Concerning the speed: a simple format like +-- %05fpt is better off with format than with a formatter, but as soon as you put +-- something in front formatters become faster. Passing the pt as extra argument makes +-- formatters behave better. Of course this is rather implementation dependent. Also, +-- when a specific format is only used a few times the overhead in creating it is not +-- compensated by speed.) +-- +-- More info can be found in cld-mkiv.pdf so here I stick to a simple list. 
+-- +-- integer          %...i   number +-- integer          %...d   number +-- unsigned         %...u   number +-- character        %...c   number +-- hexadecimal      %...x   number +-- HEXADECIMAL      %...X   number +-- octal            %...o   number +-- string           %...s   string number +-- float            %...f   number +-- exponential      %...e   number +-- exponential      %...E   number +-- autofloat        %...g   number +-- autofloat        %...G   number +-- utf character    %...c   number +-- force tostring   %...S   any +-- force tostring   %Q      any +-- force tonumber   %N      number (strip leading zeros) +-- signed number    %I      number +-- rounded number   %r      number +-- 0xhexadecimal    %...h   character number +-- 0xHEXADECIMAL    %...H   character number +-- U+hexadecimal    %...u   character number +-- U+HEXADECIMAL    %...U   character number +-- points           %p      number (scaled points) +-- basepoints       %b      number (scaled points) +-- table concat     %...t   table +-- serialize        %...T   sequenced (no nested tables) +-- boolean (logic)  %l      boolean +-- BOOLEAN          %L      boolean +-- whitespace       %...w +-- automatic        %...a   'whatever' (string, table, ...) +-- automatic        %...A   "whatever" (string, table, ...) + +local n = 0 + +-- we are somewhat sloppy in parsing prefixes as it's not that critical + +-- hard to avoid but we can collect them in a private namespace if needed + +-- inline the next two makes no sense as we only use this in logging + +local sequenced = table.sequenced + +function string.autodouble(s,sep) +    if s == nil then +        return '""' +    end +    local t = type(s) +    if t == "number" then +        return tostring(s) -- tostring not really needed +    end +    if t == "table" then +        return ('"' .. sequenced(s,sep or ",") .. '"') +    end +    return ('"' .. tostring(s) .. 
'"') +end + +function string.autosingle(s,sep) +    if s == nil then +        return "''" +    end +    local t = type(s) +    if t == "number" then +        return tostring(s) -- tostring not really needed +    end +    if t == "table" then +        return ("'" .. sequenced(s,sep or ",") .. "'") +    end +    return ("'" .. tostring(s) .. "'") +end + +local tracedchars  = { } +string.tracedchars = tracedchars +strings.tracers    = tracedchars + +function string.tracedchar(b) +    -- todo: table +    if type(b) == "number" then +        return tracedchars[b] or (utfchar(b) .. " (U+" .. format('%05X',b) .. ")") +    else +        local c = utfbyte(b) +        return tracedchars[c] or (b .. " (U+" .. format('%05X',c) .. ")") +    end +end + +function number.signed(i) +    if i > 0 then +        return "+",  i +    else +        return "-", -i +    end +end + +local preamble = [[ +local type = type +local tostring = tostring +local tonumber = tonumber +local format = string.format +local concat = table.concat +local signed = number.signed +local points = number.points +local basepoints = number.basepoints +local utfchar = utf.char +local utfbyte = utf.byte +local lpegmatch = lpeg.match +local nspaces = string.nspaces +local tracedchar = string.tracedchar +local autosingle = string.autosingle +local autodouble = string.autodouble +local sequenced = table.sequenced +]] + +local template = [[ +%s +%s +return function(%s) return %s end +]] + +local arguments = { "a1" } -- faster than previously used (select(n,...)) + +setmetatable(arguments, { __index = +    function(t,k) +        local v = t[k-1] .. ",a" .. k +        t[k] = v +        return v +    end +}) + +local prefix_any = C((S("+- .") + R("09"))^0) +local prefix_tab = C((1-R("az","AZ","09","%%"))^0) + +-- we've split all cases as then we can optimize them (let's omit the fuzzy u) + +-- todo: replace outer formats in next by .. 
+ +local format_s = function(f) +    n = n + 1 +    if f and f ~= "" then +        return format("format('%%%ss',a%s)",f,n) +    else -- best no tostring in order to stay compatible (.. does a selective tostring too) +        return format("(a%s or '')",n) -- goodie: nil check +    end +end + +local format_S = function(f) -- can be optimized +    n = n + 1 +    if f and f ~= "" then +        return format("format('%%%ss',tostring(a%s))",f,n) +    else +        return format("tostring(a%s)",n) +    end +end + +local format_q = function() +    n = n + 1 +    return format("(a%s and format('%%q',a%s) or '')",n,n) -- goodie: nil check (maybe separate lpeg, not faster) +end + +local format_Q = function() -- can be optimized +    n = n + 1 +    return format("format('%%q',tostring(a%s))",n) +end + +local format_i = function(f) +    n = n + 1 +    if f and f ~= "" then +        return format("format('%%%si',a%s)",f,n) +    else +        return format("a%s",n) +    end +end + +local format_d = format_i + +local format_I = function(f) +    n = n + 1 +    return format("format('%%s%%%si',signed(a%s))",f,n) +end + +local format_f = function(f) +    n = n + 1 +    return format("format('%%%sf',a%s)",f,n) +end + +local format_g = function(f) +    n = n + 1 +    return format("format('%%%sg',a%s)",f,n) +end + +local format_G = function(f) +    n = n + 1 +    return format("format('%%%sG',a%s)",f,n) +end + +local format_e = function(f) +    n = n + 1 +    return format("format('%%%se',a%s)",f,n) +end + +local format_E = function(f) +    n = n + 1 +    return format("format('%%%sE',a%s)",f,n) +end + +local format_x = function(f) +    n = n + 1 +    return format("format('%%%sx',a%s)",f,n) +end + +local format_X = function(f) +    n = n + 1 +    return format("format('%%%sX',a%s)",f,n) +end + +local format_o = function(f) +    n = n + 1 +    return format("format('%%%so',a%s)",f,n) +end + +local format_c = function() +    n = n + 1 +    return format("utfchar(a%s)",n) +end + +local 
format_C = function() +    n = n + 1 +    return format("tracedchar(a%s)",n) +end + +local format_r = function(f) +    n = n + 1 +    return format("format('%%%s.0f',a%s)",f,n) +end + +local format_h = function(f) +    n = n + 1 +    if f == "-" then +        f = sub(f,2) +        return format("format('%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) +    else +        return format("format('0x%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) +    end +end + +local format_H = function(f) +    n = n + 1 +    if f == "-" then +        f = sub(f,2) +        return format("format('%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) +    else +        return format("format('0x%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) +    end +end + +local format_u = function(f) +    n = n + 1 +    if f == "-" then +        f = sub(f,2) +        return format("format('%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) +    else +        return format("format('u+%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) +    end +end + +local format_U = function(f) +    n = n + 1 +    if f == "-" then +        f = sub(f,2) +        return format("format('%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) +    else +        return format("format('U+%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) +    end +end + +local format_p = function() +    n = n + 1 +    return format("points(a%s)",n) +end + +local format_b = function() +    n = n + 1 +    return format("basepoints(a%s)",n) +end + +local format_t = function(f) +    n = n + 1 +    if f and f ~= "" then +        return format("concat(a%s,%q)",n,f) +    else +        return format("concat(a%s)",n) +    end +end + +local format_T = function(f) +    n = n + 1 +    if f and 
f ~= "" then +        return format("sequenced(a%s,%q)",n,f) +    else +        return format("sequenced(a%s)",n) +    end +end + +local format_l = function() +    n = n + 1 +    return format("(a%s and 'true' or 'false')",n) +end + +local format_L = function() +    n = n + 1 +    return format("(a%s and 'TRUE' or 'FALSE')",n) +end + +local format_N = function() -- strips leading zeros +    n = n + 1 +    return format("tostring(tonumber(a%s) or a%s)",n,n) +end + +local format_a = function(f) +    n = n + 1 +    if f and f ~= "" then +        return format("autosingle(a%s,%q)",n,f) +    else +        return format("autosingle(a%s)",n) +    end +end + +local format_A = function(f) +    n = n + 1 +    if f and f ~= "" then +        return format("autodouble(a%s,%q)",n,f) +    else +        return format("autodouble(a%s)",n) +    end +end + +local format_w = function(f) -- handy when doing depth related indent +    n = n + 1 +    f = tonumber(f) +    if f then -- not that useful +        return format("nspaces[%s+a%s]",f,n) -- no real need for tonumber +    else +        return format("nspaces[a%s]",n) -- no real need for tonumber +    end +end + +local format_W = function(f) -- handy when doing depth related indent +    return format("nspaces[%s]",tonumber(f) or 0) +end + +local format_rest = function(s) +    return format("%q",s) -- catches " and \n and such +end + +local format_extension = function(extensions,f,name) +    local extension = extensions[name] or "tostring(%s)" +    local f = tonumber(f) or 1 +    if f == 0 then +        return extension +    elseif f == 1 then +        n = n + 1 +        local a = "a" .. n +        return format(extension,a,a) -- maybe more times? +    elseif f < 0 then +        local a = "a" .. (n + f + 1) +        return format(extension,a,a) +    else +        local t = { } +        for i=1,f do +            n = n + 1 +            t[#t+1] = "a" .. 
n +        end +        return format(extension,unpack(t)) +    end +end + +local builder = Cs { "start", +    start = ( +        ( +            P("%") / "" +          * ( +                V("!") -- new +              + V("s") + V("q") +              + V("i") + V("d") +              + V("f") + V("g") + V("G") + V("e") + V("E") +              + V("x") + V("X") + V("o") +              -- +              + V("c") +              + V("C") +              + V("S") -- new +              + V("Q") -- new +              + V("N") -- new +              -- +              + V("r") +              + V("h") + V("H") + V("u") + V("U") +              + V("p") + V("b") +              + V("t") + V("T") +              + V("l") + V("L") +              + V("I") +              + V("h") -- new +              + V("w") -- new +              + V("W") -- new +              + V("a") -- new +              + V("A") -- new +              -- +              + V("*") -- ignores probably messed up % +            ) +          + V("*") +        ) +     * (P(-1) + Carg(1)) +    )^0, +    -- +    ["s"] = (prefix_any * P("s")) / format_s, -- %s => regular %s (string) +    ["q"] = (prefix_any * P("q")) / format_q, -- %q => regular %q (quoted string) +    ["i"] = (prefix_any * P("i")) / format_i, -- %i => regular %i (integer) +    ["d"] = (prefix_any * P("d")) / format_d, -- %d => regular %d (integer) +    ["f"] = (prefix_any * P("f")) / format_f, -- %f => regular %f (float) +    ["g"] = (prefix_any * P("g")) / format_g, -- %g => regular %g (float) +    ["G"] = (prefix_any * P("G")) / format_G, -- %G => regular %G (float) +    ["e"] = (prefix_any * P("e")) / format_e, -- %e => regular %e (float) +    ["E"] = (prefix_any * P("E")) / format_E, -- %E => regular %E (float) +    ["x"] = (prefix_any * P("x")) / format_x, -- %x => regular %x (hexadecimal) +    ["X"] = (prefix_any * P("X")) / format_X, -- %X => regular %X (HEXADECIMAL) +    ["o"] = (prefix_any * P("o")) / format_o, -- %o => regular %o (octal) +    -- + 
   ["S"] = (prefix_any * P("S")) / format_S, -- %S => %s (tostring) +    ["Q"] = (prefix_any * P("Q")) / format_S, -- %Q => %q (tostring) +    ["N"] = (prefix_any * P("N")) / format_N, -- %N => tonumber (strips leading zeros) +    ["c"] = (prefix_any * P("c")) / format_c, -- %c => utf character (extension to regular) +    ["C"] = (prefix_any * P("C")) / format_C, -- %c => U+.... utf character +    -- +    ["r"] = (prefix_any * P("r")) / format_r, -- %r => round +    ["h"] = (prefix_any * P("h")) / format_h, -- %h => 0x0a1b2 (when - no 0x) was v +    ["H"] = (prefix_any * P("H")) / format_H, -- %H => 0x0A1B2 (when - no 0x) was V +    ["u"] = (prefix_any * P("u")) / format_u, -- %u => u+0a1b2 (when - no u+) +    ["U"] = (prefix_any * P("U")) / format_U, -- %U => U+0A1B2 (when - no U+) +    ["p"] = (prefix_any * P("p")) / format_p, -- %p => 12.345pt / maybe: P (and more units) +    ["b"] = (prefix_any * P("b")) / format_b, -- %b => 12.342bp / maybe: B (and more units) +    ["t"] = (prefix_tab * P("t")) / format_t, -- %t => concat +    ["T"] = (prefix_tab * P("T")) / format_T, -- %t => sequenced +    ["l"] = (prefix_tab * P("l")) / format_l, -- %l => boolean +    ["L"] = (prefix_tab * P("L")) / format_L, -- %L => BOOLEAN +    ["I"] = (prefix_any * P("I")) / format_I, -- %I => signed integer +    -- +    ["w"] = (prefix_any * P("w")) / format_w, -- %w => n spaces (optional prefix is added) +    ["W"] = (prefix_any * P("W")) / format_W, -- %W => mandate prefix, no specifier +    -- +    ["a"] = (prefix_any * P("a")) / format_a, -- %a => '...' (forces tostring) +    ["A"] = (prefix_any * P("A")) / format_A, -- %A => "..." 
(forces tostring) +    -- +    ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%%%")^1) / format_rest, -- rest (including %%) +    -- +    ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension, +} + +-- we can be clever and only alias what is needed + +local direct = Cs ( +        P("%")/"" +      * Cc([[local format = string.format return function(str) return format("%]]) +      * (S("+- .") + R("09"))^0 +      * S("sqidfgGeExXo") +      * Cc([[",str) end]]) +      * P(-1) +    ) + +local function make(t,str) +    local f +    local p +    local p = lpegmatch(direct,str) +    if p then +        f = loadstripped(p)() +    else +        n = 0 +        p = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n +        if n > 0 then +            p = format(template,preamble,t._preamble_,arguments[n],p) +--           print("builder>",p) +            f = loadstripped(p)() +        else +            f = function() return str end +        end +    end +    t[str] = f +    return f +end + +-- -- collect periodically +-- +-- local threshold = 1000 -- max nof cached formats +-- +-- local function make(t,str) +--     local f = rawget(t,str) +--     if f then +--         return f +--     end +--     local parent = t._t_ +--     if parent._n_ > threshold then +--         local m = { _t_ = parent } +--         getmetatable(parent).__index = m +--         setmetatable(m, { __index = make }) +--     else +--         parent._n_ = parent._n_ + 1 +--     end +--     local f +--     local p = lpegmatch(direct,str) +--     if p then +--         f = loadstripped(p)() +--     else +--         n = 0 +--         p = lpegmatch(builder,str,1,"..",parent._extensions_) -- after this we know n +--         if n > 0 then +--             p = format(template,preamble,parent._preamble_,arguments[n],p) +--          -- print("builder>",p) +--             f = loadstripped(p)() +--         else +--             f = function() return str end +--         end +--     end 
+--     t[str] = f +--     return f +-- end + +local function use(t,fmt,...) +    return t[fmt](...) +end + +strings.formatters = { } + +-- we cannot make these tables weak, unless we start using an indirect +-- table (metatable) in which case we could better keep a count and +-- clear that table when a threshold is reached + +function strings.formatters.new() +    local t = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter" } +    setmetatable(t, { __index = make, __call = use }) +    return t +end + +-- function strings.formatters.new() +--     local t = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter", _n_ = 0 } +--     local m = { _t_ = t } +--     setmetatable(t, { __index = m, __call = use }) +--     setmetatable(m, { __index = make }) +--     return t +-- end + +local formatters   = strings.formatters.new() -- the default instance + +string.formatters  = formatters -- in the main string namespace +string.formatter   = function(str,...) return formatters[str](...) end -- sometimes nicer name + +local function add(t,name,template,preamble) +    if type(t) == "table" and t._type_ == "formatter" then +        t._extensions_[name] = template or "%s" +        if preamble then +            t._preamble_ = preamble .. "\n" .. t._preamble_ -- so no overload ! +        end +    end +end + +strings.formatters.add = add + +-- registered in the default instance (should we fall back on this one?) 
+ +lpeg.patterns.xmlescape = Cs((P("<")/"<" + P(">")/">" + P("&")/"&" + P('"')/""" + P(1))^0) +lpeg.patterns.texescape = Cs((C(S("#$%\\{}"))/"\\%1" + P(1))^0) + +add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]]) +add(formatters,"tex",[[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]]) + +-- -- yes or no: +-- +-- local function make(t,str) +--     local f +--     local p = lpegmatch(direct,str) +--     if p then +--         f = loadstripped(p)() +--     else +--         n = 0 +--         p = lpegmatch(builder,str,1,",") -- after this we know n +--         if n > 0 then +--             p = format(template,template_shortcuts,arguments[n],p) +--             f = loadstripped(p)() +--         else +--             f = function() return str end +--         end +--     end +--     t[str] = f +--     return f +-- end +-- +-- local formatteds  = string.formatteds or { } +-- string.formatteds = formatteds +-- +-- setmetatable(formatteds, { __index = make, __call = use }) diff --git a/lualibs-util-tab.lua b/lualibs-util-tab.lua new file mode 100644 index 0000000..ecf36b1 --- /dev/null +++ b/lualibs-util-tab.lua @@ -0,0 +1,493 @@ +if not modules then modules = { } end modules ['util-tab'] = { +    version   = 1.001, +    comment   = "companion to luat-lib.mkiv", +    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", +    copyright = "PRAGMA ADE / ConTeXt Development Team", +    license   = "see context related readme files" +} + +utilities        = utilities or {} +utilities.tables = utilities.tables or { } +local tables     = utilities.tables + +local format, gmatch, gsub = string.format, string.gmatch, string.gsub +local concat, insert, remove = table.concat, table.insert, table.remove +local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring +local type, next, rawset, tonumber, tostring, load, select = type, next, rawset, tonumber, tostring, load, select +local 
lpegmatch, P, Cs, Cc = lpeg.match, lpeg.P, lpeg.Cs, lpeg.Cc +local serialize, sortedkeys, sortedpairs = table.serialize, table.sortedkeys, table.sortedpairs +local formatters = string.formatters + +local splitter = lpeg.tsplitat(".") + +function tables.definetable(target,nofirst,nolast) -- defines undefined tables +    local composed, shortcut, t = nil, nil, { } +    local snippets = lpegmatch(splitter,target) +    for i=1,#snippets - (nolast and 1 or 0) do +        local name = snippets[i] +        if composed then +            composed = shortcut .. "." .. name +            shortcut = shortcut .. "_" .. name +            t[#t+1] = formatters["local %s = %s if not %s then %s = { } %s = %s end"](shortcut,composed,shortcut,shortcut,composed,shortcut) +        else +            composed = name +            shortcut = name +            if not nofirst then +                t[#t+1] = formatters["%s = %s or { }"](composed,composed) +            end +        end +    end +    if nolast then +        composed = shortcut .. "." .. snippets[#snippets] +    end +    return concat(t,"\n"), composed +end + +-- local t = tables.definedtable("a","b","c","d") + +function tables.definedtable(...) +    local t = _G +    for i=1,select("#",...) do +        local li = select(i,...) 
+        local tl = t[li] +        if not tl then +            tl = { } +            t[li] = tl +        end +        t = tl +    end +    return t +end + +function tables.accesstable(target,root) +    local t = root or _G +    for name in gmatch(target,"([^%.]+)") do +        t = t[name] +        if not t then +            return +        end +    end +    return t +end + +function tables.migratetable(target,v,root) +    local t = root or _G +    local names = string.split(target,".") +    for i=1,#names-1 do +        local name = names[i] +        t[name] = t[name] or { } +        t = t[name] +        if not t then +            return +        end +    end +    t[names[#names]] = v +end + +function tables.removevalue(t,value) -- todo: n +    if value then +        for i=1,#t do +            if t[i] == value then +                remove(t,i) +                -- remove all, so no: return +            end +        end +    end +end + +function tables.insertbeforevalue(t,value,extra) +    for i=1,#t do +        if t[i] == extra then +            remove(t,i) +        end +    end +    for i=1,#t do +        if t[i] == value then +            insert(t,i,extra) +            return +        end +    end +    insert(t,1,extra) +end + +function tables.insertaftervalue(t,value,extra) +    for i=1,#t do +        if t[i] == extra then +            remove(t,i) +        end +    end +    for i=1,#t do +        if t[i] == value then +            insert(t,i+1,extra) +            return +        end +    end +    insert(t,#t+1,extra) +end + +-- experimental + +local escape = Cs(Cc('"') * ((P('"')/'""' + P(1))^0) * Cc('"')) + +function table.tocsv(t,specification) +    if t and #t > 0 then +        local result = { } +        local r = { } +        specification = specification or { } +        local fields = specification.fields +        if type(fields) ~= "string" then +            fields = sortedkeys(t[1]) +        end +        local separator = specification.separator or "," +   
     if specification.preamble == true then +            for f=1,#fields do +                r[f] = lpegmatch(escape,tostring(fields[f])) +            end +            result[1] = concat(r,separator) +        end +        for i=1,#t do +            local ti = t[i] +            for f=1,#fields do +                local field = ti[fields[f]] +                if type(field) == "string" then +                    r[f] = lpegmatch(escape,field) +                else +                    r[f] = tostring(field) +                end +            end +            result[#result+1] = concat(r,separator) +        end +        return concat(result,"\n") +    else +        return "" +    end +end + +-- local nspaces = utilities.strings.newrepeater(" ") +-- local escape  = Cs((P("<")/"<" + P(">")/">" + P("&")/"&" + P(1))^0) +-- +-- local function toxml(t,d,result,step) +--     for k, v in sortedpairs(t) do +--         local s = nspaces[d] +--         local tk = type(k) +--         local tv = type(v) +--         if tv == "table" then +--             if tk == "number" then +--                 result[#result+1] = format("%s<entry n='%s'>",s,k) +--                 toxml(v,d+step,result,step) +--                 result[#result+1] = format("%s</entry>",s,k) +--             else +--                 result[#result+1] = format("%s<%s>",s,k) +--                 toxml(v,d+step,result,step) +--                 result[#result+1] = format("%s</%s>",s,k) +--             end +--         elseif tv == "string" then +--             if tk == "number" then +--                 result[#result+1] = format("%s<entry n='%s'>%s</entry>",s,k,lpegmatch(escape,v),k) +--             else +--                 result[#result+1] = format("%s<%s>%s</%s>",s,k,lpegmatch(escape,v),k) +--             end +--         elseif tk == "number" then +--             result[#result+1] = format("%s<entry n='%s'>%s</entry>",s,k,tostring(v),k) +--         else +--             result[#result+1] = 
format("%s<%s>%s</%s>",s,k,tostring(v),k) +--         end +--     end +-- end +-- +-- much faster + +local nspaces = utilities.strings.newrepeater(" ") + +local function toxml(t,d,result,step) +    for k, v in sortedpairs(t) do +        local s = nspaces[d] -- inlining this is somewhat faster but gives more formatters +        local tk = type(k) +        local tv = type(v) +        if tv == "table" then +            if tk == "number" then +                result[#result+1] = formatters["%s<entry n='%s'>"](s,k) +                toxml(v,d+step,result,step) +                result[#result+1] = formatters["%s</entry>"](s,k) +            else +                result[#result+1] = formatters["%s<%s>"](s,k) +                toxml(v,d+step,result,step) +                result[#result+1] = formatters["%s</%s>"](s,k) +            end +        elseif tv == "string" then +            if tk == "number" then +                result[#result+1] = formatters["%s<entry n='%s'>%!xml!</entry>"](s,k,v,k) +            else +                result[#result+1] = formatters["%s<%s>%!xml!</%s>"](s,k,v,k) +            end +        elseif tk == "number" then +            result[#result+1] = formatters["%s<entry n='%s'>%S</entry>"](s,k,v,k) +        else +            result[#result+1] = formatters["%s<%s>%S</%s>"](s,k,v,k) +        end +    end +end + +-- function table.toxml(t,name,nobanner,indent,spaces) +--     local noroot = name == false +--     local result = (nobanner or noroot) and { } or { "<?xml version='1.0' standalone='yes' ?>" } +--     local indent = rep(" ",indent or 0) +--     local spaces = rep(" ",spaces or 1) +--     if noroot then +--         toxml( t, inndent, result, spaces) +--     else +--         toxml( { [name or "root"] = t }, indent, result, spaces) +--     end +--     return concat(result,"\n") +-- end + +function table.toxml(t,specification) +    specification = specification or { } +    local name   = specification.name +    local noroot = name == false +    local 
result = (specification.nobanner or noroot) and { } or { "<?xml version='1.0' standalone='yes' ?>" } +    local indent = specification.indent or 0 +    local spaces = specification.spaces or 1 +    if noroot then +        toxml( t, indent, result, spaces) +    else +        toxml( { [name or "data"] = t }, indent, result, spaces) +    end +    return concat(result,"\n") +end + +-- also experimental + +-- encapsulate(table,utilities.tables) +-- encapsulate(table,utilities.tables,true) +-- encapsulate(table,true) + +function tables.encapsulate(core,capsule,protect) +    if type(capsule) ~= "table" then +        protect = true +        capsule = { } +    end +    for key, value in next, core do +        if capsule[key] then +            print(formatters["\ninvalid %s %a in %a"]("inheritance",key,core)) +            os.exit() +        else +            capsule[key] = value +        end +    end +    if protect then +        for key, value in next, core do +            core[key] = nil +        end +        setmetatable(core, { +            __index = capsule, +            __newindex = function(t,key,value) +                if capsule[key] then +                    print(formatters["\ninvalid %s %a' in %a"]("overload",key,core)) +                    os.exit() +                else +                    rawset(t,key,value) +                end +            end +        } ) +    end +end + +local function fastserialize(t,r,outer) -- no mixes +    r[#r+1] = "{" +    local n = #t +    if n > 0 then +        for i=1,n do +            local v = t[i] +            local tv = type(v) +            if tv == "string" then +                r[#r+1] = formatters["%q,"](v) +            elseif tv == "number" then +                r[#r+1] = formatters["%s,"](v) +            elseif tv == "table" then +                fastserialize(v,r) +            elseif tv == "boolean" then +                r[#r+1] = formatters["%S,"](v) +            end +        end +    else +        for k, v in next, t 
do +            local tv = type(v) +            if tv == "string" then +                r[#r+1] = formatters["[%q]=%q,"](k,v) +            elseif tv == "number" then +                r[#r+1] = formatters["[%q]=%s,"](k,v) +            elseif tv == "table" then +                r[#r+1] = formatters["[%q]="](k) +                fastserialize(v,r) +            elseif tv == "boolean" then +                r[#r+1] = formatters["[%q]=%S,"](k,v) +            end +        end +    end +    if outer then +        r[#r+1] = "}" +    else +        r[#r+1] = "}," +    end +    return r +end + +-- local f_hashed_string  = formatters["[%q]=%q,"] +-- local f_hashed_number  = formatters["[%q]=%s,"] +-- local f_hashed_table   = formatters["[%q]="] +-- local f_hashed_true    = formatters["[%q]=true,"] +-- local f_hashed_false   = formatters["[%q]=false,"] +-- +-- local f_indexed_string = formatters["%q,"] +-- local f_indexed_number = formatters["%s,"] +-- ----- f_indexed_true   = formatters["true,"] +-- ----- f_indexed_false  = formatters["false,"] +-- +-- local function fastserialize(t,r,outer) -- no mixes +--     r[#r+1] = "{" +--     local n = #t +--     if n > 0 then +--         for i=1,n do +--             local v = t[i] +--             local tv = type(v) +--             if tv == "string" then +--                 r[#r+1] = f_indexed_string(v) +--             elseif tv == "number" then +--                 r[#r+1] = f_indexed_number(v) +--             elseif tv == "table" then +--                 fastserialize(v,r) +--             elseif tv == "boolean" then +--              -- r[#r+1] = v and f_indexed_true(k) or f_indexed_false(k) +--                 r[#r+1] = v and "true," or "false," +--             end +--         end +--     else +--         for k, v in next, t do +--             local tv = type(v) +--             if tv == "string" then +--                 r[#r+1] = f_hashed_string(k,v) +--             elseif tv == "number" then +--                 r[#r+1] = 
f_hashed_number(k,v) +--             elseif tv == "table" then +--                 r[#r+1] = f_hashed_table(k) +--                 fastserialize(v,r) +--             elseif tv == "boolean" then +--                 r[#r+1] = v and f_hashed_true(k) or f_hashed_false(k) +--             end +--         end +--     end +--     if outer then +--         r[#r+1] = "}" +--     else +--         r[#r+1] = "}," +--     end +--     return r +-- end + +function table.fastserialize(t,prefix) -- so prefix should contain the = +    return concat(fastserialize(t,{ prefix or "return" },true)) +end + +function table.deserialize(str) +    if not str or str == "" then +        return +    end +    local code = load(str) +    if not code then +        return +    end +    code = code() +    if not code then +        return +    end +    return code +end + +-- inspect(table.fastserialize { a = 1, b = { 4, { 5, 6 } }, c = { d = 7, e = 'f"g\nh' } }) + +function table.load(filename) +    if filename then +        local t = io.loaddata(filename) +        if t and t ~= "" then +            t = load(t) +            if type(t) == "function" then +                t = t() +                if type(t) == "table" then +                    return t +                end +            end +        end +    end +end + +function table.save(filename,t,n,...) 
+    io.savedata(filename,serialize(t,n == nil and true or n,...)) +end + +local function slowdrop(t) +    local r = { } +    local l = { } +    for i=1,#t do +        local ti = t[i] +        local j = 0 +        for k, v in next, ti do +            j = j + 1 +            l[j] = formatters["%s=%q"](k,v) +        end +        r[i] = formatters[" {%t},\n"](l) +    end +    return formatters["return {\n%st}"](r) +end + +local function fastdrop(t) +    local r = { "return {\n" } +    for i=1,#t do +        local ti = t[i] +        r[#r+1] = " {" +        for k, v in next, ti do +            r[#r+1] = formatters["%s=%q"](k,v) +        end +        r[#r+1] = "},\n" +    end +    r[#r+1] = "}" +    return concat(r) +end + +function table.drop(t,slow) -- only  { { a=2 }, {a=3} } +    if #t == 0 then +        return "return { }" +    elseif slow == true then +        return slowdrop(t) -- less memory +    else +        return fastdrop(t) -- some 15% faster +    end +end + +function table.autokey(t,k) +    local v = { } +    t[k] = v +    return v +end + +local selfmapper = { __index = function(t,k) t[k] = k return k end } + +function table.twowaymapper(t) +    if not t then +        t = { } +    else +        for i=0,#t do +            local ti = t[i]       -- t[1]     = "one" +            if ti then +                local i = tostring(i) +                t[i]    = ti      -- t["1"]   = "one" +                t[ti]   = i       -- t["one"] = "1" +            end +        end +        t[""] = t[0] or "" +    end + -- setmetatableindex(t,"key") +    setmetatable(t,selfmapper) +    return t +end + diff --git a/lualibs-utils.lua b/lualibs-utils.lua deleted file mode 100644 index ebc27b8..0000000 --- a/lualibs-utils.lua +++ /dev/null @@ -1,176 +0,0 @@ -if not modules then modules = { } end modules ['l-utils'] = { -    version   = 1.001, -    comment   = "companion to luat-lib.mkiv", -    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", -    copyright = "PRAGMA ADE / ConTeXt 
Development Team", -    license   = "see context related readme files" -} - --- hm, quite unreadable - -local gsub = string.gsub -local concat = table.concat -local type, next = type, next - -if not utils        then utils        = { } end -if not utils.merger then utils.merger = { } end -if not utils.lua    then utils.lua    = { } end - -utils.merger.m_begin = "begin library merge" -utils.merger.m_end   = "end library merge" -utils.merger.pattern = -    "%c+" .. -    "%-%-%s+" .. utils.merger.m_begin .. -    "%c+(.-)%c+" .. -    "%-%-%s+" .. utils.merger.m_end .. -    "%c+" - -function utils.merger._self_fake_() -    return -        "-- " .. "created merged file" .. "\n\n" .. -        "-- " .. utils.merger.m_begin  .. "\n\n" .. -        "-- " .. utils.merger.m_end    .. "\n\n" -end - -function utils.report(...) -    print(...) -end - -utils.merger.strip_comment = true - -function utils.merger._self_load_(name) -    local f, data = io.open(name), "" -    if f then -        utils.report("reading merge from %s",name) -        data = f:read("*all") -        f:close() -    else -        utils.report("unknown file to merge %s",name) -    end -    if data and utils.merger.strip_comment then -        -- saves some 20K -        data = gsub(data,"%-%-~[^\n\r]*[\r\n]", "") -    end -    return data or "" -end - -function utils.merger._self_save_(name, data) -    if data ~= "" then -        local f = io.open(name,'w') -        if f then -            utils.report("saving merge from %s",name) -            f:write(data) -            f:close() -        end -    end -end - -function utils.merger._self_swap_(data,code) -    if data ~= "" then -        return (gsub(data,utils.merger.pattern, function(s) -            return "\n\n" .. "-- "..utils.merger.m_begin .. "\n" .. code .. "\n" .. "-- "..utils.merger.m_end .. 
"\n\n" -        end, 1)) -    else -        return "" -    end -end - ---~ stripper: ---~ ---~ data = gsub(data,"%-%-~[^\n]*\n","") ---~ data = gsub(data,"\n\n+","\n") - -function utils.merger._self_libs_(libs,list) -    local result, f, frozen = { }, nil, false -    result[#result+1] = "\n" -    if type(libs) == 'string' then libs = { libs } end -    if type(list) == 'string' then list = { list } end -    local foundpath = nil -    for i=1,#libs do -        local lib = libs[i] -        for j=1,#list do -            local pth = gsub(list[j],"\\","/") -- file.clean_path -            utils.report("checking library path %s",pth) -            local name = pth .. "/" .. lib -            if lfs.isfile(name) then -                foundpath = pth -            end -        end -        if foundpath then break end -    end -    if foundpath then -        utils.report("using library path %s",foundpath) -        local right, wrong = { }, { } -        for i=1,#libs do -            local lib = libs[i] -            local fullname = foundpath .. "/" .. 
lib -            if lfs.isfile(fullname) then -            --  right[#right+1] = lib -                utils.report("merging library %s",fullname) -                result[#result+1] = "do -- create closure to overcome 200 locals limit" -                result[#result+1] = io.loaddata(fullname,true) -                result[#result+1] = "end -- of closure" -            else -            --  wrong[#wrong+1] = lib -                utils.report("no library %s",fullname) -            end -        end -        if #right > 0 then -            utils.report("merged libraries: %s",concat(right," ")) -        end -        if #wrong > 0 then -            utils.report("skipped libraries: %s",concat(wrong," ")) -        end -    else -        utils.report("no valid library path found") -    end -    return concat(result, "\n\n") -end - -function utils.merger.selfcreate(libs,list,target) -    if target then -        utils.merger._self_save_( -            target, -            utils.merger._self_swap_( -                utils.merger._self_fake_(), -                utils.merger._self_libs_(libs,list) -            ) -        ) -    end -end - -function utils.merger.selfmerge(name,libs,list,target) -    utils.merger._self_save_( -        target or name, -        utils.merger._self_swap_( -            utils.merger._self_load_(name), -            utils.merger._self_libs_(libs,list) -        ) -    ) -end - -function utils.merger.selfclean(name) -    utils.merger._self_save_( -        name, -        utils.merger._self_swap_( -            utils.merger._self_load_(name), -            "" -        ) -    ) -end - -function utils.lua.compile(luafile, lucfile, cleanup, strip) -- defaults: cleanup=false strip=true - -- utils.report("compiling",luafile,"into",lucfile) -    os.remove(lucfile) -    local command = "-o " .. string.quote(lucfile) .. " " .. string.quote(luafile) -    if strip ~= false then -        command = "-s " .. command -    end -    local done = (os.spawn("texluac " .. 
command) == 0) or (os.spawn("luac " .. command) == 0) -    if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then -     -- utils.report("removing",luafile) -        os.remove(luafile) -    end -    return done -end - diff --git a/lualibs.dtx b/lualibs.dtx index 799c3df..e9a20a6 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -33,7 +33,7 @@  \input docstrip.tex  \Msg{************************************************************************}  \Msg{* Installation} -\Msg{* Package: lualibs 2011/01/20 v0.96 Lua additional functions.} +\Msg{* Package: lualibs 2012/10/19 v0.97 Lua additional functions.}  \Msg{************************************************************************}  \keepsilent @@ -90,7 +90,7 @@ and the derived file lualibs.lua.  %<*driver>  \NeedsTeXFormat{LaTeX2e}  \ProvidesFile{lualibs.drv} -  [2011/01/20 v0.96 Lua additional functions.] +  [2012/10/19 v0.97 Lua additional functions.]  \documentclass{ltxdoc}  \EnableCrossrefs  \CodelineIndex @@ -120,7 +120,7 @@ and the derived file lualibs.lua.  % \GetFileInfo{lualibs.drv}  %  % \title{The \textsf{lualibs} package} -% \date{2011/01/20 v0.96} +% \date{2012/10/19 v0.97}  % \author{Elie Roux \\ \texttt{elie.roux@telecom-bretagne.eu}}  %  % \maketitle @@ -151,6 +151,63 @@ and the derived file lualibs.lua.  % initialize \textsf{kpse} library so that |require()| can find files under  % TEXMF tree: |kpse.set_program_name("luatex")|.  % +% \section{Files} +% +% The \textsf{lualibs} bundle contains files from two Con\TeX t Lua +% library categories: The generic auxiliary functions (original file prefix: +% |l-|) together form something close to a standard libary. Most of these are +% extensions of an existing namespace, like for instance |l-table.lua| which +% adds full-fledged serialization capabilities to the Lua table library. +% They were imported under the \textsf{lualibs}-prefix. +% (For a list see table~\ref{tab:extensions}.) 
+% +% \begin{table}[h] +%  \centering +%  \caption{Extensions of the Lua standard library.} +%  \begin{tabular}{l l l} +%   \textsf{lualibs} name & Con\TeX t name & content                        \\ +%   \hline +%   lualibs-lua.lua       & l-lua.lua      & compatibility, library paths   \\ +%   lualibs-lpeg.lua      & l-lpeg.lua     & patterns                       \\ +%   lualibs-function.lua  & l-function.lua & empty except for dummy         \\ +%   lualibs-string.lua    & l-string.lua   & string manipulation            \\ +%   lualibs-table.lua     & l-table.lua    & serialization, conversion      \\ +%   lualibs-boolean.lua   & l-boolean.lua  & boolean converter              \\ +%   lualibs-number.lua    & l-number.lua   & bit operations                 \\ +%   lualibs-math.lua      & l-math.lua     & math functions                 \\ +%   lualibs-io.lua        & l-io.lua       & reading and writing files      \\ +%   lualibs-os.lua        & l-os.lua       & platform specific code         \\ +%   lualibs-file.lua      & l-file.lua     & filesystem operations          \\ +%   lualibs-md5.lua       & l-md5.lua      & checksum functions             \\ +%   lualibs-dir.lua       & l-dir.lua      & directory handling             \\ +%   lualibs-unicode.lua   & l-unicode.lua  & utf and unicode                \\ +%   lualibs-url.lua       & l-url.lua      & url handling                   \\ +%   lualibs-set.lua       & l-set.lua      & sets                           \\ +%  \end{tabular} +%  \label{tab:extensions} +% \end{table} +% +% The second category comprises a selection of files mostly from the +% utilities namespace (|util-|; cf. table~\ref{tab:utilities}). +% Their purpose is more specific and at times quite low-level. 
+% +% \begin{table}[h] +%  \centering +%  \caption{Utility functions.} +%  \begin{tabular}{l l l} +%   \textsf{lualibs} name & Con\TeX t name & content                     \\ +%   \hline +%   lualibs-util-lua.lua  & util-lua.lua   & operations on bytecode      \\ +%   lualibs-util-sto.lua  & util-sto.lua   & table allocation            \\ +%   lualibs-util-mrg.lua  & util-mrg.lua   & merging lua sources         \\ +%   lualibs-util-dim.lua  & util-dim.lua   & converters for dimensions   \\ +%   lualibs-util-str.lua  & util-str.lua   & extra string functions      \\ +%   lualibs-util-tab.lua  & util-tab.lua   & extra table functions       \\ +%   lualibs-util-jsn.lua  & util-jsn.lua   & conversion to and from json \\ +%  \end{tabular} +%  \label{tab:utilities} +% \end{table} +%  % \pagebreak  % \section{\texttt{lualibs.lua}}  % @@ -163,8 +220,8 @@ module('lualibs', package.seeall)  local lualibs_module = {      name          = "lualibs", -    version       = 0.96, -    date          = "2011/01/20", +    version       = 0.97, +    date          = "2012/10/19",      description   = "Lua additional functions.",      author        = "Hans Hagen, PRAGMA-ADE, Hasselt NL & Elie Roux",      copyright     = "PRAGMA ADE / ConTeXt Development Team", @@ -179,24 +236,30 @@ end  % Load the modules.  
%  %    \begin{macrocode} -require("lualibs-string") +require("lualibs-lua")  require("lualibs-lpeg") +require("lualibs-function") +require("lualibs-string") +require("lualibs-table")  require("lualibs-boolean")  require("lualibs-number")  require("lualibs-math") -require("lualibs-table") -require("lualibs-aux")  require("lualibs-io")  require("lualibs-os")  require("lualibs-file")  require("lualibs-md5")  require("lualibs-dir")  require("lualibs-unicode") -require("lualibs-utils") -require("lualibs-dimen")  require("lualibs-url")  require("lualibs-set") -require("lualibs-dimen") +require("lualibs-util-str") +--[[everything below apparently not required for the fontloader]] +require("lualibs-util-tab") +require("lualibs-util-sto") +require("lualibs-util-dim") +require("lualibs-util-jsn") +--require("lualibs-util-mrg")-- not required +require("lualibs-util-lua")  %    \end{macrocode}  %  % \iffalse diff --git a/lualibs.lua b/lualibs.lua new file mode 100644 index 0000000..cf9b039 --- /dev/null +++ b/lualibs.lua @@ -0,0 +1,54 @@ +--  +--  This is file `lualibs.lua', +--  generated with the docstrip utility. +--  +--  The original source files were: +--  +--  lualibs.dtx  (with options: `lua') +--  This is a generated file. +--   +--  Copyright (C) 2009 by PRAGMA ADE / ConTeXt Development Team +--   +--  See ConTeXt's mreadme.pdf for the license. +--   +--  This work consists of the main source file lualibs.dtx +--  and the derived file lualibs.lua. 
+--   +module('lualibs', package.seeall) + +local lualibs_module = { +    name          = "lualibs", +    version       = 0.97, +    date          = "2012/10/19", +    description   = "Lua additional functions.", +    author        = "Hans Hagen, PRAGMA-ADE, Hasselt NL & Elie Roux", +    copyright     = "PRAGMA ADE / ConTeXt Development Team", +    license       = "See ConTeXt's mreadme.pdf for the license", +} + +if luatexbase and luatexbase.provides_module then +    luatexbase.provides_module(lualibs_module) +end +require("lualibs-string") +require("lualibs-lpeg") +require("lualibs-boolean") +require("lualibs-number") +require("lualibs-math") +require("lualibs-table") +require("lualibs-io") +require("lualibs-os") +require("lualibs-file") +require("lualibs-md5") +require("lualibs-dir") +require("lualibs-unicode") +require("lualibs-url") +require("lualibs-set") +require("lualibs-util-lua") +require("lualibs-util-sto") +require("lualibs-util-mrg") +require("lualibs-util-dim") +require("lualibs-util-str") +require("lualibs-util-tab") +require("lualibs-util-jsn") +--  +--  End of File `lualibs.lua'. | 
