summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Gesang <phg42.2a@gmail.com>2013-04-29 15:00:05 -0700
committerPhilipp Gesang <phg42.2a@gmail.com>2013-04-29 15:00:05 -0700
commit5ff06a36a0e82f3350bc955fac3825d7a1969289 (patch)
treed02c81d72c38393699bba1cc0ac6152f58aa44a6
parent69adb7e51c6c082d0e16e45ff9f5ac75c4618056 (diff)
parent170fbd1a07aaadace4b976f62e67572c53c8449b (diff)
downloadlualibs-5ff06a36a0e82f3350bc955fac3825d7a1969289.tar.gz
Merge pull request #1 from phi-gamma/master
import current status
-rw-r--r--NEWS5
-rw-r--r--README10
-rw-r--r--lualibs-aux.lua257
-rw-r--r--lualibs-boolean.lua66
-rw-r--r--lualibs-dir.lua314
-rw-r--r--lualibs-file.lua726
-rw-r--r--lualibs-function.lua11
-rw-r--r--lualibs-io.lua237
-rw-r--r--lualibs-lpeg.lua829
-rw-r--r--lualibs-lua.lua393
-rw-r--r--lualibs-math.lua27
-rw-r--r--lualibs-md5.lua91
-rw-r--r--lualibs-number.lua185
-rw-r--r--lualibs-os.lua267
-rw-r--r--lualibs-set.lua7
-rw-r--r--lualibs-string.lua399
-rw-r--r--lualibs-table.lua1215
-rw-r--r--lualibs-unicode.lua981
-rw-r--r--lualibs-url.lua338
-rw-r--r--lualibs-util-dim.lua (renamed from lualibs-dimen.lua)213
-rw-r--r--lualibs-util-jsn.lua145
-rw-r--r--lualibs-util-lua.lua351
-rw-r--r--lualibs-util-mrg.lua221
-rw-r--r--lualibs-util-sto.lua189
-rw-r--r--lualibs-util-str.lua766
-rw-r--r--lualibs-util-tab.lua493
-rw-r--r--lualibs-utils.lua176
-rw-r--r--lualibs.dtx85
-rw-r--r--lualibs.lua54
29 files changed, 7109 insertions, 1942 deletions
diff --git a/NEWS b/NEWS
index 30e5e40..8b367bf 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,9 @@
History of the lualibs package
+2012/10/19 v0.97
+ * sync with ConTeXt beta 2012.10.17
+ * move some files to util-* prefix
+ * add util-sto util-lua util-jsn
+
2011/01/20 v0.96
* Fix computability with lfs in luatex 0.65
diff --git a/README b/README
index b5c4c97..57bd351 100644
--- a/README
+++ b/README
@@ -32,9 +32,7 @@ Manifest
Source files:
lualibs.dtx
- lualibs-aux.lua
lualibs-boolean.lua
- lualibs-dimen.lua
lualibs-dir.lua
lualibs-file.lua
lualibs-io.lua
@@ -48,7 +46,13 @@ Source files:
lualibs-table.lua
lualibs-unicode.lua
lualibs-url.lua
- lualibs-utils.lua
+ lualibs-util-dim.lua
+ lualibs-util-jsn.lua
+ lualibs-util-lua.lua
+ lualibs-util-mrg.lua
+ lualibs-util-sto.lua
+ lualibs-util-str.lua
+ lualibs-util-tab.lua
README
Makefile
NEWS
diff --git a/lualibs-aux.lua b/lualibs-aux.lua
deleted file mode 100644
index 7950a03..0000000
--- a/lualibs-aux.lua
+++ /dev/null
@@ -1,257 +0,0 @@
-if not modules then modules = { } end modules ['l-aux'] = {
- version = 1.001,
- comment = "companion to luat-lib.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
--- for inline, no store split : for s in string.gmatch(str,",* *([^,]+)") do .. end
-
-aux = aux or { }
-
-local concat, format, gmatch = table.concat, string.format, string.gmatch
-local tostring, type = tostring, type
-local lpegmatch = lpeg.match
-
-local P, R, V = lpeg.P, lpeg.R, lpeg.V
-
-local escape, left, right = P("\\"), P('{'), P('}')
-
-lpeg.patterns.balanced = P {
- [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
- [2] = left * V(1) * right
-}
-
-local space = lpeg.P(' ')
-local equal = lpeg.P("=")
-local comma = lpeg.P(",")
-local lbrace = lpeg.P("{")
-local rbrace = lpeg.P("}")
-local nobrace = 1 - (lbrace+rbrace)
-local nested = lpeg.P { lbrace * (nobrace + lpeg.V(1))^0 * rbrace }
-local spaces = space^0
-
-local value = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0)
-
-local key = lpeg.C((1-equal-comma)^1)
-local pattern_a = (space+comma)^0 * (key * equal * value + key * lpeg.C(""))
-local pattern_c = (space+comma)^0 * (key * equal * value)
-
-local key = lpeg.C((1-space-equal-comma)^1)
-local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + lpeg.C("")))
-
--- "a=1, b=2, c=3, d={a{b,c}d}, e=12345, f=xx{a{b,c}d}xx, g={}" : outer {} removes, leading spaces ignored
-
-local hash = { }
-
-local function set(key,value) -- using Carg is slower here
- hash[key] = value
-end
-
-local pattern_a_s = (pattern_a/set)^1
-local pattern_b_s = (pattern_b/set)^1
-local pattern_c_s = (pattern_c/set)^1
-
-aux.settings_to_hash_pattern_a = pattern_a_s
-aux.settings_to_hash_pattern_b = pattern_b_s
-aux.settings_to_hash_pattern_c = pattern_c_s
-
-function aux.make_settings_to_hash_pattern(set,how)
- if how == "strict" then
- return (pattern_c/set)^1
- elseif how == "tolerant" then
- return (pattern_b/set)^1
- else
- return (pattern_a/set)^1
- end
-end
-
-function aux.settings_to_hash(str,existing)
- if str and str ~= "" then
- hash = existing or { }
- if moretolerant then
- lpegmatch(pattern_b_s,str)
- else
- lpegmatch(pattern_a_s,str)
- end
- return hash
- else
- return { }
- end
-end
-
-function aux.settings_to_hash_tolerant(str,existing)
- if str and str ~= "" then
- hash = existing or { }
- lpegmatch(pattern_b_s,str)
- return hash
- else
- return { }
- end
-end
-
-function aux.settings_to_hash_strict(str,existing)
- if str and str ~= "" then
- hash = existing or { }
- lpegmatch(pattern_c_s,str)
- return next(hash) and hash
- else
- return nil
- end
-end
-
-local separator = comma * space^0
-local value = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0)
-local pattern = lpeg.Ct(value*(separator*value)^0)
-
--- "aap, {noot}, mies" : outer {} removes, leading spaces ignored
-
-aux.settings_to_array_pattern = pattern
-
--- we could use a weak table as cache
-
-function aux.settings_to_array(str)
- if not str or str == "" then
- return { }
- else
- return lpegmatch(pattern,str)
- end
-end
-
-local function set(t,v)
- t[#t+1] = v
-end
-
-local value = lpeg.P(lpeg.Carg(1)*value) / set
-local pattern = value*(separator*value)^0 * lpeg.Carg(1)
-
-function aux.add_settings_to_array(t,str)
- return lpegmatch(pattern,str,nil,t)
-end
-
-function aux.hash_to_string(h,separator,yes,no,strict,omit)
- if h then
- local t, s = { }, table.sortedkeys(h)
- omit = omit and table.tohash(omit)
- for i=1,#s do
- local key = s[i]
- if not omit or not omit[key] then
- local value = h[key]
- if type(value) == "boolean" then
- if yes and no then
- if value then
- t[#t+1] = key .. '=' .. yes
- elseif not strict then
- t[#t+1] = key .. '=' .. no
- end
- elseif value or not strict then
- t[#t+1] = key .. '=' .. tostring(value)
- end
- else
- t[#t+1] = key .. '=' .. value
- end
- end
- end
- return concat(t,separator or ",")
- else
- return ""
- end
-end
-
-function aux.array_to_string(a,separator)
- if a then
- return concat(a,separator or ",")
- else
- return ""
- end
-end
-
-function aux.settings_to_set(str,t)
- t = t or { }
- for s in gmatch(str,"%s*([^,]+)") do
- t[s] = true
- end
- return t
-end
-
-local value = lbrace * lpeg.C((nobrace + nested)^0) * rbrace
-local pattern = lpeg.Ct((space + value)^0)
-
-function aux.arguments_to_table(str)
- return lpegmatch(pattern,str)
-end
-
--- temporary here
-
-function aux.getparameters(self,class,parentclass,settings)
- local sc = self[class]
- if not sc then
- sc = table.clone(self[parent])
- self[class] = sc
- end
- aux.settings_to_hash(settings,sc)
-end
-
--- temporary here
-
-local digit = lpeg.R("09")
-local period = lpeg.P(".")
-local zero = lpeg.P("0")
-local trailingzeros = zero^0 * -digit -- suggested by Roberto R
-local case_1 = period * trailingzeros / ""
-local case_2 = period * (digit - trailingzeros)^1 * (trailingzeros / "")
-local number = digit^1 * (case_1 + case_2)
-local stripper = lpeg.Cs((number + 1)^0)
-
---~ local sample = "bla 11.00 bla 11 bla 0.1100 bla 1.00100 bla 0.00 bla 0.001 bla 1.1100 bla 0.100100100 bla 0.00100100100"
---~ collectgarbage("collect")
---~ str = string.rep(sample,10000)
---~ local ts = os.clock()
---~ lpegmatch(stripper,str)
---~ print(#str, os.clock()-ts, lpegmatch(stripper,sample))
-
-lpeg.patterns.strip_zeros = stripper
-
-function aux.strip_zeros(str)
- return lpegmatch(stripper,str)
-end
-
-function aux.definetable(target) -- defines undefined tables
- local composed, t = nil, { }
- for name in gmatch(target,"([^%.]+)") do
- if composed then
- composed = composed .. "." .. name
- else
- composed = name
- end
- t[#t+1] = format("%s = %s or { }",composed,composed)
- end
- return concat(t,"\n")
-end
-
-function aux.accesstable(target)
- local t = _G
- for name in gmatch(target,"([^%.]+)") do
- t = t[name]
- end
- return t
-end
-
--- as we use this a lot ...
-
---~ function aux.cachefunction(action,weak)
---~ local cache = { }
---~ if weak then
---~ setmetatable(cache, { __mode = "kv" } )
---~ end
---~ local function reminder(str)
---~ local found = cache[str]
---~ if not found then
---~ found = action(str)
---~ cache[str] = found
---~ end
---~ return found
---~ end
---~ return reminder, cache
---~ end
diff --git a/lualibs-boolean.lua b/lualibs-boolean.lua
index be7ec7d..f087f1a 100644
--- a/lualibs-boolean.lua
+++ b/lualibs-boolean.lua
@@ -6,36 +6,58 @@ if not modules then modules = { } end modules ['l-boolean'] = {
license = "see context related readme files"
}
-boolean = boolean or { }
-
local type, tonumber = type, tonumber
+boolean = boolean or { }
+local boolean = boolean
+
function boolean.tonumber(b)
- if b then return 1 else return 0 end
+ if b then return 1 else return 0 end -- test and return or return
end
-function toboolean(str,tolerant)
- if tolerant then
- local tstr = type(str)
- if tstr == "string" then
- return str == "true" or str == "yes" or str == "on" or str == "1" or str == "t"
- elseif tstr == "number" then
- return tonumber(str) ~= 0
- elseif tstr == "nil" then
- return false
- else
- return str
- end
+function toboolean(str,tolerant) -- global
+ if str == nil then
+ return false
+ elseif str == false then
+ return false
+ elseif str == true then
+ return true
elseif str == "true" then
return true
elseif str == "false" then
return false
+ elseif not tolerant then
+ return false
+ elseif str == 0 then
+ return false
+ elseif (tonumber(str) or 0) > 0 then
+ return true
else
- return str
+ return str == "yes" or str == "on" or str == "t"
end
end
-function string.is_boolean(str)
+string.toboolean = toboolean
+
+-- Maps a string to a boolean. "0", "" and "false" give false; "1" and
+-- "true" give true; any other value that tonumber() converts to a number
+-- greater than zero gives true; everything else is true only when it is
+-- "yes", "on" or "t".
+function string.booleanstring(str)
+ if str == "0" then
+ return false
+ elseif str == "1" then
+ return true
+ elseif str == "" then
+ return false
+ elseif str == "false" then
+ return false
+ elseif str == "true" then
+ return true
+ elseif (tonumber(str) or 0) > 0 then
+ -- values tonumber() cannot convert count as 0 and fall through
+ return true
+ else
+ return str == "yes" or str == "on" or str == "t"
+ end
+end
+
+function string.is_boolean(str,default)
if type(str) == "string" then
if str == "true" or str == "yes" or str == "on" or str == "t" then
return true
@@ -43,13 +65,5 @@ function string.is_boolean(str)
return false
end
end
- return nil
-end
-
-function boolean.alwaystrue()
- return true
-end
-
-function boolean.falsetrue()
- return false
+ return default
end
diff --git a/lualibs-dir.lua b/lualibs-dir.lua
index 1b9bcbc..00cda38 100644
--- a/lualibs-dir.lua
+++ b/lualibs-dir.lua
@@ -6,35 +6,92 @@ if not modules then modules = { } end modules ['l-dir'] = {
license = "see context related readme files"
}
--- dir.expand_name will be merged with cleanpath and collapsepath
+-- dir.expandname will be merged with cleanpath and collapsepath
-local type = type
+local type, select = type, select
local find, gmatch, match, gsub = string.find, string.gmatch, string.match, string.gsub
+local concat, insert, remove = table.concat, table.insert, table.remove
local lpegmatch = lpeg.match
+local P, S, R, C, Cc, Cs, Ct, Cv, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cv, lpeg.V
+
dir = dir or { }
+local dir = dir
+local lfs = lfs
+
+local attributes = lfs.attributes
+local walkdir = lfs.dir
+local isdir = lfs.isdir
+local isfile = lfs.isfile
+local currentdir = lfs.currentdir
+local chdir = lfs.chdir
+
+-- in case we load outside luatex
+
+-- Fallback for a missing lfs.isdir: test the mode field of the attributes
+-- table. The function is stored in the local "isdir" declared above and in
+-- the lfs table. When attributes() yields nothing, that falsy value is
+-- returned as is.
+if not isdir then
+ function isdir(name)
+ local a = attributes(name)
+ return a and a.mode == "directory"
+ end
+ lfs.isdir = isdir
+end
+
+-- Same fallback for a missing lfs.isfile, testing for mode "file".
+if not isfile then
+ function isfile(name)
+ local a = attributes(name)
+ return a and a.mode == "file"
+ end
+ lfs.isfile = isfile
+end
-- handy
function dir.current()
- return (gsub(lfs.currentdir(),"\\","/"))
+ return (gsub(currentdir(),"\\","/"))
end
--- optimizing for no string.find (*) does not save time
+-- optimizing for no find (*) does not save time
+
+--~ local function globpattern(path,patt,recurse,action) -- fails in recent luatex due to some change in lfs
+--~ local ok, scanner
+--~ if path == "/" then
+--~ ok, scanner = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe
+--~ else
+--~ ok, scanner = xpcall(function() return walkdir(path) end, function() end) -- kepler safe
+--~ end
+--~ if ok and type(scanner) == "function" then
+--~ if not find(path,"/$") then path = path .. '/' end
+--~ for name in scanner do
+--~ local full = path .. name
+--~ local mode = attributes(full,'mode')
+--~ if mode == 'file' then
+--~ if find(full,patt) then
+--~ action(full)
+--~ end
+--~ elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then
+--~ globpattern(full,patt,recurse,action)
+--~ end
+--~ end
+--~ end
+--~ end
+
+local lfsisdir = isdir -- keep the unwrapped test reachable under another name
+
+-- Directory test that tolerates trailing separators: every slash or
+-- backslash at the end of the path is removed before the underlying test
+-- is called. This local shadows the earlier "isdir" from here on.
+-- NOTE(review): a path that consists of separators only (for example "/")
+-- is reduced to the empty string here -- confirm the underlying test copes.
+local function isdir(path)
+ path = gsub(path,"[/\\]+$","")
+ return lfsisdir(path)
+end
-local attributes = lfs.attributes
-local walkdir = lfs.dir
+lfs.isdir = isdir
-local function glob_pattern(path,patt,recurse,action)
- local ok, scanner, dirobj
+local function globpattern(path,patt,recurse,action)
if path == "/" then
- ok, scanner, dirobj = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe
- else
- ok, scanner, dirobj = xpcall(function() return walkdir(path) end, function() end) -- kepler safe
+ path = path .. "."
+ elseif not find(path,"/$") then
+ path = path .. '/'
end
- if ok and type(scanner) == "function" then
- if not find(path,"/$") then path = path .. '/' end
- for name in scanner, dirobj do
+ if isdir(path) then -- lfs.isdir does not like trailing /
+ for name in walkdir(path) do -- lfs.dir accepts trailing /
local full = path .. name
local mode = attributes(full,'mode')
if mode == 'file' then
@@ -42,25 +99,25 @@ local function glob_pattern(path,patt,recurse,action)
action(full)
end
elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then
- glob_pattern(full,patt,recurse,action)
+ globpattern(full,patt,recurse,action)
end
end
end
end
-dir.glob_pattern = glob_pattern
+dir.globpattern = globpattern
-local function collect_pattern(path,patt,recurse,result)
- local ok, scanner, dirobj
+local function collectpattern(path,patt,recurse,result)
+ local ok, scanner
result = result or { }
if path == "/" then
- ok, scanner, dirobj = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe
+ ok, scanner, first = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe
else
- ok, scanner, dirobj = xpcall(function() return walkdir(path) end, function() end) -- kepler safe
+ ok, scanner, first = xpcall(function() return walkdir(path) end, function() end) -- kepler safe
end
if ok and type(scanner) == "function" then
if not find(path,"/$") then path = path .. '/' end
- for name in scanner, dirobj do
+ for name in scanner, first do
local full = path .. name
local attr = attributes(full)
local mode = attr.mode
@@ -69,7 +126,7 @@ local function collect_pattern(path,patt,recurse,result)
result[name] = attr
end
elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then
- attr.list = collect_pattern(full,patt,recurse)
+ attr.list = collectpattern(full,patt,recurse)
result[name] = attr
end
end
@@ -77,9 +134,7 @@ local function collect_pattern(path,patt,recurse,result)
return result
end
-dir.collect_pattern = collect_pattern
-
-local P, S, R, C, Cc, Cs, Ct, Cv, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cv, lpeg.V
+dir.collectpattern = collectpattern
local pattern = Ct {
[1] = (C(P(".") + P("/")^1) + C(R("az","AZ") * P(":") * P("/")^0) + Cc("./")) * V(2) * V(3),
@@ -103,16 +158,16 @@ local function glob(str,t)
for s=1,#str do
glob(str[s],t)
end
- elseif lfs.isfile(str) then
+ elseif isfile(str) then
t(str)
else
- local split = lpegmatch(pattern,str)
+ local split = lpegmatch(pattern,str) -- we could use the file splitter
if split then
local root, path, base = split[1], split[2], split[3]
local recurse = find(base,"%*%*")
local start = root .. path
local result = lpegmatch(filter,start .. base)
- glob_pattern(start,result,recurse,t)
+ globpattern(start,result,recurse,t)
end
end
else
@@ -122,12 +177,15 @@ local function glob(str,t)
glob(str[s],t)
end
return t
- elseif lfs.isfile(str) then
- local t = t or { }
- t[#t+1] = str
- return t
+ elseif isfile(str) then
+ if t then
+ t[#t+1] = str
+ return t
+ else
+ return { str }
+ end
else
- local split = lpegmatch(pattern,str)
+ local split = lpegmatch(pattern,str) -- we could use the file splitter
if split then
local t = t or { }
local action = action or function(name) t[#t+1] = name end
@@ -135,7 +193,7 @@ local function glob(str,t)
local recurse = find(base,"%*%*")
local start = root .. path
local result = lpegmatch(filter,start .. base)
- glob_pattern(start,result,recurse,action)
+ globpattern(start,result,recurse,action)
return t
else
return { }
@@ -154,10 +212,11 @@ dir.glob = glob
local function globfiles(path,recurse,func,files) -- func == pattern or function
if type(func) == "string" then
- local s = func -- alas, we need this indirect way
+ local s = func
func = function(name) return find(name,s) end
end
files = files or { }
+ local noffiles = #files
for name in walkdir(path) do
if find(name,"^%.") then
--- skip
@@ -168,12 +227,9 @@ local function globfiles(path,recurse,func,files) -- func == pattern or function
globfiles(path .. "/" .. name,recurse,func,files)
end
elseif mode == "file" then
- if func then
- if func(name) then
- files[#files+1] = path .. "/" .. name
- end
- else
- files[#files+1] = path .. "/" .. name
+ if not func or func(name) then
+ noffiles = noffiles + 1
+ files[noffiles] = path .. "/" .. name
end
end
end
@@ -191,7 +247,7 @@ dir.globfiles = globfiles
-- print(dir.ls("*.tex"))
function dir.ls(pattern)
- return table.concat(glob(pattern),"\n")
+ return concat(glob(pattern),"\n")
end
--~ mkdirs("temp")
@@ -201,18 +257,20 @@ end
local make_indeed = true -- false
-if string.find(os.getenv("PATH"),";") then -- os.type == "windows"
+-- Truthy when running on windows. os.type is not part of the standard Lua
+-- os library, so when it does not say "windows" we fall back to looking for
+-- the ";" separator in PATH. PATH can be unset, in which case os.getenv
+-- returns nil; the empty-string default keeps find() from raising an error.
+local onwindows = os.type == "windows" or find(os.getenv("PATH") or "",";")
+
+if onwindows then
function dir.mkdirs(...)
- local str, pth, t = "", "", { ... }
- for i=1,#t do
- local s = t[i]
- if s ~= "" then
- if str ~= "" then
- str = str .. "/" .. s
- else
- str = s
- end
+ local str, pth = "", ""
+ for i=1,select("#",...) do
+ local s = select(i,...)
+ if s == "" then
+ -- skip
+ elseif str == "" then
+ str = s
+ else
+ str = str .. "/" .. s
end
end
local first, middle, last
@@ -250,64 +308,32 @@ if string.find(os.getenv("PATH"),";") then -- os.type == "windows"
else
pth = pth .. "/" .. s
end
- if make_indeed and not lfs.isdir(pth) then
+ if make_indeed and not isdir(pth) then
lfs.mkdir(pth)
end
end
- return pth, (lfs.isdir(pth) == true)
+ return pth, (isdir(pth) == true)
end
---~ print(dir.mkdirs("","","a","c"))
---~ print(dir.mkdirs("a"))
---~ print(dir.mkdirs("a:"))
---~ print(dir.mkdirs("a:/b/c"))
---~ print(dir.mkdirs("a:b/c"))
---~ print(dir.mkdirs("a:/bbb/c"))
---~ print(dir.mkdirs("/a/b/c"))
---~ print(dir.mkdirs("/aaa/b/c"))
---~ print(dir.mkdirs("//a/b/c"))
---~ print(dir.mkdirs("///a/b/c"))
---~ print(dir.mkdirs("a/bbb//ccc/"))
-
- function dir.expand_name(str) -- will be merged with cleanpath and collapsepath
- local first, nothing, last = match(str,"^(//)(//*)(.*)$")
- if first then
- first = dir.current() .. "/"
- end
- if not first then
- first, last = match(str,"^(//)/*(.*)$")
- end
- if not first then
- first, last = match(str,"^([a-zA-Z]:)(.*)$")
- if first and not find(last,"^/") then
- local d = lfs.currentdir()
- if lfs.chdir(first) then
- first = dir.current()
- end
- lfs.chdir(d)
- end
- end
- if not first then
- first, last = dir.current(), str
- end
- last = gsub(last,"//","/")
- last = gsub(last,"/%./","/")
- last = gsub(last,"^/*","")
- first = gsub(first,"/*$","")
- if last == "" then
- return first
- else
- return first .. "/" .. last
- end
- end
+ --~ print(dir.mkdirs("","","a","c"))
+ --~ print(dir.mkdirs("a"))
+ --~ print(dir.mkdirs("a:"))
+ --~ print(dir.mkdirs("a:/b/c"))
+ --~ print(dir.mkdirs("a:b/c"))
+ --~ print(dir.mkdirs("a:/bbb/c"))
+ --~ print(dir.mkdirs("/a/b/c"))
+ --~ print(dir.mkdirs("/aaa/b/c"))
+ --~ print(dir.mkdirs("//a/b/c"))
+ --~ print(dir.mkdirs("///a/b/c"))
+ --~ print(dir.mkdirs("a/bbb//ccc/"))
else
function dir.mkdirs(...)
- local str, pth, t = "", "", { ... }
- for i=1,#t do
- local s = t[i]
- if s ~= "" then
+ local str, pth = "", ""
+ for i=1,select("#",...) do
+ local s = select(i,...)
+ if s and s ~= "" then -- we catch nil and false
if str ~= "" then
str = str .. "/" .. s
else
@@ -325,7 +351,7 @@ else
else
pth = pth .. "/" .. s
end
- if make_indeed and not first and not lfs.isdir(pth) then
+ if make_indeed and not first and not isdir(pth) then
lfs.mkdir(pth)
end
end
@@ -333,31 +359,91 @@ else
pth = "."
for s in gmatch(str,"[^/]+") do
pth = pth .. "/" .. s
- if make_indeed and not lfs.isdir(pth) then
+ if make_indeed and not isdir(pth) then
lfs.mkdir(pth)
end
end
end
- return pth, (lfs.isdir(pth) == true)
+ return pth, (isdir(pth) == true)
+ end
+
+ --~ print(dir.mkdirs("","","a","c"))
+ --~ print(dir.mkdirs("a"))
+ --~ print(dir.mkdirs("/a/b/c"))
+ --~ print(dir.mkdirs("/aaa/b/c"))
+ --~ print(dir.mkdirs("//a/b/c"))
+ --~ print(dir.mkdirs("///a/b/c"))
+ --~ print(dir.mkdirs("a/bbb//ccc/"))
+
+end
+
+dir.makedirs = dir.mkdirs
+
+-- we can only define it here as it uses dir.current
+
+if onwindows then
+
+ -- Expands a path into an absolute one (windows variant). The prefix
+ -- ("first") and the remainder ("last") are determined by trying, in order:
+ -- three or more leading slashes, exactly two leading slashes, a drive
+ -- letter followed by a colon, and finally a plain relative path.
+ function dir.expandname(str) -- will be merged with cleanpath and collapsepath
+ -- three or more leading slashes: prefix becomes the current directory
+ local first, nothing, last = match(str,"^(//)(//*)(.*)$")
+ if first then
+ first = dir.current() .. "/" -- dir.current sanitizes
+ end
+ -- two leading slashes: prefix is "//"
+ if not first then
+ first, last = match(str,"^(//)/*(.*)$")
+ end
+ -- drive letter: when the remainder does not start with a slash the path
+ -- is relative to that drive, so we switch to the drive, take its current
+ -- directory as prefix and switch back afterwards
+ if not first then
+ first, last = match(str,"^([a-zA-Z]:)(.*)$")
+ if first and not find(last,"^/") then
+ local d = currentdir()
+ if chdir(first) then
+ first = dir.current()
+ end
+ chdir(d)
+ end
+ end
+ -- anything else is relative to the current directory
+ if not first then
+ first, last = dir.current(), str
+ end
+ -- collapse "//" and "/./" in the remainder, then drop the slashes where
+ -- both parts meet so that exactly one can be put in between
+ last = gsub(last,"//","/")
+ last = gsub(last,"/%./","/")
+ last = gsub(last,"^/*","")
+ -- NOTE(review): for the two-slash case first is "//" and this turns it
+ -- into "", so the result starts with a single "/" -- confirm intended.
+ first = gsub(first,"/*$","")
+ if last == "" or last == "." then
+ return first
+ else
+ return first .. "/" .. last
+ end
+ end
---~ print(dir.mkdirs("","","a","c"))
---~ print(dir.mkdirs("a"))
---~ print(dir.mkdirs("/a/b/c"))
---~ print(dir.mkdirs("/aaa/b/c"))
---~ print(dir.mkdirs("//a/b/c"))
---~ print(dir.mkdirs("///a/b/c"))
---~ print(dir.mkdirs("a/bbb//ccc/"))
+else
- function dir.expand_name(str) -- will be merged with cleanpath and collapsepath
+ function dir.expandname(str) -- will be merged with cleanpath and collapsepath
if not find(str,"^/") then
- str = lfs.currentdir() .. "/" .. str
+ str = currentdir() .. "/" .. str
end
str = gsub(str,"//","/")
str = gsub(str,"/%./","/")
+ str = gsub(str,"(.)/%.$","%1")
return str
end
end
-dir.makedirs = dir.mkdirs
+file.expandname = dir.expandname -- for convenience
+
+local stack = { } -- directories remembered by dir.push, most recent last
+
+-- Remembers the current directory and, when newdir is a non-empty string,
+-- changes to it. The current directory is remembered even when no change
+-- is requested; the result of chdir is not checked.
+function dir.push(newdir)
+ insert(stack,currentdir())
+ if newdir and newdir ~= "" then
+ chdir(newdir)
+ end
+end
+
+-- Changes back to the most recently remembered directory and returns it.
+-- Returns nil and leaves the current directory alone when nothing is left
+-- on the stack.
+function dir.pop()
+ local d = remove(stack)
+ if d then
+ chdir(d)
+ end
+ return d
+end
diff --git a/lualibs-file.lua b/lualibs-file.lua
index 2bfc070..af86f93 100644
--- a/lualibs-file.lua
+++ b/lualibs-file.lua
@@ -8,293 +8,519 @@ if not modules then modules = { } end modules ['l-file'] = {
-- needs a cleanup
-file = file or { }
+file = file or { }
+local file = file
-local concat = table.concat
-local find, gmatch, match, gsub, sub, char = string.find, string.gmatch, string.match, string.gsub, string.sub, string.char
-local lpegmatch = lpeg.match
-
-function file.removesuffix(filename)
- return (gsub(filename,"%.[%a%d]+$",""))
+if not lfs then
+ lfs = optionalrequire("lfs")
end
-function file.addsuffix(filename, suffix)
- if not suffix or suffix == "" then
- return filename
- elseif not find(filename,"%.[%a%d]+$") then
- return filename .. "." .. suffix
- else
- return filename
+if not lfs then
+
+ -- No lfs library is available: install a minimal stand-in so that code
+ -- indexing lfs.* keeps working, with reduced functionality.
+
+ local function currentdir()
+ return "."
+ end
+
+ lfs = {
+ -- "currentdir" is the key that is read as lfs.currentdir further down;
+ -- the stub used to offer only "getcurrentdir", which left that lookup nil
+ currentdir = currentdir,
+ getcurrentdir = currentdir, -- former key of this stub, kept as an alias
+ attributes = function()
+ return nil
+ end,
+ -- a file counts as present when it can be opened for reading;
+ -- nothing (nil) is returned otherwise
+ isfile = function(name)
+ local f = io.open(name,'rb')
+ if f then
+ f:close()
+ return true
+ end
+ end,
+ -- directories cannot be tested without lfs, so always report false
+ isdir = function(name)
+ print("you need to load lfs")
+ return false
+ end
+ }
+
+elseif not lfs.isfile then
+
+ -- lfs is present but lacks isfile: derive both tests from the "mode"
+ -- attribute
+
+ local attributes = lfs.attributes
+
+ function lfs.isdir(name)
+ return attributes(name,"mode") == "directory"
+ end
+
+ function lfs.isfile(name)
+ return attributes(name,"mode") == "file"
end
+
+ -- function lfs.isdir(name)
+ -- local a = attributes(name)
+ -- return a and a.mode == "directory"
+ -- end
+
+ -- function lfs.isfile(name)
+ -- local a = attributes(name)
+ -- return a and a.mode == "file"
+ -- end
+
+end
-function file.replacesuffix(filename, suffix)
- return (gsub(filename,"%.[%a%d]+$","")) .. "." .. suffix
+local insert, concat = table.insert, table.concat
+local match = string.match
+local lpegmatch = lpeg.match
+local getcurrentdir, attributes = lfs.currentdir, lfs.attributes
+local checkedsplit = string.checkedsplit
+
+-- local patterns = file.patterns or { }
+-- file.patterns = patterns
+
+local P, R, S, C, Cs, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cp, lpeg.Cc, lpeg.Ct
+
+local colon = P(":")
+local period = P(".")
+local periods = P("..")
+local fwslash = P("/")
+local bwslash = P("\\")
+local slashes = S("\\/")
+local noperiod = 1-period
+local noslashes = 1-slashes
+local name = noperiod^1
+local suffix = period/"" * (1-period-slashes)^1 * -1
+
+----- pattern = C((noslashes^0 * slashes^1)^1)
+local pattern = C((1 - (slashes^1 * noslashes^1 * -1))^1) * P(1) -- there must be a more efficient way
+
+local function pathpart(name,default)
+ return name and lpegmatch(pattern,name) or default or ""
end
-function file.dirname(name,default)
- return match(name,"^(.+)[/\\].-$") or (default or "")
+local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1
+
+local function basename(name)
+ return name and lpegmatch(pattern,name) or name
end
-function file.basename(name)
- return match(name,"^.+[/\\](.-)$") or name
+-- print(pathpart("file"))
+-- print(pathpart("dir/file"))
+-- print(pathpart("/dir/file"))
+-- print(basename("file"))
+-- print(basename("dir/file"))
+-- print(basename("/dir/file"))
+
+local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0
+
+local function nameonly(name)
+ return name and lpegmatch(pattern,name) or name
end
-function file.nameonly(name)
- return (gsub(match(name,"^.+[/\\](.-)$") or name,"%..*$",""))
+local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1
+
+local function suffixonly(name)
+ return name and lpegmatch(pattern,name) or ""
end
-function file.extname(name,default)
- return match(name,"^.+%.([^/\\]-)$") or default or ""
+file.pathpart = pathpart
+file.basename = basename
+file.nameonly = nameonly
+file.suffixonly = suffixonly
+file.suffix = suffixonly
+
+file.dirname = pathpart -- obsolete
+file.extname = suffixonly -- obsolete
+
+-- actually these are schemes
+
+local drive = C(R("az","AZ")) * colon
+local path = C((noslashes^0 * slashes)^0)
+local suffix = period * C(P(1-period)^0 * P(-1))
+local base = C((1-suffix)^0)
+local rest = C(P(1)^0)
+
+drive = drive + Cc("")
+path = path + Cc("")
+base = base + Cc("")
+suffix = suffix + Cc("")
+
+local pattern_a = drive * path * base * suffix
+local pattern_b = path * base * suffix
+local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra captures
+local pattern_d = path * rest
+
+-- Splits a filename into its parts by matching it against pattern_a (when
+-- splitdrive is set) or pattern_b. Returns nothing when str is nil or false.
+function file.splitname(str,splitdrive)
+ if not str then
+ -- error
+ elseif splitdrive then
+ return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix
+ else
+ return lpegmatch(pattern_b,str) -- returns path, base, suffix
+ end
end
-file.suffix = file.extname
-
---~ function file.join(...)
---~ local pth = concat({...},"/")
---~ pth = gsub(pth,"\\","/")
---~ local a, b = match(pth,"^(.*://)(.*)$")
---~ if a and b then
---~ return a .. gsub(b,"//+","/")
---~ end
---~ a, b = match(pth,"^(//)(.*)$")
---~ if a and b then
---~ return a .. gsub(b,"//+","/")
---~ end
---~ return (gsub(pth,"//+","/"))
---~ end
-
-local trick_1 = char(1)
-local trick_2 = "^" .. trick_1 .. "/+"
+function file.splitbase(str)
+ return str and lpegmatch(pattern_d,str) -- returns path, base+suffix
+end
-function file.join(...)
- local lst = { ... }
- local a, b = lst[1], lst[2]
- if a == "" then
- lst[1] = trick_1
- elseif b and find(a,"^/+$") and find(b,"^/") then
- lst[1] = ""
- lst[2] = gsub(b,"^/+","")
+---- stripslash = C((1 - P("/")^1*P(-1))^0)
+
+function file.nametotable(str,splitdrive)
+ if str then
+ local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str)
+ -- if path ~= "" then
+ -- path = lpegmatch(stripslash,path) -- unfortunate hack, maybe this becomes default
+ -- end
+ if splitdrive then
+ return {
+ path = path,
+ drive = drive,
+ subpath = subpath,
+ name = name,
+ base = base,
+ suffix = suffix,
+ }
+ else
+ return {
+ path = path,
+ name = name,
+ base = base,
+ suffix = suffix,
+ }
+ end
end
- local pth = concat(lst,"/")
- pth = gsub(pth,"\\","/")
- local a, b = match(pth,"^(.*://)(.*)$")
- if a and b then
- return a .. gsub(b,"//+","/")
+end
+
+-- print(file.splitname("file"))
+-- print(file.splitname("dir/file"))
+-- print(file.splitname("/dir/file"))
+-- print(file.splitname("file"))
+-- print(file.splitname("dir/file"))
+-- print(file.splitname("/dir/file"))
+
+-- inspect(file.nametotable("file.ext"))
+-- inspect(file.nametotable("dir/file.ext"))
+-- inspect(file.nametotable("/dir/file.ext"))
+-- inspect(file.nametotable("file.ext"))
+-- inspect(file.nametotable("dir/file.ext"))
+-- inspect(file.nametotable("/dir/file.ext"))
+
+----- pattern = Cs(((period * noperiod^1 * -1) / "" + 1)^1)
+-- substitution: a period followed by one or more characters that are neither
+-- a period nor a slash or backslash is replaced by nothing, but only when it
+-- sits at the very end of the string; everything else is copied as is
+local pattern = Cs(((period * (1-period-slashes)^1 * -1) / "" + 1)^1)
+
+-- Returns name without its suffix. A nil or false name is returned unchanged.
+function file.removesuffix(name)
+ return name and lpegmatch(pattern,name)
+end
+
+-- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1
+--
+-- function file.addsuffix(name, suffix)
+-- local p = lpegmatch(pattern,name)
+-- if p then
+-- return name
+-- else
+-- return name .. "." .. suffix
+-- end
+-- end
+
+local suffix = period/"" * (1-period-slashes)^1 * -1
+local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix)
+
+function file.addsuffix(filename,suffix,criterium)
+ if not filename or not suffix or suffix == "" then
+ return filename
+ elseif criterium == true then
+ return filename .. "." .. suffix
+ elseif not criterium then
+ local n, s = lpegmatch(pattern,filename)
+ if not s or s == "" then
+ return filename .. "." .. suffix
+ else
+ return filename
+ end
+ else
+ local n, s = lpegmatch(pattern,filename)
+ if s and s ~= "" then
+ local t = type(criterium)
+ if t == "table" then
+ -- keep if in criterium
+ for i=1,#criterium do
+ if s == criterium[i] then
+ return filename
+ end
+ end
+ elseif t == "string" then
+ -- keep if criterium
+ if s == criterium then
+ return filename
+ end
+ end
+ end
+ return (n or filename) .. "." .. suffix
end
- a, b = match(pth,"^(//)(.*)$")
- if a and b then
- return a .. gsub(b,"//+","/")
+end
+
+-- print("1 " .. file.addsuffix("name","new") .. " -> name.new")
+-- print("2 " .. file.addsuffix("name.old","new") .. " -> name.old")
+-- print("3 " .. file.addsuffix("name.old","new",true) .. " -> name.old.new")
+-- print("4 " .. file.addsuffix("name.old","new","new") .. " -> name.new")
+-- print("5 " .. file.addsuffix("name.old","new","old") .. " -> name.old")
+-- print("6 " .. file.addsuffix("name.old","new","foo") .. " -> name.new")
+-- print("7 " .. file.addsuffix("name.old","new",{"foo","bar"}) .. " -> name.new")
+-- print("8 " .. file.addsuffix("name.old","new",{"old","bar"}) .. " -> name.old")
+
+local suffix = period * (1-period-slashes)^1 * -1
+local pattern = Cs((1-suffix)^0)
+
+function file.replacesuffix(name,suffix)
+ if name and suffix and suffix ~= "" then
+ return lpegmatch(pattern,name) .. "." .. suffix
+ else
+ return name
end
- pth = gsub(pth,trick_2,"")
- return (gsub(pth,"//+","/"))
end
---~ print(file.join("//","/y"))
---~ print(file.join("/","/y"))
---~ print(file.join("","/y"))
---~ print(file.join("/x/","/y"))
---~ print(file.join("x/","/y"))
---~ print(file.join("http://","/y"))
---~ print(file.join("http://a","/y"))
---~ print(file.join("http:///a","/y"))
---~ print(file.join("//nas-1","/y"))
-
-function file.iswritable(name)
- local a = lfs.attributes(name) or lfs.attributes(file.dirname(name,"."))
- return a and sub(a.permissions,2,2) == "w"
+--
+
+local reslasher = lpeg.replacer(P("\\"),"/")
+
+function file.reslash(str)
+ return str and lpegmatch(reslasher,str)
end
-function file.isreadable(name)
- local a = lfs.attributes(name)
- return a and sub(a.permissions,1,1) == "r"
+-- We should be able to use:
+--
+-- local writable = P(1) * P("w") * Cc(true)
+--
+-- function file.is_writable(name)
+-- local a = attributes(name) or attributes(pathpart(name,"."))
+-- return a and lpegmatch(writable,a.permissions) or false
+-- end
+--
+-- But after some testing Taco and I came up with the more robust
+-- variant:
+
+function file.is_writable(name)
+ if not name then
+ -- error
+ elseif lfs.isdir(name) then
+ name = name .. "/m_t_x_t_e_s_t.tmp"
+ local f = io.open(name,"wb")
+ if f then
+ f:close()
+ os.remove(name)
+ return true
+ end
+ elseif lfs.isfile(name) then
+ local f = io.open(name,"ab")
+ if f then
+ f:close()
+ return true
+ end
+ else
+ local f = io.open(name,"ab")
+ if f then
+ f:close()
+ os.remove(name)
+ return true
+ end
+ end
+ return false
end
-file.is_readable = file.isreadable
-file.is_writable = file.iswritable
+local readable = P("r") * Cc(true)
--- todo: lpeg
+function file.is_readable(name)
+ if name then
+ local a = attributes(name)
+ return a and lpegmatch(readable,a.permissions) or false
+ else
+ return false
+ end
+end
---~ function file.split_path(str)
---~ local t = { }
---~ str = gsub(str,"\\", "/")
---~ str = gsub(str,"(%a):([;/])", "%1\001%2")
---~ for name in gmatch(str,"([^;:]+)") do
---~ if name ~= "" then
---~ t[#t+1] = gsub(name,"\001",":")
---~ end
---~ end
---~ return t
---~ end
+file.isreadable = file.is_readable -- deprecated
+file.iswritable = file.is_writable -- deprecated
-local checkedsplit = string.checkedsplit
+-- Size of the named file as reported by attributes(); 0 when no name is given,
+-- when no attributes come back, or when the size field is missing.
+function file.size(name)
+    local info = name and attributes(name)
+    if info then
+        return info.size or 0
+    end
+    return 0
+end
-function file.split_path(str,separator)
- str = gsub(str,"\\","/")
- return checkedsplit(str,separator or io.pathseparator)
+function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split)
+ return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator)
end
-function file.join_path(tab)
- return concat(tab,io.pathseparator) -- can have trailing //
+function file.joinpath(tab,separator) -- table
+ return tab and concat(tab,separator or io.pathseparator) -- can have trailing //
end
--- we can hash them weakly
-
-function file.collapse_path(str)
- str = gsub(str,"\\","/")
- if find(str,"/") then
- str = gsub(str,"^%./",(gsub(lfs.currentdir(),"\\","/")) .. "/") -- ./xx in qualified
- str = gsub(str,"/%./","/")
- local n, m = 1, 1
- while n > 0 or m > 0 do
- str, n = gsub(str,"[^/%.]+/%.%.$","")
- str, m = gsub(str,"[^/%.]+/%.%./","")
+local stripper = Cs(P(fwslash)^0/"" * reslasher)
+local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon
+local isroot = fwslash^1 * -1
+local hasroot = fwslash^1
+
+local deslasher = lpeg.replacer(S("\\/")^1,"/")
+
+-- If we have a network or prefix then there is a chance that we end up with two
+-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2)
+-- split and rebuild as url. Of course we could assume no network paths (which
+-- makes sense) and assume either mapped drives (windows) or mounts (unix) but
+-- then we still have to deal with urls ... anyhow, multiple // are never a real
+-- problem but just ugly.
+
+function file.join(...)
+ local lst = { ... }
+ local one = lst[1]
+ if lpegmatch(isnetwork,one) then
+ local two = lpegmatch(deslasher,concat(lst,"/",2))
+ return one .. "/" .. two
+ elseif lpegmatch(isroot,one) then
+ local two = lpegmatch(deslasher,concat(lst,"/",2))
+ if lpegmatch(hasroot,two) then
+ return two
+ else
+ return "/" .. two
end
- str = gsub(str,"([^/])/$","%1")
- -- str = gsub(str,"^%./","") -- ./xx in qualified
- str = gsub(str,"/%.$","")
+ elseif one == "" then
+ return lpegmatch(stripper,concat(lst,"/",2))
+ else
+ return lpegmatch(deslasher,concat(lst,"/"))
end
- if str == "" then str = "." end
- return str
end
---~ print(file.collapse_path("/a"))
---~ print(file.collapse_path("a/./b/.."))
---~ print(file.collapse_path("a/aa/../b/bb"))
---~ print(file.collapse_path("a/../.."))
---~ print(file.collapse_path("a/.././././b/.."))
---~ print(file.collapse_path("a/./././b/.."))
---~ print(file.collapse_path("a/b/c/../.."))
+-- print(file.join("c:/whatever","name"))
+-- print(file.join("//","/y"))
+-- print(file.join("/","/y"))
+-- print(file.join("","/y"))
+-- print(file.join("/x/","/y"))
+-- print(file.join("x/","/y"))
+-- print(file.join("http://","/y"))
+-- print(file.join("http://a","/y"))
+-- print(file.join("http:///a","/y"))
+-- print(file.join("//nas-1","/y"))
+
+-- The previous one fails on "a.b/c" so Taco came up with a split based
+-- variant. After some skyping we got it sort of compatible with the old
+-- one. After that the anchoring to currentdir was added in a better way.
+-- Of course there are some optimizations too. Finally we had to deal with
+-- windows drive prefixes and things like sys://. Eventually gsubs and
+-- finds were replaced by lpegs.
+
+local drivespec = R("az","AZ")^1 * colon
+local anchors = fwslash + drivespec
+local untouched = periods + (1-period)^1 * P(-1)
+local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * Ct(lpeg.splitat(S("/\\")^1))
+local absolute = fwslash
+
+-- Collapse "." and ".." elements in a path and normalize its slashes.
+--
+--   str    : the path; nil (or false) makes the function return nothing
+--   anchor : when set and str does not start with a slash or a drive
+--            specification, the current directory is put in front first
+--
+-- An empty path or "." gives "."; a path that consists of periods only, or that
+-- contains no period at all, is only reslashed. Otherwise the path is split at
+-- (back)slashes and rebuilt from right to left: each ".." cancels the nearest
+-- real element to its left, and ".." elements that cannot be cancelled are kept.
+-- A leading drive specification (starter) or a leading slash is preserved.
+function file.collapsepath(str,anchor)
+ if not str then
+ return
+ end
+ if anchor and not lpegmatch(anchors,str) then
+ str = getcurrentdir() .. "/" .. str
+ end
+ if str == "" or str =="." then
+ return "."
+ elseif lpegmatch(untouched,str) then
+ return lpegmatch(reslasher,str)
+ end
+ -- starter is the normalized drive prefix or false, oldelements the path parts
+ local starter, oldelements = lpegmatch(splitstarter,str)
+ local newelements = { }
+ local i = #oldelements
+ while i > 0 do
+ local element = oldelements[i]
+ if element == '.' then
+ -- do nothing
+ elseif element == '..' then
+ -- look leftwards for a real element and neutralize it by turning it into '.'
+ local n = i - 1
+ while n > 0 do
+ local element = oldelements[n]
+ if element ~= '..' and element ~= '.' then
+ oldelements[n] = '.'
+ break
+ else
+ n = n - 1
+ end
+ end
+ -- nothing left to cancel: the '..' stays in the result
+ if n < 1 then
+ insert(newelements,1,'..')
+ end
+ elseif element ~= "" then
+ insert(newelements,1,element)
+ end
+ i = i - 1
+ end
+ if #newelements == 0 then
+ return starter or "."
+ elseif starter then
+ return starter .. concat(newelements, '/')
+ elseif lpegmatch(absolute,str) then
+ return "/" .. concat(newelements,'/')
+ else
+ return concat(newelements, '/')
+ end
+end
-function file.robustname(str)
- return (gsub(str,"[^%a%d%/%-%.\\]+","-"))
+-- local function test(str)
+-- print(string.format("%-20s %-15s %-15s",str,file.collapsepath(str),file.collapsepath(str,true)))
+-- end
+-- test("a/b.c/d") test("b.c/d") test("b.c/..")
+-- test("/") test("c:/..") test("sys://..")
+-- test("") test("./") test(".") test("..") test("./..") test("../..")
+-- test("a") test("./a") test("/a") test("a/../..")
+-- test("a/./b/..") test("a/aa/../b/bb") test("a/.././././b/..") test("a/./././b/..")
+-- test("a/b/c/../..") test("./a/b/c/../..") test("a/b/c/../..")
+
+local validchars = R("az","09","AZ","--","..")
+-- every character outside validchars is turned into a "-" (an earlier
+-- lpeg.replacer based definition was shadowed by this one and never used)
+local pattern_a = Cs((validchars + P(1)/"-")^1)
+local whatever = P("-")^0 / ""
+-- drops leading dashes and stops before a run of dashes that ends the string
+local pattern_b = Cs(whatever * (1 - whatever * -1)^1)
+
+-- Make a name safe: characters other than letters, digits, "-" and "." become
+-- "-". With strict set, leading and trailing dashes are removed as well. When a
+-- pattern does not match (e.g. an empty string) the input of that step is kept.
+-- A falsy str gives no result.
+function file.robustname(str,strict)
+    if str then
+        str = lpegmatch(pattern_a,str) or str
+        if strict then
+            return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking)
+        else
+            return str
+        end
+    end
end
file.readdata = io.loaddata
file.savedata = io.savedata
function file.copy(oldname,newname)
- file.savedata(newname,io.loaddata(oldname))
+ if oldname and newname then
+ local data = io.loaddata(oldname)
+ if data and data ~= "" then
+ file.savedata(newname,data)
+ end
+ end
end
--- lpeg variants, slightly faster, not always
-
---~ local period = lpeg.P(".")
---~ local slashes = lpeg.S("\\/")
---~ local noperiod = 1-period
---~ local noslashes = 1-slashes
---~ local name = noperiod^1
-
---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.C(noperiod^1) * -1
-
---~ function file.extname(name)
---~ return lpegmatch(pattern,name) or ""
---~ end
-
---~ local pattern = lpeg.Cs(((period * noperiod^1 * -1)/"" + 1)^1)
-
---~ function file.removesuffix(name)
---~ return lpegmatch(pattern,name)
---~ end
-
---~ local pattern = (noslashes^0 * slashes)^1 * lpeg.C(noslashes^1) * -1
-
---~ function file.basename(name)
---~ return lpegmatch(pattern,name) or name
---~ end
-
---~ local pattern = (noslashes^0 * slashes)^1 * lpeg.Cp() * noslashes^1 * -1
-
---~ function file.dirname(name)
---~ local p = lpegmatch(pattern,name)
---~ if p then
---~ return sub(name,1,p-2)
---~ else
---~ return ""
---~ end
---~ end
-
---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.Cp() * noperiod^1 * -1
-
---~ function file.addsuffix(name, suffix)
---~ local p = lpegmatch(pattern,name)
---~ if p then
---~ return name
---~ else
---~ return name .. "." .. suffix
---~ end
---~ end
-
---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.Cp() * noperiod^1 * -1
-
---~ function file.replacesuffix(name,suffix)
---~ local p = lpegmatch(pattern,name)
---~ if p then
---~ return sub(name,1,p-2) .. "." .. suffix
---~ else
---~ return name .. "." .. suffix
---~ end
---~ end
-
---~ local pattern = (noslashes^0 * slashes)^0 * lpeg.Cp() * ((noperiod^1 * period)^1 * lpeg.Cp() + lpeg.P(true)) * noperiod^1 * -1
-
---~ function file.nameonly(name)
---~ local a, b = lpegmatch(pattern,name)
---~ if b then
---~ return sub(name,a,b-2)
---~ elseif a then
---~ return sub(name,a)
---~ else
---~ return name
---~ end
---~ end
-
---~ local test = file.extname
---~ local test = file.basename
---~ local test = file.dirname
---~ local test = file.addsuffix
---~ local test = file.replacesuffix
---~ local test = file.nameonly
-
---~ print(1,test("./a/b/c/abd.def.xxx","!!!"))
---~ print(2,test("./../b/c/abd.def.xxx","!!!"))
---~ print(3,test("a/b/c/abd.def.xxx","!!!"))
---~ print(4,test("a/b/c/def.xxx","!!!"))
---~ print(5,test("a/b/c/def","!!!"))
---~ print(6,test("def","!!!"))
---~ print(7,test("def.xxx","!!!"))
-
---~ local tim = os.clock() for i=1,250000 do local ext = test("abd.def.xxx","!!!") end print(os.clock()-tim)
-
-- also rewrite previous
-local letter = lpeg.R("az","AZ") + lpeg.S("_-+")
-local separator = lpeg.P("://")
+local letter = R("az","AZ") + S("_-+")
+local separator = P("://")
+
+local qualified = period^0 * fwslash
+ + letter * colon
+ + letter^1 * separator
+ + letter^1 * fwslash
+local rootbased = fwslash
+ + letter * colon
-local qualified = lpeg.P(".")^0 * lpeg.P("/") + letter*lpeg.P(":") + letter^1*separator + letter^1 * lpeg.P("/")
-local rootbased = lpeg.P("/") + letter*lpeg.P(":")
+lpeg.patterns.qualified = qualified
+lpeg.patterns.rootbased = rootbased
-- ./name ../name /name c: :// name/name
function file.is_qualified_path(filename)
- return lpegmatch(qualified,filename) ~= nil
+ return filename and lpegmatch(qualified,filename) ~= nil
end
function file.is_rootbased_path(filename)
- return lpegmatch(rootbased,filename) ~= nil
-end
-
-local slash = lpeg.S("\\/")
-local period = lpeg.P(".")
-local drive = lpeg.C(lpeg.R("az","AZ")) * lpeg.P(":")
-local path = lpeg.C(((1-slash)^0 * slash)^0)
-local suffix = period * lpeg.C(lpeg.P(1-period)^0 * lpeg.P(-1))
-local base = lpeg.C((1-suffix)^0)
-
-local pattern = (drive + lpeg.Cc("")) * (path + lpeg.Cc("")) * (base + lpeg.Cc("")) * (suffix + lpeg.Cc(""))
-
-function file.splitname(str) -- returns drive, path, base, suffix
- return lpegmatch(pattern,str)
+ return filename and lpegmatch(rootbased,filename) ~= nil
end
-- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end
@@ -304,11 +530,35 @@ end
-- test { "/aa", "/aa/bb", "/aa/bb/cc", "/aa/bb/cc.dd", "/aa/bb/cc.dd.ee" }
-- test { "aa", "aa/bb", "aa/bb/cc", "aa/bb/cc.dd", "aa/bb/cc.dd.ee" }
---~ -- todo:
---~
---~ if os.type == "windows" then
---~ local currentdir = lfs.currentdir
---~ function lfs.currentdir()
---~ return (gsub(currentdir(),"\\","/"))
---~ end
---~ end
+-- -- maybe:
+--
+-- if os.type == "windows" then
+-- local currentdir = getcurrentdir
+-- function getcurrentdir()
+-- return lpegmatch(reslasher,currentdir())
+-- end
+-- end
+
+-- for myself:
+
+-- Return what follows the first occurrence of dir in name, or name itself when
+-- dir is not found or nothing follows it. Note that dir is spliced into a Lua
+-- pattern as is, so magic characters in it act as pattern items.
+function file.strip(name,dir)
+    if not name then
+        return
+    end
+    local _, tail = match(name,"^(.-)" .. dir .. "(.*)$")
+    if tail == nil or tail == "" then
+        return name
+    end
+    return tail
+end
+
+-- local debuglist = {
+-- "pathpart", "basename", "nameonly", "suffixonly", "suffix", "dirname", "extname",
+-- "addsuffix", "removesuffix", "replacesuffix", "join",
+-- "strip","collapsepath", "joinpath", "splitpath",
+-- }
+
+-- for i=1,#debuglist do
+-- local name = debuglist[i]
+-- local f = file[name]
+-- file[name] = function(...)
+-- print(name,f(...))
+-- return f(...)
+-- end
+-- end
diff --git a/lualibs-function.lua b/lualibs-function.lua
new file mode 100644
index 0000000..7ded8ce
--- /dev/null
+++ b/lualibs-function.lua
@@ -0,0 +1,11 @@
+if not modules then modules = { } end modules ['l-functions'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+functions = functions or { }
+
+function functions.dummy() end
diff --git a/lualibs-io.lua b/lualibs-io.lua
index 66e2793..06e1fb5 100644
--- a/lualibs-io.lua
+++ b/lualibs-io.lua
@@ -6,7 +6,11 @@ if not modules then modules = { } end modules ['l-io'] = {
license = "see context related readme files"
}
-local byte, find, gsub = string.byte, string.find, string.gsub
+local io = io
+local byte, find, gsub, format = string.byte, string.find, string.gsub, string.format
+local concat = table.concat
+local floor = math.floor
+local type = type
if string.find(os.getenv("PATH"),";") then
io.fileseparator, io.pathseparator = "\\", ";"
@@ -14,16 +18,53 @@ else
io.fileseparator, io.pathseparator = "/" , ":"
end
-function io.loaddata(filename,textmode)
+-- Simple variant: read the rest of the handle in one call.
+-- NOTE(review): the local function with the same name that is declared right
+-- after this one shadows it, so this definition appears to be unused.
+local function readall(f)
+ return f:read("*all")
+end
+
+-- The next one is up to 50% faster on large files and consumes less memory due
+-- to fewer large intermediate allocations. This phenomenon was discussed on the
+-- luatex dev list.
+
+-- Read the complete content of an open file handle, starting at offset 0.
+-- Files below 1 MiB are read in one go; larger files are read in chunks that are
+-- concatenated at the end. An empty file gives "".
+local function readall(f)
+    local size = f:seek("end")
+    if size == 0 then
+        return ""
+    end
+    f:seek("set",0)
+    if size < 1024*1024 then
+        return f:read('*all')
+    end
+    -- keep the chunk size local (it used to leak into the global namespace)
+    local step
+    if size > 16*1024*1024 then
+        step = 16*1024*1024
+    else
+        step = floor(size/(1024*1024)) * 1024 * 1024 / 8
+    end
+    local data = { }
+    while true do
+        local r = f:read(step)
+        if not r then
+            return concat(data)
+        end
+        data[#data+1] = r
+    end
+end
+
+io.readall = readall
+
+function io.loaddata(filename,textmode) -- return nil if empty
local f = io.open(filename,(textmode and 'r') or 'rb')
if f then
- -- collectgarbage("step") -- sometimes makes a big difference in mem consumption
- local data = f:read('*all')
- -- garbagecollector.check(data)
+-- local data = f:read('*all')
+ local data = readall(f)
f:close()
- return data
- else
- return nil
+ if #data > 0 then
+ return data
+ end
end
end
@@ -31,25 +72,67 @@ function io.savedata(filename,data,joiner)
local f = io.open(filename,"wb")
if f then
if type(data) == "table" then
- f:write(table.join(data,joiner or ""))
+ f:write(concat(data,joiner or ""))
elseif type(data) == "function" then
data(f)
else
f:write(data or "")
end
f:close()
+ io.flush()
return true
else
return false
end
end
+-- we can also chunk this one if needed: io.lines(filename,chunksize,"*l")
+
+-- Load the first line of a file, or the first n lines when n is given; in the
+-- latter case the lines are joined with "\n". Nothing is returned when the file
+-- cannot be opened or when the result is empty.
+function io.loadlines(filename,n) -- return nil if empty
+ local f = io.open(filename,'r')
+ if not f then
+ -- no file
+ elseif n then
+ local lines = { }
+ for i=1,n do
+ -- NOTE(review): "*lines" is not a documented read format; presumably it is
+ -- treated like "*l" / "*line" (as used in the branch below) -- confirm
+ local line = f:read("*lines")
+ if line then
+ lines[#lines+1] = line
+ else
+ break
+ end
+ end
+ f:close()
+ lines = concat(lines,"\n")
+ if #lines > 0 then
+ return lines
+ end
+ else
+ local line = f:read("*line") or ""
+ f:close()
+ if #line > 0 then
+ return line
+ end
+ end
+end
+
+-- Load the first n bytes (default 1024) of a file in binary mode. Nothing is
+-- returned when the file cannot be opened or holds no data.
+function io.loadchunk(filename,n)
+    local f = io.open(filename,'rb')
+    if f then
+        local data = f:read(n or 1024)
+        f:close()
+        -- a counted read gives nil at end of file, so check before taking the length
+        if data and #data > 0 then
+            return data
+        end
+    end
+end
+
function io.exists(filename)
local f = io.open(filename)
if f == nil then
return false
else
- assert(f:close())
+ f:close()
return true
end
end
@@ -60,18 +143,29 @@ function io.size(filename)
return 0
else
local s = f:seek("end")
- assert(f:close())
+ f:close()
return s
end
end
function io.noflines(f)
- local n = 0
- for _ in f:lines() do
- n = n + 1
+    if type(f) == "string" then
+        -- f is a filename here: open it ourselves (the name used to be taken
+        -- from an undefined variable), count, and close our own handle
+        local handle = io.open(f)
+        if handle then
+            local n = io.noflines(handle)
+            handle:close()
+            return n
+        else
+            return 0
+        end
+    else
+        -- f is an open handle: count its lines and rewind it afterwards
+        local n = 0
+        for _ in f:lines() do
+            n = n + 1
+        end
+        f:seek('set',0)
+        return n
end
- f:seek('set',0)
- return n
end
local nextchar = {
@@ -97,8 +191,6 @@ local nextchar = {
function io.characters(f,n)
if f then
return nextchar[n or 1], f
- else
- return nil, nil
end
end
@@ -107,40 +199,42 @@ local nextbyte = {
local a, b, c, d = f:read(1,1,1,1)
if d then
return byte(a), byte(b), byte(c), byte(d)
- else
- return nil, nil, nil, nil
+ end
+ end,
+    [3] = function(f)
+        local a, b, c = f:read(1,1,1)
+        -- all three bytes are needed: after a short read c is nil
+        if c then
+            return byte(a), byte(b), byte(c)
+        end
+    end,
[2] = function(f)
local a, b = f:read(1,1)
if b then
return byte(a), byte(b)
- else
- return nil, nil
end
end,
[1] = function (f)
local a = f:read(1)
if a then
return byte(a)
- else
- return nil
end
end,
[-2] = function (f)
local a, b = f:read(1,1)
if b then
return byte(b), byte(a)
- else
- return nil, nil
+ end
+ end,
+    [-3] = function(f)
+        local a, b, c = f:read(1,1,1)
+        -- all three bytes are needed: after a short read c is nil
+        if c then
+            return byte(c), byte(b), byte(a)
+        end
+    end,
[-4] = function(f)
local a, b, c, d = f:read(1,1,1,1)
if d then
return byte(d), byte(c), byte(b), byte(a)
- else
- return nil, nil, nil, nil
end
end
}
@@ -157,12 +251,13 @@ function io.ask(question,default,options)
while true do
io.write(question)
if options then
- io.write(string.format(" [%s]",table.concat(options,"|")))
+ io.write(format(" [%s]",concat(options,"|")))
end
if default then
- io.write(string.format(" [%s]",default))
+ io.write(format(" [%s]",default))
end
- io.write(string.format(" "))
+ io.write(format(" "))
+ io.flush()
local answer = io.read()
answer = gsub(answer,"^%s*(.*)%s*$","%1")
if answer == "" and default then
@@ -185,3 +280,83 @@ function io.ask(question,default,options)
end
end
end
+
+-- Read an unsigned number of n bytes from the file handle f.
+--
+--   n > 0 : most significant byte first  (1, 2, 3, 4, 8 and 12 bytes)
+--   n < 0 : least significant byte first (2, 3, 4 and 8 bytes)
+--
+-- When m is given, n is an absolute offset that is seeked to first and m is the
+-- size. Unsupported sizes give 0. Values that need more than 53 bits cannot be
+-- represented exactly when numbers are doubles.
+local function readnumber(f,n,m)
+    if m then
+        f:seek("set",n)
+        n = m
+    end
+    if n == 1 then
+        return byte(f:read(1))
+    elseif n == 2 then
+        local a, b = byte(f:read(2),1,2)
+        return 256 * a + b
+    elseif n == 3 then
+        local a, b, c = byte(f:read(3),1,3)
+        return 256*256 * a + 256 * b + c
+    elseif n == 4 then
+        local a, b, c, d = byte(f:read(4),1,4)
+        return 256*256*256 * a + 256*256 * b + 256 * c + d
+    elseif n == 8 then
+        -- two 32 bit words, so the first one has to be shifted by four bytes
+        local a, b = readnumber(f,4), readnumber(f,4)
+        return 256*256*256*256 * a + b
+    elseif n == 12 then
+        -- three 32 bit words, shifted by eight and four bytes
+        local a, b, c = readnumber(f,4), readnumber(f,4), readnumber(f,4)
+        return 2^64 * a + 2^32 * b + c
+    elseif n == -2 then
+        local b, a = byte(f:read(2),1,2)
+        return 256*a + b
+    elseif n == -3 then
+        local c, b, a = byte(f:read(3),1,3)
+        return 256*256 * a + 256 * b + c
+    elseif n == -4 then
+        local d, c, b, a = byte(f:read(4),1,4)
+        return 256*256*256 * a + 256*256 * b + 256*c + d
+    elseif n == -8 then
+        local b1, b2, b3, b4, b5, b6, b7, b8 = byte(f:read(8),1,8)
+        return 256*256*256*256*256*256*256 * b8 +
+                   256*256*256*256*256*256 * b7 +
+                       256*256*256*256*256 * b6 +
+                           256*256*256*256 * b5 +
+                               256*256*256 * b4 +
+                                   256*256 * b3 +
+                                       256 * b2 +
+                                             b1
+    else
+        return 0
+    end
+end
+
+io.readnumber = readnumber
+
+-- Read n bytes from f and return them with "\000" removed via gsub. When m is
+-- given, n is an absolute offset that is seeked to first and m is the length.
+function io.readstring(f,n,m)
+    local count = n
+    if m then
+        f:seek("set",n)
+        count = m
+    end
+    return (gsub(f:read(count),"\000",""))
+end
+
+--
+
+if not io.i_limiter then function io.i_limiter() end end -- dummy so we can test safely
+if not io.o_limiter then function io.o_limiter() end end -- dummy so we can test safely
+
+-- This works quite ok:
+--
+-- function io.piped(command,writer)
+-- local pipe = io.popen(command)
+-- -- for line in pipe:lines() do
+-- -- print(line)
+-- -- end
+-- while true do
+-- local line = pipe:read(1)
+-- if not line then
+-- break
+-- elseif line ~= "\n" then
+-- writer(line)
+-- end
+-- end
+-- return pipe:close() -- ok, status, (error)code
+-- end
diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua
index b107a8e..681ef09 100644
--- a/lualibs-lpeg.lua
+++ b/lualibs-lpeg.lua
@@ -6,30 +6,153 @@ if not modules then modules = { } end modules ['l-lpeg'] = {
license = "see context related readme files"
}
-local lpeg = require("lpeg")
+-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1)
+
+-- move utf -> l-unicode
+-- move string -> l-string or keep it here
+
+lpeg = require("lpeg")
+
+-- tracing (only used when we encounter a problem in integration of lpeg in luatex)
+
+-- some code will move to unicode and string
+
+-- local lpmatch = lpeg.match
+-- local lpprint = lpeg.print
+-- local lpp = lpeg.P
+-- local lpr = lpeg.R
+-- local lps = lpeg.S
+-- local lpc = lpeg.C
+-- local lpb = lpeg.B
+-- local lpv = lpeg.V
+-- local lpcf = lpeg.Cf
+-- local lpcb = lpeg.Cb
+-- local lpcg = lpeg.Cg
+-- local lpct = lpeg.Ct
+-- local lpcs = lpeg.Cs
+-- local lpcc = lpeg.Cc
+-- local lpcmt = lpeg.Cmt
+-- local lpcarg = lpeg.Carg
+
+-- function lpeg.match(l,...) print("LPEG MATCH") lpprint(l) return lpmatch(l,...) end
+
+-- function lpeg.P (l) local p = lpp (l) print("LPEG P =") lpprint(l) return p end
+-- function lpeg.R (l) local p = lpr (l) print("LPEG R =") lpprint(l) return p end
+-- function lpeg.S (l) local p = lps (l) print("LPEG S =") lpprint(l) return p end
+-- function lpeg.C (l) local p = lpc (l) print("LPEG C =") lpprint(l) return p end
+-- function lpeg.B (l) local p = lpb (l) print("LPEG B =") lpprint(l) return p end
+-- function lpeg.V (l) local p = lpv (l) print("LPEG V =") lpprint(l) return p end
+-- function lpeg.Cf (l) local p = lpcf (l) print("LPEG Cf =") lpprint(l) return p end
+-- function lpeg.Cb (l) local p = lpcb (l) print("LPEG Cb =") lpprint(l) return p end
+-- function lpeg.Cg (l) local p = lpcg (l) print("LPEG Cg =") lpprint(l) return p end
+-- function lpeg.Ct (l) local p = lpct (l) print("LPEG Ct =") lpprint(l) return p end
+-- function lpeg.Cs (l) local p = lpcs (l) print("LPEG Cs =") lpprint(l) return p end
+-- function lpeg.Cc (l) local p = lpcc (l) print("LPEG Cc =") lpprint(l) return p end
+-- function lpeg.Cmt (l) local p = lpcmt (l) print("LPEG Cmt =") lpprint(l) return p end
+-- function lpeg.Carg (l) local p = lpcarg(l) print("LPEG Carg =") lpprint(l) return p end
+
+local type, next, tostring = type, next, tostring
+local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format
+----- mod, div = math.mod, math.div
+local floor = math.floor
+
+local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt
+local lpegtype, lpegmatch, lpegprint = lpeg.type, lpeg.match, lpeg.print
+
+-- let's start with an inspector:
+
+setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end)
+
+-- Beware, we predefine a bunch of patterns here and one reason for doing so
+-- is that we get consistent behaviour in some of the visualizers.
lpeg.patterns = lpeg.patterns or { } -- so that we can share
local patterns = lpeg.patterns
-local P, R, S, Ct, C, Cs, Cc, V = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.V
-local match = lpeg.match
+
+local anything = P(1)
+local endofstring = P(-1)
+local alwaysmatched = P(true)
+
+patterns.anything = anything
+patterns.endofstring = endofstring
+patterns.beginofstring = alwaysmatched
+patterns.alwaysmatched = alwaysmatched
local digit, sign = R('09'), S('+-')
local cr, lf, crlf = P("\r"), P("\n"), P("\r\n")
-local utf8byte = R("\128\191")
+local newline = crlf + S("\r\n") -- cr + lf
+local escaped = P("\\") * anything
+local squote = P("'")
+local dquote = P('"')
+local space = P(" ")
+
+-- Byte order marks. The 16 bit marks used to be swapped: FE FF announces big
+-- endian and FF FE little endian.
+local utfbom_32_be = P('\000\000\254\255') -- 00 00 FE FF
+local utfbom_32_le = P('\255\254\000\000') -- FF FE 00 00
+local utfbom_16_be = P('\254\255')         -- FE FF
+local utfbom_16_le = P('\255\254')         -- FF FE
+local utfbom_8     = P('\239\187\191')     -- EF BB BF
+-- In the alternations below the 32 bit little endian mark has to come before
+-- the 16 bit one because both start with FF FE.
+local utfbom       = utfbom_32_be + utfbom_32_le
+                   + utfbom_16_be + utfbom_16_le
+                   + utfbom_8
+-- yields the encoding name, defaulting to utf-8 when no mark is present
+local utftype      = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le")
+                   + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le")
+                   + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8
+-- yields the number of bytes taken by the mark (0 when there is none)
+local utfoffset    = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4)
+                   + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2)
+                   + utfbom_8 * Cc(3) + Cc(0)
+
+local utf8next = R("\128\191")
-patterns.utf8byte = utf8byte
patterns.utf8one = R("\000\127")
-patterns.utf8two = R("\194\223") * utf8byte
-patterns.utf8three = R("\224\239") * utf8byte * utf8byte
-patterns.utf8four = R("\240\244") * utf8byte * utf8byte * utf8byte
+patterns.utf8two = R("\194\223") * utf8next
+patterns.utf8three = R("\224\239") * utf8next * utf8next
+patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next
+patterns.utfbom = utfbom
+patterns.utftype = utftype
+patterns.utfoffset = utfoffset
+
+local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four
+local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false)
+
+local utf8character = P(1) * R("\128\191")^0 -- unchecked but fast
+
+patterns.utf8 = utf8char
+patterns.utf8char = utf8char
+patterns.utf8character = utf8character -- this one can be used in most cases so we might use that one
+patterns.validutf8 = validutf8char
+patterns.validutf8char = validutf8char
+
+local eol = S("\n\r")
+local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto)
+local whitespace = eol + spacer
+local nonspacer = 1 - spacer
+local nonwhitespace = 1 - whitespace
+
+patterns.eol = eol
+patterns.spacer = spacer
+patterns.whitespace = whitespace
+patterns.nonspacer = nonspacer
+patterns.nonwhitespace = nonwhitespace
+
+local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto
+
+----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0)
+local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0))
+
+patterns.stripper = stripper
+patterns.collapser = collapser
patterns.digit = digit
patterns.sign = sign
patterns.cardinal = sign^0 * digit^1
patterns.integer = sign^0 * digit^1
-patterns.float = sign^0 * digit^0 * P('.') * digit^1
+patterns.unsigned = digit^0 * P('.') * digit^1
+patterns.float = sign^0 * patterns.unsigned
+patterns.cunsigned = digit^0 * P(',') * digit^1
+patterns.cfloat = sign^0 * patterns.cunsigned
patterns.number = patterns.float + patterns.integer
+patterns.cnumber = patterns.cfloat + patterns.integer
patterns.oct = P("0") * R("07")^1
patterns.octal = patterns.oct
patterns.HEX = P("0x") * R("09","AF")^1
@@ -38,55 +161,75 @@ patterns.hexadecimal = P("0x") * R("09","AF","af")^1
patterns.lowercase = R("az")
patterns.uppercase = R("AZ")
patterns.letter = patterns.lowercase + patterns.uppercase
-patterns.space = S(" ")
-patterns.eol = S("\n\r")
-patterns.spacer = S(" \t\f\v") -- + string.char(0xc2, 0xa0) if we want utf (cf mail roberto)
-patterns.newline = crlf + cr + lf
-patterns.nonspace = 1 - patterns.space
-patterns.nonspacer = 1 - patterns.spacer
-patterns.whitespace = patterns.eol + patterns.spacer
-patterns.nonwhitespace = 1 - patterns.whitespace
-patterns.utf8 = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four
-patterns.utfbom = P('\000\000\254\255') + P('\255\254\000\000') + P('\255\254') + P('\254\255') + P('\239\187\191')
+patterns.space = space
+patterns.tab = P("\t")
+patterns.spaceortab = patterns.space + patterns.tab
+patterns.newline = newline
+patterns.emptyline = newline^1
+patterns.equal = P("=")
+patterns.comma = P(",")
+patterns.commaspacer = P(",") * spacer^0
+patterns.period = P(".")
+patterns.colon = P(":")
+patterns.semicolon = P(";")
+patterns.underscore = P("_")
+patterns.escaped = escaped
+patterns.squote = squote
+patterns.dquote = dquote
+patterns.nosquote = (escaped + (1-squote))^0
+patterns.nodquote = (escaped + (1-dquote))^0
+patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle
+patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle
+patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble
+patterns.unspacer = ((patterns.spacer^1)/"")^0
-function lpeg.anywhere(pattern) --slightly adapted from website
- return P { P(pattern) + 1 * V(1) } -- why so complex?
-end
+patterns.singlequoted = squote * patterns.nosquote * squote
+patterns.doublequoted = dquote * patterns.nodquote * dquote
+patterns.quoted = patterns.doublequoted + patterns.singlequoted
-function lpeg.splitter(pattern, action)
- return (((1-P(pattern))^1)/action+1)^0
+patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1)
+
+patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1
+patterns.beginline = #(1-newline)
+
+patterns.longtostring = Cs(whitespace^0/"" * nonwhitespace^0 * ((whitespace^0/" " * (patterns.quoted + nonwhitespace)^1)^0))
+
+local function anywhere(pattern) --slightly adapted from website
+ return P { P(pattern) + 1 * V(1) }
end
-local spacing = patterns.spacer^0 * patterns.newline -- sort of strip
-local empty = spacing * Cc("")
-local nonempty = Cs((1-spacing)^1) * spacing^-1
-local content = (empty + nonempty)^1
+lpeg.anywhere = anywhere
-local capture = Ct(content^0)
+-- Build a predicate that tells (as a real boolean) whether pattern p matches
+-- somewhere in the string it is given.
+function lpeg.instringchecker(p)
+    local finder = anywhere(p)
+    return function(str)
+        if lpegmatch(finder,str) then
+            return true
+        end
+        return false
+    end
+end
-function string:splitlines()
- return match(capture,self)
+function lpeg.splitter(pattern, action)
+ return (((1-P(pattern))^1)/action+1)^0
end
-patterns.textline = content
+function lpeg.tsplitter(pattern, action)
+ return Ct((((1-P(pattern))^1)/action+1)^0)
+end
---~ local p = lpeg.splitat("->",false) print(match(p,"oeps->what->more")) -- oeps what more
---~ local p = lpeg.splitat("->",true) print(match(p,"oeps->what->more")) -- oeps what->more
---~ local p = lpeg.splitat("->",false) print(match(p,"oeps")) -- oeps
---~ local p = lpeg.splitat("->",true) print(match(p,"oeps")) -- oeps
+-- problem: separator can be lpeg and that does not hash too well, but
+-- it's quite okay as the key is then not garbage collected
-local splitters_s, splitters_m = { }, { }
+local splitters_s, splitters_m, splitters_t = { }, { }, { }
local function splitat(separator,single)
local splitter = (single and splitters_s[separator]) or splitters_m[separator]
if not splitter then
separator = P(separator)
+ local other = C((1 - separator)^0)
if single then
- local other, any = C((1 - separator)^0), P(1)
+ local any = anything
splitter = other * (separator * C(any^0) + "") -- ?
splitters_s[separator] = splitter
else
- local other = C((1 - separator)^0)
splitter = other * (separator * other)^0
splitters_m[separator] = splitter
end
@@ -94,29 +237,70 @@ local function splitat(separator,single)
return splitter
end
-lpeg.splitat = splitat
+-- Variant of splitat whose splitter is wrapped in Ct. Each splitter is
+-- built once per separator and remembered in splitters_t.
+local function tsplitat(separator)
+    local known = splitters_t[separator]
+    if known then
+        return known
+    end
+    local fresh = Ct(splitat(separator))
+    splitters_t[separator] = fresh
+    return fresh
+end
+
+lpeg.splitat = splitat
+lpeg.tsplitat = tsplitat
+
+-- Split str at each separator (a comma when none is given) and return
+-- whatever the multi-split pattern yields for it.
+function string.splitup(str,separator)
+    separator = separator or ","
+    local splitter = splitters_m[separator]
+    if not splitter then
+        splitter = splitat(separator)
+    end
+    return lpegmatch(splitter,str)
+end
+
+-- local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more
+-- local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more
+-- local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps
+-- local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps
local cache = { }
function lpeg.split(separator,str)
local c = cache[separator]
if not c then
- c = Ct(splitat(separator))
+ c = tsplitat(separator)
cache[separator] = c
end
- return match(c,str)
+ return lpegmatch(c,str)
end
-function string:split(separator)
- local c = cache[separator]
- if not c then
- c = Ct(splitat(separator))
- cache[separator] = c
+function string.split(str,separator)
+ if separator then
+ local c = cache[separator]
+ if not c then
+ c = tsplitat(separator)
+ cache[separator] = c
+ end
+ return lpegmatch(c,str)
+ else
+ return { str }
end
- return match(c,self)
end
-lpeg.splitters = cache
+local spacing = patterns.spacer^0 * newline -- sort of strip
+local empty = spacing * Cc("")
+local nonempty = Cs((1-spacing)^1) * spacing^-1
+local content = (empty + nonempty)^1
+
+patterns.textline = content
+
+local linesplitter = tsplitat(newline)
+
+patterns.linesplitter = linesplitter
+
+function string.splitlines(str)
+ return lpegmatch(linesplitter,str)
+end
+
+--~ lpeg.splitters = cache -- no longer public
local cache = { }
@@ -124,42 +308,545 @@ function lpeg.checkedsplit(separator,str)
local c = cache[separator]
if not c then
separator = P(separator)
- local other = C((1 - separator)^0)
+ local other = C((1 - separator)^1)
c = Ct(separator^0 * other * (separator^1 * other)^0)
cache[separator] = c
end
- return match(c,str)
+ return lpegmatch(c,str)
end
-function string:checkedsplit(separator)
+function string.checkedsplit(str,separator)
local c = cache[separator]
if not c then
separator = P(separator)
- local other = C((1 - separator)^0)
+ local other = C((1 - separator)^1)
c = Ct(separator^0 * other * (separator^1 * other)^0)
cache[separator] = c
end
- return match(c,self)
+ return lpegmatch(c,str)
+end
+
+-- from roberto's site:
+
+local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
+local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
+local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
+
+local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4
+
+patterns.utf8byte = utf8byte
+
+--~ local str = " a b c d "
+
+--~ local s = lpeg.stripper(lpeg.R("az")) print("["..lpegmatch(s,str).."]")
+--~ local s = lpeg.keeper(lpeg.R("az")) print("["..lpegmatch(s,str).."]")
+--~ local s = lpeg.stripper("ab") print("["..lpegmatch(s,str).."]")
+--~ local s = lpeg.keeper("ab") print("["..lpegmatch(s,str).."]")
+
+local cache = { }
+
+-- Return a substitution pattern that removes runs of unwanted input.
+-- A string argument is treated as a set of characters (via S) and the
+-- resulting pattern is kept in the cache; any other argument is used as
+-- a pattern directly and a new result is built on every call.
+function lpeg.stripper(str)
+    if type(str) ~= "string" then
+        return Cs(((str^1)/"" + 1)^0)
+    end
+    local stripper = cache[str]
+    if not stripper then
+        stripper = Cs(((S(str)^1)/"" + 1)^0)
+        cache[str] = stripper
+    end
+    return stripper
+end
+
+local cache = { }
+
+-- Counterpart of lpeg.stripper: return a substitution pattern that
+-- removes every run of input that is NOT matched by the argument. A
+-- string argument is treated as a set of characters (via S) and the
+-- resulting pattern is kept in the cache; any other argument is used as
+-- a pattern directly and a new result is built on every call.
+function lpeg.keeper(str)
+    if type(str) ~= "string" then
+        return Cs((((1-str)^1)/"" + 1)^0)
+    end
+    local keeper = cache[str]
+    if not keeper then
+        keeper = Cs((((1-S(str))^1)/"" + 1)^0)
+        cache[str] = keeper
+    end
+    return keeper
+end
+
+function lpeg.frontstripper(str) -- or pattern (yet undocumented)
+ return (P(str) + P(true)) * Cs(anything^0)
+end
+
+function lpeg.endstripper(str) -- or pattern (yet undocumented)
+ return Cs((1 - P(str) * endofstring)^0)
+end
+
+-- Just for fun I looked at the used bytecode and
+-- p = (p and p + pp) or pp gets one more (testset).
+
+-- todo: cache when string
+
+function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys
+ local pattern
+ local u = isutf and utf8char or 1
+ if type(one) == "table" then
+ local no = #one
+ local p = P(false)
+ if no == 0 then
+ for k, v in next, one do
+ p = p + P(k) / v
+ end
+ pattern = Cs((p + u)^0)
+ elseif no == 1 then
+ local o = one[1]
+ one, two = P(o[1]), o[2]
+ -- pattern = Cs(((1-one)^1 + one/two)^0)
+ pattern = Cs((one/two + u)^0)
+ else
+ for i=1,no do
+ local o = one[i]
+ p = p + P(o[1]) / o[2]
+ end
+ pattern = Cs((p + u)^0)
+ end
+ else
+ pattern = Cs((P(one)/(two or "") + u)^0)
+ end
+ if makefunction then
+ return function(str)
+ return lpegmatch(pattern,str)
+ end
+ else
+ return pattern
+ end
+end
+
+-- Build a pattern that skips ahead to the first occurrence of one of the
+-- given alternatives and then matches that occurrence.
+--   lst          : a single value accepted by P, an indexed list of such
+--                  values, or a hash table of which only the keys are used
+--   makefunction : when true a function(str) doing the match is returned
+--                  instead of the pattern itself
+function lpeg.finder(lst,makefunction)
+ local pattern
+ if type(lst) == "table" then
+ pattern = P(false)
+ if #lst == 0 then
+ for k, v in next, lst do
+ pattern = pattern + P(k) -- only the key is used (the value is ignored), so we can use a replacer table
+ end
+ else
+ for i=1,#lst do
+ pattern = pattern + P(lst[i])
+ end
+ end
+ else
+ pattern = P(lst)
+ end
+ -- skip everything that is not a hit, then require one hit
+ pattern = (1-pattern)^0 * pattern
+ if makefunction then
+ return function(str)
+ return lpegmatch(pattern,str)
+ end
+ else
+ return pattern
+ end
+end
+
+-- print(lpeg.match(lpeg.replacer("e","a"),"test test"))
+-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test"))
+-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test"))
+
+local splitters_f, splitters_s = { }, { }
+
+-- Return a pattern that captures everything in front of the first
+-- separator, or the whole subject when the separator does not occur.
+-- Patterns are remembered per separator in splitters_f.
+function lpeg.firstofsplit(separator) -- always return value
+    local splitter = splitters_f[separator]
+    if not splitter then
+        -- Compile into a separate local: the cache has to be keyed by the
+        -- value the caller passed. Keying it by the result of P() would
+        -- store string separators under a fresh pattern object that no
+        -- later lookup can find, so nothing would ever be reused.
+        local sep = P(separator)
+        splitter = C((1 - sep)^0)
+        splitters_f[separator] = splitter
+    end
+    return splitter
+end
+
+-- Return a pattern that captures everything after the first separator.
+-- The pattern does not match when the separator does not occur.
+-- Patterns are remembered per separator in splitters_s.
+function lpeg.secondofsplit(separator) -- nil if not split
+    local splitter = splitters_s[separator]
+    if not splitter then
+        -- Compile into a separate local: the cache has to be keyed by the
+        -- value the caller passed. Keying it by the result of P() would
+        -- store string separators under a fresh pattern object that no
+        -- later lookup can find, so nothing would ever be reused.
+        local sep = P(separator)
+        splitter = (1 - sep)^0 * sep * C(anything^0)
+        splitters_s[separator] = splitter
+    end
+    return splitter
+end
+
+function lpeg.balancer(left,right)
+ left, right = P(left), P(right)
+ return P { left * ((1 - left - right) + V(1))^0 * right }
+end
+
+-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de"))
+-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty
+-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc"))
+-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de"))
+-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty
+-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc"))
+-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc"))
+-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc"))
+
+-- -- slower:
+--
+-- function lpeg.counter(pattern)
+-- local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0
+-- return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end
+-- end
+
+local nany = utf8char/""
+
+function lpeg.counter(pattern)
+ pattern = Cs((P(pattern)/" " + nany)^0)
+ return function(str)
+ return #lpegmatch(pattern,str)
+ end
+end
+
+-- utf extensions
+
+utf = utf or (unicode and unicode.utf8) or { }
+
+local utfcharacters = utf and utf.characters or string.utfcharacters
+local utfgmatch = utf and utf.gmatch
+local utfchar = utf and utf.char
+
+lpeg.UP = lpeg.P
+
+if utfcharacters then
+
+ function lpeg.US(str)
+ local p = P(false)
+ for uc in utfcharacters(str) do
+ p = p + P(uc)
+ end
+ return p
+ end
+
+
+elseif utfgmatch then
+
+ function lpeg.US(str)
+ local p = P(false)
+ for uc in utfgmatch(str,".") do
+ p = p + P(uc)
+ end
+ return p
+ end
+
+else
+
+ function lpeg.US(str)
+ local p = P(false)
+ local f = function(uc)
+ p = p + P(uc)
+ end
+ lpegmatch((utf8char/f)^0,str)
+ return p
+ end
+
+end
+
+local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture
+
+function lpeg.UR(str,more)
+ local first, last
+ if type(str) == "number" then
+ first = str
+ last = more or first
+ else
+ first, last = lpegmatch(range,str)
+ if not last then
+ return P(str)
+ end
+ end
+ if first == last then
+ return P(str)
+ elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium
+ local p = P(false)
+ for i=first,last do
+ p = p + P(utfchar(i))
+ end
+ return p -- nil when invalid range
+ else
+ local f = function(b)
+ return b >= first and b <= last
+ end
+ -- tricky, these nested captures
+ return utf8byte / f -- nil when invalid range
+ end
end
---~ function lpeg.append(list,pp)
---~ local p = pp
---~ for l=1,#list do
---~ if p then
---~ p = p + P(list[l])
---~ else
---~ p = P(list[l])
---~ end
---~ end
---~ return p
---~ end
+-- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω"))
+
+-- lpeg.print(lpeg.R("ab","cd","gh"))
+-- lpeg.print(lpeg.P("a","b","c"))
+-- lpeg.print(lpeg.S("a","b","c"))
+
+-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à")))
+-- print(lpeg.count("äáàa",lpeg.UP("áà")))
+-- print(lpeg.count("äáàa",lpeg.US("àá")))
+-- print(lpeg.count("äáàa",lpeg.UR("aá")))
+-- print(lpeg.count("äáàa",lpeg.UR("àá")))
+-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF)))
+
+function lpeg.is_lpeg(p)
+ return p and lpegtype(p) == "pattern"
+end
+
+function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order
+ if type(list) ~= "table" then
+ list = { list, ... }
+ end
+ -- table.sort(list) -- longest match first
+ local p = P(list[1])
+ for l=2,#list do
+ p = p + P(list[l])
+ end
+ return p
+end
+
+-- For the moment here, but it might move to utilities. Beware, we need to
+-- have the longest keyword first, so 'aaa' comes before 'aa' which is why we
+-- loop back from the end cq. prepend.
+
+local sort = table.sort
+
+-- Return a shallow copy of the indexed part (1..#old) of a table, so that
+-- the copy can be sorted without touching the original.
+local function copyindexed(old)
+    local new = { }
+    for i=1,#old do
+        new[i] = old[i] -- the element itself, not the list that holds it
+    end
+    return new
+end
+
+-- Gather all keys of tab in a list and return that list after sorting it
+-- with the default ordering of sort.
+local function sortedkeys(tab)
+    local collected = { }
+    for name in next, tab do
+        collected[#collected+1] = name
+    end
+    sort(collected)
+    return collected
+end
+
+-- Turn a list of keywords into one ordered choice and put it in front of
+-- an optional existing pattern.
+--   list    : either an indexed list of keywords, or a hash that maps
+--             keywords to replacement values
+--   pp      : optional pattern that becomes the last alternative
+--   delayed : (hash only) replace through the table itself (pattern/list)
+--             instead of through the individual values
+--   checked : (hash only) leave out the replacement when key and value
+--             are equal
+-- Returns the combined pattern, or pp unchanged when list has no entries.
+function lpeg.append(list,pp,delayed,checked)
+ local p = pp
+ if #list > 0 then
+ -- indexed list: plain alternatives, no replacements
+ -- NOTE(review): walking the sorted copy backwards while prepending ends
+ -- up with keys[1] as first alternative, whereas the hash branches below
+ -- end up with keys[#keys] first -- confirm which order is intended
+ local keys = copyindexed(list)
+ sort(keys)
+ for i=#keys,1,-1 do
+ local k = keys[i]
+ if p then
+ p = P(k) + p
+ else
+ p = P(k)
+ end
+ end
+ elseif delayed then -- hm, it looks like the lpeg parser resolves anyway
+ local keys = sortedkeys(list)
+ if p then
+ -- every alternative gets its own table lookup; v is not used here
+ for i=1,#keys,1 do
+ local k = keys[i]
+ local v = list[k]
+ p = P(k)/list + p
+ end
+ else
+ -- no pattern to extend: collect the alternatives first and apply one
+ -- table lookup to the whole choice; v is not used here
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ p = P(k) + p
+ else
+ p = P(k)
+ end
+ end
+ if p then
+ p = p / list
+ end
+ end
+ elseif checked then
+ -- problem: substitution gives a capture
+ local keys = sortedkeys(list)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ if k == v then
+ p = P(k) + p
+ else
+ p = P(k)/v + p
+ end
+ else
+ if k == v then
+ p = P(k)
+ else
+ p = P(k)/v
+ end
+ end
+ end
+ else
+ -- default: every keyword is replaced by its value
+ local keys = sortedkeys(list)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = list[k]
+ if p then
+ p = P(k)/v + p
+ else
+ p = P(k)/v
+ end
+ end
+ end
+ return p
+end
+
+-- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true))
+-- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true))
+
+-- function lpeg.exact_match(words,case_insensitive)
+-- local pattern = concat(words)
+-- if case_insensitive then
+-- local pattern = S(upper(characters)) + S(lower(characters))
+-- local list = { }
+-- for i=1,#words do
+-- list[lower(words[i])] = true
+-- end
+-- return Cmt(pattern^1, function(_,i,s)
+-- return list[lower(s)] and i
+-- end)
+-- else
+-- local pattern = S(concat(words))
+-- local list = { }
+-- for i=1,#words do
+-- list[words[i]] = true
+-- end
+-- return Cmt(pattern^1, function(_,i,s)
+-- return list[s] and i
+-- end)
+-- end
+-- end
+
+-- experiment:
+
+local function make(t)
+ local p
+ local keys = sortedkeys(t)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = t[k]
+ if not p then
+ if next(v) then
+ p = P(k) * make(v)
+ else
+ p = P(k)
+ end
+ else
+ if next(v) then
+ p = p + P(k) * make(v)
+ else
+ p = p + P(k)
+ end
+ end
+ end
+ return p
+end
+
+function lpeg.utfchartabletopattern(list) -- goes to util-lpg
+ local tree = { }
+ for i=1,#list do
+ local t = tree
+ for c in gmatch(list[i],".") do
+ if not t[c] then
+ t[c] = { }
+ end
+ t = t[c]
+ end
+ end
+ return make(tree)
+end
+
+-- inspect ( lpeg.utfchartabletopattern {
+-- utfchar(0x00A0), -- nbsp
+-- utfchar(0x2000), -- enquad
+-- utfchar(0x2001), -- emquad
+-- utfchar(0x2002), -- enspace
+-- utfchar(0x2003), -- emspace
+-- utfchar(0x2004), -- threeperemspace
+-- utfchar(0x2005), -- fourperemspace
+-- utfchar(0x2006), -- sixperemspace
+-- utfchar(0x2007), -- figurespace
+-- utfchar(0x2008), -- punctuationspace
+-- utfchar(0x2009), -- breakablethinspace
+-- utfchar(0x200A), -- hairspace
+-- utfchar(0x200B), -- zerowidthspace
+-- utfchar(0x202F), -- narrownobreakspace
+-- utfchar(0x205F), -- math thinspace
+-- } )
+
+-- a few handy ones:
+--
+-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3
+
+patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol
+
+-- The next pattern^n variant is based on an approach suggested
+-- by Roberto: constructing a big repetition in chunks.
+--
+-- Being sparse is not needed, and only complicates matters and
+-- the number of redundant entries is not that large.
+
+-- Helper for lpeg.times: extend the grammar table result for the part of
+-- n that is covered by the current step and then continue with half that
+-- step for the remainder.
+--   n      : number of repetitions still to be accounted for
+--   step   : size of the chunk handled in this call
+--   result : grammar table under construction; rules are stored under
+--            tostring(step) and the entry rule under "start"
+-- Returns result once step is no longer positive.
+local function nextstep(n,step,result)
+ local m = n % step -- mod(n,step)
+ local d = floor(n/step) -- div(n,step)
+ if d > 0 then
+ -- put d references to the rule for this step in front of the start rule
+ local v = V(tostring(step))
+ local s = result.start
+ for i=1,d do
+ if s then
+ s = v * s
+ else
+ s = v
+ end
+ end
+ result.start = s
+ end
+ if step > 1 and result.start then
+ -- a rule of this size is two rules of half the size
+ -- NOTE(review): the smallest rule is supplied by lpeg.times under the
+ -- key "1", so this relies on tostring(step/2) giving exactly "1" for
+ -- step 2 -- verify on Lua versions that print floats as "1.0"
+ local v = V(tostring(step/2))
+ result[tostring(step)] = v * v
+ end
+ -- NOTE(review): step is halved with "/", so it presumably only reaches 0
+ -- after many extra calls below step 1 -- confirm that this is intended
+ if step > 0 then
+ return nextstep(m,step/2,result)
+ else
+ return result
+ end
+end
+
+function lpeg.times(pattern,n)
+ return P(nextstep(n,2^16,{ "start", ["1"] = pattern }))
+end
+
+-- local p = lpeg.Cs((1 - lpeg.times(lpeg.P("AB"),25))^1)
+-- local s = "12" .. string.rep("AB",20) .. "34" .. string.rep("AB",30) .. "56"
+-- inspect(p)
+-- print(lpeg.match(p,s))
+
+-- moved here (before util-str)
---~ from roberto's site:
+local digit = R("09")
+local period = P(".")
+local zero = P("0")
+local trailingzeros = zero^0 * -digit -- suggested by Roberto R
+local case_1 = period * trailingzeros / ""
+local case_2 = period * (digit - trailingzeros)^1 * (trailingzeros / "")
+local number = digit^1 * (case_1 + case_2)
+local stripper = Cs((number + 1)^0)
-local f1 = string.byte
+lpeg.patterns.stripzeros = stripper
-local function f2(s) local c1, c2 = f1(s,1,2) return c1 * 64 + c2 - 12416 end
-local function f3(s) local c1, c2, c3 = f1(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
-local function f4(s) local c1, c2, c3, c4 = f1(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
+-- local sample = "bla 11.00 bla 11 bla 0.1100 bla 1.00100 bla 0.00 bla 0.001 bla 1.1100 bla 0.100100100 bla 0.00100100100"
+-- collectgarbage("collect")
+-- str = string.rep(sample,10000)
+-- local ts = os.clock()
+-- lpegmatch(stripper,str)
+-- print(#str, os.clock()-ts, lpegmatch(stripper,sample))
-patterns.utf8byte = patterns.utf8one/f1 + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4
diff --git a/lualibs-lua.lua b/lualibs-lua.lua
new file mode 100644
index 0000000..538c65d
--- /dev/null
+++ b/lualibs-lua.lua
@@ -0,0 +1,393 @@
+if not modules then modules = { } end modules ['l-lua'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- compatibility hacks ... try to avoid usage
+
+local major, minor = string.match(_VERSION,"^[^%d]+(%d+)%.(%d+).*$")
+
+_MAJORVERSION = tonumber(major) or 5
+_MINORVERSION = tonumber(minor) or 1
+_LUAVERSION = _MAJORVERSION + _MINORVERSION/10
+
+-- lpeg
+
+if not lpeg then
+ lpeg = require("lpeg")
+end
+
+-- basics:
+
+-- Make load and loadstring usable side by side: when loadstring exists,
+-- load is wrapped so that string chunks are handed to loadstring and
+-- everything else to the original load; when it does not exist,
+-- loadstring becomes another name for load.
+if not loadstring then
+
+    loadstring = load
+
+else
+
+    local loadnormal = load
+
+    function load(first,...)
+        if type(first) ~= "string" then
+            return loadnormal(first,...)
+        end
+        return loadstring(first,...)
+    end
+
+end
+
+-- table:
+
+-- At some point it was announced that ipairs would be dropped, which makes
+-- sense. As we already used the for loop and # in most places the impact on
+-- ConTeXt was not that large; the remaining ipairs already have been replaced.
+-- Hm, actually ipairs was retained, but we no longer use it anyway (nor
+-- pairs).
+--
+-- Just in case, we provide the fallbacks as discussed in Programming
+-- in Lua (http://www.lua.org/pil/7.3.html):
+
+if not ipairs then
+
+ -- for k, v in ipairs(t) do ... end
+ -- for k=1,#t do local v = t[k] ... end
+
+ local function iterate(a,i)
+ i = i + 1
+ local v = a[i]
+ if v ~= nil then
+ return i, v --, nil
+ end
+ end
+
+ function ipairs(a)
+ return iterate, a, 0
+ end
+
+end
+
+if not pairs then
+
+ -- for k, v in pairs(t) do ... end
+ -- for k, v in next, t do ... end
+
+ function pairs(t)
+ return next, t -- , nil
+ end
+
+end
+
+-- The unpack function has been moved to the table table, and for compatibility
+-- reasons we provide both now.
+
+if not table.unpack then
+
+ table.unpack = _G.unpack
+
+elseif not unpack then
+
+ _G.unpack = table.unpack
+
+end
+
+-- package:
+
+-- if not package.seachers then
+--
+-- package.searchers = package.loaders -- 5.2
+--
+-- elseif not package.loaders then
+--
+-- package.loaders = package.searchers
+--
+-- end
+
+if not package.loaders then -- brr, searchers is a special "loadlib function" userdata type
+
+ package.loaders = package.searchers
+
+end
+
+-- moved from util-deb to here:
+
+local print, select, tostring = print, select, tostring
+
+local inspectors = { }
+
+function setinspector(inspector) -- global function
+ inspectors[#inspectors+1] = inspector
+end
+
+-- Show every argument. Each value is offered to the registered
+-- inspectors in order of registration until one of them returns a true
+-- value; a value that no inspector takes care of is printed with tostring.
+function inspect(...) -- global function
+    local count = select("#",...)
+    for index=1,count do
+        local value   = select(index,...)
+        local total   = #inspectors
+        local handled = false
+        local i       = 1
+        while i <= total and not handled do
+            handled = inspectors[i](value)
+            i = i + 1
+        end
+        if not handled then
+            print(tostring(value))
+        end
+    end
+end
+
+--
+
+-- Load a module when it is available. The result of require is returned
+-- on success; nothing is returned (so the caller sees nil) when loading
+-- fails, and the error itself is dropped on purpose.
+--
+-- pcall is used because it passes extra arguments on to the called
+-- function in every Lua version, while xpcall only does so from 5.2 on;
+-- with a 5.1 style xpcall the module name would never reach require.
+function optionalrequire(...)
+    local ok, result = pcall(require,...)
+    if ok then
+        return result
+    end
+end
+
+-- Code moved from data-lua and changed into a plug-in.
+
+-- We overload the regular loader. We do so because we operate mostly in
+-- tds and use our own loader code. Alternatively we could use a more
+-- extensive definition of package.path and package.cpath but even then
+-- we're not done. Also, we now have better tracing.
+--
+-- -- local mylib = require("libtest")
+-- -- local mysql = require("luasql.mysql")
+
+local type = type
+local gsub, format = string.gsub, string.format
+
+local package = package
+local searchers = package.searchers or package.loaders
+
+local libpaths = nil
+local clibpaths = nil
+local libhash = { }
+local clibhash = { }
+local libextras = { }
+local clibextras = { }
+
+-- dummies
+
+local filejoin = file and file.join or function(path,name) return path .. "/" .. name end
+local isreadable = file and file.is_readable or function(name) local f = io.open(name) if f then f:close() return true end end
+local addsuffix = file and file.addsuffix or function(name,suffix) return name .. "." .. suffix end
+
+--
+
+local function cleanpath(path) -- hm, don't we have a helper for this?
+ return path
+end
+
+local helpers = package.helpers or {
+ libpaths = function() return { } end,
+ clibpaths = function() return { } end,
+ cleanpath = cleanpath,
+ trace = false,
+ report = function(...) print(format(...)) end,
+}
+package.helpers = helpers
+
+local function getlibpaths()
+ return libpaths or helpers.libpaths(libhash)
+end
+
+local function getclibpaths()
+ return clibpaths or helpers.clibpaths(clibhash)
+end
+
+package.libpaths = getlibpaths
+package.clibpaths = getclibpaths
+
+-- Register extra search paths.
+--   what   : label used in the trace message
+--   paths  : list that receives every newly added path
+--   extras : second list that receives every newly added path
+--   hash   : set of paths that are already known; used to skip duplicates
+--   ...    : what to add, each argument a single path or a list of paths
+-- Returns paths and extras, the same tables that were passed in.
+local function addpath(what,paths,extras,hash,...)
+    local pathlist  = { ... }
+    local cleanpath = helpers.cleanpath
+    local trace     = helpers.trace
+    local report    = helpers.report
+    --
+    local function add(path)
+        local cleaned = cleanpath(path)
+        -- a helper that yields nothing gives us nothing to register
+        if cleaned and not hash[cleaned] then
+            if trace then
+                report("extra %s path: %s",what,cleaned)
+            end
+            paths [#paths +1] = cleaned
+            extras[#extras+1] = cleaned
+            -- remember the path, otherwise the test above can never catch
+            -- a path that is added a second time
+            hash[cleaned] = true
+        end
+    end
+    --
+    for p=1,#pathlist do
+        local path = pathlist[p]
+        if type(path) == "table" then
+            for i=1,#path do
+                add(path[i])
+            end
+        else
+            add(path)
+        end
+    end
+    return paths, extras
+end
+
+function package.extralibpath(...)
+ libpaths, libextras = addpath("lua", getlibpaths(), libextras, libhash,...)
+end
+
+function package.extraclibpath(...)
+ clibpaths, clibextras = addpath("lib",getclibpaths(),clibextras,clibhash,...)
+end
+
+-- function package.extralibpath(...)
+-- libpaths = getlibpaths()
+-- local pathlist = { ... }
+-- local cleanpath = helpers.cleanpath
+-- local trace = helpers.trace
+-- local report = helpers.report
+-- --
+-- local function add(path)
+-- local path = cleanpath(path)
+-- if not libhash[path] then
+-- if trace then
+-- report("extra lua path: %s",path)
+-- end
+-- libextras[#libextras+1] = path
+-- libpaths [#libpaths +1] = path
+-- end
+-- end
+-- --
+-- for p=1,#pathlist do
+-- local path = pathlist[p]
+-- if type(path) == "table" then
+-- for i=1,#path do
+-- add(path[i])
+-- end
+-- else
+-- add(path)
+-- end
+-- end
+-- end
+
+-- function package.extraclibpath(...)
+-- clibpaths = getclibpaths()
+-- local pathlist = { ... }
+-- local cleanpath = helpers.cleanpath
+-- local trace = helpers.trace
+-- local report = helpers.report
+-- --
+-- local function add(path)
+-- local path = cleanpath(path)
+-- if not clibhash[path] then
+-- if trace then
+-- report("extra lib path: %s",path)
+-- end
+-- clibextras[#clibextras+1] = path
+-- clibpaths [#clibpaths +1] = path
+-- end
+-- end
+-- --
+-- for p=1,#pathlist do
+-- local path = pathlist[p]
+-- if type(path) == "table" then
+-- for i=1,#path do
+-- add(path[i])
+-- end
+-- else
+-- add(path)
+-- end
+-- end
+-- end
+
+if not searchers[-2] then
+ -- use package-path and package-cpath
+ searchers[-2] = searchers[2]
+end
+
+searchers[2] = function(name)
+ return helpers.loaded(name)
+end
+
+searchers[3] = nil -- get rid of the built in one
+
+local function loadedaslib(resolved,rawname)
+ -- local init = "luaopen_" .. string.match(rawname,".-([^%.]+)$")
+ local init = "luaopen_"..gsub(rawname,"%.","_")
+ if helpers.trace then
+ helpers.report("calling loadlib with '%s' with init '%s'",resolved,init)
+ end
+ return package.loadlib(resolved,init)
+end
+
+local function loadedbylua(name)
+ if helpers.trace then
+ helpers.report("locating '%s' using normal loader",name)
+ end
+ return true, searchers[-2](name) -- the original
+end
+
+-- Look for name on each of the given paths and load the first readable
+-- hit, either as a binary library (islib) or as a Lua file.
+--   name    : file name to look for on each path
+--   rawname : module name as requested, passed on to loadedaslib
+--   paths   : list of directories to try, in order
+--   islib   : true when the file has to be loaded with loadedaslib
+--   what    : label used in the trace messages
+-- Returns true plus whatever the loader returned, or nothing at all when
+-- the file is not found on any path.
+local function loadedbypath(name,rawname,paths,islib,what)
+    local trace  = helpers.trace
+    local report = helpers.report
+    if trace then
+        report("locating '%s' as '%s' on '%s' paths",rawname,name,what)
+    end
+    local found
+    for p=1,#paths do
+        local path      = paths[p]
+        local candidate = filejoin(path,name)
+        if trace then -- mode detail
+            report("checking for '%s' using '%s' path '%s'",name,what,path)
+        end
+        if isreadable(candidate) then
+            found = candidate
+            break
+        end
+    end
+    if not found then
+        return
+    end
+    if trace then
+        report("lib '%s' located on '%s'",name,found)
+    end
+    if islib then
+        return true, loadedaslib(found,rawname)
+    end
+    return true, loadfile(found)
+end
+
+local function notloaded(name)
+ if helpers.trace then
+ helpers.report("? unable to locate library '%s'",name)
+ end
+end
+
+helpers.loadedaslib = loadedaslib
+helpers.loadedbylua = loadedbylua
+helpers.loadedbypath = loadedbypath
+helpers.notloaded = notloaded
+
+-- alternatively we could set the package.searchers
+
+-- Resolve a module name. The attempts are, in this order: the Lua file on
+-- the lua paths, the Lua file on the library paths, the binary library on
+-- the library paths and finally the loader that was saved before this one
+-- was installed. The result of the first attempt that reports success is
+-- returned.
+function helpers.loaded(name)
+    local thename   = gsub(name,"%.","/")
+    local luaname   = addsuffix(thename,"lua")
+    local libname   = addsuffix(thename,os.libsuffix or "so") -- brrr
+    local libpaths  = getlibpaths()
+    local clibpaths = getclibpaths()
+    local attempts  = {
+        { luaname, libpaths,  false, "lua" },
+        { luaname, clibpaths, false, "lua" },
+        { libname, clibpaths, true,  "lib" },
+    }
+    for i=1,#attempts do
+        local try = attempts[i]
+        local done, result = loadedbypath(try[1],name,try[2],try[3],try[4])
+        if done then
+            return result
+        end
+    end
+    local done, result = loadedbylua(name)
+    if done then
+        return result
+    end
+    return notloaded(name)
+end
diff --git a/lualibs-math.lua b/lualibs-math.lua
index fc8db47..43f60b5 100644
--- a/lualibs-math.lua
+++ b/lualibs-math.lua
@@ -9,33 +9,26 @@ if not modules then modules = { } end modules ['l-math'] = {
local floor, sin, cos, tan = math.floor, math.sin, math.cos, math.tan
if not math.round then
- function math.round(x)
- return floor(x + 0.5)
- end
+ function math.round(x) return floor(x + 0.5) end
end
if not math.div then
- function math.div(n,m)
- return floor(n/m)
- end
+ function math.div(n,m) return floor(n/m) end
end
if not math.mod then
- function math.mod(n,m)
- return n % m
- end
+ function math.mod(n,m) return n % m end
end
local pipi = 2*math.pi/360
-function math.sind(d)
- return sin(d*pipi)
+if not math.sind then
+ function math.sind(d) return sin(d*pipi) end
+ function math.cosd(d) return cos(d*pipi) end
+ function math.tand(d) return tan(d*pipi) end
end
-function math.cosd(d)
- return cos(d*pipi)
-end
-
-function math.tand(d)
- return tan(d*pipi)
+if not math.odd then
+ function math.odd (n) return n % 2 ~= 0 end
+ function math.even(n) return n % 2 == 0 end
end
diff --git a/lualibs-md5.lua b/lualibs-md5.lua
index 27955ef..8ac20a5 100644
--- a/lualibs-md5.lua
+++ b/lualibs-md5.lua
@@ -7,40 +7,85 @@ if not modules then modules = { } end modules ['l-md5'] = {
-- This also provides file checksums and checkers.
+if not md5 then
+ md5 = optionalrequire("md5")
+end
+
+if not md5 then
+ md5 = {
+ sum = function(str) print("error: md5 is not loaded (sum ignored)") return str end,
+ sumhexa = function(str) print("error: md5 is not loaded (sumhexa ignored)") return str end,
+ }
+end
+
+local md5, file = md5, file
local gsub, format, byte = string.gsub, string.format, string.byte
+local md5sum = md5.sum
local function convert(str,fmt)
- return (gsub(md5.sum(str),".",function(chr) return format(fmt,byte(chr)) end))
+ return (gsub(md5sum(str),".",function(chr) return format(fmt,byte(chr)) end))
end
if not md5.HEX then function md5.HEX(str) return convert(str,"%02X") end end
if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end
if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end
---~ if not md5.HEX then
---~ local function remap(chr) return format("%02X",byte(chr)) end
---~ function md5.HEX(str) return (gsub(md5.sum(str),".",remap)) end
---~ end
---~ if not md5.hex then
---~ local function remap(chr) return format("%02x",byte(chr)) end
---~ function md5.hex(str) return (gsub(md5.sum(str),".",remap)) end
---~ end
---~ if not md5.dec then
---~ local function remap(chr) return format("%03i",byte(chr)) end
---~ function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end
---~ end
+-- local P, Cs, lpegmatch = lpeg.P, lpeg.Cs,lpeg.match
+--
+-- if not md5.HEX then
+-- local function remap(chr) return format("%02X",byte(chr)) end
+-- function md5.HEX(str) return (gsub(md5.sum(str),".",remap)) end
+-- end
+--
+-- if not md5.hex then
+-- local function remap(chr) return format("%02x",byte(chr)) end
+-- function md5.hex(str) return (gsub(md5.sum(str),".",remap)) end
+-- end
+--
+-- if not md5.dec then
+-- local function remap(chr) return format("%03i",byte(chr)) end
+-- function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end
+-- end
-file.needs_updating_threshold = 1
+-- if not md5.HEX then
+-- local pattern_HEX = Cs( ( P(1) / function(chr) return format("%02X",byte(chr)) end)^0 )
+-- function md5.HEX(str) return lpegmatch(pattern_HEX,md5.sum(str)) end
+-- end
+--
+-- if not md5.hex then
+-- local pattern_hex = Cs( ( P(1) / function(chr) return format("%02x",byte(chr)) end)^0 )
+-- function md5.hex(str) return lpegmatch(pattern_hex,md5.sum(str)) end
+-- end
+--
+-- if not md5.dec then
+-- local pattern_dec = Cs( ( P(1) / function(chr) return format("%02i",byte(chr)) end)^0 )
+-- function md5.dec(str) return lpegmatch(pattern_dec,md5.sum(str)) end
+-- end
-function file.needs_updating(oldname,newname) -- size modification access change
- local oldtime = lfs.attributes(oldname, modification)
- local newtime = lfs.attributes(newname, modification)
- if newtime >= oldtime then
- return false
- elseif oldtime - newtime < file.needs_updating_threshold then
- return false
+-- Compare the modification times of two files and tell whether newname
+-- has to be (re)made from oldname.
+--   oldname   : the file that acts as reference
+--   newname   : the file that might be out of date
+--   threshold : time difference below which newname still counts as up to
+--               date; 1 when not given
+-- Returns true when an update is needed and false otherwise.
+function file.needsupdating(oldname,newname,threshold) -- size modification access change
+ local oldtime = lfs.attributes(oldname,"modification")
+ if oldtime then
+ local newtime = lfs.attributes(newname,"modification")
+ if not newtime then
+ return true -- no new file yet, so it has to be made
+ elseif newtime >= oldtime then
+ return false -- new file is at least as recent as the old one, so no update needed
+ elseif oldtime - newtime < (threshold or 1) then
+ return false -- new file is probably still okay
+ else
+ return true -- new file has to be updated
+ end
else
- return true
+ return false -- no old file, so no updating needed
+ end
+end
+
+file.needs_updating = file.needsupdating
+
+function file.syncmtimes(oldname,newname)
+ local oldtime = lfs.attributes(oldname,"modification")
+ if oldtime and lfs.isfile(newname) then
+ lfs.touch(newname,oldtime,oldtime)
end
end
@@ -62,7 +107,7 @@ function file.loadchecksum(name)
return nil
end
-function file.savechecksum(name, checksum)
+function file.savechecksum(name,checksum)
if not checksum then checksum = file.checksum(name) end
if checksum then
io.savedata(name .. ".md5",checksum)
diff --git a/lualibs-number.lua b/lualibs-number.lua
index a1249f0..001ca31 100644
--- a/lualibs-number.lua
+++ b/lualibs-number.lua
@@ -6,16 +6,140 @@ if not modules then modules = { } end modules ['l-number'] = {
license = "see context related readme files"
}
-local tostring = tostring
-local format, floor, insert, match = string.format, math.floor, table.insert, string.match
+-- this module will be replaced when we have the bit library .. the number based sets
+-- might go away
+
+local tostring, tonumber = tostring, tonumber
+local format, floor, match, rep = string.format, math.floor, string.match, string.rep
+local concat, insert = table.concat, table.insert
local lpegmatch = lpeg.match
-number = number or { }
+number = number or { }
+local number = number
--- a,b,c,d,e,f = number.toset(100101)
+if bit32 then -- I wonder if this is faster
-function number.toset(n)
- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)")
+ local btest, bor = bit32.btest, bit32.bor
+
+ function number.bit(p)
+ return 2 ^ (p - 1) -- 1-based indexing
+ end
+
+ number.hasbit = btest
+ number.setbit = bor
+
+ function number.setbit(x,p) -- why not bor?
+ return btest(x,p) and x or x + p
+ end
+
+ function number.clearbit(x,p)
+ return btest(x,p) and x - p or x
+ end
+
+else
+
+ -- http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html
+
+ function number.bit(p)
+ return 2 ^ (p - 1) -- 1-based indexing
+ end
+
+ function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ...
+ return x % (p + p) >= p
+ end
+
+ function number.setbit(x, p)
+ return (x % (p + p) >= p) and x or x + p
+ end
+
+ function number.clearbit(x, p)
+ return (x % (p + p) >= p) and x - p or x
+ end
+
+end
+
+-- print(number.tobitstring(8))
+-- print(number.tobitstring(14))
+-- print(number.tobitstring(66))
+-- print(number.tobitstring(0x00))
+-- print(number.tobitstring(0xFF))
+-- print(number.tobitstring(46260767936,4))
+
+if bit32 then
+
+ local bextract = bit32.extract
+
+ local t = {
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ "0", "0", "0", "0", "0", "0", "0", "0",
+ }
+
+ -- Render bits 0..31 of b as a string of "0" and "1" characters, highest
+ -- bit first.
+ --   b : number to render
+ --   m : optional number of bytes to show; never more than 4 are shown
+ -- Without m the result is cut down to the smallest number of whole bytes
+ -- (at least one) that still contains the highest bit that is set.
+ function number.tobitstring(b,m)
+     -- if really needed we can speed this one up
+     -- because small numbers need less extraction
+     local n = 32 -- slot of the highest set bit, 32 when no bit is set
+     for i=0,31 do
+         local v = bextract(b,i)
+         local k = 32 - i -- t[1] holds bit 31 and t[32] holds bit 0
+         if v == 1 then
+             n = k
+             t[k] = "1"
+         else
+             t[k] = "0"
+         end
+     end
+     if m then
+         m = 33 - m * 8
+         if m < 1 then
+             m = 1
+         end
+         return concat(t,"",m)
+     -- Slots 1..8 are the first byte, 9..16 the second and 17..24 the
+     -- third, so the last slot of each byte must be included in the test;
+     -- a strict "<" drops the highest bit of 256, 65536 and the like.
+     elseif n <= 8 then
+         return concat(t)
+     elseif n <= 16 then
+         return concat(t,"",9)
+     elseif n <= 24 then
+         return concat(t,"",17)
+     else
+         return concat(t,"",25)
+     end
+ end
+
+else
+
+ -- Render a number as a string of "0" and "1" characters, highest bit
+ -- first, padded with zeros on the left to a whole number of bytes.
+ --   n : number to render
+ --   m : optional minimum number of bytes in the result
+ -- A number that is not positive gives m bytes of zeros, or one byte of
+ -- zeros when m is not given.
+ function number.tobitstring(n,m)
+     if n > 0 then
+         local t = { }
+         while n > 0 do
+             insert(t,1,n % 2 > 0 and 1 or 0)
+             n = floor(n/2)
+         end
+         -- fill up to a multiple of eight bits
+         local nn = 8 - #t % 8
+         if nn > 0 and nn < 8 then
+             for i=1,nn do
+                 insert(t,1,0)
+             end
+         end
+         if m then
+             -- number of bits still missing to reach m bytes
+             m = m * 8 - #t
+             if m > 0 then
+                 insert(t,1,rep("0",m))
+             end
+         end
+         return concat(t)
+     elseif m then
+         -- the string has to be returned, not just built
+         return rep("00000000",m)
+     else
+         return "00000000"
+     end
+ end
+
+end
+
+function number.valid(str,default)
+ return tonumber(str) or default or nil
end
function number.toevenhex(n)
@@ -27,32 +151,57 @@ function number.toevenhex(n)
end
end
--- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5%
--- on
+-- a,b,c,d,e,f = number.toset(100101)
+--
+-- function number.toset(n)
+-- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)")
+-- end
+--
+-- -- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5%
+-- -- on
--
-- for i=1,1000000 do
-- local a,b,c,d,e,f,g,h = number.toset(12345678)
-- local a,b,c,d = number.toset(1234)
-- local a,b,c = number.toset(123)
+-- local a,b,c = number.toset("123")
-- end
---
--- of course dedicated "(.)(.)(.)(.)" matches are even faster
-local one = lpeg.C(1-lpeg.S(''))^1
+local one = lpeg.C(1-lpeg.S('')/tonumber)^1
function number.toset(n)
return lpegmatch(one,tostring(n))
end
-function number.bits(n,zero)
- local t, i = { }, (zero and 0) or 1
- while n > 0 do
+-- function number.bits(n,zero)
+-- local t, i = { }, (zero and 0) or 1
+-- while n > 0 do
+-- local m = n % 2
+-- if m > 0 then
+-- insert(t,1,i)
+-- end
+-- n = floor(n/2)
+-- i = i + 1
+-- end
+-- return t
+-- end
+--
+-- -- a bit faster
+
+local function bits(n,i,...)
+ if n > 0 then
local m = n % 2
+ local n = floor(n/2)
if m > 0 then
- insert(t,1,i)
+ return bits(n, i+1, i, ...)
+ else
+ return bits(n, i+1, ...)
end
- n = floor(n/2)
- i = i + 1
+ else
+ return ...
end
- return t
+end
+
+-- Returns a list with the positions of the bits set in n, highest position first.
+-- Positions are 1-based by default; pass a true value for zero to get 0-based
+-- positions, as the previous implementation of this function supported.
+function number.bits(n,zero)
+ return { bits(n,zero and 0 or 1) }
end
diff --git a/lualibs-os.lua b/lualibs-os.lua
index fba2cd3..42f3e48 100644
--- a/lualibs-os.lua
+++ b/lualibs-os.lua
@@ -6,32 +6,130 @@ if not modules then modules = { } end modules ['l-os'] = {
license = "see context related readme files"
}
+-- This file deals with some operating system issues. Please don't bother me
+-- with the pros and cons of operating systems as they all have their flaws
+-- and benefits. Bashing one of them won't help solving problems and fixing
+-- bugs faster and is a waste of time and energy.
+--
+-- path separators: / or \ ... we can use / everywhere
+-- suffixes : dll so exe <none> ... no big deal
+-- quotes : we can use "" in most cases
+-- expansion : unless "" are used * might give side effects
+-- piping/threads : somewhat different for each os
+-- locations : specific user file locations and settings can change over time
+--
+-- os.type : windows | unix (new, we already guessed os.platform)
+-- os.name : windows | msdos | linux | macosx | solaris | .. | generic (new)
+-- os.platform : extended os.name with architecture
+
+-- os.sleep() => socket.sleep()
+-- math.randomseed(tonumber(string.sub(string.reverse(tostring(math.floor(socket.gettime()*10000))),1,6)))
+
-- maybe build io.flush in os.execute
-local find, format, gsub = string.find, string.format, string.gsub
-local random, ceil = math.random, math.ceil
+local os = os
+local date, time = os.date, os.time
+local find, format, gsub, upper, gmatch = string.find, string.format, string.gsub, string.upper, string.gmatch
+local concat = table.concat
+local random, ceil, randomseed = math.random, math.ceil, math.randomseed
+local rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring = rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring
+
+-- The following code permits traversing the environment table, at least
+-- in luatex. Internally all environment names are uppercase.
+
+-- The randomseed in Lua is not that random, although this depends on the operating system as well
+-- as the binary (Luatex is normally okay). But to be sure we set the seed anyway.
+
+math.initialseed = tonumber(string.sub(string.reverse(tostring(ceil(socket and socket.gettime()*10000 or time()))),1,6))
+
+randomseed(math.initialseed)
+
+if not os.__getenv__ then
+
+ os.__getenv__ = os.getenv
+ os.__setenv__ = os.setenv
+
+ if os.env then
+
+ local osgetenv = os.getenv
+ local ossetenv = os.setenv
+ local osenv = os.env local _ = osenv.PATH -- initialize the table
+
+ -- Stores v under the uppercased name in the os.env table and forwards it to the
+ -- native setter. A nil value is stored as ""; a table value is kept as a table in
+ -- os.env but handed to the native setter as a ";"-joined string.
+ function os.setenv(k,v)
+     local value = v
+     if value == nil then
+         value = ""
+     end
+     local name = upper(k)
+     osenv[name] = value
+     if type(value) == "table" then
+         ossetenv(name,concat(value,";")) -- path
+     else
+         ossetenv(name,value)
+     end
+ end
+
+ -- Looks k up in the os.env table (uppercased first, then as given) and then via the
+ -- native getter in the same order. An empty string counts as unset and yields nil.
+ function os.getenv(k)
+     local name = upper(k)
+     local value = osenv[name] or osenv[k] or osgetenv(name) or osgetenv(k)
+     if value ~= "" then
+         return value
+     end
+     return nil
+ end
+
+ else
+
+ local ossetenv = os.setenv
+ local osgetenv = os.getenv
+ local osenv = { }
+
+ -- Variant used when the host provides no os.env table: the value is only recorded in
+ -- the local osenv table under the uppercased name, with nil stored as "".
+ -- NOTE(review): the native ossetenv captured just above is never called here, so the
+ -- real process environment is not changed - confirm that this is intended.
+ function os.setenv(k,v)
+ if v == nil then
+ v = ""
+ end
+ local K = upper(k)
+ osenv[K] = v
+ end
+
+ -- Returns the value recorded by os.setenv for the uppercased name, falling back to
+ -- the native getter (uppercased name first, then k as given). "" is reported as nil.
+ function os.getenv(k)
+     local name = upper(k)
+     local value = osenv[name] or osgetenv(name) or osgetenv(k)
+     if value ~= "" then
+         return value
+     end
+     return nil
+ end
+
+ -- Metamethods for the emulated os.env table: reading a key goes through os.getenv
+ -- and assigning a key goes through os.setenv, so nothing is stored in the table itself.
+ local function __index(t,k)
+ return os.getenv(k)
+ end
+ local function __newindex(t,k,v)
+ os.setenv(k,v)
+ end
+
+ os.env = { }
+
+ setmetatable(os.env, { __index = __index, __newindex = __newindex } )
+
+ end
-local execute, spawn, exec, ioflush = os.execute, os.spawn or os.execute, os.exec or os.execute, io.flush
+end
+
+-- end of environment hack
+
+local execute, spawn, exec, iopopen, ioflush = os.execute, os.spawn or os.execute, os.exec or os.execute, io.popen, io.flush
function os.execute(...) ioflush() return execute(...) end
function os.spawn (...) ioflush() return spawn (...) end
function os.exec (...) ioflush() return exec (...) end
+function io.popen (...) ioflush() return iopopen(...) end
function os.resultof(command)
- ioflush() -- else messed up logging
local handle = io.popen(command,"r")
- if not handle then
- -- print("unknown command '".. command .. "' in os.resultof")
- return ""
- else
- return handle:read("*all") or ""
- end
+ return handle and handle:read("*all") or ""
end
---~ os.type : windows | unix (new, we already guessed os.platform)
---~ os.name : windows | msdos | linux | macosx | solaris | .. | generic (new)
---~ os.platform : extended os.name with architecture
-
if not io.fileseparator then
if find(os.getenv("PATH"),";") then
io.fileseparator, io.pathseparator, os.type = "\\", ";", os.type or "mswin"
@@ -44,20 +142,22 @@ os.type = os.type or (io.pathseparator == ";" and "windows") or "unix"
os.name = os.name or (os.type == "windows" and "mswin" ) or "linux"
if os.type == "windows" then
- os.libsuffix, os.binsuffix = 'dll', 'exe'
+ os.libsuffix, os.binsuffix, os.binsuffixes = 'dll', 'exe', { 'exe', 'cmd', 'bat' }
else
- os.libsuffix, os.binsuffix = 'so', ''
+ os.libsuffix, os.binsuffix, os.binsuffixes = 'so', '', { '' }
end
+-- Command templates for os.launch, keyed by os.name; %s receives the thing to open.
+-- The unix entry is the default for every name that has no entry of its own.
+local launchers = {
+    windows = "start %s",
+    mswin = "start %s", -- os.name falls back to "mswin" on windows when the host sets no name
+    macosx = "open %s",
+    -- os.execute runs the command through /bin/sh, where the bash-only "&>" is read as
+    -- "&" followed by ">", so the output was not silenced; use the portable form
+    unix = "$BROWSER %s > /dev/null 2>&1 &",
+}
+
function os.launch(str)
- if os.type == "windows" then
- os.execute("start " .. str) -- os.spawn ?
- else
- os.execute(str .. " &") -- os.spawn ?
- end
+ os.execute(format(launchers[os.name] or launchers.unix,str))
end
-if not os.times then
+if not os.times then -- ?
-- utime = user time
-- stime = system time
-- cutime = children user time
@@ -89,36 +189,14 @@ end
-- no need for function anymore as we have more clever code and helpers now
-- this metatable trickery might as well disappear
-os.resolvers = os.resolvers or { }
+os.resolvers = os.resolvers or { } -- will become private
local resolvers = os.resolvers
-local osmt = getmetatable(os) or { __index = function(t,k) t[k] = "unset" return "unset" end } -- maybe nil
-local osix = osmt.__index
-
-osmt.__index = function(t,k)
- return (resolvers[k] or osix)(t,k)
-end
-
-setmetatable(os,osmt)
-
-if not os.setenv then
-
- -- we still store them but they won't be seen in
- -- child processes although we might pass them some day
- -- using command concatination
-
- local env, getenv = { }, os.getenv
-
- function os.setenv(k,v)
- env[k] = v
- end
-
- function os.getenv(k)
- return env[k] or getenv(k)
- end
-
-end
+-- Installs a metatable on os so that fields missing from os are looked up in
+-- os.resolvers: a resolver found for k is called as r(t,k) and its result returned.
+-- The result is not stored in os, so the resolver runs again on every access; a
+-- missing resolver or a false/nil result yields nil.
+setmetatable(os, { __index = function(t,k)
+ local r = resolvers[k]
+ return r and r(t,k) or nil -- no memoize
+end })
-- we can use HOSTTYPE on some platforms
@@ -159,7 +237,7 @@ elseif os.type == "windows" then
elseif name == "linux" then
function os.resolvers.platform(t,k)
- -- we sometims have HOSTTYPE set so let's check that first
+ -- we sometimes have HOSTTYPE set so let's check that first
local platform, architecture = "", os.getenv("HOSTTYPE") or os.resultof("uname -m") or ""
if find(architecture,"x86_64") then
platform = "linux-64"
@@ -237,10 +315,10 @@ elseif name == "freebsd" then
elseif name == "kfreebsd" then
function os.resolvers.platform(t,k)
- -- we sometims have HOSTTYPE set so let's check that first
+ -- we sometimes have HOSTTYPE set so let's check that first
local platform, architecture = "", os.getenv("HOSTTYPE") or os.resultof("uname -m") or ""
if find(architecture,"x86_64") then
- platform = "kfreebsd-64"
+ platform = "kfreebsd-amd64"
else
platform = "kfreebsd-i386"
end
@@ -288,7 +366,7 @@ end
local d
function os.timezone(delta)
- d = d or tonumber(tonumber(os.date("%H")-os.date("!%H")))
+ d = d or tonumber(tonumber(date("%H")-date("!%H")))
if delta then
if d > 0 then
return format("+%02i:00",d)
@@ -299,3 +377,86 @@ function os.timezone(delta)
return 1
end
end
+
+local timeformat = format("%%s%s",os.timezone(true))
+local dateformat = "!%Y-%m-%d %H:%M:%S"
+
+-- Formats t with the "!"-prefixed date format and appends the os.timezone(true) suffix
+-- via timeformat. When t is not a positive number, default is returned if given;
+-- otherwise date is called with a nil time.
+function os.fulltime(t,default)
+    local stamp = tonumber(t) or 0
+    if not (stamp > 0) then
+        if default then
+            return default
+        end
+        stamp = nil
+    end
+    return format(timeformat,date(dateformat,stamp))
+end
+
+local dateformat = "%Y-%m-%d %H:%M:%S"
+
+-- Formats t as "YYYY-MM-DD HH:MM:SS" using the format without the "!" prefix. When t
+-- is not a positive number, default is returned if given; otherwise date is called
+-- with a nil time.
+function os.localtime(t,default)
+    local stamp = tonumber(t) or 0
+    if not (stamp > 0) then
+        if default then
+            return default
+        end
+        stamp = nil
+    end
+    return date(dateformat,stamp)
+end
+
+-- Formats a positive numeric t as "YYYY-MM-DD HH:MM:SS"; for anything else default is
+-- returned, or "-" when no default is given.
+function os.converttime(t,default)
+    local stamp = tonumber(t)
+    if not (stamp and stamp > 0) then
+        return default or "-"
+    end
+    return date(dateformat,stamp)
+end
+
+local memory = { }
+
+-- Locates filename on PATH and returns its full name, or false when it is not found.
+-- A name without suffix is tried with each entry of os.binsuffixes; a name with a
+-- suffix is tried as given. Directories are searched in PATH order and the first hit
+-- wins. Results, including misses, are cached in memory.
+local function which(filename)
+    local fullname = memory[filename]
+    if fullname == nil then
+        fullname = false
+        local suffix = file.suffix(filename)
+        local suffixes = suffix == "" and os.binsuffixes or { suffix }
+        local path = os.getenv("PATH") or "" -- an unset PATH must not crash gmatch
+        for directory in gmatch(path,"[^" .. io.pathseparator .."]+") do
+            local df = file.join(directory,filename)
+            for i=1,#suffixes do
+                local dfs = file.addsuffix(df,suffixes[i])
+                if io.exists(dfs) then
+                    fullname = dfs
+                    break
+                end
+            end
+            if fullname then
+                -- stop at the first directory that has the file; without this a later
+                -- PATH entry would overwrite an earlier match
+                break
+            end
+        end
+        memory[filename] = fullname
+    end
+    return fullname
+end
+
+os.which = which
+os.where = which
+
+-- Returns the current date and time as a table of fields; the "!" prefix in the
+-- format selects UTC rather than local time.
+function os.today()
+ return date("!*t") -- table with values
+end
+
+-- Returns the current UTC date and time (the "!" prefix) as a "YYYY-MM-DD HH:MM:SS" string.
+function os.now()
+ return date("!%Y-%m-%d %H:%M:%S") -- 2011-12-04 14:59:12
+end
+
+if not os.sleep and socket then
+ os.sleep = socket.sleep
+end
+
+-- print(os.which("inkscape.exe"))
+-- print(os.which("inkscape"))
+-- print(os.which("gs.exe"))
+-- print(os.which("ps2pdf"))
diff --git a/lualibs-set.lua b/lualibs-set.lua
index f844d0b..2370f01 100644
--- a/lualibs-set.lua
+++ b/lualibs-set.lua
@@ -6,6 +6,8 @@ if not modules then modules = { } end modules ['l-set'] = {
license = "see context related readme files"
}
+-- This will become obsolete when we have the bitset library embedded.
+
set = set or { }
local nums = { }
@@ -49,10 +51,11 @@ function set.tolist(n)
if n == 0 or not tabs[n] then
return ""
else
- local t = { }
+ local t, m = { }, 0 -- m counts the collected keys; it must not shadow n, which indexes tabs below
for k, v in next, tabs[n] do
if v then
- t[#t+1] = k
+ m = m + 1
+ t[m] = k
end
end
return concat(t," ")
diff --git a/lualibs-string.lua b/lualibs-string.lua
index 9856d52..77c076c 100644
--- a/lualibs-string.lua
+++ b/lualibs-string.lua
@@ -6,283 +6,200 @@ if not modules then modules = { } end modules ['l-string'] = {
license = "see context related readme files"
}
-local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
-local lpegmatch = lpeg.match
-
--- some functions may disappear as they are not used anywhere
-
-if not string.split then
-
- -- this will be overloaded by a faster lpeg variant
-
- function string:split(pattern)
- if #self > 0 then
- local t = { }
- for s in gmatch(self..pattern,"(.-)"..pattern) do
- t[#t+1] = s
- end
- return t
- else
- return { }
- end
- end
-
-end
-
-local chr_to_esc = {
- ["%"] = "%%",
- ["."] = "%.",
- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
- ["^"] = "%^", ["$"] = "%$",
- ["["] = "%[", ["]"] = "%]",
- ["("] = "%(", [")"] = "%)",
- ["{"] = "%{", ["}"] = "%}"
-}
-
-string.chr_to_esc = chr_to_esc
-
-function string:esc() -- variant 2
- return (gsub(self,"(.)",chr_to_esc))
-end
-
-function string:unquote()
- return (gsub(self,"^([\"\'])(.*)%1$","%2"))
-end
-
---~ function string:unquote()
---~ if find(self,"^[\'\"]") then
---~ return sub(self,2,-2)
---~ else
---~ return self
---~ end
---~ end
-
-function string:quote() -- we could use format("%q")
- return format("%q",self)
-end
-
-function string:count(pattern) -- variant 3
+local string = string
+local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
+local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs
+
+-- Some functions are already defined in l-lpeg and maybe some from here will
+-- move there (unless we also expose caches).
+
+-- if not string.split then
+--
+-- function string.split(str,pattern)
+-- local t = { }
+-- if #str > 0 then
+-- local n = 1
+-- for s in gmatch(str..pattern,"(.-)"..pattern) do
+-- t[n] = s
+-- n = n + 1
+-- end
+-- end
+-- return t
+-- end
+--
+-- end
+
+-- function string.unquoted(str)
+-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern
+-- end
+
+local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote
+ + patterns.dquote * C(patterns.nodquote) * patterns.dquote
+
+function string.unquoted(str)
+ return lpegmatch(unquoted,str) or str
+end
+
+-- print(string.unquoted("test"))
+-- print(string.unquoted([["t\"est"]]))
+-- print(string.unquoted([["t\"est"x]]))
+-- print(string.unquoted("\'test\'"))
+-- print(string.unquoted('"test"'))
+-- print(string.unquoted('"test"'))
+
+function string.quoted(str)
+ return format("%q",str) -- always double quote
+end
+
+function string.count(str,pattern) -- variant 3
local n = 0
- for _ in gmatch(self,pattern) do
+ for _ in gmatch(str,pattern) do -- not for utf
n = n + 1
end
return n
end
-function string:limit(n,sentinel)
- if #self > n then
- sentinel = sentinel or " ..."
- return sub(self,1,(n-#sentinel)) .. sentinel
+function string.limit(str,n,sentinel) -- not utf proof
+ if #str > n then
+ sentinel = sentinel or "..."
+ return sub(str,1,(n-#sentinel)) .. sentinel
else
- return self
+ return str
end
end
---~ function string:strip() -- the .- is quite efficient
---~ -- return match(self,"^%s*(.-)%s*$") or ""
---~ -- return match(self,'^%s*(.*%S)') or '' -- posted on lua list
---~ return find(s,'^%s*$') and '' or match(s,'^%s*(.*%S)')
---~ end
-
-do -- roberto's variant:
- local space = lpeg.S(" \t\v\n")
- local nospace = 1 - space
- local stripper = space^0 * lpeg.C((space^0 * nospace^1)^0)
- function string.strip(str)
- return lpegmatch(stripper,str) or ""
- end
-end
-
-function string:is_empty()
- return not find(self,"%S")
-end
+local stripper = patterns.stripper
+local collapser = patterns.collapser
+local longtostring = patterns.longtostring
-function string:enhance(pattern,action)
- local ok, n = true, 0
- while ok do
- ok = false
- self = gsub(self,pattern, function(...)
- ok, n = true, n + 1
- return action(...)
- end)
- end
- return self, n
+function string.strip(str)
+ return lpegmatch(stripper,str) or ""
end
-local chr_to_hex, hex_to_chr = { }, { }
-
-for i=0,255 do
- local c, h = char(i), format("%02X",i)
- chr_to_hex[c], hex_to_chr[h] = h, c
+function string.collapsespaces(str)
+ return lpegmatch(collapser,str) or ""
end
-function string:to_hex()
- return (gsub(self or "","(.)",chr_to_hex))
+function string.longtostring(str)
+ return lpegmatch(longtostring,str) or ""
end
-function string:from_hex()
- return (gsub(self or "","(..)",hex_to_chr))
-end
+-- function string.is_empty(str)
+-- return not find(str,"%S")
+-- end
-if not string.characters then
+local pattern = P(" ")^0 * P(-1)
- local function nextchar(str, index)
- index = index + 1
- return (index <= #str) and index or nil, sub(str,index,index)
- end
- function string:characters()
- return nextchar, self, 0
- end
- local function nextbyte(str, index)
- index = index + 1
- return (index <= #str) and index or nil, byte(sub(str,index,index))
- end
- function string:bytes()
- return nextbyte, self, 0
- end
-
-end
-
--- we can use format for this (neg n)
-
-function string:rpadd(n,chr)
- local m = n-#self
- if m > 0 then
- return self .. rep(chr or " ",m)
+function string.is_empty(str)
+ if str == "" then
+ return true
else
- return self
- end
-end
-
-function string:lpadd(n,chr)
- local m = n-#self
- if m > 0 then
- return rep(chr or " ",m) .. self
+ return lpegmatch(pattern,str) and true or false
+ end
+end
+
+-- if not string.escapedpattern then
+--
+-- local patterns_escapes = {
+-- ["%"] = "%%",
+-- ["."] = "%.",
+-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
+-- ["["] = "%[", ["]"] = "%]",
+-- ["("] = "%(", [")"] = "%)",
+-- -- ["{"] = "%{", ["}"] = "%}"
+-- -- ["^"] = "%^", ["$"] = "%$",
+-- }
+--
+-- local simple_escapes = {
+-- ["-"] = "%-",
+-- ["."] = "%.",
+-- ["?"] = ".",
+-- ["*"] = ".*",
+-- }
+--
+-- function string.escapedpattern(str,simple)
+-- return (gsub(str,".",simple and simple_escapes or patterns_escapes))
+-- end
+--
+-- function string.topattern(str,lowercase,strict)
+-- if str == "" then
+-- return ".*"
+-- else
+-- str = gsub(str,".",simple_escapes)
+-- if lowercase then
+-- str = lower(str)
+-- end
+-- if strict then
+-- return "^" .. str .. "$"
+-- else
+-- return str
+-- end
+-- end
+-- end
+--
+-- end
+
+--- needs checking
+
+local anything = patterns.anything
+local allescapes = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ?
+local someescapes = Cc("%") * S(".-+%()[]") -- also {} and ^$ ?
+local matchescapes = Cc(".") * S("*?") -- wildcard and single match
+
+local pattern_a = Cs ( ( allescapes + anything )^0 )
+local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 )
+local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") )
+
+function string.escapedpattern(str,simple)
+ return lpegmatch(simple and pattern_b or pattern_a,str)
+end
+
+function string.topattern(str,lowercase,strict)
+ if str=="" or type(str) ~= "string" then
+ return ".*"
+ elseif strict then
+ str = lpegmatch(pattern_c,str)
else
- return self
+ str = lpegmatch(pattern_b,str)
end
-end
-
-string.padd = string.rpadd
-
-function is_number(str) -- tonumber
- return find(str,"^[%-%+]?[%d]-%.?[%d+]$") == 1
-end
-
---~ print(is_number("1"))
---~ print(is_number("1.1"))
---~ print(is_number(".1"))
---~ print(is_number("-0.1"))
---~ print(is_number("+0.1"))
---~ print(is_number("-.1"))
---~ print(is_number("+.1"))
-
-function string:split_settings() -- no {} handling, see l-aux for lpeg variant
- if find(self,"=") then
- local t = { }
- for k,v in gmatch(self,"(%a+)=([^%,]*)") do
- t[k] = v
- end
- return t
+ if lowercase then
+ return lower(str)
else
- return nil
+ return str
end
end
-local patterns_escapes = {
- ["-"] = "%-",
- ["."] = "%.",
- ["+"] = "%+",
- ["*"] = "%*",
- ["%"] = "%%",
- ["("] = "%)",
- [")"] = "%)",
- ["["] = "%[",
- ["]"] = "%]",
-}
+-- print(string.escapedpattern("12+34*.tex",false))
+-- print(string.escapedpattern("12+34*.tex",true))
+-- print(string.topattern ("12+34*.tex",false,false))
+-- print(string.topattern ("12+34*.tex",false,true))
-function string:pattesc()
- return (gsub(self,".",patterns_escapes))
+function string.valid(str,default)
+ return (type(str) == "string" and str ~= "" and str) or default or nil
end
-local simple_escapes = {
- ["-"] = "%-",
- ["."] = "%.",
- ["?"] = ".",
- ["*"] = ".*",
-}
+-- handy fallback
-function string:simpleesc()
- return (gsub(self,".",simple_escapes))
-end
+string.itself = function(s) return s end
-function string:tohash()
- local t = { }
- for s in gmatch(self,"([^, ]+)") do -- lpeg
- t[s] = true
- end
- return t
-end
+-- also handy (see utf variant)
-local pattern = lpeg.Ct(lpeg.C(1)^0)
+local pattern = Ct(C(1)^0) -- string and not utf !
-function string:totable()
- return lpegmatch(pattern,self)
+function string.totable(str)
+ return lpegmatch(pattern,str)
end
---~ local t = {
---~ "1234567123456712345671234567",
---~ "a\tb\tc",
---~ "aa\tbb\tcc",
---~ "aaa\tbbb\tccc",
---~ "aaaa\tbbbb\tcccc",
---~ "aaaaa\tbbbbb\tccccc",
---~ "aaaaaa\tbbbbbb\tcccccc",
---~ }
---~ for k,v do
---~ print(string.tabtospace(t[k]))
---~ end
-
-function string.tabtospace(str,tab)
- -- we don't handle embedded newlines
- while true do
- local s = find(str,"\t")
- if s then
- if not tab then tab = 7 end -- only when found
- local d = tab-(s-1) % tab
- if d > 0 then
- str = gsub(str,"\t",rep(" ",d),1)
- else
- str = gsub(str,"\t","",1)
- end
- else
- break
- end
- end
- return str
-end
+-- handy from within tex:
-function string:compactlong() -- strips newlines and leading spaces
- self = gsub(self,"[\n\r]+ *","")
- self = gsub(self,"^ *","")
- return self
-end
+local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!
-function string:striplong() -- strips newlines and leading spaces
- self = gsub(self,"^%s*","")
- self = gsub(self,"[\n\r]+ *","\n")
- return self
+function string.tformat(fmt,...)
+ return format(lpegmatch(replacer,fmt),...)
end
-function string:topattern(lowercase,strict)
- if lowercase then
- self = lower(self)
- end
- self = gsub(self,".",simple_escapes)
- if self == "" then
- self = ".*"
- elseif strict then
- self = "^" .. self .. "$"
- end
- return self
-end
+-- obsolete names:
+
+string.quote = string.quoted
+string.unquote = string.unquoted
diff --git a/lualibs-table.lua b/lualibs-table.lua
index ee395d0..640bbbb 100644
--- a/lualibs-table.lua
+++ b/lualibs-table.lua
@@ -6,208 +6,267 @@ if not modules then modules = { } end modules ['l-table'] = {
license = "see context related readme files"
}
-table.join = table.concat
-
+local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select
+local table, string = table, string
local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove
-local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match
+local format, lower, dump = string.format, string.lower, string.dump
local getmetatable, setmetatable = getmetatable, setmetatable
-local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs
-
--- Starting with version 5.2 Lua no longer provide ipairs, which makes
--- sense. As we already used the for loop and # in most places the
--- impact on ConTeXt was not that large; the remaining ipairs already
--- have been replaced. In a similar fashio we also hardly used pairs.
---
--- Just in case, we provide the fallbacks as discussed in Programming
--- in Lua (http://www.lua.org/pil/7.3.html):
-
-if not ipairs then
-
- -- for k, v in ipairs(t) do ... end
- -- for k=1,#t do local v = t[k] ... end
-
- local function iterate(a,i)
- i = i + 1
- local v = a[i]
- if v ~= nil then
- return i, v --, nil
- end
- end
-
- function ipairs(a)
- return iterate, a, 0
- end
-
-end
-
-if not pairs then
-
- -- for k, v in pairs(t) do ... end
- -- for k, v in next, t do ... end
-
- function pairs(t)
- return next, t -- , nil
- end
-
-end
-
--- Also, unpack has been moved to the table table, and for compatiility
--- reasons we provide both now.
-
-if not table.unpack then
- table.unpack = _G.unpack
-elseif not unpack then
- _G.unpack = table.unpack
-end
+local getinfo = debug.getinfo
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
+local floor = math.floor
-- extra functions, some might go (when not used)
+local stripper = patterns.stripper
+
function table.strip(tab)
- local lst = { }
+ local lst, l = { }, 0
for i=1,#tab do
- local s = gsub(tab[i],"^%s*(.-)%s*$","%1")
+ local s = lpegmatch(stripper,tab[i]) or ""
if s == "" then
-- skip this one
else
- lst[#lst+1] = s
+ l = l + 1
+ lst[l] = s
end
end
return lst
end
function table.keys(t)
- local k = { }
- for key, _ in next, t do
- k[#k+1] = key
+ if t then
+ local keys, k = { }, 0
+ for key, _ in next, t do
+ k = k + 1
+ keys[k] = key
+ end
+ return keys
+ else
+ return { }
end
- return k
end
local function compare(a,b)
- return (tostring(a) < tostring(b))
+ local ta, tb = type(a), type(b) -- needed, else 11 < 2
+ if ta == tb and (ta == "number" or ta == "string") then -- only these types define <; booleans, tables and functions would raise
+ return a < b
+ else
+ return tostring(a) < tostring(b)
+ end
end
local function sortedkeys(tab)
- local srt, kind = { }, 0 -- 0=unknown 1=string, 2=number 3=mixed
- for key,_ in next, tab do
- srt[#srt+1] = key
- if kind == 3 then
- -- no further check
- else
- local tkey = type(key)
- if tkey == "string" then
- -- if kind == 2 then kind = 3 else kind = 1 end
- kind = (kind == 2 and 3) or 1
- elseif tkey == "number" then
- -- if kind == 1 then kind = 3 else kind = 2 end
- kind = (kind == 1 and 3) or 2
+ if tab then
+ local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed
+ for key,_ in next, tab do
+ s = s + 1
+ srt[s] = key
+ if category == 3 then
+ -- no further check
else
- kind = 3
+ local tkey = type(key)
+ if tkey == "string" then
+ category = (category == 2 and 3) or 1
+ elseif tkey == "number" then
+ category = (category == 1 and 3) or 2
+ else
+ category = 3
+ end
end
end
+ if category == 0 or category == 3 then
+ sort(srt,compare)
+ else
+ sort(srt)
+ end
+ return srt
+ else
+ return { }
end
- if kind == 0 or kind == 3 then
- sort(srt,compare)
+end
+
+local function sortedhashkeys(tab,cmp) -- fast one
+ if tab then
+ local srt, s = { }, 0
+ for key,_ in next, tab do
+ if key then
+ s= s + 1
+ srt[s] = key
+ end
+ end
+ sort(srt,cmp)
+ return srt
else
- sort(srt)
+ return { }
end
- return srt
end
-local function sortedhashkeys(tab) -- fast one
- local srt = { }
- for key,_ in next, tab do
- srt[#srt+1] = key
+function table.allkeys(t)
+ local keys = { }
+ for k, v in next, t do
+ for k, v in next, v do
+ keys[k] = true
+ end
end
- sort(srt)
- return srt
+ return sortedkeys(keys)
end
table.sortedkeys = sortedkeys
table.sortedhashkeys = sortedhashkeys
-function table.sortedhash(t)
- local s = sortedhashkeys(t) -- maybe just sortedkeys
- local n = 0
- local function kv(s)
- n = n + 1
- local k = s[n]
- return k, t[k]
+local function nothing() end
+
+local function sortedhash(t,cmp)
+ if t then
+ local s
+ if cmp then
+ -- it would be nice if the sort function would accept a third argument (or nicer, an optional first)
+ s = sortedhashkeys(t,function(a,b) return cmp(t,a,b) end)
+ else
+ s = sortedkeys(t) -- the robust one
+ end
+ local n = 0
+ local function kv(s)
+ n = n + 1
+ local k = s[n]
+ return k, t[k]
+ end
+ return kv, s
+ else
+ return nothing
end
- return kv, s
end
-table.sortedpairs = table.sortedhash
+table.sortedhash = sortedhash
+table.sortedpairs = sortedhash -- obsolete
-function table.append(t, list)
- for _,v in next, list do
- insert(t,v)
+function table.append(t,list)
+ local n = #t
+ for i=1,#list do
+ n = n + 1
+ t[n] = list[i]
end
+ return t
end
function table.prepend(t, list)
- for k,v in next, list do
- insert(t,k,v)
+ local nl = #list
+ local nt = nl + #t
+ for i=#t,1,-1 do
+ t[nt] = t[i]
+ nt = nt - 1
end
+ for i=1,#list do
+ t[i] = list[i]
+ end
+ return t
end
+-- function table.merge(t, ...) -- first one is target
+-- t = t or { }
+-- local lst = { ... }
+-- for i=1,#lst do
+-- for k, v in next, lst[i] do
+-- t[k] = v
+-- end
+-- end
+-- return t
+-- end
+
function table.merge(t, ...) -- first one is target
- t = t or {}
- local lst = {...}
- for i=1,#lst do
- for k, v in next, lst[i] do
+ t = t or { }
+ for i=1,select("#",...) do
+ for k, v in next, (select(i,...)) do
t[k] = v
end
end
return t
end
+-- function table.merged(...)
+-- local tmp, lst = { }, { ... }
+-- for i=1,#lst do
+-- for k, v in next, lst[i] do
+-- tmp[k] = v
+-- end
+-- end
+-- return tmp
+-- end
+
function table.merged(...)
- local tmp, lst = { }, {...}
- for i=1,#lst do
- for k, v in next, lst[i] do
- tmp[k] = v
+ local t = { }
+ for i=1,select("#",...) do
+ for k, v in next, (select(i,...)) do
+ t[k] = v
end
end
- return tmp
+ return t
end
+-- function table.imerge(t, ...)
+-- local lst, nt = { ... }, #t
+-- for i=1,#lst do
+-- local nst = lst[i]
+-- for j=1,#nst do
+-- nt = nt + 1
+-- t[nt] = nst[j]
+-- end
+-- end
+-- return t
+-- end
+
function table.imerge(t, ...)
- local lst = {...}
- for i=1,#lst do
- local nst = lst[i]
+ local nt = #t
+ for i=1,select("#",...) do
+ local nst = select(i,...)
for j=1,#nst do
- t[#t+1] = nst[j]
+ nt = nt + 1
+ t[nt] = nst[j]
end
end
return t
end
+-- function table.imerged(...)
+-- local tmp, ntmp, lst = { }, 0, {...}
+-- for i=1,#lst do
+-- local nst = lst[i]
+-- for j=1,#nst do
+-- ntmp = ntmp + 1
+-- tmp[ntmp] = nst[j]
+-- end
+-- end
+-- return tmp
+-- end
+
function table.imerged(...)
- local tmp, lst = { }, {...}
- for i=1,#lst do
- local nst = lst[i]
+ local tmp, ntmp = { }, 0
+ for i=1,select("#",...) do
+ local nst = select(i,...)
for j=1,#nst do
- tmp[#tmp+1] = nst[j]
+ ntmp = ntmp + 1
+ tmp[ntmp] = nst[j]
end
end
return tmp
end
-local function fastcopy(old) -- fast one
+local function fastcopy(old,metatabletoo) -- fast one
if old then
local new = { }
- for k,v in next, old do
+ for k, v in next, old do
if type(v) == "table" then
- new[k] = fastcopy(v) -- was just table.copy
+ new[k] = fastcopy(v,metatabletoo) -- was just table.copy
else
new[k] = v
end
end
- -- optional second arg
- local mt = getmetatable(old)
- if mt then
- setmetatable(new,mt)
+ if metatabletoo then
+ -- optional second arg
+ local mt = getmetatable(old)
+ if mt then
+ setmetatable(new,mt)
+ end
end
return new
else
@@ -215,6 +274,8 @@ local function fastcopy(old) -- fast one
end
end
+-- todo : copy without metatable
+
local function copy(t, tables) -- taken from lua wiki, slightly adapted
tables = tables or { }
local tcopy = {}
@@ -247,33 +308,14 @@ end
table.fastcopy = fastcopy
table.copy = copy
--- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack)
-
-function table.sub(t,i,j)
- return { unpack(t,i,j) }
-end
-
-function table.replace(a,b)
- for k,v in next, b do
- a[k] = v
+function table.derive(parent) -- for the moment not public
+ local child = { }
+ if parent then
+ setmetatable(child,{ __index = parent })
end
+ return child
end
--- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice)
-
-function table.is_empty(t) -- obolete, use inline code instead
- return not t or not next(t)
-end
-
-function table.one_entry(t) -- obolete, use inline code instead
- local n = next(t)
- return n and not next(t,n)
-end
-
---~ function table.starts_at(t) -- obsolete, not nice anyway
---~ return ipairs(t,1)(t,0)
---~ end
-
function table.tohash(t,value)
local h = { }
if t then
@@ -286,27 +328,19 @@ function table.tohash(t,value)
end
function table.fromhash(t)
- local h = { }
+ local hsh, h = { }, 0
for k, v in next, t do -- no ipairs here
- if v then h[#h+1] = k end
+ if v then
+ h = h + 1
+ hsh[h] = k
+ end
end
- return h
+ return hsh
end
---~ print(table.serialize(t), "\n")
---~ print(table.serialize(t,"name"), "\n")
---~ print(table.serialize(t,false), "\n")
---~ print(table.serialize(t,true), "\n")
---~ print(table.serialize(t,"name",true), "\n")
---~ print(table.serialize(t,"name",true,true), "\n")
-
-table.serialize_functions = true
-table.serialize_compact = true
-table.serialize_inline = true
-
local noquotes, hexify, handle, reduce, compact, inline, functions
-local reserved = table.tohash { -- intercept a language flaw, no reserved words as key
+local reserved = table.tohash { -- intercept a language inconvenience: no reserved words as key
'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', 'if',
'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true', 'until', 'while',
}
@@ -318,20 +352,23 @@ local function simple_table(t)
n = n + 1
end
if n == #t then
- local tt = { }
+ local tt, nt = { }, 0
for i=1,#t do
local v = t[i]
local tv = type(v)
if tv == "number" then
+ nt = nt + 1
if hexify then
- tt[#tt+1] = format("0x%04X",v)
+ tt[nt] = format("0x%04X",v)
else
- tt[#tt+1] = tostring(v) -- tostring not needed
+ tt[nt] = tostring(v) -- tostring not needed
end
elseif tv == "boolean" then
- tt[#tt+1] = tostring(v)
+ nt = nt + 1
+ tt[nt] = tostring(v)
elseif tv == "string" then
- tt[#tt+1] = format("%q",v)
+ nt = nt + 1
+ tt[nt] = format("%q",v)
else
tt = nil
break
@@ -352,36 +389,64 @@ end
-- problem: there no good number_to_string converter with the best resolution
+-- probably using .. is faster than format
+-- maybe split in a few cases (yes/no hexify)
+
+-- todo: %g faster on numbers than %s
+
+-- we can speed this up with repeaters and formatters (is indeed faster)
+
+local propername = patterns.propername -- was find(name,"^%a[%w%_]*$")
+
+local function dummy() end
+
local function do_serialize(root,name,depth,level,indexed)
if level > 0 then
depth = depth .. " "
if indexed then
handle(format("%s{",depth))
- elseif name then
- --~ handle(format("%s%s={",depth,key(name)))
- if type(name) == "number" then -- or find(k,"^%d+$") then
+ else
+ local tn = type(name)
+ if tn == "number" then
if hexify then
handle(format("%s[0x%04X]={",depth,name))
else
handle(format("%s[%s]={",depth,name))
end
- elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then
- handle(format("%s%s={",depth,name))
+ elseif tn == "string" then
+ if noquotes and not reserved[name] and lpegmatch(propername,name) then
+ handle(format("%s%s={",depth,name))
+ else
+ handle(format("%s[%q]={",depth,name))
+ end
+ elseif tn == "boolean" then
+ handle(format("%s[%s]={",depth,tostring(name)))
else
- handle(format("%s[%q]={",depth,name))
+ handle(format("%s{",depth))
end
- else
- handle(format("%s{",depth))
end
end
-- we could check for k (index) being number (cardinal)
if root and next(root) then
- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone)
+ -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone)
+ -- if compact then
+ -- -- NOT: for k=1,#root do (we need to quit at nil)
+ -- for k,v in ipairs(root) do -- can we use next?
+ -- if not first then first = k end
+ -- last = last + 1
+ -- end
+ -- end
+ local first, last = nil, 0
if compact then
- -- NOT: for k=1,#root do (we need to quit at nil)
- for k,v in ipairs(root) do -- can we use next?
- if not first then first = k end
- last = last + 1
+ last = #root
+ for k=1,last do
+ if root[k] == nil then
+ last = k - 1
+ break
+ end
+ end
+ if last > 0 then
+ first = 1
end
end
local sk = sortedkeys(root)
@@ -391,8 +456,8 @@ local function do_serialize(root,name,depth,level,indexed)
--~ if v == root then
-- circular
--~ else
- local t = type(v)
- if compact and first and type(k) == "number" and k >= first and k <= last then
+ local t, tk = type(v), type(k)
+ if compact and first and tk == "number" and k >= first and k <= last then
if t == "number" then
if hexify then
handle(format("%s 0x%04X,",depth,v))
@@ -422,7 +487,7 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s %s,",depth,tostring(v)))
elseif t == "function" then
if functions then
- handle(format('%s loadstring(%q),',depth,dump(v)))
+ handle(format('%s load(%q),',depth,dump(v)))
else
handle(format('%s "function",',depth))
end
@@ -434,18 +499,19 @@ local function do_serialize(root,name,depth,level,indexed)
handle(format("%s __p__=nil,",depth))
end
elseif t == "number" then
- --~ if hexify then
- --~ handle(format("%s %s=0x%04X,",depth,key(k),v))
- --~ else
- --~ handle(format("%s %s=%s,",depth,key(k),v)) -- %.99g
- --~ end
- if type(k) == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=0x%04X,",depth,k,v))
else
handle(format("%s [%s]=%s,",depth,k,v)) -- %.99g
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif tk == "boolean" then
+ if hexify then
+ handle(format("%s [%s]=0x%04X,",depth,tostring(k),v))
+ else
+ handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g
+ end
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
if hexify then
handle(format("%s %s=0x%04X,",depth,k,v))
else
@@ -460,27 +526,29 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif t == "string" then
if reduce and tonumber(v) then
- --~ handle(format("%s %s=%s,",depth,key(k),v))
- if type(k) == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%s,",depth,k,v))
else
handle(format("%s [%s]=%s,",depth,k,v))
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif tk == "boolean" then
+ handle(format("%s [%s]=%s,",depth,tostring(k),v))
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%s,",depth,k,v))
else
handle(format("%s [%q]=%s,",depth,k,v))
end
else
- --~ handle(format("%s %s=%q,",depth,key(k),v))
- if type(k) == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%q,",depth,k,v))
else
handle(format("%s [%s]=%q,",depth,k,v))
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif tk == "boolean" then
+ handle(format("%s [%s]=%q,",depth,tostring(k),v))
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%q,",depth,k,v))
else
handle(format("%s [%q]=%q,",depth,k,v))
@@ -488,14 +556,15 @@ local function do_serialize(root,name,depth,level,indexed)
end
elseif t == "table" then
if not next(v) then
- --~ handle(format("%s %s={},",depth,key(k)))
- if type(k) == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]={},",depth,k))
else
handle(format("%s [%s]={},",depth,k))
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif tk == "boolean" then
+ handle(format("%s [%s]={},",depth,tostring(k)))
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s={},",depth,k))
else
handle(format("%s [%q]={},",depth,k))
@@ -503,14 +572,15 @@ local function do_serialize(root,name,depth,level,indexed)
elseif inline then
local st = simple_table(v)
if st then
- --~ handle(format("%s %s={ %s },",depth,key(k),concat(st,", ")))
- if type(k) == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", ")))
else
handle(format("%s [%s]={ %s },",depth,k,concat(st,", ")))
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif tk == "boolean" then
+ handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", ")))
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s={ %s },",depth,k,concat(st,", ")))
else
handle(format("%s [%q]={ %s },",depth,k,concat(st,", ")))
@@ -522,42 +592,47 @@ local function do_serialize(root,name,depth,level,indexed)
do_serialize(v,k,depth,level+1)
end
elseif t == "boolean" then
- --~ handle(format("%s %s=%s,",depth,key(k),tostring(v)))
- if type(k) == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%s,",depth,k,tostring(v)))
else
handle(format("%s [%s]=%s,",depth,k,tostring(v)))
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif tk == "boolean" then
+ handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v)))
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%s,",depth,k,tostring(v)))
else
handle(format("%s [%q]=%s,",depth,k,tostring(v)))
end
elseif t == "function" then
if functions then
- --~ handle(format('%s %s=loadstring(%q),',depth,key(k),dump(v)))
- if type(k) == "number" then -- or find(k,"^%d+$") then
+ local f = getinfo(v).what == "C" and dump(dummy) or dump(v)
+ -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v)
+ if tk == "number" then
if hexify then
- handle(format("%s [0x%04X]=loadstring(%q),",depth,k,dump(v)))
+ handle(format("%s [0x%04X]=load(%q),",depth,k,f))
else
- handle(format("%s [%s]=loadstring(%q),",depth,k,dump(v)))
+ handle(format("%s [%s]=load(%q),",depth,k,f))
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
- handle(format("%s %s=loadstring(%q),",depth,k,dump(v)))
+ elseif tk == "boolean" then
+ handle(format("%s [%s]=load(%q),",depth,tostring(k),f))
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+ handle(format("%s %s=load(%q),",depth,k,f))
else
- handle(format("%s [%q]=loadstring(%q),",depth,k,dump(v)))
+ handle(format("%s [%q]=load(%q),",depth,k,f))
end
end
else
- --~ handle(format("%s %s=%q,",depth,key(k),tostring(v)))
- if type(k) == "number" then -- or find(k,"^%d+$") then
+ if tk == "number" then
if hexify then
handle(format("%s [0x%04X]=%q,",depth,k,tostring(v)))
else
handle(format("%s [%s]=%q,",depth,k,tostring(v)))
end
- elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then
+ elseif tk == "boolean" then
+ handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v)))
+ elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
handle(format("%s %s=%q,",depth,k,tostring(v)))
else
handle(format("%s [%q]=%q,",depth,k,tostring(v)))
@@ -574,15 +649,34 @@ end
-- replacing handle by a direct t[#t+1] = ... (plus test) is not much
-- faster (0.03 on 1.00 for zapfino.tma)
-local function serialize(root,name,_handle,_reduce,_noquotes,_hexify)
- noquotes = _noquotes
- hexify = _hexify
- handle = _handle or print
- reduce = _reduce or false
- compact = table.serialize_compact
- inline = compact and table.serialize_inline
- functions = table.serialize_functions
+local function serialize(_handle,root,name,specification) -- handle wins
local tname = type(name)
+ if type(specification) == "table" then
+ noquotes = specification.noquotes
+ hexify = specification.hexify
+ handle = _handle or specification.handle or print
+ reduce = specification.reduce or false
+ functions = specification.functions
+ compact = specification.compact
+ inline = specification.inline and compact
+ if functions == nil then
+ functions = true
+ end
+ if compact == nil then
+ compact = true
+ end
+ if inline == nil then
+ inline = compact
+ end
+ else
+ noquotes = false
+ hexify = false
+ handle = _handle or print
+ reduce = false
+ compact = true
+ inline = true
+ functions = true
+ end
if tname == "string" then
if name == "return" then
handle("return {")
@@ -604,33 +698,364 @@ local function serialize(root,name,_handle,_reduce,_noquotes,_hexify)
else
handle("t={")
end
- if root and next(root) then
- do_serialize(root,name,"",0,indexed)
+ if root then
+ -- The dummy access will initialize a table that has a delayed initialization
+ -- using a metatable. (maybe explicitly test for metatable)
+ if getmetatable(root) then -- todo: make this an option, maybe even per subtable
+ local dummy = root._w_h_a_t_e_v_e_r_
+ root._w_h_a_t_e_v_e_r_ = nil
+ end
+ -- Let's forget about empty tables.
+ if next(root) then
+ do_serialize(root,name,"",0)
+ end
end
handle("}")
end
---~ name:
---~
---~ true : return { }
---~ false : { }
---~ nil : t = { }
---~ string : string = { }
---~ 'return' : return { }
---~ number : [number] = { }
-
-function table.serialize(root,name,reduce,noquotes,hexify)
- local t = { }
+-- -- This is some 20% faster than using format (because formatters are much faster) but
+-- -- of course, inlining the format using .. is then again faster .. anyway, as we do
+-- -- some pretty printing as well there is not that much to gain unless we make a 'fast'
+-- -- ugly variant as well. But, we would have to move the formatter to l-string then.
+
+-- local formatters = string.formatters
+
+-- local function do_serialize(root,name,level,indexed)
+-- if level > 0 then
+-- if indexed then
+-- handle(formatters["%w{"](level))
+-- else
+-- local tn = type(name)
+-- if tn == "number" then
+-- if hexify then
+-- handle(formatters["%w[%04H]={"](level,name))
+-- else
+-- handle(formatters["%w[%s]={"](level,name))
+-- end
+-- elseif tn == "string" then
+-- if noquotes and not reserved[name] and lpegmatch(propername,name) then
+-- handle(formatters["%w%s={"](level,name))
+-- else
+-- handle(formatters["%w[%q]={"](level,name))
+-- end
+-- elseif tn == "boolean" then
+-- handle(formatters["%w[%S]={"](level,name))
+-- else
+-- handle(formatters["%w{"](level))
+-- end
+-- end
+-- end
+-- -- we could check for k (index) being number (cardinal)
+-- if root and next(root) then
+-- -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone)
+-- -- if compact then
+-- -- -- NOT: for k=1,#root do (we need to quit at nil)
+-- -- for k,v in ipairs(root) do -- can we use next?
+-- -- if not first then first = k end
+-- -- last = last + 1
+-- -- end
+-- -- end
+-- local first, last = nil, 0
+-- if compact then
+-- last = #root
+-- for k=1,last do
+-- if root[k] == nil then
+-- last = k - 1
+-- break
+-- end
+-- end
+-- if last > 0 then
+-- first = 1
+-- end
+-- end
+-- local sk = sortedkeys(root)
+-- for i=1,#sk do
+-- local k = sk[i]
+-- local v = root[k]
+-- --~ if v == root then
+-- -- circular
+-- --~ else
+-- local t, tk = type(v), type(k)
+-- if compact and first and tk == "number" and k >= first and k <= last then
+-- if t == "number" then
+-- if hexify then
+-- handle(formatters["%w %04H,"](level,v))
+-- else
+-- handle(formatters["%w %s,"](level,v)) -- %.99g
+-- end
+-- elseif t == "string" then
+-- if reduce and tonumber(v) then
+-- handle(formatters["%w %s,"](level,v))
+-- else
+-- handle(formatters["%w %q,"](level,v))
+-- end
+-- elseif t == "table" then
+-- if not next(v) then
+-- handle(formatters["%w {},"](level))
+-- elseif inline then -- and #t > 0
+-- local st = simple_table(v)
+-- if st then
+-- handle(formatters["%w { %, t },"](level,st))
+-- else
+-- do_serialize(v,k,level+1,true)
+-- end
+-- else
+-- do_serialize(v,k,level+1,true)
+-- end
+-- elseif t == "boolean" then
+-- handle(formatters["%w %S,"](level,v))
+-- elseif t == "function" then
+-- if functions then
+-- handle(formatters['%w load(%q),'](level,dump(v)))
+-- else
+-- handle(formatters['%w "function",'](level))
+-- end
+-- else
+-- handle(formatters["%w %Q,"](level,v))
+-- end
+-- elseif k == "__p__" then -- parent
+-- if false then
+-- handle(formatters["%w __p__=nil,"](level))
+-- end
+-- elseif t == "number" then
+-- if tk == "number" then
+-- if hexify then
+-- handle(formatters["%w [%04H]=%04H,"](level,k,v))
+-- else
+-- handle(formatters["%w [%s]=%s,"](level,k,v)) -- %.99g
+-- end
+-- elseif tk == "boolean" then
+-- if hexify then
+-- handle(formatters["%w [%S]=%04H,"](level,k,v))
+-- else
+-- handle(formatters["%w [%S]=%s,"](level,k,v)) -- %.99g
+-- end
+-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+-- if hexify then
+-- handle(formatters["%w %s=%04H,"](level,k,v))
+-- else
+-- handle(formatters["%w %s=%s,"](level,k,v)) -- %.99g
+-- end
+-- else
+-- if hexify then
+-- handle(formatters["%w [%q]=%04H,"](level,k,v))
+-- else
+-- handle(formatters["%w [%q]=%s,"](level,k,v)) -- %.99g
+-- end
+-- end
+-- elseif t == "string" then
+-- if reduce and tonumber(v) then
+-- if tk == "number" then
+-- if hexify then
+-- handle(formatters["%w [%04H]=%s,"](level,k,v))
+-- else
+-- handle(formatters["%w [%s]=%s,"](level,k,v))
+-- end
+-- elseif tk == "boolean" then
+-- handle(formatters["%w [%S]=%s,"](level,k,v))
+-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+-- handle(formatters["%w %s=%s,"](level,k,v))
+-- else
+-- handle(formatters["%w [%q]=%s,"](level,k,v))
+-- end
+-- else
+-- if tk == "number" then
+-- if hexify then
+-- handle(formatters["%w [%04H]=%q,"](level,k,v))
+-- else
+-- handle(formatters["%w [%s]=%q,"](level,k,v))
+-- end
+-- elseif tk == "boolean" then
+-- handle(formatters["%w [%S]=%q,"](level,k,v))
+-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+-- handle(formatters["%w %s=%q,"](level,k,v))
+-- else
+-- handle(formatters["%w [%q]=%q,"](level,k,v))
+-- end
+-- end
+-- elseif t == "table" then
+-- if not next(v) then
+-- if tk == "number" then
+-- if hexify then
+-- handle(formatters["%w [%04H]={},"](level,k))
+-- else
+-- handle(formatters["%w [%s]={},"](level,k))
+-- end
+-- elseif tk == "boolean" then
+-- handle(formatters["%w [%S]={},"](level,k))
+-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+-- handle(formatters["%w %s={},"](level,k))
+-- else
+-- handle(formatters["%w [%q]={},"](level,k))
+-- end
+-- elseif inline then
+-- local st = simple_table(v)
+-- if st then
+-- if tk == "number" then
+-- if hexify then
+-- handle(formatters["%w [%04H]={ %, t },"](level,k,st))
+-- else
+-- handle(formatters["%w [%s]={ %, t },"](level,k,st))
+-- end
+-- elseif tk == "boolean" then
+-- handle(formatters["%w [%S]={ %, t },"](level,k,st))
+-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+-- handle(formatters["%w %s={ %, t },"](level,k,st))
+-- else
+-- handle(formatters["%w [%q]={ %, t },"](level,k,st))
+-- end
+-- else
+-- do_serialize(v,k,level+1)
+-- end
+-- else
+-- do_serialize(v,k,level+1)
+-- end
+-- elseif t == "boolean" then
+-- if tk == "number" then
+-- if hexify then
+-- handle(formatters["%w [%04H]=%S,"](level,k,v))
+-- else
+-- handle(formatters["%w [%s]=%S,"](level,k,v))
+-- end
+-- elseif tk == "boolean" then
+-- handle(formatters["%w [%S]=%S,"](level,k,v))
+-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+-- handle(formatters["%w %s=%S,"](level,k,v))
+-- else
+-- handle(formatters["%w [%q]=%S,"](level,k,v))
+-- end
+-- elseif t == "function" then
+-- if functions then
+-- local f = getinfo(v).what == "C" and dump(dummy) or dump(v)
+-- -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v)
+-- if tk == "number" then
+-- if hexify then
+-- handle(formatters["%w [%04H]=load(%q),"](level,k,f))
+-- else
+-- handle(formatters["%w [%s]=load(%q),"](level,k,f))
+-- end
+-- elseif tk == "boolean" then
+-- handle(formatters["%w [%S]=load(%q),"](level,k,f))
+-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+-- handle(formatters["%w %s=load(%q),"](level,k,f))
+-- else
+-- handle(formatters["%w [%q]=load(%q),"](level,k,f))
+-- end
+-- end
+-- else
+-- if tk == "number" then
+-- if hexify then
+-- handle(formatters["%w [%04H]=%Q,"](level,k,v))
+-- else
+-- handle(formatters["%w [%s]=%Q,"](level,k,v))
+-- end
+-- elseif tk == "boolean" then
+-- handle(formatters["%w [%S]=%Q,"](level,k,v))
+-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then
+-- handle(formatters["%w %s=%Q,"](level,k,v))
+-- else
+-- handle(formatters["%w [%q]=%Q,"](level,k,v))
+-- end
+-- end
+-- --~ end
+-- end
+-- end
+-- if level > 0 then
+-- handle(formatters["%w}"](level))
+-- end
+-- end
+
+-- local function serialize(_handle,root,name,specification) -- handle wins
+-- local tname = type(name)
+-- if type(specification) == "table" then
+-- noquotes = specification.noquotes
+-- hexify = specification.hexify
+-- handle = _handle or specification.handle or print
+-- reduce = specification.reduce or false
+-- functions = specification.functions
+-- compact = specification.compact
+-- inline = specification.inline and compact
+-- if functions == nil then
+-- functions = true
+-- end
+-- if compact == nil then
+-- compact = true
+-- end
+-- if inline == nil then
+-- inline = compact
+-- end
+-- else
+-- noquotes = false
+-- hexify = false
+-- handle = _handle or print
+-- reduce = false
+-- compact = true
+-- inline = true
+-- functions = true
+-- end
+-- if tname == "string" then
+-- if name == "return" then
+-- handle("return {")
+-- else
+-- handle(name .. "={")
+-- end
+-- elseif tname == "number" then
+-- if hexify then
+-- handle(format("[0x%04X]={",name))
+-- else
+-- handle("[" .. name .. "]={")
+-- end
+-- elseif tname == "boolean" then
+-- if name then
+-- handle("return {")
+-- else
+-- handle("{")
+-- end
+-- else
+-- handle("t={")
+-- end
+-- if root then
+-- -- The dummy access will initialize a table that has a delayed initialization
+-- -- using a metatable. (maybe explicitly test for metatable)
+-- if getmetatable(root) then -- todo: make this an option, maybe even per subtable
+-- local dummy = root._w_h_a_t_e_v_e_r_
+-- root._w_h_a_t_e_v_e_r_ = nil
+-- end
+-- -- Let's forget about empty tables.
+-- if next(root) then
+-- do_serialize(root,name,0)
+-- end
+-- end
+-- handle("}")
+-- end
+
+-- name:
+--
+-- true : return { }
+-- false : { }
+-- nil : t = { }
+-- string : string = { }
+-- "return" : return { }
+-- number : [number] = { }
+
+function table.serialize(root,name,specification)
+ local t, n = { }, 0
local function flush(s)
- t[#t+1] = s
+ n = n + 1
+ t[n] = s
end
- serialize(root,name,flush,reduce,noquotes,hexify)
+ serialize(flush,root,name,specification)
return concat(t,"\n")
end
-function table.tohandle(handle,root,name,reduce,noquotes,hexify)
- serialize(root,name,handle,reduce,noquotes,hexify)
-end
+-- local a = { e = { 1,2,3,4,5,6}, a = 1, b = 2, c = "ccc", d = { a = 1, b = 2, c = "ccc", d = { a = 1, b = 2, c = "ccc" } } }
+-- local t = os.clock()
+-- for i=1,10000 do
+-- table.serialize(a)
+-- end
+-- print(os.clock()-t,table.serialize(a))
+
+table.tohandle = serialize
-- sometimes tables are real use (zapfino extra pro is some 85M) in which
-- case a stepwise serialization is nice; actually, we could consider:
@@ -641,73 +1066,63 @@ end
--
-- so this is on the todo list
-table.tofile_maxtab = 2*1024
+local maxtab = 2*1024
-function table.tofile(filename,root,name,reduce,noquotes,hexify)
+function table.tofile(filename,root,name,specification)
local f = io.open(filename,'w')
if f then
- local maxtab = table.tofile_maxtab
if maxtab > 1 then
- local t = { }
+ local t, n = { }, 0
local function flush(s)
- t[#t+1] = s
- if #t > maxtab then
+ n = n + 1
+ t[n] = s
+ if n > maxtab then
f:write(concat(t,"\n"),"\n") -- hm, write(sometable) should be nice
- t = { }
+ t, n = { }, 0 -- we could recycle t if needed
end
end
- serialize(root,name,flush,reduce,noquotes,hexify)
+ serialize(flush,root,name,specification)
f:write(concat(t,"\n"),"\n")
else
local function flush(s)
f:write(s,"\n")
end
- serialize(root,name,flush,reduce,noquotes,hexify)
+ serialize(flush,root,name,specification)
end
f:close()
+ io.flush()
end
end
-local function flatten(t,f,complete) -- is this used? meybe a variant with next, ...
- for i=1,#t do
- local v = t[i]
- if type(v) == "table" then
- if complete or type(v[1]) == "table" then
- flatten(v,f,complete)
+local function flattened(t,f,depth)
+ if f == nil then
+ f = { }
+ depth = 0xFFFF
+ elseif tonumber(f) then
+ -- assume that only two arguments are given
+ depth = f
+ f = { }
+ elseif not depth then
+ depth = 0xFFFF
+ end
+ for k, v in next, t do
+ if type(k) ~= "number" then
+ if depth > 0 and type(v) == "table" then
+ flattened(v,f,depth-1)
else
- f[#f+1] = v
+ f[k] = v
end
- else
- f[#f+1] = v
end
end
-end
-
-function table.flatten(t)
- local f = { }
- flatten(t,f,true)
- return f
-end
-
-function table.unnest(t) -- bad name
- local f = { }
- flatten(t,f,false)
- return f
-end
-
-table.flatten_one_level = table.unnest
-
--- a better one:
-
-local function flattened(t,f)
- if not f then
- f = { }
- end
- for k, v in next, t do
- if type(v) == "table" then
- flattened(v,f)
+ local n = #f
+ for k=1,#t do
+ local v = t[k]
+ if depth > 0 and type(v) == "table" then
+ flattened(v,f,depth-1)
+ n = #f
else
- f[k] = v
+ n = n + 1
+ f[n] = v
end
end
return f
@@ -715,49 +1130,27 @@ end
table.flattened = flattened
--- the next three may disappear
-
-function table.remove_value(t,value) -- todo: n
- if value then
- for i=1,#t do
- if t[i] == value then
- remove(t,i)
- -- remove all, so no: return
- end
- end
+local function unnest(t,f) -- only used in mk, for old times sake
+ if not f then -- and only relevant for token lists
+ f = { } -- this one can become obsolete
end
-end
-
-function table.insert_before_value(t,value,str)
- if str then
- if value then
- for i=1,#t do
- if t[i] == value then
- insert(t,i,str)
- return
- end
+ for i=1,#t do
+ local v = t[i]
+ if type(v) == "table" then
+ if type(v[1]) == "table" then
+ unnest(v,f)
+ else
+ f[#f+1] = v
end
+ else
+ f[#f+1] = v
end
- insert(t,1,str)
- elseif value then
- insert(t,1,value)
end
+ return f
end
-function table.insert_after_value(t,value,str)
- if str then
- if value then
- for i=1,#t do
- if t[i] == value then
- insert(t,i+1,str)
- return
- end
- end
- end
- t[#t+1] = str
- elseif value then
- t[#t+1] = value
- end
+function table.unnest(t) -- bad name
+ return unnest(t)
end
local function are_equal(a,b,n,m) -- indexed
@@ -768,7 +1161,7 @@ local function are_equal(a,b,n,m) -- indexed
local ai, bi = a[i], b[i]
if ai==bi then
-- same
- elseif type(ai)=="table" and type(bi)=="table" then
+ elseif type(ai) == "table" and type(bi) == "table" then
if not are_equal(ai,bi) then
return false
end
@@ -784,7 +1177,7 @@ end
local function identical(a,b) -- assumes same structure
for ka, va in next, a do
- local vb = b[k]
+ local vb = b[ka]
if va == vb then
-- same
elseif type(va) == "table" and type(vb) == "table" then
@@ -798,15 +1191,15 @@ local function identical(a,b) -- assumes same structure
return true
end
-table.are_equal = are_equal
table.identical = identical
+table.are_equal = are_equal
-- maybe also make a combined one
-function table.compact(t)
+function table.compact(t) -- remove empty tables, assumes subtables
if t then
- for k,v in next, t do
- if not next(v) then
+ for k, v in next, t do
+ if not next(v) then -- no type checking
t[k] = nil
end
end
@@ -825,86 +1218,148 @@ function table.contains(t, v)
end
function table.count(t)
- local n, e = 0, next(t)
- while e do
- n, e = n + 1, next(t,e)
+ local n = 0
+ for k, v in next, t do
+ n = n + 1
end
return n
end
-function table.swapped(t)
- local s = { }
+function table.swapped(t,s) -- hash
+ local n = { }
+ if s then
+ for k, v in next, s do
+ n[k] = v
+ end
+ end
for k, v in next, t do
- s[v] = k
+ n[v] = k
end
- return s
+ return n
end
---~ function table.are_equal(a,b)
---~ return table.serialize(a) == table.serialize(b)
---~ end
-
-function table.clone(t,p) -- t is optional or nil or table
- if not p then
- t, p = { }, t or { }
- elseif not t then
- t = { }
+function table.mirrored(t) -- hash
+ local n = { }
+ for k, v in next, t do
+ n[v] = k
+ n[k] = v
end
- setmetatable(t, { __index = function(_,key) return p[key] end }) -- why not __index = p ?
- return t
-end
-
-function table.hexed(t,seperator)
- local tt = { }
- for i=1,#t do tt[i] = format("0x%04X",t[i]) end
- return concat(tt,seperator or " ")
+ return n
end
-function table.reverse_hash(h)
- local r = { }
- for k,v in next, h do
- r[v] = lower(gsub(k," ",""))
+function table.reversed(t)
+ if t then
+ local tt, tn = { }, #t
+ if tn > 0 then
+ local ttn = 0
+ for i=tn,1,-1 do
+ ttn = ttn + 1
+ tt[ttn] = t[i]
+ end
+ end
+ return tt
end
- return r
end
function table.reverse(t)
- local tt = { }
- if #t > 0 then
- for i=#t,1,-1 do
- tt[#tt+1] = t[i]
+ if t then
+ local n = #t
+ for i=1,floor(n/2) do
+ local j = n - i + 1
+ t[i], t[j] = t[j], t[i]
end
+ return t
end
- return tt
end
-function table.insert_before_value(t,value,extra)
- for i=1,#t do
- if t[i] == extra then
- remove(t,i)
- end
+function table.sequenced(t,sep,simple) -- hash only
+ if not t then
+ return ""
end
- for i=1,#t do
- if t[i] == value then
- insert(t,i,extra)
- return
+ local n = #t
+ local s = { }
+ if n > 0 then
+ -- indexed
+ for i=1,n do
+ s[i] = tostring(t[i])
+ end
+ else
+ -- hashed
+ n = 0
+ for k, v in sortedhash(t) do
+ if simple then
+ if v == true then
+ n = n + 1
+ s[n] = k
+ elseif v and v~= "" then
+ n = n + 1
+ s[n] = k .. "=" .. tostring(v)
+ end
+ else
+ n = n + 1
+ s[n] = k .. "=" .. tostring(v)
+ end
end
end
- insert(t,1,extra)
+ return concat(s,sep or " | ")
end
-function table.insert_after_value(t,value,extra)
- for i=1,#t do
- if t[i] == extra then
- remove(t,i)
- end
+function table.print(t,...)
+ if type(t) ~= "table" then
+ print(tostring(t))
+ else
+ serialize(print,t,...)
end
- for i=1,#t do
- if t[i] == value then
- insert(t,i+1,extra)
- return
+end
+
+setinspector(function(v) if type(v) == "table" then serialize(print,v,"table") return true end end)
+
+-- -- -- obsolete but we keep them for a while and might comment them later -- -- --
+
+-- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack)
+
+function table.sub(t,i,j)
+ return { unpack(t,i,j) }
+end
+
+-- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice)
+
+function table.is_empty(t)
+ return not t or not next(t)
+end
+
+-- Reports whether t holds exactly one key/value pair.
+-- Returns t itself (nil or false) when no table is given, false for an
+-- empty table or a table with two or more entries, and true otherwise.
+function table.has_one_entry(t)
+    if not t then
+        return t
+    end
+    -- next(t,nil) restarts the traversal, so the empty table has to be ruled
+    -- out before asking for a second key; otherwise it would count as having
+    -- one entry.
+    local first = next(t)
+    return first ~= nil and next(t,first) == nil
+end
+
+-- new
+
+function table.loweredkeys(t) -- maybe utf
+ local l = { }
+ for k, v in next, t do
+ l[lower(k)] = v
+ end
+ return l
+end
+
+-- new, might move (maybe duplicate)
+
+function table.unique(old)
+ local hash = { }
+ local new = { }
+ local n = 0
+ for i=1,#old do
+ local oi = old[i]
+ if not hash[oi] then
+ n = n + 1
+ new[n] = oi
+ hash[oi] = true
end
end
- insert(t,#t+1,extra)
+ return new
end
+function table.sorted(t,...)
+ sort(t,...)
+ return t -- still sorts in-place
+end
diff --git a/lualibs-unicode.lua b/lualibs-unicode.lua
index 0c5a601..813ffd5 100644
--- a/lualibs-unicode.lua
+++ b/lualibs-unicode.lua
@@ -6,170 +6,746 @@ if not modules then modules = { } end modules ['l-unicode'] = {
license = "see context related readme files"
}
+-- this module will be reorganized
+
+-- todo: utf.sub replacement (used in syst-aux)
+
+-- we put these in the utf namespace:
+
+utf = utf or (unicode and unicode.utf8) or { }
+
+utf.characters = utf.characters or string.utfcharacters
+utf.values = utf.values or string.utfvalues
+
+-- string.utfvalues
+-- string.utfcharacters
+-- string.characters
+-- string.characterpairs
+-- string.bytes
+-- string.bytepairs
+
+local type = type
+local char, byte, format, sub = string.char, string.byte, string.format, string.sub
+local concat = table.concat
+local P, C, R, Cs, Ct, Cmt, Cc, Carg, Cp = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Carg, lpeg.Cp
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
+
+local bytepairs = string.bytepairs
+
+local finder = lpeg.finder
+local replacer = lpeg.replacer
+
+local utfvalues = utf.values
+local utfgmatch = utf.gmatch -- not always present
+
+local p_utftype = patterns.utftype
+local p_utfoffset = patterns.utfoffset
+local p_utf8char = patterns.utf8char
+local p_utf8byte = patterns.utf8byte
+local p_utfbom = patterns.utfbom
+local p_newline = patterns.newline
+local p_whitespace = patterns.whitespace
+
if not unicode then
- unicode = { utf8 = { } }
+ unicode = { utf = utf } -- for a while
+
+end
+
+if not utf.char then
local floor, char = math.floor, string.char
- function unicode.utf8.utfchar(n)
+ function utf.char(n)
if n < 0x80 then
+ -- 0aaaaaaa : 0x80
return char(n)
elseif n < 0x800 then
- return char(0xC0 + floor(n/0x40)) .. char(0x80 + (n % 0x40))
+ -- 110bbbaa : 0xC0 : n >> 6
+ -- 10aaaaaa : 0x80 : n & 0x3F
+ return char(
+ 0xC0 + floor(n/0x40),
+ 0x80 + (n % 0x40)
+ )
elseif n < 0x10000 then
- return char(0xE0 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40))
- elseif n < 0x40000 then
- return char(0xF0 + floor(n/0x40000)) .. char(0x80 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40))
- else -- wrong:
- -- return char(0xF1 + floor(n/0x1000000)) .. char(0x80 + floor(n/0x40000)) .. char(0x80 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40))
- return "?"
+ -- 1110bbbb : 0xE0 : n >> 12
+ -- 10bbbbaa : 0x80 : (n >> 6) & 0x3F
+ -- 10aaaaaa : 0x80 : n & 0x3F
+ return char(
+ 0xE0 + floor(n/0x1000),
+ 0x80 + (floor(n/0x40) % 0x40),
+ 0x80 + (n % 0x40)
+ )
+ elseif n < 0x200000 then
+ -- 11110ccc : 0xF0 : n >> 18
+ -- 10ccbbbb : 0x80 : (n >> 12) & 0x3F
+ -- 10bbbbaa : 0x80 : (n >> 6) & 0x3F
+ -- 10aaaaaa : 0x80 : n & 0x3F
+ -- dddd : ccccc - 1
+ return char(
+ 0xF0 + floor(n/0x40000),
+ 0x80 + (floor(n/0x1000) % 0x40),
+ 0x80 + (floor(n/0x40) % 0x40),
+ 0x80 + (n % 0x40)
+ )
+ else
+ return ""
end
end
end
-utf = utf or unicode.utf8
+if not utf.byte then
-local concat, utfchar, utfgsub = table.concat, utf.char, utf.gsub
-local char, byte, find, bytepairs = string.char, string.byte, string.find, string.bytepairs
+ local utf8byte = patterns.utf8byte
--- 0 EF BB BF UTF-8
--- 1 FF FE UTF-16-little-endian
--- 2 FE FF UTF-16-big-endian
--- 3 FF FE 00 00 UTF-32-little-endian
--- 4 00 00 FE FF UTF-32-big-endian
+ function utf.byte(c)
+ return lpegmatch(utf8byte,c)
+ end
-unicode.utfname = {
- [0] = 'utf-8',
- [1] = 'utf-16-le',
- [2] = 'utf-16-be',
- [3] = 'utf-32-le',
- [4] = 'utf-32-be'
-}
+end
--- \000 fails in <= 5.0 but is valid in >=5.1 where %z is depricated
+local utfchar, utfbyte = utf.char, utf.byte
-function unicode.utftype(f)
- local str = f:read(4)
- if not str then
- f:seek('set')
- return 0
- -- elseif find(str,"^%z%z\254\255") then -- depricated
- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged
- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH)
- return 4
- -- elseif find(str,"^\255\254%z%z") then -- depricated
- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged
- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH)
- return 3
- elseif find(str,"^\254\255") then
- f:seek('set',2)
- return 2
- elseif find(str,"^\255\254") then
- f:seek('set',2)
- return 1
- elseif find(str,"^\239\187\191") then
- f:seek('set',3)
- return 0
- else
- f:seek('set')
- return 0
+-- As we want to get rid of the (unmaintained) utf library we implement our own
+-- variants (in due time an independent module):
+
+function utf.filetype(data)
+ return data and lpegmatch(p_utftype,data) or "unknown"
+end
+
+local toentities = Cs (
+ (
+ patterns.utf8one
+ + (
+ patterns.utf8two
+ + patterns.utf8three
+ + patterns.utf8four
+ ) / function(s) local b = utfbyte(s) if b < 127 then return s else return format("&#%X;",b) end end
+ )^0
+)
+
+patterns.toentities = toentities
+
+function utf.toentities(str)
+ return lpegmatch(toentities,str)
+end
+
+-- local utfchr = { } -- 60K -> 2.638 M extra mem but currently not called that often (on latin)
+--
+-- setmetatable(utfchr, { __index = function(t,k) local v = utfchar(k) t[k] = v return v end } )
+--
+-- collectgarbage("collect")
+-- local u = collectgarbage("count")*1024
+-- local t = os.clock()
+-- for i=1,1000 do
+-- for i=1,600 do
+-- local a = utfchr[i]
+-- end
+-- end
+-- print(os.clock()-t,collectgarbage("count")*1024-u)
+
+-- collectgarbage("collect")
+-- local t = os.clock()
+-- for i=1,1000 do
+-- for i=1,600 do
+-- local a = utfchar(i)
+-- end
+-- end
+-- print(os.clock()-t,collectgarbage("count")*1024-u)
+
+-- local byte = string.byte
+-- local utfchar = utf.char
+
+local one = P(1)
+local two = C(1) * C(1)
+local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1)
+
+-- actually one of them is already utf ... sort of useless this one
+
+-- function utf.char(n)
+-- if n < 0x80 then
+-- return char(n)
+-- elseif n < 0x800 then
+-- return char(
+-- 0xC0 + floor(n/0x40),
+-- 0x80 + (n % 0x40)
+-- )
+-- elseif n < 0x10000 then
+-- return char(
+-- 0xE0 + floor(n/0x1000),
+-- 0x80 + (floor(n/0x40) % 0x40),
+-- 0x80 + (n % 0x40)
+-- )
+-- elseif n < 0x40000 then
+-- return char(
+-- 0xF0 + floor(n/0x40000),
+-- 0x80 + floor(n/0x1000),
+-- 0x80 + (floor(n/0x40) % 0x40),
+-- 0x80 + (n % 0x40)
+-- )
+-- else
+-- -- return char(
+-- -- 0xF1 + floor(n/0x1000000),
+-- -- 0x80 + floor(n/0x40000),
+-- -- 0x80 + floor(n/0x1000),
+-- -- 0x80 + (floor(n/0x40) % 0x40),
+-- -- 0x80 + (n % 0x40)
+-- -- )
+-- return "?"
+-- end
+-- end
+--
+-- merge into:
+
+local pattern = P("\254\255") * Cs( ( -- big endian bom: every 16 bit unit is stored as high byte, low byte
+ (C(R("\216\219")) * C(1) * C(R("\220\223")) * C(1)) / function(a,b,c,d) -- surrogate pair, recognized on the raw high bytes (0xD8-0xDB, 0xDC-0xDF)
+ local ab = 0x100 * byte(a) + byte(b) -- a unit is 256*high + low (the former 0xFF factor was off by one)
+ local cd = 0x100 * byte(c) + byte(d)
+ return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
+ end
+ + two / function(a,b)
+ return utfchar(byte(a)*256 + byte(b))
+ end
+ + one
+ )^1 )
+ + P("\255\254") * Cs( ( -- little endian bom: every 16 bit unit is stored as low byte, high byte
+ (C(1) * C(R("\216\219")) * C(1) * C(R("\220\223"))) / function(b,a,d,c) -- here the high bytes are the second and fourth byte
+ local ab = 0x100 * byte(a) + byte(b)
+ local cd = 0x100 * byte(c) + byte(d)
+ return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
+ end
+ + two / function(b,a)
+ return utfchar(byte(a)*256 + byte(b))
+ end
+ + one
+ )^1 )
+
+function string.toutf(s) -- in string namespace
+ return lpegmatch(pattern,s) or s -- todo: utf32
+end
+
+local validatedutf = Cs (
+ (
+ patterns.utf8one
+ + patterns.utf8two
+ + patterns.utf8three
+ + patterns.utf8four
+ + P(1) / "�"
+ )^0
+)
+
+patterns.validatedutf = validatedutf
+
+function utf.is_valid(str)
+ return type(str) == "string" and lpegmatch(validatedutf,str) or false
+end
+
+if not utf.len then
+
+ -- -- alternative 1: 0.77
+ --
+ -- local utfcharcounter = utfbom^-1 * Cs((p_utf8char/'!')^0)
+ --
+ -- function utf.len(str)
+ -- return #lpegmatch(utfcharcounter,str or "")
+ -- end
+ --
+ -- -- alternative 2: 1.70
+ --
+ -- local n = 0
+ --
+ -- local utfcharcounter = utfbom^-1 * (p_utf8char/function() n = n + 1 end)^0 -- slow
+ --
+ -- function utf.length(str)
+ -- n = 0
+ -- lpegmatch(utfcharcounter,str or "")
+ -- return n
+ -- end
+ --
+ -- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047)
+
+ -- local n = 0
+ --
+ -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( Cp() * (
+ -- -- patterns.utf8one ^1 * Cc(1)
+ -- -- + patterns.utf8two ^1 * Cc(2)
+ -- -- + patterns.utf8three^1 * Cc(3)
+ -- -- + patterns.utf8four ^1 * Cc(4) ) * Cp() / function(f,d,t) n = n + (t - f)/d end
+ -- -- )^0 ) -- just as many captures as below
+ --
+ -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( (
+ -- -- (Cmt(patterns.utf8one ^1,function(_,_,s) n = n + #s return true end))
+ -- -- + (Cmt(patterns.utf8two ^1,function(_,_,s) n = n + #s/2 return true end))
+ -- -- + (Cmt(patterns.utf8three^1,function(_,_,s) n = n + #s/3 return true end))
+ -- -- + (Cmt(patterns.utf8four ^1,function(_,_,s) n = n + #s/4 return true end))
+ -- -- )^0 ) -- not interesting as it creates strings but sometimes faster
+ --
+ -- -- The best so far:
+ --
+ -- local utfcharcounter = utfbom^-1 * P ( (
+ -- Cp() * (patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end
+ -- + Cp() * (patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end
+ -- + Cp() * (patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end
+ -- + Cp() * (patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end
+ -- )^0 )
+
+ -- function utf.len(str)
+ -- n = 0
+ -- lpegmatch(utfcharcounter,str or "")
+ -- return n
+ -- end
+
+ local n, f = 0, 1
+
+ local utfcharcounter = patterns.utfbom^-1 * Cmt (
+ Cc(1) * patterns.utf8one ^1
+ + Cc(2) * patterns.utf8two ^1
+ + Cc(3) * patterns.utf8three^1
+ + Cc(4) * patterns.utf8four ^1,
+ function(_,t,d) -- due to Cc no string captures, so faster
+ n = n + (t - f)/d
+ f = t
+ return true
+ end
+ )^0
+
+ function utf.len(str)
+ n, f = 0, 1
+ lpegmatch(utfcharcounter,str or "")
+ return n
end
+
+ -- -- these are quite a bit slower:
+
+ -- utfcharcounter = utfbom^-1 * (Cmt(P(1) * R("\128\191")^0, function() n = n + 1 return true end))^0 -- 50+ times slower
+ -- utfcharcounter = utfbom^-1 * (Cmt(P(1), function() n = n + 1 return true end) * R("\128\191")^0)^0 -- 50- times slower
+
end
-function unicode.utf16_to_utf8(str, endian) -- maybe a gsub is faster or an lpeg
- local result, tmp, n, m, p = { }, { }, 0, 0, 0
- -- lf | cr | crlf / (cr:13, lf:10)
- local function doit()
- if n == 10 then
- if p ~= 13 then
- result[#result+1] = concat(tmp)
- tmp = { }
- p = 0
- end
- elseif n == 13 then
- result[#result+1] = concat(tmp)
- tmp = { }
- p = n
+utf.length = utf.len
+
+if not utf.sub then
+
+ -- inefficient as lpeg just copies ^n
+
+ -- local function sub(str,start,stop)
+ -- local pattern = p_utf8char^-(start-1) * C(p_utf8char^-(stop-start+1))
+ -- inspect(pattern)
+ -- return lpegmatch(pattern,str) or ""
+ -- end
+
+ -- local b, e, n, first, last = 0, 0, 0, 0, 0
+ --
+ -- local function slide(s,p)
+ -- n = n + 1
+ -- if n == first then
+ -- b = p
+ -- if not last then
+ -- return nil
+ -- end
+ -- end
+ -- if n == last then
+ -- e = p
+ -- return nil
+ -- else
+ -- return p
+ -- end
+ -- end
+ --
+ -- local pattern = Cmt(p_utf8char,slide)^0
+ --
+ -- function utf.sub(str,start,stop) -- todo: from the end
+ -- if not start then
+ -- return str
+ -- end
+ -- b, e, n, first, last = 0, 0, 0, start, stop
+ -- lpegmatch(pattern,str)
+ -- if not stop then
+ -- return sub(str,b)
+ -- else
+ -- return sub(str,b,e-1)
+ -- end
+ -- end
+
+ -- print(utf.sub("Hans Hagen is my name"))
+ -- print(utf.sub("Hans Hagen is my name",5))
+ -- print(utf.sub("Hans Hagen is my name",5,10))
+
+ local utflength = utf.length
+
+ -- also negative indices, up to 10 times slower than a c variant
+
+ local b, e, n, first, last = 0, 0, 0, 0, 0
+
+ local function slide_zero(s,p)
+ n = n + 1
+ if n >= last then
+ e = p - 1
+ else
+ return p
+ end
+ end
+
+ local function slide_one(s,p)
+ n = n + 1
+ if n == first then
+ b = p
+ end
+ if n >= last then
+ e = p - 1
else
- tmp[#tmp+1] = utfchar(n)
- p = 0
+ return p
end
end
- for l,r in bytepairs(str) do
- if r then
- if endian then
- n = l*256 + r
+
+ local function slide_two(s,p)
+ n = n + 1
+ if n == first then
+ b = p
+ else
+ return true
+ end
+ end
+
+ local pattern_zero = Cmt(p_utf8char,slide_zero)^0
+ local pattern_one = Cmt(p_utf8char,slide_one )^0
+ local pattern_two = Cmt(p_utf8char,slide_two )^0
+
+ function utf.sub(str,start,stop)
+ if not start then
+ return str
+ end
+ if start == 0 then
+ start = 1
+ end
+ if not stop then
+ if start < 0 then
+ local l = utflength(str) -- we can inline this function if needed
+ start = l + start
else
- n = r*256 + l
+ start = start - 1
end
- if m > 0 then
- n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000
- m = 0
- doit()
- elseif n >= 0xD800 and n <= 0xDBFF then
- m = n
+ b, n, first = 0, 0, start
+ lpegmatch(pattern_two,str)
+ if n >= first then
+ return sub(str,b)
else
- doit()
+ return ""
+ end
+ end
+ if start < 0 or stop < 0 then
+ local l = utf.length(str)
+ if start < 0 then
+ start = l + start
+ if start <= 0 then
+ start = 1
+ else
+ start = start + 1
+ end
+ end
+ if stop < 0 then
+ stop = l + stop
+ if stop == 0 then
+ stop = 1
+ else
+ stop = stop + 1
+ end
end
end
+ if start > stop then
+ return ""
+ elseif start > 1 then
+ b, e, n, first, last = 0, 0, 0, start - 1, stop
+ lpegmatch(pattern_one,str)
+ if n >= first and e == 0 then
+ e = #str
+ end
+ return sub(str,b,e)
+ else
+ b, e, n, last = 1, 0, 0, stop
+ lpegmatch(pattern_zero,str)
+ if e == 0 then
+ e = #str
+ end
+ return sub(str,b,e)
+ end
end
- if #tmp > 0 then
- result[#result+1] = concat(tmp)
+
+ -- local n = 100000
+ -- local str = string.rep("123456àáâãäå",100)
+ --
+ -- for i=-15,15,1 do
+ -- for j=-15,15,1 do
+ -- if utf.xsub(str,i,j) ~= utf.sub(str,i,j) then
+ -- print("error",i,j,"l>"..utf.xsub(str,i,j),"s>"..utf.sub(str,i,j))
+ -- end
+ -- end
+ -- if utf.xsub(str,i) ~= utf.sub(str,i) then
+ -- print("error",i,"l>"..utf.xsub(str,i),"s>"..utf.sub(str,i))
+ -- end
+ -- end
+
+ -- print(" 1, 7",utf.xsub(str, 1, 7),utf.sub(str, 1, 7))
+ -- print(" 0, 7",utf.xsub(str, 0, 7),utf.sub(str, 0, 7))
+ -- print(" 0, 9",utf.xsub(str, 0, 9),utf.sub(str, 0, 9))
+ -- print(" 4 ",utf.xsub(str, 4 ),utf.sub(str, 4 ))
+ -- print(" 0 ",utf.xsub(str, 0 ),utf.sub(str, 0 ))
+ -- print(" 0, 0",utf.xsub(str, 0, 0),utf.sub(str, 0, 0))
+ -- print(" 4, 4",utf.xsub(str, 4, 4),utf.sub(str, 4, 4))
+ -- print(" 4, 0",utf.xsub(str, 4, 0),utf.sub(str, 4, 0))
+ -- print("-3, 0",utf.xsub(str,-3, 0),utf.sub(str,-3, 0))
+ -- print(" 0,-3",utf.xsub(str, 0,-3),utf.sub(str, 0,-3))
+ -- print(" 5,-3",utf.xsub(str,-5,-3),utf.sub(str,-5,-3))
+ -- print("-3 ",utf.xsub(str,-3 ),utf.sub(str,-3 ))
+
+end
+
+-- a replacement for simple gsubs:
+
+function utf.remapper(mapping) -- returns two values: a remapping function and the lpeg pattern it uses
+ local pattern = Cs((p_utf8char/mapping)^0) -- every utf8 character is passed through mapping, the results are substituted
+ return function(str)
+ if not str or str == "" then -- nil and the empty string both give ""
+ return ""
+ else
+ return lpegmatch(pattern,str)
+ end
+ end, pattern
+end
+
+-- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" }
+-- print(remap("abcd 1234 abcd"))
+
+--
+
+function utf.replacer(t) -- no precheck, always string builder
+ local r = replacer(t,false,false,true)
+ return function(str)
+ return lpegmatch(r,str)
end
- return result
end
-function unicode.utf32_to_utf8(str, endian)
- local result = { }
- local tmp, n, m, p = { }, 0, -1, 0
- -- lf | cr | crlf / (cr:13, lf:10)
- local function doit()
- if n == 10 then
- if p ~= 13 then
- result[#result+1] = concat(tmp)
- tmp = { }
- p = 0
- end
- elseif n == 13 then
- result[#result+1] = concat(tmp)
- tmp = { }
- p = n
+function utf.subtituter(t) -- with precheck and no building if no match
+ local f = finder (t)
+ local r = replacer(t,false,false,true)
+ return function(str)
+ local i = lpegmatch(f,str)
+ if not i then
+ return str
+ elseif i > #str then
+ return str
else
- tmp[#tmp+1] = utfchar(n)
- p = 0
+ -- return sub(str,1,i-2) .. lpegmatch(r,str,i-1) -- slower
+ return lpegmatch(r,str)
end
end
- for a,b in bytepairs(str) do
- if a and b then
- if m < 0 then
- if endian then
- m = a*256*256*256 + b*256*256
+end
+
+-- inspect(utf.split("a b c d"))
+-- inspect(utf.split("a b c d",true))
+
+local utflinesplitter = p_utfbom^-1 * lpeg.tsplitat(p_newline)
+local utfcharsplitter_ows = p_utfbom^-1 * Ct(C(p_utf8char)^0)
+local utfcharsplitter_iws = p_utfbom^-1 * Ct((p_whitespace^1 + C(p_utf8char))^0)
+local utfcharsplitter_raw = Ct(C(p_utf8char)^0)
+
+patterns.utflinesplitter = utflinesplitter
+
+function utf.splitlines(str)
+ return lpegmatch(utflinesplitter,str or "")
+end
+
+function utf.split(str,ignorewhitespace) -- new
+ if ignorewhitespace then
+ return lpegmatch(utfcharsplitter_iws,str or "")
+ else
+ return lpegmatch(utfcharsplitter_ows,str or "")
+ end
+end
+
+function utf.totable(str) -- keeps bom
+ return lpegmatch(utfcharsplitter_raw,str)
+end
+
+-- 0 EF BB BF UTF-8
+-- 1 FF FE UTF-16-little-endian
+-- 2 FE FF UTF-16-big-endian
+-- 3 FF FE 00 00 UTF-32-little-endian
+-- 4 00 00 FE FF UTF-32-big-endian
+--
+-- \000 fails in <= 5.0 but is valid in >=5.1 where %z is deprecated
+
+-- utf.name = {
+-- [0] = 'utf-8',
+-- [1] = 'utf-16-le',
+-- [2] = 'utf-16-be',
+-- [3] = 'utf-32-le',
+-- [4] = 'utf-32-be'
+-- }
+--
+-- function utf.magic(f)
+-- local str = f:read(4)
+-- if not str then
+-- f:seek('set')
+-- return 0
+-- -- elseif find(str,"^%z%z\254\255") then -- depricated
+-- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged
+-- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH)
+-- return 4
+-- -- elseif find(str,"^\255\254%z%z") then -- depricated
+-- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged
+-- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH)
+-- return 3
+-- elseif find(str,"^\254\255") then
+-- f:seek('set',2)
+-- return 2
+-- elseif find(str,"^\255\254") then
+-- f:seek('set',2)
+-- return 1
+-- elseif find(str,"^\239\187\191") then
+-- f:seek('set',3)
+-- return 0
+-- else
+-- f:seek('set')
+-- return 0
+-- end
+-- end
+
+function utf.magic(f) -- not used
+ local str = f:read(4) or ""
+ local off = lpegmatch(p_utfoffset,str)
+ if off < 4 then
+ f:seek('set',off)
+ end
+ return lpegmatch(p_utftype,str)
+end
+
+local function utf16_to_utf8_be(t)
+ if type(t) == "string" then
+ t = lpegmatch(utflinesplitter,t)
+ end
+ local result = { } -- we reuse result
+ for i=1,#t do
+ local r, more = 0, 0
+ for left, right in bytepairs(t[i]) do
+ if right then
+ local now = 256*left + right
+ if more > 0 then
+ now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+ more = 0
+ r = r + 1
+ result[r] = utfchar(now)
+ elseif now >= 0xD800 and now <= 0xDBFF then
+ more = now
else
- m = b*256 + a
+ r = r + 1
+ result[r] = utfchar(now)
end
- else
- if endian then
- n = m + a*256 + b
+ end
+ end
+ t[i] = concat(result,"",1,r) -- we reused tmp, hence t
+ end
+ return t
+end
+
+local function utf16_to_utf8_le(t)
+ if type(t) == "string" then
+ t = lpegmatch(utflinesplitter,t)
+ end
+ local result = { } -- we reuse result
+ for i=1,#t do
+ local r, more = 0, 0
+ for left, right in bytepairs(t[i]) do
+ if right then
+ local now = 256*right + left
+ if more > 0 then
+ now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong
+ more = 0
+ r = r + 1
+ result[r] = utfchar(now)
+ elseif now >= 0xD800 and now <= 0xDBFF then
+ more = now
else
- n = m + b*256*256*256 + a*256*256
+ r = r + 1
+ result[r] = utfchar(now)
end
- m = -1
- doit()
end
- else
- break
end
+ t[i] = concat(result,"",1,r) -- we reused tmp, hence t
+ end
+ return t
+end
+
+local function utf32_to_utf8_be(t) -- t: big endian utf-32 as a string (split with utflinesplitter) or as a list of lines; returns the list with each entry converted to utf-8
+ if type(t) == "string" then
+ t = lpegmatch(utflinesplitter,t)
+ end
+ local result = { } -- we reuse result
+ for i=1,#t do
+ local r, more = 0, -1 -- r: characters collected for this line, more: pending upper half (-1 when none)
+ for a,b in bytepairs(t[i]) do
+ if a and b then
+ if more < 0 then
+ more = 256*256*256*a + 256*256*b -- first byte pair: the two most significant bytes
+ else
+ r = r + 1
+ result[r] = utfchar(more + 256*a + b) -- store at slot r (was indexed by the table t, so concat never saw it)
+ more = -1
+ end
+ else
+ break
+ end
+ end
+ t[i] = concat(result,"",1,r)
end
- if #tmp > 0 then
- result[#result+1] = concat(tmp)
+ return t
+end
+
+local function utf32_to_utf8_le(t) -- t: little endian utf-32 as a string (split with utflinesplitter) or as a list of lines; returns the list with each entry converted to utf-8
+ if type(t) == "string" then
+ t = lpegmatch(utflinesplitter,t)
end
- return result
+ local result = { } -- we reuse result
+ for i=1,#t do
+ local r, more = 0, -1 -- r: characters collected for this line, more: pending lower half (-1 when none)
+ for a,b in bytepairs(t[i]) do
+ if a and b then
+ if more < 0 then
+ more = 256*b + a -- first byte pair: the two least significant bytes
+ else
+ r = r + 1
+ result[r] = utfchar(more + 256*256*256*b + 256*256*a) -- store at slot r (was indexed by the table t, so concat never saw it)
+ more = -1
+ end
+ else
+ break
+ end
+ end
+ t[i] = concat(result,"",1,r)
+ end
+ return t
+end
+
+utf.utf32_to_utf8_be = utf32_to_utf8_be
+utf.utf32_to_utf8_le = utf32_to_utf8_le
+utf.utf16_to_utf8_be = utf16_to_utf8_be
+utf.utf16_to_utf8_le = utf16_to_utf8_le
+
+function utf.utf8_to_utf8(t)
+ return type(t) == "string" and lpegmatch(utflinesplitter,t) or t
+end
+
+function utf.utf16_to_utf8(t,endian)
+ return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t
+end
+
+function utf.utf32_to_utf8(t,endian)
+ return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t
end
local function little(c)
- local b = byte(c) -- b = c:byte()
+ local b = byte(c)
if b < 0x10000 then
return char(b%256,b/256)
else
@@ -190,10 +766,177 @@ local function big(c)
end
end
-function unicode.utf8_to_utf16(str,littleendian)
+-- function utf.utf8_to_utf16(str,littleendian)
+-- if littleendian then
+-- return char(255,254) .. utfgsub(str,".",little)
+-- else
+-- return char(254,255) .. utfgsub(str,".",big)
+-- end
+-- end
+
+local _, l_remap = utf.remapper(little)
+local _, b_remap = utf.remapper(big)
+
+function utf.utf8_to_utf16(str,littleendian)
if littleendian then
- return char(255,254) .. utfgsub(str,".",little)
+ return char(255,254) .. lpegmatch(l_remap,str)
else
- return char(254,255) .. utfgsub(str,".",big)
+ return char(254,255) .. lpegmatch(b_remap,str)
+ end
+end
+
+-- function utf.tocodes(str,separator) -- can be sped up with an lpeg
+-- local t, n = { }, 0
+-- for u in utfvalues(str) do
+-- n = n + 1
+-- t[n] = format("0x%04X",u)
+-- end
+-- return concat(t,separator or " ")
+-- end
+
+local pattern = Cs (
+ (p_utf8byte / function(unicode ) return format( "0x%04X", unicode) end) *
+ (p_utf8byte * Carg(1) / function(unicode,separator) return format("%s0x%04X",separator,unicode) end)^0
+)
+
+function utf.tocodes(str,separator)
+ return lpegmatch(pattern,str,1,separator or " ")
+end
+
+function utf.ustring(s)
+ return format("U+%05X",type(s) == "number" and s or utfbyte(s))
+end
+
+function utf.xstring(s)
+ return format("0x%05X",type(s) == "number" and s or utfbyte(s))
+end
+
+--
+
+local p_nany = p_utf8char / ""
+
+if utfgmatch then
+
+ function utf.count(str,what)
+ if type(what) == "string" then
+ local n = 0
+ for _ in utfgmatch(str,what) do
+ n = n + 1
+ end
+ return n
+ else -- 4 times slower but still faster than / function
+ return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str)
+ end
+ end
+
+else
+
+ local cache = { }
+
+ function utf.count(str,what) -- number of times what occurs in str; what is a string or anything else P() accepts
+ if type(what) == "string" then
+ local p = cache[what] -- one compiled counter per search string
+ if not p then
+ p = Cs((P(what)/" " + p_nany)^0) -- each hit becomes one space, everything else vanishes, so the length is the count
+ cache[what] = p -- key by the search string (keying by the pattern meant the cache never hit)
+ end
+ return #lpegmatch(p,str)
+ else -- 4 times slower but still faster than / function
+ return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str)
+ end
+ end
+
+end
+
+-- maybe also register as string.utf*
+
+
+if not utf.characters then
+
+ -- New: this gmatch hack is taken from the Lua 5.2 book. It's about two times slower
+ -- than the built-in string.utfcharacters.
+
+ function utf.characters(str) -- iterator over the utf8 characters of str: a lead byte plus any continuation bytes
+ return string.gmatch(str,".[\128-\191]*") -- gmatch is not a local in this file, so the library function is addressed explicitly
+ end
+
+ string.utfcharacters = utf.characters
+
+end
+
+if not utf.values then
+
+ -- So, a logical next step is to check for the values variant. It is over five times
+ -- slower than the built-in string.utfvalues. I optimized it a bit for n=0,1.
+
+ ----- wrap, yield, gmatch = coroutine.wrap, coroutine.yield, string.gmatch
+ local find = string.find
+
+ local dummy = function()
+ -- we share this one
end
+
+ -- function utf.values(str)
+ -- local n = #str
+ -- if n == 0 then
+ -- return wrap(dummy)
+ -- elseif n == 1 then
+ -- return wrap(function() yield(utfbyte(str)) end)
+ -- else
+ -- return wrap(function() for s in gmatch(str,".[\128-\191]*") do
+ -- yield(utfbyte(s))
+ -- end end)
+ -- end
+ -- end
+ --
+ -- faster:
+
+ function utf.values(str) -- iterator over the code points of str, for use in a generic for
+ local n = #str
+ if n == 0 then
+ return dummy
+ elseif n == 1 then
+ return function() local s = str str = nil return s and utfbyte(s) end -- give the single value once, then nil, otherwise the loop never ends
+ else
+ local p = 1 -- byte position where the next character starts
+ -- local n = #str
+ return function()
+ -- if p <= n then -- slower than the last find
+ local b, e = find(str,".[\128-\191]*",p)
+ if b then
+ p = e + 1
+ return utfbyte(sub(str,b,e))
+ end
+ -- end
+ end
+ end
+ end
+
+ -- slower:
+ --
+ -- local pattern = C(patterns.utf8character) * Cp()
+ -- ----- pattern = patterns.utf8character/utfbyte * Cp()
+ -- ----- pattern = patterns.utf8byte * Cp()
+ --
+ -- function utf.values(str) -- one of the cases where a find is faster than an lpeg
+ -- local n = #str
+ -- if n == 0 then
+ -- return dummy
+ -- elseif n == 1 then
+ -- return function() return utfbyte(str) end
+ -- else
+ -- local p = 1
+ -- return function()
+ -- local s, e = lpegmatch(pattern,str,p)
+ -- if e then
+ -- p = e
+ -- return utfbyte(s)
+ -- -- return s
+ -- end
+ -- end
+ -- end
+ -- end
+
+ string.utfvalues = utf.values
+
end
diff --git a/lualibs-url.lua b/lualibs-url.lua
index e3e6f81..4624a05 100644
--- a/lualibs-url.lua
+++ b/lualibs-url.lua
@@ -6,101 +6,292 @@ if not modules then modules = { } end modules ['l-url'] = {
license = "see context related readme files"
}
-local char, gmatch, gsub = string.char, string.gmatch, string.gsub
+local char, format, byte = string.char, string.format, string.byte
+local concat = table.concat
local tonumber, type = tonumber, type
-local lpegmatch = lpeg.match
+local P, C, R, S, Cs, Cc, Ct, Cf, Cg, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cg, lpeg.V
+local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replacer
--- from the spec (on the web):
+-- from wikipedia:
--
--- foo://example.com:8042/over/there?name=ferret#nose
--- \_/ \______________/\_________/ \_________/ \__/
--- | | | | |
--- scheme authority path query fragment
--- | _____________________|__
--- / \ / \
--- urn:example:animal:ferret:nose
-
-url = url or { }
-
-local function tochar(s)
- return char(tonumber(s,16))
-end
+-- foo://username:password@example.com:8042/over/there/index.dtb?type=animal;name=narwhal#nose
+-- \_/ \_______________/ \_________/ \__/ \___/ \_/ \______________________/ \__/
+-- | | | | | | | |
+-- | userinfo hostname port | | query fragment
+-- | \________________________________/\_____________|____|/
+-- scheme | | | |
+-- | authority path | |
+-- | | |
+-- | path interpretable as filename
+-- | ___________|____________ |
+-- / \ / \ |
+-- urn:example:animal:ferret:nose interpretable as extension
+
+url = url or { }
+local url = url
+
+local tochar = function(s) return char(tonumber(s,16)) end
+
+local colon = P(":")
+local qmark = P("?")
+local hash = P("#")
+local slash = P("/")
+local percent = P("%")
+local endofstring = P(-1)
-local colon, qmark, hash, slash, percent, endofstring = lpeg.P(":"), lpeg.P("?"), lpeg.P("#"), lpeg.P("/"), lpeg.P("%"), lpeg.P(-1)
+local hexdigit = R("09","AF","af")
+local plus = P("+")
+local nothing = Cc("")
+local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar
+local escaped = (plus / " ") + escapedchar
-local hexdigit = lpeg.R("09","AF","af")
-local plus = lpeg.P("+")
-local escaped = (plus / " ") + (percent * lpeg.C(hexdigit * hexdigit) / tochar)
+local noslash = P("/") / ""
-- we assume schemes with more than 1 character (in order to avoid problems with windows disks)
+-- we also assume that when we have a scheme, we also have an authority
+--
+-- maybe we should already split the query (better for unescaping as = & can be part of a value
+
+local schemestr = Cs((escaped+(1-colon-slash-qmark-hash))^2)
+local authoritystr = Cs((escaped+(1- slash-qmark-hash))^0)
+local pathstr = Cs((escaped+(1- qmark-hash))^0)
+----- querystr = Cs((escaped+(1- hash))^0)
+local querystr = Cs(( (1- hash))^0)
+local fragmentstr = Cs((escaped+(1- endofstring))^0)
+
+local scheme = schemestr * colon + nothing
+local authority = slash * slash * authoritystr + nothing
+local path = slash * pathstr + nothing
+local query = qmark * querystr + nothing
+local fragment = hash * fragmentstr + nothing
+
+local validurl = scheme * authority * path * query * fragment
+local parser = Ct(validurl)
-local scheme = lpeg.Cs((escaped+(1-colon-slash-qmark-hash))^2) * colon + lpeg.Cc("")
-local authority = slash * slash * lpeg.Cs((escaped+(1- slash-qmark-hash))^0) + lpeg.Cc("")
-local path = slash * lpeg.Cs((escaped+(1- qmark-hash))^0) + lpeg.Cc("")
-local query = qmark * lpeg.Cs((escaped+(1- hash))^0) + lpeg.Cc("")
-local fragment = hash * lpeg.Cs((escaped+(1- endofstring))^0) + lpeg.Cc("")
+lpegpatterns.url = validurl
+lpegpatterns.urlsplitter = parser
-local parser = lpeg.Ct(scheme * authority * path * query * fragment)
+local escapes = { }
+
+setmetatable(escapes, { __index = function(t,k)
+ local v = format("%%%02X",byte(k))
+ t[k] = v
+ return v
+end })
+
+local escaper = Cs((R("09","AZ","az")^1 + P(" ")/"%%20" + S("-./_")^1 + P(1) / escapes)^0) -- space happens most
+local unescaper = Cs((escapedchar + 1)^0)
+
+lpegpatterns.urlunescaped = escapedchar
+lpegpatterns.urlescaper = escaper
+lpegpatterns.urlunescaper = unescaper
-- todo: reconsider Ct as we can as well have five return values (saves a table)
-- so we can have two parsers, one with and one without
-function url.split(str)
+local function split(str)
return (type(str) == "string" and lpegmatch(parser,str)) or str
end
--- todo: cache them
+local isscheme = schemestr * colon * slash * slash -- this test also assumes authority
-function url.hashed(str)
- local s = url.split(str)
- local somescheme = s[1] ~= ""
- return {
- scheme = (somescheme and s[1]) or "file",
- authority = s[2],
- path = s[3],
- query = s[4],
- fragment = s[5],
- original = str,
- noscheme = not somescheme,
- }
+local function hasscheme(str)
+ if str then
+ local scheme = lpegmatch(isscheme,str) -- at least one character
+ return scheme ~= "" and scheme or false
+ else
+ return false
+ end
end
-function url.hasscheme(str)
- return url.split(str)[1] ~= ""
+--~ print(hasscheme("home:"))
+--~ print(hasscheme("home://"))
+
+-- todo: cache them
+
+local rootletter = R("az","AZ")
+ + S("_-+")
+local separator = P("://")
+local qualified = P(".")^0 * P("/")
+ + rootletter * P(":")
+ + rootletter^1 * separator
+ + rootletter^1 * P("/")
+local rootbased = P("/")
+ + rootletter * P(":")
+
+local barswapper = replacer("|",":")
+local backslashswapper = replacer("\\","/")
+
+-- queries:
+
+local equal = P("=")
+local amp = P("&")
+local key = Cs(((escapedchar+1)-equal )^0)
+local value = Cs(((escapedchar+1)-amp -endofstring)^0)
+
+local splitquery = Cf ( Ct("") * P { "sequence",
+ sequence = V("pair") * (amp * V("pair"))^0,
+ pair = Cg(key * equal * value),
+}, rawset)
+
+-- hasher
+
+local function hashed(str) -- not yet ok (/test?test)
+ if str == "" then
+ return {
+ scheme = "invalid",
+ original = str,
+ }
+ end
+ local s = split(str)
+ local rawscheme = s[1]
+ local rawquery = s[4]
+ local somescheme = rawscheme ~= ""
+ local somequery = rawquery ~= ""
+ if not somescheme and not somequery then
+ s = {
+ scheme = "file",
+ authority = "",
+ path = str,
+ query = "",
+ fragment = "",
+ original = str,
+ noscheme = true,
+ filename = str,
+ }
+ else -- not always a filename but handy anyway
+ local authority, path, filename = s[2], s[3]
+ if authority == "" then
+ filename = path
+ elseif path == "" then
+ filename = ""
+ else
+ filename = authority .. "/" .. path
+ end
+ s = {
+ scheme = rawscheme,
+ authority = authority,
+ path = path,
+ query = lpegmatch(unescaper,rawquery), -- unescaped, but possible conflict with & and =
+ queries = lpegmatch(splitquery,rawquery), -- split first and then unescaped
+ fragment = s[5],
+ original = str,
+ noscheme = false,
+ filename = filename,
+ }
+ end
+ return s
end
-function url.addscheme(str,scheme)
- return (url.hasscheme(str) and str) or ((scheme or "file:///") .. str)
+-- inspect(hashed("template://test"))
+
+-- Here we assume:
+--
+-- files: /// = relative
+-- files: //// = absolute (!)
+
+--~ table.print(hashed("file://c:/opt/tex/texmf-local")) -- c:/opt/tex/texmf-local
+--~ table.print(hashed("file://opt/tex/texmf-local" )) -- opt/tex/texmf-local
+--~ table.print(hashed("file:///opt/tex/texmf-local" )) -- opt/tex/texmf-local
+--~ table.print(hashed("file:////opt/tex/texmf-local" )) -- /opt/tex/texmf-local
+--~ table.print(hashed("file:///./opt/tex/texmf-local" )) -- ./opt/tex/texmf-local
+
+--~ table.print(hashed("c:/opt/tex/texmf-local" )) -- c:/opt/tex/texmf-local
+--~ table.print(hashed("opt/tex/texmf-local" )) -- opt/tex/texmf-local
+--~ table.print(hashed("/opt/tex/texmf-local" )) -- /opt/tex/texmf-local
+
+url.split = split
+url.hasscheme = hasscheme
+url.hashed = hashed
+
+function url.addscheme(str,scheme) -- no authority
+ if hasscheme(str) then -- already carries a scheme: return it untouched
+ return str
+ elseif not scheme then -- no scheme passed: fall back to file
+ return "file:///" .. str
+ else
+ return scheme .. ":///" .. str -- scheme is the bare name, the ":///" is added here
+ end
end
-function url.construct(hash)
- local fullurl = hash.sheme .. "://".. hash.authority .. hash.path
- if hash.query then
- fullurl = fullurl .. "?".. hash.query
+function url.construct(hash) -- dodo: we need to escape !
+ local fullurl, f = { }, 0
+ local scheme, authority, path, query, fragment = hash.scheme, hash.authority, hash.path, hash.query, hash.fragment
+ if scheme and scheme ~= "" then
+ f = f + 1 ; fullurl[f] = scheme .. "://"
+ end
+ if authority and authority ~= "" then
+ f = f + 1 ; fullurl[f] = authority
end
- if hash.fragment then
- fullurl = fullurl .. "?".. hash.fragment
+ if path and path ~= "" then
+ f = f + 1 ; fullurl[f] = "/" .. path
end
- return fullurl
+ if query and query ~= "" then
+ f = f + 1 ; fullurl[f] = "?".. query
+ end
+ if fragment and fragment ~= "" then
+ f = f + 1 ; fullurl[f] = "#".. fragment
+ end
+ return lpegmatch(escaper,concat(fullurl))
end
+local pattern = Cs(noslash * R("az","AZ") * (S(":|")/":") * noslash * P(1)^0)
+
+-- Turn a file url into a filename: when the hashed scheme is "file" and the path
+-- matches the pattern defined just above (a slash, a letter, ":" or "|", a slash
+-- and the rest, with "|" rewritten to ":") the rewritten path is returned. In
+-- all other cases the argument is returned unchanged.
+-- NOTE(review): presumably noslash drops the matched slashes so that "/c|/test"
+-- becomes "c:test"-like output; its definition is not visible here, confirm.
function url.filename(filename)
- local t = url.hashed(filename)
- return (t.scheme == "file" and (gsub(t.path,"^/([a-zA-Z])([:|])/)","%1:"))) or filename
+ local spec = hashed(filename)
+ local path = spec.path
+ return (spec.scheme == "file" and path and lpegmatch(pattern,path)) or filename
+end
+
+-- print(url.filename("/c|/test"))
+-- print(url.filename("/c/test"))
+
+-- Apply the escaper pattern to str and return the result; the same function is
+-- exported as url.escape.
+local function escapestring(str)
+ return lpegmatch(escaper,str)
end
+url.escape = escapestring
+
+-- Split a query string: a string argument is matched against splitquery and the
+-- result is returned ("" when the match yields nothing); any non-string argument
+-- is returned as it is.
function url.query(str)
if type(str) == "string" then
- local t = { }
- for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do
- t[k] = v
- end
- return t
+ return lpegmatch(splitquery,str) or ""
else
return str
end
end
+-- Turn data into a query string.
+--
+-- string : returned escaped, or nil when the string is empty
+-- table  : a non-empty table becomes "k1=v1&k2=v2"; values are escaped, keys
+--          are used as-is and the order is whatever next() delivers
+-- other  : nil, which signals that there is no query
+function url.toquery(data)
+    local td = type(data)
+    if td == "string" then
+        -- this used to read "#str and escape(data)", but neither str nor
+        -- escape is defined in this file, so the branch always raised an error
+        return #data > 0 and escapestring(data) or nil -- beware of double escaping
+    elseif td == "table" then
+        if next(data) then
+            local t = { }
+            for k, v in next, data do
+                t[#t+1] = format("%s=%s",k,escapestring(v))
+            end
+            return concat(t,"&")
+        end
+    else
+        -- nil is a signal that no query
+    end
+end
+
+-- /test/ | /test | test/ | test => test
+
+local pattern = Cs(noslash^0 * (1 - noslash * P(-1))^0)
+
+-- Strip the path with the pattern defined above; a missing or empty path
+-- gives the empty string.
+function url.barepath(path)
+    if path and path ~= "" then
+        return lpegmatch(pattern,path)
+    end
+    return ""
+end
+
+-- print(url.barepath("/test"),url.barepath("test/"),url.barepath("/test/"),url.barepath("test"))
+-- print(url.barepath("/x/yz"),url.barepath("x/yz/"),url.barepath("/x/yz/"),url.barepath("x/yz"))
+
--~ print(url.filename("file:///c:/oeps.txt"))
--~ print(url.filename("c:/oeps.txt"))
--~ print(url.filename("file:///oeps.txt"))
@@ -108,12 +299,30 @@ end
--~ print(url.filename("/oeps.txt"))
--~ from the spec on the web (sort of):
---~
---~ function test(str)
---~ print(table.serialize(url.hashed(str)))
+
+--~ local function test(str)
+--~ local t = url.hashed(str)
+--~ t.constructed = url.construct(t)
+--~ print(table.serialize(t))
--~ end
---~
---~ test("%56pass%20words")
+
+--~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45"))
+--~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45"))
+
+--~ test("sys:///./colo-rgb")
+
+--~ test("/data/site/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733/figuur-cow.jpg")
+--~ test("file:///M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+--~ test("M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+--~ test("file:///q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+--~ test("/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733")
+
+--~ test("file:///cow%20with%20spaces")
+--~ test("file:///cow%20with%20spaces.pdf")
+--~ test("cow%20with%20spaces.pdf")
+--~ test("some%20file")
+--~ test("/etc/passwords")
+--~ test("http://www.myself.com/some%20words.html")
--~ test("file:///c:/oeps.txt")
--~ test("file:///c|/oeps.txt")
--~ test("file:///etc/oeps.txt")
@@ -127,8 +336,9 @@ end
--~ test("tel:+1-816-555-1212")
--~ test("telnet://192.0.2.16:80/")
--~ test("urn:oasis:names:specification:docbook:dtd:xml:4.1.2")
---~ test("/etc/passwords")
--~ test("http://www.pragma-ade.com/spaced%20name")
--~ test("zip:///oeps/oeps.zip#bla/bla.tex")
--~ test("zip:///oeps/oeps.zip?bla/bla.tex")
+
+--~ table.print(url.hashed("/test?test"))
diff --git a/lualibs-dimen.lua b/lualibs-util-dim.lua
index da5ab14..47b2706 100644
--- a/lualibs-dimen.lua
+++ b/lualibs-util-dim.lua
@@ -1,4 +1,4 @@
-if not modules then modules = { } end modules ['l-dimen'] = {
+if not modules then modules = { } end modules ['util-dim'] = {
version = 1.001,
comment = "support for dimensions",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -16,14 +16,21 @@ table.</p>
--ldx]]--
local format, match, gsub, type, setmetatable = string.format, string.match, string.gsub, type, setmetatable
-local P, S, R, Cc, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.Cc, lpeg.match
+local P, S, R, Cc, C, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.Cc, lpeg.C, lpeg.match
+
+local allocate = utilities.storage.allocate
+local setmetatableindex = table.setmetatableindex
+local formatters = string.formatters
+
+--this might become another namespace
number = number or { }
+local number = number
number.tonumberf = function(n) return match(format("%.20f",n),"(.-0?)0*$") end -- one zero too much but alas
number.tonumberg = function(n) return format("%.20g",n) end
-local dimenfactors = {
+local dimenfactors = allocate {
["pt"] = 1/65536,
["in"] = ( 100/ 7227)/65536,
["cm"] = ( 254/ 7227)/65536,
@@ -80,14 +87,18 @@ local dimenfactors = {
format (string) is implemented using this table.</p>
--ldx]]--
--- was:
-local function todimen(n,unit,fmt)
+local function numbertodimen(n,unit,fmt)
if type(n) == 'string' then
return n
else
unit = unit or 'pt'
- return format(fmt or "%s%s",n*dimenfactors[unit],unit)
+ if not fmt then
+ fmt = "%s%s"
+ elseif fmt == true then
+ fmt = "%0.5f%s"
+ end
+ return format(fmt,n*dimenfactors[unit],unit)
-- if fmt then
-- return format(fmt,n*dimenfactors[unit],unit)
-- else
@@ -101,21 +112,21 @@ end
--ldx]]--
number.maxdimen = 1073741823
-number.todimen = todimen
+number.todimen = numbertodimen
number.dimenfactors = dimenfactors
-function number.topoints (n) return todimen(n,"pt") end
-function number.toinches (n) return todimen(n,"in") end
-function number.tocentimeters (n) return todimen(n,"cm") end
-function number.tomillimeters (n) return todimen(n,"mm") end
-function number.toscaledpoints(n) return todimen(n,"sp") end
-function number.toscaledpoints(n) return n .. "sp" end
-function number.tobasepoints (n) return todimen(n,"bp") end
-function number.topicas (n) return todimen(n "pc") end
-function number.todidots (n) return todimen(n,"dd") end
-function number.tociceros (n) return todimen(n,"cc") end
-function number.tonewdidots (n) return todimen(n,"nd") end
-function number.tonewciceros (n) return todimen(n,"nc") end
+-- Convenience converters: each one formats n (in scaled points) in a fixed unit
+-- by calling numbertodimen; fmt is passed on unchanged (nil: "%s%s", true:
+-- "%0.5f%s", otherwise a format string).
+function number.topoints      (n,fmt) return numbertodimen(n,"pt",fmt) end
+function number.toinches      (n,fmt) return numbertodimen(n,"in",fmt) end
+function number.tocentimeters (n,fmt) return numbertodimen(n,"cm",fmt) end
+function number.tomillimeters (n,fmt) return numbertodimen(n,"mm",fmt) end
+function number.toscaledpoints(n,fmt) return numbertodimen(n,"sp",fmt) end
+-- the next definition replaces the previous one: no conversion (and no fmt) is involved
+function number.toscaledpoints(n)     return n .. "sp" end
+function number.tobasepoints  (n,fmt) return numbertodimen(n,"bp",fmt) end
+-- a comma was missing here (n "pc"), which made this a call of n as a function
+function number.topicas       (n,fmt) return numbertodimen(n,"pc",fmt) end
+function number.todidots      (n,fmt) return numbertodimen(n,"dd",fmt) end
+function number.tociceros     (n,fmt) return numbertodimen(n,"cc",fmt) end
+function number.tonewdidots   (n,fmt) return numbertodimen(n,"nd",fmt) end
+function number.tonewciceros  (n,fmt) return numbertodimen(n,"nc",fmt) end
--[[ldx--
<p>More interesting it to implement a (sort of) dimen datatype, one
@@ -132,27 +143,40 @@ local dimenpair = amount/tonumber * (unit^1/dimenfactors + Cc(1)) -- tonumber is
lpeg.patterns.dimenpair = dimenpair
+-- Split a dimension string into its numeric part (converted with tonumber) and
+-- the unit text as it was matched, e.g. presumably "10pt" gives 10 and "pt".
+local splitter = amount/tonumber * C(unit^1)
+
+function number.splitdimen(str)
+ return lpegmatch(splitter,str)
+end
+
--[[ldx--
<p>We use a metatable to intercept errors. When no key is found in
the table with factors, the metatable will be consulted for an
alternative index function.</p>
--ldx]]--
-local mt = { } setmetatable(dimenfactors,mt)
-
-mt.__index = function(t,s)
+setmetatableindex(dimenfactors, function(t,s)
-- error("wrong dimension: " .. (s or "?")) -- better a message
return false
-end
+end)
-function string:todimen()
- if type(self) == "number" then
- return self
- else
- local value, unit = lpegmatch(dimenpair,self)
- return value/unit
- end
-end
+--[[ldx--
+<p>We redefine the following function later on, so we comment it out
+here (which saves us bytecode).</p>
+--ldx]]--
+
+-- function string.todimen(str)
+-- if type(str) == "number" then
+-- return str
+-- else
+-- local value, unit = lpegmatch(dimenpair,str)
+-- return value/unit
+-- end
+-- end
+--
+-- local stringtodimen = string.todimen
+
+local stringtodimen -- assigned later (commenting saves bytecode)
local amount = S("+-")^0 * R("09")^0 * S(".,")^0 * R("09")^0
local unit = P("pt") + P("cm") + P("mm") + P("sp") + P("bp") + P("in") +
@@ -160,7 +184,7 @@ local unit = P("pt") + P("cm") + P("mm") + P("sp") + P("bp") + P("in") +
local validdimen = amount * unit
-lpeg.patterns.validdimen = pattern
+lpeg.patterns.validdimen = validdimen
--[[ldx--
<p>This converter accepts calls like:</p>
@@ -174,12 +198,6 @@ string.todimen("10pt")
string.todimen("10.0pt")
</typing>
-<p>And of course the often more efficient:</p>
-
-<typing>
-somestring:todimen("12.3cm")
-</typing>
-
<p>With this in place, we can now implement a proper datatype for dimensions, one
that permits us to do this:</p>
@@ -197,28 +215,28 @@ local dimensions = { }
<p>The main (and globally) visible representation of a dimen is defined next: it is
a one-element table. The unit that is returned from the match is normally a number
(one of the previously defined factors) but we also accept functions. Later we will
-see why.</p>
+see why. This function is redefined later.</p>
--ldx]]--
-function dimen(a)
- if a then
- local ta= type(a)
- if ta == "string" then
- local value, unit = lpegmatch(pattern,a)
- if type(unit) == "function" then
- k = value/unit()
- else
- k = value/unit
- end
- a = k
- elseif ta == "table" then
- a = a[1]
- end
- return setmetatable({ a }, dimensions)
- else
- return setmetatable({ 0 }, dimensions)
- end
-end
+-- function dimen(a)
+-- if a then
+-- local ta= type(a)
+-- if ta == "string" then
+-- local value, unit = lpegmatch(pattern,a)
+-- if type(unit) == "function" then
+-- k = value/unit()
+-- else
+-- k = value/unit
+-- end
+-- a = k
+-- elseif ta == "table" then
+-- a = a[1]
+-- end
+-- return setmetatable({ a }, dimensions)
+-- else
+-- return setmetatable({ 0 }, dimensions)
+-- end
+-- end
--[[ldx--
<p>This function return a small hash with a metatable attached. It is
@@ -228,35 +246,35 @@ shared some of the code but for reasons of speed we don't.</p>
+-- Addition: string operands are converted with stringtodimen, table operands
+-- are unwrapped (first element); the sum is wrapped in a new dimensions table.
function dimensions.__add(a, b)
local ta, tb = type(a), type(b)
- if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end
- if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end
+ if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end
+ if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end
return setmetatable({ a + b }, dimensions)
end
+-- Subtraction: string operands are converted with stringtodimen, table operands
+-- are unwrapped (first element); the difference is wrapped in a new dimensions table.
function dimensions.__sub(a, b)
local ta, tb = type(a), type(b)
- if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end
- if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end
+ if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end
+ if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end
return setmetatable({ a - b }, dimensions)
end
+-- Multiplication: string operands are converted with stringtodimen, table operands
+-- are unwrapped (first element); the product is wrapped in a new dimensions table.
function dimensions.__mul(a, b)
local ta, tb = type(a), type(b)
- if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end
- if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end
+ if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end
+ if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end
return setmetatable({ a * b }, dimensions)
end
+-- Division: string operands are converted with stringtodimen, table operands
+-- are unwrapped (first element); the quotient is wrapped in a new dimensions table.
function dimensions.__div(a, b)
local ta, tb = type(a), type(b)
- if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end
- if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end
+ if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end
+ if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end
return setmetatable({ a / b }, dimensions)
end
+-- Unary minus: a string operand is converted with stringtodimen, a table operand
+-- is unwrapped (first element); the negated value is wrapped in a new dimensions table.
function dimensions.__unm(a)
local ta = type(a)
- if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end
+ if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end
return setmetatable({ - a }, dimensions)
end
@@ -321,23 +339,9 @@ is loaded, the relevant tables that hold the functions needed may not
yet be available.</p>
--ldx]]--
-function dimensions.texify() -- todo: %
- local fti, fc = fonts and fonts.ids and fonts.ids, font and font.current
- if fti and fc then
- dimenfactors["ex"] = function() return fti[fc()].ex_height end
- dimenfactors["em"] = function() return fti[fc()].quad end
- else
- dimenfactors["ex"] = 1/65536* 4 -- 4pt
- dimenfactors["em"] = 1/65536*10 -- 10pt
- end
-end
-
---[[ldx--
-<p>In order to set the defaults we call this function now. At some point
-the macro package needs to make sure the function is called again.</p>
---ldx]]--
-
-dimensions.texify()
+ dimenfactors["ex"] = 4 * 1/65536 -- 4pt
+ dimenfactors["em"] = 10 * 1/65536 -- 10pt
+-- dimenfactors["%"] = 4 * 1/65536 -- 400pt/100
--[[ldx--
<p>The previous code is rather efficient (also thanks to <l n='lpeg'/>) but we
@@ -389,27 +393,40 @@ function dimen(a)
end
end
-function string:todimen()
- if type(self) == "number" then
- return self
+-- Convert a dimension string to a number. Numbers are passed through. Strings
+-- are looked up in the known cache first; on a miss the string is matched
+-- against dimenpair and value/unit is stored in the cache. A string that does
+-- not yield both a value and a unit results in 0, which is cached as well.
+function string.todimen(str) -- maybe use tex.sp when available
+ if type(str) == "number" then
+ return str
else
- local k = known[self]
+ local k = known[str]
if not k then
- local value, unit = lpegmatch(dimenpair,self)
+ local value, unit = lpegmatch(dimenpair,str)
if value and unit then
- k = value/unit
+ k = value/unit -- to be considered: round
else
k = 0
end
- -- print(self,value,unit)
- known[self] = k
+ -- print(str,value,unit)
+ known[str] = k
end
return k
end
end
+--~ local known = { }
+
+--~ function string.todimen(str) -- maybe use tex.sp
+--~ local k = known[str]
+--~ if not k then
+--~ k = tex.sp(str)
+--~ known[str] = k
+--~ end
+--~ return k
+--~ end
+
+stringtodimen = string.todimen -- local variable defined earlier
+
+-- Format d divided by 2^16 as a string with five decimals.
function number.toscaled(d)
- return format("0.5f",d/2^16)
+ return format("%0.5f",d/2^16)
end
--[[ldx--
@@ -421,12 +438,12 @@ probably use a hash instead of a one-element table.</p>
<p>Goodie:s</p>
--ldx]]--
-function number.percent(n) -- will be cleaned up once luatex 0.30 is out
- local hsize = tex.hsize
- if type(hsize) == "string" then
- hsize = hsize:todimen()
+-- Return n percent of d. When d is not given tex.hsize is used; a string value
+-- is first converted to a number with stringtodimen.
+function number.percent(n,d) -- will be cleaned up once luatex 0.30 is out
+ d = d or tex.hsize
+ if type(d) == "string" then
+ d = stringtodimen(d)
end
- return (n/100) * hsize
+ return (n/100) * d
end
number["%"] = number.percent
diff --git a/lualibs-util-jsn.lua b/lualibs-util-jsn.lua
new file mode 100644
index 0000000..7493f10
--- /dev/null
+++ b/lualibs-util-jsn.lua
@@ -0,0 +1,145 @@
+if not modules then modules = { } end modules ['util-jsn'] = {
+ version = 1.001,
+ comment = "companion to m-json.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- Of course we could make a nice complete parser with proper error messages but
+-- as json is generated programmatically errors are systematic and we can assume
+-- a correct stream. If not, we have some fatal error anyway. So, we can just rely
+-- on strings being strings (apart from the unicode escape which is not in 5.1) and
+-- as we first catch known types we just assume that anything else is a number.
+
+local P, V, R, S, C, Cc, Cs, Ct, Cf, Cg = lpeg.P, lpeg.V, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cf, lpeg.Cg
+local lpegmatch = lpeg.match
+local format = string.format
+local utfchar = utf.char
+local concat = table.concat
+
+local tonumber, tostring, rawset, type = tonumber, tostring, rawset, type
+
+local json = utilities.json or { }
+utilities.json = json
+
+-- moduledata = moduledata or { }
+-- moduledata.json = json
+
+-- \\ \/ \b \f \n \r \t \uHHHH
+
+local lbrace = P("{")
+local rbrace = P("}")
+local lparent = P("[")
+local rparent = P("]")
+local comma = P(",")
+local colon = P(":")
+local dquote = P('"')
+
+local whitespace = lpeg.patterns.whitespace
+local optionalws = whitespace^0
+
+-- \uHHHH : exactly four hex digits. Inside the Cs of jstring the whole escape
+-- (including the \u) is replaced by the utf-8 encoding of the code point. The
+-- previous definition used S("09","AF","af") (a set with just "0" and "9"),
+-- matched one digit and handed the raw "\uX" text to tonumber, which fails.
+-- Surrogate pairs are not combined, each \uHHHH is converted on its own.
+local hexdigit   = R("09","AF","af")
+local escape     = (P("\\u") * C(hexdigit * hexdigit * hexdigit * hexdigit)) / function(s) return utfchar(tonumber(s,16)) end
+local jstring = dquote * Cs((escape + (1-dquote))^0) * dquote
+local jtrue = P("true") * Cc(true)
+local jfalse = P("false") * Cc(false)
+local jnull = P("null") * Cc(nil)
+local jnumber = (1-whitespace-rparent-rbrace-comma)^1 / tonumber
+
+local key = jstring
+
+-- The grammar that turns a json string into lua values:
+--
+-- object : "{" pairs "}", folded with rawset into one table; the pairs are
+--          optional so that "{}" (which json.tostring produces for an empty
+--          table) can be read back, it used to make the whole match fail
+-- pair   : a string key, a colon and a value, grouped for the fold
+-- array  : "[" values "]" collected in a table; the #rparent alternative in
+--          value makes "[]" work
+-- value  : known types first, anything else is assumed to be a number
+local jsonconverter = { "value",
+    object = lbrace * Cf(Ct("") * (V("pair") * (comma * V("pair"))^0)^-1,rawset) * optionalws * rbrace,
+    pair   = Cg(optionalws * key * optionalws * colon * V("value")),
+    array  = Ct(lparent * V("value") * (comma * V("value"))^0 * rparent),
+    value  = optionalws * (jstring + V("object") + V("array") + jtrue + jfalse + jnull + jnumber + #rparent) * optionalws,
+}
+
+-- local jsonconverter = { "value",
+-- object = lbrace * Cf(Ct("") * V("pair") * (comma * V("pair"))^0,rawset) * rbrace,
+-- pair = Cg(optionalws * V("string") * optionalws * colon * V("value")),
+-- array = Ct(lparent * V("value") * (comma * V("value"))^0 * rparent),
+-- string = jstring,
+-- value = optionalws * (V("string") + V("object") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws,
+-- }
+
+-- lpeg.print(jsonconverter) -- size 181
+
+-- Convert a json string into a lua value by matching it against jsonconverter;
+-- there is no error reporting, a failed match simply gives nil.
+function json.tolua(str)
+ return lpegmatch(jsonconverter,str)
+end
+
+-- Append the json representation of value to the list t and return t. A table
+-- without an array part (#value == 0) is written as an object with %q quoted
+-- keys, any other table as an array of its entries 1..#value. Strings are
+-- quoted with %q, numbers are stored as they are, booleans go through tostring
+-- and values of any other type add nothing.
+local function tojson(value,t) -- we could optimize #t
+    local kind = type(value)
+    if kind == "string" then
+        t[#t+1] = format("%q",value)
+    elseif kind == "number" then
+        t[#t+1] = value
+    elseif kind == "boolean" then
+        t[#t+1] = tostring(value)
+    elseif kind == "table" then
+        local size = #value
+        if size == 0 then
+            local first = true
+            for k, v in next, value do
+                t[#t+1] = first and "{" or ","
+                first = false
+                t[#t+1] = format("%q:",k)
+                tojson(v,t)
+            end
+            t[#t+1] = first and "{}" or "}"
+        else
+            -- a one element array needs no special treatment
+            for i=1,size do
+                t[#t+1] = i == 1 and "[" or ","
+                tojson(value[i],t)
+            end
+            t[#t+1] = "]"
+        end
+    end
+    return t
+end
+
+-- Serialize value: tables are converted with tojson and concatenated, strings
+-- and numbers are returned untouched (so not quoted and not converted) and
+-- everything else is passed through tostring.
+function json.tostring(value)
+    -- todo optimize for non table
+    local kind = type(value)
+    if kind == "table" then
+        local pieces = tojson(value,{})
+        return concat(pieces,"")
+    end
+    if kind == "string" or kind == "number" then
+        return value
+    end
+    return tostring(value)
+end
+
+-- local tmp = [[ { "a" : true, "b" : [ 123 , 456E-10, { "a" : true, "b" : [ 123 , 456 ] } ] } ]]
+
+-- tmp = json.tolua(tmp)
+-- inspect(tmp)
+-- tmp = json.tostring(tmp)
+-- inspect(tmp)
+-- tmp = json.tolua(tmp)
+-- inspect(tmp)
+-- tmp = json.tostring(tmp)
+-- inspect(tmp)
+
+-- inspect(json.tostring(true))
+
+return json
diff --git a/lualibs-util-lua.lua b/lualibs-util-lua.lua
new file mode 100644
index 0000000..f3be9dc
--- /dev/null
+++ b/lualibs-util-lua.lua
@@ -0,0 +1,351 @@
+if not modules then modules = { } end modules ['util-lua'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ comment = "the strip code is written by Peter Cawley",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- we will remove the 5.1 code some day soon
+
+local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format
+local load, loadfile, type = load, loadfile, type
+
+utilities = utilities or {}
+utilities.lua = utilities.lua or { }
+local luautilities = utilities.lua
+
+local report_lua = logs.reporter("system","lua")
+
+local tracestripping = false
+local forcestupidcompile = true -- use internal bytecode compiler
+luautilities.stripcode = true -- support stripping when asked for
+luautilities.alwaysstripcode = false -- saves 1 meg on 7 meg compressed format file (2012.08.12)
+luautilities.nofstrippedchunks = 0
+luautilities.nofstrippedbytes = 0
+local strippedchunks = { } -- allocate()
+luautilities.strippedchunks = strippedchunks
+
+luautilities.suffixes = {
+ tma = "tma",
+ tmc = jit and "tmb" or "tmc",
+ lua = "lua",
+ luc = jit and "lub" or "luc",
+ lui = "lui",
+ luv = "luv",
+ luj = "luj",
+ tua = "tua",
+ tuc = "tuc",
+}
+
+-- environment.loadpreprocessedfile can be set to a preprocessor
+
+if jit or status.luatex_version >= 74 then
+
+ -- Bookkeeping for a stripped chunk: optionally report it, append the name to
+ -- strippedchunks and bump luautilities.nofstrippedchunks.
+ local function register(name)
+ if tracestripping then
+ report_lua("stripped bytecode from %a",name or "unknown")
+ end
+ strippedchunks[#strippedchunks+1] = name
+ luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1
+ end
+
+    -- Compile luafile with the built-in load/dump pair and save the bytecode in
+    -- lucfile. The dump is stripped when strip and luautilities.stripcode are
+    -- both set or when luautilities.alwaysstripcode is set. Returns true, 0 on
+    -- success and false, 0 otherwise (error 1: the code does not load, error 2:
+    -- the file is missing or empty).
+    local function stupidcompile(luafile,lucfile,strip)
+        local code = io.loaddata(luafile)
+        if code and code ~= "" then
+            code = load(code)
+            if code then
+                code = dump(code,strip and luautilities.stripcode or luautilities.alwaysstripcode)
+                if code and code ~= "" then
+                    -- this was register(name) but there is no name in this scope
+                    -- so nothing got recorded
+                    register(luafile)
+                    io.savedata(lucfile,code)
+                    return true, 0
+                end
+            else
+                report_lua("fatal error %a in file %a",1,luafile)
+            end
+        else
+            report_lua("fatal error %a in file %a",2,luafile)
+        end
+        return false, 0
+    end
+
+ -- quite subtle ... doing this wrong incidentally can give more bytes
+
+ -- Load fullname (via environment.loadpreprocessedfile when that is set, else
+ -- loadfile), run the chunk and return it with 0 as second value. When
+ -- stripping applies the chunk that is returned is a stripped reload
+ -- (load(dump(code,true))) and the name is registered. forcestrip can be a
+ -- function, in which case it is called with fullname to decide.
+ -- NOTE(review): when loading fails code is nil; the branches that strip then
+ -- call dump(nil), which looks like it raises an error -- confirm.
+ function luautilities.loadedluacode(fullname,forcestrip,name)
+ -- quite subtle ... doing this wrong incidentally can give more bytes
+ name = name or fullname
+ local code = environment.loadpreprocessedfile and environment.loadpreprocessedfile(fullname) or loadfile(fullname)
+ if code then
+ code()
+ end
+ if forcestrip and luautilities.stripcode then
+ if type(forcestrip) == "function" then
+ forcestrip = forcestrip(fullname)
+ end
+ if forcestrip or luautilities.alwaysstripcode then
+ register(name)
+ return load(dump(code,true)), 0
+ else
+ return code, 0
+ end
+ elseif luautilities.alwaysstripcode then
+ register(name)
+ return load(dump(code,true)), 0
+ else
+ return code, 0
+ end
+ end
+
+    -- Load a string of code without running it. When stripping is requested
+    -- (forcestrip together with luautilities.stripcode, or alwaysstripcode) the
+    -- code is compiled, registered and dumped stripped before the final load.
+    -- The second return value is always 0 in this branch.
+    function luautilities.strippedloadstring(code,forcestrip,name) -- not executed
+        local strip = (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode
+        if strip then
+            local chunk = load(code)
+            if not chunk then
+                report_lua("fatal error %a in file %a",3,name)
+            end
+            register(name)
+            code = dump(chunk,true)
+        end
+        return load(code), 0
+    end
+
+    -- Compile luafile into lucfile with the internal compiler. An existing
+    -- lucfile is removed first; stripping is on unless strip is false. When
+    -- cleanup is true and both files exist afterwards the source is removed.
+    -- Returns what stupidcompile reports as first value.
+    function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true
+        report_lua("compiling %a into %a",luafile,lucfile)
+        os.remove(lucfile)
+        local done = stupidcompile(luafile,lucfile,strip ~= false)
+        if not done then
+            return done
+        end
+        report_lua("dumping %a into %a stripped",luafile,lucfile)
+        if cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then
+            report_lua("removing %a",luafile)
+            os.remove(luafile)
+        end
+        return done
+    end
+
+    -- Like load, but the resulting chunk is dumped stripped and loaded again;
+    -- nothing is returned when the first load fails.
+    function luautilities.loadstripped(...)
+        local chunk = load(...)
+        if not chunk then
+            return
+        end
+        return load(dump(chunk,true))
+    end
+
+else
+
+ -- The next function was posted by Peter Cawley on the lua list and strips line
+ -- number information etc. from the bytecode data blob. We only apply this trick
+ -- when we store data tables. Stripping makes the compressed format file about
+ -- 1MB smaller (and uncompressed we save at least 6MB).
+ --
+ -- You can consider this feature an experiment, so it might disappear. There is
+ -- no noticeable gain in runtime although the memory footprint should be somewhat
+ -- smaller (and the file system has a bit less to deal with).
+ --
+ -- Begin of borrowed code ... works for Lua 5.1 which LuaTeX currently uses ...
+
+ -- Bookkeeping for a stripped chunk: before and after are the byte counts of
+ -- the dump, their difference is added to luautilities.nofstrippedbytes and
+ -- returned; the name is appended to strippedchunks and the chunk counter
+ -- is incremented.
+ local function register(name,before,after)
+ local delta = before - after
+ if tracestripping then
+ report_lua("bytecodes stripped from %a, # before %s, # after %s, delta %s",name,before,after,delta)
+ end
+ strippedchunks[#strippedchunks+1] = name
+ luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1
+ luautilities.nofstrippedbytes = luautilities.nofstrippedbytes + delta
+ return delta
+ end
+
+ -- strip_code_pc(dump,name) takes a bytecode string (as produced by
+ -- string.dump) and returns a stripped version plus the number of bytes
+ -- that were saved. The real work is only done when _MAJORVERSION and
+ -- _MINORVERSION say 5.1; otherwise the dump is returned untouched with 0.
+ local strip_code_pc
+
+ if _MAJORVERSION == 5 and _MINORVERSION == 1 then
+
+ strip_code_pc = function(dump,name)
+ local before = #dump
+ -- bytes 5..11 of the header; the names below follow the code, they
+ -- presumably are the sizes of int, size_t, instruction and number
+ local version, format, endian, int, size, ins, num = byte(dump,5,11)
+ -- subint(dump,i,l) reads an l byte integer at position i (byte order
+ -- depends on the endian flag) and returns it with the next position
+ local subint
+ if endian == 1 then
+ subint = function(dump, i, l)
+ local val = 0
+ for n = l, 1, -1 do
+ val = val * 256 + byte(dump,i + n - 1)
+ end
+ return val, i + l
+ end
+ else
+ subint = function(dump, i, l)
+ local val = 0
+ for n = 1, l, 1 do
+ val = val * 256 + byte(dump,i + n - 1)
+ end
+ return val, i + l
+ end
+ end
+ -- strip_function handles one function and recurses into the nested
+ -- ones; it returns the stripped text and the offset where it stopped
+ local strip_function
+ strip_function = function(dump)
+ local count, offset = subint(dump, 1, size)
+ -- the leading string is replaced by a zero length
+ local stripped, dirty = rep("\0", size), offset + count
+ offset = offset + count + int * 2 + 4
+ offset = offset + int + subint(dump, offset, int) * ins
+ count, offset = subint(dump, offset, int)
+ -- skip the entries of the next list, the width depends on the tag
+ for n = 1, count do
+ local t
+ t, offset = subint(dump, offset, 1)
+ if t == 1 then
+ offset = offset + 1
+ elseif t == 4 then
+ offset = offset + size + subint(dump, offset, size)
+ elseif t == 3 then
+ offset = offset + num
+ end
+ end
+ count, offset = subint(dump, offset, int)
+ -- everything between the leading string and here is kept as it is
+ stripped = stripped .. sub(dump,dirty, offset - 1)
+ for n = 1, count do
+ local proto, off = strip_function(sub(dump,offset, -1))
+ stripped, offset = stripped .. proto, offset + off - 1
+ end
+ -- the three lists that follow are skipped in the input ...
+ offset = offset + subint(dump, offset, int) * int + int
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ offset = offset + subint(dump, offset, size) + size + int * 2
+ end
+ count, offset = subint(dump, offset, int)
+ for n = 1, count do
+ offset = offset + subint(dump, offset, size) + size
+ end
+ -- ... and written as three zero counts in the output
+ stripped = stripped .. rep("\0", int * 3)
+ return stripped, offset
+ end
+ -- the first 12 bytes (the header) are copied unchanged
+ dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1))
+ local after = #dump
+ local delta = register(name,before,after)
+ return dump, delta
+ end
+
+ else
+
+ strip_code_pc = function(dump,name)
+ return dump, 0
+ end
+
+ end
+
+ -- ... end of borrowed code.
+
+ -- quite subtle ... doing this wrong incidentally can give more bytes
+
+    -- Load fullname (via environment.loadpreprocessedfile when that is set, else
+    -- loadfile), run the chunk and return it. When stripping applies, the chunk
+    -- is dumped, stripped with strip_code_pc and loaded again. forcestrip can be
+    -- a function, in which case it is called with fullname to decide. The name
+    -- is only used for reporting and defaults to fullname.
+    function luautilities.loadedluacode(fullname,forcestrip,name)
+        -- quite subtle ... doing this wrong incidentally can give more bytes
+        name = name or fullname
+        -- the call used to go to environment.preprocessedloadfile, a field that
+        -- is not the one tested for (and not the one used in the other branch)
+        local code = environment.loadpreprocessedfile and environment.loadpreprocessedfile(fullname) or loadfile(fullname)
+        if code then
+            code()
+        end
+        if forcestrip and luautilities.stripcode then
+            if type(forcestrip) == "function" then
+                forcestrip = forcestrip(fullname)
+            end
+            if forcestrip then
+                local code, n = strip_code_pc(dump(code),name)
+                return load(code), n
+            elseif luautilities.alwaysstripcode then
+                return load(strip_code_pc(dump(code),name))
+            else
+                return code, 0
+            end
+        elseif luautilities.alwaysstripcode then
+            return load(strip_code_pc(dump(code),name))
+        else
+            return code, 0
+        end
+    end
+
+    -- Load a string of code without running it. When stripping is requested
+    -- (forcestrip together with luautilities.stripcode, or alwaysstripcode) the
+    -- code is compiled, dumped and stripped with strip_code_pc before the final
+    -- load. The second return value is the number of stripped bytes.
+    function luautilities.strippedloadstring(code,forcestrip,name) -- not executed
+        local stripped = 0
+        local strip = (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode
+        if strip then
+            local chunk = load(code)
+            if not chunk then
+                report_lua("fatal error in file %a",name)
+            end
+            code, stripped = strip_code_pc(dump(chunk),name)
+        end
+        return load(code), stripped
+    end
+
+    -- Compile luafile with load/dump and save the bytecode in lucfile; with
+    -- strip set the dump goes through strip_code_pc first. Returns the number
+    -- of bytes that were stripped (0 when nothing was stripped or saved).
+    -- NOTE(review): when the code does not load, the error is reported but
+    -- dump is still called with nil -- confirm that aborting there is intended.
+    local function stupidcompile(luafile,lucfile,strip)
+        local code = io.loaddata(luafile)
+        local n = 0
+        if code and code ~= "" then
+            code = load(code)
+            if not code then
+                report_lua("fatal error in file %a",luafile)
+            end
+            code = dump(code)
+            if strip then
+                -- strip_code_pc takes (dump,name); there used to be a boolean
+                -- in between so that the boolean ended up as the reported name
+                code, n = strip_code_pc(code,luafile)
+            end
+            if code and code ~= "" then
+                io.savedata(lucfile,code)
+            end
+        end
+        return n
+    end
+
+ local luac_normal = "texluac -o %q %q"
+ local luac_strip = "texluac -s -o %q %q"
+
+ -- Compile luafile into lucfile. An existing lucfile is removed first and
+ -- strip is treated as true unless it is false. With forcestupidcompile set
+ -- the external texluac is never spawned and the internal stupidcompile is
+ -- used as fallback; otherwise texluac is tried first and the fallback only
+ -- runs when that fails and fallback is set. After the fallback the source
+ -- is never removed, otherwise cleanup == true removes luafile when both
+ -- files exist.
+ function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true
+ report_lua("compiling %a into %a",luafile,lucfile)
+ os.remove(lucfile)
+ local done = false
+ if strip ~= false then
+ strip = true
+ end
+ if forcestupidcompile then
+ fallback = true
+ elseif strip then
+ done = os.spawn(format(luac_strip, lucfile,luafile)) == 0
+ else
+ done = os.spawn(format(luac_normal,lucfile,luafile)) == 0
+ end
+ if not done and fallback then
+ local n = stupidcompile(luafile,lucfile,strip)
+ if n > 0 then
+ report_lua("%a dumped into %a (%i bytes stripped)",luafile,lucfile,n)
+ else
+ report_lua("%a dumped into %a (unstripped)",luafile,lucfile)
+ end
+ cleanup = false -- better see how bad it is
+ done = true -- hm
+ end
+ if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then
+ report_lua("removing %a",luafile)
+ os.remove(luafile)
+ end
+ return done
+ end
+
+ luautilities.loadstripped = loadstring
+
+end
+
+-- local getmetatable, type = getmetatable, type
+--
+-- local types = { }
+--
+-- function luautilities.registerdatatype(d,name)
+-- types[getmetatable(d)] = name
+-- end
+--
+-- function luautilities.datatype(d)
+-- local t = type(d)
+-- if t == "userdata" then
+-- local m = getmetatable(d)
+-- return m and types[m] or "userdata"
+-- else
+-- return t
+-- end
+-- end
+--
+-- luautilities.registerdatatype(lpeg.P("!"),"lpeg")
+--
+-- print(luautilities.datatype(lpeg.P("oeps")))
diff --git a/lualibs-util-mrg.lua b/lualibs-util-mrg.lua
new file mode 100644
index 0000000..78b23dc
--- /dev/null
+++ b/lualibs-util-mrg.lua
@@ -0,0 +1,221 @@
+if not modules then modules = { } end modules ['util-mrg'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+-- hm, quite unreadable
+
+local gsub, format = string.gsub, string.format
+local concat = table.concat
+local type, next = type, next
+
+local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt, Cb, Cg = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt, lpeg.Cb, lpeg.Cg
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
+
+-- the merger lives in the (global) utilities namespace; an existing table is reused
+utilities = utilities or { }
+local merger = utilities.merger or { }
+utilities.merger = merger
+-- when true, self_compact compacts the code (comments, whitespace) before it is merged
+merger.strip_comment = true
+
+local report = logs.reporter("system","merge")
+-- NOTE(review): this exports the merge reporter as utilities.report, so it is shared by the whole namespace
+utilities.report = report
+
+-- marker texts that delimit the merged section and wrap each library
+local m_begin_merge = "begin library merge"
+local m_end_merge = "end library merge"
+local m_begin_closure = "do -- create closure to overcome 200 locals limit"
+local m_end_closure = "end -- of closure"
+
+-- Lua pattern that finds the merged section: control characters, the begin marker
+-- comment, the (captured) content, the end marker comment and control characters
+local m_pattern =
+ "%c+" ..
+ "%-%-%s+" .. m_begin_merge ..
+ "%c+(.-)%c+" ..
+ "%-%-%s+" .. m_end_merge ..
+ "%c+"
+
+-- format template that is substituted for a match of m_pattern (%s is the merged code)
+local m_format =
+ "\n\n-- " .. m_begin_merge ..
+ "\n%s\n" ..
+ "-- " .. m_end_merge .. "\n\n"
+
+-- skeleton of a new file with an empty merged section (used by merger.selfcreate)
+local m_faked =
+ "-- " .. "created merged file" .. "\n\n" ..
+ "-- " .. m_begin_merge .. "\n\n" ..
+ "-- " .. m_end_merge .. "\n\n"
+
+-- summary that self_libs appends to the merged code (as Lua comments)
+local m_report = [[
+-- used libraries : %s
+-- skipped libraries : %s
+-- original bytes : %s
+-- stripped bytes : %s
+]]
+
+-- line put in front of each merged library: marks it as loaded unless it already is
+local m_preloaded = [[package.loaded[%q] = package.loaded[%q] or true]]
+
+-- returns the skeleton (m_faked) that merger.selfcreate uses instead of a loaded file
+local function self_fake()
+ return m_faked
+end
+
+-- returns the empty replacement that merger.selfclean puts in the merged section
+local function self_nothing()
+ return ""
+end
+
+-- Loads the file with the given name and reports what happened. The result
+-- is always a string: a missing or empty file gives an empty string.
+local function self_load(name)
+    local content = io.loaddata(name)
+    if not content or content == "" then
+        report("unknown file %a",name)
+        return ""
+    end
+    report("inserting file %a",name)
+    return content
+end
+
+-- -- saves some 20K .. scite comments
+-- data = gsub(data,"%-%-~[^\n\r]*[\r\n]","")
+-- -- saves some 20K .. ldx comments
+-- data = gsub(data,"%-%-%[%[ldx%-%-.-%-%-ldx%]%]%-%-","")
+
+-- building blocks for the compactor; the order of alternatives in compact matters
+local space = patterns.space
+local eol = patterns.newline
+-- a long bracket: the run of = signs at the opening is kept in the group "init" and
+-- closeeq only matches a closing bracket that has the same run
+local equals = P("=")^0
+local open = P("[") * Cg(equals,"init") * P("[") * P("\n")^-1
+local close = P("]") * C(equals) * P("]")
+local closeeq = Cmt(close * Cb("init"), function(s,i,a,b) return a == b end)
+local longstring = open * (1 - closeeq)^0 * close
+
+local quoted = patterns.quoted
+local emptyline = space^0 * eol
+local operator1 = P("<=") + P(">=") + P("~=") + P("..") + S("/^<>=*+%%")
+local operator2 = S("*+/")
+local operator3 = S("-")
+local separator = S(",;")
+
+-- a few sequences with embedded whitespace that are rewritten to a fixed compact form
+local ignore = (P("]") * space^1 * P("=") * space^1 * P("]")) / "]=[" +
+ (P("=") * space^1 * P("{")) / "={" +
+ (P("(") * space^1) / "(" +
+ (P("{") * (space+eol)^1 * P("}")) / "{}"
+-- quoted strings and long strings are matched as a whole so that the rules below
+-- are not applied to their content
+local strings = quoted -- / function (s) print("<<"..s..">>") return s end
+-- long comments (with surrounding empty lines) are removed
+local longcmt = (emptyline^0 * P("--") * longstring * emptyline^0) / ""
+local longstr = longstring
+-- line comments (with surrounding empty lines) are replaced by a single newline
+local comment = emptyline^0 * P("--") * P("-")^0 * (1-eol)^0 * emptyline^1 / "\n"
+-- whitespace around operators and separators is removed (each operator class has
+-- its own rule for what may be dropped before and after)
+local pack = ((eol+space)^0 / "") * operator1 * ((eol+space)^0 / "") +
+ ((eol+space)^0 / "") * operator2 * ((space)^0 / "") +
+ ((eol+space)^1 / "") * operator3 * ((space)^1 / "") +
+ ((space)^0 / "") * separator * ((space)^0 / "")
+-- two or more empty lines become one newline, two spaces become one
+local lines = emptyline^2 / "\n"
+local spaces = (space * space) / " "
+----- spaces = ((space+eol)^1 ) / " "
+
+-- the compactor: a substitution capture over the first matching alternative
+local compact = Cs ( (
+ ignore +
+ strings +
+ longcmt +
+ longstr +
+ comment +
+ pack +
+ lines +
+ spaces +
+ 1
+)^1 )
+
+-- strip collapses runs of empty lines; stripreturn copies everything that comes
+-- before a "return <something>" that ends the data (so that final return is dropped)
+local strip = Cs((emptyline^2/"\n" + 1)^0)
+local stripreturn = Cs((1-P("return") * space^1 * P(1-space-eol)^1 * (space+eol)^0 * P(-1))^1)
+
+-- Compacts a piece of Lua code: first the compactor is applied, then runs of
+-- empty lines are collapsed.
+function merger.compact(data)
+    local compacted = lpegmatch(compact,data)
+    return lpegmatch(strip,compacted)
+end
+
+-- Prepares the code of one library for merging. When merger.strip_comment is
+-- set the code is compacted and gets a size comment in front. In both cases a
+-- final return statement is dropped when stripreturn matches. The second
+-- return value is the number of bytes that compacting saved (0 when disabled).
+local function self_compact(data)
+    if not merger.strip_comment then
+        return lpegmatch(stripreturn,data) or data, 0
+    end
+    local before = #data
+    -- the strip pass also strips in longstrings ... alas
+    local compacted = lpegmatch(strip,lpegmatch(compact,data))
+    local after = #compacted
+    local delta = before - after
+    report("original size %s, compacted to %s, stripped %s",before,after,delta)
+    local annotated = format("-- original size: %s, stripped down to: %s\n\n%s",before,after,compacted)
+    return lpegmatch(stripreturn,annotated) or annotated, delta
+end
+
+-- Saves data under the given name and reports the size; empty data is not saved.
+local function self_save(name, data)
+    if data == "" then
+        return
+    end
+    io.savedata(name,data)
+    report("saving %s with size %s",name,#data)
+end
+
+-- Replaces the first merged section (see m_pattern) in data by the given code,
+-- wrapped in the begin and end markers. Empty data stays empty.
+local function self_swap(data,code)
+    if data == "" then
+        return ""
+    end
+    -- a function is used so that % characters in the code are not seen as captures
+    local function replacement()
+        return format(m_format,code)
+    end
+    local swapped = gsub(data,m_pattern,replacement,1)
+    return swapped
+end
+
+-- Collects the code of the given libraries in one string.
+--
+-- libs : a library file name or a list of them
+-- list : a path or a list of paths that are checked for the libraries
+--
+-- The paths are scanned per library until a library is found; all libraries
+-- are then taken from that one path. Each library is compacted, wrapped in a
+-- closure and preceded by a package.loaded line. A library that is not on the
+-- path (or cannot be loaded) is skipped and reported. The result ends with a
+-- summary (m_report); when no path is found only the leading newline is returned.
+local function self_libs(libs,list)
+    local result, foundpath = { "\n" }, nil
+    if type(libs) == 'string' then libs = { libs } end
+    if type(list) == 'string' then list = { list } end
+    for i=1,#libs do
+        local lib = libs[i]
+        -- NOTE(review): there is no break in the inner loop so the last path
+        -- that has this library wins, not the first one
+        for j=1,#list do
+            local pth = gsub(list[j],"\\","/") -- file.clean_path
+            report("checking library path %a",pth)
+            local name = pth .. "/" .. lib
+            if lfs.isfile(name) then
+                foundpath = pth
+            end
+        end
+        if foundpath then break end
+    end
+    if foundpath then
+        report("using library path %a",foundpath)
+        local right, wrong, original, stripped = { }, { }, 0, 0
+        for i=1,#libs do
+            local lib = libs[i]
+            local fullname = foundpath .. "/" .. lib
+            -- a file that exists but cannot be loaded is treated as skipped
+            local data = lfs.isfile(fullname) and io.loaddata(fullname,true)
+            if data then
+                report("using library %a",fullname)
+                local preloaded = file.nameonly(lib)
+                original = original + #data
+                local compacted, delta = self_compact(data)
+                right[#right+1] = lib
+                result[#result+1] = m_begin_closure
+                result[#result+1] = format(m_preloaded,preloaded,preloaded)
+                result[#result+1] = compacted
+                result[#result+1] = m_end_closure
+                stripped = stripped + delta
+            else
+                report("skipping library %a",fullname)
+                wrong[#wrong+1] = lib
+            end
+        end
+        right = #right > 0 and concat(right," ") or "-"
+        wrong = #wrong > 0 and concat(wrong," ") or "-"
+        report("used libraries: %a",right)
+        report("skipped libraries: %a",wrong)
+        report("original bytes: %a",original)
+        report("stripped bytes: %a",stripped)
+        result[#result+1] = format(m_report,right,wrong,original,stripped)
+    else
+        report("no valid library path found")
+    end
+    return concat(result, "\n\n")
+end
+
+-- Creates the file target with the given libraries merged into an otherwise
+-- empty skeleton. Without a target nothing happens.
+function merger.selfcreate(libs,list,target)
+    if not target then
+        return
+    end
+    local skeleton = self_fake()
+    local merged = self_libs(libs,list)
+    self_save(target,self_swap(skeleton,merged))
+end
+
+-- Replaces the merged section of the file name by the given libraries and
+-- saves the result as target (or as name when no target is given).
+function merger.selfmerge(name,libs,list,target)
+    local current = self_load(name)
+    local merged = self_libs(libs,list)
+    self_save(target or name,self_swap(current,merged))
+end
+
+-- Empties the merged section of the file name and saves the file in place.
+function merger.selfclean(name)
+    local current = self_load(name)
+    local nothing = self_nothing()
+    self_save(name,self_swap(current,nothing))
+end
diff --git a/lualibs-util-sto.lua b/lualibs-util-sto.lua
new file mode 100644
index 0000000..191d6cd
--- /dev/null
+++ b/lualibs-util-sto.lua
@@ -0,0 +1,189 @@
+if not modules then modules = { } end modules ['util-sto'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+local setmetatable, getmetatable, type = setmetatable, getmetatable, type
+
+utilities = utilities or { }
+utilities.storage = utilities.storage or { }
+local storage = utilities.storage
+
+-- Marks an existing table as storage by setting __storage__ in its metatable
+-- (a metatable is created when there is none) and returns the table. Without
+-- a table a message is printed and the program exits.
+function storage.mark(t)
+    if t then
+        local meta = getmetatable(t)
+        if not meta then
+            meta = { }
+            setmetatable(t,meta)
+        end
+        meta.__storage__ = true
+        return t
+    end
+    print("\nfatal error: storage cannot be marked\n")
+    os.exit()
+end
+
+-- Like storage.mark but a new table is created when none is given.
+function storage.allocate(t)
+    local target = t or { }
+    local meta = getmetatable(target)
+    if meta then
+        meta.__storage__ = true
+    else
+        setmetatable(target,{ __storage__ = true })
+    end
+    return target
+end
+
+-- Returns the __storage__ field of the metatable of t; a table without
+-- metatable gives whatever getmetatable returned (normally nil).
+function storage.marked(t)
+    local meta = getmetatable(t)
+    if not meta then
+        return meta
+    end
+    return meta.__storage__
+end
+
+-- Returns t unchanged when it is set. Without a table a message is printed
+-- and the program exits. The message is printed (as in storage.mark) because
+-- this file does not define a reporter.
+function storage.checked(t)
+    if not t then
+        print("\nfatal error: storage has not been allocated\n")
+        os.exit()
+        return
+    end
+    return t
+end
+
+-- function utilities.storage.delay(parent,name,filename)
+-- local m = getmetatable(parent)
+-- m.__list[name] = filename
+-- end
+--
+-- function utilities.storage.predefine(parent)
+-- local list = { }
+-- local m = getmetatable(parent) or {
+-- __list = list,
+-- __index = function(t,k)
+-- local l = require(list[k])
+-- t[k] = l
+-- return l
+-- end
+-- }
+-- setmetatable(parent,m)
+-- end
+--
+-- bla = { }
+-- utilities.storage.predefine(bla)
+-- utilities.storage.delay(bla,"test","oepsoeps")
+-- local t = bla.test
+-- table.print(t)
+-- print(t.a)
+
+-- Delays the initialization of data: the first access to a missing key calls
+-- initialize (without arguments) and then looks the key up again. The trigger
+-- is stored in the metatable of data; an existing metatable is reused.
+function storage.setinitializer(data,initialize)
+    local meta = getmetatable(data) or { }
+    local function trigger(tab,key)
+        meta.__index = nil -- so that we can access the entries during initializing
+        initialize()
+        return tab[key]
+    end
+    meta.__index = trigger
+    setmetatable(data,meta)
+end
+
+-- shared metatable: a missing key gets itself as value (and the value is stored)
+local keyisvalue = { }
+
+function keyisvalue.__index(t,k)
+    t[k] = k
+    return k
+end
+
+-- Gives t (or a new table) the key-is-value metatable and returns it.
+function storage.sparse(t)
+    return setmetatable(t or { },keyisvalue)
+end
+
+-- table namespace ?
+
+-- handlers that the table.setmetatable* functions below select by name
+local function f_empty () return "" end -- t,k
+local function f_self (t,k) t[k] = k return k end
+local function f_table (t,k) local v = { } t[k] = v return v end
+local function f_ignore() end -- t,k,v
+
+-- ready made metatables for tables that have no metatable yet; these are shared
+-- by all tables that get them
+local t_empty = { __index = f_empty }
+local t_self = { __index = f_self }
+local t_table = { __index = f_table }
+local t_ignore = { __newindex = f_ignore }
+
+-- Sets the __index of t and returns t. When the first argument is not a table
+-- it is taken as f and a new table is used. The strings "empty", "key" and
+-- "table" select a predefined handler, anything else is used as is. A table
+-- without metatable gets a shared metatable for the predefined handlers.
+function table.setmetatableindex(t,f)
+    if type(t) ~= "table" then
+        f, t = t, { }
+    end
+    local handler, shared = f, nil
+    if f == "empty" then
+        handler, shared = f_empty, t_empty
+    elseif f == "key" then
+        handler, shared = f_self, t_self
+    elseif f == "table" then
+        handler, shared = f_table, t_table
+    end
+    local meta = getmetatable(t)
+    if meta then
+        meta.__index = handler
+    else
+        setmetatable(t,shared or { __index = handler })
+    end
+    return t
+end
+
+-- Sets the __newindex of t and returns t. When the first argument is not a
+-- table it is taken as f and a new table is used. The string "ignore" selects
+-- a handler that does nothing, anything else is used as is.
+function table.setmetatablenewindex(t,f)
+    if type(t) ~= "table" then
+        f, t = t, { }
+    end
+    local ignore = f == "ignore"
+    local meta = getmetatable(t)
+    if meta then
+        meta.__newindex = ignore and f_ignore or f
+    elseif ignore then
+        setmetatable(t,t_ignore)
+    else
+        setmetatable(t,{ __newindex = f })
+    end
+    return t
+end
+
+-- Sets the __call of t and returns t. When the first argument is not a table
+-- it is taken as f and a new table is used.
+function table.setmetatablecall(t,f)
+    if type(t) ~= "table" then
+        f, t = t, { }
+    end
+    local meta = getmetatable(t)
+    if not meta then
+        setmetatable(t,{ __call = f })
+    else
+        meta.__call = f
+    end
+    return t
+end
+
+-- Stores value under key in the metatable of t (created when needed) and
+-- returns t.
+function table.setmetatablekey(t,key,value)
+    local meta = getmetatable(t)
+    if not meta then
+        -- no metatable yet: attach a fresh one first
+        meta = { }
+        setmetatable(t,meta)
+    end
+    meta[key] = value
+    return t
+end
+
+-- Returns the value stored under key in the metatable of t; a table without
+-- metatable gives whatever getmetatable returned (normally nil). The value
+-- argument is not used.
+function table.getmetatablekey(t,key,value)
+    local meta = getmetatable(t)
+    if not meta then
+        return meta
+    end
+    return meta[key]
+end
diff --git a/lualibs-util-str.lua b/lualibs-util-str.lua
new file mode 100644
index 0000000..4890a11
--- /dev/null
+++ b/lualibs-util-str.lua
@@ -0,0 +1,766 @@
+if not modules then modules = { } end modules ['util-str'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+utilities = utilities or {}
+utilities.strings = utilities.strings or { }
+local strings = utilities.strings
+
+local format, gsub, rep, sub = string.format, string.gsub, string.rep, string.sub
+local load, dump = load, string.dump
+local tonumber, type, tostring = tonumber, type, tostring
+local unpack, concat = table.unpack, table.concat
+local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc
+local patterns, lpegmatch = lpeg.patterns, lpeg.match
+local utfchar, utfbyte = utf.char, utf.byte
+----- loadstripped = utilities.lua.loadstripped
+----- setmetatableindex = table.setmetatableindex
+
+-- Loader for generated code. Below version 5.2 this is just load; otherwise the
+-- code is loaded, dumped with the strip flag set and the result is loaded again.
+-- NOTE(review): _LUAVERSION is not defined in this file, presumably another library sets it
+local loadstripped = _LUAVERSION < 5.2 and load or function(str)
+ return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stripped load
+end
+
+-- todo: make a special namespace for the formatter
+
+if not number then number = { } end -- temp hack for luatex-fonts
+
+local stripper = patterns.stripzeros
+
+-- Formats n/65536 with five decimals and the unit pt and passes the result
+-- through the stripzeros pattern; nil and 0 give "0pt".
+local function points(n)
+    if not n or n == 0 then
+        return "0pt"
+    end
+    local formatted = format("%.5fpt",n/65536)
+    return (lpegmatch(stripper,formatted))
+end
+
+-- As points but with the factor 7200/7227 applied and the unit bp.
+local function basepoints(n)
+    if not n or n == 0 then
+        return "0bp"
+    end
+    local formatted = format("%.5fbp", n*(7200/7227)/65536)
+    return (lpegmatch(stripper,formatted))
+end
+
+number.points = points
+number.basepoints = basepoints
+
+-- str = " \n \ntest \n test\ntest "
+-- print("["..string.gsub(string.collapsecrlf(str),"\n","+").."]")
+
+-- rubish is a line that only has spaces and tabs, anyrubish one space, tab or newline
+local rubish = patterns.spaceortab^0 * patterns.newline
+local anyrubish = patterns.spaceortab + patterns.newline
+local anything = patterns.anything
+-- spaces and tabs in front of a newline are removed
+local stripped = (patterns.spaceortab^1 / "") * patterns.newline
+-- blank lines at the start and all whitespace at the end are removed
+local leading = rubish^0 / ""
+local trailing = (anyrubish^1 * patterns.endofstring) / ""
+-- three or more blank lines in a row become one newline
+local redundant = rubish^3 / "\n"
+
+local pattern = Cs(leading * (trailing + redundant + stripped + anything)^0)
+
+-- Returns str with the substitutions above applied.
+function strings.collapsecrlf(str)
+ return lpegmatch(pattern,str)
+end
+
+-- The following functions might end up in another namespace.
+
+local repeaters = { } -- watch how we also moved the -1 in depth-1 to the creator
+
+-- Returns a table that maps a number k onto str repeated k+offset times (an
+-- empty string when that count is not positive or when k is false). The
+-- strings are made on demand and kept; repeaters are shared per str and offset.
+function strings.newrepeater(str,offset)
+    offset = offset or 0
+    local byoffset = repeaters[str]
+    if not byoffset then
+        byoffset = { }
+        repeaters[str] = byoffset
+    end
+    local cached = byoffset[offset]
+    if cached then
+        return cached
+    end
+    local function compute(t,k)
+        if not k then
+            return ""
+        end
+        local count = k + offset
+        local s = ""
+        if count > 0 then
+            s = rep(str,count)
+        end
+        t[k] = s
+        return s
+    end
+    local repeater = setmetatable({ },{ __index = compute })
+    byoffset[offset] = repeater
+    return repeater
+end
+
+-- local dashes = strings.newrepeater("--",-1)
+-- print(dashes[2],dashes[3],dashes[1])
+
+-- state of the tab expander; it is reset at the start of each match so the
+-- values here only serve as placeholders
+local extra, tab, start = 0, 0, 4
+
+local nspaces = strings.newrepeater(" ")
+
+string.nspaces = nspaces
+
+-- The first (extra) argument of the match is the tab width. A tab is replaced
+-- by the number of spaces that brings us to the next tab stop; extra keeps
+-- track of the growth of the current line and start of where that line begins.
+local pattern =
+    Carg(1) / function(t)
+        extra, tab, start = 0, t or 7, 1
+    end
+  * Cs((
+        Cp() * patterns.tab / function(position)
+            local current = (position - start + 1) + extra
+            local spaces = tab - ((current-1) % tab)
+            if spaces > 0 then
+                extra = extra + spaces - 1
+                return nspaces[spaces] -- rep(" ",spaces)
+            else
+                return ""
+            end
+        end
+      + patterns.newline * Cp() / function(position)
+            extra, start = 0, position
+        end
+      + patterns.anything
+    )^1)
+
+-- Returns str with tabs expanded to spaces; tab is the tab width (default 7).
+function strings.tabtospace(str,tab)
+    return lpegmatch(pattern,str,1,tab or 7)
+end
+
+-- local t = {
+-- "1234567123456712345671234567",
+-- "\tb\tc",
+-- "a\tb\tc",
+-- "aa\tbb\tcc",
+-- "aaa\tbbb\tccc",
+-- "aaaa\tbbbb\tcccc",
+-- "aaaaa\tbbbbb\tccccc",
+-- "aaaaaa\tbbbbbb\tcccccc\n aaaaaa\tbbbbbb\tcccccc",
+-- "one\n two\nxxx three\nxx four\nx five\nsix",
+-- }
+-- for k=1,#t do
+-- print(strings.tabtospace(t[k]))
+-- end
+
+-- Removes the whitespace at the start of str and the spaces at the start of
+-- each following line; runs of newlines become one newline.
+function strings.striplong(str) -- strips all leading spaces
+    local trimmed = gsub(str,"^%s*","")
+    local flattened = gsub(trimmed,"[\n\r]+ *","\n")
+    return flattened
+end
+
+-- local template = string.striplong([[
+-- aaaa
+-- bb
+-- cccccc
+-- ]])
+
+-- Replaces each run of colons, dashes, plus signs and underscores by one space.
+function strings.nice(str)
+    local nicer = gsub(str,"[:%-+_]+"," ") -- maybe more
+    return nicer
+end
+
+-- Work in progress. Interesting is that compared to the built-in this is faster in
+-- luatex than in luajittex where we have a comparable speed. It only makes sense
+-- to use the formatter when a (somewhat) complex format is used a lot. Each formatter
+-- is a function so there is some overhead and not all formatted output is worth that
+-- overhead. Keep in mind that there is an extra function call involved. In principle
+-- we end up with a string concatenation so one could inline such a sequence but often
+-- at the cost of less readability. So, it's a sort of (visual) compromise. Of course
+-- there is the benefit of more variants. (Concerning the speed: a simple format like
+-- %05fpt is better off with format than with a formatter, but as soon as you put
+-- something in front formatters become faster. Passing the pt as extra argument makes
+-- formatters behave better. Of course this is rather implementation dependent. Also,
+-- when a specific format is only used a few times the overhead in creating it is not
+-- compensated by speed.)
+--
+-- More info can be found in cld-mkiv.pdf so here I stick to a simple list.
+--
+-- integer %...i number
+-- integer %...d number
+-- unsigned %...u number
+-- character %...c number
+-- hexadecimal %...x number
+-- HEXADECIMAL %...X number
+-- octal %...o number
+-- string %...s string number
+-- float %...f number
+-- exponential %...e number
+-- exponential %...E number
+-- autofloat %...g number
+-- autofloat %...G number
+-- utf character %...c number
+-- force tostring %...S any
+-- force tostring %Q any
+-- force tonumber %N number (strip leading zeros)
+-- signed number %I number
+-- rounded number %r number
+-- 0xhexadecimal %...h character number
+-- 0xHEXADECIMAL %...H character number
+-- U+hexadecimal %...u character number
+-- U+HEXADECIMAL %...U character number
+-- points %p number (scaled points)
+-- basepoints %b number (scaled points)
+-- table concat %...t table
+-- serialize %...T sequenced (no nested tables)
+-- boolean (logic) %l boolean
+-- BOOLEAN %L boolean
+-- whitespace %...w
+-- automatic %...a 'whatever' (string, table, ...)
+-- automatic %...A "whatever" (string, table, ...)
+
+local n = 0
+
+-- we are somewhat sloppy in parsing prefixes as it's not that critical
+
+-- hard to avoid but we can collect them in a private namespace if needed
+
+-- inline the next two makes no sense as we only use this in logging
+
+local sequenced = table.sequenced
+
+-- Returns s as a string in double quotes. Numbers are not quoted, nil gives
+-- an empty quoted string and tables are serialized with sequenced (sep is
+-- the separator, a comma by default).
+function string.autodouble(s,sep)
+    if s == nil then
+        return '""'
+    end
+    local kind = type(s)
+    if kind == "number" then
+        return tostring(s) -- tostring not really needed
+    elseif kind == "table" then
+        return '"' .. sequenced(s,sep or ",") .. '"'
+    else
+        return '"' .. tostring(s) .. '"'
+    end
+end
+
+-- Returns s as a string in single quotes. Numbers are not quoted, nil gives
+-- an empty quoted string and tables are serialized with sequenced (sep is
+-- the separator, a comma by default).
+function string.autosingle(s,sep)
+    if s == nil then
+        return "''"
+    end
+    local kind = type(s)
+    if kind == "number" then
+        return tostring(s) -- tostring not really needed
+    elseif kind == "table" then
+        return "'" .. sequenced(s,sep or ",") .. "'"
+    else
+        return "'" .. tostring(s) .. "'"
+    end
+end
+
+-- overloads for specific code points (indexed by number)
+local tracedchars = { }
+string.tracedchars = tracedchars
+strings.tracers = tracedchars
+
+-- Returns a description of a character that is given as number or as string:
+-- the entry in tracedchars when there is one, else the character followed by
+-- its code point as (U+XXXXX).
+function string.tracedchar(b)
+    -- todo: table
+    local isnumber = type(b) == "number"
+    local code = isnumber and b or utfbyte(b)
+    local traced = tracedchars[code]
+    if traced then
+        return traced
+    end
+    local char = isnumber and utfchar(code) or b
+    return char .. " (U+" .. format('%05X',code) .. ")"
+end
+
+-- Splits a number in a sign and an absolute value: "+" and i for a number
+-- that is zero or more, "-" and -i otherwise. Zero counts as positive so
+-- that it is not shown as "-0".
+function number.signed(i)
+    if i >= 0 then
+        return "+", i
+    else
+        return "-", -i
+    end
+end
+
+-- code that is put in front of each generated formatter: the locals that the
+-- generated expressions refer to
+local preamble = [[
+local type = type
+local tostring = tostring
+local tonumber = tonumber
+local format = string.format
+local concat = table.concat
+local signed = number.signed
+local points = number.points
+local basepoints = number.basepoints
+local utfchar = utf.char
+local utfbyte = utf.byte
+local lpegmatch = lpeg.match
+local nspaces = string.nspaces
+local tracedchar = string.tracedchar
+local autosingle = string.autosingle
+local autodouble = string.autodouble
+local sequenced = table.sequenced
+]]
+
+-- the generated chunk: the preamble, the preamble of the formatter instance, and
+-- a function with the argument list and the expression that the builder made
+local template = [[
+%s
+%s
+return function(%s) return %s end
+]]
+
+-- argument lists by count: arguments[3] is "a1,a2,a3"; missing entries are built
+-- from the previous one and kept
+local arguments = { "a1" } -- faster than previously used (select(n,...))
+
+setmetatable(arguments, { __index =
+ function(t,k)
+ local v = t[k-1] .. ",a" .. k
+ t[k] = v
+ return v
+ end
+})
+
+-- prefix_any captures the (possibly empty) run of signs, spaces, periods and digits
+-- in front of a specifier; prefix_tab captures everything that is not a letter, a
+-- digit or a percent sign (used as separator by the table formats)
+local prefix_any = C((S("+- .") + R("09"))^0)
+local prefix_tab = C((1-R("az","AZ","09","%%"))^0)
+
+-- we've split all cases as then we can optimize them (let's omit the fuzzy u)
+
+-- todo: replace outer formats in next by ..
+
+-- Each of the following functions takes the next argument (by incrementing
+-- n) and returns the code of an expression for it; f is the captured prefix.
+
+-- %s: with a prefix a regular format call, else the argument itself (or an
+-- empty string when it is nil or false)
+local function format_s(f)
+    n = n + 1
+    if not f or f == "" then
+        -- best no tostring in order to stay compatible (.. does a selective tostring too)
+        return format("(a%s or '')",n) -- goodie: nil check
+    end
+    return format("format('%%%ss',a%s)",f,n)
+end
+
+-- %S: as %s but with a forced tostring
+local function format_S(f) -- can be optimized
+    n = n + 1
+    if not f or f == "" then
+        return format("tostring(a%s)",n)
+    end
+    return format("format('%%%ss',tostring(a%s))",f,n)
+end
+
+-- %q: a quoted string (an empty string when the argument is nil or false)
+local function format_q()
+    n = n + 1
+    local code = "(a%s and format('%%q',a%s) or '')"
+    return format(code,n,n) -- goodie: nil check (maybe separate lpeg, not faster)
+end
+
+-- %Q: as %q but with a forced tostring
+local function format_Q() -- can be optimized
+    n = n + 1
+    local code = "format('%%q',tostring(a%s))"
+    return format(code,n)
+end
+
+-- %i and %d: with a prefix a regular format call, else the argument itself
+local function format_i(f)
+    n = n + 1
+    if not f or f == "" then
+        return format("a%s",n)
+    end
+    return format("format('%%%si',a%s)",f,n)
+end
+
+local format_d = format_i
+
+-- %I: the sign and the absolute value as returned by signed
+local function format_I(f)
+    n = n + 1
+    local code = "format('%%s%%%si',signed(a%s))"
+    return format(code,f,n)
+end
+
+-- The next specifiers are handed over to string.format as they are, with
+-- the captured prefix. They all take the next argument.
+local function passthrough(letter)
+    local code = "format('%%%s" .. letter .. "',a%s)"
+    return function(f)
+        n = n + 1
+        return format(code,f,n)
+    end
+end
+
+local format_f = passthrough("f")
+local format_g = passthrough("g")
+local format_G = passthrough("G")
+local format_e = passthrough("e")
+local format_E = passthrough("E")
+local format_x = passthrough("x")
+local format_X = passthrough("X")
+local format_o = passthrough("o")
+
+-- %c: the argument as utf character
+local function format_c()
+    n = n + 1
+    return format("utfchar(a%s)",n)
+end
+
+-- %C: the argument as traced character (see string.tracedchar)
+local function format_C()
+    n = n + 1
+    return format("tracedchar(a%s)",n)
+end
+
+-- %r: rounded, a float format with zero decimals
+local function format_r(f)
+    n = n + 1
+    local code = "format('%%%s.0f',a%s)"
+    return format(code,f,n)
+end
+
+-- Hexadecimal formats that accept a number or a character (which is then
+-- converted with utfbyte). The result starts with lead unless the prefix
+-- is a single "-"; the width is 05 when there is no (other) prefix.
+-- NOTE(review): only a prefix that is exactly "-" drops the lead, a prefix
+-- like "-8" is passed on as a regular (left aligned) width
+local function hexadecimal(lead,letter)
+    local body = "%%%s" .. letter .. "',type(a%s) == 'number' and a%s or utfbyte(a%s))"
+    return function(f)
+        n = n + 1
+        local bare = f == "-"
+        local spec = (bare or f == "") and "05" or f
+        local code = "format('" .. (bare and "" or lead) .. body
+        return format(code,spec,n,n,n)
+    end
+end
+
+local format_h = hexadecimal("0x","x")
+local format_H = hexadecimal("0x","X")
+local format_u = hexadecimal("u+","x")
+local format_U = hexadecimal("U+","X")
+
+-- %p: the argument passed to points
+local function format_p()
+    n = n + 1
+    local code = "points(a%s)"
+    return format(code,n)
+end
+
+-- %b: the argument passed to basepoints
+local function format_b()
+    n = n + 1
+    local code = "basepoints(a%s)"
+    return format(code,n)
+end
+
+-- %t: the argument (a table) concatenated, with the prefix as separator
+local function format_t(f)
+    n = n + 1
+    if not f or f == "" then
+        return format("concat(a%s)",n)
+    end
+    return format("concat(a%s,%q)",n,f)
+end
+
+-- %T: the argument (a table) passed to sequenced, with the prefix as separator
+local function format_T(f)
+    n = n + 1
+    if not f or f == "" then
+        return format("sequenced(a%s)",n)
+    end
+    return format("sequenced(a%s,%q)",n,f)
+end
+
+-- %l: the argument as the string true or false
+local function format_l()
+    n = n + 1
+    local code = "(a%s and 'true' or 'false')"
+    return format(code,n)
+end
+
+-- %L: the argument as the string TRUE or FALSE
+local function format_L()
+    n = n + 1
+    local code = "(a%s and 'TRUE' or 'FALSE')"
+    return format(code,n)
+end
+
+-- %N: the argument converted to a number when possible and then to a string
+local function format_N() -- strips leading zeros
+    n = n + 1
+    local code = "tostring(tonumber(a%s) or a%s)"
+    return format(code,n,n)
+end
+
+-- %a: the argument passed to autosingle, with the prefix as separator
+local function format_a(f)
+    n = n + 1
+    if not f or f == "" then
+        return format("autosingle(a%s)",n)
+    end
+    return format("autosingle(a%s,%q)",n,f)
+end
+
+-- %A: the argument passed to autodouble, with the prefix as separator
+local function format_A(f)
+    n = n + 1
+    if not f or f == "" then
+        return format("autodouble(a%s)",n)
+    end
+    return format("autodouble(a%s,%q)",n,f)
+end
+
+-- %w: as many spaces as the argument says, plus the (numeric) prefix when
+-- there is one
+local function format_w(f) -- handy when doing depth related indent
+    n = n + 1
+    local extra = tonumber(f)
+    if not extra then
+        return format("nspaces[a%s]",n) -- no real need for tonumber
+    end
+    -- not that useful
+    return format("nspaces[%s+a%s]",extra,n) -- no real need for tonumber
+end
+
+-- %W: as many spaces as the prefix says (0 without prefix); this one takes
+-- no argument
+local function format_W(f) -- handy when doing depth related indent
+    local count = tonumber(f) or 0
+    return format("nspaces[%s]",count)
+end
+
+-- text between the directives becomes a quoted string in the generated code
+local function format_rest(s)
+    local quoted = format("%q",s) -- catches " and \n and such
+    return quoted
+end
+
+-- %!name!: the code template that is registered under name (tostring when
+-- there is none). The numeric prefix says how the template gets its
+-- arguments: 0 means none, 1 (the default) the next argument (twice), a
+-- negative number an argument relative to the current one (without taking
+-- one), and a larger number that many next arguments.
+local function format_extension(extensions,f,name)
+    local extension = extensions[name] or "tostring(%s)"
+    local count = tonumber(f) or 1
+    if count == 0 then
+        return extension
+    end
+    if count == 1 then
+        n = n + 1
+        local a = "a" .. n
+        return format(extension,a,a) -- maybe more times?
+    end
+    if count < 0 then
+        local a = "a" .. (n + count + 1)
+        return format(extension,a,a)
+    end
+    local names = { }
+    for i=1,count do
+        n = n + 1
+        names[i] = "a" .. n
+    end
+    return format(extension,unpack(names))
+end
+
+-- The grammar that turns a format specification into the code of an
+-- expression. Each directive is replaced by the code that its format_*
+-- function returns and other text by a quoted string. Between the parts the
+-- first extra argument of the match is inserted (the caller passes the
+-- concatenation operator); the second extra argument is the table with
+-- extensions. As a side effect n ends up as the number of arguments taken.
+local builder = Cs { "start",
+    start = (
+        (
+            P("%") / ""
+          * (
+                V("!") -- new
+              + V("s") + V("q")
+              + V("i") + V("d")
+              + V("f") + V("g") + V("G") + V("e") + V("E")
+              + V("x") + V("X") + V("o")
+              --
+              + V("c")
+              + V("C")
+              + V("S") -- new
+              + V("Q") -- new
+              + V("N") -- new
+              --
+              + V("r")
+              + V("h") + V("H") + V("u") + V("U")
+              + V("p") + V("b")
+              + V("t") + V("T")
+              + V("l") + V("L")
+              + V("I")
+              + V("w") -- new
+              + V("W") -- new
+              + V("a") -- new
+              + V("A") -- new
+              --
+              + V("*") -- ignores probably messed up %
+            )
+          + V("*")
+        )
+      * (P(-1) + Carg(1))
+    )^0,
+    --
+    ["s"] = (prefix_any * P("s")) / format_s, -- %s => regular %s (string)
+    ["q"] = (prefix_any * P("q")) / format_q, -- %q => regular %q (quoted string)
+    ["i"] = (prefix_any * P("i")) / format_i, -- %i => regular %i (integer)
+    ["d"] = (prefix_any * P("d")) / format_d, -- %d => regular %d (integer)
+    ["f"] = (prefix_any * P("f")) / format_f, -- %f => regular %f (float)
+    ["g"] = (prefix_any * P("g")) / format_g, -- %g => regular %g (float)
+    ["G"] = (prefix_any * P("G")) / format_G, -- %G => regular %G (float)
+    ["e"] = (prefix_any * P("e")) / format_e, -- %e => regular %e (float)
+    ["E"] = (prefix_any * P("E")) / format_E, -- %E => regular %E (float)
+    ["x"] = (prefix_any * P("x")) / format_x, -- %x => regular %x (hexadecimal)
+    ["X"] = (prefix_any * P("X")) / format_X, -- %X => regular %X (HEXADECIMAL)
+    ["o"] = (prefix_any * P("o")) / format_o, -- %o => regular %o (octal)
+    --
+    ["S"] = (prefix_any * P("S")) / format_S, -- %S => %s (tostring)
+    ["Q"] = (prefix_any * P("Q")) / format_Q, -- %Q => %q (tostring)
+    ["N"] = (prefix_any * P("N")) / format_N, -- %N => tonumber (strips leading zeros)
+    ["c"] = (prefix_any * P("c")) / format_c, -- %c => utf character (extension to regular)
+    ["C"] = (prefix_any * P("C")) / format_C, -- %C => U+.... utf character
+    --
+    ["r"] = (prefix_any * P("r")) / format_r, -- %r => round
+    ["h"] = (prefix_any * P("h")) / format_h, -- %h => 0x0a1b2 (when - no 0x) was v
+    ["H"] = (prefix_any * P("H")) / format_H, -- %H => 0x0A1B2 (when - no 0x) was V
+    ["u"] = (prefix_any * P("u")) / format_u, -- %u => u+0a1b2 (when - no u+)
+    ["U"] = (prefix_any * P("U")) / format_U, -- %U => U+0A1B2 (when - no U+)
+    ["p"] = (prefix_any * P("p")) / format_p, -- %p => 12.345pt / maybe: P (and more units)
+    ["b"] = (prefix_any * P("b")) / format_b, -- %b => 12.342bp / maybe: B (and more units)
+    ["t"] = (prefix_tab * P("t")) / format_t, -- %t => concat
+    ["T"] = (prefix_tab * P("T")) / format_T, -- %T => sequenced
+    ["l"] = (prefix_tab * P("l")) / format_l, -- %l => boolean
+    ["L"] = (prefix_tab * P("L")) / format_L, -- %L => BOOLEAN
+    ["I"] = (prefix_any * P("I")) / format_I, -- %I => signed integer
+    --
+    ["w"] = (prefix_any * P("w")) / format_w, -- %w => n spaces (optional prefix is added)
+    ["W"] = (prefix_any * P("W")) / format_W, -- %W => mandate prefix, no specifier
+    --
+    ["a"] = (prefix_any * P("a")) / format_a, -- %a => '...' (forces tostring)
+    ["A"] = (prefix_any * P("A")) / format_A, -- %A => "..." (forces tostring)
+    --
+    ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%%%")^1) / format_rest, -- rest (including %%)
+    --
+    ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension,
+}
+
+-- we can be clever and only alias what is needed
+
+-- Shortcut for a specification that is just one regular directive (an optional
+-- prefix and one of the letters below): the result is the code of a function that
+-- calls string.format with that directive. Other specifications do not match.
+local direct = Cs (
+ P("%")/""
+ * Cc([[local format = string.format return function(str) return format("%]])
+ * (S("+- .") + R("09"))^0
+ * S("sqidfgGeExXo")
+ * Cc([[",str) end]])
+ * P(-1)
+ )
+
+-- The __index handler of a formatter: makes the function for the format
+-- specification str, stores it in t under that specification and returns it.
+-- A single regular directive takes the direct route; otherwise the builder
+-- makes the expression and the chunk is assembled from the template.
+-- NOTE(review): when the builder took no arguments the function returns str
+-- as it is, so a %% in such a specification is not reduced to one %
+local function make(t,str)
+    local f
+    local p = lpegmatch(direct,str)
+    if p then
+        f = loadstripped(p)()
+    else
+        n = 0
+        p = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n
+        if n > 0 then
+            p = format(template,preamble,t._preamble_,arguments[n],p)
+            -- print("builder>",p)
+            f = loadstripped(p)()
+        else
+            f = function() return str end
+        end
+    end
+    t[str] = f
+    return f
+end
+
+-- -- collect periodically
+--
+-- local threshold = 1000 -- max nof cached formats
+--
+-- local function make(t,str)
+-- local f = rawget(t,str)
+-- if f then
+-- return f
+-- end
+-- local parent = t._t_
+-- if parent._n_ > threshold then
+-- local m = { _t_ = parent }
+-- getmetatable(parent).__index = m
+-- setmetatable(m, { __index = make })
+-- else
+-- parent._n_ = parent._n_ + 1
+-- end
+-- local f
+-- local p = lpegmatch(direct,str)
+-- if p then
+-- f = loadstripped(p)()
+-- else
+-- n = 0
+-- p = lpegmatch(builder,str,1,"..",parent._extensions_) -- after this we know n
+-- if n > 0 then
+-- p = format(template,preamble,parent._preamble_,arguments[n],p)
+-- -- print("builder>",p)
+-- f = loadstripped(p)()
+-- else
+-- f = function() return str end
+-- end
+-- end
+-- t[str] = f
+-- return f
+-- end
+
+-- The __call handler of a formatter: applies the format fmt to the arguments.
+local function use(t,fmt,...)
+    local formatter = t[fmt]
+    return formatter(...)
+end
+
+strings.formatters = { }
+
+-- we cannot make these tables weak, unless we start using an indirect
+-- table (metatable) in which case we could better keep a count and
+-- clear that table when a threshold is reached
+
+-- Returns a new formatter: a table that makes (and keeps) a function for each
+-- format specification that it is indexed with. It can also be called with
+-- a specification and the arguments. Extensions and preamble start out empty.
+function strings.formatters.new()
+    local formatter = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter" }
+    return setmetatable(formatter,{ __index = make, __call = use })
+end
+
+-- function strings.formatters.new()
+-- local t = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter", _n_ = 0 }
+-- local m = { _t_ = t }
+-- setmetatable(t, { __index = m, __call = use })
+-- setmetatable(m, { __index = make })
+-- return t
+-- end
+
+local formatters = strings.formatters.new() -- the default instance
+
+string.formatters = formatters -- in the main string namespace
+
+-- sometimes nicer name: applies the format str (of the default instance)
+function string.formatter(str,...)
+    return formatters[str](...)
+end
+
+-- Registers an extension in the formatter t: template is the code template
+-- that is used for %!name! ("%s" when omitted) and preamble is code that is
+-- put in front of what is already there. Anything but a formatter is ignored.
+local function add(t,name,template,preamble)
+    if type(t) ~= "table" or t._type_ ~= "formatter" then
+        return
+    end
+    t._extensions_[name] = template or "%s"
+    if preamble then
+        t._preamble_ = preamble .. "\n" .. t._preamble_ -- so no overload !
+    end
+end
+
+strings.formatters.add = add
+
+-- registered in the default instance (should we fall back on this one?)
+
+-- xmlescape replaces < > & and " by entities; texescape puts a backslash in front
+-- of each of the characters # $ % \ { }
+lpeg.patterns.xmlescape = Cs((P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;" + P('"')/"&quot;" + P(1))^0)
+lpeg.patterns.texescape = Cs((C(S("#$%\\{}"))/"\\%1" + P(1))^0)
+
+-- the extensions %!xml! and %!tex! apply these patterns; the preambles provide the locals
+add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]])
+add(formatters,"tex",[[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]])
+
+-- -- yes or no:
+--
+-- local function make(t,str)
+-- local f
+-- local p = lpegmatch(direct,str)
+-- if p then
+-- f = loadstripped(p)()
+-- else
+-- n = 0
+-- p = lpegmatch(builder,str,1,",") -- after this we know n
+-- if n > 0 then
+-- p = format(template,template_shortcuts,arguments[n],p)
+-- f = loadstripped(p)()
+-- else
+-- f = function() return str end
+-- end
+-- end
+-- t[str] = f
+-- return f
+-- end
+--
+-- local formatteds = string.formatteds or { }
+-- string.formatteds = formatteds
+--
+-- setmetatable(formatteds, { __index = make, __call = use })
diff --git a/lualibs-util-tab.lua b/lualibs-util-tab.lua
new file mode 100644
index 0000000..ecf36b1
--- /dev/null
+++ b/lualibs-util-tab.lua
@@ -0,0 +1,493 @@
+if not modules then modules = { } end modules ['util-tab'] = {
+ version = 1.001,
+ comment = "companion to luat-lib.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+utilities = utilities or {}
+utilities.tables = utilities.tables or { }
+local tables = utilities.tables
+
+local format, gmatch, gsub = string.format, string.gmatch, string.gsub
+local concat, insert, remove = table.concat, table.insert, table.remove
+local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring
+local type, next, rawset, tonumber, tostring, load, select = type, next, rawset, tonumber, tostring, load, select
+local lpegmatch, P, Cs, Cc = lpeg.match, lpeg.P, lpeg.Cs, lpeg.Cc
+local serialize, sortedkeys, sortedpairs = table.serialize, table.sortedkeys, table.sortedpairs
+local formatters = string.formatters
+
+local splitter = lpeg.tsplitat(".")
+
+-- Builds Lua source text that defines the (still undefined) tables along a
+-- dotted path.
+--
+--   target  : dotted name, e.g. "a.b.c"
+--   nofirst : when true no definition is generated for the first component
+--   nolast  : when true no definition is generated for the last component
+--
+-- Returns two values: the generated code (statements joined by "\n") and the
+-- name under which the last component can be reached. For nested components
+-- that name uses the generated local shortcut of the parent, e.g. "a_b.c".
+function tables.definetable(target,nofirst,nolast) -- defines undefined tables
+    local composed, shortcut, t = nil, nil, { }
+    local snippets = lpegmatch(splitter,target)
+    for i=1,#snippets - (nolast and 1 or 0) do
+        local name = snippets[i]
+        if composed then
+            composed = shortcut .. "." .. name
+            shortcut = shortcut .. "_" .. name
+            t[#t+1] = formatters["local %s = %s if not %s then %s = { } %s = %s end"](shortcut,composed,shortcut,shortcut,composed,shortcut)
+        else
+            composed = name
+            shortcut = name
+            if not nofirst then
+                t[#t+1] = formatters["%s = %s or { }"](composed,composed)
+            end
+        end
+    end
+    if nolast then
+        local last = snippets[#snippets]
+        -- with a single component the loop above did not run and there is no
+        -- parent shortcut to prefix; concatenating nil would raise an error
+        composed = shortcut and (shortcut .. "." .. last) or last
+    end
+    return concat(t,"\n"), composed
+end
+
+-- local t = tables.definedtable("a","b","c","d")
+
+-- Walks from _G along the given keys, creating an empty table for every
+-- missing (or false) step, and returns the table reached at the end.
+-- Without arguments _G itself is returned.
+function tables.definedtable(...)
+    local names   = { ... }
+    local current = _G
+    for index=1,select("#",...) do
+        local key   = names[index]
+        local child = current[key]
+        if not child then
+            child = { }
+            current[key] = child
+        end
+        current = child
+    end
+    return current
+end
+
+-- Resolves a dotted name relative to root (default: _G).
+-- Returns the value found, or nothing as soon as one step of the path is
+-- missing or false.
+function tables.accesstable(target,root)
+    local node = root or _G
+    for key in gmatch(target,"([^%.]+)") do
+        local child = node[key]
+        if child then
+            node = child
+        else
+            return
+        end
+    end
+    return node
+end
+
+-- Stores v under the dotted name target, relative to root (default: _G).
+-- Intermediate tables are created when a step of the path is missing (or
+-- false); steps that already exist are left untouched and only followed.
+function tables.migratetable(target,v,root)
+    local t = root or _G
+    local names = string.split(target,".")
+    local last = #names
+    for i=1,last-1 do
+        local name = names[i]
+        local child = t[name]
+        if not child then
+            -- only write when something has to be created
+            child = { }
+            t[name] = child
+        end
+        t = child
+    end
+    t[names[last]] = v
+end
+
+-- Removes every occurrence of value from the array t, in place.
+-- Nothing happens when value is nil or false.
+function tables.removevalue(t,value) -- todo: n
+    if value then
+        -- walk backwards: remove() shifts the tail down, so a forward loop
+        -- skips the element that follows each removed one
+        for i=#t,1,-1 do
+            if t[i] == value then
+                remove(t,i)
+                -- remove all, so no: return
+            end
+        end
+    end
+end
+
+-- Puts extra directly before the first occurrence of value in the array t.
+-- Existing occurrences of extra are removed first; when value is not found
+-- extra becomes the first entry.
+function tables.insertbeforevalue(t,value,extra)
+    -- backwards, otherwise the entry after a removed one is never checked
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i,extra)
+            return
+        end
+    end
+    insert(t,1,extra)
+end
+
+-- Puts extra directly after the first occurrence of value in the array t.
+-- Existing occurrences of extra are removed first; when value is not found
+-- extra is appended at the end.
+function tables.insertaftervalue(t,value,extra)
+    -- backwards, otherwise the entry after a removed one is never checked
+    for i=#t,1,-1 do
+        if t[i] == extra then
+            remove(t,i)
+        end
+    end
+    for i=1,#t do
+        if t[i] == value then
+            insert(t,i+1,extra)
+            return
+        end
+    end
+    insert(t,#t+1,extra)
+end
+
+-- experimental
+
+local escape = Cs(Cc('"') * ((P('"')/'""' + P(1))^0) * Cc('"'))
+
+-- Converts a list of records into a CSV string.
+--
+--   t             : array of tables (records); a missing or empty t gives ""
+--   specification : optional table with the fields
+--       fields    : array of keys that selects and orders the columns; when
+--                   this is not a table the sorted keys of the first record
+--                   are used
+--       separator : column separator, defaults to ","
+--       preamble  : when true the first line holds the quoted field names
+--
+-- String values are wrapped in double quotes with embedded double quotes
+-- doubled; all other values are passed through tostring. Lines are joined
+-- with "\n".
+function table.tocsv(t,specification)
+    if t and #t > 0 then
+        local result = { }
+        local r = { }
+        specification = specification or { }
+        local fields = specification.fields
+        -- the columns are indexed as fields[1..n], so only a table is usable;
+        -- the old test against "string" discarded every user supplied list
+        if type(fields) ~= "table" then
+            fields = sortedkeys(t[1])
+        end
+        local separator = specification.separator or ","
+        if specification.preamble == true then
+            for f=1,#fields do
+                r[f] = lpegmatch(escape,tostring(fields[f]))
+            end
+            result[1] = concat(r,separator)
+        end
+        for i=1,#t do
+            local ti = t[i]
+            for f=1,#fields do
+                local field = ti[fields[f]]
+                if type(field) == "string" then
+                    r[f] = lpegmatch(escape,field)
+                else
+                    r[f] = tostring(field)
+                end
+            end
+            result[#result+1] = concat(r,separator)
+        end
+        return concat(result,"\n")
+    else
+        return ""
+    end
+end
+
+-- local nspaces = utilities.strings.newrepeater(" ")
+-- local escape = Cs((P("<")/"&lt;" + P(">")/"&gt;" + P("&")/"&amp;" + P(1))^0)
+--
+-- local function toxml(t,d,result,step)
+-- for k, v in sortedpairs(t) do
+-- local s = nspaces[d]
+-- local tk = type(k)
+-- local tv = type(v)
+-- if tv == "table" then
+-- if tk == "number" then
+-- result[#result+1] = format("%s<entry n='%s'>",s,k)
+-- toxml(v,d+step,result,step)
+-- result[#result+1] = format("%s</entry>",s,k)
+-- else
+-- result[#result+1] = format("%s<%s>",s,k)
+-- toxml(v,d+step,result,step)
+-- result[#result+1] = format("%s</%s>",s,k)
+-- end
+-- elseif tv == "string" then
+-- if tk == "number" then
+-- result[#result+1] = format("%s<entry n='%s'>%s</entry>",s,k,lpegmatch(escape,v),k)
+-- else
+-- result[#result+1] = format("%s<%s>%s</%s>",s,k,lpegmatch(escape,v),k)
+-- end
+-- elseif tk == "number" then
+-- result[#result+1] = format("%s<entry n='%s'>%s</entry>",s,k,tostring(v),k)
+-- else
+-- result[#result+1] = format("%s<%s>%s</%s>",s,k,tostring(v),k)
+-- end
+-- end
+-- end
+--
+-- much faster
+
+local nspaces = utilities.strings.newrepeater(" ")
+
+-- Appends an XML rendering of t to the list result, one line per entry.
+--   t      : table to convert, traversed with sortedpairs
+--   d      : current depth, used as index into nspaces for the line prefix
+--   result : list that receives the generated lines
+--   step   : amount added to d for each nested table
+-- Numeric keys become <entry n='..'> elements, other keys are used as the
+-- element name. String values go through the xml formatter extension
+-- (%!xml!), remaining values through %S.
+local function toxml(t,d,result,step)
+    for key, value in sortedpairs(t) do
+        local prefix  = nspaces[d] -- inlining this is somewhat faster but gives more formatters
+        local numeric = type(key) == "number"
+        local kind    = type(value)
+        local line
+        if kind == "table" then
+            if numeric then
+                result[#result+1] = formatters["%s<entry n='%s'>"](prefix,key)
+                toxml(value,d+step,result,step)
+                line = formatters["%s</entry>"](prefix,key)
+            else
+                result[#result+1] = formatters["%s<%s>"](prefix,key)
+                toxml(value,d+step,result,step)
+                line = formatters["%s</%s>"](prefix,key)
+            end
+        elseif kind == "string" then
+            if numeric then
+                line = formatters["%s<entry n='%s'>%!xml!</entry>"](prefix,key,value,key)
+            else
+                line = formatters["%s<%s>%!xml!</%s>"](prefix,key,value,key)
+            end
+        elseif numeric then
+            line = formatters["%s<entry n='%s'>%S</entry>"](prefix,key,value,key)
+        else
+            line = formatters["%s<%s>%S</%s>"](prefix,key,value,key)
+        end
+        result[#result+1] = line
+    end
+end
+
+-- function table.toxml(t,name,nobanner,indent,spaces)
+-- local noroot = name == false
+-- local result = (nobanner or noroot) and { } or { "<?xml version='1.0' standalone='yes' ?>" }
+-- local indent = rep(" ",indent or 0)
+-- local spaces = rep(" ",spaces or 1)
+-- if noroot then
+-- toxml( t, inndent, result, spaces)
+-- else
+-- toxml( { [name or "root"] = t }, indent, result, spaces)
+-- end
+-- return concat(result,"\n")
+-- end
+
+-- Converts a table into an XML string (lines joined with "\n").
+--
+--   specification.name     : name of the root element, defaults to "data";
+--                            false means: no root element and no banner
+--   specification.nobanner : when set, the <?xml ... ?> line is left out
+--   specification.indent   : start depth, defaults to 0
+--   specification.spaces   : depth increment per level, defaults to 1
+function table.toxml(t,specification)
+    local spec   = specification or { }
+    local name   = spec.name
+    local noroot = name == false
+    local result = { }
+    if not (spec.nobanner or noroot) then
+        result[1] = "<?xml version='1.0' standalone='yes' ?>"
+    end
+    local root = t
+    if not noroot then
+        root = { [name or "data"] = t }
+    end
+    toxml(root, spec.indent or 0, result, spec.spaces or 1)
+    return concat(result,"\n")
+end
+
+-- also experimental
+
+-- encapsulate(table,utilities.tables)
+-- encapsulate(table,utilities.tables,true)
+-- encapsulate(table,true)
+
+-- Copies every entry of core into capsule and optionally turns core into a
+-- guarded front end for capsule.
+--
+--   core    : table whose entries are copied
+--   capsule : target table; when this is not a table a fresh one is created
+--             and protect is forced to true, so encapsulate(core,true) works
+--   protect : when true, core is emptied and gets a metatable that reads
+--             from capsule and refuses to set keys that capsule already has
+--
+-- A key clash is reported with print and then ends the process via os.exit().
+-- Nothing is returned.
+function tables.encapsulate(core,capsule,protect)
+    if type(capsule) ~= "table" then
+        protect = true
+        capsule = { }
+    end
+    for key, value in next, core do
+        if capsule[key] then
+            print(formatters["\ninvalid %s %a in %a"]("inheritance",key,core))
+            os.exit()
+        else
+            capsule[key] = value
+        end
+    end
+    if protect then
+        -- clearing existing keys while traversing is allowed
+        for key, value in next, core do
+            core[key] = nil
+        end
+        setmetatable(core, {
+            __index = capsule,
+            __newindex = function(t,key,value)
+                if capsule[key] then
+                    -- same wording as the inheritance message above (a stray quote was removed)
+                    print(formatters["\ninvalid %s %a in %a"]("overload",key,core))
+                    os.exit()
+                else
+                    rawset(t,key,value)
+                end
+            end
+        } )
+    end
+end
+
+-- Appends a compact Lua table constructor for t to the snippet list r and
+-- returns r.
+-- t : table to serialize; treated as either an array or a hash (no mixes)
+-- r : list of string snippets that is extended in place
+-- outer : when true the closing brace is "}" instead of "},"
+-- Only string, number, boolean and table values are written; values of any
+-- other type are skipped without notice.
+local function fastserialize(t,r,outer) -- no mixes
+ r[#r+1] = "{"
+ local n = #t
+ if n > 0 then
+ -- array: only the entries 1..n are written, hash keys are ignored
+ for i=1,n do
+ local v = t[i]
+ local tv = type(v)
+ if tv == "string" then
+ r[#r+1] = formatters["%q,"](v)
+ elseif tv == "number" then
+ r[#r+1] = formatters["%s,"](v)
+ elseif tv == "table" then
+ -- nested call without outer, so it closes with "},"
+ fastserialize(v,r)
+ elseif tv == "boolean" then
+ r[#r+1] = formatters["%S,"](v)
+ end
+ end
+ else
+ -- hash: every key is written as [%q]
+ -- NOTE(review): presumably non-string keys end up in quoted form here, so
+ -- they would not come back with their original type -- confirm against
+ -- the formatter implementation
+ for k, v in next, t do
+ local tv = type(v)
+ if tv == "string" then
+ r[#r+1] = formatters["[%q]=%q,"](k,v)
+ elseif tv == "number" then
+ r[#r+1] = formatters["[%q]=%s,"](k,v)
+ elseif tv == "table" then
+ r[#r+1] = formatters["[%q]="](k)
+ fastserialize(v,r)
+ elseif tv == "boolean" then
+ r[#r+1] = formatters["[%q]=%S,"](k,v)
+ end
+ end
+ end
+ if outer then
+ r[#r+1] = "}"
+ else
+ r[#r+1] = "},"
+ end
+ return r
+end
+
+-- local f_hashed_string = formatters["[%q]=%q,"]
+-- local f_hashed_number = formatters["[%q]=%s,"]
+-- local f_hashed_table = formatters["[%q]="]
+-- local f_hashed_true = formatters["[%q]=true,"]
+-- local f_hashed_false = formatters["[%q]=false,"]
+--
+-- local f_indexed_string = formatters["%q,"]
+-- local f_indexed_number = formatters["%s,"]
+-- ----- f_indexed_true = formatters["true,"]
+-- ----- f_indexed_false = formatters["false,"]
+--
+-- local function fastserialize(t,r,outer) -- no mixes
+-- r[#r+1] = "{"
+-- local n = #t
+-- if n > 0 then
+-- for i=1,n do
+-- local v = t[i]
+-- local tv = type(v)
+-- if tv == "string" then
+-- r[#r+1] = f_indexed_string(v)
+-- elseif tv == "number" then
+-- r[#r+1] = f_indexed_number(v)
+-- elseif tv == "table" then
+-- fastserialize(v,r)
+-- elseif tv == "boolean" then
+-- -- r[#r+1] = v and f_indexed_true(k) or f_indexed_false(k)
+-- r[#r+1] = v and "true," or "false,"
+-- end
+-- end
+-- else
+-- for k, v in next, t do
+-- local tv = type(v)
+-- if tv == "string" then
+-- r[#r+1] = f_hashed_string(k,v)
+-- elseif tv == "number" then
+-- r[#r+1] = f_hashed_number(k,v)
+-- elseif tv == "table" then
+-- r[#r+1] = f_hashed_table(k)
+-- fastserialize(v,r)
+-- elseif tv == "boolean" then
+-- r[#r+1] = v and f_hashed_true(k) or f_hashed_false(k)
+-- end
+-- end
+-- end
+-- if outer then
+-- r[#r+1] = "}"
+-- else
+-- r[#r+1] = "},"
+-- end
+-- return r
+-- end
+
+-- Returns t as compact Lua source. The result starts with prefix, which
+-- defaults to "return"; a custom prefix should contain the = itself.
+function table.fastserialize(t,prefix)
+    local buffer = { prefix or "return" }
+    fastserialize(t,buffer,true)
+    return concat(buffer)
+end
+
+-- Compiles str as a Lua chunk, runs it and returns its (first) result.
+-- Nothing is returned when str is nil or empty, when it does not compile or
+-- when the chunk yields nil or false. Errors raised while the chunk runs are
+-- not caught.
+function table.deserialize(str)
+    if str and str ~= "" then
+        local chunk = load(str)
+        if chunk then
+            local result = chunk()
+            if result then
+                return result
+            end
+        end
+    end
+end
+
+-- inspect(table.fastserialize { a = 1, b = { 4, { 5, 6 } }, c = { d = 7, e = 'f"g\nh' } })
+
+-- Reads filename with io.loaddata, compiles and runs the content and returns
+-- the result when that is a table. In all other cases (no name, no or empty
+-- data, no valid chunk, a non table result) nothing is returned.
+function table.load(filename)
+    if not filename then
+        return
+    end
+    local data = io.loaddata(filename)
+    if not data or data == "" then
+        return
+    end
+    local chunk = load(data)
+    if type(chunk) ~= "function" then
+        return
+    end
+    local result = chunk()
+    if type(result) == "table" then
+        return result
+    end
+end
+
+-- Serializes t and writes the result to filename with io.savedata.
+-- n and the remaining arguments are handed over to the serializer; a missing
+-- n is replaced by true.
+function table.save(filename,t,n,...)
+    if n == nil then
+        n = true
+    end
+    io.savedata(filename,serialize(t,n,...))
+end
+
+-- Serializes a list of flat records into Lua source of the form
+--
+--   return {
+--    {key=value},
+--    ...
+--   }
+--
+-- Each record is turned into one string right away (intermediate concats),
+-- which keeps the number of snippets that are held at one time low.
+local function slowdrop(t)
+    local r = { }
+    local l = { } -- reused for every record
+    for i=1,#t do
+        local j = 0
+        for k, v in next, t[i] do
+            j = j + 1
+            l[j] = formatters["%s=%q"](k,v)
+        end
+        -- only the first j entries belong to this record: the rest of l can
+        -- still hold fields of a longer previous record
+        r[i] = " {" .. concat(l,",",1,j) .. "},\n"
+    end
+    -- the records are concatenated directly; the closing brace comes last
+    return "return {\n" .. concat(r) .. "}"
+end
+
+-- Serializes a list of flat records into Lua source of the form
+--
+--   return {
+--    {key=value},
+--    ...
+--   }
+--
+-- All snippets are collected in one list that is concatenated at the end.
+local function fastdrop(t)
+    local r = { "return {\n" }
+    for i=1,#t do
+        r[#r+1] = " {"
+        local first = true
+        for k, v in next, t[i] do
+            if first then
+                first = false
+            else
+                -- separator, so that records with more than one field still
+                -- give valid Lua; single field records are not affected
+                r[#r+1] = ","
+            end
+            r[#r+1] = formatters["%s=%q"](k,v)
+        end
+        r[#r+1] = "},\n"
+    end
+    r[#r+1] = "}"
+    return concat(r)
+end
+
+-- Returns Lua source for a list of flat records, only { { a=2 }, {a=3} }.
+-- An empty list gives "return { }". Passing slow == true selects the variant
+-- that uses less memory, otherwise the (some 15%) faster one is used.
+function table.drop(t,slow)
+    if #t == 0 then
+        return "return { }"
+    end
+    local drop = (slow == true) and slowdrop or fastdrop
+    return drop(t)
+end
+
+-- Stores a new empty table under key k of t and returns that new table.
+function table.autokey(t,k)
+    local fresh = { }
+    t[k] = fresh
+    return fresh
+end
+
+-- Metatable that maps every unknown key onto itself (and remembers that).
+local selfmapper = {
+    __index = function(t,k)
+        t[k] = k
+        return k
+    end
+}
+
+-- Turns the array t (indices 0..#t) into a two way lookup table and returns
+-- it. For an entry t[1] = "one" this adds t["1"] = "one" and t["one"] = "1".
+-- The empty string maps onto t[0], or onto "" when there is no t[0]. Unknown
+-- keys resolve to themselves. Without argument a new table is created.
+function table.twowaymapper(t)
+    if t then
+        for index=0,#t do
+            local value = t[index]
+            if value then
+                local key = tostring(index)
+                t[key]   = value
+                t[value] = key
+            end
+        end
+        t[""] = t[0] or ""
+    else
+        t = { }
+    end
+    -- setmetatableindex(t,"key")
+    setmetatable(t,selfmapper)
+    return t
+end
+
diff --git a/lualibs-utils.lua b/lualibs-utils.lua
deleted file mode 100644
index ebc27b8..0000000
--- a/lualibs-utils.lua
+++ /dev/null
@@ -1,176 +0,0 @@
-if not modules then modules = { } end modules ['l-utils'] = {
- version = 1.001,
- comment = "companion to luat-lib.mkiv",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files"
-}
-
--- hm, quite unreadable
-
-local gsub = string.gsub
-local concat = table.concat
-local type, next = type, next
-
-if not utils then utils = { } end
-if not utils.merger then utils.merger = { } end
-if not utils.lua then utils.lua = { } end
-
-utils.merger.m_begin = "begin library merge"
-utils.merger.m_end = "end library merge"
-utils.merger.pattern =
- "%c+" ..
- "%-%-%s+" .. utils.merger.m_begin ..
- "%c+(.-)%c+" ..
- "%-%-%s+" .. utils.merger.m_end ..
- "%c+"
-
-function utils.merger._self_fake_()
- return
- "-- " .. "created merged file" .. "\n\n" ..
- "-- " .. utils.merger.m_begin .. "\n\n" ..
- "-- " .. utils.merger.m_end .. "\n\n"
-end
-
-function utils.report(...)
- print(...)
-end
-
-utils.merger.strip_comment = true
-
-function utils.merger._self_load_(name)
- local f, data = io.open(name), ""
- if f then
- utils.report("reading merge from %s",name)
- data = f:read("*all")
- f:close()
- else
- utils.report("unknown file to merge %s",name)
- end
- if data and utils.merger.strip_comment then
- -- saves some 20K
- data = gsub(data,"%-%-~[^\n\r]*[\r\n]", "")
- end
- return data or ""
-end
-
-function utils.merger._self_save_(name, data)
- if data ~= "" then
- local f = io.open(name,'w')
- if f then
- utils.report("saving merge from %s",name)
- f:write(data)
- f:close()
- end
- end
-end
-
-function utils.merger._self_swap_(data,code)
- if data ~= "" then
- return (gsub(data,utils.merger.pattern, function(s)
- return "\n\n" .. "-- "..utils.merger.m_begin .. "\n" .. code .. "\n" .. "-- "..utils.merger.m_end .. "\n\n"
- end, 1))
- else
- return ""
- end
-end
-
---~ stripper:
---~
---~ data = gsub(data,"%-%-~[^\n]*\n","")
---~ data = gsub(data,"\n\n+","\n")
-
-function utils.merger._self_libs_(libs,list)
- local result, f, frozen = { }, nil, false
- result[#result+1] = "\n"
- if type(libs) == 'string' then libs = { libs } end
- if type(list) == 'string' then list = { list } end
- local foundpath = nil
- for i=1,#libs do
- local lib = libs[i]
- for j=1,#list do
- local pth = gsub(list[j],"\\","/") -- file.clean_path
- utils.report("checking library path %s",pth)
- local name = pth .. "/" .. lib
- if lfs.isfile(name) then
- foundpath = pth
- end
- end
- if foundpath then break end
- end
- if foundpath then
- utils.report("using library path %s",foundpath)
- local right, wrong = { }, { }
- for i=1,#libs do
- local lib = libs[i]
- local fullname = foundpath .. "/" .. lib
- if lfs.isfile(fullname) then
- -- right[#right+1] = lib
- utils.report("merging library %s",fullname)
- result[#result+1] = "do -- create closure to overcome 200 locals limit"
- result[#result+1] = io.loaddata(fullname,true)
- result[#result+1] = "end -- of closure"
- else
- -- wrong[#wrong+1] = lib
- utils.report("no library %s",fullname)
- end
- end
- if #right > 0 then
- utils.report("merged libraries: %s",concat(right," "))
- end
- if #wrong > 0 then
- utils.report("skipped libraries: %s",concat(wrong," "))
- end
- else
- utils.report("no valid library path found")
- end
- return concat(result, "\n\n")
-end
-
-function utils.merger.selfcreate(libs,list,target)
- if target then
- utils.merger._self_save_(
- target,
- utils.merger._self_swap_(
- utils.merger._self_fake_(),
- utils.merger._self_libs_(libs,list)
- )
- )
- end
-end
-
-function utils.merger.selfmerge(name,libs,list,target)
- utils.merger._self_save_(
- target or name,
- utils.merger._self_swap_(
- utils.merger._self_load_(name),
- utils.merger._self_libs_(libs,list)
- )
- )
-end
-
-function utils.merger.selfclean(name)
- utils.merger._self_save_(
- name,
- utils.merger._self_swap_(
- utils.merger._self_load_(name),
- ""
- )
- )
-end
-
-function utils.lua.compile(luafile, lucfile, cleanup, strip) -- defaults: cleanup=false strip=true
- -- utils.report("compiling",luafile,"into",lucfile)
- os.remove(lucfile)
- local command = "-o " .. string.quote(lucfile) .. " " .. string.quote(luafile)
- if strip ~= false then
- command = "-s " .. command
- end
- local done = (os.spawn("texluac " .. command) == 0) or (os.spawn("luac " .. command) == 0)
- if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then
- -- utils.report("removing",luafile)
- os.remove(luafile)
- end
- return done
-end
-
diff --git a/lualibs.dtx b/lualibs.dtx
index 799c3df..e9a20a6 100644
--- a/lualibs.dtx
+++ b/lualibs.dtx
@@ -33,7 +33,7 @@
\input docstrip.tex
\Msg{************************************************************************}
\Msg{* Installation}
-\Msg{* Package: lualibs 2011/01/20 v0.96 Lua additional functions.}
+\Msg{* Package: lualibs 2012/10/19 v0.97 Lua additional functions.}
\Msg{************************************************************************}
\keepsilent
@@ -90,7 +90,7 @@ and the derived file lualibs.lua.
%<*driver>
\NeedsTeXFormat{LaTeX2e}
\ProvidesFile{lualibs.drv}
- [2011/01/20 v0.96 Lua additional functions.]
+ [2012/10/19 v0.97 Lua additional functions.]
\documentclass{ltxdoc}
\EnableCrossrefs
\CodelineIndex
@@ -120,7 +120,7 @@ and the derived file lualibs.lua.
% \GetFileInfo{lualibs.drv}
%
% \title{The \textsf{lualibs} package}
-% \date{2011/01/20 v0.96}
+% \date{2012/10/19 v0.97}
% \author{Elie Roux \\ \texttt{elie.roux@telecom-bretagne.eu}}
%
% \maketitle
@@ -151,6 +151,63 @@ and the derived file lualibs.lua.
% initialize \textsf{kpse} library so that |require()| can find files under
% TEXMF tree: |kpse.set_program_name("luatex")|.
%
+% \section{Files}
+%
+% The \textsf{lualibs} bundle contains files from two Con\TeX t Lua
+% library categories: The generic auxiliary functions (original file prefix:
+% |l-|) together form something close to a standard library. Most of these are
+% extensions of an existing namespace, like for instance |l-table.lua| which
+% adds full-fledged serialization capabilities to the Lua table library.
+% They were imported under the \textsf{lualibs}-prefix.
+% (For a list see table~\ref{tab:extensions}.)
+%
+% \begin{table}[h]
+% \centering
+% \caption{Extensions of the Lua standard library.}
+% \begin{tabular}{l l l}
+% \textsf{lualibs} name & Con\TeX t name & content \\
+% \hline
+% lualibs-lua.lua & l-lua.lua & compatibility, library paths \\
+% lualibs-lpeg.lua & l-lpeg.lua & patterns \\
+% lualibs-function.lua & l-function.lua & empty except for dummy \\
+% lualibs-string.lua & l-string.lua & string manipulation \\
+% lualibs-table.lua & l-table.lua & serialization, conversion \\
+% lualibs-boolean.lua & l-boolean.lua & boolean converter \\
+% lualibs-number.lua & l-number.lua & bit operations \\
+% lualibs-math.lua & l-math.lua & math functions \\
+% lualibs-io.lua & l-io.lua & reading and writing files \\
+% lualibs-os.lua & l-os.lua & platform specific code \\
+% lualibs-file.lua & l-file.lua & filesystem operations \\
+% lualibs-md5.lua & l-md5.lua & checksum functions \\
+% lualibs-dir.lua & l-dir.lua & directory handling \\
+% lualibs-unicode.lua & l-unicode.lua & utf and unicode \\
+% lualibs-url.lua & l-url.lua & url handling \\
+% lualibs-set.lua & l-set.lua & sets \\
+% \end{tabular}
+% \label{tab:extensions}
+% \end{table}
+%
+% The second category comprises a selection of files mostly from the
+% utilities namespace (|util-|; cf. table~\ref{tab:utilities}).
+% Their purpose is more specific and at times quite low-level.
+%
+% \begin{table}[h]
+% \centering
+% \caption{Utility functions.}
+% \begin{tabular}{l l l}
+% \textsf{lualibs} name & Con\TeX t name & content \\
+% \hline
+% lualibs-util-lua.lua & util-lua.lua & operations on bytecode \\
+% lualibs-util-sto.lua & util-sto.lua & table allocation \\
+% lualibs-util-mrg.lua & util-mrg.lua & merging lua sources \\
+% lualibs-util-dim.lua & util-dim.lua & converters for dimensions \\
+% lualibs-util-str.lua & util-str.lua & extra string functions \\
+% lualibs-util-tab.lua & util-tab.lua & extra table functions \\
+% lualibs-util-jsn.lua & util-jsn.lua & conversion to and from json \\
+% \end{tabular}
+% \label{tab:utilities}
+% \end{table}
+%
% \pagebreak
% \section{\texttt{lualibs.lua}}
%
@@ -163,8 +220,8 @@ module('lualibs', package.seeall)
local lualibs_module = {
name = "lualibs",
- version = 0.96,
- date = "2011/01/20",
+ version = 0.97,
+ date = "2012/10/19",
description = "Lua additional functions.",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL & Elie Roux",
copyright = "PRAGMA ADE / ConTeXt Development Team",
@@ -179,24 +236,30 @@ end
% Load the modules.
%
% \begin{macrocode}
-require("lualibs-string")
+require("lualibs-lua")
require("lualibs-lpeg")
+require("lualibs-function")
+require("lualibs-string")
+require("lualibs-table")
require("lualibs-boolean")
require("lualibs-number")
require("lualibs-math")
-require("lualibs-table")
-require("lualibs-aux")
require("lualibs-io")
require("lualibs-os")
require("lualibs-file")
require("lualibs-md5")
require("lualibs-dir")
require("lualibs-unicode")
-require("lualibs-utils")
-require("lualibs-dimen")
require("lualibs-url")
require("lualibs-set")
-require("lualibs-dimen")
+require("lualibs-util-str")
+--[[everything below apparently not required for the fontloader]]
+require("lualibs-util-tab")
+require("lualibs-util-sto")
+require("lualibs-util-dim")
+require("lualibs-util-jsn")
+--require("lualibs-util-mrg")-- not required
+require("lualibs-util-lua")
% \end{macrocode}
%
% \iffalse
diff --git a/lualibs.lua b/lualibs.lua
new file mode 100644
index 0000000..cf9b039
--- /dev/null
+++ b/lualibs.lua
@@ -0,0 +1,54 @@
+--
+-- This is file `lualibs.lua',
+-- generated with the docstrip utility.
+--
+-- The original source files were:
+--
+-- lualibs.dtx (with options: `lua')
+-- This is a generated file.
+--
+-- Copyright (C) 2009 by PRAGMA ADE / ConTeXt Development Team
+--
+-- See ConTeXt's mreadme.pdf for the license.
+--
+-- This work consists of the main source file lualibs.dtx
+-- and the derived file lualibs.lua.
+--
+module('lualibs', package.seeall)
+
+local lualibs_module = {
+ name = "lualibs",
+ version = 0.97,
+ date = "2012/10/19",
+ description = "Lua additional functions.",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL & Elie Roux",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "See ConTeXt's mreadme.pdf for the license",
+}
+
+if luatexbase and luatexbase.provides_module then
+ luatexbase.provides_module(lualibs_module)
+end
+require("lualibs-lua")
+require("lualibs-lpeg")
+require("lualibs-function")
+require("lualibs-string")
+require("lualibs-table")
+require("lualibs-boolean")
+require("lualibs-number")
+require("lualibs-math")
+require("lualibs-io")
+require("lualibs-os")
+require("lualibs-file")
+require("lualibs-md5")
+require("lualibs-dir")
+require("lualibs-unicode")
+require("lualibs-url")
+require("lualibs-set")
+require("lualibs-util-str")
+--[[everything below apparently not required for the fontloader]]
+require("lualibs-util-tab")
+require("lualibs-util-sto")
+require("lualibs-util-dim")
+require("lualibs-util-jsn")
+--require("lualibs-util-mrg")-- not required
+require("lualibs-util-lua")
+--
+-- End of File `lualibs.lua'.