diff options
-rw-r--r-- | luaextra-aux.lua | 249 | ||||
-rw-r--r-- | luaextra-boolean.lua | 55 | ||||
-rw-r--r-- | luaextra-dimen.lua | 418 | ||||
-rw-r--r-- | luaextra-dir.lua | 337 | ||||
-rw-r--r-- | luaextra-file.lua | 267 | ||||
-rw-r--r-- | luaextra-io.lua | 186 | ||||
-rw-r--r-- | luaextra-lpeg.lua | 165 | ||||
-rw-r--r-- | luaextra-math.lua | 41 | ||||
-rw-r--r-- | luaextra-md5.lua | 72 | ||||
-rw-r--r-- | luaextra-number.lua | 58 | ||||
-rw-r--r-- | luaextra-os.lua | 165 | ||||
-rw-r--r-- | luaextra-set.lua | 84 | ||||
-rw-r--r-- | luaextra-string.lua | 285 | ||||
-rw-r--r-- | luaextra-table.lua | 858 | ||||
-rw-r--r-- | luaextra-unicode.lua | 193 | ||||
-rw-r--r-- | luaextra-url.lua | 106 | ||||
-rw-r--r-- | luaextra-utils.lua | 173 | ||||
-rw-r--r-- | luaextra.dtx | 857 |
18 files changed, 3738 insertions, 831 deletions
diff --git a/luaextra-aux.lua b/luaextra-aux.lua new file mode 100644 index 0000000..1ff5c08 --- /dev/null +++ b/luaextra-aux.lua @@ -0,0 +1,249 @@ +if not modules then modules = { } end modules ['l-aux'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +aux = aux or { } + +local concat, format, gmatch = table.concat, string.format, string.gmatch +local tostring, type = tostring, type +local lpegmatch = lpeg.match + +local space = lpeg.P(' ') +local equal = lpeg.P("=") +local comma = lpeg.P(",") +local lbrace = lpeg.P("{") +local rbrace = lpeg.P("}") +local nobrace = 1 - (lbrace+rbrace) +local nested = lpeg.P{ lbrace * (nobrace + lpeg.V(1))^0 * rbrace } +local spaces = space^0 + +local value = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0) + +local key = lpeg.C((1-equal-comma)^1) +local pattern_a = (space+comma)^0 * (key * equal * value + key * lpeg.C("")) +local pattern_c = (space+comma)^0 * (key * equal * value) + +local key = lpeg.C((1-space-equal-comma)^1) +local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + lpeg.C(""))) + +-- "a=1, b=2, c=3, d={a{b,c}d}, e=12345, f=xx{a{b,c}d}xx, g={}" : outer {} removes, leading spaces ignored + +local hash = { } + +local function set(key,value) -- using Carg is slower here + hash[key] = value +end + +local pattern_a_s = (pattern_a/set)^1 +local pattern_b_s = (pattern_b/set)^1 +local pattern_c_s = (pattern_c/set)^1 + +aux.settings_to_hash_pattern_a = pattern_a_s +aux.settings_to_hash_pattern_b = pattern_b_s +aux.settings_to_hash_pattern_c = pattern_c_s + +function aux.make_settings_to_hash_pattern(set,how) + if how == "strict" then + return (pattern_c/set)^1 + elseif how == "tolerant" then + return (pattern_b/set)^1 + else + return (pattern_a/set)^1 + end +end + +function aux.settings_to_hash(str,existing) + if str and str ~= "" then + hash = existing or { } + if moretolerant then + lpegmatch(pattern_b_s,str) + else + lpegmatch(pattern_a_s,str) + end + return hash + else + return { } + end +end + +function aux.settings_to_hash_tolerant(str,existing) + if str and str ~= "" then + hash = existing or { } + lpegmatch(pattern_b_s,str) + return hash + else + return { } + end +end + +function aux.settings_to_hash_strict(str,existing) + if str and str ~= "" then + hash = existing or { } + lpegmatch(pattern_c_s,str) + return next(hash) and hash + else + return nil + end +end + +local separator = comma * space^0 +local value = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0) +local pattern = lpeg.Ct(value*(separator*value)^0) + +-- "aap, {noot}, mies" : outer {} removes, leading spaces ignored + +aux.settings_to_array_pattern = pattern + +function aux.settings_to_array(str) + if not str or str == "" then + return { } + else + return lpegmatch(pattern,str) + end +end + +local function set(t,v) + t[#t+1] = v +end + +local value = lpeg.P(lpeg.Carg(1)*value) / set +local pattern = value*(separator*value)^0 * lpeg.Carg(1) + +function aux.add_settings_to_array(t,str) + return lpegmatch(pattern,str,nil,t) +end + +function aux.hash_to_string(h,separator,yes,no,strict,omit) + if h then + local t, s = { }, table.sortedkeys(h) + omit = omit and table.tohash(omit) + for i=1,#s do + local key = s[i] + if not omit or not omit[key] then + local value = h[key] + if type(value) == "boolean" then + if yes and no then + if value then + t[#t+1] = key .. '=' .. yes + elseif not strict then + t[#t+1] = key .. '=' .. no + end + elseif value or not strict then + t[#t+1] = key .. '=' .. tostring(value) + end + else + t[#t+1] = key .. '=' .. value + end + end + end + return concat(t,separator or ",") + else + return "" + end +end + +function aux.array_to_string(a,separator) + if a then + return concat(a,separator or ",") + else + return "" + end +end + +function aux.settings_to_set(str,t) + t = t or { } + for s in gmatch(str,"%s*([^,]+)") do + t[s] = true + end + return t +end + +local value = lbrace * lpeg.C((nobrace + nested)^0) * rbrace +local pattern = lpeg.Ct((space + value)^0) + +function aux.arguments_to_table(str) + return lpegmatch(pattern,str) +end + +-- temporary here + +function aux.getparameters(self,class,parentclass,settings) + local sc = self[class] + if not sc then + sc = table.clone(self[parent]) + self[class] = sc + end + aux.settings_to_hash(settings,sc) +end + +-- temporary here + +local digit = lpeg.R("09") +local period = lpeg.P(".") +local zero = lpeg.P("0") + +--~ local finish = lpeg.P(-1) +--~ local nodigit = (1-digit) + finish +--~ local case_1 = (period * zero^1 * #nodigit)/"" -- .000 +--~ local case_2 = (period * (1-(zero^0/"") * #nodigit)^1 * (zero^0/"") * nodigit) -- .010 .10 .100100 + +local trailingzeros = zero^0 * -digit -- suggested by Roberto R +local case_1 = period * trailingzeros / "" +local case_2 = period * (digit - trailingzeros)^1 * (trailingzeros / "") + +local number = digit^1 * (case_1 + case_2) +local stripper = lpeg.Cs((number + 1)^0) + +--~ local sample = "bla 11.00 bla 11 bla 0.1100 bla 1.00100 bla 0.00 bla 0.001 bla 1.1100 bla 0.100100100 bla 0.00100100100" +--~ collectgarbage("collect") +--~ str = string.rep(sample,10000) +--~ local ts = os.clock() +--~ lpegmatch(stripper,str) +--~ print(#str, os.clock()-ts, lpegmatch(stripper,sample)) + +function aux.strip_zeros(str) + return lpegmatch(stripper,str) +end + +function aux.definetable(target) -- defines undefined tables + local composed, t = nil, { } + for name in gmatch(target,"([^%.]+)") do + if composed then + composed = composed .. "." .. name + else + composed = name + end + t[#t+1] = format("%s = %s or { }",composed,composed) + end + return concat(t,"\n") +end + +function aux.accesstable(target) + local t = _G + for name in gmatch(target,"([^%.]+)") do + t = t[name] + end + return t +end + +-- as we use this a lot ... + +--~ function aux.cachefunction(action,weak) +--~ local cache = { } +--~ if weak then +--~ setmetatable(cache, { __mode = "kv" } ) +--~ end +--~ local function reminder(str) +--~ local found = cache[str] +--~ if not found then +--~ found = action(str) +--~ cache[str] = found +--~ end +--~ return found +--~ end +--~ return reminder, cache +--~ end diff --git a/luaextra-boolean.lua b/luaextra-boolean.lua new file mode 100644 index 0000000..be7ec7d --- /dev/null +++ b/luaextra-boolean.lua @@ -0,0 +1,55 @@ +if not modules then modules = { } end modules ['l-boolean'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +boolean = boolean or { } + +local type, tonumber = type, tonumber + +function boolean.tonumber(b) + if b then return 1 else return 0 end +end + +function toboolean(str,tolerant) + if tolerant then + local tstr = type(str) + if tstr == "string" then + return str == "true" or str == "yes" or str == "on" or str == "1" or str == "t" + elseif tstr == "number" then + return tonumber(str) ~= 0 + elseif tstr == "nil" then + return false + else + return str + end + elseif str == "true" then + return true + elseif str == "false" then + return false + else + return str + end +end + +function string.is_boolean(str) + if type(str) == "string" then + if str == "true" or str == "yes" or str == "on" or str == "t" then + return true + elseif str == "false" or str == "no" or str == "off" or str == "f" then + return false + end + end + return nil +end + +function boolean.alwaystrue() + return true +end + +function boolean.falsetrue() + return false +end diff --git a/luaextra-dimen.lua b/luaextra-dimen.lua new file mode 100644 index 0000000..e15e294 --- /dev/null +++ b/luaextra-dimen.lua @@ -0,0 +1,418 @@ +if not modules then modules = { } end modules ['l-dimen'] = { + version = 1.001, + comment = "support for dimensions", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +--[[ldx-- +<p>Internally <l n='luatex'/> work with scaled point, which are +represented by integers. However, in practice, at east at the +<l n='tex'/> end we work with more generic units like points (pt). Going +from scaled points (numbers) to one of those units can be +done by using the conversion factors collected in the following +table.</p> +--ldx]]-- + +local format, match, gsub, type, setmetatable = string.format, string.match, string.gsub, type, setmetatable +local lpegmatch = lpeg.match + +number = number or { } + +number.tonumberf = function(n) return match(format("%.20f",n),"(.-0?)0*$") end -- one zero too much but alas +number.tonumberg = function(n) return format("%.20g",n) end + +local dimenfactors = { + ["pt"] = 1/65536, + ["in"] = ( 100/ 7227)/65536, + ["cm"] = ( 254/ 7227)/65536, + ["mm"] = ( 2540/ 7227)/65536, + ["sp"] = 1, -- 65536 sp in 1pt + ["bp"] = ( 7200/ 7227)/65536, + ["pc"] = ( 1/ 12)/65536, + ["dd"] = ( 1157/ 1238)/65536, + ["cc"] = ( 1157/14856)/65536, + ["nd"] = (20320/21681)/65536, + ["nc"] = ( 5080/65043)/65536 +} + +--~ print(table.serialize(dimenfactors)) +--~ +--~ %.99g: +--~ +--~ t={ +--~ ["bp"]=1.5201782378580324e-005, +--~ ["cc"]=1.1883696112892098e-006, +--~ ["cm"]=5.3628510057769479e-007, +--~ ["dd"]=1.4260435335470516e-005, +--~ ["em"]=0.000152587890625, +--~ ["ex"]=6.103515625e-005, +--~ ["in"]=2.1113586636917117e-007, +--~ ["mm"]=5.3628510057769473e-008, +--~ ["nc"]=1.1917446679504327e-006, +--~ ["nd"]=1.4300936015405194e-005, +--~ ["pc"]=1.2715657552083333e-006, +--~ ["pt"]=1.52587890625e-005, +--~ ["sp"]=1, +--~ } +--~ +--~ patched %s and tonumber +--~ +--~ t={ +--~ ["bp"]=0.00001520178238, +--~ ["cc"]=0.00000118836961, +--~ ["cm"]=0.0000005362851, +--~ ["dd"]=0.00001426043534, +--~ ["em"]=0.00015258789063, +--~ ["ex"]=0.00006103515625, +--~ ["in"]=0.00000021113587, +--~ ["mm"]=0.00000005362851, +--~ ["nc"]=0.00000119174467, +--~ ["nd"]=0.00001430093602, +--~ ["pc"]=0.00000127156576, +--~ ["pt"]=0.00001525878906, +--~ ["sp"]=1, +--~ } + +--[[ldx-- +<p>A conversion function that takes a number, unit (string) and optional +format (string) is implemented using this table.</p> +--ldx]]-- + +-- was: + +local function todimen(n,unit,fmt) + if type(n) == 'string' then + return n + else + unit = unit or 'pt' + return format(fmt or "%s%s",n*dimenfactors[unit],unit) + -- if fmt then + -- return format(fmt,n*dimenfactors[unit],unit) + -- else + -- return match(format("%.20f",n*dimenfactors[unit]),"(.-0?)0*$") .. unit + -- end + end +end + +--[[ldx-- +<p>We collect a bunch of converters in the <type>number</type> namespace.</p> +--ldx]]-- + +number.todimen = todimen +number.dimenfactors = dimenfactors + +function number.topoints (n) return todimen(n,"pt") end +function number.toinches (n) return todimen(n,"in") end +function number.tocentimeters (n) return todimen(n,"cm") end +function number.tomillimeters (n) return todimen(n,"mm") end +function number.toscaledpoints(n) return todimen(n,"sp") end +function number.toscaledpoints(n) return n .. "sp" end +function number.tobasepoints (n) return todimen(n,"bp") end +function number.topicas (n) return todimen(n "pc") end +function number.todidots (n) return todimen(n,"dd") end +function number.tociceros (n) return todimen(n,"cc") end +function number.tonewdidots (n) return todimen(n,"nd") end +function number.tonewciceros (n) return todimen(n,"nc") end + +--[[ldx-- +<p>More interesting it to implement a (sort of) dimen datatype, one +that permits calculations too. First we define a function that +converts a string to scaledpoints. We use <l n='lpeg'/>. We capture +a number and optionally a unit. When no unit is given a constant +capture takes place.</p> +--ldx]]-- + +local amount = (lpeg.S("+-")^0 * lpeg.R("09")^0 * lpeg.P(".")^0 * lpeg.R("09")^0) + lpeg.Cc("0") +local unit = lpeg.R("az")^1 + +local pattern = amount/tonumber * (unit^1/dimenfactors + lpeg.Cc(1)) -- tonumber is new + +--[[ldx-- +<p>We use a metatable to intercept errors. When no key is found in +the table with factors, the metatable will be consulted for an +alternative index function.</p> +--ldx]]-- + +local mt = { } setmetatable(dimenfactors,mt) + +mt.__index = function(t,s) + -- error("wrong dimension: " .. (s or "?")) -- better a message + return false +end + +function string:todimen() + if type(self) == "number" then + return self + else + local value, unit = lpegmatch(pattern,self) + print(value,unit) + return value/unit + end +end + +--[[ldx-- +<p>This converter accepts calls like:</p> + +<typing> +string.todimen("10")) +string.todimen(".10")) +string.todimen("10.0")) +string.todimen("10.0pt")) +string.todimen("10pt")) +string.todimen("10.0pt")) +</typing> + +<p>And of course the often more efficient:</p> + +<typing> +somestring:todimen("12.3cm") +</typing> + +<p>With this in place, we can now implement a proper datatype for dimensions, one +that permits us to do this:</p> + +<typing> +s = dimen "10pt" + dimen "20pt" + dimen "200pt" + - dimen "100sp" / 10 + "20pt" + "0pt" +</typing> + +<p>We create a local metatable for this new type:</p> +--ldx]]-- + +local dimensions = { } + +--[[ldx-- +<p>The main (and globally) visible representation of a dimen is defined next: it is +a one-element table. The unit that is returned from the match is normally a number +(one of the previously defined factors) but we also accept functions. Later we will +see why.</p> +--ldx]]-- + +function dimen(a) + if a then + local ta= type(a) + if ta == "string" then + local value, unit = lpegmatch(pattern,a) + if type(unit) == "function" then + k = value/unit() + else + k = value/unit + end + a = k + elseif ta == "table" then + a = a[1] + end + return setmetatable({ a }, dimensions) + else + return setmetatable({ 0 }, dimensions) + end +end + +--[[ldx-- +<p>This function return a small hash with a metatable attached. It is +through this metatable that we can do the calculations. We could have +shared some of the code but for reasons of speed we don't.</p> +--ldx]]-- + +function dimensions.__add(a, b) + local ta, tb = type(a), type(b) + if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + return setmetatable({ a + b }, dimensions) +end + +function dimensions.__sub(a, b) + local ta, tb = type(a), type(b) + if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + return setmetatable({ a - b }, dimensions) +end + +function dimensions.__mul(a, b) + local ta, tb = type(a), type(b) + if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + return setmetatable({ a * b }, dimensions) +end + +function dimensions.__div(a, b) + local ta, tb = type(a), type(b) + if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + return setmetatable({ a / b }, dimensions) +end + +function dimensions.__unm(a) + local ta = type(a) + if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + return setmetatable({ - a }, dimensions) +end + +--[[ldx-- +<p>It makes no sense to implement the power and modulo function but +the next two do make sense because they permits is code like:</p> + +<typing> +local a, b = dimen "10pt", dimen "11pt" +... +if a > b then + ... +end +</typing> +--ldx]]-- + +-- makes no sense: dimensions.__pow and dimensions.__mod + +function dimensions.__lt(a, b) + return a[1] < b[1] +end + +function dimensions.__eq(a, b) + return a[1] == b[1] +end + +--[[ldx-- +<p>We also need to provide a function for conversion to string (so that +we can print dimensions). We print them as points, just like <l n='tex'/>.</p> +--ldx]]-- + +function dimensions.__tostring(a) + return a[1]/65536 .. "pt" -- instead of todimen(a[1]) +end + +--[[ldx-- +<p>Since it does not take much code, we also provide a way to access +a few accessors</p> + +<typing> +print(dimen().pt) +print(dimen().sp) +</typing> +--ldx]]-- + +function dimensions.__index(tab,key) + local d = dimenfactors[key] + if not d then + error("illegal property of dimen: " .. key) + d = 1 + end + return 1/d +end + +--[[ldx-- +<p>In the converter from string to dimension we support functions as +factors. This is because in <l n='tex'/> we have a few more units: +<type>ex</type> and <type>em</type>. These are not constant factors but +depend on the current font. They are not defined by default, but need +an explicit function call. This is because at the moment that this code +is loaded, the relevant tables that hold the functions needed may not +yet be available.</p> +--ldx]]-- + +function dimensions.texify() -- todo: % + local fti, fc = fonts and fonts.ids and fonts.ids, font and font.current + if fti and fc then + dimenfactors["ex"] = function() return fti[fc()].ex_height end + dimenfactors["em"] = function() return fti[fc()].quad end + else + dimenfactors["ex"] = 1/65536* 4 -- 4pt + dimenfactors["em"] = 1/65536*10 -- 10pt + end +end + +--[[ldx-- +<p>In order to set the defaults we call this function now. At some point +the macro package needs to make sure the function is called again.</p> +--ldx]]-- + +dimensions.texify() + +--[[ldx-- +<p>The previous code is rather efficient (also thanks to <l n='lpeg'/>) but we +can speed it up by caching converted dimensions. On my machine (2008) the following +loop takes about 25.5 seconds.</p> + +<typing> +for i=1,1000000 do + local s = dimen "10pt" + dimen "20pt" + dimen "200pt" + - dimen "100sp" / 10 + "20pt" + "0pt" +end +</typing> + +<p>When we cache converted strings this becomes 16.3 seconds. In order not +to waste too much memory on it, we tag the values of the cache as being +week which mean that the garbage collector will collect them in a next +sweep. This means that in most cases the speed up is mostly affecting the +current couple of calculations and as such the speed penalty is small.</p> + +<p>We redefine two previous defined functions that can benefit from +this:</p> +--ldx]]-- + +local known = { } setmetatable(known, { __mode = "v" }) + +function dimen(a) + if a then + local ta= type(a) + if ta == "string" then + local k = known[a] + if k then + a = k + else + local value, unit = lpegmatch(pattern,a) + if type(unit) == "function" then + k = value/unit() + else + k = value/unit + end + known[a] = k + a = k + end + elseif ta == "table" then + a = a[1] + end + return setmetatable({ a }, dimensions) + else + return setmetatable({ 0 }, dimensions) + end +end + +function string:todimen() + if type(self) == "number" then + return self + else + local k = known[self] + if not k then + local value, unit = lpegmatch(pattern,self) + if value and unit then + k = value/unit + else + k = 0 + end + -- print(self,value,unit) + known[self] = k + end + return k + end +end + +--[[ldx-- +<p>In a similar fashion we can define a glue datatype. In that case we +probably use a hash instead of a one-element table.</p> +--ldx]]-- + +--[[ldx-- +<p>Goodie:s</p> +--ldx]]-- + +function number.percent(n) -- will be cleaned up once luatex 0.30 is out + local hsize = tex.hsize + if type(hsize) == "string" then + hsize = hsize:todimen() + end + return (n/100) * hsize +end + +number["%"] = number.percent diff --git a/luaextra-dir.lua b/luaextra-dir.lua new file mode 100644 index 0000000..3760db2 --- /dev/null +++ b/luaextra-dir.lua @@ -0,0 +1,337 @@ +if not modules then modules = { } end modules ['l-dir'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local type = type +local find, gmatch, match, gsub = string.find, string.gmatch, string.match, string.gsub +local lpegmatch = lpeg.match + +dir = dir or { } + +-- optimizing for no string.find (*) does not save time + +local attributes = lfs.attributes +local walkdir = lfs.dir + +local function glob_pattern(path,patt,recurse,action) + local ok, scanner + if path == "/" then + ok, scanner = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe + else + ok, scanner = xpcall(function() return walkdir(path) end, function() end) -- kepler safe + end + if ok and type(scanner) == "function" then + if not find(path,"/$") then path = path .. '/' end + for name in scanner do + local full = path .. name + local mode = attributes(full,'mode') + if mode == 'file' then + if find(full,patt) then + action(full) + end + elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then + glob_pattern(full,patt,recurse,action) + end + end + end +end + +dir.glob_pattern = glob_pattern + +local function collect_pattern(path,patt,recurse,result) + local ok, scanner + result = result or { } + if path == "/" then + ok, scanner = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe + else + ok, scanner = xpcall(function() return walkdir(path) end, function() end) -- kepler safe + end + if ok and type(scanner) == "function" then + if not find(path,"/$") then path = path .. '/' end + for name in scanner do + local full = path .. name + local attr = attributes(full) + local mode = attr.mode + if mode == 'file' then + if find(full,patt) then + result[name] = attr + end + elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then + attr.list = collect_pattern(full,patt,recurse) + result[name] = attr + end + end + end + return result +end + +dir.collect_pattern = collect_pattern + +local P, S, R, C, Cc, Cs, Ct, Cv, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cv, lpeg.V + +local pattern = Ct { + [1] = (C(P(".") + P("/")^1) + C(R("az","AZ") * P(":") * P("/")^0) + Cc("./")) * V(2) * V(3), + [2] = C(((1-S("*?/"))^0 * P("/"))^0), + [3] = C(P(1)^0) +} + +local filter = Cs ( ( + P("**") / ".*" + + P("*") / "[^/]*" + + P("?") / "[^/]" + + P(".") / "%%." + + P("+") / "%%+" + + P("-") / "%%-" + + P(1) +)^0 ) + +local function glob(str,t) + if type(str) == "table" then + local t = t or { } + for s=1,#str do + glob(str[s],t) + end + return t + elseif lfs.isfile(str) then + local t = t or { } + t[#t+1] = str + return t + else + local split = lpegmatch(pattern,str) + if split then + local t = t or { } + local action = action or function(name) t[#t+1] = name end + local root, path, base = split[1], split[2], split[3] + local recurse = find(base,"%*%*") + local start = root .. path + local result = lpegmatch(filter,start .. base) + glob_pattern(start,result,recurse,action) + return t + else + return { } + end + end +end + +dir.glob = glob + +--~ list = dir.glob("**/*.tif") +--~ list = dir.glob("/**/*.tif") +--~ list = dir.glob("./**/*.tif") +--~ list = dir.glob("oeps/**/*.tif") +--~ list = dir.glob("/oeps/**/*.tif") + +local function globfiles(path,recurse,func,files) -- func == pattern or function + if type(func) == "string" then + local s = func -- alas, we need this indirect way + func = function(name) return find(name,s) end + end + files = files or { } + for name in walkdir(path) do + if find(name,"^%.") then + --- skip + else + local mode = attributes(name,'mode') + if mode == "directory" then + if recurse then + globfiles(path .. "/" .. name,recurse,func,files) + end + elseif mode == "file" then + if func then + if func(name) then + files[#files+1] = path .. "/" .. name + end + else + files[#files+1] = path .. "/" .. name + end + end + end + end + return files +end + +dir.globfiles = globfiles + +-- t = dir.glob("c:/data/develop/context/sources/**/????-*.tex") +-- t = dir.glob("c:/data/develop/tex/texmf/**/*.tex") +-- t = dir.glob("c:/data/develop/context/texmf/**/*.tex") +-- t = dir.glob("f:/minimal/tex/**/*") +-- print(dir.ls("f:/minimal/tex/**/*")) +-- print(dir.ls("*.tex")) + +function dir.ls(pattern) + return table.concat(glob(pattern),"\n") +end + +--~ mkdirs("temp") +--~ mkdirs("a/b/c") +--~ mkdirs(".","/a/b/c") +--~ mkdirs("a","b","c") + +local make_indeed = true -- false + +if string.find(os.getenv("PATH"),";") then + + function dir.mkdirs(...) + local str, pth = "", "" + for _, s in ipairs({...}) do + if s ~= "" then + if str ~= "" then + str = str .. "/" .. s + else + str = s + end + end + end + local first, middle, last + local drive = false + first, middle, last = match(str,"^(//)(//*)(.*)$") + if first then + -- empty network path == local path + else + first, last = match(str,"^(//)/*(.-)$") + if first then + middle, last = match(str,"([^/]+)/+(.-)$") + if middle then + pth = "//" .. middle + else + pth = "//" .. last + last = "" + end + else + first, middle, last = match(str,"^([a-zA-Z]:)(/*)(.-)$") + if first then + pth, drive = first .. middle, true + else + middle, last = match(str,"^(/*)(.-)$") + if not middle then + last = str + end + end + end + end + for s in gmatch(last,"[^/]+") do + if pth == "" then + pth = s + elseif drive then + pth, drive = pth .. s, false + else + pth = pth .. "/" .. s + end + if make_indeed and not lfs.isdir(pth) then + lfs.mkdir(pth) + end + end + return pth, (lfs.isdir(pth) == true) + end + +--~ print(dir.mkdirs("","","a","c")) +--~ print(dir.mkdirs("a")) +--~ print(dir.mkdirs("a:")) +--~ print(dir.mkdirs("a:/b/c")) +--~ print(dir.mkdirs("a:b/c")) +--~ print(dir.mkdirs("a:/bbb/c")) +--~ print(dir.mkdirs("/a/b/c")) +--~ print(dir.mkdirs("/aaa/b/c")) +--~ print(dir.mkdirs("//a/b/c")) +--~ print(dir.mkdirs("///a/b/c")) +--~ print(dir.mkdirs("a/bbb//ccc/")) + + function dir.expand_name(str) + local first, nothing, last = match(str,"^(//)(//*)(.*)$") + if first then + first = lfs.currentdir() .. "/" + first = gsub(first,"\\","/") + end + if not first then + first, last = match(str,"^(//)/*(.*)$") + end + if not first then + first, last = match(str,"^([a-zA-Z]:)(.*)$") + if first and not find(last,"^/") then + local d = lfs.currentdir() + if lfs.chdir(first) then + first = lfs.currentdir() + first = gsub(first,"\\","/") + end + lfs.chdir(d) + end + end + if not first then + first, last = lfs.currentdir(), str + first = gsub(first,"\\","/") + end + last = gsub(last,"//","/") + last = gsub(last,"/%./","/") + last = gsub(last,"^/*","") + first = gsub(first,"/*$","") + if last == "" then + return first + else + return first .. "/" .. last + end + end + +else + + function dir.mkdirs(...) + local str, pth = "", "" + for _, s in ipairs({...}) do + if s ~= "" then + if str ~= "" then + str = str .. "/" .. s + else + str = s + end + end + end + str = gsub(str,"/+","/") + if find(str,"^/") then + pth = "/" + for s in gmatch(str,"[^/]+") do + local first = (pth == "/") + if first then + pth = pth .. s + else + pth = pth .. "/" .. s + end + if make_indeed and not first and not lfs.isdir(pth) then + lfs.mkdir(pth) + end + end + else + pth = "." + for s in gmatch(str,"[^/]+") do + pth = pth .. "/" .. s + if make_indeed and not lfs.isdir(pth) then + lfs.mkdir(pth) + end + end + end + return pth, (lfs.isdir(pth) == true) + end + +--~ print(dir.mkdirs("","","a","c")) +--~ print(dir.mkdirs("a")) +--~ print(dir.mkdirs("/a/b/c")) +--~ print(dir.mkdirs("/aaa/b/c")) +--~ print(dir.mkdirs("//a/b/c")) +--~ print(dir.mkdirs("///a/b/c")) +--~ print(dir.mkdirs("a/bbb//ccc/")) + + function dir.expand_name(str) + if not find(str,"^/") then + str = lfs.currentdir() .. "/" .. str + end + str = gsub(str,"//","/") + str = gsub(str,"/%./","/") + return str + end + +end + +dir.makedirs = dir.mkdirs diff --git a/luaextra-file.lua b/luaextra-file.lua new file mode 100644 index 0000000..c95ef65 --- /dev/null +++ b/luaextra-file.lua @@ -0,0 +1,267 @@ +if not modules then modules = { } end modules ['l-file'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- needs a cleanup + +file = file or { } + +local concat = table.concat +local find, gmatch, match, gsub, sub = string.find, string.gmatch, string.match, string.gsub, string.sub +local lpegmatch = lpeg.match + +function file.removesuffix(filename) + return (gsub(filename,"%.[%a%d]+$","")) +end + +function file.addsuffix(filename, suffix) + if not find(filename,"%.[%a%d]+$") then + return filename .. "." .. suffix + else + return filename + end +end + +function file.replacesuffix(filename, suffix) + return (gsub(filename,"%.[%a%d]+$","")) .. "." .. suffix +end + +function file.dirname(name,default) + return match(name,"^(.+)[/\\].-$") or (default or "") +end + +function file.basename(name) + return match(name,"^.+[/\\](.-)$") or name +end + +function file.nameonly(name) + return (gsub(match(name,"^.+[/\\](.-)$") or name,"%..*$","")) +end + +function file.extname(name,default) + return match(name,"^.+%.([^/\\]-)$") or default or "" +end + +file.suffix = file.extname + +--~ print(file.join("x/","/y")) +--~ print(file.join("http://","/y")) +--~ print(file.join("http://a","/y")) +--~ print(file.join("http:///a","/y")) +--~ print(file.join("//nas-1","/y")) + +function file.join(...) + local pth = concat({...},"/") + pth = gsub(pth,"\\","/") + local a, b = match(pth,"^(.*://)(.*)$") + if a and b then + return a .. gsub(b,"//+","/") + end + a, b = match(pth,"^(//)(.*)$") + if a and b then + return a .. gsub(b,"//+","/") + end + return (gsub(pth,"//+","/")) +end + +function file.iswritable(name) + local a = lfs.attributes(name) or lfs.attributes(file.dirname(name,".")) + return a and sub(a.permissions,2,2) == "w" +end + +function file.isreadable(name) + local a = lfs.attributes(name) + return a and sub(a.permissions,1,1) == "r" +end + +file.is_readable = file.isreadable +file.is_writable = file.iswritable + +-- todo: lpeg + +--~ function file.split_path(str) +--~ local t = { } +--~ str = gsub(str,"\\", "/") +--~ str = gsub(str,"(%a):([;/])", "%1\001%2") +--~ for name in gmatch(str,"([^;:]+)") do +--~ if name ~= "" then +--~ t[#t+1] = gsub(name,"\001",":") +--~ end +--~ end +--~ return t +--~ end + +local checkedsplit = string.checkedsplit + +function file.split_path(str,separator) + str = gsub(str,"\\","/") + return checkedsplit(str,separator or io.pathseparator) +end + +function file.join_path(tab) + return concat(tab,io.pathseparator) -- can have trailing // +end + +function file.collapse_path(str) + str = gsub(str,"/%./","/") + local n, m = 1, 1 + while n > 0 or m > 0 do + str, n = gsub(str,"[^/%.]+/%.%.$","") + str, m = gsub(str,"[^/%.]+/%.%./","") + end + str = gsub(str,"([^/])/$","%1") + str = gsub(str,"^%./","") + str = gsub(str,"/%.$","") + if str == "" then str = "." end + return str +end + +--~ print(file.collapse_path("/a")) +--~ print(file.collapse_path("a/./b/..")) +--~ print(file.collapse_path("a/aa/../b/bb")) +--~ print(file.collapse_path("a/../..")) +--~ print(file.collapse_path("a/.././././b/..")) +--~ print(file.collapse_path("a/./././b/..")) +--~ print(file.collapse_path("a/b/c/../..")) + +function file.robustname(str) + return (gsub(str,"[^%a%d%/%-%.\\]+","-")) +end + +file.readdata = io.loaddata +file.savedata = io.savedata + +function file.copy(oldname,newname) + file.savedata(newname,io.loaddata(oldname)) +end + +-- lpeg variants, slightly faster, not always + +--~ local period = lpeg.P(".") +--~ local slashes = lpeg.S("\\/") +--~ local noperiod = 1-period +--~ local noslashes = 1-slashes +--~ local name = noperiod^1 + +--~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.C(noperiod^1) * -1 + +--~ function file.extname(name) +--~ return lpegmatch(pattern,name) or "" +--~ end + +--~ local pattern = lpeg.Cs(((period * noperiod^1 * -1)/"" + 1)^1) + +--~ function file.removesuffix(name) +--~ return lpegmatch(pattern,name) +--~ end + +--~ local pattern = (noslashes^0 * slashes)^1 * lpeg.C(noslashes^1) * -1 + +--~ function file.basename(name) +--~ return lpegmatch(pattern,name) or name +--~ end + +--~ local pattern = (noslashes^0 * slashes)^1 * lpeg.Cp() * noslashes^1 * -1 + +--~ function file.dirname(name) +--~ local p = lpegmatch(pattern,name) +--~ if p then +--~ return sub(name,1,p-2) +--~ else +--~ return "" +--~ end +--~ end + +--~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.Cp() * noperiod^1 * -1 + +--~ function file.addsuffix(name, suffix) +--~ local p = lpegmatch(pattern,name) +--~ if p then +--~ return name +--~ else +--~ return name .. "." .. suffix +--~ end +--~ end + +--~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.Cp() * noperiod^1 * -1 + +--~ function file.replacesuffix(name,suffix) +--~ local p = lpegmatch(pattern,name) +--~ if p then +--~ return sub(name,1,p-2) .. "." .. suffix +--~ else +--~ return name .. "." .. suffix +--~ end +--~ end + +--~ local pattern = (noslashes^0 * slashes)^0 * lpeg.Cp() * ((noperiod^1 * period)^1 * lpeg.Cp() + lpeg.P(true)) * noperiod^1 * -1 + +--~ function file.nameonly(name) +--~ local a, b = lpegmatch(pattern,name) +--~ if b then +--~ return sub(name,a,b-2) +--~ elseif a then +--~ return sub(name,a) +--~ else +--~ return name +--~ end +--~ end + +--~ local test = file.extname +--~ local test = file.basename +--~ local test = file.dirname +--~ local test = file.addsuffix +--~ local test = file.replacesuffix +--~ local test = file.nameonly + +--~ print(1,test("./a/b/c/abd.def.xxx","!!!")) +--~ print(2,test("./../b/c/abd.def.xxx","!!!")) +--~ print(3,test("a/b/c/abd.def.xxx","!!!")) +--~ print(4,test("a/b/c/def.xxx","!!!")) +--~ print(5,test("a/b/c/def","!!!")) +--~ print(6,test("def","!!!")) +--~ print(7,test("def.xxx","!!!")) + +--~ local tim = os.clock() for i=1,250000 do local ext = test("abd.def.xxx","!!!") end print(os.clock()-tim) + +-- also rewrite previous + +local letter = lpeg.R("az","AZ") + lpeg.S("_-+") +local separator = lpeg.P("://") + +local qualified = lpeg.P(".")^0 * lpeg.P("/") + letter*lpeg.P(":") + letter^1*separator + letter^1 * lpeg.P("/") +local rootbased = lpeg.P("/") + letter*lpeg.P(":") + +-- ./name ../name /name c: :// name/name + +function file.is_qualified_path(filename) + return lpegmatch(qualified,filename) ~= nil +end + +function file.is_rootbased_path(filename) + return lpegmatch(rootbased,filename) ~= nil +end + +local slash = lpeg.S("\\/") +local period = lpeg.P(".") +local drive = lpeg.C(lpeg.R("az","AZ")) * lpeg.P(":") +local path = lpeg.C(((1-slash)^0 * slash)^0) +local suffix = period * lpeg.C(lpeg.P(1-period)^0 * lpeg.P(-1)) +local base = lpeg.C((1-suffix)^0) + +local pattern = (drive + lpeg.Cc("")) * (path + lpeg.Cc("")) * (base + lpeg.Cc("")) * (suffix + lpeg.Cc("")) + +function file.splitname(str) -- returns drive, path, base, suffix + return lpegmatch(pattern,str) +end + +-- function test(t) for k, v in pairs(t) do print(v, "=>", file.splitname(v)) end end +-- +-- test { "c:", "c:/aa", "c:/aa/bb", "c:/aa/bb/cc", "c:/aa/bb/cc.dd", "c:/aa/bb/cc.dd.ee" } +-- test { "c:", "c:aa", "c:aa/bb", "c:aa/bb/cc", "c:aa/bb/cc.dd", "c:aa/bb/cc.dd.ee" } +-- test { "/aa", "/aa/bb", "/aa/bb/cc", "/aa/bb/cc.dd", "/aa/bb/cc.dd.ee" } +-- test { "aa", "aa/bb", "aa/bb/cc", "aa/bb/cc.dd", "aa/bb/cc.dd.ee" } diff --git a/luaextra-io.lua b/luaextra-io.lua new file mode 100644 index 0000000..5a126da --- /dev/null +++ b/luaextra-io.lua @@ -0,0 +1,186 @@ +if not modules then modules = { } end modules ['l-io'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local byte, find, gsub = string.byte, string.find, string.gsub + +if string.find(os.getenv("PATH"),";") then + io.fileseparator, io.pathseparator = "\\", ";" +else + io.fileseparator, io.pathseparator = "/" , ":" +end + +function io.loaddata(filename,textmode) + local f = io.open(filename,(textmode and 'r') or 'rb') + if f then + -- collectgarbage("step") -- sometimes makes a big difference in mem consumption + local data = f:read('*all') + -- garbagecollector.check(data) + f:close() + return data + else + return nil + end +end + +function io.savedata(filename,data,joiner) + local f = io.open(filename,"wb") + if f then + if type(data) == "table" then + f:write(table.join(data,joiner or "")) + elseif type(data) == "function" then + data(f) + else + f:write(data or "") + end + f:close() + return true + else + return false + end +end + +function io.exists(filename) + local f = io.open(filename) + if f == nil then + return false + else + assert(f:close()) + return true + end +end + +function io.size(filename) + local f = io.open(filename) + if f == nil then + return 0 + else + local s = f:seek("end") + assert(f:close()) + return s + end +end + +function io.noflines(f) + local n = 0 + for _ in f:lines() do + n = n + 1 + end + f:seek('set',0) + return n +end + +local nextchar = { + [ 4] = function(f) + return f:read(1,1,1,1) + end, + [ 2] = function(f) + return f:read(1,1) + end, + [ 1] = function(f) + return f:read(1) + end, + [-2] = function(f) + local a, b = f:read(1,1) + return b, a + end, + [-4] = function(f) + local a, b, c, d = f:read(1,1,1,1) + return d, c, b, a + end +} + +function io.characters(f,n) + if f then + return nextchar[n or 1], f + else + return nil, nil + end +end + +local nextbyte = { + [4] = function(f) + local a, b, c, d = f:read(1,1,1,1) + if d then + return byte(a), byte(b), byte(c), byte(d) + else + return nil, nil, nil, nil + end + end, + [2] = function(f) + local a, b = f:read(1,1) + if b then + return byte(a), byte(b) + else + return nil, nil + end + end, + [1] = function (f) + local a = f:read(1) + if a then + return byte(a) + else + return nil + end + end, + [-2] = function (f) + local a, b = f:read(1,1) + if b then + return byte(b), byte(a) + else + return nil, nil + end + end, + [-4] = function(f) + local a, b, c, d = f:read(1,1,1,1) + if d then + return byte(d), byte(c), byte(b), byte(a) + else + return nil, nil, nil, nil + end + end +} + +function io.bytes(f,n) + if f then + return nextbyte[n or 1], f + else + return nil, nil + end +end + +function io.ask(question,default,options) + while true do + io.write(question) + if options then + io.write(string.format(" [%s]",table.concat(options,"|"))) + end + if default then + io.write(string.format(" [%s]",default)) + end + io.write(string.format(" ")) + local answer = io.read() + answer = gsub(answer,"^%s*(.*)%s*$","%1") + if answer == "" and default then + return default + elseif not options then + return answer + else + for _,v in pairs(options) do + if v == answer then + return answer + end + end + local pattern = "^" .. answer + for _,v in pairs(options) do + if find(v,pattern) then + return v + end + end + end + end +end diff --git a/luaextra-lpeg.lua b/luaextra-lpeg.lua new file mode 100644 index 0000000..9e761e4 --- /dev/null +++ b/luaextra-lpeg.lua @@ -0,0 +1,165 @@ +if not modules then modules = { } end modules ['l-lpeg'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +lpeg = require("lpeg") + +local P, R, S, Ct, C, Cs, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc +local match = lpeg.match + +--~ l-lpeg.lua : + +--~ lpeg.digit = lpeg.R('09')^1 +--~ lpeg.sign = lpeg.S('+-')^1 +--~ lpeg.cardinal = lpeg.P(lpeg.sign^0 * lpeg.digit^1) +--~ lpeg.integer = lpeg.P(lpeg.sign^0 * lpeg.digit^1) +--~ lpeg.float = lpeg.P(lpeg.sign^0 * lpeg.digit^0 * lpeg.P('.') * lpeg.digit^1) +--~ lpeg.number = lpeg.float + lpeg.integer +--~ lpeg.oct = lpeg.P("0") * lpeg.R('07')^1 +--~ lpeg.hex = lpeg.P("0x") * (lpeg.R('09') + lpeg.R('AF'))^1 +--~ lpeg.uppercase = lpeg.P("AZ") +--~ lpeg.lowercase = lpeg.P("az") + +--~ lpeg.eol = lpeg.S('\r\n\f')^1 -- includes formfeed +--~ lpeg.space = lpeg.S(' ')^1 +--~ lpeg.nonspace = lpeg.P(1-lpeg.space)^1 +--~ lpeg.whitespace = lpeg.S(' \r\n\f\t')^1 +--~ lpeg.nonwhitespace = lpeg.P(1-lpeg.whitespace)^1 + +local hash = { } + +function lpeg.anywhere(pattern) --slightly adapted from website + return P { P(pattern) + 1 * lpeg.V(1) } +end + +function lpeg.startswith(pattern) --slightly adapted + return P(pattern) +end + +function lpeg.splitter(pattern, action) + return (((1-P(pattern))^1)/action+1)^0 +end + +-- variant: + +--~ local parser = lpeg.Ct(lpeg.splitat(newline)) + +local crlf = P("\r\n") +local cr = P("\r") +local lf = P("\n") +local space = S(" \t\f\v") -- + string.char(0xc2, 0xa0) if we want utf (cf mail roberto) +local newline = crlf + cr + lf +local spacing = space^0 * newline + +local empty = spacing * Cc("") +local nonempty = Cs((1-spacing)^1) * spacing^-1 +local content = (empty + nonempty)^1 + +local capture = Ct(content^0) + +function string:splitlines() + return match(capture,self) +end + +lpeg.linebyline = content -- better make a sublibrary + +--~ local p = lpeg.splitat("->",false) print(match(p,"oeps->what->more")) -- oeps what more +--~ local p = lpeg.splitat("->",true) print(match(p,"oeps->what->more")) -- oeps what->more +--~ local p = lpeg.splitat("->",false) print(match(p,"oeps")) -- oeps +--~ local p = lpeg.splitat("->",true) print(match(p,"oeps")) -- oeps + +local splitters_s, splitters_m = { }, { } + +local function splitat(separator,single) + local splitter = (single and splitters_s[separator]) or splitters_m[separator] + if not splitter then + separator = P(separator) + if single then + local other, any = C((1 - separator)^0), P(1) + splitter = other * (separator * C(any^0) + "") -- ? + splitters_s[separator] = splitter + else + local other = C((1 - separator)^0) + splitter = other * (separator * other)^0 + splitters_m[separator] = splitter + end + end + return splitter +end + +lpeg.splitat = splitat + +local cache = { } + +function string:split(separator) + local c = cache[separator] + if not c then + c = Ct(splitat(separator)) + cache[separator] = c + end + return match(c,self) +end + +local cache = { } + +function string:checkedsplit(separator) + local c = cache[separator] + if not c then + separator = P(separator) + local other = C((1 - separator)^0) + c = Ct(separator^0 * other * (separator^1 * other)^0) + cache[separator] = c + end + return match(c,self) +end + +--~ function lpeg.L(list,pp) +--~ local p = pp +--~ for l=1,#list do +--~ if p then +--~ p = p + lpeg.P(list[l]) +--~ else +--~ p = lpeg.P(list[l]) +--~ end +--~ end +--~ return p +--~ end + +--~ from roberto's site: +--~ +--~ -- decode a two-byte UTF-8 sequence +--~ local function f2 (s) +--~ local c1, c2 = string.byte(s, 1, 2) +--~ return c1 * 64 + c2 - 12416 +--~ end +--~ +--~ -- decode a three-byte UTF-8 sequence +--~ local function f3 (s) +--~ local c1, c2, c3 = string.byte(s, 1, 3) +--~ return (c1 * 64 + c2) * 64 + c3 - 925824 +--~ end +--~ +--~ -- decode a four-byte UTF-8 sequence +--~ local function f4 (s) +--~ local c1, c2, c3, c4 = string.byte(s, 1, 4) +--~ return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 +--~ end +--~ +--~ local cont = lpeg.R("\128\191") -- continuation byte +--~ +--~ local utf8 = lpeg.R("\0\127") / string.byte +--~ + lpeg.R("\194\223") * cont / f2 +--~ + lpeg.R("\224\239") * cont * cont / f3 +--~ + lpeg.R("\240\244") * cont * cont * cont / f4 +--~ +--~ local decode_pattern = lpeg.Ct(utf8^0) * -1 + + +local cont = R("\128\191") -- continuation byte + +lpeg.utf8 = R("\0\127") + R("\194\223") * cont + R("\224\239") * cont * cont + R("\240\244") * cont * cont * cont + diff --git a/luaextra-math.lua b/luaextra-math.lua new file mode 100644 index 0000000..fc8db47 --- /dev/null +++ b/luaextra-math.lua @@ -0,0 +1,41 @@ +if not modules then modules = { } end modules ['l-math'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local floor, sin, cos, tan = math.floor, math.sin, math.cos, math.tan + +if not math.round then + function math.round(x) + return floor(x + 0.5) + end +end + +if not math.div then + function math.div(n,m) + return floor(n/m) + end +end + +if not math.mod then + function math.mod(n,m) + return n % m + end +end + +local pipi = 2*math.pi/360 + +function math.sind(d) + return sin(d*pipi) +end + +function math.cosd(d) + return cos(d*pipi) +end + +function math.tand(d) + return tan(d*pipi) +end diff --git a/luaextra-md5.lua b/luaextra-md5.lua new file mode 100644 index 0000000..27955ef --- /dev/null +++ b/luaextra-md5.lua @@ -0,0 +1,72 @@ +if not modules then modules = { } end modules ['l-md5'] = { + version = 1.001, + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- This also provides file checksums and checkers. + +local gsub, format, byte = string.gsub, string.format, string.byte + +local function convert(str,fmt) + return (gsub(md5.sum(str),".",function(chr) return format(fmt,byte(chr)) end)) +end + +if not md5.HEX then function md5.HEX(str) return convert(str,"%02X") end end +if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end +if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end + +--~ if not md5.HEX then +--~ local function remap(chr) return format("%02X",byte(chr)) end +--~ function md5.HEX(str) return (gsub(md5.sum(str),".",remap)) end +--~ end +--~ if not md5.hex then +--~ local function remap(chr) return format("%02x",byte(chr)) end +--~ function md5.hex(str) return (gsub(md5.sum(str),".",remap)) end +--~ end +--~ if not md5.dec then +--~ local function remap(chr) return format("%03i",byte(chr)) end +--~ function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end +--~ end + +file.needs_updating_threshold = 1 + +function file.needs_updating(oldname,newname) -- size modification access change + local oldtime = lfs.attributes(oldname, modification) + local newtime = lfs.attributes(newname, modification) + if newtime >= oldtime then + return false + elseif oldtime - newtime < file.needs_updating_threshold then + return false + else + return true + end +end + +function file.checksum(name) + if md5 then + local data = io.loaddata(name) + if data then + return md5.HEX(data) + end + end + return nil +end + +function file.loadchecksum(name) + if md5 then + local data = io.loaddata(name .. ".md5") + return data and (gsub(data,"%s","")) + end + return nil +end + +function file.savechecksum(name, checksum) + if not checksum then checksum = file.checksum(name) end + if checksum then + io.savedata(name .. ".md5",checksum) + return checksum + end + return nil +end diff --git a/luaextra-number.lua b/luaextra-number.lua new file mode 100644 index 0000000..a1249f0 --- /dev/null +++ b/luaextra-number.lua @@ -0,0 +1,58 @@ +if not modules then modules = { } end modules ['l-number'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local tostring = tostring +local format, floor, insert, match = string.format, math.floor, table.insert, string.match +local lpegmatch = lpeg.match + +number = number or { } + +-- a,b,c,d,e,f = number.toset(100101) + +function number.toset(n) + return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)") +end + +function number.toevenhex(n) + local s = format("%X",n) + if #s % 2 == 0 then + return s + else + return "0" .. s + end +end + +-- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5% +-- on +-- +-- for i=1,1000000 do +-- local a,b,c,d,e,f,g,h = number.toset(12345678) +-- local a,b,c,d = number.toset(1234) +-- local a,b,c = number.toset(123) +-- end +-- +-- of course dedicated "(.)(.)(.)(.)" matches are even faster + +local one = lpeg.C(1-lpeg.S(''))^1 + +function number.toset(n) + return lpegmatch(one,tostring(n)) +end + +function number.bits(n,zero) + local t, i = { }, (zero and 0) or 1 + while n > 0 do + local m = n % 2 + if m > 0 then + insert(t,1,i) + end + n = floor(n/2) + i = i + 1 + end + return t +end diff --git a/luaextra-os.lua b/luaextra-os.lua new file mode 100644 index 0000000..b60dfb7 --- /dev/null +++ b/luaextra-os.lua @@ -0,0 +1,165 @@ +if not modules then modules = { } end modules ['l-os'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- maybe build io.flush in os.execute + +local find, format = string.find, string.format +local random, ceil = math.random, math.ceil + +local execute, spawn, exec, ioflush = os.execute, os.spawn or os.execute, os.exec or os.execute, io.flush + +function os.execute(...) ioflush() return execute(...) end +function os.spawn (...) ioflush() return spawn (...) end +function os.exec (...) ioflush() return exec (...) end + +function os.resultof(command) + ioflush() -- else messed up logging + local handle = io.popen(command,"r") + if not handle then + -- print("unknown command '".. command .. "' in os.resultof") + return "" + else + return handle:read("*all") or "" + end +end + +--~ os.type : windows | unix (new, we already guessed os.platform) +--~ os.name : windows | msdos | linux | macosx | solaris | .. | generic (new) +--~ os.platform : extended os.name with architecture + +if not io.fileseparator then + if find(os.getenv("PATH"),";") then + io.fileseparator, io.pathseparator, os.type = "\\", ";", os.type or "mswin" + else + io.fileseparator, io.pathseparator, os.type = "/" , ":", os.type or "unix" + end +end + +os.type = os.type or (io.pathseparator == ";" and "windows") or "unix" +os.name = os.name or (os.type == "windows" and "mswin" ) or "linux" + +function os.launch(str) + if os.type == "windows" then + os.execute("start " .. str) -- os.spawn ? + else + os.execute(str .. " &") -- os.spawn ? + end +end + +if not os.setenv then + function os.setenv() return false end +end + +if not os.times then + -- utime = user time + -- stime = system time + -- cutime = children user time + -- cstime = children system time + function os.times() + return { + utime = os.gettimeofday(), -- user + stime = 0, -- system + cutime = 0, -- children user + cstime = 0, -- children system + } + end +end + +os.gettimeofday = os.gettimeofday or os.clock + +local startuptime = os.gettimeofday() + +function os.runtime() + return os.gettimeofday() - startuptime +end + +--~ print(os.gettimeofday()-os.time()) +--~ os.sleep(1.234) +--~ print (">>",os.runtime()) +--~ print(os.date("%H:%M:%S",os.gettimeofday())) +--~ print(os.date("%H:%M:%S",os.time())) + +-- no need for function anymore as we have more clever code and helpers now + +os.platform = os.name or os.type or "linux" +os.libsuffix = 'so' +os.binsuffix = '' + +local name = os.name + +if name == "windows" or name == "mswin" or name == "win32" or name == "msdos" or os.type == "windows" then + if os.getenv("PROCESSOR_ARCHITECTURE") == "AMD64" then + os.platform = "mswin-64" + else + os.platform = "mswin" + end + os.libsuffix = 'dll' + os.binsuffix = 'exe' +else + local architecture = os.getenv("HOSTTYPE") or "" + if architecture == "" then + architecture = os.resultof("uname -m") or "" + end + if architecture == "" then + local architecture = os.resultof("echo $HOSTTYPE") + end + if name == "linux" then + if find(architecture,"x86_64") then + os.platform = "linux-64" + elseif find(architecture,"ppc") then + os.platform = "linux-ppc" + else + os.platform = "linux" + end + elseif name == "macosx" then + if find(architecture,"i386") then + os.platform = "osx-intel" + elseif find(architecture,"x86_64") then + os.platform = "osx-64" + else + os.platform = "osx-ppc" + end + elseif name == "sunos" then + if find(architecture,"sparc") then + os.platform = "solaris-sparc" + else -- if architecture == 'i86pc' + os.platform = "solaris-intel" + end + elseif name == "freebsd" then + if find(architecture,"amd64") then + os.platform = "freebsd-amd64" + else + os.platform = "freebsd" + end + else + os.platform = 'linux' + end +end + +-- beware, we set the randomseed +-- + +-- from wikipedia: Version 4 UUIDs use a scheme relying only on random numbers. This algorithm sets the +-- version number as well as two reserved bits. All other bits are set using a random or pseudorandom +-- data source. Version 4 UUIDs have the form xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx with hexadecimal +-- digits x and hexadecimal digits 8, 9, A, or B for y. e.g. f47ac10b-58cc-4372-a567-0e02b2c3d479. +-- +-- as we don't call this function too often there is not so much risk on repetition + + +local t = { 8, 9, "a", "b" } + +function os.uuid() + return format("%04x%04x-4%03x-%s%03x-%04x-%04x%04x%04x", + random(0xFFFF),random(0xFFFF), + random(0x0FFF), + t[ceil(random(4))] or 8,random(0x0FFF), + random(0xFFFF), + random(0xFFFF),random(0xFFFF),random(0xFFFF) + ) +end diff --git a/luaextra-set.lua b/luaextra-set.lua new file mode 100644 index 0000000..f844d0b --- /dev/null +++ b/luaextra-set.lua @@ -0,0 +1,84 @@ +if not modules then modules = { } end modules ['l-set'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +set = set or { } + +local nums = { } +local tabs = { } +local concat = table.concat +local next, type = next, type + +set.create = table.tohash + +function set.tonumber(t) + if next(t) then + local s = "" + -- we could save mem by sorting, but it slows down + for k, v in next, t do + if v then + -- why bother about the leading space + s = s .. " " .. k + end + end + local n = nums[s] + if not n then + n = #tabs + 1 + tabs[n] = t + nums[s] = n + end + return n + else + return 0 + end +end + +function set.totable(n) + if n == 0 then + return { } + else + return tabs[n] or { } + end +end + +function set.tolist(n) + if n == 0 or not tabs[n] then + return "" + else + local t = { } + for k, v in next, tabs[n] do + if v then + t[#t+1] = k + end + end + return concat(t," ") + end +end + +function set.contains(n,s) + if type(n) == "table" then + return n[s] + elseif n == 0 then + return false + else + local t = tabs[n] + return t and t[s] + end +end + +--~ local c = set.create{'aap','noot','mies'} +--~ local s = set.tonumber(c) +--~ local t = set.totable(s) +--~ print(t['aap']) +--~ local c = set.create{'zus','wim','jet'} +--~ local s = set.tonumber(c) +--~ local t = set.totable(s) +--~ print(t['aap']) +--~ print(t['jet']) +--~ print(set.contains(t,'jet')) +--~ print(set.contains(t,'aap')) + diff --git a/luaextra-string.lua b/luaextra-string.lua new file mode 100644 index 0000000..25b8f8e --- /dev/null +++ b/luaextra-string.lua @@ -0,0 +1,285 @@ +if not modules then modules = { } end modules ['l-string'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower +local lpegmatch = lpeg.match + +-- some functions may disappear as they are not used anywhere + +if not string.split then + + -- this will be overloaded by a faster lpeg variant + + function string:split(pattern) + if #self > 0 then + local t = { } + for s in gmatch(self..pattern,"(.-)"..pattern) do + t[#t+1] = s + end + return t + else + return { } + end + end + +end + +local chr_to_esc = { + ["%"] = "%%", + ["."] = "%.", + ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", + ["^"] = "%^", ["$"] = "%$", + ["["] = "%[", ["]"] = "%]", + ["("] = "%(", [")"] = "%)", + ["{"] = "%{", ["}"] = "%}" +} + +string.chr_to_esc = chr_to_esc + +function string:esc() -- variant 2 + return (gsub(self,"(.)",chr_to_esc)) +end + +function string:unquote() + return (gsub(self,"^([\"\'])(.*)%1$","%2")) +end + +--~ function string:unquote() +--~ if find(self,"^[\'\"]") then +--~ return sub(self,2,-2) +--~ else +--~ return self +--~ end +--~ end + +function string:quote() -- we could use format("%q") + return format("%q",self) +end + +function string:count(pattern) -- variant 3 + local n = 0 + for _ in gmatch(self,pattern) do + n = n + 1 + end + return n +end + +function string:limit(n,sentinel) + if #self > n then + sentinel = sentinel or " ..." + return sub(self,1,(n-#sentinel)) .. sentinel + else + return self + end +end + +--~ function string:strip() -- the .- is quite efficient +--~ -- return match(self,"^%s*(.-)%s*$") or "" +--~ -- return match(self,'^%s*(.*%S)') or '' -- posted on lua list +--~ return find(s,'^%s*$') and '' or match(s,'^%s*(.*%S)') +--~ end + +do -- roberto's variant: + local space = lpeg.S(" \t\v\n") + local nospace = 1 - space + local stripper = space^0 * lpeg.C((space^0 * nospace^1)^0) + function string.strip(str) + return lpegmatch(stripper,str) or "" + end +end + +function string:is_empty() + return not find(self,"%S") +end + +function string:enhance(pattern,action) + local ok, n = true, 0 + while ok do + ok = false + self = gsub(self,pattern, function(...) + ok, n = true, n + 1 + return action(...) + end) + end + return self, n +end + +local chr_to_hex, hex_to_chr = { }, { } + +for i=0,255 do + local c, h = char(i), format("%02X",i) + chr_to_hex[c], hex_to_chr[h] = h, c +end + +function string:to_hex() + return (gsub(self or "","(.)",chr_to_hex)) +end + +function string:from_hex() + return (gsub(self or "","(..)",hex_to_chr)) +end + +if not string.characters then + + local function nextchar(str, index) + index = index + 1 + return (index <= #str) and index or nil, sub(str,index,index) + end + function string:characters() + return nextchar, self, 0 + end + local function nextbyte(str, index) + index = index + 1 + return (index <= #str) and index or nil, byte(sub(str,index,index)) + end + function string:bytes() + return nextbyte, self, 0 + end + +end + +-- we can use format for this (neg n) + +function string:rpadd(n,chr) + local m = n-#self + if m > 0 then + return self .. rep(chr or " ",m) + else + return self + end +end + +function string:lpadd(n,chr) + local m = n-#self + if m > 0 then + return rep(chr or " ",m) .. self + else + return self + end +end + +string.padd = string.rpadd + +function is_number(str) -- tonumber + return find(str,"^[%-%+]?[%d]-%.?[%d+]$") == 1 +end + +--~ print(is_number("1")) +--~ print(is_number("1.1")) +--~ print(is_number(".1")) +--~ print(is_number("-0.1")) +--~ print(is_number("+0.1")) +--~ print(is_number("-.1")) +--~ print(is_number("+.1")) + +function string:split_settings() -- no {} handling, see l-aux for lpeg variant + if find(self,"=") then + local t = { } + for k,v in gmatch(self,"(%a+)=([^%,]*)") do + t[k] = v + end + return t + else + return nil + end +end + +local patterns_escapes = { + ["-"] = "%-", + ["."] = "%.", + ["+"] = "%+", + ["*"] = "%*", + ["%"] = "%%", + ["("] = "%)", + [")"] = "%)", + ["["] = "%[", + ["]"] = "%]", +} + +function string:pattesc() + return (gsub(self,".",patterns_escapes)) +end + +local simple_escapes = { + ["-"] = "%-", + ["."] = "%.", + ["?"] = ".", + ["*"] = ".*", +} + +function string:simpleesc() + return (gsub(self,".",simple_escapes)) +end + +function string:tohash() + local t = { } + for s in gmatch(self,"([^, ]+)") do -- lpeg + t[s] = true + end + return t +end + +local pattern = lpeg.Ct(lpeg.C(1)^0) + +function string:totable() + return lpegmatch(pattern,self) +end + +--~ for _, str in ipairs { +--~ "1234567123456712345671234567", +--~ "a\tb\tc", +--~ "aa\tbb\tcc", +--~ "aaa\tbbb\tccc", +--~ "aaaa\tbbbb\tcccc", +--~ "aaaaa\tbbbbb\tccccc", +--~ "aaaaaa\tbbbbbb\tcccccc", +--~ } do print(string.tabtospace(str)) end + +function string.tabtospace(str,tab) + -- we don't handle embedded newlines + while true do + local s = find(str,"\t") + if s then + if not tab then tab = 7 end -- only when found + local d = tab-(s-1) % tab + if d > 0 then + str = gsub(str,"\t",rep(" ",d),1) + else + str = gsub(str,"\t","",1) + end + else + break + end + end + return str +end + +function string:compactlong() -- strips newlines and leading spaces + self = gsub(self,"[\n\r]+ *","") + self = gsub(self,"^ *","") + return self +end + +function string:striplong() -- strips newlines and leading spaces + self = gsub(self,"^%s*","") + self = gsub(self,"[\n\r]+ *","\n") + return self +end + +function string:topattern(lowercase,strict) + if lowercase then + self = lower(self) + end + self = gsub(self,".",simple_escapes) + if self == "" then + self = ".*" + elseif strict then + self = "^" .. self .. "$" + end + return self +end diff --git a/luaextra-table.lua b/luaextra-table.lua new file mode 100644 index 0000000..70a901e --- /dev/null +++ b/luaextra-table.lua @@ -0,0 +1,858 @@ +if not modules then modules = { } end modules ['l-table'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +table.join = table.concat + +local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove +local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match +local getmetatable, setmetatable = getmetatable, setmetatable +local type, next, tostring, tonumber, ipairs, pairs = type, next, tostring, tonumber, ipairs, pairs + +function table.strip(tab) + local lst = { } + for i=1,#tab do + local s = gsub(tab[i],"^%s*(.-)%s*$","%1") + if s == "" then + -- skip this one + else + lst[#lst+1] = s + end + end + return lst +end + +function table.keys(t) + local k = { } + for key,_ in next, t do + k[#k+1] = key + end + return k +end + +local function compare(a,b) + return (tostring(a) < tostring(b)) +end + +local function sortedkeys(tab) + local srt, kind = { }, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + srt[#srt+1] = key + if kind == 3 then + -- no further check + else + local tkey = type(key) + if tkey == "string" then + -- if kind == 2 then kind = 3 else kind = 1 end + kind = (kind == 2 and 3) or 1 + elseif tkey == "number" then + -- if kind == 1 then kind = 3 else kind = 2 end + kind = (kind == 1 and 3) or 2 + else + kind = 3 + end + end + end + if kind == 0 or kind == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt +end + +local function sortedhashkeys(tab) -- fast one + local srt = { } + for key,_ in next, tab do + srt[#srt+1] = key + end + sort(srt) + return srt +end + +table.sortedkeys = sortedkeys +table.sortedhashkeys = sortedhashkeys + +function table.sortedpairs(t) + local s = sortedhashkeys(t) -- maybe just sortedkeys + local n = 0 + local function kv(s) + n = n + 1 + local k = s[n] + return k, t[k] + end + return kv, s +end + +function table.append(t, list) + for _,v in next, list do + insert(t,v) + end +end + +function table.prepend(t, list) + for k,v in next, list do + insert(t,k,v) + end +end + +function table.merge(t, ...) -- first one is target + t = t or {} + local lst = {...} + for i=1,#lst do + for k, v in next, lst[i] do + t[k] = v + end + end + return t +end + +function table.merged(...) + local tmp, lst = { }, {...} + for i=1,#lst do + for k, v in next, lst[i] do + tmp[k] = v + end + end + return tmp +end + +function table.imerge(t, ...) + local lst = {...} + for i=1,#lst do + local nst = lst[i] + for j=1,#nst do + t[#t+1] = nst[j] + end + end + return t +end + +function table.imerged(...) + local tmp, lst = { }, {...} + for i=1,#lst do + local nst = lst[i] + for j=1,#nst do + tmp[#tmp+1] = nst[j] + end + end + return tmp +end + +local function fastcopy(old) -- fast one + if old then + local new = { } + for k,v in next, old do + if type(v) == "table" then + new[k] = fastcopy(v) -- was just table.copy + else + new[k] = v + end + end + -- optional second arg + local mt = getmetatable(old) + if mt then + setmetatable(new,mt) + end + return new + else + return { } + end +end + +local function copy(t, tables) -- taken from lua wiki, slightly adapted + tables = tables or { } + local tcopy = {} + if not tables[t] then + tables[t] = tcopy + end + for i,v in next, t do -- brrr, what happens with sparse indexed + if type(i) == "table" then + if tables[i] then + i = tables[i] + else + i = copy(i, tables) + end + end + if type(v) ~= "table" then + tcopy[i] = v + elseif tables[v] then + tcopy[i] = tables[v] + else + tcopy[i] = copy(v, tables) + end + end + local mt = getmetatable(t) + if mt then + setmetatable(tcopy,mt) + end + return tcopy +end + +table.fastcopy = fastcopy +table.copy = copy + +-- rougly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack) + +function table.sub(t,i,j) + return { unpack(t,i,j) } +end + +function table.replace(a,b) + for k,v in next, b do + a[k] = v + end +end + +-- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice) + +function table.is_empty(t) + return not t or not next(t) +end + +function table.one_entry(t) + local n = next(t) + return n and not next(t,n) +end + +function table.starts_at(t) + return ipairs(t,1)(t,0) +end + +function table.tohash(t,value) + local h = { } + if t then + if value == nil then value = true end + for _, v in next, t do -- no ipairs here + h[v] = value + end + end + return h +end + +function table.fromhash(t) + local h = { } + for k, v in next, t do -- no ipairs here + if v then h[#h+1] = k end + end + return h +end + +--~ print(table.serialize(t), "\n") +--~ print(table.serialize(t,"name"), "\n") +--~ print(table.serialize(t,false), "\n") +--~ print(table.serialize(t,true), "\n") +--~ print(table.serialize(t,"name",true), "\n") +--~ print(table.serialize(t,"name",true,true), "\n") + +table.serialize_functions = true +table.serialize_compact = true +table.serialize_inline = true + +local noquotes, hexify, handle, reduce, compact, inline, functions + +local reserved = table.tohash { -- intercept a language flaw, no reserved words as key + 'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', 'if', + 'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true', 'until', 'while', +} + +local function simple_table(t) + if #t > 0 then + local n = 0 + for _,v in next, t do + n = n + 1 + end + if n == #t then + local tt = { } + for i=1,#t do + local v = t[i] + local tv = type(v) + if tv == "number" then + if hexify then + tt[#tt+1] = format("0x%04X",v) + else + tt[#tt+1] = tostring(v) -- tostring not needed + end + elseif tv == "boolean" then + tt[#tt+1] = tostring(v) + elseif tv == "string" then + tt[#tt+1] = format("%q",v) + else + tt = nil + break + end + end + return tt + end + end + return nil +end + +-- Because this is a core function of mkiv I moved some function calls +-- inline. +-- +-- twice as fast in a test: +-- +-- local propername = lpeg.P(lpeg.R("AZ","az","__") * lpeg.R("09","AZ","az", "__")^0 * lpeg.P(-1) ) + +-- problem: there no good number_to_string converter with the best resolution + +local function do_serialize(root,name,depth,level,indexed) + if level > 0 then + depth = depth .. " " + if indexed then + handle(format("%s{",depth)) + elseif name then + --~ handle(format("%s%s={",depth,key(name))) + if type(name) == "number" then -- or find(k,"^%d+$") then + if hexify then + handle(format("%s[0x%04X]={",depth,name)) + else + handle(format("%s[%s]={",depth,name)) + end + elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + handle(format("%s%s={",depth,name)) + else + handle(format("%s[%q]={",depth,name)) + end + else + handle(format("%s{",depth)) + end + end + -- we could check for k (index) being number (cardinal) + if root and next(root) then + local first, last = nil, 0 -- #root cannot be trusted here + if compact then + -- NOT: for k=1,#root do (we need to quit at nil) + for k,v in ipairs(root) do -- can we use next? + if not first then first = k end + last = last + 1 + end + end + local sk = sortedkeys(root) + for i=1,#sk do + local k = sk[i] + local v = root[k] + --~ if v == root then + -- circular + --~ else + local t = type(v) + if compact and first and type(k) == "number" and k >= first and k <= last then + if t == "number" then + if hexify then + handle(format("%s 0x%04X,",depth,v)) + else + handle(format("%s %s,",depth,v)) -- %.99g + end + elseif t == "string" then + if reduce and tonumber(v) then + handle(format("%s %s,",depth,v)) + else + handle(format("%s %q,",depth,v)) + end + elseif t == "table" then + if not next(v) then + handle(format("%s {},",depth)) + elseif inline then -- and #t > 0 + local st = simple_table(v) + if st then + handle(format("%s { %s },",depth,concat(st,", "))) + else + do_serialize(v,k,depth,level+1,true) + end + else + do_serialize(v,k,depth,level+1,true) + end + elseif t == "boolean" then + handle(format("%s %s,",depth,tostring(v))) + elseif t == "function" then + if functions then + handle(format('%s loadstring(%q),',depth,dump(v))) + else + handle(format('%s "function",',depth)) + end + else + handle(format("%s %q,",depth,tostring(v))) + end + elseif k == "__p__" then -- parent + if false then + handle(format("%s __p__=nil,",depth)) + end + elseif t == "number" then + --~ if hexify then + --~ handle(format("%s %s=0x%04X,",depth,key(k),v)) + --~ else + --~ handle(format("%s %s=%s,",depth,key(k),v)) -- %.99g + --~ end + if type(k) == "number" then -- or find(k,"^%d+$") then + if hexify then + handle(format("%s [0x%04X]=0x%04X,",depth,k,v)) + else + handle(format("%s [%s]=%s,",depth,k,v)) -- %.99g + end + elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + if hexify then + handle(format("%s %s=0x%04X,",depth,k,v)) + else + handle(format("%s %s=%s,",depth,k,v)) -- %.99g + end + else + if hexify then + handle(format("%s [%q]=0x%04X,",depth,k,v)) + else + handle(format("%s [%q]=%s,",depth,k,v)) -- %.99g + end + end + elseif t == "string" then + if reduce and tonumber(v) then + --~ handle(format("%s %s=%s,",depth,key(k),v)) + if type(k) == "number" then -- or find(k,"^%d+$") then + if hexify then + handle(format("%s [0x%04X]=%s,",depth,k,v)) + else + handle(format("%s [%s]=%s,",depth,k,v)) + end + elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + handle(format("%s %s=%s,",depth,k,v)) + else + handle(format("%s [%q]=%s,",depth,k,v)) + end + else + --~ handle(format("%s %s=%q,",depth,key(k),v)) + if type(k) == "number" then -- or find(k,"^%d+$") then + if hexify then + handle(format("%s [0x%04X]=%q,",depth,k,v)) + else + handle(format("%s [%s]=%q,",depth,k,v)) + end + elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + handle(format("%s %s=%q,",depth,k,v)) + else + handle(format("%s [%q]=%q,",depth,k,v)) + end + end + elseif t == "table" then + if not next(v) then + --~ handle(format("%s %s={},",depth,key(k))) + if type(k) == "number" then -- or find(k,"^%d+$") then + if hexify then + handle(format("%s [0x%04X]={},",depth,k)) + else + handle(format("%s [%s]={},",depth,k)) + end + elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + handle(format("%s %s={},",depth,k)) + else + handle(format("%s [%q]={},",depth,k)) + end + elseif inline then + local st = simple_table(v) + if st then + --~ handle(format("%s %s={ %s },",depth,key(k),concat(st,", "))) + if type(k) == "number" then -- or find(k,"^%d+$") then + if hexify then + handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", "))) + else + handle(format("%s [%s]={ %s },",depth,k,concat(st,", "))) + end + elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + handle(format("%s %s={ %s },",depth,k,concat(st,", "))) + else + handle(format("%s [%q]={ %s },",depth,k,concat(st,", "))) + end + else + do_serialize(v,k,depth,level+1) + end + else + do_serialize(v,k,depth,level+1) + end + elseif t == "boolean" then + --~ handle(format("%s %s=%s,",depth,key(k),tostring(v))) + if type(k) == "number" then -- or find(k,"^%d+$") then + if hexify then + handle(format("%s [0x%04X]=%s,",depth,k,tostring(v))) + else + handle(format("%s [%s]=%s,",depth,k,tostring(v))) + end + elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + handle(format("%s %s=%s,",depth,k,tostring(v))) + else + handle(format("%s [%q]=%s,",depth,k,tostring(v))) + end + elseif t == "function" then + if functions then + --~ handle(format('%s %s=loadstring(%q),',depth,key(k),dump(v))) + if type(k) == "number" then -- or find(k,"^%d+$") then + if hexify then + handle(format("%s [0x%04X]=loadstring(%q),",depth,k,dump(v))) + else + handle(format("%s [%s]=loadstring(%q),",depth,k,dump(v))) + end + elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + handle(format("%s %s=loadstring(%q),",depth,k,dump(v))) + else + handle(format("%s [%q]=loadstring(%q),",depth,k,dump(v))) + end + end + else + --~ handle(format("%s %s=%q,",depth,key(k),tostring(v))) + if type(k) == "number" then -- or find(k,"^%d+$") then + if hexify then + handle(format("%s [0x%04X]=%q,",depth,k,tostring(v))) + else + handle(format("%s [%s]=%q,",depth,k,tostring(v))) + end + elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + handle(format("%s %s=%q,",depth,k,tostring(v))) + else + handle(format("%s [%q]=%q,",depth,k,tostring(v))) + end + end + --~ end + end + end + if level > 0 then + handle(format("%s},",depth)) + end +end + +-- replacing handle by a direct t[#t+1] = ... (plus test) is not much +-- faster (0.03 on 1.00 for zapfino.tma) + +local function serialize(root,name,_handle,_reduce,_noquotes,_hexify) + noquotes = _noquotes + hexify = _hexify + handle = _handle or print + reduce = _reduce or false + compact = table.serialize_compact + inline = compact and table.serialize_inline + functions = table.serialize_functions + local tname = type(name) + if tname == "string" then + if name == "return" then + handle("return {") + else + handle(name .. "={") + end + elseif tname == "number" then + if hexify then + handle(format("[0x%04X]={",name)) + else + handle("[" .. name .. "]={") + end + elseif tname == "boolean" then + if name then + handle("return {") + else + handle("{") + end + else + handle("t={") + end + if root and next(root) then + do_serialize(root,name,"",0,indexed) + end + handle("}") +end + +--~ name: +--~ +--~ true : return { } +--~ false : { } +--~ nil : t = { } +--~ string : string = { } +--~ 'return' : return { } +--~ number : [number] = { } + +function table.serialize(root,name,reduce,noquotes,hexify) + local t = { } + local function flush(s) + t[#t+1] = s + end + serialize(root,name,flush,reduce,noquotes,hexify) + return concat(t,"\n") +end + +function table.tohandle(handle,root,name,reduce,noquotes,hexify) + serialize(root,name,handle,reduce,noquotes,hexify) +end + +-- sometimes tables are real use (zapfino extra pro is some 85M) in which +-- case a stepwise serialization is nice; actually, we could consider: +-- +-- for line in table.serializer(root,name,reduce,noquotes) do +-- ...(line) +-- end +-- +-- so this is on the todo list + +table.tofile_maxtab = 2*1024 + +function table.tofile(filename,root,name,reduce,noquotes,hexify) + local f = io.open(filename,'w') + if f then + local maxtab = table.tofile_maxtab + if maxtab > 1 then + local t = { } + local function flush(s) + t[#t+1] = s + if #t > maxtab then + f:write(concat(t,"\n"),"\n") -- hm, write(sometable) should be nice + t = { } + end + end + serialize(root,name,flush,reduce,noquotes,hexify) + f:write(concat(t,"\n"),"\n") + else + local function flush(s) + f:write(s,"\n") + end + serialize(root,name,flush,reduce,noquotes,hexify) + end + f:close() + end +end + +local function flatten(t,f,complete) -- is this used? meybe a variant with next, ... + for i=1,#t do + local v = t[i] + if type(v) == "table" then + if complete or type(v[1]) == "table" then + flatten(v,f,complete) + else + f[#f+1] = v + end + else + f[#f+1] = v + end + end +end + +function table.flatten(t) + local f = { } + flatten(t,f,true) + return f +end + +function table.unnest(t) -- bad name + local f = { } + flatten(t,f,false) + return f +end + +table.flatten_one_level = table.unnest + +-- a better one: + +local function flattened(t,f) + if not f then + f = { } + end + for k, v in next, t do + if type(v) == "table" then + flattened(v,f) + else + f[k] = v + end + end + return f +end + +table.flattened = flattened + +-- the next three may disappear + +function table.remove_value(t,value) -- todo: n + if value then + for i=1,#t do + if t[i] == value then + remove(t,i) + -- remove all, so no: return + end + end + end +end + +function table.insert_before_value(t,value,str) + if str then + if value then + for i=1,#t do + if t[i] == value then + insert(t,i,str) + return + end + end + end + insert(t,1,str) + elseif value then + insert(t,1,value) + end +end + +function table.insert_after_value(t,value,str) + if str then + if value then + for i=1,#t do + if t[i] == value then + insert(t,i+1,str) + return + end + end + end + t[#t+1] = str + elseif value then + t[#t+1] = value + end +end + +local function are_equal(a,b,n,m) -- indexed + if a and b and #a == #b then + n = n or 1 + m = m or #a + for i=n,m do + local ai, bi = a[i], b[i] + if ai==bi then + -- same + elseif type(ai)=="table" and type(bi)=="table" then + if not are_equal(ai,bi) then + return false + end + else + return false + end + end + return true + else + return false + end +end + +local function identical(a,b) -- assumes same structure + for ka, va in next, a do + local vb = b[k] + if va == vb then + -- same + elseif type(va) == "table" and type(vb) == "table" then + if not identical(va,vb) then + return false + end + else + return false + end + end + return true +end + +table.are_equal = are_equal +table.identical = identical + +-- maybe also make a combined one + +function table.compact(t) + if t then + for k,v in next, t do + if not next(v) then + t[k] = nil + end + end + end +end + +function table.contains(t, v) + if t then + for i=1, #t do + if t[i] == v then + return i + end + end + end + return false +end + +function table.count(t) + local n, e = 0, next(t) + while e do + n, e = n + 1, next(t,e) + end + return n +end + +function table.swapped(t) + local s = { } + for k, v in next, t do + s[v] = k + end + return s +end + +--~ function table.are_equal(a,b) +--~ return table.serialize(a) == table.serialize(b) +--~ end + +function table.clone(t,p) -- t is optional or nil or table + if not p then + t, p = { }, t or { } + elseif not t then + t = { } + end + setmetatable(t, { __index = function(_,key) return p[key] end }) -- why not __index = p ? + return t +end + +function table.hexed(t,seperator) + local tt = { } + for i=1,#t do tt[i] = format("0x%04X",t[i]) end + return concat(tt,seperator or " ") +end + +function table.reverse_hash(h) + local r = { } + for k,v in next, h do + r[v] = lower(gsub(k," ","")) + end + return r +end + +function table.reverse(t) + local tt = { } + if #t > 0 then + for i=#t,1,-1 do + tt[#tt+1] = t[i] + end + end + return tt +end + +function table.insert_before_value(t,value,extra) + for i=1,#t do + if t[i] == extra then + remove(t,i) + end + end + for i=1,#t do + if t[i] == value then + insert(t,i,extra) + return + end + end + insert(t,1,extra) +end + +function table.insert_after_value(t,value,extra) + for i=1,#t do + if t[i] == extra then + remove(t,i) + end + end + for i=1,#t do + if t[i] == value then + insert(t,i+1,extra) + return + end + end + insert(t,#t+1,extra) +end diff --git a/luaextra-unicode.lua b/luaextra-unicode.lua new file mode 100644 index 0000000..290234d --- /dev/null +++ b/luaextra-unicode.lua @@ -0,0 +1,193 @@ +if not modules then modules = { } end modules ['l-unicode'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +if not unicode then + + unicode = { utf8 = { } } + + local floor, char = math.floor, string.char + + function unicode.utf8.utfchar(n) + if n < 0x80 then + return char(n) + elseif n < 0x800 then + return char(0xC0 + floor(n/0x40)) .. char(0x80 + (n % 0x40)) + elseif n < 0x10000 then + return char(0xE0 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) + elseif n < 0x40000 then + return char(0xF0 + floor(n/0x40000)) .. char(0x80 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) + else -- wrong: + -- return char(0xF1 + floor(n/0x1000000)) .. char(0x80 + floor(n/0x40000)) .. char(0x80 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) + return "?" + end + end + +end + +utf = utf or unicode.utf8 + +local concat, utfchar, utfgsub = table.concat, utf.char, utf.gsub +local char, byte, find, bytepairs = string.char, string.byte, string.find, string.bytepairs + +-- 0 EF BB BF UTF-8 +-- 1 FF FE UTF-16-little-endian +-- 2 FE FF UTF-16-big-endian +-- 3 FF FE 00 00 UTF-32-little-endian +-- 4 00 00 FE FF UTF-32-big-endian + +unicode.utfname = { + [0] = 'utf-8', + [1] = 'utf-16-le', + [2] = 'utf-16-be', + [3] = 'utf-32-le', + [4] = 'utf-32-be' +} + +function unicode.utftype(f) -- \000 fails ! + local str = f:read(4) + if not str then + f:seek('set') + return 0 + elseif find(str,"^%z%z\254\255") then + return 4 + elseif find(str,"^\255\254%z%z") then + return 3 + elseif find(str,"^\254\255") then + f:seek('set',2) + return 2 + elseif find(str,"^\255\254") then + f:seek('set',2) + return 1 + elseif find(str,"^\239\187\191") then + f:seek('set',3) + return 0 + else + f:seek('set') + return 0 + end +end + +function unicode.utf16_to_utf8(str, endian) -- maybe a gsub is faster or an lpeg + local result, tmp, n, m, p = { }, { }, 0, 0, 0 + -- lf | cr | crlf / (cr:13, lf:10) + local function doit() + if n == 10 then + if p ~= 13 then + result[#result+1] = concat(tmp) + tmp = { } + p = 0 + end + elseif n == 13 then + result[#result+1] = concat(tmp) + tmp = { } + p = n + else + tmp[#tmp+1] = utfchar(n) + p = 0 + end + end + for l,r in bytepairs(str) do + if r then + if endian then + n = l*256 + r + else + n = r*256 + l + end + if m > 0 then + n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000 + m = 0 + doit() + elseif n >= 0xD800 and n <= 0xDBFF then + m = n + else + doit() + end + end + end + if #tmp > 0 then + result[#result+1] = concat(tmp) + end + return result +end + +function unicode.utf32_to_utf8(str, endian) + local result = { } + local tmp, n, m, p = { }, 0, -1, 0 + -- lf | cr | crlf / (cr:13, lf:10) + local function doit() + if n == 10 then + if p ~= 13 then + result[#result+1] = concat(tmp) + tmp = { } + p = 0 + end + elseif n == 13 then + result[#result+1] = concat(tmp) + tmp = { } + p = n + else + tmp[#tmp+1] = utfchar(n) + p = 0 + end + end + for a,b in bytepairs(str) do + if a and b then + if m < 0 then + if endian then + m = a*256*256*256 + b*256*256 + else + m = b*256 + a + end + else + if endian then + n = m + a*256 + b + else + n = m + b*256*256*256 + a*256*256 + end + m = -1 + doit() + end + else + break + end + end + if #tmp > 0 then + result[#result+1] = concat(tmp) + end + return result +end + +local function little(c) + local b = byte(c) -- b = c:byte() + if b < 0x10000 then + return char(b%256,b/256) + else + b = b - 0x10000 + local b1, b2 = b/1024 + 0xD800, b%1024 + 0xDC00 + return char(b1%256,b1/256,b2%256,b2/256) + end +end + +local function big(c) + local b = byte(c) + if b < 0x10000 then + return char(b/256,b%256) + else + b = b - 0x10000 + local b1, b2 = b/1024 + 0xD800, b%1024 + 0xDC00 + return char(b1/256,b1%256,b2/256,b2%256) + end +end + +function unicode.utf8_to_utf16(str,littleendian) + if littleendian then + return char(255,254) .. utfgsub(str,".",little) + else + return char(254,255) .. utfgsub(str,".",big) + end +end diff --git a/luaextra-url.lua b/luaextra-url.lua new file mode 100644 index 0000000..1d282a1 --- /dev/null +++ b/luaextra-url.lua @@ -0,0 +1,106 @@ +if not modules then modules = { } end modules ['l-url'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local char, gmatch, gsub = string.char, string.gmatch, string.gsub +local tonumber, type = tonumber, type +local lpegmatch = lpeg.match + +-- from the spec (on the web): +-- +-- foo://example.com:8042/over/there?name=ferret#nose +-- \_/ \______________/\_________/ \_________/ \__/ +-- | | | | | +-- scheme authority path query fragment +-- | _____________________|__ +-- / \ / \ +-- urn:example:animal:ferret:nose + +url = url or { } + +local function tochar(s) + return char(tonumber(s,16)) +end + +local colon, qmark, hash, slash, percent, endofstring = lpeg.P(":"), lpeg.P("?"), lpeg.P("#"), lpeg.P("/"), lpeg.P("%"), lpeg.P(-1) + +local hexdigit = lpeg.R("09","AF","af") +local plus = lpeg.P("+") +local escaped = (plus / " ") + (percent * lpeg.C(hexdigit * hexdigit) / tochar) + +local scheme = lpeg.Cs((escaped+(1-colon-slash-qmark-hash))^0) * colon + lpeg.Cc("") +local authority = slash * slash * lpeg.Cs((escaped+(1- slash-qmark-hash))^0) + lpeg.Cc("") +local path = slash * lpeg.Cs((escaped+(1- qmark-hash))^0) + lpeg.Cc("") +local query = qmark * lpeg.Cs((escaped+(1- hash))^0) + lpeg.Cc("") +local fragment = hash * lpeg.Cs((escaped+(1- endofstring))^0) + lpeg.Cc("") + +local parser = lpeg.Ct(scheme * authority * path * query * fragment) + +function url.split(str) + return (type(str) == "string" and lpegmatch(parser,str)) or str +end + +function url.hashed(str) + local s = url.split(str) + return { + scheme = (s[1] ~= "" and s[1]) or "file", + authority = s[2], + path = s[3], + query = s[4], + fragment = s[5], + original = str + } +end + +function url.filename(filename) + local t = url.hashed(filename) + return (t.scheme == "file" and (gsub(t.path,"^/([a-zA-Z])([:|])/)","%1:"))) or filename +end + +function url.query(str) + if type(str) == "string" then + local t = { } + for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do + t[k] = v + end + return t + else + return str + end +end + +--~ print(url.filename("file:///c:/oeps.txt")) +--~ print(url.filename("c:/oeps.txt")) +--~ print(url.filename("file:///oeps.txt")) +--~ print(url.filename("file:///etc/test.txt")) +--~ print(url.filename("/oeps.txt")) + +--~ from the spec on the web (sort of): +--~ +--~ function test(str) +--~ print(table.serialize(url.hashed(str))) +--~ end +--~ +--~ test("%56pass%20words") +--~ test("file:///c:/oeps.txt") +--~ test("file:///c|/oeps.txt") +--~ test("file:///etc/oeps.txt") +--~ test("file://./etc/oeps.txt") +--~ test("file:////etc/oeps.txt") +--~ test("ftp://ftp.is.co.za/rfc/rfc1808.txt") +--~ test("http://www.ietf.org/rfc/rfc2396.txt") +--~ test("ldap://[2001:db8::7]/c=GB?objectClass?one#what") +--~ test("mailto:John.Doe@example.com") +--~ test("news:comp.infosystems.www.servers.unix") +--~ test("tel:+1-816-555-1212") +--~ test("telnet://192.0.2.16:80/") +--~ test("urn:oasis:names:specification:docbook:dtd:xml:4.1.2") +--~ test("/etc/passwords") +--~ test("http://www.pragma-ade.com/spaced%20name") + +--~ test("zip:///oeps/oeps.zip#bla/bla.tex") +--~ test("zip:///oeps/oeps.zip?bla/bla.tex") diff --git a/luaextra-utils.lua b/luaextra-utils.lua new file mode 100644 index 0000000..a5bc9d7 --- /dev/null +++ b/luaextra-utils.lua @@ -0,0 +1,173 @@ +if not modules then modules = { } end modules ['l-utils'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- hm, quite unreadable + +local gsub = string.gsub +local concat = table.concat + +if not utils then utils = { } end +if not utils.merger then utils.merger = { } end +if not utils.lua then utils.lua = { } end + +utils.merger.m_begin = "begin library merge" +utils.merger.m_end = "end library merge" +utils.merger.pattern = + "%c+" .. + "%-%-%s+" .. utils.merger.m_begin .. + "%c+(.-)%c+" .. + "%-%-%s+" .. utils.merger.m_end .. + "%c+" + +function utils.merger._self_fake_() + return + "-- " .. "created merged file" .. "\n\n" .. + "-- " .. utils.merger.m_begin .. "\n\n" .. + "-- " .. utils.merger.m_end .. "\n\n" +end + +function utils.report(...) + print(...) +end + +utils.merger.strip_comment = true + +function utils.merger._self_load_(name) + local f, data = io.open(name), "" + if f then + utils.report("reading merge from %s",name) + data = f:read("*all") + f:close() + else + utils.report("unknown file to merge %s",name) + end + if data and utils.merger.strip_comment then + -- saves some 20K + data = gsub(data,"%-%-~[^\n\r]*[\r\n]", "") + end + return data or "" +end + +function utils.merger._self_save_(name, data) + if data ~= "" then + local f = io.open(name,'w') + if f then + utils.report("saving merge from %s",name) + f:write(data) + f:close() + end + end +end + +function utils.merger._self_swap_(data,code) + if data ~= "" then + return (gsub(data,utils.merger.pattern, function(s) + return "\n\n" .. "-- "..utils.merger.m_begin .. "\n" .. code .. "\n" .. "-- "..utils.merger.m_end .. "\n\n" + end, 1)) + else + return "" + end +end + +--~ stripper: +--~ +--~ data = gsub(data,"%-%-~[^\n]*\n","") +--~ data = gsub(data,"\n\n+","\n") + +function utils.merger._self_libs_(libs,list) + local result, f, frozen = { }, nil, false + result[#result+1] = "\n" + if type(libs) == 'string' then libs = { libs } end + if type(list) == 'string' then list = { list } end + local foundpath = nil + for _, lib in ipairs(libs) do + for _, pth in ipairs(list) do + pth = gsub(pth,"\\","/") -- file.clean_path + utils.report("checking library path %s",pth) + local name = pth .. "/" .. lib + if lfs.isfile(name) then + foundpath = pth + end + end + if foundpath then break end + end + if foundpath then + utils.report("using library path %s",foundpath) + local right, wrong = { }, { } + for _, lib in ipairs(libs) do + local fullname = foundpath .. "/" .. lib + if lfs.isfile(fullname) then + -- right[#right+1] = lib + utils.report("merging library %s",fullname) + result[#result+1] = "do -- create closure to overcome 200 locals limit" + result[#result+1] = io.loaddata(fullname,true) + result[#result+1] = "end -- of closure" + else + -- wrong[#wrong+1] = lib + utils.report("no library %s",fullname) + end + end + if #right > 0 then + utils.report("merged libraries: %s",concat(right," ")) + end + if #wrong > 0 then + utils.report("skipped libraries: %s",concat(wrong," ")) + end + else + utils.report("no valid library path found") + end + return concat(result, "\n\n") +end + +function utils.merger.selfcreate(libs,list,target) + if target then + utils.merger._self_save_( + target, + utils.merger._self_swap_( + utils.merger._self_fake_(), + utils.merger._self_libs_(libs,list) + ) + ) + end +end + +function utils.merger.selfmerge(name,libs,list,target) + utils.merger._self_save_( + target or name, + utils.merger._self_swap_( + utils.merger._self_load_(name), + utils.merger._self_libs_(libs,list) + ) + ) +end + +function utils.merger.selfclean(name) + utils.merger._self_save_( + name, + utils.merger._self_swap_( + utils.merger._self_load_(name), + "" + ) + ) +end + +function utils.lua.compile(luafile, lucfile, cleanup, strip) -- defaults: cleanup=false strip=true + -- utils.report("compiling",luafile,"into",lucfile) + os.remove(lucfile) + local command = "-o " .. string.quote(lucfile) .. " " .. string.quote(luafile) + if strip ~= false then + command = "-s " .. command + end + local done = (os.spawn("texluac " .. command) == 0) or (os.spawn("luac " .. command) == 0) + if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then + -- utils.report("removing",luafile) + os.remove(luafile) + end + return done +end + diff --git a/luaextra.dtx b/luaextra.dtx index a6f18fd..e8b83a1 100644 --- a/luaextra.dtx +++ b/luaextra.dtx @@ -182,850 +182,45 @@ do end end % \end{macrocode} -% -% \begin{macro}{string:stripspaces} -% -% A function to strip the spaces at the beginning and at the end of a -% string. -% -% \begin{macrocode} - -function string:stripspaces() - return (self:gsub("^%s*(.-)%s*$", "%1")) -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{string.is boolean} -% -% If the argument is a string describing a boolean, this function returns -% the boolean, otherwise it retuns nil. -% -% \begin{macrocode} - -function string.is_boolean(str) - if type(str) == "string" then - if str == "true" or str == "yes" or str == "on" or str == "t" then - return true - elseif str == "false" or str == "no" or str == "off" or str == "f" then - return false - end - end - return nil -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{string.is number} -% -% Returns true if the argument string is a number. -% -% \begin{macrocode} - -function string.is_number(str) - return str:find("^[%-%+]?[%d]-%.?[%d+]$") == 1 -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{lpeg.space and lpeg.newline} -% -% Two small helpers for \texttt{lpeg}, that will certainly be widely used: -% spaces and newlines. -% +% Initialize \textsf{Kpathsea} library, so that |require()| will use it to +% locate modules. % \begin{macrocode} -lpeg.space = lpeg.S(" \t\f\v") -lpeg.newline = lpeg.P("\r\n") + lpeg.P("\r") +lpeg.P("\n") - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.fastcopy} -% -% A function copying a table fastly. -% -% \begin{macrocode} - -if not table.fastcopy then do - - local type, pairs, getmetatable, setmetatable = - type, pairs, getmetatable, setmetatable - - local function fastcopy(old) -- fast one - if old then - local new = { } - for k,v in pairs(old) do - if type(v) == "table" then - new[k] = fastcopy(v) -- was just table.copy - else - new[k] = v - end - end - local mt = getmetatable(old) - if mt then - setmetatable(new,mt) - end - return new - else - return { } - end - end - - table.fastcopy = fastcopy - -end end +kpse.set_program_name("luatex") % \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.copy} -% -% A function copying a table in more cases than fastcopy, for example when -% a key is a table. -% +% Load the individual modules. % \begin{macrocode} -if not table.copy then do - - local type, pairs, getmetatable, setmetatable = type, pairs, getmetatable, setmetatable - - local function copy(t, tables) -- taken from lua wiki, slightly adapted - tables = tables or { } - local tcopy = {} - if not tables[t] then - tables[t] = tcopy - end - for i,v in pairs(t) do -- brrr, what happens with sparse indexed - if type(i) == "table" then - if tables[i] then - i = tables[i] - else - i = copy(i, tables) - end - end - if type(v) ~= "table" then - tcopy[i] = v - elseif tables[v] then - tcopy[i] = tables[v] - else - tcopy[i] = copy(v, tables) - end - end - local mt = getmetatable(t) - if mt then - setmetatable(tcopy,mt) - end - return tcopy - end - - table.copy = copy - -end end +require("luaextra-string.lua") +require("luaextra-lpeg.lua") +require("luaextra-boolean.lua") +require("luaextra-number.lua") +require("luaextra-math.lua") +require("luaextra-table.lua") +require("luaextra-aux.lua") +require("luaextra-io.lua") +require("luaextra-os.lua") +require("luaextra-file.lua") +require("luaextra-md5.lua") +require("luaextra-dir.lua") +require("luaextra-unicode.lua") +require("luaextra-utils.lua") +require("luaextra-dimen.lua") +require("luaextra-url.lua") +require("luaextra-set.lua") +require("luaextra-dimen.lua") % \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.serialize} -% -% A bunch of functions leading to \texttt{table.serialize}. -% +% Aliases for backward compatibility. % \begin{macrocode} -function table.sortedkeys(tab) - local srt, kind = { }, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in pairs(tab) do - srt[#srt+1] = key - if kind == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - -- if kind == 2 then kind = 3 else kind = 1 end - kind = (kind == 2 and 3) or 1 - elseif tkey == "number" then - -- if kind == 1 then kind = 3 else kind = 2 end - kind = (kind == 1 and 3) or 2 - else - kind = 3 - end - end - end - if kind == 0 or kind == 3 then - table.sort(srt,function(a,b) return (tostring(a) < tostring(b)) end) - else - table.sort(srt) - end - return srt -end - -do - table.serialize_functions = true - table.serialize_compact = true - table.serialize_inline = true - - local function key(k) - if type(k) == "number" then -- or k:find("^%d+$") then - return "["..k.."]" - elseif noquotes and k:find("^%a[%a%d%_]*$") then - return k - else - return '["'..k..'"]' - end - end - - local function simple_table(t) - if #t > 0 then - local n = 0 - for _,v in pairs(t) do - n = n + 1 - end - if n == #t then - local tt = { } - for i=1,#t do - local v = t[i] - local tv = type(v) - if tv == "number" or tv == "boolean" then - tt[#tt+1] = tostring(v) - elseif tv == "string" then - tt[#tt+1] = ("%q"):format(v) - else - tt = nil - break - end - end - return tt - end - end - return nil - end - - local function serialize(root,name,handle,depth,level,reduce,noquotes,indexed) - handle = handle or print - reduce = reduce or false - if depth then - depth = depth .. " " - if indexed then - handle(("%s{"):format(depth)) - else - handle(("%s%s={"):format(depth,key(name))) - end - else - depth = "" - local tname = type(name) - if tname == "string" then - if name == "return" then - handle("return {") - else - handle(name .. "={") - end - elseif tname == "number" then - handle("[" .. name .. "]={") - elseif tname == "boolean" then - if name then - handle("return {") - else - handle("{") - end - else - handle("t={") - end - end - if root and next(root) then - local compact = table.serialize_compact - local inline = compact and table.serialize_inline - local first, last = nil, 0 -- #root cannot be trusted here - if compact then - for k,v in ipairs(root) do -- NOT: for k=1,#root do (why) - if not first then first = k end - last = last + 1 - end - end - for _,k in pairs(table.sortedkeys(root)) do - local v = root[k] - local t = type(v) - if compact and first and type(k) == "number" and k >= first and k <= last then - if t == "number" then - handle(("%s %s,"):format(depth,v)) - elseif t == "string" then - if reduce and (v:find("^[%-%+]?[%d]-%.?[%d+]$") == 1) then - handle(("%s %s,"):format(depth,v)) - else - handle(("%s %q,"):format(depth,v)) - end - elseif t == "table" then - if not next(v) then - handle(("%s {},"):format(depth)) - elseif inline then - local st = simple_table(v) - if st then - handle(("%s { %s },"):format(depth,table.concat(st,", "))) - else - serialize(v,k,handle,depth,level+1,reduce,noquotes,true) - end - else - serialize(v,k,handle,depth,level+1,reduce,noquotes,true) - end - elseif t == "boolean" then - handle(("%s %s,"):format(depth,tostring(v))) - elseif t == "function" then - if table.serialize_functions then - handle(('%s loadstring(%q),'):format(depth,string.dump(v))) - else - handle(('%s "function",'):format(depth)) - end - else - handle(("%s %q,"):format(depth,tostring(v))) - end - elseif k == "__p__" then -- parent - if false then - handle(("%s __p__=nil,"):format(depth)) - end - elseif t == "number" then - handle(("%s %s=%s,"):format(depth,key(k),v)) - elseif t == "string" then - if reduce and (v:find("^[%-%+]?[%d]-%.?[%d+]$") == 1) then - handle(("%s %s=%s,"):format(depth,key(k),v)) - else - handle(("%s %s=%q,"):format(depth,key(k),v)) - end - elseif t == "table" then - if not next(v) then - handle(("%s %s={},"):format(depth,key(k))) - elseif inline then - local st = simple_table(v) - if st then - handle(("%s %s={ %s },"):format(depth,key(k),table.concat(st,", "))) - else - serialize(v,k,handle,depth,level+1,reduce,noquotes) - end - else - serialize(v,k,handle,depth,level+1,reduce,noquotes) - end - elseif t == "boolean" then - handle(("%s %s=%s,"):format(depth,key(k),tostring(v))) - elseif t == "function" then - if table.serialize_functions then - handle(('%s %s=loadstring(%q),'):format(depth,key(k),string.dump(v))) - else - handle(('%s %s="function",'):format(depth,key(k))) - end - else - handle(("%s %s=%q,"):format(depth,key(k),tostring(v))) - -- handle(('%s %s=loadstring(%q),'):format(depth,key(k),string.dump(function() return v end))) - end - end - if level > 0 then - handle(("%s},"):format(depth)) - else - handle(("%s}"):format(depth)) - end - else - handle(("%s}"):format(depth)) - end - end - - function table.serialize(root,name,reduce,noquotes) - local t = { } - local function flush(s) - t[#t+1] = s - end - serialize(root, name, flush, nil, 0, reduce, noquotes) - return table.concat(t,"\n") - end - - function table.tostring(t, name) - return table.serialize(t, name) - end - - function table.tohandle(handle,root,name,reduce,noquotes) - serialize(root, name, handle, nil, 0, reduce, noquotes) - end - - -- sometimes tables are real use (zapfino extra pro is some 85M) in which - -- case a stepwise serialization is nice; actually, we could consider: - -- - -- for line in table.serializer(root,name,reduce,noquotes) do - -- ...(line) - -- end - -- - -- so this is on the todo list - - table.tofile_maxtab = 2*1024 - - function table.tofile(filename,root,name,reduce,noquotes) - local f = io.open(filename,'w') - if f then - local concat = table.concat - local maxtab = table.tofile_maxtab - if maxtab > 1 then - local t = { } - local function flush(s) - t[#t+1] = s - if #t > maxtab then - f:write(concat(t,"\n"),"\n") -- hm, write(sometable) should be nice - t = { } - end - end - serialize(root, name, flush, nil, 0, reduce, noquotes) - f:write(concat(t,"\n"),"\n") - else - local function flush(s) - f:write(s,"\n") - end - serialize(root, name, flush, nil, 0, reduce, noquotes) - end - f:close() - end - end - -end +fpath = file +lfs.is_readable = file.is_readable +lfs.is_writable = file.is_writable % \end{macrocode} % -% \end{macro} -% \begin{macro}{table.tohash} -% -% Returning a table with all values of the argument table as keys, and -% \texttt{false} as values. This is what we will call a hash. -% -% \begin{macrocode} - -function table.tohash(t) - local h = { } - for _, v in pairs(t) do -- no ipairs here - h[v] = true - end - return h -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.fromhash} -% -% Returning a table built from a hash, with simple integer keys. -% -% \begin{macrocode} - -function table.fromhash(t) - local h = { } - for k, v in pairs(t) do -- no ipairs here - if v then h[#h+1] = k end - end - return h -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.contains value} -% -% A function returning true if the value \texttt{val} is in the table -% \texttt{t}. -% -% \begin{macrocode} - -function table.contains_value(t, val) - if t then - for k, v in pairs(t) do - if v==val then - return true - end - end - end - return false -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.contains key} -% -% A function returning true if the key \texttt{key} is in the table -% \texttt{t} -% -% \begin{macrocode} - -function table.contains_key(t, key) - if t then - for k, v in pairs(t) do - if k==key then - return true - end - end - end - return false -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.value position} -% -% A function returning the position of a value in a table. This will be -% important to be able to remove a value. -% -% \begin{macrocode} - -function table.value_position(t, val) - if t then - local i=1 - for k, v in pairs(t) do - if v==val then - return i - end - i=i+1 - end - end - return 0 -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.key position} -% -% A function returning the position of a key in a table. -% -% \begin{macrocode} - -function table.key_position(t, key) - if t then - local i=1 - for k,v in pairs(t) do - if k==key then - return i - end - i = i+1 - end - end - return -1 -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.remove value} -% -% Removes the first occurence of a value from a table. -% -% \begin{macrocode} - -function table.remove_value(t, v) - local p = table.value_position(t,v) - if p ~= -1 then - table.remove(t, table.value_position(t,v)) - end -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.remove key} -% -% Removing a key from a table. -% -% \begin{macrocode} - -function table.remove_key(t, k) - local p = table.key_position(t,k) - if p ~= -1 then - table.remove(t, table.key_position(t,k)) - end -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{table.is empty} -% -% Returns true if a table is empty. -% -% \begin{macrocode} - -function table.is_empty(t) - return not t or not next(t) -end - -% \end{macrocode} -% -% \texttt{fpath} will contain all the file path manipulation functions. -% Some functions certainly need a little update or cleanup... -% -% \begin{macrocode} - -fpath = { } - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.removesuffix} -% -% A function to remove the suffix (extention) of a filename. -% -% \begin{macrocode} - -function fpath.removesuffix(filename) - return filename:gsub("%.[%a%d]+$", "") -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.addsuffix} -% -% A function adding a suffix to a filename, except if it already has one. -% -% \begin{macrocode} - -function fpath.addsuffix(filename, suffix) - if not filename:find("%.[%a%d]+$") then - return filename .. "." .. suffix - else - return filename - end -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.replacesuffix} -% -% A function replacing a suffix by a new one. -% -% \begin{macrocode} - -function fpath.replacesuffix(filename, suffix) - if not filename:find("%.[%a%d]+$") then - return filename .. "." .. suffix - else - return (filename:gsub("%.[%a%d]+$","."..suffix)) - end -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.dirname} -% -% A function returning the directory of a file path. -% -% \begin{macrocode} - -function fpath.dirname(name) - return name:match("^(.+)[/\\].-$") or "" -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.basename} -% -% A function returning the basename (the name of the file, without the directories) of a file path. -% -% \begin{macrocode} - -function fpath.basename(fname) - if not fname then - return nil - end - return fname:match("^.+[/\\](.-)$") or fname -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.nameonly} -% -% Returning the basename of a file without the suffix. -% -% \begin{macrocode} - -function fpath.nameonly(name) - return ((name:match("^.+[/\\](.-)$") or name):gsub("%..*$","")) -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.suffix} -% -% Returns the suffix of a file name. -% -% \begin{macrocode} - -function fpath.suffix(name) - return name:match("^.+%.([^/\\]-)$") or "" -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.join} -% -% A function joining any number of arguments into a complete path. -% -% \begin{macrocode} - -function fpath.join(...) - local pth = table.concat({...},"/") - pth = pth:gsub("\\","/") - local a, b = pth:match("^(.*://)(.*)$") - if a and b then - return a .. b:gsub("//+","/") - end - a, b = pth:match("^(//)(.*)$") - if a and b then - return a .. b:gsub("//+","/") - end - return (pth:gsub("//+","/")) -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.split} -% -% A function returning a table with all directories from a filename. -% -% \begin{macrocode} - -function fpath.split(str) - local t = { } - str = str:gsub("\\", "/") - str = str:gsub("(%a):([;/])", "%1\001%2") - for name in str:gmatch("([^;:]+)") do - if name ~= "" then - name = name:gsub("\001",":") - t[#t+1] = name - end - end - return t -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.normalize sep} -% -% A function to change directory separators to canonical ones (\texttt{/}). -% -% \begin{macrocode} - -function fpath.normalize_sep(str) - return str:gsub("\\", "/") -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{fpath.localize sep} -% -% A function changing directory separators into local ones (\texttt{/} on -% Unix, |\| on Windows). -% -% \begin{macrocode} - -function fpath.localize_sep(str) - if os.type == 'windows' or os.type == 'msdos' then - return str:gsub("/", "\\") - else - return str:gsub("\\", "/") - end -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{lfs.is writable} -% -% Returns true if a file is writable. This function and the following ones -% are a bit too expensive, they should be made with |lfs.attributes|. -% -% \begin{macrocode} - -function lfs.is_writable(name) - local f = io.open(name, 'w') - if f then - f:close() - return true - else - return false - end -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{lfs.is readable} -% -% Returns true if a file is readable. -% -% \begin{macrocode} - -function lfs.is_readable(name) - local f = io.open(name,'r') - if f then - f:close() - return true - else - return false - end -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{math.round} -% -% Returns the closest integer. -% -% \begin{macrocode} - -if not math.round then - function math.round(x) - return math.floor(x + 0.5) - end -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{math.div} -% -% Returns the quotient of the euclidian division of n by m. -% -% \begin{macrocode} - -if not math.div then - function math.div(n,m) - return floor(n/m) - end -end - -% \end{macrocode} -% -% \end{macro} -% \begin{macro}{math.mod} -% -% Returns the remainder of the euclidian division of n by m. -% -% \begin{macrocode} - -if not math.mod then - function math.mod(n,m) - return n % m - end -end - -% \end{macrocode} -% -% \end{macro} -% % \iffalse %</lua> % \fi |