From 9e40ecb0b88b556a55b7b5bf3b7c58b5a8368501 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 18:25:49 +0200 Subject: update l-math --- lualibs-math.lua | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/lualibs-math.lua b/lualibs-math.lua index fc8db47..4669a51 100644 --- a/lualibs-math.lua +++ b/lualibs-math.lua @@ -28,14 +28,13 @@ end local pipi = 2*math.pi/360 -function math.sind(d) - return sin(d*pipi) +if not math.sind then + function math.sind(d) return sin(d*pipi) end + function math.cosd(d) return cos(d*pipi) end + function math.tand(d) return tan(d*pipi) end end -function math.cosd(d) - return cos(d*pipi) -end - -function math.tand(d) - return tan(d*pipi) +if not math.odd then + function math.odd (n) return n % 2 ~= 0 end + function math.even(n) return n % 2 == 0 end end -- cgit v1.2.3 From e84a0c500a2104dc82b8278d7778144351aa71b3 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 18:35:14 +0200 Subject: update l-number l-boolean --- lualibs-boolean.lua | 66 ++++++++++++++++++++++++++---------------- lualibs-number.lua | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 120 insertions(+), 28 deletions(-) diff --git a/lualibs-boolean.lua b/lualibs-boolean.lua index be7ec7d..2b94de7 100644 --- a/lualibs-boolean.lua +++ b/lualibs-boolean.lua @@ -6,36 +6,60 @@ if not modules then modules = { } end modules ['l-boolean'] = { license = "see context related readme files" } -boolean = boolean or { } - local type, tonumber = type, tonumber +boolean = boolean or { } +local boolean = boolean + function boolean.tonumber(b) - if b then return 1 else return 0 end + if b then return 1 else return 0 end -- test and return or return end function toboolean(str,tolerant) - if tolerant then - local tstr = type(str) - if tstr == "string" then - return str == "true" or str == "yes" or str == "on" or str == "1" or str == "t" - elseif tstr == "number" then - return tonumber(str) ~= 0 - elseif tstr == "nil" then - return false - else - return str - end + if str == nil then + return false + elseif str == false then + return false + elseif str == true then + return true + elseif str == "true" then + return true + elseif str == "false" then + return false + elseif not tolerant then + return false + elseif str == 0 then + return false + elseif (tonumber(str) or 0) > 0 then + return true + else + return str == "yes" or str == "on" or str == "t" + end +end + +string.toboolean = toboolean + +function string.booleanstring(str) + if str == nil then + return false + elseif str == false then + return false + elseif str == true then + return true elseif str == "true" then return true elseif str == "false" then return false + elseif str == 0 then + return false + elseif (tonumber(str) or 0) > 0 then + return true else - return str + return str == "yes" or str == "on" or str == "t" end end -function string.is_boolean(str) +function string.is_boolean(str,default) if type(str) == "string" then if str == "true" or str == "yes" or str == "on" or str == "t" then return true @@ -43,13 +67,5 @@ function string.is_boolean(str) return false end end - return nil -end - -function boolean.alwaystrue() - return true -end - -function boolean.falsetrue() - return false + return default end diff --git a/lualibs-number.lua b/lualibs-number.lua index a1249f0..a4dbe3b 100644 --- a/lualibs-number.lua +++ b/lualibs-number.lua @@ -6,11 +6,15 @@ if not modules then modules = { } end modules ['l-number'] = { license = "see context related readme files" } -local tostring = tostring -local format, floor, insert, match = string.format, math.floor, table.insert, string.match +-- this module will be replaced when we have the bit library + +local tostring, tonumber = tostring, tonumber +local format, floor, match, rep = string.format, math.floor, string.match, string.rep +local concat, insert = table.concat, table.insert local lpegmatch = lpeg.match -number = number or { } +number = number or { } +local number = number -- a,b,c,d,e,f = number.toset(100101) @@ -56,3 +60,75 @@ function number.bits(n,zero) end return t end + +--~ http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html + +function number.bit(p) + return 2 ^ (p - 1) -- 1-based indexing +end + +function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... + return x % (p + p) >= p +end + +function number.setbit(x, p) + return (x % (p + p) >= p) and x or x + p +end + +function number.clearbit(x, p) + return (x % (p + p) >= p) and x - p or x +end + +--~ function number.tobitstring(n) +--~ if n == 0 then +--~ return "0" +--~ else +--~ local t = { } +--~ while n > 0 do +--~ insert(t,1,n % 2 > 0 and 1 or 0) +--~ n = floor(n/2) +--~ end +--~ return concat(t) +--~ end +--~ end + +function number.tobitstring(n,m) + if n == 0 then + if m then + rep("00000000",m) + else + return "00000000" + end + else + local t = { } + while n > 0 do + insert(t,1,n % 2 > 0 and 1 or 0) + n = floor(n/2) + end + local nn = 8 - #t % 8 + if nn > 0 and nn < 8 then + for i=1,nn do + insert(t,1,0) + end + end + if m then + m = m * 8 - #t + if m > 0 then + insert(t,1,rep("0",m)) + end + end + return concat(t) + end +end + +--~ print(number.tobitstring(8)) +--~ print(number.tobitstring(14)) +--~ print(number.tobitstring(66)) +--~ print(number.tobitstring(0x00)) +--~ print(number.tobitstring(0xFF)) +--~ print(number.tobitstring(46260767936,8)) +--~ print(#number.tobitstring(46260767936,6)) + +function number.valid(str,default) + return tonumber(str) or default or nil +end -- cgit v1.2.3 From 42d2142a79697299cf42b60d63d8c025d942df36 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 18:37:00 +0200 Subject: update lualibs-set.lua --- lualibs-set.lua | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lualibs-set.lua b/lualibs-set.lua index f844d0b..2370f01 100644 --- a/lualibs-set.lua +++ b/lualibs-set.lua @@ -6,6 +6,8 @@ if not modules then modules = { } end modules ['l-set'] = { license = "see context related readme files" } +-- This will become obsolete when we have the bitset library embedded. + set = set or { } local nums = { } @@ -49,10 +51,11 @@ function set.tolist(n) if n == 0 or not tabs[n] then return "" else - local t = { } + local t, n = { }, 0 for k, v in next, tabs[n] do if v then - t[#t+1] = k + n = n + 1 + t[n] = k end end return concat(t," ") -- cgit v1.2.3 From e9578dd0f5221aa22d7fc240c3258b05fc213b94 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 18:44:30 +0200 Subject: update l-md5 --- lualibs-md5.lua | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/lualibs-md5.lua b/lualibs-md5.lua index 27955ef..6abf2e1 100644 --- a/lualibs-md5.lua +++ b/lualibs-md5.lua @@ -7,6 +7,7 @@ if not modules then modules = { } end modules ['l-md5'] = { -- This also provides file checksums and checkers. +local md5, file = md5, file local gsub, format, byte = string.gsub, string.format, string.byte local function convert(str,fmt) @@ -30,17 +31,30 @@ if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end --~ function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end --~ end -file.needs_updating_threshold = 1 - -function file.needs_updating(oldname,newname) -- size modification access change - local oldtime = lfs.attributes(oldname, modification) - local newtime = lfs.attributes(newname, modification) - if newtime >= oldtime then - return false - elseif oldtime - newtime < file.needs_updating_threshold then - return false +function file.needsupdating(oldname,newname,threshold) -- size modification access change + local oldtime = lfs.attributes(oldname,"modification") + if oldtime then + local newtime = lfs.attributes(newname,"modification") + if not newtime then + return true -- no new file, so no updating needed + elseif newtime >= oldtime then + return false -- new file definitely needs updating + elseif oldtime - newtime < (threshold or 1) then + return false -- new file is probably still okay + else + return true -- new file has to be updated + end else - return true + return false -- no old file, so no updating needed + end +end + +file.needs_updating = file.needsupdating + +function file.syncmtimes(oldname,newname) + local oldtime = lfs.attributes(oldname,"modification") + if oldtime and lfs.isfile(newname) then + lfs.touch(newname,oldtime,oldtime) end end @@ -62,7 +76,7 @@ function file.loadchecksum(name) return nil end -function file.savechecksum(name, checksum) +function file.savechecksum(name,checksum) if not checksum then checksum = file.checksum(name) end if checksum then io.savedata(name .. ".md5",checksum) -- cgit v1.2.3 From 8dff98a320873888fd263cb7db46a04bff168d54 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 18:48:53 +0200 Subject: update l-io --- lualibs-io.lua | 105 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 77 insertions(+), 28 deletions(-) diff --git a/lualibs-io.lua b/lualibs-io.lua index 66e2793..a9269ab 100644 --- a/lualibs-io.lua +++ b/lualibs-io.lua @@ -6,7 +6,10 @@ if not modules then modules = { } end modules ['l-io'] = { license = "see context related readme files" } -local byte, find, gsub = string.byte, string.find, string.gsub +local io = io +local byte, find, gsub, format = string.byte, string.find, string.gsub, string.format +local concat = table.concat +local type = type if string.find(os.getenv("PATH"),";") then io.fileseparator, io.pathseparator = "\\", ";" @@ -14,16 +17,14 @@ else io.fileseparator, io.pathseparator = "/" , ":" end -function io.loaddata(filename,textmode) +function io.loaddata(filename,textmode) -- return nil if empty local f = io.open(filename,(textmode and 'r') or 'rb') if f then - -- collectgarbage("step") -- sometimes makes a big difference in mem consumption local data = f:read('*all') - -- garbagecollector.check(data) f:close() - return data - else - return nil + if #data > 0 then + return data + end end end @@ -31,19 +32,59 @@ function io.savedata(filename,data,joiner) local f = io.open(filename,"wb") if f then if type(data) == "table" then - f:write(table.join(data,joiner or "")) + f:write(concat(data,joiner or "")) elseif type(data) == "function" then data(f) else f:write(data or "") end f:close() + io.flush() return true else return false end end +function io.loadlines(filename,n) -- return nil if empty + local f = io.open(filename,'r') + if f then + if n then + local lines = { } + for i=1,n do + local line = f:read("*lines") + if line then + lines[#lines+1] = line + else + break + end + end + f:close() + lines = concat(lines,"\n") + if #lines > 0 then + return lines + end + else + local line = f:read("*line") or "" + assert(f:close()) + if #line > 0 then + return line + end + end + end +end + +function io.loadchunk(filename,n) + local f = io.open(filename,'rb') + if f then + local data = f:read(n or 1024) + f:close() + if #data > 0 then + return data + end + end +end + function io.exists(filename) local f = io.open(filename) if f == nil then @@ -66,12 +107,19 @@ function io.size(filename) end function io.noflines(f) - local n = 0 - for _ in f:lines() do - n = n + 1 + if type(f) == "string" then + local f = io.open(filename) + local n = f and io.noflines(f) or 0 + assert(f:close()) + return n + else + local n = 0 + for _ in f:lines() do + n = n + 1 + end + f:seek('set',0) + return n end - f:seek('set',0) - return n end local nextchar = { @@ -97,8 +145,6 @@ local nextchar = { function io.characters(f,n) if f then return nextchar[n or 1], f - else - return nil, nil end end @@ -107,40 +153,42 @@ local nextbyte = { local a, b, c, d = f:read(1,1,1,1) if d then return byte(a), byte(b), byte(c), byte(d) - else - return nil, nil, nil, nil + end + end, + [3] = function(f) + local a, b, c = f:read(1,1,1) + if b then + return byte(a), byte(b), byte(c) end end, [2] = function(f) local a, b = f:read(1,1) if b then return byte(a), byte(b) - else - return nil, nil end end, [1] = function (f) local a = f:read(1) if a then return byte(a) - else - return nil end end, [-2] = function (f) local a, b = f:read(1,1) if b then return byte(b), byte(a) - else - return nil, nil + end + end, + [-3] = function(f) + local a, b, c = f:read(1,1,1) + if b then + return byte(c), byte(b), byte(a) end end, [-4] = function(f) local a, b, c, d = f:read(1,1,1,1) if d then return byte(d), byte(c), byte(b), byte(a) - else - return nil, nil, nil, nil end end } @@ -157,12 +205,13 @@ function io.ask(question,default,options) while true do io.write(question) if options then - io.write(string.format(" [%s]",table.concat(options,"|"))) + io.write(format(" [%s]",concat(options,"|"))) end if default then - io.write(string.format(" [%s]",default)) + io.write(format(" [%s]",default)) end - io.write(string.format(" ")) + io.write(format(" ")) + io.flush() local answer = io.read() answer = gsub(answer,"^%s*(.*)%s*$","%1") if answer == "" and default then -- cgit v1.2.3 From 06682300f19cef9f42fac68cd11a11c5cd3de40e Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 18:58:12 +0200 Subject: update l-lpeg l-url --- lualibs-lpeg.lua | 837 ++++++++++++++++++++++++++++++++++++++++++++++++++----- lualibs-url.lua | 335 +++++++++++++++++----- 2 files changed, 1040 insertions(+), 132 deletions(-) diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua index b107a8e..d92b722 100644 --- a/lualibs-lpeg.lua +++ b/lualibs-lpeg.lua @@ -6,30 +6,124 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } + +-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) + local lpeg = require("lpeg") +-- tracing (only used when we encounter a problem in integration of lpeg in luatex) + +-- some code will move to unicode and string + +local report = texio and texio.write_nl or print + +-- local lpmatch = lpeg.match +-- local lpprint = lpeg.print +-- local lpp = lpeg.P +-- local lpr = lpeg.R +-- local lps = lpeg.S +-- local lpc = lpeg.C +-- local lpb = lpeg.B +-- local lpv = lpeg.V +-- local lpcf = lpeg.Cf +-- local lpcb = lpeg.Cb +-- local lpcg = lpeg.Cg +-- local lpct = lpeg.Ct +-- local lpcs = lpeg.Cs +-- local lpcc = lpeg.Cc +-- local lpcmt = lpeg.Cmt +-- local lpcarg = lpeg.Carg + +-- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end + +-- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end +-- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end +-- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end +-- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end +-- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end +-- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end +-- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end +-- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end +-- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end +-- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end +-- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end +-- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end +-- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end +-- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end + +local type, next = type, next +local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format + +-- Beware, we predefine a bunch of patterns here and one reason for doing so +-- is that we get consistent behaviour in some of the visualizers. + lpeg.patterns = lpeg.patterns or { } -- so that we can share local patterns = lpeg.patterns -local P, R, S, Ct, C, Cs, Cc, V = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.V -local match = lpeg.match +local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp +local lpegtype, lpegmatch = lpeg.type, lpeg.match + +local utfcharacters = string.utfcharacters +local utfgmatch = unicode and unicode.utf8.gmatch + +local anything = P(1) +local endofstring = P(-1) +local alwaysmatched = P(true) + +patterns.anything = anything +patterns.endofstring = endofstring +patterns.beginofstring = alwaysmatched +patterns.alwaysmatched = alwaysmatched local digit, sign = R('09'), S('+-') local cr, lf, crlf = P("\r"), P("\n"), P("\r\n") -local utf8byte = R("\128\191") +local newline = crlf + S("\r\n") -- cr + lf +local escaped = P("\\") * anything +local squote = P("'") +local dquote = P('"') +local space = P(" ") + +local utfbom_32_be = P('\000\000\254\255') +local utfbom_32_le = P('\255\254\000\000') +local utfbom_16_be = P('\255\254') +local utfbom_16_le = P('\254\255') +local utfbom_8 = P('\239\187\191') +local utfbom = utfbom_32_be + utfbom_32_le + + utfbom_16_be + utfbom_16_le + + utfbom_8 +local utftype = utfbom_32_be / "utf-32-be" + utfbom_32_le / "utf-32-le" + + utfbom_16_be / "utf-16-be" + utfbom_16_le / "utf-16-le" + + utfbom_8 / "utf-8" + alwaysmatched / "unknown" + +local utf8next = R("\128\191") -patterns.utf8byte = utf8byte patterns.utf8one = R("\000\127") -patterns.utf8two = R("\194\223") * utf8byte -patterns.utf8three = R("\224\239") * utf8byte * utf8byte -patterns.utf8four = R("\240\244") * utf8byte * utf8byte * utf8byte +patterns.utf8two = R("\194\223") * utf8next +patterns.utf8three = R("\224\239") * utf8next * utf8next +patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next +patterns.utfbom = utfbom +patterns.utftype = utftype + +local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four +local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false) + +patterns.utf8 = utf8char +patterns.utf8char = utf8char +patterns.validutf8 = validutf8char +patterns.validutf8char = validutf8char + +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer patterns.digit = digit patterns.sign = sign patterns.cardinal = sign^0 * digit^1 patterns.integer = sign^0 * digit^1 patterns.float = sign^0 * digit^0 * P('.') * digit^1 +patterns.cfloat = sign^0 * digit^0 * P(',') * digit^1 patterns.number = patterns.float + patterns.integer +patterns.cnumber = patterns.cfloat + patterns.integer patterns.oct = P("0") * R("07")^1 patterns.octal = patterns.oct patterns.HEX = P("0x") * R("09","AF")^1 @@ -38,55 +132,83 @@ patterns.hexadecimal = P("0x") * R("09","AF","af")^1 patterns.lowercase = R("az") patterns.uppercase = R("AZ") patterns.letter = patterns.lowercase + patterns.uppercase -patterns.space = S(" ") -patterns.eol = S("\n\r") -patterns.spacer = S(" \t\f\v") -- + string.char(0xc2, 0xa0) if we want utf (cf mail roberto) -patterns.newline = crlf + cr + lf -patterns.nonspace = 1 - patterns.space -patterns.nonspacer = 1 - patterns.spacer -patterns.whitespace = patterns.eol + patterns.spacer -patterns.nonwhitespace = 1 - patterns.whitespace -patterns.utf8 = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four -patterns.utfbom = P('\000\000\254\255') + P('\255\254\000\000') + P('\255\254') + P('\254\255') + P('\239\187\191') +patterns.space = space +patterns.tab = P("\t") +patterns.spaceortab = patterns.space + patterns.tab +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace +patterns.newline = newline +patterns.emptyline = newline^1 +patterns.nonspacer = 1 - spacer +patterns.nonwhitespace = 1 - whitespace +patterns.equal = P("=") +patterns.comma = P(",") +patterns.commaspacer = P(",") * spacer^0 +patterns.period = P(".") +patterns.colon = P(":") +patterns.semicolon = P(";") +patterns.underscore = P("_") +patterns.escaped = escaped +patterns.squote = squote +patterns.dquote = dquote +patterns.nosquote = (escaped + (1-squote))^0 +patterns.nodquote = (escaped + (1-dquote))^0 +patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") +patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") +patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble +patterns.unspacer = ((patterns.spacer^1)/"")^0 -function lpeg.anywhere(pattern) --slightly adapted from website - return P { P(pattern) + 1 * V(1) } -- why so complex? -end +patterns.singlequoted = squote * patterns.nosquote * squote +patterns.doublequoted = dquote * patterns.nodquote * dquote +patterns.quoted = patterns.doublequoted + patterns.singlequoted -function lpeg.splitter(pattern, action) - return (((1-P(pattern))^1)/action+1)^0 +patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 +patterns.beginline = #(1-newline) + +-- print(string.unquoted("test")) +-- print(string.unquoted([["t\"est"]])) +-- print(string.unquoted([["t\"est"x]])) +-- print(string.unquoted("\'test\'")) +-- print(string.unquoted('"test"')) +-- print(string.unquoted('"test"')) + +local function anywhere(pattern) --slightly adapted from website + return P { P(pattern) + 1 * V(1) } end -local spacing = patterns.spacer^0 * patterns.newline -- sort of strip -local empty = spacing * Cc("") -local nonempty = Cs((1-spacing)^1) * spacing^-1 -local content = (empty + nonempty)^1 +lpeg.anywhere = anywhere -local capture = Ct(content^0) +function lpeg.instringchecker(p) + p = anywhere(p) + return function(str) + return lpegmatch(p,str) and true or false + end +end -function string:splitlines() - return match(capture,self) +function lpeg.splitter(pattern, action) + return (((1-P(pattern))^1)/action+1)^0 end -patterns.textline = content +function lpeg.tsplitter(pattern, action) + return Ct((((1-P(pattern))^1)/action+1)^0) +end ---~ local p = lpeg.splitat("->",false) print(match(p,"oeps->what->more")) -- oeps what more ---~ local p = lpeg.splitat("->",true) print(match(p,"oeps->what->more")) -- oeps what->more ---~ local p = lpeg.splitat("->",false) print(match(p,"oeps")) -- oeps ---~ local p = lpeg.splitat("->",true) print(match(p,"oeps")) -- oeps +-- probleem: separator can be lpeg and that does not hash too well, but +-- it's quite okay as the key is then not garbage collected -local splitters_s, splitters_m = { }, { } +local splitters_s, splitters_m, splitters_t = { }, { }, { } local function splitat(separator,single) local splitter = (single and splitters_s[separator]) or splitters_m[separator] if not splitter then separator = P(separator) + local other = C((1 - separator)^0) if single then - local other, any = C((1 - separator)^0), P(1) + local any = anything splitter = other * (separator * C(any^0) + "") -- ? splitters_s[separator] = splitter else - local other = C((1 - separator)^0) splitter = other * (separator * other)^0 splitters_m[separator] = splitter end @@ -94,29 +216,135 @@ local function splitat(separator,single) return splitter end -lpeg.splitat = splitat +local function tsplitat(separator) + local splitter = splitters_t[separator] + if not splitter then + splitter = Ct(splitat(separator)) + splitters_t[separator] = splitter + end + return splitter +end + +lpeg.splitat = splitat +lpeg.tsplitat = tsplitat + +function string.splitup(str,separator) + if not separator then + separator = "," + end + return lpegmatch(splitters_m[separator] or splitat(separator),str) +end + +--~ local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more +--~ local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more +--~ local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps +--~ local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps local cache = { } function lpeg.split(separator,str) local c = cache[separator] if not c then - c = Ct(splitat(separator)) + c = tsplitat(separator) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end -function string:split(separator) - local c = cache[separator] - if not c then - c = Ct(splitat(separator)) - cache[separator] = c +function string.split(str,separator) + if separator then + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c + end + return lpegmatch(c,str) + else + return { str } end - return match(c,self) end -lpeg.splitters = cache +local spacing = patterns.spacer^0 * newline -- sort of strip +local empty = spacing * Cc("") +local nonempty = Cs((1-spacing)^1) * spacing^-1 +local content = (empty + nonempty)^1 + +patterns.textline = content + +--~ local linesplitter = Ct(content^0) +--~ +--~ function string.splitlines(str) +--~ return lpegmatch(linesplitter,str) +--~ end + +local linesplitter = tsplitat(newline) + +patterns.linesplitter = linesplitter + +function string.splitlines(str) + return lpegmatch(linesplitter,str) +end + +local utflinesplitter = utfbom^-1 * tsplitat(newline) + +patterns.utflinesplitter = utflinesplitter + +function string.utfsplitlines(str) + return lpegmatch(utflinesplitter,str or "") +end + +local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) +local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) + +function string.utfsplit(str,ignorewhitespace) -- new + if ignorewhitespace then + return lpegmatch(utfcharsplitter_iws,str or "") + else + return lpegmatch(utfcharsplitter_ows,str or "") + end +end + +-- inspect(string.utfsplit("a b c d")) +-- inspect(string.utfsplit("a b c d",true)) + +-- -- alternative 1: 0.77 +-- +-- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) +-- +-- function string.utflength(str) +-- return #lpegmatch(utfcharcounter,str or "") +-- end +-- +-- -- alternative 2: 1.70 +-- +-- local n = 0 +-- +-- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow +-- +-- function string.utflength(str) +-- n = 0 +-- lpegmatch(utfcharcounter,str or "") +-- return n +-- end +-- +-- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) + +local n = 0 + +local utfcharcounter = utfbom^-1 * Cs ( ( + Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end + + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end + + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end + + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end +)^0 ) + +function string.utflength(str) + n = 0 + lpegmatch(utfcharcounter,str or "") + return n +end + +--~ lpeg.splitters = cache -- no longer public local cache = { } @@ -124,42 +352,515 @@ function lpeg.checkedsplit(separator,str) local c = cache[separator] if not c then separator = P(separator) - local other = C((1 - separator)^0) + local other = C((1 - separator)^1) c = Ct(separator^0 * other * (separator^1 * other)^0) cache[separator] = c end - return match(c,str) + return lpegmatch(c,str) end -function string:checkedsplit(separator) +function string.checkedsplit(str,separator) local c = cache[separator] if not c then separator = P(separator) - local other = C((1 - separator)^0) + local other = C((1 - separator)^1) c = Ct(separator^0 * other * (separator^1 * other)^0) cache[separator] = c end - return match(c,self) + return lpegmatch(c,str) +end + +--~ from roberto's site: + +local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end +local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end +local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end + +local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 + +patterns.utf8byte = utf8byte + +--~ local str = " a b c d " + +--~ local s = lpeg.stripper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.stripper("ab") print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper("ab") print("["..lpegmatch(s,str).."]") + +local cache = { } + +function lpeg.stripper(str) + if type(str) == "string" then + local s = cache[str] + if not s then + s = Cs(((S(str)^1)/"" + 1)^0) + cache[str] = s + end + return s + else + return Cs(((str^1)/"" + 1)^0) + end +end + +local cache = { } + +function lpeg.keeper(str) + if type(str) == "string" then + local s = cache[str] + if not s then + s = Cs((((1-S(str))^1)/"" + 1)^0) + cache[str] = s + end + return s + else + return Cs((((1-str)^1)/"" + 1)^0) + end +end + +function lpeg.frontstripper(str) -- or pattern (yet undocumented) + return (P(str) + P(true)) * Cs(anything^0) +end + +function lpeg.endstripper(str) -- or pattern (yet undocumented) + return Cs((1 - P(str) * endofstring)^0) +end + +-- Just for fun I looked at the used bytecode and +-- p = (p and p + pp) or pp gets one more (testset). + +function lpeg.replacer(one,two) + if type(one) == "table" then + local no = #one + local p + if no == 0 then + for k, v in next, one do + local pp = P(k) / v + if p then + p = p + pp + else + p = pp + end + end + return Cs((p + 1)^0) + elseif no == 1 then + local o = one[1] + one, two = P(o[1]), o[2] + return Cs(((1-one)^1 + one/two)^0) + else + for i=1,no do + local o = one[i] + local pp = P(o[1]) / o[2] + if p then + p = p + pp + else + p = pp + end + end + return Cs((p + 1)^0) + end + else + one = P(one) + two = two or "" + return Cs(((1-one)^1 + one/two)^0) + end end ---~ function lpeg.append(list,pp) ---~ local p = pp ---~ for l=1,#list do ---~ if p then ---~ p = p + P(list[l]) ---~ else ---~ p = P(list[l]) ---~ end ---~ end ---~ return p +-- print(lpeg.match(lpeg.replacer("e","a"),"test test")) +-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) +-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) + +local splitters_f, splitters_s = { }, { } + +function lpeg.firstofsplit(separator) -- always return value + local splitter = splitters_f[separator] + if not splitter then + separator = P(separator) + splitter = C((1 - separator)^0) + splitters_f[separator] = splitter + end + return splitter +end + +function lpeg.secondofsplit(separator) -- nil if not split + local splitter = splitters_s[separator] + if not splitter then + separator = P(separator) + splitter = (1 - separator)^0 * separator * C(anything^0) + splitters_s[separator] = splitter + end + return splitter +end + +function lpeg.balancer(left,right) + left, right = P(left), P(right) + return P { left * ((1 - left - right) + V(1))^0 * right } +end + +--~ print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) +--~ print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty +--~ print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) +--~ print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) +--~ print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty +--~ print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) +--~ print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) +--~ print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) + +--~ -- slower: +--~ +--~ function lpeg.counter(pattern) +--~ local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 +--~ return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end --~ end ---~ from roberto's site: +local nany = utf8char/"" + +function lpeg.counter(pattern) + pattern = Cs((P(pattern)/" " + nany)^0) + return function(str) + return #lpegmatch(pattern,str) + end +end + +if utfgmatch then + + function lpeg.count(str,what) -- replaces string.count + if type(what) == "string" then + local n = 0 + for _ in utfgmatch(str,what) do + n = n + 1 + end + return n + else -- 4 times slower but still faster than / function + return #lpegmatch(Cs((P(what)/" " + nany)^0),str) + end + end + +else + + local cache = { } + + function lpeg.count(str,what) -- replaces string.count + if type(what) == "string" then + local p = cache[what] + if not p then + p = Cs((P(what)/" " + nany)^0) + cache[p] = p + end + return #lpegmatch(p,str) + else -- 4 times slower but still faster than / function + return #lpegmatch(Cs((P(what)/" " + nany)^0),str) + end + end + +end + +local patterns_escapes = { -- also defines in l-string + ["%"] = "%%", + ["."] = "%.", + ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", + ["["] = "%[", ["]"] = "%]", + ["("] = "%)", [")"] = "%)", + -- ["{"] = "%{", ["}"] = "%}" + -- ["^"] = "%^", ["$"] = "%$", +} + +local simple_escapes = { -- also defines in l-string + ["-"] = "%-", + ["."] = "%.", + ["?"] = ".", + ["*"] = ".*", +} + +local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) +local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) + +function string.escapedpattern(str,simple) + return lpegmatch(simple and s or p,str) +end + +-- utf extensies + +lpeg.UP = lpeg.P + +if utfcharacters then + + function lpeg.US(str) + local p + for uc in utfcharacters(str) do + if p then + p = p + P(uc) + else + p = P(uc) + end + end + return p + end + + +elseif utfgmatch then + + function lpeg.US(str) + local p + for uc in utfgmatch(str,".") do + if p then + p = p + P(uc) + else + p = P(uc) + end + end + return p + end + +else -local f1 = string.byte + function lpeg.US(str) + local p + local f = function(uc) + if p then + p = p + P(uc) + else + p = P(uc) + end + end + lpegmatch((utf8char/f)^0,str) + return p + end + +end + +local range = Cs(utf8byte) * (Cs(utf8byte) + Cc(false)) + +local utfchar = unicode and unicode.utf8 and unicode.utf8.char + +function lpeg.UR(str,more) + local first, last + if type(str) == "number" then + first = str + last = more or first + else + first, last = lpegmatch(range,str) + if not last then + return P(str) + end + end + if first == last then + return P(str) + elseif utfchar and last - first < 8 then -- a somewhat arbitrary criterium + local p + for i=first,last do + if p then + p = p + P(utfchar(i)) + else + p = P(utfchar(i)) + end + end + return p -- nil when invalid range + else + local f = function(b) + return b >= first and b <= last + end + return utf8byte / f -- nil when invalid range + end +end -local function f2(s) local c1, c2 = f1(s,1,2) return c1 * 64 + c2 - 12416 end -local function f3(s) local c1, c2, c3 = f1(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end -local function f4(s) local c1, c2, c3, c4 = f1(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end +--~ lpeg.print(lpeg.R("ab","cd","gh")) +--~ lpeg.print(lpeg.P("a","b","c")) +--~ lpeg.print(lpeg.S("a","b","c")) -patterns.utf8byte = patterns.utf8one/f1 + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 +--~ print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) +--~ print(lpeg.count("äáàa",lpeg.UP("áà"))) +--~ print(lpeg.count("äáàa",lpeg.US("àá"))) +--~ print(lpeg.count("äáàa",lpeg.UR("aá"))) +--~ print(lpeg.count("äáàa",lpeg.UR("àá"))) +--~ print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) + +function lpeg.is_lpeg(p) + return p and lpegtype(p) == "pattern" +end + +function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order + if type(list) ~= "table" then + list = { list, ... } + end + -- table.sort(list) -- longest match first + local p = P(list[1]) + for l=2,#list do + p = p + P(list[l]) + end + return p +end + +-- For the moment here, but it might move to utilities. Beware, we need to +-- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we +-- loop back from the end cq. prepend. + +local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency! + +function lpeg.append(list,pp,delayed,checked) + local p = pp + if #list > 0 then + local keys = fastcopy(list) + sort(keys) + for i=#keys,1,-1 do + local k = keys[i] + if p then + p = P(k) + p + else + p = P(k) + end + end + elseif delayed then -- hm, it looks like the lpeg parser resolves anyway + local keys = sortedkeys(list) + if p then + for i=1,#keys,1 do + local k = keys[i] + local v = list[k] + p = P(k)/list + p + end + else + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + p = P(k) + p + else + p = P(k) + end + end + if p then + p = p / list + end + end + elseif checked then + -- problem: substitution gives a capture + local keys = sortedkeys(list) + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + if k == v then + p = P(k) + p + else + p = P(k)/v + p + end + else + if k == v then + p = P(k) + else + p = P(k)/v + end + end + end + else + local keys = sortedkeys(list) + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + p = P(k)/v + p + else + p = P(k)/v + end + end + end + return p +end + +-- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true)) +-- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true)) + +-- function lpeg.exact_match(words,case_insensitive) +-- local pattern = concat(words) +-- if case_insensitive then +-- local pattern = S(upper(characters)) + S(lower(characters)) +-- local list = { } +-- for i=1,#words do +-- list[lower(words[i])] = true +-- end +-- return Cmt(pattern^1, function(_,i,s) +-- return list[lower(s)] and i +-- end) +-- else +-- local pattern = S(concat(words)) +-- local list = { } +-- for i=1,#words do +-- list[words[i]] = true +-- end +-- return Cmt(pattern^1, function(_,i,s) +-- return list[s] and i +-- end) +-- end +-- end + +-- experiment: + +local function make(t) + local p +-- for k, v in next, t do + for k, v in table.sortedhash(t) do + if not p then + if next(v) then + p = P(k) * make(v) + else + p = P(k) + end + else + if next(v) then + p = p + P(k) * make(v) + else + p = p + P(k) + end + end + end + return p +end + +function lpeg.utfchartabletopattern(list) + local tree = { } + for i=1,#list do + local t = tree + for c in gmatch(list[i],".") do + if not t[c] then + t[c] = { } + end + t = t[c] + end + end + return make(tree) +end + +-- inspect ( lpeg.utfchartabletopattern { +-- utfchar(0x00A0), -- nbsp +-- utfchar(0x2000), -- enquad +-- utfchar(0x2001), -- emquad +-- utfchar(0x2002), -- enspace +-- utfchar(0x2003), -- emspace +-- utfchar(0x2004), -- threeperemspace +-- utfchar(0x2005), -- fourperemspace +-- utfchar(0x2006), -- sixperemspace +-- utfchar(0x2007), -- figurespace +-- utfchar(0x2008), -- punctuationspace +-- utfchar(0x2009), -- breakablethinspace +-- utfchar(0x200A), -- hairspace +-- utfchar(0x200B), -- zerowidthspace +-- utfchar(0x202F), -- narrownobreakspace +-- utfchar(0x205F), -- math thinspace +-- } ) + +-- handy from within tex: + +local lpegmatch = lpeg.match + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) +end + +-- strips leading and trailing spaces and collapsed all other spaces + +local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) + +function string.collapsespaces(str) + return lpegmatch(pattern,str) +end diff --git a/lualibs-url.lua b/lualibs-url.lua index e3e6f81..83b8fcc 100644 --- a/lualibs-url.lua +++ b/lualibs-url.lua @@ -6,101 +6,291 @@ if not modules then modules = { } end modules ['l-url'] = { license = "see context related readme files" } -local char, gmatch, gsub = string.char, string.gmatch, string.gsub +local char, gmatch, gsub, format, byte, find = string.char, string.gmatch, string.gsub, string.format, string.byte, string.find +local concat = table.concat local tonumber, type = tonumber, type -local lpegmatch = lpeg.match +local P, C, R, S, Cs, Cc, Ct, Cf, Cg, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cg, lpeg.V +local lpegmatch, lpegpatterns, replacer = lpeg.match, lpeg.patterns, lpeg.replacer --- from the spec (on the web): +-- from wikipedia: -- --- foo://example.com:8042/over/there?name=ferret#nose --- \_/ \______________/\_________/ \_________/ \__/ --- | | | | | --- scheme authority path query fragment --- | _____________________|__ --- / \ / \ --- urn:example:animal:ferret:nose - -url = url or { } - -local function tochar(s) - return char(tonumber(s,16)) -end +-- foo://username:password@example.com:8042/over/there/index.dtb?type=animal;name=narwhal#nose +-- \_/ \_______________/ \_________/ \__/ \___/ \_/ \______________________/ \__/ +-- | | | | | | | | +-- | userinfo hostname port | | query fragment +-- | \________________________________/\_____________|____|/ +-- scheme | | | | +-- | authority path | | +-- | | | +-- | path interpretable as filename +-- | ___________|____________ | +-- / \ / \ | +-- urn:example:animal:ferret:nose interpretable as extension + +url = url or { } +local url = url -local colon, qmark, hash, slash, percent, endofstring = lpeg.P(":"), lpeg.P("?"), lpeg.P("#"), lpeg.P("/"), lpeg.P("%"), lpeg.P(-1) +local tochar = function(s) return char(tonumber(s,16)) end -local hexdigit = lpeg.R("09","AF","af") -local plus = lpeg.P("+") -local escaped = (plus / " ") + (percent * lpeg.C(hexdigit * hexdigit) / tochar) +local colon = P(":") +local qmark = P("?") +local hash = P("#") +local slash = P("/") +local percent = P("%") +local endofstring = P(-1) + +local hexdigit = R("09","AF","af") +local plus = P("+") +local nothing = Cc("") +local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar +local escaped = (plus / " ") + escapedchar -- we assume schemes with more than 1 character (in order to avoid problems with windows disks) +-- we also assume that when we have a scheme, we also have an authority +-- +-- maybe we should already split the query (better for unescaping as = & can be part of a value + +local schemestr = Cs((escaped+(1-colon-slash-qmark-hash))^2) +local authoritystr = Cs((escaped+(1- slash-qmark-hash))^0) +local pathstr = Cs((escaped+(1- qmark-hash))^0) +----- querystr = Cs((escaped+(1- hash))^0) +local querystr = Cs(( (1- hash))^0) +local fragmentstr = Cs((escaped+(1- endofstring))^0) + +local scheme = schemestr * colon + nothing +local authority = slash * slash * authoritystr + nothing +local path = slash * pathstr + nothing +local query = qmark * querystr + nothing +local fragment = hash * fragmentstr + nothing + +local validurl = scheme * authority * path * query * fragment +local parser = Ct(validurl) + +lpegpatterns.url = validurl +lpegpatterns.urlsplitter = parser -local scheme = lpeg.Cs((escaped+(1-colon-slash-qmark-hash))^2) * colon + lpeg.Cc("") -local authority = slash * slash * lpeg.Cs((escaped+(1- slash-qmark-hash))^0) + lpeg.Cc("") -local path = slash * lpeg.Cs((escaped+(1- qmark-hash))^0) + lpeg.Cc("") -local query = qmark * lpeg.Cs((escaped+(1- hash))^0) + lpeg.Cc("") -local fragment = hash * lpeg.Cs((escaped+(1- endofstring))^0) + lpeg.Cc("") +local escapes = { } -local parser = lpeg.Ct(scheme * authority * path * query * fragment) +setmetatable(escapes, { __index = function(t,k) + local v = format("%%%02X",byte(k)) + t[k] = v + return v +end }) + +local escaper = Cs((R("09","AZ","az")^1 + P(" ")/"%%20" + S("-./_")^1 + P(1) / escapes)^0) -- space happens most +local unescaper = Cs((escapedchar + 1)^0) + +lpegpatterns.urlunescaped = escapedchar +lpegpatterns.urlescaper = escaper +lpegpatterns.urlunescaper = unescaper -- todo: reconsider Ct as we can as well have five return values (saves a table) -- so we can have two parsers, one with and one without -function url.split(str) +local function split(str) return (type(str) == "string" and lpegmatch(parser,str)) or str end --- todo: cache them +local isscheme = schemestr * colon * slash * slash -- this test also assumes authority -function url.hashed(str) - local s = url.split(str) - local somescheme = s[1] ~= "" - return { - scheme = (somescheme and s[1]) or "file", - authority = s[2], - path = s[3], - query = s[4], - fragment = s[5], - original = str, - noscheme = not somescheme, - } +local function hasscheme(str) + if str then + local scheme = lpegmatch(isscheme,str) -- at least one character + return scheme ~= "" and scheme or false + else + return false + end end -function url.hasscheme(str) - return url.split(str)[1] ~= "" +--~ print(hasscheme("home:")) +--~ print(hasscheme("home://")) + +-- todo: cache them + +local rootletter = R("az","AZ") + + S("_-+") +local separator = P("://") +local qualified = P(".")^0 * P("/") + + rootletter * P(":") + + rootletter^1 * separator + + rootletter^1 * P("/") +local rootbased = P("/") + + rootletter * P(":") + +local barswapper = replacer("|",":") +local backslashswapper = replacer("\\","/") + +-- queries: + +local equal = P("=") +local amp = P("&") +local key = Cs(((escapedchar+1)-equal )^0) +local value = Cs(((escapedchar+1)-amp -endofstring)^0) + +local splitquery = Cf ( Ct("") * P { "sequence", + sequence = V("pair") * (amp * V("pair"))^0, + pair = Cg(key * equal * value), +}, rawset) + +-- hasher + +local function hashed(str) -- not yet ok (/test?test) + if str == "" then + return { + scheme = "invalid", + original = str, + } + end + local s = split(str) + local rawscheme = s[1] + local rawquery = s[4] + local somescheme = rawscheme ~= "" + local somequery = rawquery ~= "" + if not somescheme and not somequery then + s = { + scheme = "file", + authority = "", + path = str, + query = "", + fragment = "", + original = str, + noscheme = true, + filename = str, + } + else -- not always a filename but handy anyway + local authority, path, filename = s[2], s[3] + if authority == "" then + filename = path + elseif path == "" then + filename = "" + else + filename = authority .. "/" .. path + end + s = { + scheme = rawscheme, + authority = authority, + path = path, + query = lpegmatch(unescaper,rawquery), -- unescaped, but possible conflict with & and = + queries = lpegmatch(splitquery,rawquery), -- split first and then unescaped + fragment = s[5], + original = str, + noscheme = false, + filename = filename, + } + end + return s end -function url.addscheme(str,scheme) - return (url.hasscheme(str) and str) or ((scheme or "file:///") .. str) +-- inspect(hashed("template://test")) + +-- Here we assume: +-- +-- files: /// = relative +-- files: //// = absolute (!) + +--~ table.print(hashed("file://c:/opt/tex/texmf-local")) -- c:/opt/tex/texmf-local +--~ table.print(hashed("file://opt/tex/texmf-local" )) -- opt/tex/texmf-local +--~ table.print(hashed("file:///opt/tex/texmf-local" )) -- opt/tex/texmf-local +--~ table.print(hashed("file:////opt/tex/texmf-local" )) -- /opt/tex/texmf-local +--~ table.print(hashed("file:///./opt/tex/texmf-local" )) -- ./opt/tex/texmf-local + +--~ table.print(hashed("c:/opt/tex/texmf-local" )) -- c:/opt/tex/texmf-local +--~ table.print(hashed("opt/tex/texmf-local" )) -- opt/tex/texmf-local +--~ table.print(hashed("/opt/tex/texmf-local" )) -- /opt/tex/texmf-local + +url.split = split +url.hasscheme = hasscheme +url.hashed = hashed + +function url.addscheme(str,scheme) -- no authority + if hasscheme(str) then + return str + elseif not scheme then + return "file:///" .. str + else + return scheme .. ":///" .. str + end end -function url.construct(hash) - local fullurl = hash.sheme .. "://".. hash.authority .. hash.path - if hash.query then - fullurl = fullurl .. "?".. hash.query +function url.construct(hash) -- dodo: we need to escape ! + local fullurl, f = { }, 0 + local scheme, authority, path, query, fragment = hash.scheme, hash.authority, hash.path, hash.query, hash.fragment + if scheme and scheme ~= "" then + f = f + 1 ; fullurl[f] = scheme .. "://" end - if hash.fragment then - fullurl = fullurl .. "?".. hash.fragment + if authority and authority ~= "" then + f = f + 1 ; fullurl[f] = authority end - return fullurl + if path and path ~= "" then + f = f + 1 ; fullurl[f] = "/" .. path + end + if query and query ~= "" then + f = f + 1 ; fullurl[f] = "?".. query + end + if fragment and fragment ~= "" then + f = f + 1 ; fullurl[f] = "#".. fragment + end + return lpegmatch(escaper,concat(fullurl)) end -function url.filename(filename) - local t = url.hashed(filename) +function url.filename(filename) -- why no lpeg here ? + local t = hashed(filename) return (t.scheme == "file" and (gsub(t.path,"^/([a-zA-Z])([:|])/)","%1:"))) or filename end +local function escapestring(str) + return lpegmatch(escaper,str) +end + +url.escape = escapestring + +-- function url.query(str) -- separator could be an option +-- if type(str) == "string" then +-- local t = { } +-- for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do +-- t[k] = v +-- end +-- return t +-- else +-- return str +-- end +-- end + function url.query(str) if type(str) == "string" then - local t = { } - for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do - t[k] = v - end - return t + return lpegmatch(splitquery,str) or "" else return str end end +function url.toquery(data) + local td = type(data) + if td == "string" then + return #str and escape(data) or nil -- beware of double escaping + elseif td == "table" then + if next(data) then + local t = { } + for k, v in next, data do + t[#t+1] = format("%s=%s",k,escapestring(v)) + end + return concat(t,"&") + end + else + -- nil is a signal that no query + end +end + +-- /test/ | /test | test/ | test => test + +function url.barepath(path) + if not path or path == "" then + return "" + else + return (gsub(path,"^/?(.-)/?$","%1")) + end +end + --~ print(url.filename("file:///c:/oeps.txt")) --~ print(url.filename("c:/oeps.txt")) --~ print(url.filename("file:///oeps.txt")) @@ -108,12 +298,30 @@ end --~ print(url.filename("/oeps.txt")) --~ from the spec on the web (sort of): ---~ ---~ function test(str) ---~ print(table.serialize(url.hashed(str))) + +--~ local function test(str) +--~ local t = url.hashed(str) +--~ t.constructed = url.construct(t) +--~ print(table.serialize(t)) --~ end ---~ ---~ test("%56pass%20words") + +--~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45")) +--~ inspect(url.hashed("http://www.pragma-ade.com/test%20test?test=test%20test&x=123%3d45")) + +--~ test("sys:///./colo-rgb") + +--~ test("/data/site/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733/figuur-cow.jpg") +--~ test("file:///M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") +--~ test("M:/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") +--~ test("file:///q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") +--~ test("/q2p/develop/output/q2p-develop/resources/ecaboperception4_res/topicresources/58313733") + +--~ test("file:///cow%20with%20spaces") +--~ test("file:///cow%20with%20spaces.pdf") +--~ test("cow%20with%20spaces.pdf") +--~ test("some%20file") +--~ test("/etc/passwords") +--~ test("http://www.myself.com/some%20words.html") --~ test("file:///c:/oeps.txt") --~ test("file:///c|/oeps.txt") --~ test("file:///etc/oeps.txt") @@ -127,7 +335,6 @@ end --~ test("tel:+1-816-555-1212") --~ test("telnet://192.0.2.16:80/") --~ test("urn:oasis:names:specification:docbook:dtd:xml:4.1.2") ---~ test("/etc/passwords") --~ test("http://www.pragma-ade.com/spaced%20name") --~ test("zip:///oeps/oeps.zip#bla/bla.tex") -- cgit v1.2.3 From 1456bd5c2c1458bcdee99739ce2255e6b8939768 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 19:03:29 +0200 Subject: l-unicode --- lualibs-unicode.lua | 568 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 481 insertions(+), 87 deletions(-) diff --git a/lualibs-unicode.lua b/lualibs-unicode.lua index 0c5a601..630c349 100644 --- a/lualibs-unicode.lua +++ b/lualibs-unicode.lua @@ -6,33 +6,253 @@ if not modules then modules = { } end modules ['l-unicode'] = { license = "see context related readme files" } +-- this module will be reorganized + +-- todo: utf.sub replacement (used in syst-aux) + +local concat = table.concat +local type = type +local P, C, R, Cs, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local utftype = patterns.utftype +local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format +local utfsplitlines = string.utfsplitlines + if not unicode then - unicode = { utf8 = { } } + unicode = { } + +end + +local unicode = unicode + +utf = utf or unicode.utf8 + +if not utf then + + utf8 = { } + unicode.utf8 = utf8 + utf = utf8 + +end + +if not utf.char then local floor, char = math.floor, string.char - function unicode.utf8.utfchar(n) + function utf.char(n) if n < 0x80 then + -- 0aaaaaaa : 0x80 return char(n) elseif n < 0x800 then - return char(0xC0 + floor(n/0x40)) .. char(0x80 + (n % 0x40)) + -- 110bbbaa : 0xC0 : n >> 6 + -- 10aaaaaa : 0x80 : n & 0x3F + return char( + 0xC0 + floor(n/0x40), + 0x80 + (n % 0x40) + ) elseif n < 0x10000 then - return char(0xE0 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) - elseif n < 0x40000 then - return char(0xF0 + floor(n/0x40000)) .. char(0x80 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) - else -- wrong: - -- return char(0xF1 + floor(n/0x1000000)) .. char(0x80 + floor(n/0x40000)) .. char(0x80 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) - return "?" + -- 1110bbbb : 0xE0 : n >> 12 + -- 10bbbbaa : 0x80 : (n >> 6) & 0x3F + -- 10aaaaaa : 0x80 : n & 0x3F + return char( + 0xE0 + floor(n/0x1000), + 0x80 + (floor(n/0x40) % 0x40), + 0x80 + (n % 0x40) + ) + elseif n < 0x200000 then + -- 11110ccc : 0xF0 : n >> 18 + -- 10ccbbbb : 0x80 : (n >> 12) & 0x3F + -- 10bbbbaa : 0x80 : (n >> 6) & 0x3F + -- 10aaaaaa : 0x80 : n & 0x3F + -- dddd : ccccc - 1 + return char( + 0xF0 + floor(n/0x40000), + 0x80 + (floor(n/0x1000) % 0x40), + 0x80 + (floor(n/0x40) % 0x40), + 0x80 + (n % 0x40) + ) + else + return "" end end end -utf = utf or unicode.utf8 +if not utf.byte then + + local utf8byte = patterns.utf8byte + + function utf.byte(c) + return lpegmatch(utf8byte,c) + end + +end + +local utfchar, utfbyte = utf.char, utf.byte + +-- As we want to get rid of the (unmaintained) utf library we implement our own +-- variants (in due time an independent module): + +function unicode.filetype(data) + return data and lpegmatch(utftype,data) or "unknown" +end + +local toentities = Cs ( + ( + patterns.utf8one + + ( + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + ) / function(s) local b = utfbyte(s) if b < 127 then return s else return format("&#%X;",b) end end + )^0 +) -local concat, utfchar, utfgsub = table.concat, utf.char, utf.gsub -local char, byte, find, bytepairs = string.char, string.byte, string.find, string.bytepairs +patterns.toentities = toentities + +function utf.toentities(str) + return lpegmatch(toentities,str) +end + +--~ local utfchr = { } -- 60K -> 2.638 M extra mem but currently not called that often (on latin) +--~ +--~ setmetatable(utfchr, { __index = function(t,k) local v = utfchar(k) t[k] = v return v end } ) +--~ +--~ collectgarbage("collect") +--~ local u = collectgarbage("count")*1024 +--~ local t = os.clock() +--~ for i=1,1000 do +--~ for i=1,600 do +--~ local a = utfchr[i] +--~ end +--~ end +--~ print(os.clock()-t,collectgarbage("count")*1024-u) + +--~ collectgarbage("collect") +--~ local t = os.clock() +--~ for i=1,1000 do +--~ for i=1,600 do +--~ local a = utfchar(i) +--~ end +--~ end +--~ print(os.clock()-t,collectgarbage("count")*1024-u) + +--~ local byte = string.byte +--~ local utfchar = utf.char +--~ local lpegmatch = lpeg.match, lpeg.P, lpeg.C, lpeg.R, lpeg.Cs + +local one = P(1) +local two = C(1) * C(1) +local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) + +-- actually one of them is already utf ... sort of useless this one + +-- function utf.char(n) +-- if n < 0x80 then +-- return char(n) +-- elseif n < 0x800 then +-- return char( +-- 0xC0 + floor(n/0x40), +-- 0x80 + (n % 0x40) +-- ) +-- elseif n < 0x10000 then +-- return char( +-- 0xE0 + floor(n/0x1000), +-- 0x80 + (floor(n/0x40) % 0x40), +-- 0x80 + (n % 0x40) +-- ) +-- elseif n < 0x40000 then +-- return char( +-- 0xF0 + floor(n/0x40000), +-- 0x80 + floor(n/0x1000), +-- 0x80 + (floor(n/0x40) % 0x40), +-- 0x80 + (n % 0x40) +-- ) +-- else +-- -- return char( +-- -- 0xF1 + floor(n/0x1000000), +-- -- 0x80 + floor(n/0x40000), +-- -- 0x80 + floor(n/0x1000), +-- -- 0x80 + (floor(n/0x40) % 0x40), +-- -- 0x80 + (n % 0x40) +-- -- ) +-- return "?" +-- end +-- end +-- +-- merge into: + +local pattern = P("\254\255") * Cs( ( + four / function(a,b,c,d) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(a,b) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + + P("\255\254") * Cs( ( + four / function(b,a,d,c) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(b,a) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + +function string.toutf(s) + return lpegmatch(pattern,s) or s -- todo: utf32 +end + +local validatedutf = Cs ( + ( + patterns.utf8one + + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + + P(1) / "�" + )^0 +) + +patterns.validatedutf = validatedutf + +function string.validutf(str) + return lpegmatch(validatedutf,str) +end + + +utf.length = string.utflength +utf.split = string.utfsplit +utf.splitines = string.utfsplitlines +utf.valid = string.validutf + +if not utf.len then + utf.len = utf.length +end + +-- a replacement for simple gsubs: + +local utf8char = patterns.utf8char + +function utf.remapper(mapping) + local pattern = Cs((utf8char/mapping)^0) + return function(str) + if not str or str == "" then + return "" + else + return lpegmatch(pattern,str) + end + end, pattern +end + +-- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" } +-- print(remap("abcd 1234 abcd")) -- 0 EF BB BF UTF-8 -- 1 FF FE UTF-16-little-endian @@ -78,98 +298,236 @@ function unicode.utftype(f) end end -function unicode.utf16_to_utf8(str, endian) -- maybe a gsub is faster or an lpeg - local result, tmp, n, m, p = { }, { }, 0, 0, 0 - -- lf | cr | crlf / (cr:13, lf:10) - local function doit() - if n == 10 then - if p ~= 13 then - result[#result+1] = concat(tmp) - tmp = { } - p = 0 - end - elseif n == 13 then - result[#result+1] = concat(tmp) - tmp = { } - p = n - else - tmp[#tmp+1] = utfchar(n) - p = 0 - end +--~ function unicode.utf16_to_utf8(str, endian) -- maybe a gsub is faster or an lpeg +--~ local result, tmp, n, m, p, r, t = { }, { }, 0, 0, 0, 0, 0 -- we reuse tmp +--~ -- lf | cr | crlf / (cr:13, lf:10) +--~ local function doit() -- inline this +--~ if n == 10 then +--~ if p ~= 13 then +--~ if t > 0 then +--~ r = r + 1 +--~ result[r] = concat(tmp,"",1,t) +--~ t = 0 +--~ end +--~ p = 0 +--~ end +--~ elseif n == 13 then +--~ if t > 0 then +--~ r = r + 1 +--~ result[r] = concat(tmp,"",1,t) +--~ t = 0 +--~ end +--~ p = n +--~ else +--~ t = t + 1 +--~ tmp[t] = utfchar(n) +--~ p = 0 +--~ end +--~ end +--~ for l,r in bytepairs(str) do +--~ if r then +--~ if endian then -- maybe make two loops +--~ n = 256*l + r +--~ else +--~ n = 256*r + l +--~ end +--~ if m > 0 then +--~ n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000 +--~ m = 0 +--~ doit() +--~ elseif n >= 0xD800 and n <= 0xDBFF then +--~ m = n +--~ else +--~ doit() +--~ end +--~ end +--~ end +--~ if t > 0 then +--~ r = r + 1 +--~ result[r] = concat(tmp,"",1,t) -- we reused tmp, hence t +--~ end +--~ return result +--~ end + +--~ function unicode.utf32_to_utf8(str, endian) +--~ local result, tmp, n, m, p, r, t = { }, { }, 0, -1, 0, 0, 0 +--~ -- lf | cr | crlf / (cr:13, lf:10) +--~ local function doit() -- inline this +--~ if n == 10 then +--~ if p ~= 13 then +--~ if t > 0 then +--~ r = r + 1 +--~ result[r] = concat(tmp,"",1,t) +--~ t = 0 +--~ end +--~ p = 0 +--~ end +--~ elseif n == 13 then +--~ if t > 0 then +--~ r = r + 1 +--~ result[r] = concat(tmp,"",1,t) +--~ t = 0 +--~ end +--~ p = n +--~ else +--~ t = t + 1 +--~ tmp[t] = utfchar(n) +--~ p = 0 +--~ end +--~ end +--~ for a,b in bytepairs(str) do +--~ if a and b then +--~ if m < 0 then +--~ if endian then -- maybe make two loops +--~ m = 256*256*256*a + 256*256*b +--~ else +--~ m = 256*b + a +--~ end +--~ else +--~ if endian then -- maybe make two loops +--~ n = m + 256*a + b +--~ else +--~ n = m + 256*256*256*b + 256*256*a +--~ end +--~ m = -1 +--~ doit() +--~ end +--~ else +--~ break +--~ end +--~ end +--~ if #tmp > 0 then +--~ r = r + 1 +--~ result[r] = concat(tmp,"",1,t) -- we reused tmp, hence t +--~ end +--~ return result +--~ end + +local function utf16_to_utf8_be(t) + if type(t) == "string" then + t = utfsplitlines(str) end - for l,r in bytepairs(str) do - if r then - if endian then - n = l*256 + r - else - n = r*256 + l - end - if m > 0 then - n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000 - m = 0 - doit() - elseif n >= 0xD800 and n <= 0xDBFF then - m = n - else - doit() + local result = { } -- we reuse result + for i=1,#t do + local r, more = 0, 0 + for left, right in bytepairs(t[i]) do + if right then + local now = 256*left + right + if more > 0 then + now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong + more = 0 + r = r + 1 + result[r] = utfchar(now) + elseif now >= 0xD800 and now <= 0xDBFF then + more = now + else + r = r + 1 + result[r] = utfchar(now) + end end end + t[i] = concat(result,"",1,r) -- we reused tmp, hence t end - if #tmp > 0 then - result[#result+1] = concat(tmp) + return t +end + +local function utf16_to_utf8_le(t) + if type(t) == "string" then + t = utfsplitlines(str) end - return result -end - -function unicode.utf32_to_utf8(str, endian) - local result = { } - local tmp, n, m, p = { }, 0, -1, 0 - -- lf | cr | crlf / (cr:13, lf:10) - local function doit() - if n == 10 then - if p ~= 13 then - result[#result+1] = concat(tmp) - tmp = { } - p = 0 + local result = { } -- we reuse result + for i=1,#t do + local r, more = 0, 0 + for left, right in bytepairs(t[i]) do + if right then + local now = 256*right + left + if more > 0 then + now = (more-0xD800)*0x400 + (now-0xDC00) + 0x10000 -- the 0x10000 smells wrong + more = 0 + r = r + 1 + result[r] = utfchar(now) + elseif now >= 0xD800 and now <= 0xDBFF then + more = now + else + r = r + 1 + result[r] = utfchar(now) + end end - elseif n == 13 then - result[#result+1] = concat(tmp) - tmp = { } - p = n - else - tmp[#tmp+1] = utfchar(n) - p = 0 end + t[i] = concat(result,"",1,r) -- we reused tmp, hence t + end + return t +end + +local function utf32_to_utf8_be(t) + if type(t) == "string" then + t = utfsplitlines(t) end - for a,b in bytepairs(str) do - if a and b then - if m < 0 then - if endian then - m = a*256*256*256 + b*256*256 + local result = { } -- we reuse result + for i=1,#t do + local r, more = 0, -1 + for a,b in bytepairs(t[i]) do + if a and b then + if more < 0 then + more = 256*256*256*a + 256*256*b else - m = b*256 + a + r = r + 1 + result[t] = utfchar(more + 256*a + b) + more = -1 end else - if endian then - n = m + a*256 + b + break + end + end + t[i] = concat(result,"",1,r) + end + return t +end + +local function utf32_to_utf8_le(t) + if type(t) == "string" then + t = utfsplitlines(t) + end + local result = { } -- we reuse result + for i=1,#t do + local r, more = 0, -1 + for a,b in bytepairs(t[i]) do + if a and b then + if more < 0 then + more = 256*b + a else - n = m + b*256*256*256 + a*256*256 + r = r + 1 + result[t] = utfchar(more + 256*256*256*b + 256*256*a) + more = -1 end - m = -1 - doit() + else + break end - else - break end + t[i] = concat(result,"",1,r) end - if #tmp > 0 then - result[#result+1] = concat(tmp) - end - return result + return t +end + +unicode.utf32_to_utf8_be = utf32_to_utf8_be +unicode.utf32_to_utf8_le = utf32_to_utf8_le +unicode.utf16_to_utf8_be = utf16_to_utf8_be +unicode.utf16_to_utf8_le = utf16_to_utf8_le + +function unicode.utf8_to_utf8(t) + return type(t) == "string" and utfsplitlines(t) or t +end + +function unicode.utf16_to_utf8(t,endian) + return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t +end + +function unicode.utf32_to_utf8(t,endian) + return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t end local function little(c) - local b = byte(c) -- b = c:byte() + local b = byte(c) if b < 0x10000 then return char(b%256,b/256) else @@ -190,10 +548,46 @@ local function big(c) end end +-- function unicode.utf8_to_utf16(str,littleendian) +-- if littleendian then +-- return char(255,254) .. utfgsub(str,".",little) +-- else +-- return char(254,255) .. utfgsub(str,".",big) +-- end +-- end + +local _, l_remap = utf.remapper(little) +local _, b_remap = utf.remapper(big) + function unicode.utf8_to_utf16(str,littleendian) if littleendian then - return char(255,254) .. utfgsub(str,".",little) + return char(255,254) .. lpegmatch(l_remap,str) else - return char(254,255) .. utfgsub(str,".",big) + return char(254,255) .. lpegmatch(b_remap,str) + end +end + +function unicode.utfcodes(str) + local t, n = { }, 0 + for u in utfvalues(str) do + n = n + 1 + t[n] = format("0x%04X",u) end + return concat(t,separator or " ") +end + +function unicode.ustring(s) + return format("U+%05X",type(s) == "number" and s or utfbyte(s)) +end + +function unicode.xstring(s) + return format("0x%05X",type(s) == "number" and s or utfbyte(s)) +end + +-- + +local pattern = Ct(C(patterns.utf8char)^0) + +function utf.totable(str) + return lpegmatch(pattern,str) end -- cgit v1.2.3 From d294bdc989812cc84c07c73d2c63dfae00ef58af Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 19:08:51 +0200 Subject: update l-string --- lualibs-string.lua | 285 +++++++++++++---------------------------------------- 1 file changed, 66 insertions(+), 219 deletions(-) diff --git a/lualibs-string.lua b/lualibs-string.lua index 9856d52..03616aa 100644 --- a/lualibs-string.lua +++ b/lualibs-string.lua @@ -6,8 +6,9 @@ if not modules then modules = { } end modules ['l-string'] = { license = "see context related readme files" } +local string = string local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower -local lpegmatch = lpeg.match +local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct -- some functions may disappear as they are not used anywhere @@ -15,196 +16,75 @@ if not string.split then -- this will be overloaded by a faster lpeg variant - function string:split(pattern) - if #self > 0 then - local t = { } - for s in gmatch(self..pattern,"(.-)"..pattern) do - t[#t+1] = s + function string.split(str,pattern) + local t = { } + if #str > 0 then + local n = 1 + for s in gmatch(str..pattern,"(.-)"..pattern) do + t[n] = s + n = n + 1 end - return t - else - return { } end + return t end end -local chr_to_esc = { - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["^"] = "%^", ["$"] = "%$", - ["["] = "%[", ["]"] = "%]", - ["("] = "%(", [")"] = "%)", - ["{"] = "%{", ["}"] = "%}" -} - -string.chr_to_esc = chr_to_esc - -function string:esc() -- variant 2 - return (gsub(self,"(.)",chr_to_esc)) -end - -function string:unquote() - return (gsub(self,"^([\"\'])(.*)%1$","%2")) +function string.unquoted(str) + return (gsub(str,"^([\"\'])(.*)%1$","%2")) end ---~ function string:unquote() ---~ if find(self,"^[\'\"]") then ---~ return sub(self,2,-2) +--~ function stringunquoted(str) +--~ if find(str,"^[\'\"]") then +--~ return sub(str,2,-2) --~ else ---~ return self +--~ return str --~ end --~ end -function string:quote() -- we could use format("%q") - return format("%q",self) +function string.quoted(str) + return format("%q",str) -- always " end -function string:count(pattern) -- variant 3 +function string.count(str,pattern) -- variant 3 local n = 0 - for _ in gmatch(self,pattern) do + for _ in gmatch(str,pattern) do -- not for utf n = n + 1 end return n end -function string:limit(n,sentinel) - if #self > n then - sentinel = sentinel or " ..." - return sub(self,1,(n-#sentinel)) .. sentinel +function string.limit(str,n,sentinel) -- not utf proof + if #str > n then + sentinel = sentinel or "..." + return sub(str,1,(n-#sentinel)) .. sentinel else - return self + return str end end ---~ function string:strip() -- the .- is quite efficient ---~ -- return match(self,"^%s*(.-)%s*$") or "" ---~ -- return match(self,'^%s*(.*%S)') or '' -- posted on lua list ---~ return find(s,'^%s*$') and '' or match(s,'^%s*(.*%S)') ---~ end - -do -- roberto's variant: - local space = lpeg.S(" \t\v\n") - local nospace = 1 - space - local stripper = space^0 * lpeg.C((space^0 * nospace^1)^0) - function string.strip(str) - return lpegmatch(stripper,str) or "" - end -end +local space = S(" \t\v\n") +local nospace = 1 - space +local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code -function string:is_empty() - return not find(self,"%S") +function string.strip(str) + return lpegmatch(stripper,str) or "" end -function string:enhance(pattern,action) - local ok, n = true, 0 - while ok do - ok = false - self = gsub(self,pattern, function(...) - ok, n = true, n + 1 - return action(...) - end) - end - return self, n -end - -local chr_to_hex, hex_to_chr = { }, { } - -for i=0,255 do - local c, h = char(i), format("%02X",i) - chr_to_hex[c], hex_to_chr[h] = h, c -end - -function string:to_hex() - return (gsub(self or "","(.)",chr_to_hex)) -end - -function string:from_hex() - return (gsub(self or "","(..)",hex_to_chr)) -end - -if not string.characters then - - local function nextchar(str, index) - index = index + 1 - return (index <= #str) and index or nil, sub(str,index,index) - end - function string:characters() - return nextchar, self, 0 - end - local function nextbyte(str, index) - index = index + 1 - return (index <= #str) and index or nil, byte(sub(str,index,index)) - end - function string:bytes() - return nextbyte, self, 0 - end - -end - --- we can use format for this (neg n) - -function string:rpadd(n,chr) - local m = n-#self - if m > 0 then - return self .. rep(chr or " ",m) - else - return self - end -end - -function string:lpadd(n,chr) - local m = n-#self - if m > 0 then - return rep(chr or " ",m) .. self - else - return self - end -end - -string.padd = string.rpadd - -function is_number(str) -- tonumber - return find(str,"^[%-%+]?[%d]-%.?[%d+]$") == 1 -end - ---~ print(is_number("1")) ---~ print(is_number("1.1")) ---~ print(is_number(".1")) ---~ print(is_number("-0.1")) ---~ print(is_number("+0.1")) ---~ print(is_number("-.1")) ---~ print(is_number("+.1")) - -function string:split_settings() -- no {} handling, see l-aux for lpeg variant - if find(self,"=") then - local t = { } - for k,v in gmatch(self,"(%a+)=([^%,]*)") do - t[k] = v - end - return t - else - return nil - end +function string.is_empty(str) + return not find(str,"%S") end local patterns_escapes = { - ["-"] = "%-", - ["."] = "%.", - ["+"] = "%+", - ["*"] = "%*", ["%"] = "%%", - ["("] = "%)", - [")"] = "%)", - ["["] = "%[", - ["]"] = "%]", + ["."] = "%.", + ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", + ["["] = "%[", ["]"] = "%]", + ["("] = "%(", [")"] = "%)", + -- ["{"] = "%{", ["}"] = "%}" + -- ["^"] = "%^", ["$"] = "%$", } -function string:pattesc() - return (gsub(self,".",patterns_escapes)) -end - local simple_escapes = { ["-"] = "%-", ["."] = "%.", @@ -212,77 +92,44 @@ local simple_escapes = { ["*"] = ".*", } -function string:simpleesc() - return (gsub(self,".",simple_escapes)) +function string.escapedpattern(str,simple) + return (gsub(str,".",simple and simple_escapes or patterns_escapes)) end -function string:tohash() - local t = { } - for s in gmatch(self,"([^, ]+)") do -- lpeg - t[s] = true +function string.topattern(str,lowercase,strict) + if str == "" then + return ".*" + else + str = gsub(str,".",simple_escapes) + if lowercase then + str = lower(str) + end + if strict then + return "^" .. str .. "$" + else + return str + end end - return t end -local pattern = lpeg.Ct(lpeg.C(1)^0) -function string:totable() - return lpegmatch(pattern,self) +function string.valid(str,default) + return (type(str) == "string" and str ~= "" and str) or default or nil end ---~ local t = { ---~ "1234567123456712345671234567", ---~ "a\tb\tc", ---~ "aa\tbb\tcc", ---~ "aaa\tbbb\tccc", ---~ "aaaa\tbbbb\tcccc", ---~ "aaaaa\tbbbbb\tccccc", ---~ "aaaaaa\tbbbbbb\tcccccc", ---~ } ---~ for k,v do ---~ print(string.tabtospace(t[k])) ---~ end +-- obsolete names: -function string.tabtospace(str,tab) - -- we don't handle embedded newlines - while true do - local s = find(str,"\t") - if s then - if not tab then tab = 7 end -- only when found - local d = tab-(s-1) % tab - if d > 0 then - str = gsub(str,"\t",rep(" ",d),1) - else - str = gsub(str,"\t","",1) - end - else - break - end - end - return str -end +string.quote = string.quoted +string.unquote = string.unquoted -function string:compactlong() -- strips newlines and leading spaces - self = gsub(self,"[\n\r]+ *","") - self = gsub(self,"^ *","") - return self -end +-- handy fallback -function string:striplong() -- strips newlines and leading spaces - self = gsub(self,"^%s*","") - self = gsub(self,"[\n\r]+ *","\n") - return self -end +string.itself = function(s) return s end -function string:topattern(lowercase,strict) - if lowercase then - self = lower(self) - end - self = gsub(self,".",simple_escapes) - if self == "" then - self = ".*" - elseif strict then - self = "^" .. self .. "$" - end - return self +-- also handy (see utf variant) + +local pattern = Ct(C(1)^0) + +function string.totable(str) + return lpegmatch(pattern,str) end -- cgit v1.2.3 From eddad3cd039618fcb6a147686bb69906ecd5662c Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 19:33:49 +0200 Subject: update l-os --- lualibs-os.lua | 166 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 124 insertions(+), 42 deletions(-) diff --git a/lualibs-os.lua b/lualibs-os.lua index fba2cd3..95d007d 100644 --- a/lualibs-os.lua +++ b/lualibs-os.lua @@ -6,32 +6,130 @@ if not modules then modules = { } end modules ['l-os'] = { license = "see context related readme files" } +-- This file deals with some operating system issues. Please don't bother me +-- with the pros and cons of operating systems as they all have their flaws +-- and benefits. Bashing one of them won't help solving problems and fixing +-- bugs faster and is a waste of time and energy. +-- +-- path separators: / or \ ... we can use / everywhere +-- suffixes : dll so exe ... no big deal +-- quotes : we can use "" in most cases +-- expansion : unless "" are used * might give side effects +-- piping/threads : somewhat different for each os +-- locations : specific user file locations and settings can change over time +-- +-- os.type : windows | unix (new, we already guessed os.platform) +-- os.name : windows | msdos | linux | macosx | solaris | .. | generic (new) +-- os.platform : extended os.name with architecture + +-- os.sleep() => socket.sleep() +-- math.randomseed(tonumber(string.sub(string.reverse(tostring(math.floor(socket.gettime()*10000))),1,6))) + -- maybe build io.flush in os.execute +local os = os +local date, time = os.date, os.time +local find, format, gsub, upper, gmatch = string.find, string.format, string.gsub, string.upper, string.gmatch +local concat = table.concat +local random, ceil, randomseed = math.random, math.ceil, math.randomseed +local rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring = rawget, rawset, type, getmetatable, setmetatable, tonumber, tostring + +-- The following code permits traversing the environment table, at least +-- in luatex. Internally all environment names are uppercase. + +-- The randomseed in Lua is not that random, although this depends on the operating system as well +-- as the binary (Luatex is normally okay). But to be sure we set the seed anyway. + +math.initialseed = tonumber(string.sub(string.reverse(tostring(ceil(socket and socket.gettime()*10000 or time()))),1,6)) + +randomseed(math.initialseed) + +if not os.__getenv__ then + + os.__getenv__ = os.getenv + os.__setenv__ = os.setenv + + if os.env then + + local osgetenv = os.getenv + local ossetenv = os.setenv + local osenv = os.env local _ = osenv.PATH -- initialize the table + + function os.setenv(k,v) + if v == nil then + v = "" + end + local K = upper(k) + osenv[K] = v + if type(v) == "table" then + v = concat(v,";") -- path + end + ossetenv(K,v) + end + + function os.getenv(k) + local K = upper(k) + local v = osenv[K] or osenv[k] or osgetenv(K) or osgetenv(k) + if v == "" then + return nil + else + return v + end + end + + else + + local ossetenv = os.setenv + local osgetenv = os.getenv + local osenv = { } + + function os.setenv(k,v) + if v == nil then + v = "" + end + local K = upper(k) + osenv[K] = v + end + + function os.getenv(k) + local K = upper(k) + local v = osenv[K] or osgetenv(K) or osgetenv(k) + if v == "" then + return nil + else + return v + end + end + + local function __index(t,k) + return os.getenv(k) + end + local function __newindex(t,k,v) + os.setenv(k,v) + end + + os.env = { } + + setmetatable(os.env, { __index = __index, __newindex = __newindex } ) + + end + +end local find, format, gsub = string.find, string.format, string.gsub local random, ceil = math.random, math.ceil -local execute, spawn, exec, ioflush = os.execute, os.spawn or os.execute, os.exec or os.execute, io.flush +local execute, spawn, exec, iopopen, ioflush = os.execute, os.spawn or os.execute, os.exec or os.execute, io.popen, io.flush function os.execute(...) ioflush() return execute(...) end function os.spawn (...) ioflush() return spawn (...) end function os.exec (...) ioflush() return exec (...) end +function io.popen (...) ioflush() return iopopen(...) end function os.resultof(command) - ioflush() -- else messed up logging local handle = io.popen(command,"r") - if not handle then - -- print("unknown command '".. command .. "' in os.resultof") - return "" - else - return handle:read("*all") or "" - end + return handle and handle:read("*all") or "" end ---~ os.type : windows | unix (new, we already guessed os.platform) ---~ os.name : windows | msdos | linux | macosx | solaris | .. | generic (new) ---~ os.platform : extended os.name with architecture - if not io.fileseparator then if find(os.getenv("PATH"),";") then io.fileseparator, io.pathseparator, os.type = "\\", ";", os.type or "mswin" @@ -44,17 +142,19 @@ os.type = os.type or (io.pathseparator == ";" and "windows") or "unix" os.name = os.name or (os.type == "windows" and "mswin" ) or "linux" if os.type == "windows" then - os.libsuffix, os.binsuffix = 'dll', 'exe' + os.libsuffix, os.binsuffix, os.binsuffixes = 'dll', 'exe', { 'exe', 'cmd', 'bat' } else - os.libsuffix, os.binsuffix = 'so', '' + os.libsuffix, os.binsuffix, os.binsuffixes = 'so', '', { '' } end +local launchers = { + windows = "start %s", + macosx = "open %s", + unix = "$BROWSER %s &> /dev/null &", +} + function os.launch(str) - if os.type == "windows" then - os.execute("start " .. str) -- os.spawn ? - else - os.execute(str .. " &") -- os.spawn ? - end + os.execute(format(launchers[os.name] or launchers.unix,str)) end if not os.times then @@ -89,7 +189,7 @@ end -- no need for function anymore as we have more clever code and helpers now -- this metatable trickery might as well disappear -os.resolvers = os.resolvers or { } +os.resolvers = os.resolvers or { } -- will become private local resolvers = os.resolvers @@ -102,24 +202,6 @@ end setmetatable(os,osmt) -if not os.setenv then - - -- we still store them but they won't be seen in - -- child processes although we might pass them some day - -- using command concatination - - local env, getenv = { }, os.getenv - - function os.setenv(k,v) - env[k] = v - end - - function os.getenv(k) - return env[k] or getenv(k) - end - -end - -- we can use HOSTTYPE on some platforms local name, platform = os.name or "linux", os.getenv("MTX_PLATFORM") or "" @@ -159,7 +241,7 @@ elseif os.type == "windows" then elseif name == "linux" then function os.resolvers.platform(t,k) - -- we sometims have HOSTTYPE set so let's check that first + -- we sometimes have HOSTTYPE set so let's check that first local platform, architecture = "", os.getenv("HOSTTYPE") or os.resultof("uname -m") or "" if find(architecture,"x86_64") then platform = "linux-64" @@ -237,10 +319,10 @@ elseif name == "freebsd" then elseif name == "kfreebsd" then function os.resolvers.platform(t,k) - -- we sometims have HOSTTYPE set so let's check that first + -- we sometimes have HOSTTYPE set so let's check that first local platform, architecture = "", os.getenv("HOSTTYPE") or os.resultof("uname -m") or "" if find(architecture,"x86_64") then - platform = "kfreebsd-64" + platform = "kfreebsd-amd64" else platform = "kfreebsd-i386" end @@ -288,7 +370,7 @@ end local d function os.timezone(delta) - d = d or tonumber(tonumber(os.date("%H")-os.date("!%H"))) + d = d or tonumber(tonumber(date("%H")-date("!%H"))) if delta then if d > 0 then return format("+%02i:00",d) -- cgit v1.2.3 From 7932ae95cb4506822cb76ce7cc4f5491652db60d Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 19:38:46 +0200 Subject: update l-dir.lua --- lualibs-dir.lua | 284 ++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 183 insertions(+), 101 deletions(-) diff --git a/lualibs-dir.lua b/lualibs-dir.lua index 1b9bcbc..3deb660 100644 --- a/lualibs-dir.lua +++ b/lualibs-dir.lua @@ -6,35 +6,91 @@ if not modules then modules = { } end modules ['l-dir'] = { license = "see context related readme files" } --- dir.expand_name will be merged with cleanpath and collapsepath +-- dir.expandname will be merged with cleanpath and collapsepath local type = type local find, gmatch, match, gsub = string.find, string.gmatch, string.match, string.gsub +local concat, insert, remove = table.concat, table.insert, table.remove local lpegmatch = lpeg.match +local P, S, R, C, Cc, Cs, Ct, Cv, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cv, lpeg.V + dir = dir or { } +local dir = dir +local lfs = lfs + +local attributes = lfs.attributes +local walkdir = lfs.dir +local isdir = lfs.isdir +local isfile = lfs.isfile +local currentdir = lfs.currentdir + +-- in case we load outside luatex + +if not isdir then + function isdir(name) + local a = attributes(name) + return a and a.mode == "directory" + end + lfs.isdir = isdir +end + +if not isfile then + function isfile(name) + local a = attributes(name) + return a and a.mode == "file" + end + lfs.isfile = isfile +end -- handy function dir.current() - return (gsub(lfs.currentdir(),"\\","/")) + return (gsub(currentdir(),"\\","/")) end --- optimizing for no string.find (*) does not save time +-- optimizing for no find (*) does not save time + +--~ local function globpattern(path,patt,recurse,action) -- fails in recent luatex due to some change in lfs +--~ local ok, scanner +--~ if path == "/" then +--~ ok, scanner = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe +--~ else +--~ ok, scanner = xpcall(function() return walkdir(path) end, function() end) -- kepler safe +--~ end +--~ if ok and type(scanner) == "function" then +--~ if not find(path,"/$") then path = path .. '/' end +--~ for name in scanner do +--~ local full = path .. name +--~ local mode = attributes(full,'mode') +--~ if mode == 'file' then +--~ if find(full,patt) then +--~ action(full) +--~ end +--~ elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then +--~ globpattern(full,patt,recurse,action) +--~ end +--~ end +--~ end +--~ end + +local lfsisdir = isdir + +local function isdir(path) + path = gsub(path,"[/\\]+$","") + return lfsisdir(path) +end -local attributes = lfs.attributes -local walkdir = lfs.dir +lfs.isdir = isdir -local function glob_pattern(path,patt,recurse,action) - local ok, scanner, dirobj +local function globpattern(path,patt,recurse,action) if path == "/" then - ok, scanner, dirobj = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe - else - ok, scanner, dirobj = xpcall(function() return walkdir(path) end, function() end) -- kepler safe + path = path .. "." + elseif not find(path,"/$") then + path = path .. '/' end - if ok and type(scanner) == "function" then - if not find(path,"/$") then path = path .. '/' end - for name in scanner, dirobj do + if isdir(path) then -- lfs.isdir does not like trailing / + for name in walkdir(path) do -- lfs.dir accepts trailing / local full = path .. name local mode = attributes(full,'mode') if mode == 'file' then @@ -42,25 +98,25 @@ local function glob_pattern(path,patt,recurse,action) action(full) end elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then - glob_pattern(full,patt,recurse,action) + globpattern(full,patt,recurse,action) end end end end -dir.glob_pattern = glob_pattern +dir.globpattern = globpattern -local function collect_pattern(path,patt,recurse,result) - local ok, scanner, dirobj +local function collectpattern(path,patt,recurse,result) + local ok, scanner result = result or { } if path == "/" then - ok, scanner, dirobj = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe + ok, scanner, first = xpcall(function() return walkdir(path..".") end, function() end) -- kepler safe else - ok, scanner, dirobj = xpcall(function() return walkdir(path) end, function() end) -- kepler safe + ok, scanner, first = xpcall(function() return walkdir(path) end, function() end) -- kepler safe end if ok and type(scanner) == "function" then if not find(path,"/$") then path = path .. '/' end - for name in scanner, dirobj do + for name in scanner, first do local full = path .. name local attr = attributes(full) local mode = attr.mode @@ -69,7 +125,7 @@ local function collect_pattern(path,patt,recurse,result) result[name] = attr end elseif recurse and (mode == "directory") and (name ~= '.') and (name ~= "..") then - attr.list = collect_pattern(full,patt,recurse) + attr.list = collectpattern(full,patt,recurse) result[name] = attr end end @@ -77,9 +133,7 @@ local function collect_pattern(path,patt,recurse,result) return result end -dir.collect_pattern = collect_pattern - -local P, S, R, C, Cc, Cs, Ct, Cv, V = lpeg.P, lpeg.S, lpeg.R, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cv, lpeg.V +dir.collectpattern = collectpattern local pattern = Ct { [1] = (C(P(".") + P("/")^1) + C(R("az","AZ") * P(":") * P("/")^0) + Cc("./")) * V(2) * V(3), @@ -103,16 +157,16 @@ local function glob(str,t) for s=1,#str do glob(str[s],t) end - elseif lfs.isfile(str) then + elseif isfile(str) then t(str) else - local split = lpegmatch(pattern,str) + local split = lpegmatch(pattern,str) -- we could use the file splitter if split then local root, path, base = split[1], split[2], split[3] local recurse = find(base,"%*%*") local start = root .. path local result = lpegmatch(filter,start .. base) - glob_pattern(start,result,recurse,t) + globpattern(start,result,recurse,t) end end else @@ -122,12 +176,15 @@ local function glob(str,t) glob(str[s],t) end return t - elseif lfs.isfile(str) then - local t = t or { } - t[#t+1] = str - return t + elseif isfile(str) then + if t then + t[#t+1] = str + return t + else + return { str } + end else - local split = lpegmatch(pattern,str) + local split = lpegmatch(pattern,str) -- we could use the file splitter if split then local t = t or { } local action = action or function(name) t[#t+1] = name end @@ -135,7 +192,7 @@ local function glob(str,t) local recurse = find(base,"%*%*") local start = root .. path local result = lpegmatch(filter,start .. base) - glob_pattern(start,result,recurse,action) + globpattern(start,result,recurse,action) return t else return { } @@ -154,10 +211,11 @@ dir.glob = glob local function globfiles(path,recurse,func,files) -- func == pattern or function if type(func) == "string" then - local s = func -- alas, we need this indirect way + local s = func func = function(name) return find(name,s) end end files = files or { } + local noffiles = #files for name in walkdir(path) do if find(name,"^%.") then --- skip @@ -168,12 +226,9 @@ local function globfiles(path,recurse,func,files) -- func == pattern or function globfiles(path .. "/" .. name,recurse,func,files) end elseif mode == "file" then - if func then - if func(name) then - files[#files+1] = path .. "/" .. name - end - else - files[#files+1] = path .. "/" .. name + if not func or func(name) then + noffiles = noffiles + 1 + files[noffiles] = path .. "/" .. name end end end @@ -191,7 +246,7 @@ dir.globfiles = globfiles -- print(dir.ls("*.tex")) function dir.ls(pattern) - return table.concat(glob(pattern),"\n") + return concat(glob(pattern),"\n") end --~ mkdirs("temp") @@ -201,7 +256,9 @@ end local make_indeed = true -- false -if string.find(os.getenv("PATH"),";") then -- os.type == "windows" +local onwindows = os.type == "windows" or find(os.getenv("PATH"),";") + +if onwindows then function dir.mkdirs(...) local str, pth, t = "", "", { ... } @@ -250,56 +307,24 @@ if string.find(os.getenv("PATH"),";") then -- os.type == "windows" else pth = pth .. "/" .. s end - if make_indeed and not lfs.isdir(pth) then + if make_indeed and not isdir(pth) then lfs.mkdir(pth) end end - return pth, (lfs.isdir(pth) == true) + return pth, (isdir(pth) == true) end ---~ print(dir.mkdirs("","","a","c")) ---~ print(dir.mkdirs("a")) ---~ print(dir.mkdirs("a:")) ---~ print(dir.mkdirs("a:/b/c")) ---~ print(dir.mkdirs("a:b/c")) ---~ print(dir.mkdirs("a:/bbb/c")) ---~ print(dir.mkdirs("/a/b/c")) ---~ print(dir.mkdirs("/aaa/b/c")) ---~ print(dir.mkdirs("//a/b/c")) ---~ print(dir.mkdirs("///a/b/c")) ---~ print(dir.mkdirs("a/bbb//ccc/")) - - function dir.expand_name(str) -- will be merged with cleanpath and collapsepath - local first, nothing, last = match(str,"^(//)(//*)(.*)$") - if first then - first = dir.current() .. "/" - end - if not first then - first, last = match(str,"^(//)/*(.*)$") - end - if not first then - first, last = match(str,"^([a-zA-Z]:)(.*)$") - if first and not find(last,"^/") then - local d = lfs.currentdir() - if lfs.chdir(first) then - first = dir.current() - end - lfs.chdir(d) - end - end - if not first then - first, last = dir.current(), str - end - last = gsub(last,"//","/") - last = gsub(last,"/%./","/") - last = gsub(last,"^/*","") - first = gsub(first,"/*$","") - if last == "" then - return first - else - return first .. "/" .. last - end - end + --~ print(dir.mkdirs("","","a","c")) + --~ print(dir.mkdirs("a")) + --~ print(dir.mkdirs("a:")) + --~ print(dir.mkdirs("a:/b/c")) + --~ print(dir.mkdirs("a:b/c")) + --~ print(dir.mkdirs("a:/bbb/c")) + --~ print(dir.mkdirs("/a/b/c")) + --~ print(dir.mkdirs("/aaa/b/c")) + --~ print(dir.mkdirs("//a/b/c")) + --~ print(dir.mkdirs("///a/b/c")) + --~ print(dir.mkdirs("a/bbb//ccc/")) else @@ -307,7 +332,7 @@ else local str, pth, t = "", "", { ... } for i=1,#t do local s = t[i] - if s ~= "" then + if s and s ~= "" then -- we catch nil and false if str ~= "" then str = str .. "/" .. s else @@ -325,7 +350,7 @@ else else pth = pth .. "/" .. s end - if make_indeed and not first and not lfs.isdir(pth) then + if make_indeed and not first and not isdir(pth) then lfs.mkdir(pth) end end @@ -333,31 +358,88 @@ else pth = "." for s in gmatch(str,"[^/]+") do pth = pth .. "/" .. s - if make_indeed and not lfs.isdir(pth) then + if make_indeed and not isdir(pth) then lfs.mkdir(pth) end end end - return pth, (lfs.isdir(pth) == true) + return pth, (isdir(pth) == true) + end + + --~ print(dir.mkdirs("","","a","c")) + --~ print(dir.mkdirs("a")) + --~ print(dir.mkdirs("/a/b/c")) + --~ print(dir.mkdirs("/aaa/b/c")) + --~ print(dir.mkdirs("//a/b/c")) + --~ print(dir.mkdirs("///a/b/c")) + --~ print(dir.mkdirs("a/bbb//ccc/")) + +end + +dir.makedirs = dir.mkdirs + +-- we can only define it here as it uses dir.current + +if onwindows then + + function dir.expandname(str) -- will be merged with cleanpath and collapsepath + local first, nothing, last = match(str,"^(//)(//*)(.*)$") + if first then + first = dir.current() .. "/" + end + if not first then + first, last = match(str,"^(//)/*(.*)$") + end + if not first then + first, last = match(str,"^([a-zA-Z]:)(.*)$") + if first and not find(last,"^/") then + local d = currentdir() + if lfs.chdir(first) then + first = dir.current() + end + lfs.chdir(d) + end + end + if not first then + first, last = dir.current(), str + end + last = gsub(last,"//","/") + last = gsub(last,"/%./","/") + last = gsub(last,"^/*","") + first = gsub(first,"/*$","") + if last == "" or last == "." then + return first + else + return first .. "/" .. last + end end ---~ print(dir.mkdirs("","","a","c")) ---~ print(dir.mkdirs("a")) ---~ print(dir.mkdirs("/a/b/c")) ---~ print(dir.mkdirs("/aaa/b/c")) ---~ print(dir.mkdirs("//a/b/c")) ---~ print(dir.mkdirs("///a/b/c")) ---~ print(dir.mkdirs("a/bbb//ccc/")) +else - function dir.expand_name(str) -- will be merged with cleanpath and collapsepath + function dir.expandname(str) -- will be merged with cleanpath and collapsepath if not find(str,"^/") then - str = lfs.currentdir() .. "/" .. str + str = currentdir() .. "/" .. str end str = gsub(str,"//","/") str = gsub(str,"/%./","/") + str = gsub(str,"(.)/%.$","%1") return str end end -dir.makedirs = dir.mkdirs +file.expandname = dir.expandname -- for convenience + +local stack = { } + +function dir.push(newdir) + insert(stack,lfs.currentdir()) +end + +function dir.pop() + local d = remove(stack) + if d then + lfs.chdir(d) + end + return d +end -- cgit v1.2.3 From eee3680bb4dbb5f135cc6285bb83833b4e237fec Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 20:23:49 +0200 Subject: update l-file l-table; add yet uncommitted changes --- lualibs-file.lua | 391 ++++++++++++++++++++++-------- lualibs-io.lua | 62 +++++ lualibs-math.lua | 12 +- lualibs-os.lua | 75 +++++- lualibs-table.lua | 692 ++++++++++++++++++++++++++++++------------------------ lualibs-url.lua | 2 + 6 files changed, 823 insertions(+), 411 deletions(-) diff --git a/lualibs-file.lua b/lualibs-file.lua index 2bfc070..7ab9fbc 100644 --- a/lualibs-file.lua +++ b/lualibs-file.lua @@ -8,47 +8,101 @@ if not modules then modules = { } end modules ['l-file'] = { -- needs a cleanup -file = file or { } +file = file or { } +local file = file -local concat = table.concat -local find, gmatch, match, gsub, sub, char = string.find, string.gmatch, string.match, string.gsub, string.sub, string.char +local insert, concat = table.insert, table.concat +local find, gmatch, match, gsub, sub, char, lower = string.find, string.gmatch, string.match, string.gsub, string.sub, string.char, string.lower local lpegmatch = lpeg.match -function file.removesuffix(filename) - return (gsub(filename,"%.[%a%d]+$","")) +local P, R, S, C, Cs, Cp, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cp, lpeg.Cc + +local function dirname(name,default) + return match(name,"^(.+)[/\\].-$") or (default or "") end -function file.addsuffix(filename, suffix) - if not suffix or suffix == "" then - return filename - elseif not find(filename,"%.[%a%d]+$") then - return filename .. "." .. suffix - else - return filename - end +local function basename(name) + return match(name,"^.+[/\\](.-)$") or name end -function file.replacesuffix(filename, suffix) - return (gsub(filename,"%.[%a%d]+$","")) .. "." .. suffix +-- local function nameonly(name) +-- return (gsub(match(name,"^.+[/\\](.-)$") or name,"%..*$","")) +-- end + +local function nameonly(name) + return (gsub(match(name,"^.+[/\\](.-)$") or name,"%.[%a%d]+$","")) end -function file.dirname(name,default) - return match(name,"^(.+)[/\\].-$") or (default or "") +local function suffixonly(name,default) + return match(name,"^.+%.([^/\\]-)$") or default or "" end -function file.basename(name) - return match(name,"^.+[/\\](.-)$") or name +local function splitname(name) + local n, s = match(name,"^(.+)%.([^/\\]-)$") + return n or name, s or "" end -function file.nameonly(name) - return (gsub(match(name,"^.+[/\\](.-)$") or name,"%..*$","")) +file.basename = basename + +file.pathpart = dirname +file.dirname = dirname + +file.nameonly = nameonly + +file.suffixonly = suffixonly +file.extname = suffixonly -- obsolete +file.suffix = suffixonly + +function file.removesuffix(filename) + return (gsub(filename,"%.[%a%d]+$","")) end -function file.extname(name,default) - return match(name,"^.+%.([^/\\]-)$") or default or "" +function file.addsuffix(filename, suffix, criterium) + if not suffix or suffix == "" then + return filename + elseif criterium == true then + return filename .. "." .. suffix + elseif not criterium then + local n, s = splitname(filename) + if not s or s == "" then + return filename .. "." .. suffix + else + return filename + end + else + local n, s = splitname(filename) + if s and s ~= "" then + local t = type(criterium) + if t == "table" then + -- keep if in criterium + for i=1,#criterium do + if s == criterium[i] then + return filename + end + end + elseif t == "string" then + -- keep if criterium + if s == criterium then + return filename + end + end + end + return n .. "." .. suffix + end end -file.suffix = file.extname +--~ print("1 " .. file.addsuffix("name","new") .. " -> name.new") +--~ print("2 " .. file.addsuffix("name.old","new") .. " -> name.old") +--~ print("3 " .. file.addsuffix("name.old","new",true) .. " -> name.old.new") +--~ print("4 " .. file.addsuffix("name.old","new","new") .. " -> name.new") +--~ print("5 " .. file.addsuffix("name.old","new","old") .. " -> name.old") +--~ print("6 " .. file.addsuffix("name.old","new","foo") .. " -> name.new") +--~ print("7 " .. file.addsuffix("name.old","new",{"foo","bar"}) .. " -> name.new") +--~ print("8 " .. file.addsuffix("name.old","new",{"old","bar"}) .. " -> name.old") + +function file.replacesuffix(filename, suffix) + return (gsub(filename,"%.[%a%d]+$","")) .. "." .. suffix +end --~ function file.join(...) --~ local pth = concat({...},"/") @@ -67,10 +121,10 @@ file.suffix = file.extname local trick_1 = char(1) local trick_2 = "^" .. trick_1 .. "/+" -function file.join(...) +function file.join(...) -- rather dirty local lst = { ... } local a, b = lst[1], lst[2] - if a == "" then + if not a or a == "" then -- not a added lst[1] = trick_1 elseif b and find(a,"^/+$") and find(b,"^/") then lst[1] = "" @@ -100,74 +154,165 @@ end --~ print(file.join("http:///a","/y")) --~ print(file.join("//nas-1","/y")) -function file.iswritable(name) - local a = lfs.attributes(name) or lfs.attributes(file.dirname(name,".")) - return a and sub(a.permissions,2,2) == "w" +-- We should be able to use: +-- +-- function file.is_writable(name) +-- local a = attributes(name) or attributes(dirname(name,".")) +-- return a and sub(a.permissions,2,2) == "w" +-- end +-- +-- But after some testing Taco and I came up with: + +function file.is_writable(name) + if lfs.isdir(name) then + name = name .. "/m_t_x_t_e_s_t.tmp" + local f = io.open(name,"wb") + if f then + f:close() + os.remove(name) + return true + end + elseif lfs.isfile(name) then + local f = io.open(name,"ab") + if f then + f:close() + return true + end + else + local f = io.open(name,"ab") + if f then + f:close() + os.remove(name) + return true + end + end + return false end -function file.isreadable(name) - local a = lfs.attributes(name) +function file.is_readable(name) + local a = attributes(name) return a and sub(a.permissions,1,1) == "r" end -file.is_readable = file.isreadable -file.is_writable = file.iswritable +file.isreadable = file.is_readable -- depricated +file.iswritable = file.is_writable -- depricated --- todo: lpeg +function file.size(name) + local a = attributes(name) + return a and a.size or 0 +end ---~ function file.split_path(str) ---~ local t = { } ---~ str = gsub(str,"\\", "/") ---~ str = gsub(str,"(%a):([;/])", "%1\001%2") ---~ for name in gmatch(str,"([^;:]+)") do ---~ if name ~= "" then ---~ t[#t+1] = gsub(name,"\001",":") ---~ end ---~ end ---~ return t ---~ end +-- todo: lpeg \\ / .. does not save much local checkedsplit = string.checkedsplit -function file.split_path(str,separator) +function file.splitpath(str,separator) -- string str = gsub(str,"\\","/") return checkedsplit(str,separator or io.pathseparator) end -function file.join_path(tab) - return concat(tab,io.pathseparator) -- can have trailing // +function file.joinpath(tab,separator) -- table + return concat(tab,separator or io.pathseparator) -- can have trailing // end -- we can hash them weakly -function file.collapse_path(str) +--~ function file.collapsepath(str) -- fails on b.c/.. +--~ str = gsub(str,"\\","/") +--~ if find(str,"/") then +--~ str = gsub(str,"^%./",(gsub(getcurrentdir(),"\\","/")) .. "/") -- ./xx in qualified +--~ str = gsub(str,"/%./","/") +--~ local n, m = 1, 1 +--~ while n > 0 or m > 0 do +--~ str, n = gsub(str,"[^/%.]+/%.%.$","") +--~ str, m = gsub(str,"[^/%.]+/%.%./","") +--~ end +--~ str = gsub(str,"([^/])/$","%1") +--~ -- str = gsub(str,"^%./","") -- ./xx in qualified +--~ str = gsub(str,"/%.$","") +--~ end +--~ if str == "" then str = "." end +--~ return str +--~ end +--~ +--~ The previous one fails on "a.b/c" so Taco came up with a split based +--~ variant. After some skyping we got it sort of compatible with the old +--~ one. After that the anchoring to currentdir was added in a better way. +--~ Of course there are some optimizations too. Finally we had to deal with +--~ windows drive prefixes and things like sys://. + +function file.collapsepath(str,anchor) + if anchor and not find(str,"^/") and not find(str,"^%a:") then + str = getcurrentdir() .. "/" .. str + end + if str == "" or str =="." then + return "." + elseif find(str,"^%.%.") then + str = gsub(str,"\\","/") + return str + elseif not find(str,"%.") then + str = gsub(str,"\\","/") + return str + end str = gsub(str,"\\","/") - if find(str,"/") then - str = gsub(str,"^%./",(gsub(lfs.currentdir(),"\\","/")) .. "/") -- ./xx in qualified - str = gsub(str,"/%./","/") - local n, m = 1, 1 - while n > 0 or m > 0 do - str, n = gsub(str,"[^/%.]+/%.%.$","") - str, m = gsub(str,"[^/%.]+/%.%./","") + local starter, rest = match(str,"^(%a+:/*)(.-)$") + if starter then + str = rest + end + local oldelements = checkedsplit(str,"/") + local newelements = { } + local i = #oldelements + while i > 0 do + local element = oldelements[i] + if element == '.' then + -- do nothing + elseif element == '..' then + local n = i - 1 + while n > 0 do + local element = oldelements[n] + if element ~= '..' and element ~= '.' then + oldelements[n] = '.' + break + else + n = n - 1 + end + end + if n < 1 then + insert(newelements,1,'..') + end + elseif element ~= "" then + insert(newelements,1,element) end - str = gsub(str,"([^/])/$","%1") - -- str = gsub(str,"^%./","") -- ./xx in qualified - str = gsub(str,"/%.$","") + i = i - 1 + end + if #newelements == 0 then + return starter or "." + elseif starter then + return starter .. concat(newelements, '/') + elseif find(str,"^/") then + return "/" .. concat(newelements,'/') + else + return concat(newelements, '/') end - if str == "" then str = "." end - return str end ---~ print(file.collapse_path("/a")) ---~ print(file.collapse_path("a/./b/..")) ---~ print(file.collapse_path("a/aa/../b/bb")) ---~ print(file.collapse_path("a/../..")) ---~ print(file.collapse_path("a/.././././b/..")) ---~ print(file.collapse_path("a/./././b/..")) ---~ print(file.collapse_path("a/b/c/../..")) - -function file.robustname(str) - return (gsub(str,"[^%a%d%/%-%.\\]+","-")) +--~ local function test(str) +--~ print(string.format("%-20s %-15s %-15s",str,file.collapsepath(str),file.collapsepath(str,true))) +--~ end +--~ test("a/b.c/d") test("b.c/d") test("b.c/..") +--~ test("/") test("c:/..") test("sys://..") +--~ test("") test("./") test(".") test("..") test("./..") test("../..") +--~ test("a") test("./a") test("/a") test("a/../..") +--~ test("a/./b/..") test("a/aa/../b/bb") test("a/.././././b/..") test("a/./././b/..") +--~ test("a/b/c/../..") test("./a/b/c/../..") test("a/b/c/../..") + +function file.robustname(str,strict) + str = gsub(str,"[^%a%d%/%-%.\\]+","-") + if strict then + return lower(gsub(str,"^%-*(.-)%-*$","%1")) + else + return str + end end file.readdata = io.loaddata @@ -179,31 +324,31 @@ end -- lpeg variants, slightly faster, not always ---~ local period = lpeg.P(".") ---~ local slashes = lpeg.S("\\/") +--~ local period = P(".") +--~ local slashes = S("\\/") --~ local noperiod = 1-period --~ local noslashes = 1-slashes --~ local name = noperiod^1 ---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.C(noperiod^1) * -1 +--~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1 ---~ function file.extname(name) +--~ function file.suffixonly(name) --~ return lpegmatch(pattern,name) or "" --~ end ---~ local pattern = lpeg.Cs(((period * noperiod^1 * -1)/"" + 1)^1) +--~ local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1) --~ function file.removesuffix(name) --~ return lpegmatch(pattern,name) --~ end ---~ local pattern = (noslashes^0 * slashes)^1 * lpeg.C(noslashes^1) * -1 +--~ local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1 --~ function file.basename(name) --~ return lpegmatch(pattern,name) or name --~ end ---~ local pattern = (noslashes^0 * slashes)^1 * lpeg.Cp() * noslashes^1 * -1 +--~ local pattern = (noslashes^0 * slashes)^1 * Cp() * noslashes^1 * -1 --~ function file.dirname(name) --~ local p = lpegmatch(pattern,name) @@ -214,7 +359,7 @@ end --~ end --~ end ---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.Cp() * noperiod^1 * -1 +--~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 --~ function file.addsuffix(name, suffix) --~ local p = lpegmatch(pattern,name) @@ -225,7 +370,7 @@ end --~ end --~ end ---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * lpeg.Cp() * noperiod^1 * -1 +--~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 --~ function file.replacesuffix(name,suffix) --~ local p = lpegmatch(pattern,name) @@ -236,7 +381,7 @@ end --~ end --~ end ---~ local pattern = (noslashes^0 * slashes)^0 * lpeg.Cp() * ((noperiod^1 * period)^1 * lpeg.Cp() + lpeg.P(true)) * noperiod^1 * -1 +--~ local pattern = (noslashes^0 * slashes)^0 * Cp() * ((noperiod^1 * period)^1 * Cp() + P(true)) * noperiod^1 * -1 --~ function file.nameonly(name) --~ local a, b = lpegmatch(pattern,name) @@ -249,7 +394,7 @@ end --~ end --~ end ---~ local test = file.extname +--~ local test = file.suffixonly --~ local test = file.basename --~ local test = file.dirname --~ local test = file.addsuffix @@ -268,11 +413,14 @@ end -- also rewrite previous -local letter = lpeg.R("az","AZ") + lpeg.S("_-+") -local separator = lpeg.P("://") +local letter = R("az","AZ") + S("_-+") +local separator = P("://") + +local qualified = P(".")^0 * P("/") + letter*P(":") + letter^1*separator + letter^1 * P("/") +local rootbased = P("/") + letter*P(":") -local qualified = lpeg.P(".")^0 * lpeg.P("/") + letter*lpeg.P(":") + letter^1*separator + letter^1 * lpeg.P("/") -local rootbased = lpeg.P("/") + letter*lpeg.P(":") +lpeg.patterns.qualified = qualified +lpeg.patterns.rootbased = rootbased -- ./name ../name /name c: :// name/name @@ -284,19 +432,61 @@ function file.is_rootbased_path(filename) return lpegmatch(rootbased,filename) ~= nil end -local slash = lpeg.S("\\/") -local period = lpeg.P(".") -local drive = lpeg.C(lpeg.R("az","AZ")) * lpeg.P(":") -local path = lpeg.C(((1-slash)^0 * slash)^0) -local suffix = period * lpeg.C(lpeg.P(1-period)^0 * lpeg.P(-1)) -local base = lpeg.C((1-suffix)^0) +-- actually these are schemes + +local slash = S("\\/") +local period = P(".") +local drive = C(R("az","AZ")) * P(":") +local path = C(((1-slash)^0 * slash)^0) +local suffix = period * C(P(1-period)^0 * P(-1)) +local base = C((1-suffix)^0) +local rest = C(P(1)^0) + +drive = drive + Cc("") +path = path + Cc("") +base = base + Cc("") +suffix = suffix + Cc("") + +local pattern_a = drive * path * base * suffix +local pattern_b = path * base * suffix +local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra captures +local pattern_d = path * rest + +function file.splitname(str,splitdrive) + if splitdrive then + return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix + else + return lpegmatch(pattern_b,str) -- returns path, base, suffix + end +end -local pattern = (drive + lpeg.Cc("")) * (path + lpeg.Cc("")) * (base + lpeg.Cc("")) * (suffix + lpeg.Cc("")) +function file.splitbase(str) + return lpegmatch(pattern_d,str) -- returns path, base+suffix +end -function file.splitname(str) -- returns drive, path, base, suffix - return lpegmatch(pattern,str) +function file.nametotable(str,splitdrive) -- returns table + local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) + if splitdrive then + return { + path = path, + drive = drive, + subpath = subpath, + name = name, + base = base, + suffix = suffix, + } + else + return { + path = path, + name = name, + base = base, + suffix = suffix, + } + end end +-- print(file.splitbase("a/b/c.txt")) + -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end -- -- test { "c:", "c:/aa", "c:/aa/bb", "c:/aa/bb/cc", "c:/aa/bb/cc.dd", "c:/aa/bb/cc.dd.ee" } @@ -307,8 +497,15 @@ end --~ -- todo: --~ --~ if os.type == "windows" then ---~ local currentdir = lfs.currentdir ---~ function lfs.currentdir() +--~ local currentdir = getcurrentdir +--~ function getcurrentdir() --~ return (gsub(currentdir(),"\\","/")) --~ end --~ end + +-- for myself: + +function file.strip(name,dir) + local b, a = match(name,"^(.-)" .. dir .. "(.*)$") + return a ~= "" and a or name +end diff --git a/lualibs-io.lua b/lualibs-io.lua index a9269ab..657b755 100644 --- a/lualibs-io.lua +++ b/lualibs-io.lua @@ -234,3 +234,65 @@ function io.ask(question,default,options) end end end + +local function readnumber(f,n,m) + if m then + f:seek("set",n) + n = m + end + if n == 1 then + return byte(f:read(1)) + elseif n == 2 then + local a, b = byte(f:read(2),1,2) + return 256 * a + b + elseif n == 3 then + local a, b, c = byte(f:read(3),1,3) + return 256*256 * a + 256 * b + c + elseif n == 4 then + local a, b, c, d = byte(f:read(4),1,4) + return 256*256*256 * a + 256*256 * b + 256 * c + d + elseif n == 8 then + local a, b = readnumber(f,4), readnumber(f,4) + return 256 * a + b + elseif n == 12 then + local a, b, c = readnumber(f,4), readnumber(f,4), readnumber(f,4) + return 256*256 * a + 256 * b + c + elseif n == -2 then + local b, a = byte(f:read(2),1,2) + return 256*a + b + elseif n == -3 then + local c, b, a = byte(f:read(3),1,3) + return 256*256 * a + 256 * b + c + elseif n == -4 then + local d, c, b, a = byte(f:read(4),1,4) + return 256*256*256 * a + 256*256 * b + 256*c + d + elseif n == -8 then + local h, g, f, e, d, c, b, a = byte(f:read(8),1,8) + return 256*256*256*256*256*256*256 * a + + 256*256*256*256*256*256 * b + + 256*256*256*256*256 * c + + 256*256*256*256 * d + + 256*256*256 * e + + 256*256 * f + + 256 * g + + h + else + return 0 + end +end + +io.readnumber = readnumber + +function io.readstring(f,n,m) + if m then + f:seek("set",n) + n = m + end + local str = gsub(f:read(n),"%z","") + return str +end + +-- + +if not io.i_limiter then function io.i_limiter() end end -- dummy so we can test safely +if not io.o_limiter then function io.o_limiter() end end -- dummy so we can test safely diff --git a/lualibs-math.lua b/lualibs-math.lua index 4669a51..43f60b5 100644 --- a/lualibs-math.lua +++ b/lualibs-math.lua @@ -9,21 +9,15 @@ if not modules then modules = { } end modules ['l-math'] = { local floor, sin, cos, tan = math.floor, math.sin, math.cos, math.tan if not math.round then - function math.round(x) - return floor(x + 0.5) - end + function math.round(x) return floor(x + 0.5) end end if not math.div then - function math.div(n,m) - return floor(n/m) - end + function math.div(n,m) return floor(n/m) end end if not math.mod then - function math.mod(n,m) - return n % m - end + function math.mod(n,m) return n % m end end local pipi = 2*math.pi/360 diff --git a/lualibs-os.lua b/lualibs-os.lua index 95d007d..799f449 100644 --- a/lualibs-os.lua +++ b/lualibs-os.lua @@ -115,8 +115,8 @@ if not os.__getenv__ then end end -local find, format, gsub = string.find, string.format, string.gsub -local random, ceil = math.random, math.ceil + +-- end of environment hack local execute, spawn, exec, iopopen, ioflush = os.execute, os.spawn or os.execute, os.exec or os.execute, io.popen, io.flush @@ -381,3 +381,74 @@ function os.timezone(delta) return 1 end end + +local timeformat = format("%%s%s",os.timezone(true)) +local dateformat = "!%Y-%m-%d %H:%M:%S" + +function os.fulltime(t,default) + t = tonumber(t) or 0 + if t > 0 then + -- valid time + elseif default then + return default + else + t = nil + end + return format(timeformat,date(dateformat,t)) +end + +local dateformat = "%Y-%m-%d %H:%M:%S" + +function os.localtime(t,default) + t = tonumber(t) or 0 + if t > 0 then + -- valid time + elseif default then + return default + else + t = nil + end + return date(dateformat,t) +end + +function os.converttime(t,default) + local t = tonumber(t) + if t and t > 0 then + return date(dateformat,t) + else + return default or "-" + end +end + +local memory = { } + +local function which(filename) + local fullname = memory[filename] + if fullname == nil then + local suffix = file.suffix(filename) + local suffixes = suffix == "" and os.binsuffixes or { suffix } + for directory in gmatch(os.getenv("PATH"),"[^" .. io.pathseparator .."]+") do + local df = file.join(directory,filename) + for i=1,#suffixes do + local dfs = file.addsuffix(df,suffixes[i]) + if io.exists(dfs) then + fullname = dfs + break + end + end + end + if not fullname then + fullname = false + end + memory[filename] = fullname + end + return fullname +end + +os.which = which +os.where = which + +-- print(os.which("inkscape.exe")) +-- print(os.which("inkscape")) +-- print(os.which("gs.exe")) +-- print(os.which("ps2pdf")) diff --git a/lualibs-table.lua b/lualibs-table.lua index ee395d0..80f28c2 100644 --- a/lualibs-table.lua +++ b/lualibs-table.lua @@ -6,17 +6,19 @@ if not modules then modules = { } end modules ['l-table'] = { license = "see context related readme files" } -table.join = table.concat - +local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local table, string = table, string local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match local getmetatable, setmetatable = getmetatable, setmetatable -local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local getinfo = debug.getinfo -- Starting with version 5.2 Lua no longer provide ipairs, which makes -- sense. As we already used the for loop and # in most places the -- impact on ConTeXt was not that large; the remaining ipairs already --- have been replaced. In a similar fashio we also hardly used pairs. +-- have been replaced. In a similar fashion we also hardly used pairs. +-- +-- Hm, actually ipairs was retained, but we no longer use it anyway. -- -- Just in case, we provide the fallbacks as discussed in Programming -- in Lua (http://www.lua.org/pil/7.3.html): @@ -63,97 +65,134 @@ end -- extra functions, some might go (when not used) function table.strip(tab) - local lst = { } + local lst, l = { }, 0 for i=1,#tab do local s = gsub(tab[i],"^%s*(.-)%s*$","%1") if s == "" then -- skip this one else - lst[#lst+1] = s + l = l + 1 + lst[l] = s end end return lst end function table.keys(t) - local k = { } - for key, _ in next, t do - k[#k+1] = key + if t then + local keys, k = { }, 0 + for key, _ in next, t do + k = k + 1 + keys[k] = key + end + return keys + else + return { } end - return k end local function compare(a,b) - return (tostring(a) < tostring(b)) + local ta, tb = type(a), type(b) -- needed, else 11 < 2 + if ta == tb then + return a < b + else + return tostring(a) < tostring(b) + end end local function sortedkeys(tab) - local srt, kind = { }, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in next, tab do - srt[#srt+1] = key - if kind == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - -- if kind == 2 then kind = 3 else kind = 1 end - kind = (kind == 2 and 3) or 1 - elseif tkey == "number" then - -- if kind == 1 then kind = 3 else kind = 2 end - kind = (kind == 1 and 3) or 2 + if tab then + local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + s = s + 1 + srt[s] = key + if category == 3 then + -- no further check else - kind = 3 + local tkey = type(key) + if tkey == "string" then + category = (category == 2 and 3) or 1 + elseif tkey == "number" then + category = (category == 1 and 3) or 2 + else + category = 3 + end end end - end - if kind == 0 or kind == 3 then - sort(srt,compare) + if category == 0 or category == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt else - sort(srt) + return { } end - return srt end local function sortedhashkeys(tab) -- fast one - local srt = { } - for key,_ in next, tab do - srt[#srt+1] = key + if tab then + local srt, s = { }, 0 + for key,_ in next, tab do + if key then + s= s + 1 + srt[s] = key + end + end + sort(srt) + return srt + else + return { } end - sort(srt) - return srt end table.sortedkeys = sortedkeys table.sortedhashkeys = sortedhashkeys -function table.sortedhash(t) - local s = sortedhashkeys(t) -- maybe just sortedkeys - local n = 0 - local function kv(s) - n = n + 1 - local k = s[n] - return k, t[k] +local function nothing() end + +local function sortedhash(t) + if t then + local n, s = 0, sortedkeys(t) -- the robust one + local function kv(s) + n = n + 1 + local k = s[n] + return k, t[k] + end + return kv, s + else + return nothing end - return kv, s end -table.sortedpairs = table.sortedhash +table.sortedhash = sortedhash +table.sortedpairs = sortedhash -function table.append(t, list) - for _,v in next, list do - insert(t,v) +function table.append(t,list) + local n = #t + for i=1,#list do + n = n + 1 + t[n] = list[i] end + return t end function table.prepend(t, list) - for k,v in next, list do - insert(t,k,v) + local nl = #list + local nt = nl + #t + for i=#t,1,-1 do + t[nt] = t[i] + nt = nt - 1 + end + for i=1,#list do + t[i] = list[i] end + return t end function table.merge(t, ...) -- first one is target - t = t or {} - local lst = {...} + t = t or { } + local lst = { ... } for i=1,#lst do for k, v in next, lst[i] do t[k] = v @@ -163,7 +202,7 @@ function table.merge(t, ...) -- first one is target end function table.merged(...) - local tmp, lst = { }, {...} + local tmp, lst = { }, { ... } for i=1,#lst do for k, v in next, lst[i] do tmp[k] = v @@ -173,41 +212,45 @@ function table.merged(...) end function table.imerge(t, ...) - local lst = {...} + local lst, nt = { ... }, #t for i=1,#lst do local nst = lst[i] for j=1,#nst do - t[#t+1] = nst[j] + nt = nt + 1 + t[nt] = nst[j] end end return t end function table.imerged(...) - local tmp, lst = { }, {...} + local tmp, ntmp, lst = { }, 0, {...} for i=1,#lst do local nst = lst[i] for j=1,#nst do - tmp[#tmp+1] = nst[j] + ntmp = ntmp + 1 + tmp[ntmp] = nst[j] end end return tmp end -local function fastcopy(old) -- fast one +local function fastcopy(old,metatabletoo) -- fast one if old then local new = { } for k,v in next, old do if type(v) == "table" then - new[k] = fastcopy(v) -- was just table.copy + new[k] = fastcopy(v,metatabletoo) -- was just table.copy else new[k] = v end end - -- optional second arg - local mt = getmetatable(old) - if mt then - setmetatable(new,mt) + if metatabletoo then + -- optional second arg + local mt = getmetatable(old) + if mt then + setmetatable(new,mt) + end end return new else @@ -215,6 +258,8 @@ local function fastcopy(old) -- fast one end end +-- todo : copy without metatable + local function copy(t, tables) -- taken from lua wiki, slightly adapted tables = tables or { } local tcopy = {} @@ -247,33 +292,14 @@ end table.fastcopy = fastcopy table.copy = copy --- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack) - -function table.sub(t,i,j) - return { unpack(t,i,j) } -end - -function table.replace(a,b) - for k,v in next, b do - a[k] = v +function table.derive(parent) + local child = { } + if parent then + setmetatable(child,{ __index = parent }) end + return child end --- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice) - -function table.is_empty(t) -- obolete, use inline code instead - return not t or not next(t) -end - -function table.one_entry(t) -- obolete, use inline code instead - local n = next(t) - return n and not next(t,n) -end - ---~ function table.starts_at(t) -- obsolete, not nice anyway ---~ return ipairs(t,1)(t,0) ---~ end - function table.tohash(t,value) local h = { } if t then @@ -286,27 +312,19 @@ function table.tohash(t,value) end function table.fromhash(t) - local h = { } + local hsh, h = { }, 0 for k, v in next, t do -- no ipairs here - if v then h[#h+1] = k end + if v then + h = h + 1 + hsh[h] = k + end end - return h + return hsh end ---~ print(table.serialize(t), "\n") ---~ print(table.serialize(t,"name"), "\n") ---~ print(table.serialize(t,false), "\n") ---~ print(table.serialize(t,true), "\n") ---~ print(table.serialize(t,"name",true), "\n") ---~ print(table.serialize(t,"name",true,true), "\n") - -table.serialize_functions = true -table.serialize_compact = true -table.serialize_inline = true - local noquotes, hexify, handle, reduce, compact, inline, functions -local reserved = table.tohash { -- intercept a language flaw, no reserved words as key +local reserved = table.tohash { -- intercept a language inconvenience: no reserved words as key 'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', 'if', 'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true', 'until', 'while', } @@ -318,20 +336,23 @@ local function simple_table(t) n = n + 1 end if n == #t then - local tt = { } + local tt, nt = { }, 0 for i=1,#t do local v = t[i] local tv = type(v) if tv == "number" then + nt = nt + 1 if hexify then - tt[#tt+1] = format("0x%04X",v) + tt[nt] = format("0x%04X",v) else - tt[#tt+1] = tostring(v) -- tostring not needed + tt[nt] = tostring(v) -- tostring not needed end elseif tv == "boolean" then - tt[#tt+1] = tostring(v) + nt = nt + 1 + tt[nt] = tostring(v) elseif tv == "string" then - tt[#tt+1] = format("%q",v) + nt = nt + 1 + tt[nt] = format("%q",v) else tt = nil break @@ -352,36 +373,56 @@ end -- problem: there no good number_to_string converter with the best resolution +local function dummy() end + local function do_serialize(root,name,depth,level,indexed) if level > 0 then depth = depth .. " " if indexed then handle(format("%s{",depth)) - elseif name then - --~ handle(format("%s%s={",depth,key(name))) - if type(name) == "number" then -- or find(k,"^%d+$") then + else + local tn = type(name) + if tn == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s[0x%04X]={",depth,name)) else handle(format("%s[%s]={",depth,name)) end - elseif noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then - handle(format("%s%s={",depth,name)) + elseif tn == "string" then + if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + handle(format("%s%s={",depth,name)) + else + handle(format("%s[%q]={",depth,name)) + end + elseif tn == "boolean" then + handle(format("%s[%s]={",depth,tostring(name))) else - handle(format("%s[%q]={",depth,name)) + handle(format("%s{",depth)) end - else - handle(format("%s{",depth)) end end -- we could check for k (index) being number (cardinal) if root and next(root) then - local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- if compact then + -- -- NOT: for k=1,#root do (we need to quit at nil) + -- for k,v in ipairs(root) do -- can we use next? + -- if not first then first = k end + -- last = last + 1 + -- end + -- end + local first, last = nil, 0 if compact then - -- NOT: for k=1,#root do (we need to quit at nil) - for k,v in ipairs(root) do -- can we use next? - if not first then first = k end - last = last + 1 + last = #root + for k=1,last do +-- if not root[k] then + if root[k] == nil then + last = k - 1 + break + end + end + if last > 0 then + first = 1 end end local sk = sortedkeys(root) @@ -391,8 +432,8 @@ local function do_serialize(root,name,depth,level,indexed) --~ if v == root then -- circular --~ else - local t = type(v) - if compact and first and type(k) == "number" and k >= first and k <= last then + local t, tk = type(v), type(k) + if compact and first and tk == "number" and k >= first and k <= last then if t == "number" then if hexify then handle(format("%s 0x%04X,",depth,v)) @@ -434,17 +475,18 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s __p__=nil,",depth)) end elseif t == "number" then - --~ if hexify then - --~ handle(format("%s %s=0x%04X,",depth,key(k),v)) - --~ else - --~ handle(format("%s %s=%s,",depth,key(k),v)) -- %.99g - --~ end - if type(k) == "number" then -- or find(k,"^%d+$") then + if tk == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s [0x%04X]=0x%04X,",depth,k,v)) else handle(format("%s [%s]=%s,",depth,k,v)) -- %.99g end + elseif tk == "boolean" then + if hexify then + handle(format("%s [%s]=0x%04X,",depth,tostring(k),v)) + else + handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g + end elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then if hexify then handle(format("%s %s=0x%04X,",depth,k,v)) @@ -460,26 +502,28 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "string" then if reduce and tonumber(v) then - --~ handle(format("%s %s=%s,",depth,key(k),v)) - if type(k) == "number" then -- or find(k,"^%d+$") then + if tk == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s [0x%04X]=%s,",depth,k,v)) else handle(format("%s [%s]=%s,",depth,k,v)) end + elseif tk == "boolean" then + handle(format("%s [%s]=%s,",depth,tostring(k),v)) elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then handle(format("%s %s=%s,",depth,k,v)) else handle(format("%s [%q]=%s,",depth,k,v)) end else - --~ handle(format("%s %s=%q,",depth,key(k),v)) - if type(k) == "number" then -- or find(k,"^%d+$") then + if tk == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s [0x%04X]=%q,",depth,k,v)) else handle(format("%s [%s]=%q,",depth,k,v)) end + elseif tk == "boolean" then + handle(format("%s [%s]=%q,",depth,tostring(k),v)) elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then handle(format("%s %s=%q,",depth,k,v)) else @@ -488,13 +532,14 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "table" then if not next(v) then - --~ handle(format("%s %s={},",depth,key(k))) - if type(k) == "number" then -- or find(k,"^%d+$") then + if tk == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s [0x%04X]={},",depth,k)) else handle(format("%s [%s]={},",depth,k)) end + elseif tk == "boolean" then + handle(format("%s [%s]={},",depth,tostring(k))) elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then handle(format("%s %s={},",depth,k)) else @@ -503,13 +548,14 @@ local function do_serialize(root,name,depth,level,indexed) elseif inline then local st = simple_table(v) if st then - --~ handle(format("%s %s={ %s },",depth,key(k),concat(st,", "))) - if type(k) == "number" then -- or find(k,"^%d+$") then + if tk == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", "))) else handle(format("%s [%s]={ %s },",depth,k,concat(st,", "))) end + elseif tk == "boolean" then -- or find(k,"^%d+$") then + handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then handle(format("%s %s={ %s },",depth,k,concat(st,", "))) else @@ -522,13 +568,14 @@ local function do_serialize(root,name,depth,level,indexed) do_serialize(v,k,depth,level+1) end elseif t == "boolean" then - --~ handle(format("%s %s=%s,",depth,key(k),tostring(v))) - if type(k) == "number" then -- or find(k,"^%d+$") then + if tk == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s [0x%04X]=%s,",depth,k,tostring(v))) else handle(format("%s [%s]=%s,",depth,k,tostring(v))) end + elseif tk == "boolean" then -- or find(k,"^%d+$") then + handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then handle(format("%s %s=%s,",depth,k,tostring(v))) else @@ -536,27 +583,31 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "function" then if functions then - --~ handle(format('%s %s=loadstring(%q),',depth,key(k),dump(v))) - if type(k) == "number" then -- or find(k,"^%d+$") then + local f = getinfo(v).what == "C" and dump(dummy) or dump(v) + -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) + if tk == "number" then -- or find(k,"^%d+$") then if hexify then - handle(format("%s [0x%04X]=loadstring(%q),",depth,k,dump(v))) + handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f)) else - handle(format("%s [%s]=loadstring(%q),",depth,k,dump(v))) + handle(format("%s [%s]=loadstring(%q),",depth,k,f)) end + elseif tk == "boolean" then + handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f)) elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=loadstring(%q),",depth,k,dump(v))) + handle(format("%s %s=loadstring(%q),",depth,k,f)) else - handle(format("%s [%q]=loadstring(%q),",depth,k,dump(v))) + handle(format("%s [%q]=loadstring(%q),",depth,k,f)) end end else - --~ handle(format("%s %s=%q,",depth,key(k),tostring(v))) - if type(k) == "number" then -- or find(k,"^%d+$") then + if tk == "number" then -- or find(k,"^%d+$") then if hexify then handle(format("%s [0x%04X]=%q,",depth,k,tostring(v))) else handle(format("%s [%s]=%q,",depth,k,tostring(v))) end + elseif tk == "boolean" then -- or find(k,"^%d+$") then + handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then handle(format("%s %s=%q,",depth,k,tostring(v))) else @@ -574,15 +625,34 @@ end -- replacing handle by a direct t[#t+1] = ... (plus test) is not much -- faster (0.03 on 1.00 for zapfino.tma) -local function serialize(root,name,_handle,_reduce,_noquotes,_hexify) - noquotes = _noquotes - hexify = _hexify - handle = _handle or print - reduce = _reduce or false - compact = table.serialize_compact - inline = compact and table.serialize_inline - functions = table.serialize_functions +local function serialize(_handle,root,name,specification) -- handle wins local tname = type(name) + if type(specification) == "table" then + noquotes = specification.noquotes + hexify = specification.hexify + handle = _handle or specification.handle or print + reduce = specification.reduce or false + functions = specification.functions + compact = specification.compact + inline = specification.inline and compact + if functions == nil then + functions = true + end + if compact == nil then + compact = true + end + if inline == nil then + inline = compact + end + else + noquotes = false + hexify = false + handle = _handle or print + reduce = false + compact = true + inline = true + functions = true + end if tname == "string" then if name == "return" then handle("return {") @@ -604,8 +674,17 @@ local function serialize(root,name,_handle,_reduce,_noquotes,_hexify) else handle("t={") end - if root and next(root) then - do_serialize(root,name,"",0,indexed) + if root then + -- The dummy access will initialize a table that has a delayed initialization + -- using a metatable. (maybe explicitly test for metatable) + if getmetatable(root) then -- todo: make this an option, maybe even per subtable + local dummy = root._w_h_a_t_e_v_e_r_ + root._w_h_a_t_e_v_e_r_ = nil + end + -- Let's forget about empty tables. + if next(root) then + do_serialize(root,name,"",0) + end end handle("}") end @@ -619,18 +698,17 @@ end --~ 'return' : return { } --~ number : [number] = { } -function table.serialize(root,name,reduce,noquotes,hexify) - local t = { } +function table.serialize(root,name,specification) + local t, n = { }, 0 local function flush(s) - t[#t+1] = s + n = n + 1 + t[n] = s end - serialize(root,name,flush,reduce,noquotes,hexify) + serialize(flush,root,name,specification) return concat(t,"\n") end -function table.tohandle(handle,root,name,reduce,noquotes,hexify) - serialize(root,name,handle,reduce,noquotes,hexify) -end +table.tohandle = serialize -- sometimes tables are real use (zapfino extra pro is some 85M) in which -- case a stepwise serialization is nice; actually, we could consider: @@ -641,73 +719,63 @@ end -- -- so this is on the todo list -table.tofile_maxtab = 2*1024 +local maxtab = 2*1024 -function table.tofile(filename,root,name,reduce,noquotes,hexify) +function table.tofile(filename,root,name,specification) local f = io.open(filename,'w') if f then - local maxtab = table.tofile_maxtab if maxtab > 1 then - local t = { } + local t, n = { }, 0 local function flush(s) - t[#t+1] = s - if #t > maxtab then + n = n + 1 + t[n] = s + if n > maxtab then f:write(concat(t,"\n"),"\n") -- hm, write(sometable) should be nice - t = { } + t, n = { }, 0 -- we could recycle t if needed end end - serialize(root,name,flush,reduce,noquotes,hexify) + serialize(flush,root,name,specification) f:write(concat(t,"\n"),"\n") else local function flush(s) f:write(s,"\n") end - serialize(root,name,flush,reduce,noquotes,hexify) + serialize(flush,root,name,specification) end f:close() + io.flush() end end -local function flatten(t,f,complete) -- is this used? meybe a variant with next, ... - for i=1,#t do - local v = t[i] - if type(v) == "table" then - if complete or type(v[1]) == "table" then - flatten(v,f,complete) +local function flattened(t,f,depth) + if f == nil then + f = { } + depth = 0xFFFF + elseif tonumber(f) then + -- assume then only two arguments are given + depth = f + f = { } + elseif not depth then + depth = 0xFFFF + end + for k, v in next, t do + if type(k) ~= "number" then + if depth > 0 and type(v) == "table" then + flattened(v,f,depth-1) else - f[#f+1] = v + f[k] = v end - else - f[#f+1] = v end end -end - -function table.flatten(t) - local f = { } - flatten(t,f,true) - return f -end - -function table.unnest(t) -- bad name - local f = { } - flatten(t,f,false) - return f -end - -table.flatten_one_level = table.unnest - --- a better one: - -local function flattened(t,f) - if not f then - f = { } - end - for k, v in next, t do - if type(v) == "table" then - flattened(v,f) + local n = #f + for k=1,#t do + local v = t[k] + if depth > 0 and type(v) == "table" then + flattened(v,f,depth-1) + n = #f else - f[k] = v + n = n + 1 + f[n] = v end end return f @@ -715,49 +783,27 @@ end table.flattened = flattened --- the next three may disappear - -function table.remove_value(t,value) -- todo: n - if value then - for i=1,#t do - if t[i] == value then - remove(t,i) - -- remove all, so no: return - end - end +local function unnest(t,f) -- only used in mk, for old times sake + if not f then -- and only relevant for token lists + f = { } end -end - -function table.insert_before_value(t,value,str) - if str then - if value then - for i=1,#t do - if t[i] == value then - insert(t,i,str) - return - end + for i=1,#t do + local v = t[i] + if type(v) == "table" then + if type(v[1]) == "table" then + unnest(v,f) + else + f[#f+1] = v end + else + f[#f+1] = v end - insert(t,1,str) - elseif value then - insert(t,1,value) end + return f end -function table.insert_after_value(t,value,str) - if str then - if value then - for i=1,#t do - if t[i] == value then - insert(t,i+1,str) - return - end - end - end - t[#t+1] = str - elseif value then - t[#t+1] = value - end +function table.unnest(t) -- bad name + return unnest(t) end local function are_equal(a,b,n,m) -- indexed @@ -784,7 +830,7 @@ end local function identical(a,b) -- assumes same structure for ka, va in next, a do - local vb = b[k] + local vb = b[ka] if va == vb then -- same elseif type(va) == "table" and type(vb) == "table" then @@ -798,8 +844,8 @@ local function identical(a,b) -- assumes same structure return true end -table.are_equal = are_equal table.identical = identical +table.are_equal = are_equal -- maybe also make a combined one @@ -825,86 +871,126 @@ function table.contains(t, v) end function table.count(t) - local n, e = 0, next(t) - while e do - n, e = n + 1, next(t,e) + local n = 0 + for k, v in next, t do + n = n + 1 end return n end -function table.swapped(t) - local s = { } +function table.swapped(t,s) -- hash + local n = { } + if s then +--~ for i=1,#s do +--~ n[i] = s[i] +--~ end + for k, v in next, s do + n[k] = v + end + end +--~ for i=1,#t do +--~ local ti = t[i] -- don't ask but t[i] can be nil +--~ if ti then +--~ n[ti] = i +--~ end +--~ end for k, v in next, t do - s[v] = k + n[v] = k end - return s + return n end ---~ function table.are_equal(a,b) ---~ return table.serialize(a) == table.serialize(b) ---~ end - -function table.clone(t,p) -- t is optional or nil or table - if not p then - t, p = { }, t or { } - elseif not t then - t = { } +function table.reversed(t) + if t then + local tt, tn = { }, #t + if tn > 0 then + local ttn = 0 + for i=tn,1,-1 do + ttn = ttn + 1 + tt[ttn] = t[i] + end + end + return tt end - setmetatable(t, { __index = function(_,key) return p[key] end }) -- why not __index = p ? - return t end -function table.hexed(t,seperator) - local tt = { } - for i=1,#t do tt[i] = format("0x%04X",t[i]) end - return concat(tt,seperator or " ") +function table.sequenced(t,sep) -- hash only + if t then + local s, n = { }, 0 + for k, v in sortedhash(t) do + if simple then + if v == true then + n = n + 1 + s[n] = k + elseif v and v~= "" then + n = n + 1 + s[n] = k .. "=" .. tostring(v) + end + else + n = n + 1 + s[n] = k .. "=" .. tostring(v) + end + end + return concat(s, sep or " | ") + else + return "" + end end -function table.reverse_hash(h) - local r = { } - for k,v in next, h do - r[v] = lower(gsub(k," ","")) +function table.print(t,...) + if type(t) ~= "table" then + print(tostring(t)) + else + table.tohandle(print,t,...) end - return r end -function table.reverse(t) - local tt = { } - if #t > 0 then - for i=#t,1,-1 do - tt[#tt+1] = t[i] - end - end - return tt +-- -- -- obsolete but we keep them for a while and might comment them later -- -- -- + +-- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack) + +function table.sub(t,i,j) + return { unpack(t,i,j) } end -function table.insert_before_value(t,value,extra) - for i=1,#t do - if t[i] == extra then - remove(t,i) - end - end - for i=1,#t do - if t[i] == value then - insert(t,i,extra) - return - end - end - insert(t,1,extra) +-- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice) + +function table.is_empty(t) + return not t or not next(t) end -function table.insert_after_value(t,value,extra) - for i=1,#t do - if t[i] == extra then - remove(t,i) - end +function table.has_one_entry(t) + return t and not next(t,next(t)) +end + +-- new + +function table.loweredkeys(t) -- maybe utf + local l = { } + for k, v in next, t do + l[lower(k)] = v end - for i=1,#t do - if t[i] == value then - insert(t,i+1,extra) - return + return l +end + +-- new, might move (maybe duplicate) + +function table.unique(old) + local hash = { } + local new = { } + local n = 0 + for i=1,#old do + local oi = old[i] + if not hash[oi] then + n = n + 1 + new[n] = oi + hash[oi] = true end end - insert(t,#t+1,extra) + return new end +-- function table.sorted(t,...) +-- table.sort(t,...) +-- return t -- still sorts in-place +-- end diff --git a/lualibs-url.lua b/lualibs-url.lua index 83b8fcc..ab50028 100644 --- a/lualibs-url.lua +++ b/lualibs-url.lua @@ -339,3 +339,5 @@ end --~ test("zip:///oeps/oeps.zip#bla/bla.tex") --~ test("zip:///oeps/oeps.zip?bla/bla.tex") + +--~ table.print(url.hashed("/test?test")) -- cgit v1.2.3 From 5b3ff130cd5e4a3c29f468b0ac3f9b675096c7da Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 21:45:45 +0200 Subject: reflect move l-dimen -> util-dim --- README | 2 +- lualibs-dimen.lua | 432 --------------------------------------------------- lualibs-util-dim.lua | 432 +++++++++++++++++++++++++++++++++++++++++++++++++++ lualibs.dtx | 3 +- 4 files changed, 434 insertions(+), 435 deletions(-) delete mode 100644 lualibs-dimen.lua create mode 100644 lualibs-util-dim.lua diff --git a/README b/README index b5c4c97..b39b36d 100644 --- a/README +++ b/README @@ -34,7 +34,6 @@ Source files: lualibs.dtx lualibs-aux.lua lualibs-boolean.lua - lualibs-dimen.lua lualibs-dir.lua lualibs-file.lua lualibs-io.lua @@ -49,6 +48,7 @@ Source files: lualibs-unicode.lua lualibs-url.lua lualibs-utils.lua + lualibs-util-dim.lua README Makefile NEWS diff --git a/lualibs-dimen.lua b/lualibs-dimen.lua deleted file mode 100644 index da5ab14..0000000 --- a/lualibs-dimen.lua +++ /dev/null @@ -1,432 +0,0 @@ -if not modules then modules = { } end modules ['l-dimen'] = { - version = 1.001, - comment = "support for dimensions", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} - ---[[ldx-- -

Internally work with scaled point, which are -represented by integers. However, in practice, at east at the - end we work with more generic units like points (pt). Going -from scaled points (numbers) to one of those units can be -done by using the conversion factors collected in the following -table.

---ldx]]-- - -local format, match, gsub, type, setmetatable = string.format, string.match, string.gsub, type, setmetatable -local P, S, R, Cc, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.Cc, lpeg.match - -number = number or { } - -number.tonumberf = function(n) return match(format("%.20f",n),"(.-0?)0*$") end -- one zero too much but alas -number.tonumberg = function(n) return format("%.20g",n) end - -local dimenfactors = { - ["pt"] = 1/65536, - ["in"] = ( 100/ 7227)/65536, - ["cm"] = ( 254/ 7227)/65536, - ["mm"] = ( 2540/ 7227)/65536, - ["sp"] = 1, -- 65536 sp in 1pt - ["bp"] = ( 7200/ 7227)/65536, - ["pc"] = ( 1/ 12)/65536, - ["dd"] = ( 1157/ 1238)/65536, - ["cc"] = ( 1157/14856)/65536, - ["nd"] = (20320/21681)/65536, - ["nc"] = ( 5080/65043)/65536 -} - ---~ print(table.serialize(dimenfactors)) ---~ ---~ %.99g: ---~ ---~ t={ ---~ ["bp"]=1.5201782378580324e-005, ---~ ["cc"]=1.1883696112892098e-006, ---~ ["cm"]=5.3628510057769479e-007, ---~ ["dd"]=1.4260435335470516e-005, ---~ ["em"]=0.000152587890625, ---~ ["ex"]=6.103515625e-005, ---~ ["in"]=2.1113586636917117e-007, ---~ ["mm"]=5.3628510057769473e-008, ---~ ["nc"]=1.1917446679504327e-006, ---~ ["nd"]=1.4300936015405194e-005, ---~ ["pc"]=1.2715657552083333e-006, ---~ ["pt"]=1.52587890625e-005, ---~ ["sp"]=1, ---~ } ---~ ---~ patched %s and tonumber ---~ ---~ t={ ---~ ["bp"]=0.00001520178238, ---~ ["cc"]=0.00000118836961, ---~ ["cm"]=0.0000005362851, ---~ ["dd"]=0.00001426043534, ---~ ["em"]=0.00015258789063, ---~ ["ex"]=0.00006103515625, ---~ ["in"]=0.00000021113587, ---~ ["mm"]=0.00000005362851, ---~ ["nc"]=0.00000119174467, ---~ ["nd"]=0.00001430093602, ---~ ["pc"]=0.00000127156576, ---~ ["pt"]=0.00001525878906, ---~ ["sp"]=1, ---~ } - ---[[ldx-- -

A conversion function that takes a number, unit (string) and optional -format (string) is implemented using this table.

---ldx]]-- - --- was: - -local function todimen(n,unit,fmt) - if type(n) == 'string' then - return n - else - unit = unit or 'pt' - return format(fmt or "%s%s",n*dimenfactors[unit],unit) - -- if fmt then - -- return format(fmt,n*dimenfactors[unit],unit) - -- else - -- return match(format("%.20f",n*dimenfactors[unit]),"(.-0?)0*$") .. unit - -- end - end -end - ---[[ldx-- -

We collect a bunch of converters in the number namespace.

---ldx]]-- - -number.maxdimen = 1073741823 -number.todimen = todimen -number.dimenfactors = dimenfactors - -function number.topoints (n) return todimen(n,"pt") end -function number.toinches (n) return todimen(n,"in") end -function number.tocentimeters (n) return todimen(n,"cm") end -function number.tomillimeters (n) return todimen(n,"mm") end -function number.toscaledpoints(n) return todimen(n,"sp") end -function number.toscaledpoints(n) return n .. "sp" end -function number.tobasepoints (n) return todimen(n,"bp") end -function number.topicas (n) return todimen(n "pc") end -function number.todidots (n) return todimen(n,"dd") end -function number.tociceros (n) return todimen(n,"cc") end -function number.tonewdidots (n) return todimen(n,"nd") end -function number.tonewciceros (n) return todimen(n,"nc") end - ---[[ldx-- -

More interesting it to implement a (sort of) dimen datatype, one -that permits calculations too. First we define a function that -converts a string to scaledpoints. We use . We capture -a number and optionally a unit. When no unit is given a constant -capture takes place.

---ldx]]-- - -local amount = (S("+-")^0 * R("09")^0 * P(".")^0 * R("09")^0) + Cc("0") -local unit = R("az")^1 - -local dimenpair = amount/tonumber * (unit^1/dimenfactors + Cc(1)) -- tonumber is new - -lpeg.patterns.dimenpair = dimenpair - ---[[ldx-- -

We use a metatable to intercept errors. When no key is found in -the table with factors, the metatable will be consulted for an -alternative index function.

---ldx]]-- - -local mt = { } setmetatable(dimenfactors,mt) - -mt.__index = function(t,s) - -- error("wrong dimension: " .. (s or "?")) -- better a message - return false -end - -function string:todimen() - if type(self) == "number" then - return self - else - local value, unit = lpegmatch(dimenpair,self) - return value/unit - end -end - -local amount = S("+-")^0 * R("09")^0 * S(".,")^0 * R("09")^0 -local unit = P("pt") + P("cm") + P("mm") + P("sp") + P("bp") + P("in") + - P("pc") + P("dd") + P("cc") + P("nd") + P("nc") - -local validdimen = amount * unit - -lpeg.patterns.validdimen = pattern - ---[[ldx-- -

This converter accepts calls like:

- - -string.todimen("10") -string.todimen(".10") -string.todimen("10.0") -string.todimen("10.0pt") -string.todimen("10pt") -string.todimen("10.0pt") - - -

And of course the often more efficient:

- - -somestring:todimen("12.3cm") - - -

With this in place, we can now implement a proper datatype for dimensions, one -that permits us to do this:

- - -s = dimen "10pt" + dimen "20pt" + dimen "200pt" - - dimen "100sp" / 10 + "20pt" + "0pt" - - -

We create a local metatable for this new type:

---ldx]]-- - -local dimensions = { } - ---[[ldx-- -

The main (and globally) visible representation of a dimen is defined next: it is -a one-element table. The unit that is returned from the match is normally a number -(one of the previously defined factors) but we also accept functions. Later we will -see why.

---ldx]]-- - -function dimen(a) - if a then - local ta= type(a) - if ta == "string" then - local value, unit = lpegmatch(pattern,a) - if type(unit) == "function" then - k = value/unit() - else - k = value/unit - end - a = k - elseif ta == "table" then - a = a[1] - end - return setmetatable({ a }, dimensions) - else - return setmetatable({ 0 }, dimensions) - end -end - ---[[ldx-- -

This function return a small hash with a metatable attached. It is -through this metatable that we can do the calculations. We could have -shared some of the code but for reasons of speed we don't.

---ldx]]-- - -function dimensions.__add(a, b) - local ta, tb = type(a), type(b) - if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end - if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end - return setmetatable({ a + b }, dimensions) -end - -function dimensions.__sub(a, b) - local ta, tb = type(a), type(b) - if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end - if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end - return setmetatable({ a - b }, dimensions) -end - -function dimensions.__mul(a, b) - local ta, tb = type(a), type(b) - if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end - if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end - return setmetatable({ a * b }, dimensions) -end - -function dimensions.__div(a, b) - local ta, tb = type(a), type(b) - if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end - if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end - return setmetatable({ a / b }, dimensions) -end - -function dimensions.__unm(a) - local ta = type(a) - if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end - return setmetatable({ - a }, dimensions) -end - ---[[ldx-- -

It makes no sense to implement the power and modulo function but -the next two do make sense because they permits is code like:

- - -local a, b = dimen "10pt", dimen "11pt" -... -if a > b then - ... -end - ---ldx]]-- - --- makes no sense: dimensions.__pow and dimensions.__mod - -function dimensions.__lt(a, b) - return a[1] < b[1] -end - -function dimensions.__eq(a, b) - return a[1] == b[1] -end - ---[[ldx-- -

We also need to provide a function for conversion to string (so that -we can print dimensions). We print them as points, just like .

---ldx]]-- - -function dimensions.__tostring(a) - return a[1]/65536 .. "pt" -- instead of todimen(a[1]) -end - ---[[ldx-- -

Since it does not take much code, we also provide a way to access -a few accessors

- - -print(dimen().pt) -print(dimen().sp) - ---ldx]]-- - -function dimensions.__index(tab,key) - local d = dimenfactors[key] - if not d then - error("illegal property of dimen: " .. key) - d = 1 - end - return 1/d -end - ---[[ldx-- -

In the converter from string to dimension we support functions as -factors. This is because in we have a few more units: -ex and em. These are not constant factors but -depend on the current font. They are not defined by default, but need -an explicit function call. This is because at the moment that this code -is loaded, the relevant tables that hold the functions needed may not -yet be available.

---ldx]]-- - -function dimensions.texify() -- todo: % - local fti, fc = fonts and fonts.ids and fonts.ids, font and font.current - if fti and fc then - dimenfactors["ex"] = function() return fti[fc()].ex_height end - dimenfactors["em"] = function() return fti[fc()].quad end - else - dimenfactors["ex"] = 1/65536* 4 -- 4pt - dimenfactors["em"] = 1/65536*10 -- 10pt - end -end - ---[[ldx-- -

In order to set the defaults we call this function now. At some point -the macro package needs to make sure the function is called again.

---ldx]]-- - -dimensions.texify() - ---[[ldx-- -

The previous code is rather efficient (also thanks to ) but we -can speed it up by caching converted dimensions. On my machine (2008) the following -loop takes about 25.5 seconds.

- - -for i=1,1000000 do - local s = dimen "10pt" + dimen "20pt" + dimen "200pt" - - dimen "100sp" / 10 + "20pt" + "0pt" -end - - -

When we cache converted strings this becomes 16.3 seconds. In order not -to waste too much memory on it, we tag the values of the cache as being -week which mean that the garbage collector will collect them in a next -sweep. This means that in most cases the speed up is mostly affecting the -current couple of calculations and as such the speed penalty is small.

- -

We redefine two previous defined functions that can benefit from -this:

---ldx]]-- - -local known = { } setmetatable(known, { __mode = "v" }) - -function dimen(a) - if a then - local ta= type(a) - if ta == "string" then - local k = known[a] - if k then - a = k - else - local value, unit = lpegmatch(dimenpair,a) - if type(unit) == "function" then - k = value/unit() - else - k = value/unit - end - known[a] = k - a = k - end - elseif ta == "table" then - a = a[1] - end - return setmetatable({ a }, dimensions) - else - return setmetatable({ 0 }, dimensions) - end -end - -function string:todimen() - if type(self) == "number" then - return self - else - local k = known[self] - if not k then - local value, unit = lpegmatch(dimenpair,self) - if value and unit then - k = value/unit - else - k = 0 - end - -- print(self,value,unit) - known[self] = k - end - return k - end -end - -function number.toscaled(d) - return format("0.5f",d/2^16) -end - ---[[ldx-- -

In a similar fashion we can define a glue datatype. In that case we -probably use a hash instead of a one-element table.

---ldx]]-- - ---[[ldx-- -

Goodie:s

---ldx]]-- - -function number.percent(n) -- will be cleaned up once luatex 0.30 is out - local hsize = tex.hsize - if type(hsize) == "string" then - hsize = hsize:todimen() - end - return (n/100) * hsize -end - -number["%"] = number.percent diff --git a/lualibs-util-dim.lua b/lualibs-util-dim.lua new file mode 100644 index 0000000..da5ab14 --- /dev/null +++ b/lualibs-util-dim.lua @@ -0,0 +1,432 @@ +if not modules then modules = { } end modules ['l-dimen'] = { + version = 1.001, + comment = "support for dimensions", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +--[[ldx-- +

Internally work with scaled point, which are +represented by integers. However, in practice, at east at the + end we work with more generic units like points (pt). Going +from scaled points (numbers) to one of those units can be +done by using the conversion factors collected in the following +table.

+--ldx]]-- + +local format, match, gsub, type, setmetatable = string.format, string.match, string.gsub, type, setmetatable +local P, S, R, Cc, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.Cc, lpeg.match + +number = number or { } + +number.tonumberf = function(n) return match(format("%.20f",n),"(.-0?)0*$") end -- one zero too much but alas +number.tonumberg = function(n) return format("%.20g",n) end + +local dimenfactors = { + ["pt"] = 1/65536, + ["in"] = ( 100/ 7227)/65536, + ["cm"] = ( 254/ 7227)/65536, + ["mm"] = ( 2540/ 7227)/65536, + ["sp"] = 1, -- 65536 sp in 1pt + ["bp"] = ( 7200/ 7227)/65536, + ["pc"] = ( 1/ 12)/65536, + ["dd"] = ( 1157/ 1238)/65536, + ["cc"] = ( 1157/14856)/65536, + ["nd"] = (20320/21681)/65536, + ["nc"] = ( 5080/65043)/65536 +} + +--~ print(table.serialize(dimenfactors)) +--~ +--~ %.99g: +--~ +--~ t={ +--~ ["bp"]=1.5201782378580324e-005, +--~ ["cc"]=1.1883696112892098e-006, +--~ ["cm"]=5.3628510057769479e-007, +--~ ["dd"]=1.4260435335470516e-005, +--~ ["em"]=0.000152587890625, +--~ ["ex"]=6.103515625e-005, +--~ ["in"]=2.1113586636917117e-007, +--~ ["mm"]=5.3628510057769473e-008, +--~ ["nc"]=1.1917446679504327e-006, +--~ ["nd"]=1.4300936015405194e-005, +--~ ["pc"]=1.2715657552083333e-006, +--~ ["pt"]=1.52587890625e-005, +--~ ["sp"]=1, +--~ } +--~ +--~ patched %s and tonumber +--~ +--~ t={ +--~ ["bp"]=0.00001520178238, +--~ ["cc"]=0.00000118836961, +--~ ["cm"]=0.0000005362851, +--~ ["dd"]=0.00001426043534, +--~ ["em"]=0.00015258789063, +--~ ["ex"]=0.00006103515625, +--~ ["in"]=0.00000021113587, +--~ ["mm"]=0.00000005362851, +--~ ["nc"]=0.00000119174467, +--~ ["nd"]=0.00001430093602, +--~ ["pc"]=0.00000127156576, +--~ ["pt"]=0.00001525878906, +--~ ["sp"]=1, +--~ } + +--[[ldx-- +

A conversion function that takes a number, unit (string) and optional +format (string) is implemented using this table.

+--ldx]]-- + +-- was: + +local function todimen(n,unit,fmt) + if type(n) == 'string' then + return n + else + unit = unit or 'pt' + return format(fmt or "%s%s",n*dimenfactors[unit],unit) + -- if fmt then + -- return format(fmt,n*dimenfactors[unit],unit) + -- else + -- return match(format("%.20f",n*dimenfactors[unit]),"(.-0?)0*$") .. unit + -- end + end +end + +--[[ldx-- +

We collect a bunch of converters in the number namespace.

+--ldx]]-- + +number.maxdimen = 1073741823 +number.todimen = todimen +number.dimenfactors = dimenfactors + +function number.topoints (n) return todimen(n,"pt") end +function number.toinches (n) return todimen(n,"in") end +function number.tocentimeters (n) return todimen(n,"cm") end +function number.tomillimeters (n) return todimen(n,"mm") end +function number.toscaledpoints(n) return todimen(n,"sp") end +function number.toscaledpoints(n) return n .. "sp" end +function number.tobasepoints (n) return todimen(n,"bp") end +function number.topicas (n) return todimen(n "pc") end +function number.todidots (n) return todimen(n,"dd") end +function number.tociceros (n) return todimen(n,"cc") end +function number.tonewdidots (n) return todimen(n,"nd") end +function number.tonewciceros (n) return todimen(n,"nc") end + +--[[ldx-- +

More interesting it to implement a (sort of) dimen datatype, one +that permits calculations too. First we define a function that +converts a string to scaledpoints. We use . We capture +a number and optionally a unit. When no unit is given a constant +capture takes place.

+--ldx]]-- + +local amount = (S("+-")^0 * R("09")^0 * P(".")^0 * R("09")^0) + Cc("0") +local unit = R("az")^1 + +local dimenpair = amount/tonumber * (unit^1/dimenfactors + Cc(1)) -- tonumber is new + +lpeg.patterns.dimenpair = dimenpair + +--[[ldx-- +

We use a metatable to intercept errors. When no key is found in +the table with factors, the metatable will be consulted for an +alternative index function.

+--ldx]]-- + +local mt = { } setmetatable(dimenfactors,mt) + +mt.__index = function(t,s) + -- error("wrong dimension: " .. (s or "?")) -- better a message + return false +end + +function string:todimen() + if type(self) == "number" then + return self + else + local value, unit = lpegmatch(dimenpair,self) + return value/unit + end +end + +local amount = S("+-")^0 * R("09")^0 * S(".,")^0 * R("09")^0 +local unit = P("pt") + P("cm") + P("mm") + P("sp") + P("bp") + P("in") + + P("pc") + P("dd") + P("cc") + P("nd") + P("nc") + +local validdimen = amount * unit + +lpeg.patterns.validdimen = pattern + +--[[ldx-- +

This converter accepts calls like:

+ + +string.todimen("10") +string.todimen(".10") +string.todimen("10.0") +string.todimen("10.0pt") +string.todimen("10pt") +string.todimen("10.0pt") + + +

And of course the often more efficient:

+ + +somestring:todimen("12.3cm") + + +

With this in place, we can now implement a proper datatype for dimensions, one +that permits us to do this:

+ + +s = dimen "10pt" + dimen "20pt" + dimen "200pt" + - dimen "100sp" / 10 + "20pt" + "0pt" + + +

We create a local metatable for this new type:

+--ldx]]-- + +local dimensions = { } + +--[[ldx-- +

The main (and globally) visible representation of a dimen is defined next: it is +a one-element table. The unit that is returned from the match is normally a number +(one of the previously defined factors) but we also accept functions. Later we will +see why.

+--ldx]]-- + +function dimen(a) + if a then + local ta= type(a) + if ta == "string" then + local value, unit = lpegmatch(pattern,a) + if type(unit) == "function" then + k = value/unit() + else + k = value/unit + end + a = k + elseif ta == "table" then + a = a[1] + end + return setmetatable({ a }, dimensions) + else + return setmetatable({ 0 }, dimensions) + end +end + +--[[ldx-- +

This function return a small hash with a metatable attached. It is +through this metatable that we can do the calculations. We could have +shared some of the code but for reasons of speed we don't.

+--ldx]]-- + +function dimensions.__add(a, b) + local ta, tb = type(a), type(b) + if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + return setmetatable({ a + b }, dimensions) +end + +function dimensions.__sub(a, b) + local ta, tb = type(a), type(b) + if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + return setmetatable({ a - b }, dimensions) +end + +function dimensions.__mul(a, b) + local ta, tb = type(a), type(b) + if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + return setmetatable({ a * b }, dimensions) +end + +function dimensions.__div(a, b) + local ta, tb = type(a), type(b) + if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + return setmetatable({ a / b }, dimensions) +end + +function dimensions.__unm(a) + local ta = type(a) + if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + return setmetatable({ - a }, dimensions) +end + +--[[ldx-- +

It makes no sense to implement the power and modulo function but +the next two do make sense because they permits is code like:

+ + +local a, b = dimen "10pt", dimen "11pt" +... +if a > b then + ... +end + +--ldx]]-- + +-- makes no sense: dimensions.__pow and dimensions.__mod + +function dimensions.__lt(a, b) + return a[1] < b[1] +end + +function dimensions.__eq(a, b) + return a[1] == b[1] +end + +--[[ldx-- +

We also need to provide a function for conversion to string (so that +we can print dimensions). We print them as points, just like .

+--ldx]]-- + +function dimensions.__tostring(a) + return a[1]/65536 .. "pt" -- instead of todimen(a[1]) +end + +--[[ldx-- +

Since it does not take much code, we also provide a way to access +a few accessors

+ + +print(dimen().pt) +print(dimen().sp) + +--ldx]]-- + +function dimensions.__index(tab,key) + local d = dimenfactors[key] + if not d then + error("illegal property of dimen: " .. key) + d = 1 + end + return 1/d +end + +--[[ldx-- +

In the converter from string to dimension we support functions as +factors. This is because in we have a few more units: +ex and em. These are not constant factors but +depend on the current font. They are not defined by default, but need +an explicit function call. This is because at the moment that this code +is loaded, the relevant tables that hold the functions needed may not +yet be available.

+--ldx]]-- + +function dimensions.texify() -- todo: % + local fti, fc = fonts and fonts.ids and fonts.ids, font and font.current + if fti and fc then + dimenfactors["ex"] = function() return fti[fc()].ex_height end + dimenfactors["em"] = function() return fti[fc()].quad end + else + dimenfactors["ex"] = 1/65536* 4 -- 4pt + dimenfactors["em"] = 1/65536*10 -- 10pt + end +end + +--[[ldx-- +

In order to set the defaults we call this function now. At some point +the macro package needs to make sure the function is called again.

+--ldx]]-- + +dimensions.texify() + +--[[ldx-- +

The previous code is rather efficient (also thanks to ) but we +can speed it up by caching converted dimensions. On my machine (2008) the following +loop takes about 25.5 seconds.

+ + +for i=1,1000000 do + local s = dimen "10pt" + dimen "20pt" + dimen "200pt" + - dimen "100sp" / 10 + "20pt" + "0pt" +end + + +

When we cache converted strings this becomes 16.3 seconds. In order not +to waste too much memory on it, we tag the values of the cache as being +week which mean that the garbage collector will collect them in a next +sweep. This means that in most cases the speed up is mostly affecting the +current couple of calculations and as such the speed penalty is small.

+ +

We redefine two previous defined functions that can benefit from +this:

+--ldx]]-- + +local known = { } setmetatable(known, { __mode = "v" }) + +function dimen(a) + if a then + local ta= type(a) + if ta == "string" then + local k = known[a] + if k then + a = k + else + local value, unit = lpegmatch(dimenpair,a) + if type(unit) == "function" then + k = value/unit() + else + k = value/unit + end + known[a] = k + a = k + end + elseif ta == "table" then + a = a[1] + end + return setmetatable({ a }, dimensions) + else + return setmetatable({ 0 }, dimensions) + end +end + +function string:todimen() + if type(self) == "number" then + return self + else + local k = known[self] + if not k then + local value, unit = lpegmatch(dimenpair,self) + if value and unit then + k = value/unit + else + k = 0 + end + -- print(self,value,unit) + known[self] = k + end + return k + end +end + +function number.toscaled(d) + return format("0.5f",d/2^16) +end + +--[[ldx-- +

In a similar fashion we can define a glue datatype. In that case we +probably use a hash instead of a one-element table.

+--ldx]]-- + +--[[ldx-- +

Goodie:s

+--ldx]]-- + +function number.percent(n) -- will be cleaned up once luatex 0.30 is out + local hsize = tex.hsize + if type(hsize) == "string" then + hsize = hsize:todimen() + end + return (n/100) * hsize +end + +number["%"] = number.percent diff --git a/lualibs.dtx b/lualibs.dtx index 799c3df..cfdd15d 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -193,10 +193,9 @@ require("lualibs-md5") require("lualibs-dir") require("lualibs-unicode") require("lualibs-utils") -require("lualibs-dimen") +require("lualibs-util-dim") require("lualibs-url") require("lualibs-set") -require("lualibs-dimen") % \end{macrocode} % % \iffalse -- cgit v1.2.3 From 26624ddc9f0ffa61e269658de95c91e9a77c08a4 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 21:49:46 +0200 Subject: update util-dim --- lualibs-util-dim.lua | 211 +++++++++++++++++++++++++++------------------------ 1 file changed, 113 insertions(+), 98 deletions(-) diff --git a/lualibs-util-dim.lua b/lualibs-util-dim.lua index da5ab14..d77bd49 100644 --- a/lualibs-util-dim.lua +++ b/lualibs-util-dim.lua @@ -16,14 +16,20 @@ table.

--ldx]]-- local format, match, gsub, type, setmetatable = string.format, string.match, string.gsub, type, setmetatable -local P, S, R, Cc, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.Cc, lpeg.match +local P, S, R, Cc, C, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.Cc, lpeg.C, lpeg.match + +local allocate = utilities.storage.allocate +local setmetatableindex = table.setmetatableindex + +--this might become another namespace number = number or { } +local number = number number.tonumberf = function(n) return match(format("%.20f",n),"(.-0?)0*$") end -- one zero too much but alas number.tonumberg = function(n) return format("%.20g",n) end -local dimenfactors = { +local dimenfactors = allocate { ["pt"] = 1/65536, ["in"] = ( 100/ 7227)/65536, ["cm"] = ( 254/ 7227)/65536, @@ -80,14 +86,17 @@ local dimenfactors = { format (string) is implemented using this table.

--ldx]]-- --- was: - -local function todimen(n,unit,fmt) +local function numbertodimen(n,unit,fmt) if type(n) == 'string' then return n else unit = unit or 'pt' - return format(fmt or "%s%s",n*dimenfactors[unit],unit) + if not fmt then + fmt = "%s%s" + elseif fmt == true then + fmt = "%0.5f%s" + end + return format(fmt,n*dimenfactors[unit],unit) -- if fmt then -- return format(fmt,n*dimenfactors[unit],unit) -- else @@ -101,21 +110,21 @@ end --ldx]]-- number.maxdimen = 1073741823 -number.todimen = todimen +number.todimen = numbertodimen number.dimenfactors = dimenfactors -function number.topoints (n) return todimen(n,"pt") end -function number.toinches (n) return todimen(n,"in") end -function number.tocentimeters (n) return todimen(n,"cm") end -function number.tomillimeters (n) return todimen(n,"mm") end -function number.toscaledpoints(n) return todimen(n,"sp") end -function number.toscaledpoints(n) return n .. "sp" end -function number.tobasepoints (n) return todimen(n,"bp") end -function number.topicas (n) return todimen(n "pc") end -function number.todidots (n) return todimen(n,"dd") end -function number.tociceros (n) return todimen(n,"cc") end -function number.tonewdidots (n) return todimen(n,"nd") end -function number.tonewciceros (n) return todimen(n,"nc") end +function number.topoints (n,fmt) return numbertodimen(n,"pt",fmt) end +function number.toinches (n,fmt) return numbertodimen(n,"in",fmt) end +function number.tocentimeters (n,fmt) return numbertodimen(n,"cm",fmt) end +function number.tomillimeters (n,fmt) return numbertodimen(n,"mm",fmt) end +function number.toscaledpoints(n,fmt) return numbertodimen(n,"sp",fmt) end +function number.toscaledpoints(n) return n .. "sp" end +function number.tobasepoints (n,fmt) return numbertodimen(n,"bp",fmt) end +function number.topicas (n,fmt) return numbertodimen(n "pc",fmt) end +function number.todidots (n,fmt) return numbertodimen(n,"dd",fmt) end +function number.tociceros (n,fmt) return numbertodimen(n,"cc",fmt) end +function number.tonewdidots (n,fmt) return numbertodimen(n,"nd",fmt) end +function number.tonewciceros (n,fmt) return numbertodimen(n,"nc",fmt) end --[[ldx--

More interesting it to implement a (sort of) dimen datatype, one @@ -132,27 +141,40 @@ local dimenpair = amount/tonumber * (unit^1/dimenfactors + Cc(1)) -- tonumber is lpeg.patterns.dimenpair = dimenpair +local splitter = amount/tonumber * C(unit^1) + +function number.splitdimen(str) + return lpegmatch(splitter,str) +end + --[[ldx--

We use a metatable to intercept errors. When no key is found in the table with factors, the metatable will be consulted for an alternative index function.

--ldx]]-- -local mt = { } setmetatable(dimenfactors,mt) - -mt.__index = function(t,s) +setmetatableindex(dimenfactors, function(t,s) -- error("wrong dimension: " .. (s or "?")) -- better a message return false -end +end) -function string:todimen() - if type(self) == "number" then - return self - else - local value, unit = lpegmatch(dimenpair,self) - return value/unit - end -end +--[[ldx-- +

We redefine the following function later on, so we comment it +here (which saves us bytecodes.

+--ldx]]-- + +-- function string.todimen(str) +-- if type(str) == "number" then +-- return str +-- else +-- local value, unit = lpegmatch(dimenpair,str) +-- return value/unit +-- end +-- end +-- +-- local stringtodimen = string.todimen + +local stringtodimen -- assigned later (commenting saves bytecode) local amount = S("+-")^0 * R("09")^0 * S(".,")^0 * R("09")^0 local unit = P("pt") + P("cm") + P("mm") + P("sp") + P("bp") + P("in") + @@ -160,7 +182,7 @@ local unit = P("pt") + P("cm") + P("mm") + P("sp") + P("bp") + P("in") + local validdimen = amount * unit -lpeg.patterns.validdimen = pattern +lpeg.patterns.validdimen = validdimen --[[ldx--

This converter accepts calls like:

@@ -174,12 +196,6 @@ string.todimen("10pt") string.todimen("10.0pt") -

And of course the often more efficient:

- - -somestring:todimen("12.3cm") - -

With this in place, we can now implement a proper datatype for dimensions, one that permits us to do this:

@@ -197,28 +213,28 @@ local dimensions = { }

The main (and globally) visible representation of a dimen is defined next: it is a one-element table. The unit that is returned from the match is normally a number (one of the previously defined factors) but we also accept functions. Later we will -see why.

+see why. This function is redefined later.

--ldx]]-- -function dimen(a) - if a then - local ta= type(a) - if ta == "string" then - local value, unit = lpegmatch(pattern,a) - if type(unit) == "function" then - k = value/unit() - else - k = value/unit - end - a = k - elseif ta == "table" then - a = a[1] - end - return setmetatable({ a }, dimensions) - else - return setmetatable({ 0 }, dimensions) - end -end +-- function dimen(a) +-- if a then +-- local ta= type(a) +-- if ta == "string" then +-- local value, unit = lpegmatch(pattern,a) +-- if type(unit) == "function" then +-- k = value/unit() +-- else +-- k = value/unit +-- end +-- a = k +-- elseif ta == "table" then +-- a = a[1] +-- end +-- return setmetatable({ a }, dimensions) +-- else +-- return setmetatable({ 0 }, dimensions) +-- end +-- end --[[ldx--

This function return a small hash with a metatable attached. It is @@ -228,35 +244,35 @@ shared some of the code but for reasons of speed we don't.

function dimensions.__add(a, b) local ta, tb = type(a), type(b) - if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end - if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end + if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end return setmetatable({ a + b }, dimensions) end function dimensions.__sub(a, b) local ta, tb = type(a), type(b) - if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end - if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end + if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end return setmetatable({ a - b }, dimensions) end function dimensions.__mul(a, b) local ta, tb = type(a), type(b) - if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end - if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end + if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end return setmetatable({ a * b }, dimensions) end function dimensions.__div(a, b) local ta, tb = type(a), type(b) - if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end - if tb == "string" then b = b:todimen() elseif tb == "table" then b = b[1] end + if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end + if tb == "string" then b = stringtodimen(b) elseif tb == "table" then b = b[1] end return setmetatable({ a / b }, dimensions) end function dimensions.__unm(a) local ta = type(a) - if ta == "string" then a = a:todimen() elseif ta == "table" then a = a[1] end + if ta == "string" then a = stringtodimen(a) elseif ta == "table" then a = a[1] end return setmetatable({ - a }, dimensions) end @@ -321,23 +337,9 @@ is loaded, the relevant tables that hold the functions needed may not yet be available.

--ldx]]-- -function dimensions.texify() -- todo: % - local fti, fc = fonts and fonts.ids and fonts.ids, font and font.current - if fti and fc then - dimenfactors["ex"] = function() return fti[fc()].ex_height end - dimenfactors["em"] = function() return fti[fc()].quad end - else - dimenfactors["ex"] = 1/65536* 4 -- 4pt - dimenfactors["em"] = 1/65536*10 -- 10pt - end -end - ---[[ldx-- -

In order to set the defaults we call this function now. At some point -the macro package needs to make sure the function is called again.

---ldx]]-- - -dimensions.texify() + dimenfactors["ex"] = 4 * 1/65536 -- 4pt + dimenfactors["em"] = 10 * 1/65536 -- 10pt +-- dimenfactors["%"] = 4 * 1/65536 -- 400pt/100 --[[ldx--

The previous code is rather efficient (also thanks to ) but we @@ -389,27 +391,40 @@ function dimen(a) end end -function string:todimen() - if type(self) == "number" then - return self +function string.todimen(str) -- maybe use tex.sp when available + if type(str) == "number" then + return str else - local k = known[self] + local k = known[str] if not k then - local value, unit = lpegmatch(dimenpair,self) + local value, unit = lpegmatch(dimenpair,str) if value and unit then - k = value/unit + k = value/unit -- to be considered: round else k = 0 end - -- print(self,value,unit) - known[self] = k + -- print(str,value,unit) + known[str] = k end return k end end +--~ local known = { } + +--~ function string.todimen(str) -- maybe use tex.sp +--~ local k = known[str] +--~ if not k then +--~ k = tex.sp(str) +--~ known[str] = k +--~ end +--~ return k +--~ end + +stringtodimen = string.todimen -- local variable defined earlier + function number.toscaled(d) - return format("0.5f",d/2^16) + return format("%0.5f",d/2^16) end --[[ldx-- @@ -421,12 +436,12 @@ probably use a hash instead of a one-element table.

Goodie:s

--ldx]]-- -function number.percent(n) -- will be cleaned up once luatex 0.30 is out - local hsize = tex.hsize - if type(hsize) == "string" then - hsize = hsize:todimen() +function number.percent(n,d) -- will be cleaned up once luatex 0.30 is out + d = d or tex.hsize + if type(d) == "string" then + d = stringtodimen(d) end - return (n/100) * hsize + return (n/100) * d end number["%"] = number.percent -- cgit v1.2.3 From 59cf0ae4a208ec4acd1d63229432f5eaaba00967 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 21:55:19 +0200 Subject: reflect move l-utils -> util-mrg --- README | 2 +- lualibs-util-mrg.lua | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++ lualibs-utils.lua | 176 --------------------------------------------------- lualibs.dtx | 2 +- 4 files changed, 178 insertions(+), 178 deletions(-) create mode 100644 lualibs-util-mrg.lua delete mode 100644 lualibs-utils.lua diff --git a/README b/README index b39b36d..51f5794 100644 --- a/README +++ b/README @@ -47,7 +47,7 @@ Source files: lualibs-table.lua lualibs-unicode.lua lualibs-url.lua - lualibs-utils.lua + lualibs-util-mrg.lua lualibs-util-dim.lua README Makefile diff --git a/lualibs-util-mrg.lua b/lualibs-util-mrg.lua new file mode 100644 index 0000000..ebc27b8 --- /dev/null +++ b/lualibs-util-mrg.lua @@ -0,0 +1,176 @@ +if not modules then modules = { } end modules ['l-utils'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- hm, quite unreadable + +local gsub = string.gsub +local concat = table.concat +local type, next = type, next + +if not utils then utils = { } end +if not utils.merger then utils.merger = { } end +if not utils.lua then utils.lua = { } end + +utils.merger.m_begin = "begin library merge" +utils.merger.m_end = "end library merge" +utils.merger.pattern = + "%c+" .. + "%-%-%s+" .. utils.merger.m_begin .. + "%c+(.-)%c+" .. + "%-%-%s+" .. utils.merger.m_end .. + "%c+" + +function utils.merger._self_fake_() + return + "-- " .. "created merged file" .. "\n\n" .. + "-- " .. utils.merger.m_begin .. "\n\n" .. + "-- " .. utils.merger.m_end .. "\n\n" +end + +function utils.report(...) + print(...) +end + +utils.merger.strip_comment = true + +function utils.merger._self_load_(name) + local f, data = io.open(name), "" + if f then + utils.report("reading merge from %s",name) + data = f:read("*all") + f:close() + else + utils.report("unknown file to merge %s",name) + end + if data and utils.merger.strip_comment then + -- saves some 20K + data = gsub(data,"%-%-~[^\n\r]*[\r\n]", "") + end + return data or "" +end + +function utils.merger._self_save_(name, data) + if data ~= "" then + local f = io.open(name,'w') + if f then + utils.report("saving merge from %s",name) + f:write(data) + f:close() + end + end +end + +function utils.merger._self_swap_(data,code) + if data ~= "" then + return (gsub(data,utils.merger.pattern, function(s) + return "\n\n" .. "-- "..utils.merger.m_begin .. "\n" .. code .. "\n" .. "-- "..utils.merger.m_end .. "\n\n" + end, 1)) + else + return "" + end +end + +--~ stripper: +--~ +--~ data = gsub(data,"%-%-~[^\n]*\n","") +--~ data = gsub(data,"\n\n+","\n") + +function utils.merger._self_libs_(libs,list) + local result, f, frozen = { }, nil, false + result[#result+1] = "\n" + if type(libs) == 'string' then libs = { libs } end + if type(list) == 'string' then list = { list } end + local foundpath = nil + for i=1,#libs do + local lib = libs[i] + for j=1,#list do + local pth = gsub(list[j],"\\","/") -- file.clean_path + utils.report("checking library path %s",pth) + local name = pth .. "/" .. lib + if lfs.isfile(name) then + foundpath = pth + end + end + if foundpath then break end + end + if foundpath then + utils.report("using library path %s",foundpath) + local right, wrong = { }, { } + for i=1,#libs do + local lib = libs[i] + local fullname = foundpath .. "/" .. lib + if lfs.isfile(fullname) then + -- right[#right+1] = lib + utils.report("merging library %s",fullname) + result[#result+1] = "do -- create closure to overcome 200 locals limit" + result[#result+1] = io.loaddata(fullname,true) + result[#result+1] = "end -- of closure" + else + -- wrong[#wrong+1] = lib + utils.report("no library %s",fullname) + end + end + if #right > 0 then + utils.report("merged libraries: %s",concat(right," ")) + end + if #wrong > 0 then + utils.report("skipped libraries: %s",concat(wrong," ")) + end + else + utils.report("no valid library path found") + end + return concat(result, "\n\n") +end + +function utils.merger.selfcreate(libs,list,target) + if target then + utils.merger._self_save_( + target, + utils.merger._self_swap_( + utils.merger._self_fake_(), + utils.merger._self_libs_(libs,list) + ) + ) + end +end + +function utils.merger.selfmerge(name,libs,list,target) + utils.merger._self_save_( + target or name, + utils.merger._self_swap_( + utils.merger._self_load_(name), + utils.merger._self_libs_(libs,list) + ) + ) +end + +function utils.merger.selfclean(name) + utils.merger._self_save_( + name, + utils.merger._self_swap_( + utils.merger._self_load_(name), + "" + ) + ) +end + +function utils.lua.compile(luafile, lucfile, cleanup, strip) -- defaults: cleanup=false strip=true + -- utils.report("compiling",luafile,"into",lucfile) + os.remove(lucfile) + local command = "-o " .. string.quote(lucfile) .. " " .. string.quote(luafile) + if strip ~= false then + command = "-s " .. command + end + local done = (os.spawn("texluac " .. command) == 0) or (os.spawn("luac " .. command) == 0) + if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then + -- utils.report("removing",luafile) + os.remove(luafile) + end + return done +end + diff --git a/lualibs-utils.lua b/lualibs-utils.lua deleted file mode 100644 index ebc27b8..0000000 --- a/lualibs-utils.lua +++ /dev/null @@ -1,176 +0,0 @@ -if not modules then modules = { } end modules ['l-utils'] = { - version = 1.001, - comment = "companion to luat-lib.mkiv", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} - --- hm, quite unreadable - -local gsub = string.gsub -local concat = table.concat -local type, next = type, next - -if not utils then utils = { } end -if not utils.merger then utils.merger = { } end -if not utils.lua then utils.lua = { } end - -utils.merger.m_begin = "begin library merge" -utils.merger.m_end = "end library merge" -utils.merger.pattern = - "%c+" .. - "%-%-%s+" .. utils.merger.m_begin .. - "%c+(.-)%c+" .. - "%-%-%s+" .. utils.merger.m_end .. - "%c+" - -function utils.merger._self_fake_() - return - "-- " .. "created merged file" .. "\n\n" .. - "-- " .. utils.merger.m_begin .. "\n\n" .. - "-- " .. utils.merger.m_end .. "\n\n" -end - -function utils.report(...) - print(...) -end - -utils.merger.strip_comment = true - -function utils.merger._self_load_(name) - local f, data = io.open(name), "" - if f then - utils.report("reading merge from %s",name) - data = f:read("*all") - f:close() - else - utils.report("unknown file to merge %s",name) - end - if data and utils.merger.strip_comment then - -- saves some 20K - data = gsub(data,"%-%-~[^\n\r]*[\r\n]", "") - end - return data or "" -end - -function utils.merger._self_save_(name, data) - if data ~= "" then - local f = io.open(name,'w') - if f then - utils.report("saving merge from %s",name) - f:write(data) - f:close() - end - end -end - -function utils.merger._self_swap_(data,code) - if data ~= "" then - return (gsub(data,utils.merger.pattern, function(s) - return "\n\n" .. "-- "..utils.merger.m_begin .. "\n" .. code .. "\n" .. "-- "..utils.merger.m_end .. "\n\n" - end, 1)) - else - return "" - end -end - ---~ stripper: ---~ ---~ data = gsub(data,"%-%-~[^\n]*\n","") ---~ data = gsub(data,"\n\n+","\n") - -function utils.merger._self_libs_(libs,list) - local result, f, frozen = { }, nil, false - result[#result+1] = "\n" - if type(libs) == 'string' then libs = { libs } end - if type(list) == 'string' then list = { list } end - local foundpath = nil - for i=1,#libs do - local lib = libs[i] - for j=1,#list do - local pth = gsub(list[j],"\\","/") -- file.clean_path - utils.report("checking library path %s",pth) - local name = pth .. "/" .. lib - if lfs.isfile(name) then - foundpath = pth - end - end - if foundpath then break end - end - if foundpath then - utils.report("using library path %s",foundpath) - local right, wrong = { }, { } - for i=1,#libs do - local lib = libs[i] - local fullname = foundpath .. "/" .. lib - if lfs.isfile(fullname) then - -- right[#right+1] = lib - utils.report("merging library %s",fullname) - result[#result+1] = "do -- create closure to overcome 200 locals limit" - result[#result+1] = io.loaddata(fullname,true) - result[#result+1] = "end -- of closure" - else - -- wrong[#wrong+1] = lib - utils.report("no library %s",fullname) - end - end - if #right > 0 then - utils.report("merged libraries: %s",concat(right," ")) - end - if #wrong > 0 then - utils.report("skipped libraries: %s",concat(wrong," ")) - end - else - utils.report("no valid library path found") - end - return concat(result, "\n\n") -end - -function utils.merger.selfcreate(libs,list,target) - if target then - utils.merger._self_save_( - target, - utils.merger._self_swap_( - utils.merger._self_fake_(), - utils.merger._self_libs_(libs,list) - ) - ) - end -end - -function utils.merger.selfmerge(name,libs,list,target) - utils.merger._self_save_( - target or name, - utils.merger._self_swap_( - utils.merger._self_load_(name), - utils.merger._self_libs_(libs,list) - ) - ) -end - -function utils.merger.selfclean(name) - utils.merger._self_save_( - name, - utils.merger._self_swap_( - utils.merger._self_load_(name), - "" - ) - ) -end - -function utils.lua.compile(luafile, lucfile, cleanup, strip) -- defaults: cleanup=false strip=true - -- utils.report("compiling",luafile,"into",lucfile) - os.remove(lucfile) - local command = "-o " .. string.quote(lucfile) .. " " .. string.quote(luafile) - if strip ~= false then - command = "-s " .. command - end - local done = (os.spawn("texluac " .. command) == 0) or (os.spawn("luac " .. command) == 0) - if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then - -- utils.report("removing",luafile) - os.remove(luafile) - end - return done -end - diff --git a/lualibs.dtx b/lualibs.dtx index cfdd15d..1fdd624 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -192,7 +192,7 @@ require("lualibs-file") require("lualibs-md5") require("lualibs-dir") require("lualibs-unicode") -require("lualibs-utils") +require("lualibs-util-mrg") require("lualibs-util-dim") require("lualibs-url") require("lualibs-set") -- cgit v1.2.3 From 672629eb217dd4b7e56b63c11fdceb983dd0eacd Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 21:59:40 +0200 Subject: update util-mrg --- lualibs-util-mrg.lua | 167 ++++++++++++++++++++------------------------------- 1 file changed, 65 insertions(+), 102 deletions(-) diff --git a/lualibs-util-mrg.lua b/lualibs-util-mrg.lua index ebc27b8..d59745b 100644 --- a/lualibs-util-mrg.lua +++ b/lualibs-util-mrg.lua @@ -8,89 +8,86 @@ if not modules then modules = { } end modules ['l-utils'] = { -- hm, quite unreadable -local gsub = string.gsub +local gsub, format = string.gsub, string.format local concat = table.concat local type, next = type, next -if not utils then utils = { } end -if not utils.merger then utils.merger = { } end -if not utils.lua then utils.lua = { } end +utilities = utilities or {} +local merger = utilities.merger or { } +utilities.merger = merger +utilities.report = logs and logs.reporter("system") or print -utils.merger.m_begin = "begin library merge" -utils.merger.m_end = "end library merge" -utils.merger.pattern = +merger.strip_comment = true + +local m_begin_merge = "begin library merge" +local m_end_merge = "end library merge" +local m_begin_closure = "do -- create closure to overcome 200 locals limit" +local m_end_closure = "end -- of closure" + +local m_pattern = "%c+" .. - "%-%-%s+" .. utils.merger.m_begin .. + "%-%-%s+" .. m_begin_merge .. "%c+(.-)%c+" .. - "%-%-%s+" .. utils.merger.m_end .. + "%-%-%s+" .. m_end_merge .. "%c+" -function utils.merger._self_fake_() - return - "-- " .. "created merged file" .. "\n\n" .. - "-- " .. utils.merger.m_begin .. "\n\n" .. - "-- " .. utils.merger.m_end .. "\n\n" -end +local m_format = + "\n\n-- " .. m_begin_merge .. + "\n%s\n" .. + "-- " .. m_end_merge .. "\n\n" + +local m_faked = + "-- " .. "created merged file" .. "\n\n" .. + "-- " .. m_begin_merge .. "\n\n" .. + "-- " .. m_end_merge .. "\n\n" -function utils.report(...) - print(...) +local function self_fake() + return m_faked end -utils.merger.strip_comment = true +local function self_nothing() + return "" +end -function utils.merger._self_load_(name) - local f, data = io.open(name), "" - if f then - utils.report("reading merge from %s",name) - data = f:read("*all") - f:close() +local function self_load(name) + local data = io.loaddata(name) or "" + if data == "" then + utilities.report("merge: unknown file %s",name) else - utils.report("unknown file to merge %s",name) - end - if data and utils.merger.strip_comment then - -- saves some 20K - data = gsub(data,"%-%-~[^\n\r]*[\r\n]", "") + utilities.report("merge: inserting %s",name) end return data or "" end -function utils.merger._self_save_(name, data) +local function self_save(name, data) if data ~= "" then - local f = io.open(name,'w') - if f then - utils.report("saving merge from %s",name) - f:write(data) - f:close() + if merger.strip_comment then + local n = #data + -- saves some 20K .. scite comments + data = gsub(data,"%-%-~[^\n\r]*[\r\n]","") + -- saves some 20K .. ldx comments + data = gsub(data,"%-%-%[%[ldx%-%-.-%-%-ldx%]%]%-%-","") + utilities.report("merge: %s bytes of comment stripped, %s bytes of code left",n-#data,#data) end + io.savedata(name,data) + utilities.report("merge: saving %s",name) end end -function utils.merger._self_swap_(data,code) - if data ~= "" then - return (gsub(data,utils.merger.pattern, function(s) - return "\n\n" .. "-- "..utils.merger.m_begin .. "\n" .. code .. "\n" .. "-- "..utils.merger.m_end .. "\n\n" - end, 1)) - else - return "" - end +local function self_swap(data,code) + return data ~= "" and (gsub(data,m_pattern, function() return format(m_format,code) end, 1)) or "" end ---~ stripper: ---~ ---~ data = gsub(data,"%-%-~[^\n]*\n","") ---~ data = gsub(data,"\n\n+","\n") - -function utils.merger._self_libs_(libs,list) - local result, f, frozen = { }, nil, false +local function self_libs(libs,list) + local result, f, frozen, foundpath = { }, nil, false, nil result[#result+1] = "\n" if type(libs) == 'string' then libs = { libs } end if type(list) == 'string' then list = { list } end - local foundpath = nil for i=1,#libs do local lib = libs[i] for j=1,#list do local pth = gsub(list[j],"\\","/") -- file.clean_path - utils.report("checking library path %s",pth) + utilities.report("merge: checking library path %s",pth) local name = pth .. "/" .. lib if lfs.isfile(name) then foundpath = pth @@ -99,78 +96,44 @@ function utils.merger._self_libs_(libs,list) if foundpath then break end end if foundpath then - utils.report("using library path %s",foundpath) + utilities.report("merge: using library path %s",foundpath) local right, wrong = { }, { } for i=1,#libs do local lib = libs[i] local fullname = foundpath .. "/" .. lib if lfs.isfile(fullname) then - -- right[#right+1] = lib - utils.report("merging library %s",fullname) - result[#result+1] = "do -- create closure to overcome 200 locals limit" + utilities.report("merge: using library %s",fullname) + right[#right+1] = lib + result[#result+1] = m_begin_closure result[#result+1] = io.loaddata(fullname,true) - result[#result+1] = "end -- of closure" + result[#result+1] = m_end_closure else - -- wrong[#wrong+1] = lib - utils.report("no library %s",fullname) + utilities.report("merge: skipping library %s",fullname) + wrong[#wrong+1] = lib end end if #right > 0 then - utils.report("merged libraries: %s",concat(right," ")) + utilities.report("merge: used libraries: %s",concat(right," ")) end if #wrong > 0 then - utils.report("skipped libraries: %s",concat(wrong," ")) + utilities.report("merge: skipped libraries: %s",concat(wrong," ")) end else - utils.report("no valid library path found") + utilities.report("merge: no valid library path found") end return concat(result, "\n\n") end -function utils.merger.selfcreate(libs,list,target) +function merger.selfcreate(libs,list,target) if target then - utils.merger._self_save_( - target, - utils.merger._self_swap_( - utils.merger._self_fake_(), - utils.merger._self_libs_(libs,list) - ) - ) + self_save(target,self_swap(self_fake(),self_libs(libs,list))) end end -function utils.merger.selfmerge(name,libs,list,target) - utils.merger._self_save_( - target or name, - utils.merger._self_swap_( - utils.merger._self_load_(name), - utils.merger._self_libs_(libs,list) - ) - ) +function merger.selfmerge(name,libs,list,target) + self_save(target or name,self_swap(self_load(name),self_libs(libs,list))) end -function utils.merger.selfclean(name) - utils.merger._self_save_( - name, - utils.merger._self_swap_( - utils.merger._self_load_(name), - "" - ) - ) +function merger.selfclean(name) + self_save(name,self_swap(self_load(name),self_nothing())) end - -function utils.lua.compile(luafile, lucfile, cleanup, strip) -- defaults: cleanup=false strip=true - -- utils.report("compiling",luafile,"into",lucfile) - os.remove(lucfile) - local command = "-o " .. string.quote(lucfile) .. " " .. string.quote(luafile) - if strip ~= false then - command = "-s " .. command - end - local done = (os.spawn("texluac " .. command) == 0) or (os.spawn("luac " .. command) == 0) - if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then - -- utils.report("removing",luafile) - os.remove(luafile) - end - return done -end - -- cgit v1.2.3 From 013563d52aee465e2435a288e750276bdc236fbd Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 22:07:39 +0200 Subject: reflect move l-aux -> util-tab.lua --- README | 4 +- lualibs-aux.lua | 257 --------------------------------------------------- lualibs-util-tab.lua | 257 +++++++++++++++++++++++++++++++++++++++++++++++++++ lualibs.dtx | 6 +- 4 files changed, 262 insertions(+), 262 deletions(-) delete mode 100644 lualibs-aux.lua create mode 100644 lualibs-util-tab.lua diff --git a/README b/README index 51f5794..5cbdd8b 100644 --- a/README +++ b/README @@ -32,7 +32,6 @@ Manifest Source files: lualibs.dtx - lualibs-aux.lua lualibs-boolean.lua lualibs-dir.lua lualibs-file.lua @@ -47,8 +46,9 @@ Source files: lualibs-table.lua lualibs-unicode.lua lualibs-url.lua - lualibs-util-mrg.lua lualibs-util-dim.lua + lualibs-util-mrg.lua + lualibs-util-tab.lua README Makefile NEWS diff --git a/lualibs-aux.lua b/lualibs-aux.lua deleted file mode 100644 index 7950a03..0000000 --- a/lualibs-aux.lua +++ /dev/null @@ -1,257 +0,0 @@ -if not modules then modules = { } end modules ['l-aux'] = { - version = 1.001, - comment = "companion to luat-lib.mkiv", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} - --- for inline, no store split : for s in string.gmatch(str,",* *([^,]+)") do .. end - -aux = aux or { } - -local concat, format, gmatch = table.concat, string.format, string.gmatch -local tostring, type = tostring, type -local lpegmatch = lpeg.match - -local P, R, V = lpeg.P, lpeg.R, lpeg.V - -local escape, left, right = P("\\"), P('{'), P('}') - -lpeg.patterns.balanced = P { - [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0, - [2] = left * V(1) * right -} - -local space = lpeg.P(' ') -local equal = lpeg.P("=") -local comma = lpeg.P(",") -local lbrace = lpeg.P("{") -local rbrace = lpeg.P("}") -local nobrace = 1 - (lbrace+rbrace) -local nested = lpeg.P { lbrace * (nobrace + lpeg.V(1))^0 * rbrace } -local spaces = space^0 - -local value = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0) - -local key = lpeg.C((1-equal-comma)^1) -local pattern_a = (space+comma)^0 * (key * equal * value + key * lpeg.C("")) -local pattern_c = (space+comma)^0 * (key * equal * value) - -local key = lpeg.C((1-space-equal-comma)^1) -local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + lpeg.C(""))) - --- "a=1, b=2, c=3, d={a{b,c}d}, e=12345, f=xx{a{b,c}d}xx, g={}" : outer {} removes, leading spaces ignored - -local hash = { } - -local function set(key,value) -- using Carg is slower here - hash[key] = value -end - -local pattern_a_s = (pattern_a/set)^1 -local pattern_b_s = (pattern_b/set)^1 -local pattern_c_s = (pattern_c/set)^1 - -aux.settings_to_hash_pattern_a = pattern_a_s -aux.settings_to_hash_pattern_b = pattern_b_s -aux.settings_to_hash_pattern_c = pattern_c_s - -function aux.make_settings_to_hash_pattern(set,how) - if how == "strict" then - return (pattern_c/set)^1 - elseif how == "tolerant" then - return (pattern_b/set)^1 - else - return (pattern_a/set)^1 - end -end - -function aux.settings_to_hash(str,existing) - if str and str ~= "" then - hash = existing or { } - if moretolerant then - lpegmatch(pattern_b_s,str) - else - lpegmatch(pattern_a_s,str) - end - return hash - else - return { } - end -end - -function aux.settings_to_hash_tolerant(str,existing) - if str and str ~= "" then - hash = existing or { } - lpegmatch(pattern_b_s,str) - return hash - else - return { } - end -end - -function aux.settings_to_hash_strict(str,existing) - if str and str ~= "" then - hash = existing or { } - lpegmatch(pattern_c_s,str) - return next(hash) and hash - else - return nil - end -end - -local separator = comma * space^0 -local value = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0) -local pattern = lpeg.Ct(value*(separator*value)^0) - --- "aap, {noot}, mies" : outer {} removes, leading spaces ignored - -aux.settings_to_array_pattern = pattern - --- we could use a weak table as cache - -function aux.settings_to_array(str) - if not str or str == "" then - return { } - else - return lpegmatch(pattern,str) - end -end - -local function set(t,v) - t[#t+1] = v -end - -local value = lpeg.P(lpeg.Carg(1)*value) / set -local pattern = value*(separator*value)^0 * lpeg.Carg(1) - -function aux.add_settings_to_array(t,str) - return lpegmatch(pattern,str,nil,t) -end - -function aux.hash_to_string(h,separator,yes,no,strict,omit) - if h then - local t, s = { }, table.sortedkeys(h) - omit = omit and table.tohash(omit) - for i=1,#s do - local key = s[i] - if not omit or not omit[key] then - local value = h[key] - if type(value) == "boolean" then - if yes and no then - if value then - t[#t+1] = key .. '=' .. yes - elseif not strict then - t[#t+1] = key .. '=' .. no - end - elseif value or not strict then - t[#t+1] = key .. '=' .. tostring(value) - end - else - t[#t+1] = key .. '=' .. value - end - end - end - return concat(t,separator or ",") - else - return "" - end -end - -function aux.array_to_string(a,separator) - if a then - return concat(a,separator or ",") - else - return "" - end -end - -function aux.settings_to_set(str,t) - t = t or { } - for s in gmatch(str,"%s*([^,]+)") do - t[s] = true - end - return t -end - -local value = lbrace * lpeg.C((nobrace + nested)^0) * rbrace -local pattern = lpeg.Ct((space + value)^0) - -function aux.arguments_to_table(str) - return lpegmatch(pattern,str) -end - --- temporary here - -function aux.getparameters(self,class,parentclass,settings) - local sc = self[class] - if not sc then - sc = table.clone(self[parent]) - self[class] = sc - end - aux.settings_to_hash(settings,sc) -end - --- temporary here - -local digit = lpeg.R("09") -local period = lpeg.P(".") -local zero = lpeg.P("0") -local trailingzeros = zero^0 * -digit -- suggested by Roberto R -local case_1 = period * trailingzeros / "" -local case_2 = period * (digit - trailingzeros)^1 * (trailingzeros / "") -local number = digit^1 * (case_1 + case_2) -local stripper = lpeg.Cs((number + 1)^0) - ---~ local sample = "bla 11.00 bla 11 bla 0.1100 bla 1.00100 bla 0.00 bla 0.001 bla 1.1100 bla 0.100100100 bla 0.00100100100" ---~ collectgarbage("collect") ---~ str = string.rep(sample,10000) ---~ local ts = os.clock() ---~ lpegmatch(stripper,str) ---~ print(#str, os.clock()-ts, lpegmatch(stripper,sample)) - -lpeg.patterns.strip_zeros = stripper - -function aux.strip_zeros(str) - return lpegmatch(stripper,str) -end - -function aux.definetable(target) -- defines undefined tables - local composed, t = nil, { } - for name in gmatch(target,"([^%.]+)") do - if composed then - composed = composed .. "." .. name - else - composed = name - end - t[#t+1] = format("%s = %s or { }",composed,composed) - end - return concat(t,"\n") -end - -function aux.accesstable(target) - local t = _G - for name in gmatch(target,"([^%.]+)") do - t = t[name] - end - return t -end - --- as we use this a lot ... - ---~ function aux.cachefunction(action,weak) ---~ local cache = { } ---~ if weak then ---~ setmetatable(cache, { __mode = "kv" } ) ---~ end ---~ local function reminder(str) ---~ local found = cache[str] ---~ if not found then ---~ found = action(str) ---~ cache[str] = found ---~ end ---~ return found ---~ end ---~ return reminder, cache ---~ end diff --git a/lualibs-util-tab.lua b/lualibs-util-tab.lua new file mode 100644 index 0000000..7950a03 --- /dev/null +++ b/lualibs-util-tab.lua @@ -0,0 +1,257 @@ +if not modules then modules = { } end modules ['l-aux'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- for inline, no store split : for s in string.gmatch(str,",* *([^,]+)") do .. end + +aux = aux or { } + +local concat, format, gmatch = table.concat, string.format, string.gmatch +local tostring, type = tostring, type +local lpegmatch = lpeg.match + +local P, R, V = lpeg.P, lpeg.R, lpeg.V + +local escape, left, right = P("\\"), P('{'), P('}') + +lpeg.patterns.balanced = P { + [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0, + [2] = left * V(1) * right +} + +local space = lpeg.P(' ') +local equal = lpeg.P("=") +local comma = lpeg.P(",") +local lbrace = lpeg.P("{") +local rbrace = lpeg.P("}") +local nobrace = 1 - (lbrace+rbrace) +local nested = lpeg.P { lbrace * (nobrace + lpeg.V(1))^0 * rbrace } +local spaces = space^0 + +local value = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0) + +local key = lpeg.C((1-equal-comma)^1) +local pattern_a = (space+comma)^0 * (key * equal * value + key * lpeg.C("")) +local pattern_c = (space+comma)^0 * (key * equal * value) + +local key = lpeg.C((1-space-equal-comma)^1) +local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + lpeg.C(""))) + +-- "a=1, b=2, c=3, d={a{b,c}d}, e=12345, f=xx{a{b,c}d}xx, g={}" : outer {} removes, leading spaces ignored + +local hash = { } + +local function set(key,value) -- using Carg is slower here + hash[key] = value +end + +local pattern_a_s = (pattern_a/set)^1 +local pattern_b_s = (pattern_b/set)^1 +local pattern_c_s = (pattern_c/set)^1 + +aux.settings_to_hash_pattern_a = pattern_a_s +aux.settings_to_hash_pattern_b = pattern_b_s +aux.settings_to_hash_pattern_c = pattern_c_s + +function aux.make_settings_to_hash_pattern(set,how) + if how == "strict" then + return (pattern_c/set)^1 + elseif how == "tolerant" then + return (pattern_b/set)^1 + else + return (pattern_a/set)^1 + end +end + +function aux.settings_to_hash(str,existing) + if str and str ~= "" then + hash = existing or { } + if moretolerant then + lpegmatch(pattern_b_s,str) + else + lpegmatch(pattern_a_s,str) + end + return hash + else + return { } + end +end + +function aux.settings_to_hash_tolerant(str,existing) + if str and str ~= "" then + hash = existing or { } + lpegmatch(pattern_b_s,str) + return hash + else + return { } + end +end + +function aux.settings_to_hash_strict(str,existing) + if str and str ~= "" then + hash = existing or { } + lpegmatch(pattern_c_s,str) + return next(hash) and hash + else + return nil + end +end + +local separator = comma * space^0 +local value = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0) +local pattern = lpeg.Ct(value*(separator*value)^0) + +-- "aap, {noot}, mies" : outer {} removes, leading spaces ignored + +aux.settings_to_array_pattern = pattern + +-- we could use a weak table as cache + +function aux.settings_to_array(str) + if not str or str == "" then + return { } + else + return lpegmatch(pattern,str) + end +end + +local function set(t,v) + t[#t+1] = v +end + +local value = lpeg.P(lpeg.Carg(1)*value) / set +local pattern = value*(separator*value)^0 * lpeg.Carg(1) + +function aux.add_settings_to_array(t,str) + return lpegmatch(pattern,str,nil,t) +end + +function aux.hash_to_string(h,separator,yes,no,strict,omit) + if h then + local t, s = { }, table.sortedkeys(h) + omit = omit and table.tohash(omit) + for i=1,#s do + local key = s[i] + if not omit or not omit[key] then + local value = h[key] + if type(value) == "boolean" then + if yes and no then + if value then + t[#t+1] = key .. '=' .. yes + elseif not strict then + t[#t+1] = key .. '=' .. no + end + elseif value or not strict then + t[#t+1] = key .. '=' .. tostring(value) + end + else + t[#t+1] = key .. '=' .. value + end + end + end + return concat(t,separator or ",") + else + return "" + end +end + +function aux.array_to_string(a,separator) + if a then + return concat(a,separator or ",") + else + return "" + end +end + +function aux.settings_to_set(str,t) + t = t or { } + for s in gmatch(str,"%s*([^,]+)") do + t[s] = true + end + return t +end + +local value = lbrace * lpeg.C((nobrace + nested)^0) * rbrace +local pattern = lpeg.Ct((space + value)^0) + +function aux.arguments_to_table(str) + return lpegmatch(pattern,str) +end + +-- temporary here + +function aux.getparameters(self,class,parentclass,settings) + local sc = self[class] + if not sc then + sc = table.clone(self[parent]) + self[class] = sc + end + aux.settings_to_hash(settings,sc) +end + +-- temporary here + +local digit = lpeg.R("09") +local period = lpeg.P(".") +local zero = lpeg.P("0") +local trailingzeros = zero^0 * -digit -- suggested by Roberto R +local case_1 = period * trailingzeros / "" +local case_2 = period * (digit - trailingzeros)^1 * (trailingzeros / "") +local number = digit^1 * (case_1 + case_2) +local stripper = lpeg.Cs((number + 1)^0) + +--~ local sample = "bla 11.00 bla 11 bla 0.1100 bla 1.00100 bla 0.00 bla 0.001 bla 1.1100 bla 0.100100100 bla 0.00100100100" +--~ collectgarbage("collect") +--~ str = string.rep(sample,10000) +--~ local ts = os.clock() +--~ lpegmatch(stripper,str) +--~ print(#str, os.clock()-ts, lpegmatch(stripper,sample)) + +lpeg.patterns.strip_zeros = stripper + +function aux.strip_zeros(str) + return lpegmatch(stripper,str) +end + +function aux.definetable(target) -- defines undefined tables + local composed, t = nil, { } + for name in gmatch(target,"([^%.]+)") do + if composed then + composed = composed .. "." .. name + else + composed = name + end + t[#t+1] = format("%s = %s or { }",composed,composed) + end + return concat(t,"\n") +end + +function aux.accesstable(target) + local t = _G + for name in gmatch(target,"([^%.]+)") do + t = t[name] + end + return t +end + +-- as we use this a lot ... + +--~ function aux.cachefunction(action,weak) +--~ local cache = { } +--~ if weak then +--~ setmetatable(cache, { __mode = "kv" } ) +--~ end +--~ local function reminder(str) +--~ local found = cache[str] +--~ if not found then +--~ found = action(str) +--~ cache[str] = found +--~ end +--~ return found +--~ end +--~ return reminder, cache +--~ end diff --git a/lualibs.dtx b/lualibs.dtx index 1fdd624..4d36dda 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -185,17 +185,17 @@ require("lualibs-boolean") require("lualibs-number") require("lualibs-math") require("lualibs-table") -require("lualibs-aux") require("lualibs-io") require("lualibs-os") require("lualibs-file") require("lualibs-md5") require("lualibs-dir") require("lualibs-unicode") -require("lualibs-util-mrg") -require("lualibs-util-dim") require("lualibs-url") require("lualibs-set") +require("lualibs-util-tab") +require("lualibs-util-mrg") +require("lualibs-util-dim") % \end{macrocode} % % \iffalse -- cgit v1.2.3 From ee229c4cb8b72d7d69c7283a9817a601d37e22b7 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 22:11:40 +0200 Subject: update util-tab --- lualibs-util-dim.lua | 2 +- lualibs-util-lua.lua | 234 ++++++++++++++++++++++++++++ lualibs-util-mrg.lua | 2 +- lualibs-util-sto.lua | 172 +++++++++++++++++++++ lualibs-util-tab.lua | 427 +++++++++++++++++++++++++++------------------------ 5 files changed, 635 insertions(+), 202 deletions(-) create mode 100644 lualibs-util-lua.lua create mode 100644 lualibs-util-sto.lua diff --git a/lualibs-util-dim.lua b/lualibs-util-dim.lua index d77bd49..4bae2d0 100644 --- a/lualibs-util-dim.lua +++ b/lualibs-util-dim.lua @@ -1,4 +1,4 @@ -if not modules then modules = { } end modules ['l-dimen'] = { +if not modules then modules = { } end modules ['util-dim'] = { version = 1.001, comment = "support for dimensions", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", diff --git a/lualibs-util-lua.lua b/lualibs-util-lua.lua new file mode 100644 index 0000000..2baeaa8 --- /dev/null +++ b/lualibs-util-lua.lua @@ -0,0 +1,234 @@ +if not modules then modules = { } end modules ['util-lua'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + comment = "the strip code is written by Peter Cawley", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format +local loadstring, loadfile, type = loadstring, loadfile, type + +utilities = utilities or {} +utilities.lua = utilities.lua or { } +local luautilities = utilities.lua + +utilities.report = logs and logs.reporter("system") or print -- can be overloaded later + +local tracestripping = false +local forcestupidcompile = true -- use internal bytecode compiler +luautilities.stripcode = true -- support stripping when asked for +luautilities.alwaysstripcode = false -- saves 1 meg on 7 meg compressed format file (2012.08.12) +luautilities.nofstrippedchunks = 0 +luautilities.nofstrippedbytes = 0 + +-- The next function was posted by Peter Cawley on the lua list and strips line +-- number information etc. from the bytecode data blob. We only apply this trick +-- when we store data tables. Stripping makes the compressed format file about +-- 1MB smaller (and uncompressed we save at least 6MB). +-- +-- You can consider this feature an experiment, so it might disappear. There is +-- no noticeable gain in runtime although the memory footprint should be somewhat +-- smaller (and the file system has a bit less to deal with). +-- +-- Begin of borrowed code ... works for Lua 5.1 which LuaTeX currently uses ... + +local function strip_code_pc(dump,name) + local before = #dump + local version, format, endian, int, size, ins, num = byte(dump,5,11) + local subint + if endian == 1 then + subint = function(dump, i, l) + local val = 0 + for n = l, 1, -1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l + end + else + subint = function(dump, i, l) + local val = 0 + for n = 1, l, 1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l + end + end + local strip_function + strip_function = function(dump) + local count, offset = subint(dump, 1, size) + local stripped, dirty = rep("\0", size), offset + count + offset = offset + count + int * 2 + 4 + offset = offset + int + subint(dump, offset, int) * ins + count, offset = subint(dump, offset, int) + for n = 1, count do + local t + t, offset = subint(dump, offset, 1) + if t == 1 then + offset = offset + 1 + elseif t == 4 then + offset = offset + size + subint(dump, offset, size) + elseif t == 3 then + offset = offset + num + end + end + count, offset = subint(dump, offset, int) + stripped = stripped .. sub(dump,dirty, offset - 1) + for n = 1, count do + local proto, off = strip_function(sub(dump,offset, -1)) + stripped, offset = stripped .. proto, offset + off - 1 + end + offset = offset + subint(dump, offset, int) * int + int + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + int * 2 + end + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + end + stripped = stripped .. rep("\0", int * 3) + return stripped, offset + end + dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1)) + local after = #dump + local delta = before-after + if tracestripping then + utilities.report("stripped bytecode: %s, before %s, after %s, delta %s",name or "unknown",before,after,delta) + end + luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1 + luautilities.nofstrippedbytes = luautilities.nofstrippedbytes + delta + return dump, delta +end + +-- ... end of borrowed code. + +local function strippedbytecode(code,forcestrip,name) + if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then + return strip_code_pc(code,name) + else + return code, 0 + end +end + +luautilities.stripbytecode = strip_code_pc +luautilities.strippedbytecode = strippedbytecode + +local function fatalerror(name) + utilities.report(format("fatal error in %q",name or "unknown")) +end + +-- quite subtle ... doing this wrong incidentally can give more bytes + + +function luautilities.loadedluacode(fullname,forcestrip,name) + -- quite subtle ... doing this wrong incidentally can give more bytes + name = name or fullname + local code = loadfile(fullname) + if code then + code() + end + if forcestrip and luautilities.stripcode then + if type(forcestrip) == "function" then + forcestrip = forcestrip(fullname) + end + if forcestrip then + local code, n = strip_code_pc(dump(code,name)) + return loadstring(code), n + elseif luautilities.alwaysstripcode then + return loadstring(strip_code_pc(dump(code),name)) + else + return code, 0 + end + elseif luautilities.alwaysstripcode then + return loadstring(strip_code_pc(dump(code),name)) + else + return code, 0 + end +end + +function luautilities.strippedloadstring(code,forcestrip,name) -- not executed + local n = 0 + if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then + code = loadstring(code) + if not code then + fatalerror(name) + end + code, n = strip_code_pc(dump(code),name) + end + return loadstring(code), n +end + +local function stupidcompile(luafile,lucfile,strip) + local code = io.loaddata(luafile) + local n = 0 + if code and code ~= "" then + code = loadstring(code) + if not code then + fatalerror() + end + code = dump(code) + if strip then + code, n = strippedbytecode(code,true,luafile) -- last one is reported + end + if code and code ~= "" then + io.savedata(lucfile,code) + end + end + return n +end + +local luac_normal = "texluac -o %q %q" +local luac_strip = "texluac -s -o %q %q" + +function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true + utilities.report("lua: compiling %s into %s",luafile,lucfile) + os.remove(lucfile) + local done = false + if strip ~= false then + strip = true + end + if forcestupidcompile then + fallback = true + elseif strip then + done = os.spawn(format(luac_strip, lucfile,luafile)) == 0 + else + done = os.spawn(format(luac_normal,lucfile,luafile)) == 0 + end + if not done and fallback then + local n = stupidcompile(luafile,lucfile,strip) + if n > 0 then + utilities.report("lua: %s dumped into %s (%i bytes stripped)",luafile,lucfile,n) + else + utilities.report("lua: %s dumped into %s (unstripped)",luafile,lucfile) + end + cleanup = false -- better see how bad it is + end + if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then + utilities.report("lua: removing %s",luafile) + os.remove(luafile) + end + return done +end +--~ local getmetatable, type = getmetatable, type + +--~ local types = { } + +--~ function luautilities.registerdatatype(d,name) +--~ types[getmetatable(d)] = name +--~ end + +--~ function luautilities.datatype(d) +--~ local t = type(d) +--~ if t == "userdata" then +--~ local m = getmetatable(d) +--~ return m and types[m] or "userdata" +--~ else +--~ return t +--~ end +--~ end + +--~ luautilities.registerdatatype(lpeg.P("!"),"lpeg") + +--~ print(luautilities.datatype(lpeg.P("oeps"))) diff --git a/lualibs-util-mrg.lua b/lualibs-util-mrg.lua index d59745b..8d6c5dd 100644 --- a/lualibs-util-mrg.lua +++ b/lualibs-util-mrg.lua @@ -1,4 +1,4 @@ -if not modules then modules = { } end modules ['l-utils'] = { +if not modules then modules = { } end modules ['util-mrg'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", diff --git a/lualibs-util-sto.lua b/lualibs-util-sto.lua new file mode 100644 index 0000000..42ee6cf --- /dev/null +++ b/lualibs-util-sto.lua @@ -0,0 +1,172 @@ +if not modules then modules = { } end modules ['util-sto'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local setmetatable, getmetatable = setmetatable, getmetatable + +utilities = utilities or { } +utilities.storage = utilities.storage or { } +local storage = utilities.storage + +function storage.mark(t) + if not t then + texio.write_nl("fatal error: storage cannot be marked") + return -- os.exit() + end + local m = getmetatable(t) + if not m then + m = { } + setmetatable(t,m) + end + m.__storage__ = true + return t +end + +function storage.allocate(t) + t = t or { } + local m = getmetatable(t) + if not m then + m = { } + setmetatable(t,m) + end + m.__storage__ = true + return t +end + +function storage.marked(t) + local m = getmetatable(t) + return m and m.__storage__ +end + +function storage.checked(t) + if not t then + texio.write_nl("fatal error: storage has not been allocated") + return -- os.exit() + end + return t +end + +--~ function utilities.storage.delay(parent,name,filename) +--~ local m = getmetatable(parent) +--~ m.__list[name] = filename +--~ end +--~ +--~ function utilities.storage.predefine(parent) +--~ local list = { } +--~ local m = getmetatable(parent) or { +--~ __list = list, +--~ __index = function(t,k) +--~ local l = require(list[k]) +--~ t[k] = l +--~ return l +--~ end +--~ } +--~ setmetatable(parent,m) +--~ end +--~ +--~ bla = { } +--~ utilities.storage.predefine(bla) +--~ utilities.storage.delay(bla,"test","oepsoeps") +--~ local t = bla.test +--~ table.print(t) +--~ print(t.a) + +function storage.setinitializer(data,initialize) + local m = getmetatable(data) or { } + m.__index = function(data,k) + m.__index = nil -- so that we can access the entries during initializing + initialize() + return data[k] + end + setmetatable(data, m) +end + +local keyisvalue = { __index = function(t,k) + t[k] = k + return k +end } + +function storage.sparse(t) + t = t or { } + setmetatable(t,keyisvalue) + return t +end + +-- table namespace ? + +local function f_empty () return "" end -- t,k +local function f_self (t,k) t[k] = k return k end +local function f_ignore() end -- t,k,v + +local t_empty = { __index = f_empty } +local t_self = { __index = f_self } +local t_ignore = { __newindex = f_ignore } + +function table.setmetatableindex(t,f) + local m = getmetatable(t) + if m then + if f == "empty" then + m.__index = f_empty + elseif f == "key" then + m.__index = f_self + else + m.__index = f + end + else + if f == "empty" then + setmetatable(t, t_empty) + elseif f == "key" then + setmetatable(t, t_self) + else + setmetatable(t,{ __index = f }) + end + end + return t +end + +function table.setmetatablenewindex(t,f) + local m = getmetatable(t) + if m then + if f == "ignore" then + m.__newindex = f_ignore + else + m.__newindex = f + end + else + if f == "ignore" then + setmetatable(t, t_ignore) + else + setmetatable(t,{ __newindex = f }) + end + end + return t +end + +function table.setmetatablecall(t,f) + local m = getmetatable(t) + if m then + m.__call = f + else + setmetatable(t,{ __call = f }) + end + return t +end + +function table.setmetatablekey(t,key,value) + local m = getmetatable(t) + if not m then + m = { } + setmetatable(t,m) + end + m[key] = value + return t +end + +function table.getmetatablekey(t,key,value) + local m = getmetatable(t) + return m and m[key] +end diff --git a/lualibs-util-tab.lua b/lualibs-util-tab.lua index 7950a03..7a2da29 100644 --- a/lualibs-util-tab.lua +++ b/lualibs-util-tab.lua @@ -1,4 +1,4 @@ -if not modules then modules = { } end modules ['l-aux'] = { +if not modules then modules = { } end modules ['util-tab'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -6,252 +6,279 @@ if not modules then modules = { } end modules ['l-aux'] = { license = "see context related readme files" } --- for inline, no store split : for s in string.gmatch(str,",* *([^,]+)") do .. end +utilities = utilities or {} +utilities.tables = utilities.tables or { } +local tables = utilities.tables -aux = aux or { } +local format, gmatch, rep, gsub = string.format, string.gmatch, string.rep, string.gsub +local concat, insert, remove = table.concat, table.insert, table.remove +local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring +local type, next, rawset, tonumber, loadstring = type, next, rawset, tonumber, loadstring +local lpegmatch, P, Cs = lpeg.match, lpeg.P, lpeg.Cs -local concat, format, gmatch = table.concat, string.format, string.gmatch -local tostring, type = tostring, type -local lpegmatch = lpeg.match - -local P, R, V = lpeg.P, lpeg.R, lpeg.V - -local escape, left, right = P("\\"), P('{'), P('}') - -lpeg.patterns.balanced = P { - [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0, - [2] = left * V(1) * right -} - -local space = lpeg.P(' ') -local equal = lpeg.P("=") -local comma = lpeg.P(",") -local lbrace = lpeg.P("{") -local rbrace = lpeg.P("}") -local nobrace = 1 - (lbrace+rbrace) -local nested = lpeg.P { lbrace * (nobrace + lpeg.V(1))^0 * rbrace } -local spaces = space^0 - -local value = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0) - -local key = lpeg.C((1-equal-comma)^1) -local pattern_a = (space+comma)^0 * (key * equal * value + key * lpeg.C("")) -local pattern_c = (space+comma)^0 * (key * equal * value) - -local key = lpeg.C((1-space-equal-comma)^1) -local pattern_b = spaces * comma^0 * spaces * (key * ((spaces * equal * spaces * value) + lpeg.C(""))) - --- "a=1, b=2, c=3, d={a{b,c}d}, e=12345, f=xx{a{b,c}d}xx, g={}" : outer {} removes, leading spaces ignored - -local hash = { } - -local function set(key,value) -- using Carg is slower here - hash[key] = value +function tables.definetable(target) -- defines undefined tables + local composed, t, n = nil, { }, 0 + for name in gmatch(target,"([^%.]+)") do + n = n + 1 + if composed then + composed = composed .. "." .. name + else + composed = name + end + t[n] = format("%s = %s or { }",composed,composed) + end + return concat(t,"\n") end -local pattern_a_s = (pattern_a/set)^1 -local pattern_b_s = (pattern_b/set)^1 -local pattern_c_s = (pattern_c/set)^1 - -aux.settings_to_hash_pattern_a = pattern_a_s -aux.settings_to_hash_pattern_b = pattern_b_s -aux.settings_to_hash_pattern_c = pattern_c_s - -function aux.make_settings_to_hash_pattern(set,how) - if how == "strict" then - return (pattern_c/set)^1 - elseif how == "tolerant" then - return (pattern_b/set)^1 - else - return (pattern_a/set)^1 +function tables.accesstable(target,root) + local t = root or _G + for name in gmatch(target,"([^%.]+)") do + t = t[name] + if not t then + return + end end + return t end -function aux.settings_to_hash(str,existing) - if str and str ~= "" then - hash = existing or { } - if moretolerant then - lpegmatch(pattern_b_s,str) - else - lpegmatch(pattern_a_s,str) +function tables.migratetable(target,v,root) + local t = root or _G + local names = string.split(target,".") + for i=1,#names-1 do + local name = names[i] + t[name] = t[name] or { } + t = t[name] + if not t then + return end - return hash - else - return { } end + t[names[#names]] = v end -function aux.settings_to_hash_tolerant(str,existing) - if str and str ~= "" then - hash = existing or { } - lpegmatch(pattern_b_s,str) - return hash - else - return { } +function tables.removevalue(t,value) -- todo: n + if value then + for i=1,#t do + if t[i] == value then + remove(t,i) + -- remove all, so no: return + end + end end end -function aux.settings_to_hash_strict(str,existing) - if str and str ~= "" then - hash = existing or { } - lpegmatch(pattern_c_s,str) - return next(hash) and hash - else - return nil +function tables.insertbeforevalue(t,value,extra) + for i=1,#t do + if t[i] == extra then + remove(t,i) + end + end + for i=1,#t do + if t[i] == value then + insert(t,i,extra) + return + end end + insert(t,1,extra) end -local separator = comma * space^0 -local value = lpeg.P(lbrace * lpeg.C((nobrace + nested)^0) * rbrace) + lpeg.C((nested + (1-comma))^0) -local pattern = lpeg.Ct(value*(separator*value)^0) - --- "aap, {noot}, mies" : outer {} removes, leading spaces ignored - -aux.settings_to_array_pattern = pattern - --- we could use a weak table as cache +function tables.insertaftervalue(t,value,extra) + for i=1,#t do + if t[i] == extra then + remove(t,i) + end + end + for i=1,#t do + if t[i] == value then + insert(t,i+1,extra) + return + end + end + insert(t,#t+1,extra) +end -function aux.settings_to_array(str) - if not str or str == "" then - return { } - else - return lpegmatch(pattern,str) +-- experimental + +local function toxml(t,d,result,step) + for k, v in table.sortedpairs(t) do + if type(v) == "table" then + if type(k) == "number" then + result[#result+1] = format("%s",d,k) + toxml(v,d..step,result,step) + result[#result+1] = format("%s",d,k) + else + result[#result+1] = format("%s<%s>",d,k) + toxml(v,d..step,result,step) + result[#result+1] = format("%s",d,k) + end + elseif type(k) == "number" then + result[#result+1] = format("%s%s",d,k,v,k) + else + result[#result+1] = format("%s<%s>%s",d,k,tostring(v),k) + end end end -local function set(t,v) - t[#t+1] = v +function table.toxml(t,name,nobanner,indent,spaces) + local noroot = name == false + local result = (nobanner or noroot) and { } or { "" } + local indent = rep(" ",indent or 0) + local spaces = rep(" ",spaces or 1) + if noroot then + toxml( t, inndent, result, spaces) + else + toxml( { [name or "root"] = t }, indent, result, spaces) + end + return concat(result,"\n") end -local value = lpeg.P(lpeg.Carg(1)*value) / set -local pattern = value*(separator*value)^0 * lpeg.Carg(1) +-- also experimental -function aux.add_settings_to_array(t,str) - return lpegmatch(pattern,str,nil,t) -end +-- encapsulate(table,utilities.tables) +-- encapsulate(table,utilities.tables,true) +-- encapsulate(table,true) -function aux.hash_to_string(h,separator,yes,no,strict,omit) - if h then - local t, s = { }, table.sortedkeys(h) - omit = omit and table.tohash(omit) - for i=1,#s do - local key = s[i] - if not omit or not omit[key] then - local value = h[key] - if type(value) == "boolean" then - if yes and no then - if value then - t[#t+1] = key .. '=' .. yes - elseif not strict then - t[#t+1] = key .. '=' .. no - end - elseif value or not strict then - t[#t+1] = key .. '=' .. tostring(value) - end +function tables.encapsulate(core,capsule,protect) + if type(capsule) ~= "table" then + protect = true + capsule = { } + end + for key, value in next, core do + if capsule[key] then + print(format("\ninvalid inheritance '%s' in '%s': %s",key,tostring(core))) + os.exit() + else + capsule[key] = value + end + end + if protect then + for key, value in next, core do + core[key] = nil + end + setmetatable(core, { + __index = capsule, + __newindex = function(t,key,value) + if capsule[key] then + print(format("\ninvalid overload '%s' in '%s'",key,tostring(core))) + os.exit() else - t[#t+1] = key .. '=' .. value + rawset(t,key,value) end end - end - return concat(t,separator or ",") - else - return "" + } ) end end -function aux.array_to_string(a,separator) - if a then - return concat(a,separator or ",") +local function serialize(t,r,outer) -- no mixes + r[#r+1] = "{" + local n = #t + if n > 0 then + for i=1,n do + local v = t[i] + local tv = type(v) + if tv == "string" then + r[#r+1] = format("%q,",v) + elseif tv == "number" then + r[#r+1] = format("%s,",v) + elseif tv == "table" then + serialize(v,r) + elseif tv == "boolean" then + r[#r+1] = format("%s,",tostring(v)) + end + end else - return "" + for k, v in next, t do + local tv = type(v) + if tv == "string" then + r[#r+1] = format("[%q]=%q,",k,v) + elseif tv == "number" then + r[#r+1] = format("[%q]=%s,",k,v) + elseif tv == "table" then + r[#r+1] = format("[%q]=",k) + serialize(v,r) + elseif tv == "boolean" then + r[#r+1] = format("[%q]=%s,",k,tostring(v)) + end + end end -end - -function aux.settings_to_set(str,t) - t = t or { } - for s in gmatch(str,"%s*([^,]+)") do - t[s] = true + if outer then + r[#r+1] = "}" + else + r[#r+1] = "}," end - return t + return r end -local value = lbrace * lpeg.C((nobrace + nested)^0) * rbrace -local pattern = lpeg.Ct((space + value)^0) - -function aux.arguments_to_table(str) - return lpegmatch(pattern,str) +function table.fastserialize(t,prefix) + return concat(serialize(t,{ prefix or "return" },true)) end --- temporary here - -function aux.getparameters(self,class,parentclass,settings) - local sc = self[class] - if not sc then - sc = table.clone(self[parent]) - self[class] = sc +function table.deserialize(str) + if not str or str == "" then + return + end + local code = loadstring(str) + if not code then + return end - aux.settings_to_hash(settings,sc) + code = code() + if not code then + return + end + return code end --- temporary here - -local digit = lpeg.R("09") -local period = lpeg.P(".") -local zero = lpeg.P("0") -local trailingzeros = zero^0 * -digit -- suggested by Roberto R -local case_1 = period * trailingzeros / "" -local case_2 = period * (digit - trailingzeros)^1 * (trailingzeros / "") -local number = digit^1 * (case_1 + case_2) -local stripper = lpeg.Cs((number + 1)^0) - ---~ local sample = "bla 11.00 bla 11 bla 0.1100 bla 1.00100 bla 0.00 bla 0.001 bla 1.1100 bla 0.100100100 bla 0.00100100100" ---~ collectgarbage("collect") ---~ str = string.rep(sample,10000) ---~ local ts = os.clock() ---~ lpegmatch(stripper,str) ---~ print(#str, os.clock()-ts, lpegmatch(stripper,sample)) - -lpeg.patterns.strip_zeros = stripper - -function aux.strip_zeros(str) - return lpegmatch(stripper,str) +-- inspect(table.fastserialize { a = 1, b = { 4, { 5, 6 } }, c = { d = 7, e = 'f"g\nh' } }) + +function table.load(filename) + if filename then + local t = io.loaddata(filename) + if t and t ~= "" then + t = loadstring(t) + if type(t) == "function" then + t = t() + if type(t) == "table" then + return t + end + end + end + end end -function aux.definetable(target) -- defines undefined tables - local composed, t = nil, { } - for name in gmatch(target,"([^%.]+)") do - if composed then - composed = composed .. "." .. name - else - composed = name +local function slowdrop(t) + local r = { } + local l = { } + for i=1,#t do + local ti = t[i] + local j = 0 + for k, v in next, ti do + j = j + 1 + l[j] = format("%s=%q",k,v) end - t[#t+1] = format("%s = %s or { }",composed,composed) + r[i] = format(" {%s},\n",concat(l)) end - return concat(t,"\n") + return format("return {\n%s}",concat(r)) end -function aux.accesstable(target) - local t = _G - for name in gmatch(target,"([^%.]+)") do - t = t[name] +local function fastdrop(t) + local r = { "return {\n" } + for i=1,#t do + local ti = t[i] + r[#r+1] = " {" + for k, v in next, ti do + r[#r+1] = format("%s=%q",k,v) + end + r[#r+1] = "},\n" end - return t + r[#r+1] = "}" + return concat(r) end --- as we use this a lot ... - ---~ function aux.cachefunction(action,weak) ---~ local cache = { } ---~ if weak then ---~ setmetatable(cache, { __mode = "kv" } ) ---~ end ---~ local function reminder(str) ---~ local found = cache[str] ---~ if not found then ---~ found = action(str) ---~ cache[str] = found ---~ end ---~ return found ---~ end ---~ return reminder, cache ---~ end +function table.drop(t,slow) + if #t == 0 then + return "return { }" + elseif slow == true then + return slowdrop(t) -- less memory + else + return fastdrop(t) -- some 15% faster + end +end -- cgit v1.2.3 From e31f51cedf7e3f479a47b28bd86fa8bf887a86b4 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 22:15:02 +0200 Subject: add util-lua util-sto --- README | 2 ++ lualibs.dtx | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/README b/README index 5cbdd8b..9132530 100644 --- a/README +++ b/README @@ -46,6 +46,8 @@ Source files: lualibs-table.lua lualibs-unicode.lua lualibs-url.lua + lualibs-util-lua.lua + lualibs-util-sto.lua lualibs-util-dim.lua lualibs-util-mrg.lua lualibs-util-tab.lua diff --git a/lualibs.dtx b/lualibs.dtx index 4d36dda..1207b7f 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -193,9 +193,11 @@ require("lualibs-dir") require("lualibs-unicode") require("lualibs-url") require("lualibs-set") -require("lualibs-util-tab") +require("lualibs-util-lua") +require("lualibs-util-sto") require("lualibs-util-mrg") require("lualibs-util-dim") +require("lualibs-util-tab") % \end{macrocode} % % \iffalse -- cgit v1.2.3 From 11ed6e5dd8fde59f51ac7b1631235906fdcb08ee Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 22:21:21 +0200 Subject: add util-str --- README | 5 +- lualibs-util-str.lua | 127 +++++++++++++++++++++++++++++++++++++++++++++++++++ lualibs.dtx | 5 +- 3 files changed, 133 insertions(+), 4 deletions(-) create mode 100644 lualibs-util-str.lua diff --git a/README b/README index 9132530..b23d37f 100644 --- a/README +++ b/README @@ -46,10 +46,11 @@ Source files: lualibs-table.lua lualibs-unicode.lua lualibs-url.lua - lualibs-util-lua.lua - lualibs-util-sto.lua lualibs-util-dim.lua + lualibs-util-lua.lua lualibs-util-mrg.lua + lualibs-util-sto.lua + lualibs-util-str.lua lualibs-util-tab.lua README Makefile diff --git a/lualibs-util-str.lua b/lualibs-util-str.lua new file mode 100644 index 0000000..377dd16 --- /dev/null +++ b/lualibs-util-str.lua @@ -0,0 +1,127 @@ +if not modules then modules = { } end modules ['util-str'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +utilities = utilities or {} +utilities.strings = utilities.strings or { } +local strings = utilities.strings + +local gsub, rep = string.gsub, string.rep +local Cs, C, Cp, P, Carg = lpeg.Cs, lpeg.C, lpeg.Cp, lpeg.P, lpeg.Carg +local patterns, lpegmatch = lpeg.patterns, lpeg.match + +-- str = " \n \ntest \n test\ntest " +-- print("["..string.gsub(string.collapsecrlf(str),"\n","+").."]") + +local rubish = patterns.spaceortab^0 * patterns.newline +local anyrubish = patterns.spaceortab + patterns.newline +local anything = patterns.anything +local stripped = (patterns.spaceortab^1 / "") * patterns.newline +local leading = rubish^0 / "" +local trailing = (anyrubish^1 * patterns.endofstring) / "" +local redundant = rubish^3 / "\n" + +local pattern = Cs(leading * (trailing + redundant + stripped + anything)^0) + +function strings.collapsecrlf(str) + return lpegmatch(pattern,str) +end + +-- The following functions might end up in another namespace. + +local repeaters = { } -- watch how we also moved the -1 in depth-1 to the creator + +function strings.newrepeater(str,offset) + offset = offset or 0 + local s = repeaters[str] + if not s then + s = { } + repeaters[str] = s + end + local t = s[offset] + if t then + return t + end + t = { } + setmetatable(t, { + __index = function(t,k) + if not k then + return "" + end + local n = k + offset + local s = n > 0 and rep(str,n) or "" + t[k] = s + return s + end + } ) + s[offset] = t + return t +end + +-- local dashes = strings.newrepeater("--",-1) +-- print(dashes[2],dashes[3],dashes[1]) + +local extra, tab, start = 0, 0, 4, 0 + +local nspaces = strings.newrepeater(" ") + +local pattern = + Carg(1) / function(t) + extra, tab, start = 0, t or 7, 1 + end + * Cs(( + Cp() * patterns.tab / function(position) + local current = (position - start + 1) + extra + local spaces = tab-(current-1) % tab + if spaces > 0 then + extra = extra + spaces - 1 + return nspaces[spaces] -- rep(" ",spaces) + else + return "" + end + end + + patterns.newline * Cp() / function(position) + extra, start = 0, position + end + + patterns.anything + )^1) + +function strings.tabtospace(str,tab) + return lpegmatch(pattern,str,1,tab or 7) +end + +--~ local t = { +--~ "1234567123456712345671234567", +--~ "\tb\tc", +--~ "a\tb\tc", +--~ "aa\tbb\tcc", +--~ "aaa\tbbb\tccc", +--~ "aaaa\tbbbb\tcccc", +--~ "aaaaa\tbbbbb\tccccc", +--~ "aaaaaa\tbbbbbb\tcccccc\n aaaaaa\tbbbbbb\tcccccc", +--~ "one\n two\nxxx three\nxx four\nx five\nsix", +--~ } +--~ for k=1,#t do +--~ print(strings.tabtospace(t[k])) +--~ end + +function strings.striplong(str) -- strips all leading spaces + str = gsub(str,"^%s*","") + str = gsub(str,"[\n\r]+ *","\n") + return str +end + +--~ local template = string.striplong([[ +--~ aaaa +--~ bb +--~ cccccc +--~ ]]) + +function strings.nice(str) + str = gsub(str,"[:%-+_]+"," ") -- maybe more + return str +end diff --git a/lualibs.dtx b/lualibs.dtx index 1207b7f..8f0d19c 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -193,10 +193,11 @@ require("lualibs-dir") require("lualibs-unicode") require("lualibs-url") require("lualibs-set") +require("lualibs-util-dim") require("lualibs-util-lua") -require("lualibs-util-sto") require("lualibs-util-mrg") -require("lualibs-util-dim") +require("lualibs-util-sto") +require("lualibs-util-str") require("lualibs-util-tab") % \end{macrocode} % -- cgit v1.2.3 From 1762bae45c50bf172313c08c004b8b22f1c48724 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 22:30:20 +0200 Subject: add util-jsn --- README | 1 + lualibs-util-jsn.lua | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++ lualibs.dtx | 5 +- 3 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 lualibs-util-jsn.lua diff --git a/README b/README index b23d37f..57bd351 100644 --- a/README +++ b/README @@ -47,6 +47,7 @@ Source files: lualibs-unicode.lua lualibs-url.lua lualibs-util-dim.lua + lualibs-util-jsn.lua lualibs-util-lua.lua lualibs-util-mrg.lua lualibs-util-sto.lua diff --git a/lualibs-util-jsn.lua b/lualibs-util-jsn.lua new file mode 100644 index 0000000..7493f10 --- /dev/null +++ b/lualibs-util-jsn.lua @@ -0,0 +1,145 @@ +if not modules then modules = { } end modules ['util-jsn'] = { + version = 1.001, + comment = "companion to m-json.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- Of course we could make a nice complete parser with proper error messages but +-- as json is generated programmatically errors are systematic and we can assume +-- a correct stream. If not, we have some fatal error anyway. So, we can just rely +-- on strings being strings (apart from the unicode escape which is not in 5.1) and +-- as we first catch known types we just assume that anything else is a number. + +local P, V, R, S, C, Cc, Cs, Ct, Cf, Cg = lpeg.P, lpeg.V, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cf, lpeg.Cg +local lpegmatch = lpeg.match +local format = string.format +local utfchar = utf.char +local concat = table.concat + +local tonumber, tostring, rawset, type = tonumber, tostring, rawset, type + +local json = utilities.json or { } +utilities.json = json + +-- moduledata = moduledata or { } +-- moduledata.json = json + +-- \\ \/ \b \f \n \r \t \uHHHH + +local lbrace = P("{") +local rbrace = P("}") +local lparent = P("[") +local rparent = P("]") +local comma = P(",") +local colon = P(":") +local dquote = P('"') + +local whitespace = lpeg.patterns.whitespace +local optionalws = whitespace^0 + +local escape = C(P("\\u") / "0x" * S("09","AF","af")) / function(s) return utfchar(tonumber(s)) end +local jstring = dquote * Cs((escape + (1-dquote))^0) * dquote +local jtrue = P("true") * Cc(true) +local jfalse = P("false") * Cc(false) +local jnull = P("null") * Cc(nil) +local jnumber = (1-whitespace-rparent-rbrace-comma)^1 / tonumber + +local key = jstring + +local jsonconverter = { "value", + object = lbrace * Cf(Ct("") * V("pair") * (comma * V("pair"))^0,rawset) * rbrace, + pair = Cg(optionalws * key * optionalws * colon * V("value")), + array = Ct(lparent * V("value") * (comma * V("value"))^0 * rparent), + value = optionalws * (jstring + V("object") + V("array") + jtrue + jfalse + jnull + jnumber + #rparent) * optionalws, +} + +-- local jsonconverter = { "value", +-- object = lbrace * Cf(Ct("") * V("pair") * (comma * V("pair"))^0,rawset) * rbrace, +-- pair = Cg(optionalws * V("string") * optionalws * colon * V("value")), +-- array = Ct(lparent * V("value") * (comma * V("value"))^0 * rparent), +-- string = jstring, +-- value = optionalws * (V("string") + V("object") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws, +-- } + +-- lpeg.print(jsonconverter) -- size 181 + +function json.tolua(str) + return lpegmatch(jsonconverter,str) +end + +local function tojson(value,t) -- we could optimize #t + local kind = type(value) + if kind == "table" then + local done = false + local size = #value + if size == 0 then + for k, v in next, value do + if done then + t[#t+1] = "," + else + t[#t+1] = "{" + done = true + end + t[#t+1] = format("%q:",k) + tojson(v,t) + end + if done then + t[#t+1] = "}" + else + t[#t+1] = "{}" + end + elseif size == 1 then + -- we can optimize for non tables + t[#t+1] = "[" + tojson(value[1],t) + t[#t+1] = "]" + else + for i=1,size do + if done then + t[#t+1] = "," + else + t[#t+1] = "[" + done = true + end + tojson(value[i],t) + end + t[#t+1] = "]" + end + elseif kind == "string" then + t[#t+1] = format("%q",value) + elseif kind == "number" then + t[#t+1] = value + elseif kind == "boolean" then + t[#t+1] = tostring(value) + end + return t +end + +function json.tostring(value) + -- todo optimize for non table + local kind = type(value) + if kind == "table" then + return concat(tojson(value,{}),"") + elseif kind == "string" or kind == "number" then + return value + else + return tostring(value) + end +end + +-- local tmp = [[ { "a" : true, "b" : [ 123 , 456E-10, { "a" : true, "b" : [ 123 , 456 ] } ] } ]] + +-- tmp = json.tolua(tmp) +-- inspect(tmp) +-- tmp = json.tostring(tmp) +-- inspect(tmp) +-- tmp = json.tolua(tmp) +-- inspect(tmp) +-- tmp = json.tostring(tmp) +-- inspect(tmp) + +-- inspect(json.tostring(true)) + +return json diff --git a/lualibs.dtx b/lualibs.dtx index 8f0d19c..cc3142a 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -193,12 +193,13 @@ require("lualibs-dir") require("lualibs-unicode") require("lualibs-url") require("lualibs-set") -require("lualibs-util-dim") require("lualibs-util-lua") -require("lualibs-util-mrg") require("lualibs-util-sto") +require("lualibs-util-mrg") +require("lualibs-util-dim") require("lualibs-util-str") require("lualibs-util-tab") +require("lualibs-util-jsn") % \end{macrocode} % % \iffalse -- cgit v1.2.3 From 80e5c6eed03712ab06c068b5e22f1808b99f0616 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 19 Oct 2012 23:49:10 +0200 Subject: update doc --- NEWS | 5 +++++ lualibs.dtx | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- lualibs.lua | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 5 deletions(-) create mode 100644 lualibs.lua diff --git a/NEWS b/NEWS index 30e5e40..8b367bf 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,9 @@ History of the lualibs package +2012/10/19 v0.9/ + * sync with ConTeXt beta 2012.10.17 + * move some files to util-* prefix + * add util-sto util-lua util-sto util-jsn + 2011/01/20 v0.96 * Fix computability with lfs in luatex 0.65 diff --git a/lualibs.dtx b/lualibs.dtx index cc3142a..e38e89f 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -33,7 +33,7 @@ \input docstrip.tex \Msg{************************************************************************} \Msg{* Installation} -\Msg{* Package: lualibs 2011/01/20 v0.96 Lua additional functions.} +\Msg{* Package: lualibs 2012/10/19 v0.97 Lua additional functions.} \Msg{************************************************************************} \keepsilent @@ -90,7 +90,7 @@ and the derived file lualibs.lua. %<*driver> \NeedsTeXFormat{LaTeX2e} \ProvidesFile{lualibs.drv} - [2011/01/20 v0.96 Lua additional functions.] + [2012/10/19 v0.97 Lua additional functions.] \documentclass{ltxdoc} \EnableCrossrefs \CodelineIndex @@ -120,7 +120,7 @@ and the derived file lualibs.lua. % \GetFileInfo{lualibs.drv} % % \title{The \textsf{lualibs} package} -% \date{2011/01/20 v0.96} +% \date{2012/10/19 v0.97} % \author{Elie Roux \\ \texttt{elie.roux@telecom-bretagne.eu}} % % \maketitle @@ -151,6 +151,61 @@ and the derived file lualibs.lua. % initialize \textsf{kpse} library so that |require()| can find files under % TEXMF tree: |kpse.set_program_name("luatex")|. % +% \section{Files} +% +% The \textsf{lualibs} bundle contains files from two Con\TeX t Lua +% library categories: The generic auxiliary functions (original file prefix: +% |t-|) together form something close to a standard libary. Most of these are +% extensions of an existing namespace, like for instance |l-table.lua| which +% adds full-fledged serialization capabilities to the Lua table library. +% They were imported under the \textsf{lualibs}-prefix. +% (For a list see table~\ref{tab:extensions}.) +% +% \begin{table}[h] +% \centering +% \caption{Extensions of the Lua standard library.} +% \begin{tabular}{l l l} +% \textsf{lualibs} name & Con\TeX t name & content \\ +% \hline +% lualibs-string.lua & l-string.lua & string manipulation \\ +% lualibs-lpeg.lua & l-lpeg.lua & patterns \\ +% lualibs-boolean.lua & l-boolean.lua & boolean converter \\ +% lualibs-number.lua & l-number.lua & bit set \\ +% lualibs-math.lua & l-math.lua & math functions \\ +% lualibs-table.lua & l-table.lua & serialization, conversion \\ +% lualibs-io.lua & l-io.lua & reading and writing files \\ +% lualibs-os.lua & l-os.lua & platform specific code \\ +% lualibs-file.lua & l-file.lua & filesystem operations \\ +% lualibs-md5.lua & l-md5.lua & checksum functions \\ +% lualibs-dir.lua & l-dir.lua & directory handling \\ +% lualibs-unicode.lua & l-unicode.lua & utf and unicode \\ +% lualibs-url.lua & l-url.lua & url handling \\ +% lualibs-set.lua & l-set.lua & sets \\ +% \end{tabular} +% \label{tab:extensions} +% \end{table} +% +% The second category comprises a selection of files mostly from the +% utilities namespace (|util-|; cf. table~\ref{tab:utilities}). +% Their purpose is more specific and at times quite low-level. +% +% \begin{table}[h] +% \centering +% \caption{Utility functions.} +% \begin{tabular}{l l l} +% \textsf{lualibs} name & Con\TeX t name & content \\ +% \hline +% lualibs-util-lua.lua & util-lua.lua & operations on bytecode \\ +% lualibs-util-sto.lua & util-sto.lua & table allocation \\ +% lualibs-util-mrg.lua & util-mrg.lua & merging lua sources \\ +% lualibs-util-dim.lua & util-dim.lua & converters for dimensions \\ +% lualibs-util-str.lua & util-str.lua & extra string functions \\ +% lualibs-util-tab.lua & util-tab.lua & extra table functions \\ +% lualibs-util-jsn.lua & util-jsn.lua & conversion to and from json \\ +% \end{tabular} +% \label{tab:utilities} +% \end{table} +% % \pagebreak % \section{\texttt{lualibs.lua}} % @@ -163,8 +218,8 @@ module('lualibs', package.seeall) local lualibs_module = { name = "lualibs", - version = 0.96, - date = "2011/01/20", + version = 0.97, + date = "2012/10/19", description = "Lua additional functions.", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL & Elie Roux", copyright = "PRAGMA ADE / ConTeXt Development Team", diff --git a/lualibs.lua b/lualibs.lua new file mode 100644 index 0000000..cf9b039 --- /dev/null +++ b/lualibs.lua @@ -0,0 +1,54 @@ +-- +-- This is file `lualibs.lua', +-- generated with the docstrip utility. +-- +-- The original source files were: +-- +-- lualibs.dtx (with options: `lua') +-- This is a generated file. +-- +-- Copyright (C) 2009 by PRAGMA ADE / ConTeXt Development Team +-- +-- See ConTeXt's mreadme.pdf for the license. +-- +-- This work consists of the main source file lualibs.dtx +-- and the derived file lualibs.lua. +-- +module('lualibs', package.seeall) + +local lualibs_module = { + name = "lualibs", + version = 0.97, + date = "2012/10/19", + description = "Lua additional functions.", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL & Elie Roux", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "See ConTeXt's mreadme.pdf for the license", +} + +if luatexbase and luatexbase.provides_module then + luatexbase.provides_module(lualibs_module) +end +require("lualibs-string") +require("lualibs-lpeg") +require("lualibs-boolean") +require("lualibs-number") +require("lualibs-math") +require("lualibs-table") +require("lualibs-io") +require("lualibs-os") +require("lualibs-file") +require("lualibs-md5") +require("lualibs-dir") +require("lualibs-unicode") +require("lualibs-url") +require("lualibs-set") +require("lualibs-util-lua") +require("lualibs-util-sto") +require("lualibs-util-mrg") +require("lualibs-util-dim") +require("lualibs-util-str") +require("lualibs-util-tab") +require("lualibs-util-jsn") +-- +-- End of File `lualibs.lua'. -- cgit v1.2.3 From 616450e5fcb90619477354f34bfd25de2081f5fa Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sat, 20 Oct 2012 10:37:35 +0200 Subject: repair oversight in l-file --- lualibs-file.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/lualibs-file.lua b/lualibs-file.lua index 7ab9fbc..bea4ef0 100644 --- a/lualibs-file.lua +++ b/lualibs-file.lua @@ -14,6 +14,7 @@ local file = file local insert, concat = table.insert, table.concat local find, gmatch, match, gsub, sub, char, lower = string.find, string.gmatch, string.match, string.gsub, string.sub, string.char, string.lower local lpegmatch = lpeg.match +local getcurrentdir, attributes = lfs.currentdir, lfs.attributes local P, R, S, C, Cs, Cp, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cp, lpeg.Cc -- cgit v1.2.3 From 91fe82a3cacc9d87c07fce4fd05a1ea70f31996e Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 15:46:00 +0200 Subject: add l-lua -> lualibs-lua.lua --- lualibs-lua.lua | 393 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 393 insertions(+) create mode 100644 lualibs-lua.lua diff --git a/lualibs-lua.lua b/lualibs-lua.lua new file mode 100644 index 0000000..538c65d --- /dev/null +++ b/lualibs-lua.lua @@ -0,0 +1,393 @@ +if not modules then modules = { } end modules ['l-lua'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- compatibility hacks ... try to avoid usage + +local major, minor = string.match(_VERSION,"^[^%d]+(%d+)%.(%d+).*$") + +_MAJORVERSION = tonumber(major) or 5 +_MINORVERSION = tonumber(minor) or 1 +_LUAVERSION = _MAJORVERSION + _MINORVERSION/10 + +-- lpeg + +if not lpeg then + lpeg = require("lpeg") +end + +-- basics: + +if loadstring then + + local loadnormal = load + + function load(first,...) + if type(first) == "string" then + return loadstring(first,...) + else + return loadnormal(first,...) + end + end + +else + + loadstring = load + +end + +-- table: + +-- At some point it was announced that i[pairs would be dropped, which makes +-- sense. As we already used the for loop and # in most places the impact on +-- ConTeXt was not that large; the remaining ipairs already have been replaced. +-- Hm, actually ipairs was retained, but we no longer use it anyway (nor +-- pairs). +-- +-- Just in case, we provide the fallbacks as discussed in Programming +-- in Lua (http://www.lua.org/pil/7.3.html): + +if not ipairs then + + -- for k, v in ipairs(t) do ... end + -- for k=1,#t do local v = t[k] ... end + + local function iterate(a,i) + i = i + 1 + local v = a[i] + if v ~= nil then + return i, v --, nil + end + end + + function ipairs(a) + return iterate, a, 0 + end + +end + +if not pairs then + + -- for k, v in pairs(t) do ... end + -- for k, v in next, t do ... end + + function pairs(t) + return next, t -- , nil + end + +end + +-- The unpack function has been moved to the table table, and for compatiility +-- reasons we provide both now. + +if not table.unpack then + + table.unpack = _G.unpack + +elseif not unpack then + + _G.unpack = table.unpack + +end + +-- package: + +-- if not package.seachers then +-- +-- package.searchers = package.loaders -- 5.2 +-- +-- elseif not package.loaders then +-- +-- package.loaders = package.searchers +-- +-- end + +if not package.loaders then -- brr, searchers is a special "loadlib function" userdata type + + package.loaders = package.searchers + +end + +-- moved from util-deb to here: + +local print, select, tostring = print, select, tostring + +local inspectors = { } + +function setinspector(inspector) -- global function + inspectors[#inspectors+1] = inspector +end + +function inspect(...) -- global function + for s=1,select("#",...) do + local value = select(s,...) + local done = false + for i=1,#inspectors do + done = inspectors[i](value) + if done then + break + end + end + if not done then + print(tostring(value)) + end + end +end + +-- + +local dummy = function() end + +function optionalrequire(...) + local ok, result = xpcall(require,dummy,...) + if ok then + return result + end +end + +-- Code moved from data-lua and changed into a plug-in. + +-- We overload the regular loader. We do so because we operate mostly in +-- tds and use our own loader code. Alternatively we could use a more +-- extensive definition of package.path and package.cpath but even then +-- we're not done. Also, we now have better tracing. +-- +-- -- local mylib = require("libtest") +-- -- local mysql = require("luasql.mysql") + +local type = type +local gsub, format = string.gsub, string.format + +local package = package +local searchers = package.searchers or package.loaders + +local libpaths = nil +local clibpaths = nil +local libhash = { } +local clibhash = { } +local libextras = { } +local clibextras = { } + +-- dummies + +local filejoin = file and file.join or function(path,name) return path .. "/" .. name end +local isreadable = file and file.is_readable or function(name) local f = io.open(name) if f then f:close() return true end end +local addsuffix = file and file.addsuffix or function(name,suffix) return name .. "." .. suffix end + +-- + +local function cleanpath(path) -- hm, don't we have a helper for this? + return path +end + +local helpers = package.helpers or { + libpaths = function() return { } end, + clibpaths = function() return { } end, + cleanpath = cleanpath, + trace = false, + report = function(...) print(format(...)) end, +} +package.helpers = helpers + +local function getlibpaths() + return libpaths or helpers.libpaths(libhash) +end + +local function getclibpaths() + return clibpaths or helpers.clibpaths(clibhash) +end + +package.libpaths = getlibpaths +package.clibpaths = getclibpaths + +local function addpath(what,paths,extras,hash,...) + local pathlist = { ... } + local cleanpath = helpers.cleanpath + local trace = helpers.trace + local report = helpers.report + -- + local function add(path) + local path = cleanpath(path) + if not hash[path] then + if trace then + report("extra %s path: %s",what,path) + end + paths [#paths +1] = path + extras[#extras+1] = path + end + end + -- + for p=1,#pathlist do + local path = pathlist[p] + if type(path) == "table" then + for i=1,#path do + add(path[i]) + end + else + add(path) + end + end + return paths, extras +end + +function package.extralibpath(...) + libpaths, libextras = addpath("lua", getlibpaths(), libextras, libhash,...) +end + +function package.extraclibpath(...) + clibpaths, clibextras = addpath("lib",getclibpaths(),clibextras,clibhash,...) +end + +-- function package.extralibpath(...) +-- libpaths = getlibpaths() +-- local pathlist = { ... } +-- local cleanpath = helpers.cleanpath +-- local trace = helpers.trace +-- local report = helpers.report +-- -- +-- local function add(path) +-- local path = cleanpath(path) +-- if not libhash[path] then +-- if trace then +-- report("extra lua path: %s",path) +-- end +-- libextras[#libextras+1] = path +-- libpaths [#libpaths +1] = path +-- end +-- end +-- -- +-- for p=1,#pathlist do +-- local path = pathlist[p] +-- if type(path) == "table" then +-- for i=1,#path do +-- add(path[i]) +-- end +-- else +-- add(path) +-- end +-- end +-- end + +-- function package.extraclibpath(...) +-- clibpaths = getclibpaths() +-- local pathlist = { ... } +-- local cleanpath = helpers.cleanpath +-- local trace = helpers.trace +-- local report = helpers.report +-- -- +-- local function add(path) +-- local path = cleanpath(path) +-- if not clibhash[path] then +-- if trace then +-- report("extra lib path: %s",path) +-- end +-- clibextras[#clibextras+1] = path +-- clibpaths [#clibpaths +1] = path +-- end +-- end +-- -- +-- for p=1,#pathlist do +-- local path = pathlist[p] +-- if type(path) == "table" then +-- for i=1,#path do +-- add(path[i]) +-- end +-- else +-- add(path) +-- end +-- end +-- end + +if not searchers[-2] then + -- use package-path and package-cpath + searchers[-2] = searchers[2] +end + +searchers[2] = function(name) + return helpers.loaded(name) +end + +searchers[3] = nil -- get rid of the built in one + +local function loadedaslib(resolved,rawname) + -- local init = "luaopen_" .. string.match(rawname,".-([^%.]+)$") + local init = "luaopen_"..gsub(rawname,"%.","_") + if helpers.trace then + helpers.report("calling loadlib with '%s' with init '%s'",resolved,init) + end + return package.loadlib(resolved,init) +end + +local function loadedbylua(name) + if helpers.trace then + helpers.report("locating '%s' using normal loader",name) + end + return true, searchers[-2](name) -- the original +end + +local function loadedbypath(name,rawname,paths,islib,what) + local trace = helpers.trace + local report = helpers.report + if trace then + report("locating '%s' as '%s' on '%s' paths",rawname,name,what) + end + for p=1,#paths do + local path = paths[p] + local resolved = filejoin(path,name) + if trace then -- mode detail + report("checking for '%s' using '%s' path '%s'",name,what,path) + end + if isreadable(resolved) then + if trace then + report("lib '%s' located on '%s'",name,resolved) + end + if islib then + return true, loadedaslib(resolved,rawname) + else + return true, loadfile(resolved) + end + end + end +end + +local function notloaded(name) + if helpers.trace then + helpers.report("? unable to locate library '%s'",name) + end +end + +helpers.loadedaslib = loadedaslib +helpers.loadedbylua = loadedbylua +helpers.loadedbypath = loadedbypath +helpers.notloaded = notloaded + +-- alternatively we could set the package.searchers + +function helpers.loaded(name) + local thename = gsub(name,"%.","/") + local luaname = addsuffix(thename,"lua") + local libname = addsuffix(thename,os.libsuffix or "so") -- brrr + local libpaths = getlibpaths() + local clibpaths = getclibpaths() + local done, result = loadedbypath(luaname,name,libpaths,false,"lua") + if done then + return result + end + local done, result = loadedbypath(luaname,name,clibpaths,false,"lua") + if done then + return result + end + local done, result = loadedbypath(libname,name,clibpaths,true,"lib") + if done then + return result + end + local done, result = loadedbylua(name) + if done then + return result + end + return notloaded(name) +end -- cgit v1.2.3 From e7b5bd39a5ef0d1739244e53cc22109f9bfc82ba Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 17:08:56 +0200 Subject: update l-lpeg --- lualibs-lpeg.lua | 500 +++++++++++++++++++++++++++---------------------------- 1 file changed, 243 insertions(+), 257 deletions(-) diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua index d92b722..681ef09 100644 --- a/lualibs-lpeg.lua +++ b/lualibs-lpeg.lua @@ -6,17 +6,17 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } - -- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) -local lpeg = require("lpeg") +-- move utf -> l-unicode +-- move string -> l-string or keep it here + +lpeg = require("lpeg") -- tracing (only used when we encounter a problem in integration of lpeg in luatex) -- some code will move to unicode and string -local report = texio and texio.write_nl or print - -- local lpmatch = lpeg.match -- local lpprint = lpeg.print -- local lpp = lpeg.P @@ -34,25 +34,34 @@ local report = texio and texio.write_nl or print -- local lpcmt = lpeg.Cmt -- local lpcarg = lpeg.Carg --- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end - --- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end --- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end --- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end --- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end --- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end --- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end --- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end --- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end --- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end --- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end --- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end --- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end --- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end --- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end - -local type, next = type, next +-- function lpeg.match(l,...) print("LPEG MATCH") lpprint(l) return lpmatch(l,...) end + +-- function lpeg.P (l) local p = lpp (l) print("LPEG P =") lpprint(l) return p end +-- function lpeg.R (l) local p = lpr (l) print("LPEG R =") lpprint(l) return p end +-- function lpeg.S (l) local p = lps (l) print("LPEG S =") lpprint(l) return p end +-- function lpeg.C (l) local p = lpc (l) print("LPEG C =") lpprint(l) return p end +-- function lpeg.B (l) local p = lpb (l) print("LPEG B =") lpprint(l) return p end +-- function lpeg.V (l) local p = lpv (l) print("LPEG V =") lpprint(l) return p end +-- function lpeg.Cf (l) local p = lpcf (l) print("LPEG Cf =") lpprint(l) return p end +-- function lpeg.Cb (l) local p = lpcb (l) print("LPEG Cb =") lpprint(l) return p end +-- function lpeg.Cg (l) local p = lpcg (l) print("LPEG Cg =") lpprint(l) return p end +-- function lpeg.Ct (l) local p = lpct (l) print("LPEG Ct =") lpprint(l) return p end +-- function lpeg.Cs (l) local p = lpcs (l) print("LPEG Cs =") lpprint(l) return p end +-- function lpeg.Cc (l) local p = lpcc (l) print("LPEG Cc =") lpprint(l) return p end +-- function lpeg.Cmt (l) local p = lpcmt (l) print("LPEG Cmt =") lpprint(l) return p end +-- function lpeg.Carg (l) local p = lpcarg(l) print("LPEG Carg =") lpprint(l) return p end + +local type, next, tostring = type, next, tostring local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format +----- mod, div = math.mod, math.div +local floor = math.floor + +local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt +local lpegtype, lpegmatch, lpegprint = lpeg.type, lpeg.match, lpeg.print + +-- let's start with an inspector: + +setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end) -- Beware, we predefine a bunch of patterns here and one reason for doing so -- is that we get consistent behaviour in some of the visualizers. @@ -60,11 +69,6 @@ local byte, char, gmatch, format = string.byte, string.char, string.gmatch, stri lpeg.patterns = lpeg.patterns or { } -- so that we can share local patterns = lpeg.patterns -local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp -local lpegtype, lpegmatch = lpeg.type, lpeg.match - -local utfcharacters = string.utfcharacters -local utfgmatch = unicode and unicode.utf8.gmatch local anything = P(1) local endofstring = P(-1) @@ -91,9 +95,12 @@ local utfbom_8 = P('\239\187\191') local utfbom = utfbom_32_be + utfbom_32_le + utfbom_16_be + utfbom_16_le + utfbom_8 -local utftype = utfbom_32_be / "utf-32-be" + utfbom_32_le / "utf-32-le" - + utfbom_16_be / "utf-16-be" + utfbom_16_le / "utf-16-le" - + utfbom_8 / "utf-8" + alwaysmatched / "unknown" +local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le") + + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le") + + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8 +local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4) + + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2) + + utfbom_8 * Cc(3) + Cc(0) local utf8next = R("\128\191") @@ -103,25 +110,47 @@ patterns.utf8three = R("\224\239") * utf8next * utf8next patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next patterns.utfbom = utfbom patterns.utftype = utftype +patterns.utfoffset = utfoffset local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false) +local utf8character = P(1) * R("\128\191")^0 -- unchecked but fast + patterns.utf8 = utf8char patterns.utf8char = utf8char +patterns.utf8character = utf8character -- this one can be used in most cases so we might use that one patterns.validutf8 = validutf8char patterns.validutf8char = validutf8char local eol = S("\n\r") local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) local whitespace = eol + spacer +local nonspacer = 1 - spacer +local nonwhitespace = 1 - whitespace + +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace +patterns.nonspacer = nonspacer +patterns.nonwhitespace = nonwhitespace + +local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto + +----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0) +local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) + +patterns.stripper = stripper +patterns.collapser = collapser patterns.digit = digit patterns.sign = sign patterns.cardinal = sign^0 * digit^1 patterns.integer = sign^0 * digit^1 -patterns.float = sign^0 * digit^0 * P('.') * digit^1 -patterns.cfloat = sign^0 * digit^0 * P(',') * digit^1 +patterns.unsigned = digit^0 * P('.') * digit^1 +patterns.float = sign^0 * patterns.unsigned +patterns.cunsigned = digit^0 * P(',') * digit^1 +patterns.cfloat = sign^0 * patterns.cunsigned patterns.number = patterns.float + patterns.integer patterns.cnumber = patterns.cfloat + patterns.integer patterns.oct = P("0") * R("07")^1 @@ -135,13 +164,8 @@ patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = eol -patterns.spacer = spacer -patterns.whitespace = whitespace patterns.newline = newline patterns.emptyline = newline^1 -patterns.nonspacer = 1 - spacer -patterns.nonwhitespace = 1 - whitespace patterns.equal = P("=") patterns.comma = P(",") patterns.commaspacer = P(",") * spacer^0 @@ -154,8 +178,8 @@ patterns.squote = squote patterns.dquote = dquote patterns.nosquote = (escaped + (1-squote))^0 patterns.nodquote = (escaped + (1-dquote))^0 -patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") +patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle +patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble patterns.unspacer = ((patterns.spacer^1)/"")^0 @@ -163,15 +187,12 @@ patterns.singlequoted = squote * patterns.nosquote * squote patterns.doublequoted = dquote * patterns.nodquote * dquote patterns.quoted = patterns.doublequoted + patterns.singlequoted +patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1) + patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 patterns.beginline = #(1-newline) --- print(string.unquoted("test")) --- print(string.unquoted([["t\"est"]])) --- print(string.unquoted([["t\"est"x]])) --- print(string.unquoted("\'test\'")) --- print(string.unquoted('"test"')) --- print(string.unquoted('"test"')) +patterns.longtostring = Cs(whitespace^0/"" * nonwhitespace^0 * ((whitespace^0/" " * (patterns.quoted + nonwhitespace)^1)^0)) local function anywhere(pattern) --slightly adapted from website return P { P(pattern) + 1 * V(1) } @@ -235,10 +256,10 @@ function string.splitup(str,separator) return lpegmatch(splitters_m[separator] or splitat(separator),str) end ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps +-- local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more +-- local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more +-- local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps +-- local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps local cache = { } @@ -271,12 +292,6 @@ local content = (empty + nonempty)^1 patterns.textline = content ---~ local linesplitter = Ct(content^0) ---~ ---~ function string.splitlines(str) ---~ return lpegmatch(linesplitter,str) ---~ end - local linesplitter = tsplitat(newline) patterns.linesplitter = linesplitter @@ -285,65 +300,6 @@ function string.splitlines(str) return lpegmatch(linesplitter,str) end -local utflinesplitter = utfbom^-1 * tsplitat(newline) - -patterns.utflinesplitter = utflinesplitter - -function string.utfsplitlines(str) - return lpegmatch(utflinesplitter,str or "") -end - -local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) -local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) - -function string.utfsplit(str,ignorewhitespace) -- new - if ignorewhitespace then - return lpegmatch(utfcharsplitter_iws,str or "") - else - return lpegmatch(utfcharsplitter_ows,str or "") - end -end - --- inspect(string.utfsplit("a b c d")) --- inspect(string.utfsplit("a b c d",true)) - --- -- alternative 1: 0.77 --- --- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) --- --- function string.utflength(str) --- return #lpegmatch(utfcharcounter,str or "") --- end --- --- -- alternative 2: 1.70 --- --- local n = 0 --- --- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow --- --- function string.utflength(str) --- n = 0 --- lpegmatch(utfcharcounter,str or "") --- return n --- end --- --- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) - -local n = 0 - -local utfcharcounter = utfbom^-1 * Cs ( ( - Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end - + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end - + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end - + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end -)^0 ) - -function string.utflength(str) - n = 0 - lpegmatch(utfcharcounter,str or "") - return n -end - --~ lpeg.splitters = cache -- no longer public local cache = { } @@ -370,7 +326,7 @@ function string.checkedsplit(str,separator) return lpegmatch(c,str) end ---~ from roberto's site: +-- from roberto's site: local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end @@ -428,40 +384,66 @@ end -- Just for fun I looked at the used bytecode and -- p = (p and p + pp) or pp gets one more (testset). -function lpeg.replacer(one,two) +-- todo: cache when string + +function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys + local pattern + local u = isutf and utf8char or 1 if type(one) == "table" then local no = #one - local p + local p = P(false) if no == 0 then for k, v in next, one do - local pp = P(k) / v - if p then - p = p + pp - else - p = pp - end + p = p + P(k) / v end - return Cs((p + 1)^0) + pattern = Cs((p + u)^0) elseif no == 1 then local o = one[1] one, two = P(o[1]), o[2] - return Cs(((1-one)^1 + one/two)^0) + -- pattern = Cs(((1-one)^1 + one/two)^0) + pattern = Cs((one/two + u)^0) else for i=1,no do local o = one[i] - local pp = P(o[1]) / o[2] - if p then - p = p + pp - else - p = pp - end + p = p + P(o[1]) / o[2] + end + pattern = Cs((p + u)^0) + end + else + pattern = Cs((P(one)/(two or "") + u)^0) + end + if makefunction then + return function(str) + return lpegmatch(pattern,str) + end + else + return pattern + end +end + +function lpeg.finder(lst,makefunction) + local pattern + if type(lst) == "table" then + pattern = P(false) + if #lst == 0 then + for k, v in next, lst do + pattern = pattern + P(k) -- ignore key, so we can use a replacer table + end + else + for i=1,#lst do + pattern = pattern + P(lst[i]) end - return Cs((p + 1)^0) end else - one = P(one) - two = two or "" - return Cs(((1-one)^1 + one/two)^0) + pattern = P(lst) + end + pattern = (1-pattern)^0 * pattern + if makefunction then + return function(str) + return lpegmatch(pattern,str) + end + else + return pattern end end @@ -496,21 +478,21 @@ function lpeg.balancer(left,right) return P { left * ((1 - left - right) + V(1))^0 * right } end ---~ print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) ---~ print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty ---~ print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) ---~ print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) ---~ print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty ---~ print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) ---~ print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) ---~ print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) +-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) +-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty +-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) +-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) +-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty +-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) +-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) +-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) ---~ -- slower: ---~ ---~ function lpeg.counter(pattern) ---~ local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 ---~ return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end ---~ end +-- -- slower: +-- +-- function lpeg.counter(pattern) +-- local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 +-- return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end +-- end local nany = utf8char/"" @@ -521,77 +503,22 @@ function lpeg.counter(pattern) end end -if utfgmatch then - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local n = 0 - for _ in utfgmatch(str,what) do - n = n + 1 - end - return n - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) - end - end - -else - - local cache = { } - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local p = cache[what] - if not p then - p = Cs((P(what)/" " + nany)^0) - cache[p] = p - end - return #lpegmatch(p,str) - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) - end - end - -end - -local patterns_escapes = { -- also defines in l-string - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%)", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} - -local simple_escapes = { -- also defines in l-string - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} - -local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) -local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) +-- utf extensies -function string.escapedpattern(str,simple) - return lpegmatch(simple and s or p,str) -end +utf = utf or (unicode and unicode.utf8) or { } --- utf extensies +local utfcharacters = utf and utf.characters or string.utfcharacters +local utfgmatch = utf and utf.gmatch +local utfchar = utf and utf.char lpeg.UP = lpeg.P if utfcharacters then function lpeg.US(str) - local p + local p = P(false) for uc in utfcharacters(str) do - if p then - p = p + P(uc) - else - p = P(uc) - end + p = p + P(uc) end return p end @@ -600,13 +527,9 @@ if utfcharacters then elseif utfgmatch then function lpeg.US(str) - local p + local p = P(false) for uc in utfgmatch(str,".") do - if p then - p = p + P(uc) - else - p = P(uc) - end + p = p + P(uc) end return p end @@ -614,13 +537,9 @@ elseif utfgmatch then else function lpeg.US(str) - local p + local p = P(false) local f = function(uc) - if p then - p = p + P(uc) - else - p = P(uc) - end + p = p + P(uc) end lpegmatch((utf8char/f)^0,str) return p @@ -628,9 +547,7 @@ else end -local range = Cs(utf8byte) * (Cs(utf8byte) + Cc(false)) - -local utfchar = unicode and unicode.utf8 and unicode.utf8.char +local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture function lpeg.UR(str,more) local first, last @@ -645,34 +562,33 @@ function lpeg.UR(str,more) end if first == last then return P(str) - elseif utfchar and last - first < 8 then -- a somewhat arbitrary criterium - local p + elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium + local p = P(false) for i=first,last do - if p then - p = p + P(utfchar(i)) - else - p = P(utfchar(i)) - end + p = p + P(utfchar(i)) end return p -- nil when invalid range else local f = function(b) return b >= first and b <= last end + -- tricky, these nested captures return utf8byte / f -- nil when invalid range end end ---~ lpeg.print(lpeg.R("ab","cd","gh")) ---~ lpeg.print(lpeg.P("a","b","c")) ---~ lpeg.print(lpeg.S("a","b","c")) +-- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) + +-- lpeg.print(lpeg.R("ab","cd","gh")) +-- lpeg.print(lpeg.P("a","b","c")) +-- lpeg.print(lpeg.S("a","b","c")) ---~ print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) ---~ print(lpeg.count("äáàa",lpeg.UP("áà"))) ---~ print(lpeg.count("äáàa",lpeg.US("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("aá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) +-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) +-- print(lpeg.count("äáàa",lpeg.UP("áà"))) +-- print(lpeg.count("äáàa",lpeg.US("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR("aá"))) +-- print(lpeg.count("äáàa",lpeg.UR("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) function lpeg.is_lpeg(p) return p and lpegtype(p) == "pattern" @@ -694,12 +610,30 @@ end -- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we -- loop back from the end cq. prepend. -local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency! +local sort = table.sort + +local function copyindexed(old) + local new = { } + for i=1,#old do + new[i] = old + end + return new +end + +local function sortedkeys(tab) + local keys, s = { }, 0 + for key,_ in next, tab do + s = s + 1 + keys[s] = key + end + sort(keys) + return keys +end function lpeg.append(list,pp,delayed,checked) local p = pp if #list > 0 then - local keys = fastcopy(list) + local keys = copyindexed(list) sort(keys) for i=#keys,1,-1 do local k = keys[i] @@ -796,8 +730,10 @@ end local function make(t) local p --- for k, v in next, t do - for k, v in table.sortedhash(t) do + local keys = sortedkeys(t) + for i=1,#keys do + local k = keys[i] + local v = t[k] if not p then if next(v) then p = P(k) * make(v) @@ -815,7 +751,7 @@ local function make(t) return p end -function lpeg.utfchartabletopattern(list) +function lpeg.utfchartabletopattern(list) -- goes to util-lpg local tree = { } for i=1,#list do local t = tree @@ -847,20 +783,70 @@ end -- utfchar(0x205F), -- math thinspace -- } ) --- handy from within tex: +-- a few handy ones: +-- +-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3 -local lpegmatch = lpeg.match +patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol -local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! +-- The next pattern^n variant is based on an approach suggested +-- by Roberto: constructing a big repetition in chunks. +-- +-- Being sparse is not needed, and only complicate matters and +-- the number of redundant entries is not that large. + +local function nextstep(n,step,result) + local m = n % step -- mod(n,step) + local d = floor(n/step) -- div(n,step) + if d > 0 then + local v = V(tostring(step)) + local s = result.start + for i=1,d do + if s then + s = v * s + else + s = v + end + end + result.start = s + end + if step > 1 and result.start then + local v = V(tostring(step/2)) + result[tostring(step)] = v * v + end + if step > 0 then + return nextstep(m,step/2,result) + else + return result + end +end -function string.tformat(fmt,...) - return format(lpegmatch(replacer,fmt),...) +function lpeg.times(pattern,n) + return P(nextstep(n,2^16,{ "start", ["1"] = pattern })) end --- strips leading and trailing spaces and collapsed all other spaces +-- local p = lpeg.Cs((1 - lpeg.times(lpeg.P("AB"),25))^1) +-- local s = "12" .. string.rep("AB",20) .. "34" .. string.rep("AB",30) .. "56" +-- inspect(p) +-- print(lpeg.match(p,s)) -local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) +-- moved here (before util-str) + +local digit = R("09") +local period = P(".") +local zero = P("0") +local trailingzeros = zero^0 * -digit -- suggested by Roberto R +local case_1 = period * trailingzeros / "" +local case_2 = period * (digit - trailingzeros)^1 * (trailingzeros / "") +local number = digit^1 * (case_1 + case_2) +local stripper = Cs((number + 1)^0) + +lpeg.patterns.stripzeros = stripper + +-- local sample = "bla 11.00 bla 11 bla 0.1100 bla 1.00100 bla 0.00 bla 0.001 bla 1.1100 bla 0.100100100 bla 0.00100100100" +-- collectgarbage("collect") +-- str = string.rep(sample,10000) +-- local ts = os.clock() +-- lpegmatch(stripper,str) +-- print(#str, os.clock()-ts, lpegmatch(stripper,sample)) -function string.collapsespaces(str) - return lpegmatch(pattern,str) -end -- cgit v1.2.3 From d304e059e97b9ccddcd062c8891cd77481f310e2 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 17:29:50 +0200 Subject: add l-function.lua -> lualibs-function.lua --- lualibs-function.lua | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 lualibs-function.lua diff --git a/lualibs-function.lua b/lualibs-function.lua new file mode 100644 index 0000000..7ded8ce --- /dev/null +++ b/lualibs-function.lua @@ -0,0 +1,11 @@ +if not modules then modules = { } end modules ['l-functions'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +functions = functions or { } + +function functions.dummy() end -- cgit v1.2.3 From ad29fc12bf3c6d7379685ddd3ef80ce768db3112 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 17:39:00 +0200 Subject: update l-string --- lualibs-string.lua | 206 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 138 insertions(+), 68 deletions(-) diff --git a/lualibs-string.lua b/lualibs-string.lua index 03616aa..77c076c 100644 --- a/lualibs-string.lua +++ b/lualibs-string.lua @@ -7,43 +7,49 @@ if not modules then modules = { } end modules ['l-string'] = { } local string = string -local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower -local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct - --- some functions may disappear as they are not used anywhere - -if not string.split then - - -- this will be overloaded by a faster lpeg variant - - function string.split(str,pattern) - local t = { } - if #str > 0 then - local n = 1 - for s in gmatch(str..pattern,"(.-)"..pattern) do - t[n] = s - n = n + 1 - end - end - return t - end - -end +local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs + +-- Some functions are already defined in l-lpeg and maybe some from here will +-- move there (unless we also expose caches). + +-- if not string.split then +-- +-- function string.split(str,pattern) +-- local t = { } +-- if #str > 0 then +-- local n = 1 +-- for s in gmatch(str..pattern,"(.-)"..pattern) do +-- t[n] = s +-- n = n + 1 +-- end +-- end +-- return t +-- end +-- +-- end + +-- function string.unquoted(str) +-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern +-- end + +local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote + + patterns.dquote * C(patterns.nodquote) * patterns.dquote function string.unquoted(str) - return (gsub(str,"^([\"\'])(.*)%1$","%2")) + return lpegmatch(unquoted,str) or str end ---~ function stringunquoted(str) ---~ if find(str,"^[\'\"]") then ---~ return sub(str,2,-2) ---~ else ---~ return str ---~ end ---~ end +-- print(string.unquoted("test")) +-- print(string.unquoted([["t\"est"]])) +-- print(string.unquoted([["t\"est"x]])) +-- print(string.unquoted("\'test\'")) +-- print(string.unquoted('"test"')) +-- print(string.unquoted('"test"')) function string.quoted(str) - return format("%q",str) -- always " + return format("%q",str) -- always double quote end function string.count(str,pattern) -- variant 3 @@ -63,73 +69,137 @@ function string.limit(str,n,sentinel) -- not utf proof end end -local space = S(" \t\v\n") -local nospace = 1 - space -local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code +local stripper = patterns.stripper +local collapser = patterns.collapser +local longtostring = patterns.longtostring function string.strip(str) return lpegmatch(stripper,str) or "" end -function string.is_empty(str) - return not find(str,"%S") +function string.collapsespaces(str) + return lpegmatch(collapser,str) or "" end -local patterns_escapes = { - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%(", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} +function string.longtostring(str) + return lpegmatch(longtostring,str) or "" +end -local simple_escapes = { - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} +-- function string.is_empty(str) +-- return not find(str,"%S") +-- end + +local pattern = P(" ")^0 * P(-1) + +function string.is_empty(str) + if str == "" then + return true + else + return lpegmatch(pattern,str) and true or false + end +end + +-- if not string.escapedpattern then +-- +-- local patterns_escapes = { +-- ["%"] = "%%", +-- ["."] = "%.", +-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", +-- ["["] = "%[", ["]"] = "%]", +-- ["("] = "%(", [")"] = "%)", +-- -- ["{"] = "%{", ["}"] = "%}" +-- -- ["^"] = "%^", ["$"] = "%$", +-- } +-- +-- local simple_escapes = { +-- ["-"] = "%-", +-- ["."] = "%.", +-- ["?"] = ".", +-- ["*"] = ".*", +-- } +-- +-- function string.escapedpattern(str,simple) +-- return (gsub(str,".",simple and simple_escapes or patterns_escapes)) +-- end +-- +-- function string.topattern(str,lowercase,strict) +-- if str == "" then +-- return ".*" +-- else +-- str = gsub(str,".",simple_escapes) +-- if lowercase then +-- str = lower(str) +-- end +-- if strict then +-- return "^" .. str .. "$" +-- else +-- return str +-- end +-- end +-- end +-- +-- end + +--- needs checking + +local anything = patterns.anything +local allescapes = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ? +local someescapes = Cc("%") * S(".-+%()[]") -- also {} and ^$ ? +local matchescapes = Cc(".") * S("*?") -- wildcard and single match + +local pattern_a = Cs ( ( allescapes + anything )^0 ) +local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 ) +local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") ) function string.escapedpattern(str,simple) - return (gsub(str,".",simple and simple_escapes or patterns_escapes)) + return lpegmatch(simple and pattern_b or pattern_a,str) end function string.topattern(str,lowercase,strict) - if str == "" then + if str=="" or type(str) ~= "string" then return ".*" + elseif strict then + str = lpegmatch(pattern_c,str) else - str = gsub(str,".",simple_escapes) - if lowercase then - str = lower(str) - end - if strict then - return "^" .. str .. "$" - else - return str - end + str = lpegmatch(pattern_b,str) + end + if lowercase then + return lower(str) + else + return str end end +-- print(string.escapedpattern("12+34*.tex",false)) +-- print(string.escapedpattern("12+34*.tex",true)) +-- print(string.topattern ("12+34*.tex",false,false)) +-- print(string.topattern ("12+34*.tex",false,true)) function string.valid(str,default) return (type(str) == "string" and str ~= "" and str) or default or nil end --- obsolete names: - -string.quote = string.quoted -string.unquote = string.unquoted - -- handy fallback string.itself = function(s) return s end -- also handy (see utf variant) -local pattern = Ct(C(1)^0) +local pattern = Ct(C(1)^0) -- string and not utf ! function string.totable(str) return lpegmatch(pattern,str) end + +-- handy from within tex: + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) +end + +-- obsolete names: + +string.quote = string.quoted +string.unquote = string.unquoted -- cgit v1.2.3 From 6252afeb5d04e9860f9f4b587f269de1bcaa9489 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 18:03:28 +0200 Subject: update l-table --- lualibs-table.lua | 637 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 503 insertions(+), 134 deletions(-) diff --git a/lualibs-table.lua b/lualibs-table.lua index 80f28c2..640bbbb 100644 --- a/lualibs-table.lua +++ b/lualibs-table.lua @@ -6,68 +6,23 @@ if not modules then modules = { } end modules ['l-table'] = { license = "see context related readme files" } -local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select local table, string = table, string local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove -local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match +local format, lower, dump = string.format, string.lower, string.dump local getmetatable, setmetatable = getmetatable, setmetatable local getinfo = debug.getinfo - --- Starting with version 5.2 Lua no longer provide ipairs, which makes --- sense. As we already used the for loop and # in most places the --- impact on ConTeXt was not that large; the remaining ipairs already --- have been replaced. In a similar fashion we also hardly used pairs. --- --- Hm, actually ipairs was retained, but we no longer use it anyway. --- --- Just in case, we provide the fallbacks as discussed in Programming --- in Lua (http://www.lua.org/pil/7.3.html): - -if not ipairs then - - -- for k, v in ipairs(t) do ... end - -- for k=1,#t do local v = t[k] ... end - - local function iterate(a,i) - i = i + 1 - local v = a[i] - if v ~= nil then - return i, v --, nil - end - end - - function ipairs(a) - return iterate, a, 0 - end - -end - -if not pairs then - - -- for k, v in pairs(t) do ... end - -- for k, v in next, t do ... end - - function pairs(t) - return next, t -- , nil - end - -end - --- Also, unpack has been moved to the table table, and for compatiility --- reasons we provide both now. - -if not table.unpack then - table.unpack = _G.unpack -elseif not unpack then - _G.unpack = table.unpack -end +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local floor = math.floor -- extra functions, some might go (when not used) +local stripper = patterns.stripper + function table.strip(tab) local lst, l = { }, 0 for i=1,#tab do - local s = gsub(tab[i],"^%s*(.-)%s*$","%1") + local s = lpegmatch(stripper,tab[i]) or "" if s == "" then -- skip this one else @@ -130,7 +85,7 @@ local function sortedkeys(tab) end end -local function sortedhashkeys(tab) -- fast one +local function sortedhashkeys(tab,cmp) -- fast one if tab then local srt, s = { }, 0 for key,_ in next, tab do @@ -139,21 +94,38 @@ local function sortedhashkeys(tab) -- fast one srt[s] = key end end - sort(srt) + sort(srt,cmp) return srt else return { } end end +function table.allkeys(t) + local keys = { } + for k, v in next, t do + for k, v in next, v do + keys[k] = true + end + end + return sortedkeys(keys) +end + table.sortedkeys = sortedkeys table.sortedhashkeys = sortedhashkeys local function nothing() end -local function sortedhash(t) +local function sortedhash(t,cmp) if t then - local n, s = 0, sortedkeys(t) -- the robust one + local s + if cmp then + -- it would be nice if teh sort function would accept a third argument (or nicer, an optional first) + s = sortedhashkeys(t,function(a,b) return cmp(t,a,b) end) + else + s = sortedkeys(t) -- the robust one + end + local n = 0 local function kv(s) n = n + 1 local k = s[n] @@ -166,7 +138,7 @@ local function sortedhash(t) end table.sortedhash = sortedhash -table.sortedpairs = sortedhash +table.sortedpairs = sortedhash -- obsolete function table.append(t,list) local n = #t @@ -190,31 +162,63 @@ function table.prepend(t, list) return t end +-- function table.merge(t, ...) -- first one is target +-- t = t or { } +-- local lst = { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- t[k] = v +-- end +-- end +-- return t +-- end + function table.merge(t, ...) -- first one is target t = t or { } - local lst = { ... } - for i=1,#lst do - for k, v in next, lst[i] do + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do t[k] = v end end return t end +-- function table.merged(...) +-- local tmp, lst = { }, { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- tmp[k] = v +-- end +-- end +-- return tmp +-- end + function table.merged(...) - local tmp, lst = { }, { ... } - for i=1,#lst do - for k, v in next, lst[i] do - tmp[k] = v + local t = { } + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do + t[k] = v end end - return tmp + return t end +-- function table.imerge(t, ...) +-- local lst, nt = { ... }, #t +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- nt = nt + 1 +-- t[nt] = nst[j] +-- end +-- end +-- return t +-- end + function table.imerge(t, ...) - local lst, nt = { ... }, #t - for i=1,#lst do - local nst = lst[i] + local nt = #t + for i=1,select("#",...) do + local nst = select(i,...) for j=1,#nst do nt = nt + 1 t[nt] = nst[j] @@ -223,10 +227,22 @@ function table.imerge(t, ...) return t end +-- function table.imerged(...) +-- local tmp, ntmp, lst = { }, 0, {...} +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- ntmp = ntmp + 1 +-- tmp[ntmp] = nst[j] +-- end +-- end +-- return tmp +-- end + function table.imerged(...) - local tmp, ntmp, lst = { }, 0, {...} - for i=1,#lst do - local nst = lst[i] + local tmp, ntmp = { }, 0 + for i=1,select("#",...) do + local nst = select(i,...) for j=1,#nst do ntmp = ntmp + 1 tmp[ntmp] = nst[j] @@ -238,7 +254,7 @@ end local function fastcopy(old,metatabletoo) -- fast one if old then local new = { } - for k,v in next, old do + for k, v in next, old do if type(v) == "table" then new[k] = fastcopy(v,metatabletoo) -- was just table.copy else @@ -292,7 +308,7 @@ end table.fastcopy = fastcopy table.copy = copy -function table.derive(parent) +function table.derive(parent) -- for the moment not public local child = { } if parent then setmetatable(child,{ __index = parent }) @@ -373,6 +389,15 @@ end -- problem: there no good number_to_string converter with the best resolution +-- probably using .. is faster than format +-- maybe split in a few cases (yes/no hexify) + +-- todo: %g faster on numbers than %s + +-- we can speed this up with repeaters and formatters (is indeed faster) + +local propername = patterns.propername -- was find(name,"^%a[%w%_]*$") + local function dummy() end local function do_serialize(root,name,depth,level,indexed) @@ -382,14 +407,14 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s{",depth)) else local tn = type(name) - if tn == "number" then -- or find(k,"^%d+$") then + if tn == "number" then if hexify then handle(format("%s[0x%04X]={",depth,name)) else handle(format("%s[%s]={",depth,name)) end elseif tn == "string" then - if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + if noquotes and not reserved[name] and lpegmatch(propername,name) then handle(format("%s%s={",depth,name)) else handle(format("%s[%q]={",depth,name)) @@ -415,7 +440,6 @@ local function do_serialize(root,name,depth,level,indexed) if compact then last = #root for k=1,last do --- if not root[k] then if root[k] == nil then last = k - 1 break @@ -463,7 +487,7 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s %s,",depth,tostring(v))) elseif t == "function" then if functions then - handle(format('%s loadstring(%q),',depth,dump(v))) + handle(format('%s load(%q),',depth,dump(v))) else handle(format('%s "function",',depth)) end @@ -475,7 +499,7 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s __p__=nil,",depth)) end elseif t == "number" then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=0x%04X,",depth,k,v)) else @@ -487,7 +511,7 @@ local function do_serialize(root,name,depth,level,indexed) else handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g end - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then if hexify then handle(format("%s %s=0x%04X,",depth,k,v)) else @@ -502,7 +526,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "string" then if reduce and tonumber(v) then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%s,",depth,k,v)) else @@ -510,13 +534,13 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]=%s,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%s,",depth,k,v)) else handle(format("%s [%q]=%s,",depth,k,v)) end else - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%q,",depth,k,v)) else @@ -524,7 +548,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]=%q,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%q,",depth,k,v)) else handle(format("%s [%q]=%q,",depth,k,v)) @@ -532,7 +556,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "table" then if not next(v) then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]={},",depth,k)) else @@ -540,7 +564,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]={},",depth,tostring(k))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s={},",depth,k)) else handle(format("%s [%q]={},",depth,k)) @@ -548,15 +572,15 @@ local function do_serialize(root,name,depth,level,indexed) elseif inline then local st = simple_table(v) if st then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", "))) else handle(format("%s [%s]={ %s },",depth,k,concat(st,", "))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s={ %s },",depth,k,concat(st,", "))) else handle(format("%s [%q]={ %s },",depth,k,concat(st,", "))) @@ -568,15 +592,15 @@ local function do_serialize(root,name,depth,level,indexed) do_serialize(v,k,depth,level+1) end elseif t == "boolean" then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%s,",depth,k,tostring(v))) else handle(format("%s [%s]=%s,",depth,k,tostring(v))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%s,",depth,k,tostring(v))) else handle(format("%s [%q]=%s,",depth,k,tostring(v))) @@ -585,30 +609,30 @@ local function do_serialize(root,name,depth,level,indexed) if functions then local f = getinfo(v).what == "C" and dump(dummy) or dump(v) -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then - handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f)) + handle(format("%s [0x%04X]=load(%q),",depth,k,f)) else - handle(format("%s [%s]=loadstring(%q),",depth,k,f)) + handle(format("%s [%s]=load(%q),",depth,k,f)) end elseif tk == "boolean" then - handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=loadstring(%q),",depth,k,f)) + handle(format("%s [%s]=load(%q),",depth,tostring(k),f)) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=load(%q),",depth,k,f)) else - handle(format("%s [%q]=loadstring(%q),",depth,k,f)) + handle(format("%s [%q]=load(%q),",depth,k,f)) end end else - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%q,",depth,k,tostring(v))) else handle(format("%s [%s]=%q,",depth,k,tostring(v))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%q,",depth,k,tostring(v))) else handle(format("%s [%q]=%q,",depth,k,tostring(v))) @@ -689,14 +713,330 @@ local function serialize(_handle,root,name,specification) -- handle wins handle("}") end ---~ name: ---~ ---~ true : return { } ---~ false : { } ---~ nil : t = { } ---~ string : string = { } ---~ 'return' : return { } ---~ number : [number] = { } +-- -- This is some 20% faster than using format (because formatters are much faster) but +-- -- of course, inlining the format using .. is then again faster .. anyway, as we do +-- -- some pretty printing as well there is not that much to gain unless we make a 'fast' +-- -- ugly variant as well. But, we would have to move the formatter to l-string then. + +-- local formatters = string.formatters + +-- local function do_serialize(root,name,level,indexed) +-- if level > 0 then +-- if indexed then +-- handle(formatters["%w{"](level)) +-- else +-- local tn = type(name) +-- if tn == "number" then +-- if hexify then +-- handle(formatters["%w[%04H]={"](level,name)) +-- else +-- handle(formatters["%w[%s]={"](level,name)) +-- end +-- elseif tn == "string" then +-- if noquotes and not reserved[name] and lpegmatch(propername,name) then +-- handle(formatters["%w%s={"](level,name)) +-- else +-- handle(formatters["%w[%q]={"](level,name)) +-- end +-- elseif tn == "boolean" then +-- handle(formatters["%w[%S]={"](level,name)) +-- else +-- handle(formatters["%w{"](level)) +-- end +-- end +-- end +-- -- we could check for k (index) being number (cardinal) +-- if root and next(root) then +-- -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) +-- -- if compact then +-- -- -- NOT: for k=1,#root do (we need to quit at nil) +-- -- for k,v in ipairs(root) do -- can we use next? +-- -- if not first then first = k end +-- -- last = last + 1 +-- -- end +-- -- end +-- local first, last = nil, 0 +-- if compact then +-- last = #root +-- for k=1,last do +-- if root[k] == nil then +-- last = k - 1 +-- break +-- end +-- end +-- if last > 0 then +-- first = 1 +-- end +-- end +-- local sk = sortedkeys(root) +-- for i=1,#sk do +-- local k = sk[i] +-- local v = root[k] +-- --~ if v == root then +-- -- circular +-- --~ else +-- local t, tk = type(v), type(k) +-- if compact and first and tk == "number" and k >= first and k <= last then +-- if t == "number" then +-- if hexify then +-- handle(formatters["%w %04H,"](level,v)) +-- else +-- handle(formatters["%w %s,"](level,v)) -- %.99g +-- end +-- elseif t == "string" then +-- if reduce and tonumber(v) then +-- handle(formatters["%w %s,"](level,v)) +-- else +-- handle(formatters["%w %q,"](level,v)) +-- end +-- elseif t == "table" then +-- if not next(v) then +-- handle(formatters["%w {},"](level)) +-- elseif inline then -- and #t > 0 +-- local st = simple_table(v) +-- if st then +-- handle(formatters["%w { %, t },"](level,st)) +-- else +-- do_serialize(v,k,level+1,true) +-- end +-- else +-- do_serialize(v,k,level+1,true) +-- end +-- elseif t == "boolean" then +-- handle(formatters["%w %S,"](level,v)) +-- elseif t == "function" then +-- if functions then +-- handle(formatters['%w load(%q),'](level,dump(v))) +-- else +-- handle(formatters['%w "function",'](level)) +-- end +-- else +-- handle(formatters["%w %Q,"](level,v)) +-- end +-- elseif k == "__p__" then -- parent +-- if false then +-- handle(formatters["%w __p__=nil,"](level)) +-- end +-- elseif t == "number" then +-- if tk == "number" then +-- if hexify then +-- handle(formatters["%w [%04H]=%04H,"](level,k,v)) +-- else +-- handle(formatters["%w [%s]=%s,"](level,k,v)) -- %.99g +-- end +-- elseif tk == "boolean" then +-- if hexify then +-- handle(formatters["%w [%S]=%04H,"](level,k,v)) +-- else +-- handle(formatters["%w [%S]=%s,"](level,k,v)) -- %.99g +-- end +-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +-- if hexify then +-- handle(formatters["%w %s=%04H,"](level,k,v)) +-- else +-- handle(formatters["%w %s=%s,"](level,k,v)) -- %.99g +-- end +-- else +-- if hexify then +-- handle(formatters["%w [%q]=%04H,"](level,k,v)) +-- else +-- handle(formatters["%w [%q]=%s,"](level,k,v)) -- %.99g +-- end +-- end +-- elseif t == "string" then +-- if reduce and tonumber(v) then +-- if tk == "number" then +-- if hexify then +-- handle(formatters["%w [%04H]=%s,"](level,k,v)) +-- else +-- handle(formatters["%w [%s]=%s,"](level,k,v)) +-- end +-- elseif tk == "boolean" then +-- handle(formatters["%w [%S]=%s,"](level,k,v)) +-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +-- handle(formatters["%w %s=%s,"](level,k,v)) +-- else +-- handle(formatters["%w [%q]=%s,"](level,k,v)) +-- end +-- else +-- if tk == "number" then +-- if hexify then +-- handle(formatters["%w [%04H]=%q,"](level,k,v)) +-- else +-- handle(formatters["%w [%s]=%q,"](level,k,v)) +-- end +-- elseif tk == "boolean" then +-- handle(formatters["%w [%S]=%q,"](level,k,v)) +-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +-- handle(formatters["%w %s=%q,"](level,k,v)) +-- else +-- handle(formatters["%w [%q]=%q,"](level,k,v)) +-- end +-- end +-- elseif t == "table" then +-- if not next(v) then +-- if tk == "number" then +-- if hexify then +-- handle(formatters["%w [%04H]={},"](level,k)) +-- else +-- handle(formatters["%w [%s]={},"](level,k)) +-- end +-- elseif tk == "boolean" then +-- handle(formatters["%w [%S]={},"](level,k)) +-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +-- handle(formatters["%w %s={},"](level,k)) +-- else +-- handle(formatters["%w [%q]={},"](level,k)) +-- end +-- elseif inline then +-- local st = simple_table(v) +-- if st then +-- if tk == "number" then +-- if hexify then +-- handle(formatters["%w [%04H]={ %, t },"](level,k,st)) +-- else +-- handle(formatters["%w [%s]={ %, t },"](level,k,st)) +-- end +-- elseif tk == "boolean" then +-- handle(formatters["%w [%S]={ %, t },"](level,k,st)) +-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +-- handle(formatters["%w %s={ %, t },"](level,k,st)) +-- else +-- handle(formatters["%w [%q]={ %, t },"](level,k,st)) +-- end +-- else +-- do_serialize(v,k,level+1) +-- end +-- else +-- do_serialize(v,k,level+1) +-- end +-- elseif t == "boolean" then +-- if tk == "number" then +-- if hexify then +-- handle(formatters["%w [%04H]=%S,"](level,k,v)) +-- else +-- handle(formatters["%w [%s]=%S,"](level,k,v)) +-- end +-- elseif tk == "boolean" then +-- handle(formatters["%w [%S]=%S,"](level,k,v)) +-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +-- handle(formatters["%w %s=%S,"](level,k,v)) +-- else +-- handle(formatters["%w [%q]=%S,"](level,k,v)) +-- end +-- elseif t == "function" then +-- if functions then +-- local f = getinfo(v).what == "C" and dump(dummy) or dump(v) +-- -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) +-- if tk == "number" then +-- if hexify then +-- handle(formatters["%w [%04H]=load(%q),"](level,k,f)) +-- else +-- handle(formatters["%w [%s]=load(%q),"](level,k,f)) +-- end +-- elseif tk == "boolean" then +-- handle(formatters["%w [%S]=load(%q),"](level,k,f)) +-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +-- handle(formatters["%w %s=load(%q),"](level,k,f)) +-- else +-- handle(formatters["%w [%q]=load(%q),"](level,k,f)) +-- end +-- end +-- else +-- if tk == "number" then +-- if hexify then +-- handle(formatters["%w [%04H]=%Q,"](level,k,v)) +-- else +-- handle(formatters["%w [%s]=%Q,"](level,k,v)) +-- end +-- elseif tk == "boolean" then +-- handle(formatters["%w [%S]=%Q,"](level,k,v)) +-- elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +-- handle(formatters["%w %s=%Q,"](level,k,v)) +-- else +-- handle(formatters["%w [%q]=%Q,"](level,k,v)) +-- end +-- end +-- --~ end +-- end +-- end +-- if level > 0 then +-- handle(formatters["%w}"](level)) +-- end +-- end + +-- local function serialize(_handle,root,name,specification) -- handle wins +-- local tname = type(name) +-- if type(specification) == "table" then +-- noquotes = specification.noquotes +-- hexify = specification.hexify +-- handle = _handle or specification.handle or print +-- reduce = specification.reduce or false +-- functions = specification.functions +-- compact = specification.compact +-- inline = specification.inline and compact +-- if functions == nil then +-- functions = true +-- end +-- if compact == nil then +-- compact = true +-- end +-- if inline == nil then +-- inline = compact +-- end +-- else +-- noquotes = false +-- hexify = false +-- handle = _handle or print +-- reduce = false +-- compact = true +-- inline = true +-- functions = true +-- end +-- if tname == "string" then +-- if name == "return" then +-- handle("return {") +-- else +-- handle(name .. "={") +-- end +-- elseif tname == "number" then +-- if hexify then +-- handle(format("[0x%04X]={",name)) +-- else +-- handle("[" .. name .. "]={") +-- end +-- elseif tname == "boolean" then +-- if name then +-- handle("return {") +-- else +-- handle("{") +-- end +-- else +-- handle("t={") +-- end +-- if root then +-- -- The dummy access will initialize a table that has a delayed initialization +-- -- using a metatable. (maybe explicitly test for metatable) +-- if getmetatable(root) then -- todo: make this an option, maybe even per subtable +-- local dummy = root._w_h_a_t_e_v_e_r_ +-- root._w_h_a_t_e_v_e_r_ = nil +-- end +-- -- Let's forget about empty tables. +-- if next(root) then +-- do_serialize(root,name,0) +-- end +-- end +-- handle("}") +-- end + +-- name: +-- +-- true : return { } +-- false : { } +-- nil : t = { } +-- string : string = { } +-- "return" : return { } +-- number : [number] = { } function table.serialize(root,name,specification) local t, n = { }, 0 @@ -708,6 +1048,13 @@ function table.serialize(root,name,specification) return concat(t,"\n") end +-- local a = { e = { 1,2,3,4,5,6}, a = 1, b = 2, c = "ccc", d = { a = 1, b = 2, c = "ccc", d = { a = 1, b = 2, c = "ccc" } } } +-- local t = os.clock() +-- for i=1,10000 do +-- table.serialize(a) +-- end +-- print(os.clock()-t,table.serialize(a)) + table.tohandle = serialize -- sometimes tables are real use (zapfino extra pro is some 85M) in which @@ -752,7 +1099,7 @@ local function flattened(t,f,depth) f = { } depth = 0xFFFF elseif tonumber(f) then - -- assume then only two arguments are given + -- assume that only two arguments are given depth = f f = { } elseif not depth then @@ -785,7 +1132,7 @@ table.flattened = flattened local function unnest(t,f) -- only used in mk, for old times sake if not f then -- and only relevant for token lists - f = { } + f = { } -- this one can become obsolete end for i=1,#t do local v = t[i] @@ -814,7 +1161,7 @@ local function are_equal(a,b,n,m) -- indexed local ai, bi = a[i], b[i] if ai==bi then -- same - elseif type(ai)=="table" and type(bi)=="table" then + elseif type(ai) == "table" and type(bi) == "table" then if not are_equal(ai,bi) then return false end @@ -849,10 +1196,10 @@ table.are_equal = are_equal -- maybe also make a combined one -function table.compact(t) +function table.compact(t) -- remove empty tables, assumes subtables if t then - for k,v in next, t do - if not next(v) then + for k, v in next, t do + if not next(v) then -- no type checking t[k] = nil end end @@ -881,25 +1228,25 @@ end function table.swapped(t,s) -- hash local n = { } if s then ---~ for i=1,#s do ---~ n[i] = s[i] ---~ end for k, v in next, s do n[k] = v end end ---~ for i=1,#t do ---~ local ti = t[i] -- don't ask but t[i] can be nil ---~ if ti then ---~ n[ti] = i ---~ end ---~ end for k, v in next, t do n[v] = k end return n end +function table.mirrored(t) -- hash + local n = { } + for k, v in next, t do + n[v] = k + n[k] = v + end + return n +end + function table.reversed(t) if t then local tt, tn = { }, #t @@ -914,9 +1261,31 @@ function table.reversed(t) end end -function table.sequenced(t,sep) -- hash only +function table.reverse(t) if t then - local s, n = { }, 0 + local n = #t + for i=1,floor(n/2) do + local j = n - i + 1 + t[i], t[j] = t[j], t[i] + end + return t + end +end + +function table.sequenced(t,sep,simple) -- hash only + if not t then + return "" + end + local n = #t + local s = { } + if n > 0 then + -- indexed + for i=1,n do + s[i] = tostring(t[i]) + end + else + -- hashed + n = 0 for k, v in sortedhash(t) do if simple then if v == true then @@ -931,20 +1300,20 @@ function table.sequenced(t,sep) -- hash only s[n] = k .. "=" .. tostring(v) end end - return concat(s, sep or " | ") - else - return "" end + return concat(s,sep or " | ") end function table.print(t,...) if type(t) ~= "table" then print(tostring(t)) else - table.tohandle(print,t,...) + serialize(print,t,...) end end +setinspector(function(v) if type(v) == "table" then serialize(print,v,"table") return true end end) + -- -- -- obsolete but we keep them for a while and might comment them later -- -- -- -- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack) @@ -990,7 +1359,7 @@ function table.unique(old) return new end --- function table.sorted(t,...) --- table.sort(t,...) --- return t -- still sorts in-place --- end +function table.sorted(t,...) + sort(t,...) + return t -- still sorts in-place +end -- cgit v1.2.3 From 44173ba49717a356edf1c09f892f6909cad43481 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 18:10:32 +0200 Subject: update l-boolean --- lualibs-boolean.lua | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/lualibs-boolean.lua b/lualibs-boolean.lua index 2b94de7..f087f1a 100644 --- a/lualibs-boolean.lua +++ b/lualibs-boolean.lua @@ -15,7 +15,7 @@ function boolean.tonumber(b) if b then return 1 else return 0 end -- test and return or return end -function toboolean(str,tolerant) +function toboolean(str,tolerant) -- global if str == nil then return false elseif str == false then @@ -40,18 +40,16 @@ end string.toboolean = toboolean function string.booleanstring(str) - if str == nil then - return false - elseif str == false then + if str == "0" then return false - elseif str == true then - return true - elseif str == "true" then + elseif str == "1" then return true - elseif str == "false" then + elseif str == "" then return false - elseif str == 0 then + elseif str == "false" then return false + elseif str == "true" then + return true elseif (tonumber(str) or 0) > 0 then return true else -- cgit v1.2.3 From b7f568675411e081301e589c6efb863a2d844817 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 18:23:34 +0200 Subject: update l-number --- lualibs-number.lua | 241 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 157 insertions(+), 84 deletions(-) diff --git a/lualibs-number.lua b/lualibs-number.lua index a4dbe3b..001ca31 100644 --- a/lualibs-number.lua +++ b/lualibs-number.lua @@ -6,7 +6,8 @@ if not modules then modules = { } end modules ['l-number'] = { license = "see context related readme files" } --- this module will be replaced when we have the bit library +-- this module will be replaced when we have the bit library .. the number based sets +-- might go away local tostring, tonumber = tostring, tonumber local format, floor, match, rep = string.format, math.floor, string.match, string.rep @@ -16,10 +17,129 @@ local lpegmatch = lpeg.match number = number or { } local number = number --- a,b,c,d,e,f = number.toset(100101) +if bit32 then -- I wonder if this is faster + + local btest, bor = bit32.btest, bit32.bor + + function number.bit(p) + return 2 ^ (p - 1) -- 1-based indexing + end + + number.hasbit = btest + number.setbit = bor + + function number.setbit(x,p) -- why not bor? + return btest(x,p) and x or x + p + end + + function number.clearbit(x,p) + return btest(x,p) and x - p or x + end + +else + + -- http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html + + function number.bit(p) + return 2 ^ (p - 1) -- 1-based indexing + end + + function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... + return x % (p + p) >= p + end + + function number.setbit(x, p) + return (x % (p + p) >= p) and x or x + p + end + + function number.clearbit(x, p) + return (x % (p + p) >= p) and x - p or x + end -function number.toset(n) - return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)") +end + +-- print(number.tobitstring(8)) +-- print(number.tobitstring(14)) +-- print(number.tobitstring(66)) +-- print(number.tobitstring(0x00)) +-- print(number.tobitstring(0xFF)) +-- print(number.tobitstring(46260767936,4)) + +if bit32 then + + local bextract = bit32.extract + + local t = { + "0", "0", "0", "0", "0", "0", "0", "0", + "0", "0", "0", "0", "0", "0", "0", "0", + "0", "0", "0", "0", "0", "0", "0", "0", + "0", "0", "0", "0", "0", "0", "0", "0", + } + + function number.tobitstring(b,m) + -- if really needed we can speed this one up + -- because small numbers need less extraction + local n = 32 + for i=0,31 do + local v = bextract(b,i) + local k = 32 - i + if v == 1 then + n = k + t[k] = "1" + else + t[k] = "0" + end + end + if m then + m = 33 - m * 8 + if m < 1 then + m = 1 + end + return concat(t,"",m) + elseif n < 8 then + return concat(t) + elseif n < 16 then + return concat(t,"",9) + elseif n < 24 then + return concat(t,"",17) + else + return concat(t,"",25) + end + end + +else + + function number.tobitstring(n,m) + if n > 0 then + local t = { } + while n > 0 do + insert(t,1,n % 2 > 0 and 1 or 0) + n = floor(n/2) + end + local nn = 8 - #t % 8 + if nn > 0 and nn < 8 then + for i=1,nn do + insert(t,1,0) + end + end + if m then + m = m * 8 - #t + if m > 0 then + insert(t,1,rep("0",m)) + end + end + return concat(t) + elseif m then + rep("00000000",m) + else + return "00000000" + end + end + +end + +function number.valid(str,default) + return tonumber(str) or default or nil end function number.toevenhex(n) @@ -31,104 +151,57 @@ function number.toevenhex(n) end end --- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5% --- on +-- a,b,c,d,e,f = number.toset(100101) +-- +-- function number.toset(n) +-- return match(tostring(n),"(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)") +-- end +-- +-- -- the lpeg way is slower on 8 digits, but faster on 4 digits, some 7.5% +-- -- on -- -- for i=1,1000000 do -- local a,b,c,d,e,f,g,h = number.toset(12345678) -- local a,b,c,d = number.toset(1234) -- local a,b,c = number.toset(123) +-- local a,b,c = number.toset("123") -- end --- --- of course dedicated "(.)(.)(.)(.)" matches are even faster -local one = lpeg.C(1-lpeg.S(''))^1 +local one = lpeg.C(1-lpeg.S('')/tonumber)^1 function number.toset(n) return lpegmatch(one,tostring(n)) end -function number.bits(n,zero) - local t, i = { }, (zero and 0) or 1 - while n > 0 do +-- function number.bits(n,zero) +-- local t, i = { }, (zero and 0) or 1 +-- while n > 0 do +-- local m = n % 2 +-- if m > 0 then +-- insert(t,1,i) +-- end +-- n = floor(n/2) +-- i = i + 1 +-- end +-- return t +-- end +-- +-- -- a bit faster + +local function bits(n,i,...) + if n > 0 then local m = n % 2 + local n = floor(n/2) if m > 0 then - insert(t,1,i) - end - n = floor(n/2) - i = i + 1 - end - return t -end - ---~ http://ricilake.blogspot.com/2007/10/iterating-bits-in-lua.html - -function number.bit(p) - return 2 ^ (p - 1) -- 1-based indexing -end - -function number.hasbit(x, p) -- typical call: if hasbit(x, bit(3)) then ... - return x % (p + p) >= p -end - -function number.setbit(x, p) - return (x % (p + p) >= p) and x or x + p -end - -function number.clearbit(x, p) - return (x % (p + p) >= p) and x - p or x -end - ---~ function number.tobitstring(n) ---~ if n == 0 then ---~ return "0" ---~ else ---~ local t = { } ---~ while n > 0 do ---~ insert(t,1,n % 2 > 0 and 1 or 0) ---~ n = floor(n/2) ---~ end ---~ return concat(t) ---~ end ---~ end - -function number.tobitstring(n,m) - if n == 0 then - if m then - rep("00000000",m) + return bits(n, i+1, i, ...) else - return "00000000" + return bits(n, i+1, ...) end else - local t = { } - while n > 0 do - insert(t,1,n % 2 > 0 and 1 or 0) - n = floor(n/2) - end - local nn = 8 - #t % 8 - if nn > 0 and nn < 8 then - for i=1,nn do - insert(t,1,0) - end - end - if m then - m = m * 8 - #t - if m > 0 then - insert(t,1,rep("0",m)) - end - end - return concat(t) + return ... end end ---~ print(number.tobitstring(8)) ---~ print(number.tobitstring(14)) ---~ print(number.tobitstring(66)) ---~ print(number.tobitstring(0x00)) ---~ print(number.tobitstring(0xFF)) ---~ print(number.tobitstring(46260767936,8)) ---~ print(#number.tobitstring(46260767936,6)) - -function number.valid(str,default) - return tonumber(str) or default or nil +function number.bits(n) + return { bits(n,1) } end -- cgit v1.2.3 From be46e9f6cb71c1201e3e8ad2211e0289eb708821 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 19:03:12 +0200 Subject: update l-io --- lualibs-io.lua | 120 +++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 92 insertions(+), 28 deletions(-) diff --git a/lualibs-io.lua b/lualibs-io.lua index 657b755..06e1fb5 100644 --- a/lualibs-io.lua +++ b/lualibs-io.lua @@ -9,6 +9,7 @@ if not modules then modules = { } end modules ['l-io'] = { local io = io local byte, find, gsub, format = string.byte, string.find, string.gsub, string.format local concat = table.concat +local floor = math.floor local type = type if string.find(os.getenv("PATH"),";") then @@ -17,10 +18,49 @@ else io.fileseparator, io.pathseparator = "/" , ":" end +local function readall(f) + return f:read("*all") +end + +-- The next one is upto 50% faster on large files and less memory consumption due +-- to less intermediate large allocations. This phenomena was discussed on the +-- luatex dev list. + +local function readall(f) + local size = f:seek("end") + if size == 0 then + return "" + elseif size < 1024*1024 then + f:seek("set",0) + return f:read('*all') + else + local done = f:seek("set",0) + if size < 1024*1024 then + step = 1024 * 1024 + elseif size > 16*1024*1024 then + step = 16*1024*1024 + else + step = floor(size/(1024*1024)) * 1024 * 1024 / 8 + end + local data = { } + while true do + local r = f:read(step) + if not r then + return concat(data) + else + data[#data+1] = r + end + end + end +end + +io.readall = readall + function io.loaddata(filename,textmode) -- return nil if empty local f = io.open(filename,(textmode and 'r') or 'rb') if f then - local data = f:read('*all') +-- local data = f:read('*all') + local data = readall(f) f:close() if #data > 0 then return data @@ -46,31 +86,33 @@ function io.savedata(filename,data,joiner) end end +-- we can also chunk this one if needed: io.lines(filename,chunksize,"*l") + function io.loadlines(filename,n) -- return nil if empty local f = io.open(filename,'r') - if f then - if n then - local lines = { } - for i=1,n do - local line = f:read("*lines") - if line then - lines[#lines+1] = line - else - break - end - end - f:close() - lines = concat(lines,"\n") - if #lines > 0 then - return lines - end - else - local line = f:read("*line") or "" - assert(f:close()) - if #line > 0 then - return line + if not f then + -- no file + elseif n then + local lines = { } + for i=1,n do + local line = f:read("*lines") + if line then + lines[#lines+1] = line + else + break end end + f:close() + lines = concat(lines,"\n") + if #lines > 0 then + return lines + end + else + local line = f:read("*line") or "" + f:close() + if #line > 0 then + return line + end end end @@ -90,7 +132,7 @@ function io.exists(filename) if f == nil then return false else - assert(f:close()) + f:close() return true end end @@ -101,7 +143,7 @@ function io.size(filename) return 0 else local s = f:seek("end") - assert(f:close()) + f:close() return s end end @@ -109,9 +151,13 @@ end function io.noflines(f) if type(f) == "string" then local f = io.open(filename) - local n = f and io.noflines(f) or 0 - assert(f:close()) - return n + if f then + local n = f and io.noflines(f) or 0 + f:close() + return n + else + return 0 + end else local n = 0 for _ in f:lines() do @@ -288,7 +334,7 @@ function io.readstring(f,n,m) f:seek("set",n) n = m end - local str = gsub(f:read(n),"%z","") + local str = gsub(f:read(n),"\000","") return str end @@ -296,3 +342,21 @@ end if not io.i_limiter then function io.i_limiter() end end -- dummy so we can test safely if not io.o_limiter then function io.o_limiter() end end -- dummy so we can test safely + +-- This works quite ok: +-- +-- function io.piped(command,writer) +-- local pipe = io.popen(command) +-- -- for line in pipe:lines() do +-- -- print(line) +-- -- end +-- while true do +-- local line = pipe:read(1) +-- if not line then +-- break +-- elseif line ~= "\n" then +-- writer(line) +-- end +-- end +-- return pipe:close() -- ok, status, (error)code +-- end -- cgit v1.2.3 From cca1eef95a689eb483f566b34e2496f2b690ba95 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 19:12:11 +0200 Subject: update l-os --- lualibs-os.lua | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/lualibs-os.lua b/lualibs-os.lua index 799f449..42f3e48 100644 --- a/lualibs-os.lua +++ b/lualibs-os.lua @@ -157,7 +157,7 @@ function os.launch(str) os.execute(format(launchers[os.name] or launchers.unix,str)) end -if not os.times then +if not os.times then -- ? -- utime = user time -- stime = system time -- cutime = children user time @@ -193,14 +193,10 @@ os.resolvers = os.resolvers or { } -- will become private local resolvers = os.resolvers -local osmt = getmetatable(os) or { __index = function(t,k) t[k] = "unset" return "unset" end } -- maybe nil -local osix = osmt.__index - -osmt.__index = function(t,k) - return (resolvers[k] or osix)(t,k) -end - -setmetatable(os,osmt) +setmetatable(os, { __index = function(t,k) + local r = resolvers[k] + return r and r(t,k) or nil -- no memoize +end }) -- we can use HOSTTYPE on some platforms @@ -448,6 +444,18 @@ end os.which = which os.where = which +function os.today() + return date("!*t") -- table with values +end + +function os.now() + return date("!%Y-%m-%d %H:%M:%S") -- 2011-12-04 14:59:12 +end + +if not os.sleep and socket then + os.sleep = socket.sleep +end + -- print(os.which("inkscape.exe")) -- print(os.which("inkscape")) -- print(os.which("gs.exe")) -- cgit v1.2.3 From 83be008e1e5e06a01ca5cce672c553fe97cac4d8 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 19:20:24 +0200 Subject: update l-file --- lualibs-file.lua | 682 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 367 insertions(+), 315 deletions(-) diff --git a/lualibs-file.lua b/lualibs-file.lua index bea4ef0..af86f93 100644 --- a/lualibs-file.lua +++ b/lualibs-file.lua @@ -11,67 +11,228 @@ if not modules then modules = { } end modules ['l-file'] = { file = file or { } local file = file +if not lfs then + lfs = optionalrequire("lfs") +end + +if not lfs then + + lfs = { + getcurrentdir = function() + return "." + end, + attributes = function() + return nil + end, + isfile = function(name) + local f = io.open(name,'rb') + if f then + f:close() + return true + end + end, + isdir = function(name) + print("you need to load lfs") + return false + end + } + +elseif not lfs.isfile then + + local attributes = lfs.attributes + + function lfs.isdir(name) + return attributes(name,"mode") == "directory" + end + + function lfs.isfile(name) + return attributes(name,"mode") == "file" + end + + -- function lfs.isdir(name) + -- local a = attributes(name) + -- return a and a.mode == "directory" + -- end + + -- function lfs.isfile(name) + -- local a = attributes(name) + -- return a and a.mode == "file" + -- end + +end + local insert, concat = table.insert, table.concat -local find, gmatch, match, gsub, sub, char, lower = string.find, string.gmatch, string.match, string.gsub, string.sub, string.char, string.lower +local match = string.match local lpegmatch = lpeg.match local getcurrentdir, attributes = lfs.currentdir, lfs.attributes +local checkedsplit = string.checkedsplit + +-- local patterns = file.patterns or { } +-- file.patterns = patterns + +local P, R, S, C, Cs, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cp, lpeg.Cc, lpeg.Ct -local P, R, S, C, Cs, Cp, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cs, lpeg.Cp, lpeg.Cc +local colon = P(":") +local period = P(".") +local periods = P("..") +local fwslash = P("/") +local bwslash = P("\\") +local slashes = S("\\/") +local noperiod = 1-period +local noslashes = 1-slashes +local name = noperiod^1 +local suffix = period/"" * (1-period-slashes)^1 * -1 -local function dirname(name,default) - return match(name,"^(.+)[/\\].-$") or (default or "") +----- pattern = C((noslashes^0 * slashes^1)^1) +local pattern = C((1 - (slashes^1 * noslashes^1 * -1))^1) * P(1) -- there must be a more efficient way + +local function pathpart(name,default) + return name and lpegmatch(pattern,name) or default or "" end +local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1 + local function basename(name) - return match(name,"^.+[/\\](.-)$") or name + return name and lpegmatch(pattern,name) or name end --- local function nameonly(name) --- return (gsub(match(name,"^.+[/\\](.-)$") or name,"%..*$","")) --- end +-- print(pathpart("file")) +-- print(pathpart("dir/file")) +-- print(pathpart("/dir/file")) +-- print(basename("file")) +-- print(basename("dir/file")) +-- print(basename("/dir/file")) + +local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0 local function nameonly(name) - return (gsub(match(name,"^.+[/\\](.-)$") or name,"%.[%a%d]+$","")) + return name and lpegmatch(pattern,name) or name end -local function suffixonly(name,default) - return match(name,"^.+%.([^/\\]-)$") or default or "" -end +local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1 -local function splitname(name) - local n, s = match(name,"^(.+)%.([^/\\]-)$") - return n or name, s or "" +local function suffixonly(name) + return name and lpegmatch(pattern,name) or "" end +file.pathpart = pathpart file.basename = basename - -file.pathpart = dirname -file.dirname = dirname - file.nameonly = nameonly - file.suffixonly = suffixonly -file.extname = suffixonly -- obsolete file.suffix = suffixonly -function file.removesuffix(filename) - return (gsub(filename,"%.[%a%d]+$","")) +file.dirname = pathpart -- obsolete +file.extname = suffixonly -- obsolete + +-- actually these are schemes + +local drive = C(R("az","AZ")) * colon +local path = C((noslashes^0 * slashes)^0) +local suffix = period * C(P(1-period)^0 * P(-1)) +local base = C((1-suffix)^0) +local rest = C(P(1)^0) + +drive = drive + Cc("") +path = path + Cc("") +base = base + Cc("") +suffix = suffix + Cc("") + +local pattern_a = drive * path * base * suffix +local pattern_b = path * base * suffix +local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra captures +local pattern_d = path * rest + +function file.splitname(str,splitdrive) + if not str then + -- error + elseif splitdrive then + return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix + else + return lpegmatch(pattern_b,str) -- returns path, base, suffix + end +end + +function file.splitbase(str) + return str and lpegmatch(pattern_d,str) -- returns path, base+suffix end -function file.addsuffix(filename, suffix, criterium) - if not suffix or suffix == "" then +---- stripslash = C((1 - P("/")^1*P(-1))^0) + +function file.nametotable(str,splitdrive) + if str then + local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) + -- if path ~= "" then + -- path = lpegmatch(stripslash,path) -- unfortunate hack, maybe this becomes default + -- end + if splitdrive then + return { + path = path, + drive = drive, + subpath = subpath, + name = name, + base = base, + suffix = suffix, + } + else + return { + path = path, + name = name, + base = base, + suffix = suffix, + } + end + end +end + +-- print(file.splitname("file")) +-- print(file.splitname("dir/file")) +-- print(file.splitname("/dir/file")) +-- print(file.splitname("file")) +-- print(file.splitname("dir/file")) +-- print(file.splitname("/dir/file")) + +-- inspect(file.nametotable("file.ext")) +-- inspect(file.nametotable("dir/file.ext")) +-- inspect(file.nametotable("/dir/file.ext")) +-- inspect(file.nametotable("file.ext")) +-- inspect(file.nametotable("dir/file.ext")) +-- inspect(file.nametotable("/dir/file.ext")) + +----- pattern = Cs(((period * noperiod^1 * -1) / "" + 1)^1) +local pattern = Cs(((period * (1-period-slashes)^1 * -1) / "" + 1)^1) + +function file.removesuffix(name) + return name and lpegmatch(pattern,name) +end + +-- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 +-- +-- function file.addsuffix(name, suffix) +-- local p = lpegmatch(pattern,name) +-- if p then +-- return name +-- else +-- return name .. "." .. suffix +-- end +-- end + +local suffix = period/"" * (1-period-slashes)^1 * -1 +local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix) + +function file.addsuffix(filename,suffix,criterium) + if not filename or not suffix or suffix == "" then return filename elseif criterium == true then return filename .. "." .. suffix elseif not criterium then - local n, s = splitname(filename) + local n, s = lpegmatch(pattern,filename) if not s or s == "" then return filename .. "." .. suffix else return filename end else - local n, s = splitname(filename) + local n, s = lpegmatch(pattern,filename) if s and s ~= "" then local t = type(criterium) if t == "table" then @@ -88,84 +249,54 @@ function file.addsuffix(filename, suffix, criterium) end end end - return n .. "." .. suffix + return (n or filename) .. "." .. suffix end end ---~ print("1 " .. file.addsuffix("name","new") .. " -> name.new") ---~ print("2 " .. file.addsuffix("name.old","new") .. " -> name.old") ---~ print("3 " .. file.addsuffix("name.old","new",true) .. " -> name.old.new") ---~ print("4 " .. file.addsuffix("name.old","new","new") .. " -> name.new") ---~ print("5 " .. file.addsuffix("name.old","new","old") .. " -> name.old") ---~ print("6 " .. file.addsuffix("name.old","new","foo") .. " -> name.new") ---~ print("7 " .. file.addsuffix("name.old","new",{"foo","bar"}) .. " -> name.new") ---~ print("8 " .. file.addsuffix("name.old","new",{"old","bar"}) .. " -> name.old") - -function file.replacesuffix(filename, suffix) - return (gsub(filename,"%.[%a%d]+$","")) .. "." .. suffix -end - ---~ function file.join(...) ---~ local pth = concat({...},"/") ---~ pth = gsub(pth,"\\","/") ---~ local a, b = match(pth,"^(.*://)(.*)$") ---~ if a and b then ---~ return a .. gsub(b,"//+","/") ---~ end ---~ a, b = match(pth,"^(//)(.*)$") ---~ if a and b then ---~ return a .. gsub(b,"//+","/") ---~ end ---~ return (gsub(pth,"//+","/")) ---~ end - -local trick_1 = char(1) -local trick_2 = "^" .. trick_1 .. "/+" - -function file.join(...) -- rather dirty - local lst = { ... } - local a, b = lst[1], lst[2] - if not a or a == "" then -- not a added - lst[1] = trick_1 - elseif b and find(a,"^/+$") and find(b,"^/") then - lst[1] = "" - lst[2] = gsub(b,"^/+","") - end - local pth = concat(lst,"/") - pth = gsub(pth,"\\","/") - local a, b = match(pth,"^(.*://)(.*)$") - if a and b then - return a .. gsub(b,"//+","/") - end - a, b = match(pth,"^(//)(.*)$") - if a and b then - return a .. gsub(b,"//+","/") +-- print("1 " .. file.addsuffix("name","new") .. " -> name.new") +-- print("2 " .. file.addsuffix("name.old","new") .. " -> name.old") +-- print("3 " .. file.addsuffix("name.old","new",true) .. " -> name.old.new") +-- print("4 " .. file.addsuffix("name.old","new","new") .. " -> name.new") +-- print("5 " .. file.addsuffix("name.old","new","old") .. " -> name.old") +-- print("6 " .. file.addsuffix("name.old","new","foo") .. " -> name.new") +-- print("7 " .. file.addsuffix("name.old","new",{"foo","bar"}) .. " -> name.new") +-- print("8 " .. file.addsuffix("name.old","new",{"old","bar"}) .. " -> name.old") + +local suffix = period * (1-period-slashes)^1 * -1 +local pattern = Cs((1-suffix)^0) + +function file.replacesuffix(name,suffix) + if name and suffix and suffix ~= "" then + return lpegmatch(pattern,name) .. "." .. suffix + else + return name end - pth = gsub(pth,trick_2,"") - return (gsub(pth,"//+","/")) end ---~ print(file.join("//","/y")) ---~ print(file.join("/","/y")) ---~ print(file.join("","/y")) ---~ print(file.join("/x/","/y")) ---~ print(file.join("x/","/y")) ---~ print(file.join("http://","/y")) ---~ print(file.join("http://a","/y")) ---~ print(file.join("http:///a","/y")) ---~ print(file.join("//nas-1","/y")) +-- + +local reslasher = lpeg.replacer(P("\\"),"/") + +function file.reslash(str) + return str and lpegmatch(reslasher,str) +end -- We should be able to use: -- +-- local writable = P(1) * P("w") * Cc(true) +-- -- function file.is_writable(name) --- local a = attributes(name) or attributes(dirname(name,".")) --- return a and sub(a.permissions,2,2) == "w" +-- local a = attributes(name) or attributes(pathpart(name,".")) +-- return a and lpegmatch(writable,a.permissions) or false -- end -- --- But after some testing Taco and I came up with: +-- But after some testing Taco and I came up with the more robust +-- variant: function file.is_writable(name) - if lfs.isdir(name) then + if not name then + -- error + elseif lfs.isdir(name) then name = name .. "/m_t_x_t_e_s_t.tmp" local f = io.open(name,"wb") if f then @@ -190,77 +321,108 @@ function file.is_writable(name) return false end +local readable = P("r") * Cc(true) + function file.is_readable(name) - local a = attributes(name) - return a and sub(a.permissions,1,1) == "r" + if name then + local a = attributes(name) + return a and lpegmatch(readable,a.permissions) or false + else + return false + end end file.isreadable = file.is_readable -- depricated file.iswritable = file.is_writable -- depricated function file.size(name) - local a = attributes(name) - return a and a.size or 0 + if name then + local a = attributes(name) + return a and a.size or 0 + else + return 0 + end end --- todo: lpeg \\ / .. does not save much - -local checkedsplit = string.checkedsplit - -function file.splitpath(str,separator) -- string - str = gsub(str,"\\","/") - return checkedsplit(str,separator or io.pathseparator) +function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split) + return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) end function file.joinpath(tab,separator) -- table - return concat(tab,separator or io.pathseparator) -- can have trailing // + return tab and concat(tab,separator or io.pathseparator) -- can have trailing // end --- we can hash them weakly - ---~ function file.collapsepath(str) -- fails on b.c/.. ---~ str = gsub(str,"\\","/") ---~ if find(str,"/") then ---~ str = gsub(str,"^%./",(gsub(getcurrentdir(),"\\","/")) .. "/") -- ./xx in qualified ---~ str = gsub(str,"/%./","/") ---~ local n, m = 1, 1 ---~ while n > 0 or m > 0 do ---~ str, n = gsub(str,"[^/%.]+/%.%.$","") ---~ str, m = gsub(str,"[^/%.]+/%.%./","") ---~ end ---~ str = gsub(str,"([^/])/$","%1") ---~ -- str = gsub(str,"^%./","") -- ./xx in qualified ---~ str = gsub(str,"/%.$","") ---~ end ---~ if str == "" then str = "." end ---~ return str ---~ end ---~ ---~ The previous one fails on "a.b/c" so Taco came up with a split based ---~ variant. After some skyping we got it sort of compatible with the old ---~ one. After that the anchoring to currentdir was added in a better way. ---~ Of course there are some optimizations too. Finally we had to deal with ---~ windows drive prefixes and things like sys://. +local stripper = Cs(P(fwslash)^0/"" * reslasher) +local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon +local isroot = fwslash^1 * -1 +local hasroot = fwslash^1 + +local deslasher = lpeg.replacer(S("\\/")^1,"/") + +-- If we have a network or prefix then there is a change that we end up with two +-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2) +-- split and rebuild as url. Of course we could assume no network paths (which +-- makes sense) adn assume either mapped drives (windows) or mounts (unix) but +-- then we still have to deal with urls ... anyhow, multiple // are never a real +-- problem but just ugly. + +function file.join(...) + local lst = { ... } + local one = lst[1] + if lpegmatch(isnetwork,one) then + local two = lpegmatch(deslasher,concat(lst,"/",2)) + return one .. "/" .. two + elseif lpegmatch(isroot,one) then + local two = lpegmatch(deslasher,concat(lst,"/",2)) + if lpegmatch(hasroot,two) then + return two + else + return "/" .. two + end + elseif one == "" then + return lpegmatch(stripper,concat(lst,"/",2)) + else + return lpegmatch(deslasher,concat(lst,"/")) + end +end + +-- print(file.join("c:/whatever","name")) +-- print(file.join("//","/y")) +-- print(file.join("/","/y")) +-- print(file.join("","/y")) +-- print(file.join("/x/","/y")) +-- print(file.join("x/","/y")) +-- print(file.join("http://","/y")) +-- print(file.join("http://a","/y")) +-- print(file.join("http:///a","/y")) +-- print(file.join("//nas-1","/y")) + +-- The previous one fails on "a.b/c" so Taco came up with a split based +-- variant. After some skyping we got it sort of compatible with the old +-- one. After that the anchoring to currentdir was added in a better way. +-- Of course there are some optimizations too. Finally we had to deal with +-- windows drive prefixes and things like sys://. Eventually gsubs and +-- finds were replaced by lpegs. + +local drivespec = R("az","AZ")^1 * colon +local anchors = fwslash + drivespec +local untouched = periods + (1-period)^1 * P(-1) +local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * Ct(lpeg.splitat(S("/\\")^1)) +local absolute = fwslash function file.collapsepath(str,anchor) - if anchor and not find(str,"^/") and not find(str,"^%a:") then + if not str then + return + end + if anchor and not lpegmatch(anchors,str) then str = getcurrentdir() .. "/" .. str end if str == "" or str =="." then return "." - elseif find(str,"^%.%.") then - str = gsub(str,"\\","/") - return str - elseif not find(str,"%.") then - str = gsub(str,"\\","/") - return str + elseif lpegmatch(untouched,str) then + return lpegmatch(reslasher,str) end - str = gsub(str,"\\","/") - local starter, rest = match(str,"^(%a+:/*)(.-)$") - if starter then - str = rest - end - local oldelements = checkedsplit(str,"/") + local starter, oldelements = lpegmatch(splitstarter,str) local newelements = { } local i = #oldelements while i > 0 do @@ -290,29 +452,37 @@ function file.collapsepath(str,anchor) return starter or "." elseif starter then return starter .. concat(newelements, '/') - elseif find(str,"^/") then + elseif lpegmatch(absolute,str) then return "/" .. concat(newelements,'/') else return concat(newelements, '/') end end ---~ local function test(str) ---~ print(string.format("%-20s %-15s %-15s",str,file.collapsepath(str),file.collapsepath(str,true))) ---~ end ---~ test("a/b.c/d") test("b.c/d") test("b.c/..") ---~ test("/") test("c:/..") test("sys://..") ---~ test("") test("./") test(".") test("..") test("./..") test("../..") ---~ test("a") test("./a") test("/a") test("a/../..") ---~ test("a/./b/..") test("a/aa/../b/bb") test("a/.././././b/..") test("a/./././b/..") ---~ test("a/b/c/../..") test("./a/b/c/../..") test("a/b/c/../..") +-- local function test(str) +-- print(string.format("%-20s %-15s %-15s",str,file.collapsepath(str),file.collapsepath(str,true))) +-- end +-- test("a/b.c/d") test("b.c/d") test("b.c/..") +-- test("/") test("c:/..") test("sys://..") +-- test("") test("./") test(".") test("..") test("./..") test("../..") +-- test("a") test("./a") test("/a") test("a/../..") +-- test("a/./b/..") test("a/aa/../b/bb") test("a/.././././b/..") test("a/./././b/..") +-- test("a/b/c/../..") test("./a/b/c/../..") test("a/b/c/../..") + +local validchars = R("az","09","AZ","--","..") +local pattern_a = lpeg.replacer(1-validchars) +local pattern_a = Cs((validchars + P(1)/"-")^1) +local whatever = P("-")^0 / "" +local pattern_b = Cs(whatever * (1 - whatever * -1)^1) function file.robustname(str,strict) - str = gsub(str,"[^%a%d%/%-%.\\]+","-") - if strict then - return lower(gsub(str,"^%-*(.-)%-*$","%1")) - else - return str + if str then + str = lpegmatch(pattern_a,str) or str + if strict then + return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) + else + return str + end end end @@ -320,105 +490,25 @@ file.readdata = io.loaddata file.savedata = io.savedata function file.copy(oldname,newname) - file.savedata(newname,io.loaddata(oldname)) + if oldname and newname then + local data = io.loaddata(oldname) + if data and data ~= "" then + file.savedata(newname,data) + end + end end --- lpeg variants, slightly faster, not always - ---~ local period = P(".") ---~ local slashes = S("\\/") ---~ local noperiod = 1-period ---~ local noslashes = 1-slashes ---~ local name = noperiod^1 - ---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1 - ---~ function file.suffixonly(name) ---~ return lpegmatch(pattern,name) or "" ---~ end - ---~ local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1) - ---~ function file.removesuffix(name) ---~ return lpegmatch(pattern,name) ---~ end - ---~ local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1 - ---~ function file.basename(name) ---~ return lpegmatch(pattern,name) or name ---~ end - ---~ local pattern = (noslashes^0 * slashes)^1 * Cp() * noslashes^1 * -1 - ---~ function file.dirname(name) ---~ local p = lpegmatch(pattern,name) ---~ if p then ---~ return sub(name,1,p-2) ---~ else ---~ return "" ---~ end ---~ end - ---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 - ---~ function file.addsuffix(name, suffix) ---~ local p = lpegmatch(pattern,name) ---~ if p then ---~ return name ---~ else ---~ return name .. "." .. suffix ---~ end ---~ end - ---~ local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 - ---~ function file.replacesuffix(name,suffix) ---~ local p = lpegmatch(pattern,name) ---~ if p then ---~ return sub(name,1,p-2) .. "." .. suffix ---~ else ---~ return name .. "." .. suffix ---~ end ---~ end - ---~ local pattern = (noslashes^0 * slashes)^0 * Cp() * ((noperiod^1 * period)^1 * Cp() + P(true)) * noperiod^1 * -1 - ---~ function file.nameonly(name) ---~ local a, b = lpegmatch(pattern,name) ---~ if b then ---~ return sub(name,a,b-2) ---~ elseif a then ---~ return sub(name,a) ---~ else ---~ return name ---~ end ---~ end - ---~ local test = file.suffixonly ---~ local test = file.basename ---~ local test = file.dirname ---~ local test = file.addsuffix ---~ local test = file.replacesuffix ---~ local test = file.nameonly - ---~ print(1,test("./a/b/c/abd.def.xxx","!!!")) ---~ print(2,test("./../b/c/abd.def.xxx","!!!")) ---~ print(3,test("a/b/c/abd.def.xxx","!!!")) ---~ print(4,test("a/b/c/def.xxx","!!!")) ---~ print(5,test("a/b/c/def","!!!")) ---~ print(6,test("def","!!!")) ---~ print(7,test("def.xxx","!!!")) - ---~ local tim = os.clock() for i=1,250000 do local ext = test("abd.def.xxx","!!!") end print(os.clock()-tim) - -- also rewrite previous local letter = R("az","AZ") + S("_-+") local separator = P("://") -local qualified = P(".")^0 * P("/") + letter*P(":") + letter^1*separator + letter^1 * P("/") -local rootbased = P("/") + letter*P(":") +local qualified = period^0 * fwslash + + letter * colon + + letter^1 * separator + + letter^1 * fwslash +local rootbased = fwslash + + letter * colon lpeg.patterns.qualified = qualified lpeg.patterns.rootbased = rootbased @@ -426,68 +516,13 @@ lpeg.patterns.rootbased = rootbased -- ./name ../name /name c: :// name/name function file.is_qualified_path(filename) - return lpegmatch(qualified,filename) ~= nil + return filename and lpegmatch(qualified,filename) ~= nil end function file.is_rootbased_path(filename) - return lpegmatch(rootbased,filename) ~= nil + return filename and lpegmatch(rootbased,filename) ~= nil end --- actually these are schemes - -local slash = S("\\/") -local period = P(".") -local drive = C(R("az","AZ")) * P(":") -local path = C(((1-slash)^0 * slash)^0) -local suffix = period * C(P(1-period)^0 * P(-1)) -local base = C((1-suffix)^0) -local rest = C(P(1)^0) - -drive = drive + Cc("") -path = path + Cc("") -base = base + Cc("") -suffix = suffix + Cc("") - -local pattern_a = drive * path * base * suffix -local pattern_b = path * base * suffix -local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra captures -local pattern_d = path * rest - -function file.splitname(str,splitdrive) - if splitdrive then - return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix - else - return lpegmatch(pattern_b,str) -- returns path, base, suffix - end -end - -function file.splitbase(str) - return lpegmatch(pattern_d,str) -- returns path, base+suffix -end - -function file.nametotable(str,splitdrive) -- returns table - local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) - if splitdrive then - return { - path = path, - drive = drive, - subpath = subpath, - name = name, - base = base, - suffix = suffix, - } - else - return { - path = path, - name = name, - base = base, - suffix = suffix, - } - end -end - --- print(file.splitbase("a/b/c.txt")) - -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end -- -- test { "c:", "c:/aa", "c:/aa/bb", "c:/aa/bb/cc", "c:/aa/bb/cc.dd", "c:/aa/bb/cc.dd.ee" } @@ -495,18 +530,35 @@ end -- test { "/aa", "/aa/bb", "/aa/bb/cc", "/aa/bb/cc.dd", "/aa/bb/cc.dd.ee" } -- test { "aa", "aa/bb", "aa/bb/cc", "aa/bb/cc.dd", "aa/bb/cc.dd.ee" } ---~ -- todo: ---~ ---~ if os.type == "windows" then ---~ local currentdir = getcurrentdir ---~ function getcurrentdir() ---~ return (gsub(currentdir(),"\\","/")) ---~ end ---~ end +-- -- maybe: +-- +-- if os.type == "windows" then +-- local currentdir = getcurrentdir +-- function getcurrentdir() +-- return lpegmatch(reslasher,currentdir()) +-- end +-- end -- for myself: function file.strip(name,dir) - local b, a = match(name,"^(.-)" .. dir .. "(.*)$") - return a ~= "" and a or name + if name then + local b, a = match(name,"^(.-)" .. dir .. "(.*)$") + return a ~= "" and a or name + end end + +-- local debuglist = { +-- "pathpart", "basename", "nameonly", "suffixonly", "suffix", "dirname", "extname", +-- "addsuffix", "removesuffix", "replacesuffix", "join", +-- "strip","collapsepath", "joinpath", "splitpath", +-- } + +-- for i=1,#debuglist do +-- local name = debuglist[i] +-- local f = file[name] +-- file[name] = function(...) +-- print(name,f(...)) +-- return f(...) +-- end +-- end -- cgit v1.2.3 From abbbb9d4ddb7c864b671199cda404ad4cb33ccfa Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 19:23:35 +0200 Subject: update l-md5 --- lualibs-md5.lua | 57 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/lualibs-md5.lua b/lualibs-md5.lua index 6abf2e1..8ac20a5 100644 --- a/lualibs-md5.lua +++ b/lualibs-md5.lua @@ -7,29 +7,60 @@ if not modules then modules = { } end modules ['l-md5'] = { -- This also provides file checksums and checkers. +if not md5 then + md5 = optionalrequire("md5") +end + +if not md5 then + md5 = { + sum = function(str) print("error: md5 is not loaded (sum ignored)") return str end, + sumhexa = function(str) print("error: md5 is not loaded (sumhexa ignored)") return str end, + } +end + local md5, file = md5, file local gsub, format, byte = string.gsub, string.format, string.byte +local md5sum = md5.sum local function convert(str,fmt) - return (gsub(md5.sum(str),".",function(chr) return format(fmt,byte(chr)) end)) + return (gsub(md5sum(str),".",function(chr) return format(fmt,byte(chr)) end)) end if not md5.HEX then function md5.HEX(str) return convert(str,"%02X") end end if not md5.hex then function md5.hex(str) return convert(str,"%02x") end end if not md5.dec then function md5.dec(str) return convert(str,"%03i") end end ---~ if not md5.HEX then ---~ local function remap(chr) return format("%02X",byte(chr)) end ---~ function md5.HEX(str) return (gsub(md5.sum(str),".",remap)) end ---~ end ---~ if not md5.hex then ---~ local function remap(chr) return format("%02x",byte(chr)) end ---~ function md5.hex(str) return (gsub(md5.sum(str),".",remap)) end ---~ end ---~ if not md5.dec then ---~ local function remap(chr) return format("%03i",byte(chr)) end ---~ function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end ---~ end +-- local P, Cs, lpegmatch = lpeg.P, lpeg.Cs,lpeg.match +-- +-- if not md5.HEX then +-- local function remap(chr) return format("%02X",byte(chr)) end +-- function md5.HEX(str) return (gsub(md5.sum(str),".",remap)) end +-- end +-- +-- if not md5.hex then +-- local function remap(chr) return format("%02x",byte(chr)) end +-- function md5.hex(str) return (gsub(md5.sum(str),".",remap)) end +-- end +-- +-- if not md5.dec then +-- local function remap(chr) return format("%03i",byte(chr)) end +-- function md5.dec(str) return (gsub(md5.sum(str),".",remap)) end +-- end + +-- if not md5.HEX then +-- local pattern_HEX = Cs( ( P(1) / function(chr) return format("%02X",byte(chr)) end)^0 ) +-- function md5.HEX(str) return lpegmatch(pattern_HEX,md5.sum(str)) end +-- end +-- +-- if not md5.hex then +-- local pattern_hex = Cs( ( P(1) / function(chr) return format("%02x",byte(chr)) end)^0 ) +-- function md5.hex(str) return lpegmatch(pattern_hex,md5.sum(str)) end +-- end +-- +-- if not md5.dec then +-- local pattern_dec = Cs( ( P(1) / function(chr) return format("%02i",byte(chr)) end)^0 ) +-- function md5.dec(str) return lpegmatch(pattern_dec,md5.sum(str)) end +-- end function file.needsupdating(oldname,newname,threshold) -- size modification access change local oldtime = lfs.attributes(oldname,"modification") -- cgit v1.2.3 From cdcc80639d5fddc1845cb51dc9251204ab239e11 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 19:26:47 +0200 Subject: update l-dir --- lualibs-dir.lua | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/lualibs-dir.lua b/lualibs-dir.lua index 3deb660..00cda38 100644 --- a/lualibs-dir.lua +++ b/lualibs-dir.lua @@ -8,7 +8,7 @@ if not modules then modules = { } end modules ['l-dir'] = { -- dir.expandname will be merged with cleanpath and collapsepath -local type = type +local type, select = type, select local find, gmatch, match, gsub = string.find, string.gmatch, string.match, string.gsub local concat, insert, remove = table.concat, table.insert, table.remove local lpegmatch = lpeg.match @@ -24,6 +24,7 @@ local walkdir = lfs.dir local isdir = lfs.isdir local isfile = lfs.isfile local currentdir = lfs.currentdir +local chdir = lfs.chdir -- in case we load outside luatex @@ -261,15 +262,15 @@ local onwindows = os.type == "windows" or find(os.getenv("PATH"),";") if onwindows then function dir.mkdirs(...) - local str, pth, t = "", "", { ... } - for i=1,#t do - local s = t[i] - if s ~= "" then - if str ~= "" then - str = str .. "/" .. s - else - str = s - end + local str, pth = "", "" + for i=1,select("#",...) do + local s = select(i,...) + if s == "" then + -- skip + elseif str == "" then + str = s + else + str = str .. "/" .. s end end local first, middle, last @@ -329,9 +330,9 @@ if onwindows then else function dir.mkdirs(...) - local str, pth, t = "", "", { ... } - for i=1,#t do - local s = t[i] + local str, pth = "", "" + for i=1,select("#",...) do + local s = select(i,...) if s and s ~= "" then -- we catch nil and false if str ~= "" then str = str .. "/" .. s @@ -385,7 +386,7 @@ if onwindows then function dir.expandname(str) -- will be merged with cleanpath and collapsepath local first, nothing, last = match(str,"^(//)(//*)(.*)$") if first then - first = dir.current() .. "/" + first = dir.current() .. "/" -- dir.current sanitizes end if not first then first, last = match(str,"^(//)/*(.*)$") @@ -394,10 +395,10 @@ if onwindows then first, last = match(str,"^([a-zA-Z]:)(.*)$") if first and not find(last,"^/") then local d = currentdir() - if lfs.chdir(first) then + if chdir(first) then first = dir.current() end - lfs.chdir(d) + chdir(d) end end if not first then @@ -433,13 +434,16 @@ file.expandname = dir.expandname -- for convenience local stack = { } function dir.push(newdir) - insert(stack,lfs.currentdir()) + insert(stack,currentdir()) + if newdir and newdir ~= "" then + chdir(newdir) + end end function dir.pop() local d = remove(stack) if d then - lfs.chdir(d) + chdir(d) end return d end -- cgit v1.2.3 From c21c90799dce48224a7072b57d074b545da4880f Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 19:33:54 +0200 Subject: update l-unicode --- lualibs-unicode.lua | 789 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 569 insertions(+), 220 deletions(-) diff --git a/lualibs-unicode.lua b/lualibs-unicode.lua index 630c349..813ffd5 100644 --- a/lualibs-unicode.lua +++ b/lualibs-unicode.lua @@ -10,29 +10,45 @@ if not modules then modules = { } end modules ['l-unicode'] = { -- todo: utf.sub replacement (used in syst-aux) -local concat = table.concat +-- we put these in the utf namespace: + +utf = utf or (unicode and unicode.utf8) or { } + +utf.characters = utf.characters or string.utfcharacters +utf.values = utf.values or string.utfvalues + +-- string.utfvalues +-- string.utfcharacters +-- string.characters +-- string.characterpairs +-- string.bytes +-- string.bytepairs + local type = type -local P, C, R, Cs, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct +local char, byte, format, sub = string.char, string.byte, string.format, string.sub +local concat = table.concat +local P, C, R, Cs, Ct, Cmt, Cc, Carg, Cp = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Carg, lpeg.Cp local lpegmatch, patterns = lpeg.match, lpeg.patterns -local utftype = patterns.utftype -local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format -local utfsplitlines = string.utfsplitlines - -if not unicode then - unicode = { } +local bytepairs = string.bytepairs -end +local finder = lpeg.finder +local replacer = lpeg.replacer -local unicode = unicode +local utfvalues = utf.values +local utfgmatch = utf.gmatch -- not always present -utf = utf or unicode.utf8 +local p_utftype = patterns.utftype +local p_utfoffset = patterns.utfoffset +local p_utf8char = patterns.utf8char +local p_utf8byte = patterns.utf8byte +local p_utfbom = patterns.utfbom +local p_newline = patterns.newline +local p_whitespace = patterns.whitespace -if not utf then +if not unicode then - utf8 = { } - unicode.utf8 = utf8 - utf = utf8 + unicode = { utf = utf } -- for a while end @@ -94,8 +110,8 @@ local utfchar, utfbyte = utf.char, utf.byte -- As we want to get rid of the (unmaintained) utf library we implement our own -- variants (in due time an independent module): -function unicode.filetype(data) - return data and lpegmatch(utftype,data) or "unknown" +function utf.filetype(data) + return data and lpegmatch(p_utftype,data) or "unknown" end local toentities = Cs ( @@ -115,32 +131,31 @@ function utf.toentities(str) return lpegmatch(toentities,str) end ---~ local utfchr = { } -- 60K -> 2.638 M extra mem but currently not called that often (on latin) ---~ ---~ setmetatable(utfchr, { __index = function(t,k) local v = utfchar(k) t[k] = v return v end } ) ---~ ---~ collectgarbage("collect") ---~ local u = collectgarbage("count")*1024 ---~ local t = os.clock() ---~ for i=1,1000 do ---~ for i=1,600 do ---~ local a = utfchr[i] ---~ end ---~ end ---~ print(os.clock()-t,collectgarbage("count")*1024-u) - ---~ collectgarbage("collect") ---~ local t = os.clock() ---~ for i=1,1000 do ---~ for i=1,600 do ---~ local a = utfchar(i) ---~ end ---~ end ---~ print(os.clock()-t,collectgarbage("count")*1024-u) - ---~ local byte = string.byte ---~ local utfchar = utf.char ---~ local lpegmatch = lpeg.match, lpeg.P, lpeg.C, lpeg.R, lpeg.Cs +-- local utfchr = { } -- 60K -> 2.638 M extra mem but currently not called that often (on latin) +-- +-- setmetatable(utfchr, { __index = function(t,k) local v = utfchar(k) t[k] = v return v end } ) +-- +-- collectgarbage("collect") +-- local u = collectgarbage("count")*1024 +-- local t = os.clock() +-- for i=1,1000 do +-- for i=1,600 do +-- local a = utfchr[i] +-- end +-- end +-- print(os.clock()-t,collectgarbage("count")*1024-u) + +-- collectgarbage("collect") +-- local t = os.clock() +-- for i=1,1000 do +-- for i=1,600 do +-- local a = utfchar(i) +-- end +-- end +-- print(os.clock()-t,collectgarbage("count")*1024-u) + +-- local byte = string.byte +-- local utfchar = utf.char local one = P(1) local two = C(1) * C(1) @@ -206,7 +221,7 @@ local pattern = P("\254\255") * Cs( ( + one )^1 ) -function string.toutf(s) +function string.toutf(s) -- in string namespace return lpegmatch(pattern,s) or s -- todo: utf32 end @@ -222,26 +237,274 @@ local validatedutf = Cs ( patterns.validatedutf = validatedutf -function string.validutf(str) - return lpegmatch(validatedutf,str) +function utf.is_valid(str) + return type(str) == "string" and lpegmatch(validatedutf,str) or false end +if not utf.len then -utf.length = string.utflength -utf.split = string.utfsplit -utf.splitines = string.utfsplitlines -utf.valid = string.validutf + -- -- alternative 1: 0.77 + -- + -- local utfcharcounter = utfbom^-1 * Cs((p_utf8char/'!')^0) + -- + -- function utf.len(str) + -- return #lpegmatch(utfcharcounter,str or "") + -- end + -- + -- -- alternative 2: 1.70 + -- + -- local n = 0 + -- + -- local utfcharcounter = utfbom^-1 * (p_utf8char/function() n = n + 1 end)^0 -- slow + -- + -- function utf.length(str) + -- n = 0 + -- lpegmatch(utfcharcounter,str or "") + -- return n + -- end + -- + -- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) + + -- local n = 0 + -- + -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( Cp() * ( + -- -- patterns.utf8one ^1 * Cc(1) + -- -- + patterns.utf8two ^1 * Cc(2) + -- -- + patterns.utf8three^1 * Cc(3) + -- -- + patterns.utf8four ^1 * Cc(4) ) * Cp() / function(f,d,t) n = n + (t - f)/d end + -- -- )^0 ) -- just as many captures as below + -- + -- -- local utfcharcounter = lpeg.patterns.utfbom^-1 * P ( ( + -- -- (Cmt(patterns.utf8one ^1,function(_,_,s) n = n + #s return true end)) + -- -- + (Cmt(patterns.utf8two ^1,function(_,_,s) n = n + #s/2 return true end)) + -- -- + (Cmt(patterns.utf8three^1,function(_,_,s) n = n + #s/3 return true end)) + -- -- + (Cmt(patterns.utf8four ^1,function(_,_,s) n = n + #s/4 return true end)) + -- -- )^0 ) -- not interesting as it creates strings but sometimes faster + -- + -- -- The best so far: + -- + -- local utfcharcounter = utfbom^-1 * P ( ( + -- Cp() * (patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end + -- + Cp() * (patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end + -- + Cp() * (patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end + -- + Cp() * (patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end + -- )^0 ) + + -- function utf.len(str) + -- n = 0 + -- lpegmatch(utfcharcounter,str or "") + -- return n + -- end + + local n, f = 0, 1 + + local utfcharcounter = patterns.utfbom^-1 * Cmt ( + Cc(1) * patterns.utf8one ^1 + + Cc(2) * patterns.utf8two ^1 + + Cc(3) * patterns.utf8three^1 + + Cc(4) * patterns.utf8four ^1, + function(_,t,d) -- due to Cc no string captures, so faster + n = n + (t - f)/d + f = t + return true + end + )^0 + + function utf.len(str) + n, f = 0, 1 + lpegmatch(utfcharcounter,str or "") + return n + end + + -- -- these are quite a bit slower: + + -- utfcharcounter = utfbom^-1 * (Cmt(P(1) * R("\128\191")^0, function() n = n + 1 return true end))^0 -- 50+ times slower + -- utfcharcounter = utfbom^-1 * (Cmt(P(1), function() n = n + 1 return true end) * R("\128\191")^0)^0 -- 50- times slower -if not utf.len then - utf.len = utf.length end --- a replacement for simple gsubs: +utf.length = utf.len + +if not utf.sub then + + -- inefficient as lpeg just copies ^n + + -- local function sub(str,start,stop) + -- local pattern = p_utf8char^-(start-1) * C(p_utf8char^-(stop-start+1)) + -- inspect(pattern) + -- return lpegmatch(pattern,str) or "" + -- end + + -- local b, e, n, first, last = 0, 0, 0, 0, 0 + -- + -- local function slide(s,p) + -- n = n + 1 + -- if n == first then + -- b = p + -- if not last then + -- return nil + -- end + -- end + -- if n == last then + -- e = p + -- return nil + -- else + -- return p + -- end + -- end + -- + -- local pattern = Cmt(p_utf8char,slide)^0 + -- + -- function utf.sub(str,start,stop) -- todo: from the end + -- if not start then + -- return str + -- end + -- b, e, n, first, last = 0, 0, 0, start, stop + -- lpegmatch(pattern,str) + -- if not stop then + -- return sub(str,b) + -- else + -- return sub(str,b,e-1) + -- end + -- end + + -- print(utf.sub("Hans Hagen is my name")) + -- print(utf.sub("Hans Hagen is my name",5)) + -- print(utf.sub("Hans Hagen is my name",5,10)) + + local utflength = utf.length + + -- also negative indices, upto 10 times slower than a c variant + + local b, e, n, first, last = 0, 0, 0, 0, 0 + + local function slide_zero(s,p) + n = n + 1 + if n >= last then + e = p - 1 + else + return p + end + end -local utf8char = patterns.utf8char + local function slide_one(s,p) + n = n + 1 + if n == first then + b = p + end + if n >= last then + e = p - 1 + else + return p + end + end + + local function slide_two(s,p) + n = n + 1 + if n == first then + b = p + else + return true + end + end + + local pattern_zero = Cmt(p_utf8char,slide_zero)^0 + local pattern_one = Cmt(p_utf8char,slide_one )^0 + local pattern_two = Cmt(p_utf8char,slide_two )^0 + + function utf.sub(str,start,stop) + if not start then + return str + end + if start == 0 then + start = 1 + end + if not stop then + if start < 0 then + local l = utflength(str) -- we can inline this function if needed + start = l + start + else + start = start - 1 + end + b, n, first = 0, 0, start + lpegmatch(pattern_two,str) + if n >= first then + return sub(str,b) + else + return "" + end + end + if start < 0 or stop < 0 then + local l = utf.length(str) + if start < 0 then + start = l + start + if start <= 0 then + start = 1 + else + start = start + 1 + end + end + if stop < 0 then + stop = l + stop + if stop == 0 then + stop = 1 + else + stop = stop + 1 + end + end + end + if start > stop then + return "" + elseif start > 1 then + b, e, n, first, last = 0, 0, 0, start - 1, stop + lpegmatch(pattern_one,str) + if n >= first and e == 0 then + e = #str + end + return sub(str,b,e) + else + b, e, n, last = 1, 0, 0, stop + lpegmatch(pattern_zero,str) + if e == 0 then + e = #str + end + return sub(str,b,e) + end + end + + -- local n = 100000 + -- local str = string.rep("123456àáâãäå",100) + -- + -- for i=-15,15,1 do + -- for j=-15,15,1 do + -- if utf.xsub(str,i,j) ~= utf.sub(str,i,j) then + -- print("error",i,j,"l>"..utf.xsub(str,i,j),"s>"..utf.sub(str,i,j)) + -- end + -- end + -- if utf.xsub(str,i) ~= utf.sub(str,i) then + -- print("error",i,"l>"..utf.xsub(str,i),"s>"..utf.sub(str,i)) + -- end + -- end + + -- print(" 1, 7",utf.xsub(str, 1, 7),utf.sub(str, 1, 7)) + -- print(" 0, 7",utf.xsub(str, 0, 7),utf.sub(str, 0, 7)) + -- print(" 0, 9",utf.xsub(str, 0, 9),utf.sub(str, 0, 9)) + -- print(" 4 ",utf.xsub(str, 4 ),utf.sub(str, 4 )) + -- print(" 0 ",utf.xsub(str, 0 ),utf.sub(str, 0 )) + -- print(" 0, 0",utf.xsub(str, 0, 0),utf.sub(str, 0, 0)) + -- print(" 4, 4",utf.xsub(str, 4, 4),utf.sub(str, 4, 4)) + -- print(" 4, 0",utf.xsub(str, 4, 0),utf.sub(str, 4, 0)) + -- print("-3, 0",utf.xsub(str,-3, 0),utf.sub(str,-3, 0)) + -- print(" 0,-3",utf.xsub(str, 0,-3),utf.sub(str, 0,-3)) + -- print(" 5,-3",utf.xsub(str,-5,-3),utf.sub(str,-5,-3)) + -- print("-3 ",utf.xsub(str,-3 ),utf.sub(str,-3 )) + +end + +-- a replacement for simple gsubs: function utf.remapper(mapping) - local pattern = Cs((utf8char/mapping)^0) + local pattern = Cs((p_utf8char/mapping)^0) return function(str) if not str or str == "" then return "" @@ -254,158 +517,113 @@ end -- local remap = utf.remapper { a = 'd', b = "c", c = "b", d = "a" } -- print(remap("abcd 1234 abcd")) +-- + +function utf.replacer(t) -- no precheck, always string builder + local r = replacer(t,false,false,true) + return function(str) + return lpegmatch(r,str) + end +end + +function utf.subtituter(t) -- with precheck and no building if no match + local f = finder (t) + local r = replacer(t,false,false,true) + return function(str) + local i = lpegmatch(f,str) + if not i then + return str + elseif i > #str then + return str + else + -- return sub(str,1,i-2) .. lpegmatch(r,str,i-1) -- slower + return lpegmatch(r,str) + end + end +end + +-- inspect(utf.split("a b c d")) +-- inspect(utf.split("a b c d",true)) + +local utflinesplitter = p_utfbom^-1 * lpeg.tsplitat(p_newline) +local utfcharsplitter_ows = p_utfbom^-1 * Ct(C(p_utf8char)^0) +local utfcharsplitter_iws = p_utfbom^-1 * Ct((p_whitespace^1 + C(p_utf8char))^0) +local utfcharsplitter_raw = Ct(C(p_utf8char)^0) + +patterns.utflinesplitter = utflinesplitter + +function utf.splitlines(str) + return lpegmatch(utflinesplitter,str or "") +end + +function utf.split(str,ignorewhitespace) -- new + if ignorewhitespace then + return lpegmatch(utfcharsplitter_iws,str or "") + else + return lpegmatch(utfcharsplitter_ows,str or "") + end +end + +function utf.totable(str) -- keeps bom + return lpegmatch(utfcharsplitter_raw,str) +end + -- 0 EF BB BF UTF-8 -- 1 FF FE UTF-16-little-endian -- 2 FE FF UTF-16-big-endian -- 3 FF FE 00 00 UTF-32-little-endian -- 4 00 00 FE FF UTF-32-big-endian - -unicode.utfname = { - [0] = 'utf-8', - [1] = 'utf-16-le', - [2] = 'utf-16-be', - [3] = 'utf-32-le', - [4] = 'utf-32-be' -} - +-- -- \000 fails in <= 5.0 but is valid in >=5.1 where %z is depricated -function unicode.utftype(f) - local str = f:read(4) - if not str then - f:seek('set') - return 0 - -- elseif find(str,"^%z%z\254\255") then -- depricated - -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged - elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH) - return 4 - -- elseif find(str,"^\255\254%z%z") then -- depricated - -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged - elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH) - return 3 - elseif find(str,"^\254\255") then - f:seek('set',2) - return 2 - elseif find(str,"^\255\254") then - f:seek('set',2) - return 1 - elseif find(str,"^\239\187\191") then - f:seek('set',3) - return 0 - else - f:seek('set') - return 0 +-- utf.name = { +-- [0] = 'utf-8', +-- [1] = 'utf-16-le', +-- [2] = 'utf-16-be', +-- [3] = 'utf-32-le', +-- [4] = 'utf-32-be' +-- } +-- +-- function utf.magic(f) +-- local str = f:read(4) +-- if not str then +-- f:seek('set') +-- return 0 +-- -- elseif find(str,"^%z%z\254\255") then -- depricated +-- -- elseif find(str,"^\000\000\254\255") then -- not permitted and bugged +-- elseif find(str,"\000\000\254\255",1,true) then -- seems to work okay (TH) +-- return 4 +-- -- elseif find(str,"^\255\254%z%z") then -- depricated +-- -- elseif find(str,"^\255\254\000\000") then -- not permitted and bugged +-- elseif find(str,"\255\254\000\000",1,true) then -- seems to work okay (TH) +-- return 3 +-- elseif find(str,"^\254\255") then +-- f:seek('set',2) +-- return 2 +-- elseif find(str,"^\255\254") then +-- f:seek('set',2) +-- return 1 +-- elseif find(str,"^\239\187\191") then +-- f:seek('set',3) +-- return 0 +-- else +-- f:seek('set') +-- return 0 +-- end +-- end + +function utf.magic(f) -- not used + local str = f:read(4) or "" + local off = lpegmatch(p_utfoffset,str) + if off < 4 then + f:seek('set',off) end + return lpegmatch(p_utftype,str) end ---~ function unicode.utf16_to_utf8(str, endian) -- maybe a gsub is faster or an lpeg ---~ local result, tmp, n, m, p, r, t = { }, { }, 0, 0, 0, 0, 0 -- we reuse tmp ---~ -- lf | cr | crlf / (cr:13, lf:10) ---~ local function doit() -- inline this ---~ if n == 10 then ---~ if p ~= 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end ---~ p = 0 ---~ end ---~ elseif n == 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end ---~ p = n ---~ else ---~ t = t + 1 ---~ tmp[t] = utfchar(n) ---~ p = 0 ---~ end ---~ end ---~ for l,r in bytepairs(str) do ---~ if r then ---~ if endian then -- maybe make two loops ---~ n = 256*l + r ---~ else ---~ n = 256*r + l ---~ end ---~ if m > 0 then ---~ n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000 ---~ m = 0 ---~ doit() ---~ elseif n >= 0xD800 and n <= 0xDBFF then ---~ m = n ---~ else ---~ doit() ---~ end ---~ end ---~ end ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) -- we reused tmp, hence t ---~ end ---~ return result ---~ end - ---~ function unicode.utf32_to_utf8(str, endian) ---~ local result, tmp, n, m, p, r, t = { }, { }, 0, -1, 0, 0, 0 ---~ -- lf | cr | crlf / (cr:13, lf:10) ---~ local function doit() -- inline this ---~ if n == 10 then ---~ if p ~= 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end ---~ p = 0 ---~ end ---~ elseif n == 13 then ---~ if t > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) ---~ t = 0 ---~ end ---~ p = n ---~ else ---~ t = t + 1 ---~ tmp[t] = utfchar(n) ---~ p = 0 ---~ end ---~ end ---~ for a,b in bytepairs(str) do ---~ if a and b then ---~ if m < 0 then ---~ if endian then -- maybe make two loops ---~ m = 256*256*256*a + 256*256*b ---~ else ---~ m = 256*b + a ---~ end ---~ else ---~ if endian then -- maybe make two loops ---~ n = m + 256*a + b ---~ else ---~ n = m + 256*256*256*b + 256*256*a ---~ end ---~ m = -1 ---~ doit() ---~ end ---~ else ---~ break ---~ end ---~ end ---~ if #tmp > 0 then ---~ r = r + 1 ---~ result[r] = concat(tmp,"",1,t) -- we reused tmp, hence t ---~ end ---~ return result ---~ end - local function utf16_to_utf8_be(t) if type(t) == "string" then - t = utfsplitlines(str) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -433,7 +651,7 @@ end local function utf16_to_utf8_le(t) if type(t) == "string" then - t = utfsplitlines(str) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -461,7 +679,7 @@ end local function utf32_to_utf8_be(t) if type(t) == "string" then - t = utfsplitlines(t) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -486,7 +704,7 @@ end local function utf32_to_utf8_le(t) if type(t) == "string" then - t = utfsplitlines(t) + t = lpegmatch(utflinesplitter,t) end local result = { } -- we reuse result for i=1,#t do @@ -509,20 +727,20 @@ local function utf32_to_utf8_le(t) return t end -unicode.utf32_to_utf8_be = utf32_to_utf8_be -unicode.utf32_to_utf8_le = utf32_to_utf8_le -unicode.utf16_to_utf8_be = utf16_to_utf8_be -unicode.utf16_to_utf8_le = utf16_to_utf8_le +utf.utf32_to_utf8_be = utf32_to_utf8_be +utf.utf32_to_utf8_le = utf32_to_utf8_le +utf.utf16_to_utf8_be = utf16_to_utf8_be +utf.utf16_to_utf8_le = utf16_to_utf8_le -function unicode.utf8_to_utf8(t) - return type(t) == "string" and utfsplitlines(t) or t +function utf.utf8_to_utf8(t) + return type(t) == "string" and lpegmatch(utflinesplitter,t) or t end -function unicode.utf16_to_utf8(t,endian) +function utf.utf16_to_utf8(t,endian) return endian and utf16_to_utf8_be(t) or utf16_to_utf8_le(t) or t end -function unicode.utf32_to_utf8(t,endian) +function utf.utf32_to_utf8(t,endian) return endian and utf32_to_utf8_be(t) or utf32_to_utf8_le(t) or t end @@ -548,7 +766,7 @@ local function big(c) end end --- function unicode.utf8_to_utf16(str,littleendian) +-- function utf.utf8_to_utf16(str,littleendian) -- if littleendian then -- return char(255,254) .. utfgsub(str,".",little) -- else @@ -559,7 +777,7 @@ end local _, l_remap = utf.remapper(little) local _, b_remap = utf.remapper(big) -function unicode.utf8_to_utf16(str,littleendian) +function utf.utf8_to_utf16(str,littleendian) if littleendian then return char(255,254) .. lpegmatch(l_remap,str) else @@ -567,27 +785,158 @@ function unicode.utf8_to_utf16(str,littleendian) end end -function unicode.utfcodes(str) - local t, n = { }, 0 - for u in utfvalues(str) do - n = n + 1 - t[n] = format("0x%04X",u) - end - return concat(t,separator or " ") +-- function utf.tocodes(str,separator) -- can be sped up with an lpeg +-- local t, n = { }, 0 +-- for u in utfvalues(str) do +-- n = n + 1 +-- t[n] = format("0x%04X",u) +-- end +-- return concat(t,separator or " ") +-- end + +local pattern = Cs ( + (p_utf8byte / function(unicode ) return format( "0x%04X", unicode) end) * + (p_utf8byte * Carg(1) / function(unicode,separator) return format("%s0x%04X",separator,unicode) end)^0 +) + +function utf.tocodes(str,separator) + return lpegmatch(pattern,str,1,separator or " ") end -function unicode.ustring(s) +function utf.ustring(s) return format("U+%05X",type(s) == "number" and s or utfbyte(s)) end -function unicode.xstring(s) +function utf.xstring(s) return format("0x%05X",type(s) == "number" and s or utfbyte(s)) end -- -local pattern = Ct(C(patterns.utf8char)^0) +local p_nany = p_utf8char / "" + +if utfgmatch then + + function utf.count(str,what) + if type(what) == "string" then + local n = 0 + for _ in utfgmatch(str,what) do + n = n + 1 + end + return n + else -- 4 times slower but still faster than / function + return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str) + end + end + +else + + local cache = { } + + function utf.count(str,what) + if type(what) == "string" then + local p = cache[what] + if not p then + p = Cs((P(what)/" " + p_nany)^0) + cache[p] = p + end + return #lpegmatch(p,str) + else -- 4 times slower but still faster than / function + return #lpegmatch(Cs((P(what)/" " + p_nany)^0),str) + end + end + +end + +-- maybe also register as string.utf* + + +if not utf.characters then + + -- New: this gmatch hack is taken from the Lua 5.2 book. It's about two times slower + -- than the built-in string.utfcharacters. + + function utf.characters(str) + return gmatch(str,".[\128-\191]*") + end + + string.utfcharacters = utf.characters + +end + +if not utf.values then + + -- So, a logical next step is to check for the values variant. It over five times + -- slower than the built-in string.utfvalues. I optimized it a bit for n=0,1. + + ----- wrap, yield, gmatch = coroutine.wrap, coroutine.yield, string.gmatch + local find = string.find + + local dummy = function() + -- we share this one + end + + -- function utf.values(str) + -- local n = #str + -- if n == 0 then + -- return wrap(dummy) + -- elseif n == 1 then + -- return wrap(function() yield(utfbyte(str)) end) + -- else + -- return wrap(function() for s in gmatch(str,".[\128-\191]*") do + -- yield(utfbyte(s)) + -- end end) + -- end + -- end + -- + -- faster: + + function utf.values(str) + local n = #str + if n == 0 then + return dummy + elseif n == 1 then + return function() return utfbyte(str) end + else + local p = 1 + -- local n = #str + return function() + -- if p <= n then -- slower than the last find + local b, e = find(str,".[\128-\191]*",p) + if b then + p = e + 1 + return utfbyte(sub(str,b,e)) + end + -- end + end + end + end + + -- slower: + -- + -- local pattern = C(patterns.utf8character) * Cp() + -- ----- pattern = patterns.utf8character/utfbyte * Cp() + -- ----- pattern = patterns.utf8byte * Cp() + -- + -- function utf.values(str) -- one of the cases where a find is faster than an lpeg + -- local n = #str + -- if n == 0 then + -- return dummy + -- elseif n == 1 then + -- return function() return utfbyte(str) end + -- else + -- local p = 1 + -- return function() + -- local s, e = lpegmatch(pattern,str,p) + -- if e then + -- p = e + -- return utfbyte(s) + -- -- return s + -- end + -- end + -- end + -- end + + string.utfvalues = utf.values -function utf.totable(str) - return lpegmatch(pattern,str) end -- cgit v1.2.3 From e951dc73befda47cc984844416e0a0dc4460b5b1 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 19:39:31 +0200 Subject: update l-url --- lualibs-url.lua | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/lualibs-url.lua b/lualibs-url.lua index ab50028..4624a05 100644 --- a/lualibs-url.lua +++ b/lualibs-url.lua @@ -6,7 +6,7 @@ if not modules then modules = { } end modules ['l-url'] = { license = "see context related readme files" } -local char, gmatch, gsub, format, byte, find = string.char, string.gmatch, string.gsub, string.format, string.byte, string.find +local char, format, byte = string.char, string.format, string.byte local concat = table.concat local tonumber, type = tonumber, type local P, C, R, S, Cs, Cc, Ct, Cf, Cg, V = lpeg.P, lpeg.C, lpeg.R, lpeg.S, lpeg.Cs, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cg, lpeg.V @@ -45,6 +45,8 @@ local nothing = Cc("") local escapedchar = (percent * C(hexdigit * hexdigit)) / tochar local escaped = (plus / " ") + escapedchar +local noslash = P("/") / "" + -- we assume schemes with more than 1 character (in order to avoid problems with windows disks) -- we also assume that when we have a scheme, we also have an authority -- @@ -233,29 +235,23 @@ function url.construct(hash) -- dodo: we need to escape ! return lpegmatch(escaper,concat(fullurl)) end -function url.filename(filename) -- why no lpeg here ? - local t = hashed(filename) - return (t.scheme == "file" and (gsub(t.path,"^/([a-zA-Z])([:|])/)","%1:"))) or filename +local pattern = Cs(noslash * R("az","AZ") * (S(":|")/":") * noslash * P(1)^0) + +function url.filename(filename) + local spec = hashed(filename) + local path = spec.path + return (spec.scheme == "file" and path and lpegmatch(pattern,path)) or filename end +-- print(url.filename("/c|/test")) +-- print(url.filename("/c/test")) + local function escapestring(str) return lpegmatch(escaper,str) end url.escape = escapestring --- function url.query(str) -- separator could be an option --- if type(str) == "string" then --- local t = { } --- for k, v in gmatch(str,"([^&=]*)=([^&=]*)") do --- t[k] = v --- end --- return t --- else --- return str --- end --- end - function url.query(str) if type(str) == "string" then return lpegmatch(splitquery,str) or "" @@ -283,14 +279,19 @@ end -- /test/ | /test | test/ | test => test +local pattern = Cs(noslash^0 * (1 - noslash * P(-1))^0) + function url.barepath(path) if not path or path == "" then return "" else - return (gsub(path,"^/?(.-)/?$","%1")) + return lpegmatch(pattern,path) end end +-- print(url.barepath("/test"),url.barepath("test/"),url.barepath("/test/"),url.barepath("test")) +-- print(url.barepath("/x/yz"),url.barepath("x/yz/"),url.barepath("/x/yz/"),url.barepath("x/yz")) + --~ print(url.filename("file:///c:/oeps.txt")) --~ print(url.filename("c:/oeps.txt")) --~ print(url.filename("file:///oeps.txt")) -- cgit v1.2.3 From f7d3661b93321974c2e0a8583053e4fa4a8717f5 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 19:53:52 +0200 Subject: update dtx for new loading sequence --- lualibs.dtx | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/lualibs.dtx b/lualibs.dtx index e38e89f..dcb0e5a 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -155,7 +155,7 @@ and the derived file lualibs.lua. % % The \textsf{lualibs} bundle contains files from two Con\TeX t Lua % library categories: The generic auxiliary functions (original file prefix: -% |t-|) together form something close to a standard libary. Most of these are +% |l-|) together form something close to a standard libary. Most of these are % extensions of an existing namespace, like for instance |l-table.lua| which % adds full-fledged serialization capabilities to the Lua table library. % They were imported under the \textsf{lualibs}-prefix. @@ -165,22 +165,24 @@ and the derived file lualibs.lua. % \centering % \caption{Extensions of the Lua standard library.} % \begin{tabular}{l l l} -% \textsf{lualibs} name & Con\TeX t name & content \\ +% \textsf{lualibs} name & Con\TeX t name & content \\ % \hline -% lualibs-string.lua & l-string.lua & string manipulation \\ -% lualibs-lpeg.lua & l-lpeg.lua & patterns \\ -% lualibs-boolean.lua & l-boolean.lua & boolean converter \\ -% lualibs-number.lua & l-number.lua & bit set \\ -% lualibs-math.lua & l-math.lua & math functions \\ -% lualibs-table.lua & l-table.lua & serialization, conversion \\ -% lualibs-io.lua & l-io.lua & reading and writing files \\ -% lualibs-os.lua & l-os.lua & platform specific code \\ -% lualibs-file.lua & l-file.lua & filesystem operations \\ -% lualibs-md5.lua & l-md5.lua & checksum functions \\ -% lualibs-dir.lua & l-dir.lua & directory handling \\ -% lualibs-unicode.lua & l-unicode.lua & utf and unicode \\ -% lualibs-url.lua & l-url.lua & url handling \\ -% lualibs-set.lua & l-set.lua & sets \\ +% lualibs-lua.lua & l-lua.lua & compatibility, library paths \\ +% lualibs-lpeg.lua & l-lpeg.lua & patterns \\ +% lualibs-function.lua & l-function.lua & empty except for dummy \\ +% lualibs-string.lua & l-string.lua & string manipulation \\ +% lualibs-table.lua & l-table.lua & serialization, conversion \\ +% lualibs-boolean.lua & l-boolean.lua & boolean converter \\ +% lualibs-number.lua & l-number.lua & bit operations \\ +% lualibs-math.lua & l-math.lua & math functions \\ +% lualibs-io.lua & l-io.lua & reading and writing files \\ +% lualibs-os.lua & l-os.lua & platform specific code \\ +% lualibs-file.lua & l-file.lua & filesystem operations \\ +% lualibs-md5.lua & l-md5.lua & checksum functions \\ +% lualibs-dir.lua & l-dir.lua & directory handling \\ +% lualibs-unicode.lua & l-unicode.lua & utf and unicode \\ +% lualibs-url.lua & l-url.lua & url handling \\ +% lualibs-set.lua & l-set.lua & sets \\ % \end{tabular} % \label{tab:extensions} % \end{table} @@ -234,12 +236,14 @@ end % Load the modules. % % \begin{macrocode} -require("lualibs-string") +require("lualibs-lua") require("lualibs-lpeg") +require("lualibs-function") +require("lualibs-string") +require("lualibs-table") require("lualibs-boolean") require("lualibs-number") require("lualibs-math") -require("lualibs-table") require("lualibs-io") require("lualibs-os") require("lualibs-file") @@ -248,6 +252,7 @@ require("lualibs-dir") require("lualibs-unicode") require("lualibs-url") require("lualibs-set") + require("lualibs-util-lua") require("lualibs-util-sto") require("lualibs-util-mrg") -- cgit v1.2.3 From 1501d27d4e51f1cbfaf452cc022cf35515d74ac2 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 21:10:16 +0200 Subject: update util-str --- lualibs-util-str.lua | 701 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 670 insertions(+), 31 deletions(-) diff --git a/lualibs-util-str.lua b/lualibs-util-str.lua index 377dd16..4890a11 100644 --- a/lualibs-util-str.lua +++ b/lualibs-util-str.lua @@ -10,9 +10,36 @@ utilities = utilities or {} utilities.strings = utilities.strings or { } local strings = utilities.strings -local gsub, rep = string.gsub, string.rep -local Cs, C, Cp, P, Carg = lpeg.Cs, lpeg.C, lpeg.Cp, lpeg.P, lpeg.Carg +local format, gsub, rep, sub = string.format, string.gsub, string.rep, string.sub +local load, dump = load, string.dump +local tonumber, type, tostring = tonumber, type, tostring +local unpack, concat = table.unpack, table.concat +local P, V, C, S, R, Ct, Cs, Cp, Carg, Cc = lpeg.P, lpeg.V, lpeg.C, lpeg.S, lpeg.R, lpeg.Ct, lpeg.Cs, lpeg.Cp, lpeg.Carg, lpeg.Cc local patterns, lpegmatch = lpeg.patterns, lpeg.match +local utfchar, utfbyte = utf.char, utf.byte +----- loadstripped = utilities.lua.loadstripped +----- setmetatableindex = table.setmetatableindex + +local loadstripped = _LUAVERSION < 5.2 and load or function(str) + return load(dump(load(str),true)) -- it only makes sense in luajit and luatex where we have a stipped load +end + +-- todo: make a special namespace for the formatter + +if not number then number = { } end -- temp hack for luatex-fonts + +local stripper = patterns.stripzeros + +local function points(n) + return (not n or n == 0) and "0pt" or lpegmatch(stripper,format("%.5fpt",n/65536)) +end + +local function basepoints(n) + return (not n or n == 0) and "0bp" or lpegmatch(stripper,format("%.5fbp", n*(7200/7227)/65536)) +end + +number.points = points +number.basepoints = basepoints -- str = " \n \ntest \n test\ntest " -- print("["..string.gsub(string.collapsecrlf(str),"\n","+").."]") @@ -47,17 +74,15 @@ function strings.newrepeater(str,offset) return t end t = { } - setmetatable(t, { - __index = function(t,k) - if not k then - return "" - end - local n = k + offset - local s = n > 0 and rep(str,n) or "" - t[k] = s - return s + setmetatable(t, { __index = function(t,k) + if not k then + return "" end - } ) + local n = k + offset + local s = n > 0 and rep(str,n) or "" + t[k] = s + return s + end }) s[offset] = t return t end @@ -69,6 +94,8 @@ local extra, tab, start = 0, 0, 4, 0 local nspaces = strings.newrepeater(" ") +string.nspaces = nspaces + local pattern = Carg(1) / function(t) extra, tab, start = 0, t or 7, 1 @@ -94,20 +121,20 @@ function strings.tabtospace(str,tab) return lpegmatch(pattern,str,1,tab or 7) end ---~ local t = { ---~ "1234567123456712345671234567", ---~ "\tb\tc", ---~ "a\tb\tc", ---~ "aa\tbb\tcc", ---~ "aaa\tbbb\tccc", ---~ "aaaa\tbbbb\tcccc", ---~ "aaaaa\tbbbbb\tccccc", ---~ "aaaaaa\tbbbbbb\tcccccc\n aaaaaa\tbbbbbb\tcccccc", ---~ "one\n two\nxxx three\nxx four\nx five\nsix", ---~ } ---~ for k=1,#t do ---~ print(strings.tabtospace(t[k])) ---~ end +-- local t = { +-- "1234567123456712345671234567", +-- "\tb\tc", +-- "a\tb\tc", +-- "aa\tbb\tcc", +-- "aaa\tbbb\tccc", +-- "aaaa\tbbbb\tcccc", +-- "aaaaa\tbbbbb\tccccc", +-- "aaaaaa\tbbbbbb\tcccccc\n aaaaaa\tbbbbbb\tcccccc", +-- "one\n two\nxxx three\nxx four\nx five\nsix", +-- } +-- for k=1,#t do +-- print(strings.tabtospace(t[k])) +-- end function strings.striplong(str) -- strips all leading spaces str = gsub(str,"^%s*","") @@ -115,13 +142,625 @@ function strings.striplong(str) -- strips all leading spaces return str end ---~ local template = string.striplong([[ ---~ aaaa ---~ bb ---~ cccccc ---~ ]]) +-- local template = string.striplong([[ +-- aaaa +-- bb +-- cccccc +-- ]]) function strings.nice(str) str = gsub(str,"[:%-+_]+"," ") -- maybe more return str end + +-- Work in progress. Interesting is that compared to the built-in this is faster in +-- luatex than in luajittex where we have a comparable speed. It only makes sense +-- to use the formatter when a (somewhat) complex format is used a lot. Each formatter +-- is a function so there is some overhead and not all formatted output is worth that +-- overhead. Keep in mind that there is an extra function call involved. In principle +-- we end up with a string concatination so one could inline such a sequence but often +-- at the cost of less readabinity. So, it's a sort of (visual) compromise. Of course +-- there is the benefit of more variants. (Concerning the speed: a simple format like +-- %05fpt is better off with format than with a formatter, but as soon as you put +-- something in front formatters become faster. Passing the pt as extra argument makes +-- formatters behave better. Of course this is rather implementation dependent. Also, +-- when a specific format is only used a few times the overhead in creating it is not +-- compensated by speed.) +-- +-- More info can be found in cld-mkiv.pdf so here I stick to a simple list. +-- +-- integer %...i number +-- integer %...d number +-- unsigned %...u number +-- character %...c number +-- hexadecimal %...x number +-- HEXADECIMAL %...X number +-- octal %...o number +-- string %...s string number +-- float %...f number +-- exponential %...e number +-- exponential %...E number +-- autofloat %...g number +-- autofloat %...G number +-- utf character %...c number +-- force tostring %...S any +-- force tostring %Q any +-- force tonumber %N number (strip leading zeros) +-- signed number %I number +-- rounded number %r number +-- 0xhexadecimal %...h character number +-- 0xHEXADECIMAL %...H character number +-- U+hexadecimal %...u character number +-- U+HEXADECIMAL %...U character number +-- points %p number (scaled points) +-- basepoints %b number (scaled points) +-- table concat %...t table +-- serialize %...T sequenced (no nested tables) +-- boolean (logic) %l boolean +-- BOOLEAN %L boolean +-- whitespace %...w +-- automatic %...a 'whatever' (string, table, ...) +-- automatic %...a "whatever" (string, table, ...) + +local n = 0 + +-- we are somewhat sloppy in parsing prefixes as it's not that critical + +-- hard to avoid but we can collect them in a private namespace if needed + +-- inline the next two makes no sense as we only use this in logging + +local sequenced = table.sequenced + +function string.autodouble(s,sep) + if s == nil then + return '""' + end + local t = type(s) + if t == "number" then + return tostring(s) -- tostring not really needed + end + if t == "table" then + return ('"' .. sequenced(s,sep or ",") .. '"') + end + return ('"' .. tostring(s) .. '"') +end + +function string.autosingle(s,sep) + if s == nil then + return "''" + end + local t = type(s) + if t == "number" then + return tostring(s) -- tostring not really needed + end + if t == "table" then + return ("'" .. sequenced(s,sep or ",") .. "'") + end + return ("'" .. tostring(s) .. "'") +end + +local tracedchars = { } +string.tracedchars = tracedchars +strings.tracers = tracedchars + +function string.tracedchar(b) + -- todo: table + if type(b) == "number" then + return tracedchars[b] or (utfchar(b) .. " (U+" .. format('%05X',b) .. ")") + else + local c = utfbyte(b) + return tracedchars[c] or (b .. " (U+" .. format('%05X',c) .. ")") + end +end + +function number.signed(i) + if i > 0 then + return "+", i + else + return "-", -i + end +end + +local preamble = [[ +local type = type +local tostring = tostring +local tonumber = tonumber +local format = string.format +local concat = table.concat +local signed = number.signed +local points = number.points +local basepoints = number.basepoints +local utfchar = utf.char +local utfbyte = utf.byte +local lpegmatch = lpeg.match +local nspaces = string.nspaces +local tracedchar = string.tracedchar +local autosingle = string.autosingle +local autodouble = string.autodouble +local sequenced = table.sequenced +]] + +local template = [[ +%s +%s +return function(%s) return %s end +]] + +local arguments = { "a1" } -- faster than previously used (select(n,...)) + +setmetatable(arguments, { __index = + function(t,k) + local v = t[k-1] .. ",a" .. k + t[k] = v + return v + end +}) + +local prefix_any = C((S("+- .") + R("09"))^0) +local prefix_tab = C((1-R("az","AZ","09","%%"))^0) + +-- we've split all cases as then we can optimize them (let's omit the fuzzy u) + +-- todo: replace outer formats in next by .. + +local format_s = function(f) + n = n + 1 + if f and f ~= "" then + return format("format('%%%ss',a%s)",f,n) + else -- best no tostring in order to stay compatible (.. does a selective tostring too) + return format("(a%s or '')",n) -- goodie: nil check + end +end + +local format_S = function(f) -- can be optimized + n = n + 1 + if f and f ~= "" then + return format("format('%%%ss',tostring(a%s))",f,n) + else + return format("tostring(a%s)",n) + end +end + +local format_q = function() + n = n + 1 + return format("(a%s and format('%%q',a%s) or '')",n,n) -- goodie: nil check (maybe separate lpeg, not faster) +end + +local format_Q = function() -- can be optimized + n = n + 1 + return format("format('%%q',tostring(a%s))",n) +end + +local format_i = function(f) + n = n + 1 + if f and f ~= "" then + return format("format('%%%si',a%s)",f,n) + else + return format("a%s",n) + end +end + +local format_d = format_i + +local format_I = function(f) + n = n + 1 + return format("format('%%s%%%si',signed(a%s))",f,n) +end + +local format_f = function(f) + n = n + 1 + return format("format('%%%sf',a%s)",f,n) +end + +local format_g = function(f) + n = n + 1 + return format("format('%%%sg',a%s)",f,n) +end + +local format_G = function(f) + n = n + 1 + return format("format('%%%sG',a%s)",f,n) +end + +local format_e = function(f) + n = n + 1 + return format("format('%%%se',a%s)",f,n) +end + +local format_E = function(f) + n = n + 1 + return format("format('%%%sE',a%s)",f,n) +end + +local format_x = function(f) + n = n + 1 + return format("format('%%%sx',a%s)",f,n) +end + +local format_X = function(f) + n = n + 1 + return format("format('%%%sX',a%s)",f,n) +end + +local format_o = function(f) + n = n + 1 + return format("format('%%%so',a%s)",f,n) +end + +local format_c = function() + n = n + 1 + return format("utfchar(a%s)",n) +end + +local format_C = function() + n = n + 1 + return format("tracedchar(a%s)",n) +end + +local format_r = function(f) + n = n + 1 + return format("format('%%%s.0f',a%s)",f,n) +end + +local format_h = function(f) + n = n + 1 + if f == "-" then + f = sub(f,2) + return format("format('%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + else + return format("format('0x%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + end +end + +local format_H = function(f) + n = n + 1 + if f == "-" then + f = sub(f,2) + return format("format('%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + else + return format("format('0x%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + end +end + +local format_u = function(f) + n = n + 1 + if f == "-" then + f = sub(f,2) + return format("format('%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + else + return format("format('u+%%%sx',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + end +end + +local format_U = function(f) + n = n + 1 + if f == "-" then + f = sub(f,2) + return format("format('%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + else + return format("format('U+%%%sX',type(a%s) == 'number' and a%s or utfbyte(a%s))",f == "" and "05" or f,n,n,n) + end +end + +local format_p = function() + n = n + 1 + return format("points(a%s)",n) +end + +local format_b = function() + n = n + 1 + return format("basepoints(a%s)",n) +end + +local format_t = function(f) + n = n + 1 + if f and f ~= "" then + return format("concat(a%s,%q)",n,f) + else + return format("concat(a%s)",n) + end +end + +local format_T = function(f) + n = n + 1 + if f and f ~= "" then + return format("sequenced(a%s,%q)",n,f) + else + return format("sequenced(a%s)",n) + end +end + +local format_l = function() + n = n + 1 + return format("(a%s and 'true' or 'false')",n) +end + +local format_L = function() + n = n + 1 + return format("(a%s and 'TRUE' or 'FALSE')",n) +end + +local format_N = function() -- strips leading zeros + n = n + 1 + return format("tostring(tonumber(a%s) or a%s)",n,n) +end + +local format_a = function(f) + n = n + 1 + if f and f ~= "" then + return format("autosingle(a%s,%q)",n,f) + else + return format("autosingle(a%s)",n) + end +end + +local format_A = function(f) + n = n + 1 + if f and f ~= "" then + return format("autodouble(a%s,%q)",n,f) + else + return format("autodouble(a%s)",n) + end +end + +local format_w = function(f) -- handy when doing depth related indent + n = n + 1 + f = tonumber(f) + if f then -- not that useful + return format("nspaces[%s+a%s]",f,n) -- no real need for tonumber + else + return format("nspaces[a%s]",n) -- no real need for tonumber + end +end + +local format_W = function(f) -- handy when doing depth related indent + return format("nspaces[%s]",tonumber(f) or 0) +end + +local format_rest = function(s) + return format("%q",s) -- catches " and \n and such +end + +local format_extension = function(extensions,f,name) + local extension = extensions[name] or "tostring(%s)" + local f = tonumber(f) or 1 + if f == 0 then + return extension + elseif f == 1 then + n = n + 1 + local a = "a" .. n + return format(extension,a,a) -- maybe more times? + elseif f < 0 then + local a = "a" .. (n + f + 1) + return format(extension,a,a) + else + local t = { } + for i=1,f do + n = n + 1 + t[#t+1] = "a" .. n + end + return format(extension,unpack(t)) + end +end + +local builder = Cs { "start", + start = ( + ( + P("%") / "" + * ( + V("!") -- new + + V("s") + V("q") + + V("i") + V("d") + + V("f") + V("g") + V("G") + V("e") + V("E") + + V("x") + V("X") + V("o") + -- + + V("c") + + V("C") + + V("S") -- new + + V("Q") -- new + + V("N") -- new + -- + + V("r") + + V("h") + V("H") + V("u") + V("U") + + V("p") + V("b") + + V("t") + V("T") + + V("l") + V("L") + + V("I") + + V("h") -- new + + V("w") -- new + + V("W") -- new + + V("a") -- new + + V("A") -- new + -- + + V("*") -- ignores probably messed up % + ) + + V("*") + ) + * (P(-1) + Carg(1)) + )^0, + -- + ["s"] = (prefix_any * P("s")) / format_s, -- %s => regular %s (string) + ["q"] = (prefix_any * P("q")) / format_q, -- %q => regular %q (quoted string) + ["i"] = (prefix_any * P("i")) / format_i, -- %i => regular %i (integer) + ["d"] = (prefix_any * P("d")) / format_d, -- %d => regular %d (integer) + ["f"] = (prefix_any * P("f")) / format_f, -- %f => regular %f (float) + ["g"] = (prefix_any * P("g")) / format_g, -- %g => regular %g (float) + ["G"] = (prefix_any * P("G")) / format_G, -- %G => regular %G (float) + ["e"] = (prefix_any * P("e")) / format_e, -- %e => regular %e (float) + ["E"] = (prefix_any * P("E")) / format_E, -- %E => regular %E (float) + ["x"] = (prefix_any * P("x")) / format_x, -- %x => regular %x (hexadecimal) + ["X"] = (prefix_any * P("X")) / format_X, -- %X => regular %X (HEXADECIMAL) + ["o"] = (prefix_any * P("o")) / format_o, -- %o => regular %o (octal) + -- + ["S"] = (prefix_any * P("S")) / format_S, -- %S => %s (tostring) + ["Q"] = (prefix_any * P("Q")) / format_S, -- %Q => %q (tostring) + ["N"] = (prefix_any * P("N")) / format_N, -- %N => tonumber (strips leading zeros) + ["c"] = (prefix_any * P("c")) / format_c, -- %c => utf character (extension to regular) + ["C"] = (prefix_any * P("C")) / format_C, -- %c => U+.... utf character + -- + ["r"] = (prefix_any * P("r")) / format_r, -- %r => round + ["h"] = (prefix_any * P("h")) / format_h, -- %h => 0x0a1b2 (when - no 0x) was v + ["H"] = (prefix_any * P("H")) / format_H, -- %H => 0x0A1B2 (when - no 0x) was V + ["u"] = (prefix_any * P("u")) / format_u, -- %u => u+0a1b2 (when - no u+) + ["U"] = (prefix_any * P("U")) / format_U, -- %U => U+0A1B2 (when - no U+) + ["p"] = (prefix_any * P("p")) / format_p, -- %p => 12.345pt / maybe: P (and more units) + ["b"] = (prefix_any * P("b")) / format_b, -- %b => 12.342bp / maybe: B (and more units) + ["t"] = (prefix_tab * P("t")) / format_t, -- %t => concat + ["T"] = (prefix_tab * P("T")) / format_T, -- %t => sequenced + ["l"] = (prefix_tab * P("l")) / format_l, -- %l => boolean + ["L"] = (prefix_tab * P("L")) / format_L, -- %L => BOOLEAN + ["I"] = (prefix_any * P("I")) / format_I, -- %I => signed integer + -- + ["w"] = (prefix_any * P("w")) / format_w, -- %w => n spaces (optional prefix is added) + ["W"] = (prefix_any * P("W")) / format_W, -- %W => mandate prefix, no specifier + -- + ["a"] = (prefix_any * P("a")) / format_a, -- %a => '...' (forces tostring) + ["A"] = (prefix_any * P("A")) / format_A, -- %A => "..." (forces tostring) + -- + ["*"] = Cs(((1-P("%"))^1 + P("%%")/"%%%%")^1) / format_rest, -- rest (including %%) + -- + ["!"] = Carg(2) * prefix_any * P("!") * C((1-P("!"))^1) * P("!") / format_extension, +} + +-- we can be clever and only alias what is needed + +local direct = Cs ( + P("%")/"" + * Cc([[local format = string.format return function(str) return format("%]]) + * (S("+- .") + R("09"))^0 + * S("sqidfgGeExXo") + * Cc([[",str) end]]) + * P(-1) + ) + +local function make(t,str) + local f + local p + local p = lpegmatch(direct,str) + if p then + f = loadstripped(p)() + else + n = 0 + p = lpegmatch(builder,str,1,"..",t._extensions_) -- after this we know n + if n > 0 then + p = format(template,preamble,t._preamble_,arguments[n],p) +-- print("builder>",p) + f = loadstripped(p)() + else + f = function() return str end + end + end + t[str] = f + return f +end + +-- -- collect periodically +-- +-- local threshold = 1000 -- max nof cached formats +-- +-- local function make(t,str) +-- local f = rawget(t,str) +-- if f then +-- return f +-- end +-- local parent = t._t_ +-- if parent._n_ > threshold then +-- local m = { _t_ = parent } +-- getmetatable(parent).__index = m +-- setmetatable(m, { __index = make }) +-- else +-- parent._n_ = parent._n_ + 1 +-- end +-- local f +-- local p = lpegmatch(direct,str) +-- if p then +-- f = loadstripped(p)() +-- else +-- n = 0 +-- p = lpegmatch(builder,str,1,"..",parent._extensions_) -- after this we know n +-- if n > 0 then +-- p = format(template,preamble,parent._preamble_,arguments[n],p) +-- -- print("builder>",p) +-- f = loadstripped(p)() +-- else +-- f = function() return str end +-- end +-- end +-- t[str] = f +-- return f +-- end + +local function use(t,fmt,...) + return t[fmt](...) +end + +strings.formatters = { } + +-- we cannot make these tables weak, unless we start using an indirect +-- table (metatable) in which case we could better keep a count and +-- clear that table when a threshold is reached + +function strings.formatters.new() + local t = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter" } + setmetatable(t, { __index = make, __call = use }) + return t +end + +-- function strings.formatters.new() +-- local t = { _extensions_ = { }, _preamble_ = "", _type_ = "formatter", _n_ = 0 } +-- local m = { _t_ = t } +-- setmetatable(t, { __index = m, __call = use }) +-- setmetatable(m, { __index = make }) +-- return t +-- end + +local formatters = strings.formatters.new() -- the default instance + +string.formatters = formatters -- in the main string namespace +string.formatter = function(str,...) return formatters[str](...) end -- sometimes nicer name + +local function add(t,name,template,preamble) + if type(t) == "table" and t._type_ == "formatter" then + t._extensions_[name] = template or "%s" + if preamble then + t._preamble_ = preamble .. "\n" .. t._preamble_ -- so no overload ! + end + end +end + +strings.formatters.add = add + +-- registered in the default instance (should we fall back on this one?) + +lpeg.patterns.xmlescape = Cs((P("<")/"<" + P(">")/">" + P("&")/"&" + P('"')/""" + P(1))^0) +lpeg.patterns.texescape = Cs((C(S("#$%\\{}"))/"\\%1" + P(1))^0) + +add(formatters,"xml",[[lpegmatch(xmlescape,%s)]],[[local xmlescape = lpeg.patterns.xmlescape]]) +add(formatters,"tex",[[lpegmatch(texescape,%s)]],[[local texescape = lpeg.patterns.texescape]]) + +-- -- yes or no: +-- +-- local function make(t,str) +-- local f +-- local p = lpegmatch(direct,str) +-- if p then +-- f = loadstripped(p)() +-- else +-- n = 0 +-- p = lpegmatch(builder,str,1,",") -- after this we know n +-- if n > 0 then +-- p = format(template,template_shortcuts,arguments[n],p) +-- f = loadstripped(p)() +-- else +-- f = function() return str end +-- end +-- end +-- t[str] = f +-- return f +-- end +-- +-- local formatteds = string.formatteds or { } +-- string.formatteds = formatteds +-- +-- setmetatable(formatteds, { __index = make, __call = use }) -- cgit v1.2.3 From 568a5bc386522a788131791700e089d2b81991fa Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 21:26:51 +0200 Subject: update util-tab --- lualibs-util-tab.lua | 307 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 258 insertions(+), 49 deletions(-) diff --git a/lualibs-util-tab.lua b/lualibs-util-tab.lua index 7a2da29..ecf36b1 100644 --- a/lualibs-util-tab.lua +++ b/lualibs-util-tab.lua @@ -10,24 +10,53 @@ utilities = utilities or {} utilities.tables = utilities.tables or { } local tables = utilities.tables -local format, gmatch, rep, gsub = string.format, string.gmatch, string.rep, string.gsub +local format, gmatch, gsub = string.format, string.gmatch, string.gsub local concat, insert, remove = table.concat, table.insert, table.remove local setmetatable, getmetatable, tonumber, tostring = setmetatable, getmetatable, tonumber, tostring -local type, next, rawset, tonumber, loadstring = type, next, rawset, tonumber, loadstring -local lpegmatch, P, Cs = lpeg.match, lpeg.P, lpeg.Cs +local type, next, rawset, tonumber, tostring, load, select = type, next, rawset, tonumber, tostring, load, select +local lpegmatch, P, Cs, Cc = lpeg.match, lpeg.P, lpeg.Cs, lpeg.Cc +local serialize, sortedkeys, sortedpairs = table.serialize, table.sortedkeys, table.sortedpairs +local formatters = string.formatters -function tables.definetable(target) -- defines undefined tables - local composed, t, n = nil, { }, 0 - for name in gmatch(target,"([^%.]+)") do - n = n + 1 +local splitter = lpeg.tsplitat(".") + +function tables.definetable(target,nofirst,nolast) -- defines undefined tables + local composed, shortcut, t = nil, nil, { } + local snippets = lpegmatch(splitter,target) + for i=1,#snippets - (nolast and 1 or 0) do + local name = snippets[i] if composed then - composed = composed .. "." .. name + composed = shortcut .. "." .. name + shortcut = shortcut .. "_" .. name + t[#t+1] = formatters["local %s = %s if not %s then %s = { } %s = %s end"](shortcut,composed,shortcut,shortcut,composed,shortcut) else composed = name + shortcut = name + if not nofirst then + t[#t+1] = formatters["%s = %s or { }"](composed,composed) + end end - t[n] = format("%s = %s or { }",composed,composed) end - return concat(t,"\n") + if nolast then + composed = shortcut .. "." .. snippets[#snippets] + end + return concat(t,"\n"), composed +end + +-- local t = tables.definedtable("a","b","c","d") + +function tables.definedtable(...) + local t = _G + for i=1,select("#",...) do + local li = select(i,...) + local tl = t[li] + if not tl then + tl = { } + t[li] = tl + end + t = tl + end + return t end function tables.accesstable(target,root) @@ -98,35 +127,131 @@ end -- experimental +local escape = Cs(Cc('"') * ((P('"')/'""' + P(1))^0) * Cc('"')) + +function table.tocsv(t,specification) + if t and #t > 0 then + local result = { } + local r = { } + specification = specification or { } + local fields = specification.fields + if type(fields) ~= "string" then + fields = sortedkeys(t[1]) + end + local separator = specification.separator or "," + if specification.preamble == true then + for f=1,#fields do + r[f] = lpegmatch(escape,tostring(fields[f])) + end + result[1] = concat(r,separator) + end + for i=1,#t do + local ti = t[i] + for f=1,#fields do + local field = ti[fields[f]] + if type(field) == "string" then + r[f] = lpegmatch(escape,field) + else + r[f] = tostring(field) + end + end + result[#result+1] = concat(r,separator) + end + return concat(result,"\n") + else + return "" + end +end + +-- local nspaces = utilities.strings.newrepeater(" ") +-- local escape = Cs((P("<")/"<" + P(">")/">" + P("&")/"&" + P(1))^0) +-- +-- local function toxml(t,d,result,step) +-- for k, v in sortedpairs(t) do +-- local s = nspaces[d] +-- local tk = type(k) +-- local tv = type(v) +-- if tv == "table" then +-- if tk == "number" then +-- result[#result+1] = format("%s",s,k) +-- toxml(v,d+step,result,step) +-- result[#result+1] = format("%s",s,k) +-- else +-- result[#result+1] = format("%s<%s>",s,k) +-- toxml(v,d+step,result,step) +-- result[#result+1] = format("%s",s,k) +-- end +-- elseif tv == "string" then +-- if tk == "number" then +-- result[#result+1] = format("%s%s",s,k,lpegmatch(escape,v),k) +-- else +-- result[#result+1] = format("%s<%s>%s",s,k,lpegmatch(escape,v),k) +-- end +-- elseif tk == "number" then +-- result[#result+1] = format("%s%s",s,k,tostring(v),k) +-- else +-- result[#result+1] = format("%s<%s>%s",s,k,tostring(v),k) +-- end +-- end +-- end +-- +-- much faster + +local nspaces = utilities.strings.newrepeater(" ") + local function toxml(t,d,result,step) - for k, v in table.sortedpairs(t) do - if type(v) == "table" then - if type(k) == "number" then - result[#result+1] = format("%s",d,k) - toxml(v,d..step,result,step) - result[#result+1] = format("%s",d,k) + for k, v in sortedpairs(t) do + local s = nspaces[d] -- inlining this is somewhat faster but gives more formatters + local tk = type(k) + local tv = type(v) + if tv == "table" then + if tk == "number" then + result[#result+1] = formatters["%s"](s,k) + toxml(v,d+step,result,step) + result[#result+1] = formatters["%s"](s,k) + else + result[#result+1] = formatters["%s<%s>"](s,k) + toxml(v,d+step,result,step) + result[#result+1] = formatters["%s"](s,k) + end + elseif tv == "string" then + if tk == "number" then + result[#result+1] = formatters["%s%!xml!"](s,k,v,k) else - result[#result+1] = format("%s<%s>",d,k) - toxml(v,d..step,result,step) - result[#result+1] = format("%s",d,k) + result[#result+1] = formatters["%s<%s>%!xml!"](s,k,v,k) end - elseif type(k) == "number" then - result[#result+1] = format("%s%s",d,k,v,k) + elseif tk == "number" then + result[#result+1] = formatters["%s%S"](s,k,v,k) else - result[#result+1] = format("%s<%s>%s",d,k,tostring(v),k) + result[#result+1] = formatters["%s<%s>%S"](s,k,v,k) end end end -function table.toxml(t,name,nobanner,indent,spaces) +-- function table.toxml(t,name,nobanner,indent,spaces) +-- local noroot = name == false +-- local result = (nobanner or noroot) and { } or { "" } +-- local indent = rep(" ",indent or 0) +-- local spaces = rep(" ",spaces or 1) +-- if noroot then +-- toxml( t, inndent, result, spaces) +-- else +-- toxml( { [name or "root"] = t }, indent, result, spaces) +-- end +-- return concat(result,"\n") +-- end + +function table.toxml(t,specification) + specification = specification or { } + local name = specification.name local noroot = name == false - local result = (nobanner or noroot) and { } or { "" } - local indent = rep(" ",indent or 0) - local spaces = rep(" ",spaces or 1) + local result = (specification.nobanner or noroot) and { } or { "" } + local indent = specification.indent or 0 + local spaces = specification.spaces or 1 if noroot then - toxml( t, inndent, result, spaces) + toxml( t, indent, result, spaces) else - toxml( { [name or "root"] = t }, indent, result, spaces) + toxml( { [name or "data"] = t }, indent, result, spaces) end return concat(result,"\n") end @@ -144,7 +269,7 @@ function tables.encapsulate(core,capsule,protect) end for key, value in next, core do if capsule[key] then - print(format("\ninvalid inheritance '%s' in '%s': %s",key,tostring(core))) + print(formatters["\ninvalid %s %a in %a"]("inheritance",key,core)) os.exit() else capsule[key] = value @@ -158,7 +283,7 @@ function tables.encapsulate(core,capsule,protect) __index = capsule, __newindex = function(t,key,value) if capsule[key] then - print(format("\ninvalid overload '%s' in '%s'",key,tostring(core))) + print(formatters["\ninvalid %s %a' in %a"]("overload",key,core)) os.exit() else rawset(t,key,value) @@ -168,7 +293,7 @@ function tables.encapsulate(core,capsule,protect) end end -local function serialize(t,r,outer) -- no mixes +local function fastserialize(t,r,outer) -- no mixes r[#r+1] = "{" local n = #t if n > 0 then @@ -176,27 +301,27 @@ local function serialize(t,r,outer) -- no mixes local v = t[i] local tv = type(v) if tv == "string" then - r[#r+1] = format("%q,",v) + r[#r+1] = formatters["%q,"](v) elseif tv == "number" then - r[#r+1] = format("%s,",v) + r[#r+1] = formatters["%s,"](v) elseif tv == "table" then - serialize(v,r) + fastserialize(v,r) elseif tv == "boolean" then - r[#r+1] = format("%s,",tostring(v)) + r[#r+1] = formatters["%S,"](v) end end else for k, v in next, t do local tv = type(v) if tv == "string" then - r[#r+1] = format("[%q]=%q,",k,v) + r[#r+1] = formatters["[%q]=%q,"](k,v) elseif tv == "number" then - r[#r+1] = format("[%q]=%s,",k,v) + r[#r+1] = formatters["[%q]=%s,"](k,v) elseif tv == "table" then - r[#r+1] = format("[%q]=",k) - serialize(v,r) + r[#r+1] = formatters["[%q]="](k) + fastserialize(v,r) elseif tv == "boolean" then - r[#r+1] = format("[%q]=%s,",k,tostring(v)) + r[#r+1] = formatters["[%q]=%S,"](k,v) end end end @@ -208,15 +333,67 @@ local function serialize(t,r,outer) -- no mixes return r end -function table.fastserialize(t,prefix) - return concat(serialize(t,{ prefix or "return" },true)) +-- local f_hashed_string = formatters["[%q]=%q,"] +-- local f_hashed_number = formatters["[%q]=%s,"] +-- local f_hashed_table = formatters["[%q]="] +-- local f_hashed_true = formatters["[%q]=true,"] +-- local f_hashed_false = formatters["[%q]=false,"] +-- +-- local f_indexed_string = formatters["%q,"] +-- local f_indexed_number = formatters["%s,"] +-- ----- f_indexed_true = formatters["true,"] +-- ----- f_indexed_false = formatters["false,"] +-- +-- local function fastserialize(t,r,outer) -- no mixes +-- r[#r+1] = "{" +-- local n = #t +-- if n > 0 then +-- for i=1,n do +-- local v = t[i] +-- local tv = type(v) +-- if tv == "string" then +-- r[#r+1] = f_indexed_string(v) +-- elseif tv == "number" then +-- r[#r+1] = f_indexed_number(v) +-- elseif tv == "table" then +-- fastserialize(v,r) +-- elseif tv == "boolean" then +-- -- r[#r+1] = v and f_indexed_true(k) or f_indexed_false(k) +-- r[#r+1] = v and "true," or "false," +-- end +-- end +-- else +-- for k, v in next, t do +-- local tv = type(v) +-- if tv == "string" then +-- r[#r+1] = f_hashed_string(k,v) +-- elseif tv == "number" then +-- r[#r+1] = f_hashed_number(k,v) +-- elseif tv == "table" then +-- r[#r+1] = f_hashed_table(k) +-- fastserialize(v,r) +-- elseif tv == "boolean" then +-- r[#r+1] = v and f_hashed_true(k) or f_hashed_false(k) +-- end +-- end +-- end +-- if outer then +-- r[#r+1] = "}" +-- else +-- r[#r+1] = "}," +-- end +-- return r +-- end + +function table.fastserialize(t,prefix) -- so prefix should contain the = + return concat(fastserialize(t,{ prefix or "return" },true)) end function table.deserialize(str) if not str or str == "" then return end - local code = loadstring(str) + local code = load(str) if not code then return end @@ -233,7 +410,7 @@ function table.load(filename) if filename then local t = io.loaddata(filename) if t and t ~= "" then - t = loadstring(t) + t = load(t) if type(t) == "function" then t = t() if type(t) == "table" then @@ -244,6 +421,10 @@ function table.load(filename) end end +function table.save(filename,t,n,...) + io.savedata(filename,serialize(t,n == nil and true or n,...)) +end + local function slowdrop(t) local r = { } local l = { } @@ -252,11 +433,11 @@ local function slowdrop(t) local j = 0 for k, v in next, ti do j = j + 1 - l[j] = format("%s=%q",k,v) + l[j] = formatters["%s=%q"](k,v) end - r[i] = format(" {%s},\n",concat(l)) + r[i] = formatters[" {%t},\n"](l) end - return format("return {\n%s}",concat(r)) + return formatters["return {\n%st}"](r) end local function fastdrop(t) @@ -265,7 +446,7 @@ local function fastdrop(t) local ti = t[i] r[#r+1] = " {" for k, v in next, ti do - r[#r+1] = format("%s=%q",k,v) + r[#r+1] = formatters["%s=%q"](k,v) end r[#r+1] = "},\n" end @@ -273,7 +454,7 @@ local function fastdrop(t) return concat(r) end -function table.drop(t,slow) +function table.drop(t,slow) -- only { { a=2 }, {a=3} } if #t == 0 then return "return { }" elseif slow == true then @@ -282,3 +463,31 @@ function table.drop(t,slow) return fastdrop(t) -- some 15% faster end end + +function table.autokey(t,k) + local v = { } + t[k] = v + return v +end + +local selfmapper = { __index = function(t,k) t[k] = k return k end } + +function table.twowaymapper(t) + if not t then + t = { } + else + for i=0,#t do + local ti = t[i] -- t[1] = "one" + if ti then + local i = tostring(i) + t[i] = ti -- t["1"] = "one" + t[ti] = i -- t["one"] = "1" + end + end + t[""] = t[0] or "" + end + -- setmetatableindex(t,"key") + setmetatable(t,selfmapper) + return t +end + -- cgit v1.2.3 From f1674ad138029a1aa36b77e711b56d720d9f6894 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 21:30:50 +0200 Subject: update util-sto --- lualibs-util-sto.lua | 81 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 32 deletions(-) diff --git a/lualibs-util-sto.lua b/lualibs-util-sto.lua index 42ee6cf..191d6cd 100644 --- a/lualibs-util-sto.lua +++ b/lualibs-util-sto.lua @@ -6,7 +6,7 @@ if not modules then modules = { } end modules ['util-sto'] = { license = "see context related readme files" } -local setmetatable, getmetatable = setmetatable, getmetatable +local setmetatable, getmetatable, type = setmetatable, getmetatable, type utilities = utilities or { } utilities.storage = utilities.storage or { } @@ -14,8 +14,9 @@ local storage = utilities.storage function storage.mark(t) if not t then - texio.write_nl("fatal error: storage cannot be marked") - return -- os.exit() + print("\nfatal error: storage cannot be marked\n") + os.exit() + return end local m = getmetatable(t) if not m then @@ -44,36 +45,37 @@ end function storage.checked(t) if not t then - texio.write_nl("fatal error: storage has not been allocated") - return -- os.exit() + report("\nfatal error: storage has not been allocated\n") + os.exit() + return end return t end ---~ function utilities.storage.delay(parent,name,filename) ---~ local m = getmetatable(parent) ---~ m.__list[name] = filename ---~ end ---~ ---~ function utilities.storage.predefine(parent) ---~ local list = { } ---~ local m = getmetatable(parent) or { ---~ __list = list, ---~ __index = function(t,k) ---~ local l = require(list[k]) ---~ t[k] = l ---~ return l ---~ end ---~ } ---~ setmetatable(parent,m) ---~ end ---~ ---~ bla = { } ---~ utilities.storage.predefine(bla) ---~ utilities.storage.delay(bla,"test","oepsoeps") ---~ local t = bla.test ---~ table.print(t) ---~ print(t.a) +-- function utilities.storage.delay(parent,name,filename) +-- local m = getmetatable(parent) +-- m.__list[name] = filename +-- end +-- +-- function utilities.storage.predefine(parent) +-- local list = { } +-- local m = getmetatable(parent) or { +-- __list = list, +-- __index = function(t,k) +-- local l = require(list[k]) +-- t[k] = l +-- return l +-- end +-- } +-- setmetatable(parent,m) +-- end +-- +-- bla = { } +-- utilities.storage.predefine(bla) +-- utilities.storage.delay(bla,"test","oepsoeps") +-- local t = bla.test +-- table.print(t) +-- print(t.a) function storage.setinitializer(data,initialize) local m = getmetatable(data) or { } @@ -98,21 +100,28 @@ end -- table namespace ? -local function f_empty () return "" end -- t,k -local function f_self (t,k) t[k] = k return k end -local function f_ignore() end -- t,k,v +local function f_empty () return "" end -- t,k +local function f_self (t,k) t[k] = k return k end +local function f_table (t,k) local v = { } t[k] = v return v end +local function f_ignore() end -- t,k,v local t_empty = { __index = f_empty } local t_self = { __index = f_self } +local t_table = { __index = f_table } local t_ignore = { __newindex = f_ignore } function table.setmetatableindex(t,f) + if type(t) ~= "table" then + f, t = t, { } + end local m = getmetatable(t) if m then if f == "empty" then m.__index = f_empty elseif f == "key" then m.__index = f_self + elseif f == "table" then + m.__index = f_table else m.__index = f end @@ -121,6 +130,8 @@ function table.setmetatableindex(t,f) setmetatable(t, t_empty) elseif f == "key" then setmetatable(t, t_self) + elseif f == "table" then + setmetatable(t, t_table) else setmetatable(t,{ __index = f }) end @@ -129,6 +140,9 @@ function table.setmetatableindex(t,f) end function table.setmetatablenewindex(t,f) + if type(t) ~= "table" then + f, t = t, { } + end local m = getmetatable(t) if m then if f == "ignore" then @@ -147,6 +161,9 @@ function table.setmetatablenewindex(t,f) end function table.setmetatablecall(t,f) + if type(t) ~= "table" then + f, t = t, { } + end local m = getmetatable(t) if m then m.__call = f -- cgit v1.2.3 From 64d4b28bb8f9bf40a96edf73d3a8bc276df5992e Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 21:57:43 +0200 Subject: update util-mrg --- lualibs-util-mrg.lua | 136 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 109 insertions(+), 27 deletions(-) diff --git a/lualibs-util-mrg.lua b/lualibs-util-mrg.lua index 8d6c5dd..78b23dc 100644 --- a/lualibs-util-mrg.lua +++ b/lualibs-util-mrg.lua @@ -12,13 +12,17 @@ local gsub, format = string.gsub, string.format local concat = table.concat local type, next = type, next -utilities = utilities or {} +local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt, Cb, Cg = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt, lpeg.Cb, lpeg.Cg +local lpegmatch, patterns = lpeg.match, lpeg.patterns + +utilities = utilities or { } local merger = utilities.merger or { } utilities.merger = merger -utilities.report = logs and logs.reporter("system") or print - merger.strip_comment = true +local report = logs.reporter("system","merge") +utilities.report = report + local m_begin_merge = "begin library merge" local m_end_merge = "end library merge" local m_begin_closure = "do -- create closure to overcome 200 locals limit" @@ -41,6 +45,15 @@ local m_faked = "-- " .. m_begin_merge .. "\n\n" .. "-- " .. m_end_merge .. "\n\n" +local m_report = [[ +-- used libraries : %s +-- skipped libraries : %s +-- original bytes : %s +-- stripped bytes : %s +]] + +local m_preloaded = [[package.loaded[%q] = package.loaded[%q] or true]] + local function self_fake() return m_faked end @@ -52,25 +65,87 @@ end local function self_load(name) local data = io.loaddata(name) or "" if data == "" then - utilities.report("merge: unknown file %s",name) + report("unknown file %a",name) else - utilities.report("merge: inserting %s",name) + report("inserting file %a",name) end return data or "" end +-- -- saves some 20K .. scite comments +-- data = gsub(data,"%-%-~[^\n\r]*[\r\n]","") +-- -- saves some 20K .. ldx comments +-- data = gsub(data,"%-%-%[%[ldx%-%-.-%-%-ldx%]%]%-%-","") + +local space = patterns.space +local eol = patterns.newline +local equals = P("=")^0 +local open = P("[") * Cg(equals,"init") * P("[") * P("\n")^-1 +local close = P("]") * C(equals) * P("]") +local closeeq = Cmt(close * Cb("init"), function(s,i,a,b) return a == b end) +local longstring = open * (1 - closeeq)^0 * close + +local quoted = patterns.quoted +local emptyline = space^0 * eol +local operator1 = P("<=") + P(">=") + P("~=") + P("..") + S("/^<>=*+%%") +local operator2 = S("*+/") +local operator3 = S("-") +local separator = S(",;") + +local ignore = (P("]") * space^1 * P("=") * space^1 * P("]")) / "]=[" + + (P("=") * space^1 * P("{")) / "={" + + (P("(") * space^1) / "(" + + (P("{") * (space+eol)^1 * P("}")) / "{}" +local strings = quoted -- / function (s) print("<<"..s..">>") return s end +local longcmt = (emptyline^0 * P("--") * longstring * emptyline^0) / "" +local longstr = longstring +local comment = emptyline^0 * P("--") * P("-")^0 * (1-eol)^0 * emptyline^1 / "\n" +local pack = ((eol+space)^0 / "") * operator1 * ((eol+space)^0 / "") + + ((eol+space)^0 / "") * operator2 * ((space)^0 / "") + + ((eol+space)^1 / "") * operator3 * ((space)^1 / "") + + ((space)^0 / "") * separator * ((space)^0 / "") +local lines = emptyline^2 / "\n" +local spaces = (space * space) / " " +----- spaces = ((space+eol)^1 ) / " " + +local compact = Cs ( ( + ignore + + strings + + longcmt + + longstr + + comment + + pack + + lines + + spaces + + 1 +)^1 ) + +local strip = Cs((emptyline^2/"\n" + 1)^0) +local stripreturn = Cs((1-P("return") * space^1 * P(1-space-eol)^1 * (space+eol)^0 * P(-1))^1) + +function merger.compact(data) + return lpegmatch(strip,lpegmatch(compact,data)) +end + +local function self_compact(data) + local delta = 0 + if merger.strip_comment then + local before = #data + data = lpegmatch(compact,data) + data = lpegmatch(strip,data) -- also strips in longstrings ... alas + -- data = string.strip(data) + local after = #data + delta = before - after + report("original size %s, compacted to %s, stripped %s",before,after,delta) + data = format("-- original size: %s, stripped down to: %s\n\n%s",before,after,data) + end + return lpegmatch(stripreturn,data) or data, delta +end + local function self_save(name, data) if data ~= "" then - if merger.strip_comment then - local n = #data - -- saves some 20K .. scite comments - data = gsub(data,"%-%-~[^\n\r]*[\r\n]","") - -- saves some 20K .. ldx comments - data = gsub(data,"%-%-%[%[ldx%-%-.-%-%-ldx%]%]%-%-","") - utilities.report("merge: %s bytes of comment stripped, %s bytes of code left",n-#data,#data) - end io.savedata(name,data) - utilities.report("merge: saving %s",name) + report("saving %s with size %s",name,#data) end end @@ -87,7 +162,7 @@ local function self_libs(libs,list) local lib = libs[i] for j=1,#list do local pth = gsub(list[j],"\\","/") -- file.clean_path - utilities.report("merge: checking library path %s",pth) + report("checking library path %a",pth) local name = pth .. "/" .. lib if lfs.isfile(name) then foundpath = pth @@ -96,30 +171,37 @@ local function self_libs(libs,list) if foundpath then break end end if foundpath then - utilities.report("merge: using library path %s",foundpath) - local right, wrong = { }, { } + report("using library path %a",foundpath) + local right, wrong, original, stripped = { }, { }, 0, 0 for i=1,#libs do local lib = libs[i] local fullname = foundpath .. "/" .. lib if lfs.isfile(fullname) then - utilities.report("merge: using library %s",fullname) + report("using library %a",fullname) + local preloaded = file.nameonly(lib) + local data = io.loaddata(fullname,true) + original = original + #data + local data, delta = self_compact(data) right[#right+1] = lib result[#result+1] = m_begin_closure - result[#result+1] = io.loaddata(fullname,true) + result[#result+1] = format(m_preloaded,preloaded,preloaded) + result[#result+1] = data result[#result+1] = m_end_closure + stripped = stripped + delta else - utilities.report("merge: skipping library %s",fullname) + report("skipping library %a",fullname) wrong[#wrong+1] = lib end end - if #right > 0 then - utilities.report("merge: used libraries: %s",concat(right," ")) - end - if #wrong > 0 then - utilities.report("merge: skipped libraries: %s",concat(wrong," ")) - end + right = #right > 0 and concat(right," ") or "-" + wrong = #wrong > 0 and concat(wrong," ") or "-" + report("used libraries: %a",right) + report("skipped libraries: %a",wrong) + report("original bytes: %a",original) + report("stripped bytes: %a",stripped) + result[#result+1] = format(m_report,right,wrong,original,stripped) else - utilities.report("merge: no valid library path found") + report("no valid library path found") end return concat(result, "\n\n") end -- cgit v1.2.3 From 6b7ebe9f0f9917564249eeae45d3daf9fb1fc127 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 22:00:59 +0200 Subject: update util-dim --- lualibs-util-dim.lua | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lualibs-util-dim.lua b/lualibs-util-dim.lua index 4bae2d0..47b2706 100644 --- a/lualibs-util-dim.lua +++ b/lualibs-util-dim.lua @@ -20,6 +20,7 @@ local P, S, R, Cc, C, lpegmatch = lpeg.P, lpeg.S, lpeg.R, lpeg.Cc, lpeg.C, lpeg. local allocate = utilities.storage.allocate local setmetatableindex = table.setmetatableindex +local formatters = string.formatters --this might become another namespace @@ -86,6 +87,7 @@ local dimenfactors = allocate { format (string) is implemented using this table.

--ldx]]-- + local function numbertodimen(n,unit,fmt) if type(n) == 'string' then return n -- cgit v1.2.3 From 931e84efa118de9821ad6729f537d03183611e8d Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 22:07:08 +0200 Subject: update util-lua --- lualibs-util-lua.lua | 459 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 288 insertions(+), 171 deletions(-) diff --git a/lualibs-util-lua.lua b/lualibs-util-lua.lua index 2baeaa8..f3be9dc 100644 --- a/lualibs-util-lua.lua +++ b/lualibs-util-lua.lua @@ -7,14 +7,16 @@ if not modules then modules = { } end modules ['util-lua'] = { license = "see context related readme files" } +-- we will remove the 5.1 code some day soon + local rep, sub, byte, dump, format = string.rep, string.sub, string.byte, string.dump, string.format -local loadstring, loadfile, type = loadstring, loadfile, type +local load, loadfile, type = load, loadfile, type utilities = utilities or {} utilities.lua = utilities.lua or { } local luautilities = utilities.lua -utilities.report = logs and logs.reporter("system") or print -- can be overloaded later +local report_lua = logs.reporter("system","lua") local tracestripping = false local forcestupidcompile = true -- use internal bytecode compiler @@ -22,213 +24,328 @@ luautilities.stripcode = true -- support stripping when asked for luautilities.alwaysstripcode = false -- saves 1 meg on 7 meg compressed format file (2012.08.12) luautilities.nofstrippedchunks = 0 luautilities.nofstrippedbytes = 0 +local strippedchunks = { } -- allocate() +luautilities.strippedchunks = strippedchunks --- The next function was posted by Peter Cawley on the lua list and strips line --- number information etc. from the bytecode data blob. We only apply this trick --- when we store data tables. Stripping makes the compressed format file about --- 1MB smaller (and uncompressed we save at least 6MB). --- --- You can consider this feature an experiment, so it might disappear. There is --- no noticeable gain in runtime although the memory footprint should be somewhat --- smaller (and the file system has a bit less to deal with). --- --- Begin of borrowed code ... works for Lua 5.1 which LuaTeX currently uses ... - -local function strip_code_pc(dump,name) - local before = #dump - local version, format, endian, int, size, ins, num = byte(dump,5,11) - local subint - if endian == 1 then - subint = function(dump, i, l) - local val = 0 - for n = l, 1, -1 do - val = val * 256 + byte(dump,i + n - 1) - end - return val, i + l +luautilities.suffixes = { + tma = "tma", + tmc = jit and "tmb" or "tmc", + lua = "lua", + luc = jit and "lub" or "luc", + lui = "lui", + luv = "luv", + luj = "luj", + tua = "tua", + tuc = "tuc", +} + +-- environment.loadpreprocessedfile can be set to a preprocessor + +if jit or status.luatex_version >= 74 then + + local function register(name) + if tracestripping then + report_lua("stripped bytecode from %a",name or "unknown") end - else - subint = function(dump, i, l) - local val = 0 - for n = 1, l, 1 do - val = val * 256 + byte(dump,i + n - 1) + strippedchunks[#strippedchunks+1] = name + luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1 + end + + local function stupidcompile(luafile,lucfile,strip) + local code = io.loaddata(luafile) + if code and code ~= "" then + code = load(code) + if code then + code = dump(code,strip and luautilities.stripcode or luautilities.alwaysstripcode) + if code and code ~= "" then + register(name) + io.savedata(lucfile,code) + return true, 0 + end + else + report_lua("fatal error %a in file %a",1,luafile) end - return val, i + l + else + report_lua("fatal error %a in file %a",2,luafile) end + return false, 0 end - local strip_function - strip_function = function(dump) - local count, offset = subint(dump, 1, size) - local stripped, dirty = rep("\0", size), offset + count - offset = offset + count + int * 2 + 4 - offset = offset + int + subint(dump, offset, int) * ins - count, offset = subint(dump, offset, int) - for n = 1, count do - local t - t, offset = subint(dump, offset, 1) - if t == 1 then - offset = offset + 1 - elseif t == 4 then - offset = offset + size + subint(dump, offset, size) - elseif t == 3 then - offset = offset + num - end + + -- quite subtle ... doing this wrong incidentally can give more bytes + + function luautilities.loadedluacode(fullname,forcestrip,name) + -- quite subtle ... doing this wrong incidentally can give more bytes + name = name or fullname + local code = environment.loadpreprocessedfile and environment.loadpreprocessedfile(fullname) or loadfile(fullname) + if code then + code() end - count, offset = subint(dump, offset, int) - stripped = stripped .. sub(dump,dirty, offset - 1) - for n = 1, count do - local proto, off = strip_function(sub(dump,offset, -1)) - stripped, offset = stripped .. proto, offset + off - 1 + if forcestrip and luautilities.stripcode then + if type(forcestrip) == "function" then + forcestrip = forcestrip(fullname) + end + if forcestrip or luautilities.alwaysstripcode then + register(name) + return load(dump(code,true)), 0 + else + return code, 0 + end + elseif luautilities.alwaysstripcode then + register(name) + return load(dump(code,true)), 0 + else + return code, 0 end - offset = offset + subint(dump, offset, int) * int + int - count, offset = subint(dump, offset, int) - for n = 1, count do - offset = offset + subint(dump, offset, size) + size + int * 2 + end + + function luautilities.strippedloadstring(code,forcestrip,name) -- not executed + if forcestrip and luautilities.stripcode or luautilities.alwaysstripcode then + code = load(code) + if not code then + report_lua("fatal error %a in file %a",3,name) + end + register(name) + code = dump(code,true) end - count, offset = subint(dump, offset, int) - for n = 1, count do - offset = offset + subint(dump, offset, size) + size + return load(code), 0 + end + + function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true + report_lua("compiling %a into %a",luafile,lucfile) + os.remove(lucfile) + local done = stupidcompile(luafile,lucfile,strip ~= false) + if done then + report_lua("dumping %a into %a stripped",luafile,lucfile) + if cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then + report_lua("removing %a",luafile) + os.remove(luafile) + end end - stripped = stripped .. rep("\0", int * 3) - return stripped, offset + return done end - dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1)) - local after = #dump - local delta = before-after - if tracestripping then - utilities.report("stripped bytecode: %s, before %s, after %s, delta %s",name or "unknown",before,after,delta) + + function luautilities.loadstripped(...) + local l = load(...) + if l then + return load(dump(l,true)) + end end - luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1 - luautilities.nofstrippedbytes = luautilities.nofstrippedbytes + delta - return dump, delta -end --- ... end of borrowed code. +else -local function strippedbytecode(code,forcestrip,name) - if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then - return strip_code_pc(code,name) - else - return code, 0 + -- The next function was posted by Peter Cawley on the lua list and strips line + -- number information etc. from the bytecode data blob. We only apply this trick + -- when we store data tables. Stripping makes the compressed format file about + -- 1MB smaller (and uncompressed we save at least 6MB). + -- + -- You can consider this feature an experiment, so it might disappear. There is + -- no noticeable gain in runtime although the memory footprint should be somewhat + -- smaller (and the file system has a bit less to deal with). + -- + -- Begin of borrowed code ... works for Lua 5.1 which LuaTeX currently uses ... + + local function register(name,before,after) + local delta = before - after + if tracestripping then + report_lua("bytecodes stripped from %a, # before %s, # after %s, delta %s",name,before,after,delta) + end + strippedchunks[#strippedchunks+1] = name + luautilities.nofstrippedchunks = luautilities.nofstrippedchunks + 1 + luautilities.nofstrippedbytes = luautilities.nofstrippedbytes + delta + return delta end -end -luautilities.stripbytecode = strip_code_pc -luautilities.strippedbytecode = strippedbytecode + local strip_code_pc -local function fatalerror(name) - utilities.report(format("fatal error in %q",name or "unknown")) -end + if _MAJORVERSION == 5 and _MINORVERSION == 1 then + + strip_code_pc = function(dump,name) + local before = #dump + local version, format, endian, int, size, ins, num = byte(dump,5,11) + local subint + if endian == 1 then + subint = function(dump, i, l) + local val = 0 + for n = l, 1, -1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l + end + else + subint = function(dump, i, l) + local val = 0 + for n = 1, l, 1 do + val = val * 256 + byte(dump,i + n - 1) + end + return val, i + l + end + end + local strip_function + strip_function = function(dump) + local count, offset = subint(dump, 1, size) + local stripped, dirty = rep("\0", size), offset + count + offset = offset + count + int * 2 + 4 + offset = offset + int + subint(dump, offset, int) * ins + count, offset = subint(dump, offset, int) + for n = 1, count do + local t + t, offset = subint(dump, offset, 1) + if t == 1 then + offset = offset + 1 + elseif t == 4 then + offset = offset + size + subint(dump, offset, size) + elseif t == 3 then + offset = offset + num + end + end + count, offset = subint(dump, offset, int) + stripped = stripped .. sub(dump,dirty, offset - 1) + for n = 1, count do + local proto, off = strip_function(sub(dump,offset, -1)) + stripped, offset = stripped .. proto, offset + off - 1 + end + offset = offset + subint(dump, offset, int) * int + int + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + int * 2 + end + count, offset = subint(dump, offset, int) + for n = 1, count do + offset = offset + subint(dump, offset, size) + size + end + stripped = stripped .. rep("\0", int * 3) + return stripped, offset + end + dump = sub(dump,1,12) .. strip_function(sub(dump,13,-1)) + local after = #dump + local delta = register(name,before,after) + return dump, delta + end --- quite subtle ... doing this wrong incidentally can give more bytes + else + strip_code_pc = function(dump,name) + return dump, 0 + end -function luautilities.loadedluacode(fullname,forcestrip,name) - -- quite subtle ... doing this wrong incidentally can give more bytes - name = name or fullname - local code = loadfile(fullname) - if code then - code() end - if forcestrip and luautilities.stripcode then - if type(forcestrip) == "function" then - forcestrip = forcestrip(fullname) + + -- ... end of borrowed code. + + -- quite subtle ... doing this wrong incidentally can give more bytes + + function luautilities.loadedluacode(fullname,forcestrip,name) + -- quite subtle ... doing this wrong incidentally can give more bytes + local code = environment.loadpreprocessedfile and environment.preprocessedloadfile(fullname) or loadfile(fullname) + if code then + code() end - if forcestrip then - local code, n = strip_code_pc(dump(code,name)) - return loadstring(code), n + if forcestrip and luautilities.stripcode then + if type(forcestrip) == "function" then + forcestrip = forcestrip(fullname) + end + if forcestrip then + local code, n = strip_code_pc(dump(code),name) + return load(code), n + elseif luautilities.alwaysstripcode then + return load(strip_code_pc(dump(code),name)) + else + return code, 0 + end elseif luautilities.alwaysstripcode then - return loadstring(strip_code_pc(dump(code),name)) + return load(strip_code_pc(dump(code),name)) else return code, 0 end - elseif luautilities.alwaysstripcode then - return loadstring(strip_code_pc(dump(code),name)) - else - return code, 0 end -end -function luautilities.strippedloadstring(code,forcestrip,name) -- not executed - local n = 0 - if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then - code = loadstring(code) - if not code then - fatalerror(name) + function luautilities.strippedloadstring(code,forcestrip,name) -- not executed + local n = 0 + if (forcestrip and luautilities.stripcode) or luautilities.alwaysstripcode then + code = load(code) + if not code then + report_lua("fatal error in file %a",name) + end + code, n = strip_code_pc(dump(code),name) end - code, n = strip_code_pc(dump(code),name) + return load(code), n end - return loadstring(code), n -end -local function stupidcompile(luafile,lucfile,strip) - local code = io.loaddata(luafile) - local n = 0 - if code and code ~= "" then - code = loadstring(code) - if not code then - fatalerror() - end - code = dump(code) - if strip then - code, n = strippedbytecode(code,true,luafile) -- last one is reported - end + local function stupidcompile(luafile,lucfile,strip) + local code = io.loaddata(luafile) + local n = 0 if code and code ~= "" then - io.savedata(lucfile,code) + code = load(code) + if not code then + report_lua("fatal error in file %a",luafile) + end + code = dump(code) + if strip then + code, n = strip_code_pc(code,luautilities.stripcode or luautilities.alwaysstripcode,luafile) -- last one is reported + end + if code and code ~= "" then + io.savedata(lucfile,code) + end end + return n end - return n -end -local luac_normal = "texluac -o %q %q" -local luac_strip = "texluac -s -o %q %q" + local luac_normal = "texluac -o %q %q" + local luac_strip = "texluac -s -o %q %q" -function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true - utilities.report("lua: compiling %s into %s",luafile,lucfile) - os.remove(lucfile) - local done = false - if strip ~= false then - strip = true - end - if forcestupidcompile then - fallback = true - elseif strip then - done = os.spawn(format(luac_strip, lucfile,luafile)) == 0 - else - done = os.spawn(format(luac_normal,lucfile,luafile)) == 0 - end - if not done and fallback then - local n = stupidcompile(luafile,lucfile,strip) - if n > 0 then - utilities.report("lua: %s dumped into %s (%i bytes stripped)",luafile,lucfile,n) + function luautilities.compile(luafile,lucfile,cleanup,strip,fallback) -- defaults: cleanup=false strip=true + report_lua("compiling %a into %a",luafile,lucfile) + os.remove(lucfile) + local done = false + if strip ~= false then + strip = true + end + if forcestupidcompile then + fallback = true + elseif strip then + done = os.spawn(format(luac_strip, lucfile,luafile)) == 0 else - utilities.report("lua: %s dumped into %s (unstripped)",luafile,lucfile) + done = os.spawn(format(luac_normal,lucfile,luafile)) == 0 end - cleanup = false -- better see how bad it is - end - if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then - utilities.report("lua: removing %s",luafile) - os.remove(luafile) + if not done and fallback then + local n = stupidcompile(luafile,lucfile,strip) + if n > 0 then + report_lua("%a dumped into %a (%i bytes stripped)",luafile,lucfile,n) + else + report_lua("%a dumped into %a (unstripped)",luafile,lucfile) + end + cleanup = false -- better see how bad it is + done = true -- hm + end + if done and cleanup == true and lfs.isfile(lucfile) and lfs.isfile(luafile) then + report_lua("removing %a",luafile) + os.remove(luafile) + end + return done end - return done -end ---~ local getmetatable, type = getmetatable, type - ---~ local types = { } - ---~ function luautilities.registerdatatype(d,name) ---~ types[getmetatable(d)] = name ---~ end ---~ function luautilities.datatype(d) ---~ local t = type(d) ---~ if t == "userdata" then ---~ local m = getmetatable(d) ---~ return m and types[m] or "userdata" ---~ else ---~ return t ---~ end ---~ end + luautilities.loadstripped = loadstring ---~ luautilities.registerdatatype(lpeg.P("!"),"lpeg") +end ---~ print(luautilities.datatype(lpeg.P("oeps"))) +-- local getmetatable, type = getmetatable, type +-- +-- local types = { } +-- +-- function luautilities.registerdatatype(d,name) +-- types[getmetatable(d)] = name +-- end +-- +-- function luautilities.datatype(d) +-- local t = type(d) +-- if t == "userdata" then +-- local m = getmetatable(d) +-- return m and types[m] or "userdata" +-- else +-- return t +-- end +-- end +-- +-- luautilities.registerdatatype(lpeg.P("!"),"lpeg") +-- +-- print(luautilities.datatype(lpeg.P("oeps"))) -- cgit v1.2.3 From 170fbd1a07aaadace4b976f62e67572c53c8449b Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 7 Apr 2013 22:12:30 +0200 Subject: update dtx --- lualibs.dtx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lualibs.dtx b/lualibs.dtx index dcb0e5a..e9a20a6 100644 --- a/lualibs.dtx +++ b/lualibs.dtx @@ -252,14 +252,14 @@ require("lualibs-dir") require("lualibs-unicode") require("lualibs-url") require("lualibs-set") - -require("lualibs-util-lua") -require("lualibs-util-sto") -require("lualibs-util-mrg") -require("lualibs-util-dim") require("lualibs-util-str") +--[[everything below apparently not required for the fontloader]] require("lualibs-util-tab") +require("lualibs-util-sto") +require("lualibs-util-dim") require("lualibs-util-jsn") +--require("lualibs-util-mrg")-- not required +require("lualibs-util-lua") % \end{macrocode} % % \iffalse -- cgit v1.2.3