From 31c8bf0930a6d3e353a552d6bab71f62bc25f34e Mon Sep 17 00:00:00 2001 From: Marius Date: Mon, 24 Dec 2012 21:40:31 +0200 Subject: beta 2012.12.24 20:08 --- tex/generic/context/luatex/luatex-fonts-merged.lua | 3596 ++++++++++---------- tex/generic/context/luatex/luatex-fonts.lua | 4 +- 2 files changed, 1809 insertions(+), 1791 deletions(-) (limited to 'tex/generic') diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 1f402cd4f..2b1f94b14 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,10 +1,10 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 12/10/12 23:20:34 +-- merge date : 12/24/12 20:08:31 do -- begin closure to overcome local limits and interference -if not modules then modules = { } end modules ['l-functions'] = { +if not modules then modules = { } end modules ['l-lpeg'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -12,1167 +12,1004 @@ if not modules then modules = { } end modules ['l-functions'] = { license = "see context related readme files" } -functions = functions or { } - -function functions.dummy() end +-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) -end -- closure +-- move utf -> l-unicode +-- move string -> l-string or keep it here -do -- begin closure to overcome local limits and interference +local lpeg = require("lpeg") -if not modules then modules = { } end modules ['l-string'] = { - version = 1.001, - comment = "companion to luat-lib.mkiv", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} +-- tracing (only used when we encounter a problem in integration of lpeg in luatex) -local string = string -local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower -local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct +-- some code will move to unicode and string --- some functions may disappear as they are not used anywhere +local report = texio and texio.write_nl or print -if not string.split then +-- local lpmatch = lpeg.match +-- local lpprint = lpeg.print +-- local lpp = lpeg.P +-- local lpr = lpeg.R +-- local lps = lpeg.S +-- local lpc = lpeg.C +-- local lpb = lpeg.B +-- local lpv = lpeg.V +-- local lpcf = lpeg.Cf +-- local lpcb = lpeg.Cb +-- local lpcg = lpeg.Cg +-- local lpct = lpeg.Ct +-- local lpcs = lpeg.Cs +-- local lpcc = lpeg.Cc +-- local lpcmt = lpeg.Cmt +-- local lpcarg = lpeg.Carg - -- this will be overloaded by a faster lpeg variant +-- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end - function string.split(str,pattern) - local t = { } - if #str > 0 then - local n = 1 - for s in gmatch(str..pattern,"(.-)"..pattern) do - t[n] = s - n = n + 1 - end - end - return t - end +-- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end +-- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end +-- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end +-- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end +-- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end +-- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end +-- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end +-- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end +-- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end +-- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end +-- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end +-- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end +-- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end +-- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end -end +local type, next = type, next +local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format -function string.unquoted(str) - return (gsub(str,"^([\"\'])(.*)%1$","%2")) -end +-- Beware, we predefine a bunch of patterns here and one reason for doing so +-- is that we get consistent behaviour in some of the visualizers. ---~ function stringunquoted(str) ---~ if find(str,"^[\'\"]") then ---~ return sub(str,2,-2) ---~ else ---~ return str ---~ end ---~ end +lpeg.patterns = lpeg.patterns or { } -- so that we can share +local patterns = lpeg.patterns -function string.quoted(str) - return format("%q",str) -- always " -end +local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt +local lpegtype, lpegmatch = lpeg.type, lpeg.match -function string.count(str,pattern) -- variant 3 - local n = 0 - for _ in gmatch(str,pattern) do -- not for utf - n = n + 1 - end - return n -end +local anything = P(1) +local endofstring = P(-1) +local alwaysmatched = P(true) -function string.limit(str,n,sentinel) -- not utf proof - if #str > n then - sentinel = sentinel or "..." - return sub(str,1,(n-#sentinel)) .. sentinel - else - return str - end -end +patterns.anything = anything +patterns.endofstring = endofstring +patterns.beginofstring = alwaysmatched +patterns.alwaysmatched = alwaysmatched -local space = S(" \t\v\n") -local nospace = 1 - space -local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code +local digit, sign = R('09'), S('+-') +local cr, lf, crlf = P("\r"), P("\n"), P("\r\n") +local newline = crlf + S("\r\n") -- cr + lf +local escaped = P("\\") * anything +local squote = P("'") +local dquote = P('"') +local space = P(" ") -function string.strip(str) - return lpegmatch(stripper,str) or "" -end +local utfbom_32_be = P('\000\000\254\255') +local utfbom_32_le = P('\255\254\000\000') +local utfbom_16_be = P('\255\254') +local utfbom_16_le = P('\254\255') +local utfbom_8 = P('\239\187\191') +local utfbom = utfbom_32_be + utfbom_32_le + + utfbom_16_be + utfbom_16_le + + utfbom_8 +local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le") + + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le") + + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8 +local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4) + + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2) + + utfbom_8 * Cc(3) + Cc(0) -function string.is_empty(str) - return not find(str,"%S") -end +local utf8next = R("\128\191") -local patterns_escapes = { - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%(", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} +patterns.utf8one = R("\000\127") +patterns.utf8two = R("\194\223") * utf8next +patterns.utf8three = R("\224\239") * utf8next * utf8next +patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next +patterns.utfbom = utfbom +patterns.utftype = utftype +patterns.utfoffset = utfoffset -local simple_escapes = { - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} +local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four +local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false) -function string.escapedpattern(str,simple) - return (gsub(str,".",simple and simple_escapes or patterns_escapes)) -end +patterns.utf8 = utf8char +patterns.utf8char = utf8char +patterns.validutf8 = validutf8char +patterns.validutf8char = validutf8char -function string.topattern(str,lowercase,strict) - if str == "" then - return ".*" - else - str = gsub(str,".",simple_escapes) - if lowercase then - str = lower(str) - end - if strict then - return "^" .. str .. "$" - else - return str - end - end -end +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer +local nonspacer = 1 - spacer +local nonwhitespace = 1 - whitespace +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace +patterns.nonspacer = nonspacer +patterns.nonwhitespace = nonwhitespace -function string.valid(str,default) - return (type(str) == "string" and str ~= "" and str) or default or nil -end +local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto --- obsolete names: +----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0) +local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) -string.quote = string.quoted -string.unquote = string.unquoted +patterns.stripper = stripper +patterns.collapser = collapser --- handy fallback +patterns.digit = digit +patterns.sign = sign +patterns.cardinal = sign^0 * digit^1 +patterns.integer = sign^0 * digit^1 +patterns.unsigned = digit^0 * P('.') * digit^1 +patterns.float = sign^0 * patterns.unsigned +patterns.cunsigned = digit^0 * P(',') * digit^1 +patterns.cfloat = sign^0 * patterns.cunsigned +patterns.number = patterns.float + patterns.integer +patterns.cnumber = patterns.cfloat + patterns.integer +patterns.oct = P("0") * R("07")^1 +patterns.octal = patterns.oct +patterns.HEX = P("0x") * R("09","AF")^1 +patterns.hex = P("0x") * R("09","af")^1 +patterns.hexadecimal = P("0x") * R("09","AF","af")^1 +patterns.lowercase = R("az") +patterns.uppercase = R("AZ") +patterns.letter = patterns.lowercase + patterns.uppercase +patterns.space = space +patterns.tab = P("\t") +patterns.spaceortab = patterns.space + patterns.tab +patterns.newline = newline +patterns.emptyline = newline^1 +patterns.equal = P("=") +patterns.comma = P(",") +patterns.commaspacer = P(",") * spacer^0 +patterns.period = P(".") +patterns.colon = P(":") +patterns.semicolon = P(";") +patterns.underscore = P("_") +patterns.escaped = escaped +patterns.squote = squote +patterns.dquote = dquote +patterns.nosquote = (escaped + (1-squote))^0 +patterns.nodquote = (escaped + (1-dquote))^0 +patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle +patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle +patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble +patterns.unspacer = ((patterns.spacer^1)/"")^0 -string.itself = function(s) return s end +patterns.singlequoted = squote * patterns.nosquote * squote +patterns.doublequoted = dquote * patterns.nodquote * dquote +patterns.quoted = patterns.doublequoted + patterns.singlequoted --- also handy (see utf variant) +patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1) -local pattern = Ct(C(1)^0) -- string and not utf ! +patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 +patterns.beginline = #(1-newline) -function string.totable(str) - return lpegmatch(pattern,str) +local function anywhere(pattern) --slightly adapted from website + return P { P(pattern) + 1 * V(1) } end -end -- closure - -do -- begin closure to overcome local limits and interference - -if not modules then modules = { } end modules ['l-table'] = { - version = 1.001, - comment = "companion to luat-lib.mkiv", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} - -local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs -local table, string = table, string -local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove -local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match -local getmetatable, setmetatable = getmetatable, setmetatable -local getinfo = debug.getinfo - --- Starting with version 5.2 Lua no longer provide ipairs, which makes --- sense. As we already used the for loop and # in most places the --- impact on ConTeXt was not that large; the remaining ipairs already --- have been replaced. In a similar fashion we also hardly used pairs. --- --- Hm, actually ipairs was retained, but we no longer use it anyway. --- --- Just in case, we provide the fallbacks as discussed in Programming --- in Lua (http://www.lua.org/pil/7.3.html): - -if not ipairs then - - -- for k, v in ipairs(t) do ... end - -- for k=1,#t do local v = t[k] ... end +lpeg.anywhere = anywhere - local function iterate(a,i) - i = i + 1 - local v = a[i] - if v ~= nil then - return i, v --, nil - end +function lpeg.instringchecker(p) + p = anywhere(p) + return function(str) + return lpegmatch(p,str) and true or false end +end - function ipairs(a) - return iterate, a, 0 - end +function lpeg.splitter(pattern, action) + return (((1-P(pattern))^1)/action+1)^0 +end +function lpeg.tsplitter(pattern, action) + return Ct((((1-P(pattern))^1)/action+1)^0) end -if not pairs then +-- probleem: separator can be lpeg and that does not hash too well, but +-- it's quite okay as the key is then not garbage collected - -- for k, v in pairs(t) do ... end - -- for k, v in next, t do ... end +local splitters_s, splitters_m, splitters_t = { }, { }, { } - function pairs(t) - return next, t -- , nil +local function splitat(separator,single) + local splitter = (single and splitters_s[separator]) or splitters_m[separator] + if not splitter then + separator = P(separator) + local other = C((1 - separator)^0) + if single then + local any = anything + splitter = other * (separator * C(any^0) + "") -- ? + splitters_s[separator] = splitter + else + splitter = other * (separator * other)^0 + splitters_m[separator] = splitter + end end - + return splitter end --- Also, unpack has been moved to the table table, and for compatiility --- reasons we provide both now. - -if not table.unpack then - table.unpack = _G.unpack -elseif not unpack then - _G.unpack = table.unpack +local function tsplitat(separator) + local splitter = splitters_t[separator] + if not splitter then + splitter = Ct(splitat(separator)) + splitters_t[separator] = splitter + end + return splitter end --- extra functions, some might go (when not used) +lpeg.splitat = splitat +lpeg.tsplitat = tsplitat -function table.strip(tab) - local lst, l = { }, 0 - for i=1,#tab do - local s = gsub(tab[i],"^%s*(.-)%s*$","%1") - if s == "" then - -- skip this one - else - l = l + 1 - lst[l] = s - end +function string.splitup(str,separator) + if not separator then + separator = "," end - return lst + return lpegmatch(splitters_m[separator] or splitat(separator),str) end -function table.keys(t) - if t then - local keys, k = { }, 0 - for key, _ in next, t do - k = k + 1 - keys[k] = key - end - return keys - else - return { } - end -end +-- local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more +-- local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more +-- local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps +-- local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps -local function compare(a,b) - local ta, tb = type(a), type(b) -- needed, else 11 < 2 - if ta == tb then - return a < b - else - return tostring(a) < tostring(b) - end -end +local cache = { } -local function sortedkeys(tab) - if tab then - local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed - for key,_ in next, tab do - s = s + 1 - srt[s] = key - if category == 3 then - -- no further check - else - local tkey = type(key) - if tkey == "string" then - category = (category == 2 and 3) or 1 - elseif tkey == "number" then - category = (category == 1 and 3) or 2 - else - category = 3 - end - end - end - if category == 0 or category == 3 then - sort(srt,compare) - else - sort(srt) - end - return srt - else - return { } +function lpeg.split(separator,str) + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c end + return lpegmatch(c,str) end -local function sortedhashkeys(tab) -- fast one - if tab then - local srt, s = { }, 0 - for key,_ in next, tab do - if key then - s= s + 1 - srt[s] = key - end +function string.split(str,separator) + if separator then + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c end - sort(srt) - return srt + return lpegmatch(c,str) else - return { } + return { str } end end -function table.allkeys(t) - local keys = { } - for i=1,#t do - for k, v in next, t[i] do - keys[k] = true - end - end - return sortedkeys(keys) -end +local spacing = patterns.spacer^0 * newline -- sort of strip +local empty = spacing * Cc("") +local nonempty = Cs((1-spacing)^1) * spacing^-1 +local content = (empty + nonempty)^1 -table.sortedkeys = sortedkeys -table.sortedhashkeys = sortedhashkeys +patterns.textline = content -local function nothing() end +local linesplitter = tsplitat(newline) -local function sortedhash(t) - if t then - local n, s = 0, sortedkeys(t) -- the robust one - local function kv(s) - n = n + 1 - local k = s[n] - return k, t[k] - end - return kv, s - else - return nothing - end +patterns.linesplitter = linesplitter + +function string.splitlines(str) + return lpegmatch(linesplitter,str) end -table.sortedhash = sortedhash -table.sortedpairs = sortedhash +-- lpeg.splitters = cache -- no longer public -function table.append(t,list) - local n = #t - for i=1,#list do - n = n + 1 - t[n] = list[i] - end - return t -end +local cache = { } -function table.prepend(t, list) - local nl = #list - local nt = nl + #t - for i=#t,1,-1 do - t[nt] = t[i] - nt = nt - 1 - end - for i=1,#list do - t[i] = list[i] +function lpeg.checkedsplit(separator,str) + local c = cache[separator] + if not c then + separator = P(separator) + local other = C((1 - separator)^1) + c = Ct(separator^0 * other * (separator^1 * other)^0) + cache[separator] = c end - return t + return lpegmatch(c,str) end -function table.merge(t, ...) -- first one is target - t = t or { } - local lst = { ... } - for i=1,#lst do - for k, v in next, lst[i] do - t[k] = v - end +function string.checkedsplit(str,separator) + local c = cache[separator] + if not c then + separator = P(separator) + local other = C((1 - separator)^1) + c = Ct(separator^0 * other * (separator^1 * other)^0) + cache[separator] = c end - return t + return lpegmatch(c,str) end -function table.merged(...) - local tmp, lst = { }, { ... } - for i=1,#lst do - for k, v in next, lst[i] do - tmp[k] = v +-- from roberto's site: + +local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end +local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end +local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end + +local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 + +patterns.utf8byte = utf8byte + +--~ local str = " a b c d " + +--~ local s = lpeg.stripper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.stripper("ab") print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper("ab") print("["..lpegmatch(s,str).."]") + +local cache = { } + +function lpeg.stripper(str) + if type(str) == "string" then + local s = cache[str] + if not s then + s = Cs(((S(str)^1)/"" + 1)^0) + cache[str] = s end + return s + else + return Cs(((str^1)/"" + 1)^0) end - return tmp end -function table.imerge(t, ...) - local lst, nt = { ... }, #t - for i=1,#lst do - local nst = lst[i] - for j=1,#nst do - nt = nt + 1 - t[nt] = nst[j] +local cache = { } + +function lpeg.keeper(str) + if type(str) == "string" then + local s = cache[str] + if not s then + s = Cs((((1-S(str))^1)/"" + 1)^0) + cache[str] = s end + return s + else + return Cs((((1-str)^1)/"" + 1)^0) end - return t end -function table.imerged(...) - local tmp, ntmp, lst = { }, 0, {...} - for i=1,#lst do - local nst = lst[i] - for j=1,#nst do - ntmp = ntmp + 1 - tmp[ntmp] = nst[j] - end - end - return tmp +function lpeg.frontstripper(str) -- or pattern (yet undocumented) + return (P(str) + P(true)) * Cs(anything^0) end -local function fastcopy(old,metatabletoo) -- fast one - if old then - local new = { } - for k,v in next, old do - if type(v) == "table" then - new[k] = fastcopy(v,metatabletoo) -- was just table.copy - else - new[k] = v - end - end - if metatabletoo then - -- optional second arg - local mt = getmetatable(old) - if mt then - setmetatable(new,mt) - end - end - return new - else - return { } - end +function lpeg.endstripper(str) -- or pattern (yet undocumented) + return Cs((1 - P(str) * endofstring)^0) end --- todo : copy without metatable +-- Just for fun I looked at the used bytecode and +-- p = (p and p + pp) or pp gets one more (testset). -local function copy(t, tables) -- taken from lua wiki, slightly adapted - tables = tables or { } - local tcopy = {} - if not tables[t] then - tables[t] = tcopy - end - for i,v in next, t do -- brrr, what happens with sparse indexed - if type(i) == "table" then - if tables[i] then - i = tables[i] - else - i = copy(i, tables) +-- todo: cache when string + +function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys + local pattern + local u = isutf and utf8char or 1 + if type(one) == "table" then + local no = #one + local p = P(false) + if no == 0 then + for k, v in next, one do + p = p + P(k) / v + end + pattern = Cs((p + u)^0) + elseif no == 1 then + local o = one[1] + one, two = P(o[1]), o[2] + -- pattern = Cs(((1-one)^1 + one/two)^0) + pattern = Cs((one/two + u)^0) + else + for i=1,no do + local o = one[i] + p = p + P(o[1]) / o[2] end + pattern = Cs((p + u)^0) end - if type(v) ~= "table" then - tcopy[i] = v - elseif tables[v] then - tcopy[i] = tables[v] + else + pattern = Cs((P(one)/(two or "") + u)^0) + end + if makefunction then + return function(str) + return lpegmatch(pattern,str) + end + else + return pattern + end +end + +function lpeg.finder(lst,makefunction) + local pattern + if type(lst) == "table" then + pattern = P(false) + if #lst == 0 then + for k, v in next, lst do + pattern = pattern + P(k) -- ignore key, so we can use a replacer table + end else - tcopy[i] = copy(v, tables) + for i=1,#lst do + pattern = pattern + P(lst[i]) + end end + else + pattern = P(lst) end - local mt = getmetatable(t) - if mt then - setmetatable(tcopy,mt) + pattern = (1-pattern)^0 * pattern + if makefunction then + return function(str) + return lpegmatch(pattern,str) + end + else + return pattern end - return tcopy end -table.fastcopy = fastcopy -table.copy = copy +-- print(lpeg.match(lpeg.replacer("e","a"),"test test")) +-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) +-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) -function table.derive(parent) - local child = { } - if parent then - setmetatable(child,{ __index = parent }) +local splitters_f, splitters_s = { }, { } + +function lpeg.firstofsplit(separator) -- always return value + local splitter = splitters_f[separator] + if not splitter then + separator = P(separator) + splitter = C((1 - separator)^0) + splitters_f[separator] = splitter end - return child + return splitter end -function table.tohash(t,value) - local h = { } - if t then - if value == nil then value = true end - for _, v in next, t do -- no ipairs here - h[v] = value - end +function lpeg.secondofsplit(separator) -- nil if not split + local splitter = splitters_s[separator] + if not splitter then + separator = P(separator) + splitter = (1 - separator)^0 * separator * C(anything^0) + splitters_s[separator] = splitter end - return h + return splitter end -function table.fromhash(t) - local hsh, h = { }, 0 - for k, v in next, t do -- no ipairs here - if v then - h = h + 1 - hsh[h] = k - end +function lpeg.balancer(left,right) + left, right = P(left), P(right) + return P { left * ((1 - left - right) + V(1))^0 * right } +end + +-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) +-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty +-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) +-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) +-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty +-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) +-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) +-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) + +-- -- slower: +-- +-- function lpeg.counter(pattern) +-- local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 +-- return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end +-- end + +local nany = utf8char/"" + +function lpeg.counter(pattern) + pattern = Cs((P(pattern)/" " + nany)^0) + return function(str) + return #lpegmatch(pattern,str) end - return hsh end -local noquotes, hexify, handle, reduce, compact, inline, functions +-- utf extensies -local reserved = table.tohash { -- intercept a language inconvenience: no reserved words as key - 'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', 'if', - 'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true', 'until', 'while', -} +local utfcharacters = utf and utf.characters or string.utfcharacters +local utfgmatch = unicode and unicode.utf8.gmatch +local utfchar = utf and utf.char or (unicode and unicode.utf8 and unicode.utf8.char) -local function simple_table(t) - if #t > 0 then - local n = 0 - for _,v in next, t do - n = n + 1 +lpeg.UP = lpeg.P + +if utfcharacters then + + function lpeg.US(str) + local p = P(false) + for uc in utfcharacters(str) do + p = p + P(uc) end - if n == #t then - local tt, nt = { }, 0 - for i=1,#t do - local v = t[i] - local tv = type(v) - if tv == "number" then - nt = nt + 1 - if hexify then - tt[nt] = format("0x%04X",v) - else - tt[nt] = tostring(v) -- tostring not needed - end - elseif tv == "boolean" then - nt = nt + 1 - tt[nt] = tostring(v) - elseif tv == "string" then - nt = nt + 1 - tt[nt] = format("%q",v) - else - tt = nil - break - end - end - return tt + return p + end + + +elseif utfgmatch then + + function lpeg.US(str) + local p = P(false) + for uc in utfgmatch(str,".") do + p = p + P(uc) end + return p end - return nil -end --- Because this is a core function of mkiv I moved some function calls --- inline. --- --- twice as fast in a test: --- --- local propername = lpeg.P(lpeg.R("AZ","az","__") * lpeg.R("09","AZ","az", "__")^0 * lpeg.P(-1) ) +else --- problem: there no good number_to_string converter with the best resolution + function lpeg.US(str) + local p = P(false) + local f = function(uc) + p = p + P(uc) + end + lpegmatch((utf8char/f)^0,str) + return p + end -local function dummy() end +end -local function do_serialize(root,name,depth,level,indexed) - if level > 0 then - depth = depth .. " " - if indexed then - handle(format("%s{",depth)) - else - local tn = type(name) - if tn == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s[0x%04X]={",depth,name)) - else - handle(format("%s[%s]={",depth,name)) - end - elseif tn == "string" then - if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then - handle(format("%s%s={",depth,name)) - else - handle(format("%s[%q]={",depth,name)) - end - elseif tn == "boolean" then - handle(format("%s[%s]={",depth,tostring(name))) - else - handle(format("%s{",depth)) - end +local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture + +function lpeg.UR(str,more) + local first, last + if type(str) == "number" then + first = str + last = more or first + else + first, last = lpegmatch(range,str) + if not last then + return P(str) end end - -- we could check for k (index) being number (cardinal) - if root and next(root) then - -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) - -- if compact then - -- -- NOT: for k=1,#root do (we need to quit at nil) - -- for k,v in ipairs(root) do -- can we use next? - -- if not first then first = k end - -- last = last + 1 - -- end - -- end - local first, last = nil, 0 - if compact then - last = #root - for k=1,last do --- if not root[k] then - if root[k] == nil then - last = k - 1 - break - end - end - if last > 0 then - first = 1 - end + if first == last then + return P(str) + elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium + local p = P(false) + for i=first,last do + p = p + P(utfchar(i)) end - local sk = sortedkeys(root) - for i=1,#sk do - local k = sk[i] - local v = root[k] - --~ if v == root then - -- circular - --~ else - local t, tk = type(v), type(k) - if compact and first and tk == "number" and k >= first and k <= last then - if t == "number" then - if hexify then - handle(format("%s 0x%04X,",depth,v)) - else - handle(format("%s %s,",depth,v)) -- %.99g - end - elseif t == "string" then - if reduce and tonumber(v) then - handle(format("%s %s,",depth,v)) - else - handle(format("%s %q,",depth,v)) - end - elseif t == "table" then - if not next(v) then - handle(format("%s {},",depth)) - elseif inline then -- and #t > 0 - local st = simple_table(v) - if st then - handle(format("%s { %s },",depth,concat(st,", "))) - else - do_serialize(v,k,depth,level+1,true) - end - else - do_serialize(v,k,depth,level+1,true) - end - elseif t == "boolean" then - handle(format("%s %s,",depth,tostring(v))) - elseif t == "function" then - if functions then - handle(format('%s loadstring(%q),',depth,dump(v))) - else - handle(format('%s "function",',depth)) - end - else - handle(format("%s %q,",depth,tostring(v))) - end - elseif k == "__p__" then -- parent - if false then - handle(format("%s __p__=nil,",depth)) - end - elseif t == "number" then - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=0x%04X,",depth,k,v)) - else - handle(format("%s [%s]=%s,",depth,k,v)) -- %.99g - end - elseif tk == "boolean" then - if hexify then - handle(format("%s [%s]=0x%04X,",depth,tostring(k),v)) - else - handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g - end - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - if hexify then - handle(format("%s %s=0x%04X,",depth,k,v)) - else - handle(format("%s %s=%s,",depth,k,v)) -- %.99g - end - else - if hexify then - handle(format("%s [%q]=0x%04X,",depth,k,v)) - else - handle(format("%s [%q]=%s,",depth,k,v)) -- %.99g - end - end - elseif t == "string" then - if reduce and tonumber(v) then - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=%s,",depth,k,v)) - else - handle(format("%s [%s]=%s,",depth,k,v)) - end - elseif tk == "boolean" then - handle(format("%s [%s]=%s,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=%s,",depth,k,v)) - else - handle(format("%s [%q]=%s,",depth,k,v)) - end - else - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=%q,",depth,k,v)) - else - handle(format("%s [%s]=%q,",depth,k,v)) - end - elseif tk == "boolean" then - handle(format("%s [%s]=%q,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=%q,",depth,k,v)) - else - handle(format("%s [%q]=%q,",depth,k,v)) - end - end - elseif t == "table" then - if not next(v) then - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]={},",depth,k)) - else - handle(format("%s [%s]={},",depth,k)) - end - elseif tk == "boolean" then - handle(format("%s [%s]={},",depth,tostring(k))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s={},",depth,k)) - else - handle(format("%s [%q]={},",depth,k)) - end - elseif inline then - local st = simple_table(v) - if st then - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", "))) - else - handle(format("%s [%s]={ %s },",depth,k,concat(st,", "))) - end - elseif tk == "boolean" then -- or find(k,"^%d+$") then - handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s={ %s },",depth,k,concat(st,", "))) - else - handle(format("%s [%q]={ %s },",depth,k,concat(st,", "))) - end - else - do_serialize(v,k,depth,level+1) - end + return p -- nil when invalid range + else + local f = function(b) + return b >= first and b <= last + end + -- tricky, these nested captures + return utf8byte / f -- nil when invalid range + end +end + +-- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) + +-- lpeg.print(lpeg.R("ab","cd","gh")) +-- lpeg.print(lpeg.P("a","b","c")) +-- lpeg.print(lpeg.S("a","b","c")) + +-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) +-- print(lpeg.count("äáàa",lpeg.UP("áà"))) +-- print(lpeg.count("äáàa",lpeg.US("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR("aá"))) +-- print(lpeg.count("äáàa",lpeg.UR("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) + +function lpeg.is_lpeg(p) + return p and lpegtype(p) == "pattern" +end + +function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order + if type(list) ~= "table" then + list = { list, ... } + end + -- table.sort(list) -- longest match first + local p = P(list[1]) + for l=2,#list do + p = p + P(list[l]) + end + return p +end + +-- For the moment here, but it might move to utilities. Beware, we need to +-- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we +-- loop back from the end cq. prepend. + +local sort = table.sort + +local function copyindexed(old) + local new = { } + for i=1,#old do + new[i] = old + end + return new +end + +local function sortedkeys(tab) + local keys, s = { }, 0 + for key,_ in next, tab do + s = s + 1 + keys[s] = key + end + sort(keys) + return keys +end + +function lpeg.append(list,pp,delayed,checked) + local p = pp + if #list > 0 then + local keys = copyindexed(list) + sort(keys) + for i=#keys,1,-1 do + local k = keys[i] + if p then + p = P(k) + p + else + p = P(k) + end + end + elseif delayed then -- hm, it looks like the lpeg parser resolves anyway + local keys = sortedkeys(list) + if p then + for i=1,#keys,1 do + local k = keys[i] + local v = list[k] + p = P(k)/list + p + end + else + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + p = P(k) + p else - do_serialize(v,k,depth,level+1) + p = P(k) end - elseif t == "boolean" then - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=%s,",depth,k,tostring(v))) - else - handle(format("%s [%s]=%s,",depth,k,tostring(v))) - end - elseif tk == "boolean" then -- or find(k,"^%d+$") then - handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=%s,",depth,k,tostring(v))) + end + if p then + p = p / list + end + end + elseif checked then + -- problem: substitution gives a capture + local keys = sortedkeys(list) + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + if k == v then + p = P(k) + p else - handle(format("%s [%q]=%s,",depth,k,tostring(v))) - end - elseif t == "function" then - if functions then - local f = getinfo(v).what == "C" and dump(dummy) or dump(v) - -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f)) - else - handle(format("%s [%s]=loadstring(%q),",depth,k,f)) - end - elseif tk == "boolean" then - handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=loadstring(%q),",depth,k,f)) - else - handle(format("%s [%q]=loadstring(%q),",depth,k,f)) - end + p = P(k)/v + p end else - if tk == "number" then -- or find(k,"^%d+$") then - if hexify then - handle(format("%s [0x%04X]=%q,",depth,k,tostring(v))) - else - handle(format("%s [%s]=%q,",depth,k,tostring(v))) - end - elseif tk == "boolean" then -- or find(k,"^%d+$") then - handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=%q,",depth,k,tostring(v))) + if k == v then + p = P(k) else - handle(format("%s [%q]=%q,",depth,k,tostring(v))) + p = P(k)/v end end - --~ end + end + else + local keys = sortedkeys(list) + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + p = P(k)/v + p + else + p = P(k)/v + end end end - if level > 0 then - handle(format("%s},",depth)) - end + return p end --- replacing handle by a direct t[#t+1] = ... (plus test) is not much --- faster (0.03 on 1.00 for zapfino.tma) - -local function serialize(_handle,root,name,specification) -- handle wins - local tname = type(name) - if type(specification) == "table" then - noquotes = specification.noquotes - hexify = specification.hexify - handle = _handle or specification.handle or print - reduce = specification.reduce or false - functions = specification.functions - compact = specification.compact - inline = specification.inline and compact - if functions == nil then - functions = true - end - if compact == nil then - compact = true - end - if inline == nil then - inline = compact - end - else - noquotes = false - hexify = false - handle = _handle or print - reduce = false - compact = true - inline = true - functions = true - end - if tname == "string" then - if name == "return" then - handle("return {") - else - handle(name .. "={") - end - elseif tname == "number" then - if hexify then - handle(format("[0x%04X]={",name)) - else - handle("[" .. name .. "]={") - end - elseif tname == "boolean" then - if name then - handle("return {") - else - handle("{") - end - else - handle("t={") - end - if root then - -- The dummy access will initialize a table that has a delayed initialization - -- using a metatable. (maybe explicitly test for metatable) - if getmetatable(root) then -- todo: make this an option, maybe even per subtable - local dummy = root._w_h_a_t_e_v_e_r_ - root._w_h_a_t_e_v_e_r_ = nil - end - -- Let's forget about empty tables. - if next(root) then - do_serialize(root,name,"",0) - end - end - handle("}") -end - ---~ name: ---~ ---~ true : return { } ---~ false : { } ---~ nil : t = { } ---~ string : string = { } ---~ 'return' : return { } ---~ number : [number] = { } - -function table.serialize(root,name,specification) - local t, n = { }, 0 - local function flush(s) - n = n + 1 - t[n] = s - end - serialize(flush,root,name,specification) - return concat(t,"\n") -end - -table.tohandle = serialize +-- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true)) +-- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true)) --- sometimes tables are real use (zapfino extra pro is some 85M) in which --- case a stepwise serialization is nice; actually, we could consider: --- --- for line in table.serializer(root,name,reduce,noquotes) do --- ...(line) +-- function lpeg.exact_match(words,case_insensitive) +-- local pattern = concat(words) +-- if case_insensitive then +-- local pattern = S(upper(characters)) + S(lower(characters)) +-- local list = { } +-- for i=1,#words do +-- list[lower(words[i])] = true +-- end +-- return Cmt(pattern^1, function(_,i,s) +-- return list[lower(s)] and i +-- end) +-- else +-- local pattern = S(concat(words)) +-- local list = { } +-- for i=1,#words do +-- list[words[i]] = true +-- end +-- return Cmt(pattern^1, function(_,i,s) +-- return list[s] and i +-- end) +-- end -- end --- --- so this is on the todo list - -local maxtab = 2*1024 -function table.tofile(filename,root,name,specification) - local f = io.open(filename,'w') - if f then - if maxtab > 1 then - local t, n = { }, 0 - local function flush(s) - n = n + 1 - t[n] = s - if n > maxtab then - f:write(concat(t,"\n"),"\n") -- hm, write(sometable) should be nice - t, n = { }, 0 -- we could recycle t if needed - end - end - serialize(flush,root,name,specification) - f:write(concat(t,"\n"),"\n") - else - local function flush(s) - f:write(s,"\n") - end - serialize(flush,root,name,specification) - end - f:close() - io.flush() - end -end +-- experiment: -local function flattened(t,f,depth) - if f == nil then - f = { } - depth = 0xFFFF - elseif tonumber(f) then - -- assume that only two arguments are given - depth = f - f = { } - elseif not depth then - depth = 0xFFFF - end - for k, v in next, t do - if type(k) ~= "number" then - if depth > 0 and type(v) == "table" then - flattened(v,f,depth-1) - else - f[k] = v - end - end - end - local n = #f - for k=1,#t do +local function make(t) + local p + local keys = sortedkeys(t) + for i=1,#keys do + local k = keys[i] local v = t[k] - if depth > 0 and type(v) == "table" then - flattened(v,f,depth-1) - n = #f - else - n = n + 1 - f[n] = v - end - end - return f -end - -table.flattened = flattened - -local function unnest(t,f) -- only used in mk, for old times sake - if not f then -- and only relevant for token lists - f = { } - end - for i=1,#t do - local v = t[i] - if type(v) == "table" then - if type(v[1]) == "table" then - unnest(v,f) + if not p then + if next(v) then + p = P(k) * make(v) else - f[#f+1] = v + p = P(k) end else - f[#f+1] = v - end - end - return f -end - -function table.unnest(t) -- bad name - return unnest(t) -end - -local function are_equal(a,b,n,m) -- indexed - if a and b and #a == #b then - n = n or 1 - m = m or #a - for i=n,m do - local ai, bi = a[i], b[i] - if ai==bi then - -- same - elseif type(ai)=="table" and type(bi)=="table" then - if not are_equal(ai,bi) then - return false - end + if next(v) then + p = p + P(k) * make(v) else - return false + p = p + P(k) end end - return true - else - return false end + return p end -local function identical(a,b) -- assumes same structure - for ka, va in next, a do - local vb = b[ka] - if va == vb then - -- same - elseif type(va) == "table" and type(vb) == "table" then - if not identical(va,vb) then - return false +function lpeg.utfchartabletopattern(list) -- goes to util-lpg + local tree = { } + for i=1,#list do + local t = tree + for c in gmatch(list[i],".") do + if not t[c] then + t[c] = { } end - else - return false + t = t[c] end end - return true + return make(tree) end -table.identical = identical -table.are_equal = are_equal +-- inspect ( lpeg.utfchartabletopattern { +-- utfchar(0x00A0), -- nbsp +-- utfchar(0x2000), -- enquad +-- utfchar(0x2001), -- emquad +-- utfchar(0x2002), -- enspace +-- utfchar(0x2003), -- emspace +-- utfchar(0x2004), -- threeperemspace +-- utfchar(0x2005), -- fourperemspace +-- utfchar(0x2006), -- sixperemspace +-- utfchar(0x2007), -- figurespace +-- utfchar(0x2008), -- punctuationspace +-- utfchar(0x2009), -- breakablethinspace +-- utfchar(0x200A), -- hairspace +-- utfchar(0x200B), -- zerowidthspace +-- utfchar(0x202F), -- narrownobreakspace +-- utfchar(0x205F), -- math thinspace +-- } ) --- maybe also make a combined one +-- a few handy ones: +-- +-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3 -function table.compact(t) - if t then - for k,v in next, t do - if not next(v) then - t[k] = nil - end - end - end +patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol + +end -- closure + +do -- begin closure to overcome local limits and interference + +if not modules then modules = { } end modules ['l-functions'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +functions = functions or { } + +function functions.dummy() end + +end -- closure + +do -- begin closure to overcome local limits and interference + +if not modules then modules = { } end modules ['l-string'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local string = string +local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs + +-- Some functions are already defined in l-lpeg and maybe some from here will +-- move there (unless we also expose caches). + +-- if not string.split then +-- +-- function string.split(str,pattern) +-- local t = { } +-- if #str > 0 then +-- local n = 1 +-- for s in gmatch(str..pattern,"(.-)"..pattern) do +-- t[n] = s +-- n = n + 1 +-- end +-- end +-- return t +-- end +-- +-- end + +-- function string.unquoted(str) +-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern +-- end + +local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote + + patterns.dquote * C(patterns.nodquote) * patterns.dquote + +function string.unquoted(str) + return lpegmatch(unquoted,str) or str end -function table.contains(t, v) - if t then - for i=1, #t do - if t[i] == v then - return i - end - end - end - return false +-- print(string.unquoted("test")) +-- print(string.unquoted([["t\"est"]])) +-- print(string.unquoted([["t\"est"x]])) +-- print(string.unquoted("\'test\'")) +-- print(string.unquoted('"test"')) +-- print(string.unquoted('"test"')) + +function string.quoted(str) + return format("%q",str) -- always " end -function table.count(t) +function string.count(str,pattern) -- variant 3 local n = 0 - for k, v in next, t do + for _ in gmatch(str,pattern) do -- not for utf n = n + 1 end return n end -function table.swapped(t,s) -- hash - local n = { } - if s then - for k, v in next, s do - n[k] = v - end - end - for k, v in next, t do - n[v] = k +function string.limit(str,n,sentinel) -- not utf proof + if #str > n then + sentinel = sentinel or "..." + return sub(str,1,(n-#sentinel)) .. sentinel + else + return str end - return n end -function table.mirror(t) -- hash - local n = { } - for k, v in next, t do - n[v] = k - n[k] = v - end - return n -end +local stripper = patterns.stripper +local collapser = patterns.collapser -function table.reversed(t) - if t then - local tt, tn = { }, #t - if tn > 0 then - local ttn = 0 - for i=tn,1,-1 do - ttn = ttn + 1 - tt[ttn] = t[i] - end - end - return tt - end +function string.strip(str) + return lpegmatch(stripper,str) or "" end -function table.sequenced(t,sep) -- hash only - if t then - local s, n = { }, 0 - for k, v in sortedhash(t) do - if simple then - if v == true then - n = n + 1 - s[n] = k - elseif v and v~= "" then - n = n + 1 - s[n] = k .. "=" .. tostring(v) - end - else - n = n + 1 - s[n] = k .. "=" .. tostring(v) - end - end - return concat(s, sep or " | ") - else - return "" - end +function string.collapsespaces(str) + return lpegmatch(collapser,str) or "" end -function table.print(t,...) - if type(t) ~= "table" then - print(tostring(t)) +-- function string.is_empty(str) +-- return not find(str,"%S") +-- end + +local pattern = P(" ")^0 * P(-1) + +function string.is_empty(str) + if str == "" then + return true else - table.tohandle(print,t,...) + return lpegmatch(pattern,str) and true or false end end --- -- -- obsolete but we keep them for a while and might comment them later -- -- -- --- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack) +-- if not string.escapedpattern then +-- +-- local patterns_escapes = { +-- ["%"] = "%%", +-- ["."] = "%.", +-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", +-- ["["] = "%[", ["]"] = "%]", +-- ["("] = "%(", [")"] = "%)", +-- -- ["{"] = "%{", ["}"] = "%}" +-- -- ["^"] = "%^", ["$"] = "%$", +-- } +-- +-- local simple_escapes = { +-- ["-"] = "%-", +-- ["."] = "%.", +-- ["?"] = ".", +-- ["*"] = ".*", +-- } +-- +-- function string.escapedpattern(str,simple) +-- return (gsub(str,".",simple and simple_escapes or patterns_escapes)) +-- end +-- +-- function string.topattern(str,lowercase,strict) +-- if str == "" then +-- return ".*" +-- else +-- str = gsub(str,".",simple_escapes) +-- if lowercase then +-- str = lower(str) +-- end +-- if strict then +-- return "^" .. str .. "$" +-- else +-- return str +-- end +-- end +-- end +-- +-- end -function table.sub(t,i,j) - return { unpack(t,i,j) } -end +--- needs checking --- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice) +local anything = patterns.anything +local allescapes = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ? +local someescapes = Cc("%") * S(".-+%()[]") -- also {} and ^$ ? +local matchescapes = Cc(".") * S("*?") -- wildcard and single match -function table.is_empty(t) - return not t or not next(t) +local pattern_a = Cs ( ( allescapes + anything )^0 ) +local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 ) +local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") ) + +function string.escapedpattern(str,simple) + return lpegmatch(simple and pattern_b or pattern_a,str) end -function table.has_one_entry(t) - return t and not next(t,next(t)) +function string.topattern(str,lowercase,strict) + if str == "" then + return ".*" + elseif strict then + str = lpegmatch(pattern_c,str) + else + str = lpegmatch(pattern_b,str) + end + if lowercase then + return lower(str) + else + return str + end end --- new +-- print(string.escapedpattern("12+34*.tex",false)) +-- print(string.escapedpattern("12+34*.tex",true)) +-- print(string.topattern ("12+34*.tex",false,false)) +-- print(string.topattern ("12+34*.tex",false,true)) -function table.loweredkeys(t) -- maybe utf - local l = { } - for k, v in next, t do - l[lower(k)] = v - end - return l +function string.valid(str,default) + return (type(str) == "string" and str ~= "" and str) or default or nil end --- new, might move (maybe duplicate) +-- handy fallback -function table.unique(old) - local hash = { } - local new = { } - local n = 0 - for i=1,#old do - local oi = old[i] - if not hash[oi] then - n = n + 1 - new[n] = oi - hash[oi] = true - end - end - return new +string.itself = function(s) return s end + +-- also handy (see utf variant) + +local pattern = Ct(C(1)^0) -- string and not utf ! + +function string.totable(str) + return lpegmatch(pattern,str) end -function table.sorted(t,...) - sort(t,...) - return t -- still sorts in-place +-- handy from within tex: + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) end +-- obsolete names: + +string.quote = string.quoted +string.unquote = string.unquoted end -- closure do -- begin closure to overcome local limits and interference -if not modules then modules = { } end modules ['l-lpeg'] = { +if not modules then modules = { } end modules ['l-table'] = { version = 1.001, comment = "companion to luat-lib.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", @@ -1180,874 +1017,1022 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } +local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select +local table, string = table, string +local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove +local format, lower, dump = string.format, string.lower, string.dump +local getmetatable, setmetatable = getmetatable, setmetatable +local getinfo = debug.getinfo +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local floor = math.floor --- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) - -local lpeg = require("lpeg") +-- extra functions, some might go (when not used) --- tracing (only used when we encounter a problem in integration of lpeg in luatex) +local stripper = patterns.stripper --- some code will move to unicode and string +function table.strip(tab) + local lst, l = { }, 0 + for i=1,#tab do + local s = lpegmatch(stripper,tab[i]) or "" + if s == "" then + -- skip this one + else + l = l + 1 + lst[l] = s + end + end + return lst +end -local report = texio and texio.write_nl or print +function table.keys(t) + if t then + local keys, k = { }, 0 + for key, _ in next, t do + k = k + 1 + keys[k] = key + end + return keys + else + return { } + end +end --- local lpmatch = lpeg.match --- local lpprint = lpeg.print --- local lpp = lpeg.P --- local lpr = lpeg.R --- local lps = lpeg.S --- local lpc = lpeg.C --- local lpb = lpeg.B --- local lpv = lpeg.V --- local lpcf = lpeg.Cf --- local lpcb = lpeg.Cb --- local lpcg = lpeg.Cg --- local lpct = lpeg.Ct --- local lpcs = lpeg.Cs --- local lpcc = lpeg.Cc --- local lpcmt = lpeg.Cmt --- local lpcarg = lpeg.Carg +local function compare(a,b) + local ta, tb = type(a), type(b) -- needed, else 11 < 2 + if ta == tb then + return a < b + else + return tostring(a) < tostring(b) + end +end --- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end +local function sortedkeys(tab) + if tab then + local srt, category, s = { }, 0, 0 -- 0=unknown 1=string, 2=number 3=mixed + for key,_ in next, tab do + s = s + 1 + srt[s] = key + if category == 3 then + -- no further check + else + local tkey = type(key) + if tkey == "string" then + category = (category == 2 and 3) or 1 + elseif tkey == "number" then + category = (category == 1 and 3) or 2 + else + category = 3 + end + end + end + if category == 0 or category == 3 then + sort(srt,compare) + else + sort(srt) + end + return srt + else + return { } + end +end --- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end --- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end --- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end --- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end --- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end --- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end --- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end --- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end --- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end --- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end --- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end --- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end --- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end --- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end +local function sortedhashkeys(tab) -- fast one + if tab then + local srt, s = { }, 0 + for key,_ in next, tab do + if key then + s= s + 1 + srt[s] = key + end + end + sort(srt) + return srt + else + return { } + end +end -local type, next = type, next -local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format +function table.allkeys(t) + local keys = { } + for i=1,#t do + for k, v in next, t[i] do + keys[k] = true + end + end + return sortedkeys(keys) +end --- Beware, we predefine a bunch of patterns here and one reason for doing so --- is that we get consistent behaviour in some of the visualizers. +table.sortedkeys = sortedkeys +table.sortedhashkeys = sortedhashkeys -lpeg.patterns = lpeg.patterns or { } -- so that we can share -local patterns = lpeg.patterns +local function nothing() end -local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp -local lpegtype, lpegmatch = lpeg.type, lpeg.match +local function sortedhash(t) + if t then + local n, s = 0, sortedkeys(t) -- the robust one + local function kv(s) + n = n + 1 + local k = s[n] + return k, t[k] + end + return kv, s + else + return nothing + end +end -local utfcharacters = string.utfcharacters -local utfgmatch = unicode and unicode.utf8.gmatch +table.sortedhash = sortedhash +table.sortedpairs = sortedhash -- obsolete -local anything = P(1) -local endofstring = P(-1) -local alwaysmatched = P(true) +function table.append(t,list) + local n = #t + for i=1,#list do + n = n + 1 + t[n] = list[i] + end + return t +end -patterns.anything = anything -patterns.endofstring = endofstring -patterns.beginofstring = alwaysmatched -patterns.alwaysmatched = alwaysmatched +function table.prepend(t, list) + local nl = #list + local nt = nl + #t + for i=#t,1,-1 do + t[nt] = t[i] + nt = nt - 1 + end + for i=1,#list do + t[i] = list[i] + end + return t +end -local digit, sign = R('09'), S('+-') -local cr, lf, crlf = P("\r"), P("\n"), P("\r\n") -local newline = crlf + S("\r\n") -- cr + lf -local escaped = P("\\") * anything -local squote = P("'") -local dquote = P('"') -local space = P(" ") +-- function table.merge(t, ...) -- first one is target +-- t = t or { } +-- local lst = { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- t[k] = v +-- end +-- end +-- return t +-- end -local utfbom_32_be = P('\000\000\254\255') -local utfbom_32_le = P('\255\254\000\000') -local utfbom_16_be = P('\255\254') -local utfbom_16_le = P('\254\255') -local utfbom_8 = P('\239\187\191') -local utfbom = utfbom_32_be + utfbom_32_le - + utfbom_16_be + utfbom_16_le - + utfbom_8 -local utftype = utfbom_32_be / "utf-32-be" + utfbom_32_le / "utf-32-le" - + utfbom_16_be / "utf-16-be" + utfbom_16_le / "utf-16-le" - + utfbom_8 / "utf-8" + alwaysmatched / "unknown" +function table.merge(t, ...) -- first one is target + t = t or { } + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do + t[k] = v + end + end + return t +end -local utf8next = R("\128\191") +-- function table.merged(...) +-- local tmp, lst = { }, { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- tmp[k] = v +-- end +-- end +-- return tmp +-- end -patterns.utf8one = R("\000\127") -patterns.utf8two = R("\194\223") * utf8next -patterns.utf8three = R("\224\239") * utf8next * utf8next -patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next -patterns.utfbom = utfbom -patterns.utftype = utftype +function table.merged(...) + local t = { } + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do + t[k] = v + end + end + return t +end -local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four -local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false) +-- function table.imerge(t, ...) +-- local lst, nt = { ... }, #t +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- nt = nt + 1 +-- t[nt] = nst[j] +-- end +-- end +-- return t +-- end -patterns.utf8 = utf8char -patterns.utf8char = utf8char -patterns.validutf8 = validutf8char -patterns.validutf8char = validutf8char +function table.imerge(t, ...) + local nt = #t + for i=1,select("#",...) do + local nst = select(i,...) + for j=1,#nst do + nt = nt + 1 + t[nt] = nst[j] + end + end + return t +end -local eol = S("\n\r") -local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) -local whitespace = eol + spacer +-- function table.imerged(...) +-- local tmp, ntmp, lst = { }, 0, {...} +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- ntmp = ntmp + 1 +-- tmp[ntmp] = nst[j] +-- end +-- end +-- return tmp +-- end -patterns.digit = digit -patterns.sign = sign -patterns.cardinal = sign^0 * digit^1 -patterns.integer = sign^0 * digit^1 -patterns.unsigned = digit^0 * P('.') * digit^1 -patterns.float = sign^0 * patterns.unsigned -patterns.cunsigned = digit^0 * P(',') * digit^1 -patterns.cfloat = sign^0 * patterns.cunsigned -patterns.number = patterns.float + patterns.integer -patterns.cnumber = patterns.cfloat + patterns.integer -patterns.oct = P("0") * R("07")^1 -patterns.octal = patterns.oct -patterns.HEX = P("0x") * R("09","AF")^1 -patterns.hex = P("0x") * R("09","af")^1 -patterns.hexadecimal = P("0x") * R("09","AF","af")^1 -patterns.lowercase = R("az") -patterns.uppercase = R("AZ") -patterns.letter = patterns.lowercase + patterns.uppercase -patterns.space = space -patterns.tab = P("\t") -patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = eol -patterns.spacer = spacer -patterns.whitespace = whitespace -patterns.newline = newline -patterns.emptyline = newline^1 -patterns.nonspacer = 1 - spacer -patterns.nonwhitespace = 1 - whitespace -patterns.equal = P("=") -patterns.comma = P(",") -patterns.commaspacer = P(",") * spacer^0 -patterns.period = P(".") -patterns.colon = P(":") -patterns.semicolon = P(";") -patterns.underscore = P("_") -patterns.escaped = escaped -patterns.squote = squote -patterns.dquote = dquote -patterns.nosquote = (escaped + (1-squote))^0 -patterns.nodquote = (escaped + (1-dquote))^0 -patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble -patterns.unspacer = ((patterns.spacer^1)/"")^0 +function table.imerged(...) + local tmp, ntmp = { }, 0 + for i=1,select("#",...) do + local nst = select(i,...) + for j=1,#nst do + ntmp = ntmp + 1 + tmp[ntmp] = nst[j] + end + end + return tmp +end -patterns.singlequoted = squote * patterns.nosquote * squote -patterns.doublequoted = dquote * patterns.nodquote * dquote -patterns.quoted = patterns.doublequoted + patterns.singlequoted - -patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 -patterns.beginline = #(1-newline) +local function fastcopy(old,metatabletoo) -- fast one + if old then + local new = { } + for k, v in next, old do + if type(v) == "table" then + new[k] = fastcopy(v,metatabletoo) -- was just table.copy + else + new[k] = v + end + end + if metatabletoo then + -- optional second arg + local mt = getmetatable(old) + if mt then + setmetatable(new,mt) + end + end + return new + else + return { } + end +end --- print(string.unquoted("test")) --- print(string.unquoted([["t\"est"]])) --- print(string.unquoted([["t\"est"x]])) --- print(string.unquoted("\'test\'")) --- print(string.unquoted('"test"')) --- print(string.unquoted('"test"')) +-- todo : copy without metatable -local function anywhere(pattern) --slightly adapted from website - return P { P(pattern) + 1 * V(1) } +local function copy(t, tables) -- taken from lua wiki, slightly adapted + tables = tables or { } + local tcopy = {} + if not tables[t] then + tables[t] = tcopy + end + for i,v in next, t do -- brrr, what happens with sparse indexed + if type(i) == "table" then + if tables[i] then + i = tables[i] + else + i = copy(i, tables) + end + end + if type(v) ~= "table" then + tcopy[i] = v + elseif tables[v] then + tcopy[i] = tables[v] + else + tcopy[i] = copy(v, tables) + end + end + local mt = getmetatable(t) + if mt then + setmetatable(tcopy,mt) + end + return tcopy end -lpeg.anywhere = anywhere +table.fastcopy = fastcopy +table.copy = copy -function lpeg.instringchecker(p) - p = anywhere(p) - return function(str) - return lpegmatch(p,str) and true or false +function table.derive(parent) -- for the moment not public + local child = { } + if parent then + setmetatable(child,{ __index = parent }) end + return child end -function lpeg.splitter(pattern, action) - return (((1-P(pattern))^1)/action+1)^0 +function table.tohash(t,value) + local h = { } + if t then + if value == nil then value = true end + for _, v in next, t do -- no ipairs here + h[v] = value + end + end + return h end -function lpeg.tsplitter(pattern, action) - return Ct((((1-P(pattern))^1)/action+1)^0) +function table.fromhash(t) + local hsh, h = { }, 0 + for k, v in next, t do -- no ipairs here + if v then + h = h + 1 + hsh[h] = k + end + end + return hsh end --- probleem: separator can be lpeg and that does not hash too well, but --- it's quite okay as the key is then not garbage collected +local noquotes, hexify, handle, reduce, compact, inline, functions -local splitters_s, splitters_m, splitters_t = { }, { }, { } +local reserved = table.tohash { -- intercept a language inconvenience: no reserved words as key + 'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', 'if', + 'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true', 'until', 'while', +} -local function splitat(separator,single) - local splitter = (single and splitters_s[separator]) or splitters_m[separator] - if not splitter then - separator = P(separator) - local other = C((1 - separator)^0) - if single then - local any = anything - splitter = other * (separator * C(any^0) + "") -- ? - splitters_s[separator] = splitter - else - splitter = other * (separator * other)^0 - splitters_m[separator] = splitter +local function simple_table(t) + if #t > 0 then + local n = 0 + for _,v in next, t do + n = n + 1 + end + if n == #t then + local tt, nt = { }, 0 + for i=1,#t do + local v = t[i] + local tv = type(v) + if tv == "number" then + nt = nt + 1 + if hexify then + tt[nt] = format("0x%04X",v) + else + tt[nt] = tostring(v) -- tostring not needed + end + elseif tv == "boolean" then + nt = nt + 1 + tt[nt] = tostring(v) + elseif tv == "string" then + nt = nt + 1 + tt[nt] = format("%q",v) + else + tt = nil + break + end + end + return tt end end - return splitter + return nil end -local function tsplitat(separator) - local splitter = splitters_t[separator] - if not splitter then - splitter = Ct(splitat(separator)) - splitters_t[separator] = splitter - end - return splitter -end +-- Because this is a core function of mkiv I moved some function calls +-- inline. +-- +-- twice as fast in a test: +-- +-- local propername = lpeg.P(lpeg.R("AZ","az","__") * lpeg.R("09","AZ","az", "__")^0 * lpeg.P(-1) ) -lpeg.splitat = splitat -lpeg.tsplitat = tsplitat +-- problem: there no good number_to_string converter with the best resolution -function string.splitup(str,separator) - if not separator then - separator = "," - end - return lpegmatch(splitters_m[separator] or splitat(separator),str) -end +-- probably using .. is faster than format +-- maybe split in a few cases (yes/no hexify) ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps +-- todo: %g faster on numbers than %s -local cache = { } +local propername = patterns.propername -- was find(name,"^%a[%w%_]*$") -function lpeg.split(separator,str) - local c = cache[separator] - if not c then - c = tsplitat(separator) - cache[separator] = c - end - return lpegmatch(c,str) -end +local function dummy() end -function string.split(str,separator) - if separator then - local c = cache[separator] - if not c then - c = tsplitat(separator) - cache[separator] = c +local function do_serialize(root,name,depth,level,indexed) + if level > 0 then + depth = depth .. " " + if indexed then + handle(format("%s{",depth)) + else + local tn = type(name) + if tn == "number" then + if hexify then + handle(format("%s[0x%04X]={",depth,name)) + else + handle(format("%s[%s]={",depth,name)) + end + elseif tn == "string" then + if noquotes and not reserved[name] and lpegmatch(propername,name) then + handle(format("%s%s={",depth,name)) + else + handle(format("%s[%q]={",depth,name)) + end + elseif tn == "boolean" then + handle(format("%s[%s]={",depth,tostring(name))) + else + handle(format("%s{",depth)) + end + end + end + -- we could check for k (index) being number (cardinal) + if root and next(root) then + -- local first, last = nil, 0 -- #root cannot be trusted here (will be ok in 5.2 when ipairs is gone) + -- if compact then + -- -- NOT: for k=1,#root do (we need to quit at nil) + -- for k,v in ipairs(root) do -- can we use next? + -- if not first then first = k end + -- last = last + 1 + -- end + -- end + local first, last = nil, 0 + if compact then + last = #root + for k=1,last do + if root[k] == nil then + last = k - 1 + break + end + end + if last > 0 then + first = 1 + end + end + local sk = sortedkeys(root) + for i=1,#sk do + local k = sk[i] + local v = root[k] + --~ if v == root then + -- circular + --~ else + local t, tk = type(v), type(k) + if compact and first and tk == "number" and k >= first and k <= last then + if t == "number" then + if hexify then + handle(format("%s 0x%04X,",depth,v)) + else + handle(format("%s %s,",depth,v)) -- %.99g + end + elseif t == "string" then + if reduce and tonumber(v) then + handle(format("%s %s,",depth,v)) + else + handle(format("%s %q,",depth,v)) + end + elseif t == "table" then + if not next(v) then + handle(format("%s {},",depth)) + elseif inline then -- and #t > 0 + local st = simple_table(v) + if st then + handle(format("%s { %s },",depth,concat(st,", "))) + else + do_serialize(v,k,depth,level+1,true) + end + else + do_serialize(v,k,depth,level+1,true) + end + elseif t == "boolean" then + handle(format("%s %s,",depth,tostring(v))) + elseif t == "function" then + if functions then + handle(format('%s load(%q),',depth,dump(v))) + else + handle(format('%s "function",',depth)) + end + else + handle(format("%s %q,",depth,tostring(v))) + end + elseif k == "__p__" then -- parent + if false then + handle(format("%s __p__=nil,",depth)) + end + elseif t == "number" then + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=0x%04X,",depth,k,v)) + else + handle(format("%s [%s]=%s,",depth,k,v)) -- %.99g + end + elseif tk == "boolean" then + if hexify then + handle(format("%s [%s]=0x%04X,",depth,tostring(k),v)) + else + handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g + end + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + if hexify then + handle(format("%s %s=0x%04X,",depth,k,v)) + else + handle(format("%s %s=%s,",depth,k,v)) -- %.99g + end + else + if hexify then + handle(format("%s [%q]=0x%04X,",depth,k,v)) + else + handle(format("%s [%q]=%s,",depth,k,v)) -- %.99g + end + end + elseif t == "string" then + if reduce and tonumber(v) then + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=%s,",depth,k,v)) + else + handle(format("%s [%s]=%s,",depth,k,v)) + end + elseif tk == "boolean" then + handle(format("%s [%s]=%s,",depth,tostring(k),v)) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=%s,",depth,k,v)) + else + handle(format("%s [%q]=%s,",depth,k,v)) + end + else + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=%q,",depth,k,v)) + else + handle(format("%s [%s]=%q,",depth,k,v)) + end + elseif tk == "boolean" then + handle(format("%s [%s]=%q,",depth,tostring(k),v)) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=%q,",depth,k,v)) + else + handle(format("%s [%q]=%q,",depth,k,v)) + end + end + elseif t == "table" then + if not next(v) then + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]={},",depth,k)) + else + handle(format("%s [%s]={},",depth,k)) + end + elseif tk == "boolean" then + handle(format("%s [%s]={},",depth,tostring(k))) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s={},",depth,k)) + else + handle(format("%s [%q]={},",depth,k)) + end + elseif inline then + local st = simple_table(v) + if st then + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", "))) + else + handle(format("%s [%s]={ %s },",depth,k,concat(st,", "))) + end + elseif tk == "boolean" then + handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s={ %s },",depth,k,concat(st,", "))) + else + handle(format("%s [%q]={ %s },",depth,k,concat(st,", "))) + end + else + do_serialize(v,k,depth,level+1) + end + else + do_serialize(v,k,depth,level+1) + end + elseif t == "boolean" then + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=%s,",depth,k,tostring(v))) + else + handle(format("%s [%s]=%s,",depth,k,tostring(v))) + end + elseif tk == "boolean" then + handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=%s,",depth,k,tostring(v))) + else + handle(format("%s [%q]=%s,",depth,k,tostring(v))) + end + elseif t == "function" then + if functions then + local f = getinfo(v).what == "C" and dump(dummy) or dump(v) + -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=load(%q),",depth,k,f)) + else + handle(format("%s [%s]=load(%q),",depth,k,f)) + end + elseif tk == "boolean" then + handle(format("%s [%s]=load(%q),",depth,tostring(k),f)) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=load(%q),",depth,k,f)) + else + handle(format("%s [%q]=load(%q),",depth,k,f)) + end + end + else + if tk == "number" then + if hexify then + handle(format("%s [0x%04X]=%q,",depth,k,tostring(v))) + else + handle(format("%s [%s]=%q,",depth,k,tostring(v))) + end + elseif tk == "boolean" then + handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=%q,",depth,k,tostring(v))) + else + handle(format("%s [%q]=%q,",depth,k,tostring(v))) + end + end + --~ end end - return lpegmatch(c,str) - else - return { str } - end -end - -local spacing = patterns.spacer^0 * newline -- sort of strip -local empty = spacing * Cc("") -local nonempty = Cs((1-spacing)^1) * spacing^-1 -local content = (empty + nonempty)^1 - -patterns.textline = content - ---~ local linesplitter = Ct(content^0) ---~ ---~ function string.splitlines(str) ---~ return lpegmatch(linesplitter,str) ---~ end - -local linesplitter = tsplitat(newline) - -patterns.linesplitter = linesplitter - -function string.splitlines(str) - return lpegmatch(linesplitter,str) -end - -local utflinesplitter = utfbom^-1 * tsplitat(newline) - -patterns.utflinesplitter = utflinesplitter - -function string.utfsplitlines(str) - return lpegmatch(utflinesplitter,str or "") -end - -local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) -local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) - -function string.utfsplit(str,ignorewhitespace) -- new - if ignorewhitespace then - return lpegmatch(utfcharsplitter_iws,str or "") - else - return lpegmatch(utfcharsplitter_ows,str or "") - end -end - --- inspect(string.utfsplit("a b c d")) --- inspect(string.utfsplit("a b c d",true)) - --- -- alternative 1: 0.77 --- --- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) --- --- function string.utflength(str) --- return #lpegmatch(utfcharcounter,str or "") --- end --- --- -- alternative 2: 1.70 --- --- local n = 0 --- --- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow --- --- function string.utflength(str) --- n = 0 --- lpegmatch(utfcharcounter,str or "") --- return n --- end --- --- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) - -local n = 0 - -local utfcharcounter = utfbom^-1 * Cs ( ( - Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end - + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end - + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end - + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end -)^0 ) - -function string.utflength(str) - n = 0 - lpegmatch(utfcharcounter,str or "") - return n -end - ---~ lpeg.splitters = cache -- no longer public - -local cache = { } - -function lpeg.checkedsplit(separator,str) - local c = cache[separator] - if not c then - separator = P(separator) - local other = C((1 - separator)^1) - c = Ct(separator^0 * other * (separator^1 * other)^0) - cache[separator] = c end - return lpegmatch(c,str) -end - -function string.checkedsplit(str,separator) - local c = cache[separator] - if not c then - separator = P(separator) - local other = C((1 - separator)^1) - c = Ct(separator^0 * other * (separator^1 * other)^0) - cache[separator] = c - end - return lpegmatch(c,str) -end - ---~ from roberto's site: - -local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end -local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end -local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end - -local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 - -patterns.utf8byte = utf8byte - ---~ local str = " a b c d " - ---~ local s = lpeg.stripper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.keeper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.stripper("ab") print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.keeper("ab") print("["..lpegmatch(s,str).."]") - -local cache = { } - -function lpeg.stripper(str) - if type(str) == "string" then - local s = cache[str] - if not s then - s = Cs(((S(str)^1)/"" + 1)^0) - cache[str] = s - end - return s - else - return Cs(((str^1)/"" + 1)^0) + if level > 0 then + handle(format("%s},",depth)) end end -local cache = { } +-- replacing handle by a direct t[#t+1] = ... (plus test) is not much +-- faster (0.03 on 1.00 for zapfino.tma) -function lpeg.keeper(str) - if type(str) == "string" then - local s = cache[str] - if not s then - s = Cs((((1-S(str))^1)/"" + 1)^0) - cache[str] = s +local function serialize(_handle,root,name,specification) -- handle wins + local tname = type(name) + if type(specification) == "table" then + noquotes = specification.noquotes + hexify = specification.hexify + handle = _handle or specification.handle or print + reduce = specification.reduce or false + functions = specification.functions + compact = specification.compact + inline = specification.inline and compact + if functions == nil then + functions = true end - return s - else - return Cs((((1-str)^1)/"" + 1)^0) - end -end - -function lpeg.frontstripper(str) -- or pattern (yet undocumented) - return (P(str) + P(true)) * Cs(anything^0) -end - -function lpeg.endstripper(str) -- or pattern (yet undocumented) - return Cs((1 - P(str) * endofstring)^0) -end - --- Just for fun I looked at the used bytecode and --- p = (p and p + pp) or pp gets one more (testset). - -function lpeg.replacer(one,two,makefunction) - local pattern - if type(one) == "table" then - local no = #one - local p = P(false) - if no == 0 then - for k, v in next, one do - p = p + P(k) / v - end - pattern = Cs((p + 1)^0) - elseif no == 1 then - local o = one[1] - one, two = P(o[1]), o[2] - -- pattern = Cs(((1-one)^1 + one/two)^0) - pattern = Cs((one/two + 1)^0) - else - for i=1,no do - local o = one[i] - p = p + P(o[1]) / o[2] - end - pattern = Cs((p + 1)^0) + if compact == nil then + compact = true end - else - one = P(one) - two = two or "" - -- pattern = Cs(((1-one)^1 + one/two)^0) - pattern = Cs((one/two +1)^0) - end - if makefunction then - return function(str) - return lpegmatch(pattern,str) + if inline == nil then + inline = compact end else - return pattern + noquotes = false + hexify = false + handle = _handle or print + reduce = false + compact = true + inline = true + functions = true end -end - -function lpeg.finder(lst,makefunction) - local pattern - if type(lst) == "table" then - local p = P(false) - for i=1,#lst do - p = p + P(lst[i]) + if tname == "string" then + if name == "return" then + handle("return {") + else + handle(name .. "={") end - pattern = (p + 1)^0 - else - pattern = (P(lst) + 1)^0 - end - if makefunction then - return function(str) - return lpegmatch(pattern,str) + elseif tname == "number" then + if hexify then + handle(format("[0x%04X]={",name)) + else + handle("[" .. name .. "]={") end - else - return pattern - end -end - --- print(lpeg.match(lpeg.replacer("e","a"),"test test")) --- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) --- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) - -local splitters_f, splitters_s = { }, { } - -function lpeg.firstofsplit(separator) -- always return value - local splitter = splitters_f[separator] - if not splitter then - separator = P(separator) - splitter = C((1 - separator)^0) - splitters_f[separator] = splitter - end - return splitter -end - -function lpeg.secondofsplit(separator) -- nil if not split - local splitter = splitters_s[separator] - if not splitter then - separator = P(separator) - splitter = (1 - separator)^0 * separator * C(anything^0) - splitters_s[separator] = splitter - end - return splitter -end - -function lpeg.balancer(left,right) - left, right = P(left), P(right) - return P { left * ((1 - left - right) + V(1))^0 * right } -end - ---~ print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) ---~ print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty ---~ print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) ---~ print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) ---~ print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty ---~ print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) ---~ print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) ---~ print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) - ---~ -- slower: ---~ ---~ function lpeg.counter(pattern) ---~ local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 ---~ return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end ---~ end - -local nany = utf8char/"" - -function lpeg.counter(pattern) - pattern = Cs((P(pattern)/" " + nany)^0) - return function(str) - return #lpegmatch(pattern,str) - end -end - -if utfgmatch then - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local n = 0 - for _ in utfgmatch(str,what) do - n = n + 1 - end - return n - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) + elseif tname == "boolean" then + if name then + handle("return {") + else + handle("{") end + else + handle("t={") end - -else - - local cache = { } - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local p = cache[what] - if not p then - p = Cs((P(what)/" " + nany)^0) - cache[p] = p - end - return #lpegmatch(p,str) - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) + if root then + -- The dummy access will initialize a table that has a delayed initialization + -- using a metatable. (maybe explicitly test for metatable) + if getmetatable(root) then -- todo: make this an option, maybe even per subtable + local dummy = root._w_h_a_t_e_v_e_r_ + root._w_h_a_t_e_v_e_r_ = nil + end + -- Let's forget about empty tables. + if next(root) then + do_serialize(root,name,"",0) end end - + handle("}") end -local patterns_escapes = { -- also defines in l-string - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%)", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} - -local simple_escapes = { -- also defines in l-string - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} - -local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) -local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) +-- name: +-- +-- true : return { } +-- false : { } +-- nil : t = { } +-- string : string = { } +-- "return" : return { } +-- number : [number] = { } -function string.escapedpattern(str,simple) - return lpegmatch(simple and s or p,str) +function table.serialize(root,name,specification) + local t, n = { }, 0 + local function flush(s) + n = n + 1 + t[n] = s + end + serialize(flush,root,name,specification) + return concat(t,"\n") end --- utf extensies +table.tohandle = serialize -lpeg.UP = lpeg.P +-- sometimes tables are real use (zapfino extra pro is some 85M) in which +-- case a stepwise serialization is nice; actually, we could consider: +-- +-- for line in table.serializer(root,name,reduce,noquotes) do +-- ...(line) +-- end +-- +-- so this is on the todo list -if utfcharacters then +local maxtab = 2*1024 - function lpeg.US(str) - local p = P(false) - for uc in utfcharacters(str) do - p = p + P(uc) +function table.tofile(filename,root,name,specification) + local f = io.open(filename,'w') + if f then + if maxtab > 1 then + local t, n = { }, 0 + local function flush(s) + n = n + 1 + t[n] = s + if n > maxtab then + f:write(concat(t,"\n"),"\n") -- hm, write(sometable) should be nice + t, n = { }, 0 -- we could recycle t if needed + end + end + serialize(flush,root,name,specification) + f:write(concat(t,"\n"),"\n") + else + local function flush(s) + f:write(s,"\n") + end + serialize(flush,root,name,specification) end - return p + f:close() + io.flush() end +end - -elseif utfgmatch then - - function lpeg.US(str) - local p = P(false) - for uc in utfgmatch(str,".") do - p = p + P(uc) +local function flattened(t,f,depth) + if f == nil then + f = { } + depth = 0xFFFF + elseif tonumber(f) then + -- assume that only two arguments are given + depth = f + f = { } + elseif not depth then + depth = 0xFFFF + end + for k, v in next, t do + if type(k) ~= "number" then + if depth > 0 and type(v) == "table" then + flattened(v,f,depth-1) + else + f[k] = v + end end - return p end - -else - - function lpeg.US(str) - local p = P(false) - local f = function(uc) - p = p + P(uc) + local n = #f + for k=1,#t do + local v = t[k] + if depth > 0 and type(v) == "table" then + flattened(v,f,depth-1) + n = #f + else + n = n + 1 + f[n] = v end - lpegmatch((utf8char/f)^0,str) - return p end - + return f end -local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture - -local utfchar = unicode and unicode.utf8 and unicode.utf8.char +table.flattened = flattened -function lpeg.UR(str,more) - local first, last - if type(str) == "number" then - first = str - last = more or first - else - first, last = lpegmatch(range,str) - if not last then - return P(str) - end +local function unnest(t,f) -- only used in mk, for old times sake + if not f then -- and only relevant for token lists + f = { } -- this one can become obsolete end - if first == last then - return P(str) - elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium - local p = P(false) - for i=first,last do - p = p + P(utfchar(i)) - end - return p -- nil when invalid range - else - local f = function(b) - return b >= first and b <= last + for i=1,#t do + local v = t[i] + if type(v) == "table" then + if type(v[1]) == "table" then + unnest(v,f) + else + f[#f+1] = v + end + else + f[#f+1] = v end - -- tricky, these nested captures - return utf8byte / f -- nil when invalid range end + return f end --- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) - ---~ lpeg.print(lpeg.R("ab","cd","gh")) ---~ lpeg.print(lpeg.P("a","b","c")) ---~ lpeg.print(lpeg.S("a","b","c")) - ---~ print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) ---~ print(lpeg.count("äáàa",lpeg.UP("áà"))) ---~ print(lpeg.count("äáàa",lpeg.US("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("aá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) - -function lpeg.is_lpeg(p) - return p and lpegtype(p) == "pattern" -end - -function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order - if type(list) ~= "table" then - list = { list, ... } - end - -- table.sort(list) -- longest match first - local p = P(list[1]) - for l=2,#list do - p = p + P(list[l]) - end - return p +function table.unnest(t) -- bad name + return unnest(t) end --- For the moment here, but it might move to utilities. Beware, we need to --- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we --- loop back from the end cq. prepend. - -local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency! - -function lpeg.append(list,pp,delayed,checked) - local p = pp - if #list > 0 then - local keys = fastcopy(list) - sort(keys) - for i=#keys,1,-1 do - local k = keys[i] - if p then - p = P(k) + p - else - p = P(k) - end - end - elseif delayed then -- hm, it looks like the lpeg parser resolves anyway - local keys = sortedkeys(list) - if p then - for i=1,#keys,1 do - local k = keys[i] - local v = list[k] - p = P(k)/list + p - end - else - for i=1,#keys do - local k = keys[i] - local v = list[k] - if p then - p = P(k) + p - else - p = P(k) - end - end - if p then - p = p / list - end - end - elseif checked then - -- problem: substitution gives a capture - local keys = sortedkeys(list) - for i=1,#keys do - local k = keys[i] - local v = list[k] - if p then - if k == v then - p = P(k) + p - else - p = P(k)/v + p - end - else - if k == v then - p = P(k) - else - p = P(k)/v +local function are_equal(a,b,n,m) -- indexed + if a and b and #a == #b then + n = n or 1 + m = m or #a + for i=n,m do + local ai, bi = a[i], b[i] + if ai==bi then + -- same + elseif type(ai) == "table" and type(bi) == "table" then + if not are_equal(ai,bi) then + return false end - end - end - else - local keys = sortedkeys(list) - for i=1,#keys do - local k = keys[i] - local v = list[k] - if p then - p = P(k)/v + p else - p = P(k)/v + return false end end + return true + else + return false end - return p end --- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true)) --- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true)) +local function identical(a,b) -- assumes same structure + for ka, va in next, a do + local vb = b[ka] + if va == vb then + -- same + elseif type(va) == "table" and type(vb) == "table" then + if not identical(va,vb) then + return false + end + else + return false + end + end + return true +end --- function lpeg.exact_match(words,case_insensitive) --- local pattern = concat(words) --- if case_insensitive then --- local pattern = S(upper(characters)) + S(lower(characters)) --- local list = { } --- for i=1,#words do --- list[lower(words[i])] = true --- end --- return Cmt(pattern^1, function(_,i,s) --- return list[lower(s)] and i --- end) --- else --- local pattern = S(concat(words)) --- local list = { } --- for i=1,#words do --- list[words[i]] = true --- end --- return Cmt(pattern^1, function(_,i,s) --- return list[s] and i --- end) --- end --- end +table.identical = identical +table.are_equal = are_equal --- experiment: +-- maybe also make a combined one -local function make(t) - local p --- for k, v in next, t do - for k, v in table.sortedhash(t) do - if not p then - if next(v) then - p = P(k) * make(v) - else - p = P(k) +function table.compact(t) -- remove empty tables, assumes subtables + if t then + for k, v in next, t do + if not next(v) then -- no type checking + t[k] = nil end - else - if next(v) then - p = p + P(k) * make(v) - else - p = p + P(k) + end + end +end + +function table.contains(t, v) + if t then + for i=1, #t do + if t[i] == v then + return i end end end - return p + return false end -function lpeg.utfchartabletopattern(list) - local tree = { } - for i=1,#list do - local t = tree - for c in gmatch(list[i],".") do - if not t[c] then - t[c] = { } +function table.count(t) + local n = 0 + for k, v in next, t do + n = n + 1 + end + return n +end + +function table.swapped(t,s) -- hash + local n = { } + if s then + for k, v in next, s do + n[k] = v + end + end + for k, v in next, t do + n[v] = k + end + return n +end + +function table.mirrored(t) -- hash + local n = { } + for k, v in next, t do + n[v] = k + n[k] = v + end + return n +end + +function table.reversed(t) + if t then + local tt, tn = { }, #t + if tn > 0 then + local ttn = 0 + for i=tn,1,-1 do + ttn = ttn + 1 + tt[ttn] = t[i] end - t = t[c] end + return tt end - return make(tree) end --- inspect ( lpeg.utfchartabletopattern { --- utfchar(0x00A0), -- nbsp --- utfchar(0x2000), -- enquad --- utfchar(0x2001), -- emquad --- utfchar(0x2002), -- enspace --- utfchar(0x2003), -- emspace --- utfchar(0x2004), -- threeperemspace --- utfchar(0x2005), -- fourperemspace --- utfchar(0x2006), -- sixperemspace --- utfchar(0x2007), -- figurespace --- utfchar(0x2008), -- punctuationspace --- utfchar(0x2009), -- breakablethinspace --- utfchar(0x200A), -- hairspace --- utfchar(0x200B), -- zerowidthspace --- utfchar(0x202F), -- narrownobreakspace --- utfchar(0x205F), -- math thinspace --- } ) +function table.reverse(t) + if t then + local n = #t + for i=1,floor(n/2) do + local j = n - i + 1 + t[i], t[j] = t[j], t[i] + end + return t + end +end --- handy from within tex: +function table.sequenced(t,sep) -- hash only + if t then + local s, n = { }, 0 + for k, v in sortedhash(t) do + if simple then + if v == true then + n = n + 1 + s[n] = k + elseif v and v~= "" then + n = n + 1 + s[n] = k .. "=" .. tostring(v) + end + else + n = n + 1 + s[n] = k .. "=" .. tostring(v) + end + end + return concat(s, sep or " | ") + else + return "" + end +end -local lpegmatch = lpeg.match +function table.print(t,...) + if type(t) ~= "table" then + print(tostring(t)) + else + table.tohandle(print,t,...) + end +end -local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! +-- -- -- obsolete but we keep them for a while and might comment them later -- -- -- -function string.tformat(fmt,...) - return format(lpegmatch(replacer,fmt),...) +-- roughly: copy-loop : unpack : sub == 0.9 : 0.4 : 0.45 (so in critical apps, use unpack) + +function table.sub(t,i,j) + return { unpack(t,i,j) } end --- strips leading and trailing spaces and collapsed all other spaces +-- slower than #t on indexed tables (#t only returns the size of the numerically indexed slice) + +function table.is_empty(t) + return not t or not next(t) +end -local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) +function table.has_one_entry(t) + return t and not next(t,next(t)) +end -function string.collapsespaces(str) - return lpegmatch(pattern,str) +-- new + +function table.loweredkeys(t) -- maybe utf + local l = { } + for k, v in next, t do + l[lower(k)] = v + end + return l +end + +-- new, might move (maybe duplicate) + +function table.unique(old) + local hash = { } + local new = { } + local n = 0 + for i=1,#old do + local oi = old[i] + if not hash[oi] then + n = n + 1 + new[n] = oi + hash[oi] = true + end + end + return new end +function table.sorted(t,...) + sort(t,...) + return t -- still sorts in-place +end + + end -- closure do -- begin closure to overcome local limits and interference @@ -2205,25 +2190,25 @@ local suffix = period/"" * (1-period-slashes)^1 * -1 local pattern = C((noslashes^0 * slashes^1)^1) local function pathpart(name,default) - return lpegmatch(pattern,name) or default or "" + return name and lpegmatch(pattern,name) or default or "" end local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1 local function basename(name) - return lpegmatch(pattern,name) or name + return name and lpegmatch(pattern,name) or name end local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0 local function nameonly(name) - return lpegmatch(pattern,name) or name + return name and lpegmatch(pattern,name) or name end local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1 local function suffixonly(name) - return lpegmatch(pattern,name) or "" + return name and lpegmatch(pattern,name) or "" end file.pathpart = pathpart @@ -2254,7 +2239,9 @@ local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra capture local pattern_d = path * rest function file.splitname(str,splitdrive) - if splitdrive then + if not str then + -- error + elseif splitdrive then return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix else return lpegmatch(pattern_b,str) -- returns path, base, suffix @@ -2262,34 +2249,36 @@ function file.splitname(str,splitdrive) end function file.splitbase(str) - return lpegmatch(pattern_d,str) -- returns path, base+suffix + return str and lpegmatch(pattern_d,str) -- returns path, base+suffix end function file.nametotable(str,splitdrive) -- returns table - local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) - if splitdrive then - return { - path = path, - drive = drive, - subpath = subpath, - name = name, - base = base, - suffix = suffix, - } - else - return { - path = path, - name = name, - base = base, - suffix = suffix, - } + if str then + local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) + if splitdrive then + return { + path = path, + drive = drive, + subpath = subpath, + name = name, + base = base, + suffix = suffix, + } + else + return { + path = path, + name = name, + base = base, + suffix = suffix, + } + end end end local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1) function file.removesuffix(name) - return lpegmatch(pattern,name) + return name and lpegmatch(pattern,name) end -- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 @@ -2306,8 +2295,8 @@ end local suffix = period/"" * (1-period-slashes)^1 * -1 local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix) -function file.addsuffix(filename, suffix, criterium) - if not suffix or suffix == "" then +function file.addsuffix(filename,suffix,criterium) + if not filename or not suffix or suffix == "" then return filename elseif criterium == true then return filename .. "." .. suffix @@ -2353,7 +2342,7 @@ local suffix = period * (1-period-slashes)^1 * -1 local pattern = Cs((1-suffix)^0) function file.replacesuffix(name,suffix) - if suffix and suffix ~= "" then + if name and suffix and suffix ~= "" then return lpegmatch(pattern,name) .. "." .. suffix else return name @@ -2362,10 +2351,10 @@ end -- -local reslasher = lpeg.replacer(S("\\"),"/") +local reslasher = lpeg.replacer(P("\\"),"/") function file.reslash(str) - return lpegmatch(reslasher,str) + return str and lpegmatch(reslasher,str) end -- We should be able to use: @@ -2381,7 +2370,9 @@ end -- variant: function file.is_writable(name) - if lfs.isdir(name) then + if not name then + -- error + elseif lfs.isdir(name) then name = name .. "/m_t_x_t_e_s_t.tmp" local f = io.open(name,"wb") if f then @@ -2409,24 +2400,32 @@ end local readable = P("r") * Cc(true) function file.is_readable(name) - local a = attributes(name) - return a and lpegmatch(readable,a.permissions) or false + if name then + local a = attributes(name) + return a and lpegmatch(readable,a.permissions) or false + else + return false + end end file.isreadable = file.is_readable -- depricated file.iswritable = file.is_writable -- depricated function file.size(name) - local a = attributes(name) - return a and a.size or 0 + if name then + local a = attributes(name) + return a and a.size or 0 + else + return 0 + end end function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split) - return checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) + return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) end function file.joinpath(tab,separator) -- table - return concat(tab,separator or io.pathseparator) -- can have trailing // + return tab and concat(tab,separator or io.pathseparator) -- can have trailing // end local stripper = Cs(P(fwslash)^0/"" * reslasher) @@ -2434,14 +2433,23 @@ local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon local isroot = fwslash^1 * -1 local hasroot = fwslash^1 -function file.join(...) -- rather dirty +local deslasher = lpeg.replacer(S("\\/")^1,"/") + +-- If we have a network or prefix then there is a change that we end up with two +-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2) +-- split and rebuild as url. Of course we could assume no network paths (which +-- makes sense) adn assume either mapped drives (windows) or mounts (unix) but +-- then we still have to deal with urls ... anyhow, multiple // are never a real +-- problem but just ugly. + +function file.join(...) local lst = { ... } local one = lst[1] if lpegmatch(isnetwork,one) then - local two = lpegmatch(reslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,concat(lst,"/",2)) return one .. "/" .. two elseif lpegmatch(isroot,one) then - local two = lpegmatch(reslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,concat(lst,"/",2)) if lpegmatch(hasroot,two) then return two else @@ -2450,7 +2458,7 @@ function file.join(...) -- rather dirty elseif one == "" then return lpegmatch(stripper,concat(lst,"/",2)) else - return lpegmatch(reslasher,concat(lst,"/")) + return lpegmatch(deslasher,concat(lst,"/")) end end @@ -2479,6 +2487,9 @@ local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * C local absolute = fwslash function file.collapsepath(str,anchor) + if not str then + return + end if anchor and not lpegmatch(anchors,str) then str = getcurrentdir() .. "/" .. str end @@ -2488,7 +2499,6 @@ function file.collapsepath(str,anchor) return lpegmatch(reslasher,str) end local starter, oldelements = lpegmatch(splitstarter,str) --- inspect(oldelements) local newelements = { } local i = #oldelements while i > 0 do @@ -2542,11 +2552,13 @@ local whatever = P("-")^0 / "" local pattern_b = Cs(whatever * (1 - whatever * -1)^1) function file.robustname(str,strict) - str = lpegmatch(pattern_a,str) or str - if strict then - return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) - else - return str + if str then + str = lpegmatch(pattern_a,str) or str + if strict then + return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) + else + return str + end end end @@ -2554,7 +2566,9 @@ file.readdata = io.loaddata file.savedata = io.savedata function file.copy(oldname,newname) - file.savedata(newname,io.loaddata(oldname)) + if oldname and newname then + file.savedata(newname,io.loaddata(oldname)) + end end -- also rewrite previous @@ -2575,11 +2589,11 @@ lpeg.patterns.rootbased = rootbased -- ./name ../name /name c: :// name/name function file.is_qualified_path(filename) - return lpegmatch(qualified,filename) ~= nil + return filename and lpegmatch(qualified,filename) ~= nil end function file.is_rootbased_path(filename) - return lpegmatch(rootbased,filename) ~= nil + return filename and lpegmatch(rootbased,filename) ~= nil end -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end @@ -2601,8 +2615,10 @@ end -- for myself: function file.strip(name,dir) - local b, a = match(name,"^(.-)" .. dir .. "(.*)$") - return a ~= "" and a or name + if name then + local b, a = match(name,"^(.-)" .. dir .. "(.*)$") + return a ~= "" and a or name + end end -- local debuglist = { @@ -2954,7 +2970,7 @@ function io.readstring(f,n,m) f:seek("set",n) n = m end - local str = gsub(f:read(n),"%z","") + local str = gsub(f:read(n),"\000","") return str end @@ -3544,11 +3560,8 @@ if not modules then modules = { } end modules ['font-con'] = { license = "see context related readme files" } - -- some names of table entries will be changed (no _) -local utf = unicode.utf8 - local next, tostring, rawget = next, tostring, rawget local format, match, lower, gsub = string.format, string.match, string.lower, string.gsub local utfbyte = utf.byte @@ -5651,8 +5664,6 @@ if not modules then modules = { } end modules ['font-otf'] = { -- more checking against low level calls of functions -local utf = unicode.utf8 - local utfbyte = utf.byte local format, gmatch, gsub, find, match, lower, strip = string.format, string.gmatch, string.gsub, string.find, string.match, string.lower, string.strip local type, next, tonumber, tostring = type, next, tonumber, tostring @@ -6092,32 +6103,32 @@ function otf.load(filename,format,sub,featurefile) starttiming(data) report_otf("file size: %s", size) enhancers.apply(data,filename,fontdata) + local packtime = { } if packdata then if cleanup > 0 then collectgarbage("collect") ---~ lua.collectgarbage() end + starttiming(packtime) enhance("pack",data,filename,nil) + stoptiming(packtime) end report_otf("saving in cache: %s",filename) data = containers.write(otf.cache, hash, data) if cleanup > 1 then collectgarbage("collect") ---~ lua.collectgarbage() end stoptiming(data) if elapsedtime then -- not in generic - report_otf("preprocessing and caching took %s seconds",elapsedtime(data)) + report_otf("preprocessing and caching took %s seconds (packtime: %s)", + elapsedtime(data),packdata and elapsedtime(packtime) or 0) end fontloader.close(fontdata) -- free memory if cleanup > 3 then collectgarbage("collect") ---~ lua.collectgarbage() end data = containers.read(otf.cache, hash) -- this frees the old table and load the sparse one if cleanup > 2 then collectgarbage("collect") ---~ lua.collectgarbage() end else data = nil @@ -6695,7 +6706,10 @@ actions["reorganize subtables"] = function(data,filename,raw) -- local name = gk.name -- - if features then + if not name then + -- in fact an error + report_otf("skipping weird lookup number %s",k) + elseif features then -- scripts, tag, ismac local f = { } for i=1,#features do @@ -7128,6 +7142,9 @@ actions["merge kern classes"] = function(data,filename,raw) if type(lookups) ~= "table" then lookups = { lookups } end + -- if offsets[1] == nil then + -- offsets[1] = "" + -- end -- we can check the max in the loop -- local maxseconds = getn(seconds) for n, s in next, firsts do @@ -7148,9 +7165,9 @@ actions["merge kern classes"] = function(data,filename,raw) if splt then local extrakerns = { } local baseoffset = (fk-1) * maxseconds - -- for sk=2,maxseconds do - -- local sv = seconds[sk] - for sk, sv in next, seconds do + for sk=2,maxseconds do -- will become 1 based in future luatex + local sv = seconds[sk] + -- for sk, sv in next, seconds do local splt = split[sv] if splt then -- redundant test local offset = offsets[baseoffset + sk] @@ -8377,7 +8394,7 @@ if not modules then modules = { } end modules ['node-inj'] = { -- This is very experimental (this will change when we have luatex > .50 and -- a few pending thingies are available. Also, Idris needs to make a few more -- test fonts. Btw, future versions of luatex will have extended glyph properties --- that can be of help. +-- that can be of help. Some optimizations can go away when we have faster machines. local next = next @@ -8864,7 +8881,7 @@ if not modules then modules = { } end modules ['font-ota'] = { -- this might become scrp-*.lua -local type, tostring, match, format, concat = type, tostring, string.match, string.format, table.concat +local type = type if not trackers then trackers = { register = function() end } end @@ -9249,6 +9266,8 @@ if not modules then modules = { } end modules ['font-otn'] = { -- handle positions (we need example fonts) -- handle gpos_single (we might want an extra width field in glyph nodes because adding kerns might interfere) -- mark (to mark) code is still not what it should be (too messy but we need some more extreem husayni tests) +-- remove some optimizations (when I have a faster machine) + --[[ldx--

This module is a bit more split up that I'd like but since we also want to test @@ -12601,7 +12620,6 @@ if not modules then modules = { } end modules ['font-def'] = { -- We can overload some of the definers.functions so we don't local them. -local concat = table.concat local format, gmatch, match, find, lower, gsub = string.format, string.gmatch, string.match, string.find, string.lower, string.gsub local tostring, next = tostring, next local lpegmatch = lpeg.match diff --git a/tex/generic/context/luatex/luatex-fonts.lua b/tex/generic/context/luatex/luatex-fonts.lua index dd2c902cd..6b502cd24 100644 --- a/tex/generic/context/luatex/luatex-fonts.lua +++ b/tex/generic/context/luatex/luatex-fonts.lua @@ -22,7 +22,7 @@ if not modules then modules = { } end modules ['luatex-fonts'] = { -- -- Todo: all global namespaces in called modules will get local shortcuts. -utf = unicode.utf8 +utf = utf or unicode.utf8 if not generic_context then @@ -132,10 +132,10 @@ else -- mess up ConTeXt code for the sake of general generality. Around -- version 1.0 there will be an official api defined. + loadmodule('l-lpeg.lua') loadmodule('l-function.lua') loadmodule('l-string.lua') loadmodule('l-table.lua') - loadmodule('l-lpeg.lua') loadmodule('l-boolean.lua') loadmodule('l-math.lua') loadmodule('l-file.lua') -- cgit v1.2.3