diff options
author | Marius <mariausol@gmail.com> | 2012-12-24 21:40:31 +0200 |
---|---|---|
committer | Marius <mariausol@gmail.com> | 2012-12-24 21:40:31 +0200 |
commit | 31c8bf0930a6d3e353a552d6bab71f62bc25f34e (patch) | |
tree | f9fd41e91d4751cdddd2bdfc15ae69f78b61f73f /tex/generic | |
parent | 1c36d2cc9c25d74a806ee74c0ee28da32d5e84ba (diff) | |
download | context-31c8bf0930a6d3e353a552d6bab71f62bc25f34e.tar.gz |
beta 2012.12.24 20:08
Diffstat (limited to 'tex/generic')
-rw-r--r-- | tex/generic/context/luatex/luatex-fonts-merged.lua | 2260 | ||||
-rw-r--r-- | tex/generic/context/luatex/luatex-fonts.lua | 4 |
2 files changed, 1141 insertions, 1123 deletions
diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 1f402cd4f..2b1f94b14 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,789 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 12/10/12 23:20:34 +-- merge date : 12/24/12 20:08:31 + +do -- begin closure to overcome local limits and interference + +if not modules then modules = { } end modules ['l-lpeg'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) + +-- move utf -> l-unicode +-- move string -> l-string or keep it here + +local lpeg = require("lpeg") + +-- tracing (only used when we encounter a problem in integration of lpeg in luatex) + +-- some code will move to unicode and string + +local report = texio and texio.write_nl or print + +-- local lpmatch = lpeg.match +-- local lpprint = lpeg.print +-- local lpp = lpeg.P +-- local lpr = lpeg.R +-- local lps = lpeg.S +-- local lpc = lpeg.C +-- local lpb = lpeg.B +-- local lpv = lpeg.V +-- local lpcf = lpeg.Cf +-- local lpcb = lpeg.Cb +-- local lpcg = lpeg.Cg +-- local lpct = lpeg.Ct +-- local lpcs = lpeg.Cs +-- local lpcc = lpeg.Cc +-- local lpcmt = lpeg.Cmt +-- local lpcarg = lpeg.Carg + +-- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end + +-- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end +-- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end +-- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end +-- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end +-- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end +-- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end +-- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end +-- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end +-- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end +-- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end +-- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end +-- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end +-- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end +-- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end + +local type, next = type, next +local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format + +-- Beware, we predefine a bunch of patterns here and one reason for doing so +-- is that we get consistent behaviour in some of the visualizers. + +lpeg.patterns = lpeg.patterns or { } -- so that we can share +local patterns = lpeg.patterns + +local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt +local lpegtype, lpegmatch = lpeg.type, lpeg.match + +local anything = P(1) +local endofstring = P(-1) +local alwaysmatched = P(true) + +patterns.anything = anything +patterns.endofstring = endofstring +patterns.beginofstring = alwaysmatched +patterns.alwaysmatched = alwaysmatched + +local digit, sign = R('09'), S('+-') +local cr, lf, crlf = P("\r"), P("\n"), P("\r\n") +local newline = crlf + S("\r\n") -- cr + lf +local escaped = P("\\") * anything +local squote = P("'") +local dquote = P('"') +local space = P(" ") + +local utfbom_32_be = P('\000\000\254\255') +local utfbom_32_le = P('\255\254\000\000') +local utfbom_16_be = P('\255\254') +local utfbom_16_le = P('\254\255') +local utfbom_8 = P('\239\187\191') +local utfbom = utfbom_32_be + utfbom_32_le + + utfbom_16_be + utfbom_16_le + + utfbom_8 +local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le") + + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le") + + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8 +local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4) + + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2) + + utfbom_8 * Cc(3) + Cc(0) + +local utf8next = R("\128\191") + +patterns.utf8one = R("\000\127") +patterns.utf8two = R("\194\223") * utf8next +patterns.utf8three = R("\224\239") * utf8next * utf8next +patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next +patterns.utfbom = utfbom +patterns.utftype = utftype +patterns.utfoffset = utfoffset + +local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four +local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false) + +patterns.utf8 = utf8char +patterns.utf8char = utf8char +patterns.validutf8 = validutf8char +patterns.validutf8char = validutf8char + +local eol = S("\n\r") +local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace = eol + spacer +local nonspacer = 1 - spacer +local nonwhitespace = 1 - whitespace + +patterns.eol = eol +patterns.spacer = spacer +patterns.whitespace = whitespace +patterns.nonspacer = nonspacer +patterns.nonwhitespace = nonwhitespace + +local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto + +----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0) +local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) + +patterns.stripper = stripper +patterns.collapser = collapser + +patterns.digit = digit +patterns.sign = sign +patterns.cardinal = sign^0 * digit^1 +patterns.integer = sign^0 * digit^1 +patterns.unsigned = digit^0 * P('.') * digit^1 +patterns.float = sign^0 * patterns.unsigned +patterns.cunsigned = digit^0 * P(',') * digit^1 +patterns.cfloat = sign^0 * patterns.cunsigned +patterns.number = patterns.float + patterns.integer +patterns.cnumber = patterns.cfloat + patterns.integer +patterns.oct = P("0") * R("07")^1 +patterns.octal = patterns.oct +patterns.HEX = P("0x") * R("09","AF")^1 +patterns.hex = P("0x") * R("09","af")^1 +patterns.hexadecimal = P("0x") * R("09","AF","af")^1 +patterns.lowercase = R("az") +patterns.uppercase = R("AZ") +patterns.letter = patterns.lowercase + patterns.uppercase +patterns.space = space +patterns.tab = P("\t") +patterns.spaceortab = patterns.space + patterns.tab +patterns.newline = newline +patterns.emptyline = newline^1 +patterns.equal = P("=") +patterns.comma = P(",") +patterns.commaspacer = P(",") * spacer^0 +patterns.period = P(".") +patterns.colon = P(":") +patterns.semicolon = P(";") +patterns.underscore = P("_") +patterns.escaped = escaped +patterns.squote = squote +patterns.dquote = dquote +patterns.nosquote = (escaped + (1-squote))^0 +patterns.nodquote = (escaped + (1-dquote))^0 +patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle +patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle +patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble +patterns.unspacer = ((patterns.spacer^1)/"")^0 + +patterns.singlequoted = squote * patterns.nosquote * squote +patterns.doublequoted = dquote * patterns.nodquote * dquote +patterns.quoted = patterns.doublequoted + patterns.singlequoted + +patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1) + +patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 +patterns.beginline = #(1-newline) + +local function anywhere(pattern) --slightly adapted from website + return P { P(pattern) + 1 * V(1) } +end + +lpeg.anywhere = anywhere + +function lpeg.instringchecker(p) + p = anywhere(p) + return function(str) + return lpegmatch(p,str) and true or false + end +end + +function lpeg.splitter(pattern, action) + return (((1-P(pattern))^1)/action+1)^0 +end + +function lpeg.tsplitter(pattern, action) + return Ct((((1-P(pattern))^1)/action+1)^0) +end + +-- probleem: separator can be lpeg and that does not hash too well, but +-- it's quite okay as the key is then not garbage collected + +local splitters_s, splitters_m, splitters_t = { }, { }, { } + +local function splitat(separator,single) + local splitter = (single and splitters_s[separator]) or splitters_m[separator] + if not splitter then + separator = P(separator) + local other = C((1 - separator)^0) + if single then + local any = anything + splitter = other * (separator * C(any^0) + "") -- ? + splitters_s[separator] = splitter + else + splitter = other * (separator * other)^0 + splitters_m[separator] = splitter + end + end + return splitter +end + +local function tsplitat(separator) + local splitter = splitters_t[separator] + if not splitter then + splitter = Ct(splitat(separator)) + splitters_t[separator] = splitter + end + return splitter +end + +lpeg.splitat = splitat +lpeg.tsplitat = tsplitat + +function string.splitup(str,separator) + if not separator then + separator = "," + end + return lpegmatch(splitters_m[separator] or splitat(separator),str) +end + +-- local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more +-- local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more +-- local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps +-- local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps + +local cache = { } + +function lpeg.split(separator,str) + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c + end + return lpegmatch(c,str) +end + +function string.split(str,separator) + if separator then + local c = cache[separator] + if not c then + c = tsplitat(separator) + cache[separator] = c + end + return lpegmatch(c,str) + else + return { str } + end +end + +local spacing = patterns.spacer^0 * newline -- sort of strip +local empty = spacing * Cc("") +local nonempty = Cs((1-spacing)^1) * spacing^-1 +local content = (empty + nonempty)^1 + +patterns.textline = content + +local linesplitter = tsplitat(newline) + +patterns.linesplitter = linesplitter + +function string.splitlines(str) + return lpegmatch(linesplitter,str) +end + +-- lpeg.splitters = cache -- no longer public + +local cache = { } + +function lpeg.checkedsplit(separator,str) + local c = cache[separator] + if not c then + separator = P(separator) + local other = C((1 - separator)^1) + c = Ct(separator^0 * other * (separator^1 * other)^0) + cache[separator] = c + end + return lpegmatch(c,str) +end + +function string.checkedsplit(str,separator) + local c = cache[separator] + if not c then + separator = P(separator) + local other = C((1 - separator)^1) + c = Ct(separator^0 * other * (separator^1 * other)^0) + cache[separator] = c + end + return lpegmatch(c,str) +end + +-- from roberto's site: + +local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end +local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end +local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end + +local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 + +patterns.utf8byte = utf8byte + +--~ local str = " a b c d " + +--~ local s = lpeg.stripper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.stripper("ab") print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper("ab") print("["..lpegmatch(s,str).."]") + +local cache = { } + +function lpeg.stripper(str) + if type(str) == "string" then + local s = cache[str] + if not s then + s = Cs(((S(str)^1)/"" + 1)^0) + cache[str] = s + end + return s + else + return Cs(((str^1)/"" + 1)^0) + end +end + +local cache = { } + +function lpeg.keeper(str) + if type(str) == "string" then + local s = cache[str] + if not s then + s = Cs((((1-S(str))^1)/"" + 1)^0) + cache[str] = s + end + return s + else + return Cs((((1-str)^1)/"" + 1)^0) + end +end + +function lpeg.frontstripper(str) -- or pattern (yet undocumented) + return (P(str) + P(true)) * Cs(anything^0) +end + +function lpeg.endstripper(str) -- or pattern (yet undocumented) + return Cs((1 - P(str) * endofstring)^0) +end + +-- Just for fun I looked at the used bytecode and +-- p = (p and p + pp) or pp gets one more (testset). + +-- todo: cache when string + +function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys + local pattern + local u = isutf and utf8char or 1 + if type(one) == "table" then + local no = #one + local p = P(false) + if no == 0 then + for k, v in next, one do + p = p + P(k) / v + end + pattern = Cs((p + u)^0) + elseif no == 1 then + local o = one[1] + one, two = P(o[1]), o[2] + -- pattern = Cs(((1-one)^1 + one/two)^0) + pattern = Cs((one/two + u)^0) + else + for i=1,no do + local o = one[i] + p = p + P(o[1]) / o[2] + end + pattern = Cs((p + u)^0) + end + else + pattern = Cs((P(one)/(two or "") + u)^0) + end + if makefunction then + return function(str) + return lpegmatch(pattern,str) + end + else + return pattern + end +end + +function lpeg.finder(lst,makefunction) + local pattern + if type(lst) == "table" then + pattern = P(false) + if #lst == 0 then + for k, v in next, lst do + pattern = pattern + P(k) -- ignore key, so we can use a replacer table + end + else + for i=1,#lst do + pattern = pattern + P(lst[i]) + end + end + else + pattern = P(lst) + end + pattern = (1-pattern)^0 * pattern + if makefunction then + return function(str) + return lpegmatch(pattern,str) + end + else + return pattern + end +end + +-- print(lpeg.match(lpeg.replacer("e","a"),"test test")) +-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) +-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) + +local splitters_f, splitters_s = { }, { } + +function lpeg.firstofsplit(separator) -- always return value + local splitter = splitters_f[separator] + if not splitter then + separator = P(separator) + splitter = C((1 - separator)^0) + splitters_f[separator] = splitter + end + return splitter +end + +function lpeg.secondofsplit(separator) -- nil if not split + local splitter = splitters_s[separator] + if not splitter then + separator = P(separator) + splitter = (1 - separator)^0 * separator * C(anything^0) + splitters_s[separator] = splitter + end + return splitter +end + +function lpeg.balancer(left,right) + left, right = P(left), P(right) + return P { left * ((1 - left - right) + V(1))^0 * right } +end + +-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) +-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty +-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) +-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) +-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty +-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) +-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) +-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) + +-- -- slower: +-- +-- function lpeg.counter(pattern) +-- local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 +-- return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end +-- end + +local nany = utf8char/"" + +function lpeg.counter(pattern) + pattern = Cs((P(pattern)/" " + nany)^0) + return function(str) + return #lpegmatch(pattern,str) + end +end + +-- utf extensies + +local utfcharacters = utf and utf.characters or string.utfcharacters +local utfgmatch = unicode and unicode.utf8.gmatch +local utfchar = utf and utf.char or (unicode and unicode.utf8 and unicode.utf8.char) + +lpeg.UP = lpeg.P + +if utfcharacters then + + function lpeg.US(str) + local p = P(false) + for uc in utfcharacters(str) do + p = p + P(uc) + end + return p + end + + +elseif utfgmatch then + + function lpeg.US(str) + local p = P(false) + for uc in utfgmatch(str,".") do + p = p + P(uc) + end + return p + end + +else + + function lpeg.US(str) + local p = P(false) + local f = function(uc) + p = p + P(uc) + end + lpegmatch((utf8char/f)^0,str) + return p + end + +end + +local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture + +function lpeg.UR(str,more) + local first, last + if type(str) == "number" then + first = str + last = more or first + else + first, last = lpegmatch(range,str) + if not last then + return P(str) + end + end + if first == last then + return P(str) + elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium + local p = P(false) + for i=first,last do + p = p + P(utfchar(i)) + end + return p -- nil when invalid range + else + local f = function(b) + return b >= first and b <= last + end + -- tricky, these nested captures + return utf8byte / f -- nil when invalid range + end +end + +-- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) + +-- lpeg.print(lpeg.R("ab","cd","gh")) +-- lpeg.print(lpeg.P("a","b","c")) +-- lpeg.print(lpeg.S("a","b","c")) + +-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) +-- print(lpeg.count("äáàa",lpeg.UP("áà"))) +-- print(lpeg.count("äáàa",lpeg.US("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR("aá"))) +-- print(lpeg.count("äáàa",lpeg.UR("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) + +function lpeg.is_lpeg(p) + return p and lpegtype(p) == "pattern" +end + +function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order + if type(list) ~= "table" then + list = { list, ... } + end + -- table.sort(list) -- longest match first + local p = P(list[1]) + for l=2,#list do + p = p + P(list[l]) + end + return p +end + +-- For the moment here, but it might move to utilities. Beware, we need to +-- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we +-- loop back from the end cq. prepend. + +local sort = table.sort + +local function copyindexed(old) + local new = { } + for i=1,#old do + new[i] = old + end + return new +end + +local function sortedkeys(tab) + local keys, s = { }, 0 + for key,_ in next, tab do + s = s + 1 + keys[s] = key + end + sort(keys) + return keys +end + +function lpeg.append(list,pp,delayed,checked) + local p = pp + if #list > 0 then + local keys = copyindexed(list) + sort(keys) + for i=#keys,1,-1 do + local k = keys[i] + if p then + p = P(k) + p + else + p = P(k) + end + end + elseif delayed then -- hm, it looks like the lpeg parser resolves anyway + local keys = sortedkeys(list) + if p then + for i=1,#keys,1 do + local k = keys[i] + local v = list[k] + p = P(k)/list + p + end + else + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + p = P(k) + p + else + p = P(k) + end + end + if p then + p = p / list + end + end + elseif checked then + -- problem: substitution gives a capture + local keys = sortedkeys(list) + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + if k == v then + p = P(k) + p + else + p = P(k)/v + p + end + else + if k == v then + p = P(k) + else + p = P(k)/v + end + end + end + else + local keys = sortedkeys(list) + for i=1,#keys do + local k = keys[i] + local v = list[k] + if p then + p = P(k)/v + p + else + p = P(k)/v + end + end + end + return p +end + +-- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true)) +-- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true)) + +-- function lpeg.exact_match(words,case_insensitive) +-- local pattern = concat(words) +-- if case_insensitive then +-- local pattern = S(upper(characters)) + S(lower(characters)) +-- local list = { } +-- for i=1,#words do +-- list[lower(words[i])] = true +-- end +-- return Cmt(pattern^1, function(_,i,s) +-- return list[lower(s)] and i +-- end) +-- else +-- local pattern = S(concat(words)) +-- local list = { } +-- for i=1,#words do +-- list[words[i]] = true +-- end +-- return Cmt(pattern^1, function(_,i,s) +-- return list[s] and i +-- end) +-- end +-- end + +-- experiment: + +local function make(t) + local p + local keys = sortedkeys(t) + for i=1,#keys do + local k = keys[i] + local v = t[k] + if not p then + if next(v) then + p = P(k) * make(v) + else + p = P(k) + end + else + if next(v) then + p = p + P(k) * make(v) + else + p = p + P(k) + end + end + end + return p +end + +function lpeg.utfchartabletopattern(list) -- goes to util-lpg + local tree = { } + for i=1,#list do + local t = tree + for c in gmatch(list[i],".") do + if not t[c] then + t[c] = { } + end + t = t[c] + end + end + return make(tree) +end + +-- inspect ( lpeg.utfchartabletopattern { +-- utfchar(0x00A0), -- nbsp +-- utfchar(0x2000), -- enquad +-- utfchar(0x2001), -- emquad +-- utfchar(0x2002), -- enspace +-- utfchar(0x2003), -- emspace +-- utfchar(0x2004), -- threeperemspace +-- utfchar(0x2005), -- fourperemspace +-- utfchar(0x2006), -- sixperemspace +-- utfchar(0x2007), -- figurespace +-- utfchar(0x2008), -- punctuationspace +-- utfchar(0x2009), -- breakablethinspace +-- utfchar(0x200A), -- hairspace +-- utfchar(0x200B), -- zerowidthspace +-- utfchar(0x202F), -- narrownobreakspace +-- utfchar(0x205F), -- math thinspace +-- } ) + +-- a few handy ones: +-- +-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3 + +patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol + +end -- closure do -- begin closure to overcome local limits and interference @@ -29,40 +812,46 @@ if not modules then modules = { } end modules ['l-string'] = { } local string = string -local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower -local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct - --- some functions may disappear as they are not used anywhere +local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs -if not string.split then +-- Some functions are already defined in l-lpeg and maybe some from here will +-- move there (unless we also expose caches). - -- this will be overloaded by a faster lpeg variant +-- if not string.split then +-- +-- function string.split(str,pattern) +-- local t = { } +-- if #str > 0 then +-- local n = 1 +-- for s in gmatch(str..pattern,"(.-)"..pattern) do +-- t[n] = s +-- n = n + 1 +-- end +-- end +-- return t +-- end +-- +-- end - function string.split(str,pattern) - local t = { } - if #str > 0 then - local n = 1 - for s in gmatch(str..pattern,"(.-)"..pattern) do - t[n] = s - n = n + 1 - end - end - return t - end +-- function string.unquoted(str) +-- return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern +-- end -end +local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote + + patterns.dquote * C(patterns.nodquote) * patterns.dquote function string.unquoted(str) - return (gsub(str,"^([\"\'])(.*)%1$","%2")) + return lpegmatch(unquoted,str) or str end ---~ function stringunquoted(str) ---~ if find(str,"^[\'\"]") then ---~ return sub(str,2,-2) ---~ else ---~ return str ---~ end ---~ end +-- print(string.unquoted("test")) +-- print(string.unquoted([["t\"est"]])) +-- print(string.unquoted([["t\"est"x]])) +-- print(string.unquoted("\'test\'")) +-- print(string.unquoted('"test"')) +-- print(string.unquoted('"test"')) function string.quoted(str) return format("%q",str) -- always " @@ -85,65 +874,112 @@ function string.limit(str,n,sentinel) -- not utf proof end end -local space = S(" \t\v\n") -local nospace = 1 - space -local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code +local stripper = patterns.stripper +local collapser = patterns.collapser function string.strip(str) return lpegmatch(stripper,str) or "" end +function string.collapsespaces(str) + return lpegmatch(collapser,str) or "" +end + +-- function string.is_empty(str) +-- return not find(str,"%S") +-- end + +local pattern = P(" ")^0 * P(-1) + function string.is_empty(str) - return not find(str,"%S") + if str == "" then + return true + else + return lpegmatch(pattern,str) and true or false + end end -local patterns_escapes = { - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%(", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} -local simple_escapes = { - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} +-- if not string.escapedpattern then +-- +-- local patterns_escapes = { +-- ["%"] = "%%", +-- ["."] = "%.", +-- ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", +-- ["["] = "%[", ["]"] = "%]", +-- ["("] = "%(", [")"] = "%)", +-- -- ["{"] = "%{", ["}"] = "%}" +-- -- ["^"] = "%^", ["$"] = "%$", +-- } +-- +-- local simple_escapes = { +-- ["-"] = "%-", +-- ["."] = "%.", +-- ["?"] = ".", +-- ["*"] = ".*", +-- } +-- +-- function string.escapedpattern(str,simple) +-- return (gsub(str,".",simple and simple_escapes or patterns_escapes)) +-- end +-- +-- function string.topattern(str,lowercase,strict) +-- if str == "" then +-- return ".*" +-- else +-- str = gsub(str,".",simple_escapes) +-- if lowercase then +-- str = lower(str) +-- end +-- if strict then +-- return "^" .. str .. "$" +-- else +-- return str +-- end +-- end +-- end +-- +-- end + +--- needs checking + +local anything = patterns.anything +local allescapes = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ? +local someescapes = Cc("%") * S(".-+%()[]") -- also {} and ^$ ? +local matchescapes = Cc(".") * S("*?") -- wildcard and single match + +local pattern_a = Cs ( ( allescapes + anything )^0 ) +local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 ) +local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") ) function string.escapedpattern(str,simple) - return (gsub(str,".",simple and simple_escapes or patterns_escapes)) + return lpegmatch(simple and pattern_b or pattern_a,str) end function string.topattern(str,lowercase,strict) if str == "" then return ".*" + elseif strict then + str = lpegmatch(pattern_c,str) else - str = gsub(str,".",simple_escapes) - if lowercase then - str = lower(str) - end - if strict then - return "^" .. str .. "$" - else - return str - end + str = lpegmatch(pattern_b,str) + end + if lowercase then + return lower(str) + else + return str end end +-- print(string.escapedpattern("12+34*.tex",false)) +-- print(string.escapedpattern("12+34*.tex",true)) +-- print(string.topattern ("12+34*.tex",false,false)) +-- print(string.topattern ("12+34*.tex",false,true)) function string.valid(str,default) return (type(str) == "string" and str ~= "" and str) or default or nil end --- obsolete names: - -string.quote = string.quoted -string.unquote = string.unquoted - -- handy fallback string.itself = function(s) return s end @@ -156,6 +992,19 @@ function string.totable(str) return lpegmatch(pattern,str) end +-- handy from within tex: + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) + return format(lpegmatch(replacer,fmt),...) +end + +-- obsolete names: + +string.quote = string.quoted +string.unquote = string.unquoted + end -- closure do -- begin closure to overcome local limits and interference @@ -168,68 +1017,23 @@ if not modules then modules = { } end modules ['l-table'] = { license = "see context related readme files" } -local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select local table, string = table, string local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove -local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match +local format, lower, dump = string.format, string.lower, string.dump local getmetatable, setmetatable = getmetatable, setmetatable local getinfo = debug.getinfo - --- Starting with version 5.2 Lua no longer provide ipairs, which makes --- sense. As we already used the for loop and # in most places the --- impact on ConTeXt was not that large; the remaining ipairs already --- have been replaced. In a similar fashion we also hardly used pairs. --- --- Hm, actually ipairs was retained, but we no longer use it anyway. --- --- Just in case, we provide the fallbacks as discussed in Programming --- in Lua (http://www.lua.org/pil/7.3.html): - -if not ipairs then - - -- for k, v in ipairs(t) do ... end - -- for k=1,#t do local v = t[k] ... end - - local function iterate(a,i) - i = i + 1 - local v = a[i] - if v ~= nil then - return i, v --, nil - end - end - - function ipairs(a) - return iterate, a, 0 - end - -end - -if not pairs then - - -- for k, v in pairs(t) do ... end - -- for k, v in next, t do ... end - - function pairs(t) - return next, t -- , nil - end - -end - --- Also, unpack has been moved to the table table, and for compatiility --- reasons we provide both now. - -if not table.unpack then - table.unpack = _G.unpack -elseif not unpack then - _G.unpack = table.unpack -end +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local floor = math.floor -- extra functions, some might go (when not used) +local stripper = patterns.stripper + function table.strip(tab) local lst, l = { }, 0 for i=1,#tab do - local s = gsub(tab[i],"^%s*(.-)%s*$","%1") + local s = lpegmatch(stripper,tab[i]) or "" if s == "" then -- skip this one else @@ -338,7 +1142,7 @@ local function sortedhash(t) end table.sortedhash = sortedhash -table.sortedpairs = sortedhash +table.sortedpairs = sortedhash -- obsolete function table.append(t,list) local n = #t @@ -362,31 +1166,63 @@ function table.prepend(t, list) return t end +-- function table.merge(t, ...) -- first one is target +-- t = t or { } +-- local lst = { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- t[k] = v +-- end +-- end +-- return t +-- end + function table.merge(t, ...) -- first one is target t = t or { } - local lst = { ... } - for i=1,#lst do - for k, v in next, lst[i] do + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do t[k] = v end end return t end +-- function table.merged(...) +-- local tmp, lst = { }, { ... } +-- for i=1,#lst do +-- for k, v in next, lst[i] do +-- tmp[k] = v +-- end +-- end +-- return tmp +-- end + function table.merged(...) - local tmp, lst = { }, { ... } - for i=1,#lst do - for k, v in next, lst[i] do - tmp[k] = v + local t = { } + for i=1,select("#",...) do + for k, v in next, (select(i,...)) do + t[k] = v end end - return tmp + return t end +-- function table.imerge(t, ...) +-- local lst, nt = { ... }, #t +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- nt = nt + 1 +-- t[nt] = nst[j] +-- end +-- end +-- return t +-- end + function table.imerge(t, ...) - local lst, nt = { ... }, #t - for i=1,#lst do - local nst = lst[i] + local nt = #t + for i=1,select("#",...) do + local nst = select(i,...) for j=1,#nst do nt = nt + 1 t[nt] = nst[j] @@ -395,10 +1231,22 @@ function table.imerge(t, ...) return t end +-- function table.imerged(...) +-- local tmp, ntmp, lst = { }, 0, {...} +-- for i=1,#lst do +-- local nst = lst[i] +-- for j=1,#nst do +-- ntmp = ntmp + 1 +-- tmp[ntmp] = nst[j] +-- end +-- end +-- return tmp +-- end + function table.imerged(...) - local tmp, ntmp, lst = { }, 0, {...} - for i=1,#lst do - local nst = lst[i] + local tmp, ntmp = { }, 0 + for i=1,select("#",...) do + local nst = select(i,...) for j=1,#nst do ntmp = ntmp + 1 tmp[ntmp] = nst[j] @@ -410,7 +1258,7 @@ end local function fastcopy(old,metatabletoo) -- fast one if old then local new = { } - for k,v in next, old do + for k, v in next, old do if type(v) == "table" then new[k] = fastcopy(v,metatabletoo) -- was just table.copy else @@ -464,7 +1312,7 @@ end table.fastcopy = fastcopy table.copy = copy -function table.derive(parent) +function table.derive(parent) -- for the moment not public local child = { } if parent then setmetatable(child,{ __index = parent }) @@ -545,6 +1393,13 @@ end -- problem: there no good number_to_string converter with the best resolution +-- probably using .. is faster than format +-- maybe split in a few cases (yes/no hexify) + +-- todo: %g faster on numbers than %s + +local propername = patterns.propername -- was find(name,"^%a[%w%_]*$") + local function dummy() end local function do_serialize(root,name,depth,level,indexed) @@ -554,14 +1409,14 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s{",depth)) else local tn = type(name) - if tn == "number" then -- or find(k,"^%d+$") then + if tn == "number" then if hexify then handle(format("%s[0x%04X]={",depth,name)) else handle(format("%s[%s]={",depth,name)) end elseif tn == "string" then - if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then + if noquotes and not reserved[name] and lpegmatch(propername,name) then handle(format("%s%s={",depth,name)) else handle(format("%s[%q]={",depth,name)) @@ -587,7 +1442,6 @@ local function do_serialize(root,name,depth,level,indexed) if compact then last = #root for k=1,last do --- if not root[k] then if root[k] == nil then last = k - 1 break @@ -635,7 +1489,7 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s %s,",depth,tostring(v))) elseif t == "function" then if functions then - handle(format('%s loadstring(%q),',depth,dump(v))) + handle(format('%s load(%q),',depth,dump(v))) else handle(format('%s "function",',depth)) end @@ -647,7 +1501,7 @@ local function do_serialize(root,name,depth,level,indexed) handle(format("%s __p__=nil,",depth)) end elseif t == "number" then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=0x%04X,",depth,k,v)) else @@ -659,7 +1513,7 @@ local function do_serialize(root,name,depth,level,indexed) else handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g end - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then if hexify then handle(format("%s %s=0x%04X,",depth,k,v)) else @@ -674,7 +1528,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "string" then if reduce and tonumber(v) then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%s,",depth,k,v)) else @@ -682,13 +1536,13 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]=%s,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%s,",depth,k,v)) else handle(format("%s [%q]=%s,",depth,k,v)) end else - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%q,",depth,k,v)) else @@ -696,7 +1550,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]=%q,",depth,tostring(k),v)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%q,",depth,k,v)) else handle(format("%s [%q]=%q,",depth,k,v)) @@ -704,7 +1558,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif t == "table" then if not next(v) then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]={},",depth,k)) else @@ -712,7 +1566,7 @@ local function do_serialize(root,name,depth,level,indexed) end elseif tk == "boolean" then handle(format("%s [%s]={},",depth,tostring(k))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s={},",depth,k)) else handle(format("%s [%q]={},",depth,k)) @@ -720,15 +1574,15 @@ local function do_serialize(root,name,depth,level,indexed) elseif inline then local st = simple_table(v) if st then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", "))) else handle(format("%s [%s]={ %s },",depth,k,concat(st,", "))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s={ %s },",depth,k,concat(st,", "))) else handle(format("%s [%q]={ %s },",depth,k,concat(st,", "))) @@ -740,15 +1594,15 @@ local function do_serialize(root,name,depth,level,indexed) do_serialize(v,k,depth,level+1) end elseif t == "boolean" then - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%s,",depth,k,tostring(v))) else handle(format("%s [%s]=%s,",depth,k,tostring(v))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%s,",depth,k,tostring(v))) else handle(format("%s [%q]=%s,",depth,k,tostring(v))) @@ -757,30 +1611,30 @@ local function do_serialize(root,name,depth,level,indexed) if functions then local f = getinfo(v).what == "C" and dump(dummy) or dump(v) -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then - handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f)) + handle(format("%s [0x%04X]=load(%q),",depth,k,f)) else - handle(format("%s [%s]=loadstring(%q),",depth,k,f)) + handle(format("%s [%s]=load(%q),",depth,k,f)) end elseif tk == "boolean" then - handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f)) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then - handle(format("%s %s=loadstring(%q),",depth,k,f)) + handle(format("%s [%s]=load(%q),",depth,tostring(k),f)) + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then + handle(format("%s %s=load(%q),",depth,k,f)) else - handle(format("%s [%q]=loadstring(%q),",depth,k,f)) + handle(format("%s [%q]=load(%q),",depth,k,f)) end end else - if tk == "number" then -- or find(k,"^%d+$") then + if tk == "number" then if hexify then handle(format("%s [0x%04X]=%q,",depth,k,tostring(v))) else handle(format("%s [%s]=%q,",depth,k,tostring(v))) end - elseif tk == "boolean" then -- or find(k,"^%d+$") then + elseif tk == "boolean" then handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) - elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then + elseif noquotes and not reserved[k] and lpegmatch(propername,k) then handle(format("%s %s=%q,",depth,k,tostring(v))) else handle(format("%s [%q]=%q,",depth,k,tostring(v))) @@ -861,14 +1715,14 @@ local function serialize(_handle,root,name,specification) -- handle wins handle("}") end ---~ name: ---~ ---~ true : return { } ---~ false : { } ---~ nil : t = { } ---~ string : string = { } ---~ 'return' : return { } ---~ number : [number] = { } +-- name: +-- +-- true : return { } +-- false : { } +-- nil : t = { } +-- string : string = { } +-- "return" : return { } +-- number : [number] = { } function table.serialize(root,name,specification) local t, n = { }, 0 @@ -957,7 +1811,7 @@ table.flattened = flattened local function unnest(t,f) -- only used in mk, for old times sake if not f then -- and only relevant for token lists - f = { } + f = { } -- this one can become obsolete end for i=1,#t do local v = t[i] @@ -986,7 +1840,7 @@ local function are_equal(a,b,n,m) -- indexed local ai, bi = a[i], b[i] if ai==bi then -- same - elseif type(ai)=="table" and type(bi)=="table" then + elseif type(ai) == "table" and type(bi) == "table" then if not are_equal(ai,bi) then return false end @@ -1021,10 +1875,10 @@ table.are_equal = are_equal -- maybe also make a combined one -function table.compact(t) +function table.compact(t) -- remove empty tables, assumes subtables if t then - for k,v in next, t do - if not next(v) then + for k, v in next, t do + if not next(v) then -- no type checking t[k] = nil end end @@ -1063,7 +1917,7 @@ function table.swapped(t,s) -- hash return n end -function table.mirror(t) -- hash +function table.mirrored(t) -- hash local n = { } for k, v in next, t do n[v] = k @@ -1086,6 +1940,17 @@ function table.reversed(t) end end +function table.reverse(t) + if t then + local n = #t + for i=1,floor(n/2) do + local j = n - i + 1 + t[i], t[j] = t[j], t[i] + end + return t + end +end + function table.sequenced(t,sep) -- hash only if t then local s, n = { }, 0 @@ -1172,886 +2037,6 @@ end -- closure do -- begin closure to overcome local limits and interference -if not modules then modules = { } end modules ['l-lpeg'] = { - version = 1.001, - comment = "companion to luat-lib.mkiv", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} - - --- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) - -local lpeg = require("lpeg") - --- tracing (only used when we encounter a problem in integration of lpeg in luatex) - --- some code will move to unicode and string - -local report = texio and texio.write_nl or print - --- local lpmatch = lpeg.match --- local lpprint = lpeg.print --- local lpp = lpeg.P --- local lpr = lpeg.R --- local lps = lpeg.S --- local lpc = lpeg.C --- local lpb = lpeg.B --- local lpv = lpeg.V --- local lpcf = lpeg.Cf --- local lpcb = lpeg.Cb --- local lpcg = lpeg.Cg --- local lpct = lpeg.Ct --- local lpcs = lpeg.Cs --- local lpcc = lpeg.Cc --- local lpcmt = lpeg.Cmt --- local lpcarg = lpeg.Carg - --- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end - --- function lpeg.P (l) local p = lpp (l) report("LPEG P =") lpprint(l) return p end --- function lpeg.R (l) local p = lpr (l) report("LPEG R =") lpprint(l) return p end --- function lpeg.S (l) local p = lps (l) report("LPEG S =") lpprint(l) return p end --- function lpeg.C (l) local p = lpc (l) report("LPEG C =") lpprint(l) return p end --- function lpeg.B (l) local p = lpb (l) report("LPEG B =") lpprint(l) return p end --- function lpeg.V (l) local p = lpv (l) report("LPEG V =") lpprint(l) return p end --- function lpeg.Cf (l) local p = lpcf (l) report("LPEG Cf =") lpprint(l) return p end --- function lpeg.Cb (l) local p = lpcb (l) report("LPEG Cb =") lpprint(l) return p end --- function lpeg.Cg (l) local p = lpcg (l) report("LPEG Cg =") lpprint(l) return p end --- function lpeg.Ct (l) local p = lpct (l) report("LPEG Ct =") lpprint(l) return p end --- function lpeg.Cs (l) local p = lpcs (l) report("LPEG Cs =") lpprint(l) return p end --- function lpeg.Cc (l) local p = lpcc (l) report("LPEG Cc =") lpprint(l) return p end --- function lpeg.Cmt (l) local p = lpcmt (l) report("LPEG Cmt =") lpprint(l) return p end --- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end - -local type, next = type, next -local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format - --- Beware, we predefine a bunch of patterns here and one reason for doing so --- is that we get consistent behaviour in some of the visualizers. - -lpeg.patterns = lpeg.patterns or { } -- so that we can share -local patterns = lpeg.patterns - -local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp -local lpegtype, lpegmatch = lpeg.type, lpeg.match - -local utfcharacters = string.utfcharacters -local utfgmatch = unicode and unicode.utf8.gmatch - -local anything = P(1) -local endofstring = P(-1) -local alwaysmatched = P(true) - -patterns.anything = anything -patterns.endofstring = endofstring -patterns.beginofstring = alwaysmatched -patterns.alwaysmatched = alwaysmatched - -local digit, sign = R('09'), S('+-') -local cr, lf, crlf = P("\r"), P("\n"), P("\r\n") -local newline = crlf + S("\r\n") -- cr + lf -local escaped = P("\\") * anything -local squote = P("'") -local dquote = P('"') -local space = P(" ") - -local utfbom_32_be = P('\000\000\254\255') -local utfbom_32_le = P('\255\254\000\000') -local utfbom_16_be = P('\255\254') -local utfbom_16_le = P('\254\255') -local utfbom_8 = P('\239\187\191') -local utfbom = utfbom_32_be + utfbom_32_le - + utfbom_16_be + utfbom_16_le - + utfbom_8 -local utftype = utfbom_32_be / "utf-32-be" + utfbom_32_le / "utf-32-le" - + utfbom_16_be / "utf-16-be" + utfbom_16_le / "utf-16-le" - + utfbom_8 / "utf-8" + alwaysmatched / "unknown" - -local utf8next = R("\128\191") - -patterns.utf8one = R("\000\127") -patterns.utf8two = R("\194\223") * utf8next -patterns.utf8three = R("\224\239") * utf8next * utf8next -patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next -patterns.utfbom = utfbom -patterns.utftype = utftype - -local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four -local validutf8char = utf8char^0 * endofstring * Cc(true) + Cc(false) - -patterns.utf8 = utf8char -patterns.utf8char = utf8char -patterns.validutf8 = validutf8char -patterns.validutf8char = validutf8char - -local eol = S("\n\r") -local spacer = S(" \t\f\v") -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) -local whitespace = eol + spacer - -patterns.digit = digit -patterns.sign = sign -patterns.cardinal = sign^0 * digit^1 -patterns.integer = sign^0 * digit^1 -patterns.unsigned = digit^0 * P('.') * digit^1 -patterns.float = sign^0 * patterns.unsigned -patterns.cunsigned = digit^0 * P(',') * digit^1 -patterns.cfloat = sign^0 * patterns.cunsigned -patterns.number = patterns.float + patterns.integer -patterns.cnumber = patterns.cfloat + patterns.integer -patterns.oct = P("0") * R("07")^1 -patterns.octal = patterns.oct -patterns.HEX = P("0x") * R("09","AF")^1 -patterns.hex = P("0x") * R("09","af")^1 -patterns.hexadecimal = P("0x") * R("09","AF","af")^1 -patterns.lowercase = R("az") -patterns.uppercase = R("AZ") -patterns.letter = patterns.lowercase + patterns.uppercase -patterns.space = space -patterns.tab = P("\t") -patterns.spaceortab = patterns.space + patterns.tab -patterns.eol = eol -patterns.spacer = spacer -patterns.whitespace = whitespace -patterns.newline = newline -patterns.emptyline = newline^1 -patterns.nonspacer = 1 - spacer -patterns.nonwhitespace = 1 - whitespace -patterns.equal = P("=") -patterns.comma = P(",") -patterns.commaspacer = P(",") * spacer^0 -patterns.period = P(".") -patterns.colon = P(":") -patterns.semicolon = P(";") -patterns.underscore = P("_") -patterns.escaped = escaped -patterns.squote = squote -patterns.dquote = dquote -patterns.nosquote = (escaped + (1-squote))^0 -patterns.nodquote = (escaped + (1-dquote))^0 -patterns.unsingle = (squote/"") * patterns.nosquote * (squote/"") -patterns.undouble = (dquote/"") * patterns.nodquote * (dquote/"") -patterns.unquoted = patterns.undouble + patterns.unsingle -- more often undouble -patterns.unspacer = ((patterns.spacer^1)/"")^0 - -patterns.singlequoted = squote * patterns.nosquote * squote -patterns.doublequoted = dquote * patterns.nodquote * dquote -patterns.quoted = patterns.doublequoted + patterns.singlequoted - -patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 -patterns.beginline = #(1-newline) - --- print(string.unquoted("test")) --- print(string.unquoted([["t\"est"]])) --- print(string.unquoted([["t\"est"x]])) --- print(string.unquoted("\'test\'")) --- print(string.unquoted('"test"')) --- print(string.unquoted('"test"')) - -local function anywhere(pattern) --slightly adapted from website - return P { P(pattern) + 1 * V(1) } -end - -lpeg.anywhere = anywhere - -function lpeg.instringchecker(p) - p = anywhere(p) - return function(str) - return lpegmatch(p,str) and true or false - end -end - -function lpeg.splitter(pattern, action) - return (((1-P(pattern))^1)/action+1)^0 -end - -function lpeg.tsplitter(pattern, action) - return Ct((((1-P(pattern))^1)/action+1)^0) -end - --- probleem: separator can be lpeg and that does not hash too well, but --- it's quite okay as the key is then not garbage collected - -local splitters_s, splitters_m, splitters_t = { }, { }, { } - -local function splitat(separator,single) - local splitter = (single and splitters_s[separator]) or splitters_m[separator] - if not splitter then - separator = P(separator) - local other = C((1 - separator)^0) - if single then - local any = anything - splitter = other * (separator * C(any^0) + "") -- ? - splitters_s[separator] = splitter - else - splitter = other * (separator * other)^0 - splitters_m[separator] = splitter - end - end - return splitter -end - -local function tsplitat(separator) - local splitter = splitters_t[separator] - if not splitter then - splitter = Ct(splitat(separator)) - splitters_t[separator] = splitter - end - return splitter -end - -lpeg.splitat = splitat -lpeg.tsplitat = tsplitat - -function string.splitup(str,separator) - if not separator then - separator = "," - end - return lpegmatch(splitters_m[separator] or splitat(separator),str) -end - ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps->what->more")) -- oeps what more ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps->what->more")) -- oeps what->more ---~ local p = splitat("->",false) print(lpegmatch(p,"oeps")) -- oeps ---~ local p = splitat("->",true) print(lpegmatch(p,"oeps")) -- oeps - -local cache = { } - -function lpeg.split(separator,str) - local c = cache[separator] - if not c then - c = tsplitat(separator) - cache[separator] = c - end - return lpegmatch(c,str) -end - -function string.split(str,separator) - if separator then - local c = cache[separator] - if not c then - c = tsplitat(separator) - cache[separator] = c - end - return lpegmatch(c,str) - else - return { str } - end -end - -local spacing = patterns.spacer^0 * newline -- sort of strip -local empty = spacing * Cc("") -local nonempty = Cs((1-spacing)^1) * spacing^-1 -local content = (empty + nonempty)^1 - -patterns.textline = content - ---~ local linesplitter = Ct(content^0) ---~ ---~ function string.splitlines(str) ---~ return lpegmatch(linesplitter,str) ---~ end - -local linesplitter = tsplitat(newline) - -patterns.linesplitter = linesplitter - -function string.splitlines(str) - return lpegmatch(linesplitter,str) -end - -local utflinesplitter = utfbom^-1 * tsplitat(newline) - -patterns.utflinesplitter = utflinesplitter - -function string.utfsplitlines(str) - return lpegmatch(utflinesplitter,str or "") -end - -local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) -local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) - -function string.utfsplit(str,ignorewhitespace) -- new - if ignorewhitespace then - return lpegmatch(utfcharsplitter_iws,str or "") - else - return lpegmatch(utfcharsplitter_ows,str or "") - end -end - --- inspect(string.utfsplit("a b c d")) --- inspect(string.utfsplit("a b c d",true)) - --- -- alternative 1: 0.77 --- --- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) --- --- function string.utflength(str) --- return #lpegmatch(utfcharcounter,str or "") --- end --- --- -- alternative 2: 1.70 --- --- local n = 0 --- --- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow --- --- function string.utflength(str) --- n = 0 --- lpegmatch(utfcharcounter,str or "") --- return n --- end --- --- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) - -local n = 0 - -local utfcharcounter = utfbom^-1 * Cs ( ( - Cp() * (lpeg.patterns.utf8one )^1 * Cp() / function(f,t) n = n + t - f end - + Cp() * (lpeg.patterns.utf8two )^1 * Cp() / function(f,t) n = n + (t - f)/2 end - + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end - + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end -)^0 ) - -function string.utflength(str) - n = 0 - lpegmatch(utfcharcounter,str or "") - return n -end - ---~ lpeg.splitters = cache -- no longer public - -local cache = { } - -function lpeg.checkedsplit(separator,str) - local c = cache[separator] - if not c then - separator = P(separator) - local other = C((1 - separator)^1) - c = Ct(separator^0 * other * (separator^1 * other)^0) - cache[separator] = c - end - return lpegmatch(c,str) -end - -function string.checkedsplit(str,separator) - local c = cache[separator] - if not c then - separator = P(separator) - local other = C((1 - separator)^1) - c = Ct(separator^0 * other * (separator^1 * other)^0) - cache[separator] = c - end - return lpegmatch(c,str) -end - ---~ from roberto's site: - -local function f2(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end -local function f3(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end -local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end - -local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 - -patterns.utf8byte = utf8byte - ---~ local str = " a b c d " - ---~ local s = lpeg.stripper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.keeper(lpeg.R("az")) print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.stripper("ab") print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.keeper("ab") print("["..lpegmatch(s,str).."]") - -local cache = { } - -function lpeg.stripper(str) - if type(str) == "string" then - local s = cache[str] - if not s then - s = Cs(((S(str)^1)/"" + 1)^0) - cache[str] = s - end - return s - else - return Cs(((str^1)/"" + 1)^0) - end -end - -local cache = { } - -function lpeg.keeper(str) - if type(str) == "string" then - local s = cache[str] - if not s then - s = Cs((((1-S(str))^1)/"" + 1)^0) - cache[str] = s - end - return s - else - return Cs((((1-str)^1)/"" + 1)^0) - end -end - -function lpeg.frontstripper(str) -- or pattern (yet undocumented) - return (P(str) + P(true)) * Cs(anything^0) -end - -function lpeg.endstripper(str) -- or pattern (yet undocumented) - return Cs((1 - P(str) * endofstring)^0) -end - --- Just for fun I looked at the used bytecode and --- p = (p and p + pp) or pp gets one more (testset). - -function lpeg.replacer(one,two,makefunction) - local pattern - if type(one) == "table" then - local no = #one - local p = P(false) - if no == 0 then - for k, v in next, one do - p = p + P(k) / v - end - pattern = Cs((p + 1)^0) - elseif no == 1 then - local o = one[1] - one, two = P(o[1]), o[2] - -- pattern = Cs(((1-one)^1 + one/two)^0) - pattern = Cs((one/two + 1)^0) - else - for i=1,no do - local o = one[i] - p = p + P(o[1]) / o[2] - end - pattern = Cs((p + 1)^0) - end - else - one = P(one) - two = two or "" - -- pattern = Cs(((1-one)^1 + one/two)^0) - pattern = Cs((one/two +1)^0) - end - if makefunction then - return function(str) - return lpegmatch(pattern,str) - end - else - return pattern - end -end - -function lpeg.finder(lst,makefunction) - local pattern - if type(lst) == "table" then - local p = P(false) - for i=1,#lst do - p = p + P(lst[i]) - end - pattern = (p + 1)^0 - else - pattern = (P(lst) + 1)^0 - end - if makefunction then - return function(str) - return lpegmatch(pattern,str) - end - else - return pattern - end -end - --- print(lpeg.match(lpeg.replacer("e","a"),"test test")) --- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) --- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) - -local splitters_f, splitters_s = { }, { } - -function lpeg.firstofsplit(separator) -- always return value - local splitter = splitters_f[separator] - if not splitter then - separator = P(separator) - splitter = C((1 - separator)^0) - splitters_f[separator] = splitter - end - return splitter -end - -function lpeg.secondofsplit(separator) -- nil if not split - local splitter = splitters_s[separator] - if not splitter then - separator = P(separator) - splitter = (1 - separator)^0 * separator * C(anything^0) - splitters_s[separator] = splitter - end - return splitter -end - -function lpeg.balancer(left,right) - left, right = P(left), P(right) - return P { left * ((1 - left - right) + V(1))^0 * right } -end - ---~ print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) ---~ print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty ---~ print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) ---~ print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) ---~ print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty ---~ print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) ---~ print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) ---~ print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) - ---~ -- slower: ---~ ---~ function lpeg.counter(pattern) ---~ local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end + lpeg.anything)^0 ---~ return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end ---~ end - -local nany = utf8char/"" - -function lpeg.counter(pattern) - pattern = Cs((P(pattern)/" " + nany)^0) - return function(str) - return #lpegmatch(pattern,str) - end -end - -if utfgmatch then - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local n = 0 - for _ in utfgmatch(str,what) do - n = n + 1 - end - return n - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) - end - end - -else - - local cache = { } - - function lpeg.count(str,what) -- replaces string.count - if type(what) == "string" then - local p = cache[what] - if not p then - p = Cs((P(what)/" " + nany)^0) - cache[p] = p - end - return #lpegmatch(p,str) - else -- 4 times slower but still faster than / function - return #lpegmatch(Cs((P(what)/" " + nany)^0),str) - end - end - -end - -local patterns_escapes = { -- also defines in l-string - ["%"] = "%%", - ["."] = "%.", - ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", - ["["] = "%[", ["]"] = "%]", - ["("] = "%)", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} - -local simple_escapes = { -- also defines in l-string - ["-"] = "%-", - ["."] = "%.", - ["?"] = ".", - ["*"] = ".*", -} - -local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) -local s = Cs((S("-.+*%()[]") / simple_escapes + anything)^0) - -function string.escapedpattern(str,simple) - return lpegmatch(simple and s or p,str) -end - --- utf extensies - -lpeg.UP = lpeg.P - -if utfcharacters then - - function lpeg.US(str) - local p = P(false) - for uc in utfcharacters(str) do - p = p + P(uc) - end - return p - end - - -elseif utfgmatch then - - function lpeg.US(str) - local p = P(false) - for uc in utfgmatch(str,".") do - p = p + P(uc) - end - return p - end - -else - - function lpeg.US(str) - local p = P(false) - local f = function(uc) - p = p + P(uc) - end - lpegmatch((utf8char/f)^0,str) - return p - end - -end - -local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture - -local utfchar = unicode and unicode.utf8 and unicode.utf8.char - -function lpeg.UR(str,more) - local first, last - if type(str) == "number" then - first = str - last = more or first - else - first, last = lpegmatch(range,str) - if not last then - return P(str) - end - end - if first == last then - return P(str) - elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium - local p = P(false) - for i=first,last do - p = p + P(utfchar(i)) - end - return p -- nil when invalid range - else - local f = function(b) - return b >= first and b <= last - end - -- tricky, these nested captures - return utf8byte / f -- nil when invalid range - end -end - --- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) - ---~ lpeg.print(lpeg.R("ab","cd","gh")) ---~ lpeg.print(lpeg.P("a","b","c")) ---~ lpeg.print(lpeg.S("a","b","c")) - ---~ print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) ---~ print(lpeg.count("äáàa",lpeg.UP("áà"))) ---~ print(lpeg.count("äáàa",lpeg.US("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("aá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) - -function lpeg.is_lpeg(p) - return p and lpegtype(p) == "pattern" -end - -function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order - if type(list) ~= "table" then - list = { list, ... } - end - -- table.sort(list) -- longest match first - local p = P(list[1]) - for l=2,#list do - p = p + P(list[l]) - end - return p -end - --- For the moment here, but it might move to utilities. Beware, we need to --- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we --- loop back from the end cq. prepend. - -local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency! - -function lpeg.append(list,pp,delayed,checked) - local p = pp - if #list > 0 then - local keys = fastcopy(list) - sort(keys) - for i=#keys,1,-1 do - local k = keys[i] - if p then - p = P(k) + p - else - p = P(k) - end - end - elseif delayed then -- hm, it looks like the lpeg parser resolves anyway - local keys = sortedkeys(list) - if p then - for i=1,#keys,1 do - local k = keys[i] - local v = list[k] - p = P(k)/list + p - end - else - for i=1,#keys do - local k = keys[i] - local v = list[k] - if p then - p = P(k) + p - else - p = P(k) - end - end - if p then - p = p / list - end - end - elseif checked then - -- problem: substitution gives a capture - local keys = sortedkeys(list) - for i=1,#keys do - local k = keys[i] - local v = list[k] - if p then - if k == v then - p = P(k) + p - else - p = P(k)/v + p - end - else - if k == v then - p = P(k) - else - p = P(k)/v - end - end - end - else - local keys = sortedkeys(list) - for i=1,#keys do - local k = keys[i] - local v = list[k] - if p then - p = P(k)/v + p - else - p = P(k)/v - end - end - end - return p -end - --- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true)) --- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true)) - --- function lpeg.exact_match(words,case_insensitive) --- local pattern = concat(words) --- if case_insensitive then --- local pattern = S(upper(characters)) + S(lower(characters)) --- local list = { } --- for i=1,#words do --- list[lower(words[i])] = true --- end --- return Cmt(pattern^1, function(_,i,s) --- return list[lower(s)] and i --- end) --- else --- local pattern = S(concat(words)) --- local list = { } --- for i=1,#words do --- list[words[i]] = true --- end --- return Cmt(pattern^1, function(_,i,s) --- return list[s] and i --- end) --- end --- end - --- experiment: - -local function make(t) - local p --- for k, v in next, t do - for k, v in table.sortedhash(t) do - if not p then - if next(v) then - p = P(k) * make(v) - else - p = P(k) - end - else - if next(v) then - p = p + P(k) * make(v) - else - p = p + P(k) - end - end - end - return p -end - -function lpeg.utfchartabletopattern(list) - local tree = { } - for i=1,#list do - local t = tree - for c in gmatch(list[i],".") do - if not t[c] then - t[c] = { } - end - t = t[c] - end - end - return make(tree) -end - --- inspect ( lpeg.utfchartabletopattern { --- utfchar(0x00A0), -- nbsp --- utfchar(0x2000), -- enquad --- utfchar(0x2001), -- emquad --- utfchar(0x2002), -- enspace --- utfchar(0x2003), -- emspace --- utfchar(0x2004), -- threeperemspace --- utfchar(0x2005), -- fourperemspace --- utfchar(0x2006), -- sixperemspace --- utfchar(0x2007), -- figurespace --- utfchar(0x2008), -- punctuationspace --- utfchar(0x2009), -- breakablethinspace --- utfchar(0x200A), -- hairspace --- utfchar(0x200B), -- zerowidthspace --- utfchar(0x202F), -- narrownobreakspace --- utfchar(0x205F), -- math thinspace --- } ) - --- handy from within tex: - -local lpegmatch = lpeg.match - -local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! - -function string.tformat(fmt,...) - return format(lpegmatch(replacer,fmt),...) -end - --- strips leading and trailing spaces and collapsed all other spaces - -local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) - -function string.collapsespaces(str) - return lpegmatch(pattern,str) -end - -end -- closure - -do -- begin closure to overcome local limits and interference - if not modules then modules = { } end modules ['l-boolean'] = { version = 1.001, comment = "companion to luat-lib.mkiv", @@ -2205,25 +2190,25 @@ local suffix = period/"" * (1-period-slashes)^1 * -1 local pattern = C((noslashes^0 * slashes^1)^1) local function pathpart(name,default) - return lpegmatch(pattern,name) or default or "" + return name and lpegmatch(pattern,name) or default or "" end local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1 local function basename(name) - return lpegmatch(pattern,name) or name + return name and lpegmatch(pattern,name) or name end local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0 local function nameonly(name) - return lpegmatch(pattern,name) or name + return name and lpegmatch(pattern,name) or name end local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1 local function suffixonly(name) - return lpegmatch(pattern,name) or "" + return name and lpegmatch(pattern,name) or "" end file.pathpart = pathpart @@ -2254,7 +2239,9 @@ local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra capture local pattern_d = path * rest function file.splitname(str,splitdrive) - if splitdrive then + if not str then + -- error + elseif splitdrive then return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix else return lpegmatch(pattern_b,str) -- returns path, base, suffix @@ -2262,34 +2249,36 @@ function file.splitname(str,splitdrive) end function file.splitbase(str) - return lpegmatch(pattern_d,str) -- returns path, base+suffix + return str and lpegmatch(pattern_d,str) -- returns path, base+suffix end function file.nametotable(str,splitdrive) -- returns table - local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) - if splitdrive then - return { - path = path, - drive = drive, - subpath = subpath, - name = name, - base = base, - suffix = suffix, - } - else - return { - path = path, - name = name, - base = base, - suffix = suffix, - } + if str then + local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) + if splitdrive then + return { + path = path, + drive = drive, + subpath = subpath, + name = name, + base = base, + suffix = suffix, + } + else + return { + path = path, + name = name, + base = base, + suffix = suffix, + } + end end end local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1) function file.removesuffix(name) - return lpegmatch(pattern,name) + return name and lpegmatch(pattern,name) end -- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 @@ -2306,8 +2295,8 @@ end local suffix = period/"" * (1-period-slashes)^1 * -1 local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix) -function file.addsuffix(filename, suffix, criterium) - if not suffix or suffix == "" then +function file.addsuffix(filename,suffix,criterium) + if not filename or not suffix or suffix == "" then return filename elseif criterium == true then return filename .. "." .. suffix @@ -2353,7 +2342,7 @@ local suffix = period * (1-period-slashes)^1 * -1 local pattern = Cs((1-suffix)^0) function file.replacesuffix(name,suffix) - if suffix and suffix ~= "" then + if name and suffix and suffix ~= "" then return lpegmatch(pattern,name) .. "." .. suffix else return name @@ -2362,10 +2351,10 @@ end -- -local reslasher = lpeg.replacer(S("\\"),"/") +local reslasher = lpeg.replacer(P("\\"),"/") function file.reslash(str) - return lpegmatch(reslasher,str) + return str and lpegmatch(reslasher,str) end -- We should be able to use: @@ -2381,7 +2370,9 @@ end -- variant: function file.is_writable(name) - if lfs.isdir(name) then + if not name then + -- error + elseif lfs.isdir(name) then name = name .. "/m_t_x_t_e_s_t.tmp" local f = io.open(name,"wb") if f then @@ -2409,24 +2400,32 @@ end local readable = P("r") * Cc(true) function file.is_readable(name) - local a = attributes(name) - return a and lpegmatch(readable,a.permissions) or false + if name then + local a = attributes(name) + return a and lpegmatch(readable,a.permissions) or false + else + return false + end end file.isreadable = file.is_readable -- depricated file.iswritable = file.is_writable -- depricated function file.size(name) - local a = attributes(name) - return a and a.size or 0 + if name then + local a = attributes(name) + return a and a.size or 0 + else + return 0 + end end function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split) - return checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) + return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) end function file.joinpath(tab,separator) -- table - return concat(tab,separator or io.pathseparator) -- can have trailing // + return tab and concat(tab,separator or io.pathseparator) -- can have trailing // end local stripper = Cs(P(fwslash)^0/"" * reslasher) @@ -2434,14 +2433,23 @@ local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon local isroot = fwslash^1 * -1 local hasroot = fwslash^1 -function file.join(...) -- rather dirty +local deslasher = lpeg.replacer(S("\\/")^1,"/") + +-- If we have a network or prefix then there is a change that we end up with two +-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2) +-- split and rebuild as url. Of course we could assume no network paths (which +-- makes sense) adn assume either mapped drives (windows) or mounts (unix) but +-- then we still have to deal with urls ... anyhow, multiple // are never a real +-- problem but just ugly. + +function file.join(...) local lst = { ... } local one = lst[1] if lpegmatch(isnetwork,one) then - local two = lpegmatch(reslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,concat(lst,"/",2)) return one .. "/" .. two elseif lpegmatch(isroot,one) then - local two = lpegmatch(reslasher,concat(lst,"/",2)) + local two = lpegmatch(deslasher,concat(lst,"/",2)) if lpegmatch(hasroot,two) then return two else @@ -2450,7 +2458,7 @@ function file.join(...) -- rather dirty elseif one == "" then return lpegmatch(stripper,concat(lst,"/",2)) else - return lpegmatch(reslasher,concat(lst,"/")) + return lpegmatch(deslasher,concat(lst,"/")) end end @@ -2479,6 +2487,9 @@ local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * C local absolute = fwslash function file.collapsepath(str,anchor) + if not str then + return + end if anchor and not lpegmatch(anchors,str) then str = getcurrentdir() .. "/" .. str end @@ -2488,7 +2499,6 @@ function file.collapsepath(str,anchor) return lpegmatch(reslasher,str) end local starter, oldelements = lpegmatch(splitstarter,str) --- inspect(oldelements) local newelements = { } local i = #oldelements while i > 0 do @@ -2542,11 +2552,13 @@ local whatever = P("-")^0 / "" local pattern_b = Cs(whatever * (1 - whatever * -1)^1) function file.robustname(str,strict) - str = lpegmatch(pattern_a,str) or str - if strict then - return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) - else - return str + if str then + str = lpegmatch(pattern_a,str) or str + if strict then + return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) + else + return str + end end end @@ -2554,7 +2566,9 @@ file.readdata = io.loaddata file.savedata = io.savedata function file.copy(oldname,newname) - file.savedata(newname,io.loaddata(oldname)) + if oldname and newname then + file.savedata(newname,io.loaddata(oldname)) + end end -- also rewrite previous @@ -2575,11 +2589,11 @@ lpeg.patterns.rootbased = rootbased -- ./name ../name /name c: :// name/name function file.is_qualified_path(filename) - return lpegmatch(qualified,filename) ~= nil + return filename and lpegmatch(qualified,filename) ~= nil end function file.is_rootbased_path(filename) - return lpegmatch(rootbased,filename) ~= nil + return filename and lpegmatch(rootbased,filename) ~= nil end -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end @@ -2601,8 +2615,10 @@ end -- for myself: function file.strip(name,dir) - local b, a = match(name,"^(.-)" .. dir .. "(.*)$") - return a ~= "" and a or name + if name then + local b, a = match(name,"^(.-)" .. dir .. "(.*)$") + return a ~= "" and a or name + end end -- local debuglist = { @@ -2954,7 +2970,7 @@ function io.readstring(f,n,m) f:seek("set",n) n = m end - local str = gsub(f:read(n),"%z","") + local str = gsub(f:read(n),"\000","") return str end @@ -3544,11 +3560,8 @@ if not modules then modules = { } end modules ['font-con'] = { license = "see context related readme files" } - -- some names of table entries will be changed (no _) -local utf = unicode.utf8 - local next, tostring, rawget = next, tostring, rawget local format, match, lower, gsub = string.format, string.match, string.lower, string.gsub local utfbyte = utf.byte @@ -5651,8 +5664,6 @@ if not modules then modules = { } end modules ['font-otf'] = { -- more checking against low level calls of functions -local utf = unicode.utf8 - local utfbyte = utf.byte local format, gmatch, gsub, find, match, lower, strip = string.format, string.gmatch, string.gsub, string.find, string.match, string.lower, string.strip local type, next, tonumber, tostring = type, next, tonumber, tostring @@ -6092,32 +6103,32 @@ function otf.load(filename,format,sub,featurefile) starttiming(data) report_otf("file size: %s", size) enhancers.apply(data,filename,fontdata) + local packtime = { } if packdata then if cleanup > 0 then collectgarbage("collect") ---~ lua.collectgarbage() end + starttiming(packtime) enhance("pack",data,filename,nil) + stoptiming(packtime) end report_otf("saving in cache: %s",filename) data = containers.write(otf.cache, hash, data) if cleanup > 1 then collectgarbage("collect") ---~ lua.collectgarbage() end stoptiming(data) if elapsedtime then -- not in generic - report_otf("preprocessing and caching took %s seconds",elapsedtime(data)) + report_otf("preprocessing and caching took %s seconds (packtime: %s)", + elapsedtime(data),packdata and elapsedtime(packtime) or 0) end fontloader.close(fontdata) -- free memory if cleanup > 3 then collectgarbage("collect") ---~ lua.collectgarbage() end data = containers.read(otf.cache, hash) -- this frees the old table and load the sparse one if cleanup > 2 then collectgarbage("collect") ---~ lua.collectgarbage() end else data = nil @@ -6695,7 +6706,10 @@ actions["reorganize subtables"] = function(data,filename,raw) -- local name = gk.name -- - if features then + if not name then + -- in fact an error + report_otf("skipping weird lookup number %s",k) + elseif features then -- scripts, tag, ismac local f = { } for i=1,#features do @@ -7128,6 +7142,9 @@ actions["merge kern classes"] = function(data,filename,raw) if type(lookups) ~= "table" then lookups = { lookups } end + -- if offsets[1] == nil then + -- offsets[1] = "" + -- end -- we can check the max in the loop -- local maxseconds = getn(seconds) for n, s in next, firsts do @@ -7148,9 +7165,9 @@ actions["merge kern classes"] = function(data,filename,raw) if splt then local extrakerns = { } local baseoffset = (fk-1) * maxseconds - -- for sk=2,maxseconds do - -- local sv = seconds[sk] - for sk, sv in next, seconds do + for sk=2,maxseconds do -- will become 1 based in future luatex + local sv = seconds[sk] + -- for sk, sv in next, seconds do local splt = split[sv] if splt then -- redundant test local offset = offsets[baseoffset + sk] @@ -8377,7 +8394,7 @@ if not modules then modules = { } end modules ['node-inj'] = { -- This is very experimental (this will change when we have luatex > .50 and -- a few pending thingies are available. Also, Idris needs to make a few more -- test fonts. Btw, future versions of luatex will have extended glyph properties --- that can be of help. +-- that can be of help. Some optimizations can go away when we have faster machines. local next = next @@ -8864,7 +8881,7 @@ if not modules then modules = { } end modules ['font-ota'] = { -- this might become scrp-*.lua -local type, tostring, match, format, concat = type, tostring, string.match, string.format, table.concat +local type = type if not trackers then trackers = { register = function() end } end @@ -9249,6 +9266,8 @@ if not modules then modules = { } end modules ['font-otn'] = { -- handle positions (we need example fonts) -- handle gpos_single (we might want an extra width field in glyph nodes because adding kerns might interfere) -- mark (to mark) code is still not what it should be (too messy but we need some more extreem husayni tests) +-- remove some optimizations (when I have a faster machine) + --[[ldx-- <p>This module is a bit more split up that I'd like but since we also want to test @@ -12601,7 +12620,6 @@ if not modules then modules = { } end modules ['font-def'] = { -- We can overload some of the definers.functions so we don't local them. -local concat = table.concat local format, gmatch, match, find, lower, gsub = string.format, string.gmatch, string.match, string.find, string.lower, string.gsub local tostring, next = tostring, next local lpegmatch = lpeg.match diff --git a/tex/generic/context/luatex/luatex-fonts.lua b/tex/generic/context/luatex/luatex-fonts.lua index dd2c902cd..6b502cd24 100644 --- a/tex/generic/context/luatex/luatex-fonts.lua +++ b/tex/generic/context/luatex/luatex-fonts.lua @@ -22,7 +22,7 @@ if not modules then modules = { } end modules ['luatex-fonts'] = { -- -- Todo: all global namespaces in called modules will get local shortcuts. -utf = unicode.utf8 +utf = utf or unicode.utf8 if not generic_context then @@ -132,10 +132,10 @@ else -- mess up ConTeXt code for the sake of general generality. Around -- version 1.0 there will be an official api defined. + loadmodule('l-lpeg.lua') loadmodule('l-function.lua') loadmodule('l-string.lua') loadmodule('l-table.lua') - loadmodule('l-lpeg.lua') loadmodule('l-boolean.lua') loadmodule('l-math.lua') loadmodule('l-file.lua') |