diff options
Diffstat (limited to 'tex/generic')
| -rw-r--r-- | tex/generic/context/luatex/luatex-fonts-merged.lua | 2260 | ||||
| -rw-r--r-- | tex/generic/context/luatex/luatex-fonts.lua | 4 | 
2 files changed, 1141 insertions, 1123 deletions
| diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 1f402cd4f..2b1f94b14 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,789 @@  -- merged file : luatex-fonts-merged.lua  -- parent file : luatex-fonts.lua --- merge date  : 12/10/12 23:20:34 +-- merge date  : 12/24/12 20:08:31 + +do -- begin closure to overcome local limits and interference + +if not modules then modules = { } end modules ['l-lpeg'] = { +    version   = 1.001, +    comment   = "companion to luat-lib.mkiv", +    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", +    copyright = "PRAGMA ADE / ConTeXt Development Team", +    license   = "see context related readme files" +} + +-- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) + +-- move utf    -> l-unicode +-- move string -> l-string or keep it here + +local lpeg = require("lpeg") + +-- tracing (only used when we encounter a problem in integration of lpeg in luatex) + +-- some code will move to unicode and string + +local report = texio and texio.write_nl or print + +-- local lpmatch = lpeg.match +-- local lpprint = lpeg.print +-- local lpp     = lpeg.P +-- local lpr     = lpeg.R +-- local lps     = lpeg.S +-- local lpc     = lpeg.C +-- local lpb     = lpeg.B +-- local lpv     = lpeg.V +-- local lpcf    = lpeg.Cf +-- local lpcb    = lpeg.Cb +-- local lpcg    = lpeg.Cg +-- local lpct    = lpeg.Ct +-- local lpcs    = lpeg.Cs +-- local lpcc    = lpeg.Cc +-- local lpcmt   = lpeg.Cmt +-- local lpcarg  = lpeg.Carg + +-- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end + +-- function lpeg.P    (l) local p = lpp   (l) report("LPEG P =")    lpprint(l) return p end +-- function lpeg.R    (l) local p = lpr   (l) report("LPEG R =")    lpprint(l) return p end +-- function lpeg.S    (l) local p = lps   (l) report("LPEG S =")    lpprint(l) return p end +-- function lpeg.C    (l) local p = lpc   (l) report("LPEG C =")    lpprint(l) return p end +-- function lpeg.B    (l) local p = lpb   (l) report("LPEG B =")    lpprint(l) return p end +-- function lpeg.V    (l) local p = lpv   (l) report("LPEG V =")    lpprint(l) return p end +-- function lpeg.Cf   (l) local p = lpcf  (l) report("LPEG Cf =")   lpprint(l) return p end +-- function lpeg.Cb   (l) local p = lpcb  (l) report("LPEG Cb =")   lpprint(l) return p end +-- function lpeg.Cg   (l) local p = lpcg  (l) report("LPEG Cg =")   lpprint(l) return p end +-- function lpeg.Ct   (l) local p = lpct  (l) report("LPEG Ct =")   lpprint(l) return p end +-- function lpeg.Cs   (l) local p = lpcs  (l) report("LPEG Cs =")   lpprint(l) return p end +-- function lpeg.Cc   (l) local p = lpcc  (l) report("LPEG Cc =")   lpprint(l) return p end +-- function lpeg.Cmt  (l) local p = lpcmt (l) report("LPEG Cmt =")  lpprint(l) return p end +-- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end + +local type, next = type, next +local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format + +-- Beware, we predefine a bunch of patterns here and one reason for doing so +-- is that we get consistent behaviour in some of the visualizers. + +lpeg.patterns  = lpeg.patterns or { } -- so that we can share +local patterns = lpeg.patterns + +local P, R, S, V, Ct, C, Cs, Cc, Cp, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp, lpeg.Cmt +local lpegtype, lpegmatch = lpeg.type, lpeg.match + +local anything         = P(1) +local endofstring      = P(-1) +local alwaysmatched    = P(true) + +patterns.anything      = anything +patterns.endofstring   = endofstring +patterns.beginofstring = alwaysmatched +patterns.alwaysmatched = alwaysmatched + +local digit, sign      = R('09'), S('+-') +local cr, lf, crlf     = P("\r"), P("\n"), P("\r\n") +local newline          = crlf + S("\r\n") -- cr + lf +local escaped          = P("\\") * anything +local squote           = P("'") +local dquote           = P('"') +local space            = P(" ") + +local utfbom_32_be     = P('\000\000\254\255') +local utfbom_32_le     = P('\255\254\000\000') +local utfbom_16_be     = P('\255\254') +local utfbom_16_le     = P('\254\255') +local utfbom_8         = P('\239\187\191') +local utfbom           = utfbom_32_be + utfbom_32_le +                       + utfbom_16_be + utfbom_16_le +                       + utfbom_8 +local utftype          = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le  * Cc("utf-32-le") +                       + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le  * Cc("utf-16-le") +                       + utfbom_8     * Cc("utf-8")     + alwaysmatched * Cc("utf-8") -- assume utf8 +local utfoffset        = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4) +                       + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2) +                       + utfbom_8     * Cc(3) + Cc(0) + +local utf8next         = R("\128\191") + +patterns.utf8one       = R("\000\127") +patterns.utf8two       = R("\194\223") * utf8next +patterns.utf8three     = R("\224\239") * utf8next * utf8next +patterns.utf8four      = R("\240\244") * utf8next * utf8next * utf8next +patterns.utfbom        = utfbom +patterns.utftype       = utftype +patterns.utfoffset     = utfoffset + +local utf8char         = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four +local validutf8char    = utf8char^0 * endofstring * Cc(true) + Cc(false) + +patterns.utf8          = utf8char +patterns.utf8char      = utf8char +patterns.validutf8     = validutf8char +patterns.validutf8char = validutf8char + +local eol              = S("\n\r") +local spacer           = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) +local whitespace       = eol + spacer +local nonspacer        = 1 - spacer +local nonwhitespace    = 1 - whitespace + +patterns.eol           = eol +patterns.spacer        = spacer +patterns.whitespace    = whitespace +patterns.nonspacer     = nonspacer +patterns.nonwhitespace = nonwhitespace + +local stripper         = spacer^0 * C((spacer^0     * nonspacer^1)^0) -- from example by roberto + +----- collapser        = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0) +local collapser        = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) + +patterns.stripper      = stripper +patterns.collapser     = collapser + +patterns.digit         = digit +patterns.sign          = sign +patterns.cardinal      = sign^0 * digit^1 +patterns.integer       = sign^0 * digit^1 +patterns.unsigned      = digit^0 * P('.') * digit^1 +patterns.float         = sign^0 * patterns.unsigned +patterns.cunsigned     = digit^0 * P(',') * digit^1 +patterns.cfloat        = sign^0 * patterns.cunsigned +patterns.number        = patterns.float + patterns.integer +patterns.cnumber       = patterns.cfloat + patterns.integer +patterns.oct           = P("0") * R("07")^1 +patterns.octal         = patterns.oct +patterns.HEX           = P("0x") * R("09","AF")^1 +patterns.hex           = P("0x") * R("09","af")^1 +patterns.hexadecimal   = P("0x") * R("09","AF","af")^1 +patterns.lowercase     = R("az") +patterns.uppercase     = R("AZ") +patterns.letter        = patterns.lowercase + patterns.uppercase +patterns.space         = space +patterns.tab           = P("\t") +patterns.spaceortab    = patterns.space + patterns.tab +patterns.newline       = newline +patterns.emptyline     = newline^1 +patterns.equal         = P("=") +patterns.comma         = P(",") +patterns.commaspacer   = P(",") * spacer^0 +patterns.period        = P(".") +patterns.colon         = P(":") +patterns.semicolon     = P(";") +patterns.underscore    = P("_") +patterns.escaped       = escaped +patterns.squote        = squote +patterns.dquote        = dquote +patterns.nosquote      = (escaped + (1-squote))^0 +patterns.nodquote      = (escaped + (1-dquote))^0 +patterns.unsingle      = (squote/"") * patterns.nosquote * (squote/"") -- will change to C in the middle +patterns.undouble      = (dquote/"") * patterns.nodquote * (dquote/"") -- will change to C in the middle +patterns.unquoted      = patterns.undouble + patterns.unsingle -- more often undouble +patterns.unspacer      = ((patterns.spacer^1)/"")^0 + +patterns.singlequoted  = squote * patterns.nosquote * squote +patterns.doublequoted  = dquote * patterns.nodquote * dquote +patterns.quoted        = patterns.doublequoted + patterns.singlequoted + +patterns.propername    = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1) + +patterns.somecontent   = (anything - newline - space)^1 -- (utf8char - newline - space)^1 +patterns.beginline     = #(1-newline) + +local function anywhere(pattern) --slightly adapted from website +    return P { P(pattern) + 1 * V(1) } +end + +lpeg.anywhere = anywhere + +function lpeg.instringchecker(p) +    p = anywhere(p) +    return function(str) +        return lpegmatch(p,str) and true or false +    end +end + +function lpeg.splitter(pattern, action) +    return (((1-P(pattern))^1)/action+1)^0 +end + +function lpeg.tsplitter(pattern, action) +    return Ct((((1-P(pattern))^1)/action+1)^0) +end + +-- probleem: separator can be lpeg and that does not hash too well, but +-- it's quite okay as the key is then not garbage collected + +local splitters_s, splitters_m, splitters_t = { }, { }, { } + +local function splitat(separator,single) +    local splitter = (single and splitters_s[separator]) or splitters_m[separator] +    if not splitter then +        separator = P(separator) +        local other = C((1 - separator)^0) +        if single then +            local any = anything +            splitter = other * (separator * C(any^0) + "") -- ? +            splitters_s[separator] = splitter +        else +            splitter = other * (separator * other)^0 +            splitters_m[separator] = splitter +        end +    end +    return splitter +end + +local function tsplitat(separator) +    local splitter = splitters_t[separator] +    if not splitter then +        splitter = Ct(splitat(separator)) +        splitters_t[separator] = splitter +    end +    return splitter +end + +lpeg.splitat  = splitat +lpeg.tsplitat = tsplitat + +function string.splitup(str,separator) +    if not separator then +        separator = "," +    end +    return lpegmatch(splitters_m[separator] or splitat(separator),str) +end + +-- local p = splitat("->",false)  print(lpegmatch(p,"oeps->what->more"))  -- oeps what more +-- local p = splitat("->",true)   print(lpegmatch(p,"oeps->what->more"))  -- oeps what->more +-- local p = splitat("->",false)  print(lpegmatch(p,"oeps"))              -- oeps +-- local p = splitat("->",true)   print(lpegmatch(p,"oeps"))              -- oeps + +local cache = { } + +function lpeg.split(separator,str) +    local c = cache[separator] +    if not c then +        c = tsplitat(separator) +        cache[separator] = c +    end +    return lpegmatch(c,str) +end + +function string.split(str,separator) +    if separator then +        local c = cache[separator] +        if not c then +            c = tsplitat(separator) +            cache[separator] = c +        end +        return lpegmatch(c,str) +    else +        return { str } +    end +end + +local spacing  = patterns.spacer^0 * newline -- sort of strip +local empty    = spacing * Cc("") +local nonempty = Cs((1-spacing)^1) * spacing^-1 +local content  = (empty + nonempty)^1 + +patterns.textline = content + +local linesplitter = tsplitat(newline) + +patterns.linesplitter = linesplitter + +function string.splitlines(str) +    return lpegmatch(linesplitter,str) +end + +-- lpeg.splitters = cache -- no longer public + +local cache = { } + +function lpeg.checkedsplit(separator,str) +    local c = cache[separator] +    if not c then +        separator = P(separator) +        local other = C((1 - separator)^1) +        c = Ct(separator^0 * other * (separator^1 * other)^0) +        cache[separator] = c +    end +    return lpegmatch(c,str) +end + +function string.checkedsplit(str,separator) +    local c = cache[separator] +    if not c then +        separator = P(separator) +        local other = C((1 - separator)^1) +        c = Ct(separator^0 * other * (separator^1 * other)^0) +        cache[separator] = c +    end +    return lpegmatch(c,str) +end + +-- from roberto's site: + +local function f2(s) local c1, c2         = byte(s,1,2) return   c1 * 64 + c2                       -    12416 end +local function f3(s) local c1, c2, c3     = byte(s,1,3) return  (c1 * 64 + c2) * 64 + c3            -   925824 end +local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end + +local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 + +patterns.utf8byte = utf8byte + +--~ local str = " a b c d " + +--~ local s = lpeg.stripper(lpeg.R("az"))   print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper(lpeg.R("az"))     print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.stripper("ab")           print("["..lpegmatch(s,str).."]") +--~ local s = lpeg.keeper("ab")             print("["..lpegmatch(s,str).."]") + +local cache = { } + +function lpeg.stripper(str) +    if type(str) == "string" then +        local s = cache[str] +        if not s then +            s = Cs(((S(str)^1)/"" + 1)^0) +            cache[str] = s +        end +        return s +    else +        return Cs(((str^1)/"" + 1)^0) +    end +end + +local cache = { } + +function lpeg.keeper(str) +    if type(str) == "string" then +        local s = cache[str] +        if not s then +            s = Cs((((1-S(str))^1)/"" + 1)^0) +            cache[str] = s +        end +        return s +    else +        return Cs((((1-str)^1)/"" + 1)^0) +    end +end + +function lpeg.frontstripper(str) -- or pattern (yet undocumented) +    return (P(str) + P(true)) * Cs(anything^0) +end + +function lpeg.endstripper(str) -- or pattern (yet undocumented) +    return Cs((1 - P(str) * endofstring)^0) +end + +-- Just for fun I looked at the used bytecode and +-- p = (p and p + pp) or pp gets one more (testset). + +-- todo: cache when string + +function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sort the keys +    local pattern +    local u = isutf and utf8char or 1 +    if type(one) == "table" then +        local no = #one +        local p = P(false) +        if no == 0 then +            for k, v in next, one do +                p = p + P(k) / v +            end +            pattern = Cs((p + u)^0) +        elseif no == 1 then +            local o = one[1] +            one, two = P(o[1]), o[2] +         -- pattern = Cs(((1-one)^1 + one/two)^0) +            pattern = Cs((one/two + u)^0) +        else +            for i=1,no do +                local o = one[i] +                p = p + P(o[1]) / o[2] +            end +            pattern = Cs((p + u)^0) +        end +    else +        pattern = Cs((P(one)/(two or "") + u)^0) +    end +    if makefunction then +        return function(str) +            return lpegmatch(pattern,str) +        end +    else +        return pattern +    end +end + +function lpeg.finder(lst,makefunction) +    local pattern +    if type(lst) == "table" then +        pattern = P(false) +        if #lst == 0 then +            for k, v in next, lst do +                pattern = pattern + P(k) -- ignore key, so we can use a replacer table +            end +        else +            for i=1,#lst do +                pattern = pattern + P(lst[i]) +            end +        end +    else +        pattern = P(lst) +    end +    pattern = (1-pattern)^0 * pattern +    if makefunction then +        return function(str) +            return lpegmatch(pattern,str) +        end +    else +        return pattern +    end +end + +-- print(lpeg.match(lpeg.replacer("e","a"),"test test")) +-- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) +-- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) + +local splitters_f, splitters_s = { }, { } + +function lpeg.firstofsplit(separator) -- always return value +    local splitter = splitters_f[separator] +    if not splitter then +        separator = P(separator) +        splitter = C((1 - separator)^0) +        splitters_f[separator] = splitter +    end +    return splitter +end + +function lpeg.secondofsplit(separator) -- nil if not split +    local splitter = splitters_s[separator] +    if not splitter then +        separator = P(separator) +        splitter = (1 - separator)^0 * separator * C(anything^0) +        splitters_s[separator] = splitter +    end +    return splitter +end + +function lpeg.balancer(left,right) +    left, right = P(left), P(right) +    return P { left * ((1 - left - right) + V(1))^0 * right } +end + +-- print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) +-- print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty +-- print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) +-- print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) +-- print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty +-- print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) +-- print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) +-- print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) + +-- -- slower: +-- +-- function lpeg.counter(pattern) +--     local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end  + lpeg.anything)^0 +--     return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end +-- end + +local nany = utf8char/"" + +function lpeg.counter(pattern) +    pattern = Cs((P(pattern)/" " + nany)^0) +    return function(str) +        return #lpegmatch(pattern,str) +    end +end + +-- utf extensies + +local utfcharacters = utf and utf.characters or string.utfcharacters +local utfgmatch     = unicode and unicode.utf8.gmatch +local utfchar       = utf and utf.char or (unicode and unicode.utf8 and unicode.utf8.char) + +lpeg.UP = lpeg.P + +if utfcharacters then + +    function lpeg.US(str) +        local p = P(false) +        for uc in utfcharacters(str) do +            p = p + P(uc) +        end +        return p +    end + + +elseif utfgmatch then + +    function lpeg.US(str) +        local p = P(false) +        for uc in utfgmatch(str,".") do +            p = p + P(uc) +        end +        return p +    end + +else + +    function lpeg.US(str) +        local p = P(false) +        local f = function(uc) +            p = p + P(uc) +        end +        lpegmatch((utf8char/f)^0,str) +        return p +    end + +end + +local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture + +function lpeg.UR(str,more) +    local first, last +    if type(str) == "number" then +        first = str +        last = more or first +    else +        first, last = lpegmatch(range,str) +        if not last then +            return P(str) +        end +    end +    if first == last then +        return P(str) +    elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium +        local p = P(false) +        for i=first,last do +            p = p + P(utfchar(i)) +        end +        return p -- nil when invalid range +    else +        local f = function(b) +            return b >= first and b <= last +        end +        -- tricky, these nested captures +        return utf8byte / f -- nil when invalid range +    end +end + +-- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) + +-- lpeg.print(lpeg.R("ab","cd","gh")) +-- lpeg.print(lpeg.P("a","b","c")) +-- lpeg.print(lpeg.S("a","b","c")) + +-- print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) +-- print(lpeg.count("äáàa",lpeg.UP("áà"))) +-- print(lpeg.count("äáàa",lpeg.US("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR("aá"))) +-- print(lpeg.count("äáàa",lpeg.UR("àá"))) +-- print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) + +function lpeg.is_lpeg(p) +    return p and lpegtype(p) == "pattern" +end + +function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order +    if type(list) ~= "table" then +        list = { list, ... } +    end + -- table.sort(list) -- longest match first +    local p = P(list[1]) +    for l=2,#list do +        p = p + P(list[l]) +    end +    return p +end + +-- For the moment here, but it might move to utilities. Beware, we need to +-- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we +-- loop back from the end cq. prepend. + +local sort = table.sort + +local function copyindexed(old) +    local new = { } +    for i=1,#old do +        new[i] = old +    end +    return new +end + +local function sortedkeys(tab) +    local keys, s = { }, 0 +    for key,_ in next, tab do +        s = s + 1 +        keys[s] = key +    end +    sort(keys) +    return keys +end + +function lpeg.append(list,pp,delayed,checked) +    local p = pp +    if #list > 0 then +        local keys = copyindexed(list) +        sort(keys) +        for i=#keys,1,-1 do +            local k = keys[i] +            if p then +                p = P(k) + p +            else +                p = P(k) +            end +        end +    elseif delayed then -- hm, it looks like the lpeg parser resolves anyway +        local keys = sortedkeys(list) +        if p then +            for i=1,#keys,1 do +                local k = keys[i] +                local v = list[k] +                p = P(k)/list + p +            end +        else +            for i=1,#keys do +                local k = keys[i] +                local v = list[k] +                if p then +                    p = P(k) + p +                else +                    p = P(k) +                end +            end +            if p then +                p = p / list +            end +        end +    elseif checked then +        -- problem: substitution gives a capture +        local keys = sortedkeys(list) +        for i=1,#keys do +            local k = keys[i] +            local v = list[k] +            if p then +                if k == v then +                    p = P(k) + p +                else +                    p = P(k)/v + p +                end +            else +                if k == v then +                    p = P(k) +                else +                    p = P(k)/v +                end +            end +        end +    else +        local keys = sortedkeys(list) +        for i=1,#keys do +            local k = keys[i] +            local v = list[k] +            if p then +                p = P(k)/v + p +            else +                p = P(k)/v +            end +        end +    end +    return p +end + +-- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true)) +-- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true)) + +-- function lpeg.exact_match(words,case_insensitive) +--     local pattern = concat(words) +--     if case_insensitive then +--         local pattern = S(upper(characters)) + S(lower(characters)) +--         local list = { } +--         for i=1,#words do +--             list[lower(words[i])] = true +--         end +--         return Cmt(pattern^1, function(_,i,s) +--             return list[lower(s)] and i +--         end) +--     else +--         local pattern = S(concat(words)) +--         local list = { } +--         for i=1,#words do +--             list[words[i]] = true +--         end +--         return Cmt(pattern^1, function(_,i,s) +--             return list[s] and i +--         end) +--     end +-- end + +-- experiment: + +local function make(t) +    local p +    local keys = sortedkeys(t) +    for i=1,#keys do +        local k = keys[i] +        local v = t[k] +        if not p then +            if next(v) then +                p = P(k) * make(v) +            else +                p = P(k) +            end +        else +            if next(v) then +                p = p + P(k) * make(v) +            else +                p = p + P(k) +            end +        end +    end +    return p +end + +function lpeg.utfchartabletopattern(list) -- goes to util-lpg +    local tree = { } +    for i=1,#list do +        local t = tree +        for c in gmatch(list[i],".") do +            if not t[c] then +                t[c] = { } +            end +            t = t[c] +        end +    end +    return make(tree) +end + +-- inspect ( lpeg.utfchartabletopattern { +--     utfchar(0x00A0), -- nbsp +--     utfchar(0x2000), -- enquad +--     utfchar(0x2001), -- emquad +--     utfchar(0x2002), -- enspace +--     utfchar(0x2003), -- emspace +--     utfchar(0x2004), -- threeperemspace +--     utfchar(0x2005), -- fourperemspace +--     utfchar(0x2006), -- sixperemspace +--     utfchar(0x2007), -- figurespace +--     utfchar(0x2008), -- punctuationspace +--     utfchar(0x2009), -- breakablethinspace +--     utfchar(0x200A), -- hairspace +--     utfchar(0x200B), -- zerowidthspace +--     utfchar(0x202F), -- narrownobreakspace +--     utfchar(0x205F), -- math thinspace +-- } ) + +-- a few handy ones: +-- +-- faster than find(str,"[\n\r]") when match and # > 7 and always faster when # > 3 + +patterns.containseol = lpeg.finder(eol) -- (1-eol)^0 * eol + +end -- closure  do -- begin closure to overcome local limits and interference @@ -29,40 +812,46 @@ if not modules then modules = { } end modules ['l-string'] = {  }  local string = string -local sub, gsub, find, match, gmatch, format, char, byte, rep, lower = string.sub, string.gsub, string.find, string.match, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower -local lpegmatch, S, C, Ct = lpeg.match, lpeg.S, lpeg.C, lpeg.Ct - --- some functions may disappear as they are not used anywhere +local sub, gmatch, format, char, byte, rep, lower = string.sub, string.gmatch, string.format, string.char, string.byte, string.rep, string.lower +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local P, S, C, Ct, Cc, Cs = lpeg.P, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.Cs -if not string.split then +-- Some functions are already defined in l-lpeg and maybe some from here will +-- move there (unless we also expose caches). -    -- this will be overloaded by a faster lpeg variant +-- if not string.split then +-- +--     function string.split(str,pattern) +--         local t = { } +--         if #str > 0 then +--             local n = 1 +--             for s in gmatch(str..pattern,"(.-)"..pattern) do +--                 t[n] = s +--                 n = n + 1 +--             end +--         end +--         return t +--     end +-- +-- end -    function string.split(str,pattern) -        local t = { } -        if #str > 0 then -            local n = 1 -            for s in gmatch(str..pattern,"(.-)"..pattern) do -                t[n] = s -                n = n + 1 -            end -        end -        return t -    end +-- function string.unquoted(str) +--     return (gsub(str,"^([\"\'])(.*)%1$","%2")) -- interesting pattern +-- end -end +local unquoted = patterns.squote * C(patterns.nosquote) * patterns.squote +               + patterns.dquote * C(patterns.nodquote) * patterns.dquote  function string.unquoted(str) -    return (gsub(str,"^([\"\'])(.*)%1$","%2")) +    return lpegmatch(unquoted,str) or str  end ---~ function stringunquoted(str) ---~     if find(str,"^[\'\"]") then ---~         return sub(str,2,-2) ---~     else ---~         return str ---~     end ---~ end +-- print(string.unquoted("test")) +-- print(string.unquoted([["t\"est"]])) +-- print(string.unquoted([["t\"est"x]])) +-- print(string.unquoted("\'test\'")) +-- print(string.unquoted('"test"')) +-- print(string.unquoted('"test"'))  function string.quoted(str)      return format("%q",str) -- always " @@ -85,65 +874,112 @@ function string.limit(str,n,sentinel) -- not utf proof      end  end -local space    = S(" \t\v\n") -local nospace  = 1 - space -local stripper = space^0 * C((space^0 * nospace^1)^0) -- roberto's code +local stripper  = patterns.stripper +local collapser = patterns.collapser  function string.strip(str)      return lpegmatch(stripper,str) or ""  end +function string.collapsespaces(str) +    return lpegmatch(collapser,str) or "" +end + +-- function string.is_empty(str) +--     return not find(str,"%S") +-- end + +local pattern = P(" ")^0 * P(-1) +  function string.is_empty(str) -    return not find(str,"%S") +    if str == "" then +        return true +    else +        return lpegmatch(pattern,str) and true or false +    end  end -local patterns_escapes = { -    ["%"] = "%%", -    ["."] = "%.", -    ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", -    ["["] = "%[", ["]"] = "%]", -    ["("] = "%(", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} -local simple_escapes = { -    ["-"] = "%-", -    ["."] = "%.", -    ["?"] = ".", -    ["*"] = ".*", -} +-- if not string.escapedpattern then +-- +--     local patterns_escapes = { +--         ["%"] = "%%", +--         ["."] = "%.", +--         ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", +--         ["["] = "%[", ["]"] = "%]", +--         ["("] = "%(", [")"] = "%)", +--      -- ["{"] = "%{", ["}"] = "%}" +--      -- ["^"] = "%^", ["$"] = "%$", +--     } +-- +--     local simple_escapes = { +--         ["-"] = "%-", +--         ["."] = "%.", +--         ["?"] = ".", +--         ["*"] = ".*", +--     } +-- +--     function string.escapedpattern(str,simple) +--         return (gsub(str,".",simple and simple_escapes or patterns_escapes)) +--     end +-- +--     function string.topattern(str,lowercase,strict) +--         if str == "" then +--             return ".*" +--         else +--             str = gsub(str,".",simple_escapes) +--             if lowercase then +--                 str = lower(str) +--             end +--             if strict then +--                 return "^" .. str .. "$" +--             else +--                 return str +--             end +--         end +--     end +-- +-- end + +--- needs checking + +local anything     = patterns.anything +local allescapes   = Cc("%") * S(".-+%?()[]*") -- also {} and ^$ ? +local someescapes  = Cc("%") * S(".-+%()[]")   -- also {} and ^$ ? +local matchescapes = Cc(".") * S("*?")         -- wildcard and single match + +local pattern_a = Cs ( ( allescapes + anything )^0 ) +local pattern_b = Cs ( ( someescapes + matchescapes + anything )^0 ) +local pattern_c = Cs ( Cc("^") * ( someescapes + matchescapes + anything )^0 * Cc("$") )  function string.escapedpattern(str,simple) -    return (gsub(str,".",simple and simple_escapes or patterns_escapes)) +    return lpegmatch(simple and pattern_b or pattern_a,str)  end  function string.topattern(str,lowercase,strict)      if str == "" then          return ".*" +    elseif strict then +        str = lpegmatch(pattern_c,str)      else -        str = gsub(str,".",simple_escapes) -        if lowercase then -            str = lower(str) -        end -        if strict then -            return "^" .. str .. "$" -        else -            return str -        end +        str = lpegmatch(pattern_b,str) +    end +    if lowercase then +        return lower(str) +    else +        return str      end  end +-- print(string.escapedpattern("12+34*.tex",false)) +-- print(string.escapedpattern("12+34*.tex",true)) +-- print(string.topattern     ("12+34*.tex",false,false)) +-- print(string.topattern     ("12+34*.tex",false,true))  function string.valid(str,default)      return (type(str) == "string" and str ~= "" and str) or default or nil  end --- obsolete names: - -string.quote   = string.quoted -string.unquote = string.unquoted -  -- handy fallback  string.itself  = function(s) return s end @@ -156,6 +992,19 @@ function string.totable(str)      return lpegmatch(pattern,str)  end +-- handy from within tex: + +local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! + +function string.tformat(fmt,...) +    return format(lpegmatch(replacer,fmt),...) +end + +-- obsolete names: + +string.quote   = string.quoted +string.unquote = string.unquoted +  end -- closure  do -- begin closure to overcome local limits and interference @@ -168,68 +1017,23 @@ if not modules then modules = { } end modules ['l-table'] = {      license   = "see context related readme files"  } -local type, next, tostring, tonumber, ipairs = type, next, tostring, tonumber, ipairs +local type, next, tostring, tonumber, ipairs, select = type, next, tostring, tonumber, ipairs, select  local table, string = table, string  local concat, sort, insert, remove = table.concat, table.sort, table.insert, table.remove -local format, find, gsub, lower, dump, match = string.format, string.find, string.gsub, string.lower, string.dump, string.match +local format, lower, dump = string.format, string.lower, string.dump  local getmetatable, setmetatable = getmetatable, setmetatable  local getinfo = debug.getinfo - --- Starting with version 5.2 Lua no longer provide ipairs, which makes --- sense. As we already used the for loop and # in most places the --- impact on ConTeXt was not that large; the remaining ipairs already --- have been replaced. In a similar fashion we also hardly used pairs. --- --- Hm, actually ipairs was retained, but we no longer use it anyway. --- --- Just in case, we provide the fallbacks as discussed in Programming --- in Lua (http://www.lua.org/pil/7.3.html): - -if not ipairs then - -    -- for k, v in ipairs(t) do                ... end -    -- for k=1,#t            do local v = t[k] ... end - -    local function iterate(a,i) -        i = i + 1 -        local v = a[i] -        if v ~= nil then -            return i, v --, nil -        end -    end - -    function ipairs(a) -        return iterate, a, 0 -    end - -end - -if not pairs then - -    -- for k, v in pairs(t) do ... end -    -- for k, v in next, t  do ... end - -    function pairs(t) -        return next, t -- , nil -    end - -end - --- Also, unpack has been moved to the table table, and for compatiility --- reasons we provide both now. - -if not table.unpack then -    table.unpack = _G.unpack -elseif not unpack then -    _G.unpack = table.unpack -end +local lpegmatch, patterns = lpeg.match, lpeg.patterns +local floor = math.floor  -- extra functions, some might go (when not used) +local stripper = patterns.stripper +  function table.strip(tab)      local lst, l = { }, 0      for i=1,#tab do -        local s = gsub(tab[i],"^%s*(.-)%s*$","%1") +        local s = lpegmatch(stripper,tab[i]) or ""          if s == "" then              -- skip this one          else @@ -338,7 +1142,7 @@ local function sortedhash(t)  end  table.sortedhash  = sortedhash -table.sortedpairs = sortedhash +table.sortedpairs = sortedhash -- obsolete  function table.append(t,list)      local n = #t @@ -362,31 +1166,63 @@ function table.prepend(t, list)      return t  end +-- function table.merge(t, ...) -- first one is target +--     t = t or { } +--     local lst = { ... } +--     for i=1,#lst do +--         for k, v in next, lst[i] do +--             t[k] = v +--         end +--     end +--     return t +-- end +  function table.merge(t, ...) -- first one is target      t = t or { } -    local lst = { ... } -    for i=1,#lst do -        for k, v in next, lst[i] do +    for i=1,select("#",...) do +        for k, v in next, (select(i,...)) do              t[k] = v          end      end      return t  end +-- function table.merged(...) +--     local tmp, lst = { }, { ... } +--     for i=1,#lst do +--         for k, v in next, lst[i] do +--             tmp[k] = v +--         end +--     end +--     return tmp +-- end +  function table.merged(...) -    local tmp, lst = { }, { ... } -    for i=1,#lst do -        for k, v in next, lst[i] do -            tmp[k] = v +    local t = { } +    for i=1,select("#",...) do +        for k, v in next, (select(i,...)) do +            t[k] = v          end      end -    return tmp +    return t  end +-- function table.imerge(t, ...) +--     local lst, nt = { ... }, #t +--     for i=1,#lst do +--         local nst = lst[i] +--         for j=1,#nst do +--             nt = nt + 1 +--             t[nt] = nst[j] +--         end +--     end +--     return t +-- end +  function table.imerge(t, ...) -    local lst, nt = { ... }, #t -    for i=1,#lst do -        local nst = lst[i] +    local nt = #t +    for i=1,select("#",...) do +        local nst = select(i,...)          for j=1,#nst do              nt = nt + 1              t[nt] = nst[j] @@ -395,10 +1231,22 @@ function table.imerge(t, ...)      return t  end +-- function table.imerged(...) +--     local tmp, ntmp, lst = { }, 0, {...} +--     for i=1,#lst do +--         local nst = lst[i] +--         for j=1,#nst do +--             ntmp = ntmp + 1 +--             tmp[ntmp] = nst[j] +--         end +--     end +--     return tmp +-- end +  function table.imerged(...) -    local tmp, ntmp, lst = { }, 0, {...} -    for i=1,#lst do -        local nst = lst[i] +    local tmp, ntmp = { }, 0 +    for i=1,select("#",...) do +        local nst = select(i,...)          for j=1,#nst do              ntmp = ntmp + 1              tmp[ntmp] = nst[j] @@ -410,7 +1258,7 @@ end  local function fastcopy(old,metatabletoo) -- fast one      if old then          local new = { } -        for k,v in next, old do +        for k, v in next, old do              if type(v) == "table" then                  new[k] = fastcopy(v,metatabletoo) -- was just table.copy              else @@ -464,7 +1312,7 @@ end  table.fastcopy = fastcopy  table.copy     = copy -function table.derive(parent) +function table.derive(parent) -- for the moment not public      local child = { }      if parent then          setmetatable(child,{ __index = parent }) @@ -545,6 +1393,13 @@ end  -- problem: there no good number_to_string converter with the best resolution +-- probably using .. is faster than format +-- maybe split in a few cases (yes/no hexify) + +-- todo: %g faster on numbers than %s + +local propername = patterns.propername -- was find(name,"^%a[%w%_]*$") +  local function dummy() end  local function do_serialize(root,name,depth,level,indexed) @@ -554,14 +1409,14 @@ local function do_serialize(root,name,depth,level,indexed)              handle(format("%s{",depth))          else              local tn = type(name) -            if tn == "number" then -- or find(k,"^%d+$") then +            if tn == "number" then                  if hexify then                      handle(format("%s[0x%04X]={",depth,name))                  else                      handle(format("%s[%s]={",depth,name))                  end              elseif tn == "string" then -                if noquotes and not reserved[name] and find(name,"^%a[%w%_]*$") then +                if noquotes and not reserved[name] and lpegmatch(propername,name) then                      handle(format("%s%s={",depth,name))                  else                      handle(format("%s[%q]={",depth,name)) @@ -587,7 +1442,6 @@ local function do_serialize(root,name,depth,level,indexed)          if compact then              last = #root              for k=1,last do ---                 if not root[k] then                  if root[k] == nil then                      last = k - 1                      break @@ -635,7 +1489,7 @@ local function do_serialize(root,name,depth,level,indexed)                      handle(format("%s %s,",depth,tostring(v)))                  elseif t == "function" then                      if functions then -                        handle(format('%s loadstring(%q),',depth,dump(v))) +                        handle(format('%s load(%q),',depth,dump(v)))                      else                          handle(format('%s "function",',depth))                      end @@ -647,7 +1501,7 @@ local function do_serialize(root,name,depth,level,indexed)                      handle(format("%s __p__=nil,",depth))                  end              elseif t == "number" then -                if tk == "number" then -- or find(k,"^%d+$") then +                if tk == "number" then                      if hexify then                          handle(format("%s [0x%04X]=0x%04X,",depth,k,v))                      else @@ -659,7 +1513,7 @@ local function do_serialize(root,name,depth,level,indexed)                      else                          handle(format("%s [%s]=%s,",depth,tostring(k),v)) -- %.99g                      end -                elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                      if hexify then                          handle(format("%s %s=0x%04X,",depth,k,v))                      else @@ -674,7 +1528,7 @@ local function do_serialize(root,name,depth,level,indexed)                  end              elseif t == "string" then                  if reduce and tonumber(v) then -                    if tk == "number" then -- or find(k,"^%d+$") then +                    if tk == "number" then                          if hexify then                              handle(format("%s [0x%04X]=%s,",depth,k,v))                          else @@ -682,13 +1536,13 @@ local function do_serialize(root,name,depth,level,indexed)                          end                      elseif tk == "boolean" then                          handle(format("%s [%s]=%s,",depth,tostring(k),v)) -                    elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                          handle(format("%s %s=%s,",depth,k,v))                      else                          handle(format("%s [%q]=%s,",depth,k,v))                      end                  else -                    if tk == "number" then -- or find(k,"^%d+$") then +                    if tk == "number" then                          if hexify then                              handle(format("%s [0x%04X]=%q,",depth,k,v))                          else @@ -696,7 +1550,7 @@ local function do_serialize(root,name,depth,level,indexed)                          end                      elseif tk == "boolean" then                          handle(format("%s [%s]=%q,",depth,tostring(k),v)) -                    elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                          handle(format("%s %s=%q,",depth,k,v))                      else                          handle(format("%s [%q]=%q,",depth,k,v)) @@ -704,7 +1558,7 @@ local function do_serialize(root,name,depth,level,indexed)                  end              elseif t == "table" then                  if not next(v) then -                    if tk == "number" then -- or find(k,"^%d+$") then +                    if tk == "number" then                          if hexify then                              handle(format("%s [0x%04X]={},",depth,k))                          else @@ -712,7 +1566,7 @@ local function do_serialize(root,name,depth,level,indexed)                          end                      elseif tk == "boolean" then                          handle(format("%s [%s]={},",depth,tostring(k))) -                    elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                          handle(format("%s %s={},",depth,k))                      else                          handle(format("%s [%q]={},",depth,k)) @@ -720,15 +1574,15 @@ local function do_serialize(root,name,depth,level,indexed)                  elseif inline then                      local st = simple_table(v)                      if st then -                        if tk == "number" then -- or find(k,"^%d+$") then +                        if tk == "number" then                              if hexify then                                  handle(format("%s [0x%04X]={ %s },",depth,k,concat(st,", ")))                              else                                  handle(format("%s [%s]={ %s },",depth,k,concat(st,", ")))                              end -                        elseif tk == "boolean" then -- or find(k,"^%d+$") then +                        elseif tk == "boolean" then                              handle(format("%s [%s]={ %s },",depth,tostring(k),concat(st,", "))) -                        elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                        elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                              handle(format("%s %s={ %s },",depth,k,concat(st,", ")))                          else                              handle(format("%s [%q]={ %s },",depth,k,concat(st,", "))) @@ -740,15 +1594,15 @@ local function do_serialize(root,name,depth,level,indexed)                      do_serialize(v,k,depth,level+1)                  end              elseif t == "boolean" then -                if tk == "number" then -- or find(k,"^%d+$") then +                if tk == "number" then                      if hexify then                          handle(format("%s [0x%04X]=%s,",depth,k,tostring(v)))                      else                          handle(format("%s [%s]=%s,",depth,k,tostring(v)))                      end -                elseif tk == "boolean" then -- or find(k,"^%d+$") then +                elseif tk == "boolean" then                      handle(format("%s [%s]=%s,",depth,tostring(k),tostring(v))) -                elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                      handle(format("%s %s=%s,",depth,k,tostring(v)))                  else                      handle(format("%s [%q]=%s,",depth,k,tostring(v))) @@ -757,30 +1611,30 @@ local function do_serialize(root,name,depth,level,indexed)                  if functions then                      local f = getinfo(v).what == "C" and dump(dummy) or dump(v)                   -- local f = getinfo(v).what == "C" and dump(function(...) return v(...) end) or dump(v) -                    if tk == "number" then -- or find(k,"^%d+$") then +                    if tk == "number" then                          if hexify then -                            handle(format("%s [0x%04X]=loadstring(%q),",depth,k,f)) +                            handle(format("%s [0x%04X]=load(%q),",depth,k,f))                          else -                            handle(format("%s [%s]=loadstring(%q),",depth,k,f)) +                            handle(format("%s [%s]=load(%q),",depth,k,f))                          end                      elseif tk == "boolean" then -                        handle(format("%s [%s]=loadstring(%q),",depth,tostring(k),f)) -                    elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then -                        handle(format("%s %s=loadstring(%q),",depth,k,f)) +                        handle(format("%s [%s]=load(%q),",depth,tostring(k),f)) +                    elseif noquotes and not reserved[k] and lpegmatch(propername,k) then +                        handle(format("%s %s=load(%q),",depth,k,f))                      else -                        handle(format("%s [%q]=loadstring(%q),",depth,k,f)) +                        handle(format("%s [%q]=load(%q),",depth,k,f))                      end                  end              else -                if tk == "number" then -- or find(k,"^%d+$") then +                if tk == "number" then                      if hexify then                          handle(format("%s [0x%04X]=%q,",depth,k,tostring(v)))                      else                          handle(format("%s [%s]=%q,",depth,k,tostring(v)))                      end -                elseif tk == "boolean" then -- or find(k,"^%d+$") then +                elseif tk == "boolean" then                      handle(format("%s [%s]=%q,",depth,tostring(k),tostring(v))) -                elseif noquotes and not reserved[k] and find(k,"^%a[%w%_]*$") then +                elseif noquotes and not reserved[k] and lpegmatch(propername,k) then                      handle(format("%s %s=%q,",depth,k,tostring(v)))                  else                      handle(format("%s [%q]=%q,",depth,k,tostring(v))) @@ -861,14 +1715,14 @@ local function serialize(_handle,root,name,specification) -- handle wins      handle("}")  end ---~ name: ---~ ---~ true     : return     { } ---~ false    :            { } ---~ nil      : t        = { } ---~ string   : string   = { } ---~ 'return' : return     { } ---~ number   : [number] = { } +-- name: +-- +-- true     : return     { } +-- false    :            { } +-- nil      : t        = { } +-- string   : string   = { } +-- "return" : return     { } +-- number   : [number] = { }  function table.serialize(root,name,specification)      local t, n = { }, 0 @@ -957,7 +1811,7 @@ table.flattened = flattened  local function unnest(t,f) -- only used in mk, for old times sake      if not f then          -- and only relevant for token lists -        f = { } +        f = { }            -- this one can become obsolete      end      for i=1,#t do          local v = t[i] @@ -986,7 +1840,7 @@ local function are_equal(a,b,n,m) -- indexed              local ai, bi = a[i], b[i]              if ai==bi then                  -- same -            elseif type(ai)=="table" and type(bi)=="table" then +            elseif type(ai) == "table" and type(bi) == "table" then                  if not are_equal(ai,bi) then                      return false                  end @@ -1021,10 +1875,10 @@ table.are_equal = are_equal  -- maybe also make a combined one -function table.compact(t) +function table.compact(t) -- remove empty tables, assumes subtables      if t then -        for k,v in next, t do -            if not next(v) then +        for k, v in next, t do +            if not next(v) then -- no type checking                  t[k] = nil              end          end @@ -1063,7 +1917,7 @@ function table.swapped(t,s) -- hash      return n  end -function table.mirror(t) -- hash +function table.mirrored(t) -- hash      local n = { }      for k, v in next, t do          n[v] = k @@ -1086,6 +1940,17 @@ function table.reversed(t)      end  end +function table.reverse(t) +    if t then +        local n = #t +        for i=1,floor(n/2) do +            local j = n - i + 1 +            t[i], t[j] = t[j], t[i] +        end +        return t +    end +end +  function table.sequenced(t,sep) -- hash only      if t then          local s, n = { }, 0 @@ -1172,886 +2037,6 @@ end -- closure  do -- begin closure to overcome local limits and interference -if not modules then modules = { } end modules ['l-lpeg'] = { -    version   = 1.001, -    comment   = "companion to luat-lib.mkiv", -    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL", -    copyright = "PRAGMA ADE / ConTeXt Development Team", -    license   = "see context related readme files" -} - - --- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) - -local lpeg = require("lpeg") - --- tracing (only used when we encounter a problem in integration of lpeg in luatex) - --- some code will move to unicode and string - -local report = texio and texio.write_nl or print - --- local lpmatch = lpeg.match --- local lpprint = lpeg.print --- local lpp     = lpeg.P --- local lpr     = lpeg.R --- local lps     = lpeg.S --- local lpc     = lpeg.C --- local lpb     = lpeg.B --- local lpv     = lpeg.V --- local lpcf    = lpeg.Cf --- local lpcb    = lpeg.Cb --- local lpcg    = lpeg.Cg --- local lpct    = lpeg.Ct --- local lpcs    = lpeg.Cs --- local lpcc    = lpeg.Cc --- local lpcmt   = lpeg.Cmt --- local lpcarg  = lpeg.Carg - --- function lpeg.match(l,...) report("LPEG MATCH") lpprint(l) return lpmatch(l,...) end - --- function lpeg.P    (l) local p = lpp   (l) report("LPEG P =")    lpprint(l) return p end --- function lpeg.R    (l) local p = lpr   (l) report("LPEG R =")    lpprint(l) return p end --- function lpeg.S    (l) local p = lps   (l) report("LPEG S =")    lpprint(l) return p end --- function lpeg.C    (l) local p = lpc   (l) report("LPEG C =")    lpprint(l) return p end --- function lpeg.B    (l) local p = lpb   (l) report("LPEG B =")    lpprint(l) return p end --- function lpeg.V    (l) local p = lpv   (l) report("LPEG V =")    lpprint(l) return p end --- function lpeg.Cf   (l) local p = lpcf  (l) report("LPEG Cf =")   lpprint(l) return p end --- function lpeg.Cb   (l) local p = lpcb  (l) report("LPEG Cb =")   lpprint(l) return p end --- function lpeg.Cg   (l) local p = lpcg  (l) report("LPEG Cg =")   lpprint(l) return p end --- function lpeg.Ct   (l) local p = lpct  (l) report("LPEG Ct =")   lpprint(l) return p end --- function lpeg.Cs   (l) local p = lpcs  (l) report("LPEG Cs =")   lpprint(l) return p end --- function lpeg.Cc   (l) local p = lpcc  (l) report("LPEG Cc =")   lpprint(l) return p end --- function lpeg.Cmt  (l) local p = lpcmt (l) report("LPEG Cmt =")  lpprint(l) return p end --- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end - -local type, next = type, next -local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format - --- Beware, we predefine a bunch of patterns here and one reason for doing so --- is that we get consistent behaviour in some of the visualizers. - -lpeg.patterns  = lpeg.patterns or { } -- so that we can share -local patterns = lpeg.patterns - -local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp -local lpegtype, lpegmatch = lpeg.type, lpeg.match - -local utfcharacters    = string.utfcharacters -local utfgmatch        = unicode and unicode.utf8.gmatch - -local anything         = P(1) -local endofstring      = P(-1) -local alwaysmatched    = P(true) - -patterns.anything      = anything -patterns.endofstring   = endofstring -patterns.beginofstring = alwaysmatched -patterns.alwaysmatched = alwaysmatched - -local digit, sign      = R('09'), S('+-') -local cr, lf, crlf     = P("\r"), P("\n"), P("\r\n") -local newline          = crlf + S("\r\n") -- cr + lf -local escaped          = P("\\") * anything -local squote           = P("'") -local dquote           = P('"') -local space            = P(" ") - -local utfbom_32_be     = P('\000\000\254\255') -local utfbom_32_le     = P('\255\254\000\000') -local utfbom_16_be     = P('\255\254') -local utfbom_16_le     = P('\254\255') -local utfbom_8         = P('\239\187\191') -local utfbom           = utfbom_32_be + utfbom_32_le -                       + utfbom_16_be + utfbom_16_le -                       + utfbom_8 -local utftype          = utfbom_32_be / "utf-32-be" + utfbom_32_le  / "utf-32-le" -                       + utfbom_16_be / "utf-16-be" + utfbom_16_le  / "utf-16-le" -                       + utfbom_8     / "utf-8"     + alwaysmatched / "unknown" - -local utf8next         = R("\128\191") - -patterns.utf8one       = R("\000\127") -patterns.utf8two       = R("\194\223") * utf8next -patterns.utf8three     = R("\224\239") * utf8next * utf8next -patterns.utf8four      = R("\240\244") * utf8next * utf8next * utf8next -patterns.utfbom        = utfbom -patterns.utftype       = utftype - -local utf8char         = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four -local validutf8char    = utf8char^0 * endofstring * Cc(true) + Cc(false) - -patterns.utf8          = utf8char -patterns.utf8char      = utf8char -patterns.validutf8     = validutf8char -patterns.validutf8char = validutf8char - -local eol              = S("\n\r") -local spacer           = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto) -local whitespace       = eol + spacer - -patterns.digit         = digit -patterns.sign          = sign -patterns.cardinal      = sign^0 * digit^1 -patterns.integer       = sign^0 * digit^1 -patterns.unsigned      = digit^0 * P('.') * digit^1 -patterns.float         = sign^0 * patterns.unsigned -patterns.cunsigned     = digit^0 * P(',') * digit^1 -patterns.cfloat        = sign^0 * patterns.cunsigned -patterns.number        = patterns.float + patterns.integer -patterns.cnumber       = patterns.cfloat + patterns.integer -patterns.oct           = P("0") * R("07")^1 -patterns.octal         = patterns.oct -patterns.HEX           = P("0x") * R("09","AF")^1 -patterns.hex           = P("0x") * R("09","af")^1 -patterns.hexadecimal   = P("0x") * R("09","AF","af")^1 -patterns.lowercase     = R("az") -patterns.uppercase     = R("AZ") -patterns.letter        = patterns.lowercase + patterns.uppercase -patterns.space         = space -patterns.tab           = P("\t") -patterns.spaceortab    = patterns.space + patterns.tab -patterns.eol           = eol -patterns.spacer        = spacer -patterns.whitespace    = whitespace -patterns.newline       = newline -patterns.emptyline     = newline^1 -patterns.nonspacer     = 1 - spacer -patterns.nonwhitespace = 1 - whitespace -patterns.equal         = P("=") -patterns.comma         = P(",") -patterns.commaspacer   = P(",") * spacer^0 -patterns.period        = P(".") -patterns.colon         = P(":") -patterns.semicolon     = P(";") -patterns.underscore    = P("_") -patterns.escaped       = escaped -patterns.squote        = squote -patterns.dquote        = dquote -patterns.nosquote      = (escaped + (1-squote))^0 -patterns.nodquote      = (escaped + (1-dquote))^0 -patterns.unsingle      = (squote/"") * patterns.nosquote * (squote/"") -patterns.undouble      = (dquote/"") * patterns.nodquote * (dquote/"") -patterns.unquoted      = patterns.undouble + patterns.unsingle -- more often undouble -patterns.unspacer      = ((patterns.spacer^1)/"")^0 - -patterns.singlequoted  = squote * patterns.nosquote * squote -patterns.doublequoted  = dquote * patterns.nodquote * dquote -patterns.quoted        = patterns.doublequoted + patterns.singlequoted - -patterns.somecontent   = (anything - newline - space)^1 -- (utf8char - newline - space)^1 -patterns.beginline     = #(1-newline) - --- print(string.unquoted("test")) --- print(string.unquoted([["t\"est"]])) --- print(string.unquoted([["t\"est"x]])) --- print(string.unquoted("\'test\'")) --- print(string.unquoted('"test"')) --- print(string.unquoted('"test"')) - -local function anywhere(pattern) --slightly adapted from website -    return P { P(pattern) + 1 * V(1) } -end - -lpeg.anywhere = anywhere - -function lpeg.instringchecker(p) -    p = anywhere(p) -    return function(str) -        return lpegmatch(p,str) and true or false -    end -end - -function lpeg.splitter(pattern, action) -    return (((1-P(pattern))^1)/action+1)^0 -end - -function lpeg.tsplitter(pattern, action) -    return Ct((((1-P(pattern))^1)/action+1)^0) -end - --- probleem: separator can be lpeg and that does not hash too well, but --- it's quite okay as the key is then not garbage collected - -local splitters_s, splitters_m, splitters_t = { }, { }, { } - -local function splitat(separator,single) -    local splitter = (single and splitters_s[separator]) or splitters_m[separator] -    if not splitter then -        separator = P(separator) -        local other = C((1 - separator)^0) -        if single then -            local any = anything -            splitter = other * (separator * C(any^0) + "") -- ? -            splitters_s[separator] = splitter -        else -            splitter = other * (separator * other)^0 -            splitters_m[separator] = splitter -        end -    end -    return splitter -end - -local function tsplitat(separator) -    local splitter = splitters_t[separator] -    if not splitter then -        splitter = Ct(splitat(separator)) -        splitters_t[separator] = splitter -    end -    return splitter -end - -lpeg.splitat  = splitat -lpeg.tsplitat = tsplitat - -function string.splitup(str,separator) -    if not separator then -        separator = "," -    end -    return lpegmatch(splitters_m[separator] or splitat(separator),str) -end - ---~ local p = splitat("->",false)  print(lpegmatch(p,"oeps->what->more"))  -- oeps what more ---~ local p = splitat("->",true)   print(lpegmatch(p,"oeps->what->more"))  -- oeps what->more ---~ local p = splitat("->",false)  print(lpegmatch(p,"oeps"))              -- oeps ---~ local p = splitat("->",true)   print(lpegmatch(p,"oeps"))              -- oeps - -local cache = { } - -function lpeg.split(separator,str) -    local c = cache[separator] -    if not c then -        c = tsplitat(separator) -        cache[separator] = c -    end -    return lpegmatch(c,str) -end - -function string.split(str,separator) -    if separator then -        local c = cache[separator] -        if not c then -            c = tsplitat(separator) -            cache[separator] = c -        end -        return lpegmatch(c,str) -    else -        return { str } -    end -end - -local spacing  = patterns.spacer^0 * newline -- sort of strip -local empty    = spacing * Cc("") -local nonempty = Cs((1-spacing)^1) * spacing^-1 -local content  = (empty + nonempty)^1 - -patterns.textline = content - ---~ local linesplitter = Ct(content^0) ---~ ---~ function string.splitlines(str) ---~     return lpegmatch(linesplitter,str) ---~ end - -local linesplitter = tsplitat(newline) - -patterns.linesplitter = linesplitter - -function string.splitlines(str) -    return lpegmatch(linesplitter,str) -end - -local utflinesplitter = utfbom^-1 * tsplitat(newline) - -patterns.utflinesplitter = utflinesplitter - -function string.utfsplitlines(str) -    return lpegmatch(utflinesplitter,str or "") -end - -local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0) -local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0) - -function string.utfsplit(str,ignorewhitespace) -- new -    if ignorewhitespace then -        return lpegmatch(utfcharsplitter_iws,str or "") -    else -        return lpegmatch(utfcharsplitter_ows,str or "") -    end -end - --- inspect(string.utfsplit("a b c d")) --- inspect(string.utfsplit("a b c d",true)) - --- -- alternative 1: 0.77 --- --- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0) --- --- function string.utflength(str) ---     return #lpegmatch(utfcharcounter,str or "") --- end --- --- -- alternative 2: 1.70 --- --- local n = 0 --- --- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow --- --- function string.utflength(str) ---     n = 0 ---     lpegmatch(utfcharcounter,str or "") ---     return n --- end --- --- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047) - -local n = 0 - -local utfcharcounter = utfbom^-1 * Cs ( ( -    Cp() * (lpeg.patterns.utf8one  )^1 * Cp() / function(f,t) n = n +  t - f    end -  + Cp() * (lpeg.patterns.utf8two  )^1 * Cp() / function(f,t) n = n + (t - f)/2 end -  + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end -  + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end -)^0 ) - -function string.utflength(str) -    n = 0 -    lpegmatch(utfcharcounter,str or "") -    return n -end - ---~ lpeg.splitters = cache -- no longer public - -local cache = { } - -function lpeg.checkedsplit(separator,str) -    local c = cache[separator] -    if not c then -        separator = P(separator) -        local other = C((1 - separator)^1) -        c = Ct(separator^0 * other * (separator^1 * other)^0) -        cache[separator] = c -    end -    return lpegmatch(c,str) -end - -function string.checkedsplit(str,separator) -    local c = cache[separator] -    if not c then -        separator = P(separator) -        local other = C((1 - separator)^1) -        c = Ct(separator^0 * other * (separator^1 * other)^0) -        cache[separator] = c -    end -    return lpegmatch(c,str) -end - ---~ from roberto's site: - -local function f2(s) local c1, c2         = byte(s,1,2) return   c1 * 64 + c2                       -    12416 end -local function f3(s) local c1, c2, c3     = byte(s,1,3) return  (c1 * 64 + c2) * 64 + c3            -   925824 end -local function f4(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end - -local utf8byte = patterns.utf8one/byte + patterns.utf8two/f2 + patterns.utf8three/f3 + patterns.utf8four/f4 - -patterns.utf8byte = utf8byte - ---~ local str = " a b c d " - ---~ local s = lpeg.stripper(lpeg.R("az"))   print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.keeper(lpeg.R("az"))     print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.stripper("ab")           print("["..lpegmatch(s,str).."]") ---~ local s = lpeg.keeper("ab")             print("["..lpegmatch(s,str).."]") - -local cache = { } - -function lpeg.stripper(str) -    if type(str) == "string" then -        local s = cache[str] -        if not s then -            s = Cs(((S(str)^1)/"" + 1)^0) -            cache[str] = s -        end -        return s -    else -        return Cs(((str^1)/"" + 1)^0) -    end -end - -local cache = { } - -function lpeg.keeper(str) -    if type(str) == "string" then -        local s = cache[str] -        if not s then -            s = Cs((((1-S(str))^1)/"" + 1)^0) -            cache[str] = s -        end -        return s -    else -        return Cs((((1-str)^1)/"" + 1)^0) -    end -end - -function lpeg.frontstripper(str) -- or pattern (yet undocumented) -    return (P(str) + P(true)) * Cs(anything^0) -end - -function lpeg.endstripper(str) -- or pattern (yet undocumented) -    return Cs((1 - P(str) * endofstring)^0) -end - --- Just for fun I looked at the used bytecode and --- p = (p and p + pp) or pp gets one more (testset). - -function lpeg.replacer(one,two,makefunction) -    local pattern -    if type(one) == "table" then -        local no = #one -        local p = P(false) -        if no == 0 then -            for k, v in next, one do -                p = p + P(k) / v -            end -            pattern = Cs((p + 1)^0) -        elseif no == 1 then -            local o = one[1] -            one, two = P(o[1]), o[2] -         -- pattern = Cs(((1-one)^1 + one/two)^0) -            pattern = Cs((one/two + 1)^0) -        else -            for i=1,no do -                local o = one[i] -                p = p + P(o[1]) / o[2] -            end -            pattern = Cs((p + 1)^0) -        end -    else -        one = P(one) -        two = two or "" -     -- pattern = Cs(((1-one)^1 + one/two)^0) -        pattern = Cs((one/two +1)^0) -    end -    if makefunction then -        return function(str) -            return lpegmatch(pattern,str) -        end -    else -        return pattern -    end -end - -function lpeg.finder(lst,makefunction) -    local pattern -    if type(lst) == "table" then -        local p = P(false) -        for i=1,#lst do -            p = p + P(lst[i]) -        end -        pattern = (p + 1)^0 -    else -        pattern = (P(lst) + 1)^0 -    end -    if makefunction then -        return function(str) -            return lpegmatch(pattern,str) -        end -    else -        return pattern -    end -end - --- print(lpeg.match(lpeg.replacer("e","a"),"test test")) --- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test")) --- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test")) - -local splitters_f, splitters_s = { }, { } - -function lpeg.firstofsplit(separator) -- always return value -    local splitter = splitters_f[separator] -    if not splitter then -        separator = P(separator) -        splitter = C((1 - separator)^0) -        splitters_f[separator] = splitter -    end -    return splitter -end - -function lpeg.secondofsplit(separator) -- nil if not split -    local splitter = splitters_s[separator] -    if not splitter then -        separator = P(separator) -        splitter = (1 - separator)^0 * separator * C(anything^0) -        splitters_s[separator] = splitter -    end -    return splitter -end - -function lpeg.balancer(left,right) -    left, right = P(left), P(right) -    return P { left * ((1 - left - right) + V(1))^0 * right } -end - ---~ print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de")) ---~ print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty ---~ print(3,lpegmatch(lpeg.firstofsplit(":"),"bc")) ---~ print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de")) ---~ print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty ---~ print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc")) ---~ print(7,lpegmatch(lpeg.secondofsplit(":"),"bc")) ---~ print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc")) - ---~ -- slower: ---~ ---~ function lpeg.counter(pattern) ---~     local n, pattern = 0, (lpeg.P(pattern)/function() n = n + 1 end  + lpeg.anything)^0 ---~     return function(str) n = 0 ; lpegmatch(pattern,str) ; return n end ---~ end - -local nany = utf8char/"" - -function lpeg.counter(pattern) -    pattern = Cs((P(pattern)/" " + nany)^0) -    return function(str) -        return #lpegmatch(pattern,str) -    end -end - -if utfgmatch then - -    function lpeg.count(str,what) -- replaces string.count -        if type(what) == "string" then -            local n = 0 -            for _ in utfgmatch(str,what) do -                n = n + 1 -            end -            return n -        else -- 4 times slower but still faster than / function -            return #lpegmatch(Cs((P(what)/" " + nany)^0),str) -        end -    end - -else - -    local cache = { } - -    function lpeg.count(str,what) -- replaces string.count -        if type(what) == "string" then -            local p = cache[what] -            if not p then -                p = Cs((P(what)/" " + nany)^0) -                cache[p] = p -            end -            return #lpegmatch(p,str) -        else -- 4 times slower but still faster than / function -            return #lpegmatch(Cs((P(what)/" " + nany)^0),str) -        end -    end - -end - -local patterns_escapes = { -- also defines in l-string -    ["%"] = "%%", -    ["."] = "%.", -    ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", -    ["["] = "%[", ["]"] = "%]", -    ["("] = "%)", [")"] = "%)", - -- ["{"] = "%{", ["}"] = "%}" - -- ["^"] = "%^", ["$"] = "%$", -} - -local simple_escapes = { -- also defines in l-string -    ["-"] = "%-", -    ["."] = "%.", -    ["?"] = ".", -    ["*"] = ".*", -} - -local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0) -local s = Cs((S("-.+*%()[]") / simple_escapes   + anything)^0) - -function string.escapedpattern(str,simple) -    return lpegmatch(simple and s or p,str) -end - --- utf extensies - -lpeg.UP = lpeg.P - -if utfcharacters then - -    function lpeg.US(str) -        local p = P(false) -        for uc in utfcharacters(str) do -            p = p + P(uc) -        end -        return p -    end - - -elseif utfgmatch then - -    function lpeg.US(str) -        local p = P(false) -        for uc in utfgmatch(str,".") do -            p = p + P(uc) -        end -        return p -    end - -else - -    function lpeg.US(str) -        local p = P(false) -        local f = function(uc) -            p = p + P(uc) -        end -        lpegmatch((utf8char/f)^0,str) -        return p -    end - -end - -local range = utf8byte * utf8byte + Cc(false) -- utf8byte is already a capture - -local utfchar = unicode and unicode.utf8 and unicode.utf8.char - -function lpeg.UR(str,more) -    local first, last -    if type(str) == "number" then -        first = str -        last = more or first -    else -        first, last = lpegmatch(range,str) -        if not last then -            return P(str) -        end -    end -    if first == last then -        return P(str) -    elseif utfchar and (last - first < 8) then -- a somewhat arbitrary criterium -        local p = P(false) -        for i=first,last do -            p = p + P(utfchar(i)) -        end -        return p -- nil when invalid range -    else -        local f = function(b) -            return b >= first and b <= last -        end -        -- tricky, these nested captures -        return utf8byte / f -- nil when invalid range -    end -end - --- print(lpeg.match(lpeg.Cs((C(lpeg.UR("αω"))/{ ["χ"] = "OEPS" })^0),"αωχαω")) - ---~ lpeg.print(lpeg.R("ab","cd","gh")) ---~ lpeg.print(lpeg.P("a","b","c")) ---~ lpeg.print(lpeg.S("a","b","c")) - ---~ print(lpeg.count("äáàa",lpeg.P("á") + lpeg.P("à"))) ---~ print(lpeg.count("äáàa",lpeg.UP("áà"))) ---~ print(lpeg.count("äáàa",lpeg.US("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("aá"))) ---~ print(lpeg.count("äáàa",lpeg.UR("àá"))) ---~ print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF))) - -function lpeg.is_lpeg(p) -    return p and lpegtype(p) == "pattern" -end - -function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order -    if type(list) ~= "table" then -        list = { list, ... } -    end - -- table.sort(list) -- longest match first -    local p = P(list[1]) -    for l=2,#list do -        p = p + P(list[l]) -    end -    return p -end - --- For the moment here, but it might move to utilities. Beware, we need to --- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we --- loop back from the end cq. prepend. - -local sort, fastcopy, sortedkeys = table.sort, table.fastcopy, table.sortedkeys -- dependency! - -function lpeg.append(list,pp,delayed,checked) -    local p = pp -    if #list > 0 then -        local keys = fastcopy(list) -        sort(keys) -        for i=#keys,1,-1 do -            local k = keys[i] -            if p then -                p = P(k) + p -            else -                p = P(k) -            end -        end -    elseif delayed then -- hm, it looks like the lpeg parser resolves anyway -        local keys = sortedkeys(list) -        if p then -            for i=1,#keys,1 do -                local k = keys[i] -                local v = list[k] -                p = P(k)/list + p -            end -        else -            for i=1,#keys do -                local k = keys[i] -                local v = list[k] -                if p then -                    p = P(k) + p -                else -                    p = P(k) -                end -            end -            if p then -                p = p / list -            end -        end -    elseif checked then -        -- problem: substitution gives a capture -        local keys = sortedkeys(list) -        for i=1,#keys do -            local k = keys[i] -            local v = list[k] -            if p then -                if k == v then -                    p = P(k) + p -                else -                    p = P(k)/v + p -                end -            else -                if k == v then -                    p = P(k) -                else -                    p = P(k)/v -                end -            end -        end -    else -        local keys = sortedkeys(list) -        for i=1,#keys do -            local k = keys[i] -            local v = list[k] -            if p then -                p = P(k)/v + p -            else -                p = P(k)/v -            end -        end -    end -    return p -end - --- inspect(lpeg.append({ a = "1", aa = "1", aaa = "1" } ,nil,true)) --- inspect(lpeg.append({ ["degree celsius"] = "1", celsius = "1", degree = "1" } ,nil,true)) - --- function lpeg.exact_match(words,case_insensitive) ---     local pattern = concat(words) ---     if case_insensitive then ---         local pattern = S(upper(characters)) + S(lower(characters)) ---         local list = { } ---         for i=1,#words do ---             list[lower(words[i])] = true ---         end ---         return Cmt(pattern^1, function(_,i,s) ---             return list[lower(s)] and i ---         end) ---     else ---         local pattern = S(concat(words)) ---         local list = { } ---         for i=1,#words do ---             list[words[i]] = true ---         end ---         return Cmt(pattern^1, function(_,i,s) ---             return list[s] and i ---         end) ---     end --- end - --- experiment: - -local function make(t) -    local p ---     for k, v in next, t do -    for k, v in table.sortedhash(t) do -        if not p then -            if next(v) then -                p = P(k) * make(v) -            else -                p = P(k) -            end -        else -            if next(v) then -                p = p + P(k) * make(v) -            else -                p = p + P(k) -            end -        end -    end -    return p -end - -function lpeg.utfchartabletopattern(list) -    local tree = { } -    for i=1,#list do -        local t = tree -        for c in gmatch(list[i],".") do -            if not t[c] then -                t[c] = { } -            end -            t = t[c] -        end -    end -    return make(tree) -end - --- inspect ( lpeg.utfchartabletopattern { ---     utfchar(0x00A0), -- nbsp ---     utfchar(0x2000), -- enquad ---     utfchar(0x2001), -- emquad ---     utfchar(0x2002), -- enspace ---     utfchar(0x2003), -- emspace ---     utfchar(0x2004), -- threeperemspace ---     utfchar(0x2005), -- fourperemspace ---     utfchar(0x2006), -- sixperemspace ---     utfchar(0x2007), -- figurespace ---     utfchar(0x2008), -- punctuationspace ---     utfchar(0x2009), -- breakablethinspace ---     utfchar(0x200A), -- hairspace ---     utfchar(0x200B), -- zerowidthspace ---     utfchar(0x202F), -- narrownobreakspace ---     utfchar(0x205F), -- math thinspace --- } ) - --- handy from within tex: - -local lpegmatch = lpeg.match - -local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg! - -function string.tformat(fmt,...) -    return format(lpegmatch(replacer,fmt),...) -end - --- strips leading and trailing spaces and collapsed all other spaces - -local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0) - -function string.collapsespaces(str) -    return lpegmatch(pattern,str) -end - -end -- closure - -do -- begin closure to overcome local limits and interference -  if not modules then modules = { } end modules ['l-boolean'] = {      version   = 1.001,      comment   = "companion to luat-lib.mkiv", @@ -2205,25 +2190,25 @@ local suffix    = period/"" * (1-period-slashes)^1 * -1  local pattern = C((noslashes^0 * slashes^1)^1)  local function pathpart(name,default) -    return lpegmatch(pattern,name) or default or "" +    return name and lpegmatch(pattern,name) or default or ""  end  local pattern = (noslashes^0 * slashes)^1 * C(noslashes^1) * -1  local function basename(name) -    return lpegmatch(pattern,name) or name +    return name and lpegmatch(pattern,name) or name  end  local pattern = (noslashes^0 * slashes^1)^0 * Cs((1-suffix)^1) * suffix^0  local function nameonly(name) -    return lpegmatch(pattern,name) or name +    return name and lpegmatch(pattern,name) or name  end  local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * C(noperiod^1) * -1  local function suffixonly(name) -    return lpegmatch(pattern,name) or "" +    return name and lpegmatch(pattern,name) or ""  end  file.pathpart   = pathpart @@ -2254,7 +2239,9 @@ local pattern_c = C(drive * path) * C(base * suffix) -- trick: two extra capture  local pattern_d =           path  *   rest  function file.splitname(str,splitdrive) -    if splitdrive then +    if not str then +        -- error +    elseif splitdrive then          return lpegmatch(pattern_a,str) -- returns drive, path, base, suffix      else          return lpegmatch(pattern_b,str) -- returns path, base, suffix @@ -2262,34 +2249,36 @@ function file.splitname(str,splitdrive)  end  function file.splitbase(str) -    return lpegmatch(pattern_d,str) -- returns path, base+suffix +    return str and lpegmatch(pattern_d,str) -- returns path, base+suffix  end  function file.nametotable(str,splitdrive) -- returns table -    local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) -    if splitdrive then -        return { -            path    = path, -            drive   = drive, -            subpath = subpath, -            name    = name, -            base    = base, -            suffix  = suffix, -        } -    else -        return { -            path    = path, -            name    = name, -            base    = base, -            suffix  = suffix, -        } +    if str then +        local path, drive, subpath, name, base, suffix = lpegmatch(pattern_c,str) +        if splitdrive then +            return { +                path    = path, +                drive   = drive, +                subpath = subpath, +                name    = name, +                base    = base, +                suffix  = suffix, +            } +        else +            return { +                path    = path, +                name    = name, +                base    = base, +                suffix  = suffix, +            } +        end      end  end  local pattern = Cs(((period * noperiod^1 * -1)/"" + 1)^1)  function file.removesuffix(name) -    return lpegmatch(pattern,name) +    return name and lpegmatch(pattern,name)  end  -- local pattern = (noslashes^0 * slashes)^0 * (noperiod^1 * period)^1 * Cp() * noperiod^1 * -1 @@ -2306,8 +2295,8 @@ end  local suffix  = period/"" * (1-period-slashes)^1 * -1  local pattern = Cs((noslashes^0 * slashes^1)^0 * ((1-suffix)^1)) * Cs(suffix) -function file.addsuffix(filename, suffix, criterium) -    if not suffix or suffix == "" then +function file.addsuffix(filename,suffix,criterium) +    if not filename or not suffix or suffix == "" then          return filename      elseif criterium == true then          return filename .. "." .. suffix @@ -2353,7 +2342,7 @@ local suffix  = period * (1-period-slashes)^1 * -1  local pattern = Cs((1-suffix)^0)  function file.replacesuffix(name,suffix) -    if suffix and suffix ~= "" then +    if name and suffix and suffix ~= "" then          return lpegmatch(pattern,name) .. "." .. suffix      else          return name @@ -2362,10 +2351,10 @@ end  -- -local reslasher = lpeg.replacer(S("\\"),"/") +local reslasher = lpeg.replacer(P("\\"),"/")  function file.reslash(str) -    return lpegmatch(reslasher,str) +    return str and lpegmatch(reslasher,str)  end  -- We should be able to use: @@ -2381,7 +2370,9 @@ end  -- variant:  function file.is_writable(name) -    if lfs.isdir(name) then +    if not name then +        -- error +    elseif lfs.isdir(name) then          name = name .. "/m_t_x_t_e_s_t.tmp"          local f = io.open(name,"wb")          if f then @@ -2409,24 +2400,32 @@ end  local readable = P("r") * Cc(true)  function file.is_readable(name) -    local a = attributes(name) -    return a and lpegmatch(readable,a.permissions) or false +    if name then +        local a = attributes(name) +        return a and lpegmatch(readable,a.permissions) or false +    else +        return false +    end  end  file.isreadable = file.is_readable -- depricated  file.iswritable = file.is_writable -- depricated  function file.size(name) -    local a = attributes(name) -    return a and a.size or 0 +    if name then +        local a = attributes(name) +        return a and a.size or 0 +    else +        return 0 +    end  end  function file.splitpath(str,separator) -- string .. reslash is a bonus (we could do a direct split) -    return checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator) +    return str and checkedsplit(lpegmatch(reslasher,str),separator or io.pathseparator)  end  function file.joinpath(tab,separator) -- table -    return concat(tab,separator or io.pathseparator) -- can have trailing // +    return tab and concat(tab,separator or io.pathseparator) -- can have trailing //  end  local stripper  = Cs(P(fwslash)^0/"" * reslasher) @@ -2434,14 +2433,23 @@ local isnetwork = fwslash * fwslash * (1-fwslash) + (1-fwslash-colon)^1 * colon  local isroot    = fwslash^1 * -1  local hasroot   = fwslash^1 -function file.join(...) -- rather dirty +local deslasher = lpeg.replacer(S("\\/")^1,"/") + +-- If we have a network or prefix then there is a change that we end up with two +-- // in the middle ... we could prevent this if we (1) expand prefixes: and (2) +-- split and rebuild as url. Of course we could assume no network paths (which +-- makes sense) adn assume either mapped drives (windows) or mounts (unix) but +-- then we still have to deal with urls ... anyhow, multiple // are never a real +-- problem but just ugly. + +function file.join(...)      local lst = { ... }      local one = lst[1]      if lpegmatch(isnetwork,one) then -        local two = lpegmatch(reslasher,concat(lst,"/",2)) +        local two = lpegmatch(deslasher,concat(lst,"/",2))          return one .. "/" .. two      elseif lpegmatch(isroot,one) then -        local two = lpegmatch(reslasher,concat(lst,"/",2)) +        local two = lpegmatch(deslasher,concat(lst,"/",2))          if lpegmatch(hasroot,two) then              return two          else @@ -2450,7 +2458,7 @@ function file.join(...) -- rather dirty      elseif one == "" then          return lpegmatch(stripper,concat(lst,"/",2))      else -        return lpegmatch(reslasher,concat(lst,"/")) +        return lpegmatch(deslasher,concat(lst,"/"))      end  end @@ -2479,6 +2487,9 @@ local splitstarter = (Cs(drivespec * (bwslash/"/" + fwslash)^0) + Cc(false)) * C  local absolute     = fwslash  function file.collapsepath(str,anchor) +    if not str then +        return +    end      if anchor and not lpegmatch(anchors,str) then          str = getcurrentdir() .. "/" .. str      end @@ -2488,7 +2499,6 @@ function file.collapsepath(str,anchor)          return lpegmatch(reslasher,str)      end      local starter, oldelements = lpegmatch(splitstarter,str) --- inspect(oldelements)      local newelements = { }      local i = #oldelements      while i > 0 do @@ -2542,11 +2552,13 @@ local whatever   = P("-")^0 / ""  local pattern_b  = Cs(whatever * (1 - whatever * -1)^1)  function file.robustname(str,strict) -    str = lpegmatch(pattern_a,str) or str -    if strict then -        return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) -    else -        return str +    if str then +        str = lpegmatch(pattern_a,str) or str +        if strict then +            return lpegmatch(pattern_b,str) or str -- two step is cleaner (less backtracking) +        else +            return str +        end      end  end @@ -2554,7 +2566,9 @@ file.readdata = io.loaddata  file.savedata = io.savedata  function file.copy(oldname,newname) -    file.savedata(newname,io.loaddata(oldname)) +    if oldname and newname then +        file.savedata(newname,io.loaddata(oldname)) +    end  end  -- also rewrite previous @@ -2575,11 +2589,11 @@ lpeg.patterns.rootbased = rootbased  -- ./name ../name  /name c: :// name/name  function file.is_qualified_path(filename) -    return lpegmatch(qualified,filename) ~= nil +    return filename and lpegmatch(qualified,filename) ~= nil  end  function file.is_rootbased_path(filename) -    return lpegmatch(rootbased,filename) ~= nil +    return filename and lpegmatch(rootbased,filename) ~= nil  end  -- function test(t) for k, v in next, t do print(v, "=>", file.splitname(v)) end end @@ -2601,8 +2615,10 @@ end  -- for myself:  function file.strip(name,dir) -    local b, a = match(name,"^(.-)" .. dir .. "(.*)$") -    return a ~= "" and a or name +    if name then +        local b, a = match(name,"^(.-)" .. dir .. "(.*)$") +        return a ~= "" and a or name +    end  end  -- local debuglist = { @@ -2954,7 +2970,7 @@ function io.readstring(f,n,m)          f:seek("set",n)          n = m      end -    local str = gsub(f:read(n),"%z","") +    local str = gsub(f:read(n),"\000","")      return str  end @@ -3544,11 +3560,8 @@ if not modules then modules = { } end modules ['font-con'] = {      license   = "see context related readme files"  } -  -- some names of table entries will be changed (no _) -local utf = unicode.utf8 -  local next, tostring, rawget = next, tostring, rawget  local format, match, lower, gsub = string.format, string.match, string.lower, string.gsub  local utfbyte = utf.byte @@ -5651,8 +5664,6 @@ if not modules then modules = { } end modules ['font-otf'] = {  -- more checking against low level calls of functions -local utf = unicode.utf8 -  local utfbyte = utf.byte  local format, gmatch, gsub, find, match, lower, strip = string.format, string.gmatch, string.gsub, string.find, string.match, string.lower, string.strip  local type, next, tonumber, tostring = type, next, tonumber, tostring @@ -6092,32 +6103,32 @@ function otf.load(filename,format,sub,featurefile)              starttiming(data)              report_otf("file size: %s", size)              enhancers.apply(data,filename,fontdata) +            local packtime = { }              if packdata then                  if cleanup > 0 then                      collectgarbage("collect") ---~ lua.collectgarbage()                  end +                starttiming(packtime)                  enhance("pack",data,filename,nil) +                stoptiming(packtime)              end              report_otf("saving in cache: %s",filename)              data = containers.write(otf.cache, hash, data)              if cleanup > 1 then                  collectgarbage("collect") ---~ lua.collectgarbage()              end              stoptiming(data)              if elapsedtime then -- not in generic -                report_otf("preprocessing and caching took %s seconds",elapsedtime(data)) +                report_otf("preprocessing and caching took %s seconds (packtime: %s)", +                    elapsedtime(data),packdata and elapsedtime(packtime) or 0)              end              fontloader.close(fontdata) -- free memory              if cleanup > 3 then                  collectgarbage("collect") ---~ lua.collectgarbage()              end              data = containers.read(otf.cache, hash) -- this frees the old table and load the sparse one              if cleanup > 2 then                  collectgarbage("collect") ---~ lua.collectgarbage()              end          else              data = nil @@ -6695,7 +6706,10 @@ actions["reorganize subtables"] = function(data,filename,raw)                      --                      local name = gk.name                      -- -                    if features then +                    if not name then +                        -- in fact an error +                        report_otf("skipping weird lookup number %s",k) +                    elseif features then                          -- scripts, tag, ismac                          local f = { }                          for i=1,#features do @@ -7128,6 +7142,9 @@ actions["merge kern classes"] = function(data,filename,raw)                              if type(lookups) ~= "table" then                                  lookups = { lookups }                              end +                         -- if offsets[1] == nil then +                         --     offsets[1] = "" +                         -- end                              -- we can check the max in the loop                           -- local maxseconds = getn(seconds)                              for n, s in next, firsts do @@ -7148,9 +7165,9 @@ actions["merge kern classes"] = function(data,filename,raw)                                      if splt then                                          local extrakerns = { }                                          local baseoffset = (fk-1) * maxseconds -                                     -- for sk=2,maxseconds do -                                     --     local sv = seconds[sk] -                                        for sk, sv in next, seconds do +                                        for sk=2,maxseconds do -- will become 1 based in future luatex +                                            local sv = seconds[sk] +                                     -- for sk, sv in next, seconds do                                              local splt = split[sv]                                              if splt then -- redundant test                                                  local offset = offsets[baseoffset + sk] @@ -8377,7 +8394,7 @@ if not modules then modules = { } end modules ['node-inj'] = {  -- This is very experimental (this will change when we have luatex > .50 and  -- a few pending thingies are available. Also, Idris needs to make a few more  -- test fonts. Btw, future versions of luatex will have extended glyph properties --- that can be of help. +-- that can be of help. Some optimizations can go away when we have faster machines.  local next = next @@ -8864,7 +8881,7 @@ if not modules then modules = { } end modules ['font-ota'] = {  -- this might become scrp-*.lua -local type, tostring, match, format, concat = type, tostring, string.match, string.format, table.concat +local type = type  if not trackers then trackers = { register = function() end } end @@ -9249,6 +9266,8 @@ if not modules then modules = { } end modules ['font-otn'] = {  -- handle positions (we need example fonts)  -- handle gpos_single (we might want an extra width field in glyph nodes because adding kerns might interfere)  -- mark (to mark) code is still not what it should be (too messy but we need some more extreem husayni tests) +-- remove some optimizations (when I have a faster machine) +  --[[ldx--  <p>This module is a bit more split up that I'd like but since we also want to test @@ -12601,7 +12620,6 @@ if not modules then modules = { } end modules ['font-def'] = {  -- We can overload some of the definers.functions so we don't local them. -local concat = table.concat  local format, gmatch, match, find, lower, gsub = string.format, string.gmatch, string.match, string.find, string.lower, string.gsub  local tostring, next = tostring, next  local lpegmatch = lpeg.match diff --git a/tex/generic/context/luatex/luatex-fonts.lua b/tex/generic/context/luatex/luatex-fonts.lua index dd2c902cd..6b502cd24 100644 --- a/tex/generic/context/luatex/luatex-fonts.lua +++ b/tex/generic/context/luatex/luatex-fonts.lua @@ -22,7 +22,7 @@ if not modules then modules = { } end modules ['luatex-fonts'] = {  --  -- Todo: all global namespaces in called modules will get local shortcuts. -utf = unicode.utf8 +utf = utf or unicode.utf8  if not generic_context then @@ -132,10 +132,10 @@ else      -- mess up ConTeXt code for the sake of general generality. Around      -- version 1.0 there will be an official api defined. +    loadmodule('l-lpeg.lua')      loadmodule('l-function.lua')      loadmodule('l-string.lua')      loadmodule('l-table.lua') -    loadmodule('l-lpeg.lua')      loadmodule('l-boolean.lua')      loadmodule('l-math.lua')      loadmodule('l-file.lua') | 
