1 files changed, 59 insertions, 169 deletions
diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua
index d92b722ed..13294ab0d 100644
--- a/tex/context/base/l-lpeg.lua
+++ b/tex/context/base/l-lpeg.lua
@@ -13,8 +13,6 @@ local lpeg = require("lpeg")
 
 -- tracing (only used when we encounter a problem in integration of lpeg in luatex)
 
--- some code will move to unicode and string
-
 local report = texio and texio.write_nl or print
 
 -- local lpmatch = lpeg.match
@@ -51,8 +49,8 @@ local report = texio and texio.write_nl or print
 -- function lpeg.Cmt  (l) local p = lpcmt (l) report("LPEG Cmt =")  lpprint(l) return p end
 -- function lpeg.Carg (l) local p = lpcarg(l) report("LPEG Carg =") lpprint(l) return p end
 
-local type, next = type, next
-local byte, char, gmatch, format = string.byte, string.char, string.gmatch, string.format
+local type = type
+local byte, char, gmatch = string.byte, string.char, string.gmatch
 
 -- Beware, we predefine a bunch of patterns here and one reason for doing so
 -- is that we get consistent behaviour in some of the visualizers.
@@ -60,8 +58,9 @@ local byte, char, gmatch, format = string.byte, string.char, string.gmatch, stri
 lpeg.patterns  = lpeg.patterns or { } -- so that we can share
 local patterns = lpeg.patterns
 
-local P, R, S, V, Ct, C, Cs, Cc, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc, lpeg.Cp
-local lpegtype, lpegmatch = lpeg.type, lpeg.match
+local P, R, S, V, match = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match
+local Ct, C, Cs, Cc = lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc
+local lpegtype = lpeg.type
 
 local utfcharacters    = string.utfcharacters
 local utfgmatch        = unicode and unicode.utf8.gmatch
@@ -112,10 +111,6 @@ patterns.utf8char      = utf8char
 patterns.validutf8     = validutf8char
 patterns.validutf8char = validutf8char
 
-local eol              = S("\n\r")
-local spacer           = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto)
-local whitespace       = eol + spacer
-
 patterns.digit         = digit
 patterns.sign          = sign
 patterns.cardinal      = sign^0 * digit^1
@@ -135,16 +130,16 @@ patterns.letter        = patterns.lowercase + patterns.uppercase
 patterns.space         = space
 patterns.tab           = P("\t")
 patterns.spaceortab    = patterns.space + patterns.tab
-patterns.eol           = eol
-patterns.spacer        = spacer
-patterns.whitespace    = whitespace
+patterns.eol           = S("\n\r")
+patterns.spacer        = S(" \t\f\v")  -- + char(0xc2, 0xa0) if we want utf (cf mail roberto)
 patterns.newline       = newline
 patterns.emptyline     = newline^1
-patterns.nonspacer     = 1 - spacer
-patterns.nonwhitespace = 1 - whitespace
+patterns.nonspacer     = 1 - patterns.spacer
+patterns.whitespace    = patterns.eol + patterns.spacer
+patterns.nonwhitespace = 1 - patterns.whitespace
 patterns.equal         = P("=")
 patterns.comma         = P(",")
-patterns.commaspacer   = P(",") * spacer^0
+patterns.commaspacer   = P(",") * patterns.spacer^0
 patterns.period        = P(".")
 patterns.colon         = P(":")
 patterns.semicolon     = P(";")
@@ -159,10 +154,6 @@ patterns.undouble      = (dquote/"") * patterns.nodquote * (dquote/"")
 patterns.unquoted      = patterns.undouble + patterns.unsingle -- more often undouble
 patterns.unspacer      = ((patterns.spacer^1)/"")^0
 
-patterns.singlequoted  = squote * patterns.nosquote * squote
-patterns.doublequoted  = dquote * patterns.nodquote * dquote
-patterns.quoted        = patterns.doublequoted + patterns.singlequoted
-
 patterns.somecontent   = (anything - newline - space)^1 -- (utf8char - newline - space)^1
 patterns.beginline     = #(1-newline)
 
@@ -173,17 +164,8 @@ patterns.beginline     = #(1-newline)
 -- print(string.unquoted('"test"'))
 -- print(string.unquoted('"test"'))
 
-local function anywhere(pattern) --slightly adapted from website
-    return P { P(pattern) + 1 * V(1) }
-end
-
-lpeg.anywhere = anywhere
-
-function lpeg.instringchecker(p)
-    p = anywhere(p)
-    return function(str)
-        return lpegmatch(p,str) and true or false
-    end
+function lpeg.anywhere(pattern) --slightly adapted from website
+    return P { P(pattern) + 1 * V(1) } -- one-rule grammar: match pattern here, else skip one character and retry
 end
 
 function lpeg.splitter(pattern, action)
@@ -232,13 +214,13 @@ function string.splitup(str,separator)
     if not separator then
         separator = ","
     end
-    return lpegmatch(splitters_m[separator] or splitat(separator),str)
+    return match(splitters_m[separator] or splitat(separator),str)
 end
 
---~ local p = splitat("->",false)  print(lpegmatch(p,"oeps->what->more"))  -- oeps what more
---~ local p = splitat("->",true)   print(lpegmatch(p,"oeps->what->more"))  -- oeps what->more
---~ local p = splitat("->",false)  print(lpegmatch(p,"oeps"))              -- oeps
---~ local p = splitat("->",true)   print(lpegmatch(p,"oeps"))              -- oeps
+--~ local p = splitat("->",false)  print(match(p,"oeps->what->more"))  -- oeps what more
+--~ local p = splitat("->",true)   print(match(p,"oeps->what->more"))  -- oeps what->more
+--~ local p = splitat("->",false)  print(match(p,"oeps"))              -- oeps
+--~ local p = splitat("->",true)   print(match(p,"oeps"))              -- oeps
 
 local cache = { }
 
@@ -248,20 +230,16 @@ function lpeg.split(separator,str)
         c = tsplitat(separator)
         cache[separator] = c
     end
-    return lpegmatch(c,str)
+    return match(c,str)
 end
 
 function string.split(str,separator)
-    if separator then
-        local c = cache[separator]
-        if not c then
-            c = tsplitat(separator)
-            cache[separator] = c
-        end
-        return lpegmatch(c,str)
-    else
-        return { str }
+    local c = cache[separator]
+    if not c then
+        c = tsplitat(separator)
+        cache[separator] = c
     end
+    return match(c,str)
 end
 
 local spacing  = patterns.spacer^0 * newline -- sort of strip
@@ -274,7 +252,7 @@ patterns.textline = content
 --~ local linesplitter = Ct(content^0)
 --~
 --~ function string.splitlines(str)
---~     return lpegmatch(linesplitter,str)
+--~     return match(linesplitter,str)
 --~ end
 
 local linesplitter = tsplitat(newline)
@@ -282,7 +260,7 @@ local linesplitter = tsplitat(newline)
 patterns.linesplitter = linesplitter
 
 function string.splitlines(str)
-    return lpegmatch(linesplitter,str)
+    return match(linesplitter,str)
 end
 
 local utflinesplitter = utfbom^-1 * tsplitat(newline)
@@ -290,58 +268,7 @@ local utflinesplitter = utfbom^-1 * tsplitat(newline)
 patterns.utflinesplitter = utflinesplitter
 
 function string.utfsplitlines(str)
-    return lpegmatch(utflinesplitter,str or "")
-end
-
-local utfcharsplitter_ows = utfbom^-1 * Ct(C(utf8char)^0)
-local utfcharsplitter_iws = utfbom^-1 * Ct((whitespace^1 + C(utf8char))^0)
-
-function string.utfsplit(str,ignorewhitespace) -- new
-    if ignorewhitespace then
-        return lpegmatch(utfcharsplitter_iws,str or "")
-    else
-        return lpegmatch(utfcharsplitter_ows,str or "")
-    end
-end
-
--- inspect(string.utfsplit("a b c d"))
--- inspect(string.utfsplit("a b c d",true))
-
--- -- alternative 1: 0.77
---
--- local utfcharcounter = utfbom^-1 * Cs((utf8char/'!')^0)
---
--- function string.utflength(str)
---     return #lpegmatch(utfcharcounter,str or "")
--- end
---
--- -- alternative 2: 1.70
---
--- local n = 0
---
--- local utfcharcounter = utfbom^-1 * (utf8char/function() n = n + 1 end)^0 -- slow
---
--- function string.utflength(str)
---     n = 0
---     lpegmatch(utfcharcounter,str or "")
---     return n
--- end
---
--- -- alternative 3: 0.24 (native unicode.utf8.len: 0.047)
-
-local n = 0
-
-local utfcharcounter = utfbom^-1 * Cs ( (
-    Cp() * (lpeg.patterns.utf8one  )^1 * Cp() / function(f,t) n = n +  t - f    end
-  + Cp() * (lpeg.patterns.utf8two  )^1 * Cp() / function(f,t) n = n + (t - f)/2 end
-  + Cp() * (lpeg.patterns.utf8three)^1 * Cp() / function(f,t) n = n + (t - f)/3 end
-  + Cp() * (lpeg.patterns.utf8four )^1 * Cp() / function(f,t) n = n + (t - f)/4 end
-)^0 )
-
-function string.utflength(str)
-    n = 0
-    lpegmatch(utfcharcounter,str or "")
-    return n
+    return match(utflinesplitter,str or "")
 end
 
 --~ lpeg.splitters = cache -- no longer public
@@ -356,7 +283,7 @@ function lpeg.checkedsplit(separator,str)
         c = Ct(separator^0 * other * (separator^1 * other)^0)
         cache[separator] = c
     end
-    return lpegmatch(c,str)
+    return match(c,str)
 end
 
 function string.checkedsplit(str,separator)
@@ -367,7 +294,7 @@ function string.checkedsplit(str,separator)
         c = Ct(separator^0 * other * (separator^1 * other)^0)
         cache[separator] = c
     end
-    return lpegmatch(c,str)
+    return match(c,str)
 end
 
 --~ from roberto's site:
@@ -382,10 +309,10 @@ patterns.utf8byte = utf8byte
 
 --~ local str = " a b c d "
 
---~ local s = lpeg.stripper(lpeg.R("az"))   print("["..lpegmatch(s,str).."]")
---~ local s = lpeg.keeper(lpeg.R("az"))     print("["..lpegmatch(s,str).."]")
---~ local s = lpeg.stripper("ab")           print("["..lpegmatch(s,str).."]")
---~ local s = lpeg.keeper("ab")             print("["..lpegmatch(s,str).."]")
+--~ local s = lpeg.stripper(lpeg.R("az"))   print("["..lpeg.match(s,str).."]")
+--~ local s = lpeg.keeper(lpeg.R("az"))     print("["..lpeg.match(s,str).."]")
+--~ local s = lpeg.stripper("ab")           print("["..lpeg.match(s,str).."]")
+--~ local s = lpeg.keeper("ab")             print("["..lpeg.match(s,str).."]")
 
 local cache = { }
 
@@ -418,11 +345,11 @@ function lpeg.keeper(str)
 end
 
 function lpeg.frontstripper(str) -- or pattern (yet undocumented)
-    return (P(str) + P(true)) * Cs(anything^0)
+    return (P(str) + P(true)) * Cs(P(1)^0)
 end
 
 function lpeg.endstripper(str) -- or pattern (yet undocumented)
-    return Cs((1 - P(str) * endofstring)^0)
+    return Cs((1 - P(str) * P(-1))^0)
 end
 
 -- Just for fun I looked at the used bytecode and
@@ -431,22 +358,8 @@ end
 function lpeg.replacer(one,two)
     if type(one) == "table" then
         local no = #one
-        local p
-        if no == 0 then
-            for k, v in next, one do
-                local pp = P(k) / v
-                if p then
-                    p = p + pp
-                else
-                    p = pp
-                end
-            end
-            return Cs((p + 1)^0)
-        elseif no == 1 then
-            local o = one[1]
-            one, two = P(o[1]), o[2]
-            return Cs(((1-one)^1 + one/two)^0)
-        else
+        if no > 0 then
+            local p
             for i=1,no do
                 local o = one[i]
                 local pp = P(o[1]) / o[2]
@@ -459,16 +372,11 @@ function lpeg.replacer(one,two)
             return Cs((p + 1)^0)
         end
     else
-        one = P(one)
         two = two or ""
-        return Cs(((1-one)^1 + one/two)^0)
+        return Cs((P(one)/two + 1)^0)
     end
 end
 
--- print(lpeg.match(lpeg.replacer("e","a"),"test test"))
--- print(lpeg.match(lpeg.replacer{{"e","a"}},"test test"))
--- print(lpeg.match(lpeg.replacer({ e = "a", t = "x" }),"test test"))
-
 local splitters_f, splitters_s = { }, { }
 
 function lpeg.firstofsplit(separator) -- always return value
@@ -496,14 +404,14 @@ function lpeg.balancer(left,right)
     return P { left * ((1 - left - right) + V(1))^0 * right }
 end
 
---~ print(1,lpegmatch(lpeg.firstofsplit(":"),"bc:de"))
---~ print(2,lpegmatch(lpeg.firstofsplit(":"),":de")) -- empty
---~ print(3,lpegmatch(lpeg.firstofsplit(":"),"bc"))
---~ print(4,lpegmatch(lpeg.secondofsplit(":"),"bc:de"))
---~ print(5,lpegmatch(lpeg.secondofsplit(":"),"bc:")) -- empty
---~ print(6,lpegmatch(lpeg.secondofsplit(":",""),"bc"))
---~ print(7,lpegmatch(lpeg.secondofsplit(":"),"bc"))
---~ print(9,lpegmatch(lpeg.secondofsplit(":","123"),"bc"))
+--~ print(1,match(lpeg.firstofsplit(":"),"bc:de"))
+--~ print(2,match(lpeg.firstofsplit(":"),":de")) -- empty
+--~ print(3,match(lpeg.firstofsplit(":"),"bc"))
+--~ print(4,match(lpeg.secondofsplit(":"),"bc:de"))
+--~ print(5,match(lpeg.secondofsplit(":"),"bc:")) -- empty
+--~ print(6,match(lpeg.secondofsplit(":",""),"bc"))
+--~ print(7,match(lpeg.secondofsplit(":"),"bc"))
+--~ print(9,match(lpeg.secondofsplit(":","123"),"bc"))
 
 --~ -- slower:
 --~
@@ -517,7 +425,7 @@ local nany = utf8char/""
 function lpeg.counter(pattern)
     pattern = Cs((P(pattern)/" " + nany)^0)
     return function(str)
-        return #lpegmatch(pattern,str)
+        return #match(pattern,str)
     end
 end
 
@@ -531,7 +439,7 @@ if utfgmatch then
             end
             return n
         else -- 4 times slower but still faster than / function
-            return #lpegmatch(Cs((P(what)/" " + nany)^0),str)
+            return #match(Cs((P(what)/" " + nany)^0),str)
         end
     end
 
@@ -546,9 +454,9 @@ else
                 p = Cs((P(what)/" " + nany)^0)
                 cache[p] = p
             end
-            return #lpegmatch(p,str)
+            return #match(p,str)
         else -- 4 times slower but still faster than / function
-            return #lpegmatch(Cs((P(what)/" " + nany)^0),str)
+            return #match(Cs((P(what)/" " + nany)^0),str)
         end
     end
 
@@ -575,7 +483,7 @@ local p = Cs((S("-.+*%()[]") / patterns_escapes + anything)^0)
 local s = Cs((S("-.+*%()[]") / simple_escapes   + anything)^0)
 
 function string.escapedpattern(str,simple)
-    return lpegmatch(simple and s or p,str)
+    return match(simple and s or p,str)
 end
 
 -- utf extensies
@@ -622,7 +530,7 @@ else
                 p = P(uc)
             end
         end
-        lpegmatch((utf8char/f)^0,str)
+        match((utf8char/f)^0,str)
         return p
     end
 
@@ -638,7 +546,7 @@ function lpeg.UR(str,more)
         first = str
         last = more or first
     else
-        first, last = lpegmatch(range,str)
+        first, last = match(range,str)
         if not last then
             return P(str)
         end
@@ -674,15 +582,11 @@ end
 --~ print(lpeg.count("äáàa",lpeg.UR("àá")))
 --~ print(lpeg.count("äáàa",lpeg.UR(0x0000,0xFFFF)))
 
-function lpeg.is_lpeg(p)
-    return p and lpegtype(p) == "pattern"
-end
-
-function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assume proper order
+function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- ordered choice: put longer alternatives first
     if type(list) ~= "table" then
         list = { list, ... }
     end
- -- table.sort(list) -- longest match first
+ -- sort(list) -- longest match first
     local p = P(list[1])
     for l=2,#list do
         p = p + P(list[l])
@@ -690,6 +594,10 @@ function lpeg.oneof(list,...) -- lpeg.oneof("elseif","else","if","then") -- assu
     return p
 end
 
+function lpeg.is_lpeg(p)
+    return p and lpegtype(p) == "pattern"
+end
+
 -- For the moment here, but it might move to utilities. Beware, we need to
 -- have the longest keyword first, so 'aaa' comes beforte 'aa' which is why we
 -- loop back from the end cq. prepend.
@@ -846,21 +754,3 @@ end
 --     utfchar(0x202F), -- narrownobreakspace
 --     utfchar(0x205F), -- math thinspace
 -- } )
-
--- handy from within tex:
-
-local lpegmatch = lpeg.match
-
-local replacer = lpeg.replacer("@","%%") -- Watch the escaped % in lpeg!
-
-function string.tformat(fmt,...)
-    return format(lpegmatch(replacer,fmt),...)
-end
-
--- strips leading and trailing spaces and collapsed all other spaces
-
-local pattern = Cs(whitespace^0/"" * ((whitespace^1 * P(-1) / "") + (whitespace^1/" ") + P(1))^0)
-
-function string.collapsespaces(str)
-    return lpegmatch(pattern,str)
-end