1 files changed, 165 insertions, 0 deletions
diff --git a/luaextra-lpeg.lua b/luaextra-lpeg.lua
new file mode 100644
index 0000000..9e761e4
--- /dev/null
+++ b/luaextra-lpeg.lua
@@ -0,0 +1,165 @@
+if not modules then modules = { } end modules ['l-lpeg'] = { -- metadata record for this file, stored in the global 'modules' table (created when missing)
+    version   = 1.001,
+    comment   = "companion to luat-lib.mkiv",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files"
+}
+
+lpeg = require("lpeg")
+
+local P, R, S, Ct, C, Cs, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc
+local match = lpeg.match
+
+--~ l-lpeg.lua :
+
+--~ lpeg.digit         = lpeg.R('09')^1
+--~ lpeg.sign          = lpeg.S('+-')^1
+--~ lpeg.cardinal      = lpeg.P(lpeg.sign^0 * lpeg.digit^1)
+--~ lpeg.integer       = lpeg.P(lpeg.sign^0 * lpeg.digit^1)
+--~ lpeg.float         = lpeg.P(lpeg.sign^0 * lpeg.digit^0 * lpeg.P('.') * lpeg.digit^1)
+--~ lpeg.number        = lpeg.float + lpeg.integer
+--~ lpeg.oct           = lpeg.P("0") * lpeg.R('07')^1
+--~ lpeg.hex           = lpeg.P("0x") * (lpeg.R('09') + lpeg.R('AF'))^1
+--~ lpeg.uppercase     = lpeg.P("AZ")
+--~ lpeg.lowercase     = lpeg.P("az")
+
+--~ lpeg.eol           = lpeg.S('\r\n\f')^1 -- includes formfeed
+--~ lpeg.space         = lpeg.S(' ')^1
+--~ lpeg.nonspace      = lpeg.P(1-lpeg.space)^1
+--~ lpeg.whitespace    = lpeg.S(' \r\n\f\t')^1
+--~ lpeg.nonwhitespace = lpeg.P(1-lpeg.whitespace)^1
+
+local hash = { } -- NOTE(review): never referenced anywhere in this file
+
+function lpeg.anywhere(pattern) -- slightly adapted from website: try 'pattern' here, otherwise skip one character and try again
+    return P { [1] = P(pattern) + P(1) * lpeg.V(1) }
+end
+
+function lpeg.startswith(pattern) -- slightly adapted; only converts 'pattern' into an lpeg pattern with P
+    return P(pattern)
+end
+
+function lpeg.splitter(pattern, action) -- every run of characters at which 'pattern' does not match is handed to 'action'
+    return ((1 - P(pattern))^1 / action + P(1))^0 -- NOTE(review): anything else is skipped one character at a time, also inside a multi-character separator
+end
+
+-- variant:
+
+--~ local parser = lpeg.Ct(lpeg.splitat(newline))
+
+local crlf     = P("\r\n") -- carriage return followed by line feed
+local cr       = P("\r")
+local lf       = P("\n")
+local space    = S(" \t\f\v")  -- + string.char(0xc2, 0xa0) if we want utf (cf mail roberto)
+local newline  = crlf + cr + lf -- crlf is tried first, so "\r\n" counts as one line ending
+local spacing  = space^0 * newline -- optional blanks followed by a line ending
+
+local empty    = spacing * Cc("") -- a line with nothing but blanks is captured as ""
+local nonempty = Cs((1-spacing)^1) * spacing^-1 -- text up to the blanks that precede a line ending; the line ending itself is optional
+local content  = (empty + nonempty)^1 -- one or more lines, one capture per line
+
+local capture = Ct(content^0) -- collects the line captures in a table; ^0 lets an empty subject match as well
+
+function string:splitlines() -- matches 'capture' against self and returns the resulting table of lines
+    return match(capture,self)
+end
+
+lpeg.linebyline = content -- the line pattern without the Ct wrapper; better make a sublibrary
+
+--~ local p = lpeg.splitat("->",false)  print(match(p,"oeps->what->more"))  -- oeps what more
+--~ local p = lpeg.splitat("->",true)   print(match(p,"oeps->what->more"))  -- oeps what->more
+--~ local p = lpeg.splitat("->",false)  print(match(p,"oeps"))              -- oeps
+--~ local p = lpeg.splitat("->",true)   print(match(p,"oeps"))              -- oeps
+
+local splitters_s, splitters_m = { }, { }
+
+-- Return a cached splitter pattern for 'separator'; when 'single' is true only the first separator splits.
+local function splitat(separator,single)
+    local store = single and splitters_s or splitters_m -- one cache per variant, so they never get mixed up
+    local splitter = store[separator]
+    if not splitter then
+        local sep = P(separator) -- 'separator' itself stays untouched because it is the cache key
+        local other = C((1 - sep)^0)
+        if single then
+            splitter = other * (sep * C(P(1)^0) + "") -- first piece, then everything after the first separator (if any)
+        else
+            splitter = other * (sep * other)^0 -- every piece between separators, empty pieces included
+        end
+        store[separator] = splitter
+    end
+    return splitter
+end
+
+lpeg.splitat = splitat
+
+local cache = { }
+
+function string:split(separator) -- returns a table with the pieces of self as captured by splitat(separator)
+    local collector = cache[separator]
+    if collector == nil then -- first use of this separator: build the table collecting pattern once
+        collector = Ct(splitat(separator))
+        cache[separator] = collector
+    end
+    return match(collector,self)
+end
+
+local cache = { }
+
+function string:checkedsplit(separator) -- split self into a table; leading separators are skipped, repeated ones count as one
+    local c = cache[separator]
+    if not c then
+        local sep = P(separator) -- 'separator' itself stays untouched because it is the cache key
+        local other = C((1 - sep)^0)
+        c = Ct(sep^0 * other * (sep^1 * other)^0)
+        cache[separator] = c
+    end
+    return match(c,self)
+end
+
+--~ function lpeg.L(list,pp)
+--~     local p = pp
+--~     for l=1,#list do
+--~         if p then
+--~             p = p + lpeg.P(list[l])
+--~         else
+--~             p = lpeg.P(list[l])
+--~         end
+--~     end
+--~     return p
+--~ end
+
+--~ from roberto's site:
+--~
+--~ -- decode a two-byte UTF-8 sequence
+--~ local function f2 (s)
+--~   local c1, c2 = string.byte(s, 1, 2)
+--~   return c1 * 64 + c2 - 12416
+--~ end
+--~
+--~ -- decode a three-byte UTF-8 sequence
+--~ local function f3 (s)
+--~   local c1, c2, c3 = string.byte(s, 1, 3)
+--~   return (c1 * 64 + c2) * 64 + c3 - 925824
+--~ end
+--~
+--~ -- decode a four-byte UTF-8 sequence
+--~ local function f4 (s)
+--~   local c1, c2, c3, c4 = string.byte(s, 1, 4)
+--~   return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
+--~ end
+--~
+--~ local cont = lpeg.R("\128\191")   -- continuation byte
+--~
+--~ local utf8 = lpeg.R("\0\127") / string.byte
+--~            + lpeg.R("\194\223") * cont / f2
+--~            + lpeg.R("\224\239") * cont * cont / f3
+--~            + lpeg.R("\240\244") * cont * cont * cont / f4
+--~
+--~ local decode_pattern = lpeg.Ct(utf8^0) * -1
+
+
+local cont = R("\128\191")   -- continuation byte (10xxxxxx)
+local utf8two, utf8three, utf8four = R("\194\223") * cont, R("\224\239") * cont * cont, R("\240\244") * cont * cont * cont
+lpeg.utf8 = R("\0\127") + utf8two + utf8three + utf8four -- one UTF-8 character: a single byte, or a lead byte plus one to three continuation bytes
+