diff options
author | Khaled Hosny <khaledhosny@eglug.org> | 2010-01-11 15:15:52 +0200 |
---|---|---|
committer | Khaled Hosny <khaledhosny@eglug.org> | 2010-01-11 16:03:13 +0200 |
commit | f1c3cb9dc199c28cdd2c813eb1ea5c21345125d0 (patch) | |
tree | 808986ca292251a39a2fc9fa3a7e01b9e761577e /luaextra-lpeg.lua | |
parent | e5ad063f805ecbf5fd093712bc60ef5aec25a6fd (diff) | |
download | lualibs-f1c3cb9dc199c28cdd2c813eb1ea5c21345125d0.tar.gz |
Import ConTeXt's Lua libraries
Replace most of luaextra.lua with a bunch of require() calls and add
renamed (but unmodified) ConTeXt Lua libraries to the repository.
ConTeXt's l-xml.lua module has been excluded because it depends on a
bunch of other ConTeXt-specific modules. Also, l-pdfview.lua has been
dropped because I don't know what to use it for.
Diffstat (limited to 'luaextra-lpeg.lua')
-rw-r--r-- | luaextra-lpeg.lua | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/luaextra-lpeg.lua b/luaextra-lpeg.lua new file mode 100644 index 0000000..9e761e4 --- /dev/null +++ b/luaextra-lpeg.lua @@ -0,0 +1,165 @@ +if not modules then modules = { } end modules ['l-lpeg'] = { + version = 1.001, + comment = "companion to luat-lib.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +lpeg = require("lpeg") + +local P, R, S, Ct, C, Cs, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc +local match = lpeg.match + +--~ l-lpeg.lua : + +--~ lpeg.digit = lpeg.R('09')^1 +--~ lpeg.sign = lpeg.S('+-')^1 +--~ lpeg.cardinal = lpeg.P(lpeg.sign^0 * lpeg.digit^1) +--~ lpeg.integer = lpeg.P(lpeg.sign^0 * lpeg.digit^1) +--~ lpeg.float = lpeg.P(lpeg.sign^0 * lpeg.digit^0 * lpeg.P('.') * lpeg.digit^1) +--~ lpeg.number = lpeg.float + lpeg.integer +--~ lpeg.oct = lpeg.P("0") * lpeg.R('07')^1 +--~ lpeg.hex = lpeg.P("0x") * (lpeg.R('09') + lpeg.R('AF'))^1 +--~ lpeg.uppercase = lpeg.P("AZ") +--~ lpeg.lowercase = lpeg.P("az") + +--~ lpeg.eol = lpeg.S('\r\n\f')^1 -- includes formfeed +--~ lpeg.space = lpeg.S(' ')^1 +--~ lpeg.nonspace = lpeg.P(1-lpeg.space)^1 +--~ lpeg.whitespace = lpeg.S(' \r\n\f\t')^1 +--~ lpeg.nonwhitespace = lpeg.P(1-lpeg.whitespace)^1 + +local hash = { } + +function lpeg.anywhere(pattern) --slightly adapted from website + return P { P(pattern) + 1 * lpeg.V(1) } +end + +function lpeg.startswith(pattern) --slightly adapted + return P(pattern) +end + +function lpeg.splitter(pattern, action) + return (((1-P(pattern))^1)/action+1)^0 +end + +-- variant: + +--~ local parser = lpeg.Ct(lpeg.splitat(newline)) + +local crlf = P("\r\n") +local cr = P("\r") +local lf = P("\n") +local space = S(" \t\f\v") -- + string.char(0xc2, 0xa0) if we want utf (cf mail roberto) +local newline = crlf + cr + lf +local spacing = space^0 * newline + +local empty = spacing * Cc("") +local nonempty = Cs((1-spacing)^1) * spacing^-1 +local 
content = (empty + nonempty)^1 + +local capture = Ct(content^0) + +function string:splitlines() + return match(capture,self) +end + +lpeg.linebyline = content -- better make a sublibrary + +--~ local p = lpeg.splitat("->",false) print(match(p,"oeps->what->more")) -- oeps what more +--~ local p = lpeg.splitat("->",true) print(match(p,"oeps->what->more")) -- oeps what->more +--~ local p = lpeg.splitat("->",false) print(match(p,"oeps")) -- oeps +--~ local p = lpeg.splitat("->",true) print(match(p,"oeps")) -- oeps + +local splitters_s, splitters_m = { }, { } + +local function splitat(separator,single) + local splitter = (single and splitters_s[separator]) or splitters_m[separator] + if not splitter then + separator = P(separator) + if single then + local other, any = C((1 - separator)^0), P(1) + splitter = other * (separator * C(any^0) + "") -- ? + splitters_s[separator] = splitter + else + local other = C((1 - separator)^0) + splitter = other * (separator * other)^0 + splitters_m[separator] = splitter + end + end + return splitter +end + +lpeg.splitat = splitat + +local cache = { } + +function string:split(separator) + local c = cache[separator] + if not c then + c = Ct(splitat(separator)) + cache[separator] = c + end + return match(c,self) +end + +local cache = { } + +function string:checkedsplit(separator) + local c = cache[separator] + if not c then + separator = P(separator) + local other = C((1 - separator)^0) + c = Ct(separator^0 * other * (separator^1 * other)^0) + cache[separator] = c + end + return match(c,self) +end + +--~ function lpeg.L(list,pp) +--~ local p = pp +--~ for l=1,#list do +--~ if p then +--~ p = p + lpeg.P(list[l]) +--~ else +--~ p = lpeg.P(list[l]) +--~ end +--~ end +--~ return p +--~ end + +--~ from roberto's site: +--~ +--~ -- decode a two-byte UTF-8 sequence +--~ local function f2 (s) +--~ local c1, c2 = string.byte(s, 1, 2) +--~ return c1 * 64 + c2 - 12416 +--~ end +--~ +--~ -- decode a three-byte UTF-8 sequence +--~ local function f3 (s) 
+--~ local c1, c2, c3 = string.byte(s, 1, 3) +--~ return (c1 * 64 + c2) * 64 + c3 - 925824 +--~ end +--~ +--~ -- decode a four-byte UTF-8 sequence +--~ local function f4 (s) +--~ local c1, c2, c3, c4 = string.byte(s, 1, 4) +--~ return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 +--~ end +--~ +--~ local cont = lpeg.R("\128\191") -- continuation byte +--~ +--~ local utf8 = lpeg.R("\0\127") / string.byte +--~ + lpeg.R("\194\223") * cont / f2 +--~ + lpeg.R("\224\239") * cont * cont / f3 +--~ + lpeg.R("\240\244") * cont * cont * cont / f4 +--~ +--~ local decode_pattern = lpeg.Ct(utf8^0) * -1 + + +local cont = R("\128\191") -- continuation byte + +lpeg.utf8 = R("\0\127") + R("\194\223") * cont + R("\224\239") * cont * cont + R("\240\244") * cont * cont * cont + |