From 281539cf53b8ec43d72e06cbdba874b2de6e758d Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Thu, 11 Dec 2014 21:03:53 +0100 Subject: sync with Context as of 2014-12-11 --- lualibs-lpeg.lua | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) (limited to 'lualibs-lpeg.lua') diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua index f3fd28b..192e32f 100644 --- a/lualibs-lpeg.lua +++ b/lualibs-lpeg.lua @@ -145,6 +145,9 @@ patterns.utfbom_8 = utfbom_8 patterns.utf_16_be_nl = P("\000\r\000\n") + P("\000\r") + P("\000\n") -- P("\000\r") * (P("\000\n") + P(true)) + P("\000\n") patterns.utf_16_le_nl = P("\r\000\n\000") + P("\r\000") + P("\n\000") -- P("\r\000") * (P("\n\000") + P(true)) + P("\n\000") +patterns.utf_32_be_nl = P("\000\000\000\r\000\000\000\n") + P("\000\000\000\r") + P("\000\000\000\n") +patterns.utf_32_le_nl = P("\r\000\000\000\n\000\000\000") + P("\r\000\000\000") + P("\n\000\000\000") + patterns.utf8one = R("\000\127") patterns.utf8two = R("\194\223") * utf8next patterns.utf8three = R("\224\239") * utf8next * utf8next @@ -183,10 +186,26 @@ local fullstripper = whitespace^0 * C((whitespace^0 * nonwhitespace^1)^0) ----- collapser = Cs(spacer^0/"" * ((spacer^1 * endofstring / "") + (spacer^1/" ") + P(1))^0) local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) +local b_collapser = Cs( whitespace^0 /"" * (nonwhitespace^1 + whitespace^1/" ")^0) +local e_collapser = Cs((whitespace^1 * P(-1)/"" + nonwhitespace^1 + whitespace^1/" ")^0) +local m_collapser = Cs( (nonwhitespace^1 + whitespace^1/" ")^0) + +local b_stripper = Cs( spacer^0 /"" * (nonspacer^1 + spacer^1/" ")^0) +local e_stripper = Cs((spacer^1 * P(-1)/"" + nonspacer^1 + spacer^1/" ")^0) +local m_stripper = Cs( (nonspacer^1 + spacer^1/" ")^0) + patterns.stripper = stripper patterns.fullstripper = fullstripper patterns.collapser = collapser +patterns.b_collapser = b_collapser +patterns.m_collapser = m_collapser +patterns.e_collapser = e_collapser + +patterns.b_stripper = b_stripper +patterns.m_stripper = m_stripper +patterns.e_stripper = e_stripper + patterns.lowercase = lowercase patterns.uppercase = uppercase patterns.letter = patterns.lowercase + patterns.uppercase @@ -1014,3 +1033,75 @@ lpeg.patterns.stripzeros = stripper -- lpegmatch(stripper,str) -- print(#str, os.clock()-ts, lpegmatch(stripper,sample)) +-- for practical reasone we keep this here: + +local byte_to_HEX = { } +local byte_to_hex = { } +local byte_to_dec = { } -- for md5 +local hex_to_byte = { } + +for i=0,255 do + local H = format("%02X",i) + local h = format("%02x",i) + local d = format("%03i",i) + local c = char(i) + byte_to_HEX[c] = H + byte_to_hex[c] = h + byte_to_dec[c] = d + hex_to_byte[h] = c + hex_to_byte[H] = c +end + +local hextobyte = P(2)/hex_to_byte +local bytetoHEX = P(1)/byte_to_HEX +local bytetohex = P(1)/byte_to_hex +local bytetodec = P(1)/byte_to_dec +local hextobytes = Cs(hextobyte^0) +local bytestoHEX = Cs(bytetoHEX^0) +local bytestohex = Cs(bytetohex^0) +local bytestodec = Cs(bytetodec^0) + +patterns.hextobyte = hextobyte +patterns.bytetoHEX = bytetoHEX +patterns.bytetohex = bytetohex +patterns.bytetodec = bytetodec +patterns.hextobytes = hextobytes +patterns.bytestoHEX = bytestoHEX +patterns.bytestohex = bytestohex +patterns.bytestodec = bytestodec + +function string.toHEX(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestoHEX,s) + end +end + +function string.tohex(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestohex,s) + end +end + +function string.todec(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestodec,s) + end +end + +function string.tobytes(s) + if not s or s == "" then + return s + else + return lpegmatch(hextobytes,s) + end +end + +-- local h = "ADFE0345" +-- local b = lpegmatch(patterns.hextobytes,h) +-- print(h,b,string.tohex(b),string.toHEX(b)) -- cgit v1.2.3 From a79c845dba381f543d09526f77dcf2b4687b73a7 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Mon, 4 May 2015 23:10:29 +0200 Subject: sync with Context as of 2015-05-04 --- lualibs-lpeg.lua | 236 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 151 insertions(+), 85 deletions(-) (limited to 'lualibs-lpeg.lua') diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua index 192e32f..55a0d89 100644 --- a/lualibs-lpeg.lua +++ b/lualibs-lpeg.lua @@ -10,6 +10,8 @@ if not modules then modules = { } end modules ['l-lpeg'] = { -- if i can use new features like capture / 2 and .B (at first sight the xml -- parser is some 5% slower) +-- lpeg.P("abc") is faster than lpeg.P("a") * lpeg.P("b") * lpeg.P("c") + -- a new lpeg fails on a #(1-P(":")) test and really needs a + P(-1) -- move utf -> l-unicode @@ -19,7 +21,7 @@ lpeg = require("lpeg") -- The latest lpeg doesn't have print any more, and even the new ones are not -- available by default (only when debug mode is enabled), which is a pitty as --- as it helps nailign down bottlenecks. Performance seems comparable: some 10% +-- as it helps nailing down bottlenecks. Performance seems comparable: some 10% -- slower pattern compilation, same parsing speed, although, -- -- local p = lpeg.C(lpeg.P(1)^0 * lpeg.P(-1)) @@ -834,121 +836,185 @@ end -- experiment: --- local function make(t) --- local p --- local keys = sortedkeys(t) --- for i=1,#keys do --- local k = keys[i] --- local v = t[k] --- if not p then --- if next(v) then --- p = P(k) * make(v) --- else --- p = P(k) --- end --- else --- if next(v) then --- p = p + P(k) * make(v) --- else --- p = p + P(k) --- end --- end --- end --- return p --- end - --- local function make(t) --- local p = P(false) --- local keys = sortedkeys(t) --- for i=1,#keys do --- local k = keys[i] --- local v = t[k] --- if next(v) then --- p = p + P(k) * make(v) --- else --- p = p + P(k) --- end --- end --- return p --- end - --- function lpeg.utfchartabletopattern(list) -- goes to util-lpg --- local tree = { } --- for i=1,#list do --- local t = tree --- for c in gmatch(list[i],".") do --- local tc = t[c] --- if not tc then --- tc = { } --- t[c] = tc --- end --- t = tc --- end --- end --- return make(tree) --- end +local p_false = P(false) +local p_true = P(true) -local function make(t,hash) - local p = P(false) +local function make(t) + local function making(t) + local p = p_false + local keys = sortedkeys(t) + for i=1,#keys do + local k = keys[i] + if k ~= "" then + local v = t[k] + if v == true then + p = p + P(k) * p_true + elseif v == false then + -- can't happen + else + p = p + P(k) * making(v) + end + end + end + if t[""] then + p = p + p_true + end + return p + end + local p = p_false local keys = sortedkeys(t) for i=1,#keys do local k = keys[i] - local v = t[k] - local h = hash[v] - if h then - if next(v) then - p = p + P(k) * (make(v,hash) + P(true)) + if k ~= "" then + local v = t[k] + if v == true then + p = p + P(k) * p_true + elseif v == false then + -- can't happen else - p = p + P(k) * P(true) + p = p + P(k) * making(v) end - else - if next(v) then - p = p + P(k) * make(v,hash) + end + end + return p +end + +local function collapse(t,x) + if type(t) ~= "table" then + return t, x + else + local n = next(t) + if n == nil then + return t, x + elseif next(t,n) == nil then + -- one entry + local k = n + local v = t[k] + if type(v) == "table" then + return collapse(v,x..k) else - p = p + P(k) + return v, x .. k + end + else + local tt = { } + for k, v in next, t do + local vv, kk = collapse(v,k) + tt[kk] = vv end + return tt, x end end - return p end function lpeg.utfchartabletopattern(list) -- goes to util-lpg local tree = { } - local hash = { } local n = #list if n == 0 then - -- we could always use this branch for s in next, list do local t = tree + local p, pk for c in gmatch(s,".") do - local tc = t[c] - if not tc then - tc = { } - t[c] = tc + if t == true then + t = { [c] = true, [""] = true } + p[pk] = t + p = t + t = false + elseif t == false then + t = { [c] = false } + p[pk] = t + p = t + t = false + else + local tc = t[c] + if not tc then + tc = false + t[c] = false + end + p = t + t = tc end - t = tc + pk = c + end + if t == false then + p[pk] = true + elseif t == true then + -- okay + else + t[""] = true end - hash[t] = s end else for i=1,n do - local t = tree local s = list[i] + local t = tree + local p, pk for c in gmatch(s,".") do - local tc = t[c] - if not tc then - tc = { } - t[c] = tc + if t == true then + t = { [c] = true, [""] = true } + p[pk] = t + p = t + t = false + elseif t == false then + t = { [c] = false } + p[pk] = t + p = t + t = false + else + local tc = t[c] + if not tc then + tc = false + t[c] = false + end + p = t + t = tc end - t = tc + pk = c + end + if t == false then + p[pk] = true + elseif t == true then + -- okay + else + t[""] = true end - hash[t] = s end end - return make(tree,hash) +-- collapse(tree,"") -- needs testing, maybe optional, slightly faster because P("x")*P("X") seems slower than P"(xX") (why) +-- inspect(tree) + return make(tree) end --- inspect ( lpeg.utfchartabletopattern { +-- local t = { "start", "stoep", "staart", "paard" } +-- local p = lpeg.Cs((lpeg.utfchartabletopattern(t)/string.upper + 1)^1) + +-- local t = { "a", "abc", "ac", "abe", "abxyz", "xy", "bef","aa" } +-- local p = lpeg.Cs((lpeg.utfchartabletopattern(t)/string.upper + 1)^1) + +-- inspect(lpegmatch(p,"a")) +-- inspect(lpegmatch(p,"aa")) +-- inspect(lpegmatch(p,"aaaa")) +-- inspect(lpegmatch(p,"ac")) +-- inspect(lpegmatch(p,"bc")) +-- inspect(lpegmatch(p,"zzbczz")) +-- inspect(lpegmatch(p,"zzabezz")) +-- inspect(lpegmatch(p,"ab")) +-- inspect(lpegmatch(p,"abc")) +-- inspect(lpegmatch(p,"abe")) +-- inspect(lpegmatch(p,"xa")) +-- inspect(lpegmatch(p,"bx")) +-- inspect(lpegmatch(p,"bax")) +-- inspect(lpegmatch(p,"abxyz")) +-- inspect(lpegmatch(p,"foobarbefcrap")) + +-- local t = { ["^"] = 1, ["^^"] = 2, ["^^^"] = 3, ["^^^^"] = 4 } +-- local p = lpeg.Cs((lpeg.utfchartabletopattern(t)/t + 1)^1) +-- inspect(lpegmatch(p," ^ ^^ ^^^ ^^^^ ^^^^^ ^^^^^^ ^^^^^^^ ")) + +-- local t = { ["^^"] = 2, ["^^^"] = 3, ["^^^^"] = 4 } +-- local p = lpeg.Cs((lpeg.utfchartabletopattern(t)/t + 1)^1) +-- inspect(lpegmatch(p," ^ ^^ ^^^ ^^^^ ^^^^^ ^^^^^^ ^^^^^^^ ")) + +-- lpeg.utfchartabletopattern { -- utfchar(0x00A0), -- nbsp -- utfchar(0x2000), -- enquad -- utfchar(0x2001), -- emquad @@ -964,7 +1030,7 @@ end -- utfchar(0x200B), -- zerowidthspace -- utfchar(0x202F), -- narrownobreakspace -- utfchar(0x205F), -- math thinspace --- } ) +-- } -- a few handy ones: -- -- cgit v1.2.3 From ce17332266dff01cfb9f83edbf10b7e080a770ca Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sat, 21 Nov 2015 16:35:59 +0100 Subject: sync with Context as of 2015-11-21 --- lualibs-lpeg.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lualibs-lpeg.lua') diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua index 55a0d89..5be1246 100644 --- a/lualibs-lpeg.lua +++ b/lualibs-lpeg.lua @@ -82,7 +82,7 @@ local lpegtype, lpegmatch, lpegprint = lpeg.type, lpeg.match, lpeg.print -- let's start with an inspector: if setinspector then - setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end) + setinspector("lpeg",function(v) if lpegtype(v) then lpegprint(v) return true end end) end -- Beware, we predefine a bunch of patterns here and one reason for doing so -- cgit v1.2.3