From 281539cf53b8ec43d72e06cbdba874b2de6e758d Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Thu, 11 Dec 2014 21:03:53 +0100 Subject: sync with Context as of 2014-12-11 --- lualibs-lpeg.lua | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) (limited to 'lualibs-lpeg.lua') diff --git a/lualibs-lpeg.lua b/lualibs-lpeg.lua index f3fd28b..192e32f 100644 --- a/lualibs-lpeg.lua +++ b/lualibs-lpeg.lua @@ -145,6 +145,9 @@ patterns.utfbom_8 = utfbom_8 patterns.utf_16_be_nl = P("\000\r\000\n") + P("\000\r") + P("\000\n") -- P("\000\r") * (P("\000\n") + P(true)) + P("\000\n") patterns.utf_16_le_nl = P("\r\000\n\000") + P("\r\000") + P("\n\000") -- P("\r\000") * (P("\n\000") + P(true)) + P("\n\000") +patterns.utf_32_be_nl = P("\000\000\000\r\000\000\000\n") + P("\000\000\000\r") + P("\000\000\000\n") +patterns.utf_32_le_nl = P("\r\000\000\000\n\000\000\000") + P("\r\000\000\000") + P("\n\000\000\000") + patterns.utf8one = R("\000\127") patterns.utf8two = R("\194\223") * utf8next patterns.utf8three = R("\224\239") * utf8next * utf8next @@ -183,10 +186,26 @@ local fullstripper = whitespace^0 * C((whitespace^0 * nonwhitespace^1)^0) ----- collapser = Cs(spacer^0/"" * ((spacer^1 * endofstring / "") + (spacer^1/" ") + P(1))^0) local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) +local b_collapser = Cs( whitespace^0 /"" * (nonwhitespace^1 + whitespace^1/" ")^0) +local e_collapser = Cs((whitespace^1 * P(-1)/"" + nonwhitespace^1 + whitespace^1/" ")^0) +local m_collapser = Cs( (nonwhitespace^1 + whitespace^1/" ")^0) + +local b_stripper = Cs( spacer^0 /"" * (nonspacer^1 + spacer^1/" ")^0) +local e_stripper = Cs((spacer^1 * P(-1)/"" + nonspacer^1 + spacer^1/" ")^0) +local m_stripper = Cs( (nonspacer^1 + spacer^1/" ")^0) + patterns.stripper = stripper patterns.fullstripper = fullstripper patterns.collapser = collapser +patterns.b_collapser = b_collapser +patterns.m_collapser = m_collapser +patterns.e_collapser 
= e_collapser + +patterns.b_stripper = b_stripper +patterns.m_stripper = m_stripper +patterns.e_stripper = e_stripper + patterns.lowercase = lowercase patterns.uppercase = uppercase patterns.letter = patterns.lowercase + patterns.uppercase @@ -1014,3 +1033,75 @@ lpeg.patterns.stripzeros = stripper -- lpegmatch(stripper,str) -- print(#str, os.clock()-ts, lpegmatch(stripper,sample)) +-- for practical reasons we keep this here: + +local byte_to_HEX = { } +local byte_to_hex = { } +local byte_to_dec = { } -- for md5 +local hex_to_byte = { } + +for i=0,255 do + local H = format("%02X",i) + local h = format("%02x",i) + local d = format("%03i",i) + local c = char(i) + byte_to_HEX[c] = H + byte_to_hex[c] = h + byte_to_dec[c] = d + hex_to_byte[h] = c + hex_to_byte[H] = c +end + +local hextobyte = P(2)/hex_to_byte +local bytetoHEX = P(1)/byte_to_HEX +local bytetohex = P(1)/byte_to_hex +local bytetodec = P(1)/byte_to_dec +local hextobytes = Cs(hextobyte^0) +local bytestoHEX = Cs(bytetoHEX^0) +local bytestohex = Cs(bytetohex^0) +local bytestodec = Cs(bytetodec^0) + +patterns.hextobyte = hextobyte +patterns.bytetoHEX = bytetoHEX +patterns.bytetohex = bytetohex +patterns.bytetodec = bytetodec +patterns.hextobytes = hextobytes +patterns.bytestoHEX = bytestoHEX +patterns.bytestohex = bytestohex +patterns.bytestodec = bytestodec + +function string.toHEX(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestoHEX,s) + end +end + +function string.tohex(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestohex,s) + end +end + +function string.todec(s) + if not s or s == "" then + return s + else + return lpegmatch(bytestodec,s) + end +end + +function string.tobytes(s) + if not s or s == "" then + return s + else + return lpegmatch(hextobytes,s) + end +end + +-- local h = "ADFE0345" +-- local b = lpegmatch(patterns.hextobytes,h) +-- print(h,b,string.tohex(b),string.toHEX(b)) -- cgit v1.2.3