From a743c730417d9abcdab792724e869da4c431964c Mon Sep 17 00:00:00 2001
From: Philipp Gesang
Date: Sat, 23 Jun 2018 14:55:39 +0200
Subject: cal: implement content line parser

---
 cal.lua | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 189 insertions(+), 18 deletions(-)

diff --git a/cal.lua b/cal.lua
index 6dd2c6b..709597a 100755
--- a/cal.lua
+++ b/cal.lua
@@ -11,26 +11,124 @@ local ioopen = io.open
 
 local string = require "string"
 local stringformat = string.format
-local println = function (...) print (stringformat (...)) end
+local table = require "table"
+local tableconcat = table.concat
+
+local verboselvl = 3
+
+--- Build a printf-style output function.
+-- @param stream    "err" or "stderr" selects io.stderr, anything else
+--                  io.stdout.
+-- @param threshold the message is written only if verboselvl >= threshold.
+-- @param newlinep  if true, a "\n" is written after every message.
+-- @return function (fmt, ...) that formats its arguments with
+--         string.format and writes the result to the selected stream.
+local mk_out = function (stream, threshold, newlinep)
+  local out = io.stdout
+  if stream == "err" or stream == "stderr" then out = io.stderr end
+  return function (...)
+    if verboselvl >= threshold then
+      local ok, msg = pcall (stringformat, ...)
+      if ok then
+        out:write (msg)
+        if newlinep == true then out:write "\n" end
+      ---else silently ignore
+      end
+    end
+  end
+end
+
+local println = mk_out ("stdout", 0, true)
+local errorln = mk_out ("stderr", 0, true)
+local noiseln = mk_out ("stderr", 1, true)
+local debugln = mk_out ("stderr", 2, true)
+
+local print_calendar
+local fmt_calendar_params
+do
+  --- Render a parameter list for diagnostic output.
+  -- @param params sequence of { name = string, value = { string, ... } }
+  --               records as built by add_param.
+  -- @return the records formatted as “name” → “values”, joined by ", ".
+  fmt_calendar_params = function (params)
+    local acc = { }
+    local len = #params
+
+    for i = 1, len do
+      local param = params [i]
+      -- param.value is the list of values captured for the parameter;
+      -- join it, because formatting the table itself with %s does not
+      -- show the values.
+      acc [i] = stringformat ("“%s” → “%s”",
+                              param.name, tableconcat (param.value, ","))
+    end
+
+    return tableconcat (acc, ", ")
+  end
+end
 
 
 local parse_calendar do
   local lpeg = require "lpeg"
   local lpegmatch = lpeg.match
   local C = lpeg.C
+  local Cf = lpeg.Cf
+  local Cg = lpeg.Cg
   local Cp = lpeg.Cp
   local Cs = lpeg.Cs
+  local Ct = lpeg.Ct
   local P = lpeg.P
+  local R = lpeg.R
   local S = lpeg.S
 
   local p_space = P" "
   local p_cr = P"\r"
   local p_lf = P"\n"
-  local p_white_fold = S" \t"
+  local p_wsp = S" \t"
+  local p_white_fold = p_wsp
   local p_white = S" \n\r\t\v"
-  local p_eol = p_cr * p_lf + p_lf
+  local p_eof = P(-1)
+  local p_eol = p_eof + p_cr * p_lf + p_lf
   local p_noeol = P(1) - p_eol
 
+  local p_comma = P","
+  local p_colon = P":"
+  local p_semicolon = P";"
+  local p_dash = P"-"
+  local p_equals = P"="
+  local p_dquote = P"\""
+
+  local p_alpha = R("az", "AZ")
+  local p_digit = R"09"
+
+--[[--
+  NON-US-ASCII = %x80-F8
+  QSAFE-CHAR = WSP / %x21 / %x23-7E / NON-US-ASCII
+  SAFE-CHAR = WSP / %x21 / %x23-2B / %x2D-39 / %x3C-7E
+              / NON-US-ASCII
+  VALUE-CHAR = WSP / %x21-7E / NON-US-ASCII
+
+--]]--
+
+  --- NON-US-ASCII starts at 0x80: 0x7f (DEL) is a control character and
+  --- belongs to none of the classes above. The upper bound stays at 0xff,
+  --- which is more permissive than the %xF8 of the grammar.
+  local p_non_ascii = R"\x80\xff"
+  local p_value_char = p_wsp
+                     + R"!~" -- 0x21–0x7e
+                     + p_non_ascii
+  local p_qsafe_char = p_wsp
+                     + P"!" -- 0x21
+                     + R"#~" -- 0x23–0x7e
+                     + p_non_ascii
+  local p_safe_char = p_wsp --[[ printable range excluding {",:;} ]]
+                    + P"!" -- 0x21
+                    + R"#+" -- 0x23–0x2b
+                    + R"-9" -- 0x2d–0x39
+                    + R"<~" -- 0x3c–0x7e
+                    + p_non_ascii
+  ----- p_safe_char = p_wsp + (R"!~" - S",:;")
+
   --[[--
 
     RFC2445: Long content lines SHOULD be split into a multiple line
@@ -44,42 +142,115 @@ local parse_calendar do
 
   local p_folded_line_1 = p_noeol^1 * (p_eol / "")
   local p_folded_line_c = p_white_fold/"" * p_folded_line_1
-  local p_content_line = Cs(p_folded_line_1 * p_folded_line_c^0) * Cp()
+  local p_folded_line = Cs(p_folded_line_1 * p_folded_line_c^0) * Cp()
   local p_skip_line = p_noeol^0 * p_eol * Cp()
 
+  --[[--
+
+    contentline = name *(";" param ) ":" value CRLF
+    name = x-name / iana-token
+    iana-token = 1*(ALPHA / DIGIT / "-")
+    x-name = "X-" [vendorid "-"] 1*(ALPHA / DIGIT / "-")
+    vendorid = 3*(ALPHA / DIGIT) ;Vendor identification
+    param = param-name "=" param-value
+            *("," param-value)
+    param-name = iana-token / x-token
+    param-value = paramtext / quoted-string
+    paramtext = *SAFE-CHAR
+    value = *VALUE-CHAR
+    quoted-string = DQUOTE *QSAFE-CHAR DQUOTE
+
+  --]]--
+
+  --- Fold callback for p_params: append one { name, value } record to
+  --- the accumulator table t and hand t back to the fold.
+  local add_param = function (t, k, v)
+    debugln ("»»»» add_param ({%s}, %s, %s)", tostring (t), tostring (k), tostring (v))
+
+    t [#t + 1] = { name = k, value = v }
+
+    return t
+  end
+
+  local p_quoted_string = p_dquote * p_qsafe_char^0 * p_dquote
+  local p_ianatok = (p_alpha + p_digit + p_dash)^1
+  local p_xtok = nil --[[ XXX rule missing from RFC ]]
+  local p_param_name = p_ianatok -- + p_xtok
+  local p_param_text = p_safe_char^0
+  local p_value = p_value_char^0
+  --- The quoted alternative has to come first: p_param_text also matches
+  --- the empty string, so as the first choice it always succeeds and the
+  --- quoted-string rule would never be tried.
+  local p_param_value = p_quoted_string + p_param_text
+  local p_param = Cg ( C(p_param_name)
+                     * p_equals
+                     * Ct(C(p_param_value) * (p_comma * C(p_param_value))^0))
+  local p_params = Cf (Ct"" * (p_semicolon * p_param)^0, add_param)
+  local p_vendorid = (p_alpha + p_digit)^3
+  local p_xname = P"X" * p_dash * (p_vendorid * p_dash)^-1
+                * (p_ianatok)
+  local p_name = p_xname + p_ianatok
+  local p_content_line = C(p_name)
+                       * p_params^-1
+                       * p_colon * C(p_value) * Cp()
+
+  --- Unfold the logical line starting at pos0 and split it into its parts.
+  --- Returns false if no content line could be parsed there, otherwise
+  --- true, the position after the line, name, params and value.
   local parse_content_line = function (raw, pos0)
-    local res, pos1 = lpegmatch (p_content_line, raw, pos0)
+    local tmp, pos1 = lpegmatch (p_folded_line, raw, pos0)
+    if tmp == nil then return false end
 
-    return res, pos1
+    local name, params, value, epos
+    name, params, value, epos = lpegmatch (p_content_line, tmp)
+
+    if name == nil or value == nil then return false end
+
+    if epos ~= #tmp + 1 then
+      noiseln ("parsing unfolded line stopped %d characters short \z
+                of EOL [%d]“%s”", #tmp + 1 - epos, #tmp, tmp)
+    end
+
+    return true, pos1, name, params, value
   end
 
   local skip_line = function (raw, pos0)
     return lpegmatch (p_skip_line, raw, pos0)
   end
 
 
-  local errline = function (pos)
-  end
+  --- Parse raw into a list of content lines.
+  --- Call as parse_calendar (raw); the remaining arguments carry the state
+  --- of the tail-recursive loop. Returns acc, a sequence of
+  --- { pos = { pos0, pos1 }, name, params, value } records; lines that do
+  --- not parse are reported through errorln and skipped.
+  parse_calendar = function (raw, pos0, acc, consumed, nline, nskipped)
+    if pos0 == nil then return parse_calendar (raw, 1, { }, 0, 1, 0) end
 
-  parse_calendar = function (raw, pos0, consumed, nline, nskipped)
-    if pos0 == nil then return parse_calendar (raw, 1, 0, 1) end
+    local ok, pos1, name, params, value = parse_content_line (raw, pos0)
 
-    local cline, pos1 = parse_content_line (raw, pos0)
-    if cline == nil then
+    if ok == false then
       pos1 = skip_line (raw, pos0)
-      println ("[%d–%d] %d bad content line; skipping", pos0, pos1, nline)
+      if pos0 == pos1 then
+        noiseln ("»»» [%d] reached EOF, terminating after %d bytes, \z
+                  %d calendar lines",
+                 pos0, consumed, nline)
+        return acc
+      end
+      errorln ("[%d–%d] %d bad content line; skipping", pos0, pos1, nline)
       nskipped = nskipped + 1
     else
-      println ("[%d–%d] %d [%s]", pos0, pos1, nline, cline)
+      noiseln ("»»» [%d–%d] “%s” [%s] “%s”",
+               pos0, pos1, name, fmt_calendar_params (params), value)
+      acc [#acc + 1] = { pos = { pos0, pos1 }
+                       , name = name, params = params, value = value }
     end
-    return parse_calendar (raw, pos1, consumed + pos1 - pos0, nline + 1,
+
+    return parse_calendar (raw, pos1, acc, consumed + (pos1 - pos0), nline + 1,
                            nskipped)
   end
 end
 
-local print_calendar do
-end
-
 local loaddata = function (fname)
   local fh = ioopen (fname, "r")
 
-- 
cgit v1.2.3