summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Gesang <phg@phi-gamma.net>2018-06-23 14:55:39 +0200
committerPhilipp Gesang <phg@phi-gamma.net>2018-06-26 13:57:25 +0200
commita743c730417d9abcdab792724e869da4c431964c (patch)
tree31e7a0c46970abeda0551c8481e2594330b03c3a
parent2ab4257bf85adcc055e783df5735601cec9cbfff (diff)
downloadcaldr-a743c730417d9abcdab792724e869da4c431964c.tar.gz
cal: implement content line parser
-rwxr-xr-xcal.lua176
1 files changed, 158 insertions, 18 deletions
diff --git a/cal.lua b/cal.lua
index 6dd2c6b..709597a 100755
--- a/cal.lua
+++ b/cal.lua
@@ -11,26 +11,106 @@ local ioopen = io.open
local string = require "string"
local stringformat = string.format
-local println = function (...) print (stringformat (...)) end
+local table = require "table"
+local tableconcat = table.concat
+
+local verboselvl = 3
+
+--- Create a printf-style logging function bound to one output stream.
+-- stream: "err" or "stderr" selects io.stderr; anything else io.stdout.
+-- threshold: messages are written only while verboselvl >= threshold.
+-- newlinep: when exactly true, "\n" is written after each message.
+-- Returns a function that takes string.format arguments and returns nothing.
+local mk_out = function (stream, threshold, newlinep)
+  local to_stderr = stream == "err" or stream == "stderr"
+  local sink = to_stderr and io.stderr or io.stdout
+
+  return function (...)
+    if verboselvl < threshold then return end
+
+    -- Format under pcall so that a bad format string or argument never
+    -- aborts the caller; such messages are dropped silently.
+    local formatted, text = pcall (stringformat, ...)
+    if not formatted then return end
+
+    sink:write (text)
+    if newlinep == true then sink:write "\n" end
+  end
+end
+
+local println = mk_out ("stdout", 0, true)
+local errorln = mk_out ("stderr", 0, true)
+local noiseln = mk_out ("stderr", 1, true)
+local debugln = mk_out ("stderr", 2, true)
+
+local print_calendar
+local fmt_calendar_params
+do
+  --- Render a parameter list as a human-readable string for log output.
+  -- params: sequence of { name = ..., value = ... } records; value is
+  --         normally the table of captured parameter values, but a plain
+  --         string is accepted as well.
+  -- Returns the entries formatted as “name” → “v1,v2” and joined by ", ".
+  fmt_calendar_params = function (params)
+    local acc = { }
+
+    for i = 1, #params do
+      local param = params [i]
+      local value = param.value
+      -- A multi-valued parameter arrives as a table; passing that to "%s"
+      -- prints the table address (or raises on Lua 5.1), so join it first.
+      if type (value) == "table" then value = tableconcat (value, ",") end
+      acc [i] = stringformat ("“%s” → “%s”",
+                              tostring (param.name), tostring (value))
+    end
+
+    return tableconcat (acc, ", ")
+  end
+end
local parse_calendar do
local lpeg = require "lpeg"
local lpegmatch = lpeg.match
local C = lpeg.C
+ local Cf = lpeg.Cf
+ local Cg = lpeg.Cg
local Cp = lpeg.Cp
local Cs = lpeg.Cs
+ local Ct = lpeg.Ct
local P = lpeg.P
+ local R = lpeg.R
local S = lpeg.S
local p_space = P" "
local p_cr = P"\r"
local p_lf = P"\n"
- local p_white_fold = S" \t"
+ local p_wsp = S" \t" -- WSP of the grammar: a space or a horizontal tab
+ local p_white_fold = p_wsp -- the whitespace character that starts a folded continuation line
local p_white = S" \n\r\t\v"
- local p_eol = p_cr * p_lf + p_lf
+ local p_eof = P(-1) -- succeeds only at end of input and consumes nothing
+ local p_eol = p_eof + p_cr * p_lf + p_lf -- end of input also counts as a line end
local p_noeol = P(1) - p_eol -- any one character at which no line end starts
+ local p_comma = P","
+ local p_colon = P":"
+ local p_semicolon = P";"
+ local p_dash = P"-"
+ local p_equals = P"="
+ local p_dquote = P"\""
+
+ local p_alpha = R("az", "AZ") -- ASCII letters only
+ local p_digit = R"09"
+
+--[[--
+ NON-US-ASCII = %x80-F8
+ QSAFE-CHAR = WSP / %x21 / %x23-7E / NON-US-ASCII
+ SAFE-CHAR = WSP / %x21 / %x23-2B / %x2D-39 / %x3C-7E
+ / NON-US-ASCII
+ VALUE-CHAR = WSP / %x21-7E / NON-US-ASCII
+
+--]]--
+
+  -- Character classes of the RFC 2445 content line grammar. Control
+  -- characters other than HTAB (part of p_wsp) belong to none of them.
+  local p_non_ascii  = R"\x80\xff" -- starts at 0x80; 0x7f (DEL) is a control character
+  local p_value_char = p_wsp
+                     + R"!~" -- 0x21–0x7e, i.e. including “!” and the double quote
+                     + p_non_ascii
+  local p_qsafe_char = p_wsp
+                     + P"!"  -- 0x21
+                     + R"#~" -- 0x23–0x7e
+                     + p_non_ascii
+  local p_safe_char  = p_wsp --[[ printable range excluding {",:;} ]]
+                     + P"!"  -- 0x21
+                     + R"#+" -- 0x23–0x2b
+                     + R"-9" -- 0x2d–0x39
+                     + R"<~" -- 0x3c–0x7e
+                     + p_non_ascii
+  ----- p_safe_char = p_wsp + (R"!~" - S",:;")
+
--[[--
RFC2445: Long content lines SHOULD be split into a multiple line
@@ -44,42 +124,102 @@ local parse_calendar do
local p_folded_line_1 = p_noeol^1 * (p_eol / "") -- one non-empty physical line; its line end is captured as ""
local p_folded_line_c = p_white_fold/"" * p_folded_line_1 -- continuation line: one leading whitespace character (captured as ""), then a physical line
- local p_content_line = Cs(p_folded_line_1 * p_folded_line_c^0) * Cp()
+ local p_folded_line = Cs(p_folded_line_1 * p_folded_line_c^0) * Cp() -- yields the unfolded text and the position after the last physical line
local p_skip_line = p_noeol^0 * p_eol * Cp() -- consumes through the next line end and yields the position after it
+ --[[--
+
+ contentline = name *(";" param ) ":" value CRLF
+ name = x-name / iana-token
+ iana-token = 1*(ALPHA / DIGIT / "-")
+ x-name = "X-" [vendorid "-"] 1*(ALPHA / DIGIT / "-")
+ vendorid = 3*(ALPHA / DIGIT) ;Vendor identification
+ param = param-name "=" param-value
+ *("," param-value)
+ param-name = iana-token / x-token
+ param-value = paramtext / quoted-string
+ paramtext = *SAFE-CHAR
+ value = *VALUE-CHAR
+ quoted-string = DQUOTE *QSAFE-CHAR DQUOTE
+
+ --]]--
+
+  --- Fold step for the parameter list: append one parameter record to t.
+  -- t: accumulator list; k: parameter name; v: captured parameter value(s).
+  -- Returns t so it can serve as the accumulator of a fold capture.
+  local add_param = function (t, k, v)
+    -- Convert every argument explicitly: on Lua 5.1 "%s" rejects a table,
+    -- the format call fails and the message would be dropped silently.
+    debugln ("»»»» add_param ({%s}, %s, %s)",
+             tostring (t), tostring (k), tostring (v))
+
+    t [#t + 1] = { name = k, value = v }
+
+    return t
+  end
+
+  -- quoted-string = DQUOTE *QSAFE-CHAR DQUOTE
+  local p_quoted_string = p_dquote * p_qsafe_char^0 * p_dquote
+  local p_ianatok       = (p_alpha + p_digit + p_dash)^1
+  local p_xtok          = nil --[[ XXX rule missing from RFC ]]
+  local p_param_name    = p_ianatok -- + p_xtok
+  local p_param_text    = p_safe_char^0
+  local p_value         = p_value_char^0
+  -- The quoted form must be tried first: p_param_text also matches the
+  -- empty string, so as first alternative of the ordered choice it always
+  -- succeeds and a quoted value could never be matched.
+  local p_param_value   = p_quoted_string + p_param_text
+  -- One parameter: a group carrying the name and a table holding its
+  -- comma-separated values.
+  local p_param         = Cg ( C(p_param_name)
+                             * p_equals
+                             * Ct(C(p_param_value) * (p_comma * C(p_param_value))^0))
+  -- Fold all parameters into one list, starting from an empty table.
+  local p_params        = Cf (Ct"" * (p_semicolon * p_param)^0, add_param)
+  local p_vendorid      = (p_alpha + p_digit)^3
+  local p_xname         = P"X" * p_dash * (p_vendorid * p_dash)^-1
+                        * (p_ianatok)
+  local p_name          = p_xname + p_ianatok
+  -- contentline = name *(";" param) ":" value; captures the name, the
+  -- parameter list, the value, and the position after the value.
+  local p_content_line  = C(p_name)
+                        * p_params^-1
+                        * p_colon * C(p_value) * Cp()
+
+
+ --- Unfold the content line starting at pos0 of raw and parse it.
+ -- Returns false if no line could be unfolded or the unfolded text does
+ -- not match the content line grammar. Otherwise returns true, the
+ -- position after the consumed physical line(s), the name, the parameter
+ -- list and the value.
local parse_content_line = function (raw, pos0)
- local res, pos1 = lpegmatch (p_content_line, raw, pos0)
+ local tmp, pos1 = lpegmatch (p_folded_line, raw, pos0)
+ if tmp == nil then return false end
- return res, pos1
+ local name, params, value, epos
+ name, params, value, epos = lpegmatch (p_content_line, tmp)
+
+ if name == nil or value == nil then return false end
+
+ if epos ~= #tmp + 1 then
+ -- epos is the position after the last parsed character, so the
+ -- unparsed remainder is #tmp + 1 - epos characters long.
+ noiseln ("parsing unfolded line stopped %d characters short \z
+ of EOL [%d]“%s”", #tmp + 1 - epos, #tmp, tmp)
+ end
+
+ return true, pos1, name, params, value
end
local skip_line = function (raw, pos0) -- returns the position after the line end that follows pos0
return lpegmatch (p_skip_line, raw, pos0)
end
- local errline = function (pos)
- end
+ --- Parse every content line of raw into a list of records.
+ -- Call as parse_calendar (raw); the remaining parameters only carry
+ -- state through the tail-recursive calls (current position, result
+ -- list, bytes consumed, number of the next line, lines skipped).
+ -- Returns the list of { pos = { pos0, pos1 }, name, params, value }.
+ parse_calendar = function (raw, pos0, acc, consumed, nline, nskipped)
+ if pos0 == nil then return parse_calendar (raw, 1, { }, 0, 1, 0) end
- parse_calendar = function (raw, pos0, consumed, nline, nskipped)
- if pos0 == nil then return parse_calendar (raw, 1, 0, 1) end
+ local ok, pos1, name, params, value = parse_content_line (raw, pos0)
- local cline, pos1 = parse_content_line (raw, pos0)
- if cline == nil then
+ if ok == false then
pos1 = skip_line (raw, pos0)
- println ("[%d–%d] %d bad content line; skipping", pos0, pos1, nline)
+ if pos0 == pos1 then
+ -- nline numbers the line about to be read, which is one more than
+ -- the number of lines processed so far.
+ noiseln ("»»» [%d] reached EOF, terminating after %d bytes, \z
+ %d calendar lines",
+ pos0, consumed, nline - 1)
+ return acc
+ end
+ errorln ("[%d–%d] %d bad content line; skipping", pos0, pos1, nline)
nskipped = nskipped + 1
else
- println ("[%d–%d] %d [%s]", pos0, pos1, nline, cline)
+ noiseln ("»»» [%d–%d] “%s” [%s] “%s”",
+ pos0, pos1, name, fmt_calendar_params (params), value)
+ acc [#acc + 1] = { pos = { pos0, pos1 }
+ , name = name, params = params, value = value }
end
- return parse_calendar (raw, pos1, consumed + pos1 - pos0, nline + 1,
+
+ return parse_calendar (raw, pos1, acc, consumed + (pos1 - pos0), nline + 1,
nskipped)
end
end
-local print_calendar do
-end
-
local loaddata = function (fname)
local fh = ioopen (fname, "r")