summaryrefslogtreecommitdiff
path: root/cal.lua
blob: 3a5c5099be237617519d8214ccc5fc9a73be5d0d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
--[[--

    RFC2445 parser / printer.

--]]--

local pcall         = pcall

local io            = require "io"
local ioopen        = io.open
local iostdin       = io.stdin
local iostdout      = io.stdout

local math          = require "math"
local mathmin       = math.min

local string        = require "string"
local stringformat  = string.format
local stringlower   = string.lower
local stringsub     = string.sub

local table         = require "table"
local tableconcat   = table.concat

local common        = require "common"
local println       = common.println
local errorln       = common.errorln
local noiseln       = common.noiseln
local debugln       = common.debugln

--[[--

  fmt_calendar_lines (lines [, fold [, newline]]) -> string
  — take a flattened lines representation of a calendar and format it into a
  string.

  *lines* is a list of tables with the fields *name*, *value* and, optionally,
  *params* (a list of { name = string, value = { string, … } } entries).
  *fold* is the maximum width of an output line in bytes (default: 75),
  *newline* the line separator (default: CRLF). The separator is placed
  between lines only; no trailing line break is appended.

--]]--

local fmt_calendar_lines do
  local fold_rfc2445    = 75  --- B
  local fold_wsp        = " " --- could be tab as well
  local crlf            = "\r\n"

  --- fmt_line (ln) -> string
  --- Render one content line as NAME[;PARAM=v1,v2…]:VALUE without folding.
  local fmt_line = function (ln)
    local params  = ""
    local lparams = ln.params
    if lparams ~= nil then
      local acc = { }
      for i = 1, #lparams do
        local param = lparams [i]
        acc [i] = stringformat (";%s=%s",
                                param.name,
                                tableconcat (param.value, ","))
      end
      params = tableconcat (acc)
    end
    return stringformat ("%s%s:%s", ln.name, params, ln.value)
  end

  --- fold_line (line, len, newline) -> string
  --- Split *line* into pieces of at most *len* bytes each, joined by
  --- *newline*. Every continuation piece starts with the folding whitespace,
  --- which counts towards its width, so it carries at most len - 1 bytes of
  --- payload. Splitting is done on bytes, not on characters.
  local fold_line = function (line, len, newline)
    local total = #line
    --- With a width below 2 a continuation line has no room for payload;
    --- emit the line unfolded instead of looping forever.
    if len < 2 or total <= len then return line end

    local acc     = { stringsub (line, 1, len) }
    local pos     = len + 1
    local payload = len - 1 --- account for leading whitespace

    while pos <= total do
      acc [#acc + 1] = fold_wsp .. stringsub (line, pos, pos + payload - 1)
      pos = pos + payload
    end

    return tableconcat (acc, newline)
  end

  --- The trailing parameters *i* (index of the first line to format) and
  --- *acc* (list of already formatted lines) are optional and only kept for
  --- compatibility with the earlier recursive implementation.
  fmt_calendar_lines = function (cal, fold, newline, i, acc)
    fold    = fold    or fold_rfc2445
    newline = newline or crlf
    acc     = acc     or { }

    for n = i or 1, #cal do
      local line = fmt_line (cal [n])

      if #line > fold then
        line = fold_line (line, fold, newline)
      end

      acc [#acc + 1] = line
    end

    return tableconcat (acc, newline)
  end
end

local fmt_calendar_params do
  --- fmt_calendar_params (params) -> string
  --- Render a parameter list in human readable form. Each entry of *params*
  --- has a *name* and a list *value*; it is shown as the quoted name, an
  --- arrow and the bracketed, comma separated values. Entries are joined
  --- with ", ".
  fmt_calendar_params = function (params)
    local rendered = { }

    for idx = 1, #params do
      local entry  = params [idx]
      local values = tableconcat (entry.value, ", ")
      rendered [#rendered + 1] = stringformat ("“%s” → [%s]", entry.name, values)
    end

    return tableconcat (rendered, ", ")
  end
end

--[[--

  parse_calendar_lines (str) -> lines
  — dissect a calendar *str* into lines, unfolding the input if necessary.

  The result is a list with one entry per successfully parsed content line:

      { pos = { first, next }, name = string, params = list, value = string }

  where *pos* holds the byte offsets in *str* at which the (folded) line
  starts and at which the following line starts, and *params* is a list of
  { name = string, value = { string, … } } entries. Lines that cannot be
  parsed are reported through errorln and skipped.

  The remaining parameters carry the state of the recursion and must be
  omitted by callers.

--]]--

local parse_calendar_lines do
  local lpeg            = require "lpeg"
  local lpegmatch       = lpeg.match
  local C               = lpeg.C
  local Cf              = lpeg.Cf
  local Cg              = lpeg.Cg
  local Cp              = lpeg.Cp
  local Cs              = lpeg.Cs
  local Ct              = lpeg.Ct
  local P               = lpeg.P
  local R               = lpeg.R
  local S               = lpeg.S

  local p_space         = P" "
  local p_cr            = P"\r"
  local p_lf            = P"\n"

  local p_wsp           = S" \t"
  local p_white_fold    = p_wsp
  local p_white         = S" \n\r\t\v"
  local p_eof           = P(-1)
  local p_eol           = p_eof + p_cr * p_lf + p_lf
  local p_noeol         = P(1) - p_eol

  local p_comma         = P","
  local p_colon         = P":"
  local p_semicolon     = P";"
  local p_dash          = P"-"
  local p_equals        = P"="
  local p_dquote        = P"\""

  local p_alpha         = R("az", "AZ")
  local p_digit         = R"09"

--[[--
     NON-US-ASCII       = %x80-F8
     QSAFE-CHAR         = WSP / %x21 / %x23-7E / NON-US-ASCII
     SAFE-CHAR          = WSP / %x21 / %x23-2B / %x2D-39 / %x3C-7E
                        / NON-US-ASCII
     VALUE-CHAR         = WSP / %x21-7E / NON-US-ASCII

--]]--

  --- NOTE(review): this range is wider than the NON-US-ASCII rule quoted
  --- above; it also accepts 0x7f (DEL) and 0xf9–0xff. Confirm whether that is
  --- intended.
  local p_non_ascii     = R"\x7f\xff"
  --- VALUE-CHAR covers the whole printable range including “!” and the
  --- double quote; leaving them out made values stop short at the first
  --- such character.
  local p_value_char    = p_wsp
                        + R"!~"       -- 0x21–0x7e
                        + p_non_ascii
  local p_qsafe_char    = p_wsp
                        + P"!"        -- 0x21
                        + R"#~"       -- 0x23–0x7e
                        + p_non_ascii
  local p_safe_char     = p_wsp --[[ printable range excluding {",:;} ]]
                        + P"!"        -- 0x21
                        + R"#+"       -- 0x23–0x2b
                        + R"-9"       -- 0x2d–0x39
                        + R"<~"       -- 0x3c–0x7e
                        + p_non_ascii
  ----- p_safe_char     = p_wsp + (R"!~" - S",:;")

  --[[--

    RFC2445: Long content lines SHOULD be split into a multiple line
    representations using a line "folding" technique. That is, a long line can
    be split between any two characters by inserting a CRLF immediately
    followed by a single linear white space character (i.e., SPACE, US-ASCII
    decimal 32 or HTAB, US-ASCII decimal 9).

  --]]--

  --- One physical line; the line break is dropped from the substitution
  --- capture.
  local p_folded_line_1 = p_noeol^1 * (p_eol / "")
  --- A continuation line: the single leading whitespace is dropped too.
  local p_folded_line_c = p_white_fold/"" * p_folded_line_1

  --- Yields the unfolded line and the position following it in the input.
  local p_folded_line   = Cs(p_folded_line_1 * p_folded_line_c^0) * Cp()

  --- Consumes the rest of the current line and yields the position after it.
  local p_skip_line     = p_noeol^0 * p_eol * Cp()

  --[[--

     contentline        = name *(";" param ) ":" value CRLF
     name               = x-name / iana-token
     iana-token         = 1*(ALPHA / DIGIT / "-")
     x-name             = "X-" [vendorid "-"] 1*(ALPHA / DIGIT / "-")
     vendorid           = 3*(ALPHA / DIGIT)     ;Vendor identification
     param              = param-name "=" param-value
                          *("," param-value)
     param-name         = iana-token / x-token
     param-value        = paramtext / quoted-string
     paramtext          = *SAFE-CHAR
     value              = *VALUE-CHAR
     quoted-string      = DQUOTE *QSAFE-CHAR DQUOTE

  --]]--

  --- Folding function for the parameter list: appends one parameter, given
  --- as name *k* and list of values *v*, to the accumulator *t*.
  local add_param = function (t, k, v)
    debugln ("»»»» add_param ({%s}, %s, %s)", t, tostring (k), tostring (v))

    t [#t + 1] = { name = k, value = v }

    return t
  end

  local p_quoted_string = p_dquote * p_qsafe_char^0 * p_dquote
  local p_ianatok       = (p_alpha + p_digit + p_dash)^1
  local p_xtok          = nil --[[ XXX rule missing from RFC ]]
  local p_param_name    = p_ianatok -- + p_xtok
  local p_param_text    = p_safe_char^0
  local p_value         = p_value_char^0
  --- The quoted string has to be tried first: p_param_text also matches the
  --- empty string, so as the first alternative of the ordered choice it
  --- would always win and a quoted value could never be recognized.
  local p_param_value   = p_quoted_string + p_param_text
  local p_param         = Cg (  C(p_param_name)
                              * p_equals
                              * Ct(C(p_param_value) * (p_comma * C(p_param_value))^0))
  local p_params        = Cf (Ct"" * (p_semicolon * p_param)^0, add_param)
  local p_vendorid      = (p_alpha + p_digit)^3
  local p_xname         = P"X" * p_dash * (p_vendorid * p_dash)^-1
                        * (p_ianatok)
  local p_name          = p_xname + p_ianatok
  --- Captures: name, list of parameters, value, position after the value.
  local p_content_line  = C(p_name)
                        * p_params^-1
                        * p_colon * C(p_value) * Cp()

  --- parse_content_line (raw, pos0)
  ---     -> true, pos1, name, params, value  on success
  ---     -> false                            if no content line starts at pos0
  --- Unfolds the line starting at *pos0* and matches it against the content
  --- line grammar; *pos1* is the position of the next line in *raw*.
  local parse_content_line = function (raw, pos0)
    local tmp, pos1 = lpegmatch (p_folded_line, raw, pos0)
    if tmp == nil then return false end

    local name, params, value, epos
    name, params, value, epos = lpegmatch (p_content_line, tmp)

    if name == nil or value == nil then return false end

    --- The grammar matched only a prefix of the unfolded line: keep what was
    --- recognized but leave a note in the log.
    if epos ~= #tmp + 1 then
      noiseln ("parsing unfolded line stopped %d characters short \z
                of EOL [%d]“%s”", epos - #tmp - 1, #tmp, tmp)
    end

    return true, pos1, name, params, value
  end

  --- skip_line (raw, pos0) -> position after the line that contains pos0
  local skip_line = function (raw, pos0)
    return lpegmatch (p_skip_line, raw, pos0)
  end

  parse_calendar_lines = function (raw, pos0, acc, consumed, nline, nskipped)
    if pos0 == nil then return parse_calendar_lines (raw, 1, { }, 0, 1, 0) end

    local ok, pos1, name, params, value = parse_content_line (raw, pos0)

    if ok == false then
      pos1 = skip_line (raw, pos0)
      --- Skipping made no progress, so only the end of input is left.
      if pos0 == pos1 then
        noiseln ("»»» [%d] reached EOF, terminating after %d bytes, \z
                  %d calendar lines",
                 pos0, consumed, nline)
        return acc
      end
      errorln ("[%d–%d] %d bad content line; skipping", pos0, pos1, nline)
      nskipped = nskipped + 1
    else
      noiseln ("»»» [%d–%d] “%s” [%s] “%s”",
               pos0, pos1, name, fmt_calendar_params (params), value)
      acc [#acc + 1] = { pos = { pos0, pos1 }
                       , name = name, params = params, value = value }
    end

    return parse_calendar_lines (raw, pos1, acc, consumed + (pos1 - pos0),
                                 nline + 1, nskipped)
  end
end --- [parse_calendar_lines]

--- calendar_of_lines (lines) -> calendar
--- Turn the flat list of content lines (tables with *name*, *value* and
--- *params*) into a nested structure. The result is a root table whose
--- *value* list holds the top level scopes. A scope has the shape
---     { kind = "scope", name = string, params = list, value = list }
--- and every other line becomes
---     { kind = "other", name = lowercased name, value = string, params = list }
--- Raises an error if the lines do not form a valid structure.
local calendar_of_lines do
  --[[--

    line handlers either return *true*, new line index, data or
    *false*, new line index and an error message.

  --]]--
  local process

  process = function (lines, n, acc)
    if n == nil then n = 1 end

    if n > #lines then
      if acc == nil then
        return false, n, "no usable input"
      end
      --- NOTE: a scope still open at the end of the input is returned as if
      --- it had been closed.
      return true, n, acc
    end

    local cur = lines [n]
    if cur == nil then
      return false, n, "hit garbage line"
    end

    local curname  = stringlower (cur.name)
    local curvalue = cur.value

    if curname == "begin" then
      if acc == nil then
        --- Nothing has been accepted yet: the input has to start with a
        --- calendar object, which also creates the root container.
        if stringlower (curvalue) ~= "vcalendar" then
          return false, n, stringformat ("expected “BEGIN:VCALENDAR”, \z
                                          got “BEGIN:%s”", curvalue)
        end
        acc = { value = { } }
      end

      local ok, nn, ret = process (lines, n + 1,
                                   { kind   = "scope"
                                   , name   = curvalue
                                   , params = cur.params
                                   , value  = { }
                                   })
      if not ok then
        return false, nn, stringformat ("bad nested content in “%s” scope: %s",
                                        curvalue, ret)
      end
      --- parsing successful, append to current elements and continue with
      --- the next line
      acc.value [#acc.value + 1] = ret
      return process (lines, nn, acc)
    end

    if acc == nil then
      return false, n, stringformat ("“%s” line outside of a VCALENDAR scope",
                                     cur.name)
    end

    if curname == "end" then
      local expected = acc.name
      if expected == nil then
        --- the root container has no name: there is no scope to close
        return false, n, stringformat ("“END:%s” without matching “BEGIN”",
                                       curvalue)
      end
      if stringlower (curvalue) ~= stringlower (expected) then
        return false, n, stringformat ("scope mismatch: \z
                                        expecting “%s”, got “%s”",
                                       expected, curvalue)
      end
      --- closing the current scope, return accu
      return true, n + 1, acc
    end

    acc.value [#acc.value + 1] = { kind   = "other"
                                 , name   = curname
                                 , value  = curvalue
                                 , params = cur.params
                                 }

    return process (lines, n + 1, acc)
  end

  calendar_of_lines = function (lines)
    local ok, n, components = process (lines)
    if not ok then
      return error (stringformat ("error at index %d: %s", n, components))
    end
    return components
  end
end --- [calendar_of_lines]


--[[--

  lines_of_calendar (cal) -> bool, lines
  — takes a structured calendar and returns a flattened representation of lines

  *cal* is either a list of calendar objects or a root table without *kind*
  whose *value* field holds such a list. An object of kind "scope" is
  emitted as a BEGIN line, the lines of its children and an END line; an
  object of kind "other" becomes a single line with its name in upper case.

  On failure the function returns *false* and an error message. The
  parameters *n* and *acc* carry the state of the recursion and must be
  omitted by callers.

--]]--

local lines_of_calendar do
  local stringupper = string.upper

  lines_of_calendar = function (cal, n, acc)
    if n == nil then
      local items = cal
      --- accept the root container as well as a bare list of objects
      if cal.kind == nil and type (cal.value) == "table" then
        items = cal.value
      end
      return lines_of_calendar (items, 1, { })
    end

    --- all objects on this level have been emitted
    if n > #cal then return true, acc end

    local cur = cal [n]
    if type (cur) ~= "table" then
      return false, stringformat ("bad calendar object at index %d", n)
    end

    local curkind = cur.kind
    local curname = cur.name

    if curkind ~= "scope" and curkind ~= "other" then
      return false, stringformat ("invalid object kind “%s” at index %d \z
                                   in calendar object", tostring (curkind), n)
    end

    if type (curname) ~= "string" then
      return false, stringformat ("calendar object at index %d \z
                                   lacks a name", n)
    end

    if curkind == "scope" then
      local children = cur.value
      if type (children) ~= "table" then
        return false, stringformat ("scope “%s” at index %d \z
                                     lacks a list of components", curname, n)
      end

      acc [#acc + 1] = { name = "BEGIN", value = curname, params = cur.params }
      --- descend into the scope; the children append to the same accu
      local ok, ret = lines_of_calendar (children, 1, acc)
      if not ok then
        return false, stringformat ("bad calendar component at index %d \z
                                     inside “%s” scope: %s",
                                    n, curname, ret)
      end
      acc [#acc + 1] = { name = "END", value = curname }

      return lines_of_calendar (cal, n + 1, acc)
    end

    --- kind "other"
    acc [#acc + 1] = { name   = stringupper (curname)
                     , value  = cur.value
                     , params = cur.params
                     }
    return lines_of_calendar (cal, n + 1, acc)
  end
end --- [lines_of_calendar]

--[[--

  fmt_calendar (cal [, fold [, newline]]) -> true, string | false, message
  — takes a structured calendar and emits a string in RFC2445 format.

  *fold* and *newline* are handed on to fmt_calendar_lines, which applies its
  own defaults if they are nil. The first return value tells whether the
  conversion succeeded; the second one is the formatted calendar or, on
  failure, an error message.

--]]--

local fmt_calendar = function (cal, fold, newline)
  local ok, lines = lines_of_calendar (cal)
  if not ok then
    return false, stringformat ("failed to convert calendar to temporary line format: %s",
                                lines)
  end

  --- Callers destructure the result as “ok, string”, so the success case
  --- has to carry the flag as well.
  return true, fmt_calendar_lines (lines, fold, newline)
end

-------------------------------------------------------------------------------
--- API
-------------------------------------------------------------------------------

--- parse (raw) -> true, calendar | false, message
--- Parse the calendar text *raw* in two protected steps: split it into
--- content lines, then build the nested calendar structure from them. An
--- error raised by either step is turned into a *false* result whose message
--- names the step that failed.
local parse = function (raw)
  local lines_ok, lines = pcall (parse_calendar_lines, raw)
  if lines_ok then
    local obj_ok, obj = pcall (calendar_of_lines, lines)
    if obj_ok then
      return true, obj
    end
    return false, "failed to internalize calendar structure: " .. obj
  end
  return false, "failed to parse calendar object: " .. lines
end

--- parse_handle (fh) -> true, calendar | false, message | nil
--- Read everything from the file handle *fh* and parse it. Yields nil if
--- the read itself produced nothing.
local parse_handle = function (fh)
  local content = fh:read ("*a")
  if content ~= nil then
    return parse (content)
  end
  return nil
end

--- parse_file (fname) -> true, calendar | false, message
--- Open the file *fname* for reading, parse its contents and close it again.
--- A file that cannot be opened yields *false* and the reason instead of an
--- error on the missing handle.
local parse_file = function (fname)
  local fh, err = ioopen (fname, "r")
  if fh == nil then
    return false, stringformat ("failed to open calendar file: %s",
                                tostring (err))
  end

  local ok, cal = parse_handle (fh)
  fh:close ()
  return ok, cal
end

--- parse_stdin () -> result of parse_handle
--- Parse the calendar arriving on standard input.
local function parse_stdin ()
  return parse_handle (iostdin)
end

--- format (cal [, newline]) -> result of fmt_calendar
--- Format the structured calendar *cal*, asking for *newline* as line
--- separator; a plain line feed is requested if none is given. The fold
--- width is passed as nil.
local format = function (cal, newline)
  local eol = newline or "\n"
  return fmt_calendar (cal, nil, eol)
end

--- output_handle (cal, fh [, newline]) -> true | false, message
--- Format the structured calendar *cal* and write the result to the file
--- handle *fh*. *newline* is the line separator to request from the
--- formatter and defaults to a plain line feed. Both a formatting failure
--- and a failed write are reported as *false* plus a message.
local output_handle = function (cal, fh, newline)
  local ok, str = fmt_calendar (cal, nil, newline or "\n")
  if not ok then
    return false, str
  end

  local written, err = fh:write (str)
  if not written then
    return false, "failed to write calendar: " .. tostring (err)
  end

  return true
end

--- output (cal) -> result of output_handle
--- Write the formatted calendar *cal* to standard output.
local function output (cal)
  return output_handle (cal, iostdout)
end

--- Public interface of the module: parsing from a string, a file handle, a
--- named file or standard input, and formatting to a string, standard
--- output or a file handle.
return {
  parse         = parse,
  parse_handle  = parse_handle,
  parse_file    = parse_file,
  parse_stdin   = parse_stdin,
  format        = format,
  output        = output,
  output_handle = output_handle,
}