summaryrefslogtreecommitdiff
path: root/mod/tex/context/third/rst/rst_helpers.lua
diff options
context:
space:
mode:
authorPhilipp Gesang <pgesang@ix.urz.uni-heidelberg.de>2010-12-30 16:05:45 +0100
committerPhilipp Gesang <pgesang@ix.urz.uni-heidelberg.de>2010-12-30 16:05:45 +0100
commit6dae96e6a7bb0dba0a43fc08ecf3b4614f80006c (patch)
treeac9e9dad8b326d6b39b76148d88cafba3c52fe5c /mod/tex/context/third/rst/rst_helpers.lua
parent1af3e6e47b9072c481aa2d40cedd3b9b99220817 (diff)
downloadcontext-rst-6dae96e6a7bb0dba0a43fc08ecf3b4614f80006c.tar.gz
module code
Diffstat (limited to 'mod/tex/context/third/rst/rst_helpers.lua')
-rw-r--r--mod/tex/context/third/rst/rst_helpers.lua632
1 files changed, 632 insertions, 0 deletions
diff --git a/mod/tex/context/third/rst/rst_helpers.lua b/mod/tex/context/third/rst/rst_helpers.lua
new file mode 100644
index 0000000..f081559
--- /dev/null
+++ b/mod/tex/context/third/rst/rst_helpers.lua
@@ -0,0 +1,632 @@
+#!/usr/bin/env texlua
+--------------------------------------------------------------------------------
+-- FILE: rst_helpers.lua
+-- USAGE: ./rst_helpers.lua
+-- DESCRIPTION: Complement to the reStructuredText parser
+-- AUTHOR: Philipp Gesang (Phg), <megas.kapaneus@gmail.com>
+-- VERSION: 1.0
+-- CREATED: 07/09/10 01:03:08 CEST
+--------------------------------------------------------------------------------
+--
+
+local utf = unicode.utf8
+local P, R, S, V, match
+ = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match
+
+local C, Carg, Cb, Cc, Cg, Cmt, Cp, Cs, Ct
+ = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct
+
+helpers = helpers or {}
+helpers.table = {}
+helpers.cell = {}
+
+function helpers.dbg_writef(...)
+ if helpers_debug == true then
+ io.write(string.format(...))
+ end
+end
+
+--helpers.dbg_write = dbg_write
+local dbg_write = helpers.dbg_writef
+
+helpers.patterns = {}
+
+do
+ local p = helpers.patterns
+ p.dash = P"-"
+ p.equals = P"="
+ p.plus = P"+"
+ p.bar = P"|"
+ p.eol = P"\n"
+ p.last = -P(1)
+ p.space = P" "
+
+ p.dash_or_equals = p.dash + p.equals
+
+ p.celldelim = p.bar + p.plus
+ p.cellcontent = (1 - p.celldelim)
+ p.cell = p.celldelim * C((1 - p.celldelim)^1) * #p.celldelim
+ p.cell_line = p.plus * p.dash^1 * #p.plus
+ p.dashesonly = p.dash^1 * p.last
+ p.spacesonly = p.space^1 * p.last
+
+ p.col_start = Cp() * p.dash_or_equals^1
+ p.col_stop = p.dash_or_equals^1 * Cp()
+ p.column_starts = p.col_start * ( p.space^1 * p.col_start)^1
+ p.column_stops = p.col_stop * ( p.space^1 * p.col_stop)^1
+
+ p.st_headsep = p.equals^1 * (p.space^1 * p.equals^1)^1
+ p.st_colspan = p.dash^1 * (p.space^1 * p.dash^1)^0 * p.space^0 * p.last
+ p.st_span_starts = Ct(Cp() * p.dash^1 * (p.space^1 * Cp() * p.dash^1)^0)
+ p.st_span_stops = Ct(p.dash^1 * Cp() * (p.space^1 * p.dash^1 * Cp())^0)
+
+
+ p.cells = P{
+ [1] = "cells",
+ cells = p.celldelim
+ * (C(V"in_cell")
+ * (V"matchwidth" * C(V"in_cell")) ^1),
+
+ in_cell = p.cellcontent^1
+ + (p.dash - p.cellcontent)^1,
+
+ matchwidth = Cmt(C(p.celldelim) * Carg(1), function(s,i,del, layout)
+ local pos = 1
+ local lw = layout.widths
+ for n=1, #lw do
+ pos = pos + lw[n] + 1
+ if (i - 1) == pos then return true end
+ end
+ return false
+ end),
+ }
+
+ p.sep_line = p.plus * (p.dash^1 * p.plus)^1 * p.last
+ p.sep_head = p.plus * (p.equals^1 * p.plus)^1 * p.last
+
+ p.sep_part = ((1 - p.cell_line)^0 * p.cell_line) - p.sep_line
+
+ p.new_row = p.sep_line + p.sep_head + p.sep_part
+
+ p.whitespace = S" \t\v\r\n"^1
+ p.strip = p.whitespace^0 * C((1 - (p.whitespace * p.last))^1) * p.whitespace^0 * p.last
+
+
+ local colon = P":"
+ local escaped_colon = P"\\:"
+ local nocolon = (escaped_colon + (1 - colon))^1
+ p.colon_right = nocolon * colon
+ p.colon_keyval = C(nocolon) * colon * p.space^1 * C((1 - (p.space^0 * P(-1)))^1)
+
+ -- color expression matching for text roles
+ local digit = R"09"
+ local dot = P"."
+ local colvalue = digit * dot * digit^1
+ + digit
+ + dot * digit^1
+ local coldelim = P"_" + P"-"
+ p.rgbvalues = P"rgb_"
+ * Ct( C(colvalue) * coldelim * C(colvalue) * coldelim * C(colvalue) )
+end
+
+function helpers.cell.create(raw, n_row, n_col, parent, variant)
+ local p = helpers.patterns
+ local cell = {}
+ cell.stripped = raw and p.strip:match(raw) or ""
+ cell.content = raw
+ cell.width = raw and utf.len(raw) or 0
+ cell.bytes = raw and #raw or 0
+ cell.variant = "normal" -- [normal|separator|y_continue|x_continue]
+ cell.pos = {}
+ cell.pos.x = n_col
+ cell.pos.y = n_row
+ cell.span = {}
+ cell.span.x = 1
+ cell.span.y = 1
+ cell.parent = parent
+ return cell
+end
+
+function helpers.cell.get_x_span(content, layout, init)
+ local acc = 0
+ local lw = layout.widths
+ for n=init, #lw do
+ acc = acc + lw[n] + 1
+ if utf.len(content) + 1 == acc then
+ return n - init
+ end
+ end
+ return false
+end
+
+
+-- Extending a cell by 1 cell horizontally.
+function helpers.cell.add_x (cell)
+ cell.span.x = cell.span.x + 1
+end
+
+
+local function set_layout (line)
+ local p = helpers.patterns
+ local layout = {}
+ local slice = Ct((p.plus * C(p.dash^1) * #p.plus)^1)
+
+ layout.widths = {}
+ layout.slices = {}
+ for n, elm in ipairs(slice:match(line)) do
+ layout.widths[n] = #elm
+ layout.slices[n] = elm
+ end
+ return layout
+end
+
+helpers_debug = true
+function helpers.table.create(raw)
+ local newtab = {}
+ newtab.rows = {}
+ newtab.layout = set_layout(raw[1])
+
+ local p = helpers.patterns
+
+ newtab.resolve_parent = function(row, col, array)
+ local array = array or newtab.rows
+ local cell = array[row][col]
+ local par_row, par_col = row, col
+ if cell.parent then
+ par_row, par_col = newtab.resolve_parent(cell.parent.y, cell.parent.x)
+ end
+ return par_row, par_col
+ end
+
+ newtab.__init = function()
+ local hc = helpers.cell
+ local rowcount = 0
+ local newtablayout = newtab.layout
+ for nr, row in ipairs(raw) do
+ newtab.rows[nr] = {}
+ local this_row = newtab.rows[nr]
+ this_row.sepline = p.sep_line:match(row)
+ this_row.sephead = p.sep_head:match(row)
+ this_row.seppart = p.sep_part:match(row)
+ if this_row.sephead then
+ newtab.has_head = true
+ newtab.head_end = nr
+ end
+
+ local splitted = { p.cells:match(row, 1, newtablayout) }
+ local pos_layout, pos_row = 1, 1
+ local make_empty = {}
+ make_empty.n, make_empty.parent = 0, nil
+
+ while pos_layout <= #newtablayout.widths do
+ local splitpos = splitted[pos_layout]
+ local layoutwidth = newtablayout.widths[pos_layout]
+ local span = 1
+ local this
+
+ if make_empty.n > 0 then
+ make_empty.n = make_empty.n - 1
+ this = hc.create("", nr, pos_layout, make_empty.parent)
+ this.parent = make_empty.parent
+ p_row, p_col = newtab.resolve_parent(this.parent.y, this.parent.x)
+ local thisparent = newtab.rows[p_row][p_col]
+ if this_row.sepline or this_row.sephead or
+ newtab.rows[p_row][p_col].variant == "separator" then
+ this.variant = "separator"
+ else
+ this.variant = "empty1"
+ end
+ else
+ local cellwidth = utf.len(splitpos)
+ if cellwidth > layoutwidth then
+ span = span + hc.get_x_span(splitpos, newtablayout, pos_layout)
+ end
+ pos_row = pos_row + span
+ this = hc.create(splitpos, nr, pos_layout, nil)
+ if p.dashesonly:match(splitpos) or
+ this_row.sepline or this_row.sephead then
+ this.variant = "separator"
+ end
+ this.span.x = span
+ make_empty.n = span - 1
+ make_empty.parent = span > 1 and { y = nr, x = pos_layout } or nil
+ end
+
+ this_row[pos_layout] = this
+ pos_layout = pos_layout + 1
+ end -- while
+ end -- for loop over rows
+
+ local oldrows = newtab.rows
+ local newrows = oldrows
+ for nc, width in ipairs(newtablayout.widths) do
+ -- this is gonna be extremely slow but at least it's readable
+ local newrow
+ local currentrow = 1
+ for nr, row in ipairs(newrows) do
+ local cell = row[nc]
+ dbg_write("nc: %s, nr:%2s | %9s | ", nc, nr,cell.variant)
+ if row.sepline or row.sephead
+ or p.dashesonly:match(cell.content)
+ or cell.variant == "separator" then -- separator; skipping and beginning new row
+ newrows[nr][nc] = cell
+ currentrow = currentrow + 1
+ newrow = true
+ dbg_write("new >%24s< ", cell.stripped)
+ if cell.parent then dbg_write("parent |") else dbg_write("no par |") end
+ else
+ dbg_write("old >%24s< ", cell.stripped)
+ if cell.parent then dbg_write("parent |") else dbg_write("no par |") end
+ if newrow then
+ newrows[nr][nc] = cell
+ currentrow = currentrow + 1
+ else -- continuing parent
+
+ local par_row, par_col
+ local parent
+ if cell.parent then
+ par_row, par_col = newtab.resolve_parent(cell.parent.y, cell.parent.x, newrows)
+ dbg_write(" use %s,%2s | ", par_col, par_row)
+ else -- Using vertical predecessor.
+ par_row, par_col = newtab.resolve_parent(nr-1,nc, newrows)
+ dbg_write(" new %s,%2s | ", par_col, par_row)
+ end
+ parent = newrows[par_row][par_col]
+
+ if newrows[nr].seppart then
+ dbg_write("span++")
+ parent.span.y = parent.span.y + 1
+ end
+
+ parent.content = parent.content .. cell.content
+ parent.stripped = parent.stripped .. " " .. cell.stripped
+ cell.variant = "empty2"
+ cell.parent = { x = par_col, y = par_row }
+ end
+ newrow = false
+ end
+ dbg_write("\n")
+ newrows[nr][nc] = cell
+ end -- for loop over rows
+ end -- for loop over columns
+ --newtab.rows = oldrows
+ newtab.rows = newrows
+ end
+
+ newtab.__init()
+
+ newtab.__draw_debug = function()
+ for nr, row in ipairs(newtab.rows) do
+ for nc, cell in ipairs(row) do
+ local field = cell.variant:sub(1,7)
+ if cell.parent then
+ field = field .. string.format(" %s,%2s",cell.parent.x, cell.parent.y)
+ end
+ dbg_write("%12s | ", field)
+ end
+ dbg_write("\n")
+ end
+ end
+
+ return newtab
+end
+
+
+
+function helpers.table.resolve_parent (row, col, array)
+ local cell = array[row][col]
+ local par_row, par_col = row, col
+ if cell.parent then
+ par_row, par_col = self.resolve_parent(cell.parent.y, cell.parent.x)
+ end
+ return par_row, par_col
+end
+
+
+-- Check the column boundaries of a simple table.
+function helpers.get_st_boundaries (str)
+ local p = helpers.patterns
+ local starts, stops, slices = {}, {}, {}
+ for n, elm in ipairs({ p.column_starts:match(str) }) do
+ slices[n] = { start = elm }
+ starts[elm] = true
+ end
+ for n, elm in ipairs({ p.column_stops :match(str) }) do
+ slices[n]["stop"] = elm
+ stops[elm] = true
+ end
+ return { starts = starts, stops = stops, slices = slices }
+end
+
+function helpers.table.simple(raw)
+ local rows = {}
+ local multispans = {}
+ local bounds = helpers.get_st_boundaries(raw[1])
+ local p = helpers.patterns
+
+ for nr, row in ipairs(raw) do
+ local newrow = {}
+ local nc = 1
+ if not p.st_headsep:match(row) and
+ not p.st_colspan:match(row) then
+ local starts, stops = {}, {}
+ local check_span = false
+ if p.st_colspan:match(raw[nr+1]) then -- expect spans over several columns
+ starts = p.st_span_starts:match(raw[nr+1])
+ stops = p.st_span_stops :match(raw[nr+1])
+ check_span = true
+ else
+ for colnr, slice in ipairs(bounds.slices) do
+ starts[colnr] = slice.start
+ stops [colnr] = slice.stop
+ end
+ end
+
+ for nc, start in ipairs(starts) do
+ -- last column can exceed layout width
+ local stop = nc ~= #starts and stops[nc] or #row
+ local cell = {
+ content = "",
+ span = { x = 1, y = 1 },
+ --ignore = false
+ }
+ cell.content = string.strip(row:sub(start, stop))
+ if check_span then
+ local start_at, stop_at
+ for colnr, slice in ipairs(bounds.slices) do
+ if slice.start == start then
+ start_at = colnr
+ end
+ if start_at and
+ not (colnr == #bounds.slices) then
+ if slice.stop == stop then
+ stop_at = colnr
+ break
+ end
+ else -- last column, width doesn't matter
+ stop_at = colnr
+ end
+ end
+ cell.span.x = 1 + stop_at - start_at
+ end
+ newrow[nc] = cell
+ end
+ elseif p.st_colspan:match(row) then
+ newrow.ignore = true
+ elseif not rows.head_end and
+ nr > 1 and #raw > nr then -- ends the header
+ rows.head_end = nr
+ newrow.head_sep = true
+ newrow.ignore = true
+ else
+ newrow.ignore = true
+ end
+ rows[nr] = newrow
+ end
+
+ for nr, row in ipairs(rows) do
+ if not row.ignore and row[1].content == "" then
+ row.ignore = true
+ for nc, cell in ipairs(row) do
+ local par_row, par_col = helpers.table.resolve_parent(nr - 1, nc, rows)
+ parent = rows[par_row][par_col]
+ parent.content = parent.content .. " " .. cell.content
+ cell.content = ""
+ end
+
+ end
+ end
+
+ return rows
+end
+
+helpers.list = {}
+
+do
+ local c = {}
+ c.roman = S"ivxlcdm"^1
+ c.Roman = S"IVXLCDM"^1
+ c.alpha = R"az" - P"i" - P"v" - P"x" - P"l"
+ c.Alpha = R"AZ" - P"I" - P"V" - P"X" - P"L"
+ c.digit = R"09"^1
+ c.auto = P"#"
+
+ local stripme = S" ()."
+ local dontstrip = 1 - stripme
+ local itemstripper = stripme^0 * C(dontstrip^1) * stripme^0
+
+ local con = function (str)
+ str = itemstripper:match(str)
+ for conv, pat in next, c do
+ if pat:match(str) then
+ return conv
+ end
+ end
+ return false
+ end
+ helpers.list.conversion = con
+
+ local rnums = {
+ i = 1,
+ v = 5,
+ x = 10,
+ l = 50,
+ c = 100,
+ d = 500,
+ m = 1000,
+ }
+
+ local function roman_to_arab (str)
+ local n = 1
+ local curr, succ
+ local max_three = { }
+ local value = 0
+ while n <= #str do
+ if curr and curr == max_three[#max_three] then
+ if #max_three >= 3 then
+ return "Not a number"
+ else
+ max_three[#max_three+1] = curr
+ end
+ else
+ max_three = { curr }
+ end
+
+ curr = rnums[str:sub(n,n)] or 1
+
+ n = n + 1
+ succ = str:sub(n,n)
+
+ if succ and succ ~= "" then
+ succ = rnums[succ]
+ if curr < succ then
+ --n = n + 1
+ --value = value + succ - curr
+ value = value - curr
+ else
+ value = value + curr
+ end
+ else
+ value = value + curr
+ end
+ end
+ return value
+ end
+ helpers.list.roman_to_arab = roman_to_arab
+
+ local suc = function (str, old)
+ str, old = itemstripper:match(str), itemstripper:match(old)
+ local n_str, n_old = tonumber(str), tonumber(old)
+ if n_str and n_old then -- arabic numeral
+ return n_str == n_old + 1
+ end
+
+ local con_str, con_old = con(str), con(old)
+ if con_str == "alpha" or
+ con_str == "Alpha" then
+ return str:byte() == old:byte() + 1
+ else -- “I'm a Roman!” - “A woman?” - “No, *Roman*! - Au!” - “So your father was a woman?”
+ if not (str:lower() == str or
+ str:upper() == str) then -- uneven cased --> fail
+ return false
+ end
+
+ local trc = state.roman_cache
+ n_str = trc[str] or nil
+ n_old = trc[old] or nil
+ if not n_str then
+ n_str = roman_to_arab(str:lower())
+ trc[str] = n_str
+ end
+ if not n_old then
+ n_old = roman_to_arab(old:lower())
+ trc[old] = n_old
+ end
+ return n_str == n_old + 1
+ end
+ end
+ helpers.list.successor = suc
+
+ local greater = function (str, old)
+ str, old = itemstripper:match(str), itemstripper:match(old)
+ local n_str, n_old = tonumber(str), tonumber(old)
+ if n_str and n_old then -- arabic numeral
+ return n_str > n_old
+ end
+
+ local con_str, con_old = con(str), con(old)
+ if con_str == "alpha" or
+ con_str == "Alpha" then
+ return str:byte() > old:byte()
+ else
+ if not (str:lower() == str or
+ str:upper() == str) then -- uneven cased --> fail
+ return false
+ end
+
+
+ local trc = state.roman_cache
+ n_str = trc[str] or nil
+ n_old = trc[old] or nil
+ if not n_str then
+ n_str = roman_to_arab(str:lower())
+ trc[str] = n_str
+ end
+ if not n_old then
+ n_old = roman_to_arab(old:lower())
+ trc[old] = n_old
+ end
+ return n_str > n_old
+ end
+ end
+ helpers.list.greater = greater
+
+ local gd = function(str)
+ str = itemstripper:match(str)
+ local value
+ local con_str = con(str)
+ if con_str == "alpha" or
+ con_str == "Alpha" then
+ return str:byte()
+ else
+ if not (str:lower() == str or
+ str:upper() == str) then
+ return false
+ end
+
+ local trc = state.roman_cache
+ n_str = trc[str] or nil
+ if not n_str then
+ n_str = roman_to_arab(str:lower())
+ trc[str] = n_str
+ end
+ return n_str
+ end
+ end
+
+ helpers.list.get_decimal = gd
+end
+
+helpers.string = {}
+
+do
+ -- This grammar inside the function is slightly faster than the same as an upvalue
+ -- with the value of “width” repeatedly given via lpeg.Carg(). This holds
+ -- for repeated calls as well.
+ local ulen = utf.len
+ function helpers.string.wrapat (str, width)
+ local width = width or 65
+ local linelength = 0
+ local wrap = P{
+ [1] = "wrapper",
+
+ wrapper = Cs(V"nowhitespace"^0 * (Cs(V"wrapme") + V"other")^1),
+ whitespace = S" \t\v" + P"\n" / function() linelength = 0 end,
+ nowhitespace = 1 - V"whitespace",
+ ignore = P[[\\type{]] * (1 - P"}")^0 * P"}",
+ -- the initial whitespace of the “other” pattern must not be
+ -- enforced (“^1”) as it will break the exceptions (“ignore”
+ -- pattern)! In general it is better to have the wrapper ignore some
+ -- valid breaks than to not have it matching some valid strings at
+ -- all.
+ other = Cmt(V"whitespace"^0 * (V"ignore" + (1 - V"whitespace")^1), function(s,i,w)
+ linelength = linelength + ulen(w)
+ return true
+ end),
+ wrapme = Cmt(V"whitespace"^1 * (1 - V"whitespace" - V"ignore")^1, function(s,i,w)
+ local lw = ulen(w)
+ if linelength + lw > width then
+ linelength = lw
+ return true
+ end
+ return false
+ end) / function (word) return "\n" .. word:match("[^%s]+") end,
+ }
+
+ local reflowed = wrap:match(str)
+ return reflowed
+ end
+end
+
+return helpers
+