#!/usr/bin/env texlua
--------------------------------------------------------------------------------
--         FILE:  rst_helpers.lua
--        USAGE:  ./rst_helpers.lua
--  DESCRIPTION:  Complement to the reStructuredText parser
--       AUTHOR:  Philipp Gesang (Phg)
--      VERSION:  1.0
--      CREATED:  07/09/10 01:03:08 CEST
--------------------------------------------------------------------------------
-- Recovered from commit 6dae96e6a7bb0dba0a43fc08ecf3b4614f80006c
-- ("module code", Thu, 30 Dec 2010 16:05:45 +0100), which created
-- mod/tex/context/third/rst/rst_helpers.lua.
--

-- `unicode` and `lpeg` are read as globals; they are not loaded here.
local utf = unicode.utf8
local P, R, S, V, match
    = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match

local C, Carg, Cb, Cc, Cg, Cmt, Cp, Cs, Ct
    = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct

-- The module table is a global on purpose: an already existing `helpers`
-- table is extended rather than replaced.
helpers       = helpers or {}
helpers.table = {}
helpers.cell  = {}

-- printf-style debug output.
--   ...: a format string and its arguments, as for string.format.
-- Writes to standard output, but only while the global `helpers_debug` is
-- exactly `true`; otherwise the arguments are not even formatted.
function helpers.dbg_writef(...)
    if helpers_debug == true then
        io.write(string.format(...))
    end
end

--helpers.dbg_write = dbg_write
local dbg_write = helpers.dbg_writef

helpers.patterns = {}

do
    local p = helpers.patterns

    -- Single characters and anchors.
    p.dash   = P"-"
    p.equals = P"="
    p.plus   = P"+"
    p.bar    = P"|"
    p.eol    = P"\n"
    p.last   = -P(1)   -- succeeds only at the end of the subject
    p.space  = P" "

    p.dash_or_equals = p.dash + p.equals

    -- Grid tables: cells are delimited by "|" or "+".
    p.celldelim   = p.bar + p.plus
    p.cellcontent = (1 - p.celldelim)
    p.cell        = p.celldelim * C((1 - p.celldelim)^1) * #p.celldelim
    p.cell_line   = p.plus * p.dash^1 * #p.plus
    p.dashesonly  = p.dash^1 * p.last
    p.spacesonly  = p.space^1 * p.last

    -- Simple tables: a border line consists of runs of "-" or "=" separated
    -- by spaces. The Cp() captures yield the position where each run starts
    -- and the position just behind each run, respectively.
    p.col_start     = Cp() * p.dash_or_equals^1
    p.col_stop      = p.dash_or_equals^1 * Cp()
    p.column_starts = p.col_start * ( p.space^1 * p.col_start)^1
    p.column_stops  = p.col_stop  * ( p.space^1 * p.col_stop)^1

    p.st_headsep     = p.equals^1 * (p.space^1 * p.equals^1)^1
    p.st_colspan     = p.dash^1 * (p.space^1 * p.dash^1)^0 * p.space^0 * p.last
    p.st_span_starts = Ct(Cp() * p.dash^1 * (p.space^1 * Cp() * p.dash^1)^0)
    p.st_span_stops  = Ct(p.dash^1 * Cp() * (p.space^1 * p.dash^1 * Cp())^0)

    -- Splits one line of a grid table into the text of its cells. It has to
    -- be matched with the table layout as extra argument
    -- (pattern:match(line, 1, layout)) because "matchwidth" reads it via
    -- Carg(1).
    p.cells = P{
        [1] = "cells",
        cells = p.celldelim
              * (C(V"in_cell")
              * (V"matchwidth" * C(V"in_cell")) ^1),

        -- The second alternative can never match: cellcontent already
        -- accepts a dash. It is kept as it was.
        in_cell = p.cellcontent^1
                + (p.dash - p.cellcontent)^1,

        -- Accepts a delimiter only if it sits on one of the column
        -- boundaries that follow from layout.widths.
        matchwidth = Cmt(C(p.celldelim) * Carg(1), function(s, i, del, layout)
            local pos = 1
            local lw = layout.widths
            for n = 1, #lw do
                pos = pos + lw[n] + 1
                -- i points behind the delimiter, so the delimiter is at i - 1
                if (i - 1) == pos then return true end
            end
            return false
        end),
    }

    p.sep_line = p.plus * (p.dash^1   * p.plus)^1 * p.last
    p.sep_head = p.plus * (p.equals^1 * p.plus)^1 * p.last

    -- A line that contains a "+---+" segment somewhere without being a
    -- complete separator line.
    p.sep_part = ((1 - p.cell_line)^0 * p.cell_line) - p.sep_line

    p.new_row = p.sep_line + p.sep_head + p.sep_part

    p.whitespace = S" \t\v\r\n"^1
    -- Captures the subject without leading and trailing whitespace; fails
    -- (match returns nil) if there is nothing but whitespace.
    p.strip = p.whitespace^0 * C((1 - (p.whitespace * p.last))^1) * p.whitespace^0 * p.last


    local colon         = P":"
    local escaped_colon = P"\\:"
    local nocolon       = (escaped_colon + (1 - colon))^1
    p.colon_right  = nocolon * colon
    p.colon_keyval = C(nocolon) * colon * p.space^1 * C((1 - (p.space^0 * P(-1)))^1)

    -- color expression matching for text roles
    local digit    = R"09"
    local dot      = P"."
    local colvalue = digit * dot * digit^1
                   + digit
                   + dot * digit^1
    local coldelim = P"_" + P"-"
    p.rgbvalues = P"rgb_"
                * Ct( C(colvalue) * coldelim * C(colvalue) * coldelim * C(colvalue) )
end

-- Builds the record that describes one table cell.
--   raw:     the cell text as found in the source line, or nil
--   n_row:   row index, stored as pos.y
--   n_col:   column index, stored as pos.x
--   parent:  optional { x = ..., y = ... } reference to another cell
--   variant: optional variant name; defaults to "normal"
--            [normal|separator|y_continue|x_continue]
-- Returns a new table with the fields stripped, content, width (characters),
-- bytes, variant, pos, span and parent. The span starts out as 1x1.
function helpers.cell.create(raw, n_row, n_col, parent, variant)
    local p = helpers.patterns
    return {
        -- p.strip yields nil for whitespace-only text, hence the fallback
        stripped = raw and p.strip:match(raw) or "",
        content  = raw,
        width    = raw and utf.len(raw) or 0,
        bytes    = raw and #raw or 0,
        -- the argument used to be ignored; honour it when it is given
        variant  = variant or "normal",
        pos      = { x = n_col, y = n_row },
        span     = { x = 1, y = 1 },
        parent   = parent,
    }
end

-- Determines over how many additional layout columns a cell extends.
--   content: text of the cell
--   layout:  table whose `widths` field lists the column widths
--   init:    index of the column in which the cell starts
-- Returns the number of extra columns (0 if the cell fits its own column),
-- or false if the cell width does not end on any column boundary.
function helpers.cell.get_x_span(content, layout, init)
    local acc = 0
    local lw = layout.widths
    -- content width plus its closing delimiter; constant throughout the loop
    local target = utf.len(content) + 1
    for n = init, #lw do
        acc = acc + lw[n] + 1
        if target == acc then
            return n - init
        end
    end
    return false
end


-- Extending a cell by 1 cell horizontally.
--   cell: a cell record with a `span` table; span.x is incremented in place.
function helpers.cell.add_x (cell)
    cell.span.x = cell.span.x + 1
end


-- Derives the column layout of a grid table from a "+---+---+" line.
--   line: the separator line, normally the first line of the table
-- Returns { widths = {...}, slices = {...} }: for every column the number
-- of dashes and the dash run itself.
local function set_layout (line)
    local p = helpers.patterns
    local slice = Ct((p.plus * C(p.dash^1) * #p.plus)^1)

    local layout = { widths = {}, slices = {} }
    for n, elm in ipairs(slice:match(line)) do
        layout.widths[n] = #elm
        layout.slices[n] = elm
    end
    return layout
end

-- NOTE(review): this switches the debug output of helpers.dbg_writef on for
-- everyone who loads the module. It looks like a development leftover, but
-- it is left untouched here because it is visible behaviour -- confirm.
helpers_debug = true

-- Parses a grid table.
--   raw: array with the lines of the table; raw[1] must be a separator line
--        because the column layout is derived from it.
-- Returns a table with
--   rows:     array of rows, each an array of cell records (see
--             helpers.cell.create) plus the flags sepline, sephead, seppart
--   layout:   result of set_layout(raw[1])
--   has_head, head_end: set if a "+===+" line was found
--   resolve_parent, __init, __draw_debug: helper functions
function helpers.table.create(raw)
    local newtab = {}
    newtab.rows = {}
    newtab.layout = set_layout(raw[1])

    local p = helpers.patterns

    -- Follows the chain of `parent` references starting at (row, col) and
    -- returns the coordinates of the cell at its end. `array` defaults to
    -- newtab.rows and is handed on while recursing.
    newtab.resolve_parent = function(row, col, array)
        array = array or newtab.rows
        local cell = array[row][col]
        if cell.parent then
            return newtab.resolve_parent(cell.parent.y, cell.parent.x, array)
        end
        return row, col
    end

    newtab.__init = function()
        local hc = helpers.cell
        local newtablayout = newtab.layout

        -- First pass: split every line into cells, one per layout column.
        for nr, row in ipairs(raw) do
            newtab.rows[nr] = {}
            local this_row = newtab.rows[nr]
            this_row.sepline = p.sep_line:match(row)
            this_row.sephead = p.sep_head:match(row)
            this_row.seppart = p.sep_part:match(row)
            if this_row.sephead then
                newtab.has_head = true
                newtab.head_end = nr
            end

            local splitted = { p.cells:match(row, 1, newtablayout) }
            -- pos_layout counts layout columns, pos_row counts the cells
            -- actually present in this line. They differ as soon as a cell
            -- spans several columns, so `splitted` must be indexed by
            -- pos_row; indexing it by pos_layout picked the wrong (or no)
            -- cell behind a span.
            local pos_layout, pos_row = 1, 1
            local make_empty = { n = 0, parent = nil }

            while pos_layout <= #newtablayout.widths do
                local layoutwidth = newtablayout.widths[pos_layout]
                local span        = 1
                local this

                if make_empty.n > 0 then
                    -- column covered by a spanning cell to the left: insert
                    -- a placeholder that points to that cell
                    make_empty.n = make_empty.n - 1
                    this = hc.create("", nr, pos_layout, make_empty.parent)
                    local p_row, p_col = newtab.resolve_parent(this.parent.y, this.parent.x)
                    if this_row.sepline or this_row.sephead or
                        newtab.rows[p_row][p_col].variant == "separator" then
                        this.variant = "separator"
                    else
                        this.variant = "empty1"
                    end
                else
                    local splitpos  = splitted[pos_row]
                    local cellwidth = utf.len(splitpos)
                    if cellwidth > layoutwidth then
                        span = span + hc.get_x_span(splitpos, newtablayout, pos_layout)
                    end
                    pos_row = pos_row + 1
                    this = hc.create(splitpos, nr, pos_layout, nil)
                    if p.dashesonly:match(splitpos) or
                        this_row.sepline or this_row.sephead then
                        this.variant = "separator"
                    end
                    this.span.x = span
                    make_empty.n = span - 1
                    make_empty.parent = span > 1 and { y = nr, x = pos_layout } or nil
                end

                this_row[pos_layout] = this
                pos_layout = pos_layout + 1
            end -- while
        end -- for loop over rows

        -- Second pass, column by column: the lines between two separators
        -- belong to one logical cell, so every continuation line is merged
        -- into the first cell below the separator.
        local rows = newtab.rows
        for nc in ipairs(newtablayout.widths) do
            -- this is gonna be extremely slow but at least it's readable
            local newrow
            for nr, row in ipairs(rows) do
                local cell = row[nc]
                dbg_write("nc: %s, nr:%2s | %9s | ", nc, nr,cell.variant)
                if row.sepline or row.sephead
                    or p.dashesonly:match(cell.content)
                    or cell.variant == "separator" then -- separator; skipping and beginning new row
                    newrow = true
                    dbg_write("new >%24s< ", cell.stripped)
                    if cell.parent then dbg_write("parent |") else dbg_write("no par |") end
                else
                    dbg_write("old >%24s< ", cell.stripped)
                    if cell.parent then dbg_write("parent |") else dbg_write("no par |") end
                    if not newrow then -- continuing parent
                        local par_row, par_col
                        if cell.parent then
                            par_row, par_col = newtab.resolve_parent(cell.parent.y, cell.parent.x, rows)
                            dbg_write(" use %s,%2s | ", par_col, par_row)
                        else -- Using vertical predecessor.
                            par_row, par_col = newtab.resolve_parent(nr-1,nc, rows)
                            dbg_write(" new %s,%2s | ", par_col, par_row)
                        end
                        local parent = rows[par_row][par_col]

                        if row.seppart then
                            dbg_write("span++")
                            parent.span.y = parent.span.y + 1
                        end

                        parent.content  = parent.content  .. cell.content
                        parent.stripped = parent.stripped .. " " .. cell.stripped
                        cell.variant = "empty2"
                        cell.parent  = { x = par_col, y = par_row }
                    end
                    -- the first content line after a separator stays as is
                    newrow = false
                end
                dbg_write("\n")
            end -- for loop over rows
        end -- for loop over columns
    end

    newtab.__init()

    -- Prints one field per cell (variant, plus the parent coordinates if
    -- any) through the debug writer.
    newtab.__draw_debug = function()
        for nr, row in ipairs(newtab.rows) do
            for nc, cell in ipairs(row) do
                local field = cell.variant:sub(1,7)
                if cell.parent then
                    field = field .. string.format(" %s,%2s",cell.parent.x, cell.parent.y)
                end
                dbg_write("%12s | ", field)
            end
            dbg_write("\n")
        end
    end

    return newtab
end



-- Follows the chain of `parent` references in an array of rows.
--   row, col: coordinates of the cell to start from
--   array:    array of rows, each an array of cells
-- Returns the row and column of the cell at the end of the chain, i.e. the
-- start coordinates if the cell has no parent.
-- The recursion used to go through an undefined `self` and failed as soon
-- as a cell actually had a parent.
function helpers.table.resolve_parent (row, col, array)
    local cell = array[row][col]
    if cell.parent then
        return helpers.table.resolve_parent(cell.parent.y, cell.parent.x, array)
    end
    return row, col
end


-- Check the column boundaries of a simple table.
-- Locates the columns of a simple table on one of its border lines.
--   str: a border line, i.e. runs of "=" or "-" separated by spaces
-- Returns a table with
--   starts: set (position -> true) of the positions where a run begins
--   stops:  set (position -> true) of the positions just behind a run
--   slices: array of { start = ..., stop = ... }, one entry per column
function helpers.get_st_boundaries (str)
    local p = helpers.patterns
    local starts, stops, slices = {}, {}, {}
    for n, elm in ipairs({ p.column_starts:match(str) }) do
        slices[n] = { start = elm }
        starts[elm] = true
    end
    for n, elm in ipairs({ p.column_stops :match(str) }) do
        slices[n]["stop"] = elm
        stops[elm] = true
    end
    return { starts = starts, stops = stops, slices = slices }
end

-- Parses a simple table.
--   raw: array with the lines of the table; raw[1] is the top border from
--        which the column boundaries are taken.
-- Returns an array with one entry per input line. Content lines become
-- arrays of cells ({ content = ..., span = { x = ..., y = ... } }); border
-- and underline rows only carry `ignore = true` (and `head_sep` for the
-- line that ends the header). `head_end` on the result holds the index of
-- that line.
-- NOTE(review): string.strip is not part of the standard string library;
-- presumably the host environment provides it -- verify.
function helpers.table.simple(raw)
    local rows   = {}
    local bounds = helpers.get_st_boundaries(raw[1])
    local p      = helpers.patterns

    for nr, row in ipairs(raw) do
        local newrow = {}
        if not p.st_headsep:match(row) and
           not p.st_colspan:match(row) then
            local starts, stops = {}, {}
            local check_span = false
            -- there is no following line behind the last one
            local nextrow = raw[nr+1]
            if nextrow and p.st_colspan:match(nextrow) then -- expect spans over several columns
                starts = p.st_span_starts:match(nextrow)
                stops  = p.st_span_stops :match(nextrow)
                check_span = true
            else
                for colnr, slice in ipairs(bounds.slices) do
                    starts[colnr] = slice.start
                    stops [colnr] = slice.stop
                end
            end

            for nc, start in ipairs(starts) do
                -- last column can exceed layout width
                local stop = nc ~= #starts and stops[nc] or #row
                local cell = {
                    content = string.strip(row:sub(start, stop)),
                    span    = { x = 1, y = 1 },
                    --ignore = false
                }
                if check_span then
                    -- find the layout columns at which the underline
                    -- segment starts and ends
                    local start_at, stop_at
                    for colnr, slice in ipairs(bounds.slices) do
                        if slice.start == start then
                            start_at = colnr
                        end
                        if start_at and
                            not (colnr == #bounds.slices) then
                            if slice.stop == stop then
                                stop_at = colnr
                                break
                            end
                        else -- last column, width doesn't matter
                            stop_at = colnr
                        end
                    end
                    cell.span.x = 1 + stop_at - start_at
                end
                newrow[nc] = cell
            end
        elseif p.st_colspan:match(row) then
            newrow.ignore = true
        elseif not rows.head_end and
            nr > 1 and #raw > nr then -- ends the header
            rows.head_end   = nr
            newrow.head_sep = true
            newrow.ignore   = true
        else
            newrow.ignore = true
        end
        rows[nr] = newrow
    end

    -- A content row with an empty first cell continues the row above: its
    -- cells are appended to the cells above and the row itself is dropped.
    for nr, row in ipairs(rows) do
        if not row.ignore and row[1].content == "" then
            row.ignore = true
            for nc, cell in ipairs(row) do
                local par_row, par_col = helpers.table.resolve_parent(nr - 1, nc, rows)
                -- used to be an accidental global
                local parent = rows[par_row][par_col]
                parent.content = parent.content .. " " .. cell.content
                cell.content = ""
            end

        end
    end

    return rows
end

helpers.list = {}

do
    -- One pattern per enumerator style.
    local c = {}
    c.roman = S"ivxlcdm"^1
    c.Roman = S"IVXLCDM"^1
    c.alpha = R"az" - P"i" - P"v" - P"x" - P"l"
    c.Alpha = R"AZ" - P"I" - P"V" - P"X" - P"L"
    c.digit = R"09"^1
    c.auto  = P"#"

    -- Removes the decoration around an enumerator: "(a)", "1.", " iv) " ...
    local stripme      = S" ()."
    local dontstrip    = 1 - stripme
    local itemstripper = stripme^0 * C(dontstrip^1) * stripme^0

    -- Returns the name of the enumerator style of `str` ("roman", "Roman",
    -- "alpha", "Alpha", "digit" or "auto"), or false if none matches.
    -- NOTE(review): the styles are tried in the order in which next()
    -- happens to traverse `c`, which Lua leaves unspecified; "c", "d" and
    -- "m" (and their capitals) satisfy both the roman and the alphabetic
    -- pattern, so the answer for them depends on that order.
    local con = function (str)
        str = itemstripper:match(str)
        for conv, pat in next, c do
            if pat:match(str) then
                return conv
            end
        end
        return false
    end
    helpers.list.conversion = con

    local rnums = {
        i = 1,
        v = 5,
        x = 10,
        l = 50,
        c = 100,
        d = 500,
        m = 1000,
    }

    -- Converts a lowercase roman numeral into a number.
    -- A digit in front of a larger one is subtracted, every other digit is
    -- added; characters that are no roman digits count as 1.
    -- NOTE(review): for too long a run of equal digits the *string*
    -- "Not a number" is returned; the callers below do arithmetic on the
    -- result without checking for it.
    local function roman_to_arab (str)
        local n = 1
        local curr, succ
        local max_three = { }   -- run of equal digits seen so far
        local value = 0
        while n <= #str do
            -- here `curr` still holds the digit of the previous iteration
            if curr and curr == max_three[#max_three] then
                if #max_three >= 3 then
                    return "Not a number"
                else
                    max_three[#max_three+1] = curr
                end
            else
                max_three = { curr }
            end

            curr = rnums[str:sub(n,n)] or 1

            n = n + 1
            succ = str:sub(n,n)

            if succ and succ ~= "" then
                succ = rnums[succ]
                if curr < succ then
                    --n = n + 1
                    --value = value + succ - curr
                    value = value - curr
                else
                    value = value + curr
                end
            else
                value = value + curr
            end
        end
        return value
    end
    helpers.list.roman_to_arab = roman_to_arab

    -- Value of the roman numeral `str`, looked up in and stored to
    -- state.roman_cache under the original spelling.
    -- NOTE(review): `state` is a global that is not defined in this file;
    -- presumably the parser sets it up before lists are processed -- verify.
    local function roman_value (str)
        local trc = state.roman_cache
        local value = trc[str]
        if not value then
            value = roman_to_arab(str:lower())
            trc[str] = value
        end
        return value
    end

    -- True if neither all-lowercase nor all-uppercase.
    local function mixed_case (str)
        return not (str:lower() == str or str:upper() == str)
    end

    -- Is the enumerator `str` the immediate successor of `old`?
    -- Both are stripped of their decoration first. Numbers are compared as
    -- numbers, alphabetic enumerators by their first byte, everything else
    -- is treated as a roman numeral.
    local suc = function (str, old)
        str, old = itemstripper:match(str), itemstripper:match(old)
        local n_str, n_old = tonumber(str), tonumber(old)
        if n_str and n_old then -- arabic numeral
            return n_str == n_old + 1
        end

        local con_str = con(str)
        if con_str == "alpha" or
           con_str == "Alpha" then
            return str:byte() == old:byte() + 1
        end

        -- “I'm a Roman!” - “A woman?” - “No, *Roman*! - Au!” - “So your father was a woman?”
        if mixed_case(str) then -- uneven cased --> fail
            return false
        end
        return roman_value(str) == roman_value(old) + 1
    end
    helpers.list.successor = suc

    -- Does the enumerator `str` come anywhere behind `old`?
    -- Same rules as for the successor test, but with ">" as comparison.
    local greater = function (str, old)
        str, old = itemstripper:match(str), itemstripper:match(old)
        local n_str, n_old = tonumber(str), tonumber(old)
        if n_str and n_old then -- arabic numeral
            return n_str > n_old
        end

        local con_str = con(str)
        if con_str == "alpha" or
           con_str == "Alpha" then
            return str:byte() > old:byte()
        end

        if mixed_case(str) then -- uneven cased --> fail
            return false
        end
        return roman_value(str) > roman_value(old)
    end
    helpers.list.greater = greater

    -- Numeric value of an enumerator: the byte value of the first character
    -- for alphabetic ones, otherwise the value as roman numeral; false for
    -- mixed case. (The intermediate result used to leak into a global.)
    local gd = function(str)
        str = itemstripper:match(str)
        local con_str = con(str)
        if con_str == "alpha" or
           con_str == "Alpha" then
            return str:byte()
        end

        if mixed_case(str) then
            return false
        end
        return roman_value(str)
    end

    helpers.list.get_decimal = gd
end

helpers.string = {}

do
    -- This grammar inside the function is slightly faster than the same as an upvalue
    -- with the value of “width” repeatedly given via lpeg.Carg(). This holds
    -- for repeated calls as well.
    local ulen = utf.len

    -- Re-wraps text: a run of whitespace is replaced by a newline wherever
    -- the following word would make the current line longer than `width`.
    --   str:   the text
    --   width: maximum line length in characters; defaults to 65
    -- Returns the reflowed string (whatever the grammar yields for `str`).
    function helpers.string.wrapat (str, width)
        width = width or 65
        local linelength = 0   -- characters on the current output line
        local wrap = P{
            [1] = "wrapper",

            wrapper      = Cs(V"nowhitespace"^0 * (Cs(V"wrapme") + V"other")^1),
            -- a newline in the input starts a new line in the bookkeeping
            whitespace   = S" \t\v" + P"\n" / function() linelength = 0 end,
            nowhitespace = 1 - V"whitespace",
            ignore       = P[[\\type{]] * (1 - P"}")^0 * P"}",
            -- the initial whitespace of the “other” pattern must not be
            -- enforced (“^1”) as it will break the exceptions (“ignore”
            -- pattern)! In general it is better to have the wrapper ignore some
            -- valid breaks than to not have it matching some valid strings at
            -- all.
            other = Cmt(V"whitespace"^0 * (V"ignore" + (1 - V"whitespace")^1), function(s,i,w)
                linelength = linelength + ulen(w)
                return true
            end),
            -- matches only if the word does not fit any more; then the
            -- whitespace in front of it is turned into a newline
            wrapme = Cmt(V"whitespace"^1 * (1 - V"whitespace" - V"ignore")^1, function(s,i,w)
                local lw = ulen(w)
                if linelength + lw > width then
                    linelength = lw
                    return true
                end
                return false
            end) / function (word) return "\n" .. word:match("[^%s]+") end,
        }

        return wrap:match(str)
    end
end

return helpers