#!/usr/bin/env texlua
--------------------------------------------------------------------------------
--         FILE:  rst_helpers.lua
--        USAGE:  ./rst_helpers.lua
--  DESCRIPTION:  Complement to the reStructuredText parser
--       AUTHOR:  Philipp Gesang (Phg),
--      VERSION:  1.0
--      CREATED:  07/09/10 01:03:08 CEST
--------------------------------------------------------------------------------
--

-- `unicode` and `lpeg` are read as globals; they are expected to be supplied
-- by the host interpreter (the shebang names texlua).
local utf = unicode.utf8

local P, R, S, V, match
  = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match

local C, Carg, Cb, Cc, Cg, Cmt, Cp, Cs, Ct
  = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct

local helpers = {}
helpers.table = {}
helpers.cell  = {}

--- Formatted debug output.
-- Takes the same arguments as string.format and writes the result to the
-- default output, but only while the global `helpers_debug` is exactly `true`.
function helpers.dbg_writef(...)
  if helpers_debug == true then
    io.write(string.format(...))
  end
end

--helpers.dbg_write = dbg_write
local dbg_write = helpers.dbg_writef

--- LPeg patterns shared by the table helpers in this file.
helpers.patterns = {}

do
  local p = helpers.patterns

  -- Single characters.
  p.dash   = P"-"
  p.equals = P"="
  p.plus   = P"+"
  p.bar    = P"|"
  p.eol    = P"\n"
  p.last   = -P(1)          -- succeeds only at the end of the subject
  p.space  = P" "

  p.dash_or_equals = p.dash + p.equals

  -- Grid tables: cells are delimited by "|" or "+".
  p.celldelim   = p.bar + p.plus
  p.cellcontent = (1 - p.celldelim)
  p.cell        = p.celldelim * C((1 - p.celldelim)^1) * #p.celldelim
  p.cell_line   = p.plus * p.dash^1 * #p.plus
  p.dashesonly  = p.dash^1 * p.last
  p.spacesonly  = p.space^1 * p.last

  -- Simple tables: positions where runs of "-" / "=" start and stop.
  -- Note that a stop position is the one *after* the last rule character.
  p.col_start     = Cp() * p.dash_or_equals^1
  p.col_stop      = p.dash_or_equals^1 * Cp()
  p.column_starts = p.col_start * ( p.space^1 * p.col_start)^1
  p.column_stops  = p.col_stop  * ( p.space^1 * p.col_stop)^1

  p.st_headsep     = p.equals^1 * (p.space^1 * p.equals^1)^1
  p.st_colspan     = p.dash^1 * (p.space^1 * p.dash^1)^0 * p.space^0 * p.last
  p.st_span_starts = Ct(Cp() * p.dash^1 * (p.space^1 * Cp() * p.dash^1)^0)
  p.st_span_stops  = Ct(p.dash^1 * Cp() * (p.space^1 * p.dash^1 * Cp())^0)

  -- Splits one grid table line into the contents of its cells. The layout
  -- table (with a `widths` array) has to be passed as the extra argument of
  -- the match call; it is picked up through Carg(1).
  p.cells = P{
    [1] = "cells",

    cells = p.celldelim
          * (C(V"in_cell")
          * (V"matchwidth" * C(V"in_cell")) ^1),

    in_cell = p.cellcontent^1
            + (p.dash - p.cellcontent)^1,

    -- Accepts a delimiter only if it sits on one of the column boundaries
    -- that follow from the layout widths. `i` is the position right after
    -- the delimiter, hence the comparison against `i - 1`.
    matchwidth = Cmt(C(p.celldelim) * Carg(1), function(s, i, del, layout)
      local pos = 1
      local lw = layout.widths
      for n = 1, #lw do
        pos = pos + lw[n] + 1
        if (i - 1) == pos then return true end
      end
      return false
    end),
  }

  -- Row separators of grid tables.
  p.sep_line = p.plus * (p.dash^1   * p.plus)^1 * p.last
  p.sep_head = p.plus * (p.equals^1 * p.plus)^1 * p.last
  p.sep_part = ((1 - p.cell_line)^0 * p.cell_line) - p.sep_line
  p.new_row  = p.sep_line + p.sep_head + p.sep_part

  -- Captures the subject without leading and trailing whitespace. There is
  -- no match (nil) if nothing but whitespace is left.
  p.whitespace = S" \t\v\r\n"^1
  p.strip = p.whitespace^0 * C((1 - (p.whitespace * p.last))^1) * p.whitespace^0 * p.last
end

--- Create the record that describes one table cell.
-- @param raw      unprocessed cell text; a false value yields an empty cell
-- @param n_row    row index, stored as `pos.y`
-- @param n_col    column index, stored as `pos.x`
-- @param parent   optional reference to the cell this one belongs to
-- @param variant  optional cell variant; defaults to "normal". (The
--                 argument used to be accepted but silently ignored.)
-- @return the new cell table
function helpers.cell.create(raw, n_row, n_col, parent, variant)
  local p = helpers.patterns
  local cell = {}
  -- p.strip yields nil for empty / whitespace-only text, hence the fallback.
  cell.stripped = raw and p.strip:match(raw) or ""
  cell.content  = raw
  cell.width    = raw and utf.len(raw) or 0
  cell.bytes    = raw and #raw or 0
  cell.variant  = variant or "normal" -- [normal|separator|y_continue|x_continue]
  cell.pos      = {}
  cell.pos.x    = n_col
  cell.pos.y    = n_row
  cell.span     = {}
  cell.span.x   = 1
  cell.span.y   = 1
  cell.parent   = parent
  return cell
end

--- Find out how many additional layout columns a cell text covers.
-- Starting with column `init`, the layout widths (plus one delimiter each)
-- are summed up until the sum equals the length of `content` plus one.
-- @param content  cell text
-- @param layout   table with a `widths` array
-- @param init     index of the column the cell starts in
-- @return number of extra columns, or false if no boundary fits
function helpers.cell.get_x_span(content, layout, init)
  local acc = 0
  local lw = layout.widths
  local wanted = utf.len(content) + 1 -- loop invariant
  for n = init, #lw do
    acc = acc + lw[n] + 1
    if wanted == acc then
      return n - init
    end
  end
  return false
end

-- Extending a cell by 1 cell horizontally.
function helpers.cell.add_x (cell)
  cell.span.x = cell.span.x + 1
end

--- Derive the column layout from the first line of a grid table.
-- @param line  a line of the form "+----+---+"
-- @return table with `widths` (length of each run of dashes) and `slices`
--         (the runs themselves), both indexed by column
local function set_layout (line)
  local p = helpers.patterns
  local layout = {}
  local slice = Ct((p.plus * C(p.dash^1) * #p.plus)^1)

  layout.widths = {}
  layout.slices = {}
  for n, elm in ipairs(slice:match(line)) do
    layout.widths[n] = #elm
    layout.slices[n] = elm
  end
  return layout
end

-- Earlier implementation of helpers.table.create, disabled by its author and
-- kept here for reference only.
--function helpers.table.create(raw)
--local self = {}
--self.rows = {}
--local p = patterns
--self.resolve_parent = function(row, col)
--print(row, col)
--local cell = self.rows[row][col]
--local par_row, par_col
--if cell.parent then
--par_row, par_col = self.resolve_parent(cell.parent.y, cell.parent.x)
--else
--return par_row, par_col
--end
--end
--self.__init = function()
--local hc = helpers.cell
--self.layout = set_layout(raw[1])
--local rowcount = 0
-- first get the correct horizontal spans
--local next_is_new_row = true
--for n, line in ipairs(raw) do
-- caching the test results
--local sepline = p.sep_line:match(line)
--local sephead = p.sep_head:match(line)
--local seppart = p.sep_part:match(line)
--local newrow = sepline or sephead or seppart
--if newrow and n > 1 then
--if sephead then
--self.has_head = true
--end
--rowcount = rowcount + 1
--end
--if not (sepline or
--sephead) then
--local row = {}
--row.newrow = (newrow or next_is_new_row) and true or false
--next_is_new_row = false
--local sl = self.layout
--local splitted = {p.cells:match(line, 1, sl)}
--local pos_layout, pos_line = 1, 1
--local last = nil
--local ignore = {}
--ignore.n, ignore.parent = 0, nil
--while pos_layout <= #sl.slices do
--local splitpos = splitted[pos_line]
--local splitpos = splitted[pos_layout]
--local width_layout = sl.widths[pos_layout]
--local span = 1
--local this
--if ignore.n > 0 then
--ignore.n = ignore.n - 1
--this = hc.create(false, n, pos_layout, true)
--row[pos_layout] = this
--this.parent = ignore.parent
--else
--local width_cell = utf.len(splitpos)
--if width_cell > width_layout then
--print(splitpos, width_cell, sl.slices[pos_layout], width_layout)
-- check the horizontal span
--span = span + hc.get_x_span(splitpos, sl, pos_layout)
--end
--pos_line = pos_line + span
--this = hc.create(splitpos, n, pos_layout, false)
--if p.dashesonly:match(splitpos) then
--this.variant = "separator"
--end
--this.span.x = span
--last = this
--row[pos_layout] = this
--ignore.n = span - 1
--ignore.parent = ignore.n > 0 and { y = rowcount, x = pos_layout } or nil
--end
--pos_layout = pos_layout + 1
--print(">",this.pos.y,this.pos.x,this.span.y,this.span.x)
--if this.span.y > 1 then
--print(">"..this.content.."<")
--end
--end
--self.rows[#self.rows+1] = row
--print(#self.rows.."<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
--for i,j in ipairs(row) do print(i,j.content) end
--print(row.newrow)
--else -- separator
--next_is_new_row = true
--end
--print(string.format("[t-%2s,%2s]> %s", n, m, cell))
--end
--print("rc = "..rowcount, "nr = "..#self.rows)
--self.rowcount = rowcount
--if self.rowcount < #self.rows then -- some cells span vertically
--local has_partsep = function(row)
--for n, cell in ipairs(row) do
--if cell.variant == "separator" then
--return true
--end
--end
--return false
--end
--local dont_do_next = false
--for nr,row in ipairs(self.rows) do
--local ps = has_partsep(row)
--row.ignore = ps and true or false
--if ps then
--self.rows[nr+1].newrow = true
--end
--if (not row.newrow or ps) and not dont_do_next then
--print(">>>>>>>>>> NEXT <<<<<<<<<<<<")
--print(row.newrow and has_partsep(row))
-- continues last row either fully or partially
--for nc, cell in ipairs(row) do
--print(nr, nc, cell.content)
--if cell.content == false then -- empty cell
--elseif cell.variant ~= "separator" then
--local par_row, par_col
--if cell.parent then
--par_row, par_col = self.resolve_parent(cell.parent.y, cell.parent.x)
--print("old parent: ", par_row, par_col)
--else -- no previous span, setting parent to preceding row
--par_row, par_col = nr -1, nc
--print("new parent: ", par_row, par_col)
--end
--print (nr, nc, cell.content, cell.parent, par_row, par_col)
--local parent = self.rows[par_row][par_col]
--parent.span.y = parent.span.y + 1
--parent.content = parent.content .. cell.content
--parent.stripped = parent.stripped .. " " .. cell.stripped
--if ps then
--local successor = self.rows[nr+1][nc]
--successor.parent = { x = par_col, y = par_row }
--parent.span.y = parent.span.y + 1
--parent.content = parent.content .. successor.content
--parent.stripped = parent.stripped .. " " .. successor.stripped
--dont_do_next = true
--else
--dont_do_next = false
--end
--print(cell.content, parent.content)
--add_y(cell)
--end
--end
--elseif dont_do_next then
--row.ignore = true
--dont_do_next = false
--end
--end
--end -- vertical span handler
--end -- end __init
--self.__init()
--return self
--end

-- NOTE(review): this assigns a *global* at load time and thereby switches on
-- all dbg_writef output. Presumably a debugging leftover -- confirm before
-- changing, it is kept as is to preserve the module's load-time behaviour.
helpers_debug = true

--- Build the cell structure of a grid table.
-- @param raw  array of the table's lines; raw[1] has to be the topmost
--             border ("+---+---+") since the column layout is taken from it
-- @return table with
--           rows     one array of cells per input line; every row also
--                    carries the `sepline` / `sephead` / `seppart` results
--           layout   result of set_layout(raw[1])
--           has_head, head_end   set if a "+===+" line was found
function helpers.table.create(raw)
  local self = {}
  self.rows = {}
  self.layout = set_layout(raw[1])

  local p = helpers.patterns

  -- Follows the chain of `parent` references starting at (row, col) and
  -- returns the position of the cell at its end. `array` defaults to
  -- self.rows and is handed on to the recursive call.
  self.resolve_parent = function(row, col, array)
    array = array or self.rows
    local cell = array[row][col]
    local par_row, par_col = row, col
    if cell.parent then
      par_row, par_col = self.resolve_parent(cell.parent.y, cell.parent.x, array)
    end
    return par_row, par_col
  end

  self.__init = function()
    local hc = helpers.cell
    local selflayout = self.layout

    -- First pass: split every line into cells and find horizontal spans.
    for nr, row in ipairs(raw) do
      self.rows[nr] = {}
      local this_row = self.rows[nr]
      this_row.sepline = p.sep_line:match(row)
      this_row.sephead = p.sep_head:match(row)
      this_row.seppart = p.sep_part:match(row)
      if this_row.sephead then
        self.has_head = true
        self.head_end = nr
      end

      -- One capture per cell that is actually present in the line, so a
      -- cell spanning several columns contributes a single entry.
      local splitted = { p.cells:match(row, 1, selflayout) }
      local pos_layout = 1 -- current column of the layout
      local pos_cell   = 1 -- next unused entry of `splitted`
      local make_empty = { n = 0, parent = nil }

      while pos_layout <= #selflayout.widths do
        local layoutwidth = selflayout.widths[pos_layout]
        local span = 1
        local this

        if make_empty.n > 0 then
          -- Column is covered by a spanning cell to the left: insert a
          -- placeholder that refers to that cell.
          make_empty.n = make_empty.n - 1
          this = hc.create("", nr, pos_layout, make_empty.parent)
          this.parent = make_empty.parent
          local p_row, p_col = self.resolve_parent(this.parent.y, this.parent.x)
          local thisparent = self.rows[p_row][p_col]
          if this_row.sepline or this_row.sephead
            or thisparent.variant == "separator" then
            this.variant = "separator"
          else
            this.variant = "empty1"
          end
        else
          -- Indexing by the layout column instead would run past the end
          -- of `splitted` for any cell to the right of a spanning one.
          local splitpos = splitted[pos_cell]
          pos_cell = pos_cell + 1
          local cellwidth = utf.len(splitpos)
          if cellwidth > layoutwidth then
            span = span + hc.get_x_span(splitpos, selflayout, pos_layout)
          end
          this = hc.create(splitpos, nr, pos_layout, nil)
          if p.dashesonly:match(splitpos) or
            this_row.sepline or this_row.sephead then
            this.variant = "separator"
          end
          this.span.x = span

          make_empty.n = span - 1
          make_empty.parent = span > 1 and { y = nr, x = pos_layout } or nil
        end

        this_row[pos_layout] = this
        pos_layout = pos_layout + 1
      end -- while
    end -- for loop over rows

    -- Second pass, column by column: every cell that neither is a
    -- separator nor directly follows one continues a cell further up and
    -- gets its text appended to that cell. The rows are modified in place.
    local rows = self.rows
    for nc, width in ipairs(selflayout.widths) do
      -- this is gonna be extremely slow but at least it's readable
      local newrow
      for nr, row in ipairs(rows) do
        local cell = row[nc]
        dbg_write("nc: %s, nr:%2s | %9s | ", nc, nr,cell.variant)
        if row.sepline or row.sephead
          or p.dashesonly:match(cell.content)
          or cell.variant == "separator" then -- separator; skipping and beginning new row
          newrow = true
          dbg_write("new >%24s< ", cell.stripped)
          if cell.parent then dbg_write("parent |") else dbg_write("no par |") end
        else
          dbg_write("old >%24s< ", cell.stripped)
          if cell.parent then dbg_write("parent |") else dbg_write("no par |") end
          if not newrow then -- continuing parent
            local par_row, par_col
            if cell.parent then
              par_row, par_col = self.resolve_parent(cell.parent.y, cell.parent.x, rows)
              dbg_write(" use %s,%2s | ", par_col, par_row)
            else -- Using vertical predecessor.
              par_row, par_col = self.resolve_parent(nr-1,nc, rows)
              dbg_write(" new %s,%2s | ", par_col, par_row)
            end
            local parent = rows[par_row][par_col]

            if rows[nr].seppart then
              dbg_write("span++")
              parent.span.y = parent.span.y + 1
            end

            --if cell.variant ~= "empty1" then
            -- Cells of type empty1 already have their content
            -- delegated onto their respective horizontal parent.
            parent.content = parent.content .. cell.content
            parent.stripped = parent.stripped .. " " .. cell.stripped
            cell.variant = "empty2"
            --end
            cell.parent = { x = par_col, y = par_row }
          end
          newrow = false
        end
        dbg_write("\n")
      end -- for loop over rows
    end -- for loop over columns
  end

  self.__init()

  -- Dumps variant and parent of every cell through dbg_writef.
  self.__draw_debug = function()
    for nr, row in ipairs(self.rows) do
      for nc, cell in ipairs(row) do
        local field = cell.variant:sub(1,7)
        if cell.parent then
          field = field .. string.format(" %s,%2s",cell.parent.x, cell.parent.y)
        end
        dbg_write("%12s | ", field)
        --dbg_write("%10s | ",(cell.parent and "true" or "false"))
      end
      dbg_write("\n")
    end
  end
  --self.__draw_debug()

  return self
end

--- Follow the `parent` references of a cell to the cell they originate from.
-- @param row    row index of the cell to start with
-- @param col    column index of the cell to start with
-- @param array  array of rows, each an array of cells
-- @return row and column index of the cell at the end of the chain; this is
--         (row, col) itself if the cell has no parent
function helpers.table.resolve_parent (row, col, array)
  local cell = array[row][col]
  local par_row, par_col = row, col
  if cell.parent then
    -- There is no `self` in this scope; recurse through the module table
    -- and keep looking in the same array.
    par_row, par_col = helpers.table.resolve_parent(cell.parent.y, cell.parent.x, array)
  end
  return par_row, par_col
end

-- Check the column boundaries of a simple table.
function helpers.get_st_boundaries (str)
  local p = helpers.patterns
  -- starts / stops: sets keyed by position; slices: per column
  -- { start = ..., stop = ... } in the order of appearance.
  local starts, stops, slices = {}, {}, {}
  for n, elm in ipairs({ p.column_starts:match(str) }) do
    slices[n] = { start = elm }
    starts[elm] = true
  end
  for n, elm in ipairs({ p.column_stops :match(str) }) do
    slices[n]["stop"] = elm
    stops[elm] = true
  end
  return { starts = starts, stops = stops, slices = slices }
end

-- Returns the cell at (row, col) after following its chain of `parent`
-- references, or nil if there is no cell at that position.
local function st_continued_cell (rows, row, col)
  local cell = rows[row] and rows[row][col]
  while cell and cell.parent do
    row, col = cell.parent.y, cell.parent.x
    cell = rows[row] and rows[row][col]
  end
  return cell, row, col
end

--- Build the cell structure of a simple table.
-- @param raw  array of the table's lines; the column boundaries are taken
--             from raw[1]
-- @return array with one entry per input line. Entries of content lines hold
--         the cells ({ content = ..., span = { x = ..., y = ... } }); rule
--         lines and continuation lines are flagged with `ignore = true`.
--         `head_end` is set on the result if a header separator was found.
function helpers.table.simple(raw)
  local rows = {}
  local bounds = helpers.get_st_boundaries(raw[1])
  local p = helpers.patterns

  for nr, row in ipairs(raw) do
    local newrow = {}
    if not p.st_headsep:match(row) and
       not p.st_colspan:match(row) then
      local starts, stops = {}, {}
      local check_span = false
      local nextrow = raw[nr+1] -- nil on the last line
      if nextrow and p.st_colspan:match(nextrow) then -- expect spans over several columns
        starts = p.st_span_starts:match(nextrow)
        stops  = p.st_span_stops :match(nextrow)
        check_span = true
      else
        for colnr, slice in ipairs(bounds.slices) do
          starts[colnr] = slice.start
          stops [colnr] = slice.stop
        end
      end

      for nc, start in ipairs(starts) do
        -- last column can exceed layout width
        local stop = nc ~= #starts and stops[nc] or #row
        local cell = {
          content = "",
          span = { x = 1, y = 1 },
          --ignore = false
        }
        -- string.strip is not part of stock Lua; presumably provided by
        -- the host environment -- TODO confirm.
        cell.content = string.strip(row:sub(start, stop))
        dbg_write(">>%s<<\t>>%s<<\n", row:sub(start, stop), cell.content)
        if check_span then
          local start_at, stop_at
          for colnr, slice in ipairs(bounds.slices) do
            dbg_write("%s\t%s\t%s\t%s\n",
              tostring(start), tostring(slice.start),
              tostring(stop),  tostring(slice.stop))
            if slice.start == start then
              start_at = colnr
            end
            if start_at and not (colnr == #bounds.slices) then
              if slice.stop == stop then
                stop_at = colnr
                break
              end
            else -- last column, width doesn't matter
              stop_at = colnr
            end
          end
          cell.span.x = 1 + stop_at - start_at
          dbg_write("%s\t%s\t%s\n", start_at, stop_at, cell.span.x)
        end
        newrow[nc] = cell
      end
    elseif p.st_colspan:match(row) then
      newrow.ignore = true
    elseif not rows.head_end and
      nr > 1 and #raw > nr then -- ends the header
      rows.head_end = nr
      newrow.head_sep = true
      newrow.ignore = true
    else
      newrow.ignore = true
    end
    rows[nr] = newrow
  end

  -- A line whose first cell is empty continues the line above: its cells are
  -- appended to the cells they continue and the line itself is ignored.
  for nr, row in ipairs(rows) do
    if not row.ignore and row[1] and row[1].content == ""
       and st_continued_cell(rows, nr - 1, 1) then
      row.ignore = true
      for nc, cell in ipairs(row) do
        local parent, par_row, par_col = st_continued_cell(rows, nr - 1, nc)
        if parent then
          parent.content = parent.content .. " " .. cell.content
          cell.content = ""
          -- Remember the target so that a further continuation line ends
          -- up in the same cell instead of in this (ignored) one.
          cell.parent = { x = par_col, y = par_row }
        end
      end
    end
  end
  return rows
end

helpers.list = {}

do
  local c = {}
  c.roman = S"ivxlcdm"^1
  c.Roman = S"IVXLCDM"^1
  c.alpha = R"az" - P"i"
  c.Alpha = R"AZ" - P"I"
  c.digit = R"09"^1
  c.auto  = P"#"

  -- Several of the patterns above accept the same text ("v" or "x" are both
  -- a letter and a roman numeral), so they are tried in a fixed order
  -- instead of the unspecified order of a table traversal. A single letter
  -- other than i/I counts as alphabetic, everything longer as roman.
  local letter = R("az", "AZ")
  local conversions = {
    { "digit", c.digit },
    { "auto",  c.auto  },
    { "alpha", c.alpha * -letter },
    { "Alpha", c.Alpha * -letter },
    { "roman", c.roman },
    { "Roman", c.Roman },
  }

  local stripme   = S" ()."
  local dontstrip = 1 - stripme
  local itemstripper = stripme^0 * C(dontstrip^1) * stripme^0

  --- Determine the numbering scheme of an enumerator.
  -- Blanks, parentheses and dots around the enumerator are ignored.
  -- @param str  enumerator, e.g. "(a)" or "iv."
  -- @return "digit", "auto", "alpha", "Alpha", "roman" or "Roman";
  --         false if none of them fits
  local con = function (str)
    --print("This is it: >"..str.."<")
    str = itemstripper:match(str)
    if not str then return false end
    for _, conv in ipairs(conversions) do
      if conv[2]:match(str) then
        return conv[1]
      end
    end
    return false
  end
  helpers.list.conversion = con

  local rnums = {
    i = 1,
    v = 5,
    x = 10,
    l = 50,
    c = 100,
    d = 500,
    m = 1000,
  }

  --- Convert a roman numeral to a number.
  -- A symbol that precedes a larger one is subtracted, any other is added.
  -- @param str  roman numeral; upper case is accepted as well
  -- @return the value, or the string "Not a number" if `str` contains
  --         something else than roman digits or repeats a digit more than
  --         three times in a row
  local function roman_to_arab (str)
    str = str:lower()
    local value = 0
    local previous, repeated
    for n = 1, #str do
      local curr = rnums[str:sub(n, n)]
      if not curr then
        return "Not a number"
      end
      if curr == previous then
        repeated = repeated + 1
        if repeated > 3 then
          return "Not a number"
        end
      else
        previous, repeated = curr, 1
      end

      local succ = rnums[str:sub(n + 1, n + 1)]
      if succ and curr < succ then
        --n = n + 1
        --value = value + succ - curr
        value = value - curr
      else
        value = value + curr
      end
    end
    return value
  end
  helpers.list.roman_to_arab = roman_to_arab

  local letterlike = { alpha = true, Alpha = true, roman = true, Roman = true }
  local alphabetic = { alpha = true, Alpha = true }
  -- Used if the global `state` does not provide a `roman_cache` table.
  local roman_cache = {}

  --- Test whether one enumerator directly follows another one.
  -- @param str  enumerator in question
  -- @param old  enumerator preceding it
  -- @return true if `str` is the successor of `old`, false otherwise
  local suc = function (str, old)
    str, old = itemstripper:match(str), itemstripper:match(old)
    if not (str and old) then
      return false
    end

    local n_str, n_old = tonumber(str), tonumber(old)
    if n_str and n_old then -- arabic numeral
      return n_str == n_old + 1
    elseif n_str or n_old then -- a number cannot continue a non-number
      return false
    end

    local con_str, con_old = con(str), con(old)
    -- Two single letters are compared alphabetically as soon as one of them
    -- cannot be a roman "i"; this covers h -> i and i -> j. Restricting it
    -- to single letters keeps roman sequences like iv -> v intact.
    if #str == 1 and #old == 1
       and letterlike[con_str] and letterlike[con_old]
       and (alphabetic[con_str] or alphabetic[con_old]) then
      return str:byte() == old:byte() + 1
    end

    -- “I'm a Roman!” - “A woman?” - “No, *Roman*! - Au!” - “So your father was a woman?”
    if not (str:lower() == str or
            str:upper() == str) then -- uneven cased --> fail
      return false
    end

    -- NOTE(review): `state` is a global that is not defined in this file,
    -- presumably it belongs to the parser -- confirm.
    local trc = (type(state) == "table" and state.roman_cache) or roman_cache
    n_str = trc[str] or nil
    n_old = trc[old] or nil
    if not n_str then
      n_str = roman_to_arab(str:lower())
      trc[str] = n_str
    end
    if not n_old then
      n_old = roman_to_arab(old:lower())
      trc[old] = n_old
    end
    if type(n_str) ~= "number" or type(n_old) ~= "number" then
      return false -- at least one of them is no roman numeral
    end
    --print(n_str, n_old, n_str == n_old + 1 )
    return n_str == n_old + 1
  end
  helpers.list.successor = suc
end

helpers.string = {}

do
  -- This grammar inside the function is slightly faster than the same as an upvalue
  -- with the value of “width” repeatedly given via lpeg.Carg(). This holds
  -- for repeated calls as well.

  local ulen = utf.len

  --- Re-wrap a string.
  -- While walking over the string the length of the current line is tracked;
  -- where a run of whitespace plus the word after it would push that length
  -- beyond `width`, the whitespace is replaced by a line break.
  -- "\type{...}" groups are never used as a breaking point.
  -- NOTE(review): the grammar needs at least one whitespace-separated word
  -- after the first one; otherwise the match fails and nil is returned.
  -- @param str    text
  -- @param width  maximum line length in characters
  -- @return the re-wrapped text, or nil if the grammar does not match
  function helpers.string.wrapat (str, width)
    local linelength = 0
    local wrap = P{
      [1] = "wrapper",

      wrapper = Cs(V"nowhitespace"^0 * (Cs(V"wrapme") + V"other")^1),

      -- An existing line break resets the length of the current line.
      whitespace = S" \t\v" + P"\n" / function() linelength = 0 end,

      nowhitespace = 1 - V"whitespace",

      ignore = P"\\type{" * (1 - P"}")^1 * P"}",

      -- Whitespace plus word that stay on the current line.
      other = Cmt(V"whitespace"^1 * (V"ignore" + (1 - V"whitespace")^1), function(s,i,w)
        linelength = linelength + ulen(w)
        return true
      end),

      -- Whitespace plus word that have to go to the next line.
      wrapme = Cmt(V"whitespace"^1 * (1 - V"whitespace" - V"ignore")^1, function(s,i,w)
        local lw = ulen(w)
        if linelength + lw > width then
          linelength = lw
          return true
        end
        return false
      --end) / function (word) return "\n" .. match(V"whitespace"^1 * C((1 - V"whitespace")^1), word) end,
      end) / function (word) return "\n" .. word:match("[^%s]+") end,
    }

    local reflowed = wrap:match(str)
    return reflowed
  end
end

return helpers