From 7652729ada000906e5e6b2b4d0c5dea01c73c29d Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Fri, 28 Feb 2014 07:06:53 +0100 Subject: clean up rst_parser.lua --- mod/tex/context/third/rst/rst_parser.lua | 185 +++++++++++++++++-------------- 1 file changed, 101 insertions(+), 84 deletions(-) (limited to 'mod/tex/context') diff --git a/mod/tex/context/third/rst/rst_parser.lua b/mod/tex/context/third/rst/rst_parser.lua index 2dcc4d3..e633899 100644 --- a/mod/tex/context/third/rst/rst_parser.lua +++ b/mod/tex/context/third/rst/rst_parser.lua @@ -5,7 +5,7 @@ -- DESCRIPTION: https://bitbucket.org/phg/context-rst/overview -- AUTHOR: Philipp Gesang (Phg), -- VERSION: 0.6 --- CHANGED: 2013-06-03 18:52:42+0200 +-- CHANGED: 2014-02-28 06:49:01+0100 -------------------------------------------------------------------------------- -- @@ -16,26 +16,31 @@ thirddata.rst_helpers = { } environment.loadluafile"rst_helpers" environment.loadluafile"rst_directives" -environment.loadluafile"rst_setups" +environment.loadluafile"rst_setups" environment.loadluafile"rst_context" -local rst = thirddata.rst -local helpers = thirddata.rst_helpers -local optional_setups = thirddata.rst_setups - -rst.strip_BOM = true -rst.expandtab = true -rst.shiftwidth = 4 -rst.crlf = true -helpers.rst_debug = false - -local iowrite = io.write -local ioopen = io.open -local stringformat = string.format -local stringlen = string.len -local stringstrip = string.strip -local utf = unicode.utf8 -local utflen = utf.len +local rst = thirddata.rst +local helpers = thirddata.rst_helpers +local optional_setups = thirddata.rst_setups + +rst.strip_BOM = true +rst.expandtab = true +rst.shiftwidth = 4 +rst.crlf = true +helpers.rst_debug = false + +local utf = unicode.utf8 + +local ioopen = io.open +local iowrite = io.write +local stringfind = string.find +local stringformat = string.format +local stringgsub = string.gsub +local stringlen = string.len +local stringmatch = string.match +local stringstrip = string.strip +local stringsub = string.sub +local utflen = utf.len local warn do @@ -64,11 +69,11 @@ do end local C, Cb, Cc, Cg, - Cmt, Cp, Cs, Ct + Cmt, Cp, Cs, Ct = lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct -local P, R, S, V, match +local P, R, S, V, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match local utf = unicode.utf8 @@ -99,7 +104,7 @@ state.addme = {} do local first_adornment = "" - local valid_adornment = P{ + local valid_adornment = P { [1] = "adorncheck", adorncheck = V"check_first" * V"check_other"^1 * -P(1), check_first = Cmt(V"adornment_char", function(_,_, first) @@ -112,7 +117,7 @@ do return char == prev end) , - adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], + adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], } state.valid_adornment = valid_adornment end @@ -138,7 +143,7 @@ local utfchar = P{ -- from l-lpeg.lua, modified to use as grammar -local parser = P{ +local rst_parser = P { [1] = V"document", document = V"blank_line"^0 * Cs(V"block"^1), @@ -374,20 +379,20 @@ local parser = P{ st_other_rows = (V"st_content"^1 * V"st_separator")^1, st_content = V"blank_line"^-1 - * C(V"st_matchlayout"), + * C(V"st_matchlayout"), st_matchlayout = -#V"st_separator" * Cmt((1 - V"eol")^1, function (s, i, content) -- Don't check for matching indent but if the rest is -- fine then the line should be sane. This allows -- cells starting with spaces. - content = content:sub(#state.currentindent) + content = stringsub (content, #state.currentindent) local tcb = state.currentlayout.bounds local n = 1 local spaces_only = P" "^1 while n < #tcb.slices do local from = tcb.slices[n] .stop local to = tcb.slices[n+1].start - local between = spaces_only:match(content, from) + local between = lpegmatch (spaces_only, content, from) if not between then -- Cell spanning more than one row. -- pass warn("sta-c", "span", from, to, i) @@ -413,7 +418,7 @@ local parser = P{ return state.currentlayout.raw == layout end) , - + st_colspan_sep = Cmt(V"dash"^1 * (V"spaces" * V"dash"^1)^0, function(s, i, layout) local tcb = state.currentlayout.bounds local this = helpers.get_st_boundaries (layout) @@ -484,7 +489,7 @@ local parser = P{ , - gt_cell = (V"gt_content_cell" + V"gt_line_cell") + gt_cell = (V"gt_content_cell" + V"gt_line_cell") * (V"table_intersection" + V"table_vline") , @@ -500,7 +505,7 @@ local parser = P{ gt_body = ((V"gt_contentrow" - V"gt_bodysep")^1 * V"gt_bodysep")^1, - gt_bodysep = V"gt_matchindent" + gt_bodysep = V"gt_matchindent" * C(Cmt(V"table_intersection" * (V"table_hline"^1 * V"table_intersection")^1, function(s, i, separator) local matchme = state.currentwidth @@ -514,8 +519,8 @@ local parser = P{ * V"gt_headsep" , - gt_headsep = V"gt_matchindent" - * C(Cmt(V"table_intersection" + gt_headsep = V"gt_matchindent" + * C(Cmt(V"table_intersection" * (V"table_header_hline"^1 * V"table_intersection")^1, function(s, i, separator) local matchme = state.currentwidth warn("tab-s", "head", #separator == matchme, #separator, matchme, i) @@ -547,9 +552,9 @@ local parser = P{ * (1 - V"eol")^1 * V"eol" , - + block_quote_other = Cmt(V"space"^1, function (s, i, indent) - warn("bkq-m", #indent, #state.currentindent, + warn("bkq-m", #indent, #state.currentindent, indent, state.currentindent, i) return state.currentindent == indent end) / "" @@ -563,16 +568,16 @@ local parser = P{ block_quote_attri_first = Cmt(V"space"^1 * V"attrib_dash" * V"space", function (s, i, indent) local t = state - warn("bqa-i", utflen(indent), #t.currentindent, - indent, t.currentindent, i) - local ret = indent:match(" *") == t.currentindent + warn("bqa-i", utflen(indent), #t.currentindent, + indent, t.currentindent, i) + local ret = stringmatch (indent, " *") == t.currentindent t.currentindent = ret and indent or t.currentindent return ret end) / "" * (1 - V"eol")^1 * V"eol" , - + block_quote_attri_other = Cmt(V"space"^1, function (s, i, indent) warn("bqa-m", #indent, utflen(state.currentindent), indent, state.currentindent, i) @@ -609,7 +614,7 @@ local parser = P{ line_block_empty = Cmt(V"line_block_empty_marker", function(s, i, marker) warn("lbk-e", #marker, #state.currentindent, marker, state.currentindent, i) - marker = marker:gsub("|.*", "| ") + marker = stringgsub (marker, "|.*", "| ") return state.currentindent == marker end) / "" / rst.line_block_empty @@ -654,7 +659,7 @@ local parser = P{ unquoted_literal_block_lines = V"literal_block_first" * (V"blank_line"^-1 * V"literal_block_other")^0 , - + quoted_literal_block_lines = V"quoted_literal_block_first" * V"quoted_literal_block_other"^0 -- no blank lines allowed , @@ -677,10 +682,10 @@ local parser = P{ * V"eol", literal_block_other = Cmt(V"space"^1, function (s, i, indent) - warn("lbk-m", + warn("lbk-m", #indent, #state.currentindent, - #indent >= #state.currentindent, + #indent >= #state.currentindent, i) return #indent >= #state.currentindent end) @@ -702,10 +707,10 @@ local parser = P{ , quoted_literal_block_other = Cmt(V"adornment_char", function (s, i, indent) - warn("qlb-m", + warn("qlb-m", #indent, #state.currentindent, - #indent >= #state.currentindent, + #indent >= #state.currentindent, i) return #indent >= #state.currentindent end) @@ -762,7 +767,7 @@ local parser = P{ + V"option_dos_vms") * V"option_arg"^-1, - option_arg = (V"equals" + V"space") + option_arg = (V"equals" + V"space") * ((V"letter" * (V"letter" + V"digit")^1) + (V"angle_left" * (1 - V"angle_right")^1 * V"angle_right")), @@ -818,7 +823,7 @@ local parser = P{ * Ct(V"definition_def")) , - definition_term = #(1 - V"space" - V"field_marker") + definition_term = #(1 - V"space" - V"field_marker") * (1 - V"eol" - V"definition_classifier_separator")^1 , @@ -888,11 +893,11 @@ local parser = P{ bullet_first = #Cmt(V"bullet_indent", function (s, i, bullet) local t = state local oldbullet = t.bullets[t.depth] - local n_spaces = match(P" "^0, bullet) - warn("first", - t.depth, + local n_spaces = lpegmatch(P" "^0, bullet) + warn("first", + t.depth, (t.depth == 0 and n_spaces >= 1) or - (t.depth > 0 and n_spaces > 1), + (t.depth > 0 and n_spaces > 1), bullet, oldbullet, helpers.list.conversion(bullet)) @@ -924,10 +929,10 @@ local parser = P{ bullet_cont = Cmt(V"bullet_indent", function (s, i, bullet) local t = state local conversion = helpers.list.conversion - warn("conti", - t.depth, + warn("conti", + t.depth, bullet == t.bullets[t.depth], - bullet, + bullet, t.bullets[t.depth], t.lastbullets[t.depth], conversion(t.lastbullet), @@ -957,7 +962,7 @@ local parser = P{ , -- ^^^^^^^^^^^^^ -- otherwise matches bullet_first - + bullet_rest = (1 - V"eol")^1 * V"eol", -- rest of one line bullet_next = V"space"^1 @@ -965,8 +970,8 @@ local parser = P{ bullet_match = Cmt(V"bullet_next", function (s, i, this) local t = state - warn("match", - t.depth, + warn("match", + t.depth, stringlen(this) == utflen(t.bullets[t.depth]), utflen(t.bullets[t.depth]), stringlen(this) ) return stringlen(this) == utflen(t.bullets[t.depth]) @@ -983,7 +988,7 @@ local parser = P{ number_char = V"roman_numeral" + V"Roman_numeral" + P"#" - + V"digit"^1 + + V"digit"^1 + R"AZ" + R"az" , @@ -1014,9 +1019,11 @@ local parser = P{ -- The whitespace handling after the overline is necessary because headings -- without overline aren't allowed to be indented. section_before = C(Cmt(V"section_adorn", function(s,i, adorn) + local adorn_matched = lpegmatch (state.valid_adornment, adorn) state.previousadorn = adorn - warn ("sec-f", state.valid_adornment:match(adorn), adorn:sub(1,2) .. "...", "", i) - if state.valid_adornment:match(adorn) then + warn ("sec-f", adorn_matched, + stringsub (adorn, 1,2) .. "...", "", i) + if adorn_matched then return true end return false @@ -1030,11 +1037,13 @@ local parser = P{ section_after = C(Cmt(V"section_adorn", function(s,i, adorn) local tests = false - tests = state.valid_adornment:match(adorn) and true + if lpegmatch (state.valid_adornment, adorn) then + tests = true + end if state.previousadorn then tests = tests and adorn == state.previousadorn end - warn ("sec-o", tests, adorn:sub(1,2) .. "…", "", i) + warn ("sec-a", tests, stringsub (adorn, 1,2) .. "…", "", i) state.previousadorn = nil return tests end)) @@ -1043,8 +1052,10 @@ local parser = P{ section_once = C(Cmt(V"section_adorn", function(s,i, adorn) local tests = false - tests = state.valid_adornment:match(adorn) and true - warn ("sec-o", tests, adorn:sub(1,2) .. "…", "", i) + if lpegmatch (state.valid_adornment, adorn) then + tests = true + end + warn ("sec-o", tests, stringsub (adorn, 1,2) .. "…", "", i) state.previousadorn = nil return tests end)) @@ -1074,11 +1085,11 @@ local parser = P{ target_indentmatch = Cmt(V"target_nextindent" -- I ♡ LPEG! * Cb("indent"), function (s, i, a, b) - return a == b + return a == b end), target_link = ( V"space"^0 * V"target_firstindent" - * Ct(C(1 - V"whitespace" - V"eol")^1 + * Ct(C(1 - V"whitespace" - V"eol")^1 * (V"target_indentmatch" * C(1 - V"whitespace" - V"eol")^1)^0) * V"eol" * #(1 - V"whitespace" - "eol")) / rst.joinindented @@ -1118,7 +1129,7 @@ local parser = P{ * (V"included_literal_block" + V"eol") , - par_other = V"par_matchindent" + par_other = V"par_matchindent" * C((1 - V"literal_block_shorthand" - V"eol")^1) * (V"included_literal_block" + V"eol") , @@ -1147,8 +1158,8 @@ local parser = P{ literal_block_shorthand = Cs((V"colon" * V"space" * V"double_colon" + V"double_colon") * V"whitespace"^0 - * V"eol" - * V"blank_line") + * V"eol" + * V"blank_line") -- The \unskip is necessary because the lines of a -- paragraph get concatenated from a table with a -- space as separator. And the literal block is @@ -1339,11 +1350,16 @@ local parser = P{ table_header_hline = P"=", } +--- 225 rules at 2014-02-28 with lpeg 0.12 and Luatex 0.78.3 +--lpeg.print(rst_parser) +--lpeg.ptree(rst_parser) +--os.exit() + local file_helpers = { } function file_helpers.strip_BOM (raw) - if raw:match"^\239\187\191" then - return raw:sub(4) + if stringmatch (raw, "^\239\187\191") then + return stringsub (raw, 4) end return raw end @@ -1369,21 +1385,21 @@ do function file_helpers.expandtab (raw) position = 1 - return p_expand:match(raw) + return lpegmatch (p_expand, raw) end end --- Spotted by Philipp A. function file_helpers.insert_blank (raw) - if not raw:find"\n%s$" then + if not stringfind (raw, "\n%s$") then return raw .. "\n\n" end return raw end function file_helpers.crlf (raw) - if raw:find"\r\n" then - return raw:gsub("\r\n", "\n") + if stringfind (raw, "\r\n") then + return stringgsub (raw, "\r\n", "\n") end return raw end @@ -1467,7 +1483,7 @@ function thirddata.rst.standalone (infile, outfile) local testdata = load_file(infile) if testdata == 1 then return 1 end - local processeddata = parser:match(testdata) + local processeddata = lpegmatch (rst_parser, testdata) local setups = get_setups(false) processeddata = setups .. processeddata .. [[ @@ -1489,23 +1505,23 @@ function thirddata.rst.standalone (infile, outfile) return 0 end +local p_strip_comments do local Cs, P = lpeg.Cs, lpeg.P local percent = P"%" local eol = P"\n" local comment = percent * (1 - eol)^0 * eol / "\n" - strip_comments = Cs((comment + 1)^0) + p_strip_comments = Cs((comment + 1)^0) end function thirddata.rst.do_rst_file(fname) - local rst_parser = parser local raw_data = load_file(fname) - local processed = rst_parser:match(raw_data) + local processed = lpegmatch (rst_parser, raw_data) local setups = get_setups(false) local tmp_file = tex.jobname .. "–rst_temporary.tex.tmp" if processed then - processed = strip_comments:match(setups..processed.."\n\\stoptext\n") + processed = lpegmatch (p_strip_comments, setups..processed.."\n\\stoptext\n") save_file(tmp_file, processed) context.input("./"..tmp_file) end @@ -1514,16 +1530,15 @@ end local rst_inclusions = { } local rst_incsetups = { } function thirddata.rst.do_rst_inclusion (iname, fname) - local rst_parser = parser local raw_data = load_file(fname) - local processed = rst_parser:match(raw_data) + local processed = lpegmatch (rst_parser, raw_data) local setups = get_setups(true) local incnr = #rst_incsetups + 1 local tmp_file = tex.jobname .. stringformat("–rst_inclusion-%d.tex.tmp", incnr) if processed then - processed = strip_comments:match(processed) + processed = lpegmatch (p_strip_comments, processed) save_file(tmp_file, processed) rst_inclusions[iname] = tmp_file rst_incsetups[#rst_incsetups +1] = setups @@ -1547,13 +1562,13 @@ function thirddata.rst.get_rst_inclusion (iname) end function thirddata.rst.do_rst_snippet(txt) - local processed = parser:match(txt) + local processed = lpegmatch (rst_parser, txt) local setups = get_setups(true) local tmp_file = tex.jobname .. "–rst_temporary.tex.tmp" if processed then warn("·cs·",txt) - processed = strip_comments:match(setups..processed) + processed = lpegmatch (p_strip_comments, setups..processed) save_file(tmp_file,processed) context.input("./"..tmp_file) else @@ -1586,3 +1601,5 @@ end if not (context or scripts) then return main() end + +--- vim:tw=79:et:sw=4:ts=8:sts=4 -- cgit v1.2.3