clean up rst_parser.lua

author: Philipp Gesang <phg@phi-gamma.net> 2014-02-28 07:06:53 +0100
committer: Philipp Gesang <phg@phi-gamma.net> 2014-02-28 07:06:53 +0100
commit: 7652729ada000906e5e6b2b4d0c5dea01c73c29d (patch)
tree: fdf28f8a2ba163f69e6fd4d496e2daa2eeb11e9d /mod/tex/context/third
parent: a3c01af8bbd581000e276cca076023c308bd688c (diff)
download: context-rst-7652729ada000906e5e6b2b4d0c5dea01c73c29d.tar.gz
1 files changed, 101 insertions, 84 deletions
diff --git a/mod/tex/context/third/rst/rst_parser.lua b/mod/tex/context/third/rst/rst_parser.lua
index 2dcc4d3..e633899 100644
--- a/mod/tex/context/third/rst/rst_parser.lua
+++ b/mod/tex/context/third/rst/rst_parser.lua
@@ -5,7 +5,7 @@
 --  DESCRIPTION:  https://bitbucket.org/phg/context-rst/overview
 --       AUTHOR:  Philipp Gesang (Phg), <phg42.2a@gmail.com>
 --      VERSION:  0.6
---      CHANGED:  2013-06-03 18:52:42+0200
+--      CHANGED:  2014-02-28 06:49:01+0100
 --------------------------------------------------------------------------------
 --
 
@@ -16,26 +16,31 @@ thirddata.rst_helpers = { }
 
 environment.loadluafile"rst_helpers"
 environment.loadluafile"rst_directives"
-environment.loadluafile"rst_setups" 
+environment.loadluafile"rst_setups"
 environment.loadluafile"rst_context"
 
-local rst             = thirddata.rst
-local helpers         = thirddata.rst_helpers
-local optional_setups = thirddata.rst_setups
-
-rst.strip_BOM     = true
-rst.expandtab     = true
-rst.shiftwidth    = 4
-rst.crlf          = true
-helpers.rst_debug = false
-
-local iowrite      = io.write
-local ioopen       = io.open
-local stringformat = string.format
-local stringlen    = string.len
-local stringstrip  = string.strip
-local utf          = unicode.utf8
-local utflen       = utf.len
+local rst                   = thirddata.rst
+local helpers               = thirddata.rst_helpers
+local optional_setups       = thirddata.rst_setups
+
+rst.strip_BOM               = true
+rst.expandtab               = true
+rst.shiftwidth              = 4
+rst.crlf                    = true
+helpers.rst_debug           = false
+
+local utf                   = unicode.utf8
+
+local ioopen                = io.open
+local iowrite               = io.write
+local stringfind            = string.find
+local stringformat          = string.format
+local stringgsub            = string.gsub
+local stringlen             = string.len
+local stringmatch           = string.match
+local stringstrip           = string.strip
+local stringsub             = string.sub
+local utflen                = utf.len
 
 local warn
 do
@@ -64,11 +69,11 @@ do
 end
 
 local C,   Cb, Cc, Cg,
-      Cmt, Cp, Cs, Ct 
+      Cmt, Cp, Cs, Ct
     = lpeg.C,   lpeg.Cb, lpeg.Cc, lpeg.Cg,
       lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct
 
-local P, R, S, V, match 
+local P, R, S, V, lpegmatch
     = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match
 
 local utf = unicode.utf8
@@ -99,7 +104,7 @@ state.addme                = {}
 
 do
     local first_adornment = ""
-    local valid_adornment = P{
+    local valid_adornment = P {
         [1] = "adorncheck",
         adorncheck  = V"check_first" * V"check_other"^1 * -P(1),
         check_first = Cmt(V"adornment_char", function(_,_, first)
@@ -112,7 +117,7 @@ do
                             return char == prev
                         end)
                     ,
-        adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], 
+        adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]],
     }
     state.valid_adornment = valid_adornment
 end
@@ -138,7 +143,7 @@ local utfchar = P{ -- from l-lpeg.lua, modified to use as grammar
 
 
 
-local parser = P{
+local rst_parser = P {
     [1] = V"document",
 
     document = V"blank_line"^0 * Cs(V"block"^1),
@@ -374,20 +379,20 @@ local parser = P{
     st_other_rows = (V"st_content"^1 * V"st_separator")^1,
 
     st_content = V"blank_line"^-1
-               * C(V"st_matchlayout"),       
+               * C(V"st_matchlayout"),
 
     st_matchlayout = -#V"st_separator" * Cmt((1 - V"eol")^1, function (s, i, content)
                         -- Don't check for matching indent but if the rest is
                         -- fine then the line should be sane. This allows
                         -- cells starting with spaces.
-                        content = content:sub(#state.currentindent)
+                        content = stringsub (content, #state.currentindent)
                         local tcb = state.currentlayout.bounds
                         local n = 1
                         local spaces_only = P" "^1
                         while n < #tcb.slices do
                             local from = tcb.slices[n]  .stop
                             local to   = tcb.slices[n+1].start
-                            local between = spaces_only:match(content, from)
+                            local between = lpegmatch (spaces_only, content, from)
                             if not between then -- Cell spanning more than one row.
                                 -- pass
                                 warn("sta-c", "span", from, to, i)
@@ -413,7 +418,7 @@ local parser = P{
                         return state.currentlayout.raw == layout
                     end)
                   ,
-                        
+
     st_colspan_sep = Cmt(V"dash"^1 * (V"spaces" * V"dash"^1)^0, function(s, i, layout)
                          local tcb = state.currentlayout.bounds
                          local this = helpers.get_st_boundaries (layout)
@@ -484,7 +489,7 @@ local parser = P{
     ,
 
 
-    gt_cell = (V"gt_content_cell" + V"gt_line_cell") 
+    gt_cell = (V"gt_content_cell" + V"gt_line_cell")
     * (V"table_intersection" + V"table_vline")
     ,
 
@@ -500,7 +505,7 @@ local parser = P{
 
     gt_body = ((V"gt_contentrow" - V"gt_bodysep")^1 * V"gt_bodysep")^1,
 
-    gt_bodysep = V"gt_matchindent" 
+    gt_bodysep = V"gt_matchindent"
                * C(Cmt(V"table_intersection"
                      * (V"table_hline"^1 * V"table_intersection")^1, function(s, i, separator)
                           local matchme = state.currentwidth
@@ -514,8 +519,8 @@ local parser = P{
             * V"gt_headsep"
             ,
 
-    gt_headsep = V"gt_matchindent" 
-               * C(Cmt(V"table_intersection" 
+    gt_headsep = V"gt_matchindent"
+               * C(Cmt(V"table_intersection"
                     * (V"table_header_hline"^1 * V"table_intersection")^1, function(s, i, separator)
                           local matchme = state.currentwidth
                           warn("tab-s", "head", #separator == matchme, #separator, matchme, i)
@@ -547,9 +552,9 @@ local parser = P{
                       * (1 - V"eol")^1
                       * V"eol"
                       ,
-                    
+
     block_quote_other = Cmt(V"space"^1, function (s, i, indent)
-                            warn("bkq-m", #indent, #state.currentindent, 
+                            warn("bkq-m", #indent, #state.currentindent,
                                            indent,  state.currentindent, i)
                             return state.currentindent == indent
                         end) / ""
@@ -563,16 +568,16 @@ local parser = P{
 
     block_quote_attri_first = Cmt(V"space"^1 * V"attrib_dash" * V"space", function (s, i, indent)
                                    local t = state
-                                   warn("bqa-i", utflen(indent), #t.currentindent, 
-                                                 indent,           t.currentindent, i)
-                                   local ret = indent:match(" *") == t.currentindent
+                                   warn("bqa-i", utflen(indent), #t.currentindent,
+                                                 indent,         t.currentindent, i)
+                                   local ret = stringmatch (indent, " *") == t.currentindent
                                    t.currentindent = ret and indent or t.currentindent
                                    return ret
                                end) / ""
                             * (1 - V"eol")^1
                             * V"eol"
                             ,
-                    
+
     block_quote_attri_other = Cmt(V"space"^1, function (s, i, indent)
                                   warn("bqa-m", #indent, utflen(state.currentindent),
                                                  indent,  state.currentindent, i)
@@ -609,7 +614,7 @@ local parser = P{
 
     line_block_empty = Cmt(V"line_block_empty_marker", function(s, i, marker)
                             warn("lbk-e", #marker, #state.currentindent, marker, state.currentindent, i)
-                            marker = marker:gsub("|.*", "| ")
+                            marker = stringgsub (marker, "|.*", "| ")
                             return state.currentindent == marker
                         end) / ""
                      / rst.line_block_empty
@@ -654,7 +659,7 @@ local parser = P{
     unquoted_literal_block_lines = V"literal_block_first"
                                  * (V"blank_line"^-1 * V"literal_block_other")^0
                                  ,
-                                 
+
     quoted_literal_block_lines =  V"quoted_literal_block_first"
                                * V"quoted_literal_block_other"^0 -- no blank lines allowed
                                ,
@@ -677,10 +682,10 @@ local parser = P{
                    * V"eol",
 
     literal_block_other = Cmt(V"space"^1, function (s, i, indent)
-                        warn("lbk-m", 
+                        warn("lbk-m",
                              #indent,
                              #state.currentindent,
-                             #indent >= #state.currentindent, 
+                             #indent >= #state.currentindent,
                              i)
                         return #indent >= #state.currentindent
                     end)
@@ -702,10 +707,10 @@ local parser = P{
                    ,
 
     quoted_literal_block_other = Cmt(V"adornment_char", function (s, i, indent)
-                        warn("qlb-m", 
+                        warn("qlb-m",
                              #indent,
                              #state.currentindent,
-                             #indent >= #state.currentindent, 
+                             #indent >= #state.currentindent,
                              i)
                         return #indent >= #state.currentindent
                     end)
@@ -762,7 +767,7 @@ local parser = P{
             + V"option_dos_vms")
             * V"option_arg"^-1,
 
-    option_arg = (V"equals" + V"space") 
+    option_arg = (V"equals" + V"space")
                * ((V"letter" * (V"letter" + V"digit")^1)
                 + (V"angle_left" * (1 - V"angle_right")^1 * V"angle_right")),
 
@@ -818,7 +823,7 @@ local parser = P{
                        * Ct(V"definition_def"))
                     ,
 
-    definition_term = #(1 - V"space" - V"field_marker") 
+    definition_term = #(1 - V"space" - V"field_marker")
                     * (1 - V"eol" - V"definition_classifier_separator")^1
                     ,
 
@@ -888,11 +893,11 @@ local parser = P{
     bullet_first = #Cmt(V"bullet_indent", function (s, i, bullet)
                         local t = state
                         local oldbullet = t.bullets[t.depth]
-                        local n_spaces = match(P" "^0, bullet)
-                        warn("first", 
-                            t.depth, 
+                        local n_spaces = lpegmatch(P" "^0, bullet)
+                        warn("first",
+                            t.depth,
                             (t.depth == 0 and n_spaces >= 1) or
-                            (t.depth >  0 and n_spaces >  1), 
+                            (t.depth >  0 and n_spaces >  1),
                             bullet,
                             oldbullet,
                             helpers.list.conversion(bullet))
@@ -924,10 +929,10 @@ local parser = P{
     bullet_cont  = Cmt(V"bullet_indent", function (s, i, bullet)
                         local t = state
                         local conversion = helpers.list.conversion
-                        warn("conti", 
-                                t.depth, 
+                        warn("conti",
+                                t.depth,
                                 bullet == t.bullets[t.depth],
-                                bullet, 
+                                bullet,
                                 t.bullets[t.depth],
                                 t.lastbullets[t.depth],
                                 conversion(t.lastbullet),
@@ -957,7 +962,7 @@ local parser = P{
                     ,
                          --                                     ^^^^^^^^^^^^^
                          --                                     otherwise matches bullet_first
- 
+
     bullet_rest = (1 - V"eol")^1 * V"eol",  -- rest of one line
 
     bullet_next  = V"space"^1
@@ -965,8 +970,8 @@ local parser = P{
 
     bullet_match = Cmt(V"bullet_next", function (s, i, this)
                          local t = state
-                         warn("match", 
-                                t.depth, 
+                         warn("match",
+                                t.depth,
                                 stringlen(this) == utflen(t.bullets[t.depth]),
                                 utflen(t.bullets[t.depth]), stringlen(this) )
                          return stringlen(this) == utflen(t.bullets[t.depth])
@@ -983,7 +988,7 @@ local parser = P{
     number_char = V"roman_numeral"
                 + V"Roman_numeral"
                 + P"#"
-                + V"digit"^1 
+                + V"digit"^1
                 + R"AZ"
                 + R"az"
                 ,
@@ -1014,9 +1019,11 @@ local parser = P{
     -- The whitespace handling after the overline is necessary because headings
     -- without overline aren't allowed to be indented.
     section_before = C(Cmt(V"section_adorn", function(s,i, adorn)
+                          local adorn_matched = lpegmatch (state.valid_adornment, adorn)
                           state.previousadorn = adorn
-                          warn ("sec-f", state.valid_adornment:match(adorn), adorn:sub(1,2) .. "...", "", i)
-                          if state.valid_adornment:match(adorn) then
+                          warn ("sec-f", adorn_matched,
+                                stringsub (adorn, 1,2) .. "...", "", i)
+                          if adorn_matched then
                               return true
                           end
                           return false
@@ -1030,11 +1037,13 @@ local parser = P{
 
     section_after = C(Cmt(V"section_adorn", function(s,i, adorn)
                          local tests = false
-                         tests = state.valid_adornment:match(adorn) and true
+                         if lpegmatch (state.valid_adornment, adorn) then
+                           tests = true
+                         end
                          if state.previousadorn then
                              tests = tests and adorn == state.previousadorn
                          end
-                         warn ("sec-o", tests, adorn:sub(1,2) .. "…", "", i)
+                         warn ("sec-a", tests, stringsub (adorn, 1,2) .. "…", "", i)
                          state.previousadorn = nil
                          return tests
                      end))
@@ -1043,8 +1052,10 @@ local parser = P{
 
     section_once = C(Cmt(V"section_adorn", function(s,i, adorn)
                          local tests = false
-                         tests = state.valid_adornment:match(adorn) and true
-                         warn ("sec-o", tests, adorn:sub(1,2) .. "…", "", i)
+                         if lpegmatch (state.valid_adornment, adorn) then
+                           tests = true
+                         end
+                         warn ("sec-o", tests, stringsub (adorn, 1,2) .. "…", "", i)
                          state.previousadorn = nil
                          return tests
                      end))
@@ -1074,11 +1085,11 @@ local parser = P{
 
     target_indentmatch = Cmt(V"target_nextindent" -- I ♡ LPEG!
                            * Cb("indent"), function (s, i, a, b)
-                                return a == b 
+                                return a == b
                             end),
 
     target_link  = ( V"space"^0 * V"target_firstindent"
-                 * Ct(C(1 - V"whitespace" - V"eol")^1 
+                 * Ct(C(1 - V"whitespace" - V"eol")^1
                     * (V"target_indentmatch"
                      * C(1 - V"whitespace" - V"eol")^1)^0)
                  * V"eol" * #(1 - V"whitespace" - "eol")) / rst.joinindented
@@ -1118,7 +1129,7 @@ local parser = P{
               * (V"included_literal_block" + V"eol")
               ,
 
-    par_other = V"par_matchindent" 
+    par_other = V"par_matchindent"
               * C((1 - V"literal_block_shorthand" - V"eol")^1)
               * (V"included_literal_block" + V"eol")
               ,
@@ -1147,8 +1158,8 @@ local parser = P{
     literal_block_shorthand = Cs((V"colon" * V"space" * V"double_colon"
                                 + V"double_colon")
                              * V"whitespace"^0
-                             * V"eol" 
-                             * V"blank_line") 
+                             * V"eol"
+                             * V"blank_line")
                              -- The \unskip is necessary because the lines of a
                              -- paragraph get concatenated from a table with a
                              -- space as separator. And the literal block is
@@ -1339,11 +1350,16 @@ local parser = P{
     table_header_hline = P"=",
 }
 
+--- 225 rules at 2014-02-28 with lpeg 0.12 and LuaTeX 0.78.3
+--lpeg.print(rst_parser)
+--lpeg.ptree(rst_parser)
+--os.exit()
+
 local file_helpers = { }
 
 function file_helpers.strip_BOM (raw)
-    if raw:match"^\239\187\191" then
-        return raw:sub(4)
+    if stringmatch (raw, "^\239\187\191") then
+        return stringsub (raw, 4)
     end
     return raw
 end
@@ -1369,21 +1385,21 @@ do
 
     function file_helpers.expandtab (raw)
         position = 1
-        return p_expand:match(raw)
+        return lpegmatch (p_expand, raw)
     end
 end
 
 --- Spotted by Philipp A.
 function file_helpers.insert_blank (raw)
-    if not raw:find"\n%s$" then
+    if not stringfind (raw, "\n%s$") then
         return raw .. "\n\n"
     end
     return raw
 end
 
 function file_helpers.crlf (raw)
-    if raw:find"\r\n" then
-        return raw:gsub("\r\n", "\n")
+    if stringfind (raw, "\r\n") then
+        return stringgsub (raw, "\r\n", "\n")
     end
     return raw
 end
@@ -1467,7 +1483,7 @@ function thirddata.rst.standalone (infile, outfile)
     local testdata = load_file(infile)
     if testdata == 1 then return 1 end
 
-    local processeddata = parser:match(testdata)
+    local processeddata = lpegmatch (rst_parser, testdata)
     local setups = get_setups(false)
 
     processeddata = setups .. processeddata .. [[
@@ -1489,23 +1505,23 @@ function thirddata.rst.standalone (infile, outfile)
     return 0
 end
 
+local p_strip_comments
 do
     local Cs, P = lpeg.Cs, lpeg.P
     local percent = P"%"
     local eol     = P"\n"
     local comment = percent * (1 - eol)^0 * eol / "\n"
-    strip_comments = Cs((comment + 1)^0)
+    p_strip_comments = Cs((comment + 1)^0)
 end
 
 function thirddata.rst.do_rst_file(fname)
-    local rst_parser = parser
     local raw_data   = load_file(fname)
-    local processed  = rst_parser:match(raw_data)
+    local processed  = lpegmatch (rst_parser, raw_data)
     local setups     = get_setups(false)
     local tmp_file   = tex.jobname .. "–rst_temporary.tex.tmp"
 
     if processed then
-        processed = strip_comments:match(setups..processed.."\n\\stoptext\n")
+        processed = lpegmatch (p_strip_comments, setups..processed.."\n\\stoptext\n")
         save_file(tmp_file, processed)
         context.input("./"..tmp_file)
     end
@@ -1514,16 +1530,15 @@ end
 local rst_inclusions = { }
 local rst_incsetups  = { }
 function thirddata.rst.do_rst_inclusion (iname, fname)
-    local rst_parser = parser
     local raw_data   = load_file(fname)
-    local processed  = rst_parser:match(raw_data)
+    local processed  = lpegmatch (rst_parser, raw_data)
     local setups     = get_setups(true)
 
     local incnr    = #rst_incsetups  + 1
     local tmp_file = tex.jobname .. stringformat("–rst_inclusion-%d.tex.tmp", incnr)
 
     if processed then
-        processed = strip_comments:match(processed)
+        processed = lpegmatch (p_strip_comments, processed)
         save_file(tmp_file, processed)
         rst_inclusions[iname] = tmp_file
         rst_incsetups[#rst_incsetups +1] = setups
@@ -1547,13 +1562,13 @@ function thirddata.rst.get_rst_inclusion (iname)
 end
 
 function thirddata.rst.do_rst_snippet(txt)
-    local processed  = parser:match(txt)
+    local processed  = lpegmatch (rst_parser, txt)
     local setups     = get_setups(true)
     local tmp_file   = tex.jobname .. "–rst_temporary.tex.tmp"
 
     if processed then
         warn("·cs·",txt)
-        processed = strip_comments:match(setups..processed)
+        processed = lpegmatch (p_strip_comments, setups..processed)
         save_file(tmp_file,processed)
         context.input("./"..tmp_file)
     else
@@ -1586,3 +1601,5 @@ end
 if not (context or scripts) then
     return main()
 end
+
+--- vim:tw=79:et:sw=4:ts=8:sts=4
author	Philipp Gesang <phg@phi-gamma.net>	2014-02-28 07:06:53 +0100
committer	Philipp Gesang <phg@phi-gamma.net>	2014-02-28 07:06:53 +0100
commit	7652729ada000906e5e6b2b4d0c5dea01c73c29d (patch)
tree	fdf28f8a2ba163f69e6fd4d496e2daa2eeb11e9d /mod/tex/context/third
parent	a3c01af8bbd581000e276cca076023c308bd688c (diff)
download	context-rst-7652729ada000906e5e6b2b4d0c5dea01c73c29d.tar.gz