diff options
| author | Philipp Gesang <phg@phi-gamma.net> | 2014-02-28 07:06:53 +0100 | 
|---|---|---|
| committer | Philipp Gesang <phg@phi-gamma.net> | 2014-02-28 07:06:53 +0100 | 
| commit | 7652729ada000906e5e6b2b4d0c5dea01c73c29d (patch) | |
| tree | fdf28f8a2ba163f69e6fd4d496e2daa2eeb11e9d /mod/tex/context/third | |
| parent | a3c01af8bbd581000e276cca076023c308bd688c (diff) | |
| download | context-rst-7652729ada000906e5e6b2b4d0c5dea01c73c29d.tar.gz | |
clean up rst_parser.lua
Diffstat (limited to 'mod/tex/context/third')
| -rw-r--r-- | mod/tex/context/third/rst/rst_parser.lua | 185 | 
1 files changed, 101 insertions, 84 deletions
diff --git a/mod/tex/context/third/rst/rst_parser.lua b/mod/tex/context/third/rst/rst_parser.lua index 2dcc4d3..e633899 100644 --- a/mod/tex/context/third/rst/rst_parser.lua +++ b/mod/tex/context/third/rst/rst_parser.lua @@ -5,7 +5,7 @@  --  DESCRIPTION:  https://bitbucket.org/phg/context-rst/overview  --       AUTHOR:  Philipp Gesang (Phg), <phg42.2a@gmail.com>  --      VERSION:  0.6 ---      CHANGED:  2013-06-03 18:52:42+0200 +--      CHANGED:  2014-02-28 06:49:01+0100  --------------------------------------------------------------------------------  -- @@ -16,26 +16,31 @@ thirddata.rst_helpers = { }  environment.loadluafile"rst_helpers"  environment.loadluafile"rst_directives" -environment.loadluafile"rst_setups"  +environment.loadluafile"rst_setups"  environment.loadluafile"rst_context" -local rst             = thirddata.rst -local helpers         = thirddata.rst_helpers -local optional_setups = thirddata.rst_setups - -rst.strip_BOM     = true -rst.expandtab     = true -rst.shiftwidth    = 4 -rst.crlf          = true -helpers.rst_debug = false - -local iowrite      = io.write -local ioopen       = io.open -local stringformat = string.format -local stringlen    = string.len -local stringstrip  = string.strip -local utf          = unicode.utf8 -local utflen       = utf.len +local rst                   = thirddata.rst +local helpers               = thirddata.rst_helpers +local optional_setups       = thirddata.rst_setups + +rst.strip_BOM               = true +rst.expandtab               = true +rst.shiftwidth              = 4 +rst.crlf                    = true +helpers.rst_debug           = false + +local utf                   = unicode.utf8 + +local ioopen                = io.open +local iowrite               = io.write +local stringfind            = string.find +local stringformat          = string.format +local stringgsub            = string.gsub +local stringlen             = string.len +local stringmatch           = string.match +local stringstrip           = string.strip +local stringsub             = string.sub +local utflen                = utf.len  local warn  do @@ -64,11 +69,11 @@ do  end  local C,   Cb, Cc, Cg, -      Cmt, Cp, Cs, Ct  +      Cmt, Cp, Cs, Ct      = lpeg.C,   lpeg.Cb, lpeg.Cc, lpeg.Cg,        lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct -local P, R, S, V, match  +local P, R, S, V, lpegmatch      = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match  local utf = unicode.utf8 @@ -99,7 +104,7 @@ state.addme                = {}  do      local first_adornment = "" -    local valid_adornment = P{ +    local valid_adornment = P {          [1] = "adorncheck",          adorncheck  = V"check_first" * V"check_other"^1 * -P(1),          check_first = Cmt(V"adornment_char", function(_,_, first) @@ -112,7 +117,7 @@ do                              return char == prev                          end)                      , -        adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]],  +        adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]],      }      state.valid_adornment = valid_adornment  end @@ -138,7 +143,7 @@ local utfchar = P{ -- from l-lpeg.lua, modified to use as grammar -local parser = P{ +local rst_parser = P {      [1] = V"document",      document = V"blank_line"^0 * Cs(V"block"^1), @@ -374,20 +379,20 @@ local parser = P{      st_other_rows = (V"st_content"^1 * V"st_separator")^1,      st_content = V"blank_line"^-1 -               * C(V"st_matchlayout"),        +               * C(V"st_matchlayout"),      st_matchlayout = -#V"st_separator" * Cmt((1 - V"eol")^1, function (s, i, content)                          -- Don't check for matching indent but if the rest is                          -- fine then the line should be sane. This allows                          -- cells starting with spaces. -                        content = content:sub(#state.currentindent) +                        content = stringsub (content, #state.currentindent)                          local tcb = state.currentlayout.bounds                          local n = 1                          local spaces_only = P" "^1                          while n < #tcb.slices do                              local from = tcb.slices[n]  .stop                              local to   = tcb.slices[n+1].start -                            local between = spaces_only:match(content, from) +                            local between = lpegmatch (spaces_only, content, from)                              if not between then -- Cell spanning more than one row.                                  -- pass                                  warn("sta-c", "span", from, to, i) @@ -413,7 +418,7 @@ local parser = P{                          return state.currentlayout.raw == layout                      end)                    , -                         +      st_colspan_sep = Cmt(V"dash"^1 * (V"spaces" * V"dash"^1)^0, function(s, i, layout)                           local tcb = state.currentlayout.bounds                           local this = helpers.get_st_boundaries (layout) @@ -484,7 +489,7 @@ local parser = P{      , -    gt_cell = (V"gt_content_cell" + V"gt_line_cell")  +    gt_cell = (V"gt_content_cell" + V"gt_line_cell")      * (V"table_intersection" + V"table_vline")      , @@ -500,7 +505,7 @@ local parser = P{      gt_body = ((V"gt_contentrow" - V"gt_bodysep")^1 * V"gt_bodysep")^1, -    gt_bodysep = V"gt_matchindent"  +    gt_bodysep = V"gt_matchindent"                 * C(Cmt(V"table_intersection"                       * (V"table_hline"^1 * V"table_intersection")^1, function(s, i, separator)                            local matchme = state.currentwidth @@ -514,8 +519,8 @@ local parser = P{              * V"gt_headsep"              , -    gt_headsep = V"gt_matchindent"  -               * C(Cmt(V"table_intersection"  +    gt_headsep = V"gt_matchindent" +               * C(Cmt(V"table_intersection"                      * (V"table_header_hline"^1 * V"table_intersection")^1, function(s, i, separator)                            local matchme = state.currentwidth                            warn("tab-s", "head", #separator == matchme, #separator, matchme, i) @@ -547,9 +552,9 @@ local parser = P{                        * (1 - V"eol")^1                        * V"eol"                        , -                     +      block_quote_other = Cmt(V"space"^1, function (s, i, indent) -                            warn("bkq-m", #indent, #state.currentindent,  +                            warn("bkq-m", #indent, #state.currentindent,                                             indent,  state.currentindent, i)                              return state.currentindent == indent                          end) / "" @@ -563,16 +568,16 @@ local parser = P{      block_quote_attri_first = Cmt(V"space"^1 * V"attrib_dash" * V"space", function (s, i, indent)                                     local t = state -                                   warn("bqa-i", utflen(indent), #t.currentindent,  -                                                 indent,           t.currentindent, i) -                                   local ret = indent:match(" *") == t.currentindent +                                   warn("bqa-i", utflen(indent), #t.currentindent, +                                                 indent,         t.currentindent, i) +                                   local ret = stringmatch (indent, " *") == t.currentindent                                     t.currentindent = ret and indent or t.currentindent                                     return ret                                 end) / ""                              * (1 - V"eol")^1                              * V"eol"                              , -                     +      block_quote_attri_other = Cmt(V"space"^1, function (s, i, indent)                                    warn("bqa-m", #indent, utflen(state.currentindent),                                                   indent,  state.currentindent, i) @@ -609,7 +614,7 @@ local parser = P{      line_block_empty = Cmt(V"line_block_empty_marker", function(s, i, marker)                              warn("lbk-e", #marker, #state.currentindent, marker, state.currentindent, i) -                            marker = marker:gsub("|.*", "| ") +                            marker = stringgsub (marker, "|.*", "| ")                              return state.currentindent == marker                          end) / ""                       / rst.line_block_empty @@ -654,7 +659,7 @@ local parser = P{      unquoted_literal_block_lines = V"literal_block_first"                                   * (V"blank_line"^-1 * V"literal_block_other")^0                                   , -                                  +      quoted_literal_block_lines =  V"quoted_literal_block_first"                                 * V"quoted_literal_block_other"^0 -- no blank lines allowed                                 , @@ -677,10 +682,10 @@ local parser = P{                     * V"eol",      literal_block_other = Cmt(V"space"^1, function (s, i, indent) -                        warn("lbk-m",  +                        warn("lbk-m",                               #indent,                               #state.currentindent, -                             #indent >= #state.currentindent,  +                             #indent >= #state.currentindent,                               i)                          return #indent >= #state.currentindent                      end) @@ -702,10 +707,10 @@ local parser = P{                     ,      quoted_literal_block_other = Cmt(V"adornment_char", function (s, i, indent) -                        warn("qlb-m",  +                        warn("qlb-m",                               #indent,                               #state.currentindent, -                             #indent >= #state.currentindent,  +                             #indent >= #state.currentindent,                               i)                          return #indent >= #state.currentindent                      end) @@ -762,7 +767,7 @@ local parser = P{              + V"option_dos_vms")              * V"option_arg"^-1, -    option_arg = (V"equals" + V"space")  +    option_arg = (V"equals" + V"space")                 * ((V"letter" * (V"letter" + V"digit")^1)                  + (V"angle_left" * (1 - V"angle_right")^1 * V"angle_right")), @@ -818,7 +823,7 @@ local parser = P{                         * Ct(V"definition_def"))                      , -    definition_term = #(1 - V"space" - V"field_marker")  +    definition_term = #(1 - V"space" - V"field_marker")                      * (1 - V"eol" - V"definition_classifier_separator")^1                      , @@ -888,11 +893,11 @@ local parser = P{      bullet_first = #Cmt(V"bullet_indent", function (s, i, bullet)                          local t = state                          local oldbullet = t.bullets[t.depth] -                        local n_spaces = match(P" "^0, bullet) -                        warn("first",  -                            t.depth,  +                        local n_spaces = lpegmatch(P" "^0, bullet) +                        warn("first", +                            t.depth,                              (t.depth == 0 and n_spaces >= 1) or -                            (t.depth >  0 and n_spaces >  1),  +                            (t.depth >  0 and n_spaces >  1),                              bullet,                              oldbullet,                              helpers.list.conversion(bullet)) @@ -924,10 +929,10 @@ local parser = P{      bullet_cont  = Cmt(V"bullet_indent", function (s, i, bullet)                          local t = state                          local conversion = helpers.list.conversion -                        warn("conti",  -                                t.depth,  +                        warn("conti", +                                t.depth,                                  bullet == t.bullets[t.depth], -                                bullet,  +                                bullet,                                  t.bullets[t.depth],                                  t.lastbullets[t.depth],                                  conversion(t.lastbullet), @@ -957,7 +962,7 @@ local parser = P{                      ,                           --                                     ^^^^^^^^^^^^^                           --                                     otherwise matches bullet_first -  +      bullet_rest = (1 - V"eol")^1 * V"eol",  -- rest of one line      bullet_next  = V"space"^1 @@ -965,8 +970,8 @@ local parser = P{      bullet_match = Cmt(V"bullet_next", function (s, i, this)                           local t = state -                         warn("match",  -                                t.depth,  +                         warn("match", +                                t.depth,                                  stringlen(this) == utflen(t.bullets[t.depth]),                                  utflen(t.bullets[t.depth]), stringlen(this) )                           return stringlen(this) == utflen(t.bullets[t.depth]) @@ -983,7 +988,7 @@ local parser = P{      number_char = V"roman_numeral"                  + V"Roman_numeral"                  + P"#" -                + V"digit"^1  +                + V"digit"^1                  + R"AZ"                  + R"az"                  , @@ -1014,9 +1019,11 @@ local parser = P{      -- The whitespace handling after the overline is necessary because headings      -- without overline aren't allowed to be indented.      section_before = C(Cmt(V"section_adorn", function(s,i, adorn) +                          local adorn_matched = lpegmatch (state.valid_adornment, adorn)                            state.previousadorn = adorn -                          warn ("sec-f", state.valid_adornment:match(adorn), adorn:sub(1,2) .. "...", "", i) -                          if state.valid_adornment:match(adorn) then +                          warn ("sec-f", adorn_matched, +                                stringsub (adorn, 1,2) .. "...", "", i) +                          if adorn_matched then                                return true                            end                            return false @@ -1030,11 +1037,13 @@ local parser = P{      section_after = C(Cmt(V"section_adorn", function(s,i, adorn)                           local tests = false -                         tests = state.valid_adornment:match(adorn) and true +                         if lpegmatch (state.valid_adornment, adorn) then +                           tests = true +                         end                           if state.previousadorn then                               tests = tests and adorn == state.previousadorn                           end -                         warn ("sec-o", tests, adorn:sub(1,2) .. "…", "", i) +                         warn ("sec-a", tests, stringsub (adorn, 1,2) .. "…", "", i)                           state.previousadorn = nil                           return tests                       end)) @@ -1043,8 +1052,10 @@ local parser = P{      section_once = C(Cmt(V"section_adorn", function(s,i, adorn)                           local tests = false -                         tests = state.valid_adornment:match(adorn) and true -                         warn ("sec-o", tests, adorn:sub(1,2) .. "…", "", i) +                         if lpegmatch (state.valid_adornment, adorn) then +                           tests = true +                         end +                         warn ("sec-o", tests, stringsub (adorn, 1,2) .. "…", "", i)                           state.previousadorn = nil                           return tests                       end)) @@ -1074,11 +1085,11 @@ local parser = P{      target_indentmatch = Cmt(V"target_nextindent" -- I ♡ LPEG!                             * Cb("indent"), function (s, i, a, b) -                                return a == b  +                                return a == b                              end),      target_link  = ( V"space"^0 * V"target_firstindent" -                 * Ct(C(1 - V"whitespace" - V"eol")^1  +                 * Ct(C(1 - V"whitespace" - V"eol")^1                      * (V"target_indentmatch"                       * C(1 - V"whitespace" - V"eol")^1)^0)                   * V"eol" * #(1 - V"whitespace" - "eol")) / rst.joinindented @@ -1118,7 +1129,7 @@ local parser = P{                * (V"included_literal_block" + V"eol")                , -    par_other = V"par_matchindent"  +    par_other = V"par_matchindent"                * C((1 - V"literal_block_shorthand" - V"eol")^1)                * (V"included_literal_block" + V"eol")                , @@ -1147,8 +1158,8 @@ local parser = P{      literal_block_shorthand = Cs((V"colon" * V"space" * V"double_colon"                                  + V"double_colon")                               * V"whitespace"^0 -                             * V"eol"  -                             * V"blank_line")  +                             * V"eol" +                             * V"blank_line")                               -- The \unskip is necessary because the lines of a                               -- paragraph get concatenated from a table with a                               -- space as separator. And the literal block is @@ -1339,11 +1350,16 @@ local parser = P{      table_header_hline = P"=",  } +--- 225 rules at 2014-02-28 with lpeg 0.12 and Luatex 0.78.3 +--lpeg.print(rst_parser) +--lpeg.ptree(rst_parser) +--os.exit() +  local file_helpers = { }  function file_helpers.strip_BOM (raw) -    if raw:match"^\239\187\191" then -        return raw:sub(4) +    if stringmatch (raw, "^\239\187\191") then +        return stringsub (raw, 4)      end      return raw  end @@ -1369,21 +1385,21 @@ do      function file_helpers.expandtab (raw)          position = 1 -        return p_expand:match(raw) +        return lpegmatch (p_expand, raw)      end  end  --- Spotted by Philipp A.  function file_helpers.insert_blank (raw) -    if not raw:find"\n%s$" then +    if not stringfind (raw, "\n%s$") then          return raw .. "\n\n"      end      return raw  end  function file_helpers.crlf (raw) -    if raw:find"\r\n" then -        return raw:gsub("\r\n", "\n") +    if stringfind (raw, "\r\n") then +        return stringgsub (raw, "\r\n", "\n")      end      return raw  end @@ -1467,7 +1483,7 @@ function thirddata.rst.standalone (infile, outfile)      local testdata = load_file(infile)      if testdata == 1 then return 1 end -    local processeddata = parser:match(testdata) +    local processeddata = lpegmatch (rst_parser, testdata)      local setups = get_setups(false)      processeddata = setups .. processeddata .. [[ @@ -1489,23 +1505,23 @@ function thirddata.rst.standalone (infile, outfile)      return 0  end +local p_strip_comments  do      local Cs, P = lpeg.Cs, lpeg.P      local percent = P"%"      local eol     = P"\n"      local comment = percent * (1 - eol)^0 * eol / "\n" -    strip_comments = Cs((comment + 1)^0) +    p_strip_comments = Cs((comment + 1)^0)  end  function thirddata.rst.do_rst_file(fname) -    local rst_parser = parser      local raw_data   = load_file(fname) -    local processed  = rst_parser:match(raw_data) +    local processed  = lpegmatch (rst_parser, raw_data)      local setups     = get_setups(false)      local tmp_file   = tex.jobname .. "–rst_temporary.tex.tmp"      if processed then -        processed = strip_comments:match(setups..processed.."\n\\stoptext\n") +        processed = lpegmatch (p_strip_comments, setups..processed.."\n\\stoptext\n")          save_file(tmp_file, processed)          context.input("./"..tmp_file)      end @@ -1514,16 +1530,15 @@ end  local rst_inclusions = { }  local rst_incsetups  = { }  function thirddata.rst.do_rst_inclusion (iname, fname) -    local rst_parser = parser      local raw_data   = load_file(fname) -    local processed  = rst_parser:match(raw_data) +    local processed  = lpegmatch (rst_parser, raw_data)      local setups     = get_setups(true)      local incnr    = #rst_incsetups  + 1      local tmp_file = tex.jobname .. stringformat("–rst_inclusion-%d.tex.tmp", incnr)      if processed then -        processed = strip_comments:match(processed) +        processed = lpegmatch (p_strip_comments, processed)          save_file(tmp_file, processed)          rst_inclusions[iname] = tmp_file          rst_incsetups[#rst_incsetups +1] = setups @@ -1547,13 +1562,13 @@ function thirddata.rst.get_rst_inclusion (iname)  end  function thirddata.rst.do_rst_snippet(txt) -    local processed  = parser:match(txt) +    local processed  = lpegmatch (rst_parser, txt)      local setups     = get_setups(true)      local tmp_file   = tex.jobname .. "–rst_temporary.tex.tmp"      if processed then          warn("·cs·",txt) -        processed = strip_comments:match(setups..processed) +        processed = lpegmatch (p_strip_comments, setups..processed)          save_file(tmp_file,processed)          context.input("./"..tmp_file)      else @@ -1586,3 +1601,5 @@ end  if not (context or scripts) then      return main()  end + +--- vim:tw=79:et:sw=4:ts=8:sts=4  | 
