diff options
| author | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2011-09-11 15:25:11 +0200 | 
|---|---|---|
| committer | Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de> | 2011-09-11 15:25:11 +0200 | 
| commit | 76678cc480be2f36824abed06e893ec321343393 (patch) | |
| tree | 39022de469820f0249d31d033291971744b57faf /mod/tex/context/third | |
| parent | 4e0e3ac9de16f0cfd60f6666fd942e74a104b7d5 (diff) | |
| download | context-rst-76678cc480be2f36824abed06e893ec321343393.tar.gz | |
parser code cleanup
Diffstat (limited to 'mod/tex/context/third')
| -rw-r--r-- | mod/tex/context/third/rst/rst_parser.lua | 181 | 
1 files changed, 74 insertions, 107 deletions
diff --git a/mod/tex/context/third/rst/rst_parser.lua b/mod/tex/context/third/rst/rst_parser.lua index 77d07ef..9735635 100644 --- a/mod/tex/context/third/rst/rst_parser.lua +++ b/mod/tex/context/third/rst/rst_parser.lua @@ -68,41 +68,33 @@ local P, R, S, V, match  local utf = unicode.utf8 -local eol = P"\n" - -state = {} -state.depth = 0 -state.bullets = {} -- mapping bullet forms to depth -state.bullets.max = 0 -state.lastbullet = "" -state.lastbullets = {} -state.roman_cache = {} -- storing roman numerals that were already converted -state.currentindent  = "" -- used in definition lists and elsewhere -state.previousindent = "" -- for literal blocks included in paragraphs to restore the paragraph indent +state                = {} +state.depth          = 0 +state.bullets        = {}  -- mapping bullet forms to depth +state.bullets.max    = 0 +state.lastbullet     = "" +state.lastbullets    = {} +state.roman_cache    = {}  -- storing roman numerals that were already converted +state.currentindent  = ""  -- used in definition lists and elsewhere +state.previousindent = ""  -- for literal blocks included in paragraphs to restore the paragraph indent  state.currentwidth   = 0   -- table layout  state.currentlayout  = {}  -- table layout  state.previousadorn  = nil -- section underlining and overlining -state.footnotes = {} +state.footnotes            = {}  state.footnotes.autonumber = 0  state.footnotes.numbered   = {}  state.footnotes.labeled    = {}  state.footnotes.autolabel  = {}  state.footnotes.symbol     = {} -state.addme = {} +state.addme                = {}  do      local first_adornment = ""      local valid_adornment = P{          [1] = "adorncheck",          adorncheck  = V"check_first" * V"check_other"^1 * -P(1), - -    --  check_first = Cg(V"adornment_char", "first"),   -- This *should* work but but due to some heavenly -                                                        -- intervention the governing rules of the universe -                                                        -- have been altered so as to annoy everybody  -                                                        -- trying to deploy it. -          check_first = Cmt(V"adornment_char", function(_,_, first)                              first_adornment = first                              return true @@ -291,7 +283,7 @@ local parser = P{                          return true                      end)                    * (1 - V"eol")^1 * V"eol" -                    , +                  ,      fn_matchindent = Cmt(V"space"^1, function(s, i, indent)                          local tc = state.currentindent @@ -1222,69 +1214,58 @@ local parser = P{  --------------------------------------------------------------------------------  -- Urls  -------------------------------------------------------------------------------- -    uri = V"url_protocol" * V"url_domain" * (V"slash" * V"url_path")^0, +    uri             = V"url_protocol" * V"url_domain" * (V"slash" * V"url_path")^0, -    url_protocol = (P"http" + P"ftp" + P"shttp" + P"sftp") * P"://", +    url_protocol    = (P"http" + P"ftp" + P"shttp" + P"sftp") * P"://",      url_domain_char = 1 - V"dot" - V"spacing" - V"eol" - V"punctuation", -    url_domain = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0, -    url_path_char = R("az", "AZ", "09") + S"-_.!~*'()", -    url_path = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1, +    url_domain      = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0, +    url_path_char   = R("az", "AZ", "09") + S"-_.!~*'()", +    url_path        = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1,  --------------------------------------------------------------------------------  -- Terminal Symbols and Low-Level Elements  -------------------------------------------------------------------------------- -    word = (1 - V"punctuation" - V"end_block" - V"spacing" - V"eol")^1, -- TODO : no punctuation (later) - -    asterisk = P"*", - -    bareia = P"`", -    double_bareia = V"bareia" * V"bareia", -    escaped_bareia = (Cs(V"backslash") / "" * V"bareia") + 1, - -    slash = P"/", -    doubleslash = V"slash" * V"slash", - -    backslash = P"\\", -    bar = P"|", - -    groupchars = S"()[]{}", +    asterisk          = P"*", +    backslash         = P"\\", +    bar               = P"|", +    bareia            = P"`", +    slash             = P"/", +    solidus           = P"⁄", +    equals            = P"=",      --- Punctuation      -- Some of the following are used for markup as well as for punctuation. -    comma = P",", -    colon = P":", -    double_colon = V"colon" * V"colon", -    escaped_colon = V"backslash" * V"colon", -    dot = P".", -    period = V"dot", -    double_dot = V"dot" * V"dot", -    interpunct = P"·", -    underscore = P"_", +    apostrophe        = P"’" + P"'", +    comma             = P",", +    colon             = P":", +    dot               = P".", +    interpunct        = P"·", +    semicolon         = P";", +    underscore        = P"_", +    dash              = P"-", +    emdash            = P"—", +    hyphen            = P"‐", +    questionmark      = P"?", +    exclamationmark   = P"!", +    interrobang       = P"‽", +    lsquare           = P"[", +    rsquare           = P"]", +    ellipsis          = P"…" + P"...", +    guillemets        = P"«" + P"»", +    quotationmarks    = P"‘" + P"’" + P"“" + P"”", + +    period            = V"dot", +    double_dot        = V"dot" * V"dot", +    double_colon      = V"colon" * V"colon", +    escaped_colon     = V"backslash" * V"colon",      double_underscore = V"underscore" * V"underscore", -    dash = P"-", -    double_dash = V"dash" * V"dash", -    triple_dash = V"double_dash" * V"dash", -    emdash = P"—", -    attrib_dash = V"triple_dash" + V"double_dash" + V"emdash", -- begins quote attribution blocks -    dashes = V"dash" + P"‒" + P"–" + V"emdash" + P"―", -    hyphen = P"‐", -    semicolon = P";", -    questionmark = P"?", -    exclamationmark = P"!", -    inverted_exclamationmark = P"¡", -    inverted_questionmark = P"¿", -    interrobang = P"‽", - -    apostrophe = P"’" + P"'", -    --brackets   = P"[ ], (",, { }, ⟨ ⟩ ) -    lsquare = P"[", -    rsquare = P"]", -    ellipsis   = P"…" + P"...", -    guillemets = P"«" + P"»", -    quotationmarks= P"‘" + P"’" + P"“" + P"”", -    solidus= P"⁄", +    double_dash       = V"dash" * V"dash", +    triple_dash       = V"double_dash" * V"dash", +    attrib_dash       = V"triple_dash" + V"double_dash" + V"emdash", -- begins quote attribution blocks +    dashes            = V"dash" + P"‒" + P"–" + V"emdash" + P"―", +      punctuation = V"apostrophe" @@ -1306,54 +1287,40 @@ local parser = P{                  + V"underscore"                  , -    -- These are treated separately as the might begin a paragraph (sigh!). -    inverted_punctuation = V"inverted_exclamationmark" -                         + V"inverted_questionmark", -      -- End punctuation -    letter = R"az" + R"AZ", - -    equals = P"=", - -    space = P" ", -    spaces = V"space"^1, -    whitespace = (P" " + Cs(P"\t") / "        " + Cs(S"\v") / " "), -    spacing = V"whitespace"^1, -    blank_line = V"space"^0 * V"eol", +    letter       = R"az" + R"AZ", +    digit        = R"09", +                  +    space        = P" ", +    spaces       = V"space"^1, +    whitespace   = (P" " + Cs(P"\t") / "        " + Cs(S"\v") / " "), +    spacing      = V"whitespace"^1, +    blank_line   = V"whitespace"^0 * V"eol",      rest_of_line = (1 - V"eol")^1, -    eol = P"\n", -    eof = V"eol"^0 * -P(1), +    eol          = S"\r\n", +    eof          = V"eol"^0 * -P(1), -    end_block = V"blank_line"^1 -              + V"eof" -              + (V"whitespace"^0 * V"eol" -               * (V"whitespace"^0 * V"eol")^0 * V"eof") -              , +    end_block    = V"blank_line"^1 * V"eof"^-1 +                 + V"eof" +                 ,      -- diverse markup character sets -    delimiters = P"‐" + P"‑" + P"‒" + P"–" + V"emdash" + V"space",   -- inline markup -    adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], -- headings -    bullet_char = S"*+-" + P"•" + P"‣" + P"⁃",                       -- bullet lists -    argument_char = V"double_dash" * V"dash" * V"slash",             -- option lists - -    digit = R"09", -    roman_numeral = S"ivxlcdm"^1, -    Roman_numeral = S"IVXLCDM"^1, +    adornment_char     = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], -- headings +    bullet_char        = S"*+-" + P"•" + P"‣" + P"⁃",                    -- bullet lists -    inline_delimiter = P"**" + P"``" + S"*`", -    angle_left       = P"<", -    angle_right      = P">", -    enclosed_open    = S[['"([{<]], -    enclosed_close   = S[['")]}>]], +    roman_numeral      = S"ivxlcdm"^1, +    Roman_numeral      = S"IVXLCDM"^1, -    gartenzaun = P"#", +    angle_left         = P"<", +    angle_right        = P">", +    gartenzaun         = P"#",      table_intersection = P"+", -    table_hline = V"dash", -    table_vline = V"bar", +    table_hline        = V"dash", +    table_vline        = V"bar",      table_header_hline = P"=",  }  | 
