From 76678cc480be2f36824abed06e893ec321343393 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Sun, 11 Sep 2011 15:25:11 +0200 Subject: parser code cleanup --- mod/tex/context/third/rst/rst_parser.lua | 181 +++++++++++++------------------ 1 file changed, 74 insertions(+), 107 deletions(-) (limited to 'mod/tex/context') diff --git a/mod/tex/context/third/rst/rst_parser.lua b/mod/tex/context/third/rst/rst_parser.lua index 77d07ef..9735635 100644 --- a/mod/tex/context/third/rst/rst_parser.lua +++ b/mod/tex/context/third/rst/rst_parser.lua @@ -68,41 +68,33 @@ local P, R, S, V, match local utf = unicode.utf8 -local eol = P"\n" - -state = {} -state.depth = 0 -state.bullets = {} -- mapping bullet forms to depth -state.bullets.max = 0 -state.lastbullet = "" -state.lastbullets = {} -state.roman_cache = {} -- storing roman numerals that were already converted -state.currentindent = "" -- used in definition lists and elsewhere -state.previousindent = "" -- for literal blocks included in paragraphs to restore the paragraph indent +state = {} +state.depth = 0 +state.bullets = {} -- mapping bullet forms to depth +state.bullets.max = 0 +state.lastbullet = "" +state.lastbullets = {} +state.roman_cache = {} -- storing roman numerals that were already converted +state.currentindent = "" -- used in definition lists and elsewhere +state.previousindent = "" -- for literal blocks included in paragraphs to restore the paragraph indent state.currentwidth = 0 -- table layout state.currentlayout = {} -- table layout state.previousadorn = nil -- section underlining and overlining -state.footnotes = {} +state.footnotes = {} state.footnotes.autonumber = 0 state.footnotes.numbered = {} state.footnotes.labeled = {} state.footnotes.autolabel = {} state.footnotes.symbol = {} -state.addme = {} +state.addme = {} do local first_adornment = "" local valid_adornment = P{ [1] = "adorncheck", adorncheck = V"check_first" * V"check_other"^1 * -P(1), - - -- check_first = Cg(V"adornment_char", "first"), -- This *should* work but but due to some heavenly - -- intervention the governing rules of the universe - -- have been altered so as to annoy everybody - -- trying to deploy it. - check_first = Cmt(V"adornment_char", function(_,_, first) first_adornment = first return true @@ -291,7 +283,7 @@ local parser = P{ return true end) * (1 - V"eol")^1 * V"eol" - , + , fn_matchindent = Cmt(V"space"^1, function(s, i, indent) local tc = state.currentindent @@ -1222,69 +1214,58 @@ local parser = P{ -------------------------------------------------------------------------------- -- Urls -------------------------------------------------------------------------------- - uri = V"url_protocol" * V"url_domain" * (V"slash" * V"url_path")^0, + uri = V"url_protocol" * V"url_domain" * (V"slash" * V"url_path")^0, - url_protocol = (P"http" + P"ftp" + P"shttp" + P"sftp") * P"://", + url_protocol = (P"http" + P"ftp" + P"shttp" + P"sftp") * P"://", url_domain_char = 1 - V"dot" - V"spacing" - V"eol" - V"punctuation", - url_domain = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0, - url_path_char = R("az", "AZ", "09") + S"-_.!~*'()", - url_path = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1, + url_domain = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0, + url_path_char = R("az", "AZ", "09") + S"-_.!~*'()", + url_path = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1, -------------------------------------------------------------------------------- -- Terminal Symbols and Low-Level Elements -------------------------------------------------------------------------------- - word = (1 - V"punctuation" - V"end_block" - V"spacing" - V"eol")^1, -- TODO : no punctuation (later) - - asterisk = P"*", - - bareia = P"`", - double_bareia = V"bareia" * V"bareia", - escaped_bareia = (Cs(V"backslash") / "" * V"bareia") + 1, - - slash = P"/", - doubleslash = V"slash" * V"slash", - - backslash = P"\\", - bar = P"|", - - groupchars = S"()[]{}", + asterisk = P"*", + backslash = P"\\", + bar = P"|", + bareia = P"`", + slash = P"/", + solidus = P"⁄", + equals = P"=", --- Punctuation -- Some of the following are used for markup as well as for punctuation. - comma = P",", - colon = P":", - double_colon = V"colon" * V"colon", - escaped_colon = V"backslash" * V"colon", - dot = P".", - period = V"dot", - double_dot = V"dot" * V"dot", - interpunct = P"·", - underscore = P"_", + apostrophe = P"’" + P"'", + comma = P",", + colon = P":", + dot = P".", + interpunct = P"·", + semicolon = P";", + underscore = P"_", + dash = P"-", + emdash = P"—", + hyphen = P"‐", + questionmark = P"?", + exclamationmark = P"!", + interrobang = P"‽", + lsquare = P"[", + rsquare = P"]", + ellipsis = P"…" + P"...", + guillemets = P"«" + P"»", + quotationmarks = P"‘" + P"’" + P"“" + P"”", + + period = V"dot", + double_dot = V"dot" * V"dot", + double_colon = V"colon" * V"colon", + escaped_colon = V"backslash" * V"colon", double_underscore = V"underscore" * V"underscore", - dash = P"-", - double_dash = V"dash" * V"dash", - triple_dash = V"double_dash" * V"dash", - emdash = P"—", - attrib_dash = V"triple_dash" + V"double_dash" + V"emdash", -- begins quote attribution blocks - dashes = V"dash" + P"‒" + P"–" + V"emdash" + P"―", - hyphen = P"‐", - semicolon = P";", - questionmark = P"?", - exclamationmark = P"!", - inverted_exclamationmark = P"¡", - inverted_questionmark = P"¿", - interrobang = P"‽", - - apostrophe = P"’" + P"'", - --brackets = P"[ ], (",, { }, ⟨ ⟩ ) - lsquare = P"[", - rsquare = P"]", - ellipsis = P"…" + P"...", - guillemets = P"«" + P"»", - quotationmarks= P"‘" + P"’" + P"“" + P"”", - solidus= P"⁄", + double_dash = V"dash" * V"dash", + triple_dash = V"double_dash" * V"dash", + attrib_dash = V"triple_dash" + V"double_dash" + V"emdash", -- begins quote attribution blocks + dashes = V"dash" + P"‒" + P"–" + V"emdash" + P"―", + punctuation = V"apostrophe" @@ -1306,54 +1287,40 @@ local parser = P{ + V"underscore" , - -- These are treated separately as the might begin a paragraph (sigh!). - inverted_punctuation = V"inverted_exclamationmark" - + V"inverted_questionmark", - -- End punctuation - letter = R"az" + R"AZ", - - equals = P"=", - - space = P" ", - spaces = V"space"^1, - whitespace = (P" " + Cs(P"\t") / " " + Cs(S"\v") / " "), - spacing = V"whitespace"^1, - blank_line = V"space"^0 * V"eol", + letter = R"az" + R"AZ", + digit = R"09", + + space = P" ", + spaces = V"space"^1, + whitespace = (P" " + Cs(P"\t") / " " + Cs(S"\v") / " "), + spacing = V"whitespace"^1, + blank_line = V"whitespace"^0 * V"eol", rest_of_line = (1 - V"eol")^1, - eol = P"\n", - eof = V"eol"^0 * -P(1), + eol = S"\r\n", + eof = V"eol"^0 * -P(1), - end_block = V"blank_line"^1 - + V"eof" - + (V"whitespace"^0 * V"eol" - * (V"whitespace"^0 * V"eol")^0 * V"eof") - , + end_block = V"blank_line"^1 * V"eof"^-1 + + V"eof" + , -- diverse markup character sets - delimiters = P"‐" + P"‑" + P"‒" + P"–" + V"emdash" + V"space", -- inline markup - adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], -- headings - bullet_char = S"*+-" + P"•" + P"‣" + P"⁃", -- bullet lists - argument_char = V"double_dash" * V"dash" * V"slash", -- option lists - - digit = R"09", - roman_numeral = S"ivxlcdm"^1, - Roman_numeral = S"IVXLCDM"^1, + adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], -- headings + bullet_char = S"*+-" + P"•" + P"‣" + P"⁃", -- bullet lists - inline_delimiter = P"**" + P"``" + S"*`", - angle_left = P"<", - angle_right = P">", - enclosed_open = S[['"([{<]], - enclosed_close = S[['")]}>]], + roman_numeral = S"ivxlcdm"^1, + Roman_numeral = S"IVXLCDM"^1, - gartenzaun = P"#", + angle_left = P"<", + angle_right = P">", + gartenzaun = P"#", table_intersection = P"+", - table_hline = V"dash", - table_vline = V"bar", + table_hline = V"dash", + table_vline = V"bar", table_header_hline = P"=", } -- cgit v1.2.3