summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de>, 2011-09-11 15:25:11 +0200
committer: Philipp Gesang <pgesang@ix.urz.uni-heidelberg.de>, 2011-09-11 15:25:11 +0200
commit: 76678cc480be2f36824abed06e893ec321343393 (patch)
tree: 39022de469820f0249d31d033291971744b57faf
parent: 4e0e3ac9de16f0cfd60f6666fd942e74a104b7d5 (diff)
download: context-rst-76678cc480be2f36824abed06e893ec321343393.tar.gz
parser code cleanup
-rw-r--r-- mod/tex/context/third/rst/rst_parser.lua | 181
1 file changed, 74 insertions, 107 deletions
diff --git a/mod/tex/context/third/rst/rst_parser.lua b/mod/tex/context/third/rst/rst_parser.lua
index 77d07ef..9735635 100644
--- a/mod/tex/context/third/rst/rst_parser.lua
+++ b/mod/tex/context/third/rst/rst_parser.lua
@@ -68,41 +68,33 @@ local P, R, S, V, match
local utf = unicode.utf8
-local eol = P"\n"
-
-state = {}
-state.depth = 0
-state.bullets = {} -- mapping bullet forms to depth
-state.bullets.max = 0
-state.lastbullet = ""
-state.lastbullets = {}
-state.roman_cache = {} -- storing roman numerals that were already converted
-state.currentindent = "" -- used in definition lists and elsewhere
-state.previousindent = "" -- for literal blocks included in paragraphs to restore the paragraph indent
+state = {}
+state.depth = 0
+state.bullets = {} -- mapping bullet forms to depth
+state.bullets.max = 0
+state.lastbullet = ""
+state.lastbullets = {}
+state.roman_cache = {} -- storing roman numerals that were already converted
+state.currentindent = "" -- used in definition lists and elsewhere
+state.previousindent = "" -- for literal blocks included in paragraphs to restore the paragraph indent
state.currentwidth = 0 -- table layout
state.currentlayout = {} -- table layout
state.previousadorn = nil -- section underlining and overlining
-state.footnotes = {}
+state.footnotes = {}
state.footnotes.autonumber = 0
state.footnotes.numbered = {}
state.footnotes.labeled = {}
state.footnotes.autolabel = {}
state.footnotes.symbol = {}
-state.addme = {}
+state.addme = {}
do
local first_adornment = ""
local valid_adornment = P{
[1] = "adorncheck",
adorncheck = V"check_first" * V"check_other"^1 * -P(1),
-
- -- check_first = Cg(V"adornment_char", "first"), -- This *should* work but but due to some heavenly
- -- intervention the governing rules of the universe
- -- have been altered so as to annoy everybody
- -- trying to deploy it.
-
check_first = Cmt(V"adornment_char", function(_,_, first)
first_adornment = first
return true
@@ -291,7 +283,7 @@ local parser = P{
return true
end)
* (1 - V"eol")^1 * V"eol"
- ,
+ ,
fn_matchindent = Cmt(V"space"^1, function(s, i, indent)
local tc = state.currentindent
@@ -1222,69 +1214,58 @@ local parser = P{
--------------------------------------------------------------------------------
-- Urls
--------------------------------------------------------------------------------
- uri = V"url_protocol" * V"url_domain" * (V"slash" * V"url_path")^0,
+ uri = V"url_protocol" * V"url_domain" * (V"slash" * V"url_path")^0,
- url_protocol = (P"http" + P"ftp" + P"shttp" + P"sftp") * P"://",
+ url_protocol = (P"http" + P"ftp" + P"shttp" + P"sftp") * P"://",
url_domain_char = 1 - V"dot" - V"spacing" - V"eol" - V"punctuation",
- url_domain = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0,
- url_path_char = R("az", "AZ", "09") + S"-_.!~*'()",
- url_path = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1,
+ url_domain = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0,
+ url_path_char = R("az", "AZ", "09") + S"-_.!~*'()",
+ url_path = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1,
--------------------------------------------------------------------------------
-- Terminal Symbols and Low-Level Elements
--------------------------------------------------------------------------------
- word = (1 - V"punctuation" - V"end_block" - V"spacing" - V"eol")^1, -- TODO : no punctuation (later)
-
- asterisk = P"*",
-
- bareia = P"`",
- double_bareia = V"bareia" * V"bareia",
- escaped_bareia = (Cs(V"backslash") / "" * V"bareia") + 1,
-
- slash = P"/",
- doubleslash = V"slash" * V"slash",
-
- backslash = P"\\",
- bar = P"|",
-
- groupchars = S"()[]{}",
+ asterisk = P"*",
+ backslash = P"\\",
+ bar = P"|",
+ bareia = P"`",
+ slash = P"/",
+ solidus = P"⁄",
+ equals = P"=",
--- Punctuation
-- Some of the following are used for markup as well as for punctuation.
- comma = P",",
- colon = P":",
- double_colon = V"colon" * V"colon",
- escaped_colon = V"backslash" * V"colon",
- dot = P".",
- period = V"dot",
- double_dot = V"dot" * V"dot",
- interpunct = P"·",
- underscore = P"_",
+ apostrophe = P"’" + P"'",
+ comma = P",",
+ colon = P":",
+ dot = P".",
+ interpunct = P"·",
+ semicolon = P";",
+ underscore = P"_",
+ dash = P"-",
+ emdash = P"—",
+ hyphen = P"‐",
+ questionmark = P"?",
+ exclamationmark = P"!",
+ interrobang = P"‽",
+ lsquare = P"[",
+ rsquare = P"]",
+ ellipsis = P"…" + P"...",
+ guillemets = P"«" + P"»",
+ quotationmarks = P"‘" + P"’" + P"“" + P"”",
+
+ period = V"dot",
+ double_dot = V"dot" * V"dot",
+ double_colon = V"colon" * V"colon",
+ escaped_colon = V"backslash" * V"colon",
double_underscore = V"underscore" * V"underscore",
- dash = P"-",
- double_dash = V"dash" * V"dash",
- triple_dash = V"double_dash" * V"dash",
- emdash = P"—",
- attrib_dash = V"triple_dash" + V"double_dash" + V"emdash", -- begins quote attribution blocks
- dashes = V"dash" + P"‒" + P"–" + V"emdash" + P"―",
- hyphen = P"‐",
- semicolon = P";",
- questionmark = P"?",
- exclamationmark = P"!",
- inverted_exclamationmark = P"¡",
- inverted_questionmark = P"¿",
- interrobang = P"‽",
-
- apostrophe = P"’" + P"'",
- --brackets = P"[ ], (",, { }, ⟨ ⟩ )
- lsquare = P"[",
- rsquare = P"]",
- ellipsis = P"…" + P"...",
- guillemets = P"«" + P"»",
- quotationmarks= P"‘" + P"’" + P"“" + P"”",
- solidus= P"⁄",
+ double_dash = V"dash" * V"dash",
+ triple_dash = V"double_dash" * V"dash",
+ attrib_dash = V"triple_dash" + V"double_dash" + V"emdash", -- begins quote attribution blocks
+ dashes = V"dash" + P"‒" + P"–" + V"emdash" + P"―",
+
punctuation = V"apostrophe"
@@ -1306,54 +1287,40 @@ local parser = P{
+ V"underscore"
,
- -- These are treated separately as the might begin a paragraph (sigh!).
- inverted_punctuation = V"inverted_exclamationmark"
- + V"inverted_questionmark",
-
-- End punctuation
- letter = R"az" + R"AZ",
-
- equals = P"=",
-
- space = P" ",
- spaces = V"space"^1,
- whitespace = (P" " + Cs(P"\t") / " " + Cs(S"\v") / " "),
- spacing = V"whitespace"^1,
- blank_line = V"space"^0 * V"eol",
+ letter = R"az" + R"AZ",
+ digit = R"09",
+
+ space = P" ",
+ spaces = V"space"^1,
+ whitespace = (P" " + Cs(P"\t") / " " + Cs(S"\v") / " "),
+ spacing = V"whitespace"^1,
+ blank_line = V"whitespace"^0 * V"eol",
rest_of_line = (1 - V"eol")^1,
- eol = P"\n",
- eof = V"eol"^0 * -P(1),
+ eol = S"\r\n",
+ eof = V"eol"^0 * -P(1),
- end_block = V"blank_line"^1
- + V"eof"
- + (V"whitespace"^0 * V"eol"
- * (V"whitespace"^0 * V"eol")^0 * V"eof")
- ,
+ end_block = V"blank_line"^1 * V"eof"^-1
+ + V"eof"
+ ,
-- diverse markup character sets
- delimiters = P"‐" + P"‑" + P"‒" + P"–" + V"emdash" + V"space", -- inline markup
- adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], -- headings
- bullet_char = S"*+-" + P"•" + P"‣" + P"⁃", -- bullet lists
- argument_char = V"double_dash" * V"dash" * V"slash", -- option lists
-
- digit = R"09",
- roman_numeral = S"ivxlcdm"^1,
- Roman_numeral = S"IVXLCDM"^1,
+ adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], -- headings
+ bullet_char = S"*+-" + P"•" + P"‣" + P"⁃", -- bullet lists
- inline_delimiter = P"**" + P"``" + S"*`",
- angle_left = P"<",
- angle_right = P">",
- enclosed_open = S[['"([{<]],
- enclosed_close = S[['")]}>]],
+ roman_numeral = S"ivxlcdm"^1,
+ Roman_numeral = S"IVXLCDM"^1,
- gartenzaun = P"#",
+ angle_left = P"<",
+ angle_right = P">",
+ gartenzaun = P"#",
table_intersection = P"+",
- table_hline = V"dash",
- table_vline = V"bar",
+ table_hline = V"dash",
+ table_vline = V"bar",
table_header_hline = P"=",
}