summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- rst_context.lua 185
-rw-r--r-- rst_parser.lua 112
2 files changed, 190 insertions, 107 deletions
diff --git a/rst_context.lua b/rst_context.lua
index 13e4522..033c252 100644
--- a/rst_context.lua
+++ b/rst_context.lua
@@ -13,6 +13,8 @@
require "lpeg"
+require "rst_helpers"
+
local C, Cb, Cc, Cg, Cmt, Cp, Cs, Ct, P, R, S, V, match = lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match
if not context then -- standard context lpeg stripper from l-string.lua
@@ -27,8 +29,9 @@ if not context then -- standard context lpeg stripper from l-string.lua
end
end
-
local rst_context = {}
+
+
rst_context.collected_references = {}
rst_context.collected_adornments = {}
rst_context.last_section_level = 0
@@ -52,17 +55,6 @@ function rst_context.strong_emphasis (str)
return [[{\\sc ]] .. str .. [[}]]
end
-function rst_context.paragraph (str)
- -- ugly as hell and probably slow too, but the alternative would be lots of
- -- concatenation
- return string.format([[
-
-\\startparagraph
-%s
-\\stopparagraph
-]], str)
-end
-
function rst_context.literal (str)
str = str:gsub([[\]], [[\\]]) -- evade escaping of backslashes
return [[\\type{]] .. str .. [[}]]
@@ -145,6 +137,159 @@ function rst_context.joinindented (tab)
return table.concat (tab, "")
end
+local inline_parser = P{ -- LPeg grammar: rewrites inline reST markup found inside one block of text
+ [1] = "block",
+ -- Entry rule. Needs at least one character; anything that is not an inline element is copied through unchanged.
+ block = Cs((V"inline_element" + 1)^1),
+
+ -- Markup is only recognised between a precede_inline and a succede_inline match; order matters ("**" before "*").
+ inline_element = Cs(V"precede_inline"
+ * (V"strong_emphasis"
+ + V"emphasis"
+ + V"inline_literal"
+ + V"interpreted_text"
+-- + V"inline_internal_target" -- TODO
+ + V"reference"
+-- + V"footnote_reference" -- TODO
+-- + V"substitution_reference" -- TODO
+ + V"link_standalone")
+ * V"succede_inline"),
+
+ space = P" ",
+ whitespace = (P" " + Cs(P"\t") / " " + Cs(S"\v") / " "), -- tab and vertical tab are substituted by one space
+ spacing = V"whitespace"^1,
+
+ eol = P"\n",
+ inline_delimiters = P"‐" + P"‑" + P"‒" + P"–" + V"emdash" + V"space", -- inline markup
+ --inline_delimiter = P"**" + P"``" + S"*`",
+ asterisk = P"*",
+ double_asterisk = V"asterisk" * V"asterisk",
+ bareia = P"`",
+ backslash = P"\\",
+ bar = P"|",
+ double_bareia = V"bareia" * V"bareia",
+ escaped_bareia = (Cs(V"backslash") / "" * V"bareia") + 1, -- backslash + backtick (backslash substituted away), else any one character
+ colon = P":",
+ semicolon = P";",
+ underscore = P"_",
+ double_underscore = V"underscore" * V"underscore",
+ dot = P".",
+ interpunct = P"·",
+ comma = P",",
+ dash = P"-",
+ emdash = P"—",
+ ellipsis = P"…" + P"...",
+ exclamationmark = P"!",
+ questionmark = P"?",
+ interrobang = P"‽",
+ double_dash = V"dash" * V"dash",
+ triple_dash = V"double_dash" * V"dash",
+ hyphen = P"‐",
+ dashes = V"dash" + P"‒" + P"–" + V"emdash" + P"―",
+ letter = R"az" + R"AZ",
+ groupchars = S"()[]{}",
+ apostrophe = P"’" + P"'",
+
+ guillemets = P"«" + P"»",
+ quotationmarks= P"‘" + P"’" + P"“" + P"”",
+ solidus= P"⁄",
+ slash = P"/",
+
+ punctuation = V"apostrophe"
+ + V"colon"
+ + V"comma"
+ + V"dashes"
+ + V"dot"
+ + V"ellipsis"
+ + V"exclamationmark"
+ + V"guillemets"
+ + V"hyphen"
+ + V"interpunct"
+ + V"interrobang"
+ + V"questionmark"
+ + V"quotationmarks"
+ + V"semicolon"
+ + V"slash"
+ + V"solidus"
+ + V"underscore"
+ ,
+ -- Text that may stand directly before an inline element.
+ precede_inline = V"spacing"
+ + V"eol"
+ + S[['"([{<-/:]]
+ + P"‘" + P"“" + P"’" + P"«" + P"¡" + P"¿"
+ + V"inline_delimiters"
+ + P"„", -- low opening quote; not in the standard reST set
+ -- Text that may stand directly after an inline element.
+ succede_inline = V"spacing"
+ + S[['")]}>-/:.,;!?\]]
+ + P"’" + P"”" + P"»"
+ + V"inline_delimiters"
+ + P"“", -- also non-standard: accepted after an element
+ -- *text*: content must not start or end with spacing, a newline or an asterisk.
+ emphasis = (V"asterisk" - V"double_asterisk")
+ * Cs((1 - V"spacing" - V"eol" - V"asterisk")
+ * ((1 - (1 * V"asterisk"))^0
+ * (1 - V"spacing" - V"eol" - V"asterisk"))^-1)
+ * V"asterisk"
+ / rst_context.emphasis,
+ -- **text**: same shape as emphasis, delimited by double asterisks.
+ strong_emphasis = V"double_asterisk"
+ * Cs((1 - V"spacing" - V"eol" - V"asterisk")
+ * ((1 - (1 * V"double_asterisk"))^0
+ * (1 - V"spacing" - V"eol" - V"asterisk"))^-1)
+ * V"double_asterisk"
+ / rst_context.strong_emphasis,
+ -- ``text``: captured with C, so the raw matched text is what reaches rst_context.literal.
+ inline_literal = V"double_bareia"
+ * C ((V"escaped_bareia" - V"spacing" - V"eol" - V"bareia")
+ * ((V"escaped_bareia" - (1 * V"double_bareia"))^0
+ * (V"escaped_bareia" - V"spacing" - V"eol" - V"bareia"))^-1)
+ * V"double_bareia"
+ / rst_context.literal,
+ -- `text` with an optional :role: before or after; three captures (leading role, text, trailing role) are passed on.
+ interpreted_text = C(V"role_marker"^-1)
+ * (V"bareia" - V"double_bareia")
+ * C ((1 - V"spacing" - V"eol" - V"bareia")
+ * ((1 - (1 * V"bareia"))^0
+ * (1 - V"spacing" - V"eol" - V"bareia"))^-1)
+ * V"bareia"
+ * C(V"role_marker"^-1)
+ / rst_context.interpreted_text,
+
+ role_marker = V"colon" * (V"letter" + V"dash" + V"underscore" + V"dot")^1 * V"colon",
+
+ link_standalone = C(V"uri")
+ / rst_context.link_standalone,
+
+ reference = Cs(V"_reference")
+ / rst_context.reference,
+
+ _reference = (1 - V"underscore" - V"spacing" - V"eol" - V"punctuation" - V"groupchars")^1 * V"underscore",
+
+--------------------------------------------------------------------------------
+-- Urls
+--------------------------------------------------------------------------------
+ uri = V"url_protocol" * V"url_domain" * V"url_path"^-1, -- url_path already starts with its own slash
+
+ url_protocol = (P"https" + P"http" + P"ftp" + P"shttp" + P"sftp") * P"://", -- "https" first: ordered choice would stop at "http"
+ url_domain_char = 1 - V"dot" - V"spacing" - V"eol" - V"punctuation",
+ url_domain = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0,
+ url_path_char = R("az", "AZ", "09") + S"-_.!~*'()",
+ url_path = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1,
+}
+
+function rst_context.paragraph (tab) -- tab: array of the paragraph's lines; returns the ConTeXt paragraph markup
+ local joined = table.concat(tab, " ")
+ local str = inline_parser:match(joined) or joined -- parse once (no debug echo); keep the raw text if the grammar fails
+ return string.format([[
+
+\\startparagraph
+%s
+\\stopparagraph
+]], str)
+end
+
local sectionlevels = {
[1] = "chapter",
[2] = "section",
@@ -350,7 +495,7 @@ function rst_context.field (tab)
\\fieldname{%s}
\\fieldbody{%s}
\\stopfield
-]], name, body)
+]], name, inline_parser:match(body))
end
function rst_context.line_comment (str)
@@ -379,7 +524,7 @@ function rst_context.option_list (str)
\\eTR
\\eTABLEhead
\\bTABLEbody
-]] .. str .. [[
+]] .. inline_parser:match(str) .. [[
\\eTABLEbody
\\eTABLE
@@ -422,7 +567,7 @@ function rst_context.line_block (str)
return [[
\\startlines
-]] .. str .. [[\\stoplines
+]] .. inline_parser:match(str) .. [[\\stoplines
]]
end
@@ -443,7 +588,7 @@ function rst_context.block_quote (tab)
\\startlinecorrection
\\startblockquote
-]] .. tab[1] .. [[
+]] .. inline_parser:match(tab[1]) .. [[
\\stopblockquote
]]
@@ -498,7 +643,7 @@ function rst_context.grid_table (tab)
\\eTABLEbody
\\eTABLE
]]
- local test = ""
+ local body = ""
for i,r in ipairs(tab.rows) do
local isempty = true
for n, cell in ipairs(r) do
@@ -513,7 +658,7 @@ function rst_context.grid_table (tab)
for n,c in ipairs(r) do
if not (c.parent or
c.variant == "separator") then
- local celltext = c.stripped
+ local celltext = inline_parser:match(c.stripped)
if c.span.x or c.span.y then
local span_exp = "["
if c.span.x then
@@ -529,10 +674,10 @@ function rst_context.grid_table (tab)
row = row .. "\n " .. [[\\bTC ]] .. celltext .. [[\\eTC]]
end
end
- test = test .. row .. "\n" .. [[\\eTR]] .. "\n"
+ body = body .. row .. "\n" .. [[\\eTR]] .. "\n"
end
end
- return head .. test .. tail
+ return head .. body .. tail
end
function rst_context.table_row (tab)
diff --git a/rst_parser.lua b/rst_parser.lua
index 768f689..d68645f 100644
--- a/rst_parser.lua
+++ b/rst_parser.lua
@@ -225,7 +225,6 @@ local parser = P{
grid_table = Ct(V"gt_first_row"
* V"gt_other_rows")
* V"blank_line"^1
- --/ rst.grid_table
/ function(tab)
return rst.grid_table(helpers.table.create(tab))
end
@@ -330,7 +329,7 @@ local parser = P{
return true
end) / ""
* -V"attrib_dash"
- * V"text_element"^1
+ * (1 - V"eol")^1
* V"eol"
,
@@ -340,7 +339,7 @@ local parser = P{
return tracklists.currentindent == indent
end) / ""
* -V"attrib_dash"
- * V"text_element"^1
+ * (1 - V"eol")^1
* V"eol"
,
@@ -355,7 +354,7 @@ local parser = P{
t.currentindent = ret and indent or t.currentindent
return ret
end) / ""
- * V"text_element"^1
+ * (1 - V"eol")^1
* V"eol"
,
@@ -364,7 +363,7 @@ local parser = P{
indent, tracklists.currentindent, i)
return utf.len(tracklists.currentindent) == #indent
end) / ""
- * V"text_element"^1
+ * (1 - V"eol")^1
* V"eol"
,
@@ -407,7 +406,7 @@ local parser = P{
* V"line_block_line"
,
- line_block_line = Cs(V"text_element"^1
+ line_block_line = Cs((1 - V"eol")^1
* V"line_block_cont"^0
* V"eol")
/ rst.line_block_line
@@ -418,7 +417,7 @@ local parser = P{
warn("lbk-c", #spaces, #tracklists.currentindent, spaces, tracklists.currentindent, i)
return #spaces >= #tracklists.currentindent
end) / ""
- * V"text_element"^1
+ * (1 - V"eol")^1
,
--------------------------------------------------------------------------------
@@ -521,12 +520,12 @@ local parser = P{
option_desc_single = V"space"^2
--* V"rest_of_line"
- * V"text_element"^1
+ * (1 - V"eol")^1
* V"eol",
option_desc_more = V"space"^2
--* V"rest_of_line"
- * V"text_element"^1
+ * (1 - V"eol")^1
* V"eol"
* V"indented_lines"
* (V"blank_line" * V"indented_lines")^0,
@@ -575,7 +574,7 @@ local parser = P{
field_name = (V"escaped_colon" + (1 - V"colon"))^1,
- field_body = C(V"text_element"^1 * V"eol"
+ field_body = C((1 - V"eol")^1 * V"eol"
* V"indented_lines"^-1),
--------------------------------------------------------------------------------
@@ -837,84 +836,23 @@ local parser = P{
-- Paragraphs * Inline Markup
--------------------------------------------------------------------------------
- --paragraph = -(V"double_dot" + V"double_underscore") -- + V"bullet_indent")
- paragraph = -V"punctuation"
- * Cs((V"text_element" + (V"eol" - V"endpar"))^1)
- * V"endpar"
+ paragraph = V"par_setindent" -- stores the first line's leading spaces in tracklists.currentindent
+ * Ct(C((1 - V"eol")^1) * V"eol" -- first line, captured without its newline; Ct gathers all line captures
+ * (V"par_matchindent" * C((1 - V"eol")^1) * V"eol")^0) -- further lines must carry the identical indent string
+ * V"blank_line"^1 -- at least one blank_line has to follow the paragraph
+ --* V"endpar"
+ / rst.paragraph, -- the collected line table is handed to rst.paragraph
- text_element = V"included_literal_block"
- + V"enclosed_inline"
- + V"inline_elements"
- + V"word"
- + V"punctuation"
- + V"spacing"
- ,
+ par_setindent = Cmt(V"space"^0, function (_subject, pos, leading) -- leading: the (possibly empty) run of spaces just matched
+ warn("par-i", #leading, "", "", pos)
+ tracklists.currentindent = leading -- remembered so par_matchindent can compare later lines against it
+ return true -- never rejects the match
+ end),
- -- Ignore single occurences of inline markup delimiters in certain
- -- environments.
- enclosed_inline = Cg(V"enclosed_open", "opener")
- * V"inline_delimiter"
- * Cmt(C(V"enclosed_close") * Cb("opener"), function(i, p, closer, opener)
- return closer == enclosed_mapping[opener]
- end),
-
- precede_inline = V"spacing"
- + V"eol"
- + S[['"([{<-/:]]
- + P"‘" + P"“" + P"’" + P"«" + P"¡" + P"¿"
- + V"delimiters"
- + P"„", -- not in standard Murkin reST
-
- succede_inline = V"spacing"
- + S[['")]}>-/:.,;!?\]]
- + P"’" + P"”" + P"»"
- + V"delimiters"
- + P"“", -- non-standard again but who cares
-
- inline_elements = Cs(V"precede_inline"
- * (V"strong_emphasis"
- + V"emphasis"
- + V"inline_literal"
- + V"interpreted_text"
--- + V"inline_internal_target" -- TODO
- + V"reference"
--- + V"footnote_reference" -- TODO
--- + V"substitution_reference" -- TODO
- + V"link_standalone")
- * V"succede_inline"),
-
- emphasis = (V"asterisk" - V"double_asterisk")
- * Cs((1 - V"spacing" - V"eol" - V"asterisk")
- * ((1 - (1 * V"asterisk"))^0
- * (1 - V"spacing" - V"eol" - V"asterisk"))^-1) -- looks like lisp
- * V"asterisk"
- / rst.emphasis,
-
- strong_emphasis = V"double_asterisk"
- * Cs((1 - V"spacing" - V"eol" - V"asterisk")
- * ((1 - (1 * V"double_asterisk"))^0
- * (1 - V"spacing" - V"eol" - V"asterisk"))^-1)
- * V"double_asterisk"
- / rst.strong_emphasis,
-
- inline_literal = V"double_bareia"
- * C ((V"escaped_bareia" - V"spacing" - V"eol" - V"bareia")
- * ((V"escaped_bareia" - (1 * V"double_bareia"))^0
- * (V"escaped_bareia" - V"spacing" - V"eol" - V"bareia"))^-1)
- * V"double_bareia"
- / rst.literal,
-
- interpreted_text = C(V"role_marker"^-1)
- * (V"bareia" - V"double_bareia")
- * C ((1 - V"spacing" - V"eol" - V"bareia")
- * ((1 - (1 * V"bareia"))^0
- * (1 - V"spacing" - V"eol" - V"bareia"))^-1)
- * V"bareia"
- * C(V"role_marker"^-1)
- / rst.interpreted_text,
-
- role_marker = V"colon" * (V"letter" + V"dash" + V"underscore" + V"dot")^1 * V"colon",
+ par_matchindent = Cmt(V"space"^0, function (_subject, pos, leading) -- leading: spaces at the start of this line
+ warn("par-m", leading == tracklists.currentindent, #leading, #tracklists.currentindent, pos)
+ return leading == tracklists.currentindent -- match only when the indent string equals the stored one
+ end),
link_standalone = C(V"uri")
/ rst.link_standalone,
@@ -970,14 +908,14 @@ local parser = P{
tracklists.currentindent = indent
return true
end)
- * V"text_element"^1
+ * (1 - V"eol")^1
* V"eol",
indented_other = Cmt(V"space"^1, function (s, i, indent)
warn("idt-m", indent, tracklists.currentindent, indent == tracklists.currentindent, i)
return indent == tracklists.currentindent
end)
- * V"text_element"^1
+ * (1 - V"eol")^1
* V"eol",
--------------------------------------------------------------------------------