From d0253ecefdd3acac717e6215d46b17977ac28319 Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Thu, 9 Sep 2010 00:58:24 +0200 Subject: moved inline elements formatting to formatter --- rst_context.lua | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++------ rst_parser.lua | 112 ++++++++-------------------------- 2 files changed, 190 insertions(+), 107 deletions(-) diff --git a/rst_context.lua b/rst_context.lua index 13e4522..033c252 100644 --- a/rst_context.lua +++ b/rst_context.lua @@ -13,6 +13,8 @@ require "lpeg" +require "rst_helpers" + local C, Cb, Cc, Cg, Cmt, Cp, Cs, Ct, P, R, S, V, match = lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match if not context then -- standard context lpeg stripper from l-string.lua @@ -27,8 +29,9 @@ if not context then -- standard context lpeg stripper from l-string.lua end end - local rst_context = {} + + rst_context.collected_references = {} rst_context.collected_adornments = {} rst_context.last_section_level = 0 @@ -52,17 +55,6 @@ function rst_context.strong_emphasis (str) return [[{\\sc ]] .. str .. [[}]] end -function rst_context.paragraph (str) - -- ugly as hell and probably slow too, but the alternative would be lots of - -- concatenation - return string.format([[ - -\\startparagraph -%s -\\stopparagraph -]], str) -end - function rst_context.literal (str) str = str:gsub([[\]], [[\\]]) -- evade escaping of backslashes return [[\\type{]] .. str .. [[}]] @@ -145,6 +137,159 @@ function rst_context.joinindented (tab) return table.concat (tab, "") end +local inline_parser = P{ + [1] = "block", + + block = Cs((V"inline_element" + 1)^1), + + + inline_element = Cs(V"precede_inline" + * (V"strong_emphasis" + + V"emphasis" + + V"inline_literal" + + V"interpreted_text" +-- + V"inline_internal_target" -- TODO + + V"reference" +-- + V"footnote_reference" -- TODO +-- + V"substitution_reference" -- TODO + + V"link_standalone") + * V"succede_inline"), + + space = P" ", + whitespace = (P" " + Cs(P"\t") / " " + Cs(S"\v") / " "), + spacing = V"whitespace"^1, + + eol = P"\n", + inline_delimiters = P"‐" + P"‑" + P"‒" + P"–" + V"emdash" + V"space", -- inline markup + --inline_delimiter = P"**" + P"``" + S"*`", + asterisk = P"*", + double_asterisk = V"asterisk" * V"asterisk", + bareia = P"`", + backslash = P"\\", + bar = P"|", + double_bareia = V"bareia" * V"bareia", + escaped_bareia = (Cs(V"backslash") / "" * V"bareia") + 1, + colon = P":", + semicolon = P";", + underscore = P"_", + double_underscore = V"underscore" * V"underscore", + dot = P".", + interpunct = P"·", + comma = P",", + dash = P"-", + emdash = P"—", + ellipsis = P"…" + P"...", + exclamationmark = P"!", + questionmark = P"?", + interrobang = P"‽", + double_dash = V"dash" * V"dash", + triple_dash = V"double_dash" * V"dash", + hyphen = P"‐", + dashes = V"dash" + P"‒" + P"–" + V"emdash" + P"―", + letter = R"az" + R"AZ", + groupchars = S"()[]{}", + apostrophe = P"’" + P"'", + + guillemets = P"«" + P"»", + quotationmarks= P"‘" + P"’" + P"“" + P"”", + solidus= P"⁄", + slash = P"/", + + punctuation = V"apostrophe" + + V"colon" + + V"comma" + + V"dashes" + + V"dot" + + V"ellipsis" + + V"exclamationmark" + + V"guillemets" + + V"hyphen" + + V"interpunct" + + V"interrobang" + + V"questionmark" + + V"quotationmarks" + + V"semicolon" + + V"slash" + + V"solidus" + + V"underscore" + , + + precede_inline = V"spacing" + + V"eol" + + S[['"([{<-/:]] + + P"‘" + P"“" + P"’" + P"«" + P"¡" + P"¿" + + V"inline_delimiters" + + P"„", -- not in standard Murkin reST + + succede_inline = V"spacing" + + S[['")]}>-/:.,;!?\]] + + P"’" + P"”" + P"»" + + V"inline_delimiters" + + P"“", -- non-standard again but who cares + + emphasis = (V"asterisk" - V"double_asterisk") + * Cs((1 - V"spacing" - V"eol" - V"asterisk") + * ((1 - (1 * V"asterisk"))^0 + * (1 - V"spacing" - V"eol" - V"asterisk"))^-1) + * V"asterisk" + / rst_context.emphasis, + + strong_emphasis = V"double_asterisk" + * Cs((1 - V"spacing" - V"eol" - V"asterisk") + * ((1 - (1 * V"double_asterisk"))^0 + * (1 - V"spacing" - V"eol" - V"asterisk"))^-1) + * V"double_asterisk" + / rst_context.strong_emphasis, + + inline_literal = V"double_bareia" + * C ((V"escaped_bareia" - V"spacing" - V"eol" - V"bareia") + * ((V"escaped_bareia" - (1 * V"double_bareia"))^0 + * (V"escaped_bareia" - V"spacing" - V"eol" - V"bareia"))^-1) + * V"double_bareia" + / rst_context.literal, + + interpreted_text = C(V"role_marker"^-1) + * (V"bareia" - V"double_bareia") + * C ((1 - V"spacing" - V"eol" - V"bareia") + * ((1 - (1 * V"bareia"))^0 + * (1 - V"spacing" - V"eol" - V"bareia"))^-1) + * V"bareia" + * C(V"role_marker"^-1) + / rst_context.interpreted_text, + + role_marker = V"colon" * (V"letter" + V"dash" + V"underscore" + V"dot")^1 * V"colon", + + link_standalone = C(V"uri") + / rst_context.link_standalone, + + reference = Cs(V"_reference") + / rst_context.reference, + + _reference = (1 - V"underscore" - V"spacing" - V"eol" - V"punctuation" - V"groupchars")^1 * V"underscore", + +-------------------------------------------------------------------------------- +-- Urls +-------------------------------------------------------------------------------- + uri = V"url_protocol" * V"url_domain" * (V"slash" * V"url_path")^0, + + url_protocol = (P"http" + P"ftp" + P"shttp" + P"sftp") * P"://", + url_domain_char = 1 - V"dot" - V"spacing" - V"eol" - V"punctuation", + url_domain = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0, + url_path_char = R("az", "AZ", "09") + S"-_.!~*'()", + url_path = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1, +} + +function rst_context.paragraph (tab) + local str = inline_parser:match(table.concat(tab, " ")) + print(inline_parser:match(table.concat(tab, " "))) + return string.format([[ + +\\startparagraph +%s +\\stopparagraph +]], str) +end + local sectionlevels = { [1] = "chapter", [2] = "section", @@ -350,7 +495,7 @@ function rst_context.field (tab) \\fieldname{%s} \\fieldbody{%s} \\stopfield -]], name, body) +]], name, inline_parser:match(body)) end function rst_context.line_comment (str) @@ -379,7 +524,7 @@ function rst_context.option_list (str) \\eTR \\eTABLEhead \\bTABLEbody -]] .. str .. [[ +]] .. inline_parser:match(str) .. [[ \\eTABLEbody \\eTABLE @@ -422,7 +567,7 @@ function rst_context.line_block (str) return [[ \\startlines -]] .. str .. [[\\stoplines +]] .. inline_parser:match(str) .. [[\\stoplines ]] end @@ -443,7 +588,7 @@ function rst_context.block_quote (tab) \\startlinecorrection \\startblockquote -]] .. tab[1] .. [[ +]] .. inline_parser:match(tab[1]) .. [[ \\stopblockquote ]] @@ -498,7 +643,7 @@ function rst_context.grid_table (tab) \\eTABLEbody \\eTABLE ]] - local test = "" + local body = "" for i,r in ipairs(tab.rows) do local isempty = true for n, cell in ipairs(r) do @@ -513,7 +658,7 @@ function rst_context.grid_table (tab) for n,c in ipairs(r) do if not (c.parent or c.variant == "separator") then - local celltext = c.stripped + local celltext = inline_parser:match(c.stripped) if c.span.x or c.span.y then local span_exp = "[" if c.span.x then @@ -529,10 +674,10 @@ function rst_context.grid_table (tab) row = row .. "\n " .. [[\\bTC ]] .. celltext .. [[\\eTC]] end end - test = test .. row .. "\n" .. [[\\eTR]] .. "\n" + body = body .. row .. "\n" .. [[\\eTR]] .. "\n" end end - return head .. test .. tail + return head .. body .. tail end function rst_context.table_row (tab) diff --git a/rst_parser.lua b/rst_parser.lua index 768f689..d68645f 100644 --- a/rst_parser.lua +++ b/rst_parser.lua @@ -225,7 +225,6 @@ local parser = P{ grid_table = Ct(V"gt_first_row" * V"gt_other_rows") * V"blank_line"^1 - --/ rst.grid_table / function(tab) return rst.grid_table(helpers.table.create(tab)) end @@ -330,7 +329,7 @@ local parser = P{ return true end) / "" * -V"attrib_dash" - * V"text_element"^1 + * (1 - V"eol")^1 * V"eol" , @@ -340,7 +339,7 @@ local parser = P{ return tracklists.currentindent == indent end) / "" * -V"attrib_dash" - * V"text_element"^1 + * (1 - V"eol")^1 * V"eol" , @@ -355,7 +354,7 @@ local parser = P{ t.currentindent = ret and indent or t.currentindent return ret end) / "" - * V"text_element"^1 + * (1 - V"eol")^1 * V"eol" , @@ -364,7 +363,7 @@ local parser = P{ indent, tracklists.currentindent, i) return utf.len(tracklists.currentindent) == #indent end) / "" - * V"text_element"^1 + * (1 - V"eol")^1 * V"eol" , @@ -407,7 +406,7 @@ local parser = P{ * V"line_block_line" , - line_block_line = Cs(V"text_element"^1 + line_block_line = Cs((1 - V"eol")^1 * V"line_block_cont"^0 * V"eol") / rst.line_block_line @@ -418,7 +417,7 @@ local parser = P{ warn("lbk-c", #spaces, #tracklists.currentindent, spaces, tracklists.currentindent, i) return #spaces >= #tracklists.currentindent end) / "" - * V"text_element"^1 + * (1 - V"eol")^1 , -------------------------------------------------------------------------------- @@ -521,12 +520,12 @@ local parser = P{ option_desc_single = V"space"^2 --* V"rest_of_line" - * V"text_element"^1 + * (1 - V"eol")^1 * V"eol", option_desc_more = V"space"^2 --* V"rest_of_line" - * V"text_element"^1 + * (1 - V"eol")^1 * V"eol" * V"indented_lines" * (V"blank_line" * V"indented_lines")^0, @@ -575,7 +574,7 @@ local parser = P{ field_name = (V"escaped_colon" + (1 - V"colon"))^1, - field_body = C(V"text_element"^1 * V"eol" + field_body = C((1 - V"eol")^1 * V"eol" * V"indented_lines"^-1), -------------------------------------------------------------------------------- @@ -837,84 +836,23 @@ local parser = P{ -- Paragraphs * Inline Markup -------------------------------------------------------------------------------- - --paragraph = -(V"double_dot" + V"double_underscore") -- + V"bullet_indent") - paragraph = -V"punctuation" - * Cs((V"text_element" + (V"eol" - V"endpar"))^1) - * V"endpar" + paragraph = V"par_setindent" + * Ct(C((1 - V"eol")^1) * V"eol" + * (V"par_matchindent" * C((1 - V"eol")^1) * V"eol")^0) + * V"blank_line"^1 + --* V"endpar" / rst.paragraph, - text_element = V"included_literal_block" - + V"enclosed_inline" - + V"inline_elements" - + V"word" - + V"punctuation" - + V"spacing" - , + par_setindent = Cmt(V"space"^0, function (s, i, indent) + warn("par-i", #indent, "", "", i) + tracklists.currentindent = indent + return true + end), - -- Ignore single occurences of inline markup delimiters in certain - -- environments. - enclosed_inline = Cg(V"enclosed_open", "opener") - * V"inline_delimiter" - * Cmt(C(V"enclosed_close") * Cb("opener"), function(i, p, closer, opener) - return closer == enclosed_mapping[opener] - end), - - precede_inline = V"spacing" - + V"eol" - + S[['"([{<-/:]] - + P"‘" + P"“" + P"’" + P"«" + P"¡" + P"¿" - + V"delimiters" - + P"„", -- not in standard Murkin reST - - succede_inline = V"spacing" - + S[['")]}>-/:.,;!?\]] - + P"’" + P"”" + P"»" - + V"delimiters" - + P"“", -- non-standard again but who cares - - inline_elements = Cs(V"precede_inline" - * (V"strong_emphasis" - + V"emphasis" - + V"inline_literal" - + V"interpreted_text" --- + V"inline_internal_target" -- TODO - + V"reference" --- + V"footnote_reference" -- TODO --- + V"substitution_reference" -- TODO - + V"link_standalone") - * V"succede_inline"), - - emphasis = (V"asterisk" - V"double_asterisk") - * Cs((1 - V"spacing" - V"eol" - V"asterisk") - * ((1 - (1 * V"asterisk"))^0 - * (1 - V"spacing" - V"eol" - V"asterisk"))^-1) -- looks like lisp - * V"asterisk" - / rst.emphasis, - - strong_emphasis = V"double_asterisk" - * Cs((1 - V"spacing" - V"eol" - V"asterisk") - * ((1 - (1 * V"double_asterisk"))^0 - * (1 - V"spacing" - V"eol" - V"asterisk"))^-1) - * V"double_asterisk" - / rst.strong_emphasis, - - inline_literal = V"double_bareia" - * C ((V"escaped_bareia" - V"spacing" - V"eol" - V"bareia") - * ((V"escaped_bareia" - (1 * V"double_bareia"))^0 - * (V"escaped_bareia" - V"spacing" - V"eol" - V"bareia"))^-1) - * V"double_bareia" - / rst.literal, - - interpreted_text = C(V"role_marker"^-1) - * (V"bareia" - V"double_bareia") - * C ((1 - V"spacing" - V"eol" - V"bareia") - * ((1 - (1 * V"bareia"))^0 - * (1 - V"spacing" - V"eol" - V"bareia"))^-1) - * V"bareia" - * C(V"role_marker"^-1) - / rst.interpreted_text, - - role_marker = V"colon" * (V"letter" + V"dash" + V"underscore" + V"dot")^1 * V"colon", + par_matchindent = Cmt(V"space"^0, function (s, i, indent) + warn("par-m", tracklists.currentindent == indent, #indent, #tracklists.currentindent, i) + return tracklists.currentindent == indent + end), link_standalone = C(V"uri") / rst.link_standalone, @@ -970,14 +908,14 @@ local parser = P{ tracklists.currentindent = indent return true end) - * V"text_element"^1 + * (1 - V"eol")^1 * V"eol", indented_other = Cmt(V"space"^1, function (s, i, indent) warn("idt-m", indent, tracklists.currentindent, indent == tracklists.currentindent, i) return indent == tracklists.currentindent end) - * V"text_element"^1 + * (1 - V"eol")^1 * V"eol", -------------------------------------------------------------------------------- -- cgit v1.2.3