#!/usr/bin/env texlua
--------------------------------------------------------------------------------
--         FILE:  rst-parser.lua
--        USAGE:  ./rst-parser.lua
--  DESCRIPTION:  LPeg grammar that parses block-level reStructuredText and
--                hands the pieces to the formatter functions in rst_context.
--      OPTIONS:  ---
-- REQUIREMENTS:  ---
--       AUTHOR:  Philipp Gesang (Phg),
--      VERSION:  1.0
--      CREATED:  31/08/10 11:53:49 CEST
--------------------------------------------------------------------------------
--

--require "lpeg"
-- Both modules are deliberately left global, as in the original script.
rst     = require "rst_context"
helpers = require "rst_helpers"

local rst_debug = false

--- Print one debug trace line; does nothing unless `rst_debug` is set.
-- The label is printed as `*[label]` followed by one ` |%6s` column per extra
-- argument; the output is wrapped (and indented below the label) once the
-- columns would exceed 80 characters.
-- NOTE(review): string.strip is not part of stock Lua; presumably it is
-- provided by the runtime environment -- confirm.
-- @param str label of the trace line
-- @param ... values to print (iteration stops at the first nil)
-- @return false when debugging is disabled, 0 otherwise
local warn = function(str, ...)
    if not rst_debug then return false end
    local slen = #str + 3
    str = "*["..str.."]"
    for i, j in ipairs({...}) do
        if 80 - i * 8 - slen < 0 then
            str = str .. "\n" .. string.rep(" ", slen)
        end
        str = str .. string.format(" |%6s", string.strip(tostring(j)))
    end
    io.write(str .. " |\n")
    return 0
end

--- Print a value between markers and pass it through unchanged.
local debugme = function(x)
    print ("HERE >"..x.."<")
    return x
end

local C, Cb, Cc, Cg, Cmt, Cp, Cs, Ct = lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct
local P, R, S, V, match = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match

local utf = unicode.utf8

local eol = P"\n"

-- Mutable parser state shared by the match-time captures of the grammar.
local tracklists = {
    depth         = 0,
    bullets       = { max = 0 }, -- mapping bullet forms to depth
    lastbullet    = "",
    roman_cache   = {},          -- storing roman numerals that were already converted
    currentindent = "",          -- used in definition lists and elsewhere
    currentwidth  = 0,           -- table layout
}

local enclosed_mapping = {
    ["'"] = "'",
    ['"'] = '"',
    ["("] = ")",
    ["["] = "]",
    ["{"] = "}",
    ["<"] = ">",
}

local utfchar = P{ -- from l-lpeg.lua, modified to use as grammar
    [1] = "utfchar",
    utf8byte  = R("\128\191"),
    utf8one   = R("\000\127"),
    utf8two   = R("\194\223") * V"utf8byte",
    utf8three = R("\224\239") * V"utf8byte" * V"utf8byte",
    utf8four  = R("\240\244") * V"utf8byte" * V"utf8byte" * V"utf8byte",
    utfchar   = V"utf8one" + V"utf8two" + V"utf8three" + V"utf8four",
}

--------------------------------------------------------------------------------
-- Helpers for enumerated lists: classification of enumerators, conversion of
-- roman numerals and the successor test.  They are published through
-- tracklists.conversion, tracklists.roman_to_arab and tracklists.successor.
--------------------------------------------------------------------------------
do
    -- Enumerator classes, keyed by the name that con() returns.
    local c = {}
    c.roman = S"ivxlcdm"^1
    c.Roman = S"IVXLCDM"^1
    c.alpha = R"az" - P"i"
    c.Alpha = R"AZ" - P"I"
    c.digit = R"09"^1
    c.auto  = P"#"

    -- Strips spaces, parentheses and dots around an enumerator.
    local stripme      = S" ()."
    local dontstrip    = 1 - stripme
    local itemstripper = stripme^0 * C(dontstrip^1) * stripme^0

    --- Classify an enumerator.
    -- NOTE(review): the classes are tried in the (unspecified) traversal order
    -- of `next`, and letters such as "c", "d", "v" or "x" match both an alpha
    -- and a roman class -- confirm whether a fixed order is wanted.
    -- @param str raw enumerator, e.g. "(iv) "
    -- @return name of the first matching class, or false
    local con = function (str)
        --print("This is it: >"..str.."<")
        str = itemstripper:match(str)
        if not str then -- nothing left after stripping
            return false
        end
        for conv, pat in next, c do
            if pat:match(str) then
                return conv
            end
        end
        return false
    end
    tracklists.conversion = con

    local rnums = {
        i = 1,
        v = 5,
        x = 10,
        l = 50,
        c = 100,
        d = 500,
        m = 1000,
    }

    --- Convert a lower case roman numeral to a number.
    -- @param str roman numeral in lower case
    -- @return the value, or the string "Not a number" if the input contains
    --         a character that is no roman numeral or an overlong run of
    --         identical numerals
    local function roman_to_arab (str)
        local n = 1
        local curr, succ
        local max_three = { }
        local value = 0
        while n <= #str do
            -- Run-length check.  It looks at the numeral read in the previous
            -- iteration, so it lags one character behind: runs of up to four
            -- identical numerals pass, five or more are rejected.
            if curr and curr == max_three[#max_three] then
                if #max_three >= 3 then
                    return "Not a number"
                else
                    max_three[#max_three+1] = curr
                end
            else
                max_three = { curr }
            end

            curr = rnums[str:sub(n,n)]
            if not curr then -- not a roman numeral; avoid arithmetic on nil
                return "Not a number"
            end

            n = n + 1
            succ = str:sub(n,n)

            if succ and succ ~= "" then
                succ = rnums[succ]
                if not succ then
                    return "Not a number"
                end
                if curr < succ then
                    --n = n + 1
                    --value = value + succ - curr
                    value = value - curr -- subtractive notation, e.g. the "i" of "iv"
                else
                    value = value + curr
                end
            else
                value = value + curr
            end
        end
        return value
    end
    tracklists.roman_to_arab = roman_to_arab

    --- Test whether one enumerator directly follows another.
    -- @param str enumerator of the current item
    -- @param old enumerator of the previous item
    -- @return true if `str` is the successor of `old`, false otherwise
    local suc = function (str, old)
        str, old = itemstripper:match(str), itemstripper:match(old)
        if not str or not old then
            return false
        end

        local n_str, n_old = tonumber(str), tonumber(old)
        if n_str and n_old then -- arabic numeral
            return n_str == n_old + 1
        end

        local con_str = con(str)
        if con_str == "alpha" or
           con_str == "Alpha" then
            return str:byte() == old:byte() + 1
        else -- “I'm a Roman!” - “A woman?” - “No, *Roman*! - Au!” - “So your father was a woman?”
            if not (str:lower() == str or
                    str:upper() == str) then -- uneven cased --> fail
                return false
            end

            local trc = tracklists.roman_cache
            n_str = trc[str]
            n_old = trc[old]
            if not n_str then
                n_str = roman_to_arab(str:lower())
                trc[str] = n_str
            end
            if not n_old then
                n_old = roman_to_arab(old:lower())
                trc[old] = n_old
            end
            --print(n_str, n_old, n_str == n_old + 1 )

            -- roman_to_arab() yields a string for unusable input; comparing
            -- that arithmetically would raise an error.
            if type(n_str) ~= "number" or type(n_old) ~= "number" then
                return false
            end
            return n_str == n_old + 1
        end
    end
    tracklists.successor = suc
end

local parser = P{
    [1] = V"document",

    document = Cs(V"block"^1),

--------------------------------------------------------------------------------
-- Blocks
--------------------------------------------------------------------------------

    block = V"target_block"
          + V"comment"
          + V"line_block"
          + Cs(V"table_block") / rst.escape
          + Cs(V"section") / rst.escape
          + Cs(V"transition") --/ rst.escape
          + V"literal_block"
          + Cs(V"block_quote") / rst.escape
          + Cs(V"list") / rst.escape
          + Cs(V"paragraph") / rst.escape
          ,

--------------------------------------------------------------------------------
-- Table block
--------------------------------------------------------------------------------

    table_block = V"grid_table"
                --+ V"simple_table" / rst.table
                ,

--------------------------------------------------------------------------------
-- Grid table
--------------------------------------------------------------------------------

    grid_table = Ct(V"gt_first_row"
                  * V"gt_other_rows")
               * V"blank_line"^1
               / function(tab)
                   return rst.grid_table(helpers.table.create(tab))
               end
               ,

    gt_first_row = V"gt_setindent"
                 * C(V"gt_sethorizontal")
                 * V"eol"
                 ,

    -- Remembers the indent of the first table row.
    --gt_setindent = Cg(V"space"^0, "tableindent"),
    gt_setindent = Cmt(V"space"^0, function(s, i, indent)
                       warn("tab-i", true, #indent, "set", i)
                       tracklists.currentindent = indent
                       return true
                   end)
                 ,

    gt_layoutmarkers = V"table_intersection" + V"table_hline" + V"table_header_hline",

    -- Remembers the width of the first table row.
    gt_sethorizontal = Cmt(V"gt_layoutmarkers"^3, function (s, i, width)
                           warn("tab-h", "width", "true", #width, "set", i)
                           tracklists.currentwidth = #width
                           return true
                       end)
                     ,

    gt_other_rows = V"gt_head"^-1
                  * V"gt_body"
                  ,

    -- Succeeds if the indent is as long as the one of the first row.
    --gt_matchindent = Cmt(V"space"^0 * Cb"tableindent", function (s, i, this, matchme)
    gt_matchindent = Cmt(V"space"^0, function (s, i, this)
                         local matchme = tracklists.currentindent
                         warn("tab-m", "indent", #this == #matchme, #this, #matchme, i)
                         return #this == #matchme
                     end)
                   ,

    gt_cell = (V"gt_content_cell" + V"gt_line_cell")
            * (V"table_intersection" + V"table_vline")
            ,

    gt_content_cell = ((1 - V"table_vline" - V"table_intersection" - V"eol")^1),

    gt_line_cell = V"table_hline"^1,

    gt_contentrow = V"gt_matchindent"
                  * C((V"table_intersection" + V"table_vline")
                    * V"gt_cell"^1)
                  * V"whitespace"^-1 * V"eol"
                  ,

    gt_body = ((V"gt_contentrow" - V"gt_bodysep")^1 * V"gt_bodysep")^1,

    --gt_row = (V"gt_contentrow" - V"gt_bodysep")^1
           --* C(V"gt_bodysep")
           --,

    -- A separator only counts if it is as wide as the first table row.
    gt_bodysep = V"gt_matchindent"
               * C(Cmt(V"table_intersection"
                     * (V"table_hline"^1 * V"table_intersection")^1,
                       function(s, i, separator)
                           local matchme = tracklists.currentwidth
                           warn("tab-m", "body", #separator == matchme, #separator, matchme, i)
                           return #separator == matchme
                       end))
               * V"whitespace"^-1 * V"eol"
               ,

    gt_head = V"gt_contentrow"^1
            * V"gt_headsep"
            ,

    gt_headsep = V"gt_matchindent"
               * C(Cmt(V"table_intersection"
                     * (V"table_header_hline"^1 * V"table_intersection")^1,
                       function(s, i, separator)
                           local matchme = tracklists.currentwidth
                           warn("tab-s", "head", #separator == matchme, #separator, matchme, i)
                           return #separator == matchme
                       end))
               * V"whitespace"^-1 * V"eol"
               ,

--------------------------------------------------------------------------------
-- Block quotes
--------------------------------------------------------------------------------

    block_quote = Ct(Cs(V"block_quote_first"
                      * V"block_quote_other"^0
                      * (V"blank_line" * V"block_quote_other"^1)^0)
                   * (V"blank_line"
                    * Cs(V"block_quote_attri"))^-1)
                * V"blank_line"
                / rst.block_quote
                ,

    -- First line of a quote: remembers its indent.
    block_quote_first = Cmt(V"space"^1, function (s, i, indent)
                            warn("bkq-i", #indent, "", indent, "", i)
                            tracklists.currentindent = indent
                            return true
                        end) / ""
                      * -V"attrib_dash"
                      * (1 - V"eol")^1
                      * V"eol"
                      ,

    -- Further lines must carry exactly the remembered indent.
    block_quote_other = Cmt(V"space"^1, function (s, i, indent)
                            warn("bkq-m", #indent, #tracklists.currentindent,
                                          indent, tracklists.currentindent, i)
                            return tracklists.currentindent == indent
                        end) / ""
                      * -V"attrib_dash"
                      * (1 - V"eol")^1
                      * V"eol"
                      ,

    block_quote_attri = V"block_quote_attri_first"
                      * V"block_quote_attri_other"^0,

    -- Attribution line: the spaces before the dash must equal the quote's
    -- indent; on success the whole marker becomes the new reference indent.
    block_quote_attri_first = Cmt(V"space"^1 * V"attrib_dash" * V"space", function (s, i, indent)
                                  local t = tracklists
                                  warn("bqa-i", utf.len(indent), #t.currentindent,
                                                indent, t.currentindent, i)
                                  local ret = indent:match(" *") == t.currentindent
                                  t.currentindent = ret and indent or t.currentindent
                                  return ret
                              end) / ""
                            * (1 - V"eol")^1
                            * V"eol"
                            ,

    block_quote_attri_other = Cmt(V"space"^1, function (s, i, indent)
                                  warn("bqa-m", #indent, utf.len(tracklists.currentindent),
                                                indent, tracklists.currentindent, i)
                                  return utf.len(tracklists.currentindent) == #indent
                              end) / ""
                            * (1 - V"eol")^1
                            * V"eol"
                            ,

--------------------------------------------------------------------------------
-- Line blocks
--------------------------------------------------------------------------------

    line_block = Cs(V"line_block_first"
                  * (V"line_block_other"
                   + V"line_block_empty")^1)
               * V"blank_line"
               / rst.line_block
               ,

    line_block_marker = V"space"^0
                      * V"bar"
                      * V"space",

    line_block_empty_marker = V"space"^0
                            * V"bar"
                            * V"space"^0
                            * V"eol",

    -- First line: remembers the marker (indent, bar and space).
    line_block_first = Cmt(V"line_block_marker", function(s, i, marker)
                           warn("lbk-i", #marker, "", marker, "", i)
                           tracklists.currentindent = marker
                           return true
                       end) / ""
                     * V"line_block_line"
                     ,

    -- Empty line: everything from the bar on is normalized to "| " before the
    -- comparison with the remembered marker.
    line_block_empty = Cmt(V"line_block_empty_marker", function(s, i, marker)
                           warn("lbk-e", #marker, #tracklists.currentindent,
                                         marker, tracklists.currentindent, i)
                           marker = marker:gsub("|.*", "| ")
                           return tracklists.currentindent == marker
                       end) / ""
                     / rst.line_block_empty
                     ,

    line_block_other = Cmt(V"line_block_marker", function(s, i, marker)
                           warn("lbk-m", #marker, #tracklists.currentindent,
                                         marker, tracklists.currentindent, i)
                           return tracklists.currentindent == marker
                       end) / ""
                     * V"line_block_line"
                     ,

    line_block_line = Cs((1 - V"eol")^1
                       * V"line_block_cont"^0
                       * V"eol")
                    / rst.line_block_line
                    ,

    -- Continuation line: indented at least as far as the marker is long.
    line_block_cont = (V"eol" - V"line_block_marker")
                    * Cmt(V"space"^1, function(s, i, spaces)
                          warn("lbk-c", #spaces, #tracklists.currentindent,
                                        spaces, tracklists.currentindent, i)
                          return #spaces >= #tracklists.currentindent
                      end) / ""
                    * (1 - V"eol")^1
                    ,

--------------------------------------------------------------------------------
-- Literal blocks
--------------------------------------------------------------------------------

    -- An earlier, overridden definition of this rule (an alternation of two
    -- rules that do not exist in this grammar) was dropped as dead code.
    literal_block = V"literal_block_marker"
                  * Cs(V"literal_block_lines"
                     * (V"blank_line"^1 * V"literal_block_lines")^0)
                  * V"blank_line"^0
                  / rst.literal_block,

    literal_block_marker = V"double_colon" * V"eol" * V"blank_line",

    literal_block_lines = V"unquoted_literal_block_lines"
                        + V"quoted_literal_block_lines",

    unquoted_literal_block_lines = V"literal_block_first"
                                 * (V"literal_block_other" - V"blank_line")^0,

    quoted_literal_block_lines = V"quoted_literal_block_first"
                               * (V"quoted_literal_block_other" - V"blank_line")^0,

    literal_block_first = Cmt(V"space"^1, function (s, i, indent)
                              warn("lbk-f", #indent, "", "", i)
                              if not indent or indent == "" then
                                  return false
                              end
                              tracklists.currentindent = indent
                              return true
                          end)
                        * V"rest_of_line"
                        * V"eol",

    literal_block_other = Cmt(V"space"^1, function (s, i, indent)
                              warn("lbk-m", #indent, #tracklists.currentindent,
                                            #indent >= #tracklists.currentindent, i)
                              return #indent >= #tracklists.currentindent
                          end)
                        * V"rest_of_line"
                        * V"eol",

    quoted_literal_block_first = Cmt(V"adornment_char", function (s, i, indent)
                                     warn("lbk-f", #indent, "", "", i)
                                     if not indent or indent == "" then
                                         return false
                                     end
                                     tracklists.currentindent = indent
                                     return true
                                 end)
                               * V"rest_of_line"
                               * V"eol",

    quoted_literal_block_other = Cmt(V"adornment_char", function (s, i, indent)
                                     warn("lbk-m", #indent, #tracklists.currentindent,
                                                   #indent >= #tracklists.currentindent, i)
                                     return #indent >= #tracklists.currentindent
                                 end)
                               * V"rest_of_line"
                               * V"eol",

--------------------------------------------------------------------------------
-- Lists
--------------------------------------------------------------------------------

    list = V"option_list"
         + V"definition_list"
         + V"bullet_list"
         + V"field_list"
         ,

--------------------------------------------------------------------------------
-- Option lists
--------------------------------------------------------------------------------

    option_list = Cs((V"option_list_item"
                    * V"blank_line"^-1)^1)
                /rst.option_list,

    option_list_item = Ct(C(V"option_group")
                        * Cs(V"option_description"))
                     / rst.option_item,

    option_description = V"option_desc_next"
                       + V"option_desc_more"
                       + V"option_desc_single",

    option_desc_single = V"space"^2
                       --* V"rest_of_line"
                       * (1 - V"eol")^1
                       * V"eol",

    option_desc_more = V"space"^2
                     --* V"rest_of_line"
                     * (1 - V"eol")^1
                     * V"eol"
                     * V"indented_lines"
                     * (V"blank_line" * V"indented_lines")^0,

    option_desc_next = V"eol"
                     * V"indented_lines"
                     * (V"blank_line" * V"indented_lines")^0,

    option_group = V"option"
                 * (V"comma" * V"space" * V"option")^0,

    option = (V"option_posixlong"
            + V"option_posixshort"
            + V"option_dos_vms")
           * V"option_arg"^-1,

    option_arg = (V"equals" + V"space")
               * ((V"letter" * (V"letter" + V"digit")^1)
                + (V"angle_left" * (1 - V"angle_right")^1 * V"angle_right")),

    option_posixshort = V"dash" * (V"letter" + V"digit"),

    option_posixlong = V"double_dash"
                     * V"letter"
                     * (V"letter" + V"digit" + V"dash")^1,

    option_dos_vms = V"slash"
                   * V"letter"^1,

--------------------------------------------------------------------------------
-- Field lists (for bibliographies etc.)
--------------------------------------------------------------------------------

    field_list = Cs(V"field"^1)
               * V"blank_line"^1
               / rst.field_list,

    field = Ct(V"field_marker"
             * V"whitespace"
             * V"field_body")
          / rst.field,

    field_marker = V"colon"
                 * C(V"field_name")
                 * V"colon",

    field_name = (V"escaped_colon" + (1 - V"colon"))^1,

    field_body = C((1 - V"eol")^1 * V"eol"
                 * V"indented_lines"^-1),

--------------------------------------------------------------------------------
-- Definition lists
--------------------------------------------------------------------------------

    definition_list = Cs(V"definition_item"
                       * (V"blank_line" * V"definition_item")^0)
                    * V"blank_line"
                    / rst.deflist
                    ,

    definition_item = Cs(V"definition_term"
                       * V"definition_classifiers"
                       * V"eol"
                       * V"definition_def")
                    / rst.deflist_item,

    definition_term = Cs((1 - V"eol" - V"definition_classifier_separator")^1)
                    / rst.deflist_term,

    definition_classifier_separator = V"space" * V"colon" * V"space",

    definition_classifiers = V"definition_classifier"^0,

    definition_classifier = V"definition_classifier_separator"
                          * Cs((1 - V"eol" - V"definition_classifier_separator")^1)
                          / rst.deflist_classifier,

    definition_def = Cs(V"definition_firstpar" * V"definition_par"^0)
                   / rst.deflist_def,

    -- Remembers the indent of the first line of a definition.
    definition_indent = Cmt(V"space"^1, function(s, i, indent)
                            warn("def-i", #indent, #tracklists.currentindent,
                                          indent == tracklists.currentindent, i)
                            tracklists.currentindent = indent
                            return true
                        end),

    definition_firstpar = Cs(V"definition_parinit"
                           * (V"definition_parline" - V"blank_line")^0)
                        / rst.paragraph,

    definition_par = V"blank_line"
                   * Cs((V"definition_parline" - V"blank_line")^1)
                   / rst.paragraph,

    definition_parinit = V"definition_indent"
                       * (1 - V"eol")^1
                       * V"eol"
                       ,

    definition_parline = V"definition_match"
                       * (1 - V"eol")^1
                       * V"eol"
                       ,

    definition_match = Cmt(V"space"^1, function (s, i, this)
                           warn("def-m", #this, #tracklists.currentindent,
                                         this == tracklists.currentindent, i)
                           return this == tracklists.currentindent
                       end),

--------------------------------------------------------------------------------
-- Bullet lists and enumerations
--------------------------------------------------------------------------------

    -- the next rule handles enumerations as well
    bullet_list = V"bullet_init"
                --* (V"bullet_list"
                 --+ V"bullet_continue")^0
                * (V"bullet_continue" + V"bullet_list")^0
                * V"bullet_stop"
                * Cmt(Cc(nil), function (s, i)
                      local t = tracklists
                      warn("close", t.depth)
                      t.bullets[t.depth] = nil -- “pop”
                      t.depth = t.depth - 1
                      return true
                  end),

    --bullet_stop =V"blank_line" * Cs(Cc("")) / rst.stopitemize,
    bullet_stop =V"endpar" * Cs(Cc("")) / rst.stopitemize,

    bullet_init = V"eol"^0
                * V"bullet_first"
                * V"bullet_itemrest",

    -- Opens a new list level.  The check runs as a lookahead (#), the bullet
    -- itself is consumed by the Cs(V"bullet_indent") that follows.
    bullet_first = #Cmt(V"bullet_indent", function (s, i, bullet)
                        local t = tracklists
                        local oldbullet = t.bullets[t.depth]
                        -- lpeg.match returns the position after the match,
                        -- i.e. the number of leading spaces plus one.
                        local n_spaces = match(P" "^0, bullet)
                        warn("first",
                             t.depth,
                             (t.depth == 0 and n_spaces == 1) or
                             (t.depth >  0 and n_spaces >  1),
                             bullet,
                             oldbullet,
                             t.conversion(bullet))

                        if t.depth == 0 and n_spaces == 1 then -- first level
                            t.depth = 1 -- “push”
                            t.bullets[1] = bullet
                            t.lastbullet = bullet
                            t.bullets.max = t.bullets.max < t.depth and t.depth or t.bullets.max
                            return true
                        elseif t.depth > 0 and n_spaces > 1 then -- sublist (of sublist)^0
                            if n_spaces >= utf.len(oldbullet) then
                                t.depth = t.depth + 1
                                t.bullets[t.depth] = bullet
                                t.lastbullet = bullet
                                t.bullets.max = t.bullets.max < t.depth and t.depth or t.bullets.max
                                return true
                            end
                        end
                        return false
                    end)
                 --* V"bullet_indent" / rst.startitemize,
                 * Cs(V"bullet_indent") / rst.startitemize,

    bullet_indent = V"space"^0 * V"bullet_expr" * V"space"^1,

    -- Decides whether a bullet continues the list of the current level.
    bullet_cont = Cmt(V"bullet_indent", function (s, i, bullet)
                      local t = tracklists
                      warn("conti",
                           t.depth,
                           bullet == t.bullets[t.depth],
                           bullet,
                           t.bullets[t.depth],
                           t.conversion(t.lastbullet),
                           t.conversion(bullet)
                           )

                      if utf.len(t.bullets[t.depth]) ~= utf.len(bullet) then
                          return false
                      elseif not t.conversion(bullet) and t.bullets[t.depth] == bullet then
                          return true
                      elseif t.conversion(t.lastbullet) == t.conversion(bullet) then -- same type
                          local continues = t.conversion(bullet) == "auto"
                                         or t.successor(bullet, t.lastbullet)
                          if continues then
                              -- Remember the enumerator so that the next item
                              -- is compared with this one, not with the first
                              -- item of the list.
                              t.lastbullet = bullet
                          end
                          return continues
                      end
                      --return false
                      return t.bullets[t.depth] == bullet
                  end) / "",
    --               ^^^^^
    --               otherwise returns the value of V"bullet_indent", not sure why …

    bullet_continue = V"blank_line"
                    * V"bullet_cont"
                    * V"bullet_itemrest",

    bullet_itemrest = Cs(V"bullet_rest" -- first line
                       * ((V"bullet_match" * V"bullet_rest")^0 -- any successive lines
                        --* (V"eol"
                        * (V"blank_line"
                         * (V"bullet_match" * (V"bullet_rest" - V"bullet_indent"))^1)^0))
                    / rst.bullet_item,
    --                                         ^^^^^^^^^^^^^
    --                                         otherwise matches bullet_first

    bullet_rest = Cs((1 - V"eol")^1 * V"eol"), -- rest of one line

    bullet_next = V"space"^1,

    -- Lookahead: the indent has to be as long as the bullet of this level.
    bullet_match = #Cmt(V"bullet_next", function (s, i, this)
                        local t = tracklists
                        warn("match",
                             t.depth,
                             string.len(this) == utf.len(t.bullets[t.depth]),
                             utf.len(t.bullets[t.depth]), string.len(this) )
                        return string.len(this) == utf.len(t.bullets[t.depth])
                    end),

    bullet_expr = V"bullet_char"
                + (P"(" * V"number_char" * P")")
                + (V"number_char" * P")")
                + (V"number_char" * V"dot") * #V"space"
                + (V"number_char" * #V"space")
                ,

    number_char = V"roman_numeral"
                + V"Roman_numeral"
                + P"#"
                + V"digit"^1
                + R"AZ"
                + R"az",

--------------------------------------------------------------------------------
-- Transitions
--------------------------------------------------------------------------------

    transition_line = C(V"adornment_char"^4),

    transition = V"eol"^0
               * V"transition_line"
               * V"endpar"
               /rst.transition,

--------------------------------------------------------------------------------
-- Sectioning
--------------------------------------------------------------------------------

    section_adorn = C(V"adornment_char"^1) * V"space"^0 * V"eol",

    -- The whitespace handling after the overline is necessary because headings
    -- without overline aren't allowed to be indented.
    section = V"blank_line"^0
            * (V"section_adorn" * V"whitespace"^0)^-1
            * C((1 - V"whitespace") * (1 - V"eol")^1)
            * V"eol"
            * V"section_adorn"
            * V"eol"^-1
            / rst.section,
    -- validity checking done by the formatter. Now, if
    -- this ain't lazy then I don't know …

--------------------------------------------------------------------------------
-- Target Blocks
--------------------------------------------------------------------------------

    tname_normal = C((V"escaped_colon" + 1 - V"colon")^1)
                 * V"colon",

    tname_bareia = C(V"bareia"
                   * (1 - V"eol" - V"bareia")^1
                   * V"bareia")
                 * V"colon",

    target_name = V"double_dot"
                * V"space"
                * V"underscore"
                * (V"tname_bareia" + V"tname_normal"),

    target_firstindent = V"eol" * Cg(V"space"^1, "indent"),

    target_nextindent = V"eol" * C(V"space"^1),

    target_indentmatch = Cmt(V"target_nextindent" -- I ♡ LPEG!
                           * Cb("indent"), function (s, i, a, b)
                                 return a == b
                             end),

    -- NOTE(review): the lookahead in the first alternative subtracts the
    -- literal text "eol", not the rule V"eol".  Left untouched because it is
    -- not clear from here which of the two was intended -- confirm.
    target_link = ( V"space"^0 * V"target_firstindent"
                 -- * C((1 - V"eol")^1) * V"eol")
                  * Ct(C(1 - V"whitespace" - V"eol")^1
                     * (V"target_indentmatch"
                      * C(1 - V"whitespace" - V"eol")^1)^0)
                  * V"eol" * #(1 - V"whitespace" - "eol")) / rst.joinindented
                + C((1 - V"eol")^1) * V"eol" * #(V"double_dot" + V"eol")
                + (1 - V"endpar")^0 * Cc("make me constant!"),

    target = Ct((V"target_name" * (V"space"^0 * V"eol" * V"target_name")^0)
              * V"space"^0
              * V"target_link")
           / rst.target,

    anonymous_prefix = (V"double_dot" * V"space" * V"double_underscore" * V"colon")
                     + (V"double_underscore"),

    anonymous_target = V"anonymous_prefix"
                     * V"space"^0
                     * Ct(Cc"" * V"target_link")
                     / rst.target,

    target_block = (V"anonymous_target" + V"target")^1
                 * V"endpar",

--------------------------------------------------------------------------------
-- Paragraphs * Inline Markup
--------------------------------------------------------------------------------

    paragraph = V"par_setindent"
              * Ct(C((1 - V"eol")^1) * V"eol"
                 * (V"par_matchindent"
                  * C((1 - V"eol")^1) * V"eol")^0)
              * V"blank_line"^1
              --* V"endpar"
              / rst.paragraph,

    -- Remembers the indent of the first line of a paragraph.
    par_setindent = Cmt(V"space"^0, function (s, i, indent)
                        warn("par-i", #indent, "", "", i)
                        tracklists.currentindent = indent
                        return true
                    end),

    par_matchindent = Cmt(V"space"^0, function (s, i, indent)
                          warn("par-m", tracklists.currentindent == indent,
                                        #indent, #tracklists.currentindent, i)
                          return tracklists.currentindent == indent
                      end),

    link_standalone = C(V"uri")
                    / rst.link_standalone,

    reference = Cs(V"_reference")
              / rst.reference,

    _reference = (1 - V"underscore" - V"spacing" - V"eol" - V"punctuation" - V"groupchars")^1
               * V"underscore",

    included_literal_block = V"literal_block_shorthand"
                           * V"literal_block_markerless",

    literal_block_shorthand = ((V"colon" * V"space"^1)^-1 * V"double_colon") / ":"
                            * (V"eol" * V"blank_line" / "")
                            ,

    literal_block_markerless = Cs(V"literal_block_lines"
                                * (V"blank_line"^1 * V"literal_block_lines")^0)
                             * V"blank_line"^0
                             / rst.literal_block,

--------------------------------------------------------------------------------
-- Comments
--------------------------------------------------------------------------------

    comment = V"block_comment"
            + V"line_comment",

    block_comment = V"double_dot"
                  * V"whitespace"^0
                  * V"eol"
                  * Cs(V"indented_lines")
                  * V"eol"^0
                  --* V"blank_line"^0
                  / rst.block_comment,

    line_comment = V"double_dot"
                 * V"whitespace"^0
                 * Cs((1 - V"eol")^0 * V"eol")
                 / rst.line_comment,

--------------------------------------------------------------------------------
-- Generic indented block
--------------------------------------------------------------------------------

    indented_lines = V"indented_first"
                   * (V"indented_other" - V"blank_line" - V"field_marker")^0,

    indented_first = Cmt(V"space"^1, function (s, i, indent)
                         warn("idt-f", indent, i)
                         if not indent or indent == "" then
                             return false
                         end
                         tracklists.currentindent = indent
                         return true
                     end)
                   * (1 - V"eol")^1
                   * V"eol",

    indented_other = Cmt(V"space"^1, function (s, i, indent)
                         warn("idt-m", indent, tracklists.currentindent,
                                       indent == tracklists.currentindent, i)
                         return indent == tracklists.currentindent
                     end)
                   * (1 - V"eol")^1
                   * V"eol",

--------------------------------------------------------------------------------
-- Urls
--------------------------------------------------------------------------------

    uri = V"url_protocol" * V"url_domain" * (V"slash" * V"url_path")^0,

    url_protocol = (P"http" + P"ftp" + P"shttp" + P"sftp") * P"://",

    url_domain_char = 1 - V"dot" - V"spacing" - V"eol" - V"punctuation",

    url_domain = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0,

    url_path_char = R("az", "AZ", "09") + S"-_.!~*'()",

    url_path = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1,

--------------------------------------------------------------------------------
-- Terminal Symbols and Low-Level Elements
--------------------------------------------------------------------------------

    word = (1 - V"punctuation" - V"endpar" - V"spacing" - V"eol")^1, -- TODO : no punctuation (later)

    asterisk = P"*",
    double_asterisk = V"asterisk" * V"asterisk",

    bareia = P"`",
    double_bareia = V"bareia" * V"bareia",
    escaped_bareia = (Cs(V"backslash") / "" * V"bareia") + 1,

    slash = P"/",
    doubleslash = V"slash" * V"slash",

    backslash = P"\\",

    bar = P"|",

    groupchars = S"()[]{}",

    --- Punctuation
    -- Some of the following are used for markup as well as for punctuation.

    comma = P",",

    colon = P":",
    double_colon = V"colon" * V"colon",
    escaped_colon = V"backslash" * V"colon",

    dot = P".",
    period = V"dot",
    double_dot = V"dot" * V"dot",

    interpunct = P"·",

    underscore = P"_",
    double_underscore = V"underscore" * V"underscore",

    dash = P"-",
    double_dash = V"dash" * V"dash",
    triple_dash = V"double_dash" * V"dash",
    emdash = P"—",
    attrib_dash = V"triple_dash" + V"double_dash" + V"emdash", -- begins quote attribution blocks
    dashes = V"dash" + P"‒" + P"–" + V"emdash" + P"―",

    hyphen = P"‐",

    semicolon = P";",

    questionmark = P"?",
    exclamationmark = P"!",
    inverted_exclamationmark = P"¡",
    inverted_questionmark = P"¿",
    interrobang = P"‽",

    apostrophe = P"’" + P"'",
    --brackets = P"[ ], (",, { }, ⟨ ⟩ )

    ellipsis = P"…" + P"...",

    guillemets = P"«" + P"»",
    quotationmarks= P"‘" + P"’" + P"“" + P"”",

    solidus= P"⁄",

    punctuation = V"apostrophe"
                + V"colon"
                + V"comma"
                + V"dashes"
                + V"dot"
                + V"ellipsis"
                + V"exclamationmark"
                + V"guillemets"
                + V"hyphen"
                + V"interpunct"
                + V"interrobang"
                + V"questionmark"
                + V"quotationmarks"
                + V"semicolon"
                + V"slash"
                + V"solidus"
                + V"underscore"
                ,

    -- These are treated separately as they might begin a paragraph (sigh!).
    inverted_punctuation = V"inverted_exclamationmark"
                         + V"inverted_questionmark",

    -- End punctuation

    letter = R"az" + R"AZ",

    equals = P"=",

    space = P" ",
    spaces = V"space"^1,
    whitespace = (P" " + Cs(P"\t") / " " + Cs(S"\v") / " "),
    spacing = V"whitespace"^1,
    blank_line = V"space"^0 * V"eol",

    rest_of_line = (1 - V"eol")^1,

    eol = P"\n",
    eof = V"eol"^0 * -P(1),

    endpar = V"eol" * (V"blank_line"^1 + V"eof"),

    -- diverse markup character sets
    delimiters = P"‐" + P"‑" + P"‒" + P"–" + V"emdash" + V"space", -- inline markup
    adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], -- headings
    bullet_char = S"*+-" + P"•" + P"‣" + P"⁃", -- bullet lists
    argument_char = V"double_dash" * V"dash" * V"slash", -- option lists

    digit = R"09",

    roman_numeral = S"ivxlcdm"^1,
    Roman_numeral = S"IVXLCDM"^1,

    inline_delimiter = P"**" + P"``" + S"*`",
    angle_left = P"<",
    angle_right = P">",
    enclosed_open = S[['"([{<]],
    enclosed_close = S[['")]}>]],

    table_intersection = P"+",
    table_hline = V"dash",
    table_vline = V"bar",
    table_header_hline = P"=",
}

-- Test driver: parse the sample document and print the result.
-- `f` and `testdata` stay global as in the original script; assert() turns a
-- missing input file into a readable error message.
f = assert(io.open("inlinefmt.rst", "r"))
testdata = f:read("*all")
f:close()

print(parser:match(testdata))

--print(">>>Last used char>: " ..tracklists.lastbullet.." <<<<")
--print(">>>Max list nestin>: "..tracklists.bullets.max .." <<<<")

--for i,j in next, rst.collected_references do
    --print (string.format("== %7s => %s <=", i,j))
--end
--parser:print()