#!/usr/bin/env texlua
--------------------------------------------------------------------------------
--         FILE:  rst-parser.lua
--        USAGE:  ./rst-parser.lua <infile> <outfile>
--  DESCRIPTION:  Reads <infile>, runs it through the LPeg grammar defined
--                below (whose captures are handed to the callbacks in
--                rst_context) and writes the result, wrapped in setups and
--                \starttext ... \stoptext, to <outfile>.
--      OPTIONS:  ---
-- REQUIREMENTS:  ---
--       AUTHOR:  Philipp Gesang (Phg),
--      VERSION:  1.0
--      CREATED:  31/08/10 11:53:49 CEST
--------------------------------------------------------------------------------
--

--require "lpeg"

-- These three are intentionally global: the grammar callbacks below and
-- (presumably -- confirm against rst_context / rst_helpers) the required
-- modules share them.
rst     = require "rst_context"
helpers = require "rst_helpers"

-- Debug switch for warn(); while true every match-time callback writes a trace
-- line to stdout.
local rst_debug = true

-- Write one trace line of the form "*[tag] | v1 | v2 ... |" to stdout.
--   str : short tag identifying the call site
--   ... : values to print; each is passed through tostring() and
--         string.strip(). Iteration uses ipairs, so it stops at the first nil.
-- Returns false when debugging is switched off, 0 otherwise.
-- NOTE(review): string.strip is not part of stock Lua; it is assumed to be
-- provided by the runtime or one of the required modules -- confirm.
local warn = function(str, ...)
    if not rst_debug then return false end
    local slen = #str + 3
    local out = { "*[" .. str .. "]" }
    for i, j in ipairs({...}) do
        -- Wrap and re-indent once the columns would pass 80 characters.
        if 80 - i * 8 - slen < 0 then
            out[#out + 1] = "\n" .. string.rep(" ", slen)
        end
        out[#out + 1] = string.format(" |%6s", string.strip(tostring(j)))
    end
    out[#out + 1] = " |\n"
    io.write(table.concat(out))
    return 0
end

-- Print a value between markers and hand it back unchanged (debug aid).
local debugme = function(x)
    print ("HERE >"..x.."<")
    return x
end

local C, Cb, Cc, Cg, Cmt, Cp, Cs, Ct = lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct
local P, R, S, V, match = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match

local utf = unicode.utf8

local eol = P"\n"

-- Parser state shared between the match-time callbacks of the grammar.
state = {
    depth         = 0,
    bullets       = { max = 0 }, -- mapping bullet forms to depth
    lastbullet    = "",
    lastbullets   = {},
    roman_cache   = {},          -- storing roman numerals that were already converted
    currentindent = "",          -- used in definition lists and elsewhere
    currentwidth  = 0,           -- table layout
    currentlayout = {},          -- table layout
    footnotes     = {
        autonumber = 0,
        numbered   = {},
        labeled    = {},
        autolabel  = {},
        symbol     = {},
    },
    addme         = {},
}

local enclosed_mapping = {
    ["'"] = "'",
    ['"'] = '"',
    ["("] = ")",
    ["["] = "]",
    ["{"] = "}",
    ["<"] = ">",
}

local utfchar = P{ -- from l-lpeg.lua, modified to use as grammar
    [1]       = "utfchar",
    utf8byte  = R("\128\191"),
    utf8one   = R("\000\127"),
    utf8two   = R("\194\223") * V"utf8byte",
    utf8three = R("\224\239") * V"utf8byte" * V"utf8byte",
    utf8four  = R("\240\244") * V"utf8byte" * V"utf8byte" * V"utf8byte",
    utfchar   = V"utf8one" + V"utf8two" + V"utf8three" + V"utf8four",
}

-- Compute the column boundaries of a simple-table border line.
-- The original code reached this function through a global called `help`,
-- which this file never defines.  Use that global if something else provides
-- it, otherwise fall back to the `helpers` module loaded above.
-- NOTE(review): assumes rst_helpers exports get_st_boundaries -- confirm.
local get_st_boundaries = function (layout)
    local lib = rawget(_G, "help") or helpers
    return lib.get_st_boundaries(layout)
end

local parser = P{
    [1] = V"document",

    document = V"blank_line"^0 * Cs(V"block"^1),

--------------------------------------------------------------------------------
-- Blocks
--------------------------------------------------------------------------------

    block = V"target_block"
          + Cs(V"list") / rst.escape
          + V"comment"
          + V"line_block"
          + Cs(V"table_block") / rst.escape
          + Cs(V"section") / rst.escape
          + Cs(V"transition") --/ rst.escape
          + V"literal_block"
          + Cs(V"block_quote") / rst.escape
          + V"explicit_markup"
          + Cs(V"paragraph") / rst.escape
          ,

--------------------------------------------------------------------------------
-- Explicit markup block
--------------------------------------------------------------------------------

    --explicit_markup = V"double_dot" * V"whitespace"
                    --* V"explicit_body"
                    --* V"whitespace"
                    --,

    explicit_markup_start = V"double_dot" * V"whitespace",

    explicit_markup = V"footnote_block"
                    --+ V"hyperlink_target"
                    --+ V"directive"
                    --+ V"substitution_definition"
                    --+ V"comment block"
                    ,

--------------------------------------------------------------------------------
-- Explicit markup hyperlink target
--------------------------------------------------------------------------------

    --hyperlink_target_block = V"hyperlink_target"
                           --* (V"blank_line"^-1 * V"hyperlink_target")^0
                           --* V"end_block"
                           --,

    --hyperlink_target = V"hl_anonymous"
                     ----+ V"hl_named"
                     --,

    --hl_anonymous = V"hl_anonymous_start"
                 --* C(V"link_block")
                 --,

    --hl_anonymous_start = (V"double_underscore"
                          --+ V"double_dot" * V"space" * V"double_underscore" * V"colon")
                       --* V"whitespace"
                       --,

    --link_block =

--------------------------------------------------------------------------------
-- Explicit markup footnote block
--------------------------------------------------------------------------------

    footnote_block = V"footnote"^1 * V"end_block",

    footnote = V"explicit_markup_start"
             * (V"footnote_marker" + V"citation_reference_label")
             * C(V"footnote_content")
             * (V"blank_line" - V"end_block")^-1
             / rst.footnote
             ,

    footnote_marker = V"lsquare" * C(V"footnote_label") * V"rsquare" * V"whitespace"^0
                    ,

    citation_reference_label = V"lsquare" * C(V"letter" * (1 - V"rsquare")^1) * V"rsquare" * V"whitespace"^0,

    footnote_label = V"digit"^1
                   + (V"gartenzaun" * V"letter"^1)
                   + V"gartenzaun"
                   + V"asterisk"
                   ,

    footnote_content = V"footnote_long" -- single line
                     + V"footnote_simple"
                     ,

    footnote_simple = (1 - V"eol")^1 * V"eol"
                    ,

    footnote_long = (1 - V"eol")^1 * V"eol"
                  * V"footnote_body"
                  ,

    footnote_body = V"fn_body_first"
                  * (V"fn_body_other" + V"fn_body_other_block")^0
                  ,

    -- Remember the indent of the first body line in state.currentindent.
    fn_body_first = Cmt(V"space"^1, function(s, i, indent)
                        warn("fn-in", true, #indent)
                        state.currentindent = indent
                        return true
                    end)
                  * (1 - V"eol")^1 * V"eol"
                  ,

    -- Succeed only if this line's indent equals the remembered one.
    fn_matchindent = Cmt(V"space"^1, function(s, i, indent)
                         local tc = state.currentindent
                         warn("fn-ma", tc == indent, #tc, #indent, i)
                         return tc == indent
                     end)
                   ,

    fn_body_other = V"fn_body_other_regular"
                  * (V"blank_line" * V"fn_body_other_regular")^0
                  ,

    fn_body_other_regular = V"fn_matchindent"
                          * (1 - V"eol")^1 * V"eol"
                          ,

    -- TODO find a way to get those to work in footnotes!
    fn_body_other_block = V"line_block"
                        + V"table_block"
                        + V"transition"
                        + V"block_quote"
                        + V"list"
                        ,

--------------------------------------------------------------------------------
-- Table block
--------------------------------------------------------------------------------

    table_block = V"simple_table" + V"grid_table"
                ,

--------------------------------------------------------------------------------
-- Simple tables
--------------------------------------------------------------------------------

    simple_table = Ct(V"st_first_row"
                    * V"st_other_rows")
                 * V"end_block"
                 / function (tab)
                     return rst.simple_table(helpers.table.simple(tab))
                 end
                 ,

    st_first_row = V"st_setindent"
                 * C(V"st_setlayout")
                 * V"space"^0
                 * V"eol"
                 ,

    st_setindent = Cmt(V"space"^0, function(s, i, indent)
                       warn("sta-i", "true", #indent, "set", i)
                       state.currentindent = indent
                       return true
                   end)
                 ,

    st_matchindent = Cmt(V"space"^0, function(s, i, indent)
                         warn("sta-m", state.currentindent == indent, #indent, #state.currentindent, i)
                         return state.currentindent == indent
                     end)
                   ,

    -- Store the raw top border and its column boundaries for later rows.
    st_setlayout = Cmt((V"equals"^1) * (V"spaces" * V"equals"^1)^1, function(s, i, layout)
                       local tc = state.currentlayout
                       warn("sta-l", #layout, "set", "", i)
                       tc.raw = layout
                       tc.bounds = get_st_boundaries(layout)
                       return true
                   end)
                 ,

    st_other_rows = (V"st_content"^1 * V"st_separator")^1,

    st_content = V"blank_line"^-1
               * C(V"st_matchlayout"),

    st_matchlayout = -#V"st_separator" * Cmt((1 - V"eol")^1, function (s, i, content)
                         -- Don't check for matching indent but if the rest is
                         -- fine then the line should be sane. This allows
                         -- cells starting with spaces.
                         -- NOTE(review): sub(n) keeps the n-th byte, so for a
                         -- non-empty indent one leading space survives; left
                         -- as is because the boundary convention of
                         -- get_st_boundaries is not visible here -- confirm.
                         content = content:sub(#state.currentindent)
                         local tcb = state.currentlayout.bounds
                         local n = 1
                         local spaces_only = P" "^1
                         while n < #tcb.slices do
                             local from = tcb.slices[n]  .stop
                             local to   = tcb.slices[n+1].start
                             --print(n, from, to, content)
                             local between = spaces_only:match(content, from)
                             if not between then -- Cell spanning more than one row.
                                 -- pass
                                 warn("sta-c", "span", from, to, i)
                             elseif not (between >= to) then
                                 warn("sta-c", "false", from, to, i)
                                 return false
                             end
                             n = n + 1
                         end
                         warn("sta-c", "true", #tcb.slices, "", i)
                         return true
                     end)
                   * V"eol"
                   ,

    st_separator = V"st_matchindent" * C(V"st_normal_sep" + V"st_colspan_sep") * V"eol"
                 ,

    -- A normal separator must be identical to the stored top border.
    st_normal_sep = Cmt((V"equals"^1) * (V"spaces" * V"equals"^1)^1, function(s, i, layout)
                        warn("sta-s", state.currentlayout.raw == layout, #layout, #state.currentlayout.raw, i)
                        return state.currentlayout.raw == layout
                    end)
                  ,

    -- A column-span separator is accepted when at least one of its starts
    -- coincides with a column start of the table and, for every such start,
    -- at least one of its stops coincides with a column stop.
    st_colspan_sep = Cmt(V"dash"^1 * (V"spaces" * V"dash"^1)^0, function(s, i, layout)
                         local tcb = state.currentlayout.bounds
                         local this = get_st_boundaries (layout)
                         local start_valid = false
                         for start, _ in next, this.starts do
                             if tcb.starts[start] then
                                 start_valid = true
                                 local stop_valid = false
                                 for stop, _ in next, this.stops do
                                     if tcb.stops[stop] then -- bingo
                                         stop_valid = true
                                     end
                                 end
                                 if not stop_valid then
                                     warn("sta-x", stop_valid, #layout, #state.currentlayout.raw, i)
                                     return false
                                 end
                             end
                         end
                         warn("sta-x", start_valid, #layout, #state.currentlayout.raw, i)
                         return start_valid
                     end)
                   ,

--------------------------------------------------------------------------------
-- Grid tables
--------------------------------------------------------------------------------

    grid_table = Ct(V"gt_first_row"
                  * V"gt_other_rows")
               * V"blank_line"^1
               / function(tab)
                   return rst.grid_table(helpers.table.create(tab))
               end
               ,

    gt_first_row = V"gt_setindent"
                 * C(V"gt_sethorizontal")
                 * V"eol"
                 ,

    gt_setindent = Cmt(V"space"^0, function(s, i, indent)
                       warn("tab-i", true, #indent, "set", i)
                       state.currentindent = indent
                       return true
                   end)
                 ,

    gt_layoutmarkers = V"table_intersection" + V"table_hline" + V"table_header_hline",

    -- Remember the byte width of the top border in state.currentwidth.
    gt_sethorizontal = Cmt(V"gt_layoutmarkers"^3, function (s, i, width)
                           warn("tab-h", "width", "true", #width, "set", i)
                           state.currentwidth = #width
                           return true
                       end)
                     ,

    gt_other_rows = V"gt_head"^-1
                  * V"gt_body"
                  ,

    gt_matchindent = Cmt(V"space"^0, function (s, i, this)
                         local matchme = state.currentindent
                         warn("tab-m", "indent", #this == #matchme, #this, #matchme, i)
                         return #this == #matchme
                     end)
                   ,

    gt_cell = (V"gt_content_cell" + V"gt_line_cell")
            * (V"table_intersection" + V"table_vline")
            ,

    gt_content_cell = ((1 - V"table_vline" - V"table_intersection" - V"eol")^1),

    gt_line_cell = V"table_hline"^1,

    gt_contentrow = V"gt_matchindent"
                  * C((V"table_intersection" + V"table_vline")
                    * V"gt_cell"^1)
                  * V"whitespace"^-1 * V"eol"
                  ,

    gt_body = ((V"gt_contentrow" - V"gt_bodysep")^1 * V"gt_bodysep")^1,

    gt_bodysep = V"gt_matchindent"
               * C(Cmt(V"table_intersection" * (V"table_hline"^1 * V"table_intersection")^1, function(s, i, separator)
                       local matchme = state.currentwidth
                       warn("tab-m", "body", #separator == matchme, #separator, matchme, i)
                       return #separator == matchme
                   end))
               * V"whitespace"^-1 * V"eol"
               ,

    gt_head = V"gt_contentrow"^1
            * V"gt_headsep"
            ,

    gt_headsep = V"gt_matchindent"
               * C(Cmt(V"table_intersection" * (V"table_header_hline"^1 * V"table_intersection")^1, function(s, i, separator)
                       local matchme = state.currentwidth
                       warn("tab-s", "head", #separator == matchme, #separator, matchme, i)
                       return #separator == matchme
                   end))
               * V"whitespace"^-1 * V"eol"
               ,

--------------------------------------------------------------------------------
-- Block quotes
--------------------------------------------------------------------------------

    block_quote = Ct(Cs(V"block_quote_first"
                      * V"block_quote_other"^0
                      * (V"blank_line" * V"block_quote_other"^1)^0)
                   * (V"blank_line"
                    * Cs(V"block_quote_attri"))^-1)
                * V"end_block"
                / rst.block_quote
                ,

    block_quote_first = Cmt(V"space"^1, function (s, i, indent)
                            warn("bkq-i", #indent, "", indent, "", i)
                            state.currentindent = indent
                            return true
                        end) / ""
                      * -V"attrib_dash"
                      * (1 - V"eol")^1
                      * V"eol"
                      ,

    block_quote_other = Cmt(V"space"^1, function (s, i, indent)
                            warn("bkq-m", #indent, #state.currentindent,
                                          indent, state.currentindent, i)
                            return state.currentindent == indent
                        end) / ""
                      * -V"attrib_dash"
                      * (1 - V"eol")^1
                      * V"eol"
                      ,

    block_quote_attri = V"block_quote_attri_first"
                      * V"block_quote_attri_other"^0,

    -- The attribution must start at the quote's indent; on success the whole
    -- marker (indent, dash, space) becomes the new reference indent.
    block_quote_attri_first = Cmt(V"space"^1 * V"attrib_dash" * V"space", function (s, i, indent)
                                  local t = state
                                  warn("bqa-i", utf.len(indent), #t.currentindent,
                                                indent, t.currentindent, i)
                                  local ret = indent:match(" *") == t.currentindent
                                  t.currentindent = ret and indent or t.currentindent
                                  return ret
                              end) / ""
                            * (1 - V"eol")^1
                            * V"eol"
                            ,

    block_quote_attri_other = Cmt(V"space"^1, function (s, i, indent)
                                  warn("bqa-m", #indent, utf.len(state.currentindent),
                                                indent, state.currentindent, i)
                                  return utf.len(state.currentindent) == #indent
                              end) / ""
                            * (1 - V"eol")^1
                            * V"eol"
                            ,

--------------------------------------------------------------------------------
-- Line blocks
--------------------------------------------------------------------------------

    line_block = Cs(V"line_block_first"
                  * (V"line_block_other"
                   + V"line_block_empty")^1)
               * V"blank_line"
               / rst.line_block
               ,

    line_block_marker = V"space"^0 * V"bar" * V"space",

    line_block_empty_marker = V"space"^0 * V"bar" * V"space"^0 * V"eol",

    line_block_first = Cmt(V"line_block_marker", function(s, i, marker)
                           warn("lbk-i", #marker, "", marker, "", i)
                           state.currentindent = marker
                           return true
                       end) / ""
                     * V"line_block_line"
                     ,

    line_block_empty = Cmt(V"line_block_empty_marker", function(s, i, marker)
                           warn("lbk-e", #marker, #state.currentindent,
                                         marker, state.currentindent, i)
                           -- Normalize "|<anything>" to "| " before comparing.
                           marker = marker:gsub("|.*", "| ")
                           return state.currentindent == marker
                       end) / ""
                     / rst.line_block_empty
                     ,

    line_block_other = Cmt(V"line_block_marker", function(s, i, marker)
                           warn("lbk-m", #marker, #state.currentindent,
                                         marker, state.currentindent, i)
                           return state.currentindent == marker
                       end) / ""
                     * V"line_block_line"
                     ,

    line_block_line = Cs((1 - V"eol")^1
                       * V"line_block_cont"^0
                       * V"eol")
                    / rst.line_block_line
                    ,

    line_block_cont = (V"eol" - V"line_block_marker")
                    * Cmt(V"space"^1, function(s, i, spaces)
                          warn("lbk-c", #spaces, #state.currentindent,
                                        spaces, state.currentindent, i)
                          return #spaces >= #state.currentindent
                      end) / ""
                    * (1 - V"eol")^1
                    ,

--------------------------------------------------------------------------------
-- Literal blocks
--------------------------------------------------------------------------------

    -- An earlier definition of this rule used to sit directly above the live
    -- one under the same table key.  It referenced rules that do not exist in
    -- this grammar and was overwritten by the definition below, so it is kept
    -- here only for reference:
    --literal_block = V"unquoted_literal_block" + V"quoted_literal_block",

    literal_block = V"literal_block_marker"
                  * Cs(V"literal_block_lines"
                     * (V"blank_line"^1 * V"literal_block_lines")^0)
                  * V"blank_line"^0
                  / rst.literal_block,

    literal_block_marker = V"double_colon" * V"eol" * V"blank_line",

    literal_block_lines = V"unquoted_literal_block_lines"
                        + V"quoted_literal_block_lines",

    unquoted_literal_block_lines = V"literal_block_first"
                                 * (V"literal_block_other" - V"blank_line")^0,

    quoted_literal_block_lines = V"quoted_literal_block_first"
                               * (V"quoted_literal_block_other" - V"blank_line")^0,

    literal_block_first = Cmt(V"space"^1, function (s, i, indent)
                              warn("lbk-f", #indent, "", "", i)
                              if not indent or indent == "" then
                                  return false
                              end
                              state.currentindent = indent
                              return true
                          end)
                        * V"rest_of_line"
                        * V"eol",

    literal_block_other = Cmt(V"space"^1, function (s, i, indent)
                              warn("lbk-m", #indent, #state.currentindent,
                                            #indent >= #state.currentindent, i)
                              return #indent >= #state.currentindent
                          end)
                        * V"rest_of_line"
                        * V"eol",

    quoted_literal_block_first = Cmt(V"adornment_char", function (s, i, indent)
                                     warn("lbk-f", #indent, "", "", i)
                                     if not indent or indent == "" then
                                         return false
                                     end
                                     state.currentindent = indent
                                     return true
                                 end)
                               * V"rest_of_line"
                               * V"eol",

    quoted_literal_block_other = Cmt(V"adornment_char", function (s, i, indent)
                                     warn("lbk-m", #indent, #state.currentindent,
                                                   #indent >= #state.currentindent, i)
                                     return #indent >= #state.currentindent
                                 end)
                               * V"rest_of_line"
                               * V"eol",

--------------------------------------------------------------------------------
-- Lists
--------------------------------------------------------------------------------

    list = V"option_list"
         + V"bullet_list"
         + V"definition_list"
         + V"field_list"
         ,

--------------------------------------------------------------------------------
-- Option lists
--------------------------------------------------------------------------------

    option_list = Cs((V"option_list_item"
                    * V"blank_line"^-1)^1)
                /rst.option_list,

    option_list_item = Ct(C(V"option_group")
                        * Cs(V"option_description"))
                     / rst.option_item,

    option_description = V"option_desc_next"
                       + V"option_desc_more"
                       + V"option_desc_single",

    option_desc_single = V"space"^2
                       --* V"rest_of_line"
                       * (1 - V"eol")^1
                       * V"eol",

    option_desc_more = V"space"^2
                     * (1 - V"eol")^1
                     * V"eol"
                     * V"indented_lines"
                     * (V"blank_line" * V"indented_lines")^0,

    option_desc_next = V"eol"
                     * V"indented_lines"
                     * (V"blank_line" * V"indented_lines")^0,

    option_group = V"option"
                 * (V"comma" * V"space" * V"option")^0,

    option = (V"option_posixlong"
            + V"option_posixshort"
            + V"option_dos_vms")
           * V"option_arg"^-1,

    option_arg = (V"equals" + V"space")
               * ((V"letter" * (V"letter" + V"digit")^1)
                + (V"angle_left"
                 * (1 - V"angle_right")^1
                 * V"angle_right")),

    option_posixshort = V"dash" * (V"letter" + V"digit"),

    option_posixlong = V"double_dash"
                     * V"letter"
                     * (V"letter" + V"digit" + V"dash")^1,

    option_dos_vms = V"slash"
                   * V"letter"^1,

--------------------------------------------------------------------------------
-- Field lists (for bibliographies etc.)
--------------------------------------------------------------------------------

    field_list = Cs(V"field"^1)
               * V"blank_line"^1
               / rst.field_list,

    field = Ct(V"field_marker"
             * V"whitespace"
             * V"field_body")
          / rst.field,

    field_marker = V"colon"
                 * C(V"field_name")
                 * V"colon",

    field_name = (V"escaped_colon" + (1 - V"colon"))^1,

    field_body = C((1 - V"eol")^1 * V"eol"
                 * V"indented_lines"^-1),

--------------------------------------------------------------------------------
-- Definition lists
--------------------------------------------------------------------------------

    definition_list = Cs(V"definition_item"
                       * (V"blank_line" * V"definition_item")^0)
                    * V"end_block"
                    / rst.deflist
                    ,

    definition_item = Cs(V"definition_term"
                       * V"definition_classifiers"
                       * V"eol"
                       * V"definition_def")
                    / rst.deflist_item,

    definition_term = Cs((1 - V"eol" - V"definition_classifier_separator")^1)
                    / rst.deflist_term,

    definition_classifier_separator = V"space" * V"colon" * V"space",

    definition_classifiers = V"definition_classifier"^0,

    definition_classifier = V"definition_classifier_separator"
                          * Cs((1 - V"eol" - V"definition_classifier_separator")^1)
                          / rst.deflist_classifier,

    definition_def = Cs(V"definition_firstpar" * V"definition_par"^0)
                   / rst.deflist_def,

    definition_indent = Cmt(V"space"^1, function(s, i, indent)
                            warn("def-i", #indent, #state.currentindent, indent == state.currentindent, i)
                            state.currentindent = indent
                            return true
                        end),

    definition_firstpar = Cs(V"definition_parinit"
                           * (V"definition_parline" - V"blank_line")^0)
                        / rst.paragraph,

    definition_par = V"blank_line"
                   * Cs((V"definition_parline" - V"blank_line")^1)
                   / rst.paragraph,

    definition_parinit = V"definition_indent"
                       * (1 - V"eol")^1
                       * V"eol"
                       ,

    definition_parline = V"definition_match"
                       * (1 - V"eol")^1
                       * V"eol"
                       ,

    definition_match = Cmt(V"space"^1, function (s, i, this)
                           warn("def-m", #this, #state.currentindent, this == state.currentindent, i)
                           return this == state.currentindent
                       end),

--------------------------------------------------------------------------------
-- Bullet lists and enumerations
--------------------------------------------------------------------------------

    -- the next rule handles enumerations as well
    bullet_list = V"bullet_init"
                --* (V"bullet_list"
                 --+ V"bullet_continue")^0
                --* (V"bullet_continue" + V"bullet_list")^0
                * (V"blank_line"^-1 * (V"bullet_list" + V"bullet_continue"))^0
                * V"bullet_stop"
                * Cmt(Cc(nil), function (s, i)
                      -- Leaving a list level: restore the enclosing level.
                      local t = state
                      warn("close", t.depth)
                      t.bullets[t.depth] = nil -- “pop”
                      t.depth = t.depth - 1
                      t.lastbullet = t.lastbullets[t.depth]
                      return true
                  end),

    --bullet_stop = V"blank_line" * Cs(Cc("")) / rst.stopitemize,
    bullet_stop = V"end_block" * Cs(Cc("")) / rst.stopitemize,

    bullet_init = V"bullet_first"
                * V"bullet_itemrest",

    -- Lookahead that opens a new list level when the bullet's position allows
    -- it: at depth 0 the match of P" "^0 must end at position 1 (no leading
    -- space); deeper levels need it beyond 1 and not below the length of the
    -- enclosing level's bullet.
    bullet_first = #Cmt(V"bullet_indent", function (s, i, bullet)
                        local t = state
                        local oldbullet = t.bullets[t.depth]
                        local n_spaces = match(P" "^0, bullet)
                        warn("first",
                             t.depth,
                             (t.depth == 0 and n_spaces == 1) or
                             (t.depth >  0 and n_spaces >  1),
                             bullet,
                             oldbullet,
                             helpers.list.conversion(bullet))

                        if t.depth == 0 and n_spaces == 1 then -- first level
                            t.depth = 1 -- “push”
                            t.bullets[1] = bullet
                            t.lastbullet = bullet
                            t.bullets.max = t.bullets.max < t.depth and t.depth or t.bullets.max
                            return true
                        elseif t.depth > 0 and n_spaces > 1 then -- sublist (of sublist)^0
                            if n_spaces >= utf.len(oldbullet) then
                                t.lastbullets[t.depth] = t.lastbullet
                                t.depth = t.depth + 1
                                t.bullets[t.depth] = bullet
                                t.lastbullet = bullet
                                t.bullets.max = t.bullets.max < t.depth and t.depth or t.bullets.max
                                return true
                            end
                        end
                        return false
                    end)
                 * Cs(V"bullet_indent")
                 / rst.startitemize,

    bullet_indent = V"space"^0 * V"bullet_expr" * V"space"^1,

    -- Decide whether a bullet continues the list at the current depth.
    bullet_cont = Cmt(V"bullet_indent", function (s, i, bullet)
                      local t = state
                      local conversion = helpers.list.conversion
                      warn("conti",
                           t.depth,
                           bullet == t.bullets[t.depth],
                           bullet,
                           t.bullets[t.depth],
                           t.lastbullets[t.depth],
                           conversion(t.lastbullet),
                           conversion(bullet)
                           )

                      if utf.len(t.bullets[t.depth]) ~= utf.len(bullet) then
                          return false
                      elseif not conversion(bullet) and t.bullets[t.depth] == bullet then
                          return true
                      elseif conversion(t.lastbullet) == conversion(bullet) then -- same type
                          local autoconv = conversion(bullet) == "auto"
                          local successor = helpers.list.successor(bullet, t.lastbullet)
                          t.lastbullet = bullet
                          return autoconv or successor
                      end
                      --return t.bullets[t.depth] == bullet
                  end) / "",
                  --   ^^^^^
                  -- otherwise returns the value of V"bullet_indent", not sure why …

    bullet_continue = V"bullet_cont"
                    * V"bullet_itemrest",

    bullet_itemrest = Cs(V"bullet_rest"                          -- first line
                       * ((V"bullet_match" * V"bullet_rest")^0   -- any successive lines
                        --* (V"eol"
                        * (V"blank_line"
                         * (V"bullet_match" * (V"bullet_rest" - V"bullet_indent"))^1)^0))
                    / rst.bullet_item,
                    --                                           ^^^^^^^^^^^^^
                    --                          otherwise matches bullet_first

    bullet_rest = Cs((1 - V"eol")^1 * V"eol"), -- rest of one line

    bullet_next = V"space"^1,

    bullet_match = #Cmt(V"bullet_next", function (s, i, this)
                        local t = state
                        warn("match",
                             t.depth,
                             string.len(this) == utf.len(t.bullets[t.depth]),
                             utf.len(t.bullets[t.depth]), string.len(this) )
                        return string.len(this) == utf.len(t.bullets[t.depth])
                    end),

    bullet_expr = V"bullet_char"
                + (P"(" * V"number_char" * P")")
                + (V"number_char" * P")")
                + (V"number_char" * V"dot") * #V"space"
                + (V"number_char" * #V"space")
                ,

    number_char = V"roman_numeral"
                + V"Roman_numeral"
                + P"#"
                + V"digit"^1
                + R"AZ"
                + R"az",

--------------------------------------------------------------------------------
-- Transitions
--------------------------------------------------------------------------------

    transition_line = C(V"adornment_char"^4),

    transition = V"eol"^0
               * V"transition_line"
               * V"end_block"
               /rst.transition,

--------------------------------------------------------------------------------
-- Sectioning
--------------------------------------------------------------------------------

    section_adorn = C(V"adornment_char"^1) * V"space"^0 * V"eol",

    -- The whitespace handling after the overline is necessary because headings
    -- without overline aren't allowed to be indented.
    section = (V"section_adorn" * V"whitespace"^0)^-1
            * C((1 - V"whitespace") * (1 - V"eol")^1)
            * V"eol"
            * V"section_adorn"
            * V"eol"^-1
            / rst.section,
            -- validity checking done by the formatter. Now, if
            -- this ain't lazy then I don't know …

--------------------------------------------------------------------------------
-- Target Blocks
--------------------------------------------------------------------------------

    tname_normal = C((V"escaped_colon" + 1 - V"colon")^1)
                 * V"colon",

    tname_bareia = C(V"bareia"
                   * (1 - V"eol" - V"bareia")^1
                   * V"bareia")
                 * V"colon",

    target_name = V"double_dot"
                * V"space"
                * V"underscore"
                * (V"tname_bareia" + V"tname_normal"),

    target_firstindent = V"eol" * Cg(V"space"^1, "indent"),

    target_nextindent = V"eol" * C(V"space"^1),

    target_indentmatch = Cmt(V"target_nextindent" -- I ♡ LPEG!
                           * Cb("indent"), function (s, i, a, b)
                               return a == b
                           end),

    -- NOTE(review): the lookahead at the end of the first alternative subtracts
    -- the three-character string "eol", not the rule V"eol".  It is left
    -- untouched on purpose: with V"eol" the lookahead would reject a following
    -- newline, i.e. an indented link followed by a blank line would no longer
    -- take this alternative.  Confirm the intended behaviour before changing.
    target_link = ( V"space"^0 * V"target_firstindent"
                  * Ct(C(1 - V"whitespace" - V"eol")^1
                     * (V"target_indentmatch"
                      * C(1 - V"whitespace" - V"eol")^1)^0)
                  * V"eol" * #(1 - V"whitespace" - "eol")) / rst.joinindented
                + C((1 - V"eol")^1) * V"eol" * #(V"double_dot" + V"double_underscore" + V"eol")
                + (1 - V"end_block")^0 * Cc(""),

    target = Ct((V"target_name"
               * (V"space"^0 * V"eol" * V"target_name")^0)
              * V"space"^0
              * V"target_link")
           / rst.target,

    anonymous_prefix = (V"double_dot" * V"space" * V"double_underscore" * V"colon")
                     + (V"double_underscore")
                     ,

    anonymous_target = V"anonymous_prefix"
                     * V"space"^0
                     * Ct(Cc"" * V"target_link")
                     / rst.target
                     ,

    target_block = (V"anonymous_target" + V"target")^1
                 * V"end_block",

--------------------------------------------------------------------------------
-- Paragraphs * Inline Markup
--------------------------------------------------------------------------------

    paragraph = V"par_setindent"
              * Ct(C((1 - V"eol")^1) * V"eol"
                 * (V"par_matchindent"
                  * C((1 - V"eol")^1) * V"eol")^0)
              * V"end_block"
              / rst.paragraph,

    par_setindent = Cmt(V"space"^0, function (s, i, indent)
                        warn("par-i", #indent, "", "", i)
                        state.currentindent = indent
                        return true
                    end),

    par_matchindent = Cmt(V"space"^0, function (s, i, indent)
                          warn("par-m", state.currentindent == indent, #indent, #state.currentindent, i)
                          return state.currentindent == indent
                      end),

    link_standalone = C(V"uri")
                    / rst.link_standalone,

    reference = Cs(V"_reference")
              / rst.reference,

    _reference = (1 - V"underscore" - V"spacing" - V"eol" - V"punctuation" - V"groupchars")^1 * V"underscore",

    included_literal_block = V"literal_block_shorthand"
                           * V"literal_block_markerless",

    literal_block_shorthand = ((V"colon" * V"space"^1)^-1 * V"double_colon") / ":"
                            * (V"eol" * V"blank_line" / "")
                            ,

    literal_block_markerless = Cs(V"literal_block_lines"
                                * (V"blank_line"^1 * V"literal_block_lines")^0)
                             * V"blank_line"^0
                             / rst.literal_block,

--------------------------------------------------------------------------------
-- Comments
--------------------------------------------------------------------------------

    comment = V"double_dot"
            * V"whitespace"^0
            * ((V"block_comment" + V"line_comment") - V"footnote_marker")
            ,

    block_comment = V"eol"
                  * Cs(V"indented_lines")
                  * V"eol"^0
                  / rst.block_comment,

    line_comment = Cs((1 - V"eol")^0 * V"eol")
                 / rst.line_comment,

--------------------------------------------------------------------------------
-- Generic indented block
--------------------------------------------------------------------------------

    indented_lines = V"indented_first"
                   * (V"indented_other" - V"blank_line" - V"field_marker")^0,

    indented_first = Cmt(V"space"^1, function (s, i, indent)
                         warn("idt-f", indent, i)
                         if not indent or indent == "" then
                             return false
                         end
                         state.currentindent = indent
                         return true
                     end)
                   * (1 - V"eol")^1
                   * V"eol",

    indented_other = Cmt(V"space"^1, function (s, i, indent)
                         warn("idt-m", indent, state.currentindent, indent == state.currentindent, i)
                         return indent == state.currentindent
                     end)
                   * (1 - V"eol")^1
                   * V"eol",

--------------------------------------------------------------------------------
-- Urls
--------------------------------------------------------------------------------

    uri = V"url_protocol" * V"url_domain" * (V"slash" * V"url_path")^0,

    url_protocol    = (P"http" + P"ftp" + P"shttp" + P"sftp") * P"://",
    url_domain_char = 1 - V"dot" - V"spacing" - V"eol" - V"punctuation",
    url_domain      = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0,
    url_path_char   = R("az", "AZ", "09") + S"-_.!~*'()",
    url_path        = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1,

--------------------------------------------------------------------------------
-- Terminal Symbols and Low-Level Elements
--------------------------------------------------------------------------------

    word = (1 - V"punctuation" - V"end_block" - V"spacing" - V"eol")^1, -- TODO : no punctuation (later)

    asterisk        = P"*",
    double_asterisk = V"asterisk" * V"asterisk",

    bareia         = P"`",
    double_bareia  = V"bareia" * V"bareia",
    escaped_bareia = (Cs(V"backslash") / "" * V"bareia") + 1,

    slash       = P"/",
    doubleslash = V"slash" * V"slash",

    backslash = P"\\",

    bar = P"|",

    groupchars = S"()[]{}",

    --- Punctuation
    -- Some of the following are used for markup as well as for punctuation.

    comma                    = P",",
    colon                    = P":",
    double_colon             = V"colon" * V"colon",
    escaped_colon            = V"backslash" * V"colon",
    dot                      = P".",
    period                   = V"dot",
    double_dot               = V"dot" * V"dot",
    interpunct               = P"·",
    underscore               = P"_",
    double_underscore        = V"underscore" * V"underscore",
    dash                     = P"-",
    double_dash              = V"dash" * V"dash",
    triple_dash              = V"double_dash" * V"dash",
    emdash                   = P"—",
    attrib_dash              = V"triple_dash" + V"double_dash" + V"emdash", -- begins quote attribution blocks
    dashes                   = V"dash" + P"‒" + P"–" + V"emdash" + P"―",
    hyphen                   = P"‐",
    semicolon                = P";",
    questionmark             = P"?",
    exclamationmark          = P"!",
    inverted_exclamationmark = P"¡",
    inverted_questionmark    = P"¿",
    interrobang              = P"‽",
    apostrophe               = P"’" + P"'",
    --brackets               = P"[ ], (",, { }, ⟨ ⟩ )
    lsquare                  = P"[",
    rsquare                  = P"]",
    ellipsis                 = P"…" + P"...",
    guillemets               = P"«" + P"»",
    quotationmarks= P"‘" + P"’" + P"“" + P"”",
    solidus= P"⁄",

    punctuation = V"apostrophe"
                + V"colon"
                + V"comma"
                + V"dashes"
                + V"dot"
                + V"ellipsis"
                + V"exclamationmark"
                + V"guillemets"
                + V"hyphen"
                + V"interpunct"
                + V"interrobang"
                + V"questionmark"
                + V"quotationmarks"
                + V"semicolon"
                + V"slash"
                + V"solidus"
                + V"underscore"
                ,

    -- These are treated separately as they might begin a paragraph (sigh!).
    inverted_punctuation = V"inverted_exclamationmark"
                         + V"inverted_questionmark",

    -- End punctuation

    letter     = R"az" + R"AZ",
    equals     = P"=",
    space      = P" ",
    spaces     = V"space"^1,
    whitespace = (P" " + Cs(P"\t") / " " + Cs(S"\v") / " "),
    spacing    = V"whitespace"^1,
    blank_line = V"space"^0 * V"eol",

    rest_of_line = (1 - V"eol")^1,

    eol = P"\n",
    eof = V"eol"^0 * -P(1),

    end_block = V"blank_line"^1
              + V"eof"
              + (V"whitespace"^0 * V"eol"
               * (V"whitespace"^0 * V"eol")^0 * V"eof")
              ,

    -- diverse markup character sets
    delimiters     = P"‐" + P"‑" + P"‒" + P"–" + V"emdash" + V"space", -- inline markup
    adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], -- headings
    bullet_char    = S"*+-" + P"•" + P"‣" + P"⁃", -- bullet lists
    argument_char  = V"double_dash" * V"dash" * V"slash", -- option lists

    digit         = R"09",
    roman_numeral = S"ivxlcdm"^1,
    Roman_numeral = S"IVXLCDM"^1,

    inline_delimiter = P"**" + P"``" + S"*`",
    angle_left       = P"<",
    angle_right      = P">",
    enclosed_open    = S[['"([{<]],
    enclosed_close   = S[['")]}>]],

    gartenzaun = P"#",

    table_intersection = P"+",
    table_hline        = V"dash",
    table_vline        = V"bar",

    table_header_hline = P"=",
}

-- Read a whole file.
--   name : path of the file to read
-- Returns the file content as one string; raises an error (via assert) when
-- the file cannot be opened.
local function load_file (name)
    -- `f` used to be an accidental global here.
    local f = assert(io.open(name, "r"), "Not a file!")
    local tmp = f:read("*all")
    f:close()
    return tmp
end

-- Write a string to a file, replacing any previous content.
--   name : path of the file to write
--   data : string to store
-- Returns 0; raises an error (via assert) when the file cannot be opened.
local function save_file (name, data)
    -- `f` used to be an accidental global here.
    local f = assert(io.open(name, "w"), "Could not open file "..name.." for writing! Check its permissions")
    f:write(data)
    f:close()
    return 0
end

-- Assemble the text that precedes the converted document: a fixed block of
-- setups, one generated snippet per key found in state.addme, and
-- "\starttext".
-- NOTE(review): `optional_setups` is not defined in this file; it is assumed
-- to be a global table of functions provided by one of the required modules
-- -- confirm.
local function get_setups ()
    local setups = [[
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%
%{                           Setups                            }%
%~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\setupcolors[state=start]
\setupinteraction[state=start,focus=standard,color=darkgreen,contrastcolor=darkgreen]
\setupbodyfontenvironment [default] [em=italic]
\sethyphenatedurlnormal{:=?&}
\sethyphenatedurlbefore{?&}
\sethyphenatedurlafter {:=/-}

]]
    for item, _ in next, state.addme do
        local f = optional_setups[item]
        setups = setups .. f()
    end
    return setups .. "\\starttext"
end

-- Entry point: convert the file named by arg[1] and store the result in the
-- file named by arg[2].
-- Returns 0 on success and 1 when the grammar does not match the input.
local function main()
    local testdata = load_file(arg[1])

    local processeddata = parser:match(testdata)
    -- The failure check has to happen before any concatenation: a failed
    -- match yields nil, and concatenating nil raises an error instead of
    -- reaching the intended "return 1".
    if not processeddata then
        return 1
    end

    -- Build the setups only after parsing; state.addme is read here and is
    -- never filled in this file, so any entries must have been added during
    -- parsing (presumably by the rst_context callbacks -- confirm).
    local setups = get_setups()
    save_file(arg[2], setups .. processeddata .. "\\stoptext")

    --for i,j in next, rst.anonymous_links do
        --print(i,j)
    --end
    --print(">>>Last used char>: " ..state.lastbullet.." <<<<")
    --print(">>>Max list nestin>: "..state.bullets.max .." <<<<")
    --for i,j in next, rst.collected_references do
        --print (string.format("== %7s => %s <=", i,j))
    --end
    --parser:print()
    return 0
end

return main()