diff options
| author | Philipp Gesang <phg@phi-gamma.net> | 2014-03-01 22:47:25 +0100 | 
|---|---|---|
| committer | Philipp Gesang <phg@phi-gamma.net> | 2014-03-01 22:47:25 +0100 | 
| commit | 7d1114cd66025cc18535f3cdab3105e66bbda48d (patch) | |
| tree | eca33193cdbb0d7923527b0c0bfb58cd893036b6 /src | |
| parent | 7652729ada000906e5e6b2b4d0c5dea01c73c29d (diff) | |
| download | context-rst-7d1114cd66025cc18535f3cdab3105e66bbda48d.tar.gz | |
adopt more conventional directory structure
Diffstat (limited to 'src')
| -rw-r--r-- | src/mtx-t-rst.lua | 64 | ||||
| -rw-r--r-- | src/rst_context.lua | 1316 | ||||
| -rw-r--r-- | src/rst_directives.lua | 381 | ||||
| -rw-r--r-- | src/rst_helpers.lua | 657 | ||||
| -rw-r--r-- | src/rst_parser.lua | 1605 | ||||
| -rw-r--r-- | src/rst_setups.lua | 377 | ||||
| -rw-r--r-- | src/t-rst.mkiv | 241 | 
7 files changed, 4641 insertions, 0 deletions
#!/usr/bin/env texlua
-- diff --git a/src/mtx-t-rst.lua b/src/mtx-t-rst.lua
-- new file mode 100644, index 0000000..6735b1d, @@ -0,0 +1,64 @@
--------------------------------------------------------------------------------
--         FILE:  mtx-rst.lua
--        USAGE:  mtxrun --script rst --if=input.rst --of=output.tex
--  DESCRIPTION:  context script interface for the reStructuredText module
-- REQUIREMENTS:  latest ConTeXt MkIV
--       AUTHOR:  Philipp Gesang (Phg), <megas.kapaneus@gmail.com>
--      CHANGED:  2013-03-27 00:25:32+0100
--------------------------------------------------------------------------------
--

-- The script registers itself in the global ``scripts`` table.
scripts     = scripts or { }
scripts.rst = { }

environment.loadluafile("rst_parser")

local ea = environment.argument

local helpinfo = [[
===============================================================
    The reStructuredText module, command line interface.
    © 2010--2013 Philipp Gesang. License: 2-clause BSD.
    Home: <https://bitbucket.org/phg/context-rst/>
===============================================================

USAGE:

    mtxrun --script rst --if=input.rst --of=output.tex

Mandatory arguments:

    “infile.rst” is your input file containing reST markup.
    “outfile.tex” is the target file that the TeX-code will be
                  written to.

Optional arguments:
    --et=bool   “expandtab”, should tab chars (“\t”, “\v”) be
                converted to spaces?
    --sw=int    “shiftwidth”, tab stop modulo factor.

===============================================================
]]

local application = logs.application {
    name     = "mtx-rst",
    banner   = "The reStructuredText module for ConTeXt, hg-rev 125+",
    helpinfo = helpinfo,
}

scripts.rst.input  = ea("if")
scripts.rst.output = ea("of")

-- Both --if and --of are required; otherwise only the help text is shown.
if scripts.rst.input and scripts.rst.output then
    local expandtab  = ea("et") == "true"
    local shiftwidth = ea("sw")
    local debug      = ea("debug") == "true"
    if expandtab  then thirddata.rst.expandtab  = true end
    -- The original tested the misspelled (and therefore always nil) global
    -- ``shiftwdith`` here, so --sw was never applied.
    if shiftwidth then thirddata.rst.shiftwidth = tonumber(shiftwidth) end
    if debug      then thirddata.rst_helpers.rst_debug = debug end
    thirddata.rst.standalone(scripts.rst.input, scripts.rst.output)
else
    application.help()
end

-- diff --git a/src/rst_context.lua b/src/rst_context.lua
-- new file mode 100644, index 0000000..c7e21fe, @@ -0,0 +1,1316 @@
-- (first line of the original file: #!/usr/bin/env texlua)
--------------------------------------------------------------------------------
--         FILE:  rst_context.lua
--        USAGE:  called by rst_parser.lua
--  DESCRIPTION:  Complement to the reStructuredText parser
--       AUTHOR:  Philipp Gesang (Phg), <phg42.2a@gmail.com>
--      CHANGED:  2013-03-26 22:46:17+0100
--------------------------------------------------------------------------------
--
--- TODO
---   Find an appropriate way to handle generic tables irrespective of the grid
---   settings. The problem is:
---   http://archive.contextgarden.net/message/20100912.112605.8a1aaf13.en.html
---   Seems we'll have to choose either the grid or split tables as default. Not
---   good.
local helpers        = helpers        or thirddata and thirddata.rst_helpers
local rst_directives = rst_directives or thirddata and thirddata.rst_directives

local utf         = unicode.utf8
local utflen      = utf.len
local utflower    = utf.lower
local utfupper    = utf.upper
local iowrite     = io.write
local tableconcat = table.concat

local stringmatch  = string.match
local stringgmatch = string.gmatch
local stringgsub   = string.gsub

local dbg_write = helpers.dbg_writef

local C,  Cb, Cc, Cg, Cmt, Cp,
      Cs, Ct, P,  R,  S,   V,  lpegmatch
      = lpeg.C,  lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp,
        lpeg.Cs, lpeg.Ct, lpeg.P,  lpeg.R,  lpeg.S,   lpeg.V,  lpeg.match

-- This one should ignore escaped spaces.
-- NOTE: this installs ``strip`` into the global ``string`` table.
do
    local stripper = P{
        [1] = "stripper",
        stripper = V"space"^0 * C((V"space"^0 * (V"escaped" + V"nospace")^1)^0),
        space    = S(" \t\v\n"),
        nospace  = 1 - V"space",
        escaped  = P"\\" * V"space"
    }
    --- Strip leading and trailing whitespace from `str`.
    -- Returns the empty string if the pattern does not match.
    function string.strip(str)
        return lpegmatch(stripper, str) or ""
    end
end
local stringstrip  = string.strip
local stringformat = string.format

--- Write an error message to stdout; does nothing if `str` is nil/false.
local err = function(str)
    if str then
        iowrite("\n*[rstctx] Error: " .. str .. "\n\n")
    end
end

local rst_context = thirddata.rst

rst_context.collected_adornments = {}
rst_context.last_section_level   = 0
rst_context.anonymous_targets    = 0
rst_context.anonymous_links      = {}

rst_context.collected_references = {}
rst_context.context_references   = {}
rst_context.structure_references = {}
rst_context.anonymous_set        = {}

rst_context.substitutions        = {}
rst_context.lastitemnumber       = 0  -- enumerations in RST allow arbitrary skips

rst_context.current_footnote_number   = 0
rst_context.current_symbolnote_number = 0

--- Flag the setup named `item` in `rst_context.state.addme`.
-- Always returns 0.
function rst_context.addsetups(item)
    local state = rst_context.state
    state.addme[item] = state.addme[item] or true
    return 0
end

--- Build the markup for a footnote or citation reference.
-- `label` is dispatched as follows:
--   all digits   -> numbered footnote buffer
--   "#"          -> auto-numbered footnote (bumps current_footnote_number)
--   "#name"      -> labelled footnote buffer
--   "*"          -> symbol note (bumps current_symbolnote_number)
--   anything else is treated as a citation and requests the "citations" setup.
function rst_context.footnote_reference (label)
    if stringmatch(label, "^%d+$") then -- all digits
        local c = tonumber(label)
        return [[\\footnote{\\getbuffer[__footnote_number_]].. c .."]}"
    elseif label == "#" then --autonumber
        local rc = rst_context.current_footnote_number
        rc = rc + 1
        rst_context.current_footnote_number = rc
        return [[\\footnote{\\getbuffer[__footnote_number_]].. rc .."]}"
    elseif stringmatch(label, "^#.+$") then
        local thelabel = stringmatch(label, "^#(.+)$")
        return [[\\footnote{\\getbuffer[__footnote_label_]].. thelabel .."]}"
    elseif label == "*" then
        local rc = rst_context.current_symbolnote_number
        rc = rc + 1
        rst_context.current_symbolnote_number = rc
        return [[\\symbolnote{\\getbuffer[__footnote_symbol_]].. rc .."]}"
    else -- “citation reference” for now treating them like footnotes
        rst_context.addsetups("citations")
        return [[\\cite{]] .. label .. [[}]]
    end
end

do
    local w = S" \v\t\n" / "_"
    local wp = Cs((w + 1)^1)
    --- Replace every space, tab, vertical tab and newline in `str` by "_".
    -- Returns "" for a nil or empty `str`.
    function rst_context.whitespace_to_underscore(str)
        return  str and lpegmatch(wp, str) or ""
    end
end

--- So we can use crefs[n][2] to refer to the place where the reference was
--- created.
-- Registers `str` in structure_references, appends { refstring, str } to
-- context_references and returns the generated "__target_…" identifier.
local function get_context_reference (str)
    local crefs = rst_context.context_references
    local srefs = rst_context.structure_references
    srefs[str] = true
    -- was an accidental global in the original
    local refstring = "__target_" .. rst_context.whitespace_to_underscore(str)
    crefs[#crefs + 1] = { refstring, str }
    return refstring
end

--- Wrap `str` in `{\\em …}`.
function rst_context.emphasis (str)
    return [[{\\em ]] .. str .. [[}]]
end

--- Wrap `str` in `{\\sc …}`.
function rst_context.strong_emphasis (str)
    return [[{\\sc ]] .. str .. [[}]]
end

--- Wrap `str` in `\\type{…}`.
function rst_context.literal (str)
    return [[\\type{]] .. str .. [[}]]
end

--- ROLES for interpreted text

rst_context.roles = {}
rst_context.roles.emphasis = rst_context.emphasis
rst_context.roles.strong_emphasis = rst_context.strong_emphasis
rst_context.roles.literal = rst_context.literal
rst_context.roles.bold = function(str)
    return [[{\\bold ]] .. str .. [[}]]
end
rst_context.roles.bf = rst_context.roles.bold

rst_context.roles.italic = function(str)
    return [[{\\italic ]] .. str .. [[}]]
end
rst_context.roles.it = rst_context.roles.italic

rst_context.roles.sans = function(str)
    return [[{\\ss ]] .. str .. [[}]]
end
rst_context.roles.sans_serif = rst_context.roles.sans
rst_context.roles.ss         = rst_context.roles.sans

rst_context.roles.uppercase = function(str)
    return utfupper(str)
end

rst_context.roles.lowercase = function(str)
    return utflower(str)
end

--- Colour role. `color` is the full role name ("color_<definition>").
-- A definition starting with "rgb_" is parsed with helpers.patterns.rgbvalues
-- and turned into "r=…,g=…,b=…"; anything else is passed through verbatim.
rst_context.roles.color = function(color, str)
    local p = helpers.patterns
    local definition = stringmatch(color, "^color_(.+)$")
    if stringmatch(definition, "^rgb_") then -- assume rgb
        local rgb = lpegmatch(p.rgbvalues, definition)
        definition = stringformat([[r=%s,g=%s,b=%s]], rgb[1], rgb[2], rgb[3])
    end
    return stringformat([[\\colored[%s]{%s}]], definition, str)
end

--------------------------------------------------------------------------------
--- Unofficial text roles for my private bib
--------------------------------------------------------------------------------

-- Afterthought:
-- Different citation commands are essentially typographical instructions:
-- they are concerned with the final representation of the data with respect to
-- a concrete implementation. Not the thing at all that would make reST
-- portable. But then its support for Python-style string escaping &c. ain’t at
-- all portable either. The problem is the same with XML written to be
-- processed with ConTeXt -- when processing the text directly in MkIV you’ll
-- always find yourself adding setups that allow fine-grained control of the
-- typeset output. At the same time those instructions directly contradict the
-- main reason for XML: to provide an application-independent data markup.
-- Typesetting XML (and now reST) with TeX, you will always end up writing TeX
-- code disguised in XML brackets. (Btw. the docutils reST specification has
-- the same kind of inclination to HTML -- some of its components don’t even
-- have a meaning save in HTML peculiarities.) If you strive to avoid this
-- *and* would like to have decent typesetting, you should use the
-- automatically generated TeX code as a starting point for the actual
-- typesetting job. Wish it was possible to have both -- the data in a
-- universal form and the output in the Optimal Typesetting System -- but
-- that’s a dream for now. If you really read these musings, then prove me
-- wrong if you can! Or go tell those digital publishers and their willing
-- subordinates, the authors, who think they can save a few pennies,
-- substituting the typesetter and editor by some fancy software. Keep in mind
-- that zapf.tex is not just random dummy text. </rant>

-- Each citator role requests the "citator" setup and wraps `str` in the
-- command of the same name.

function rst_context.roles.ctsh(str) -- shorthand
    rst_context.addsetups("citator")
    return [[\\ctsh{]] .. str .. [[}]]
end

function rst_context.roles.ctas(str) -- short cite
    rst_context.addsetups("citator")
    return [[\\ctas{]] .. str .. [[}]]
end

function rst_context.roles.ctau(str) -- author only
    rst_context.addsetups("citator")
    return [[\\ctau{]] .. str .. [[}]]
end

function rst_context.roles.cttt(str) -- title only
    rst_context.addsetups("citator")
    return [[\\cttt{]] .. str .. [[}]]
end

function rst_context.roles.ctay(str) -- author year
    rst_context.addsetups("citator")
    return [[\\ctay{]] .. str .. [[}]]
end

function rst_context.roles.ctfu(str) -- full cite
    rst_context.addsetups("citator")
    return [[\\ctfu{]] .. str .. [[}]]
end

function rst_context.roles.nocite(str) -- nocite
    rst_context.addsetups("citator")
    return [[\\nocite[]] .. str .. [=[]]=]
end

--------------------------------------------------------------------------------
--- End citator roles
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
--- Experimental roles.
--------------------------------------------------------------------------------

--- Feature request by Philipp A.
function rst_context.roles.math(str)
    return [[\\mathematics{]] .. str .. [[}]]
end

--------------------------------------------------------------------------------
--- End roles
--------------------------------------------------------------------------------

--- Dispatch interpreted text to its role handler.
-- Receives three captures: optional leading role marker, the text, optional
-- trailing role marker. Without a marker the role defaults to "emphasis";
-- roles starting with "color_" go to roles.color.
-- NOTE(review): an unknown role name still raises (roles[role] is nil).
function rst_context.interpreted_text (...)
    local tab = { ... }
    local role, str
    role = stringmatch(tab[1], "^:(.*):$") or stringmatch(tab[3], "^:(.*):$")
    str  = tab[2]

    if not role then -- implicit role
        role = "emphasis"
    end

    if stringmatch(role, "^color_") then
        return rst_context.roles.color(role, str)
    end

    return rst_context.roles[role](str)
end

--- Markup for a bare URL: a `\\goto` around `\\hyphenatedurl`.
function rst_context.link_standalone (str)
    return "\n"
        .. [[\\goto{\\hyphenatedurl{]] .. str .. [[}}[url(]] .. str .. [=[)]]=]
end

--- Markup for a named reference (``name_`` or ```phrase`_``).
function rst_context.reference (str)
    rst_context.addsetups("references")
    str = stringmatch(str, "^`?([^`]+)`?_$")
    return [[\\RSTchoosegoto{__target_]] .. rst_context.whitespace_to_underscore(str) .. "}{"
            .. str .. "}"
end

--- Markup for an anonymous reference (``name__``).
-- The link text is appended to anonymous_links; its index names the target.
function rst_context.anon_reference (str)
    rst_context.addsetups("references")
    str = stringmatch(str, "^`?([^`]+)`?__$")
    rst_context.anonymous_links[#rst_context.anonymous_links+1] = str
    -- was an accidental global in the original
    local link = "__target_anon_" .. #rst_context.anonymous_links
    return stringformat([[\\RSTchoosegoto{%s}{%s}]], link, str)
end

local whitespace = S" \n\t\v"
local nowhitespace = 1 - whitespace
local removewhitespace = Cs((nowhitespace^1 + Cs(whitespace / ""))^0)

--- Register link targets.
-- `tab` holds the target names followed by the target itself as last element.
-- An empty target emits `\reference` anchors for every name and returns them;
-- otherwise the names (anonymous ones get a generated "anon_<n>" id) are
-- stored in collected_references and "" is returned.
function rst_context.target (tab)
    rst_context.addsetups("references")
    --local tab = { ... }
    local  refs = rst_context.collected_references
    local arefs = rst_context.anonymous_set
    local target = tab[#tab] -- Ct + C could be clearer but who cares
    tab[#tab] = nil

    local function create_anonymous ()
        rst_context.anonymous_targets = rst_context.anonymous_targets + 1
        return { "anon_" .. rst_context.anonymous_targets, rst_context.anonymous_targets }
    end

    local insert = ""

    if target == "" then -- links here
        for _, id in next, tab do
            insert = insert .. "\n\\reference[__target_" .. id .. "]{}"
        end
    else
        for i=1,#tab do
            local id = tab[i]
            if id == "" then -- anonymous
                local anon = create_anonymous()
                id, arefs[anon[1]] = anon[1], anon[2]
            else
                local tmp = tab[i]
                tmp = stringgsub(tmp, "\\:",":")
                tmp = stringmatch(tmp, "`?([^`]+)`?")
                id = tmp
                --id = tab[i]:gsub("\\:",":"):match("`?([^`]+)`?") -- deescaping
            end
            if id then
                -- the first definition of a name wins
                refs[id] = refs[id] or target
            end
        end
    end

    return insert
end

function rst_context.inline_internal_target (str)
    return "\\\\reference[__target_" .. rst_context.whitespace_to_underscore(str) .."]{}"
end

--- Markup for a substitution reference ``|name|``, ``|name|_`` or ``|name|__``.
-- `underscores` is the trailing "", "_" or "__".
function rst_context.substitution_reference (str, underscores)
    local sub = ""
    rst_context.addsetups "substitutions"
    if underscores == "_" then -- normal reference
        sub = sub .. [[\\reference[__target_]] .. rst_context.whitespace_to_underscore(stringstrip(str)) .. "]{}"
    elseif underscores == "__" then -- anonymous reference
        rst_context.anonymous_targets = rst_context.anonymous_targets + 1
        sub = sub .. [[\\reference[__target_anon_]] .. rst_context.anonymous_targets .. "]{}"
    end
    return sub .. [[{\\RSTsubstitution]] .. stringgsub(str, "%s", "") .. "}"
end

do
    -- see catc-sym.tex
    local escape_me = {
        ["&"]   = [[\letterampersand ]],
        ["$"]   = [[\letterdollar ]],
        ["#"]   = [[\letterhash ]],
        ["^"]   = [[\letterhat ]],
        ["_"]   = [[\letterunderscore ]],
    }

    -- ordered choice over all characters that need replacing
    local chars
    for chr, repl in next, escape_me do
        chars = chars and chars + (P(chr) / repl) or P(chr) / repl
    end

    local p_escape = P{
        [1]      = Cs((V"skip"
                 --+ V"literal" -- achieved via gsub later
                 + chars
                 + 1)^1),
        skip1    = P"\\starttyping" * (1 - P"\\stoptyping")^1,
        balanced = P"{" * (V"balanced" + (1 - P"}"))^0 * P"}",
        skip2    = P"\\type" * V"balanced",
        skip3    = P"\\mathematics" * V"balanced",
        skip     = V"skip1" + V"skip2" + V"skip3",
        --literal  = Cs(P"\\" / "") * 1
    }

    --- Remove one level of backslash escaping from `str`, then replace the
    -- characters listed in `escape_me`, leaving typing, `\type{…}` and
    -- `\mathematics{…}` spans untouched.
    function rst_context.escape (str)
        str = stringgsub(str, "\\(.)", "%1")
        return lpegmatch(p_escape, str)
    end
end

function rst_context.joinindented (tab)
    return tableconcat (tab, "")
end

-- opening delimiter -> matching closing delimiter (used by `enclosed` below)
local corresponding = {
    ['"'] = '"',
    ["'"] = "'",
    ["{"] = "}",
    ["("] = ")",
    ["["] = "]",
    ["<"] = ">",
}

-- Grammar for inline markup; reproduced unchanged from the original.
local inline_parser = P{
    [1] = "block",

    block = Cs(V"inline_as_first"^-1 * (V"except" + V"inline_element" + V"normal_char")^0),

    inline_element = V"precede_inline"
                   * Cs(V"inline_do_elements")
                   * #V"succede_inline"
                   + V"footnote_reference"
                   ,

    -- Ugly but needed in case the first element of a paragraph is inline
    -- formatted.
    inline_as_first = V"inline_do_elements" * #V"succede_inline",

    except = P"\\starttyping" * (1 - P"\\stoptyping")^1 * P"\\stoptyping"
           + V"enclosed"
           ,

    inline_do_elements = V"strong_emphasis"
                       + V"substitution_reference"
                       + V"anon_reference"
                       + V"inline_literal"
                       + V"reference"
                       + V"emphasis"
                       + V"interpreted_text"
                       + V"inline_internal_target"
                       + V"link_standalone"
                       ,

    precede_inline = V"spacing"
                   + V"eol"
                   + -P(1)
                   + S[['"([{<-/:]]
                   + P"‘" + P"“" + P"’" + P"«" + P"¡" + P"¿"
                   + V"inline_delimiter"
                   + P"„", -- not in standard Murkin reST

    succede_inline = V"spacing"
                   + V"eol"
                   + S[['")]}>-/:.,;!?\]]
                   + P"’" + P"”" + P"»"
                   + V"inline_delimiter"
                   + -P(1)
                   + P"“" -- non-standard again but who cares
                   ,

    enclosed = V"precede_inline"^-1
             * Cg(V"quote_single" + V"quote_double" + V"leftpar", "lastgroup")
             * V"inline_delimiter"
             * Cmt(C(V"quote_single" + V"quote_double" + V"rightpar") * Cb("lastgroup"), function(s, i, char, oldchar)
                    return corresponding[oldchar] == char
                end)
             * V"succede_inline"^-1
             * -V"underscore"
             ,

    space = P" ",
    whitespace = (P" " + Cs(P"\t") / "        " + Cs(S"\v") / " "),
    spacing = V"whitespace"^1,

    eol = P"\n",
    --inline_delimiters = P"‐" + P"‑" + P"‒" + P"–" + V"emdash" + V"space",        -- inline markup
    inline_delimiter = P"‐" + P"‑" + P"‒" + P"–" + V"emdash" + V"space"
                     + V"bareia"
                     + V"asterisk"
                     + V"bar"
                     + V"lbrack" + V"rbrack"
                     ,        -- inline markup
    asterisk = P"*",
    quote_single = P"'",
    quote_double = P'"',
    double_asterisk = V"asterisk" * V"asterisk",
    bareia = P"`",
    backslash = P"\\",
    bar = P"|",
    double_bareia = V"bareia" * V"bareia",
    escaped_bareia = (Cs(V"backslash") / "" * V"bareia") + 1,
    colon = P":",
    escaped_colon = (Cs(V"backslash") / "" * V"colon") + 1,
    semicolon = P";",
    underscore = P"_",
    double_underscore = V"underscore" * V"underscore",
    dot = P".",
    interpunct = P"·",
    comma = P",",
    dash = P"-",
    emdash = P"—",
    ellipsis   = P"…" + P"...",
    exclamationmark = P"!",
    questionmark = P"?",
    interrobang = P"‽",
    double_dash = V"dash" * V"dash",
    triple_dash = V"double_dash" * V"dash",
    hyphen = P"‐",
    dashes = V"dash" + P"‒" + P"–" + V"emdash" + P"―",

    lparenthesis = P"(",
    rparenthesis = P")",
    lbrack  = P"[",
    rbrack  = P"]",
    lbrace  = P"{" / [[{\\letterleftbrace}]],
    rbrace  = P"}" / [[{\\letterrightbrace}]],
    less    = P"<",
    greater = P">",
    leftpar  = V"lparenthesis" + V"lbrack" + V"lbrace" + V"less",
    rightpar = V"rparenthesis" + V"rbrack" + V"rbrace" + V"greater",

    normal_char = V"lbrace" + V"rbrace" + V"lbrack" + V"rbrack" -- escape those if in input
                + 1
                ,

    --groupchars = S"()[]{}",
    groupchars = V"leftpar" + V"rightpar",
    apostrophe = P"’" + P"'",

    guillemets = P"«" + P"»",
    quotationmarks= P"‘" + P"’" + P"“" + P"”",
    solidus= P"⁄",
    slash = P"/",

    gartenzaun = P"#",
    digit  = R"09",
    letter = R"az" + R"AZ",

    punctuation = V"apostrophe"
                + V"colon"
                + V"comma"
                + V"dashes"
                + V"dot"
                + V"ellipsis"
                + V"exclamationmark"
                + V"guillemets"
                + V"hyphen"
                + V"interpunct"
                + V"interrobang"
                + V"questionmark"
                + V"quotationmarks"
                + V"semicolon"
                + V"slash"
                + V"solidus"
                + V"underscore"
                ,

    emphasis        = (V"asterisk" - V"double_asterisk")
                    * Cs((V"normal_char" - V"spacing" - V"eol" - V"asterisk")
                       * ((V"normal_char" - (V"normal_char" * V"asterisk"))^0
                        * (V"normal_char" - V"spacing" - V"eol" - V"asterisk"))^-1)
                    * V"asterisk"
                    / rst_context.emphasis,

    strong_emphasis = V"double_asterisk"
                    * Cs((V"normal_char" - V"spacing" - V"eol" - V"asterisk")
                       * ((V"normal_char" - (V"normal_char" * V"double_asterisk"))^0
                        * (V"normal_char" - V"spacing" - V"eol" - V"asterisk"))^-1)
                    * V"double_asterisk"
                    / rst_context.strong_emphasis,

    inline_literal  = V"double_bareia"
                    * C ((V"escaped_bareia" - V"spacing" - V"eol" - V"bareia")
                       * ((V"escaped_bareia" - (V"normal_char" * V"double_bareia"))^0
                        * (V"escaped_bareia" - V"spacing" - V"eol" - V"bareia"))^-1)
                    * V"double_bareia"
                    / rst_context.literal,

    interpreted_single_char = (V"normal_char" - V"spacing" - V"eol" - V"bareia") * #V"bareia",
    interpreted_multi_char  = (V"normal_char" - V"spacing" - V"eol" - V"bareia") * (V"normal_char" - (1 * V"bareia"))^0 * (1 - V"spacing" - V"eol" - V"bareia"),

    interpreted_text = C(V"role_marker"^-1)
                     * (V"bareia" - V"double_bareia")
                     * C(V"interpreted_single_char" + V"interpreted_multi_char")
                     * V"bareia"
                     * C(V"role_marker"^-1)
                     / rst_context.interpreted_text,

    role_marker = V"colon" * (V"backslash" * V"colon" + V"letter" + V"digit" + V"dash" + V"underscore" + V"dot")^1 * V"colon",

    link_standalone = C(V"uri")
                    / rst_context.link_standalone,

    anon_reference = Cs(V"anon_phrase_reference" + V"anon_normal_reference")
              / rst_context.anon_reference,

    anon_normal_reference = C((1 - V"underscore" - V"spacing" - V"eol" - V"punctuation" - V"groupchars")^1) * V"double_underscore",

    anon_phrase_reference = (V"bareia" - V"double_bareia")
                          * C((1 - V"bareia")^1)
                          * V"bareia" * V"double_underscore"
                          ,

    reference = Cs(V"normal_reference" + V"phrase_reference")
              / rst_context.reference,

    normal_reference = (1 - V"underscore" - V"spacing" - V"eol" - V"punctuation" - V"groupchars")^1 * V"underscore",

    phrase_reference = (V"bareia" - V"double_bareia")
                     * C((1 - V"bareia")^1)
                     * V"bareia" * V"underscore"
                     ,

    footnote_reference = V"lbrack"
                       * Cs(V"footnote_label" + V"citation_reference_label")
                       * V"rbrack"
                       * V"underscore"
                       / rst_context.footnote_reference
                       ,

    footnote_label = V"digit"^1
                   + V"gartenzaun" * V"letter"^1
                   + V"gartenzaun"
                   + V"asterisk"
                   ,

    citation_reference_label = V"letter" * (1 - V"rbrack")^1,

    inline_internal_target = V"underscore"
                           * V"bareia"
                           * Cs((1 - V"bareia")^1)
                           * V"bareia"
                           / rst_context.inline_internal_target
                           ,

    substitution_reference = V"bar"
                           * C((1 - V"bar")^1)
                           * V"bar"
                           * C((V"double_underscore" + V"underscore")^-1)
                           / rst_context.substitution_reference
                           ,

--------------------------------------------------------------------------------
-- Urls
--------------------------------------------------------------------------------
    uri = V"url_protocol" * V"url_domain" * V"url_path_char"^0,

    url_protocol = (P"http" + P"ftp" + P"shttp" + P"sftp") * P"://",
    url_domain_char = 1 - V"dot" - V"spacing" - V"eol" - V"punctuation",
    url_domain = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0,
    url_path_char = R("az", "AZ", "09") + S[[-_.!~*'()/]],
}

rst_context.inline_parser = inline_parser

--- Wrap paragraph content in `\\startparagraph … \\stopparagraph`.
-- `data` may be nil (returns ""), a table of lines (run through the inline
-- parser; several lines are joined with spaces and wrapped at 65 columns) or
-- a string (used verbatim).
function rst_context.paragraph (data)
    local str
    if not data then
        return ""
    elseif type(data) == "table" then
--        str = #data > 1 and  helpers.string.wrapat(lpegmatch(inline_parser, tableconcat(data, " ")), 65)
--                        or   inline_parser:match(data[1])
        if #data > 1 then
            str = helpers.string.wrapat(
                lpegmatch(inline_parser, tableconcat(data, " "))
                , 65)
        else
            str = lpegmatch(inline_parser, data[1])
        end
    else
        str = data
    end
    return stringformat([[

\\startparagraph
%s
\\stopparagraph
]], str)
end

local sectionlevels = {
    [1] = "chapter",
    [2] = "section",
    [3] = "subsection",
    [4] = "subsubsection",
    [5] = "subsubsubsection",
}

local function get_line_pattern (chr)
    return P(chr)^1 * (-P(1))
end

--- Build a section heading.
-- Called with (overline, title, underline) or (title, underline). The first
-- character of the adornment selects the level: a character seen before
-- reuses its level, a new one opens the next deeper level.
-- An adornment shorter than the title yields an error marker instead.
function rst_context.section (...)  -- TODO general cleanup; move validity
    local tab = { ... }             -- checking to parser.
    local valid, title, adornchar
    if #tab == 3 then -- TODO use unicode length with ConTeXt
        adornchar = tab[1]:sub(1,1)
        valid = utflen(tab[1]) >= utflen(tab[2])
        title = stringstrip(tab[2])
    else -- no overline
        adornchar = tab[2]:sub(1,1)
        valid = utflen(tab[1]) <= utflen(tab[2])
        title = tab[1]
    end

    if not valid then
        -- the original string carried a stray closing brace after \\endgraf
        return [[{\\bf fix your sectioning!}\\endgraf]]
    end

    -- determine level
    local rca   = rst_context.collected_adornments
    local level = rca[adornchar]
    if not level then
        level = rst_context.last_section_level + 1
        rca[adornchar] = level
        rst_context.last_section_level = level
    end

    -- was an accidental global in the original
    local ref = get_context_reference (title)

    -- Levels deeper than the table knows about would make string.format
    -- raise; fall back to the deepest known heading instead.
    local command = sectionlevels[level] or sectionlevels[#sectionlevels]
    return stringformat("\n\\\\%s[%s]{%s}\n", command, ref, title)
end

-- Prime time for the fancybreak module.
function rst_context.transition (str)
    rst_context.addsetups("breaks")
    --return "\\fancybreak\n"
    return "\\fancybreak{$* * *$}\n"
end

function rst_context.bullet_marker(str)
    return "marker"
end

-- This one should ignore escaped spaces.
do
    local stripper = P{
        [1] = "stripper",
        stripper = V"space"^0 * C((V"space"^0 * V"nospace"^1)^0),
        space    = S(" \t\v\n"),
        escaped  = P"\\" * V"space",
        nospace  = V"escaped" + (1 - V"space"),
    }
    -- Assigns to the ``stringstrip`` variable already in scope.
    function stringstrip(str)
        return lpegmatch(stripper, str) or ""
    end
end

local enumeration_types = {
    ["*"] = "*", -- unordered bulleted
    ["+"] = "*",
    ["-"] = "*",
    ["•"] = "*",
    ["‣"] = "*",
    ["⁃"] = "*",

    ["#"] = "n", -- numbered lists and conversion
    ["A"] = "A",
    ["a"] = "a",
    ["I"] = "R",
    ["i"] = "r",
}

-- \setupitemize[left=(, right=), margin=4em, stopper=]

local stripme   = S"()."
local dontstrip = 1 - stripme
local itemstripper = stripme^0 * C(dontstrip^1) * stripme^0

--- Derive itemize settings from an enumerator such as "(a)", "3." or "iv)".
-- Returns { setup = <option string>, str = <list type> }.
local function parse_itemstring(str)
    local setup = ",fit][itemalign=flushright,"
    if stringmatch(str, "^%(") then
        setup = setup .. [[left=(,]]
    end
    if stringmatch(str, "%)$") then
        setup = setup .. [[right=)]]
    end
    if stringmatch(str, "%.$") then
        setup = setup .. [[stopper={.\\space}]]
    end
    -- Capture the whole leading number; the original pattern "^%d" took only
    -- its first digit, so a list beginning at "12." was started at 1.
    local num = stringmatch(str, "^%d+")
    if num then
        -- http://thread.gmane.org/gmane.comp.tex.context/61728/focus=61729
        setup = setup .. ",start=" .. num
        str = "n"
    end

    str = lpegmatch(itemstripper, str)
    str = enumeration_types[str] or str
    return { setup = setup, str = str }
end

--- Emit `\\startitemize[...]` for the list marker `str`.
-- Known markers map directly through `enumeration_types`; anything else is
-- analysed by `parse_itemstring`.
function rst_context.startitemize(str)
    local setup = ""
    local result = ""
    str = stringstrip(str)

    local listtype = enumeration_types[str] or parse_itemstring(str)

    if type(listtype) == "table" then
        setup = listtype.setup
        listtype = listtype.str
    end

    result = [[
\\startitemize[]] .. listtype .. setup .. [[]
]]
    return result
end

local last_item = {} -- stack
local current_itemdepth = 0

--- Close the current list: pop the item stack and append `\\stopitemize`.
function rst_context.stopitemize(str)
    last_item[current_itemdepth] = nil
    current_itemdepth = current_itemdepth - 1
    return str .. [[
\\stopitemize
]]
end

--- Emit one `\\item`.
-- `tab` is { marker, content } or, for the first item of a list,
-- { marker, startitemize-string, content }.
function rst_context.bullet_item (tab)
    local li = last_item
    -- The capture of the first item has the \startitemize as
    -- *second* element in the array.
    local content  = #tab == 2 and tab[2] or tab[3]
    local startstr = #tab == 3 and tab[2] or nil
    local itemtype = tab[1]
    local result = startstr or ""
    if startstr then
        current_itemdepth = current_itemdepth + 1
        li[current_itemdepth] = itemtype
    elseif li[current_itemdepth] then
        if helpers.list.successor(itemtype, li[current_itemdepth]) then
            -- just leave it alone
        elseif helpers.list.greater(itemtype, li[current_itemdepth]) then
            -- the enumerator skipped ahead: set the item number explicitly
            local itemnum = tonumber(stringstrip(itemtype)) or helpers.list.get_decimal(itemtype)
            result = result .. stringformat([[
\\setnumber[itemgroup:itemize]{%s}
]], itemnum)
        end
        li[current_itemdepth] = itemtype
    end

    return result .. [[

\\item ]] .. lpegmatch(inline_parser, content) .. [[

]]
end

--------------------------------------------------------------------------------
-- Definition lists
--------------------------------------------------------------------------------
-- TODO define proper setups (probably bnf-like and some narrower for def-paragraphs)

--- Render a definition list.
-- Each entry of `list` is { term, classifier..., paragraphs }: the first
-- element is the term, the last a table of paragraphs, anything in between
-- a classifier.
function rst_context.deflist (list)
    rst_context.addsetups("deflist")

    local deflist = [[
\\startRSTdefinitionlist
]]
    for nd=1, #list do
        local item = list[nd]
        local term = item[1]
        local nc = 2
        local tmp = [[

  \\RSTdeflistterm{]] .. stringstrip(term) .. "}"
        if #item > 2 then
            while nc < #item do
                tmp = tmp .. [[

  \\RSTdeflistclassifier{]] .. stringstrip(item[nc]) .. "}"
                nc = nc + 1
            end
        end
        tmp = tmp .. [[

  \\RSTdeflistdefinition{%
]]
        local final = item[#item]
        for np=1, #final do
            local par = final[np]
            tmp = tmp .. [[
    \\RSTdeflistparagraph{%
]] .. lpegmatch(inline_parser, par) .. "}\n"
        end
        tmp = tmp .. "  }"
        deflist = deflist .. tmp
    end
    return deflist .. [[

\\stopRSTdefinitionlist
]]
end

--------------------------------------------------------------------------------
-- Field lists
--------------------------------------------------------------------------------

-- TODO Do something useful with field lists. For now I'm not sure what as the
-- bibliography directives from the reST specification seem to make sense only
-- when using docinfo and, after all, we have .bib files that are portable.

function rst_context.field_list (str)
    rst_context.addsetups("fieldlist")
    return [[

\\startRSTfieldlist]] .. str .. [[\\eTABLEbody\\stopRSTfieldlist
]]
end

function rst_context.field_name (str)
    return [[\\fieldname{]] .. str .. [[}]]
end

function rst_context.field_body (str)
    return [[\\fieldbody{]] .. lpegmatch(inline_parser, str) .. [[}]]
end

--- Render one field; `tab` is { name, body }, the body is inline-parsed.
function rst_context.field (tab)
    local name, body = tab[1], tab[2]
    return stringformat([[

    \\RSTfieldname{%s}
    \\RSTfieldbody{%s}
]], name, lpegmatch(inline_parser, body))
end

function rst_context.line_comment (str)
    return "% " .. str
end

function rst_context.block_comment (str)
    return stringformat([[

\iffalse %% start block comment
%s\fi %% stop block comment
]], str)
end

--- Wrap the option rows in `str` in a two-column table with a header row.
function rst_context.option_list (str)
    return [[
\\setupTABLE[c][first] [background=color, backgroundcolor=grey, style=\tt]
\\setupTABLE[c][each]  [frame=off]
\\setupTABLE[r][each]  [frame=off]
\\bTABLE[split=yes,option=stretch]
\\bTABLEhead
\\bTR
  \\bTH  Option \\eTH
  \\bTH  Description \\eTH
\\eTR
\\eTABLEhead
\\bTABLEbody
]] .. lpegmatch(inline_parser, str) .. [[

\\eTABLEbody
\\eTABLE
]]
end

function rst_context.option_item (tab)
    return stringformat([[\\bTR\\bTC %s \\eTC\\bTC %s \\eTC\\eTR
]], tab[1], tab[2])
end

function rst_context.test(str)
    return ":"
end

--- Wrap `str` in a typing environment after removing the common indentation.
-- With `included` set, every backslash in the result is doubled.
function rst_context.literal_block (str, included)
    local indent = P" "^1
    local stripme = #str
    for line in stringgmatch(str, "[^\n]+") do
        -- setting to the lowest indent of all lines
        -- (idt is the position just past the leading spaces)
        local idt = lpegmatch(indent, line)
        if line and idt then
            stripme = idt < stripme and idt or stripme
        end
    end

    local strip = P{
        [1] = "strip",
        strip = Cs(V"line"^1),
        eol = P"\n",
        restofline = (1 - V"eol")^0,
        stop = Cs(V"eol" * P" "^0) * -P(1) / "", -- remove trailing blank lines
        line = Cs(V"restofline" * (V"stop" + V"eol")) / function (line)
            return #line > stripme and line:sub(stripme) or line
        end,
    }

    str = lpegmatch(strip, str)
    str = [[

\starttyping[lines=hyphenated]
]] .. str .. [[

\stoptyping
]]
    if included then -- escaping can ruin your day
        str = str:gsub("\\", "\\\\")
    end
    return str
end

function rst_context.included_literal_block (str)
    return rst_context.literal_block(str, true)
end

function rst_context.line_block (str)
    rst_context.addsetups("lines")
    return [[

\\startlines
]] .. 
lpegmatch(inline_parser, str) .. [[\\stoplines +]] +end + +function rst_context.line_block_line(str) +    str = str:gsub("\n", " ") +    return str .. "\n" +end + +function rst_context.line_block_empty() +    return "\n" +end + +function rst_context.block_quote (tab) +    rst_context.addsetups("blockquote") +    local str = [[ +\\startlinecorrection +\\blank[small] +\\startblockquote +]] .. lpegmatch(inline_parser, tab[1]) .. [[ + +\\stopblockquote +]] + +    return tab[2] and str .. [[ +\\blank[small] +\\startattribution +]] .. lpegmatch(inline_parser, tab[2]) .. [[ +\\stopattribution +\\blank[small] +\\stoplinecorrection +]]  or str .. [[ +\\blank[small] +\\stoplinecorrection +]]  +end + +--function rst_context.table (str) +    --return [[ +--\\startlinecorrection +--]] .. str .. [[ + +--\\stoplinecorrection +--]] +--end + +function rst_context.grid_table (tab) +    local body = "" +    local nr = 1 +    local head +    if tab.has_head then +        head = [[ +\\setupTABLE[c][each]  [frame=off] +\\setupTABLE[r][each]  [frame=off] +%\\startlinecorrection +\\bTABLE[split=repeat,option=stretch] +\\bTABLEhead +]] +        while nr <= tab.head_end do +            local r = tab.rows[nr] +            local isempty = true +            for n=1, #r do +                local cell = r[n] +                if cell.variant == "normal" then +                    isempty = false +                    break +                end +            end + +            if not isempty then +                local row = [[\\bTR]] +                for n=1, #r do +                    local c = r[n] +                    if not (c.parent or +                            c.variant == "separator") then +                        local celltext = lpegmatch(inline_parser, c.stripped) +                        if c.span.x or c.span.y then +                            local span_exp = "[" +                            if c.span.x then +                                span_exp = span_exp .. "nc=" .. c.span.x .. 
"," +                            end +                            if c.span.y then +                                span_exp = span_exp .. "nr=" .. c.span.y +                            end +                            celltext  = span_exp .. "] " .. celltext + +                        end + +                        row = row .. "\n  " .. [[\\bTH ]] .. celltext .. [[\\eTH]] +                    end +                end +                head = head .. row .. "\n" .. [[\\eTR]] .. "\n" +            end +            nr = nr + 1 +        end +        head = head .. [[ +\\eTABLEhead +\\bTABLEbody +]]  +    else +        head = [[ +\\setupTABLE[c][each]  [frame=off] +\\setupTABLE[r][each]  [frame=off] +%\\startlinecorrection +\\bTABLE[split=repeat,option=stretch] +\\bTABLEbody +]]  +    end +    while nr <= #tab.rows do +        local r = tab.rows[nr] +        local isempty = true +        for n=1, #r do +            local cell = r[n] +            if cell.variant == "normal" then +                isempty = false +                break +            end +        end + +        if not isempty then +            local row = [[\\bTR]] +            for n=1, #r do +                local c = r[n] +                if not (c.parent or +                        c.variant == "separator") then +                    local celltext = lpegmatch(inline_parser, c.stripped) +                    if c.span.x or c.span.y then +                        local span_exp = "[" +                        if c.span.x then +                            span_exp = span_exp .. "nc=" .. c.span.x .. "," +                        end +                        if c.span.y then +                            span_exp = span_exp .. "nr=" .. c.span.y +                        end +                        celltext  = span_exp .. "] " .. celltext + +                    end + +                    row = row .. "\n  " .. [[\\bTC ]] .. celltext .. [[\\eTC]] +                end +            end +            body = body .. 
row .. "\n" .. [[\\eTR]] .. "\n" +        end +        nr = nr + 1 +    end +    local tail = [[ +\\eTABLEbody +\\eTABLE +%\\stoplinecorrection +]] +    return head .. body .. tail +end + + +function rst_context.simple_table(tab) +    local head +    local nr = 1 +    if tab.head_end then +        head = [[ +\\setupTABLE[c][each]  [frame=off] +\\setupTABLE[r][each]  [frame=off] +%\\startlinecorrection +\\bTABLE[split=yes,option=stretch] +\\bTABLEhead +]] +        while nr <= tab.head_end do +            local row = tab[nr] +            if not row.ignore then +                dbg_write(">hr>" .. #row) +                head = head .. [[\\bTR]] +                for nc=1, #row do +                    local cell = row[nc] +                    dbg_write("%7s | ", cell.content) +                    local celltext = lpegmatch(inline_parser, cell.content) +                    if cell.span then +                        head = head .. stringformat([=[\\bTH[nc=%s]%s\\eTH]=], cell.span.x, celltext or "") +                    else +                        head = head .. [[\\bTH ]] .. celltext .. [[\\eTH]] +                    end +                end +                dbg_write("\n") +                head = head .. "\\\\eTR\n" +            end +            nr = nr + 1 +        end + +        head = head .. [[ +\\eTABLEhead +\\bTABLEbody +]]  +    else +        head = [[ +\\setupTABLE[c][each]  [frame=off] +\\setupTABLE[r][each]  [frame=off] +%\\startlinecorrection +\\bTABLE[split=yes,option=stretch] +\\bTABLEbody +]]  +    end +    local tail = [[ +\\eTABLEbody +\\eTABLE +%\\stoplinecorrection +]] +    local body = "" +    while nr <= #tab do +        local row = tab[nr] +        if not row.ignore then +            dbg_write(">tr>" .. #row) +            body = body .. 
[[\\bTR]] +            for nc=1, #row do +                local cell = row[nc] +                dbg_write("%7s | ", cell.content) +                local celltext = lpegmatch(inline_parser, cell.content) +                if cell.span then +                    body = body .. stringformat([=[\\bTC[nc=%s]%s\\eTC]=], cell.span.x, celltext or "") +                else +                    body = body .. [[\\bTC ]] .. celltext .. [[\\eTC]] +                end +            end +            dbg_write("\n") +            body = body .. "\\\\eTR\n" +        end +        nr = nr + 1 +    end +    return head .. body .. tail +end + +function rst_context.footnote (label, content) +    local tf = rst_context.state.footnotes +    rst_context.addsetups("footnotes") +    if stringmatch(label, "^%d+$") then -- all digits +        tf.numbered[tonumber(label)] = +            rst_context.escape(lpegmatch(inline_parser, content)) +    elseif label == "#" then --autonumber +        repeat -- until next unrequested number +            tf.autonumber = tf.autonumber + 1 +        until tf.numbered[tf.autonumber] == nil +        tf.numbered[tf.autonumber] = +            rst_context.escape(lpegmatch(inline_parser, content)) +    elseif stringmatch(label, "^#.+$") then +        local thelabel = stringmatch(label, "^#(.+)$") +        tf.autolabel[thelabel] = +            rst_context.escape(lpegmatch(inline_parser, content)) +    elseif label == "*" then +        rst_context.addsetups("footnote_symbol") +        tf.symbol[#tf.symbol+1] = +            rst_context.escape(lpegmatch(inline_parser, content)) +    else -- “citation reference” treated like ordinary footnote +        repeat -- until next unrequested number +            tf.autonumber = tf.autonumber + 1 +        until tf.numbered[tf.autonumber] == nil +        tf.numbered[tf.autonumber] = +            rst_context.escape(lpegmatch(inline_parser, content)) +    end +    return "" +end + +--- hack to differentiate inline images +local 
special_substitutions = { +    image = "inline_image", +} + +function rst_context.substitution_definition (subtext, directive, data) +    local special = special_substitutions[directive] +    if special then +        --- override; pass data directly +        directive = special +    else +        local tmp +        if data.first ~= "" then +            tmp = { data.first } +        else +            tmp = { } +        end +        data.first = nil +        for i=1, #data do -- paragraphs +            local current = tableconcat(data[i], "\n") +            --current = lpegmatch(inline_parser, current) +            --current = rst_context.escape(current) +            tmp[#tmp+1] = current +        end +        data = tableconcat(tmp, "\n\n") +        data = stringstrip(data) +    end +    subtext = stringgsub(subtext, "%s", "") +    rst_context.substitutions[subtext] = { directive = directive, +                                           data      = data } +    return "" +end + +-- not to be confused with the directive definition table rst_directives +function rst_context.directive(directive, data) +    local fun = rst_directives[directive] +    if fun then +        rst_context.addsetups("directive") +        local result = "" +        result = fun(data) +        return result +    end +    return "" +end + +-- vim:ft=lua:sw=4:ts=4:expandtab diff --git a/src/rst_directives.lua b/src/rst_directives.lua new file mode 100644 index 0000000..f5572b7 --- /dev/null +++ b/src/rst_directives.lua @@ -0,0 +1,381 @@ +#!/usr/bin/env texlua +-------------------------------------------------------------------------------- +--         FILE:  rst_directives.lua +--        USAGE:  called by rst_parser.lua +--  DESCRIPTION:  Complement to the reStructuredText parser +--       AUTHOR:  Philipp Gesang (Phg), <phg42.2a@gmail.com> +--      CHANGED:  2013-06-03 18:52:35+0200 +-------------------------------------------------------------------------------- +-- + +local helpers = helpers or 
thirddata and thirddata.rst_helpers + +-------------------------------------------------------------------------------- +-- Directives for use with |substitutions| +-------------------------------------------------------------------------------- + +local rst_directives     = { } +thirddata.rst_directives = rst_directives +local rst_context        = thirddata.rst + +local lpegmatch      = lpeg.match +local stringformat   = string.format +local stringstrip    = string.strip +local tableconcat    = table.concat +local tableflattened = table.flattened +local type           = type + +--rst_directives.anonymous     = 0 +rst_directives.images        = {} +rst_directives.images.done   = {} +rst_directives.images.values = {} + + +rst_directives.images.keys = { +    ["width"]   = "width", +    ["size"]    = "width", +    ["caption"] = "caption", +    ["alt"]     = "caption", +    ["scale"]   = "scale", +} + +rst_directives.images.values.scale = function (orig) +    -- http://wiki.contextgarden.net/Reference/en/useexternalfigure +    -- scale=1000 is original size; to get 72%, use scale=720. +    return tonumber(orig) * 1000 +end + +rst_directives.images.values.width = { +    ["fit"]    = "\\hsize", +    ["hsize"]  = "\\hsize", +    ["broad"]  = "\\hsize", +    ["normal"] = "local", +    ["normal"] = "local", +} + +-- we won't allow passing arbitrary setups to context +local permitted_setups = { +    "width", +    "scale" +} + +local function img_setup (properties) +    local result = "" +    for _, prop in next, permitted_setups do +        if properties[prop] then +            result = result .. prop .. "=" .. properties[prop] .. "," +        end +    end +    if result ~= "" then +        result = "[" .. result .. 
"]" +    end +    return result +end + +local collect_image_properties = function (data) +    local image_directives  = rst_directives.images +    local p_keyval          = helpers.patterns.colon_keyval +    local properties        = { } + +    data = tableflattened(data) +    for i=1, #data do +        local str = stringstrip(data[i]) +        local key, val = lpegmatch(p_keyval, str) +        if key and val then +            key = image_directives.keys[key] -- sanitize key expression +            local valtype = type(image_directives.values[key]) +            if valtype == "table" then +                val = image_directives.values[key][val] +            elseif valtype == "function" then +                val = image_directives.values[key](val) +            end +            properties[key] = val +        end +    end +    return properties +end + +--- ordinary image directives are converted to floats + +local float_image = function (data) +    rst_context.addsetups "image" +    local inline_parser = rst_context.inline_parser +    local properties +    local anon          = false +    local rdi           = rst_directives.images +    local hp            = helpers.patterns +    local caption       = "" +    local name          = "" + +    if data.name then +        name = stringstrip(data.name) +        data.name   = nil +    else +        if next(data[1]) then +            name = data[1][1] +        end +    end + +    --rd.anonymous = rd.anonymous + 1 +    --anon = true -- indicates a nameless picture +    --name = "anonymous" .. rd.anonymous + +    properties = collect_image_properties(data) + +    if properties.caption then +        caption = lpegmatch(inline_parser, properties.caption) +        caption = rst_context.escape(caption) +    end + +    properties.setup = img_setup(properties) or "" +    local img = "" +--    local images_done = rdi.done +--    if not anon then -- TODO: implement? +--        if not images_done[name] then +--            img = img .. 
stringformat([[ +-- +--\useexternalfigure[%s][%s][]%% +--]], name, data) +--        images_done[name] = true +--        end +--        img = img .. stringformat([[ +--\def\RSTsubstitution%s{%% +--  \placefigure[here]{%s}{\externalfigure[%s]%s}%% +--} +--]], name, rst_context.escape(lpegmatch(inline_parser, properties.caption)), name, properties.setup) +--    else -- image won't be referenced but used instantly +    img = stringformat( +        "\n\\placefigure[here]{%s}{\\externalfigure[%s]%s}", +        caption, +        name, +        properties.setup) +--    end +    return img +end + +--- inline substitutions are converted to bare external figures +local inline_image = function (name, data) +    rst_context.addsetups "image" +    local filename  = data.first +    local p_keyval  = helpers.patterns.colon_keyval +    local properties + +    if not filename then --- garbage, ignore +        return "" +    end +    data.first = nil +    filename   = stringstrip(filename) +    properties = collect_image_properties(data) + +    local scheme  = "\n\\def\\RSTsubstitution%s{\n  \\externalfigure[%s]%s%%\n}\n" +    local options = "" +    if next(properties) then +        local tmp = { } +        tmp[#tmp+1] = "[" +        for key, value in next, properties do +            tmp[#tmp+1] = key +            tmp[#tmp+1] = "={" +            tmp[#tmp+1] = rst_context.escape(value) +            tmp[#tmp+1] = "}," +        end +        tmp[#tmp+1] = "]" +        options = tableconcat(tmp) +    end +    return stringformat(scheme, name, filename, options) +end + +rst_directives.image        = float_image +rst_directives.inline_image = inline_image + +rst_directives.caution = function(data) +    local inline_parser = rst_context.inline_parser +    rst_context.addsetups("dbend") +    rst_context.addsetups("caution") +    local text = { } +    for i=1, #data do -- paragraphs +        local current = tableconcat(data[i], "\n") +        current = lpegmatch(inline_parser, current) +      
  current = rst_context.escape(current) +        text[i] = current +    end +    return stringformat([[ +\startRSTcaution +%s +\stopRSTcaution +]], tableconcat(text, "\n\n")) +end + +rst_directives.danger = function(data) +    local inline_parser = rst_context.inline_parser +    rst_context.addsetups("dbend") +    rst_context.addsetups("danger") +    local text = { } +    for i=1, #data do -- paragraphs +        local current = tableconcat(data[i], "\n") +        current = lpegmatch(inline_parser, current) +        current = rst_context.escape(current) +        text[i] = current +    end +    return stringformat([[ +\startRSTdanger +%s +\stopRSTdanger +]], tableconcat(text, "\n\n")) +end + +-- http://docutils.sourceforge.net/docs/ref/rst/directives.html +rst_directives.DANGER = function(data) +    local inline_parser = rst_context.inline_parser +    local text = { } +    for i=1, #data do -- paragraphs +        local current = tableconcat(data[i], "\n") +        current = lpegmatch(inline_parser, current) +        current = rst_context.escape(current) +        text[i] = current +    end +    return stringformat([[ + +%% The Rabbit of Caerbannog +\startlinecorrection +\blank[force,big] +\framed[frame=on, +        corner=round, +        rulethickness=5pt, +        align=middle, +        width=\hsize, +        frameoffset=.5em, +        backgroundoffset=1em, +        background=color, +        backgroundcolor=red, +        foreground=color, +        foregroundcolor=black]{%% +  \language[en-gb]\tfb\bf +  Follow only if ye be men of valour, for the entrance to this cave is guarded +  by a creature so foul, so cruel that no man yet has fought with it and lived. +  Bones of full fifty men lie strewn about its lair. 
So, brave knights, if you +  do doubt your courage or your strength, come no further, for death awaits you +  all with nasty, big, pointy teeth.%% +  \blank[force,big] +  %s%% +} +\blank[force,big] +\stoplinecorrection +]], tableconcat(text, "\n\n")) +end + +rst_directives.mp = function(name, data) +    local mpcode = stringformat([[ +\startreusableMPgraphic{%s} +%s +\stopreusableMPgraphic +]], name, data) +    mpcode = mpcode .. stringformat([[ +\def\RSTsubstitution%s{%% +  \reuseMPgraphic{%s}%% +} +]], name, name) +    return mpcode +end + +--- There’s an issue with buffers leaving trailing spaces due to their +--- implementation. +--- http://archive.contextgarden.net/message/20111108.175913.1d994624.en.html +rst_directives.ctx = function(name, data) +    local ctx = stringformat([[ + +\startbuffer[%s] +%s\stopbuffer +\def\RSTsubstitution%s{%% +  \getbuffer[%s]\removeunwantedspaces%% +} +]], name, data, name, name) +    return ctx +end + +rst_directives.lua = function(name, data) +    local luacode = stringformat([[ + +\startbuffer[%s] +\startluacode +%s +\stopluacode +\stopbuffer +\def\RSTsubstitution%s{%% +  \getbuffer[%s]\removeunwantedspaces%% +} +]], name, data, name, name) +    return luacode +end + +-------------------------------------------------------------------------------- +--- Experimental math directive +-------------------------------------------------------------------------------- + +rst_directives.math = function (name, data) +    data = data or name +    local formula +    if type(data) == "table" then +        local last, i = #data, 1 +        while i <= last do +            local line = stringstrip(data[i]) +            if line and line ~= "" then +                formula = formula and formula .. " " .. 
line or line +            end +            i = i + 1 +        end +    end +    return stringformat([[ +\startformula +%s +\stopformula +]], formula) +end + +-------------------------------------------------------------------------------- +--- End math directive +-------------------------------------------------------------------------------- + +rst_directives.replace = function(name, data) +    return stringformat([[ + +\def\RSTsubstitution%s{%s} +]], name, data) +end + +-------------------------------------------------------------------------------- +--- Containers. +-------------------------------------------------------------------------------- + +--- *data*: +---     { [1]  -> directive name, +---       [>1] -> paragraphs } + +rst_directives.container = function(data) +    local inline_parser = rst_context.inline_parser +    local tmp = { } +    for i=1, #data do -- paragraphs +        local current = tableconcat(data[i], "\n") +        current = lpegmatch(inline_parser, current) +        current = rst_context.escape(current) +        tmp[i] = current +    end +    local content = tableconcat(tmp, "\n\n") +    local name = data.name +    if name and name ~= "" then +        name = stringstrip(data.name) +        return stringformat([[ +\start[%s]%% +%s%% +\stop +]], name, content) +    else +        return stringformat([[ +\begingroup%% +%s%% +\endgroup +]], content) +    end +end + +-- vim:ft=lua:sw=4:ts=4:expandtab + diff --git a/src/rst_helpers.lua b/src/rst_helpers.lua new file mode 100644 index 0000000..97d4dd7 --- /dev/null +++ b/src/rst_helpers.lua @@ -0,0 +1,657 @@ +#!/usr/bin/env texlua +-------------------------------------------------------------------------------- +--         FILE:  rst_helpers.lua +--        USAGE:  called by rst_parser.lua +--  DESCRIPTION:  Complement to the reStructuredText parser +--       AUTHOR:  Philipp Gesang (Phg), <phg42.2a@gmail.com> +--      CHANGED:  2013-03-26 23:55:04+0100 
--------------------------------------------------------------------------------
--

-- LPeg constructors and capture functions used by the patterns below.
local P, R, S, V, lpegmatch
    = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match

local C,   Carg, Cb, Cc, Cg,
      Cmt, Cp,   Cs, Ct
    = lpeg.C,   lpeg.Carg, lpeg.Cb, lpeg.Cc, lpeg.Cg,
      lpeg.Cmt, lpeg.Cp,   lpeg.Cs, lpeg.Ct

-- The helper namespace must already exist as thirddata.rst_helpers; this
-- file only adds the "table", "cell" and "patterns" sub-tables to it.
local helpers
helpers       = thirddata.rst_helpers
helpers.table = {}
helpers.cell  = {}

local utf    = unicode.utf8
local utflen = utf.len

local stringstrip  = string.strip
local stringformat = string.format

--- Formatted debug output.
-- Writes string.format(...) to standard output, but only while
-- helpers.rst_debug is set; otherwise the call does nothing.
function helpers.dbg_writef(...)
    if helpers.rst_debug then
        io.write(stringformat(...))
    end
end

local dbg_write = helpers.dbg_writef

helpers.patterns  = {}

do
    local p = helpers.patterns

    -- Single-character building blocks.
    p.dash   = P"-"
    p.equals = P"="
    p.plus   = P"+"
    p.bar    = P"|"
    p.eol    = P"\n"
    p.last   = -P(1)   -- succeeds only at the end of the subject
    p.space  = P" "

    p.dash_or_equals = p.dash + p.equals

    -- Grid tables: cells are delimited by "|" or "+".
    p.celldelim   = p.bar + p.plus
    p.cellcontent = (1 - p.celldelim)
    -- delimiter, captured content, then a delimiter that is not consumed
    p.cell        = p.celldelim * C((1 - p.celldelim)^1) * #p.celldelim
    p.cell_line   = p.plus * p.dash^1 * #p.plus
    p.dashesonly  = p.dash^1  * p.last
    p.spacesonly  = p.space^1 * p.last

    -- Column boundaries: runs of "-" or "=" separated by spaces; the
    -- captures are the positions where each run starts resp. stops.
    p.col_start = Cp() * p.dash_or_equals^1
    p.col_stop  = p.dash_or_equals^1 * Cp()
    p.column_starts = Ct(p.col_start * ( p.space^1 * p.col_start)^1)
    p.column_stops  = Ct(p.col_stop  * ( p.space^1 * p.col_stop)^1)

    p.st_headsep = p.equals^1 * (p.space^1 * p.equals^1)^1
    p.st_colspan = p.dash^1 * (p.space^1 * p.dash^1)^0 * p.space^0 * p.last
    p.st_span_starts = Ct(Cp() * p.dash^1 * (p.space^1 * Cp() * p.dash^1)^0)
    p.st_span_stops  = Ct(p.dash^1 * Cp() * (p.space^1 * p.dash^1 * Cp())^0)


    -- Splits one grid table line into its cell contents. It has to be
    -- matched with the table layout as extra argument (read via Carg(1)).
    p.cells = P{
        [1] = "cells",
        cells = p.celldelim
              * (C(V"in_cell")
               * (V"matchwidth" * C(V"in_cell"))^1),

        in_cell = p.cellcontent^1
                + (p.dash - p.cellcontent)^1,

        -- A delimiter is accepted only if it sits on one of the column
        -- boundaries obtained by accumulating layout.widths.
        matchwidth = Cmt(C(p.celldelim) * Carg(1), function(s,i,del, layout)
                         local pos = 1
                         local lw  = layout.widths
                         for n=1, #lw do
                             pos = pos + lw[n] + 1
                             if (i - 1) == pos then return true end
                         end
                         return false
                     end),
    }

    -- Full-width separator lines: "+---+---+" and "+===+===+".
    p.sep_line = p.plus * (p.dash^1   * p.plus)^1 * p.last
    p.sep_head = p.plus * (p.equals^1 * p.plus)^1 * p.last

    -- A line that contains a "+---" segment followed by "+" somewhere but
    -- is not a complete separator line.
    p.sep_part = ((1 - p.cell_line)^0 * p.cell_line) - p.sep_line

    p.new_row = p.sep_line + p.sep_head + p.sep_part

    p.whitespace = S" \t\v\r\n"^1
    -- Captures the subject without leading and trailing whitespace.
    p.strip = p.whitespace^0 * C((1 - (p.whitespace * p.last))^1) * p.whitespace^0 * p.last


    -- "key: value" lines; "\:" does not terminate the key.
    local colon = P":"
    local escaped_colon = P"\\:"
    local nocolon = (escaped_colon + (1 - colon))^1
    p.colon_right = nocolon * colon
    -- optional leading colon, captured key, colon, spaces, captured value
    -- (the value stops before any trailing spaces)
    p.colon_keyval = colon^-1 * C(nocolon) * colon * p.space^1 * C((1 - (p.space^0 * P(-1)))^1)

    -- color expression matching for text roles
    local digit = R"09"
    local dot   = P"."
    local colvalue = digit * dot * digit^1
                   + digit
                   + dot * digit^1
    local coldelim = P"_" + P"-"
    p.rgbvalues = P"rgb_"
                * Ct( C(colvalue) * coldelim * C(colvalue) * coldelim * C(colvalue) )
end

--- Create a cell record for a grid table.
-- @param raw      raw cell text, or nil
-- @param n_row    row index, stored as pos.y
-- @param n_col    column index, stored as pos.x
-- @param parent   parent reference (stored unchanged), or nil
-- @param variant  optional cell variant; defaults to "normal"
-- @return the new cell table; both spans start out as 1
function helpers.cell.create(raw, n_row, n_col, parent, variant)
    local p = helpers.patterns
    return {
        stripped = raw and p.strip:match(raw) or "",
        content  = raw,
        width    = raw and utflen(raw) or 0, -- length in UTF-8 characters
        bytes    = raw and #raw or 0,        -- length in bytes
        -- The argument used to be dropped and "normal" was always stored;
        -- it is honoured now and "normal" remains the default.
        variant  = variant or "normal", -- [normal|separator|y_continue|x_continue]
        pos      = { x = n_col, y = n_row },
        span     = { x = 1, y = 1 },
        parent   = parent,
    }
end

--- Determine how many additional layout columns a cell covers.
-- Accumulates the column widths (plus one delimiter each) beginning at
-- column *init* until the sum equals the cell's character count plus one.
-- @param content  cell text (string)
-- @param layout   table layout; only layout.widths is read
-- @param init     index of the column the cell starts in
-- @return number of extra columns (n - init), or false if no column
--         boundary matches the cell width
function helpers.cell.get_x_span(content, layout, init)
    local lw     = layout.widths
    local target = utflen(content) + 1 -- loop invariant, computed once
    local acc    = 0
    for n=init, #lw do
        acc = acc + lw[n] + 1
        if target == acc then
            return n - init
        end
    end
    return false
end


-- Extending a cell by 1 cell horizontally.
-- @param cell  cell record; its span.x field is incremented in place
function helpers.cell.add_x (cell)
    cell.span.x = cell.span.x + 1
end


--- Derive the column layout from a grid table border line.
-- Every "+---" segment that is followed by another "+" yields one column.
-- @param line  border line, e.g. "+-----+---+"
-- @return table with "widths" (number of dashes per column) and
--         "slices" (the dash runs themselves)
local function set_layout (line)
    local p = helpers.patterns
    local layout = {}
    local slice = Ct((p.plus * C(p.dash^1) * #p.plus)^1)

    layout.widths = {}
    layout.slices = {}
    local elms = lpegmatch(slice, line)
    for n=1, #elms do
        local elm = elms[n]
        layout.widths[n] = #elm
        layout.slices[n] =  elm
    end
    return layout
end

--- Build the table structure for a grid table.
-- The first pass splits every raw line into cell records and flags
-- separator lines; the second pass walks the columns and folds continuation
-- lines into the cell they belong to.
-- @param raw  array of the table's lines; raw[1] defines the column layout
-- @return table with the fields rows, layout, resolve_parent, __init and,
--         if a "+===+" line was seen, has_head and head_end
function helpers.table.create(raw)
    local newtab = {}
    newtab.rows  = {}
    newtab.layout = set_layout(raw[1])

    local p = helpers.patterns

    -- Follow the chain of .parent references starting at (row, col) and
    -- return the coordinates of the cell at its end. *array* defaults to
    -- newtab.rows and is now handed on to the recursive call as well.
    newtab.resolve_parent = function(row, col, array)
        array = array or newtab.rows
        local cell  = array[row][col]
        local par_row, par_col = row, col
        if cell.parent then
            par_row, par_col = newtab.resolve_parent(cell.parent.y, cell.parent.x, array)
        end
        return par_row, par_col
    end

    newtab.__init = function()
        local hc = helpers.cell
        local newtablayout = newtab.layout

        -- Pass 1: one cell record per line and layout column.
        for nr=1, #raw do
            local row = raw[nr]
            newtab.rows[nr] = {}
            local this_row = newtab.rows[nr]
            this_row.sepline = p.sep_line:match(row)
            this_row.sephead = p.sep_head:match(row)
            this_row.seppart = p.sep_part:match(row)
            if this_row.sephead then
                newtab.has_head = true
                newtab.head_end = nr
            end

            local splitted = { p.cells:match(row, 1, newtablayout) }
            local pos_layout = 1
            -- Bookkeeping for the placeholder cells that follow a cell
            -- spanning several columns.
            local make_empty = {}
            make_empty.n, make_empty.parent = 0, nil

            while pos_layout <= #newtablayout.widths do
                -- NOTE(review): captures are looked up by layout column;
                -- for lines with horizontally spanning cells p.cells
                -- presumably yields fewer captures than columns -- confirm
                -- that this lookup is intended.
                local splitpos = splitted[pos_layout]
                local layoutwidth = newtablayout.widths[pos_layout]
                local span = 1
                local this

                if make_empty.n > 0 then
                    make_empty.n = make_empty.n - 1
                    this = hc.create("", nr, pos_layout, make_empty.parent)
                    this.parent  = make_empty.parent
                    -- These two used to be assigned without "local" and
                    -- ended up as global variables.
                    local p_row, p_col = newtab.resolve_parent(this.parent.y, this.parent.x)
                    local thisparent = newtab.rows[p_row][p_col]
                    if this_row.sepline or this_row.sephead or
                        thisparent.variant == "separator" then
                        this.variant = "separator"
                    else
                        this.variant = "empty1"
                    end
                else
                    local cellwidth = utflen(splitpos)
                    if cellwidth > layoutwidth then
                        span = span + hc.get_x_span(splitpos, newtablayout, pos_layout)
                    end
                    this = hc.create(splitpos, nr, pos_layout, nil)
                    if p.dashesonly:match(splitpos) or
                        this_row.sepline or this_row.sephead then
                        this.variant = "separator"
                    end
                    this.span.x = span
                    make_empty.n = span - 1
                    make_empty.parent = span > 1 and { y = nr, x = pos_layout } or nil
                end

                this_row[pos_layout] = this
                pos_layout = pos_layout + 1
            end -- while
        end -- for loop over rows

        -- Pass 2: column by column, merge continuation lines into their
        -- parent cell. The rows are modified in place.
        local newrows = newtab.rows
        for nc=1, #newtablayout.widths do
            -- this is gonna be extremely slow but at least it's readable
            local newrow
            for nr=1, #newrows do
                local row = newrows[nr]
                local cell = row[nc]
                dbg_write("nc: %s, nr:%2s | %9s | ", nc, nr,cell.variant)
                if  row.sepline or row.sephead
                    or p.dashesonly:match(cell.content)
                    or cell.variant == "separator" then -- separator; skipping and beginning new row
                    newrow = true
                    dbg_write("new >%24s< ", cell.stripped)
                    if cell.parent then dbg_write("parent |") else dbg_write("no par |") end
                else
                    dbg_write("old >%24s< ", cell.stripped)
                    if cell.parent then dbg_write("parent |") else dbg_write("no par |") end
                    if not newrow then -- continuing parent
                        local par_row, par_col
                        if cell.parent then
                            par_row, par_col = newtab.resolve_parent(cell.parent.y, cell.parent.x, newrows)
                            dbg_write(" use %s,%2s | ", par_col, par_row)
                        else -- Using vertical predecessor.
                            par_row, par_col = newtab.resolve_parent(nr-1,nc, newrows)
                            dbg_write(" new %s,%2s | ", par_col, par_row)
                        end
                        local parent = newrows[par_row][par_col]

                        if newrows[nr].seppart then
                            dbg_write("span++")
                            parent.span.y   = parent.span.y + 1
                        end

                        -- The text of the continuation line is appended to
                        -- the parent; the cell itself is marked as merged.
                        parent.content  = parent.content  .. cell.content
                        parent.stripped = parent.stripped .. " " .. cell.stripped
                        cell.variant = "empty2"
                        cell.parent  = { x = par_col, y = par_row }
                    end
                    -- The first content line after a separator stays a cell
                    -- of its own; whatever follows continues it.
                    newrow = false
                end
                dbg_write("\n")
            end -- for loop over rows
        end -- for loop over columns
    end

    newtab.__init()

--[[
    newtab.__draw_debug = function()
        for nr=1, #newtab.rows do
            local row = newtab.rows[nr]
            for nc=1, #row do
                local cell = row[nc]
                local field = cell.variant:sub(1,7)
                if cell.parent then
                    field = field .. string.format(" %s,%2s",cell.parent.x, cell.parent.y)
                end
                dbg_write("%12s | ", field)
            end
            dbg_write("\n")
        end
    end
--]]

    return newtab
end



--- Follow the chain of .parent references of a cell.
-- The recursive call used to go through the undefined global "self" and to
-- omit *array*, so it raised an error for every cell that has a parent.
-- @param row    row index of the starting cell
-- @param col    column index of the starting cell
-- @param array  array of rows, each an array of cell records
-- @return row and column of the cell at the end of the parent chain
function helpers.table.resolve_parent (row, col, array)
    local cell = array[row][col]
    local par_row, par_col = row, col
    if cell.parent then
        par_row, par_col = helpers.table.resolve_parent(cell.parent.y, cell.parent.x, array)
    end
    return par_row, par_col
end


-- Check the column boundaries of a simple table.
--- Collect the column boundaries of a simple-table border line.
-- @param str string  border line of a simple table
-- @return table  `{ starts = set of start positions,
--                   stops  = set of stop positions,
--                   slices = array of { start = ..., stop = ... } }`
function helpers.get_st_boundaries (str)
    local p_column_starts = helpers.patterns.column_starts
    local p_column_stops  = helpers.patterns.column_stops
    local starts, stops, slices = { }, { }, { }

    local elms = lpegmatch(p_column_starts, str)
    for n=1, #elms do
        local elm = elms[n]
        slices[n] = { start = elm }
        starts[elm] = true
    end

    elms = lpegmatch(p_column_stops, str)
    for n=1, #elms do
        local elm = elms[n]
        slices[n]["stop"] = elm
        stops[elm] = true
    end
    return { starts = starts, stops = stops, slices = slices }
end

--- Build the row/cell structure of a simple table.
-- Every raw line becomes one entry of the result. Separator lines and
-- column-span underlines are flagged `ignore`; the separator that ends
-- the header is additionally flagged `head_sep` and its index is stored
-- in `rows.head_end`. Content lines become arrays of
-- `{ content = string, span = { x = n, y = 1 } }`. A content line whose
-- first cell is empty is a continuation line: its text is appended to
-- the content row it continues and the line itself is flagged `ignore`.
-- @param raw table  array of the table's lines; `raw[1]` is the top border
-- @return table  array of rows
function helpers.table.simple(raw)
    local rows   = {}
    local bounds = helpers.get_st_boundaries(raw[1])
    local slices = bounds.slices
    local p      = helpers.patterns

    for nr=1, #raw do
        local row = raw[nr]
        local newrow = {}
        if not p.st_headsep:match(row) and
           not p.st_colspan:match(row) then
            local starts, stops = {}, {}
            local check_span = false
            local nextrow = raw[nr+1] -- nil when the content line is last
            if nextrow and p.st_colspan:match(nextrow) then
                -- expect spans over several columns
                starts = p.st_span_starts:match(nextrow)
                stops  = p.st_span_stops :match(nextrow)
                check_span = true
            else
                for ncol=1, #slices do
                    local slice = slices[ncol]
                    starts[ncol] = slice.start
                    stops [ncol] = slice.stop
                end
            end

            for nc=1, #starts do
                local start = starts[nc]
                -- last column can exceed layout width
                local stop = nc ~= #starts and stops[nc] or #row
                local cell = {
                    content = stringstrip(row:sub(start, stop)),
                    span    = { x = 1, y = 1 },
                }
                if check_span then
                    local start_at, stop_at
                    for ncol=1, #slices do
                        local slice = slices[ncol]
                        if slice.start == start then
                            start_at = ncol
                        end
                        if start_at and
                           not (ncol == #slices) then
                            if slice.stop == stop then
                                stop_at = ncol
                                break
                            end
                        else -- last column, width doesn't matter
                            stop_at = ncol
                        end
                    end
                    -- Keep the default span if the underline does not
                    -- start on a layout column.
                    if start_at and stop_at then
                        cell.span.x = 1 + stop_at - start_at
                    end
                end
                newrow[nc] = cell
            end
        elseif p.st_colspan:match(row) then
            newrow.ignore = true
        elseif not rows.head_end    and
                nr > 1 and #raw > nr then -- ends the header
            rows.head_end = nr
            newrow.head_sep = true
            newrow.ignore = true
        else
            newrow.ignore = true
        end
        rows[nr] = newrow
    end

    -- Fold continuation lines into the content row they belong to.
    -- `anchor` is the index of that row. It survives across consecutive
    -- continuation lines, so a third line is no longer appended to the
    -- already emptied second one, and it is reset by separators, so a
    -- line right below a separator is kept as a row of its own instead
    -- of indexing a cell that does not exist.
    local anchor
    for nr=1, #rows do
        local row = rows[nr]
        if row.ignore then
            anchor = nil
        elseif anchor and row[1].content == "" then
            row.ignore = true
            local target = rows[anchor]
            for nc=1, #row do
                local cell   = row[nc]
                local parent = target[nc]
                parent.content = parent.content .. " " .. cell.content
                cell.content = ""
            end
        else
            anchor = nr
        end
    end

    return rows
end

helpers.list = {}

do
    local roman = S"ivxlcdm"^1
    local Roman = S"IVXLCDM"^1
    local alpha = R"az" - P"i" - P"v" - P"x" - P"l"
    local Alpha = R"AZ" - P"I" - P"V" - P"X" - P"L"
    local digit = R"09"^1
    local auto  = P"#"

    --- Enumerator classes in the order they are tried. The patterns used
    --- to live in a hash that was walked with `next`, so "c", "d" and "m"
    --- came out as "alpha" or "roman" depending on the traversal order.
    --- Now a single letter is tried as alphabetic first, then the roman
    --- classes; the unanchored letter patterns come last so that input
    --- only they match is still classified as before.
    local conversions = {
        { "digit", digit },
        { "auto",  auto  },
        { "alpha", alpha * -P(1) },
        { "Alpha", Alpha * -P(1) },
        { "roman", roman },
        { "Roman", Roman },
        { "alpha", alpha },
        { "Alpha", Alpha },
    }

    local stripme   = S" ()."
    local dontstrip = 1 - stripme
    local itemstripper = stripme^0 * C(dontstrip^1) * stripme^0

    --- Classify an enumerator.
    -- @param str string  enumerator, optionally wrapped in " ()." characters
    -- @return string|boolean  "digit", "auto", "alpha", "Alpha", "roman",
    --         "Roman", or false if no class matches
    local con = function (str)
        str = itemstripper:match(str)
        for n=1, #conversions do
            local conversion = conversions[n]
            if conversion[2]:match(str) then
                return conversion[1]
            end
        end
        return false
    end
    helpers.list.conversion = con

    local rnums = {
        i = 1,
        v = 5,
        x = 10,
        l = 50,
        c = 100,
        d = 500,
        m = 1000,
    }

    --- Convert a lowercase roman numeral to a number.
    -- Characters that are no roman symbols count as 1.
    -- @param str string  lowercase roman numeral
    -- @return number|string  the value, or the string "Not a number" when
    --         the repetition check on equal consecutive symbols fails
    local function roman_to_arab (str)
        local n = 1
        local curr, succ
        local max_three = { }
        local value = 0
        while n <= #str do
            if curr and curr == max_three[#max_three] then
                if #max_three >= 3 then
                    return "Not a number"
                else
                    max_three[#max_three+1] = curr
                end
            else
                max_three = { curr }
            end

            curr = rnums[str:sub(n,n)] or 1

            n = n + 1
            succ = str:sub(n,n)

            if succ and succ ~= "" then
                -- Same fallback as for `curr`; an unknown successor used
                -- to be nil and made the comparison below raise.
                succ = rnums[succ] or 1
                if curr < succ then
                    value = value - curr
                else
                    value = value + curr
                end
            else
                value = value + curr
            end
        end
        return value
    end
    helpers.list.roman_to_arab = roman_to_arab

    --- Look up the value of a roman numeral in the shared cache, converting
    --- and storing it on a miss.
    local function cached_roman (str)
        local trc = thirddata.rst.state.roman_cache
        local value = trc[str]
        if not value then
            value = roman_to_arab(str:lower())
            trc[str] = value
        end
        return value
    end

    --- Is `str` the direct successor of `old`?
    -- @param str string  current enumerator
    -- @param old string  previous enumerator
    -- @return boolean
    local suc = function (str, old)
        str, old = itemstripper:match(str), itemstripper:match(old)
        local n_str, n_old = tonumber(str), tonumber(old)
        if n_str and n_old then -- arabic numeral
            return n_str == n_old + 1
        end

        local con_str = con(str)
        if con_str == "alpha"  or
           con_str == "Alpha" then
            return str:byte() == old:byte() + 1
        end

        -- “I'm a Roman!” - “A woman?” - “No, *Roman*! - Au!” - “So your father was a woman?”
        if not (str:lower() == str  or
                str:upper() == str) then -- uneven cased --> fail
            return false
        end

        n_str, n_old = cached_roman(str), cached_roman(old)
        if type(n_str) ~= "number" or type(n_old) ~= "number" then
            -- "Not a number" sentinel; arithmetic on it would raise.
            return false
        end
        return n_str == n_old + 1
    end
    helpers.list.successor = suc

    --- Does `str` rank above `old`?
    -- @param str string  current enumerator
    -- @param old string  previous enumerator
    -- @return boolean
    local greater = function (str, old)
        str, old = itemstripper:match(str), itemstripper:match(old)
        local n_str, n_old = tonumber(str), tonumber(old)
        if n_str and n_old then -- arabic numeral
            return n_str > n_old
        end

        local con_str = con(str)
        if con_str == "alpha"  or
           con_str == "Alpha" then
            return str:byte() > old:byte()
        end

        if not (str:lower() == str  or
                str:upper() == str) then -- uneven cased --> fail
            return false
        end

        n_str, n_old = cached_roman(str), cached_roman(old)
        if type(n_str) ~= "number" or type(n_old) ~= "number" then
            -- "Not a number" sentinel; comparing it would raise.
            return false
        end
        return n_str > n_old
    end
    helpers.list.greater = greater

    --- Numeric value of an enumerator.
    -- @param str string  enumerator
    -- @return number|string|boolean  the decimal value; the byte value of
    --         the letter for alphabetic enumerators; whatever
    --         `roman_to_arab` yields for anything else; false for
    --         mixed-case input
    local gd = function(str)
        str = itemstripper:match(str)
        local decimal = tonumber(str)
        if decimal then
            -- Decimal input used to be run through the roman converter.
            return decimal
        end

        local con_str = con(str)
        if con_str == "alpha"  or
           con_str == "Alpha" then
            return str:byte()
        end

        if not (str:lower() == str  or
                str:upper() == str) then
            return false
        end
        return cached_roman(str)
    end

    helpers.list.get_decimal = gd
end

helpers.string = {}

do
    --- This grammar inside the function is slightly faster than the
    --- same as an upvalue with the value of “width” repeatedly given
    --- via lpeg.Carg(). This holds for repeated calls as well.
+    local ulen = utflen +    function helpers.string.wrapat (str, width) +        local width = width or 65 +        local linelength = 0 +        local wrap = P{ +            [1] = "wrapper", + +            wrapper       = Cs(V"nowhitespace"^0 * (Cs(V"wrapme") + V"other")^1), +            whitespace    = S" \t\v" + P"\n" / function() linelength = 0 end, +            nowhitespace  = 1 - V"whitespace", +            typing        = P[[\\type{]]  * (1 - P"}")^0 * P"}", +            typingenv     = P[[\\starttyping]] * (1 - P[[\\stoptyping]])^0 * P[[\\stoptyping]], +            ignore        = V"typing" + V"typingenv", +            --- the initial whitespace of the “other” pattern must not +            --- be enforced (“^1”) as it will break the exceptions +            --- (“ignore” pattern)! In general it is better to have the +            --- wrapper ignore some valid breaks than to not have it +            --- matching some valid strings at all. +            other         = Cmt(V"whitespace"^0 * (V"ignore" + (1 - V"whitespace")^1), function(s,i,w) +                                   linelength = linelength + ulen(w) +                                   return true +                               end), +            wrapme = Cmt(V"whitespace"^1 * (1 - V"whitespace" - V"ignore")^1, function(s,i,w) +                        local lw = ulen(w) +                        if linelength + lw > width then +                            linelength = lw +                            return true +                        end +                        return false +                    end) / function (word) return "\n" .. 
word:match("[^%s]+") end, +        } + +        local reflowed = wrap:match(str) +        return reflowed +    end +end + diff --git a/src/rst_parser.lua b/src/rst_parser.lua new file mode 100644 index 0000000..e633899 --- /dev/null +++ b/src/rst_parser.lua @@ -0,0 +1,1605 @@ +#!/usr/bin/env texlua +-------------------------------------------------------------------------------- +--         FILE:  rst_parser.lua +--        USAGE:  refer to doc/documentation.rst +--  DESCRIPTION:  https://bitbucket.org/phg/context-rst/overview +--       AUTHOR:  Philipp Gesang (Phg), <phg42.2a@gmail.com> +--      VERSION:  0.6 +--      CHANGED:  2014-02-28 06:49:01+0100 +-------------------------------------------------------------------------------- +-- + + +thirddata             = thirddata or { } +thirddata.rst         = { } +thirddata.rst_helpers = { } + +environment.loadluafile"rst_helpers" +environment.loadluafile"rst_directives" +environment.loadluafile"rst_setups" +environment.loadluafile"rst_context" + +local rst                   = thirddata.rst +local helpers               = thirddata.rst_helpers +local optional_setups       = thirddata.rst_setups + +rst.strip_BOM               = true +rst.expandtab               = true +rst.shiftwidth              = 4 +rst.crlf                    = true +helpers.rst_debug           = false + +local utf                   = unicode.utf8 + +local ioopen                = io.open +local iowrite               = io.write +local stringfind            = string.find +local stringformat          = string.format +local stringgsub            = string.gsub +local stringlen             = string.len +local stringmatch           = string.match +local stringstrip           = string.strip +local stringsub             = string.sub +local utflen                = utf.len + +local warn +do +    local ndebug = 0 +    warn = function(str, ...) 
+        if not helpers.rst_debug then return false end +        ndebug = ndebug + 1 +        local slen = #str + 3 +        --str = "*["..str.."]" +        str = stringformat("*[%4d][%s]", ndebug, str) +        local arglst = { ... } +        for i=1, #arglst do +            local current = arglst[i] +            if 80 - i * 8 - slen < 0 then +                local indent = "" +                for i=1, slen do +                    indent = indent .. " " +                end +                str = str .. "\n" .. indent +            end +            str = str .. stringformat(" |%6s", stringstrip(tostring(current))) +        end +        iowrite(str .. " |\n") +        return 0 +    end +end + +local C,   Cb, Cc, Cg, +      Cmt, Cp, Cs, Ct +    = lpeg.C,   lpeg.Cb, lpeg.Cc, lpeg.Cg, +      lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct + +local P, R, S, V, lpegmatch +    = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match + +local utf = unicode.utf8 + +local state          = {} +thirddata.rst.state  = state + +state.depth          = 0 +state.bullets        = {}  -- mapping bullet forms to depth +state.bullets.max    = 0 +state.lastbullet     = "" +state.lastbullets    = {} +state.roman_cache    = {}  -- storing roman numerals that were already converted +state.currentindent  = ""  -- used in definition lists and elsewhere +state.previousindent = ""  -- for literal blocks included in paragraphs to restore the paragraph indent +state.currentwidth   = 0   -- table layout +state.currentlayout  = {}  -- table layout +state.previousadorn  = nil -- section underlining and overlining + +state.footnotes            = {} +state.footnotes.autonumber = 0 +state.footnotes.numbered   = {} +state.footnotes.labeled    = {} +state.footnotes.autolabel  = {} +state.footnotes.symbol     = {} + +state.addme                = {} + +do +    local first_adornment = "" +    local valid_adornment = P { +        [1] = "adorncheck", +        adorncheck  = V"check_first" * V"check_other"^1 * -P(1), +        
check_first = Cmt(V"adornment_char", function(_,_, first) +                            first_adornment = first +                            return true +                        end) +                    , +        check_other = Cmt(V"adornment_char", function(_,_, char) +                            local prev = first_adornment +                            return char == prev +                        end) +                    , +        adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], +    } +    state.valid_adornment = valid_adornment +end + +local enclosed_mapping = { +    ["'"] = "'", +    ['"'] = '"', +    ["("] = ")", +    ["["] = "]", +    ["{"] = "}", +    ["<"] = ">", +} + +local utfchar = P{ -- from l-lpeg.lua, modified to use as grammar +    [1] = "utfchar", +    utf8byte      = R("\128\191"), +    utf8one       = R("\000\127"), +    utf8two       = R("\194\223") * V"utf8byte", +    utf8three     = R("\224\239") * V"utf8byte" * V"utf8byte", +    utf8four      = R("\240\244") * V"utf8byte" * V"utf8byte" * V"utf8byte", +    utfchar       = V"utf8one" + V"utf8two" + V"utf8three" + V"utf8four", +} + + + +local rst_parser = P { +    [1] = V"document", + +    document = V"blank_line"^0 * Cs(V"block"^1), + +-------------------------------------------------------------------------------- +-- Blocks +-------------------------------------------------------------------------------- + +    block = V"explicit_markup" +          + Cs(V"section")     / rst.escape +          + V"target_block" +          + V"literal_block" +          + Cs(V"list")        / rst.escape +          + Cs(V"line_block")  / rst.escape +          + Cs(V"table_block") / rst.escape +          + V"transition"    --/ rst.escape +          + V"comment_block" +          + Cs(V"block_quote") / rst.escape +          + Cs(V"paragraph")   / rst.escape +          , + +-------------------------------------------------------------------------------- +-- Explicit markup block 
+-------------------------------------------------------------------------------- + +    explicit_markup_start = V"double_dot" * V"whitespace", + +    explicit_markup = V"footnote_block" +                    + V"directive_block" +                    + V"substitution_definition" +                    , + +    explicit_markup_block = V"explicit_markup"^1 +                          , + +-------------------------------------------------------------------------------- +-- Directives block +-------------------------------------------------------------------------------- + +    directive_block = V"directive" +                    --* (V"blank_line"^-1 * V"directive")^0 +                    * V"end_block" +                    , + +    directive = V"explicit_markup_start" +              * C(((V"escaped_colon" + (1 - V"colon" - V"eol")) +                 - V"substitution_text")^1) --> directive name +              * V"double_colon" +              * Ct(V"directive_block_multi" + V"directive_block_single") --> content +              / rst.directive +              , + +    directive_block_multi = Cg((1 - V"eol")^0, "name") -- name +                          * V"eol" +                          * V"blank_line"^0 -- how many empty lines are permitted? 
+                          * V"directive_indented_lines" +                          , + +    directive_block_single = V"whitespace"^1 * Ct(C((1 - V"eol")^1)) * V"eol", + +-------------------------------------------------------------------------------- +-- Substitution definition block +-------------------------------------------------------------------------------- + +    substitution_definition = V"explicit_markup_start" +                            * V"substitution_text" +                            * V"whitespace"^1 +                            * C((1 - V"colon" - V"space" - V"eol")^1) -- directive +                            * V"double_colon" +                            * Ct(V"data_directive_block") +                            * V"end_block"^-1 +                            / rst.substitution_definition +                            , + +    substitution_text = V"bar" +                      * C((1 - V"bar" - V"eol")^1) +                      * V"bar" +                      , + +    data_directive_block = V"data_directive_block_multi" +                         + V"data_directive_block_single" +                         , +    data_directive_block_single = Ct(C((1 - V"eol")^0)) * V"eol", + +    data_directive_block_multi  = Cg((1 - V"eol")^0, "first") * V"eol" +                                * V"directive_indented_lines" +                                , + +    directive_indented_lines = Ct(V"directive_indented_first" +                                * V"directive_indented_other"^0) +                             * (V"blank_line"^1 * Ct(V"directive_indented_other"^1))^0 +                             , + + +    directive_indented_first = Cmt(V"space"^1, function(s,i,indent) +                                    warn("sub-i", #indent, i) +                                    state.currentindent = indent +                                    return true +                                end) +                             * C((1 - V"eol")^1) * V"eol" +                   
          , + +    directive_indented_other = Cmt(V"space"^1, function(s,i,indent) +                                    warn("sub-m", +                                      #state.currentindent <= #indent, +                                      #indent, +                                      #state.currentindent, +                                      i) +                                    return #state.currentindent <= #indent +                                end) +                             * C((1 - V"eol")^1) * V"eol" +                             , + + +-------------------------------------------------------------------------------- +-- Explicit markup footnote block +-------------------------------------------------------------------------------- + +    footnote_block = V"footnote"^1 * V"end_block", + +    footnote = V"explicit_markup_start" +             * (V"footnote_marker" + V"citation_reference_label") +             * C(V"footnote_content") +             * (V"blank_line" - V"end_block")^-1 +             / rst.footnote +             , + +    footnote_marker = V"lsquare" * C(V"footnote_label") * V"rsquare" * V"whitespace"^0 +                    , + +    citation_reference_label = V"lsquare" * C(V"letter" * (1 - V"rsquare")^1) * V"rsquare" * V"whitespace"^0, + +    footnote_label = V"digit"^1 +                   + (V"gartenzaun" * V"letter"^1) +                   + V"gartenzaun" +                   + V"asterisk" +                   , + +    footnote_content = V"footnote_long" -- single line +                     + V"footnote_simple" +                     , + +    footnote_simple = (1 - V"eol")^1 * V"eol" +                    , + +    footnote_long = (1 - V"eol")^1 * V"eol" +                  * V"footnote_body" +                  , + +    footnote_body = V"fn_body_first" +                  * (V"fn_body_other" + V"fn_body_other_block")^0 +                  , + +    fn_body_first = Cmt(V"space"^1, function(s, i, indent) +                        warn("fn-in", 
true, #indent) +                        state.currentindent = indent +                        return true +                    end) +                  * (1 - V"eol")^1 * V"eol" +                  , + +    fn_matchindent = Cmt(V"space"^1, function(s, i, indent) +                        local tc = state.currentindent +                        warn("fn-ma", tc == indent, #tc, #indent, i) +                        return tc == indent +                    end) +                   , + +    fn_body_other = V"fn_body_other_regular" +                  * (V"blank_line" * V"fn_body_other_regular")^0 +                  , + +    fn_body_other_regular = V"fn_matchindent" +                          * (1 - V"eol")^1 * V"eol" +                          , + +    -- TODO find a way to get those to work in footnotes! +    fn_body_other_block = V"line_block" +                        + V"table_block" +                        + V"transition" +                        + V"block_quote" +                        + V"list" +                        , + +-------------------------------------------------------------------------------- +-- Table block +-------------------------------------------------------------------------------- + +    table_block = V"simple_table" +                + V"grid_table" +                , + +-------------------------------------------------------------------------------- +-- Simple tables +-------------------------------------------------------------------------------- + +    simple_table = Ct(V"st_first_row" +                    * V"st_other_rows") +                 * V"end_block" +                 / function (tab) +                     return rst.simple_table(helpers.table.simple(tab)) +                 end +                 , + +    st_first_row = V"st_setindent" +                 * C(V"st_setlayout") +                 * V"space"^0 +                 * V"eol" +                 , + +    st_setindent = Cmt(V"space"^0, function(s, i, indent) +                        
warn("sta-i", "true",  #indent, "set", i) +                        state.currentindent = indent +                        return true +                    end) +                 , + +    st_matchindent = Cmt(V"space"^0, function(s, i, indent) +                          warn("sta-m", state.currentindent == indent, #indent, #state.currentindent, i) +                          return state.currentindent == indent +                      end) +                   , + +    st_setlayout = Cmt((V"equals"^1) * (V"spaces" * V"equals"^1)^1, function(s, i, layout) +                        local tc = state.currentlayout +                        warn("sta-l", #layout, "set", "", i) +                        tc.raw = layout +                        tc.bounds = helpers.get_st_boundaries(layout) +                        return true +                    end) +                 , + +    st_other_rows = (V"st_content"^1 * V"st_separator")^1, + +    st_content = V"blank_line"^-1 +               * C(V"st_matchlayout"), + +    st_matchlayout = -#V"st_separator" * Cmt((1 - V"eol")^1, function (s, i, content) +                        -- Don't check for matching indent but if the rest is +                        -- fine then the line should be sane. This allows +                        -- cells starting with spaces. +                        content = stringsub (content, #state.currentindent) +                        local tcb = state.currentlayout.bounds +                        local n = 1 +                        local spaces_only = P" "^1 +                        while n < #tcb.slices do +                            local from = tcb.slices[n]  .stop +                            local to   = tcb.slices[n+1].start +                            local between = lpegmatch (spaces_only, content, from) +                            if not between then -- Cell spanning more than one row. 
+                                -- pass +                                warn("sta-c", "span", from, to, i) +                            elseif not (between >= to) then +                                warn("sta-c", "false", from, to, i) +                                return false +                            end +                            n = n + 1 +                        end +                        warn("sta-c", "true", #tcb.slices, "", i) +                        return true +                     end) +                     * V"eol" +                   , + +    st_separator = V"st_matchindent" +                 * C(V"st_normal_sep" + V"st_colspan_sep") +                 * V"eol" +                 , + +    st_normal_sep = Cmt((V"equals"^1) * (V"spaces" * V"equals"^1)^1, function(s, i, layout) +                        warn("sta-s", state.currentlayout.raw == layout, #layout, #state.currentlayout.raw, i) +                        return state.currentlayout.raw == layout +                    end) +                  , + +    st_colspan_sep = Cmt(V"dash"^1 * (V"spaces" * V"dash"^1)^0, function(s, i, layout) +                         local tcb = state.currentlayout.bounds +                         local this = helpers.get_st_boundaries (layout) +                         local start_valid = false +                         for start, _ in next, this.starts do +                             if tcb.starts[start] then +                                 start_valid = true +                                 local stop_valid = false +                                 for stop, _ in next, this.stops do +                                     if tcb.stops[stop] then -- bingo +                                         stop_valid = true +                                     end +                                 end +                                 if not stop_valid then +                                     warn("sta-x", stop_valid, #layout, #state.currentlayout.raw, i) +           
                          return false +                                 end +                             end +                         end +                         warn("sta-x", start_valid, #layout, #state.currentlayout.raw, i) +                         return start_valid +                     end) +                   , + + +-------------------------------------------------------------------------------- +-- Grid tables +-------------------------------------------------------------------------------- + +    grid_table = Ct(V"gt_first_row" +                  * V"gt_other_rows") +               * V"blank_line"^1 +               / function(tab) +                   return rst.grid_table(helpers.table.create(tab)) +               end +               , + +    gt_first_row = V"gt_setindent" +                 * C(V"gt_sethorizontal") +                 * V"eol" +                 , + +    gt_setindent = Cmt(V"space"^0, function(s, i, indent) +                        warn("tab-i", true, #indent, "set", i) +                        state.currentindent = indent +                        return true +                    end) +                 , + +    gt_layoutmarkers = V"table_intersection" + V"table_hline" + V"table_header_hline", + +    gt_sethorizontal = Cmt(V"gt_layoutmarkers"^3, function (s, i, width) +                             warn("tab-h", "width", "true", #width, "set", i) +                             state.currentwidth = #width +                             return true +                         end) +                     , + +    gt_other_rows = V"gt_head"^-1 +                  * V"gt_body" +                  , + +    gt_matchindent = Cmt(V"space"^0, function (s, i, this) +        local matchme = state.currentindent +        warn("tab-m", "indent", #this == #matchme, #this, #matchme, i) +        return #this == #matchme +    end) +    , + + +    gt_cell = (V"gt_content_cell" + V"gt_line_cell") +    * (V"table_intersection" + V"table_vline") +    , + +    
-- Text of one grid table cell: a non-empty run of characters that stops at
-- the next vertical bar, intersection or end of line.
    gt_content_cell = ((1 - V"table_vline" - V"table_intersection" - V"eol")^1),

    -- Horizontal border segment inside a row: one or more table_hline
    -- characters.
    gt_line_cell = V"table_hline"^1,

    -- One table row.  The indent has to pass gt_matchindent; the row text
    -- (a leading border character followed by at least one cell) is
    -- captured; at most one trailing whitespace is tolerated before the
    -- end of line.
    gt_contentrow = V"gt_matchindent"
                   * C((V"table_intersection" + V"table_vline")
                     * V"gt_cell"^1)
                   * V"whitespace"^-1 * V"eol"
                  ,

    -- Table body: one or more groups of rows, each group consisting of
    -- rows that are not body separators and closed by a body separator.
    gt_body = ((V"gt_contentrow" - V"gt_bodysep")^1 * V"gt_bodysep")^1,

    -- Body separator: intersections joined by runs of table_hline.  The
    -- match-time function accepts the line only if its byte length equals
    -- state.currentwidth.
    gt_bodysep = V"gt_matchindent"
               * C(Cmt(V"table_intersection"
                     * (V"table_hline"^1 * V"table_intersection")^1, function(s, i, separator)
                          local matchme = state.currentwidth
                          warn("tab-m", "body", #separator == matchme, #separator, matchme, i)
                          return #separator == matchme
                      end))
               * V"whitespace"^-1 * V"eol"
               ,

    -- Table head: at least one row followed by the header separator.
    gt_head = V"gt_contentrow"^1
            * V"gt_headsep"
            ,

    -- Header separator: same shape and width check as gt_bodysep, but the
    -- runs between intersections consist of table_header_hline characters.
    gt_headsep = V"gt_matchindent"
               * C(Cmt(V"table_intersection"
                    * (V"table_header_hline"^1 * V"table_intersection")^1, function(s, i, separator)
                          local matchme = state.currentwidth
                          warn("tab-s", "head", #separator == matchme, #separator, matchme, i)
                          return #separator == matchme
                      end))
               * V"whitespace"^-1 * V"eol"
               ,


--------------------------------------------------------------------------------
-- Block quotes
--------------------------------------------------------------------------------

    -- A block quote.  The table capture holds the substituted quote text
    -- (first line, further lines, and further paragraphs each introduced
    -- by a blank line) and, optionally, the substituted attribution that
    -- follows a blank line.  The quote has to be followed by end_block;
    -- the table is handed to rst.block_quote.
    block_quote = Ct(Cs(V"block_quote_first"
                   * V"block_quote_other"^0
                   * (V"blank_line" * V"block_quote_other"^1)^0)
                   * (V"blank_line"
                   *  Cs(V"block_quote_attri"))^-1)
                * V"end_block"
                / rst.block_quote
                ,

-- First line of a block quote.  The leading spaces are stored in
-- state.currentindent and replaced by the empty string; the line must not
-- start with an attribution dash.
    block_quote_first = Cmt(V"space"^1, function (s, i, indent)
                             warn("bkq-i", #indent, "", indent, "", i)
                             state.currentindent = indent
                             return true
                         end) / ""
                      * -V"attrib_dash"
                      * (1 - V"eol")^1
                      * V"eol"
                      ,

    -- Further line of a block quote: its leading spaces have to equal the
    -- indent recorded by block_quote_first.
    block_quote_other = Cmt(V"space"^1, function (s, i, indent)
                            warn("bkq-m", #indent, #state.currentindent,
                                           indent,  state.currentindent, i)
                            return state.currentindent == indent
                        end) / ""
                      * -V"attrib_dash"
                      * (1 - V"eol")^1
                      * V"eol"
                      ,

    -- Attribution of a block quote: a first line plus continuation lines.
    block_quote_attri = V"block_quote_attri_first"
                      * V"block_quote_attri_other"^0,

    -- First attribution line.  "indent" is the whole marker (spaces, the
    -- attribution dash and one space).  It is accepted if its leading
    -- spaces equal the current indent; on success the whole marker becomes
    -- the new state.currentindent.
    block_quote_attri_first = Cmt(V"space"^1 * V"attrib_dash" * V"space", function (s, i, indent)
                                   local t = state
                                   warn("bqa-i", utflen(indent), #t.currentindent,
                                                 indent,         t.currentindent, i)
                                   local ret = stringmatch (indent, " *") == t.currentindent
                                   t.currentindent = ret and indent or t.currentindent
                                   return ret
                               end) / ""
                            * (1 - V"eol")^1
                            * V"eol"
                            ,

    -- Attribution continuation line: indented by as many spaces as the
    -- stored marker has characters.
    block_quote_attri_other = Cmt(V"space"^1, function (s, i, indent)
                                  warn("bqa-m", #indent, utflen(state.currentindent),
                                                 indent,  state.currentindent, i)
                                  return utflen(state.currentindent) == #indent
                              end) / ""
                            * (1 - V"eol")^1
                            * V"eol"
                            ,

--------------------------------------------------------------------------------
-- Line blocks
--------------------------------------------------------------------------------

    -- A line block: a first line followed by at least one further or
    -- empty line, then end_block.  The substituted text is passed to
    -- rst.line_block.
    line_block = Cs(V"line_block_first"
                  * (V"line_block_other"
                   + V"line_block_empty")^1)
               --* V"blank_line"
               * V"end_block"
               / rst.line_block
               ,

    -- Optional spaces, a bar and one space.
    line_block_marker = V"space"^0 * V"bar" * V"space",

    -- A bar on an otherwise empty line.
    line_block_empty_marker = V"space"^0 * V"bar" * V"space"^0 * V"eol",


    -- First line: stores the marker in state.currentindent.
    line_block_first = Cmt(V"line_block_marker", function(s, i, marker)
                            warn("lbk-i", #marker, "", marker, "", i)
                            state.currentindent = marker
                            return true
                        end) / ""
                     * V"line_block_line"
                     ,

    -- Empty line: everything from the bar onwards is normalized to "| "
    -- so that the marker can be compared with the stored one.
    line_block_empty = Cmt(V"line_block_empty_marker", function(s, i, marker)
                            warn("lbk-e", #marker, #state.currentindent, marker, state.currentindent, i)
                            marker = stringgsub (marker, "|.*", "| ")
                            return state.currentindent == marker
                        end) / ""
                     / rst.line_block_empty
                     ,

    -- Further line: the marker has to equal the stored one.
    line_block_other = Cmt(V"line_block_marker", function(s, i, marker)
                            warn("lbk-m", #marker, #state.currentindent, marker, state.currentindent, i)
                            return state.currentindent == marker
                        end) / ""
                     * V"line_block_line"
                     ,

    -- Text of one line including its continuation lines; the substituted
    -- text is passed to rst.line_block_line.
    line_block_line = Cs((1 - V"eol")^1
                       * V"line_block_cont"^0
                       * V"eol")
                    / rst.line_block_line
                    ,

    -- Continuation: a line break that is not followed by a marker, then
    -- at least as many spaces as the stored marker is long.
    line_block_cont = (V"eol" - V"line_block_marker")
                    * Cmt(V"space"^1, function(s, i, spaces)
                            warn("lbk-c", #spaces, #state.currentindent, spaces, state.currentindent, i)
                            return #spaces >= #state.currentindent
                        end) / ""
                    * (1 - V"eol")^1
                    ,

--------------------------------------------------------------------------------
-- Literal blocks
--------------------------------------------------------------------------------

    -- Stand-alone literal block: marker line, substituted block lines,
    -- end_block; the text is passed to rst.literal_block.
    literal_block = V"literal_block_marker"
                    * Cs(V"literal_block_lines")
                    * V"end_block"
                    / rst.literal_block,

    -- "::" alone on a line, followed by a blank line.
    literal_block_marker = V"double_colon" * V"whitespace"^0 * V"eol" * V"blank_line",

    -- Indented variant is tried first, quoted variant second.
    literal_block_lines = V"unquoted_literal_block_lines"
                        + V"quoted_literal_block_lines"
                        ,

    -- Indented lines; single blank lines may separate them.
    unquoted_literal_block_lines = V"literal_block_first"
                                 * (V"blank_line"^-1 * V"literal_block_other")^0
                                 ,

    quoted_literal_block_lines =  V"quoted_literal_block_first"
                               * V"quoted_literal_block_other"^0 -- no blank lines allowed
                               ,

    -- First indented line.  If the previously stored indent is shorter
    -- than this line's indent, one space is appended to it; otherwise the
    -- stored indent is reset to a single space.
    literal_block_first = Cmt(V"space"^1, function (s, i, indent)
                        warn("lbk-f", #indent, "", "", i)
                        if not indent or
                            indent == "" then
                            return false
                        end
                        if state.currentindent and #state.currentindent < #indent then
                            state.currentindent = state.currentindent .. " "
                            return true
                        else
                            state.currentindent = " "
                            return true
                        end
                    end)
                   * V"rest_of_line"
                   * V"eol",

    -- Further indented line: indented at least as deep as the stored
    -- indent.
    literal_block_other = Cmt(V"space"^1, function (s, i, indent)
                        warn("lbk-m",
                             #indent,
                             #state.currentindent,
                             #indent >= #state.currentindent,
                             i)
                        return #indent >= #state.currentindent
                    end)
                   * V"rest_of_line"
                   * V"eol"
                   ,

    -- First line of a quoted literal block: stores the quoting character
    -- in state.currentindent.
    quoted_literal_block_first = Cmt(V"adornment_char", function (s, i, indent)
                        warn("qlb-f", #indent, indent, "", i)
                        if not indent    or
                            indent == "" then
                            return false
                        end
                        state.currentindent = indent
                        return true
                    end)
                   * V"rest_of_line"
                   * V"eol"
                   ,

    -- Further line of a quoted literal block.  Every line has to start
    -- with the same quoting character as the first one.  The previous
    -- length comparison (#indent >= #state.currentindent) held for any
    -- pair of single quoting characters, so lines with a different
    -- character were accepted as well.
    quoted_literal_block_other = Cmt(V"adornment_char", function (s, i, indent)
                        local same_quote = indent == state.currentindent
                        warn("qlb-m",
                             #indent,
                             #state.currentindent,
                             same_quote,
                             i)
                        return same_quote
                    end)
                   * V"rest_of_line"
                   * V"eol",

--------------------------------------------------------------------------------
-- Lists
--------------------------------------------------------------------------------

    -- Any list type, unless the text starts explicit markup.
    list = (V"option_list"
          + V"bullet_list"
          + V"definition_list"
          + V"field_list")
         - V"explicit_markup_start"
         ,

--------------------------------------------------------------------------------
-- Option lists
--------------------------------------------------------------------------------

    -- One or more items, each optionally followed by one blank line; the
    -- substituted text is passed to rst.option_list.
    option_list = Cs((V"option_list_item"
                   * V"blank_line"^-1)^1)
                /rst.option_list,

    -- Table of { option group, substituted description }, passed to
    -- rst.option_item.
    option_list_item = Ct(C(V"option_group")
                        * Cs(V"option_description"))
                     / rst.option_item,

    -- Description variants, tried in this order: starting on the next
    -- line, multi-line, single line.
    option_description = V"option_desc_next"
                       + V"option_desc_more"
                       + V"option_desc_single",

    -- At least two spaces, then the rest of the line.
    option_desc_single = V"space"^2
                       --* V"rest_of_line"
                       * (1 - V"eol")^1
                       * V"eol",

    -- Like option_desc_single, continued by indented lines and further
    -- indented paragraphs after blank lines.
    option_desc_more = V"space"^2
                     * (1 - V"eol")^1
                     * V"eol"
                     * V"indented_lines"
                     * (V"blank_line" * V"indented_lines")^0,

    -- Description that starts on the line after the option group.
    option_desc_next = V"eol"
                     * V"indented_lines"
                     * (V"blank_line" * V"indented_lines")^0,

    -- Options separated by a comma and one space.
    option_group = V"option"
                 * (V"comma" * V"space" * V"option")^0,

    -- One option with an optional argument.
    option = (V"option_posixlong"
            + V"option_posixshort"
            + V"option_dos_vms")
            * V"option_arg"^-1,

    -- Argument after "=" or a space: a letter followed by at least one
    -- letter or digit, or any non-empty text in angle brackets.
    option_arg = (V"equals" + V"space")
               * ((V"letter" * (V"letter" + V"digit")^1)
                + (V"angle_left" * (1 - V"angle_right")^1 * V"angle_right")),

    -- "-x"
    option_posixshort = V"dash" * (V"letter" + V"digit"),

    -- "--xy..."; at least two characters after the double dash.
    option_posixlong = V"double_dash"
                     * V"letter"
                     * (V"letter" + V"digit" + V"dash")^1,

    -- "/abc"
    option_dos_vms = V"slash"
                   * V"letter"^1,

--------------------------------------------------------------------------------
-- Field lists (for bibliographies etc.)
--------------------------------------------------------------------------------

    -- Fields, optionally separated by single blank lines, followed by
    -- end_block; the substituted text is passed to rst.field_list.
    field_list = Cs(V"field"
                  * (V"blank_line"^-1 * V"field")^0)
               * V"end_block"
               / rst.field_list,

    -- Table of { field name, field body }, passed to rst.field.
    field = Ct(V"field_marker"
             * V"whitespace"
             * V"field_body")
          / rst.field,

    -- ":name:" -- only the name is captured.
    field_marker = V"colon"
                 * C(V"field_name")
                 * V"colon",

    -- Anything up to the next colon; backslash-escaped colons are part of
    -- the name.
    field_name = (V"escaped_colon" + (1 - V"colon"))^1,

    field_body = V"field_single" + V"field_multi",

    -- Body on the marker's line (captured without the line break).
    field_single = C((1 -V"eol")^1) * V"eol",

    -- Possibly empty first line plus optional indented lines, captured as
    -- one string.
    field_multi = C((1 - V"eol")^0 * V"eol"
                  * V"indented_lines"^-1),

--------------------------------------------------------------------------------
-- Definition lists
--------------------------------------------------------------------------------

    -- Items separated by blank lines; the first item must not be a
    -- comment.  The table of items is passed to rst.deflist.
    definition_list = Ct((V"definition_item" - V"comment")
                      * (V"blank_line" * V"definition_item")^0)
                    * V"end_block"
                    / rst.deflist
                    ,

    -- Table of { term, classifiers..., { definition paragraphs } }.
    definition_item = Ct(C(V"definition_term")
                       * V"definition_classifiers"
                       * V"eol"
                       * Ct(V"definition_def"))
                    ,

    -- The term must not start with a space or a field marker and ends at
    -- the first classifier separator or the end of the line.
    definition_term = #(1 - V"space" - V"field_marker")
                    * (1 - V"eol" - V"definition_classifier_separator")^1
                    ,

    -- " : "
    definition_classifier_separator = V"space" * V"colon" * V"space",

    definition_classifiers = V"definition_classifier"^0,

    -- Separator followed by the captured classifier text.
    definition_classifier = V"definition_classifier_separator"
                          * C((1 - V"eol" - V"definition_classifier_separator")^1)
                          ,

    -- First paragraph and any further paragraphs, one capture each.
    definition_def = C(V"definition_firstpar") * C(V"definition_par")^0
                   ,

    -- Indent of the first definition line; stored in state.currentindent.
    definition_indent = Cmt(V"space"^1, function(s, i, indent)
                            warn("def-i", #indent, #state.currentindent, indent == state.currentindent, i)
                            state.currentindent = indent
                            return true
                        end),

    definition_firstpar = V"definition_parinit"
                        * (V"definition_parline" - V"blank_line")^0
                        ,

    -- Further paragraph: a blank line followed by at least one line.
    definition_par = V"blank_line"
                   * (V"definition_parline" - V"blank_line")^1
                   ,

    definition_parinit = V"definition_indent"
                       * (1 - V"eol")^1
                       * V"eol"
                       ,

    definition_parline = V"definition_match"
                       * (1 - V"eol")^1
                       * V"eol"
                       ,

    -- Indent of a further definition line: has to equal the stored one.
    definition_match = Cmt(V"space"^1, function (s, i, this)
                            warn("def-m", #this, #state.currentindent, this == state.currentindent, i)
                            return this == state.currentindent
                        end),

--------------------------------------------------------------------------------
-- Bullet lists and enumerations
--------------------------------------------------------------------------------

    -- the next rule handles enumerations as well
    -- After the closing end_block the match-time function pops one
    -- nesting level: it clears the bullet of the current depth,
    -- decrements the depth and restores the last bullet of the enclosing
    -- level.
    bullet_list = V"bullet_init"
                * (V"blank_line"^-1 * (V"bullet_list" + V"bullet_continue"))^1
                * V"bullet_stop"
                * Cmt(Cc(nil), function (s, i)
                    local t = state
                    warn("close", t.depth)
                    t.bullets[t.depth] = nil -- “pop”
                    t.depth = t.depth - 1
                    t.lastbullet = t.lastbullets[t.depth]
                    return true
                end),

    bullet_stop = V"end_block" / rst.stopitemize,

    -- First item of a (sub)list: table of { bullet, item text }, passed
    -- to rst.bullet_item.
    bullet_init = Ct(C(V"bullet_first") * V"bullet_itemrest")
                / rst.bullet_item
                ,

    -- Opens a list level.  The match-time function runs inside a
    -- lookahead; the bullet is consumed by the V"bullet_indent" that
    -- follows it.  n_spaces is the value returned by lpegmatch, i.e. the
    -- index just past the leading spaces of the bullet (number of leading
    -- spaces plus one).
    bullet_first = #Cmt(V"bullet_indent", function (s, i, bullet)
                        local t = state
                        local oldbullet = t.bullets[t.depth]
                        local n_spaces = lpegmatch(P" "^0, bullet)
                        warn("first",
                            t.depth,
                            (t.depth == 0 and n_spaces >= 1) or
                            (t.depth >  0 and n_spaces >  1),
                            bullet,
                            oldbullet,
                            helpers.list.conversion(bullet))

                        if t.depth == 0 and n_spaces >= 1 then -- first level
                            t.depth = 1             -- “push”
                            t.bullets[1] = bullet
                            t.lastbullet = bullet
                            t.bullets.max = t.bullets.max < t.depth and t.depth or t.bullets.max
                            return true
                        elseif t.depth > 0 and n_spaces > 1 then    -- sublist (of sublist)^0
                            if n_spaces >= utflen(oldbullet) then
                                t.lastbullets[t.depth] = t.lastbullet
                                t.depth = t.depth + 1
                                t.bullets[t.depth] = bullet
                                t.lastbullet = bullet
                                t.bullets.max = t.bullets.max < t.depth and t.depth or t.bullets.max
                                return true
                            end
                        end
                        return false
                    end)
                    * V"bullet_indent"
                    / rst.startitemize
                    ,

    -- Optional spaces, the bullet or enumerator, at least one space.
    bullet_indent = V"space"^0 * V"bullet_expr" * V"space"^1,

    -- Bullet of a further item on the current level.  It has to be as
    -- long as the bullet that opened the level.  A bullet without a
    -- conversion has to be identical to the opening one; a bullet with the
    -- same conversion as the previous one is accepted if the conversion is
    -- "auto" or helpers.list.greater says so.
    -- The return expression used to read `autoconv or successor or
    -- greater`; "successor" is not defined in this function, so it was a
    -- read of an unset global and has been dropped.
    bullet_cont  = Cmt(V"bullet_indent", function (s, i, bullet)
                        local t = state
                        local conversion = helpers.list.conversion
                        warn("conti",
                                t.depth,
                                bullet == t.bullets[t.depth],
                                bullet,
                                t.bullets[t.depth],
                                t.lastbullets[t.depth],
                                conversion(t.lastbullet),
                                conversion(bullet)
                                )

                        if utflen(t.bullets[t.depth]) ~= utflen(bullet) then
                            return false
                        elseif not conversion(bullet) and t.bullets[t.depth] == bullet then
                            return true
                        elseif conversion(t.lastbullet) == conversion(bullet) then -- same type
                            local autoconv  = conversion(bullet) == "auto"
                            local greater   = helpers.list.greater  (bullet, t.lastbullet)
                            t.lastbullet = bullet
                            return autoconv or greater
                        end
                        -- no branch applied: fail explicitly (returning
                        -- nothing from a match-time function fails as well)
                        return false
                    end),

    bullet_continue = Ct(C(V"bullet_cont") * V"bullet_itemrest")
                    /rst.bullet_item
                    ,

    bullet_itemrest = C(V"bullet_rest"                               -- first line
                       * ((V"bullet_match" * V"bullet_rest")^0        -- any successive lines
                        * (V"blank_line"
                         * (V"bullet_match" * (V"bullet_rest" - V"bullet_indent"))^1)^0))
                    ,
                         --                                     ^^^^^^^^^^^^^
                         --                                     otherwise matches bullet_first

    bullet_rest = (1 - V"eol")^1 * V"eol",  -- rest of one line

    bullet_next  = V"space"^1
                 ,

    -- Indent of an item's continuation line: as many spaces as the bullet
    -- of the current level has characters.
    bullet_match = Cmt(V"bullet_next", function (s, i, this)
                         local t = state
                         warn("match",
                                t.depth,
                                stringlen(this) == utflen(t.bullets[t.depth]),
                                utflen(t.bullets[t.depth]), stringlen(this) )
                         return stringlen(this) == utflen(t.bullets[t.depth])
                     end)
                 ,

    -- Bullet character or enumerator: "(x)", "x)", "x." or a bare "x";
    -- the last two need a following space.
    bullet_expr = V"bullet_char"
                + (P"(" * V"number_char" * P")")
                +        (V"number_char" * P")")
                + (V"number_char" * V"dot") * #V"space"
                + (V"number_char" * #V"space")
                ,

    -- Enumerator: roman numeral, "#", digits or a single letter.
    number_char = V"roman_numeral"
                + V"Roman_numeral"
                + P"#"
                + V"digit"^1
                + R"AZ"
                + R"az"
                ,

--------------------------------------------------------------------------------
-- Transitions
--------------------------------------------------------------------------------

    -- At least four adornment characters, captured.
    transition_line = C(V"adornment_char"^4),

    transition = V"transition_line" * V"eol"
               * V"end_block"
               / rst.transition
               ,

--------------------------------------------------------------------------------
-- Sectioning
--------------------------------------------------------------------------------

    section_adorn = V"adornment_char"^1,

    -- Heading with underline only, or with overline and underline; the
    -- captures are passed to rst.section.
    section = ((V"section_text" * V"section_once")
             + (V"section_before" * V"section_text" * V"section_after"))
            / rst.section
            * (V"end_block" + V"blank_line")
            ,

    -- The whitespace handling after the overline is necessary because headings
    -- without overline aren't allowed to be indented.
-- Overline of a heading.  The adornment is remembered in
-- state.previousadorn (even if it turns out to be invalid) and accepted if
-- it matches state.valid_adornment.
    section_before = C(Cmt(V"section_adorn", function(s,i, adorn)
                          local adorn_matched = lpegmatch (state.valid_adornment, adorn)
                          state.previousadorn = adorn
                          warn ("sec-f", adorn_matched,
                                stringsub (adorn, 1,2) .. "...", "", i)
                          if adorn_matched then
                              return true
                          end
                          return false
                      end))
                   * V"whitespace"^0
                   * V"eol"
                   * V"whitespace"^0
                   ,

    -- Heading text: at least two characters, the first of which is
    -- neither a space nor a line break.
    section_text = C((1 - V"space" - V"eol") * (1 - V"eol")^1) * V"eol",

    -- Underline of a heading that has an overline: has to be a valid
    -- adornment and, if an overline was remembered, identical to it.
    -- Clears state.previousadorn.
    section_after = C(Cmt(V"section_adorn", function(s,i, adorn)
                         local tests = false
                         if lpegmatch (state.valid_adornment, adorn) then
                           tests = true
                         end
                         if state.previousadorn then
                             tests = tests and adorn == state.previousadorn
                         end
                         warn ("sec-a", tests, stringsub (adorn, 1,2) .. "…", "", i)
                         state.previousadorn = nil
                         return tests
                     end))
                    * V"whitespace"^0
                    ,

    -- Underline of a heading without overline: has to be a valid
    -- adornment.  Clears state.previousadorn.
    section_once = C(Cmt(V"section_adorn", function(s,i, adorn)
                         local tests = false
                         if lpegmatch (state.valid_adornment, adorn) then
                           tests = true
                         end
                         warn ("sec-o", tests, stringsub (adorn, 1,2) .. "…", "", i)
                         state.previousadorn = nil
                         return tests
                     end))
                    * V"whitespace"^0
                    ,

--------------------------------------------------------------------------------
-- Target Blocks
--------------------------------------------------------------------------------

    -- Plain target name up to the closing colon (captured without it);
    -- backslash-escaped colons belong to the name.
    tname_normal = C((V"escaped_colon" + 1 - V"colon")^1)
                 * V"colon",

    -- Target name in backticks (captured with them), then the colon.
    tname_bareia = C(V"bareia"
                    * (1 - V"eol" - V"bareia")^1
                    * V"bareia")
                 * V"colon",

    -- ".. _name:"
    target_name = V"double_dot"
                * V"space"
                * V"underscore"
                * (V"tname_bareia" + V"tname_normal"),

    -- Line break plus indent; the indent is stored in the named group
    -- "indent".
    target_firstindent = V"eol" * Cg(V"space"^1, "indent"),

    target_nextindent  = V"eol" * C(V"space"^1),

    -- Indent of a further link line: has to equal the "indent" group.
    target_indentmatch = Cmt(V"target_nextindent" -- I ♡ LPEG!
                           * Cb("indent"), function (s, i, a, b)
                                return a == b
                            end),

    -- Link part of a target.  Three alternatives, tried in this order:
    --   1. a link on indented continuation lines; its characters are
    --      captured one by one, collected in a table and passed to
    --      rst.joinindented;
    --   2. the rest of the current line, provided the next line starts
    --      with "..", "__" or a line break;
    --   3. no link: everything up to the next end_block is skipped and
    --      the empty string is produced.
    -- The lookahead that closes alternative 1 used to read
    -- `#(1 - V"whitespace" - "eol")`, which excluded the literal text
    -- "eol", not the rule.  The rule must not be excluded either: the
    -- blank line that ends a target block begins with a line break and
    -- has to pass this lookahead.  Hence only whitespace is rejected.
    target_link  = ( V"space"^0 * V"target_firstindent"
                 * Ct(C(1 - V"whitespace" - V"eol")^1
                    * (V"target_indentmatch"
                     * C(1 - V"whitespace" - V"eol")^1)^0)
                 * V"eol" * #(1 - V"whitespace")) / rst.joinindented
                 + C((1 - V"eol")^1) * V"eol" * #(V"double_dot" + V"double_underscore" + V"eol")
                 + (1 - V"end_block")^0 * Cc(""),

    -- One or more target names (one per line) sharing a link; the table
    -- of names and link is passed to rst.target.
    target       = Ct((V"target_name" * (V"space"^0 * V"eol" * V"target_name")^0)
                 * V"space"^0
                 * V"target_link")
                 / rst.target,

    -- ".. __:" or "__"
    anonymous_prefix = (V"double_dot" * V"space" * V"double_underscore" * V"colon")
                     + (V"double_underscore")
                     ,

    -- Anonymous target: the name slot of the table is the empty string.
    anonymous_target = V"anonymous_prefix"
                     * V"space"^0
                     * Ct(Cc"" * V"target_link")
                     / rst.target
                     ,

    target_block = (V"anonymous_target" + V"target")^1
                 * V"end_block",

--------------------------------------------------------------------------------
-- Paragraphs * Inline Markup
--------------------------------------------------------------------------------

    -- Table of captured lines, passed to rst.paragraph; afterwards the
    -- block has to end and the list nesting depth is reset.
    paragraph = Ct(V"par_first"
                 * V"par_other"^0) / rst.paragraph
              * V"end_block"
              * V"reset_depth"
              ,

    -- First line: sets the indent; the text is captured up to the line
    -- break or a literal block shorthand, which pulls in the literal
    -- block that follows.
    par_first = V"par_setindent"
              * C((1 - V"literal_block_shorthand" - V"eol")^1)
              * (V"included_literal_block" + V"eol")
              ,

    -- Further line: same as par_first, but the indent has to match.
    par_other = V"par_matchindent"
              * C((1 - V"literal_block_shorthand" - V"eol")^1)
              * (V"included_literal_block" + V"eol")
              ,

    -- Saves the old indent in state.previousindent and stores the new
    -- one; always succeeds.
    par_setindent = Cmt(V"space"^0, function (s, i, indent)
                        warn("par-i", #indent, "", "", i)
                        state.previousindent = state.currentindent
                        state.currentindent = indent
                        return true
                    end),

    par_matchindent = Cmt(V"space"^0, function (s, i, indent)
                          warn("par-m", state.currentindent == indent, #indent, #state.currentindent, i)
                          return state.currentindent == indent
                      end),

    -- Literal block attached to a paragraph line.  After the block the
    -- indent saved by par_setindent is restored.
    included_literal_block = V"literal_block_shorthand"
                           * V"literal_block_markerless"
                           * Cmt(Cp(), function (s, i, _)
                                  warn("par-s", "", #state.previousindent, #state.currentindent, i)
                                  state.currentindent = state.previousindent
                                  return true
                              end)
                           ,

    -- ": ::" or "::" at the end of a line, followed by a blank line.
    literal_block_shorthand = Cs((V"colon" * V"space" * V"double_colon"
                                + V"double_colon")
                             * V"whitespace"^0
                             * V"eol"
                             * V"blank_line")
                             -- The \unskip is necessary because the lines of a
                             -- paragraph get concatenated from a table with a
                             -- space as separator. And the literal block is
                             -- treated as one such line, hence it would be
                             -- preceded by a space. As the ":" character
                             -- always  follows a non-space this should be a
                             -- safe, albeit unpleasant, hack. If you don't
                             -- agree then file a bug report and I'll look into
                             -- it.
                             / "\\\\unskip:"
                            ,

    -- Block lines without a marker of their own, closed by a blank line;
    -- the substituted text is passed to rst.included_literal_block.
    literal_block_markerless = Cs(V"literal_block_lines")
                             * V"blank_line"
                             / rst.included_literal_block
                             ,

    -- This is needed because lpeg.Cmt() patterns are evaluated even
    -- if they are part of a larger pattern that doesn’t match. The
    -- result is that they confuse the nesting.
    -- Resetting the current nesting depth at every end of block
    -- should be safe because this pattern always matches last.
-- Consumes no input: sets state.depth to 0 and always succeeds.
    reset_depth = Cmt(Cc("nothing") / "", function (s,i, something)
                        state.depth = 0
                        warn("reset", "", state.depth, #state.currentindent, i)
                        return true
                    end)
                ,

--------------------------------------------------------------------------------
-- Comments
--------------------------------------------------------------------------------

    -- A comment, optionally followed by end_block.
    comment_block = V"comment"
                  * V"end_block"^-1
                  ,

    -- ".." (replaced by the empty string) followed by a block comment or,
    -- failing that, a line comment.
    comment = V"double_dot" / ""
            * (V"block_comment" + V"line_comment")
            ,

    -- Comment that continues on indented lines.  The rest of the first
    -- line (possibly empty) and the indented lines are substituted into
    -- one string and passed to rst.block_comment.
    block_comment = V"whitespace"^0
                  * Cs((1 - V"eol")^0 * V"eol"
                     * V"indented_lines")
                  / rst.block_comment,

    -- One-line comment: at least one whitespace after the dots; the rest
    -- of the line including the line break is captured and passed to
    -- rst.line_comment.
    line_comment = V"whitespace"^1
                 * C((1 - V"eol")^0 * V"eol")
                 / rst.line_comment
                 ,

--------------------------------------------------------------------------------
-- Generic indented block
--------------------------------------------------------------------------------

    -- A first indented line, further lines with the same indent, and
    -- further paragraphs each introduced by a blank line.
    indented_lines = V"indented_first"
                   * (V"indented_other"^0
                    * (V"blank_line" * V"indented_other"^1)^0)
                   ,

    -- First line: its leading spaces are stored in state.currentindent
    -- and replaced by the empty string.
    indented_first = Cmt(V"space"^1, function (s, i, indent)
                        warn("idt-f", indent, i)
                        state.currentindent = indent
                        return true
                    end) / ""
                   * (1 - V"eol")^1
                   * V"eol"
                   ,

    -- Further line: its leading spaces have to equal the stored indent.
    -- (The warn call logs a comparison of the lengths, the result is
    -- decided by comparing the strings.)
    indented_other = Cmt(V"space"^1, function (s, i, indent)
                        warn("idt-m", #indent, #state.currentindent, #indent == #state.currentindent, i)
                        return indent == state.currentindent
                    end) / ""
                   * (1 - V"eol")^1
                   * V"eol"
                   ,

--------------------------------------------------------------------------------
-- Urls
--------------------------------------------------------------------------------
    -- Protocol, domain and any number of path parts.
    -- NOTE(review): the path is introduced by V"slash" here and url_path
    -- starts with V"slash" again, so a path appears to be recognized only
    -- after a double slash -- confirm whether that is intended.
    uri             = V"url_protocol" * V"url_domain" * (V"slash" * V"url_path")^0,

    -- "https" has to be listed before "http": the ordered choice commits
    -- to the first alternative that matches, so with "http" alone in front
    -- the "s" of "https://" makes the following "://" fail and the whole
    -- rule with it.
    url_protocol    = (P"https" + P"http" + P"ftp" + P"shttp" + P"sftp") * P"://",
    -- Any character except a dot, whitespace, a line break or punctuation.
    url_domain_char = 1 - V"dot" - V"spacing" - V"eol" - V"punctuation",
    -- Dot-separated runs of domain characters.
    url_domain      = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0,
    url_path_char   = R("az", "AZ", "09") + S"-_.!~*'()",
    -- A slash followed by path segments, each optionally closed by a slash.
    url_path        = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1,

--------------------------------------------------------------------------------
-- Terminal Symbols and Low-Level Elements
--------------------------------------------------------------------------------

    asterisk          = P"*",
    backslash         = P"\\",
    bar               = P"|",
    bareia            = P"`",
    slash             = P"/",
    solidus           = P"⁄",
    equals            = P"=",

    --- Punctuation
    -- Some of the following are used for markup as well as for punctuation.
+ +    apostrophe        = P"’" + P"'", +    comma             = P",", +    colon             = P":", +    dot               = P".", +    interpunct        = P"·", +    semicolon         = P";", +    underscore        = P"_", +    dash              = P"-", +    emdash            = P"—", +    hyphen            = P"‐", +    questionmark      = P"?", +    exclamationmark   = P"!", +    interrobang       = P"‽", +    lsquare           = P"[", +    rsquare           = P"]", +    ellipsis          = P"…" + P"...", +    guillemets        = P"«" + P"»", +    quotationmarks    = P"‘" + P"’" + P"“" + P"”", + +    period            = V"dot", +    double_dot        = V"dot" * V"dot", +    double_colon      = V"colon" * V"colon", +    escaped_colon     = V"backslash" * V"colon", +    double_underscore = V"underscore" * V"underscore", +    double_dash       = V"dash" * V"dash", +    triple_dash       = V"double_dash" * V"dash", +    attrib_dash       = V"triple_dash" + V"double_dash" + V"emdash", -- begins quote attribution blocks +    dashes            = V"dash" + P"‒" + P"–" + V"emdash" + P"―", + + + +    punctuation = V"apostrophe" +                + V"colon" +                + V"comma" +                + V"dashes" +                + V"dot" +                + V"ellipsis" +                + V"exclamationmark" +                + V"guillemets" +                + V"hyphen" +                + V"interpunct" +                + V"interrobang" +                + V"questionmark" +                + V"quotationmarks" +                + V"semicolon" +                + V"slash" +                + V"solidus" +                + V"underscore" +                , + +    -- End punctuation + +    letter       = R"az" + R"AZ", +    digit        = R"09", + +    space        = P" ", +    spaces       = V"space"^1, +    whitespace   = (P" " + Cs(P"\t") / "        " + Cs(S"\v") / " "), +    spacing      = V"whitespace"^1, +    blank_line   = V"whitespace"^0 * V"eol", + +    rest_of_line = (1 - 
V"eol")^1, + +    eol          = S"\r\n", +    eof          = V"eol"^0 * -P(1), + +    end_block    = V"blank_line"^1 * V"eof"^-1 +                 + V"eof" +                 , + +    -- diverse markup character sets +    adornment_char     = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], -- headings +    bullet_char        = S"*+-" + P"•" + P"‣" + P"⁃",                    -- bullet lists + +    roman_numeral      = S"ivxlcdm"^1, +    Roman_numeral      = S"IVXLCDM"^1, + +    angle_left         = P"<", +    angle_right        = P">", +    gartenzaun         = P"#", + +    table_intersection = P"+", +    table_hline        = V"dash", +    table_vline        = V"bar", +    table_header_hline = P"=", +} + +--- 225 rules at 2014-02-28 with lpeg 0.12 and Luatex 0.78.3 +--lpeg.print(rst_parser) +--lpeg.ptree(rst_parser) +--os.exit() + +local file_helpers = { } + +function file_helpers.strip_BOM (raw) +    if stringmatch (raw, "^\239\187\191") then +        return stringsub (raw, 4) +    end +    return raw +end + +--- Tab expansion: feature request by Philipp A. +do +    local shiftwidth = rst.shiftwidth +    local stringrep  = string.rep +    local position   = 1 + +    local reset_position     = function ()  position = 1 return "\n" end +    local increment_position = function (c) position = position + 1 return c end +    local expand_tab         = function () +        local expand = (shiftwidth - position) % shiftwidth + 1 +        position     = position + expand +        return stringrep(" ", expand) +    end + +    local tab      = S"\t\v" / expand_tab +    local utfchar  = utfchar / increment_position +    local eol      = P"\n"   / reset_position +    local p_expand = Cs((tab + eol + utfchar)^1) + +    function file_helpers.expandtab (raw) +        --- Re-read the width on every call: the TeX interface assigns +        --- thirddata.rst.shiftwidth after this chunk has been loaded, so +        --- the value captured above would otherwise never be updated. +        shiftwidth = tonumber(thirddata.rst.shiftwidth) or shiftwidth +        position = 1 +        return lpegmatch (p_expand, raw) +    end +end + +--- Spotted by Philipp A. +function file_helpers.insert_blank (raw) +    if not stringfind (raw, "\n%s$") then +        return raw .. 
"\n\n" +        end +        return raw +end + +function file_helpers.crlf (raw) +    if stringfind (raw, "\r\n") then +        return stringgsub (raw, "\r\n", "\n") +    end +    return raw +end + +--- Read the file “name” and run its content through the clean-up steps +--- enabled in thirddata.rst (strip_BOM, crlf, expandtab), then through +--- file_helpers.insert_blank. Raises an error if the file cannot be opened. +local function load_file (name) +    --- assert() already raises on failure; the handle stays local so it +    --- does not leak into the global environment. +    local f = assert(ioopen(name, "r"), "Not a file!") +    local tmp = f:read("*all") +    f:close() + +    local fh = file_helpers +    if thirddata.rst.strip_BOM then +        tmp = fh.strip_BOM(tmp) +    end +    if thirddata.rst.crlf then +        tmp = fh.crlf(tmp) +    end +    if thirddata.rst.expandtab then +        tmp = fh.expandtab(tmp) +    end +    return fh.insert_blank(tmp) +end + +--- Write the string “data” to the file “name”; returns 0 on success and +--- raises an error if the file cannot be opened for writing. +local function save_file (name, data) +    local f = assert(ioopen(name, "w"), "Could not open file "..name.." for writing! Check its permissions") +    f:write(data) +    f:close() +    return 0 +end + +local function get_setups (inline) +    local optional_setups = optional_setups +    local setups = "" +    if not inline then +        setups = setups .. [[ +%+-------------------------------------------------------------+% +%|                           Setups                            |% +%+-------------------------------------------------------------+% +% General                                                       % +%---------------------------------------------------------------% + +]] +    end + +    setups = setups .. [[ +\setupcolors[state=start] +%% Interaction is supposed to be handled manually. +%%\setupinteraction[state=start,focus=standard,color=darkgreen,contrastcolor=darkgreen] +\setupbodyfontenvironment [default]  [em=italic] +\sethyphenatedurlnormal{:=?&} +\sethyphenatedurlbefore{?&} +\sethyphenatedurlafter {:=/-} + +\doifundefined{startparagraph}{% -->mkii +  \enableregime[utf] +  \let\startparagraph\relax +  \let\stopparagraph\endgraf +} + +]] +    for item, _ in next, state.addme do +        local f = optional_setups[item] +        setups = f and setups .. 
f() or setups +    end +    if not inline then +        setups = setups .. [[ + + +%+-------------------------------------------------------------+% +%|                            Main                             |% +%+-------------------------------------------------------------+% + +\starttext +]] +    end +    return setups +end + +--- Convert the reST file “infile” into a document wrapped in the setups +--- and \starttext ... \stoptext, and write it to “outfile”. +--- Returns 0 on success, 1 if load_file reports 1 or the parser does not +--- match the input. +function thirddata.rst.standalone (infile, outfile) +    local testdata = load_file(infile) +    if testdata == 1 then return 1 end + +    local processeddata = lpegmatch (rst_parser, testdata) +    --- A failed match yields nil: bail out here, before the +    --- concatenation below would raise on it. +    if not processeddata then return 1 end +    local setups = get_setups(false) + +    processeddata = setups .. processeddata .. [[ + +\stoptext + +%+-------------------------------------------------------------+% +%|                       End of Document                       |% +%+-------------------------------------------------------------+% + +% vim:ft=context:tw=65:shiftwidth=2:tabstop=2:set expandtab +]] + +    save_file(outfile, processeddata) +    return 0 +end + +local p_strip_comments +do +    local Cs, P = lpeg.Cs, lpeg.P +    local percent = P"%" +    local eol     = P"\n" +    local comment = percent * (1 - eol)^0 * eol / "\n" +    p_strip_comments = Cs((comment + 1)^0) +end + +function thirddata.rst.do_rst_file(fname) +    local raw_data   = load_file(fname) +    local processed  = lpegmatch (rst_parser, raw_data) +    local setups     = get_setups(false) +    local tmp_file   = tex.jobname .. 
"–rst_temporary.tex.tmp" + +    if processed then +        processed = lpegmatch (p_strip_comments, setups..processed.."\n\\stoptext\n") +        save_file(tmp_file, processed) +        context.input("./"..tmp_file) +    end +end + +local rst_inclusions = { } +local rst_incsetups  = { } +function thirddata.rst.do_rst_inclusion (iname, fname) +    local raw_data   = load_file(fname) +    local processed  = lpegmatch (rst_parser, raw_data) +    local setups     = get_setups(true) + +    local incnr    = #rst_incsetups  + 1 +    local tmp_file = tex.jobname .. stringformat("–rst_inclusion-%d.tex.tmp", incnr) + +    if processed then +        processed = lpegmatch (p_strip_comments, processed) +        save_file(tmp_file, processed) +        rst_inclusions[iname] = tmp_file +        rst_incsetups[#rst_incsetups +1] = setups +    end +end + +function thirddata.rst.do_rst_setups () +    local out = table.concat(rst_incsetups) +    --context(out) --- why doesn’t this work? +    local tmp_file = tex.jobname .. "–rst_setups.tex.tmp" +    save_file(tmp_file, out) +    context.input(tmp_file) +end + +function thirddata.rst.get_rst_inclusion (iname) +    if rst_inclusions[iname] then +        context.input(rst_inclusions[iname]) +    else +        context(stringformat([[{\bf File for inclusion “%s” not found.}\par ]], iname)) +    end +end + +function thirddata.rst.do_rst_snippet(txt) +    local processed  = lpegmatch (rst_parser, txt) +    local setups     = get_setups(true) +    local tmp_file   = tex.jobname .. 
"–rst_temporary.tex.tmp" + +    if processed then +        warn("·cs·",txt) +        processed = lpegmatch (p_strip_comments, setups..processed) +        save_file(tmp_file,processed) +        context.input("./"..tmp_file) +    else +        warn("·cs·",txt) +        context.par() +        context("{\\bf context-rst could not process snippet.\\par}") +        context.type(txt) +        context.par() +    end +end + +local usage_info = [[ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +                           rstConTeXt +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Functionality has been moved, the reST converter can now be +accessed via mtxrun: + +    $mtxrun --script rst + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +]] + +local function main() +    iowrite("\n"..usage_info.."\n") +    return -1 +end + +if not (context or scripts) then +    return main() +end + +--- vim:tw=79:et:sw=4:ts=8:sts=4 diff --git a/src/rst_setups.lua b/src/rst_setups.lua new file mode 100644 index 0000000..31f314e --- /dev/null +++ b/src/rst_setups.lua @@ -0,0 +1,377 @@ +#!/usr/bin/env texlua +-------------------------------------------------------------------------------- +--         FILE:  rst_setups.lua +--        USAGE:  called by rst_parser.lua +--  DESCRIPTION:  Complement to the reStructuredText parser +--       AUTHOR:  Philipp Gesang (Phg), <phg42.2a@gmail.com> +--      CHANGED:  2013-06-03 18:52:29+0200 +-------------------------------------------------------------------------------- +-- + +local optional_setups   = { } +thirddata.rst_setups    = optional_setups +local rst_directives    = thirddata.rst_directives +local rst_context       = thirddata.rst + +local stringformat      = string.format +local stringstrip       = string.strip +local stringgsub        = string.gsub + +function optional_setups.footnote_symbol () +    local setup = [[ +%---------------------------------------------------------------% +% 
Footnotes with symbol conversion                              % +%---------------------------------------------------------------% +\definenote[symbolnote][footnote] +\setupnote [symbolnote][way=bypage,numberconversion=set 2] +]] +    return setup +end + +function optional_setups.footnotes () +    local tf = rst_context.state.footnotes +    local fn = [[ + +%---------------------------------------------------------------% +% Footnotes                                                     % +%---------------------------------------------------------------% +]] +    local buffer = [[ + +%% %s +\startbuffer[%s] +%s\stopbuffer +]] +     +    for nf, note in next, tf.numbered do +        fn = fn .. stringformat(buffer, "Autonumbered footnote", "__footnote_number_"..nf, note) +    end +    for nf, note in next, tf.autolabel do +        fn = fn .. stringformat(buffer, "Labeled footnote", "__footnote_label_"..nf, note) +    end +    for nf, note in next, tf.symbol do +        fn = fn .. stringformat(buffer, "Symbol footnote", "__footnote_symbol_"..nf, note) +    end +    return fn +end + +function optional_setups.references () +    local refs  = rst_context.collected_references +    local crefs = rst_context.context_references +    local arefs = rst_context.anonymous_set +     +    local function urlescape (str) +        return str:gsub("#", "\\#") +    end + +    local function resolve_indirect (r) +        if r and r:match(".*_$") then -- pointing elsewhere +            local look_me_up = r:match("^`?([^`]*)`?_$") +            local result = resolve_indirect (refs[look_me_up]) +            if result then +                return result +            else +                if rst_context.structure_references[look_me_up] then +                    -- Internal link, no useURL etc. 
+                    return false +                end +            end +        end +        return r +    end + +    local refsection = [[ + +%---------------------------------------------------------------% +% References                                                    % +%---------------------------------------------------------------% + +]] +    local references = {} +    local ref_keys   = {} +    for ref, target in next, refs do +        ref_keys[#ref_keys+1] = [[__target_]] .. rst_context.whitespace_to_underscore(ref) +        target = resolve_indirect(target) +        if target ~= false then +            -- link text defaults to the reference name; declared local so +            -- it does not leak into the global environment +            local ref_text = ref +            if arefs[ref_text] then +                ref_text = rst_context.anonymous_links[tonumber(arefs[ref_text])] +            end +            references[#references+1] = stringformat([[ +\useURL[__target_%s] [%s] []   [%s] ]], rst_context.whitespace_to_underscore(ref), urlescape(target), ref_text) +        end +    end +    refsection = refsection .. table.concat(references, "\n") +    -- this is needed in order to select the right reference command later +    refsection = refsection .. "\n\n" .. [[\def \RSTexternalreferences{]] .. table.concat(ref_keys, ",") .. [[} + +% #1 target name, #2 link text +\def\RSTchoosegoto#1#2{% +  \rawdoifinsetelse{#1}{\RSTexternalreferences}% +    {\from[#1]}% +    {\goto{#2}[#1]}% +} +]] + +    return refsection +end + +function optional_setups.substitutions () +    local directives = rst_directives +    local substitutions = [[ + +%---------------------------------------------------------------% +% Substitutions                                                 % +%---------------------------------------------------------------% +]] +    local rs = rst_context.substitutions +    for name, content in next, rs do +        local id, data = content.directive, content.data +        local directive = directives[id] +        if directive then +            substitutions = substitutions .. 
directive(name, data) +        else +            err(id .. " does not exist.") +        end +    end +    return substitutions +end + +function optional_setups.directive () +    --local dirstr = [[ + +--%---------------------------------------------------------------% +--% Directives                                                    % +--%---------------------------------------------------------------% +--]] +    --return dirstr +    return "" +end + +function optional_setups.blockquote () +    return [[ + +%---------------------------------------------------------------% +% Blockquotes                                                   % +%---------------------------------------------------------------% +\setupdelimitedtext  [blockquote][style={\tfx}] % awful placeholder +\definedelimitedtext[attribution][blockquote] +\setupdelimitedtext [attribution][style={\tfx\it}] +]] +end + +function optional_setups.deflist () +    return [[ + +%---------------------------------------------------------------% +% Definitionlist                                                % +%---------------------------------------------------------------% +\def\startRSTdefinitionlist{ +  \bgroup +  \def      \RSTdeflistterm##1{{\bf ##1}} +  \def\RSTdeflistclassifier##1{\hbox to 1em{\it ##1}} +  \def\RSTdeflistdefinition##1{% +    \startnarrower[left] +    ##1% +    \stopnarrower} +  \def\RSTdeflistparagraph ##1{% +    \startparagraph{% +      \noindentation ##1 +    \stopparagraph} +  } +} + +\let\stopRSTdefinitionlist\egroup +]] +end + +function optional_setups.lines () +    return [[ + +%---------------------------------------------------------------% +% Lines environment (line blocks)                               % +%---------------------------------------------------------------% + +\setuplines[% +  space=on,% +  before={\startlinecorrection\blank[small]},% +  after={\blank[small]\stoplinecorrection},% +] +]] +end + +function optional_setups.breaks () +    return [[ + 
+%---------------------------------------------------------------% +% Fancy transitions                                             % +%---------------------------------------------------------------% + +% Get Wolfgang’s module at <https://bitbucket.org/wolfs/fancybreak>. +\usemodule[fancybreak] +\setupfancybreak[symbol=star] +]] +end + +function optional_setups.fieldlist () +    return [[ + +%---------------------------------------------------------------% +% Fieldlists                                                    % +%---------------------------------------------------------------% + +\def\startRSTfieldlist{% +  \bgroup% +  \unexpanded\def\RSTfieldname##1{\bTR\bTC ##1\eTC} +  \unexpanded\def\RSTfieldbody##1{\bTC ##1\eTC\eTR} +% +  \setupTABLE[c][first] [background=color, backgroundcolor=grey, style=\bf] +  \setupTABLE[c][2]     [align=right] +  \setupTABLE[c][each]  [frame=off] +  \setupTABLE[r][each]  [frame=off] +  \bTABLE[split=yes,option=stretch] +  \bTABLEhead +  \bTR +   \bTH  Field       \eTH +   \bTH  Body        \eTH +  \eTR +  \eTABLEhead +  \bTABLEbody +} + +\def\stopRSTfieldlist{% +  %\eTABLEbody % doesn't work, temporarily moved to rst_context.field_list() +  \eTABLE +  \egroup% +} +]] +end + +function optional_setups.dbend () +    -- There's just no reason for not providing this. 
+    optional_setups.dbend_done = true +    return [[ +%---------------------------------------------------------------% +% Dangerous bend                                                % +%---------------------------------------------------------------% + +\loadmapfile [manfnt.map] +\definefontsynonym [bends] [manfnt] + +\def\GetSym#1{\getglyph{bends}{\char#1}} + +\startsymbolset [Dangerous Bends] +    \definesymbol [dbend]       [\GetSym{127}] +    \definesymbol [lhdbend]     [\GetSym{126}] +    \definesymbol [lhdbend]     [\GetSym{0}] +\stopsymbolset + +\setupsymbolset [Dangerous Bends] + +]] +end + +function optional_setups.caution () +    local result = "" +    --if not optional_setups.dbend_done then +        --result = result .. optional_setups.dbend() +    --end +    return result .. [[ +%---------------------------------------------------------------% +% Caution directive                                             % +%---------------------------------------------------------------% + +\usemodule[lettrine] + +\setbox0=\hbox{\symbol[dbend]} +\newskip\RSTbendskip +\RSTbendskip=\wd0 +\advance\RSTbendskip by 1em % These two lines should add +\advance\RSTbendskip by 1pt % 13.4pt in mkiv and 13.14983pt in mkii +                            % to make the indent equal to the indent +                            % of the “danger” directive. +                            % (2*(width)dbend + (kern)1pt + 1em + +\def\startRSTcaution{% +\startparagraph +\dontleavehmode\lettrine[Lines=2,Raise=.6,Findent=\RSTbendskip,Nindent=0pt]{\symbol[dbend]}{}% +} + +\let\stopRSTcaution\stopparagraph + +]] + +end + +function optional_setups.danger () +    local result = "" +    --if not optional_setups.dbend_done then +        --result = result .. optional_setups.dbend() +    --end +    return result .. 
[[ +%---------------------------------------------------------------% +% Danger directive                                              % +%---------------------------------------------------------------% + +\usemodule[lettrine] + +\def\startRSTdanger{% +\startparagraph +\lettrine[Lines=2,Raise=.6,Findent=1em,Nindent=0pt]{\symbol[dbend]\kern 1pt\symbol[dbend]}{}% +} + +\let\stopRSTdanger\stopparagraph + +]] + +end + +function optional_setups.citations () +    local cit = [[ +%---------------------------------------------------------------% +% Citations                                                     % +%---------------------------------------------------------------% +\setupbibtex[database=\jobname] +]] +     + +    return cit +end + +function optional_setups.citator () +    local cit = [[ +%---------------------------------------------------------------% +% Citator Options                                               % +%---------------------------------------------------------------% +\usemodule[citator] +\loadbibdb{\jobname.bib} +\setupcitator[sortmode=authoryear] +\setupcite[mainmode=authoryear] + +\startbuffer[bibliography] +\chapter{References} +\setupbodyfont[small] +\bibbykey{shorthand}{all}{author} +\stopbuffer + +\prependtoks \getbuffer[bibliography] \to \everystoptext +]] + +    return cit +end + +function optional_setups.image () +    local image = [[ + +%---------------------------------------------------------------% +% images                                                        % +%---------------------------------------------------------------% +\setupexternalfigure[location={local,global,default}] + +]] +    return image +end + +return optional_setups + +-- vim:ft=lua:sw=4:ts=4:expandtab:tw=80 diff --git a/src/t-rst.mkiv b/src/t-rst.mkiv new file mode 100644 index 0000000..48801b5 --- /dev/null +++ b/src/t-rst.mkiv @@ -0,0 +1,241 @@ +%D \module [ +%D         file=t-rst, +%D      version=0.6 ‘It’s the Arts’, +%D        title=\CONTEXT\ User 
Module, +%D     subtitle=reStructuredText, +%D       author=Philipp Gesang, +%D         date=\currentdate, +%D    copyright=Philipp Gesang, +%D      license=2-clause BSD, +%D ] + +%M \usemodule  [rst] +%M \usemodule  [int-load] +%M \loadsetups [t-letterspace.xml] + +%C Read the license conditions in the file \type{COPYING}. + +%M \definecolor [gutenred] [x=bf221f] % rubrication from digitized Göttingen Gutenberg bible +%M \setupinteraction [contrastcolor=gutenred,color=gutenred] +%M +%M \define\beautifyshowsetups{% +%M   \unexpanded \def \setupnumfont  {\rm}% +%M   \unexpanded \def \setuptxtfont  {\rm}% +%M   \unexpanded \def \setupintfont  {\rm\sc\Word}% +%M   \unexpanded \def \setupvarfont  {\rm\it}% +%M   \unexpanded \def \setupoptfont  {\rm\it}% +%M   \unexpanded \def \setupalwcolor {gutenred}% +%M   \unexpanded \def \setupoptcolor {gutenred}% +%M   \defineframedtext [setuptext] [ +%M     frame=off, +%M     background=color, +%M     backgroundcolor=gray:2, +%M     width=\hsize, +%M     height=fit, +%M     align=right, +%M     offset=0.75em, +%M   ]% +%M } +%M  +%M \let \Oldshowsetup \showsetup +%M  +%M \define [1] \showsetup {% +%M   \bgroup \beautifyshowsetups% +%M     \Oldshowsetup{#1}% +%M   \egroup% +%M } + +\writestatus{loading}{ConTeXt User Module / reStructuredText} + +\unprotect + +\startinterface all +  \setinterfacevariable {RST} {RST} +\stopinterface + +\definenamespace [\v!RST] [ +  type=module, +  comment=reStructuredText module, +  version=0.6, +  name=\v!RST, +  style=\v!no, +  command=\v!yes, +  setup=\v!list, +  parent=\v!RST, +] + +%D Loading the reStructuredText parser. +\ctxloadluafile{rst_parser} + +%D Easy way to define a global test setting. Activated +%D by \type{\usemodule[rst][test=yes]}. + +\startmoduletestsection +  \ctxlua{thirddata.rst_helpers.rst_debug = true} +\stopmoduletestsection + +%D To process inline reST markup we’ll have to reset all catcodes +%D except for grouping, escaping and cs arguments. 
+ +\newcatcodetable   \RST_catcodes +\startcatcodetable \RST_catcodes +  \catcode`\^^I = 12 +  \catcode`\^^M = 12 +  \catcode`\^^L = 12 +  \catcode`\    = 12 +  \catcode`\^^Z = 12 +  \catcode`\\   =  0 +  \catcode`\%   = 12 +  \catcode`\#   =  6 +  \catcode`\_   = 12 +  \catcode`\^   = 12 +  \catcode`\&   = 12 +  \catcode`\|   = 12 +  \catcode`\{   =  1 +  \catcode`\}   =  2 +  \catcode`\~   = 12 +  \catcode`\$   = 12 +\stopcatcodetable + +%D \section {User-level Commands} +%D +%D \subsection{Typesetting reST-Files} +%D +%D \macros +%D    {typesetRSTfile} +%D +%D This command loads and processes an \type{*.rst} file. +%D All necessary setups for the elements to be used (e.g. tables) +%D have to be specified {\em before} this macro is called. +%D As \type{\typesetRSTfile} is intended to process a single file +%D only, it will handle \type{\start|stoptext} automatically. +%D Thus, the user should never supply any of these manually, +%D neither before nor after \type{\typesetRSTfile}. +%D +%D We now handle rogue utf-8 byte order marks on demand, just set +%D the optional parameter \type{stripBOM} to {\em true}. +%D +%D There also is an option \type{expandtab} to convert tabs +%D (ascii 0x09) to indents prior to converting reST input. The +%D expansion width defaults to {\em 4} and can be configured +%D through the parameter \type{shiftwidth} (takes an integer). 
+%D +%D \showsetup{typesetRSTfile} + +\def\do_typeset_RST_file[#1]#2{% +  \iffirstargument +    \getparameters[RST][#1]% +    \doifdefined{RSTstripBOM}  {\ctxlua{thirddata.rst.strip_BOM  = \RSTstripBOM}}% +    \doifdefined{RSTexpandtab} {\ctxlua{thirddata.rst.expandtab  = \RSTexpandtab}}% +    \doifdefined{RSTshiftwidth}{\ctxlua{thirddata.rst.shiftwidth = \RSTshiftwidth}}% +  \fi +  \ctxlua{thirddata.rst.do_rst_file("#2")}% +} + +\def\typesetRSTfile{% +  \dosingleempty\do_typeset_RST_file% +} + +%D \subsection{Typesetting Inline Snippets} +%D +%D reST markup can be handy in situations where \CONTEXT\ markup +%D would result in unappropriately verbose source code, e.g. when +%D typesetting tables with simple layout. +%D +%D \macros +%D    {RST,startRST} +%D +%D The environment \type{\[start|stop]RST} and the macro +%D \type{\RST} allow access to reST-parser from inside a +%D \CONTEXT\ document when the module is loaded. +%D +%D \showsetup{RST} +%D \showsetup{startRST} + +% Wolfgang’s code below. + +\unexpanded \def \startRST{% +  \begingroup +  \setcatcodetable \RST_catcodes +  \do_start_RST% +} + +\let \stopRST \relax + +\def \do_start_RST#1\stopRST{% +  \endgroup% +  \ctxlua{thirddata.rst.do_rst_snippet(\!!bs#1\!!es)}% +} + +\def \RST{% +  \begingroup +  \setcatcodetable \RST_catcodes +  \do_RST% +} + +\def \do_RST#1{% +  \endgroup% +  \ctxlua{thirddata.rst.do_rst_snippet(\!!bs#1\!!es)}% +} + +%D \subsection{Including multiple reST files} +%D +%D \macros +%D    {defineRSTinclusion,startRSTproject,RSTinclusion} +%D +%D When content is split among multiple files, these macros allow +%D for including them in arbitrary order. Note that setups from +%D previous includes, unless overwritten, will remain defined. +%D \type{\defineRSTinclusion} takes three arguments: the first one +%D will be the identifier that can be used to refer to the actual +%D inclusion, which is specified via the second argument as a +%D filename. 
The third optional argument receives the usual +%D setups \type{stripBOM}, \type{expandtab} and +%D \type{shiftwidth}. +%D +%D Defined inclusions can be typeset only within an the +%D \type{\startRSTproject} environment using the macro +%D \type{RSTinclusion}. Between those inclusion all kinds of +%D \TEX\ code except for \type{\starttext} and \type{\stoptext} +%D are permitted. + +\def\do_define_RST_inclusion[#1][#2][#3]{% +  \ifthirdargument +    \getparameters[RST][#3]% +    \doifdefined{RSTstripBOM}  {\ctxlua{thirddata.rst.strip_BOM  = \RSTstripBOM}}% +    \doifdefined{RSTexpandtab} {\ctxlua{thirddata.rst.expandtab  = \RSTexpandtab}}% +    \doifdefined{RSTshiftwidth}{\ctxlua{thirddata.rst.shiftwidth = \RSTshiftwidth}}% +  \fi +  \ifsecondargument +    \ctxlua{thirddata.rst.do_rst_inclusion("#1", "#2")}% +  \fi% +} + +\def\defineRSTinclusion{% +  \dotripleempty\do_define_RST_inclusion% +} + +\def\do_RST_inclusion[#1]{% +  \iffirstargument +    \ctxlua{thirddata.rst.get_rst_inclusion("#1")}% +  \fi% +} + +\def\do_RST_setups{% +  \ctxlua{thirddata.rst.do_rst_setups()}% +} + +\def\startRSTproject{ +  \begingroup +  \def\RSTinclusion{\dosingleempty\do_RST_inclusion} +  \do_RST_setups +  \starttext% +} + +\def\stopRSTproject{ +  \stoptext \endgroup \endinput +} + +\protect \endinput + +% vim:ft=context:sw=2:ts=2 | 
