#!/usr/bin/env texlua
--------------------------------------------------------------------------------
--         FILE:  rst-parser.lua
--        USAGE:  ./rst-parser.lua 
--  DESCRIPTION:  LPeg grammar for reStructuredText; matched pieces are handed to the formatters in rst_context.
--      OPTIONS:  ---
-- REQUIREMENTS:  ---
--       AUTHOR:  Philipp Gesang (Phg), <megas.kapaneus@gmail.com>
--      VERSION:  1.0
--      CREATED:  31/08/10 11:53:49 CEST
--------------------------------------------------------------------------------
--

require "lpeg"
rst = require "rst_context"


-- Master switch for the trace output; when false, warn() does nothing.
local rst_debug = true

--- Write one line of trace output to stdout.
-- The line has the form `*[tag] | col | col |`: every extra argument is
-- converted with tostring(), stripped of surrounding whitespace and
-- right-aligned in a column of six characters.
-- @param str  short tag that names the call site
-- @param ...  values to show in the columns (nil values are shown as "nil")
-- @return false when tracing is switched off, 0 after a line was written
local warn = function(str, ...)
    if not rst_debug then return false end
    -- string.strip is not part of stock Lua (ConTeXt's l-string supplies it);
    -- fall back to a plain trim so the tracer works without that library.
    local strip = string.strip or function (s)
        return (s:match("^%s*(.-)%s*$"))
    end
    local slen = #str + 3
    -- Wrapped lines are indented to the width of the "*[tag]" prefix.
    local indent = string.rep(" ", slen)
    str = "*["..str.."]"
    -- select("#", ...) instead of ipairs({...}): a nil argument must not cut
    -- off the remaining columns.
    for i = 1, select("#", ...) do
        local j = (select(i, ...))
        if 80 - i * 8 - slen < 0 then
            str = str .. "\n" .. indent
        end
        str = str .. string.format(" |%6s", strip(tostring(j)))
    end
    io.write(str .. " |\n")
    return 0
end

-- Debugging aid: prints the value framed as "HERE >...<" and hands it back
-- unchanged, so it can be wrapped around any expression.
local function debugme (x)
    local framed = "HERE >" .. x .. "<"
    print(framed)
    return x
end

local C, Cb, Cc, Cg, Cmt, Cp, Cs, Ct, P, R, S, V, match = lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match

local utf = unicode.utf8

local eol = P"\n"

-- State shared between the match-time callbacks of the grammar.
local tracklists = {
    depth         = 0,            -- current nesting level of lists
    bullets       = { max = 0 },  -- mapping bullet forms to depth; max = deepest level reached
    lastbullet    = "",
    roman_cache   = {},           -- storing roman numerals that were already converted
    currentindent = "",           -- used in definition lists and elsewhere
}

--n = 0

-- Closing character that belongs to each opening character; consulted by the
-- enclosed_inline rule to check that opener and closer form a pair.
local enclosed_mapping = {
    -- brackets
    ["("] = ")",
    ["["] = "]",
    ["{"] = "}",
    ["<"] = ">",
    -- quotes close with themselves
    ['"'] = '"',
    ["'"] = "'",
}

-- Matches one UTF-8 encoded character by the value of its lead byte
-- (adapted from l-lpeg.lua and written as a grammar).
local utfchar = P{
    "char",
    cont  = R("\128\191"),                                -- continuation byte
    ascii = R("\000\127"),                                -- single byte
    two   = R("\194\223") * V"cont",
    three = R("\224\239") * V"cont" * V"cont",
    four  = R("\240\244") * V"cont" * V"cont" * V"cont",
    char  = V"ascii" + V"two" + V"three" + V"four",
}


do
    -- Recognisers for the enumerator styles of list items, keyed by the name
    -- that con() reports.  None of the patterns is anchored at the end of the
    -- subject, so each one accepts a matching prefix.
    -- The table is filled by successive assignment; con() walks it with
    -- next(), so the traversal order of this table influences its result.
    local c = {}
    c.roman = S"ivxlcdm"^1
    c.Roman = S"IVXLCDM"^1
    c.alpha = R"az" - P"i"  -- one lowercase letter; "i" is left to c.roman
    c.Alpha = R"AZ" - P"I"  -- one uppercase letter; "I" is left to c.Roman
    c.digit = R"09"^1
    c.auto  = P"#"

    -- itemstripper captures the first run of characters that are not a space,
    -- parenthesis or dot, e.g. the "iv" of " (iv) ".  It fails (yields nil) if
    -- the subject holds nothing but such characters.
    local stripme   = S" ()."
    local dontstrip = 1 - stripme
    local itemstripper = stripme^0 * C(dontstrip^1) * stripme^0

    -- Order in which con() tries the recognisers in `c`.  A fixed list
    -- replaces the traversal with next(), whose order is unspecified and made
    -- the result for letters like "v" or "c" (both a letter and a roman digit)
    -- depend on the layout of the hash table.  Single letters are tried
    -- first, which is what the exclusion of "i" from c.alpha / c.Alpha
    -- suggests was intended.
    -- NOTE(review): as a consequence a lone "v", "x", "l", "c", "d" or "m" is
    -- reported as alpha even inside a roman enumeration -- confirm that this
    -- is the preferred reading.
    local con_order = { "digit", "auto", "alpha", "Alpha", "roman", "Roman" }

    --- Classify the enumerator of a list item.
    -- @param str  the bullet as matched, including surrounding spaces,
    --             parentheses and dots
    -- @return one of "digit", "auto", "alpha", "Alpha", "roman", "Roman", or
    --         false if the bullet is not an enumerator
    local con = function (str)
        --print("This is it: >"..str.."<")
        str = itemstripper:match(str)
        if not str then -- nothing left after stripping
            return false
        end
        -- A recogniser only counts if it consumes the complete item, so "vi"
        -- is not mistaken for the single letter "v".
        local complete = #str + 1
        for _, conv in ipairs(con_order) do
            if c[conv]:match(str) == complete then
                return conv
            end
        end
        return false
    end
    tracklists.conversion = con

    -- Value of each (lowercase) roman digit.
    local rnums = {
        i = 1,
        v = 5,
        x = 10,
        l = 50,
        c = 100,
        d = 500,
        m = 1000,
    }

    --- Convert a lowercase roman numeral to its numeric value.
    -- A digit that stands in front of a larger one is subtracted, every other
    -- digit is added.
    -- @param str  numeral made of the characters "ivxlcdm"
    -- @return the value as a number (0 for the empty string), or the string
    --         "Not a number" if str contains a character that is no roman
    --         digit or repeats one digit more than three times in a row
    local function roman_to_arab (str)
        local value = 0
        local run_char, run_len = nil, 0
        for n = 1, #str do
            local ch   = str:sub(n, n)
            local curr = rnums[ch]
            if not curr then -- not a roman digit at all
                return "Not a number"
            end

            -- At most three equal digits in a row.
            if ch == run_char then
                run_len = run_len + 1
                if run_len > 3 then
                    return "Not a number"
                end
            else
                run_char, run_len = ch, 1
            end

            -- nil at the end of the string or in front of an invalid
            -- character; the latter is rejected by the next iteration.
            local succ = rnums[str:sub(n + 1, n + 1)]
            if succ and curr < succ then
                value = value - curr
            else
                value = value + curr
            end
        end
        return value
    end
    tracklists.roman_to_arab = roman_to_arab

    --- Test whether one enumerator directly follows another.
    -- Both arguments are bullets as matched by the grammar; surrounding
    -- spaces, parentheses and dots are stripped before the comparison.
    -- @param str  the enumerator that was just read
    -- @param old  the enumerator it is supposed to follow
    -- @return true if str is the successor of old (arabic, alphabetic or
    --         roman), false otherwise -- also for bullets that are not
    --         enumerators at all
    local suc = function (str, old)
        str, old = itemstripper:match(str), itemstripper:match(old)
        if not (str and old) then -- nothing left after stripping
            return false
        end

        local n_str, n_old = tonumber(str), tonumber(old)
        if n_str and n_old then -- arabic numeral
            return n_str == n_old + 1
        end

        local con_str = con(str)
        if con_str == "alpha"  or
           con_str == "Alpha" then
            return str:byte() == old:byte() + 1
        end

        -- “I'm a Roman!” - “A woman?” - “No, *Roman*! - Au!” - “So your father was a woman?”
        if not (str:lower() == str  or
                str:upper() == str) then -- uneven cased --> fail
            return false
        end

        -- Only hand real roman numerals to the converter; plain bullets such
        -- as "*" and "+" end up here as well and are no successors of anything.
        local roman = "^[ivxlcdm]+$"
        if not (str:lower():find(roman) and old:lower():find(roman)) then
            return false
        end

        local trc = tracklists.roman_cache
        local function value_of (numeral)
            local n = trc[numeral]
            if not n then
                n = roman_to_arab(numeral:lower())
                trc[numeral] = n
            end
            return n
        end
        n_str, n_old = value_of(str), value_of(old)

        -- The converter answers malformed numerals with a string.
        if type(n_str) ~= "number" or type(n_old) ~= "number" then
            return false
        end
        --print(n_str, n_old, n_str == n_old + 1 )
        return n_str == n_old + 1
    end
    tracklists.successor = suc
end

local parser = P{
    [1] = V"document",

    -- A document is a sequence of at least one block; the substitution
    -- capture yields the input with every block replaced by the value its
    -- rule produced.
    document = Cs(V"block"^1),

--------------------------------------------------------------------------------
-- Blocks
--------------------------------------------------------------------------------

    -- Ordered choice: the first alternative that matches is taken, so the
    -- generic paragraph comes last.  "/" binds tighter than "+", i.e. each
    -- rst.escape applies to the alternative on its own line only.
    block = V"target_block"
          + V"comment"
          + V"line_block"
          + Cs(V"section")    / rst.escape
          + Cs(V"transition") --/ rst.escape
          + V"literal_block"
          + Cs(V"list")       / rst.escape
          + Cs(V"paragraph")  / rst.escape
          ,

--------------------------------------------------------------------------------
-- Line blocks
--------------------------------------------------------------------------------

    -- A line block: a first "| " line, at least one further line (with text
    -- or empty) and a closing blank line.
    line_block = Cs(V"line_block_first"
                  * (V"line_block_other"
                   + V"line_block_empty")^1)
               * V"blank_line"
               / rst.line_block
               ,

    -- Optional indent, the bar and exactly one space.
    line_block_marker = V"space"^0 * V"bar" * V"space",

    -- A bar with nothing but spaces behind it, up to and including the newline.
    line_block_empty_marker = V"space"^0 * V"bar" * V"space"^0 * V"eol",


    -- Stores the marker of the first line in tracklists.currentindent so the
    -- following lines can be compared with it; `/ ""` drops the marker from
    -- the output.
    line_block_first = Cmt(V"line_block_marker", function(s, i, marker)
                            warn("lbk-i", #marker, "", marker, "", i)
                            tracklists.currentindent = marker
                            return true
                        end) / ""
                     * V"line_block_line"
                     ,

    -- An empty line of the block: everything from the bar onwards is replaced
    -- by "| " and the result must equal the stored marker.
    line_block_empty = Cmt(V"line_block_empty_marker", function(s, i, marker)
                            warn("lbk-e", #marker, #tracklists.currentindent, marker, tracklists.currentindent, i)
                            marker = marker:gsub("|.*", "| ")
                            return tracklists.currentindent == marker
                        end) / ""
                     / rst.line_block_empty
                     ,

    -- A further line; its marker must be identical to that of the first line.
    line_block_other = Cmt(V"line_block_marker", function(s, i, marker)
                            warn("lbk-m", #marker, #tracklists.currentindent, marker, tracklists.currentindent, i)
                            return tracklists.currentindent == marker
                        end) / ""
                     * V"line_block_line"
                     ,

    -- The text behind a marker, any continuation lines and the final newline.
    line_block_line = Cs(V"text_elements"^1
                       * V"line_block_cont"^0
                       * V"eol")
                    / rst.line_block_line
                    ,

    -- Continuation of a long line: a newline that is not followed by a
    -- marker, then an indent of at least the length of the stored marker
    -- (removed from the output), then more text.
    line_block_cont = (V"eol" - V"line_block_marker")
                    * Cmt(V"space"^1, function(s, i, spaces)
                            warn("lbk-c", #spaces, #tracklists.currentindent, spaces, tracklists.currentindent, i)
                            return #spaces >= #tracklists.currentindent
                        end) / ""
                    * V"text_elements"^1
                    ,

--------------------------------------------------------------------------------
-- Literal blocks
--------------------------------------------------------------------------------

    -- A literal block: the "::" marker line with its blank line, then one or
    -- more groups of literal lines separated by blank lines; trailing blank
    -- lines are consumed but not captured.
    --
    -- This rule used to be preceded by a second `literal_block` entry, an
    -- ordered choice of the rules "unquoted_literal_block" and
    -- "quoted_literal_block", which are not defined anywhere in this grammar.
    -- A table constructor keeps only one value per key and the Lua manual
    -- leaves the order of the assignments undefined, so that entry was either
    -- dead or would have made the grammar fail to compile.  It was removed.
    literal_block = V"literal_block_marker"
                    * Cs(V"literal_block_lines"
                       * (V"blank_line"^1 * V"literal_block_lines")^0)
                    * V"blank_line"^0
                    / rst.literal_block,

    -- "::" alone at the end of a line, followed by a blank line.
    literal_block_marker = V"double_colon" * V"eol" * V"blank_line",

    -- One contiguous group of literal lines, either indented or quoted.
    literal_block_lines = V"unquoted_literal_block_lines"
                        + V"quoted_literal_block_lines",

    unquoted_literal_block_lines = V"literal_block_first"
                                 * (V"literal_block_other"
                                  - V"blank_line")^0,
                                 
    quoted_literal_block_lines =   V"quoted_literal_block_first"
                                 * (V"quoted_literal_block_other"
                                  - V"blank_line")^0,

    -- First line of an indented group: stores its indent in
    -- tracklists.currentindent.  (V"space"^1 never yields an empty indent, so
    -- the check inside the callback cannot fail.)
    literal_block_first = Cmt(V"space"^1, function (s, i, indent)
                        warn("lbk-f", #indent, "", "", i)
                        if not indent    or
                            indent == "" then
                            return false
                        end
                        tracklists.currentindent = indent
                        return true
                    end)
                   * V"rest_of_line"
                   * V"eol",

    -- Any further line that is indented at least as far as the first one.
    literal_block_other = Cmt(V"space"^1, function (s, i, indent)
                        warn("lbk-m", 
                             #indent,
                             #tracklists.currentindent,
                             #indent >= #tracklists.currentindent, 
                             i)
                        return #indent >= #tracklists.currentindent
                    end)
                   * V"rest_of_line"
                   * V"eol",

    -- First line of a quoted group: the line starts with an adornment
    -- character, which is stored in tracklists.currentindent.
    quoted_literal_block_first = Cmt(V"adornment_char", function (s, i, indent)
                        warn("lbk-f", #indent, "", "", i)
                        if not indent    or
                            indent == "" then
                            return false
                        end
                        tracklists.currentindent = indent
                        return true
                    end)
                   * V"rest_of_line"
                   * V"eol",

    -- A further line of a quoted literal block.  Every line of such a block
    -- has to begin with the same quoting character as the first line, which
    -- quoted_literal_block_first stored in tracklists.currentindent.
    -- (Comparing the lengths, as was done before, is always true for a single
    -- character and let any adornment character continue the block.)
    quoted_literal_block_other = Cmt(V"adornment_char", function (s, i, quote)
                        local same = quote == tracklists.currentindent
                        warn("lbk-m",
                             #quote,
                             #tracklists.currentindent,
                             same,
                             i)
                        return same
                    end)
                   * V"rest_of_line"
                   * V"eol",

--------------------------------------------------------------------------------
-- Lists
--------------------------------------------------------------------------------

    -- All list types; the first alternative that matches is taken.
    list = V"option_list"
         + V"definition_list"
         + V"bullet_list"
         + V"field_list"
         ,

--------------------------------------------------------------------------------
-- Option lists
--------------------------------------------------------------------------------

    -- One or more items, each optionally followed by a single blank line.
    option_list = Cs((V"option_list_item"
                   * V"blank_line"^-1)^1)
                /rst.option_list,

    -- rst.option_item receives one table holding the option group as matched
    -- and the (substituted) description.
    option_list_item = Ct(C(V"option_group")
                        * Cs(V"option_description"))
                     / rst.option_item,

    -- The description starts on the next line, or on the same line with
    -- continuation lines, or consists of a single line.  The multi-line form
    -- has to be tried before the single-line form, which would match its
    -- first line as well.
    option_description = V"option_desc_next"
                       + V"option_desc_more"
                       + V"option_desc_single",

    -- At least two spaces separate the options from the description.
    option_desc_single = V"space"^2
                       --* V"rest_of_line"
                       * V"text_elements"^1
                       * V"eol",

    option_desc_more = V"space"^2
                     --* V"rest_of_line"
                     * V"text_elements"^1
                     * V"eol"
                     * V"indented_lines"
                     * (V"blank_line" * V"indented_lines")^0,

    option_desc_next = V"eol"
                     * V"indented_lines"
                     * (V"blank_line" * V"indented_lines")^0,

    -- Several options may be given for one description, separated by ", ".
    option_group = V"option"
                 * (V"comma" * V"space" * V"option")^0,

    option = (V"option_posixlong"
            + V"option_posixshort"
            + V"option_dos_vms")
            * V"option_arg"^-1,

    -- "=" or a space, then either a letter followed by at least one more
    -- letter or digit, or anything enclosed in angle brackets.
    option_arg = (V"equals" + V"space") 
               * ((V"letter" * (V"letter" + V"digit")^1)
                + (V"angle_left" * (1 - V"angle_right")^1 * V"angle_right")),

    -- "-x": a dash and one letter or digit.
    option_posixshort = V"dash" * (V"letter" + V"digit"),

    -- "--name": two dashes, a letter and at least one more letter, digit or dash.
    option_posixlong = V"double_dash"
                     * V"letter"
                     * (V"letter" + V"digit" + V"dash")^1,

    -- "/name": a slash and at least one letter.
    option_dos_vms = V"slash"
                   * V"letter"^1,

--------------------------------------------------------------------------------
-- Field lists (for bibliographies etc.)
--------------------------------------------------------------------------------

    -- One or more fields, closed by at least one blank line.
    field_list = Cs(V"field"^1)
               * V"blank_line"^1
               / rst.field_list,

    -- rst.field receives one table holding the field name and the field body.
    field = Ct(V"field_marker"
             * V"whitespace"
             * V"field_body")
          / rst.field,

    -- ":name:" -- only the name between the colons is captured.
    field_marker = V"colon"
                 * C(V"field_name")
                 * V"colon",

    -- Anything up to the next colon; a colon escaped with a backslash is
    -- taken as part of the name.
    field_name = (V"escaped_colon" + (1 - V"colon"))^1,

    -- The rest of the marker line plus an optional group of indented lines,
    -- captured as it stands in the input.
    field_body = C(V"text_elements"^1 * V"eol"
                 * V"indented_lines"^-1),

--------------------------------------------------------------------------------
-- Definition lists
--------------------------------------------------------------------------------

    -- Items separated by single blank lines, closed by a blank line.
    definition_list = Cs(V"definition_item"
                      * (V"blank_line" * V"definition_item")^0)
                    * V"blank_line"
                    / rst.deflist
                    ,

    -- Term line (term plus optional classifiers) followed by the indented
    -- definition.
    definition_item = Cs(V"definition_term"
                       * V"definition_classifiers"
                       * V"eol"
                       * V"definition_def")
                    / rst.deflist_item,

    -- Everything on the line up to the first " : " or the end of the line.
    definition_term = Cs((1 - V"eol" - V"definition_classifier_separator")^1) 
                    / rst.deflist_term,

    definition_classifier_separator = V"space" * V"colon" * V"space",

    definition_classifiers = V"definition_classifier"^0,

    definition_classifier = V"definition_classifier_separator"
                          * Cs((1 - V"eol" - V"definition_classifier_separator")^1)
                          / rst.deflist_classifier,

    -- The definition: a first paragraph and any number of further paragraphs,
    -- each introduced by a blank line.
    definition_def = Cs(V"definition_firstpar"
                      * V"definition_par"^0)
                      / rst.deflist_def,

    -- Stores the indent of the first definition line in
    -- tracklists.currentindent; always succeeds.
    definition_indent = Cmt(V"space"^1, function(s, i, indent)
                            warn("def-i", #indent, #tracklists.currentindent, indent == tracklists.currentindent, i)
                            tracklists.currentindent = indent
                            return true
                        end),

    definition_firstpar = Cs(V"definition_parinit"
                          * (V"definition_parline" - V"blank_line")^0)
                          / rst.paragraph,

    definition_par = V"blank_line"
                   * Cs((V"definition_parline" - V"blank_line")^1)
                   / rst.paragraph,

    -- First line of the definition; sets the reference indent.
    definition_parinit = V"definition_indent"
                       * (1 - V"eol")^1
                       * V"eol"
                       ,

    -- Any further line; its indent must equal the reference indent.
    definition_parline = V"definition_match"
                       * (1 - V"eol")^1
                       * V"eol"
                       ,

    -- Succeeds only if the indent is identical to the stored one.
    definition_match = Cmt(V"space"^1, function (s, i, this)
                            warn("def-m", #this, #tracklists.currentindent, this == tracklists.currentindent, i)
                            return this == tracklists.currentindent
                        end),

--------------------------------------------------------------------------------
-- Bullet lists and enumerations
--------------------------------------------------------------------------------

    -- the next rule handles enumerations as well
    -- A list is its first item, any number of further items of the same level
    -- or nested lists, and the end of the paragraph.  The closing Cmt consumes
    -- nothing; it only removes the bullet of the finished level from
    -- tracklists.bullets and decrements the depth.
    bullet_list = V"bullet_init"
                --* (V"bullet_list"
                 --+ V"bullet_continue")^0
                * (V"bullet_continue"
                 + V"bullet_list")^0
                * V"bullet_stop"
                * Cmt(Cc(nil), function (s, i)
                    local t = tracklists
                    warn("close", t.depth)
                    t.bullets[t.depth] = nil -- “pop”
                    t.depth = t.depth - 1
                    return true
                end),

    -- End of the list; rst.stopitemize is called with an empty string.
    --bullet_stop =V"blank_line" * Cs(Cc("")) / rst.stopitemize,
    bullet_stop =V"endpar" * Cs(Cc("")) / rst.stopitemize,

    -- First item of a (sub)list, optionally preceded by newlines.
    bullet_init = V"eol"^0 * V"bullet_first" * V"bullet_itemrest",

    -- Opens a new list level.  The Cmt sits inside a lookahead (#): the
    -- callback decides whether a list starts here and records the bullet, but
    -- consumes nothing; the bullet is consumed afterwards by
    -- Cs(V"bullet_indent") and handed to rst.startitemize.
    --
    -- n_spaces is the position behind the leading spaces of the bullet, i.e.
    -- 1 for an unindented bullet.  A top-level list (depth 0) must not be
    -- indented; a sublist must be indented by at least the length of the
    -- bullet of the enclosing level.
    bullet_first = #Cmt(V"bullet_indent", function (s, i, bullet)
                        local t = tracklists
                        local oldbullet = t.bullets[t.depth]
                        local n_spaces = match(P" "^0, bullet)
                        warn("first", 
                            t.depth, 
                            (t.depth == 0 and n_spaces == 1) or
                            (t.depth >  0 and n_spaces >  1), bullet, oldbullet,
                            t.conversion(bullet))

                        if t.depth == 0 and n_spaces == 1 then -- first level
                            t.depth = 1             -- “push”
                            t.bullets[1] = bullet
                            t.lastbullet = bullet
                            t.bullets.max = t.bullets.max < t.depth and t.depth or t.bullets.max
                            return true
                        elseif t.depth > 0 and n_spaces > 1 then    -- sublist (of sublist)^0
                            if n_spaces >= utf.len(oldbullet) then
                                t.depth = t.depth + 1
                                t.bullets[t.depth] = bullet
                                t.lastbullet = bullet
                                t.bullets.max = t.bullets.max < t.depth and t.depth or t.bullets.max
                                return true
                            end
                        end
                        return false
                    end)
                    --* V"bullet_indent" / rst.startitemize,
                    * Cs(V"bullet_indent") / rst.startitemize,

    -- The complete bullet: leading spaces, the bullet or enumerator itself and
    -- the spaces that separate it from the item text.
    bullet_indent = V"space"^0 * V"bullet_expr" * V"space"^1,

    -- Bullet of a further item on the current level.  It is accepted if it
    -- has the same length as the bullet that opened the level and
    --   * is a plain bullet identical to that one, or
    --   * is an enumerator of the same type as the previous one and either
    --     the auto-enumerator or the previous one's successor, or
    --   * is identical to the opening bullet.
    -- An accepted bullet becomes the new tracklists.lastbullet.  Without that
    -- update every enumerator was compared with the first one of the list, so
    -- an enumeration ended after its second item.
    bullet_cont  = Cmt(V"bullet_indent", function (s, i, bullet)
                        local t = tracklists
                        local opener = t.bullets[t.depth]
                        warn("conti", 
                                t.depth, 
                                bullet == opener,
                                bullet, 
                                opener,
                                t.conversion(t.lastbullet),
                                t.conversion(bullet)
                                )

                        local accepted
                        if utf.len(opener) ~= utf.len(bullet) then
                            accepted = false
                        elseif not t.conversion(bullet) and opener == bullet then
                            accepted = true
                        elseif t.conversion(t.lastbullet) == t.conversion(bullet) then -- same type
                            accepted = t.conversion(bullet) == "auto"
                                    or t.successor(bullet, t.lastbullet)
                        else
                            accepted = opener == bullet
                        end

                        if accepted then
                            t.lastbullet = bullet -- the next item has to follow this one
                            return true
                        end
                        return false
                    end) / "",
                    --   ^^^^^
                    --   otherwise returns the value of V"bullet_indent", not sure why …

    -- A further item of the current level; it has to be separated from the
    -- previous item by a blank line.
    bullet_continue = V"blank_line"
                    * V"bullet_cont"
                    * V"bullet_itemrest",

    -- The text of one item: the rest of the bullet line, continuation lines
    -- and further paragraphs (introduced by a blank line) whose indent matches
    -- the length of the bullet.
    bullet_itemrest = Cs(V"bullet_rest"                               -- first line
                       * ((V"bullet_match" * V"bullet_rest")^0        -- any successive lines
                        --* (V"eol"
                        * (V"blank_line"
                         * (V"bullet_match" * (V"bullet_rest" - V"bullet_indent"))^1)^0))
                    / rst.bullet_item,
                         --                                     ^^^^^^^^^^^^^
                         --                                     otherwise matches bullet_first
 
    bullet_rest = Cs((1 - V"eol")^1 * V"eol"),  -- rest of one line

    bullet_next  = V"space"^1,
    -- Lookahead only: the indent of the line must be exactly as long as the
    -- bullet that opened the current level.  The indent is counted in bytes,
    -- the bullet in UTF-8 characters.
    bullet_match = #Cmt(V"bullet_next", function (s, i, this)
                         local t = tracklists
                         warn("match", 
                                t.depth, 
                                string.len(this) == utf.len(t.bullets[t.depth]),
                                utf.len(t.bullets[t.depth]), string.len(this) )
                         return string.len(this) == utf.len(t.bullets[t.depth])
                     end),

    -- A bullet character or an enumerator in one of the forms "(1)", "1)",
    -- "1." or a bare "1"; the last two only in front of a space.
    bullet_expr = V"bullet_char"
                + (P"(" * V"number_char" * P")")
                +        (V"number_char" * P")")
                + (V"number_char" * V"dot") * #V"space"
                + (V"number_char" * #V"space")
                ,

    -- Roman numerals are tried before single letters.
    number_char = V"roman_numeral"
                + V"Roman_numeral"
                + P"#"
                + V"digit"^1 
                + R"AZ"
                + R"az",

--------------------------------------------------------------------------------
-- Transitions
--------------------------------------------------------------------------------

    -- At least four adornment characters; the line is captured.
    transition_line = C(V"adornment_char"^4),

    -- A transition: optional leading newlines, the line itself and the end of
    -- the paragraph.
    transition = V"eol"^0
               * V"transition_line"
               * V"endpar"
               /rst.transition,

--------------------------------------------------------------------------------
-- Sectioning
--------------------------------------------------------------------------------

    -- A line of adornment characters, captured without its newline.
    section_adorn = C(V"adornment_char"^1) * V"eol",

    -- The whitespace handling after the overline is necessary because headings
    -- without overline aren't allowed to be indented.
    -- Captures, in this order: the overline (if present), the title and the
    -- underline.  The title must not start with whitespace and contains no
    -- adornment character after its first character.
    section = V"eol"^0
            * (V"section_adorn" * V"whitespace"^0)^-1
            * C((1 - V"whitespace") * (1 - V"eol" - V"adornment_char")^1)
            * V"eol"
            * V"section_adorn"
            * V"eol"^-1
            / rst.section, -- validity checking done by the formatter. Now, if
                           -- this ain't lazy then I don't know …

--------------------------------------------------------------------------------
-- Target Blocks
--------------------------------------------------------------------------------

    -- Plain target name: captured up to (not including) the closing colon.
    tname_normal = C((V"escaped_colon" + 1 - V"colon")^1)
                 * V"colon",

    -- Target name in backquotes; the backquotes are part of the capture.
    tname_bareia = C(V"bareia"
                    * (1 - V"eol" - V"bareia")^1
                    * V"bareia")
                 * V"colon",

    -- ".. _name:"
    target_name = V"double_dot"
                * V"space"
                * V"underscore"
                * (V"tname_bareia" + V"tname_normal"),

    -- The indent of the first continuation line is stored in the named group
    -- "indent"; the indent of every further line is captured and compared
    -- with that group through the back capture.
    target_firstindent = V"eol" * Cg(V"space"^1, "indent"),
    target_nextindent  = V"eol" * C(V"space"^1),
    target_indentmatch = Cmt(V"target_nextindent" -- I ♡ LPEG!
                           * Cb("indent"), function (s, i, a, b)
                                return a == b 
                            end),

    -- The link part of a target, in one of three forms:
    --   1. on the following, indented lines; the characters are collected in
    --      a table and joined by rst.joinindented.  The line after the block
    --      must not start with whitespace;
    --   2. on the same line, if the next line is blank or starts with "..";
    --   3. anything else up to the end of the paragraph, which yields a
    --      constant placeholder.
    -- The lookahead of the first form used to read
    --      #(1 - V"whitespace" - "eol")
    -- where "eol" is a string literal, not the rule: the link was rejected
    -- whenever the next line began with the three letters "eol".  A newline
    -- has always been accepted at that place, so the subtraction is dropped
    -- rather than turned into V"eol".
    target_link  = ( V"space"^0 * V"target_firstindent" -- * C((1 - V"eol")^1) * V"eol")
                 * Ct(C(1 - V"whitespace" - V"eol")^1 
                    * (V"target_indentmatch"
                     * C(1 - V"whitespace" - V"eol")^1)^0)
                 * V"eol" * #(1 - V"whitespace")) / rst.joinindented
                 + C((1 - V"eol")^1) * V"eol" * #(V"double_dot" + V"eol")
                 + (1 - V"endpar")^0 * Cc("make me constant!"),

    -- One or more target names on consecutive lines that share one link;
    -- rst.target receives a table with the names followed by the link.
    target       = Ct((V"target_name" * (V"space"^0 * V"eol" * V"target_name")^0)
                 * V"space"^0
                 * V"target_link")
                 / rst.target,

    -- ".. __:" or the short form "__".
    anonymous_prefix = (V"double_dot" * V"space" * V"double_underscore" * V"colon")
                     + (V"double_underscore"),

    -- Anonymous targets are passed to rst.target with an empty string in
    -- place of the name.
    anonymous_target = V"anonymous_prefix"
                     * V"space"^0
                     * Ct(Cc"" * V"target_link")
                     / rst.target,

    -- A run of targets, closed by the end of the paragraph.
    target_block = (V"anonymous_target" + V"target")^1
                 * V"endpar",

--------------------------------------------------------------------------------
-- Paragraphs * Inline Markup
--------------------------------------------------------------------------------

    --paragraph = -(V"double_dot" + V"double_underscore") --  + V"bullet_indent") 
    -- A paragraph must not begin with a punctuation character.  It consists of
    -- text elements and single newlines and runs up to the end of the
    -- paragraph (see endpar).
    paragraph = -V"punctuation"
              * Cs((V"text_elements" + (V"eol" - V"endpar"))^1)
              * V"endpar"
              / rst.paragraph,

    -- Everything that may occur inside running text, most specific first.
    text_elements = V"included_literal_block"
                  + V"enclosed_inline"
                  + V"inline_elements" 
                  + V"word" 
                  + V"punctuation"
                  + V"spacing"
                  ,

    -- Ignore single occurences of inline markup delimiters in certain
    -- environments.
    -- I.e. a delimiter that stands between an opening character and the
    -- closing character that enclosed_mapping assigns to it, as in "(*)".
    enclosed_inline = Cg(V"enclosed_open", "opener") 
                       * V"inline_delimiter" 
                       * Cmt(C(V"enclosed_close") * Cb("opener"), function(i, p, closer, opener)
                           return closer == enclosed_mapping[opener]
                       end),

    -- What may stand directly in front of inline markup.
    precede_inline = V"spacing"
                   + V"eol"
                   + S[['"([{<-/:]]
                   + P"‘" + P"“" + P"’" + P"«" + P"¡" + P"¿"
                   + V"delimiters"
                   + P"„", -- not in standard Murkin reST

    -- What may stand directly behind inline markup.
    succede_inline = V"spacing"
                   + S[['")]}>-/:.,;!?\]]
                   + P"’" + P"”" + P"»"
                   + V"delimiters"
                   + P"“", -- non-standard again but who cares

    -- Inline markup is recognised only between a preceding and a succeeding
    -- character from the two sets above; both are consumed by this rule.
    inline_elements = Cs(V"precede_inline"
                    * (V"strong_emphasis"
                     + V"emphasis"
                     + V"inline_literal"
                     + V"interpreted_text"
--                   + V"inline_internal_target" -- TODO
                     + V"reference"
--                   + V"footnote_reference"     -- TODO
--                   + V"substitution_reference" -- TODO
                     + V"link_standalone")
                    * V"succede_inline"),

    -- *text*: the content neither starts nor ends with whitespace or an
    -- asterisk.  The same scheme is used by the three rules that follow.
    emphasis        = (V"asterisk" - V"double_asterisk") 
                    * Cs((1 - V"spacing" - V"eol" - V"asterisk")
                       * ((1 - (1 * V"asterisk"))^0 
                        * (1 - V"spacing" - V"eol" - V"asterisk"))^-1) -- looks like lisp
                    * V"asterisk" 
                    / rst.emphasis,

    -- **text**
    strong_emphasis = V"double_asterisk" 
                    * Cs((1 - V"spacing" - V"eol" - V"asterisk")
                       * ((1 - (1 * V"double_asterisk"))^0 
                        * (1 - V"spacing" - V"eol" - V"asterisk"))^-1) 
                    * V"double_asterisk"  
                    / rst.strong_emphasis,

    -- ``text``
    inline_literal  = V"double_bareia"
                    * C ((V"escaped_bareia" - V"spacing" - V"eol" - V"bareia")
                       * ((V"escaped_bareia" - (1 * V"double_bareia"))^0
                        * (V"escaped_bareia" - V"spacing" - V"eol" - V"bareia"))^-1)
                    * V"double_bareia"
                    / rst.literal,

    -- `text` with an optional :role: in front or behind.  Both role captures
    -- are always produced (as empty strings if absent), so
    -- rst.interpreted_text receives role, text, role.
    interpreted_text = C(V"role_marker"^-1)
                     * (V"bareia" - V"double_bareia")
                     * C ((1 - V"spacing" - V"eol" - V"bareia")
                        * ((1 - (1 * V"bareia"))^0
                         * (1 - V"spacing" - V"eol" - V"bareia"))^-1)
                     * V"bareia"
                     * C(V"role_marker"^-1)
                     / rst.interpreted_text,

    role_marker = V"colon" * (V"letter" + V"dash" + V"underscore" + V"dot")^1 * V"colon",

    link_standalone = C(V"uri")
                    / rst.link_standalone,

    reference = Cs(V"_reference")
              / rst.reference,

    -- A single word with a trailing underscore.
    _reference = (1 - V"underscore" - V"spacing" - V"eol" - V"punctuation" - V"groupchars")^1 * V"underscore",

    -- A literal block that is announced by "::" at the end of a paragraph
    -- line instead of on a line of its own.
    included_literal_block = V"literal_block_shorthand"
                           * V"literal_block_markerless",

    -- The marker, optionally written as ": ::", is replaced by a single
    -- colon; the newline and the blank line behind it are dropped.
    literal_block_shorthand = ((V"colon" * V"space"^1)^-1 
                             * V"double_colon") / ":"
                            * (V"eol" * V"blank_line" / "")
                            ,

    -- The body of a literal block without its marker line.
    literal_block_markerless = Cs(V"literal_block_lines"
                               * (V"blank_line"^1 * V"literal_block_lines")^0)
                             * V"blank_line"^0
                             / rst.literal_block,

--------------------------------------------------------------------------------
-- Comments
--------------------------------------------------------------------------------

    comment = V"block_comment" + V"line_comment",

    -- ".." alone on a line, followed by a group of indented lines.
    block_comment = V"double_dot" * V"whitespace"^0 * V"eol"
                  * Cs(V"indented_lines")
                  * V"eol"^0
                  --* V"blank_line"^0
                  / rst.block_comment,

    -- ".." followed by the comment text on the same line.
    line_comment = V"double_dot" * V"whitespace"^0
                * Cs((1 - V"eol")^0 * V"eol") 
                / rst.line_comment,

--------------------------------------------------------------------------------
-- Generic indented block
--------------------------------------------------------------------------------

    -- A first indented line and all following lines with the very same
    -- indent, up to a blank line or a field marker.
    indented_lines = V"indented_first"
                   * (V"indented_other"
                    - V"blank_line" - V"field_marker")^0,

    -- Stores the indent of the first line in tracklists.currentindent.
    indented_first = Cmt(V"space"^1, function (s, i, indent)
                        warn("idt-f", indent, i)
                        if not indent    or
                            indent == "" then
                            return false
                        end
                        tracklists.currentindent = indent
                        return true
                    end)
                   * V"text_elements"^1
                   * V"eol",

    -- A further line; its indent must be identical to the stored one.
    indented_other = Cmt(V"space"^1, function (s, i, indent)
                        warn("idt-m", indent, tracklists.currentindent, indent == tracklists.currentindent, i)
                        return indent == tracklists.currentindent
                    end)
                   * V"text_elements"^1
                   * V"eol",

--------------------------------------------------------------------------------
-- Urls
--------------------------------------------------------------------------------
    -- A standalone hyperlink: protocol, domain and any number of paths.
    -- url_path begins with a slash of its own, so it is referenced directly;
    -- the previous form V"slash" * V"url_path" demanded "//" in front of every
    -- path and cut "http://host/path" off behind the host.  The second
    -- alternative keeps the double-slash form working.
    uri = V"url_protocol" * V"url_domain" * (V"url_path" + V"slash" * V"url_path")^0,

    -- "https" has to be tried before "http": an ordered choice does not go
    -- back to a later alternative once "http" matched and "://" failed.
    url_protocol = (P"https" + P"http" + P"ftp" + P"shttp" + P"sftp") * P"://",
    url_domain_char = 1 - V"dot" - V"spacing" - V"eol" - V"punctuation",
    url_domain = V"url_domain_char"^1 * (V"dot" * V"url_domain_char"^1)^0,
    url_path_char = R("az", "AZ", "09") + S"-_.!~*'()",
    -- A slash followed by at least one segment; a single slash may follow
    -- each segment.
    url_path = V"slash" * (V"url_path_char"^1 * V"slash"^-1)^1,

--------------------------------------------------------------------------------
-- Terminal Symbols and Low-Level Elements
--------------------------------------------------------------------------------

    -- A run of characters without punctuation, whitespace or newline.
    word = (1 - V"punctuation" - V"endpar" - V"spacing" - V"eol")^1, -- TODO : no punctuation (later)

    asterisk = P"*",
    double_asterisk = V"asterisk" * V"asterisk",

    -- "bareia" is the backquote.
    bareia = P"`",
    double_bareia = V"bareia" * V"bareia",
    -- Any single character; in a backslash-backquote pair the backslash is
    -- removed by the substitution.
    escaped_bareia = (Cs(V"backslash") / "" * V"bareia") + 1,

    slash = P"/",
    doubleslash = V"slash" * V"slash",

    backslash = P"\\",
    bar = P"|",

    groupchars = S"()[]{}",

    --- Punctuation
    -- Some of the following are used for markup as well as for punctuation.

    comma = P",",
    colon = P":",
    double_colon = V"colon" * V"colon",
    escaped_colon = V"backslash" * V"colon",
    dot = P".",
    period = V"dot",
    double_dot = V"dot" * V"dot",
    interpunct = P"·",
    underscore = P"_",
    double_underscore = V"underscore" * V"underscore",
    dash = P"-",
    double_dash = V"dash" * V"dash",
    dashes = V"dash" + P"‒" + P"–" + P"—" + P"―",
    hyphen = P"‐",
    semicolon = P";",
    questionmark = P"?",
    exclamationmark = P"!",
    inverted_exclamationmark = P"¡",
    inverted_questionmark = P"¿",
    interrobang = P"‽",

    apostrophe = P"’" + P"'",
    --brackets   = P"[ ], (",, { }, ⟨ ⟩ )
    ellipsis   = P"…" + P"...",
    guillemets = P"«" + P"»",
    quotationmarks= P"‘" + P"’" + P"“" + P"”",
    solidus= P"⁄",


    -- Every punctuation character; used to delimit words and to keep a
    -- paragraph from starting with one of them.
    punctuation = V"apostrophe"
                + V"colon" 
                + V"comma" 
                + V"dashes"
                + V"dot" 
                + V"ellipsis"
                + V"exclamationmark"
                + V"guillemets"
                + V"hyphen"
                + V"interpunct"
                + V"interrobang"
                + V"questionmark" 
                + V"quotationmarks"
                + V"semicolon" 
                + V"slash"
                + V"solidus"
                + V"underscore"
                ,

    -- These are treated separately as they might begin a paragraph (sigh!).
    inverted_punctuation = V"inverted_exclamationmark"
                         + V"inverted_questionmark",

    -- End punctuation

    letter = R"az" + R"AZ",

    equals = P"=",

    space = P" ",
    spaces = V"space"^1,
    -- One whitespace character; a tab is replaced by eight spaces and a
    -- vertical tab by one space wherever the match is part of a substitution
    -- capture.
    whitespace = (P" " + Cs(P"\t") / "        " + Cs(S"\v") / " "),
    spacing = V"whitespace"^1,
    blank_line = V"space"^0 * V"eol",

    rest_of_line = (1 - V"eol")^1,

    eol = P"\n",
    -- End of input, optionally preceded by newlines.
    eof = V"eol"^0 * -P(1),
    -- End of a paragraph: the newline of the last line followed by at least
    -- one blank line or by the end of the input.
    endpar = V"eol" * (V"blank_line"^1 + V"eof"),

    -- diverse markup character sets
    delimiters = P"‐" + P"‑" + P"‒" + P"–" + P"—" + V"space",        -- inline markup
    adornment_char = S[[!"#$%&'()*+,-./:;<=>?@[]^_`{|}~]] + P[[\\]], -- headings
    bullet_char = S"*+-" + P"•" + P"‣" + P"⁃",                       -- bullet lists
    -- NOTE(review): this is a sequence ("--" then "-" then "/"), not a choice;
    -- "+" was presumably intended.  No rule of this grammar refers to it.
    argument_char = V"double_dash" * V"dash" * V"slash",             -- option lists

    digit = R"09",
    roman_numeral = S"ivxlcdm"^1,
    Roman_numeral = S"IVXLCDM"^1,

    -- The two-character delimiters are tried before the single characters.
    inline_delimiter = P"**" + P"``" + S"*`",
    angle_left       = P"<",
    angle_right      = P">",
    enclosed_open    = S[['"([{<]],
    enclosed_close   = S[['")]}>]],
}

-- Test driver: run the parser over the sample document and print the result.
-- io.open returns nil plus a message on failure; assert turns that into a
-- readable error instead of "attempt to index a nil value" on the next line.
-- (`f` and `testdata` remain globals, as before.)
f = assert(io.open("option_lists.rst", "r"))
testdata = f:read("*all")
f:close()

print(parser:match(testdata))
--print(">>>Last used char>: " ..tracklists.lastbullet.." <<<<")
--print(">>>Max list nestin>: "..tracklists.bullets.max .." <<<<")

--for i,j in next, rst.collected_references do
    --print (string.format("== %7s => %s <=", i,j))
--end
--parser:print()