diff options
-rw-r--r-- | rst_parser.lua | 157 |
1 files changed, 133 insertions, 24 deletions
diff --git a/rst_parser.lua b/rst_parser.lua index 14d23ff..cc322de 100644 --- a/rst_parser.lua +++ b/rst_parser.lua @@ -14,6 +14,27 @@ require "lpeg" rst = require "rst_context" + +local rst_debug = true + +local warn = function(str, ...) + if not rst_debug then return false end + local slen = #str + 3 + str = "*["..str.."]" + for i,j in ipairs({...}) do + if 80 - i * 8 - slen < 0 then + local indent = "" + for i=1, slen do + indent = indent .. " " + end + str = str .. "\n" .. indent + end + str = str .. string.format(" |%6s", string.strip(tostring(j))) + end + io.write(str .. " |\n") + return 0 +end + local C, Cb, Cc, Cg, Cmt, Cp, Cs, Ct, P, R, S, V, match = lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match local utf = unicode.utf8 @@ -25,6 +46,7 @@ tracklists.depth = 0 tracklists.bullets = {} -- mapping bullet forms to depth tracklists.bullets.max = 0 tracklists.lastbullet = "" +tracklists.roman_cache = {} -- storing roman numerals that were already converted n = 0 @@ -52,8 +74,8 @@ do local c = {} c.roman = S"ivxlcdm"^1 c.Roman = S"IVXLCDM"^1 - c.alpha = R"az" - c.Alpha = R"AZ" + c.alpha = R"az" - P"i" + c.Alpha = R"AZ" - P"I" c.digit = R"09"^1 c.auto = P"#" @@ -62,7 +84,7 @@ do local itemstripper = stripme^0 * C(dontstrip^1) * stripme^0 local con = function (str) - print("This is it: >"..str.."<") + --print("This is it: >"..str.."<") str = itemstripper:match(str) for conv, pat in next, c do if pat:match(str) then @@ -72,6 +94,90 @@ do return false end tracklists.conversion = con + + local rnums = { + i = 1, + v = 5, + x = 10, + l = 50, + c = 100, + d = 500, + m = 1000, + } + + local function roman_to_arab (str) + local n = 1 + local curr, succ + local max_three = { } + local value = 0 + while n <= #str do + if curr and curr == max_three[#max_three] then + if #max_three >= 3 then + return "Not a number" + else + max_three[#max_three+1] = curr + end + else + max_three = { curr } + end + + curr = rnums[str:sub(n,n)] + + n = n + 1 + succ = str:sub(n,n) + + if succ and succ ~= "" then + succ = rnums[succ] + if curr < succ then + --n = n + 1 + --value = value + succ - curr + value = value - curr + else + value = value + curr + end + else + value = value + curr + end + end + return value + end + tracklists.roman_to_arab = roman_to_arab + + local suc = function (str, old) + str, old = itemstripper:match(str), itemstripper:match(old) + local n_str, n_old = tonumber(str), tonumber(old) + if n_str and n_old then -- arabic numeral + return n_str == n_old + 1 + end + + local con_str, con_old = con(str), con(old) + if con_str == "alpha" or + con_str == "Alpha" then + return str:byte() == old:byte() + 1 + else -- “I'm a Roman!” - “A woman?” - “No, *Roman*! - Au!” - “So your father was a woman?” + if not (str:lower() == str or + str:upper() == str) then -- uneven cased --> fail + return false + end + + + local trc = tracklists.roman_cache + n_str = trc[str] or nil + n_old = trc[old] or nil + if not n_str then + n_str = roman_to_arab(str:lower()) + trc[str] = n_str + end + if not n_old then + n_old = roman_to_arab(old:lower()) + trc[old] = n_old + end + --print(n_str, n_old, n_str == n_old + 1 ) + return n_str == n_old + 1 + end + + end + tracklists.successor = suc end local parser = P{ @@ -117,7 +223,7 @@ local parser = P{ * V"bullet_stop" * Cmt(Cc(nil), function (s, i) local t = tracklists - print("[close]>", t.depth) + warn("close", t.depth) t.bullets[t.depth] = nil -- “pop” t.depth = t.depth - 1 return true @@ -131,7 +237,7 @@ local parser = P{ local t = tracklists local oldbullet = t.bullets[t.depth] local n_spaces = match(P" "^0, bullet) - print("[first]>", + warn("first", t.depth, (t.depth == 0 and n_spaces == 1) or (t.depth > 0 and n_spaces > 1), bullet, oldbullet, @@ -161,7 +267,7 @@ local parser = P{ bullet_cont = Cmt(V"bullet_indent", function (s, i, bullet) local t = tracklists - print("[contin]>>", + warn("conti", t.depth, bullet == t.bullets[t.depth], bullet, @@ -172,29 +278,44 @@ local parser = P{ if utf.len(t.bullets[t.depth]) ~= utf.len(bullet) then return false - elseif t.bullets[t.depth] == bullet then + elseif not t.conversion(bullet) and t.bullets[t.depth] == bullet then return true elseif t.conversion(t.lastbullet) == t.conversion(bullet) then -- same type - return true + return t.conversion(bullet) == "auto" or t.successor(bullet, t.lastbullet) end - return true - --return t.bullets[t.depth] == bullet + --return false + return t.bullets[t.depth] == bullet end) / "", -- ^^^^^ -- otherwise returns the value of V"bullet_indent", not sure why … - bullet_continue = V"eol"^0 -- ^-1 + bullet_continue = V"bullet_blank" * V"bullet_cont" * V"bullet_itemrest", bullet_itemrest = Cs(V"bullet_rest" -- first line * ((V"bullet_match" * V"bullet_rest")^0 -- any successive lines - * (V"eol" + --* (V"eol" + * (V"bullet_blank" * (V"bullet_match" * (V"bullet_rest" - V"bullet_indent"))^1)^0)) / rst.bullet_item, -- ^^^^^^^^^^^^^ -- otherwise matches bullet_first + bullet_rest = Cs((1 - V"eol")^1 * V"eol"), -- rest of one line + + bullet_blank = V"eol" + V"space"^1 * V"eol", + + bullet_next = V"space"^1, + bullet_match = #Cmt(V"bullet_next", function (s, i, this) + local t = tracklists + warn("match", + t.depth, + string.len(this) == utf.len(t.bullets[t.depth]), + utf.len(t.bullets[t.depth]), string.len(this) ) + return string.len(this) == utf.len(t.bullets[t.depth]) + end), + bullet_expr = V"bullet_char" + (P"(" * V"number_char" * P")") + (V"number_char" * P")") @@ -209,18 +330,6 @@ local parser = P{ + R"AZ" + R"az", - bullet_rest = Cs((1 - V"eol")^1 * V"eol"), -- rest of one line - - bullet_next = V"space"^1, - bullet_match = #Cmt(V"bullet_next", function (s, i, this) - local t = tracklists - print("[match]>>>", - t.depth, - string.len(this) == utf.len(t.bullets[t.depth]), - utf.len(t.bullets[t.depth]), string.len(this) ) - return string.len(this) == utf.len(t.bullets[t.depth]) - end), - -------------------------------------------------------------------------------- -- Transitions -------------------------------------------------------------------------------- |