summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--rst_parser.lua157
1 files changed, 133 insertions, 24 deletions
diff --git a/rst_parser.lua b/rst_parser.lua
index 14d23ff..cc322de 100644
--- a/rst_parser.lua
+++ b/rst_parser.lua
@@ -14,6 +14,27 @@
require "lpeg"
rst = require "rst_context"
+
+-- Master switch for the trace output produced by warn().
+local rst_debug = true
+
+--- Write one line of debug trace to standard output.
+-- The line has the shape `*[label] | val1 | val2 ... |`; every value is
+-- right-aligned in a cell at least six characters wide.  Output is wrapped
+-- at 80 columns, continuation lines being indented by the label width.
+-- @param str  label shown inside the `*[...]` prefix
+-- @param ...  values to print; nil values are printed as "nil"
+-- @return false when debugging is disabled, 0 after a line was written
+local warn = function(str, ...)
+  if not rst_debug then return false end
+
+  -- string.strip is not part of stock Lua; presumably the host environment
+  -- supplies it.  Fall back to a plain pattern when it is absent.
+  local strip = string.strip or function (s)
+    return (s:match("^%s*(.-)%s*$"))
+  end
+
+  local slen   = #str + 3                 -- width of "*[" .. str .. "]"
+  local indent = string.rep(" ", slen)
+  local buf    = { "*[", str, "]" }
+  local col    = slen                     -- current output column
+
+  -- select("#", ...) counts trailing and embedded nils, so no argument is
+  -- dropped the way ipairs({...}) would drop everything after a nil.
+  for k = 1, select("#", ...) do
+    local cell = string.format(" |%6s", strip(tostring((select(k, ...)))))
+    if col + #cell > 80 then
+      -- start a fresh, indented line and reset the column counter so that
+      -- the following cells share this line instead of wrapping one by one
+      buf[#buf + 1] = "\n" .. indent
+      col = slen
+    end
+    buf[#buf + 1] = cell
+    col = col + #cell
+  end
+
+  buf[#buf + 1] = " |\n"
+  io.write(table.concat(buf))
+  return 0
+end
+
local C, Cb, Cc, Cg, Cmt, Cp, Cs, Ct, P, R, S, V, match = lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.match
local utf = unicode.utf8
@@ -25,6 +46,7 @@ tracklists.depth = 0
tracklists.bullets = {} -- mapping bullet forms to depth
tracklists.bullets.max = 0
tracklists.lastbullet = ""
+tracklists.roman_cache = {} -- storing roman numerals that were already converted
n = 0
@@ -52,8 +74,8 @@ do
local c = {}
c.roman = S"ivxlcdm"^1
c.Roman = S"IVXLCDM"^1
- c.alpha = R"az"
- c.Alpha = R"AZ"
+ c.alpha = R"az" - P"i"
+ c.Alpha = R"AZ" - P"I"
c.digit = R"09"^1
c.auto = P"#"
@@ -62,7 +84,7 @@ do
local itemstripper = stripme^0 * C(dontstrip^1) * stripme^0
local con = function (str)
- print("This is it: >"..str.."<")
+ --print("This is it: >"..str.."<")
str = itemstripper:match(str)
for conv, pat in next, c do
if pat:match(str) then
@@ -72,6 +94,90 @@ do
return false
end
tracklists.conversion = con
+
+  -- Value of each (lower-case) roman numeral letter.
+  local rnums = {
+    i = 1,
+    v = 5,
+    x = 10,
+    l = 50,
+    c = 100,
+    d = 500,
+    m = 1000,
+  }
+
+  --- Convert a roman numeral to its integer value.
+  -- A letter that is smaller than its right-hand neighbour is subtracted,
+  -- every other letter is added.  Which subtractive pairs are legal is not
+  -- checked (e.g. "il" is accepted).
+  -- @param str  roman numeral; letter case is ignored
+  -- @return the integer value, or the string "Not a number" when `str`
+  --         contains a non-roman letter or more than three identical
+  --         letters in a row; the empty string yields 0
+  local function roman_to_arab (str)
+    str = str:lower()
+    local value = 0
+    local prev            -- value of the preceding letter
+    local run = 0         -- length of the current run of identical letters
+
+    for n = 1, #str do
+      local curr = rnums[str:sub(n, n)]
+      if not curr then
+        return "Not a number"
+      end
+
+      -- Count the letter being read right now, so that the last letter
+      -- of the string takes part in the "at most three" check as well.
+      if curr == prev then
+        run = run + 1
+        if run > 3 then
+          return "Not a number"
+        end
+      else
+        run = 1
+      end
+
+      -- succ is nil past the end of the string and for invalid letters;
+      -- the latter are rejected on the next iteration.
+      local succ = rnums[str:sub(n + 1, n + 1)]
+      if succ and curr < succ then
+        value = value - curr
+      else
+        value = value + curr
+      end
+
+      prev = curr
+    end
+    return value
+  end
+ tracklists.roman_to_arab = roman_to_arab
+
+  --- Tell whether list label `str` is the direct successor of label `old`.
+  -- Both labels are first reduced with `itemstripper`.  Arabic numbers are
+  -- compared numerically, alphabetic labels by their first byte, and all
+  -- remaining labels are treated as roman numerals.
+  -- @param str  label of the item under inspection
+  -- @param old  label of the preceding item
+  -- @return true when `str` follows `old` immediately, false otherwise
+  local suc = function (str, old)
+    str, old = itemstripper:match(str), itemstripper:match(old)
+    if not (str and old) then -- nothing left after stripping
+      return false
+    end
+
+    local n_str, n_old = tonumber(str), tonumber(old)
+    if n_str and n_old then -- arabic numeral
+      return n_str == n_old + 1
+    elseif n_str or n_old then -- a number next to a non-number
+      return false
+    end
+
+    local con_str = con(str)
+    if con_str == "alpha" or con_str == "Alpha" then
+      -- only the first byte of each label is compared
+      return str:byte() == old:byte() + 1
+    end
+
+    -- “I'm a Roman!” - “A woman?” - “No, *Roman*! - Au!” - “So your father was a woman?”
+    if not (str:lower() == str or str:upper() == str) then
+      return false -- uneven cased --> fail
+    end
+
+    -- Conversions are memoized under the label as written.
+    local trc = tracklists.roman_cache
+    n_str, n_old = trc[str], trc[old]
+    if not n_str then
+      n_str = roman_to_arab(str:lower())
+      trc[str] = n_str
+    end
+    if not n_old then
+      n_old = roman_to_arab(old:lower())
+      trc[old] = n_old
+    end
+
+    -- roman_to_arab reports failure with a string; doing arithmetic on it
+    -- would raise an error, so such labels never count as successors.
+    if type(n_str) ~= "number" or type(n_old) ~= "number" then
+      return false
+    end
+    return n_str == n_old + 1
+  end
+  tracklists.successor = suc
end
local parser = P{
@@ -117,7 +223,7 @@ local parser = P{
* V"bullet_stop"
* Cmt(Cc(nil), function (s, i)
local t = tracklists
- print("[close]>", t.depth)
+ warn("close", t.depth)
t.bullets[t.depth] = nil -- “pop”
t.depth = t.depth - 1
return true
@@ -131,7 +237,7 @@ local parser = P{
local t = tracklists
local oldbullet = t.bullets[t.depth]
local n_spaces = match(P" "^0, bullet)
- print("[first]>",
+ warn("first",
t.depth,
(t.depth == 0 and n_spaces == 1) or
(t.depth > 0 and n_spaces > 1), bullet, oldbullet,
@@ -161,7 +267,7 @@ local parser = P{
bullet_cont = Cmt(V"bullet_indent", function (s, i, bullet)
local t = tracklists
- print("[contin]>>",
+ warn("conti",
t.depth,
bullet == t.bullets[t.depth],
bullet,
@@ -172,29 +278,44 @@ local parser = P{
if utf.len(t.bullets[t.depth]) ~= utf.len(bullet) then
return false
- elseif t.bullets[t.depth] == bullet then
+ elseif not t.conversion(bullet) and t.bullets[t.depth] == bullet then
return true
elseif t.conversion(t.lastbullet) == t.conversion(bullet) then -- same type
- return true
+ return t.conversion(bullet) == "auto" or t.successor(bullet, t.lastbullet)
end
- return true
- --return t.bullets[t.depth] == bullet
+ --return false
+ return t.bullets[t.depth] == bullet
end) / "",
-- ^^^^^
-- otherwise returns the value of V"bullet_indent", not sure why …
- bullet_continue = V"eol"^0 -- ^-1
+ bullet_continue = V"bullet_blank"
* V"bullet_cont"
* V"bullet_itemrest",
bullet_itemrest = Cs(V"bullet_rest" -- first line
* ((V"bullet_match" * V"bullet_rest")^0 -- any successive lines
- * (V"eol"
+ --* (V"eol"
+ * (V"bullet_blank"
* (V"bullet_match" * (V"bullet_rest" - V"bullet_indent"))^1)^0))
/ rst.bullet_item,
-- ^^^^^^^^^^^^^
-- otherwise matches bullet_first
+  -- Remainder of the current line, line terminator included; at least one
+  -- character has to precede the terminator.
+  bullet_rest = Cs((1 - V"eol")^1 * V"eol"),
+
+  -- A line terminator, either on its own or preceded by one or more
+  -- `space` matches.
+  bullet_blank = V"eol" + V"space"^1 * V"eol",
+
+  -- Run of `space` matches; used as the capture handed to bullet_match.
+  bullet_next = V"space"^1,
+
+  -- Predicate (consumes no input): succeeds when the byte length of the
+  -- whitespace matched by bullet_next equals the UTF-8 length of the
+  -- bullet recorded for the current list depth.
+  bullet_match = #Cmt(V"bullet_next", function (_subject, _pos, indent)
+    local list = tracklists
+    local have = string.len(indent)
+    local want = utf.len(list.bullets[list.depth])
+    local aligned = have == want
+    warn("match", list.depth, aligned, want, have)
+    return aligned
+  end),
+
bullet_expr = V"bullet_char"
+ (P"(" * V"number_char" * P")")
+ (V"number_char" * P")")
@@ -209,18 +330,6 @@ local parser = P{
+ R"AZ"
+ R"az",
- bullet_rest = Cs((1 - V"eol")^1 * V"eol"), -- rest of one line
-
- bullet_next = V"space"^1,
- bullet_match = #Cmt(V"bullet_next", function (s, i, this)
- local t = tracklists
- print("[match]>>>",
- t.depth,
- string.len(this) == utf.len(t.bullets[t.depth]),
- utf.len(t.bullets[t.depth]), string.len(this) )
- return string.len(this) == utf.len(t.bullets[t.depth])
- end),
-
--------------------------------------------------------------------------------
-- Transitions
--------------------------------------------------------------------------------