diff options
Diffstat (limited to 'tex/context/base/util-prs.lua')
-rw-r--r-- | tex/context/base/util-prs.lua | 73 |
1 files changed, 56 insertions, 17 deletions
diff --git a/tex/context/base/util-prs.lua b/tex/context/base/util-prs.lua index ab1bfb8eb..e7191d0e9 100644 --- a/tex/context/base/util-prs.lua +++ b/tex/context/base/util-prs.lua @@ -6,8 +6,10 @@ if not modules then modules = { } end modules ['util-prs'] = { license = "see context related readme files" } -local P, R, V, C, Ct, Cs, Carg = lpeg.P, lpeg.R, lpeg.V, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg -local lpegmatch = lpeg.match +local lpeg, table, string = lpeg, table, string + +local P, R, V, S, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc +local lpegmatch, patterns = lpeg.match, lpeg.patterns local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find local tostring, type, next = tostring, type, next @@ -19,29 +21,39 @@ parsers.patterns = parsers.patterns or { } local setmetatableindex = table.setmetatableindex local sortedhash = table.sortedhash +-- we share some patterns + +local space = P(' ') +local equal = P("=") +local comma = P(",") +local lbrace = P("{") +local rbrace = P("}") +local period = S(".") +local punctuation = S(".,:;") +local spacer = patterns.spacer +local whitespace = patterns.whitespace +local newline = patterns.newline +local anything = patterns.anything +local endofstring = patterns.endofstring + -- we could use a Cf Cg construct local escape, left, right = P("\\"), P('{'), P('}') -lpeg.patterns.balanced = P { +patterns.balanced = P { [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0, [2] = left * V(1) * right } -local space = P(' ') -local equal = P("=") -local comma = P(",") -local lbrace = P("{") -local rbrace = P("}") local nobrace = 1 - (lbrace+rbrace) local nested = P { lbrace * (nobrace + V(1))^0 * rbrace } local spaces = space^0 local argument = Cs((lbrace/"") * ((nobrace + nested)^0) * (rbrace/"")) -local content = (1-P(-1))^0 +local content = (1-endofstring)^0 -lpeg.patterns.nested = nested -- no capture -lpeg.patterns.argument = argument -- argument after e.g. = -lpeg.patterns.content = content -- rest after e.g = +patterns.nested = nested -- no capture +patterns.argument = argument -- argument after e.g. = +patterns.content = content -- rest after e.g = local value = P(lbrace * C((nobrace + nested)^0) * rbrace) + C((nested + (1-comma))^0) @@ -60,10 +72,6 @@ local function set(key,value) hash[key] = value end -local function set(key,value) - hash[key] = value -end - local pattern_a_s = (pattern_a/set)^1 local pattern_b_s = (pattern_b/set)^1 local pattern_c_s = (pattern_c/set)^1 @@ -114,7 +122,7 @@ end local separator = comma * space^0 local value = P(lbrace * C((nobrace + nested)^0) * rbrace) + C((nested + (1-comma))^0) -local pattern = Ct(value*(separator*value)^0) +local pattern = spaces * Ct(value*(separator*value)^0) -- "aap, {noot}, mies" : outer {} removes, leading spaces ignored @@ -237,3 +245,34 @@ end function parsers.listitem(str) return gmatch(str,"[^, ]+") end + +-- +local digit = R("09") + +local pattern = Cs { "start", + start = V("one") + V("two") + V("three"), + rest = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0, + thousand = digit * digit * digit, + one = digit * V("rest"), + two = digit * digit * V("rest"), + three = V("thousand") * V("rest"), +} + +patterns.splitthousands = pattern -- maybe better in the parsers namespace ? + +function parsers.splitthousands(str) + return lpegmatch(pattern,str) or str +end + +-- print(parsers.splitthousands("11111111111.11")) + +local optionalwhitespace = whitespace^0 + +patterns.words = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1) +patterns.sentences = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1) +patterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1) + +-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n " +-- inspect(lpegmatch(patterns.paragraphs,str)) +-- inspect(lpegmatch(patterns.sentences,str)) +-- inspect(lpegmatch(patterns.words,str)) |