1 files changed, 56 insertions, 17 deletions
diff --git a/tex/context/base/util-prs.lua b/tex/context/base/util-prs.lua
index ab1bfb8eb..e7191d0e9 100644
--- a/tex/context/base/util-prs.lua
+++ b/tex/context/base/util-prs.lua
@@ -6,8 +6,10 @@ if not modules then modules = { } end modules ['util-prs'] = {
     license   = "see context related readme files"
 }
 
-local P, R, V, C, Ct, Cs, Carg = lpeg.P, lpeg.R, lpeg.V, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg
-local lpegmatch = lpeg.match
+local lpeg, table, string = lpeg, table, string
+
+local P, R, V, S, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc
+local lpegmatch, patterns = lpeg.match, lpeg.patterns
 local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find
 local tostring, type, next = tostring, type, next
 
@@ -19,29 +21,39 @@ parsers.patterns  = parsers.patterns or { }
 local setmetatableindex = table.setmetatableindex
 local sortedhash        = table.sortedhash
 
+-- we share some patterns (literal one-character matchers plus a few taken from lpeg.patterns)
+
+local space       = P(' ') -- the space character only
+local equal       = P("=")
+local comma       = P(",")
+local lbrace      = P("{")
+local rbrace      = P("}")
+local period      = S(".") -- one-character set: a single "."
+local punctuation = S(".,:;") -- any one of . , : ;
+local spacer      = patterns.spacer -- this and the next four are defined elsewhere (lpeg.patterns)
+local whitespace  = patterns.whitespace
+local newline     = patterns.newline
+local anything    = patterns.anything
+local endofstring = patterns.endofstring -- presumably P(-1); confirm where lpeg.patterns is set up
+
 -- we could use a Cf Cg construct
 
 local escape, left, right = P("\\"), P('{'), P('}')
 
-lpeg.patterns.balanced = P {
+patterns.balanced = P {
     [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
     [2] = left * V(1) * right
 }
 
-local space     = P(' ')
-local equal     = P("=")
-local comma     = P(",")
-local lbrace    = P("{")
-local rbrace    = P("}")
 local nobrace   = 1 - (lbrace+rbrace)
 local nested    = P { lbrace * (nobrace + V(1))^0 * rbrace }
 local spaces    = space^0
 local argument  = Cs((lbrace/"") * ((nobrace + nested)^0) * (rbrace/""))
-local content   = (1-P(-1))^0
+local content   = (1-endofstring)^0
 
-lpeg.patterns.nested   = nested    -- no capture
-lpeg.patterns.argument = argument  -- argument after e.g. =
-lpeg.patterns.content  = content   -- rest after e.g =
+patterns.nested   = nested    -- no capture
+patterns.argument = argument  -- argument after e.g. =
+patterns.content  = content   -- rest after e.g =
 
 local value     = P(lbrace * C((nobrace + nested)^0) * rbrace) + C((nested + (1-comma))^0)
 
@@ -60,10 +72,6 @@ local function set(key,value)
     hash[key] = value
 end
 
-local function set(key,value)
-    hash[key] = value
-end
-
 local pattern_a_s = (pattern_a/set)^1
 local pattern_b_s = (pattern_b/set)^1
 local pattern_c_s = (pattern_c/set)^1
@@ -114,7 +122,7 @@ end
 
 local separator = comma * space^0
 local value     = P(lbrace * C((nobrace + nested)^0) * rbrace) + C((nested + (1-comma))^0)
-local pattern   = Ct(value*(separator*value)^0)
+local pattern   = spaces * Ct(value*(separator*value)^0)
 
 -- "aap, {noot}, mies" : outer {} removes, leading spaces ignored
 
@@ -237,3 +245,34 @@ end
 function parsers.listitem(str)
     return gmatch(str,"[^, ]+")
 end
+
+-- splitthousands: insert "," between groups of three digits in the leading digit run of a string
+local digit = R("09")
+
+local pattern = Cs { "start", -- Cs: the result is the matched text with the Cc(",") values spliced in
+    start    = V("one") + V("two") + V("three"), -- try a leading group of 1, 2 or 3 digits, in that order
+    rest     = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0, -- zero or more ",ddd" groups, then "." or end of input; whatever follows is consumed by anything^0
+    thousand = digit * digit * digit, -- exactly three digits
+    one      = digit * V("rest"),
+    two      = digit * digit * V("rest"),
+    three    = V("thousand") * V("rest"),
+}
+
+patterns.splitthousands = pattern -- maybe better in the parsers namespace ?
+
+function parsers.splitthousands(str) -- returns the substituted string, or str itself when the pattern does not match
+    return lpegmatch(pattern,str) or str
+end
+
+-- print(parsers.splitthousands("11111111111.11"))
+
+local optionalwhitespace = whitespace^0
+
+patterns.words      = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1) -- table of runs free of punctuation and whitespace; any other character is consumed without being captured
+patterns.sentences  = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1) -- table of pieces that each run up to and including the next "."; whitespace before a piece is skipped
+patterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1) -- a piece stops before spacer^0 followed by two newlines; whitespace running to end of input is replaced by ""
+
+-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n "
+-- inspect(lpegmatch(patterns.paragraphs,str))
+-- inspect(lpegmatch(patterns.sentences,str))
+-- inspect(lpegmatch(patterns.words,str))