From 7b3b116e3777e1dd17ef3d335b2a503a603bc264 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Sat, 21 Jul 2012 13:05:00 +0200 Subject: beta 2012.07.21 13:05 --- scripts/context/lua/mtxrun.lua | 41 +++++++++++++++++++++++++--------- scripts/context/stubs/mswin/mtxrun.lua | 41 +++++++++++++++++++++++++--------- scripts/context/stubs/unix/mtxrun | 41 +++++++++++++++++++++++++--------- 3 files changed, 93 insertions(+), 30 deletions(-) (limited to 'scripts') diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index 1e4958e39..07bbcaa91 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -1449,11 +1449,11 @@ function lpeg.keeper(str) end function lpeg.frontstripper(str) -- or pattern (yet undocumented) - return (P(str) + P(true)) * Cs(P(1)^0) + return (P(str) + P(true)) * Cs(anything^0) end function lpeg.endstripper(str) -- or pattern (yet undocumented) - return Cs((1 - P(str) * P(-1))^0) + return Cs((1 - P(str) * endofstring)^0) end -- Just for fun I looked at the used bytecode and @@ -4781,7 +4781,7 @@ if not modules then modules = { } end modules ['util-prs'] = { local lpeg, table, string = lpeg, table, string -local P, R, V, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc +local P, R, V, S, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc local lpegmatch, patterns = lpeg.match, lpeg.patterns local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find local tostring, type, next = tostring, type, next @@ -4794,6 +4794,21 @@ parsers.patterns = parsers.patterns or { } local setmetatableindex = table.setmetatableindex local sortedhash = table.sortedhash +-- we share some patterns + +local space = P(' ') +local equal = P("=") +local comma = P(",") +local lbrace = P("{") +local rbrace = P("}") +local period = S(".") +local punctuation = S(".,:;") +local spacer = patterns.spacer +local whitespace = patterns.whitespace +local newline = patterns.newline +local anything = patterns.anything +local endofstring = patterns.endofstring + -- we could use a Cf Cg construct local escape, left, right = P("\\"), P('{'), P('}') @@ -4803,16 +4818,11 @@ patterns.balanced = P { [2] = left * V(1) * right } -local space = P(' ') -local equal = P("=") -local comma = P(",") -local lbrace = P("{") -local rbrace = P("}") local nobrace = 1 - (lbrace+rbrace) local nested = P { lbrace * (nobrace + V(1))^0 * rbrace } local spaces = space^0 local argument = Cs((lbrace/"") * ((nobrace + nested)^0) * (rbrace/"")) -local content = (1-P(-1))^0 +local content = (1-endofstring)^0 patterns.nested = nested -- no capture patterns.argument = argument -- argument after e.g. = @@ -5014,7 +5024,7 @@ local digit = R("09") local pattern = Cs { "start", start = V("one") + V("two") + V("three"), - rest = (Cc(",") * V("thousand"))^0 * (P(".") + P(-1)) * P(1)^0, + rest = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0, thousand = digit * digit * digit, one = digit * V("rest"), two = digit * digit * V("rest"), @@ -5029,6 +5039,17 @@ end -- print(parsers.splitthousands("11111111111.11")) +local optionalwhitespace = whitespace^0 + +patterns.words = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1) +patterns.sentences = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1) +patterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1) + +-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n " +-- inspect(lpegmatch(patterns.paragraphs,str)) +-- inspect(lpegmatch(patterns.sentences,str)) +-- inspect(lpegmatch(patterns.words,str)) + end -- of closure diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index 1e4958e39..07bbcaa91 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -1449,11 +1449,11 @@ function lpeg.keeper(str) end function lpeg.frontstripper(str) -- or pattern (yet undocumented) - return (P(str) + P(true)) * Cs(P(1)^0) + return (P(str) + P(true)) * Cs(anything^0) end function lpeg.endstripper(str) -- or pattern (yet undocumented) - return Cs((1 - P(str) * P(-1))^0) + return Cs((1 - P(str) * endofstring)^0) end -- Just for fun I looked at the used bytecode and @@ -4781,7 +4781,7 @@ if not modules then modules = { } end modules ['util-prs'] = { local lpeg, table, string = lpeg, table, string -local P, R, V, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc +local P, R, V, S, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc local lpegmatch, patterns = lpeg.match, lpeg.patterns local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find local tostring, type, next = tostring, type, next @@ -4794,6 +4794,21 @@ parsers.patterns = parsers.patterns or { } local setmetatableindex = table.setmetatableindex local sortedhash = table.sortedhash +-- we share some patterns + +local space = P(' ') +local equal = P("=") +local comma = P(",") +local lbrace = P("{") +local rbrace = P("}") +local period = S(".") +local punctuation = S(".,:;") +local spacer = patterns.spacer +local whitespace = patterns.whitespace +local newline = patterns.newline +local anything = patterns.anything +local endofstring = patterns.endofstring + -- we could use a Cf Cg construct local escape, left, right = P("\\"), P('{'), P('}') @@ -4803,16 +4818,11 @@ patterns.balanced = P { [2] = left * V(1) * right } -local space = P(' ') -local equal = P("=") -local comma = P(",") -local lbrace = P("{") -local rbrace = P("}") local nobrace = 1 - (lbrace+rbrace) local nested = P { lbrace * (nobrace + V(1))^0 * rbrace } local spaces = space^0 local argument = Cs((lbrace/"") * ((nobrace + nested)^0) * (rbrace/"")) -local content = (1-P(-1))^0 +local content = (1-endofstring)^0 patterns.nested = nested -- no capture patterns.argument = argument -- argument after e.g. = @@ -5014,7 +5024,7 @@ local digit = R("09") local pattern = Cs { "start", start = V("one") + V("two") + V("three"), - rest = (Cc(",") * V("thousand"))^0 * (P(".") + P(-1)) * P(1)^0, + rest = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0, thousand = digit * digit * digit, one = digit * V("rest"), two = digit * digit * V("rest"), @@ -5029,6 +5039,17 @@ end -- print(parsers.splitthousands("11111111111.11")) +local optionalwhitespace = whitespace^0 + +patterns.words = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1) +patterns.sentences = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1) +patterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1) + +-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n " +-- inspect(lpegmatch(patterns.paragraphs,str)) +-- inspect(lpegmatch(patterns.sentences,str)) +-- inspect(lpegmatch(patterns.words,str)) + end -- of closure diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index 1e4958e39..07bbcaa91 100755 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -1449,11 +1449,11 @@ function lpeg.keeper(str) end function lpeg.frontstripper(str) -- or pattern (yet undocumented) - return (P(str) + P(true)) * Cs(P(1)^0) + return (P(str) + P(true)) * Cs(anything^0) end function lpeg.endstripper(str) -- or pattern (yet undocumented) - return Cs((1 - P(str) * P(-1))^0) + return Cs((1 - P(str) * endofstring)^0) end -- Just for fun I looked at the used bytecode and @@ -4781,7 +4781,7 @@ if not modules then modules = { } end modules ['util-prs'] = { local lpeg, table, string = lpeg, table, string -local P, R, V, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc +local P, R, V, S, C, Ct, Cs, Carg, Cc = lpeg.P, lpeg.R, lpeg.V, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cs, lpeg.Carg, lpeg.Cc local lpegmatch, patterns = lpeg.match, lpeg.patterns local concat, format, gmatch, find = table.concat, string.format, string.gmatch, string.find local tostring, type, next = tostring, type, next @@ -4794,6 +4794,21 @@ parsers.patterns = parsers.patterns or { } local setmetatableindex = table.setmetatableindex local sortedhash = table.sortedhash +-- we share some patterns + +local space = P(' ') +local equal = P("=") +local comma = P(",") +local lbrace = P("{") +local rbrace = P("}") +local period = S(".") +local punctuation = S(".,:;") +local spacer = patterns.spacer +local whitespace = patterns.whitespace +local newline = patterns.newline +local anything = patterns.anything +local endofstring = patterns.endofstring + -- we could use a Cf Cg construct local escape, left, right = P("\\"), P('{'), P('}') @@ -4803,16 +4818,11 @@ patterns.balanced = P { [2] = left * V(1) * right } -local space = P(' ') -local equal = P("=") -local comma = P(",") -local lbrace = P("{") -local rbrace = P("}") local nobrace = 1 - (lbrace+rbrace) local nested = P { lbrace * (nobrace + V(1))^0 * rbrace } local spaces = space^0 local argument = Cs((lbrace/"") * ((nobrace + nested)^0) * (rbrace/"")) -local content = (1-P(-1))^0 +local content = (1-endofstring)^0 patterns.nested = nested -- no capture patterns.argument = argument -- argument after e.g. = @@ -5014,7 +5024,7 @@ local digit = R("09") local pattern = Cs { "start", start = V("one") + V("two") + V("three"), - rest = (Cc(",") * V("thousand"))^0 * (P(".") + P(-1)) * P(1)^0, + rest = (Cc(",") * V("thousand"))^0 * (P(".") + endofstring) * anything^0, thousand = digit * digit * digit, one = digit * V("rest"), two = digit * digit * V("rest"), @@ -5029,6 +5039,17 @@ end -- print(parsers.splitthousands("11111111111.11")) +local optionalwhitespace = whitespace^0 + +patterns.words = Ct((Cs((1-punctuation-whitespace)^1) + anything)^1) +patterns.sentences = Ct((optionalwhitespace * Cs((1-period)^0 * period))^1) +patterns.paragraphs = Ct((optionalwhitespace * Cs((whitespace^1*endofstring/"" + 1 - (spacer^0*newline*newline))^1))^1) + +-- local str = " Word1 word2. \n Word3 word4. \n\n Word5 word6.\n " +-- inspect(lpegmatch(patterns.paragraphs,str)) +-- inspect(lpegmatch(patterns.sentences,str)) +-- inspect(lpegmatch(patterns.words,str)) + end -- of closure -- cgit v1.2.3