diff options
author | Marius <mariausol@gmail.com> | 2013-10-20 01:20:14 +0300 |
---|---|---|
committer | Marius <mariausol@gmail.com> | 2013-10-20 01:20:14 +0300 |
commit | 965214d981e6129b782c67adcaf3a81aedcb0bac (patch) | |
tree | 84f5945aae8efc9b6eb1898b873be5453cafe43d /tex/context/base/l-lpeg.lua | |
parent | e7d0d90a434e5452ff9e86c8abab5a4cac35e2f1 (diff) | |
download | context-965214d981e6129b782c67adcaf3a81aedcb0bac.tar.gz |
stable 2013.05.28 00:36
Diffstat (limited to 'tex/context/base/l-lpeg.lua')
-rw-r--r-- | tex/context/base/l-lpeg.lua | 150 |
1 files changed, 40 insertions, 110 deletions
diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua index 399b3ad65..323c73b69 100644 --- a/tex/context/base/l-lpeg.lua +++ b/tex/context/base/l-lpeg.lua @@ -13,19 +13,6 @@ if not modules then modules = { } end modules ['l-lpeg'] = { lpeg = require("lpeg") --- The latest lpeg doesn't have print any more, and even the new ones are not --- available by default (only when debug mode is enabled), which is a pitty as --- as it helps bailign down bottlenecks. Performance seems comparable, although --- --- local p = lpeg.C(lpeg.P(1)^0 * lpeg.P(-1)) --- local a = string.rep("123",10) --- lpeg.match(p,a) --- --- is nearly 20% slower and also still suboptimal (i.e. a match that runs from --- begin to end, one of the cases where string matchers win). - -if not lpeg.print then function lpeg.print(...) print(lpeg.pcode(...)) end end - -- tracing (only used when we encounter a problem in integration of lpeg in luatex) -- some code will move to unicode and string @@ -82,6 +69,7 @@ setinspector(function(v) if lpegtype(v) then lpegprint(v) return true end end) lpeg.patterns = lpeg.patterns or { } -- so that we can share local patterns = lpeg.patterns + local anything = P(1) local endofstring = P(-1) local alwaysmatched = P(true) @@ -91,59 +79,37 @@ patterns.endofstring = endofstring patterns.beginofstring = alwaysmatched patterns.alwaysmatched = alwaysmatched -local sign = S('+-') -local zero = P('0') -local digit = R('09') -local octdigit = R("07") -local lowercase = R("az") -local uppercase = R("AZ") -local underscore = P("_") -local hexdigit = digit + lowercase + uppercase +local digit, sign = R('09'), S('+-') local cr, lf, crlf = P("\r"), P("\n"), P("\r\n") local newline = crlf + S("\r\n") -- cr + lf local escaped = P("\\") * anything local squote = P("'") local dquote = P('"') local space = P(" ") -local period = P(".") -local comma = P(",") - -local utfbom_32_be = P('\000\000\254\255') -- 00 00 FE FF -local utfbom_32_le = P('\255\254\000\000') -- FF FE 00 00 -local utfbom_16_be = P('\254\255') -- FE FF -local utfbom_16_le = P('\255\254') -- FF FE -local utfbom_8 = P('\239\187\191') -- EF BB BF + +local utfbom_32_be = P('\000\000\254\255') +local utfbom_32_le = P('\255\254\000\000') +local utfbom_16_be = P('\255\254') +local utfbom_16_le = P('\254\255') +local utfbom_8 = P('\239\187\191') local utfbom = utfbom_32_be + utfbom_32_le + utfbom_16_be + utfbom_16_le + utfbom_8 local utftype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le") + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le") + utfbom_8 * Cc("utf-8") + alwaysmatched * Cc("utf-8") -- assume utf8 -local utfstricttype = utfbom_32_be * Cc("utf-32-be") + utfbom_32_le * Cc("utf-32-le") - + utfbom_16_be * Cc("utf-16-be") + utfbom_16_le * Cc("utf-16-le") - + utfbom_8 * Cc("utf-8") local utfoffset = utfbom_32_be * Cc(4) + utfbom_32_le * Cc(4) + utfbom_16_be * Cc(2) + utfbom_16_le * Cc(2) + utfbom_8 * Cc(3) + Cc(0) local utf8next = R("\128\191") -patterns.utfbom_32_be = utfbom_32_be -patterns.utfbom_32_le = utfbom_32_le -patterns.utfbom_16_be = utfbom_16_be -patterns.utfbom_16_le = utfbom_16_le -patterns.utfbom_8 = utfbom_8 - -patterns.utf_16_be_nl = P("\000\r\000\n") + P("\000\r") + P("\000\n") -patterns.utf_16_le_nl = P("\r\000\n\000") + P("\r\000") + P("\n\000") - patterns.utf8one = R("\000\127") patterns.utf8two = R("\194\223") * utf8next patterns.utf8three = R("\224\239") * utf8next * utf8next patterns.utf8four = R("\240\244") * utf8next * utf8next * utf8next patterns.utfbom = utfbom patterns.utftype = utftype -patterns.utfstricttype = utfstricttype patterns.utfoffset = utfoffset local utf8char = patterns.utf8one + patterns.utf8two + patterns.utf8three + patterns.utf8four @@ -171,14 +137,29 @@ patterns.nonwhitespace = nonwhitespace local stripper = spacer^0 * C((spacer^0 * nonspacer^1)^0) -- from example by roberto ------ collapser = Cs(spacer^0/"" * ((spacer^1 * endofstring / "") + (spacer^1/" ") + P(1))^0) +----- collapser = Cs(spacer^0/"" * ((spacer^1 * P(-1) / "") + (spacer^1/" ") + P(1))^0) local collapser = Cs(spacer^0/"" * nonspacer^0 * ((spacer^0/" " * nonspacer^1)^0)) patterns.stripper = stripper patterns.collapser = collapser -patterns.lowercase = lowercase -patterns.uppercase = uppercase +patterns.digit = digit +patterns.sign = sign +patterns.cardinal = sign^0 * digit^1 +patterns.integer = sign^0 * digit^1 +patterns.unsigned = digit^0 * P('.') * digit^1 +patterns.float = sign^0 * patterns.unsigned +patterns.cunsigned = digit^0 * P(',') * digit^1 +patterns.cfloat = sign^0 * patterns.cunsigned +patterns.number = patterns.float + patterns.integer +patterns.cnumber = patterns.cfloat + patterns.integer +patterns.oct = P("0") * R("07")^1 +patterns.octal = patterns.oct +patterns.HEX = P("0x") * R("09","AF")^1 +patterns.hex = P("0x") * R("09","af")^1 +patterns.hexadecimal = P("0x") * R("09","AF","af")^1 +patterns.lowercase = R("az") +patterns.uppercase = R("AZ") patterns.letter = patterns.lowercase + patterns.uppercase patterns.space = space patterns.tab = P("\t") @@ -186,12 +167,12 @@ patterns.spaceortab = patterns.space + patterns.tab patterns.newline = newline patterns.emptyline = newline^1 patterns.equal = P("=") -patterns.comma = comma -patterns.commaspacer = comma * spacer^0 -patterns.period = period +patterns.comma = P(",") +patterns.commaspacer = P(",") * spacer^0 +patterns.period = P(".") patterns.colon = P(":") patterns.semicolon = P(";") -patterns.underscore = underscore +patterns.underscore = P("_") patterns.escaped = escaped patterns.squote = squote patterns.dquote = dquote @@ -206,38 +187,12 @@ patterns.singlequoted = squote * patterns.nosquote * squote patterns.doublequoted = dquote * patterns.nodquote * dquote patterns.quoted = patterns.doublequoted + patterns.singlequoted -patterns.digit = digit -patterns.octdigit = octdigit -patterns.hexdigit = hexdigit -patterns.sign = sign -patterns.cardinal = digit^1 -patterns.integer = sign^-1 * digit^1 -patterns.unsigned = digit^0 * period * digit^1 -patterns.float = sign^-1 * patterns.unsigned -patterns.cunsigned = digit^0 * comma * digit^1 -patterns.cfloat = sign^-1 * patterns.cunsigned -patterns.number = patterns.float + patterns.integer -patterns.cnumber = patterns.cfloat + patterns.integer -patterns.oct = zero * octdigit^1 -patterns.octal = patterns.oct -patterns.HEX = zero * P("X") * (digit+uppercase)^1 -patterns.hex = zero * P("x") * (digit+lowercase)^1 -patterns.hexadecimal = zero * S("xX") * hexdigit^1 - -patterns.hexafloat = sign^-1 - * zero * S("xX") - * (hexdigit^0 * period * hexdigit^1 + hexdigit^1 * period * hexdigit^0 + hexdigit^1) - * (S("pP") * sign^-1 * hexdigit^1)^-1 -patterns.decafloat = sign^-1 - * (digit^0 * period * digit^1 + digit^1 * period * digit^0 + digit^1) - * S("eE") * sign^-1 * digit^1 - -patterns.propername = (uppercase + lowercase + underscore) * (uppercase + lowercase + underscore + digit)^0 * endofstring +patterns.propername = R("AZ","az","__") * R("09","AZ","az", "__")^0 * P(-1) patterns.somecontent = (anything - newline - space)^1 -- (utf8char - newline - space)^1 patterns.beginline = #(1-newline) -patterns.longtostring = Cs(whitespace^0/"" * ((patterns.quoted + nonwhitespace^1 + whitespace^1/"" * (P(-1) + Cc(" ")))^0)) +patterns.longtostring = Cs(whitespace^0/"" * nonwhitespace^0 * ((whitespace^0/" " * (patterns.quoted + nonwhitespace)^1)^0)) local function anywhere(pattern) --slightly adapted from website return P { P(pattern) + 1 * V(1) } @@ -466,10 +421,7 @@ function lpeg.replacer(one,two,makefunction,isutf) -- in principle we should sor end end --- local pattern1 = P(1-P(pattern))^0 * P(pattern) : test for not nil --- local pattern2 = (P(pattern) * Cc(true) + P(1))^0 : test for true (could be faster, but not much) - -function lpeg.finder(lst,makefunction) -- beware: slower than find with 'patternless finds' +function lpeg.finder(lst,makefunction) local pattern if type(lst) == "table" then pattern = P(false) @@ -504,8 +456,8 @@ local splitters_f, splitters_s = { }, { } function lpeg.firstofsplit(separator) -- always return value local splitter = splitters_f[separator] if not splitter then - local pattern = P(separator) - splitter = C((1 - pattern)^0) + separator = P(separator) + splitter = C((1 - separator)^0) splitters_f[separator] = splitter end return splitter @@ -514,35 +466,13 @@ end function lpeg.secondofsplit(separator) -- nil if not split local splitter = splitters_s[separator] if not splitter then - local pattern = P(separator) - splitter = (1 - pattern)^0 * pattern * C(anything^0) - splitters_s[separator] = splitter - end - return splitter -end - -local splitters_s, splitters_p = { }, { } - -function lpeg.beforesuffix(separator) -- nil if nothing but empty is ok - local splitter = splitters_s[separator] - if not splitter then - local pattern = P(separator) - splitter = C((1 - pattern)^0) * pattern * endofstring + separator = P(separator) + splitter = (1 - separator)^0 * separator * C(anything^0) splitters_s[separator] = splitter end return splitter end -function lpeg.afterprefix(separator) -- nil if nothing but empty is ok - local splitter = splitters_p[separator] - if not splitter then - local pattern = P(separator) - splitter = pattern * C(anything^0) - splitters_p[separator] = splitter - end - return splitter -end - function lpeg.balancer(left,right) left, right = P(left), P(right) return P { left * ((1 - left - right) + V(1))^0 * right } @@ -902,9 +832,9 @@ end -- moved here (before util-str) ------ digit = R("09") ------ period = P(".") ------ zero = P("0") +local digit = R("09") +local period = P(".") +local zero = P("0") local trailingzeros = zero^0 * -digit -- suggested by Roberto R local case_1 = period * trailingzeros / "" local case_2 = period * (digit - trailingzeros)^1 * (trailingzeros / "") |