diff options
author | Hans Hagen <pragma@wxs.nl> | 2009-10-21 10:28:00 +0200 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2009-10-21 10:28:00 +0200 |
commit | 8248cba2a913b762265cfb43c1cd93d0c36888ef (patch) | |
tree | 52dcce58f312884635c52a7b65ee30a657620fec /tex/context/base/l-lpeg.lua | |
parent | e95ec93b5fe70c7d0e16071a1df8777cb9289d3b (diff) | |
download | context-8248cba2a913b762265cfb43c1cd93d0c36888ef.tar.gz |
beta 2009.10.21 10:28
Diffstat (limited to 'tex/context/base/l-lpeg.lua')
-rw-r--r-- | tex/context/base/l-lpeg.lua | 37 |
1 files changed, 36 insertions, 1 deletions
diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua index 2c95730c4..b2a646fcb 100644 --- a/tex/context/base/l-lpeg.lua +++ b/tex/context/base/l-lpeg.lua @@ -6,7 +6,7 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } -local P, S, Ct, C, Cs, Cc = lpeg.P, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc +local P, R, S, Ct, C, Cs, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc --~ l-lpeg.lua : @@ -112,3 +112,38 @@ end --~ end --~ return p --~ end + +--~ from roberto's site: +--~ +--~ -- decode a two-byte UTF-8 sequence +--~ local function f2 (s) +--~ local c1, c2 = string.byte(s, 1, 2) +--~ return c1 * 64 + c2 - 12416 +--~ end +--~ +--~ -- decode a three-byte UTF-8 sequence +--~ local function f3 (s) +--~ local c1, c2, c3 = string.byte(s, 1, 3) +--~ return (c1 * 64 + c2) * 64 + c3 - 925824 +--~ end +--~ +--~ -- decode a four-byte UTF-8 sequence +--~ local function f4 (s) +--~ local c1, c2, c3, c4 = string.byte(s, 1, 4) +--~ return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 +--~ end +--~ +--~ local cont = lpeg.R("\128\191") -- continuation byte +--~ +--~ local utf8 = lpeg.R("\0\127") / string.byte +--~ + lpeg.R("\194\223") * cont / f2 +--~ + lpeg.R("\224\239") * cont * cont / f3 +--~ + lpeg.R("\240\244") * cont * cont * cont / f4 +--~ +--~ local decode_pattern = lpeg.Ct(utf8^0) * -1 + + +local cont = R("\128\191") -- continuation byte + +lpeg.utf8 = R("\0\127") + R("\194\223") * cont + R("\224\239") * cont * cont + R("\240\244") * cont * cont * cont + |