From 8248cba2a913b762265cfb43c1cd93d0c36888ef Mon Sep 17 00:00:00 2001
From: Hans Hagen
Date: Wed, 21 Oct 2009 10:28:00 +0200
Subject: beta 2009.10.21 10:28

---
 tex/context/base/l-lpeg.lua | 37 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

(limited to 'tex/context/base/l-lpeg.lua')

diff --git a/tex/context/base/l-lpeg.lua b/tex/context/base/l-lpeg.lua
index 2c95730c4..b2a646fcb 100644
--- a/tex/context/base/l-lpeg.lua
+++ b/tex/context/base/l-lpeg.lua
@@ -6,7 +6,7 @@ if not modules then modules = { } end modules ['l-lpeg'] = {
 license = "see context related readme files"
 }
 
-local P, S, Ct, C, Cs, Cc = lpeg.P, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc
+local P, R, S, Ct, C, Cs, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc
 
 --~ l-lpeg.lua :
 
@@ -112,3 +112,38 @@ end
 --~ end
 --~ return p
 --~ end
+
+--~ from roberto's site:
+--~
+--~ -- decode a two-byte UTF-8 sequence
+--~ local function f2 (s)
+--~ local c1, c2 = string.byte(s, 1, 2)
+--~ return c1 * 64 + c2 - 12416
+--~ end
+--~
+--~ -- decode a three-byte UTF-8 sequence
+--~ local function f3 (s)
+--~ local c1, c2, c3 = string.byte(s, 1, 3)
+--~ return (c1 * 64 + c2) * 64 + c3 - 925824
+--~ end
+--~
+--~ -- decode a four-byte UTF-8 sequence
+--~ local function f4 (s)
+--~ local c1, c2, c3, c4 = string.byte(s, 1, 4)
+--~ return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168
+--~ end
+--~
+--~ local cont = lpeg.R("\128\191") -- continuation byte
+--~
+--~ local utf8 = lpeg.R("\0\127") / string.byte
+--~ + lpeg.R("\194\223") * cont / f2
+--~ + lpeg.R("\224\239") * cont * cont / f3
+--~ + lpeg.R("\240\244") * cont * cont * cont / f4
+--~
+--~ local decode_pattern = lpeg.Ct(utf8^0) * -1
+
+
+local cont = R("\128\191") -- continuation byte
+
+lpeg.utf8 = R("\0\127") + R("\194\223") * cont + R("\224\239") * cont * cont + R("\240\244") * cont * cont * cont
+
-- 
cgit v1.2.3