From 8248cba2a913b762265cfb43c1cd93d0c36888ef Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Wed, 21 Oct 2009 10:28:00 +0200 Subject: beta 2009.10.21 10:28 --- tex/generic/context/luatex-fonts-merged.lua | 39 +++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'tex/generic') diff --git a/tex/generic/context/luatex-fonts-merged.lua b/tex/generic/context/luatex-fonts-merged.lua index fec779d9e..6c9a341dc 100644 --- a/tex/generic/context/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : c:/data/develop/context/texmf/tex/generic/context/luatex-fonts-merged.lua -- parent file : c:/data/develop/context/texmf/tex/generic/context/luatex-fonts.lua --- merge date : 10/19/09 14:48:40 +-- merge date : 10/21/09 10:32:02 do -- begin closure to overcome local limits and interference @@ -274,7 +274,7 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } -local P, S, Ct, C, Cs, Cc = lpeg.P, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc +local P, R, S, Ct, C, Cs, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc --~ l-lpeg.lua : @@ -381,6 +381,41 @@ end --~ return p --~ end +--~ from roberto's site: +--~ +--~ -- decode a two-byte UTF-8 sequence +--~ local function f2 (s) +--~ local c1, c2 = string.byte(s, 1, 2) +--~ return c1 * 64 + c2 - 12416 +--~ end +--~ +--~ -- decode a three-byte UTF-8 sequence +--~ local function f3 (s) +--~ local c1, c2, c3 = string.byte(s, 1, 3) +--~ return (c1 * 64 + c2) * 64 + c3 - 925824 +--~ end +--~ +--~ -- decode a four-byte UTF-8 sequence +--~ local function f4 (s) +--~ local c1, c2, c3, c4 = string.byte(s, 1, 4) +--~ return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 +--~ end +--~ +--~ local cont = lpeg.R("\128\191") -- continuation byte +--~ +--~ local utf8 = lpeg.R("\0\127") / string.byte +--~ + lpeg.R("\194\223") * cont / f2 +--~ + lpeg.R("\224\239") * cont * cont / f3 +--~ + lpeg.R("\240\244") * cont * cont * cont / f4 +--~ +--~ local decode_pattern = lpeg.Ct(utf8^0) * -1 + + +local cont = R("\128\191") -- continuation byte + +lpeg.utf8 = R("\0\127") + R("\194\223") * cont + R("\224\239") * cont * cont + R("\240\244") * cont * cont * cont + + end -- closure do -- begin closure to overcome local limits and interference -- cgit v1.2.3