From 8248cba2a913b762265cfb43c1cd93d0c36888ef Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Wed, 21 Oct 2009 10:28:00 +0200 Subject: beta 2009.10.21 10:28 --- scripts/context/lua/mtxrun.lua | 37 +++++++++++++++++++++++++++++++++- scripts/context/stubs/mswin/mtxrun.lua | 37 +++++++++++++++++++++++++++++++++- scripts/context/stubs/unix/mtxrun | 37 +++++++++++++++++++++++++++++++++- 3 files changed, 108 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index c63727452..cbb27098d 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -317,7 +317,7 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } -local P, S, Ct, C, Cs, Cc = lpeg.P, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc +local P, R, S, Ct, C, Cs, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc --~ l-lpeg.lua : @@ -424,6 +424,41 @@ end --~ return p --~ end +--~ from roberto's site: +--~ +--~ -- decode a two-byte UTF-8 sequence +--~ local function f2 (s) +--~ local c1, c2 = string.byte(s, 1, 2) +--~ return c1 * 64 + c2 - 12416 +--~ end +--~ +--~ -- decode a three-byte UTF-8 sequence +--~ local function f3 (s) +--~ local c1, c2, c3 = string.byte(s, 1, 3) +--~ return (c1 * 64 + c2) * 64 + c3 - 925824 +--~ end +--~ +--~ -- decode a four-byte UTF-8 sequence +--~ local function f4 (s) +--~ local c1, c2, c3, c4 = string.byte(s, 1, 4) +--~ return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 +--~ end +--~ +--~ local cont = lpeg.R("\128\191") -- continuation byte +--~ +--~ local utf8 = lpeg.R("\0\127") / string.byte +--~ + lpeg.R("\194\223") * cont / f2 +--~ + lpeg.R("\224\239") * cont * cont / f3 +--~ + lpeg.R("\240\244") * cont * cont * cont / f4 +--~ +--~ local decode_pattern = lpeg.Ct(utf8^0) * -1 + + +local cont = R("\128\191") -- continuation byte + +lpeg.utf8 = R("\0\127") + R("\194\223") * cont + R("\224\239") * cont * cont + R("\240\244") * cont * cont * cont + + end -- of closure diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index c63727452..cbb27098d 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -317,7 +317,7 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } -local P, S, Ct, C, Cs, Cc = lpeg.P, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc +local P, R, S, Ct, C, Cs, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc --~ l-lpeg.lua : @@ -424,6 +424,41 @@ end --~ return p --~ end +--~ from roberto's site: +--~ +--~ -- decode a two-byte UTF-8 sequence +--~ local function f2 (s) +--~ local c1, c2 = string.byte(s, 1, 2) +--~ return c1 * 64 + c2 - 12416 +--~ end +--~ +--~ -- decode a three-byte UTF-8 sequence +--~ local function f3 (s) +--~ local c1, c2, c3 = string.byte(s, 1, 3) +--~ return (c1 * 64 + c2) * 64 + c3 - 925824 +--~ end +--~ +--~ -- decode a four-byte UTF-8 sequence +--~ local function f4 (s) +--~ local c1, c2, c3, c4 = string.byte(s, 1, 4) +--~ return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 +--~ end +--~ +--~ local cont = lpeg.R("\128\191") -- continuation byte +--~ +--~ local utf8 = lpeg.R("\0\127") / string.byte +--~ + lpeg.R("\194\223") * cont / f2 +--~ + lpeg.R("\224\239") * cont * cont / f3 +--~ + lpeg.R("\240\244") * cont * cont * cont / f4 +--~ +--~ local decode_pattern = lpeg.Ct(utf8^0) * -1 + + +local cont = R("\128\191") -- continuation byte + +lpeg.utf8 = R("\0\127") + R("\194\223") * cont + R("\224\239") * cont * cont + R("\240\244") * cont * cont * cont + + end -- of closure diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index c63727452..cbb27098d 100755 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -317,7 +317,7 @@ if not modules then modules = { } end modules ['l-lpeg'] = { license = "see context related readme files" } -local P, S, Ct, C, Cs, Cc = lpeg.P, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc +local P, R, S, Ct, C, Cs, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.Ct, lpeg.C, lpeg.Cs, lpeg.Cc --~ l-lpeg.lua : @@ -424,6 +424,41 @@ end --~ return p --~ end +--~ from roberto's site: +--~ +--~ -- decode a two-byte UTF-8 sequence +--~ local function f2 (s) +--~ local c1, c2 = string.byte(s, 1, 2) +--~ return c1 * 64 + c2 - 12416 +--~ end +--~ +--~ -- decode a three-byte UTF-8 sequence +--~ local function f3 (s) +--~ local c1, c2, c3 = string.byte(s, 1, 3) +--~ return (c1 * 64 + c2) * 64 + c3 - 925824 +--~ end +--~ +--~ -- decode a four-byte UTF-8 sequence +--~ local function f4 (s) +--~ local c1, c2, c3, c4 = string.byte(s, 1, 4) +--~ return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 +--~ end +--~ +--~ local cont = lpeg.R("\128\191") -- continuation byte +--~ +--~ local utf8 = lpeg.R("\0\127") / string.byte +--~ + lpeg.R("\194\223") * cont / f2 +--~ + lpeg.R("\224\239") * cont * cont / f3 +--~ + lpeg.R("\240\244") * cont * cont * cont / f4 +--~ +--~ local decode_pattern = lpeg.Ct(utf8^0) * -1 + + +local cont = R("\128\191") -- continuation byte + +lpeg.utf8 = R("\0\127") + R("\194\223") * cont + R("\224\239") * cont * cont + R("\240\244") * cont * cont * cont + + end -- of closure -- cgit v1.2.3