diff options
author | Hans Hagen <pragma@wxs.nl> | 2012-05-30 11:26:00 +0200 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2012-05-30 11:26:00 +0200 |
commit | c3efc6042c5a5a4d0f1a80bc3a097f0ae2963f7c (patch) | |
tree | 6bb75e016db19cd96e3bfd971e2b44abcede2474 /scripts | |
parent | 48e2f23903816dbe8843329bfcd3e6658f22b139 (diff) | |
download | context-c3efc6042c5a5a4d0f1a80bc3a097f0ae2963f7c.tar.gz |
beta 2012.05.30 11:26
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/context/lua/mtxrun.lua | 22 | ||||
-rw-r--r-- | scripts/context/stubs/mswin/mtxrun.lua | 22 | ||||
-rwxr-xr-x | scripts/context/stubs/unix/mtxrun | 22 |
3 files changed, 60 insertions, 6 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index 644c1d9b2..108f2a8a1 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -4087,8 +4087,10 @@ local one = P(1) local two = C(1) * C(1) local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) +-- actually one of them is already utf ... sort of useless this one + local pattern = P("\254\255") * Cs( ( - four / function(a,b,c,d) + four / function(a,b,c,d) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( ( + one )^1 ) + P("\255\254") * Cs( ( - four / function(b,a,d,c) + four / function(b,a,d,c) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4114,6 +4116,22 @@ function string.toutf(s) return lpegmatch(pattern,s) or s -- todo: utf32 end +local validatedutf = Cs ( + ( + patterns.utf8one + + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + + P(1) / "�" + )^0 +) + +patterns.validatedutf = validatedutf + +function string.validutf(str) + return lpegmatch(validatedutf,str) +end + end -- of closure diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index 644c1d9b2..108f2a8a1 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -4087,8 +4087,10 @@ local one = P(1) local two = C(1) * C(1) local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) +-- actually one of them is already utf ... sort of useless this one + local pattern = P("\254\255") * Cs( ( - four / function(a,b,c,d) + four / function(a,b,c,d) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( ( + one )^1 ) + P("\255\254") * Cs( ( - four / function(b,a,d,c) + four / function(b,a,d,c) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4114,6 +4116,22 @@ function string.toutf(s) return lpegmatch(pattern,s) or s -- todo: utf32 end +local validatedutf = Cs ( + ( + patterns.utf8one + + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + + P(1) / "�" + )^0 +) + +patterns.validatedutf = validatedutf + +function string.validutf(str) + return lpegmatch(validatedutf,str) +end + end -- of closure diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index 644c1d9b2..108f2a8a1 100755 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -4087,8 +4087,10 @@ local one = P(1) local two = C(1) * C(1) local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) +-- actually one of them is already utf ... sort of useless this one + local pattern = P("\254\255") * Cs( ( - four / function(a,b,c,d) + four / function(a,b,c,d) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( ( + one )^1 ) + P("\255\254") * Cs( ( - four / function(b,a,d,c) + four / function(b,a,d,c) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4114,6 +4116,22 @@ function string.toutf(s) return lpegmatch(pattern,s) or s -- todo: utf32 end +local validatedutf = Cs ( + ( + patterns.utf8one + + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + + P(1) / "�" + )^0 +) + +patterns.validatedutf = validatedutf + +function string.validutf(str) + return lpegmatch(validatedutf,str) +end + end -- of closure |