summary refs log tree commit diff
path: root/scripts
diff options
context:
space:
mode:
author Hans Hagen <pragma@wxs.nl> 2012-05-30 11:26:00 +0200
committer Hans Hagen <pragma@wxs.nl> 2012-05-30 11:26:00 +0200
commit c3efc6042c5a5a4d0f1a80bc3a097f0ae2963f7c (patch)
tree 6bb75e016db19cd96e3bfd971e2b44abcede2474 /scripts
parent 48e2f23903816dbe8843329bfcd3e6658f22b139 (diff)
download context-c3efc6042c5a5a4d0f1a80bc3a097f0ae2963f7c.tar.gz
beta 2012.05.30 11:26
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/context/lua/mtxrun.lua 22
-rw-r--r-- scripts/context/stubs/mswin/mtxrun.lua 22
-rwxr-xr-x scripts/context/stubs/unix/mtxrun 22
3 files changed, 60 insertions, 6 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 644c1d9b2..108f2a8a1 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -4087,8 +4087,10 @@ local one = P(1)
local two = C(1) * C(1)
local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1)
+-- actually one of them is already utf ... sort of useless this one
+
local pattern = P("\254\255") * Cs( (
- four / function(a,b,c,d)
+ four / function(a,b,c,d)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( (
+ one
)^1 )
+ P("\255\254") * Cs( (
- four / function(b,a,d,c)
+ four / function(b,a,d,c)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4114,6 +4116,22 @@ function string.toutf(s)
return lpegmatch(pattern,s) or s -- todo: utf32
end
+local validatedutf = Cs (
+ (
+ patterns.utf8one
+ + patterns.utf8two
+ + patterns.utf8three
+ + patterns.utf8four
+ + P(1) / "�"
+ )^0
+)
+
+patterns.validatedutf = validatedutf
+
+function string.validutf(str)
+ return lpegmatch(validatedutf,str)
+end
+
end -- of closure
diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua
index 644c1d9b2..108f2a8a1 100644
--- a/scripts/context/stubs/mswin/mtxrun.lua
+++ b/scripts/context/stubs/mswin/mtxrun.lua
@@ -4087,8 +4087,10 @@ local one = P(1)
local two = C(1) * C(1)
local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1)
+-- actually one of them is already utf ... sort of useless this one
+
local pattern = P("\254\255") * Cs( (
- four / function(a,b,c,d)
+ four / function(a,b,c,d)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( (
+ one
)^1 )
+ P("\255\254") * Cs( (
- four / function(b,a,d,c)
+ four / function(b,a,d,c)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4114,6 +4116,22 @@ function string.toutf(s)
return lpegmatch(pattern,s) or s -- todo: utf32
end
+local validatedutf = Cs (
+ (
+ patterns.utf8one
+ + patterns.utf8two
+ + patterns.utf8three
+ + patterns.utf8four
+ + P(1) / "�"
+ )^0
+)
+
+patterns.validatedutf = validatedutf
+
+function string.validutf(str)
+ return lpegmatch(validatedutf,str)
+end
+
end -- of closure
diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun
index 644c1d9b2..108f2a8a1 100755
--- a/scripts/context/stubs/unix/mtxrun
+++ b/scripts/context/stubs/unix/mtxrun
@@ -4087,8 +4087,10 @@ local one = P(1)
local two = C(1) * C(1)
local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1)
+-- actually one of them is already utf ... sort of useless this one
+
local pattern = P("\254\255") * Cs( (
- four / function(a,b,c,d)
+ four / function(a,b,c,d)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( (
+ one
)^1 )
+ P("\255\254") * Cs( (
- four / function(b,a,d,c)
+ four / function(b,a,d,c)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4114,6 +4116,22 @@ function string.toutf(s)
return lpegmatch(pattern,s) or s -- todo: utf32
end
+local validatedutf = Cs (
+ (
+ patterns.utf8one
+ + patterns.utf8two
+ + patterns.utf8three
+ + patterns.utf8four
+ + P(1) / "�"
+ )^0
+)
+
+patterns.validatedutf = validatedutf
+
+function string.validutf(str)
+ return lpegmatch(validatedutf,str)
+end
+
end -- of closure