summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/context/lua/mtxrun.lua | 22
-rw-r--r-- scripts/context/stubs/mswin/mtxrun.lua | 22
-rw-r--r-- scripts/context/stubs/unix/mtxrun | 22
3 files changed, 60 insertions, 6 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 644c1d9b2..108f2a8a1 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -4087,8 +4087,10 @@ local one = P(1)
local two = C(1) * C(1)
local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1)
+-- actually one of them is already utf ... sort of useless this one
+
local pattern = P("\254\255") * Cs( (
- four / function(a,b,c,d)
+ four / function(a,b,c,d)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( (
+ one
)^1 )
+ P("\255\254") * Cs( (
- four / function(b,a,d,c)
+ four / function(b,a,d,c)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4114,6 +4116,22 @@ function string.toutf(s)
return lpegmatch(pattern,s) or s -- todo: utf32
end
+local validatedutf = Cs ( -- Cs is presumably LPeg's substitution capture, so a match yields a rewritten string (TODO confirm)
+ (
+ patterns.utf8one -- the utf8one..utf8four patterns from the shared patterns table are tried in this order
+ + patterns.utf8two
+ + patterns.utf8three
+ + patterns.utf8four
+ + P(1) / "�" -- fallback: any single character none of the above accepted is replaced by this marker string
+ )^0 -- zero or more repetitions of the choice above
+)
+
+patterns.validatedutf = validatedutf -- also expose the pattern through the patterns table
+
+function string.validutf(str) -- returns whatever matching validatedutf against str yields; NOTE(review): looks like a cleaned-up string rather than a boolean (confirm)
+ return lpegmatch(validatedutf,str)
+end
+
end -- of closure
diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua
index 644c1d9b2..108f2a8a1 100644
--- a/scripts/context/stubs/mswin/mtxrun.lua
+++ b/scripts/context/stubs/mswin/mtxrun.lua
@@ -4087,8 +4087,10 @@ local one = P(1)
local two = C(1) * C(1)
local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1)
+-- actually one of them is already utf ... sort of useless this one
+
local pattern = P("\254\255") * Cs( (
- four / function(a,b,c,d)
+ four / function(a,b,c,d)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( (
+ one
)^1 )
+ P("\255\254") * Cs( (
- four / function(b,a,d,c)
+ four / function(b,a,d,c)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4114,6 +4116,22 @@ function string.toutf(s)
return lpegmatch(pattern,s) or s -- todo: utf32
end
+local validatedutf = Cs ( -- Cs is presumably LPeg's substitution capture, so a match yields a rewritten string (TODO confirm)
+ (
+ patterns.utf8one -- the utf8one..utf8four patterns from the shared patterns table are tried in this order
+ + patterns.utf8two
+ + patterns.utf8three
+ + patterns.utf8four
+ + P(1) / "�" -- fallback: any single character none of the above accepted is replaced by this marker string
+ )^0 -- zero or more repetitions of the choice above
+)
+
+patterns.validatedutf = validatedutf -- also expose the pattern through the patterns table
+
+function string.validutf(str) -- returns whatever matching validatedutf against str yields; NOTE(review): looks like a cleaned-up string rather than a boolean (confirm)
+ return lpegmatch(validatedutf,str)
+end
+
end -- of closure
diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun
index 644c1d9b2..108f2a8a1 100644
--- a/scripts/context/stubs/unix/mtxrun
+++ b/scripts/context/stubs/unix/mtxrun
@@ -4087,8 +4087,10 @@ local one = P(1)
local two = C(1) * C(1)
local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1)
+-- actually one of them is already utf ... sort of useless this one
+
local pattern = P("\254\255") * Cs( (
- four / function(a,b,c,d)
+ four / function(a,b,c,d)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( (
+ one
)^1 )
+ P("\255\254") * Cs( (
- four / function(b,a,d,c)
+ four / function(b,a,d,c)
local ab = 0xFF * byte(a) + byte(b)
local cd = 0xFF * byte(c) + byte(d)
return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
@@ -4114,6 +4116,22 @@ function string.toutf(s)
return lpegmatch(pattern,s) or s -- todo: utf32
end
+local validatedutf = Cs ( -- Cs is presumably LPeg's substitution capture, so a match yields a rewritten string (TODO confirm)
+ (
+ patterns.utf8one -- the utf8one..utf8four patterns from the shared patterns table are tried in this order
+ + patterns.utf8two
+ + patterns.utf8three
+ + patterns.utf8four
+ + P(1) / "�" -- fallback: any single character none of the above accepted is replaced by this marker string
+ )^0 -- zero or more repetitions of the choice above
+)
+
+patterns.validatedutf = validatedutf -- also expose the pattern through the patterns table
+
+function string.validutf(str) -- returns whatever matching validatedutf against str yields; NOTE(review): looks like a cleaned-up string rather than a boolean (confirm)
+ return lpegmatch(validatedutf,str)
+end
+
end -- of closure