diff options
author | Marius <mariausol@gmail.com> | 2012-05-30 12:40:14 +0300 |
---|---|---|
committer | Marius <mariausol@gmail.com> | 2012-05-30 12:40:14 +0300 |
commit | ad5540007a2ac6504a47289ace109a76ec759a7e (patch) | |
tree | 4a5f42846a6c889f3bd86dbfdb80e46a6820b001 /scripts | |
parent | 250c6bdd3d89cde9d2401172ea0310536eb58db8 (diff) | |
download | context-ad5540007a2ac6504a47289ace109a76ec759a7e.tar.gz |
beta 2012.05.30 11:26
Diffstat (limited to 'scripts')
-rw-r--r-- | scripts/context/lua/mtxrun.lua | 22 | ||||
-rw-r--r-- | scripts/context/stubs/mswin/mtxrun.lua | 22 | ||||
-rw-r--r-- | scripts/context/stubs/unix/mtxrun | 22 |
3 files changed, 60 insertions, 6 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index 644c1d9b2..108f2a8a1 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -4087,8 +4087,10 @@ local one = P(1) local two = C(1) * C(1) local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) +-- actually one of them is already utf ... sort of useless this one + local pattern = P("\254\255") * Cs( ( - four / function(a,b,c,d) + four / function(a,b,c,d) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( ( + one )^1 ) + P("\255\254") * Cs( ( - four / function(b,a,d,c) + four / function(b,a,d,c) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4114,6 +4116,22 @@ function string.toutf(s) return lpegmatch(pattern,s) or s -- todo: utf32 end +local validatedutf = Cs ( + ( + patterns.utf8one + + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + + P(1) / "�" + )^0 +) + +patterns.validatedutf = validatedutf + +function string.validutf(str) + return lpegmatch(validatedutf,str) +end + end -- of closure diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index 644c1d9b2..108f2a8a1 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -4087,8 +4087,10 @@ local one = P(1) local two = C(1) * C(1) local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) +-- actually one of them is already utf ... sort of useless this one + local pattern = P("\254\255") * Cs( ( - four / function(a,b,c,d) + four / function(a,b,c,d) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( ( + one )^1 ) + P("\255\254") * Cs( ( - four / function(b,a,d,c) + four / function(b,a,d,c) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4114,6 +4116,22 @@ function string.toutf(s) return lpegmatch(pattern,s) or s -- todo: utf32 end +local validatedutf = Cs ( + ( + patterns.utf8one + + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + + P(1) / "�" + )^0 +) + +patterns.validatedutf = validatedutf + +function string.validutf(str) + return lpegmatch(validatedutf,str) +end + end -- of closure diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index 644c1d9b2..108f2a8a1 100644 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -4087,8 +4087,10 @@ local one = P(1) local two = C(1) * C(1) local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) +-- actually one of them is already utf ... sort of useless this one + local pattern = P("\254\255") * Cs( ( - four / function(a,b,c,d) + four / function(a,b,c,d) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4099,7 +4101,7 @@ local pattern = P("\254\255") * Cs( ( + one )^1 ) + P("\255\254") * Cs( ( - four / function(b,a,d,c) + four / function(b,a,d,c) local ab = 0xFF * byte(a) + byte(b) local cd = 0xFF * byte(c) + byte(d) return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) @@ -4114,6 +4116,22 @@ function string.toutf(s) return lpegmatch(pattern,s) or s -- todo: utf32 end +local validatedutf = Cs ( + ( + patterns.utf8one + + patterns.utf8two + + patterns.utf8three + + patterns.utf8four + + P(1) / "�" + )^0 +) + +patterns.validatedutf = validatedutf + +function string.validutf(str) + return lpegmatch(validatedutf,str) +end + end -- of closure |