summaryrefslogtreecommitdiff
path: root/scripts/context/lua/mtxrun.lua
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/context/lua/mtxrun.lua')
-rw-r--r--scripts/context/lua/mtxrun.lua36
1 files changed, 35 insertions, 1 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 29665417e..158d11ecd 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -1350,7 +1350,7 @@ local utflinesplitter = utfbom^-1 * tsplitat(newline)
patterns.utflinesplitter = utflinesplitter
function string.utfsplitlines(str)
- return match(utflinesplitter,str)
+ -- the pattern is applied to the empty string when str is nil or false
+ return match(utflinesplitter,str or "")
end
@@ -3902,6 +3902,40 @@ end
+local P, C, R, Cs = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs
+
+-- UTF-16 to UTF-8 conversion pattern. The subject has to start with a byte
+-- order mark: "\254\255" (FE FF) selects big endian, "\255\254" (FF FE)
+-- little endian. Every 16 bit unit after the mark is rewritten with utfchar,
+-- a surrogate pair is first combined into a single code point, and a byte
+-- that fits neither form (an odd trailing byte) is copied through unchanged.
+-- byte and utfchar are taken from the enclosing scope.
+
+local pattern
+
+do -- helpers stay private so they do not add to the closure's local count
+
+    -- lpeg.R takes one two-character string per range: first and last byte
+    local high = R("\216\219") -- 0xD8-0xDB: upper byte of a high surrogate
+    local low  = R("\220\223") -- 0xDC-0xDF: upper byte of a low surrogate
+
+    local one     = P(1)
+    local two     = C(1) * C(1)
+    local four_be = C(high) * C(1) * C(low) * C(1)
+    local four_le = C(1) * C(high) * C(1) * C(low)
+
+    -- a 16 bit unit is 0x100 (not 0xFF) times the upper byte plus the lower one
+    local function unit(upper,lower)
+        return 0x100 * byte(upper) + byte(lower)
+    end
+
+    -- combines a high (ab) and a low (cd) surrogate unit into one code point
+    local function pair(ab,cd)
+        return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000)
+    end
+
+    pattern = P("\254\255") * Cs( (
+        four_be / function(a,b,c,d) return pair(unit(a,b),unit(c,d)) end
+      + two     / function(a,b)     return utfchar(unit(a,b)) end
+      + one
+    )^1 )
+    + P("\255\254") * Cs( (
+        -- little endian: the lower byte of each unit comes first
+        four_le / function(b,a,d,c) return pair(unit(a,b),unit(c,d)) end
+      + two     / function(b,a)     return utfchar(unit(a,b)) end
+      + one
+    )^1 )
+
+end
+
+-- Runs pattern over s and returns the result of the match; when the match
+-- yields nothing, s itself is handed back.
+function string.toutf(s)
+    local converted = lpegmatch(pattern,s)
+    if converted then
+        return converted
+    end
+    return s -- todo: utf32
+end
+
+
end -- of closure
do -- create closure to overcome 200 locals limit