diff options
Diffstat (limited to 'scripts/context/lua/mtxrun.lua')
-rw-r--r-- | scripts/context/lua/mtxrun.lua | 36 |
1 files changed, 35 insertions, 1 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index 29665417e..158d11ecd 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -1350,7 +1350,7 @@ local utflinesplitter = utfbom^-1 * tsplitat(newline) patterns.utflinesplitter = utflinesplitter function string.utfsplitlines(str) - return match(utflinesplitter,str) + return match(utflinesplitter,str or "") end @@ -3902,6 +3902,40 @@ end +local P, C, R, Cs = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs + +local one = P(1) +local two = C(1) * C(1) +local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) + +local pattern = P("\254\255") * Cs( ( + four / function(a,b,c,d) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(a,b) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + + P("\255\254") * Cs( ( + four / function(b,a,d,c) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(b,a) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + +function string.toutf(s) + return lpegmatch(pattern,s) or s -- todo: utf32 +end + + end -- of closure do -- create closure to overcome 200 locals limit |