diff options
Diffstat (limited to 'tex/context/base/l-unicode.lua')
-rw-r--r-- | tex/context/base/l-unicode.lua | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua index f30c32b9a..6295ecb7d 100644 --- a/tex/context/base/l-unicode.lua +++ b/tex/context/base/l-unicode.lua @@ -426,3 +426,40 @@ end --~ end --~ end --~ print(os.clock()-t,collectgarbage("count")*1024-u) + +--~ local byte = string.byte +--~ local utfchar = utf.char +--~ local lpegmatch = lpeg.match, lpeg.P, lpeg.C, lpeg.R, lpeg.Cs + +local P, C, R, Cs = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs + +local one = P(1) +local two = C(1) * C(1) +local four = C(R(utfchar(0xD8),utfchar(0xFF))) * C(1) * C(1) * C(1) + +local pattern = P("\254\255") * Cs( ( + four / function(a,b,c,d) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(a,b) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + + P("\255\254") * Cs( ( + four / function(b,a,d,c) + local ab = 0xFF * byte(a) + byte(b) + local cd = 0xFF * byte(c) + byte(d) + return utfchar((ab-0xD800)*0x400 + (cd-0xDC00) + 0x10000) + end + + two / function(b,a) + return utfchar(byte(a)*256 + byte(b)) + end + + one + )^1 ) + +function string.toutf(s) + return lpegmatch(pattern,s) or s -- todo: utf32 +end |