diff options
Diffstat (limited to 'tex/context/base/l-unicode.lua')
-rw-r--r-- | tex/context/base/l-unicode.lua | 64 |
1 files changed, 46 insertions, 18 deletions
diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua index d0c05bb86..236d3711e 100644 --- a/tex/context/base/l-unicode.lua +++ b/tex/context/base/l-unicode.lua @@ -52,8 +52,8 @@ function unicode.utftype(f) -- \000 fails ! end end -function unicode.utf16_to_utf8(str, endian) - garbagecollector.push() +function unicode.utf16_to_utf8(str, endian) -- maybe a gsub is faster or an lpeg +--~ garbagecollector.push() local result = { } local tc, uc = table.concat, unicode.utf8.char local tmp, n, m, p = { }, 0, 0, 0 @@ -75,30 +75,32 @@ function unicode.utf16_to_utf8(str, endian) end end for l,r in str:bytepairs() do - if endian then - n = l*256 + r - else - n = r*256 + l - end - if m > 0 then - n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000 - m = 0 - doit() - elseif n >= 0xD800 and n <= 0xDBFF then - m = n - else - doit() + if r then + if endian then + n = l*256 + r + else + n = r*256 + l + end + if m > 0 then + n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000 + m = 0 + doit() + elseif n >= 0xD800 and n <= 0xDBFF then + m = n + else + doit() + end end end if #tmp > 0 then result[#result+1] = tc(tmp,"") end - garbagecollector.pop() +--~ garbagecollector.pop() return result end function unicode.utf32_to_utf8(str, endian) - garbagecollector.push() +--~ garbagecollector.push() local result = { } local tc, uc = table.concat, unicode.utf8.char local tmp, n, m, p = { }, 0, -1, 0 @@ -143,6 +145,32 @@ function unicode.utf32_to_utf8(str, endian) if #tmp > 0 then result[#result+1] = tc(tmp,"") end - garbagecollector.pop() +--~ garbagecollector.pop() return result end + +function unicode.utf8_to_utf16(str,littleendian) + if littleendian then + return char(255,254) .. utf.gsub(str,".",function(c) + local b = byte(c) + if b < 0x10000 then + return char(b%256,b/256) + else + b = b - 0x10000 + local b1, b2 = b/1024 + 0xD800, b%1024 + 0xDC00 + return char(b1%256,b1/256,b2%256,b2/256) + end + end) + else + return char(254,255) .. utf.gsub(str,".",function(c) + local b = byte(c) + if b < 0x10000 then + return char(b/256,b%256) + else + b = b - 0x10000 + local b1, b2 = b/1024 + 0xD800, b%1024 + 0xDC00 + return char(b1/256,b1%256,b2/256,b2%256) + end + end) + end +end |