diff options
author | Khaled Hosny <khaledhosny@eglug.org> | 2010-05-10 17:23:06 +0300 |
---|---|---|
committer | Khaled Hosny <khaledhosny@eglug.org> | 2010-05-10 17:23:06 +0300 |
commit | 7002dfe5556f503320d9bee480e5fb43bdb6e7a3 (patch) | |
tree | 384ee73556657b6818c78b006cfa625776c980cd /luaextra-unicode.lua | |
parent | 20864d5fdb81f248500f2e94c95b3b15569cc277 (diff) | |
download | lualibs-7002dfe5556f503320d9bee480e5fb43bdb6e7a3.tar.gz |
Rename to lualibs
Rename luaextra->lualibs when sensible, also use luatexbase instead of
luatextra.
Diffstat (limited to 'luaextra-unicode.lua')
-rw-r--r-- | luaextra-unicode.lua | 193 |
1 files changed, 0 insertions, 193 deletions
diff --git a/luaextra-unicode.lua b/luaextra-unicode.lua deleted file mode 100644 index 290234d..0000000 --- a/luaextra-unicode.lua +++ /dev/null @@ -1,193 +0,0 @@ -if not modules then modules = { } end modules ['l-unicode'] = { - version = 1.001, - comment = "companion to luat-lib.mkiv", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} - -if not unicode then - - unicode = { utf8 = { } } - - local floor, char = math.floor, string.char - - function unicode.utf8.utfchar(n) - if n < 0x80 then - return char(n) - elseif n < 0x800 then - return char(0xC0 + floor(n/0x40)) .. char(0x80 + (n % 0x40)) - elseif n < 0x10000 then - return char(0xE0 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) - elseif n < 0x40000 then - return char(0xF0 + floor(n/0x40000)) .. char(0x80 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) - else -- wrong: - -- return char(0xF1 + floor(n/0x1000000)) .. char(0x80 + floor(n/0x40000)) .. char(0x80 + floor(n/0x1000)) .. char(0x80 + (floor(n/0x40) % 0x40)) .. char(0x80 + (n % 0x40)) - return "?" - end - end - -end - -utf = utf or unicode.utf8 - -local concat, utfchar, utfgsub = table.concat, utf.char, utf.gsub -local char, byte, find, bytepairs = string.char, string.byte, string.find, string.bytepairs - --- 0 EF BB BF UTF-8 --- 1 FF FE UTF-16-little-endian --- 2 FE FF UTF-16-big-endian --- 3 FF FE 00 00 UTF-32-little-endian --- 4 00 00 FE FF UTF-32-big-endian - -unicode.utfname = { - [0] = 'utf-8', - [1] = 'utf-16-le', - [2] = 'utf-16-be', - [3] = 'utf-32-le', - [4] = 'utf-32-be' -} - -function unicode.utftype(f) -- \000 fails ! - local str = f:read(4) - if not str then - f:seek('set') - return 0 - elseif find(str,"^%z%z\254\255") then - return 4 - elseif find(str,"^\255\254%z%z") then - return 3 - elseif find(str,"^\254\255") then - f:seek('set',2) - return 2 - elseif find(str,"^\255\254") then - f:seek('set',2) - return 1 - elseif find(str,"^\239\187\191") then - f:seek('set',3) - return 0 - else - f:seek('set') - return 0 - end -end - -function unicode.utf16_to_utf8(str, endian) -- maybe a gsub is faster or an lpeg - local result, tmp, n, m, p = { }, { }, 0, 0, 0 - -- lf | cr | crlf / (cr:13, lf:10) - local function doit() - if n == 10 then - if p ~= 13 then - result[#result+1] = concat(tmp) - tmp = { } - p = 0 - end - elseif n == 13 then - result[#result+1] = concat(tmp) - tmp = { } - p = n - else - tmp[#tmp+1] = utfchar(n) - p = 0 - end - end - for l,r in bytepairs(str) do - if r then - if endian then - n = l*256 + r - else - n = r*256 + l - end - if m > 0 then - n = (m-0xD800)*0x400 + (n-0xDC00) + 0x10000 - m = 0 - doit() - elseif n >= 0xD800 and n <= 0xDBFF then - m = n - else - doit() - end - end - end - if #tmp > 0 then - result[#result+1] = concat(tmp) - end - return result -end - -function unicode.utf32_to_utf8(str, endian) - local result = { } - local tmp, n, m, p = { }, 0, -1, 0 - -- lf | cr | crlf / (cr:13, lf:10) - local function doit() - if n == 10 then - if p ~= 13 then - result[#result+1] = concat(tmp) - tmp = { } - p = 0 - end - elseif n == 13 then - result[#result+1] = concat(tmp) - tmp = { } - p = n - else - tmp[#tmp+1] = utfchar(n) - p = 0 - end - end - for a,b in bytepairs(str) do - if a and b then - if m < 0 then - if endian then - m = a*256*256*256 + b*256*256 - else - m = b*256 + a - end - else - if endian then - n = m + a*256 + b - else - n = m + b*256*256*256 + a*256*256 - end - m = -1 - doit() - end - else - break - end - end - if #tmp > 0 then - result[#result+1] = concat(tmp) - end - return result -end - -local function little(c) - local b = byte(c) -- b = c:byte() - if b < 0x10000 then - return char(b%256,b/256) - else - b = b - 0x10000 - local b1, b2 = b/1024 + 0xD800, b%1024 + 0xDC00 - return char(b1%256,b1/256,b2%256,b2/256) - end -end - -local function big(c) - local b = byte(c) - if b < 0x10000 then - return char(b/256,b%256) - else - b = b - 0x10000 - local b1, b2 = b/1024 + 0xD800, b%1024 + 0xDC00 - return char(b1/256,b1%256,b2/256,b2%256) - end -end - -function unicode.utf8_to_utf16(str,littleendian) - if littleendian then - return char(255,254) .. utfgsub(str,".",little) - else - return char(254,255) .. utfgsub(str,".",big) - end -end |