From a82cbf2bce00df54f59d1b81805e8b40029b93c6 Mon Sep 17 00:00:00 2001
From: Philipp Gesang
Date: Sat, 14 Dec 2013 14:31:43 +0100
Subject: sync with Context as of 2013-12-14

---
 lualibs-unicode.lua | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

(limited to 'lualibs-unicode.lua')

diff --git a/lualibs-unicode.lua b/lualibs-unicode.lua
index 7ada394..902f6a0 100644
--- a/lualibs-unicode.lua
+++ b/lualibs-unicode.lua
@@ -935,19 +935,27 @@ end
 local _, l_remap = utf.remapper(little)
 local _, b_remap = utf.remapper(big)
 
-function utf.utf8_to_utf16_be(str)
-    return char(254,255) .. lpegmatch(b_remap,str)
+function utf.utf8_to_utf16_be(str,nobom)
+    if nobom then
+        return lpegmatch(b_remap,str)
+    else
+        return char(254,255) .. lpegmatch(b_remap,str)
+    end
 end
 
-function utf.utf8_to_utf16_le(str)
-    return char(255,254) .. lpegmatch(l_remap,str)
+function utf.utf8_to_utf16_le(str,nobom)
+    if nobom then
+        return lpegmatch(l_remap,str)
+    else
+        return char(255,254) .. lpegmatch(l_remap,str)
+    end
 end
 
-function utf.utf8_to_utf16(str,littleendian)
+function utf.utf8_to_utf16(str,littleendian,nobom)
     if littleendian then
-        return utf.utf8_to_utf16_le(str)
+        return utf.utf8_to_utf16_le(str,nobom)
     else
-        return utf.utf8_to_utf16_be(str)
+        return utf.utf8_to_utf16_be(str,nobom)
     end
 end
 
--
cgit v1.2.3

From 8823c59ae9a86ecd8b765d5f936be894582777dc Mon Sep 17 00:00:00 2001
From: Philipp Gesang
Date: Wed, 25 Dec 2013 22:32:54 +0100
Subject: sync with Context as of 2013-12-25

---
 lualibs-unicode.lua | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'lualibs-unicode.lua')

diff --git a/lualibs-unicode.lua b/lualibs-unicode.lua
index 902f6a0..6601a4c 100644
--- a/lualibs-unicode.lua
+++ b/lualibs-unicode.lua
@@ -9,7 +9,6 @@ if not modules then modules = { } end modules ['l-unicode'] = {
 -- this module will be reorganized
 
 -- todo: utf.sub replacement (used in syst-aux)
-
 -- we put these in the utf namespace:
 
 utf = utf or (unicode and unicode.utf8) or { }
@@ -1130,3 +1129,13 @@ if not utf.values then
     string.utfvalues = utf.values
 
 end
+
+function utf.chrlen(u) -- u is number
+    return
+        (u < 0x80 and 1) or
+        (u < 0xE0 and 2) or
+        (u < 0xF0 and 3) or
+        (u < 0xF8 and 4) or
+        (u < 0xFC and 5) or
+        (u < 0xFE and 6) or 0
+end
--
cgit v1.2.3