summaryrefslogtreecommitdiff
path: root/lualibs-unicode.lua
diff options
context:
space:
mode:
author: Philipp Gesang <phg42.2a@gmail.com> 2013-09-21 04:44:56 -0700
committer: Philipp Gesang <phg42.2a@gmail.com> 2013-09-21 04:44:56 -0700
commit: bc287b4f6bb89c78ec57e0e0871fa054075efdf1 (patch)
tree: 7e636bed27130e0b4846a7d41dfcf50e12a83492 /lualibs-unicode.lua
parent: 49880e8803d8f813aebcb15a5ecb6dc70d0caa17 (diff)
parent: d96d015887e6fcbe1ca8defcec44cb794421785d (diff)
download: lualibs-bc287b4f6bb89c78ec57e0e0871fa054075efdf1.tar.gz
Merge pull request #19 from phi-gamma/master
sync with Context as of 2013-09-21
Diffstat (limited to 'lualibs-unicode.lua')
-rw-r--r-- lualibs-unicode.lua 31
1 files changed, 24 insertions, 7 deletions
diff --git a/lualibs-unicode.lua b/lualibs-unicode.lua
index 3ce5bd3..7ada394 100644
--- a/lualibs-unicode.lua
+++ b/lualibs-unicode.lua
@@ -38,13 +38,14 @@ local replacer = lpeg.replacer
local utfvalues = utf.values
local utfgmatch = utf.gmatch -- not always present
-local p_utftype = patterns.utftype
-local p_utfoffset = patterns.utfoffset
-local p_utf8char = patterns.utf8char
-local p_utf8byte = patterns.utf8byte
-local p_utfbom = patterns.utfbom
-local p_newline = patterns.newline
-local p_whitespace = patterns.whitespace
+local p_utftype = patterns.utftype
+local p_utfstricttype = patterns.utfstricttype
+local p_utfoffset = patterns.utfoffset
+local p_utf8char = patterns.utf8char
+local p_utf8byte = patterns.utf8byte
+local p_utfbom = patterns.utfbom
+local p_newline = patterns.newline
+local p_whitespace = patterns.whitespace
if not unicode then
@@ -976,6 +977,22 @@ function utf.xstring(s)
return format("0x%05X",type(s) == "number" and s or utfbyte(s))
end
+function utf.toeight(str) -- return str as UTF-8, converting according to the type that p_utfstricttype reports
+    if not str then
+        return nil -- no input, nothing to convert
+    end
+    local utftype = lpegmatch(p_utfstricttype,str)
+    if utftype == "utf-8" then
+        return sub(str,4) -- drop the first three bytes (presumably the UTF-8 byte order mark)
+    elseif utftype == "utf-16-le" then
+        return utf16_to_utf8_le(str)
+    elseif utftype == "utf-16-be" then
+        return utf16_to_utf8_be(str) -- big-endian input goes to the big-endian converter; the earlier "_ne" suffix looks like a typo -- NOTE(review): confirm the helper name
+    else
+        return str -- type not recognized: hand the input back untouched
+    end
+end
+
--
local p_nany = p_utf8char / ""