diff options
author | Marius <mariausol@gmail.com> | 2011-06-08 20:40:24 +0300 |
---|---|---|
committer | Marius <mariausol@gmail.com> | 2011-06-08 20:40:24 +0300 |
commit | e5fe861660d5cf60cfeb67f7e57f659b309e9613 (patch) | |
tree | f5bc4ea0cd61f18c096f0fd5df8cf439700ba71e /tex/context/base/l-unicode.lua | |
parent | b17f22aa285224dcf5b1dbccd795bc73b9a16426 (diff) | |
download | context-e5fe861660d5cf60cfeb67f7e57f659b309e9613.tar.gz |
beta 2011.06.08 19:06
Diffstat (limited to 'tex/context/base/l-unicode.lua')
-rw-r--r-- | tex/context/base/l-unicode.lua | 19 |
1 files changed, 18 insertions, 1 deletions
diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua
index 2fa50282f..0c7b24bd4 100644
--- a/tex/context/base/l-unicode.lua
+++ b/tex/context/base/l-unicode.lua
@@ -380,12 +380,29 @@ end
 --~ print(unicode.utfcodes(str))
 
 local lpegmatch = lpeg.match
-local utftype = lpeg.patterns.utftype
+local patterns = lpeg.patterns
+local utftype = patterns.utftype
 
 function unicode.filetype(data)
     return data and lpegmatch(utftype,data) or "unknown"
 end
 
+local toentities = lpeg.Cs (
+    (
+        patterns.utf8one
+            + (
+                patterns.utf8two
+              + patterns.utf8three
+              + patterns.utf8four
+            ) / function(s) local b = utfbyte(s) if b < 127 then return s else return format("&#%X;",b) end end
+    )^0
+)
+
+patterns.toentities = toentities
+
+function utf.toentities(str)
+    return lpegmatch(toentities,str)
+end
 --~ local utfchr = { } -- 60K -> 2.638 M extra mem but currently not called that often (on latin)
 --~
 --~ setmetatable(utfchr, { __index = function(t,k) local v = utfchar(k) t[k] = v return v end } )