diff options
Diffstat (limited to 'tex/context/base/l-unicode.lua')
-rw-r--r-- | tex/context/base/l-unicode.lua | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua
index 7c452ef8f..de835a7d8 100644
--- a/tex/context/base/l-unicode.lua
+++ b/tex/context/base/l-unicode.lua
@@ -317,6 +317,11 @@ if not utf.len then
         return n
     end
 
+    -- -- these are quite a bit slower:
+
+    -- utfcharcounter = utfbom^-1 * (Cmt(P(1) * R("\128\191")^0, function() n = n + 1 return true end))^0 -- 50+ times slower
+    -- utfcharcounter = utfbom^-1 * (Cmt(P(1), function() n = n + 1 return true end) * R("\128\191")^0)^0 -- 50- times slower
+
 end
 
 utf.length = utf.len
@@ -845,3 +850,45 @@ else
 end
 
 -- maybe also register as string.utf*
+
+
+if not utf.characters then
+
+    -- New: this gmatch hack is taken from the Lua 5.2 book. It's about two times slower
+    -- than the built-in string.utfcharacters.
+
+    function utf.characters(str)
+        return gmatch(str,".[\128-\191]*")
+    end
+
+    string.utfcharacters = utf.characters
+
+end
+
+if not utf.values then
+
+    -- So, a logical next step is to check for the values variant. It is over five times
+    -- slower than the built-in string.utfvalues. I optimized it a bit for n=0,1.
+
+    local wrap, yield, gmatch = coroutine.wrap, coroutine.yield, string.gmatch
+
+    local dummy = function()
+        -- we share this one
+    end
+
+    function utf.values(str)
+        local n = #str
+        if n == 0 then
+            return wrap(dummy)
+        elseif n == 1 then
+            return wrap(function() yield(utfbyte(str)) end)
+        else
+            return wrap(function() for s in gmatch(str,".[\128-\191]*") do
+                yield(utfbyte(s))
+            end end)
+        end
+    end
+
+    string.utfvalues = utf.values
+
+end