path: root/tex/context/base/l-unicode.lua
Diffstat (limited to 'tex/context/base/l-unicode.lua')
-rw-r--r--  tex/context/base/l-unicode.lua  47
1 file changed, 47 insertions(+), 0 deletions(-)
diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua
index 7c452ef8f..de835a7d8 100644
--- a/tex/context/base/l-unicode.lua
+++ b/tex/context/base/l-unicode.lua
@@ -317,6 +317,11 @@ if not utf.len then
return n
end
+ -- -- these are quite a bit slower:
+
+ -- utfcharcounter = utfbom^-1 * (Cmt(P(1) * R("\128\191")^0, function() n = n + 1 return true end))^0 -- 50+ times slower
+ -- utfcharcounter = utfbom^-1 * (Cmt(P(1), function() n = n + 1 return true end) * R("\128\191")^0)^0 -- 50- times slower
+
end
utf.length = utf.len
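The commented-out LPEG counters in the hunk above are slower alternatives to the counter being patched. As a point of reference only (the name utflen below is hypothetical and not part of this patch), the same byte-counting idea can be written with a plain Lua string pattern: count every byte that is not a UTF-8 continuation byte (\128-\191), which for valid UTF-8 input equals the number of characters.

-- illustrative sketch, not part of the patch: count UTF-8 characters by
-- counting lead bytes, i.e. every byte outside the continuation range
-- \128-\191 (0x80-0xBF); assumes valid UTF-8 input without a BOM
local gmatch = string.gmatch

local function utflen(str)
    local n = 0
    for _ in gmatch(str,"[^\128-\191]") do
        n = n + 1
    end
    return n
end

print(utflen("a\195\169b")) -- prints 3 (the two bytes \195\169 encode one character)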
@@ -845,3 +850,45 @@ else
end
-- maybe also register as string.utf*
+
+
+if not utf.characters then
+
+ -- New: this gmatch hack is taken from the Lua 5.2 book. It's about two times slower
+ -- than the built-in string.utfcharacters.
+
+ function utf.characters(str)
+ return gmatch(str,".[\128-\191]*")
+ end
+
+ string.utfcharacters = utf.characters
+
+end
+
+if not utf.values then
+
+ -- So, a logical next step is to check for the values variant. It is over five times
+ -- slower than the built-in string.utfvalues. I optimized it a bit for n=0,1.
+
+ local wrap, yield, gmatch = coroutine.wrap, coroutine.yield, string.gmatch
+
+ local dummy = function()
+ -- we share this one
+ end
+
+ function utf.values(str)
+ local n = #str
+ if n == 0 then
+ return wrap(dummy)
+ elseif n == 1 then
+ return wrap(function() yield(utfbyte(str)) end)
+ else
+ return wrap(function() for s in gmatch(str,".[\128-\191]*") do
+ yield(utfbyte(s))
+ end end)
+ end
+ end
+
+ string.utfvalues = utf.values
+
+end
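
Assuming the patched module has been loaded and its utf table is in scope (as it is inside ConTeXt's Lua libraries), the two fallback iterators can be exercised as follows; this is only a usage sketch and the test string is arbitrary.

local s = "a\195\169b"            -- "a", "\195\169" (one two-byte character), "b"

for c in utf.characters(s) do     -- three iterations, one per UTF-8 character
    print(c)
end

for u in utf.values(s) do         -- yields the code points 97, 233 and 98
    print(u)
end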