summary | refs | log | tree | commit | diff
path: root/scripts/context/lua/mtxrun.lua
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/context/lua/mtxrun.lua')
-rw-r--r-- scripts/context/lua/mtxrun.lua 55
1 files changed, 53 insertions, 2 deletions
diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 6daa59f9d..00c1fc13c 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -4285,10 +4285,10 @@ if not modules then modules = { } end modules ['l-unicode'] = {
local concat = table.concat
local type = type
-local P, C, R, Cs, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct
+local P, C, R, Cs, Ct, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt
local lpegmatch, patterns = lpeg.match, lpeg.patterns
local utftype = patterns.utftype
-local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format
+local char, byte, find, bytepairs, utfvalues, format, sub = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format, string.sub
local utfsplitlines = string.utfsplitlines
if not unicode then
@@ -4362,6 +4362,57 @@ if not utf.byte then
end
+if not utf.sub then
+
+    local utf8char = patterns.utf8char
+
+    -- utf.sub(str,start,stop) returns characters start..stop of str. Both
+    -- indices are 1-based and inclusive, like string.sub, but they count utf8
+    -- characters instead of bytes. Counting from the end (negative indices)
+    -- is not supported yet.
+    --
+    -- The straightforward solution is inefficient as lpeg just copies ^n:
+    --
+    -- local function sub(str,start,stop)
+    --     local pattern = utf8char^-(start-1) * C(utf8char^-(stop-start+1))
+    --     inspect(pattern)
+    --     return lpegmatch(pattern,str) or ""
+    -- end
+    --
+    -- so instead one fixed pattern is used and a match time function counts.
+
+    -- State shared between utf.sub and slide (reset on every call):
+    --
+    --   b, e  : first and last byte of the slice
+    --   n     : number of characters seen so far
+    --   first : index of the character just before the slice (start - 1)
+    --   last  : index of the final character of the slice (nil: open ended)
+
+    local b, e, n, first, last = 1, 0, 0, 0
+
+    -- A match time function gets the position after the matched character,
+    -- so p is where character n+1 starts and p-1 is the last byte of
+    -- character n. Returning nil fails the match, which ends the ^0 loop.
+
+    local function slide(s,p)
+        n = n + 1
+        if n == first then
+            b = p
+            if not last then
+                return nil -- open ended slice: no need to scan further
+            end
+        end
+        if n == last then
+            e = p - 1
+            return nil -- both ends are known so we can quit
+        end
+        return p
+    end
+
+    local pattern = Cmt(utf8char,slide)^0
+
+    function utf.sub(str,start,stop) -- todo: from the end
+        if not start then
+            return str
+        end
+        if start < 1 then
+            start = 1 -- indices below 1 are clamped, as there is no 'from the end' yet
+        end
+        if not stop then
+            if start == 1 then
+                return str
+            end
+        elseif stop < start then
+            return ""
+        end
+        local size = #str
+        -- The defaults cover indices beyond the end of the string: when start
+        -- is never reached b stays past the end (empty result) and when stop
+        -- is never reached e stays at the last byte (slice runs to the end).
+        b, e, n, first, last = start == 1 and 1 or size + 1, size, 0, start - 1, stop
+        lpegmatch(pattern,str)
+        return sub(str,b,e)
+    end
+
+    -- print(utf.sub("Hans Hagen is my name"))      -- "Hans Hagen is my name"
+    -- print(utf.sub("Hans Hagen is my name",5))    -- " Hagen is my name"
+    -- print(utf.sub("Hans Hagen is my name",5,10)) -- " Hagen"
+
+end
+
local utfchar, utfbyte = utf.char, utf.byte
-- As we want to get rid of the (unmaintained) utf library we implement our own