summaryrefslogtreecommitdiff
path: root/tex/context/base/l-unicode.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/l-unicode.lua')
-rw-r--r--tex/context/base/l-unicode.lua55
1 files changed, 53 insertions, 2 deletions
diff --git a/tex/context/base/l-unicode.lua b/tex/context/base/l-unicode.lua
index 630c34960..7fd380b88 100644
--- a/tex/context/base/l-unicode.lua
+++ b/tex/context/base/l-unicode.lua
@@ -12,10 +12,10 @@ if not modules then modules = { } end modules ['l-unicode'] = {
local concat = table.concat
local type = type
-local P, C, R, Cs, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct
+local P, C, R, Cs, Ct, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt
local lpegmatch, patterns = lpeg.match, lpeg.patterns
local utftype = patterns.utftype
-local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format
+local char, byte, find, bytepairs, utfvalues, format, sub = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format, string.sub
local utfsplitlines = string.utfsplitlines
if not unicode then
@@ -89,6 +89,57 @@ if not utf.byte then
end
+if not utf.sub then
+
+    local utf8char = patterns.utf8char
+
+    -- Building utf8char^-(start-1) * C(utf8char^-(stop-start+1)) per call is
+    -- inefficient, as lpeg just copies the pattern n times. Instead one fixed
+    -- pattern walks the string and a match time callback counts characters.
+
+    -- Scan state shared with the callback: b and e are the byte bounds of the
+    -- slice, n counts characters seen, first is start-1 and last is stop.
+
+    local b, e, n, first, last = 0, 0, 0, 0, 0
+
+    -- Called after each character; p is the byte position just past it.
+    local function slide(s,p)
+        n = n + 1
+        if n == first then
+            b = p -- character 'start' begins right after character start-1
+            if not last then
+                return nil -- open ended slice: no need to scan any further
+            end
+        end
+        if n == last then
+            e = p - 1 -- last byte of character 'stop'
+            return nil
+        end
+        return p
+    end
+
+    local pattern = Cmt(utf8char,slide)^0
+
+    -- Returns characters start..stop (1-based, inclusive) of the utf-8 string
+    -- str; stop defaults to the end, a start below 1 is treated as 1.
+
+    function utf.sub(str,start,stop) -- todo: from the end
+        if not start or (start <= 1 and not stop) then
+            return str
+        elseif stop and (stop < 1 or stop < start) then
+            return ""
+        end
+        if start < 1 then start = 1 end
+        b, e, n, first, last = start == 1 and 1 or 0, #str, 0, start - 1, stop
+        lpegmatch(pattern,str)
+        return b > 0 and sub(str,b,e) or "" -- b == 0: str is shorter than start
+    end
+
+    -- print(utf.sub("Hans Hagen is my name",5))    --> " Hagen is my name"
+    -- print(utf.sub("Hans Hagen is my name",5,10)) --> " Hagen"
+
+end
+
local utfchar, utfbyte = utf.char, utf.byte
-- As we want to get rid of the (unmaintained) utf library we implement our own