From 95100142e8341d8bd582e16a17a26118dfc8975c Mon Sep 17 00:00:00 2001
From: Marius <mariausol@gmail.com>
Date: Tue, 4 Dec 2012 18:40:14 +0200
Subject: beta 2012.12.04 16:56

---
 scripts/context/lua/mtxrun.lua         | 55 ++++++++++++++++++++++++++++++++--
 scripts/context/stubs/mswin/mtxrun.lua | 55 ++++++++++++++++++++++++++++++++--
 scripts/context/stubs/unix/mtxrun      | 55 ++++++++++++++++++++++++++++++++--
 3 files changed, 159 insertions(+), 6 deletions(-)

(limited to 'scripts')

diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua
index 6daa59f9d..00c1fc13c 100644
--- a/scripts/context/lua/mtxrun.lua
+++ b/scripts/context/lua/mtxrun.lua
@@ -4285,10 +4285,10 @@ if not modules then modules = { } end modules ['l-unicode'] = {
 
 local concat = table.concat
 local type = type
-local P, C, R, Cs, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct
+local P, C, R, Cs, Ct, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt
 local lpegmatch, patterns = lpeg.match, lpeg.patterns
 local utftype = patterns.utftype
-local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format
+local char, byte, find, bytepairs, utfvalues, format, sub = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format, string.sub
 local utfsplitlines = string.utfsplitlines
 
 if not unicode then
@@ -4362,6 +4362,57 @@ if not utf.byte then
 
 end
 
+if not utf.sub then
+
+    local utf8char = patterns.utf8char
+
+    -- A counted pattern like utf8char^-(start-1) is inefficient as lpeg just
+    -- copies the pattern n times, so instead we slide over the characters with
+    -- a match time capture and keep the scanner state in the locals below.
+
+    local b, e, n, first, last = 0, 0, 0, 0, 0
+
+    -- Runs after each matched character with p being the byte position just
+    -- past it; returning nil fails the capture and thereby ends the scan.
+    local function slide(s,p)
+        n = n + 1
+        if n == first then
+            b = p -- the requested start character begins right after this one
+            if not last then
+                return nil
+            end
+        end
+        if n == last then
+            e = p - 1 -- last byte of the requested stop character
+            return nil
+        else
+            return p
+        end
+    end
+
+    local pattern = Cmt(utf8char,slide)^0
+
+    -- Like string.sub but start and stop count utf characters instead of bytes.
+    -- No start gives str itself, no stop runs to the end of str, a start below
+    -- one is treated as one and a range that selects nothing gives "".
+    function utf.sub(str,start,stop) -- todo: from the end
+        if not start or (start <= 1 and not stop) then
+            return str
+        elseif stop and (stop < 1 or stop < start) then
+            return ""
+        end
+        first, last, n, e = (start > 1 and start - 1) or 0, stop, 0, #str
+        b = (first == 0 and 1) or (e + 1) -- past the end when str is too short
+        lpegmatch(pattern,str)
+        return sub(str,b,e)
+    end
+
+    -- print(utf.sub("Hans Hagen is my name"))      -- Hans Hagen is my name
+    -- print(utf.sub("Hans Hagen is my name",5))    -- " Hagen is my name"
+    -- print(utf.sub("Hans Hagen is my name",5,10)) -- " Hagen"
+
+end
+
 local utfchar, utfbyte = utf.char, utf.byte
 
 -- As we want to get rid of the (unmaintained) utf library we implement our own
diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua
index 6daa59f9d..00c1fc13c 100644
--- a/scripts/context/stubs/mswin/mtxrun.lua
+++ b/scripts/context/stubs/mswin/mtxrun.lua
@@ -4285,10 +4285,10 @@ if not modules then modules = { } end modules ['l-unicode'] = {
 
 local concat = table.concat
 local type = type
-local P, C, R, Cs, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct
+local P, C, R, Cs, Ct, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt
 local lpegmatch, patterns = lpeg.match, lpeg.patterns
 local utftype = patterns.utftype
-local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format
+local char, byte, find, bytepairs, utfvalues, format, sub = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format, string.sub
 local utfsplitlines = string.utfsplitlines
 
 if not unicode then
@@ -4362,6 +4362,57 @@ if not utf.byte then
 
 end
 
+if not utf.sub then
+
+    local utf8char = patterns.utf8char
+
+    -- A counted pattern like utf8char^-(start-1) is inefficient as lpeg just
+    -- copies the pattern n times, so instead we slide over the characters with
+    -- a match time capture and keep the scanner state in the locals below.
+
+    local b, e, n, first, last = 0, 0, 0, 0, 0
+
+    -- Runs after each matched character with p being the byte position just
+    -- past it; returning nil fails the capture and thereby ends the scan.
+    local function slide(s,p)
+        n = n + 1
+        if n == first then
+            b = p -- the requested start character begins right after this one
+            if not last then
+                return nil
+            end
+        end
+        if n == last then
+            e = p - 1 -- last byte of the requested stop character
+            return nil
+        else
+            return p
+        end
+    end
+
+    local pattern = Cmt(utf8char,slide)^0
+
+    -- Like string.sub but start and stop count utf characters instead of bytes.
+    -- No start gives str itself, no stop runs to the end of str, a start below
+    -- one is treated as one and a range that selects nothing gives "".
+    function utf.sub(str,start,stop) -- todo: from the end
+        if not start or (start <= 1 and not stop) then
+            return str
+        elseif stop and (stop < 1 or stop < start) then
+            return ""
+        end
+        first, last, n, e = (start > 1 and start - 1) or 0, stop, 0, #str
+        b = (first == 0 and 1) or (e + 1) -- past the end when str is too short
+        lpegmatch(pattern,str)
+        return sub(str,b,e)
+    end
+
+    -- print(utf.sub("Hans Hagen is my name"))      -- Hans Hagen is my name
+    -- print(utf.sub("Hans Hagen is my name",5))    -- " Hagen is my name"
+    -- print(utf.sub("Hans Hagen is my name",5,10)) -- " Hagen"
+
+end
+
 local utfchar, utfbyte = utf.char, utf.byte
 
 -- As we want to get rid of the (unmaintained) utf library we implement our own
diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun
index 6daa59f9d..00c1fc13c 100644
--- a/scripts/context/stubs/unix/mtxrun
+++ b/scripts/context/stubs/unix/mtxrun
@@ -4285,10 +4285,10 @@ if not modules then modules = { } end modules ['l-unicode'] = {
 
 local concat = table.concat
 local type = type
-local P, C, R, Cs, Ct = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct
+local P, C, R, Cs, Ct, Cmt = lpeg.P, lpeg.C, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.Cmt
 local lpegmatch, patterns = lpeg.match, lpeg.patterns
 local utftype = patterns.utftype
-local char, byte, find, bytepairs, utfvalues, format = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format
+local char, byte, find, bytepairs, utfvalues, format, sub = string.char, string.byte, string.find, string.bytepairs, string.utfvalues, string.format, string.sub
 local utfsplitlines = string.utfsplitlines
 
 if not unicode then
@@ -4362,6 +4362,57 @@ if not utf.byte then
 
 end
 
+if not utf.sub then
+
+    local utf8char = patterns.utf8char
+
+    -- A counted pattern like utf8char^-(start-1) is inefficient as lpeg just
+    -- copies the pattern n times, so instead we slide over the characters with
+    -- a match time capture and keep the scanner state in the locals below.
+
+    local b, e, n, first, last = 0, 0, 0, 0, 0
+
+    -- Runs after each matched character with p being the byte position just
+    -- past it; returning nil fails the capture and thereby ends the scan.
+    local function slide(s,p)
+        n = n + 1
+        if n == first then
+            b = p -- the requested start character begins right after this one
+            if not last then
+                return nil
+            end
+        end
+        if n == last then
+            e = p - 1 -- last byte of the requested stop character
+            return nil
+        else
+            return p
+        end
+    end
+
+    local pattern = Cmt(utf8char,slide)^0
+
+    -- Like string.sub but start and stop count utf characters instead of bytes.
+    -- No start gives str itself, no stop runs to the end of str, a start below
+    -- one is treated as one and a range that selects nothing gives "".
+    function utf.sub(str,start,stop) -- todo: from the end
+        if not start or (start <= 1 and not stop) then
+            return str
+        elseif stop and (stop < 1 or stop < start) then
+            return ""
+        end
+        first, last, n, e = (start > 1 and start - 1) or 0, stop, 0, #str
+        b = (first == 0 and 1) or (e + 1) -- past the end when str is too short
+        lpegmatch(pattern,str)
+        return sub(str,b,e)
+    end
+
+    -- print(utf.sub("Hans Hagen is my name"))      -- Hans Hagen is my name
+    -- print(utf.sub("Hans Hagen is my name",5))    -- " Hagen is my name"
+    -- print(utf.sub("Hans Hagen is my name",5,10)) -- " Hagen"
+
+end
+
 local utfchar, utfbyte = utf.char, utf.byte
 
 -- As we want to get rid of the (unmaintained) utf library we implement our own
-- 
cgit v1.2.3