Diffstat (limited to 'context/data/textadept/context/lexers')
23 files changed, 942 insertions, 251 deletions
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua b/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
index dce24a2b9..b53da82ea 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
@@ -10,23 +10,22 @@ local global, string, table, lpeg = _G, string, table, lpeg
 local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
 local type = type
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
 
-local token = lexer.token
-local exact_match = lexer.exact_match
+local token = lexer.token
+local exact_match = lexer.exact_match
 
-local bibtexlexer = lexer.new("bib","scite-context-lexer-bibtex")
-local whitespace = bibtexlexer.whitespace
+local bibtexlexer = lexer.new("bib","scite-context-lexer-bibtex")
+local whitespace = bibtexlexer.whitespace
 
- local escape, left, right = P("\\"), P('{'), P('}')
+local escape, left, right = P("\\"), P('{'), P('}')
 
- patterns.balanced = P {
-     [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
-     [2] = left * V(1) * right
- }
+patterns.balanced = P {
+    [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
+    [2] = left * V(1) * right
+}
 
 -- taken from bibl-bib.lua
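The recursive patterns.balanced grammar introduced in this hunk is worth a closer look: rule [1] consumes escaped braces, any non-brace character, or a complete nested group, and rule [2] defines a group as a brace pair wrapping rule [1] again. Below is a minimal standalone sketch of the same grammar outside the lexer framework; the assert checks are illustrative only and not part of the lexer.

    local lpeg = require("lpeg")
    local P, V = lpeg.P, lpeg.V

    local escape, left, right = P("\\"), P("{"), P("}")

    local balanced = P {
        [1] = ((escape * (left + right)) + (1 - (left + right)) + V(2))^0,
        [2] = left * V(1) * right,
    }

    -- match() returns the position after the longest balanced prefix
    assert(lpeg.match(balanced, "a{b{c}}d") == 9) -- fully balanced, consumed to the end
    assert(lpeg.match(balanced, "a\\{b") == 5)    -- escaped brace, no group opened
    assert(lpeg.match(balanced, "a{b") == 2)      -- stops before the unclosed group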
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua b/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua
new file mode 100644
index 000000000..ea9c56712
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua
@@ -0,0 +1,598 @@
+local info = {
+    version   = 1.002,
+    comment   = "scintilla lpeg lexer for bidirectional text (experimental)",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+local P, S, Cmt, Cp = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp
+local find, match = string.find, string.match
+
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local bidilexer = lexer.new("bidi","scite-context-lexer-bidi")
+local whitespace = bidilexer.whitespace
+
+local space = patterns.space
+local any = patterns.any
+
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+
+require("char-def")
+
+characters.directions = { }
+
+setmetatable(characters.directions,{ __index = function(t,k)
+    local d = characters.data[k] -- characters.data is filled by char-def.lua
+    if d then
+        local v = d.direction
+        if v then
+            t[k] = v
+            return v
+        end
+    end
+    t[k] = false -- maybe 'l'
+    return false
+end })
+
+characters.mirrors = { }
+
+setmetatable(characters.mirrors,{ __index = function(t,k)
+    local d = characters.data[k]
+    if d then
+        local v = d.mirror
+        if v then
+            t[k] = v
+            return v
+        end
+    end
+    t[k] = false
+    return false
+end })
+
+characters.textclasses = { }
+
+setmetatable(characters.textclasses,{ __index = function(t,k)
+    local d = characters.data[k]
+    if d then
+        local v = d.textclass
+        if v then
+            t[k] = v
+            return v
+        end
+    end
+    t[k] = false
+    return false
+end })
+
+local directiondata = characters.directions
+local mirrordata = characters.mirrors
+local textclassdata = characters.textclasses
+
+local maximum_stack = 0xFF -- unicode: 60, will be jumped to 125, we don't care too much
+local analyze_fences = false
+
+local whitespace = {
+    lre = true,
+    rle = true,
+    lro = true,
+    rlo = true,
+    pdf = true,
+    bn  = true,
+    ws  = true,
+}
+
+local b_s_ws_on = {
+    b  = true,
+    s  = true,
+    ws = true,
+    on = true
+}
+
+local mt_space  = { __index = { char = 0x0020, direction = "ws",  original = "ws",  level = 0 } }
+local mt_lre    = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0 } }
+local mt_rle    = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0 } }
+local mt_pdf    = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } }
+local mt_object = { __index = { char = 0xFFFC, direction = "on",  original = "on",  level = 0 } }
+
+local list = { }
+local stack = { }
+
+setmetatable(stack, { __index = function(t,k) local v = { } t[k] = v return v end })
+
+local function build_list(head)
+    -- P1
+    local size = 0
+    lpeg.match(pattern,head) -- the 'pattern' that fills 'list' is not defined in this file yet
+    return list, size
+end
+
+local function resolve_fences(list,size,start,limit)
+    -- N0: funny effects, not always better, so it's an option
+    local nofstack = 0
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "on" then
+            local char = entry.char
+            local mirror = mirrordata[char]
+            if mirror then
+                local class = textclassdata[char]
+                entry.mirror = mirror
+                entry.class = class
+                if class == "open" then
+                    nofstack = nofstack + 1
+                    local stacktop = stack[nofstack]
+                    stacktop[1] = mirror
+                    stacktop[2] = i
+                    stacktop[3] = false -- not used
+                elseif nofstack == 0 then
+                    -- skip
+                elseif class == "close" then
+                    while nofstack > 0 do
+                        local stacktop = stack[nofstack]
+                        if stacktop[1] == char then
+                            local open = stacktop[2]
+                            local close = i
+                            list[open ].paired = close
+                            list[close].paired = open
+                            break
+                        else
+                            -- do we mirror or not
+                        end
+                        nofstack = nofstack - 1
+                    end
+                end
+            end
+        end
+    end
+end
+
+local function get_baselevel(list,size,direction)
+    if direction == "TRT" then
+        return 1, "TRT", true
+    elseif direction == "TLT" then
+        return 0, "TLT", true
+    end
+    -- P2, P3:
+    for i=1,size do
+        local entry = list[i]
+        local direction = entry.direction
+        if direction == "r" or direction == "al" then -- and an ?
+            return 1, "TRT", true
+        elseif direction == "l" then
+            return 0, "TLT", true
+        end
+    end
+    return 0, "TLT", false
+end
+
+local function resolve_explicit(list,size,baselevel)
+-- if list.rle or list.lre or list.rlo or list.lro then
+    -- X1
+    local level = baselevel
+    local override = "on"
+    local nofstack = 0
+    for i=1,size do
+        local entry = list[i]
+        local direction = entry.direction
+        -- X2
+        if direction == "rle" then
+            if nofstack < maximum_stack then
+                nofstack = nofstack + 1
+                local stacktop = stack[nofstack]
+                stacktop[1] = level
+                stacktop[2] = override
+                level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
+                override = "on"
+                entry.level = level
+                entry.direction = "bn"
+                entry.remove = true
+            end
+        -- X3
+        elseif direction == "lre" then
+            if nofstack < maximum_stack then
+                nofstack = nofstack + 1
+                local stacktop = stack[nofstack]
+                stacktop[1] = level
+                stacktop[2] = override
+                level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
+                override = "on"
+                entry.level = level
+                entry.direction = "bn"
+                entry.remove = true
+            end
+        -- X4
+        elseif direction == "rlo" then
+            if nofstack < maximum_stack then
+                nofstack = nofstack + 1
+                local stacktop = stack[nofstack]
+                stacktop[1] = level
+                stacktop[2] = override
+                level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
+                override = "r"
+                entry.level = level
+                entry.direction = "bn"
+                entry.remove = true
+            end
+        -- X5
+        elseif direction == "lro" then
+            if nofstack < maximum_stack then
+                nofstack = nofstack + 1
+                local stacktop = stack[nofstack]
+                stacktop[1] = level
+                stacktop[2] = override
+                level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
+                override = "l"
+                entry.level = level
+                entry.direction = "bn"
+                entry.remove = true
+            end
+        -- X7
+        elseif direction == "pdf" then
+            if nofstack < maximum_stack then
+                local stacktop = stack[nofstack]
+                level = stacktop[1]
+                override = stacktop[2]
+                nofstack = nofstack - 1
+                entry.level = level
+                entry.direction = "bn"
+                entry.remove = true
+            end
+        -- X6
+        else
+            entry.level = level
+            if override ~= "on" then
+                entry.direction = override
+            end
+        end
+    end
+-- else
+--     for i=1,size do
+--         list[i].level = baselevel
+--     end
+-- end
+    -- X8 (reset states and overrides after paragraph)
+end
+
+local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
+    -- W1: non spacing marks get the direction of the previous character
+-- if list.nsm then
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "nsm" then
+            if i == start then
+                entry.direction = orderbefore
+            else
+                entry.direction = list[i-1].direction
+            end
+        end
+    end
+-- end
+    -- W2: mess with numbers and arabic
+-- if list.en then
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "en" then
+            for j=i-1,start,-1 do
+                local prev = list[j]
+                local direction = prev.direction
+                if direction == "al" then
+                    entry.direction = "an"
+                    break
+                elseif direction == "r" or direction == "l" then
+                    break
+                end
+            end
+        end
+    end
+-- end
+    -- W3
+-- if list.al then
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "al" then
+            entry.direction = "r"
+        end
+    end
+-- end
+    -- W4: make separators number
+-- if list.es or list.cs then
+    -- skip
+-- if false then
+    if false then
+        for i=start+1,limit-1 do
+            local entry = list[i]
+            local direction = entry.direction
+            if direction == "es" then
+                if list[i-1].direction == "en" and list[i+1].direction == "en" then
+                    entry.direction = "en"
+                end
+            elseif direction == "cs" then
+                local prevdirection = list[i-1].direction
+                if prevdirection == "en" then
+                    if list[i+1].direction == "en" then
+                        entry.direction = "en"
+                    end
+                elseif prevdirection == "an" and list[i+1].direction == "an" then
+                    entry.direction = "an"
+                end
+            end
+        end
+    else -- only more efficient when we have es/cs
+        local runner = start + 2
+        local before = list[start]
+        local entry = list[start + 1]
+        local after = list[runner]
+        while after do
+            local direction = entry.direction
+            if direction == "es" then
+                if before.direction == "en" and after.direction == "en" then
+                    entry.direction = "en"
+                end
+            elseif direction == "cs" then
+                local prevdirection = before.direction
+                if prevdirection == "en" then
+                    if after.direction == "en" then
+                        entry.direction = "en"
+                    end
+                elseif prevdirection == "an" and after.direction == "an" then
+                    entry.direction = "an"
+                end
+            end
+            before = entry -- advance the (before,entry,after) window
+            entry = after
+            after = list[runner]
+            runner = runner + 1
+        end
+    end
+-- end
+    -- W5
+-- if list.et then
+    local i = start
+    while i <= limit do
+        if list[i].direction == "et" then
+            local runstart = i
+            local runlimit = runstart
+            for i=runstart,limit do
+                if list[i].direction == "et" then
+                    runlimit = i
+                else
+                    break
+                end
+            end
+            local rundirection = runstart == start and orderbefore or list[runstart-1].direction
+            if rundirection ~= "en" then
+                rundirection = runlimit == limit and orderafter or list[runlimit+1].direction
+            end
+            if rundirection == "en" then
+                for j=runstart,runlimit do
+                    list[j].direction = "en"
+                end
+            end
+            i = runlimit
+        end
+        i = i + 1
+    end
+-- end
+    -- W6
+-- if list.es or list.cs or list.et then
+    for i=start,limit do
+        local entry = list[i]
+        local direction = entry.direction
+        if direction == "es" or direction == "et" or direction == "cs" then
+            entry.direction = "on"
+        end
+    end
+-- end
+    -- W7
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "en" then
+            local prev_strong = orderbefore
+            for j=i-1,start,-1 do
+                local direction = list[j].direction
+                if direction == "l" or direction == "r" then
+                    prev_strong = direction
+                    break
+                end
+            end
+            if prev_strong == "l" then
+                entry.direction = "l"
+            end
+        end
+    end
+end
+
+local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
+    -- N1, N2
+    for i=start,limit do
+        local entry = list[i]
+        if b_s_ws_on[entry.direction] then
+            -- this needs checking
+            local leading_direction, trailing_direction, resolved_direction
+            local runstart = i
+            local runlimit = runstart
+            for j=runstart+1,limit do
+                if b_s_ws_on[list[j].direction] then
+                    runlimit = j
+                else
+                    break
+                end
+            end
+            if runstart == start then
+                leading_direction = orderbefore
+            else
+                leading_direction = list[runstart-1].direction
+                if leading_direction == "en" or leading_direction == "an" then
+                    leading_direction = "r"
+                end
+            end
+            if runlimit == limit then
+                trailing_direction = orderafter
+            else
+                trailing_direction = list[runlimit+1].direction
+                if trailing_direction == "en" or trailing_direction == "an" then
+                    trailing_direction = "r"
+                end
+            end
+            if leading_direction == trailing_direction then
+                -- N1
+                resolved_direction = leading_direction
+            else
+                -- N2 / does the weird period
+                resolved_direction = entry.level % 2 == 1 and "r" or "l"
+            end
+            for j=runstart,runlimit do
+                list[j].direction = resolved_direction
+            end
+            i = runlimit
+        end
+        i = i + 1
+    end
+end
+
+local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
+    for i=start,limit do
+        local entry = list[i]
+        local level = entry.level
+        local direction = entry.direction
+        if level % 2 ~= 1 then -- even
+            -- I1
+            if direction == "r" then
+                entry.level = level + 1
+            elseif direction == "an" or direction == "en" then
+                entry.level = level + 2
+            end
+        else
+            -- I2
+            if direction == "l" or direction == "en" or direction == "an" then
+                entry.level = level + 1
+            end
+        end
+    end
+end
+
+local function resolve_levels(list,size,baselevel,analyze_fences)
+    -- X10
+    local start = 1
+    while start < size do
+        local level = list[start].level
+        local limit = start + 1
+        while limit < size and list[limit].level == level do
+            limit = limit + 1
+        end
+        local prev_level = start == 1 and baselevel or list[start-1].level
+        local next_level = limit == size and baselevel or list[limit+1].level
+        local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l"
+        local orderafter = (level > next_level and level or next_level) % 2 == 1 and "r" or "l"
+        -- W1 .. W7
+        resolve_weak(list,size,start,limit,orderbefore,orderafter)
+        -- N0
+        if analyze_fences then
+            resolve_fences(list,size,start,limit)
+        end
+        -- N1 .. N2
+        resolve_neutral(list,size,start,limit,orderbefore,orderafter)
+        -- I1 .. I2
+        resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
+        start = limit
+    end
+    -- L1
+    for i=1,size do
+        local entry = list[i]
+        local direction = entry.original
+        -- (1)
+        if direction == "s" or direction == "b" then
+            entry.level = baselevel
+            -- (2)
+            for j=i-1,1,-1 do
+                local entry = list[j]
+                if whitespace[entry.original] then
+                    entry.level = baselevel
+                else
+                    break
+                end
+            end
+        end
+    end
+    -- (3)
+    for i=size,1,-1 do
+        local entry = list[i]
+        if whitespace[entry.original] then
+            entry.level = baselevel
+        else
+            break
+        end
+    end
+    -- L4
+    if analyze_fences then
+        for i=1,size do
+            local entry = list[i]
+            if entry.level % 2 == 1 then -- odd(entry.level)
+                if entry.mirror and not entry.paired then
+                    entry.mirror = false
+                end
+                -- okay
+            elseif entry.mirror then
+                entry.mirror = false
+            end
+        end
+    else
+        for i=1,size do
+            local entry = list[i]
+            if entry.level % 2 == 1 then -- odd(entry.level)
+                local mirror = mirrordata[entry.char]
+                if mirror then
+                    entry.mirror = mirror
+                end
+            end
+        end
+    end
+end
+
+local index = 1
+
+local function process(head,direction)
+    local list, size = build_list(head)
+    local baselevel = get_baselevel(list,size,direction) -- we always have an inline dir node in context
+    resolve_explicit(list,size,baselevel)
+    resolve_levels(list,size,baselevel,analyze_fences)
+    index = 1
+    return list, size
+end
+
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+
+local utf = lexer.helpers.utfbytepattern
+
+-- local t_start = token("default", utf, function(s,i) if i == 1 then index = 1 process(s) end end))
+-- local t_bidi = token("error", utf / function() index = index + 1 return list[index].direction == "r" end)
+-- local t_rest = token("default", any)
+
+-- bidilexer._rules = {
+--     { "start", t_start },
+--     { "bidi", t_bidi },
+--     { "rest", t_rest },
+-- }
+
+bidilexer._grammar = #utf * function(s,i)
+    local list, size = process(s) -- process returns the list and its size
+    local t = { }
+    local n = 0
+    for i=1,size do
+        n = n + 1 t[n] = i
+        n = n + 1 t[n] = "error"
+    end
+    return t
+end
+
+bidilexer._tokenstyles = context.styleset
+
+return bidilexer
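One caveat with the new bidi lexer above: build_list matches a pattern that the file never defines, and the size it returns is never incremented, so as committed the file is clearly experimental and cannot run end to end. Purely as a hedged sketch of the missing piece — one entry per character, with the same fields the mt_* templates and the resolve_* passes expect — here is what such a collector could look like; utf8.codes assumes Lua 5.3 or newer, and none of this is part of the commit:

    -- hypothetical stand-in for the undefined list-building pattern
    local function build_list(head)
        local list = { }
        local size = 0
        for _, char in utf8.codes(head) do
            size = size + 1
            list[size] = {
                char      = char,
                direction = directiondata[char] or "l", -- false for unknown characters
                original  = directiondata[char] or "l",
                level     = 0,
            }
        end
        return list, size
    end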
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cld.lua b/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
index a5fbf9cd7..7bda7800e 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
@@ -6,13 +6,12 @@ local info = {
     license = "see context related readme files",
 }
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
 
-local cldlexer = lexer.new("cld","scite-context-lexer-cld")
-local lualexer = lexer.load("scite-context-lexer-lua")
+local cldlexer = lexer.new("cld","scite-context-lexer-cld")
+local lualexer = lexer.load("scite-context-lexer-lua")
 
 -- can probably be done nicer now, a bit of a hack
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
index e8ff3c1ff..631a802fe 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
@@ -6,13 +6,12 @@ local info = {
     license = "see context related readme files",
 }
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
 
-local cppweblexer = lexer.new("cpp-web","scite-context-lexer-cpp")
-local cpplexer = lexer.load("scite-context-lexer-cpp")
+local cppweblexer = lexer.new("cpp-web","scite-context-lexer-cpp")
+local cpplexer = lexer.load("scite-context-lexer-cpp")
 
 -- can probably be done nicer now, a bit of a hack
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua b/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
index d56dc58f9..d9079855f 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
@@ -10,8 +10,7 @@ local info = {
 
 local P, R, S = lpeg.P, lpeg.R, lpeg.S
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua b/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
index 69590ed34..5d3096b7d 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
@@ -1,4 +1,4 @@
--- local info = {
+local info = {
     version = 1.002,
     comment = "scintilla lpeg lexer that triggers whitespace backtracking",
     author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -10,8 +10,7 @@
 -- we need to trigger that, for instance in the bibtex lexer, but still
 -- we get failed lexing
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua b/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
index 5d5b689d2..b1304f65c 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
@@ -6,8 +6,7 @@ local info = {
     license = "see context related readme files",
 }
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-lua.lua b/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
index a8aa8dbe3..ba14f5206 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
@@ -13,7 +13,6 @@ local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
 local match, find = string.match, string.find
 local setmetatable = setmetatable
 
--- local lexer = require("lexer")
 local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
@@ -47,7 +46,7 @@ local functions = {
     "pcall", "print", "rawequal", "rawget", "rawset", "require",
     "setmetatable", "tonumber", "tostring", "type", "unpack", "xpcall", "select",
 
-    "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32",
+    "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32", "utf8",
 }
 
 local constants = {
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-mps.lua b/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
index e24a41d0c..1c87ea6d0 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
@@ -10,8 +10,7 @@ local global, string, table, lpeg = _G, string, table, lpeg
 local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
 local type = type
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
index cdf33cf7c..155a9bd51 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
@@ -10,8 +10,7 @@ local info = {
 
 local P, R, S, C, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.V
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
index f08d16488..14ba5296b 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
@@ -10,7 +10,6 @@ local info = {
 
 local P, R = lpeg.P, lpeg.R
 
--- local lexer = require("lexer")
 local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
index 1d4796ea5..0fd238d63 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
@@ -6,13 +6,12 @@ local info = {
     license = "see context related readme files",
 }
 
--- pdf is normally startic .. i.e. not edited so we don't really
+-- pdf is normally static .. i.e. not edited so we don't really
 -- need embedded lexers.
 
 local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-sql.lua b/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
index ea432c5c9..cf0a03331 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
@@ -8,8 +8,7 @@ local info = {
 
 local P, R, S = lpeg.P, lpeg.R, lpeg.S
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
index 4a55fd143..88499a9c2 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
@@ -6,13 +6,12 @@ local info = {
     license = "see context related readme files",
 }
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
 
-local texweblexer = lexer.new("tex-web","scite-context-lexer-tex")
-local texlexer = lexer.load("scite-context-lexer-tex")
+local texweblexer = lexer.new("tex-web","scite-context-lexer-tex")
+local texlexer = lexer.load("scite-context-lexer-tex")
 
 -- can probably be done nicer now, a bit of a hack
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-tex.lua b/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
index bc08bfcd9..1f1246fc0 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
@@ -31,8 +31,7 @@ local P, R, S, V, C, Cmt, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, l
 local type, next = type, next
 local find, match, lower, upper = string.find, string.match, string.lower, string.upper
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
 local inform = context.inform
@@ -145,6 +144,9 @@ local validminimum = 3
 
 -- fails (empty loop message) ... latest lpeg issue?
 
+-- todo: Make sure we only do this at the beginning .. a pity that we
+-- can't store a state .. now it is done too often.
+
 local knownpreamble = Cmt(P("% "), function(input,i,_) -- todo : utfbomb, was #P("% ")
     if i < 10 then
         validwords, validminimum = false, 3
@@ -220,10 +222,12 @@ local p_comment = commentline
 ----- p_helper = backslash * exact_match(helpers)
 ----- p_primitive = backslash * exact_match(primitives)
 
-local p_command = backslash * lexer.helpers.utfchartabletopattern(currentcommands) * #(1-cstoken)
-local p_constant = backslash * lexer.helpers.utfchartabletopattern(constants) * #(1-cstoken)
-local p_helper = backslash * lexer.helpers.utfchartabletopattern(helpers) * #(1-cstoken)
-local p_primitive = backslash * lexer.helpers.utfchartabletopattern(primitives) * #(1-cstoken)
+local p_csdone = #(1-cstoken) + P(-1)
+
+local p_command = backslash * lexer.helpers.utfchartabletopattern(currentcommands) * p_csdone
+local p_constant = backslash * lexer.helpers.utfchartabletopattern(constants) * p_csdone
+local p_helper = backslash * lexer.helpers.utfchartabletopattern(helpers) * p_csdone
+local p_primitive = backslash * lexer.helpers.utfchartabletopattern(primitives) * p_csdone
 
 local p_ifprimitive = P("\\if") * cstoken^1
 local p_csname = backslash * (cstoken^1 + P(1))
@@ -446,12 +450,17 @@ local stopmetafuncode = token("embedded", stopmetafun)
 local callers = token("embedded", P("\\") * metafuncall) * metafunarguments
               + token("embedded", P("\\") * luacall)
 
-lexer.embed_lexer(contextlexer, cldlexer, startluacode, stopluacode)
 lexer.embed_lexer(contextlexer, mpslexer, startmetafuncode, stopmetafuncode)
+lexer.embed_lexer(contextlexer, cldlexer, startluacode, stopluacode)
+
+-- preamble is inefficient as it probably gets called each time (so some day I really need to
+-- patch the plugin)
+
+contextlexer._preamble = preamble
 
 contextlexer._rules = {
     { "whitespace", spacing },
-    { "preamble", preamble },
+    -- { "preamble", preamble },
     { "word", word },
     { "text", text }, -- non words
     { "comment", comment },
@@ -459,10 +468,10 @@ contextlexer._rules = {
     -- { "subsystem", subsystem },
     { "callers", callers },
     { "subsystem", subsystem },
+    { "ifprimitive", ifprimitive },
     { "helper", helper },
     { "command", command },
     { "primitive", primitive },
-    { "ifprimitive", ifprimitive },
     -- { "subsystem", subsystem },
     { "reserved", reserved },
     { "csname", csname },
@@ -490,10 +499,10 @@ if web then
     { "comment", comment },
     { "constant", constant },
     { "callers", callers },
+    { "ifprimitive", ifprimitive },
     { "helper", helper },
     { "command", command },
     { "primitive", primitive },
-    { "ifprimitive", ifprimitive },
     { "reserved", reserved },
     { "csname", csname },
     { "grouping", grouping },
@@ -514,10 +523,10 @@ else
     { "comment", comment },
     { "constant", constant },
     { "callers", callers },
+    { "ifprimitive", ifprimitive },
     { "helper", helper },
     { "command", command },
     { "primitive", primitive },
-    { "ifprimitive", ifprimitive },
     { "reserved", reserved },
     { "csname", csname },
     { "grouping", grouping },
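The p_csdone refactoring in the TeX lexer hunk above is easy to miss: the old patterns only used the lookahead #(1-cstoken), so a control sequence sitting at the very end of the buffer could never match; the new guard also accepts end of input via P(-1). A small illustration of the difference, with cstoken simplified to plain letters (the real lexer builds it from its own character classes):

    local lpeg = require("lpeg")
    local P, R = lpeg.P, lpeg.R

    local cstoken   = R("az","AZ")           -- simplified for the example
    local backslash = P("\\")
    local p_csdone  = #(1 - cstoken) + P(-1) -- next char ends the name, or end of input

    local p_command = backslash * P("relax") * p_csdone

    assert(lpeg.match(p_command, "\\relax ") ~= nil) -- followed by a space: matches
    assert(lpeg.match(p_command, "\\relax") ~= nil)  -- at end of input: now also matches
    assert(lpeg.match(p_command, "\\relaxx") == nil) -- longer name: correctly rejected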
context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua b/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua index 141de20e1..3cef71739 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua @@ -8,8 +8,7 @@ local info = { local P, R, S, C, Cg, Cb, Cs, Cmt, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Cs, lpeg.Cmt, lpeg.match --- local lexer = require("lexer") -local lexer = require("scite-context-lexer") +local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-web.lua index 6fe5ac84c..81a6f90df 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-web.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-web.lua @@ -8,8 +8,7 @@ local info = { local P, R, S = lpeg.P, lpeg.R, lpeg.S --- local lexer = require("lexer") -local lexer = require("scite-context-lexer") +local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua index 25fa9128f..f5ca86cb2 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua @@ -8,8 +8,7 @@ local info = { local P = lpeg.P --- local lexer = require("lexer") -local lexer = require("scite-context-lexer") +local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua index 2d7260b69..40de8f603 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua @@ -8,8 +8,7 @@ local info = { local P = lpeg.P --- local lexer = require("lexer") -local lexer = require("scite-context-lexer") +local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua index 1ee96ba89..a1b717a6a 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua @@ -8,7 +8,6 @@ local info = { local P = lpeg.P --- local lexer = require("lexer") local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua index 1b7e2e897..bbdb3febc 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-xml.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua @@ -17,8 +17,7 @@ local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp local type = type local match, find = string.match, string.find --- local lexer = 
require("lexer") -local lexer = require("scite-context-lexer") +local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer.lua b/context/data/textadept/context/lexers/scite-context-lexer.lua index e526d5045..37f236a89 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer.lua @@ -8,11 +8,6 @@ local info = { } --- todo: hook into context resolver etc --- todo: only old api in lexers, rest in context subnamespace --- todo: make sure we can run in one state .. copies or shared? --- todo: auto-nesting - if lpeg.setmaxstack then lpeg.setmaxstack(1000) end local log = false @@ -27,169 +22,252 @@ local inspect = false -- can save some 15% (maybe easier on scintilla) -- GET GOING -- --- You need to copy this file over lexer.lua. In principle other lexers could work too but --- not now. Maybe some day. All patterns will move into the patterns name space. I might do --- the same with styles. If you run an older version of SciTE you can take one of the --- archives. Pre 3.41 versions can just be copied to the right path, as there we still use --- part of the normal lexer. +-- You need to copy this file over lexer.lua. In principle other lexers could work +-- too but not now. Maybe some day. All patterns will move into the patterns name +-- space. I might do the same with styles. If you run an older version of SciTE you +-- can take one of the archives. Pre 3.41 versions can just be copied to the right +-- path, as there we still use part of the normal lexer. Below we mention some +-- issues with different versions of SciTE. We try to keep up with changes but best +-- check careful if the version that yuou install works as expected because SciTE +-- and the scintillua dll need to be in sync. -- -- REMARK -- --- We started using lpeg lexing as soon as it came available. Because we had rather demanding --- files and also wanted to use nested lexers, we ended up with our own variant. At least at --- that time this was more robust and also faster (as we have some pretty large lua data files --- and also work with large xml files). As a consequence successive versions had to be adapted --- to changes in the (at that time still unstable) api. In addition to lexing we also have --- spell checking and such. Around version 3.60 things became more stable so I don't expect to --- change much. +-- We started using lpeg lexing as soon as it came available. Because we had rather +-- demanding files and also wanted to use nested lexers, we ended up with our own +-- variant. At least at that time this was more robust and also much faster (as we +-- have some pretty large Lua data files and also work with large xml files). As a +-- consequence successive versions had to be adapted to changes in the (at that time +-- still unstable) api. In addition to lexing we also have spell checking and such. +-- Around version 3.60 things became more stable so I don't expect to change much. +-- +-- LEXING -- --- STATUS +-- When pc's showed up we wrote our own editor (texedit) in MODULA 2. It was fast, +-- had multiple overlapping (text) windows, could run in the at most 1M memory at +-- that time, etc. The realtime file browsing with lexing that we had at that time +-- is still on my current wish list. The color scheme and logic that we used related +-- to the logic behind the ConTeXt user interface that evolved. 
-- --- todo: maybe use a special stripped version of the dll (stable api) and add a bit more --- interfacing to scintilla --- todo: investigate if we can use the already built in lua instance so that we can combine the --- power of lexign with extensions --- todo: play with hotspot and other properties (but no real need now) --- todo: maybe come up with an extension to the api subsystem --- todo: add proper tracing and so .. not too hard as we can run on mtxrun, but we lack a console --- for debugging (ok, chicken-egg as lexers probably need to be loaded before a console can --- kick in) --- todo: get rid of these lexers.STYLE_XX and lexers.XX (hide such details) +-- Later I rewrote the editor in perl/tk. I don't like the perl syntax but tk +-- widgets are very powerful and hard to beat. In fact, TextAdept reminds me of +-- that: wrap your own interface around a framework (tk had an edit control that one +-- could control completely not that different from scintilla). Last time I checked +-- it still ran fine so I might try to implement something like its file handling in +-- TextAdept. -- --- wish: access to all scite properties and in fact integrate in scite +-- In the end I settled for SciTE for which I wrote TeX and MetaPost lexers that +-- could handle keyword sets. With respect to lexing (syntax highlighting) ConTeXt +-- has a long history, if only because we need it for manuals. Anyway, in the end we +-- arrived at lpeg based lexing (which is quite natural as we have lots of lpeg +-- usage in ConTeXt). The basic color schemes haven't changed much. The most +-- prominent differences are the nested lexers. -- +-- In the meantime I made the lexer suitable for typesetting sources which was no +-- big deal as we already had that in place (ConTeXt used lpeg from the day it +-- showed up so we have several lexing options there too). -- --- In the meantime I made the lexer suitable for typesetting sources which was no big deal as we --- already had that in place (ConTeXt used lpeg from the day it showed up so we have several lexing --- options there too). +-- Keep in mind that in ConTeXt (typesetting) lexing can follow several approached: +-- line based (which is handy for verbatim mode), syntax mode (which is nice for +-- tutorials), and tolerant mode (so that one can also show bad examples or errors). +-- These demands can clash. -- -- HISTORY -- --- The fold and lex functions are copied and patched from original code by Mitchell (see lexer.lua). --- All errors are mine. The ability to use lpeg in scintilla is a real nice addition and a brilliant --- move. The code is a byproduct of the (mainly Lua based) textadept (at the time I ran into it was --- a rapidly moving target so I decided to stick ot SciTE). When I played with it, it had no realtime --- output pane but that seems to be dealt with now (2017). I need to have a look at it in more detail --- but a first test again mad the output hang and it was a bit slow too (and I also want the log pane --- as scite has it, on the right, in view). So, for now I stick to SciTE even when it's somewhat --- crippled by the fact that we cannot hook our own (language dependent) lexer into the output pane --- (somehow the errorlist lexer is hard coded into the editor). Hopefully that will change some day. --- So, how did we arrive where we're now. +-- The remarks below are more for myself so that I keep track of changes in the +-- way we adapt to the changes in the scintillua and scite. 
+-- +-- The fold and lex functions are copied and patched from original code by Mitchell +-- (see lexer.lua) in the scintillua distribution. So whatever I say below, assume +-- that all errors are mine. The ability to use lpeg in scintilla is a real nice +-- addition and a brilliant move. The code is a byproduct of the (mainly Lua based) +-- TextAdept which at the time I ran into it was a rapidly moving target so I +-- decided to stick ot SciTE. When I played with it, it had no realtime output pane +-- although that seems to be dealt with now (2017). I need to have a look at it in +-- more detail but a first test again made the output hang and it was a bit slow too +-- (and I also want the log pane as SciTE has it, on the right, in view). So, for +-- now I stick to SciTE even when it's somewhat crippled by the fact that we cannot +-- hook our own (language dependent) lexer into the output pane (somehow the +-- errorlist lexer is hard coded into the editor). Hopefully that will change some +-- day. The ConTeXt distribution has cmd runner for textdept that will plug in the +-- lexers discussed here as well as a dedicated runner. Considere it an experiment. -- --- Starting with SciTE version 3.20 there is an issue with coloring. As we still lack a connection --- with SciTE itself (properties as well as printing to the log pane) and we cannot trace this (on --- windows). As far as I can see, there are no fundamental changes in lexer.lua or LexLPeg.cxx so it --- must be in Scintilla itself. So for the moment I stick to 3.10. Indicators are: no lexing of 'next' --- and 'goto <label>' in the Lua lexer and no brace highlighting either. Interesting is that it does --- work ok in the cld lexer (so the Lua code is okay). All seems to be ok again in later versions, --- so, when you update best check first and just switch back to an older version as normally a SciTE --- update is not critital. When char-def.lua lexes real fast this is a signal that the lexer quits --- somewhere halfway. Maybe there are some hard coded limitations on the amount of styles and/or --- length of names. +-- The basic code hasn't changed much but we had to adapt a few times to changes in +-- the api and/or work around bugs. Starting with SciTE version 3.20 there was an +-- issue with coloring. We still lacked a connection with SciTE itself (properties +-- as well as printing to the log pane) and we could not trace this (on windows). +-- However on unix we can see messages! As far as I can see, there are no +-- fundamental changes in lexer.lua or LexLPeg.cxx so it must be/have been in +-- Scintilla itself. So we went back to 3.10. Indicators of issues are: no lexing of +-- 'next' and 'goto <label>' in the Lua lexer and no brace highlighting either. +-- Interesting is that it does work ok in the cld lexer (so the Lua code is okay). +-- All seems to be ok again in later versions, so, when you update best check first +-- and just switch back to an older version as normally a SciTE update is not +-- critital. When char-def.lua lexes real fast this is a signal that the lexer quits +-- somewhere halfway. Maybe there are some hard coded limitations on the amount of +-- styles and/or length of names. -- --- Anyway, after checking 3.24 and adapting to the new lexer tables things are okay again. So, this --- version assumes 3.24 or higher. In 3.24 we have a different token result, i.e. no longer a { tag, --- pattern } but just two return values. I didn't check other changes but will do that when I run into --- issues. 
I had optimized these small tables by hashing which was more efficient but this is no longer --- needed. For the moment we keep some of that code around as I don't know what happens in future --- versions. I'm anyway still happy with this kind of lexing. +-- Anyway, after checking 3.24 and adapting to the new lexer tables things are okay +-- again. So, this version assumes 3.24 or higher. In 3.24 we have a different token +-- result, i.e. no longer a { tag, pattern } but just two return values. I didn't +-- check other changes but will do that when I run into issues. I had already +-- optimized these small tables by hashing which was much more efficient (and maybe +-- even more efficient than the current approach) but this is no longer needed. For +-- the moment we keep some of that code around as I don't know what happens in +-- future versions. I'm anyway still happy with this kind of lexing. -- --- In 3.31 another major change took place: some helper constants (maybe they're no longer constants) --- and functions were moved into the lexer modules namespace but the functions are assigned to the Lua --- module afterward so we cannot alias them beforehand. We're probably getting close to a stable --- interface now. I've considered making a whole copy and patch the other functions too as we need an --- extra nesting model. However, I don't want to maintain too much. An unfortunate change in 3.03 is --- that no longer a script can be specified. This means that instead of loading the extensions via the --- properties file, we now need to load them in our own lexers, unless of course we replace lexer.lua +-- In 3.31 another major change took place: some helper constants (maybe they're no +-- longer constants) and functions were moved into the lexer modules namespace but +-- the functions are assigned to the Lua module afterward so we cannot alias them +-- beforehand. We're probably getting close to a stable interface now. At that time +-- for the first time I considered making a whole copy and patch the other functions +-- too as we need an extra nesting model. However, I don't want to maintain too +-- much. An unfortunate change in 3.03 is that no longer a script can be specified. +-- This means that instead of loading the extensions via the properties file, we now +-- need to load them in our own lexers, unless of course we replace lexer.lua -- completely (which adds another installation issue). -- --- Another change has been that _LEXERHOME is no longer available. It looks like more and more --- functionality gets dropped so maybe at some point we need to ship our own dll/so files. For instance, --- I'd like to have access to the current filename and other scite properties. We could then cache some --- info with each file, if only we had knowledge of what file we're dealing with. +-- Another change has been that _LEXERHOME is no longer available. It looks like +-- more and more functionality gets dropped so maybe at some point we need to ship +-- our own dll/so files. For instance, I'd like to have access to the current +-- filename and other SciTE properties. We could then cache some info with each +-- file, if only we had knowledge of what file we're dealing with. This all makes a +-- nice installation more complex and (worse) makes it hard to share files between +-- different editors usign s similar directory structure. -- --- For huge files folding can be pretty slow and I do have some large ones that I keep open all the time. 
--- Loading is normally no ussue, unless one has remembered the status and the cursor is at the last line --- of a 200K line file. Optimizing the fold function brought down loading of char-def.lua from 14 sec --- => 8 sec. Replacing the word_match function and optimizing the lex function gained another 2+ seconds. --- A 6 second load is quite ok for me. The changed lexer table structure (no subtables) brings loading --- down to a few seconds. +-- For huge files folding can be pretty slow and I do have some large ones that I +-- keep open all the time. Loading is normally no ussue, unless one has remembered +-- the status and the cursor is at the last line of a 200K line file. Optimizing the +-- fold function brought down loading of char-def.lua from 14 sec => 8 sec. +-- Replacing the word_match function and optimizing the lex function gained another +-- 2+ seconds. A 6 second load is quite ok for me. The changed lexer table structure +-- (no subtables) brings loading down to a few seconds. -- --- When the lexer path is copied to the textadept lexer path, and the theme definition to theme path --- (as lexer.lua), the lexer works there as well. Although ... when I decided to check the state of --- textadept i had to adapt some loader code. It's not pretty but works and also permits overloading. --- When I have time and motive I will make a proper setup file to tune the look and feel a bit and --- associate suffixes with the context lexer. The textadept editor has a nice style tracing option but --- lacks the tabs for selecting files that scite has. It also has no integrated run that pipes to the --- log pane. Interesting is that the jit version of textadept crashes on lexing large files (and does --- not feel faster either; maybe a side effect of known limitations as we know that luajit is more --- limited than stock lua). Btw, in the meantime on unix one can test easier as there we can enable --- the loggers in this module. +-- When the lexer path is copied to the TextAdept lexer path, and the theme +-- definition to theme path (as lexer.lua), the lexer works there as well. Although +-- ... when I decided to check the state of TextAdept I had to adapt some loader +-- code. The solution is not pretty but works and also permits overloading. When I +-- have time and motive I will make a proper setup file to tune the look and feel a +-- bit more than we do now. The TextAdept editor nwo has tabs and a console so it +-- has become more useable for me (it's still somewhat slower than SciTE). +-- Interesting is that the jit version of TextAdept crashes on lexing large files +-- (and does not feel faster either; maybe a side effect of known limitations as we +-- know that Luajit is more limited than stock Lua). -- --- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name .. '_whitespace' which means --- that we need to have it frozen at the moment we load another lexer. Because spacing is used to revert --- to a parent lexer we need to make sure that we load children as late as possible in order not to get --- the wrong whitespace trigger. This took me quite a while to figure out (not being that familiar with --- the internals). The lex and fold functions have been optimized. It is a pitty that there is no proper --- print available. Another thing needed is a default style in our own theme style definition, as otherwise --- we get wrong nested lexers, especially if they are larger than a view. 
This is the hardest part of +-- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name .. +-- '_whitespace' which means that we need to have it frozen at the moment we load +-- another lexer. Because spacing is used to revert to a parent lexer we need to +-- make sure that we load children as late as possible in order not to get the wrong +-- whitespace trigger. This took me quite a while to figure out (not being that +-- familiar with the internals). The lex and fold functions have been optimized. It +-- is a pitty that there is no proper print available. Another thing needed is a +-- default style in our own theme style definition, as otherwise we get wrong nested +-- lexers, especially if they are larger than a view. This is the hardest part of -- getting things right. -- --- It's a pitty that there is no scintillua library for the OSX version of scite. Even better would be --- to have the scintillua library as integral part of scite as that way I could use OSX alongside --- windows and linux (depending on needs). Also nice would be to have a proper interface to scite then --- because currently the lexer is rather isolated and the lua version does not provide all standard --- libraries. It would also be good to have lpeg support in the regular scite lua extension (currently --- you need to pick it up from someplace else). +-- It's a pitty that there is no scintillua library for the OSX version of SciTE. +-- Even better would be to have the scintillua library as integral part of SciTE as +-- that way I could use OSX alongside windows and linux (depending on needs). Also +-- nice would be to have a proper interface to SciTE then because currently the +-- lexer is rather isolated and the Lua version does not provide all standard +-- libraries. It would also be good to have lpeg support in the regular SciTE Lua +-- extension (currently you need to pick it up from someplace else). I keep hoping. -- --- With 3.41 the interface changed again so it gets time to look into the C++ code and consider compiling --- and patching myself. Loading is more complicated now as the lexer gets loaded automatically so we have --- little control over extending the code now. After a few days trying all kind of solutions I decided to --- follow a different approach: drop in a complete replacement. This of course means that I need to keep --- track of even more changes (which for sure will happen) but at least I get rid of interferences. The --- api (lexing and configuration) is simply too unstable across versions. Maybe in a few years things have --- stabelized again. (Or maybe it's not really expected that one writes lexers at all.) A side effect is --- that I now no longer will use shipped lexers but just the built-in ones in addition to the context --- lpeg lexers. Not that it matters much as the context lexers cover what I need (and I can always write --- more). +-- With 3.41 the interface changed again so it became time to look into the C++ code +-- and consider compiling and patching myself, something that I like to avoid. +-- Loading is more complicated now as the lexer gets loaded automatically so we have +-- little control over extending the code now. After a few days trying all kind of +-- solutions I decided to follow a different approach: drop in a complete +-- replacement. This of course means that I need to keep track of even more changes +-- (which for sure will happen) but at least I get rid of interferences. 
Till 3.60 +-- the api (lexing and configuration) was simply too unstable across versions which +-- is a pitty because we expect authors to install SciTE without hassle. Maybe in a +-- few years things will have stabelized. Maybe it's also not really expected that +-- one writes lexers at all. A side effect is that I now no longer will use shipped +-- lexers for languages that I made no lexer for, but just the built-in ones in +-- addition to the ConTeXt lpeg lexers. Not that it matters much as the ConTeXt +-- lexers cover what I need (and I can always write more). For editing TeX files one +-- only needs a limited set of lexers (TeX, MetaPost, Lua, BibTeX, C/W, PDF, SQL, +-- etc). I can add more when I want. -- --- In fact, the transition to 3.41 was triggered by an unfateful update of Ubuntu which left me with an --- incompatible SciTE and lexer library and updating was not possible due to the lack of 64 bit libraries. --- We'll see what the future brings. +-- In fact, the transition to 3.41 was triggered by an unfateful update of Ubuntu +-- which left me with an incompatible SciTE and lexer library and updating was not +-- possible due to the lack of 64 bit libraries. We'll see what the future brings. +-- For now I can use SciTE under wine on linux. The fact that scintillua ships +-- independently is a showstopper. -- --- Promissing is that the library now can use another Lua instance so maybe some day it will get properly --- in SciTE and we can use more clever scripting. +-- Promissing is that the library now can use another Lua instance so maybe some day +-- it will get properly in SciTE and we can use more clever scripting. -- --- In some lexers we use embedded ones even if we could do it directly, The reason is that when the end --- token is edited (e.g. -->), backtracking to the space before the begin token (e.g. <!--) results in --- applying the surrounding whitespace which in turn means that when the end token is edited right, --- backtracking doesn't go back. One solution (in the dll) would be to backtrack several space categories. +-- In some lexers we use embedded ones even if we could do it directly, The reason +-- is that when the end token is edited (e.g. -->), backtracking to the space before +-- the begin token (e.g. <!--) results in applying the surrounding whitespace which +-- in turn means that when the end token is edited right, backtracking doesn't go +-- back. One solution (in the dll) would be to backtrack several space categories. -- After all, lexing is quite fast (applying the result is much slower). -- --- For some reason the first blob of text tends to go wrong (pdf and web). It would be nice to have 'whole --- doc' initial lexing. Quite fishy as it makes it impossible to lex the first part well (for already opened --- documents) because only a partial text is passed. +-- For some reason the first blob of text tends to go wrong (pdf and web). It would +-- be nice to have 'whole doc' initial lexing. Quite fishy as it makes it impossible +-- to lex the first part well (for already opened documents) because only a partial +-- text is passed. -- --- So, maybe I should just write this from scratch (assuming more generic usage) because after all, the dll --- expects just tables, based on a string. I can then also do some more aggressive resource sharing (needed --- when used generic). +-- So, maybe I should just write this from scratch (assuming more generic usage) +-- because after all, the dll expects just tables, based on a string. 
I can then +-- also do some more aggressive resource sharing (needed when used generic). -- --- I think that nested lexers are still bugged (esp over longer ranges). It never was robust or maybe it's --- simply not meant for too complex cases (well, it probably *is* tricky material). The 3.24 version was --- probably the best so far. The fact that styles bleed between lexers even if their states are isolated is --- an issue. Another issus is that zero characters in the text passed to the lexer can mess things up (pdf --- files have them in streams). +-- I think that nested lexers are still bugged (especially over longer ranges). It +-- never was robust or maybe it's simply not meant for too complex cases (well, it +-- probably *is* tricky material). The 3.24 version was probably the best so far. +-- The fact that styles bleed between lexers even if their states are isolated is an +-- issue. Another issue is that zero characters in the text passed to the lexer can +-- mess things up (pdf files have them in streams). -- --- For more complex 'languages', like web or xml, we need to make sure that we use e.g. 'default' for --- spacing that makes up some construct. Ok, we then still have a backtracking issue but less. +-- For more complex 'languages', like web or xml, we need to make sure that we use +-- e.g. 'default' for spacing that makes up some construct. Ok, we then still have a +-- backtracking issue but less. -- --- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++ and we ship an ini --- file for that editor with some installation instructions embedded. +-- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++ +-- and we ship an ini file for that editor with some installation instructions +-- embedded. Also, TextAdept has a console so that we can run in real time. The +-- spawner is still not perfect (sometimes hangs) but it was enough reason to spend +-- time on making our lexer work with TextAdept and create a setup. +-- +-- TRACING +-- +-- The advantage is that we can now check more easily with regular Lua(TeX). We can +-- also use Wine and print to the console (somehow stdout is intercepted there). So, +-- I've added a bit of tracing. It is interesting to notice that each document gets +-- its own instance, which has advantages but also means that when we are +-- spellchecking we reload the word lists each time. (In the past I assumed a shared +-- instance and took some precautions. But I can fix this.) -- -- TODO -- --- I can make an export to context, but first I'll redo the code that makes the grammar, --- as we only seem to need +-- It would be nice if we could load some ConTeXt Lua modules (the basic set) and +-- then use resolvers and such. +-- +-- The current lexer basics are still a mix of old and new. Maybe I should redo some +-- more. This is probably easier in TextAdept than in SciTE. +-- +-- We have to make sure we don't overload ConTeXt definitions when this code is used +-- in ConTeXt. I still have to add some of the goodies that we have there into these +-- lexers. +-- +-- Maybe I should use a special version of the dll that is stripped down on the one +-- hand and extended on the other (a stable api) and at least add a bit more +-- interfacing to scintilla. +-- +-- I need to investigate if we can use the already built-in Lua instance so that we +-- can combine the power of lexing with extensions. +-- +-- I need to play with hotspot and other properties like indicators (whatever they +-- are). 
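[Editor's sketch: the zero-character issue mentioned above (zero bytes in pdf streams confusing the lexer) could in principle be sanitized before lexing. This diff does not do that; the following only illustrates the idea.]

local lpeg  = require("lpeg")
local P, Cs = lpeg.P, lpeg.Cs

-- map embedded zero bytes onto spaces so that they can't confuse
-- patterns that expect printable text
local stripzeros = Cs((P("\0") / " " + P(1))^0)

print(lpeg.match(stripzeros, "stream\0data")) -- "stream data"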
+-- +-- I want to get rid of these lexers.STYLE_XX and lexers.XX things. This is possible +-- when we give up compatibility. Generalize the helpers that I wrote for SciTE so +-- that they can also be used in TextAdept. +-- +-- I can make an export to ConTeXt, but first I'll redo the code that makes the +-- grammar, as we only seem to need -- -- lexer._TOKENSTYLES : table -- lexer._CHILDREN : flag @@ -199,38 +277,30 @@ local inspect = false -- can save some 15% (maybe easier on scintilla) -- lexers.load : function -- lexers.lex : function -- --- So, if we drop compatibility with other lex definitions, we can make things simpler. Howeverm in the --- meantime one can just do this: +-- So, if we drop compatibility with other lex definitions, we can make things +-- simpler. However, in the meantime one can just do this: -- -- context --extra=listing --scite [--compact --verycompact] somefile.tex -- --- and get a printable document. So, this todo is obsolete. - --- TRACING +-- and get a printable document. So, this todo is a bit obsolete. -- --- The advantage is that we now can check more easily with regular Lua(TeX). We can also use wine and print --- to the console (somehow stdout is intercepted there.) So, I've added a bit of tracing. Interesting is to --- notice that each document gets its own instance which has advantages but also means that when we are --- spellchecking we reload the word lists each time. (In the past I assumed a shared instance and took --- some precautions.) - --- todo: make sure we don't overload context definitions when used in context +-- Properties are an ugly mess ... due to changes in the interface we're now left +-- with some hybrid that sort of works ok. --- properties is an ugly mess ... due to chages in the interface we're now left with some hybrid --- that sort of works ok +-- textadept: buffer:colourise(0,-1) local lpeg = require("lpeg") local global = _G -local find, gmatch, match, lower, upper, gsub, sub, format = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format +local find, gmatch, match, lower, upper, gsub, sub, format, byte = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format, string.byte local concat, sort = table.concat, table.sort local type, next, setmetatable, rawset, tonumber, tostring = type, next, setmetatable, rawset, tonumber, tostring local R, P, S, V, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg local lpegmatch = lpeg.match +local usage = (textadept and "textadept") or (resolvers and "context") or "scite" local nesting = 0 - -local print = (textadept and ui and ui.print) or print +local print = textadept and ui and ui.print or print local function report(fmt,str,...) if log then @@ -679,21 +749,34 @@ local locations = { -- end -- end -local function collect(name) - local rootlist = lexers.LEXERPATH or "." - for root in gmatch(rootlist,"[^;]+") do - local root = gsub(root,"/[^/]-lua$","") - for i=1,#locations do - local fullname = root .. "/" .. locations[i] .. "/" .. name .. 
".lua" -- so we can also check for .luc - if trace then - report("attempt to locate '%s'",fullname) - end - local okay, result = pcall(function () return dofile(fullname) end) - if okay then - return result, fullname +local collect + +if usage == "context" then + + collect = function(name) + return require(name), name + end + +else + + collect = function(name) + local rootlist = lexers.LEXERPATH or "." + for root in gmatch(rootlist,"[^;]+") do + local root = gsub(root,"/[^/]-lua$","") + for i=1,#locations do + local fullname = root .. "/" .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc + if trace then + report("attempt to locate '%s'",fullname) + end + local okay, result = pcall(function () return dofile(fullname) end) + if okay then + return result, fullname + end end end + -- return require(name), name end + end function context.loadluafile(name) @@ -1371,25 +1454,33 @@ local function add_lexer(grammar, lexer) -- mostly the same as the original end local function build_grammar(lexer,initial_rule) -- same as the original - local children = lexer._CHILDREN + local children = lexer._CHILDREN local lexer_name = lexer._NAME - if children then + local preamble = lexer._preamble + local grammar = lexer._grammar + if grammar then + -- experiment + elseif children then if not initial_rule then initial_rule = lexer_name end - local grammar = { initial_rule } + grammar = { initial_rule } add_lexer(grammar, lexer) lexer._INITIALRULE = initial_rule - lexer._GRAMMAR = Ct(P(grammar)) + grammar = Ct(P(grammar)) if trace then report("building grammar for '%s' with whitespace '%s'and %s children",lexer_name,lexer.whitespace or "?",#children) end else - lexer._GRAMMAR = Ct(join_tokens(lexer)^0) + grammar = Ct(join_tokens(lexer)^0) if trace then report("building grammar for '%s' with whitespace '%s'",lexer_name,lexer.whitespace or "?") end end + if preamble then + grammar = preamble^-1 * grammar + end + lexer._GRAMMAR = grammar end -- So far. We need these local functions in the next one. 
@@ -1534,7 +1625,7 @@ function context.lex(lexer,text,init_style) if trace then report("lexing '%s' with initial style '%s' and %s children",lexer._NAME,#lexer._CHILDREN or 0,init_style) end - return matched(lexer,grammar,text) + return result else if trace then report("lexing '%s' with initial style '%s'",lexer._NAME,init_style) @@ -1733,7 +1824,7 @@ function context.loadlexer(filename,namespace) lexer = load_lexer(filename,namespace) or nolexer(filename,namespace) usedlexers[filename] = lexer -- - if not lexer._rules and not lexer._lexer then + if not lexer._rules and not lexer._lexer and not lexer._grammar then lexer._lexer = parent_lexer end -- @@ -1765,16 +1856,19 @@ function context.loadlexer(filename,namespace) end -- local _r = lexer._rules - if _r then + local _g = lexer._grammar + if _r or _g then local _s = lexer._tokenstyles if _s then for token, style in next, _s do add_style(lexer, token, style) end end - for i=1,#_r do - local rule = _r[i] - add_rule(lexer, rule[1], rule[2]) + if _r then + for i=1,#_r do + local rule = _r[i] + add_rule(lexer, rule[1], rule[2]) + end end build_grammar(lexer) end @@ -2001,10 +2095,20 @@ do -- return make(tree) -- end - helpers.utfcharpattern = P(1) * R("\128\191")^0 -- unchecked but fast + local utf8next = R("\128\191") + local utf8one = R("\000\127") + local utf8two = R("\194\223") * utf8next + local utf8three = R("\224\239") * utf8next * utf8next + local utf8four = R("\240\244") * utf8next * utf8next * utf8next + + helpers.utfcharpattern = P(1) * utf8next^0 -- unchecked but fast + helpers.utfbytepattern = utf8one / byte + + utf8two / function(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end + + utf8three / function(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end + + utf8four / function(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end - local p_false = P(false) - local p_true = P(true) + local p_false = P(false) + local p_true = P(true) local function make(t) local function making(t)
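[Editor's note on the utfbytepattern hunk above: the magic constants are just the usual UTF-8 offsets folded into one subtraction, i.e. 192*64 + 128 = 12416 for two bytes, 224*4096 + 128*64 + 128 = 925824 for three, and 240*262144 + 128*4096 + 128*64 + 128 = 63447168 for four. A standalone check of the two-byte case, mirroring the pattern from the diff:]

local lpeg = require("lpeg")
local R    = lpeg.R
local byte = string.byte

local utf8next = R("\128\191")
local utf8two  = R("\194\223") * utf8next

-- (c1 - 192) * 64 + (c2 - 128), folded into a single subtraction
local pattern = utf8two / function(s)
    local c1, c2 = byte(s,1,2)
    return c1 * 64 + c2 - 12416
end

print(lpeg.match(pattern, "\195\169")) -- 233, i.e. U+00E9 (é)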