1 files changed, 138 insertions, 100 deletions
diff --git a/context/data/scite/lexers/scite-context-lexer.lua b/context/data/scite/lexers/scite-context-lexer.lua
index 688eb5776..04937cbc0 100644
--- a/context/data/scite/lexers/scite-context-lexer.lua
+++ b/context/data/scite/lexers/scite-context-lexer.lua
@@ -16,6 +16,14 @@ local info = {
 -- function and optimizing the lex function gained another 2+ seconds. A 6 second load
 -- is quite ok for me.
 
+-- Function load(lexer_name) starts with _M.WHITESPACE = lexer_name..'_whitespace' which
+-- means that we need to have it frozen at the moment we load another lexer. Because spacing
+-- is used to revert to a parent lexer we need to make sure that we load children as late
+-- as possible in order not to get the wrong whitespace trigger. This took me quite a while
+-- to figure out (not being that familiar with the internals). BTW, if performance becomes
+-- an issue we can rewrite the main lex function (memoize the grammars and speed up the
+-- byline variant).
+
 local R, P, S, Cp, Cs, Ct, Cmt, Cc = lpeg.R, lpeg.P, lpeg.S, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc
 local lpegmatch = lpeg.match
 local find, gmatch, match, lower, upper, gsub = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub
@@ -27,22 +35,6 @@ dofile(_LEXERHOME .. '/lexer.lua')
 
 lexer.context = lexer.context or { }
 
--- function lexer.context.loaddefinitions(name)
---     local basepath = lexer.context and lexer.context.path or _LEXERHOME
---     local definitions = loadfile(basepath and (basepath .. "/" .. name) or name)
---     if not definitions then
---         definitions = loadfile(_LEXERHOME .. "/context/" .. name)
---     end
---     if type(definitions) == "function" then
---         definitions = definitions()
---     end
---     if type(definitions) == "table" then
---         return definitions
---     else
---         return nil
---     end
--- end
-
 function lexer.context.loaddefinitions(name)
     local definitions = loadfile(_LEXERHOME .. "/context/" .. name)
     if not definitions and lexer.context and lexer.context.path then
@@ -91,19 +83,34 @@ function lexer.context.word_match(words,word_chars,case_insensitive)
     end
 end
 
--- nicer anyway:
+-- nicer (todo: utf):
 
--- todo: utf
+local defaults = R("az","AZ","\127\255","__")
 
-function lexer.context.exact_match(words,case_insensitive)
-    local pattern = S(concat(words)) + R("az","AZ","\127\255") -- the concat catches _ etc
+function lexer.context.exact_match(words,word_chars,case_insensitive)
+    local characters = concat(words)
+    local pattern -- the concat catches _ etc
+    if word_chars == true or word_chars == false or word_chars == nil then
+        word_chars = ""
+    end
+    if type(word_chars) == "string" then
+        pattern = S(characters) + defaults
+        if case_insensitive then
+            pattern = pattern + S(upper(characters)) + S(lower(characters))
+        end
+        if word_chars ~= "" then
+            pattern = pattern + S(word_chars)
+        end
+    elseif word_chars then
+        pattern = word_chars
+    end
     if case_insensitive then
         local list = { }
         for i=1,#words do
             list[lower(words[i])] = true
         end
         return Cmt(pattern^1, function(_,i,s)
-            return list[lower(s)] and i
+            return list[lower(s)] -- and i
         end)
     else
         local list = { }
@@ -111,28 +118,20 @@ function lexer.context.exact_match(words,case_insensitive)
             list[words[i]] = true
         end
         return Cmt(pattern^1, function(_,i,s)
-            return list[s] and i
+            return list[s] -- and i
         end)
     end
 end
 
-function lexer.context.word_match(words,word_chars,case_insensitive) -- word_chars not used (can be omitted)
-    if word_chars == true then
-        return lexer.context.exact_match(words,true)
-    else
-        return lexer.context.exact_match(words,case_insensitive)
-    end
-end
+-- overloaded functions
 
--- Overloaded functions.
+local FOLD_BASE         = SC_FOLDLEVELBASE
+local FOLD_HEADER       = SC_FOLDLEVELHEADERFLAG
+local FOLD_BLANK        = SC_FOLDLEVELWHITEFLAG
 
-local FOLD_BASE   = SC_FOLDLEVELBASE
-local FOLD_HEADER = SC_FOLDLEVELHEADERFLAG
-local FOLD_BLANK  = SC_FOLDLEVELWHITEFLAG
-
-local newline = P("\r\n") + S("\r\n")
-
-local splitlines = Ct( ( Ct ( (Cp() * Cs((1-newline)^1) * newline^-1) + (Cp() * Cc("") * newline) ) )^0)
+local get_style_at      = GetStyleAt
+local get_property      = GetProperty
+local get_indent_amount = GetIndentAmount
 
 local h_table, b_table, n_table = { }, { }, { }
 
@@ -140,53 +139,53 @@ setmetatable(h_table, { __index = function(t,level) local v = { level, FOLD_HEAD
 setmetatable(b_table, { __index = function(t,level) local v = { level, FOLD_BLANK  } t[level] = v return v end })
 setmetatable(n_table, { __index = function(t,level) local v = { level              } t[level] = v return v end })
 
-local get_style_at      = GetStyleAt
-local get_property      = GetProperty
-local get_indent_amount = GetIndentAmount
-
---         local lines = lpegmatch(splitlines,text) -- iterating over lines is faster
---         for i=1, #lines do
---             local li = lines[i]
---             local line = li[2]
---             if line ~= "" then
---                 local pos = li[1]
---                 for i=1,nofpatterns do
---                     for s, m in gmatch(line,patterns[i]) do
---                         if hash[m] then
---                             local symbols = fold_symbols[get_style_at(start_pos + pos + s - 1)]
---                             if symbols then
---                                 local l = symbols[m]
---                                 if l then
---                                     local t = type(l)
---                                     if t == 'number' then
---                                         current_level = current_level + l
---                                     elseif t == 'function' then
---                                         current_level = current_level + l(text, pos, line, s, match)
---                                     end
---                                     if current_level < FOLD_BASE then -- integrate in previous
---                                         current_level = FOLD_BASE
---                                     end
---                                 end
+-- local newline    = P("\r\n") + S("\r\n")
+-- local splitlines = Ct( ( Ct ( (Cp() * Cs((1-newline)^1) * newline^-1) + (Cp() * Cc("") * newline) ) )^0)
+--
+-- local lines = lpegmatch(splitlines,text) -- iterating over lines is faster
+-- for i=1, #lines do
+--     local li = lines[i]
+--     local line = li[2]
+--     if line ~= "" then
+--         local pos = li[1]
+--         for i=1,nofpatterns do
+--             for s, m in gmatch(line,patterns[i]) do
+--                 if hash[m] then
+--                     local symbols = fold_symbols[get_style_at(start_pos + pos + s - 1)]
+--                     if symbols then
+--                         local l = symbols[m]
+--                         if l then
+--                             local t = type(l)
+--                             if t == 'number' then
+--                                 current_level = current_level + l
+--                             elseif t == 'function' then
+--                                 current_level = current_level + l(text, pos, line, s, match)
+--                             end
+--                             if current_level < FOLD_BASE then -- integrate in previous
+--                                 current_level = FOLD_BASE
 --                             end
 --                         end
 --                     end
 --                 end
---                 if current_level > prev_level then
---                     folds[line_num] = h_table[prev_level] -- { prev_level, FOLD_HEADER }
---                 else
---                     folds[line_num] = n_table[prev_level] -- { prev_level }
---                 end
---                 prev_level = current_level
---             else
---                 folds[line_num] = b_table[prev_level] -- { prev_level, FOLD_BLANK }
 --             end
---             line_num = line_num + 1
 --         end
-
--- not that much faster but less memory:
+--         if current_level > prev_level then
+--             folds[line_num] = h_table[prev_level] -- { prev_level, FOLD_HEADER }
+--         else
+--             folds[line_num] = n_table[prev_level] -- { prev_level }
+--         end
+--         prev_level = current_level
+--     else
+--         folds[line_num] = b_table[prev_level] -- { prev_level, FOLD_BLANK }
+--     end
+--     line_num = line_num + 1
+-- end
+--
+-- -- not that much faster but less memory:
 
 local action_y, action_n
 
+local newline    = P("\r\n") + S("\r\n")
 local splitlines = ( (
     (Cp() * Cs((1-newline)^1) * newline^-1) / function(p,l) action_y(p,l) end
   + (                           newline   ) / function()    action_n()    end
@@ -194,7 +193,7 @@ local splitlines = ( (
 
 function lexer.context.fold(text, start_pos, start_line, start_level)
     if text == '' then
-        return folds
+        return { }
     end
     local lexer = global._LEXER
     if lexer._fold then
@@ -220,7 +219,7 @@ function lexer.context.fold(text, start_pos, start_line, start_level)
             end
             fold_symbols._hash = hash
         end
-        action_y = function(pos,line)
+        action_y = function(pos,line) -- we can consider moving this one outside the function
             for i=1,nofpatterns do
                 for s, m in gmatch(line,patterns[i]) do
                     if hash[m] then
@@ -253,12 +252,12 @@ function lexer.context.fold(text, start_pos, start_line, start_level)
             prev_level = current_level
             line_num = line_num + 1
         end
-        action_n = function()
+        action_n = function() -- we can consider moving this one outside the function
             folds[line_num] = b_table[prev_level] -- { prev_level, FOLD_BLANK }
             line_num = line_num + 1
         end
         local lines = lpegmatch(splitlines,text)
-    elseif get_property('fold.by.indentation', 1) == 1 then
+    elseif get_property('fold.by.indentation',1) == 1 then
         local current_line = start_line
         local prev_level = start_level
         for _, line in gmatch(text,'([\t ]*)(.-)\r?\n') do
@@ -303,58 +302,97 @@ function lexer.context.fold(text, start_pos, start_line, start_level)
     return folds
 end
 
-function lexer.context.lex(text, init_style)
+function lexer.context.lex(text,init_style)
     local lexer = global._LEXER
     local grammar = lexer._GRAMMAR
     if not grammar then
         return { }
-    elseif lexer._LEXBYLINE then
+    elseif lexer._LEXBYLINE then -- we could keep token
         local tokens = { }
         local offset = 0
         local noftokens = 0
-        for line in gmatch(text,'[^\r\n]*\r?\n?') do -- could be an lpeg
-            local line_tokens = lpeg_match(grammar, line)
-            if line_tokens then
-                for i=1,#line_tokens do
-                    local token = line_tokens[i]
-                    token[2] = token[2] + offset
+        if true then
+            for line in gmatch(text,'[^\r\n]*\r?\n?') do -- could be an lpeg
+                local line_tokens = lpeg_match(grammar,line)
+                if line_tokens then
+                    for i=1,#line_tokens do
+                        local token = line_tokens[i]
+                        token[2] = token[2] + offset
+                        noftokens = noftokens + 1
+                        tokens[noftokens] = token
+                    end
+                end
+                offset = offset + #line
+                if noftokens > 0 and tokens[noftokens][2] ~= offset then
                     noftokens = noftokens + 1
-                    tokens[noftokens] = token
+                    tokens[noftokens] = { 'default', offset + 1 }
                 end
             end
-            offset = offset + #line
-            if noftokens > 0 and tokens[noftokens][2] ~= offset then
-                noftokens = noftokens + 1
-                tokens[noftokens] = { 'default', offset + 1 }
+        else -- alternative
+            local lasttoken, lastoffset
+            for line in gmatch(text,'[^\r\n]*\r?\n?') do -- could be an lpeg
+                local line_tokens = lpeg_match(grammar,line)
+                if line_tokens then
+                    for i=1,#line_tokens do
+                        lasttoken = line_tokens[i]
+                        lastoffset = lasttoken[2] + offset
+                        lasttoken[2] = lastoffset
+                        noftokens = noftokens + 1
+                        tokens[noftokens] = lasttoken
+                    end
+                end
+                offset = offset + #line
+                if lastoffset ~= offset then
+                    lastoffset = offset + 1
+                    lasttoken = { 'default', lastoffset }
+                    noftokens = noftokens + 1
+                    tokens[noftokens] = lasttoken
+                end
             end
         end
         return tokens
     elseif lexer._CHILDREN then
+        -- as we cannot print here, tracing is not possible ... this might change, as we could just
+        -- as well generate all grammars in one go (sharing as much as possible)
         local _hash = lexer._HASH
         if not hash then
             hash = { }
             lexer._HASH = hash
         end
         grammar = hash[init_style]
-        if not grammar then
+        if grammar then
+            lexer._GRAMMAR = grammar
+        else
             for style, style_num in next, lexer._TOKENS do
                 if style_num == init_style then
                     local lexer_name = match(style,'^(.+)_whitespace') or lexer._NAME
                     if lexer._INITIALRULE ~= lexer_name then
-                        build_grammar(lexer, lexer_name)
+                        grammar = hash[lexer_name]
+                        if not grammar then
+                            build_grammar(lexer,lexer_name)
+                            grammar = lexer._GRAMMAR
+                            hash[lexer_name] = grammar
+                        end
                     end
                     break
                 end
             end
-            grammar = lexer._GRAMMAR
+            grammar = grammar or lexer._GRAMMAR
             hash[init_style] = grammar
         end
-        return lpegmatch(grammar, text)
+        return lpegmatch(grammar,text)
     else
-        return lpegmatch(grammar, text)
+        return lpegmatch(grammar,text)
     end
 end
 
-lexer.fold       = lexer.context.fold
-lexer.lex        = lexer.context.lex
-lexer.word_match = lexer.context.word_match
+-- todo: keywords: one lookup and multiple matches
+
+-- function lexer.context.token(name, patt)
+--     return Ct(patt * Cc(name) * Cp())
+-- end
+
+lexer.fold        = lexer.context.fold
+lexer.lex         = lexer.context.lex
+-- lexer.token       = lexer.context.token
+lexer.exact_match = lexer.context.exact_match