summaryrefslogtreecommitdiff
path: root/context/data/scite/context/lexers/scite-context-lexer.lua
diff options
context:
space:
mode:
Diffstat (limited to 'context/data/scite/context/lexers/scite-context-lexer.lua')
-rw-r--r--context/data/scite/context/lexers/scite-context-lexer.lua71
1 files changed, 59 insertions, 12 deletions
diff --git a/context/data/scite/context/lexers/scite-context-lexer.lua b/context/data/scite/context/lexers/scite-context-lexer.lua
index af21461b9..fc16e261a 100644
--- a/context/data/scite/context/lexers/scite-context-lexer.lua
+++ b/context/data/scite/context/lexers/scite-context-lexer.lua
@@ -19,6 +19,10 @@ local inspect = false -- can save some 15% (maybe easier on scintilla)
-- local log = true
-- local trace = true
+-- local f = io.open("e:/tmp/lexers.log","w")
+-- print = function(...)
+-- f:write(...,"\n")
+-- end
-- GET GOING
--
@@ -292,7 +296,7 @@ local inspect = false -- can save some 15% (maybe easier on scintilla)
local lpeg = require("lpeg")
local global = _G
-local find, gmatch, match, lower, upper, gsub, sub, format = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format
+local find, gmatch, match, lower, upper, gsub, sub, format, byte = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format, string.byte
local concat, sort = table.concat, table.sort
local type, next, setmetatable, rawset, tonumber, tostring = type, next, setmetatable, rawset, tonumber, tostring
local R, P, S, V, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg
@@ -1034,7 +1038,12 @@ end
-- },
-- }
-local lists = { }
+local lists = { }
+local disabled = false
+
+function context.disablewordcheck()
+ disabled = true
+end
function context.setwordlist(tag,limit) -- returns hash (lowercase keys and original values)
if not tag or tag == "" then
@@ -1454,25 +1463,30 @@ local function add_lexer(grammar, lexer) -- mostly the same as the original
end
local function build_grammar(lexer,initial_rule) -- same as the original
- local children = lexer._CHILDREN
+ local children = lexer._CHILDREN
local lexer_name = lexer._NAME
+ local preamble = lexer._preamble
if children then
if not initial_rule then
initial_rule = lexer_name
end
- local grammar = { initial_rule }
+ grammar = { initial_rule }
add_lexer(grammar, lexer)
lexer._INITIALRULE = initial_rule
- lexer._GRAMMAR = Ct(P(grammar))
+ grammar = Ct(P(grammar))
if trace then
report("building grammar for '%s' with whitespace '%s'and %s children",lexer_name,lexer.whitespace or "?",#children)
end
else
- lexer._GRAMMAR = Ct(join_tokens(lexer)^0)
+ grammar = Ct(join_tokens(lexer)^0)
if trace then
report("building grammar for '%s' with whitespace '%s'",lexer_name,lexer.whitespace or "?")
end
end
+ if preamble then
+ grammar = preamble^-1 * grammar
+ end
+ lexer._GRAMMAR = grammar
end
-- So far. We need these local functions in the next one.
@@ -1618,6 +1632,27 @@ function context.lex(lexer,text,init_style)
report("lexing '%s' with initial style '%s' and %s children",lexer._NAME,#lexer._CHILDREN or 0,init_style)
end
return matched(lexer,grammar,text)
+-- local result = matched(lexer,grammar,text)
+-- local fil = io.open("e:/tmp/foo.log","w")
+-- local len = #text
+-- local old = 1
+-- local txt = false
+-- for i=1,#result,2 do
+-- local tag = result[i]
+-- local pos = result[i+1]
+-- if nxt and tag == "text" then
+-- local wrd = sub(text,old,pos-1)
+-- fil:write(tag,"\t",wrd,"\n")
+-- if txt then
+-- result[i] = "internal"
+-- txt = false
+-- else
+-- txt = true
+-- end
+-- end
+-- old = pos
+-- end
+-- fil:close()
else
if trace then
report("lexing '%s' with initial style '%s'",lexer._NAME,init_style)
@@ -1855,9 +1890,11 @@ function context.loadlexer(filename,namespace)
add_style(lexer, token, style)
end
end
- for i=1,#_r do
- local rule = _r[i]
- add_rule(lexer, rule[1], rule[2])
+ if _r then
+ for i=1,#_r do
+ local rule = _r[i]
+ add_rule(lexer, rule[1], rule[2])
+ end
end
build_grammar(lexer)
end
@@ -2084,10 +2121,20 @@ do
-- return make(tree)
-- end
- helpers.utfcharpattern = P(1) * R("\128\191")^0 -- unchecked but fast
+ local utf8next = R("\128\191")
+ local utf8one = R("\000\127")
+ local utf8two = R("\194\223") * utf8next
+ local utf8three = R("\224\239") * utf8next * utf8next
+ local utf8four = R("\240\244") * utf8next * utf8next * utf8next
+
+ helpers.utfcharpattern = P(1) * utf8next^0 -- unchecked but fast
+ helpers.utfbytepattern = utf8one / byte
+ + utf8two / function(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
+ + utf8three / function(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
+ + utf8four / function(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
- local p_false = P(false)
- local p_true = P(true)
+ local p_false = P(false)
+ local p_true = P(true)
local function make(t)
local function making(t)