Diffstat (limited to 'context/data/textadept/context/lexers/scite-context-lexer.lua')
-rw-r--r-- context/data/textadept/context/lexers/scite-context-lexer.lua | 405
1 file changed, 333 insertions(+), 72 deletions(-)
diff --git a/context/data/textadept/context/lexers/scite-context-lexer.lua b/context/data/textadept/context/lexers/scite-context-lexer.lua
index 37f236a89..234b03c05 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer.lua
@@ -8,6 +8,10 @@ local info = {
}
+-- We need to copy this file to lexer.lua in the same path. This was not needed
+-- before version 10, but I can't figure out what else to do. It looks like
+-- lexer.lua gets loaded somewhere, but I can't see where.
+
if lpeg.setmaxstack then lpeg.setmaxstack(1000) end
local log = false
@@ -232,6 +236,17 @@ local inspect = false -- can save some 15% (maybe easier on scintilla)
-- is still not perfect (sometimes hangs) but it was enough reason to spend time on
-- making our lexer work with TextAdept and create a setup.
--
+-- Some bad news. The interface changed (again) in textadept 10, some of it for the
+-- better (but a bit different from what happens here) and some for the worse,
+-- especially the move of some code to the init file, so we now need some bad hacks.
+-- I decided to stay with the old method of defining lexers, and because the lexer
+-- can no longer be run in parallel (some change in the binary?) I will probably also
+-- clean up the code below, as we no longer need to be compatible. Unfortunately
+-- textadept is too much of a moving target to simply plug into a (tex related)
+-- production flow (apart from the fact that it doesn't yet have the scite-like
+-- realtime console). I'll keep an eye on it. Because we don't need many added
+-- features I might as well make a lean and mean instance (after all, the license
+-- permits forking).
+
-- TRACING
--
-- The advantage is that we now can check more easily with regular Lua(TeX). We can
@@ -243,8 +258,8 @@ local inspect = false -- can save some 15% (maybe easier on scintilla)
--
-- TODO
--
--- It would be nice if we could lods some ConTeXt Lua modules (the basic set) and
--- then use resolvers and such.
+-- It would be nice if we could load some ConTeXt Lua modules (the basic set) and
+-- then use resolvers and such. But it might not work well with scite.
--
-- The current lexer basics are still a mix between old and new. Maybe I should redo
-- some more. This is probably easier in TextAdept than in SciTE.
@@ -300,7 +315,17 @@ local lpegmatch = lpeg.match
local usage = (textadept and "textadept") or (resolvers and "context") or "scite"
local nesting = 0
-local print = textadept and ui and ui.print or print
+local output = nil
+
+----- print = textadept and ui and ui.print or print -- crashes when ui is not yet defined
+
+local function print(...)
+    if not output then
+        output = io.open("lexer.log","w")
+    end
+    output:write(...) -- the vararg expands fully only in final position
+    output:write("\n") -- so the newline gets its own write
+    output:flush()
+end
local function report(fmt,str,...)
if log then
@@ -319,6 +344,36 @@ end
inform("loading context lexer module (global table: %s)",tostring(global))
+do
+
+    local floor    = math and math.floor
+    local format   = format
+    local tonumber = tonumber
+
+    if not floor then
+
+        if tonumber(string.match(_VERSION,"%d%.%d")) < 5.3 then
+            floor = function(n)
+                return tonumber(format("%d",n))
+            end
+        else
+            -- 5.3 has a mixed number system and format %d doesn't work with
+            -- floats any longer ... no fun
+            floor = function(n)
+                return (n - n % 1)
+            end
+        end
+
+        math = math or { }
+
+        math.floor = floor
+
+    end
+
+end
+
+local floor = math.floor
+
if not package.searchpath then
-- Unfortunately the io library is only available when we end up
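As an aside, a freestanding sketch of why the shim above needs two fallbacks: before 5.3, string.format("%d",n) silently truncates a float, while in 5.3 it raises "number has no integer representation", so the arithmetic variant is used there. The function names below are illustrative, not part of the file:

    local function floor_via_format(n)
        return tonumber(string.format("%d",n)) -- pre-5.3 only: truncates toward zero
    end

    local function floor_via_modulo(n)
        return (n - n % 1) -- works everywhere: n % 1 is the fractional part
    end

    print(floor_via_modulo( 2.7)) --  2 (2.0 in 5.3, where it stays a float)
    print(floor_via_modulo(-2.7)) -- -3 (a true floor: % follows the divisor's sign)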
@@ -412,7 +467,9 @@ local default = {
local predefined = {
"default", "linenumber", "bracelight", "bracebad", "controlchar",
- "indentguide", "calltip"
+ "indentguide", "calltip",
+ -- seems new
+ "folddisplaytext"
}
-- Bah ... ugly ... nicer would be a proper hash .. we now have properties
@@ -510,9 +567,16 @@ lexers.property_expanded = setmetatable({ }, {
check_main_properties()
end
--
-        return gsub(property[k],"[$%%]%b()", function(k)
-            return t[sub(k,3,-2)]
-        end)
+-- return gsub(property[k],"[$%%]%b()", function(k)
+--     return t[sub(k,3,-2)]
+-- end)
+        local v = property[k]
+        if v then
+            v = gsub(v,"[$%%]%b()", function(k)
+                return t[sub(k,3,-2)]
+            end)
+        end
+        return v
end,
__newindex = function(t,k,v)
report("properties are read-only, '%s' is not changed",k)
@@ -835,32 +899,42 @@ function context.loaddefinitions(name)
return type(data) == "table" and data
end
+-- A bit of a regression in textadept > 10, so this was updated ... done a bit
+-- differently. We don't use this in the context lexers anyway.
+
 function context.word_match(words,word_chars,case_insensitive)
-    local chars = "%w_" -- maybe just "" when word_chars
-    if word_chars then
-        chars = "^([" .. chars .. gsub(word_chars,"([%^%]%-])", "%%%1") .."]+)"
-    else
-        chars = "^([" .. chars .."]+)"
+    -- used to be proper tables ...
+    if type(words) == "string" then
+        local clean = gsub(words,"%-%-[^\n]+","")
+        local split = { }
+        for s in gmatch(clean,"%S+") do
+            split[#split+1] = s
+        end
+        words = split
+    end
+    local list = { }
+    for i=1,#words do
+        list[words[i]] = true
     end
     if case_insensitive then
-        local word_list = { }
-        for i=1,#words do
-            word_list[lower(words[i])] = true
-        end
-        return P(function(input, index)
-            local s, e, word = find(input,chars,index)
-            return word and word_list[lower(word)] and e + 1 or nil
-        end)
-    else
-        local word_list = { }
         for i=1,#words do
-            word_list[words[i]] = true
+            list[lower(words[i])] = true
         end
-        return P(function(input, index)
-            local s, e, word = find(input,chars,index)
-            return word and word_list[word] and e + 1 or nil
-        end)
     end
+    local chars = S(word_chars or "")
+    for i=1,#words do
+        chars = chars + S(words[i])
+    end
+    local match = case_insensitive and
+        function(input,index,word)
+            -- We can speed up mixed case if needed.
+            return (list[word] or list[lower(word)]) and index or nil
+        end
+    or
+        function(input,index,word)
+            return list[word] and index or nil
+        end
+    return Cmt(C(chars^1),match) -- C() so the matched word reaches the check function
 end
-- Patterns are grouped in a separate namespace but the regular lexers expect
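A usage sketch of the rewritten matcher (made-up word list; C, S, Cmt and lpegmatch are this file's lpeg locals). Note that chars is built only from characters occurring in the listed words, so the match-time hash check is what really decides:

    local m = context.word_match("begin end -- a trailing comment is stripped")

    print(lpegmatch(m,"end"))   -- 4: "end" is in the hash, so the Cmt check passes
    print(lpegmatch(m,"bend"))  -- nil: built from listed characters, but not a listed word
    print(lpegmatch(m,"endif")) -- nil: the greedy run captures "endi", not "end"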
@@ -888,6 +962,11 @@ do
local hexadecimal = P("0") * S("xX")
* (hexdigit^0 * period * hexdigit^1 + hexdigit^1 * period * hexdigit^0 + hexdigit^1)
* (S("pP") * sign^-1 * hexdigit^1)^-1 -- *
+ local integer = sign^-1
+ * (hexadecimal + octal + decimal)
+ local float = sign^-1
+ * (digit^0 * period * digit^1 + digit^1 * period * digit^0 + digit^1)
+ * S("eE") * sign^-1 * digit^1 -- *
patterns.idtoken = idtoken
patterns.digit = digit
@@ -904,15 +983,13 @@ do
patterns.decimal = decimal
patterns.octal = octal
patterns.hexadecimal = hexadecimal
- patterns.float = sign^-1
- * (digit^0 * period * digit^1 + digit^1 * period * digit^0 + digit^1)
- * S("eE") * sign^-1 * digit^1 -- *
+ patterns.float = float
patterns.cardinal = decimal
patterns.signeddecimal = sign^-1 * decimal
patterns.signedoctal = sign^-1 * octal
patterns.signedhexadecimal = sign^-1 * hexadecimal
- patterns.integer = sign^-1 * (hexadecimal + octal + decimal)
+ patterns.integer = integer
patterns.real =
sign^-1 * ( -- at most one
digit^1 * period * digit^0 -- 10.0 10.
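A few sanity checks, assuming the patterns namespace of this file is in scope (the last line relies on the part of patterns.real that the hunk cuts off):

    print(lpegmatch(patterns.integer,"0x1A2B")) -- 7: the hexadecimal branch
    print(lpegmatch(patterns.float,"1.25e-3"))  -- 8: the exponent is mandatory here
    print(lpegmatch(patterns.float,"1.25"))     -- nil: without an exponent it is real's job
    print(lpegmatch(patterns.real,"1.25"))      -- 5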
@@ -928,6 +1005,7 @@ do
patterns.nospacing = (1-space)^1
patterns.eol = eol
patterns.newline = P("\r\n") + eol
+ patterns.backslash = backslash
local endof = S("\n\r\f")
@@ -943,7 +1021,7 @@ do
lexers.extend = extend
lexers.alpha = alpha
lexers.digit = digit
- lexers.alnum = alnum
+ lexers.alnum = alpha + digit
lexers.lower = lower
lexers.upper = upper
lexers.xdigit = hexdigit
@@ -1034,7 +1112,12 @@ end
-- },
-- }
-local lists = { }
+local lists = { }
+local disabled = false
+
+function context.disablewordcheck()
+ disabled = true
+end
function context.setwordlist(tag,limit) -- returns hash (lowercase keys and original values)
if not tag or tag == "" then
@@ -1127,6 +1210,8 @@ local p_nop = newline
local folders = { }
+-- Snippets from the > 10 code .. but we do things differently so ...
+
local function fold_by_parsing(text,start_pos,start_line,start_level,lexer)
local folder = folders[lexer]
if not folder then
@@ -1136,6 +1221,12 @@ local function fold_by_parsing(text,start_pos,start_line,start_level,lexer)
local fold_symbols = lexer._foldsymbols
local fold_pattern = lexer._foldpattern -- use lpeg instead (context extension)
--
+ -- textadept >= 10
+ --
+ -- local zerosumlines = lexer.property_int["fold.on.zero.sum.lines"] > 0 -- not done
+ -- local compact = lexer.property_int['fold.compact'] > 0 -- not done
+ -- local lowercase = lexer._CASEINSENSITIVEFOLDPOINTS -- useless (utf will distort)
+ --
if fold_pattern then
-- if no functions are found then we could have a faster one
fold_pattern = Cp() * C(fold_pattern) / function(s,match)
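For orientation, the shape this folder expects from a lexer, as a hypothetical minimal setup (_foldpattern is the fast lpeg route, a context extension; _foldsymbols maps matched symbols per style name to level increments):

    mylexer._foldpattern = S("{}") -- the lpeg route
    mylexer._foldsymbols = {
        _patterns    = { "[{}]" },                -- used by the traditional gmatch folder
        ["grouping"] = { ["{"] = 1, ["}"] = -1 }, -- +1 opens a fold level, -1 closes one
    }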
@@ -1168,7 +1259,7 @@ local function fold_by_parsing(text,start_pos,start_line,start_level,lexer)
-- the traditional one but a bit optimized
local fold_symbols_patterns = fold_symbols._patterns
local action_y = function(pos,line)
- for j = 1, #fold_symbols_patterns do
+ for j=1, #fold_symbols_patterns do
for s, match in gmatch(line,fold_symbols_patterns[j]) do -- "()(" .. patterns[i] .. ")"
local symbols = fold_symbols[style_at[start_pos + pos + s - 1]]
local l = symbols and symbols[match]
@@ -1311,11 +1402,11 @@ function context.fold(lexer,text,start_pos,start_line,start_level) -- hm, we had
if filesize <= threshold_by_parsing then
return fold_by_parsing(text,start_pos,start_line,start_level,lexer)
end
- elseif lexer.properties("fold.by.indentation",1) > 0 then
+ elseif lexer._FOLDBYINDENTATION or lexer.properties("fold.by.indentation",1) > 0 then
if filesize <= threshold_by_indentation then
return fold_by_indentation(text,start_pos,start_line,start_level,lexer)
end
- elseif lexer.properties("fold.by.line",1) > 0 then
+ elseif lexer._FOLDBYLINE or lexer.properties("fold.by.line",1) > 0 then
if filesize <= threshold_by_line then
return fold_by_line(text,start_pos,start_line,start_level,lexer)
end
@@ -1334,6 +1425,20 @@ local function add_rule(lexer,id,rule) -- unchanged
lexer._RULEORDER[#lexer._RULEORDER + 1] = id
end
+local function modify_rule(lexer,id,rule) -- needed for textadept > 10
+    if lexer._lexer then
+        lexer = lexer._lexer
+    end
+    lexer._RULES[id] = rule
+end
+
+local function get_rule(lexer,id) -- needed for textadept > 10
+    if lexer._lexer then
+        lexer = lexer._lexer
+    end
+    return lexer._RULES[id]
+end
+
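Both helpers redirect to the root lexer, so a tweak made from an embedded child lands in the table the grammar is actually built from. A hypothetical use (names made up):

    -- extend the parent's "comment" rule from a child lexer
    local rule = get_rule(childlexer,"comment") -- resolved on the parent
    modify_rule(childlexer,"comment",rule + P("%") * (1 - patterns.eol)^0)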
-- I finally figured out that adding more styles was an issue because of several
-- reasons:
--
@@ -1357,12 +1462,20 @@ local function add_style(lexer,token_name,style) -- changed a bit around 3.41
         if trace and detail then
             report("default style '%s' is ignored as extra style",token_name)
         end
-        return
+        if textadept then
+            -- go on, stored per buffer
+        else
+            return
+        end
     elseif predefinedstyles[token_name] then
         if trace and detail then
             report("predefined style '%s' is ignored as extra style",token_name)
         end
-        return
+        if textadept then
+            -- go on, stored per buffer
+        else
+            return
+        end
     else
         if trace and detail then
             report("adding extra style '%s' as '%s'",token_name,style)
@@ -1379,6 +1492,7 @@ local function add_style(lexer,token_name,style) -- changed a bit around 3.41
lexer._TOKENSTYLES[token_name] = num_styles
lexer._EXTRASTYLES[token_name] = style
lexer._numstyles = num_styles + 1
+ -- hm, the original (now) also copies to the parent ._lexer
end
local function check_styles(lexer)
@@ -1428,6 +1542,8 @@ local function join_tokens(lexer) -- slightly different from the original (no 'a
end
end
+-- hm, maybe instead of a grammar just a flat one
+
local function add_lexer(grammar, lexer) -- mostly the same as the original
local token_rule = join_tokens(lexer)
local lexer_name = lexer._NAME
@@ -1458,9 +1574,10 @@ local function build_grammar(lexer,initial_rule) -- same as the original
     local lexer_name = lexer._NAME
     local preamble   = lexer._preamble
     local grammar    = lexer._grammar
-    if grammar then
-        -- experiment
-    elseif children then
+    -- if grammar then
+    --     -- experiment
+    -- elseif children then
+    if children then
         if not initial_rule then
             initial_rule = lexer_name
         end
@@ -1533,17 +1650,17 @@ local function matched(lexer,grammar,text)
else
txt = "!no text!"
end
- report("%4i : %s > %s (%s) (%s)",n/2,ti,tn,s[ti] or "!unset!",txt)
+ report("%4i : %s > %s (%s) (%s)",floor(n/2),ti,tn,s[ti] or "!unset!",txt)
p = tn
else
break
end
end
end
- report("lexer results: %s, length: %s, ranges: %s",lexer._NAME,#text,#t/2)
+ report("lexer results: %s, length: %s, ranges: %s",lexer._NAME,#text,floor(#t/2))
if collapse then
t = collapsed(t)
- report("lexer collapsed: %s, length: %s, ranges: %s",lexer._NAME,#text,#t/2)
+ report("lexer collapsed: %s, length: %s, ranges: %s",lexer._NAME,#text,floor(#t/2))
end
elseif collapse then
t = collapsed(t)
@@ -1553,6 +1670,9 @@ end
-- Todo: make nice generic lexer (extra argument with start/stop commands) for
-- context itself.
+--
+-- In textadept >= 10 grammar building seems to have changed a bit. So, in retrospect,
+-- I would have been better off just dropping compatibility and sticking to ctx lexers
+-- only.
function context.lex(lexer,text,init_style)
-- local lexer = global._LEXER
@@ -1623,9 +1743,9 @@ function context.lex(lexer,text,init_style)
hash[init_style] = grammar
end
if trace then
- report("lexing '%s' with initial style '%s' and %s children",lexer._NAME,#lexer._CHILDREN or 0,init_style)
+ report("lexing '%s' with initial style '%s' and %s children", lexer._NAME,init_style,#lexer._CHILDREN or 0)
end
- return result
+ return matched(lexer,grammar,text)
else
if trace then
report("lexing '%s' with initial style '%s'",lexer._NAME,init_style)
@@ -1634,7 +1754,8 @@ function context.lex(lexer,text,init_style)
end
end
--- hm, changed in 3.24 .. no longer small table but one table:
+-- hm, changed in 3.24 .. no longer a small table but one table (so we could remove
+-- our aggressive optimization, which worked quite well)
function context.token(name, patt)
return patt * Cc(name) * Cp()
@@ -1694,6 +1815,7 @@ function context.new(name,filename)
check_whitespace(lexer)
check_styles(lexer)
check_properties(lexer)
+ lexer._tokenstyles = context.styleset
return lexer
end
@@ -1799,7 +1921,36 @@ end
-- namespace can be automatic: if parent then use name of parent (chain)
+-- The original lexer framework had a rather messy user interface (e.g. moving stuff
+-- from _rules to _RULES at some point) but I could live with that. Now it uses add_
+-- helpers. But the subsystem is still not clean and pretty. I could move to the add_
+-- helpers, but there is no gain in it, so we support a mix, which gives somewhat ugly
+-- code. In fact, there should be proper subtables for this. I might actually do that
+-- because we now always overload the normal lexer (parallel usage seems no longer
+-- possible). For SciTE we can actually do a conceptual upgrade (more the context way)
+-- because there is no further development there. That way we could make even more
+-- advanced lexers.
+
+local savedrequire = require
+
+local escapes = {
+ ["%"] = "%%",
+ ["."] = "%.",
+ ["+"] = "%+", ["-"] = "%-", ["*"] = "%*",
+ ["["] = "%[", ["]"] = "%]",
+ ["("] = "%(", [")"] = "%)",
+ -- ["{"] = "%{", ["}"] = "%}"
+ -- ["^"] = "%^", ["$"] = "%$",
+}
+
function context.loadlexer(filename,namespace)
+
+    if textadept then
+        require = function(name)
+            return savedrequire(name == "lexer" and "scite-context-lexer" or name)
+        end
+    end
+
nesting = nesting + 1
if not namespace then
namespace = filename
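The escapes table feeds the gsub in the foldsymbols loop further down: gsub with a table replaces every character that has an entry and leaves the rest alone, turning literal fold symbols into safe Lua patterns. For instance:

    print((gsub("end+",".",escapes)))   -- end%+
    print((gsub("[[",".",escapes)))     -- %[%[
    print((gsub("\\stop",".",escapes))) -- \stop (backslash is not magic in Lua patterns)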
@@ -1824,7 +1975,7 @@ function context.loadlexer(filename,namespace)
lexer = load_lexer(filename,namespace) or nolexer(filename,namespace)
usedlexers[filename] = lexer
--
- if not lexer._rules and not lexer._lexer and not lexer_grammar then
+ if not lexer._rules and not lexer._lexer and not lexer_grammar then -- hmm should be lexer._grammar
lexer._lexer = parent_lexer
end
--
@@ -1857,7 +2008,8 @@ function context.loadlexer(filename,namespace)
--
local _r = lexer._rules
local _g = lexer._grammar
- if _r or _g then
+ -- if _r or _g then
+ if _r then
local _s = lexer._tokenstyles
if _s then
for token, style in next, _s do
@@ -1871,6 +2023,9 @@ function context.loadlexer(filename,namespace)
end
end
build_grammar(lexer)
+ else
+ -- other lexers
+ build_grammar(lexer)
end
--
add_style(lexer, lexer.whitespace, lexers.STYLE_WHITESPACE)
@@ -1880,7 +2035,7 @@ function context.loadlexer(filename,namespace)
local patterns = foldsymbols._patterns
if patterns then
for i = 1, #patterns do
- patterns[i] = "()(" .. patterns[i] .. ")"
+ patterns[i] = "()(" .. gsub(patterns[i],".",escapes) .. ")"
end
end
end
@@ -1894,6 +2049,10 @@ function context.loadlexer(filename,namespace)
context.inspect(lexer)
end
--
+ if textadept then
+ require = savedrequire
+ end
+ --
return lexer
end
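So, under textadept, pulling in a concrete lexer now goes along these lines; any require("lexer") issued while it loads resolves to this file, and the saved require is put back before loadlexer returns:

    local tex = context.loadlexer("scite-context-lexer-tex")
    -- children loaded along the way end up cached in usedlexers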
@@ -1993,8 +2152,8 @@ lexers.inspect = context.inspect
lexers.report = context.report
lexers.inform = context.inform
--- helper .. alas ... the lexer's lua instance is rather crippled .. not even
--- math is part of it
+-- helper .. alas ... in scite the lexer's lua instance is rather crippled .. not
+-- even math is part of it
do
@@ -2003,26 +2162,6 @@ do
local format = format
local tonumber = tonumber
- if not floor then
-
- if tonumber(string.match(_VERSION,"%d%.%d")) < 5.3 then
- floor = function(n)
- return tonumber(format("%d",n))
- end
- else
- -- 5.3 has a mixed number system and format %d doesn't work with
- -- floats any longer ... no fun
- floor = function(n)
- return (n - n % 1)
- end
- end
-
- math = math or { }
-
- math.floor = floor
-
- end
-
local function utfchar(n)
if n < 0x80 then
return char(n)
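The hunk cuts utfchar off after the ASCII case; for reference, a minimal sketch of how the next (two-byte) branch continues, using the floor shim from earlier in the file:

    local char, floor = string.char, math.floor

    local function utfchar(n) -- sketch: first two UTF-8 ranges only
        if n < 0x80 then
            return char(n)
        elseif n < 0x800 then
            return char(0xC0 + floor(n/0x40), 0x80 + n % 0x40)
        end
    end

    print(utfchar(0xE9)) -- é, encoded as the two bytes C3 A9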
@@ -2398,6 +2537,128 @@ function lexers.fold_line_comments(prefix)
end
end
+-- There are some fundamental changes in textadept version 10 and I don't want to
+-- adapt again, so we go the reverse route: map new to old. This is needed because
+-- we need to load other lexers, which is the result of not being able to load the
+-- lexer framework in parallel. Something happened in 10 that makes the main lexer
+-- always enforced, so now we need to really replace that one (and even then it
+-- loads twice; I can probably sort that out). Maybe there's now some hardcoded
+-- magic in the binary.
+
+if textadept then
+
+    -- Folds are still somewhat weak because the end condition is not bound to a
+    -- start .. probably too complex, and it seems to work anyhow. As we have
+    -- extended things we just remap.
+
+    local function add_fold_point(lexer,token_name,start_symbol,end_symbol)
+        if type(start_symbol) == "string" then
+            local foldsymbols = lexer._foldsymbols
+            if not foldsymbols then
+                foldsymbols = { }
+                lexer._foldsymbols = foldsymbols
+            end
+            local patterns = foldsymbols._patterns
+            local usedpatt = foldsymbols._usedpatt -- keep it around between calls
+            if not patterns then
+                patterns = { }
+                usedpatt = { } -- > 10 uses a mixed index/hash (we don't use patterns)
+                foldsymbols._patterns = patterns
+                foldsymbols._usedpatt = usedpatt
+            end
+            local foldsymbol = foldsymbols[token_name]
+            if not foldsymbol then
+                foldsymbol = { }
+                foldsymbols[token_name] = foldsymbol
+            end
+            if not usedpatt[start_symbol] then
+                patterns[#patterns+1] = start_symbol
+                usedpatt[start_symbol] = true
+            end
+            if type(end_symbol) == "string" then
+                foldsymbol[start_symbol] =  1
+                foldsymbol[end_symbol]   = -1
+                if not usedpatt[end_symbol] then
+                    patterns[#patterns+1] = end_symbol
+                    usedpatt[end_symbol] = true
+                end
+            else
+                foldsymbol[start_symbol] = end_symbol
+            end
+        end
+    end
+
+    local function add_style(lexer,name,style)
+        local tokenstyles = lexer._tokenstyles
+        if not tokenstyles then
+            tokenstyles = { }
+            lexer._tokenstyles = tokenstyles
+        end
+        tokenstyles[name] = style
+    end
+
+    local function add_rule(lexer,id,rule)
+        local rules = lexer._rules
+        if not rules then
+            rules = { }
+            lexer._rules = rules
+        end
+        rules[#rules+1] = { id, rule }
+    end
+
+    local function modify_rule(lexer,id,rule) -- needed for textadept > 10
+        if lexer._lexer then
+            lexer = lexer._lexer
+        end
+        local RULES = lexer._RULES
+        if RULES then
+            RULES[id] = rule
+        end
+    end
+
+    local function get_rule(lexer,id) -- needed for textadept > 10
+        if lexer._lexer then
+            lexer = lexer._lexer
+        end
+        local RULES = lexer._RULES
+        if RULES then
+            return RULES[id]
+        end
+    end
+
+    local new = context.new
+    local lmt = {
+        __index = {
+
+            add_rule       = add_rule,
+            modify_rule    = modify_rule,
+            get_rule       = get_rule,
+            add_style      = add_style,
+            add_fold_point = add_fold_point,
+
+            join_tokens    = join_tokens,
+            build_grammar  = build_grammar,
+
+            embed = lexers.embed,
+            lex   = lexers.lex,
+            fold  = lexers.fold
+
+        }
+    }
+
+    function lexers.new(name,options)
+        local lexer = new(name)
+        if options then
+            lexer._LEXBYLINE                 = options['lex_by_line']
+            lexer._FOLDBYINDENTATION         = options['fold_by_indentation']
+            lexer._CASEINSENSITIVEFOLDPOINTS = options['case_insensitive_fold_points']
+            lexer._lexer                     = options['inherit']
+        end
+        setmetatable(lexer,lmt)
+        return lexer
+    end
+
+end
+
-- done
return lexers
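To close the circle, roughly how a textadept >= 10 style lexer travels through the compatibility layer above (names are made up; token is this file's context.token and P comes from lpeg):

    local token = context.token
    local demo  = lexers.new("demo", { fold_by_indentation = false })

    demo:add_style("demo_key","fore:#775555")             -- lands in demo._tokenstyles
    demo:add_rule("keyword",token("demo_key",P("begin"))) -- appended to demo._rules
    demo:add_fold_point("demo_key","{","}")               -- lands in demo._foldsymbols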