diff options
Diffstat (limited to 'context/data/textadept/context/lexers/scite-context-lexer.lua')
-rw-r--r-- | context/data/textadept/context/lexers/scite-context-lexer.lua | 405 |
1 files changed, 333 insertions, 72 deletions
diff --git a/context/data/textadept/context/lexers/scite-context-lexer.lua b/context/data/textadept/context/lexers/scite-context-lexer.lua index 37f236a89..234b03c05 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer.lua @@ -8,6 +8,10 @@ local info = { } +-- We need a copy of this file to lexer.lua in the same path. This was not needed +-- before version 10 but I can't figure out what else to do. It looks like there +-- is some loading of lexer.lua but I can't see where. + if lpeg.setmaxstack then lpeg.setmaxstack(1000) end local log = false @@ -232,6 +236,17 @@ local inspect = false -- can save some 15% (maybe easier on scintilla) -- is still not perfect (sometimes hangs) but it was enough reason to spend time on -- making our lexer work with TextAdept and create a setup. -- +-- Some bad news. The interface changed (again) in textadept 10, some for the better +-- (but a bit different from what happens here) and some for the worse, especially +-- moving some code to the init file so we now need some bad hacks. I decided to +-- stay with the old method of defining lexers and because the lexer cannot be run +-- in parallel any more (some change in the binary?) I will probably also cleanup +-- code below as we no longer need to be compatible. Unfortunately textadept is too +-- much a moving target to simply kick in some (tex related) production flow (apart +-- from the fact that it doesn't yet have the scite like realtime console). I'll +-- keep an eye on it. Because we don't need many added features I might as well decide +-- to make a lean and mean instance (after all the license permits forking). + -- TRACING -- -- The advantage is that we now can check more easily with regular Lua(TeX). We can @@ -243,8 +258,8 @@ local inspect = false -- can save some 15% (maybe easier on scintilla) -- -- TODO -- --- It would be nice if we could lods some ConTeXt Lua modules (the basic set) and --- then use resolvers and such. +-- It would be nice if we could load some ConTeXt Lua modules (the basic set) and +-- then use resolvers and such. But it might not work well with scite. -- -- The current lexer basics are still a mix between old and new. Maybe I should redo -- some more. This is probably easier in TextAdept than in SciTE. @@ -300,7 +315,17 @@ local lpegmatch = lpeg.match local usage = (textadept and "textadept") or (resolvers and "context") or "scite" local nesting = 0 -local print = textadept and ui and ui.print or print +local output = nil + +----- print = textadept and ui and ui.print or print -- crashes when ui is not yet defined + +local function print(...) + if not output then + output = io.open("lexer.log","w") + end + output:write(...,"\n") + output:flush() +end local function report(fmt,str,...) if log then @@ -319,6 +344,36 @@ end inform("loading context lexer module (global table: %s)",tostring(global)) +do + + local floor = math and math.floor + local format = format + local tonumber = tonumber + + if not floor then + + if tonumber(string.match(_VERSION,"%d%.%d")) < 5.3 then + floor = function(n) + return tonumber(format("%d",n)) + end + else + -- 5.3 has a mixed number system and format %d doesn't work with + -- floats any longer ... no fun + floor = function(n) + return (n - n % 1) + end + end + + math = math or { } + + math.floor = floor + + end + +end + +local floor = math.floor + if not package.searchpath then -- Unfortunately the io library is only available when we end up @@ -412,7 +467,9 @@ local default = { local predefined = { "default", "linenumber", "bracelight", "bracebad", "controlchar", - "indentguide", "calltip" + "indentguide", "calltip", + -- seems new + "folddisplaytext" } -- Bah ... ugly ... nicer would be a proper hash .. we now have properties @@ -510,9 +567,16 @@ lexers.property_expanded = setmetatable({ }, { check_main_properties() end -- - return gsub(property[k],"[$%%]%b()", function(k) - return t[sub(k,3,-2)] - end) +-- return gsub(property[k],"[$%%]%b()", function(k) +-- return t[sub(k,3,-2)] +-- end) + local v = property[k] + if v then + v = gsub(v,"[$%%]%b()", function(k) + return t[sub(k,3,-2)] + end) + end + return v end, __newindex = function(t,k,v) report("properties are read-only, '%s' is not changed",k) @@ -835,32 +899,42 @@ function context.loaddefinitions(name) return type(data) == "table" and data end +-- A bit of regression in textadept > 10 so updated ... done a bit different. +-- We don't use this in the context lexers anyway. + function context.word_match(words,word_chars,case_insensitive) - local chars = "%w_" -- maybe just "" when word_chars - if word_chars then - chars = "^([" .. chars .. gsub(word_chars,"([%^%]%-])", "%%%1") .."]+)" - else - chars = "^([" .. chars .."]+)" + -- used to be proper tables ... + if type(words) == "string" then + local clean = gsub(words,"%-%-[^\n]+","") + local split = { } + for s in gmatch(clean,"%S+") do + split[#split+1] = s + end + words = split + end + local list = { } + for i=1,#words do + list[words[i]] = true end if case_insensitive then - local word_list = { } - for i=1,#words do - word_list[lower(words[i])] = true - end - return P(function(input, index) - local s, e, word = find(input,chars,index) - return word and word_list[lower(word)] and e + 1 or nil - end) - else - local word_list = { } for i=1,#words do - word_list[words[i]] = true + list[lower(words[i])] = true end - return P(function(input, index) - local s, e, word = find(input,chars,index) - return word and word_list[word] and e + 1 or nil - end) end + local chars = S(word_chars or "") + for i=1,#words do + chars = chars + S(words[i]) + end + local match = case_insensitive and + function(input,index,word) + -- We can speed mixed case if needed. + return (list[word] or list[lower(word)]) and index or nil + end + or + function(input,index,word) + return list[word] and index or nil + end + return Cmt(chars^1,match) end -- Patterns are grouped in a separate namespace but the regular lexers expect @@ -888,6 +962,11 @@ do local hexadecimal = P("0") * S("xX") * (hexdigit^0 * period * hexdigit^1 + hexdigit^1 * period * hexdigit^0 + hexdigit^1) * (S("pP") * sign^-1 * hexdigit^1)^-1 -- * + local integer = sign^-1 + * (hexadecimal + octal + decimal) + local float = sign^-1 + * (digit^0 * period * digit^1 + digit^1 * period * digit^0 + digit^1) + * S("eE") * sign^-1 * digit^1 -- * patterns.idtoken = idtoken patterns.digit = digit @@ -904,15 +983,13 @@ do patterns.decimal = decimal patterns.octal = octal patterns.hexadecimal = hexadecimal - patterns.float = sign^-1 - * (digit^0 * period * digit^1 + digit^1 * period * digit^0 + digit^1) - * S("eE") * sign^-1 * digit^1 -- * + patterns.float = float patterns.cardinal = decimal patterns.signeddecimal = sign^-1 * decimal patterns.signedoctal = sign^-1 * octal patterns.signedhexadecimal = sign^-1 * hexadecimal - patterns.integer = sign^-1 * (hexadecimal + octal + decimal) + patterns.integer = integer patterns.real = sign^-1 * ( -- at most one digit^1 * period * digit^0 -- 10.0 10. @@ -928,6 +1005,7 @@ do patterns.nospacing = (1-space)^1 patterns.eol = eol patterns.newline = P("\r\n") + eol + patterns.backslash = backslash local endof = S("\n\r\f") @@ -943,7 +1021,7 @@ do lexers.extend = extend lexers.alpha = alpha lexers.digit = digit - lexers.alnum = alnum + lexers.alnum = alpha + digit lexers.lower = lower lexers.upper = upper lexers.xdigit = hexdigit @@ -1034,7 +1112,12 @@ end -- }, -- } -local lists = { } +local lists = { } +local disabled = false + +function context.disablewordcheck() + disabled = true +end function context.setwordlist(tag,limit) -- returns hash (lowercase keys and original values) if not tag or tag == "" then @@ -1127,6 +1210,8 @@ local p_nop = newline local folders = { } +-- Snippets from the > 10 code .. but we do things different so ... + local function fold_by_parsing(text,start_pos,start_line,start_level,lexer) local folder = folders[lexer] if not folder then @@ -1136,6 +1221,12 @@ local function fold_by_parsing(text,start_pos,start_line,start_level,lexer) local fold_symbols = lexer._foldsymbols local fold_pattern = lexer._foldpattern -- use lpeg instead (context extension) -- + -- textadept >= 10 + -- + -- local zerosumlines = lexer.property_int["fold.on.zero.sum.lines"] > 0 -- not done + -- local compact = lexer.property_int['fold.compact'] > 0 -- not done + -- local lowercase = lexer._CASEINSENSITIVEFOLDPOINTS -- useless (utf will distort) + -- if fold_pattern then -- if no functions are found then we could have a faster one fold_pattern = Cp() * C(fold_pattern) / function(s,match) @@ -1168,7 +1259,7 @@ local function fold_by_parsing(text,start_pos,start_line,start_level,lexer) -- the traditional one but a bit optimized local fold_symbols_patterns = fold_symbols._patterns local action_y = function(pos,line) - for j = 1, #fold_symbols_patterns do + for j=1, #fold_symbols_patterns do for s, match in gmatch(line,fold_symbols_patterns[j]) do -- "()(" .. patterns[i] .. ")" local symbols = fold_symbols[style_at[start_pos + pos + s - 1]] local l = symbols and symbols[match] @@ -1311,11 +1402,11 @@ function context.fold(lexer,text,start_pos,start_line,start_level) -- hm, we had if filesize <= threshold_by_parsing then return fold_by_parsing(text,start_pos,start_line,start_level,lexer) end - elseif lexer.properties("fold.by.indentation",1) > 0 then + elseif lexer._FOLDBYINDENTATION or lexer.properties("fold.by.indentation",1) > 0 then if filesize <= threshold_by_indentation then return fold_by_indentation(text,start_pos,start_line,start_level,lexer) end - elseif lexer.properties("fold.by.line",1) > 0 then + elseif lexer._FOLDBYLINE or lexer.properties("fold.by.line",1) > 0 then if filesize <= threshold_by_line then return fold_by_line(text,start_pos,start_line,start_level,lexer) end @@ -1334,6 +1425,20 @@ local function add_rule(lexer,id,rule) -- unchanged lexer._RULEORDER[#lexer._RULEORDER + 1] = id end +local function modify_rule(lexer,id,rule) -- needed for textadept > 10 + if lexer._lexer then + lexer = lexer._lexer + end + lexer._RULES[id] = rule +end + +local function get_rule(lexer,id) -- needed for textadept > 10 + if lexer._lexer then + lexer = lexer._lexer + end + return lexer._RULES[id] +end + -- I finally figured out that adding more styles was an issue because of several -- reasons: -- @@ -1357,12 +1462,20 @@ local function add_style(lexer,token_name,style) -- changed a bit around 3.41 if trace and detail then report("default style '%s' is ignored as extra style",token_name) end - return + if textadept then + -- go on, stored per buffer + else + return + end elseif predefinedstyles[token_name] then if trace and detail then report("predefined style '%s' is ignored as extra style",token_name) end - return + if textadept then + -- go on, stored per buffer + else + return + end else if trace and detail then report("adding extra style '%s' as '%s'",token_name,style) @@ -1379,6 +1492,7 @@ local function add_style(lexer,token_name,style) -- changed a bit around 3.41 lexer._TOKENSTYLES[token_name] = num_styles lexer._EXTRASTYLES[token_name] = style lexer._numstyles = num_styles + 1 + -- hm, the original (now) also copies to the parent ._lexer end local function check_styles(lexer) @@ -1428,6 +1542,8 @@ local function join_tokens(lexer) -- slightly different from the original (no 'a end end +-- hm, maybe instead of a grammer just a flat one + local function add_lexer(grammar, lexer) -- mostly the same as the original local token_rule = join_tokens(lexer) local lexer_name = lexer._NAME @@ -1458,9 +1574,10 @@ local function build_grammar(lexer,initial_rule) -- same as the original local lexer_name = lexer._NAME local preamble = lexer._preamble local grammar = lexer._grammar - if grammar then - -- experiment - elseif children then + -- if grammar then + -- -- experiment + -- elseif children then + if children then if not initial_rule then initial_rule = lexer_name end @@ -1533,17 +1650,17 @@ local function matched(lexer,grammar,text) else txt = "!no text!" end - report("%4i : %s > %s (%s) (%s)",n/2,ti,tn,s[ti] or "!unset!",txt) + report("%4i : %s > %s (%s) (%s)",floor(n/2),ti,tn,s[ti] or "!unset!",txt) p = tn else break end end end - report("lexer results: %s, length: %s, ranges: %s",lexer._NAME,#text,#t/2) + report("lexer results: %s, length: %s, ranges: %s",lexer._NAME,#text,floor(#t/2)) if collapse then t = collapsed(t) - report("lexer collapsed: %s, length: %s, ranges: %s",lexer._NAME,#text,#t/2) + report("lexer collapsed: %s, length: %s, ranges: %s",lexer._NAME,#text,floor(#t/2)) end elseif collapse then t = collapsed(t) @@ -1553,6 +1670,9 @@ end -- Todo: make nice generic lexer (extra argument with start/stop commands) for -- context itself. +-- +-- In textadept >= 10 grammar building seem to have changed a bit. So, in retrospect +-- I could better have just dropped compatibility and stick to ctx lexers only. function context.lex(lexer,text,init_style) -- local lexer = global._LEXER @@ -1623,9 +1743,9 @@ function context.lex(lexer,text,init_style) hash[init_style] = grammar end if trace then - report("lexing '%s' with initial style '%s' and %s children",lexer._NAME,#lexer._CHILDREN or 0,init_style) + report("lexing '%s' with initial style '%s' and %s children", lexer._NAME,init_style,#lexer._CHILDREN or 0) end - return result + return matched(lexer,grammar,text) else if trace then report("lexing '%s' with initial style '%s'",lexer._NAME,init_style) @@ -1634,7 +1754,8 @@ function context.lex(lexer,text,init_style) end end --- hm, changed in 3.24 .. no longer small table but one table: +-- hm, changed in 3.24 .. no longer small table but one table (so we could remove our +-- agressive optimization which worked quite well) function context.token(name, patt) return patt * Cc(name) * Cp() @@ -1694,6 +1815,7 @@ function context.new(name,filename) check_whitespace(lexer) check_styles(lexer) check_properties(lexer) + lexer._tokenstyles = context.styleset return lexer end @@ -1799,7 +1921,36 @@ end -- namespace can be automatic: if parent then use name of parent (chain) +-- The original lexer framework had a rather messy user uinterface (e.g. moving +-- stuff from _rules to _RULES at some point but I could live with that. Now it uses +-- add_ helpers. But the subsystem is still not clean and pretty. Now, I can move to +-- the add_ but there is no gain in it so we support a mix which gives somewhat ugly +-- code. In fact, there should be proper subtables for this. I might actually do +-- this because we now always overload the normal lexer (parallel usage seems no +-- longer possible). For SciTE we can actually do a conceptual upgrade (more the +-- context way) because there is no further development there. That way we could +-- make even more advanced lexers. + +local savedrequire = require + +local escapes = { + ["%"] = "%%", + ["."] = "%.", + ["+"] = "%+", ["-"] = "%-", ["*"] = "%*", + ["["] = "%[", ["]"] = "%]", + ["("] = "%(", [")"] = "%)", + -- ["{"] = "%{", ["}"] = "%}" + -- ["^"] = "%^", ["$"] = "%$", +} + function context.loadlexer(filename,namespace) + + if textadept then + require = function(name) + return savedrequire(name == "lexer" and "scite-context-lexer" or name) + end + end + nesting = nesting + 1 if not namespace then namespace = filename @@ -1824,7 +1975,7 @@ function context.loadlexer(filename,namespace) lexer = load_lexer(filename,namespace) or nolexer(filename,namespace) usedlexers[filename] = lexer -- - if not lexer._rules and not lexer._lexer and not lexer_grammar then + if not lexer._rules and not lexer._lexer and not lexer_grammar then -- hmm should be lexer._grammar lexer._lexer = parent_lexer end -- @@ -1857,7 +2008,8 @@ function context.loadlexer(filename,namespace) -- local _r = lexer._rules local _g = lexer._grammar - if _r or _g then + -- if _r or _g then + if _r then local _s = lexer._tokenstyles if _s then for token, style in next, _s do @@ -1871,6 +2023,9 @@ function context.loadlexer(filename,namespace) end end build_grammar(lexer) + else + -- other lexers + build_grammar(lexer) end -- add_style(lexer, lexer.whitespace, lexers.STYLE_WHITESPACE) @@ -1880,7 +2035,7 @@ function context.loadlexer(filename,namespace) local patterns = foldsymbols._patterns if patterns then for i = 1, #patterns do - patterns[i] = "()(" .. patterns[i] .. ")" + patterns[i] = "()(" .. gsub(patterns[i],".",escapes) .. ")" end end end @@ -1894,6 +2049,10 @@ function context.loadlexer(filename,namespace) context.inspect(lexer) end -- + if textadept then + require = savedrequire + end + -- return lexer end @@ -1993,8 +2152,8 @@ lexers.inspect = context.inspect lexers.report = context.report lexers.inform = context.inform --- helper .. alas ... the lexer's lua instance is rather crippled .. not even --- math is part of it +-- helper .. alas ... in scite the lexer's lua instance is rather crippled .. not +-- even math is part of it do @@ -2003,26 +2162,6 @@ do local format = format local tonumber = tonumber - if not floor then - - if tonumber(string.match(_VERSION,"%d%.%d")) < 5.3 then - floor = function(n) - return tonumber(format("%d",n)) - end - else - -- 5.3 has a mixed number system and format %d doesn't work with - -- floats any longer ... no fun - floor = function(n) - return (n - n % 1) - end - end - - math = math or { } - - math.floor = floor - - end - local function utfchar(n) if n < 0x80 then return char(n) @@ -2398,6 +2537,128 @@ function lexers.fold_line_comments(prefix) end end +-- There are some fundamental changes in textadept version 10 and I don't want to +-- adapt again so we go the reverse route: map new to old. This is needed because +-- we need to load other lexers which is teh result of not being able to load the +-- lexer framework in parallel. Something happened in 10 that makes the main lexer +-- always enforced so now we need to really replace that one (and even then it loads +-- twice (i can probably sort that out). Maybe there's now some hard coded magic +-- in the binary. + +if textadept then + + -- Folds are still somewhat weak because of the end condition not being + -- bound to a start .. probably to complex and it seems to work anyhow. As + -- we have extended thinsg we just remap. + + local function add_fold_point(lexer,token_name,start_symbol,end_symbol) + if type(start_symbol) == "string" then + local foldsymbols = lexer._foldsymbols + if not foldsymbols then + foldsymbols = { } + lexer._foldsymbols = foldsymbols + end + local patterns = foldsymbols._patterns + if not patterns then + patterns = { } + usedpatt = { } -- > 10 uses a mixed index/hash (we don't use patterns) + foldsymbols._patterns = patterns + foldsymbols._usedpatt = usedpatt + end + local foldsymbol = foldsymbols[token_name] + if not foldsymbol then + foldsymbol = { } + foldsymbols[token_name] = foldsymbol + end + if not usedpatt[start_symbol] then + patterns[#patterns+1] = start_symbol + usedpatt[start_symbol] = true + end + if type(end_symbol) == "string" then + foldsymbol[start_symbol] = 1 + foldsymbol[end_symbol] = -1 + if not usedpatt[end_symbol] then + patterns[#patterns+1] = end_symbol + usedpatt[end_symbol] = true + end + else + foldsymbol[start_symbol] = end_symbol + end + end + end + + local function add_style(lexer,name,style) + local tokenstyles = lexer._tokenstyles + if not tokenstyles then + tokenstyles = { } + lexer._tokenstyles = tokenstyles + end + tokenstyles[name] = style + end + + local function add_rule(lexer,id,rule) + local rules = lexer._rules + if not rules then + rules = { } + lexer._rules = rules + end + rules[#rules+1] = { id, rule } + end + + local function modify_rule(lexer,id,rule) -- needed for textadept > 10 + if lexer._lexer then + lexer = lexer._lexer + end + local RULES = lexer._RULES + if RULES then + RULES[id] = rule + end + end + + local function get_rule(lexer,id) -- needed for textadept > 10 + if lexer._lexer then + lexer = lexer._lexer + end + local RULES = lexer._RULES + if RULES then + return RULES[id] + end + end + + local new = context.new + local lmt = { + __index = { + + add_rule = add_rule, + modify_rule = modify_rule, + get_rule = get_rule, + add_style = add_style, + add_fold_point = add_fold_point, + + join_tokens = join_tokens, + build_grammar = build_grammar, + + embed = lexers.embed, + lex = lexers.lex, + fold = lexers.fold + + } + } + + function lexers.new(name,options) + local lexer = new(name) + if options then + lexer._LEXBYLINE = options['lex_by_line'] + lexer._FOLDBYINDENTATION = options['fold_by_indentation'] + lexer._CASEINSENSITIVEFOLDPOINTS = options['case_insensitive_fold_points'] + lexer._lexer = options['inherit'] + end + setmetatable(lexer,lmt) + return lexer + end + +end + -- done return lexers |