diff options
author | Hans Hagen <pragma@wxs.nl> | 2014-04-28 23:24:00 +0200 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2014-04-28 23:24:00 +0200 |
commit | 3a9be11a68e3bd5453edf1c0c7d469442dbd0cf3 (patch) | |
tree | 01d807ccd33b88e0c3e03e2ba2372bf665b0adc3 /context/data/scite/lexers/scite-context-lexer-lua.lua | |
parent | 7e02e6e8f9e6bee6c8813d3937fdbc8deb2d6e74 (diff) | |
download | context-3a9be11a68e3bd5453edf1c0c7d469442dbd0cf3.tar.gz |
beta 2014.04.28 23:24
Diffstat (limited to 'context/data/scite/lexers/scite-context-lexer-lua.lua')
-rw-r--r-- | context/data/scite/lexers/scite-context-lexer-lua.lua | 373 |
1 files changed, 0 insertions, 373 deletions
diff --git a/context/data/scite/lexers/scite-context-lexer-lua.lua b/context/data/scite/lexers/scite-context-lexer-lua.lua deleted file mode 100644 index ebb69c979..000000000 --- a/context/data/scite/lexers/scite-context-lexer-lua.lua +++ /dev/null @@ -1,373 +0,0 @@ -local info = { - version = 1.002, - comment = "scintilla lpeg lexer for lua", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files", -} - --- beware: all multiline is messy, so even if it's no lexer, it should be an embedded lexer --- we probably could use a local whitespace variant but this is cleaner - -local P, R, S, C, Cg, Cb, Cs, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Cs, lpeg.Cmt, lpeg.Cp -local match, find = string.match, string.find -local setmetatable = setmetatable - -local lexer = require("lexer") -local context = lexer.context -local patterns = context.patterns - -local token = lexer.token -local exact_match = lexer.exact_match -local just_match = lexer.just_match - -local lualexer = lexer.new("lua","scite-context-lexer-lua") -local whitespace = lualexer.whitespace - -local stringlexer = lexer.load("scite-context-lexer-lua-longstring") - -local directives = { } -- communication channel - --- this will be extended - --- we could combine some in a hash that returns the class that then makes the token --- this can save time on large files - -local keywords = { - 'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function', -- 'goto', - 'if', 'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true', - 'until', 'while', -} - -local functions = { - 'assert', 'collectgarbage', 'dofile', 'error', 'getmetatable', - 'ipairs', 'load', 'loadfile', 'module', 'next', 'pairs', - 'pcall', 'print', 'rawequal', 'rawget', 'rawset', 'require', - 'setmetatable', 'tonumber', 'tostring', 'type', 'unpack', 'xpcall', 'select', - - "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32", -} - -local constants = { - '_G', '_VERSION', '_M', '...', '_ENV', - -- here too - '__add', '__call', '__concat', '__div', '__idiv', '__eq', '__gc', '__index', - '__le', '__lt', '__metatable', '__mode', '__mul', '__newindex', - '__pow', '__sub', '__tostring', '__unm', '__len', - '__pairs', '__ipairs', - 'NaN', -} - --- local tokenmappings = { } --- --- for i=1,#keywords do tokenmappings[keywords [i]] = "keyword" } --- for i=1,#functions do tokenmappings[functions[i]] = "function" } --- for i=1,#constants do tokenmappings[constants[i]] = "constant" } - -local internals = { -- __ - 'add', 'call', 'concat', 'div', 'eq', 'gc', 'index', - 'le', 'lt', 'metatable', 'mode', 'mul', 'newindex', - 'pow', 'sub', 'tostring', 'unm', 'len', -} - -local depricated = { - "arg", "arg.n", - "loadstring", "setfenv", "getfenv", - "pack", -} - -local csnames = { -- todo: option - "commands", - "context", - "ctx", - "metafun", - "metapost", -} - -local level = nil -local setlevel = function(_,i,s) level = s return i end - -local equals = P("=")^0 - -local longonestart = P("[[") -local longonestop = P("]]") -local longonestring = (1-longonestop)^0 - -local longtwostart = P('[') * Cmt(equals,setlevel) * P('[') -local longtwostop = P(']') * equals * P(']') - -local sentinels = { } setmetatable(sentinels, { __index = function(t,k) local v = "]" .. k .. "]" t[k] = v return v end }) - -local longtwostring = P(function(input,index) - if level then - -- local sentinel = ']' .. level .. ']' - local sentinel = sentinels[level] - local _, stop = find(input,sentinel,index,true) - return stop and stop + 1 - #sentinel or #input + 1 - end -end) - - local longtwostring_body = longtwostring - - local longtwostring_end = P(function(input,index) - if level then - -- local sentinel = ']' .. level .. ']' - local sentinel = sentinels[level] - local _, stop = find(input,sentinel,index,true) - return stop and stop + 1 or #input + 1 - end - end) - -local longcomment = Cmt(#('[[' + ('[' * C(equals) * '[')), function(input,index,level) - -- local sentinel = ']' .. level .. ']' - local sentinel = sentinels[level] - local _, stop = find(input,sentinel,index,true) - return stop and stop + 1 or #input + 1 -end) - -local space = patterns.space -- S(" \n\r\t\f\v") -local any = patterns.any -local eol = patterns.eol - -local squote = P("'") -local dquote = P('"') -local escaped = P("\\") * P(1) -local dashes = P('--') - -local spacing = token(whitespace, space^1) -local rest = token("default", any) - -local shortcomment = token("comment", dashes * (1-eol)^0) -local longcomment = token("comment", dashes * longcomment) - --- fails on very long string with \ at end of lines (needs embedded lexer) --- and also on newline before " but it makes no sense to waste time on it - -local shortstring = token("quote", dquote) - * token("string", (escaped + (1-dquote))^0) - * token("quote", dquote) - + token("quote", squote) - * token("string", (escaped + (1-squote))^0) - * token("quote", squote) - ------ longstring = token("quote", longonestart) ------ * token("string", longonestring) ------ * token("quote", longonestop) ------ + token("quote", longtwostart) ------ * token("string", longtwostring) ------ * token("quote", longtwostop) - -local string = shortstring ------ + longstring - -lexer.embed_lexer(lualexer, stringlexer, token("quote",longtwostart), token("string",longtwostring_body) * token("quote",longtwostring_end)) - -local integer = P("-")^-1 * (patterns.hexadecimal + patterns.decimal) -local number = token("number", patterns.float + integer) - --- officially 127-255 are ok but not utf so useless - ------ validword = R("AZ","az","__") * R("AZ","az","__","09")^0 - -local utf8character = P(1) * R("\128\191")^1 -local validword = (R("AZ","az","__") + utf8character) * (R("AZ","az","__","09") + utf8character)^0 -local validsuffix = (R("AZ","az") + utf8character) * (R("AZ","az","__","09") + utf8character)^0 - -local identifier = token("default",validword) - ------ operator = token("special", P('..') + P('~=') + S('+-*/%^#=<>;:,.{}[]()')) -- maybe split off {}[]() ------ operator = token("special", S('+-*/%^#=<>;:,{}[]()') + P('..') + P('.') + P('~=') ) -- maybe split off {}[]() ------ operator = token("special", S('+-*/%^#=<>;:,{}[]().') + P('~=') ) -- no ^1 because of nested lexers -local operator = token("special", S('+-*/%^#=<>;:,{}[]().|~')) -- no ^1 because of nested lexers - -local structure = token("special", S('{}[]()')) - -local optionalspace = spacing^0 -local hasargument = #S("{([") - -local gotokeyword = token("keyword", P("goto")) - * spacing - * token("grouping",validword) -local gotolabel = token("keyword", P("::")) - * token("grouping",validword) - * token("keyword", P("::")) - -local p_keywords = exact_match(keywords) -local p_functions = exact_match(functions) -local p_constants = exact_match(constants) -local p_internals = P("__") - * exact_match(internals) -local p_csnames = just_match(csnames) -local keyword = token("keyword", p_keywords) -local builtin = token("plain", p_functions) -local constant = token("data", p_constants) -local internal = token("data", p_internals) -local csname = token("user", p_csnames) - * ( - optionalspace * hasargument - + ( optionalspace * token("special", S(".:")) * optionalspace * token("user", validword ) )^1 - + token("user", P("_") * validsuffix) - ) - -local identifier = token("default", validword) - * ( optionalspace * token("special", S(".:")) * optionalspace * ( - token("warning", p_keywords) + - token("data", p_internals) + - token("default", validword ) - ) )^0 - --- local t = { } for k, v in next, tokenmappings do t[#t+1] = k end t = table.concat(t) --- -- local experimental = (S(t)^1) / function(s) return tokenmappings[s] end * Cp() --- --- local experimental = Cmt(S(t)^1, function(_,i,s) --- local t = tokenmappings[s] --- if t then --- return true, t, i --- end --- end) - -lualexer._rules = { - { 'whitespace', spacing }, - { 'keyword', keyword }, -- can be combined - -- { 'structure', structure }, - { 'function', builtin }, -- can be combined - { 'constant', constant }, -- can be combined - -- { 'experimental', experimental }, -- works but better split - { 'csname', csname }, - { 'goto', gotokeyword }, - { 'identifier', identifier }, - { 'string', string }, - { 'number', number }, - { 'longcomment', longcomment }, - { 'shortcomment', shortcomment }, - { 'label', gotolabel }, - { 'operator', operator }, - { 'rest', rest }, -} - --- -- experiment --- --- local idtoken = R("az","AZ","__") --- --- function context.one_of_match(specification) --- local pattern = idtoken -- the concat catches _ etc --- local list = { } --- for i=1,#specification do --- local style = specification[i][1] --- local words = specification[i][2] --- pattern = pattern + S(table.concat(words)) --- for i=1,#words do --- list[words[i]] = style --- end --- end --- return Cmt(pattern^1, function(_,i,s) --- local style = list[s] --- if style then --- return true, { style, i } -- and i or nil --- else --- -- fail --- end --- end) --- end --- --- local whatever = context.one_of_match { --- { "keyword", keywords }, -- keyword --- { "plain", functions }, -- builtin --- { "data", constants }, -- constant --- } --- --- lualexer._rules = { --- { 'whitespace', spacing }, --- { 'whatever', whatever }, --- { 'csname', csname }, --- { 'goto', gotokeyword }, --- { 'identifier', identifier }, --- { 'string', string }, --- { 'number', number }, --- { 'longcomment', longcomment }, --- { 'shortcomment', shortcomment }, --- { 'label', gotolabel }, --- { 'operator', operator }, --- { 'rest', rest }, --- } - -lualexer._tokenstyles = context.styleset - --- lualexer._foldpattern = R("az")^2 + S("{}[]") -- separate entry else interference - -lualexer._foldpattern = (P("end") + P("if") + P("do") + P("function") + P("repeat") + P("until")) * P(#(1 - R("az"))) - + S("{}[]") - -lualexer._foldsymbols = { - _patterns = { - '[a-z][a-z]+', - '[{}%[%]]', - }, - ['keyword'] = { -- challenge: if=0 then=1 else=-1 elseif=-1 - ['if'] = 1, -- if .. [then|else] .. end - ['do'] = 1, -- [while] do .. end - ['function'] = 1, -- function .. end - ['repeat'] = 1, -- repeat .. until - ['until'] = -1, - ['end'] = -1, - }, - ['comment'] = { - ['['] = 1, [']'] = -1, - }, - -- ['quote'] = { -- confusing - -- ['['] = 1, [']'] = -1, - -- }, - ['special'] = { - -- ['('] = 1, [')'] = -1, - ['{'] = 1, ['}'] = -1, - }, -} - --- embedded in tex: - -local cstoken = R("az","AZ","\127\255") + S("@!?_") -local texcsname = P("\\") * cstoken^1 -local commentline = P('%') * (1-S("\n\r"))^0 - -local texcomment = token('comment', Cmt(commentline, function() return directives.cld_inline end)) - -local longthreestart = P("\\!!bs") -local longthreestop = P("\\!!es") -local longthreestring = (1-longthreestop)^0 - -local texstring = token("quote", longthreestart) - * token("string", longthreestring) - * token("quote", longthreestop) - ------ texcommand = token("user", texcsname) -local texcommand = token("warning", texcsname) - --- local texstring = token("quote", longthreestart) --- * (texcommand + token("string",P(1-texcommand-longthreestop)^1) - longthreestop)^0 -- we match long non-\cs sequences --- * token("quote", longthreestop) - --- local whitespace = "whitespace" --- local spacing = token(whitespace, space^1) - -lualexer._directives = directives - -lualexer._rules_cld = { - { 'whitespace', spacing }, - { 'texstring', texstring }, - { 'texcomment', texcomment }, - { 'texcommand', texcommand }, - -- { 'structure', structure }, - { 'keyword', keyword }, - { 'function', builtin }, - { 'csname', csname }, - { 'constant', constant }, - { 'identifier', identifier }, - { 'string', string }, - { 'longcomment', longcomment }, - { 'shortcomment', shortcomment }, -- should not be used inline so best signal it as comment (otherwise complex state till end of inline) - { 'number', number }, - { 'operator', operator }, - { 'rest', rest }, -} - -return lualexer |