diff options
author | Marius <mariausol@gmail.com> | 2011-09-17 10:40:15 +0300 |
---|---|---|
committer | Marius <mariausol@gmail.com> | 2011-09-17 10:40:15 +0300 |
commit | 32956188684f3f0bd1cc077a6870fdd57fea0cfc (patch) | |
tree | 740208d07a5e6ab91a5a30e723fcc3e93c83114f /context/data/scite/lexers/scite-context-lexer-lua.lua | |
parent | 0c9823815b15cad63d4b9827205a605192e8cd81 (diff) | |
download | context-32956188684f3f0bd1cc077a6870fdd57fea0cfc.tar.gz |
beta 2011.09.17 09:40
Diffstat (limited to 'context/data/scite/lexers/scite-context-lexer-lua.lua')
-rw-r--r-- | context/data/scite/lexers/scite-context-lexer-lua.lua | 219 |
1 file changed, 219 insertions, 0 deletions
-- diff --git a/context/data/scite/lexers/scite-context-lexer-lua.lua (new file, mode 100644,
-- index 000000000..2a0f48026, +219 lines): full content of the added file follows.

local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for cld/lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- Adapted from lua.lua by Mitchell who based it on a lexer by Peter Odding.

-- NOTE(review): 'lexer' is provided by the Scintillua host environment, 'lpeg'
-- by the embedded LPeg library; neither is Lua stdlib.

local lexer = lexer
local token, style, colors, exact_match, no_style = lexer.token, lexer.style, lexer.colors, lexer.exact_match, lexer.style_nothing
local P, R, S, C, Cg, Cb, Cs, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Cs, lpeg.Cmt
local match, find = string.match, string.find
local global = _G

module(...) -- Lua 5.1 module idiom; required by the lexer framework of this era

local cldlexer = _M

_directives = { } -- communication channel (host toggles e.g. cld_inline here)

local keywords = {
    'and', 'break', 'do', 'else', 'elseif', 'end', 'false', 'for', 'function',
    'if', 'in', 'local', 'nil', 'not', 'or', 'repeat', 'return', 'then', 'true',
    'until', 'while',
}

local functions = {
    'assert', 'collectgarbage', 'dofile', 'error', 'getfenv', 'getmetatable',
    'ipairs', 'load', 'loadfile', 'loadstring', 'module', 'next', 'pairs',
    'pcall', 'print', 'rawequal', 'rawget', 'rawset', 'require', 'setfenv',
    'setmetatable', 'tonumber', 'tostring', 'type', 'unpack', 'xpcall',
}

local constants = {
    '_G', '_VERSION', '_M',
}

local csnames = { -- todo: option
    "context",
    "metafun",
}

-- Long-bracket strings: [[ ... ]] (level one) and [=*[ ... ]=*] (level two).
-- 'level' records the '=' run captured at the opening bracket so the closing
-- sentinel of the same level can be located with a plain find.

local level    = nil
local setlevel = function(_,i,s) level = s return i end

local equals = P("=")^0

local longonestart  = P("[[")
local longonestop   = P("]]")
local longonestring = (1-longonestop)^0

local longtwostart = P('[') * Cmt(equals,setlevel) * P('[')
local longtwostop  = P(']') * equals * P(']')

-- Match the body of a [=*[ string: everything up to (but not including) the
-- matching ']=*]' sentinel; if the file ends first, consume to end of input.
local longtwostring = P(function(input,index)
    if level then
        local sentinel = ']' .. level .. ']'
        local _, stop = find(input,sentinel,index,true)
        return stop and stop + 1 - #sentinel or #input + 1
    end
end)

-- local longtwostart  = P("[") * Cg(equals, "init") * P("[")
-- local longtwostop   = P("]") * C(equals) * P("]")
-- local longtwocheck  = Cmt(longtwostop * Cb("init"), function(s,i,a,b) return a == b end)
-- local longtwostring = (P(1) - longtwocheck)^0

-- Long comment body matcher. (A first, duplicate definition that overwrote the
-- captured level with a hard-coded "==" — a debug leftover, and dead code since
-- it was immediately shadowed — has been removed; this capture-based version is
-- the one actually in effect.) The whole '[=*[ ... ]=*]' span, including the
-- closing sentinel, is consumed as comment text.
local longcomment = Cmt(#('[[' + ('[' * C(P('=')^0) * '[')), function(input,index,level)
    local _, stop = find(input,']' .. level .. ']',index,true)
    return stop and stop + 1 or #input + 1
end)

local whitespace = cldlexer.WHITESPACE -- triggers states

local space = lexer.space -- S(" \n\r\t\f\v")
local any   = lexer.any

local squote  = P("'")
local dquote  = P('"')
local escaped = P("\\") * P(1)
local dashes  = P('--')

local spacing = token(whitespace, space^1)
local rest    = token("default", any)

local shortcomment = token("comment", dashes * lexer.nonnewline^0)
local longcomment  = token("comment", dashes * longcomment)

-- Short strings keep the quotes as "quote" tokens and the content (with
-- backslash escapes honoured) as a "string" token.
local shortstring = token("quote",  dquote)
                  * token("string", (escaped + (1-dquote))^0 )
                  * token("quote",  dquote)
                  + token("quote",  squote)
                  * token("string", (escaped + (1-squote))^0 )
                  * token("quote",  squote)

local longstring = token("quote",  longonestart)
                 * token("string", longonestring)
                 * token("quote",  longonestop)
                 + token("quote",  longtwostart)
                 * token("string", longtwostring)
                 * token("quote",  longtwostop)

local string = shortstring
             + longstring

local integer = P('-')^-1 * (lexer.hex_num + lexer.dec_num)
local number  = token("number", lexer.float + integer)

-- officially 127-255 are ok but not utf so useless

local validword = R("AZ","az","__") * R("AZ","az","__","09")^0

local identifier = token("default",validword)

local operator = token("special", P('..') + P('~=') + S('+-*/%^#=<>;:,.{}[]()')) -- maybe split off {}[]()

local optionalspace = spacing^0
local hasargument   = #S("{(")

local keyword  = token("keyword", exact_match(keywords ))
local builtin  = token("plain",   exact_match(functions))
local constant = token("data",    exact_match(constants))
-- A csname like 'context' is highlighted as "user", either directly followed
-- by an argument '{'/'(' or continued via '.field' chains.
local csname   = token("user",    exact_match(csnames  ))
               * (
                    optionalspace * hasargument
                  + ( optionalspace * token("special", P(".")) * optionalspace * token("user", validword) )^1
                 )

_rules = {
    { 'whitespace',   spacing      },
    { 'keyword',      keyword      },
    { 'function',     builtin      },
    { 'csname',       csname       },
    { 'constant',     constant     },
    { 'identifier',   identifier   },
    { 'string',       string       },
    { 'longcomment',  longcomment  },
    { 'shortcomment', shortcomment },
    { 'number',       number       },
    { 'operator',     operator     },
    { 'rest',         rest         },
}

_tokenstyles = lexer.context.styleset

_foldsymbols = {
    _patterns = {
        '%l+',
        '[%({%)}%[%]]',
    },
    ['keyword'] = {
        ['if']       =  1,
        ['end']      = -1,
        ['do']       =  1,
        ['function'] =  1,
        ['repeat']   =  1,
        ['until']    = -1,
    },
    ['comment'] = {
        ['['] = 1, [']'] = -1,
    },
    ['quote'] = { -- to be tested
        ['['] = 1, [']'] = -1,
    },
    ['special'] = {
        ['('] = 1, [')'] = -1,
        ['{'] = 1, ['}'] = -1,
    },
}

-- embedded in tex:

local cstoken     = R("az","AZ","\127\255") + S("@!?_")
local csnametex   = P("\\") * cstoken^1
local commentline = P('%') * (1-S("\n\r"))^0

-- A TeX '%' comment line only counts as comment while cld_inline mode is on.
local texcomment = token('comment', Cmt(commentline, function() return _directives.cld_inline end))

local longthreestart  = P("\\!!bs")
local longthreestop   = P("\\!!es")
local longthreestring = (1-longthreestop)^0

local texstring = token("quote",  longthreestart)
                * token("string", longthreestring)
                * token("quote",  longthreestop)

-- local texcommand = token("user", csnametex)
--
-- local texstring = token("quote", longthreestart)
--                 * (texcommand + token("string",P(1-texcommand-longthreestop)^1) - longthreestop)^0 -- we match long non-\cs sequences
--                 * token("quote", longthreestop)

_rules_cld = {
    { 'whitespace',   spacing      },
    { 'texstring',    texstring    },
    { 'texcomment',   texcomment   },
    { 'keyword',      keyword      },
    { 'function',     builtin      },
    { 'csname',       csname       },
    { 'constant',     constant     },
    { 'identifier',   identifier   },
    { 'string',       string       },
    { 'longcomment',  longcomment  },
    { 'shortcomment', shortcomment }, -- should not be used inline so best signal it as comment (otherwise complex state till end of inline)
    { 'number',       number       },
    { 'operator',     operator     },
    { 'rest',         rest         },
}