diff options
author | Marius <mariausol@gmail.com> | 2011-09-15 10:20:14 +0300 |
---|---|---|
committer | Marius <mariausol@gmail.com> | 2011-09-15 10:20:14 +0300 |
commit | 99ff9ece308b251302ce7a18f9be0d68278d9ee7 (patch) | |
tree | 5eb0b389881fd5412bcff70b030b4f9552ce213b /context/data/scite/lexers/scite-context-lexer-tex.lua | |
parent | a39b448f695e8f4ce44c909a493d83643e8227cc (diff) | |
download | context-99ff9ece308b251302ce7a18f9be0d68278d9ee7.tar.gz |
beta 2011.09.15 09:08
Diffstat (limited to 'context/data/scite/lexers/scite-context-lexer-tex.lua')
-rw-r--r-- | context/data/scite/lexers/scite-context-lexer-tex.lua | 162 |
1 files changed, 100 insertions, 62 deletions
diff --git a/context/data/scite/lexers/scite-context-lexer-tex.lua b/context/data/scite/lexers/scite-context-lexer-tex.lua index 4a1a0a766..caab6fc4b 100644 --- a/context/data/scite/lexers/scite-context-lexer-tex.lua +++ b/context/data/scite/lexers/scite-context-lexer-tex.lua @@ -6,7 +6,7 @@ local info = { license = "see context related readme files", } - +-- maybe: _LINEBYLINE variant for large files (no nesting) -- maybe: protected_macros --[[ @@ -24,11 +24,8 @@ local info = { -- local interface = props["keywordclass.macros.context.en"] -- local interface = lexer.get_property("keywordclass.macros.context.en","") - -- the embedded lexers don't backtrack (so they're not that usefull on large - -- texts) which is probably a scintilla issue (trade off between speed and lexable - -- area); also there is some weird bleeding back to the parent lexer with respect - -- to colors (i.e. the \ in \relax can become black) so I might as well use private - -- color specifications + -- it seems that whitespace triggers the lexer when embedding happens, but this + -- is quite fragile due to duplicate styles -- this lexer does not care about other macro packages (one can of course add a fake -- interface but it's not on the agenda) @@ -37,21 +34,23 @@ local info = { local lexer = lexer local global, string, table, lpeg = _G, string, table, lpeg -local token, style, colors, word_match, no_style = lexer.token, lexer.style, lexer.colors, lexer.word_match, lexer.style_nothing -local exact_match = lexer.context.exact_match -local P, R, S, V, C, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt +local token, style, colors, exact_match, no_style = lexer.token, lexer.style, lexer.colors, lexer.exact_match, lexer.style_nothing +local P, R, S, V, C, Cmt, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt, lpeg.Cp, lpeg.Cc, lpeg.Ct local type, next, pcall, loadfile, setmetatable = type, next, pcall, loadfile, setmetatable +local find, match = string.find, string.match module(...) local contextlexer = _M + local basepath = lexer.context and lexer.context.path or _LEXERHOME local commands = { en = { } } local primitives = { } local helpers = { } +local constants = { } -do +do -- todo: only once, store in global local definitions = lexer.context.loaddefinitions("mult-def.lua") @@ -67,7 +66,13 @@ do end end end - helpers = definitions.helpers or { } + end + + local definitions = lexer.context.loaddefinitions("mult-low.lua") + + if definitions then + helpers = definitions.helpers or { } + constants = definitions.constants or { } end local definitions = lexer.context.loaddefinitions("mult-prm.lua") @@ -84,43 +89,81 @@ end local currentcommands = commands.en or { } -local knowncommand = Cmt(R("az","AZ")^1, function(_,i,s) +local cstoken = R("az","AZ","\127\255") + S("@!?_") + +local knowncommand = Cmt(cstoken^1, function(_,i,s) return currentcommands[s] and i end) -local find, match = string.find, string.match - -local knownpreamble = Cmt(P('% '), function(input,i,_) +local knownpreamble = Cmt(P("% "), function(input,i,_) if i < 10 then - local s, e, word = find(input,'^(.+)[\n\r]',i) + local s, e, word = find(input,'^(.+)[\n\r]',i) -- combine with match if word then local interface = match(word,"interface=(..)") if interface then - currentcommands = commands[interface] or commands.en or { } + currentcommands = commands[interface] or commands.en or { } end end end return false end) -local whitespace = lexer.WHITESPACE -- triggers states -local any_char = lexer.any_char +-- -- the token list contains { "style", endpos } entries +-- -- +-- -- in principle this is faster but it is also crash sensitive for large files + +-- local constants_hash = { } for i=1,#constants do constants_hash [constants [i]] = true end +-- local helpers_hash = { } for i=1,#helpers do helpers_hash [helpers [i]] = true end +-- local primitives_hash = { } for i=1,#primitives do primitives_hash[primitives[i]] = true end + +-- local specialword = Ct( P('\\') * Cmt( C(cstoken^1), function(input,i,s) +-- if currentcommands[s] then +-- return true, "command", i +-- elseif constants_hash[s] then +-- return true, "data", i +-- elseif helpers_hash[s] then +-- return true, "plain", i +-- elseif primitives_hash[s] then +-- return true, "primitive", i +-- else -- if starts with if then primitive +-- return true, "user", i +-- end +-- end) ) + +-- local specialword = P('\\') * Cmt( C(cstoken^1), function(input,i,s) +-- if currentcommands[s] then +-- return true, { "command", i } +-- elseif constants_hash[s] then +-- return true, { "data", i } +-- elseif helpers_hash[s] then +-- return true, { "plain", i } +-- elseif primitives_hash[s] then +-- return true, { "primitive", i } +-- else -- if starts with if then primitive +-- return true, { "user", i } +-- end +-- end) + +local whitespace = contextlexer.WHITESPACE -- triggers states local space = lexer.space -- S(" \n\r\t\f\v") -local cstoken = R("az","AZ") + S("@!?_") -- todo: utf8 +local any = lexer.any local spacing = token(whitespace, space^1) +local rest = token('default', any) local preamble = token('preamble', knownpreamble) local comment = token('comment', P('%') * (1-S("\n\r"))^0) local command = token('command', P('\\') * knowncommand) +local constant = token('data', P('\\') * exact_match(constants)) local helper = token('plain', P('\\') * exact_match(helpers)) local primitive = token('primitive', P('\\') * exact_match(primitives)) local ifprimitive = token('primitive', P('\\if') * cstoken^1) local csname = token('user', P('\\') * (cstoken^1 + P(1))) -local grouping = token('grouping', S("{$}")) -local specials = token('specials', S("#()[]<>=\"")) -local extras = token('extras', S("`~%^&_-+/\'|")) -local default = token('default', P(1)) +local grouping = token('grouping', S("{$}")) -- maybe also \bgroup \egroup \begingroup \endgroup +local special = token('special', S("#()[]<>=\"")) +local extra = token('extra', S("`~%^&_-+/\'|")) + +local text = token('default', cstoken^1 ) ----- startluacode = token("grouping", P("\\startluacode")) ----- stopluacode = token("grouping", P("\\stopluacode")) @@ -128,8 +171,7 @@ local default = token('default', P(1)) local luastatus = nil local luaenvironment = P("luacode") -local inlinelua = P("\\ctxlua") - + P("\\ctxcommand") +local inlinelua = P("\\ctx") * ( P("lua") + P("command") ) + P("\\cldcontext") local startlua = P("\\start") * Cmt(luaenvironment,function(_,i,s) luastatus = s return true end) @@ -142,13 +184,17 @@ local stoplua = P("\\stop") * Cmt(luaenvironment,function(_,i,s) local startluacode = token("embedded", startlua) local stopluacode = token("embedded", stoplua) -local metafunenvironment = P("MPcode") - + P("useMPgraphic") - + P("reusableMPgraphic") - + P("uniqueMPgraphic") - + P("MPinclusions") - + P("MPextensions") - + P("MPgraphic") +-- local metafunenvironment = P("useMPgraphic") +-- + P("reusableMPgraphic") +-- + P("uniqueMPgraphic") +-- + P("MPcode") +-- + P("MPpage") +-- + P("MPinclusions") +-- + P("MPextensions") +-- + P("MPgraphic") + +local metafunenvironment = ( P("use") + P("reusable") + P("unique") ) * ("MPgraphic") + + P("MP") * ( P("code")+ P("page") + P("inclusions") + P("extensions") + P("graphic") ) -- local metafunstatus = nil -- this does not work, as the status gets lost in an embedded lexer -- local startmetafun = P("\\start") * Cmt(metafunenvironment,function(_,i,s) metafunstatus = s return true end) @@ -157,59 +203,50 @@ local metafunenvironment = P("MPcode") local startmetafun = P("\\start") * metafunenvironment local stopmetafun = P("\\stop") * metafunenvironment -local openargument = token("specials",P("{")) -local closeargument = token("specials",P("}")) -local argumentcontent = token("any_char",(1-P("}"))^0) +local openargument = token("special", P("{")) +local closeargument = token("special", P("}")) +local argumentcontent = token("default",(1-P("}"))^0) -local metafunarguments = (token("default",spacing^0) * openargument * argumentcontent * closeargument)^-2 +local metafunarguments = (spacing^0 * openargument * argumentcontent * closeargument)^-2 local startmetafuncode = token("embedded", startmetafun) * metafunarguments local stopmetafuncode = token("embedded", stopmetafun) --- Function load(lexer_name) starts with _M.WHITESPACE = lexer_name..'_whitespace' which means that we need to --- have frozen at the moment we load another lexer. Because spacing is used to revert to a parent lexer we need --- to make sure that we load children as late as possible in order not to get the wrong whitespace trigger. This --- took me quite a while to figure out (not being that familiar with the internals). BTW, if performance becomes --- an issue we can rewrite the main lex function (memorize the grammars and speed up the byline variant). - local cldlexer = lexer.load('scite-context-lexer-cld') local mpslexer = lexer.load('scite-context-lexer-mps') lexer.embed_lexer(contextlexer, cldlexer, startluacode, stopluacode) lexer.embed_lexer(contextlexer, mpslexer, startmetafuncode, stopmetafuncode) +-- Watch the text grabber, after all, we're talking mostly of text (beware, +-- no punctuation here as it can be special. We might go for utf here. + _rules = { { "whitespace", spacing }, { "preamble", preamble }, + + { "text", text }, + { "comment", comment }, + + { "constant", constant }, { "helper", helper }, { "command", command }, { "ifprimitive", ifprimitive }, { "primitive", primitive }, { "csname", csname }, + + -- { "whatever", specialword }, -- not yet, crashes + { "grouping", grouping }, - { "specials", specials }, - { "extras", extras }, - { 'any_char', any_char }, -} + { "special", special }, + { "extra", extra }, -_tokenstyles = { - { "preamble", lexer.style_context_preamble }, - { "comment", lexer.style_context_comment }, - { "default", lexer.style_context_default }, - { 'number', lexer.style_context_number }, - { "embedded", lexer.style_context_embedded }, - { "grouping", lexer.style_context_grouping }, - { "primitive", lexer.style_context_primitive }, - { "plain", lexer.style_context_plain }, - { "command", lexer.style_context_command }, - { "user", lexer.style_context_user }, - { "specials", lexer.style_context_specials }, - { "extras", lexer.style_context_extras }, - { "quote", lexer.style_context_quote }, - { "keyword", lexer.style_context_keyword }, + { "rest", rest }, } +_tokenstyles = lexer.context.styleset + local folds = { ["\\start"] = 1, ["\\stop" ] = -1, ["\\begin"] = 1, ["\\end" ] = -1, @@ -222,5 +259,6 @@ _foldsymbols = { }, ["helper"] = folds, ["command"] = folds, + ["user"] = folds, ["grouping"] = folds, } |