summaryrefslogtreecommitdiff
path: root/context/data/scite/lexers/scite-context-lexer-tex.lua
diff options
context:
space:
mode:
authorMarius <mariausol@gmail.com>2011-09-15 10:20:14 +0300
committerMarius <mariausol@gmail.com>2011-09-15 10:20:14 +0300
commit99ff9ece308b251302ce7a18f9be0d68278d9ee7 (patch)
tree5eb0b389881fd5412bcff70b030b4f9552ce213b /context/data/scite/lexers/scite-context-lexer-tex.lua
parenta39b448f695e8f4ce44c909a493d83643e8227cc (diff)
downloadcontext-99ff9ece308b251302ce7a18f9be0d68278d9ee7.tar.gz
beta 2011.09.15 09:08
Diffstat (limited to 'context/data/scite/lexers/scite-context-lexer-tex.lua')
-rw-r--r--context/data/scite/lexers/scite-context-lexer-tex.lua162
1 files changed, 100 insertions, 62 deletions
diff --git a/context/data/scite/lexers/scite-context-lexer-tex.lua b/context/data/scite/lexers/scite-context-lexer-tex.lua
index 4a1a0a766..caab6fc4b 100644
--- a/context/data/scite/lexers/scite-context-lexer-tex.lua
+++ b/context/data/scite/lexers/scite-context-lexer-tex.lua
@@ -6,7 +6,7 @@ local info = {
license = "see context related readme files",
}
-
+-- maybe: _LINEBYLINE variant for large files (no nesting)
-- maybe: protected_macros
--[[
@@ -24,11 +24,8 @@ local info = {
-- local interface = props["keywordclass.macros.context.en"]
-- local interface = lexer.get_property("keywordclass.macros.context.en","")
- -- the embedded lexers don't backtrack (so they're not that usefull on large
- -- texts) which is probably a scintilla issue (trade off between speed and lexable
- -- area); also there is some weird bleeding back to the parent lexer with respect
- -- to colors (i.e. the \ in \relax can become black) so I might as well use private
- -- color specifications
+ -- it seems that whitespace triggers the lexer when embedding happens, but this
+ -- is quite fragile due to duplicate styles
-- this lexer does not care about other macro packages (one can of course add a fake
-- interface but it's not on the agenda)
@@ -37,21 +34,23 @@ local info = {
local lexer = lexer
local global, string, table, lpeg = _G, string, table, lpeg
-local token, style, colors, word_match, no_style = lexer.token, lexer.style, lexer.colors, lexer.word_match, lexer.style_nothing
-local exact_match = lexer.context.exact_match
-local P, R, S, V, C, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt
+local token, style, colors, exact_match, no_style = lexer.token, lexer.style, lexer.colors, lexer.exact_match, lexer.style_nothing
+local P, R, S, V, C, Cmt, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt, lpeg.Cp, lpeg.Cc, lpeg.Ct
local type, next, pcall, loadfile, setmetatable = type, next, pcall, loadfile, setmetatable
+local find, match = string.find, string.match
module(...)
local contextlexer = _M
+
local basepath = lexer.context and lexer.context.path or _LEXERHOME
local commands = { en = { } }
local primitives = { }
local helpers = { }
+local constants = { }
-do
+do -- todo: only once, store in global
local definitions = lexer.context.loaddefinitions("mult-def.lua")
@@ -67,7 +66,13 @@ do
end
end
end
- helpers = definitions.helpers or { }
+ end
+
+ local definitions = lexer.context.loaddefinitions("mult-low.lua")
+
+ if definitions then
+ helpers = definitions.helpers or { }
+ constants = definitions.constants or { }
end
local definitions = lexer.context.loaddefinitions("mult-prm.lua")
@@ -84,43 +89,81 @@ end
local currentcommands = commands.en or { }
-local knowncommand = Cmt(R("az","AZ")^1, function(_,i,s)
+local cstoken = R("az","AZ","\127\255") + S("@!?_")
+
+local knowncommand = Cmt(cstoken^1, function(_,i,s)
return currentcommands[s] and i
end)
-local find, match = string.find, string.match
-
-local knownpreamble = Cmt(P('% '), function(input,i,_)
+local knownpreamble = Cmt(P("% "), function(input,i,_)
if i < 10 then
- local s, e, word = find(input,'^(.+)[\n\r]',i)
+ local s, e, word = find(input,'^(.+)[\n\r]',i) -- combine with match
if word then
local interface = match(word,"interface=(..)")
if interface then
- currentcommands = commands[interface] or commands.en or { }
+ currentcommands = commands[interface] or commands.en or { }
end
end
end
return false
end)
-local whitespace = lexer.WHITESPACE -- triggers states
-local any_char = lexer.any_char
+-- -- the token list contains { "style", endpos } entries
+-- --
+-- -- in principle this is faster but it is also crash sensitive for large files
+
+-- local constants_hash = { } for i=1,#constants do constants_hash [constants [i]] = true end
+-- local helpers_hash = { } for i=1,#helpers do helpers_hash [helpers [i]] = true end
+-- local primitives_hash = { } for i=1,#primitives do primitives_hash[primitives[i]] = true end
+
+-- local specialword = Ct( P('\\') * Cmt( C(cstoken^1), function(input,i,s)
+-- if currentcommands[s] then
+-- return true, "command", i
+-- elseif constants_hash[s] then
+-- return true, "data", i
+-- elseif helpers_hash[s] then
+-- return true, "plain", i
+-- elseif primitives_hash[s] then
+-- return true, "primitive", i
+-- else -- if starts with if then primitive
+-- return true, "user", i
+-- end
+-- end) )
+
+-- local specialword = P('\\') * Cmt( C(cstoken^1), function(input,i,s)
+-- if currentcommands[s] then
+-- return true, { "command", i }
+-- elseif constants_hash[s] then
+-- return true, { "data", i }
+-- elseif helpers_hash[s] then
+-- return true, { "plain", i }
+-- elseif primitives_hash[s] then
+-- return true, { "primitive", i }
+-- else -- if starts with if then primitive
+-- return true, { "user", i }
+-- end
+-- end)
+
+local whitespace = contextlexer.WHITESPACE -- triggers states
local space = lexer.space -- S(" \n\r\t\f\v")
-local cstoken = R("az","AZ") + S("@!?_") -- todo: utf8
+local any = lexer.any
local spacing = token(whitespace, space^1)
+local rest = token('default', any)
local preamble = token('preamble', knownpreamble)
local comment = token('comment', P('%') * (1-S("\n\r"))^0)
local command = token('command', P('\\') * knowncommand)
+local constant = token('data', P('\\') * exact_match(constants))
local helper = token('plain', P('\\') * exact_match(helpers))
local primitive = token('primitive', P('\\') * exact_match(primitives))
local ifprimitive = token('primitive', P('\\if') * cstoken^1)
local csname = token('user', P('\\') * (cstoken^1 + P(1)))
-local grouping = token('grouping', S("{$}"))
-local specials = token('specials', S("#()[]<>=\""))
-local extras = token('extras', S("`~%^&_-+/\'|"))
-local default = token('default', P(1))
+local grouping = token('grouping', S("{$}")) -- maybe also \bgroup \egroup \begingroup \endgroup
+local special = token('special', S("#()[]<>=\""))
+local extra = token('extra', S("`~%^&_-+/\'|"))
+
+local text = token('default', cstoken^1 )
----- startluacode = token("grouping", P("\\startluacode"))
----- stopluacode = token("grouping", P("\\stopluacode"))
@@ -128,8 +171,7 @@ local default = token('default', P(1))
local luastatus = nil
local luaenvironment = P("luacode")
-local inlinelua = P("\\ctxlua")
- + P("\\ctxcommand")
+local inlinelua = P("\\ctx") * ( P("lua") + P("command") )
+ P("\\cldcontext")
local startlua = P("\\start") * Cmt(luaenvironment,function(_,i,s) luastatus = s return true end)
@@ -142,13 +184,17 @@ local stoplua = P("\\stop") * Cmt(luaenvironment,function(_,i,s)
local startluacode = token("embedded", startlua)
local stopluacode = token("embedded", stoplua)
-local metafunenvironment = P("MPcode")
- + P("useMPgraphic")
- + P("reusableMPgraphic")
- + P("uniqueMPgraphic")
- + P("MPinclusions")
- + P("MPextensions")
- + P("MPgraphic")
+-- local metafunenvironment = P("useMPgraphic")
+-- + P("reusableMPgraphic")
+-- + P("uniqueMPgraphic")
+-- + P("MPcode")
+-- + P("MPpage")
+-- + P("MPinclusions")
+-- + P("MPextensions")
+-- + P("MPgraphic")
+
+local metafunenvironment = ( P("use") + P("reusable") + P("unique") ) * ("MPgraphic")
+ + P("MP") * ( P("code")+ P("page") + P("inclusions") + P("extensions") + P("graphic") )
-- local metafunstatus = nil -- this does not work, as the status gets lost in an embedded lexer
-- local startmetafun = P("\\start") * Cmt(metafunenvironment,function(_,i,s) metafunstatus = s return true end)
@@ -157,59 +203,50 @@ local metafunenvironment = P("MPcode")
local startmetafun = P("\\start") * metafunenvironment
local stopmetafun = P("\\stop") * metafunenvironment
-local openargument = token("specials",P("{"))
-local closeargument = token("specials",P("}"))
-local argumentcontent = token("any_char",(1-P("}"))^0)
+local openargument = token("special", P("{"))
+local closeargument = token("special", P("}"))
+local argumentcontent = token("default",(1-P("}"))^0)
-local metafunarguments = (token("default",spacing^0) * openargument * argumentcontent * closeargument)^-2
+local metafunarguments = (spacing^0 * openargument * argumentcontent * closeargument)^-2
local startmetafuncode = token("embedded", startmetafun) * metafunarguments
local stopmetafuncode = token("embedded", stopmetafun)
--- Function load(lexer_name) starts with _M.WHITESPACE = lexer_name..'_whitespace' which means that we need to
--- have frozen at the moment we load another lexer. Because spacing is used to revert to a parent lexer we need
--- to make sure that we load children as late as possible in order not to get the wrong whitespace trigger. This
--- took me quite a while to figure out (not being that familiar with the internals). BTW, if performance becomes
--- an issue we can rewrite the main lex function (memorize the grammars and speed up the byline variant).
-
local cldlexer = lexer.load('scite-context-lexer-cld')
local mpslexer = lexer.load('scite-context-lexer-mps')
lexer.embed_lexer(contextlexer, cldlexer, startluacode, stopluacode)
lexer.embed_lexer(contextlexer, mpslexer, startmetafuncode, stopmetafuncode)
+-- Watch the text grabber; after all, we're talking mostly of text (beware:
+-- no punctuation here, as it can be special). We might go for utf here.
+
_rules = {
{ "whitespace", spacing },
{ "preamble", preamble },
+
+ { "text", text },
+
{ "comment", comment },
+
+ { "constant", constant },
{ "helper", helper },
{ "command", command },
{ "ifprimitive", ifprimitive },
{ "primitive", primitive },
{ "csname", csname },
+
+ -- { "whatever", specialword }, -- not yet, crashes
+
{ "grouping", grouping },
- { "specials", specials },
- { "extras", extras },
- { 'any_char', any_char },
-}
+ { "special", special },
+ { "extra", extra },
-_tokenstyles = {
- { "preamble", lexer.style_context_preamble },
- { "comment", lexer.style_context_comment },
- { "default", lexer.style_context_default },
- { 'number', lexer.style_context_number },
- { "embedded", lexer.style_context_embedded },
- { "grouping", lexer.style_context_grouping },
- { "primitive", lexer.style_context_primitive },
- { "plain", lexer.style_context_plain },
- { "command", lexer.style_context_command },
- { "user", lexer.style_context_user },
- { "specials", lexer.style_context_specials },
- { "extras", lexer.style_context_extras },
- { "quote", lexer.style_context_quote },
- { "keyword", lexer.style_context_keyword },
+ { "rest", rest },
}
+_tokenstyles = lexer.context.styleset
+
local folds = {
["\\start"] = 1, ["\\stop" ] = -1,
["\\begin"] = 1, ["\\end" ] = -1,
@@ -222,5 +259,6 @@ _foldsymbols = {
},
["helper"] = folds,
["command"] = folds,
+ ["user"] = folds,
["grouping"] = folds,
}