Diffstat (limited to 'context/data/textadept/context/lexers')
23 files changed, 5325 insertions, 0 deletions
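All of the new files below follow the same scaffold: they require the scite-context-lexer module (the stock lexer.lua is deliberately absent, as lexer.rme notes), create a lexer object with lexer.new, define lpeg token rules, and hang _rules, _tokenstyles and (optionally) fold data on that object before returning it. The following is a minimal sketch of that scaffold, for orientation only; it is not part of the commit, the "demo" name and its token classes are hypothetical, and it assumes scite-context-lexer.lua is on the runtime path and that the host editor provides the lpeg global, as it does for the lexers in this commit.

local lexer    = require("scite-context-lexer")
local context  = lexer.context
local patterns = context.patterns

local token = lexer.token
local P     = lpeg.P

local demolexer  = lexer.new("demo","scite-context-lexer-demo")
local whitespace = demolexer.whitespace

-- the whitespace rule comes first, as in every lexer in this commit

local t_spacing = token(whitespace, patterns.space^1)
local t_comment = token("comment", P("#") * (1 - patterns.eol)^0)
local t_rest    = token("default", patterns.any)

demolexer._rules = {
    { "whitespace", t_spacing },
    { "comment",    t_comment },
    { "rest",       t_rest },
}

demolexer._tokenstyles = context.styleset

return demolexer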
diff --git a/context/data/textadept/context/lexers/lexer.rme b/context/data/textadept/context/lexers/lexer.rme new file mode 100644 index 000000000..5e9604f63 --- /dev/null +++ b/context/data/textadept/context/lexers/lexer.rme @@ -0,0 +1 @@ +We have no lexer.lua here! diff --git a/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua b/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua new file mode 100644 index 000000000..dce24a2b9 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua @@ -0,0 +1,196 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for bibtex", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +local global, string, table, lpeg = _G, string, table, lpeg +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V +local type = type + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token +local exact_match = lexer.exact_match + +local bibtexlexer = lexer.new("bib","scite-context-lexer-bibtex") +local whitespace = bibtexlexer.whitespace + + local escape, left, right = P("\\"), P('{'), P('}') + + patterns.balanced = P { + [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0, + [2] = left * V(1) * right + } + +-- taken from bibl-bib.lua + +local anything = patterns.anything +local percent = P("%") +local start = P("@") +local comma = P(",") +local hash = P("#") +local escape = P("\\") +local single = P("'") +local double = P('"') +local left = P('{') +local right = P('}') +local lineending = S("\n\r") +local space = S(" \t\n\r\f") +local spaces = space^1 +local equal = P("=") + +local keyword = (R("az","AZ","09") + S("@_:-"))^1 +----- s_quoted = ((escape*single) + spaces + (1-single))^0 +----- d_quoted = ((escape*double) + spaces + (1-double))^0 +local s_quoted = ((escape*single) + (1-single))^0 +local d_quoted = ((escape*double) + (1-double))^0 + +local balanced = patterns.balanced + +local t_spacing = token(whitespace, space^1) +local t_optionalws = token("default", space^1)^0 + +local t_equal = token("operator",equal) +local t_left = token("grouping",left) +local t_right = token("grouping",right) +local t_comma = token("operator",comma) +local t_hash = token("operator",hash) + +local t_s_value = token("operator",single) + * token("text",s_quoted) + * token("operator",single) +local t_d_value = token("operator",double) + * token("text",d_quoted) + * token("operator",double) +local t_b_value = token("operator",left) + * token("text",balanced) + * token("operator",right) +local t_r_value = token("text",keyword) + +local t_keyword = token("keyword",keyword) +local t_key = token("command",keyword) +local t_label = token("warning",keyword) + +local t_somevalue = t_s_value + t_d_value + t_b_value + t_r_value +local t_value = t_somevalue + * ((t_optionalws * t_hash * t_optionalws) * t_somevalue)^0 + +local t_assignment = t_optionalws + * t_key + * t_optionalws + * t_equal + * t_optionalws + * t_value + +local t_shortcut = t_keyword + * t_optionalws + * t_left + * t_optionalws + * (t_assignment * t_comma^0)^0 + * t_optionalws + * t_right + +local t_definition = t_keyword + * t_optionalws + * t_left + * t_optionalws + * t_label + * t_optionalws + * t_comma + * (t_assignment * t_comma^0)^0 + * t_optionalws + * t_right + +local t_comment = t_keyword + * t_optionalws + * 
t_left
+                  * token("text",(1-t_right)^0)
+                  * t_optionalws
+                  * t_right
+
+local t_forget = token("comment",percent^1 * (1-lineending)^0)
+
+local t_rest = token("default",anything)
+
+-- this kind of lexing seems impossible as the size of the buffer passed to the lexer is not
+-- large enough .. but we can cheat and use this:
+--
+-- function OnOpen(filename) editor:Colourise(1,editor.TextLength) end -- or is it 0?
+
+-- somehow lexing fails on this more complex lexer when we insert something, there is no
+-- backtracking to whitespace when we have no embedded lexer, so we fake one ... this works
+-- to some extent but not in all cases (e.g. editing inside a line fails) .. maybe i need to
+-- patch the dll ... (better not)
+
+local dummylexer = lexer.load("scite-context-lexer-dummy","bib-dum")
+
+local dummystart = token("embedded",P("\001")) -- a character unlikely to be used
+local dummystop  = token("embedded",P("\002")) -- a character unlikely to be used
+
+lexer.embed_lexer(bibtexlexer,dummylexer,dummystart,dummystop)
+
+-- maybe we need to define each functional block as a lexer (some 4) so i'll do that if
+-- this issue persists ... maybe consider making a local lexer option (not load,
+-- just lexer.new or so) .. or maybe do the reverse, embed the main one in a dummy child
+
+bibtexlexer._rules = {
+    { "whitespace", t_spacing },
+    { "forget",     t_forget },
+    { "shortcut",   t_shortcut },
+    { "definition", t_definition },
+    { "comment",    t_comment },
+    { "rest",       t_rest },
+}
+
+-- local t_assignment = t_key
+--                    * t_optionalws
+--                    * t_equal
+--                    * t_optionalws
+--                    * t_value
+--
+-- local t_shortcut = t_keyword
+--                  * t_optionalws
+--                  * t_left
+--
+-- local t_definition = t_keyword
+--                    * t_optionalws
+--                    * t_left
+--                    * t_optionalws
+--                    * t_label
+--                    * t_optionalws
+--                    * t_comma
+--
+-- bibtexlexer._rules = {
+--     { "whitespace", t_spacing },
+--     { "assignment", t_assignment },
+--     { "definition", t_definition },
+--     { "shortcut",   t_shortcut },
+--     { "right",      t_right },
+--     { "comma",      t_comma },
+--     { "forget",     t_forget },
+--     { "comment",    t_comment },
+--     { "rest",       t_rest },
+-- }
+
+bibtexlexer._tokenstyles = context.styleset
+
+bibtexlexer._foldpattern = P("{") + P("}")
+
+bibtexlexer._foldsymbols = {
+    _patterns = {
+        "{",
+        "}",
+    },
+    ["grouping"] = {
+        ["{"] = 1,
+        ["}"] = -1,
+    },
+}
+
+return bibtexlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cld.lua b/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
new file mode 100644
index 000000000..a5fbf9cd7
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
@@ -0,0 +1,24 @@
+local info = {
+    version   = 1.002,
+    comment   = "scintilla lpeg lexer for cld",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local cldlexer = lexer.new("cld","scite-context-lexer-cld")
+local lualexer = lexer.load("scite-context-lexer-lua")
+
+-- can probably be done nicer now, a bit of a hack
+
+cldlexer._rules = lualexer._rules_cld
+cldlexer._tokenstyles = lualexer._tokenstyles
+cldlexer._foldsymbols = lualexer._foldsymbols
+cldlexer._directives = lualexer._directives
+
+return cldlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
new file mode 100644
index 000000000..e8ff3c1ff
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
@@ -0,0 +1,24 @@
+local info = {
+    version   = 1.002,
+    comment   = "scintilla lpeg lexer for cpp web",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local cppweblexer = lexer.new("cpp-web","scite-context-lexer-cpp")
+local cpplexer = lexer.load("scite-context-lexer-cpp")
+
+-- can probably be done nicer now, a bit of a hack
+
+cppweblexer._rules = cpplexer._rules_web
+cppweblexer._tokenstyles = cpplexer._tokenstyles
+cppweblexer._foldsymbols = cpplexer._foldsymbols
+cppweblexer._directives = cpplexer._directives
+
+return cppweblexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua b/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
new file mode 100644
index 000000000..d56dc58f9
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
@@ -0,0 +1,189 @@
+local info = {
+    version   = 1.002,
+    comment   = "scintilla lpeg lexer for cpp",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+-- looks like the original cpp lexer but web ready (so nothing special here yet)
+
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+local exact_match = lexer.exact_match
+
+local cpplexer = lexer.new("cpp","scite-context-lexer-cpp")
+local whitespace = cpplexer.whitespace
+
+local keywords = { -- copied from cpp.lua
+    -- c
+    "asm", "auto", "break", "case", "const", "continue", "default", "do", "else",
+    "extern", "false", "for", "goto", "if", "inline", "register", "return",
+    "sizeof", "static", "switch", "true", "typedef", "volatile", "while",
+    "restrict",
+    -- hm
+    "_Bool", "_Complex", "_Pragma", "_Imaginary",
+    -- c++
+ "catch", "class", "const_cast", "delete", "dynamic_cast", "explicit", + "export", "friend", "mutable", "namespace", "new", "operator", "private", + "protected", "public", "signals", "slots", "reinterpret_cast", + "static_assert", "static_cast", "template", "this", "throw", "try", "typeid", + "typename", "using", "virtual" +} + +local datatypes = { -- copied from cpp.lua + "bool", "char", "double", "enum", "float", "int", "long", "short", "signed", + "struct", "union", "unsigned", "void" +} + +local macros = { -- copied from cpp.lua + "define", "elif", "else", "endif", "error", "if", "ifdef", "ifndef", "import", + "include", "line", "pragma", "undef", "using", "warning" +} + +local space = patterns.space -- S(" \n\r\t\f\v") +local any = patterns.any +local restofline = patterns.restofline +local startofline = patterns.startofline + +local squote = P("'") +local dquote = P('"') +local period = P(".") +local escaped = P("\\") * P(1) +local slashes = P("//") +local begincomment = P("/*") +local endcomment = P("*/") +local percent = P("%") + +local hexadecimal = patterns.hexadecimal +local decimal = patterns.decimal +local float = patterns.float +local integer = P("-")^-1 * (hexadecimal + decimal) -- also in patterns ? + +local spacing = token(whitespace, space^1) +local rest = token("default", any) + +local shortcomment = token("comment", slashes * restofline^0) +local longcomment = token("comment", begincomment * (1-endcomment)^0 * endcomment^-1) + +local shortstring = token("quote", dquote) -- can be shared + * token("string", (escaped + (1-dquote))^0) + * token("quote", dquote) + + token("quote", squote) + * token("string", (escaped + (1-squote))^0) + * token("quote", squote) + +local number = token("number", float + integer) + +local validword = R("AZ","az","__") * R("AZ","az","__","09")^0 +local identifier = token("default",validword) + +local operator = token("special", S("+-*/%^!=<>;:{}[]().&|?~")) + +----- optionalspace = spacing^0 + +local p_keywords = exact_match(keywords) +local p_datatypes = exact_match(datatypes) +local p_macros = exact_match(macros) + +local keyword = token("keyword", p_keywords) +local datatype = token("keyword", p_datatypes) +local identifier = token("default", validword) + +local macro = token("data", #P("#") * startofline * P("#") * S("\t ")^0 * p_macros) + +cpplexer._rules = { + { "whitespace", spacing }, + { "keyword", keyword }, + { "type", datatype }, + { "identifier", identifier }, + { "string", shortstring }, + { "longcomment", longcomment }, + { "shortcomment", shortcomment }, + { "number", number }, + { "macro", macro }, + { "operator", operator }, + { "rest", rest }, +} + +local web = lexer.loadluafile("scite-context-lexer-web-snippets") + +if web then + + lexer.inform("supporting web snippets in cpp lexer") + + cpplexer._rules_web = { + { "whitespace", spacing }, + { "keyword", keyword }, + { "type", datatype }, + { "identifier", identifier }, + { "string", shortstring }, + { "longcomment", longcomment }, + { "shortcomment", shortcomment }, + { "web", web.pattern }, + { "number", number }, + { "macro", macro }, + { "operator", operator }, + { "rest", rest }, + } + +else + + lexer.report("not supporting web snippets in cpp lexer") + + cpplexer._rules_web = { + { "whitespace", spacing }, + { "keyword", keyword }, + { "type", datatype }, + { "identifier", identifier }, + { "string", shortstring }, + { "longcomment", longcomment }, + { "shortcomment", shortcomment }, + { "number", number }, + { "macro", macro }, + { "operator", operator }, + { "rest", 
rest },
+    }
+
+end
+
+cpplexer._tokenstyles = context.styleset
+
+cpplexer._foldpattern = P("/*") + P("*/") + S("{}") -- separate entry else interference (singular?)
+
+cpplexer._foldsymbols = {
+    _patterns = {
+        "[{}]",
+        "/%*",
+        "%*/",
+    },
+    -- ["data"] = { -- macro
+    --     ["region"] = 1,
+    --     ["endregion"] = -1,
+    --     ["if"] = 1,
+    --     ["ifdef"] = 1,
+    --     ["ifndef"] = 1,
+    --     ["endif"] = -1,
+    -- },
+    ["special"] = { -- operator
+        ["{"] = 1,
+        ["}"] = -1,
+    },
+    ["comment"] = {
+        ["/*"] = 1,
+        ["*/"] = -1,
+    }
+}
+
+-- -- by indentation:
+
+cpplexer._foldpatterns = nil
+cpplexer._foldsymbols = nil
+
+return cpplexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua b/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
new file mode 100644
index 000000000..69590ed34
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
@@ -0,0 +1,36 @@
+local info = {
+    version   = 1.002,
+    comment   = "scintilla lpeg lexer that triggers whitespace backtracking",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+-- the lexer dll doesn't backtrack when there is no embedded lexer so
+-- we need to trigger that, for instance in the bibtex lexer, but still
+-- we get failed lexing
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local dummylexer = lexer.new("dummy","scite-context-lexer-dummy")
+local whitespace = dummylexer.whitespace
+
+local space = patterns.space
+local nospace = (1-space)
+
+local t_spacing = token(whitespace, space^1)
+local t_rest = token("default", nospace^1)
+
+dummylexer._rules = {
+    { "whitespace", t_spacing },
+    { "rest", t_rest },
+}
+
+dummylexer._tokenstyles = context.styleset
+
+return dummylexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua b/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
new file mode 100644
index 000000000..5d5b689d2
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
@@ -0,0 +1,32 @@
+local info = {
+    version   = 1.002,
+    comment   = "scintilla lpeg lexer for lua longstrings",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local stringlexer = lexer.new("lua-longstring","scite-context-lexer-lua-longstring")
+local whitespace = stringlexer.whitespace
+
+local space = patterns.space
+local nospace = 1 - space
+
+local p_spaces = token(whitespace, space^1)
+local p_string = token("string", nospace^1)
+
+stringlexer._rules = {
+    { "whitespace", p_spaces },
+    { "string", p_string },
+}
+
+stringlexer._tokenstyles = context.styleset
+
+return stringlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-lua.lua b/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
new file mode 100644
index 000000000..a8aa8dbe3
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
@@ -0,0 +1,389 @@
+local info = {
+    version   = 1.002,
+    comment   = "scintilla lpeg lexer for lua",
+    author    = "Hans Hagen,
PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- beware: all multiline is messy, so even if it's no lexer, it should be an embedded lexer +-- we probably could use a local whitespace variant but this is cleaner + +local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp +local match, find = string.match, string.find +local setmetatable = setmetatable + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token +local exact_match = lexer.exact_match +local just_match = lexer.just_match + +local lualexer = lexer.new("lua","scite-context-lexer-lua") +local whitespace = lualexer.whitespace + +local stringlexer = lexer.load("scite-context-lexer-lua-longstring") +----- labellexer = lexer.load("scite-context-lexer-lua-labelstring") + +local directives = { } -- communication channel + +-- this will be extended + +-- we could combine some in a hash that returns the class that then makes the token +-- this can save time on large files + +local keywords = { + "and", "break", "do", "else", "elseif", "end", "false", "for", "function", -- "goto", + "if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true", + "until", "while", +} + +local functions = { + "assert", "collectgarbage", "dofile", "error", "getmetatable", + "ipairs", "load", "loadfile", "module", "next", "pairs", + "pcall", "print", "rawequal", "rawget", "rawset", "require", + "setmetatable", "tonumber", "tostring", "type", "unpack", "xpcall", "select", + + "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32", +} + +local constants = { + "_G", "_VERSION", "_M", "...", "_ENV", + -- here too + "__add", "__call", "__concat", "__div", "__idiv", "__eq", "__gc", "__index", + "__le", "__lt", "__metatable", "__mode", "__mul", "__newindex", + "__pow", "__sub", "__tostring", "__unm", "__len", + "__pairs", "__ipairs", + "NaN", +} + +-- local tokenmappings = { } +-- +-- for i=1,#keywords do tokenmappings[keywords [i]] = "keyword" } +-- for i=1,#functions do tokenmappings[functions[i]] = "function" } +-- for i=1,#constants do tokenmappings[constants[i]] = "constant" } + +local internals = { -- __ + "add", "call", "concat", "div", "eq", "gc", "index", + "le", "lt", "metatable", "mode", "mul", "newindex", + "pow", "sub", "tostring", "unm", "len", +} + +local depricated = { + "arg", "arg.n", + "loadstring", "setfenv", "getfenv", + "pack", +} + +local csnames = { -- todo: option + "commands", + "context", +-- "ctxcmd", +-- "ctx", + "metafun", + "metapost", +} + +local level = nil +local setlevel = function(_,i,s) level = s return i end + +local equals = P("=")^0 + +local longonestart = P("[[") +local longonestop = P("]]") +local longonestring = (1-longonestop)^0 + +local longtwostart = P("[") * Cmt(equals,setlevel) * P("[") +local longtwostop = P("]") * equals * P("]") + +local sentinels = { } setmetatable(sentinels, { __index = function(t,k) local v = "]" .. k .. "]" t[k] = v return v end }) + +local longtwostring = P(function(input,index) + if level then + -- local sentinel = "]" .. level .. 
"]" + local sentinel = sentinels[level] + local _, stop = find(input,sentinel,index,true) + return stop and stop + 1 - #sentinel or #input + 1 + end +end) + + local longtwostring_body = longtwostring + + local longtwostring_end = P(function(input,index) + if level then + -- local sentinel = "]" .. level .. "]" + local sentinel = sentinels[level] + local _, stop = find(input,sentinel,index,true) + return stop and stop + 1 or #input + 1 + end + end) + +local longcomment = Cmt(#("[[" + ("[" * C(equals) * "[")), function(input,index,level) + -- local sentinel = "]" .. level .. "]" + local sentinel = sentinels[level] + local _, stop = find(input,sentinel,index,true) + return stop and stop + 1 or #input + 1 +end) + +local space = patterns.space -- S(" \n\r\t\f\v") +local any = patterns.any +local eol = patterns.eol + +local squote = P("'") +local dquote = P('"') +local escaped = P("\\") * P(1) +local dashes = P("--") + +local spacing = token(whitespace, space^1) +local rest = token("default", any) + +local shortcomment = token("comment", dashes * (1-eol)^0) +local longcomment = token("comment", dashes * longcomment) + +-- fails on very long string with \ at end of lines (needs embedded lexer) +-- and also on newline before " but it makes no sense to waste time on it + +local shortstring = token("quote", dquote) + * token("string", (escaped + (1-dquote))^0) + * token("quote", dquote) + + token("quote", squote) + * token("string", (escaped + (1-squote))^0) + * token("quote", squote) + +----- longstring = token("quote", longonestart) +----- * token("string", longonestring) +----- * token("quote", longonestop) +----- + token("quote", longtwostart) +----- * token("string", longtwostring) +----- * token("quote", longtwostop) + +local string = shortstring +----- + longstring + +lexer.embed_lexer(lualexer, stringlexer, token("quote",longtwostart), token("string",longtwostring_body) * token("quote",longtwostring_end)) + +local integer = P("-")^-1 * (patterns.hexadecimal + patterns.decimal) +local number = token("number", patterns.float + integer) + * (token("error",R("AZ","az","__")^1))^0 + +-- officially 127-255 are ok but not utf so useless + +----- validword = R("AZ","az","__") * R("AZ","az","__","09")^0 + +local utf8character = P(1) * R("\128\191")^1 +local validword = (R("AZ","az","__") + utf8character) * (R("AZ","az","__","09") + utf8character)^0 +local validsuffix = (R("AZ","az") + utf8character) * (R("AZ","az","__","09") + utf8character)^0 + +local identifier = token("default",validword) + +----- operator = token("special", P('..') + P('~=') + S('+-*/%^#=<>;:,.{}[]()')) -- maybe split off {}[]() +----- operator = token("special", S('+-*/%^#=<>;:,{}[]()') + P('..') + P('.') + P('~=') ) -- maybe split off {}[]() +----- operator = token("special", S('+-*/%^#=<>;:,{}[]().') + P('~=') ) -- no ^1 because of nested lexers +local operator = token("special", S('+-*/%^#=<>;:,{}[]().|~')) -- no ^1 because of nested lexers + +local structure = token("special", S('{}[]()')) + +local optionalspace = spacing^0 +local hasargument = #S("{([") + +-- ideal should be an embedded lexer .. 
+ +local gotokeyword = token("keyword", P("goto")) + * spacing + * token("grouping",validword) +local gotolabel = token("keyword", P("::")) + * (spacing + shortcomment)^0 + * token("grouping",validword) + * (spacing + shortcomment)^0 + * token("keyword", P("::")) + +----- p_keywords = exact_match(keywords) +----- p_functions = exact_match(functions) +----- p_constants = exact_match(constants) +----- p_internals = P("__") +----- * exact_match(internals) + +local p_finish = #(1-R("az","AZ","__")) +local p_keywords = lexer.helpers.utfchartabletopattern(keywords) * p_finish -- exact_match(keywords) +local p_functions = lexer.helpers.utfchartabletopattern(functions) * p_finish -- exact_match(functions) +local p_constants = lexer.helpers.utfchartabletopattern(constants) * p_finish -- exact_match(constants) +local p_internals = P("__") + * lexer.helpers.utfchartabletopattern(internals) * p_finish -- exact_match(internals) + +local p_csnames = lexer.helpers.utfchartabletopattern(csnames) -- * p_finish -- just_match(csnames) +local p_ctnames = P("ctx") * R("AZ","az","__")^0 +local keyword = token("keyword", p_keywords) +local builtin = token("plain", p_functions) +local constant = token("data", p_constants) +local internal = token("data", p_internals) +local csname = token("user", p_csnames + p_ctnames) + * p_finish * optionalspace * ( + hasargument + + ( token("special", S(".:")) * optionalspace * token("user", validword) )^1 + )^-1 + +local identifier = token("default", validword) + * ( optionalspace * token("special", S(".:")) * optionalspace * ( + token("warning", p_keywords) + + token("data", p_internals) + + token("default", validword ) + ) )^0 + +-- local t = { } for k, v in next, tokenmappings do t[#t+1] = k end t = table.concat(t) +-- -- local experimental = (S(t)^1) / function(s) return tokenmappings[s] end * Cp() +-- +-- local experimental = Cmt(S(t)^1, function(_,i,s) +-- local t = tokenmappings[s] +-- if t then +-- return true, t, i +-- end +-- end) + +lualexer._rules = { + { "whitespace", spacing }, + { "keyword", keyword }, -- can be combined + -- { "structure", structure }, + { "function", builtin }, -- can be combined + { "constant", constant }, -- can be combined + -- { "experimental", experimental }, -- works but better split + { "csname", csname }, + { "goto", gotokeyword }, + { "identifier", identifier }, + { "string", string }, + { "number", number }, + { "longcomment", longcomment }, + { "shortcomment", shortcomment }, + { "label", gotolabel }, + { "operator", operator }, + { "rest", rest }, +} + +-- -- experiment +-- +-- local idtoken = R("az","AZ","__") +-- +-- function context.one_of_match(specification) +-- local pattern = idtoken -- the concat catches _ etc +-- local list = { } +-- for i=1,#specification do +-- local style = specification[i][1] +-- local words = specification[i][2] +-- pattern = pattern + S(table.concat(words)) +-- for i=1,#words do +-- list[words[i]] = style +-- end +-- end +-- return Cmt(pattern^1, function(_,i,s) +-- local style = list[s] +-- if style then +-- return true, { style, i } -- and i or nil +-- else +-- -- fail +-- end +-- end) +-- end +-- +-- local whatever = context.one_of_match { +-- { "keyword", keywords }, -- keyword +-- { "plain", functions }, -- builtin +-- { "data", constants }, -- constant +-- } +-- +-- lualexer._rules = { +-- { "whitespace", spacing }, +-- { "whatever", whatever }, +-- { "csname", csname }, +-- { "goto", gotokeyword }, +-- { "identifier", identifier }, +-- { "string", string }, +-- { "number", number }, +-- { 
"longcomment", longcomment }, +-- { "shortcomment", shortcomment }, +-- { "label", gotolabel }, +-- { "operator", operator }, +-- { "rest", rest }, +-- } + +lualexer._tokenstyles = context.styleset + +-- lualexer._foldpattern = R("az")^2 + S("{}[]") -- separate entry else interference + +lualexer._foldpattern = (P("end") + P("if") + P("do") + P("function") + P("repeat") + P("until")) * P(#(1 - R("az"))) + + S("{}[]") + +lualexer._foldsymbols = { + _patterns = { + "[a-z][a-z]+", + "[{}%[%]]", + }, + ["keyword"] = { -- challenge: if=0 then=1 else=-1 elseif=-1 + ["if"] = 1, -- if .. [then|else] .. end + ["do"] = 1, -- [while] do .. end + ["function"] = 1, -- function .. end + ["repeat"] = 1, -- repeat .. until + ["until"] = -1, + ["end"] = -1, + }, + ["comment"] = { + ["["] = 1, ["]"] = -1, + }, + -- ["quote"] = { -- confusing + -- ["["] = 1, ["]"] = -1, + -- }, + ["special"] = { + -- ["("] = 1, [")"] = -1, + ["{"] = 1, ["}"] = -1, + }, +} + +-- embedded in tex: + +local cstoken = R("az","AZ","\127\255") + S("@!?_") +local texcsname = P("\\") * cstoken^1 +local commentline = P("%") * (1-S("\n\r"))^0 + +local texcomment = token("comment", Cmt(commentline, function() return directives.cld_inline end)) + +local longthreestart = P("\\!!bs") +local longthreestop = P("\\!!es") +local longthreestring = (1-longthreestop)^0 + +local texstring = token("quote", longthreestart) + * token("string", longthreestring) + * token("quote", longthreestop) + +----- texcommand = token("user", texcsname) +local texcommand = token("warning", texcsname) + +-- local texstring = token("quote", longthreestart) +-- * (texcommand + token("string",P(1-texcommand-longthreestop)^1) - longthreestop)^0 -- we match long non-\cs sequences +-- * token("quote", longthreestop) + +-- local whitespace = "whitespace" +-- local spacing = token(whitespace, space^1) + +lualexer._directives = directives + +lualexer._rules_cld = { + { "whitespace", spacing }, + { "texstring", texstring }, + { "texcomment", texcomment }, + { "texcommand", texcommand }, + -- { "structure", structure }, + { "keyword", keyword }, + { "function", builtin }, + { "csname", csname }, + { "constant", constant }, + { "identifier", identifier }, + { "string", string }, + { "longcomment", longcomment }, + { "shortcomment", shortcomment }, -- should not be used inline so best signal it as comment (otherwise complex state till end of inline) + { "number", number }, + { "operator", operator }, + { "rest", rest }, +} + +return lualexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-mps.lua b/context/data/textadept/context/lexers/scite-context-lexer-mps.lua new file mode 100644 index 000000000..e24a41d0c --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-mps.lua @@ -0,0 +1,183 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for metafun", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +local global, string, table, lpeg = _G, string, table, lpeg +local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V +local type = type + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token +local exact_match = lexer.exact_match + +local metafunlexer = lexer.new("mps","scite-context-lexer-mps") +local whitespace = metafunlexer.whitespace + +local metapostprimitives = { } +local metapostinternals 
= { } +local metapostshortcuts = { } +local metapostcommands = { } + +local metafuninternals = { } +local metafunshortcuts = { } +local metafuncommands = { } + +local mergedshortcuts = { } +local mergedinternals = { } + +do + + local definitions = context.loaddefinitions("scite-context-data-metapost") + + if definitions then + metapostprimitives = definitions.primitives or { } + metapostinternals = definitions.internals or { } + metapostshortcuts = definitions.shortcuts or { } + metapostcommands = definitions.commands or { } + end + + local definitions = context.loaddefinitions("scite-context-data-metafun") + + if definitions then + metafuninternals = definitions.internals or { } + metafunshortcuts = definitions.shortcuts or { } + metafuncommands = definitions.commands or { } + end + + for i=1,#metapostshortcuts do + mergedshortcuts[#mergedshortcuts+1] = metapostshortcuts[i] + end + for i=1,#metafunshortcuts do + mergedshortcuts[#mergedshortcuts+1] = metafunshortcuts[i] + end + + for i=1,#metapostinternals do + mergedinternals[#mergedinternals+1] = metapostinternals[i] + end + for i=1,#metafuninternals do + mergedinternals[#mergedinternals+1] = metafuninternals[i] + end + +end + +local space = patterns.space -- S(" \n\r\t\f\v") +local any = patterns.any + +local dquote = P('"') +local cstoken = patterns.idtoken +local mptoken = patterns.alpha +local leftbrace = P("{") +local rightbrace = P("}") +local number = patterns.real + +local cstokentex = R("az","AZ","\127\255") + S("@!?_") + +-- we could collapse as in tex + +local spacing = token(whitespace, space^1) +local rest = token("default", any) +local comment = token("comment", P("%") * (1-S("\n\r"))^0) +local internal = token("reserved", exact_match(mergedshortcuts,false)) +local shortcut = token("data", exact_match(mergedinternals)) +local helper = token("command", exact_match(metafuncommands)) +local plain = token("plain", exact_match(metapostcommands)) +local quoted = token("quote", dquote) + * token("string", P(1-dquote)^0) + * token("quote", dquote) +local texstuff = token("quote", P("btex ") + P("verbatimtex ")) + * token("string", P(1-P(" etex"))^0) + * token("quote", P(" etex")) +local primitive = token("primitive", exact_match(metapostprimitives)) +local identifier = token("default", cstoken^1) +local number = token("number", number) +local grouping = token("grouping", S("()[]{}")) -- can be an option +local suffix = token("number", P("#@") + P("@#") + P("#")) +local special = token("special", P("#@") + P("@#") + S("#()[]{}<>=:\"")) -- or else := <> etc split +local texlike = token("warning", P("\\") * cstokentex^1) +local extra = token("extra", P("+-+") + P("++") + S("`~%^&_-+*/\'|\\")) + +local nested = P { leftbrace * (V(1) + (1-rightbrace))^0 * rightbrace } +local texlike = token("embedded", P("\\") * (P("MP") + P("mp")) * mptoken^1) + * spacing^0 + * token("grouping", leftbrace) + * token("default", (nested + (1-rightbrace))^0 ) + * token("grouping", rightbrace) + + token("warning", P("\\") * cstokentex^1) + +-- lua: we assume: lua ( "lua code" ) + +local cldlexer = lexer.load("scite-context-lexer-cld","mps-cld") + +local startlua = P("lua") * space^0 * P('(') * space^0 * P('"') +local stoplua = P('"') * space^0 * P(')') + +local startluacode = token("embedded", startlua) +local stopluacode = #stoplua * token("embedded", stoplua) + +lexer.embed_lexer(metafunlexer, cldlexer, startluacode, stopluacode) + +local luacall = token("embedded",P("lua") * ( P(".") * R("az","AZ","__")^1 )^1) + +metafunlexer._rules = { + { 
"whitespace", spacing }, + { "comment", comment }, + { "internal", internal }, + { "shortcut", shortcut }, + { "helper", helper }, + { "plain", plain }, + { "primitive", primitive }, + { "luacall", luacall }, + { "texstuff", texstuff }, + { "suffix", suffix }, + { "identifier", identifier }, + { "number", number }, + { "quoted", quoted }, + -- { "grouping", grouping }, -- can be an option + { "special", special }, + { "texlike", texlike }, + { "extra", extra }, + { "rest", rest }, +} + +metafunlexer._tokenstyles = context.styleset + +metafunlexer._foldpattern = patterns.lower^2 -- separate entry else interference + +metafunlexer._foldsymbols = { + _patterns = { + "[a-z][a-z]+", + }, + ["plain"] = { + ["beginfig"] = 1, + ["endfig"] = -1, + ["beginglyph"] = 1, + ["endglyph"] = -1, + -- ["begingraph"] = 1, + -- ["endgraph"] = -1, + }, + ["primitive"] = { + ["def"] = 1, + ["vardef"] = 1, + ["primarydef"] = 1, + ["secondarydef" ] = 1, + ["tertiarydef"] = 1, + ["enddef"] = -1, + ["if"] = 1, + ["fi"] = -1, + ["for"] = 1, + ["forever"] = 1, + ["endfor"] = -1, + } +} + +-- if inspect then inspect(metafunlexer) end + +return metafunlexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua new file mode 100644 index 000000000..cdf33cf7c --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua @@ -0,0 +1,137 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for pdf objects", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- no longer used: nesting lexers with whitespace in start/stop is unreliable + +local P, R, S, C, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.V + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token + +local pdfobjectlexer = lexer.new("pdfobj","scite-context-lexer-pdf-object") +local whitespace = pdfobjectlexer.whitespace + +local space = patterns.space +local spacing = patterns.spacing +local nospacing = patterns.nospacing +local anything = patterns.anything +local newline = patterns.eol +local real = patterns.real +local cardinal = patterns.cardinal + +local lparent = P("(") +local rparent = P(")") +local langle = P("<") +local rangle = P(">") +local escape = P("\\") +local unicodetrigger = P("feff") + +local nametoken = 1 - space - S("<>/[]()") +local name = P("/") * nametoken^1 + +local p_string = P { ( escape * anything + lparent * V(1) * rparent + (1 - rparent) )^0 } + +local t_spacing = token(whitespace, spacing) +local t_spaces = token(whitespace, spacing)^0 +local t_rest = token("default", nospacing) -- anything + +local p_stream = P("stream") +local p_endstream = P("endstream") +local p_obj = P("obj") +local p_endobj = P("endobj") +local p_reference = P("R") + +local p_objectnumber = patterns.cardinal +local p_comment = P("%") * (1-S("\n\r"))^0 + +local t_string = token("quote", lparent) + * token("string", p_string) + * token("quote", rparent) +local t_unicode = token("quote", langle) + * token("plain", unicodetrigger) + * token("string", (1-rangle)^1) + * token("quote", rangle) +local t_whatsit = token("quote", langle) + * token("string", (1-rangle)^1) + * token("quote", rangle) +local t_keyword = token("command", name) +local t_constant = token("constant", name) +local t_number = 
token("number", real) +-- t_reference = token("number", cardinal) +-- * t_spacing +-- * token("number", cardinal) +local t_reserved = token("number", P("true") + P("false") + P("NULL")) +local t_reference = token("warning", cardinal) + * t_spacing + * token("warning", cardinal) + * t_spacing + * token("keyword", p_reference) + +local t_comment = token("comment", p_comment) + +local t_openobject = token("warning", p_objectnumber * spacing) +-- * t_spacing + * token("warning", p_objectnumber * spacing) +-- * t_spacing + * token("keyword", p_obj) +local t_closeobject = token("keyword", p_endobj) + +local t_opendictionary = token("grouping", P("<<")) +local t_closedictionary = token("grouping", P(">>")) + +local t_openarray = token("grouping", P("[")) +local t_closearray = token("grouping", P("]")) + +-- todo: comment + +local t_stream = token("keyword", p_stream) +-- * token("default", newline * (1-newline*p_endstream*newline)^1 * newline) +-- * token("text", (1 - p_endstream)^1) + * (token("text", (1 - p_endstream-spacing)^1) + t_spacing)^1 + * token("keyword", p_endstream) + +local t_dictionary = { "dictionary", + dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary, + array = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray, + whatever = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit, + } + +----- t_object = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg) +----- object = t_spaces * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject, +----- dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary, +----- array = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray, +----- whatever = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit, +----- number = t_number, +----- } + +local t_object = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg) + dictionary = t_dictionary.dictionary, + array = t_dictionary.array, + whatever = t_dictionary.whatever, + object = t_openobject^-1 * t_spaces * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject, + number = t_number, + } + +pdfobjectlexer._shared = { + dictionary = t_dictionary, + object = t_object, + stream = t_stream, +} + +pdfobjectlexer._rules = { + { "whitespace", t_spacing }, -- in fact, here we don't want whitespace as it's top level lexer work + { "object", t_object }, +} + +pdfobjectlexer._tokenstyles = context.styleset + +return pdfobjectlexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua new file mode 100644 index 000000000..f08d16488 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua @@ -0,0 +1,44 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for pdf xref", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- no longer used: nesting lexers with whitespace in start/stop is unreliable + +local P, R = lpeg.P, lpeg.R + +-- local lexer = require("lexer") +local lexer = 
require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local pdfxreflexer = lexer.new("pdfxref","scite-context-lexer-pdf-xref")
+local whitespace = pdfxreflexer.whitespace
+
+local spacing = patterns.spacing
+local cardinal = patterns.cardinal
+local alpha = patterns.alpha
+
+local t_spacing = token(whitespace, spacing)
+
+local p_xref = P("xref")
+local t_xref = token("keyword",p_xref)
+             * token("number", cardinal * spacing * cardinal * spacing)
+
+local t_number = token("number", cardinal * spacing * cardinal * spacing)
+               * token("keyword", alpha)
+
+pdfxreflexer._rules = {
+    { "whitespace", t_spacing },
+    { "xref", t_xref },
+    { "number", t_number },
+}
+
+pdfxreflexer._tokenstyles = context.styleset
+
+return pdfxreflexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
new file mode 100644
index 000000000..1d4796ea5
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
@@ -0,0 +1,205 @@
+local info = {
+    version   = 1.002,
+    comment   = "scintilla lpeg lexer for pdf",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+-- pdf is normally static .. i.e. not edited, so we don't really
+-- need embedded lexers.
+
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local pdflexer = lexer.new("pdf","scite-context-lexer-pdf")
+local whitespace = pdflexer.whitespace
+
+----- pdfobjectlexer = lexer.load("scite-context-lexer-pdf-object")
+----- pdfxreflexer = lexer.load("scite-context-lexer-pdf-xref")
+
+local anything = patterns.anything
+local space = patterns.space
+local spacing = patterns.spacing
+local nospacing = patterns.nospacing
+local anything = patterns.anything
+local restofline = patterns.restofline
+
+local t_whitespace = token(whitespace, spacing)
+local t_spacing = token("default", spacing)
+----- t_rest = token("default", nospacing)
+local t_rest = token("default", anything)
+
+local p_comment = P("%") * restofline
+local t_comment = token("comment", p_comment)
+
+-- whatever
+
+local space = patterns.space
+local spacing = patterns.spacing
+local nospacing = patterns.nospacing
+local anything = patterns.anything
+local newline = patterns.eol
+local real = patterns.real
+local cardinal = patterns.cardinal
+local alpha = patterns.alpha
+
+local lparent = P("(")
+local rparent = P(")")
+local langle = P("<")
+local rangle = P(">")
+local escape = P("\\")
+local unicodetrigger = P("feff")
+
+local nametoken = 1 - space - S("<>/[]()")
+local name = P("/") * nametoken^1
+
+local p_string = P { ( escape * anything + lparent * V(1) * rparent + (1 - rparent) )^0 }
+
+local t_spacing = token("default", spacing)
+local t_spaces = token("default", spacing)^0
+local t_rest = token("default", nospacing) -- anything
+
+local p_stream = P("stream")
+local p_endstream = P("endstream")
+local p_obj = P("obj")
+local p_endobj = P("endobj")
+local p_reference = P("R")
+
+local p_objectnumber = patterns.cardinal
+local p_comment = P("%") * (1-S("\n\r"))^0
+
+local t_string = token("quote", lparent)
+               * token("string", p_string)
+               * token("quote", rparent)
+local t_unicode = token("quote", langle)
+                * token("plain",
unicodetrigger) + * token("string", (1-rangle)^1) + * token("quote", rangle) +local t_whatsit = token("quote", langle) + * token("string", (1-rangle)^1) + * token("quote", rangle) +local t_keyword = token("command", name) +local t_constant = token("constant", name) +local t_number = token("number", real) +-- t_reference = token("number", cardinal) +-- * t_spacing +-- * token("number", cardinal) +local t_reserved = token("number", P("true") + P("false") + P("NULL")) +-- t_reference = token("warning", cardinal * spacing * cardinal * spacing) +-- * token("keyword", p_reference) +local t_reference = token("warning", cardinal) + * t_spacing + * token("warning", cardinal) + * t_spacing + * token("keyword", p_reference) + +local t_comment = token("comment", p_comment) + +local t_openobject = token("warning", p_objectnumber) + * t_spacing + * token("warning", p_objectnumber) + * t_spacing + * token("keyword", p_obj) +-- t_openobject = token("warning", p_objectnumber * spacing) +-- * token("warning", p_objectnumber * spacing) +-- * token("keyword", p_obj) +local t_closeobject = token("keyword", p_endobj) + +local t_opendictionary = token("grouping", P("<<")) +local t_closedictionary = token("grouping", P(">>")) + +local t_openarray = token("grouping", P("[")) +local t_closearray = token("grouping", P("]")) + +local t_stream = token("keyword", p_stream) + * token("text", (1 - p_endstream)^1) + * token("keyword", p_endstream) + +local t_dictionary = { "dictionary", + dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary, + array = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray, + whatever = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit, + } + +local t_object = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg) + dictionary = t_dictionary.dictionary, + array = t_dictionary.array, + whatever = t_dictionary.whatever, + object = t_openobject * t_spaces * (V("dictionary")^-1 * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject, + number = t_number, + } + +-- objects ... sometimes NUL characters play havoc ... and in xref we have +-- issues with embedded lexers that have spaces in the start and stop +-- conditions and this cannot be handled well either ... so, an imperfect +-- solution ... 
but anyway, there is not that much that can end up in +-- the root of the tree see we're sort of safe + +local p_trailer = P("trailer") +local t_trailer = token("keyword", p_trailer) + * t_spacing + * t_dictionary +-- t_trailer = token("keyword", p_trailer * spacing) +-- * t_dictionary + +local p_startxref = P("startxref") +local t_startxref = token("keyword", p_startxref) + * t_spacing + * token("number", cardinal) +-- t_startxref = token("keyword", p_startxref * spacing) +-- * token("number", cardinal) + +local p_xref = P("xref") +local t_xref = token("keyword",p_xref) + * t_spacing + * token("number", cardinal) + * t_spacing + * token("number", cardinal) + * spacing +-- t_xref = token("keyword",p_xref) +-- * token("number", spacing * cardinal * spacing * cardinal * spacing) + +local t_number = token("number", cardinal) + * t_spacing + * token("number", cardinal) + * t_spacing + * token("keyword", S("fn")) +-- t_number = token("number", cardinal * spacing * cardinal * spacing) +-- * token("keyword", S("fn")) + +pdflexer._rules = { + { "whitespace", t_whitespace }, + { "object", t_object }, + { "comment", t_comment }, + { "trailer", t_trailer }, + { "startxref", t_startxref }, + { "xref", t_xref }, + { "number", t_number }, + { "rest", t_rest }, +} + +pdflexer._tokenstyles = context.styleset + +-- lexer.inspect(pdflexer) + +-- collapser: obj endobj stream endstream + +pdflexer._foldpattern = p_obj + p_endobj + p_stream + p_endstream + +pdflexer._foldsymbols = { + ["keyword"] = { + ["obj"] = 1, + ["endobj"] = -1, + ["stream"] = 1, + ["endstream"] = -1, + }, +} + +return pdflexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-sql.lua b/context/data/textadept/context/lexers/scite-context-lexer-sql.lua new file mode 100644 index 000000000..ea432c5c9 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-sql.lua @@ -0,0 +1,239 @@ +local info = { + version = 1.001, + comment = "scintilla lpeg lexer for sql", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token +local exact_match = lexer.exact_match + +local sqllexer = lexer.new("sql","scite-context-lexer-sql") +local whitespace = sqllexer.whitespace + +-- ANSI SQL 92 | 99 | 2003 + +local keywords_standard = { + "absolute", "action", "add", "after", "all", "allocate", "alter", "and", "any", + "are", "array", "as", "asc", "asensitive", "assertion", "asymmetric", "at", + "atomic", "authorization", "avg", "before", "begin", "between", "bigint", + "binary", "bit", "bit_length", "blob", "boolean", "both", "breadth", "by", + "call", "called", "cascade", "cascaded", "case", "cast", "catalog", "char", + "char_length", "character", "character_length", "check", "clob", "close", + "coalesce", "collate", "collation", "column", "commit", "condition", "connect", + "connection", "constraint", "constraints", "constructor", "contains", "continue", + "convert", "corresponding", "count", "create", "cross", "cube", "current", + "current_date", "current_default_transform_group", "current_path", + "current_role", "current_time", "current_timestamp", + "current_transform_group_for_type", "current_user", "cursor", "cycle", "data", + "date", "day", "deallocate", "dec", "decimal", "declare", "default", + 
"deferrable", "deferred", "delete", "depth", "deref", "desc", "describe", + "descriptor", "deterministic", "diagnostics", "disconnect", "distinct", "do", + "domain", "double", "drop", "dynamic", "each", "element", "else", "elseif", + "end", "equals", "escape", "except", "exception", "exec", "execute", "exists", + "exit", "external", "extract", "false", "fetch", "filter", "first", "float", + "for", "foreign", "found", "free", "from", "full", "function", "general", "get", + "global", "go", "goto", "grant", "group", "grouping", "handler", "having", + "hold", "hour", "identity", "if", "immediate", "in", "indicator", "initially", + "inner", "inout", "input", "insensitive", "insert", "int", "integer", + "intersect", "interval", "into", "is", "isolation", "iterate", "join", "key", + "language", "large", "last", "lateral", "leading", "leave", "left", "level", + "like", "local", "localtime", "localtimestamp", "locator", "loop", "lower", + "map", "match", "max", "member", "merge", "method", "min", "minute", "modifies", + "module", "month", "multiset", "names", "national", "natural", "nchar", "nclob", + "new", "next", "no", "none", "not", "null", "nullif", "numeric", "object", + "octet_length", "of", "old", "on", "only", "open", "option", "or", "order", + "ordinality", "out", "outer", "output", "over", "overlaps", "pad", "parameter", + "partial", "partition", "path", "position", "precision", "prepare", "preserve", + "primary", "prior", "privileges", "procedure", "public", "range", "read", + "reads", "real", "recursive", "ref", "references", "referencing", "relative", + "release", "repeat", "resignal", "restrict", "result", "return", "returns", + "revoke", "right", "role", "rollback", "rollup", "routine", "row", "rows", + "savepoint", "schema", "scope", "scroll", "search", "second", "section", + "select", "sensitive", "session", "session_user", "set", "sets", "signal", + "similar", "size", "smallint", "some", "space", "specific", "specifictype", + "sql", "sqlcode", "sqlerror", "sqlexception", "sqlstate", "sqlwarning", "start", + "state", "static", "submultiset", "substring", "sum", "symmetric", "system", + "system_user", "table", "tablesample", "temporary", "then", "time", "timestamp", + "timezone_hour", "timezone_minute", "to", "trailing", "transaction", "translate", + "translation", "treat", "trigger", "trim", "true", "under", "undo", "union", + "unique", "unknown", "unnest", "until", "update", "upper", "usage", "user", + "using", "value", "values", "varchar", "varying", "view", "when", "whenever", + "where", "while", "window", "with", "within", "without", "work", "write", "year", + "zone", +} + +-- The dialects list is taken from drupal.org with standard subtracted. 
+-- +-- MySQL 3.23.x | 4.x | 5.x +-- PostGreSQL 8.1 +-- MS SQL Server 2000 +-- MS ODBC +-- Oracle 10.2 + +local keywords_dialects = { + "a", "abort", "abs", "access", "ada", "admin", "aggregate", "alias", "also", + "always", "analyse", "analyze", "assignment", "attribute", "attributes", "audit", + "auto_increment", "avg_row_length", "backup", "backward", "bernoulli", "bitvar", + "bool", "break", "browse", "bulk", "c", "cache", "cardinality", "catalog_name", + "ceil", "ceiling", "chain", "change", "character_set_catalog", + "character_set_name", "character_set_schema", "characteristics", "characters", + "checked", "checkpoint", "checksum", "class", "class_origin", "cluster", + "clustered", "cobol", "collation_catalog", "collation_name", "collation_schema", + "collect", "column_name", "columns", "command_function", "command_function_code", + "comment", "committed", "completion", "compress", "compute", "condition_number", + "connection_name", "constraint_catalog", "constraint_name", "constraint_schema", + "containstable", "conversion", "copy", "corr", "covar_pop", "covar_samp", + "createdb", "createrole", "createuser", "csv", "cume_dist", "cursor_name", + "database", "databases", "datetime", "datetime_interval_code", + "datetime_interval_precision", "day_hour", "day_microsecond", "day_minute", + "day_second", "dayofmonth", "dayofweek", "dayofyear", "dbcc", "defaults", + "defined", "definer", "degree", "delay_key_write", "delayed", "delimiter", + "delimiters", "dense_rank", "deny", "derived", "destroy", "destructor", + "dictionary", "disable", "disk", "dispatch", "distinctrow", "distributed", "div", + "dual", "dummy", "dump", "dynamic_function", "dynamic_function_code", "enable", + "enclosed", "encoding", "encrypted", "end-exec", "enum", "errlvl", "escaped", + "every", "exclude", "excluding", "exclusive", "existing", "exp", "explain", + "fields", "file", "fillfactor", "final", "float4", "float8", "floor", "flush", + "following", "force", "fortran", "forward", "freetext", "freetexttable", + "freeze", "fulltext", "fusion", "g", "generated", "granted", "grants", + "greatest", "header", "heap", "hierarchy", "high_priority", "holdlock", "host", + "hosts", "hour_microsecond", "hour_minute", "hour_second", "identified", + "identity_insert", "identitycol", "ignore", "ilike", "immutable", + "implementation", "implicit", "include", "including", "increment", "index", + "infile", "infix", "inherit", "inherits", "initial", "initialize", "insert_id", + "instance", "instantiable", "instead", "int1", "int2", "int3", "int4", "int8", + "intersection", "invoker", "isam", "isnull", "k", "key_member", "key_type", + "keys", "kill", "lancompiler", "last_insert_id", "least", "length", "less", + "limit", "lineno", "lines", "listen", "ln", "load", "location", "lock", "login", + "logs", "long", "longblob", "longtext", "low_priority", "m", "matched", + "max_rows", "maxextents", "maxvalue", "mediumblob", "mediumint", "mediumtext", + "message_length", "message_octet_length", "message_text", "middleint", + "min_rows", "minus", "minute_microsecond", "minute_second", "minvalue", + "mlslabel", "mod", "mode", "modify", "monthname", "more", "move", "mumps", + "myisam", "name", "nesting", "no_write_to_binlog", "noaudit", "nocheck", + "nocompress", "nocreatedb", "nocreaterole", "nocreateuser", "noinherit", + "nologin", "nonclustered", "normalize", "normalized", "nosuperuser", "nothing", + "notify", "notnull", "nowait", "nullable", "nulls", "number", "octets", "off", + "offline", "offset", "offsets", "oids", "online", 
"opendatasource", "openquery", + "openrowset", "openxml", "operation", "operator", "optimize", "optionally", + "options", "ordering", "others", "outfile", "overlay", "overriding", "owner", + "pack_keys", "parameter_mode", "parameter_name", "parameter_ordinal_position", + "parameter_specific_catalog", "parameter_specific_name", + "parameter_specific_schema", "parameters", "pascal", "password", "pctfree", + "percent", "percent_rank", "percentile_cont", "percentile_disc", "placing", + "plan", "pli", "postfix", "power", "preceding", "prefix", "preorder", "prepared", + "print", "proc", "procedural", "process", "processlist", "purge", "quote", + "raid0", "raiserror", "rank", "raw", "readtext", "recheck", "reconfigure", + "regexp", "regr_avgx", "regr_avgy", "regr_count", "regr_intercept", "regr_r2", + "regr_slope", "regr_sxx", "regr_sxy", "regr_syy", "reindex", "reload", "rename", + "repeatable", "replace", "replication", "require", "reset", "resource", + "restart", "restore", "returned_cardinality", "returned_length", + "returned_octet_length", "returned_sqlstate", "rlike", "routine_catalog", + "routine_name", "routine_schema", "row_count", "row_number", "rowcount", + "rowguidcol", "rowid", "rownum", "rule", "save", "scale", "schema_name", + "schemas", "scope_catalog", "scope_name", "scope_schema", "second_microsecond", + "security", "self", "separator", "sequence", "serializable", "server_name", + "setof", "setuser", "share", "show", "shutdown", "simple", "soname", "source", + "spatial", "specific_name", "sql_big_result", "sql_big_selects", + "sql_big_tables", "sql_calc_found_rows", "sql_log_off", "sql_log_update", + "sql_low_priority_updates", "sql_select_limit", "sql_small_result", + "sql_warnings", "sqlca", "sqrt", "ssl", "stable", "starting", "statement", + "statistics", "status", "stddev_pop", "stddev_samp", "stdin", "stdout", + "storage", "straight_join", "strict", "string", "structure", "style", + "subclass_origin", "sublist", "successful", "superuser", "synonym", "sysdate", + "sysid", "table_name", "tables", "tablespace", "temp", "template", "terminate", + "terminated", "text", "textsize", "than", "ties", "tinyblob", "tinyint", + "tinytext", "toast", "top", "top_level_count", "tran", "transaction_active", + "transactions_committed", "transactions_rolled_back", "transform", "transforms", + "trigger_catalog", "trigger_name", "trigger_schema", "truncate", "trusted", + "tsequal", "type", "uescape", "uid", "unbounded", "uncommitted", "unencrypted", + "unlisten", "unlock", "unnamed", "unsigned", "updatetext", "use", + "user_defined_type_catalog", "user_defined_type_code", "user_defined_type_name", + "user_defined_type_schema", "utc_date", "utc_time", "utc_timestamp", "vacuum", + "valid", "validate", "validator", "var_pop", "var_samp", "varbinary", "varchar2", + "varcharacter", "variable", "variables", "verbose", "volatile", "waitfor", + "width_bucket", "writetext", "x509", "xor", "year_month", "zerofill", +} + +local space = patterns.space -- S(" \n\r\t\f\v") +local any = patterns.any +local restofline = patterns.restofline +local startofline = patterns.startofline + +local squote = P("'") +local dquote = P('"') +local bquote = P('`') +local escaped = P("\\") * P(1) + +local begincomment = P("/*") +local endcomment = P("*/") + +local decimal = patterns.decimal +local float = patterns.float +local integer = P("-")^-1 * decimal + +local spacing = token(whitespace, space^1) +local rest = token("default", any) + +local shortcomment = token("comment", (P("#") + P("--")) * restofline^0) +local 
longcomment = token("comment", begincomment * (1-endcomment)^0 * endcomment^-1) + +local p_validword = R("AZ","az","__") * R("AZ","az","__","09")^0 +local identifier = token("default",p_validword) + +local shortstring = token("quote", dquote) -- can be shared + * token("string", (escaped + (1-dquote))^0) + * token("quote", dquote) + + token("quote", squote) + * token("string", (escaped + (1-squote))^0) + * token("quote", squote) + + token("quote", bquote) + * token("string", (escaped + (1-bquote))^0) + * token("quote", bquote) + +local p_keywords_s = exact_match(keywords_standard,nil,true) +local p_keywords_d = exact_match(keywords_dialects,nil,true) +local keyword_s = token("keyword", p_keywords_s) +local keyword_d = token("command", p_keywords_d) + +local number = token("number", float + integer) +local operator = token("special", S("+-*/%^!=<>;:{}[]().&|?~")) + +sqllexer._tokenstyles = context.styleset + +sqllexer._foldpattern = P("/*") + P("*/") + S("{}") -- separate entry else interference + +sqllexer._foldsymbols = { + _patterns = { + "/%*", + "%*/", + }, + ["comment"] = { + ["/*"] = 1, + ["*/"] = -1, + } +} + +sqllexer._rules = { + { "whitespace", spacing }, + { "keyword-s", keyword_s }, + { "keyword-d", keyword_d }, + { "identifier", identifier }, + { "string", shortstring }, + { "longcomment", longcomment }, + { "shortcomment", shortcomment }, + { "number", number }, + { "operator", operator }, + { "rest", rest }, +} + +return sqllexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua new file mode 100644 index 000000000..4a55fd143 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua @@ -0,0 +1,24 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for tex web", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local texweblexer = lexer.new("tex-web","scite-context-lexer-tex") +local texlexer = lexer.load("scite-context-lexer-tex") + +-- can probably be done nicer now, a bit of a hack + +texweblexer._rules = texlexer._rules_web +texweblexer._tokenstyles = texlexer._tokenstyles +texweblexer._foldsymbols = texlexer._foldsymbols +texweblexer._directives = texlexer._directives + +return texweblexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-tex.lua b/context/data/textadept/context/lexers/scite-context-lexer-tex.lua new file mode 100644 index 000000000..bc08bfcd9 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-tex.lua @@ -0,0 +1,567 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for context", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- maybe: _LINEBYLINE variant for large files (no nesting) +-- maybe: protected_macros + +--[[ + + experiment dd 2009/10/28 .. 
todo:
+
+    -- figure out if tabs instead of splits are possible
+    -- locate an option to enter name in file dialogue (like windows permits)
+    -- figure out why loading a file fails
+    -- we cannot print to the log pane
+    -- we cannot access props["keywordclass.macros.context.en"]
+    -- lexer.get_property only handles integers
+    -- we cannot run a command to get the location of mult-def.lua
+
+    -- local interface = props["keywordclass.macros.context.en"]
+    -- local interface = lexer.get_property("keywordclass.macros.context.en","")
+
+]]--
+
+local global, string, table, lpeg = _G, string, table, lpeg
+local P, R, S, V, C, Cmt, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt, lpeg.Cp, lpeg.Cc, lpeg.Ct
+local type, next = type, next
+local find, match, lower, upper = string.find, string.match, string.lower, string.upper
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+local inform = context.inform
+
+local token = lexer.token
+local exact_match = lexer.exact_match
+
+local contextlexer = lexer.new("tex","scite-context-lexer-tex")
+local whitespace = contextlexer.whitespace
+
+local cldlexer = lexer.load("scite-context-lexer-cld")
+local mpslexer = lexer.load("scite-context-lexer-mps")
+
+local commands = { en = { } }
+local primitives = { }
+local helpers = { }
+local constants = { }
+
+do -- todo: only once, store in global
+
+    -- commands helpers primitives
+
+    local definitions = context.loaddefinitions("scite-context-data-interfaces")
+
+    if definitions then
+        -- a separate accumulator for the interface names; reusing 'list' as the
+        -- loop variable would shadow it and pollute the per-interface lists
+        local interfaces = { }
+        for interface, list in next, definitions do
+            interfaces[#interfaces+1] = interface
+            local c = { }
+            for i=1,#list do
+                c[list[i]] = true
+            end
+            if interface ~= "en" then
+                list = definitions.en
+                if list then
+                    for i=1,#list do
+                        c[list[i]] = true
+                    end
+                end
+            end
+            commands[interface] = c
+        end
+        inform("context user interfaces '%s' supported",table.concat(interfaces," "))
+    end
+
+    local definitions = context.loaddefinitions("scite-context-data-context")
+    local overloaded = { }
+
+    if definitions then
+        helpers = definitions.helpers or { }
+        constants = definitions.constants or { }
+        for i=1,#helpers do
+            overloaded[helpers[i]] = true
+        end
+        for i=1,#constants do
+            overloaded[constants[i]] = true
+        end
+    end
+
+    local definitions = context.loaddefinitions("scite-context-data-tex")
+
+    if definitions then
+        local function add(data,normal)
+            for k, v in next, data do
+                if v ~= "/" and v ~= "-" then
+                    if not overloaded[v] then
+                        primitives[#primitives+1] = v
+                    end
+                    if normal then
+                        v = "normal" ..
v + if not overloaded[v] then + primitives[#primitives+1] = v + end + end + end + end + end + add(definitions.tex,true) + add(definitions.etex,true) + add(definitions.pdftex,true) + add(definitions.aleph,true) + add(definitions.omega,true) + add(definitions.luatex,true) + add(definitions.xetex,true) + end + +end + +local currentcommands = commands.en or { } + +local cstoken = R("az","AZ","\127\255") + S("@!?_") + +local knowncommand = Cmt(cstoken^1, function(_,i,s) + return currentcommands[s] and i +end) + +local utfchar = context.utfchar +local wordtoken = context.patterns.wordtoken +local iwordtoken = context.patterns.iwordtoken +local wordpattern = context.patterns.wordpattern +local iwordpattern = context.patterns.iwordpattern +local invisibles = context.patterns.invisibles +local checkedword = context.checkedword +local styleofword = context.styleofword +local setwordlist = context.setwordlist +local validwords = false +local validminimum = 3 + +-- % language=uk + +-- fails (empty loop message) ... latest lpeg issue? + +local knownpreamble = Cmt(P("% "), function(input,i,_) -- todo : utfbomb, was #P("% ") + if i < 10 then + validwords, validminimum = false, 3 + local s, e, word = find(input,"^(.-)[\n\r]",i) -- combine with match + if word then + local interface = match(word,"interface=([a-z][a-z]+)") + if interface and #interface == 2 then + inform("enabling context user interface '%s'",interface) + currentcommands = commands[interface] or commands.en or { } + end + local language = match(word,"language=([a-z][a-z]+)") + validwords, validminimum = setwordlist(language) + end + end + return false +end) + +-- -- the token list contains { "style", endpos } entries +-- -- +-- -- in principle this is faster but it is also crash sensitive for large files + +-- local constants_hash = { } for i=1,#constants do constants_hash [constants [i]] = true end +-- local helpers_hash = { } for i=1,#helpers do helpers_hash [helpers [i]] = true end +-- local primitives_hash = { } for i=1,#primitives do primitives_hash[primitives[i]] = true end + +-- local specialword = Ct( P("\\") * Cmt( C(cstoken^1), function(input,i,s) +-- if currentcommands[s] then +-- return true, "command", i +-- elseif constants_hash[s] then +-- return true, "data", i +-- elseif helpers_hash[s] then +-- return true, "plain", i +-- elseif primitives_hash[s] then +-- return true, "primitive", i +-- else -- if starts with if then primitive +-- return true, "user", i +-- end +-- end) ) + +-- local specialword = P("\\") * Cmt( C(cstoken^1), function(input,i,s) +-- if currentcommands[s] then +-- return true, { "command", i } +-- elseif constants_hash[s] then +-- return true, { "data", i } +-- elseif helpers_hash[s] then +-- return true, { "plain", i } +-- elseif primitives_hash[s] then +-- return true, { "primitive", i } +-- else -- if starts with if then primitive +-- return true, { "user", i } +-- end +-- end) + +-- experiment: keep space with whatever ... 
less tables + +-- 10pt + +local commentline = P("%") * (1-S("\n\r"))^0 +local endline = S("\n\r")^1 + +local space = patterns.space -- S(" \n\r\t\f\v") +local any = patterns.any +local backslash = P("\\") +local hspace = S(" \t") + +local p_spacing = space^1 +local p_rest = any + +local p_preamble = knownpreamble +local p_comment = commentline +----- p_command = backslash * knowncommand +----- p_constant = backslash * exact_match(constants) +----- p_helper = backslash * exact_match(helpers) +----- p_primitive = backslash * exact_match(primitives) + +local p_command = backslash * lexer.helpers.utfchartabletopattern(currentcommands) * #(1-cstoken) +local p_constant = backslash * lexer.helpers.utfchartabletopattern(constants) * #(1-cstoken) +local p_helper = backslash * lexer.helpers.utfchartabletopattern(helpers) * #(1-cstoken) +local p_primitive = backslash * lexer.helpers.utfchartabletopattern(primitives) * #(1-cstoken) + +local p_ifprimitive = P("\\if") * cstoken^1 +local p_csname = backslash * (cstoken^1 + P(1)) +local p_grouping = S("{$}") +local p_special = S("#()[]<>=\"") +local p_extra = S("`~%^&_-+/\'|") +local p_text = iwordtoken^1 --maybe add punctuation and space + +local p_reserved = backslash * ( + P("??") + R("az") * P("!") + ) * cstoken^1 + +local p_number = context.patterns.real +local p_unit = P("pt") + P("bp") + P("sp") + P("mm") + P("cm") + P("cc") + P("dd") + +-- no looking back = #(1-S("[=")) * cstoken^3 * #(1-S("=]")) + +-- This one gives stack overflows: +-- +-- local p_word = Cmt(iwordpattern, function(_,i,s) +-- if validwords then +-- return checkedword(validwords,validminimum,s,i) +-- else +-- -- return true, { "text", i } +-- return true, "text", i +-- end +-- end) +-- +-- So we use this one instead: + +----- p_word = Ct( iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() ) -- the function can be inlined +local p_word = iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() -- the function can be inlined + +----- p_text = (1 - p_grouping - p_special - p_extra - backslash - space + hspace)^1 + +-- keep key pressed at end-of syst-aux.mkiv: +-- +-- 0 : 15 sec +-- 1 : 13 sec +-- 2 : 10 sec +-- +-- the problem is that quite some style subtables get generated so collapsing ranges helps + +local option = 1 + +if option == 1 then + + p_comment = p_comment^1 + p_grouping = p_grouping^1 + p_special = p_special^1 + p_extra = p_extra^1 + + p_command = p_command^1 + p_constant = p_constant^1 + p_helper = p_helper^1 + p_primitive = p_primitive^1 + p_ifprimitive = p_ifprimitive^1 + p_reserved = p_reserved^1 + +elseif option == 2 then + + local included = space^0 + + p_comment = (p_comment * included)^1 + p_grouping = (p_grouping * included)^1 + p_special = (p_special * included)^1 + p_extra = (p_extra * included)^1 + + p_command = (p_command * included)^1 + p_constant = (p_constant * included)^1 + p_helper = (p_helper * included)^1 + p_primitive = (p_primitive * included)^1 + p_ifprimitive = (p_ifprimitive * included)^1 + p_reserved = (p_reserved * included)^1 + +end + +local p_invisible = invisibles^1 + +local spacing = token(whitespace, p_spacing ) + +local rest = token("default", p_rest ) +local preamble = token("preamble", p_preamble ) +local comment = token("comment", p_comment ) +local command = token("command", p_command ) +local constant = token("data", p_constant ) +local helper = token("plain", p_helper ) +local primitive = token("primitive", p_primitive ) +local ifprimitive = token("primitive", 
p_ifprimitive) +local reserved = token("reserved", p_reserved ) +local csname = token("user", p_csname ) +local grouping = token("grouping", p_grouping ) +local number = token("number", p_number ) + * token("constant", p_unit ) +local special = token("special", p_special ) +local reserved = token("reserved", p_reserved ) -- reserved internal preproc +local extra = token("extra", p_extra ) +local invisible = token("invisible", p_invisible ) +local text = token("default", p_text ) +local word = p_word + +----- startluacode = token("grouping", P("\\startluacode")) +----- stopluacode = token("grouping", P("\\stopluacode")) + +local luastatus = false +local luatag = nil +local lualevel = 0 + +local function startdisplaylua(_,i,s) + luatag = s + luastatus = "display" + cldlexer._directives.cld_inline = false + return true +end + +local function stopdisplaylua(_,i,s) + local ok = luatag == s + if ok then + cldlexer._directives.cld_inline = false + luastatus = false + end + return ok +end + +local function startinlinelua(_,i,s) + if luastatus == "display" then + return false + elseif not luastatus then + luastatus = "inline" + cldlexer._directives.cld_inline = true + lualevel = 1 + return true + else-- if luastatus == "inline" then + lualevel = lualevel + 1 + return true + end +end + +local function stopinlinelua_b(_,i,s) -- { + if luastatus == "display" then + return false + elseif luastatus == "inline" then + lualevel = lualevel + 1 -- ? + return false + else + return true + end +end + +local function stopinlinelua_e(_,i,s) -- } + if luastatus == "display" then + return false + elseif luastatus == "inline" then + lualevel = lualevel - 1 + local ok = lualevel <= 0 -- was 0 + if ok then + cldlexer._directives.cld_inline = false + luastatus = false + end + return ok + else + return true + end +end + +contextlexer._reset_parser = function() + luastatus = false + luatag = nil + lualevel = 0 +end + +local luaenvironment = P("lua") * (P("setups") + P("code") + P(true)) + + P("ctxfunction") * (P("definition") + P(true)) + +local inlinelua = P("\\") * ( + P("ctx") * (P("lua") + P("command") + P("late") * (P("lua") + P("command")) + P("function")) + + P("cld") * (P("command") + P("context")) + + P("luaexpr") + + (P("direct") + P("late")) * P("lua") + ) + +local startlua = P("\\start") * Cmt(luaenvironment,startdisplaylua) + + P("<?lua") * Cmt(P(true),startdisplaylua) + + inlinelua * space^0 * ( Cmt(P("{"),startinlinelua) ) + +local stoplua = P("\\stop") * Cmt(luaenvironment,stopdisplaylua) + + P("?>") * Cmt(P(true),stopdisplaylua) + + Cmt(P("{"),stopinlinelua_b) + + Cmt(P("}"),stopinlinelua_e) + +local startluacode = token("embedded", startlua) +local stopluacode = #stoplua * token("embedded", stoplua) + +local luacall = P("clf_") * R("az","__","AZ")^1 + +local metafuncall = ( P("reusable") + P("usable") + P("unique") + P("use") + P("reuse") ) * ("MPgraphic") + + P("uniqueMPpagegraphic") + + P("MPpositiongraphic") + +local metafunenvironment = metafuncall -- ( P("use") + P("reusable") + P("unique") ) * ("MPgraphic") + + P("MP") * ( P("code")+ P("page") + P("inclusions") + P("initializations") + P("definitions") + P("extensions") + P("graphic") + P("calculation") ) + +local startmetafun = P("\\start") * metafunenvironment +local stopmetafun = P("\\stop") * metafunenvironment -- todo match start + +----- subsystem = token("embedded", P("\\xml") * R("az")^1 + (P("\\st") * (P("art") + P("op")) * P("xmlsetups"))) +local subsystemtags = P("xml") + P("btx") -- will be pluggable or maybe even a proper list of 
valid commands +local subsystemmacro = P("\\") * (subsystemtags * R("az")^1 + (R("az")-subsystemtags)^1 * subsystemtags * R("az")^1) +local subsystem = token("embedded", subsystemmacro) + +local openargument = token("special", P("{")) +local closeargument = token("special", P("}")) +local argumentcontent = token("default",(1-P("}"))^0) -- maybe space needs a treatment + +local metafunarguments = (spacing^0 * openargument * argumentcontent * closeargument)^-2 + +local startmetafuncode = token("embedded", startmetafun) * metafunarguments +local stopmetafuncode = token("embedded", stopmetafun) + +local callers = token("embedded", P("\\") * metafuncall) * metafunarguments + + token("embedded", P("\\") * luacall) + +lexer.embed_lexer(contextlexer, cldlexer, startluacode, stopluacode) +lexer.embed_lexer(contextlexer, mpslexer, startmetafuncode, stopmetafuncode) + +contextlexer._rules = { + { "whitespace", spacing }, + { "preamble", preamble }, + { "word", word }, + { "text", text }, -- non words + { "comment", comment }, + { "constant", constant }, + -- { "subsystem", subsystem }, + { "callers", callers }, + { "subsystem", subsystem }, + { "helper", helper }, + { "command", command }, + { "primitive", primitive }, + { "ifprimitive", ifprimitive }, + -- { "subsystem", subsystem }, + { "reserved", reserved }, + { "csname", csname }, + -- { "whatever", specialword }, -- not yet, crashes + { "grouping", grouping }, + -- { "number", number }, + { "special", special }, + { "extra", extra }, + { "invisible", invisible }, + { "rest", rest }, +} + +-- Watch the text grabber, after all, we're talking mostly of text (beware, +-- no punctuation here as it can be special). We might go for utf here. + +local web = lexer.loadluafile("scite-context-lexer-web-snippets") + +if web then + + lexer.inform("supporting web snippets in tex lexer") + + contextlexer._rules_web = { + { "whitespace", spacing }, + { "text", text }, -- non words + { "comment", comment }, + { "constant", constant }, + { "callers", callers }, + { "helper", helper }, + { "command", command }, + { "primitive", primitive }, + { "ifprimitive", ifprimitive }, + { "reserved", reserved }, + { "csname", csname }, + { "grouping", grouping }, + { "special", special }, + { "extra", extra }, + { "invisible", invisible }, + { "web", web.pattern }, + { "rest", rest }, + } + +else + + lexer.report("not supporting web snippets in tex lexer") + + contextlexer._rules_web = { + { "whitespace", spacing }, + { "text", text }, -- non words + { "comment", comment }, + { "constant", constant }, + { "callers", callers }, + { "helper", helper }, + { "command", command }, + { "primitive", primitive }, + { "ifprimitive", ifprimitive }, + { "reserved", reserved }, + { "csname", csname }, + { "grouping", grouping }, + { "special", special }, + { "extra", extra }, + { "invisible", invisible }, + { "rest", rest }, + } + +end + +contextlexer._tokenstyles = context.styleset + +local environment = { + ["\\start"] = 1, ["\\stop"] = -1, + -- ["\\begin"] = 1, ["\\end" ] = -1, +} + +-- local block = { +-- ["\\begin"] = 1, ["\\end" ] = -1, +-- } + +local group = { + ["{"] = 1, ["}"] = -1, +} + +contextlexer._foldpattern = P("\\" ) * (P("start") + P("stop")) + S("{}") -- separate entry else interference + +contextlexer._foldsymbols = { -- these need to be style references .. 
todo: multiple styles + _patterns = { + "\\start", "\\stop", -- regular environments + -- "\\begin", "\\end", -- (moveable) blocks + "[{}]", + }, + ["command"] = environment, + ["constant"] = environment, + ["data"] = environment, + ["user"] = environment, + ["embedded"] = environment, + ["helper"] = environment, + ["plain"] = environment, + ["grouping"] = group, +} + +-- context.inspect(contextlexer) + +return contextlexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-txt.lua b/context/data/textadept/context/lexers/scite-context-lexer-txt.lua new file mode 100644 index 000000000..152e9a663 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-txt.lua @@ -0,0 +1,81 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for plain text (with spell checking)", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +local P, S, Cmt, Cp = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp +local find, match = string.find, string.match + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token + +local textlexer = lexer.new("txt","scite-context-lexer-txt") +local whitespace = textlexer.whitespace + +local space = patterns.space +local any = patterns.any +local wordtoken = patterns.wordtoken +local wordpattern = patterns.wordpattern + +local checkedword = context.checkedword +local styleofword = context.styleofword +local setwordlist = context.setwordlist +local validwords = false +local validminimum = 3 + +-- local styleset = context.newstyleset { +-- "default", +-- "text", "okay", "error", "warning", +-- "preamble", +-- } + +-- [#!-%] language=uk + +local p_preamble = Cmt((S("#!-%") * P(" ")), function(input,i,_) -- todo: utf bomb no longer # + if i == 1 then -- < 10 then + validwords, validminimum = false, 3 + local s, e, line = find(input,"^[#!%-%%](.+)[\n\r]",i) + if line then + local language = match(line,"language=([a-z]+)") + if language then + validwords, validminimum = setwordlist(language) + end + end + end + return false +end) + +local t_preamble = + token("preamble", p_preamble) + +local t_word = + wordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() -- the function can be inlined + +local t_text = + token("default", wordtoken^1) + +local t_rest = + token("default", (1-wordtoken-space)^1) + +local t_spacing = + token(whitespace, space^1) + +textlexer._rules = { + { "whitespace", t_spacing }, + { "preamble", t_preamble }, + { "word", t_word }, -- words >= 3 + { "text", t_text }, -- non words + { "rest", t_rest }, +} + +textlexer._LEXBYLINE = true -- new (needs testing, not yet as the system changed in 3.24) +textlexer._tokenstyles = context.styleset + +return textlexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua b/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua new file mode 100644 index 000000000..141de20e1 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua @@ -0,0 +1,134 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for web snippets", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +local P, R, S, C, Cg, Cb, Cs, Cmt, lpegmatch = lpeg.P, 
lpeg.R, lpeg.S, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Cs, lpeg.Cmt, lpeg.match + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token + +local websnippets = { } + +local space = patterns.space -- S(" \n\r\t\f\v") +local any = patterns.any +local restofline = patterns.restofline +local startofline = patterns.startofline + +local squote = P("'") +local dquote = P('"') +local period = P(".") + +local t_whitespace = token(whitespace, space^1) +local t_spacing = token("default", space^1) +local t_rest = token("default", any) + +-- the web subset + +local p_beginofweb = P("@") +local p_endofweb = P("@>") + +-- @, @/ @| @# @+ @; @[ @] + +local p_directive_1 = p_beginofweb * S(",/|#+;[]") +local t_directive_1 = token("label",p_directive_1) + +-- @.text @>(monospaced) +-- @:text @>(macro driven) +-- @= verbose@> +-- @! underlined @> +-- @t text @> (hbox) +-- @q ignored @> + +local p_typeset = p_beginofweb * S(".:=!tq") +local t_typeset = token("label",p_typeset) * token("warning",(1-p_endofweb)^1) * token("label",p_endofweb) + +-- @^index@> + +local p_index = p_beginofweb * P("^") +local t_index = token("label",p_index) * token("function",(1-p_endofweb)^1) * token("label",p_endofweb) + +-- @f text renderclass + +local p_render = p_beginofweb * S("f") +local t_render = token("label",p_render) * t_spacing * token("warning",(1-space)^1) * t_spacing * token("label",(1-space)^1) + +-- @s idem +-- @p idem +-- @& strip (spaces before) +-- @h + +local p_directive_2 = p_beginofweb * S("sp&h") +local t_directive_2 = token("label",p_directive_2) + +-- @< ... @> [=|+=|] +-- @(foo@> + +local p_reference = p_beginofweb * S("<(") +local t_reference = token("label",p_reference) * token("function",(1-p_endofweb)^1) * token("label",p_endofweb * (P("+=") + P("="))^-1) + +-- @'char' (ascii code) + +local p_character = p_beginofweb * S("'") +local t_character = token("label",p_character) * token("reserved",(1-squote)^1) * token("label",squote) + +-- @l nonascii + +local p_nonascii = p_beginofweb * S("l") +local t_nonascii = token("label",p_nonascii) * t_spacing * token("reserved",(1-space)^1) + +-- @x @y @z changefile +-- @i webfile + +local p_filename = p_beginofweb * S("xyzi") +local t_filename = token("label",p_filename) * t_spacing * token("reserved",(1-space)^1) + +-- @@ escape + +local p_escape = p_beginofweb * p_beginofweb +local t_escape = token("text",p_escape) + +-- structure + +-- @* title. 
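+
+-- btw, as a quick illustration (just a sketch, not a real rule): a line like
+-- "@* Introduction." yields a label for "@*", the title text up to the first
+-- period, and a label for that period; the "@*" part matches like this:
+--
+-- lpeg.match(p_beginofweb * P("*")^1, "@* Introduction.") -- 3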
+ +-- local p_section = p_beginofweb * P("*")^1 +-- local t_section = token("label",p_section) * t_spacing * token("function",(1-period)^1) * token("label",period) + +-- @ explanation + +-- local p_explanation = p_beginofweb +-- local t_explanation = token("label",p_explanation) * t_spacing^1 + +-- @d macro + +-- local p_macro = p_beginofweb * P("d") +-- local t_macro = token("label",p_macro) + +-- @c code + +-- local p_code = p_beginofweb * P("c") +-- local t_code = token("label",p_code) + +websnippets.pattern = P ( + t_typeset + + t_index + + t_render + + t_reference + + t_filename + + t_directive_1 + + t_directive_2 + + t_character + + t_nonascii + + t_escape +) + + +return websnippets diff --git a/context/data/textadept/context/lexers/scite-context-lexer-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-web.lua new file mode 100644 index 000000000..6fe5ac84c --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-web.lua @@ -0,0 +1,68 @@ +local info = { + version = 1.003, + comment = "scintilla lpeg lexer for web", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +local P, R, S = lpeg.P, lpeg.R, lpeg.S + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token +local exact_match = lexer.exact_match + +local weblexer = lexer.new("web","scite-context-lexer-web") +local whitespace = weblexer.whitespace + +local space = patterns.space -- S(" \n\r\t\f\v") +local any = patterns.any +local restofline = patterns.restofline +local startofline = patterns.startofline + +local period = P(".") +local percent = P("%") + +local spacing = token(whitespace, space^1) +local rest = token("default", any) + +local eop = P("@>") +local eos = eop * P("+")^-1 * P("=") + +-- we can put some of the next in the web-snippets file +-- is f okay here? 
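+
+-- a quick check (just a sketch) of the section terminators: "@>=" opens a
+-- definition and "@>+=" appends to one, hence the optional "+" in eos:
+--
+-- lpeg.match(eos, "@>=")  -- 4
+-- lpeg.match(eos, "@>+=") -- 5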
+ +local texcomment = token("comment", percent * restofline^0) + +local texpart = token("label",P("@")) * #spacing + + token("label",P("@") * P("*")^1) * token("function",(1-period)^1) * token("label",period) +local midpart = token("label",P("@d")) * #spacing + + token("label",P("@f")) * #spacing +local cpppart = token("label",P("@c")) * #spacing + + token("label",P("@p")) * #spacing + + token("label",P("@") * S("<(")) * token("function",(1-eop)^1) * token("label",eos) + +local anypart = P("@") * ( P("*")^1 + S("dfcp") + space^1 + S("<(") * (1-eop)^1 * eos ) +local limbo = 1 - anypart - percent + +local texlexer = lexer.load("scite-context-lexer-tex-web") +local cpplexer = lexer.load("scite-context-lexer-cpp-web") + +lexer.embed_lexer(weblexer, texlexer, texpart + limbo, #anypart) +lexer.embed_lexer(weblexer, cpplexer, cpppart + midpart, #anypart) + +local texcomment = token("comment", percent * restofline^0) + +weblexer._rules = { + { "whitespace", spacing }, + { "texcomment", texcomment }, -- else issues with first tex section + { "rest", rest }, +} + +weblexer._tokenstyles = context.styleset + +return weblexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua new file mode 100644 index 000000000..25fa9128f --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua @@ -0,0 +1,34 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for xml cdata", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +local P = lpeg.P + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token + +local xmlcdatalexer = lexer.new("xml-cdata","scite-context-lexer-xml-cdata") +local whitespace = xmlcdatalexer.whitespace + +local space = patterns.space +local nospace = 1 - space - P("]]>") + +local t_spaces = token(whitespace, space ^1) +local t_cdata = token("comment", nospace^1) + +xmlcdatalexer._rules = { + { "whitespace", t_spaces }, + { "cdata", t_cdata }, +} + +xmlcdatalexer._tokenstyles = context.styleset + +return xmlcdatalexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua new file mode 100644 index 000000000..2d7260b69 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua @@ -0,0 +1,34 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for xml comments", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +local P = lpeg.P + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token + +local xmlcommentlexer = lexer.new("xml-comment","scite-context-lexer-xml-comment") +local whitespace = xmlcommentlexer.whitespace + +local space = patterns.space +local nospace = 1 - space - P("-->") + +local t_spaces = token(whitespace, space ^1) +local t_comment = token("comment", nospace^1) + +xmlcommentlexer._rules = { + { "whitespace", t_spaces }, + { "comment", t_comment }, +} + +xmlcommentlexer._tokenstyles = context.styleset + +return 
xmlcommentlexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua new file mode 100644 index 000000000..1ee96ba89 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua @@ -0,0 +1,34 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for xml script", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +local P = lpeg.P + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token + +local xmlscriptlexer = lexer.new("xml-script","scite-context-lexer-xml-script") +local whitespace = xmlscriptlexer.whitespace + +local space = patterns.space +local nospace = 1 - space - (P("</") * P("script") + P("SCRIPT")) * P(">") + +local t_spaces = token(whitespace, space ^1) +local t_script = token("default", nospace^1) + +xmlscriptlexer._rules = { + { "whitespace", t_spaces }, + { "script", t_script }, +} + +xmlscriptlexer._tokenstyles = context.styleset + +return xmlscriptlexer diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua new file mode 100644 index 000000000..1b7e2e897 --- /dev/null +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua @@ -0,0 +1,351 @@ +local info = { + version = 1.002, + comment = "scintilla lpeg lexer for xml", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- adapted from the regular context pretty printer code (after all, lexing +-- boils down to much of the same and there are only so many ways to do +-- things). Simplified a bit as we have a different nesting model. 
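+
+-- as a small taste of the patterns used below (just a sketch): an entity is
+-- "&" up to the closing ";" and gets lexed as one constant token:
+--
+-- local entity = lpeg.P("&") * (1 - lpeg.P(";"))^1 * lpeg.P(";")
+--
+-- lpeg.match(entity, "&amp;")  -- 6
+-- lpeg.match(entity, "&#169;") -- 7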
+ +-- todo: parse entities in attributes + +local global, string, table, lpeg = _G, string, table, lpeg +local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp +local type = type +local match, find = string.match, string.find + +-- local lexer = require("lexer") +local lexer = require("scite-context-lexer") +local context = lexer.context +local patterns = context.patterns + +local token = lexer.token +local exact_match = lexer.exact_match + +local xmllexer = lexer.new("xml","scite-context-lexer-xml") +local whitespace = xmllexer.whitespace + +local xmlcommentlexer = lexer.load("scite-context-lexer-xml-comment") +local xmlcdatalexer = lexer.load("scite-context-lexer-xml-cdata") +local xmlscriptlexer = lexer.load("scite-context-lexer-xml-script") +local lualexer = lexer.load("scite-context-lexer-lua") + +local space = patterns.space +local any = patterns.any + +local dquote = P('"') +local squote = P("'") +local colon = P(":") +local semicolon = P(";") +local equal = P("=") +local ampersand = P("&") + +local name = (R("az","AZ","09") + S("_-."))^1 +local openbegin = P("<") +local openend = P("</") +local closebegin = P("/>") + P(">") +local closeend = P(">") +local opencomment = P("<!--") +local closecomment = P("-->") +local openinstruction = P("<?") +local closeinstruction = P("?>") +local opencdata = P("<![CDATA[") +local closecdata = P("]]>") +local opendoctype = P("<!DOCTYPE") -- could grab the whole doctype +local closedoctype = P("]>") + P(">") +local openscript = openbegin * (P("script") + P("SCRIPT")) * (1-closeend)^0 * closeend -- begin +local closescript = openend * (P("script") + P("SCRIPT")) * closeend + +local openlua = "<?lua" +local closelua = "?>" + +-- <!DOCTYPE Something PUBLIC "... ..." "..." [ ... ] > +-- <!DOCTYPE Something PUBLIC "... ..." "..." > +-- <!DOCTYPE Something SYSTEM "... ..." [ ... ] > +-- <!DOCTYPE Something SYSTEM "... ..." > +-- <!DOCTYPE Something [ ... 
] > +-- <!DOCTYPE Something > + +local entity = ampersand * (1-semicolon)^1 * semicolon + +local utfchar = context.utfchar +local wordtoken = context.patterns.wordtoken +local iwordtoken = context.patterns.iwordtoken +local wordpattern = context.patterns.wordpattern +local iwordpattern = context.patterns.iwordpattern +local invisibles = context.patterns.invisibles +local checkedword = context.checkedword +local styleofword = context.styleofword +local setwordlist = context.setwordlist +local validwords = false +local validminimum = 3 + +-- <?xml version="1.0" encoding="UTF-8" language="uk" ?> +-- +-- <?context-directive editor language us ?> + +local t_preamble = Cmt(P("<?xml "), function(input,i,_) -- todo: utf bomb, no longer # + if i < 200 then + validwords, validminimum = false, 3 + local language = match(input,"^<%?xml[^>]*%?>%s*<%?context%-directive%s+editor%s+language%s+(..)%s+%?>") + -- if not language then + -- language = match(input,"^<%?xml[^>]*language=[\"\'](..)[\"\'][^>]*%?>",i) + -- end + if language then + validwords, validminimum = setwordlist(language) + end + end + return false +end) + +local t_word = +-- Ct( iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() ) -- the function can be inlined + iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() -- the function can be inlined + +local t_rest = + token("default", any) + +local t_text = + token("default", (1-S("<>&")-space)^1) + +local t_spacing = + token(whitespace, space^1) + +local t_optionalwhitespace = + token("default", space^1)^0 + +local t_localspacing = + token("default", space^1) + +-- Because we want a differently colored open and close we need an embedded lexer (whitespace +-- trigger). What is actually needed is that scintilla applies the current whitespace style. +-- Even using different style keys is not robust as they can be shared. I'll fix the main +-- lexer code. + +local t_sstring = + token("quote",dquote) + * token("string",(1-dquote)^0) -- different from context + * token("quote",dquote) + +local t_dstring = + token("quote",squote) + * token("string",(1-squote)^0) -- different from context + * token("quote",squote) + +-- local t_comment = +-- token("command",opencomment) +-- * token("comment",(1-closecomment)^0) -- different from context +-- * token("command",closecomment) + +-- local t_cdata = +-- token("command",opencdata) +-- * token("comment",(1-closecdata)^0) -- different from context +-- * token("command",closecdata) + +-- maybe cdata just text (then we don't need the extra lexer as we only have one comment then) + +-- <!DOCTYPE Something PUBLIC "... ..." "..." [ ... ] > +-- <!DOCTYPE Something PUBLIC "... ..." "..." > +-- <!DOCTYPE Something SYSTEM "... ..." [ ... ] > +-- <!DOCTYPE Something SYSTEM "... ..." > +-- <!DOCTYPE Something [ ... 
] > +-- <!DOCTYPE Something > + +-- <!ENTITY xxxx SYSTEM "yyyy" NDATA zzzz> +-- <!ENTITY xxxx PUBLIC "yyyy" > +-- <!ENTITY xxxx "yyyy" > + +local t_docstr = t_dstring + t_sstring + +local t_docent = token("command",P("<!ENTITY")) + * t_optionalwhitespace + * token("keyword",name) + * t_optionalwhitespace + * ( + ( + token("constant",P("SYSTEM")) + * t_optionalwhitespace + * t_docstr + * t_optionalwhitespace + * token("constant",P("NDATA")) + * t_optionalwhitespace + * token("keyword",name) + ) + ( + token("constant",P("PUBLIC")) + * t_optionalwhitespace + * t_docstr + ) + ( + t_docstr + ) + ) + * t_optionalwhitespace + * token("command",P(">")) + +local t_docele = token("command",P("<!ELEMENT")) + * t_optionalwhitespace + * token("keyword",name) + * t_optionalwhitespace + * token("command",P("(")) + * ( + t_localspacing + + token("constant",P("#CDATA") + P("#PCDATA") + P("ANY")) + + token("text",P(",")) + + token("comment",(1-S(",)"))^1) + )^1 + * token("command",P(")")) + * t_optionalwhitespace + * token("command",P(">")) + +local t_docset = token("command",P("[")) + * t_optionalwhitespace + * ((t_optionalwhitespace * (t_docent + t_docele))^1 + token("comment",(1-P("]"))^0)) + * t_optionalwhitespace + * token("command",P("]")) + +local t_doctype = token("command",P("<!DOCTYPE")) + * t_optionalwhitespace + * token("keyword",name) + * t_optionalwhitespace + * ( + ( + token("constant",P("PUBLIC")) + * t_optionalwhitespace + * t_docstr + * t_optionalwhitespace + * t_docstr + * t_optionalwhitespace + ) + ( + token("constant",P("SYSTEM")) + * t_optionalwhitespace + * t_docstr + * t_optionalwhitespace + ) + )^-1 + * t_docset^-1 + * t_optionalwhitespace + * token("command",P(">")) + +lexer.embed_lexer(xmllexer, lualexer, token("command", openlua), token("command", closelua)) +lexer.embed_lexer(xmllexer, xmlcommentlexer, token("command", opencomment), token("command", closecomment)) +lexer.embed_lexer(xmllexer, xmlcdatalexer, token("command", opencdata), token("command", closecdata)) +lexer.embed_lexer(xmllexer, xmlscriptlexer, token("command", openscript), token("command", closescript)) + +-- local t_name = +-- token("plain",name) +-- * ( +-- token("default",colon) +-- * token("keyword",name) +-- ) +-- + token("keyword",name) + +local t_name = -- more robust + token("plain",name * colon)^-1 + * token("keyword",name) + +-- local t_key = +-- token("plain",name) +-- * ( +-- token("default",colon) +-- * token("constant",name) +-- ) +-- + token("constant",name) + +local t_key = + token("plain",name * colon)^-1 + * token("constant",name) + +local t_attributes = ( + t_optionalwhitespace + * t_key + * t_optionalwhitespace + * token("plain",equal) + * t_optionalwhitespace + * (t_dstring + t_sstring) + * t_optionalwhitespace +)^0 + +local t_open = + token("keyword",openbegin) + * ( + t_name + * t_optionalwhitespace + * t_attributes + * token("keyword",closebegin) + + + token("error",(1-closebegin)^1) + ) + +local t_close = + token("keyword",openend) + * ( + t_name + * t_optionalwhitespace + * token("keyword",closeend) + + + token("error",(1-closeend)^1) + ) + +local t_entity = + token("constant",entity) + +local t_instruction = + token("command",openinstruction * P("xml")) + * t_optionalwhitespace + * t_attributes + * t_optionalwhitespace + * token("command",closeinstruction) + + token("command",openinstruction * name) + * token("default",(1-closeinstruction)^1) + * token("command",closeinstruction) + +local t_invisible = + token("invisible",invisibles^1) + +-- local t_preamble = +-- token("preamble", 
t_preamble )
+
+xmllexer._rules = {
+    { "whitespace",  t_spacing     },
+    { "preamble",    t_preamble    },
+    { "word",        t_word        },
+ -- { "text",        t_text        },
+ -- { "comment",     t_comment     },
+ -- { "cdata",       t_cdata       },
+    { "doctype",     t_doctype     },
+    { "instruction", t_instruction },
+    { "close",       t_close       },
+    { "open",        t_open        },
+    { "entity",      t_entity      },
+    { "invisible",   t_invisible   },
+    { "rest",        t_rest        },
+}
+
+xmllexer._tokenstyles = context.styleset
+
+xmllexer._foldpattern = P("</") + P("<") + P("/>") -- separate entry else interference
+                      + P("<!--") + P("-->")
+
+xmllexer._foldsymbols = {
+    _patterns = {
+        "</",
+        "/>",
+        "<",
+    },
+    ["keyword"] = {
+        ["</"] = -1,
+        ["/>"] = -1,
+        ["<"]  = 1,
+    },
+    ["command"] = {
+        ["</"]   = -1,
+        ["/>"]   = -1,
+        ["<!--"] = 1,
+        ["-->"]  = -1,
+        ["<"]    = 1,
+    },
+}
+
+return xmllexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer.lua b/context/data/textadept/context/lexers/scite-context-lexer.lua
new file mode 100644
index 000000000..e526d5045
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer.lua
@@ -0,0 +1,2299 @@
+local info = {
+    version   = 1.400,
+    comment   = "basics for scintilla lpeg lexer for context/metafun; contains copyrighted code from mitchell.att.foicica.com",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+-- todo: hook into context resolver etc
+-- todo: only old api in lexers, rest in context subnamespace
+-- todo: make sure we can run in one state .. copies or shared?
+-- todo: auto-nesting
+
+if lpeg.setmaxstack then lpeg.setmaxstack(1000) end
+
+local log      = false
+local trace    = false
+local detail   = false
+local show     = false -- nice for tracing (also for later)
+local collapse = false -- can save some 15% (maybe easier on scintilla)
+local inspect  = false -- can save some 15% (maybe easier on scintilla)
+
+-- local log   = true
+-- local trace = true
+
+-- GET GOING
+--
+-- You need to copy this file over lexer.lua. In principle other lexers could work too, but
+-- not now. Maybe some day. All patterns will move into the patterns name space. I might do
+-- the same with styles. If you run an older version of SciTE you can take one of the
+-- archives. Pre 3.41 versions can just be copied to the right path, as there we still use
+-- part of the normal lexer.
+--
+-- REMARK
+--
+-- We started using lpeg lexing as soon as it became available. Because we had rather demanding
+-- files and also wanted to use nested lexers, we ended up with our own variant. At least at
+-- that time this was more robust and also faster (as we have some pretty large lua data files
+-- and also work with large xml files). As a consequence successive versions had to be adapted
+-- to changes in the (at that time still unstable) api. In addition to lexing we also have
+-- spell checking and such. Around version 3.60 things became more stable, so I don't expect to
+-- change much.
+--
+-- STATUS
+--
+-- todo: maybe use a special stripped version of the dll (stable api) and add a bit more
+-- interfacing to scintilla
+-- todo: investigate if we can use the already built in lua instance so that we can combine the
+-- power of lexing with extensions
+-- todo: play with hotspot and other properties (but no real need now)
+-- todo: maybe come up with an extension to the api subsystem
+-- todo: add proper tracing and so ..
+-- not too hard as we can run on mtxrun, but we lack a console
+-- for debugging (ok, chicken-egg as lexers probably need to be loaded before a console can
+-- kick in)
+-- todo: get rid of these lexers.STYLE_XX and lexers.XX (hide such details)
+--
+-- wish: access to all scite properties and in fact integrate in scite
+--
+--
+-- In the meantime I made the lexer suitable for typesetting sources, which was no big deal as we
+-- already had that in place (ConTeXt used lpeg from the day it showed up, so we have several lexing
+-- options there too).
+--
+-- HISTORY
+--
+-- The fold and lex functions are copied and patched from original code by Mitchell (see lexer.lua).
+-- All errors are mine. The ability to use lpeg in scintilla is a really nice addition and a brilliant
+-- move. The code is a byproduct of the (mainly Lua based) textadept (which, at the time I ran into it,
+-- was a rapidly moving target, so I decided to stick to SciTE). When I played with it, it had no realtime
+-- output pane, but that seems to be dealt with now (2017). I need to have a look at it in more detail,
+-- but a first test again made the output hang and it was a bit slow too (and I also want the log pane
+-- as scite has it, on the right, in view). So, for now I stick to SciTE even when it's somewhat
+-- crippled by the fact that we cannot hook our own (language dependent) lexer into the output pane
+-- (somehow the errorlist lexer is hard coded into the editor). Hopefully that will change some day.
+-- So, how did we arrive at where we are now?
+--
+-- Starting with SciTE version 3.20 there was an issue with coloring, and as we still lacked a connection
+-- with SciTE itself (properties as well as printing to the log pane) we could not trace this (on
+-- windows). As far as I could see, there were no fundamental changes in lexer.lua or LexLPeg.cxx, so it
+-- had to be in Scintilla itself. So for the moment I stuck to 3.10. Indicators were: no lexing of 'next'
+-- and 'goto <label>' in the Lua lexer and no brace highlighting either. Interesting is that it did
+-- work ok in the cld lexer (so the Lua code was okay). All seems to be ok again in later versions,
+-- so, when you update, best check first and just switch back to an older version, as normally a SciTE
+-- update is not critical. When char-def.lua lexes really fast, this is a signal that the lexer quits
+-- somewhere halfway. Maybe there are some hard coded limitations on the number of styles and/or the
+-- length of names.
+--
+-- Anyway, after checking 3.24 and adapting to the new lexer tables, things were okay again. So, this
+-- version assumes 3.24 or higher. In 3.24 we have a different token result, i.e. no longer a { tag,
+-- pattern } pair but just two return values. I didn't check other changes but will do that when I run
+-- into issues. I had optimized these small tables by hashing, which was more efficient, but this is no
+-- longer needed. For the moment we keep some of that code around, as I don't know what happens in future
+-- versions. I'm anyway still happy with this kind of lexing.
+--
+-- In 3.31 another major change took place: some helper constants (maybe they're no longer constants)
+-- and functions were moved into the lexer modules namespace, but the functions are assigned to the Lua
+-- module afterward, so we cannot alias them beforehand. We're probably getting close to a stable
+-- interface now. I've considered making a whole copy and patching the other functions too, as we need an
+-- extra nesting model. However, I don't want to maintain too much.
+-- An unfortunate change in 3.03 is that a script can no longer be specified. This means that instead
+-- of loading the extensions via the properties file, we now need to load them in our own lexers, unless
+-- of course we replace lexer.lua completely (which adds another installation issue).
+--
+-- Another change has been that _LEXERHOME is no longer available. It looks like more and more
+-- functionality gets dropped, so maybe at some point we need to ship our own dll/so files. For instance,
+-- I'd like to have access to the current filename and other scite properties. We could then cache some
+-- info with each file, if only we had knowledge of what file we're dealing with.
+--
+-- For huge files folding can be pretty slow, and I do have some large ones that I keep open all the time.
+-- Loading is normally no issue, unless one has remembered the status and the cursor is at the last line
+-- of a 200K line file. Optimizing the fold function brought loading of char-def.lua down from 14 sec
+-- to 8 sec. Replacing the word_match function and optimizing the lex function gained another 2+ seconds.
+-- A 6 second load is quite ok for me. The changed lexer table structure (no subtables) brings loading
+-- down to a few seconds.
+--
+-- When the lexer path is copied to the textadept lexer path, and the theme definition to the theme path
+-- (as lexer.lua), the lexer works there as well. Although ... when I decided to check the state of
+-- textadept I had to adapt some loader code. It's not pretty but it works and also permits overloading.
+-- When I have time and motivation I will make a proper setup file to tune the look and feel a bit and
+-- associate suffixes with the context lexer. The textadept editor has a nice style tracing option but
+-- lacks the tabs for selecting files that scite has. It also has no integrated run that pipes to the
+-- log pane. Interesting is that the jit version of textadept crashes on lexing large files (and does
+-- not feel faster either; maybe a side effect of known limitations, as we know that luajit is more
+-- limited than stock lua). Btw, in the meantime testing is easier on unix, as there we can enable
+-- the loggers in this module.
+--
+-- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name .. '_whitespace', which means
+-- that we need to have it frozen at the moment we load another lexer. Because spacing is used to revert
+-- to a parent lexer, we need to make sure that we load children as late as possible in order not to get
+-- the wrong whitespace trigger. This took me quite a while to figure out (not being that familiar with
+-- the internals). The lex and fold functions have been optimized. It is a pity that there is no proper
+-- print available. Another thing needed is a default style in our own theme style definition, as otherwise
+-- we get wrong nested lexers, especially if they are larger than a view. This is the hardest part of
+-- getting things right.
+--
+-- It's a pity that there is no scintillua library for the OSX version of scite. Even better would be
+-- to have the scintillua library as an integral part of scite, as that way I could use OSX alongside
+-- windows and linux (depending on needs). Also nice would be to have a proper interface to scite then,
+-- because currently the lexer is rather isolated and the lua version does not provide all standard
+-- libraries. It would also be good to have lpeg support in the regular scite lua extension (currently
+-- you need to pick it up from someplace else).
+--
+-- With 3.41 the interface changed again, so it is time to look into the C++ code and consider compiling
+-- and patching it myself. Loading is more complicated now, as the lexer gets loaded automatically, so we
+-- have little control over extending the code. After a few days of trying all kinds of solutions I decided
+-- to follow a different approach: drop in a complete replacement. This of course means that I need to keep
+-- track of even more changes (which for sure will happen) but at least I get rid of interferences. The
+-- api (lexing and configuration) is simply too unstable across versions. Maybe in a few years things will
+-- have stabilized again. (Or maybe it's not really expected that one writes lexers at all.) A side effect
+-- is that I will now no longer use shipped lexers but just the built-in ones, in addition to the context
+-- lpeg lexers. Not that it matters much, as the context lexers cover what I need (and I can always write
+-- more).
+--
+-- In fact, the transition to 3.41 was triggered by an unfortunate update of Ubuntu which left me with an
+-- incompatible SciTE and lexer library, and updating was not possible due to the lack of 64 bit libraries.
+-- We'll see what the future brings.
+--
+-- Promising is that the library can now use another Lua instance, so maybe some day it will get properly
+-- into SciTE and we can use more clever scripting.
+--
+-- In some lexers we use embedded ones even if we could do it directly. The reason is that when the end
+-- token is edited (e.g. -->), backtracking to the space before the begin token (e.g. <!--) results in
+-- applying the surrounding whitespace, which in turn means that when the end token is edited right,
+-- backtracking doesn't go back. One solution (in the dll) would be to backtrack several space categories.
+-- After all, lexing is quite fast (applying the result is much slower).
+--
+-- For some reason the first blob of text tends to go wrong (pdf and web). It would be nice to have 'whole
+-- doc' initial lexing. Quite fishy, as it makes it impossible to lex the first part well (for already
+-- opened documents) because only a partial text is passed.
+--
+-- So, maybe I should just write this from scratch (assuming more generic usage) because, after all, the
+-- dll expects just tables, based on a string. I can then also do some more aggressive resource sharing
+-- (needed when used generically).
+--
+-- I think that nested lexers are still bugged (esp. over longer ranges). It never was robust, or maybe
+-- it's simply not meant for too complex cases (well, it probably *is* tricky material). The 3.24 version
+-- was probably the best so far. The fact that styles bleed between lexers even if their states are
+-- isolated is an issue. Another issue is that zero characters in the text passed to the lexer can mess
+-- things up (pdf files have them in streams).
+--
+-- For more complex 'languages', like web or xml, we need to make sure that we use e.g. 'default' for
+-- spacing that makes up some construct. Ok, we then still have a backtracking issue, but less of one.
+--
+-- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++ and we ship an ini
+-- file for that editor with some installation instructions embedded.
+--
+-- TODO
+--
+-- I can make an export to context, but first I'll redo the code that makes the grammar,
+-- as we only seem to need
+--
+--   lexer._TOKENSTYLES : table
+--   lexer._CHILDREN    : flag
+--   lexer._EXTRASTYLES : table
+--   lexer._GRAMMAR     : flag
+--
+--   lexers.load        : function
+--   lexers.lex         : function
+--
+-- So, if we drop compatibility with other lex definitions, we can make things simpler. However, in the
+-- meantime one can just do this:
+--
+--   context --extra=listing --scite [--compact --verycompact] somefile.tex
+--
+-- and get a printable document. So, this todo is obsolete.
+
+-- TRACING
+--
+-- The advantage is that we now can check more easily with regular Lua(TeX). We can also use wine and print
+-- to the console (somehow stdout is intercepted there). So, I've added a bit of tracing. Interesting is to
+-- notice that each document gets its own instance, which has advantages but also means that when we are
+-- spellchecking we reload the word lists each time. (In the past I assumed a shared instance and took
+-- some precautions.)
+
+-- todo: make sure we don't overload context definitions when used in context
+
+-- properties is an ugly mess ... due to changes in the interface we're now left with some hybrid
+-- that sort of works ok
+
+local lpeg = require("lpeg")
+
+local global = _G
+local find, gmatch, match, lower, upper, gsub, sub, format = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format
+local concat, sort = table.concat, table.sort
+local type, next, setmetatable, rawset, tonumber, tostring = type, next, setmetatable, rawset, tonumber, tostring
+local R, P, S, V, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg
+local lpegmatch = lpeg.match
+
+local nesting = 0
+
+local print = (textadept and ui and ui.print) or print
+
+local function report(fmt,str,...)
+    if log then
+        if str then
+            fmt = format(fmt,str,...)
+        end
+        print(format("scite lpeg lexer > %s > %s",nesting == 0 and "-" or nesting,fmt))
+    end
+end
+
+local function inform(...)
+    if log and trace then
+        report(...)
+    end
+end
+
+inform("loading context lexer module (global table: %s)",tostring(global))
+
+if not package.searchpath then
+
+    -- Unfortunately the io library is only available when we end up
+    -- in this branch of code.
+
+    inform("using adapted function 'package.searchpath' (if used at all)")
+
+    function package.searchpath(name,path)
+        local tried = { }
+        for part in gmatch(path,"[^;]+") do
+            local filename = gsub(part,"%?",name)
+            local f = io.open(filename,"r")
+            if f then
+                inform("file found on path: %s",filename)
+                f:close()
+                return filename
+            end
+            tried[#tried + 1] = format("no file '%s'",filename)
+        end
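+        -- usage, just an illustration with made-up arguments (any name and
+        -- template path will do):
+        --
+        --   package.searchpath("scite-context-lexer","./?.lua;lexers/?.lua")
+        --
+        -- returns the first readable candidate, or nil plus a message that
+        -- lists everything that was tried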
for testing + local f = io.open(filename,"r") + if f then + inform("file found on current path: %s",filename) + f:close() + return filename + end + -- + tried[#tried + 1] = format("no file '%s'",filename) + return nil, concat(tried,"\n") + end + +end + +local lexers = { } +local context = { } +local helpers = { } +lexers.context = context +lexers.helpers = helpers + +local patterns = { } +context.patterns = patterns -- todo: lexers.patterns + +context.report = report +context.inform = inform + +lexers.LEXERPATH = package.path -- can be multiple paths separated by ; + +if resolvers then + -- todo: set LEXERPATH + -- todo: set report +end + +local function sortedkeys(hash) -- simple version, good enough for here + local t, n = { }, 0 + for k, v in next, hash do + t[#t+1] = k + local l = #tostring(k) + if l > n then + n = l + end + end + sort(t) + return t, n +end + +helpers.sortedkeys = sortedkeys + +local usedlexers = { } +local parent_lexer = nil + +-- The problem with styles is that there is some nasty interaction with scintilla +-- and each version of lexer dll/so has a different issue. So, from now on we will +-- just add them here. There is also a limit on some 30 styles. Maybe I should +-- hash them in order to reuse. + +-- todo: work with proper hashes and analyze what styles are really used by a +-- lexer + +local default = { + "nothing", "whitespace", "comment", "string", "number", "keyword", + "identifier", "operator", "error", "preprocessor", "constant", "variable", + "function", "type", "label", "embedded", + "quote", "special", "extra", "reserved", "okay", "warning", + "command", "internal", "preamble", "grouping", "primitive", "plain", + "user", + -- not used (yet) .. we cross the 32 boundary so had to patch the initializer, see (1) + "char", "class", "data", "definition", "invisible", "regex", + "standout", "tag", + "text", +} + +local predefined = { + "default", "linenumber", "bracelight", "bracebad", "controlchar", + "indentguide", "calltip" +} + +-- Bah ... ugly ... nicer would be a proper hash .. we now have properties +-- as well as STYLE_* and some connection between them ... why .. ok, we +-- could delay things but who cares. Anyway, at this moment the properties +-- are still unknown. + +local function preparestyles(list) + local reverse = { } + for i=1,#list do + local k = list[i] + local K = upper(k) + local s = "style." .. k + lexers[K] = k -- is this used + lexers["STYLE_"..K] = "$(" .. k .. ")" + reverse[k] = true + end + return reverse +end + +local defaultstyles = preparestyles(default) +local predefinedstyles = preparestyles(predefined) + +-- These helpers are set afterwards so we delay their initialization ... there +-- is no need to alias each time again and this way we can more easily adapt +-- to updates. + +-- These keep changing (values, functions, tables ...) so we nee to check these +-- with each update. Some of them are set in the loader (the require 'lexer' is +-- in fact not a real one as the lexer code is loaded in the dll). It's also not +-- getting more efficient. + +-- FOLD_BASE = lexers.FOLD_BASE or SC_FOLDLEVELBASE +-- FOLD_HEADER = lexers.FOLD_HEADER or SC_FOLDLEVELHEADERFLAG +-- FOLD_BLANK = lexers.FOLD_BLANK or SC_FOLDLEVELWHITEFLAG +-- get_style_at = lexers.get_style_at or GetStyleAt +-- get_indent_amount = lexers.get_indent_amount or GetIndentAmount +-- get_property = lexers.get_property or GetProperty +-- get_fold_level = lexers.get_fold_level or GetFoldLevel + +-- It needs checking: do we have access to all properties now? 
I'll clean +-- this up anyway as I want a simple clean and stable model. + +-- This is somewhat messy. The lexer dll provides some virtual fields: +-- +-- + property +-- + property_int +-- + style_at +-- + fold_level +-- + indent_amount +-- +-- but for some reasons not: +-- +-- + property_expanded +-- +-- As a consequence we need to define it here because otherwise the +-- lexer will crash. The fuzzy thing is that we don't have to define +-- the property and property_int tables but we do have to define the +-- expanded beforehand. The folding properties are no longer interfaced +-- so the interface to scite is now rather weak (only a few hard coded +-- properties). + +local FOLD_BASE = 0 +local FOLD_HEADER = 0 +local FOLD_BLANK = 0 + +local style_at = { } +local indent_amount = { } +local fold_level = { } + +local function check_main_properties() + if not lexers.property then + lexers.property = { } + end + if not lexers.property_int then + lexers.property_int = setmetatable({ }, { + __index = function(t,k) + -- why the tostring .. it relies on lua casting to a number when + -- doing a comparison + return tonumber(lexers.property[k]) or 0 -- tostring removed + end, + -- __newindex = function(t,k,v) + -- report("properties are read-only, '%s' is not changed",k) + -- end, + }) + end +end + +lexers.property_expanded = setmetatable({ }, { + __index = function(t,k) + -- better be safe for future changes .. what if at some point this is + -- made consistent in the dll ... we need to keep an eye on that + local property = lexers.property + if not property then + check_main_properties() + end + -- + return gsub(property[k],"[$%%]%b()", function(k) + return t[sub(k,3,-2)] + end) + end, + __newindex = function(t,k,v) + report("properties are read-only, '%s' is not changed",k) + end, +}) + +-- A downward compatible feature but obsolete: + +-- local function get_property(tag,default) +-- return lexers.property_int[tag] or lexers.property[tag] or default +-- end + +-- We still want our own properties (as it keeps changing so better play +-- safe from now on). At some point I can freeze them. 
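+
+-- A minimal sketch (commented out, in the spirit of the test blocks elsewhere
+-- in this file) of the recursive "$(name)" expansion used by property_expanded
+-- above and by check_properties below; the property names here are invented:
+--
+-- do
+--     local props = {
+--         ["style.default"] = "fore:#000000",
+--         ["style.comment"] = "$(style.default),italics",
+--     }
+--     local expanded = setmetatable({ }, {
+--         __index = function(t,k)
+--             return gsub(props[k],"[$%%]%b()",function(m)
+--                 return t[sub(m,3,-2)] -- strip the "$(" and ")"
+--             end)
+--         end,
+--     })
+--     print(expanded["style.comment"]) -- fore:#000000,italics
+-- end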
+ +local function check_properties(lexer) + if lexer.properties then + return lexer + end + check_main_properties() + -- we use a proxy + local mainproperties = lexers.property + local properties = { } + local expanded = setmetatable({ }, { + __index = function(t,k) + return gsub(properties[k] or mainproperties[k],"[$%%]%b()", function(k) + return t[sub(k,3,-2)] + end) + end, + }) + lexer.properties = setmetatable(properties, { + __index = mainproperties, + __call = function(t,k,default) -- expands + local v = expanded[k] + local t = type(default) + if t == "number" then + return tonumber(v) or default + elseif t == "boolean" then + return v == nil and default or v + else + return v or default + end + end, + }) + return lexer +end + +-- do +-- lexers.property = { foo = 123, red = "R" } +-- local a = check_properties({}) print("a.foo",a.properties.foo) +-- a.properties.foo = "bar" print("a.foo",a.properties.foo) +-- a.properties.foo = "bar:$(red)" print("a.foo",a.properties.foo) print("a.foo",a.properties("foo")) +-- end + +local function set(value,default) + if value == 0 or value == false or value == "0" then + return false + elseif value == 1 or value == true or value == "1" then + return true + else + return default + end +end + +local function check_context_properties() + local property = lexers.property -- let's hope that this stays + log = set(property["lexer.context.log"], log) + trace = set(property["lexer.context.trace"], trace) + detail = set(property["lexer.context.detail"], detail) + show = set(property["lexer.context.show"], show) + collapse = set(property["lexer.context.collapse"],collapse) + inspect = set(property["lexer.context.inspect"], inspect) +end + +function context.registerproperties(p) -- global + check_main_properties() + local property = lexers.property -- let's hope that this stays + for k, v in next, p do + property[k] = v + end + check_context_properties() +end + +context.properties = setmetatable({ }, { + __index = lexers.property, + __newindex = function(t,k,v) + check_main_properties() + lexers.property[k] = v + check_context_properties() + end, +}) + +-- We want locals to we set them delayed. Once. + +local function initialize() + FOLD_BASE = lexers.FOLD_BASE + FOLD_HEADER = lexers.FOLD_HEADER + FOLD_BLANK = lexers.FOLD_BLANK + -- + style_at = lexers.style_at -- table + indent_amount = lexers.indent_amount -- table + fold_level = lexers.fold_level -- table + -- + check_main_properties() + -- + initialize = nil +end + +-- Style handler. +-- +-- The property table will be set later (after loading) by the library. The +-- styleset is not needed any more as we predefine all styles as defaults +-- anyway (too bug sensitive otherwise). 
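+
+-- A commented-out usage sketch of the style serializers defined below; the
+-- style names and color values are invented, and since a specification is a
+-- hash the key order in the serialized result can vary:
+--
+-- do
+--     -- toproperty { fore = "#007F00", italics = true }
+--     --     --> "italics,fore:#007F00"
+--     context.registerstyles {
+--         comment = { fore = "#007F00", italics = true },
+--         keyword = { fore = "#00007F", bold    = true },
+--     }
+--     -- afterwards context.styleset.comment is "italics,fore:#007F00" and
+--     -- lexers.property["style.comment"] holds the same string
+-- end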
+ +local function tocolors(colors) + local colorset = { } + local property_int = lexers.property_int or { } + for k, v in next, colors do + if type(v) == "table" then + local r, g, b = v[1], v[2], v[3] + if r and g and b then + v = tonumber(format("%02X%02X%02X",b,g,r),16) or 0 -- hm + elseif r then + v = tonumber(format("%02X%02X%02X",r,r,r),16) or 0 + else + v = 0 + end + end + colorset[k] = v + property_int["color."..k] = v + end + return colorset +end + +local function toproperty(specification) + local serialized = { } + for key, value in next, specification do + if value == true then + serialized[#serialized+1] = key + elseif type(value) == "table" then + local r, g, b = value[1], value[2], value[3] + if r and g and b then + value = format("#%02X%02X%02X",r,g,b) or "#000000" + elseif r then + value = format("#%02X%02X%02X",r,r,r) or "#000000" + else + value = "#000000" + end + serialized[#serialized+1] = key .. ":" .. value + else + serialized[#serialized+1] = key .. ":" .. tostring(value) + end + end + return concat(serialized,",") +end + +local function tostyles(styles) + local styleset = { } + local property = lexers.property or { } + for k, v in next, styles do + v = toproperty(v) + styleset[k] = v + property["style."..k] = v + end + return styleset +end + +context.toproperty = toproperty +context.tostyles = tostyles +context.tocolors = tocolors + +-- If we had one instance/state of Lua as well as all regular libraries +-- preloaded we could use the context base libraries. So, let's go poor- +-- mans solution now. + +function context.registerstyles(styles) + local styleset = tostyles(styles) + context.styles = styles + context.styleset = styleset + if detail then + local t, n = sortedkeys(styleset) + local template = " %-" .. n .. "s : %s" + report("initializing styleset:") + for i=1,#t do + local k = t[i] + report(template,k,styleset[k]) + end + elseif trace then + report("initializing styleset") + end +end + +function context.registercolors(colors) -- needed for textadept + local colorset = tocolors(colors) + context.colors = colors + context.colorset = colorset + if detail then + local t, n = sortedkeys(colorset) + local template = " %-" .. n .. "s : %i" + report("initializing colorset:") + for i=1,#t do + local k = t[i] + report(template,k,colorset[k]) + end + elseif trace then + report("initializing colorset") + end +end + +-- Some spell checking related stuff. Unfortunately we cannot use a path set +-- by property. This will get a hook for resolvers. + +local locations = { + "context/lexers", -- context lexers + "context/lexers/data", -- context lexers + "../lexers", -- original lexers + "../lexers/data", -- original lexers + ".", -- whatever + "./data", -- whatever +} + +-- local function collect(name) +-- local root = gsub(lexers.LEXERPATH or ".","/.-lua$","") .. "/" -- this is a horrible hack +-- -- report("module '%s' locating '%s'",tostring(lexers),name) +-- for i=1,#locations do +-- local fullname = root .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc +-- if trace then +-- report("attempt to locate '%s'",fullname) +-- end +-- local okay, result = pcall(function () return dofile(fullname) end) +-- if okay then +-- return result, fullname +-- end +-- end +-- end + +local function collect(name) + local rootlist = lexers.LEXERPATH or "." + for root in gmatch(rootlist,"[^;]+") do + local root = gsub(root,"/[^/]-lua$","") + for i=1,#locations do + local fullname = root .. "/" .. locations[i] .. "/" .. name .. 
".lua" -- so we can also check for .luc + if trace then + report("attempt to locate '%s'",fullname) + end + local okay, result = pcall(function () return dofile(fullname) end) + if okay then + return result, fullname + end + end + end +end + +function context.loadluafile(name) + local data, fullname = collect(name) + if data then + if trace then + report("lua file '%s' has been loaded",fullname) + end + return data, fullname + end + if not textadept then + report("unable to load lua file '%s'",name) + end +end + +-- in fact we could share more as we probably process the data but then we need +-- to have a more advanced helper + +local cache = { } + +function context.loaddefinitions(name) + local data = cache[name] + if data then + if trace then + report("reusing definitions '%s'",name) + end + return data + elseif trace and data == false then + report("definitions '%s' were not found",name) + end + local data, fullname = collect(name) + if not data then + if not textadept then + report("unable to load definition file '%s'",name) + end + data = false + elseif trace then + report("definition file '%s' has been loaded",fullname) + if detail then + local t, n = sortedkeys(data) + local template = " %-" .. n .. "s : %s" + for i=1,#t do + local k = t[i] + local v = data[k] + if type(v) ~= "table" then + report(template,k,tostring(v)) + elseif #v > 0 then + report(template,k,#v) + else + -- no need to show hash + end + end + end + end + cache[name] = data + return type(data) == "table" and data +end + +function context.word_match(words,word_chars,case_insensitive) + local chars = "%w_" -- maybe just "" when word_chars + if word_chars then + chars = "^([" .. chars .. gsub(word_chars,"([%^%]%-])", "%%%1") .."]+)" + else + chars = "^([" .. chars .."]+)" + end + if case_insensitive then + local word_list = { } + for i=1,#words do + word_list[lower(words[i])] = true + end + return P(function(input, index) + local s, e, word = find(input,chars,index) + return word and word_list[lower(word)] and e + 1 or nil + end) + else + local word_list = { } + for i=1,#words do + word_list[words[i]] = true + end + return P(function(input, index) + local s, e, word = find(input,chars,index) + return word and word_list[word] and e + 1 or nil + end) + end +end + +-- Patterns are grouped in a separate namespace but the regular lexers expect +-- shortcuts to be present in the lexers library. Maybe I'll incorporate some +-- of l-lpeg later. 
+ +do + + local anything = P(1) + local idtoken = R("az","AZ","\127\255","__") + local digit = R("09") + local sign = S("+-") + local period = P(".") + local octdigit = R("07") + local hexdigit = R("09","AF","af") + local lower = R("az") + local upper = R("AZ") + local alpha = upper + lower + local space = S(" \n\r\t\f\v") + local eol = S("\r\n") + local backslash = P("\\") + local decimal = digit^1 + local octal = P("0") + * octdigit^1 + local hexadecimal = P("0") * S("xX") + * (hexdigit^0 * period * hexdigit^1 + hexdigit^1 * period * hexdigit^0 + hexdigit^1) + * (S("pP") * sign^-1 * hexdigit^1)^-1 -- * + + patterns.idtoken = idtoken + patterns.digit = digit + patterns.sign = sign + patterns.period = period + patterns.octdigit = octdigit + patterns.hexdigit = hexdigit + patterns.ascii = R("\000\127") -- useless + patterns.extend = R("\000\255") -- useless + patterns.control = R("\000\031") + patterns.lower = lower + patterns.upper = upper + patterns.alpha = alpha + patterns.decimal = decimal + patterns.octal = octal + patterns.hexadecimal = hexadecimal + patterns.float = sign^-1 + * (digit^0 * period * digit^1 + digit^1 * period * digit^0 + digit^1) + * S("eE") * sign^-1 * digit^1 -- * + patterns.cardinal = decimal + + patterns.signeddecimal = sign^-1 * decimal + patterns.signedoctal = sign^-1 * octal + patterns.signedhexadecimal = sign^-1 * hexadecimal + patterns.integer = sign^-1 * (hexadecimal + octal + decimal) + patterns.real = + sign^-1 * ( -- at most one + digit^1 * period * digit^0 -- 10.0 10. + + digit^0 * period * digit^1 -- 0.10 .10 + + digit^1 -- 10 + ) + + patterns.anything = anything + patterns.any = anything + patterns.restofline = (1-eol)^1 + patterns.space = space + patterns.spacing = space^1 + patterns.nospacing = (1-space)^1 + patterns.eol = eol + patterns.newline = P("\r\n") + eol + + local endof = S("\n\r\f") + + patterns.startofline = P(function(input,index) + return (index == 1 or lpegmatch(endof,input,index-1)) and index + end) + + -- These are the expected ones for other lexers. Maybe all in own namespace + -- and provide compatibility layer. or should I just remove them? 
+
+    lexers.any            = anything
+    lexers.ascii          = patterns.ascii   -- ascii and friends are not locals in
+    lexers.extend         = patterns.extend  -- this block, so take them from patterns
+    lexers.alpha          = alpha
+    lexers.digit          = digit
+    lexers.alnum          = alpha + digit
+    lexers.lower          = lower
+    lexers.upper          = upper
+    lexers.xdigit         = hexdigit
+    lexers.cntrl          = patterns.control
+    lexers.graph          = R("!~")
+    lexers.print          = R(" ~")
+    lexers.punct          = R("!/", ":@", "[\'", "{~")
+    lexers.space          = space
+    lexers.newline        = S("\r\n\f")^1
+    lexers.nonnewline     = 1 - lexers.newline
+    lexers.nonnewline_esc = 1 - (lexers.newline + '\\') + backslash * anything
+    lexers.dec_num        = decimal
+    lexers.oct_num        = octal
+    lexers.hex_num        = hexadecimal
+    lexers.integer        = patterns.integer
+    lexers.float          = patterns.float
+    lexers.word           = (alpha + "_") * (alpha + digit + "_")^0 -- weird, why digits
+
+end
+
+-- end of patterns
+
+function context.exact_match(words,word_chars,case_insensitive)
+    local characters = concat(words)
+    local pattern -- the concat catches _ etc
+    if word_chars == true or word_chars == false or word_chars == nil then
+        word_chars = ""
+    end
+    if type(word_chars) == "string" then
+        pattern = S(characters) + patterns.idtoken
+        if case_insensitive then
+            pattern = pattern + S(upper(characters)) + S(lower(characters))
+        end
+        if word_chars ~= "" then
+            pattern = pattern + S(word_chars)
+        end
+    elseif word_chars then
+        pattern = word_chars
+    end
+    if case_insensitive then
+        local list = { }
+        if #words == 0 then
+            for k, v in next, words do
+                list[lower(k)] = v
+            end
+        else
+            for i=1,#words do
+                list[lower(words[i])] = true
+            end
+        end
+        return Cmt(pattern^1, function(_,i,s)
+            return list[lower(s)] -- and i or nil
+        end)
+    else
+        local list = { }
+        if #words == 0 then
+            for k, v in next, words do
+                list[k] = v
+            end
+        else
+            for i=1,#words do
+                list[words[i]] = true
+            end
+        end
+        return Cmt(pattern^1, function(_,i,s)
+            return list[s] -- and i or nil
+        end)
+    end
+end
+
+function context.just_match(words)
+    local p = P(words[1])
+    for i=2,#words do
+        p = p + P(words[i])
+    end
+    return p
+end
+
+-- spell checking (we can only load lua files)
+--
+-- return {
+--     min   = 3,
+--     max   = 40,
+--     n     = 12345,
+--     words = {
+--         ["someword"]    = "someword",
+--         ["anotherword"] = "Anotherword",
+--     },
+-- }
+
+local lists = { }
+
+function context.setwordlist(tag,limit) -- returns hash (lowercase keys and original values)
+    if not tag or tag == "" then
+        return false, 3
+    end
+    local list = lists[tag]
+    if not list then
+        list = context.loaddefinitions("spell-" ..
tag) + if not list or type(list) ~= "table" then + if not textadept then + report("invalid spell checking list for '%s'",tag) + end + list = { words = false, min = 3 } + else + list.words = list.words or false + list.min = list.min or 3 + end + lists[tag] = list + end + if trace then + report("enabling spell checking for '%s' with minimum '%s'",tag,list.min) + end + return list.words, list.min +end + +patterns.wordtoken = R("az","AZ","\127\255") +patterns.wordpattern = patterns.wordtoken^3 -- todo: if limit and #s < limit then + +function context.checkedword(validwords,validminimum,s,i) -- ,limit + if not validwords then -- or #s < validminimum then + return true, "text", i -- true, "default", i + else + -- keys are lower + local word = validwords[s] + if word == s then + return true, "okay", i -- exact match + elseif word then + return true, "warning", i -- case issue + else + local word = validwords[lower(s)] + if word == s then + return true, "okay", i -- exact match + elseif word then + return true, "warning", i -- case issue + elseif upper(s) == s then + return true, "warning", i -- probably a logo or acronym + else + return true, "error", i + end + end + end +end + +function context.styleofword(validwords,validminimum,s) -- ,limit + if not validwords or #s < validminimum then + return "text" + else + -- keys are lower + local word = validwords[s] + if word == s then + return "okay" -- exact match + elseif word then + return "warning" -- case issue + else + local word = validwords[lower(s)] + if word == s then + return "okay" -- exact match + elseif word then + return "warning" -- case issue + elseif upper(s) == s then + return "warning" -- probably a logo or acronym + else + return "error" + end + end + end +end + +-- overloaded functions + +local h_table, b_table, n_table = { }, { }, { } -- from the time small tables were used (optimization) + +setmetatable(h_table, { __index = function(t,level) local v = { level, FOLD_HEADER } t[level] = v return v end }) +setmetatable(b_table, { __index = function(t,level) local v = { level, FOLD_BLANK } t[level] = v return v end }) +setmetatable(n_table, { __index = function(t,level) local v = { level } t[level] = v return v end }) + +local newline = patterns.newline +local p_yes = Cp() * Cs((1-newline)^1) * newline^-1 +local p_nop = newline + +local folders = { } + +local function fold_by_parsing(text,start_pos,start_line,start_level,lexer) + local folder = folders[lexer] + if not folder then + -- + local pattern, folds, text, start_pos, line_num, prev_level, current_level + -- + local fold_symbols = lexer._foldsymbols + local fold_pattern = lexer._foldpattern -- use lpeg instead (context extension) + -- + if fold_pattern then + -- if no functions are found then we could have a faster one + fold_pattern = Cp() * C(fold_pattern) / function(s,match) + local symbols = fold_symbols[style_at[start_pos + s]] + if symbols then + local l = symbols[match] + if l then + current_level = current_level + l + end + end + end + local action_y = function() + folds[line_num] = prev_level + if current_level > prev_level then + folds[line_num] = prev_level + FOLD_HEADER + end + if current_level < FOLD_BASE then + current_level = FOLD_BASE + end + prev_level = current_level + line_num = line_num + 1 + end + local action_n = function() + folds[line_num] = prev_level + FOLD_BLANK + line_num = line_num + 1 + end + pattern = ((fold_pattern + (1-newline))^1 * newline / action_y + newline/action_n)^0 + + else + -- the traditional one but a bit optimized + local 
fold_symbols_patterns = fold_symbols._patterns + local action_y = function(pos,line) + for j = 1, #fold_symbols_patterns do + for s, match in gmatch(line,fold_symbols_patterns[j]) do -- "()(" .. patterns[i] .. ")" + local symbols = fold_symbols[style_at[start_pos + pos + s - 1]] + local l = symbols and symbols[match] + local t = type(l) + if t == "number" then + current_level = current_level + l + elseif t == "function" then + current_level = current_level + l(text, pos, line, s, match) + end + end + end + folds[line_num] = prev_level + if current_level > prev_level then + folds[line_num] = prev_level + FOLD_HEADER + end + if current_level < FOLD_BASE then + current_level = FOLD_BASE + end + prev_level = current_level + line_num = line_num + 1 + end + local action_n = function() + folds[line_num] = prev_level + FOLD_BLANK + line_num = line_num + 1 + end + pattern = (p_yes/action_y + p_nop/action_n)^0 + end + -- + local reset_parser = lexer._reset_parser + -- + folder = function(_text_,_start_pos_,_start_line_,_start_level_) + if reset_parser then + reset_parser() + end + folds = { } + text = _text_ + start_pos = _start_pos_ + line_num = _start_line_ + prev_level = _start_level_ + current_level = prev_level + lpegmatch(pattern,text) + -- make folds collectable + local t = folds + folds = nil + return t + end + folders[lexer] = folder + end + return folder(text,start_pos,start_line,start_level,lexer) +end + +local folds, current_line, prev_level + +local function action_y() + local current_level = FOLD_BASE + indent_amount[current_line] + if current_level > prev_level then -- next level + local i = current_line - 1 + local f + while true do + f = folds[i] + if not f then + break + elseif f[2] == FOLD_BLANK then + i = i - 1 + else + f[2] = FOLD_HEADER -- low indent + break + end + end + folds[current_line] = { current_level } -- high indent + elseif current_level < prev_level then -- prev level + local f = folds[current_line - 1] + if f then + f[1] = prev_level -- high indent + end + folds[current_line] = { current_level } -- low indent + else -- same level + folds[current_line] = { prev_level } + end + prev_level = current_level + current_line = current_line + 1 +end + +local function action_n() + folds[current_line] = { prev_level, FOLD_BLANK } + current_line = current_line + 1 +end + +local pattern = ( S("\t ")^0 * ( (1-patterns.eol)^1 / action_y + P(true) / action_n) * newline )^0 + +local function fold_by_indentation(text,start_pos,start_line,start_level) + -- initialize + folds = { } + current_line = start_line + prev_level = start_level + -- define + -- -- not here .. pattern binds and local functions are not frozen + -- analyze + lpegmatch(pattern,text) + -- flatten + for line, level in next, folds do + folds[line] = level[1] + (level[2] or 0) + end + -- done, make folds collectable + local t = folds + folds = nil + return t +end + +local function fold_by_line(text,start_pos,start_line,start_level) + local folds = { } + -- can also be lpeg'd + for _ in gmatch(text,".-\r?\n") do + folds[start_line] = n_table[start_level] -- { start_level } -- stile tables ? 
needs checking + start_line = start_line + 1 + end + return folds +end + +local threshold_by_lexer = 512 * 1024 -- we don't know the filesize yet +local threshold_by_parsing = 512 * 1024 -- we don't know the filesize yet +local threshold_by_indentation = 512 * 1024 -- we don't know the filesize yet +local threshold_by_line = 512 * 1024 -- we don't know the filesize yet + +function context.fold(lexer,text,start_pos,start_line,start_level) -- hm, we had size thresholds .. where did they go + if text == "" then + return { } + end + if initialize then + initialize() + end + local fold_by_lexer = lexer._fold + local fold_by_symbols = lexer._foldsymbols + local filesize = 0 -- we don't know that + if fold_by_lexer then + if filesize <= threshold_by_lexer then + return fold_by_lexer(text,start_pos,start_line,start_level,lexer) + end + elseif fold_by_symbols then -- and lexer.properties("fold.by.parsing",1) > 0 then + if filesize <= threshold_by_parsing then + return fold_by_parsing(text,start_pos,start_line,start_level,lexer) + end + elseif lexer.properties("fold.by.indentation",1) > 0 then + if filesize <= threshold_by_indentation then + return fold_by_indentation(text,start_pos,start_line,start_level,lexer) + end + elseif lexer.properties("fold.by.line",1) > 0 then + if filesize <= threshold_by_line then + return fold_by_line(text,start_pos,start_line,start_level,lexer) + end + end + return { } +end + +-- The following code is mostly unchanged: + +local function add_rule(lexer,id,rule) -- unchanged + if not lexer._RULES then + lexer._RULES = { } + lexer._RULEORDER = { } + end + lexer._RULES[id] = rule + lexer._RULEORDER[#lexer._RULEORDER + 1] = id +end + +-- I finally figured out that adding more styles was an issue because of several +-- reasons: +-- +-- + in old versions there was a limit in the amount, so we overran the built-in +-- hard coded scintilla range +-- + then, the add_style function didn't check for already known ones, so again +-- we had an overrun (with some magic that could be avoided) +-- + then, when I messed with a new default set I realized that there is no check +-- in initializing _TOKENSTYLES (here the inspect function helps) +-- + of course it was mostly a side effect of passing all the used styles to the +-- _tokenstyles instead of only the not-default ones but such a thing should not +-- matter (read: intercepted) +-- +-- This finally removed a head-ache and was revealed by lots of tracing, which I +-- should have built in way earlier. + +local function add_style(lexer,token_name,style) -- changed a bit around 3.41 + -- We don't add styles that are already defined as this can overflow the + -- amount possible (in old versions of scintilla). + if defaultstyles[token_name] then + if trace and detail then + report("default style '%s' is ignored as extra style",token_name) + end + return + elseif predefinedstyles[token_name] then + if trace and detail then + report("predefined style '%s' is ignored as extra style",token_name) + end + return + else + if trace and detail then + report("adding extra style '%s' as '%s'",token_name,style) + end + end + -- This is unchanged. We skip the dangerous zone. 
+ local num_styles = lexer._numstyles + if num_styles == 32 then + num_styles = num_styles + 8 + end + if num_styles >= 255 then + report("there can't be more than %s styles",255) + end + lexer._TOKENSTYLES[token_name] = num_styles + lexer._EXTRASTYLES[token_name] = style + lexer._numstyles = num_styles + 1 +end + +local function check_styles(lexer) + -- Here we also use a check for the dangerous zone. That way we can have a + -- larger default set. The original code just assumes that #default is less + -- than the dangerous zone's start. + local numstyles = 0 + local tokenstyles = { } + for i=1, #default do + if numstyles == 32 then + numstyles = numstyles + 8 + end + tokenstyles[default[i]] = numstyles + numstyles = numstyles + 1 + end + -- Unchanged. + for i=1, #predefined do + tokenstyles[predefined[i]] = i + 31 + end + lexer._TOKENSTYLES = tokenstyles + lexer._numstyles = numstyles + lexer._EXTRASTYLES = { } + return lexer +end + +-- At some point an 'any' append showed up in the original code ... +-- but I see no need to catch that case ... beter fix the specification. +-- +-- hm, why are many joined twice + +local function join_tokens(lexer) -- slightly different from the original (no 'any' append) + local patterns = lexer._RULES + local order = lexer._RULEORDER + -- report("lexer: %s, tokens: %s",lexer._NAME,table.concat(order," + ")) + if patterns and order then + local token_rule = patterns[order[1]] -- normally whitespace + for i=2,#order do + token_rule = token_rule + patterns[order[i]] + end + if lexer._TYPE ~= "context" then + token_rule = token_rule + lexers.token(lexers.DEFAULT, patterns.any) + end + lexer._TOKENRULE = token_rule + return token_rule + else + return P(1) + end +end + +local function add_lexer(grammar, lexer) -- mostly the same as the original + local token_rule = join_tokens(lexer) + local lexer_name = lexer._NAME + local children = lexer._CHILDREN + for i=1,#children do + local child = children[i] + if child._CHILDREN then + add_lexer(grammar, child) + end + local child_name = child._NAME + local rules = child._EMBEDDEDRULES[lexer_name] + local rules_token_rule = grammar["__" .. child_name] or rules.token_rule + local pattern = (-rules.end_rule * rules_token_rule)^0 * rules.end_rule^-1 + grammar[child_name] = pattern * V(lexer_name) + local embedded_child = "_" .. child_name + grammar[embedded_child] = rules.start_rule * pattern + token_rule = V(embedded_child) + token_rule + end + if trace then + report("adding lexer '%s' with %s children",lexer_name,#children) + end + grammar["__" .. lexer_name] = token_rule + grammar[lexer_name] = token_rule^0 +end + +local function build_grammar(lexer,initial_rule) -- same as the original + local children = lexer._CHILDREN + local lexer_name = lexer._NAME + if children then + if not initial_rule then + initial_rule = lexer_name + end + local grammar = { initial_rule } + add_lexer(grammar, lexer) + lexer._INITIALRULE = initial_rule + lexer._GRAMMAR = Ct(P(grammar)) + if trace then + report("building grammar for '%s' with whitespace '%s'and %s children",lexer_name,lexer.whitespace or "?",#children) + end + else + lexer._GRAMMAR = Ct(join_tokens(lexer)^0) + if trace then + report("building grammar for '%s' with whitespace '%s'",lexer_name,lexer.whitespace or "?") + end + end +end + +-- So far. We need these local functions in the next one. 
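+
+-- The grammars built above yield a flat array of alternating token names and
+-- (one past the) end positions. A commented-out, self-contained sketch with
+-- an invented three-token mini lexer:
+--
+-- do
+--     local function token(name,patt) -- same shape as context.token below
+--         return patt * Cc(name) * Cp()
+--     end
+--     local t_space  = token("whitespace",S(" \t")^1)
+--     local t_number = token("number",R("09")^1)
+--     local t_rest   = token("default",P(1))
+--     local grammar  = Ct((t_space + t_number + t_rest)^0)
+--     local result   = lpegmatch(grammar,"a 12")
+--     -- result = { "default", 2, "whitespace", 3, "number", 6 }
+--     -- collapsed() below merges adjacent entries with equal token names
+-- end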
+ +local lineparsers = { } + +local maxmatched = 100 + +local function collapsed(t) + local lasttoken = nil + local lastindex = nil + for i=1,#t,2 do + local token = t[i] + local position = t[i+1] + if token == lasttoken then + t[lastindex] = position + elseif lastindex then + lastindex = lastindex + 1 + t[lastindex] = token + lastindex = lastindex + 1 + t[lastindex] = position + lasttoken = token + else + lastindex = i+1 + lasttoken = token + end + end + for i=#t,lastindex+1,-1 do + t[i] = nil + end + return t +end + +local function matched(lexer,grammar,text) + -- text = string.gsub(text,"\z","!") + local t = lpegmatch(grammar,text) + if trace then + if show then + report("output of lexer: %s (max %s entries)",lexer._NAME,maxmatched) + local s = lexer._TOKENSTYLES + local p = 1 + for i=1,2*maxmatched,2 do + local n = i + 1 + local ti = t[i] + local tn = t[n] + if ti then + local txt = sub(text,p,tn-1) + if txt then + txt = gsub(txt,"[%s]"," ") + else + txt = "!no text!" + end + report("%4i : %s > %s (%s) (%s)",n/2,ti,tn,s[ti] or "!unset!",txt) + p = tn + else + break + end + end + end + report("lexer results: %s, length: %s, ranges: %s",lexer._NAME,#text,#t/2) + if collapse then + t = collapsed(t) + report("lexer collapsed: %s, length: %s, ranges: %s",lexer._NAME,#text,#t/2) + end + elseif collapse then + t = collapsed(t) + end + return t +end + +-- Todo: make nice generic lexer (extra argument with start/stop commands) for +-- context itself. + +function context.lex(lexer,text,init_style) + -- local lexer = global._LEXER + local grammar = lexer._GRAMMAR + if initialize then + initialize() + end + if not grammar then + return { } + elseif lexer._LEXBYLINE then -- we could keep token + local tokens = { } + local offset = 0 + local noftokens = 0 + local lineparser = lineparsers[lexer] + if not lineparser then -- probably a cmt is more efficient + lineparser = C((1-newline)^0 * newline) / function(line) + local length = #line + local line_tokens = length > 0 and lpegmatch(grammar,line) + if line_tokens then + for i=1,#line_tokens,2 do + noftokens = noftokens + 1 + tokens[noftokens] = line_tokens[i] + noftokens = noftokens + 1 + tokens[noftokens] = line_tokens[i + 1] + offset + end + end + offset = offset + length + if noftokens > 0 and tokens[noftokens] ~= offset then + noftokens = noftokens + 1 + tokens[noftokens] = "default" + noftokens = noftokens + 1 + tokens[noftokens] = offset + 1 + end + end + lineparser = lineparser^0 + lineparsers[lexer] = lineparser + end + lpegmatch(lineparser,text) + return tokens + elseif lexer._CHILDREN then + local hash = lexer._HASH -- hm, was _hash + if not hash then + hash = { } + lexer._HASH = hash + end + grammar = hash[init_style] + if grammar then + lexer._GRAMMAR = grammar + -- lexer._GRAMMAR = lexer._GRAMMAR or grammar + else + for style, style_num in next, lexer._TOKENSTYLES do + if style_num == init_style then + -- the name of the lexers is filtered from the whitespace + -- specification .. 
weird code, should be a reverse hash + local lexer_name = match(style,"^(.+)_whitespace") or lexer._NAME + if lexer._INITIALRULE ~= lexer_name then + grammar = hash[lexer_name] + if not grammar then + build_grammar(lexer,lexer_name) + grammar = lexer._GRAMMAR + hash[lexer_name] = grammar + end + end + break + end + end + grammar = grammar or lexer._GRAMMAR + hash[init_style] = grammar + end + if trace then + report("lexing '%s' with initial style '%s' and %s children",lexer._NAME,#lexer._CHILDREN or 0,init_style) + end + return matched(lexer,grammar,text) + else + if trace then + report("lexing '%s' with initial style '%s'",lexer._NAME,init_style) + end + return matched(lexer,grammar,text) + end +end + +-- hm, changed in 3.24 .. no longer small table but one table: + +function context.token(name, patt) + return patt * Cc(name) * Cp() +end + +-- The next ones were mostly unchanged (till now), we moved it here when 3.41 +-- became close to impossible to combine with cq. overload and a merge was +-- the only solution. It makes later updates more painful but the update to +-- 3.41 was already a bit of a nightmare anyway. + +-- Loading lexers is rather interwoven with what the dll/so sets and +-- it changes over time. So, we need to keep an eye on changes. One +-- problem that we always faced were the limitations in length of +-- lexer names (as they get app/prepended occasionally to strings with +-- a hard coded limit). So, we always used alternative names and now need +-- to make sure this doesn't clash. As I no longer intend to use shipped +-- lexers I could strip away some of the code in the future, but keeping +-- it as reference makes sense. + +-- I spend quite some time figuring out why 3.41 didn't work or crashed which +-- is hard when no stdout is available and when the io library is absent. In +-- the end of of the problems was in the _NAME setting. We set _NAME +-- to e.g. 'tex' but load from a file with a longer name, which we do +-- as we don't want to clash with existing files, we end up in +-- lexers not being found. + +local whitespaces = { } + +local function push_whitespace(name) + table.insert(whitespaces,lexers.WHITESPACE or "whitespace") + lexers.WHITESPACE = name .. "_whitespace" +end + +local function pop_whitespace() + lexers.WHITESPACE = table.remove(whitespaces) or "whitespace" +end + +local function check_whitespace(lexer,name) + if lexer then + lexer.whitespace = (name or lexer.name or lexer._NAME) .. "_whitespace" + end +end + +function context.new(name,filename) + local lexer = { + _TYPE = "context", + -- + _NAME = name, -- used for token building + _FILENAME = filename, -- for diagnostic purposed + -- + name = name, + filename = filename, + } + if trace then + report("initializing lexer tagged '%s' from file '%s'",name,filename or name) + end + check_whitespace(lexer) + check_styles(lexer) + check_properties(lexer) + return lexer +end + +local function nolexer(name) + local lexer = { + _TYPE = "unset", + _NAME = name, + -- _rules = { }, + } + check_styles(lexer) + check_whitespace(lexer) + check_properties(lexer) + return lexer +end + +local function load_lexer(name,namespace) + if trace then + report("loading lexer file '%s'",name) + end + push_whitespace(namespace or name) -- for traditional lexers .. 
no alt_name yet + local lexer, fullname = context.loadluafile(name) + pop_whitespace() + if not lexer then + report("invalid lexer file '%s'",name) + elseif trace then + report("lexer file '%s' has been loaded",fullname) + end + if type(lexer) ~= "table" then + if trace then + report("lexer file '%s' gets a dummy lexer",name) + end + return nolexer(name) + end + if lexer._TYPE ~= "context" then + lexer._TYPE = "native" + check_styles(lexer) + check_whitespace(lexer,namespace or name) + check_properties(lexer) + end + if not lexer._NAME then + lexer._NAME = name -- so: filename + end + if name ~= namespace then + lexer._NAME = namespace + end + return lexer +end + +-- tracing ... + +local function inspect_lexer(lexer,level) + -- If we had the regular libs available I could use the usual + -- helpers. + local parent = lexer._lexer + lexer._lexer = nil -- prevent endless recursion + local name = lexer._NAME + local function showstyles_1(tag,styles) + local numbers = { } + for k, v in next, styles do + numbers[v] = k + end + -- sort by number and make number hash too + local keys = sortedkeys(numbers) + for i=1,#keys do + local k = keys[i] + local v = numbers[k] + report("[%s %s] %s %s = %s",level,name,tag,k,v) + end + end + local function showstyles_2(tag,styles) + local keys = sortedkeys(styles) + for i=1,#keys do + local k = keys[i] + local v = styles[k] + report("[%s %s] %s %s = %s",level,name,tag,k,v) + end + end + local keys = sortedkeys(lexer) + for i=1,#keys do + local k = keys[i] + local v = lexer[k] + report("[%s %s] root key : %s = %s",level,name,k,tostring(v)) + end + showstyles_1("token style",lexer._TOKENSTYLES) + showstyles_2("extra style",lexer._EXTRASTYLES) + local children = lexer._CHILDREN + if children then + for i=1,#children do + inspect_lexer(children[i],level+1) + end + end + lexer._lexer = parent +end + +function context.inspect(lexer) + inspect_lexer(lexer,0) +end + +-- An optional second argument has been introduced so that one can embed a lexer +-- more than once ... maybe something to look into (as not it's done by remembering +-- the start sequence ... quite okay but maybe suboptimal ... anyway, never change +-- a working solution). + +-- namespace can be automatic: if parent then use name of parent (chain) + +function context.loadlexer(filename,namespace) + nesting = nesting + 1 + if not namespace then + namespace = filename + end + local lexer = usedlexers[namespace] -- we load by filename but the internal name can be short + if lexer then + if trace then + report("reusing lexer '%s'",namespace) + end + nesting = nesting - 1 + return lexer + elseif trace then + report("loading lexer '%s'",namespace) + end + -- + if initialize then + initialize() + end + -- + parent_lexer = nil + -- + lexer = load_lexer(filename,namespace) or nolexer(filename,namespace) + usedlexers[filename] = lexer + -- + if not lexer._rules and not lexer._lexer then + lexer._lexer = parent_lexer + end + -- + if lexer._lexer then + local _l = lexer._lexer + local _r = lexer._rules + local _s = lexer._tokenstyles + if not _l._tokenstyles then + _l._tokenstyles = { } + end + if _r then + local rules = _l._rules + local name = lexer.name + for i=1,#_r do + local rule = _r[i] + rules[#rules + 1] = { + name .. "_" .. 
rule[1], + rule[2], + } + end + end + if _s then + local tokenstyles = _l._tokenstyles + for token, style in next, _s do + tokenstyles[token] = style + end + end + lexer = _l + end + -- + local _r = lexer._rules + if _r then + local _s = lexer._tokenstyles + if _s then + for token, style in next, _s do + add_style(lexer, token, style) + end + end + for i=1,#_r do + local rule = _r[i] + add_rule(lexer, rule[1], rule[2]) + end + build_grammar(lexer) + end + -- + add_style(lexer, lexer.whitespace, lexers.STYLE_WHITESPACE) + -- + local foldsymbols = lexer._foldsymbols + if foldsymbols then + local patterns = foldsymbols._patterns + if patterns then + for i = 1, #patterns do + patterns[i] = "()(" .. patterns[i] .. ")" + end + end + end + -- + lexer.lex = lexers.lex + lexer.fold = lexers.fold + -- + nesting = nesting - 1 + -- + if inspect then + context.inspect(lexer) + end + -- + return lexer +end + +-- I probably need to check this occasionally with the original as I've messed around a bit +-- in the past to get nesting working well as one can hit the max number of styles, get +-- clashes due to fuzzy inheritance etc. so there is some interplay with the other patched +-- code. + +function context.embed_lexer(parent, child, start_rule, end_rule) -- mostly the same as the original + local embeddedrules = child._EMBEDDEDRULES + if not embeddedrules then + embeddedrules = { } + child._EMBEDDEDRULES = embeddedrules + end + if not child._RULES then + local rules = child._rules + if not rules then + report("child lexer '%s' has no rules",child._NAME or "unknown") + rules = { } + child._rules = rules + end + for i=1,#rules do + local rule = rules[i] + add_rule(child, rule[1], rule[2]) + end + end + embeddedrules[parent._NAME] = { + ["start_rule"] = start_rule, + ["token_rule"] = join_tokens(child), + ["end_rule"] = end_rule + } + local children = parent._CHILDREN + if not children then + children = { } + parent._CHILDREN = children + end + children[#children + 1] = child + local tokenstyles = parent._tokenstyles + if not tokenstyles then + tokenstyles = { } + parent._tokenstyles = tokenstyles + end + local childname = child._NAME + local whitespace = childname .. "_whitespace" + tokenstyles[whitespace] = lexers.STYLE_WHITESPACE -- all these STYLE_THINGS will go .. 
just a proper hash + if trace then + report("using whitespace '%s' as trigger for '%s' with property '%s'",whitespace,childname,lexers.STYLE_WHITESPACE) + end + local childstyles = child._tokenstyles + if childstyles then + for token, style in next, childstyles do + tokenstyles[token] = style + end + end + -- new, a bit redone, untested, no clue yet what it is for + local parentsymbols = parent._foldsymbols + local childsymbols = child ._foldsymbols + if not parentsymbols then + parentsymbols = { } + parent._foldsymbols = parentsymbols + end + if childsymbols then + for token, symbols in next, childsymbols do + local tokensymbols = parentsymbols[token] + if not tokensymbols then + tokensymbols = { } + parentsymbols[token] = tokensymbols + end + for k, v in next, symbols do + if type(k) == 'number' then + tokensymbols[#tokensymbols + 1] = v + elseif not tokensymbols[k] then + tokensymbols[k] = v + end + end + end + end + -- + child._lexer = parent + parent_lexer = parent +end + +-- we now move the adapted code to the lexers namespace + +lexers.new = context.new +lexers.load = context.loadlexer +------.loadlexer = context.loadlexer +lexers.loadluafile = context.loadluafile +lexers.embed_lexer = context.embed_lexer +lexers.fold = context.fold +lexers.lex = context.lex +lexers.token = context.token +lexers.word_match = context.word_match +lexers.exact_match = context.exact_match +lexers.just_match = context.just_match +lexers.inspect = context.inspect +lexers.report = context.report +lexers.inform = context.inform + +-- helper .. alas ... the lexer's lua instance is rather crippled .. not even +-- math is part of it + +do + + local floor = math and math.floor + local char = string.char + local format = format + local tonumber = tonumber + + if not floor then + + if tonumber(string.match(_VERSION,"%d%.%d")) < 5.3 then + floor = function(n) + return tonumber(format("%d",n)) + end + else + -- 5.3 has a mixed number system and format %d doesn't work with + -- floats any longer ... no fun + floor = function(n) + return (n - n % 1) + end + end + + math = math or { } + + math.floor = floor + + end + + local function utfchar(n) + if n < 0x80 then + return char(n) + elseif n < 0x800 then + return char( + 0xC0 + floor(n/0x40), + 0x80 + (n % 0x40) + ) + elseif n < 0x10000 then + return char( + 0xE0 + floor(n/0x1000), + 0x80 + (floor(n/0x40) % 0x40), + 0x80 + (n % 0x40) + ) + elseif n < 0x40000 then + return char( + 0xF0 + floor(n/0x40000), + 0x80 + floor(n/0x1000), + 0x80 + (floor(n/0x40) % 0x40), + 0x80 + (n % 0x40) + ) + else + -- return char( + -- 0xF1 + floor(n/0x1000000), + -- 0x80 + floor(n/0x40000), + -- 0x80 + floor(n/0x1000), + -- 0x80 + (floor(n/0x40) % 0x40), + -- 0x80 + (n % 0x40) + -- ) + return "?" 
+ end + end + + context.utfchar = utfchar + + -- -- the next one is good enough for use here but not perfect (see context for a + -- -- better one) + -- + -- local function make(t) + -- local p + -- for k, v in next, t do + -- if not p then + -- if next(v) then + -- p = P(k) * make(v) + -- else + -- p = P(k) + -- end + -- else + -- if next(v) then + -- p = p + P(k) * make(v) + -- else + -- p = p + P(k) + -- end + -- end + -- end + -- return p + -- end + -- + -- function lpeg.utfchartabletopattern(list) + -- local tree = { } + -- for i=1,#list do + -- local t = tree + -- for c in gmatch(list[i],".") do + -- if not t[c] then + -- t[c] = { } + -- end + -- t = t[c] + -- end + -- end + -- return make(tree) + -- end + + helpers.utfcharpattern = P(1) * R("\128\191")^0 -- unchecked but fast + + local p_false = P(false) + local p_true = P(true) + + local function make(t) + local function making(t) + local p = p_false + local keys = sortedkeys(t) + for i=1,#keys do + local k = keys[i] + if k ~= "" then + local v = t[k] + if v == true then + p = p + P(k) * p_true + elseif v == false then + -- can't happen + else + p = p + P(k) * making(v) + end + end + end + if t[""] then + p = p + p_true + end + return p + end + local p = p_false + local keys = sortedkeys(t) + for i=1,#keys do + local k = keys[i] + if k ~= "" then + local v = t[k] + if v == true then + p = p + P(k) * p_true + elseif v == false then + -- can't happen + else + p = p + P(k) * making(v) + end + end + end + return p + end + + local function collapse(t,x) + if type(t) ~= "table" then + return t, x + else + local n = next(t) + if n == nil then + return t, x + elseif next(t,n) == nil then + -- one entry + local k = n + local v = t[k] + if type(v) == "table" then + return collapse(v,x..k) + else + return v, x .. 
k + end + else + local tt = { } + for k, v in next, t do + local vv, kk = collapse(v,k) + tt[kk] = vv + end + return tt, x + end + end + end + + function helpers.utfchartabletopattern(list) + local tree = { } + local n = #list + if n == 0 then + for s in next, list do + local t = tree + local p, pk + for c in gmatch(s,".") do + if t == true then + t = { [c] = true, [""] = true } + p[pk] = t + p = t + t = false + elseif t == false then + t = { [c] = false } + p[pk] = t + p = t + t = false + else + local tc = t[c] + if not tc then + tc = false + t[c] = false + end + p = t + t = tc + end + pk = c + end + if t == false then + p[pk] = true + elseif t == true then + -- okay + else + t[""] = true + end + end + else + for i=1,n do + local s = list[i] + local t = tree + local p, pk + for c in gmatch(s,".") do + if t == true then + t = { [c] = true, [""] = true } + p[pk] = t + p = t + t = false + elseif t == false then + t = { [c] = false } + p[pk] = t + p = t + t = false + else + local tc = t[c] + if not tc then + tc = false + t[c] = false + end + p = t + t = tc + end + pk = c + end + if t == false then + p[pk] = true + elseif t == true then + -- okay + else + t[""] = true + end + end + end + collapse(tree,"") + -- inspect(tree) + return make(tree) + end + + patterns.invisibles = helpers.utfchartabletopattern { + utfchar(0x00A0), -- nbsp + utfchar(0x2000), -- enquad + utfchar(0x2001), -- emquad + utfchar(0x2002), -- enspace + utfchar(0x2003), -- emspace + utfchar(0x2004), -- threeperemspace + utfchar(0x2005), -- fourperemspace + utfchar(0x2006), -- sixperemspace + utfchar(0x2007), -- figurespace + utfchar(0x2008), -- punctuationspace + utfchar(0x2009), -- breakablethinspace + utfchar(0x200A), -- hairspace + utfchar(0x200B), -- zerowidthspace + utfchar(0x202F), -- narrownobreakspace + utfchar(0x205F), -- math thinspace + } + + -- now we can make: + + patterns.iwordtoken = patterns.wordtoken - patterns.invisibles + patterns.iwordpattern = patterns.iwordtoken^3 + +end + +-- The following helpers are not used, partially replaced by other mechanisms and +-- when needed I'll first optimize them. I only made them somewhat more readable. + +function lexers.delimited_range(chars, single_line, no_escape, balanced) -- unchanged + local s = sub(chars,1,1) + local e = #chars == 2 and sub(chars,2,2) or s + local range + local b = balanced and s or "" + local n = single_line and "\n" or "" + if no_escape then + local invalid = S(e .. n .. b) + range = patterns.any - invalid + else + local invalid = S(e .. n .. b) + patterns.backslash + range = patterns.any - invalid + patterns.backslash * patterns.any + end + if balanced and s ~= e then + return P { + s * (range + V(1))^0 * e + } + else + return s * range^0 * P(e)^-1 + end +end + +function lexers.starts_line(patt) -- unchanged + return P ( function(input, index) + if index == 1 then + return index + end + local char = sub(input,index - 1,index - 1) + if char == "\n" or char == "\r" or char == "\f" then + return index + end + end ) * patt +end + +function lexers.last_char_includes(s) -- unchanged + s = "[" .. gsub(s,"[-%%%[]", "%%%1") .. 
"]" + return P ( function(input, index) + if index == 1 then + return index + end + local i = index + while match(sub(input,i - 1,i - 1),"[ \t\r\n\f]") do + i = i - 1 + end + if match(sub(input,i - 1,i - 1),s) then + return index + end + end) +end + +function lexers.nested_pair(start_chars, end_chars) -- unchanged + local s = start_chars + local e = P(end_chars)^-1 + return P { + s * (patterns.any - s - end_chars + V(1))^0 * e + } +end + +local function prev_line_is_comment(prefix, text, pos, line, s) -- unchanged + local start = find(line,"%S") + if start < s and not find(line,prefix,start,true) then + return false + end + local p = pos - 1 + if sub(text,p,p) == "\n" then + p = p - 1 + if sub(text,p,p) == "\r" then + p = p - 1 + end + if sub(text,p,p) ~= "\n" then + while p > 1 and sub(text,p - 1,p - 1) ~= "\n" + do p = p - 1 + end + while find(sub(text,p,p),"^[\t ]$") do + p = p + 1 + end + return sub(text,p,p + #prefix - 1) == prefix + end + end + return false +end + +local function next_line_is_comment(prefix, text, pos, line, s) + local p = find(text,"\n",pos + s) + if p then + p = p + 1 + while find(sub(text,p,p),"^[\t ]$") do + p = p + 1 + end + return sub(text,p,p + #prefix - 1) == prefix + end + return false +end + +function lexers.fold_line_comments(prefix) + local property_int = lexers.property_int + return function(text, pos, line, s) + if property_int["fold.line.comments"] == 0 then + return 0 + end + if s > 1 and match(line,"^%s*()") < s then + return 0 + end + local prev_line_comment = prev_line_is_comment(prefix, text, pos, line, s) + local next_line_comment = next_line_is_comment(prefix, text, pos, line, s) + if not prev_line_comment and next_line_comment then + return 1 + end + if prev_line_comment and not next_line_comment then + return -1 + end + return 0 + end +end + +-- done + +return lexers |