Diffstat (limited to 'context/data/textadept/context/lexers')
-rw-r--r--  context/data/textadept/context/lexers/lexer.rme                                    1
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua             196
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-cld.lua                 24
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua             24
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-cpp.lua                189
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-dummy.lua               36
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua      32
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-lua.lua                389
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-mps.lua                183
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua         137
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua            44
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-pdf.lua                205
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-sql.lua                239
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua             24
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-tex.lua                567
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-txt.lua                 81
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua       134
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-web.lua                 68
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua           34
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua         34
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua          34
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-xml.lua                351
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer.lua                   2299
23 files changed, 5325 insertions, 0 deletions
diff --git a/context/data/textadept/context/lexers/lexer.rme b/context/data/textadept/context/lexers/lexer.rme
new file mode 100644
index 000000000..5e9604f63
--- /dev/null
+++ b/context/data/textadept/context/lexers/lexer.rme
@@ -0,0 +1 @@
+We have no lexer.lua here!
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua b/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
new file mode 100644
index 000000000..dce24a2b9
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
@@ -0,0 +1,196 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for bibtex",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+local global, string, table, lpeg = _G, string, table, lpeg
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local type = type
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+local exact_match = lexer.exact_match
+
+local bibtexlexer = lexer.new("bib","scite-context-lexer-bibtex")
+local whitespace = bibtexlexer.whitespace
+
+ local escape, left, right = P("\\"), P('{'), P('}')
+
+ patterns.balanced = P {
+ [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
+ [2] = left * V(1) * right
+ }
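+
+ -- a quick sanity check of the grammar above (just a sketch, assuming a
+ -- plain lua run with lpeg loaded; not part of the lexer):
+ --
+ -- print(patterns.balanced:match([[foo {bar {baz}} \{ok\}]])) -- 23, i.e. past the end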
+
+-- taken from bibl-bib.lua
+
+local anything = patterns.anything
+local percent = P("%")
+local start = P("@")
+local comma = P(",")
+local hash = P("#")
+local escape = P("\\")
+local single = P("'")
+local double = P('"')
+local left = P('{')
+local right = P('}')
+local lineending = S("\n\r")
+local space = S(" \t\n\r\f")
+local spaces = space^1
+local equal = P("=")
+
+local keyword = (R("az","AZ","09") + S("@_:-"))^1
+----- s_quoted = ((escape*single) + spaces + (1-single))^0
+----- d_quoted = ((escape*double) + spaces + (1-double))^0
+local s_quoted = ((escape*single) + (1-single))^0
+local d_quoted = ((escape*double) + (1-double))^0
+
+local balanced = patterns.balanced
+
+local t_spacing = token(whitespace, space^1)
+local t_optionalws = token("default", space^1)^0
+
+local t_equal = token("operator",equal)
+local t_left = token("grouping",left)
+local t_right = token("grouping",right)
+local t_comma = token("operator",comma)
+local t_hash = token("operator",hash)
+
+local t_s_value = token("operator",single)
+ * token("text",s_quoted)
+ * token("operator",single)
+local t_d_value = token("operator",double)
+ * token("text",d_quoted)
+ * token("operator",double)
+local t_b_value = token("operator",left)
+ * token("text",balanced)
+ * token("operator",right)
+local t_r_value = token("text",keyword)
+
+local t_keyword = token("keyword",keyword)
+local t_key = token("command",keyword)
+local t_label = token("warning",keyword)
+
+local t_somevalue = t_s_value + t_d_value + t_b_value + t_r_value
+local t_value = t_somevalue
+ * ((t_optionalws * t_hash * t_optionalws) * t_somevalue)^0
+
+local t_assignment = t_optionalws
+ * t_key
+ * t_optionalws
+ * t_equal
+ * t_optionalws
+ * t_value
+
+local t_shortcut = t_keyword
+ * t_optionalws
+ * t_left
+ * t_optionalws
+ * (t_assignment * t_comma^0)^0
+ * t_optionalws
+ * t_right
+
+local t_definition = t_keyword
+ * t_optionalws
+ * t_left
+ * t_optionalws
+ * t_label
+ * t_optionalws
+ * t_comma
+ * (t_assignment * t_comma^0)^0
+ * t_optionalws
+ * t_right
+
+local t_comment = t_keyword
+ * t_optionalws
+ * t_left
+ * token("text",(1-t_right)^0)
+ * t_optionalws
+ * t_right
+
+local t_forget = token("comment",percent^1 * (1-lineending)^0)
+
+local t_rest = token("default",anything)
+
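+-- as an illustration, a definition like the following (plain bibtex; an
+-- assumption about typical input, not something this file ships) is picked
+-- up by t_definition above:
+--
+-- @article{knuth:1984,
+--     author = "Donald E. Knuth",
+--     title  = {Literate {programming}},
+--     month  = "jan" # "uary",
+-- }
+--
+-- @article is a keyword, knuth:1984 a label, the field names are keys, and
+-- # concatenates values (quoted, braced or bare)
+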
+-- this kind of lexing seems impossible as the size of the buffer passed to the lexer is not
+-- large enough .. but we can cheat and use this:
+--
+-- function OnOpen(filename) editor:Colourise(1,editor.TextLength) end -- or is it 0?
+
+-- somehow lexing fails on this more complex lexer when we insert something: there is
+-- no backtracking to whitespace when we have no embedded lexer, so we fake one ... this
+-- works to some extent but not in all cases (e.g. editing inside a line fails) .. maybe
+-- i need to patch the dll ... (better not)
+
+local dummylexer = lexer.load("scite-context-lexer-dummy","bib-dum")
+
+local dummystart = token("embedded",P("\001")) -- an unlikely-to-be-used character
+local dummystop = token("embedded",P("\002")) -- an unlikely-to-be-used character
+
+lexer.embed_lexer(bibtexlexer,dummylexer,dummystart,dummystop)
+
+-- maybe we need to define each functional block as a lexer (some 4) so i'll do that if
+-- this issue persists ... maybe consider making a local lexer option (not load, just
+-- lexer.new or so) .. or maybe do the reverse and embed the main one in a dummy child
+
+bibtexlexer._rules = {
+ { "whitespace", t_spacing },
+ { "forget", t_forget },
+ { "shortcut", t_shortcut },
+ { "definition", t_definition },
+ { "comment", t_comment },
+ { "rest", t_rest },
+}
+
+-- local t_assignment = t_key
+-- * t_optionalws
+-- * t_equal
+-- * t_optionalws
+-- * t_value
+--
+-- local t_shortcut = t_keyword
+-- * t_optionalws
+-- * t_left
+--
+-- local t_definition = t_keyword
+-- * t_optionalws
+-- * t_left
+-- * t_optionalws
+-- * t_label
+-- * t_optionalws
+-- * t_comma
+--
+-- bibtexlexer._rules = {
+-- { "whitespace", t_spacing },
+-- { "assignment", t_assignment },
+-- { "definition", t_definition },
+-- { "shortcut", t_shortcut },
+-- { "right", t_right },
+-- { "comma", t_comma },
+-- { "forget", t_forget },
+-- { "comment", t_comment },
+-- { "rest", t_rest },
+-- }
+
+bibtexlexer._tokenstyles = context.styleset
+
+bibtexlexer._foldpattern = P("{") + P("}")
+
+bibtexlexer._foldsymbols = {
+ _patterns = {
+ "{",
+ "}",
+ },
+ ["grouping"] = {
+ ["{"] = 1,
+ ["}"] = -1,
+ },
+}
+
+return bibtexlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cld.lua b/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
new file mode 100644
index 000000000..a5fbf9cd7
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
@@ -0,0 +1,24 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for cld",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local cldlexer = lexer.new("cld","scite-context-lexer-cld")
+local lualexer = lexer.load("scite-context-lexer-lua")
+
+-- can probably be done nicer now, a bit of a hack
+
+cldlexer._rules = lualexer._rules_cld
+cldlexer._tokenstyles = lualexer._tokenstyles
+cldlexer._foldsymbols = lualexer._foldsymbols
+cldlexer._directives = lualexer._directives
+
+return cldlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
new file mode 100644
index 000000000..e8ff3c1ff
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
@@ -0,0 +1,24 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for cpp web",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local cppweblexer = lexer.new("cpp-web","scite-context-lexer-cpp")
+local cpplexer = lexer.load("scite-context-lexer-cpp")
+
+-- can probably be done nicer now, a bit of a hack
+
+cppweblexer._rules = cpplexer._rules_web
+cppweblexer._tokenstyles = cpplexer._tokenstyles
+cppweblexer._foldsymbols = cpplexer._foldsymbols
+cppweblexer._directives = cpplexer._directives
+
+return cppweblexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua b/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
new file mode 100644
index 000000000..d56dc58f9
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
@@ -0,0 +1,189 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for cpp",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- looks like the original cpp lexer but web ready (so nothing special here yet)
+
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+local exact_match = lexer.exact_match
+
+local cpplexer = lexer.new("cpp","scite-context-lexer-cpp")
+local whitespace = cpplexer.whitespace
+
+local keywords = { -- copied from cpp.lua
+ -- c
+ "asm", "auto", "break", "case", "const", "continue", "default", "do", "else",
+ "extern", "false", "for", "goto", "if", "inline", "register", "return",
+ "sizeof", "static", "switch", "true", "typedef", "volatile", "while",
+ "restrict",
+ -- hm
+ "_Bool", "_Complex", "_Pragma", "_Imaginary",
+ -- c++.
+ "catch", "class", "const_cast", "delete", "dynamic_cast", "explicit",
+ "export", "friend", "mutable", "namespace", "new", "operator", "private",
+ "protected", "public", "signals", "slots", "reinterpret_cast",
+ "static_assert", "static_cast", "template", "this", "throw", "try", "typeid",
+ "typename", "using", "virtual"
+}
+
+local datatypes = { -- copied from cpp.lua
+ "bool", "char", "double", "enum", "float", "int", "long", "short", "signed",
+ "struct", "union", "unsigned", "void"
+}
+
+local macros = { -- copied from cpp.lua
+ "define", "elif", "else", "endif", "error", "if", "ifdef", "ifndef", "import",
+ "include", "line", "pragma", "undef", "using", "warning"
+}
+
+local space = patterns.space -- S(" \n\r\t\f\v")
+local any = patterns.any
+local restofline = patterns.restofline
+local startofline = patterns.startofline
+
+local squote = P("'")
+local dquote = P('"')
+local period = P(".")
+local escaped = P("\\") * P(1)
+local slashes = P("//")
+local begincomment = P("/*")
+local endcomment = P("*/")
+local percent = P("%")
+
+local hexadecimal = patterns.hexadecimal
+local decimal = patterns.decimal
+local float = patterns.float
+local integer = P("-")^-1 * (hexadecimal + decimal) -- also in patterns ?
+
+local spacing = token(whitespace, space^1)
+local rest = token("default", any)
+
+local shortcomment = token("comment", slashes * restofline^0)
+local longcomment = token("comment", begincomment * (1-endcomment)^0 * endcomment^-1)
+
+local shortstring = token("quote", dquote) -- can be shared
+ * token("string", (escaped + (1-dquote))^0)
+ * token("quote", dquote)
+ + token("quote", squote)
+ * token("string", (escaped + (1-squote))^0)
+ * token("quote", squote)
+
+local number = token("number", float + integer)
+
+local validword = R("AZ","az","__") * R("AZ","az","__","09")^0
+local identifier = token("default",validword)
+
+local operator = token("special", S("+-*/%^!=<>;:{}[]().&|?~"))
+
+----- optionalspace = spacing^0
+
+local p_keywords = exact_match(keywords)
+local p_datatypes = exact_match(datatypes)
+local p_macros = exact_match(macros)
+
+local keyword = token("keyword", p_keywords)
+local datatype = token("keyword", p_datatypes)
+local identifier = token("default", validword)
+
+local macro = token("data", #P("#") * startofline * P("#") * S("\t ")^0 * p_macros)
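+
+-- the #P("#") look ahead makes the rule fail fast on anything not starting
+-- with a hash; after the startofline check it matches e.g. "#include" or
+-- "#  define" (with optional spaces or tabs before the directive name)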
+
+cpplexer._rules = {
+ { "whitespace", spacing },
+ { "keyword", keyword },
+ { "type", datatype },
+ { "identifier", identifier },
+ { "string", shortstring },
+ { "longcomment", longcomment },
+ { "shortcomment", shortcomment },
+ { "number", number },
+ { "macro", macro },
+ { "operator", operator },
+ { "rest", rest },
+}
+
+local web = lexer.loadluafile("scite-context-lexer-web-snippets")
+
+if web then
+
+ lexer.inform("supporting web snippets in cpp lexer")
+
+ cpplexer._rules_web = {
+ { "whitespace", spacing },
+ { "keyword", keyword },
+ { "type", datatype },
+ { "identifier", identifier },
+ { "string", shortstring },
+ { "longcomment", longcomment },
+ { "shortcomment", shortcomment },
+ { "web", web.pattern },
+ { "number", number },
+ { "macro", macro },
+ { "operator", operator },
+ { "rest", rest },
+ }
+
+else
+
+ lexer.report("not supporting web snippets in cpp lexer")
+
+ cpplexer._rules_web = {
+ { "whitespace", spacing },
+ { "keyword", keyword },
+ { "type", datatype },
+ { "identifier", identifier },
+ { "string", shortstring },
+ { "longcomment", longcomment },
+ { "shortcomment", shortcomment },
+ { "number", number },
+ { "macro", macro },
+ { "operator", operator },
+ { "rest", rest },
+ }
+
+end
+
+cpplexer._tokenstyles = context.styleset
+
+cpplexer._foldpattern = P("/*") + P("*/") + S("{}") -- separate entry else interference (singular?)
+
+cpplexer._foldsymbols = {
+ _patterns = {
+ "[{}]",
+ "/%*",
+ "%*/",
+ },
+ -- ["data"] = { -- macro
+ -- ["region"] = 1,
+ -- ["endregion"] = -1,
+ -- ["if"] = 1,
+ -- ["ifdef"] = 1,
+ -- ["ifndef"] = 1,
+ -- ["endif"] = -1,
+ -- },
+ ["special"] = { -- operator
+ ["{"] = 1,
+ ["}"] = -1,
+ },
+ ["comment"] = {
+ ["/*"] = 1,
+ ["*/"] = -1,
+ }
+}
+
+-- -- by indentation:
+
+cpplexer._foldpattern = nil
+cpplexer._foldsymbols = nil
+
+return cpplexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua b/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
new file mode 100644
index 000000000..69590ed34
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
@@ -0,0 +1,36 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer that triggers whitespace backtracking",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- the lexer dll doesn't backtrack when there is no embedded lexer, so we
+-- need to trigger that, for instance in the bibtex lexer, but we still
+-- get failed lexing
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local dummylexer = lexer.new("dummy","scite-context-lexer-dummy")
+local whitespace = dummylexer.whitespace
+
+local space = patterns.space
+local nospace = (1-space)
+
+local t_spacing = token(whitespace, space^1)
+local t_rest = token("default", nospace^1)
+
+dummylexer._rules = {
+ { "whitespace", t_spacing },
+ { "rest", t_rest },
+}
+
+dummylexer._tokenstyles = context.styleset
+
+return dummylexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua b/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
new file mode 100644
index 000000000..5d5b689d2
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
@@ -0,0 +1,32 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for lua longstrings",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local stringlexer = lexer.new("lua-longstring","scite-context-lexer-lua-longstring")
+local whitespace = stringlexer.whitespace
+
+local space = patterns.space
+local nospace = 1 - space
+
+local p_spaces = token(whitespace, space^1)
+local p_string = token("string", nospace^1)
+
+stringlexer._rules = {
+ { "whitespace", p_spaces },
+ { "string", p_string },
+}
+
+stringlexer._tokenstyles = context.styleset
+
+return stringlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-lua.lua b/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
new file mode 100644
index 000000000..a8aa8dbe3
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
@@ -0,0 +1,389 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for lua",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- beware: all multiline constructs are messy, so even if it's not a separate lexer, it should be an embedded lexer
+-- we probably could use a local whitespace variant but this is cleaner
+
+local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
+local match, find = string.match, string.find
+local setmetatable = setmetatable
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+local exact_match = lexer.exact_match
+local just_match = lexer.just_match
+
+local lualexer = lexer.new("lua","scite-context-lexer-lua")
+local whitespace = lualexer.whitespace
+
+local stringlexer = lexer.load("scite-context-lexer-lua-longstring")
+----- labellexer = lexer.load("scite-context-lexer-lua-labelstring")
+
+local directives = { } -- communication channel
+
+-- this will be extended
+
+-- we could combine some in a hash that returns the class that then makes the token
+-- this can save time on large files
+
+local keywords = {
+ "and", "break", "do", "else", "elseif", "end", "false", "for", "function", -- "goto",
+ "if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true",
+ "until", "while",
+}
+
+local functions = {
+ "assert", "collectgarbage", "dofile", "error", "getmetatable",
+ "ipairs", "load", "loadfile", "module", "next", "pairs",
+ "pcall", "print", "rawequal", "rawget", "rawset", "require",
+ "setmetatable", "tonumber", "tostring", "type", "unpack", "xpcall", "select",
+
+ "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32",
+}
+
+local constants = {
+ "_G", "_VERSION", "_M", "...", "_ENV",
+ -- here too
+ "__add", "__call", "__concat", "__div", "__idiv", "__eq", "__gc", "__index",
+ "__le", "__lt", "__metatable", "__mode", "__mul", "__newindex",
+ "__pow", "__sub", "__tostring", "__unm", "__len",
+ "__pairs", "__ipairs",
+ "NaN",
+}
+
+-- local tokenmappings = { }
+--
+-- for i=1,#keywords do tokenmappings[keywords [i]] = "keyword" end
+-- for i=1,#functions do tokenmappings[functions[i]] = "function" end
+-- for i=1,#constants do tokenmappings[constants[i]] = "constant" end
+
+local internals = { -- __
+ "add", "call", "concat", "div", "eq", "gc", "index",
+ "le", "lt", "metatable", "mode", "mul", "newindex",
+ "pow", "sub", "tostring", "unm", "len",
+}
+
+local depricated = {
+ "arg", "arg.n",
+ "loadstring", "setfenv", "getfenv",
+ "pack",
+}
+
+local csnames = { -- todo: option
+ "commands",
+ "context",
+-- "ctxcmd",
+-- "ctx",
+ "metafun",
+ "metapost",
+}
+
+local level = nil
+local setlevel = function(_,i,s) level = s return i end
+
+local equals = P("=")^0
+
+local longonestart = P("[[")
+local longonestop = P("]]")
+local longonestring = (1-longonestop)^0
+
+local longtwostart = P("[") * Cmt(equals,setlevel) * P("[")
+local longtwostop = P("]") * equals * P("]")
+
+local sentinels = { } setmetatable(sentinels, { __index = function(t,k) local v = "]" .. k .. "]" t[k] = v return v end })
+
+local longtwostring = P(function(input,index)
+ if level then
+ -- local sentinel = "]" .. level .. "]"
+ local sentinel = sentinels[level]
+ local _, stop = find(input,sentinel,index,true)
+ return stop and stop + 1 - #sentinel or #input + 1
+ end
+end)
+
+ local longtwostring_body = longtwostring
+
+ local longtwostring_end = P(function(input,index)
+ if level then
+ -- local sentinel = "]" .. level .. "]"
+ local sentinel = sentinels[level]
+ local _, stop = find(input,sentinel,index,true)
+ return stop and stop + 1 or #input + 1
+ end
+ end)
+
+local longcomment = Cmt(#("[[" + ("[" * C(equals) * "[")), function(input,index,level)
+ -- local sentinel = "]" .. level .. "]"
+ local sentinel = sentinels[level]
+ local _, stop = find(input,sentinel,index,true)
+ return stop and stop + 1 or #input + 1
+end)
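+
+-- all three matchers above rely on the same sentinel trick: the Cmt in
+-- longtwostart stores the equal signs in level, sentinels["=="] lazily
+-- becomes the string "]==]", and the match time functions just find it,
+-- e.g. (a sketch, not module code):
+--
+-- level = "=="
+-- print(longtwostring:match("abc]==]xyz")) -- 4, i.e. just before the sentinel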
+
+local space = patterns.space -- S(" \n\r\t\f\v")
+local any = patterns.any
+local eol = patterns.eol
+
+local squote = P("'")
+local dquote = P('"')
+local escaped = P("\\") * P(1)
+local dashes = P("--")
+
+local spacing = token(whitespace, space^1)
+local rest = token("default", any)
+
+local shortcomment = token("comment", dashes * (1-eol)^0)
+local longcomment = token("comment", dashes * longcomment)
+
+-- fails on very long strings with \ at the end of lines (needs an embedded lexer)
+-- and also on a newline before " but it makes no sense to waste time on that
+
+local shortstring = token("quote", dquote)
+ * token("string", (escaped + (1-dquote))^0)
+ * token("quote", dquote)
+ + token("quote", squote)
+ * token("string", (escaped + (1-squote))^0)
+ * token("quote", squote)
+
+----- longstring = token("quote", longonestart)
+----- * token("string", longonestring)
+----- * token("quote", longonestop)
+----- + token("quote", longtwostart)
+----- * token("string", longtwostring)
+----- * token("quote", longtwostop)
+
+local string = shortstring
+----- + longstring
+
+lexer.embed_lexer(lualexer, stringlexer, token("quote",longtwostart), token("string",longtwostring_body) * token("quote",longtwostring_end))
+
+local integer = P("-")^-1 * (patterns.hexadecimal + patterns.decimal)
+local number = token("number", patterns.float + integer)
+ * (token("error",R("AZ","az","__")^1))^0
+
+-- officially 127-255 are ok but not utf so useless
+
+----- validword = R("AZ","az","__") * R("AZ","az","__","09")^0
+
+local utf8character = P(1) * R("\128\191")^1
+local validword = (R("AZ","az","__") + utf8character) * (R("AZ","az","__","09") + utf8character)^0
+local validsuffix = (R("AZ","az") + utf8character) * (R("AZ","az","__","09") + utf8character)^0
+
+local identifier = token("default",validword)
+
+----- operator = token("special", P('..') + P('~=') + S('+-*/%^#=<>;:,.{}[]()')) -- maybe split off {}[]()
+----- operator = token("special", S('+-*/%^#=<>;:,{}[]()') + P('..') + P('.') + P('~=') ) -- maybe split off {}[]()
+----- operator = token("special", S('+-*/%^#=<>;:,{}[]().') + P('~=') ) -- no ^1 because of nested lexers
+local operator = token("special", S('+-*/%^#=<>;:,{}[]().|~')) -- no ^1 because of nested lexers
+
+local structure = token("special", S('{}[]()'))
+
+local optionalspace = spacing^0
+local hasargument = #S("{([")
+
+-- ideally this should be an embedded lexer ..
+
+local gotokeyword = token("keyword", P("goto"))
+ * spacing
+ * token("grouping",validword)
+local gotolabel = token("keyword", P("::"))
+ * (spacing + shortcomment)^0
+ * token("grouping",validword)
+ * (spacing + shortcomment)^0
+ * token("keyword", P("::"))
+
+----- p_keywords = exact_match(keywords)
+----- p_functions = exact_match(functions)
+----- p_constants = exact_match(constants)
+----- p_internals = P("__")
+----- * exact_match(internals)
+
+local p_finish = #(1-R("az","AZ","__"))
+local p_keywords = lexer.helpers.utfchartabletopattern(keywords) * p_finish -- exact_match(keywords)
+local p_functions = lexer.helpers.utfchartabletopattern(functions) * p_finish -- exact_match(functions)
+local p_constants = lexer.helpers.utfchartabletopattern(constants) * p_finish -- exact_match(constants)
+local p_internals = P("__")
+ * lexer.helpers.utfchartabletopattern(internals) * p_finish -- exact_match(internals)
+
+local p_csnames = lexer.helpers.utfchartabletopattern(csnames) -- * p_finish -- just_match(csnames)
+local p_ctnames = P("ctx") * R("AZ","az","__")^0
+local keyword = token("keyword", p_keywords)
+local builtin = token("plain", p_functions)
+local constant = token("data", p_constants)
+local internal = token("data", p_internals)
+local csname = token("user", p_csnames + p_ctnames)
+ * p_finish * optionalspace * (
+ hasargument
+ + ( token("special", S(".:")) * optionalspace * token("user", validword) )^1
+ )^-1
+
+local identifier = token("default", validword)
+ * ( optionalspace * token("special", S(".:")) * optionalspace * (
+ token("warning", p_keywords) +
+ token("data", p_internals) +
+ token("default", validword )
+ ) )^0
+
+-- local t = { } for k, v in next, tokenmappings do t[#t+1] = k end t = table.concat(t)
+-- -- local experimental = (S(t)^1) / function(s) return tokenmappings[s] end * Cp()
+--
+-- local experimental = Cmt(S(t)^1, function(_,i,s)
+-- local t = tokenmappings[s]
+-- if t then
+-- return true, t, i
+-- end
+-- end)
+
+lualexer._rules = {
+ { "whitespace", spacing },
+ { "keyword", keyword }, -- can be combined
+ -- { "structure", structure },
+ { "function", builtin }, -- can be combined
+ { "constant", constant }, -- can be combined
+ -- { "experimental", experimental }, -- works but better split
+ { "csname", csname },
+ { "goto", gotokeyword },
+ { "identifier", identifier },
+ { "string", string },
+ { "number", number },
+ { "longcomment", longcomment },
+ { "shortcomment", shortcomment },
+ { "label", gotolabel },
+ { "operator", operator },
+ { "rest", rest },
+}
+
+-- -- experiment
+--
+-- local idtoken = R("az","AZ","__")
+--
+-- function context.one_of_match(specification)
+-- local pattern = idtoken -- the concat catches _ etc
+-- local list = { }
+-- for i=1,#specification do
+-- local style = specification[i][1]
+-- local words = specification[i][2]
+-- pattern = pattern + S(table.concat(words))
+-- for i=1,#words do
+-- list[words[i]] = style
+-- end
+-- end
+-- return Cmt(pattern^1, function(_,i,s)
+-- local style = list[s]
+-- if style then
+-- return true, { style, i } -- and i or nil
+-- else
+-- -- fail
+-- end
+-- end)
+-- end
+--
+-- local whatever = context.one_of_match {
+-- { "keyword", keywords }, -- keyword
+-- { "plain", functions }, -- builtin
+-- { "data", constants }, -- constant
+-- }
+--
+-- lualexer._rules = {
+-- { "whitespace", spacing },
+-- { "whatever", whatever },
+-- { "csname", csname },
+-- { "goto", gotokeyword },
+-- { "identifier", identifier },
+-- { "string", string },
+-- { "number", number },
+-- { "longcomment", longcomment },
+-- { "shortcomment", shortcomment },
+-- { "label", gotolabel },
+-- { "operator", operator },
+-- { "rest", rest },
+-- }
+
+lualexer._tokenstyles = context.styleset
+
+-- lualexer._foldpattern = R("az")^2 + S("{}[]") -- separate entry else interference
+
+lualexer._foldpattern = (P("end") + P("if") + P("do") + P("function") + P("repeat") + P("until")) * P(#(1 - R("az")))
+ + S("{}[]")
+
+lualexer._foldsymbols = {
+ _patterns = {
+ "[a-z][a-z]+",
+ "[{}%[%]]",
+ },
+ ["keyword"] = { -- challenge: if=0 then=1 else=-1 elseif=-1
+ ["if"] = 1, -- if .. [then|else] .. end
+ ["do"] = 1, -- [while] do .. end
+ ["function"] = 1, -- function .. end
+ ["repeat"] = 1, -- repeat .. until
+ ["until"] = -1,
+ ["end"] = -1,
+ },
+ ["comment"] = {
+ ["["] = 1, ["]"] = -1,
+ },
+ -- ["quote"] = { -- confusing
+ -- ["["] = 1, ["]"] = -1,
+ -- },
+ ["special"] = {
+ -- ["("] = 1, [")"] = -1,
+ ["{"] = 1, ["}"] = -1,
+ },
+}
+
+-- embedded in tex:
+
+local cstoken = R("az","AZ","\127\255") + S("@!?_")
+local texcsname = P("\\") * cstoken^1
+local commentline = P("%") * (1-S("\n\r"))^0
+
+local texcomment = token("comment", Cmt(commentline, function() return directives.cld_inline end))
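+
+-- the Cmt above only succeeds when directives.cld_inline is set, so "%"
+-- lines are colored as comments just in inline cld mode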
+
+local longthreestart = P("\\!!bs")
+local longthreestop = P("\\!!es")
+local longthreestring = (1-longthreestop)^0
+
+local texstring = token("quote", longthreestart)
+ * token("string", longthreestring)
+ * token("quote", longthreestop)
+
+----- texcommand = token("user", texcsname)
+local texcommand = token("warning", texcsname)
+
+-- local texstring = token("quote", longthreestart)
+-- * (texcommand + token("string",P(1-texcommand-longthreestop)^1) - longthreestop)^0 -- we match long non-\cs sequences
+-- * token("quote", longthreestop)
+
+-- local whitespace = "whitespace"
+-- local spacing = token(whitespace, space^1)
+
+lualexer._directives = directives
+
+lualexer._rules_cld = {
+ { "whitespace", spacing },
+ { "texstring", texstring },
+ { "texcomment", texcomment },
+ { "texcommand", texcommand },
+ -- { "structure", structure },
+ { "keyword", keyword },
+ { "function", builtin },
+ { "csname", csname },
+ { "constant", constant },
+ { "identifier", identifier },
+ { "string", string },
+ { "longcomment", longcomment },
+ { "shortcomment", shortcomment }, -- should not be used inline so best signal it as comment (otherwise complex state till end of inline)
+ { "number", number },
+ { "operator", operator },
+ { "rest", rest },
+}
+
+return lualexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-mps.lua b/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
new file mode 100644
index 000000000..e24a41d0c
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
@@ -0,0 +1,183 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for metafun",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+local global, string, table, lpeg = _G, string, table, lpeg
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+local type = type
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+local exact_match = lexer.exact_match
+
+local metafunlexer = lexer.new("mps","scite-context-lexer-mps")
+local whitespace = metafunlexer.whitespace
+
+local metapostprimitives = { }
+local metapostinternals = { }
+local metapostshortcuts = { }
+local metapostcommands = { }
+
+local metafuninternals = { }
+local metafunshortcuts = { }
+local metafuncommands = { }
+
+local mergedshortcuts = { }
+local mergedinternals = { }
+
+do
+
+ local definitions = context.loaddefinitions("scite-context-data-metapost")
+
+ if definitions then
+ metapostprimitives = definitions.primitives or { }
+ metapostinternals = definitions.internals or { }
+ metapostshortcuts = definitions.shortcuts or { }
+ metapostcommands = definitions.commands or { }
+ end
+
+ local definitions = context.loaddefinitions("scite-context-data-metafun")
+
+ if definitions then
+ metafuninternals = definitions.internals or { }
+ metafunshortcuts = definitions.shortcuts or { }
+ metafuncommands = definitions.commands or { }
+ end
+
+ for i=1,#metapostshortcuts do
+ mergedshortcuts[#mergedshortcuts+1] = metapostshortcuts[i]
+ end
+ for i=1,#metafunshortcuts do
+ mergedshortcuts[#mergedshortcuts+1] = metafunshortcuts[i]
+ end
+
+ for i=1,#metapostinternals do
+ mergedinternals[#mergedinternals+1] = metapostinternals[i]
+ end
+ for i=1,#metafuninternals do
+ mergedinternals[#mergedinternals+1] = metafuninternals[i]
+ end
+
+end
+
+local space = patterns.space -- S(" \n\r\t\f\v")
+local any = patterns.any
+
+local dquote = P('"')
+local cstoken = patterns.idtoken
+local mptoken = patterns.alpha
+local leftbrace = P("{")
+local rightbrace = P("}")
+local number = patterns.real
+
+local cstokentex = R("az","AZ","\127\255") + S("@!?_")
+
+-- we could collapse as in tex
+
+local spacing = token(whitespace, space^1)
+local rest = token("default", any)
+local comment = token("comment", P("%") * (1-S("\n\r"))^0)
+local internal = token("reserved", exact_match(mergedshortcuts,false))
+local shortcut = token("data", exact_match(mergedinternals))
+local helper = token("command", exact_match(metafuncommands))
+local plain = token("plain", exact_match(metapostcommands))
+local quoted = token("quote", dquote)
+ * token("string", P(1-dquote)^0)
+ * token("quote", dquote)
+local texstuff = token("quote", P("btex ") + P("verbatimtex "))
+ * token("string", P(1-P(" etex"))^0)
+ * token("quote", P(" etex"))
+local primitive = token("primitive", exact_match(metapostprimitives))
+local identifier = token("default", cstoken^1)
+local number = token("number", number)
+local grouping = token("grouping", S("()[]{}")) -- can be an option
+local suffix = token("number", P("#@") + P("@#") + P("#"))
+local special = token("special", P("#@") + P("@#") + S("#()[]{}<>=:\"")) -- or else := <> etc split
+local texlike = token("warning", P("\\") * cstokentex^1)
+local extra = token("extra", P("+-+") + P("++") + S("`~%^&_-+*/\'|\\"))
+
+local nested = P { leftbrace * (V(1) + (1-rightbrace))^0 * rightbrace }
+local texlike = token("embedded", P("\\") * (P("MP") + P("mp")) * mptoken^1)
+ * spacing^0
+ * token("grouping", leftbrace)
+ * token("default", (nested + (1-rightbrace))^0 )
+ * token("grouping", rightbrace)
+ + token("warning", P("\\") * cstokentex^1)
+
+-- lua: we assume: lua ( "lua code" )
+
+local cldlexer = lexer.load("scite-context-lexer-cld","mps-cld")
+
+local startlua = P("lua") * space^0 * P('(') * space^0 * P('"')
+local stoplua = P('"') * space^0 * P(')')
+
+local startluacode = token("embedded", startlua)
+local stopluacode = #stoplua * token("embedded", stoplua)
+
+lexer.embed_lexer(metafunlexer, cldlexer, startluacode, stopluacode)
+
+local luacall = token("embedded",P("lua") * ( P(".") * R("az","AZ","__")^1 )^1)
+
+metafunlexer._rules = {
+ { "whitespace", spacing },
+ { "comment", comment },
+ { "internal", internal },
+ { "shortcut", shortcut },
+ { "helper", helper },
+ { "plain", plain },
+ { "primitive", primitive },
+ { "luacall", luacall },
+ { "texstuff", texstuff },
+ { "suffix", suffix },
+ { "identifier", identifier },
+ { "number", number },
+ { "quoted", quoted },
+ -- { "grouping", grouping }, -- can be an option
+ { "special", special },
+ { "texlike", texlike },
+ { "extra", extra },
+ { "rest", rest },
+}
+
+metafunlexer._tokenstyles = context.styleset
+
+metafunlexer._foldpattern = patterns.lower^2 -- separate entry else interference
+
+metafunlexer._foldsymbols = {
+ _patterns = {
+ "[a-z][a-z]+",
+ },
+ ["plain"] = {
+ ["beginfig"] = 1,
+ ["endfig"] = -1,
+ ["beginglyph"] = 1,
+ ["endglyph"] = -1,
+ -- ["begingraph"] = 1,
+ -- ["endgraph"] = -1,
+ },
+ ["primitive"] = {
+ ["def"] = 1,
+ ["vardef"] = 1,
+ ["primarydef"] = 1,
+ ["secondarydef" ] = 1,
+ ["tertiarydef"] = 1,
+ ["enddef"] = -1,
+ ["if"] = 1,
+ ["fi"] = -1,
+ ["for"] = 1,
+ ["forever"] = 1,
+ ["endfor"] = -1,
+ }
+}
+
+-- if inspect then inspect(metafunlexer) end
+
+return metafunlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
new file mode 100644
index 000000000..cdf33cf7c
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
@@ -0,0 +1,137 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for pdf objects",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- no longer used: nesting lexers with whitespace in start/stop is unreliable
+
+local P, R, S, C, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.V
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local pdfobjectlexer = lexer.new("pdfobj","scite-context-lexer-pdf-object")
+local whitespace = pdfobjectlexer.whitespace
+
+local space = patterns.space
+local spacing = patterns.spacing
+local nospacing = patterns.nospacing
+local anything = patterns.anything
+local newline = patterns.eol
+local real = patterns.real
+local cardinal = patterns.cardinal
+
+local lparent = P("(")
+local rparent = P(")")
+local langle = P("<")
+local rangle = P(">")
+local escape = P("\\")
+local unicodetrigger = P("feff")
+
+local nametoken = 1 - space - S("<>/[]()")
+local name = P("/") * nametoken^1
+
+local p_string = P { ( escape * anything + lparent * V(1) * rparent + (1 - rparent) )^0 }
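+
+-- the grammar matches a string body with balanced parentheses and
+-- \-escapes, so in (one (two) \) three) everything between the outer
+-- parentheses is consumed as one string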
+
+local t_spacing = token(whitespace, spacing)
+local t_spaces = token(whitespace, spacing)^0
+local t_rest = token("default", nospacing) -- anything
+
+local p_stream = P("stream")
+local p_endstream = P("endstream")
+local p_obj = P("obj")
+local p_endobj = P("endobj")
+local p_reference = P("R")
+
+local p_objectnumber = patterns.cardinal
+local p_comment = P("%") * (1-S("\n\r"))^0
+
+local t_string = token("quote", lparent)
+ * token("string", p_string)
+ * token("quote", rparent)
+local t_unicode = token("quote", langle)
+ * token("plain", unicodetrigger)
+ * token("string", (1-rangle)^1)
+ * token("quote", rangle)
+local t_whatsit = token("quote", langle)
+ * token("string", (1-rangle)^1)
+ * token("quote", rangle)
+local t_keyword = token("command", name)
+local t_constant = token("constant", name)
+local t_number = token("number", real)
+-- t_reference = token("number", cardinal)
+-- * t_spacing
+-- * token("number", cardinal)
+local t_reserved = token("number", P("true") + P("false") + P("NULL"))
+local t_reference = token("warning", cardinal)
+ * t_spacing
+ * token("warning", cardinal)
+ * t_spacing
+ * token("keyword", p_reference)
+
+local t_comment = token("comment", p_comment)
+
+local t_openobject = token("warning", p_objectnumber * spacing)
+-- * t_spacing
+ * token("warning", p_objectnumber * spacing)
+-- * t_spacing
+ * token("keyword", p_obj)
+local t_closeobject = token("keyword", p_endobj)
+
+local t_opendictionary = token("grouping", P("<<"))
+local t_closedictionary = token("grouping", P(">>"))
+
+local t_openarray = token("grouping", P("["))
+local t_closearray = token("grouping", P("]"))
+
+-- todo: comment
+
+local t_stream = token("keyword", p_stream)
+-- * token("default", newline * (1-newline*p_endstream*newline)^1 * newline)
+-- * token("text", (1 - p_endstream)^1)
+ * (token("text", (1 - p_endstream-spacing)^1) + t_spacing)^1
+ * token("keyword", p_endstream)
+
+local t_dictionary = { "dictionary",
+ dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary,
+ array = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray,
+ whatever = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit,
+ }
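+
+-- so an object along these lines (regular pdf syntax, assumed input):
+--
+-- 1 0 obj
+-- << /Type /Page /Parent 2 0 R /MediaBox [ 0 0 595.276 841.890 ] >>
+-- endobj
+--
+-- lexes as two warning numbers plus the obj/endobj keywords around a
+-- dictionary with command keys, a constant, a reference and an array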
+
+----- t_object = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg)
+----- object = t_spaces * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject,
+----- dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary,
+----- array = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray,
+----- whatever = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit,
+----- number = t_number,
+----- }
+
+local t_object = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg)
+ dictionary = t_dictionary.dictionary,
+ array = t_dictionary.array,
+ whatever = t_dictionary.whatever,
+ object = t_openobject^-1 * t_spaces * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject,
+ number = t_number,
+ }
+
+pdfobjectlexer._shared = {
+ dictionary = t_dictionary,
+ object = t_object,
+ stream = t_stream,
+}
+
+pdfobjectlexer._rules = {
+ { "whitespace", t_spacing }, -- in fact, here we don't want whitespace as it's top level lexer work
+ { "object", t_object },
+}
+
+pdfobjectlexer._tokenstyles = context.styleset
+
+return pdfobjectlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
new file mode 100644
index 000000000..f08d16488
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
@@ -0,0 +1,44 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for pdf xref",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- no longer used: nesting lexers with whitespace in start/stop is unreliable
+
+local P, R = lpeg.P, lpeg.R
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local pdfxreflexer = lexer.new("pdfxref","scite-context-lexer-pdf-xref")
+local whitespace = pdfxreflexer.whitespace
+
+local spacing = patterns.spacing
+local cardinal = patterns.cardinal
+local alpha = patterns.alpha
+
+local t_spacing = token(whitespace, spacing)
+
+local p_xref = P("xref")
+local t_xref = token("keyword",p_xref)
+ * token("number", cardinal * spacing * cardinal * spacing)
+
+local t_number = token("number", cardinal * spacing * cardinal * spacing)
+ * token("keyword", alpha)
+
+pdfxreflexer._rules = {
+ { "whitespace", t_spacing },
+ { "xref", t_xref },
+ { "number", t_number },
+}
+
+pdfxreflexer._tokenstyles = context.styleset
+
+return pdfxreflexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
new file mode 100644
index 000000000..1d4796ea5
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
@@ -0,0 +1,205 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for pdf",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- pdf is normally static .. i.e. not edited, so we don't really
+-- need embedded lexers.
+
+local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local pdflexer = lexer.new("pdf","scite-context-lexer-pdf")
+local whitespace = pdflexer.whitespace
+
+----- pdfobjectlexer = lexer.load("scite-context-lexer-pdf-object")
+----- pdfxreflexer = lexer.load("scite-context-lexer-pdf-xref")
+
+local anything = patterns.anything
+local space = patterns.space
+local spacing = patterns.spacing
+local nospacing = patterns.nospacing
+local anything = patterns.anything
+local restofline = patterns.restofline
+
+local t_whitespace = token(whitespace, spacing)
+local t_spacing = token("default", spacing)
+----- t_rest = token("default", nospacing)
+local t_rest = token("default", anything)
+
+local p_comment = P("%") * restofline
+local t_comment = token("comment", p_comment)
+
+-- whatever
+
+local space = patterns.space
+local spacing = patterns.spacing
+local nospacing = patterns.nospacing
+local anything = patterns.anything
+local newline = patterns.eol
+local real = patterns.real
+local cardinal = patterns.cardinal
+local alpha = patterns.alpha
+
+local lparent = P("(")
+local rparent = P(")")
+local langle = P("<")
+local rangle = P(">")
+local escape = P("\\")
+local unicodetrigger = P("feff")
+
+local nametoken = 1 - space - S("<>/[]()")
+local name = P("/") * nametoken^1
+
+local p_string = P { ( escape * anything + lparent * V(1) * rparent + (1 - rparent) )^0 }
+
+local t_spacing = token("default", spacing)
+local t_spaces = token("default", spacing)^0
+local t_rest = token("default", nospacing) -- anything
+
+local p_stream = P("stream")
+local p_endstream = P("endstream")
+local p_obj = P("obj")
+local p_endobj = P("endobj")
+local p_reference = P("R")
+
+local p_objectnumber = patterns.cardinal
+local p_comment = P("%") * (1-S("\n\r"))^0
+
+local t_string = token("quote", lparent)
+ * token("string", p_string)
+ * token("quote", rparent)
+local t_unicode = token("quote", langle)
+ * token("plain", unicodetrigger)
+ * token("string", (1-rangle)^1)
+ * token("quote", rangle)
+local t_whatsit = token("quote", langle)
+ * token("string", (1-rangle)^1)
+ * token("quote", rangle)
+local t_keyword = token("command", name)
+local t_constant = token("constant", name)
+local t_number = token("number", real)
+-- t_reference = token("number", cardinal)
+-- * t_spacing
+-- * token("number", cardinal)
+local t_reserved = token("number", P("true") + P("false") + P("NULL"))
+-- t_reference = token("warning", cardinal * spacing * cardinal * spacing)
+-- * token("keyword", p_reference)
+local t_reference = token("warning", cardinal)
+ * t_spacing
+ * token("warning", cardinal)
+ * t_spacing
+ * token("keyword", p_reference)
+
+local t_comment = token("comment", p_comment)
+
+local t_openobject = token("warning", p_objectnumber)
+ * t_spacing
+ * token("warning", p_objectnumber)
+ * t_spacing
+ * token("keyword", p_obj)
+-- t_openobject = token("warning", p_objectnumber * spacing)
+-- * token("warning", p_objectnumber * spacing)
+-- * token("keyword", p_obj)
+local t_closeobject = token("keyword", p_endobj)
+
+local t_opendictionary = token("grouping", P("<<"))
+local t_closedictionary = token("grouping", P(">>"))
+
+local t_openarray = token("grouping", P("["))
+local t_closearray = token("grouping", P("]"))
+
+local t_stream = token("keyword", p_stream)
+ * token("text", (1 - p_endstream)^1)
+ * token("keyword", p_endstream)
+
+local t_dictionary = { "dictionary",
+ dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary,
+ array = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray,
+ whatever = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit,
+ }
+
+local t_object = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg)
+ dictionary = t_dictionary.dictionary,
+ array = t_dictionary.array,
+ whatever = t_dictionary.whatever,
+ object = t_openobject * t_spaces * (V("dictionary")^-1 * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject,
+ number = t_number,
+ }
+
+-- objects ... sometimes NUL characters play havoc ... and in xref we have
+-- issues with embedded lexers that have spaces in the start and stop
+-- conditions and this cannot be handled well either ... so, an imperfect
+-- solution ... but anyway, there is not that much that can end up in
+-- the root of the tree so we're sort of safe
+
+local p_trailer = P("trailer")
+local t_trailer = token("keyword", p_trailer)
+ * t_spacing
+ * t_dictionary
+-- t_trailer = token("keyword", p_trailer * spacing)
+-- * t_dictionary
+
+local p_startxref = P("startxref")
+local t_startxref = token("keyword", p_startxref)
+ * t_spacing
+ * token("number", cardinal)
+-- t_startxref = token("keyword", p_startxref * spacing)
+-- * token("number", cardinal)
+
+local p_xref = P("xref")
+local t_xref = token("keyword",p_xref)
+ * t_spacing
+ * token("number", cardinal)
+ * t_spacing
+ * token("number", cardinal)
+ * spacing
+-- t_xref = token("keyword",p_xref)
+-- * token("number", spacing * cardinal * spacing * cardinal * spacing)
+
+local t_number = token("number", cardinal)
+ * t_spacing
+ * token("number", cardinal)
+ * t_spacing
+ * token("keyword", S("fn"))
+-- t_number = token("number", cardinal * spacing * cardinal * spacing)
+-- * token("keyword", S("fn"))
+
+pdflexer._rules = {
+ { "whitespace", t_whitespace },
+ { "object", t_object },
+ { "comment", t_comment },
+ { "trailer", t_trailer },
+ { "startxref", t_startxref },
+ { "xref", t_xref },
+ { "number", t_number },
+ { "rest", t_rest },
+}
+
+pdflexer._tokenstyles = context.styleset
+
+-- lexer.inspect(pdflexer)
+
+-- collapser: obj endobj stream endstream
+
+pdflexer._foldpattern = p_obj + p_endobj + p_stream + p_endstream
+
+pdflexer._foldsymbols = {
+ ["keyword"] = {
+ ["obj"] = 1,
+ ["endobj"] = -1,
+ ["stream"] = 1,
+ ["endstream"] = -1,
+ },
+}
+
+return pdflexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-sql.lua b/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
new file mode 100644
index 000000000..ea432c5c9
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
@@ -0,0 +1,239 @@
+local info = {
+ version = 1.001,
+ comment = "scintilla lpeg lexer for sql",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+local exact_match = lexer.exact_match
+
+local sqllexer = lexer.new("sql","scite-context-lexer-sql")
+local whitespace = sqllexer.whitespace
+
+-- ANSI SQL 92 | 99 | 2003
+
+local keywords_standard = {
+ "absolute", "action", "add", "after", "all", "allocate", "alter", "and", "any",
+ "are", "array", "as", "asc", "asensitive", "assertion", "asymmetric", "at",
+ "atomic", "authorization", "avg", "before", "begin", "between", "bigint",
+ "binary", "bit", "bit_length", "blob", "boolean", "both", "breadth", "by",
+ "call", "called", "cascade", "cascaded", "case", "cast", "catalog", "char",
+ "char_length", "character", "character_length", "check", "clob", "close",
+ "coalesce", "collate", "collation", "column", "commit", "condition", "connect",
+ "connection", "constraint", "constraints", "constructor", "contains", "continue",
+ "convert", "corresponding", "count", "create", "cross", "cube", "current",
+ "current_date", "current_default_transform_group", "current_path",
+ "current_role", "current_time", "current_timestamp",
+ "current_transform_group_for_type", "current_user", "cursor", "cycle", "data",
+ "date", "day", "deallocate", "dec", "decimal", "declare", "default",
+ "deferrable", "deferred", "delete", "depth", "deref", "desc", "describe",
+ "descriptor", "deterministic", "diagnostics", "disconnect", "distinct", "do",
+ "domain", "double", "drop", "dynamic", "each", "element", "else", "elseif",
+ "end", "equals", "escape", "except", "exception", "exec", "execute", "exists",
+ "exit", "external", "extract", "false", "fetch", "filter", "first", "float",
+ "for", "foreign", "found", "free", "from", "full", "function", "general", "get",
+ "global", "go", "goto", "grant", "group", "grouping", "handler", "having",
+ "hold", "hour", "identity", "if", "immediate", "in", "indicator", "initially",
+ "inner", "inout", "input", "insensitive", "insert", "int", "integer",
+ "intersect", "interval", "into", "is", "isolation", "iterate", "join", "key",
+ "language", "large", "last", "lateral", "leading", "leave", "left", "level",
+ "like", "local", "localtime", "localtimestamp", "locator", "loop", "lower",
+ "map", "match", "max", "member", "merge", "method", "min", "minute", "modifies",
+ "module", "month", "multiset", "names", "national", "natural", "nchar", "nclob",
+ "new", "next", "no", "none", "not", "null", "nullif", "numeric", "object",
+ "octet_length", "of", "old", "on", "only", "open", "option", "or", "order",
+ "ordinality", "out", "outer", "output", "over", "overlaps", "pad", "parameter",
+ "partial", "partition", "path", "position", "precision", "prepare", "preserve",
+ "primary", "prior", "privileges", "procedure", "public", "range", "read",
+ "reads", "real", "recursive", "ref", "references", "referencing", "relative",
+ "release", "repeat", "resignal", "restrict", "result", "return", "returns",
+ "revoke", "right", "role", "rollback", "rollup", "routine", "row", "rows",
+ "savepoint", "schema", "scope", "scroll", "search", "second", "section",
+ "select", "sensitive", "session", "session_user", "set", "sets", "signal",
+ "similar", "size", "smallint", "some", "space", "specific", "specifictype",
+ "sql", "sqlcode", "sqlerror", "sqlexception", "sqlstate", "sqlwarning", "start",
+ "state", "static", "submultiset", "substring", "sum", "symmetric", "system",
+ "system_user", "table", "tablesample", "temporary", "then", "time", "timestamp",
+ "timezone_hour", "timezone_minute", "to", "trailing", "transaction", "translate",
+ "translation", "treat", "trigger", "trim", "true", "under", "undo", "union",
+ "unique", "unknown", "unnest", "until", "update", "upper", "usage", "user",
+ "using", "value", "values", "varchar", "varying", "view", "when", "whenever",
+ "where", "while", "window", "with", "within", "without", "work", "write", "year",
+ "zone",
+}
+
+-- The dialects list is taken from drupal.org with standard subtracted.
+--
+-- MySQL 3.23.x | 4.x | 5.x
+-- PostgreSQL 8.1
+-- MS SQL Server 2000
+-- MS ODBC
+-- Oracle 10.2
+
+local keywords_dialects = {
+ "a", "abort", "abs", "access", "ada", "admin", "aggregate", "alias", "also",
+ "always", "analyse", "analyze", "assignment", "attribute", "attributes", "audit",
+ "auto_increment", "avg_row_length", "backup", "backward", "bernoulli", "bitvar",
+ "bool", "break", "browse", "bulk", "c", "cache", "cardinality", "catalog_name",
+ "ceil", "ceiling", "chain", "change", "character_set_catalog",
+ "character_set_name", "character_set_schema", "characteristics", "characters",
+ "checked", "checkpoint", "checksum", "class", "class_origin", "cluster",
+ "clustered", "cobol", "collation_catalog", "collation_name", "collation_schema",
+ "collect", "column_name", "columns", "command_function", "command_function_code",
+ "comment", "committed", "completion", "compress", "compute", "condition_number",
+ "connection_name", "constraint_catalog", "constraint_name", "constraint_schema",
+ "containstable", "conversion", "copy", "corr", "covar_pop", "covar_samp",
+ "createdb", "createrole", "createuser", "csv", "cume_dist", "cursor_name",
+ "database", "databases", "datetime", "datetime_interval_code",
+ "datetime_interval_precision", "day_hour", "day_microsecond", "day_minute",
+ "day_second", "dayofmonth", "dayofweek", "dayofyear", "dbcc", "defaults",
+ "defined", "definer", "degree", "delay_key_write", "delayed", "delimiter",
+ "delimiters", "dense_rank", "deny", "derived", "destroy", "destructor",
+ "dictionary", "disable", "disk", "dispatch", "distinctrow", "distributed", "div",
+ "dual", "dummy", "dump", "dynamic_function", "dynamic_function_code", "enable",
+ "enclosed", "encoding", "encrypted", "end-exec", "enum", "errlvl", "escaped",
+ "every", "exclude", "excluding", "exclusive", "existing", "exp", "explain",
+ "fields", "file", "fillfactor", "final", "float4", "float8", "floor", "flush",
+ "following", "force", "fortran", "forward", "freetext", "freetexttable",
+ "freeze", "fulltext", "fusion", "g", "generated", "granted", "grants",
+ "greatest", "header", "heap", "hierarchy", "high_priority", "holdlock", "host",
+ "hosts", "hour_microsecond", "hour_minute", "hour_second", "identified",
+ "identity_insert", "identitycol", "ignore", "ilike", "immutable",
+ "implementation", "implicit", "include", "including", "increment", "index",
+ "infile", "infix", "inherit", "inherits", "initial", "initialize", "insert_id",
+ "instance", "instantiable", "instead", "int1", "int2", "int3", "int4", "int8",
+ "intersection", "invoker", "isam", "isnull", "k", "key_member", "key_type",
+ "keys", "kill", "lancompiler", "last_insert_id", "least", "length", "less",
+ "limit", "lineno", "lines", "listen", "ln", "load", "location", "lock", "login",
+ "logs", "long", "longblob", "longtext", "low_priority", "m", "matched",
+ "max_rows", "maxextents", "maxvalue", "mediumblob", "mediumint", "mediumtext",
+ "message_length", "message_octet_length", "message_text", "middleint",
+ "min_rows", "minus", "minute_microsecond", "minute_second", "minvalue",
+ "mlslabel", "mod", "mode", "modify", "monthname", "more", "move", "mumps",
+ "myisam", "name", "nesting", "no_write_to_binlog", "noaudit", "nocheck",
+ "nocompress", "nocreatedb", "nocreaterole", "nocreateuser", "noinherit",
+ "nologin", "nonclustered", "normalize", "normalized", "nosuperuser", "nothing",
+ "notify", "notnull", "nowait", "nullable", "nulls", "number", "octets", "off",
+ "offline", "offset", "offsets", "oids", "online", "opendatasource", "openquery",
+ "openrowset", "openxml", "operation", "operator", "optimize", "optionally",
+ "options", "ordering", "others", "outfile", "overlay", "overriding", "owner",
+ "pack_keys", "parameter_mode", "parameter_name", "parameter_ordinal_position",
+ "parameter_specific_catalog", "parameter_specific_name",
+ "parameter_specific_schema", "parameters", "pascal", "password", "pctfree",
+ "percent", "percent_rank", "percentile_cont", "percentile_disc", "placing",
+ "plan", "pli", "postfix", "power", "preceding", "prefix", "preorder", "prepared",
+ "print", "proc", "procedural", "process", "processlist", "purge", "quote",
+ "raid0", "raiserror", "rank", "raw", "readtext", "recheck", "reconfigure",
+ "regexp", "regr_avgx", "regr_avgy", "regr_count", "regr_intercept", "regr_r2",
+ "regr_slope", "regr_sxx", "regr_sxy", "regr_syy", "reindex", "reload", "rename",
+ "repeatable", "replace", "replication", "require", "reset", "resource",
+ "restart", "restore", "returned_cardinality", "returned_length",
+ "returned_octet_length", "returned_sqlstate", "rlike", "routine_catalog",
+ "routine_name", "routine_schema", "row_count", "row_number", "rowcount",
+ "rowguidcol", "rowid", "rownum", "rule", "save", "scale", "schema_name",
+ "schemas", "scope_catalog", "scope_name", "scope_schema", "second_microsecond",
+ "security", "self", "separator", "sequence", "serializable", "server_name",
+ "setof", "setuser", "share", "show", "shutdown", "simple", "soname", "source",
+ "spatial", "specific_name", "sql_big_result", "sql_big_selects",
+ "sql_big_tables", "sql_calc_found_rows", "sql_log_off", "sql_log_update",
+ "sql_low_priority_updates", "sql_select_limit", "sql_small_result",
+ "sql_warnings", "sqlca", "sqrt", "ssl", "stable", "starting", "statement",
+ "statistics", "status", "stddev_pop", "stddev_samp", "stdin", "stdout",
+ "storage", "straight_join", "strict", "string", "structure", "style",
+ "subclass_origin", "sublist", "successful", "superuser", "synonym", "sysdate",
+ "sysid", "table_name", "tables", "tablespace", "temp", "template", "terminate",
+ "terminated", "text", "textsize", "than", "ties", "tinyblob", "tinyint",
+ "tinytext", "toast", "top", "top_level_count", "tran", "transaction_active",
+ "transactions_committed", "transactions_rolled_back", "transform", "transforms",
+ "trigger_catalog", "trigger_name", "trigger_schema", "truncate", "trusted",
+ "tsequal", "type", "uescape", "uid", "unbounded", "uncommitted", "unencrypted",
+ "unlisten", "unlock", "unnamed", "unsigned", "updatetext", "use",
+ "user_defined_type_catalog", "user_defined_type_code", "user_defined_type_name",
+ "user_defined_type_schema", "utc_date", "utc_time", "utc_timestamp", "vacuum",
+ "valid", "validate", "validator", "var_pop", "var_samp", "varbinary", "varchar2",
+ "varcharacter", "variable", "variables", "verbose", "volatile", "waitfor",
+ "width_bucket", "writetext", "x509", "xor", "year_month", "zerofill",
+}
+
+local space = patterns.space -- S(" \n\r\t\f\v")
+local any = patterns.any
+local restofline = patterns.restofline
+local startofline = patterns.startofline
+
+local squote = P("'")
+local dquote = P('"')
+local bquote = P('`')
+local escaped = P("\\") * P(1)
+
+local begincomment = P("/*")
+local endcomment = P("*/")
+
+local decimal = patterns.decimal
+local float = patterns.float
+local integer = P("-")^-1 * decimal
+
+local spacing = token(whitespace, space^1)
+local rest = token("default", any)
+
+local shortcomment = token("comment", (P("#") + P("--")) * restofline^0)
+local longcomment = token("comment", begincomment * (1-endcomment)^0 * endcomment^-1)
+
+local p_validword = R("AZ","az","__") * R("AZ","az","__","09")^0
+local identifier = token("default",p_validword)
+
+local shortstring = token("quote", dquote) -- can be shared
+ * token("string", (escaped + (1-dquote))^0)
+ * token("quote", dquote)
+ + token("quote", squote)
+ * token("string", (escaped + (1-squote))^0)
+ * token("quote", squote)
+ + token("quote", bquote)
+ * token("string", (escaped + (1-bquote))^0)
+ * token("quote", bquote)
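+
+-- So 'single', "double" and `backtick` quoted strings all get the same
+-- quote/string/quote coloring, with backslash escapes permitted inside.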
+
+local p_keywords_s = exact_match(keywords_standard,nil,true)
+local p_keywords_d = exact_match(keywords_dialects,nil,true)
+local keyword_s = token("keyword", p_keywords_s)
+local keyword_d = token("command", p_keywords_d)
+
+local number = token("number", float + integer)
+local operator = token("special", S("+-*/%^!=<>;:{}[]().&|?~"))
+
+sqllexer._tokenstyles = context.styleset
+
+sqllexer._foldpattern = P("/*") + P("*/") + S("{}") -- separate entry else interference
+
+sqllexer._foldsymbols = {
+ _patterns = {
+ "/%*",
+ "%*/",
+ },
+ ["comment"] = {
+ ["/*"] = 1,
+ ["*/"] = -1,
+ }
+}
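+
+-- In a fold symbols table a value of 1 opens a fold and -1 closes one, so the
+-- text between "/*" and "*/" can be folded as one unit.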
+
+sqllexer._rules = {
+ { "whitespace", spacing },
+ { "keyword-s", keyword_s },
+ { "keyword-d", keyword_d },
+ { "identifier", identifier },
+ { "string", shortstring },
+ { "longcomment", longcomment },
+ { "shortcomment", shortcomment },
+ { "number", number },
+ { "operator", operator },
+ { "rest", rest },
+}
+
+return sqllexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
new file mode 100644
index 000000000..4a55fd143
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
@@ -0,0 +1,24 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for tex web",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local texweblexer = lexer.new("tex-web","scite-context-lexer-tex")
+local texlexer = lexer.load("scite-context-lexer-tex")
+
+-- can probably be done nicer now, a bit of a hack
+
+texweblexer._rules = texlexer._rules_web
+texweblexer._tokenstyles = texlexer._tokenstyles
+texweblexer._foldsymbols = texlexer._foldsymbols
+texweblexer._directives = texlexer._directives
+
+return texweblexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-tex.lua b/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
new file mode 100644
index 000000000..bc08bfcd9
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
@@ -0,0 +1,567 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for context",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- maybe: _LINEBYLINE variant for large files (no nesting)
+-- maybe: protected_macros
+
+--[[
+
+ experiment dd 2009/10/28 .. todo:
+
+ -- figure out if tabs instead of splits are possible
+ -- locate an option to enter name in file dialogue (like windows permits)
+ -- figure out why loading a file fails
+ -- we cannot print to the log pane
+ -- we cannot access props["keywordclass.macros.context.en"]
+ -- lexer.get_property only handles integers
+ -- we cannot run a command to get the location of mult-def.lua
+
+ -- local interface = props["keywordclass.macros.context.en"]
+ -- local interface = lexer.get_property("keywordclass.macros.context.en","")
+
+]]--
+
+local global, string, table, lpeg = _G, string, table, lpeg
+local P, R, S, V, C, Cmt, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt, lpeg.Cp, lpeg.Cc, lpeg.Ct
+local type, next = type, next
+local find, match, lower, upper = string.find, string.match, string.lower, string.upper
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+local inform = context.inform
+
+local token = lexer.token
+local exact_match = lexer.exact_match
+
+local contextlexer = lexer.new("tex","scite-context-lexer-tex")
+local whitespace = contextlexer.whitespace
+
+local cldlexer = lexer.load("scite-context-lexer-cld")
+local mpslexer = lexer.load("scite-context-lexer-mps")
+
+local commands = { en = { } }
+local primitives = { }
+local helpers = { }
+local constants = { }
+
+do -- todo: only once, store in global
+
+ -- commands helpers primitives
+
+ local definitions = context.loaddefinitions("scite-context-data-interfaces")
+
+ if definitions then
+ local interfaces = { }
+ for interface, list in next, definitions do
+ interfaces[#interfaces+1] = interface
+ local c = { }
+ for i=1,#list do
+ c[list[i]] = true
+ end
+ if interface ~= "en" then
+ list = definitions.en
+ if list then
+ for i=1,#list do
+ c[list[i]] = true
+ end
+ end
+ end
+ commands[interface] = c
+ end
+ inform("context user interfaces '%s' supported",table.concat(interfaces," "))
+ end
+
+ local definitions = context.loaddefinitions("scite-context-data-context")
+ local overloaded = { }
+
+ if definitions then
+ helpers = definitions.helpers or { }
+ constants = definitions.constants or { }
+ for i=1,#helpers do
+ overloaded[helpers[i]] = true
+ end
+ for i=1,#constants do
+ overloaded[constants[i]] = true
+ end
+ end
+
+ local definitions = context.loaddefinitions("scite-context-data-tex")
+
+ if definitions then
+ local function add(data,normal)
+ for k, v in next, data do
+ if v ~= "/" and v ~= "-" then
+ if not overloaded[v] then
+ primitives[#primitives+1] = v
+ end
+ if normal then
+ v = "normal" .. v
+ if not overloaded[v] then
+ primitives[#primitives+1] = v
+ end
+ end
+ end
+ end
+ end
+ add(definitions.tex,true)
+ add(definitions.etex,true)
+ add(definitions.pdftex,true)
+ add(definitions.aleph,true)
+ add(definitions.omega,true)
+ add(definitions.luatex,true)
+ add(definitions.xetex,true)
+ end
+
+end
+
+local currentcommands = commands.en or { }
+
+local cstoken = R("az","AZ","\127\255") + S("@!?_")
+
+local knowncommand = Cmt(cstoken^1, function(_,i,s)
+ return currentcommands[s] and i
+end)
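+
+-- The match time capture above succeeds only for names that are known in the
+-- current interface: returning the position i continues the match there while
+-- a false result makes it fail.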
+
+local utfchar = context.utfchar
+local wordtoken = context.patterns.wordtoken
+local iwordtoken = context.patterns.iwordtoken
+local wordpattern = context.patterns.wordpattern
+local iwordpattern = context.patterns.iwordpattern
+local invisibles = context.patterns.invisibles
+local checkedword = context.checkedword
+local styleofword = context.styleofword
+local setwordlist = context.setwordlist
+local validwords = false
+local validminimum = 3
+
+-- % language=uk
+
+-- fails (empty loop message) ... latest lpeg issue?
+
+local knownpreamble = Cmt(P("% "), function(input,i,_) -- todo : utfbomb, was #P("% ")
+ if i < 10 then
+ validwords, validminimum = false, 3
+ local s, e, word = find(input,"^(.-)[\n\r]",i) -- combine with match
+ if word then
+ local interface = match(word,"interface=([a-z][a-z]+)")
+ if interface and #interface == 2 then
+ inform("enabling context user interface '%s'",interface)
+ currentcommands = commands[interface] or commands.en or { }
+ end
+ local language = match(word,"language=([a-z][a-z]+)")
+ validwords, validminimum = setwordlist(language)
+ end
+ end
+ return false
+end)
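+
+-- So a first line like the following (the values are only illustrative)
+--
+-- % interface=nl language=uk
+--
+-- switches the command set and the spell checking wordlist as a side effect;
+-- the match itself always fails so nothing is tokenized as preamble.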
+
+-- -- the token list contains { "style", endpos } entries
+-- --
+-- -- in principle this is faster but it is also crash sensitive for large files
+
+-- local constants_hash = { } for i=1,#constants do constants_hash [constants [i]] = true end
+-- local helpers_hash = { } for i=1,#helpers do helpers_hash [helpers [i]] = true end
+-- local primitives_hash = { } for i=1,#primitives do primitives_hash[primitives[i]] = true end
+
+-- local specialword = Ct( P("\\") * Cmt( C(cstoken^1), function(input,i,s)
+-- if currentcommands[s] then
+-- return true, "command", i
+-- elseif constants_hash[s] then
+-- return true, "data", i
+-- elseif helpers_hash[s] then
+-- return true, "plain", i
+-- elseif primitives_hash[s] then
+-- return true, "primitive", i
+-- else -- if starts with if then primitive
+-- return true, "user", i
+-- end
+-- end) )
+
+-- local specialword = P("\\") * Cmt( C(cstoken^1), function(input,i,s)
+-- if currentcommands[s] then
+-- return true, { "command", i }
+-- elseif constants_hash[s] then
+-- return true, { "data", i }
+-- elseif helpers_hash[s] then
+-- return true, { "plain", i }
+-- elseif primitives_hash[s] then
+-- return true, { "primitive", i }
+-- else -- if starts with if then primitive
+-- return true, { "user", i }
+-- end
+-- end)
+
+-- experiment: keep space with whatever ... less tables
+
+-- 10pt
+
+local commentline = P("%") * (1-S("\n\r"))^0
+local endline = S("\n\r")^1
+
+local space = patterns.space -- S(" \n\r\t\f\v")
+local any = patterns.any
+local backslash = P("\\")
+local hspace = S(" \t")
+
+local p_spacing = space^1
+local p_rest = any
+
+local p_preamble = knownpreamble
+local p_comment = commentline
+----- p_command = backslash * knowncommand
+----- p_constant = backslash * exact_match(constants)
+----- p_helper = backslash * exact_match(helpers)
+----- p_primitive = backslash * exact_match(primitives)
+
+local p_command = backslash * lexer.helpers.utfchartabletopattern(currentcommands) * #(1-cstoken)
+local p_constant = backslash * lexer.helpers.utfchartabletopattern(constants) * #(1-cstoken)
+local p_helper = backslash * lexer.helpers.utfchartabletopattern(helpers) * #(1-cstoken)
+local p_primitive = backslash * lexer.helpers.utfchartabletopattern(primitives) * #(1-cstoken)
+
+local p_ifprimitive = P("\\if") * cstoken^1
+local p_csname = backslash * (cstoken^1 + P(1))
+local p_grouping = S("{$}")
+local p_special = S("#()[]<>=\"")
+local p_extra = S("`~%^&_-+/\'|")
+local p_text = iwordtoken^1 --maybe add punctuation and space
+
+local p_reserved = backslash * (
+ P("??") + R("az") * P("!")
+ ) * cstoken^1
+
+local p_number = context.patterns.real
+local p_unit = P("pt") + P("bp") + P("sp") + P("mm") + P("cm") + P("cc") + P("dd")
+
+-- no looking back = #(1-S("[=")) * cstoken^3 * #(1-S("=]"))
+
+-- This one gives stack overflows:
+--
+-- local p_word = Cmt(iwordpattern, function(_,i,s)
+-- if validwords then
+-- return checkedword(validwords,validminimum,s,i)
+-- else
+-- -- return true, { "text", i }
+-- return true, "text", i
+-- end
+-- end)
+--
+-- So we use this one instead:
+
+----- p_word = Ct( iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() ) -- the function can be inlined
+local p_word = iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() -- the function can be inlined
+
+----- p_text = (1 - p_grouping - p_special - p_extra - backslash - space + hspace)^1
+
+-- keep a key pressed at the end of syst-aux.mkiv:
+--
+-- 0 : 15 sec
+-- 1 : 13 sec
+-- 2 : 10 sec
+--
+-- the problem is that quite some style subtables get generated so collapsing ranges helps
+
+local option = 1
+
+if option == 1 then
+
+ p_comment = p_comment^1
+ p_grouping = p_grouping^1
+ p_special = p_special^1
+ p_extra = p_extra^1
+
+ p_command = p_command^1
+ p_constant = p_constant^1
+ p_helper = p_helper^1
+ p_primitive = p_primitive^1
+ p_ifprimitive = p_ifprimitive^1
+ p_reserved = p_reserved^1
+
+elseif option == 2 then
+
+ local included = space^0
+
+ p_comment = (p_comment * included)^1
+ p_grouping = (p_grouping * included)^1
+ p_special = (p_special * included)^1
+ p_extra = (p_extra * included)^1
+
+ p_command = (p_command * included)^1
+ p_constant = (p_constant * included)^1
+ p_helper = (p_helper * included)^1
+ p_primitive = (p_primitive * included)^1
+ p_ifprimitive = (p_ifprimitive * included)^1
+ p_reserved = (p_reserved * included)^1
+
+end
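+
+-- With option 1 a run like "{{{" thus becomes one grouping token instead of
+-- three, which keeps the number of generated style entries down.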
+
+local p_invisible = invisibles^1
+
+local spacing = token(whitespace, p_spacing )
+
+local rest = token("default", p_rest )
+local preamble = token("preamble", p_preamble )
+local comment = token("comment", p_comment )
+local command = token("command", p_command )
+local constant = token("data", p_constant )
+local helper = token("plain", p_helper )
+local primitive = token("primitive", p_primitive )
+local ifprimitive = token("primitive", p_ifprimitive)
+local reserved = token("reserved", p_reserved ) -- reserved internal preproc
+local csname = token("user", p_csname )
+local grouping = token("grouping", p_grouping )
+local number = token("number", p_number )
+ * token("constant", p_unit )
+local special = token("special", p_special )
+local extra = token("extra", p_extra )
+local invisible = token("invisible", p_invisible )
+local text = token("default", p_text )
+local word = p_word
+
+----- startluacode = token("grouping", P("\\startluacode"))
+----- stopluacode = token("grouping", P("\\stopluacode"))
+
+local luastatus = false
+local luatag = nil
+local lualevel = 0
+
+local function startdisplaylua(_,i,s)
+ luatag = s
+ luastatus = "display"
+ cldlexer._directives.cld_inline = false
+ return true
+end
+
+local function stopdisplaylua(_,i,s)
+ local ok = luatag == s
+ if ok then
+ cldlexer._directives.cld_inline = false
+ luastatus = false
+ end
+ return ok
+end
+
+local function startinlinelua(_,i,s)
+ if luastatus == "display" then
+ return false
+ elseif not luastatus then
+ luastatus = "inline"
+ cldlexer._directives.cld_inline = true
+ lualevel = 1
+ return true
+ else -- if luastatus == "inline" then
+ lualevel = lualevel + 1
+ return true
+ end
+end
+
+local function stopinlinelua_b(_,i,s) -- {
+ if luastatus == "display" then
+ return false
+ elseif luastatus == "inline" then
+ lualevel = lualevel + 1 -- ?
+ return false
+ else
+ return true
+ end
+end
+
+local function stopinlinelua_e(_,i,s) -- }
+ if luastatus == "display" then
+ return false
+ elseif luastatus == "inline" then
+ lualevel = lualevel - 1
+ local ok = lualevel <= 0 -- was 0
+ if ok then
+ cldlexer._directives.cld_inline = false
+ luastatus = false
+ end
+ return ok
+ else
+ return true
+ end
+end
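+
+-- The level counting above makes sure that only the brace that balances the
+-- opening one of e.g. \ctxlua{...} ends the inline lua state; nested braces
+-- just bump the level up and down.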
+
+contextlexer._reset_parser = function()
+ luastatus = false
+ luatag = nil
+ lualevel = 0
+end
+
+local luaenvironment = P("lua") * (P("setups") + P("code") + P(true))
+ + P("ctxfunction") * (P("definition") + P(true))
+
+local inlinelua = P("\\") * (
+ P("ctx") * (P("lua") + P("command") + P("late") * (P("lua") + P("command")) + P("function"))
+ + P("cld") * (P("command") + P("context"))
+ + P("luaexpr")
+ + (P("direct") + P("late")) * P("lua")
+ )
+
+local startlua = P("\\start") * Cmt(luaenvironment,startdisplaylua)
+ + P("<?lua") * Cmt(P(true),startdisplaylua)
+ + inlinelua * space^0 * ( Cmt(P("{"),startinlinelua) )
+
+local stoplua = P("\\stop") * Cmt(luaenvironment,stopdisplaylua)
+ + P("?>") * Cmt(P(true),stopdisplaylua)
+ + Cmt(P("{"),stopinlinelua_b)
+ + Cmt(P("}"),stopinlinelua_e)
+
+local startluacode = token("embedded", startlua)
+local stopluacode = #stoplua * token("embedded", stoplua)
+
+local luacall = P("clf_") * R("az","__","AZ")^1
+
+local metafuncall = ( P("reusable") + P("usable") + P("unique") + P("use") + P("reuse") ) * ("MPgraphic")
+ + P("uniqueMPpagegraphic")
+ + P("MPpositiongraphic")
+
+local metafunenvironment = metafuncall -- ( P("use") + P("reusable") + P("unique") ) * ("MPgraphic")
+ + P("MP") * ( P("code")+ P("page") + P("inclusions") + P("initializations") + P("definitions") + P("extensions") + P("graphic") + P("calculation") )
+
+local startmetafun = P("\\start") * metafunenvironment
+local stopmetafun = P("\\stop") * metafunenvironment -- todo match start
+
+----- subsystem = token("embedded", P("\\xml") * R("az")^1 + (P("\\st") * (P("art") + P("op")) * P("xmlsetups")))
+local subsystemtags = P("xml") + P("btx") -- will be pluggable or maybe even a proper list of valid commands
+local subsystemmacro = P("\\") * (subsystemtags * R("az")^1 + (R("az")-subsystemtags)^1 * subsystemtags * R("az")^1)
+local subsystem = token("embedded", subsystemmacro)
+
+local openargument = token("special", P("{"))
+local closeargument = token("special", P("}"))
+local argumentcontent = token("default",(1-P("}"))^0) -- maybe space needs a treatment
+
+local metafunarguments = (spacing^0 * openargument * argumentcontent * closeargument)^-2
+
+local startmetafuncode = token("embedded", startmetafun) * metafunarguments
+local stopmetafuncode = token("embedded", stopmetafun)
+
+local callers = token("embedded", P("\\") * metafuncall) * metafunarguments
+ + token("embedded", P("\\") * luacall)
+
+lexer.embed_lexer(contextlexer, cldlexer, startluacode, stopluacode)
+lexer.embed_lexer(contextlexer, mpslexer, startmetafuncode, stopmetafuncode)
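+
+-- So the start patterns switch to the embedded cld (lua) respectively metafun
+-- lexer and the stop patterns switch back to this tex lexer.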
+
+contextlexer._rules = {
+ { "whitespace", spacing },
+ { "preamble", preamble },
+ { "word", word },
+ { "text", text }, -- non words
+ { "comment", comment },
+ { "constant", constant },
+ -- { "subsystem", subsystem },
+ { "callers", callers },
+ { "subsystem", subsystem },
+ { "helper", helper },
+ { "command", command },
+ { "primitive", primitive },
+ { "ifprimitive", ifprimitive },
+ -- { "subsystem", subsystem },
+ { "reserved", reserved },
+ { "csname", csname },
+ -- { "whatever", specialword }, -- not yet, crashes
+ { "grouping", grouping },
+ -- { "number", number },
+ { "special", special },
+ { "extra", extra },
+ { "invisible", invisible },
+ { "rest", rest },
+}
+
+-- Watch the text grabber, after all, we're talking mostly of text (beware,
+-- no punctuation here as it can be special). We might go for utf here.
+
+local web = lexer.loadluafile("scite-context-lexer-web-snippets")
+
+if web then
+
+ lexer.inform("supporting web snippets in tex lexer")
+
+ contextlexer._rules_web = {
+ { "whitespace", spacing },
+ { "text", text }, -- non words
+ { "comment", comment },
+ { "constant", constant },
+ { "callers", callers },
+ { "helper", helper },
+ { "command", command },
+ { "primitive", primitive },
+ { "ifprimitive", ifprimitive },
+ { "reserved", reserved },
+ { "csname", csname },
+ { "grouping", grouping },
+ { "special", special },
+ { "extra", extra },
+ { "invisible", invisible },
+ { "web", web.pattern },
+ { "rest", rest },
+ }
+
+else
+
+ lexer.report("not supporting web snippets in tex lexer")
+
+ contextlexer._rules_web = {
+ { "whitespace", spacing },
+ { "text", text }, -- non words
+ { "comment", comment },
+ { "constant", constant },
+ { "callers", callers },
+ { "helper", helper },
+ { "command", command },
+ { "primitive", primitive },
+ { "ifprimitive", ifprimitive },
+ { "reserved", reserved },
+ { "csname", csname },
+ { "grouping", grouping },
+ { "special", special },
+ { "extra", extra },
+ { "invisible", invisible },
+ { "rest", rest },
+ }
+
+end
+
+contextlexer._tokenstyles = context.styleset
+
+local environment = {
+ ["\\start"] = 1, ["\\stop"] = -1,
+ -- ["\\begin"] = 1, ["\\end" ] = -1,
+}
+
+-- local block = {
+-- ["\\begin"] = 1, ["\\end" ] = -1,
+-- }
+
+local group = {
+ ["{"] = 1, ["}"] = -1,
+}
+
+contextlexer._foldpattern = P("\\" ) * (P("start") + P("stop")) + S("{}") -- separate entry else interference
+
+contextlexer._foldsymbols = { -- these need to be style references .. todo: multiple styles
+ _patterns = {
+ "\\start", "\\stop", -- regular environments
+ -- "\\begin", "\\end", -- (moveable) blocks
+ "[{}]",
+ },
+ ["command"] = environment,
+ ["constant"] = environment,
+ ["data"] = environment,
+ ["user"] = environment,
+ ["embedded"] = environment,
+ ["helper"] = environment,
+ ["plain"] = environment,
+ ["grouping"] = group,
+}
+
+-- context.inspect(contextlexer)
+
+return contextlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-txt.lua b/context/data/textadept/context/lexers/scite-context-lexer-txt.lua
new file mode 100644
index 000000000..152e9a663
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-txt.lua
@@ -0,0 +1,81 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for plain text (with spell checking)",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+local P, S, Cmt, Cp = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp
+local find, match = string.find, string.match
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local textlexer = lexer.new("txt","scite-context-lexer-txt")
+local whitespace = textlexer.whitespace
+
+local space = patterns.space
+local any = patterns.any
+local wordtoken = patterns.wordtoken
+local wordpattern = patterns.wordpattern
+
+local checkedword = context.checkedword
+local styleofword = context.styleofword
+local setwordlist = context.setwordlist
+local validwords = false
+local validminimum = 3
+
+-- local styleset = context.newstyleset {
+-- "default",
+-- "text", "okay", "error", "warning",
+-- "preamble",
+-- }
+
+-- [#!-%] language=uk
+
+local p_preamble = Cmt((S("#!-%") * P(" ")), function(input,i,_) -- todo: utf bomb no longer #
+ if i == 1 then -- < 10 then
+ validwords, validminimum = false, 3
+ local s, e, line = find(input,"^[#!%-%%](.+)[\n\r]",i)
+ if line then
+ local language = match(line,"language=([a-z]+)")
+ if language then
+ validwords, validminimum = setwordlist(language)
+ end
+ end
+ end
+ return false
+end)
+
+local t_preamble =
+ token("preamble", p_preamble)
+
+local t_word =
+ wordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() -- the function can be inlined
+
+local t_text =
+ token("default", wordtoken^1)
+
+local t_rest =
+ token("default", (1-wordtoken-space)^1)
+
+local t_spacing =
+ token(whitespace, space^1)
+
+textlexer._rules = {
+ { "whitespace", t_spacing },
+ { "preamble", t_preamble },
+ { "word", t_word }, -- words >= 3
+ { "text", t_text }, -- non words
+ { "rest", t_rest },
+}
+
+textlexer._LEXBYLINE = true -- new (needs testing, not yet as the system changed in 3.24)
+textlexer._tokenstyles = context.styleset
+
+return textlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua b/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua
new file mode 100644
index 000000000..141de20e1
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua
@@ -0,0 +1,134 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for web snippets",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+local P, R, S, C, Cg, Cb, Cs, Cmt, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Cs, lpeg.Cmt, lpeg.match
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local websnippets = { }
+
+local space = patterns.space -- S(" \n\r\t\f\v")
+local any = patterns.any
+local restofline = patterns.restofline
+local startofline = patterns.startofline
+
+local squote = P("'")
+local dquote = P('"')
+local period = P(".")
+
+-- local t_whitespace = token(whitespace, space^1) -- unused here, and "whitespace" is not defined in this module
+local t_spacing = token("default", space^1)
+local t_rest = token("default", any)
+
+-- the web subset
+
+local p_beginofweb = P("@")
+local p_endofweb = P("@>")
+
+-- @, @/ @| @# @+ @; @[ @]
+
+local p_directive_1 = p_beginofweb * S(",/|#+;[]")
+local t_directive_1 = token("label",p_directive_1)
+
+-- @.text @>(monospaced)
+-- @:text @>(macro driven)
+-- @= verbose@>
+-- @! underlined @>
+-- @t text @> (hbox)
+-- @q ignored @>
+
+local p_typeset = p_beginofweb * S(".:=!tq")
+local t_typeset = token("label",p_typeset) * token("warning",(1-p_endofweb)^1) * token("label",p_endofweb)
+
+-- @^index@>
+
+local p_index = p_beginofweb * P("^")
+local t_index = token("label",p_index) * token("function",(1-p_endofweb)^1) * token("label",p_endofweb)
+
+-- @f text renderclass
+
+local p_render = p_beginofweb * S("f")
+local t_render = token("label",p_render) * t_spacing * token("warning",(1-space)^1) * t_spacing * token("label",(1-space)^1)
+
+-- @s idem
+-- @p idem
+-- @& strip (spaces before)
+-- @h
+
+local p_directive_2 = p_beginofweb * S("sp&h")
+local t_directive_2 = token("label",p_directive_2)
+
+-- @< ... @> [=|+=|]
+-- @(foo@>
+
+local p_reference = p_beginofweb * S("<(")
+local t_reference = token("label",p_reference) * token("function",(1-p_endofweb)^1) * token("label",p_endofweb * (P("+=") + P("="))^-1)
+
+-- @'char' (ascii code)
+
+local p_character = p_beginofweb * S("'")
+local t_character = token("label",p_character) * token("reserved",(1-squote)^1) * token("label",squote)
+
+-- @l nonascii
+
+local p_nonascii = p_beginofweb * S("l")
+local t_nonascii = token("label",p_nonascii) * t_spacing * token("reserved",(1-space)^1)
+
+-- @x @y @z changefile
+-- @i webfile
+
+local p_filename = p_beginofweb * S("xyzi")
+local t_filename = token("label",p_filename) * t_spacing * token("reserved",(1-space)^1)
+
+-- @@ escape
+
+local p_escape = p_beginofweb * p_beginofweb
+local t_escape = token("text",p_escape)
+
+-- structure
+
+-- @* title.
+
+-- local p_section = p_beginofweb * P("*")^1
+-- local t_section = token("label",p_section) * t_spacing * token("function",(1-period)^1) * token("label",period)
+
+-- @ explanation
+
+-- local p_explanation = p_beginofweb
+-- local t_explanation = token("label",p_explanation) * t_spacing^1
+
+-- @d macro
+
+-- local p_macro = p_beginofweb * P("d")
+-- local t_macro = token("label",p_macro)
+
+-- @c code
+
+-- local p_code = p_beginofweb * P("c")
+-- local t_code = token("label",p_code)
+
+websnippets.pattern = P (
+ t_typeset
+ + t_index
+ + t_render
+ + t_reference
+ + t_filename
+ + t_directive_1
+ + t_directive_2
+ + t_character
+ + t_nonascii
+ + t_escape
+)
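+
+-- The tex lexer loads this file with lexer.loadluafile and plugs the combined
+-- pattern into its _rules_web table (see scite-context-lexer-tex.lua).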
+
+return websnippets
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-web.lua
new file mode 100644
index 000000000..6fe5ac84c
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-web.lua
@@ -0,0 +1,68 @@
+local info = {
+ version = 1.003,
+ comment = "scintilla lpeg lexer for web",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+local P, R, S = lpeg.P, lpeg.R, lpeg.S
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+local exact_match = lexer.exact_match
+
+local weblexer = lexer.new("web","scite-context-lexer-web")
+local whitespace = weblexer.whitespace
+
+local space = patterns.space -- S(" \n\r\t\f\v")
+local any = patterns.any
+local restofline = patterns.restofline
+local startofline = patterns.startofline
+
+local period = P(".")
+local percent = P("%")
+
+local spacing = token(whitespace, space^1)
+local rest = token("default", any)
+
+local eop = P("@>")
+local eos = eop * P("+")^-1 * P("=")
+
+-- we can put some of the following in the web-snippets file
+-- is f okay here?
+
+local texcomment = token("comment", percent * restofline^0)
+
+local texpart = token("label",P("@")) * #spacing
+ + token("label",P("@") * P("*")^1) * token("function",(1-period)^1) * token("label",period)
+local midpart = token("label",P("@d")) * #spacing
+ + token("label",P("@f")) * #spacing
+local cpppart = token("label",P("@c")) * #spacing
+ + token("label",P("@p")) * #spacing
+ + token("label",P("@") * S("<(")) * token("function",(1-eop)^1) * token("label",eos)
+
+local anypart = P("@") * ( P("*")^1 + S("dfcp") + space^1 + S("<(") * (1-eop)^1 * eos )
+local limbo = 1 - anypart - percent
+
+local texlexer = lexer.load("scite-context-lexer-tex-web")
+local cpplexer = lexer.load("scite-context-lexer-cpp-web")
+
+lexer.embed_lexer(weblexer, texlexer, texpart + limbo, #anypart)
+lexer.embed_lexer(weblexer, cpplexer, cpppart + midpart, #anypart)
+
+
+weblexer._rules = {
+ { "whitespace", spacing },
+ { "texcomment", texcomment }, -- else issues with first tex section
+ { "rest", rest },
+}
+
+weblexer._tokenstyles = context.styleset
+
+return weblexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua
new file mode 100644
index 000000000..25fa9128f
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua
@@ -0,0 +1,34 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for xml cdata",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+local P = lpeg.P
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local xmlcdatalexer = lexer.new("xml-cdata","scite-context-lexer-xml-cdata")
+local whitespace = xmlcdatalexer.whitespace
+
+local space = patterns.space
+local nospace = 1 - space - P("]]>")
+
+local t_spaces = token(whitespace, space^1)
+local t_cdata = token("comment", nospace^1)
+
+xmlcdatalexer._rules = {
+ { "whitespace", t_spaces },
+ { "cdata", t_cdata },
+}
+
+xmlcdatalexer._tokenstyles = context.styleset
+
+return xmlcdatalexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua
new file mode 100644
index 000000000..2d7260b69
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua
@@ -0,0 +1,34 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for xml comments",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+local P = lpeg.P
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local xmlcommentlexer = lexer.new("xml-comment","scite-context-lexer-xml-comment")
+local whitespace = xmlcommentlexer.whitespace
+
+local space = patterns.space
+local nospace = 1 - space - P("-->")
+
+local t_spaces = token(whitespace, space^1)
+local t_comment = token("comment", nospace^1)
+
+xmlcommentlexer._rules = {
+ { "whitespace", t_spaces },
+ { "comment", t_comment },
+}
+
+xmlcommentlexer._tokenstyles = context.styleset
+
+return xmlcommentlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua
new file mode 100644
index 000000000..1ee96ba89
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua
@@ -0,0 +1,34 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for xml script",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+local P = lpeg.P
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local xmlscriptlexer = lexer.new("xml-script","scite-context-lexer-xml-script")
+local whitespace = xmlscriptlexer.whitespace
+
+local space = patterns.space
+local nospace = 1 - space - P("</") * (P("script") + P("SCRIPT")) * P(">")
+
+local t_spaces = token(whitespace, space^1)
+local t_script = token("default", nospace^1)
+
+xmlscriptlexer._rules = {
+ { "whitespace", t_spaces },
+ { "script", t_script },
+}
+
+xmlscriptlexer._tokenstyles = context.styleset
+
+return xmlscriptlexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua
new file mode 100644
index 000000000..1b7e2e897
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua
@@ -0,0 +1,351 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for xml",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- adapted from the regular context pretty printer code (after all, lexing
+-- boils down to much of the same and there are only so many ways to do
+-- things). Simplified a bit as we have a different nesting model.
+
+-- todo: parse entities in attributes
+
+local global, string, table, lpeg = _G, string, table, lpeg
+local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
+local type = type
+local match, find = string.match, string.find
+
+-- local lexer = require("lexer")
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+local exact_match = lexer.exact_match
+
+local xmllexer = lexer.new("xml","scite-context-lexer-xml")
+local whitespace = xmllexer.whitespace
+
+local xmlcommentlexer = lexer.load("scite-context-lexer-xml-comment")
+local xmlcdatalexer = lexer.load("scite-context-lexer-xml-cdata")
+local xmlscriptlexer = lexer.load("scite-context-lexer-xml-script")
+local lualexer = lexer.load("scite-context-lexer-lua")
+
+local space = patterns.space
+local any = patterns.any
+
+local dquote = P('"')
+local squote = P("'")
+local colon = P(":")
+local semicolon = P(";")
+local equal = P("=")
+local ampersand = P("&")
+
+local name = (R("az","AZ","09") + S("_-."))^1
+local openbegin = P("<")
+local openend = P("</")
+local closebegin = P("/>") + P(">")
+local closeend = P(">")
+local opencomment = P("<!--")
+local closecomment = P("-->")
+local openinstruction = P("<?")
+local closeinstruction = P("?>")
+local opencdata = P("<![CDATA[")
+local closecdata = P("]]>")
+local opendoctype = P("<!DOCTYPE") -- could grab the whole doctype
+local closedoctype = P("]>") + P(">")
+local openscript = openbegin * (P("script") + P("SCRIPT")) * (1-closeend)^0 * closeend -- begin
+local closescript = openend * (P("script") + P("SCRIPT")) * closeend
+
+local openlua = "<?lua"
+local closelua = "?>"
+
+-- <!DOCTYPE Something PUBLIC "... ..." "..." [ ... ] >
+-- <!DOCTYPE Something PUBLIC "... ..." "..." >
+-- <!DOCTYPE Something SYSTEM "... ..." [ ... ] >
+-- <!DOCTYPE Something SYSTEM "... ..." >
+-- <!DOCTYPE Something [ ... ] >
+-- <!DOCTYPE Something >
+
+local entity = ampersand * (1-semicolon)^1 * semicolon
+
+local utfchar = context.utfchar
+local wordtoken = context.patterns.wordtoken
+local iwordtoken = context.patterns.iwordtoken
+local wordpattern = context.patterns.wordpattern
+local iwordpattern = context.patterns.iwordpattern
+local invisibles = context.patterns.invisibles
+local checkedword = context.checkedword
+local styleofword = context.styleofword
+local setwordlist = context.setwordlist
+local validwords = false
+local validminimum = 3
+
+-- <?xml version="1.0" encoding="UTF-8" language="uk" ?>
+--
+-- <?context-directive editor language us ?>
+
+local t_preamble = Cmt(P("<?xml "), function(input,i,_) -- todo: utf bomb, no longer #
+ if i < 200 then
+ validwords, validminimum = false, 3
+ local language = match(input,"^<%?xml[^>]*%?>%s*<%?context%-directive%s+editor%s+language%s+(..)%s+%?>")
+ -- if not language then
+ -- language = match(input,"^<%?xml[^>]*language=[\"\'](..)[\"\'][^>]*%?>",i)
+ -- end
+ if language then
+ validwords, validminimum = setwordlist(language)
+ end
+ end
+ return false
+end)
+
+local t_word =
+-- Ct( iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() ) -- the function can be inlined
+ iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() -- the function can be inlined
+
+local t_rest =
+ token("default", any)
+
+local t_text =
+ token("default", (1-S("<>&")-space)^1)
+
+local t_spacing =
+ token(whitespace, space^1)
+
+local t_optionalwhitespace =
+ token("default", space^1)^0
+
+local t_localspacing =
+ token("default", space^1)
+
+-- Because we want a differently colored open and close we need an embedded lexer (whitespace
+-- trigger). What is actually needed is that scintilla applies the current whitespace style.
+-- Even using different style keys is not robust as they can be shared. I'll fix the main
+-- lexer code.
+
+local t_sstring =
+ token("quote",squote)
+ * token("string",(1-squote)^0) -- different from context
+ * token("quote",squote)
+
+local t_dstring =
+ token("quote",dquote)
+ * token("string",(1-dquote)^0) -- different from context
+ * token("quote",dquote)
+
+-- local t_comment =
+-- token("command",opencomment)
+-- * token("comment",(1-closecomment)^0) -- different from context
+-- * token("command",closecomment)
+
+-- local t_cdata =
+-- token("command",opencdata)
+-- * token("comment",(1-closecdata)^0) -- different from context
+-- * token("command",closecdata)
+
+-- maybe cdata just text (then we don't need the extra lexer as we only have one comment then)
+
+-- <!DOCTYPE Something PUBLIC "... ..." "..." [ ... ] >
+-- <!DOCTYPE Something PUBLIC "... ..." "..." >
+-- <!DOCTYPE Something SYSTEM "... ..." [ ... ] >
+-- <!DOCTYPE Something SYSTEM "... ..." >
+-- <!DOCTYPE Something [ ... ] >
+-- <!DOCTYPE Something >
+
+-- <!ENTITY xxxx SYSTEM "yyyy" NDATA zzzz>
+-- <!ENTITY xxxx PUBLIC "yyyy" >
+-- <!ENTITY xxxx "yyyy" >
+
+local t_docstr = t_dstring + t_sstring
+
+local t_docent = token("command",P("<!ENTITY"))
+ * t_optionalwhitespace
+ * token("keyword",name)
+ * t_optionalwhitespace
+ * (
+ (
+ token("constant",P("SYSTEM"))
+ * t_optionalwhitespace
+ * t_docstr
+ * t_optionalwhitespace
+ * token("constant",P("NDATA"))
+ * t_optionalwhitespace
+ * token("keyword",name)
+ ) + (
+ token("constant",P("PUBLIC"))
+ * t_optionalwhitespace
+ * t_docstr
+ ) + (
+ t_docstr
+ )
+ )
+ * t_optionalwhitespace
+ * token("command",P(">"))
+
+local t_docele = token("command",P("<!ELEMENT"))
+ * t_optionalwhitespace
+ * token("keyword",name)
+ * t_optionalwhitespace
+ * token("command",P("("))
+ * (
+ t_localspacing
+ + token("constant",P("#CDATA") + P("#PCDATA") + P("ANY"))
+ + token("text",P(","))
+ + token("comment",(1-S(",)"))^1)
+ )^1
+ * token("command",P(")"))
+ * t_optionalwhitespace
+ * token("command",P(">"))
+
+local t_docset = token("command",P("["))
+ * t_optionalwhitespace
+ * ((t_optionalwhitespace * (t_docent + t_docele))^1 + token("comment",(1-P("]"))^0))
+ * t_optionalwhitespace
+ * token("command",P("]"))
+
+local t_doctype = token("command",P("<!DOCTYPE"))
+ * t_optionalwhitespace
+ * token("keyword",name)
+ * t_optionalwhitespace
+ * (
+ (
+ token("constant",P("PUBLIC"))
+ * t_optionalwhitespace
+ * t_docstr
+ * t_optionalwhitespace
+ * t_docstr
+ * t_optionalwhitespace
+ ) + (
+ token("constant",P("SYSTEM"))
+ * t_optionalwhitespace
+ * t_docstr
+ * t_optionalwhitespace
+ )
+ )^-1
+ * t_docset^-1
+ * t_optionalwhitespace
+ * token("command",P(">"))
+
+lexer.embed_lexer(xmllexer, lualexer, token("command", openlua), token("command", closelua))
+lexer.embed_lexer(xmllexer, xmlcommentlexer, token("command", opencomment), token("command", closecomment))
+lexer.embed_lexer(xmllexer, xmlcdatalexer, token("command", opencdata), token("command", closecdata))
+lexer.embed_lexer(xmllexer, xmlscriptlexer, token("command", openscript), token("command", closescript))
+
+-- local t_name =
+-- token("plain",name)
+-- * (
+-- token("default",colon)
+-- * token("keyword",name)
+-- )
+-- + token("keyword",name)
+
+local t_name = -- more robust
+ token("plain",name * colon)^-1
+ * token("keyword",name)
+
+-- local t_key =
+-- token("plain",name)
+-- * (
+-- token("default",colon)
+-- * token("constant",name)
+-- )
+-- + token("constant",name)
+
+local t_key =
+ token("plain",name * colon)^-1
+ * token("constant",name)
+
+local t_attributes = (
+ t_optionalwhitespace
+ * t_key
+ * t_optionalwhitespace
+ * token("plain",equal)
+ * t_optionalwhitespace
+ * (t_dstring + t_sstring)
+ * t_optionalwhitespace
+)^0
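+
+-- This matches zero or more attribute assignments like foo="bar" or
+-- ns:key='value' (made up examples): prefixes end up styled "plain", names
+-- "constant" and values as quoted strings.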
+
+local t_open =
+ token("keyword",openbegin)
+ * (
+ t_name
+ * t_optionalwhitespace
+ * t_attributes
+ * token("keyword",closebegin)
+ +
+ token("error",(1-closebegin)^1)
+ )
+
+local t_close =
+ token("keyword",openend)
+ * (
+ t_name
+ * t_optionalwhitespace
+ * token("keyword",closeend)
+ +
+ token("error",(1-closeend)^1)
+ )
+
+local t_entity =
+ token("constant",entity)
+
+local t_instruction =
+ token("command",openinstruction * P("xml"))
+ * t_optionalwhitespace
+ * t_attributes
+ * t_optionalwhitespace
+ * token("command",closeinstruction)
+ + token("command",openinstruction * name)
+ * token("default",(1-closeinstruction)^1)
+ * token("command",closeinstruction)
+
+local t_invisible =
+ token("invisible",invisibles^1)
+
+-- local t_preamble =
+-- token("preamble", t_preamble )
+
+xmllexer._rules = {
+ { "whitespace", t_spacing },
+ { "preamble", t_preamble },
+ { "word", t_word },
+ -- { "text", t_text },
+ -- { "comment", t_comment },
+ -- { "cdata", t_cdata },
+ { "doctype", t_doctype },
+ { "instruction", t_instruction },
+ { "close", t_close },
+ { "open", t_open },
+ { "entity", t_entity },
+ { "invisible", t_invisible },
+ { "rest", t_rest },
+}
+
+xmllexer._tokenstyles = context.styleset
+
+xmllexer._foldpattern = P("</") + P("<") + P("/>") -- separate entry else interference
++ P("<!--") + P("-->")
+
+xmllexer._foldsymbols = {
+ _patterns = {
+ "</",
+ "/>",
+ "<",
+ },
+ ["keyword"] = {
+ ["</"] = -1,
+ ["/>"] = -1,
+ ["<"] = 1,
+ },
+ ["command"] = {
+ ["</"] = -1,
+ ["/>"] = -1,
+ ["<!--"] = 1,
+ ["-->"] = -1,
+ ["<"] = 1,
+ },
+}
+
+return xmllexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer.lua b/context/data/textadept/context/lexers/scite-context-lexer.lua
new file mode 100644
index 000000000..e526d5045
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer.lua
@@ -0,0 +1,2299 @@
+local info = {
+ version = 1.400,
+ comment = "basics for scintilla lpeg lexer for context/metafun, contains copyrighted code from mitchell.att.foicica.com",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- todo: hook into context resolver etc
+-- todo: only old api in lexers, rest in context subnamespace
+-- todo: make sure we can run in one state .. copies or shared?
+-- todo: auto-nesting
+
+if lpeg.setmaxstack then lpeg.setmaxstack(1000) end
+
+local log = false
+local trace = false
+local detail = false
+local show = false -- nice for tracing (also for later)
+local collapse = false -- can save some 15% (maybe easier on scintilla)
+local inspect = false -- can save some 15% (maybe easier on scintilla)
+
+-- local log = true
+-- local trace = true
+
+-- GET GOING
+--
+-- You need to copy this file over lexer.lua. In principle other lexers could work too but
+-- not now. Maybe some day. All patterns will move into the patterns name space. I might do
+-- the same with styles. If you run an older version of SciTE you can take one of the
+-- archives. Pre 3.41 versions can just be copied to the right path, as there we still use
+-- part of the normal lexer.
+--
+-- REMARK
+--
+-- We started using lpeg lexing as soon as it became available. Because we had rather demanding
+-- files and also wanted to use nested lexers, we ended up with our own variant. At least at
+-- that time this was more robust and also faster (as we have some pretty large lua data files
+-- and also work with large xml files). As a consequence successive versions had to be adapted
+-- to changes in the (at that time still unstable) api. In addition to lexing we also have
+-- spell checking and such. Around version 3.60 things became more stable so I don't expect to
+-- change much.
+--
+-- STATUS
+--
+-- todo: maybe use a special stripped version of the dll (stable api) and add a bit more
+-- interfacing to scintilla
+-- todo: investigate if we can use the already built-in lua instance so that we can combine the
+--       power of lexing with extensions
+-- todo: play with hotspot and other properties (but no real need now)
+-- todo: maybe come up with an extension to the api subsystem
+-- todo: add proper tracing and so .. not too hard as we can run on mtxrun, but we lack a console
+-- for debugging (ok, chicken-egg as lexers probably need to be loaded before a console can
+-- kick in)
+-- todo: get rid of these lexers.STYLE_XX and lexers.XX (hide such details)
+--
+-- wish: access to all scite properties and in fact integrate in scite
+--
+--
+-- In the meantime I made the lexer suitable for typesetting sources which was no big deal as we
+-- already had that in place (ConTeXt used lpeg from the day it showed up so we have several lexing
+-- options there too).
+--
+-- HISTORY
+--
+-- The fold and lex functions are copied and patched from original code by Mitchell (see lexer.lua).
+-- All errors are mine. The ability to use lpeg in scintilla is a real nice addition and a brilliant
+-- move. The code is a byproduct of the (mainly Lua based) textadept (at the time I ran into it, it was
+-- a rapidly moving target so I decided to stick to SciTE). When I played with it, it had no realtime
+-- output pane but that seems to be dealt with now (2017). I need to have a look at it in more detail
+-- but a first test again made the output hang and it was a bit slow too (and I also want the log pane
+-- as scite has it, on the right, in view). So, for now I stick to SciTE even when it's somewhat
+-- crippled by the fact that we cannot hook our own (language dependent) lexer into the output pane
+-- (somehow the errorlist lexer is hard coded into the editor). Hopefully that will change some day.
+-- So, how did we arrive at where we are now?
+--
+-- Starting with SciTE version 3.20 there is an issue with coloring. As we still lack a connection
+-- with SciTE itself (properties as well as printing to the log pane) we cannot trace this (on
+-- windows). As far as I can see, there are no fundamental changes in lexer.lua or LexLPeg.cxx so it
+-- must be in Scintilla itself. So for the moment I stick to 3.10. Indicators are: no lexing of 'next'
+-- and 'goto <label>' in the Lua lexer and no brace highlighting either. Interesting is that it does
+-- work ok in the cld lexer (so the Lua code is okay). All seems to be ok again in later versions,
+-- so, when you update, best check first and just switch back to an older version as normally a SciTE
+-- update is not critical. When char-def.lua lexes real fast this is a signal that the lexer quits
+-- somewhere halfway. Maybe there are some hard coded limitations on the number of styles and/or
+-- length of names.
+--
+-- Anyway, after checking 3.24 and adapting to the new lexer tables things are okay again. So, this
+-- version assumes 3.24 or higher. In 3.24 we have a different token result, i.e. no longer a { tag,
+-- pattern } but just two return values. I didn't check other changes but will do that when I run into
+-- issues. I had optimized these small tables by hashing which was more efficient but this is no longer
+-- needed. For the moment we keep some of that code around as I don't know what happens in future
+-- versions. I'm anyway still happy with this kind of lexing.
+--
+-- In 3.31 another major change took place: some helper constants (maybe they're no longer constants)
+-- and functions were moved into the lexer modules namespace but the functions are assigned to the Lua
+-- module afterward so we cannot alias them beforehand. We're probably getting close to a stable
+-- interface now. I've considered making a whole copy and patch the other functions too as we need an
+-- extra nesting model. However, I don't want to maintain too much. An unfortunate change in 3.03 is
+-- that no longer a script can be specified. This means that instead of loading the extensions via the
+-- properties file, we now need to load them in our own lexers, unless of course we replace lexer.lua
+-- completely (which adds another installation issue).
+--
+-- Another change has been that _LEXERHOME is no longer available. It looks like more and more
+-- functionality gets dropped so maybe at some point we need to ship our own dll/so files. For instance,
+-- I'd like to have access to the current filename and other scite properties. We could then cache some
+-- info with each file, if only we had knowledge of what file we're dealing with.
+--
+-- For huge files folding can be pretty slow and I do have some large ones that I keep open all the time.
+-- Loading is normally no issue, unless one has remembered the status and the cursor is at the last line
+-- of a 200K line file. Optimizing the fold function brought down loading of char-def.lua from 14 sec
+-- => 8 sec. Replacing the word_match function and optimizing the lex function gained another 2+ seconds.
+-- A 6 second load is quite ok for me. The changed lexer table structure (no subtables) brings loading
+-- down to a few seconds.
+--
+-- When the lexer path is copied to the textadept lexer path, and the theme definition to the theme path
+-- (as lexer.lua), the lexer works there as well. Although ... when I decided to check the state of
+-- textadept I had to adapt some loader code. It's not pretty but it works and also permits overloading.
+-- When I have time and motive I will make a proper setup file to tune the look and feel a bit and
+-- associate suffixes with the context lexer. The textadept editor has a nice style tracing option but
+-- lacks the tabs for selecting files that scite has. It also has no integrated run that pipes to the
+-- log pane. Interesting is that the jit version of textadept crashes on lexing large files (and does
+-- not feel faster either; maybe a side effect of known limitations as we know that luajit is more
+-- limited than stock lua). Btw, in the meantime on unix one can test easier as there we can enable
+-- the loggers in this module.
+--
+-- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name .. '_whitespace' which means
+-- that we need to have it frozen at the moment we load another lexer. Because spacing is used to revert
+-- to a parent lexer we need to make sure that we load children as late as possible in order not to get
+-- the wrong whitespace trigger. This took me quite a while to figure out (not being that familiar with
+-- the internals). The lex and fold functions have been optimized. It is a pity that there is no proper
+-- print available. Another thing needed is a default style in our own theme style definition, as otherwise
+-- we get wrong nested lexers, especially if they are larger than a view. This is the hardest part of
+-- getting things right.
+--
+-- It's a pity that there is no scintillua library for the OSX version of scite. Even better would be
+-- to have the scintillua library as an integral part of scite as that way I could use OSX alongside
+-- windows and linux (depending on needs). Also nice would be to have a proper interface to scite then
+-- because currently the lexer is rather isolated and the lua version does not provide all standard
+-- libraries. It would also be good to have lpeg support in the regular scite lua extension (currently
+-- you need to pick it up from someplace else).
+--
+-- With 3.41 the interface changed again so it gets time to look into the C++ code and consider compiling
+-- and patching myself. Loading is more complicated now as the lexer gets loaded automatically so we have
+-- little control over extending the code now. After a few days trying all kind of solutions I decided to
+-- follow a different approach: drop in a complete replacement. This of course means that I need to keep
+-- track of even more changes (which for sure will happen) but at least I get rid of interferences. The
+-- api (lexing and configuration) is simply too unstable across versions. Maybe in a few years things will
+-- have stabilized again. (Or maybe it's not really expected that one writes lexers at all.) A side effect is
+-- that I now no longer will use shipped lexers but just the built-in ones in addition to the context
+-- lpeg lexers. Not that it matters much as the context lexers cover what I need (and I can always write
+-- more).
+--
+-- In fact, the transition to 3.41 was triggered by an unfortunate update of Ubuntu which left me with an
+-- incompatible SciTE and lexer library and updating was not possible due to the lack of 64 bit libraries.
+-- We'll see what the future brings.
+--
+-- Promising is that the library can now use another Lua instance, so maybe some day it will get properly
+-- integrated in SciTE and we can use more clever scripting.
+--
+-- In some lexers we use embedded ones even if we could do it directly. The reason is that when the end
+-- token is edited (e.g. -->), backtracking to the space before the begin token (e.g. <!--) results in
+-- applying the surrounding whitespace which in turn means that when the end token is edited right,
+-- backtracking doesn't go back. One solution (in the dll) would be to backtrack several space categories.
+-- After all, lexing is quite fast (applying the result is much slower).
+--
+-- For some reason the first blob of text tends to go wrong (pdf and web). It would be nice to have 'whole
+-- doc' initial lexing. Quite fishy as it makes it impossible to lex the first part well (for already opened
+-- documents) because only a partial text is passed.
+--
+-- So, maybe I should just write this from scratch (assuming more generic usage) because after all, the dll
+-- expects just tables, based on a string. I can then also do some more aggressive resource sharing (needed
+-- when used generic).
+--
+-- I think that nested lexers are still bugged (esp over longer ranges). It never was robust or maybe it's
+-- simply not meant for too complex cases (well, it probably *is* tricky material). The 3.24 version was
+-- probably the best so far. The fact that styles bleed between lexers even if their states are isolated is
+-- an issue. Another issue is that zero characters in the text passed to the lexer can mess things up (pdf
+-- files have them in streams).
+--
+-- For more complex 'languages', like web or xml, we need to make sure that we use e.g. 'default' for
+-- spacing that makes up some construct. Ok, we then still have a backtracking issue but less.
+--
+-- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++ and we ship an ini
+-- file for that editor with some installation instructions embedded.
+--
+-- TODO
+--
+-- I can make an export to context, but first I'll redo the code that makes the grammar,
+-- as we only seem to need
+--
+-- lexer._TOKENSTYLES : table
+-- lexer._CHILDREN : flag
+-- lexer._EXTRASTYLES : table
+-- lexer._GRAMMAR : flag
+--
+-- lexers.load : function
+-- lexers.lex : function
+--
+-- So, if we drop compatibility with other lex definitions, we can make things simpler. However, in the
+-- meantime one can just do this:
+--
+-- context --extra=listing --scite [--compact --verycompact] somefile.tex
+--
+-- and get a printable document. So, this todo is obsolete.
+
+-- TRACING
+--
+-- The advantage is that we now can check more easily with regular Lua(TeX). We can also use wine and print
+-- to the console (somehow stdout is intercepted there). So, I've added a bit of tracing. It is interesting
+-- to notice that each document gets its own instance, which has advantages but also means that when we are
+-- spellchecking we reload the word lists each time. (In the past I assumed a shared instance and took
+-- some precautions.)
+
+-- todo: make sure we don't overload context definitions when used in context
+
+-- properties is an ugly mess ... due to changes in the interface we're now left with some hybrid
+-- that sort of works ok
+
+local lpeg = require("lpeg")
+
+local global = _G
+local find, gmatch, match, lower, upper, gsub, sub, format = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format
+local concat, sort = table.concat, table.sort
+local type, next, setmetatable, rawset, tonumber, tostring = type, next, setmetatable, rawset, tonumber, tostring
+local R, P, S, V, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg
+local lpegmatch = lpeg.match
+
+local nesting = 0
+
+local print = (textadept and ui and ui.print) or print
+
+local function report(fmt,str,...)
+ if log then
+ if str then
+ fmt = format(fmt,str,...)
+ end
+ print(format("scite lpeg lexer > %s > %s",nesting == 0 and "-" or nesting,fmt))
+ end
+end
+
+local function inform(...)
+ if log and trace then
+ report(...)
+ end
+end
+
+inform("loading context lexer module (global table: %s)",tostring(global))
+
+if not package.searchpath then
+
+ -- Unfortunately the io library is only available when we end up
+ -- in this branch of code.
+
+ inform("using adapted function 'package.searchpath' (if used at all)")
+
+ function package.searchpath(name,path)
+ local tried = { }
+ for part in gmatch(path,"[^;]+") do
+ local filename = gsub(part,"%?",name)
+ local f = io.open(filename,"r")
+ if f then
+ inform("file found on path: %s",filename)
+ f:close()
+ return filename
+ end
+ tried[#tried + 1] = format("no file '%s'",filename)
+ end
+ -- added: check the current path too .. for testing ('filename' from the loop
+ -- above is out of scope here, so we use the plain name)
+ local f = io.open(name,"r")
+ if f then
+ inform("file found on current path: %s",name)
+ f:close()
+ return name
+ end
+ --
+ tried[#tried + 1] = format("no file '%s'",name)
+ return nil, concat(tried,"\n")
+ end
+
+end
+
+local lexers = { }
+local context = { }
+local helpers = { }
+lexers.context = context
+lexers.helpers = helpers
+
+local patterns = { }
+context.patterns = patterns -- todo: lexers.patterns
+
+context.report = report
+context.inform = inform
+
+lexers.LEXERPATH = package.path -- can be multiple paths separated by ;
+
+if resolvers then
+ -- todo: set LEXERPATH
+ -- todo: set report
+end
+
+local function sortedkeys(hash) -- simple version, good enough for here
+ local t, n = { }, 0
+ for k in next, hash do
+ t[#t+1] = k
+ local l = #tostring(k)
+ if l > n then
+ n = l
+ end
+ end
+ sort(t)
+ return t, n
+end
+
+helpers.sortedkeys = sortedkeys
+
+local usedlexers = { }
+local parent_lexer = nil
+
+-- The problem with styles is that there is some nasty interaction with scintilla
+-- and each version of lexer dll/so has a different issue. So, from now on we will
+-- just add them here. There is also a limit on some 30 styles. Maybe I should
+-- hash them in order to reuse.
+
+-- todo: work with proper hashes and analyze what styles are really used by a
+-- lexer
+
+local default = {
+ "nothing", "whitespace", "comment", "string", "number", "keyword",
+ "identifier", "operator", "error", "preprocessor", "constant", "variable",
+ "function", "type", "label", "embedded",
+ "quote", "special", "extra", "reserved", "okay", "warning",
+ "command", "internal", "preamble", "grouping", "primitive", "plain",
+ "user",
+ -- not used (yet) .. we cross the 32 boundary so had to patch the initializer, see (1)
+ "char", "class", "data", "definition", "invisible", "regex",
+ "standout", "tag",
+ "text",
+}
+
+local predefined = {
+ "default", "linenumber", "bracelight", "bracebad", "controlchar",
+ "indentguide", "calltip"
+}
+
+-- Bah ... ugly ... nicer would be a proper hash .. we now have properties
+-- as well as STYLE_* and some connection between them ... why .. ok, we
+-- could delay things but who cares. Anyway, at this moment the properties
+-- are still unknown.
+
+local function preparestyles(list)
+ local reverse = { }
+ for i=1,#list do
+ local k = list[i]
+ local K = upper(k)
+ local s = "style." .. k
+ lexers[K] = k -- is this used
+ lexers["STYLE_"..K] = "$(" .. k .. ")"
+ reverse[k] = true
+ end
+ return reverse
+end
+
+local defaultstyles = preparestyles(default)
+local predefinedstyles = preparestyles(predefined)
+
+-- These helpers are set afterwards so we delay their initialization ... there
+-- is no need to alias each time again and this way we can more easily adapt
+-- to updates.
+
+-- These keep changing (values, functions, tables ...) so we need to check these
+-- with each update. Some of them are set in the loader (the require 'lexer' is
+-- in fact not a real one as the lexer code is loaded in the dll). It's also not
+-- getting more efficient.
+
+-- FOLD_BASE = lexers.FOLD_BASE or SC_FOLDLEVELBASE
+-- FOLD_HEADER = lexers.FOLD_HEADER or SC_FOLDLEVELHEADERFLAG
+-- FOLD_BLANK = lexers.FOLD_BLANK or SC_FOLDLEVELWHITEFLAG
+-- get_style_at = lexers.get_style_at or GetStyleAt
+-- get_indent_amount = lexers.get_indent_amount or GetIndentAmount
+-- get_property = lexers.get_property or GetProperty
+-- get_fold_level = lexers.get_fold_level or GetFoldLevel
+
+-- It needs checking: do we have access to all properties now? I'll clean
+-- this up anyway as I want a simple clean and stable model.
+
+-- This is somewhat messy. The lexer dll provides some virtual fields:
+--
+-- + property
+-- + property_int
+-- + style_at
+-- + fold_level
+-- + indent_amount
+--
+-- but for some reason not:
+--
+-- + property_expanded
+--
+-- As a consequence we need to define it here because otherwise the
+-- lexer will crash. The fuzzy thing is that we don't have to define
+-- the property and property_int tables but we do have to define the
+-- expanded beforehand. The folding properties are no longer interfaced
+-- so the interface to scite is now rather weak (only a few hard coded
+-- properties).
+
+local FOLD_BASE = 0
+local FOLD_HEADER = 0
+local FOLD_BLANK = 0
+
+local style_at = { }
+local indent_amount = { }
+local fold_level = { }
+
+local function check_main_properties()
+ if not lexers.property then
+ lexers.property = { }
+ end
+ if not lexers.property_int then
+ lexers.property_int = setmetatable({ }, {
+ __index = function(t,k)
+ -- why the tostring .. it relies on lua casting to a number when
+ -- doing a comparison
+ return tonumber(lexers.property[k]) or 0 -- tostring removed
+ end,
+ -- __newindex = function(t,k,v)
+ -- report("properties are read-only, '%s' is not changed",k)
+ -- end,
+ })
+ end
+end
+
+lexers.property_expanded = setmetatable({ }, {
+ __index = function(t,k)
+ -- better be safe for future changes .. what if at some point this is
+ -- made consistent in the dll ... we need to keep an eye on that
+ local property = lexers.property
+ if not property then
+ check_main_properties()
+ property = lexers.property -- refetch, the local still points at nil
+ end
+ --
+ return gsub(property[k],"[$%%]%b()", function(k)
+ return t[sub(k,3,-2)]
+ end)
+ end,
+ __newindex = function(t,k,v)
+ report("properties are read-only, '%s' is not changed",k)
+ end,
+})
+
+-- A downward compatible feature but obsolete:
+
+-- local function get_property(tag,default)
+-- return lexers.property_int[tag] or lexers.property[tag] or default
+-- end
+
+-- We still want our own properties (as it keeps changing so better play
+-- safe from now on). At some point I can freeze them.
+
+local function check_properties(lexer)
+ if lexer.properties then
+ return lexer
+ end
+ check_main_properties()
+ -- we use a proxy
+ local mainproperties = lexers.property
+ local properties = { }
+ local expanded = setmetatable({ }, {
+ __index = function(t,k)
+ return gsub(properties[k] or mainproperties[k],"[$%%]%b()", function(k)
+ return t[sub(k,3,-2)]
+ end)
+ end,
+ })
+ lexer.properties = setmetatable(properties, {
+ __index = mainproperties,
+ __call = function(t,k,default) -- expands
+ local v = expanded[k]
+ local d = type(default) -- don't shadow the table argument 't'
+ if d == "number" then
+ return tonumber(v) or default
+ elseif d == "boolean" then
+ return v == nil and default or v
+ else
+ return v or default
+ end
+ end,
+ })
+ return lexer
+end
+
+-- do
+-- lexers.property = { foo = 123, red = "R" }
+-- local a = check_properties({}) print("a.foo",a.properties.foo)
+-- a.properties.foo = "bar" print("a.foo",a.properties.foo)
+-- a.properties.foo = "bar:$(red)" print("a.foo",a.properties.foo) print("a.foo",a.properties("foo"))
+-- end
+
+local function set(value,default)
+ if value == 0 or value == false or value == "0" then
+ return false
+ elseif value == 1 or value == true or value == "1" then
+ return true
+ else
+ return default
+ end
+end
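+
+-- A quick sanity check of the tri-state conversion (commented out):
+--
+-- assert(set("1",false) == true) -- the string "1" counts as enabled
+-- assert(set(0,true) == false) -- the number 0 counts as disabled
+-- assert(set("x",true) == true) -- anything else keeps the default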
+
+local function check_context_properties()
+ local property = lexers.property -- let's hope that this stays
+ log = set(property["lexer.context.log"], log)
+ trace = set(property["lexer.context.trace"], trace)
+ detail = set(property["lexer.context.detail"], detail)
+ show = set(property["lexer.context.show"], show)
+ collapse = set(property["lexer.context.collapse"],collapse)
+ inspect = set(property["lexer.context.inspect"], inspect)
+end
+
+function context.registerproperties(p) -- global
+ check_main_properties()
+ local property = lexers.property -- let's hope that this stays
+ for k, v in next, p do
+ property[k] = v
+ end
+ check_context_properties()
+end
+
+context.properties = setmetatable({ }, {
+ __index = lexers.property,
+ __newindex = function(t,k,v)
+ check_main_properties()
+ lexers.property[k] = v
+ check_context_properties()
+ end,
+})
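+
+-- So one can do, for instance (commented out):
+--
+-- context.properties["lexer.context.log"] = "1" -- ends up in lexers.property
+-- context.properties["lexer.context.trace"] = "1" -- and retriggers the checks above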
+
+-- We want locals, so we set them delayed. Once.
+
+local function initialize()
+ FOLD_BASE = lexers.FOLD_BASE
+ FOLD_HEADER = lexers.FOLD_HEADER
+ FOLD_BLANK = lexers.FOLD_BLANK
+ --
+ style_at = lexers.style_at -- table
+ indent_amount = lexers.indent_amount -- table
+ fold_level = lexers.fold_level -- table
+ --
+ check_main_properties()
+ --
+ initialize = nil
+end
+
+-- Style handler.
+--
+-- The property table will be set later (after loading) by the library. The
+-- styleset is not needed any more as we predefine all styles as defaults
+-- anyway (too bug sensitive otherwise).
+
+local function tocolors(colors)
+ local colorset = { }
+ local property_int = lexers.property_int or { }
+ for k, v in next, colors do
+ if type(v) == "table" then
+ local r, g, b = v[1], v[2], v[3]
+ if r and g and b then
+ v = tonumber(format("%02X%02X%02X",b,g,r),16) or 0 -- hm
+ elseif r then
+ v = tonumber(format("%02X%02X%02X",r,r,r),16) or 0
+ else
+ v = 0
+ end
+ end
+ colorset[k] = v
+ property_int["color."..k] = v
+ end
+ return colorset
+end
+
+local function toproperty(specification)
+ local serialized = { }
+ for key, value in next, specification do
+ if value == true then
+ serialized[#serialized+1] = key
+ elseif type(value) == "table" then
+ local r, g, b = value[1], value[2], value[3]
+ if r and g and b then
+ value = format("#%02X%02X%02X",r,g,b) or "#000000"
+ elseif r then
+ value = format("#%02X%02X%02X",r,r,r) or "#000000"
+ else
+ value = "#000000"
+ end
+ serialized[#serialized+1] = key .. ":" .. value
+ else
+ serialized[#serialized+1] = key .. ":" .. tostring(value)
+ end
+ end
+ return concat(serialized,",")
+end
+
+local function tostyles(styles)
+ local styleset = { }
+ local property = lexers.property or { }
+ for k, v in next, styles do
+ v = toproperty(v)
+ styleset[k] = v
+ property["style."..k] = v
+ end
+ return styleset
+end
+
+context.toproperty = toproperty
+context.tostyles = tostyles
+context.tocolors = tocolors
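+
+-- An impression of the serialization (commented out; the key order depends on
+-- 'next' so it is not guaranteed):
+--
+-- local s = toproperty { bold = true, fore = { 0x00, 0x00, 0x80 } }
+-- -- s is now something like: "bold,fore:#000080"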
+
+-- If we had one instance/state of Lua as well as all regular libraries
+-- preloaded we could use the context base libraries. So, for now we settle
+-- for a poor man's solution.
+
+function context.registerstyles(styles)
+ local styleset = tostyles(styles)
+ context.styles = styles
+ context.styleset = styleset
+ if detail then
+ local t, n = sortedkeys(styleset)
+ local template = " %-" .. n .. "s : %s"
+ report("initializing styleset:")
+ for i=1,#t do
+ local k = t[i]
+ report(template,k,styleset[k])
+ end
+ elseif trace then
+ report("initializing styleset")
+ end
+end
+
+function context.registercolors(colors) -- needed for textadept
+ local colorset = tocolors(colors)
+ context.colors = colors
+ context.colorset = colorset
+ if detail then
+ local t, n = sortedkeys(colorset)
+ local template = " %-" .. n .. "s : %i"
+ report("initializing colorset:")
+ for i=1,#t do
+ local k = t[i]
+ report(template,k,colorset[k])
+ end
+ elseif trace then
+ report("initializing colorset")
+ end
+end
+
+-- Some spell checking related stuff. Unfortunately we cannot use a path set
+-- by property. This will get a hook for resolvers.
+
+local locations = {
+ "context/lexers", -- context lexers
+ "context/lexers/data", -- context lexers
+ "../lexers", -- original lexers
+ "../lexers/data", -- original lexers
+ ".", -- whatever
+ "./data", -- whatever
+}
+
+-- local function collect(name)
+-- local root = gsub(lexers.LEXERPATH or ".","/.-lua$","") .. "/" -- this is a horrible hack
+-- -- report("module '%s' locating '%s'",tostring(lexers),name)
+-- for i=1,#locations do
+-- local fullname = root .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc
+-- if trace then
+-- report("attempt to locate '%s'",fullname)
+-- end
+-- local okay, result = pcall(function () return dofile(fullname) end)
+-- if okay then
+-- return result, fullname
+-- end
+-- end
+-- end
+
+local function collect(name)
+ local rootlist = lexers.LEXERPATH or "."
+ for root in gmatch(rootlist,"[^;]+") do
+ local root = gsub(root,"/[^/]-lua$","")
+ for i=1,#locations do
+ local fullname = root .. "/" .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc
+ if trace then
+ report("attempt to locate '%s'",fullname)
+ end
+ local okay, result = pcall(function () return dofile(fullname) end)
+ if okay then
+ return result, fullname
+ end
+ end
+ end
+end
+
+function context.loadluafile(name)
+ local data, fullname = collect(name)
+ if data then
+ if trace then
+ report("lua file '%s' has been loaded",fullname)
+ end
+ return data, fullname
+ end
+ if not textadept then
+ report("unable to load lua file '%s'",name)
+ end
+end
+
+-- in fact we could share more as we probably process the data but then we need
+-- to have a more advanced helper
+
+local cache = { }
+
+function context.loaddefinitions(name)
+ local data = cache[name]
+ if data then
+ if trace then
+ report("reusing definitions '%s'",name)
+ end
+ return data
+ elseif data == false then
+ -- false is a cached failure so we don't try (and fail) again
+ if trace then
+ report("definitions '%s' were not found",name)
+ end
+ return false
+ end
+ local data, fullname = collect(name)
+ if not data then
+ if not textadept then
+ report("unable to load definition file '%s'",name)
+ end
+ data = false
+ elseif trace then
+ report("definition file '%s' has been loaded",fullname)
+ if detail then
+ local t, n = sortedkeys(data)
+ local template = " %-" .. n .. "s : %s"
+ for i=1,#t do
+ local k = t[i]
+ local v = data[k]
+ if type(v) ~= "table" then
+ report(template,k,tostring(v))
+ elseif #v > 0 then
+ report(template,k,#v)
+ else
+ -- no need to show hash
+ end
+ end
+ end
+ end
+ cache[name] = data
+ return type(data) == "table" and data
+end
+
+function context.word_match(words,word_chars,case_insensitive)
+ local chars = "%w_" -- maybe just "" when word_chars
+ if word_chars then
+ chars = "^([" .. chars .. gsub(word_chars,"([%^%]%-])", "%%%1") .."]+)"
+ else
+ chars = "^([" .. chars .."]+)"
+ end
+ if case_insensitive then
+ local word_list = { }
+ for i=1,#words do
+ word_list[lower(words[i])] = true
+ end
+ return P(function(input, index)
+ local s, e, word = find(input,chars,index)
+ return word and word_list[lower(word)] and e + 1 or nil
+ end)
+ else
+ local word_list = { }
+ for i=1,#words do
+ word_list[words[i]] = true
+ end
+ return P(function(input, index)
+ local s, e, word = find(input,chars,index)
+ return word and word_list[word] and e + 1 or nil
+ end)
+ end
+end
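+
+-- For instance (commented out), a case insensitive matcher; lpegmatch returns
+-- the position just past the matched word:
+--
+-- local p = context.word_match({ "begin", "end" }, nil, true)
+-- print(lpegmatch(p,"BEGIN")) -- 6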
+
+-- Patterns are grouped in a separate namespace but the regular lexers expect
+-- shortcuts to be present in the lexers library. Maybe I'll incorporate some
+-- of l-lpeg later.
+
+do
+
+ local anything = P(1)
+ local idtoken = R("az","AZ","\127\255","__")
+ local digit = R("09")
+ local sign = S("+-")
+ local period = P(".")
+ local octdigit = R("07")
+ local hexdigit = R("09","AF","af")
+ local lower = R("az")
+ local upper = R("AZ")
+ local alpha = upper + lower
+ local space = S(" \n\r\t\f\v")
+ local eol = S("\r\n")
+ local backslash = P("\\")
+ local decimal = digit^1
+ local octal = P("0")
+ * octdigit^1
+ local hexadecimal = P("0") * S("xX")
+ * (hexdigit^0 * period * hexdigit^1 + hexdigit^1 * period * hexdigit^0 + hexdigit^1)
+ * (S("pP") * sign^-1 * hexdigit^1)^-1 -- *
+
+ patterns.idtoken = idtoken
+ patterns.digit = digit
+ patterns.sign = sign
+ patterns.period = period
+ patterns.octdigit = octdigit
+ patterns.hexdigit = hexdigit
+ patterns.ascii = R("\000\127") -- useless
+ patterns.extend = R("\000\255") -- useless
+ patterns.control = R("\000\031")
+ patterns.lower = lower
+ patterns.upper = upper
+ patterns.alpha = alpha
+ patterns.decimal = decimal
+ patterns.octal = octal
+ patterns.hexadecimal = hexadecimal
+ patterns.float = sign^-1
+ * (digit^0 * period * digit^1 + digit^1 * period * digit^0 + digit^1)
+ * S("eE") * sign^-1 * digit^1 -- *
+ patterns.cardinal = decimal
+
+ patterns.signeddecimal = sign^-1 * decimal
+ patterns.signedoctal = sign^-1 * octal
+ patterns.signedhexadecimal = sign^-1 * hexadecimal
+ patterns.integer = sign^-1 * (hexadecimal + octal + decimal)
+ patterns.real =
+ sign^-1 * ( -- at most one
+ digit^1 * period * digit^0 -- 10.0 10.
+ + digit^0 * period * digit^1 -- 0.10 .10
+ + digit^1 -- 10
+ )
+
+ patterns.anything = anything
+ patterns.any = anything
+ patterns.restofline = (1-eol)^1
+ patterns.space = space
+ patterns.spacing = space^1
+ patterns.nospacing = (1-space)^1
+ patterns.eol = eol
+ patterns.newline = P("\r\n") + eol
+
+ local endof = S("\n\r\f")
+
+ patterns.startofline = P(function(input,index)
+ return (index == 1 or lpegmatch(endof,input,index-1)) and index
+ end)
+
+ -- These are the expected ones for other lexers. Maybe put them all in their
+ -- own namespace and provide a compatibility layer. Or should I just remove them?
+
+ lexers.any = anything
+ lexers.ascii = patterns.ascii -- there are no locals 'ascii' and 'extend' here
+ lexers.extend = patterns.extend
+ lexers.alpha = alpha
+ lexers.digit = digit
+ lexers.alnum = alpha + digit -- there is no local 'alnum' either
+ lexers.lower = lower
+ lexers.upper = upper
+ lexers.xdigit = hexdigit
+ lexers.cntrl = patterns.control
+ lexers.graph = R("!~")
+ lexers.print = R(" ~")
+ lexers.punct = R("!/", ":@", "[\'", "{~")
+ lexers.space = space
+ lexers.newline = S("\r\n\f")^1
+ lexers.nonnewline = 1 - lexers.newline
+ lexers.nonnewline_esc = 1 - (lexers.newline + '\\') + backslash * anything
+ lexers.dec_num = decimal
+ lexers.oct_num = octal
+ lexers.hex_num = hexadecimal
+ lexers.integer = patterns.integer -- again no locals, so we take the pattern table entries
+ lexers.float = patterns.float
+ lexers.word = (alpha + "_") * (alpha + digit + "_")^0 -- weird, why digits
+
+end
+
+-- end of patterns
+
+function context.exact_match(words,word_chars,case_insensitive)
+ local characters = concat(words)
+ local pattern -- the concat catches _ etc
+ if word_chars == true or word_chars == false or word_chars == nil then
+ word_chars = ""
+ end
+ if type(word_chars) == "string" then
+ pattern = S(characters) + patterns.idtoken
+ if case_insensitive then
+ pattern = pattern + S(upper(characters)) + S(lower(characters))
+ end
+ if word_chars ~= "" then
+ pattern = pattern + S(word_chars)
+ end
+ elseif word_chars then
+ pattern = word_chars
+ end
+ if case_insensitive then
+ local list = { }
+ if #words == 0 then
+ for k, v in next, words do
+ list[lower(k)] = v
+ end
+ else
+ for i=1,#words do
+ list[lower(words[i])] = true
+ end
+ end
+ return Cmt(C(pattern^1), function(_,i,s) -- the C capture makes the matched word available as s
+ return list[lower(s)] -- and i or nil
+ end)
+ else
+ local list = { }
+ if #words == 0 then
+ for k, v in next, words do
+ list[k] = v
+ end
+ else
+ for i=1,#words do
+ list[words[i]] = true
+ end
+ end
+ return Cmt(C(pattern^1), function(_,i,s) -- idem: the C capture is needed
+ return list[s] -- and i or nil
+ end)
+ end
+end
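+
+-- For instance (commented out); the match only succeeds on a complete word
+-- from the list:
+--
+-- local p = context.exact_match { "for", "function" }
+-- print(lpegmatch(p,"function")) -- 9
+-- print(lpegmatch(p,"fun")) -- nil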
+
+function context.just_match(words)
+ local p = P(words[1])
+ for i=2,#words do
+ p = p + P(words[i])
+ end
+ return p
+end
+
+-- spell checking (we can only load lua files)
+--
+-- return {
+-- min = 3,
+-- max = 40,
+-- n = 12345,
+-- words = {
+-- ["someword"] = "someword",
+-- ["anotherword"] = "Anotherword",
+-- },
+-- }
+
+local lists = { }
+
+function context.setwordlist(tag,limit) -- returns hash (lowercase keys and original values)
+ if not tag or tag == "" then
+ return false, 3
+ end
+ local list = lists[tag]
+ if not list then
+ list = context.loaddefinitions("spell-" .. tag)
+ if not list or type(list) ~= "table" then
+ if not textadept then
+ report("invalid spell checking list for '%s'",tag)
+ end
+ list = { words = false, min = 3 }
+ else
+ list.words = list.words or false
+ list.min = list.min or 3
+ end
+ lists[tag] = list
+ end
+ if trace then
+ report("enabling spell checking for '%s' with minimum '%s'",tag,list.min)
+ end
+ return list.words, list.min
+end
+
+patterns.wordtoken = R("az","AZ","\127\255")
+patterns.wordpattern = patterns.wordtoken^3 -- todo: if limit and #s < limit then
+
+function context.checkedword(validwords,validminimum,s,i) -- ,limit
+ if not validwords then -- or #s < validminimum then
+ return true, "text", i -- true, "default", i
+ else
+ -- keys are lower
+ local word = validwords[s]
+ if word == s then
+ return true, "okay", i -- exact match
+ elseif word then
+ return true, "warning", i -- case issue
+ else
+ local word = validwords[lower(s)]
+ if word == s then
+ return true, "okay", i -- exact match
+ elseif word then
+ return true, "warning", i -- case issue
+ elseif upper(s) == s then
+ return true, "warning", i -- probably a logo or acronym
+ else
+ return true, "error", i
+ end
+ end
+ end
+end
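+
+-- For instance (commented out), given lowercased keys and original values:
+--
+-- local words = { foo = "foo", tex = "TeX" }
+-- print(context.checkedword(words,3,"foo",1)) -- true "okay" 1 (exact match)
+-- print(context.checkedword(words,3,"TeX",1)) -- true "okay" 1 (exact match)
+-- print(context.checkedword(words,3,"Tex",1)) -- true "warning" 1 (case issue)
+-- print(context.checkedword(words,3,"oops",1)) -- true "error" 1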
+
+function context.styleofword(validwords,validminimum,s) -- ,limit
+ if not validwords or #s < validminimum then
+ return "text"
+ else
+ -- keys are lower
+ local word = validwords[s]
+ if word == s then
+ return "okay" -- exact match
+ elseif word then
+ return "warning" -- case issue
+ else
+ local word = validwords[lower(s)]
+ if word == s then
+ return "okay" -- exact match
+ elseif word then
+ return "warning" -- case issue
+ elseif upper(s) == s then
+ return "warning" -- probably a logo or acronym
+ else
+ return "error"
+ end
+ end
+ end
+end
+
+-- overloaded functions
+
+local h_table, b_table, n_table = { }, { }, { } -- from the time small tables were used (optimization)
+
+setmetatable(h_table, { __index = function(t,level) local v = { level, FOLD_HEADER } t[level] = v return v end })
+setmetatable(b_table, { __index = function(t,level) local v = { level, FOLD_BLANK } t[level] = v return v end })
+setmetatable(n_table, { __index = function(t,level) local v = { level } t[level] = v return v end })
+
+local newline = patterns.newline
+local p_yes = Cp() * Cs((1-newline)^1) * newline^-1
+local p_nop = newline
+
+local folders = { }
+
+local function fold_by_parsing(text,start_pos,start_line,start_level,lexer)
+ local folder = folders[lexer]
+ if not folder then
+ --
+ local pattern, folds, text, start_pos, line_num, prev_level, current_level
+ --
+ local fold_symbols = lexer._foldsymbols
+ local fold_pattern = lexer._foldpattern -- use lpeg instead (context extension)
+ --
+ if fold_pattern then
+ -- if no functions are found then we could have a faster one
+ fold_pattern = Cp() * C(fold_pattern) / function(s,match)
+ local symbols = fold_symbols[style_at[start_pos + s]]
+ if symbols then
+ local l = symbols[match]
+ if l then
+ current_level = current_level + l
+ end
+ end
+ end
+ local action_y = function()
+ folds[line_num] = prev_level
+ if current_level > prev_level then
+ folds[line_num] = prev_level + FOLD_HEADER
+ end
+ if current_level < FOLD_BASE then
+ current_level = FOLD_BASE
+ end
+ prev_level = current_level
+ line_num = line_num + 1
+ end
+ local action_n = function()
+ folds[line_num] = prev_level + FOLD_BLANK
+ line_num = line_num + 1
+ end
+ pattern = ((fold_pattern + (1-newline))^1 * newline / action_y + newline/action_n)^0
+
+ else
+ -- the traditional one but a bit optimized
+ local fold_symbols_patterns = fold_symbols._patterns
+ local action_y = function(pos,line)
+ for j = 1, #fold_symbols_patterns do
+ for s, match in gmatch(line,fold_symbols_patterns[j]) do -- "()(" .. patterns[i] .. ")"
+ local symbols = fold_symbols[style_at[start_pos + pos + s - 1]]
+ local l = symbols and symbols[match]
+ local t = type(l)
+ if t == "number" then
+ current_level = current_level + l
+ elseif t == "function" then
+ current_level = current_level + l(text, pos, line, s, match)
+ end
+ end
+ end
+ folds[line_num] = prev_level
+ if current_level > prev_level then
+ folds[line_num] = prev_level + FOLD_HEADER
+ end
+ if current_level < FOLD_BASE then
+ current_level = FOLD_BASE
+ end
+ prev_level = current_level
+ line_num = line_num + 1
+ end
+ local action_n = function()
+ folds[line_num] = prev_level + FOLD_BLANK
+ line_num = line_num + 1
+ end
+ pattern = (p_yes/action_y + p_nop/action_n)^0
+ end
+ --
+ local reset_parser = lexer._reset_parser
+ --
+ folder = function(_text_,_start_pos_,_start_line_,_start_level_)
+ if reset_parser then
+ reset_parser()
+ end
+ folds = { }
+ text = _text_
+ start_pos = _start_pos_
+ line_num = _start_line_
+ prev_level = _start_level_
+ current_level = prev_level
+ lpegmatch(pattern,text)
+ -- make folds collectable
+ local t = folds
+ folds = nil
+ return t
+ end
+ folders[lexer] = folder
+ end
+ return folder(text,start_pos,start_line,start_level,lexer)
+end
+
+local folds, current_line, prev_level
+
+local function action_y()
+ local current_level = FOLD_BASE + indent_amount[current_line]
+ if current_level > prev_level then -- next level
+ local i = current_line - 1
+ local f
+ while true do
+ f = folds[i]
+ if not f then
+ break
+ elseif f[2] == FOLD_BLANK then
+ i = i - 1
+ else
+ f[2] = FOLD_HEADER -- low indent
+ break
+ end
+ end
+ folds[current_line] = { current_level } -- high indent
+ elseif current_level < prev_level then -- prev level
+ local f = folds[current_line - 1]
+ if f then
+ f[1] = prev_level -- high indent
+ end
+ folds[current_line] = { current_level } -- low indent
+ else -- same level
+ folds[current_line] = { prev_level }
+ end
+ prev_level = current_level
+ current_line = current_line + 1
+end
+
+local function action_n()
+ folds[current_line] = { prev_level, FOLD_BLANK }
+ current_line = current_line + 1
+end
+
+local pattern = ( S("\t ")^0 * ( (1-patterns.eol)^1 / action_y + P(true) / action_n) * newline )^0
+
+local function fold_by_indentation(text,start_pos,start_line,start_level)
+ -- initialize
+ folds = { }
+ current_line = start_line
+ prev_level = start_level
+ -- define
+ -- -- not here .. pattern binds and local functions are not frozen
+ -- analyze
+ lpegmatch(pattern,text)
+ -- flatten
+ for line, level in next, folds do
+ folds[line] = level[1] + (level[2] or 0)
+ end
+ -- done, make folds collectable
+ local t = folds
+ folds = nil
+ return t
+end
+
+local function fold_by_line(text,start_pos,start_line,start_level)
+ local folds = { }
+ -- can also be lpeg'd
+ for _ in gmatch(text,".-\r?\n") do
+ folds[start_line] = n_table[start_level] -- { start_level } -- still tables? needs checking
+ start_line = start_line + 1
+ end
+ return folds
+end
+
+local threshold_by_lexer = 512 * 1024 -- we don't know the filesize yet
+local threshold_by_parsing = 512 * 1024 -- we don't know the filesize yet
+local threshold_by_indentation = 512 * 1024 -- we don't know the filesize yet
+local threshold_by_line = 512 * 1024 -- we don't know the filesize yet
+
+function context.fold(lexer,text,start_pos,start_line,start_level) -- hm, we had size thresholds .. where did they go
+ if text == "" then
+ return { }
+ end
+ if initialize then
+ initialize()
+ end
+ local fold_by_lexer = lexer._fold
+ local fold_by_symbols = lexer._foldsymbols
+ local filesize = 0 -- we don't know that
+ if fold_by_lexer then
+ if filesize <= threshold_by_lexer then
+ return fold_by_lexer(text,start_pos,start_line,start_level,lexer)
+ end
+ elseif fold_by_symbols then -- and lexer.properties("fold.by.parsing",1) > 0 then
+ if filesize <= threshold_by_parsing then
+ return fold_by_parsing(text,start_pos,start_line,start_level,lexer)
+ end
+ elseif lexer.properties("fold.by.indentation",1) > 0 then
+ if filesize <= threshold_by_indentation then
+ return fold_by_indentation(text,start_pos,start_line,start_level,lexer)
+ end
+ elseif lexer.properties("fold.by.line",1) > 0 then
+ if filesize <= threshold_by_line then
+ return fold_by_line(text,start_pos,start_line,start_level,lexer)
+ end
+ end
+ return { }
+end
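+
+-- As an impression (commented out, hypothetical lexer): folding by parsing is
+-- driven by _foldsymbols (plus the faster lpeg based _foldpattern variant),
+-- keyed by style name, each symbol mapping to a level increment:
+--
+-- somelexer._foldpattern = S("{}")
+-- somelexer._foldsymbols = {
+-- _patterns = { "[{}]" },
+-- ["grouping"] = { ["{"] = 1, ["}"] = -1 },
+-- }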
+
+-- The following code is mostly unchanged:
+
+local function add_rule(lexer,id,rule) -- unchanged
+ if not lexer._RULES then
+ lexer._RULES = { }
+ lexer._RULEORDER = { }
+ end
+ lexer._RULES[id] = rule
+ lexer._RULEORDER[#lexer._RULEORDER + 1] = id
+end
+
+-- I finally figured out that adding more styles was an issue for several
+-- reasons:
+--
+-- + in old versions there was a limit on the number, so we overran the built-in
+-- hard coded scintilla range
+-- + then, the add_style function didn't check for already known ones, so again
+-- we had an overrun (with some magic that could be avoided)
+-- + then, when I messed with a new default set I realized that there is no check
+-- in initializing _TOKENSTYLES (here the inspect function helps)
+-- + of course it was mostly a side effect of passing all the used styles to the
+-- _tokenstyles instead of only the not-default ones but such a thing should not
+-- matter (read: intercepted)
+--
+-- This finally removed a headache and was revealed by lots of tracing, which I
+-- should have built in way earlier.
+
+local function add_style(lexer,token_name,style) -- changed a bit around 3.41
+ -- We don't add styles that are already defined as this can overflow the
+ -- amount possible (in old versions of scintilla).
+ if defaultstyles[token_name] then
+ if trace and detail then
+ report("default style '%s' is ignored as extra style",token_name)
+ end
+ return
+ elseif predefinedstyles[token_name] then
+ if trace and detail then
+ report("predefined style '%s' is ignored as extra style",token_name)
+ end
+ return
+ else
+ if trace and detail then
+ report("adding extra style '%s' as '%s'",token_name,style)
+ end
+ end
+ -- This is unchanged. We skip the dangerous zone.
+ local num_styles = lexer._numstyles
+ if num_styles == 32 then
+ num_styles = num_styles + 8
+ end
+ if num_styles >= 255 then
+ report("there can't be more than %s styles",255)
+ end
+ lexer._TOKENSTYLES[token_name] = num_styles
+ lexer._EXTRASTYLES[token_name] = style
+ lexer._numstyles = num_styles + 1
+end
+
+local function check_styles(lexer)
+ -- Here we also use a check for the dangerous zone. That way we can have a
+ -- larger default set. The original code just assumes that #default is less
+ -- than the dangerous zone's start.
+ local numstyles = 0
+ local tokenstyles = { }
+ for i=1, #default do
+ if numstyles == 32 then
+ numstyles = numstyles + 8
+ end
+ tokenstyles[default[i]] = numstyles
+ numstyles = numstyles + 1
+ end
+ -- Unchanged.
+ for i=1, #predefined do
+ tokenstyles[predefined[i]] = i + 31
+ end
+ lexer._TOKENSTYLES = tokenstyles
+ lexer._numstyles = numstyles
+ lexer._EXTRASTYLES = { }
+ return lexer
+end
+
+-- At some point an 'any' append showed up in the original code ...
+-- but I see no need to catch that case ... better fix the specification.
+--
+-- hm, why are many joined twice
+
+local function join_tokens(lexer) -- slightly different from the original (no 'any' append)
+ local rules = lexer._RULES -- not named 'patterns' as that would shadow the pattern table used below
+ local order = lexer._RULEORDER
+ -- report("lexer: %s, tokens: %s",lexer._NAME,table.concat(order," + "))
+ if rules and order then
+ local token_rule = rules[order[1]] -- normally whitespace
+ for i=2,#order do
+ token_rule = token_rule + rules[order[i]]
+ end
+ if lexer._TYPE ~= "context" then
+ token_rule = token_rule + lexers.token(lexers.DEFAULT, patterns.any)
+ end
+ end
+ lexer._TOKENRULE = token_rule
+ return token_rule
+ else
+ return P(1)
+ end
+end
+
+local function add_lexer(grammar, lexer) -- mostly the same as the original
+ local token_rule = join_tokens(lexer)
+ local lexer_name = lexer._NAME
+ local children = lexer._CHILDREN
+ for i=1,#children do
+ local child = children[i]
+ if child._CHILDREN then
+ add_lexer(grammar, child)
+ end
+ local child_name = child._NAME
+ local rules = child._EMBEDDEDRULES[lexer_name]
+ local rules_token_rule = grammar["__" .. child_name] or rules.token_rule
+ local pattern = (-rules.end_rule * rules_token_rule)^0 * rules.end_rule^-1
+ grammar[child_name] = pattern * V(lexer_name)
+ local embedded_child = "_" .. child_name
+ grammar[embedded_child] = rules.start_rule * pattern
+ token_rule = V(embedded_child) + token_rule
+ end
+ if trace then
+ report("adding lexer '%s' with %s children",lexer_name,#children)
+ end
+ grammar["__" .. lexer_name] = token_rule
+ grammar[lexer_name] = token_rule^0
+end
+
+local function build_grammar(lexer,initial_rule) -- same as the original
+ local children = lexer._CHILDREN
+ local lexer_name = lexer._NAME
+ if children then
+ if not initial_rule then
+ initial_rule = lexer_name
+ end
+ local grammar = { initial_rule }
+ add_lexer(grammar, lexer)
+ lexer._INITIALRULE = initial_rule
+ lexer._GRAMMAR = Ct(P(grammar))
+ if trace then
+ report("building grammar for '%s' with whitespace '%s'and %s children",lexer_name,lexer.whitespace or "?",#children)
+ end
+ else
+ lexer._GRAMMAR = Ct(join_tokens(lexer)^0)
+ if trace then
+ report("building grammar for '%s' with whitespace '%s'",lexer_name,lexer.whitespace or "?")
+ end
+ end
+end
+
+-- So far. We need these local functions in the next one.
+
+local lineparsers = { }
+
+local maxmatched = 100
+
+local function collapsed(t)
+ local lasttoken = nil
+ local lastindex = nil
+ for i=1,#t,2 do
+ local token = t[i]
+ local position = t[i+1]
+ if token == lasttoken then
+ t[lastindex] = position
+ elseif lastindex then
+ lastindex = lastindex + 1
+ t[lastindex] = token
+ lastindex = lastindex + 1
+ t[lastindex] = position
+ lasttoken = token
+ else
+ lastindex = i+1
+ lasttoken = token
+ end
+ end
+ if lastindex then -- nothing to strip when t is empty
+ for i=#t,lastindex+1,-1 do
+ t[i] = nil
+ end
+ end
+ return t
+end
+
+local function matched(lexer,grammar,text)
+ -- text = string.gsub(text,"\z","!")
+ local t = lpegmatch(grammar,text)
+ if trace then
+ if show then
+ report("output of lexer: %s (max %s entries)",lexer._NAME,maxmatched)
+ local s = lexer._TOKENSTYLES
+ local p = 1
+ for i=1,2*maxmatched,2 do
+ local n = i + 1
+ local ti = t[i]
+ local tn = t[n]
+ if ti then
+ local txt = sub(text,p,tn-1)
+ if txt then
+ txt = gsub(txt,"[%s]"," ")
+ else
+ txt = "!no text!"
+ end
+ report("%4i : %s > %s (%s) (%s)",n/2,ti,tn,s[ti] or "!unset!",txt)
+ p = tn
+ else
+ break
+ end
+ end
+ end
+ report("lexer results: %s, length: %s, ranges: %s",lexer._NAME,#text,#t/2)
+ if collapse then
+ t = collapsed(t)
+ report("lexer collapsed: %s, length: %s, ranges: %s",lexer._NAME,#text,#t/2)
+ end
+ elseif collapse then
+ t = collapsed(t)
+ end
+ return t
+end
+
+-- Todo: make nice generic lexer (extra argument with start/stop commands) for
+-- context itself.
+
+function context.lex(lexer,text,init_style)
+ -- local lexer = global._LEXER
+ local grammar = lexer._GRAMMAR
+ if initialize then
+ initialize()
+ end
+ if not grammar then
+ return { }
+ elseif lexer._LEXBYLINE then -- we could keep token
+ local tokens = { }
+ local offset = 0
+ local noftokens = 0
+ local lineparser = lineparsers[lexer]
+ if not lineparser then -- probably a cmt is more efficient
+ lineparser = C((1-newline)^0 * newline) / function(line)
+ local length = #line
+ local line_tokens = length > 0 and lpegmatch(grammar,line)
+ if line_tokens then
+ for i=1,#line_tokens,2 do
+ noftokens = noftokens + 1
+ tokens[noftokens] = line_tokens[i]
+ noftokens = noftokens + 1
+ tokens[noftokens] = line_tokens[i + 1] + offset
+ end
+ end
+ offset = offset + length
+ if noftokens > 0 and tokens[noftokens] ~= offset then
+ noftokens = noftokens + 1
+ tokens[noftokens] = "default"
+ noftokens = noftokens + 1
+ tokens[noftokens] = offset + 1
+ end
+ end
+ lineparser = lineparser^0
+ lineparsers[lexer] = lineparser
+ end
+ lpegmatch(lineparser,text)
+ return tokens
+ elseif lexer._CHILDREN then
+ local hash = lexer._HASH -- hm, was _hash
+ if not hash then
+ hash = { }
+ lexer._HASH = hash
+ end
+ grammar = hash[init_style]
+ if grammar then
+ lexer._GRAMMAR = grammar
+ -- lexer._GRAMMAR = lexer._GRAMMAR or grammar
+ else
+ for style, style_num in next, lexer._TOKENSTYLES do
+ if style_num == init_style then
+ -- the name of the lexer is filtered from the whitespace
+ -- specification .. weird code, should be a reverse hash
+ local lexer_name = match(style,"^(.+)_whitespace") or lexer._NAME
+ if lexer._INITIALRULE ~= lexer_name then
+ grammar = hash[lexer_name]
+ if not grammar then
+ build_grammar(lexer,lexer_name)
+ grammar = lexer._GRAMMAR
+ hash[lexer_name] = grammar
+ end
+ end
+ break
+ end
+ end
+ grammar = grammar or lexer._GRAMMAR
+ hash[init_style] = grammar
+ end
+ if trace then
+ report("lexing '%s' with initial style '%s' and %s children",lexer._NAME,#lexer._CHILDREN or 0,init_style)
+ end
+ return matched(lexer,grammar,text)
+ else
+ if trace then
+ report("lexing '%s' with initial style '%s'",lexer._NAME,init_style)
+ end
+ return matched(lexer,grammar,text)
+ end
+end
+
+-- hm, changed in 3.24 .. no longer small table but one table:
+
+function context.token(name, patt)
+ return patt * Cc(name) * Cp()
+end
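+
+-- So lexing produces one flat array of alternating token (style) names and
+-- positions, each position being the first index after the run:
+--
+-- { "keyword", 8, "text", 12, ... }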
+
+-- The next ones were mostly unchanged (till now); we moved them here when 3.41
+-- became close to impossible to combine with, let alone overload, and a merge
+-- was the only solution. It makes later updates more painful but the update to
+-- 3.41 was already a bit of a nightmare anyway.
+
+-- Loading lexers is rather interwoven with what the dll/so sets and
+-- it changes over time. So, we need to keep an eye on changes. One
+-- problem that we always faced was the limitation on the length of
+-- lexer names (as they occasionally get appended or prepended to strings with
+-- a hard coded limit). So, we always used alternative names and now need
+-- to make sure this doesn't clash. As I no longer intend to use shipped
+-- lexers I could strip away some of the code in the future, but keeping
+-- it as reference makes sense.
+
+-- I spent quite some time figuring out why 3.41 didn't work or crashed, which
+-- is hard when no stdout is available and when the io library is absent. In
+-- the end one of the problems was in the _NAME setting. We set _NAME
+-- to e.g. 'tex' but load from a file with a longer name (which we do
+-- as we don't want to clash with existing files), so we ended up with
+-- lexers not being found.
+
+local whitespaces = { }
+
+local function push_whitespace(name)
+ table.insert(whitespaces,lexers.WHITESPACE or "whitespace")
+ lexers.WHITESPACE = name .. "_whitespace"
+end
+
+local function pop_whitespace()
+ lexers.WHITESPACE = table.remove(whitespaces) or "whitespace"
+end
+
+local function check_whitespace(lexer,name)
+ if lexer then
+ lexer.whitespace = (name or lexer.name or lexer._NAME) .. "_whitespace"
+ end
+end
+
+function context.new(name,filename)
+ local lexer = {
+ _TYPE = "context",
+ --
+ _NAME = name, -- used for token building
+ _FILENAME = filename, -- for diagnostic purposes
+ --
+ name = name,
+ filename = filename,
+ }
+ if trace then
+ report("initializing lexer tagged '%s' from file '%s'",name,filename or name)
+ end
+ check_whitespace(lexer)
+ check_styles(lexer)
+ check_properties(lexer)
+ return lexer
+end
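+
+-- A minimal skeleton (commented out, hypothetical): roughly what the context
+-- lexer files look like:
+--
+-- local mylexer = context.new("foo","scite-context-lexer-foo")
+-- local whitespace = mylexer.whitespace
+--
+-- mylexer._rules = {
+-- { "whitespace", context.token(whitespace, patterns.spacing) },
+-- { "rest", context.token("default", patterns.anything) },
+-- }
+--
+-- mylexer._tokenstyles = context.styleset
+--
+-- return mylexer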
+
+local function nolexer(name)
+ local lexer = {
+ _TYPE = "unset",
+ _NAME = name,
+ -- _rules = { },
+ }
+ check_styles(lexer)
+ check_whitespace(lexer)
+ check_properties(lexer)
+ return lexer
+end
+
+local function load_lexer(name,namespace)
+ if trace then
+ report("loading lexer file '%s'",name)
+ end
+ push_whitespace(namespace or name) -- for traditional lexers .. no alt_name yet
+ local lexer, fullname = context.loadluafile(name)
+ pop_whitespace()
+ if not lexer then
+ report("invalid lexer file '%s'",name)
+ elseif trace then
+ report("lexer file '%s' has been loaded",fullname)
+ end
+ if type(lexer) ~= "table" then
+ if trace then
+ report("lexer file '%s' gets a dummy lexer",name)
+ end
+ return nolexer(name)
+ end
+ if lexer._TYPE ~= "context" then
+ lexer._TYPE = "native"
+ check_styles(lexer)
+ check_whitespace(lexer,namespace or name)
+ check_properties(lexer)
+ end
+ if not lexer._NAME then
+ lexer._NAME = name -- so: filename
+ end
+ if name ~= namespace then
+ lexer._NAME = namespace
+ end
+ return lexer
+end
+
+-- tracing ...
+
+local function inspect_lexer(lexer,level)
+ -- If we had the regular libs available I could use the usual
+ -- helpers.
+ local parent = lexer._lexer
+ lexer._lexer = nil -- prevent endless recursion
+ local name = lexer._NAME
+ local function showstyles_1(tag,styles)
+ local numbers = { }
+ for k, v in next, styles do
+ numbers[v] = k
+ end
+ -- sort by number and make number hash too
+ local keys = sortedkeys(numbers)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = numbers[k]
+ report("[%s %s] %s %s = %s",level,name,tag,k,v)
+ end
+ end
+ local function showstyles_2(tag,styles)
+ local keys = sortedkeys(styles)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = styles[k]
+ report("[%s %s] %s %s = %s",level,name,tag,k,v)
+ end
+ end
+ local keys = sortedkeys(lexer)
+ for i=1,#keys do
+ local k = keys[i]
+ local v = lexer[k]
+ report("[%s %s] root key : %s = %s",level,name,k,tostring(v))
+ end
+ showstyles_1("token style",lexer._TOKENSTYLES)
+ showstyles_2("extra style",lexer._EXTRASTYLES)
+ local children = lexer._CHILDREN
+ if children then
+ for i=1,#children do
+ inspect_lexer(children[i],level+1)
+ end
+ end
+ lexer._lexer = parent
+end
+
+function context.inspect(lexer)
+ inspect_lexer(lexer,0)
+end
+
+-- An optional second argument has been introduced so that one can embed a lexer
+-- more than once ... maybe something to look into (as now it's done by remembering
+-- the start sequence ... quite okay but maybe suboptimal ... anyway, never change
+-- a working solution).
+
+-- namespace can be automatic: if parent then use name of parent (chain)
+
+function context.loadlexer(filename,namespace)
+ nesting = nesting + 1
+ if not namespace then
+ namespace = filename
+ end
+ local lexer = usedlexers[namespace] -- we load by filename but the internal name can be short
+ if lexer then
+ if trace then
+ report("reusing lexer '%s'",namespace)
+ end
+ nesting = nesting - 1
+ return lexer
+ elseif trace then
+ report("loading lexer '%s'",namespace)
+ end
+ --
+ if initialize then
+ initialize()
+ end
+ --
+ parent_lexer = nil
+ --
+ lexer = load_lexer(filename,namespace) or nolexer(filename,namespace)
+ usedlexers[namespace] = lexer -- same key as the lookup above, else the cache never hits
+ --
+ if not lexer._rules and not lexer._lexer then
+ lexer._lexer = parent_lexer
+ end
+ --
+ if lexer._lexer then
+ local _l = lexer._lexer
+ local _r = lexer._rules
+ local _s = lexer._tokenstyles
+ if not _l._tokenstyles then
+ _l._tokenstyles = { }
+ end
+ if _r then
+ local rules = _l._rules
+ local name = lexer.name
+ for i=1,#_r do
+ local rule = _r[i]
+ rules[#rules + 1] = {
+ name .. "_" .. rule[1],
+ rule[2],
+ }
+ end
+ end
+ if _s then
+ local tokenstyles = _l._tokenstyles
+ for token, style in next, _s do
+ tokenstyles[token] = style
+ end
+ end
+ lexer = _l
+ end
+ --
+ local _r = lexer._rules
+ if _r then
+ local _s = lexer._tokenstyles
+ if _s then
+ for token, style in next, _s do
+ add_style(lexer, token, style)
+ end
+ end
+ for i=1,#_r do
+ local rule = _r[i]
+ add_rule(lexer, rule[1], rule[2])
+ end
+ build_grammar(lexer)
+ end
+ --
+ add_style(lexer, lexer.whitespace, lexers.STYLE_WHITESPACE)
+ --
+ local foldsymbols = lexer._foldsymbols
+ if foldsymbols then
+ local patterns = foldsymbols._patterns
+ if patterns then
+ for i = 1, #patterns do
+ patterns[i] = "()(" .. patterns[i] .. ")"
+ end
+ end
+ end
+ --
+ lexer.lex = lexers.lex
+ lexer.fold = lexers.fold
+ --
+ nesting = nesting - 1
+ --
+ if inspect then
+ context.inspect(lexer)
+ end
+ --
+ return lexer
+end
+
+-- I probably need to check this occasionally against the original as I've messed around a
+-- bit in the past to get nesting working well: one can hit the max number of styles, get
+-- clashes due to fuzzy inheritance, etc., so there is some interplay with the other patched
+-- code.
+
+function context.embed_lexer(parent, child, start_rule, end_rule) -- mostly the same as the original
+ local embeddedrules = child._EMBEDDEDRULES
+ if not embeddedrules then
+ embeddedrules = { }
+ child._EMBEDDEDRULES = embeddedrules
+ end
+ if not child._RULES then
+ local rules = child._rules
+ if not rules then
+ report("child lexer '%s' has no rules",child._NAME or "unknown")
+ rules = { }
+ child._rules = rules
+ end
+ for i=1,#rules do
+ local rule = rules[i]
+ add_rule(child, rule[1], rule[2])
+ end
+ end
+ embeddedrules[parent._NAME] = {
+ ["start_rule"] = start_rule,
+ ["token_rule"] = join_tokens(child),
+ ["end_rule"] = end_rule
+ }
+ local children = parent._CHILDREN
+ if not children then
+ children = { }
+ parent._CHILDREN = children
+ end
+ children[#children + 1] = child
+ local tokenstyles = parent._tokenstyles
+ if not tokenstyles then
+ tokenstyles = { }
+ parent._tokenstyles = tokenstyles
+ end
+ local childname = child._NAME
+ local whitespace = childname .. "_whitespace"
+ tokenstyles[whitespace] = lexers.STYLE_WHITESPACE -- all these STYLE_THINGS will go .. just a proper hash
+ if trace then
+ report("using whitespace '%s' as trigger for '%s' with property '%s'",whitespace,childname,lexers.STYLE_WHITESPACE)
+ end
+ local childstyles = child._tokenstyles
+ if childstyles then
+ for token, style in next, childstyles do
+ tokenstyles[token] = style
+ end
+ end
+ -- new, a bit redone, untested, no clue yet what it is for
+ local parentsymbols = parent._foldsymbols
+ local childsymbols = child ._foldsymbols
+ if not parentsymbols then
+ parentsymbols = { }
+ parent._foldsymbols = parentsymbols
+ end
+ if childsymbols then
+ for token, symbols in next, childsymbols do
+ local tokensymbols = parentsymbols[token]
+ if not tokensymbols then
+ tokensymbols = { }
+ parentsymbols[token] = tokensymbols
+ end
+ for k, v in next, symbols do
+ if type(k) == 'number' then
+ tokensymbols[#tokensymbols + 1] = v
+ elseif not tokensymbols[k] then
+ tokensymbols[k] = v
+ end
+ end
+ end
+ end
+ --
+ child._lexer = parent
+ parent_lexer = parent
+end
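+
+-- Typical usage (commented out, hypothetical names): the parent switches to
+-- the child between the start and end tokens:
+--
+-- context.embed_lexer(
+-- texlexer, -- parent
+-- lualexer, -- child
+-- context.token("embedded",P("\\startluacode")),
+-- context.token("embedded",P("\\stopluacode"))
+-- )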
+
+-- we now move the adapted code to the lexers namespace
+
+lexers.new = context.new
+lexers.load = context.loadlexer
+------.loadlexer = context.loadlexer
+lexers.loadluafile = context.loadluafile
+lexers.embed_lexer = context.embed_lexer
+lexers.fold = context.fold
+lexers.lex = context.lex
+lexers.token = context.token
+lexers.word_match = context.word_match
+lexers.exact_match = context.exact_match
+lexers.just_match = context.just_match
+lexers.inspect = context.inspect
+lexers.report = context.report
+lexers.inform = context.inform
+
+-- helper .. alas ... the lexer's lua instance is rather crippled .. not even
+-- math is part of it
+
+do
+
+ local floor = math and math.floor
+ local char = string.char
+ local format = format
+ local tonumber = tonumber
+
+ if not floor then
+
+ if tonumber(string.match(_VERSION,"%d%.%d")) < 5.3 then
+ floor = function(n)
+ return tonumber(format("%d",n))
+ end
+ else
+ -- 5.3 has a mixed number system and format %d doesn't work with
+ -- floats any longer ... no fun
+ floor = function(n)
+ return (n - n % 1)
+ end
+ end
+
+ math = math or { }
+
+ math.floor = floor
+
+ end
+
+ local function utfchar(n)
+ if n < 0x80 then
+ return char(n)
+ elseif n < 0x800 then
+ return char(
+ 0xC0 + floor(n/0x40),
+ 0x80 + (n % 0x40)
+ )
+ elseif n < 0x10000 then
+ return char(
+ 0xE0 + floor(n/0x1000),
+ 0x80 + (floor(n/0x40) % 0x40),
+ 0x80 + (n % 0x40)
+ )
+ elseif n < 0x40000 then
+ return char(
+ 0xF0 + floor(n/0x40000),
+ 0x80 + floor(n/0x1000),
+ 0x80 + (floor(n/0x40) % 0x40),
+ 0x80 + (n % 0x40)
+ )
+ else
+ -- return char(
+ -- 0xF1 + floor(n/0x1000000),
+ -- 0x80 + floor(n/0x40000),
+ -- 0x80 + floor(n/0x1000),
+ -- 0x80 + (floor(n/0x40) % 0x40),
+ -- 0x80 + (n % 0x40)
+ -- )
+ return "?"
+ end
+ end
+
+ context.utfchar = utfchar
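+
+ -- For instance (commented out):
+ --
+ -- print(utfchar(0x41)) -- A (one byte)
+ -- print(utfchar(0x00A0)) -- the two byte nbsp used below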
+
+ -- -- the next one is good enough for use here but not perfect (see context for a
+ -- -- better one)
+ --
+ -- local function make(t)
+ -- local p
+ -- for k, v in next, t do
+ -- if not p then
+ -- if next(v) then
+ -- p = P(k) * make(v)
+ -- else
+ -- p = P(k)
+ -- end
+ -- else
+ -- if next(v) then
+ -- p = p + P(k) * make(v)
+ -- else
+ -- p = p + P(k)
+ -- end
+ -- end
+ -- end
+ -- return p
+ -- end
+ --
+ -- function lpeg.utfchartabletopattern(list)
+ -- local tree = { }
+ -- for i=1,#list do
+ -- local t = tree
+ -- for c in gmatch(list[i],".") do
+ -- if not t[c] then
+ -- t[c] = { }
+ -- end
+ -- t = t[c]
+ -- end
+ -- end
+ -- return make(tree)
+ -- end
+
+ helpers.utfcharpattern = P(1) * R("\128\191")^0 -- unchecked but fast
+
+ local p_false = P(false)
+ local p_true = P(true)
+
+ local function make(t)
+ local function making(t)
+ local p = p_false
+ local keys = sortedkeys(t)
+ for i=1,#keys do
+ local k = keys[i]
+ if k ~= "" then
+ local v = t[k]
+ if v == true then
+ p = p + P(k) * p_true
+ elseif v == false then
+ -- can't happen
+ else
+ p = p + P(k) * making(v)
+ end
+ end
+ end
+ if t[""] then
+ p = p + p_true
+ end
+ return p
+ end
+ local p = p_false
+ local keys = sortedkeys(t)
+ for i=1,#keys do
+ local k = keys[i]
+ if k ~= "" then
+ local v = t[k]
+ if v == true then
+ p = p + P(k) * p_true
+ elseif v == false then
+ -- can't happen
+ else
+ p = p + P(k) * making(v)
+ end
+ end
+ end
+ return p
+ end
+
+ local function collapse(t,x)
+ if type(t) ~= "table" then
+ return t, x
+ else
+ local n = next(t)
+ if n == nil then
+ return t, x
+ elseif next(t,n) == nil then
+ -- one entry
+ local k = n
+ local v = t[k]
+ if type(v) == "table" then
+ return collapse(v,x..k)
+ else
+ return v, x .. k
+ end
+ else
+ local tt = { }
+ for k, v in next, t do
+ local vv, kk = collapse(v,k)
+ tt[kk] = vv
+ end
+ return tt, x
+ end
+ end
+ end
+
+ function helpers.utfchartabletopattern(list)
+ local tree = { }
+ local n = #list
+ if n == 0 then
+ for s in next, list do
+ local t = tree
+ local p, pk
+ for c in gmatch(s,".") do
+ if t == true then
+ t = { [c] = true, [""] = true }
+ p[pk] = t
+ p = t
+ t = false
+ elseif t == false then
+ t = { [c] = false }
+ p[pk] = t
+ p = t
+ t = false
+ else
+ local tc = t[c]
+ if not tc then
+ tc = false
+ t[c] = false
+ end
+ p = t
+ t = tc
+ end
+ pk = c
+ end
+ if t == false then
+ p[pk] = true
+ elseif t == true then
+ -- okay
+ else
+ t[""] = true
+ end
+ end
+ else
+ for i=1,n do
+ local s = list[i]
+ local t = tree
+ local p, pk
+ for c in gmatch(s,".") do
+ if t == true then
+ t = { [c] = true, [""] = true }
+ p[pk] = t
+ p = t
+ t = false
+ elseif t == false then
+ t = { [c] = false }
+ p[pk] = t
+ p = t
+ t = false
+ else
+ local tc = t[c]
+ if not tc then
+ tc = false
+ t[c] = false
+ end
+ p = t
+ t = tc
+ end
+ pk = c
+ end
+ if t == false then
+ p[pk] = true
+ elseif t == true then
+ -- okay
+ else
+ t[""] = true
+ end
+ end
+ end
+ collapse(tree,"")
+ -- inspect(tree)
+ return make(tree)
+ end
+
+ patterns.invisibles = helpers.utfchartabletopattern {
+ utfchar(0x00A0), -- nbsp
+ utfchar(0x2000), -- enquad
+ utfchar(0x2001), -- emquad
+ utfchar(0x2002), -- enspace
+ utfchar(0x2003), -- emspace
+ utfchar(0x2004), -- threeperemspace
+ utfchar(0x2005), -- fourperemspace
+ utfchar(0x2006), -- sixperemspace
+ utfchar(0x2007), -- figurespace
+ utfchar(0x2008), -- punctuationspace
+ utfchar(0x2009), -- breakablethinspace
+ utfchar(0x200A), -- hairspace
+ utfchar(0x200B), -- zerowidthspace
+ utfchar(0x202F), -- narrownobreakspace
+ utfchar(0x205F), -- math thinspace
+ }
+
+ -- now we can make:
+
+ patterns.iwordtoken = patterns.wordtoken - patterns.invisibles
+ patterns.iwordpattern = patterns.iwordtoken^3
+
+end
+
+-- The following helpers are not used, partially replaced by other mechanisms and
+-- when needed I'll first optimize them. I only made them somewhat more readable.
+
+function lexers.delimited_range(chars, single_line, no_escape, balanced) -- unchanged
+ local s = sub(chars,1,1)
+ local e = #chars == 2 and sub(chars,2,2) or s
+ local range
+ local b = balanced and s or ""
+ local n = single_line and "\n" or ""
+ if no_escape then
+ local invalid = S(e .. n .. b)
+ range = patterns.any - invalid
+ else
+ local invalid = S(e .. n .. b) + patterns.backslash
+ range = patterns.any - invalid + patterns.backslash * patterns.any
+ end
+ if balanced and s ~= e then
+ return P {
+ s * (range + V(1))^0 * e
+ }
+ else
+ return s * range^0 * P(e)^-1
+ end
+end
+
+function lexers.starts_line(patt) -- unchanged
+ return P ( function(input, index)
+ if index == 1 then
+ return index
+ end
+ local char = sub(input,index - 1,index - 1)
+ if char == "\n" or char == "\r" or char == "\f" then
+ return index
+ end
+ end ) * patt
+end
+
+function lexers.last_char_includes(s) -- unchanged
+ s = "[" .. gsub(s,"[-%%%[]", "%%%1") .. "]"
+ return P ( function(input, index)
+ if index == 1 then
+ return index
+ end
+ local i = index
+ while match(sub(input,i - 1,i - 1),"[ \t\r\n\f]") do
+ i = i - 1
+ end
+ if match(sub(input,i - 1,i - 1),s) then
+ return index
+ end
+ end)
+end
+
+function lexers.nested_pair(start_chars, end_chars) -- unchanged
+ local s = start_chars
+ local e = P(end_chars)^-1
+ return P {
+ s * (patterns.any - s - end_chars + V(1))^0 * e
+ }
+end
+
+local function prev_line_is_comment(prefix, text, pos, line, s) -- unchanged
+ local start = find(line,"%S")
+ if start < s and not find(line,prefix,start,true) then
+ return false
+ end
+ local p = pos - 1
+ if sub(text,p,p) == "\n" then
+ p = p - 1
+ if sub(text,p,p) == "\r" then
+ p = p - 1
+ end
+ if sub(text,p,p) ~= "\n" then
+ while p > 1 and sub(text,p - 1,p - 1) ~= "\n"
+ do p = p - 1
+ end
+ while find(sub(text,p,p),"^[\t ]$") do
+ p = p + 1
+ end
+ return sub(text,p,p + #prefix - 1) == prefix
+ end
+ end
+ return false
+end
+
+local function next_line_is_comment(prefix, text, pos, line, s)
+ local p = find(text,"\n",pos + s)
+ if p then
+ p = p + 1
+ while find(sub(text,p,p),"^[\t ]$") do
+ p = p + 1
+ end
+ return sub(text,p,p + #prefix - 1) == prefix
+ end
+ return false
+end
+
+function lexers.fold_line_comments(prefix)
+ local property_int = lexers.property_int
+ return function(text, pos, line, s)
+ if property_int["fold.line.comments"] == 0 then
+ return 0
+ end
+ if s > 1 and match(line,"^%s*()") < s then
+ return 0
+ end
+ local prev_line_comment = prev_line_is_comment(prefix, text, pos, line, s)
+ local next_line_comment = next_line_is_comment(prefix, text, pos, line, s)
+ if not prev_line_comment and next_line_comment then
+ return 1
+ end
+ if prev_line_comment and not next_line_comment then
+ return -1
+ end
+ return 0
+ end
+end
+
+-- done
+
+return lexers