Diffstat (limited to 'context/data/scite/lexers/scite-context-lexer-xml.lua')
-rw-r--r--  context/data/scite/lexers/scite-context-lexer-xml.lua | 202
1 file changed, 202 insertions, 0 deletions
diff --git a/context/data/scite/lexers/scite-context-lexer-xml.lua b/context/data/scite/lexers/scite-context-lexer-xml.lua
new file mode 100644
index 000000000..0441585c1
--- /dev/null
+++ b/context/data/scite/lexers/scite-context-lexer-xml.lua
@@ -0,0 +1,202 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for xml",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- adapted from the regular context pretty printer code (after all, lexing
+-- boils down to much of the same and there are only so many ways to do
+-- things). Simplified a bit as we have a different nesting model.
+
+-- todo: parse entities in attributes
+
+local lexer = lexer
+local global, string, table, lpeg = _G, string, table, lpeg
+local token, style, colors, exact_match, no_style = lexer.token, lexer.style, lexer.colors, lexer.exact_match, lexer.style_nothing
+local P, R, S, V, C, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt
+local type, setmetatable = type, setmetatable
+local match, find = string.match, string.find
+
+module(...)
+
+local examplelexer = _M
+
+local whitespace = examplelexer.WHITESPACE -- triggers states
+
+local space = lexer.space -- S(" \t\n\r\v\f")
+local any = lexer.any -- P(1)
+
+local dquote = P('"')
+local squote = P("'")
+local colon = P(":")
+local semicolon = P(";")
+local equal = P("=")
+local ampersand = P("&")
+
+local name = (R("az","AZ","09") + S('_-.'))^1
+local openbegin = P("<")
+local openend = P("</")
+local closebegin = P("/>") + P(">")
+local closeend = P(">")
+local opencomment = P("<!--")
+local closecomment = P("-->")
+local openinstruction = P("<?")
+local closeinstruction = P("?>")
+local opencdata = P("<![CDATA[")
+local closecdata = P("]]>")
+
+local entity = ampersand * (1-semicolon)^1 * semicolon
+
+local wordpattern = lexer.context.wordpattern
+local checkedword = lexer.context.checkedword
+local setwordlist = lexer.context.setwordlist
+local validwords = false
+
+-- <?xml version="1.0" encoding="UTF-8" language="uk" ?>
+--
+-- <?context-xml-directive editor language us ?>
+
+local p_preamble = Cmt(#P("<?xml "), function(input,i,_) -- todo: utf bom
+ if i < 10 then
+ validwords = false
+ local language = match(input,"^<%?xml[^>]*%?>%s*<%?context%-xml%-directive%s+editor%s+language%s+(..)%s+%?>")
+ if not language then
+ language = match(input,'^<%?xml[^>]*language=[\"\'](..)[\"\'][^>]*%?>',i)
+ end
+ if language then
+ validwords = setwordlist(language)
+ end
+ end
+ return false
+end)
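+
+-- Note that this pattern never produces a token: the lookahead matches zero characters and the
+-- function always returns false, so lexing falls through to the other rules. Its only job is to
+-- sniff the document head (positions below 10) for a language and set the word list.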
+
+
+local p_word =
+ Cmt(wordpattern, function(_,i,s)
+ if validwords then
+ return checkedword(validwords,s,i)
+ else
+ return true, { "text", i }
+ end
+ end)
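+
+-- When a word list is active, checkedword presumably reports the word back with an appropriate
+-- style (for instance flagging words that are not in the list); without a list every word is
+-- simply styled as "text".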
+
+local p_rest =
+ token("default", any)
+
+local p_text =
+ token("default", (1-S("<>&")-space)^1)
+
+local p_spacing =
+ token(whitespace, space^1)
+
+local p_optionalwhitespace =
+ p_spacing^0
+
+local p_localspacing =
+ token("default", space^1)
+
+-- Because we want the open and close markers of comments and cdata colored differently, we
+-- need embedded lexers (whitespace trigger). What is actually needed is for Scintilla to apply
+-- the current whitespace style. Even using different style keys is not robust, as they can be
+-- shared. I'll fix the main lexer code.
+
+local p_dstring =
+ token("quote",dquote)
+ * token("string",(1-dquote)^0) -- different from context
+ * token("quote",dquote)
+
+local p_sstring =
+ token("quote",squote)
+ * token("string",(1-squote)^0) -- different from context
+ * token("quote",squote)
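+
+-- A possible way to handle the "parse entities in attributes" todo from the top of this file
+-- (just a sketch built on the patterns defined above, not enabled here) is to split the quoted
+-- value on entities:
+--
+-- local p_dstring =
+--     token("quote",dquote)
+--   * (token("constant",entity) + token("string",(1-dquote-ampersand)^1))^0
+--   * token("quote",dquote)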
+
+-- local p_comment =
+-- token("command",opencomment)
+-- * token("comment",(1-closecomment)^0) -- different from context
+-- * token("command",closecomment)
+
+-- local p_cdata =
+-- token("command",opencdata)
+-- * token("comment",(1-closecdata)^0) -- different from context
+-- * token("command",closecdata)
+
+local commentlexer = lexer.load("scite-context-lexer-xml-comment")
+local cdatalexer = lexer.load("scite-context-lexer-xml-cdata")
+
+lexer.embed_lexer(examplelexer, commentlexer, token("command",opencomment), token("command",closecomment))
+lexer.embed_lexer(examplelexer, cdatalexer, token("command",opencdata), token("command",closecdata))
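+
+-- The embedded comment and cdata lexers live in companion files. A rough sketch of such a
+-- child lexer (assuming it follows the same module(...) conventions; this is not the actual
+-- file) could look like this, with the body rule stopping short of the close marker that the
+-- parent lexer colors:
+--
+-- local lexer = lexer
+-- local token = lexer.token
+--
+-- module(...)
+--
+-- local commentlexer = _M
+-- local whitespace   = commentlexer.WHITESPACE
+-- local space        = lexer.space
+--
+-- _rules = {
+--     { "whitespace", token(whitespace, space^1) },
+--     { "comment",    token("comment", (1 - space - lpeg.P("-->"))^1) },
+-- }
+--
+-- _tokenstyles = lexer.context.styleset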
+
+-- maybe cdata just text (then we don't need the extra lexer as we only have one comment then)
+
+local p_name =
+ token("plain",name)
+ * (
+ token("default",colon)
+ * token("keyword",name)
+ )^1
+ + token("keyword",name)
+
+local p_key = p_name
+
+local p_attributes = (
+ p_optionalwhitespace
+ * p_key
+ * p_optionalwhitespace
+ * token("plain",equal)
+ * p_optionalwhitespace
+ * (p_dstring + p_sstring)
+ * p_optionalwhitespace
+)^0
+
+local p_open =
+ token("keyword",openbegin)
+ * p_name
+ * p_optionalwhitespace
+ * p_attributes
+ * token("keyword",closebegin)
+
+local p_close =
+ token("keyword",openend)
+ * p_name
+ * p_optionalwhitespace
+ * token("keyword",closeend)
+
+local p_entity =
+ token("constant",entity)
+
+local p_instruction =
+ token("command",openinstruction * P("xml"))
+ * p_optionalwhitespace
+ * p_attributes
+ * p_optionalwhitespace
+ * token("command",closeinstruction)
+ + token("command",openinstruction * name)
+ * token("default",(1-closeinstruction)^1)
+ * token("command",closeinstruction)
+
+_rules = {
+ { "whitespace", p_spacing },
+ { "preamble", p_preamble },
+ { "word", p_word },
+-- { "text", p_text },
+-- { "comment", p_comment },
+-- { "cdata", p_cdata },
+ { "instruction", p_instruction },
+ { "close", p_close },
+ { "open", p_open },
+ { "entity", p_entity },
+ { "rest", p_rest },
+}
+
+_tokenstyles = lexer.context.styleset
+
+_foldsymbols = { -- somehow doesn't work yet
+ _patterns = {
+ "[<>]",
+ },
+ ["keyword"] = {
+ ["<"] = 1, [">"] = -1,
+ },
+}