diff options
Diffstat (limited to 'context/data/scite/lexers/scite-context-lexer-xml.lua')
-rw-r--r-- | context/data/scite/lexers/scite-context-lexer-xml.lua | 202 |
1 files changed, 202 insertions, 0 deletions
-- diff --git a/context/data/scite/lexers/scite-context-lexer-xml.lua b/context/data/scite/lexers/scite-context-lexer-xml.lua
-- new file mode 100644, index 000000000..0441585c1 (--- /dev/null, @@ -0,0 +1,202 @@)

-- Descriptive metadata for this lexer file; nothing in this file reads it.
local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for xml", -- was "metafun": copy/paste leftover
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- adapted from the regular context pretty printer code (after all, lexing
-- boils down to much of the same and there are only so many ways to do
-- things). Simplified a bit as we have a different nesting model.

-- todo: parse entities in attributes

-- Everything that is needed further down has to be captured in locals
-- before module(...) is called: in Lua 5.1 module() replaces the environment
-- of this chunk with the module table, after which the regular globals are
-- no longer reachable by name.

local lexer = lexer
local global, string, table, lpeg = _G, string, table, lpeg
local token, style, colors, exact_match, no_style = lexer.token, lexer.style, lexer.colors, lexer.exact_match, lexer.style_nothing
local P, R, S, V, C, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt
local type, setmetatable = type, setmetatable
local match, find = string.match, string.find

module(...)

-- The module table created by module(...); the rules and styles defined at
-- the end of the file end up in it.
local examplelexer = _M

local whitespace = examplelexer.WHITESPACE -- triggers states

local space = lexer.space -- S(" \t\n\r\v\f")
local any   = lexer.any   -- P(1)

-- single character building blocks

local dquote    = P('"')
local squote    = P("'")
local colon     = P(":")
local semicolon = P(";")
local equal     = P("=")
local ampersand = P("&")

-- element / attribute names and the delimiters of the various constructs

local name             = (R("az","AZ","09") + S('_-.'))^1
local openbegin        = P("<")
local openend          = P("</")
local closebegin       = P("/>") + P(">") -- end of an opening (or empty) tag
local closeend         = P(">")           -- end of a closing tag
local opencomment      = P("<!--")
local closecomment     = P("-->")
local openinstruction  = P("<?")
local closeinstruction = P("?>")
local opencdata        = P("<![CDATA[")
local closecdata       = P("]]>")

-- "&" followed by at least one non-";" character and a closing ";"
local entity = ampersand * (1-semicolon)^1 * semicolon

-- spell checking helpers provided by the context lexer framework

local wordpattern = lexer.context.wordpattern
local checkedword = lexer.context.checkedword
local setwordlist = lexer.context.setwordlist
local validwords  = false -- result of setwordlist() or false when no language is known

-- <?xml version="1.0" encoding="UTF-8" language="uk" ?>
--
-- <?context-xml-directive editor language us ?>

-- Match-time check that looks for a language specification at the start of
-- the document and, when one is found, selects the matching word list. The
-- pattern is a lookahead and the function always returns false, so this rule
-- never produces a token itself: the "<?xml" is styled by the rules that
-- follow.
--
-- Both searches are anchored at the position where "<?xml " was seen (i) and
-- not at the very first byte of the input, so a declaration that is preceded
-- by a few bytes (for instance a utf byte order mark) is handled as well. The
-- i < 10 test limits the work to a declaration near the beginning.

local p_preamble = Cmt(#P("<?xml "), function(input,i,_)
    if i < 10 then
        validwords = false
        -- first choice: the explicit editor directive right after the declaration
        local language = match(input,"^<%?xml[^>]*%?>%s*<%?context%-xml%-directive%s+editor%s+language%s+(..)%s+%?>",i)
        if not language then
            -- fallback: a language attribute in the xml declaration itself
            language = match(input,'^<%?xml[^>]*language=[\"\'](..)[\"\'][^>]*%?>',i)
        end
        if language then
            validwords = setwordlist(language)
        end
    end
    return false
end)


-- A word as defined by wordpattern. With an active word list the decision is
-- left to checkedword (presumably it yields a token that depends on the word
-- being known; see the framework), otherwise the word becomes a plain "text"
-- token ending at position i.

local p_word =
    Cmt(wordpattern, function(_,i,s)
        if validwords then
            return checkedword(validwords,s,i)
        else
            return true, { "text", i }
        end
    end)

-- catch-all: any single character that no other rule accepted

local p_rest =
    token("default", any)

-- a run of characters that are neither markup delimiters nor whitespace
-- (not referenced by the rule set in this file)

local p_text =
    token("default", (1-S("<>&")-space)^1)

local p_spacing =
    token(whitespace, space^1)

local p_optionalwhitespace =
    p_spacing^0

-- whitespace styled as "default" instead of with the whitespace style
-- (not referenced by the rule set in this file)

local p_localspacing =
    token("default", space^1)

-- Because we want a differently colored open and close we need an embedded lexer (whitespace
-- trigger).
-- What is actually needed is that scintilla applies the current whitespace style.
-- Even using different style keys is not robust as they can be shared. I'll fix the main
-- lexer code.

-- Attribute values: the quotes get their own style, the content is one
-- "string" token (which may be empty).

local p_dstring = -- "double quoted"
    token("quote",dquote)
  * token("string",(1-dquote)^0) -- different from context
  * token("quote",dquote)

local p_sstring = -- 'single quoted'
    token("quote",squote)
  * token("string",(1-squote)^0) -- different from context
  * token("quote",squote)

-- local p_comment =
--     token("command",opencomment)
--   * token("comment",(1-closecomment)^0) -- different from context
--   * token("command",closecomment)

-- local p_cdata =
--     token("command",opencdata)
--   * token("comment",(1-closecdata)^0) -- different from context
--   * token("command",closecdata)

-- Comments and cdata sections are delegated to two small embedded lexers that
-- are entered and left at the given start and stop tokens.

local commentlexer = lexer.load("scite-context-lexer-xml-comment")
local cdatalexer   = lexer.load("scite-context-lexer-xml-cdata")

lexer.embed_lexer(examplelexer, commentlexer, token("command",opencomment), token("command",closecomment))
lexer.embed_lexer(examplelexer, cdatalexer,   token("command",opencdata),   token("command",closecdata))

-- maybe cdata just text (then we don't need the extra lexer as we only have one comment then)

-- A name is either prefixed (prefix:name, the prefix styled "plain") or a
-- bare name styled "keyword". The prefixed alternative has to be tried first
-- because the bare one would also accept the prefix.

local p_name =
    token("plain",name)
  * (
        token("default",colon)
      * token("keyword",name)
    )^1
  + token("keyword",name)

local p_key = p_name

-- zero or more key = "value" / key = 'value' pairs with optional whitespace
-- around each part

local p_attributes = (
    p_optionalwhitespace
  * p_key
  * p_optionalwhitespace
  * token("plain",equal)
  * p_optionalwhitespace
  * (p_sstring + p_dstring)
  * p_optionalwhitespace
)^0

-- <name attributes> and <name attributes/>

local p_open =
    token("keyword",openbegin)
  * p_name
  * p_optionalwhitespace
  * p_attributes
  * token("keyword",closebegin)

-- </name>

local p_close =
    token("keyword",openend)
  * p_name
  * p_optionalwhitespace
  * token("keyword",closeend)

local p_entity =
    token("constant",entity)

-- Processing instructions. The first alternative handles <?xml ... ?> with
-- its content parsed as attributes; when that fails (for instance for
-- <?xml-stylesheet ... ?>) the second one takes any <?name ... ?> and styles
-- the content as one "default" token. That content is optional so that an
-- instruction without any content (<?name?>) is recognized too.

local p_instruction =
    token("command",openinstruction * P("xml"))
  * p_optionalwhitespace
  * p_attributes
  * p_optionalwhitespace
  * token("command",closeinstruction)
  + token("command",openinstruction * name)
  * token("default",(1-closeinstruction)^1)^-1
  * token("command",closeinstruction)

-- The rule set. These are deliberately not locals: after module(...) they
-- become fields of the module table. The rules are listed with whitespace
-- first and the single character fallback last; "close" has to come before
-- "open" because "<" is a prefix of "</".

_rules = {
    { "whitespace",  p_spacing     },
    { "preamble",    p_preamble    },
    { "word",        p_word        },
 -- { "text",        p_text        },
 -- { "comment",     p_comment     },
 -- { "cdata",       p_cdata       },
    { "instruction", p_instruction },
    { "close",       p_close       },
    { "open",        p_open        },
    { "entity",      p_entity      },
    { "rest",        p_rest        },
}

_tokenstyles = lexer.context.styleset

_foldsymbols = { -- somehow doesn't work yet
    _patterns = {
        "[<>]",
    },
    ["keyword"] = {
        ["<"] = 1, [">"] = -1,
    },
}