summaryrefslogtreecommitdiff
path: root/context/data/textadept/context/lexers/scite-context-lexer-xml.lua
diff options
context:
space:
mode:
Diffstat (limited to 'context/data/textadept/context/lexers/scite-context-lexer-xml.lua')
-rw-r--r--context/data/textadept/context/lexers/scite-context-lexer-xml.lua350
1 files changed, 0 insertions, 350 deletions
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua
deleted file mode 100644
index bbdb3febc..000000000
--- a/context/data/textadept/context/lexers/scite-context-lexer-xml.lua
+++ /dev/null
@@ -1,350 +0,0 @@
-local info = {
- version = 1.002,
- comment = "scintilla lpeg lexer for xml",
- author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
- copyright = "PRAGMA ADE / ConTeXt Development Team",
- license = "see context related readme files",
-}
-
--- adapted from the regular context pretty printer code (after all, lexing
--- boils down to much of the same and there are only so many ways to do
--- things). Simplified a bit as we have a different nesting model.
-
--- todo: parse entities in attributes
-
-local global, string, table, lpeg = _G, string, table, lpeg
-local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
-local type = type
-local match, find = string.match, string.find
-
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
-
-local token = lexer.token
-local exact_match = lexer.exact_match
-
-local xmllexer = lexer.new("xml","scite-context-lexer-xml")
-local whitespace = xmllexer.whitespace
-
-local xmlcommentlexer = lexer.load("scite-context-lexer-xml-comment")
-local xmlcdatalexer = lexer.load("scite-context-lexer-xml-cdata")
-local xmlscriptlexer = lexer.load("scite-context-lexer-xml-script")
-local lualexer = lexer.load("scite-context-lexer-lua")
-
-local space = patterns.space
-local any = patterns.any
-
-local dquote = P('"')
-local squote = P("'")
-local colon = P(":")
-local semicolon = P(";")
-local equal = P("=")
-local ampersand = P("&")
-
-local name = (R("az","AZ","09") + S("_-."))^1
-local openbegin = P("<")
-local openend = P("</")
-local closebegin = P("/>") + P(">")
-local closeend = P(">")
-local opencomment = P("<!--")
-local closecomment = P("-->")
-local openinstruction = P("<?")
-local closeinstruction = P("?>")
-local opencdata = P("<![CDATA[")
-local closecdata = P("]]>")
-local opendoctype = P("<!DOCTYPE") -- could grab the whole doctype
-local closedoctype = P("]>") + P(">")
-local openscript = openbegin * (P("script") + P("SCRIPT")) * (1-closeend)^0 * closeend -- begin
-local closescript = openend * (P("script") + P("SCRIPT")) * closeend
-
-local openlua = "<?lua"
-local closelua = "?>"
-
--- <!DOCTYPE Something PUBLIC "... ..." "..." [ ... ] >
--- <!DOCTYPE Something PUBLIC "... ..." "..." >
--- <!DOCTYPE Something SYSTEM "... ..." [ ... ] >
--- <!DOCTYPE Something SYSTEM "... ..." >
--- <!DOCTYPE Something [ ... ] >
--- <!DOCTYPE Something >
-
-local entity = ampersand * (1-semicolon)^1 * semicolon
-
-local utfchar = context.utfchar
-local wordtoken = context.patterns.wordtoken
-local iwordtoken = context.patterns.iwordtoken
-local wordpattern = context.patterns.wordpattern
-local iwordpattern = context.patterns.iwordpattern
-local invisibles = context.patterns.invisibles
-local checkedword = context.checkedword
-local styleofword = context.styleofword
-local setwordlist = context.setwordlist
-local validwords = false
-local validminimum = 3
-
--- <?xml version="1.0" encoding="UTF-8" language="uk" ?>
---
--- <?context-directive editor language us ?>
-
-local t_preamble = Cmt(P("<?xml "), function(input,i,_) -- todo: utf bomb, no longer #
- if i < 200 then
- validwords, validminimum = false, 3
- local language = match(input,"^<%?xml[^>]*%?>%s*<%?context%-directive%s+editor%s+language%s+(..)%s+%?>")
- -- if not language then
- -- language = match(input,"^<%?xml[^>]*language=[\"\'](..)[\"\'][^>]*%?>",i)
- -- end
- if language then
- validwords, validminimum = setwordlist(language)
- end
- end
- return false
-end)
-
-local t_word =
--- Ct( iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() ) -- the function can be inlined
- iwordpattern / function(s) return styleofword(validwords,validminimum,s) end * Cp() -- the function can be inlined
-
-local t_rest =
- token("default", any)
-
-local t_text =
- token("default", (1-S("<>&")-space)^1)
-
-local t_spacing =
- token(whitespace, space^1)
-
-local t_optionalwhitespace =
- token("default", space^1)^0
-
-local t_localspacing =
- token("default", space^1)
-
--- Because we want a differently colored open and close we need an embedded lexer (whitespace
--- trigger). What is actually needed is that scintilla applies the current whitespace style.
--- Even using different style keys is not robust as they can be shared. I'll fix the main
--- lexer code.
-
-local t_sstring =
- token("quote",dquote)
- * token("string",(1-dquote)^0) -- different from context
- * token("quote",dquote)
-
-local t_dstring =
- token("quote",squote)
- * token("string",(1-squote)^0) -- different from context
- * token("quote",squote)
-
--- local t_comment =
--- token("command",opencomment)
--- * token("comment",(1-closecomment)^0) -- different from context
--- * token("command",closecomment)
-
--- local t_cdata =
--- token("command",opencdata)
--- * token("comment",(1-closecdata)^0) -- different from context
--- * token("command",closecdata)
-
--- maybe cdata just text (then we don't need the extra lexer as we only have one comment then)
-
--- <!DOCTYPE Something PUBLIC "... ..." "..." [ ... ] >
--- <!DOCTYPE Something PUBLIC "... ..." "..." >
--- <!DOCTYPE Something SYSTEM "... ..." [ ... ] >
--- <!DOCTYPE Something SYSTEM "... ..." >
--- <!DOCTYPE Something [ ... ] >
--- <!DOCTYPE Something >
-
--- <!ENTITY xxxx SYSTEM "yyyy" NDATA zzzz>
--- <!ENTITY xxxx PUBLIC "yyyy" >
--- <!ENTITY xxxx "yyyy" >
-
-local t_docstr = t_dstring + t_sstring
-
-local t_docent = token("command",P("<!ENTITY"))
- * t_optionalwhitespace
- * token("keyword",name)
- * t_optionalwhitespace
- * (
- (
- token("constant",P("SYSTEM"))
- * t_optionalwhitespace
- * t_docstr
- * t_optionalwhitespace
- * token("constant",P("NDATA"))
- * t_optionalwhitespace
- * token("keyword",name)
- ) + (
- token("constant",P("PUBLIC"))
- * t_optionalwhitespace
- * t_docstr
- ) + (
- t_docstr
- )
- )
- * t_optionalwhitespace
- * token("command",P(">"))
-
-local t_docele = token("command",P("<!ELEMENT"))
- * t_optionalwhitespace
- * token("keyword",name)
- * t_optionalwhitespace
- * token("command",P("("))
- * (
- t_localspacing
- + token("constant",P("#CDATA") + P("#PCDATA") + P("ANY"))
- + token("text",P(","))
- + token("comment",(1-S(",)"))^1)
- )^1
- * token("command",P(")"))
- * t_optionalwhitespace
- * token("command",P(">"))
-
-local t_docset = token("command",P("["))
- * t_optionalwhitespace
- * ((t_optionalwhitespace * (t_docent + t_docele))^1 + token("comment",(1-P("]"))^0))
- * t_optionalwhitespace
- * token("command",P("]"))
-
-local t_doctype = token("command",P("<!DOCTYPE"))
- * t_optionalwhitespace
- * token("keyword",name)
- * t_optionalwhitespace
- * (
- (
- token("constant",P("PUBLIC"))
- * t_optionalwhitespace
- * t_docstr
- * t_optionalwhitespace
- * t_docstr
- * t_optionalwhitespace
- ) + (
- token("constant",P("SYSTEM"))
- * t_optionalwhitespace
- * t_docstr
- * t_optionalwhitespace
- )
- )^-1
- * t_docset^-1
- * t_optionalwhitespace
- * token("command",P(">"))
-
-lexer.embed_lexer(xmllexer, lualexer, token("command", openlua), token("command", closelua))
-lexer.embed_lexer(xmllexer, xmlcommentlexer, token("command", opencomment), token("command", closecomment))
-lexer.embed_lexer(xmllexer, xmlcdatalexer, token("command", opencdata), token("command", closecdata))
-lexer.embed_lexer(xmllexer, xmlscriptlexer, token("command", openscript), token("command", closescript))
-
--- local t_name =
--- token("plain",name)
--- * (
--- token("default",colon)
--- * token("keyword",name)
--- )
--- + token("keyword",name)
-
-local t_name = -- more robust
- token("plain",name * colon)^-1
- * token("keyword",name)
-
--- local t_key =
--- token("plain",name)
--- * (
--- token("default",colon)
--- * token("constant",name)
--- )
--- + token("constant",name)
-
-local t_key =
- token("plain",name * colon)^-1
- * token("constant",name)
-
-local t_attributes = (
- t_optionalwhitespace
- * t_key
- * t_optionalwhitespace
- * token("plain",equal)
- * t_optionalwhitespace
- * (t_dstring + t_sstring)
- * t_optionalwhitespace
-)^0
-
-local t_open =
- token("keyword",openbegin)
- * (
- t_name
- * t_optionalwhitespace
- * t_attributes
- * token("keyword",closebegin)
- +
- token("error",(1-closebegin)^1)
- )
-
-local t_close =
- token("keyword",openend)
- * (
- t_name
- * t_optionalwhitespace
- * token("keyword",closeend)
- +
- token("error",(1-closeend)^1)
- )
-
-local t_entity =
- token("constant",entity)
-
-local t_instruction =
- token("command",openinstruction * P("xml"))
- * t_optionalwhitespace
- * t_attributes
- * t_optionalwhitespace
- * token("command",closeinstruction)
- + token("command",openinstruction * name)
- * token("default",(1-closeinstruction)^1)
- * token("command",closeinstruction)
-
-local t_invisible =
- token("invisible",invisibles^1)
-
--- local t_preamble =
--- token("preamble", t_preamble )
-
-xmllexer._rules = {
- { "whitespace", t_spacing },
- { "preamble", t_preamble },
- { "word", t_word },
- -- { "text", t_text },
- -- { "comment", t_comment },
- -- { "cdata", t_cdata },
- { "doctype", t_doctype },
- { "instruction", t_instruction },
- { "close", t_close },
- { "open", t_open },
- { "entity", t_entity },
- { "invisible", t_invisible },
- { "rest", t_rest },
-}
-
-xmllexer._tokenstyles = context.styleset
-
-xmllexer._foldpattern = P("</") + P("<") + P("/>") -- separate entry else interference
-+ P("<!--") + P("-->")
-
-xmllexer._foldsymbols = {
- _patterns = {
- "</",
- "/>",
- "<",
- },
- ["keyword"] = {
- ["</"] = -1,
- ["/>"] = -1,
- ["<"] = 1,
- },
- ["command"] = {
- ["</"] = -1,
- ["/>"] = -1,
- ["<!--"] = 1,
- ["-->"] = -1,
- ["<"] = 1,
- },
-}
-
-return xmllexer