summaryrefslogtreecommitdiff
path: root/context
diff options
context:
space:
mode:
authorHans Hagen <pragma@wxs.nl>2011-09-18 22:35:00 +0200
committerHans Hagen <pragma@wxs.nl>2011-09-18 22:35:00 +0200
commite5cc3bef1e068851dd40872872f74e72c1737280 (patch)
tree028f95a0e8359b6177b688abb2b593ff1d757043 /context
parent92dab0a2466fab1646a9e7b9e3266877a5a45f57 (diff)
downloadcontext-e5cc3bef1e068851dd40872872f74e72c1737280.tar.gz
beta 2011.09.18 22:35
Diffstat (limited to 'context')
-rw-r--r--context/data/scite/lexers/scite-context-lexer-lua.lua2
-rw-r--r--context/data/scite/lexers/scite-context-lexer-mps.lua1
-rw-r--r--context/data/scite/lexers/scite-context-lexer-tex.lua61
-rw-r--r--context/data/scite/lexers/scite-context-lexer-xml-cdata.lua22
-rw-r--r--context/data/scite/lexers/scite-context-lexer-xml-comment.lua31
-rw-r--r--context/data/scite/lexers/scite-context-lexer-xml.lua202
-rw-r--r--context/data/scite/lexers/scite-context-lexer.lua114
-rw-r--r--context/data/scite/lexers/themes/scite-context-theme.lua4
-rw-r--r--context/data/scite/scite-context-external.properties5
-rw-r--r--context/data/scite/scite-context-readme.tex2
-rw-r--r--context/data/scite/scite-ctx.lua2
11 files changed, 383 insertions, 63 deletions
diff --git a/context/data/scite/lexers/scite-context-lexer-lua.lua b/context/data/scite/lexers/scite-context-lexer-lua.lua
index 49799a978..62577d4a9 100644
--- a/context/data/scite/lexers/scite-context-lexer-lua.lua
+++ b/context/data/scite/lexers/scite-context-lexer-lua.lua
@@ -12,6 +12,8 @@ local P, R, S, C, Cg, Cb, Cs, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cg, lpe
local match, find = string.match, string.find
local global = _G
+-- beware: all multiline is messy, so even if it's no lexer, it should be an embedded lexer
+
module(...)
local cldlexer = _M
diff --git a/context/data/scite/lexers/scite-context-lexer-mps.lua b/context/data/scite/lexers/scite-context-lexer-mps.lua
index 2d8cc3a70..afde63bcc 100644
--- a/context/data/scite/lexers/scite-context-lexer-mps.lua
+++ b/context/data/scite/lexers/scite-context-lexer-mps.lua
@@ -15,7 +15,6 @@ local type, next, pcall, loadfile = type, next, pcall, loadfile
module(...)
local metafunlexer = _M
-local basepath = lexer.context and lexer.context.path or _LEXERHOME
local metafunhelpers = { }
local metafunconstants = { }
diff --git a/context/data/scite/lexers/scite-context-lexer-tex.lua b/context/data/scite/lexers/scite-context-lexer-tex.lua
index 340c3f75e..0866b35b4 100644
--- a/context/data/scite/lexers/scite-context-lexer-tex.lua
+++ b/context/data/scite/lexers/scite-context-lexer-tex.lua
@@ -45,8 +45,6 @@ local contextlexer = _M
local cldlexer = lexer.load('scite-context-lexer-cld')
local mpslexer = lexer.load('scite-context-lexer-mps')
-local basepath = lexer.context and lexer.context.path or _LEXERHOME
-
local commands = { en = { } }
local primitives = { }
local helpers = { }
@@ -111,9 +109,14 @@ local knowncommand = Cmt(cstoken^1, function(_,i,s)
return currentcommands[s] and i
end)
-local validwords = false
+local wordpattern = lexer.context.wordpattern
+local checkedword = lexer.context.checkedword
+local setwordlist = lexer.context.setwordlist
+local validwords = false
+
+-- % language=uk
-local knownpreamble = Cmt(P("% "), function(input,i,_)
+local knownpreamble = Cmt(#P("% "), function(input,i,_) -- todo : utfbomb
if i < 10 then
validwords = false
local s, e, word = find(input,'^(.+)[\n\r]',i) -- combine with match
@@ -123,7 +126,7 @@ local knownpreamble = Cmt(P("% "), function(input,i,_)
currentcommands = commands[interface] or commands.en or { }
end
local language = match(word,"language=(..)")
- validwords = language and lexer.context.setwordlist(language)
+ validwords = language and setwordlist(language)
end
end
return false
@@ -195,26 +198,34 @@ local p_text = cstoken^1 --maybe add punctuation and space
-- no looking back = #(1-S("[=")) * cstoken^3 * #(1-S("=]"))
-local p_word = Cmt(cstoken^3, function(_,i,s)
- if not validwords then
- return true, { "text", i }
+-- local p_word = Cmt(wordpattern, function(_,i,s)
+-- if not validwords then
+-- return true, { "text", i }
+-- else
+-- -- keys are lower
+-- local word = validwords[s]
+-- if word == s then
+-- return true, { "okay", i } -- exact match
+-- elseif word then
+-- return true, { "warning", i } -- case issue
+-- else
+-- local word = validwords[lower(s)]
+-- if word == s then
+-- return true, { "okay", i } -- exact match
+-- elseif word then
+-- return true, { "warning", i } -- case issue
+-- else
+-- return true, { "error", i }
+-- end
+-- end
+-- end
+-- end)
+
+local p_word = Cmt(wordpattern, function(_,i,s)
+ if validwords then
+ return checkedword(validwords,s,i)
else
- -- keys are lower
- local word = validwords[s]
- if word == s then
- return true, { "okay", i } -- exact match
- elseif word then
- return true, { "warning", i } -- case issue
- else
- local word = validwords[lower(s)]
- if word == s then
- return true, { "okay", i } -- exact match
- elseif word then
- return true, { "warning", i } -- case issue
- else
- return true, { "error", i }
- end
- end
+ return true, { "text", i }
end
end)
@@ -274,7 +285,7 @@ local csname = token('user', p_csname )
local grouping = token('grouping', p_grouping )
local special = token('special', p_special )
local extra = token('extra', p_extra )
-local text = token('default', p_text )
+----- text = token('default', p_text )
----- word = token("okay", p_word )
local word = p_word
diff --git a/context/data/scite/lexers/scite-context-lexer-xml-cdata.lua b/context/data/scite/lexers/scite-context-lexer-xml-cdata.lua
new file mode 100644
index 000000000..71826099c
--- /dev/null
+++ b/context/data/scite/lexers/scite-context-lexer-xml-cdata.lua
@@ -0,0 +1,22 @@
+local lexer = lexer
+local token = lexer.token
+local P = lpeg.P
+
+module(...)
+
+local commentlexer = _M
+
+local whitespace = commentlexer.WHITESPACE -- triggers states
+
+local space = lexer.space
+local nospace = 1 - space - P("]]>")
+
+local p_spaces = token(whitespace, space ^1)
+local p_cdata = token("comment", nospace^1)
+
+_rules = {
+ { "whitespace", p_spaces },
+ { "cdata", p_cdata },
+}
+
+_tokenstyles = lexer.context.styleset
diff --git a/context/data/scite/lexers/scite-context-lexer-xml-comment.lua b/context/data/scite/lexers/scite-context-lexer-xml-comment.lua
new file mode 100644
index 000000000..2d9ce66bd
--- /dev/null
+++ b/context/data/scite/lexers/scite-context-lexer-xml-comment.lua
@@ -0,0 +1,31 @@
+local lexer = lexer
+local token = lexer.token
+local P = lpeg.P
+
+module(...)
+
+local commentlexer = _M
+
+local whitespace = commentlexer.WHITESPACE -- triggers states
+
+local space = lexer.space
+local nospace = 1 - space - P("-->")
+
+local p_spaces = token(whitespace, space ^1)
+local p_comment = token("comment", nospace^1)
+
+_rules = {
+ { "whitespace", p_spaces },
+ { "comment", p_comment },
+}
+
+_tokenstyles = lexer.context.styleset
+
+_foldsymbols = {
+ _patterns = {
+ "<%!%-%-", "%-%->", -- comments
+ },
+ ["comment"] = {
+ ["<!--"] = 1, ["-->" ] = -1,
+ }
+}
diff --git a/context/data/scite/lexers/scite-context-lexer-xml.lua b/context/data/scite/lexers/scite-context-lexer-xml.lua
new file mode 100644
index 000000000..0441585c1
--- /dev/null
+++ b/context/data/scite/lexers/scite-context-lexer-xml.lua
@@ -0,0 +1,202 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for metafun",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+-- adapted from the regular context pretty printer code (after all, lexing
+-- boils down to much of the same and there are only so many ways to do
+-- things). Simplified a bit as we have a different nesting model.
+
+-- todo: parse entities in attributes
+
+local lexer = lexer
+local global, string, table, lpeg = _G, string, table, lpeg
+local token, style, colors, exact_match, no_style = lexer.token, lexer.style, lexer.colors, lexer.exact_match, lexer.style_nothing
+local P, R, S, V, C, Cmt = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, lpeg.Cmt
+local type, setmetatable = type, setmetatable
+local match, find = string.match, string.find
+
+module(...)
+
+local examplelexer = _M
+
+local whitespace = examplelexer.WHITESPACE -- triggers states
+
+local space = lexer.space -- S(" \t\n\r\v\f")
+local any = lexer.any -- P(1)
+
+local dquote = P('"')
+local squote = P("'")
+local colon = P(":")
+local semicolon = P(";")
+local equal = P("=")
+local ampersand = P("&")
+
+local name = (R("az","AZ","09") + S('_-.'))^1
+local openbegin = P("<")
+local openend = P("</")
+local closebegin = P("/>") + P(">")
+local closeend = P(">")
+local opencomment = P("<!--")
+local closecomment = P("-->")
+local openinstruction = P("<?")
+local closeinstruction = P("?>")
+local opencdata = P("<![CDATA[")
+local closecdata = P("]]>")
+
+local entity = ampersand * (1-semicolon)^1 * semicolon
+
+local wordpattern = lexer.context.wordpattern
+local checkedword = lexer.context.checkedword
+local setwordlist = lexer.context.setwordlist
+local validwords = false
+
+-- <?xml version="1.0" encoding="UTF-8" language="uk" ?>
+--
+-- <?context-xml-directive editor language us ?>
+
+local p_preamble = Cmt(#P("<?xml "), function(input,i,_) -- todo: utf bomb
+ if i < 10 then
+ validwords = false
+ local language = match(input,"^<%?xml[^>]*%?>%s*<%?context%-xml%-directive%s+editor%s+language%s+(..)%s+%?>")
+ if not language then
+ language = match(input,'^<%?xml[^>]*language=[\"\'](..)[\"\'][^>]*%?>',i)
+ end
+ if language then
+ validwords = setwordlist(language)
+ end
+ end
+ return false
+end)
+
+
+local p_word =
+ Cmt(wordpattern, function(_,i,s)
+ if validwords then
+ return checkedword(validwords,s,i)
+ else
+ return true, { "text", i }
+ end
+ end)
+
+local p_rest =
+ token("default", any)
+
+local p_text =
+ token("default", (1-S("<>&")-space)^1)
+
+local p_spacing =
+ token(whitespace, space^1)
+
+local p_optionalwhitespace =
+ p_spacing^0
+
+local p_localspacing =
+ token("default", space^1)
+
+-- Because we want a differently colored open and close we need an embedded lexer (whitespace
+-- trigger). What is actually needed is that scintilla applies the current whitespace style.
+-- Even using different style keys is not robust as they can be shared. I'll fix the main
+-- lexer code.
+
+local p_sstring =
+ token("quote",dquote)
+ * token("string",(1-dquote)^0) -- different from context
+ * token("quote",dquote)
+
+local p_dstring =
+ token("quote",squote)
+ * token("string",(1-squote)^0) -- different from context
+ * token("quote",squote)
+
+-- local p_comment =
+-- token("command",opencomment)
+-- * token("comment",(1-closecomment)^0) -- different from context
+-- * token("command",closecomment)
+
+-- local p_cdata =
+-- token("command",opencdata)
+-- * token("comment",(1-closecdata)^0) -- different from context
+-- * token("command",closecdata)
+
+local commentlexer = lexer.load("scite-context-lexer-xml-comment")
+local cdatalexer = lexer.load("scite-context-lexer-xml-cdata")
+
+lexer.embed_lexer(examplelexer, commentlexer, token("command",opencomment), token("command",closecomment))
+lexer.embed_lexer(examplelexer, cdatalexer, token("command",opencdata), token("command",closecdata))
+
+-- maybe cdata just text (then we don't need the extra lexer as we only have one comment then)
+
+local p_name =
+ token("plain",name)
+ * (
+ token("default",colon)
+ * token("keyword",name)
+ )^1
+ + token("keyword",name)
+
+local p_key = p_name
+
+local p_attributes = (
+ p_optionalwhitespace
+ * p_key
+ * p_optionalwhitespace
+ * token("plain",equal)
+ * p_optionalwhitespace
+ * (p_dstring + p_sstring)
+ * p_optionalwhitespace
+)^0
+
+local p_open =
+ token("keyword",openbegin)
+ * p_name
+ * p_optionalwhitespace
+ * p_attributes
+ * token("keyword",closebegin)
+
+local p_close =
+ token("keyword",openend)
+ * p_name
+ * p_optionalwhitespace
+ * token("keyword",closeend)
+
+local p_entity =
+ token("constant",entity)
+
+local p_instruction =
+ token("command",openinstruction * P("xml"))
+ * p_optionalwhitespace
+ * p_attributes
+ * p_optionalwhitespace
+ * token("command",closeinstruction)
+ + token("command",openinstruction * name)
+ * token("default",(1-closeinstruction)^1)
+ * token("command",closeinstruction)
+
+_rules = {
+ { "whitespace", p_spacing },
+ { "preamble", p_preamble },
+ { "word", p_word },
+-- { "text", p_text },
+-- { "comment", p_comment },
+-- { "cdata", p_cdata },
+ { "instruction", p_instruction },
+ { "close", p_close },
+ { "open", p_open },
+ { "entity", p_entity },
+ { "rest", p_rest },
+}
+
+_tokenstyles = lexer.context.styleset
+
+_foldsymbols = { -- somehow doesn't work yet
+ _patterns = {
+ "[<>]",
+ },
+ ["keyword"] = {
+ ["<"] = 1, [">"] = -1,
+ },
+}
diff --git a/context/data/scite/lexers/scite-context-lexer.lua b/context/data/scite/lexers/scite-context-lexer.lua
index 20af5d68f..a2bb35a57 100644
--- a/context/data/scite/lexers/scite-context-lexer.lua
+++ b/context/data/scite/lexers/scite-context-lexer.lua
@@ -9,6 +9,11 @@ local info = {
-- The fold and lex functions are copied and patched from original code by Mitchell (see
-- lexer.lua). All errors are mine.
--
+-- I'll probably make a whole copy and patch the other functions too as we need an extra
+-- nesting model.
+--
+-- Also needed: preamble scan once. Can be handled in caller below and _M.preamble.
+--
-- For huge files folding can be pretty slow and I do have some large ones that I keep
-- open all the time. Loading is normally no issue, unless one has remembered the status
-- and the cursor is at the last line of a 200K line file. Optimizing the fold function
@@ -24,6 +29,9 @@ local info = {
-- an issue we can rewrite the main lex function (memorize the grammars and speed up the
-- byline variant).
+-- Maybe it's safer to copy the other methods here so that we have no dependencies, apart
+-- from the library.
+
local R, P, S, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg = lpeg.R, lpeg.P, lpeg.S, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg
local lpegmatch = lpeg.match
local find, gmatch, match, lower, upper, gsub = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub
@@ -158,6 +166,74 @@ function lexer.context.exact_match(words,word_chars,case_insensitive)
end
end
+
+-- spell checking (we can only load lua files)
+
+-- return {
+-- words = {
+-- ["someword"] = "someword",
+-- ["anotherword"] = "Anotherword",
+-- },
+-- }
+
+local lists = { }
+
+local splitter = (Cf(Ct("") * (Cg(C((1-S(" \t\n\r"))^1 * Cc(true))) + P(1))^1,rawset) )^0
+local splitter = (Cf(Ct("") * (Cg(C(R("az","AZ","\127\255")^1) * Cc(true)) + P(1))^1,rawset) )^0
+
+local function splitwords(words)
+ return lpegmatch(splitter,words)
+end
+
+function lexer.context.setwordlist(tag,limit) -- returns hash (lowercase keys and original values)
+ if not tag or tag == "" then
+ return false
+ elseif lists[tag] ~= nil then
+ return lists[tag]
+ else
+ local list = lexer.context.loaddefinitions("spell-" .. tag)
+ if not list or type(list) ~= "table" then
+ lists[tag] = false
+ return nil
+ elseif type(list.words) == "string" then
+ list = splitwords(list.words)
+ lists[tag] = list
+ return list
+ else
+ list = list.words or false
+ lists[tag] = list
+ return list
+ end
+ end
+end
+
+lexer.context.wordpattern = R("az","AZ","\127\255")^3 -- todo: if limit and #s < limit then
+
+function lexer.context.checkedword(validwords,s,i) -- ,limit
+ if not validwords then
+ return true, { "text", i }
+ else
+ -- keys are lower
+ local word = validwords[s]
+ if word == s then
+ return true, { "okay", i } -- exact match
+ elseif word then
+ return true, { "warning", i } -- case issue
+ else
+ local word = validwords[lower(s)]
+ if word == s then
+ return true, { "okay", i } -- exact match
+ elseif word then
+ return true, { "warning", i } -- case issue
+ elseif upper(s) == s then
+ return true, { "warning", i } -- probably a logo or acronym
+ else
+ return true, { "error", i }
+ end
+ end
+ end
+end
+
-- overloaded functions
local FOLD_BASE = SC_FOLDLEVELBASE
@@ -348,7 +424,7 @@ function lexer.context.lex(text,init_style)
local noftokens = 0
if true then
for line in gmatch(text,'[^\r\n]*\r?\n?') do -- could be an lpeg
- local line_tokens = lpeg_match(grammar,line)
+ local line_tokens = lpegmatch(grammar,line)
if line_tokens then
for i=1,#line_tokens do
local token = line_tokens[i]
@@ -366,7 +442,7 @@ function lexer.context.lex(text,init_style)
else -- alternative
local lasttoken, lastoffset
for line in gmatch(text,'[^\r\n]*\r?\n?') do -- could be an lpeg
- local line_tokens = lpeg_match(grammar,line)
+ local line_tokens = lpegmatch(grammar,line)
if line_tokens then
for i=1,#line_tokens do
lasttoken = line_tokens[i]
@@ -431,37 +507,3 @@ lexer.fold = lexer.context.fold
lexer.lex = lexer.context.lex
lexer.token = lexer.context.token
lexer.exact_match = lexer.context.exact_match
-
--- spell checking (we can only load lua files)
-
-local lists = { }
-
-local splitter = (Cf(Ct("") * (Cg(C((1-S(" \t\n\r"))^1 * Cc(true))) + P(1))^1,rawset) )^0
-local splitter = (Cf(Ct("") * (Cg(C(R("az","AZ","\127\255")^1) * Cc(true)) + P(1))^1,rawset) )^0
-
-local function splitwords(words)
- return lpegmatch(splitter,words)
-end
-
-function lexer.context.setwordlist(tag)
- if not tag or tag == "" then
- return false
- elseif lists[tag] ~= nil then
- return lists[tag]
- else
- local list = collect("spell-" .. tag)
- if not list or type(list) ~= "table" then
- lists[tag] = false
- return nil
- elseif type(list.words) == "string" then
- list = splitwords(list.words)
- lists[tag] = list
- return list
- else
- list = list.words or false
- lists[tag] = list
- return list
- end
- end
-end
-
diff --git a/context/data/scite/lexers/themes/scite-context-theme.lua b/context/data/scite/lexers/themes/scite-context-theme.lua
index 94f623cd8..ed1c5086b 100644
--- a/context/data/scite/lexers/themes/scite-context-theme.lua
+++ b/context/data/scite/lexers/themes/scite-context-theme.lua
@@ -13,6 +13,10 @@ local context_path = "t:/sources" -- c:/data/tex-context/tex/texmf-context/tex/b
local font_name = 'Dejavu Sans Mono'
local font_size = 14
+if not WIN32 then
+ font_name = '!' .. font_name
+end
+
local global = _G
-- dofile(_LEXERHOME .. '/themes/scite.lua') -- starting point so we miss nothing
diff --git a/context/data/scite/scite-context-external.properties b/context/data/scite/scite-context-external.properties
index 5cb0e8f98..14bdd222f 100644
--- a/context/data/scite/scite-context-external.properties
+++ b/context/data/scite/scite-context-external.properties
@@ -23,7 +23,7 @@ lexer.$(file.patterns.metafun)=lpeg_scite-context-lexer-mps
lexer.$(file.patterns.context)=lpeg_scite-context-lexer-tex
lexer.$(file.patterns.tex)=lpeg_scite-context-lexer-tex
lexer.$(file.patterns.lua)=lpeg_scite-context-lexer-lua
-lexer.$(file.patterns.xml)=lpeg_xml
+lexer.$(file.patterns.example)=lpeg_scite-context-lexer-xml
comment.block.lpeg_scite-context-lexer-tex=%
comment.block.at.line.start.lpeg_scite-context-lexer-tex=1
@@ -37,6 +37,9 @@ comment.block.at.line.start.lpeg_scite-context-lexer-lua=1
comment.block.lpeg_scite-context-lexer-cld=--
comment.block.at.line.start.lpeg_scite-context-lexer-cld=1
+#~ comment.block.lpeg_scite-context-lexer-xml=<!--
+#~ comment.block.at.line.start.lpeg_scite-context-lexer-xml=1
+
comment.block.lpeg_props=#
comment.block.at.line.start.lpeg_props=1
diff --git a/context/data/scite/scite-context-readme.tex b/context/data/scite/scite-context-readme.tex
index 6f32c87bc..57a99502c 100644
--- a/context/data/scite/scite-context-readme.tex
+++ b/context/data/scite/scite-context-readme.tex
@@ -126,6 +126,8 @@
{\em This is an updated but yet uncorrected version.}
+{\em Todo: look into using lpeg without special library (might be faster).}
+
\SCITE\ is a source code editor written by Neil Hodgson. After
playing with several editors we decided that this editor was quite
configurable and extendible.
diff --git a/context/data/scite/scite-ctx.lua b/context/data/scite/scite-ctx.lua
index 72fcb967c..8db99693f 100644
--- a/context/data/scite/scite-ctx.lua
+++ b/context/data/scite/scite-ctx.lua
@@ -80,6 +80,8 @@ function traceln(str)
io.flush()
end
+-- traceln("LPEG " .. tostring(lpeg))
+
function string.grab(str,delimiter)
local list = { }
for snippet in gmatch(str,delimiter) do