beta 2011.11.09 19:33

author: Marius <mariausol@gmail.com> 2011-11-09 20:40:13 +0200
committer: Marius <mariausol@gmail.com> 2011-11-09 20:40:13 +0200
commit: 39abfb3c7f6f445876494ecde0b5043e46ae151b (patch)
tree: a18355eacb92e3ac95dace9a5a9d841f05194629 /context/data/scite/lexers
parent: a73ec062b30d72c2ac967bd90c63fbc7913404c7 (diff)
download: context-39abfb3c7f6f445876494ecde0b5043e46ae151b.tar.gz
4 files changed, 159 insertions, 12 deletions
diff --git a/context/data/scite/lexers/scite-context-lexer-tex.lua b/context/data/scite/lexers/scite-context-lexer-tex.lua
index c442fdc20..4fdf80869 100644
--- a/context/data/scite/lexers/scite-context-lexer-tex.lua
+++ b/context/data/scite/lexers/scite-context-lexer-tex.lua
@@ -111,11 +111,15 @@ local knowncommand = Cmt(cstoken^1, function(_,i,s)
     return currentcommands[s] and i
 end)
 
-local wordtoken   = context.patterns.wordtoken
-local wordpattern = context.patterns.wordpattern
-local checkedword = context.checkedword
-local setwordlist = context.setwordlist
-local validwords  = false
+local utfchar      = context.utfchar
+local wordtoken    = context.patterns.wordtoken
+local iwordtoken   = context.patterns.iwordtoken
+local wordpattern  = context.patterns.wordpattern
+local iwordpattern = context.patterns.iwordpattern
+local invisibles   = context.patterns.invisibles
+local checkedword  = context.checkedword
+local setwordlist  = context.setwordlist
+local validwords   = false
 
 -- % language=uk
 
@@ -199,7 +203,7 @@ local p_csname               = backslash * (cstoken^1 + P(1))
 local p_grouping             = S("{$}")
 local p_special              = S("#()[]<>=\"")
 local p_extra                = S("`~%^&_-+/\'|")
-local p_text                 = wordtoken^1 --maybe add punctuation and space
+local p_text                 = iwordtoken^1 --maybe add punctuation and space
 
 local p_number               = context.patterns.real
 local p_unit                 = P("pt") + P("bp") + P("sp") + P("mm") + P("cm") + P("cc") + P("dd")
@@ -229,7 +233,7 @@ local p_unit                 = P("pt") + P("bp") + P("sp") + P("mm") + P("cm") +
 --     end
 -- end)
 
-local p_word = Cmt(wordpattern, function(_,i,s)
+local p_word = Cmt(iwordpattern, function(_,i,s)
     if validwords then
         return checkedword(validwords,s,i)
     else
@@ -279,6 +283,8 @@ elseif option == 2 then
 
 end
 
+local p_invisible = invisibles^1
+
 local spacing                = token(whitespace,  p_spacing    )
 
 local rest                   = token('default',   p_rest       )
@@ -295,6 +301,7 @@ local number                 = token('number',    p_number     )
                              * token('constant',  p_unit       )
 local special                = token('special',   p_special    )
 local extra                  = token('extra',     p_extra      )
+local invisible              = token('invisible', p_invisible  )
 local text                   = token('default',   p_text       )
 local word                   = p_word
 
@@ -423,6 +430,7 @@ _rules = {
  -- { "number",      number      },
     { "special",     special     },
     { "extra",       extra       },
+    { "invisible",   invisible   },
     { "rest",        rest        },
 }
 
diff --git a/context/data/scite/lexers/scite-context-lexer-xml.lua b/context/data/scite/lexers/scite-context-lexer-xml.lua
index 614eb0c07..33eeaa210 100644
--- a/context/data/scite/lexers/scite-context-lexer-xml.lua
+++ b/context/data/scite/lexers/scite-context-lexer-xml.lua
@@ -51,10 +51,12 @@ local closecdata       = P("]]>")
 
 local entity           = ampersand * (1-semicolon)^1 * semicolon
 
-local wordpattern = context.patterns.wordpattern
-local checkedword = context.patterns.checkedword
-local setwordlist = context.setwordlist
-local validwords  = false
+local wordpattern  = context.patterns.wordpattern
+local iwordpattern = context.patterns.iwordpattern -- variant built from word tokens minus invisibles
+local checkedword  = context.patterns.checkedword -- NOTE(review): the tex lexer reads context.checkedword; confirm
+local setwordlist  = context.setwordlist
+local invisibles   = context.patterns.invisibles
+local validwords   = false
 
 -- <?xml version="1.0" encoding="UTF-8" language="uk" ?>
 --
@@ -75,7 +77,7 @@ local p_preamble = Cmt(#P("<?xml "), function(input,i,_) -- todo: utf bomb
 end)
 
 local p_word =
-    Cmt(wordpattern, function(_,i,s)
+    Cmt(iwordpattern, function(_,i,s)
         if validwords then
             return checkedword(validwords,s,i)
         else
@@ -185,6 +187,9 @@ local p_instruction =
   * token("default",(1-closeinstruction)^1)
   * token("command",closeinstruction)
 
+local p_invisible =
+    token("invisible",invisibles^1)
+
 _rules = {
     { "whitespace",  p_spacing     },
     { "preamble",    p_preamble    },
@@ -196,6 +201,7 @@ _rules = {
     { "close",       p_close       },
     { "open",        p_open        },
     { "entity",      p_entity      },
+    { "invisible",   p_invisible   },
     { "rest",        p_rest        },
 }
 
diff --git a/context/data/scite/lexers/scite-context-lexer.lua b/context/data/scite/lexers/scite-context-lexer.lua
index 9ba25884b..f1c9bc9bd 100644
--- a/context/data/scite/lexers/scite-context-lexer.lua
+++ b/context/data/scite/lexers/scite-context-lexer.lua
@@ -574,3 +574,134 @@ lexer.fold        = context.fold
 lexer.lex         = context.lex
 lexer.token       = context.token
 lexer.exact_match = context.exact_match
+
+-- helpers: alas, the lexer's lua instance is rather crippled, not even the
+-- math library is part of it
+
+local floor = math and math.floor
+local char  = string.char
+
+if not floor then
+
+    -- "%d" drops the fraction towards zero (lua 5.1), so step down once for negative fractions
+    floor = function(n)
+        local i = tonumber(string.format("%d",n))
+        return i > n and i - 1 or i
+    end
+
+    math = math or { } -- publish the fallback for code that expects math.floor
+    math.floor = floor
+end
+
+local function utfchar(n) -- n: code point (number); returns its utf-8 bytes
+    if n < 0x80 then
+        return char(n)
+    elseif n < 0x800 then
+        return char(
+            0xC0 + floor(n/0x40),
+            0x80 + (n % 0x40)
+        )
+    elseif n < 0x10000 then
+        return char(
+            0xE0 + floor(n/0x1000),
+            0x80 + (floor(n/0x40) % 0x40),
+            0x80 + (n % 0x40)
+        )
+    elseif n < 0x110000 then
+        return char(
+            0xF0 + floor(n/0x40000),
+            0x80 + (floor(n/0x1000) % 0x40), -- keep 6 bits, matters from 0x40000 on
+            0x80 + (floor(n/0x40) % 0x40),
+            0x80 + (n % 0x40)
+        )
+    else
+     -- Unicode stops at 0x10FFFF and utf-8 has no sequences longer than four
+     -- bytes, so there is nothing valid to emit for larger values; hand back
+     -- a visible placeholder instead of raising an error.
+     --
+     -- n is expected to be a non negative integer: a negative value ends up
+     -- in the first branch, where string.char rejects it. Surrogates are not
+     -- filtered out, they are encoded like any other three byte value.
+        return "?"
+    end
+end
+
+context.utfchar = utfchar
+
+-- a helper from l-lpeg:
+
+local gmatch = string.gmatch
+
+-- Turn a trie (nested tables whose keys are handed to P) into one pattern:
+-- siblings become alternatives, a non-empty subtable is appended in sequence.
+-- Returns nil for an empty trie. A key with children only matches together
+-- with one of them, so an entry that is a prefix of another one gets lost.
+local function make(t)
+    local pattern
+    for key, subtree in next, t do
+        local branch = P(key)
+        if next(subtree) then
+            branch = branch * make(subtree)
+        end
+        if pattern then
+            pattern = pattern + branch
+        else
+            pattern = branch
+        end
+    end
+    return pattern
+end
+
+-- Fold the strings in list into a trie, one level per character, then make() it.
+function lpeg.utfchartabletopattern(list)
+    local root = { }
+    for index=1,#list do
+        local node = root
+        for c in gmatch(list[index],".") do
+            local child = node[c] or { } -- reuse the branch when it is there
+            node[c] = child
+            node = child
+        end
+    end
+    return make(root)
+end
+
+-- patterns.invisibles =
+--     P(utfchar(0x00A0)) -- nbsp
+--   + P(utfchar(0x2000)) -- enquad
+--   + P(utfchar(0x2001)) -- emquad
+--   + P(utfchar(0x2002)) -- enspace
+--   + P(utfchar(0x2003)) -- emspace
+--   + P(utfchar(0x2004)) -- threeperemspace
+--   + P(utfchar(0x2005)) -- fourperemspace
+--   + P(utfchar(0x2006)) -- sixperemspace
+--   + P(utfchar(0x2007)) -- figurespace
+--   + P(utfchar(0x2008)) -- punctuationspace
+--   + P(utfchar(0x2009)) -- breakablethinspace
+--   + P(utfchar(0x200A)) -- hairspace
+--   + P(utfchar(0x200B)) -- zerowidthspace
+--   + P(utfchar(0x202F)) -- narrownobreakspace
+--   + P(utfchar(0x205F)) -- math thinspace
+
+patterns.invisibles = lpeg.utfchartabletopattern {
+    utfchar(0x00A0), -- nbsp
+    utfchar(0x2000), -- enquad
+    utfchar(0x2001), -- emquad
+    utfchar(0x2002), -- enspace
+    utfchar(0x2003), -- emspace
+    utfchar(0x2004), -- threeperemspace
+    utfchar(0x2005), -- fourperemspace
+    utfchar(0x2006), -- sixperemspace
+    utfchar(0x2007), -- figurespace
+    utfchar(0x2008), -- punctuationspace
+    utfchar(0x2009), -- thinspace (breakablethinspace)
+    utfchar(0x200A), -- hairspace
+    utfchar(0x200B), -- zerowidthspace
+    utfchar(0x202F), -- narrownobreakspace
+    utfchar(0x205F), -- medium mathematical space (math thinspace)
+}
+
+-- now we can make a word token that is no invisible, and runs of at least three of them:
+
+patterns.iwordtoken   = patterns.wordtoken - patterns.invisibles
+patterns.iwordpattern = patterns.iwordtoken^3
diff --git a/context/data/scite/lexers/themes/scite-context-theme.lua b/context/data/scite/lexers/themes/scite-context-theme.lua
index ed1c5086b..f8554971d 100644
--- a/context/data/scite/lexers/themes/scite-context-theme.lua
+++ b/context/data/scite/lexers/themes/scite-context-theme.lua
@@ -77,6 +77,7 @@ style_definition         = style { fore = colors.black, bold = true }
 style_okay               = style { fore = colors.dark }
 style_error              = style { fore = colors.red }
 style_warning            = style { fore = colors.orange }
+style_invisible          = style { back = colors.orange }
 style_function           = style { fore = colors.black, bold = true }
 style_operator           = style { fore = colors.blue }
 style_preproc            = style { fore = colors.yellow, bold = true }
@@ -118,6 +119,7 @@ lexer.context.styles = {
 
     ["okay"]       = style_okay,
     ["warning"]    = style_warning,
+    ["invisible"]  = style_invisible,
     ["error"]      = style_error,
 
 }
author	Marius <mariausol@gmail.com>	2011-11-09 20:40:13 +0200
committer	Marius <mariausol@gmail.com>	2011-11-09 20:40:13 +0200
commit	39abfb3c7f6f445876494ecde0b5043e46ae151b (patch)
tree	a18355eacb92e3ac95dace9a5a9d841f05194629 /context/data/scite/lexers
parent	a73ec062b30d72c2ac967bd90c63fbc7913404c7 (diff)
download	context-39abfb3c7f6f445876494ecde0b5043e46ae151b.tar.gz