Diffstat (limited to 'context/data/textadept/context/lexers')
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua         |  25
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-bidi.lua           | 598
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-cld.lua            |  11
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua        |  11
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-cpp.lua            |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-dummy.lua          |   5
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-lua.lua            |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-mps.lua            |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua     |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua       |   1
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-pdf.lua            |   5
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-sql.lua            |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua        |  11
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-tex.lua            |  31
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-txt.lua            |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua   |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-web.lua            |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua      |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua    |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua     |   1
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer-xml.lua            |   3
-rw-r--r--  context/data/textadept/context/lexers/scite-context-lexer.lua                | 458
23 files changed, 942 insertions, 251 deletions
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua b/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
index dce24a2b9..b53da82ea 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
@@ -10,23 +10,22 @@ local global, string, table, lpeg = _G, string, table, lpeg
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
local type = type
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
-local token = lexer.token
-local exact_match = lexer.exact_match
+local token = lexer.token
+local exact_match = lexer.exact_match
-local bibtexlexer = lexer.new("bib","scite-context-lexer-bibtex")
-local whitespace = bibtexlexer.whitespace
+local bibtexlexer = lexer.new("bib","scite-context-lexer-bibtex")
+local whitespace = bibtexlexer.whitespace
- local escape, left, right = P("\\"), P('{'), P('}')
+local escape, left, right = P("\\"), P('{'), P('}')
- patterns.balanced = P {
- [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
- [2] = left * V(1) * right
- }
+patterns.balanced = P {
+ [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
+ [2] = left * V(1) * right
+}
-- taken from bibl-bib.lua
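The balanced pattern above is a small lpeg grammar: rule 1 matches a run of
escaped braces, ordinary characters and nested groups, while rule 2 matches one
brace-balanced group. A minimal standalone sketch (all names local to this
example, not part of the patch):

    local lpeg = require("lpeg")
    local P, V = lpeg.P, lpeg.V

    local escape, left, right = P("\\"), P("{"), P("}")

    local balanced = P {
        [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
        [2] = left * V(1) * right,
    }

    print(lpeg.match(balanced,"a {b {c}} d")) -- 12 : the whole string matches
    print(lpeg.match(balanced,"a \\{ b"))     -- 7  : an escaped brace is plain text
    print(lpeg.match(balanced,"a { b"))       -- 3  : matching stops before an unbalanced brace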
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua b/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua
new file mode 100644
index 000000000..ea9c56712
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua
@@ -0,0 +1,598 @@
+local info = {
+ version = 1.002,
+ comment = "scintilla lpeg lexer for bidirectional text",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+local P, S, Cmt, Cp = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp
+local lpegmatch = lpeg.match
+local find, match = string.find, string.match
+
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local bidilexer = lexer.new("bidi","scite-context-lexer-bidi")
+local whitespace = bidilexer.whitespace
+
+local space = patterns.space
+local any = patterns.any
+
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+
+require("char-def")
+
+local data = characters.data -- the table that char-def provides
+
+characters.directions = { }
+
+setmetatable(characters.directions,{ __index = function(t,k)
+ local d = data[k]
+ if d then
+ local v = d.direction
+ if v then
+ t[k] = v
+ return v
+ end
+ end
+ t[k] = false -- maybe 'l'
+ return false
+end })
+
+characters.mirrors = { }
+
+setmetatable(characters.mirrors,{ __index = function(t,k)
+ local d = data[k]
+ if d then
+ local v = d.mirror
+ if v then
+ t[k] = v
+ return v
+ end
+ end
+ t[k] = false
+ return false
+end })
+
+characters.textclasses = { }
+
+setmetatable(characters.textclasses,{ __index = function(t,k)
+ local d = data[k]
+ if d then
+ local v = d.textclass
+ if v then
+ t[k] = v
+ return v
+ end
+ end
+ t[k] = false
+ return false
+end })
+
+local directiondata = characters.directions
+local mirrordata = characters.mirrors
+local textclassdata = characters.textclasses
+
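The three tables above share one memoizing pattern: the __index handler looks a
field up in characters.data once and caches the result, caching false as well so
that a miss is also computed only once. A generic helper (hypothetical, not part
of this file) makes the idiom explicit:

    local function memoized_field(field)
        return setmetatable({ }, { __index = function(t,k)
            local d = characters.data[k]
            local v = d and d[field] or false -- false is cached too
            t[k] = v
            return v
        end })
    end

    -- characters.directions  = memoized_field("direction")
    -- characters.mirrors     = memoized_field("mirror")
    -- characters.textclasses = memoized_field("textclass")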
+local maximum_stack = 0xFF -- unicode: 60, later bumped to 125, we don't care too much
+local analyze_fences = false
+
+local whitespace = { -- bidi whitespace classes; shadows the lexer 'whitespace' local above
+ lre = true,
+ rle = true,
+ lro = true,
+ rlo = true,
+ pdf = true,
+ bn = true,
+ ws = true,
+}
+
+local b_s_ws_on = {
+ b = true,
+ s = true,
+ ws = true,
+ on = true
+}
+
+local mt_space = { __index = { char = 0x0020, direction = "ws", original = "ws", level = 0 } }
+local mt_lre = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0 } }
+local mt_rle = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0 } }
+local mt_pdf = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } }
+local mt_object = { __index = { char = 0xFFFC, direction = "on", original = "on", level = 0 } }
+
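These metatables act as shared prototypes: an entry created with one of them
reads as a space, lre, rle, pdf or object replacement character until a field is
assigned to it, which keeps freshly made entries cheap. For instance:

    local entry = setmetatable({ }, mt_space)
    print(entry.direction, entry.level) -- ws   0   (inherited defaults)
    entry.level = 2                     -- assignment only affects this entry
    print(entry.level)                  -- 2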
+local list = { }
+local stack = { }
+
+setmetatable(stack, { __index = function(t,k) local v = { } t[k] = v return v end })
+
+local function build_list(head)
+ -- P1: split the input into a list of entries; the 'pattern' used below is
+ -- not defined in this (still experimental) file, so this is a stub
+ local size = 0
+ lpegmatch(pattern,head)
+ return list, size
+end
+
+local function resolve_fences(list,size,start,limit)
+ -- N0: funny effects, not always better, so it's an option
+ local nofstack = 0
+ for i=start,limit do
+ local entry = list[i]
+ if entry.direction == "on" then
+ local char = entry.char
+ local mirror = mirrordata[char]
+ if mirror then
+ local class = textclassdata[char]
+ entry.mirror = mirror
+ entry.class = class
+ if class == "open" then
+ nofstack = nofstack + 1
+ local stacktop = stack[nofstack]
+ stacktop[1] = mirror
+ stacktop[2] = i
+ stacktop[3] = false -- not used
+ elseif nofstack == 0 then
+ -- skip
+ elseif class == "close" then
+ while nofstack > 0 do
+ local stacktop = stack[nofstack]
+ if stacktop[1] == char then
+ local open = stacktop[2]
+ local close = i
+ list[open ].paired = close
+ list[close].paired = open
+ break
+ else
+ -- do we mirror or not
+ end
+ nofstack = nofstack - 1
+ end
+ end
+ end
+ end
+ end
+end
+
+local function get_baselevel(list,size,direction)
+ if direction == "TRT" then
+ return 1, "TRT", true
+ elseif direction == "TLT" then
+ return 0, "TLT", true
+ end
+ -- P2, P3:
+ for i=1,size do
+ local entry = list[i]
+ local direction = entry.direction
+ if direction == "r" or direction == "al" then -- and an ?
+ return 1, "TRT", true
+ elseif direction == "l" then
+ return 0, "TLT", true
+ end
+ end
+ return 0, "TLT", false
+end
+
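So an explicitly passed paragraph direction wins, otherwise the first strong
character decides (P2/P3), and the third return value signals whether a strong
character was found at all. With hand-made lists (only the direction field
matters here):

    print(get_baselevel({ { direction = "on" }, { direction = "al" } }, 2))
    -- 1  TRT  true   : the first strong character is arabic
    print(get_baselevel({ { direction = "on" } }, 1))
    -- 0  TLT  false  : no strong character, fall back to left-to-right
    print(get_baselevel({ }, 0, "TRT"))
    -- 1  TRT  true   : an explicit direction short-circuits the scan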
+local function resolve_explicit(list,size,baselevel)
+-- if list.rle or list.lre or list.rlo or list.lro then
+ -- X1
+ local level = baselevel
+ local override = "on"
+ local nofstack = 0
+ for i=1,size do
+ local entry = list[i]
+ local direction = entry.direction
+ -- X2
+ if direction == "rle" then
+ if nofstack < maximum_stack then
+ nofstack = nofstack + 1
+ local stacktop = stack[nofstack]
+ stacktop[1] = level
+ stacktop[2] = override
+ level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
+ override = "on"
+ entry.level = level
+ entry.direction = "bn"
+ entry.remove = true
+ end
+ -- X3
+ elseif direction == "lre" then
+ if nofstack < maximum_stack then
+ nofstack = nofstack + 1
+ local stacktop = stack[nofstack]
+ stacktop[1] = level
+ stacktop[2] = override
+ level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
+ override = "on"
+ entry.level = level
+ entry.direction = "bn"
+ entry.remove = true
+ end
+ -- X4
+ elseif direction == "rlo" then
+ if nofstack < maximum_stack then
+ nofstack = nofstack + 1
+ local stacktop = stack[nofstack]
+ stacktop[1] = level
+ stacktop[2] = override
+ level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
+ override = "r"
+ entry.level = level
+ entry.direction = "bn"
+ entry.remove = true
+ end
+ -- X5
+ elseif direction == "lro" then
+ if nofstack < maximum_stack then
+ nofstack = nofstack + 1
+ local stacktop = stack[nofstack]
+ stacktop[1] = level
+ stacktop[2] = override
+ level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
+ override = "l"
+ entry.level = level
+ entry.direction = "bn"
+ entry.remove = true
+ end
+ -- X7
+ elseif direction == "pdf" then
+ if nofstack < maximum_stack then
+ local stacktop = stack[nofstack]
+ level = stacktop[1]
+ override = stacktop[2]
+ nofstack = nofstack - 1
+ entry.level = level
+ entry.direction = "bn"
+ entry.remove = true
+ end
+ -- X6
+ else
+ entry.level = level
+ if override ~= "on" then
+ entry.direction = override
+ end
+ end
+ end
+-- else
+-- for i=1,size do
+-- list[i].level = baselevel
+-- end
+-- end
+ -- X8 (reset states and overrides after paragraph)
+end
+
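The level arithmetic in X2-X5 computes the least level above the current one
with the required parity; written out as functions (names ours, only for this
sketch):

    local function least_greater_odd(level)  -- rle and rlo
        return level + (level % 2 == 1 and 2 or 1)
    end
    local function least_greater_even(level) -- lre and lro
        return level + (level % 2 == 1 and 1 or 2)
    end

    print(least_greater_odd(0), least_greater_odd(1))   -- 1  3
    print(least_greater_even(0), least_greater_even(1)) -- 2  2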
+local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
+ -- W1: non spacing marks get the direction of the previous character
+-- if list.nsm then
+ for i=start,limit do
+ local entry = list[i]
+ if entry.direction == "nsm" then
+ if i == start then
+ entry.direction = orderbefore
+ else
+ entry.direction = list[i-1].direction
+ end
+ end
+ end
+-- end
+ -- W2: mess with numbers and arabic
+-- if list.en then
+ for i=start,limit do
+ local entry = list[i]
+ if entry.direction == "en" then
+ for j=i-1,start,-1 do
+ local prev = list[j]
+ local direction = prev.direction
+ if direction == "al" then
+ entry.direction = "an"
+ break
+ elseif direction == "r" or direction == "l" then
+ break
+ end
+ end
+ end
+ end
+-- end
+ -- W3
+-- if list.al then
+ for i=start,limit do
+ local entry = list[i]
+ if entry.direction == "al" then
+ entry.direction = "r"
+ end
+ end
+-- end
+ -- W4: make separators number
+-- if list.es or list.cs then
+ -- skip
+-- if false then
+ if false then
+ for i=start+1,limit-1 do
+ local entry = list[i]
+ local direction = entry.direction
+ if direction == "es" then
+ if list[i-1].direction == "en" and list[i+1].direction == "en" then
+ entry.direction = "en"
+ end
+ elseif direction == "cs" then
+ local prevdirection = list[i-1].direction
+ if prevdirection == "en" then
+ if list[i+1].direction == "en" then
+ entry.direction = "en"
+ end
+ elseif prevdirection == "an" and list[i+1].direction == "an" then
+ entry.direction = "an"
+ end
+ end
+ end
+ else -- only more efficient when we have es/cs
+ local runner = start + 2
+ local before = list[start]
+ local entry = list[start + 1]
+ local after = list[runner]
+ while after do
+ local direction = entry.direction
+ if direction == "es" then
+ if before.direction == "en" and after.direction == "en" then
+ entry.direction = "en"
+ end
+ elseif direction == "cs" then
+ local prevdirection = before.direction
+ if prevdirection == "en" then
+ if after.direction == "en" then
+ entry.direction = "en"
+ end
+ elseif prevdirection == "an" and after.direction == "an" then
+ entry.direction = "an"
+ end
+ end
+ before = entry
+ entry = after
+ runner = runner + 1
+ after = list[runner]
+ end
+ end
+-- end
+ -- W5
+-- if list.et then
+ local i = start
+ while i <= limit do
+ if list[i].direction == "et" then
+ local runstart = i
+ local runlimit = runstart
+ for i=runstart,limit do
+ if list[i].direction == "et" then
+ runlimit = i
+ else
+ break
+ end
+ end
+ local rundirection = runstart == start and orderbefore or list[runstart-1].direction
+ if rundirection ~= "en" then
+ rundirection = runlimit == limit and orderafter or list[runlimit+1].direction
+ end
+ if rundirection == "en" then
+ for j=runstart,runlimit do
+ list[j].direction = "en"
+ end
+ end
+ i = runlimit
+ end
+ i = i + 1
+ end
+-- end
+ -- W6
+-- if list.es or list.cs or list.et then
+ for i=start,limit do
+ local entry = list[i]
+ local direction = entry.direction
+ if direction == "es" or direction == "et" or direction == "cs" then
+ entry.direction = "on"
+ end
+ end
+-- end
+ -- W7
+ for i=start,limit do
+ local entry = list[i]
+ if entry.direction == "en" then
+ local prev_strong = orderbefore
+ for j=i-1,start,-1 do
+ local direction = list[j].direction
+ if direction == "l" or direction == "r" then
+ prev_strong = direction
+ break
+ end
+ end
+ if prev_strong == "l" then
+ entry.direction = "l"
+ end
+ end
+ end
+end
+
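Rule W1 as implemented above is easy to check in isolation: a non spacing mark
inherits the direction of its predecessor (or of the run border when it opens
the run). With a hand-made list, not part of the patch:

    local list = {
        { direction = "l",   level = 0 },
        { direction = "nsm", level = 0 },
        { direction = "nsm", level = 0 },
    }
    resolve_weak(list,3,1,3,"l","l")
    print(list[2].direction, list[3].direction) -- l  l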
+local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
+ -- N1, N2
+ local i = start
+ while i <= limit do
+ local entry = list[i]
+ if b_s_ws_on[entry.direction] then
+ -- this needs checking
+ local leading_direction, trailing_direction, resolved_direction
+ local runstart = i
+ local runlimit = runstart
+ for j=runstart+1,limit do
+ if b_s_ws_on[list[j].direction] then
+ runlimit = j
+ else
+ break
+ end
+ end
+ if runstart == start then
+ leading_direction = orderbefore
+ else
+ leading_direction = list[runstart-1].direction
+ if leading_direction == "en" or leading_direction == "an" then
+ leading_direction = "r"
+ end
+ end
+ if runlimit == limit then
+ trailing_direction = orderafter
+ else
+ trailing_direction = list[runlimit+1].direction
+ if trailing_direction == "en" or trailing_direction == "an" then
+ trailing_direction = "r"
+ end
+ end
+ if leading_direction == trailing_direction then
+ -- N1
+ resolved_direction = leading_direction
+ else
+ -- N2 / does the weird period
+ resolved_direction = entry.level % 2 == 1 and "r" or "l"
+ end
+ for j=runstart,runlimit do
+ list[j].direction = resolved_direction
+ end
+ i = runlimit
+ end
+ i = i + 1
+ end
+end
+
+local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
+ for i=start,limit do
+ local entry = list[i]
+ local level = entry.level
+ local direction = entry.direction
+ if level % 2 ~= 1 then -- even
+ -- I1
+ if direction == "r" then
+ entry.level = level + 1
+ elseif direction == "an" or direction == "en" then
+ entry.level = level + 2
+ end
+ else
+ -- I2
+ if direction == "l" or direction == "en" or direction == "an" then
+ entry.level = level + 1
+ end
+ end
+ end
+end
+
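I1/I2 in numbers: on an even (left-to-right) level a strong "r" goes up one
level and numbers go up two, while on an odd (right-to-left) level "l", "en" and
"an" go up one. With a hand-made list:

    local list = {
        { level = 0, direction = "r"  },
        { level = 0, direction = "en" },
        { level = 1, direction = "l"  },
    }
    resolve_implicit(list,3,1,3)
    print(list[1].level, list[2].level, list[3].level) -- 1  2  2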
+local function resolve_levels(list,size,baselevel,analyze_fences)
+ -- X10
+ local start = 1
+ while start < size do
+ local level = list[start].level
+ local limit = start + 1
+ while limit < size and list[limit].level == level do
+ limit = limit + 1
+ end
+ local prev_level = start == 1 and baselevel or list[start-1].level
+ local next_level = limit == size and baselevel or list[limit+1].level
+ local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l"
+ local orderafter = (level > next_level and level or next_level) % 2 == 1 and "r" or "l"
+ -- W1 .. W7
+ resolve_weak(list,size,start,limit,orderbefore,orderafter)
+ -- N0
+ if analyze_fences then
+ resolve_fences(list,size,start,limit)
+ end
+ -- N1 .. N2
+ resolve_neutral(list,size,start,limit,orderbefore,orderafter)
+ -- I1 .. I2
+ resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
+ start = limit
+ end
+ -- L1
+ for i=1,size do
+ local entry = list[i]
+ local direction = entry.original
+ -- (1)
+ if direction == "s" or direction == "b" then
+ entry.level = baselevel
+ -- (2)
+ for j=i-1,1,-1 do
+ local entry = list[j]
+ if whitespace[entry.original] then
+ entry.level = baselevel
+ else
+ break
+ end
+ end
+ end
+ end
+ -- (3)
+ for i=size,1,-1 do
+ local entry = list[i]
+ if whitespace[entry.original] then
+ entry.level = baselevel
+ else
+ break
+ end
+ end
+ -- L4
+ if analyze_fences then
+ for i=1,size do
+ local entry = list[i]
+ if entry.level % 2 == 1 then -- odd(entry.level)
+ if entry.mirror and not entry.paired then
+ entry.mirror = false
+ end
+ -- okay
+ elseif entry.mirror then
+ entry.mirror = false
+ end
+ end
+ else
+ for i=1,size do
+ local entry = list[i]
+ if entry.level % 2 == 1 then -- odd(entry.level)
+ local mirror = mirrordata[entry.char]
+ if mirror then
+ entry.mirror = mirror
+ end
+ end
+ end
+ end
+end
+
+local index = 1
+
+local function process(head,direction)
+ local list, size = build_list(head)
+ local baselevel = get_baselevel(list,size,direction) -- we always have an inline dir node in context
+ resolve_explicit(list,size,baselevel)
+ resolve_levels(list,size,baselevel,analyze_fences)
+ index = 1
+ return list, size
+end
+
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+
+local utf = lexer.helpers.utfbytepattern
+
+-- local t_start = token("default", utf, function(s,i) if i == 1 then index = 1 process(s) end end))
+-- local t_bidi = token("error", utf / function() index = index + 1 return list[index].direction == "r" end)
+-- local t_rest = token("default", any)
+
+-- bidilexer._rules = {
+-- { "start", t_start },
+-- { "bidi", t_bidi },
+-- { "rest", t_rest },
+-- }
+
+bidilexer._grammar = #utf * function(s,i)
+ local list, size = process(s)
+ local t = { }
+ local n = 0
+ for i=1,size do
+ n = n + 1 t[n] = i
+ n = n + 1 t[n] = "error"
+ end
+ return t
+end
+
+bidilexer._tokenstyles = context.styleset
+
+return bidilexer
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cld.lua b/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
index a5fbf9cd7..7bda7800e 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
@@ -6,13 +6,12 @@ local info = {
license = "see context related readme files",
}
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
-local cldlexer = lexer.new("cld","scite-context-lexer-cld")
-local lualexer = lexer.load("scite-context-lexer-lua")
+local cldlexer = lexer.new("cld","scite-context-lexer-cld")
+local lualexer = lexer.load("scite-context-lexer-lua")
-- can probably be done nicer now, a bit of a hack
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
index e8ff3c1ff..631a802fe 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
@@ -6,13 +6,12 @@ local info = {
license = "see context related readme files",
}
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
-local cppweblexer = lexer.new("cpp-web","scite-context-lexer-cpp")
-local cpplexer = lexer.load("scite-context-lexer-cpp")
+local cppweblexer = lexer.new("cpp-web","scite-context-lexer-cpp")
+local cpplexer = lexer.load("scite-context-lexer-cpp")
-- can probably be done nicer now, a bit of a hack
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua b/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
index d56dc58f9..d9079855f 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
@@ -10,8 +10,7 @@ local info = {
local P, R, S = lpeg.P, lpeg.R, lpeg.S
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua b/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
index 69590ed34..5d3096b7d 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
@@ -1,4 +1,4 @@
--- local info = {
+local info = {
version = 1.002,
comment = "scintilla lpeg lexer that triggers whitespace backtracking",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -10,8 +10,7 @@
-- we need to trigger that, for instance in the bibtex lexer, but still
-- we get failed lexing
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua b/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
index 5d5b689d2..b1304f65c 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
@@ -6,8 +6,7 @@ local info = {
license = "see context related readme files",
}
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-lua.lua b/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
index a8aa8dbe3..ba14f5206 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
@@ -13,7 +13,6 @@ local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
local match, find = string.match, string.find
local setmetatable = setmetatable
--- local lexer = require("lexer")
local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
@@ -47,7 +46,7 @@ local functions = {
"pcall", "print", "rawequal", "rawget", "rawset", "require",
"setmetatable", "tonumber", "tostring", "type", "unpack", "xpcall", "select",
- "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32",
+ "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32", "utf8",
}
local constants = {
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-mps.lua b/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
index e24a41d0c..1c87ea6d0 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
@@ -10,8 +10,7 @@ local global, string, table, lpeg = _G, string, table, lpeg
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
local type = type
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
index cdf33cf7c..155a9bd51 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
@@ -10,8 +10,7 @@ local info = {
local P, R, S, C, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.V
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
index f08d16488..14ba5296b 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
@@ -10,7 +10,6 @@ local info = {
local P, R = lpeg.P, lpeg.R
--- local lexer = require("lexer")
local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
index 1d4796ea5..0fd238d63 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
@@ -6,13 +6,12 @@ local info = {
license = "see context related readme files",
}
--- pdf is normally startic .. i.e. not edited so we don't really
+-- pdf is normally static .. i.e. not edited so we don't really
-- need embedded lexers.
local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-sql.lua b/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
index ea432c5c9..cf0a03331 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
@@ -8,8 +8,7 @@ local info = {
local P, R, S = lpeg.P, lpeg.R, lpeg.S
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
index 4a55fd143..88499a9c2 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
@@ -6,13 +6,12 @@ local info = {
license = "see context related readme files",
}
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
-local texweblexer = lexer.new("tex-web","scite-context-lexer-tex")
-local texlexer = lexer.load("scite-context-lexer-tex")
+local texweblexer = lexer.new("tex-web","scite-context-lexer-tex")
+local texlexer = lexer.load("scite-context-lexer-tex")
-- can probably be done nicer now, a bit of a hack
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-tex.lua b/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
index bc08bfcd9..1f1246fc0 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
@@ -31,8 +31,7 @@ local P, R, S, V, C, Cmt, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, l
local type, next = type, next
local find, match, lower, upper = string.find, string.match, string.lower, string.upper
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
local inform = context.inform
@@ -145,6 +144,9 @@ local validminimum = 3
-- fails (empty loop message) ... latest lpeg issue?
+-- todo: Make sure we only do this at the beginning .. a pity that we
+-- can't store a state .. now it is done too often.
+
local knownpreamble = Cmt(P("% "), function(input,i,_) -- todo : utfbomb, was #P("% ")
if i < 10 then
validwords, validminimum = false, 3
@@ -220,10 +222,12 @@ local p_comment = commentline
----- p_helper = backslash * exact_match(helpers)
----- p_primitive = backslash * exact_match(primitives)
-local p_command = backslash * lexer.helpers.utfchartabletopattern(currentcommands) * #(1-cstoken)
-local p_constant = backslash * lexer.helpers.utfchartabletopattern(constants) * #(1-cstoken)
-local p_helper = backslash * lexer.helpers.utfchartabletopattern(helpers) * #(1-cstoken)
-local p_primitive = backslash * lexer.helpers.utfchartabletopattern(primitives) * #(1-cstoken)
+local p_csdone = #(1-cstoken) + P(-1)
+
+local p_command = backslash * lexer.helpers.utfchartabletopattern(currentcommands) * p_csdone
+local p_constant = backslash * lexer.helpers.utfchartabletopattern(constants) * p_csdone
+local p_helper = backslash * lexer.helpers.utfchartabletopattern(helpers) * p_csdone
+local p_primitive = backslash * lexer.helpers.utfchartabletopattern(primitives) * p_csdone
local p_ifprimitive = P("\\if") * cstoken^1
local p_csname = backslash * (cstoken^1 + P(1))
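The point of the new p_csdone above is that a control sequence name now also
terminates at the end of the input, not only before a non-letter, so a keyword
at the very end of a buffer still gets colored. A standalone sketch (with
cstoken reduced to ascii letters for the example):

    local lpeg = require("lpeg")
    local P, R = lpeg.P, lpeg.R

    local cstoken  = R("az","AZ")
    local p_csdone = #(1-cstoken) + P(-1)
    local p_bye    = P("\\") * P("bye") * p_csdone

    print(lpeg.match(p_bye,"\\bye "))   -- 5   : lookahead, the space is not consumed
    print(lpeg.match(p_bye,"\\bye"))    -- 5   : end of input also terminates the name
    print(lpeg.match(p_bye,"\\byebye")) -- nil : a longer csname does not match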
@@ -446,12 +450,17 @@ local stopmetafuncode = token("embedded", stopmetafun)
local callers = token("embedded", P("\\") * metafuncall) * metafunarguments
+ token("embedded", P("\\") * luacall)
-lexer.embed_lexer(contextlexer, cldlexer, startluacode, stopluacode)
lexer.embed_lexer(contextlexer, mpslexer, startmetafuncode, stopmetafuncode)
+lexer.embed_lexer(contextlexer, cldlexer, startluacode, stopluacode)
+
+-- preamble is inefficient as it probably gets called each time (so some day I really need to
+-- patch the plugin)
+
+contextlexer._preamble = preamble
contextlexer._rules = {
{ "whitespace", spacing },
- { "preamble", preamble },
+ -- { "preamble", preamble },
{ "word", word },
{ "text", text }, -- non words
{ "comment", comment },
@@ -459,10 +468,10 @@ contextlexer._rules = {
-- { "subsystem", subsystem },
{ "callers", callers },
{ "subsystem", subsystem },
+ { "ifprimitive", ifprimitive },
{ "helper", helper },
{ "command", command },
{ "primitive", primitive },
- { "ifprimitive", ifprimitive },
-- { "subsystem", subsystem },
{ "reserved", reserved },
{ "csname", csname },
@@ -490,10 +499,10 @@ if web then
{ "comment", comment },
{ "constant", constant },
{ "callers", callers },
+ { "ifprimitive", ifprimitive },
{ "helper", helper },
{ "command", command },
{ "primitive", primitive },
- { "ifprimitive", ifprimitive },
{ "reserved", reserved },
{ "csname", csname },
{ "grouping", grouping },
@@ -514,10 +523,10 @@ else
{ "comment", comment },
{ "constant", constant },
{ "callers", callers },
+ { "ifprimitive", ifprimitive },
{ "helper", helper },
{ "command", command },
{ "primitive", primitive },
- { "ifprimitive", ifprimitive },
{ "reserved", reserved },
{ "csname", csname },
{ "grouping", grouping },
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-txt.lua b/context/data/textadept/context/lexers/scite-context-lexer-txt.lua
index 152e9a663..8ecfff7cb 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-txt.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-txt.lua
@@ -9,8 +9,7 @@ local info = {
local P, S, Cmt, Cp = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp
local find, match = string.find, string.match
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua b/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua
index 141de20e1..3cef71739 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua
@@ -8,8 +8,7 @@ local info = {
local P, R, S, C, Cg, Cb, Cs, Cmt, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Cs, lpeg.Cmt, lpeg.match
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-web.lua
index 6fe5ac84c..81a6f90df 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-web.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-web.lua
@@ -8,8 +8,7 @@ local info = {
local P, R, S = lpeg.P, lpeg.R, lpeg.S
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua
index 25fa9128f..f5ca86cb2 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua
@@ -8,8 +8,7 @@ local info = {
local P = lpeg.P
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua
index 2d7260b69..40de8f603 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua
@@ -8,8 +8,7 @@ local info = {
local P = lpeg.P
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua
index 1ee96ba89..a1b717a6a 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua
@@ -8,7 +8,6 @@ local info = {
local P = lpeg.P
--- local lexer = require("lexer")
local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua
index 1b7e2e897..bbdb3febc 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-xml.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua
@@ -17,8 +17,7 @@ local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
local type = type
local match, find = string.match, string.find
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
local context = lexer.context
local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer.lua b/context/data/textadept/context/lexers/scite-context-lexer.lua
index e526d5045..37f236a89 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer.lua
@@ -8,11 +8,6 @@ local info = {
}
--- todo: hook into context resolver etc
--- todo: only old api in lexers, rest in context subnamespace
--- todo: make sure we can run in one state .. copies or shared?
--- todo: auto-nesting
-
if lpeg.setmaxstack then lpeg.setmaxstack(1000) end
local log = false
@@ -27,169 +22,252 @@ local inspect = false -- can save some 15% (maybe easier on scintilla)
-- GET GOING
--
--- You need to copy this file over lexer.lua. In principle other lexers could work too but
--- not now. Maybe some day. All patterns will move into the patterns name space. I might do
--- the same with styles. If you run an older version of SciTE you can take one of the
--- archives. Pre 3.41 versions can just be copied to the right path, as there we still use
--- part of the normal lexer.
+-- You need to copy this file over lexer.lua. In principle other lexers could work
+-- too but not now. Maybe some day. All patterns will move into the patterns name
+-- space. I might do the same with styles. If you run an older version of SciTE you
+-- can take one of the archives. Pre 3.41 versions can just be copied to the right
+-- path, as there we still use part of the normal lexer. Below we mention some
+-- issues with different versions of SciTE. We try to keep up with changes but you
+-- had best check carefully that the version you install works as expected, because
+-- SciTE and the scintillua dll need to be in sync.
--
-- REMARK
--
--- We started using lpeg lexing as soon as it came available. Because we had rather demanding
--- files and also wanted to use nested lexers, we ended up with our own variant. At least at
--- that time this was more robust and also faster (as we have some pretty large lua data files
--- and also work with large xml files). As a consequence successive versions had to be adapted
--- to changes in the (at that time still unstable) api. In addition to lexing we also have
--- spell checking and such. Around version 3.60 things became more stable so I don't expect to
--- change much.
+-- We started using lpeg lexing as soon as it became available. Because we had rather
+-- demanding files and also wanted to use nested lexers, we ended up with our own
+-- variant. At least at that time this was more robust and also much faster (as we
+-- have some pretty large Lua data files and also work with large xml files). As a
+-- consequence successive versions had to be adapted to changes in the (at that time
+-- still unstable) api. In addition to lexing we also have spell checking and such.
+-- Around version 3.60 things became more stable so I don't expect to change much.
+--
+-- LEXING
--
--- STATUS
+-- When PCs showed up we wrote our own editor (texedit) in Modula-2. It was fast,
+-- had multiple overlapping (text) windows, and could run in the at most 1M of
+-- memory available back then. The realtime file browsing with lexing that we had
+-- at that time is still on my current wish list. The color scheme and logic that
+-- we used related to the logic behind the evolving ConTeXt user interface.
--
--- todo: maybe use a special stripped version of the dll (stable api) and add a bit more
--- interfacing to scintilla
--- todo: investigate if we can use the already built in lua instance so that we can combine the
--- power of lexign with extensions
--- todo: play with hotspot and other properties (but no real need now)
--- todo: maybe come up with an extension to the api subsystem
--- todo: add proper tracing and so .. not too hard as we can run on mtxrun, but we lack a console
--- for debugging (ok, chicken-egg as lexers probably need to be loaded before a console can
--- kick in)
--- todo: get rid of these lexers.STYLE_XX and lexers.XX (hide such details)
+-- Later I rewrote the editor in perl/tk. I don't like the perl syntax but tk
+-- widgets are very powerful and hard to beat. In fact, TextAdept reminds me of
+-- that: wrap your own interface around a framework (tk had an edit control that one
+-- could control completely, not that different from scintilla). Last time I checked
+-- it still ran fine so I might try to implement something like its file handling in
+-- TextAdept.
--
--- wish: access to all scite properties and in fact integrate in scite
+-- In the end I settled for SciTE for which I wrote TeX and MetaPost lexers that
+-- could handle keyword sets. With respect to lexing (syntax highlighting) ConTeXt
+-- has a long history, if only because we need it for manuals. Anyway, in the end we
+-- arrived at lpeg based lexing (which is quite natural as we have lots of lpeg
+-- usage in ConTeXt). The basic color schemes haven't changed much. The most
+-- prominent differences are the nested lexers.
--
+-- In the meantime I made the lexer suitable for typesetting sources which was no
+-- big deal as we already had that in place (ConTeXt used lpeg from the day it
+-- showed up so we have several lexing options there too).
--
--- In the meantime I made the lexer suitable for typesetting sources which was no big deal as we
--- already had that in place (ConTeXt used lpeg from the day it showed up so we have several lexing
--- options there too).
+-- Keep in mind that in ConTeXt (typesetting) lexing can follow several approaches:
+-- line based (which is handy for verbatim mode), syntax mode (which is nice for
+-- tutorials), and tolerant mode (so that one can also show bad examples or errors).
+-- These demands can clash.
--
-- HISTORY
--
--- The fold and lex functions are copied and patched from original code by Mitchell (see lexer.lua).
--- All errors are mine. The ability to use lpeg in scintilla is a real nice addition and a brilliant
--- move. The code is a byproduct of the (mainly Lua based) textadept (at the time I ran into it was
--- a rapidly moving target so I decided to stick ot SciTE). When I played with it, it had no realtime
--- output pane but that seems to be dealt with now (2017). I need to have a look at it in more detail
--- but a first test again mad the output hang and it was a bit slow too (and I also want the log pane
--- as scite has it, on the right, in view). So, for now I stick to SciTE even when it's somewhat
--- crippled by the fact that we cannot hook our own (language dependent) lexer into the output pane
--- (somehow the errorlist lexer is hard coded into the editor). Hopefully that will change some day.
--- So, how did we arrive where we're now.
+-- The remarks below are more for myself so that I keep track of changes in the
+-- way we adapt to changes in scintillua and SciTE.
+--
+-- The fold and lex functions are copied and patched from original code by Mitchell
+-- (see lexer.lua) in the scintillua distribution. So whatever I say below, assume
+-- that all errors are mine. The ability to use lpeg in scintilla is a real nice
+-- addition and a brilliant move. The code is a byproduct of the (mainly Lua based)
+-- TextAdept which, at the time I ran into it, was a rapidly moving target, so I
+-- decided to stick to SciTE. When I played with it, it had no realtime output pane
+-- although that seems to be dealt with now (2017). I need to have a look at it in
+-- more detail but a first test again made the output hang and it was a bit slow too
+-- (and I also want the log pane as SciTE has it, on the right, in view). So, for
+-- now I stick to SciTE even when it's somewhat crippled by the fact that we cannot
+-- hook our own (language dependent) lexer into the output pane (somehow the
+-- errorlist lexer is hard coded into the editor). Hopefully that will change some
+-- day. The ConTeXt distribution has a cmd runner for TextAdept that will plug in
+-- the lexers discussed here as well as a dedicated runner. Consider it an
+-- experiment.
--
--- Starting with SciTE version 3.20 there is an issue with coloring. As we still lack a connection
--- with SciTE itself (properties as well as printing to the log pane) and we cannot trace this (on
--- windows). As far as I can see, there are no fundamental changes in lexer.lua or LexLPeg.cxx so it
--- must be in Scintilla itself. So for the moment I stick to 3.10. Indicators are: no lexing of 'next'
--- and 'goto <label>' in the Lua lexer and no brace highlighting either. Interesting is that it does
--- work ok in the cld lexer (so the Lua code is okay). All seems to be ok again in later versions,
--- so, when you update best check first and just switch back to an older version as normally a SciTE
--- update is not critital. When char-def.lua lexes real fast this is a signal that the lexer quits
--- somewhere halfway. Maybe there are some hard coded limitations on the amount of styles and/or
--- length of names.
+-- The basic code hasn't changed much but we had to adapt a few times to changes in
+-- the api and/or work around bugs. Starting with SciTE version 3.20 there was an
+-- issue with coloring. We still lacked a connection with SciTE itself (properties
+-- as well as printing to the log pane) and we could not trace this (on windows).
+-- However on unix we can see messages! As far as I can see, there are no
+-- fundamental changes in lexer.lua or LexLPeg.cxx so it must be/have been in
+-- Scintilla itself. So we went back to 3.10. Indicators of issues are: no lexing of
+-- 'next' and 'goto <label>' in the Lua lexer and no brace highlighting either.
+-- Interesting is that it does work ok in the cld lexer (so the Lua code is okay).
+-- All seems to be ok again in later versions, so, when you update best check first
+-- and just switch back to an older version as normally a SciTE update is not
+-- critical. When char-def.lua lexes really fast this is a signal that the lexer quits
+-- somewhere halfway. Maybe there are some hard coded limitations on the amount of
+-- styles and/or length of names.
--
--- Anyway, after checking 3.24 and adapting to the new lexer tables things are okay again. So, this
--- version assumes 3.24 or higher. In 3.24 we have a different token result, i.e. no longer a { tag,
--- pattern } but just two return values. I didn't check other changes but will do that when I run into
--- issues. I had optimized these small tables by hashing which was more efficient but this is no longer
--- needed. For the moment we keep some of that code around as I don't know what happens in future
--- versions. I'm anyway still happy with this kind of lexing.
+-- Anyway, after checking 3.24 and adapting to the new lexer tables things are okay
+-- again. So, this version assumes 3.24 or higher. In 3.24 we have a different token
+-- result, i.e. no longer a { tag, pattern } but just two return values. I didn't
+-- check other changes but will do that when I run into issues. I had already
+-- optimized these small tables by hashing which was much more efficient (and maybe
+-- even more efficient than the current approach) but this is no longer needed. For
+-- the moment we keep some of that code around as I don't know what happens in
+-- future versions. I'm anyway still happy with this kind of lexing.
--
--- In 3.31 another major change took place: some helper constants (maybe they're no longer constants)
--- and functions were moved into the lexer modules namespace but the functions are assigned to the Lua
--- module afterward so we cannot alias them beforehand. We're probably getting close to a stable
--- interface now. I've considered making a whole copy and patch the other functions too as we need an
--- extra nesting model. However, I don't want to maintain too much. An unfortunate change in 3.03 is
--- that no longer a script can be specified. This means that instead of loading the extensions via the
--- properties file, we now need to load them in our own lexers, unless of course we replace lexer.lua
+-- In 3.31 another major change took place: some helper constants (maybe they're no
+-- longer constants) and functions were moved into the lexer modules namespace but
+-- the functions are assigned to the Lua module afterward so we cannot alias them
+-- beforehand. We're probably getting close to a stable interface now. At that time
+-- for the first time I considered making a whole copy and patch the other functions
+-- too as we need an extra nesting model. However, I don't want to maintain too
+-- much. An unfortunate change in 3.03 is that no longer a script can be specified.
+-- This means that instead of loading the extensions via the properties file, we now
+-- need to load them in our own lexers, unless of course we replace lexer.lua
-- completely (which adds another installation issue).
--
--- Another change has been that _LEXERHOME is no longer available. It looks like more and more
--- functionality gets dropped so maybe at some point we need to ship our own dll/so files. For instance,
--- I'd like to have access to the current filename and other scite properties. We could then cache some
--- info with each file, if only we had knowledge of what file we're dealing with.
+-- Another change has been that _LEXERHOME is no longer available. It looks like
+-- more and more functionality gets dropped so maybe at some point we need to ship
+-- our own dll/so files. For instance, I'd like to have access to the current
+-- filename and other SciTE properties. We could then cache some info with each
+-- file, if only we had knowledge of what file we're dealing with. This all makes a
+-- nice installation more complex and (worse) makes it hard to share files between
+-- different editors using a similar directory structure.
--
--- For huge files folding can be pretty slow and I do have some large ones that I keep open all the time.
--- Loading is normally no ussue, unless one has remembered the status and the cursor is at the last line
--- of a 200K line file. Optimizing the fold function brought down loading of char-def.lua from 14 sec
--- => 8 sec. Replacing the word_match function and optimizing the lex function gained another 2+ seconds.
--- A 6 second load is quite ok for me. The changed lexer table structure (no subtables) brings loading
--- down to a few seconds.
+-- For huge files folding can be pretty slow and I do have some large ones that I
+-- keep open all the time. Loading is normally no issue, unless one has remembered
+-- the status and the cursor is at the last line of a 200K line file. Optimizing the
+-- fold function brought down loading of char-def.lua from 14 sec => 8 sec.
+-- Replacing the word_match function and optimizing the lex function gained another
+-- 2+ seconds. A 6 second load is quite ok for me. The changed lexer table structure
+-- (no subtables) brings loading down to a few seconds.
--
--- When the lexer path is copied to the textadept lexer path, and the theme definition to theme path
--- (as lexer.lua), the lexer works there as well. Although ... when I decided to check the state of
--- textadept i had to adapt some loader code. It's not pretty but works and also permits overloading.
--- When I have time and motive I will make a proper setup file to tune the look and feel a bit and
--- associate suffixes with the context lexer. The textadept editor has a nice style tracing option but
--- lacks the tabs for selecting files that scite has. It also has no integrated run that pipes to the
--- log pane. Interesting is that the jit version of textadept crashes on lexing large files (and does
--- not feel faster either; maybe a side effect of known limitations as we know that luajit is more
--- limited than stock lua). Btw, in the meantime on unix one can test easier as there we can enable
--- the loggers in this module.
+-- When the lexer path is copied to the TextAdept lexer path, and the theme
+-- definition to theme path (as lexer.lua), the lexer works there as well. Although
+-- ... when I decided to check the state of TextAdept I had to adapt some loader
+-- code. The solution is not pretty but works and also permits overloading. When I
+-- have time and motive I will make a proper setup file to tune the look and feel a
+-- bit more than we do now. The TextAdept editor now has tabs and a console so it
+-- has become more usable for me (it's still somewhat slower than SciTE).
+-- Interesting is that the jit version of TextAdept crashes on lexing large files
+-- (and does not feel faster either; maybe a side effect of known limitations as we
+-- know that Luajit is more limited than stock Lua).
--
--- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name .. '_whitespace' which means
--- that we need to have it frozen at the moment we load another lexer. Because spacing is used to revert
--- to a parent lexer we need to make sure that we load children as late as possible in order not to get
--- the wrong whitespace trigger. This took me quite a while to figure out (not being that familiar with
--- the internals). The lex and fold functions have been optimized. It is a pitty that there is no proper
--- print available. Another thing needed is a default style in our own theme style definition, as otherwise
--- we get wrong nested lexers, especially if they are larger than a view. This is the hardest part of
+-- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name ..
+-- '_whitespace' which means that we need to have it frozen at the moment we load
+-- another lexer. Because spacing is used to revert to a parent lexer we need to
+-- make sure that we load children as late as possible in order not to get the wrong
+-- whitespace trigger. This took me quite a while to figure out (not being that
+-- familiar with the internals). The lex and fold functions have been optimized. It
+-- is a pity that there is no proper print available. Another thing needed is a
+-- default style in our own theme style definition, as otherwise we get wrong nested
+-- lexers, especially if they are larger than a view. This is the hardest part of
-- getting things right.
--
--- It's a pitty that there is no scintillua library for the OSX version of scite. Even better would be
--- to have the scintillua library as integral part of scite as that way I could use OSX alongside
--- windows and linux (depending on needs). Also nice would be to have a proper interface to scite then
--- because currently the lexer is rather isolated and the lua version does not provide all standard
--- libraries. It would also be good to have lpeg support in the regular scite lua extension (currently
--- you need to pick it up from someplace else).
+-- It's a pity that there is no scintillua library for the OSX version of SciTE.
+-- Even better would be to have the scintillua library as integral part of SciTE as
+-- that way I could use OSX alongside windows and linux (depending on needs). Also
+-- nice would be to have a proper interface to SciTE then because currently the
+-- lexer is rather isolated and the Lua version does not provide all standard
+-- libraries. It would also be good to have lpeg support in the regular SciTE Lua
+-- extension (currently you need to pick it up from someplace else). I keep hoping.
--
--- With 3.41 the interface changed again so it gets time to look into the C++ code and consider compiling
--- and patching myself. Loading is more complicated now as the lexer gets loaded automatically so we have
--- little control over extending the code now. After a few days trying all kind of solutions I decided to
--- follow a different approach: drop in a complete replacement. This of course means that I need to keep
--- track of even more changes (which for sure will happen) but at least I get rid of interferences. The
--- api (lexing and configuration) is simply too unstable across versions. Maybe in a few years things have
--- stabelized again. (Or maybe it's not really expected that one writes lexers at all.) A side effect is
--- that I now no longer will use shipped lexers but just the built-in ones in addition to the context
--- lpeg lexers. Not that it matters much as the context lexers cover what I need (and I can always write
--- more).
+-- With 3.41 the interface changed again so it became time to look into the C++ code
+-- and consider compiling and patching myself, something that I like to avoid.
+-- Loading is more complicated now as the lexer gets loaded automatically so we have
+-- little control over extending the code now. After a few days trying all kind of
+-- solutions I decided to follow a different approach: drop in a complete
+-- replacement. This of course means that I need to keep track of even more changes
+-- (which for sure will happen) but at least I get rid of interferences. Till 3.60
+-- the api (lexing and configuration) was simply too unstable across versions which
+-- is a pity because we expect authors to install SciTE without hassle. Maybe in a
+-- few years things will have stabilized. Maybe it's also not really expected that
+-- one writes lexers at all. A side effect is that I now no longer will use shipped
+-- lexers for languages that I made no lexer for, but just the built-in ones in
+-- addition to the ConTeXt lpeg lexers. Not that it matters much as the ConTeXt
+-- lexers cover what I need (and I can always write more). For editing TeX files one
+-- only needs a limited set of lexers (TeX, MetaPost, Lua, BibTeX, C/W, PDF, SQL,
+-- etc). I can add more when I want.
--
--- In fact, the transition to 3.41 was triggered by an unfateful update of Ubuntu which left me with an
--- incompatible SciTE and lexer library and updating was not possible due to the lack of 64 bit libraries.
--- We'll see what the future brings.
+-- In fact, the transition to 3.41 was triggered by an unfortunate update of Ubuntu
+-- which left me with an incompatible SciTE and lexer library and updating was not
+-- possible due to the lack of 64 bit libraries. We'll see what the future brings.
+-- For now I can use SciTE under wine on linux. The fact that scintillua ships
+-- independently is a showstopper.
--
--- Promissing is that the library now can use another Lua instance so maybe some day it will get properly
--- in SciTE and we can use more clever scripting.
+-- Promising is that the library now can use another Lua instance so maybe some day
+-- it will get properly integrated in SciTE and we can use more clever scripting.
--
--- In some lexers we use embedded ones even if we could do it directly, The reason is that when the end
--- token is edited (e.g. -->), backtracking to the space before the begin token (e.g. <!--) results in
--- applying the surrounding whitespace which in turn means that when the end token is edited right,
--- backtracking doesn't go back. One solution (in the dll) would be to backtrack several space categories.
+-- In some lexers we use embedded ones even if we could do it directly. The reason
+-- is that when the end token is edited (e.g. -->), backtracking to the space before
+-- the begin token (e.g. <!--) results in applying the surrounding whitespace which
+-- in turn means that when the end token is edited right, backtracking doesn't go
+-- back. One solution (in the dll) would be to backtrack several space categories.
-- After all, lexing is quite fast (applying the result is much slower).
--
--- For some reason the first blob of text tends to go wrong (pdf and web). It would be nice to have 'whole
--- doc' initial lexing. Quite fishy as it makes it impossible to lex the first part well (for already opened
--- documents) because only a partial text is passed.
+-- For some reason the first blob of text tends to go wrong (pdf and web). It would
+-- be nice to have 'whole doc' initial lexing. Quite fishy as it makes it impossible
+-- to lex the first part well (for already opened documents) because only a partial
+-- text is passed.
--
--- So, maybe I should just write this from scratch (assuming more generic usage) because after all, the dll
--- expects just tables, based on a string. I can then also do some more aggressive resource sharing (needed
--- when used generic).
+-- So, maybe I should just write this from scratch (assuming more generic usage)
+-- because after all, the dll expects just tables, based on a string. I can then
+-- also do some more aggressive resource sharing (needed when used generic).
--
--- I think that nested lexers are still bugged (esp over longer ranges). It never was robust or maybe it's
--- simply not meant for too complex cases (well, it probably *is* tricky material). The 3.24 version was
--- probably the best so far. The fact that styles bleed between lexers even if their states are isolated is
--- an issue. Another issus is that zero characters in the text passed to the lexer can mess things up (pdf
--- files have them in streams).
+-- I think that nested lexers are still bugged (esp over longer ranges). It never
+-- was robust or maybe it's simply not meant for too complex cases (well, it
+-- probably *is* tricky material). The 3.24 version was probably the best so far.
+-- The fact that styles bleed between lexers even if their states are isolated is an
+-- issue. Another issue is that zero characters in the text passed to the lexer can
+-- mess things up (pdf files have them in streams).
--
--- For more complex 'languages', like web or xml, we need to make sure that we use e.g. 'default' for
--- spacing that makes up some construct. Ok, we then still have a backtracking issue but less.
+-- For more complex 'languages', like web or xml, we need to make sure that we use
+-- e.g. 'default' for spacing that makes up some construct. Ok, we then still have a
+-- backtracking issue but less.
--
--- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++ and we ship an ini
--- file for that editor with some installation instructions embedded.
+-- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++
+-- and we ship an ini file for that editor with some installation instructions
+-- embedded. Also, TextAdept has a console so that we can run in real time. The
+-- spawner is still not perfect (it sometimes hangs) but it was enough reason to
+-- spend time on making our lexer work with TextAdept and to create a setup.
+--
+-- TRACING
+--
+-- The advantage is that we now can check more easily with regular Lua(TeX). We can
+-- also use wine and print to the console (somehow stdout is intercepted there). So,
+-- I've added a bit of tracing. Interesting is to notice that each document gets its
+-- own instance which has advantages but also means that when we are spellchecking
+-- we reload the word lists each time. (In the past I assumed a shared instance and
+-- took some precautions. But I can fix this.)
--
-- TODO
--
--- I can make an export to context, but first I'll redo the code that makes the grammar,
--- as we only seem to need
+-- It would be nice if we could load some ConTeXt Lua modules (the basic set) and
+-- then use resolvers and such.
+--
+-- The current lexer basics are still a mix between old and new. Maybe I should redo
+-- some more. This is probably easier in TextAdept than in SciTE.
+--
+-- We have to make sure we don't overload ConTeXt definitions when this code is used
+-- in ConTeXt. I still have to add some of the goodies that we have there into
+-- these lexers.
+--
+-- Maybe I should use a special version of the dll, stripped down on the one hand
+-- and extended on the other (a stable api), and at least add a bit more
+-- interfacing to scintilla.
+--
+-- I need to investigate if we can use the already built-in Lua instance so that we
+-- can combine the power of lexing with extensions.
+--
+-- I need to play with hotspot and other properties like indicators (whatever they
+-- are).
+--
+-- I want to get rid of these lexers.STYLE_XX and lexers.XX things. This is possible
+-- when we give up compatibility. Generalize the helpers that I wrote for SciTE so
+-- that they can also be used in TextAdept.
+--
+-- I can make an export to ConTeXt, but first I'll redo the code that makes the
+-- grammar, as we only seem to need:
--
-- lexer._TOKENSTYLES : table
-- lexer._CHILDREN : flag
@@ -199,38 +277,30 @@ local inspect = false -- can save some 15% (maybe easier on scintilla)
-- lexers.load : function
-- lexers.lex : function
--
--- So, if we drop compatibility with other lex definitions, we can make things simpler. Howeverm in the
--- meantime one can just do this:
+-- So, if we drop compatibility with other lex definitions, we can make things
+-- simpler. However, in the meantime one can just do this:
--
-- context --extra=listing --scite [--compact --verycompact] somefile.tex
--
--- and get a printable document. So, this todo is obsolete.
-
--- TRACING
+-- and get a printable document. So, this todo is a bit obsolete.
--
--- The advantage is that we now can check more easily with regular Lua(TeX). We can also use wine and print
--- to the console (somehow stdout is intercepted there.) So, I've added a bit of tracing. Interesting is to
--- notice that each document gets its own instance which has advantages but also means that when we are
--- spellchecking we reload the word lists each time. (In the past I assumed a shared instance and took
--- some precautions.)
-
--- todo: make sure we don't overload context definitions when used in context
+-- Properties are an ugly mess ... due to changes in the interface we're now left
+-- with some hybrid that sort of works ok.
--- properties is an ugly mess ... due to chages in the interface we're now left with some hybrid
--- that sort of works ok
+-- textadept: buffer:colourise(0,-1) -- forces a relex of the whole buffer
local lpeg = require("lpeg")
local global = _G
-local find, gmatch, match, lower, upper, gsub, sub, format = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format
+local find, gmatch, match, lower, upper, gsub, sub, format, byte = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format, string.byte
local concat, sort = table.concat, table.sort
local type, next, setmetatable, rawset, tonumber, tostring = type, next, setmetatable, rawset, tonumber, tostring
local R, P, S, V, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg
local lpegmatch = lpeg.match
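+-- a guess at the host application: TextAdept provides the "textadept" global and
+-- a ConTeXt run provides "resolvers"; otherwise we assume that we run under SciTE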
+local usage = (textadept and "textadept") or (resolvers and "context") or "scite"
local nesting = 0
-
-local print = (textadept and ui and ui.print) or print
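+-- in TextAdept, ui.print goes to its message buffer, which doubles as a console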
+local print = textadept and ui and ui.print or print
local function report(fmt,str,...)
if log then
@@ -679,21 +749,34 @@ local locations = {
-- end
-- end
-local function collect(name)
- local rootlist = lexers.LEXERPATH or "."
- for root in gmatch(rootlist,"[^;]+") do
- local root = gsub(root,"/[^/]-lua$","")
- for i=1,#locations do
- local fullname = root .. "/" .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc
- if trace then
- report("attempt to locate '%s'",fullname)
- end
- local okay, result = pcall(function () return dofile(fullname) end)
- if okay then
- return result, fullname
+local collect
+
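+-- when we run inside ConTeXt the normal module loader applies, otherwise we scan
+-- the lexer path(s) for the requested file ourselves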
+if usage == "context" then
+
+ collect = function(name)
+ return require(name), name
+ end
+
+else
+
+ collect = function(name)
+ local rootlist = lexers.LEXERPATH or "."
+ for root in gmatch(rootlist,"[^;]+") do
+ local root = gsub(root,"/[^/]-lua$","")
+ for i=1,#locations do
+ local fullname = root .. "/" .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc
+ if trace then
+ report("attempt to locate '%s'",fullname)
+ end
+ local okay, result = pcall(function () return dofile(fullname) end)
+ if okay then
+ return result, fullname
+ end
end
end
+ -- return require(name), name
end
+
end
function context.loadluafile(name)
@@ -1371,25 +1454,33 @@ local function add_lexer(grammar, lexer) -- mostly the same as the original
end
local function build_grammar(lexer,initial_rule) -- same as the original
- local children = lexer._CHILDREN
+ local children = lexer._CHILDREN
local lexer_name = lexer._NAME
- if children then
+ local preamble = lexer._preamble
+ local grammar = lexer._grammar
+ if grammar then
+ -- experiment: the lexer brings a ready-made grammar of its own
+ elseif children then
if not initial_rule then
initial_rule = lexer_name
end
- local grammar = { initial_rule }
+ grammar = { initial_rule }
add_lexer(grammar, lexer)
lexer._INITIALRULE = initial_rule
- lexer._GRAMMAR = Ct(P(grammar))
+ grammar = Ct(P(grammar))
if trace then
report("building grammar for '%s' with whitespace '%s'and %s children",lexer_name,lexer.whitespace or "?",#children)
end
else
- lexer._GRAMMAR = Ct(join_tokens(lexer)^0)
+ grammar = Ct(join_tokens(lexer)^0)
if trace then
report("building grammar for '%s' with whitespace '%s'",lexer_name,lexer.whitespace or "?")
end
end
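+ -- an optional preamble pattern, when the lexer provides one, is tried at most
+ -- once before the grammar proper kicks in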
+ if preamble then
+ grammar = preamble^-1 * grammar
+ end
+ lexer._GRAMMAR = grammar
end
-- So far. We need these local functions in the next one.
@@ -1534,7 +1625,7 @@ function context.lex(lexer,text,init_style)
if trace then
report("lexing '%s' with initial style '%s' and %s children",lexer._NAME,#lexer._CHILDREN or 0,init_style)
end
- return matched(lexer,grammar,text)
+ return result
else
if trace then
report("lexing '%s' with initial style '%s'",lexer._NAME,init_style)
@@ -1733,7 +1824,7 @@ function context.loadlexer(filename,namespace)
lexer = load_lexer(filename,namespace) or nolexer(filename,namespace)
usedlexers[filename] = lexer
--
- if not lexer._rules and not lexer._lexer then
+ if not lexer._rules and not lexer._lexer and not lexer._grammar then
lexer._lexer = parent_lexer
end
--
@@ -1765,16 +1856,19 @@ function context.loadlexer(filename,namespace)
end
--
local _r = lexer._rules
- if _r then
+ local _g = lexer._grammar
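+ -- a lexer can now also provide a ready-made grammar instead of (or next to)
+ -- a list of rules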
+ if _r or _g then
local _s = lexer._tokenstyles
if _s then
for token, style in next, _s do
add_style(lexer, token, style)
end
end
- for i=1,#_r do
- local rule = _r[i]
- add_rule(lexer, rule[1], rule[2])
+ if _r then
+ for i=1,#_r do
+ local rule = _r[i]
+ add_rule(lexer, rule[1], rule[2])
+ end
end
build_grammar(lexer)
end
@@ -2001,10 +2095,20 @@ do
-- return make(tree)
-- end
- helpers.utfcharpattern = P(1) * R("\128\191")^0 -- unchecked but fast
+ local utf8next = R("\128\191")
+ local utf8one = R("\000\127")
+ local utf8two = R("\194\223") * utf8next
+ local utf8three = R("\224\239") * utf8next * utf8next
+ local utf8four = R("\240\244") * utf8next * utf8next * utf8next
+
+ helpers.utfcharpattern = P(1) * utf8next^0 -- unchecked but fast
+ helpers.utfbytepattern = utf8one / byte
+ + utf8two / function(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
+ + utf8three / function(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
+ + utf8four / function(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
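+
+ -- a minimal usage sketch, not part of the lexer itself: the byte pattern maps
+ -- one utf-8 sequence onto its code point, for example:
+ --
+ -- lpeg.match(helpers.utfcharpattern,"étexte") -- 3, the position after "é"
+ -- lpeg.match(helpers.utfbytepattern,"é") -- 233, i.e. U+00E9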
- local p_false = P(false)
- local p_true = P(true)
+ local p_false = P(false)
+ local p_true = P(true)
local function make(t)
local function making(t)