Diffstat (limited to 'context/data/textadept/context/lexers')
23 files changed, 942 insertions, 251 deletions
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua b/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
index dce24a2b9..b53da82ea 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-bibtex.lua
@@ -10,23 +10,22 @@ local global, string, table, lpeg = _G, string, table, lpeg
 local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
 local type = type
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
 
-local token = lexer.token
-local exact_match = lexer.exact_match
+local token = lexer.token
+local exact_match = lexer.exact_match
 
-local bibtexlexer = lexer.new("bib","scite-context-lexer-bibtex")
-local whitespace = bibtexlexer.whitespace
+local bibtexlexer = lexer.new("bib","scite-context-lexer-bibtex")
+local whitespace = bibtexlexer.whitespace
 
- local escape, left, right = P("\\"), P('{'), P('}')
+local escape, left, right = P("\\"), P('{'), P('}')
 
- patterns.balanced = P {
-     [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
-     [2] = left * V(1) * right
- }
+patterns.balanced = P {
+    [1] = ((escape * (left+right)) + (1 - (left+right)) + V(2))^0,
+    [2] = left * V(1) * right
+}
 
 -- taken from bibl-bib.lua
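The recursive patterns.balanced grammar introduced in this hunk is worth a closer look: rule [1] consumes escaped braces, any non-brace character, or a complete nested group, and rule [2] defines a group as a brace pair wrapping rule [1] again. Below is a minimal standalone sketch of the same grammar outside the lexer framework; the assert checks are illustrative only and not part of the lexer.

    local lpeg = require("lpeg")
    local P, V = lpeg.P, lpeg.V

    local escape, left, right = P("\\"), P("{"), P("}")

    local balanced = P {
        [1] = ((escape * (left + right)) + (1 - (left + right)) + V(2))^0,
        [2] = left * V(1) * right,
    }

    -- match() returns the position after the longest balanced prefix
    assert(lpeg.match(balanced, "a{b{c}}d") == 9) -- fully balanced, consumed to the end
    assert(lpeg.match(balanced, "a\\{b") == 5)    -- escaped brace, no group opened
    assert(lpeg.match(balanced, "a{b") == 2)      -- stops before the unclosed group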
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua b/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua
new file mode 100644
index 000000000..ea9c56712
--- /dev/null
+++ b/context/data/textadept/context/lexers/scite-context-lexer-bidi.lua
@@ -0,0 +1,598 @@
+local info = {
+    version   = 1.002,
+    comment   = "scintilla lpeg lexer for bidirectional text (experimental)",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+local P, S, Cmt, Cp = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp
+local find, match = string.find, string.match
+
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
+
+local token = lexer.token
+
+local bidilexer = lexer.new("bidi","scite-context-lexer-bidi")
+local whitespace = bidilexer.whitespace
+
+local space = patterns.space
+local any = patterns.any
+
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+
+require("char-def")
+
+characters.directions = { }
+
+setmetatable(characters.directions,{ __index = function(t,k)
+    local d = characters.data[k] -- characters.data is filled by char-def.lua
+    if d then
+        local v = d.direction
+        if v then
+            t[k] = v
+            return v
+        end
+    end
+    t[k] = false -- maybe 'l'
+    return false
+end })
+
+characters.mirrors = { }
+
+setmetatable(characters.mirrors,{ __index = function(t,k)
+    local d = characters.data[k]
+    if d then
+        local v = d.mirror
+        if v then
+            t[k] = v
+            return v
+        end
+    end
+    t[k] = false
+    return false
+end })
+
+characters.textclasses = { }
+
+setmetatable(characters.textclasses,{ __index = function(t,k)
+    local d = characters.data[k]
+    if d then
+        local v = d.textclass
+        if v then
+            t[k] = v
+            return v
+        end
+    end
+    t[k] = false
+    return false
+end })
+
+local directiondata = characters.directions
+local mirrordata = characters.mirrors
+local textclassdata = characters.textclasses
+
+local maximum_stack = 0xFF -- unicode: 60, will be jumped to 125, we don't care too much
+local analyze_fences = false
+
+local whitespace = {
+    lre = true,
+    rle = true,
+    lro = true,
+    rlo = true,
+    pdf = true,
+    bn  = true,
+    ws  = true,
+}
+
+local b_s_ws_on = {
+    b  = true,
+    s  = true,
+    ws = true,
+    on = true
+}
+
+local mt_space  = { __index = { char = 0x0020, direction = "ws",  original = "ws",  level = 0 } }
+local mt_lre    = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0 } }
+local mt_rle    = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0 } }
+local mt_pdf    = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } }
+local mt_object = { __index = { char = 0xFFFC, direction = "on",  original = "on",  level = 0 } }
+
+local list = { }
+local stack = { }
+
+setmetatable(stack, { __index = function(t,k) local v = { } t[k] = v return v end })
+
+local function build_list(head)
+    -- P1
+    local size = 0
+    lpeg.match(pattern,head) -- the 'pattern' that fills 'list' is not defined in this file yet
+    return list, size
+end
+
+local function resolve_fences(list,size,start,limit)
+    -- N0: funny effects, not always better, so it's an option
+    local nofstack = 0
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "on" then
+            local char = entry.char
+            local mirror = mirrordata[char]
+            if mirror then
+                local class = textclassdata[char]
+                entry.mirror = mirror
+                entry.class = class
+                if class == "open" then
+                    nofstack = nofstack + 1
+                    local stacktop = stack[nofstack]
+                    stacktop[1] = mirror
+                    stacktop[2] = i
+                    stacktop[3] = false -- not used
+                elseif nofstack == 0 then
+                    -- skip
+                elseif class == "close" then
+                    while nofstack > 0 do
+                        local stacktop = stack[nofstack]
+                        if stacktop[1] == char then
+                            local open = stacktop[2]
+                            local close = i
+                            list[open ].paired = close
+                            list[close].paired = open
+                            break
+                        else
+                            -- do we mirror or not
+                        end
+                        nofstack = nofstack - 1
+                    end
+                end
+            end
+        end
+    end
+end
+
+local function get_baselevel(list,size,direction)
+    if direction == "TRT" then
+        return 1, "TRT", true
+    elseif direction == "TLT" then
+        return 0, "TLT", true
+    end
+    -- P2, P3:
+    for i=1,size do
+        local entry = list[i]
+        local direction = entry.direction
+        if direction == "r" or direction == "al" then -- and an ?
+            return 1, "TRT", true
+        elseif direction == "l" then
+            return 0, "TLT", true
+        end
+    end
+    return 0, "TLT", false
+end
+
+local function resolve_explicit(list,size,baselevel)
+-- if list.rle or list.lre or list.rlo or list.lro then
+    -- X1
+    local level = baselevel
+    local override = "on"
+    local nofstack = 0
+    for i=1,size do
+        local entry = list[i]
+        local direction = entry.direction
+        -- X2
+        if direction == "rle" then
+            if nofstack < maximum_stack then
+                nofstack = nofstack + 1
+                local stacktop = stack[nofstack]
+                stacktop[1] = level
+                stacktop[2] = override
+                level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
+                override = "on"
+                entry.level = level
+                entry.direction = "bn"
+                entry.remove = true
+            end
+        -- X3
+        elseif direction == "lre" then
+            if nofstack < maximum_stack then
+                nofstack = nofstack + 1
+                local stacktop = stack[nofstack]
+                stacktop[1] = level
+                stacktop[2] = override
+                level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
+                override = "on"
+                entry.level = level
+                entry.direction = "bn"
+                entry.remove = true
+            end
+        -- X4
+        elseif direction == "rlo" then
+            if nofstack < maximum_stack then
+                nofstack = nofstack + 1
+                local stacktop = stack[nofstack]
+                stacktop[1] = level
+                stacktop[2] = override
+                level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
+                override = "r"
+                entry.level = level
+                entry.direction = "bn"
+                entry.remove = true
+            end
+        -- X5
+        elseif direction == "lro" then
+            if nofstack < maximum_stack then
+                nofstack = nofstack + 1
+                local stacktop = stack[nofstack]
+                stacktop[1] = level
+                stacktop[2] = override
+                level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
+                override = "l"
+                entry.level = level
+                entry.direction = "bn"
+                entry.remove = true
+            end
+        -- X7
+        elseif direction == "pdf" then
+            if nofstack < maximum_stack then
+                local stacktop = stack[nofstack]
+                level = stacktop[1]
+                override = stacktop[2]
+                nofstack = nofstack - 1
+                entry.level = level
+                entry.direction = "bn"
+                entry.remove = true
+            end
+        -- X6
+        else
+            entry.level = level
+            if override ~= "on" then
+                entry.direction = override
+            end
+        end
+    end
+-- else
+--     for i=1,size do
+--         list[i].level = baselevel
+--     end
+-- end
+    -- X8 (reset states and overrides after paragraph)
+end
+
+local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
+    -- W1: non spacing marks get the direction of the previous character
+-- if list.nsm then
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "nsm" then
+            if i == start then
+                entry.direction = orderbefore
+            else
+                entry.direction = list[i-1].direction
+            end
+        end
+    end
+-- end
+    -- W2: mess with numbers and arabic
+-- if list.en then
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "en" then
+            for j=i-1,start,-1 do
+                local prev = list[j]
+                local direction = prev.direction
+                if direction == "al" then
+                    entry.direction = "an"
+                    break
+                elseif direction == "r" or direction == "l" then
+                    break
+                end
+            end
+        end
+    end
+-- end
+    -- W3
+-- if list.al then
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "al" then
+            entry.direction = "r"
+        end
+    end
+-- end
+    -- W4: make separators number
+-- if list.es or list.cs then
+    -- skip
+-- if false then
+    if false then
+        for i=start+1,limit-1 do
+            local entry = list[i]
+            local direction = entry.direction
+            if direction == "es" then
+                if list[i-1].direction == "en" and list[i+1].direction == "en" then
+                    entry.direction = "en"
+                end
+            elseif direction == "cs" then
+                local prevdirection = list[i-1].direction
+                if prevdirection == "en" then
+                    if list[i+1].direction == "en" then
+                        entry.direction = "en"
+                    end
+                elseif prevdirection == "an" and list[i+1].direction == "an" then
+                    entry.direction = "an"
+                end
+            end
+        end
+    else -- only more efficient when we have es/cs
+        local runner = start + 2
+        local before = list[start]
+        local entry = list[start + 1]
+        local after = list[runner]
+        while after do
+            local direction = entry.direction
+            if direction == "es" then
+                if before.direction == "en" and after.direction == "en" then
+                    entry.direction = "en"
+                end
+            elseif direction == "cs" then
+                local prevdirection = before.direction
+                if prevdirection == "en" then
+                    if after.direction == "en" then
+                        entry.direction = "en"
+                    end
+                elseif prevdirection == "an" and after.direction == "an" then
+                    entry.direction = "an"
+                end
+            end
+            before = entry -- advance the (before,entry,after) window
+            entry = after
+            after = list[runner]
+            runner = runner + 1
+        end
+    end
+-- end
+    -- W5
+-- if list.et then
+    local i = start
+    while i <= limit do
+        if list[i].direction == "et" then
+            local runstart = i
+            local runlimit = runstart
+            for i=runstart,limit do
+                if list[i].direction == "et" then
+                    runlimit = i
+                else
+                    break
+                end
+            end
+            local rundirection = runstart == start and orderbefore or list[runstart-1].direction
+            if rundirection ~= "en" then
+                rundirection = runlimit == limit and orderafter or list[runlimit+1].direction
+            end
+            if rundirection == "en" then
+                for j=runstart,runlimit do
+                    list[j].direction = "en"
+                end
+            end
+            i = runlimit
+        end
+        i = i + 1
+    end
+-- end
+    -- W6
+-- if list.es or list.cs or list.et then
+    for i=start,limit do
+        local entry = list[i]
+        local direction = entry.direction
+        if direction == "es" or direction == "et" or direction == "cs" then
+            entry.direction = "on"
+        end
+    end
+-- end
+    -- W7
+    for i=start,limit do
+        local entry = list[i]
+        if entry.direction == "en" then
+            local prev_strong = orderbefore
+            for j=i-1,start,-1 do
+                local direction = list[j].direction
+                if direction == "l" or direction == "r" then
+                    prev_strong = direction
+                    break
+                end
+            end
+            if prev_strong == "l" then
+                entry.direction = "l"
+            end
+        end
+    end
+end
+
+local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
+    -- N1, N2
+    for i=start,limit do
+        local entry = list[i]
+        if b_s_ws_on[entry.direction] then
+            -- this needs checking
+            local leading_direction, trailing_direction, resolved_direction
+            local runstart = i
+            local runlimit = runstart
+            for j=runstart+1,limit do
+                if b_s_ws_on[list[j].direction] then
+                    runlimit = j
+                else
+                    break
+                end
+            end
+            if runstart == start then
+                leading_direction = orderbefore
+            else
+                leading_direction = list[runstart-1].direction
+                if leading_direction == "en" or leading_direction == "an" then
+                    leading_direction = "r"
+                end
+            end
+            if runlimit == limit then
+                trailing_direction = orderafter
+            else
+                trailing_direction = list[runlimit+1].direction
+                if trailing_direction == "en" or trailing_direction == "an" then
+                    trailing_direction = "r"
+                end
+            end
+            if leading_direction == trailing_direction then
+                -- N1
+                resolved_direction = leading_direction
+            else
+                -- N2 / does the weird period
+                resolved_direction = entry.level % 2 == 1 and "r" or "l"
+            end
+            for j=runstart,runlimit do
+                list[j].direction = resolved_direction
+            end
+            i = runlimit
+        end
+        i = i + 1
+    end
+end
+
+local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
+    for i=start,limit do
+        local entry = list[i]
+        local level = entry.level
+        local direction = entry.direction
+        if level % 2 ~= 1 then -- even
+            -- I1
+            if direction == "r" then
+                entry.level = level + 1
+            elseif direction == "an" or direction == "en" then
+                entry.level = level + 2
+            end
+        else
+            -- I2
+            if direction == "l" or direction == "en" or direction == "an" then
+                entry.level = level + 1
+            end
+        end
+    end
+end
+
+local function resolve_levels(list,size,baselevel,analyze_fences)
+    -- X10
+    local start = 1
+    while start < size do
+        local level = list[start].level
+        local limit = start + 1
+        while limit < size and list[limit].level == level do
+            limit = limit + 1
+        end
+        local prev_level = start == 1 and baselevel or list[start-1].level
+        local next_level = limit == size and baselevel or list[limit+1].level
+        local orderbefore = (level > prev_level and level or prev_level) % 2 == 1 and "r" or "l"
+        local orderafter = (level > next_level and level or next_level) % 2 == 1 and "r" or "l"
+        -- W1 .. W7
+        resolve_weak(list,size,start,limit,orderbefore,orderafter)
+        -- N0
+        if analyze_fences then
+            resolve_fences(list,size,start,limit)
+        end
+        -- N1 .. N2
+        resolve_neutral(list,size,start,limit,orderbefore,orderafter)
+        -- I1 .. I2
+        resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
+        start = limit
+    end
+    -- L1
+    for i=1,size do
+        local entry = list[i]
+        local direction = entry.original
+        -- (1)
+        if direction == "s" or direction == "b" then
+            entry.level = baselevel
+            -- (2)
+            for j=i-1,1,-1 do
+                local entry = list[j]
+                if whitespace[entry.original] then
+                    entry.level = baselevel
+                else
+                    break
+                end
+            end
+        end
+    end
+    -- (3)
+    for i=size,1,-1 do
+        local entry = list[i]
+        if whitespace[entry.original] then
+            entry.level = baselevel
+        else
+            break
+        end
+    end
+    -- L4
+    if analyze_fences then
+        for i=1,size do
+            local entry = list[i]
+            if entry.level % 2 == 1 then -- odd(entry.level)
+                if entry.mirror and not entry.paired then
+                    entry.mirror = false
+                end
+                -- okay
+            elseif entry.mirror then
+                entry.mirror = false
+            end
+        end
+    else
+        for i=1,size do
+            local entry = list[i]
+            if entry.level % 2 == 1 then -- odd(entry.level)
+                local mirror = mirrordata[entry.char]
+                if mirror then
+                    entry.mirror = mirror
+                end
+            end
+        end
+    end
+end
+
+local index = 1
+
+local function process(head,direction)
+    local list, size = build_list(head)
+    local baselevel = get_baselevel(list,size,direction) -- we always have an inline dir node in context
+    resolve_explicit(list,size,baselevel)
+    resolve_levels(list,size,baselevel,analyze_fences)
+    index = 1
+    return list, size
+end
+
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
+
+local utf = lexer.helpers.utfbytepattern
+
+-- local t_start = token("default", utf, function(s,i) if i == 1 then index = 1 process(s) end end))
+-- local t_bidi = token("error", utf / function() index = index + 1 return list[index].direction == "r" end)
+-- local t_rest = token("default", any)
+
+-- bidilexer._rules = {
+--     { "start", t_start },
+--     { "bidi", t_bidi },
+--     { "rest", t_rest },
+-- }
+
+bidilexer._grammar = #utf * function(s,i)
+    local list, size = process(s) -- process returns the list and its size
+    local t = { }
+    local n = 0
+    for i=1,size do
+        n = n + 1 t[n] = i
+        n = n + 1 t[n] = "error"
+    end
+    return t
+end
+
+bidilexer._tokenstyles = context.styleset
+
+return bidilexer
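One caveat with the new bidi lexer above: build_list matches a pattern that the file never defines, and the size it returns is never incremented, so as committed the file is clearly experimental and cannot run end to end. Purely as a hedged sketch of the missing piece — one entry per character, with the same fields the mt_* templates and the resolve_* passes expect — here is what such a collector could look like; utf8.codes assumes Lua 5.3 or newer, and none of this is part of the commit:

    -- hypothetical stand-in for the undefined list-building pattern
    local function build_list(head)
        local list = { }
        local size = 0
        for _, char in utf8.codes(head) do
            size = size + 1
            list[size] = {
                char      = char,
                direction = directiondata[char] or "l", -- false for unknown characters
                original  = directiondata[char] or "l",
                level     = 0,
            }
        end
        return list, size
    end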
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cld.lua b/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
index a5fbf9cd7..7bda7800e 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cld.lua
@@ -6,13 +6,12 @@ local info = {
     license = "see context related readme files",
 }
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
 
-local cldlexer = lexer.new("cld","scite-context-lexer-cld")
-local lualexer = lexer.load("scite-context-lexer-lua")
+local cldlexer = lexer.new("cld","scite-context-lexer-cld")
+local lualexer = lexer.load("scite-context-lexer-lua")
 
 -- can probably be done nicer now, a bit of a hack
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
index e8ff3c1ff..631a802fe 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cpp-web.lua
@@ -6,13 +6,12 @@ local info = {
     license = "see context related readme files",
 }
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
 
-local cppweblexer = lexer.new("cpp-web","scite-context-lexer-cpp")
-local cpplexer = lexer.load("scite-context-lexer-cpp")
+local cppweblexer = lexer.new("cpp-web","scite-context-lexer-cpp")
+local cpplexer = lexer.load("scite-context-lexer-cpp")
 
 -- can probably be done nicer now, a bit of a hack
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua b/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
index d56dc58f9..d9079855f 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-cpp.lua
@@ -10,8 +10,7 @@ local info = {
 
 local P, R, S = lpeg.P, lpeg.R, lpeg.S
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua b/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
index 69590ed34..5d3096b7d 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-dummy.lua
@@ -1,4 +1,4 @@
--- local info = {
+local info = {
     version = 1.002,
     comment = "scintilla lpeg lexer that triggers whitespace backtracking",
     author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
@@ -10,8 +10,7 @@
 -- we need to trigger that, for instance in the bibtex lexer, but still
 -- we get failed lexing
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua b/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
index 5d5b689d2..b1304f65c 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-lua-longstring.lua
@@ -6,8 +6,7 @@ local info = {
     license = "see context related readme files",
 }
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-lua.lua b/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
index a8aa8dbe3..ba14f5206 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-lua.lua
@@ -13,7 +13,6 @@ local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp
 local match, find = string.match, string.find
 local setmetatable = setmetatable
 
--- local lexer = require("lexer")
 local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
@@ -47,7 +46,7 @@ local functions = {
     "pcall", "print", "rawequal", "rawget", "rawset", "require",
     "setmetatable", "tonumber", "tostring", "type", "unpack", "xpcall", "select",
 
-    "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32",
+    "string", "table", "coroutine", "debug", "file", "io", "lpeg", "math", "os", "package", "bit32", "utf8",
 }
 
 local constants = {
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-mps.lua b/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
index e24a41d0c..1c87ea6d0 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-mps.lua
@@ -10,8 +10,7 @@ local global, string, table, lpeg = _G, string, table, lpeg
 local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
 local type = type
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
index cdf33cf7c..155a9bd51 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf-object.lua
@@ -10,8 +10,7 @@ local info = {
 
 local P, R, S, C, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.V
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
index f08d16488..14ba5296b 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf-xref.lua
@@ -10,7 +10,6 @@ local info = {
 
 local P, R = lpeg.P, lpeg.R
 
--- local lexer = require("lexer")
 local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua b/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
index 1d4796ea5..0fd238d63 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-pdf.lua
@@ -6,13 +6,12 @@ local info = {
     license = "see context related readme files",
 }
 
--- pdf is normally startic .. i.e. not edited so we don't really
+-- pdf is normally static .. i.e. not edited so we don't really
 -- need embedded lexers.
 
 local P, R, S, V = lpeg.P, lpeg.R, lpeg.S, lpeg.V
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-sql.lua b/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
index ea432c5c9..cf0a03331 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-sql.lua
@@ -8,8 +8,7 @@ local info = {
 
 local P, R, S = lpeg.P, lpeg.R, lpeg.S
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
index 4a55fd143..88499a9c2 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-tex-web.lua
@@ -6,13 +6,12 @@ local info = {
     license = "see context related readme files",
 }
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
-local context = lexer.context
-local patterns = context.patterns
+local lexer = require("scite-context-lexer")
+local context = lexer.context
+local patterns = context.patterns
 
-local texweblexer = lexer.new("tex-web","scite-context-lexer-tex")
-local texlexer = lexer.load("scite-context-lexer-tex")
+local texweblexer = lexer.new("tex-web","scite-context-lexer-tex")
+local texlexer = lexer.load("scite-context-lexer-tex")
 
 -- can probably be done nicer now, a bit of a hack
diff --git a/context/data/textadept/context/lexers/scite-context-lexer-tex.lua b/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
index bc08bfcd9..1f1246fc0 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer-tex.lua
@@ -31,8 +31,7 @@ local P, R, S, V, C, Cmt, Cp, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.V, lpeg.C, l
 local type, next = type, next
 local find, match, lower, upper = string.find, string.match, string.lower, string.upper
 
--- local lexer = require("lexer")
-local lexer = require("scite-context-lexer")
+local lexer = require("scite-context-lexer")
 local context = lexer.context
 local patterns = context.patterns
 local inform = context.inform
@@ -145,6 +144,9 @@ local validminimum = 3
 
 -- fails (empty loop message) ... latest lpeg issue?
 
+-- todo: Make sure we only do this at the beginning .. a pity that we
+-- can't store a state .. now it is done too often.
+
 local knownpreamble = Cmt(P("% "), function(input,i,_) -- todo : utfbomb, was #P("% ")
     if i < 10 then
         validwords, validminimum = false, 3
@@ -220,10 +222,12 @@ local p_comment = commentline
 ----- p_helper = backslash * exact_match(helpers)
 ----- p_primitive = backslash * exact_match(primitives)
 
-local p_command = backslash * lexer.helpers.utfchartabletopattern(currentcommands) * #(1-cstoken)
-local p_constant = backslash * lexer.helpers.utfchartabletopattern(constants) * #(1-cstoken)
-local p_helper = backslash * lexer.helpers.utfchartabletopattern(helpers) * #(1-cstoken)
-local p_primitive = backslash * lexer.helpers.utfchartabletopattern(primitives) * #(1-cstoken)
+local p_csdone = #(1-cstoken) + P(-1)
+
+local p_command = backslash * lexer.helpers.utfchartabletopattern(currentcommands) * p_csdone
+local p_constant = backslash * lexer.helpers.utfchartabletopattern(constants) * p_csdone
+local p_helper = backslash * lexer.helpers.utfchartabletopattern(helpers) * p_csdone
+local p_primitive = backslash * lexer.helpers.utfchartabletopattern(primitives) * p_csdone
 
 local p_ifprimitive = P("\\if") * cstoken^1
 local p_csname = backslash * (cstoken^1 + P(1))
@@ -446,12 +450,17 @@ local stopmetafuncode = token("embedded", stopmetafun)
 local callers = token("embedded", P("\\") * metafuncall) * metafunarguments
               + token("embedded", P("\\") * luacall)
 
-lexer.embed_lexer(contextlexer, cldlexer, startluacode, stopluacode)
 lexer.embed_lexer(contextlexer, mpslexer, startmetafuncode, stopmetafuncode)
+lexer.embed_lexer(contextlexer, cldlexer, startluacode, stopluacode)
+
+-- preamble is inefficient as it probably gets called each time (so some day I really need to
+-- patch the plugin)
+
+contextlexer._preamble = preamble
 
 contextlexer._rules = {
     { "whitespace", spacing },
-    { "preamble", preamble },
+    -- { "preamble", preamble },
     { "word", word },
     { "text", text }, -- non words
     { "comment", comment },
@@ -459,10 +468,10 @@ contextlexer._rules = {
     -- { "subsystem", subsystem },
     { "callers", callers },
     { "subsystem", subsystem },
+    { "ifprimitive", ifprimitive },
     { "helper", helper },
     { "command", command },
     { "primitive", primitive },
-    { "ifprimitive", ifprimitive },
     -- { "subsystem", subsystem },
     { "reserved", reserved },
     { "csname", csname },
@@ -490,10 +499,10 @@ if web then
     { "comment", comment },
     { "constant", constant },
     { "callers", callers },
+    { "ifprimitive", ifprimitive },
     { "helper", helper },
     { "command", command },
     { "primitive", primitive },
-    { "ifprimitive", ifprimitive },
     { "reserved", reserved },
     { "csname", csname },
     { "grouping", grouping },
@@ -514,10 +523,10 @@ else
     { "comment", comment },
     { "constant", constant },
     { "callers", callers },
+    { "ifprimitive", ifprimitive },
     { "helper", helper },
     { "command", command },
     { "primitive", primitive },
-    { "ifprimitive", ifprimitive },
     { "reserved", reserved },
     { "csname", csname },
     { "grouping", grouping },
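The p_csdone refactoring in the TeX lexer hunk above is easy to miss: the old patterns only used the lookahead #(1-cstoken), so a control sequence sitting at the very end of the buffer could never match; the new guard also accepts end of input via P(-1). A small illustration of the difference, with cstoken simplified to plain letters (the real lexer builds it from its own character classes):

    local lpeg = require("lpeg")
    local P, R = lpeg.P, lpeg.R

    local cstoken   = R("az","AZ")           -- simplified for the example
    local backslash = P("\\")
    local p_csdone  = #(1 - cstoken) + P(-1) -- next char ends the name, or end of input

    local p_command = backslash * P("relax") * p_csdone

    assert(lpeg.match(p_command, "\\relax ") ~= nil) -- followed by a space: matches
    assert(lpeg.match(p_command, "\\relax") ~= nil)  -- at end of input: now also matches
    assert(lpeg.match(p_command, "\\relaxx") == nil) -- longer name: correctly rejected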
context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua b/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua index 141de20e1..3cef71739 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-web-snippets.lua @@ -8,8 +8,7 @@ local info = { local P, R, S, C, Cg, Cb, Cs, Cmt, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cg, lpeg.Cb, lpeg.Cs, lpeg.Cmt, lpeg.match --- local lexer = require("lexer") -local lexer = require("scite-context-lexer") +local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-web.lua b/context/data/textadept/context/lexers/scite-context-lexer-web.lua index 6fe5ac84c..81a6f90df 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-web.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-web.lua @@ -8,8 +8,7 @@ local info = { local P, R, S = lpeg.P, lpeg.R, lpeg.S --- local lexer = require("lexer") -local lexer = require("scite-context-lexer") +local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua index 25fa9128f..f5ca86cb2 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-cdata.lua @@ -8,8 +8,7 @@ local info = { local P = lpeg.P --- local lexer = require("lexer") -local lexer = require("scite-context-lexer") +local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua index 2d7260b69..40de8f603 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-comment.lua @@ -8,8 +8,7 @@ local info = { local P = lpeg.P --- local lexer = require("lexer") -local lexer = require("scite-context-lexer") +local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua index 1ee96ba89..a1b717a6a 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml-script.lua @@ -8,7 +8,6 @@ local info = { local P = lpeg.P --- local lexer = require("lexer") local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer-xml.lua b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua index 1b7e2e897..bbdb3febc 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer-xml.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer-xml.lua @@ -17,8 +17,7 @@ local P, R, S, C, Cmt, Cp = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cmt, lpeg.Cp local type = type local match, find = string.match, string.find --- local lexer = 
require("lexer") -local lexer = require("scite-context-lexer") +local lexer = require("scite-context-lexer") local context = lexer.context local patterns = context.patterns diff --git a/context/data/textadept/context/lexers/scite-context-lexer.lua b/context/data/textadept/context/lexers/scite-context-lexer.lua index e526d5045..37f236a89 100644 --- a/context/data/textadept/context/lexers/scite-context-lexer.lua +++ b/context/data/textadept/context/lexers/scite-context-lexer.lua @@ -8,11 +8,6 @@ local info = { } --- todo: hook into context resolver etc --- todo: only old api in lexers, rest in context subnamespace --- todo: make sure we can run in one state .. copies or shared? --- todo: auto-nesting - if lpeg.setmaxstack then lpeg.setmaxstack(1000) end local log = false @@ -27,169 +22,252 @@ local inspect = false -- can save some 15% (maybe easier on scintilla) -- GET GOING -- --- You need to copy this file over lexer.lua. In principle other lexers could work too but --- not now. Maybe some day. All patterns will move into the patterns name space. I might do --- the same with styles. If you run an older version of SciTE you can take one of the --- archives. Pre 3.41 versions can just be copied to the right path, as there we still use --- part of the normal lexer. +-- You need to copy this file over lexer.lua. In principle other lexers could work +-- too but not now. Maybe some day. All patterns will move into the patterns name +-- space. I might do the same with styles. If you run an older version of SciTE you +-- can take one of the archives. Pre 3.41 versions can just be copied to the right +-- path, as there we still use part of the normal lexer. Below we mention some +-- issues with different versions of SciTE. We try to keep up with changes but best +-- check careful if the version that yuou install works as expected because SciTE +-- and the scintillua dll need to be in sync. -- -- REMARK -- --- We started using lpeg lexing as soon as it came available. Because we had rather demanding --- files and also wanted to use nested lexers, we ended up with our own variant. At least at --- that time this was more robust and also faster (as we have some pretty large lua data files --- and also work with large xml files). As a consequence successive versions had to be adapted --- to changes in the (at that time still unstable) api. In addition to lexing we also have --- spell checking and such. Around version 3.60 things became more stable so I don't expect to --- change much. +-- We started using lpeg lexing as soon as it came available. Because we had rather +-- demanding files and also wanted to use nested lexers, we ended up with our own +-- variant. At least at that time this was more robust and also much faster (as we +-- have some pretty large Lua data files and also work with large xml files). As a +-- consequence successive versions had to be adapted to changes in the (at that time +-- still unstable) api. In addition to lexing we also have spell checking and such. +-- Around version 3.60 things became more stable so I don't expect to change much. +-- +-- LEXING -- --- STATUS +-- When pc's showed up we wrote our own editor (texedit) in MODULA 2. It was fast, +-- had multiple overlapping (text) windows, could run in the at most 1M memory at +-- that time, etc. The realtime file browsing with lexing that we had at that time +-- is still on my current wish list. The color scheme and logic that we used related +-- to the logic behind the ConTeXt user interface that evolved. 
-- --- todo: maybe use a special stripped version of the dll (stable api) and add a bit more --- interfacing to scintilla --- todo: investigate if we can use the already built in lua instance so that we can combine the --- power of lexign with extensions --- todo: play with hotspot and other properties (but no real need now) --- todo: maybe come up with an extension to the api subsystem --- todo: add proper tracing and so .. not too hard as we can run on mtxrun, but we lack a console --- for debugging (ok, chicken-egg as lexers probably need to be loaded before a console can --- kick in) --- todo: get rid of these lexers.STYLE_XX and lexers.XX (hide such details) +-- Later I rewrote the editor in perl/tk. I don't like the perl syntax but tk +-- widgets are very powerful and hard to beat. In fact, TextAdept reminds me of +-- that: wrap your own interface around a framework (tk had an edit control that one +-- could control completely not that different from scintilla). Last time I checked +-- it still ran fine so I might try to implement something like its file handling in +-- TextAdept. -- --- wish: access to all scite properties and in fact integrate in scite +-- In the end I settled for SciTE for which I wrote TeX and MetaPost lexers that +-- could handle keyword sets. With respect to lexing (syntax highlighting) ConTeXt +-- has a long history, if only because we need it for manuals. Anyway, in the end we +-- arrived at lpeg based lexing (which is quite natural as we have lots of lpeg +-- usage in ConTeXt). The basic color schemes haven't changed much. The most +-- prominent differences are the nested lexers. -- +-- In the meantime I made the lexer suitable for typesetting sources which was no +-- big deal as we already had that in place (ConTeXt used lpeg from the day it +-- showed up so we have several lexing options there too). -- --- In the meantime I made the lexer suitable for typesetting sources which was no big deal as we --- already had that in place (ConTeXt used lpeg from the day it showed up so we have several lexing --- options there too). +-- Keep in mind that in ConTeXt (typesetting) lexing can follow several approached: +-- line based (which is handy for verbatim mode), syntax mode (which is nice for +-- tutorials), and tolerant mode (so that one can also show bad examples or errors). +-- These demands can clash. -- -- HISTORY -- --- The fold and lex functions are copied and patched from original code by Mitchell (see lexer.lua). --- All errors are mine. The ability to use lpeg in scintilla is a real nice addition and a brilliant --- move. The code is a byproduct of the (mainly Lua based) textadept (at the time I ran into it was --- a rapidly moving target so I decided to stick ot SciTE). When I played with it, it had no realtime --- output pane but that seems to be dealt with now (2017). I need to have a look at it in more detail --- but a first test again mad the output hang and it was a bit slow too (and I also want the log pane --- as scite has it, on the right, in view). So, for now I stick to SciTE even when it's somewhat --- crippled by the fact that we cannot hook our own (language dependent) lexer into the output pane --- (somehow the errorlist lexer is hard coded into the editor). Hopefully that will change some day. --- So, how did we arrive where we're now. +-- The remarks below are more for myself so that I keep track of changes in the +-- way we adapt to the changes in the scintillua and scite. 
+-- +-- The fold and lex functions are copied and patched from original code by Mitchell +-- (see lexer.lua) in the scintillua distribution. So whatever I say below, assume +-- that all errors are mine. The ability to use lpeg in scintilla is a real nice +-- addition and a brilliant move. The code is a byproduct of the (mainly Lua based) +-- TextAdept which at the time I ran into it was a rapidly moving target so I +-- decided to stick ot SciTE. When I played with it, it had no realtime output pane +-- although that seems to be dealt with now (2017). I need to have a look at it in +-- more detail but a first test again made the output hang and it was a bit slow too +-- (and I also want the log pane as SciTE has it, on the right, in view). So, for +-- now I stick to SciTE even when it's somewhat crippled by the fact that we cannot +-- hook our own (language dependent) lexer into the output pane (somehow the +-- errorlist lexer is hard coded into the editor). Hopefully that will change some +-- day. The ConTeXt distribution has cmd runner for textdept that will plug in the +-- lexers discussed here as well as a dedicated runner. Considere it an experiment. -- --- Starting with SciTE version 3.20 there is an issue with coloring. As we still lack a connection --- with SciTE itself (properties as well as printing to the log pane) and we cannot trace this (on --- windows). As far as I can see, there are no fundamental changes in lexer.lua or LexLPeg.cxx so it --- must be in Scintilla itself. So for the moment I stick to 3.10. Indicators are: no lexing of 'next' --- and 'goto <label>' in the Lua lexer and no brace highlighting either. Interesting is that it does --- work ok in the cld lexer (so the Lua code is okay). All seems to be ok again in later versions, --- so, when you update best check first and just switch back to an older version as normally a SciTE --- update is not critital. When char-def.lua lexes real fast this is a signal that the lexer quits --- somewhere halfway. Maybe there are some hard coded limitations on the amount of styles and/or --- length of names. +-- The basic code hasn't changed much but we had to adapt a few times to changes in +-- the api and/or work around bugs. Starting with SciTE version 3.20 there was an +-- issue with coloring. We still lacked a connection with SciTE itself (properties +-- as well as printing to the log pane) and we could not trace this (on windows). +-- However on unix we can see messages! As far as I can see, there are no +-- fundamental changes in lexer.lua or LexLPeg.cxx so it must be/have been in +-- Scintilla itself. So we went back to 3.10. Indicators of issues are: no lexing of +-- 'next' and 'goto <label>' in the Lua lexer and no brace highlighting either. +-- Interesting is that it does work ok in the cld lexer (so the Lua code is okay). +-- All seems to be ok again in later versions, so, when you update best check first +-- and just switch back to an older version as normally a SciTE update is not +-- critital. When char-def.lua lexes real fast this is a signal that the lexer quits +-- somewhere halfway. Maybe there are some hard coded limitations on the amount of +-- styles and/or length of names. -- --- Anyway, after checking 3.24 and adapting to the new lexer tables things are okay again. So, this --- version assumes 3.24 or higher. In 3.24 we have a different token result, i.e. no longer a { tag, --- pattern } but just two return values. I didn't check other changes but will do that when I run into --- issues. 
I had optimized these small tables by hashing which was more efficient but this is no longer --- needed. For the moment we keep some of that code around as I don't know what happens in future --- versions. I'm anyway still happy with this kind of lexing. +-- Anyway, after checking 3.24 and adapting to the new lexer tables things are okay +-- again. So, this version assumes 3.24 or higher. In 3.24 we have a different token +-- result, i.e. no longer a { tag, pattern } but just two return values. I didn't +-- check other changes but will do that when I run into issues. I had already +-- optimized these small tables by hashing which was much more efficient (and maybe +-- even more efficient than the current approach) but this is no longer needed. For +-- the moment we keep some of that code around as I don't know what happens in +-- future versions. I'm anyway still happy with this kind of lexing. -- --- In 3.31 another major change took place: some helper constants (maybe they're no longer constants) --- and functions were moved into the lexer modules namespace but the functions are assigned to the Lua --- module afterward so we cannot alias them beforehand. We're probably getting close to a stable --- interface now. I've considered making a whole copy and patch the other functions too as we need an --- extra nesting model. However, I don't want to maintain too much. An unfortunate change in 3.03 is --- that no longer a script can be specified. This means that instead of loading the extensions via the --- properties file, we now need to load them in our own lexers, unless of course we replace lexer.lua +-- In 3.31 another major change took place: some helper constants (maybe they're no +-- longer constants) and functions were moved into the lexer modules namespace but +-- the functions are assigned to the Lua module afterward so we cannot alias them +-- beforehand. We're probably getting close to a stable interface now. At that time +-- for the first time I considered making a whole copy and patch the other functions +-- too as we need an extra nesting model. However, I don't want to maintain too +-- much. An unfortunate change in 3.03 is that no longer a script can be specified. +-- This means that instead of loading the extensions via the properties file, we now +-- need to load them in our own lexers, unless of course we replace lexer.lua -- completely (which adds another installation issue). -- --- Another change has been that _LEXERHOME is no longer available. It looks like more and more --- functionality gets dropped so maybe at some point we need to ship our own dll/so files. For instance, --- I'd like to have access to the current filename and other scite properties. We could then cache some --- info with each file, if only we had knowledge of what file we're dealing with. +-- Another change has been that _LEXERHOME is no longer available. It looks like +-- more and more functionality gets dropped so maybe at some point we need to ship +-- our own dll/so files. For instance, I'd like to have access to the current +-- filename and other SciTE properties. We could then cache some info with each +-- file, if only we had knowledge of what file we're dealing with. This all makes a +-- nice installation more complex and (worse) makes it hard to share files between +-- different editors usign s similar directory structure. -- --- For huge files folding can be pretty slow and I do have some large ones that I keep open all the time. 
--- Loading is normally no ussue, unless one has remembered the status and the cursor is at the last line --- of a 200K line file. Optimizing the fold function brought down loading of char-def.lua from 14 sec --- => 8 sec. Replacing the word_match function and optimizing the lex function gained another 2+ seconds. --- A 6 second load is quite ok for me. The changed lexer table structure (no subtables) brings loading --- down to a few seconds. +-- For huge files folding can be pretty slow and I do have some large ones that I +-- keep open all the time. Loading is normally no ussue, unless one has remembered +-- the status and the cursor is at the last line of a 200K line file. Optimizing the +-- fold function brought down loading of char-def.lua from 14 sec => 8 sec. +-- Replacing the word_match function and optimizing the lex function gained another +-- 2+ seconds. A 6 second load is quite ok for me. The changed lexer table structure +-- (no subtables) brings loading down to a few seconds. -- --- When the lexer path is copied to the textadept lexer path, and the theme definition to theme path --- (as lexer.lua), the lexer works there as well. Although ... when I decided to check the state of --- textadept i had to adapt some loader code. It's not pretty but works and also permits overloading. --- When I have time and motive I will make a proper setup file to tune the look and feel a bit and --- associate suffixes with the context lexer. The textadept editor has a nice style tracing option but --- lacks the tabs for selecting files that scite has. It also has no integrated run that pipes to the --- log pane. Interesting is that the jit version of textadept crashes on lexing large files (and does --- not feel faster either; maybe a side effect of known limitations as we know that luajit is more --- limited than stock lua). Btw, in the meantime on unix one can test easier as there we can enable --- the loggers in this module. +-- When the lexer path is copied to the TextAdept lexer path, and the theme +-- definition to theme path (as lexer.lua), the lexer works there as well. Although +-- ... when I decided to check the state of TextAdept I had to adapt some loader +-- code. The solution is not pretty but works and also permits overloading. When I +-- have time and motive I will make a proper setup file to tune the look and feel a +-- bit more than we do now. The TextAdept editor nwo has tabs and a console so it +-- has become more useable for me (it's still somewhat slower than SciTE). +-- Interesting is that the jit version of TextAdept crashes on lexing large files +-- (and does not feel faster either; maybe a side effect of known limitations as we +-- know that Luajit is more limited than stock Lua). -- --- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name .. '_whitespace' which means --- that we need to have it frozen at the moment we load another lexer. Because spacing is used to revert --- to a parent lexer we need to make sure that we load children as late as possible in order not to get --- the wrong whitespace trigger. This took me quite a while to figure out (not being that familiar with --- the internals). The lex and fold functions have been optimized. It is a pitty that there is no proper --- print available. Another thing needed is a default style in our own theme style definition, as otherwise --- we get wrong nested lexers, especially if they are larger than a view. 
This is the hardest part of +-- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name .. +-- '_whitespace' which means that we need to have it frozen at the moment we load +-- another lexer. Because spacing is used to revert to a parent lexer we need to +-- make sure that we load children as late as possible in order not to get the wrong +-- whitespace trigger. This took me quite a while to figure out (not being that +-- familiar with the internals). The lex and fold functions have been optimized. It +-- is a pitty that there is no proper print available. Another thing needed is a +-- default style in our own theme style definition, as otherwise we get wrong nested +-- lexers, especially if they are larger than a view. This is the hardest part of -- getting things right. -- --- It's a pitty that there is no scintillua library for the OSX version of scite. Even better would be --- to have the scintillua library as integral part of scite as that way I could use OSX alongside --- windows and linux (depending on needs). Also nice would be to have a proper interface to scite then --- because currently the lexer is rather isolated and the lua version does not provide all standard --- libraries. It would also be good to have lpeg support in the regular scite lua extension (currently --- you need to pick it up from someplace else). +-- It's a pitty that there is no scintillua library for the OSX version of SciTE. +-- Even better would be to have the scintillua library as integral part of SciTE as +-- that way I could use OSX alongside windows and linux (depending on needs). Also +-- nice would be to have a proper interface to SciTE then because currently the +-- lexer is rather isolated and the Lua version does not provide all standard +-- libraries. It would also be good to have lpeg support in the regular SciTE Lua +-- extension (currently you need to pick it up from someplace else). I keep hoping. -- --- With 3.41 the interface changed again so it gets time to look into the C++ code and consider compiling --- and patching myself. Loading is more complicated now as the lexer gets loaded automatically so we have --- little control over extending the code now. After a few days trying all kind of solutions I decided to --- follow a different approach: drop in a complete replacement. This of course means that I need to keep --- track of even more changes (which for sure will happen) but at least I get rid of interferences. The --- api (lexing and configuration) is simply too unstable across versions. Maybe in a few years things have --- stabelized again. (Or maybe it's not really expected that one writes lexers at all.) A side effect is --- that I now no longer will use shipped lexers but just the built-in ones in addition to the context --- lpeg lexers. Not that it matters much as the context lexers cover what I need (and I can always write --- more). +-- With 3.41 the interface changed again so it became time to look into the C++ code +-- and consider compiling and patching myself, something that I like to avoid. +-- Loading is more complicated now as the lexer gets loaded automatically so we have +-- little control over extending the code now. After a few days trying all kind of +-- solutions I decided to follow a different approach: drop in a complete +-- replacement. This of course means that I need to keep track of even more changes +-- (which for sure will happen) but at least I get rid of interferences. 
Till 3.60 +-- the api (lexing and configuration) was simply too unstable across versions which +-- is a pitty because we expect authors to install SciTE without hassle. Maybe in a +-- few years things will have stabelized. Maybe it's also not really expected that +-- one writes lexers at all. A side effect is that I now no longer will use shipped +-- lexers for languages that I made no lexer for, but just the built-in ones in +-- addition to the ConTeXt lpeg lexers. Not that it matters much as the ConTeXt +-- lexers cover what I need (and I can always write more). For editing TeX files one +-- only needs a limited set of lexers (TeX, MetaPost, Lua, BibTeX, C/W, PDF, SQL, +-- etc). I can add more when I want. -- --- In fact, the transition to 3.41 was triggered by an unfateful update of Ubuntu which left me with an --- incompatible SciTE and lexer library and updating was not possible due to the lack of 64 bit libraries. --- We'll see what the future brings. +-- In fact, the transition to 3.41 was triggered by an unfateful update of Ubuntu +-- which left me with an incompatible SciTE and lexer library and updating was not +-- possible due to the lack of 64 bit libraries. We'll see what the future brings. +-- For now I can use SciTE under wine on linux. The fact that scintillua ships +-- independently is a showstopper. -- --- Promissing is that the library now can use another Lua instance so maybe some day it will get properly --- in SciTE and we can use more clever scripting. +-- Promissing is that the library now can use another Lua instance so maybe some day +-- it will get properly in SciTE and we can use more clever scripting. -- --- In some lexers we use embedded ones even if we could do it directly, The reason is that when the end --- token is edited (e.g. -->), backtracking to the space before the begin token (e.g. <!--) results in --- applying the surrounding whitespace which in turn means that when the end token is edited right, --- backtracking doesn't go back. One solution (in the dll) would be to backtrack several space categories. +-- In some lexers we use embedded ones even if we could do it directly, The reason +-- is that when the end token is edited (e.g. -->), backtracking to the space before +-- the begin token (e.g. <!--) results in applying the surrounding whitespace which +-- in turn means that when the end token is edited right, backtracking doesn't go +-- back. One solution (in the dll) would be to backtrack several space categories. -- After all, lexing is quite fast (applying the result is much slower). -- --- For some reason the first blob of text tends to go wrong (pdf and web). It would be nice to have 'whole --- doc' initial lexing. Quite fishy as it makes it impossible to lex the first part well (for already opened --- documents) because only a partial text is passed. +-- For some reason the first blob of text tends to go wrong (pdf and web). It would +-- be nice to have 'whole doc' initial lexing. Quite fishy as it makes it impossible +-- to lex the first part well (for already opened documents) because only a partial +-- text is passed. -- --- So, maybe I should just write this from scratch (assuming more generic usage) because after all, the dll --- expects just tables, based on a string. I can then also do some more aggressive resource sharing (needed --- when used generic). +-- So, maybe I should just write this from scratch (assuming more generic usage) +-- because after all, the dll expects just tables, based on a string. 
I can then +-- also do some more aggressive resource sharing (needed when used generic). -- --- I think that nested lexers are still bugged (esp over longer ranges). It never was robust or maybe it's --- simply not meant for too complex cases (well, it probably *is* tricky material). The 3.24 version was --- probably the best so far. The fact that styles bleed between lexers even if their states are isolated is --- an issue. Another issus is that zero characters in the text passed to the lexer can mess things up (pdf --- files have them in streams). +-- I think that nested lexers are still bugged (especially over longer ranges). It +-- never was robust or maybe it's simply not meant for too complex cases (well, it +-- probably *is* tricky material). The 3.24 version was probably the best so far. +-- The fact that styles bleed between lexers even if their states are isolated is an +-- issue. Another issue is that zero characters in the text passed to the lexer can +-- mess things up (pdf files have them in streams). -- --- For more complex 'languages', like web or xml, we need to make sure that we use e.g. 'default' for --- spacing that makes up some construct. Ok, we then still have a backtracking issue but less. +-- For more complex 'languages', like web or xml, we need to make sure that we use +-- e.g. 'default' for spacing that makes up some construct. Ok, we then still have a +-- backtracking issue but less. -- --- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++ and we ship an ini --- file for that editor with some installation instructions embedded. +-- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++ +-- and we ship an ini file for that editor with some installation instructions +-- embedded. Also, TextAdept has a console so that we can run in real time. The +-- spawner is still not perfect (sometimes hangs) but it was enough reason to spend +-- time on making our lexer work with TextAdept and create a setup. +-- +-- TRACING +-- +-- The advantage is that we can now check more easily with regular Lua(TeX). We can +-- also use Wine and print to the console (somehow stdout is intercepted there). So, +-- I've added a bit of tracing. It is interesting to notice that each document gets +-- its own instance, which has advantages but also means that when we are +-- spellchecking we reload the word lists each time. (In the past I assumed a shared +-- instance and took some precautions. But I can fix this.) -- -- TODO -- --- I can make an export to context, but first I'll redo the code that makes the grammar, --- as we only seem to need +-- It would be nice if we could load some ConTeXt Lua modules (the basic set) and +-- then use resolvers and such. +-- +-- The current lexer basics are still a mix of old and new. Maybe I should redo some +-- more. This is probably easier in TextAdept than in SciTE. +-- +-- We have to make sure we don't overload ConTeXt definitions when this code is used +-- in ConTeXt. I still have to add some of the goodies that we have there into these +-- lexers. +-- +-- Maybe I should use a special version of the dll that is stripped down on the one +-- hand and extended on the other (a stable api) and at least add a bit more +-- interfacing to scintilla. +-- +-- I need to investigate if we can use the already built-in Lua instance so that we +-- can combine the power of lexing with extensions. +-- +-- I need to play with hotspot and other properties like indicators (whatever they +-- are). 
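[Editor's sketch: the zero-character issue mentioned above (zero bytes in pdf streams confusing the lexer) could in principle be sanitized before lexing. This diff does not do that; the following only illustrates the idea.]

local lpeg  = require("lpeg")
local P, Cs = lpeg.P, lpeg.Cs

-- map embedded zero bytes onto spaces so that they can't confuse
-- patterns that expect printable text
local stripzeros = Cs((P("\0") / " " + P(1))^0)

print(lpeg.match(stripzeros, "stream\0data")) -- "stream data"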
+-- +-- I want to get rid of these lexers.STYLE_XX and lexers.XX things. This is possible +-- when we give up compatibility. Generalize the helpers that I wrote for SciTE so +-- that they can also be used in TextAdept. +-- +-- I can make an export to ConTeXt, but first I'll redo the code that makes the +-- grammar, as we only seem to need -- -- lexer._TOKENSTYLES : table -- lexer._CHILDREN : flag @@ -199,38 +277,30 @@ local inspect = false -- can save some 15% (maybe easier on scintilla) -- lexers.load : function -- lexers.lex : function -- --- So, if we drop compatibility with other lex definitions, we can make things simpler. Howeverm in the --- meantime one can just do this: +-- So, if we drop compatibility with other lex definitions, we can make things +-- simpler. However, in the meantime one can just do this: -- -- context --extra=listing --scite [--compact --verycompact] somefile.tex -- --- and get a printable document. So, this todo is obsolete. - --- TRACING +-- and get a printable document. So, this todo is a bit obsolete. -- --- The advantage is that we now can check more easily with regular Lua(TeX). We can also use wine and print --- to the console (somehow stdout is intercepted there.) So, I've added a bit of tracing. Interesting is to --- notice that each document gets its own instance which has advantages but also means that when we are --- spellchecking we reload the word lists each time. (In the past I assumed a shared instance and took --- some precautions.) - --- todo: make sure we don't overload context definitions when used in context +-- Properties are an ugly mess ... due to changes in the interface we're now left +-- with some hybrid that sort of works ok. --- properties is an ugly mess ... due to chages in the interface we're now left with some hybrid --- that sort of works ok +-- textadept: buffer:colourise(0,-1) local lpeg = require("lpeg") local global = _G -local find, gmatch, match, lower, upper, gsub, sub, format = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format +local find, gmatch, match, lower, upper, gsub, sub, format, byte = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format, string.byte local concat, sort = table.concat, table.sort local type, next, setmetatable, rawset, tonumber, tostring = type, next, setmetatable, rawset, tonumber, tostring local R, P, S, V, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg local lpegmatch = lpeg.match +local usage = (textadept and "textadept") or (resolvers and "context") or "scite" local nesting = 0 - -local print = (textadept and ui and ui.print) or print +local print = textadept and ui and ui.print or print local function report(fmt,str,...) if log then @@ -679,21 +749,34 @@ local locations = { -- end -- end -local function collect(name) - local rootlist = lexers.LEXERPATH or "." - for root in gmatch(rootlist,"[^;]+") do - local root = gsub(root,"/[^/]-lua$","") - for i=1,#locations do - local fullname = root .. "/" .. locations[i] .. "/" .. name .. 
".lua" -- so we can also check for .luc - if trace then - report("attempt to locate '%s'",fullname) - end - local okay, result = pcall(function () return dofile(fullname) end) - if okay then - return result, fullname +local collect + +if usage == "context" then + + collect = function(name) + return require(name), name + end + +else + + collect = function(name) + local rootlist = lexers.LEXERPATH or "." + for root in gmatch(rootlist,"[^;]+") do + local root = gsub(root,"/[^/]-lua$","") + for i=1,#locations do + local fullname = root .. "/" .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc + if trace then + report("attempt to locate '%s'",fullname) + end + local okay, result = pcall(function () return dofile(fullname) end) + if okay then + return result, fullname + end end end + -- return require(name), name end + end function context.loadluafile(name) @@ -1371,25 +1454,33 @@ local function add_lexer(grammar, lexer) -- mostly the same as the original end local function build_grammar(lexer,initial_rule) -- same as the original - local children = lexer._CHILDREN + local children = lexer._CHILDREN local lexer_name = lexer._NAME - if children then + local preamble = lexer._preamble + local grammar = lexer._grammar + if grammar then + -- experiment + elseif children then if not initial_rule then initial_rule = lexer_name end - local grammar = { initial_rule } + grammar = { initial_rule } add_lexer(grammar, lexer) lexer._INITIALRULE = initial_rule - lexer._GRAMMAR = Ct(P(grammar)) + grammar = Ct(P(grammar)) if trace then report("building grammar for '%s' with whitespace '%s'and %s children",lexer_name,lexer.whitespace or "?",#children) end else - lexer._GRAMMAR = Ct(join_tokens(lexer)^0) + grammar = Ct(join_tokens(lexer)^0) if trace then report("building grammar for '%s' with whitespace '%s'",lexer_name,lexer.whitespace or "?") end end + if preamble then + grammar = preamble^-1 * grammar + end + lexer._GRAMMAR = grammar end -- So far. We need these local functions in the next one. 
@@ -1534,7 +1625,7 @@ function context.lex(lexer,text,init_style) if trace then report("lexing '%s' with initial style '%s' and %s children",lexer._NAME,#lexer._CHILDREN or 0,init_style) end - return matched(lexer,grammar,text) + return result else if trace then report("lexing '%s' with initial style '%s'",lexer._NAME,init_style) @@ -1733,7 +1824,7 @@ function context.loadlexer(filename,namespace) lexer = load_lexer(filename,namespace) or nolexer(filename,namespace) usedlexers[filename] = lexer -- - if not lexer._rules and not lexer._lexer then + if not lexer._rules and not lexer._lexer and not lexer._grammar then lexer._lexer = parent_lexer end -- @@ -1765,16 +1856,19 @@ function context.loadlexer(filename,namespace) end -- local _r = lexer._rules - if _r then + local _g = lexer._grammar + if _r or _g then local _s = lexer._tokenstyles if _s then for token, style in next, _s do add_style(lexer, token, style) end end - for i=1,#_r do - local rule = _r[i] - add_rule(lexer, rule[1], rule[2]) + if _r then + for i=1,#_r do + local rule = _r[i] + add_rule(lexer, rule[1], rule[2]) + end end build_grammar(lexer) end @@ -2001,10 +2095,20 @@ do -- return make(tree) -- end - helpers.utfcharpattern = P(1) * R("\128\191")^0 -- unchecked but fast + local utf8next = R("\128\191") + local utf8one = R("\000\127") + local utf8two = R("\194\223") * utf8next + local utf8three = R("\224\239") * utf8next * utf8next + local utf8four = R("\240\244") * utf8next * utf8next * utf8next + + helpers.utfcharpattern = P(1) * utf8next^0 -- unchecked but fast + helpers.utfbytepattern = utf8one / byte + + utf8two / function(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end + + utf8three / function(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end + + utf8four / function(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end - local p_false = P(false) - local p_true = P(true) + local p_false = P(false) + local p_true = P(true) local function make(t) local function making(t)
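[Editor's note on the utfbytepattern hunk above: the magic constants are just the usual UTF-8 offsets folded into one subtraction, i.e. 192*64 + 128 = 12416 for two bytes, 224*4096 + 128*64 + 128 = 925824 for three, and 240*262144 + 128*4096 + 128*64 + 128 = 63447168 for four. A standalone check of the two-byte case, mirroring the pattern from the diff:]

local lpeg = require("lpeg")
local R    = lpeg.R
local byte = string.byte

local utf8next = R("\128\191")
local utf8two  = R("\194\223") * utf8next

-- (c1 - 192) * 64 + (c2 - 128), folded into a single subtraction
local pattern = utf8two / function(s)
    local c1, c2 = byte(s,1,2)
    return c1 * 64 + c2 - 12416
end

print(lpeg.match(pattern, "\195\169")) -- 233, i.e. U+00E9 (é)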