Diffstat (limited to 'context/data/scite/lexers/scite-context-lexer.lua')
-rw-r--r--  context/data/scite/lexers/scite-context-lexer.lua | 1265
1 file changed, 978 insertions(+), 287 deletions(-)
diff --git a/context/data/scite/lexers/scite-context-lexer.lua b/context/data/scite/lexers/scite-context-lexer.lua
index 7c4f7b077..be130077b 100644
--- a/context/data/scite/lexers/scite-context-lexer.lua
+++ b/context/data/scite/lexers/scite-context-lexer.lua
@@ -1,5 +1,5 @@
local info = {
- version = 1.324,
+ version = 1.400,
comment = "basics for scintilla lpeg lexer for context/metafun",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
copyright = "PRAGMA ADE / ConTeXt Development Team",
@@ -8,168 +8,380 @@ local info = {
}
--- todo: move all code here
--- todo: explore adapted dll ... properties + init
--- todo: play with hotspot and other properties
-
--- wish: replace errorlist lexer (per language!)
--- wish: access to all scite properties
+local trace = true -- false
--- The fold and lex functions are copied and patched from original code by Mitchell (see
--- lexer.lua). All errors are mine. The ability to use lpeg is a real nice adition and a
--- brilliant move. The code is a byproduct of the (mainly Lua based) textadept (still a
--- rapidly moving target) that unfortunately misses a realtime output pane. On the other
--- hand, SciTE is somewhat crippled by the fact that we cannot pop in our own (language
--- dependent) lexer into the output pane (somehow the errorlist lexer is hard coded into
--- the editor). Hopefully that will change some day.
+-- GET GOING
+--
+-- You need to copy this file over lexer.lua. In principle other lexers could
+-- work too but not now. Maybe some day. All patterns will move into the patterns
+-- name space. I might do the same with styles. If you run an older version of
+-- SciTE you can take one of the archives. Pre 3.41 versions can just be copied
+-- to the right path, as there we still use part of the normal lexer.
+--
+-- REMARK
--
--- Starting with SciTE version 3.20 there is an issue with coloring. As we still lack
--- a connection with scite itself (properties as well as printing to the log pane) we
--- cannot trace this (on windows). As far as I can see, there are no fundamental
--- changes in lexer.lua or LexLPeg.cxx so it must be in scintilla itself. So for the
--- moment I stick to 3.10. Indicators are: no lexing of 'next' and 'goto <label>' in the
--- Lua lexer and no brace highlighting either. Interesting is that it does work ok in
--- the cld lexer (so the Lua code is okay). Also the fact that char-def.lua lexes fast
--- is a signal that the lexer quits somewhere halfway.
+-- We started using lpeg lexing as soon as it became available. Because we had
+-- rather demanding files and also wanted to use nested lexers, we ended up with
+-- our own variant (more robust and faster). As a consequence successive versions
+-- had to be adapted to changes in the (still unstable) api. In addition to
+-- lexing we also have spell checking and such.
+--
+-- STATUS
+--
+-- todo: maybe use a special stripped version of the dll (stable api)
+-- todo: play with hotspot and other properties
+-- wish: access to all scite properties and in fact integrate in scite
+-- todo: add proper tracing and so .. not too hard as we can run on mtxrun
+-- todo: get rid of these lexers.STYLE_XX and lexers.XX (hide such details)
--
--- After checking 3.24 and adapting to the new lexer tables things are okay again. So,
--- this version assumes 3.24 or higher. In 3.24 we have a different token result, i.e. no
--- longer a { tag, pattern } but just two return values. I didn't check other changes but
--- will do that when I run into issues. I had optimized these small tables by hashing which
--- was more efficient but this is no longer needed.
+-- HISTORY
--
--- In 3.3.1 another major change took place: some helper constants (maybe they're no
--- longer constants) and functions were moved into the lexer modules namespace but the
--- functions are assigned to the Lua module afterward so we cannot alias them beforehand.
--- We're probably getting close to a stable interface now.
+-- The fold and lex functions are copied and patched from original code by Mitchell
+-- (see lexer.lua). All errors are mine. The ability to use lpeg is a really nice
+-- addition and a brilliant move. The code is a byproduct of the (mainly Lua based)
+-- textadept (still a rapidly moving target) that unfortunately misses a realtime
+-- output pane. On the other hand, SciTE is somewhat crippled by the fact that we
+-- cannot pop in our own (language dependent) lexer into the output pane (somehow
+-- the errorlist lexer is hard coded into the editor). Hopefully that will change
+-- some day.
--
--- I've considered making a whole copy and patch the other functions too as we need
--- an extra nesting model. However, I don't want to maintain too much. An unfortunate
--- change in 3.03 is that no longer a script can be specified. This means that instead
--- of loading the extensions via the properties file, we now need to load them in our
--- own lexers, unless of course we replace lexer.lua completely (which adds another
--- installation issue).
+-- Starting with SciTE version 3.20 there is an issue with coloring. As we still
+-- lack a connection with SciTE itself (properties as well as printing to the log
+-- pane) we cannot trace this (on windows). As far as I can see, there are no
+-- fundamental changes in lexer.lua or LexLPeg.cxx so it must be in Scintilla
+-- itself. So for the moment I stick to 3.10. Indicators are: no lexing of 'next'
+-- and 'goto <label>' in the Lua lexer and no brace highlighting either. Interesting
+-- is that it does work ok in the cld lexer (so the Lua code is okay). Also the fact
+-- that char-def.lua lexes fast is a signal that the lexer quits somewhere halfway.
+-- Maybe there are some hard coded limitations on the number of styles and/or the
+-- length of names.
--
--- Another change has been that _LEXERHOME is no longer available. It looks like more and
--- more functionality gets dropped so maybe at some point we need to ship our own dll/so
--- files. For instance, I'd like to have access to the current filename and other scite
--- properties. For instance, we could cache some info with each file, if only we had
--- knowledge of what file we're dealing with.
+-- After checking 3.24 and adapting to the new lexer tables things are okay again.
+-- So, this version assumes 3.24 or higher. In 3.24 we have a different token
+-- result, i.e. no longer a { tag, pattern } but just two return values. I didn't
+-- check other changes but will do that when I run into issues. I had optimized
+-- these small tables by hashing which was more efficient but this is no longer
+-- needed. For the moment we keep some of that code around as I don't know what
+-- happens in future versions.
--
--- For huge files folding can be pretty slow and I do have some large ones that I keep
--- open all the time. Loading is normally no ussue, unless one has remembered the status
--- and the cursor is at the last line of a 200K line file. Optimizing the fold function
--- brought down loading of char-def.lua from 14 sec => 8 sec. Replacing the word_match
--- function and optimizing the lex function gained another 2+ seconds. A 6 second load
--- is quite ok for me. The changed lexer table structure (no subtables) brings loading
--- down to a few seconds.
+-- In 3.31 another major change took place: some helper constants (maybe they're no
+-- longer constants) and functions were moved into the lexer modules namespace but
+-- the functions are assigned to the Lua module afterward so we cannot alias them
+-- beforehand. We're probably getting close to a stable interface now. I've
+-- considered making a whole copy and patch the other functions too as we need an
+-- extra nesting model. However, I don't want to maintain too much. An unfortunate
+-- change in 3.03 is that no longer a script can be specified. This means that
+-- instead of loading the extensions via the properties file, we now need to load
+-- them in our own lexers, unless of course we replace lexer.lua completely (which
+-- adds another installation issue).
--
--- When the lexer path is copied to the textadept lexer path, and the theme definition to
--- theme path (as lexer.lua), the lexer works there as well. When I have time and motive
--- I will make a proper setup file to tune the look and feel a bit and associate suffixes
--- with the context lexer. The textadept editor has a nice style tracing option but lacks
--- the tabs for selecting files that scite has. It also has no integrated run that pipes
--- to the log pane (I wonder if it could borrow code from the console2 project). Interesting
--- is that the jit version of textadept crashes on lexing large files (and does not feel
--- faster either).
+-- Another change has been that _LEXERHOME is no longer available. It looks like
+-- more and more functionality gets dropped so maybe at some point we need to ship
+-- our own dll/so files. For instance, I'd like to have access to the current
+-- filename and other scite properties. For instance, we could cache some info with
+-- each file, if only we had knowledge of what file we're dealing with.
--
--- Function load(lexer_name) starts with _M.WHITESPACE = lexer_name..'_whitespace' which
--- means that we need to have it frozen at the moment we load another lexer. Because spacing
--- is used to revert to a parent lexer we need to make sure that we load children as late
--- as possible in order not to get the wrong whitespace trigger. This took me quite a while
--- to figure out (not being that familiar with the internals). The lex and fold functions
--- have been optimized. It is a pitty that there is no proper print available. Another thing
--- needed is a default style in ourown theme style definition, as otherwise we get wrong
--- nested lexers, especially if they are larger than a view. This is the hardest part of
+-- For huge files folding can be pretty slow and I do have some large ones that I
+-- keep open all the time. Loading is normally no issue, unless one has remembered
+-- the status and the cursor is at the last line of a 200K line file. Optimizing the
+-- fold function brought down loading of char-def.lua from 14 sec => 8 sec.
+-- Replacing the word_match function and optimizing the lex function gained another
+-- 2+ seconds. A 6 second load is quite ok for me. The changed lexer table structure
+-- (no subtables) brings loading down to a few seconds.
+--
+-- When the lexer path is copied to the textadept lexer path, and the theme
+-- definition to theme path (as lexer.lua), the lexer works there as well. When I
+-- have time and motive I will make a proper setup file to tune the look and feel a
+-- bit and associate suffixes with the context lexer. The textadept editor has a
+-- nice style tracing option but lacks the tabs for selecting files that scite has.
+-- It also has no integrated run that pipes to the log pane. Interesting is that the
+-- jit version of textadept crashes on lexing large files (and does not feel faster
+-- either; maybe a side effect of known limitations).
+--
+-- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name ..
+-- '_whitespace' which means that we need to have it frozen at the moment we load
+-- another lexer. Because spacing is used to revert to a parent lexer we need to
+-- make sure that we load children as late as possible in order not to get the wrong
+-- whitespace trigger. This took me quite a while to figure out (not being that
+-- familiar with the internals). The lex and fold functions have been optimized. It
+-- is a pity that there is no proper print available. Another thing needed is a
+-- default style in our own theme style definition, as otherwise we get wrong nested
+-- lexers, especially if they are larger than a view. This is the hardest part of
-- getting things right.
--
--- Eventually it might be safer to copy the other methods from lexer.lua here as well so
--- that we have no dependencies, apart from the c library (for which at some point the api
--- will be stable I hope).
+-- It's a pity that there is no scintillua library for the OSX version of scite.
+-- Even better would be to have the scintillua library as integral part of scite as
+-- that way I could use OSX alongside windows and linux (depending on needs). Also
+-- nice would be to have a proper interface to scite then because currently the
+-- lexer is rather isolated and the lua version does not provide all standard
+-- libraries. It would also be good to have lpeg support in the regular scite lua
+-- extension (currently you need to pick it up from someplace else).
+--
+-- With 3.41 the interface changed again so it gets time to look into the C++ code
+-- and consider compiling and patching myself. Loading is more complicated now as
+-- the lexer gets loaded automatically, so we have little control over extending the
+-- code now. After a few days trying all kind of solutions I decided to follow a
+-- different approach: drop in a complete replacement. This of course means that I
+-- need to keep track of even more changes (which for sure will happen) but at least
+-- I get rid of interferences. The api (lexing and configuration) is simply too
+-- unstable across versions. Maybe in a few years things have stabilized. (Or maybe
+-- it's not really expected that one writes lexers at all.) A side effect is that I
+-- now no longer will use shipped lexers but just the built-in ones. Not that it
+-- matters much as the context lexers cover what I need (and I can always write
+-- more).
--
--- It's a pitty that there is no scintillua library for the OSX version of scite. Even
--- better would be to have the scintillua library as integral part of scite as that way I
--- could use OSX alongside windows and linux (depending on needs). Also nice would be to
--- have a proper interface to scite then because currently the lexer is rather isolated and the
--- lua version does not provide all standard libraries. It would also be good to have lpeg
--- support in the regular scite lua extension (currently you need to pick it up from someplace
--- else).
+-- In fact, the transition to 3.41 was triggered by an ill-fated update of Ubuntu
+-- which left me with an incompatible SciTE and lexer library and updating was not
+-- possible due to the lack of 64 bit libraries. We'll see what the future brings.
+--
+-- Promising is that the library can now use another Lua instance, so maybe some
+-- day it will be properly integrated in SciTE and we can use more clever scripting.
-local lpeg = require 'lpeg'
+-- TRACING
+--
+-- The advantage is that we now can check more easily with regular Lua. We can also
+-- use wine and print to the console (somehow stdout is intercepted there). So, I've
+-- added a bit of tracing. Interesting is to notice that each document gets its own
+-- instance, which is pretty inefficient when we are spellchecking. (In the past I
+-- assumed a shared instance and took some precautions.)
+
+local lpeg = require("lpeg")
-local R, P, S, C, V, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.C, lpeg.V, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg
-local lpegmatch = lpeg.match
-local find, gmatch, match, lower, upper, gsub = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub
-local concat = table.concat
local global = _G
-local type, next, setmetatable, rawset = type, next, setmetatable, rawset
-
--- less confusing as we also use lexer for the current lexer and local _M = lexer is just ugly
-
-local lexers = lexer or { } -- + fallback for syntax check
-
--- ok, let's also move helpers here (todo: all go here)
-
-local sign = S("+-")
-local digit = R("09")
-local octdigit = R("07")
-local hexdigit = R("09","AF","af")
-
-lexers.sign = sign
-lexers.digit = digit
-lexers.octdigit = octdigit
-lexers.hexdigit = hexdigit
-lexers.xdigit = hexdigit
-
-lexers.dec_num = digit^1
-lexers.oct_num = P("0")
- * octdigit^1
-lexers.hex_num = P("0") * S("xX")
- * (hexdigit^0 * '.' * hexdigit^1 + hexdigit^1 * '.' * hexdigit^0 + hexdigit^1)
- * (S("pP") * sign^-1 * hexdigit^1)^-1
-lexers.float = sign^-1
- * (digit^0 * '.' * digit^1 + digit^1 * '.' * digit^0 + digit^1)
- * S("eE") * sign^-1 * digit^1
-
-lexers.dec_int = sign^-1 * lexers.dec_num
-lexers.oct_int = sign^-1 * lexers.oct_num
-lexers.hex_int = sign^-1 * lexers.hex_num
-
--- these helpers are set afterwards so we delay their initialization ... there is no need to alias
--- each time again and this way we can more easily adapt to updates
-
-local get_style_at, get_indent_amount, get_property, get_fold_level, FOLD_BASE, FOLD_HEADER, FOLD_BLANK, initialize
-
-initialize = function()
- FOLD_BASE = lexers.FOLD_BASE or SC_FOLDLEVELBASE
- FOLD_HEADER = lexers.FOLD_HEADER or SC_FOLDLEVELHEADERFLAG
- FOLD_BLANK = lexers.FOLD_BLANK or SC_FOLDLEVELWHITEFLAG
- get_style_at = lexers.get_style_at or GetStyleAt
- get_indent_amount = lexers.get_indent_amount or GetIndentAmount
- get_property = lexers.get_property or GetProperty
- get_fold_level = lexers.get_fold_level or GetFoldLevel
+local find, gmatch, match, lower, upper, gsub, sub, format = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format
+local concat = table.concat
+local type, next, setmetatable, rawset, tonumber, tostring = type, next, setmetatable, rawset, tonumber, tostring
+local R, P, S, V, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg
+local lpegmatch = lpeg.match
+
+local nesting = 0
+
+local function report(fmt,str,...)
+ if str then
+ fmt = format(fmt,str,...)
+ end
+ print(format("scite lpeg lexer > %s > %s",nesting == 0 and "-" or nesting,fmt))
+end
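+-- For example: report("loading lexer '%s'","tex") prints a line like
+--
+--   scite lpeg lexer > - > loading lexer 'tex'
+--
+-- where the middle field shows the current nesting depth (or '-' at level 0).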
+
+if trace then
+ report("loading context lexer module (global table: %s)",tostring(global))
+end
+
+if not package.searchpath then
+
+ -- Unfortunately the io library is only available when we end up
+ -- in this branch of code.
+
+ if trace then
+ report("using adapted function 'package.searchpath'")
+ end
+
+ function package.searchpath(name,path)
+ local tried = { }
+ for part in gmatch(path,"[^;]+") do
+ local filename = gsub(part,"%?",name)
+ local f = io.open(filename,"r")
+ if f then
+ f:close()
+ return filename
+ end
+ tried[#tried + 1] = format("no file '%s'",filename)
+ end
+ -- added: also try the plain name .. for local testing (note that the
+ -- 'filename' from the loop above is out of scope here)
+ local f = io.open(name,"r")
+ if f then
+ f:close()
+ return name
+ end
+ --
+ tried[#tried + 1] = format("no file '%s'",name)
+ return nil, concat(tried,"\n")
+ end
+
+end
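+-- For example (a hypothetical path string):
+--
+-- package.searchpath("foo","./?.lua;lexers/?.lua")
+--
+-- probes ./foo.lua and lexers/foo.lua (and finally the plain name) and returns
+-- the first name that can be opened.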
+
+local lexers = { }
+local context = { }
+lexers.context = context
+
+local patterns = { }
+context.patterns = patterns -- todo: lexers.patterns
+
+lexers._CONTEXTEXTENSIONS = true
+lexers.LEXERPATH = package.path -- no need
+
+local usedlexers = { }
+local parent_lexer = nil
+
+-- The problem with styles is that there is some nasty interaction with scintilla
+-- and each version of lexer dll/so has a different issue. So, from now on we will
+-- just add them here. There is also a limit of some 30 styles. Maybe I should
+-- hash them in order to reuse them.
+
+local default = {
+ "nothing", "whitespace", "comment", "string", "number", "keyword",
+ "identifier", "operator", "error", "preprocessor", "constant", "variable",
+ "function", "type", "label", "embedded",
+ -- "regex", "class",
+ "quote", "special", "extra", "reserved", "okay", "warning",
+ "command", "internal", "preamble", "grouping", "primitive", "plain",
+ "user",
+ -- "invisible", "data",
+}
+
+local predefined = {
+ 'default', 'linenumber', 'bracelight', 'bracebad', 'controlchar',
+ 'indentguide', 'calltip'
+}
+
+-- Bah ... ugly ... nicer would be a proper hash .. we now have properties
+-- as well as STYLE_* and some connection between them ... why .. ok, we
+-- could delay things but who cares. Anyway, at this moment the properties
+-- are still unknown.
+
+local function preparestyles(list)
+ for i=1,#list do
+ local k = list[i]
+ local K = upper(k)
+ local s = "style." .. k
+ lexers[K] = k -- is this used
+ lexers['STYLE_'..K] = '$(' .. k .. ')'
+ end
+end
+
+preparestyles(default)
+preparestyles(predefined)
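+-- So, after the loop above, the "comment" entry for instance gives us:
+--
+-- lexers.COMMENT       = "comment"
+-- lexers.STYLE_COMMENT = "$(comment)"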
+
+-- These helpers are set afterwards so we delay their initialization ... there
+-- is no need to alias each time again and this way we can more easily adapt
+-- to updates.
+
+-- These keep changing (values, functions, tables ...) so we need to check these
+-- with each update. Some of them are set in the loader (the require 'lexer' is
+-- in fact not a real one as the lexer code is loaded in the dll). It's also not
+-- getting more efficient.
+
+-- FOLD_BASE = lexers.FOLD_BASE or SC_FOLDLEVELBASE
+-- FOLD_HEADER = lexers.FOLD_HEADER or SC_FOLDLEVELHEADERFLAG
+-- FOLD_BLANK = lexers.FOLD_BLANK or SC_FOLDLEVELWHITEFLAG
+-- get_style_at = lexers.get_style_at or GetStyleAt
+-- get_indent_amount = lexers.get_indent_amount or GetIndentAmount
+-- get_property = lexers.get_property or GetProperty
+-- get_fold_level = lexers.get_fold_level or GetFoldLevel
+
+-- It needs checking: do we have access to all properties now? I'll clean
+-- this up anyway as I want a simple clean and stable model.
+
+local FOLD_BASE = 0
+local FOLD_HEADER = 0
+local FOLD_BLANK = 0
+
+local style_at = { }
+local indent_amount = { }
+local fold_level = { }
+
+local function initialize()
+ FOLD_BASE = lexers.FOLD_BASE
+ FOLD_HEADER = lexers.FOLD_HEADER
+ FOLD_BLANK = lexers.FOLD_BLANK
+ --
+ style_at = lexers.style_at -- table
+ indent_amount = lexers.indent_amount -- table
+ fold_level = lexers.fold_level -- table
--
initialize = nil
+ --
+end
+
+local function get_property(tag,default)
+ return lexers.property_int[tag] or lexers.property[tag] or default
end
--- we create our own extra namespace for extensions and helpers
+-- Do we really need this?
-lexers.context = lexers.context or { }
-local context = lexers.context
+lexers.property_expanded = setmetatable({ }, {
+ __index = function(t, key)
+ return gsub(lexers.property[key],'[$%%]%b()', function(key)
+ return t[sub(key,3,-2)]
+ end)
+ end,
+ __newindex = function(t,k,v)
+ report("properties are read-only, '%s' is not changed",k)
+ end,
+})
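+-- A small sketch of the expansion, assuming lexers.property has been set up
+-- (see check_properties further down); the values are just an illustration:
+--
+-- lexers.property["style.default"] = "fore:#000000"
+-- lexers.property["style.comment"] = "$(style.default),italics"
+-- print(lexers.property_expanded["style.comment"]) -- fore:#000000,italics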
-context.patterns = context.patterns or { }
-local patterns = context.patterns
+-- Style handler.
+--
+-- The property table will be set later (after loading) by the library. The
+-- styleset is not needed any more as we predefine all styles as defaults
+-- anyway (too bug-sensitive otherwise).
+
+local function toproperty(specification)
+ local serialized = { }
+ for key, value in next, specification do
+ if value == true then
+ serialized[#serialized+1] = key
+ elseif type(value) == "table" then
+ serialized[#serialized+1] = key .. ":" .. "#" .. value[1] .. value[2] .. value[3]
+ else
+ serialized[#serialized+1] = key .. ":" .. tostring(value)
+ end
+ end
+ return concat(serialized,",")
+end
-lexers._CONTEXTEXTENSIONS = true
+local function tostyles(styles)
+ local styleset = { }
+ local property = lexers.property or { }
+ for k, v in next, styles do
+ v = toproperty(v)
+ styleset[k] = v
+ property["style."..k] = v
+ end
+ return styleset
+end
+
+context.toproperty = toproperty
+context.tostyles = tostyles
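+-- For example (a hypothetical style specification):
+--
+-- toproperty { italics = true, fore = { "7F", "00", "00" } }
+--
+-- serializes to "italics,fore:#7F0000" (in some order, as 'next' is unordered).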
+
+-- If we had one instance/state of Lua as well as all regular libraries
+-- preloaded we could use the context base libraries. So, for now we go for
+-- a poor man's solution.
+
+function context.registerstyles(styles)
+ local styleset = tostyles(styles)
+ context.styles = styles
+ context.styleset = styleset
+ if trace then
+ local t, n = { }, 0
+ for k, v in next, styleset do
+ t[#t+1] = k
+ if #k > n then
+ n = #k
+ end
+ end
+ table.sort(t)
+ local template = " %-" .. n .. "s : %s"
+ report("initializing styleset:")
+ for i=1,#t do
+ local k = t[i]
+ report(template,k,styleset[k])
+ end
+ end
+end
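+-- A usage sketch (the colors are just an illustration); normally the theme
+-- file calls this once with the full styleset:
+--
+-- context.registerstyles {
+--     default = { fore = { "00", "00", "00" } },
+--     comment = { fore = { "90", "90", "90" }, italics = true },
+-- }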
+
+-- Some spell checking related stuff. Unfortunately we cannot use a path set
+-- by property.
local locations = {
- -- lexers.context.path,
- "data", -- optional data directory
- "..", -- regular scite directory
+ "data", -- optional data directory
+ "..", -- regular scite directory
+ "lexers", -- new in 3.41 .. no tracing possible
+ "lexers/data", -- new in 3.41 .. no tracing possible
+ "../data", -- new in 3.41 .. no tracing possible
}
local function collect(name)
--- local definitions = loadfile(name .. ".luc") or loadfile(name .. ".lua")
local okay, definitions = pcall(function () return require(name) end)
if okay then
if type(definitions) == "function" then
@@ -178,6 +390,7 @@ local function collect(name)
if type(definitions) == "table" then
return definitions
end
+ else
+ -- we could report the require error (in 'definitions') here
end
end
@@ -185,9 +398,13 @@ function context.loaddefinitions(name)
for i=1,#locations do
local data = collect(locations[i] .. "/" .. name)
if data then
+ if trace then
+ report("definition file '%s' has been loaded",name)
+ end
return data
end
end
+ report("unable to load definition file '%s'",name)
end
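+-- Typical usage, assuming a data file (like the scite-context-data-* files)
+-- that returns a table of keywords:
+--
+-- local definitions = context.loaddefinitions("scite-context-data-tex")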
function context.word_match(words,word_chars,case_insensitive)
@@ -218,39 +435,108 @@ function context.word_match(words,word_chars,case_insensitive)
end
end
-local idtoken = R("az","AZ","\127\255","__")
-local digit = R("09")
-local sign = S("+-")
-local period = P(".")
-local space = S(" \n\r\t\f\v")
-
-patterns.idtoken = idtoken
+-- Patterns are grouped in a separate namespace but the regular lexers expect
+-- shortcuts to be present in the lexers library. Maybe I'll incorporate some
+-- of l-lpeg later.
+
+do
+
+ local anything = P(1)
+ local idtoken = R("az","AZ","\127\255","__")
+ local digit = R("09")
+ local sign = S("+-")
+ local period = P(".")
+ local octdigit = R("07")
+ local hexdigit = R("09","AF","af")
+ local lower = R('az')
+ local upper = R('AZ')
+ local alpha = upper + lower
+ local space = S(" \n\r\t\f\v")
+ local eol = S("\r\n")
+ local backslash = P("\\")
+ local decimal = digit^1
+ local octal = P("0")
+ * octdigit^1
+ local hexadecimal = P("0") * S("xX")
+ * (hexdigit^0 * '.' * hexdigit^1 + hexdigit^1 * '.' * hexdigit^0 + hexdigit^1)
+ * (S("pP") * sign^-1 * hexdigit^1)^-1 -- *
+
+ patterns.idtoken = idtoken
+ patterns.digit = digit
+ patterns.sign = sign
+ patterns.period = period
+ patterns.octdigit = octdigit
+ patterns.hexdigit = hexdigit
+ patterns.backslash = backslash -- also used by the (unused) helpers near the end
+ patterns.ascii = R('\000\127') -- useless
+ patterns.extend = R('\000\255') -- useless
+ patterns.control = R('\000\031')
+ patterns.lower = lower
+ patterns.upper = upper
+ patterns.alpha = alpha
+ patterns.decimal = decimal
+ patterns.octal = octal
+ patterns.hexadecimal = hexadecimal
+ patterns.float = sign^-1
+ * (digit^0 * '.' * digit^1 + digit^1 * '.' * digit^0 + digit^1)
+ * S("eE") * sign^-1 * digit^1 -- *
+ patterns.cardinal = decimal
+
+ patterns.signeddecimal = sign^-1 * decimal
+ patterns.signedoctal = sign^-1 * octal
+ patterns.signedhexadecimal = sign^-1 * hexadecimal
+ patterns.integer = sign^-1 * (hexadecimal + octal + decimal)
+ patterns.real =
+ sign^-1 * ( -- at most one
+ digit^1 * period * digit^0 -- 10.0 10.
+ + digit^0 * period * digit^1 -- 0.10 .10
+ + digit^1 -- 10
+ )
+
+ patterns.anything = anything
+ patterns.any = anything
+ patterns.restofline = (1-eol)^1
+ patterns.space = space
+ patterns.spacing = space^1
+ patterns.nospacing = (1-space)^1
+ patterns.eol = eol
+ patterns.newline = P("\r\n") + eol
+
+ local endof = S("\n\r\f")
+
+ patterns.startofline = P(function(input,index)
+ return (index == 1 or lpegmatch(endof,input,index-1)) and index
+ end)
+
+ -- These are the expected ones for other lexers. Maybe they should all go in
+ -- their own namespace with a compatibility layer on top.
+
+ lexers.any = anything
+ lexers.ascii = patterns.ascii
+ lexers.extend = patterns.extend
+ lexers.alpha = alpha
+ lexers.digit = digit
+ lexers.alnum = alpha + digit
+ lexers.lower = lower
+ lexers.upper = upper
+ lexers.xdigit = hexdigit
+ lexers.cntrl = patterns.control
+ lexers.graph = R('!~')
+ lexers.print = R(' ~')
+ lexers.punct = R('!/', ':@', '[\'', '{~')
+ lexers.space = space
+ lexers.newline = S("\r\n\f")^1
+ lexers.nonnewline = 1 - lexers.newline
+ lexers.nonnewline_esc = 1 - (lexers.newline + '\\') + backslash * anything
+ lexers.dec_num = decimal
+ lexers.oct_num = octal
+ lexers.hex_num = hexadecimal
+ lexers.integer = patterns.integer
+ lexers.float = patterns.float
+ lexers.word = (alpha + '_') * (alpha + digit + '_')^0 -- weird, why digits
-patterns.digit = digit
-patterns.sign = sign
-patterns.period = period
-
-patterns.cardinal = digit^1
-patterns.integer = sign^-1 * digit^1
-
-patterns.real =
- sign^-1 * ( -- at most one
- digit^1 * period * digit^0 -- 10.0 10.
- + digit^0 * period * digit^1 -- 0.10 .10
- + digit^1 -- 10
- )
-
-patterns.restofline = (1-S("\n\r"))^1
-patterns.space = space
-patterns.spacing = space^1
-patterns.nospacing = (1-space)^1
-patterns.anything = P(1)
-
-local endof = S("\n\r\f")
+end
-patterns.startofline = P(function(input,index)
- return (index == 1 or lpegmatch(endof,input,index-1)) and index
-end)
+-- end of patterns
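+-- A few sanity checks of the patterns defined above (lpegmatch returns the
+-- position after the match):
+--
+-- lpegmatch(patterns.integer,"0x1F") -- 5
+-- lpegmatch(patterns.real,"3.14")    -- 5
+-- lpegmatch(patterns.spacing," x")   -- 2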
function context.exact_match(words,word_chars,case_insensitive)
local characters = concat(words)
@@ -259,7 +545,7 @@ function context.exact_match(words,word_chars,case_insensitive)
word_chars = ""
end
if type(word_chars) == "string" then
- pattern = S(characters) + idtoken
+ pattern = S(characters) + patterns.idtoken
if case_insensitive then
pattern = pattern + S(upper(characters)) + S(lower(characters))
end
@@ -401,7 +687,7 @@ setmetatable(h_table, { __index = function(t,level) local v = { level, FOLD_HEAD
setmetatable(b_table, { __index = function(t,level) local v = { level, FOLD_BLANK } t[level] = v return v end })
setmetatable(n_table, { __index = function(t,level) local v = { level } t[level] = v return v end })
-local newline = P("\r\n") + S("\r\n")
+local newline = patterns.newline
local p_yes = Cp() * Cs((1-newline)^1) * newline^-1
local p_nop = newline
@@ -419,7 +705,7 @@ local function fold_by_parsing(text,start_pos,start_line,start_level,lexer)
if fold_pattern then
-- if no functions are found then we could have a faster one
fold_pattern = Cp() * C(fold_pattern) / function(s,match)
- local symbols = fold_symbols[get_style_at(start_pos + s)]
+ local symbols = fold_symbols[style_at[start_pos + s]]
if symbols then
local l = symbols[match]
if l then
@@ -450,7 +736,7 @@ local function fold_by_parsing(text,start_pos,start_line,start_level,lexer)
local action_y = function(pos,line)
for j = 1, #fold_symbols_patterns do
for s, match in gmatch(line,fold_symbols_patterns[j]) do -- '()('..patterns[i]..')'
- local symbols = fold_symbols[get_style_at(start_pos + pos + s - 1)]
+ local symbols = fold_symbols[style_at[start_pos + pos + s - 1]]
local l = symbols and symbols[match]
local t = type(l)
if t == 'number' then
@@ -503,7 +789,7 @@ end
local folds, current_line, prev_level
local function action_y()
- local current_level = FOLD_BASE + get_indent_amount(current_line)
+ local current_level = FOLD_BASE + indent_amount[current_line]
if current_level > prev_level then -- next level
local i = current_line - 1
local f
@@ -537,7 +823,7 @@ local function action_n()
current_line = current_line + 1
end
-local pattern = ( S("\t ")^0 * ( (1-S("\n\r"))^1 / action_y + P(true) / action_n) * newline )^0
+local pattern = ( S("\t ")^0 * ( (1-patterns.eol)^1 / action_y + P(true) / action_n) * newline )^0
local function fold_by_indentation(text,start_pos,start_line,start_level)
-- initialize
@@ -573,14 +859,13 @@ local threshold_by_parsing = 512 * 1024 -- we don't know the filesize yet
local threshold_by_indentation = 512 * 1024 -- we don't know the filesize yet
local threshold_by_line = 512 * 1024 -- we don't know the filesize yet
-function context.fold(text,start_pos,start_line,start_level) -- hm, we had size thresholds .. where did they go
+function context.fold(lexer,text,start_pos,start_line,start_level) -- hm, we had size thresholds .. where did they go
if text == '' then
return { }
end
if initialize then
initialize()
end
- local lexer = global._LEXER
local fold_by_lexer = lexer._fold
local fold_by_symbols = lexer._foldsymbols
local filesize = 0 -- we don't know that
@@ -604,9 +889,13 @@ function context.fold(text,start_pos,start_line,start_level) -- hm, we had size
return { }
end
+-- function context.fold(lexer,text,start_pos,start_line,start_level) -- hm, we had size thresholds .. where did they go
+-- return { }
+-- end
+
-- The following code is mostly unchanged:
-local function add_rule(lexer,id,rule)
+local function add_rule(lexer,id,rule) -- unchanged
if not lexer._RULES then
lexer._RULES = { }
lexer._RULEORDER = { }
@@ -615,31 +904,39 @@ local function add_rule(lexer,id,rule)
lexer._RULEORDER[#lexer._RULEORDER + 1] = id
end
-local function add_style(lexer,token_name,style)
- local len = lexer._STYLES.len
- if len == 32 then
- len = len + 8
+local function add_style(lexer,token_name,style) -- unchanged (well, changed a bit around 3.41)
+-- if not lexer._TOKENSTYLES[token_name] then
+ local num_styles = lexer._numstyles
+ if num_styles == 32 then
+ num_styles = num_styles + 8
end
- if len >= 128 then
- print('Too many styles defined (128 MAX)')
+ if num_styles >= 255 then
+ report("there can't be more than %s styles",255)
end
- lexer._TOKENS[token_name] = len
- lexer._STYLES[len] = style
- lexer._STYLES.len = len + 1
+ lexer._TOKENSTYLES[token_name] = num_styles
+ lexer._EXTRASTYLES[token_name] = style
+ lexer._numstyles = num_styles + 1
+-- end
end
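+-- For instance (hypothetical token and style), this assigns the next free
+-- style number to a token name and remembers its style definition:
+--
+-- add_style(lexer,"mytoken","fore:#007F00")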
-local function join_tokens(lexer)
+-- At some point an 'any' append showed up in the original code ...
+-- but I see no need to catch that case ... better fix the specification.
+
+local function join_tokens(lexer) -- slightly different from the original (no 'any' append)
local patterns = lexer._RULES
local order = lexer._RULEORDER
- local token_rule = patterns[order[1]]
- for i=2,#order do
+ local token_rule = patterns[order[1]] -- normally whitespace
+ for i=2, #order do
token_rule = token_rule + patterns[order[i]]
end
+ if lexer._TYPE ~= "context" then
+ token_rule = token_rule + lexers.token(lexers.DEFAULT, lexers.any) -- 'patterns' is shadowed by _RULES above
+ end
lexer._TOKENRULE = token_rule
return token_rule
end
-local function add_lexer(grammar, lexer, token_rule)
+local function add_lexer(grammar, lexer, token_rule) -- mostly the same as the original
local token_rule = join_tokens(lexer)
local lexer_name = lexer._NAME
local children = lexer._CHILDREN
@@ -660,7 +957,7 @@ local function add_lexer(grammar, lexer, token_rule)
grammar[lexer_name] = token_rule^0
end
-local function build_grammar(lexer, initial_rule)
+local function build_grammar(lexer,initial_rule) -- same as the original
local children = lexer._CHILDREN
if children then
local lexer_name = lexer._NAME
@@ -676,12 +973,14 @@ local function build_grammar(lexer, initial_rule)
end
end
--- so far. We need these local functions in the next one.
+-- So far. We need these local functions in the next one. We have these
+-- redefinitions because we memoize the lexers ... it looks like in 3.41
+-- something similar now happens with 'lexers'.
local lineparsers = { }
-function context.lex(text,init_style)
- local lexer = global._LEXER
+function context.lex(lexer,text,init_style)
+ -- local lexer = global._LEXER
local grammar = lexer._GRAMMAR
if initialize then
initialize()
@@ -731,7 +1030,7 @@ function context.lex(text,init_style)
if grammar then
lexer._GRAMMAR = grammar
else
- for style, style_num in next, lexer._TOKENS do
+ for style, style_num in next, lexer._TOKENSTYLES do
if style_num == init_style then
-- the name of the lexers is filtered from the whitespace
-- specification
@@ -756,6 +1055,9 @@ function context.lex(text,init_style)
end
end
+
+-- so far
+
-- todo: keywords: one lookup and multiple matches
-- function context.token(name, patt)
@@ -768,130 +1070,519 @@ function context.token(name, patt)
return patt * Cc(name) * Cp()
end
+-- The next ones were mostly unchanged (till now); we moved them here when 3.41
+-- became close to impossible to combine with or overload, and a merge was the
+-- only solution. It makes later updates more painful but the update to 3.41 was
+-- already a bit of a nightmare anyway.
+
+-- Loading lexers is rather interwoven with what the dll/so sets and
+-- it changes over time. So, we need to keep an eye on changes. One
+-- problem that we always faced were the limitations in length of
+-- lexer names (as they occasionally get appended/prepended to strings with
+-- a hard coded limit). So, we always used alternative names and now need
+-- to make sure this doesn't clash. As I no longer intend to use shipped
+-- lexers I could strip away some of the code in the future, but keeping
+-- it as reference makes sense.
+
+-- I spent quite some time figuring out why 3.41 didn't work or crashed, which
+-- is hard when no stdout is available and when the io library is absent. In
+-- the end one of the problems was the _NAME setting. Because we set _NAME to
+-- e.g. 'tex' but load from a file with a longer name (which we do as we don't
+-- want to clash with existing files), we ended up with lexers not being found.
+
+local function check_properties()
+ if not lexers.property then
+ lexers.property = { }
+ lexers.property_int = setmetatable({ }, {
+ __index = function(t,k)
+ return tostring(tonumber(lexers.property[k]) or 0)
+ end,
+ __newindex = function(t,k,v)
+ report("properties are read-only, '%s' is not changed",k)
+ end,
+ })
+ end
+end
+
+local function check_styles(lexer)
+ local numstyles = #default
+ local tokenstyles = { }
+ for i=1, #default do
+ tokenstyles[default[i]] = i - 1
+ end
+ for i=1, #predefined do
+ tokenstyles[predefined[i]] = i + 31
+ end
+ lexer._TOKENSTYLES = tokenstyles
+ lexer._numstyles = numstyles
+ lexer._EXTRASTYLES = { }
+ return lexer
+end
+
+local whitespaces = { }
+
+local function push_whitespace(name)
+ table.insert(whitespaces,lexers.WHITESPACE or "whitespace")
+ lexers.WHITESPACE = name .. "_whitespace"
+end
+
+local function pop_whitespace()
+ lexers.WHITESPACE = table.remove(whitespaces) or "whitespace"
+end
+
+local function check_whitespace(lexer,name)
+ if lexer then
+ lexer.whitespace = (name or lexer.name or lexer._NAME) .. "_whitespace"
+ end
+end
+
+function context.new(name,filename)
+ local lexer = {
+ _TYPE = "context",
+ --
+ _NAME = name, -- used for token building
+ _FILENAME = filename, -- for diagnostic purposes
+ --
+ name = name,
+ filename = filename,
+ whitespace = nil, -- set by check_whitespace below
+ }
+ if trace then
+ report("initializing lexer tagged '%s' from file '%s'",name,filename or name)
+ end
+ check_styles(lexer)
+ check_whitespace(lexer)
+ return lexer
+end
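+-- This is how a lexer module starts out (the tag and file name here just
+-- illustrate the convention of short tags and longer file names):
+--
+-- local texlexer = context.new("tex","scite-context-lexer-tex")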
+
+local function nolexer(name)
+ local lexer = {
+ _TYPE = "unset",
+ _NAME = name,
+ -- _rules = { },
+ }
+ check_styles(lexer)
+ check_whitespace(lexer)
+ return lexer
+end
+
+local function load_lexer(name)
+ local lexer, okay = nil, false
+ -- first locate the file (methods have changed over time)
+ local lexer_file = package.searchpath(name,lexers.LEXERPATH)
+ if not lexer_file or lexer_file == "" then
+ report("lexer file '%s' can't be located",name)
+ else
+ if trace then
+ report("loading lexer file '%s'",lexer_file)
+ end
+ push_whitespace(name) -- for traditional lexers .. no alt_name yet
+ okay, lexer = pcall(dofile, lexer_file or '')
+ pop_whitespace()
+ if not okay then
+ report("invalid lexer file '%s'",lexer_file)
+ elseif trace then
+ report("lexer file '%s' has been loaded",lexer_file)
+ end
+ end
+ if type(lexer) ~= "table" then
+ return nolexer(name)
+ end
+ if lexer._TYPE ~= "context" then
+ lexer._TYPE = "native"
+ check_styles(lexer)
+ check_whitespace(lexer,name)
+ end
+ if not lexer._NAME then
+ lexer._NAME = name -- so: filename
+ end
+ return lexer
+end
+
+-- An optional second argument has been introduced so that one can embed a lexer
+-- more than once ... maybe something to look into (as now it's done by remembering
+-- the start sequence ... quite okay but maybe suboptimal ... anyway, never change
+-- a working solution).
+
+function context.load(filename)
+ nesting = nesting + 1
+ local lexer = usedlexers[filename] -- we load by filename but the internal name can be short
+ if lexer then
+ if trace then
+ report("reusing lexer '%s'",filename)
+ end
+ nesting = nesting - 1
+ return lexer
+ end
+ if trace then
+ report("loading lexer '%s'",filename)
+ end
+ --
+ check_properties()
+ --
+ parent_lexer = nil
+ --
+ lexer = load_lexer(filename) or nolexer(filename)
+ usedlexers[filename] = lexer
+ --
+ if not lexer._rules and not lexer._lexer then
+ lexer._lexer = parent_lexer
+ end
+ --
+ if lexer._lexer then
+ local _l = lexer._lexer
+ local _r = lexer._rules
+ local _s = lexer._tokenstyles
+ if not _l._tokenstyles then
+ _l._tokenstyles = { }
+ end
+ if _r then
+ local rules = _l._rules
+ local name = lexer.name
+ for i=1,#_r do
+ local rule = _r[i]
+ rules[#rules + 1] = {
+ name .. '_' .. rule[1],
+ rule[2],
+ }
+ end
+ end
+ if _s then
+ local tokenstyles = _l._tokenstyles
+ for token, style in next, _s do
+ tokenstyles[token] = style
+ end
+ end
+ lexer = _l
+ end
+ --
+ local _r = lexer._rules
+ if _r then
+ local _s = lexer._tokenstyles
+ if _s then
+ for token, style in next, _s do
+ add_style(lexer, token, style)
+ end
+ end
+ for i=1,#_r do
+ local rule = _r[i]
+ add_rule(lexer, rule[1], rule[2])
+ end
+ build_grammar(lexer)
+ end
+ --
+ add_style(lexer, lexer.whitespace, lexers.STYLE_WHITESPACE)
+ --
+ local foldsymbols = lexer._foldsymbols
+ if foldsymbols then
+ local patterns = foldsymbols._patterns
+ if patterns then
+ for i = 1, #patterns do
+ patterns[i] = '()(' .. patterns[i] .. ')'
+ end
+ end
+ end
+ --
+ lexer.lex = lexers.lex
+ lexer.fold = lexers.fold
+ --
+ nesting = nesting - 1
+ --
+ return lexer
+end
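+-- Usage sketch: we load by the (long) file name while the lexer keeps its own
+-- short _NAME internally:
+--
+-- local lualexer = context.load("scite-context-lexer-lua")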
+
+function context.embed_lexer(parent, child, start_rule, end_rule) -- mostly the same as the original
+ local embeddedrules = child._EMBEDDEDRULES
+ if not embeddedrules then
+ embeddedrules = { }
+ child._EMBEDDEDRULES = embeddedrules
+ end
+ if not child._RULES then
+ local rules = child._rules
+ if not rules then
+ report("child lexer '%s' has no rules",chile._NAME or "unknown")
+ rules = { }
+ child._rules = rules
+ end
+ for i=1,#rules do
+ local rule = rules[i]
+ add_rule(child, rule[1], rule[2])
+ end
+ end
+ embeddedrules[parent._NAME] = {
+ ['start_rule'] = start_rule,
+ ['token_rule'] = join_tokens(child),
+ ['end_rule'] = end_rule
+ }
+ local children = parent._CHILDREN
+ if not children then
+ children = { }
+ parent._CHILDREN = children
+ end
+ children[#children + 1] = child
+ local tokenstyles = parent._tokenstyles
+ if not tokenstyles then
+ tokenstyles = { }
+ parent._tokenstyles = tokenstyles
+ end
+ tokenstyles[child._NAME..'_whitespace'] = lexers.STYLE_WHITESPACE -- check what whitespace
+ local childstyles = child._tokenstyles
+ if childstyles then
+ for token, style in next, childstyles do
+ tokenstyles[token] = style
+ end
+ end
+ child._lexer = parent
+ parent_lexer = parent
+end
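+-- A sketch of embedding one lexer in another (the lexers and patterns are just
+-- an illustration, in the spirit of lua inside tex):
+--
+-- context.embed_lexer(texlexer, lualexer,
+--     context.token("embedded_start", P("\\startluacode")),
+--     context.token("embedded_stop",  P("\\stopluacode"))
+-- )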
+
+-- we now move the adapted code to the lexers namespace
+
+lexers.new = context.new
+lexers.load = context.load
+lexers.embed_lexer = context.embed_lexer
lexers.fold = context.fold
lexers.lex = context.lex
lexers.token = context.token
+lexers.word_match = context.word_match
lexers.exact_match = context.exact_match
lexers.just_match = context.just_match
-- helper .. alas ... the lexer's lua instance is rather crippled .. not even
-- math is part of it
-local floor = math and math.floor
-local char = string.char
+do
-if not floor then
+ local floor = math and math.floor
+ local char = string.char
- floor = function(n)
- return tonumber(string.format("%d",n))
+ if not floor then
+
+ floor = function(n)
+ return tonumber(format("%d",n))
+ end
+
+ math = math or { }
+
+ math.floor = floor
+
+ end
+
+ local function utfchar(n)
+ if n < 0x80 then
+ return char(n)
+ elseif n < 0x800 then
+ return char(
+ 0xC0 + floor(n/0x40),
+ 0x80 + (n % 0x40)
+ )
+ elseif n < 0x10000 then
+ return char(
+ 0xE0 + floor(n/0x1000),
+ 0x80 + (floor(n/0x40) % 0x40),
+ 0x80 + (n % 0x40)
+ )
+ elseif n < 0x40000 then
+ return char(
+ 0xF0 + floor(n/0x40000),
+ 0x80 + floor(n/0x1000),
+ 0x80 + (floor(n/0x40) % 0x40),
+ 0x80 + (n % 0x40)
+ )
+ else
+ -- return char(
+ -- 0xF1 + floor(n/0x1000000),
+ -- 0x80 + floor(n/0x40000),
+ -- 0x80 + floor(n/0x1000),
+ -- 0x80 + (floor(n/0x40) % 0x40),
+ -- 0x80 + (n % 0x40)
+ -- )
+ return "?"
+ end
+ end
+
+ context.utfchar = utfchar
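+ -- For example: utfchar(0x00A0) returns the bytes 0xC2 0xA0, the utf-8
+ -- encoding of the no-break space used in the invisibles list below.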
+
+ -- a helper from l-lpeg:
+
+ local function make(t)
+ local p
+ for k, v in next, t do
+ if not p then
+ if next(v) then
+ p = P(k) * make(v)
+ else
+ p = P(k)
+ end
+ else
+ if next(v) then
+ p = p + P(k) * make(v)
+ else
+ p = p + P(k)
+ end
+ end
+ end
+ return p
end
- math = math or { }
+ function lpeg.utfchartabletopattern(list)
+ local tree = { }
+ for i=1,#list do
+ local t = tree
+ for c in gmatch(list[i],".") do
+ if not t[c] then
+ t[c] = { }
+ end
+ t = t[c]
+ end
+ end
+ return make(tree)
+ end
- math.floor = floor
+ patterns.invisibles = lpeg.utfchartabletopattern {
+ utfchar(0x00A0), -- nbsp
+ utfchar(0x2000), -- enquad
+ utfchar(0x2001), -- emquad
+ utfchar(0x2002), -- enspace
+ utfchar(0x2003), -- emspace
+ utfchar(0x2004), -- threeperemspace
+ utfchar(0x2005), -- fourperemspace
+ utfchar(0x2006), -- sixperemspace
+ utfchar(0x2007), -- figurespace
+ utfchar(0x2008), -- punctuationspace
+ utfchar(0x2009), -- breakablethinspace
+ utfchar(0x200A), -- hairspace
+ utfchar(0x200B), -- zerowidthspace
+ utfchar(0x202F), -- narrownobreakspace
+ utfchar(0x205F), -- math thinspace
+ }
+
+ -- now we can make:
+
+ patterns.iwordtoken = patterns.wordtoken - patterns.invisibles
+ patterns.iwordpattern = patterns.iwordtoken^3
end
-local function utfchar(n)
- if n < 0x80 then
- return char(n)
- elseif n < 0x800 then
- return char(
- 0xC0 + floor(n/0x40),
- 0x80 + (n % 0x40)
- )
- elseif n < 0x10000 then
- return char(
- 0xE0 + floor(n/0x1000),
- 0x80 + (floor(n/0x40) % 0x40),
- 0x80 + (n % 0x40)
- )
- elseif n < 0x40000 then
- return char(
- 0xF0 + floor(n/0x40000),
- 0x80 + floor(n/0x1000),
- 0x80 + (floor(n/0x40) % 0x40),
- 0x80 + (n % 0x40)
- )
+-- The following helpers are not used, partially replaced by other mechanisms,
+-- and when needed I'll first optimize them. I only made them somewhat more readable.
+
+function lexers.delimited_range(chars, single_line, no_escape, balanced) -- unchanged
+ local s = sub(chars,1,1)
+ local e = #chars == 2 and sub(chars,2,2) or s
+ local range
+ local b = balanced and s or ''
+ local n = single_line and '\n' or ''
+ if no_escape then
+ local invalid = S(e .. n .. b)
+ range = patterns.any - invalid
+ else
+ local invalid = S(e .. n .. b) + patterns.backslash
+ range = patterns.any - invalid + patterns.backslash * patterns.any
+ end
+ if balanced and s ~= e then
+ return P {
+ s * (range + V(1))^0 * e
+ }
else
- -- return char(
- -- 0xF1 + floor(n/0x1000000),
- -- 0x80 + floor(n/0x40000),
- -- 0x80 + floor(n/0x1000),
- -- 0x80 + (floor(n/0x40) % 0x40),
- -- 0x80 + (n % 0x40)
- -- )
- return "?"
+ return s * range^0 * P(e)^-1
end
end
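+-- For example: lexers.delimited_range('"', true) matches a single line, double
+-- quoted string with backslash escapes, like "a \"quoted\" word".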
-context.utfchar = utfchar
+function lexers.starts_line(patt) -- unchanged
+ return P ( function(input, index)
+ if index == 1 then
+ return index
+ end
+ local char = sub(input,index - 1,index - 1)
+ if char == '\n' or char == '\r' or char == '\f' then
+ return index
+ end
+ end ) * patt
+end
--- a helper from l-lpeg:
+function lexers.last_char_includes(s) -- unchanged
+ s = '[' .. gsub(s,'[-%%%[]', '%%%1') .. ']'
+ return P ( function(input, index)
+ if index == 1 then
+ return index
+ end
+ local i = index
+ while match(sub(input,i - 1,i - 1),'[ \t\r\n\f]') do
+ i = i - 1
+ end
+ if match(sub(input,i - 1,i - 1),s) then
+ return index
+ end
+ end)
+end
-local gmatch = string.gmatch
+function lexers.nested_pair(start_chars, end_chars) -- unchanged
+ local s = start_chars
+ local e = P(end_chars)^-1
+ return P {
+ s * (patterns.any - s - end_chars + V(1))^0 * e
+ }
+end
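+-- For example: lexers.nested_pair("{","}") matches balanced braces, as in
+-- "{ outer { inner } outer }".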
-local function make(t)
- local p
- for k, v in next, t do
- if not p then
- if next(v) then
- p = P(k) * make(v)
- else
- p = P(k)
+local function prev_line_is_comment(prefix, text, pos, line, s) -- unchanged
+ local start = find(line,'%S')
+ if start < s and not find(line,prefix,start,true) then
+ return false
+ end
+ local p = pos - 1
+ if sub(text,p,p) == '\n' then
+ p = p - 1
+ if sub(text,p,p) == '\r' then
+ p = p - 1
+ end
+ if sub(text,p,p) ~= '\n' then
+ while p > 1 and sub(text,p - 1,p - 1) ~= '\n' do
+ p = p - 1
+ end
- else
- if next(v) then
- p = p + P(k) * make(v)
- else
- p = p + P(k)
+ while find(sub(text,p,p),'^[\t ]$') do
+ p = p + 1
end
+ return sub(text,p,p + #prefix - 1) == prefix
end
end
- return p
+ return false
end
-function lpeg.utfchartabletopattern(list)
- local tree = { }
- for i=1,#list do
- local t = tree
- for c in gmatch(list[i],".") do
- if not t[c] then
- t[c] = { }
- end
- t = t[c]
- end
- end
- return make(tree)
-end
-
-patterns.invisibles = lpeg.utfchartabletopattern {
- utfchar(0x00A0), -- nbsp
- utfchar(0x2000), -- enquad
- utfchar(0x2001), -- emquad
- utfchar(0x2002), -- enspace
- utfchar(0x2003), -- emspace
- utfchar(0x2004), -- threeperemspace
- utfchar(0x2005), -- fourperemspace
- utfchar(0x2006), -- sixperemspace
- utfchar(0x2007), -- figurespace
- utfchar(0x2008), -- punctuationspace
- utfchar(0x2009), -- breakablethinspace
- utfchar(0x200A), -- hairspace
- utfchar(0x200B), -- zerowidthspace
- utfchar(0x202F), -- narrownobreakspace
- utfchar(0x205F), -- math thinspace
-}
-
--- now we can make:
-
-patterns.iwordtoken = patterns.wordtoken - patterns.invisibles
-patterns.iwordpattern = patterns.iwordtoken^3
+local function next_line_is_comment(prefix, text, pos, line, s)
+ local p = find(text,'\n',pos + s)
+ if p then
+ p = p + 1
+ while find(sub(text,p,p),'^[\t ]$') do
+ p = p + 1
+ end
+ return sub(text,p,p + #prefix - 1) == prefix
+ end
+ return false
+end
--- require("themes/scite-context-theme")
+function lexers.fold_line_comments(prefix)
+ local property_int = lexers.property_int
+ return function(text, pos, line, s)
+ if property_int['fold.line.comments'] == 0 then
+ return 0
+ end
+ if s > 1 and match(line,'^%s*()') < s then
+ return 0
+ end
+ local prev_line_comment = prev_line_is_comment(prefix, text, pos, line, s)
+ local next_line_comment = next_line_is_comment(prefix, text, pos, line, s)
+ if not prev_line_comment and next_line_comment then
+ return 1
+ end
+ if prev_line_comment and not next_line_comment then
+ return -1
+ end
+ return 0
+ end
+end
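+-- Typical usage in a lexer's fold symbols table (a sketch, assuming Lua style
+-- '--' line comments):
+--
+-- lexer._foldsymbols = {
+--     _patterns = { "%-%-" },
+--     [lexers.COMMENT] = { ["--"] = lexers.fold_line_comments("--") },
+-- }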
--- In order to deal with some bug in additional styles (I have no cue what is
--- wrong, but additional styles get ignored and clash somehow) I just copy the
--- original lexer code ... see original for comments.
+-- done
return lexers