summaryrefslogtreecommitdiff
path: root/context/data/textadept/context/lexers/scite-context-lexer.lua
diff options
context:
space:
mode:
Diffstat (limited to 'context/data/textadept/context/lexers/scite-context-lexer.lua')
-rw-r--r--context/data/textadept/context/lexers/scite-context-lexer.lua458
1 files changed, 281 insertions, 177 deletions
diff --git a/context/data/textadept/context/lexers/scite-context-lexer.lua b/context/data/textadept/context/lexers/scite-context-lexer.lua
index e526d5045..37f236a89 100644
--- a/context/data/textadept/context/lexers/scite-context-lexer.lua
+++ b/context/data/textadept/context/lexers/scite-context-lexer.lua
@@ -8,11 +8,6 @@ local info = {
}
--- todo: hook into context resolver etc
--- todo: only old api in lexers, rest in context subnamespace
--- todo: make sure we can run in one state .. copies or shared?
--- todo: auto-nesting
-
if lpeg.setmaxstack then lpeg.setmaxstack(1000) end
local log = false
@@ -27,169 +22,252 @@ local inspect = false -- can save some 15% (maybe easier on scintilla)
-- GET GOING
--
--- You need to copy this file over lexer.lua. In principle other lexers could work too but
--- not now. Maybe some day. All patterns will move into the patterns name space. I might do
--- the same with styles. If you run an older version of SciTE you can take one of the
--- archives. Pre 3.41 versions can just be copied to the right path, as there we still use
--- part of the normal lexer.
+-- You need to copy this file over lexer.lua. In principle other lexers could work
+-- too but not now. Maybe some day. All patterns will move into the patterns name
+-- space. I might do the same with styles. If you run an older version of SciTE you
+-- can take one of the archives. Pre 3.41 versions can just be copied to the right
+-- path, as there we still use part of the normal lexer. Below we mention some
+-- issues with different versions of SciTE. We try to keep up with changes but best
+-- check carefully if the version that you install works as expected because SciTE
+-- and the scintillua dll need to be in sync.
--
-- REMARK
--
--- We started using lpeg lexing as soon as it came available. Because we had rather demanding
--- files and also wanted to use nested lexers, we ended up with our own variant. At least at
--- that time this was more robust and also faster (as we have some pretty large lua data files
--- and also work with large xml files). As a consequence successive versions had to be adapted
--- to changes in the (at that time still unstable) api. In addition to lexing we also have
--- spell checking and such. Around version 3.60 things became more stable so I don't expect to
--- change much.
+-- We started using lpeg lexing as soon as it came available. Because we had rather
+-- demanding files and also wanted to use nested lexers, we ended up with our own
+-- variant. At least at that time this was more robust and also much faster (as we
+-- have some pretty large Lua data files and also work with large xml files). As a
+-- consequence successive versions had to be adapted to changes in the (at that time
+-- still unstable) api. In addition to lexing we also have spell checking and such.
+-- Around version 3.60 things became more stable so I don't expect to change much.
+--
+-- LEXING
--
--- STATUS
+-- When pc's showed up we wrote our own editor (texedit) in MODULA 2. It was fast,
+-- had multiple overlapping (text) windows, could run in the at most 1M memory at
+-- that time, etc. The realtime file browsing with lexing that we had at that time
+-- is still on my current wish list. The color scheme and logic that we used related
+-- to the logic behind the ConTeXt user interface that evolved.
--
--- todo: maybe use a special stripped version of the dll (stable api) and add a bit more
--- interfacing to scintilla
--- todo: investigate if we can use the already built in lua instance so that we can combine the
--- power of lexign with extensions
--- todo: play with hotspot and other properties (but no real need now)
--- todo: maybe come up with an extension to the api subsystem
--- todo: add proper tracing and so .. not too hard as we can run on mtxrun, but we lack a console
--- for debugging (ok, chicken-egg as lexers probably need to be loaded before a console can
--- kick in)
--- todo: get rid of these lexers.STYLE_XX and lexers.XX (hide such details)
+-- Later I rewrote the editor in perl/tk. I don't like the perl syntax but tk
+-- widgets are very powerful and hard to beat. In fact, TextAdept reminds me of
+-- that: wrap your own interface around a framework (tk had an edit control that one
+-- could control completely not that different from scintilla). Last time I checked
+-- it still ran fine so I might try to implement something like its file handling in
+-- TextAdept.
--
--- wish: access to all scite properties and in fact integrate in scite
+-- In the end I settled for SciTE for which I wrote TeX and MetaPost lexers that
+-- could handle keyword sets. With respect to lexing (syntax highlighting) ConTeXt
+-- has a long history, if only because we need it for manuals. Anyway, in the end we
+-- arrived at lpeg based lexing (which is quite natural as we have lots of lpeg
+-- usage in ConTeXt). The basic color schemes haven't changed much. The most
+-- prominent differences are the nested lexers.
--
+-- In the meantime I made the lexer suitable for typesetting sources which was no
+-- big deal as we already had that in place (ConTeXt used lpeg from the day it
+-- showed up so we have several lexing options there too).
--
--- In the meantime I made the lexer suitable for typesetting sources which was no big deal as we
--- already had that in place (ConTeXt used lpeg from the day it showed up so we have several lexing
--- options there too).
+-- Keep in mind that in ConTeXt (typesetting) lexing can follow several approaches:
+-- line based (which is handy for verbatim mode), syntax mode (which is nice for
+-- tutorials), and tolerant mode (so that one can also show bad examples or errors).
+-- These demands can clash.
--
-- HISTORY
--
--- The fold and lex functions are copied and patched from original code by Mitchell (see lexer.lua).
--- All errors are mine. The ability to use lpeg in scintilla is a real nice addition and a brilliant
--- move. The code is a byproduct of the (mainly Lua based) textadept (at the time I ran into it was
--- a rapidly moving target so I decided to stick ot SciTE). When I played with it, it had no realtime
--- output pane but that seems to be dealt with now (2017). I need to have a look at it in more detail
--- but a first test again mad the output hang and it was a bit slow too (and I also want the log pane
--- as scite has it, on the right, in view). So, for now I stick to SciTE even when it's somewhat
--- crippled by the fact that we cannot hook our own (language dependent) lexer into the output pane
--- (somehow the errorlist lexer is hard coded into the editor). Hopefully that will change some day.
--- So, how did we arrive where we're now.
+-- The remarks below are more for myself so that I keep track of changes in the
+-- way we adapt to the changes in the scintillua and scite.
+--
+-- The fold and lex functions are copied and patched from original code by Mitchell
+-- (see lexer.lua) in the scintillua distribution. So whatever I say below, assume
+-- that all errors are mine. The ability to use lpeg in scintilla is a real nice
+-- addition and a brilliant move. The code is a byproduct of the (mainly Lua based)
+-- TextAdept which at the time I ran into it was a rapidly moving target so I
+-- decided to stick to SciTE. When I played with it, it had no realtime output pane
+-- although that seems to be dealt with now (2017). I need to have a look at it in
+-- more detail but a first test again made the output hang and it was a bit slow too
+-- (and I also want the log pane as SciTE has it, on the right, in view). So, for
+-- now I stick to SciTE even when it's somewhat crippled by the fact that we cannot
+-- hook our own (language dependent) lexer into the output pane (somehow the
+-- errorlist lexer is hard coded into the editor). Hopefully that will change some
+-- day. The ConTeXt distribution has a cmd runner for TextAdept that will plug in the
+-- lexers discussed here as well as a dedicated runner. Consider it an experiment.
--
--- Starting with SciTE version 3.20 there is an issue with coloring. As we still lack a connection
--- with SciTE itself (properties as well as printing to the log pane) and we cannot trace this (on
--- windows). As far as I can see, there are no fundamental changes in lexer.lua or LexLPeg.cxx so it
--- must be in Scintilla itself. So for the moment I stick to 3.10. Indicators are: no lexing of 'next'
--- and 'goto <label>' in the Lua lexer and no brace highlighting either. Interesting is that it does
--- work ok in the cld lexer (so the Lua code is okay). All seems to be ok again in later versions,
--- so, when you update best check first and just switch back to an older version as normally a SciTE
--- update is not critital. When char-def.lua lexes real fast this is a signal that the lexer quits
--- somewhere halfway. Maybe there are some hard coded limitations on the amount of styles and/or
--- length of names.
+-- The basic code hasn't changed much but we had to adapt a few times to changes in
+-- the api and/or work around bugs. Starting with SciTE version 3.20 there was an
+-- issue with coloring. We still lacked a connection with SciTE itself (properties
+-- as well as printing to the log pane) and we could not trace this (on windows).
+-- However on unix we can see messages! As far as I can see, there are no
+-- fundamental changes in lexer.lua or LexLPeg.cxx so it must be/have been in
+-- Scintilla itself. So we went back to 3.10. Indicators of issues are: no lexing of
+-- 'next' and 'goto <label>' in the Lua lexer and no brace highlighting either.
+-- Interesting is that it does work ok in the cld lexer (so the Lua code is okay).
+-- All seems to be ok again in later versions, so, when you update best check first
+-- and just switch back to an older version as normally a SciTE update is not
+-- critical. When char-def.lua lexes real fast this is a signal that the lexer quits
+-- somewhere halfway. Maybe there are some hard coded limitations on the amount of
+-- styles and/or length of names.
--
--- Anyway, after checking 3.24 and adapting to the new lexer tables things are okay again. So, this
--- version assumes 3.24 or higher. In 3.24 we have a different token result, i.e. no longer a { tag,
--- pattern } but just two return values. I didn't check other changes but will do that when I run into
--- issues. I had optimized these small tables by hashing which was more efficient but this is no longer
--- needed. For the moment we keep some of that code around as I don't know what happens in future
--- versions. I'm anyway still happy with this kind of lexing.
+-- Anyway, after checking 3.24 and adapting to the new lexer tables things are okay
+-- again. So, this version assumes 3.24 or higher. In 3.24 we have a different token
+-- result, i.e. no longer a { tag, pattern } but just two return values. I didn't
+-- check other changes but will do that when I run into issues. I had already
+-- optimized these small tables by hashing which was much more efficient (and maybe
+-- even more efficient than the current approach) but this is no longer needed. For
+-- the moment we keep some of that code around as I don't know what happens in
+-- future versions. I'm anyway still happy with this kind of lexing.
--
--- In 3.31 another major change took place: some helper constants (maybe they're no longer constants)
--- and functions were moved into the lexer modules namespace but the functions are assigned to the Lua
--- module afterward so we cannot alias them beforehand. We're probably getting close to a stable
--- interface now. I've considered making a whole copy and patch the other functions too as we need an
--- extra nesting model. However, I don't want to maintain too much. An unfortunate change in 3.03 is
--- that no longer a script can be specified. This means that instead of loading the extensions via the
--- properties file, we now need to load them in our own lexers, unless of course we replace lexer.lua
+-- In 3.31 another major change took place: some helper constants (maybe they're no
+-- longer constants) and functions were moved into the lexer modules namespace but
+-- the functions are assigned to the Lua module afterward so we cannot alias them
+-- beforehand. We're probably getting close to a stable interface now. At that time
+-- for the first time I considered making a whole copy and patch the other functions
+-- too as we need an extra nesting model. However, I don't want to maintain too
+-- much. An unfortunate change in 3.03 is that no longer a script can be specified.
+-- This means that instead of loading the extensions via the properties file, we now
+-- need to load them in our own lexers, unless of course we replace lexer.lua
-- completely (which adds another installation issue).
--
--- Another change has been that _LEXERHOME is no longer available. It looks like more and more
--- functionality gets dropped so maybe at some point we need to ship our own dll/so files. For instance,
--- I'd like to have access to the current filename and other scite properties. We could then cache some
--- info with each file, if only we had knowledge of what file we're dealing with.
+-- Another change has been that _LEXERHOME is no longer available. It looks like
+-- more and more functionality gets dropped so maybe at some point we need to ship
+-- our own dll/so files. For instance, I'd like to have access to the current
+-- filename and other SciTE properties. We could then cache some info with each
+-- file, if only we had knowledge of what file we're dealing with. This all makes a
+-- nice installation more complex and (worse) makes it hard to share files between
+-- different editors using a similar directory structure.
--
--- For huge files folding can be pretty slow and I do have some large ones that I keep open all the time.
--- Loading is normally no ussue, unless one has remembered the status and the cursor is at the last line
--- of a 200K line file. Optimizing the fold function brought down loading of char-def.lua from 14 sec
--- => 8 sec. Replacing the word_match function and optimizing the lex function gained another 2+ seconds.
--- A 6 second load is quite ok for me. The changed lexer table structure (no subtables) brings loading
--- down to a few seconds.
+-- For huge files folding can be pretty slow and I do have some large ones that I
+-- keep open all the time. Loading is normally no issue, unless one has remembered
+-- the status and the cursor is at the last line of a 200K line file. Optimizing the
+-- fold function brought down loading of char-def.lua from 14 sec => 8 sec.
+-- Replacing the word_match function and optimizing the lex function gained another
+-- 2+ seconds. A 6 second load is quite ok for me. The changed lexer table structure
+-- (no subtables) brings loading down to a few seconds.
--
--- When the lexer path is copied to the textadept lexer path, and the theme definition to theme path
--- (as lexer.lua), the lexer works there as well. Although ... when I decided to check the state of
--- textadept i had to adapt some loader code. It's not pretty but works and also permits overloading.
--- When I have time and motive I will make a proper setup file to tune the look and feel a bit and
--- associate suffixes with the context lexer. The textadept editor has a nice style tracing option but
--- lacks the tabs for selecting files that scite has. It also has no integrated run that pipes to the
--- log pane. Interesting is that the jit version of textadept crashes on lexing large files (and does
--- not feel faster either; maybe a side effect of known limitations as we know that luajit is more
--- limited than stock lua). Btw, in the meantime on unix one can test easier as there we can enable
--- the loggers in this module.
+-- When the lexer path is copied to the TextAdept lexer path, and the theme
+-- definition to theme path (as lexer.lua), the lexer works there as well. Although
+-- ... when I decided to check the state of TextAdept I had to adapt some loader
+-- code. The solution is not pretty but works and also permits overloading. When I
+-- have time and motive I will make a proper setup file to tune the look and feel a
+-- bit more than we do now. The TextAdept editor now has tabs and a console so it
+-- has become more useable for me (it's still somewhat slower than SciTE).
+-- Interesting is that the jit version of TextAdept crashes on lexing large files
+-- (and does not feel faster either; maybe a side effect of known limitations as we
+-- know that Luajit is more limited than stock Lua).
--
--- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name .. '_whitespace' which means
--- that we need to have it frozen at the moment we load another lexer. Because spacing is used to revert
--- to a parent lexer we need to make sure that we load children as late as possible in order not to get
--- the wrong whitespace trigger. This took me quite a while to figure out (not being that familiar with
--- the internals). The lex and fold functions have been optimized. It is a pitty that there is no proper
--- print available. Another thing needed is a default style in our own theme style definition, as otherwise
--- we get wrong nested lexers, especially if they are larger than a view. This is the hardest part of
+-- Function load(lexer_name) starts with _lexers.WHITESPACE = lexer_name ..
+-- '_whitespace' which means that we need to have it frozen at the moment we load
+-- another lexer. Because spacing is used to revert to a parent lexer we need to
+-- make sure that we load children as late as possible in order not to get the wrong
+-- whitespace trigger. This took me quite a while to figure out (not being that
+-- familiar with the internals). The lex and fold functions have been optimized. It
+-- is a pity that there is no proper print available. Another thing needed is a
+-- default style in our own theme style definition, as otherwise we get wrong nested
+-- lexers, especially if they are larger than a view. This is the hardest part of
-- getting things right.
--
--- It's a pitty that there is no scintillua library for the OSX version of scite. Even better would be
--- to have the scintillua library as integral part of scite as that way I could use OSX alongside
--- windows and linux (depending on needs). Also nice would be to have a proper interface to scite then
--- because currently the lexer is rather isolated and the lua version does not provide all standard
--- libraries. It would also be good to have lpeg support in the regular scite lua extension (currently
--- you need to pick it up from someplace else).
+-- It's a pity that there is no scintillua library for the OSX version of SciTE.
+-- Even better would be to have the scintillua library as integral part of SciTE as
+-- that way I could use OSX alongside windows and linux (depending on needs). Also
+-- nice would be to have a proper interface to SciTE then because currently the
+-- lexer is rather isolated and the Lua version does not provide all standard
+-- libraries. It would also be good to have lpeg support in the regular SciTE Lua
+-- extension (currently you need to pick it up from someplace else). I keep hoping.
--
--- With 3.41 the interface changed again so it gets time to look into the C++ code and consider compiling
--- and patching myself. Loading is more complicated now as the lexer gets loaded automatically so we have
--- little control over extending the code now. After a few days trying all kind of solutions I decided to
--- follow a different approach: drop in a complete replacement. This of course means that I need to keep
--- track of even more changes (which for sure will happen) but at least I get rid of interferences. The
--- api (lexing and configuration) is simply too unstable across versions. Maybe in a few years things have
--- stabelized again. (Or maybe it's not really expected that one writes lexers at all.) A side effect is
--- that I now no longer will use shipped lexers but just the built-in ones in addition to the context
--- lpeg lexers. Not that it matters much as the context lexers cover what I need (and I can always write
--- more).
+-- With 3.41 the interface changed again so it became time to look into the C++ code
+-- and consider compiling and patching myself, something that I like to avoid.
+-- Loading is more complicated now as the lexer gets loaded automatically so we have
+-- little control over extending the code now. After a few days trying all kind of
+-- solutions I decided to follow a different approach: drop in a complete
+-- replacement. This of course means that I need to keep track of even more changes
+-- (which for sure will happen) but at least I get rid of interferences. Till 3.60
+-- the api (lexing and configuration) was simply too unstable across versions which
+-- is a pity because we expect authors to install SciTE without hassle. Maybe in a
+-- few years things will have stabilized. Maybe it's also not really expected that
+-- one writes lexers at all. A side effect is that I now no longer will use shipped
+-- lexers for languages that I made no lexer for, but just the built-in ones in
+-- addition to the ConTeXt lpeg lexers. Not that it matters much as the ConTeXt
+-- lexers cover what I need (and I can always write more). For editing TeX files one
+-- only needs a limited set of lexers (TeX, MetaPost, Lua, BibTeX, C/W, PDF, SQL,
+-- etc). I can add more when I want.
--
--- In fact, the transition to 3.41 was triggered by an unfateful update of Ubuntu which left me with an
--- incompatible SciTE and lexer library and updating was not possible due to the lack of 64 bit libraries.
--- We'll see what the future brings.
+-- In fact, the transition to 3.41 was triggered by an unfortunate update of Ubuntu
+-- which left me with an incompatible SciTE and lexer library and updating was not
+-- possible due to the lack of 64 bit libraries. We'll see what the future brings.
+-- For now I can use SciTE under wine on linux. The fact that scintillua ships
+-- independently is a showstopper.
--
--- Promissing is that the library now can use another Lua instance so maybe some day it will get properly
--- in SciTE and we can use more clever scripting.
+-- Promising is that the library now can use another Lua instance so maybe some day
+-- it will get properly in SciTE and we can use more clever scripting.
--
--- In some lexers we use embedded ones even if we could do it directly, The reason is that when the end
--- token is edited (e.g. -->), backtracking to the space before the begin token (e.g. <!--) results in
--- applying the surrounding whitespace which in turn means that when the end token is edited right,
--- backtracking doesn't go back. One solution (in the dll) would be to backtrack several space categories.
+-- In some lexers we use embedded ones even if we could do it directly. The reason
+-- is that when the end token is edited (e.g. -->), backtracking to the space before
+-- the begin token (e.g. <!--) results in applying the surrounding whitespace which
+-- in turn means that when the end token is edited right, backtracking doesn't go
+-- back. One solution (in the dll) would be to backtrack several space categories.
-- After all, lexing is quite fast (applying the result is much slower).
--
--- For some reason the first blob of text tends to go wrong (pdf and web). It would be nice to have 'whole
--- doc' initial lexing. Quite fishy as it makes it impossible to lex the first part well (for already opened
--- documents) because only a partial text is passed.
+-- For some reason the first blob of text tends to go wrong (pdf and web). It would
+-- be nice to have 'whole doc' initial lexing. Quite fishy as it makes it impossible
+-- to lex the first part well (for already opened documents) because only a partial
+-- text is passed.
--
--- So, maybe I should just write this from scratch (assuming more generic usage) because after all, the dll
--- expects just tables, based on a string. I can then also do some more aggressive resource sharing (needed
--- when used generic).
+-- So, maybe I should just write this from scratch (assuming more generic usage)
+-- because after all, the dll expects just tables, based on a string. I can then
+-- also do some more aggressive resource sharing (needed when used generic).
--
--- I think that nested lexers are still bugged (esp over longer ranges). It never was robust or maybe it's
--- simply not meant for too complex cases (well, it probably *is* tricky material). The 3.24 version was
--- probably the best so far. The fact that styles bleed between lexers even if their states are isolated is
--- an issue. Another issus is that zero characters in the text passed to the lexer can mess things up (pdf
--- files have them in streams).
+-- I think that nested lexers are still bugged (esp over longer ranges). It never
+-- was robust or maybe it's simply not meant for too complex cases (well, it
+-- probably *is* tricky material). The 3.24 version was probably the best so far.
+-- The fact that styles bleed between lexers even if their states are isolated is an
+-- issue. Another issue is that zero characters in the text passed to the lexer can
+-- mess things up (pdf files have them in streams).
--
--- For more complex 'languages', like web or xml, we need to make sure that we use e.g. 'default' for
--- spacing that makes up some construct. Ok, we then still have a backtracking issue but less.
+-- For more complex 'languages', like web or xml, we need to make sure that we use
+-- e.g. 'default' for spacing that makes up some construct. Ok, we then still have a
+-- backtracking issue but less.
--
--- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++ and we ship an ini
--- file for that editor with some installation instructions embedded.
+-- Good news for some ConTeXt users: there is now a scintillua plugin for notepad++
+-- and we ship an ini file for that editor with some installation instructions
+-- embedded. Also, TextAdept has a console so that we can run realtime. The spawner
+-- is still not perfect (sometimes hangs) but it was enough reason to spend time on
+-- making our lexer work with TextAdept and create a setup.
+--
+-- TRACING
+--
+-- The advantage is that we now can check more easily with regular Lua(TeX). We can
+-- also use wine and print to the console (somehow stdout is intercepted there.) So,
+-- I've added a bit of tracing. Interesting is to notice that each document gets its
+-- own instance which has advantages but also means that when we are spellchecking
+-- we reload the word lists each time. (In the past I assumed a shared instance and
+-- took some precautions. But I can fix this.)
--
-- TODO
--
--- I can make an export to context, but first I'll redo the code that makes the grammar,
--- as we only seem to need
+-- It would be nice if we could load some ConTeXt Lua modules (the basic set) and
+-- then use resolvers and such.
+--
+-- The current lexer basics are still a mix between old and new. Maybe I should redo
+-- some more. This is probably easier in TextAdept than in SciTE.
+--
+-- We have to make sure we don't overload ConTeXt definitions when this code is used
+-- in ConTeXt. I still have to add some of the goodies that we have there in lexers
+-- into these.
+--
+-- Maybe I should use a special stripped on the one hand and extended version of the
+-- dll (stable api) and at least add a bit more interfacing to scintilla.
+--
+-- I need to investigate if we can use the already built in Lua instance so that we
+-- can combine the power of lexing with extensions.
+--
+-- I need to play with hotspot and other properties like indicators (whatever they
+-- are).
+--
+-- I want to get rid of these lexers.STYLE_XX and lexers.XX things. This is possible
+-- when we give up compatibility. Generalize the helpers that I wrote for SciTE so
+-- that they also can be used in TextAdept.
+--
+-- I can make an export to ConTeXt, but first I'll redo the code that makes the
+-- grammar, as we only seem to need
--
-- lexer._TOKENSTYLES : table
-- lexer._CHILDREN : flag
@@ -199,38 +277,30 @@ local inspect = false -- can save some 15% (maybe easier on scintilla)
-- lexers.load : function
-- lexers.lex : function
--
--- So, if we drop compatibility with other lex definitions, we can make things simpler. Howeverm in the
--- meantime one can just do this:
+-- So, if we drop compatibility with other lex definitions, we can make things
+-- simpler. However, in the meantime one can just do this:
--
-- context --extra=listing --scite [--compact --verycompact] somefile.tex
--
--- and get a printable document. So, this todo is obsolete.
-
--- TRACING
+-- and get a printable document. So, this todo is a bit obsolete.
--
--- The advantage is that we now can check more easily with regular Lua(TeX). We can also use wine and print
--- to the console (somehow stdout is intercepted there.) So, I've added a bit of tracing. Interesting is to
--- notice that each document gets its own instance which has advantages but also means that when we are
--- spellchecking we reload the word lists each time. (In the past I assumed a shared instance and took
--- some precautions.)
-
--- todo: make sure we don't overload context definitions when used in context
+-- Properties is an ugly mess ... due to changes in the interface we're now left
+-- with some hybrid that sort of works ok
--- properties is an ugly mess ... due to chages in the interface we're now left with some hybrid
--- that sort of works ok
+-- textadept: buffer:colourise(0,-1)
local lpeg = require("lpeg")
local global = _G
-local find, gmatch, match, lower, upper, gsub, sub, format = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format
+local find, gmatch, match, lower, upper, gsub, sub, format, byte = string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub, string.sub, string.format, string.byte
local concat, sort = table.concat, table.sort
local type, next, setmetatable, rawset, tonumber, tostring = type, next, setmetatable, rawset, tonumber, tostring
local R, P, S, V, C, Cp, Cs, Ct, Cmt, Cc, Cf, Cg, Carg = lpeg.R, lpeg.P, lpeg.S, lpeg.V, lpeg.C, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg, lpeg.Carg
local lpegmatch = lpeg.match
+local usage = (textadept and "textadept") or (resolvers and "context") or "scite"
local nesting = 0
-
-local print = (textadept and ui and ui.print) or print
+local print = textadept and ui and ui.print or print
local function report(fmt,str,...)
if log then
@@ -679,21 +749,34 @@ local locations = {
-- end
-- end
-local function collect(name)
- local rootlist = lexers.LEXERPATH or "."
- for root in gmatch(rootlist,"[^;]+") do
- local root = gsub(root,"/[^/]-lua$","")
- for i=1,#locations do
- local fullname = root .. "/" .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc
- if trace then
- report("attempt to locate '%s'",fullname)
- end
- local okay, result = pcall(function () return dofile(fullname) end)
- if okay then
- return result, fullname
+local collect
+
+if usage == "context" then
+
+ collect = function(name)
+ return require(name), name
+ end
+
+else
+
+ collect = function(name)
+ local rootlist = lexers.LEXERPATH or "."
+ for root in gmatch(rootlist,"[^;]+") do
+ local root = gsub(root,"/[^/]-lua$","")
+ for i=1,#locations do
+ local fullname = root .. "/" .. locations[i] .. "/" .. name .. ".lua" -- so we can also check for .luc
+ if trace then
+ report("attempt to locate '%s'",fullname)
+ end
+ local okay, result = pcall(function () return dofile(fullname) end)
+ if okay then
+ return result, fullname
+ end
end
end
+ -- return require(name), name
end
+
end
function context.loadluafile(name)
@@ -1371,25 +1454,33 @@ local function add_lexer(grammar, lexer) -- mostly the same as the original
end
local function build_grammar(lexer,initial_rule) -- same as the original
- local children = lexer._CHILDREN
+ local children = lexer._CHILDREN
local lexer_name = lexer._NAME
- if children then
+ local preamble = lexer._preamble
+ local grammar = lexer._grammar
+ if grammar then
+ -- experiment
+ elseif children then
if not initial_rule then
initial_rule = lexer_name
end
- local grammar = { initial_rule }
+ grammar = { initial_rule }
add_lexer(grammar, lexer)
lexer._INITIALRULE = initial_rule
- lexer._GRAMMAR = Ct(P(grammar))
+ grammar = Ct(P(grammar))
if trace then
report("building grammar for '%s' with whitespace '%s'and %s children",lexer_name,lexer.whitespace or "?",#children)
end
else
- lexer._GRAMMAR = Ct(join_tokens(lexer)^0)
+ grammar = Ct(join_tokens(lexer)^0)
if trace then
report("building grammar for '%s' with whitespace '%s'",lexer_name,lexer.whitespace or "?")
end
end
+ if preamble then
+ grammar = preamble^-1 * grammar
+ end
+ lexer._GRAMMAR = grammar
end
-- So far. We need these local functions in the next one.
@@ -1534,7 +1625,7 @@ function context.lex(lexer,text,init_style)
if trace then
report("lexing '%s' with initial style '%s' and %s children",lexer._NAME,#lexer._CHILDREN or 0,init_style)
end
- return matched(lexer,grammar,text)
+ return result
else
if trace then
report("lexing '%s' with initial style '%s'",lexer._NAME,init_style)
@@ -1733,7 +1824,7 @@ function context.loadlexer(filename,namespace)
lexer = load_lexer(filename,namespace) or nolexer(filename,namespace)
usedlexers[filename] = lexer
--
- if not lexer._rules and not lexer._lexer then
+ if not lexer._rules and not lexer._lexer and not lexer_grammar then
lexer._lexer = parent_lexer
end
--
@@ -1765,16 +1856,19 @@ function context.loadlexer(filename,namespace)
end
--
local _r = lexer._rules
- if _r then
+ local _g = lexer._grammar
+ if _r or _g then
local _s = lexer._tokenstyles
if _s then
for token, style in next, _s do
add_style(lexer, token, style)
end
end
- for i=1,#_r do
- local rule = _r[i]
- add_rule(lexer, rule[1], rule[2])
+ if _r then
+ for i=1,#_r do
+ local rule = _r[i]
+ add_rule(lexer, rule[1], rule[2])
+ end
end
build_grammar(lexer)
end
@@ -2001,10 +2095,20 @@ do
-- return make(tree)
-- end
- helpers.utfcharpattern = P(1) * R("\128\191")^0 -- unchecked but fast
+ local utf8next = R("\128\191")
+ local utf8one = R("\000\127")
+ local utf8two = R("\194\223") * utf8next
+ local utf8three = R("\224\239") * utf8next * utf8next
+ local utf8four = R("\240\244") * utf8next * utf8next * utf8next
+
+ helpers.utfcharpattern = P(1) * utf8next^0 -- unchecked but fast
+ helpers.utfbytepattern = utf8one / byte
+ + utf8two / function(s) local c1, c2 = byte(s,1,2) return c1 * 64 + c2 - 12416 end
+ + utf8three / function(s) local c1, c2, c3 = byte(s,1,3) return (c1 * 64 + c2) * 64 + c3 - 925824 end
+ + utf8four / function(s) local c1, c2, c3, c4 = byte(s,1,4) return ((c1 * 64 + c2) * 64 + c3) * 64 + c4 - 63447168 end
- local p_false = P(false)
- local p_true = P(true)
+ local p_false = P(false)
+ local p_true = P(true)
local function make(t)
local function making(t)