local info = {
    version   = 1.002,
    comment   = "basics for scintilla lpeg lexer for context/metafun; contains copyrighted code from mitchell.att.foicica.com",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- The fold and lex functions are copied and patched from original code by Mitchell (see
-- lexer.lua). All errors are mine.
--
-- I've considered making a whole copy and patching the other functions too, as we need
-- an extra nesting model. However, I don't want to maintain too much. An unfortunate
-- change in 3.03 is that a script can no longer be specified. This means that instead
-- of loading the extensions via the properties file, we now need to load them in our
-- own lexers, unless of course we replace lexer.lua completely (which adds another
-- installation issue).
--
-- Another change is that _LEXERHOME is no longer available. It looks like more and
-- more functionality gets dropped, so maybe at some point we need to ship our own dll/so
-- files. For instance, I'd like to have access to the current filename and other scite
-- properties; that way we could cache some info with each file, if only we had
-- knowledge of what file we're dealing with.
--
-- For huge files folding can be pretty slow, and I do have some large ones that I keep
-- open all the time. Loading is normally no issue, unless one has remembered the status
-- and the cursor is at the last line of a 200K line file. Optimizing the fold function
-- brought loading of char-def.lua down from 14 to 8 seconds. Replacing the word_match
-- function and optimizing the lex function gained another 2+ seconds. A 6 second load
-- is quite ok for me.
--
-- When the lexer path is copied to the textadept lexer path, and the theme definition to
-- the theme path (as lexer.lua), the lexer works there as well. When I have time and
-- motivation I will make a proper setup file to tune the look and feel a bit and associate
-- suffixes with the context lexer. The textadept editor has a nice style tracing option but
-- lacks the tabs for selecting files that scite has. It also has no integrated run that
-- pipes to the log pane (I wonder if it could borrow code from the console2 project).
-- Interestingly, the jit version of textadept crashes on lexing large files (and does not
-- feel faster either).
--
-- Function load(lexer_name) starts with _M.WHITESPACE = lexer_name..'_whitespace', which
-- means that we need to have it frozen at the moment we load another lexer. Because spacing
-- is used to revert to a parent lexer, we need to make sure that we load children as late
-- as possible in order not to get the wrong whitespace trigger. This took me quite a while
-- to figure out (not being that familiar with the internals). The lex and fold functions
-- have been optimized. It is a pity that there is no proper print available. Another thing
-- needed is a default style in our own theme style definition, as otherwise we get wrongly
-- nested lexers, especially if they are larger than a view. This is the hardest part of
-- getting things right.
--
-- Eventually it might be safer to copy the other methods from lexer.lua here as well, so
-- that we have no dependencies apart from the c library (for which at some point the api
-- will be stable, I guess).
--
-- It's a pity that there is no scintillua library for the OSX version of scite.
-- Even better would be to have the scintillua library as an integral part of scite, as that
-- way I could use OSX alongside windows and linux (depending on needs). Also nice would be
-- to have a proper interface to scite then, because currently the lexer is rather isolated
-- and the lua version does not provide all standard libraries. It would also be good to
-- have lpeg support in the regular scite lua extension (currently you need to pick it up
-- from someplace else).

local lpeg = require 'lpeg'

local R, P, S, C, V, Cp, Cs, Ct, Cmt, Cc, Cf, Cg =
    lpeg.R, lpeg.P, lpeg.S, lpeg.C, lpeg.V, lpeg.Cp, lpeg.Cs, lpeg.Ct, lpeg.Cmt, lpeg.Cc, lpeg.Cf, lpeg.Cg
local lpegmatch = lpeg.match

local find, gmatch, match, lower, upper, gsub =
    string.find, string.gmatch, string.match, string.lower, string.upper, string.gsub
local concat = table.concat
local global = _G
local type, next, setmetatable, rawset = type, next, setmetatable, rawset

if lexer then
    -- in recent c++ code the lexername and loading is hard coded
elseif _LEXERHOME then
    dofile(_LEXERHOME .. '/lexer.lua') -- pre 3.03 situation
else
    dofile('lexer.lua') -- whatever
end

lexer.context    = lexer.context or { }
local context    = lexer.context

context.patterns = context.patterns or { }
local patterns   = context.patterns

lexer._CONTEXTEXTENSIONS = true

local locations = {
 -- lexer.context.path,
    "data", -- optional data directory
    "..",   -- regular scite directory
}

local function collect(name)
 -- local definitions = loadfile(name .. ".luc") or loadfile(name .. ".lua")
    local okay, definitions = pcall(function () return require(name) end)
    if okay then
        if type(definitions) == "function" then
            definitions = definitions()
        end
        if type(definitions) == "table" then
            return definitions
        end
    end
end

function context.loaddefinitions(name)
    for i=1,#locations do
        local data = collect(locations[i] .. "/" .. name)
        if data then
            return data
        end
    end
end

-- maybe more efficient:

function context.word_match(words,word_chars,case_insensitive)
    local chars = '%w_' -- maybe just "" when word_chars
    if word_chars then
        chars = '^([' .. chars .. gsub(word_chars,'([%^%]%-])', '%%%1') ..']+)'
    else
        chars = '^([' .. chars ..']+)'
    end
    if case_insensitive then
        local word_list = { }
        for i=1,#words do
            word_list[lower(words[i])] = true
        end
        return P(function(input, index)
            local s, e, word = find(input,chars,index)
            return word and word_list[lower(word)] and e + 1 or nil
        end)
    else
        local word_list = { }
        for i=1,#words do
            word_list[words[i]] = true
        end
        return P(function(input, index)
            local s, e, word = find(input,chars,index)
            return word and word_list[word] and e + 1 or nil
        end)
    end
end
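-- A small usage sketch (kept as a comment so that it does not execute as part of this
-- module; the file name and word list below are only illustrations, not definitions
-- shipped with this lexer):
--
-- local metapostdata = context.loaddefinitions("scite-context-data-metapost") -- table or nil
-- local primitives   = context.word_match({ "def", "vardef", "enddef" }, nil, true)
-- -- lpegmatch(primitives,"VARDEF") --> 7, the position just past the (case insensitive) match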
local idtoken = R("az","AZ","\127\255","__")
local digit   = R("09")
local sign    = S("+-")
local period  = P(".")
local space   = S(" \n\r\t\f\v")

patterns.idtoken  = idtoken
patterns.digit    = digit
patterns.sign     = sign
patterns.period   = period

patterns.cardinal = digit^1
patterns.integer  = sign^-1 * digit^1

patterns.real =
    sign^-1 * (                    -- at most one
        digit^1 * period * digit^0 -- 10.0 10.
      + digit^0 * period * digit^1 -- 0.10 .10
      + digit^1                    -- 10
    )

patterns.restofline = (1-S("\n\r"))^1
patterns.space      = space
patterns.spacing    = space^1
patterns.nospacing  = (1-space)^1
patterns.anything   = P(1)

function context.exact_match(words,word_chars,case_insensitive)
    local characters = concat(words)
    local pattern -- the concat catches _ etc
    if word_chars == true or word_chars == false or word_chars == nil then
        word_chars = ""
    end
    if type(word_chars) == "string" then
        pattern = S(characters) + idtoken
        if case_insensitive then
            pattern = pattern + S(upper(characters)) + S(lower(characters))
        end
        if word_chars ~= "" then
            pattern = pattern + S(word_chars)
        end
    elseif word_chars then
        pattern = word_chars
    end
    if case_insensitive then
        local list = { }
        for i=1,#words do
            list[lower(words[i])] = true
        end
        return Cmt(pattern^1, function(_,i,s)
            return list[lower(s)] -- and i or nil
        end)
    else
        local list = { }
        for i=1,#words do
            list[words[i]] = true
        end
        return Cmt(pattern^1, function(_,i,s)
            return list[s] -- and i or nil
        end)
    end
end

-- spell checking (we can only load lua files)
--
-- return {
--     min = 3,
--     max = 40,
--     n = 12345,
--     words = {
--         ["someword"]    = "someword",
--         ["anotherword"] = "Anotherword",
--     },
-- }

local lists = { }

function context.setwordlist(tag,limit) -- returns hash (lowercase keys and original values)
    if not tag or tag == "" then
        return false, 3
    end
    local list = lists[tag]
    if not list then
        list = context.loaddefinitions("spell-" .. tag)
        if not list or type(list) ~= "table" then
            list = { words = false, min = 3 }
        else
            list.words = list.words or false
            list.min   = list.min or 3
        end
        lists[tag] = list
    end
    return list.words, list.min
end

patterns.wordtoken   = R("az","AZ","\127\255")
patterns.wordpattern = patterns.wordtoken^3 -- todo: if limit and #s < limit then

function context.checkedword(validwords,validminimum,s,i) -- ,limit
    if not validwords then -- or #s < validminimum then
        return true, { "text", i } -- { "default", i }
    else
        -- keys are lower
        local word = validwords[s]
        if word == s then
            return true, { "okay", i } -- exact match
        elseif word then
            return true, { "warning", i } -- case issue
        else
            local word = validwords[lower(s)]
            if word == s then
                return true, { "okay", i } -- exact match
            elseif word then
                return true, { "warning", i } -- case issue
            elseif upper(s) == s then
                return true, { "warning", i } -- probably a logo or acronym
            else
                return true, { "error", i }
            end
        end
    end
end

function context.styleofword(validwords,validminimum,s) -- ,limit
    if not validwords or #s < validminimum then
        return "text"
    else
        -- keys are lower
        local word = validwords[s]
        if word == s then
            return "okay" -- exact match
        elseif word then
            return "warning" -- case issue
        else
            local word = validwords[lower(s)]
            if word == s then
                return "okay" -- exact match
            elseif word then
                return "warning" -- case issue
            elseif upper(s) == s then
                return "warning" -- probably a logo or acronym
            else
                return "error"
            end
        end
    end
end
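-- A minimal sketch of how these helpers can be used together (assuming a spell-en.lua
-- file in one of the locations above that returns a table in the format shown before
-- setwordlist; kept as a comment so nothing runs here):
--
-- local validwords, validminimum = context.setwordlist("en")
-- local okay, result = context.checkedword(validwords,validminimum,"Someword",1)
-- -- result is { "okay", 1 }, { "warning", 1 } or { "error", 1 }, depending on the list
-- -- context.styleofword(validwords,validminimum,"Someword") returns just the style name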
-- overloaded functions

local FOLD_BASE   = SC_FOLDLEVELBASE
local FOLD_HEADER = SC_FOLDLEVELHEADERFLAG
local FOLD_BLANK  = SC_FOLDLEVELWHITEFLAG

local get_style_at      = GetStyleAt
local get_property      = GetProperty
local get_indent_amount = GetIndentAmount

local h_table, b_table, n_table = { }, { }, { }

setmetatable(h_table, { __index = function(t,level)
    local v = { level, FOLD_HEADER }
    t[level] = v
    return v
end })

setmetatable(b_table, { __index = function(t,level)
    local v = { level, FOLD_BLANK }
    t[level] = v
    return v
end })

setmetatable(n_table, { __index = function(t,level)
    local v = { level }
    t[level] = v
    return v
end })

-- local newline = P("\r\n") + S("\r\n")
-- local splitlines = Ct( ( Ct ( (Cp() * Cs((1-newline)^1) * newline^-1) + (Cp() * Cc("") * newline) ) )^0)
--
-- local lines = lpegmatch(splitlines,text) -- iterating over lines is faster
-- for i=1, #lines do
--     local li = lines[i]
--     local line = li[2]
--     if line ~= "" then
--         local pos = li[1]
--         for i=1,nofpatterns do
--             for s, m in gmatch(line,patterns[i]) do
--                 if hash[m] then
--                     local symbols = fold_symbols[get_style_at(start_pos + pos + s - 1)]
--                     if symbols then
--                         local l = symbols[m]
--                         if l then
--                             local t = type(l)
--                             if t == 'number' then
--                                 current_level = current_level + l
--                             elseif t == 'function' then
--                                 current_level = current_level + l(text, pos, line, s, match)
--                             end
--                             if current_level < FOLD_BASE then -- integrate in previous
--                                 current_level = FOLD_BASE
--                             end
--                         end
--                     end
--                 end
--             end
--         end
--         if current_level > prev_level then
--             folds[line_num] = h_table[prev_level] -- { prev_level, FOLD_HEADER }
--         else
--             folds[line_num] = n_table[prev_level] -- { prev_level }
--         end
--         prev_level = current_level
--     else
--         folds[line_num] = b_table[prev_level] -- { prev_level, FOLD_BLANK }
--     end
--     line_num = line_num + 1
-- end

local newline = P("\r\n") + S("\r\n")
local p_yes   = Cp() * Cs((1-newline)^1) * newline^-1
local p_nop   = newline

local function fold_by_parsing(text,start_pos,start_line,start_level,lexer)
    local foldsymbols = lexer._foldsymbols
    if not foldsymbols then
        return { }
    end
    local patterns = foldsymbols._patterns
    if not patterns then
        return { }
    end
    local nofpatterns = #patterns
    if nofpatterns == 0 then
        return { }
    end
    local folds = { }
    local line_num = start_line
    local prev_level = start_level
    local current_level = prev_level
    local validmatches = foldsymbols._validmatches
    if not validmatches then
        validmatches = { }
        for symbol, matches in next, foldsymbols do -- whatever = { start = 1, stop = -1 }
            if not find(symbol,"^_") then -- brrr
                for s, _ in next, matches do
                    validmatches[s] = true
                end
            end
        end
        foldsymbols._validmatches = validmatches
    end
    local function action_y(pos,line) -- we can consider moving the local functions outside (drawback: folds is kept)
        for i=1,nofpatterns do
            for s, m in gmatch(line,patterns[i]) do
                if validmatches[m] then
                    local symbols = foldsymbols[get_style_at(start_pos + pos + s - 1)]
                    if symbols then
                        local action = symbols[m]
                        if action then
                            if type(action) == 'number' then -- we could store this in validmatches if there was only one symbol category
                                current_level = current_level + action
                            else
                                current_level = current_level + action(text,pos,line,s,m)
                            end
                            if current_level < FOLD_BASE then
                                current_level = FOLD_BASE
                            end
                        end
                    end
                end
            end
        end
        if current_level > prev_level then
            folds[line_num] = h_table[prev_level] -- { prev_level, FOLD_HEADER }
        else
            folds[line_num] = n_table[prev_level] -- { prev_level }
        end
        prev_level = current_level
        line_num = line_num + 1
    end
    local function action_n()
        folds[line_num] = b_table[prev_level] -- { prev_level, FOLD_BLANK }
        line_num = line_num + 1
    end
    if lexer._reset_parser then
        lexer._reset_parser()
    end
    local lpegpattern = (p_yes/action_y + p_nop/action_n)^0 -- not too efficient but indirect function calls are neither
    lpegmatch(lpegpattern,text) -- keys are not pressed that fast ... large files are slow anyway
    return folds
end
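-- fold_by_parsing expects the lexer to carry a _foldsymbols specification. The following
-- sketch only illustrates the shape such a table can have; the names are made up and the
-- real keys depend on the lexer at hand and on how lexer.lua resolves them to styles:
--
-- lexer._foldsymbols = {
--     _patterns    = { "\\%a+", "[{}]" },                         -- lua patterns scanned per line
--     ["command"]  = { ["\\startfoo"] = 1, ["\\stopfoo"] = -1 },  -- symbol -> level change
--     ["grouping"] = { ["{"] = 1, ["}"] = -1 },                   -- a function(text,pos,line,s,m) also works
-- }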
local function fold_by_indentation(text,start_pos,start_line,start_level)
    local folds = { }
    local current_line = start_line
    local prev_level = start_level
    for _, line in gmatch(text,'([\t ]*)(.-)\r?\n') do
        if line ~= "" then
            local current_level = FOLD_BASE + get_indent_amount(current_line)
            if current_level > prev_level then -- next level
                local i = current_line - 1
                while true do
                    local f = folds[i]
                    if f and f[2] == FOLD_BLANK then
                        i = i - 1
                    else
                        break
                    end
                end
                local f = folds[i]
                if f then
                    f[2] = FOLD_HEADER
                end -- low indent
                folds[current_line] = n_table[current_level] -- { current_level } -- high indent
            elseif current_level < prev_level then -- prev level
                local f = folds[current_line - 1]
                if f then
                    f[1] = prev_level -- high indent
                end
                folds[current_line] = n_table[current_level] -- { current_level } -- low indent
            else -- same level
                folds[current_line] = n_table[prev_level] -- { prev_level }
            end
            prev_level = current_level
        else
            folds[current_line] = b_table[prev_level] -- { prev_level, FOLD_BLANK }
        end
        current_line = current_line + 1
    end
    return folds
end

local function fold_by_line(text,start_pos,start_line,start_level)
    local folds = { }
    for _ in gmatch(text,".-\r?\n") do
        folds[start_line] = n_table[start_level] -- { start_level }
        start_line = start_line + 1
    end
    return folds
end

local threshold_by_lexer       = 512 * 1024 -- we don't know the filesize yet
local threshold_by_parsing     = 512 * 1024 -- we don't know the filesize yet
local threshold_by_indentation = 512 * 1024 -- we don't know the filesize yet
local threshold_by_line        = 512 * 1024 -- we don't know the filesize yet

function context.fold(text,start_pos,start_line,start_level) -- hm, we had size thresholds .. where did they go
    if text == '' then
        return { }
    end
    local lexer = global._LEXER
    local fold_by_lexer   = lexer._fold
    local fold_by_symbols = lexer._foldsymbols
    local filesize = 0 -- we don't know that
    if fold_by_lexer then
        if filesize <= threshold_by_lexer then
            return fold_by_lexer(text,start_pos,start_line,start_level,lexer)
        end
    elseif fold_by_symbols and get_property('fold.by.parsing',1) > 0 then
        if filesize <= threshold_by_parsing then
            return fold_by_parsing(text,start_pos,start_line,start_level,lexer)
        end
    elseif get_property('fold.by.indentation',1) > 0 then
        if filesize <= threshold_by_indentation then
            return fold_by_indentation(text,start_pos,start_line,start_level,lexer)
        end
    elseif get_property('fold.by.line',1) > 0 then
        if filesize <= threshold_by_line then
            return fold_by_line(text,start_pos,start_line,start_level,lexer)
        end
    end
    return { }
end
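-- context.fold prefers a lexer specific _fold function; otherwise the scite properties
-- fold.by.parsing, fold.by.indentation and fold.by.line (all read with get_property and
-- defaulting to 1) decide which fallback is used. An illustrative properties fragment
-- (not part of this file) could look like:
--
--   fold.by.parsing=1
--   fold.by.indentation=0
--   fold.by.line=0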
-- The following code is mostly unchanged:

local function add_rule(lexer, id, rule)
    if not lexer._RULES then
        lexer._RULES = {}
        lexer._RULEORDER = {}
    end
    lexer._RULES[id] = rule
    lexer._RULEORDER[#lexer._RULEORDER + 1] = id
end

local function add_style(lexer, token_name, style)
    local len = lexer._STYLES.len
    if len == 32 then
        len = len + 8
    end
    if len >= 128 then
        print('Too many styles defined (128 MAX)')
    end
    lexer._TOKENS[token_name] = len
    lexer._STYLES[len] = style
    lexer._STYLES.len = len + 1
end

local function join_tokens(lexer)
    local patterns, order = lexer._RULES, lexer._RULEORDER
    local token_rule = patterns[order[1]]
    for i=2,#order do
        token_rule = token_rule + patterns[order[i]]
    end
    lexer._TOKENRULE = token_rule
    return lexer._TOKENRULE
end

local function add_lexer(grammar, lexer, token_rule)
    local token_rule = join_tokens(lexer)
    local lexer_name = lexer._NAME
    local children = lexer._CHILDREN
    for i=1,#children do
        local child = children[i]
        if child._CHILDREN then
            add_lexer(grammar, child)
        end
        local child_name = child._NAME
        local rules = child._EMBEDDEDRULES[lexer_name]
        local rules_token_rule = grammar['__'..child_name] or rules.token_rule
        grammar[child_name] = (-rules.end_rule * rules_token_rule)^0 * rules.end_rule^-1 * V(lexer_name)
        local embedded_child = '_' .. child_name
        grammar[embedded_child] = rules.start_rule * (-rules.end_rule * rules_token_rule)^0 * rules.end_rule^-1
        token_rule = V(embedded_child) + token_rule
    end
    grammar['__' .. lexer_name] = token_rule
    grammar[lexer_name] = token_rule^0
end

local function build_grammar(lexer, initial_rule)
    local children = lexer._CHILDREN
    if children then
        local lexer_name = lexer._NAME
        if not initial_rule then
            initial_rule = lexer_name
        end
        local grammar = { initial_rule }
        add_lexer(grammar, lexer)
        lexer._INITIALRULE = initial_rule
        lexer._GRAMMAR = Ct(P(grammar))
    else
        lexer._GRAMMAR = Ct(join_tokens(lexer)^0)
    end
end

-- so far. We need these local functions in the next one.

function context.lex(text,init_style)
    local lexer = global._LEXER
    local grammar = lexer._GRAMMAR
    if not grammar then
        return { }
    elseif lexer._LEXBYLINE then -- we could keep token
        local tokens = { }
        local offset = 0
        local noftokens = 0
        if true then
            for line in gmatch(text,'[^\r\n]*\r?\n?') do -- could be an lpeg
                local line_tokens = lpegmatch(grammar,line)
                if line_tokens then
                    for i=1,#line_tokens do
                        local token = line_tokens[i]
                        token[2] = token[2] + offset
                        noftokens = noftokens + 1
                        tokens[noftokens] = token
                    end
                end
                offset = offset + #line
                if noftokens > 0 and tokens[noftokens][2] ~= offset then
                    noftokens = noftokens + 1
                    tokens[noftokens] = { 'default', offset + 1 }
                end
            end
        else -- alternative
            local lasttoken, lastoffset
            for line in gmatch(text,'[^\r\n]*\r?\n?') do -- could be an lpeg
                local line_tokens = lpegmatch(grammar,line)
                if line_tokens then
                    for i=1,#line_tokens do
                        lasttoken = line_tokens[i]
                        lastoffset = lasttoken[2] + offset
                        lasttoken[2] = lastoffset
                        noftokens = noftokens + 1
                        tokens[noftokens] = lasttoken
                    end
                end
                offset = offset + #line
                if lastoffset ~= offset then
                    lastoffset = offset + 1
                    lasttoken = { 'default', lastoffset }
                    noftokens = noftokens + 1
                    tokens[noftokens] = lasttoken
                end
            end
        end
        return tokens
    elseif lexer._CHILDREN then
        -- as we cannot print, tracing is not possible ... this might change as we can as well
        -- generate them all in one go (sharing as much as possible)
        local hash = lexer._HASH
        if not hash then
            hash = { }
            lexer._HASH = hash
        end
        grammar = hash[init_style]
        if grammar then
            lexer._GRAMMAR = grammar
        else
            for style, style_num in next, lexer._TOKENS do
                if style_num == init_style then
                    -- the name of the lexers is filtered from the whitespace
                    -- specification
                    local lexer_name = match(style,'^(.+)_whitespace') or lexer._NAME
                    if lexer._INITIALRULE ~= lexer_name then
                        grammar = hash[lexer_name]
                        if not grammar then
                            build_grammar(lexer,lexer_name)
                            grammar = lexer._GRAMMAR
                            hash[lexer_name] = grammar
                        end
                    end
                    break
                end
            end
            grammar = grammar or lexer._GRAMMAR
            hash[init_style] = grammar
        end
        return lpegmatch(grammar,text)
    else
        return lpegmatch(grammar,text)
    end
end

-- todo: keywords: one lookup and multiple matches

function context.token(name, patt)
    return Ct(patt * Cc(name) * Cp())
end

lexer.fold        = context.fold
lexer.lex         = context.lex
lexer.token       = context.token
lexer.exact_match = context.exact_match
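-- A minimal sketch of how a lexer built on top of these helpers could define a keyword
-- token (the word list below is illustrative and not taken from an actual lexer file):
--
-- local token       = lexer.token       -- i.e. context.token
-- local exact_match = lexer.exact_match -- i.e. context.exact_match
-- local keyword     = token("keyword", exact_match { "btex", "etex", "verbatimtex" })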
-- helper .. alas ... the lexer's lua instance is rather crippled .. not even math is part of it

local floor = math and math.floor
local char  = string.char

if not floor then
    floor = function(n)
        return tonumber(string.format("%d",n))
    end
    math = math or { }
    math.floor = floor
end

local function utfchar(n)
    if n < 0x80 then
        return char(n)
    elseif n < 0x800 then
        return char(
            0xC0 + floor(n/0x40),
            0x80 + (n % 0x40)
        )
    elseif n < 0x10000 then
        return char(
            0xE0 + floor(n/0x1000),
            0x80 + (floor(n/0x40) % 0x40),
            0x80 + (n % 0x40)
        )
    elseif n < 0x40000 then
        return char(
            0xF0 + floor(n/0x40000),
            0x80 + floor(n/0x1000),
            0x80 + (floor(n/0x40) % 0x40),
            0x80 + (n % 0x40)
        )
    else
     -- return char(
     --     0xF1 + floor(n/0x1000000),
     --     0x80 + floor(n/0x40000),
     --     0x80 + floor(n/0x1000),
     --     0x80 + (floor(n/0x40) % 0x40),
     --     0x80 + (n % 0x40)
     -- )
        return "?"
    end
end

context.utfchar = utfchar

-- a helper from l-lpeg:

local gmatch = string.gmatch

local function make(t)
    local p
    for k, v in next, t do
        if not p then
            if next(v) then
                p = P(k) * make(v)
            else
                p = P(k)
            end
        else
            if next(v) then
                p = p + P(k) * make(v)
            else
                p = p + P(k)
            end
        end
    end
    return p
end

function lpeg.utfchartabletopattern(list)
    local tree = { }
    for i=1,#list do
        local t = tree
        for c in gmatch(list[i],".") do
            if not t[c] then
                t[c] = { }
            end
            t = t[c]
        end
    end
    return make(tree)
end

-- patterns.invisibles =
--     P(utfchar(0x00A0)) -- nbsp
--   + P(utfchar(0x2000)) -- enquad
--   + P(utfchar(0x2001)) -- emquad
--   + P(utfchar(0x2002)) -- enspace
--   + P(utfchar(0x2003)) -- emspace
--   + P(utfchar(0x2004)) -- threeperemspace
--   + P(utfchar(0x2005)) -- fourperemspace
--   + P(utfchar(0x2006)) -- sixperemspace
--   + P(utfchar(0x2007)) -- figurespace
--   + P(utfchar(0x2008)) -- punctuationspace
--   + P(utfchar(0x2009)) -- breakablethinspace
--   + P(utfchar(0x200A)) -- hairspace
--   + P(utfchar(0x200B)) -- zerowidthspace
--   + P(utfchar(0x202F)) -- narrownobreakspace
--   + P(utfchar(0x205F)) -- math thinspace

patterns.invisibles = lpeg.utfchartabletopattern {
    utfchar(0x00A0), -- nbsp
    utfchar(0x2000), -- enquad
    utfchar(0x2001), -- emquad
    utfchar(0x2002), -- enspace
    utfchar(0x2003), -- emspace
    utfchar(0x2004), -- threeperemspace
    utfchar(0x2005), -- fourperemspace
    utfchar(0x2006), -- sixperemspace
    utfchar(0x2007), -- figurespace
    utfchar(0x2008), -- punctuationspace
    utfchar(0x2009), -- breakablethinspace
    utfchar(0x200A), -- hairspace
    utfchar(0x200B), -- zerowidthspace
    utfchar(0x202F), -- narrownobreakspace
    utfchar(0x205F), -- math thinspace
}

-- now we can make:

patterns.iwordtoken   = patterns.wordtoken - patterns.invisibles
patterns.iwordpattern = patterns.iwordtoken^3

-- require("themes/scite-context-theme")

-- In order to deal with some bug in additional styles (I have no clue what is
-- wrong, but additional styles get ignored and clash somehow) I just copy the
-- original lexer code ... see original for comments.