-- path: root/context/data/scite/lexers/scite-context-lexer-txt.lua
-- blob: 07dff2970f26210cb3c52f83b3efface15adc74f
-- Everything needed from the global environment is captured in locals first.
-- With the standard Lua 5.1 module function (and no package.seeall), the
-- module(...) call below replaces this chunk's environment with the module
-- table, so globals such as lexer, lpeg and string are no longer reachable
-- by name afterwards.
local lexer = lexer
local token = lexer.token
local P, S, Cmt = lpeg.P, lpeg.S, lpeg.Cmt
local find, match = string.find, string.match

-- Lua 5.1 style module declaration: the global assignments further down
-- (_rules, _tokenstyles) become fields of the module table.
module(...)

-- The module table itself, as exposed by module(...) through _M.
local textlexer   = _M

local context     = lexer.context

-- NOTE(review): WHITESPACE is not assigned anywhere in this file; presumably
-- the lexer loader sets it on the module table before use -- confirm.
local whitespace  = textlexer.WHITESPACE -- triggers states

local space       = lexer.space
-- NOTE(review): 'any' is not referenced in the visible part of this file.
local any         = lexer.any

-- Word related patterns and helpers provided by the shared context lexer.
local wordtoken   = context.patterns.wordtoken
local wordpattern = context.patterns.wordpattern
local checkedword = context.checkedword
local setwordlist = context.setwordlist
-- Result of setwordlist for the language selected in the preamble, or false
-- when no language has been selected. Written by the preamble callback and
-- read by the word rule.
local validwords  = false

-- Preamble detection. When the document starts with one of the markers
-- # ! - % followed by a space, for example
--
--   % language=uk
--
-- the "language=<name>" setting on that first line is handed to setwordlist
-- and the result is stored in validwords for use by the word rule.
--
-- The pattern is a pure lookahead and the callback always returns false, so
-- this rule never yields a token itself; it only runs for its side effect on
-- validwords.

local p_preamble = Cmt(#(S("#!-%") * P(" ")), function(input,i) -- todo: utf bomb
    if i == 1 then -- < 10 then
        validwords = false
        -- Capture the first line only. In Lua patterns "." also matches line
        -- terminators, so a greedy ".+" would extend to the last line break of
        -- the whole input and let a "language=" found anywhere in the text
        -- select the word list. As before, a first line that is not yet
        -- terminated by a line break is ignored.
        local _, _, line = find(input,'^[#!%-%%]([^\n\r]+)[\n\r]',i)
        if line then
            local language = match(line,"language=([a-z]+)")
            if language then
                validwords = setwordlist(language)
            end
        end
    end
    return false
end)

-- Token wrapper around the preamble check. The check itself never matches,
-- so in practice no 'preamble' token is emitted by this rule.
local t_preamble = token('preamble', p_preamble)

-- Word rule. Each match of wordpattern is passed to a match-time callback:
-- with an active word list the decision is delegated to checkedword,
-- otherwise the match is accepted and a { "text", position } pair is
-- returned as the capture value.
-- NOTE(review): the third callback argument is whatever wordpattern captures,
-- presumably the word text -- confirm against context.patterns.
local t_word =
    Cmt(wordpattern, function(_, position, word)
        if not validwords then
            return true, { "text", position }
        end
        return checkedword(validwords, word, position)
    end)

-- A run of one or more wordtoken matches, styled as "default".
local t_text = token("default", wordtoken^1)

-- A run of one or more characters that are neither a wordtoken match nor
-- space, styled as "default".
local t_rest = token("default", (P(1) - wordtoken - space)^1)

-- A run of one or more space matches, tagged with the lexer's whitespace
-- token name.
local t_spacing = token(whitespace, space^1)

-- Rule list exported through the module table, as { name, pattern } pairs.
-- NOTE(review): rules are presumably tried in the listed order by the lexer
-- framework, so the order below matters -- confirm before reordering.
_rules = {
    { "whitespace", t_spacing  },
    { "preamble",   t_preamble },
    { "word",       t_word     }, -- words >= 3
    { "text",       t_text     }, -- non words
    { "rest",       t_rest     },
}

-- Token styles are taken unchanged from the shared context style set.
_tokenstyles = lexer.context.styleset