-- path: root/context/data/scite/lexers/scite-context-lexer-txt.lua
-- blob: 012167aeb4c06506e9f0d0e2378b50e34c5b1636 (plain)
-- Descriptive metadata for this lexer (version, purpose, authorship, license).
-- The table is not referenced anywhere else in this file.
local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for plain text (with spell checking)",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- Load the shared ConTeXt lexer module unless the (global) lexer table already
-- carries the _CONTEXTEXTENSIONS flag. NOTE(review): presumably that flag is
-- set by scite-context-lexer itself once it has been loaded -- confirm there.
if not lexer._CONTEXTEXTENSIONS then require("scite-context-lexer") end

-- Localize the global lexer table and the library functions used below.
local lexer = lexer
local token = lexer.token
local P, S, Cmt, Cp, Ct = lpeg.P, lpeg.S, lpeg.Cmt, lpeg.Cp, lpeg.Ct
local find, match = string.find, string.match

-- The lexer table that this file builds up and returns at the end.
local textlexer   = { _NAME = "txt", _FILENAME = "scite-context-lexer-txt" }
local whitespace  = lexer.WHITESPACE
local context     = lexer.context

-- Basic patterns provided by the lexer library ("any" is not used in this file).
local space       = lexer.space
local any         = lexer.any

-- Word related patterns and helpers provided through lexer.context;
-- checkedword is only referred to by the commented-out variant of t_word.
local wordtoken   = context.patterns.wordtoken
local wordpattern = context.patterns.wordpattern
local checkedword = context.checkedword
local styleofword = context.styleofword
local setwordlist = context.setwordlist
-- The active word list: false until the preamble check stores the result of
-- setwordlist here; it is passed to styleofword for every word.
local validwords  = false

-- local styleset    = context.newstyleset {
--     "default",
--     "text", "okay", "error", "warning",
--     "preamble",
-- }

-- Optional preamble on the first line of the input: one of the characters
-- # ! - % followed by a space, with a language tag somewhere on that line,
-- for instance:
--
--   % language=uk

-- The match-time function is only there for its side effect. When it is called
-- at the very start of the input (i == 1) it resets the active word list and,
-- if the first line contains "language=<lowercase letters>", stores the result
-- of setwordlist for that language in validwords. It always returns false, so
-- the pattern itself never matches and never consumes input.
local p_preamble = Cmt(#(S("#!-%") * P(" ")), function(input,i) -- todo: leading utf bom
    if i == 1 then -- < 10 then
        validwords = false
        -- Capture the first line only. The former "(.+)[\n\r]" was greedy and,
        -- as "." also matches line breaks, ran up to the last line break of the
        -- whole input: a "language=" anywhere in the text was then taken for a
        -- preamble tag, and a one-line input without a final line break was
        -- not inspected at all.
        local line = match(input,"^[#!%-%%]([^\n\r]+)",i)
        if line then
            local language = match(line,"language=([a-z]+)")
            if language then
                validwords = setwordlist(language)
            end
        end
    end
    return false
end)

-- Wrapped as a "preamble" token so that it can be listed among the rules.
local t_preamble =
    token("preamble", p_preamble)

-- local t_word =
--     Cmt(wordpattern, function(_,i,s)
--         if validwords then
--             return checkedword(validwords,s,i)
--         else
--             return true, { "text", i }
--         end
--     end)

-- Hands a matched word, together with the word list that is active at that
-- moment, to styleofword and passes on whatever that returns.
local function wordstyle(word)
    return styleofword(validwords,word)
end

-- Word rule: the values returned for a word matched by wordpattern, followed by
-- the position right after that word, collected in one table.
local t_word =
    Ct( (wordpattern / wordstyle) * Cp() )

-- One or more wordtoken characters in a row, in the default style.
local p_text    = wordtoken^1
local t_text    = token("default", p_text)

-- One or more characters that are neither wordtoken characters nor spaces,
-- also in the default style.
local p_rest    = (P(1) - wordtoken - space)^1
local t_rest    = token("default", p_rest)

-- One or more spaces, in the whitespace style of the lexer library.
local p_spacing = space^1
local t_spacing = token(whitespace, p_spacing)

-- The named rules of this lexer. NOTE(review): presumably the lexer framework
-- tries them in the listed order, which would explain why "word" comes before
-- the more general "text" and "rest" rules -- confirm in scite-context-lexer.
textlexer._rules = {
    { "whitespace", t_spacing  },
    { "preamble",   t_preamble },
    { "word",       t_word     }, -- words >= 3
    { "text",       t_text     }, -- non words
    { "rest",       t_rest     },
}

-- Token styles are the shared set kept in context.styleset.
textlexer._tokenstyles = context.styleset

-- Hand the finished lexer table to whoever loaded this file.
return textlexer