local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for pdf objects",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- no longer used: nesting lexers with whitespace in start/stop is unreliable

local P, R, S, C, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.V

local lexer             = require("scite-context-lexer")
local context           = lexer.context
local patterns          = context.patterns

local token             = lexer.token

local pdfobjectlexer    = lexer.new("pdfobj","scite-context-lexer-pdf-object")
local whitespace        = pdfobjectlexer.whitespace

local space             = patterns.space
local spacing           = patterns.spacing
local nospacing         = patterns.nospacing
local anything          = patterns.anything
local newline           = patterns.eol
local real              = patterns.real
local cardinal          = patterns.cardinal

local lparent           = P("(")
local rparent           = P(")")
local langle            = P("<")
local rangle            = P(">")
local escape            = P("\\")
local unicodetrigger    = P("feff") + P("FEFF") -- hex is case insensitive, so catch both spellings of the utf-16 bom

local nametoken         = 1 - space - S("<>/[]()")
local name              = P("/") * nametoken^1
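
-- a few names as they occur in pdf files; the #xx hex escape from the pdf
-- spec gets no special treatment here, "#" is simply a name character:
--
--   /Type   /MediaBox   /Font#20Name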

local p_string          = P { ( escape * anything + lparent * V(1) * rparent + (1 - rparent) )^0 }
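
-- the recursive rule above keeps parentheses balanced: V(1) re-enters the
-- grammar at each nested "(" and escape * anything consumes "\(" and "\)"
-- before they can open or close a level; a quick (hypothetical) check with
-- plain lpeg, not part of the lexer itself:
--
--   assert(lpeg.match(lparent * p_string * rparent, "(a (b \\) c) d)"))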

local t_spacing         = token(whitespace, spacing)
local t_spaces          = token(whitespace, spacing)^0
local t_rest            = token("default",  nospacing) -- anything

local p_stream          = P("stream")
local p_endstream       = P("endstream")
local p_obj             = P("obj")
local p_endobj          = P("endobj")
local p_reference       = P("R")

local p_objectnumber    = patterns.cardinal
local p_comment         = P("%") * (1-S("\n\r"))^0

local t_string          = token("quote",    lparent)
                        * token("string",   p_string)
                        * token("quote",    rparent)
local t_unicode         = token("quote",    langle)
                        * token("plain",    unicodetrigger)
                        * token("string",   (1-rangle)^1)
                        * token("quote",    rangle)
local t_whatsit         = token("quote",    langle)
                        * token("string",   (1-rangle)^1)
                        * token("quote",    rangle)
local t_keyword         = token("command",  name)
local t_constant        = token("constant", name)
local t_number          = token("number",   real)
--    t_reference       = token("number",   cardinal)
--                      * t_spacing
--                      * token("number",   cardinal)
local t_reserved        = token("number",   P("true") + P("false") + P("null")) -- the pdf keyword is lowercase "null"
local t_reference       = token("warning",  cardinal)
                        * t_spacing
                        * token("warning",  cardinal)
                        * t_spacing
                        * token("keyword",  p_reference)

local t_comment         = token("comment",  p_comment)

local t_openobject      = token("warning",  p_objectnumber * spacing)
--                         * t_spacing
                        * token("warning",  p_objectnumber * spacing)
--                         * t_spacing
                        * token("keyword",  p_obj)
local t_closeobject     = token("keyword",  p_endobj)
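
-- an object is bracketed by "objectnumber generation obj" (e.g. "12 0 obj")
-- and "endobj"; the spacing after each number is folded into the number
-- token itself, which is why the explicit t_spacing steps are commented out
-- above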

local t_opendictionary  = token("grouping", P("<<"))
local t_closedictionary = token("grouping", P(">>"))

local t_openarray       = token("grouping", P("["))
local t_closearray      = token("grouping", P("]"))

-- todo: hook t_comment into the grammar (it is defined above but not used yet)

local t_stream          = token("keyword", p_stream)
--                      * token("default", newline * (1-newline*p_endstream*newline)^1 * newline)
--                         * token("text", (1 - p_endstream)^1)
                        * (token("text", (1 - p_endstream-spacing)^1) + t_spacing)^1
                        * token("keyword", p_endstream)

local t_dictionary      = { "dictionary",
                            dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary,
                            array      = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray,
                            whatever   = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit,
                        }
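
-- an illustrative dictionary that exercises most branches of "whatever":
--
--   << /Type /Page /Count 10 /Parent 2 0 R
--      /MediaBox [ 0 0 595.276 841.89 ] >>
--
-- keys (t_keyword) and name values (t_constant) share the same name pattern;
-- the grammar tells them apart purely by position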

----- t_object          = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg)
-----                       object     = t_spaces * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject,
-----                       dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary,
-----                       array      = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray,
-----                       whatever   = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit,
-----                       number     = t_number,
-----                   }

local t_object          = { "object", -- oddly enough the end also has to be caught here (otherwise the grammar is probably not a valid lpeg)
                            dictionary = t_dictionary.dictionary,
                            array      = t_dictionary.array,
                            whatever   = t_dictionary.whatever,
                            object     = t_openobject^-1 * t_spaces * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject,
                            number     = t_number,
                        }
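
-- putting it all together, a complete object as matched by the "object"
-- rule (the stream content here is just a placeholder):
--
--   4 0 obj
--   << /Length 35 /Filter /FlateDecode >>
--   stream
--   ...
--   endstream
--   endobj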

pdfobjectlexer._shared = {
    dictionary  = t_dictionary,
    object      = t_object,
    stream      = t_stream,
}
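
-- the grammars are exported so that related lexers (presumably the whole
-- file pdf lexer, scite-context-lexer-pdf.lua in this same directory) can
-- reuse them instead of duplicating the patterns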

pdfobjectlexer._rules = {
    { "whitespace", t_spacing }, -- in fact, here we don't want whitespace as it's top level lexer work
    { "object",     t_object  },
}

pdfobjectlexer._tokenstyles = context.styleset

return pdfobjectlexer