diff options
Diffstat (limited to 'context/data/scite/context/lexers/scite-context-lexer-pdf-object.lua')
-rw-r--r-- | context/data/scite/context/lexers/scite-context-lexer-pdf-object.lua | 136 |
1 files changed, 136 insertions, 0 deletions
-- Source: context/data/scite/context/lexers/scite-context-lexer-pdf-object.lua
--
-- LPeg based lexer definition for a single pdf object (dictionary, array or
-- number, optionally wrapped in "<n> <m> obj ... endobj" and optionally
-- followed by a stream). The module returns the lexer table; its grammar
-- fragments are also exposed through the _shared field.

local info = {
    version   = 1.002,
    comment   = "scintilla lpeg lexer for pdf objects",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- no longer used: nesting lexers with whitespace in start/stop is unreliable

local P, R, S, C, V = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.V

local lexer    = require("lexer")
local context  = lexer.context
local patterns = context.patterns

local token = lexer.token

local pdfobjectlexer = lexer.new("pdfobj","scite-context-lexer-pdf-object")
local whitespace     = pdfobjectlexer.whitespace

-- basic patterns taken from the shared context pattern collection

local space     = patterns.space
local spacing   = patterns.spacing
local nospacing = patterns.nospacing
local anything  = patterns.anything
local newline   = patterns.eol
local real      = patterns.real
local cardinal  = patterns.cardinal

local lparent = P("(")
local rparent = P(")")
local langle  = P("<")
local rangle  = P(">")
local escape  = P("\\")

-- The byte order mark that starts a unicode hex string. Hex digits in a pdf
-- hex string may be written in either case, so "feff", "FEFF" and mixed
-- spellings are all accepted.

local unicodetrigger = S("fF") * S("eE") * S("fF") * S("fF")

-- a name is a slash followed by one or more non-delimiter characters

local nametoken = 1 - space - S("<>/[]()")
local name      = P("/") * nametoken^1

-- The content of a literal string: escaped characters, balanced nested
-- parentheses (the grammar recurses via V(1)) or anything up to the closing
-- parenthesis.

local p_string = P { ( escape * anything + lparent * V(1) * rparent + (1 - rparent) )^0 }

local t_spacing = token(whitespace, spacing)
local t_spaces  = token(whitespace, spacing)^0
local t_rest    = token("default", nospacing) -- anything

local p_stream    = P("stream")
local p_endstream = P("endstream")
local p_obj       = P("obj")
local p_endobj    = P("endobj")
local p_reference = P("R")

local p_objectnumber = patterns.cardinal
local p_comment      = P("%") * (1-S("\n\r"))^0

-- ( ... ) literal string

local t_string = token("quote", lparent)
               * token("string", p_string)
               * token("quote", rparent)

-- < feff ... > hex string that starts with the unicode trigger

local t_unicode = token("quote", langle)
                * token("plain", unicodetrigger)
                * token("string", (1-rangle)^1)
                * token("quote", rangle)

-- < ... > any other hex string

local t_whatsit = token("quote", langle)
                * token("string", (1-rangle)^1)
                * token("quote", rangle)

-- The same name pattern gets a different style depending on where it is
-- used: t_keyword for dictionary keys, t_constant for values.

local t_keyword  = token("command", name)
local t_constant = token("constant", name)
local t_number   = token("number", real)

-- t_reference = token("number", cardinal)
--             * t_spacing
--             * token("number", cardinal)

-- The pdf keywords are case sensitive and the null object is spelled in
-- lowercase, so we match "null" (not "NULL").

local t_reserved = token("number", P("true") + P("false") + P("null"))

-- indirect reference: <cardinal> <cardinal> R

local t_reference = token("warning", cardinal)
                  * t_spacing
                  * token("warning", cardinal)
                  * t_spacing
                  * token("keyword", p_reference)

local t_comment = token("comment", p_comment)

-- Object header: <cardinal> <cardinal> obj, where the spacing that follows
-- each number is made part of the number token itself.

local t_openobject = token("warning", p_objectnumber * spacing)
--                 * t_spacing
                   * token("warning", p_objectnumber * spacing)
--                 * t_spacing
                   * token("keyword", p_obj)

local t_closeobject = token("keyword", p_endobj)

local t_opendictionary  = token("grouping", P("<<"))
local t_closedictionary = token("grouping", P(">>"))

local t_openarray  = token("grouping", P("["))
local t_closearray = token("grouping", P("]"))

-- todo: comment

-- Everything between "stream" and "endstream" is tokenized as alternating
-- runs of non-spacing text and whitespace.

local t_stream = token("keyword", p_stream)
--             * token("default", newline * (1-newline*p_endstream*newline)^1 * newline)
--             * token("text", (1 - p_endstream)^1)
               * (token("text", (1 - p_endstream - spacing)^1) + t_spacing)^1
               * token("keyword", p_endstream)

-- Grammar (as a plain table with "dictionary" as initial rule) for a possibly
-- nested dictionary. In "whatever" the order of alternatives matters: a
-- reference has to be tried before a number and the unicode hex string before
-- the generic one.

local t_dictionary = { "dictionary",
    dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary,
    array      = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray,
    whatever   = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit,
}

----- t_object = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg)
-----     object     = t_spaces * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject,
-----     dictionary = t_opendictionary * (t_spaces * t_keyword * t_spaces * V("whatever"))^0 * t_spaces * t_closedictionary,
-----     array      = t_openarray * (t_spaces * V("whatever"))^0 * t_spaces * t_closearray,
-----     whatever   = V("dictionary") + V("array") + t_constant + t_reference + t_string + t_unicode + t_number + t_reserved + t_whatsit,
-----     number     = t_number,
----- }

-- Grammar for a complete object. It reuses the dictionary rules and adds the
-- optional object header, the optional stream after a dictionary and the
-- mandatory "endobj".

local t_object = { "object", -- weird that we need to catch the end here (probably otherwise an invalid lpeg)
    dictionary = t_dictionary.dictionary,
    array      = t_dictionary.array,
    whatever   = t_dictionary.whatever,
    object     = t_openobject^-1 * t_spaces * (V("dictionary") * t_spaces * t_stream^-1 + V("array") + V("number") + t_spaces) * t_spaces * t_closeobject,
    number     = t_number,
}

-- fragments made available on the lexer table under _shared

pdfobjectlexer._shared = {
    dictionary = t_dictionary,
    object     = t_object,
    stream     = t_stream,
}

pdfobjectlexer._rules = {
    { "whitespace", t_spacing }, -- in fact, here we don't want whitespace as it's top level lexer work
    { "object",     t_object  },
}

pdfobjectlexer._tokenstyles = context.styleset

return pdfobjectlexer