summaryrefslogtreecommitdiff
path: root/scripts/context/lua/mtx-grep.lua
blob: 9a4237737cfaaa715d32ccc663788178f7b5c191 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
-- Register this script (version and credits) in the global modules table,
-- creating that table when it does not exist yet. The key is the name of the
-- script itself so that it does not collide with the entry of another script.
if not modules then modules = { } end modules ['mtx-grep'] = {
    version   = 1.001,
    comment   = "companion to mtxrun.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- If needed this one can be optimized for speed as well as use some existing
-- helpers. We can quit faster on max, and probably use lpeg instead of find.

-- The help information of this script as an xml blob: metadata (name, detail,
-- version), the supported command line flags, some example invocations and a
-- general remark. It is handed unchanged to logs.application below.
local helpinfo = [[
<?xml version="1.0"?>
<application>
 <metadata>
  <entry name="name">mtx-grep</entry>
  <entry name="detail">Simple Grepper</entry>
  <entry name="version">0.10</entry>
 </metadata>
 <flags>
  <category name="basic">
   <subcategory>
    <flag name="pattern"><short>search for pattern (optional)</short></flag>
    <flag name="count"><short>count matches only</short></flag>
    <flag name="nocomment"><short>skip lines that start with %% or #</short></flag>
    <flag name="n"><short>show at most n matches</short></flag>
    <flag name="first"><short>only show first match</short></flag>
    <flag name="match"><short>return the match (if it is one)</short></flag>
    <flag name="xml"><short>pattern is lpath expression</short></flag>
   </subcategory>
  </category>
 </flags>
 <examples>
  <category>
   <title>Examples</title>
   <subcategory>
    <example><command>mtxrun --script grep --pattern=module *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern="modules.-%['(.-)'%]" char-*.lua --first</command></example>
    <example><command>mtxrun --script grep --pattern=module --count *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --first *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --nocomment *.mkiv</command></example>
    <example><command>mtxrun --script grep --pattern=module --n=10 *.mkiv</command></example>
   </subcategory>
  </category>
 </examples>
 <comments>
    <comment>patterns are lua patterns and need to be escaped accordingly</comment>
 </comments>
</application>
]]

-- The application object of this script: it gets the name, the banner and the
-- help information defined above and provides the report, export and help
-- functions that are used further down in this file.
local application = logs.application {
    name     = "mtx-grep",
    banner   = "Simple Grepper 0.10",
    helpinfo = helpinfo,
}

-- The reporter that belongs to this application.
local report = application.report

-- The (global) namespace of this script; both tables are created when they
-- are not there yet.
scripts      = scripts      or { }
scripts.grep = scripts.grep or { }

-- Local shortcuts to the string and lpeg functions used by the grepper.
local find, match, format = string.find, string.match, string.format
local lpegmatch = lpeg.match

-- The lpeg building blocks for splitting data into lines. A line ending is
-- "\r\n", "\r" or "\n"; the pair is tried first so that it counts as one line
-- ending. The content pattern captures one line: either the text in front of a
-- line ending (the line ending itself is consumed but not captured) or, when
-- that fails, all the remaining characters (a last line without line ending).
local cr       = lpeg.P("\r")
local lf       = lpeg.P("\n")
local crlf     = cr * lf
local newline  = crlf + cr + lf
local content  = lpeg.C((1-newline)^0) * newline + lpeg.C(lpeg.P(1)^1)

-- The output function: the first one available of logs.writer,
-- texio.write_nl and print.
local write_nl = (logs and logs.writer) or (texio and texio.write_nl) or print

-- local pattern = "LIJST[@TYPE='BULLET']/LIJSTITEM[contains(text(),'Kern')]"

-- 'Cc%(\\\"\\\"%)'

-- Search a set of files for a pattern and write the results with write_nl.
--
--   pattern : a lua pattern, or an lpath expression when --xml is given;
--             nothing happens when it is nil or an empty string
--   files   : a list of file specifications, each one is expanded by dir.glob
--   offset  : the index of the first entry in files to look at (default 1)
--
-- The following command line flags are consulted:
--
--   --count     : only report the number of matches per file plus a summary
--   --nocomment : ignore lines that start with % or # (not in xml mode)
--   --n=<max>   : show at most <max> matches per file
--   --first     : show only the first match per file (same as --n=1)
--   --match     : show what the pattern matched instead of the whole line
--   --xml       : load the files as xml and collect the elements that match
--
-- The function returns nothing.
function scripts.grep.find(pattern, files, offset)
    if not pattern or pattern == "" then
        return
    end

    statistics.starttiming(scripts.grep)

    local argument  = environment.argument
    local count     = argument("count")
    local nocomment = argument("nocomment")
    local max       = tonumber(argument("n")) or (argument("first") and 1) or false
    local domatch   = argument("match")
    local first     = offset or 1

    -- totals over all files
    local totalmatches, totalfiles, totalmatched = 0, 0, 0
    -- state of the file that is being processed
    local linenumber, hits, name = 0, 0, ""

    -- Used in count mode: add the matches of the current file to the totals
    -- and report them, but only when there are any.
    local function tally()
        if hits > 0 then
            totalmatches = totalmatches + hits
            totalmatched = totalmatched + 1
            write_nl(format("%5i  %s",hits,name))
            io.flush()
        end
    end

    -- Expand each file specification and call action for every file found;
    -- the name of that file is also kept in the shared name variable.
    local function eachfile(action)
        for i=first,#files do
            local found = dir.glob(files[i])
            for j=1,#found do
                name = found[j]
                action(name)
            end
        end
    end

    if argument("xml") then

        eachfile(function(filename)
            local tree = xml.load(filename)
            if not tree or tree.error then
                return
            end
            linenumber, hits, totalfiles = 0, 0, totalfiles + 1
            for element in xml.collected(tree,pattern) do
                hits = hits + 1
                if not count and (not max or hits <= max) then
                    write_nl(format("%s: %s",name,xml.tostring(element)))
                end
            end
            if count then
                tally()
            end
        end)

    else

        -- What to do with a line that matches is decided once, here, and not
        -- for each line again.
        local onmatch
        if count then
            onmatch = function()
                hits = hits + 1
            end
        else
            onmatch = function(line)
                hits = hits + 1
                if not max or hits <= max then
                    if domatch then
                        write_nl(match(line,pattern))
                    else
                        write_nl(format("%s %6i: %s",name,linenumber,line))
                    end
                    io.flush()
                end
            end
        end

        -- The same goes for the line checker: one variant skips the lines
        -- that start with % or #, the other one looks at every line. Both
        -- count all lines so that the reported line numbers are right.
        local check
        if nocomment then
            check = function(line)
                linenumber = linenumber + 1
                if not find(line,"^[%%#]") and find(line,pattern) then
                    onmatch(line)
                end
            end
        else
            check = function(line)
                linenumber = linenumber + 1
                if find(line,pattern) then
                    onmatch(line)
                end
            end
        end

        -- Feed each line of the data to the checker (todo: break out when
        -- max has been reached).
        local capture = (content/check)^0

        eachfile(function(filename)
            local data = io.loaddata(filename)
            if data then
                linenumber, hits, totalfiles = 0, 0, totalfiles + 1
                lpegmatch(capture,data)
                if count then
                    tally()
                end
            end
        end)

    end

    statistics.stoptiming(scripts.grep)

    if count and totalmatches > 0 then
        write_nl(format("\nfiles: %s, matches: %s, matched files: %s, runtime: %0.3f seconds",totalfiles,totalmatches,totalmatched,statistics.elapsedtime(scripts.grep)))
    end
end

-- The command line dispatcher. The pattern comes from --pattern; the files are
-- the remaining arguments, where files ends up as false or nil when none are
-- given.
local pattern = environment.argument("pattern")
local files   = environment.files and #environment.files > 0 and environment.files

if environment.argument("exporthelp") then
    -- There might be no files at all, in which case files is not a table and
    -- must not be indexed; no file name is passed then.
    application.export(environment.argument("exporthelp"),files and files[1] or nil)
elseif pattern and files then
    scripts.grep.find(pattern, files)
elseif files then
    -- Without --pattern the first argument is the pattern and the files to
    -- search start at the second one.
    scripts.grep.find(files[1], files, 2)
else
    application.help()
end