summaryrefslogtreecommitdiff
path: root/tex/context/base/mkxl/lang-rep.lmt
blob: fcaff523a35d6ce020bb4ec658f6e207c86f8bc3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
if not modules then modules = { } end modules ['lang-rep'] = {
    version   = 1.001,
    comment   = "companion to lang-rep.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- A BachoTeX 2013 experiment, probably not that useful. Eventually I used a simpler
-- more generic example. I'm sure no one ever notices or even needs this code.
--
-- As a follow up on a question by Alan about special treatment of dropped caps I wonder
-- if I can make this one more clever (probably in a few more dev steps). For instance
-- injecting nodes or replacing nodes. It's a prelude to a kind of lpeg for nodes,
-- although (given experiences so far) we don't really need that. After all, each problem
-- is somewhat unique.

local type, tonumber, next = type, tonumber, next
local gmatch, gsub = string.gmatch, string.gsub
local utfbyte, utfsplit = utf.byte, utf.split
local P, C, U, Cc, Ct, Cs, lpegmatch = lpeg.P, lpeg.C, lpeg.patterns.utf8character, lpeg.Cc, lpeg.Ct, lpeg.Cs, lpeg.match
local find = string.find

local zwnj     =  0x200C

-- One braced group: yields a table with the utfbyte value of each character
-- between the braces, or false when the group is empty.
local grouped  = P("{") * ( Ct((U/utfbyte-P("}"))^1) + Cc(false) ) * P("}")

-- Turns a replacement specification into a list of items:
--
--   {}          : the zero width non joiner code
--   {a}{b}{c}   : { "discretionary", pre, post, replace }
--   {a}         : { "noligature", list }
--   other       : the utfbyte value of the character
--
-- The alternatives are tried in this order, so an isolated {} never ends up
-- as an (empty) noligature group.
local splitter = Ct((
                    #P("{") * (
                        P("{}") / function() return zwnj end
                      + Ct(Cc("discretionary") * grouped * grouped * grouped)
                      + Ct(Cc("noligature")    * grouped)
                    )
                  + U/utfbyte
                )^1)

-- Strips one level of outer braces, but only when the opening brace at the
-- start really pairs with the closing brace at the end. Just testing the first
-- and last character would turn a specification like {a}{b}{c} into a}{b}{c
-- which then no longer splits into groups.
local stripper  do
    local nobrace = 1 - P("{") - P("}")
    local nested  = P { P("{") * (nobrace + lpeg.V(1))^0 * P("}") }
    stripper = P("{") * Cs((nobrace + nested)^0) * P("}") * P(-1)
end

-- Tracing flags, toggled through the tracker mechanism. In this file only
-- trace_replacements is consulted (when a word gets replaced and when the
-- handler gets enabled).
local trace_replacements = false  trackers.register("languages.replacements",         function(v) trace_replacements = v end)
local trace_details      = false  trackers.register("languages.replacements.details", function(v) trace_details      = v end)

-- Reporter used for the trace and warning messages of this module.
local report_replacement = logs.reporter("languages","replacements")

local glyph_code         = nodes.nodecodes.glyph
local glue_code          = nodes.nodecodes.glue

local spaceskip_code     = nodes.gluecodes.spaceskip
local xspaceskip_code    = nodes.gluecodes.xspaceskip

local nuts               = nodes.nuts

local getnext            = nuts.getnext
local getprev            = nuts.getprev
local getattr            = nuts.getattr
local getid              = nuts.getid
local getsubtype         = nuts.getsubtype
local getchar            = nuts.getchar
local isglyph            = nuts.isglyph

local setlink            = nuts.setlink
local setnext            = nuts.setnext
local setprev            = nuts.setprev
local setchar            = nuts.setchar
local setattrlist        = nuts.setattrlist
local setoptions         = nuts.setoptions

local glyphoptioncodes       = tex.glyphoptioncodes
local norightligature_option = glyphoptioncodes.norightligature
local noleftligature_option  = glyphoptioncodes.noleftligature

local insertbefore       = nuts.insertbefore
local insertafter        = nuts.insertafter
local remove_node        = nuts.remove
local copy_node          = nuts.copy
local flushlist          = nuts.flushlist

local nodepool           = nuts.pool
local new_disc           = nodepool.disc

local texsetattribute    = tex.setattribute
local unsetvalue         = attributes.unsetvalue

local enableaction       = nodes.tasks.enableaction

local v_reset            = interfaces.variables.reset

local implement          = interfaces.implement

local processors         = typesetters.processors
local splitprocessor     = processors.split

local replacements       = languages.replacements or { }
languages.replacements   = replacements

local a_replacements     = attributes.private("replacements")

local lists = { }
local last  = 0
local trees = { }

-- Categories are created on demand: the first access to an unknown name
-- allocates the next attribute number, registers the record under both that
-- number and the name, and remembers its (still empty) tree in 'trees'.
table.setmetatableindex(lists,function(registry,name)
    local attribute = last + 1
    local tree      = { }
    local record    = { name = name, list = tree, attribute = attribute }
    last                = attribute
    registry[attribute] = record
    registry[name]      = record
    trees[attribute]    = tree
    return record
end)

lists[v_reset].attribute = unsetvalue -- so we discard 0

-- todo: glue kern attr

-- Registers one word in the tree that starts at 'root'. Each character of the
-- word becomes one level (keyed by its utfbyte value) and the last level gets
-- a 'final' record that describes the replacement. An optional processor
-- prefix is split off the replacement and outer braces are stripped.
local function add(root,word,replacement)
    local processor, stripped = splitprocessor(replacement,true) -- no check
    stripped = lpegmatch(stripper,stripped) or stripped
    local chars = utfsplit(word) -- ,true)
    local size  = #chars
    local node  = root
    for i=1,size do
        local code   = utfbyte(chars[i])
        local branch = node[code]
        if not branch then
            branch = { }
            node[code] = branch
        end
        node = branch
    end
    if size > 0 then
        -- an existing entry for the same word gets overwritten
        local special = find(stripped,"{",1,true)
        local newlist = lpegmatch(splitter,stripped)
        node.final = {
            word        = word,
            replacement = stripped,
            processor   = processor,
            oldlength   = size,
            newcodes    = newlist,
            special     = special,
        }
    end
end

-- Adds replacements to a category. When 'word' is a table it is taken as a
-- mapping from words to replacements (the third argument is ignored then),
-- otherwise a single word is added and a missing replacement becomes "".
function replacements.add(category,word,replacement)
    local root = lists[category].list
    if type(word) ~= "table" then
        add(root,word,replacement or "")
        return
    end
    for old, new in next, word do
        add(root,old,new)
    end
end

-- local strip = lpeg.stripper("{}")

-- Adds a list of replacements to a category. The list is either a string with
-- whitespace separated entries or an indexed table. Each entry is the
-- replacement; the word to look for is that entry with all braces removed.
function languages.replacements.addlist(category,list)
    local root = lists[category].list
    local function register(new)
        local old = gsub(new,"[{}]","")
     -- local old = lpegmatch(strip,new)
        add(root,old,new)
    end
    if type(list) == "string" then
        for entry in gmatch(list,"%S+") do
            register(entry)
        end
    else
        for index=1,#list do
            register(list[index])
        end
    end
end

-- Builds a node list with one copy of 'template' per entry in 'list', each
-- copy getting the entry as its character. Returns the first node of the new
-- list, or nil when 'list' is empty.
local function tonodes(list,template)
    local first, tail
    for index=1,#list do
        local glyph = copy_node(template)
        setchar(glyph,list[index])
        if not first then
            first, tail = glyph, glyph
        else
            first, tail = insertafter(first,tail,glyph)
        end
    end
    return first
end

local ispunctuation = characters.is_punctuation

-- We can try to be clever and use the fact that there is no match to skip
-- over to the next word but that gives fuzzy code so for now I removed
-- that optimization (when I really need a high performance version myself
-- I will look into it, but so far I never used this mechanism myself).
--
-- We used to have the hit checker as a function but it got messy when checks
-- for punctuation were added.

-- Replaces the nodes first..last (a matched word) by what 'final' describes.
--
--   head     : head of the list being processed
--   first    : first node of the match
--   last     : last node of the match
--   final    : the record stored by 'add' (word, replacement, oldlength,
--              newcodes, special)
--   hasspace : when set the delete-and-insert route is taken, as it is when
--              the replacement has brace groups (final.special)
--   overload : optional function, called afterwards with 'final' and the
--              nodes after the one that preceded the match and before the
--              one that followed it
--
-- Returns the (possibly changed) head and the node that followed 'last' at
-- the moment of calling.
local function replace(head,first,last,final,hasspace,overload)
    local current   = first
    local prefirst  = getprev(first) or head
    local postlast  = getnext(last)
    local oldlength = final.oldlength
    local newcodes  = final.newcodes
    local newlength = newcodes and #newcodes or 0
    if trace_replacements then
        report_replacement("replacing word %a by %a",final.word,final.replacement)
    end
    if hasspace or final.special then
        -- It's easier to delete and insert so we do just that. On the todo list is
        -- turn injected spaces into glue but easier might be to let the char break
        -- handler do that ...
        local before = getprev(first)
        local old    = first
        -- cut first..last out of the list, we flush it when we're done because
        -- 'first' still serves as template for the new nodes
        setnext(last)
        setlink(before,postlast)
        current = before
        if not current then
            -- NOTE(review): when the match starts the list nothing below seems
            -- to link the new nodes to 'postlast' again -- confirm this cannot
            -- happen in practice
            head = nil
        end
        for index=1,newlength do
            local codes = newcodes[index]
            if type(codes) == "table" then
                local method = codes[1]
                if method == "discretionary" then
                    local pre, post, rep = codes[2], codes[3], codes[4]
                    if pre then
                        pre = tonodes(pre,first)
                    end
                    if post then
                        post = tonodes(post,first)
                    end
                    if rep then
                        rep = tonodes(rep,first)
                    end
                    -- todo: also set attr
                    local new = new_disc(pre,post,rep)
                    setattrlist(new,first)
                    head, current = insertafter(head,current,new)
                elseif method == "noligature" then
                    -- not that efficient to copy but ok for testing
                    local group = codes[2]
                    if group then
                        local n = #group
                        for g=1,n do
                            local new = copy_node(first)
                            setchar(new,group[g])
                            if g == 1 then
                                setoptions(new,norightligature_option)
                            elseif g == n then
                                setoptions(new,noleftligature_option | norightligature_option)
                            else
                                setoptions(new,noleftligature_option)
                            end
                            head, current = insertafter(head,current,new)
                        end
                    else
                     -- local new = copy_node(first)
                     -- setchar(new,zwnj)
                     -- head, current = insertafter(head,current,new)
                        setoptions(current,norightligature_option)
                    end
                else
                    report_replacement("unknown method %a",method or "?")
                end
            else
                local new = copy_node(first)
                setchar(new,codes)
                head, current = insertafter(head,current,new)
            end
        end
        flushlist(old)
    elseif newlength == 0 then
        -- we overload
    elseif oldlength == newlength then
        -- same length: only the characters change (and not even that when the
        -- replacement equals the word)
        if final.word ~= final.replacement then
            for i=1,newlength do
                setchar(current,newcodes[i])
                current = getnext(current)
            end
        end
    elseif oldlength < newlength then
        -- longer: put the extra characters in front and reuse the old nodes
        -- for the rest
        for i=1,newlength-oldlength do
            local n = copy_node(current)
            setchar(n,newcodes[i])
            head, current = insertbefore(head,current,n)
            current = getnext(current)
        end
        for i=newlength-oldlength+1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    else
        -- shorter: drop the surplus nodes and reuse the remaining ones
        for i=1,oldlength-newlength do
            head, current = remove_node(head,current,true)
        end
        for i=1,newlength do
            setchar(current,newcodes[i])
            current = getnext(current)
        end
    end
    if overload then
        overload(final,getnext(prefirst),getprev(postlast))
    end
    return head, postlast
end

-- we handle just one space

-- The node list pass. Glyphs that carry the replacements attribute are matched
-- character by character against the tree of the category that the attribute
-- points to; when a registered word has been seen the matched nodes are handed
-- to 'replace'. Returns the (possibly changed) head.
--
-- State that is kept while scanning:
--
--   mode                : false, "word" or "punc", depending on the glyph seen last
--   root                : the position in the tree, false when there's no match going on
--   wordstart, wordend  : first and last node of the current match
--   prevend, prevfinal  : the match that was complete right before a space, used
--                         as fallback when continuing over the space leads nowhere
--   hasspace            : set when the current match includes a space glue
function replacements.handler(head)
    local current   = head
    local overload  = attributes.applyoverloads
    local mode      = false -- we're in word or punctuation mode
    local wordstart = false
    local wordend   = false
    local prevend   = false
    local prevfinal = false
    local tree      = false
    local root      = false
    local hasspace  = false
    while current do
        local id = getid(current) -- or use the char getter
        if id == glyph_code then
            local a = getattr(current,a_replacements)
            if a then
                -- we have a run
                tree = trees[a]
                if tree then
                    local char = getchar(current)
                    local punc = ispunctuation[char]
                    -- a switch between word and punctuation mode ends a pending
                    -- match, so we flush it before we look at this glyph
                    if mode == "punc" then
                        if not punc then
                            if root then
                                local final = root.final
                                if final then
                                    head = replace(head,wordstart,wordend,final,hasspace,overload)
                                elseif prevfinal then
                                    head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                                end
                                prevfinal = false
                                root = false
                            end
                            mode = "word"
                        end
                    elseif mode == "word" then
                        if punc then
                            if root then
                                local final = root.final
                                if final then
                                    head = replace(head,wordstart,wordend,final,hasspace,overload)
                                elseif prevfinal then
                                    head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                                end
                                prevfinal = false
                                root = false
                            end
                            mode = "punc"
                        end
                    else
                        mode = punc and "punc" or "word"
                    end
                    if root then
                        -- continue the match, wordend only moves when this
                        -- character is part of a registered word
                        root = root[char]
                        if root then
                            wordend = current
                        end
                    else
                        -- no match going on: apply a pending fallback and see
                        -- if this character starts a new word
                        if prevfinal then
                            head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
                            prevfinal = false
                        end
                        root = tree[char]
                        if root then
                            wordstart = current
                            wordend   = current
                            prevend   = false
                            hasspace  = false
                        end
                    end
                else
                    root= false
                end
            else
                tree = false
            end
            current = getnext(current)
        elseif root then
            -- a non glyph while we're matching
            local final = root.final
            if mode == "word" and id == glue_code then
                local s = getsubtype(current)
                if s == spaceskip_code or s == xspaceskip_code then
                    local r = root[32] -- maybe more types
                    if r then
                        -- some registered word continues with a space: remember
                        -- a match that is complete at this point (only once) and
                        -- carry on over the space
                        if not prevend then
                            local f = root.final
                            if f then
                                prevend   = wordend
                                prevfinal = f
                            end
                        end
                        wordend  = current
                        root     = r
                        hasspace = true
                        goto moveon
                    end
                end
            end
            -- the match ends here; replace hands back the node to continue from
            if final then
                head, current = replace(head,wordstart,wordend,final,hasspace,overload)
            elseif prevfinal then
                head, current = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
            end
            prevfinal = false
            root = false
          ::moveon::
            current = getnext(current)
        else
            current = getnext(current)
        end
    end
    -- a match can still be pending when we run out of nodes
    if root then
        local final = root.final
        if final then
            head = replace(head,wordstart,wordend,final,hasspace,overload)
        elseif prevfinal then
            head = replace(head,wordstart,prevend,prevfinal,hasspace,overload)
        end
    end
    return head
end

local enabled = false

-- Makes category 'n' the current one by setting the replacements attribute,
-- or unsets that attribute when the reset keyword is given. The handler is
-- enabled the first time a real category is selected.
function replacements.set(n)
    local value
    if n ~= v_reset then
        value = lists[n].attribute
        if not enabled then
            enableaction("processors","languages.replacements.handler")
            if trace_replacements then
                report_replacement("enabling replacement handler")
            end
            enabled = true
        end
    else
        value = unsetvalue
    end
    texsetattribute(a_replacements,value)
end

-- interface

-- Select a category (or reset): one string argument.
implement {
    name      = "setreplacements",
    actions   = replacements.set,
    arguments = "string"
}

-- Add one replacement: category, word and replacement, all strings.
implement {
    name      = "addreplacements",
    actions   = replacements.add,
    arguments = "3 strings",
}

-- Add a list of replacements: category and the list as one string.
implement {
    name      = "addreplacementslist",
    actions   = replacements.addlist,
    arguments = "2 strings",
}