summaryrefslogtreecommitdiff
path: root/context/data/scite/context/scite-ctx-bidi.lua
blob: 5e75f7e4c2ac168c56f66fd7a30432093ab9a5a2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
-- Register this file in the global "modules" table (created here when it does not exist yet).
-- NOTE(review): the "comment" key occurs twice in the constructor below, so only one of the two
-- strings can end up in the table (presumably the last one) -- confirm which one is wanted.
if not modules then modules = { } end modules ['scite-ctx-bidi'] = {
    version   = 1.001,
    comment   = "companion to scite-ctx.lua",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
    comment   = "Unicode bidi (sort of) variant c",
}

-- Partial comment from typo-duc.lua:
--
-- This is a follow up on typo-dua which itself is a follow up on t-bidi by Khaled Hosny which
-- in turn is based on minibidi.c from Arabeyes. This is a further optimization, as well as
-- an update on some recent unicode bidi developments. There is (and will) also be more control
-- added. As a consequence this module is somewhat slower than its precursor which itself is
-- slower than the one-pass bidi handler. This is also a playground and I might add some plugin
-- support. However, in the meantime performance got a bit better and this third variant is again
-- some 10% faster than the second variant.
--
-- ... some patches and updates applied
-- ... some code can be removed
-- ... has to be kept in sync with context
-- ... mtxrun --script interface
--
-- ... this feature is more fun than useful
-- ... this way we can use it to check what scite / uniscribe sees (as scintilla is weak on
-- bidi selection)

-- Cache the global in a local.
local setmetatable = setmetatable

-- Lookup tables, loaded from the companion data file.
local data = require("context.lexers.data.scite-context-data-bidi")

local directiondata  = data.directions  -- character -> bidi direction class (build_list falls back to "l")
local mirrordata     = data.mirrors     -- character -> mirrored counterpart
local textclassdata  = data.textclasses -- character -> "open" / "close" (used for fence pairing)

-- setmetatable(directiondata,{ __index = function(t,k) local v = "l" t[k] = v return v end })

local maximum_stack  = 0xFF -- unicode: 60, will be jumped to 125, we don't care too much
local analyze_fences = false -- when true, process() lets resolve_levels run resolve_fences (N0)

-- Original direction classes whose level is reset to the base level in the L1 step of
-- resolve_levels.
local whitespace = {
    lre = true,
    rle = true,
    lro = true,
    rlo = true,
    pdf = true,
    bn  = true,
    ws  = true,
}

-- Direction classes that resolve_neutral treats as neutral (N1, N2).
local b_s_ws_on = {
    b   = true,
    s   = true,
    ws  = true,
    on  = true
}

-- Shared defaults for space entries: build_list creates an empty table per space and lets
-- this metatable supply the fields until they are overwritten.
local mt_space  = { __index = { char = 0x0020, direction = "ws",  original = "ws",  level = 0 } }
----- mt_lre    = { __index = { char = 0x202A, direction = "lre", original = "lre", level = 0 } }
----- mt_rle    = { __index = { char = 0x202B, direction = "rle", original = "rle", level = 0 } }
----- mt_pdf    = { __index = { char = 0x202C, direction = "pdf", original = "pdf", level = 0 } }
----- mt_object = { __index = { char = 0xFFFC, direction = "on",  original = "on",  level = 0 } }

-- Scratch stack used by both resolve_fences and resolve_explicit; slots are reused between calls.
local stack = { } -- shared

-- Any slot that does not exist yet is created as an empty table on first access.
setmetatable(stack, { __index = function(t,k) local v = { } t[k] = v return v end })

-- P1: turn the incoming array of characters into an array of entries, in place.
--
-- Every slot of "list" is replaced by a table carrying "char", "direction", "original" and
-- "level". A slot equal to " " becomes an empty table that inherits its fields from mt_space;
-- any other value gets its direction from directiondata, with "l" as fallback.
--
-- Returns the (same) list and the number of entries.
local function build_list(list)
    local size = #list
    for index=1,size do
        local char = list[index]
        local entry
        if char ~= " " then
            local direction = directiondata[char] or "l"
            entry = {
                char      = char,
                direction = direction,
                original  = direction,
                level     = 0,
            }
        else
            entry = setmetatable({ },mt_space)
        end
        list[index] = entry
    end
    return list, size
end

-- N0: pair up opening and closing fences (brackets) inside the run start..limit.
--
-- Only entries whose direction is "on" and that have an entry in mirrordata are considered.
-- Such an entry always gets its "mirror" and "class" fields set. Openers are pushed on the
-- shared stack as { mirror, index, false }. A closer is matched against the stack from the top
-- down: the first opener whose mirror equals the closing character is paired with it (both
-- entries get a "paired" field pointing at each other) and the stack is popped through that
-- opener, so it cannot be paired a second time. A closer without a matching opener leaves the
-- stack untouched (compare BD16 in the Unicode bidi algorithm).
--
-- list   : array of entries as made by build_list
-- size   : not used here
-- start  : first index of the run
-- limit  : last index of the run
--
-- funny effects, not always better, so it's an option
local function resolve_fences(list,size,start,limit)
    local nofstack = 0
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "on" then
            local char   = entry.char
            local mirror = mirrordata[char]
            if mirror then
                local class = textclassdata[char]
                entry.mirror = mirror
                entry.class  = class
                if class == "open" then
                    nofstack       = nofstack + 1
                    local stacktop = stack[nofstack]
                    stacktop[1]    = mirror
                    stacktop[2]    = i
                    stacktop[3]    = false -- not used
                elseif class == "close" then
                    -- search downwards without destroying the stack when nothing matches
                    for depth=nofstack,1,-1 do
                        local candidate = stack[depth]
                        if candidate[1] == char then
                            local open = candidate[2]
                            list[open].paired = i
                            entry.paired      = open
                            -- drop the matched opener and everything pushed after it
                            nofstack = depth - 1
                            break
                        end
                    end
                end
            end
        end
    end
end

-- P2, P3: determine the base level of the paragraph.
--
-- An explicit direction wins: "TRT" gives level 1, "TLT" gives level 0. Otherwise the first
-- entry (among the first "size" entries) with direction "r" or "al" selects level 1 and the
-- first one with direction "l" selects level 0.
--
-- Returns three values: the level (0 or 1), the matching direction name ("TLT" or "TRT") and
-- a boolean that is false only when nothing decided the direction and the default was used.
local function get_baselevel(list,size,direction)
    if direction == "TLT" then
        return 0, "TLT", true
    end
    if direction == "TRT" then
        return 1, "TRT", true
    end
    local index = 1
    while index <= size do
        local found = list[index].direction
        if found == "l" then
            return 0, "TLT", true
        elseif found == "al" or found == "r" then -- and an ?
            return 1, "TRT", true
        end
        index = index + 1
    end
    return 0, "TLT", false
end

-- X1 .. X7: assign embedding levels from the explicit embedding and override controls.
--
-- Walks the first "size" entries of "list" while tracking the current level and override,
-- starting from "baselevel" and no override ("on"):
--
--   rle / rlo : push the current state, go to the next odd level (rlo also forces "r")
--   lre / lro : push the current state, go to the next even level (lro also forces "l")
--   pdf       : pop the state that the matching embedding or override pushed
--   others    : get the current level and, when an override is active, its direction
--
-- Handled controls get their level set, their direction changed into "bn" and are flagged
-- with "remove". Embeddings and overrides that do not fit on the stack any more (more than
-- maximum_stack) are left untouched. The state is kept on the shared stack as { level, override }.
--
-- A pdf without a matching embedding has nothing to pop: it keeps the current state and is
-- only marked like any other handled control. (Popping from an empty stack would fetch a
-- fresh empty slot and thereby turn the level into nil.)
local function resolve_explicit(list,size,baselevel)
    -- X1
    local level    = baselevel
    local override = "on"
    local nofstack = 0
    for i=1,size do
        local entry     = list[i]
        local direction = entry.direction
        local embed_r   = direction == "rle" or direction == "rlo"
        local embed_l   = direction == "lre" or direction == "lro"
        if embed_r or embed_l then
            -- X2 .. X5
            if nofstack < maximum_stack then
                nofstack       = nofstack + 1
                local stacktop = stack[nofstack]
                stacktop[1]    = level
                stacktop[2]    = override
                if embed_r then
                    level = level + (level % 2 == 1 and 2 or 1) -- least_greater_odd(level)
                else
                    level = level + (level % 2 == 1 and 1 or 2) -- least_greater_even(level)
                end
                if direction == "rlo" then
                    override = "r"
                elseif direction == "lro" then
                    override = "l"
                else
                    override = "on"
                end
                entry.level     = level
                entry.direction = "bn"
                entry.remove    = true
            end
        elseif direction == "pdf" then
            -- X7
            if nofstack > 0 then
                local stacktop = stack[nofstack]
                level          = stacktop[1]
                override       = stacktop[2]
                nofstack       = nofstack - 1
            end
            entry.level     = level
            entry.direction = "bn"
            entry.remove    = true
        else
            -- X6
            entry.level = level
            if override ~= "on" then
                entry.direction = override
            end
        end
    end
    -- X8 (reset states and overrides after paragraph)
end

-- W1 .. W7: resolve the weak direction classes inside the run start..limit.
--
-- list        : array of entries; only the "direction" fields are read and changed
-- size        : not used here
-- start       : first index of the run
-- limit       : last index of the run
-- orderbefore : direction assumed in front of the run ("l" or "r" when called from resolve_levels)
-- orderafter  : direction assumed behind the run
--
-- The steps run in order and each one sees the result of the previous ones:
--
--   W1 : "nsm" takes the direction of the previous entry (orderbefore at the start of the run)
--   W2 : "en" becomes "an" when the nearest preceding "al", "r" or "l" in the run is "al"
--   W3 : "al" becomes "r"
--   W4 : a single "es" between two "en", or a single "cs" between two "en" or two "an",
--        takes the direction of its neighbours
--   W5 : a sequence of "et" next to an "en" becomes "en"
--   W6 : remaining "es", "et" and "cs" become "on"
--   W7 : "en" becomes "l" when the nearest preceding "l" or "r" (orderbefore when there is
--        none in the run) is "l"
local function resolve_weak(list,size,start,limit,orderbefore,orderafter)
    -- W1: non spacing marks get the direction of the previous character
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "nsm" then
            if i == start then
                entry.direction = orderbefore
            else
                entry.direction = list[i-1].direction
            end
        end
    end
    -- W2: mess with numbers and arabic
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "al" then
                    entry.direction = "an"
                    break
                elseif direction == "r" or direction == "l" then
                    break
                end
            end
        end
    end
    -- W3
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "al" then
            entry.direction = "r"
        end
    end
    -- W4: make separators number; a separator needs a neighbour on both sides, so the first
    -- and the last entry of the run can never qualify
    for i=start+1,limit-1 do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" then
            if list[i-1].direction == "en" and list[i+1].direction == "en" then
                entry.direction = "en"
            end
        elseif direction == "cs" then
            local prevdirection = list[i-1].direction
            if prevdirection == "en" then
                if list[i+1].direction == "en" then
                    entry.direction = "en"
                end
            elseif prevdirection == "an" and list[i+1].direction == "an" then
                entry.direction = "an"
            end
        end
    end
    -- W5
    local i = start
    while i <= limit do
        if list[i].direction == "et" then
            local runstart = i
            local runlimit = i
            while runlimit < limit and list[runlimit+1].direction == "et" do
                runlimit = runlimit + 1
            end
            -- look in front of the sequence first; at the edge of the run we use the
            -- assumed direction and never peek at an entry outside the run
            local rundirection
            if runstart == start then
                rundirection = orderbefore
            else
                rundirection = list[runstart-1].direction
            end
            if rundirection ~= "en" then
                if runlimit == limit then
                    rundirection = orderafter
                else
                    rundirection = list[runlimit+1].direction
                end
            end
            if rundirection == "en" then
                for j=runstart,runlimit do
                    list[j].direction = "en"
                end
            end
            i = runlimit
        end
        i = i + 1
    end
    -- W6
    for i=start,limit do
        local entry     = list[i]
        local direction = entry.direction
        if direction == "es" or direction == "et" or direction == "cs" then
            entry.direction = "on"
        end
    end
    -- W7
    for i=start,limit do
        local entry = list[i]
        if entry.direction == "en" then
            local prev_strong = orderbefore
            for j=i-1,start,-1 do
                local direction = list[j].direction
                if direction == "l" or direction == "r" then
                    prev_strong = direction
                    break
                end
            end
            if prev_strong == "l" then
                entry.direction = "l"
            end
        end
    end
end

-- N1, N2: resolve the neutral entries inside the run start..limit.
--
-- A maximal sequence of entries whose direction is in b_s_ws_on is resolved as a whole. The
-- directions on both sides of the sequence are looked up (orderbefore / orderafter at the
-- edges of the run, with "en" and "an" counting as "r"):
--
--   N1 : both sides agree, so the sequence takes that direction
--   N2 : otherwise the level of the first entry of the sequence decides: "r" when odd, "l" when even
--
-- list        : array of entries; "direction" is changed, "level" is only read
-- size        : not used here
-- start       : first index of the run
-- limit       : last index of the run
-- orderbefore : direction assumed in front of the run
-- orderafter  : direction assumed behind the run
--
-- We step with an explicit index so that a resolved sequence can really be skipped; changing
-- the control variable of a numeric for loop has no effect on the iteration.
local function resolve_neutral(list,size,start,limit,orderbefore,orderafter)
    local i = start
    while i <= limit do
        local entry = list[i]
        if b_s_ws_on[entry.direction] then
            -- this needs checking
            local leading_direction, trailing_direction, resolved_direction
            local runstart = i
            local runlimit = runstart
            for j=runstart+1,limit do
                if b_s_ws_on[list[j].direction] then
                    runlimit = j
                else
                    break
                end
            end
            if runstart == start then
                leading_direction = orderbefore
            else
                leading_direction = list[runstart-1].direction
                if leading_direction == "en" or leading_direction == "an" then
                    leading_direction = "r"
                end
            end
            if runlimit == limit then
                trailing_direction = orderafter
            else
                trailing_direction = list[runlimit+1].direction
                if trailing_direction == "en" or trailing_direction == "an" then
                    trailing_direction = "r"
                end
            end
            if leading_direction == trailing_direction then
                -- N1
                resolved_direction = leading_direction
            else
                -- N2 / does the weird period
                resolved_direction = entry.level % 2 == 1 and "r" or "l"
            end
            for j=runstart,runlimit do
                list[j].direction = resolved_direction
            end
            i = runlimit
        end
        i = i + 1
    end
end

-- I1, I2: raise the level of the entries in start..limit according to their direction.
--
--   I1 (even level) : "r" goes up by one, "an" and "en" go up by two
--   I2 (odd level)  : "l", "en" and "an" go up by one
--
-- Only "list", "start" and "limit" are used; the other arguments are accepted but ignored.
local function resolve_implicit(list,size,start,limit,orderbefore,orderafter,baselevel)
    for index=start,limit do
        local item     = list[index]
        local kind     = item.direction
        local depth    = item.level
        local isnumber = kind == "an" or kind == "en"
        local bump     = 0
        if depth % 2 == 1 then
            -- I2
            if kind == "l" or isnumber then
                bump = 1
            end
        elseif kind == "r" then
            -- I1
            bump = 1
        elseif isnumber then
            -- I1
            bump = 2
        end
        if bump ~= 0 then
            item.level = depth + bump
        end
    end
end

-- X10, L1, L4: resolve the runs of the list and finalize levels and mirroring.
--
-- list           : array of entries as left behind by resolve_explicit
-- size           : number of entries
-- baselevel      : base level of the paragraph
-- analyze_fences : when set, brackets are paired (N0) and only paired ones keep their mirror
--
-- First the list is cut into runs. A run begins at "first" and its end "last" is moved forward
-- from first + 1 for as long as the entry at "last" has the level of the first entry (never
-- beyond size). The next run begins at "last". For each run the weak, (optionally) fence,
-- neutral and implicit steps are applied; the directions assumed in front of and behind the
-- run follow from the higher of the run level and the neighbouring level (odd is "r", even
-- is "l"), where the base level is the neighbour at the edges of the list.
--
-- After that (L1) separators and the whitespace in front of them, as well as trailing
-- whitespace, are put back at the base level, judged by the original direction. Finally (L4)
-- the "mirror" field is settled.
local function resolve_levels(list,size,baselevel,analyze_fences)
    -- X10
    local first = 1
    while first < size do
        local runlevel = list[first].level
        local last     = first + 1
        while last < size and list[last].level == runlevel do
            last = last + 1
        end
        local levelbefore = first == 1   and baselevel or list[first-1].level
        local levelafter  = last == size and baselevel or list[last+1].level
        -- the higher level on each boundary decides
        if runlevel > levelbefore then
            levelbefore = runlevel
        end
        if runlevel > levelafter then
            levelafter = runlevel
        end
        local orderbefore = "l"
        if levelbefore % 2 == 1 then
            orderbefore = "r"
        end
        local orderafter = "l"
        if levelafter % 2 == 1 then
            orderafter = "r"
        end
        -- W1 .. W7
        resolve_weak(list,size,first,last,orderbefore,orderafter)
        -- N0
        if analyze_fences then
            resolve_fences(list,size,first,last)
        end
        -- N1 .. N2
        resolve_neutral(list,size,first,last,orderbefore,orderafter)
        -- I1 .. I2
        resolve_implicit(list,size,first,last,orderbefore,orderafter,baselevel)
        first = last
    end
    -- L1
    for index=1,size do
        local original = list[index].original
        -- (1)
        if original == "s" or original == "b" then
            list[index].level = baselevel
            -- (2)
            local back = index - 1
            while back >= 1 and whitespace[list[back].original] do
                list[back].level = baselevel
                back = back - 1
            end
        end
    end
    -- (3)
    local tail = size
    while tail >= 1 and whitespace[list[tail].original] do
        list[tail].level = baselevel
        tail = tail - 1
    end
    -- L4
    for index=1,size do
        local item = list[index]
        local odd  = item.level % 2 == 1
        if analyze_fences then
            -- a mirror only survives on an odd level and when the fence got paired
            if item.mirror and not (odd and item.paired) then
                item.mirror = false
            end
        elseif odd then
            local mirror = mirrordata[item.char]
            if mirror then
                item.mirror = mirror
            end
        end
    end
end

-- Entry point: run the bidi analysis over an array of characters.
--
-- head      : array of characters; it is converted in place into an array of entries
-- direction : optional "TLT" or "TRT" to force the base direction, otherwise it is derived
--             from the content
--
-- Returns the list of entries and its size.
local function process(head,direction)
    local entries, count = build_list(head)
    -- only the level is needed, the other return values are dropped
    local baselevel = (get_baselevel(entries,count,direction))
    resolve_explicit(entries,count,baselevel)
    resolve_levels(entries,count,baselevel,analyze_fences)
    return entries, count
end

-- The module only exposes the entry point.
return {
    process = process,
}