-- path: root/tex/context/base/mkxl/font-phb.lmt
-- blob: 67b068885b9ba6edb5a7e1a4d950b33c716106d7 (plain)

if not modules then modules = { } end modules ['font-phb'] = {
    version   = 1.000, -- 2016.10.10,
    comment   = "companion to font-txt.mkiv",
    original  = "derived from a prototype by Kai Eigner",
    author    = "Hans Hagen", -- so don't blame KE
    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files",
}

-- Some (historic) explanation can be found in the font-phb.lua file. To summarize:
-- this code is kind of old and originates from the times that Idris was making a font
-- that should work with context and uniscribe. When we started with mkiv there were
-- no libraries, but at some point Kai Eigner made an ffi interface to the harfbuzz
-- library that showed up. His code was adapted to ConTeXt so that we could test
-- Idris fonts (the library could use uniscribe which served as reference for his
-- fonts). Some experiences were wrapped up in articles. Interesting was that
-- sometimes context, uniscribe and/or native hb could not agree on how to interpret
-- font features and subtle differences could occur.
--
-- This file is made from font-phb.lua and I stripped the components code because
-- it made no sense. The files were eventually added because I did some cleanup and
-- didn't want to carry old stuff around without also sort of maintaining it. I can
-- probably strip away even more code. I might pick up this thread when Idris picks
-- up his font making.
--
-- Todo: use the new (context) advance and offset features.

local next, tonumber, pcall, rawget = next, tonumber, pcall, rawget

local concat        = table.concat
local sortedhash    = table.sortedhash
local formatters    = string.formatters

local fonts         = fonts
local otf           = fonts.handlers.otf
local texthandler   = otf.texthandler

local fontdata      = fonts.hashes.identifiers

local nuts          = nodes.nuts
local tonode        = nuts.tonode
local tonut         = nuts.tonut

local remove_node   = nuts.remove

local getboth       = nuts.getboth
local getnext       = nuts.getnext
local setnext       = nuts.setnext
local getprev       = nuts.getprev
local setprev       = nuts.setprev
local getid         = nuts.getid
local getchar       = nuts.getchar
local setchar       = nuts.setchar
local setlink       = nuts.setlink
local setoffsets    = nuts.setoffsets
local getwidth      = nuts.getwidth
local setwidth      = nuts.setwidth

local copy_node     = nuts.copy
local find_tail     = nuts.tail

local nodepool      = nuts.pool
local new_kern      = nodepool.fontkern
local new_glyph     = nodepool.glyph

local nodecodes     = nodes.nodecodes
local glyph_code    = nodecodes.glyph
local glue_code     = nodecodes.glue

-- Feature keys that are never turned into shaper features: the initializer below
-- skips every key listed here when it builds the feature set (language and script
-- are stored as separate fields of the per font data instead).
local skipped = {
    -- we assume that only valid features are set but maybe we need a list
    -- of valid hb features as there can be many context specific ones
    mode     = true,
    features = true,
    language = true,
    script   = true,
}

-- Codepoints that are accepted as "a space" when the first (leading) or last
-- (trailing) shaped result is inspected at the end of a run; only when the result
-- maps onto one of these the neighbouring glue gets compensated.
local seenspaces = {
    [0x0020] = true,
    [0x00A0] = true,
    [0x0009] = true, -- indeed
    [0x000A] = true, -- indeed
    [0x000D] = true, -- indeed
}

-- helpers

-- helpers     : utility functions that are exported via utilities.hb.helpers
-- methods     : shaper back ends indexed by method name (exported as well)
-- initialized : per font id cache of the data table that the initializer builds

local helpers     = { }
local methods     = { }
local initialized = { } -- we don't pollute the shared table

-- the defaults that are used when a font doesn't set the method or shaper feature

local method      = "internal" -- a bit misleading name: it's the optional module
local shaper      = "native"   -- "uniscribe"
local report      = logs.reporter("font plugin","hb")

-- the public interface: presumably this is what the method implementation files
-- use to hook themselves in (TODO confirm against the font-phb-imp-* files)

utilities.hb = {
    methods = methods,
    helpers = helpers,
    report  = report,
}

do

    local toutf8  = string.toutf8
    local toutf32 = string.toutf32

    -- Adds the optional boundary spaces (codepoint 32) to the given list, in place:
    -- a leading one ends up in slot 0 and a trailing one right after the last entry.
    -- The (modified) list itself is returned.

    local function addspaces(text,leading,trailing)
        if leading then
            text[0] = 32
        end
        if trailing then
            text[#text+1] = 32
        end
        return text
    end

    -- Both packers first add the requested boundary spaces and then pass the list
    -- on to the matching string converter, whose result is returned as is.

    function helpers.packtoutf8(text,leading,trailing)
        return toutf8(addspaces(text,leading,trailing))
    end

    function helpers.packtoutf32(text,leading,trailing)
        return toutf32(addspaces(text,leading,trailing))
    end

end

-- Builds the data table that the shaper needs for the given font id, stores it in
-- the 'initialized' cache and returns it. As a side effect the index to unicode
-- map is stored as 'copytochar' in the shared table of the font when it is not
-- there yet.

local function initialize(font)

    local tfmdata      = fontdata[font]
    local resources    = tfmdata.resources
    local shared       = tfmdata.shared
    local filename     = resources.filename
    local features     = shared.features
    local descriptions = shared.rawdata.descriptions
    local characters   = tfmdata.characters
    local copytochar   = shared.copytochar -- indextounicode
    local factor       = tfmdata.parameters.factor
    local marks        = resources.marks or { }

    -- The index to unicode map could be shared but why care about a few extra
    -- tables. We first fill all slots upto the highest index with the index
    -- itself so that we end up with an indexed table and not a hash, and then
    -- overload the slots that we know about with the real unicode.

    if not copytochar then
        copytochar = { }
        local highest = 0
        for unicode, description in next, descriptions do
            local index = description.index
            if index > highest then
                highest = index
            end
        end
        for index=0,highest do
            copytochar[index] = index
        end
        for unicode, description in next, descriptions do
            copytochar[description.index] = unicode
        end
        shared.copytochar = copytochar
    end

    -- The (unscaled) width of a space: we prefer the regular space and fall back
    -- on the nobreak space; it stays nil when the font has neither of them.

    local spacewidth = nil
    local space      = descriptions[0x0020] or descriptions[0x00A0]
    if space then
        spacewidth = space.width
    end

    -- Features are passed in the 'name=value' format. Unknown features are ignored
    -- anyway but we can assume that the current (and future) extra context features
    -- use names with more than four characters, so these are dropped here, and the
    -- same is true for the keys that we block explicitly (language and such).

    local featureset = { }
    local nofsets    = 0
    for name, value in sortedhash(features) do
        local setting = nil
        if #name > 4 or skipped[name] then
            -- not passed
        elseif value == "yes" or value == true then
            setting = "=1"
        elseif value == "no" or value == false then
            setting = "=0"
        elseif type(value) == "number" then
            setting = "=" .. value -- an alternate
        end
        if setting then
            nofsets = nofsets + 1
            featureset[nofsets] = name .. setting
        end
    end

    local data = {
        language   = features.language, -- do we need to uppercase and padd to 4 ?
        script     = features.script,   -- do we need to uppercase and padd to 4 ?
        features   = nofsets > 0 and concat(featureset,",") or "", -- hash
        featureset = nofsets > 0 and featureset or nil,
        copytochar = copytochar,
        spacewidth = spacewidth,
        filename   = filename,
        marks      = marks,
        factor     = factor,
        characters = characters, -- the loaded font (we use its metrics which is more accurate)
        method     = features.method or method,
        shaper     = features.shaper or shaper,
    }

    initialized[font] = data

    return data
end

-- In many cases this gives compatible output but especially with respect to spacing and user
-- discretionaries that mix fonts there can be different outcomes. We also have no possibility
-- to tweak and cheat. Of course one can always run a normal node mode pass with specific
-- features first but then one can as well do all in node mode. So .. after a bit of playing
-- around I redid this one from scratch and also added tracing.

-- Two trackers control the tracing: the first one colors the glyphs that come out
-- of the shaper and the second one reports what happens step by step. The check_id
-- flag is a hard coded switch (there is no tracker for it) that enables a sanity
-- check on the start and stop nodes of a run.

local trace_colors  = false  trackers.register("fonts.plugins.hb.colors", function(v) trace_colors  = v end)
local trace_details = false  trackers.register("fonts.plugins.hb.details",function(v) trace_details = v end)
local check_id      = false

local setcolor      = nodes.tracers.colors.set
local resetcolor    = nodes.tracers.colors.reset

-- Methods are loaded on demand: when an unknown method name is accessed we run the
-- file font-phb-imp-<name>.lmt and expect that this file stores its shaper function
-- under that name in this table (presumably via utilities.hb.methods). When that
-- doesn't happen we install a dummy that returns an empty result, which makes the
-- caller leave the node list untouched, and because the outcome is stored we only
-- try (and report) once per method.
--
-- The file lookup is checked before we load: calling dofile with an empty name
-- raises an error and calling it with nil makes it read from the standard input,
-- so without the check a missing file would never end up in the failure branch.

table.setmetatableindex(methods,function(t,k)
    local l = "font-phb-imp-" .. k .. ".lmt"
    report("start loading method %a from %a",k,l)
    local f = resolvers.findfile(l)
    if f and f ~= "" then
        dofile(f)
    end
    local v = rawget(t,k)
    if v then
        report("loading method %a succeeded",k)
    else
        report("loading method %a failed",k)
        v = function() return { } end
    end
    t[k] = v
    return v
end)

local inandout  do

    local utfbyte = utf.byte               -- only needed for the disabled variant below
    local utfchar = utf.char
    local utf3208 = utf.utf32_to_utf8_le   -- only needed for the disabled variant below

    -- A tracing helper that returns three lists: the input as characters, the input
    -- as formatted codepoints, and the results in the range first upto last as
    -- formatted codepoints (after mapping the first field of each result via
    -- copytochar).

    inandout = function(text,result,first,last,copytochar)
        local asunicode  = formatters["%05U"]
        local characters = { }
        local codepoints = { }
        local shaped     = { }
        for i=1,#text do
            local codepoint = text[i]
         -- codepoints[i] = asunicode(utfbyte(utf3208(codepoint)))
            characters[i] = utfchar(codepoint)
            codepoints[i] = asunicode(codepoint)
        end
        local n = 0
        for i=first,last do
            n = n + 1
            shaped[n] = asunicode(copytochar[result[i][1]])
        end
        return characters, codepoints, shaped
    end

end

-- This is the function that gets passed to the text handler. It feeds the text of
-- a run to the shaper method of the font and maps the result back onto the node
-- list: characters are replaced, superfluous nodes are removed, glyphs are copied
-- when a cluster results in more than one glyph, and offsets and kerns are applied.
--
-- head     : the head of the node list, it is returned (and can have changed)
-- font     : the font id, used to fetch (or create) the cached font data
-- dynamic  : not used here
-- rlmode   : the direction, a value >= 0 is handled as l2r and anything else as r2l
-- start    : the first node of the run, a glyph is expected
-- stop     : the last node of the run, a glyph is expected
-- text     : the list with the codepoints of the run, without the boundary spaces
-- leading  : when set the first result belongs to a preceding space
-- trailing : when set the last result belongs to a following space
--
-- Each result entry is an indexed table: [1] is the glyph index that is mapped via
-- copytochar, [2] the zero based cluster, [3] and [4] the x and y offset and [5] the
-- x advance, all unscaled (the y advance in [6] is not used).
--
-- When something inconsistent is seen we quit and return the list as far as it
-- has been processed.

local function harfbuzz(head,font,dynamic,rlmode,start,stop,text,leading,trailing)
    local data = initialized[font]

    if not data then
        data = initialize(font)
    end

    if check_id then
        if getid(start) ~= glyph_code then
            report("error: start is not a glyph")
            return head
        elseif getid(stop) ~= glyph_code then
            report("error: stop is not a glyph")
            return head
        end
    end
    local size   = #text -- original text, without spaces
    local result = methods[data.method](font,data,rlmode,text,leading,trailing)
    local length = result and #result or 0

    if length == 0 then
     -- report("warning: no result")
        return head
    end

    local factor     = data.factor
    local marks      = data.marks
    local spacewidth = data.spacewidth -- can be nil when the font has no space
    local copytochar = data.copytochar
    local characters = data.characters

    -- the text analyzer is only partially clever so we must assume that we get
    -- inconsistent lists

    -- we could check if something has been done (replacement or kern or so) but
    -- then we pass around more information and need to check a lot and spaces
    -- are kind of spoiling that game (we need a different table then) .. more
    -- pain than gain

    -- we could play with 0xFFFE as boundary

    local current  = start
    local first    = 1
    local last     = length

    -- the results that belong to the boundary spaces are handled after the loop

    if leading then
        first = first + 1
    end
    if trailing then
        last = last - 1
    end

    local position = first -- the cluster that we're currently at
    local cluster  = 0     -- the cluster of the result that we're looking at
    local glyph    = nil   -- the first glyph node of the current cluster
    local index    = 0     -- the number of results seen for the current cluster
    local count    = 1     -- cycles over the eight trace colors

    if trace_details then
        report("start run, original size: %i, result index: %i upto %i",size,first,last)
        local s, t, r = inandout(text,result,first,last,copytochar)
        report("method : %s",data.method)
        report("shaper : %s",data.shaper)
        report("string : %t",s)
        report("text   : % t",t)
        report("result : % t",r)
    end

    -- okay, after some experiments, it became clear that more complex code aimed at
    -- optimization doesn't pay off as complexity also demands more testing

    for i=first,last do
        local r = result[i]
        local unicode = copytochar[r[1]] -- can be private of course
        --
        cluster = r[2] + 1 -- starts at zero
        --
        if position == cluster then
            -- one more result for the cluster that we're at
            if i == first then
                index = 1
                if trace_details then
                    report("[%i] position: %i, cluster: %i, index: %i, starting",i,position,cluster,index)
                end
            else
                index = index + 1
                if trace_details then
                    report("[%i] position: %i, cluster: %i, index: %i, next step",i,position,cluster,index)
                end
            end
        elseif position < cluster then
            -- a new cluster: we move on and remove the nodes of skipped positions
            current  = getnext(current)
            position = position + 1
            size     = size - 1
            for p=position,cluster-1 do
                head, current = remove_node(head,current,true)
                if trace_details then
                    report("[%i] position: %i, cluster: %i, index: -, removing node",i,p,cluster)
                end
                size = size - 1
            end
            position = cluster
            index    = 1
            glyph    = nil
            if trace_details then
                report("[%i] position: %i, cluster: %i, index: %i, arriving",i,cluster,position,index)
            end
        else -- maybe a space got properties
            if trace_details then
                report("position: %i, cluster: %i, index: %i, quitting due to fatal inconsistency",position,cluster,index)
            end
            return head
        end
        local copied = false
        if glyph then
            -- the cluster already has a glyph so we need an extra node
            if trace_details then
                report("[%i] position: %i, cluster: %i, index: %i, copying glyph, unicode %U",i,position,cluster,index,unicode)
            end
            local g = copy_node(glyph)
            if trace_colors then
                resetcolor(g)
            end
            setlink(current,g,getnext(current))
            current = g
            copied  = true
        else
            if trace_details then
                report("[%i] position: %i, cluster: %i, index: %i, using glyph, unicode %U",i,position,cluster,index,unicode)
            end
            glyph = current
        end
        --
        if not current then
            if trace_details then
                report("quitting due to unexpected end of node list")
            end
            return head
        end
        --
        local id = getid(current)
        if id ~= glyph_code then
            if trace_details then
                report("glyph expected in node list")
            end
            return head
        end
        --
        -- really, we can get a tab (9), lf (10), or cr(13) back in cambria .. don't ask me why
        --
        -- we fetch the neighbours before we insert kerns (and use names that
        -- don't shadow the next function)
        --
        local before, after = getboth(current)
        --
        -- assign glyph: first in run
        --
        setchar(current,unicode)
        if trace_colors then
            count = (count == 8) and 1 or count + 1
            setcolor(current,"trace:"..count)
        end
        --
        local x_offset  = r[3] -- r.dx
        local y_offset  = r[4] -- r.dy
        local x_advance = r[5] -- r.ax
        ----- y_advance = r[6] -- r.ay
        local left  = 0
        local right = 0
        local dx    = 0
        local dy    = 0
        if trace_details then
            if x_offset ~= 0 or y_offset ~= 0 or x_advance ~= 0 then -- or y_advance ~= 0
                report("[%i] position: %i, cluster: %i, index: %i, old, xoffset: %p, yoffset: %p, xadvance: %p, width: %p",
                    i,position,cluster,index,x_offset*factor,y_offset*factor,x_advance*factor,characters[unicode].width)
            end
        end
        if y_offset ~= 0 then
            dy = y_offset * factor
        end
        if rlmode >= 0 then
            -- l2r marks and rest
            if x_offset ~= 0 then
                dx = x_offset * factor
            end
            local width = characters[unicode].width
            local delta = x_advance * factor
            if delta ~= width then
             -- right = -(delta - width)
                right = delta - width
            end
        elseif marks[unicode] then -- why not just the next loop
            -- r2l marks
            if x_offset ~= 0 then
                dx = -x_offset * factor
            end
        else
            -- r2l rest
            local width = characters[unicode].width
            local delta = (x_advance - x_offset) * factor
            if delta ~= width then
                left = delta - width
            end
            if x_offset ~= 0 then
                right = x_offset * factor
            end
        end
        if copied or dx ~= 0 or dy ~= 0 then
            setoffsets(current,dx,dy)
        end
        if left ~= 0 then
            local kern = new_kern(left)
            setlink(before,kern,current) -- insertbefore
            if current == head then
                -- the head has no predecessor so the kern becomes the new head
                head = kern
            end
        end
        if right ~= 0 then
            local kern = new_kern(right)
            setlink(current,kern,after)
            current = kern
        end
        if trace_details then
            if dy ~= 0 or dx ~= 0 or left ~= 0 or right ~= 0 then
                report("[%i] position: %i, cluster: %i, index: %i, new, xoffset: %p, yoffset: %p, left: %p, right: %p",i,position,cluster,index,dx,dy,left,right)
            end
        end
    end
    --
    if trace_details then
        report("[-] position: %i, cluster: %i, index: -, at end",position,cluster)
    end
    --
    -- what is left of the original text has been absorbed by the last cluster
    --
    if size > 1 then
        current = getnext(current)
        for i=1,size-1 do
            if trace_details then
                report("[-] position: %i + %i, cluster: -, index: -, removing node",position,i)
            end
            head, current = remove_node(head,current,true)
        end
    end
    --
    -- We see all kind of interesting spaces come back (like tabs in cambria) so we do a bit of
    -- extra testing here. When the font has no space we have nothing to compare with and
    -- skip the compensation.
    --
    if leading then
        local r = result[1]
        local unicode = copytochar[r[1]]
        if spacewidth and seenspaces[unicode] then
            local x_advance = r[5]
            local delta     = x_advance - spacewidth
            if delta ~= 0 then
                -- nothing to do but jump one slot ahead
                local before = getprev(start)
                if before and getid(before) == glue_code then
                    local dx = delta * factor
                    setwidth(before,getwidth(before) + dx)
                    if trace_details then
                        report("compensating leading glue by %p due to codepoint %U",dx,unicode)
                    end
                else
                    report("no valid leading glue node")
                end
            end
        end
    end
    --
    if trailing then
        local r = result[length]
        local unicode = copytochar[r[1]]
        if spacewidth and seenspaces[unicode] then
            local x_advance = r[5]
            local delta     = x_advance - spacewidth
            if delta ~= 0 then
                local after = getnext(stop)
                if after and getid(after) == glue_code then
                    local dx = delta * factor
                    setwidth(after,getwidth(after) + dx)
                    if trace_details then
                        report("compensating trailing glue by %p due to codepoint %U",dx,unicode)
                    end
                else
                    report("no valid trailing glue node")
                end
            end
        end
    end
    --
    if trace_details then
        report("run done")
    end
    return head
end

-- We register the shaper under the name "harfbuzz"; the registered function just
-- delegates to the text handler and passes our harfbuzz function as the callback.

otf.registerplugin("harfbuzz",function(head,font,dynamic,direction)
    return texthandler(head,font,dynamic,direction,harfbuzz)
end)