-- path: root/tex/context/base/mkiv/font-imp-combining.lua
-- blob: 7db3b905aca166ea8bc7d773b09ea12ffc08468c
-- Module registration: stores version and ownership details under the
-- 'font-imp-combining' key of the global modules table, creating that table
-- first when it does not exist yet.
if not modules then modules = { } end modules ['font-imp-combining'] = {
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA ADE",
    copyright = "ConTeXt Development Team",
    license   = "see context related readme files"
}

-- Nothing below this line is executed when the global context is not set
-- (presumably: when the file is loaded outside ConTeXt -- confirm).
if not context then return end

local next, unpack = next, unpack
local sort, copy, insert = table.sort, table.copy, table.insert
local setmetatableindex = table.setmetatableindex

local fontdata  = fonts.hashes.identifiers
local otf       = fonts.handlers.otf

local nuts      = nodes.nuts

local nextnode  = nuts.traversers.node
local ischar    = nuts.ischar
local getprev   = nuts.getprev
local getnext   = nuts.getnext
local setprev   = nuts.setprev
local setnext   = nuts.setnext
local setboth   = nuts.setboth
local setlink   = nuts.setlink
local exchange  = nuts.exchange

-- Shared state of the reordering pass. The tables are file-level so that the
-- processor, the reorder helper and the sorters can all reach them.
local class     = { } -- reused: node -> class number, filled by reorderprocessor and read by the sorters
local point     = { } -- reused: node -> character code, filled by reorderprocessor (not read in the code visible here)
local classes   = { } -- script -> table that maps a character code to its class number
local sorters   = { } -- script -> comparison function that takes two nodes
local slide     = { } -- nodes of the run of classified characters that is being collected
local count     = 0   -- number of nodes in slide, zero when no run is pending

-- List provided by Joey McCollum (Hebrew Layout Intelligence):
--
-- 1. The consonants (Unicode points 05D0-05EA) have no combining class and are never reordered; this is typographically correct.
-- 2. Shin dot and sin dot (05C1-05C2) should be next, but Unicode places them in combining classes 24 and 25, after the characters in recommended classes 3-5 and many of the characters in recommended class 6.
-- 3. Dagesh / mapiq (05BC) should be next, but Unicode assigns it a combining class of 21. This means that it will be incorrectly ordered before characters in recommended class 2 and after characters in recommended classes 4-6 after Unicode normalization.
-- 4. Rafe (05BF) should be next, but Unicode assigns it a combining class of 23. Thus, it will be correctly placed after characters in recommended class 3, but incorrectly placed before characters in recommended class 2 after Unicode normalization.
-- 5. The holam and holam haser vowel points (05B9-05BA) should be next, but Unicode places them in combining class 19. This means that they will be placed incorrectly before characters in recommended classes 2-4 and after all characters in recommended class 6 except 05BB after Unicode normalization.
-- 6. The characters in 0591, 0596, 059B, 05A2-05A7, 05AA, 05B0-05B8, 05BB, 05BD, 05C5, 05C7 should be treated as being in the same class, but Unicode places them in combining classes 10-18, 20, 22, and 220.
-- 7. The prepositive marks yetiv and dehi (059A, 05AD) should be next; Unicode places them in combining class 222, so they should correctly come after all characters in recommended classes 1-6.
-- 8. The characters 0307, 0593-0595, 0597-0598, 059C-05A1, 05A8, 05AB-05AC, 05AF, 05C4 should be treated as being in the same class; Unicode places them in combining class 230, so they should correctly come after all characters in recommended classes 1-7.
-- 9. The postpositive marks segolta, pashta, telisha qetana, and zinor (0592, 0599, 05A9, 05AE) should be next; Unicode places them in combining class 230, so they will need to be reordered after the characters in recommended class 8.
--
-- Some tests by Joey:
--
-- Arial, Calibri, and Times New Roman will correctly typeset most combinations of points even in Unicode's canonical order, but they typeset the normalized sequences (hiriq, shin dot, tipeha) and (qamatz, dagesh, shin dot) incorrectly and their typographically recommended reorderings correctly.
-- Cardo will correctly typeset most combinations of points even in Unicode's canonical order, but it typesets the normalized sequences (hiriq, shin dot, tipeha) incorrectly and its typographically recommended reorderings correctly.
-- Frank Ruehl CLM typesets most combinations of points even in Unicode's canonical order, but it consistently does a poor job positioning cantillation marks even when they are placed in the typographically recommended position. Taamey Frank CLM is another version of the same font that handles this correctly, so it is possible that Frank Ruehl CLM is just an obsolete font that did not have well-implemented Hebrew font features for cantillation marks to begin with.
-- For Linux Libertine, the text samples with both the normalized mark ordering and the typographically recommended mark ordering were typeset poorly. I think that this is just because that font does not have full support for the Hebrew glyph set (it lacks cantillation marks) or Hebrew font features (it does not place Hebrew diacritical marks intelligently), so no mark reordering would fix its problems.
-- Taamey David CLM and Taamey Frank CLM exhibit the same typographical mistakes as SBL Hebrew when the input is in Unicode canonical order, and these mistakes go away if the marks are ordered in the typographically recommended way.
--
-- SBL Hebrew is used as reference font.

-- Class numbers for the Hebrew marks, keyed by code point. The groups follow
-- items 2 to 9 of the list above in that order (the consonants of item 1 get no
-- entry), so the number stored here is the item number minus one.

do

    local groups = {
        -- 1: shin dot and sin dot
        { 0x05C1, 0x05C2 },
        -- 2: dagesh / mapiq
        { 0x05BC },
        -- 3: rafe
        { 0x05BF },
        -- 4: holam and holam haser
        { 0x05B9, 0x05BA },
        -- 5
        {
            0x0591, 0x0596, 0x059B,
            0x05A2, 0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7,
            0x05AA,
            0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8,
            0x05BB, 0x05BD, 0x05C5, 0x05C7,
        },
        -- 6: prepositive marks yetiv and dehi
        { 0x059A, 0x05AD },
        -- 7
        {
            0x0307,
            0x0593, 0x0594, 0x0595,
            0x0597, 0x0598,
            0x059C, 0x059D, 0x059E, 0x059F, 0x05A0, 0x05A1,
            0x05A8,
            0x05AB, 0x05AC,
            0x05AF, 0x05C4,
        },
        -- 8: postpositive marks segolta, pashta, telisha qetana and zinor
        { 0x0592, 0x0599, 0x05A9, 0x05AE },
    }

    local hebr = { }

    for rank=1,#groups do
        local members = groups[rank]
        for i=1,#members do
            hebr[members[i]] = rank
        end
    end

    classes.hebr = hebr

end

-- Ordering for Hebrew: node a goes before node b when the class number stored
-- for it in the shared class table is strictly lower than the one stored for b.

function sorters.hebr(a,b)
    local ca = class[a]
    local cb = class[b]
    return ca < cb
end

-- local dflt = setmetatableindex(function(t,k,v)
--     for k, v in next, characters.data do
--         local c = v.combining
--         if c then
--             t[k] = c
--         end
--     end
--     setmetatableindex(t,nil)
--     return t[k]
-- end)
--
-- classes.dflt = dflt
-- sorters.dflt = function(a,b) return class[b] < class[a] end

-- see analyzeprocessor in case we want scripts

-- Comparison function of the script that is being processed. It is assigned in
-- reorderprocessor and called in reorder; declaring it here makes it an upvalue
-- of both functions instead of an accidental global.

local sorter

-- Puts the collected run of nodes, slide[1] .. slide[count], in the order that
-- sorter defines and resets count to zero.
--
-- head    : first node of the list that contains the run
-- returns : the first node of that list, which differs from the one passed in
--           when the run started the list and its first node moved
--
-- Nodes that compare equal have to keep their input order. The two node
-- shortcut only swaps on a strict "less than", and longer runs use an insertion
-- sort because table.sort gives no stability guarantee.

local function reorder(head)
    if count == 2 then
        local first  = slide[1]
        local second = slide[2]
        if sorter(second,first) then
            head = exchange(head,first,second)
        end
    elseif count > 2 then
        -- remember the surroundings before anything moves
        local oldfirst = slide[1]
        local before   = getprev(oldfirst)
        local after    = getnext(slide[count])
        -- runs are short, so a plain (stable) insertion sort will do
        for i=2,count do
            local current = slide[i]
            local j       = i - 1
            while j > 0 and sorter(current,slide[j]) do
                slide[j+1] = slide[j]
                j = j - 1
            end
            slide[j+1] = current
        end
        -- link the nodes of the run to each other in their new order
        setlink(unpack(slide))
        local first = slide[1]
        local last  = slide[count]
        -- The outer pointers of the new ends are set explicitly: a node that
        -- moved to an end still carries the pointer it had inside the run, and
        -- we don't assume that setlink clears it when there is no neighbour.
        if before then
            setlink(before,first)
        else
            setprev(first)
        end
        if after then
            setlink(last,after)
        else
            setnext(last)
        end
        -- compare with the node that used to start the run, not the new one
        if oldfirst == head then
            head = first
        end
    end
    count = 0
    return head
end

-- Node mode processor of the reordercombining feature.
--
-- Walks the list and collects each uninterrupted run of nodes for which
-- ischar(n,font) yields a character that has an entry in the class table of the
-- current script. A run is handed to reorder as soon as a node shows up that
-- doesn't belong to it, and once more when the list ends with a pending run.
--
-- head, font, attr : the list, the font id and the attribute given by the caller
-- returns          : the head of the list, which reorder may have replaced
--
-- Mind that sorter is not local to this function: reorder calls it in order to
-- compare two nodes.

local function reorderprocessor(head,font,attr)
    local script = otf.scriptandlanguage(fontdata[font],attr)
    sorter = sorters[script]
    if not sorter then
        -- no ordering is defined for this script
        return head
    end
    local marks = classes[script]
    for current in nextnode, head do
        local char = ischar(current,font)
        local rank = char and marks[char]
        if rank then
            if count > 0 then
                -- extend the pending run
                count = count + 1
                slide[count] = current
            else
                -- start a new run in a fresh table
                count = 1
                slide = { current }
            end
            class[current] = rank
            point[current] = char
        elseif count > 0 then
            -- anything else ends the pending run
            head = reorder(head)
        end
    end
    if count > 0 then
        head = reorder(head)
    end
    return head
end

-- Register the "reordercombining" otf feature with reorderprocessor as its node
-- mode processor. The default flag and the initializer are commented out, so
-- presumably the feature only becomes active when a font asks for it (confirm
-- against the feature registry).
fonts.constructors.features.otf.register {
    name         = "reordercombining",
    description  = "reorder combining characters",
--  default      = true,
--  initializers = {
--      node     = reorderinitializer,
--  },
    processors = {
        -- NOTE(review): position = 1 presumably puts this processor at the front
        -- of the processor list, before other features see the nodes -- confirm
        position = 1,
        node     = reorderprocessor,
    }
}