summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/lang-url.lua
blob: 17ad15cd890067abc76c2d629f48cb77a241eb62 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
-- Module registration: make sure the global modules table exists and record
-- the metadata of this file under the 'lang-url' key.
if not modules then
    modules = { }
end

modules['lang-url'] = {
    version   = 1.001,
    comment   = "companion to lang-url.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

local utfcharacters, utfvalues, utfbyte, utfchar = utf.characters, utf.values, utf.byte, utf.char
local min, max = math.min, math.max

local context   = context

local implement = interfaces.implement
local variables = interfaces.variables

local v_before  = variables.before
local v_after   = variables.after

local is_letter = characters.is_letter

--[[
<p>Hyphenating <l n='url'/>s is somewhat tricky and a matter of taste. I did
consider using a dedicated hyphenation pattern or dealing with it by node
parsing, but the following solution works as well. After all, we're mostly
dealing with <l n='ascii'/> characters.</p>
]]--

-- Namespace of this module; the same table is published as languages.urls so
-- that its fields can be reached from outside this file.
local urls     = { }
languages.urls = urls

-- Default classification of the non-letter ascii characters. The action
-- function maps an entry equal to v_before onto the \b command and an entry
-- equal to v_after onto the \a command (presumably: a break is permitted
-- before respectively after that character; the TeX side is not visible here).
--
-- The values are the interface variables v_before and v_after and not the
-- literal strings "before" and "after": every lookup compares with these
-- variables and urls.setcharacters stores v_before, so the defaults have to
-- be the very same values in order to be effective. When the variables equal
-- the literal strings nothing changes.
--
-- Mind that this local hides the global characters table from here on, which
-- is why is_letter is fetched from that global earlier in the file.

local characters = utilities.storage.allocate {
    ["!"] = v_before,
    ['"'] = v_before,
    ["#"] = v_before,
    ["$"] = v_before,
    ["%"] = v_before,
    ["&"] = v_before,
    ["("] = v_before,
    ["*"] = v_before,
    ["+"] = v_before,
    [","] = v_before,
    ["-"] = v_before,
    ["."] = v_before,
    ["/"] = v_before,
    [":"] = v_before,
    [";"] = v_before,
    ["<"] = v_before,
    ["="] = v_before,
    [">"] = v_before,
    ["?"] = v_before,
    ["@"] = v_before,
    ["["] = v_before,
   ["\\"] = v_before,
    ["^"] = v_before,
    ["_"] = v_before,
    ["`"] = v_before,
    ["{"] = v_before,
    ["|"] = v_before,
    ["~"] = v_before,

    ["'"] = v_after,
    [")"] = v_after,
    ["]"] = v_after,
    ["}"] = v_after,
}

-- Optional per character substitutions: the action function replaces each
-- character of a url by mapping[char] when there is such an entry. The table
-- is empty by default.
local mapping = utilities.storage.allocate {
  -- [utfchar(0xA0)] = "~", -- nbsp (catch)
}

-- The two tables are exposed so that they can be changed from elsewhere; the
-- remaining fields are defaults that the action function reads on each call.
urls.characters     = characters
urls.mapping        = mapping
urls.lefthyphenmin  = 2     -- used when no left value is passed
urls.righthyphenmin = 3     -- used when no right value is passed
urls.discretionary  = nil   -- used when no disc value is passed
urls.packslashes    = false -- when set, slashes in a run of slashes become \c

-- The "hyphenators.urls.packslashes" directive stores the value it gets in
-- urls.packslashes.
directives.register("hyphenators.urls.packslashes",function(state)
    urls.packslashes = state
end)

-- Tracing is off by default; the "hyphenators.urls" tracker stores the value
-- it gets in the trace flag, which makes the action function report its input
-- and output.
local trace = false

trackers.register("hyphenators.urls",function(state)
    trace = state
end)

local report = logs.reporter("hyphenators","urls")

-- local ctx_a = context.a
-- local ctx_b = context.b
-- local ctx_d = context.d
-- local ctx_c = context.c
-- local ctx_l = context.l
-- local ctx_C = context.C
-- local ctx_L = context.L

-- local function action(hyphenatedurl,str,left,right,disc)
--     --
--     left  = max(      left  or urls.lefthyphenmin,    2)
--     right = min(#str-(right or urls.righthyphenmin)+2,#str)
--     disc  = disc or urls.discretionary
--     --
--     local word   = nil
--     local prev   = nil
--     local pack   = urls.packslashes
--     local length = 0
--     --
--     for char in utfcharacters(str) do
--         length  = length + 1
--         char    = mapping[char] or char
--         local b = utfbyte(char)
--         if prev == char and prev == "/" then
--             ctx_c(b)
--         elseif char == disc then
--             ctx_d()
--         else
--             if prev == "/" then
--                 ctx_d()
--             end
--             local how = characters[char]
--             if how == v_before then
--                 word = false
--                 ctx_b(b)
--             elseif how == v_after then
--                 word = false
--                 ctx_a(b)
--             else
--                 local letter = is_letter[char]
--                 if length <= left or length >= right then
--                     if word and letter then
--                         ctx_L(b)
--                     else
--                         ctx_C(b)
--                     end
--                 elseif word and letter then
--                     ctx_l(b)
--                 else
--                     ctx_c(b)
--                 end
--                 word = letter
--             end
--         end
--         if pack then
--             prev = char
--         else
--             prev = nil
--         end
--     end
-- end

-- Converts a url into a list of TeX commands, one per character, and pipes the
-- result to TeX with context(...).
--
-- hyphenatedurl : not used in the body; it keeps the signature in line with
--                 the table.setmetatablecall(hyphenatedurl,action) call and
--                 the explicit action(hyphenatedurl,...) call further down
-- str           : the url (utf-8)
-- left          : number of characters at the start that get the uppercase
--                 commands; defaults to urls.lefthyphenmin and is never taken
--                 smaller than 2
-- right         : idem for the end of the url, as used in the threshold
--                 below; defaults to urls.righthyphenmin
-- disc          : a character that is replaced by \d; defaults to
--                 urls.discretionary
--
-- Each character (after applying the mapping table) becomes \x{n} with n the
-- number returned by utfbyte and x one of:
--
--   b   the character is registered as v_before in the characters table
--   a   the character is registered as v_after in the characters table
--   l   a letter that follows a letter
--   c   any other character, or a slash in a run of slashes when packing
--   L C the same as l and c, but inside the left or right margin
--
-- Nothing is returned.

local function action(hyphenatedurl,str,left,right,disc)
    -- split first: the margins below are compared with a character index, so
    -- they have to be derived from the number of characters and not from #str,
    -- which counts bytes and is therefore too large for anything beyond ascii
    local list = utf.split(str)
    local size = #list
    --
    left  = max(      left  or urls.lefthyphenmin,    2)
    right = min(size-(right or urls.righthyphenmin)+2,size)
    disc  = disc or urls.discretionary
    --
    local word = nil -- truthy when the previous plain character was a letter
    local pack = urls.packslashes
    local prev = nil -- the previous character, after mapping

    for i=1,size do
        local what = nil
        local dodi = false
        local char = list[i]
        char       = mapping[char] or char
        if char == disc then
            dodi = true
        elseif pack and char == "/" and (list[i+1] == "/" or prev == "/") then
            -- a slash that is part of a run of slashes
            what = "c"
        else
            local how = characters[char]
            if how == v_before then
                -- NOTE(review): unlike the v_after branch (and unlike the
                -- commented older implementation) word is not reset here
                what = "b"
            elseif how == v_after then
                word = false
                what = "a"
            else
                local letter = is_letter[char]
                if i <= left or i >= right then
                    if word and letter then
                        what = "L"
                    else
                        what = "C"
                    end
                elseif word and letter then
                    what = "l"
                else
                    what = "c"
                end
                word = letter
            end
        end
        -- the list is reused for the result: each character is replaced by
        -- its command
        if dodi then
            list[i] = "\\d"
        else
            list[i] = "\\" .. what .. "{" .. utfbyte(char) .. "}"
        end
        prev = char
    end
    if trace then
        report("old : %s",str)
        report("new : %t",list)
    end
    context("%t",list)
end

-- urls.action = function(_,...) action(...) end -- sort of obsolete

-- Installs action as call handler for hyphenatedurl, which is why action takes
-- the callee as its first parameter.
-- NOTE(review): hyphenatedurl is not defined anywhere in the part of the file
-- shown here, so this is a lookup of a global that may well be nil at this
-- point; confirm where it is set and what table.setmetatablecall does with nil.
table.setmetatablecall(hyphenatedurl,action) -- watch out: a caller

-- todo, no interface in mkiv yet

-- Registers every utf character of str in the characters table.
--
-- str   : the characters to register
-- value : what to store for each of them; v_before is stored when value is nil
--         or false (an empty string is a value of its own in Lua and is stored
--         as such)
--
-- The action function only acts on entries that are equal to v_before or
-- v_after; a character with any other value is handled like an unregistered
-- one.
-- NOTE(review): an older comment here mentioned 1 and 2 as values for before
-- and after; the code in this file does not compare with numbers.

function urls.setcharacters(str,value)
    local how = value or v_before
    for character in utfcharacters(str) do
        characters[character] = how
    end
end

-- .urls.setcharacters("')]}",2)

-- Registers urls.setcharacters under the name "sethyphenatedurlcharacters".
-- The "2 strings" argument specification lines up with the (str,value)
-- parameters of that function. The function value is taken here, at load
-- time, so a later replacement of urls.setcharacters is not picked up.
implement {
    name      = "sethyphenatedurlcharacters",
    actions   = urls.setcharacters,
    arguments = "2 strings",
}

-- Registers the url handler under the name "hyphenatedurl". The four declared
-- arguments line up with the str, left, right and disc parameters of action;
-- hyphenatedurl itself is passed in front of them because action expects the
-- callee as first argument.
local hyphenatedurlspec = { "string", "integer", "integer", "string" }

implement {
    name      = "hyphenatedurl",
    scope     = "private",
    arguments = hyphenatedurlspec,
    actions   = function(...)
        action(hyphenatedurl,...)
    end,
}