summaryrefslogtreecommitdiff
path: root/tex/context/base/mkxl/lang-url.lmt
blob: 7607d7d8472e3c143e7c27158e832ec58fa82cdb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
-- Module registration record: makes sure the global modules table exists and
-- stores version, author and license information under the key 'lang-url'.
-- NOTE(review): the comment field names lang-url.mkiv as the companion file;
-- confirm that this is still the right reference for this variant of the module.
if not modules then modules = { } end modules ['lang-url'] = {
    version   = 1.001,
    comment   = "companion to lang-url.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

local next = next
local utfcharacters, utfbyte, utfchar = utf.characters, utf.byte, utf.char
local min, max = math.min, math.max
local setmetatableindex = table.setmetatableindex

local context          = context
local ctx_pushcatcodes = context.pushcatcodes
local ctx_popcatcodes  = context.popcatcodes

local implement = interfaces.implement
local variables = interfaces.variables

local v_before  = variables.before
local v_after   = variables.after

local is_letter = characters.is_letter

-- Hyphenating URLs is somewhat tricky and a matter of taste. I did consider using
-- a dedicated hyphenation pattern or dealing with it by node parsing, but the
-- following solution works as well. After all, we're mostly dealing with ASCII
-- characters.

-- Namespace for the URL hyphenation helpers, exported as languages.urls.
local urls     = { }
languages.urls = urls

-- Character classes, keyed by the character itself. The action function looks
-- every character of a URL up in this table and compares the result with
-- variables.before and variables.after in order to choose the TeX command that
-- it emits; characters without an entry are treated as letters or plain
-- characters. Presumably "before" and "after" tell on which side of the
-- character a break is acceptable; the macros that implement this live at the
-- TeX end and are not part of this file (TODO confirm).
--
-- Watch out: from here on the local name 'characters' refers to this table and
-- no longer to the global characters namespace that provided is_letter above.
local characters = utilities.storage.allocate {
    ["!"] = "before",
    ['"'] = "before",
    ["#"] = "before",
    ["$"] = "before",
    ["%"] = "before",
    ["&"] = "before",
    ["("] = "before",
    ["*"] = "before",
    ["+"] = "before",
    [","] = "before",
    ["-"] = "before",
    ["."] = "before",
    ["/"] = "before",
    [":"] = "before",
    [";"] = "before",
    ["<"] = "before",
    ["="] = "before",
    [">"] = "before",
    ["?"] = "before",
    ["@"] = "before",
    ["["] = "before",
   ["\\"] = "before",
    ["^"] = "before",
    ["_"] = "before",
    ["`"] = "before",
    ["{"] = "before",
    ["|"] = "before",
    ["~"] = "before",

    -- closing delimiters and the single quote are the only "after" entries
    ["'"] = "after",
    [")"] = "after",
    ["]"] = "after",
    ["}"] = "after",
}

-- Optional character substitutions: the action function replaces a character
-- by its entry in this table (if any) before it classifies the character. The
-- table is empty by default.
local mapping = utilities.storage.allocate {
  -- [utfchar(0xA0)] = "~", -- nbsp (catch)
}

-- Tables and defaults exposed as fields of the urls namespace. The three
-- hyphenation related values are only used by the action function when the
-- caller does not pass the corresponding argument.
urls.characters     = characters
urls.mapping        = mapping
urls.lefthyphenmin  = 2     -- fallback for the 'left' argument
urls.righthyphenmin = 3     -- fallback for the 'right' argument
urls.discretionary  = nil   -- fallback for the 'disc' argument (none by default)
urls.packslashes    = false -- when true a slash next to another slash is emitted as plain character

-- The packslashes flag can be changed with the directive "hyphenators.urls.packslashes".
directives.register("hyphenators.urls.packslashes",function(v) urls.packslashes = v end)

-- Tracing is off by default and toggled by the tracker "hyphenators.urls"; when
-- enabled the action function reports the original string and the generated list.
local trace  = false   trackers.register("hyphenators.urls",function(v) trace = v end)
local report = logs.reporter("hyphenators","urls")

-- local ctx_a = context.a
-- local ctx_b = context.b
-- local ctx_d = context.d
-- local ctx_c = context.c
-- local ctx_l = context.l
-- local ctx_C = context.C
-- local ctx_L = context.L

-- local function action(hyphenatedurl,str,left,right,disc)
--     --
--     left  = max(      left  or urls.lefthyphenmin,    2)
--     right = min(#str-(right or urls.righthyphenmin)+2,#str)
--     disc  = disc or urls.discretionary
--     --
--     local word   = nil
--     local prev   = nil
--     local pack   = urls.packslashes
--     local length = 0
--     --
--     for char in utfcharacters(str) do
--         length  = length + 1
--         char    = mapping[char] or char
--         local b = utfbyte(char)
--         if prev == char and prev == "/" then
--             ctx_c(b)
--         elseif char == disc then
--             ctx_d()
--         else
--             if prev == "/" then
--                 ctx_d()
--             end
--             local how = characters[char]
--             if how == v_before then
--                 word = false
--                 ctx_b(b)
--             elseif how == v_after then
--                 word = false
--                 ctx_a(b)
--             else
--                 local letter = is_letter[char]
--                 if length <= left or length >= right then
--                     if word and letter then
--                         ctx_L(b)
--                     else
--                         ctx_C(b)
--                     end
--                 elseif word and letter then
--                     ctx_l(b)
--                 else
--                     ctx_c(b)
--                 end
--                 word = letter
--             end
--         end
--         if pack then
--             prev = char
--         else
--             prev = nil
--         end
--     end
-- end

-- Translates a URL into a sequence of \lang_url_* commands (one per character)
-- and pipes that sequence to TeX under the "prtcatcodes" catcode regime.
--
--   hyphenatedurl : ignored; the function is also installed as a call handler
--                   with table.setmetatablecall, which presumably passes the
--                   called table as first argument
--   str           : the URL, a UTF-8 string
--   left          : characters at positions <= left get the uppercase command
--                   variants; defaults to urls.lefthyphenmin, never below 2
--   right         : characters at positions >= (count - right + 2) get the
--                   uppercase command variants; defaults to urls.righthyphenmin
--   disc          : a character that is replaced by \lang_url_d; defaults to
--                   urls.discretionary
--
-- Commands that are generated (all but d take the code point as argument):
--
--   d     : the character equals disc
--   b / a : the character is classified "before" / "after" in the characters table
--   l / L : a letter that directly continues a run of letters
--   c / C : any other character, and packed slashes (always c)
--
-- The uppercase variants are used inside the left/right zones. What the macros
-- do with this information is decided at the TeX end.
local function action(hyphenatedurl,str,left,right,disc)
    local list = utf.split(str)
    local size = #list
    -- The limits are compared with a character position, so they have to be
    -- derived from the number of characters and not from #str, which counts
    -- bytes and is too large as soon as the URL has multi-byte characters.
    left  = max(      left  or urls.lefthyphenmin,    2)
    right = min(size-(right or urls.righthyphenmin)+2,size)
    disc  = disc or urls.discretionary
    --
    local word = nil -- true when the previous plain character was a letter
    local pack = urls.packslashes
    local prev = nil -- previous character (after mapping)
    --
    for i=1,size do
        local char = list[i]
        char = mapping[char] or char
        if char == disc then
            list[i] = "\\lang_url_d "
        else
            local what
            if pack and char == "/" and (list[i+1] == "/" or prev == "/") then
                -- a slash that has a slash as neighbour is kept together with it
                what = "c"
            else
                local how = characters[char]
                if how == v_before then
                    -- NOTE(review): the older (commented) implementation also
                    -- reset 'word' here; confirm that keeping it is intended.
                    what = "b"
                elseif how == v_after then
                    word = false
                    what = "a"
                else
                    local letter = is_letter[char]
                    if i <= left or i >= right then
                        if word and letter then
                            what = "L"
                        else
                            what = "C"
                        end
                    elseif word and letter then
                        what = "l"
                    else
                        what = "c"
                    end
                    word = letter
                end
            end
            list[i] = "\\lang_url_" .. what .. "{" .. utfbyte(char) .. "}"
        end
        prev = char
    end
    if trace then
        report("old : %s",str)
        report("new : %t",list)
    end
    ctx_pushcatcodes("prtcatcodes")
    context("%t",list)
    ctx_popcatcodes()
end

-- urls.action = function(_,...) action(...) end -- sort of obsolete

-- NOTE(review): 'hyphenatedurl' is not defined anywhere in the visible part of
-- this file, so it is looked up as a global here (and again in the implement
-- call at the end of the file); confirm that such a global exists or that
-- table.setmetatablecall copes with nil. The return value is discarded.
table.setmetatablecall(hyphenatedurl,action) -- watch out: a caller

-- todo, no interface in mkiv yet

local registerfunction   = context.functions.register
local unregisterfunction = context.functions.unregister
local savelua            = token.savelua

-- Puts back the character classes that were saved by urls.setcharacters and
-- then unregisters the restore function.
--
--   savedchars : maps a character onto its previous class; the value false
--                means that the character had no entry at all
--   restore    : the value returned by registerfunction for the restore closure
local function restorevalues(savedchars,restore)
    for k, v in next, savedchars do
        if v == false then
            -- there was no entry before, so remove the one that was added
            characters[k] = nil
        else
            characters[k] = v
        end
    end
    unregisterfunction(restore)
end

-- Assigns the class 'value' to every character in 'str' and arranges for the
-- previous classes to be restored later on.
--
--   str   : the characters to change, a UTF-8 string
--   value : the new class; defaults to variables.before. The action function
--           only gives meaning to values equal to variables.before and
--           variables.after, any other value makes the character a plain one.
--
-- Only characters that really change are remembered. When there is something to
-- restore, a closure is registered and its registration value is handed to
-- token.savelua; presumably that runs the closure when the current TeX group
-- ends (TODO confirm, that mechanism is not part of this file).
function urls.setcharacters(str,value)
    local savedchars = { }
    local newvalue   = value or v_before
    for s in utfcharacters(str) do
        local oldvalue = characters[s]
        if oldvalue ~= newvalue then
            if oldvalue == nil then
                -- A nil can't be stored in a table, so a character without a
                -- previous entry would not be remembered and would keep its
                -- new class forever; false serves as marker for 'no entry'.
                savedchars[s] = false
            else
                savedchars[s] = oldvalue
            end
            characters[s] = newvalue
        end
    end
    if next(savedchars) then
        local restore = nil
        restore = registerfunction(function() restorevalues(savedchars,restore) end)
        savelua(restore)
    end
end

-- .urls.setcharacters("')]}",2)

-- Registers urls.setcharacters under the name "sethyphenatedurlcharacters" with
-- two string arguments (the characters and their new class); presumably this
-- makes the function callable from the TeX end.
implement {
    name      = "sethyphenatedurlcharacters",
    actions   = urls.setcharacters,
    arguments = "2 strings",
}

-- Registers the action function under the (private) name "hyphenatedurl". The
-- declared arguments match action's str, left, right and disc parameters; the
-- value of 'hyphenatedurl' is passed in front because action expects (and
-- ignores) a first argument.
implement {
    name      = "hyphenatedurl",
    scope     = "private",
    actions   = function(...) action(hyphenatedurl,...) end,
    arguments = { "string", "integer", "integer", "string" }
}