summaryrefslogtreecommitdiff
path: root/tex/context/base/sort-ini.lua
blob: 3e367173d7630ca2a6de789dabb78a5b73eb90cb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
-- filename : sort-ini.lua
-- comment  : companion to sort-ini.tex
-- author   : Hans Hagen, PRAGMA-ADE, Hasselt NL
-- copyright: PRAGMA ADE / ConTeXt Development Team
-- license  : see context related readme files

-- todo:
--
-- out of range
-- uppercase
-- texutil compatible
-- always expand to utf

-- Register this module's version in the shared 'versions' table, creating
-- the table when it does not exist yet.
versions = versions or { }
versions['sort-ini'] = 1.001

-- Global namespace of the sorter; the sub-tables are filled in below and
-- by other files.
sorters = {
    comparers    = { },
    splitters    = { },
    entries      = { },
    mappings     = { },
    replacements = { },
    language     = 'en', -- language currently used for sorting
}

-- Three-way comparison of the i-th sort slot of two records.
--
-- a, b : records; a[2][i] and b[2][i] are { entry, key, split } triples
--        where split ([3]) is the array of sort values to compare
-- i    : index of the slot to compare
--
-- Returns 1 when a sorts after b, -1 when a sorts before b and 0 when they
-- are equal. The split arrays are compared element by element; when one is
-- a prefix of the other the shorter one sorts first. Only when both arrays
-- are non-empty and identical is the key (or, lacking a key, the entry)
-- used as a tie-break.
function sorters.comparers.basic(a,b,i) -- [2] has entry, key, cmp
    local slot_a, slot_b = a[2][i], b[2][i]
    local sort_a, sort_b = slot_a[3], slot_b[3]
    local na, nb = #sort_a, #sort_b
    local shared = (na < nb) and na or nb
    -- one pass over the common prefix replaces the three copies of this loop
    for k=1,shared do
        local ak, bk = sort_a[k], sort_b[k]
        if ak > bk then
            return  1
        elseif ak < bk then
            return -1
        end
    end
    if na > nb then
        return 1
    elseif na < nb then
        return -1
    elseif na == 0 then
        return 0
    end
    -- identical sort vectors: fall back to comparing key or entry
    local key_a, key_b = slot_a[2], slot_b[2]
    -- sorters.prepare stores an empty table as key when none was given;
    -- comparing a table with '<' raises an error, so treat it like ""
    if key_a == "" or type(key_a) == "table" then key_a = slot_a[1] end
    if key_b == "" or type(key_b) == "table" then key_b = slot_b[1] end
    if key_a < key_b then
        return -1
    elseif key_a > key_b then
        return 1
    else
        return 0
    end
end

-- Fill in the split sort vector ([3]) of the first n slots of every record.
--
-- data  : array of records; record[2] is the list of { entry, key } slots
-- split : function turning a stripped string into a sort vector
-- n     : number of slots to visit per record
--
-- A non-empty key ([2]) is split; otherwise the entry ([1]) is split. A
-- missing key is replaced by an empty table and a missing slot by a triple
-- of empty tables.
function sorters.prepare(data,split,n)
    local strip = sorters.strip
    for _,record in ipairs(data) do
        local slots = record[2]
        for i=1,n do
            local slot = slots[i]
            if not slot then
                slots[i] = { {}, {}, {} }
            else
                local key = slot[2]
                if not key then
                    slot[2] = { }
                    slot[3] = split(strip(slot[1]))
                elseif key ~= "" then
                    slot[3] = split(strip(key))
                else
                    slot[3] = split(strip(slot[1]))
                end
            end
        end
    end
end

-- Reduce a string to the characters that take part in sorting.
-- Returns the cleaned string.
function sorters.strip(str) -- todo: only letters and such utf.gsub("([^%w%d])","")
    -- left-pad a run of digits with spaces to a width of 10
    local function padded(digits)
        local filler = (" "):rep(10-#digits)
        return filler .. digits
    end
    -- drop each backslash together with the non-space characters following it
    local nocommands = str:gsub("\\%S*","")
    -- drop whitespace, brackets, parentheses, braces, dollars and quotes
    local bare = nocommands:gsub("[%s%[%](){}%$\"\']*","")
    -- sort numbers properly
    local result = bare:gsub("(%d+)",padded)
    return result
end

-- Language whose replacements and mappings sorters.splitters.utf falls back
-- to when none are registered for sorters.language.
sorters.defaultlanguage = 'en'

-- Split a string into an array of sort values, one or more per character.
--
-- The replacements and mappings of sorters.language are used, then those
-- of sorters.defaultlanguage, then empty tables. Every replacement pair is
-- applied to the string with gsub before splitting. A character with a
-- mapping yields the mapped value; an unmapped single-byte character
-- yields utf.byte of it; any other character is passed through
-- characters.uncompose and each resulting character yields its mapping or
-- its utf.byte value.
-- NOTE(review): characters.uncompose, utf.byte, string.characters and the
-- utfcharacters string method are defined outside this file.
function sorters.splitters.utf(str)
    local language, fallback = sorters.language, sorters.defaultlanguage
    local replacements = sorters.replacements[language] or sorters.replacements[fallback] or { }
    local mapping      = sorters.mappings    [language] or sorters.mappings    [fallback] or { }
    local uncompose = characters.uncompose
    local utfbyte = utf.byte
    local result = { }
    for _,pair in pairs(replacements) do
        str = str:gsub(pair[1],pair[2])
    end
    for char in str:utfcharacters() do
        local mapped = mapping[char]
        if mapped then
            result[#result+1] = mapped
        elseif #char == 1 then
            result[#result+1] = utfbyte(char)
        else
            for part in string.characters(uncompose(char)) do
                result[#result+1] = mapping[part] or utfbyte(part)
            end
        end
    end
    return result
end

-- Sort data in place with table.sort.
--
-- data : array to sort
-- cmp  : three-way comparer; a sorts before b exactly when cmp(a,b)
--        returns -1
function sorters.sort(data,cmp)
    local function precedes(first,second)
        return cmp(first,second) == -1
    end
    table.sort(data,precedes)
end

-- Remove sorting leftovers from every record in data (in place).
--
-- For each slot in record[2]: a slot whose entry ([1]) has length zero
-- clears record[1][index]; any other slot loses its split vector ([3]).
-- Afterwards every field of the record that equals "" is removed.
function sorters.cleanup(data)
    for _,record in ipairs(data) do
        for index,slot in ipairs(record[2]) do
            if not (slot and #slot[1] == 0) then
                slot[3] = nil
            else
                -- NOTE(review): this clears record[1][index], not
                -- record[2][index]; confirm that is the intended table
                record[1][index] = nil
            end
        end
        for field,value in pairs(record) do
            if value == "" then
                record[field] = nil
            end
        end
    end
end

-- Drop records that equal their predecessor, compacting data in place.
--
-- Equality is decided by table.are_equal(previous,current,2,3).
-- NOTE(review): table.are_equal is defined outside this file; the 2,3
-- arguments presumably limit the compared index range -- confirm.
function sorters.unique(data)
    local kept, previous = 0, nil
    for _,current in ipairs(data) do
        local duplicate = previous and table.are_equal(previous,current,2,3) -- check range
        if not duplicate then
            kept = kept + 1
            data[kept] = current
            previous = current
        end
    end
    -- clear the tail left behind by the compaction
    for index=kept+1,#data do
        data[index] = nil
    end
end

-- Sort (once) and flush the entries of a data set.
--
-- kind : name of the handler table in sorters providing prepare, sort,
--        unique, finalize and flush
-- data : table with entries and optionally language, class, flush, sorted
--
-- Without data.entries an empty table is returned. When data.sorted is not
-- yet set, the entries are prepared, sorted and made unique, using
-- data.language when given, and data.sorted is set. The result is whatever
-- the handler's flush returns for the finalized entries.
function sorters.process(kind,data)
    local entries = data.entries
    if not entries then
        return { }
    end
    local handler = sorters[kind]
    if not data.sorted then
        sorters.language = data.language or sorters.language
        handler.prepare(entries)
        handler.sort(entries)
        handler.unique(entries)
        data.sorted = true
    end
    local finalized = handler.finalize(entries)
    return handler.flush(finalized,data.class,data.flush)
end