summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/sort-ini.lua
blob: 0916337b1971f3d1381a105acfecc24663625ec8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
if not modules then modules = { } end modules ['sort-ini'] = {
    version   = 1.001,
    comment   = "companion to sort-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- It took a while to get there, but with Fleetwood Mac's "Don't Stop"
-- playing in the background we sort of got it done.

--[[<p>The code here evolved from the rather old mkii approach. There
we concatinate the key and (raw) entry into a new string. Numbers and
special characters get some treatment so that they sort ok. In
addition some normalization (lowercasing, accent stripping) takes
place and again data is appended ror prepended. Eventually these
strings are sorted using a regular string sorter. The relative order
of character is dealt with by weighting them. It took a while to
figure this all out but eventually it worked ok for most languages,
given that the right datatables were provided.</p>

<p>Here we do follow a similar approach but this time we don't append
the manipulated keys and entries but create tables for each of them
with entries being tables themselves having different properties. In
these tables characters are represented by numbers and sorting takes
place using these numbers. Strings are simplified using lowercasing
as well as shape codes. Numbers are filtered and after getting an offset
they end up at the right end of the spectrum (more clever parser will
be added some day). There are definitely more solutions to the problem
and it is a nice puzzle to solve.</p>

<p>In the future more methods can be added, as there is practically no
limit to what goes into the tables. For that we will provide hooks.</p>

<p>Todo: decomposition with specific order of accents, this is
relatively easy to do.</p>

<p>Todo: investigate what standards and conventions there are and see
how they map onto this mechanism. I've learned that users can come up
with any demand so nothing here is frozen.</p>

<p>Todo: I ran into the Unicode Collation document and noticed that
there are some similarities (like the weights) but using that method
would still demand extra code for language specifics. One option is
to use the allkeys.txt file for the uc vectors but then we would also
use the collapsed key (sq, code is now commented). In fact, we could
just hook those into the replacer code that we reun beforehand.</p>

<p>In the future index entries will become more clever, i.e. they will
have language etc properties that then can be used.</p>
]]--

local gsub, find, rep, sub, sort, concat, tohash, format = string.gsub, string.find, string.rep, string.sub, table.sort, table.concat, table.tohash, string.format
local utfbyte, utfchar, utfcharacters, utfvalues = utf.byte, utf.char, utf.characters, utf.values
local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset
local P, Cs, R, S, lpegmatch, lpegpatterns = lpeg.P, lpeg.Cs, lpeg.R, lpeg.S, lpeg.match, lpeg.patterns

local allocate          = utilities.storage.allocate
local setmetatableindex = table.setmetatableindex

local trace_tests       = false  trackers.register("sorters.tests",        function(v) trace_tests        = v end)
local trace_methods     = false  trackers.register("sorters.methods",      function(v) trace_methods      = v end)
local trace_orders      = false  trackers.register("sorters.orders",       function(v) trace_orders       = v end)
local trace_replacements= false  trackers.register("sorters.replacements", function(v) trace_replacements = v end)

local report_sorters    = logs.reporter("languages","sorters")

local comparers         = { }
local splitters         = { }
local definitions       = allocate()
local tracers           = allocate()
local ignoredoffset     = 0x10000 -- frozen
local replacementoffset = 0x10000 -- frozen
local digitsoffset      = 0x20000 -- frozen
local digitsmaximum     = 0xFFFFF -- frozen

local lccodes           = characters.lccodes
local uccodes           = characters.uccodes
local lcchars           = characters.lcchars
local ucchars           = characters.ucchars
local shchars           = characters.shchars
local fscodes           = characters.fscodes
local fschars           = characters.fschars

local decomposed        = characters.decomposed

local variables         = interfaces.variables

local v_numbers         = variables.numbers
local v_default         = variables.default
local v_before          = variables.before
local v_after           = variables.after
local v_first           = variables.first
local v_last            = variables.last

local validmethods      = tohash {
    "ch", -- raw character (for tracing)
    "mm", -- minus mapping
    "zm", -- zero  mapping
    "pm", -- plus  mapping
    "mc", -- lower case - 1
    "zc", -- lower case
    "pc", -- lower case + 1
    "uc", -- unicode
}

local predefinedmethods = {
    [v_default] = "zc,pc,zm,pm,uc",
    [v_before]  = "mm,mc,uc",
    [v_after]   = "pm,mc,uc",
    [v_first]   = "pc,mm,uc",
    [v_last]    = "mc,mm,uc",
}

sorters = {
    comparers    = comparers,
    splitters    = splitters,
    definitions  = definitions,
    tracers      = tracers,
    constants    = {
        ignoredoffset     = ignoredoffset,
        replacementoffset = replacementoffset,
        digitsoffset      = digitsoffset,
        digitsmaximum     = digitsmaximum,
        defaultlanguage   = v_default,
        defaultmethod     = v_default,
        defaultdigits     = v_numbers,
        validmethods      = validmethods,
    }
}

local sorters   = sorters
local constants = sorters.constants

local data, language, method, digits
local replacements, m_mappings, z_mappings, p_mappings, entries, orders, lower, upper, method, sequence, usedinsequence
local thefirstofsplit

local mte = { -- todo: assign to t
    __index = function(t,k)
        if k and k ~= "" and utfbyte(k) < digitsoffset then -- k check really needed (see s-lan-02)
            local el
            if k then
                local l = lower[k] or lcchars[k]
                el = rawget(t,l)
            end
            if not el then
                local l = shchars[k]
                if l and l ~= k then
                    if #l > 1 then
                        l = sub(l,1,1) -- todo
                    end
                    el = rawget(t,l)
                    if not el then
                        l = lower[k] or lcchars[l]
                        if l then
                            el = rawget(t,l)
                        end
                    end
                end
                el = el or k
            end
        --  rawset(t,k,el)
            return el
        else
        --  rawset(t,k,k)
        end
    end
}

local noorder = false
local nothing = { 0 }

local function preparetables(data)
    local orders, lower, m_mappings, z_mappings, p_mappings = data.orders, data.lower, { }, { }, { }
    for i=1,#orders do
        local oi = orders[i]
        local n = { 2 * i }
        m_mappings[oi], z_mappings[oi], p_mappings[oi] = n, n, n
    end
    local mtm = {
        __index = function(t,k)
            local n, nn
            if k then
                if trace_orders then
                    report_sorters("simplifing character %C",k)
                end
                local l = lower[k] or lcchars[k]
                if l then
                    if trace_orders then
                        report_sorters(" 1 lower: %C",l)
                    end
                    local ml = rawget(t,l)
                    if ml then
                        n = { }
                        nn = 0
                        for i=1,#ml do
                            nn = nn + 1
                            n[nn] = ml[i] + (t.__delta or 0)
                        end
                        if trace_orders then
                            report_sorters(" 2 order: % t",n)
                        end
                    end
                end
                if not n then
                    local s = shchars[k] -- maybe all components?
                    if s and s ~= k then
                        if trace_orders then
                            report_sorters(" 3 shape: %C",s)
                        end
                        n = { }
                        nn = 0
                        for l in utfcharacters(s) do
                            local ml = rawget(t,l)
                            if ml then
                                if trace_orders then
                                    report_sorters(" 4 keep: %C",l)
                                end
                                if ml then
                                    for i=1,#ml do
                                        nn = nn + 1
                                        n[nn] = ml[i]
                                    end
                                end
                            else
                                l = lower[l] or lcchars[l]
                                if l then
                                    if trace_orders then
                                        report_sorters(" 5 lower: %C",l)
                                    end
                                    local ml = rawget(t,l)
                                    if ml then
                                        for i=1,#ml do
                                            nn = nn + 1
                                            n[nn] = ml[i] + (t.__delta or 0)
                                        end
                                    end
                                end
                            end
                        end
                    else
                        -- this is a kind of last resort branch that we might want to revise
                        -- one day
                        --
                        -- local b = utfbyte(k)
                        -- n = decomposed[b] or { b }
                        -- if trace_tests then
                        --     report_sorters(" 6 split: %s",utf.tostring(b)) -- todo
                        -- end
                        --
                        -- we need to move way above valid order (new per 2014-10-16) .. maybe we
                        -- need to move it even more up to get numbers right (not all have orders)
                        --
                        if k == "\000" then
                            n = nothing -- shared
                            if trace_orders then
                                report_sorters(" 6 split: space") -- todo
                            end
                        else
                            local b = 2 * #orders + utfbyte(k)
                            n = decomposed[b] or { b } -- could be shared tables
                            if trace_orders then
                                report_sorters(" 6 split: %s",utf.tostring(b)) -- todo
                            end
                        end
                    end
                    if n then
                        if trace_orders then
                            report_sorters(" 7 order: % t",n)
                        end
                    else
                        n = noorder
                        if trace_orders then
                            report_sorters(" 8 order: 0")
                        end
                    end
                end
            else
                n = noorder
                if trace_orders then
                    report_sorters(" 9 order: 0")
                end
            end
            rawset(t,k,n)
            return n
        end
    }
    data.m_mappings = m_mappings
    data.z_mappings = z_mappings
    data.p_mappings = p_mappings
    m_mappings.__delta = -1
    z_mappings.__delta =  0
    p_mappings.__delta =  1
    setmetatable(data.entries,mte)
    setmetatable(data.m_mappings,mtm)
    setmetatable(data.z_mappings,mtm)
    setmetatable(data.p_mappings,mtm)
    thefirstofsplit = data.firstofsplit
end

local function update() -- prepare parent chains, needed when new languages are added
    for language, data in next, definitions do
        local parent = data.parent or "default"
        if language ~= "default" then
            setmetatableindex(data,definitions[parent] or definitions.default)
        end
        data.language   = language
        data.parent     = parent
        data.m_mappings = { } -- free temp data
        data.z_mappings = { } -- free temp data
        data.p_mappings = { } -- free temp data
    end
end

local function setlanguage(l,m,d,u) -- this will become a specification table (also keep this one as it's used in manuals)
    language = (l ~= "" and l) or constants.defaultlanguage
    data     = definitions[language or constants.defaultlanguage] or definitions[constants.defaultlanguage]
    method   = (m ~= "" and m) or (data.method ~= "" and data.method) or constants.defaultmethod
    digits   = (d ~= "" and d) or (data.digits ~= "" and data.digits) or constants.defaultdigits
    if trace_tests then
        report_sorters("setting language %a, method %a, digits %a",language,method,digits)
    end
    replacements = data.replacements
    entries      = data.entries
    orders       = data.orders
    lower        = data.lower
    upper        = data.upper
    preparetables(data)
    m_mappings   = data.m_mappings
    z_mappings   = data.z_mappings
    p_mappings   = data.p_mappings
    --
    method = predefinedmethods[variables[method]] or method
    data.method  = method
    --
    data.digits  = digits
    --
    local seq = utilities.parsers.settings_to_array(method or "") -- check the list
    sequence = { }
    local nofsequence = 0
    for i=1,#seq do
        local s = seq[i]
        if validmethods[s] then
            nofsequence = nofsequence + 1
            sequence[nofsequence] = s
        else
            report_sorters("invalid sorter method %a in %a",s,method)
        end
    end
    usedinsequence = tohash(sequence)
    data.sequence = sequence
    data.usedinsequence = usedinsequence
-- usedinsequence.ch = true -- better just store the string
    if trace_tests then
        report_sorters("using sort sequence: % t",sequence)
    end
    --
    return data
end

function sorters.update()
    update()
    setlanguage(language,method,numberorder) -- resync current language and method
end

function sorters.setlanguage(language,method,numberorder)
    update()
    setlanguage(language,method,numberorder) -- new language and method
end

-- tricky: { 0, 0, 0 } vs { 0, 0, 0, 0 } => longer wins and mm, pm, zm can have them

-- inlining and checking first slot first doesn't speed up (the 400K complex author sort)

local function basicsort(sort_a,sort_b)
    if sort_a and sort_b then
        local na = #sort_a
        local nb = #sort_b
        if na > nb then
            na = nb
        end
        if na > 0 then
            for i=1,na do
                local ai, bi = sort_a[i], sort_b[i]
                if ai > bi then
                    return  1
                elseif ai < bi then
                    return -1
                end
            end
        end
    end
    return 0
end

-- todo: compile compare function

local function basic(a,b) -- trace ea and eb
    if a == b then
        -- hashed (shared) entries
        return 0
    end
    local ea = a.split
    local eb = b.split
    local na = #ea
    local nb = #eb
    if na == 0 and nb == 0 then
        -- simple variant (single word)
        local result = 0
        for j=1,#sequence do
            local m = sequence[j]
            result = basicsort(ea[m],eb[m])
            if result ~= 0 then
                return result
            end
        end
        if result == 0 then
            local la = #ea.uc
            local lb = #eb.uc
            if la > lb then
                return 1
            elseif lb > la then
                return -1
            else
                return 0
            end
        else
            return result
        end
    else
        -- complex variant, used in register (multiple words)
        local result = 0
        for i=1,nb < na and nb or na do
            local eai = ea[i]
            local ebi = eb[i]
            for j=1,#sequence do
                local m = sequence[j]
                result = basicsort(eai[m],ebi[m])
                if result ~= 0 then
                    return result
                end
            end
            if result == 0 then
                local la = #eai.uc
                local lb = #ebi.uc
                if la > lb then
                    return 1
                elseif lb > la then
                    return -1
                end
            else
                return result
            end
        end
        if result ~= 0 then
            return result
        elseif na > nb then
            return 1
        elseif nb > na then
            return -1
        else
            return 0
        end
    end
end

-- if we use sq:
--
-- local function basic(a,b) -- trace ea and eb
--     local ea, eb = a.split, b.split
--     local na, nb = #ea, #eb
--     if na == 0 and nb == 0 then
--         -- simple variant (single word)
--         return basicsort(ea.sq,eb.sq)
--     else
--         -- complex variant, used in register (multiple words)
--         local result = 0
--         for i=1,nb < na and nb or na do
--             local eai, ebi = ea[i], eb[i]
--             result = basicsort(ea.sq,eb.sq)
--             if result ~= 0 then
--                 return result
--             end
--         end
--         if result ~= 0 then
--             return result
--         elseif na > nb then
--             return 1
--         elseif nb > na then
--             return -1
--         else
--             return 0
--         end
--     end
-- end

comparers.basic = basic

function sorters.basicsorter(a,b)
    return basic(a,b) == -1
end

local function numify(old)
    if digits == v_numbers then -- was swapped, fixed 2014-11-10
        local new = digitsoffset + tonumber(old) -- alternatively we can create range
        if new > digitsmaximum then
            new = digitsmaximum
        end
        return utfchar(new)
    else
        return old
    end
end

local pattern = nil

local function prepare() -- todo: test \Ux{hex}
    pattern = Cs( (
        characters.tex.toutfpattern()
      + lpeg.patterns.whitespace / "\000"
      + (P("\\Ux{") / "" * ((1-P("}"))^1/function(s) return utfchar(tonumber(s,16)) end) * (P("}")/""))
      + (P("\\") / "") * R("AZ")^0 * (P(-1) + #(1-R("AZ")))
      + (P("\\") * P(1) * R("az","AZ")^0) / ""
      + S("[](){}$\"'") / ""
      + R("09")^1 / numify
      + P(1)
    )^0 )
    return pattern
end

local function strip(str) -- todo: only letters and such
    if str and str ~= "" then
        return lpegmatch(pattern or prepare(),str)
    else
        return ""
    end
end

sorters.strip = strip

local function firstofsplit(entry)
    -- numbers are left padded by spaces
    local split = entry.split
    if #split > 0 then
        split = split[1].ch
    else
        split = split.ch
    end
    local first = split and split[1] or ""
    if thefirstofsplit then
        return thefirstofsplit(first,data,entry) -- normally the first one is needed
    else
        return first, entries[first] or "\000" -- tag
    end
end

sorters.firstofsplit = firstofsplit

-- for the moment we use an inefficient bunch of tables but once
-- we know what combinations make sense we can optimize this

function splitters.utf(str,checked) -- we could append m and u but this is cleaner, s is for tracing
    local nofreplacements = #replacements
    if nofreplacements > 0 then
        -- todo make an lpeg for this
        local replacer = replacements.replacer
        if not replacer then
            local rep = { }
            for i=1,nofreplacements do
                local r = replacements[i]
                rep[strip(r[1])] = strip(r[2])
            end
            replacer = lpeg.utfchartabletopattern(rep)
            replacer = Cs((replacer/rep + lpegpatterns.utf8character)^0)
            replacements.replacer = replacer
        end
        local rep = lpegmatch(replacer,str)
        if rep and rep ~= str then
            if trace_replacements then
                report_sorters("original   : %s",str)
                report_sorters("replacement: %s",rep)
            end
            str = rep
        end
     -- for k=1,#replacements do
     --     local v = replacements[k]
     --     local s = v[1]
     --     if find(str,s) then
     --         str = gsub(str,s,v[2])
     --     end
     -- end
    end
    local m_case    = { }
    local z_case    = { }
    local p_case    = { }
    local m_mapping = { }
    local z_mapping = { }
    local p_mapping = { }
    local char      = { }
    local byte      = { }
    local n         = 0
    local nm        = 0
    local nz        = 0
    local np        = 0
    for sc in utfcharacters(str) do
        local b = utfbyte(sc)
        if b >= digitsoffset then
            if n == 0 then
                -- we need to force number to the top
                z_case[1] = 0
                m_case[1] = 0
                p_case[1] = 0
                char[1] = sc
                byte[1] = 0
                m_mapping[1] = 0
                z_mapping[1] = 0
                p_mapping[1] = 0
                n = 2
            else
                n = n + 1
            end
            z_case[n] = b
            m_case[n] = b
            p_case[n] = b
            char[n] = sc
            byte[n] = b
            nm = nm + 1
            nz = nz + 1
            np = np + 1
            m_mapping[nm] = b
            z_mapping[nz] = b
            p_mapping[np] = b
        else
            n = n + 1
            local l = lower[sc]
            l = l and utfbyte(l) or lccodes[b] or b
         -- local u = upper[sc]
         -- u = u and utfbyte(u) or uccodes[b] or b
            if type(l) == "table" then
                l = l[1] -- there are currently no tables in lccodes but it can be some, day
            end
         -- if type(u) == "table" then
         --     u = u[1] -- there are currently no tables in lccodes but it can be some, day
         -- end
            z_case[n] = l
            if l ~= b then
                m_case[n] = l - 1
                p_case[n] = l + 1
            else
                m_case[n] = l
                p_case[n] = l
            end
            char[n], byte[n] = sc, b
            local fs = fscodes[b] or b
            local msc = m_mappings[sc]
            if msc ~= noorder then
                if not msc then
                    msc = m_mappings[fs]
                end
                for i=1,#msc do
                    nm = nm + 1
                    m_mapping[nm] = msc[i]
                end
            end
            local zsc = z_mappings[sc]
            if zsc ~= noorder then
                if not zsc then
                    zsc = z_mappings[fs]
                end
                for i=1,#zsc do
                    nz = nz + 1
                    z_mapping[nz] = zsc[i]
                end
            end
            local psc = p_mappings[sc]
            if psc ~= noorder then
                if not psc then
                    psc = p_mappings[fs]
                end
                for i=1,#psc do
                    np = np + 1
                    p_mapping[np] = psc[i]
                end
            end
        end
    end
    -- -- only those needed that are part of a sequence
    --
    -- local b = byte[1]
    -- if b then
    --     -- we set them to the first split code (korean)
    --     local fs = fscodes[b] or b
    --     if #m_mapping == 0 then
    --         m_mapping = { m_mappings[fs][1] }
    --     end
    --     if #z_mapping == 0 then
    --         z_mapping = { z_mappings[fs][1] }
    --     end
    --     if #p_mapping == 0 then
    --         p_mapping = { p_mappings[fs][1] }
    --     end
    -- end
    local result
    if checked then
        result = {
            ch = trace_tests       and char      or nil, -- not in sequence
            uc = usedinsequence.uc and byte      or nil,
            mc = usedinsequence.mc and m_case    or nil,
            zc = usedinsequence.zc and z_case    or nil,
            pc = usedinsequence.pc and p_case    or nil,
            mm = usedinsequence.mm and m_mapping or nil,
            zm = usedinsequence.zm and z_mapping or nil,
            pm = usedinsequence.pm and p_mapping or nil,
        }
    else
        result = {
            ch = char,
            uc = byte,
            mc = m_case,
            zc = z_case,
            pc = p_case,
            mm = m_mapping,
            zm = z_mapping,
            pm = p_mapping,
        }
    end
 -- local sq, n = { }, 0
 -- for i=1,#byte do
 --     for s=1,#sequence do
 --         n = n + 1
 --         sq[n] = result[sequence[s]][i]
 --     end
 -- end
 -- result.sq = sq
    return result
end

local function packch(entry)
    local split = entry.split
    if split and #split > 0 then -- useless test
        local t = { }
        for i=1,#split do
            local tt = { }
            local ch = split[i].ch
            for j=1,#ch do
                local chr = ch[j]
                local byt = utfbyte(chr)
                if byt > ignoredoffset then
                    tt[j] = "[]"
                elseif byt == 0 then
                    tt[j] = " "
                else
                    tt[j] = chr
                end
            end
            t[i] = concat(tt)
        end
        return concat(t," + ")
    else
        local t  = { }
        local ch = (split and split.ch) or entry.ch or entry
        if ch then
            for i=1,#ch do
                local chr = ch[i]
                local byt = utfbyte(chr)
                if byt > ignoredoffset then
                    t[i] = "[]"
                elseif byt == 0 then
                    t[i] = " "
                else
                    t[i] = chr
                end
            end
            return concat(t)
        else
            return ""
        end
    end
end

local function packuc(entry)
    local split = entry.split
    if split and #split > 0 then -- useless test
        local t = { }
        for i=1,#split do
            t[i] = concat(split[i].uc, " ") -- sq
        end
        return concat(t," + ")
    else
        local uc = (split and split.uc) or entry.uc or entry
        if uc then
            return concat(uc," ") -- sq
        else
            return ""
        end
    end
end

sorters.packch = packch
sorters.packuc = packuc

function sorters.sort(entries,cmp)
    if trace_methods then
        local nofentries = #entries
        report_sorters("entries: %s, language: %s, method: %s, digits: %s",nofentries,language,method,tostring(digits))
        for i=1,nofentries do
            report_sorters("entry %s",table.serialize(entries[i].split,i,true,true,true))
        end
    end
    if trace_tests then
        sort(entries,function(a,b)
            local r = cmp(a,b)
            local e = (not r and "?") or (r<0 and "<") or (r>0 and ">") or "="
            report_sorters("%s %s %s | %s %s %s",packch(a),e,packch(b),packuc(a),e,packuc(b))
            return r == -1
        end)
        local s
        for i=1,#entries do
            local entry = entries[i]
            local letter, first = firstofsplit(entry)
            if first == s then
                first = "  "
            else
                s = first
                if first and letter then
                    report_sorters(">> %C (%C)",first,letter)
                end
            end
            report_sorters("   %s | %s",packch(entry),packuc(entry))
        end
    else
        sort(entries,function(a,b)
            return cmp(a,b) == -1
        end)
    end
end

-- helper

function sorters.replacementlist(list)
    local replacements = { }
    for i=1,#list do
        replacements[i] = {
            list[i],
            utfchar(replacementoffset+i),
        }
    end
    return replacements
end