summaryrefslogtreecommitdiff
path: root/tex/context/base/sort-ini.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/sort-ini.lua')
-rw-r--r--tex/context/base/sort-ini.lua110
1 files changed, 84 insertions, 26 deletions
diff --git a/tex/context/base/sort-ini.lua b/tex/context/base/sort-ini.lua
index 386f1a45f..5cecafb6e 100644
--- a/tex/context/base/sort-ini.lua
+++ b/tex/context/base/sort-ini.lua
@@ -12,7 +12,7 @@ if not modules then modules = { } end modules ['sort-ini'] = {
-- todo: cleanup splits (in other modules)
local utf = unicode.utf8
-local gsub, rep, sort, concat = string.gsub, string.rep, table.sort, table.concat
+local gsub, rep, sub, sort, concat = string.gsub, string.rep, string.sub, table.sort, table.concat
local utfbyte, utfchar = utf.byte, utf.char
local utfcharacters, utfvalues, strcharacters = string.utfcharacters, string.utfvalues, string.characters
local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset
@@ -32,12 +32,11 @@ local replacementoffset = 0x10000 -- frozen
local digitsoffset = 0x20000 -- frozen
local digitsmaximum = 0xFFFFF -- frozen
-local lccodes = characters.lccodes
-local shcodes = characters.shcodes
-local lcchars = characters.lcchars
-local shchars = characters.shchars
+local lccodes = characters.lccodes
+local lcchars = characters.lcchars
+local shchars = characters.shchars
-local variables = interfaces.variables
+local variables = interfaces.variables
sorters = {
comparers = comparers,
@@ -60,6 +59,8 @@ local constants = sorters.constants
local data, language, method
local replacements, mappings, entries, orders, lower, upper
+--~ local shchars = characters.specialchars -- no specials for AE and ae
+
local mte = {
__index = function(t,k)
local el
@@ -70,6 +71,9 @@ local mte = {
if not el then
local l = shchars[k]
if l and l ~= k then
+ if #l > 0 then
+ l = sub(l,1,1)
+ end
el = rawget(t,l)
if not el then
l = lower[k] or lcchars[l]
@@ -89,42 +93,81 @@ local function preparetables(data)
local orders, lower, method, mappings = data.orders, data.lower, data.method, { }
for i=1,#orders do
local oi = orders[i]
- mappings[oi] = 2*i
+ mappings[oi] = { 2*i }
end
local delta = (method == variables.before or method == variables.first or method == variables.last) and -1 or 1
local mtm = {
__index = function(t,k)
local n
if k then
+ if trace_tests then
+ report_sorters("simplifing character 0x%04x %s",utfbyte(k),k)
+ end
local l = lower[k] or lcchars[k]
if l then
+ if trace_tests then
+ report_sorters(" 1 lower: %s",l)
+ end
local ml = rawget(t,l)
if ml then
- n = ml + delta -- first
+ n = { }
+ for i=1,#ml do
+ n[#n+1] = ml[i] + delta
+ end
+ if trace_tests then
+ report_sorters(" 2 order: %s",concat(n," "))
+ end
end
end
if not n then
- l = shchars[k]
- if l and l ~= k then
- local ml = rawget(t,l)
- if ml then
- n = ml -- first or last
- else
- l = lower[l] or lcchars[l]
- if l then
- local ml = rawget(t,l)
+ local s = shchars[k]
+ if s and s ~= k then -- weird test
+ if trace_tests then
+ report_sorters(" 3 shape: %s",s)
+ end
+ n = { }
+ for l in utfcharacters(s) do
+ local ml = rawget(t,l)
+ if ml then
+ if trace_tests then
+ report_sorters(" 4 keep: %s",l)
+ end
if ml then
- n = ml + delta
+ for i=1,#ml do
+ n[#n+1] = ml[i]
+ end
+ end
+ else
+ l = lower[l] or lcchars[l]
+ if l then
+ if trace_tests then
+ report_sorters(" 5 lower: %s",l)
+ end
+ local ml = rawget(t,l)
+ if ml then
+ for i=1,#ml do
+ n[#n+1] = ml[i] + delta
+ end
+ end
end
end
end
+ if trace_tests then
+ report_sorters(" 6 order: %s",concat(n," "))
+ end
+ end
+ if not n then
+ n = { 0 }
+ if trace_tests then
+ report_sorters(" 7 order: 0")
+ end
end
- end
- if not n then
- n = 0
end
else
- n = 0
+ n = { 0 }
+ if trace_tests then
+ report_sorters(" 8 order: 0")
+ end
end
rawset(t,k,n)
return n
@@ -317,7 +360,11 @@ function splitters.utf(str) -- we could append m and u but this is cleaner, s is
l = l and utfbyte(l) or lccodes[b]
if l ~= b then l = l - 1 end -- brrrr, can clash
n = n + 1
- s[n], u[n], m[n], c[n] = sc, b, l, mappings[sc]
+ s[n], u[n], m[n] = sc, b, l
+ local msc = mappings[sc]
+ for i=1,#msc do
+ c[#c+1] = msc[i]
+ end
end
elseif method == variables.first then
for sc in utfcharacters(str) do
@@ -326,13 +373,21 @@ function splitters.utf(str) -- we could append m and u but this is cleaner, s is
l = l and utfbyte(l) or lccodes[b]
if l ~= b then l = l + 1 end -- brrrr, can clash
n = n + 1
- s[n], u[n], m[n], c[n] = sc, b, l, mappings[sc]
+ s[n], u[n], m[n] = sc, b, l
+ local msc = mappings[sc]
+ for i=1,#msc do
+ c[#c+1] = msc[i]
+ end
end
else
for sc in utfcharacters(str) do
local b = utfbyte(sc)
n = n + 1
- s[n], u[n], m[n], c[n] = sc, b, mappings[sc], b
+ s[n], u[n], c[n] = sc, b, b
+ local msc = mappings[sc]
+ for i=1,#msc do
+ m[#m+1] = msc[i]
+ end
end
end
local t = { s = s, m = m, u = u, c = c }
@@ -373,9 +428,12 @@ end
function sorters.sort(entries,cmp)
if trace_tests then
+ for i=1,#entries do
+ report_sorters("entry %s",table.serialize(entries[i].split,i))
+ end
sort(entries,function(a,b)
local r = cmp(a,b)
- report_sorters("%s %s %s (%s)",pack(a),(not r and "?") or (r<0 and "<") or (r>0 and ">") or "=",pack(b),r)
+ report_sorters("%s %s %s",pack(a),(not r and "?") or (r<0 and "<") or (r>0 and ">") or "=",pack(b))
return r == -1
end)
local s