summary refs log tree commit diff
path: root/tex/context/base/sort-ini.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/sort-ini.lua')
-rw-r--r-- tex/context/base/sort-ini.lua 300
1 files changed, 184 insertions, 116 deletions
diff --git a/tex/context/base/sort-ini.lua b/tex/context/base/sort-ini.lua
index eebbb9aff..386f1a45f 100644
--- a/tex/context/base/sort-ini.lua
+++ b/tex/context/base/sort-ini.lua
@@ -15,8 +15,7 @@ local utf = unicode.utf8
local gsub, rep, sort, concat = string.gsub, string.rep, table.sort, table.concat
local utfbyte, utfchar = utf.byte, utf.char
local utfcharacters, utfvalues, strcharacters = string.utfcharacters, string.utfvalues, string.characters
-local chardata = characters.data
-local next, type, tonumber = next, type, tonumber
+local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset
local allocate = utilities.storage.allocate
@@ -24,48 +23,157 @@ local trace_tests = false trackers.register("sorters.tests", function(v) trace_
local report_sorters = logs.new("sorters")
-local comparers = { }
-local splitters = { }
-local entries = allocate()
-local mappings = allocate()
-local replacements = allocate()
-local ignoredoffset = 0x10000
-local replacementoffset = 0x10000
-local digitsoffset = 0x20000
-local digitsmaximum = 0xFFFFF
+local comparers = { } -- comparison functions, exported as sorters.comparers (e.g. comparers.basic)
+local splitters = { } -- string splitters, exported as sorters.splitters (e.g. splitters.utf)
+local definitions = allocate() -- per-language sorting data keyed by language name; walked by update()
+local tracers = allocate() -- exported as sorters.tracers; not filled in the visible part of this file
+local ignoredoffset = 0x10000 -- frozen
+local replacementoffset = 0x10000 -- frozen
+local digitsoffset = 0x20000 -- frozen
+local digitsmaximum = 0xFFFFF -- frozen
+
+local lccodes = characters.lccodes -- indexed by code point in splitters.utf; presumably lowercase code points - TODO confirm
+local shcodes = characters.shcodes -- not referenced in the visible part of this file
+local lcchars = characters.lcchars -- indexed by character string; presumably lowercase characters - TODO confirm
+local shchars = characters.shchars -- indexed by character string; presumably base ("shape") characters - TODO confirm
+
+local variables = interfaces.variables -- interface constants; this file reads .default, .before, .first and .last
sorters = {
- comparers = comparers,
- splitters = splitters,
- entries = entries,
- mappings = mappings,
- replacements = replacements,
- constants = {
+ comparers = comparers,
+ splitters = splitters,
+ definitions = definitions,
+ tracers = tracers,
+ constants = {
ignoredoffset = ignoredoffset,
replacementoffset = replacementoffset,
digitsoffset = digitsoffset,
digitsmaximum = digitsmaximum,
+ defaultlanguage = variables.default, -- used by setlanguage when no language (or "") is given, and as fallback definition
+ defaultmethod = variables.before, -- used by setlanguage when neither the argument nor data.method supplies a method
}
}
-local ssorters = sorters
+local sorters = sorters -- local alias of the global table created above
+local constants = sorters.constants -- shortcut; read by setlanguage for the default language and method
-local language, defaultlanguage = 'en', 'en'
+local data, language, method -- current definition, language and method; all three are assigned in setlanguage
+local replacements, mappings, entries, orders, lower, upper -- tables of the current definition; assigned in setlanguage
-local currentreplacements, currentmappings, currententries = { }, { }, { }
+local mte = { -- metatable installed on data.entries by preparetables; resolves keys that have no entry of their own
+ __index = function(t,k) -- t: the entries table, k: the missing key; returns an existing entry or k itself
+ local el
+ if k then
+ local l = lower[k] or lcchars[k] -- lower is the file-level upvalue assigned in setlanguage, so it must be set before any lookup
+ el = rawget(t,l) -- first try: the entry stored under the lowercase form of k
+ end
+ if not el then
+ local l = shchars[k] -- second try: the shchars form of k
+ if l and l ~= k then
+ el = rawget(t,l)
+ if not el then
+ l = lower[k] or lcchars[l] -- NOTE(review): the parallel code in preparetables uses lower[l] here; lower[k] was already tried above - confirm intent
+ if l then
+ el = rawget(t,l)
+ end
+ end
+ end
+ el = el or k -- nothing found: the key stands for itself
+ end
+ -- rawset(t,k,el) also make a copy?
+ return el -- the result is not cached, so this handler runs on every lookup of a missing key
+ end
+}
-function sorters.setlanguage(lang)
- language = lang or language or defaultlanguage
- currentreplacements = replacements[language] or replacements[defaultlanguage] or { }
- currentmappings = mappings [language] or mappings [defaultlanguage] or { }
- currententries = entries [language] or entries [defaultlanguage] or { }
+-- Builds the character -> sort weight table ("mappings") for one language definition.
+-- data: a definition table; this function reads data.orders (array whose items become the keys),
+-- data.lower (per-language table indexed by character, presumably giving the lowercase character -
+-- TODO confirm), data.method and data.entries.
+-- Every orders[i] gets the even weight 2*i, which leaves the odd values free for case variants.
+-- Keys that are not listed are resolved lazily by the __index handler below and then cached.
+-- Side effects: stores the new table in data.mappings and installs metatables on data.entries
+-- (mte) and on data.mappings (mtm).
+-- Returns the mappings table.
+local function preparetables(data)
+ local orders, lower, method, mappings = data.orders, data.lower, data.method, { }
+ for i=1,#orders do
+ local oi = orders[i]
+ mappings[oi] = 2*i
+ end
+ -- a key resolved through its lowercase form lands one below (before/first/last) or one above (any other method) that weight
+ local delta = (method == variables.before or method == variables.first or method == variables.last) and -1 or 1
+ local mtm = {
+ __index = function(t,k)
+ local n
+ if k then
+ local l = lower[k] or lcchars[k]
+ if l then
+ local ml = rawget(t,l)
+ if ml then
+ n = ml + delta -- first
+ end
+ end
+ if not n then
+ l = shchars[k]
+ if l and l ~= k then
+ local ml = rawget(t,l)
+ if ml then
+ n = ml -- first or last
+ else
+ l = lower[l] or lcchars[l]
+ if l then
+ local ml = rawget(t,l)
+ if ml then
+ n = ml + delta
+ end
+ end
+ end
+ end
+ end
+ if not n then
+ n = 0 -- unknown character: weight 0
+ end
+ else
+ -- reading t[nil] does reach this handler, but a nil key can never be stored:
+ -- rawset(t,nil,n) raises "table index is nil", so answer without caching
+ return 0
+ end
+ rawset(t,k,n) -- cache the weight so the handler runs only once per key
+ return n
+ end
+ }
+ data.mappings = mappings
+ setmetatable(data.entries,mte)
+ setmetatable(data.mappings,mtm)
+ return mappings
+end
+
+local function update() -- prepare parent chains, needed when new languages are added
+ for language, data in next, definitions do -- these loop variables shadow the file-level language and data
+ local parent = data.parent or "default" -- a definition without an explicit parent gets "default"
+ if language ~= "default" then -- NOTE(review): literal "default" here, while setlanguage uses constants.defaultlanguage - presumably the same value, confirm
+ setmetatable(data,{ __index = definitions[parent] or definitions.default }) -- missing fields are looked up in the parent, or in default when the parent is unknown
+ end
+ data.language = language -- record the name and the resolved parent on the definition itself
+ data.parent = parent
+ data.mappings = { } -- free temp data
+ end
+end
+
+local function setlanguage(l,m) -- selects language l and method m (nil or "" means default) and refreshes the file-level state; returns the definition
+ language = (l ~= "" and l) or constants.defaultlanguage -- nil and "" both fall back to the default language
+ data = definitions[language or constants.defaultlanguage] or definitions[constants.defaultlanguage] -- NOTE(review): data is nil when neither definition exists, and the next line then raises - confirm a default is always defined
+ method = (m ~= "" and m) or data.method or constants.defaultmethod -- explicit argument first, then the definition's method, then the default
if trace_tests then
- report_sorters("setting language '%s'",language)
+ report_sorters("setting language '%s', method '%s'",language,method)
end
- return currentreplacements, currentmappings, currententries
+ data.method = method -- stored on the definition: preparetables reads it, and a later call without m picks it up again
+ replacements = data.replacements -- the reads below may be answered by a parent definition through the __index chain set in update()
+ entries = data.entries
+ orders = data.orders
+ lower = data.lower
+ upper = data.upper
+ mappings = preparetables(data) -- builds fresh mappings and installs the lookup metatables
+ return data
+end
+
+function sorters.update() -- public: rebuild the parent chains and re-apply the current selection; returns nothing
+ update()
+ setlanguage(language,method) -- resync current language and method
end
--- maybe inline code if it's too slow
+function sorters.setlanguage(language,method) -- public: select a language and method; the parameters shadow the file-level upvalues of the same name
+ update() -- parent chains are rebuilt on every call
+ setlanguage(language,method) -- new language and method; the definition it returns is dropped, so this function returns nothing
+end
local function basicsort(sort_a,sort_b)
if not sort_a or not sort_b then
@@ -118,16 +226,27 @@ function comparers.basic(a,b) -- trace ea and eb
local na, nb = #ea, #eb
if na == 0 and nb == 0 then
-- simple variant (single word)
- local result = basicsort(ea.e,eb.e)
- return (result == 0 and result) or basicsort(ea.m,eb.m)
+ local result = basicsort(ea.m,eb.m)
+ if result == 0 then
+ result = basicsort(ea.c,eb.c)
+ end
+ if result == 0 then
+ result = basicsort(ea.u,eb.u)
+ end
+ return result
else
-- complex variant, used in register (multiple words)
local result = 0
for i=1,nb < na and nb or na do
local eai, ebi = ea[i], eb[i]
- result = basicsort(eai.e,ebi.e)
if result == 0 then
- result = basicsort(eai.m,ebi.m) -- only needed it there are m's
+ result = basicsort(eai.m,ebi.m)
+ end
+ if result == 0 then
+ result = basicsort(eai.c,ebi.c)
+ end
+ if result == 0 then
+ result = basicsort(eai.u,ebi.u)
end
if result ~= 0 then
break
@@ -177,59 +296,50 @@ local function firstofsplit(entry)
split = split.s
end
local entry = split and split[1] or ""
- return entry, currententries[entry] or "\000"
+ return entry, entries[entry] or "\000"
end
sorters.firstofsplit = firstofsplit
--- beware, numbers get spaces in front
-
-function splitters.utf(str)
- if #currentreplacements > 0 then
- for k=1,#currentreplacements do
- local v = currentreplacements[k]
+function splitters.utf(str) -- splits str into parallel arrays s, m, u and c (one slot per utf character); we could append m and u but this is cleaner, s is for tracing
+ if #replacements > 0 then -- replacements: array of pairs; v[1] is handed to gsub as a pattern and v[2] as its replacement
+ -- todo make an lpeg for this
+ for k=1,#replacements do
+ local v = replacements[k]
str = gsub(str,v[1],v[2])
end
end
- local s, e, m, n = { }, { }, { }, 0
- for sc in utfcharacters(str) do -- maybe an lpeg
- local ec, mc = currententries[sc], currentmappings[sc] or utfbyte(sc)
- n = n + 1
- s[n] = sc
- e[n] = currentmappings[ec] or mc
- m[n] = mc
+ local s, u, m, c, n = { }, { }, { }, { }, 0 -- s: characters, u: code points, m and c: sort keys (their content depends on the method), n: count
+ if method == variables.last then -- m is the lowercase code point, lowered by one when the character differs from its lowercase form
+ for sc in utfcharacters(str) do
+ local b = utfbyte(sc)
+ local l = lower[sc] -- the language-specific lowercase character wins over the generic table
+ l = l and utfbyte(l) or lccodes[b] -- NOTE(review): assumes lccodes[b] is never nil, otherwise the arithmetic below raises - confirm
+ if l ~= b then l = l - 1 end -- brrrr, can clash
+ n = n + 1
+ s[n], u[n], m[n], c[n] = sc, b, l, mappings[sc]
+ end
+ elseif method == variables.first then -- same as above, but the adjusted code point is raised by one
+ for sc in utfcharacters(str) do
+ local b = utfbyte(sc)
+ local l = lower[sc]
+ l = l and utfbyte(l) or lccodes[b] -- NOTE(review): same nil assumption as in the branch above
+ if l ~= b then l = l + 1 end -- brrrr, can clash
+ n = n + 1
+ s[n], u[n], m[n], c[n] = sc, b, l, mappings[sc]
+ end
+ else -- any other method: m is the weight from mappings and c is the plain code point
+ for sc in utfcharacters(str) do
+ local b = utfbyte(sc)
+ n = n + 1
+ s[n], u[n], m[n], c[n] = sc, b, mappings[sc], b
+ end
end
- local t = { s = s, e = e, m = m }
---~ table.print(t)
+ local t = { s = s, m = m, u = u, c = c } -- comparers.basic compares m first, then c, then u
+ -- table.print(t)
return t
end
--- we can use one array instead (sort of like in mkii)
--- but for the moment we do it this way as it is more
--- handy for tracing
-
--- function splitters.utf(str)
--- if #currentreplacements > 0 then
--- for k=1,#currentreplacements do
--- local v = currentreplacements[k]
--- str = gsub(str,v[1],v[2])
--- end
--- end
--- local s, e, m, n = { }, { }, { }, 0
--- for sc in utfcharacters(str) do -- maybe an lpeg
--- local ec, mc = currententries[sc], currentmappings[sc] or utfbyte(sc)
--- n = n + 1
--- ec = currentmappings[ec] or mc
--- s[n] = sc
--- e[n] = ec
--- if ec ~= mc then
--- n = n + 1
--- e[n] = mc
--- end
--- end
--- return { s = s, e = e }
--- end
-
function table.remap(t)
local tt = { }
for k,v in next, t do
@@ -286,45 +396,3 @@ function sorters.sort(entries,cmp)
end)
end
end
-
--- some day we can have a characters.upper and characters.lower
-
-function sorters.adduppercasereplacements(what)
- local rep, new = replacements[what], { }
- for i=1,#rep do
- local r = rep[i]
- local u = chardata[utfbyte(r[1])].uccode
- if u then
- new[utfchar(u)] = r[2]
- end
- end
- for k, v in next, new do
- rep[k] = v
- end
-end
-
-function sorters.adduppercaseentries(what)
- local ent, new = entries[what], { }
- for k, v in next, ent do
- local u = chardata[utfbyte(k)].uccode
- if u then
- new[utfchar(u)] = v
- end
- end
- for k, v in next, new do
- ent[k] = v
- end
-end
-
-function sorters.adduppercasemappings(what,offset)
- local map, new, offset = mappings[what], { }, offset or 0
- for k, v in next, map do
- local u = chardata[utfbyte(k)].uccode
- if u then
- new[utfchar(u)] = v + offset
- end
- end
- for k, v in next, new do
- map[k] = v
- end
-end