From 9d17a4bc572f48289b6d188684a7bbf8c46839fe Mon Sep 17 00:00:00 2001 From: Marius Date: Wed, 22 Sep 2010 11:03:47 +0300 Subject: beta 2010.09.22 09:35 --- scripts/context/lua/mtxrun.lua | 6 +- scripts/context/stubs/mswin/mtxrun.lua | 6 +- scripts/context/stubs/unix/mtxrun | 6 +- tex/context/base/buff-ini.lua | 6 + tex/context/base/char-ini.lua | 30 +- tex/context/base/cont-new.tex | 2 +- tex/context/base/context.tex | 2 +- tex/context/base/l-table.lua | 6 +- tex/context/base/pack-box.mkiv | 7 +- tex/context/base/page-sid.mkii | 2 +- tex/context/base/page-sid.mkiv | 2 +- tex/context/base/sort-ini.lua | 300 ++- tex/context/base/sort-lan.lua | 3237 ++++++--------------------- tex/context/base/strc-ref.lua | 173 +- tex/context/base/strc-ref.mkii | 18 +- tex/context/base/strc-reg.lua | 2 +- tex/context/base/strc-reg.mkiv | 5 + tex/context/base/strc-sec.mkiv | 33 +- tex/context/base/strc-syn.mkiv | 14 +- tex/generic/context/luatex-fonts-merged.lua | 8 +- 20 files changed, 1108 insertions(+), 2757 deletions(-) diff --git a/scripts/context/lua/mtxrun.lua b/scripts/context/lua/mtxrun.lua index b175a35a4..622643f9c 100644 --- a/scripts/context/lua/mtxrun.lua +++ b/scripts/context/lua/mtxrun.lua @@ -615,7 +615,7 @@ local function sortedkeys(tab) elseif tkey == "number" then -- if kind == 1 then kind = 3 else kind = 2 end kind = (kind == 1 and 3) or 2 - else + else -- if tkey then kind = 3 end end @@ -631,7 +631,9 @@ end local function sortedhashkeys(tab) -- fast one local srt = { } for key,_ in next, tab do - srt[#srt+1] = key + if key then + srt[#srt+1] = key + end end sort(srt) return srt diff --git a/scripts/context/stubs/mswin/mtxrun.lua b/scripts/context/stubs/mswin/mtxrun.lua index b175a35a4..622643f9c 100644 --- a/scripts/context/stubs/mswin/mtxrun.lua +++ b/scripts/context/stubs/mswin/mtxrun.lua @@ -615,7 +615,7 @@ local function sortedkeys(tab) elseif tkey == "number" then -- if kind == 1 then kind = 3 else kind = 2 end kind = (kind == 1 and 3) or 2 - else + else -- if tkey then kind = 3 end end @@ -631,7 +631,9 @@ end local function sortedhashkeys(tab) -- fast one local srt = { } for key,_ in next, tab do - srt[#srt+1] = key + if key then + srt[#srt+1] = key + end end sort(srt) return srt diff --git a/scripts/context/stubs/unix/mtxrun b/scripts/context/stubs/unix/mtxrun index b175a35a4..622643f9c 100644 --- a/scripts/context/stubs/unix/mtxrun +++ b/scripts/context/stubs/unix/mtxrun @@ -615,7 +615,7 @@ local function sortedkeys(tab) elseif tkey == "number" then -- if kind == 1 then kind = 3 else kind = 2 end kind = (kind == 1 and 3) or 2 - else + else -- if tkey then kind = 3 end end @@ -631,7 +631,9 @@ end local function sortedhashkeys(tab) -- fast one local srt = { } for key,_ in next, tab do - srt[#srt+1] = key + if key then + srt[#srt+1] = key + end end sort(srt) return srt diff --git a/tex/context/base/buff-ini.lua b/tex/context/base/buff-ini.lua index 4ae4df699..a590cf81a 100644 --- a/tex/context/base/buff-ini.lua +++ b/tex/context/base/buff-ini.lua @@ -153,6 +153,9 @@ end function buffers.range(lines,first,last,range) -- 1,3 1,+3 fromhere,tothere local first, last = first or 1, last or #lines + if last < 0 then + last = #lines + last + end local what = settings_to_array(range) local r_first, r_last = what[1], what[2] local f, l = tonumber(r_first), tonumber(r_last) @@ -172,6 +175,9 @@ function buffers.range(lines,first,last,range) -- 1,3 1,+3 fromhere,tothere end if r_last then if l then + if l < 0 then + l = #lines + l + end if find(r_last,"^[%+]") then -- 1,+3 l = first + l end diff --git a/tex/context/base/char-ini.lua b/tex/context/base/char-ini.lua index 4893875c3..28a11a1e9 100644 --- a/tex/context/base/char-ini.lua +++ b/tex/context/base/char-ini.lua @@ -771,10 +771,34 @@ characters.activeoffset = 0x10000 -- there will be remapped in that byte range -- some day we will make a table +characters.lccodes = allocate() local lccodes = characters.lccodes -- lazy table +characters.uccodes = allocate() local uccodes = characters.uccodes -- lazy table +characters.shcodes = allocate() local shcodes = characters.shcodes -- lazy table + +characters.lcchars = allocate() local lcchars = characters.lcchars -- lazy table +characters.ucchars = allocate() local ucchars = characters.ucchars -- lazy table +characters.shchars = allocate() local shchars = characters.shchars -- lazy table + +characters.lccharcodes = allocate() local lccharcodes = characters.lccharcodes -- lazy table +characters.uccharcodes = allocate() local uccharcodes = characters.uccharcodes -- lazy table +characters.shcharcodes = allocate() local shcharcodes = characters.shcharcodes -- lazy table + +setmetatable(lccodes, { __index = function(t,u) if u then local c = data[u] c = c and c.lccode or u t[u] = c return c end end } ) +setmetatable(uccodes, { __index = function(t,u) if u then local c = data[u] c = c and c.uccode or u t[u] = c return c end end } ) +setmetatable(shcodes, { __index = function(t,u) if u then local c = data[u] c = c and c.shcode or u t[u] = c return c end end } ) + +setmetatable(lcchars, { __index = function(t,u) if u then local c = data[utfbyte(u)] c = c and c.lccode c = c and utfchar(c) or u t[u] = c return c end end } ) +setmetatable(ucchars, { __index = function(t,u) if u then local c = data[utfbyte(u)] c = c and c.uccode c = c and utfchar(c) or u t[u] = c return c end end } ) +setmetatable(shchars, { __index = function(t,u) if u then local c = data[utfbyte(u)] c = c and c.shcode c = c and utfchar(c) or u t[u] = c return c end end } ) + +setmetatable(lccharcodes, { __index = function(t,u) if u then local c = data[utfbyte(u)] c = c and c.lccode or u t[u] = c return c end end } ) +setmetatable(uccharcodes, { __index = function(t,u) if u then local c = data[utfbyte(u)] c = c and c.uccode or u t[u] = c return c end end } ) +setmetatable(shcharcodes, { __index = function(t,u) if u then local c = data[utfbyte(u)] c = c and c.shcode or u t[u] = c return c end end } ) + function characters.lower(str) local new = { } for u in utfvalues(str) do - new[#new+1] = utfchar(data[u].lccode or u) + new[#new+1] = utfchar(lccodes[u]) end return concat(new) end @@ -782,7 +806,7 @@ end function characters.upper(str) local new = { } for u in utfvalues(str) do - new[#new+1] = utfchar(data[u].uccode or u) + new[#new+1] = utfchar(uccodes[u]) end return concat(new) end @@ -792,7 +816,7 @@ function characters.lettered(str) for u in utfvalues(str) do local d = data[u] if is_letter[d.category] then - new[#new+1] = utfchar(d.lccode or u) + new[#new+1] = utfchar(lccodes[u]) end end return concat(new) diff --git a/tex/context/base/cont-new.tex b/tex/context/base/cont-new.tex index e6c4c69e1..c9a207c34 100644 --- a/tex/context/base/cont-new.tex +++ b/tex/context/base/cont-new.tex @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2010.09.18 12:08} +\newcontextversion{2010.09.22 09:35} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/context.tex b/tex/context/base/context.tex index 2a1eea762..681fd7312 100644 --- a/tex/context/base/context.tex +++ b/tex/context/base/context.tex @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2010.09.18 12:08} +\edef\contextversion{2010.09.22 09:35} %D For those who want to use this: diff --git a/tex/context/base/l-table.lua b/tex/context/base/l-table.lua index 996e36a83..1bd9921cb 100644 --- a/tex/context/base/l-table.lua +++ b/tex/context/base/l-table.lua @@ -99,7 +99,7 @@ local function sortedkeys(tab) elseif tkey == "number" then -- if kind == 1 then kind = 3 else kind = 2 end kind = (kind == 1 and 3) or 2 - else + else -- if tkey then kind = 3 end end @@ -115,7 +115,9 @@ end local function sortedhashkeys(tab) -- fast one local srt = { } for key,_ in next, tab do - srt[#srt+1] = key + if key then + srt[#srt+1] = key + end end sort(srt) return srt diff --git a/tex/context/base/pack-box.mkiv b/tex/context/base/pack-box.mkiv index 09af1d633..c0d3741ff 100644 --- a/tex/context/base/pack-box.mkiv +++ b/tex/context/base/pack-box.mkiv @@ -497,9 +497,14 @@ \fi \advance\scratchdimen\@@bgvoffset \xdef\bleedheight{\the\scratchdimen}% + % + \bgroup \hsize\bleedwidth \vsize\bleedheight - \setbox\scratchbox\hbox{#2}% + \global\setbox\globalscratchbox\hbox{#2}% + \egroup + \setbox\scratchbox\box\globalscratchbox + % \doif\@@bgpage\v!yes {\setbox\scratchbox\topskippedbox{\box\scratchbox}}% \setbox\scratchbox\hbox to \@@bgwidth diff --git a/tex/context/base/page-sid.mkii b/tex/context/base/page-sid.mkii index 0848c9d8c..806acc6f1 100644 --- a/tex/context/base/page-sid.mkii +++ b/tex/context/base/page-sid.mkii @@ -906,7 +906,7 @@ \divide\!!counta \baselineskip \advance\!!counta \plusone \parskip\zeropoint - \dorecurse\!!counta{\line{}} + \dorecurse\!!counta{\line{}}% \kern-\!!counta\baselineskip \penalty\zerocount } diff --git a/tex/context/base/page-sid.mkiv b/tex/context/base/page-sid.mkiv index e54b4ecd5..9365f7508 100644 --- a/tex/context/base/page-sid.mkiv +++ b/tex/context/base/page-sid.mkiv @@ -770,7 +770,7 @@ \divide\!!counta \baselineskip \advance\!!counta \plusone \parskip\zeropoint - \dorecurse\!!counta{\line{}} + \dorecurse\!!counta{\line{}}% \kern-\!!counta\baselineskip \penalty\zerocount } diff --git a/tex/context/base/sort-ini.lua b/tex/context/base/sort-ini.lua index eebbb9aff..386f1a45f 100644 --- a/tex/context/base/sort-ini.lua +++ b/tex/context/base/sort-ini.lua @@ -15,8 +15,7 @@ local utf = unicode.utf8 local gsub, rep, sort, concat = string.gsub, string.rep, table.sort, table.concat local utfbyte, utfchar = utf.byte, utf.char local utfcharacters, utfvalues, strcharacters = string.utfcharacters, string.utfvalues, string.characters -local chardata = characters.data -local next, type, tonumber = next, type, tonumber +local next, type, tonumber, rawget, rawset = next, type, tonumber, rawget, rawset local allocate = utilities.storage.allocate @@ -24,48 +23,157 @@ local trace_tests = false trackers.register("sorters.tests", function(v) trace_ local report_sorters = logs.new("sorters") -local comparers = { } -local splitters = { } -local entries = allocate() -local mappings = allocate() -local replacements = allocate() -local ignoredoffset = 0x10000 -local replacementoffset = 0x10000 -local digitsoffset = 0x20000 -local digitsmaximum = 0xFFFFF +local comparers = { } +local splitters = { } +local definitions = allocate() +local tracers = allocate() +local ignoredoffset = 0x10000 -- frozen +local replacementoffset = 0x10000 -- frozen +local digitsoffset = 0x20000 -- frozen +local digitsmaximum = 0xFFFFF -- frozen + +local lccodes = characters.lccodes +local shcodes = characters.shcodes +local lcchars = characters.lcchars +local shchars = characters.shchars + +local variables = interfaces.variables sorters = { - comparers = comparers, - splitters = splitters, - entries = entries, - mappings = mappings, - replacements = replacements, - constants = { + comparers = comparers, + splitters = splitters, + definitions = definitions, + tracers = tracers, + constants = { ignoredoffset = ignoredoffset, replacementoffset = replacementoffset, digitsoffset = digitsoffset, digitsmaximum = digitsmaximum, + defaultlanguage = variables.default, + defaultmethod = variables.before, } } -local ssorters = sorters +local sorters = sorters +local constants = sorters.constants -local language, defaultlanguage = 'en', 'en' +local data, language, method +local replacements, mappings, entries, orders, lower, upper -local currentreplacements, currentmappings, currententries = { }, { }, { } +local mte = { + __index = function(t,k) + local el + if k then + local l = lower[k] or lcchars[k] + el = rawget(t,l) + end + if not el then + local l = shchars[k] + if l and l ~= k then + el = rawget(t,l) + if not el then + l = lower[k] or lcchars[l] + if l then + el = rawget(t,l) + end + end + end + el = el or k + end + -- rawset(t,k,el) also make a copy? + return el + end +} -function sorters.setlanguage(lang) - language = lang or language or defaultlanguage - currentreplacements = replacements[language] or replacements[defaultlanguage] or { } - currentmappings = mappings [language] or mappings [defaultlanguage] or { } - currententries = entries [language] or entries [defaultlanguage] or { } +local function preparetables(data) + local orders, lower, method, mappings = data.orders, data.lower, data.method, { } + for i=1,#orders do + local oi = orders[i] + mappings[oi] = 2*i + end + local delta = (method == variables.before or method == variables.first or method == variables.last) and -1 or 1 + local mtm = { + __index = function(t,k) + local n + if k then + local l = lower[k] or lcchars[k] + if l then + local ml = rawget(t,l) + if ml then + n = ml + delta -- first + end + end + if not n then + l = shchars[k] + if l and l ~= k then + local ml = rawget(t,l) + if ml then + n = ml -- first or last + else + l = lower[l] or lcchars[l] + if l then + local ml = rawget(t,l) + if ml then + n = ml + delta + end + end + end + end + end + if not n then + n = 0 + end + else + n = 0 + end + rawset(t,k,n) + return n + end + } + data.mappings = mappings + setmetatable(data.entries,mte) + setmetatable(data.mappings,mtm) + return mappings +end + +local function update() -- prepare parent chains, needed when new languages are added + for language, data in next, definitions do + local parent = data.parent or "default" + if language ~= "default" then + setmetatable(data,{ __index = definitions[parent] or definitions.default }) + end + data.language = language + data.parent = parent + data.mappings = { } -- free temp data + end +end + +local function setlanguage(l,m) + language = (l ~= "" and l) or constants.defaultlanguage + data = definitions[language or constants.defaultlanguage] or definitions[constants.defaultlanguage] + method = (m ~= "" and m) or data.method or constants.defaultmethod if trace_tests then - report_sorters("setting language '%s'",language) + report_sorters("setting language '%s', method '%s'",language,method) end - return currentreplacements, currentmappings, currententries + data.method = method + replacements = data.replacements + entries = data.entries + orders = data.orders + lower = data.lower + upper = data.upper + mappings = preparetables(data) + return data +end + +function sorters.update() + update() + setlanguage(language,method) -- resync current language and method end --- maybe inline code if it's too slow +function sorters.setlanguage(language,method) + update() + setlanguage(language,method) -- new language and method +end local function basicsort(sort_a,sort_b) if not sort_a or not sort_b then @@ -118,16 +226,27 @@ function comparers.basic(a,b) -- trace ea and eb local na, nb = #ea, #eb if na == 0 and nb == 0 then -- simple variant (single word) - local result = basicsort(ea.e,eb.e) - return (result == 0 and result) or basicsort(ea.m,eb.m) + local result = basicsort(ea.m,eb.m) + if result == 0 then + result = basicsort(ea.c,eb.c) + end + if result == 0 then + result = basicsort(ea.u,eb.u) + end + return result else -- complex variant, used in register (multiple words) local result = 0 for i=1,nb < na and nb or na do local eai, ebi = ea[i], eb[i] - result = basicsort(eai.e,ebi.e) if result == 0 then - result = basicsort(eai.m,ebi.m) -- only needed it there are m's + result = basicsort(eai.m,ebi.m) + end + if result == 0 then + result = basicsort(eai.c,ebi.c) + end + if result == 0 then + result = basicsort(eai.u,ebi.u) end if result ~= 0 then break @@ -177,59 +296,50 @@ local function firstofsplit(entry) split = split.s end local entry = split and split[1] or "" - return entry, currententries[entry] or "\000" + return entry, entries[entry] or "\000" end sorters.firstofsplit = firstofsplit --- beware, numbers get spaces in front - -function splitters.utf(str) - if #currentreplacements > 0 then - for k=1,#currentreplacements do - local v = currentreplacements[k] +function splitters.utf(str) -- we could append m and u but this is cleaner, s is for tracing + if #replacements > 0 then + -- todo make an lpeg for this + for k=1,#replacements do + local v = replacements[k] str = gsub(str,v[1],v[2]) end end - local s, e, m, n = { }, { }, { }, 0 - for sc in utfcharacters(str) do -- maybe an lpeg - local ec, mc = currententries[sc], currentmappings[sc] or utfbyte(sc) - n = n + 1 - s[n] = sc - e[n] = currentmappings[ec] or mc - m[n] = mc + local s, u, m, c, n = { }, { }, { }, { }, 0 + if method == variables.last then + for sc in utfcharacters(str) do + local b = utfbyte(sc) + local l = lower[sc] + l = l and utfbyte(l) or lccodes[b] + if l ~= b then l = l - 1 end -- brrrr, can clash + n = n + 1 + s[n], u[n], m[n], c[n] = sc, b, l, mappings[sc] + end + elseif method == variables.first then + for sc in utfcharacters(str) do + local b = utfbyte(sc) + local l = lower[sc] + l = l and utfbyte(l) or lccodes[b] + if l ~= b then l = l + 1 end -- brrrr, can clash + n = n + 1 + s[n], u[n], m[n], c[n] = sc, b, l, mappings[sc] + end + else + for sc in utfcharacters(str) do + local b = utfbyte(sc) + n = n + 1 + s[n], u[n], m[n], c[n] = sc, b, mappings[sc], b + end end - local t = { s = s, e = e, m = m } ---~ table.print(t) + local t = { s = s, m = m, u = u, c = c } + -- table.print(t) return t end --- we can use one array instead (sort of like in mkii) --- but for the moment we do it this way as it is more --- handy for tracing - --- function splitters.utf(str) --- if #currentreplacements > 0 then --- for k=1,#currentreplacements do --- local v = currentreplacements[k] --- str = gsub(str,v[1],v[2]) --- end --- end --- local s, e, m, n = { }, { }, { }, 0 --- for sc in utfcharacters(str) do -- maybe an lpeg --- local ec, mc = currententries[sc], currentmappings[sc] or utfbyte(sc) --- n = n + 1 --- ec = currentmappings[ec] or mc --- s[n] = sc --- e[n] = ec --- if ec ~= mc then --- n = n + 1 --- e[n] = mc --- end --- end --- return { s = s, e = e } --- end - function table.remap(t) local tt = { } for k,v in next, t do @@ -286,45 +396,3 @@ function sorters.sort(entries,cmp) end) end end - --- some day we can have a characters.upper and characters.lower - -function sorters.adduppercasereplacements(what) - local rep, new = replacements[what], { } - for i=1,#rep do - local r = rep[i] - local u = chardata[utfbyte(r[1])].uccode - if u then - new[utfchar(u)] = r[2] - end - end - for k, v in next, new do - rep[k] = v - end -end - -function sorters.adduppercaseentries(what) - local ent, new = entries[what], { } - for k, v in next, ent do - local u = chardata[utfbyte(k)].uccode - if u then - new[utfchar(u)] = v - end - end - for k, v in next, new do - ent[k] = v - end -end - -function sorters.adduppercasemappings(what,offset) - local map, new, offset = mappings[what], { }, offset or 0 - for k, v in next, map do - local u = chardata[utfbyte(k)].uccode - if u then - new[utfchar(u)] = v + offset - end - end - for k, v in next, new do - map[k] = v - end -end diff --git a/tex/context/base/sort-lan.lua b/tex/context/base/sort-lan.lua index 322af2598..338ba747b 100644 --- a/tex/context/base/sort-lan.lua +++ b/tex/context/base/sort-lan.lua @@ -7,758 +7,319 @@ if not modules then modules = { } end modules ['sort-lan'] = { } -- Many vectors were supplied by Wolfgang Schuster and Philipp --- Gesang. --- --- Replacements are indexed as they need to be applied in sequence --- --- Maybe we should load these tables runtime, just like patterns. - -local utf = unicode.utf8 -local uc = utf.char -local ub = utf.byte - -local sorters = sorters - -local mappings = sorters.mappings -local entries = sorters.entries -local replacements = sorters.replacements - -local adduppercasereplacements = sorters.adduppercasereplacements -local adduppercaseentries = sorters.adduppercaseentries -local adduppercasemappings = sorters.adduppercasemappings - -local replacementoffset = sorters.constants.replacementoffset +-- Gesang. However this is a quite adapted and reformatted variant +-- so it needs some checking. + +local utfchar = utf.char +local sorters = sorters +local definitions = sorters.definitions +local replacementoffset = sorters.constants.replacementoffset + +definitions["default"] = { + method = "before", + replacements = { + -- no replacements + }, + entries = { + ["a"] = "a", ["b"] = "b", ["c"] = "c", ["d"] = "d", ["e"] = "e", + ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", ["j"] = "j", + ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", ["o"] = "o", + ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", ["t"] = "t", + ["u"] = "u", ["v"] = "v", ["w"] = "w", ["x"] = "x", ["y"] = "y", + ["z"] = "z", + }, + orders = { + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", + "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", + "u", "v", "w", "x", "y", "z", + }, + lower = { + -- no replacements + }, + upper = { + -- no replacements + } +} + +sorters.setlanguage("default") -- english -entries['en'] = { - ["a"] = "a", ["b"] = "b", ["c"] = "c", ["d"] = "d", ["e"] = "e", - ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", ["j"] = "j", - ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", ["o"] = "o", - ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", ["t"] = "t", - ["u"] = "u", ["v"] = "v", ["w"] = "w", ["x"] = "x", ["y"] = "y", - ["z"] = "z", - ["A"] = "a", ["B"] = "b", ["C"] = "c", ["D"] = "d", ["E"] = "e", - ["F"] = "f", ["G"] = "g", ["H"] = "h", ["I"] = "i", ["J"] = "j", - ["K"] = "k", ["L"] = "l", ["M"] = "m", ["N"] = "n", ["O"] = "o", - ["P"] = "p", ["Q"] = "q", ["R"] = "r", ["S"] = "s", ["T"] = "t", - ["U"] = "u", ["V"] = "v", ["W"] = "w", ["X"] = "x", ["Y"] = "y", - ["Z"] = "z", -} - -mappings['en'] = { - ["a"] = 1, ["b"] = 3, ["c"] = 5, ["d"] = 7, ["e"] = 9, - ["f"] = 11, ["g"] = 13, ["h"] = 15, ["i"] = 17, ["j"] = 19, - ["k"] = 21, ["l"] = 23, ["m"] = 25, ["n"] = 27, ["o"] = 29, - ["p"] = 31, ["q"] = 33, ["r"] = 35, ["s"] = 37, ["t"] = 39, - ["u"] = 41, ["v"] = 43, ["w"] = 45, ["x"] = 47, ["y"] = 49, - ["z"] = 51, - ["A"] = 1, ["B"] = 3, ["C"] = 5, ["D"] = 7, ["E"] = 9, - ["F"] = 11, ["G"] = 13, ["H"] = 15, ["I"] = 17, ["J"] = 19, - ["K"] = 21, ["L"] = 23, ["M"] = 25, ["N"] = 27, ["O"] = 29, - ["P"] = 31, ["Q"] = 33, ["R"] = 35, ["S"] = 37, ["T"] = 39, - ["U"] = 41, ["V"] = 43, ["W"] = 45, ["X"] = 47, ["Y"] = 49, - ["Z"] = 51, - -- - -- uppercase after lowercase - -- - -- ["A"] = 2, ["B"] = 4, ["C"] = 6, ["D"] = 8, ["E"] = 10, - -- ["F"] = 12, ["G"] = 14, ["H"] = 16, ["I"] = 18, ["J"] = 20, - -- ["K"] = 22, ["L"] = 24, ["M"] = 26, ["N"] = 28, ["O"] = 30, - -- ["P"] = 32, ["Q"] = 34, ["R"] = 36, ["S"] = 38, ["T"] = 40, - -- ["U"] = 42, ["V"] = 44, ["W"] = 46, ["X"] = 48, ["Y"] = 50, - -- ["Z"] = 52, -} +definitions["en"] = { parent = "default" } -- dutch -replacements['nl'] = { { "ij", 'y' }, { "IJ", 'Y' } } -entries ['nl'] = entries ['en'] -mappings ['nl'] = mappings['en'] - ---~ -- czech (defined later) ---~ ---~ local cz_ch = uc(replacementoffset + 1) ---~ local cz_CH = uc(replacementoffset + 2) ---~ ---~ replacements['cz'] = { ---~ [1] = { "ch", cz_ch } ---~ } ---~ ---~ entries['cz'] = { ---~ ['a'] = "a", -- a ---~ [uc(0x00E1)] = "a", -- aacute ---~ ['b'] = "b", -- b ---~ ['c'] = "c", -- c ---~ [uc(0x010D)] = uc(0x010D), -- ccaron ---~ ['d'] = "d", -- d ---~ [uc(0x010F)] = "d", -- dcaron ---~ ['e'] = "e", -- e ---~ [uc(0x00E9)] = "e", -- eacute ---~ [uc(0x011B)] = "e", -- ecaron ---~ ['f'] = "f", -- f ---~ ['g'] = "g", -- g ---~ ['h'] = "h", -- h ---~ [cz_ch] = "ch", -- ch ---~ ['i'] = "i", -- i ---~ [uc(0x00ED)] = "i", -- iacute ---~ ['j'] = "j", -- j ---~ ['k'] = "k", -- k ---~ ['l'] = "l", -- l ---~ ['m'] = "m", -- m ---~ ['n'] = "n", -- n ---~ ['ň'] = "n", -- ncaron ---~ ['o'] = "o", -- o ---~ ['p'] = "p", -- p ---~ ['q'] = "q", -- q ---~ ['r'] = "r", -- r ---~ ['ř'] = "ř", -- rcaron ---~ ['s'] = "s", -- s ---~ [uc(0x0161)] = uc(0x0161), -- scaron ---~ ['t'] = "t", -- t ---~ [uc(0x0165)] = "t", -- tcaron ---~ ['u'] = "u", -- u ---~ [uc(0x00FA)] = "u", -- uacute ---~ [uc(0x016F)] = "u", -- uring ---~ ['v'] = "v", -- v ---~ ['w'] = "w", -- w ---~ ['x'] = "x", -- x ---~ ['y'] = "y", -- y ---~ [uc(0x00FD)] = "y", -- yacute ---~ ['z'] = "z", -- z ---~ [uc(0x017E)] = uc(0x017E), -- zcaron ---~ } ---~ ---~ mappings['cz'] = { ---~ ['a'] = 1, -- a ---~ [uc(0x00E1)] = 3, -- aacute ---~ ['b'] = 5, -- b ---~ ['c'] = 7, -- c ---~ [uc(0x010D)] = 9, -- ccaron ---~ ['d'] = 11, -- d ---~ [uc(0x010F)] = 13, -- dcaron ---~ ['e'] = 15, -- e ---~ [uc(0x00E9)] = 17, -- eacute ---~ [uc(0x011B)] = 19, -- ecaron ---~ ['f'] = 21, -- f ---~ ['g'] = 23, -- g ---~ ['h'] = 25, -- h ---~ [cz_ch] = 27, -- ch ---~ ['i'] = 29, -- i ---~ [uc(0x00ED)] = 31, -- iacute ---~ ['j'] = 33, -- j ---~ ['k'] = 35, -- k ---~ ['l'] = 37, -- l ---~ ['m'] = 39, -- m ---~ ['n'] = 41, -- n ---~ ['ň'] = 43, -- ncaron ---~ ['o'] = 45, -- o ---~ ['p'] = 47, -- p ---~ ['q'] = 49, -- q ---~ ['r'] = 51, -- r ---~ ['ř'] = 53, -- rcaron ---~ ['s'] = 55, -- s ---~ [uc(0x0161)] = 57, -- scaron ---~ ['t'] = 59, -- t ---~ [uc(0x0165)] = 61, -- tcaron ---~ ['u'] = 63, -- u ---~ [uc(0x00FA)] = 65, -- uacute ---~ [uc(0x016F)] = 67, -- uring ---~ ['v'] = 69, -- v ---~ ['w'] = 71, -- w ---~ ['x'] = 73, -- x ---~ ['y'] = 75, -- y ---~ [uc(0x00FD)] = 77, -- yacute ---~ ['z'] = 79, -- z ---~ [uc(0x017E)] = 81, -- zcaron ---~ } ---~ ---~ adduppercaseentries ("cz") ---~ adduppercasemappings("cz") -- 1 can be option (but then we need a runtime variant) ---~ ---~ entries ['cz'][cz_CH] = entries ['cz'][cz_ch] ---~ mappings['cz'][cz_CH] = mappings['cz'][cz_ch] ---~ ---~ replacements['cs'] = replacements['cz'] ---~ entries ['cs'] = entries ['cz'] ---~ mappings ['cs'] = mappings ['cz'] - ---~ print(table.serialize(mappings.cs)) +definitions['nl'] = { + parent = 'default', + replacements = { + { "ij", 'y' }, { "IJ", 'Y' }, + }, +} -- French -entries ['fr'] = entries ['en'] -mappings['fr'] = mappings['en'] +definitions['fr'] = { parent = 'default' } -- German (by Wolfgang Schuster) -- DIN 5007-1 -entries ['DIN 5007-1'] = entries ['en'] -mappings ['DIN 5007-1'] = mappings['en'] +definitions['DIN 5007-1'] = { parent = 'default' } -- DIN 5007-2 -replacements['DIN 5007-2'] = { -- todo: adduppercasereplacements - { "ä", 'ae' }, - { "ö", 'oe' }, - { "ü", 'ue' }, - { "Ä", 'Ae' }, - { "Ö", 'Oe' }, - { "Ü", 'Ue' }, +definitions['DIN 5007-2'] = { + parent = 'default', + replacements = { + { "ä", 'ae' }, { "Ä", 'Ae' }, + { "ö", 'oe' }, { "Ö", 'Oe' }, + { "ü", 'ue' }, { "Ü", 'Ue' }, + }, } ---~ adduppercasereplacements('DIN 5007-2') - -entries ['DIN 5007-2'] = entries ['en'] -mappings ['DIN 5007-2'] = mappings['en'] - -- Duden -replacements['Duden'] = { { "ß", 's' } } -entries ['Duden'] = entries ['en'] -mappings ['Duden'] = mappings['en'] - --- new german - -entries ['de'] = entries ['en'] -mappings ['de'] = mappings['en'] - --- old german - -entries ['deo'] = entries ['de'] -mappings ['deo'] = mappings['de'] - --- german - Germany - -entries ['de-DE'] = entries ['de'] -mappings ['de-DE'] = mappings['de'] - --- german - Swiss +definitions['Duden'] = { + parent = 'default', + replacements = { { "ß", 's' } }, +} -entries ['de-CH'] = entries ['de'] -mappings ['de-CH'] = mappings['de'] +definitions['de'] = { parent = 'default' } -- new german +definitions['deo'] = { parent = 'de' } -- old german +definitions['de-DE'] = { parent = 'de' } -- german - Germany +definitions['de-CH'] = { parent = 'de' } -- german - Swiss -- german - Austria -entries['de-AT'] = { - ["a"] = "a", ["ä"] = "ä", ["b"] = "b", ["c"] = "c", ["d"] = "d", - ["e"] = "e", ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", - ["j"] = "j", ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", - ["o"] = "o", ["ö"] = "ö", ["p"] = "p", ["q"] = "q", ["r"] = "r", - ["s"] = "s", ["t"] = "t", ["u"] = "u", ["ü"] = "ü", ["v"] = "v", - ["w"] = "w", ["x"] = "x", ["y"] = "y", ["z"] = "z", --- ["A"] = "a", ["Ä"] = "ä", ["B"] = "b", ["C"] = "c", ["D"] = "d", --- ["E"] = "e", ["F"] = "f", ["G"] = "g", ["H"] = "h", ["I"] = "i", --- ["J"] = "j", ["K"] = "k", ["L"] = "l", ["M"] = "m", ["N"] = "n", --- ["O"] = "o", ["Ö"] = "ö", ["P"] = "p", ["Q"] = "q", ["R"] = "r", --- ["S"] = "s", ["T"] = "t", ["U"] = "u", ["Ü"] = "ü", ["V"] = "v", --- ["W"] = "w", ["X"] = "x", ["Y"] = "y", ["Z"] = "z", +definitions['de-AT'] = { + entries = { + ["a"] = "a", ["ä"] = "ä", ["b"] = "b", ["c"] = "c", ["d"] = "d", + ["e"] = "e", ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", + ["j"] = "j", ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", + ["o"] = "o", ["ö"] = "ö", ["p"] = "p", ["q"] = "q", ["r"] = "r", + ["s"] = "s", ["t"] = "t", ["u"] = "u", ["ü"] = "ü", ["v"] = "v", + ["w"] = "w", ["x"] = "x", ["y"] = "y", ["z"] = "z", + }, + orders = { + "a", "ä", "b", "c", "d", "e", "f", "g", "h", "i", + "j", "k", "l", "m", "n", "o", "ö", "p", "q", "r", + "s", "t", "u", "ü", "v", "w", "x", "y", "z", + }, } -mappings['de-AT'] = { - ["a"] = 1, ["ä"] = 3, ["b"] = 5, ["c"] = 7, ["d"] = 9, - ["e"] = 11, ["f"] = 13, ["g"] = 15, ["h"] = 17, ["i"] = 19, - ["j"] = 21, ["k"] = 23, ["l"] = 25, ["m"] = 27, ["n"] = 29, - ["o"] = 31, ["ö"] = 33, ["p"] = 35, ["q"] = 37, ["r"] = 39, - ["s"] = 41, ["t"] = 43, ["u"] = 45, ["ü"] = 47, ["v"] = 49, - ["w"] = 51, ["x"] = 53, ["y"] = 55, ["z"] = 57, --- ["A"] = 2, ["Ä"] = 4, ["B"] = 6, ["C"] = 8, ["D"] = 10, --- ["E"] = 12, ["F"] = 14, ["G"] = 16, ["H"] = 18, ["I"] = 20, --- ["J"] = 22, ["K"] = 24, ["L"] = 26, ["M"] = 28, ["N"] = 30, --- ["O"] = 32, ["Ö"] = 34, ["P"] = 36, ["Q"] = 38, ["R"] = 40, --- ["S"] = 42, ["T"] = 44, ["U"] = 46, ["Ü"] = 48, ["V"] = 50, --- ["W"] = 52, ["X"] = 54, ["Y"] = 56, ["Z"] = 58, -} - -adduppercaseentries ('de-AT') -adduppercasemappings('de-AT',1) - -- finish (by Wolfgang Schuster) -entries['fi'] = { - ["a"] = "a", ["b"] = "b", ["c"] = "c", ["d"] = "d", ["e"] = "e", - ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", ["j"] = "j", - ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", ["o"] = "o", - ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", ["t"] = "t", - ["u"] = "u", ["v"] = "v", ["w"] = "w", ["x"] = "x", ["y"] = "y", - ["z"] = "z", ["å"] = "å", ["ä"] = "ä", ["ö"] = "ö", +definitions['fi'] = { + entries = { + ["a"] = "a", ["b"] = "b", ["c"] = "c", ["d"] = "d", ["e"] = "e", + ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", ["j"] = "j", + ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", ["o"] = "o", + ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", ["t"] = "t", + ["u"] = "u", ["v"] = "v", ["w"] = "w", ["x"] = "x", ["y"] = "y", + ["z"] = "z", ["å"] = "å", ["ä"] = "ä", ["ö"] = "ö", + }, + orders = { + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", + "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", + "u", "v", "w", "x", "y", "z", "å", "ä", "ö", + } +} + +-- slovenian by MM: this will change since we need to add accented vowels + +definitions['sl'] = { + entries = { + ["a"] = "a", ["b"] = "b", ["c"] = "c", ["č"] = "č", ["ć"] = "ć", ["d"] = "d", + ["đ"] = "đ", ["e"] = "e", ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", + ["j"] = "j", ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", ["o"] = "o", + ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", ["š"] = "š", ["t"] = "t", + ["u"] = "u", ["v"] = "v", ["w"] = "w", ["x"] = "x", ["y"] = "y", ["z"] = "z", + ["ž"] = "ž", + }, + orders = { + "a", "b", "c", "č", "ć", "d", "đ", "e", "f", "g", "h", "i", + "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "š", "t", + "u", "v", "w", "x", "y", "z", "ž", + } +} + +-- The following data was provided by Philipp Gesang. + +definitions["ru"] = { + entries = { + ["а"] = "а", ["б"] = "б", ["в"] = "в", ["г"] = "г", ["д"] = "д", + ["е"] = "е", ["ё"] = "е", ["ж"] = "ж", ["з"] = "з", ["и"] = "и", + ["і"] = "и", ["й"] = "й", ["к"] = "к", ["л"] = "л", ["м"] = "м", + ["н"] = "н", ["о"] = "о", ["п"] = "п", ["р"] = "р", ["с"] = "с", + ["т"] = "т", ["у"] = "у", ["ф"] = "ф", ["х"] = "х", ["ц"] = "ц", + ["ч"] = "ч", ["ш"] = "ш", ["щ"] = "щ", ["ъ"] = "ъ", ["ы"] = "ы", + ["ь"] = "ь", ["ѣ"] = "ѣ", ["э"] = "э", ["ю"] = "ю", ["я"] = "я", + ["ѳ"] = "ѳ", ["ѵ"] = "ѵ", + }, + orders = { + "а", "б", "в", "г", "д", "е", "ё", "ж", "з", "и", + "і", "й", "к", "л", "м", "н", "о", "п", "р", "с", + "т", "у", "ф", "х", "ц", "ч", "ш", "щ", "ъ", "ы", + "ь", "ѣ", "э", "ю", "я", "ѳ", "ѵ", + } } -mappings['fi'] = { - ["a"] = 1, ["b"] = 3, ["c"] = 5, ["d"] = 7, ["e"] = 9, - ["f"] = 11, ["g"] = 13, ["h"] = 15, ["i"] = 17, ["j"] = 19, - ["k"] = 21, ["l"] = 23, ["m"] = 25, ["n"] = 27, ["o"] = 29, - ["p"] = 31, ["q"] = 33, ["r"] = 35, ["s"] = 37, ["t"] = 39, - ["u"] = 41, ["v"] = 43, ["w"] = 45, ["x"] = 47, ["y"] = 49, - ["z"] = 51, ["å"] = 53, ["ä"] = 55, ["ö"] = 57, -} - -adduppercaseentries ("fi") -adduppercasemappings("fi") - --- slovenian --- --- MM: this will change since we need to add accented vowels - -entries['sl'] = { - ["a"] = "a", ["b"] = "b", ["c"] = "c", ["č"] = "č", ["ć"] = "ć", ["d"] = "d", - ["đ"] = "đ", ["e"] = "e", ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", - ["j"] = "j", ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", ["o"] = "o", - ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", ["š"] = "š", ["t"] = "t", - ["u"] = "u", ["v"] = "v", ["w"] = "w", ["x"] = "x", ["y"] = "y", ["z"] = "z", - ["ž"] = "ž", -} - -mappings['sl'] = { - ["a"] = 1, ["b"] = 3, ["c"] = 5, ["č"] = 7, ["ć"] = 9, ["d"] = 11, - ["đ"] = 13, ["e"] = 15, ["f"] = 17, ["g"] = 19, ["h"] = 21, ["i"] = 23, - ["j"] = 25, ["k"] = 27, ["l"] = 29, ["m"] = 31, ["n"] = 33, ["o"] = 35, - ["p"] = 37, ["q"] = 39, ["r"] = 41, ["s"] = 43, ["š"] = 45, ["t"] = 47, - ["u"] = 49, ["v"] = 51, ["w"] = 53, ["x"] = 55, ["y"] = 57, ["z"] = 59, - ["ž"] = 61, -} - -adduppercaseentries ("sl") -adduppercasemappings("sl") -- cf. MM - --- The following (quite some) languages were provided by Philipp --- Gesang (Phg), megas.kapaneus@gmail.com. - -replacements["ru"] = { --[[ None, do you miss any? ]] } - -entries["ru"] = { - ["а"] = "а", ["б"] = "б", ["в"] = "в", ["г"] = "г", ["д"] = "д", - ["е"] = "е", ["ё"] = "е", ["ж"] = "ж", ["з"] = "з", ["и"] = "и", - ["і"] = "и", ["й"] = "й", ["к"] = "к", ["л"] = "л", ["м"] = "м", - ["н"] = "н", ["о"] = "о", ["п"] = "п", ["р"] = "р", ["с"] = "с", - ["т"] = "т", ["у"] = "у", ["ф"] = "ф", ["х"] = "х", ["ц"] = "ц", - ["ч"] = "ч", ["ш"] = "ш", ["щ"] = "щ", ["ъ"] = "ъ", ["ы"] = "ы", - ["ь"] = "ь", ["ѣ"] = "ѣ", ["э"] = "э", ["ю"] = "ю", ["я"] = "я", - ["ѳ"] = "ѳ", ["ѵ"] = "ѵ", -} - -mappings["ru"] = { - ["а"] = 1, ["б"] = 2, ["в"] = 3, ["г"] = 4, ["д"] = 5, - ["е"] = 6, ["ё"] = 6, ["ж"] = 7, ["з"] = 8, ["и"] = 9, - ["і"] = 9, ["й"] = 10, ["к"] = 11, ["л"] = 12, ["м"] = 13, - ["н"] = 14, ["о"] = 15, ["п"] = 16, ["р"] = 17, ["с"] = 18, - ["т"] = 19, ["у"] = 20, ["ф"] = 21, ["х"] = 22, ["ц"] = 23, - ["ч"] = 24, ["ш"] = 25, ["щ"] = 26, ["ъ"] = 27, ["ы"] = 28, - ["ь"] = 29, ["ѣ"] = 30, ["э"] = 31, ["ю"] = 32, ["я"] = 33, - ["ѳ"] = 34, ["ѵ"] = 35, -} - -adduppercaseentries ("ru") -adduppercasemappings("ru") - --- Basic Ukrainian -replacements["uk"] = { --[[ None, do you miss any? ]] } - -entries["uk"] = { - ["а"] = "а", ["б"] = "б", ["в"] = "в", ["г"] = "г", ["ґ"] = "ґ", - ["д"] = "д", ["е"] = "е", ["є"] = "є", ["ж"] = "ж", ["з"] = "з", - ["и"] = "и", ["і"] = "і", ["ї"] = "ї", ["й"] = "й", ["к"] = "к", - ["л"] = "л", ["м"] = "м", ["н"] = "н", ["о"] = "о", ["п"] = "п", - ["р"] = "р", ["с"] = "с", ["т"] = "т", ["у"] = "у", ["ф"] = "ф", - ["х"] = "х", ["ц"] = "ц", ["ч"] = "ч", ["ш"] = "ш", ["щ"] = "щ", - ["ь"] = "ь", ["ю"] = "ю", ["я"] = "я", +definitions["uk"] = { + entries = { + ["а"] = "а", ["б"] = "б", ["в"] = "в", ["г"] = "г", ["ґ"] = "ґ", + ["д"] = "д", ["е"] = "е", ["є"] = "є", ["ж"] = "ж", ["з"] = "з", + ["и"] = "и", ["і"] = "і", ["ї"] = "ї", ["й"] = "й", ["к"] = "к", + ["л"] = "л", ["м"] = "м", ["н"] = "н", ["о"] = "о", ["п"] = "п", + ["р"] = "р", ["с"] = "с", ["т"] = "т", ["у"] = "у", ["ф"] = "ф", + ["х"] = "х", ["ц"] = "ц", ["ч"] = "ч", ["ш"] = "ш", ["щ"] = "щ", + ["ь"] = "ь", ["ю"] = "ю", ["я"] = "я", + }, + orders = { + "а", "б", "в", "г", "ґ", "д", "е", "є", "ж", "з", "и", "і", + "ї", "й", "к", "л", "м", "н", "о", "п", "р", "с", "т", "у", + "ф", "х", "ц", "ч", "ш", "щ", "ь", "ю", "я", + } } -mappings["uk"] = { - ["а"] = 1, ["б"] = 2, ["в"] = 3, ["г"] = 4, ["ґ"] = 5, - ["д"] = 6, ["е"] = 7, ["є"] = 8, ["ж"] = 9, ["з"] = 10, - ["и"] = 11, ["і"] = 12, ["ї"] = 13, ["й"] = 14, ["к"] = 15, - ["л"] = 16, ["м"] = 17, ["н"] = 18, ["о"] = 19, ["п"] = 20, - ["р"] = 21, ["с"] = 22, ["т"] = 23, ["у"] = 24, ["ф"] = 25, - ["х"] = 26, ["ц"] = 27, ["ч"] = 28, ["ш"] = 29, ["щ"] = 30, - ["ь"] = 31, ["ю"] = 32, ["я"] = 33, -} - -adduppercaseentries ("uk") -adduppercasemappings("uk") - --- Belarusian -replacements["be"] = { --[[ None, do you miss any? ]] } - -entries["be"] = { - ["а"] = "а", ["б"] = "б", ["в"] = "в", ["г"] = "г", ["д"] = "д", - ["е"] = "е", ["ё"] = "е", ["ж"] = "ж", ["з"] = "з", ["і"] = "і", - ["й"] = "й", ["к"] = "к", ["л"] = "л", ["м"] = "м", ["н"] = "н", - ["о"] = "о", ["п"] = "п", ["р"] = "р", ["с"] = "с", ["т"] = "т", - ["у"] = "у", ["ў"] = "ў", ["ф"] = "ф", ["х"] = "х", ["ц"] = "ц", - ["ч"] = "ч", ["ш"] = "ш", ["ы"] = "ы", ["ь"] = "ь", ["э"] = "э", - ["ю"] = "ю", ["я"] = "я", +definitions["be"] = { + entries = { + ["а"] = "а", ["б"] = "б", ["в"] = "в", ["г"] = "г", ["д"] = "д", + ["е"] = "е", ["ё"] = "е", ["ж"] = "ж", ["з"] = "з", ["і"] = "і", + ["й"] = "й", ["к"] = "к", ["л"] = "л", ["м"] = "м", ["н"] = "н", + ["о"] = "о", ["п"] = "п", ["р"] = "р", ["с"] = "с", ["т"] = "т", + ["у"] = "у", ["ў"] = "ў", ["ф"] = "ф", ["х"] = "х", ["ц"] = "ц", + ["ч"] = "ч", ["ш"] = "ш", ["ы"] = "ы", ["ь"] = "ь", ["э"] = "э", + ["ю"] = "ю", ["я"] = "я", + }, + orders = { + "а", "б", "в", "г", "д", "е", "ё", "ж", "з", "і", + "й", "к", "л", "м", "н", "о", "п", "р", "с", "т", + "у", "ў", "ф", "х", "ц", "ч", "ш", "ы", "ь", "э", + "ю", "я", + } } -mappings["be"] = { - ["а"] = 1, ["б"] = 2, ["в"] = 3, ["г"] = 4, ["д"] = 5, - ["е"] = 6, ["ё"] = 6, ["ж"] = 7, ["з"] = 8, ["і"] = 9, - ["й"] = 10, ["к"] = 11, ["л"] = 12, ["м"] = 13, ["н"] = 14, - ["о"] = 15, ["п"] = 16, ["р"] = 17, ["с"] = 18, ["т"] = 19, - ["у"] = 20, ["ў"] = 21, ["ф"] = 22, ["х"] = 23, ["ц"] = 24, - ["ч"] = 25, ["ш"] = 26, ["ы"] = 27, ["ь"] = 28, ["э"] = 29, - ["ю"] = 30, ["я"] = 31, -} - -adduppercaseentries ("be") -adduppercasemappings("be") - --- Bulgarian -replacements["bg"] = { --[[ None, do you miss any? ]] } - -entries["bg"] = { - ["а"] = "а", - ["б"] = "б", - ["в"] = "в", - ["г"] = "г", - ["д"] = "д", - ["е"] = "е", - ["ж"] = "ж", - ["з"] = "з", - ["и"] = "и", - ["й"] = "й", - ["к"] = "к", - ["a"] = "a", - ["л"] = "л", - ["a"] = "a", - ["м"] = "м", - ["н"] = "н", - ["о"] = "о", - ["п"] = "п", - ["р"] = "р", - ["с"] = "с", - ["т"] = "т", - ["у"] = "у", - ["ф"] = "ф", - ["х"] = "х", - ["ц"] = "ц", - ["ч"] = "ч", - ["ш"] = "ш", - ["щ"] = "щ", - ["ъ"] = "ъ", - ["ь"] = "ь", - ["ю"] = "ю", - ["я"] = "я", -} - -mappings["bg"] = { - ["а"] = 1, - ["б"] = 2, - ["в"] = 3, - ["г"] = 4, - ["д"] = 5, - ["е"] = 6, - ["ж"] = 7, - ["з"] = 8, - ["и"] = 9, - ["й"] = 10, - ["к"] = 11, - ["a"] = 12, - ["л"] = 13, - ["a"] = 14, - ["м"] = 15, - ["н"] = 16, - ["о"] = 17, - ["п"] = 18, - ["р"] = 19, - ["с"] = 20, - ["т"] = 21, - ["у"] = 22, - ["ф"] = 23, - ["х"] = 24, - ["ц"] = 25, - ["ч"] = 26, - ["ш"] = 27, - ["щ"] = 28, - ["ъ"] = 29, - ["ь"] = 30, - ["ю"] = 31, - ["я"] = 32, +definitions["bg"] = { + entries = { + ["а"] = "а", ["б"] = "б", ["в"] = "в", ["г"] = "г", ["д"] = "д", + ["е"] = "е", ["ж"] = "ж", ["з"] = "з", ["и"] = "и", ["й"] = "й", + ["к"] = "к", ["a"] = "a", ["л"] = "л", ["a"] = "a", ["м"] = "м", + ["н"] = "н", ["о"] = "о", ["п"] = "п", ["р"] = "р", ["с"] = "с", + ["т"] = "т", ["у"] = "у", ["ф"] = "ф", ["х"] = "х", ["ц"] = "ц", + ["ч"] = "ч", ["ш"] = "ш", ["щ"] = "щ", ["ъ"] = "ъ", ["ь"] = "ь", + ["ю"] = "ю", ["я"] = "я", + }, + orders = { + "а", "б", "в", "г", "д", "е", "ж", "з","и", "й", + "к", "a", "л", "a", "м", "н", "о", "п", "р", "с", + "т", "у", "ф", "х", "ц", "ч", "ш", "щ", "ъ", "ь", + "ю", "я", + } } -adduppercaseentries ("bg") -adduppercasemappings("bg") - --- Old Church Slavonic -- The language symbol “cu” is taken from the Wikipedia subdomain -- cu.wikipedia.org. -local cu_uk = uc(replacementoffset + 1) -local cu_UK = uc(replacementoffset + 2) - -replacements["cu"] = { - [1] = { "оу", cu_uk }, -} - -entries["cu"] = { - ["а"] = "а", - ["б"] = "б", - ["в"] = "в", - ["г"] = "г", - ["д"] = "д", - ["є"] = "є", - ["ж"] = "ж", - ["ѕ"] = "ѕ", - ["ꙃ"] = "ѕ", -- Dzělo, U+0292, alternative: dz U+01f3 - ["з"] = "з", - ["ꙁ"] = "з", -- Zemlja - ["и"] = "и", - ["і"] = "и", - ["ї"] = "и", - ["ћ"] = "ћ", - ["к"] = "к", - ["л"] = "л", - ["м"] = "м", - ["н"] = "н", - ["о"] = "о", - ["п"] = "п", - ["р"] = "р", - ["с"] = "с", - ["т"] = "т", - ["у"] = "у", - ["ѹ"] = "у", -- U+0478 uk, horizontal ligature - ["ꙋ"] = "у", -- U+0479 uk, vertical ligature - [cu_uk] = "у", - ["ф"] = "ф", - ["х"] = "х", - ["ѡ"] = "ѡ", --"ō" - ["ѿ"] = "ѡ", -- U+047f \ - ["ѽ"] = "ѡ", -- U+047d > Omega variants - ["ꙍ"] = "ѡ", -- U+064D / - ["ц"] = "ц", - ["ч"] = "ч", - ["ш"] = "ш", - ["щ"] = "щ", - ["ъ"] = "ъ", - ["ы"] = "ы", - ["ꙑ"] = "ы", -- Old jery (U+a651) as used e.g. by the OCS Wikipedia. - ["ь"] = "ь", - ["ѣ"] = "ѣ", - ["ю"] = "ю", - ["ꙗ"] = "ꙗ", -- IOTIFIED A - ["ѥ"] = "ѥ", - ["ѧ"] = "ѧ", - ["ѩ"] = "ѩ", - ["ѫ"] = "ѫ", - ["ѭ"] = "ѭ", - ["ѯ"] = "ѯ", - ["ѱ"] = "ѱ", - ["ѳ"] = "ѳ", - ["ѵ"] = "ѵ", - ["ѷ"] = "ѵ", -- Why does this even have its own codepoint???? -} - -mappings["cu"] = { - ["а"] = 1, - ["б"] = 2, - ["в"] = 3, - ["г"] = 4, - ["д"] = 5, - ["є"] = 6, - ["ж"] = 7, - ["ѕ"] = 8, - ["ꙃ"] = 8, -- Dzělo, U+0292, alternative: dz U+01f3 - ["з"] = 9, - ["ꙁ"] = 9, -- Zemlja - ["и"] = 10, - ["і"] = 10, - ["ї"] = 10, - ["ћ"] = 11, - ["к"] = 12, - ["л"] = 13, - ["м"] = 14, - ["н"] = 15, - ["о"] = 16, - ["п"] = 17, - ["р"] = 18, - ["с"] = 19, - ["т"] = 20, - ["у"] = 21, - ["ѹ"] = 21, -- U+0478 uk, horizontal ligature - ["ꙋ"] = 21, -- U+0479 uk, vertical ligature - [cu_uk] = 21, - ["ф"] = 22, - ["х"] = 23, - ["ѡ"] = 24, --"ō" - ["ѿ"] = 24, -- U+047f \ - ["ѽ"] = 24, -- U+047d > Omega variants - ["ꙍ"] = 24, -- U+064D / - ["ц"] = 25, - ["ч"] = 26, - ["ш"] = 27, - ["щ"] = 28, - ["ъ"] = 29, - ["ы"] = 30, - ["ꙑ"] = 30, -- Old jery (U+a651) as used e.g. by the OCS Wikipedia. - ["ь"] = 31, - ["ѣ"] = 32, - ["ю"] = 33, - ["ꙗ"] = 34, -- IOTIFIED A - ["ѥ"] = 35, - ["ѧ"] = 36, - ["ѩ"] = 37, - ["ѫ"] = 38, - ["ѭ"] = 39, - ["ѯ"] = 40, - ["ѱ"] = 41, - ["ѳ"] = 42, - ["ѵ"] = 43, - ["ѷ"] = 43, -- Why does this even have its own codepoint???? -} - -adduppercaseentries ("cu") -adduppercasemappings("cu") - -entries ["cu"] [cu_UK] = entries ["cu"] [cu_uk] -mappings["cu"] [cu_UK] = mappings["cu"] [cu_uk] - ---- Polish (including the letters q, v, x) - --- Cf. ftp://ftp.gust.org.pl/pub/GUST/bulletin/03/02-bl.pdf. - -replacements["pl"] = { - -- none -} - -entries["pl"] = { - ["a"] = "a", ["ą"] = "ą", ["b"] = "b", ["c"] = "c", ["ć"] = "ć", - ["d"] = "d", ["e"] = "e", ["ę"] = "ę", ["f"] = "f", ["g"] = "g", - ["h"] = "h", ["i"] = "i", ["j"] = "j", ["k"] = "k", ["l"] = "l", - ["ł"] = "ł", ["m"] = "m", ["n"] = "n", ["ń"] = "ń", ["o"] = "o", - ["ó"] = "ó", ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", - ["ś"] = "ś", ["t"] = "t", ["u"] = "u", ["v"] = "v", ["w"] = "w", - ["x"] = "x", ["y"] = "y", ["z"] = "z", ["ź"] = "ź", ["ż"] = "ż", -} - -mappings["pl"] = { - ["a"] = 1, ["ą"] = 2, ["b"] = 3, ["c"] = 4, ["ć"] = 5, - ["d"] = 6, ["e"] = 7, ["ę"] = 8, ["f"] = 9, ["g"] = 10, - ["h"] = 11, ["i"] = 12, ["j"] = 13, ["k"] = 14, ["l"] = 15, - ["ł"] = 16, ["m"] = 17, ["n"] = 18, ["ń"] = 19, ["o"] = 20, - ["ó"] = 21, ["p"] = 22, ["q"] = 23, ["r"] = 24, ["s"] = 25, - ["ś"] = 26, ["t"] = 27, ["u"] = 28, ["v"] = 29, ["w"] = 30, - ["x"] = 31, ["y"] = 32, ["z"] = 33, ["ź"] = 34, ["ż"] = 35, -} - -adduppercaseentries ("pl") -adduppercasemappings("pl") - ---- Czech --- Modified to treat quantities and other secondary characteristics indifferently. --- Cf. --- (2.4.3; via ). - -local cz_ch = uc(replacementoffset + 1) -local cz_CH = uc(replacementoffset + 2) -- Is this actually used somewhere (e.g. with “adduppercaseentries”)? - -replacements["cz"] = { - [1] = { "ch", cz_ch } -} - -entries["cz"] = { - ["a"] = "a", -- a - ["á"] = "a", -- aacute - ["b"] = "b", -- b - ["c"] = "c", -- c - ["č"] = "č", -- ccaron - ["d"] = "d", -- d - ["ď"] = "d", -- dcaron - ["e"] = "e", -- e - ["é"] = "e", -- eacute - ["ě"] = "e", -- ecaron - ["f"] = "f", -- f - ["g"] = "g", -- g - ["h"] = "h", -- h - [cz_ch] = "ch", -- ch - ["i"] = "i", -- i - ["í"] = "i", -- iacute - ["j"] = "j", -- j - ["k"] = "k", -- k - ["l"] = "l", -- l - ["m"] = "m", -- m - ["n"] = "n", -- n - ["ň"] = "n", -- ncaron - ["o"] = "o", -- o - ["ó"] = "o", -- ó - ["p"] = "p", -- p - ["q"] = "q", -- q - ["r"] = "r", -- r - ["ř"] = "ř", -- rcaron - ["s"] = "s", -- s - ["š"] = "š", -- scaron - ["t"] = "t", -- t - ["ť"] = "t", -- tcaron - ["u"] = "u", -- u - ["ú"] = "u", -- uacute - ["ů"] = "u", -- uring - ["v"] = "v", -- v - ["w"] = "w", -- w - ["x"] = "x", -- x - ["y"] = "y", -- y - ["ý"] = "y", -- yacute - ["z"] = "z", -- z - ["ž"] = "ž", -- zcaron -} - -mappings["cz"] = { - ["a"] = 1, -- a - ["á"] = 1, -- aacute -> a - ["b"] = 2, -- b - ["c"] = 3, -- c - ["č"] = 4, -- ccaron - ["d"] = 5, -- d - ["ď"] = 5, -- dcaron -> ď - ["e"] = 6, -- e - ["é"] = 6, -- eacute -> e - ["ě"] = 6, -- ecaron -> e - ["f"] = 7, -- f - ["g"] = 8, -- g - ["h"] = 9, -- h - [cz_ch] = 10, -- ch - ["i"] = 11, -- i - ["í"] = 11, -- iacute -> i - ["j"] = 12, -- j - ["k"] = 13, -- k - ["l"] = 14, -- l - ["m"] = 15, -- m - ["n"] = 16, -- n - ["ň"] = 16, -- ncaron -> n - ["o"] = 17, -- o - ["ó"] = 17, -- o -> o - ["p"] = 18, -- p - ["q"] = 19, -- q - ["r"] = 20, -- r - ["ř"] = 21, -- rcaron - ["s"] = 22, -- s - ["š"] = 23, -- scaron - ["t"] = 24, -- t - ["ť"] = 24, -- tcaron -> t - ["u"] = 25, -- u - ["ú"] = 25, -- uacute -> u - ["ů"] = 25, -- uring -> u - ["v"] = 26, -- v - ["w"] = 27, -- w - ["x"] = 28, -- x - ["y"] = 29, -- y - ["ý"] = 29, -- yacute -> y - ["z"] = 30, -- z - ["ž"] = 31, -- zcaron Checksum: 42 -} - -adduppercaseentries ("cz") -adduppercasemappings("cz") -- 1 can be option (but then we need a runtime variant) - -entries ["cz"][cz_CH] = entries ["cz"][cz_ch] -mappings["cz"][cz_CH] = mappings["cz"][cz_ch] - -replacements["cs"] = replacements["cz"] -entries ["cs"] = entries ["cz"] -mappings ["cs"] = mappings ["cz"] +local uk, UK = utfchar(replacementoffset + 1), utfchar(replacementoffset + 11) + +definitions["cu"] = { + replacements = { + { "оу", uk }, { "ОУ", UK }, + }, + entries = { + ["а"] = "а", ["б"] = "б", ["в"] = "в", ["г"] = "г", ["д"] = "д", + ["є"] = "є", ["ж"] = "ж", ["ѕ"] = "ѕ", ["ꙃ"] = "ѕ", ["з"] = "з", + ["ꙁ"] = "з", ["и"] = "и", ["і"] = "и", ["ї"] = "и", ["ћ"] = "ћ", + ["к"] = "к", ["л"] = "л", ["м"] = "м", ["н"] = "н", ["о"] = "о", + ["п"] = "п", ["р"] = "р", ["с"] = "с", ["т"] = "т", ["у"] = "у", + ["ѹ"] = "у", ["ꙋ"] = "у", [uk] = "у", ["ф"] = "ф", ["х"] = "х", + ["ѡ"] = "ѡ", ["ѿ"] = "ѡ", ["ѽ"] = "ѡ", ["ꙍ"] = "ѡ", ["ц"] = "ц", + ["ч"] = "ч", ["ш"] = "ш", ["щ"] = "щ", ["ъ"] = "ъ", ["ы"] = "ы", + ["ꙑ"] = "ы", ["ь"] = "ь", ["ѣ"] = "ѣ", ["ю"] = "ю", ["ꙗ"] = "ꙗ", + ["ѥ"] = "ѥ", ["ѧ"] = "ѧ", ["ѩ"] = "ѩ", ["ѫ"] = "ѫ", ["ѭ"] = "ѭ", + ["ѯ"] = "ѯ", ["ѱ"] = "ѱ", ["ѳ"] = "ѳ", ["ѵ"] = "ѵ", ["ѷ"] = "ѵ", + }, + orders = { + "а", "б", "в", "г", "д", "є", "ж", "ѕ", "ꙃ", "з", -- Dzělo, U+0292, alternative: dz U+01f3 + "ꙁ", "и", "і", "ї", "ћ", "к", "л", "м", "н", "о", -- Zemlja + "п", "р", "с", "т", "у", "ѹ", "ꙋ", uk, "ф", "х", -- U+0478 uk, horizontal ligature, U+0479 uk, vertical ligature + "ѡ", "ѿ", "ѽ", "ꙍ", "ц", "ч", "ш", "щ", "ъ", "ы", -- "ō", U+047f \, U+047d > Omega variants, U+064D / + "ꙑ", "ь", "ѣ", "ю", "ꙗ", "ѥ", "ѧ", "ѩ", "ѫ", "ѭ", -- Old jery (U+a651) as used e.g. by the OCS Wikipedia. IOTIFIED A + "ѯ", "ѱ", "ѳ", "ѵ", "ѷ", + }, + upper = { + uk = UK, + }, + lower = { + UK = uk, + } +} + +--- Polish (including the letters q, v, x) Cf. ftp://ftp.gust.org.pl/pub/GUST/bulletin/03/02-bl.pdf. + +definitions["pl"] = { + entries = { + ["a"] = "a", ["ą"] = "ą", ["b"] = "b", ["c"] = "c", ["ć"] = "ć", + ["d"] = "d", ["e"] = "e", ["ę"] = "ę", ["f"] = "f", ["g"] = "g", + ["h"] = "h", ["i"] = "i", ["j"] = "j", ["k"] = "k", ["l"] = "l", + ["ł"] = "ł", ["m"] = "m", ["n"] = "n", ["ń"] = "ń", ["o"] = "o", + ["ó"] = "ó", ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", + ["ś"] = "ś", ["t"] = "t", ["u"] = "u", ["v"] = "v", ["w"] = "w", + ["x"] = "x", ["y"] = "y", ["z"] = "z", ["ź"] = "ź", ["ż"] = "ż", + }, + orders = { + "a", "ą", "b", "c", "ć", "d", "e", "ę", "f", "g", + "h", "i", "j", "k", "l", "ł", "m", "n", "ń", "o", + "ó", "p", "q", "r", "s", "ś", "t", "u", "v", "w", + "x", "y", "z", "ź", "ż", + }, +} + +-- Czech, modified to treat quantities and other secondary characteristics indifferently. Cf. +-- http://racek.vlada.cz/usneseni/usneseni_webtest.nsf/WebGovRes/0AD8FEF4CC04B7A4C12571B6006D69D0?OpenDocument +-- (2.4.3; via ) + +local ch, CH = utfchar(replacementoffset + 1), utfchar(replacementoffset + 11) + +definitions["cz"] = { + replacements = { + { "ch", ch }, { "CH", CH } + }, + entries = { + ["a"] = "a", ["á"] = "a", ["b"] = "b", ["c"] = "c", ["č"] = "č", + ["d"] = "d", ["ď"] = "d", ["e"] = "e", ["é"] = "e", ["ě"] = "e", + ["f"] = "f", ["g"] = "g", ["h"] = "h", [ch] = "ch", ["i"] = "i", + ["í"] = "i", ["j"] = "j", ["k"] = "k", ["l"] = "l", ["m"] = "m", + ["n"] = "n", ["ň"] = "n", ["o"] = "o", ["ó"] = "o", ["p"] = "p", + ["q"] = "q", ["r"] = "r", ["ř"] = "ř", ["s"] = "s", ["š"] = "š", + ["t"] = "t", ["ť"] = "t", ["u"] = "u", ["ú"] = "u", ["ů"] = "u", + ["v"] = "v", ["w"] = "w", ["x"] = "x", ["y"] = "y", ["ý"] = "y", + ["z"] = "z", ["ž"] = "ž", + }, + orders = { + "a", "á", "b", "c", "č", "d", "ď", "e", "é", "ě", + "f", "g", "h", ch, "i", "í", "j", "k", "l", "m", + "n", "ň", "o", "ó", "p", "q", "r", "ř", "s", "š", + "t", "ť", "u", "ú", "ů", "v", "w", "x", "y", "ý", + "z", "ž", + }, + upper = { + ch = CH, + }, + lower = { + CH = ch, + } +} + +definitions["cs"] = { parent = "cz" } --- Slovak. @@ -767,1695 +328,461 @@ mappings ["cs"] = mappings ["cz"] -- to affect collation order, then use the values given in the comments. We could -- define an additional vector for that. -local sk_dz = uc(replacementoffset + 1) -local sk_DZ = uc(replacementoffset + 2) -local sk_dzh = uc(replacementoffset + 3) -local sk_DZH = uc(replacementoffset + 4) -local sk_ch = uc(replacementoffset + 5) -local sk_CH = uc(replacementoffset + 6) - -replacements["sk"] = { - [1] = { "dz", sk_dz }, - [2] = { "dž", sk_dzh }, - [3] = { "ch", sk_ch }, +local dz, DZ = utfchar(replacementoffset + 1), utfchar(replacementoffset + 11) +local dzh, DZH = utfchar(replacementoffset + 2), utfchar(replacementoffset + 12) +local ch, CH = utfchar(replacementoffset + 3), utfchar(replacementoffset + 13) + +definitions["sk"] = { + replacements = { + { "dz", dz }, { "dz", DZ }, + { "dž", dzh }, { "dž", DZH }, + { "ch", ch }, { "ch", CH }, + }, + entries = { + ["a"] = "a", ["á"] = "a", ["ä"] = "a", ["b"] = "b", ["c"] = "c", + ["č"] = "č", ["d"] = "d", ["ď"] = "d", [dz] = "dz", [dzh] = "dž", + ["e"] = "e", ["é"] = "e", ["f"] = "f", ["g"] = "g", ["h"] = "h", + [ch] = "ch", ["i"] = "i", ["í"] = "i", ["j"] = "j", ["k"] = "k", + ["l"] = "l", ["ĺ"] = "l", ["ľ"] = "l", ["m"] = "m", ["n"] = "n", + ["ň"] = "n", ["o"] = "o", ["ó"] = "o", ["ô"] = "o", ["p"] = "p", + ["q"] = "q", ["r"] = "r", ["ŕ"] = "r", ["s"] = "s", ["š"] = "š", + ["t"] = "t", ["ť"] = "t", ["u"] = "u", ["ú"] = "u", ["v"] = "v", + ["w"] = "w", ["x"] = "x", ["y"] = "y", ["ý"] = "y", ["z"] = "z", + ["ž"] = "ž", + }, + orders = { + "a", "á", "ä", "b", "c", "č", "d", "ď", dz, dzh, + "e", "é", "f", "g", "h", ch, "i", "í", "j", "k", + "l", "ĺ", "ľ", "m", "n", "ň", "o", "ó", "ô", "p", + "q", "r", "ŕ", "s", "š", "t", "ť", "u", "ú", "v", + "w", "x", "y", "ý", "z", "ž", + }, + upper = { + dz = DZ, dzh = DZH, ch = CH, + }, + lower = { + DZ = dz, DZH = dzh, CH = ch, + } } -entries["sk"] = { - ["a"] = "a", - ["á"] = "a", -- "á", - ["ä"] = "a", -- "ä", - ["b"] = "b", - ["c"] = "c", - ["č"] = "č", - ["d"] = "d", - ["ď"] = "d", -- "ď", - [sk_dz] = "dz", - [sk_dzh] = "dž", - ["e"] = "e", - ["é"] = "e", -- "é", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - [sk_ch] = "ch", - ["i"] = "i", - ["í"] = "i", -- "í", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - ["ĺ"] = "l", -- "ĺ", - ["ľ"] = "l", -- "ľ", - ["m"] = "m", - ["n"] = "n", - ["ň"] = "n", -- "ň", - ["o"] = "o", - ["ó"] = "o", -- "ó", - ["ô"] = "o", -- "ô", - ["p"] = "p", - ["q"] = "q", - ["r"] = "r", - ["ŕ"] = "r", -- "ŕ", - ["s"] = "s", - ["š"] = "š", - ["t"] = "t", - ["ť"] = "t", -- "ť", - ["u"] = "u", - ["ú"] = "u", -- "ú", - ["v"] = "v", - ["w"] = "w", - ["x"] = "x", - ["y"] = "y", - ["ý"] = "y", -- "ý", - ["z"] = "z", - ["ž"] = "ž", -} - -mappings["sk"] = { - ["a"] = 1, - ["á"] = 1, -- 2, - ["ä"] = 1, -- 3, - ["b"] = 4, - ["c"] = 5, - ["č"] = 6, - ["d"] = 7, - ["ď"] = 7, -- 8, - [sk_dz] = 9, - [sk_dzh] = 10, - ["e"] = 11, - ["é"] = 11, -- 12, - ["f"] = 13, - ["g"] = 14, - ["h"] = 15, - [sk_ch] = 16, - ["i"] = 17, - ["í"] = 17, -- 18, - ["j"] = 19, - ["k"] = 20, - ["l"] = 21, - ["ĺ"] = 21, -- 22, - ["ľ"] = 21, -- 23, - ["m"] = 24, - ["n"] = 25, - ["ň"] = 25, -- 26, - ["o"] = 27, - ["ó"] = 27, -- 28, - ["ô"] = 27, -- 29, - ["p"] = 30, - ["q"] = 31, - ["r"] = 32, - ["ŕ"] = 32, -- 33, - ["s"] = 34, - ["š"] = 35, - ["t"] = 36, - ["ť"] = 36, -- 37, - ["u"] = 38, - ["ú"] = 38, -- 39, - ["v"] = 40, - ["w"] = 41, - ["x"] = 42, - ["y"] = 43, - ["ý"] = 43, -- 44, - ["z"] = 45, - ["ž"] = 46, -- Checksum: 46, přesně! -} - -adduppercaseentries ("sk") -adduppercasemappings("sk") - -entries ["sk"] [sk_DZ] = entries ["sk"] [sk_dz] -mappings["sk"] [sk_DZ] = mappings["sk"] [sk_dz] -entries ["sk"][sk_DZH] = entries ["sk"][sk_dzh] -mappings["sk"][sk_DZH] = mappings["sk"][sk_dzh] -entries ["sk"] [sk_CH] = entries ["sk"] [sk_ch] -mappings["sk"] [sk_CH] = mappings["sk"] [sk_ch] - --- Croatian -local hr_dzh = uc(replacementoffset + 1) -local hr_DZH = uc(replacementoffset + 2) -local hr_lj = uc(replacementoffset + 3) -local hr_LJ = uc(replacementoffset + 4) -local hr_nj = uc(replacementoffset + 5) -local hr_NJ = uc(replacementoffset + 6) - -replacements["hr"] = { - [1] = { "dž", hr_dzh }, - [2] = { "lj", hr_lj }, - [3] = { "nj", hr_nj }, -} - -entries["hr"] = { - ["a"] = "a", -- Why do you sometimes encounter “â” (where Old Slavonic - ["b"] = "b", -- has “ѣ”) and how does it collate? - ["c"] = "c", - ["č"] = "č", - ["ć"] = "ć", - ["d"] = "d", - [hr_dzh] = "dž", - ["đ"] = "đ", - ["e"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - [hr_lj] = "lj", - ["m"] = "m", - ["n"] = "n", - [hr_nj] = "nj", - ["o"] = "o", - ["p"] = "p", - ["r"] = "r", - ["s"] = "s", - ["š"] = "š", - ["t"] = "t", - ["u"] = "u", - ["v"] = "v", - ["z"] = "z", - ["ž"] = "ž", -} - -mappings["hr"] = { - ["a"] = 1, - ["b"] = 2, - ["c"] = 3, - ["č"] = 4, - ["ć"] = 5, - ["d"] = 6, - [hr_dzh] = 7, - ["đ"] = 8, - ["e"] = 9, - ["f"] = 10, - ["g"] = 11, - ["h"] = 12, - ["i"] = 13, - ["j"] = 14, - ["k"] = 15, - ["l"] = 16, - [hr_lj] = 17, - ["m"] = 18, - ["n"] = 19, - [hr_nj] = 20, - ["o"] = 21, - ["p"] = 22, - ["r"] = 23, - ["s"] = 24, - ["š"] = 25, - ["t"] = 26, - ["u"] = 27, - ["v"] = 28, - ["z"] = 29, - ["ž"] = 30, +local dzh, DZH = utfchar(replacementoffset + 1), utfchar(replacementoffset + 11) +local lj, LJ = utfchar(replacementoffset + 2), utfchar(replacementoffset + 12) +local nj, NJ = utfchar(replacementoffset + 3), utfchar(replacementoffset + 13) + +definitions["hr"] = { + replacements = { + { "dž", dzh }, { "DŽ", DZH }, + { "lj", lj }, { "LJ", LJ }, + { "nj", nj }, { "NJ", NJ }, + }, + entries = { + ["a"] = "a", ["b"] = "b", ["c"] = "c", ["č"] = "č", ["ć"] = "ć", + ["d"] = "d", [dzh] = "dž", ["đ"] = "đ", ["e"] = "e", ["f"] = "f", + ["g"] = "g", ["h"] = "h", ["i"] = "i", ["j"] = "j", ["k"] = "k", + ["l"] = "l", [lj] = "lj", ["m"] = "m", ["n"] = "n", [nj] = "nj", + ["o"] = "o", ["p"] = "p", ["r"] = "r", ["s"] = "s", ["š"] = "š", + ["t"] = "t", ["u"] = "u", ["v"] = "v", ["z"] = "z", ["ž"] = "ž", + }, + orders = { + "a", "b", "c", "č", "ć", "d", dzh, "đ", "e", "f", + "g", "h", "i", "j", "k", "l", lj, "m", "n", nj, + "o", "p", "r", "s", "š", "t", "u", "v", "z", "ž", + }, + upper = { + dzh = DZH, lj = LJ, nj = NJ, + }, + lower = { + DZH = dzh, LJ = lj, NJ = nj, + } } -adduppercaseentries ("hr") -adduppercasemappings("hr") - -entries ["hr"][hr_DZH] = entries ["hr"][hr_dzh] -mappings["hr"][hr_DZH] = mappings["hr"][hr_dzh] -entries ["hr"] [hr_LJ] = entries ["hr"] [hr_lj] -mappings["hr"] [hr_LJ] = mappings["hr"] [hr_lj] -entries ["hr"] [hr_NJ] = entries ["hr"] [hr_nj] -mappings["hr"] [hr_NJ] = mappings["hr"] [hr_nj] --- Serbian -replacements["sr"] = { - -- none -} - -entries["sr"] = { - ["а"] = "а", - ["б"] = "б", - ["в"] = "в", - ["г"] = "г", - ["д"] = "д", - ["ђ"] = "ђ", - ["е"] = "е", - ["ж"] = "ж", - ["з"] = "з", - ["и"] = "и", - ["ј"] = "ј", - ["к"] = "к", - ["л"] = "л", - ["љ"] = "љ", - ["м"] = "м", - ["н"] = "н", - ["њ"] = "њ", - ["о"] = "о", - ["п"] = "п", - ["р"] = "р", - ["с"] = "с", - ["т"] = "т", - ["ћ"] = "ћ", - ["у"] = "у", - ["ф"] = "ф", - ["х"] = "х", - ["ц"] = "ц", - ["ч"] = "ч", - ["џ"] = "џ", - ["ш"] = "ш", +definitions["sr"] = { + entries = { + ["а"] = "а", ["б"] = "б", ["в"] = "в", ["г"] = "г", ["д"] = "д", + ["ђ"] = "ђ", ["е"] = "е", ["ж"] = "ж", ["з"] = "з", ["и"] = "и", + ["ј"] = "ј", ["к"] = "к", ["л"] = "л", ["љ"] = "љ", ["м"] = "м", + ["н"] = "н", ["њ"] = "њ", ["о"] = "о", ["п"] = "п", ["р"] = "р", + ["с"] = "с", ["т"] = "т", ["ћ"] = "ћ", ["у"] = "у", ["ф"] = "ф", + ["х"] = "х", ["ц"] = "ц", ["ч"] = "ч", ["џ"] = "џ", + ["ш"] = "ш", + }, + orders = { + "а", "б", "в", "г", "д", "ђ", "е", "ж", "з", "и", + "ј", "к", "л", "љ", "м", "н", "њ", "о", "п", "р", + "с", "т", "ћ", "у", "ф", "х", "ц", "ч", "џ", "ш", + } } -mappings["sr"] = { - ["а"] = 1, - ["б"] = 2, - ["в"] = 3, - ["г"] = 4, - ["д"] = 5, - ["ђ"] = 6, - ["е"] = 7, - ["ж"] = 8, - ["з"] = 9, - ["и"] = 10, - ["ј"] = 11, - ["к"] = 12, - ["л"] = 13, - ["љ"] = 14, - ["м"] = 15, - ["н"] = 16, - ["њ"] = 17, - ["о"] = 18, - ["п"] = 19, - ["р"] = 20, - ["с"] = 21, - ["т"] = 22, - ["ћ"] = 23, - ["у"] = 24, - ["ф"] = 25, - ["х"] = 26, - ["ц"] = 27, - ["ч"] = 28, - ["џ"] = 29, - ["ш"] = 30, -} - -adduppercaseentries ("sr") -adduppercasemappings("sr") - --- Transliteration: Russian|ISO9-1995 -- Keeping the same collation order as Russian (v.s.). -- Matches the tables from: -- http://bitbucket.org/phg/transliterator/src/tip/tex/context/third/transliterator/trans_tables_iso9.lua -local ru_iso9_yer = uc(replacementoffset + 1) - -replacements["ru-iso9"] = { - [1] = { "''", ru_iso9_yer }, -} - -entries["ru-iso9"] = { - ["a"] = "a", - ["b"] = "b", - ["v"] = "v", - ["g"] = "g", - ["d"] = "d", - ["e"] = "e", - ["ë"] = "ë", - ["ž"] = "ž", - ["z"] = "z", - ["i"] = "i", - ["ì"] = "ì", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["p"] = "p", - ["r"] = "r", - ["s"] = "s", - ["t"] = "t", - ["u"] = "u", - ["f"] = "f", - ["h"] = "h", - ["c"] = "c", - ["č"] = "č", - ["š"] = "š", - ["ŝ"] = "ŝ", - ["ʺ"] = "ʺ", - [ru_iso9_yer] = "ʺ", - ["y"] = "y", - ["ʹ"] = "ʹ", - ["'"] = "ʹ", - ["ě"] = "ě", - ["è"] = "è", - ["û"] = "û", - ["â"] = "â", - ["û"] = "û", - ["â"] = "â", -} - -mappings["ru-iso9"] = { - ["a"] = 1, - ["b"] = 2, - ["v"] = 3, - ["g"] = 4, - ["d"] = 5, - ["e"] = 6, - ["ë"] = 6, - ["ž"] = 7, - ["z"] = 8, - ["i"] = 9, - ["ì"] = 9, - ["j"] = 10, - ["k"] = 11, - ["l"] = 12, - ["m"] = 13, - ["n"] = 14, - ["o"] = 15, - ["p"] = 16, - ["r"] = 17, - ["s"] = 18, - ["t"] = 19, - ["u"] = 20, - ["f"] = 21, - ["h"] = 22, - ["c"] = 23, - ["č"] = 24, - ["š"] = 25, - ["ŝ"] = 26, - ["ʺ"] = 27, - [ru_iso9_yer] = 27, - ["y"] = 28, - ["ʹ"] = 29, - ["'"] = 29, - ["ě"] = 30, - ["è"] = 31, - ["û"] = 32, - ["â"] = 33, - ["û"] = 34, - ["â"] = 35, +local yer = utfchar(replacementoffset + 1) + +definitions["ru-iso9"] = { + replacements = { + { "''", yer }, + }, + entries = { + ["a"] = "a", ["b"] = "b", ["v"] = "v", ["g"] = "g", ["d"] = "d", + ["e"] = "e", ["ë"] = "ë", ["ž"] = "ž", ["z"] = "z", ["i"] = "i", + ["ì"] = "ì", ["j"] = "j", ["k"] = "k", ["l"] = "l", ["m"] = "m", + ["n"] = "n", ["o"] = "o", ["p"] = "p", ["r"] = "r", ["s"] = "s", + ["t"] = "t", ["u"] = "u", ["f"] = "f", ["h"] = "h", ["c"] = "c", + ["č"] = "č", ["š"] = "š", ["ŝ"] = "ŝ", ["ʺ"] = "ʺ", [yer] = "ʺ", + ["y"] = "y", ["ʹ"] = "ʹ", ["'"] = "ʹ", ["ě"] = "ě", ["è"] = "è", + ["û"] = "û", ["â"] = "â", ["û"] = "û", ["â"] = "â", + }, + orders = { + "a", "b", "v", "g", "d", "e", "ë", "ž", "z", "i", + "ì", "j", "k", "l", "m", "n", "o", "p", "r", "s", + "t", "u", "f", "h", "c", "č", "š", "ŝ", "ʺ", yer, + "y", "ʹ", "'", "ě", "è", "û", "â", "û", "â", + } } -adduppercaseentries ("ru-iso9") -adduppercasemappings("ru-iso9") - --- Transliteration: Old Slavonic|scientific -- Matches the tables from: -- http://bitbucket.org/phg/transliterator/src/tip/tex/context/third/transliterator/trans_tables_scntfc.lua -local ocs_scn_uk = uc(replacementoffset + 1) -local ocs_scn_tshe = uc(replacementoffset + 2) -local ocs_scn_sht = uc(replacementoffset + 3) -local ocs_scn_ju = uc(replacementoffset + 4) -local ocs_scn_ja = uc(replacementoffset + 5) -local ocs_scn_je = uc(replacementoffset + 6) -local ocs_scn_ijus = uc(replacementoffset + 7) -local ocs_scn_ibigjus = uc(replacementoffset + 8) -local ocs_scn_xi = uc(replacementoffset + 9) -local ocs_scn_psi = uc(replacementoffset + 10) -local ocs_scn_theta = uc(replacementoffset + 11) -local ocs_scn_shch = uc(replacementoffset + 12) - -local ocs_scn_UK = uc(replacementoffset + 13) -local ocs_scn_TSHE = uc(replacementoffset + 14) -local ocs_scn_SHT = uc(replacementoffset + 15) -local ocs_scn_JU = uc(replacementoffset + 16) -local ocs_scn_JA = uc(replacementoffset + 17) -local ocs_scn_JE = uc(replacementoffset + 18) -local ocs_scn_IJUS = uc(replacementoffset + 19) -local ocs_scn_IBIGJUS = uc(replacementoffset + 20) -local ocs_scn_XI = uc(replacementoffset + 21) -local ocs_scn_PSI = uc(replacementoffset + 22) -local ocs_scn_THETA = uc(replacementoffset + 23) -local ocs_scn_SHCH = uc(replacementoffset + 24) - -replacements["ocs-scn"] = { - [1] = { "ou", ocs_scn_uk }, - [2] = { "g’", ocs_scn_tshe }, - [3] = { "št", ocs_scn_sht }, - [4] = { "ju", ocs_scn_ju }, - [5] = { "ja", ocs_scn_ja }, - [6] = { "je", ocs_scn_je }, - [7] = { "ję", ocs_scn_ijus }, - [8] = { "jǫ", ocs_scn_ibigjus }, - [9] = { "ks", ocs_scn_xi }, - [10] = { "ps", ocs_scn_psi }, - [11] = { "th", ocs_scn_theta }, - [12] = { "šč", ocs_scn_shch }, -} - -entries["ocs-scn"] = { - ["a"] = "a", - ["b"] = "b", - ["v"] = "v", - ["g"] = "g", - ["d"] = "d", - ["e"] = "e", - ["ž"] = "ž", - ["ʒ"] = "ʒ", - ["z"] = "z", - ["i"] = "i", - ["ï"] = "ï", - [ocs_scn_tshe] = "g’", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["p"] = "p", - ["r"] = "r", - ["s"] = "s", - ["t"] = "t", - ["u"] = "u", - ["f"] = "f", - ["x"] = "x", - ["o"] = "o", - ["c"] = "c", - ["č"] = "č", - ["š"] = "š", - [ocs_scn_sht] = "št", - [ocs_scn_shch] = "šč", - ["ъ"] = "ъ", - ["y"] = "y", - [ocs_scn_uk] = "y", - ["ь"] = "ь", - ["ě"] = "ě", - [ocs_scn_ju] = "ju", - [ocs_scn_ja] = "ja", - [ocs_scn_je] = "je", - ["ę"] = "ę", - [ocs_scn_ijus] = "ję", - ["ǫ"] = "ǫ", -[ocs_scn_ibigjus] = "jǫ", - [ocs_scn_xi] = "ks", - [ocs_scn_psi] = "ps", - [ocs_scn_theta] = "th", - ["ü"] = "ü", +local uk, UK = utfchar(replacementoffset + 1), utfchar(replacementoffset + 21) +local tshe, TSHE = utfchar(replacementoffset + 2), utfchar(replacementoffset + 22) +local sht, SHT = utfchar(replacementoffset + 3), utfchar(replacementoffset + 23) +local ju, JU = utfchar(replacementoffset + 4), utfchar(replacementoffset + 24) +local ja, JA = utfchar(replacementoffset + 5), utfchar(replacementoffset + 25) +local je, JE = utfchar(replacementoffset + 6), utfchar(replacementoffset + 26) +local ijus, IJUS = utfchar(replacementoffset + 7), utfchar(replacementoffset + 27) +local ibigjus, IBIGJUS = utfchar(replacementoffset + 8), utfchar(replacementoffset + 28) +local xi, XI = utfchar(replacementoffset + 9), utfchar(replacementoffset + 29) +local psi, PSI = utfchar(replacementoffset + 10), utfchar(replacementoffset + 30) +local theta, THETA = utfchar(replacementoffset + 11), utfchar(replacementoffset + 31) +local shch, SHCH = utfchar(replacementoffset + 12), utfchar(replacementoffset + 32) + +definitions["ocs-scn"] = { + replacements = { + { "ou", uk }, { "OU", UK }, + { "g’", tshe }, { "G’", TSHE }, + { "št", sht }, { "ŠT", SHT }, + { "ju", ju }, { "JU", JU }, + { "ja", ja }, { "JA", JA }, + { "je", je }, { "JE", JE }, + { "ję", ijus }, { "JĘ", IJUS }, + { "jǫ", ibigjus }, { "JǪ", IBIGJUS }, + { "ks", xi }, { "KS", XI }, + { "ps", psi }, { "PS", PSI }, + { "th", theta }, { "TH", THETA }, + { "šč", shch }, { "ŠČ", SHCH }, + }, + entries = { + ["a"] = "a", ["b"] = "b", ["v"] = "v", ["g"] = "g", ["d"] = "d", + ["e"] = "e", ["ž"] = "ž", ["ʒ"] = "ʒ", ["z"] = "z", ["i"] = "i", + ["ï"] = "ï", [tshe] = "g’", ["k"] = "k", ["l"] = "l", ["m"] = "m", + ["n"] = "n", ["o"] = "o", ["p"] = "p", ["r"] = "r", ["s"] = "s", + ["t"] = "t", ["u"] = "u", ["f"] = "f", ["x"] = "x", ["o"] = "o", + ["c"] = "c", ["č"] = "č", ["š"] = "š", [sht] = "št", [shch] = "šč", + ["ъ"] = "ъ", ["y"] = "y", [uk] = "y", ["ь"] = "ь", ["ě"] = "ě", + [ju] = "ju", [ja] = "ja", [je] = "je", ["ę"] = "ę", [ijus] = "ję", + ["ǫ"] = "ǫ", [ibigjus] = "jǫ", [xi] = "ks", [psi] = "ps", [theta] = "th", + ["ü"] = "ü", + }, + orders = { + "a", "b", "v", "g", "d", "e", "ž", "ʒ", "z", "i", "ï", + tshe, "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", + "f", "x", "o", "c", "č", "š", sht, shch, "ъ", "y", uk, + "ь", "ě", ju, ja, je, "ę", ijus, "ǫ", ibigjus, xi, psi, + theta, "ü", + }, + upper = { + uk = UK, tshe = TSHE, sht = SHT, ju = JU, ja = JA, je = JE, ijus = IJUS, ibigjus = IBIGJUS, xi = XI, psi = PSI, theta = THETA, shch = SHCH, + }, + lower = { + UK = uk, TSHE = tshe, SHT = sht, JU = ju, JA = ja, JE = je, IJUS = ijus, IBIGJUS = ibigjus, XI = xi, PSI = psi, THETA = theta, SHCH = shch, + }, } -mappings["ocs-scn"] = { - ["a"] = 1, - ["b"] = 2, - ["v"] = 3, - ["g"] = 4, - ["d"] = 5, - ["e"] = 6, - ["ž"] = 7, - ["ʒ"] = 8, - ["z"] = 9, - ["i"] = 10, - ["ï"] = 10, - [ocs_scn_tshe] = 11, - ["k"] = 12, - ["l"] = 13, - ["m"] = 14, - ["n"] = 15, - ["o"] = 16, - ["p"] = 17, - ["r"] = 18, - ["s"] = 19, - ["t"] = 20, - ["u"] = 21, - ["f"] = 22, - ["x"] = 23, - ["o"] = 24, - ["c"] = 25, - ["č"] = 26, - ["š"] = 27, - [ocs_scn_sht] = 28, - [ocs_scn_shch] = 28, - ["ъ"] = 29, - ["y"] = 30, - [ocs_scn_uk] = 30, - ["ь"] = 31, - ["ě"] = 32, - [ocs_scn_ju] = 33, - [ocs_scn_ja] = 34, - [ocs_scn_je] = 35, - ["ę"] = 36, - [ocs_scn_ijus] = 37, - ["ǫ"] = 38, -[ocs_scn_ibigjus] = 39, - [ocs_scn_xi] = 40, - [ocs_scn_psi] = 41, - [ocs_scn_theta] = 42, - ["ü"] = 43, -} - -adduppercaseentries ("ocs-scn") -adduppercasemappings("ocs-scn") - - entries["ocs-scn"][ocs_scn_UK ] = entries["ocs-scn"][ocs_scn_uk ] -mappings["ocs-scn"][ocs_scn_UK ] = mappings["ocs-scn"][ocs_scn_uk ] - - entries["ocs-scn"][ocs_scn_TSHE ] = entries["ocs-scn"][ocs_scn_tshe ] -mappings["ocs-scn"][ocs_scn_TSHE ] = mappings["ocs-scn"][ocs_scn_tshe ] - - entries["ocs-scn"][ocs_scn_SHT ] = entries["ocs-scn"][ocs_scn_sht ] -mappings["ocs-scn"][ocs_scn_SHT ] = mappings["ocs-scn"][ocs_scn_sht ] - - entries["ocs-scn"][ocs_scn_JU ] = entries["ocs-scn"][ocs_scn_ju ] -mappings["ocs-scn"][ocs_scn_JU ] = mappings["ocs-scn"][ocs_scn_ju ] - - entries["ocs-scn"][ocs_scn_JA ] = entries["ocs-scn"][ocs_scn_ja ] -mappings["ocs-scn"][ocs_scn_JA ] = mappings["ocs-scn"][ocs_scn_ja ] - - entries["ocs-scn"][ocs_scn_JE ] = entries["ocs-scn"][ocs_scn_je ] -mappings["ocs-scn"][ocs_scn_JE ] = mappings["ocs-scn"][ocs_scn_je ] - - entries["ocs-scn"][ocs_scn_IJUS ] = entries["ocs-scn"][ocs_scn_ijus ] -mappings["ocs-scn"][ocs_scn_IJUS ] = mappings["ocs-scn"][ocs_scn_ijus ] - - entries["ocs-scn"][ocs_scn_IBIGJUS] = entries["ocs-scn"][ocs_scn_ibigjus] -mappings["ocs-scn"][ocs_scn_IBIGJUS] = mappings["ocs-scn"][ocs_scn_ibigjus] - - entries["ocs-scn"][ocs_scn_XI ] = entries["ocs-scn"][ocs_scn_xi ] -mappings["ocs-scn"][ocs_scn_XI ] = mappings["ocs-scn"][ocs_scn_xi ] - - entries["ocs-scn"][ocs_scn_PSI ] = entries["ocs-scn"][ocs_scn_psi ] -mappings["ocs-scn"][ocs_scn_PSI ] = mappings["ocs-scn"][ocs_scn_psi ] - - entries["ocs-scn"][ocs_scn_THETA ] = entries["ocs-scn"][ocs_scn_theta ] -mappings["ocs-scn"][ocs_scn_THETA ] = mappings["ocs-scn"][ocs_scn_theta ] - - entries["ocs-scn"][ocs_scn_SHCH ] = entries["ocs-scn"][ocs_scn_shch ] -mappings["ocs-scn"][ocs_scn_SHCH ] = mappings["ocs-scn"][ocs_scn_shch ] --- Norwegian (bokmål). -replacements["no"] = { --[[ None, do you miss any? ]] } - -entries["no"] = { - ["a"] = "a", - ["b"] = "b", - ["c"] = "c", - ["d"] = "d", - ["e"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["p"] = "p", - ["q"] = "q", - ["r"] = "r", - ["s"] = "s", - ["t"] = "t", - ["u"] = "u", - ["v"] = "v", - ["w"] = "w", - ["x"] = "x", - ["y"] = "y", - ["z"] = "z", - ["æ"] = "æ", - ["ø"] = "ø", - ["å"] = "å", +definitions["no"] = { + entries = { + ["a"] = "a", ["b"] = "b", ["c"] = "c", ["d"] = "d", ["e"] = "e", + ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", ["j"] = "j", + ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", ["o"] = "o", + ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", ["t"] = "t", + ["u"] = "u", ["v"] = "v", ["w"] = "w", ["x"] = "x", ["y"] = "y", + ["z"] = "z", ["æ"] = "æ", ["ø"] = "ø", ["å"] = "å", + }, + orders = { + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", + "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", + "u", "v", "w", "x", "y", "z", "æ", "ø", "å", + } } -mappings["no"] = { - ["a"] = 1, - ["b"] = 2, - ["c"] = 3, - ["d"] = 4, - ["e"] = 5, - ["f"] = 6, - ["g"] = 7, - ["h"] = 8, - ["i"] = 9, - ["j"] = 10, - ["k"] = 11, - ["l"] = 12, - ["m"] = 13, - ["n"] = 14, - ["o"] = 15, - ["p"] = 16, - ["q"] = 17, - ["r"] = 18, - ["s"] = 19, - ["t"] = 20, - ["u"] = 21, - ["v"] = 22, - ["w"] = 23, - ["x"] = 24, - ["y"] = 25, - ["z"] = 26, - ["æ"] = 27, - ["ø"] = 28, - ["å"] = 29, -} - -adduppercaseentries ("no") -adduppercasemappings("no") - --- Danish (-> Norwegian). -replacements["da"] = { --[[ None, do you miss any? ]] } - entries["da"] = entries["no"] - mappings["da"] = mappings["no"] +definitions["da"] = { parent = "no" } --- Swedish -replacements["sv"] = { --[[ None, do you miss any? ]] } - -entries["sv"] = { - ["a"] = "a", - ["b"] = "b", - ["c"] = "c", - ["d"] = "d", - ["e"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["p"] = "p", - ["q"] = "q", - ["r"] = "r", - ["s"] = "s", - ["t"] = "t", - ["u"] = "u", - ["v"] = "v", - ["w"] = "w", - ["x"] = "x", - ["y"] = "y", - ["z"] = "z", - ["å"] = "å", - ["ä"] = "ä", - ["ö"] = "ö", -} - -mappings["sv"] = { - ["a"] = 1, - ["b"] = 2, - ["c"] = 3, - ["d"] = 4, - ["e"] = 5, - ["f"] = 6, - ["g"] = 7, - ["h"] = 8, - ["i"] = 9, - ["j"] = 10, - ["k"] = 11, - ["l"] = 12, - ["m"] = 13, - ["n"] = 14, - ["o"] = 15, - ["p"] = 16, - ["q"] = 17, - ["r"] = 18, - ["s"] = 19, - ["t"] = 20, - ["u"] = 21, - ["v"] = 22, - ["w"] = 23, - ["x"] = 24, - ["y"] = 25, - ["z"] = 26, - ["å"] = 27, - ["ä"] = 28, - ["ö"] = 29, +definitions["sv"] = { + entries = { + ["a"] = "a", ["b"] = "b", ["c"] = "c", ["d"] = "d", ["e"] = "e", + ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", ["j"] = "j", + ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", ["o"] = "o", + ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", ["t"] = "t", + ["u"] = "u", ["v"] = "v", ["w"] = "w", ["x"] = "x", ["y"] = "y", + ["z"] = "z", ["å"] = "å", ["ä"] = "ä", ["ö"] = "ö", + }, + orders = { + "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", + "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", + "u", "v", "w", "x", "y", "z", "å", "ä", "ö", + } } -adduppercaseentries ("sv") -adduppercasemappings("sv") - --- Icelandic -- Treating quantities as allographs. -replacements["is"] = { --[[ None, do you miss any? ]] } - -entries["is"] = { - ["a"] = "a", - ["á"] = "a", - ["b"] = "b", - ["d"] = "d", - ["ð"] = "ð", - ["e"] = "e", - ["é"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["í"] = "i", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["ó"] = "o", - ["p"] = "p", - ["r"] = "r", - ["s"] = "s", - ["t"] = "t", - ["u"] = "u", - ["ú"] = "u", - ["v"] = "v", - ["x"] = "x", - ["y"] = "y", - ["ý"] = "y", - ["þ"] = "þ", - ["æ"] = "æ", - ["ö"] = "ö", -} - -mappings["is"] = { - ["a"] = 1, - ["á"] = 1, - ["b"] = 2, - ["d"] = 3, - ["ð"] = 4, - ["e"] = 5, - ["é"] = 5, - ["f"] = 6, - ["g"] = 7, - ["h"] = 8, - ["i"] = 9, - ["í"] = 9, - ["j"] = 10, - ["k"] = 11, - ["l"] = 12, - ["m"] = 13, - ["n"] = 14, - ["o"] = 15, - ["ó"] = 15, - ["p"] = 16, - ["r"] = 17, - ["s"] = 18, - ["t"] = 19, - ["u"] = 20, - ["ú"] = 20, - ["v"] = 21, - ["x"] = 22, - ["y"] = 23, - ["ý"] = 23, - ["þ"] = 24, - ["æ"] = 25, - ["ö"] = 26, +definitions["is"] = { + entries = { + ["a"] = "a", ["á"] = "a", ["b"] = "b", ["d"] = "d", ["ð"] = "ð", + ["e"] = "e", ["é"] = "e", ["f"] = "f", ["g"] = "g", ["h"] = "h", + ["i"] = "i", ["í"] = "i", ["j"] = "j", ["k"] = "k", ["l"] = "l", + ["m"] = "m", ["n"] = "n", ["o"] = "o", ["ó"] = "o", ["p"] = "p", + ["r"] = "r", ["s"] = "s", ["t"] = "t", ["u"] = "u", ["ú"] = "u", + ["v"] = "v", ["x"] = "x", ["y"] = "y", ["ý"] = "y", ["þ"] = "þ", + ["æ"] = "æ", ["ö"] = "ö", + }, + orders = { + "a", "á", "b", "d", "ð", "e", "é", "f", "g", "h", + "i", "í", "j", "k", "l", "m", "n", "o", "ó", "p", + "r", "s", "t", "u", "ú", "v", "x", "y", "ý", "þ", + "æ", "ö", + }, } -adduppercaseentries ("is") -adduppercasemappings("is") - --- Greek -replacements["gr"] = { --[[ None, do you miss any? ]] } - -entries["gr"] = { - ["α"] = "α", - ["ά"] = "α", - ["ὰ"] = "α", - ["ᾶ"] = "α", - ["ᾳ"] = "α", - ["ἀ"] = "α", - ["ἁ"] = "α", - ["ἄ"] = "α", - ["ἂ"] = "α", - ["ἆ"] = "α", - ["ἁ"] = "α", - ["ἅ"] = "α", - ["ἃ"] = "α", - ["ἇ"] = "α", - ["ᾁ"] = "α", - ["ᾴ"] = "α", - ["ᾲ"] = "α", - ["ᾷ"] = "α", - ["ᾄ"] = "α", - ["ᾂ"] = "α", - ["ᾅ"] = "α", - ["ᾃ"] = "α", - ["ᾆ"] = "α", - ["ᾇ"] = "α", - ["β"] = "β", - ["γ"] = "γ", - ["δ"] = "δ", - ["ε"] = "ε", - ["έ"] = "ε", - ["ὲ"] = "ε", - ["ἐ"] = "ε", - ["ἔ"] = "ε", - ["ἒ"] = "ε", - ["ἑ"] = "ε", - ["ἕ"] = "ε", - ["ἓ"] = "ε", - ["ζ"] = "ζ", - ["η"] = "η", - ["η"] = "η", - ["ή"] = "η", - ["ὴ"] = "η", - ["ῆ"] = "η", - ["ῃ"] = "η", - ["ἠ"] = "η", - ["ἤ"] = "η", - ["ἢ"] = "η", - ["ἦ"] = "η", - ["ᾐ"] = "η", - ["ἡ"] = "η", - ["ἥ"] = "η", - ["ἣ"] = "η", - ["ἧ"] = "η", - ["ᾑ"] = "η", - ["ῄ"] = "η", - ["ῂ"] = "η", - ["ῇ"] = "η", - ["ᾔ"] = "η", - ["ᾒ"] = "η", - ["ᾕ"] = "η", - ["ᾓ"] = "η", - ["ᾖ"] = "η", - ["ᾗ"] = "η", - ["θ"] = "θ", - ["ι"] = "ι", - ["ί"] = "ι", - ["ὶ"] = "ι", - ["ῖ"] = "ι", - ["ἰ"] = "ι", - ["ἴ"] = "ι", - ["ἲ"] = "ι", - ["ἶ"] = "ι", - ["ἱ"] = "ι", - ["ἵ"] = "ι", - ["ἳ"] = "ι", - ["ἷ"] = "ι", - ["ϊ"] = "ι", - ["ΐ"] = "ι", - ["ῒ"] = "ι", - ["ῗ"] = "ι", - ["κ"] = "κ", - ["λ"] = "λ", - ["μ"] = "μ", - ["ν"] = "ν", - ["ξ"] = "ξ", - ["ο"] = "ο", - ["ό"] = "ο", - ["ὸ"] = "ο", - ["ὀ"] = "ο", - ["ὄ"] = "ο", - ["ὂ"] = "ο", - ["ὁ"] = "ο", - ["ὅ"] = "ο", - ["ὃ"] = "ο", - ["π"] = "π", - ["ρ"] = "ρ", - ["ῤ"] = "ῤ", - ["ῥ"] = "ῥ", - ["σ"] = "σ", - ["ς"] = "ς", - ["τ"] = "τ", - ["υ"] = "υ", - ["ύ"] = "υ", - ["ὺ"] = "υ", - ["ῦ"] = "υ", - ["ὐ"] = "υ", - ["ὔ"] = "υ", - ["ὒ"] = "υ", - ["ὖ"] = "υ", - ["ὑ"] = "υ", - ["ὕ"] = "υ", - ["ὓ"] = "υ", - ["ὗ"] = "υ", - ["ϋ"] = "υ", - ["ΰ"] = "υ", - ["ῢ"] = "υ", - ["ῧ"] = "υ", - ["φ"] = "φ", - ["χ"] = "χ", - ["ψ"] = "ω", - ["ω"] = "ω", - ["ώ"] = "ω", - ["ὼ"] = "ω", - ["ῶ"] = "ω", - ["ῳ"] = "ω", - ["ὠ"] = "ω", - ["ὤ"] = "ω", - ["ὢ"] = "ω", - ["ὦ"] = "ω", - ["ᾠ"] = "ω", - ["ὡ"] = "ω", - ["ὥ"] = "ω", - ["ὣ"] = "ω", - ["ὧ"] = "ω", - ["ᾡ"] = "ω", - ["ῴ"] = "ω", - ["ῲ"] = "ω", - ["ῷ"] = "ω", - ["ᾤ"] = "ω", - ["ᾢ"] = "ω", - ["ᾥ"] = "ω", - ["ᾣ"] = "ω", - ["ᾦ"] = "ω", - ["ᾧ"] = "ω", -} - -mappings["gr"] = { - ["α"] = 1, - ["ά"] = 1, - ["ὰ"] = 1, - ["ᾶ"] = 1, - ["ᾳ"] = 1, - ["ἀ"] = 1, - ["ἁ"] = 1, - ["ἄ"] = 1, - ["ἂ"] = 1, - ["ἆ"] = 1, - ["ἁ"] = 1, - ["ἅ"] = 1, - ["ἃ"] = 1, - ["ἇ"] = 1, - ["ᾁ"] = 1, - ["ᾴ"] = 1, - ["ᾲ"] = 1, - ["ᾷ"] = 1, - ["ᾄ"] = 1, - ["ᾂ"] = 1, - ["ᾅ"] = 1, - ["ᾃ"] = 1, - ["ᾆ"] = 1, - ["ᾇ"] = 1, - ["β"] = 2, - ["γ"] = 3, - ["δ"] = 4, - ["ε"] = 5, - ["έ"] = 5, - ["ὲ"] = 5, - ["ἐ"] = 5, - ["ἔ"] = 5, - ["ἒ"] = 5, - ["ἑ"] = 5, - ["ἕ"] = 5, - ["ἓ"] = 5, - ["ζ"] = 6, - ["η"] = 7, - ["η"] = 7, - ["ή"] = 7, - ["ὴ"] = 7, - ["ῆ"] = 7, - ["ῃ"] = 7, - ["ἠ"] = 7, - ["ἤ"] = 7, - ["ἢ"] = 7, - ["ἦ"] = 7, - ["ᾐ"] = 7, - ["ἡ"] = 7, - ["ἥ"] = 7, - ["ἣ"] = 7, - ["ἧ"] = 7, - ["ᾑ"] = 7, - ["ῄ"] = 7, - ["ῂ"] = 7, - ["ῇ"] = 7, - ["ᾔ"] = 7, - ["ᾒ"] = 7, - ["ᾕ"] = 7, - ["ᾓ"] = 7, - ["ᾖ"] = 7, - ["ᾗ"] = 7, - ["θ"] = 8, - ["ι"] = 9, - ["ί"] = 9, - ["ὶ"] = 9, - ["ῖ"] = 9, - ["ἰ"] = 9, - ["ἴ"] = 9, - ["ἲ"] = 9, - ["ἶ"] = 9, - ["ἱ"] = 9, - ["ἵ"] = 9, - ["ἳ"] = 9, - ["ἷ"] = 9, - ["ϊ"] = 9, - ["ΐ"] = 9, - ["ῒ"] = 9, - ["ῗ"] = 9, - ["κ"] = 10, - ["λ"] = 11, - ["μ"] = 12, - ["ν"] = 13, - ["ξ"] = 14, - ["ο"] = 15, - ["ό"] = 15, - ["ὸ"] = 15, - ["ὀ"] = 15, - ["ὄ"] = 15, - ["ὂ"] = 15, - ["ὁ"] = 15, - ["ὅ"] = 15, - ["ὃ"] = 15, - ["π"] = 16, - ["ρ"] = 17, - ["ῤ"] = 17, - ["ῥ"] = 17, - ["σ"] = 18, - ["ς"] = 18, - ["τ"] = 19, - ["υ"] = 20, - ["ύ"] = 20, - ["ὺ"] = 20, - ["ῦ"] = 20, - ["ὐ"] = 20, - ["ὔ"] = 20, - ["ὒ"] = 20, - ["ὖ"] = 20, - ["ὑ"] = 20, - ["ὕ"] = 20, - ["ὓ"] = 20, - ["ὗ"] = 20, - ["ϋ"] = 20, - ["ΰ"] = 20, - ["ῢ"] = 20, - ["ῧ"] = 20, - ["φ"] = 21, - ["χ"] = 22, - ["ψ"] = 23, - ["ω"] = 24, - ["ώ"] = 24, - ["ὼ"] = 24, - ["ῶ"] = 24, - ["ῳ"] = 24, - ["ὠ"] = 24, - ["ὤ"] = 24, - ["ὢ"] = 24, - ["ὦ"] = 24, - ["ᾠ"] = 24, - ["ὡ"] = 24, - ["ὥ"] = 24, - ["ὣ"] = 24, - ["ὧ"] = 24, - ["ᾡ"] = 24, - ["ῴ"] = 24, - ["ῲ"] = 24, - ["ῷ"] = 24, - ["ᾤ"] = 24, - ["ᾢ"] = 24, - ["ᾥ"] = 24, - ["ᾣ"] = 24, - ["ᾦ"] = 24, - ["ᾧ"] = 24, +definitions["gr"] = { + entries = { + ["α"] = "α", ["ά"] = "α", ["ὰ"] = "α", ["ᾶ"] = "α", ["ᾳ"] = "α", + ["ἀ"] = "α", ["ἁ"] = "α", ["ἄ"] = "α", ["ἂ"] = "α", ["ἆ"] = "α", + ["ἁ"] = "α", ["ἅ"] = "α", ["ἃ"] = "α", ["ἇ"] = "α", ["ᾁ"] = "α", + ["ᾴ"] = "α", ["ᾲ"] = "α", ["ᾷ"] = "α", ["ᾄ"] = "α", ["ᾂ"] = "α", + ["ᾅ"] = "α", ["ᾃ"] = "α", ["ᾆ"] = "α", ["ᾇ"] = "α", ["β"] = "β", + ["γ"] = "γ", ["δ"] = "δ", ["ε"] = "ε", ["έ"] = "ε", ["ὲ"] = "ε", + ["ἐ"] = "ε", ["ἔ"] = "ε", ["ἒ"] = "ε", ["ἑ"] = "ε", ["ἕ"] = "ε", + ["ἓ"] = "ε", ["ζ"] = "ζ", ["η"] = "η", ["η"] = "η", ["ή"] = "η", + ["ὴ"] = "η", ["ῆ"] = "η", ["ῃ"] = "η", ["ἠ"] = "η", ["ἤ"] = "η", + ["ἢ"] = "η", ["ἦ"] = "η", ["ᾐ"] = "η", ["ἡ"] = "η", ["ἥ"] = "η", + ["ἣ"] = "η", ["ἧ"] = "η", ["ᾑ"] = "η", ["ῄ"] = "η", ["ῂ"] = "η", + ["ῇ"] = "η", ["ᾔ"] = "η", ["ᾒ"] = "η", ["ᾕ"] = "η", ["ᾓ"] = "η", + ["ᾖ"] = "η", ["ᾗ"] = "η", ["θ"] = "θ", ["ι"] = "ι", ["ί"] = "ι", + ["ὶ"] = "ι", ["ῖ"] = "ι", ["ἰ"] = "ι", ["ἴ"] = "ι", ["ἲ"] = "ι", + ["ἶ"] = "ι", ["ἱ"] = "ι", ["ἵ"] = "ι", ["ἳ"] = "ι", ["ἷ"] = "ι", + ["ϊ"] = "ι", ["ΐ"] = "ι", ["ῒ"] = "ι", ["ῗ"] = "ι", ["κ"] = "κ", + ["λ"] = "λ", ["μ"] = "μ", ["ν"] = "ν", ["ξ"] = "ξ", ["ο"] = "ο", + ["ό"] = "ο", ["ὸ"] = "ο", ["ὀ"] = "ο", ["ὄ"] = "ο", ["ὂ"] = "ο", + ["ὁ"] = "ο", ["ὅ"] = "ο", ["ὃ"] = "ο", ["π"] = "π", ["ρ"] = "ρ", + ["ῤ"] = "ῤ", ["ῥ"] = "ῥ", ["σ"] = "σ", ["ς"] = "ς", ["τ"] = "τ", + ["υ"] = "υ", ["ύ"] = "υ", ["ὺ"] = "υ", ["ῦ"] = "υ", ["ὐ"] = "υ", + ["ὔ"] = "υ", ["ὒ"] = "υ", ["ὖ"] = "υ", ["ὑ"] = "υ", ["ὕ"] = "υ", + ["ὓ"] = "υ", ["ὗ"] = "υ", ["ϋ"] = "υ", ["ΰ"] = "υ", ["ῢ"] = "υ", + ["ῧ"] = "υ", ["φ"] = "φ", ["χ"] = "χ", ["ψ"] = "ω", ["ω"] = "ω", + ["ώ"] = "ω", ["ὼ"] = "ω", ["ῶ"] = "ω", ["ῳ"] = "ω", ["ὠ"] = "ω", + ["ὤ"] = "ω", ["ὢ"] = "ω", ["ὦ"] = "ω", ["ᾠ"] = "ω", ["ὡ"] = "ω", + ["ὥ"] = "ω", ["ὣ"] = "ω", ["ὧ"] = "ω", ["ᾡ"] = "ω", ["ῴ"] = "ω", + ["ῲ"] = "ω", ["ῷ"] = "ω", ["ᾤ"] = "ω", ["ᾢ"] = "ω", ["ᾥ"] = "ω", + ["ᾣ"] = "ω", ["ᾦ"] = "ω", ["ᾧ"] = "ω", + }, + orders = { + "α", "ά", "ὰ", "ᾶ", "ᾳ", "ἀ", "ἁ", "ἄ", "ἂ", "ἆ", + "ἁ", "ἅ", "ἃ", "ἇ", "ᾁ", "ᾴ", "ᾲ", "ᾷ", "ᾄ", "ᾂ", + "ᾅ", "ᾃ", "ᾆ", "ᾇ", "β", "γ", "δ", "ε", "έ", "ὲ", + "ἐ", "ἔ", "ἒ", "ἑ", "ἕ", "ἓ", "ζ", "η", "η", "ή", + "ὴ", "ῆ", "ῃ", "ἠ", "ἤ", "ἢ", "ἦ", "ᾐ", "ἡ", "ἥ", + "ἣ", "ἧ", "ᾑ", "ῄ", "ῂ", "ῇ", "ᾔ", "ᾒ", "ᾕ", "ᾓ", + "ᾖ", "ᾗ", "θ", "ι", "ί", "ὶ", "ῖ", "ἰ", "ἴ", "ἲ", + "ἶ", "ἱ", "ἵ", "ἳ", "ἷ", "ϊ", "ΐ", "ῒ", "ῗ", "κ", + "λ", "μ", "ν", "ξ", "ο", "ό", "ὸ", "ὀ", "ὄ", "ὂ", + "ὁ", "ὅ", "ὃ", "π", "ρ", "ῤ", "ῥ", "σ", "ς", "τ", + "υ", "ύ", "ὺ", "ῦ", "ὐ", "ὔ", "ὒ", "ὖ", "ὑ", "ὕ", + "ὓ", "ὗ", "ϋ", "ΰ", "ῢ", "ῧ", "φ", "χ", "ψ", "ω", + "ώ", "ὼ", "ῶ", "ῳ", "ὠ", "ὤ", "ὢ", "ὦ", "ᾠ", "ὡ", + "ὥ", "ὣ", "ὧ", "ᾡ", "ῴ", "ῲ", "ῷ", "ᾤ", "ᾢ", "ᾥ", + "ᾣ", "ᾦ", "ᾧ", + }, } -adduppercaseentries ("gr") -adduppercasemappings("gr") - --- Latin -- Treating the post-classical fricatives “j” and “v” as “i” and “u” -- respectively. -replacements["la"] = { - [1] = { "æ", "ae" }, -} - -entries["la"] = { - ["a"] = "a", - ["ā"] = "a", - ["ă"] = "a", - ["b"] = "b", - ["c"] = "c", - ["d"] = "d", - ["e"] = "e", - ["ē"] = "e", - ["ĕ"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["ī"] = "i", - ["ĭ"] = "i", - ["j"] = "i", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["ō"] = "o", - ["ŏ"] = "o", - ["p"] = "p", - ["q"] = "q", - ["r"] = "r", - ["s"] = "s", - ["t"] = "t", - ["u"] = "u", - ["ū"] = "u", - ["ŭ"] = "u", - ["v"] = "u", - ["w"] = "w", - ["x"] = "x", - ["y"] = "y", - ["ȳ"] = "y", -- Should exist in Greek words. - ["y̆"] = "y", -- Should exist in Greek words. - ["z"] = "z", -} - -mappings["la"] = { - ["a"] = 1, - ["ā"] = 1, - ["ă"] = 1, - ["b"] = 2, - ["c"] = 3, - ["d"] = 4, - ["e"] = 5, - ["ē"] = 5, - ["ĕ"] = 5, - ["f"] = 6, - ["g"] = 7, - ["h"] = 8, - ["i"] = 9, - ["ī"] = 9, - ["ĭ"] = 9, - ["j"] = 9, - ["k"] = 10, - ["l"] = 11, - ["m"] = 12, - ["n"] = 13, - ["o"] = 14, - ["ō"] = 14, - ["ŏ"] = 14, - ["p"] = 15, - ["q"] = 16, - ["r"] = 17, - ["s"] = 18, - ["t"] = 19, - ["u"] = 20, - ["ū"] = 20, - ["ŭ"] = 20, - ["v"] = 20, - ["w"] = 21, - ["x"] = 22, - ["y"] = 23, - ["ȳ"] = 23, - ["y̆"] = 23, - ["z"] = 24, +definitions["la"] = { + replacements = { + { "æ", "ae" }, { "Æ", "AE" }, + }, + entries = { + ["a"] = "a", ["ā"] = "a", ["ă"] = "a", ["b"] = "b", ["c"] = "c", + ["d"] = "d", ["e"] = "e", ["ē"] = "e", ["ĕ"] = "e", ["f"] = "f", + ["g"] = "g", ["h"] = "h", ["i"] = "i", ["ī"] = "i", ["ĭ"] = "i", + ["j"] = "i", ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", + ["o"] = "o", ["ō"] = "o", ["ŏ"] = "o", ["p"] = "p", ["q"] = "q", + ["r"] = "r", ["s"] = "s", ["t"] = "t", ["u"] = "u", ["ū"] = "u", + ["ŭ"] = "u", ["v"] = "u", ["w"] = "w", ["x"] = "x", ["y"] = "y", + ["ȳ"] = "y", ["y̆"] = "y", ["z"] = "z", + }, + orders = { + "a", "ā", "ă", "b", "c", "d", "e", "ē", "ĕ", "f", + "g", "h", "i", "ī", "ĭ", "j", "k", "l", "m", "n", + "o", "ō", "ŏ", "p", "q", "r", "s", "t", "u", "ū", + "ŭ", "v", "w", "x", "y", "ȳ", "y̆", "z", + } } -adduppercaseentries ("la") -adduppercasemappings("la") - --- Italian -replacements["it"] = { --[[ None, do you miss any? ]] } - -entries["it"] = { - ["a"] = "a", - ["á"] = "a", - ["b"] = "b", - ["c"] = "c", - ["d"] = "d", - ["e"] = "e", - ["é"] = "e", - ["è"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["í"] = "i", - ["ì"] = "i", - ["j"] = "i", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["ó"] = "o", - ["ò"] = "o", - ["p"] = "p", - ["q"] = "q", - ["r"] = "r", - ["s"] = "s", - ["t"] = "t", - ["u"] = "u", - ["ú"] = "u", - ["ù"] = "u", - ["v"] = "u", - ["w"] = "w", - ["x"] = "x", - ["y"] = "y", - ["z"] = "z", -} - -mappings["it"] = { - ["a"] = 1, - ["á"] = 1, - ["b"] = 2, - ["c"] = 3, - ["d"] = 4, - ["e"] = 5, - ["é"] = 5, - ["è"] = 5, - ["f"] = 6, - ["g"] = 7, - ["h"] = 8, - ["i"] = 9, - ["í"] = 9, - ["ì"] = 9, - ["j"] = 10, - ["k"] = 11, - ["l"] = 12, - ["m"] = 13, - ["n"] = 14, - ["o"] = 15, - ["ó"] = 15, - ["ò"] = 15, - ["p"] = 16, - ["q"] = 17, - ["r"] = 18, - ["s"] = 19, - ["t"] = 20, - ["u"] = 21, - ["ú"] = 21, - ["ù"] = 21, - ["v"] = 22, - ["w"] = 23, - ["x"] = 24, - ["y"] = 25, - ["z"] = 26, +definitions["it"] = { + entries = { + ["a"] = "a", ["á"] = "a", ["b"] = "b", ["c"] = "c", ["d"] = "d", + ["e"] = "e", ["é"] = "e", ["è"] = "e", ["f"] = "f", ["g"] = "g", + ["h"] = "h", ["i"] = "i", ["í"] = "i", ["ì"] = "i", ["j"] = "i", + ["k"] = "k", ["l"] = "l", ["m"] = "m", ["n"] = "n", ["o"] = "o", + ["ó"] = "o", ["ò"] = "o", ["p"] = "p", ["q"] = "q", ["r"] = "r", + ["s"] = "s", ["t"] = "t", ["u"] = "u", ["ú"] = "u", ["ù"] = "u", + ["v"] = "u", ["w"] = "w", ["x"] = "x", ["y"] = "y", ["z"] = "z", + }, + orders = { + "a", "á", "b", "c", "d", "e", "é", "è", "f", "g", + "h", "i", "í", "ì", "j", "k", "l", "m", "n", "o", + "ó", "ò", "p", "q", "r", "s", "t", "u", "ú", "ù", + "v", "w", "x", "y", "z", + } } -adduppercaseentries ("it") -adduppercasemappings("it") - --- Romanian - -replacements["ro"] = { --[[ None, do you miss any? ]] } - -entries["ro"] = { - ["a"] = "a", - ["ă"] = "ă", - ["â"] = "â", - ["b"] = "b", - ["c"] = "c", - ["d"] = "d", - ["e"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["î"] = "î", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["p"] = "p", - ["q"] = "q", - ["r"] = "r", - ["s"] = "s", - ["ș"] = "ș", - ["t"] = "t", - ["ț"] = "ț", - ["u"] = "u", - ["v"] = "v", - ["w"] = "w", - ["x"] = "x", - ["y"] = "y", - ["z"] = "z", +definitions["ro"] = { + entries = { + ["a"] = "a", ["ă"] = "ă", ["â"] = "â", ["b"] = "b", ["c"] = "c", + ["d"] = "d", ["e"] = "e", ["f"] = "f", ["g"] = "g", ["h"] = "h", + ["i"] = "i", ["î"] = "î", ["j"] = "j", ["k"] = "k", ["l"] = "l", + ["m"] = "m", ["n"] = "n", ["o"] = "o", ["p"] = "p", ["q"] = "q", + ["r"] = "r", ["s"] = "s", ["ș"] = "ș", ["t"] = "t", ["ț"] = "ț", + ["u"] = "u", ["v"] = "v", ["w"] = "w", ["x"] = "x", ["y"] = "y", + ["z"] = "z", + }, + orders = { + "a", "ă", "â", "b", "c", "d", "e", "f", "g", "h", + "i", "î", "j", "k", "l", "m", "n", "o", "p", "q", + "r", "s", "ș", "t", "ț", "u", "v", "w", "x", "y", + "z", + } } -mappings["ro"] = { - ["a"] = 1, - ["ă"] = 2, - ["â"] = 3, - ["b"] = 4, - ["c"] = 5, - ["d"] = 6, - ["e"] = 7, - ["f"] = 8, - ["g"] = 9, - ["h"] = 10, - ["i"] = 11, - ["î"] = 12, - ["j"] = 13, - ["k"] = 14, - ["l"] = 15, - ["m"] = 16, - ["n"] = 17, - ["o"] = 18, - ["p"] = 19, - ["q"] = 20, - ["r"] = 21, - ["s"] = 22, - ["ș"] = 23, - ["t"] = 24, - ["ț"] = 25, - ["u"] = 26, - ["v"] = 27, - ["w"] = 28, - ["x"] = 29, - ["y"] = 30, - ["z"] = 31, -} - -adduppercaseentries ("ro") -adduppercasemappings("ro") - --- Spanish -replacements["es"] = { --[[ None, do you miss any? ]] } - -entries["es"] = { - ["a"] = "a", - ["á"] = "a", - ["b"] = "b", - ["c"] = "c", - ["d"] = "d", - ["e"] = "e", - ["é"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["í"] = "i", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["ñ"] = "ñ", - ["o"] = "o", - ["ó"] = "o", - ["p"] = "p", - ["q"] = "q", - ["r"] = "r", - ["s"] = "s", - ["t"] = "t", - ["u"] = "u", - ["ú"] = "u", - ["ü"] = "u", - ["v"] = "v", - ["w"] = "w", - ["x"] = "x", - ["y"] = "y", - ["z"] = "z", -} - -mappings["es"] = { - ["a"] = 1, - ["á"] = 1, - ["b"] = 2, - ["c"] = 3, - ["d"] = 4, - ["e"] = 5, - ["é"] = 5, - ["f"] = 6, - ["g"] = 7, - ["h"] = 8, - ["i"] = 9, - ["í"] = 9, - ["j"] = 10, - ["k"] = 11, - ["l"] = 12, - ["m"] = 13, - ["n"] = 14, - ["ñ"] = 15, - ["o"] = 16, - ["ó"] = 16, - ["p"] = 17, - ["q"] = 18, - ["r"] = 19, - ["s"] = 20, - ["t"] = 21, - ["u"] = 22, - ["ú"] = 22, - ["ü"] = 22, - ["v"] = 23, - ["w"] = 24, - ["x"] = 25, - ["y"] = 26, - ["z"] = 27, +definitions["es"] = { + entries = { + ["a"] = "a", ["á"] = "a", ["b"] = "b", ["c"] = "c", ["d"] = "d", + ["e"] = "e", ["é"] = "e", ["f"] = "f", ["g"] = "g", ["h"] = "h", + ["i"] = "i", ["í"] = "i", ["j"] = "j", ["k"] = "k", ["l"] = "l", + ["m"] = "m", ["n"] = "n", ["ñ"] = "ñ", ["o"] = "o", ["ó"] = "o", + ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", ["t"] = "t", + ["u"] = "u", ["ú"] = "u", ["ü"] = "u", ["v"] = "v", ["w"] = "w", + ["x"] = "x", ["y"] = "y", ["z"] = "z", + }, + orders = { + "a", "á", "b", "c", "d", "e", "é", "f", "g", "h", + "i", "í", "j", "k", "l", "m", "n", "ñ", "o", "ó", + "p", "q", "r", "s", "t", "u", "ú", "ü", "v", "w", + "x", "y", "z", + } } -adduppercaseentries ("es") -adduppercasemappings("es") - --- Portuguese -replacements["pt"] = { --[[ None, do you miss any? ]] } - -entries["pt"] = { - ["a"] = "a", - ["á"] = "a", - ["â"] = "a", - ["ã"] = "a", - ["à"] = "a", - ["b"] = "b", - ["c"] = "c", - ["ç"] = "c", - ["d"] = "d", - ["e"] = "e", - ["é"] = "e", - ["ê"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["í"] = "i", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["ó"] = "o", - ["ô"] = "o", - ["õ"] = "o", - ["p"] = "p", - ["q"] = "q", - ["r"] = "r", - ["s"] = "s", - ["t"] = "t", - ["u"] = "u", - ["ú"] = "u", - ["ü"] = "u", -- qüinqüelíngüe - ["v"] = "v", - ["w"] = "w", - ["x"] = "x", - ["y"] = "y", - ["z"] = "z", -} - -mappings["pt"] = { - ["a"] = 1, - ["á"] = 1, - ["â"] = 1, - ["ã"] = 1, - ["à"] = 1, - ["b"] = 2, - ["c"] = 3, - ["ç"] = 3, - ["d"] = 4, - ["e"] = 5, - ["é"] = 5, - ["ê"] = 5, - ["f"] = 6, - ["g"] = 7, - ["h"] = 8, - ["i"] = 9, - ["í"] = 9, - ["j"] = 10, - ["k"] = 11, - ["l"] = 12, - ["m"] = 13, - ["n"] = 14, - ["o"] = 15, - ["ó"] = 15, - ["ô"] = 15, - ["õ"] = 15, - ["p"] = 16, - ["q"] = 17, - ["r"] = 18, - ["s"] = 19, - ["t"] = 20, - ["u"] = 21, - ["ú"] = 21, - ["ü"] = 21, - ["v"] = 22, - ["w"] = 23, - ["x"] = 24, - ["y"] = 25, - ["z"] = 26, +definitions["pt"] = { + entries = { + ["a"] = "a", ["á"] = "a", ["â"] = "a", ["ã"] = "a", ["à"] = "a", + ["b"] = "b", ["c"] = "c", ["ç"] = "c", ["d"] = "d", ["e"] = "e", + ["é"] = "e", ["ê"] = "e", ["f"] = "f", ["g"] = "g", ["h"] = "h", + ["i"] = "i", ["í"] = "i", ["j"] = "j", ["k"] = "k", ["l"] = "l", + ["m"] = "m", ["n"] = "n", ["o"] = "o", ["ó"] = "o", ["ô"] = "o", + ["õ"] = "o", ["p"] = "p", ["q"] = "q", ["r"] = "r", ["s"] = "s", + ["t"] = "t", ["u"] = "u", ["ú"] = "u", ["ü"] = "u", ["v"] = "v", + ["w"] = "w", ["x"] = "x", ["y"] = "y", ["z"] = "z", + }, + orders = { + "a", "á", "â", "ã", "à", "b", "c", "ç", "d", "e", + "é", "ê", "f", "g", "h", "i", "í", "j", "k", "l", + "m", "n", "o", "ó", "ô", "õ", "p", "q", "r", "s", + "t", "u", "ú", "ü", "v", "w", "x", "y", "z", + } } -adduppercaseentries ("pt") -adduppercasemappings("pt") - - --- Lithuanian -local lt_ch = uc(replacementoffset + 1) -local lt_CH = uc(replacementoffset + 2) - -replacements["lt"] = { - [1] = { "ch", lt_ch } +local ch, CH = utfchar(replacementoffset + 1), utfchar(replacementoffset + 11) + +definitions["lt"] = { + replacements = { + { "ch", ch }, { "CH", CH} + }, + entries = { + ["a"] = "a", ["ą"] = "a", ["b"] = "b", ["c"] = "c", [ch ] = "c", + ["č"] = "č", ["d"] = "d", ["e"] = "e", ["ę"] = "e", ["ė"] = "e", + ["f"] = "f", ["g"] = "g", ["h"] = "h", ["i"] = "i", ["į"] = "i", + ["y"] = "i", ["j"] = "j", ["k"] = "k", ["l"] = "l", ["m"] = "m", + ["n"] = "n", ["o"] = "o", ["p"] = "p", ["r"] = "r", ["s"] = "s", + ["š"] = "š", ["t"] = "t", ["u"] = "u", ["ų"] = "u", ["ū"] = "u", + ["v"] = "v", ["z"] = "z", ["ž"] = "ž", + }, + orders = { + "a", "ą", "b", "c", ch, "č", "d", "e", "ę", "ė", + "f", "g", "h", "i", "į", "y", "j", "k", "l", "m", + "n", "o", "p", "r", "s", "š", "t", "u", "ų", "ū", + "v", "z", "ž", + }, + lower = { + ch = CH, + }, + upper = { + CH = ch, + }, } -entries["lt"] = { - ["a"] = "a", - ["ą"] = "a", - ["b"] = "b", - ["c"] = "c", - [lt_ch] = "c", - ["č"] = "č", - ["d"] = "d", - ["e"] = "e", - ["ę"] = "e", - ["ė"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["į"] = "i", - ["y"] = "i", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["p"] = "p", - ["r"] = "r", - ["s"] = "s", - ["š"] = "š", - ["t"] = "t", - ["u"] = "u", - ["ų"] = "u", - ["ū"] = "u", - ["v"] = "v", - ["z"] = "z", - ["ž"] = "ž", -} - -mappings["lt"] = { - ["a"] = 1, - ["ą"] = 1, - ["b"] = 2, - ["c"] = 3, - [lt_ch] = 3, - ["č"] = 4, - ["d"] = 5, - ["e"] = 6, - ["ę"] = 6, - ["ė"] = 6, - ["f"] = 7, - ["g"] = 8, - ["h"] = 9, - ["i"] = 10, - ["į"] = 10, - ["y"] = 10, - ["j"] = 11, - ["k"] = 12, - ["l"] = 13, - ["m"] = 14, - ["n"] = 15, - ["o"] = 16, - ["p"] = 17, - ["r"] = 18, - ["s"] = 19, - ["š"] = 20, - ["t"] = 21, - ["u"] = 22, - ["ų"] = 22, - ["ū"] = 22, - ["v"] = 23, - ["z"] = 24, - ["ž"] = 25, -} - -adduppercaseentries ("lt") -adduppercasemappings("lt") - -entries ["lt"][lt_CH] = entries ["lt"][lt_ch] -mappings["lt"][lt_CH] = mappings["lt"][lt_ch] - --- Latvian -replacements["lv"] = { --[[ None, do you miss any? ]] } - -entries["lv"] = { - ["a"] = "a", - ["ā"] = "a", - ["b"] = "b", - ["c"] = "c", - ["č"] = "č", - ["d"] = "d", - ["e"] = "e", - ["ē"] = "e", - ["f"] = "f", - ["g"] = "g", - ["ģ"] = "ģ", - ["h"] = "h", - ["i"] = "i", - ["ī"] = "i", - ["j"] = "j", - ["k"] = "k", - ["ķ"] = "ķ", - ["l"] = "l", - ["ļ"] = "ļ", - ["m"] = "m", - ["n"] = "n", - ["ņ"] = "ņ", - ["o"] = "o", - ["ō"] = "o", - ["p"] = "p", - ["r"] = "r", - ["ŗ"] = "ŗ", - ["s"] = "s", - ["š"] = "š", - ["t"] = "t", - ["u"] = "u", - ["ū"] = "u", - ["v"] = "v", - ["z"] = "z", - ["ž"] = "ž", -} - -mappings["lv"] = { - ["a"] = 1, - ["ā"] = 1, - ["b"] = 2, - ["c"] = 3, - ["č"] = 4, - ["d"] = 5, - ["e"] = 6, - ["ē"] = 6, - ["f"] = 7, - ["g"] = 8, - ["ģ"] = 9, - ["h"] = 10, - ["i"] = 11, - ["ī"] = 11, - ["j"] = 12, - ["k"] = 13, - ["ķ"] = 14, - ["l"] = 15, - ["ļ"] = 16, - ["m"] = 17, - ["n"] = 18, - ["ņ"] = 19, - ["o"] = 20, - ["ō"] = 20, - ["p"] = 21, - ["r"] = 22, - ["ŗ"] = 23, - ["s"] = 24, - ["š"] = 25, - ["t"] = 26, - ["u"] = 27, - ["ū"] = 27, - ["v"] = 28, - ["z"] = 29, - ["ž"] = 30, +definitions["lv"] = { + entries = { + ["a"] = "a", ["ā"] = "a", ["b"] = "b", ["c"] = "c", ["č"] = "č", + ["d"] = "d", ["e"] = "e", ["ē"] = "e", ["f"] = "f", ["g"] = "g", + ["ģ"] = "ģ", ["h"] = "h", ["i"] = "i", ["ī"] = "i", ["j"] = "j", + ["k"] = "k", ["ķ"] = "ķ", ["l"] = "l", ["ļ"] = "ļ", ["m"] = "m", + ["n"] = "n", ["ņ"] = "ņ", ["o"] = "o", ["ō"] = "o", ["p"] = "p", + ["r"] = "r", ["ŗ"] = "ŗ", ["s"] = "s", ["š"] = "š", ["t"] = "t", + ["u"] = "u", ["ū"] = "u", ["v"] = "v", ["z"] = "z", ["ž"] = "ž", + }, + orders = { + "a", "ā", "b", "c", "č", "d", "e", "ē", "f", "g", + "ģ", "h", "i", "ī", "j", "k", "ķ", "l", "ļ", "m", + "n", "ņ", "o", "ō", "p", "r", "ŗ", "s", "š", "t", + "u", "ū", "v", "z", "ž", + } } -adduppercaseentries ("lv") -adduppercasemappings("lv") - --- Hungarian -- Helpful but disturbing: @@ -2464,230 +791,68 @@ adduppercasemappings("lv") -- for sequences like “nny”, “ssz”, and “zsz”. This is left as an exercise to -- the reader…) -local hu_cs = uc(replacementoffset + 1) -local hu_CS = uc(replacementoffset + 2) - -local hu_dz = uc(replacementoffset + 3) -local hu_DZ = uc(replacementoffset + 4) - -local hu_dzs = uc(replacementoffset + 5) -local hu_DZS = uc(replacementoffset + 6) - -local hu_gy = uc(replacementoffset + 7) -local hu_GY = uc(replacementoffset + 8) - -local hu_ly = uc(replacementoffset + 9) -local hu_LY = uc(replacementoffset + 10) - -local hu_ny = uc(replacementoffset + 11) -local hu_NY = uc(replacementoffset + 12) - -local hu_sz = uc(replacementoffset + 13) -local hu_SZ = uc(replacementoffset + 14) - -local hu_ty = uc(replacementoffset + 15) -local hu_TY = uc(replacementoffset + 16) - -local hu_zs = uc(replacementoffset + 17) -local hu_ZS = uc(replacementoffset + 18) - -replacements["hu"] = { - [1] = { "cs", hu_cs }, - [2] = { "dz", hu_dz }, - [3] = { "dzs", hu_dzs }, - [4] = { "gy", hu_gy }, - [5] = { "ly", hu_ly }, - [6] = { "ny", hu_ny }, - [7] = { "sz", hu_sz }, - [8] = { "ty", hu_ty }, - [9] = { "zs", hu_zs }, -} - -entries["hu"] = { - ["a"] = "a", - ["á"] = "a", - ["b"] = "b", - ["c"] = "c", - [hu_cs] = "cs", - ["d"] = "d", - [hu_dz] = "dz", - [hu_dzs] = "dzs", - ["e"] = "e", - ["é"] = "e", - ["f"] = "f", - ["g"] = "g", - [hu_gy] = "gy", - ["h"] = "h", - ["i"] = "i", - ["í"] = "i", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - [hu_ly] = "ly", - ["m"] = "m", - ["n"] = "n", - [hu_ny] = "ny", - ["o"] = "o", - ["ó"] = "o", - ["ö"] = "ö", - ["ő"] = "ö", - ["p"] = "p", - ["q"] = "q", - ["r"] = "r", - ["s"] = "s", - [hu_sz] = "sz", - ["t"] = "t", - [hu_ty] = "ty", - ["u"] = "u", - ["ú"] = "u", - ["ü"] = "ü", - ["ű"] = "ü", - ["v"] = "v", - ["w"] = "w", - ["x"] = "x", - ["y"] = "y", - ["z"] = "z", - [hu_zs] = "zs", -} - -mappings["hu"] = { - ["a"] = 1, - ["á"] = 1, -- -> a - ["b"] = 2, - ["c"] = 3, - [hu_cs] = 4, - ["d"] = 5, - [hu_dz] = 6, - [hu_dzs] = 7, - ["e"] = 8, - ["é"] = 8, -- -> e - ["f"] = 9, - ["g"] = 10, - [hu_gy] = 11, - ["h"] = 12, - ["i"] = 13, - ["í"] = 13, -- -> i - ["j"] = 14, - ["k"] = 15, - ["l"] = 16, - [hu_ly] = 17, - ["m"] = 18, - ["n"] = 19, - [hu_ny] = 20, - ["o"] = 21, - ["ó"] = 21, -- -> o - ["ö"] = 22, - ["ő"] = 22, -- -> ö - ["p"] = 23, - ["q"] = 24, - ["r"] = 25, - ["s"] = 26, - [hu_sz] = 27, - ["t"] = 28, - [hu_ty] = 29, - ["u"] = 30, - ["ú"] = 30, -- -> u - ["ü"] = 31, - ["ű"] = 31, -- -> ü - ["v"] = 32, - ["w"] = 33, - ["x"] = 34, - ["y"] = 35, - ["z"] = 36, - [hu_zs] = 37, +local cs, CS = utfchar(replacementoffset + 1), utfchar(replacementoffset + 11) +local dz, DZ = utfchar(replacementoffset + 2), utfchar(replacementoffset + 12) +local dzs, DZS = utfchar(replacementoffset + 3), utfchar(replacementoffset + 13) +local gy, GY = utfchar(replacementoffset + 4), utfchar(replacementoffset + 14) +local ly, LY = utfchar(replacementoffset + 5), utfchar(replacementoffset + 15) +local ny, NY = utfchar(replacementoffset + 6), utfchar(replacementoffset + 16) +local sz, SZ = utfchar(replacementoffset + 7), utfchar(replacementoffset + 17) +local ty, TY = utfchar(replacementoffset + 8), utfchar(replacementoffset + 18) +local zs, ZS = utfchar(replacementoffset + 9), utfchar(replacementoffset + 19) + +definitions["hu"] = { + replacements = { + { "cs", cs }, { "CS", CS }, + { "dz", dz }, { "DZ", DZ }, + { "dzs", dzs }, { "DZS", DZS }, + { "gy", gy }, { "GY", GY }, + { "ly", ly }, { "LY", LY }, + { "ny", ny }, { "NY", NY }, + { "sz", sz }, { "SZ", SZ }, + { "ty", ty }, { "TY", TY }, + { "zs", zs }, { "ZS", ZS }, + }, + entries = { + ["a"] = "a", ["á"] = "a", ["b"] = "b", ["c"] = "c", [cs ] = "cs", + ["d"] = "d", [dz ] = "dz", [dzs] = "dzs", ["e"] = "e", ["é"] = "e", + ["f"] = "f", ["g"] = "g", [gy ] = "gy", ["h"] = "h", ["i"] = "i", + ["í"] = "i", ["j"] = "j", ["k"] = "k", ["l"] = "l", [ly ] = "ly", + ["m"] = "m", ["n"] = "n", [ny ] = "ny", ["o"] = "o", ["ó"] = "o", + ["ö"] = "ö", ["ő"] = "ö", ["p"] = "p", ["q"] = "q", ["r"] = "r", + ["s"] = "s", [sz ] = "sz", ["t"] = "t", [ty ] = "ty", ["u"] = "u", + ["ú"] = "u", ["ü"] = "ü", ["ű"] = "ü", ["v"] = "v", ["w"] = "w", + ["x"] = "x", ["y"] = "y", ["z"] = "z", [zs ] = "zs", + }, + orders = { + "a", "á", "b", "c", cs, "d", dz, dzs, "e", "é", + "f", "g", gy, "h", "i", "í", "j", "k", "l", ly, + "m", "n", ny, "o", "ó", "ö", "ő", "p", "q", "r", + "s", sz, "t", ty, "u", "ú", "ü", "ű", "v", "w", + "x", "y", "z", zs, + }, + lower = { + CS = cs, DZ = dz, DZS = dzs, GY = gy, LY = ly, NY = ny, SZ = sz, TY = ty, ZS = zs, + }, + upper = { + cs = CS, dz = DZ, dzs = DZS, gy = GY, ly = LY, ny = NY, sz = SZ, ty = TY, zs = ZS, + }, } -adduppercaseentries ("hu") -adduppercasemappings("hu") - -entries ["hu"] [hu_CS] = entries ["hu"] [hu_cs] -mappings["hu"] [hu_CS] = mappings["hu"] [hu_cs] -entries ["hu"] [hu_DZ] = entries ["hu"] [hu_dz] -mappings["hu"] [hu_DZ] = mappings["hu"] [hu_dz] -entries ["hu"][hu_DZS] = entries ["hu"][hu_dzs] -mappings["hu"][hu_DZS] = mappings["hu"][hu_dzs] -entries ["hu"] [hu_GY] = entries ["hu"] [hu_gy] -mappings["hu"] [hu_GY] = mappings["hu"] [hu_gy] -entries ["hu"] [hu_LY] = entries ["hu"] [hu_ly] -mappings["hu"] [hu_LY] = mappings["hu"] [hu_ly] -entries ["hu"] [hu_NY] = entries ["hu"] [hu_ny] -mappings["hu"] [hu_NY] = mappings["hu"] [hu_ny] -entries ["hu"] [hu_SZ] = entries ["hu"] [hu_sz] -mappings["hu"] [hu_SZ] = mappings["hu"] [hu_sz] -entries ["hu"] [hu_TY] = entries ["hu"] [hu_ty] -mappings["hu"] [hu_TY] = mappings["hu"] [hu_ty] -entries ["hu"] [hu_ZS] = entries ["hu"] [hu_zs] -mappings["hu"] [hu_ZS] = mappings["hu"] [hu_zs] - --- Estonian -replacements["et"] = { --[[ None, do you miss any? ]] } - -entries["et"] = { - ["a"] = "a", - ["b"] = "b", - ["d"] = "d", - ["e"] = "e", - ["f"] = "f", - ["g"] = "g", - ["h"] = "h", - ["i"] = "i", - ["j"] = "j", - ["k"] = "k", - ["l"] = "l", - ["m"] = "m", - ["n"] = "n", - ["o"] = "o", - ["p"] = "p", - ["r"] = "r", - ["s"] = "s", - ["š"] = "š", - ["z"] = "z", - ["ž"] = "ž", - ["t"] = "t", - ["u"] = "u", - ["v"] = "v", - ["w"] = "v", -- foreign words only - ["õ"] = "õ", - ["ä"] = "ä", - ["ö"] = "ö", - ["ü"] = "ü", - ["x"] = "x", --foreign words only - ["y"] = "y", --foreign words only +definitions["et"] = { + entries = { -- w x y are used for foreign words only + ["a"] = "a", ["b"] = "b", ["d"] = "d", ["e"] = "e", ["f"] = "f", + ["g"] = "g", ["h"] = "h", ["i"] = "i", ["j"] = "j", ["k"] = "k", + ["l"] = "l", ["m"] = "m", ["n"] = "n", ["o"] = "o", ["p"] = "p", + ["r"] = "r", ["s"] = "s", ["š"] = "š", ["z"] = "z", ["ž"] = "ž", + ["t"] = "t", ["u"] = "u", ["v"] = "v", ["w"] = "v", ["õ"] = "õ", + ["ä"] = "ä", ["ö"] = "ö", ["ü"] = "ü", ["x"] = "x", ["y"] = "y", + }, + orders = { + "a", "b", "d", "e", "f", "g", "h", "i", "j", "k", + "l", "m", "n", "o", "p", "r", "s", "š", "z", "ž", + "t", "u", "v", "w", "õ", "ä", "ö", "ü", "x", "y", + } } - -mappings["et"] = { - ["a"] = 1, - ["b"] = 2, - ["d"] = 3, - ["e"] = 4, - ["f"] = 5, - ["g"] = 6, - ["h"] = 7, - ["i"] = 8, - ["j"] = 9, - ["k"] = 10, - ["l"] = 11, - ["m"] = 12, - ["n"] = 13, - ["o"] = 14, - ["p"] = 15, - ["r"] = 16, - ["s"] = 17, - ["š"] = 18, - ["z"] = 19, - ["ž"] = 20, - ["t"] = 21, - ["u"] = 22, - ["v"] = 23, - ["w"] = 23, - ["õ"] = 24, - ["ä"] = 25, - ["ö"] = 26, - ["ü"] = 27, - ["x"] = 28, - ["y"] = 29, -} - -adduppercaseentries ("et") -adduppercasemappings("et") diff --git a/tex/context/base/strc-ref.lua b/tex/context/base/strc-ref.lua index 9345dc3bf..bced781fd 100644 --- a/tex/context/base/strc-ref.lua +++ b/tex/context/base/strc-ref.lua @@ -7,7 +7,7 @@ if not modules then modules = { } end modules ['strc-ref'] = { } local format, find, gmatch, match, concat = string.format, string.find, string.gmatch, string.match, table.concat -local lpegmatch = lpeg.match +local lpegmatch, lpegP, lpegCs = lpeg.match, lpeg.P, lpeg.Cs local texsprint, texwrite, texcount, texsetcount = tex.sprint, tex.write, tex.count, tex.setcount local allocate, mark = utilities.storage.allocate, utilities.storage.mark @@ -80,7 +80,7 @@ function references.registerfinalizer(func) -- we could use a token register ins finalizers[#finalizers+1] = func end -local function initializer() +local function initializer() -- can we use a tobesaved as metatable for collected? tobesaved = mark(references.tobesaved) collected = mark(references.collected) for i=1,#initializers do @@ -163,9 +163,9 @@ end local result = { } -local lparent, rparent, lbrace, rbrace, dcolon, backslash = lpeg.P("("), lpeg.P(")"), lpeg.P("{"), lpeg.P("}"), lpeg.P("::"), lpeg.P("\\") +local lparent, rparent, lbrace, rbrace, dcolon, backslash = lpegP("("), lpegP(")"), lpegP("{"), lpegP("}"), lpegP("::"), lpegP("\\") -local reset = lpeg.P("") / function() result = { } end +local reset = lpegP("") / function() result = { } end local b_token = backslash / function(s) result.has_tex = true return s end local o_token = 1 - rparent - rbrace - lparent - lbrace @@ -174,11 +174,11 @@ local s_token = 1 - lparent - lbrace - lparent - lbrace local i_token = 1 - lparent - lbrace local f_token = 1 - lparent - lbrace - dcolon -local outer = (f_token )^1 / function (s) result.outer = s end -local operation = lpeg.Cs((b_token + o_token)^1) / function (s) result.operation = s end -local arguments = lpeg.Cs((b_token + a_token)^0) / function (s) result.arguments = s end -local special = (s_token )^1 / function (s) result.special = s end -local inner = (i_token )^1 / function (s) result.inner = s end +local outer = (f_token )^1 / function (s) result.outer = s end +local operation = lpegCs((b_token + o_token)^1) / function (s) result.operation = s end +local arguments = lpegCs((b_token + a_token)^0) / function (s) result.arguments = s end +local special = (s_token )^1 / function (s) result.special = s end +local inner = (i_token )^1 / function (s) result.inner = s end local outer_reference = (outer * dcolon)^0 @@ -244,6 +244,7 @@ function references.resolvers.reference(var) end local function register_from_lists(collected,derived) + local g = derived[""] if not g then g = { } derived[""] = g end -- global for i=1,#collected do local entry = collected[i] local m, r = entry.metadata, entry.references @@ -258,7 +259,8 @@ local function register_from_lists(collected,derived) if trace_referencing then report_references("list entry %s provides %s reference '%s' on realpage %s",i,kind,s,realpage) end - d[s] = t -- share them + d[s] = d[s] or t -- share them + g[s] = g[s] or t -- first wins end end end @@ -632,7 +634,9 @@ end -- t.special t.operation t.arguments t.outer t.inner -local prefixsplitter = lpeg.splitat(":") +-- to what extend do we check the non prefixed variant + +local strict = false local function resolve(prefix,reference,args,set) -- we start with prefix,reference texcount.referencehastexstate = 0 @@ -646,29 +650,39 @@ local function resolve(prefix,reference,args,set) -- we start with prefix,refere local r = settings_to_array(reference) for i=1,#r do local ri = r[i] - local dp = defined[prefix] or defined[""] - local d = dp[ri] + local d + if strict then + d = defined[prefix] or defined[""] + d = d and d[ri] + else + d = defined[prefix] + d = d and d[ri] + if not d then + d = defined[""] + d = d and d[ri] + end + end if d then resolve(prefix,d[2],nil,set) else local var = lpegmatch(scanner,ri) if var then var.reference = ri - if not var.outer and var.inner then - local d = defined[prefix] - d = d and d[var.inner] ---~ if not d then ---~ local p, r = lpegmatch(prefixsplitter,var.inner) ---~ d = defined[p] ---~ d = d and d[r] ---~ print(p,r,d) ---~ table.print(defined) ---~ end - if not d then - d = defined[""] - d = d and d[var.inner] + local vo, vi = var.outer, var.inner + if not vo and vi then + -- to be checked + if strict then + d = defined[prefix] or defined[""] + d = d and d[vi] + else + d = defined[prefix] + d = d and d[vi] + if not d then + d = defined[""] + d = d and d[vi] + end end - d = d and d[var.inner] + -- if d then resolve(prefix,d[2],var.arguments,set) -- args can be nil else @@ -690,6 +704,7 @@ local function resolve(prefix,reference,args,set) -- we start with prefix,refere if set.has_tex then texcount.referencehastexstate = 1 end +--~ table.print(set) return set else return { } @@ -746,6 +761,8 @@ end --~ end --~ end +local prefixsplitter = lpegCs(lpegP((1-lpegP(":"))^1 * lpegP(":"))) * lpegCs(lpegP(1)^1) + local function identify(prefix,reference) local set = resolve(prefix,reference) local bug = false @@ -899,47 +916,91 @@ local function identify(prefix,reference) var.error = "unknown inner or special" end else - -- inner ---~ local i = tobesaved[prefix] - local i = collected[prefix] - i = i and i[inner] + -- inner ... we could move the prefix logic into the parser so that we have 'm for each entry + -- foo:bar -> foo == prefix (first we try the global one) + -- -:bar -> ignore prefix + local p, i = prefix, nil + local splitprefix, splitinner = lpegmatch(prefixsplitter,inner) + -- these are taken from other anonymous references + if splitprefix and splitinner then + if splitprefix == "-" then + i = collected[""] + i = i and i[splitinner] + if i then + p = "" + end + else + i = collected[splitprefix] + i = i and i[splitinner] + if i then + p = splitprefix + end + end + else + i = collected[prefix] + i = i and i[inner] + if i then + p = prefix + end + end if i then var.i = { "reference", i } references.resolvers.reference(var) var.kind = "inner" - var.p = prefix + var.p = p else - i = derived[prefix] - i = i and i[inner] + -- these are taken from other data structures (like lists) +--~ print("!!!!!!!!!!!!!!",splitprefix,splitinner) +--~ table.print(derived) + if splitprefix and splitinner then + if splitprefix == "-" then + i = derived[""] + i = i and i[splitinner] + if i then + p = "" + end + else + i = derived[splitprefix] + i = i and i[splitinner] + if i then + p = splitprefix + end + end + else + i = derived[prefix] + i = i and i[inner] + if i then + p = prefix + end + end + if not i and prefix ~= "" then + i = derived[""] + i = i and i[inner] + if i then + p = "" + end + end if i then var.kind = "inner" var.i = i references.resolvers[i[1]](var) - var.p = prefix + var.p = p else - i = collected[prefix] - i = i and i[inner] - if i then - var.kind = "inner" - var.i = { "reference", i } - references.resolvers.reference(var) - var.p = prefix + -- no prefixes here + local s = specials[inner] + if s then + var.kind = "special" else - local s = specials[inner] - if s then - var.kind = "special" + i = (collected[""] and collected[""][inner]) or + (derived [""] and derived [""][inner]) or + (tobesaved[""] and tobesaved[""][inner]) + if i then + var.kind = "inner" + var.i = { "reference", i } + references.resolvers.reference(var) + var.p = "" else - i = (collected[""] and collected[""][inner]) or - (derived [""] and derived [""][inner]) or - (tobesaved[""] and tobesaved[""][inner]) - if i then - var.kind = "inner" - var.i = { "reference", i } - references.resolvers.reference(var) - var.p = "" - else - var.error = "unknown inner or special" - end + var.error = "unknown inner or special" end end end diff --git a/tex/context/base/strc-ref.mkii b/tex/context/base/strc-ref.mkii index 1df5d09d8..4550f7144 100644 --- a/tex/context/base/strc-ref.mkii +++ b/tex/context/base/strc-ref.mkii @@ -44,22 +44,6 @@ \unprotect -% messages moved - -% messages moved - -% messages moved - -% messages moved - -% messages moved - -% messages moved - -% messages moved - -% messages moved - %D This module deals with referencing. In \CONTEXT\ referencing %D is one of the core features, although at a first glance %D probably nobody will notice. This is good, because @@ -786,7 +770,7 @@ %D \starttyping %D \pagereference[important] %D \pagereference[unimportant] -%D \setupreferencing[prefixprefix=sidetrack] +%D \setupreferencing[prefix=sidetrack] %D \pagereference[important] %D \stoptyping %D diff --git a/tex/context/base/strc-reg.lua b/tex/context/base/strc-reg.lua index 1e9781b38..23ce4d221 100644 --- a/tex/context/base/strc-reg.lua +++ b/tex/context/base/strc-reg.lua @@ -418,7 +418,7 @@ function registers.analyzed(class,options) local data = collected[class] if data and data.entries then options = options or { } - sorters.setlanguage(options.language) + sorters.setlanguage(options.language,options.method) registers.filter(data,options) -- filter entries into results (criteria) registers.prepare(data,options) -- adds split table parallel to list table registers.sort(data,options) -- sorts results diff --git a/tex/context/base/strc-reg.mkiv b/tex/context/base/strc-reg.mkiv index 118ba2a35..37c849790 100644 --- a/tex/context/base/strc-reg.mkiv +++ b/tex/context/base/strc-reg.mkiv @@ -122,6 +122,7 @@ \c!pagesegments=2:2, \c!file=\jobname, %\c!deeptextcommand=, % undefined by default ! + \c!method=, % no default as we have them in the module, maybe some day in lang-* \s!language=\currentmainlanguage]% %D Definition: @@ -443,6 +444,8 @@ %D Rendering: +% todo: c!language ipv s!language + \let\utilityregisterlength\!!zerocount \def\determineregistercharacteristics @@ -454,6 +457,7 @@ \setupregister[\currentregister][#2]% \normalexpanded{\endgroup\noexpand\xdef\noexpand\utilityregisterlength{\ctxlua{structures.registers.analyze('\currentregister',{ language = "\registerparameter\s!language", + method = "\registerparameter\c!method", compress = "\registerparameter\c!compress", criterium = "\registerparameter\c!criterium", })}}}% brrr @@ -487,6 +491,7 @@ \startpacked[\v!blank]% \ctxlua{structures.registers.process('\currentregister',{ language = "\registerparameter\s!language", + method = "\registerparameter\c!method", compress = "\registerparameter\c!compress", criterium = "\registerparameter\c!criterium", }, diff --git a/tex/context/base/strc-sec.mkiv b/tex/context/base/strc-sec.mkiv index b2b962183..2a6d25631 100644 --- a/tex/context/base/strc-sec.mkiv +++ b/tex/context/base/strc-sec.mkiv @@ -287,22 +287,35 @@ \let\structureheadprefix\empty \def\structureheadprefixplus{+} % When do we reset the referenceprefix? This needs to be checked. Does it work -% at all? +% at all? Todo: pushpop. + +% \def\setstructureheadreference +% {\edef\structureheadreference {\structureheadparameter\c!reference}% +% \edef\structureheadreferenceprefix{\structureheadparameter\c!prefix}% +% \ifx\structureheadreferenceprefix\empty +% \ifx\currentrefenceprefix\structureheadreferenceprefixplus +% \setupreferenceprefix[]% yes or no? only when set by structure +% \fi +% \else\ifx\structureheadreferenceprefix\structureheadreferenceprefixplus +% \ifx\structureheadreference\empty +% \expanded{\setupreferenceprefix[\structureheadreferenceprefixplus]}% +% \else +% \expanded{\setupreferenceprefix[\structureheadreference]}% we assume just one reference +% \fi +% \else +% \expanded{\setupreferenceprefix[\structureheadreferenceprefix]}% +% \fi\fi} + +% pop in stopsection commands unless empty \def\setstructureheadreference {\edef\structureheadreference {\structureheadparameter\c!reference}% \edef\structureheadreferenceprefix{\structureheadparameter\c!prefix}% \ifx\structureheadreferenceprefix\empty - \setupreferenceprefix[]% yes or no? - \else\ifx\structureheadreferenceprefix\structureheadreferenceprefixplus - \ifx\structureheadreference\empty - \expanded{\setupreferenceprefix[\structureheadreferenceprefixplus]}% - \else - \expanded{\setupreferenceprefix[\structureheadreference]}% we assume just one reference - \fi + \let\structureheadreferenceprefix\referenceprefix \else - \expanded{\setupreferenceprefix[\structureheadreferenceprefix]}% - \fi\fi} + \setupreferenceprefix[\structureheadreferenceprefix]% currenty no pop + \fi} \setvalue{\??nh:\c!incrementnumber:\v!yes }{\settrue \structureheaddoincrement\settrue \structureheadtolist} \setvalue{\??nh:\c!incrementnumber:\v!no }{\setfalse\structureheaddoincrement\setfalse\structureheadtolist} diff --git a/tex/context/base/strc-syn.mkiv b/tex/context/base/strc-syn.mkiv index 1c59ad7a4..d46edaa39 100644 --- a/tex/context/base/strc-syn.mkiv +++ b/tex/context/base/strc-syn.mkiv @@ -96,6 +96,7 @@ %\c!after=, \c!indentnext=\v!no, %\c!expansion=, + \c!method=, \s!language=\currentmainlanguage] \unexpanded\def\definesynonyms @@ -200,7 +201,11 @@ \c!color=. #2]% \startpacked - \ctxlua{structures.synonyms.process('#1',{ criterium = "\synonymparameter\c!criterium" })}% + \ctxlua{structures.synonyms.process('#1',{ + criterium = "\synonymparameter\c!criterium", + language = "\synonymparameter\s!language", + method = "\synonymparameter\c!method", + })}% \stoppacked \endgroup} @@ -263,6 +268,7 @@ %\c!before=, \c!after=\endgraf, %\c!expansion=, + \c!method=, \s!language=\currentmainlanguage] \unexpanded\def\definesorting @@ -344,7 +350,11 @@ \def\currentsorting{#1}% \getparameters[\??so#1][#2]% \startpacked - \ctxlua{structures.synonyms.process('#1',{ criterium = "\sortingparameter\c!criterium" })}% + \ctxlua{structures.synonyms.process('#1',{ + criterium = "\sortingparameter\c!criterium", + language = "\sortingparameter\s!language", + method = "\sortingparameter\c!method", + })}% \stoppacked \endgroup} diff --git a/tex/generic/context/luatex-fonts-merged.lua b/tex/generic/context/luatex-fonts-merged.lua index 7718f984a..4c221ae12 100644 --- a/tex/generic/context/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 09/18/10 12:08:38 +-- merge date : 09/22/10 09:35:53 do -- begin closure to overcome local limits and interference @@ -736,7 +736,7 @@ local function sortedkeys(tab) elseif tkey == "number" then -- if kind == 1 then kind = 3 else kind = 2 end kind = (kind == 1 and 3) or 2 - else + else -- if tkey then kind = 3 end end @@ -752,7 +752,9 @@ end local function sortedhashkeys(tab) -- fast one local srt = { } for key,_ in next, tab do - srt[#srt+1] = key + if key then + srt[#srt+1] = key + end end sort(srt) return srt -- cgit v1.2.3