summaryrefslogtreecommitdiff
path: root/tex/context/base/mkxl/regi-ini.lmt
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkxl/regi-ini.lmt')
-rw-r--r--tex/context/base/mkxl/regi-ini.lmt367
1 files changed, 367 insertions, 0 deletions
diff --git a/tex/context/base/mkxl/regi-ini.lmt b/tex/context/base/mkxl/regi-ini.lmt
new file mode 100644
index 000000000..c0cd4f1c8
--- /dev/null
+++ b/tex/context/base/mkxl/regi-ini.lmt
@@ -0,0 +1,367 @@
+if not modules then modules = { } end modules ['regi-ini'] = {
+ version = 1.001,
+ comment = "companion to regi-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+}
+
+--[[ldx--
+<p>Regimes take care of converting the input characters into
+<l n='utf'/> sequences. The conversion tables are loaded at
+runtime.</p>
+--ldx]]--
+
+local tostring = tostring
+local utfchar = utf.char
+local P, Cs, Cc, lpegmatch = lpeg.P, lpeg.Cs, lpeg.Cc, lpeg.match
+local char, gsub, format, gmatch, byte, match, lower = string.char, string.gsub, string.format, string.gmatch, string.byte, string.match, string.lower
+local next = next
+local insert, remove, fastcopy = table.insert, table.remove, table.fastcopy
+local concat = table.concat
+local totable = string.totable
+
+local allocate = utilities.storage.allocate
+local sequencers = utilities.sequencers
+local textlineactions = resolvers.openers.helpers.textlineactions
+local setmetatableindex = table.setmetatableindex
+
+-- We will hook regime handling code into the input methods.
+
+local trace_translating = false trackers.register("regimes.translating", function(v) trace_translating = v end) -- the flag follows the "regimes.translating" tracker
+
+local report_loading = logs.reporter("regimes","loading") -- used when a vector is loaded or turns out to be unknown
+local report_translating = logs.reporter("regimes","translating") -- only used when trace_translating is set
+
+regimes = regimes or { } -- global namespace, an already existing table is reused
+local regimes = regimes
+
+local mapping = allocate { -- regime name -> conversion vector, or false when there is none
+ utf = false -- preset: utf input needs no vector
+}
+
+local backmapping = allocate { -- regime name -> reversed vector (utf string -> eight bit character)
+}
+
+-- regimes.mapping = mapping
+
+local synonyms = { -- backward compatibility list
+
+ ["windows-1250"] = "cp1250", -- windows code pages map onto the cp names
+ ["windows-1251"] = "cp1251",
+ ["windows-1252"] = "cp1252",
+ ["windows-1253"] = "cp1253",
+ ["windows-1254"] = "cp1254",
+ ["windows-1255"] = "cp1255",
+ ["windows-1256"] = "cp1256",
+ ["windows-1257"] = "cp1257",
+ ["windows-1258"] = "cp1258",
+
+ ["il1"] = "8859-1", -- short il names, from il5 on the number differs from the 8859 part
+ ["il2"] = "8859-2",
+ ["il3"] = "8859-3",
+ ["il4"] = "8859-4",
+ ["il5"] = "8859-9",
+ ["il6"] = "8859-10",
+ ["il7"] = "8859-13",
+ ["il8"] = "8859-14",
+ ["il9"] = "8859-15",
+ ["il10"] = "8859-16",
+
+ ["iso-8859-1"] = "8859-1", -- iso names just lose their prefix
+ ["iso-8859-2"] = "8859-2",
+ ["iso-8859-3"] = "8859-3",
+ ["iso-8859-4"] = "8859-4",
+ ["iso-8859-9"] = "8859-9",
+ ["iso-8859-10"] = "8859-10",
+ ["iso-8859-13"] = "8859-13",
+ ["iso-8859-14"] = "8859-14",
+ ["iso-8859-15"] = "8859-15",
+ ["iso-8859-16"] = "8859-16",
+
+ ["latin1"] = "8859-1", -- latin names, numbered like the il names
+ ["latin2"] = "8859-2",
+ ["latin3"] = "8859-3",
+ ["latin4"] = "8859-4",
+ ["latin5"] = "8859-9",
+ ["latin6"] = "8859-10",
+ ["latin7"] = "8859-13",
+ ["latin8"] = "8859-14",
+ ["latin9"] = "8859-15",
+ ["latin10"] = "8859-16",
+
+ ["utf-8"] = "utf", -- all utf spellings end up as utf
+ ["utf8"] = "utf",
+ [""] = "utf", -- an empty name means utf
+
+ ["windows"] = "cp1252", -- plain windows means cp1252
+
+ ["pdf"] = "pdfdoc",
+
+ ["437"] = "ibm",
+}
+
+local currentregime = "utf" -- the regime that regimes.process applies, set by enable and disable
+
+-- Loads the conversion vector of a regime and caches it in 'mapping'. The
+-- function is handed to setmetatableindex for that table, so it is meant to
+-- run when a regime is looked up that has no entry yet.
+--
+-- mapping : the table in which the vector gets stored
+-- regime  : the requested name; it is lowercased and resolved via 'synonyms',
+--           where a name like "1250" is also tried as "windows-1250"
+--
+-- The vector is read from the file regi-<regime>.lua, which has to return a
+-- table that maps byte values onto unicode code points. The result maps single
+-- eight bit characters onto utf strings, or is false when there is no usable
+-- file.
+local function loadregime(mapping,regime)
+    local requested = regime
+    regime = lower(tostring(regime))
+    regime = synonyms[regime] or synonyms["windows-"..regime] or regime
+    local name = resolvers.findfile(format("regi-%s.lua",regime)) or ""
+    local data = name ~= "" and dofile(name)
+    -- keep this one local, otherwise every load (re)defines a global 'vector'
+    local vector = false
+    if data then
+        vector = { }
+        for eightbit, unicode in next, data do
+            vector[char(eightbit)] = utfchar(unicode)
+        end
+        report_loading("vector %a is loaded",regime)
+    else
+        report_loading("vector %a is unknown",regime)
+    end
+    mapping[regime] = vector
+    -- Also store the result under the key that was asked for. Without this an
+    -- alias, an uppercase name or a number never gets an entry of its own, so
+    -- each later lookup ends up here again and reloads the file. A nil key
+    -- cannot be assigned to, so that one is skipped.
+    if requested ~= nil and requested ~= regime then
+        mapping[requested] = vector
+    end
+    return vector
+end
+
+-- Builds the reverse of a regime vector, so a table that maps utf strings back
+-- onto eight bit characters. The result is stored in 'backmapping' under the
+-- requested key and returned; a regime without vector gives an empty table.
+local function loadreverse(t,k)
+    local reverse = { }
+    local vector = mapping[k]
+    if vector then
+        for eightbit, utf in next, vector do
+            reverse[utf] = eightbit
+        end
+    end
+    backmapping[k] = reverse
+    return reverse
+end
+
+setmetatableindex(mapping, loadregime) -- presumably the handler for missing keys, so vectors load on first access
+setmetatableindex(backmapping,loadreverse) -- same for the reverse vectors
+
+regimes.mapping = mapping -- both tables are public
+regimes.backmapping = backmapping
+
+-- Converts a line from the given regime (or the current one when none is
+-- given) to utf by replacing each character via the regime vector. The line
+-- comes back untouched when it is empty or nil, or when the regime has no
+-- vector.
+local function fromregime(regime,line)
+    if not line or #line == 0 then
+        return line
+    end
+    -- local vector = mapping[regime and synonyms[regime] or regime or currentregime]
+    local vector = mapping[regime or currentregime]
+    if not vector then
+        return line
+    end
+    -- the parentheses drop the replacement count that gsub returns as well
+    return (gsub(line,".",vector))
+end
+
+-- Per regime cache. A missing entry is created by the function below and
+-- starts out with an empty 'remappers' table.
+local cache = { } -- if really needed we can copy vectors and hash defaults
+
+local function newcache(t,k)
+    local entry = { remappers = { } }
+    t[k] = entry
+    return entry
+end
+
+setmetatableindex(cache,newcache)
+
+-- Converts an utf string to the given regime. Characters that are not part of
+-- the regime are replaced by 'default' (a question mark when not given). The
+-- converter is built once per regime and default and then kept in 'cache'.
+local function toregime(vector,str,default) -- toregime('8859-1',"abcde Ä","?")
+    local fallback = default or "?"
+    local remappers = cache[vector].remappers
+    local remapper = remappers[fallback]
+    if not remapper then
+        local reverse = fastcopy(backmapping[vector])
+        -- remapper = utf.remapper(reverse) -- not good for defaults here
+        local known = lpeg.utfchartabletopattern(reverse) / reverse
+        -- anything else, first tried as utf character and then as single byte
+        local unknown = lpeg.patterns.utf8character / fallback + P(1) / fallback
+        local pattern = Cs((known + unknown)^0)
+        remapper = function(s)
+            if s and s ~= "" then
+                return lpegmatch(pattern,s)
+            end
+            return ""
+        end
+        remappers[fallback] = remapper
+    end
+    return remapper(str)
+end
+
+-- Falls back to utf and switches off the line action. Returns the name of the
+-- regime that is now current, which is always "utf".
+local function disable()
+    local fallback = "utf"
+    currentregime = fallback
+    sequencers.disableaction(textlineactions,"regimes.process")
+    return fallback
+end
+
+-- Makes the given regime (or what it is a synonym for) the current one and
+-- switches on the line action. A regime without vector disables translation
+-- instead. Returns the name of the regime that is now current.
+local function enable(regime)
+    regime = synonyms[regime] or regime
+    if mapping[regime] == false then
+        return disable()
+    end
+    currentregime = regime
+    sequencers.enableaction(textlineactions,"regimes.process")
+    return currentregime
+end
+
+regimes.enable = enable
+regimes.disable = disable
+regimes.toregime = toregime
+regimes.fromregime = fromregime
+
+-- the same as fromregime, but with the arguments swapped
+function regimes.translate(str,regime)
+    return fromregime(regime,str)
+end
+
+-- The following function can be used when we want to make sure that utf gets passed
+-- unharmed. This is needed for modules.
+
+-- nesting level of push and pop, nothing is translated while it is not zero
+local level = 0
+
+-- The line action: converts a line from the current regime to utf, unless we
+-- are inside a push or the coding is reported as "utf-8". Only 'str' and
+-- 'coding' are looked at.
+function regimes.process(str,filename,currentline,noflines,coding)
+    if level ~= 0 or coding == "utf-8" then
+        return str
+    end
+    local result = fromregime(currentregime,str)
+    if trace_translating then
+        report_translating("utf: %s",result)
+    end
+    return result
+end
+
+-- Enters a level at which regimes.process leaves the lines alone.
+local function push()
+    local entered = level + 1
+    level = entered
+    if trace_translating then
+        report_translating("pushing level %s",entered)
+    end
+end
+
+-- Leaves the level entered by push; a pop without push is ignored.
+local function pop()
+    if level <= 0 then
+        return
+    end
+    if trace_translating then
+        report_translating("popping level %s",level)
+    end
+    level = level - 1
+end
+
+regimes.push = push -- suspends translation in regimes.process
+regimes.pop = pop -- undoes one push
+
+-- Returns a sorted list with the names of the regimes for which a file
+-- regi-<name>.lua sits in the same directory as regi-ini.lua. The list is
+-- empty when that file cannot be located.
+function regimes.list()
+    local okay = { }
+    local name = resolvers.findfile("regi-ini.lua") or ""
+    -- an empty string is true in Lua, so we need an explicit comparison
+    if name ~= "" then
+        local list = dir.glob(file.join(file.dirname(name),"regi-*.lua"))
+        for i=1,#list do
+            -- The glob gives paths, so we take the name from the last path
+            -- segment and skip the ini file by that name.
+            local regime = match(list[i],"regi%-([^/\\]-)%.lua$")
+            if regime and regime ~= "ini" then
+                okay[#okay+1] = regime
+            end
+        end
+        -- sorting once, when the list is complete, is enough
+        table.sort(okay)
+    end
+    return okay
+end
+
+sequencers.prependaction(textlineactions,"system","regimes.process") -- registers regimes.process as text line action
+sequencers.disableaction(textlineactions,"regimes.process") -- it stays off until enable switches it on
+
+-- Next we provide some hacks. Unfortunately we run into badly (read: mixed)
+-- encoded xml files that have these Ã« Ã¤ Ã¶ Ã¼ sequences instead of ë ä ö ü
+-- etc.
+
+-- Cleanup patterns per resolved regime name; false means that there is nothing
+-- to clean up for that regime.
+local patterns = { }
+
+-- Repairs text in which utf sequences got encoded as utf a second time. For
+-- each character of the regime the double encoded form is constructed and a
+-- pattern then replaces these forms by the proper utf; the rest of the string
+-- is kept as it is.
+--
+-- regime : a regime name or synonym, the current regime when not given
+-- str    : the string to clean up
+--
+-- Returns the cleaned up string, or 'str' itself when it is empty or nil or
+-- when the regime is utf or has no vector.
+function regimes.cleanup(regime,str)
+    if not str or str == "" then
+        return str
+    end
+    -- Resolve the name before we consult the cache. The pattern is stored under
+    -- the resolved name, so checking with the name as given would miss the
+    -- cache for synonyms and for a nil regime and rebuild the pattern each time.
+    regime = regime and synonyms[regime] or regime or currentregime
+    local p = patterns[regime]
+    if p == nil then
+        local vector = regime ~= "utf" and regime ~= "utf-8" and mapping[regime]
+        if vector then
+            local remap = { }
+            for _, v in next, vector do
+                -- presumably totable gives the single bytes of v, each byte
+                -- then gets encoded as if it were a code point of its own
+                local split = totable(v)
+                for i=1,#split do
+                    split[i] = utfchar(byte(split[i]))
+                end
+                split = concat(split)
+                -- only entries that differ from their double encoded form count
+                if v ~= split then
+                    remap[split] = v
+                end
+            end
+            p = Cs((lpeg.utfchartabletopattern(remap)/remap+P(1))^0)
+        else
+            p = false
+        end
+        patterns[regime] = p
+    end
+    return p and lpegmatch(p,str) or str
+end
+
+-- local old = [[test Ã« Ã¤ Ã¶ Ã¼ crap]]
+-- local new = regimes.cleanup("cp1252",old)
+-- report_translating("%s -> %s",old,new)
+-- local old = "Pozn" .. char(0xE1) .. "mky"
+-- local new = fromregime("cp1250",old)
+-- report_translating("%s -> %s",old,new)
+
+-- interface (might move to regi-tex.lua)
+
+-- The TeX end: the commands below are only defined when the interfaces
+-- namespace is around. Each change of regime is reported back by setting the
+-- macro "currentregime".
+if interfaces then
+
+    local implement = interfaces.implement
+    local setmacro = interfaces.setmacro
+
+    -- the regimes that were current when startregime was called
+    local stack = { }
+
+    local function enableregime(regime)
+        setmacro("currentregime",enable(regime))
+    end
+
+    local function disableregime()
+        setmacro("currentregime",disable())
+    end
+
+    -- remembers the current regime before switching to the given one
+    local function startregime(regime)
+        insert(stack,currentregime)
+        if trace_translating then
+            report_translating("start using %a",regime)
+        end
+        setmacro("currentregime",enable(regime))
+    end
+
+    -- goes back to the regime that was remembered last, if there is one
+    local function stopregime()
+        if #stack == 0 then
+            return
+        end
+        local regime = remove(stack)
+        if trace_translating then
+            report_translating("stop using %a",regime)
+        end
+        setmacro("currentregime",enable(regime))
+    end
+
+    implement {
+        name = "enableregime",
+        public = true,
+        protected = true,
+        arguments = "optional",
+        actions = enableregime
+    }
+
+    implement {
+        name = "disableregime",
+        public = true,
+        protected = true,
+        actions = disableregime
+    }
+
+    implement {
+        name = "pushregime",
+        public = true,
+        protected = true,
+        actions = push
+    }
+
+    implement {
+        name = "popregime",
+        public = true,
+        protected = true,
+        actions = pop
+    }
+
+    implement {
+        name = "startregime",
+        public = true,
+        protected = true,
+        arguments = "optional",
+        actions = startregime
+    }
+
+    implement {
+        name = "stopregime",
+        public = true,
+        protected = true,
+        actions = stopregime
+    }
+
+end