diff options
Diffstat (limited to 'tex/context/base/regi-ini.lua')
-rw-r--r-- | tex/context/base/regi-ini.lua | 139 |
1 files changed, 79 insertions, 60 deletions
diff --git a/tex/context/base/regi-ini.lua b/tex/context/base/regi-ini.lua index d5d278b16..63f45a0b1 100644 --- a/tex/context/base/regi-ini.lua +++ b/tex/context/base/regi-ini.lua @@ -243,8 +243,12 @@ end regimes.push = push regimes.pop = pop -sequencers.prependaction(textlineactions,"system","regimes.process") -sequencers.disableaction(textlineactions,"regimes.process") +if sequencers then + + sequencers.prependaction(textlineactions,"system","regimes.process") + sequencers.disableaction(textlineactions,"regimes.process") + +end -- interface: @@ -311,48 +315,82 @@ local patterns = { } -- -- twice as fast and much less lpeg bytecode +-- function regimes.cleanup(regime,str) +-- if not str or str == "" then +-- return str +-- end +-- local p = patterns[regime] +-- if p == nil then +-- regime = regime and synonyms[regime] or regime or currentregime +-- local vector = regime ~= "utf" and regime ~= "utf-8" and mapping[regime] +-- if vector then +-- local utfchars = { } +-- local firsts = { } +-- for k, uchar in next, vector do +-- local stream = { } +-- local split = totable(uchar) +-- local nofsplits = #split +-- if nofsplits > 1 then +-- local first +-- for i=1,nofsplits do +-- local u = vector[split[i]] +-- if not first then +-- first = firsts[u] +-- if not first then +-- first = { } +-- firsts[u] = first +-- end +-- end +-- stream[i] = u +-- end +-- local nofstream = #stream +-- if nofstream > 1 then +-- first[#first+1] = concat(stream,2,nofstream) +-- utfchars[concat(stream)] = uchar +-- end +-- end +-- end +-- p = P(false) +-- for k, v in next, firsts do +-- local q = P(false) +-- for i=1,#v do +-- q = q + P(v[i]) +-- end +-- p = p + P(k) * q +-- end +-- p = Cs(((p+1)/utfchars)^1) +-- -- lpeg.print(p) -- size: 1042 +-- else +-- p = false +-- end +-- patterns[regime] = p +-- end +-- return p and lpegmatch(p,str) or str +-- end +-- +-- 5 times faster: + function regimes.cleanup(regime,str) + if not str or str == "" then + return str + end local p = patterns[regime] if p == nil then regime = regime and synonyms[regime] or regime or currentregime - local vector = regime ~= "utf" and mapping[regime] + local vector = regime ~= "utf" and regime ~= "utf-8" and mapping[regime] if vector then - local utfchars = { } - local firsts = { } - for k, uchar in next, vector do - local stream = { } - local split = totable(uchar) - local nofsplits = #split - if nofsplits > 1 then - local first - for i=1,nofsplits do - local u = vector[split[i]] - if not first then - first = firsts[u] - if not first then - first = { } - firsts[u] = first - end - end - stream[i] = u - end - local nofstream = #stream - if nofstream > 1 then - first[#first+1] = concat(stream,2,nofstream) - utfchars[concat(stream)] = uchar - end + local mapping = { } + for k, v in next, vector do + local split = totable(v) + for i=1,#split do + split[i] = utfchar(byte(split[i])) end - end - p = P(false) - for k, v in next, firsts do - local q = P(false) - for i=1,#v do - q = q + P(v[i]) + split = concat(split) + if v ~= split then + mapping[split] = v end - p = p + P(k) * q end - p = Cs(((p+1)/utfchars)^1) - -- lpeg.print(p) -- size: 1042 + p = Cs((lpeg.utfchartabletopattern(table.keys(mapping))/mapping+P(1))^0) else p = false end @@ -361,28 +399,9 @@ function regimes.cleanup(regime,str) return p and lpegmatch(p,str) or str end --- local map = require("regi-cp1252") -- local old = [[test ë ä ö ü crap]] --- local new = correctencoding(map,old) --- --- print(old,new) - --- obsolete: --- --- function regimes.setsynonym(synonym,target) --- synonyms[synonym] = target --- end --- --- function regimes.truename(regime) --- return regime and synonyms[regime] or regime or currentregime --- end --- --- commands.setregimesynonym = regimes.setsynonym --- --- function commands.trueregimename(regime) --- context(regimes.truename(regime)) --- end --- --- function regimes.load(regime) --- return mapping[synonyms[regime] or regime] --- end +-- local new = regimes.cleanup("cp1252",old) +-- report_translating("%s -> %s",old,new) +-- local old = "Pozn" .. char(0xE1) .. "mky" +-- local new = translate(old,"cp1250") +-- report_translating("%s -> %s",old,new) |