summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/char-utf.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkiv/char-utf.lua')
-rw-r--r--tex/context/base/mkiv/char-utf.lua229
1 files changed, 28 insertions, 201 deletions
diff --git a/tex/context/base/mkiv/char-utf.lua b/tex/context/base/mkiv/char-utf.lua
index f4a6d50e1..5702f2087 100644
--- a/tex/context/base/mkiv/char-utf.lua
+++ b/tex/context/base/mkiv/char-utf.lua
@@ -17,6 +17,9 @@ of output (for instance <l n='pdf'/>).</p>
<p>We implement these manipulations as filters. One can run multiple filters
over a string.</p>
+
+<p>The old code has now been moved to char-obs.lua which we keep around for
+educational purposes.</p>
--ldx]]--
local gsub, find = string.gsub, string.find
@@ -42,21 +45,6 @@ local charfromnumber = characters.fromnumber
characters = characters or { }
local characters = characters
-local graphemes = allocate()
-characters.graphemes = graphemes
-
-local collapsed = allocate()
-characters.collapsed = collapsed
-
--- local combined = allocate()
--- characters.combined = combined
-
-local decomposed = allocate()
-characters.decomposed = decomposed
-
-local mathpairs = allocate()
-characters.mathpairs = mathpairs
-
local filters = allocate()
characters.filters = filters
@@ -94,8 +82,20 @@ local decomposed = allocate {
characters.decomposed = decomposed
-local function initialize() -- maybe in tex mode store in format !
- local data = characters.data
+local graphemes = characters.graphemes
+local collapsed = characters.collapsed
+local mathpairs = characters.mathpairs
+
+if not graphemes then
+
+ graphemes = allocate()
+ collapsed = allocate()
+ mathpairs = allocate()
+
+ characters.graphemes = graphemes
+ characters.collapsed = collapsed
+ characters.mathpairs = mathpairs
+
local function backtrack(v,last,target)
local vs = v.specials
if vs and #vs == 3 and vs[1] == "char" then
@@ -105,6 +105,7 @@ local function initialize() -- maybe in tex mode store in format !
backtrack(data[one],second,target)
end
end
+
local function setpair(one,two,unicode,first,second,combination)
local mps = mathpairs[one]
if not mps then
@@ -121,6 +122,7 @@ local function initialize() -- maybe in tex mode store in format !
mps[second] = combination
end
end
+
for unicode, v in next, data do
local vs = v.specials
if vs and #vs == 3 and vs[1] == "char" then
@@ -150,18 +152,16 @@ local function initialize() -- maybe in tex mode store in format !
setpair(one,two,unicode,first,second,combination)
end
end
- initialize = false
- characters.initialize = function() end
-end
-characters.initialize = initialize
+ if storage then
+ storage.register("characters/graphemes", characters.graphemes, "characters.graphemes")
+ storage.register("characters/collapsed", characters.collapsed, "characters.collapsed")
+ storage.register("characters/mathpairs", characters.mathpairs, "characters.mathpairs")
+ end
---[[ldx--
-<p>The next variant has lazy token collecting, on a 140 page mk.tex this saves
-about .25 seconds, which is understandable because we have no graphemes and
-not collecting tokens is not only faster but also saves garbage collecting.
-</p>
---ldx]]--
+end
+
+function characters.initialize() end -- dummy
local skippable = { }
local filesuffix = file.suffix
@@ -179,119 +179,9 @@ function utffilters.setskippable(suffix,value)
end
end
--- function utffilters.collapse(str,filename) -- we can make high a seperate pass (never needed with collapse)
--- if skippable[filesuffix(filename)] then
--- return str
--- -- elseif find(filename,"^virtual://") then
--- -- return str
--- -- else
--- -- -- print("\n"..filename)
--- end
--- if str and str ~= "" then
--- local nstr = #str
--- if nstr > 1 then
--- if initialize then -- saves a call
--- initialize()
--- end
--- local tokens, t, first, done, n = { }, 0, false, false, 0
--- for second in utfcharacters(str) do
--- if done then
--- if first then
--- if second == " " then
--- t = t + 1
--- tokens[t] = first
--- first = second
--- else
--- -- local crs = high[second]
--- -- if crs then
--- -- t = t + 1
--- -- tokens[t] = first
--- -- first = crs
--- -- else
--- local cgf = graphemes[first]
--- if cgf and cgf[second] then
--- first = cgf[second]
--- else
--- t = t + 1
--- tokens[t] = first
--- first = second
--- end
--- -- end
--- end
--- elseif second == " " then
--- first = second
--- else
--- -- local crs = high[second]
--- -- if crs then
--- -- first = crs
--- -- else
--- first = second
--- -- end
--- end
--- elseif second == " " then
--- first = nil
--- n = n + 1
--- else
--- -- local crs = high[second]
--- -- if crs then
--- -- for s in utfcharacters(str) do
--- -- if n == 1 then
--- -- break
--- -- else
--- -- t = t + 1
--- -- tokens[t] = s
--- -- n = n - 1
--- -- end
--- -- end
--- -- if first then
--- -- t = t + 1
--- -- tokens[t] = first
--- -- end
--- -- first = crs
--- -- done = true
--- -- else
--- local cgf = graphemes[first]
--- if cgf and cgf[second] then
--- for s in utfcharacters(str) do
--- if n == 1 then
--- break
--- else
--- t = t + 1
--- tokens[t] = s
--- n = n - 1
--- end
--- end
--- first = cgf[second]
--- done = true
--- else
--- first = second
--- n = n + 1
--- end
--- -- end
--- end
--- end
--- if done then
--- if first then
--- t = t + 1
--- tokens[t] = first
--- end
--- return concat(tokens) -- seldom called
--- end
--- elseif nstr > 0 then
--- return high[str] or str -- this will go from here
--- end
--- end
--- return str
--- end
-
--- this is about twice as fast
-
local p_collapse = nil -- so we can reset if needed
local function prepare()
- if initialize then
- initialize()
- end
local tree = utfchartabletopattern(collapsed)
p_collapse = Cs((tree/collapsed + p_utf8character)^0 * P(-1)) -- the P(1) is needed in order to accept non utf
end
@@ -309,72 +199,9 @@ function utffilters.collapse(str,filename)
end
end
--- function utffilters.decompose(str)
--- if str and str ~= "" then
--- local nstr = #str
--- if nstr > 1 then
--- -- if initialize then -- saves a call
--- -- initialize()
--- -- end
--- local tokens, t, done, n = { }, 0, false, 0
--- for s in utfcharacters(str) do
--- local dec = decomposed[s]
--- if dec then
--- if not done then
--- if n > 0 then
--- for s in utfcharacters(str) do
--- if n == 0 then
--- break
--- else
--- t = t + 1
--- tokens[t] = s
--- n = n - 1
--- end
--- end
--- end
--- done = true
--- end
--- t = t + 1
--- tokens[t] = dec
--- elseif done then
--- t = t + 1
--- tokens[t] = s
--- else
--- n = n + 1
--- end
--- end
--- if done then
--- return concat(tokens) -- seldom called
--- end
--- end
--- end
--- return str
--- end
-
--- local replacer = nil
--- local finder = nil
---
--- function utffilters.decompose(str) -- 3 to 4 times faster than the above
--- if not replacer then
--- if initialize then
--- initialize()
--- end
--- local tree = utfchartabletopattern(decomposed)
--- finder = lpeg.finder(tree,false,true)
--- replacer = lpeg.replacer(tree,decomposed,false,true)
--- end
--- if str and str ~= "" and #str > 1 and lpegmatch(finder,str) then
--- return lpegmatch(replacer,str)
--- end
--- return str
--- end
-
local p_decompose = nil
local function prepare()
- if initialize then
- initialize()
- end
local tree = utfchartabletopattern(decomposed)
p_decompose = Cs((tree/decomposed + p_utf8character)^0 * P(-1))
end
@@ -448,7 +275,7 @@ local p_reorder = nil
-- return p, new
-- end
--- -- the next one isnto stable for similar weights
+-- -- the next one into stable for similar weights
local sorter = function(a,b)
return b[2] < a[2]