summary | refs | log | tree | commit | diff
path: root/tex/context/base/mkiv/char-utf.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkiv/char-utf.lua')
-rw-r--r-- tex/context/base/mkiv/char-utf.lua 116
1 files changed, 69 insertions, 47 deletions
diff --git a/tex/context/base/mkiv/char-utf.lua b/tex/context/base/mkiv/char-utf.lua
index 5702f2087..8f46ae98f 100644
--- a/tex/context/base/mkiv/char-utf.lua
+++ b/tex/context/base/mkiv/char-utf.lua
@@ -22,6 +22,7 @@ over a string.</p>
educational purposes.</p>
--ldx]]--
+local next, type = next, type
local gsub, find = string.gsub, string.find
local concat, sortedhash, keys, sort = table.concat, table.sortedhash, table.keys, table.sort
local utfchar, utfbyte, utfcharacters, utfvalues = utf.char, utf.byte, utf.characters, utf.values
@@ -39,6 +40,7 @@ local utfchartabletopattern = lpeg.utfchartabletopattern
local formatters = string.formatters
local allocate = utilities.storage.allocate or function() return { } end
+local mark = utilities.storage.mark or allocate
local charfromnumber = characters.fromnumber
@@ -84,17 +86,23 @@ characters.decomposed = decomposed
local graphemes = characters.graphemes
local collapsed = characters.collapsed
-local mathpairs = characters.mathpairs
+local mathlists = characters.mathlists
-if not graphemes then
+if graphemes then
+
+ mark(graphemes)
+ mark(collapsed)
+ mark(mathlists)
+
+else
graphemes = allocate()
collapsed = allocate()
- mathpairs = allocate()
+ mathlists = allocate()
characters.graphemes = graphemes
characters.collapsed = collapsed
- characters.mathpairs = mathpairs
+ characters.mathlists = mathlists
local function backtrack(v,last,target)
local vs = v.specials
@@ -106,57 +114,70 @@ if not graphemes then
end
end
- local function setpair(one,two,unicode,first,second,combination)
- local mps = mathpairs[one]
- if not mps then
- mps = { [two] = unicode }
- mathpairs[one] = mps
- else
- mps[two] = unicode
- end
- local mps = mathpairs[first]
- if not mps then
- mps = { [second] = combination }
- mathpairs[first] = mps
- else
- mps[second] = combination
+ local function setlist(unicode,list,start,category)
+ if list[start] ~= 0x20 then
+ local t = mathlists
+ for i=start,#list do
+ local l = list[i]
+ local f = t[l]
+ if f then
+ t = f
+ else
+ f = { }
+ t[l] = f
+ t = f
+ end
+ end
+ t[category] = unicode
end
end
+ local mlists = { }
+
for unicode, v in next, data do
local vs = v.specials
- if vs and #vs == 3 and vs[1] == "char" then
- --
- local one, two = vs[2], vs[3]
- local first, second, combination = utfchar(one), utfchar(two), utfchar(unicode)
- --
- collapsed[first..second] = combination
- backtrack(data[one],second,combination)
- -- sort of obsolete:
- local cgf = graphemes[first]
- if not cgf then
- cgf = { [second] = combination }
- graphemes[first] = cgf
- else
- cgf[second] = combination
+ if vs then
+ local kind = vs[1]
+ local size = #vs
+ if kind == "char" and size == 3 then -- what if more than 3
+ --
+ local one, two = vs[2], vs[3]
+ local first, second, combination = utfchar(one), utfchar(two), utfchar(unicode)
+ --
+ collapsed[first..second] = combination
+ backtrack(data[one],second,combination)
+ -- sort of obsolete:
+ local cgf = graphemes[first]
+ if not cgf then
+ cgf = { [second] = combination }
+ graphemes[first] = cgf
+ else
+ cgf[second] = combination
+ end
+ --
end
- --
- if v.mathclass or v.mathspec then
- setpair(two,one,unicode,second,first,combination) -- watch order
+ if (kind == "char" or kind == "compat") and (size > 2) and (v.mathclass or v.mathspec) then
+ setlist(unicode,vs,2,"specials")
end
end
- local mp = v.mathpair
- if mp then
- local one, two = mp[1], mp[2]
- local first, second, combination = utfchar(one), utfchar(two), utfchar(unicode)
- setpair(one,two,unicode,first,second,combination)
+ local ml = v.mathlist
+ if ml then
+ mlists[unicode] = ml
end
end
+ -- these win:
+
+ for unicode, ml in next, mlists do
+ setlist(unicode,ml,1,"mathlist")
+ end
+
+ mlists = nil
+
if storage then
- storage.register("characters/graphemes", characters.graphemes, "characters.graphemes")
- storage.register("characters/collapsed", characters.collapsed, "characters.collapsed")
- storage.register("characters/mathpairs", characters.mathpairs, "characters.mathpairs")
+ storage.register("characters/graphemes", graphemes, "characters.graphemes")
+ storage.register("characters/collapsed", collapsed, "characters.collapsed")
+ storage.register("characters/mathlists", mathlists, "characters.mathlists")
end
end
@@ -183,14 +204,15 @@ local p_collapse = nil -- so we can reset if needed
local function prepare()
local tree = utfchartabletopattern(collapsed)
- p_collapse = Cs((tree/collapsed + p_utf8character)^0 * P(-1)) -- the P(1) is needed in order to accept non utf
+ -- p_collapse = Cs((tree/collapsed + p_utf8character)^0 * P(-1))
+ p_collapse = Cs((tree/collapsed + p_utf8character)^0)
end
function utffilters.collapse(str,filename)
if not p_collapse then
prepare()
end
- if not str or #str == "" or #str == 1 then
+ if not str or str == "" or #str == 1 then
return str
elseif filename and skippable[filesuffix(filename)] then -- we could hash the collapsables or do a quicker test
return str
@@ -213,7 +235,7 @@ function utffilters.decompose(str,filename) -- 3 to 4 times faster than the abov
if str and str ~= "" and #str > 1 then
return lpegmatch(p_decompose,str)
end
- if not str or #str == "" or #str < 2 then
+ if not str or str == "" or #str < 2 then
return str
elseif filename and skippable[filesuffix(filename)] then
return str
@@ -338,7 +360,7 @@ function utffilters.reorder(str,filename)
if not p_reorder then
prepare()
end
- if not str or #str == "" or #str < 2 then
+ if not str or str == "" or #str < 2 then
return str
elseif filename and skippable[filesuffix(filename)] then
return str