diff options
Diffstat (limited to 'tex/context/base/mkiv/char-utf.lua')
-rw-r--r-- | tex/context/base/mkiv/char-utf.lua | 116 |
1 files changed, 69 insertions, 47 deletions
diff --git a/tex/context/base/mkiv/char-utf.lua b/tex/context/base/mkiv/char-utf.lua index 5702f2087..8f46ae98f 100644 --- a/tex/context/base/mkiv/char-utf.lua +++ b/tex/context/base/mkiv/char-utf.lua @@ -22,6 +22,7 @@ over a string.</p> educational purposes.</p> --ldx]]-- +local next, type = next, type local gsub, find = string.gsub, string.find local concat, sortedhash, keys, sort = table.concat, table.sortedhash, table.keys, table.sort local utfchar, utfbyte, utfcharacters, utfvalues = utf.char, utf.byte, utf.characters, utf.values @@ -39,6 +40,7 @@ local utfchartabletopattern = lpeg.utfchartabletopattern local formatters = string.formatters local allocate = utilities.storage.allocate or function() return { } end +local mark = utilities.storage.mark or allocate local charfromnumber = characters.fromnumber @@ -84,17 +86,23 @@ characters.decomposed = decomposed local graphemes = characters.graphemes local collapsed = characters.collapsed -local mathpairs = characters.mathpairs +local mathlists = characters.mathlists -if not graphemes then +if graphemes then + + mark(graphemes) + mark(collapsed) + mark(mathlists) + +else graphemes = allocate() collapsed = allocate() - mathpairs = allocate() + mathlists = allocate() characters.graphemes = graphemes characters.collapsed = collapsed - characters.mathpairs = mathpairs + characters.mathlists = mathlists local function backtrack(v,last,target) local vs = v.specials @@ -106,57 +114,70 @@ if not graphemes then end end - local function setpair(one,two,unicode,first,second,combination) - local mps = mathpairs[one] - if not mps then - mps = { [two] = unicode } - mathpairs[one] = mps - else - mps[two] = unicode - end - local mps = mathpairs[first] - if not mps then - mps = { [second] = combination } - mathpairs[first] = mps - else - mps[second] = combination + local function setlist(unicode,list,start,category) + if list[start] ~= 0x20 then + local t = mathlists + for i=start,#list do + local l = list[i] + local f = t[l] + if f then + t = f + else + f = { } + t[l] = f + t = f + end + end + t[category] = unicode end end + local mlists = { } + for unicode, v in next, data do local vs = v.specials - if vs and #vs == 3 and vs[1] == "char" then - -- - local one, two = vs[2], vs[3] - local first, second, combination = utfchar(one), utfchar(two), utfchar(unicode) - -- - collapsed[first..second] = combination - backtrack(data[one],second,combination) - -- sort of obsolete: - local cgf = graphemes[first] - if not cgf then - cgf = { [second] = combination } - graphemes[first] = cgf - else - cgf[second] = combination + if vs then + local kind = vs[1] + local size = #vs + if kind == "char" and size == 3 then -- what if more than 3 + -- + local one, two = vs[2], vs[3] + local first, second, combination = utfchar(one), utfchar(two), utfchar(unicode) + -- + collapsed[first..second] = combination + backtrack(data[one],second,combination) + -- sort of obsolete: + local cgf = graphemes[first] + if not cgf then + cgf = { [second] = combination } + graphemes[first] = cgf + else + cgf[second] = combination + end + -- end - -- - if v.mathclass or v.mathspec then - setpair(two,one,unicode,second,first,combination) -- watch order + if (kind == "char" or kind == "compat") and (size > 2) and (v.mathclass or v.mathspec) then + setlist(unicode,vs,2,"specials") end end - local mp = v.mathpair - if mp then - local one, two = mp[1], mp[2] - local first, second, combination = utfchar(one), utfchar(two), utfchar(unicode) - setpair(one,two,unicode,first,second,combination) + local ml = v.mathlist + if ml then + mlists[unicode] = ml end end + -- these win: + + for unicode, ml in next, mlists do + setlist(unicode,ml,1,"mathlist") + end + + mlists = nil + if storage then - storage.register("characters/graphemes", characters.graphemes, "characters.graphemes") - storage.register("characters/collapsed", characters.collapsed, "characters.collapsed") - storage.register("characters/mathpairs", characters.mathpairs, "characters.mathpairs") + storage.register("characters/graphemes", graphemes, "characters.graphemes") + storage.register("characters/collapsed", collapsed, "characters.collapsed") + storage.register("characters/mathlists", mathlists, "characters.mathlists") end end @@ -183,14 +204,15 @@ local p_collapse = nil -- so we can reset if needed local function prepare() local tree = utfchartabletopattern(collapsed) - p_collapse = Cs((tree/collapsed + p_utf8character)^0 * P(-1)) -- the P(1) is needed in order to accept non utf + -- p_collapse = Cs((tree/collapsed + p_utf8character)^0 * P(-1)) + p_collapse = Cs((tree/collapsed + p_utf8character)^0) end function utffilters.collapse(str,filename) if not p_collapse then prepare() end - if not str or #str == "" or #str == 1 then + if not str or str == "" or #str == 1 then return str elseif filename and skippable[filesuffix(filename)] then -- we could hash the collapsables or do a quicker test return str @@ -213,7 +235,7 @@ function utffilters.decompose(str,filename) -- 3 to 4 times faster than the abov if str and str ~= "" and #str > 1 then return lpegmatch(p_decompose,str) end - if not str or #str == "" or #str < 2 then + if not str or str == "" or #str < 2 then return str elseif filename and skippable[filesuffix(filename)] then return str @@ -338,7 +360,7 @@ function utffilters.reorder(str,filename) if not p_reorder then prepare() end - if not str or #str == "" or #str < 2 then + if not str or str == "" or #str < 2 then return str elseif filename and skippable[filesuffix(filename)] then return str |