diff options
Diffstat (limited to 'tex/context/base/mkiv/font-map.lua')
-rw-r--r-- | tex/context/base/mkiv/font-map.lua | 204 |
1 files changed, 136 insertions, 68 deletions
diff --git a/tex/context/base/mkiv/font-map.lua b/tex/context/base/mkiv/font-map.lua index 838c74173..cf369708c 100644 --- a/tex/context/base/mkiv/font-map.lua +++ b/tex/context/base/mkiv/font-map.lua @@ -10,16 +10,18 @@ local tonumber, next, type = tonumber, next, type local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match -local utfbyte = utf.byte local floor = math.floor local formatters = string.formatters +local sortedhash, sortedkeys = table.sortedhash, table.sortedkeys -local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end) -local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_unimapping = v end) +local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end) +local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_mapping = v end) local report_fonts = logs.reporter("fonts","loading") -- not otf only -local force_ligatures = false directives.register("fonts.mapping.forceligatures",function(v) force_ligatures = v end) +-- force_ligatures is true per 2017-04-20 so that these emoji's with bad names work too + +local force_ligatures = true directives.register("fonts.mapping.forceligatures",function(v) force_ligatures = v end) local fonts = fonts or { } local mappings = fonts.mappings or { } @@ -140,7 +142,7 @@ local f_double = formatters["%04X%04X"] -- end -- end -local function tounicode16(unicode,name) +local function tounicode16(unicode) if unicode < 0xD7FF or (unicode > 0xDFFF and unicode <= 0xFFFF) then return f_single(unicode) else @@ -149,7 +151,7 @@ local function tounicode16(unicode,name) end end -local function tounicode16sequence(unicodes,name) +local function tounicode16sequence(unicodes) local t = { } for l=1,#unicodes do local u = unicodes[l] @@ -186,6 +188,32 @@ local function tounicode(unicode,name) end end +-- no real gain on runs +-- +-- local hash = setmetatableindex(function(t,u) +-- local v +-- if u < 0xD7FF or (u > 0xDFFF and u <= 0xFFFF) then +-- v = f_single(u) +-- else +-- u = u - 0x10000 +-- v = f_double(floor(u/1024)+0xD800,u%1024+0xDC00) +-- end +-- t[u] = v +-- return v +-- end) +-- +-- local function tounicode(unicode,name) +-- if type(unicode) == "table" then +-- local t = { } +-- for l=1,#unicode do +-- t[l] = hash[u] +-- end +-- return concat(t) +-- else +-- return hash[unicode] +-- end +-- end + local function fromunicode16(str) if #str == 4 then return tonumber(str,16) @@ -216,6 +244,8 @@ mappings.tounicode16 = tounicode16 mappings.tounicode16sequence = tounicode16sequence mappings.fromunicode16 = fromunicode16 +-- mozilla emoji has bad lig names: name = gsub(name,"(u[a-f0-9_]+)%-([a-f0-9_]+)","%1_%2") + local ligseparator = P("_") local varseparator = P(".") local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator * C((1 - ligseparator - varseparator)^1))^0) @@ -237,39 +267,50 @@ local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator * -- to be completed .. for fonts that use unicodes for ligatures which -- is a actually a bad thing and should be avoided in the first place -local overloads = allocate { - IJ = { name = "I_J", unicode = { 0x49, 0x4A }, mess = 0x0132 }, - ij = { name = "i_j", unicode = { 0x69, 0x6A }, mess = 0x0133 }, - ff = { name = "f_f", unicode = { 0x66, 0x66 }, mess = 0xFB00 }, - fi = { name = "f_i", unicode = { 0x66, 0x69 }, mess = 0xFB01 }, - fl = { name = "f_l", unicode = { 0x66, 0x6C }, mess = 0xFB02 }, - ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 }, mess = 0xFB03 }, - ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C }, mess = 0xFB04 }, - fj = { name = "f_j", unicode = { 0x66, 0x6A } }, - fk = { name = "f_k", unicode = { 0x66, 0x6B } }, -} - -for k, v in next, overloads do - local name = v.name - local mess = v.mess - if name then - overloads[name] = v - end - if mess then - overloads[mess] = v +do + + local overloads = allocate { + IJ = { name = "I_J", unicode = { 0x49, 0x4A }, mess = 0x0132 }, + ij = { name = "i_j", unicode = { 0x69, 0x6A }, mess = 0x0133 }, + ff = { name = "f_f", unicode = { 0x66, 0x66 }, mess = 0xFB00 }, + fi = { name = "f_i", unicode = { 0x66, 0x69 }, mess = 0xFB01 }, + fl = { name = "f_l", unicode = { 0x66, 0x6C }, mess = 0xFB02 }, + ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 }, mess = 0xFB03 }, + ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C }, mess = 0xFB04 }, + fj = { name = "f_j", unicode = { 0x66, 0x6A } }, + fk = { name = "f_k", unicode = { 0x66, 0x6B } }, + } + + local o = { } + + for k, v in next, overloads do + local name = v.name + local mess = v.mess + if name then + o[name] = v + end + if mess then + o[mess] = v + end + o[k] = v end -end -mappings.overloads = overloads + mappings.overloads = o + +end function mappings.addtounicode(data,filename,checklookups) local resources = data.resources local unicodes = resources.unicodes if not unicodes then + if trace_mapping then + report_fonts("no unicode list, quitting tounicode for %a",filename) + end return end local properties = data.properties local descriptions = data.descriptions + local overloads = mappings.overloads -- we need to move this code unicodes['space'] = unicodes['space'] or 32 unicodes['hyphen'] = unicodes['hyphen'] or 45 @@ -288,17 +329,25 @@ function mappings.addtounicode(data,filename,checklookups) local usedmap = cidinfo and fonts.cid.getmap(cidinfo) local uparser = makenameparser() -- hm, every time? if usedmap then - oparser = usedmap and makenameparser(cidinfo.ordering) - cidnames = usedmap.names - cidcodes = usedmap.unicodes + oparser = usedmap and makenameparser(cidinfo.ordering) + cidnames = usedmap.names + cidcodes = usedmap.unicodes end - local ns = 0 - local nl = 0 + local ns = 0 + local nl = 0 + -- + -- in order to avoid differences between runs due to hash randomization we + -- run over a sorted list -- - for du, glyph in next, descriptions do - local name = glyph.name + local dlist = sortedkeys(descriptions) + -- + -- for du, glyph in next, descriptions do + for i=1,#dlist do + local du = dlist[i] + local glyph = descriptions[du] + local name = glyph.name if name then - local overload = overloads[name] + local overload = overloads[name] or overloads[du] if overload then -- get rid of weird ligatures -- glyph.name = overload.name @@ -434,52 +483,71 @@ function mappings.addtounicode(data,filename,checklookups) end end end + else + local overload = overloads[du] + if overload then + glyph.unicode = overload.unicode + end end end if type(checklookups) == "function" then checklookups(data,missing,nofmissing) end - -- todo: go lowercase - - local collected = false local unicoded = 0 - for unicode, glyph in next, descriptions do - if glyph.class == "ligature" and (force_ligatures or not glyph.unicode) then - if not collected then - collected = fonts.handlers.otf.readers.getcomponents(data) - if not collected then - break - end + local collected = fonts.handlers.otf.readers.getcomponents(data) -- neglectable overhead + + local function resolve(glyph,u) + local n = #u + for i=1,n do + if u[i] > private then + n = 0 + break + end + end + if n > 0 then + if n > 1 then + glyph.unicode = u + else + glyph.unicode = u[1] end - local u = collected[unicode] -- always tables + unicoded = unicoded + 1 + end + end + + if not collected then + -- move on + elseif force_ligatures then + for i=1,#dlist do + local du = dlist[i] + local u = collected[du] -- always tables if u then - local n = #u - for i=1,n do - if u[i] > private then - n = 0 - break - end - end - if n > 0 then - if n > 1 then - glyph.unicode = u - else - glyph.unicode = u[1] - end - unicoded = unicoded + 1 + resolve(descriptions[du],u) + end + end + else + for i=1,#dlist do + local du = dlist[i] + local glyph = descriptions[du] + if glyph.class == "ligature" and not glyph.unicode then + local u = collected[du] -- always tables + if u then + resolve(glyph,u) end end end end + if trace_mapping and unicoded > 0 then report_fonts("%n ligature tounicode mappings deduced from gsub ligature features",unicoded) end - if trace_mapping then - for unic, glyph in table.sortedhash(descriptions) do - local name = glyph.name - local index = glyph.index + -- for unic, glyph in sortedhash(descriptions) do + for i=1,#dlist do + local du = dlist[i] + local glyph = descriptions[du] + local name = glyph.name or "-" + local index = glyph.index or 0 local unicode = glyph.unicode if unicode then if type(unicode) == "table" then @@ -487,12 +555,12 @@ function mappings.addtounicode(data,filename,checklookups) for i=1,#unicode do unicodes[i] = formatters("%U",unicode[i]) end - report_fonts("internal slot %U, name %a, unicode %U, tounicode % t",index,name,unic,unicodes) + report_fonts("internal slot %U, name %a, unicode %U, tounicode % t",index,name,du,unicodes) else - report_fonts("internal slot %U, name %a, unicode %U, tounicode %U",index,name,unic,unicode) + report_fonts("internal slot %U, name %a, unicode %U, tounicode %U",index,name,du,unicode) end else - report_fonts("internal slot %U, name %a, unicode %U",index,name,unic) + report_fonts("internal slot %U, name %a, unicode %U",index,name,du) end end end |