From 4f053696e1813fde4bd6cebbb77ff2a1e1f6800b Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Thu, 29 Oct 2015 07:14:40 +0100 Subject: [fontloader] sync with Context as of 2015-10-29 --- src/fontloader/misc/fontloader-font-map.lua | 473 ++++++++++------------------ 1 file changed, 163 insertions(+), 310 deletions(-) (limited to 'src/fontloader/misc/fontloader-font-map.lua') diff --git a/src/fontloader/misc/fontloader-font-map.lua b/src/fontloader/misc/fontloader-font-map.lua index 69474ba..b645d9a 100644 --- a/src/fontloader/misc/fontloader-font-map.lua +++ b/src/fontloader/misc/fontloader-font-map.lua @@ -31,25 +31,27 @@ of obsolete. Some code may move to runtime or auxiliary modules.

The name to unciode related code will stay of course.

--ldx]]-- -local function loadlumtable(filename) -- will move to font goodies - local lumname = file.replacesuffix(file.basename(filename),"lum") - local lumfile = resolvers.findfile(lumname,"map") or "" - if lumfile ~= "" and lfs.isfile(lumfile) then - if trace_loading or trace_mapping then - report_fonts("loading map table %a",lumfile) - end - lumunic = dofile(lumfile) - return lumunic, lumfile - end -end +-- local function loadlumtable(filename) -- will move to font goodies +-- local lumname = file.replacesuffix(file.basename(filename),"lum") +-- local lumfile = resolvers.findfile(lumname,"map") or "" +-- if lumfile ~= "" and lfs.isfile(lumfile) then +-- if trace_loading or trace_mapping then +-- report_fonts("loading map table %a",lumfile) +-- end +-- lumunic = dofile(lumfile) +-- return lumunic, lumfile +-- end +-- end local hex = R("AF","09") -local hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end -local hexsix = (hex*hex*hex*hex*hex*hex) / function(s) return tonumber(s,16) end +----- hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end +----- hexsix = (hex*hex*hex*hex*hex*hex) / function(s) return tonumber(s,16) end +local hexfour = (hex*hex*hex^-2) / function(s) return tonumber(s,16) end +local hexsix = (hex*hex*hex^-4) / function(s) return tonumber(s,16) end local dec = (R("09")^1) / tonumber local period = P(".") -local unicode = P("uni") * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true)) -local ucode = P("u") * (hexsix * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true)) +local unicode = (P("uni") + P("UNI")) * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true)) -- base planes +local ucode = (P("u") + P("U") ) * (hexsix * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true)) -- extended local index = P("index") * dec * Cc(false) local parser = unicode + ucode + index @@ -168,7 +170,6 @@ end -- return s -- end -mappings.loadlumtable = loadlumtable mappings.makenameparser = makenameparser mappings.tounicode = tounicode mappings.tounicode16 = tounicode16 @@ -179,13 +180,13 @@ local ligseparator = P("_") local varseparator = P(".") local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator * C((1 - ligseparator - varseparator)^1))^0) +-- maybe: ff fi fl ffi ffl => f_f f_i f_l f_f_i f_f_l + -- local function test(name) -- local split = lpegmatch(namesplitter,name) -- print(string.formatters["%s: [% t]"](name,split)) -- end --- maybe: ff fi fl ffi ffl => f_f f_i f_l f_f_i f_f_l - -- test("i.f_") -- test("this") -- test("this.that") @@ -221,332 +222,184 @@ end mappings.overloads = overloads -function mappings.addtounicode(data,filename) - local resources = data.resources - local properties = data.properties - local descriptions = data.descriptions - local unicodes = resources.unicodes - local lookuptypes = resources.lookuptypes +function mappings.addtounicode(data,filename,checklookups) + local resources = data.resources + local unicodes = resources.unicodes if not unicodes then return end + local properties = data.properties + local descriptions = data.descriptions -- we need to move this code unicodes['space'] = unicodes['space'] or 32 unicodes['hyphen'] = unicodes['hyphen'] or 45 unicodes['zwj'] = unicodes['zwj'] or 0x200D unicodes['zwnj'] = unicodes['zwnj'] or 0x200C - local private = fonts.constructors.privateoffset - local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context - ----- namevector = fonts.encodings.agl.names -- loaded runtime in context - local missing = { } - local lumunic, uparser, oparser - local cidinfo, cidnames, cidcodes, usedmap - -- - cidinfo = properties.cidinfo - usedmap = cidinfo and fonts.cid.getmap(cidinfo) -- + local private = fonts.constructors and fonts.constructors.privateoffset or 0xF0000 -- 0x10FFFF + local unicodevector = fonts.encodings.agl.unicodes or { } -- loaded runtime in context + local contextvector = fonts.encodings.agl.ctxcodes or { } -- loaded runtime in context + local missing = { } + local nofmissing = 0 + local oparser = nil + local cidnames = nil + local cidcodes = nil + local cidinfo = properties.cidinfo + local usedmap = cidinfo and fonts.cid.getmap(cidinfo) + local uparser = makenameparser() -- hm, every time? if usedmap then - oparser = usedmap and makenameparser(cidinfo.ordering) - cidnames = usedmap.names - cidcodes = usedmap.unicodes + oparser = usedmap and makenameparser(cidinfo.ordering) + cidnames = usedmap.names + cidcodes = usedmap.unicodes end - uparser = makenameparser() - local ns, nl = 0, 0 + local ns = 0 + local nl = 0 + -- for unic, glyph in next, descriptions do - local index = glyph.index - local name = glyph.name - local r = overloads[name] - if r then - -- get rid of weird ligatures - -- glyph.name = r.name - glyph.unicode = r.unicode - elseif unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then - local unicode = lumunic and lumunic[name] or unicodevector[name] - if unicode then - glyph.unicode = unicode - ns = ns + 1 - end - -- cidmap heuristics, beware, there is no guarantee for a match unless - -- the chain resolves - if (not unicode) and usedmap then - local foundindex = lpegmatch(oparser,name) - if foundindex then - unicode = cidcodes[foundindex] -- name to number - if unicode then - glyph.unicode = unicode - ns = ns + 1 - else - local reference = cidnames[foundindex] -- number to name - if reference then - local foundindex = lpegmatch(oparser,reference) - if foundindex then - unicode = cidcodes[foundindex] - if unicode then - glyph.unicode = unicode - ns = ns + 1 + local name = glyph.name + if name then + local index = glyph.index + local r = overloads[name] + if r then + -- get rid of weird ligatures + -- glyph.name = r.name + glyph.unicode = r.unicode + elseif not unic or unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then + local unicode = unicodevector[name] or contextvector[name] + if unicode then + glyph.unicode = unicode + ns = ns + 1 + end + -- cidmap heuristics, beware, there is no guarantee for a match unless + -- the chain resolves + if (not unicode) and usedmap then + local foundindex = lpegmatch(oparser,name) + if foundindex then + unicode = cidcodes[foundindex] -- name to number + if unicode then + glyph.unicode = unicode + ns = ns + 1 + else + local reference = cidnames[foundindex] -- number to name + if reference then + local foundindex = lpegmatch(oparser,reference) + if foundindex then + unicode = cidcodes[foundindex] + if unicode then + glyph.unicode = unicode + ns = ns + 1 + end end - end - if not unicode or unicode == "" then - local foundcodes, multiple = lpegmatch(uparser,reference) - if foundcodes then - glyph.unicode = foundcodes - if multiple then - nl = nl + 1 - unicode = true - else - ns = ns + 1 - unicode = foundcodes + if not unicode or unicode == "" then + local foundcodes, multiple = lpegmatch(uparser,reference) + if foundcodes then + glyph.unicode = foundcodes + if multiple then + nl = nl + 1 + unicode = true + else + ns = ns + 1 + unicode = foundcodes + end end end end end end end - end - -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_ - -- - -- It is not trivial to find a solution that suits all fonts. We tried several alternatives - -- and this one seems to work reasonable also with fonts that use less standardized naming - -- schemes. The extra private test is tested by KE and seems to work okay with non-typical - -- fonts as well. - -- - -- The next time I look into this, I'll add an extra analysis step to the otf loader (we can - -- resolve some tounicodes by looking into the gsub data tables that are bound to glyphs. - -- --- a real tricky last resort: --- --- local lookups = glyph.lookups --- if lookups then --- for _, lookup in next, lookups do -- assume consistency else we need to sort --- for i=1,#lookup do --- local l = lookup[i] --- if l.type == "ligature" then --- local s = l.specification --- if s.char == glyph.name then --- local components = s.components --- if components then --- local t, n = { }, 0 --- unicode = true --- for l=1,#components do --- local base = components[l] --- local u = unicodes[base] or unicodevector[base] --- if not u then --- break --- elseif type(u) == "table" then --- if u[1] >= private then --- unicode = false --- break --- end --- n = n + 1 --- t[n] = u[1] --- else --- if u >= private then --- unicode = false --- break --- end --- n = n + 1 --- t[n] = u --- end --- end --- if n == 0 then -- done then --- -- nothing --- elseif n == 1 then --- glyph.unicode = t[1] --- else --- glyph.unicode = t --- end --- nl = nl + 1 --- break --- end --- end --- end --- end --- if unicode then --- break --- end --- end --- end - if not unicode or unicode == "" then - local split = lpegmatch(namesplitter,name) - local nsplit = split and #split or 0 - local t, n = { }, 0 - unicode = true - for l=1,nsplit do - local base = split[l] - local u = unicodes[base] or unicodevector[base] - if not u then - break - elseif type(u) == "table" then - if u[1] >= private then - unicode = false - break + -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_ + -- + -- It is not trivial to find a solution that suits all fonts. We tried several alternatives + -- and this one seems to work reasonable also with fonts that use less standardized naming + -- schemes. The extra private test is tested by KE and seems to work okay with non-typical + -- fonts as well. + -- + if not unicode or unicode == "" then + local split = lpegmatch(namesplitter,name) + local nsplit = split and #split or 0 -- add if + if nsplit == 0 then + -- skip + elseif nsplit == 1 then + local base = split[1] + local u = unicodes[base] or unicodevector[base] or contextvector[name] + if not u then + -- skip + elseif type(u) == "table" then + -- unlikely + if u[1] < private then + unicode = u + glyph.unicode = unicode + end + elseif u < private then + unicode = u + glyph.unicode = unicode end - n = n + 1 - t[n] = u[1] else - if u >= private then - unicode = false - break - end - n = n + 1 - t[n] = u - end - end - if n == 0 then -- done then - -- nothing - elseif n == 1 then - glyph.unicode = t[1] - else - glyph.unicode = t - end - nl = nl + 1 - end - -- last resort (we might need to catch private here as well) - if not unicode or unicode == "" then - local foundcodes, multiple = lpegmatch(uparser,name) - if foundcodes then - glyph.unicode = foundcodes - if multiple then - nl = nl + 1 - unicode = true - else - ns = ns + 1 - unicode = foundcodes - end - end - end - -- check using substitutes and alternates - local r = overloads[unicode] - if r then - unicode = r.unicode - glyph.unicode = unicode - end - -- - if not unicode then - missing[name] = true - end - end - end - if next(missing) then - local guess = { } - -- helper - local function check(gname,code,unicode) - local description = descriptions[code] - -- no need to add a self reference - local variant = description.name - if variant == gname then - return - end - -- the variant already has a unicode (normally that resultrs in a default tounicode to self) - local unic = unicodes[variant] - if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then - -- no default mapping and therefore maybe no tounicode yet - else - return - end - -- the variant already has a tounicode - if descriptions[code].unicode then - return - end - -- add to the list - local g = guess[variant] - -- local r = overloads[unicode] - -- if r then - -- unicode = r.unicode - -- end - if g then - g[gname] = unicode - else - guess[variant] = { [gname] = unicode } - end - end - -- - for unicode, description in next, descriptions do - local slookups = description.slookups - if slookups then - local gname = description.name - for tag, data in next, slookups do - local lookuptype = lookuptypes[tag] - if lookuptype == "alternate" then - for i=1,#data do - check(gname,data[i],unicode) - end - elseif lookuptype == "substitution" then - check(gname,data,unicode) - end - end - end - local mlookups = description.mlookups - if mlookups then - local gname = description.name - for tag, list in next, mlookups do - local lookuptype = lookuptypes[tag] - if lookuptype == "alternate" then - for i=1,#list do - local data = list[i] - for i=1,#data do - check(gname,data[i],unicode) + local t, n = { }, 0 + for l=1,nsplit do + local base = split[l] + local u = unicodes[base] or unicodevector[base] or contextvector[name] + if not u then + break + elseif type(u) == "table" then + if u[1] >= private then + break + end + n = n + 1 + t[n] = u[1] + else + if u >= private then + break + end + n = n + 1 + t[n] = u end end - elseif lookuptype == "substitution" then - for i=1,#list do - check(gname,list[i],unicode) + if n > 0 then + if n == 1 then + unicode = t[1] + else + unicode = t + end + glyph.unicode = unicode end end + nl = nl + 1 end - end - end - -- resolve references - local done = true - while done do - done = false - for k, v in next, guess do - if type(v) ~= "number" then - for kk, vv in next, v do - if vv == -1 or vv >= private or (vv >= 0xE000 and vv <= 0xF8FF) or vv == 0xFFFE or vv == 0xFFFF then - local uu = guess[kk] - if type(uu) == "number" then - guess[k] = uu - done = true - end + -- last resort (we might need to catch private here as well) + if not unicode or unicode == "" then + local foundcodes, multiple = lpegmatch(uparser,name) + if foundcodes then + glyph.unicode = foundcodes + if multiple then + nl = nl + 1 + unicode = true else - guess[k] = vv - done = true + ns = ns + 1 + unicode = foundcodes end end end - end - end - -- wrap up - local orphans = 0 - local guessed = 0 - for k, v in next, guess do - if type(v) == "number" then - descriptions[unicodes[k]].unicode = descriptions[v].unicode or v -- can also be a table - guessed = guessed + 1 - else - local t = nil - local l = lower(k) - local u = unicodes[l] - if not u then - orphans = orphans + 1 - elseif u == -1 or u >= private or (u >= 0xE000 and u <= 0xF8FF) or u == 0xFFFE or u == 0xFFFF then - local unicode = descriptions[u].unicode - if unicode then - descriptions[unicodes[k]].unicode = unicode - guessed = guessed + 1 - else - orphans = orphans + 1 - end - else - orphans = orphans + 1 + -- check using substitutes and alternates + local r = overloads[unicode] + if r then + unicode = r.unicode + glyph.unicode = unicode + end + -- + if not unicode then + missing[unic] = true + nofmissing = nofmissing + 1 end end + else + -- no name end - if trace_loading and orphans > 0 or guessed > 0 then - report_fonts("%s glyphs with no related unicode, %s guessed, %s orphans",guessed+orphans,guessed,orphans) - end end + if type(checklookups) == "function" then + checklookups(data,missing,nofmissing) + end + -- todo: go lowercase if trace_mapping then for unic, glyph in table.sortedhash(descriptions) do local name = glyph.name -- cgit v1.2.3