if not modules then modules = { } end modules ['font-map'] = {
    version   = 1.001,
    comment   = "companion to font-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- Frequently used globals and library functions are cached in locals.

local tonumber, next, type = tonumber, next, type

local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match
local utfbyte = utf.byte
local floor = math.floor
local formatters = string.formatters

-- Trace flags. Each flag is updated by the callback registered for its tracker. The
-- "fonts.mapping" callback has to assign the local trace_mapping (it used to assign an
-- unrelated global, which left the local flag permanently false).

local trace_loading = false  trackers.register("fonts.loading", function(v) trace_loading = v end)
local trace_mapping = false  trackers.register("fonts.mapping", function(v) trace_mapping = v end)

local report_fonts = logs.reporter("fonts","loading") -- not otf only

-- The mappings table is shared through fonts.mappings; an existing one is reused.

local fonts    = fonts or { }
local mappings = fonts.mappings or { }
fonts.mappings = mappings

local allocate = utilities.storage.allocate

--[[ldx--
Eventually this code will disappear because map files are more or less obsolete. Some code may move to runtime or auxiliary modules.
The name-to-unicode related code will stay, of course.
--ldx]]--

-- local function loadlumtable(filename) -- will move to font goodies
--     local lumname = file.replacesuffix(file.basename(filename),"lum")
--     local lumfile = resolvers.findfile(lumname,"map") or ""
--     if lumfile ~= "" and lfs.isfile(lumfile) then
--         if trace_loading or trace_mapping then
--             report_fonts("loading map table %a",lumfile)
--         end
--         lumunic = dofile(lumfile)
--         return lumunic, lumfile
--     end
-- end

-- Building blocks for parsing glyph names. Only uppercase hexadecimal digits are
-- accepted. A hexadecimal match is converted into a number by the capture function.

local function hextonumber(s)
    return tonumber(s,16)
end

local hex     = R("AF","09")
----- hexfour = (hex*hex*hex*hex)         / function(s) return tonumber(s,16) end
----- hexsix  = (hex*hex*hex*hex*hex*hex) / function(s) return tonumber(s,16) end
local hexfour = (hex * hex * hex^-2) / hextonumber -- two up to four digits
local hexsix  = (hex * hex * hex^-4) / hextonumber -- two up to six digits
local dec     = (R("09")^1) / tonumber
local period  = P(".")

-- A code parser yields either one number followed by false (a single code that is
-- followed by a period or by the end of the name) or a table of numbers followed by
-- true (a run of codes).

local stop = period + P(-1)

local function codeparser(prefix,code)
    local single   = code * stop * Cc(false)
    local multiple = Ct(code^1) * Cc(true)
    return prefix * (single + multiple)
end

local unicode = codeparser(P("uni") + P("UNI"),hexfour) -- base planes
local ucode   = codeparser(P("u")   + P("U"),  hexsix ) -- extended
local index   = P("index") * dec * Cc(false)

local parser  = unicode + ucode + index

local parsers = { }

-- Returns the parser that belongs to the given prefix. Without a prefix (nil or an
-- empty string) the generic uni/u/index parser is returned. Otherwise we get a parser
-- for "<str>.<decimal>" that yields the number followed by false; such parsers are
-- created once per prefix and kept in the parsers table.

local function makenameparser(str)
    if not str or str == "" then
        return parser
    end
    local cached = parsers[str]
    if cached then
        return cached
    end
    local fresh = P(str) * period * dec * Cc(false)
    parsers[str] = fresh
    return fresh
end

local f_single = formatters["%04X"]
local f_double = formatters["%04X%04X"]

-- 0.684 0.661 0,672 0.650 : cache at lua end (more mem)
-- 0.682 0,672 0.698 0.657 : no cache (moderate mem i.e.
-- lua strings)
-- 0.644 0.647 0.655 0.645 : convert in c (less mem in theory)

-- local tounicodes = table.setmetatableindex(function(t,unicode)
--     local s
--     if unicode < 0xD7FF or (unicode > 0xDFFF and unicode <= 0xFFFF) then
--         s = f_single(unicode)
--     else
--         unicode = unicode - 0x10000
--         s = f_double(floor(unicode/1024)+0xD800,unicode%1024+0xDC00)
--     end
--     t[unicode] = s
--     return s
-- end)
--
-- local function tounicode16(unicode,name)
--     local s = tounicodes[unicode]
--     if s then
--         return s
--     else
--         report_fonts("can't convert %a in %a into tounicode",unicode,name)
--     end
-- end
--
-- local function tounicode16sequence(unicodes,name)
--     local t = { }
--     for l=1,#unicodes do
--         local u = unicodes[l]
--         local s = tounicodes[u]
--         if s then
--             t[l] = s
--         else
--             report_fonts ("can't convert %a in %a into tounicode",u,name)
--             return
--         end
--     end
--     return concat(t)
-- end
--
-- local function tounicode(unicode,name)
--     if type(unicode) == "table" then
--         local t = { }
--         for l=1,#unicode do
--             local u = unicode[l]
--             local s = tounicodes[u]
--             if s then
--                 t[l] = s
--             else
--                 report_fonts ("can't convert %a in %a into tounicode",u,name)
--                 return
--             end
--         end
--         return concat(t)
--     else
--         local s = tounicodes[unicode]
--         if s then
--             return s
--         else
--             report_fonts("can't convert %a in %a into tounicode",unicode,name)
--         end
--     end
-- end

-- Converts one code point into uppercase hexadecimal UTF-16: four digits for a code in
-- the basic plane, eight digits (a high and a low surrogate) for anything else. The
-- first range includes 0xD7FF itself; with a strict "<" that code point ended up in
-- the surrogate branch and came out as garbage.
--
-- NOTE(review): values in the surrogate range 0xD800-0xDFFF still take the second
-- branch, as before; callers are presumably expected not to pass them.

local function utf16hex(u)
    if u <= 0xD7FF or (u > 0xDFFF and u <= 0xFFFF) then
        return f_single(u)
    else
        u = u - 0x10000
        return f_double(floor(u/1024)+0xD800,u%1024+0xDC00)
    end
end

-- Returns the tounicode string for a single code point. The name argument is not used
-- by this implementation; it is kept because it is part of the interface.

local function tounicode16(unicode,name)
    return utf16hex(unicode)
end

-- Returns the concatenated tounicode string for an array of code points; an empty
-- array gives an empty string. The name argument is not used.

local function tounicode16sequence(unicodes,name)
    local t = { }
    for l=1,#unicodes do
        t[l] = utf16hex(unicodes[l])
    end
    return concat(t)
end

-- Accepts either one code point or an array of code points and returns the matching
-- tounicode string. The name argument is not used.

local function tounicode(unicode,name)
    if type(unicode) == "table" then
        return tounicode16sequence(unicode,name)
    else
        return utf16hex(unicode)
    end
end

-- The inverse for one character: a string of four hexadecimal digits is converted
-- directly, any other string is split into two groups of four digits that are taken to
-- be a high and a low surrogate.

local function fromunicode16(str)
    if #str == 4 then
        return tonumber(str,16)
    else
        local l, r = match(str,"(....)(....)")
     -- return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00
        return 0x10000 + (tonumber(l,16)-0xD800)*0x400 + tonumber(r,16) - 0xDC00
    end
end

-- Slightly slower:
--
-- local p = C(4) * (C(4)^-1) / function(l,r)
--     if r then
--         return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00
--     else
--         return tonumber(l,16)
--     end
-- end
--
-- local function fromunicode16(str)
--     return lpegmatch(p,str)
-- end

mappings.makenameparser      = makenameparser
mappings.tounicode           = tounicode
mappings.tounicode16         = tounicode16
mappings.tounicode16sequence = tounicode16sequence
mappings.fromunicode16       = fromunicode16

-- Splits a glyph name into its ligature components: the parts that are separated by
-- underscores, up to (and not including) the first period.

local ligseparator = P("_")
local varseparator = P(".")

local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator * C((1 - ligseparator - varseparator)^1))^0)

-- maybe: ff fi fl ffi ffl => f_f f_i f_l f_f_i f_f_l

-- local function test(name)
--     local split = lpegmatch(namesplitter,name)
--     print(string.formatters["%s: [% t]"](name,split))
-- end

-- test("i.f_")
-- test("this")
-- test("this.that")
-- test("japan1.123")
-- test("such_so_more")
-- test("such_so_more.that")

-- to be completed ..
-- for fonts that use unicodes for ligatures which
-- is actually a bad thing and should be avoided in the first place

local overloads = allocate {
    IJ  = { name = "I_J",   unicode = { 0x49, 0x4A },       mess = 0x0132 },
    ij  = { name = "i_j",   unicode = { 0x69, 0x6A },       mess = 0x0133 },
    ff  = { name = "f_f",   unicode = { 0x66, 0x66 },       mess = 0xFB00 },
    fi  = { name = "f_i",   unicode = { 0x66, 0x69 },       mess = 0xFB01 },
    fl  = { name = "f_l",   unicode = { 0x66, 0x6C },       mess = 0xFB02 },
    ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 }, mess = 0xFB03 },
    ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C }, mess = 0xFB04 },
    fj  = { name = "f_j",   unicode = { 0x66, 0x6A } },
    fk  = { name = "f_k",   unicode = { 0x66, 0x6B } },
}

-- Each entry can also be found by its name and by its mess code. The extra keys are
-- collected first and added afterwards, because adding keys to a table while it is
-- being traversed with next is not permitted.

do
    local aliases = { }
    for k, v in next, overloads do
        local name = v.name
        local mess = v.mess
        if name then
            aliases[name] = v
        end
        if mess then
            aliases[mess] = v
        end
    end
    for k, v in next, aliases do
        overloads[k] = v
    end
end

mappings.overloads = overloads

-- Sets the unicode field of the glyphs in data.descriptions: a number for a single
-- character, a table of numbers for a ligature.
--
-- data         : font data; the fields resources, properties and descriptions are used
-- filename     : not used in this function
-- checklookups : optional function, called as checklookups(data,missing,nofmissing)
--                after the name based pass, where missing is a set of description keys
--                for which nothing was found
--
-- Nothing is done (and nothing is returned) when resources.unicodes is absent.

function mappings.addtounicode(data,filename,checklookups)
    local resources = data.resources
    local unicodes  = resources.unicodes
    if not unicodes then
        return
    end
    local properties   = data.properties
    local descriptions = data.descriptions
    -- we need to move this code
    unicodes['space']  = unicodes['space']  or 32
    unicodes['hyphen'] = unicodes['hyphen'] or 45
    unicodes['zwj']    = unicodes['zwj']    or 0x200D
    unicodes['zwnj']   = unicodes['zwnj']   or 0x200C
    --
    local private       = fonts.constructors and fonts.constructors.privateoffset or 0xF0000 -- 0x10FFFF
    local unicodevector = fonts.encodings.agl.unicodes or { } -- loaded runtime in context
    local contextvector = fonts.encodings.agl.ctxcodes or { } -- loaded runtime in context
    local missing       = { }
    local nofmissing    = 0
    local oparser       = nil
    local cidnames      = nil
    local cidcodes      = nil
    local cidinfo       = properties.cidinfo
    local usedmap       = cidinfo and fonts.cid.getmap(cidinfo)
    local uparser       = makenameparser() -- hm, every time?
    if usedmap then
        oparser  = usedmap and makenameparser(cidinfo.ordering)
        cidnames = usedmap.names
        cidcodes = usedmap.unicodes
    end
    local ns = 0 -- number of single mappings
    local nl = 0 -- number of ligature (multiple) mappings
    --
    for unic, glyph in next, descriptions do
        local name = glyph.name
        if name then
            local r = overloads[name]
            if r then
                -- get rid of weird ligatures
             -- glyph.name    = r.name
                glyph.unicode = r.unicode
            elseif not unic or unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
                local unicode = unicodevector[name] or contextvector[name]
                if unicode then
                    glyph.unicode = unicode
                    ns            = ns + 1
                end
                -- cidmap heuristics, beware, there is no guarantee for a match unless
                -- the chain resolves
                if (not unicode) and usedmap then
                    local foundindex = lpegmatch(oparser,name)
                    if foundindex then
                        unicode = cidcodes[foundindex] -- name to number
                        if unicode then
                            glyph.unicode = unicode
                            ns            = ns + 1
                        else
                            local reference = cidnames[foundindex] -- number to name
                            if reference then
                                local foundindex = lpegmatch(oparser,reference)
                                if foundindex then
                                    unicode = cidcodes[foundindex]
                                    if unicode then
                                        glyph.unicode = unicode
                                        ns            = ns + 1
                                    end
                                end
                                if not unicode or unicode == "" then
                                    local foundcodes, multiple = lpegmatch(uparser,reference)
                                    if foundcodes then
                                        glyph.unicode = foundcodes
                                        if multiple then
                                            nl      = nl + 1
                                            unicode = true
                                        else
                                            ns      = ns + 1
                                            unicode = foundcodes
                                        end
                                    end
                                end
                            end
                        end
                    end
                end
                -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_
                --
                -- It is not trivial to find a solution that suits all fonts. We tried several alternatives
                -- and this one seems to work reasonable also with fonts that use less standardized naming
                -- schemes. The extra private test is tested by KE and seems to work okay with non-typical
                -- fonts as well.
                --
                -- NOTE(review): contextvector is indexed with the full name and not with the
                -- component in the lookups below; this is left as it was, confirm the intent.
                --
                if not unicode or unicode == "" then
                    local split  = lpegmatch(namesplitter,name)
                    local nsplit = split and #split or 0 -- add if
                    if nsplit == 0 then
                        -- skip
                    elseif nsplit == 1 then
                        local base = split[1]
                        local u = unicodes[base] or unicodevector[base] or contextvector[name]
                        if not u then
                            -- skip
                        elseif type(u) == "table" then
                            -- unlikely
                            if u[1] < private then
                                unicode = u
                                glyph.unicode = unicode
                            end
                        elseif u < private then
                            unicode = u
                            glyph.unicode = unicode
                        end
                    else
                        local t, n = { }, 0
                        for l=1,nsplit do
                            local base = split[l]
                            local u = unicodes[base] or unicodevector[base] or contextvector[name]
                            if not u then
                                break
                            elseif type(u) == "table" then
                                if u[1] >= private then
                                    break
                                end
                                n = n + 1
                                t[n] = u[1]
                            else
                                if u >= private then
                                    break
                                end
                                n = n + 1
                                t[n] = u
                            end
                        end
                        if n > 0 then
                            if n == 1 then
                                unicode = t[1]
                            else
                                unicode = t
                            end
                            glyph.unicode = unicode
                        end
                    end
                    -- NOTE(review): this is counted even when the split gave no result
                    nl = nl + 1
                end
                -- last resort (we might need to catch private here as well)
                if not unicode or unicode == "" then
                    local foundcodes, multiple = lpegmatch(uparser,name)
                    if foundcodes then
                        glyph.unicode = foundcodes
                        if multiple then
                            nl      = nl + 1
                            unicode = true
                        else
                            ns      = ns + 1
                            unicode = foundcodes
                        end
                    end
                end
                -- check using substitutes and alternates
                local r = overloads[unicode]
                if r then
                    unicode = r.unicode
                    glyph.unicode = unicode
                end
                --
                if not unicode then
                    missing[unic] = true
                    nofmissing    = nofmissing + 1
                end
            end
        else
            -- no name
        end
    end
    if type(checklookups) == "function" then
        checklookups(data,missing,nofmissing)
    end
    -- todo: go lowercase
    --
    -- Ligatures that still have no unicode are resolved from the gsub ligature
    -- sequences: the keys on the path through a coverage tree are the components.
    local unset = { }
    for unic, glyph in next, descriptions do
        if not glyph.unicode and glyph.class == "ligature" then
            unset[unic] = glyph
        end
    end
    if next(unset) then
        local sequences = resources.sequences
        local collected = { }
        for i=1,#sequences do
            local sequence = sequences[i]
            if sequence.type == "gsub_ligature" then
                local steps = sequence.steps
                if steps then
                    local l = { } -- the path (components) walked so far
                    local function traverse(p,k,v)
                        if k == "ligature" then
                            collected[v] = { unpack(l) }
                        else
                            table.insert(l,k)
                            for k, vv in next, v do
                                traverse(p,k,vv)
                            end
                            table.remove(l)
                        end
                    end
                    for i=1,#steps do
                        -- we actually had/have this in base mode
                        local coverage = steps[i].coverage
                        if coverage then
                            for k, v in next, coverage do
                                traverse(k,k,v)
                            end
                        end
                    end
                end
            end
        end
        if next(collected) then
            -- Components that are ligatures themselves get replaced by their own
            -- components; this is repeated until a pass changes nothing.
            while true do
                local done = false
                for k, v in next, collected do
                    for i=1,#v do
                        local vi = v[i]
                        if vi == k then
                            -- a ligature that refers to itself is dropped
                            collected[k] = nil
                            unset[k] = nil
                        else
                            local c = collected[vi]
                            if c then
                                done = true
                                local t = { }
                                local n = i - 1
                                -- the components before i come from v (they used to be
                                -- copied from the empty t, so they got lost)
                                for j=1,n do
                                    t[j] = v[j]
                                end
                                -- component i is replaced by what it expands to
                                for j=1,#c do
                                    n = n + 1
                                    t[n] = c[j]
                                end
                                -- the components after i come from v too
                                for j=i+1,#v do
                                    n = n + 1
                                    t[n] = v[j]
                                end
                                collected[k] = t
                                break
                            end
                        end
                    end
                end
                if not done then
                    break
                end
            end
            local n = 0
            for k, v in next, unset do
                local u = collected[k] -- local: this used to leak a global
                if u then
                    v.unicode = u
                    n = n + 1
                end
            end
            if trace_mapping and n > 0 then
                report_fonts("%n ligature tounicode mappings deduced from gsub ligature feaures",n)
            end
        end
    end
    if trace_mapping then
        for unic, glyph in table.sortedhash(descriptions) do
            local name    = glyph.name
            local index   = glyph.index
            local unicode = glyph.unicode
            if unicode then
                if type(unicode) == "table" then
                    local unicodes = { }
                    for i=1,#unicode do
                        unicodes[i] = formatters("%U",unicode[i])
                    end
                    report_fonts("internal slot %U, name %a, unicode %U, tounicode % t",index,name,unic,unicodes)
                else
                    report_fonts("internal slot %U, name %a, unicode %U, tounicode %U",index,name,unic,unicode)
                end
            else
                report_fonts("internal slot %U, name %a, unicode %U",index,name,unic)
            end
        end
    end
    if trace_loading and (ns > 0 or nl > 0) then
        -- the second value is the ligature counter (the singles counter was reported)
        report_fonts("%s tounicode entries added, ligatures %s",nl+ns,nl)
    end
end

-- local parser = makenameparser("Japan1")
-- local parser = makenameparser()
-- local function test(str)
--     local b, a = lpegmatch(parser,str)
--     print((a and table.serialize(b)) or b)
-- end
-- test("a.sc")
-- test("a")
-- test("uni1234")
-- test("uni1234.xx")
-- test("uni12349876")
-- test("u123400987600")
-- test("index1234")
-- test("Japan1.123")