diff options
author | Philipp Gesang <phg42.2a@gmail.com> | 2014-12-14 12:29:58 +0100 |
---|---|---|
committer | Philipp Gesang <phg42.2a@gmail.com> | 2014-12-14 12:29:58 +0100 |
commit | c029158f450cd96f71db2f2a4da72a5c946089c0 (patch) | |
tree | 1c5d30d6853f6deb57cc9818b8739106a2d5e03f /src/fontloader/misc/fontloader-font-map.lua | |
parent | ea148be27d5a79fd11759856e0ab4033f328ce5a (diff) | |
parent | ca673f7b14af906606a188fd98978d3501842f63 (diff) | |
download | luaotfload-c029158f450cd96f71db2f2a4da72a5c946089c0.tar.gz |
Merge pull request #260 from phi-gamma/master
add import helper, new directory layout, revised letterspacing, fontloader gone nuts
Diffstat (limited to 'src/fontloader/misc/fontloader-font-map.lua')
-rw-r--r-- | src/fontloader/misc/fontloader-font-map.lua | 533 |
1 files changed, 533 insertions, 0 deletions
diff --git a/src/fontloader/misc/fontloader-font-map.lua b/src/fontloader/misc/fontloader-font-map.lua new file mode 100644 index 0000000..e26f28e --- /dev/null +++ b/src/fontloader/misc/fontloader-font-map.lua @@ -0,0 +1,533 @@ +if not modules then modules = { } end modules ['font-map'] = { + version = 1.001, + comment = "companion to font-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +local tonumber, next, type = tonumber, next, type + +local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower +local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match +local utfbyte = utf.byte +local floor = math.floor +local formatters = string.formatters + +local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end) +local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_unimapping = v end) + +local report_fonts = logs.reporter("fonts","loading") -- not otf only + +local fonts = fonts or { } +local mappings = fonts.mappings or { } +fonts.mappings = mappings + +--[[ldx-- +<p>Eventually this code will disappear because map files are kind +of obsolete. Some code may move to runtime or auxiliary modules.</p> +<p>The name to unciode related code will stay of course.</p> +--ldx]]-- + +local function loadlumtable(filename) -- will move to font goodies + local lumname = file.replacesuffix(file.basename(filename),"lum") + local lumfile = resolvers.findfile(lumname,"map") or "" + if lumfile ~= "" and lfs.isfile(lumfile) then + if trace_loading or trace_mapping then + report_fonts("loading map table %a",lumfile) + end + lumunic = dofile(lumfile) + return lumunic, lumfile + end +end + +local hex = R("AF","09") +local hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end +local hexsix = (hex*hex*hex*hex*hex*hex) / function(s) return tonumber(s,16) end +local dec = (R("09")^1) / tonumber +local period = P(".") +local unicode = P("uni") * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true)) +local ucode = P("u") * (hexsix * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true)) +local index = P("index") * dec * Cc(false) + +local parser = unicode + ucode + index + +local parsers = { } + +local function makenameparser(str) + if not str or str == "" then + return parser + else + local p = parsers[str] + if not p then + p = P(str) * period * dec * Cc(false) + parsers[str] = p + end + return p + end +end + +local f_single = formatters["%04X"] +local f_double = formatters["%04X%04X"] + +local function tounicode16(unicode,name) + if unicode < 0x10000 then + return f_single(unicode) + elseif unicode < 0x1FFFFFFFFF then + return f_double(floor(unicode/1024),unicode%1024+0xDC00) + else + report_fonts("can't convert %a in %a into tounicode",unicode,name) + end +end + +local function tounicode16sequence(unicodes,name) + local t = { } + for l=1,#unicodes do + local u = unicodes[l] + if u < 0x10000 then + t[l] = f_single(u) + elseif unicode < 0x1FFFFFFFFF then + t[l] = f_double(floor(u/1024),u%1024+0xDC00) + else + report_fonts ("can't convert %a in %a into tounicode",u,name) + return + end + end + return concat(t) +end + +local function tounicode(unicode,name) + if type(unicode) == "table" then + local t = { } + for l=1,#unicode do + local u = unicode[l] + if u < 0x10000 then + t[l] = f_single(u) + elseif u < 0x1FFFFFFFFF then + t[l] = f_double(floor(u/1024),u%1024+0xDC00) + else + report_fonts ("can't convert %a in %a into tounicode",u,name) + return + end + end + return concat(t) + else + if unicode < 0x10000 then + return f_single(unicode) + elseif unicode < 0x1FFFFFFFFF then + return f_double(floor(unicode/1024),unicode%1024+0xDC00) + else + report_fonts("can't convert %a in %a into tounicode",unicode,name) + end + end +end + + +local function fromunicode16(str) + if #str == 4 then + return tonumber(str,16) + else + local l, r = match(str,"(....)(....)") + return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00 + end +end + +-- Slightly slower: +-- +-- local p = C(4) * (C(4)^-1) / function(l,r) +-- if r then +-- return (tonumber(l,16))*0x400 + tonumber(r,16) - 0xDC00 +-- else +-- return tonumber(l,16) +-- end +-- end +-- +-- local function fromunicode16(str) +-- return lpegmatch(p,str) +-- end + +-- This is quite a bit faster but at the cost of some memory but if we +-- do this we will also use it elsewhere so let's not follow this route +-- now. I might use this method in the plain variant (no caching there) +-- but then I need a flag that distinguishes between code branches. +-- +-- local cache = { } +-- +-- function mappings.tounicode16(unicode) +-- local s = cache[unicode] +-- if not s then +-- if unicode < 0x10000 then +-- s = format("%04X",unicode) +-- else +-- s = format("%04X%04X",unicode/0x400+0xD800,unicode%0x400+0xDC00) +-- end +-- cache[unicode] = s +-- end +-- return s +-- end + +mappings.loadlumtable = loadlumtable +mappings.makenameparser = makenameparser +mappings.tounicode = tounicode +mappings.tounicode16 = tounicode16 +mappings.tounicode16sequence = tounicode16sequence +mappings.fromunicode16 = fromunicode16 + +local ligseparator = P("_") +local varseparator = P(".") +local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator * C((1 - ligseparator - varseparator)^1))^0) + +-- local function test(name) +-- local split = lpegmatch(namesplitter,name) +-- print(string.formatters["%s: [% t]"](name,split)) +-- end + +-- maybe: ff fi fl ffi ffl => f_f f_i f_l f_f_i f_f_l + +-- test("i.f_") +-- test("this") +-- test("this.that") +-- test("japan1.123") +-- test("such_so_more") +-- test("such_so_more.that") + +-- to be completed .. for fonts that use unicodes for ligatures which +-- is a actually a bad thing and should be avoided in the first place + +local overloads = { + IJ = { name = "I_J", unicode = { 0x49, 0x4A }, mess = 0x0132 }, + ij = { name = "i_j", unicode = { 0x69, 0x6A }, mess = 0x0133 }, + ff = { name = "f_f", unicode = { 0x66, 0x66 }, mess = 0xFB00 }, + fi = { name = "f_i", unicode = { 0x66, 0x69 }, mess = 0xFB01 }, + fl = { name = "f_l", unicode = { 0x66, 0x6C }, mess = 0xFB02 }, + ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 }, mess = 0xFB03 }, + ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C }, mess = 0xFB04 }, + fj = { name = "f_j", unicode = { 0x66, 0x6A } }, + fk = { name = "f_k", unicode = { 0x66, 0x6B } }, +} + +for k, v in next, overloads do + local name = v.name + local mess = v.mess + if name then + overloads[name] = v + end + if mess then + overloads[mess] = v + end +end + +mappings.overloads = overloads + +function mappings.addtounicode(data,filename) + local resources = data.resources + local properties = data.properties + local descriptions = data.descriptions + local unicodes = resources.unicodes + local lookuptypes = resources.lookuptypes + if not unicodes then + return + end + -- we need to move this code + unicodes['space'] = unicodes['space'] or 32 + unicodes['hyphen'] = unicodes['hyphen'] or 45 + unicodes['zwj'] = unicodes['zwj'] or 0x200D + unicodes['zwnj'] = unicodes['zwnj'] or 0x200C + local private = fonts.constructors.privateoffset + local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context + ----- namevector = fonts.encodings.agl.names -- loaded runtime in context + local missing = { } + local lumunic, uparser, oparser + local cidinfo, cidnames, cidcodes, usedmap + -- + cidinfo = properties.cidinfo + usedmap = cidinfo and fonts.cid.getmap(cidinfo) + -- + if usedmap then + oparser = usedmap and makenameparser(cidinfo.ordering) + cidnames = usedmap.names + cidcodes = usedmap.unicodes + end + uparser = makenameparser() + local ns, nl = 0, 0 + for unic, glyph in next, descriptions do + local index = glyph.index + local name = glyph.name + local r = overloads[name] + if r then + -- get rid of weird ligatures + -- glyph.name = r.name + glyph.unicode = r.unicode + elseif unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then + local unicode = lumunic and lumunic[name] or unicodevector[name] + if unicode then + glyph.unicode = unicode + ns = ns + 1 + end + -- cidmap heuristics, beware, there is no guarantee for a match unless + -- the chain resolves + if (not unicode) and usedmap then + local foundindex = lpegmatch(oparser,name) + if foundindex then + unicode = cidcodes[foundindex] -- name to number + if unicode then + glyph.unicode = unicode + ns = ns + 1 + else + local reference = cidnames[foundindex] -- number to name + if reference then + local foundindex = lpegmatch(oparser,reference) + if foundindex then + unicode = cidcodes[foundindex] + if unicode then + glyph.unicode = unicode + ns = ns + 1 + end + end + if not unicode or unicode == "" then + local foundcodes, multiple = lpegmatch(uparser,reference) + if foundcodes then + glyph.unicode = foundcodes + if multiple then + nl = nl + 1 + unicode = true + else + ns = ns + 1 + unicode = foundcodes + end + end + end + end + end + end + end + -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_ + -- + -- It is not trivial to find a solution that suits all fonts. We tried several alternatives + -- and this one seems to work reasonable also with fonts that use less standardized naming + -- schemes. The extra private test is tested by KE and seems to work okay with non-typical + -- fonts as well. + -- + -- The next time I look into this, I'll add an extra analysis step to the otf loader (we can + -- resolve some tounicodes by looking into the gsub data tables that are bound to glyphs. + -- + if not unicode or unicode == "" then + local split = lpegmatch(namesplitter,name) + local nsplit = split and #split or 0 + local t, n = { }, 0 + unicode = true + for l=1,nsplit do + local base = split[l] + local u = unicodes[base] or unicodevector[base] + if not u then + break + elseif type(u) == "table" then + if u[1] >= private then + unicode = false + break + end + n = n + 1 + t[n] = u[1] + else + if u >= private then + unicode = false + break + end + n = n + 1 + t[n] = u + end + end + if n == 0 then -- done then + -- nothing + elseif n == 1 then + glyph.unicode = t[1] + else + glyph.unicode = t + end + nl = nl + 1 + end + -- last resort (we might need to catch private here as well) + if not unicode or unicode == "" then + local foundcodes, multiple = lpegmatch(uparser,name) + if foundcodes then + glyph.unicode = foundcodes + if multiple then + nl = nl + 1 + unicode = true + else + ns = ns + 1 + unicode = foundcodes + end + end + end + -- check using substitutes and alternates + local r = overloads[unicode] + if r then + unicode = r.unicode + glyph.unicode = unicode + end + -- + if not unicode then + missing[name] = true + end + end + end + if next(missing) then + local guess = { } + -- helper + local function check(gname,code,unicode) + local description = descriptions[code] + -- no need to add a self reference + local variant = description.name + if variant == gname then + return + end + -- the variant already has a unicode (normally that resultrs in a default tounicode to self) + local unic = unicodes[variant] + if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then + -- no default mapping and therefore maybe no tounicode yet + else + return + end + -- the variant already has a tounicode + if descriptions[code].unicode then + return + end + -- add to the list + local g = guess[variant] + -- local r = overloads[unicode] + -- if r then + -- unicode = r.unicode + -- end + if g then + g[gname] = unicode + else + guess[variant] = { [gname] = unicode } + end + end + -- + for unicode, description in next, descriptions do + local slookups = description.slookups + if slookups then + local gname = description.name + for tag, data in next, slookups do + local lookuptype = lookuptypes[tag] + if lookuptype == "alternate" then + for i=1,#data do + check(gname,data[i],unicode) + end + elseif lookuptype == "substitution" then + check(gname,data,unicode) + end + end + end + local mlookups = description.mlookups + if mlookups then + local gname = description.name + for tag, list in next, mlookups do + local lookuptype = lookuptypes[tag] + if lookuptype == "alternate" then + for i=1,#list do + local data = list[i] + for i=1,#data do + check(gname,data[i],unicode) + end + end + elseif lookuptype == "substitution" then + for i=1,#list do + check(gname,list[i],unicode) + end + end + end + end + end + -- resolve references + local done = true + while done do + done = false + for k, v in next, guess do + if type(v) ~= "number" then + for kk, vv in next, v do + if vv == -1 or vv >= private or (vv >= 0xE000 and vv <= 0xF8FF) or vv == 0xFFFE or vv == 0xFFFF then + local uu = guess[kk] + if type(uu) == "number" then + guess[k] = uu + done = true + end + else + guess[k] = vv + done = true + end + end + end + end + end + -- wrap up + local orphans = 0 + local guessed = 0 + for k, v in next, guess do + if type(v) == "number" then + descriptions[unicodes[k]].unicode = descriptions[v].unicode or v -- can also be a table + guessed = guessed + 1 + else + local t = nil + local l = lower(k) + local u = unicodes[l] + if not u then + orphans = orphans + 1 + elseif u == -1 or u >= private or (u >= 0xE000 and u <= 0xF8FF) or u == 0xFFFE or u == 0xFFFF then + local unicode = descriptions[u].unicode + if unicode then + descriptions[unicodes[k]].unicode = unicode + guessed = guessed + 1 + else + orphans = orphans + 1 + end + else + orphans = orphans + 1 + end + end + end + if trace_loading and orphans > 0 or guessed > 0 then + report_fonts("%s glyphs with no related unicode, %s guessed, %s orphans",guessed+orphans,guessed,orphans) + end + end + if trace_mapping then + for unic, glyph in table.sortedhash(descriptions) do + local name = glyph.name + local index = glyph.index + local unicode = glyph.unicode + if unicode then + if type(unicode) == "table" then + local unicodes = { } + for i=1,#unicode do + unicodes[i] = formatters("%U",unicode[i]) + end + report_fonts("internal slot %U, name %a, unicode %U, tounicode % t",index,name,unic,unicodes) + else + report_fonts("internal slot %U, name %a, unicode %U, tounicode %U",index,name,unic,unicode) + end + else + report_fonts("internal slot %U, name %a, unicode %U",index,name,unic) + end + end + end + if trace_loading and (ns > 0 or nl > 0) then + report_fonts("%s tounicode entries added, ligatures %s",nl+ns,ns) + end +end + +-- local parser = makenameparser("Japan1") +-- local parser = makenameparser() +-- local function test(str) +-- local b, a = lpegmatch(parser,str) +-- print((a and table.serialize(b)) or b) +-- end +-- test("a.sc") +-- test("a") +-- test("uni1234") +-- test("uni1234.xx") +-- test("uni12349876") +-- test("u123400987600") +-- test("index1234") +-- test("Japan1.123") |