if not modules then modules = { } end modules ['font-map'] = { version = 1.001, comment = "companion to font-ini.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match local utfbyte = utf.byte local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end) local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_unimapping = v end) local report_fonts = logs.reporter("fonts","loading") -- not otf only local fonts = fonts local mappings = { } fonts.mappings = mappings --[[ldx--
Eventually this code will disappear because map files are kind of obsolete. Some code may move to runtime or auxiliary modules.
The name to unciode related code will stay of course.
--ldx]]-- local function loadlumtable(filename) -- will move to font goodies local lumname = file.replacesuffix(file.basename(filename),"lum") local lumfile = resolvers.findfile(lumname,"map") or "" if lumfile ~= "" and lfs.isfile(lumfile) then if trace_loading or trace_mapping then report_fonts("enhance: loading %s ",lumfile) end lumunic = dofile(lumfile) return lumunic, lumfile end end local hex = R("AF","09") local hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end local hexsix = (hex^1) / function(s) return tonumber(s,16) end local dec = (R("09")^1) / tonumber local period = P(".") local unicode = P("uni") * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true)) local ucode = P("u") * (hexsix * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true)) local index = P("index") * dec * Cc(false) local parser = unicode + ucode + index local parsers = { } local function makenameparser(str) if not str or str == "" then return parser else local p = parsers[str] if not p then p = P(str) * period * dec * Cc(false) parsers[str] = p end return p end end --~ local parser = mappings.makenameparser("Japan1") --~ local parser = mappings.makenameparser() --~ local function test(str) --~ local b, a = lpegmatch(parser,str) --~ print((a and table.serialize(b)) or b) --~ end --~ test("a.sc") --~ test("a") --~ test("uni1234") --~ test("uni1234.xx") --~ test("uni12349876") --~ test("index1234") --~ test("Japan1.123") local function tounicode16(unicode) if unicode < 0x10000 then return format("%04X",unicode) else return format("%04X%04X",unicode/1024+0xD800,unicode%1024+0xDC00) end end local function tounicode16sequence(unicodes) local t = { } for l=1,#unicodes do local unicode = unicodes[l] if unicode < 0x10000 then t[l] = format("%04X",unicode) else t[l] = format("%04X%04X",unicode/1024+0xD800,unicode%1024+0xDC00) end end return concat(t) end local function fromunicode16(str) if #str == 4 then return tonumber(str,16) else local l, r = match(str,"(....)(....)") return (tonumber(l,16)- 0xD800)*0x400 + tonumber(r,16) - 0xDC00 end end --~ This is quite a bit faster but at the cost of some memory but if we --~ do this we will also use it elsewhere so let's not follow this route --~ now. I might use this method in the plain variant (no caching there) --~ but then I need a flag that distinguishes between code branches. --~ --~ local cache = { } --~ --~ function mappings.tounicode16(unicode) --~ local s = cache[unicode] --~ if not s then --~ if unicode < 0x10000 then --~ s = format("%04X",unicode) --~ else --~ s = format("%04X%04X",unicode/1024+0xD800,unicode%1024+0xDC00) --~ end --~ cache[unicode] = s --~ end --~ return s --~ end mappings.loadlumtable = loadlumtable mappings.makenameparser = makenameparser mappings.tounicode16 = tounicode16 mappings.tounicode16sequence = tounicode16sequence mappings.fromunicode16 = fromunicode16 local separator = S("_.") local other = C((1 - separator)^1) local ligsplitter = Ct(other * (separator * other)^0) --~ print(table.serialize(lpegmatch(ligsplitter,"this"))) --~ print(table.serialize(lpegmatch(ligsplitter,"this.that"))) --~ print(table.serialize(lpegmatch(ligsplitter,"japan1.123"))) --~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more"))) --~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more.that"))) function mappings.addtounicode(data,filename) local resources = data.resources local properties = data.properties local descriptions = data.descriptions local unicodes = resources.unicodes if not unicodes then return end -- we need to move this code unicodes['space'] = unicodes['space'] or 32 unicodes['hyphen'] = unicodes['hyphen'] or 45 unicodes['zwj'] = unicodes['zwj'] or 0x200D unicodes['zwnj'] = unicodes['zwnj'] or 0x200C -- the tounicode mapping is sparse and only needed for alternatives local private = fonts.constructors.privateoffset local unknown = format("%04X",utfbyte("?")) local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context local tounicode = { } local originals = { } resources.tounicode = tounicode resources.originals = originals local lumunic, uparser, oparser local cidinfo, cidnames, cidcodes, usedmap if false then -- will become an option lumunic = loadlumtable(filename) lumunic = lumunic and lumunic.tounicode end -- cidinfo = properties.cidinfo usedmap = cidinfo and fonts.cid.getmap(cidinfo) -- if usedmap then oparser = usedmap and makenameparser(cidinfo.ordering) cidnames = usedmap.names cidcodes = usedmap.unicodes end uparser = makenameparser() local ns, nl = 0, 0 for unic, glyph in next, descriptions do local index = glyph.index local name = glyph.name if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then local unicode = lumunic and lumunic[name] or unicodevector[name] if unicode then originals[index] = unicode tounicode[index] = tounicode16(unicode) ns = ns + 1 end -- cidmap heuristics, beware, there is no guarantee for a match unless -- the chain resolves if (not unicode) and usedmap then local foundindex = lpegmatch(oparser,name) if foundindex then unicode = cidcodes[foundindex] -- name to number if unicode then originals[index] = unicode tounicode[index] = tounicode16(unicode) ns = ns + 1 else local reference = cidnames[foundindex] -- number to name if reference then local foundindex = lpegmatch(oparser,reference) if foundindex then unicode = cidcodes[foundindex] if unicode then originals[index] = unicode tounicode[index] = tounicode16(unicode) ns = ns + 1 end end if not unicode then local foundcodes, multiple = lpegmatch(uparser,reference) if foundcodes then originals[index] = foundcodes if multiple then tounicode[index] = tounicode16sequence(foundcodes) nl = nl + 1 unicode = true else tounicode[index] = tounicode16(foundcodes) ns = ns + 1 unicode = foundcodes end end end end end end end -- a.whatever or a_b_c.whatever or a_b_c (no numbers) if not unicode then local split = lpegmatch(ligsplitter,name) local nplit = split and #split or 0 if nplit >= 2 then local t, n = { }, 0 for l=1,nplit do local base = split[l] local u = unicodes[base] or unicodevector[base] if not u then break elseif type(u) == "table" then n = n + 1 t[n] = u[1] else n = n + 1 t[n] = u end end if n == 0 then -- done then -- nothing elseif n == 1 then originals[index] = t[1] tounicode[index] = tounicode16(t[1]) else originals[index] = t tounicode[index] = tounicode16sequence(t) end nl = nl + 1 unicode = true else -- skip: already checked and we don't want privates here end end -- last resort (we might need to catch private here as well) if not unicode then local foundcodes, multiple = lpegmatch(uparser,name) if foundcodes then if multiple then originals[index] = foundcodes tounicode[index] = tounicode16sequence(foundcodes) nl = nl + 1 unicode = true else originals[index] = foundcodes tounicode[index] = tounicode16(foundcodes) ns = ns + 1 unicode = foundcodes end end end -- if not unicode then -- originals[index] = 0xFFFD -- tounicode[index] = "FFFD" -- end end end if trace_mapping then for unic, glyph in table.sortedhash(descriptions) do local name = glyph.name local index = glyph.index local toun = tounicode[index] if toun then report_fonts("internal: 0x%05X, name: %s, unicode: U+%05X, tounicode: %s",index,name,unic,toun) else report_fonts("internal: 0x%05X, name: %s, unicode: U+%05X",index,name,unic) end end end if trace_loading and (ns > 0 or nl > 0) then report_fonts("enhance: %s tounicode entries added (%s ligatures)",nl+ns, ns) end end