diff options
Diffstat (limited to 'otfl-font-map.lua')
-rw-r--r-- | otfl-font-map.lua | 161 |
1 files changed, 155 insertions, 6 deletions
diff --git a/otfl-font-map.lua b/otfl-font-map.lua index 9e85516..9dd2bd0 100644 --- a/otfl-font-map.lua +++ b/otfl-font-map.lua @@ -6,10 +6,13 @@ if not modules then modules = { } end modules ['font-map'] = { license = "see context related readme files" } -local match, format, find, concat, gsub = string.match, string.format, string.find, table.concat, string.gsub +local utf = unicode.utf8 +local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower local lpegmatch = lpeg.match +local utfbyte = utf.byte -local trace_loading = false trackers.register("otf.loading", function(v) trace_loading = v end) +local trace_loading = false trackers.register("otf.loading", function(v) trace_loading = v end) +local trace_unimapping = false trackers.register("otf.unimapping", function(v) trace_unimapping = v end) local ctxcatcodes = tex and tex.ctxcatcodes @@ -128,7 +131,7 @@ function fonts.map.load_file(filename, entries, encodings) return entries, encodings end -function fonts.map.load_lum_table(filename) +local function load_lum_table(filename) local lumname = file.replacesuffix(file.basename(filename),"lum") local lumfile = resolvers.find_file(lumname,"map") or "" if lumfile ~= "" and lfs.isfile(lumfile) then @@ -154,7 +157,7 @@ local parser = unicode + ucode + index local parsers = { } -function fonts.map.make_name_parser(str) +local function make_name_parser(str) if not str or str == "" then return parser else @@ -181,7 +184,7 @@ end --~ test("index1234") --~ test("Japan1.123") -function fonts.map.tounicode16(unicode) +local function tounicode16(unicode) if unicode < 0x10000 then return format("%04X",unicode) else @@ -189,7 +192,7 @@ function fonts.map.tounicode16(unicode) end end -function fonts.map.tounicode16sequence(unicodes) +local function tounicode16sequence(unicodes) local t = { } for l=1,#unicodes do local unicode = unicodes[l] @@ -222,3 +225,149 @@ end --~ return s --~ end +fonts.map.load_lum_table = load_lum_table +fonts.map.make_name_parser = make_name_parser +fonts.map.tounicode16 = tounicode16 +fonts.map.tounicode16sequence = tounicode16sequence + +local separator = lpeg.S("_.") +local other = lpeg.C((1 - separator)^1) +local ligsplitter = lpeg.Ct(other * (separator * other)^0) + +--~ print(table.serialize(lpegmatch(ligsplitter,"this"))) +--~ print(table.serialize(lpegmatch(ligsplitter,"this.that"))) +--~ print(table.serialize(lpegmatch(ligsplitter,"japan1.123"))) +--~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more"))) +--~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more.that"))) + +fonts.map.add_to_unicode = function(data,filename) + local unicodes = data.luatex and data.luatex.unicodes + if not unicodes then + return + end + -- we need to move this code + unicodes['space'] = unicodes['space'] or 32 + unicodes['hyphen'] = unicodes['hyphen'] or 45 + unicodes['zwj'] = unicodes['zwj'] or 0x200D + unicodes['zwnj'] = unicodes['zwnj'] or 0x200C + -- the tounicode mapping is sparse and only needed for alternatives + local tounicode, originals, ns, nl, private, unknown = { }, { }, 0, 0, fonts.private, format("%04X",utfbyte("?")) + data.luatex.tounicode, data.luatex.originals = tounicode, originals + local lumunic, uparser, oparser + if false then -- will become an option + lumunic = load_lum_table(filename) + lumunic = lumunic and lumunic.tounicode + end + local cidinfo, cidnames, cidcodes = data.cidinfo + local usedmap = cidinfo and cidinfo.usedname + usedmap = usedmap and lower(usedmap) + usedmap = usedmap and fonts.cid.map[usedmap] + if usedmap then + oparser = usedmap and make_name_parser(cidinfo.ordering) + cidnames = usedmap.names + cidcodes = usedmap.unicodes + end + uparser = make_name_parser() + local aglmap = fonts.map and fonts.map.agl_to_unicode + for index, glyph in next, data.glyphs do + local name, unic = glyph.name, glyph.unicode or -1 -- play safe + if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then + local unicode = (lumunic and lumunic[name]) or (aglmap and aglmap[name]) + if unicode then + originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1 + end + -- cidmap heuristics, beware, there is no guarantee for a match unless + -- the chain resolves + if (not unicode) and usedmap then + local foundindex = lpegmatch(oparser,name) + if foundindex then + unicode = cidcodes[foundindex] -- name to number + if unicode then + originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1 + else + local reference = cidnames[foundindex] -- number to name + if reference then + local foundindex = lpegmatch(oparser,reference) + if foundindex then + unicode = cidcodes[foundindex] + if unicode then + originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1 + end + end + if not unicode then + local foundcodes, multiple = lpegmatch(uparser,reference) + if foundcodes then + if multiple then + originals[index], tounicode[index], nl, unicode = foundcodes, tounicode16sequence(foundcodes), nl + 1, true + else + originals[index], tounicode[index], ns, unicode = foundcodes, tounicode16(foundcodes), ns + 1, foundcodes + end + end + end + end + end + end + end + -- a.whatever or a_b_c.whatever or a_b_c (no numbers) + if not unicode then + local split = lpegmatch(ligsplitter,name) + local nplit = (split and #split) or 0 + if nplit == 0 then + -- skip + elseif nplit == 1 then + local base = split[1] + unicode = unicodes[base] or (aglmap and aglmap[base]) + if unicode then + if type(unicode) == "table" then + unicode = unicode[1] + end + originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1 + end + else + local t = { } + for l=1,nplit do + local base = split[l] + local u = unicodes[base] or (aglmap and aglmap[base]) + if not u then + break + elseif type(u) == "table" then + t[#t+1] = u[1] + else + t[#t+1] = u + end + end + if #t > 0 then -- done then + originals[index], tounicode[index], nl, unicode = t, tounicode16sequence(t), nl + 1, true + end + end + end + -- last resort + if not unicode then + local foundcodes, multiple = lpegmatch(uparser,name) + if foundcodes then + if multiple then + originals[index], tounicode[index], nl, unicode = foundcodes, tounicode16sequence(foundcodes), nl + 1, true + else + originals[index], tounicode[index], ns, unicode = foundcodes, tounicode16(foundcodes), ns + 1, foundcodes + end + end + end + if not unicode then + originals[index], tounicode[index] = 0xFFFD, "FFFD" + end + end + end + if trace_unimapping then + for index, glyph in table.sortedpairs(data.glyphs) do + local toun, name, unic = tounicode[index], glyph.name, glyph.unicode or -1 -- play safe + if toun then + logs.report("load otf","internal: 0x%05X, name: %s, unicode: 0x%05X, tounicode: %s",index,name,unic,toun) + else + logs.report("load otf","internal: 0x%05X, name: %s, unicode: 0x%05X",index,name,unic) + end + end + end + if trace_loading and (ns > 0 or nl > 0) then + logs.report("load otf","enhance: %s tounicode entries added (%s ligatures)",nl+ns, ns) + end +end |