diff options
Diffstat (limited to 'otfl-font-map.lua')
-rw-r--r-- | otfl-font-map.lua | 161 |
1 files changed, 97 insertions, 64 deletions
diff --git a/otfl-font-map.lua b/otfl-font-map.lua
index 26b22b6..7f5305f 100644
--- a/otfl-font-map.lua
+++ b/otfl-font-map.lua
@@ -6,15 +6,18 @@ if not modules then modules = { } end modules ['font-map'] = {
     license = "see context related readme files"
 }
 
-local utf = unicode.utf8
 local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
-local lpegmatch = lpeg.match
+local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match
 local utfbyte = utf.byte
 
-local trace_loading = false trackers.register("otf.loading", function(v) trace_loading = v end)
-local trace_unimapping = false trackers.register("otf.unimapping", function(v) trace_unimapping = v end)
+local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end)
+local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_mapping = v end)
 
-local report_otf = logs.reporter("fonts","otf loading")
+local report_fonts = logs.reporter("fonts","loading") -- not otf only
+
+local fonts = fonts
+local mappings = { }
+fonts.mappings = mappings
 
 --[[ldx--
 <p>Eventually this code will disappear because map files are kind
@@ -22,23 +25,18 @@ of obsolete. Some code may move to runtime or auxiliary modules.</p>
 <p>The name to unciode related code will stay of course.</p>
 --ldx]]--
 
-local fonts = fonts
-fonts.map = fonts.map or { }
-
 local function loadlumtable(filename) -- will move to font goodies
     local lumname = file.replacesuffix(file.basename(filename),"lum")
     local lumfile = resolvers.findfile(lumname,"map") or ""
     if lumfile ~= "" and lfs.isfile(lumfile) then
-        if trace_loading or trace_unimapping then
-            report_otf("enhance: loading %s ",lumfile)
+        if trace_loading or trace_mapping then
+            report_fonts("enhance: loading %s ",lumfile)
         end
         lumunic = dofile(lumfile)
         return lumunic, lumfile
     end
 end
 
-local P, R, S, C, Ct, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc
-
 local hex = R("AF","09")
 local hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end
 local hexsix = (hex^1) / function(s) return tonumber(s,16) end
@@ -65,8 +63,8 @@ local function makenameparser(str)
     end
 end
 
---~ local parser = fonts.map.makenameparser("Japan1")
---~ local parser = fonts.map.makenameparser()
+--~ local parser = mappings.makenameparser("Japan1")
+--~ local parser = mappings.makenameparser()
 --~ local function test(str)
 --~     local b, a = lpegmatch(parser,str)
 --~     print((a and table.serialize(b)) or b)
@@ -100,6 +98,15 @@ local function tounicode16sequence(unicodes)
     return concat(t)
 end
 
+local function fromunicode16(str)
+    if #str == 4 then
+        return tonumber(str,16)
+    else
+        local l, r = match(str,"(....)(....)")
+        return (tonumber(l,16)- 0xD800)*0x400 + tonumber(r,16) - 0xDC00
+    end
+end
+
 --~ This is quite a bit faster but at the cost of some memory but if we
 --~ do this we will also use it elsewhere so let's not follow this route
 --~ now. I might use this method in the plain variant (no caching there)
@@ -107,7 +114,7 @@ end
 --~
 --~ local cache = { }
 --~
---~ function fonts.map.tounicode16(unicode)
+--~ function mappings.tounicode16(unicode)
 --~     local s = cache[unicode]
 --~     if not s then
 --~         if unicode < 0x10000 then
@@ -120,10 +127,11 @@ end
 --~     return s
 --~ end
 
-fonts.map.loadlumtable = loadlumtable
-fonts.map.makenameparser = makenameparser
-fonts.map.tounicode16 = tounicode16
-fonts.map.tounicode16sequence = tounicode16sequence
+mappings.loadlumtable = loadlumtable
+mappings.makenameparser = makenameparser
+mappings.tounicode16 = tounicode16
+mappings.tounicode16sequence = tounicode16sequence
+mappings.fromunicode16 = fromunicode16
 
 local separator = S("_.")
 local other = C((1 - separator)^1)
@@ -135,8 +143,11 @@ local ligsplitter = Ct(other * (separator * other)^0)
 --~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more")))
 --~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more.that")))
 
-fonts.map.addtounicode = function(data,filename)
-    local unicodes = data.luatex and data.luatex.unicodes
+function mappings.addtounicode(data,filename)
+    local resources = data.resources
+    local properties = data.properties
+    local descriptions = data.descriptions
+    local unicodes = resources.unicodes
     if not unicodes then
         return
     end
@@ -146,30 +157,39 @@ fonts.map.addtounicode = function(data,filename)
     unicodes['zwj'] = unicodes['zwj'] or 0x200D
     unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
     -- the tounicode mapping is sparse and only needed for alternatives
-    local tounicode, originals, ns, nl, private, unknown = { }, { }, 0, 0, fonts.privateoffset, format("%04X",utfbyte("?"))
-    data.luatex.tounicode, data.luatex.originals = tounicode, originals
+    local private = fonts.constructors.privateoffset
+    local unknown = format("%04X",utfbyte("?"))
+    local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
+    local tounicode = { }
+    local originals = { }
+    resources.tounicode = tounicode
+    resources.originals = originals
     local lumunic, uparser, oparser
+    local cidinfo, cidnames, cidcodes, usedmap
     if false then -- will become an option
         lumunic = loadlumtable(filename)
         lumunic = lumunic and lumunic.tounicode
     end
-    local cidinfo, cidnames, cidcodes = data.cidinfo
-    local usedmap = cidinfo and cidinfo.usedname
-    usedmap = usedmap and lower(usedmap)
-    usedmap = usedmap and fonts.cid.map[usedmap]
+    --
+    cidinfo = properties.cidinfo
+    usedmap = cidinfo and fonts.cid.getmap(cidinfo)
+    --
     if usedmap then
-        oparser = usedmap and makenameparser(cidinfo.ordering)
+        oparser = usedmap and makenameparser(cidinfo.ordering)
         cidnames = usedmap.names
         cidcodes = usedmap.unicodes
     end
     uparser = makenameparser()
-    local unicodevector = fonts.enc.agl.unicodes -- loaded runtime in context
-    for index, glyph in next, data.glyphs do
-        local name, unic = glyph.name, glyph.unicode or -1 -- play safe
+    local ns, nl = 0, 0
+    for unic, glyph in next, descriptions do
+        local index = glyph.index
+        local name = glyph.name
         if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
-            local unicode = (lumunic and lumunic[name]) or unicodevector[name]
+            local unicode = lumunic and lumunic[name] or unicodevector[name]
             if unicode then
-                originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+                originals[index] = unicode
+                tounicode[index] = tounicode16(unicode)
+                ns = ns + 1
             end
             -- cidmap heuristics, beware, there is no guarantee for a match unless
             -- the chain resolves
@@ -178,7 +198,9 @@ fonts.map.addtounicode = function(data,filename)
                 if foundindex then
                     unicode = cidcodes[foundindex] -- name to number
                     if unicode then
-                        originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+                        originals[index] = unicode
+                        tounicode[index] = tounicode16(unicode)
+                        ns = ns + 1
                     else
                         local reference = cidnames[foundindex] -- number to name
                         if reference then
@@ -186,16 +208,23 @@ fonts.map.addtounicode = function(data,filename)
                             if foundindex then
                                 unicode = cidcodes[foundindex]
                                 if unicode then
-                                    originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+                                    originals[index] = unicode
+                                    tounicode[index] = tounicode16(unicode)
+                                    ns = ns + 1
                                 end
                             end
                             if not unicode then
                                 local foundcodes, multiple = lpegmatch(uparser,reference)
                                 if foundcodes then
+                                    originals[index] = foundcodes
                                     if multiple then
-                                        originals[index], tounicode[index], nl, unicode = foundcodes, tounicode16sequence(foundcodes), nl + 1, true
+                                        tounicode[index] = tounicode16sequence(foundcodes)
+                                        nl = nl + 1
+                                        unicode = true
                                     else
-                                        originals[index], tounicode[index], ns, unicode = foundcodes, tounicode16(foundcodes), ns + 1, foundcodes
+                                        tounicode[index] = tounicode16(foundcodes)
+                                        ns = ns + 1
+                                        unicode = foundcodes
                                     end
                                 end
                             end
@@ -206,19 +235,8 @@ fonts.map.addtounicode = function(data,filename)
             -- a.whatever or a_b_c.whatever or a_b_c (no numbers)
             if not unicode then
                 local split = lpegmatch(ligsplitter,name)
-                local nplit = (split and #split) or 0
-                if nplit == 0 then
-                    -- skip
-                elseif nplit == 1 then
-                    local base = split[1]
-                    unicode = unicodes[base] or unicodevector[base]
-                    if unicode then
-                        if type(unicode) == "table" then
-                            unicode = unicode[1]
-                        end
-                        originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
-                    end
-                else
+                local nplit = split and #split or 0
+                if nplit >= 2 then
                     local t, n = { }, 0
                     for l=1,nplit do
                         local base = split[l]
@@ -236,39 +254,54 @@ fonts.map.addtounicode = function(data,filename)
                     if n == 0 then -- done then
                         -- nothing
                     elseif n == 1 then
-                        originals[index], tounicode[index], nl, unicode = t[1], tounicode16(t[1]), nl + 1, true
+                        originals[index] = t[1]
+                        tounicode[index] = tounicode16(t[1])
                     else
-                        originals[index], tounicode[index], nl, unicode = t, tounicode16sequence(t), nl + 1, true
+                        originals[index] = t
+                        tounicode[index] = tounicode16sequence(t)
                     end
+                    nl = nl + 1
+                    unicode = true
+                else
+                    -- skip: already checked and we don't want privates here
                 end
             end
-            -- last resort
+            -- last resort (we might need to catch private here as well)
             if not unicode then
                 local foundcodes, multiple = lpegmatch(uparser,name)
                 if foundcodes then
                     if multiple then
-                        originals[index], tounicode[index], nl, unicode = foundcodes, tounicode16sequence(foundcodes), nl + 1, true
+                        originals[index] = foundcodes
+                        tounicode[index] = tounicode16sequence(foundcodes)
+                        nl = nl + 1
+                        unicode = true
                     else
-                        originals[index], tounicode[index], ns, unicode = foundcodes, tounicode16(foundcodes), ns + 1, foundcodes
+                        originals[index] = foundcodes
+                        tounicode[index] = tounicode16(foundcodes)
+                        ns = ns + 1
+                        unicode = foundcodes
                     end
                 end
             end
-            if not unicode then
-                originals[index], tounicode[index] = 0xFFFD, "FFFD"
-            end
+            -- if not unicode then
+            --     originals[index] = 0xFFFD
+            --     tounicode[index] = "FFFD"
+            -- end
         end
     end
-    if trace_unimapping then
-        for index, glyph in table.sortedhash(data.glyphs) do
-            local toun, name, unic = tounicode[index], glyph.name, glyph.unicode or -1 -- play safe
+    if trace_mapping then
+        for unic, glyph in table.sortedhash(descriptions) do
+            local name = glyph.name
+            local index = glyph.index
+            local toun = tounicode[index]
             if toun then
-                report_otf("internal: 0x%05X, name: %s, unicode: 0x%05X, tounicode: %s",index,name,unic,toun)
+                report_fonts("internal: 0x%05X, name: %s, unicode: U+%05X, tounicode: %s",index,name,unic,toun)
             else
-                report_otf("internal: 0x%05X, name: %s, unicode: 0x%05X",index,name,unic)
+                report_fonts("internal: 0x%05X, name: %s, unicode: U+%05X",index,name,unic)
             end
         end
     end
     if trace_loading and (ns > 0 or nl > 0) then
-        report_otf("enhance: %s tounicode entries added (%s ligatures)",nl+ns, ns)
+        report_fonts("enhance: %s tounicode entries added (%s ligatures)",nl+ns, nl) -- nl: ligature/sequence entries, ns: single mappings
     end
 end