summaryrefslogtreecommitdiff
path: root/tex/context/base/font-map.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/font-map.lua')
-rw-r--r--tex/context/base/font-map.lua391
1 files changed, 61 insertions, 330 deletions
diff --git a/tex/context/base/font-map.lua b/tex/context/base/font-map.lua
index 309435e0d..890e47d3f 100644
--- a/tex/context/base/font-map.lua
+++ b/tex/context/base/font-map.lua
@@ -6,12 +6,13 @@ if not modules then modules = { } end modules ['font-map'] = {
license = "see context related readme files"
}
-local tonumber = tonumber
+local tonumber, next, type = tonumber, next, type
local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match
local utfbyte = utf.byte
local floor = math.floor
+local formatters = string.formatters
local trace_loading = false trackers.register("fonts.loading", function(v) trace_loading = v end)
local trace_mapping = false trackers.register("fonts.mapping", function(v) trace_unimapping = v end)
@@ -66,11 +67,14 @@ local function makenameparser(str)
end
end
+local f_single = formatters["%04X"]
+local f_double = formatters["%04X%04X"]
+
local function tounicode16(unicode,name)
if unicode < 0x10000 then
- return format("%04X",unicode)
+ return f_single(unicode)
elseif unicode < 0x1FFFFFFFFF then
- return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00)
+ return f_double(floor(unicode/1024),unicode%1024+0xDC00)
else
report_fonts("can't convert %a in %a into tounicode",unicode,name)
end
@@ -81,9 +85,9 @@ local function tounicode16sequence(unicodes,name)
for l=1,#unicodes do
local u = unicodes[l]
if u < 0x10000 then
- t[l] = format("%04X",u)
+ t[l] = f_single(u)
elseif unicode < 0x1FFFFFFFFF then
- t[l] = format("%04X%04X",floor(u/1024),u%1024+0xDC00)
+ t[l] = f_double(floor(u/1024),u%1024+0xDC00)
else
report_fonts ("can't convert %a in %a into tounicode",u,name)
return
@@ -98,9 +102,9 @@ local function tounicode(unicode,name)
for l=1,#unicode do
local u = unicode[l]
if u < 0x10000 then
- t[l] = format("%04X",u)
+ t[l] = f_single(u)
elseif u < 0x1FFFFFFFFF then
- t[l] = format("%04X%04X",floor(u/1024),u%1024+0xDC00)
+ t[l] = f_double(floor(u/1024),u%1024+0xDC00)
else
report_fonts ("can't convert %a in %a into tounicode",u,name)
return
@@ -109,9 +113,9 @@ local function tounicode(unicode,name)
return concat(t)
else
if unicode < 0x10000 then
- return format("%04X",unicode)
+ return f_single(unicode)
elseif unicode < 0x1FFFFFFFFF then
- return format("%04X%04X",floor(unicode/1024),unicode%1024+0xDC00)
+ return f_double(floor(unicode/1024),unicode%1024+0xDC00)
else
report_fonts("can't convert %a in %a into tounicode",unicode,name)
end
@@ -187,321 +191,35 @@ local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator *
-- test("such_so_more")
-- test("such_so_more.that")
--- function mappings.addtounicode(data,filename)
--- local resources = data.resources
--- local properties = data.properties
--- local descriptions = data.descriptions
--- local unicodes = resources.unicodes
--- local lookuptypes = resources.lookuptypes
--- if not unicodes then
--- return
--- end
--- -- we need to move this code
--- unicodes['space'] = unicodes['space'] or 32
--- unicodes['hyphen'] = unicodes['hyphen'] or 45
--- unicodes['zwj'] = unicodes['zwj'] or 0x200D
--- unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
--- -- the tounicode mapping is sparse and only needed for alternatives
--- local private = fonts.constructors.privateoffset
--- local unknown = format("%04X",utfbyte("?"))
--- local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
--- ----- namevector = fonts.encodings.agl.names -- loaded runtime in context
--- local tounicode = { }
--- local originals = { }
--- local missing = { }
--- resources.tounicode = tounicode
--- resources.originals = originals
--- local lumunic, uparser, oparser
--- local cidinfo, cidnames, cidcodes, usedmap
--- -- if false then -- will become an option
--- -- lumunic = loadlumtable(filename)
--- -- lumunic = lumunic and lumunic.tounicode
--- -- end
--- --
--- cidinfo = properties.cidinfo
--- usedmap = cidinfo and fonts.cid.getmap(cidinfo)
--- --
--- if usedmap then
--- oparser = usedmap and makenameparser(cidinfo.ordering)
--- cidnames = usedmap.names
--- cidcodes = usedmap.unicodes
--- end
--- uparser = makenameparser()
--- local ns, nl = 0, 0
--- for unic, glyph in next, descriptions do
--- local index = glyph.index
--- local name = glyph.name
--- if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
--- local unicode = lumunic and lumunic[name] or unicodevector[name]
--- if unicode then
--- originals[index] = unicode
--- tounicode[index] = tounicode16(unicode,name)
--- ns = ns + 1
--- end
--- -- cidmap heuristics, beware, there is no guarantee for a match unless
--- -- the chain resolves
--- if (not unicode) and usedmap then
--- local foundindex = lpegmatch(oparser,name)
--- if foundindex then
--- unicode = cidcodes[foundindex] -- name to number
--- if unicode then
--- originals[index] = unicode
--- tounicode[index] = tounicode16(unicode,name)
--- ns = ns + 1
--- else
--- local reference = cidnames[foundindex] -- number to name
--- if reference then
--- local foundindex = lpegmatch(oparser,reference)
--- if foundindex then
--- unicode = cidcodes[foundindex]
--- if unicode then
--- originals[index] = unicode
--- tounicode[index] = tounicode16(unicode,name)
--- ns = ns + 1
--- end
--- end
--- if not unicode or unicode == "" then
--- local foundcodes, multiple = lpegmatch(uparser,reference)
--- if foundcodes then
--- originals[index] = foundcodes
--- if multiple then
--- tounicode[index] = tounicode16sequence(foundcodes)
--- nl = nl + 1
--- unicode = true
--- else
--- tounicode[index] = tounicode16(foundcodes,name)
--- ns = ns + 1
--- unicode = foundcodes
--- end
--- end
--- end
--- end
--- end
--- end
--- end
--- -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_
--- --
--- -- It is not trivial to find a solution that suits all fonts. We tried several alternatives
--- -- and this one seems to work reasonable also with fonts that use less standardized naming
--- -- schemes. The extra private test is tested by KE and seems to work okay with non-typical
--- -- fonts as well.
--- --
--- -- The next time I look into this, I'll add an extra analysis step to the otf loader (we can
--- -- resolve some tounicodes by looking into the gsub data tables that are bound to glyphs.
--- --
--- if not unicode or unicode == "" then
--- local split = lpegmatch(namesplitter,name)
--- local nsplit = split and #split or 0
--- local t, n = { }, 0
--- unicode = true
--- for l=1,nsplit do
--- local base = split[l]
--- local u = unicodes[base] or unicodevector[base]
--- if not u then
--- break
--- elseif type(u) == "table" then
--- if u[1] >= private then
--- unicode = false
--- break
--- end
--- n = n + 1
--- t[n] = u[1]
--- else
--- if u >= private then
--- unicode = false
--- break
--- end
--- n = n + 1
--- t[n] = u
--- end
--- end
--- if n == 0 then -- done then
--- -- nothing
--- elseif n == 1 then
--- local unicode = t[1]
--- originals[index] = unicode
--- tounicode[index] = tounicode16(unicode,name)
--- else
--- originals[index] = t
--- tounicode[index] = tounicode16sequence(t)
--- end
--- nl = nl + 1
--- end
--- -- last resort (we might need to catch private here as well)
--- if not unicode or unicode == "" then
--- local foundcodes, multiple = lpegmatch(uparser,name)
--- if foundcodes then
--- if multiple then
--- originals[index] = foundcodes
--- tounicode[index] = tounicode16sequence(foundcodes,name)
--- nl = nl + 1
--- unicode = true
--- else
--- originals[index] = foundcodes
--- tounicode[index] = tounicode16(foundcodes,name)
--- ns = ns + 1
--- unicode = foundcodes
--- end
--- end
--- end
--- -- check using substitutes and alternates
--- --
--- if not unicode then
--- missing[name] = true
--- end
--- -- if not unicode then
--- -- originals[index] = 0xFFFD
--- -- tounicode[index] = "FFFD"
--- -- end
--- end
--- end
--- if next(missing) then
--- local guess = { }
--- -- helper
--- local function check(gname,code,unicode)
--- local description = descriptions[code]
--- -- no need to add a self reference
--- local variant = description.name
--- if variant == gname then
--- return
--- end
--- -- the variant already has a unicode (normally that resultrs in a default tounicode to self)
--- local unic = unicodes[variant]
--- if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
--- -- no default mapping and therefore maybe no tounicode yet
--- else
--- return
--- end
--- -- the variant already has a tounicode
--- local index = descriptions[code].index
--- if tounicode[index] then
--- return
--- end
--- -- add to the list
--- local g = guess[variant]
--- if g then
--- g[gname] = unicode
--- else
--- guess[variant] = { [gname] = unicode }
--- end
--- end
--- --
--- for unicode, description in next, descriptions do
--- local slookups = description.slookups
--- if slookups then
--- local gname = description.name
--- for tag, data in next, slookups do
--- local lookuptype = lookuptypes[tag]
--- if lookuptype == "alternate" then
--- for i=1,#data do
--- check(gname,data[i],unicode)
--- end
--- elseif lookuptype == "substitution" then
--- check(gname,data,unicode)
--- end
--- end
--- end
--- local mlookups = description.mlookups
--- if mlookups then
--- local gname = description.name
--- for tag, list in next, mlookups do
--- local lookuptype = lookuptypes[tag]
--- if lookuptype == "alternate" then
--- for i=1,#list do
--- local data = list[i]
--- for i=1,#data do
--- check(gname,data[i],unicode)
--- end
--- end
--- elseif lookuptype == "substitution" then
--- for i=1,#list do
--- check(gname,list[i],unicode)
--- end
--- end
--- end
--- end
--- end
--- -- resolve references
--- local done = true
--- while done do
--- done = false
--- for k, v in next, guess do
--- if type(v) ~= "number" then
--- for kk, vv in next, v do
--- if vv == -1 or vv >= private or (vv >= 0xE000 and vv <= 0xF8FF) or vv == 0xFFFE or vv == 0xFFFF then
--- local uu = guess[kk]
--- if type(uu) == "number" then
--- guess[k] = uu
--- done = true
--- end
--- else
--- guess[k] = vv
--- done = true
--- end
--- end
--- end
--- end
--- end
--- -- generate tounicodes
--- for k, v in next, guess do
--- if type(v) == "number" then
--- guess[k] = tounicode16(v)
--- else
--- local t = nil
--- local l = lower(k)
--- local u = unicodes[l]
--- if not u then
--- -- forget about it
--- elseif u == -1 or u >= private or (u >= 0xE000 and u <= 0xF8FF) or u == 0xFFFE or u == 0xFFFF then
--- local du = descriptions[u]
--- local index = du.index
--- t = tounicode[index]
--- if t then
--- tounicode[index] = v
--- originals[index] = unicode
--- end
--- else
--- -- t = u
--- end
--- if t then
--- guess[k] = t
--- else
--- guess[k] = "FFFD"
--- end
--- end
--- end
--- local orphans = 0
--- local guessed = 0
--- for k, v in next, guess do
--- if v == "FFFD" then
--- orphans = orphans + 1
--- guess[k] = false
--- else
--- guessed = guessed + 1
--- guess[k] = true
--- end
--- end
--- -- resources.nounicode = guess -- only when we test things
--- if trace_loading and orphans > 0 or guessed > 0 then
--- report_fonts("%s glyphs with no related unicode, %s guessed, %s orphans",guessed+orphans,guessed,orphans)
--- end
--- end
--- if trace_mapping then
--- for unic, glyph in table.sortedhash(descriptions) do
--- local name = glyph.name
--- local index = glyph.index
--- local toun = tounicode[index]
--- if toun then
--- report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun)
--- else
--- report_fonts("internal slot %U, name %a, unicode %U",index,name,unic)
--- end
--- end
--- end
--- if trace_loading and (ns > 0 or nl > 0) then
--- report_fonts("%s tounicode entries added, ligatures %s",nl+ns,ns)
--- end
--- end
+-- to be completed .. for fonts that use unicodes for ligatures which
+-- is a actually a bad thing and should be avoided in the first place
+
+local overloads = {
+ IJ = { name = "I_J", unicode = { 0x49, 0x4A }, mess = 0x0132 },
+ ij = { name = "i_j", unicode = { 0x69, 0x6A }, mess = 0x0133 },
+ ff = { name = "f_f", unicode = { 0x66, 0x66 }, mess = 0xFB00 },
+ fi = { name = "f_i", unicode = { 0x66, 0x69 }, mess = 0xFB01 },
+ fl = { name = "f_l", unicode = { 0x66, 0x6C }, mess = 0xFB02 },
+ ffi = { name = "f_f_i", unicode = { 0x66, 0x66, 0x69 }, mess = 0xFB03 },
+ ffl = { name = "f_f_l", unicode = { 0x66, 0x66, 0x6C }, mess = 0xFB04 },
+ fj = { name = "f_j", unicode = { 0x66, 0x6A } },
+ fk = { name = "f_k", unicode = { 0x66, 0x6B } },
+}
+
+require("char-ini")
+
+for k, v in next, overloads do
+ local name = v.name
+ local mess = v.mess
+ if name then
+ overloads[name] = v
+ end
+ if mess then
+ overloads[mess] = v
+ end
+end
+
+mappings.overloads = overloads
function mappings.addtounicode(data,filename)
local resources = data.resources
@@ -513,12 +231,11 @@ function mappings.addtounicode(data,filename)
return
end
-- we need to move this code
- unicodes['space'] = unicodes['space'] or 32
- unicodes['hyphen'] = unicodes['hyphen'] or 45
- unicodes['zwj'] = unicodes['zwj'] or 0x200D
- unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
+ unicodes['space'] = unicodes['space'] or 32
+ unicodes['hyphen'] = unicodes['hyphen'] or 45
+ unicodes['zwj'] = unicodes['zwj'] or 0x200D
+ unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
local private = fonts.constructors.privateoffset
- local unknown = format("%04X",utfbyte("?"))
local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
----- namevector = fonts.encodings.agl.names -- loaded runtime in context
local missing = { }
@@ -538,7 +255,12 @@ function mappings.addtounicode(data,filename)
for unic, glyph in next, descriptions do
local index = glyph.index
local name = glyph.name
- if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
+ local r = overloads[name]
+ if r then
+ -- get rid of weird ligatures
+ -- glyph.name = r.name
+ glyph.unicode = r.unicode
+ elseif unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
local unicode = lumunic and lumunic[name] or unicodevector[name]
if unicode then
glyph.unicode = unicode
@@ -641,6 +363,11 @@ function mappings.addtounicode(data,filename)
end
end
-- check using substitutes and alternates
+ local r = overloads[unicode]
+ if r then
+ unicode = r.unicode
+ glyph.unicode = unicode
+ end
--
if not unicode then
missing[name] = true
@@ -670,6 +397,10 @@ function mappings.addtounicode(data,filename)
end
-- add to the list
local g = guess[variant]
+ -- local r = overloads[unicode]
+ -- if r then
+ -- unicode = r.unicode
+ -- end
if g then
g[gname] = unicode
else