summaryrefslogtreecommitdiff
path: root/tex/context/base/font-map.lua
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/font-map.lua')
-rw-r--r--tex/context/base/font-map.lua473
1 files changed, 396 insertions, 77 deletions
diff --git a/tex/context/base/font-map.lua b/tex/context/base/font-map.lua
index 429c73597..309435e0d 100644
--- a/tex/context/base/font-map.lua
+++ b/tex/context/base/font-map.lua
@@ -79,18 +79,46 @@ end
--- Convert a list of code points into one concatenated UTF-16BE hex string.
-- Code points below 0x10000 become four hex digits; higher ones are written
-- as a surrogate pair (eight hex digits).
-- @param unicodes sequence of numeric code points
-- @param name     glyph name, only used in the failure report
-- @return the concatenated hex string; nothing when an entry is out of range
--         (in which case a message is reported first)
local function tounicode16sequence(unicodes,name)
    local t = { }
    for l=1,#unicodes do
        local u = unicodes[l]
        if u < 0x10000 then
            t[l] = format("%04X",u)
        elseif u < 0x1FFFFFFFFF then
            -- this branch used to test the no longer existing 'unicode' variable
            -- which made every code point above the BMP fail with a nil comparison
            --
            -- surrogate pair: subtract 0x10000, the upper ten bits go on top of
            -- 0xD800 and the lower ten bits on top of 0xDC00
            -- NOTE(review): the upper bound is kept as it was; anything above
            -- 0x10FFFF is not a valid code point -- confirm whether callers rely on it
            local v = u - 0x10000
            t[l] = format("%04X%04X",floor(v/1024)+0xD800,v%1024+0xDC00)
        else
            report_fonts("can't convert %a in %a into tounicode",u,name)
            return
        end
    end
    return concat(t)
end
-- Private helper: one code point as UTF-16BE hex digits. Returns nothing
-- (after reporting) when the value is out of range.
local function tounicode_single(u,name)
    if u < 0x10000 then
        return format("%04X",u)
    elseif u < 0x1FFFFFFFFF then
        -- surrogate pair: subtract 0x10000, the upper ten bits go on top of
        -- 0xD800 and the lower ten bits on top of 0xDC00 (the 0xD800 base and
        -- the offset were missing, which gave an invalid first code unit)
        -- NOTE(review): the upper bound is kept as it was; anything above
        -- 0x10FFFF is not a valid code point -- confirm whether callers rely on it
        local v = u - 0x10000
        return format("%04X%04X",floor(v/1024)+0xD800,v%1024+0xDC00)
    else
        report_fonts("can't convert %a in %a into tounicode",u,name)
    end
end

--- Convert a code point, or a list of code points, into a UTF-16BE hex string.
-- @param unicode a numeric code point or a sequence of numeric code points
-- @param name    glyph name, only used in the failure report
-- @return the hex string (concatenated for a sequence); nothing when a value
--         is out of range (in which case a message is reported first)
local function tounicode(unicode,name)
    if type(unicode) == "table" then
        local t = { }
        for l=1,#unicode do
            local s = tounicode_single(unicode[l],name)
            if not s then
                -- one bad entry invalidates the whole sequence
                return
            end
            t[l] = s
        end
        return concat(t)
    else
        return tounicode_single(unicode,name)
    end
end
+
+
local function fromunicode16(str)
if #str == 4 then
return tonumber(str,16)
@@ -136,6 +164,7 @@ end
mappings.loadlumtable = loadlumtable
mappings.makenameparser = makenameparser
+mappings.tounicode = tounicode
mappings.tounicode16 = tounicode16
mappings.tounicode16sequence = tounicode16sequence
mappings.fromunicode16 = fromunicode16
@@ -158,6 +187,322 @@ local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator *
-- test("such_so_more")
-- test("such_so_more.that")
+-- function mappings.addtounicode(data,filename)
+-- local resources = data.resources
+-- local properties = data.properties
+-- local descriptions = data.descriptions
+-- local unicodes = resources.unicodes
+-- local lookuptypes = resources.lookuptypes
+-- if not unicodes then
+-- return
+-- end
+-- -- we need to move this code
+-- unicodes['space'] = unicodes['space'] or 32
+-- unicodes['hyphen'] = unicodes['hyphen'] or 45
+-- unicodes['zwj'] = unicodes['zwj'] or 0x200D
+-- unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
+-- -- the tounicode mapping is sparse and only needed for alternatives
+-- local private = fonts.constructors.privateoffset
+-- local unknown = format("%04X",utfbyte("?"))
+-- local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
+-- ----- namevector = fonts.encodings.agl.names -- loaded runtime in context
+-- local tounicode = { }
+-- local originals = { }
+-- local missing = { }
+-- resources.tounicode = tounicode
+-- resources.originals = originals
+-- local lumunic, uparser, oparser
+-- local cidinfo, cidnames, cidcodes, usedmap
+-- -- if false then -- will become an option
+-- -- lumunic = loadlumtable(filename)
+-- -- lumunic = lumunic and lumunic.tounicode
+-- -- end
+-- --
+-- cidinfo = properties.cidinfo
+-- usedmap = cidinfo and fonts.cid.getmap(cidinfo)
+-- --
+-- if usedmap then
+-- oparser = usedmap and makenameparser(cidinfo.ordering)
+-- cidnames = usedmap.names
+-- cidcodes = usedmap.unicodes
+-- end
+-- uparser = makenameparser()
+-- local ns, nl = 0, 0
+-- for unic, glyph in next, descriptions do
+-- local index = glyph.index
+-- local name = glyph.name
+-- if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
+-- local unicode = lumunic and lumunic[name] or unicodevector[name]
+-- if unicode then
+-- originals[index] = unicode
+-- tounicode[index] = tounicode16(unicode,name)
+-- ns = ns + 1
+-- end
+-- -- cidmap heuristics, beware, there is no guarantee for a match unless
+-- -- the chain resolves
+-- if (not unicode) and usedmap then
+-- local foundindex = lpegmatch(oparser,name)
+-- if foundindex then
+-- unicode = cidcodes[foundindex] -- name to number
+-- if unicode then
+-- originals[index] = unicode
+-- tounicode[index] = tounicode16(unicode,name)
+-- ns = ns + 1
+-- else
+-- local reference = cidnames[foundindex] -- number to name
+-- if reference then
+-- local foundindex = lpegmatch(oparser,reference)
+-- if foundindex then
+-- unicode = cidcodes[foundindex]
+-- if unicode then
+-- originals[index] = unicode
+-- tounicode[index] = tounicode16(unicode,name)
+-- ns = ns + 1
+-- end
+-- end
+-- if not unicode or unicode == "" then
+-- local foundcodes, multiple = lpegmatch(uparser,reference)
+-- if foundcodes then
+-- originals[index] = foundcodes
+-- if multiple then
+-- tounicode[index] = tounicode16sequence(foundcodes)
+-- nl = nl + 1
+-- unicode = true
+-- else
+-- tounicode[index] = tounicode16(foundcodes,name)
+-- ns = ns + 1
+-- unicode = foundcodes
+-- end
+-- end
+-- end
+-- end
+-- end
+-- end
+-- end
+-- -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_
+-- --
+-- -- It is not trivial to find a solution that suits all fonts. We tried several alternatives
+-- -- and this one seems to work reasonably well, also with fonts that use less standardized
+-- -- naming schemes. The extra private test was tested by KE and seems to work okay with
+-- -- non-typical fonts as well.
+-- --
+-- -- The next time I look into this, I'll add an extra analysis step to the otf loader (we can
+-- -- resolve some tounicodes by looking into the gsub data tables that are bound to glyphs).
+-- --
+-- if not unicode or unicode == "" then
+-- local split = lpegmatch(namesplitter,name)
+-- local nsplit = split and #split or 0
+-- local t, n = { }, 0
+-- unicode = true
+-- for l=1,nsplit do
+-- local base = split[l]
+-- local u = unicodes[base] or unicodevector[base]
+-- if not u then
+-- break
+-- elseif type(u) == "table" then
+-- if u[1] >= private then
+-- unicode = false
+-- break
+-- end
+-- n = n + 1
+-- t[n] = u[1]
+-- else
+-- if u >= private then
+-- unicode = false
+-- break
+-- end
+-- n = n + 1
+-- t[n] = u
+-- end
+-- end
+-- if n == 0 then -- done then
+-- -- nothing
+-- elseif n == 1 then
+-- local unicode = t[1]
+-- originals[index] = unicode
+-- tounicode[index] = tounicode16(unicode,name)
+-- else
+-- originals[index] = t
+-- tounicode[index] = tounicode16sequence(t)
+-- end
+-- nl = nl + 1
+-- end
+-- -- last resort (we might need to catch private here as well)
+-- if not unicode or unicode == "" then
+-- local foundcodes, multiple = lpegmatch(uparser,name)
+-- if foundcodes then
+-- if multiple then
+-- originals[index] = foundcodes
+-- tounicode[index] = tounicode16sequence(foundcodes,name)
+-- nl = nl + 1
+-- unicode = true
+-- else
+-- originals[index] = foundcodes
+-- tounicode[index] = tounicode16(foundcodes,name)
+-- ns = ns + 1
+-- unicode = foundcodes
+-- end
+-- end
+-- end
+-- -- check using substitutes and alternates
+-- --
+-- if not unicode then
+-- missing[name] = true
+-- end
+-- -- if not unicode then
+-- -- originals[index] = 0xFFFD
+-- -- tounicode[index] = "FFFD"
+-- -- end
+-- end
+-- end
+-- if next(missing) then
+-- local guess = { }
+-- -- helper
+-- local function check(gname,code,unicode)
+-- local description = descriptions[code]
+-- -- no need to add a self reference
+-- local variant = description.name
+-- if variant == gname then
+-- return
+-- end
+-- -- the variant already has a unicode (normally that results in a default tounicode to self)
+-- local unic = unicodes[variant]
+-- if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
+-- -- no default mapping and therefore maybe no tounicode yet
+-- else
+-- return
+-- end
+-- -- the variant already has a tounicode
+-- local index = descriptions[code].index
+-- if tounicode[index] then
+-- return
+-- end
+-- -- add to the list
+-- local g = guess[variant]
+-- if g then
+-- g[gname] = unicode
+-- else
+-- guess[variant] = { [gname] = unicode }
+-- end
+-- end
+-- --
+-- for unicode, description in next, descriptions do
+-- local slookups = description.slookups
+-- if slookups then
+-- local gname = description.name
+-- for tag, data in next, slookups do
+-- local lookuptype = lookuptypes[tag]
+-- if lookuptype == "alternate" then
+-- for i=1,#data do
+-- check(gname,data[i],unicode)
+-- end
+-- elseif lookuptype == "substitution" then
+-- check(gname,data,unicode)
+-- end
+-- end
+-- end
+-- local mlookups = description.mlookups
+-- if mlookups then
+-- local gname = description.name
+-- for tag, list in next, mlookups do
+-- local lookuptype = lookuptypes[tag]
+-- if lookuptype == "alternate" then
+-- for i=1,#list do
+-- local data = list[i]
+-- for i=1,#data do
+-- check(gname,data[i],unicode)
+-- end
+-- end
+-- elseif lookuptype == "substitution" then
+-- for i=1,#list do
+-- check(gname,list[i],unicode)
+-- end
+-- end
+-- end
+-- end
+-- end
+-- -- resolve references
+-- local done = true
+-- while done do
+-- done = false
+-- for k, v in next, guess do
+-- if type(v) ~= "number" then
+-- for kk, vv in next, v do
+-- if vv == -1 or vv >= private or (vv >= 0xE000 and vv <= 0xF8FF) or vv == 0xFFFE or vv == 0xFFFF then
+-- local uu = guess[kk]
+-- if type(uu) == "number" then
+-- guess[k] = uu
+-- done = true
+-- end
+-- else
+-- guess[k] = vv
+-- done = true
+-- end
+-- end
+-- end
+-- end
+-- end
+-- -- generate tounicodes
+-- for k, v in next, guess do
+-- if type(v) == "number" then
+-- guess[k] = tounicode16(v)
+-- else
+-- local t = nil
+-- local l = lower(k)
+-- local u = unicodes[l]
+-- if not u then
+-- -- forget about it
+-- elseif u == -1 or u >= private or (u >= 0xE000 and u <= 0xF8FF) or u == 0xFFFE or u == 0xFFFF then
+-- local du = descriptions[u]
+-- local index = du.index
+-- t = tounicode[index]
+-- if t then
+-- tounicode[index] = v
+-- originals[index] = unicode
+-- end
+-- else
+-- -- t = u
+-- end
+-- if t then
+-- guess[k] = t
+-- else
+-- guess[k] = "FFFD"
+-- end
+-- end
+-- end
+-- local orphans = 0
+-- local guessed = 0
+-- for k, v in next, guess do
+-- if v == "FFFD" then
+-- orphans = orphans + 1
+-- guess[k] = false
+-- else
+-- guessed = guessed + 1
+-- guess[k] = true
+-- end
+-- end
+-- -- resources.nounicode = guess -- only when we test things
+-- if trace_loading and orphans > 0 or guessed > 0 then
+-- report_fonts("%s glyphs with no related unicode, %s guessed, %s orphans",guessed+orphans,guessed,orphans)
+-- end
+-- end
+-- if trace_mapping then
+-- for unic, glyph in table.sortedhash(descriptions) do
+-- local name = glyph.name
+-- local index = glyph.index
+-- local toun = tounicode[index]
+-- if toun then
+-- report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun)
+-- else
+-- report_fonts("internal slot %U, name %a, unicode %U",index,name,unic)
+-- end
+-- end
+-- end
+-- if trace_loading and (ns > 0 or nl > 0) then
+-- report_fonts("%s tounicode entries added, ligatures %s",nl+ns,ns)
+-- end
+-- end
+
function mappings.addtounicode(data,filename)
local resources = data.resources
local properties = data.properties
@@ -172,22 +517,13 @@ function mappings.addtounicode(data,filename)
unicodes['hyphen'] = unicodes['hyphen'] or 45
unicodes['zwj'] = unicodes['zwj'] or 0x200D
unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
- -- the tounicode mapping is sparse and only needed for alternatives
local private = fonts.constructors.privateoffset
local unknown = format("%04X",utfbyte("?"))
local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
----- namevector = fonts.encodings.agl.names -- loaded runtime in context
- local tounicode = { }
- local originals = { }
local missing = { }
- resources.tounicode = tounicode
- resources.originals = originals
local lumunic, uparser, oparser
local cidinfo, cidnames, cidcodes, usedmap
- if false then -- will become an option
- lumunic = loadlumtable(filename)
- lumunic = lumunic and lumunic.tounicode
- end
--
cidinfo = properties.cidinfo
usedmap = cidinfo and fonts.cid.getmap(cidinfo)
@@ -205,9 +541,8 @@ function mappings.addtounicode(data,filename)
if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
local unicode = lumunic and lumunic[name] or unicodevector[name]
if unicode then
- originals[index] = unicode
- tounicode[index] = tounicode16(unicode,name)
- ns = ns + 1
+ glyph.unicode = unicode
+ ns = ns + 1
end
-- cidmap heuristics, beware, there is no guarantee for a match unless
-- the chain resolves
@@ -216,9 +551,8 @@ function mappings.addtounicode(data,filename)
if foundindex then
unicode = cidcodes[foundindex] -- name to number
if unicode then
- originals[index] = unicode
- tounicode[index] = tounicode16(unicode,name)
- ns = ns + 1
+ glyph.unicode = unicode
+ ns = ns + 1
else
local reference = cidnames[foundindex] -- number to name
if reference then
@@ -226,23 +560,20 @@ function mappings.addtounicode(data,filename)
if foundindex then
unicode = cidcodes[foundindex]
if unicode then
- originals[index] = unicode
- tounicode[index] = tounicode16(unicode,name)
- ns = ns + 1
+ glyph.unicode = unicode
+ ns = ns + 1
end
end
if not unicode or unicode == "" then
local foundcodes, multiple = lpegmatch(uparser,reference)
if foundcodes then
- originals[index] = foundcodes
+ glyph.unicode = foundcodes
if multiple then
- tounicode[index] = tounicode16sequence(foundcodes)
- nl = nl + 1
- unicode = true
+ nl = nl + 1
+ unicode = true
else
- tounicode[index] = tounicode16(foundcodes,name)
- ns = ns + 1
- unicode = foundcodes
+ ns = ns + 1
+ unicode = foundcodes
end
end
end
@@ -289,11 +620,9 @@ function mappings.addtounicode(data,filename)
if n == 0 then -- done then
-- nothing
elseif n == 1 then
- originals[index] = t[1]
- tounicode[index] = tounicode16(t[1],name)
+ glyph.unicode = t[1]
else
- originals[index] = t
- tounicode[index] = tounicode16sequence(t)
+ glyph.unicode = t
end
nl = nl + 1
end
@@ -301,16 +630,13 @@ function mappings.addtounicode(data,filename)
if not unicode or unicode == "" then
local foundcodes, multiple = lpegmatch(uparser,name)
if foundcodes then
+ glyph.unicode = foundcodes
if multiple then
- originals[index] = foundcodes
- tounicode[index] = tounicode16sequence(foundcodes,name)
- nl = nl + 1
- unicode = true
+ nl = nl + 1
+ unicode = true
else
- originals[index] = foundcodes
- tounicode[index] = tounicode16(foundcodes,name)
- ns = ns + 1
- unicode = foundcodes
+ ns = ns + 1
+ unicode = foundcodes
end
end
end
@@ -319,14 +645,9 @@ function mappings.addtounicode(data,filename)
if not unicode then
missing[name] = true
end
- -- if not unicode then
- -- originals[index] = 0xFFFD
- -- tounicode[index] = "FFFD"
- -- end
end
end
if next(missing) then
--- inspect(missing)
local guess = { }
-- helper
local function check(gname,code,unicode)
@@ -344,8 +665,7 @@ function mappings.addtounicode(data,filename)
return
end
-- the variant already has a tounicode
- local index = descriptions[code].index
- if tounicode[index] then
+ if descriptions[code].unicode then
return
end
-- add to the list
@@ -413,52 +733,51 @@ function mappings.addtounicode(data,filename)
end
end
end
- -- generate tounicodes
+ -- wrap up
+ local orphans = 0
+ local guessed = 0
for k, v in next, guess do
if type(v) == "number" then
- guess[k] = tounicode16(v)
+ descriptions[unicodes[k]].unicode = descriptions[v].unicode or v -- can also be a table
+ guessed = guessed + 1
else
local t = nil
local l = lower(k)
local u = unicodes[l]
if not u then
- -- forget about it
+ orphans = orphans + 1
elseif u == -1 or u >= private or (u >= 0xE000 and u <= 0xF8FF) or u == 0xFFFE or u == 0xFFFF then
- t = tounicode[descriptions[u].index]
- else
- -- t = u
- end
- if t then
- guess[k] = t
+ local unicode = descriptions[u].unicode
+ if unicode then
+ descriptions[unicodes[k]].unicode = unicode
+ guessed = guessed + 1
+ else
+ orphans = orphans + 1
+ end
else
- guess[k] = "FFFD"
+ orphans = orphans + 1
end
end
end
- local orphans = 0
- local guessed = 0
- for k, v in next, guess do
- tounicode[descriptions[unicodes[k]].index] = v
- if v == "FFFD" then
- orphans = orphans + 1
- guess[k] = false
- else
- guessed = guessed + 1
- guess[k] = true
- end
- end
- -- resources.nounicode = guess -- only when we test things
if trace_loading and orphans > 0 or guessed > 0 then
report_fonts("%s glyphs with no related unicode, %s guessed, %s orphans",guessed+orphans,guessed,orphans)
end
end
if trace_mapping then
for unic, glyph in table.sortedhash(descriptions) do
- local name = glyph.name
- local index = glyph.index
- local toun = tounicode[index]
- if toun then
- report_fonts("internal slot %U, name %a, unicode %U, tounicode %a",index,name,unic,toun)
+ local name = glyph.name
+ local index = glyph.index
+ local unicode = glyph.unicode
+ if unicode then
+ if type(unicode) == "table" then
+ local unicodes = { }
+ for i=1,#unicode do
+ unicodes[i] = formatters("%U",unicode[i])
+ end
+ report_fonts("internal slot %U, name %a, unicode %U, tounicode % t",index,name,unic,unicodes)
+ else
+ report_fonts("internal slot %U, name %a, unicode %U, tounicode %U",index,name,unic,unicode)
+ end
else
report_fonts("internal slot %U, name %a, unicode %U",index,name,unic)
end