summary | refs | log | tree | commit | diff
path: root/src/fontloader/misc/fontloader-font-map.lua
diff options
context:
space:
mode:
Diffstat (limited to 'src/fontloader/misc/fontloader-font-map.lua')
-rw-r--r-- src/fontloader/misc/fontloader-font-map.lua 473
1 files changed, 163 insertions, 310 deletions
diff --git a/src/fontloader/misc/fontloader-font-map.lua b/src/fontloader/misc/fontloader-font-map.lua
index 69474ba..b645d9a 100644
--- a/src/fontloader/misc/fontloader-font-map.lua
+++ b/src/fontloader/misc/fontloader-font-map.lua
@@ -31,25 +31,27 @@ of obsolete. Some code may move to runtime or auxiliary modules.</p>
<p>The name to unicode related code will stay of course.</p>
--ldx]]--
-local function loadlumtable(filename) -- will move to font goodies
- local lumname = file.replacesuffix(file.basename(filename),"lum")
- local lumfile = resolvers.findfile(lumname,"map") or ""
- if lumfile ~= "" and lfs.isfile(lumfile) then
- if trace_loading or trace_mapping then
- report_fonts("loading map table %a",lumfile)
- end
- lumunic = dofile(lumfile)
- return lumunic, lumfile
- end
-end
+-- local function loadlumtable(filename) -- will move to font goodies
+-- local lumname = file.replacesuffix(file.basename(filename),"lum")
+-- local lumfile = resolvers.findfile(lumname,"map") or ""
+-- if lumfile ~= "" and lfs.isfile(lumfile) then
+-- if trace_loading or trace_mapping then
+-- report_fonts("loading map table %a",lumfile)
+-- end
+-- lumunic = dofile(lumfile)
+-- return lumunic, lumfile
+-- end
+-- end
local hex = R("AF","09")
-local hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end
-local hexsix = (hex*hex*hex*hex*hex*hex) / function(s) return tonumber(s,16) end
+----- hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end
+----- hexsix = (hex*hex*hex*hex*hex*hex) / function(s) return tonumber(s,16) end
+local hexfour = (hex*hex*hex^-2) / function(s) return tonumber(s,16) end
+local hexsix = (hex*hex*hex^-4) / function(s) return tonumber(s,16) end
local dec = (R("09")^1) / tonumber
local period = P(".")
-local unicode = P("uni") * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true))
-local ucode = P("u") * (hexsix * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true))
+local unicode = (P("uni") + P("UNI")) * (hexfour * (period + P(-1)) * Cc(false) + Ct(hexfour^1) * Cc(true)) -- base planes
+local ucode = (P("u") + P("U") ) * (hexsix * (period + P(-1)) * Cc(false) + Ct(hexsix ^1) * Cc(true)) -- extended
local index = P("index") * dec * Cc(false)
local parser = unicode + ucode + index
@@ -168,7 +170,6 @@ end
-- return s
-- end
-mappings.loadlumtable = loadlumtable
mappings.makenameparser = makenameparser
mappings.tounicode = tounicode
mappings.tounicode16 = tounicode16
@@ -179,13 +180,13 @@ local ligseparator = P("_")
local varseparator = P(".")
local namesplitter = Ct(C((1 - ligseparator - varseparator)^1) * (ligseparator * C((1 - ligseparator - varseparator)^1))^0)
+-- maybe: ff fi fl ffi ffl => f_f f_i f_l f_f_i f_f_l
+
-- local function test(name)
-- local split = lpegmatch(namesplitter,name)
-- print(string.formatters["%s: [% t]"](name,split))
-- end
--- maybe: ff fi fl ffi ffl => f_f f_i f_l f_f_i f_f_l
-
-- test("i.f_")
-- test("this")
-- test("this.that")
@@ -221,332 +222,184 @@ end
mappings.overloads = overloads
-function mappings.addtounicode(data,filename)
- local resources = data.resources
- local properties = data.properties
- local descriptions = data.descriptions
- local unicodes = resources.unicodes
- local lookuptypes = resources.lookuptypes
+function mappings.addtounicode(data,filename,checklookups)
+ local resources = data.resources
+ local unicodes = resources.unicodes
if not unicodes then
return
end
+ local properties = data.properties
+ local descriptions = data.descriptions
-- we need to move this code
unicodes['space'] = unicodes['space'] or 32
unicodes['hyphen'] = unicodes['hyphen'] or 45
unicodes['zwj'] = unicodes['zwj'] or 0x200D
unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
- local private = fonts.constructors.privateoffset
- local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
- ----- namevector = fonts.encodings.agl.names -- loaded runtime in context
- local missing = { }
- local lumunic, uparser, oparser
- local cidinfo, cidnames, cidcodes, usedmap
- --
- cidinfo = properties.cidinfo
- usedmap = cidinfo and fonts.cid.getmap(cidinfo)
--
+ local private = fonts.constructors and fonts.constructors.privateoffset or 0xF0000 -- 0x10FFFF
+ local unicodevector = fonts.encodings.agl.unicodes or { } -- loaded runtime in context
+ local contextvector = fonts.encodings.agl.ctxcodes or { } -- loaded runtime in context
+ local missing = { }
+ local nofmissing = 0
+ local oparser = nil
+ local cidnames = nil
+ local cidcodes = nil
+ local cidinfo = properties.cidinfo
+ local usedmap = cidinfo and fonts.cid.getmap(cidinfo)
+ local uparser = makenameparser() -- hm, every time?
if usedmap then
- oparser = usedmap and makenameparser(cidinfo.ordering)
- cidnames = usedmap.names
- cidcodes = usedmap.unicodes
+ oparser = usedmap and makenameparser(cidinfo.ordering)
+ cidnames = usedmap.names
+ cidcodes = usedmap.unicodes
end
- uparser = makenameparser()
- local ns, nl = 0, 0
+ local ns = 0
+ local nl = 0
+ --
for unic, glyph in next, descriptions do
- local index = glyph.index
- local name = glyph.name
- local r = overloads[name]
- if r then
- -- get rid of weird ligatures
- -- glyph.name = r.name
- glyph.unicode = r.unicode
- elseif unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
- local unicode = lumunic and lumunic[name] or unicodevector[name]
- if unicode then
- glyph.unicode = unicode
- ns = ns + 1
- end
- -- cidmap heuristics, beware, there is no guarantee for a match unless
- -- the chain resolves
- if (not unicode) and usedmap then
- local foundindex = lpegmatch(oparser,name)
- if foundindex then
- unicode = cidcodes[foundindex] -- name to number
- if unicode then
- glyph.unicode = unicode
- ns = ns + 1
- else
- local reference = cidnames[foundindex] -- number to name
- if reference then
- local foundindex = lpegmatch(oparser,reference)
- if foundindex then
- unicode = cidcodes[foundindex]
- if unicode then
- glyph.unicode = unicode
- ns = ns + 1
+ local name = glyph.name
+ if name then
+ local index = glyph.index
+ local r = overloads[name]
+ if r then
+ -- get rid of weird ligatures
+ -- glyph.name = r.name
+ glyph.unicode = r.unicode
+ elseif not unic or unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
+ local unicode = unicodevector[name] or contextvector[name]
+ if unicode then
+ glyph.unicode = unicode
+ ns = ns + 1
+ end
+ -- cidmap heuristics, beware, there is no guarantee for a match unless
+ -- the chain resolves
+ if (not unicode) and usedmap then
+ local foundindex = lpegmatch(oparser,name)
+ if foundindex then
+ unicode = cidcodes[foundindex] -- name to number
+ if unicode then
+ glyph.unicode = unicode
+ ns = ns + 1
+ else
+ local reference = cidnames[foundindex] -- number to name
+ if reference then
+ local foundindex = lpegmatch(oparser,reference)
+ if foundindex then
+ unicode = cidcodes[foundindex]
+ if unicode then
+ glyph.unicode = unicode
+ ns = ns + 1
+ end
end
- end
- if not unicode or unicode == "" then
- local foundcodes, multiple = lpegmatch(uparser,reference)
- if foundcodes then
- glyph.unicode = foundcodes
- if multiple then
- nl = nl + 1
- unicode = true
- else
- ns = ns + 1
- unicode = foundcodes
+ if not unicode or unicode == "" then
+ local foundcodes, multiple = lpegmatch(uparser,reference)
+ if foundcodes then
+ glyph.unicode = foundcodes
+ if multiple then
+ nl = nl + 1
+ unicode = true
+ else
+ ns = ns + 1
+ unicode = foundcodes
+ end
end
end
end
end
end
end
- end
- -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_
- --
- -- It is not trivial to find a solution that suits all fonts. We tried several alternatives
- -- and this one seems to work reasonable also with fonts that use less standardized naming
- -- schemes. The extra private test is tested by KE and seems to work okay with non-typical
- -- fonts as well.
- --
- -- The next time I look into this, I'll add an extra analysis step to the otf loader (we can
- -- resolve some tounicodes by looking into the gsub data tables that are bound to glyphs.
- --
--- a real tricky last resort:
---
--- local lookups = glyph.lookups
--- if lookups then
--- for _, lookup in next, lookups do -- assume consistency else we need to sort
--- for i=1,#lookup do
--- local l = lookup[i]
--- if l.type == "ligature" then
--- local s = l.specification
--- if s.char == glyph.name then
--- local components = s.components
--- if components then
--- local t, n = { }, 0
--- unicode = true
--- for l=1,#components do
--- local base = components[l]
--- local u = unicodes[base] or unicodevector[base]
--- if not u then
--- break
--- elseif type(u) == "table" then
--- if u[1] >= private then
--- unicode = false
--- break
--- end
--- n = n + 1
--- t[n] = u[1]
--- else
--- if u >= private then
--- unicode = false
--- break
--- end
--- n = n + 1
--- t[n] = u
--- end
--- end
--- if n == 0 then -- done then
--- -- nothing
--- elseif n == 1 then
--- glyph.unicode = t[1]
--- else
--- glyph.unicode = t
--- end
--- nl = nl + 1
--- break
--- end
--- end
--- end
--- end
--- if unicode then
--- break
--- end
--- end
--- end
- if not unicode or unicode == "" then
- local split = lpegmatch(namesplitter,name)
- local nsplit = split and #split or 0
- local t, n = { }, 0
- unicode = true
- for l=1,nsplit do
- local base = split[l]
- local u = unicodes[base] or unicodevector[base]
- if not u then
- break
- elseif type(u) == "table" then
- if u[1] >= private then
- unicode = false
- break
+ -- a.whatever or a_b_c.whatever or a_b_c (no numbers) a.b_
+ --
+ -- It is not trivial to find a solution that suits all fonts. We tried several alternatives
+ -- and this one seems to work reasonably well also with fonts that use less standardized naming
+ -- schemes. The extra private test is tested by KE and seems to work okay with non-typical
+ -- fonts as well.
+ --
+ if not unicode or unicode == "" then
+ local split = lpegmatch(namesplitter,name)
+ local nsplit = split and #split or 0 -- add if
+ if nsplit == 0 then
+ -- skip
+ elseif nsplit == 1 then
+ local base = split[1]
+ local u = unicodes[base] or unicodevector[base] or contextvector[name]
+ if not u then
+ -- skip
+ elseif type(u) == "table" then
+ -- unlikely
+ if u[1] < private then
+ unicode = u
+ glyph.unicode = unicode
+ end
+ elseif u < private then
+ unicode = u
+ glyph.unicode = unicode
end
- n = n + 1
- t[n] = u[1]
else
- if u >= private then
- unicode = false
- break
- end
- n = n + 1
- t[n] = u
- end
- end
- if n == 0 then -- done then
- -- nothing
- elseif n == 1 then
- glyph.unicode = t[1]
- else
- glyph.unicode = t
- end
- nl = nl + 1
- end
- -- last resort (we might need to catch private here as well)
- if not unicode or unicode == "" then
- local foundcodes, multiple = lpegmatch(uparser,name)
- if foundcodes then
- glyph.unicode = foundcodes
- if multiple then
- nl = nl + 1
- unicode = true
- else
- ns = ns + 1
- unicode = foundcodes
- end
- end
- end
- -- check using substitutes and alternates
- local r = overloads[unicode]
- if r then
- unicode = r.unicode
- glyph.unicode = unicode
- end
- --
- if not unicode then
- missing[name] = true
- end
- end
- end
- if next(missing) then
- local guess = { }
- -- helper
- local function check(gname,code,unicode)
- local description = descriptions[code]
- -- no need to add a self reference
- local variant = description.name
- if variant == gname then
- return
- end
- -- the variant already has a unicode (normally that resultrs in a default tounicode to self)
- local unic = unicodes[variant]
- if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
- -- no default mapping and therefore maybe no tounicode yet
- else
- return
- end
- -- the variant already has a tounicode
- if descriptions[code].unicode then
- return
- end
- -- add to the list
- local g = guess[variant]
- -- local r = overloads[unicode]
- -- if r then
- -- unicode = r.unicode
- -- end
- if g then
- g[gname] = unicode
- else
- guess[variant] = { [gname] = unicode }
- end
- end
- --
- for unicode, description in next, descriptions do
- local slookups = description.slookups
- if slookups then
- local gname = description.name
- for tag, data in next, slookups do
- local lookuptype = lookuptypes[tag]
- if lookuptype == "alternate" then
- for i=1,#data do
- check(gname,data[i],unicode)
- end
- elseif lookuptype == "substitution" then
- check(gname,data,unicode)
- end
- end
- end
- local mlookups = description.mlookups
- if mlookups then
- local gname = description.name
- for tag, list in next, mlookups do
- local lookuptype = lookuptypes[tag]
- if lookuptype == "alternate" then
- for i=1,#list do
- local data = list[i]
- for i=1,#data do
- check(gname,data[i],unicode)
+ local t, n = { }, 0
+ for l=1,nsplit do
+ local base = split[l]
+ local u = unicodes[base] or unicodevector[base] or contextvector[name]
+ if not u then
+ break
+ elseif type(u) == "table" then
+ if u[1] >= private then
+ break
+ end
+ n = n + 1
+ t[n] = u[1]
+ else
+ if u >= private then
+ break
+ end
+ n = n + 1
+ t[n] = u
end
end
- elseif lookuptype == "substitution" then
- for i=1,#list do
- check(gname,list[i],unicode)
+ if n > 0 then
+ if n == 1 then
+ unicode = t[1]
+ else
+ unicode = t
+ end
+ glyph.unicode = unicode
end
end
+ nl = nl + 1
end
- end
- end
- -- resolve references
- local done = true
- while done do
- done = false
- for k, v in next, guess do
- if type(v) ~= "number" then
- for kk, vv in next, v do
- if vv == -1 or vv >= private or (vv >= 0xE000 and vv <= 0xF8FF) or vv == 0xFFFE or vv == 0xFFFF then
- local uu = guess[kk]
- if type(uu) == "number" then
- guess[k] = uu
- done = true
- end
+ -- last resort (we might need to catch private here as well)
+ if not unicode or unicode == "" then
+ local foundcodes, multiple = lpegmatch(uparser,name)
+ if foundcodes then
+ glyph.unicode = foundcodes
+ if multiple then
+ nl = nl + 1
+ unicode = true
else
- guess[k] = vv
- done = true
+ ns = ns + 1
+ unicode = foundcodes
end
end
end
- end
- end
- -- wrap up
- local orphans = 0
- local guessed = 0
- for k, v in next, guess do
- if type(v) == "number" then
- descriptions[unicodes[k]].unicode = descriptions[v].unicode or v -- can also be a table
- guessed = guessed + 1
- else
- local t = nil
- local l = lower(k)
- local u = unicodes[l]
- if not u then
- orphans = orphans + 1
- elseif u == -1 or u >= private or (u >= 0xE000 and u <= 0xF8FF) or u == 0xFFFE or u == 0xFFFF then
- local unicode = descriptions[u].unicode
- if unicode then
- descriptions[unicodes[k]].unicode = unicode
- guessed = guessed + 1
- else
- orphans = orphans + 1
- end
- else
- orphans = orphans + 1
+ -- check using substitutes and alternates
+ local r = overloads[unicode]
+ if r then
+ unicode = r.unicode
+ glyph.unicode = unicode
+ end
+ --
+ if not unicode then
+ missing[unic] = true
+ nofmissing = nofmissing + 1
end
end
+ else
+ -- no name
end
- if trace_loading and orphans > 0 or guessed > 0 then
- report_fonts("%s glyphs with no related unicode, %s guessed, %s orphans",guessed+orphans,guessed,orphans)
- end
end
+ if type(checklookups) == "function" then
+ checklookups(data,missing,nofmissing)
+ end
+ -- todo: go lowercase
if trace_mapping then
for unic, glyph in table.sortedhash(descriptions) do
local name = glyph.name