summaryrefslogtreecommitdiff
path: root/otfl-font-map.lua
diff options
context:
space:
mode:
authorKhaled Hosny <khaledhosny@eglug.org>2010-02-22 14:26:34 +0200
committerKhaled Hosny <khaledhosny@eglug.org>2010-02-23 01:44:03 +0200
commite064aa2dc70fb819cfdb6b3c0bdf1d081859584f (patch)
treeb429356162be25b812b6e75e85cccae61f893ad4 /otfl-font-map.lua
parente0931232fb05665da0e809cb3d7be0d3d6c8e820 (diff)
downloadluaotfload-e064aa2dc70fb819cfdb6b3c0bdf1d081859584f.tar.gz
Updating to latest ConTeXt beta (2010.02.23)
Now many of our fixes are incorporated upstream or fixed in a different way.
Diffstat (limited to 'otfl-font-map.lua')
-rw-r--r--otfl-font-map.lua161
1 files changed, 155 insertions, 6 deletions
diff --git a/otfl-font-map.lua b/otfl-font-map.lua
index 9e85516..9dd2bd0 100644
--- a/otfl-font-map.lua
+++ b/otfl-font-map.lua
@@ -6,10 +6,13 @@ if not modules then modules = { } end modules ['font-map'] = {
license = "see context related readme files"
}
-local match, format, find, concat, gsub = string.match, string.format, string.find, table.concat, string.gsub
+local utf = unicode.utf8
+local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
local lpegmatch = lpeg.match
+local utfbyte = utf.byte
-local trace_loading = false trackers.register("otf.loading", function(v) trace_loading = v end)
+local trace_loading = false trackers.register("otf.loading", function(v) trace_loading = v end)
+local trace_unimapping = false trackers.register("otf.unimapping", function(v) trace_unimapping = v end)
local ctxcatcodes = tex and tex.ctxcatcodes
@@ -128,7 +131,7 @@ function fonts.map.load_file(filename, entries, encodings)
return entries, encodings
end
-function fonts.map.load_lum_table(filename)
+local function load_lum_table(filename)
local lumname = file.replacesuffix(file.basename(filename),"lum")
local lumfile = resolvers.find_file(lumname,"map") or ""
if lumfile ~= "" and lfs.isfile(lumfile) then
@@ -154,7 +157,7 @@ local parser = unicode + ucode + index
local parsers = { }
-function fonts.map.make_name_parser(str)
+local function make_name_parser(str)
if not str or str == "" then
return parser
else
@@ -181,7 +184,7 @@ end
--~ test("index1234")
--~ test("Japan1.123")
-function fonts.map.tounicode16(unicode)
+local function tounicode16(unicode)
if unicode < 0x10000 then
return format("%04X",unicode)
else
@@ -189,7 +192,7 @@ function fonts.map.tounicode16(unicode)
end
end
-function fonts.map.tounicode16sequence(unicodes)
+local function tounicode16sequence(unicodes)
local t = { }
for l=1,#unicodes do
local unicode = unicodes[l]
@@ -222,3 +225,149 @@ end
--~ return s
--~ end
+fonts.map.load_lum_table = load_lum_table
+fonts.map.make_name_parser = make_name_parser
+fonts.map.tounicode16 = tounicode16
+fonts.map.tounicode16sequence = tounicode16sequence
+
+local separator = lpeg.S("_.")
+local other = lpeg.C((1 - separator)^1)
+local ligsplitter = lpeg.Ct(other * (separator * other)^0)
+
+--~ print(table.serialize(lpegmatch(ligsplitter,"this")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"this.that")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"japan1.123")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more.that")))
+
+fonts.map.add_to_unicode = function(data,filename)
+ local unicodes = data.luatex and data.luatex.unicodes
+ if not unicodes then
+ return
+ end
+ -- we need to move this code
+ unicodes['space'] = unicodes['space'] or 32
+ unicodes['hyphen'] = unicodes['hyphen'] or 45
+ unicodes['zwj'] = unicodes['zwj'] or 0x200D
+ unicodes['zwnj'] = unicodes['zwnj'] or 0x200C
+ -- the tounicode mapping is sparse and only needed for alternatives
+ local tounicode, originals, ns, nl, private, unknown = { }, { }, 0, 0, fonts.private, format("%04X",utfbyte("?"))
+ data.luatex.tounicode, data.luatex.originals = tounicode, originals
+ local lumunic, uparser, oparser
+ if false then -- will become an option
+ lumunic = load_lum_table(filename)
+ lumunic = lumunic and lumunic.tounicode
+ end
+ local cidinfo, cidnames, cidcodes = data.cidinfo
+ local usedmap = cidinfo and cidinfo.usedname
+ usedmap = usedmap and lower(usedmap)
+ usedmap = usedmap and fonts.cid.map[usedmap]
+ if usedmap then
+ oparser = usedmap and make_name_parser(cidinfo.ordering)
+ cidnames = usedmap.names
+ cidcodes = usedmap.unicodes
+ end
+ uparser = make_name_parser()
+ local aglmap = fonts.map and fonts.map.agl_to_unicode
+ for index, glyph in next, data.glyphs do
+ local name, unic = glyph.name, glyph.unicode or -1 -- play safe
+ if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
+ local unicode = (lumunic and lumunic[name]) or (aglmap and aglmap[name])
+ if unicode then
+ originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+ end
+ -- cidmap heuristics, beware, there is no guarantee for a match unless
+ -- the chain resolves
+ if (not unicode) and usedmap then
+ local foundindex = lpegmatch(oparser,name)
+ if foundindex then
+ unicode = cidcodes[foundindex] -- name to number
+ if unicode then
+ originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+ else
+ local reference = cidnames[foundindex] -- number to name
+ if reference then
+ local foundindex = lpegmatch(oparser,reference)
+ if foundindex then
+ unicode = cidcodes[foundindex]
+ if unicode then
+ originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+ end
+ end
+ if not unicode then
+ local foundcodes, multiple = lpegmatch(uparser,reference)
+ if foundcodes then
+ if multiple then
+ originals[index], tounicode[index], nl, unicode = foundcodes, tounicode16sequence(foundcodes), nl + 1, true
+ else
+ originals[index], tounicode[index], ns, unicode = foundcodes, tounicode16(foundcodes), ns + 1, foundcodes
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ -- a.whatever or a_b_c.whatever or a_b_c (no numbers)
+ if not unicode then
+ local split = lpegmatch(ligsplitter,name)
+ local nplit = (split and #split) or 0
+ if nplit == 0 then
+ -- skip
+ elseif nplit == 1 then
+ local base = split[1]
+ unicode = unicodes[base] or (aglmap and aglmap[base])
+ if unicode then
+ if type(unicode) == "table" then
+ unicode = unicode[1]
+ end
+ originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+ end
+ else
+ local t = { }
+ for l=1,nplit do
+ local base = split[l]
+ local u = unicodes[base] or (aglmap and aglmap[base])
+ if not u then
+ break
+ elseif type(u) == "table" then
+ t[#t+1] = u[1]
+ else
+ t[#t+1] = u
+ end
+ end
+ if #t > 0 then -- done then
+ originals[index], tounicode[index], nl, unicode = t, tounicode16sequence(t), nl + 1, true
+ end
+ end
+ end
+ -- last resort
+ if not unicode then
+ local foundcodes, multiple = lpegmatch(uparser,name)
+ if foundcodes then
+ if multiple then
+ originals[index], tounicode[index], nl, unicode = foundcodes, tounicode16sequence(foundcodes), nl + 1, true
+ else
+ originals[index], tounicode[index], ns, unicode = foundcodes, tounicode16(foundcodes), ns + 1, foundcodes
+ end
+ end
+ end
+ if not unicode then
+ originals[index], tounicode[index] = 0xFFFD, "FFFD"
+ end
+ end
+ end
+ if trace_unimapping then
+ for index, glyph in table.sortedpairs(data.glyphs) do
+ local toun, name, unic = tounicode[index], glyph.name, glyph.unicode or -1 -- play safe
+ if toun then
+ logs.report("load otf","internal: 0x%05X, name: %s, unicode: 0x%05X, tounicode: %s",index,name,unic,toun)
+ else
+ logs.report("load otf","internal: 0x%05X, name: %s, unicode: 0x%05X",index,name,unic)
+ end
+ end
+ end
+ if trace_loading and (ns > 0 or nl > 0) then
+ logs.report("load otf","enhance: %s tounicode entries added (%s ligatures)",nl+ns, ns)
+ end
+end