Updating to latest ConTeXt beta (2010.02.23)

Now many of our fixes are incorporated upstream or fixed in a different way.
author: Khaled Hosny <khaledhosny@eglug.org> 2010-02-22 14:26:34 +0200
committer: Khaled Hosny <khaledhosny@eglug.org> 2010-02-23 01:44:03 +0200
commit: e064aa2dc70fb819cfdb6b3c0bdf1d081859584f (patch)
tree: b429356162be25b812b6e75e85cccae61f893ad4 /otfl-font-map.lua
parent: e0931232fb05665da0e809cb3d7be0d3d6c8e820 (diff)
download: luaotfload-e064aa2dc70fb819cfdb6b3c0bdf1d081859584f.tar.gz
1 files changed, 155 insertions, 6 deletions
diff --git a/otfl-font-map.lua b/otfl-font-map.lua
index 9e85516..9dd2bd0 100644
--- a/otfl-font-map.lua
+++ b/otfl-font-map.lua
@@ -6,10 +6,13 @@ if not modules then modules = { } end modules ['font-map'] = {
     license   = "see context related readme files"
 }
 
-local match, format, find, concat, gsub = string.match, string.format, string.find, table.concat, string.gsub
+local utf = unicode.utf8
+local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
 local lpegmatch = lpeg.match
+local utfbyte = utf.byte
 
-local trace_loading = false  trackers.register("otf.loading", function(v) trace_loading = v end)
+local trace_loading    = false  trackers.register("otf.loading",    function(v) trace_loading    = v end)
+local trace_unimapping = false  trackers.register("otf.unimapping", function(v) trace_unimapping = v end)
 
 local ctxcatcodes = tex and tex.ctxcatcodes
 
@@ -128,7 +131,7 @@ function fonts.map.load_file(filename, entries, encodings)
     return entries, encodings
 end
 
-function fonts.map.load_lum_table(filename)
+local function load_lum_table(filename)
     local lumname = file.replacesuffix(file.basename(filename),"lum")
     local lumfile = resolvers.find_file(lumname,"map") or ""
     if lumfile ~= "" and lfs.isfile(lumfile) then
@@ -154,7 +157,7 @@ local parser  = unicode + ucode + index
 
 local parsers = { }
 
-function fonts.map.make_name_parser(str)
+local function make_name_parser(str)
     if not str or str == "" then
         return parser
     else
@@ -181,7 +184,7 @@ end
 --~ test("index1234")
 --~ test("Japan1.123")
 
-function fonts.map.tounicode16(unicode)
+local function tounicode16(unicode)
     if unicode < 0x10000 then
         return format("%04X",unicode)
     else
@@ -189,7 +192,7 @@ function fonts.map.tounicode16(unicode)
     end
 end
 
-function fonts.map.tounicode16sequence(unicodes)
+local function tounicode16sequence(unicodes)
     local t = { }
     for l=1,#unicodes do
         local unicode = unicodes[l]
@@ -222,3 +225,149 @@ end
 --~     return s
 --~ end
 
+fonts.map.load_lum_table      = load_lum_table
+fonts.map.make_name_parser    = make_name_parser
+fonts.map.tounicode16         = tounicode16
+fonts.map.tounicode16sequence = tounicode16sequence
+
+local separator   = lpeg.S("_.")
+local other       = lpeg.C((1 - separator)^1)
+local ligsplitter = lpeg.Ct(other * (separator * other)^0)
+
+--~ print(table.serialize(lpegmatch(ligsplitter,"this")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"this.that")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"japan1.123")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more")))
+--~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more.that")))
+
+fonts.map.add_to_unicode = function(data,filename)
+    local unicodes = data.luatex and data.luatex.unicodes
+    if not unicodes then
+        return
+    end
+    -- we need to move this code
+    unicodes['space']  = unicodes['space']  or 32
+    unicodes['hyphen'] = unicodes['hyphen'] or 45
+    unicodes['zwj']    = unicodes['zwj']    or 0x200D
+    unicodes['zwnj']   = unicodes['zwnj']   or 0x200C
+    -- the tounicode mapping is sparse and only needed for alternatives
+    local tounicode, originals, ns, nl, private, unknown = { }, { }, 0, 0, fonts.private, format("%04X",utfbyte("?"))
+    data.luatex.tounicode, data.luatex.originals = tounicode, originals
+    local lumunic, uparser, oparser
+    if false then -- will become an option
+        lumunic = load_lum_table(filename)
+        lumunic = lumunic and lumunic.tounicode
+    end
+    local cidinfo, cidnames, cidcodes = data.cidinfo
+    local usedmap = cidinfo and cidinfo.usedname
+    usedmap = usedmap and lower(usedmap)
+    usedmap = usedmap and fonts.cid.map[usedmap]
+    if usedmap then
+        oparser = usedmap and make_name_parser(cidinfo.ordering)
+        cidnames = usedmap.names
+        cidcodes = usedmap.unicodes
+    end
+    uparser = make_name_parser()
+    local aglmap = fonts.map and fonts.map.agl_to_unicode
+    for index, glyph in next, data.glyphs do
+        local name, unic = glyph.name, glyph.unicode or -1 -- play safe
+        if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
+            local unicode = (lumunic and lumunic[name]) or (aglmap and aglmap[name])
+            if unicode then
+                originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+            end
+            -- cidmap heuristics, beware, there is no guarantee for a match unless
+            -- the chain resolves
+            if (not unicode) and usedmap then
+                local foundindex = lpegmatch(oparser,name)
+                if foundindex then
+                    unicode = cidcodes[foundindex] -- name to number
+                    if unicode then
+                        originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+                    else
+                        local reference = cidnames[foundindex] -- number to name
+                        if reference then
+                            local foundindex = lpegmatch(oparser,reference)
+                            if foundindex then
+                                unicode = cidcodes[foundindex]
+                                if unicode then
+                                    originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+                                end
+                            end
+                            if not unicode then
+                                local foundcodes, multiple = lpegmatch(uparser,reference)
+                                if foundcodes then
+                                    if multiple then
+                                        originals[index], tounicode[index], nl, unicode = foundcodes, tounicode16sequence(foundcodes), nl + 1, true
+                                    else
+                                        originals[index], tounicode[index], ns, unicode = foundcodes, tounicode16(foundcodes), ns + 1, foundcodes
+                                    end
+                                end
+                            end
+                        end
+                    end
+                end
+            end
+            -- a.whatever or a_b_c.whatever or a_b_c (no numbers)
+            if not unicode then
+                local split = lpegmatch(ligsplitter,name)
+                local nplit = (split and #split) or 0
+                if nplit == 0 then
+                    -- skip
+                elseif nplit == 1 then
+                    local base = split[1]
+                    unicode = unicodes[base] or (aglmap and aglmap[base])
+                    if unicode then
+                        if type(unicode) == "table" then
+                            unicode = unicode[1]
+                        end
+                        originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+                    end
+                else
+                    local t = { }
+                    for l=1,nplit do
+                        local base = split[l]
+                        local u = unicodes[base] or (aglmap and aglmap[base])
+                        if not u then
+                            break
+                        elseif type(u) == "table" then
+                            t[#t+1] = u[1]
+                        else
+                            t[#t+1] = u
+                        end
+                    end
+                    if #t > 0 then -- done then
+                        originals[index], tounicode[index], nl, unicode = t, tounicode16sequence(t), nl + 1, true
+                    end
+                end
+            end
+            -- last resort
+            if not unicode then
+                local foundcodes, multiple = lpegmatch(uparser,name)
+                if foundcodes then
+                    if multiple then
+                        originals[index], tounicode[index], nl, unicode = foundcodes, tounicode16sequence(foundcodes), nl + 1, true
+                    else
+                        originals[index], tounicode[index], ns, unicode = foundcodes, tounicode16(foundcodes), ns + 1, foundcodes
+                    end
+                end
+            end
+            if not unicode then
+                originals[index], tounicode[index] = 0xFFFD, "FFFD"
+            end
+        end
+    end
+    if trace_unimapping then
+        for index, glyph in table.sortedpairs(data.glyphs) do
+            local toun, name, unic = tounicode[index], glyph.name, glyph.unicode or -1 -- play safe
+            if toun then
+                logs.report("load otf","internal: 0x%05X, name: %s, unicode: 0x%05X, tounicode: %s",index,name,unic,toun)
+            else
+                logs.report("load otf","internal: 0x%05X, name: %s, unicode: 0x%05X",index,name,unic)
+            end
+        end
+    end
+    if trace_loading and (ns > 0 or nl > 0) then
+        logs.report("load otf","enhance: %s tounicode entries added (%s ligatures)",nl+ns, ns)
+    end
+end
author	Khaled Hosny <khaledhosny@eglug.org>	2010-02-22 14:26:34 +0200
committer	Khaled Hosny <khaledhosny@eglug.org>	2010-02-23 01:44:03 +0200
commit	e064aa2dc70fb819cfdb6b3c0bdf1d081859584f (patch)
tree	b429356162be25b812b6e75e85cccae61f893ad4 /otfl-font-map.lua
parent	e0931232fb05665da0e809cb3d7be0d3d6c8e820 (diff)
download	luaotfload-e064aa2dc70fb819cfdb6b3c0bdf1d081859584f.tar.gz