1 files changed, 97 insertions, 64 deletions
diff --git a/otfl-font-map.lua b/otfl-font-map.lua
index 26b22b6..7f5305f 100644
--- a/otfl-font-map.lua
+++ b/otfl-font-map.lua
@@ -6,15 +6,18 @@ if not modules then modules = { } end modules ['font-map'] = {
     license   = "see context related readme files"
 }
 
-local utf = unicode.utf8
 local match, format, find, concat, gsub, lower = string.match, string.format, string.find, table.concat, string.gsub, string.lower
-local lpegmatch = lpeg.match
+local P, R, S, C, Ct, Cc, lpegmatch = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc, lpeg.match
 local utfbyte = utf.byte
 
-local trace_loading    = false  trackers.register("otf.loading",    function(v) trace_loading    = v end)
-local trace_unimapping = false  trackers.register("otf.unimapping", function(v) trace_unimapping = v end)
+local trace_loading = false  trackers.register("fonts.loading",    function(v) trace_loading    = v end)
+local trace_mapping = false  trackers.register("fonts.mapping", function(v) trace_unimapping = v end)
 
-local report_otf = logs.reporter("fonts","otf loading")
+local report_fonts  = logs.reporter("fonts","loading") -- not otf only
+
+local fonts    = fonts
+local mappings = { }
+fonts.mappings = mappings
 
 --[[ldx--
 <p>Eventually this code will disappear because map files are kind
@@ -22,23 +25,18 @@ of obsolete. Some code may move to runtime or auxiliary modules.</p>
 <p>The name to unciode related code will stay of course.</p>
 --ldx]]--
 
-local fonts = fonts
-fonts.map   = fonts.map or { }
-
 local function loadlumtable(filename) -- will move to font goodies
     local lumname = file.replacesuffix(file.basename(filename),"lum")
     local lumfile = resolvers.findfile(lumname,"map") or ""
     if lumfile ~= "" and lfs.isfile(lumfile) then
-        if trace_loading or trace_unimapping then
-            report_otf("enhance: loading %s ",lumfile)
+        if trace_loading or trace_mapping then
+            report_fonts("enhance: loading %s ",lumfile)
         end
         lumunic = dofile(lumfile)
         return lumunic, lumfile
     end
 end
 
-local P, R, S, C, Ct, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Ct, lpeg.Cc
-
 local hex     = R("AF","09")
 local hexfour = (hex*hex*hex*hex) / function(s) return tonumber(s,16) end
 local hexsix  = (hex^1)           / function(s) return tonumber(s,16) end
@@ -65,8 +63,8 @@ local function makenameparser(str)
     end
 end
 
---~ local parser = fonts.map.makenameparser("Japan1")
---~ local parser = fonts.map.makenameparser()
+--~ local parser = mappings.makenameparser("Japan1")
+--~ local parser = mappings.makenameparser()
 --~ local function test(str)
 --~     local b, a = lpegmatch(parser,str)
 --~     print((a and table.serialize(b)) or b)
@@ -100,6 +98,15 @@ local function tounicode16sequence(unicodes)
     return concat(t)
 end
 
+local function fromunicode16(str)
+    if #str == 4 then
+        return tonumber(str,16)
+    else
+        local l, r = match(str,"(....)(....)")
+        return (tonumber(l,16)- 0xD800)*0x400  + tonumber(r,16) - 0xDC00
+    end
+end
+
 --~ This is quite a bit faster but at the cost of some memory but if we
 --~ do this we will also use it elsewhere so let's not follow this route
 --~ now. I might use this method in the plain variant (no caching there)
@@ -107,7 +114,7 @@ end
 --~
 --~ local cache = { }
 --~
---~ function fonts.map.tounicode16(unicode)
+--~ function mappings.tounicode16(unicode)
 --~     local s = cache[unicode]
 --~     if not s then
 --~         if unicode < 0x10000 then
@@ -120,10 +127,11 @@ end
 --~     return s
 --~ end
 
-fonts.map.loadlumtable        = loadlumtable
-fonts.map.makenameparser      = makenameparser
-fonts.map.tounicode16         = tounicode16
-fonts.map.tounicode16sequence = tounicode16sequence
+mappings.loadlumtable        = loadlumtable
+mappings.makenameparser      = makenameparser
+mappings.tounicode16         = tounicode16
+mappings.tounicode16sequence = tounicode16sequence
+mappings.fromunicode16       = fromunicode16
 
 local separator   = S("_.")
 local other       = C((1 - separator)^1)
@@ -135,8 +143,11 @@ local ligsplitter = Ct(other * (separator * other)^0)
 --~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more")))
 --~ print(table.serialize(lpegmatch(ligsplitter,"such_so_more.that")))
 
-fonts.map.addtounicode = function(data,filename)
-    local unicodes = data.luatex and data.luatex.unicodes
+function mappings.addtounicode(data,filename)
+    local resources    = data.resources
+    local properties   = data.properties
+    local descriptions = data.descriptions
+    local unicodes     = resources.unicodes
     if not unicodes then
         return
     end
@@ -146,30 +157,39 @@ fonts.map.addtounicode = function(data,filename)
     unicodes['zwj']    = unicodes['zwj']    or 0x200D
     unicodes['zwnj']   = unicodes['zwnj']   or 0x200C
     -- the tounicode mapping is sparse and only needed for alternatives
-    local tounicode, originals, ns, nl, private, unknown = { }, { }, 0, 0, fonts.privateoffset, format("%04X",utfbyte("?"))
-    data.luatex.tounicode, data.luatex.originals = tounicode, originals
+    local private       = fonts.constructors.privateoffset
+    local unknown       = format("%04X",utfbyte("?"))
+    local unicodevector = fonts.encodings.agl.unicodes -- loaded runtime in context
+    local tounicode     = { }
+    local originals     = { }
+    resources.tounicode = tounicode
+    resources.originals = originals
     local lumunic, uparser, oparser
+    local cidinfo, cidnames, cidcodes, usedmap
     if false then -- will become an option
         lumunic = loadlumtable(filename)
         lumunic = lumunic and lumunic.tounicode
     end
-    local cidinfo, cidnames, cidcodes = data.cidinfo
-    local usedmap = cidinfo and cidinfo.usedname
-    usedmap = usedmap and lower(usedmap)
-    usedmap = usedmap and fonts.cid.map[usedmap]
+    --
+    cidinfo = properties.cidinfo
+    usedmap = cidinfo and fonts.cid.getmap(cidinfo)
+    --
     if usedmap then
-        oparser = usedmap and makenameparser(cidinfo.ordering)
+        oparser  = usedmap and makenameparser(cidinfo.ordering)
         cidnames = usedmap.names
         cidcodes = usedmap.unicodes
     end
     uparser = makenameparser()
-    local unicodevector = fonts.enc.agl.unicodes -- loaded runtime in context
-    for index, glyph in next, data.glyphs do
-        local name, unic = glyph.name, glyph.unicode or -1 -- play safe
+    local ns, nl = 0, 0
+    for unic, glyph in next, descriptions do
+        local index = glyph.index
+        local name  = glyph.name
         if unic == -1 or unic >= private or (unic >= 0xE000 and unic <= 0xF8FF) or unic == 0xFFFE or unic == 0xFFFF then
-            local unicode = (lumunic and lumunic[name]) or unicodevector[name]
+            local unicode = lumunic and lumunic[name] or unicodevector[name]
             if unicode then
-                originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+                originals[index] = unicode
+                tounicode[index] = tounicode16(unicode)
+                ns               = ns + 1
             end
             -- cidmap heuristics, beware, there is no guarantee for a match unless
             -- the chain resolves
@@ -178,7 +198,9 @@ fonts.map.addtounicode = function(data,filename)
                 if foundindex then
                     unicode = cidcodes[foundindex] -- name to number
                     if unicode then
-                        originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+                        originals[index] = unicode
+                        tounicode[index] = tounicode16(unicode)
+                        ns               = ns + 1
                     else
                         local reference = cidnames[foundindex] -- number to name
                         if reference then
@@ -186,16 +208,23 @@ fonts.map.addtounicode = function(data,filename)
                             if foundindex then
                                 unicode = cidcodes[foundindex]
                                 if unicode then
-                                    originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
+                                    originals[index] = unicode
+                                    tounicode[index] = tounicode16(unicode)
+                                    ns               = ns + 1
                                 end
                             end
                             if not unicode then
                                 local foundcodes, multiple = lpegmatch(uparser,reference)
                                 if foundcodes then
+                                    originals[index] = foundcodes
                                     if multiple then
-                                        originals[index], tounicode[index], nl, unicode = foundcodes, tounicode16sequence(foundcodes), nl + 1, true
+                                        tounicode[index] = tounicode16sequence(foundcodes)
+                                        nl               = nl + 1
+                                        unicode          = true
                                     else
-                                        originals[index], tounicode[index], ns, unicode = foundcodes, tounicode16(foundcodes), ns + 1, foundcodes
+                                        tounicode[index] = tounicode16(foundcodes)
+                                        ns               = ns + 1
+                                        unicode          = foundcodes
                                     end
                                 end
                             end
@@ -206,19 +235,8 @@ fonts.map.addtounicode = function(data,filename)
             -- a.whatever or a_b_c.whatever or a_b_c (no numbers)
             if not unicode then
                 local split = lpegmatch(ligsplitter,name)
-                local nplit = (split and #split) or 0
-                if nplit == 0 then
-                    -- skip
-                elseif nplit == 1 then
-                    local base = split[1]
-                    unicode = unicodes[base] or unicodevector[base]
-                    if unicode then
-                        if type(unicode) == "table" then
-                            unicode = unicode[1]
-                        end
-                        originals[index], tounicode[index], ns = unicode, tounicode16(unicode), ns + 1
-                    end
-                else
+                local nplit = split and #split or 0
+                if nplit >= 2 then
                     local t, n = { }, 0
                     for l=1,nplit do
                         local base = split[l]
@@ -236,39 +254,54 @@ fonts.map.addtounicode = function(data,filename)
                     if n == 0 then -- done then
                         -- nothing
                     elseif n == 1 then
-                        originals[index], tounicode[index], nl, unicode = t[1], tounicode16(t[1]), nl + 1, true
+                        originals[index] = t[1]
+                        tounicode[index] = tounicode16(t[1])
                     else
-                        originals[index], tounicode[index], nl, unicode = t, tounicode16sequence(t), nl + 1, true
+                        originals[index] = t
+                        tounicode[index] = tounicode16sequence(t)
                     end
+                    nl = nl + 1
+                    unicode = true
+                else
+                    -- skip: already checked and we don't want privates here
                 end
             end
-            -- last resort
+            -- last resort (we might need to catch private here as well)
             if not unicode then
                 local foundcodes, multiple = lpegmatch(uparser,name)
                 if foundcodes then
                     if multiple then
-                        originals[index], tounicode[index], nl, unicode = foundcodes, tounicode16sequence(foundcodes), nl + 1, true
+                        originals[index] = foundcodes
+                        tounicode[index] = tounicode16sequence(foundcodes)
+                        nl               = nl + 1
+                        unicode          = true
                     else
-                        originals[index], tounicode[index], ns, unicode = foundcodes, tounicode16(foundcodes), ns + 1, foundcodes
+                        originals[index] = foundcodes
+                        tounicode[index] = tounicode16(foundcodes)
+                        ns               = ns + 1
+                        unicode          = foundcodes
                     end
                 end
             end
-            if not unicode then
-                originals[index], tounicode[index] = 0xFFFD, "FFFD"
-            end
+         -- if not unicode then
+         --     originals[index] = 0xFFFD
+         --     tounicode[index] = "FFFD"
+         -- end
         end
     end
-    if trace_unimapping then
-        for index, glyph in table.sortedhash(data.glyphs) do
-            local toun, name, unic = tounicode[index], glyph.name, glyph.unicode or -1 -- play safe
+    if trace_mapping then
+        for unic, glyph in table.sortedhash(descriptions) do
+            local name  = glyph.name
+            local index = glyph.index
+            local toun  = tounicode[index]
             if toun then
-                report_otf("internal: 0x%05X, name: %s, unicode: 0x%05X, tounicode: %s",index,name,unic,toun)
+                report_fonts("internal: 0x%05X, name: %s, unicode: U+%05X, tounicode: %s",index,name,unic,toun)
             else
-                report_otf("internal: 0x%05X, name: %s, unicode: 0x%05X",index,name,unic)
+                report_fonts("internal: 0x%05X, name: %s, unicode: U+%05X",index,name,unic)
             end
         end
     end
     if trace_loading and (ns > 0 or nl > 0) then
-        report_otf("enhance: %s tounicode entries added (%s ligatures)",nl+ns, ns)
+        report_fonts("enhance: %s tounicode entries added (%s ligatures)",nl+ns, ns)
     end
 end