From 9f36b1ac422e48e70e11716f5550a833bc8be3f9 Mon Sep 17 00:00:00 2001
From: Philipp Gesang <phg@phi-gamma.net>
Date: Mon, 11 Apr 2016 21:33:01 +0200
Subject: [fontloader] sync with Context as of 2016-04-11
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After some discussion, Hans came up with these extensions to the new
reader. We get access to more items from the hideous “name” table. On
the one hand, this means more brokenness to endure and a less sane
matter to work with. On the other hand, our tracker has been devoid of
font-matching-related bug reports for some time, so it’s the right move
nonetheless.

In addition to the name table junk, the font loader now also includes
the “version” field in the output of “getinfo()”. It’s meaningless per
se, but it sure helps to distinguish historical bugs from the ones that
matter.

**UNTESTED**
---
 src/fontloader/misc/fontloader-font-gbn.lua |   5 +-
 src/fontloader/misc/fontloader-font-otr.lua | 220 +++++++++++++++++++---------
 src/fontloader/misc/fontloader-font-ots.lua |  46 ++++--
 3 files changed, 182 insertions(+), 89 deletions(-)

(limited to 'src/fontloader/misc')

diff --git a/src/fontloader/misc/fontloader-font-gbn.lua b/src/fontloader/misc/fontloader-font-gbn.lua
index f81c877..a645a97 100644
--- a/src/fontloader/misc/fontloader-font-gbn.lua
+++ b/src/fontloader/misc/fontloader-font-gbn.lua
@@ -19,7 +19,6 @@ local nodes = nodes
 local nuts        = nodes.nuts -- context abstraction of direct nodes
 
 local traverse_id = nuts.traverse_id
-local free_node   = nuts.free
 local remove_node = nuts.remove
 
 local glyph_code  = nodes.nodecodes.glyph
@@ -137,9 +136,7 @@ function nodes.handlers.nodepass(head)
         end
         if redundant then
             for i=1,#redundant do
-                local n = redundant[i]
-                remove_node(nuthead,n)
-                free_node(n)
+                remove_node(nuthead,redundant[i],true)
             end
         end
         for d in traverse_id(disc_code,nuthead) do
diff --git a/src/fontloader/misc/fontloader-font-otr.lua b/src/fontloader/misc/fontloader-font-otr.lua
index 1fc338b..0ad1e19 100644
--- a/src/fontloader/misc/fontloader-font-otr.lua
+++ b/src/fontloader/misc/fontloader-font-otr.lua
@@ -148,7 +148,7 @@ local function reportskippedtable(tag)
 end
 
 -- We have quite some data tables. We are somewhat ff compatible with names but as I used
--- the information form the microsoft site there can be differences. Eventually I might end
+-- the information from the microsoft site there can be differences. Eventually I might end
 -- up with a different ordering and naming.
 
 local reservednames = { [0] =
@@ -163,14 +163,14 @@ local reservednames = { [0] =
     "manufacturer",
     "designer",
     "description", -- descriptor in ff
-    "venderurl",
+    "vendorurl",
     "designerurl",
     "license",
     "licenseurl",
     "reserved",
     "typographicfamily",    -- preffamilyname
     "typographicsubfamily", -- prefmodifiers
-    "compatiblefullname", -- for mac
+    "compatiblefullname",   -- for mac
     "sampletext",
     "cidfindfontname",
     "wwsfamily",
@@ -244,7 +244,16 @@ local decoders = {
     macintosh = { },
     iso       = { },
     windows   = {
-        ["unicode bmp"] = utf16_to_utf8_be
+        -- maybe always utf16
+        ["unicode semantics"]           = utf16_to_utf8_be,
+        ["unicode bmp"]                 = utf16_to_utf8_be,
+        ["unicode full"]                = utf16_to_utf8_be,
+        ["unicode 1.0 semantics"]       = utf16_to_utf8_be,
+        ["unicode 1.1 semantics"]       = utf16_to_utf8_be,
+        ["unicode 2.0 bmp"]             = utf16_to_utf8_be,
+        ["unicode 2.0 full"]            = utf16_to_utf8_be,
+        ["unicode variation sequences"] = utf16_to_utf8_be,
+        ["unicode full repertoire"]     = utf16_to_utf8_be,
     },
     custom    = { },
 }
@@ -702,7 +711,17 @@ local panosewidths = {
 -- useful information about what we deal with. The complication is that we need
 -- to filter the best one available.
 
-function readers.name(f,fontdata)
+local platformnames = {
+    postscriptname       = true,
+    fullname             = true,
+    family               = true,
+    subfamily            = true,
+    typographicfamily    = true,
+    typographicsubfamily = true,
+    compatiblefullname   = true,
+}
+
+function readers.name(f,fontdata,specification)
     local datatable = fontdata.tables.name
     if datatable then
         setposition(f,datatable.offset)
@@ -710,6 +729,7 @@ function readers.name(f,fontdata)
         local nofnames = readushort(f)
         local offset   = readushort(f)
         -- we can also provide a raw list as extra, todo as option
+        local start    = datatable.offset + offset
         local namelists = {
             unicode   = { },
             windows   = { },
@@ -738,7 +758,7 @@ function readers.name(f,fontdata)
                                     language = language,
                                     name     = name,
                                     length   = readushort(f),
-                                    offset   = readushort(f),
+                                    offset   = start + readushort(f),
                                 }
                             else
                                 skipshort(f,2)
@@ -766,7 +786,6 @@ function readers.name(f,fontdata)
         --
         -- we need to choose one we like, for instance an unicode one
         --
-        local start = datatable.offset + offset
         local names = { }
         local done  = { }
         --
@@ -783,7 +802,7 @@ function readers.name(f,fontdata)
                     local encoding = name.encoding
                     local language = name.language
                     if (not e or encoding == e) and (not l or language == l) then
-                        setposition(f,start+name.offset)
+                        setposition(f,name.offset)
                         local content = readstring(f,name.length)
                         local decoder = decoders[platform]
                         if decoder then
@@ -812,11 +831,57 @@ function readers.name(f,fontdata)
         filter("unicode")
         --
         fontdata.names = names
+        --
+        if specification.platformnames then
+            local collected = { }
+            for platform, namelist in next, namelists do
+                local filtered = false
+                for i=1,#namelist do
+                    local entry = namelist[i]
+                    local name  = entry.name
+                    if platformnames[name] then
+                        setposition(f,entry.offset)
+                        local content  = readstring(f,entry.length)
+                        local encoding = entry.encoding
+                        local decoder  = decoders[platform]
+                        if decoder then
+                            decoder = decoder[encoding]
+                        end
+                        if decoder then
+                            content = decoder(content)
+                        end
+                        if filtered then
+                            filtered[name] = content
+                        else
+                            filtered = { [name] = content }
+                        end
+                    end
+                end
+                if filtered then
+                    collected[platform] = filtered
+                end
+            end
+            fontdata.platformnames = collected
+        end
     else
         fontdata.names = { }
     end
 end
 
+----- validutf = lpeg.patterns.utf8character^0 * P(-1)
+local validutf = lpeg.patterns.validutf8
+
+local function getname(fontdata,key)
+    local names = fontdata.names
+    if names then
+        local value = names[key]
+        if value then
+            local content = value.content
+            return lpegmatch(validutf,content) and content or nil
+        end
+    end
+end
+
 -- This table is an original windows (with its precursor os/2) table. In ff this one is
 -- part of the pfminfo table but here we keep it separate (for now). We will create a
 -- properties table afterwards.
@@ -1733,69 +1798,69 @@ otf.unpackoutlines = unpackoutlines
 -- some properties in order to read following tables. When details is true we also
 -- initialize the glyphs data.
 
------ validutf = lpeg.patterns.utf8character^0 * P(-1)
-local validutf = lpeg.patterns.validutf8
-
-local function getname(fontdata,key)
-    local names = fontdata.names
-    if names then
-        local value = names[key]
-        if value then
-            local content = value.content
-            return lpegmatch(validutf,content) and content or nil
-        end
-    end
-end
-
-local function getinfo(maindata,sub)
+local function getinfo(maindata,sub,platformnames)
     local fontdata = sub and maindata.subfonts and maindata.subfonts[sub] or maindata
-    local names = fontdata.names
+    local names    = fontdata.names
+    local info     = nil
     if names then
-        local metrics    = fontdata.windowsmetrics or { }
-        local postscript = fontdata.postscript or { }
-        local fontheader = fontdata.fontheader or { }
-        local cffinfo    = fontdata.cffinfo or { }
-        local filename   = fontdata.filename
-        local weight     = getname(fontdata,"weight") or cffinfo.weight or metrics.weight
-        local width      = getname(fontdata,"width")  or cffinfo.width  or metrics.width
-        return { -- we inherit some inconsistencies/choices from ff
-            subfontindex = fontdata.subfontindex or sub or 0,
-         -- filename     = filename,
-         -- version      = name("version"),
-         -- format       = fontdata.format,
-            fontname     = getname(fontdata,"postscriptname"),
-            fullname     = getname(fontdata,"fullname"), -- or file.nameonly(filename)
-            familyname   = getname(fontdata,"typographicfamily") or getname(fontdata,"family"),
-            subfamily    = getname(fontdata,"subfamily"),
-            modifiers    = getname(fontdata,"typographicsubfamily"),
-            weight       = weight and lower(weight),
-            width        = width and lower(width),
-            pfmweight    = metrics.weightclass or 400, -- will become weightclass
-            pfmwidth     = metrics.widthclass or 5,    -- will become widthclass
-            panosewidth  = metrics.panosewidth,
-            panoseweight = metrics.panoseweight,
-            italicangle  = postscript.italicangle or 0,
-            units        = fontheader.units or 0,
-            designsize   = fontdata.designsize,
-            minsize      = fontdata.minsize,
-            maxsize      = fontdata.maxsize,
-            monospaced   = (tonumber(postscript.monospaced or 0) > 0) or metrics.panosewidth == "monospaced",
-            averagewidth = metrics.averagewidth,
-            xheight      = metrics.xheight,
-            ascender     = metrics.typoascender,
-            descender    = metrics.typodescender,
+        local metrics        = fontdata.windowsmetrics or { }
+        local postscript     = fontdata.postscript or { }
+        local fontheader     = fontdata.fontheader or { }
+        local cffinfo        = fontdata.cffinfo or { }
+        local filename       = fontdata.filename
+        local weight         = getname(fontdata,"weight") or cffinfo.weight or metrics.weight
+        local width          = getname(fontdata,"width")  or cffinfo.width  or metrics.width
+        local fontname       = getname(fontdata,"postscriptname")
+        local fullname       = getname(fontdata,"fullname")
+        local family         = getname(fontdata,"family")
+        local subfamily      = getname(fontdata,"subfamily")
+        local familyname     = getname(fontdata,"typographicfamily") or family
+        local subfamilyname  = getname(fontdata,"typographicsubfamily") or subfamily
+        local compatiblename = getname(fontdata,"compatiblefullname")
+        info = { -- we inherit some inconsistencies/choices from ff
+            subfontindex   = fontdata.subfontindex or sub or 0,
+         -- filename       = filename,
+            version        = getname(fontdata,"version"),
+         -- format         = fontdata.format,
+            fontname       = fontname,
+            fullname       = fullname,
+            family         = family,
+            subfamily      = subfamily,
+            familyname     = familyname,
+            subfamilyname  = subfamilyname,
+            compatiblename = compatiblename,
+            weight         = weight and lower(weight),
+            width          = width and lower(width),
+            pfmweight      = metrics.weightclass or 400, -- will become weightclass
+            pfmwidth       = metrics.widthclass or 5,    -- will become widthclass
+            panosewidth    = metrics.panosewidth,
+            panoseweight   = metrics.panoseweight,
+            italicangle    = postscript.italicangle or 0,
+            units          = fontheader.units or 0,
+            designsize     = fontdata.designsize,
+            minsize        = fontdata.minsize,
+            maxsize        = fontdata.maxsize,
+            monospaced     = (tonumber(postscript.monospaced or 0) > 0) or metrics.panosewidth == "monospaced",
+            averagewidth   = metrics.averagewidth,
+            xheight        = metrics.xheight,
+            capheight      = metrics.capheight, -- not always present and probably crap
+            ascender       = metrics.typoascender,
+            descender      = metrics.typodescender,
+            platformnames  = platformnames and fontdata.platformnames or nil,
         }
     elseif n then
-        return {
+        info = {
             filename = fontdata.filename,
             comment  = "there is no info for subfont " .. n,
         }
     else
-        return {
+        info = {
             filename = fontdata.filename,
             comment  = "there is no info",
         }
     end
+ -- inspect(info)
+    return info
 end
 
 local function loadtables(f,specification,offset)
@@ -1870,6 +1935,7 @@ local function readdata(f,offset,specification)
         end
     end
     --
+    --
     readers["os/2"](f,fontdata,specification)
     readers["head"](f,fontdata,specification)
     readers["maxp"](f,fontdata,specification)
@@ -1989,7 +2055,10 @@ local function loadfont(specification,n)
         specification.details = true
     end
     if specification.details then
-        specification.info = true
+        specification.info = true -- not really used any more
+    end
+    if specification.platformnames then
+        specification.platformnames = true -- normalize any truthy value to true
     end
     local function message(str)
         report("fatal error in file %a: %s\n%s",specification.filename,str,debug.traceback())
@@ -2043,7 +2112,7 @@ function readers.loadfont(filename,n)
             descriptions  = fontdata.descriptions,
             format        = fontdata.format,
             goodies       = { },
-            metadata      = getinfo(fontdata,n),
+            metadata      = getinfo(fontdata,n), -- no platformnames here !
             properties    = {
                 hasitalics = fontdata.hasitalics or false,
             },
@@ -2067,27 +2136,38 @@ function readers.loadfont(filename,n)
     end
 end
 
-function readers.getinfo(filename,n,details)
+function readers.getinfo(filename,specification) -- string, nil|number|table
+    -- platformnames is optional and not used by context (a too unpredictable mess
+    -- that only adds to the confusion) .. so it's only for checking things
+    local subfont       = nil
+    local platformnames = false -- must be local: shadows the file-level lookup table
+    if type(specification) == "table" then
+        subfont       = tonumber(specification.subfont)
+        platformnames = specification.platformnames
+    else
+        subfont       = tonumber(specification)
+    end
     local fontdata = loadfont {
-        filename = filename,
-        details  = true,
+        filename      = filename,
+        details       = true,
+        platformnames = platformnames,
     }
     if fontdata then
         local subfonts = fontdata.subfonts
         if not subfonts then
-            return getinfo(fontdata)
-        elseif type(n) ~= "number" then
+            return getinfo(fontdata,nil,platformnames)
+        elseif not subfont then
             local info = { }
             for i=1,#subfonts do
-                info[i] = getinfo(fontdata,i)
+                info[i] = getinfo(fontdata,i,platformnames)
             end
             return info
-        elseif n > 1 and n <= subfonts then
-            return getinfo(fontdata,n)
+        elseif subfont >= 1 and subfont <= #subfonts then -- subfonts are indexed from 1
+            return getinfo(fontdata,subfont,platformnames)
         else
             return {
                 filename = filename,
-                comment  = "there is no subfont " .. n .. " in this file"
+                comment  = "there is no subfont " .. subfont .. " in this file"
             }
         end
     else
diff --git a/src/fontloader/misc/fontloader-font-ots.lua b/src/fontloader/misc/fontloader-font-ots.lua
index 4d5e8ec..d67db6d 100644
--- a/src/fontloader/misc/fontloader-font-ots.lua
+++ b/src/fontloader/misc/fontloader-font-ots.lua
@@ -2716,8 +2716,8 @@ function otf.dataset(tfmdata,font) -- generic variant, overloaded in context
     return rl
 end
 
-local function report_disc(n)
-    report_run("kern: %s > %s",disc,languages.serializediscretionary(disc))
+local function report_disc(what,n)
+    report_run("%s: %s > %s",what,n,languages.serializediscretionary(n))
 end
 
 local function kernrun(disc,k_run,font,attr,...)
@@ -2725,7 +2725,7 @@ local function kernrun(disc,k_run,font,attr,...)
     -- we catch <font 1><disc font 2>
     --
     if trace_kernruns then
-        report_disc("kern")
+        report_disc("kern",disc)
     end
     --
     local prev, next = getboth(disc)
@@ -2819,7 +2819,7 @@ end
 
 local function comprun(disc,c_run,...)
     if trace_compruns then
-        report_disc("comp")
+        report_disc("comp",disc)
     end
     --
     local pre, post, replace = getdisc(disc)
@@ -2866,31 +2866,35 @@ end
 
 local function testrun(disc,t_run,c_run,...)
     if trace_testruns then
-        report_disc("test")
+        report_disc("test",disc)
     end
     local prev, next = getboth(disc)
     if not next then
         -- weird discretionary
         return
     end
-    local pre, post, replace, pretail, posttail, replacetail = getdisc(disc)
+    local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
     local done = false
     if replace and prev then
-        -- only look ahead
-     -- local nest = getprev(replace)
+        -- this is a bit strange as we only do replace here and not post
+        -- anyway, we only look ahead ... the idea is that we discard a
+        -- disc when there is a ligature crossing the replace boundary
         setlink(replacetail,next)
-        if t_run(replace,next,...) then
-            setfield(disc,"replace",nil) -- beware, side effects of nest so first
+        local ok, overflow = t_run(replace,next,...)
+        if ok and overflow then
+            -- so, we can have crossed the boundary
+            setfield(disc,"replace",nil)
             setlink(prev,replace)
-            setlink(replacetail,next)
+         -- setlink(replacetail,next)
             setboth(disc)
             flush_node_list(disc)
             return replace, true -- restart .. tricky !
         else
+            -- we stay inside the disc
             setnext(replacetail)
             setprev(next,disc)
         end
- --       pre, post, replace, pretail, posttail, replacetail = getdisc(disc)
+     -- pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
     end
     --
     -- like comprun
@@ -2945,7 +2949,7 @@ end
 -- local function discrun(disc,drun,krun)
 --     local prev, next = getboth(disc)
 --     if trace_discruns then
---        report_disc("disc")
+--        report_disc("disc",disc)
 --     end
 --     if next and prev then
 --         setnext(prev,next)
@@ -3043,7 +3047,13 @@ local function t_run_single(start,stop,font,attr,lookupcache)
                     -- if we need more than ligatures we can outline the code and use functions
                     local s = getnext(start)
                     local l = nil
+                    local d = 0
                     while s do
+                        if s == stop then
+                            d = 1
+                        elseif d > 0 then
+                            d = d + 1
+                        end
                         local lg = lookupmatch[getchar(s)]
                         if lg then
                             l = lg
@@ -3053,7 +3063,7 @@ local function t_run_single(start,stop,font,attr,lookupcache)
                         end
                     end
                     if l and l.ligature then
-                        return true
+                        return true, d > 1
                     end
                 end
             end
@@ -3168,7 +3178,13 @@ local function t_run_multiple(start,stop,font,attr,steps,nofsteps)
                             -- if we need more than ligatures we can outline the code and use functions
                             local s = getnext(start)
                             local l = nil
+                            local d = 0
                             while s do
+                                if s == stop then
+                                    d = 1
+                                elseif d > 0 then
+                                    d = d + 1
+                                end
                                 local lg = lookupmatch[getchar(s)]
                                 if lg then
                                     l = lg
@@ -3178,7 +3194,7 @@ local function t_run_multiple(start,stop,font,attr,steps,nofsteps)
                                 end
                             end
                             if l and l.ligature then
-                                return true
+                                return true, d > 1
                             end
                         end
                     else
-- 
cgit v1.2.3