From 8c9101fcc313f47232c8378a1cde37874efd845a Mon Sep 17 00:00:00 2001 From: Philipp Gesang Date: Tue, 5 Jul 2016 08:17:49 +0200 Subject: [fontloader] sync with Context as of 2016-07-05 --- src/fontloader/misc/fontloader-font-tfm.lua | 572 ++++++++++++++++++++++++++-- 1 file changed, 550 insertions(+), 22 deletions(-) (limited to 'src/fontloader/misc/fontloader-font-tfm.lua') diff --git a/src/fontloader/misc/fontloader-font-tfm.lua b/src/fontloader/misc/fontloader-font-tfm.lua index ab6d795..d9b0523 100644 --- a/src/fontloader/misc/fontloader-font-tfm.lua +++ b/src/fontloader/misc/fontloader-font-tfm.lua @@ -6,8 +6,9 @@ if not modules then modules = { } end modules ['font-tfm'] = { license = "see context related readme files" } -local next = next -local match = string.match +local next, type = next, type +local match, format = string.match, string.format +local concat, sortedhash = table.concat, table.sortedhash local trace_defining = false trackers.register("fonts.defining", function(v) trace_defining = v end) local trace_features = false trackers.register("tfm.features", function(v) trace_features = v end) @@ -16,6 +17,7 @@ local report_defining = logs.reporter("fonts","defining") local report_tfm = logs.reporter("fonts","tfm loading") local findbinfile = resolvers.findbinfile +local setmetatableindex = table.setmetatableindex local fonts = fonts local handlers = fonts.handlers @@ -28,8 +30,10 @@ tfm.version = 1.000 tfm.maxnestingdepth = 5 tfm.maxnestingsize = 65536*1024 +local otf = fonts.handlers.otf + local tfmfeatures = constructors.features.tfm ------ registertfmfeature = tfmfeatures.register +local registertfmfeature = tfmfeatures.register constructors.resolvevirtualtoo = false -- wil be set in font-ctx.lua @@ -69,7 +73,68 @@ function tfm.setfeatures(tfmdata,features) end end -local depth = { } -- table.setmetatableindex("number") +local depth = { } -- table.setmetatableindex("number") +local enhancers = { } + +local steps = { + "normalize features", + "check extra features" +} + +-- otf.enhancers.register("check extra features",enhance) + +enhancers["check extra features"] = otf.enhancers.enhance + +local function applyenhancers(data,filename) + for i=1,#steps do + local step = steps[i] + local enhancer = enhancers[step] + if enhancer then + if trace_loading then + report_tfm("applying enhancer %a",step) + end + enhancer(data,filename) + else + report_tfm("invalid enhancer %a",step) + end + end +end + +-- Normally we just load the tfm data and go on. However there was some demand for +-- loading good old tfm /pfb files where afm files were lacking and even enc files +-- of dubious quality so we now support loading such (often messy) setups too. +-- +-- Because such fonts also use (ugly) tweaks achieve some purpose (like swapping +-- accents) we need to delay the unicoding actions till after the features have been +-- applied. +-- +-- It must be noted that in ConTeXt we don't expect this to be used at all. Here is +-- example: +-- +-- tfm metrics + pfb vector for index + pfb file for shapes +-- +-- \font\foo=file:csr10.tfm:reencode=auto;mode=node;liga=yes;kern=yes +-- +-- tfm metrics + pfb vector for index + enc file for tfm mapping + pfb file for shapes +-- +-- \font\foo=file:csr10.tfm:reencode=csr.enc;mode=node;liga=yes;kern=yes +-- +-- tfm metrics + enc file for mapping to tfm + bitmaps shapes +-- +-- \font\foo=file:csr10.tfm:reencode=csr.enc;bitmap=yes;mode=node;liga=yes;kern=yes +-- +-- One can add features: +-- +-- fonts.handlers.otf.addfeature { +-- name = "czechdqcheat", +-- type = "substitution", +-- data = { +-- quotedblright = "csquotedblright", +-- }, +-- } +-- +-- So "czechdqcheat=yes" is then a valid feature. And yes, it's a cheat. + local function read_from_tfm(specification) local filename = specification.filename @@ -80,26 +145,116 @@ local function read_from_tfm(specification) end local tfmdata = font.read_tfm(filename,size) -- not cached, fast enough if tfmdata then - local features = specification.features and specification.features.normal or { } + + local features = specification.features and specification.features.normal or { } + local features = constructors.checkedfeatures("tfm",features) + specification.features.normal = features + + -- If reencode returns a new table, we assume that we're doing something + -- special. An 'auto' reencode pickt up its vector from the pfb file. + + local newtfmdata = (depth[filename] == 1) and tfm.reencode(tfmdata,specification) + if newtfmdata then + tfmdata = newtfmdata + end + local resources = tfmdata.resources or { } local properties = tfmdata.properties or { } local parameters = tfmdata.parameters or { } local shared = tfmdata.shared or { } - properties.name = tfmdata.name - properties.fontname = tfmdata.fontname - properties.psname = tfmdata.psname - properties.filename = specification.filename - properties.format = fonts.formats.tfm -- better than nothing - parameters.size = size + -- + shared.features = features + shared.resources = resources + -- + properties.name = tfmdata.name -- todo: fallback + properties.fontname = tfmdata.fontname -- todo: fallback + properties.psname = tfmdata.psname -- todo: fallback + properties.fullname = tfmdata.fullname -- todo: fallback + properties.filename = specification.filename -- todo: fallback + properties.format = fonts.formats.tfm -- better than nothing -- tfmdata.properties = properties tfmdata.resources = resources tfmdata.parameters = parameters tfmdata.shared = shared -- - shared.rawdata = { } + shared.rawdata = { resources = resources } shared.features = features + -- + -- The next branch is only entered when we have a proper encoded file i.e. + -- unicodes and such. It really nakes no sense to do feature juggling when + -- we have no names and unicodes. + -- + if newtfmdata then + -- + -- Some opentype processing assumes these to be present: + -- + if not resources.marks then + resources.marks = { } + end + if not resources.sequences then + resources.sequences = { } + end + if not resources.features then + resources.features = { + gsub = { }, + gpos = { }, + } + end + if not tfmdata.changed then + tfmdata.changed = { } + end + if not tfmdata.descriptions then + tfmdata.descriptions = tfmdata.characters + end + -- + -- It might be handy to have this: + -- + otf.readers.addunicodetable(tfmdata) + -- + -- We make a pseudo opentype font, e.g. kerns and ligatures etc: + -- + applyenhancers(tfmdata,filename) + -- + -- Now user stuff can kick in. + -- + constructors.applymanipulators("tfm",tfmdata,features,trace_features,report_tfm) + -- + -- As that can also mess with names and such, we are now ready for finalizing + -- the unicode information. This is a different order that for instance type one + -- (afm) files. First we try to deduce unicodes from already present information. + -- + otf.readers.unifymissing(tfmdata) + -- + -- Next we fill in the gaps, based on names from teh agl. Probably not much will + -- happen here. + -- + fonts.mappings.addtounicode(tfmdata,filename) + -- + -- The tounicode data is passed to the backend that constructs the vectors for us. + -- + tfmdata.tounicode = 1 + local tounicode = fonts.mappings.tounicode + for unicode, v in next, tfmdata.characters do + local u = v.unicode + if u then + v.tounicode = tounicode(u) + end + end + -- + -- However, when we use a bitmap font those vectors can't be constructed because + -- that information is not carried with those fonts (there is no name info, nor + -- proper index info, nor unicodes at that end). So, we provide it ourselves. + -- + if tfmdata.usedbitmap then + tfm.addtounicode(tfmdata) + end + end + -- shared.processes = next(features) and tfm.setfeatures(tfmdata,features) or nil + -- + parameters.factor = 1 -- already scaled + parameters.size = size parameters.slant = parameters.slant or parameters[1] or 0 parameters.space = parameters.space or parameters[2] or 0 parameters.space_stretch = parameters.space_stretch or parameters[3] or 0 @@ -110,7 +265,12 @@ local function read_from_tfm(specification) -- constructors.enhanceparameters(parameters) -- official copies for us -- - if constructors.resolvevirtualtoo then + if newtfmdata then + -- + -- We do nothing as we assume flat tfm files. It would become real messy + -- otherwise and I don't have something for testing on my system anyway. + -- + elseif constructors.resolvevirtualtoo then fonts.loggers.register(tfmdata,file.suffix(filename),specification) -- strange, why here local vfname = findbinfile(specification.name, 'ovf') if vfname and vfname ~= "" then @@ -145,21 +305,26 @@ local function read_from_tfm(specification) end end -- - local allfeatures = tfmdata.shared.features or specification.features.normal - constructors.applymanipulators("tfm",tfmdata,allfeatures.normal,trace_features,report_tfm) - if not features.encoding then - local encoding, filename = match(properties.filename,"^(.-)%-(.*)$") -- context: encoding-name.* - if filename and encoding and encodings.known and encodings.known[encoding] then - features.encoding = encoding - end - end - -- let's play safe: + -- This is for old times sake (and context specific) so we comment it. It has + -- to do with encoding prefixes (a context naming that was later adopted by + -- the lm/gyre project) + -- + -- if not features.encoding then + -- local encoding, filename = match(properties.filename,"^(.-)%-(.*)$") + -- if filename and encoding and encodings.known and encodings.known[encoding] then + -- features.encoding = encoding + -- end + -- end + -- + -- Some afterthoughts: + -- properties.haskerns = true properties.hasligatures = true resources.unicodes = { } resources.lookuptags = { } -- depth[filename] = depth[filename] - 1 + -- return tfmdata else depth[filename] = depth[filename] - 1 @@ -199,3 +364,366 @@ function readers.tfm(specification) end readers.ofm = readers.tfm + +-- The reencoding acts upon the 'reencode' feature which can have values 'auto' or +-- an enc file. You can also specify a 'pfbfile' feature (but it defaults to the +-- tfm filename) and a 'bitmap' feature. When no enc file is givven (auto) we will +-- get the vectors from the pfb file. + +do + + local outfiles = { } + + local tfmcache = table.setmetatableindex(function(t,tfmdata) + local id = font.define(tfmdata) + t[tfmdata] = id + return id + end) + + local encdone = table.setmetatableindex("table") + + function tfm.reencode(tfmdata,specification) + + local features = specification.features + + if not features then + return + end + + local features = features.normal + + if not features then + return + end + + local tfmfile = file.basename(tfmdata.name) + local encfile = features.reencode -- or features.enc + local pfbfile = features.pfbfile -- or features.pfb + local bitmap = features.bitmap -- or features.pk + + if not encfile then + return + end + + local pfbfile = outfiles[tfmfile] + + if pfbfile == nil then + if bitmap then + pfbfile = false + elseif type(pfbfile) ~= "string" then + pfbfile = tfmfile + end + if type(pfbfile) == "string" then + pfbfile = file.addsuffix(pfbfile,"pfb") + -- pdf.mapline(tfmfile .. "<" .. pfbfile) + report_tfm("using type1 shapes from %a for %a",pfbfile,tfmfile) + else + report_tfm("using bitmap shapes for %a",tfmfile) + pfbfile = false -- use bitmap + end + outfiles[tfmfile] = pfbfile + end + + local encoding = false + local vector = false + + if type(pfbfile) == "string" then + local pfb = fonts.constructors.handlers.pfb + if pfb and pfb.loadvector then + local v, e = pfb.loadvector(pfbfile) + if v then + vector = v + end + if e then + encoding = e + end + end + end + if type(encfile) == "string" and encfile ~= "auto" then + encoding = fonts.encodings.load(file.addsuffix(encfile,"enc")) + if encoding then + encoding = encoding.vector + end + end + if not encoding then + report_tfm("bad encoding for %a, quitting",tfmfile) + return + end + + local unicoding = fonts.encodings.agl and fonts.encodings.agl.unicodes + local virtualid = tfmcache[tfmdata] + local tfmdata = table.copy(tfmdata) -- good enough for small fonts + local characters = { } + local originals = tfmdata.characters + local indices = { } + local parentfont = { "font", 1 } + local private = fonts.constructors.privateoffset + local reported = encdone[tfmfile][encfile] + + -- create characters table + + local backmap = vector and table.swapped(vector) + local done = { } -- prevent duplicate + + for index, name in sortedhash(encoding) do -- predictable order + local unicode = unicoding[name] + local original = originals[index] + if original then + if unicode then + original.unicode = unicode + else + unicode = private + private = private + 1 + if not reported then + report_tfm("glyph %a in font %a with encoding %a gets unicode %U",name,tfmfile,encfile,unicode) + end + end + characters[unicode] = original + indices[index] = unicode + original.name = name -- so one can lookup weird names + if backmap then + original.index = backmap[name] + else -- probably bitmap + original.commands = { parentfont, { "char", index } } + original.oindex = index + end + done[name] = true + elseif not done[name] then + report_tfm("bad index %a in font %a with name %a",index,tfmfile,name) + end + end + + encdone[tfmfile][encfile] = true + + -- redo kerns and ligatures + + for k, v in next, characters do + local kerns = v.kerns + if kerns then + local t = { } + for k, v in next, kerns do + local i = indices[k] + if i then + t[i] = v + end + end + v.kerns = next(t) and t or nil + end + local ligatures = v.ligatures + if ligatures then + local t = { } + for k, v in next, ligatures do + local i = indices[k] + if i then + t[i] = v + v.char = indices[v.char] + end + end + v.ligatures = next(t) and t or nil + end + end + + -- wrap up + + tfmdata.fonts = { { id = virtualid } } + tfmdata.characters = characters + tfmdata.fullname = tfmdata.fullname or tfmdata.name + tfmdata.psname = file.nameonly(pfbfile or tfmdata.name) + tfmdata.filename = pfbfile + tfmdata.encodingbytes = 2 + tfmdata.format = "type1" + tfmdata.tounicode = 1 + tfmdata.embedding = "subset" + tfmdata.usedbitmap = bitmap and virtualid + + return tfmdata + end + +end + +-- This code adds a ToUnicode vector for bitmap fonts. We don't bother about +-- ranges because we have small fonts. it works ok with acrobat but fails with +-- the other viewers (they get confused by the bitmaps I guess). + +do + + local template = [[ +/CIDInit /ProcSet findresource begin + 12 dict begin + begincmap + /CIDSystemInfo << /Registry (TeX) /Ordering (bitmap-%s) /Supplement 0 >> def + /CMapName /TeX-bitmap-%s def + /CMapType 2 def + 1 begincodespacerange + <00> + endcodespacerange + %s beginbfchar +%s + endbfchar + endcmap +CMapName currentdict /CMap defineresource pop end +end +end +]] + + local flushstreamobject = lpdf and lpdf.flushstreamobject + local setfontattributes = pdf.setfontattributes + + if not flushstreamobject then + flushstreamobject = function(data) + return pdf.obj { + immediate = true, + type = "stream", + string = data, + } + end + end + + if not setfontattributes then + setfontattributes = function(id,data) + print(format("your luatex is too old so no tounicode bitmap font%i",id)) + end + end + + function tfm.addtounicode(tfmdata) + local id = tfmdata.usedbitmap + local map = { } + local char = { } -- no need for range, hardly used + for k, v in next, tfmdata.characters do + local index = v.oindex + local tounicode = v.tounicode + if index and tounicode then + map[index] = tounicode + end + end + for k, v in sortedhash(map) do + char[#char+1] = format("<%02X> <%s>",k,v) + end + char = concat(char,"\n") + local stream = format(template,id,id,#char,char) + local reference = flushstreamobject(stream,nil,true) + setfontattributes(id,format("/ToUnicode %i 0 R",reference)) + end + +end + +-- Now we implement the regular features handlers. We need to convert the +-- tfm specific structures to opentype structures. In basemode they are +-- converted back so that is a bti of a waste but it's fast enough. + +do + + local everywhere = { ["*"] = { ["*"] = true } } -- or: { ["*"] = { "*" } } + local noflags = { false, false, false, false } + + enhancers["normalize features"] = function(data) + local ligatures = setmetatableindex("table") + local kerns = setmetatableindex("table") + local characters = data.characters + for u, c in next, characters do + local l = c.ligatures + local k = c.kerns + if l then + ligatures[u] = l + for u, v in next, l do + l[u] = { ligature = v.char } + end + c.ligatures = nil + end + if k then + kerns[u] = k + for u, v in next, k do + k[u] = v -- { v, 0 } + end + c.kerns = nil + end + end + + for u, l in next, ligatures do + for k, v in next, l do + local vl = v.ligature + local dl = ligatures[vl] + if dl then + for kk, vv in next, dl do + v[kk] = vv -- table.copy(vv) + end + end + end + end + + local features = { + gpos = { }, + gsub = { }, + } + local sequences = { + -- only filled ones + } + if next(ligatures) then + features.gsub.liga = everywhere + data.properties.hasligatures = true + sequences[#sequences+1] = { + features = { + liga = everywhere, + }, + flags = noflags, + name = "s_s_0", + nofsteps = 1, + order = { "liga" }, + type = "gsub_ligature", + steps = { + { + coverage = ligatures, + }, + }, + } + end + if next(kerns) then + features.gpos.kern = everywhere + data.properties.haskerns = true + sequences[#sequences+1] = { + features = { + kern = everywhere, + }, + flags = noflags, + name = "p_s_0", + nofsteps = 1, + order = { "kern" }, + type = "gpos_pair", + steps = { + { + format = "kern", + coverage = kerns, + }, + }, + } + end + data.resources.features = features + data.resources.sequences = sequences + data.shared.resources = data.shared.resources or resources + end + +end + +-- As with type one (afm) loading, we just use the opentype ones: + +registertfmfeature { + name = "mode", + description = "mode", + initializers = { + base = otf.modeinitializer, + node = otf.modeinitializer, + } +} + +registertfmfeature { + name = "features", + description = "features", + default = true, + initializers = { + base = otf.basemodeinitializer, + node = otf.nodemodeinitializer, + }, + processors = { + node = otf.featuresprocessor, + } +} -- cgit v1.2.3