From a63f1e4297162ac3a338a849ba10739816b44d39 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Mon, 26 Apr 2021 22:56:38 +0200 Subject: 2021-04-26 20:42:00 --- tex/context/base/mkii/cont-new.mkii | 2 +- tex/context/base/mkii/context.mkii | 2 +- tex/context/base/mkii/mult-cs.mkii | 1 + tex/context/base/mkiv/cont-new.mkiv | 2 +- tex/context/base/mkiv/context.mkiv | 2 +- tex/context/base/mkiv/font-lib.mkvi | 2 + tex/context/base/mkiv/font-phb-imp-binary.lua | 119 ++++ tex/context/base/mkiv/font-phb.lua | 728 +++++++++++++++++++++ tex/context/base/mkiv/font-txt.lua | 549 ++++++++++++++++ tex/context/base/mkiv/m-fonts-plugins.mkiv | 64 +- tex/context/base/mkiv/status-files.pdf | Bin 23999 -> 23923 bytes tex/context/base/mkiv/status-lua.pdf | Bin 224934 -> 224973 bytes tex/context/base/mkxl/cont-new.mkxl | 2 +- tex/context/base/mkxl/context.mkxl | 2 +- tex/context/base/mkxl/font-lib.mklx | 12 + tex/context/base/mkxl/font-phb-imp-binary.lmt | 114 ++++ tex/context/base/mkxl/font-phb-imp-internal.lmt | 160 +++++ tex/context/base/mkxl/font-phb.lmt | 560 ++++++++++++++++ tex/context/base/mkxl/font-txt.lmt | 583 +++++++++++++++++ tex/context/base/mkxl/lpdf-lmt.lmt | 2 +- tex/context/interface/mkii/keys-cs.xml | 1 + tex/generic/context/luatex/luatex-fonts-merged.lua | 2 +- 22 files changed, 2896 insertions(+), 13 deletions(-) create mode 100644 tex/context/base/mkiv/font-phb-imp-binary.lua create mode 100644 tex/context/base/mkiv/font-phb.lua create mode 100644 tex/context/base/mkiv/font-txt.lua create mode 100644 tex/context/base/mkxl/font-phb-imp-binary.lmt create mode 100644 tex/context/base/mkxl/font-phb-imp-internal.lmt create mode 100644 tex/context/base/mkxl/font-phb.lmt create mode 100644 tex/context/base/mkxl/font-txt.lmt (limited to 'tex') diff --git a/tex/context/base/mkii/cont-new.mkii b/tex/context/base/mkii/cont-new.mkii index c0f77fb71..3a4d4cdcc 100644 --- a/tex/context/base/mkii/cont-new.mkii +++ b/tex/context/base/mkii/cont-new.mkii @@ -11,7 +11,7 @@ %C 
therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2021.04.26 00:51} +\newcontextversion{2021.04.26 20:39} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/mkii/context.mkii b/tex/context/base/mkii/context.mkii index 693531af4..6539eea05 100644 --- a/tex/context/base/mkii/context.mkii +++ b/tex/context/base/mkii/context.mkii @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2021.04.26 00:51} +\edef\contextversion{2021.04.26 20:39} %D For those who want to use this: diff --git a/tex/context/base/mkii/mult-cs.mkii b/tex/context/base/mkii/mult-cs.mkii index 0f2336d65..72369ad89 100644 --- a/tex/context/base/mkii/mult-cs.mkii +++ b/tex/context/base/mkii/mult-cs.mkii @@ -761,6 +761,7 @@ \setinterfaceconstant{coupling}{propojeni} \setinterfaceconstant{couplingway}{zpusobpropojeni} \setinterfaceconstant{criterium}{kriterium} +\setinterfaceconstant{crop}{crop} \setinterfaceconstant{cropoffset}{cropoffset} \setinterfaceconstant{crossreference}{crossreference} \setinterfaceconstant{cssfile}{cssfile} diff --git a/tex/context/base/mkiv/cont-new.mkiv b/tex/context/base/mkiv/cont-new.mkiv index 223405dc1..be48bbe8d 100644 --- a/tex/context/base/mkiv/cont-new.mkiv +++ b/tex/context/base/mkiv/cont-new.mkiv @@ -13,7 +13,7 @@ % \normalend % uncomment this to get the real base runtime -\newcontextversion{2021.04.26 00:51} +\newcontextversion{2021.04.26 20:39} %D This file is loaded at runtime, thereby providing an excellent place for hacks, %D patches, extensions and new features. There can be local overloads in cont-loc diff --git a/tex/context/base/mkiv/context.mkiv b/tex/context/base/mkiv/context.mkiv index c44775e59..87793ce03 100644 --- a/tex/context/base/mkiv/context.mkiv +++ b/tex/context/base/mkiv/context.mkiv @@ -45,7 +45,7 @@ %D {YYYY.MM.DD HH:MM} format. 
\edef\contextformat {\jobname} -\edef\contextversion{2021.04.26 00:51} +\edef\contextversion{2021.04.26 20:39} %D Kind of special: diff --git a/tex/context/base/mkiv/font-lib.mkvi b/tex/context/base/mkiv/font-lib.mkvi index db9df49cd..d05872653 100644 --- a/tex/context/base/mkiv/font-lib.mkvi +++ b/tex/context/base/mkiv/font-lib.mkvi @@ -63,6 +63,8 @@ \registerctxluafile{font-one}{optimize} \registerctxluafile{font-afk}{} +\registerctxluafile{font-txt}{} + % tfm \registerctxluafile{font-tpk}{optimize} diff --git a/tex/context/base/mkiv/font-phb-imp-binary.lua b/tex/context/base/mkiv/font-phb-imp-binary.lua new file mode 100644 index 000000000..39ac6ec04 --- /dev/null +++ b/tex/context/base/mkiv/font-phb-imp-binary.lua @@ -0,0 +1,119 @@ +if not modules then modules = { } end modules ['font-phb-imp-binary'] = { + version = 1.000, -- 2016.10.10, + comment = "companion to font-txt.mkiv", + author = "Hans Hagen", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- The hb library comes in versions and the one I tested in 2016 was part of the inkscape +-- suite. In principle one can have incompatibilities due to updates but that is the nature +-- of a library. When a library is expected one had better use the system version, if only +-- to make sure that different programs behave the same. +-- +-- The main reason for testing this approach was that when Idris was working on his fonts, +-- we wanted to know how different shapers deal with it and the hb command line program +-- could provide uniscribe output. For the context shaper uniscribe is the reference, also +-- because Idris started out with Volt a decade ago. +-- +-- This file uses the indirect approach by calling the executable. This file uses context +-- features and is not generic.
+
+local next, tonumber, pcall = next, tonumber, pcall
+
+local concat     = table.concat
+local reverse    = table.reverse
+local formatters = string.formatters
+local removefile = os.remove
+local resultof   = os.resultof
+local savedata   = io.savedata
+
+local report     = utilities.hb.report or print
+local packtoutf8 = utilities.hb.helpers.packtoutf8
+
+if not context then
+    report("the binary runner is only supported in context")
+    return
+end
+
+-- output : [index=cluster@x_offset,y_offset+x_advance,y_advance|...]
+-- result : { index, cluster, x_offset, y_offset, x_advance, y_advance }
+
+local P, Ct, Cc = lpeg.P, lpeg.Ct, lpeg.Cc
+local lpegmatch = lpeg.match
+
+-- Missing offsets and advances are filled in with zero so that each glyph entry
+-- always has six fields.
+
+local zero    = Cc(0)
+local number  = lpeg.patterns.integer / tonumber + zero
+local index   = lpeg.patterns.cardinal / tonumber
+local cluster = index
+local offset  = (P("@") * number * (P(",") * number + zero)) + zero * zero
+local advance = (P("+") * number * (P(",") * number + zero)) + zero * zero
+local glyph   = Ct(index * P("=") * cluster * offset * advance)
+local pattern = Ct(P("[") * (glyph * P("|")^-1)^0 * P("]"))
+
+-- The value is passed as the --shaper option of the program.
+
+local shapers = {
+    native    = "ot,uniscribe,fallback",
+    uniscribe = "uniscribe,ot,fallback",
+    fallback  = "fallback"
+}
+
+local runner = sandbox.registerrunner {
+    method   = "resultof",
+    name     = "harfbuzz",
+ -- program  = {
+ --     windows = "hb-shape.exe",
+ --     unix    = "hb-shape"
+ -- },
+    program  = "hb-shape",
+    checkers = {
+        shaper    = "string",
+        features  = "string",
+        script    = "string",
+        language  = "string",
+        direction = "string",
+        textfile  = "writable",
+        fontfile  = "readable",
+    },
+    template = string.longtostring [[
+        --shaper=%shaper%
+        --output-format=text
+        --no-glyph-names
+        --features="%features%"
+        --script=%script%
+        --language=%language%
+        --direction=%direction%
+        --text-file=%textfile%
+        --font-file=%fontfile%
+    ]],
+}
+
+local tempfile = "font-phb.tmp"
+local reported = false
+
+-- Shape a run by writing it to a temporary file and calling the hb-shape program.
+--
+-- font              : font id (not used by this method)
+-- data              : per font table; the fields shaper, features, script, language
+--                     and filename are read
+-- rlmode            : a negative value selects right to left shaping
+-- text              : the run, handed to packtoutf8
+-- leading, trailing : handed to packtoutf8 as well
+--
+-- The return value is the parsed list of { index, cluster, xoffset, yoffset, xadvance,
+-- yadvance } entries, reversed for right to left runs. Nothing is returned when there
+-- is no runner, when the program gives no result or when the output cannot be parsed.
+
+function utilities.hb.methods.binary(font,data,rlmode,text,leading,trailing)
+    if runner then
+        savedata(tempfile,packtoutf8(text,leading,trailing))
+        local result = runner {
+            shaper    = shapers[data.shaper] or shapers.native,
+            features  = data.features,
+            script    = data.script or "dflt",
+            language  = data.language or "dflt",
+            direction = rlmode < 0 and "rtl" or "ltr",
+            textfile  = tempfile,
+            fontfile  = data.filename,
+        }
+        removefile(tempfile)
+        if result then
+         -- return jsontolua(result)
+            result = lpegmatch(pattern,result) -- { index cluster xo yo xa ya }
+            if result and rlmode < 0 then -- the match fails on unexpected output
+                return reverse(result) -- we can avoid this
+            else
+                return result
+            end
+        end
+    elseif not reported then
+        -- this test was inverted so the message below never showed up; we report once
+        report("no runner available")
+        reported = true
+    end
+end
diff --git a/tex/context/base/mkiv/font-phb.lua b/tex/context/base/mkiv/font-phb.lua
new file mode 100644
index 000000000..42ebfcbc6
--- /dev/null
+++ b/tex/context/base/mkiv/font-phb.lua
@@ -0,0 +1,728 @@
+if not modules then modules = { } end modules ['font-phb'] = {
+    version   = 1.000, -- 2016.10.10,
+    comment   = "companion to font-txt.mkiv",
+    original  = "derived from a prototype by Kai Eigner",
+    author    = "Hans Hagen", -- so don't blame KE
+    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+-- The next code is a rewrite of Kai's prototype. Here we forget about components
+-- and assume some sane data structures. Clusters are handled on the fly. This is
+-- probably one of the places where generic and context code is (to be) different
+-- anyway. All errors in the logic below are mine (Hans). The optimizations probably
+-- make less sense in luajittex because there the interpreter does some optimization
+-- but we may end up with a non-jit version some day.
+--
+-- For testing I used the commandline tool as this code is not that critical and not
+-- used in context for production (maybe for testing).
I noticed some issues with +-- r2l shaping of latin but the uniscribe shaper seems better with that but as it's +-- a library we're supposed to treat it as a magic black box and not look into it. In +-- the end all will be sorted out I guess so we don't need to worry about it. Also, I +-- can always improve the code below if really needed. +-- +-- We create intermediate tables which might look inefficient. For instance we could +-- just return two tables or an iterator but in the end this is not the bottleneck. +-- In fact, speed is hard to measure anyway, as it depends on the font, complexity +-- of the text, etc. Sometimes the library is faster, sometimes the context Lua one +-- (which is interesting as it does a bit more, i.e. supports additional features, +-- which also makes it even harder to check). When we compare context mkiv runs with +-- mkii runs using pdftex or xetex (which uses harfbuzz) the performance of luatex +-- on (simple) font demos normally is significantly less compared with pdftex (8 bit +-- and no unicode) but a bit better than xetex. It looks like the interface that gets +-- implemented here suits that pattern (keep in mind that especially discretionary +-- handling is quite complex and similar to the context mkiv variant). +-- +-- The main motivations for supporting this are (1) the fact that Kai spent time on +-- it, and (2) that we can compare the Lua variant with uniscribe, which is kind of +-- a reference. We started a decade ago (2006) with the Lua implementation and had +-- to rely on MSWord for comparison. On the other hand, the command line version is +-- also useable for that. Don't blame the library or its (maybe wrong) use (here) +-- for side effects. +-- +-- Currently there are two methods: (1) binary, which is slow and uses the command +-- line shaper and (2) the ffi binding. In the meantime I redid the feed-back-into- +-- the-node-list method.
This way tracing is easier, performance better, and there +-- is no need to mess so much with spacing. I have no clue if I lost functionality +-- and as this is not production code issues probably will go unnoticed for a while. +-- We'll see. +-- +-- Usage: see m-fonts-plugins.mkiv as that is the interface. +-- +-- Remark: It looks like the library sets up some features by default. Passing them +-- somehow doesn't work (yet) so I must miss something here. There is something fishy +-- here with enabling features like init, medi, fina etc because when we turn them on +-- they aren't applied. Also some features are not processed. +-- +-- Remark: Because utf32 is fragile I append a couple of zero slots which seems to +-- work out ok. In fact, after some experiment I figured out that utf32 needs a list +-- of 4 byte cardinals. From the fact that Kai used the utf8 method I assumed that +-- there was a utf32 too and indeed that worked but I have no time to look into it +-- more deeply. It seems to work ok though. +-- +-- The plugin itself has plugins and we do it the same as with (my)sql support, i.e. +-- we provide methods. The specific methods are implemented in the imp files. We +-- follow that model with other libraries too. +-- +-- Somehow the command line version does uniscribe (usp10.dll) but not the library +-- so when I can get motivated I might write a binding for uniscribe. (Problem: I +-- don't look forward to decipher complex (c++) library api's so in the end it might +-- never happen. A quick glance at the usp10 api gives me the impression that the +-- apis don't differ that much, but still.) +-- +-- Warning: This is rather old code, cooked up in the second half of 2016. I'm not +-- sure if it will keep working because it's not used in production and therefore +-- doesn't get tested. 
It was written as part of some comparison tests for Idris, +-- who wanted to compare the ConTeXt handler, uniscribe and hb, for which there are +-- also some special modules (that show results alongside). It has never been tested +-- in regular documents. As it runs independent of the normal font processors there +-- is probably not that much risk of interference but of course one loses all the +-- goodies that have been around for a while (or will show up in the future). The +-- code can probably be optimized a bit. + +-- There are three implementation specific files: +-- +-- 1 font-phb-imp-binary.lua : calls the command line version of hb +-- 2 font-phb-imp-library.lua : uses ffi to interface to hb +-- 3 font-phb-imp-internal.lua : uses a small library to interface to hb +-- +-- Variants 1 and 2 should work with mkiv and were used when playing with these +-- things, when writing the articles, and when running some tests for Idris font +-- development. Variant 3 (and maybe 1 also works) is meant for lmtx and has not +-- been used (read: tested) so far. The 1 and 2 variants are kind of old, but 3 is +-- an adaptation of 2 so not hip and modern either.
+
+if not context then
+    return
+end
+
+local next, tonumber, pcall, rawget = next, tonumber, pcall, rawget
+
+local concat     = table.concat
+local sortedhash = table.sortedhash
+local formatters = string.formatters
+
+local fonts       = fonts
+local otf         = fonts.handlers.otf
+local texthandler = otf.texthandler
+
+local fontdata = fonts.hashes.identifiers
+
+local nuts   = nodes.nuts
+local tonode = nuts.tonode
+local tonut  = nuts.tonut
+
+local remove_node = nuts.remove
+
+local getboth       = nuts.getboth
+local getnext       = nuts.getnext
+local setnext       = nuts.setnext
+local getprev       = nuts.getprev
+local setprev       = nuts.setprev
+local getid         = nuts.getid
+local getchar       = nuts.getchar
+local setchar       = nuts.setchar
+local setlink       = nuts.setlink
+local setoffsets    = nuts.setoffsets
+----- getcomponents = nuts.getcomponents
+----- setcomponents = nuts.setcomponents
+local getwidth      = nuts.getwidth
+local setwidth      = nuts.setwidth
+
+local copy_node = nuts.copy
+local find_tail = nuts.tail
+
+local nodepool  = nuts.pool
+local new_kern  = nodepool.fontkern
+local new_glyph = nodepool.glyph
+
+local nodecodes  = nodes.nodecodes
+local glyph_code = nodecodes.glyph
+local glue_code  = nodecodes.glue
+
+local skipped = {
+    -- we assume that only valid features are set but maybe we need a list
+    -- of valid hb features as there can be many context specific ones
+    mode     = true,
+    features = true,
+    language = true,
+    script   = true,
+}
+
+local seenspaces = {
+    [0x0020] = true,
+    [0x00A0] = true,
+    [0x0009] = true, -- indeed
+    [0x000A] = true, -- indeed
+    [0x000D] = true, -- indeed
+}
+
+-- helpers
+
+local helpers     = { }
+local methods     = { }
+local initialized = { } -- we don't pollute the shared table
+
+local method = "library"
+local shaper = "native" -- "uniscribe"
+local report = logs.reporter("font plugin","hb")
+
+utilities.hb = {
+    methods = methods,
+    helpers = helpers,
+    report  = report,
+}
+
+do
+
+    -- we can move this to the internal lib .. just pass a table .. but it is not faster
+
+    -- Convert each entry of text with the given converter and join the results,
+    -- with a space in front when leading is set and one at the end when trailing
+    -- is set. The text table itself is left alone: it used to be overwritten with
+    -- the converted strings (and got slots 0 and size+1 assigned) while the caller
+    -- still reads it afterwards when tracing.
+
+    local function pack(text,leading,trailing,convert,space)
+        local list = { }
+        local n    = 0
+        if leading then
+            n       = 1
+            list[1] = space
+        end
+        for i=1,#text do
+            n       = n + 1
+            list[n] = convert(text[i])
+        end
+        if trailing then
+            list[n+1] = space
+        end
+        return concat(list)
+    end
+
+    local toutf8    = utf.char
+    local utf8space = toutf8(0x20)
+
+    -- Returns the entries of text converted by utf.char as one string, optionally
+    -- with a leading and/or trailing space.
+
+    function helpers.packtoutf8(text,leading,trailing)
+        return pack(text,leading,trailing,toutf8,utf8space)
+    end
+
+    local toutf32    = utf.toutf32string
+    local utf32space = toutf32(0x20)
+
+    -- Same as above but each entry is converted by utf.toutf32string.
+
+    function helpers.packtoutf32(text,leading,trailing)
+        return pack(text,leading,trailing,toutf32,utf32space)
+    end
+
+end
+
+-- Collect what the shaper methods need for a font (feature string, file name, the
+-- index to character mapping, space width, marks, scale factor, metrics) and cache
+-- the result in the initialized table under the font id. The collected table is
+-- returned.
+
+local function initialize(font)
+
+    local tfmdata      = fontdata[font]
+    local resources    = tfmdata.resources
+    local shared       = tfmdata.shared
+    local filename     = resources.filename
+    local features     = shared.features
+    local descriptions = shared.rawdata.descriptions
+    local characters   = tfmdata.characters
+    local featureset   = { }
+    local copytochar   = shared.copytochar -- indextounicode
+    local spacewidth   = nil -- unscaled
+    local factor       = tfmdata.parameters.factor
+    local marks        = resources.marks or { }
+
+    -- could be shared but why care about a few extra tables
+
+    if not copytochar then
+        copytochar = { }
+        -- let's make sure that we have an indexed table and not a hash
+        local max = 0
+        for k, v in next, descriptions do
+            local index = v.index
+            if index and index > max then -- a description without index is skipped
+                max = index
+            end
+        end
+        for i=0,max do
+            copytochar[i] = i
+        end
+        -- the normal mapper
+        for k, v in next, descriptions do
+            local index = v.index
+            if index then
+                copytochar[index] = k
+            end
+        end
+        shared.copytochar = copytochar
+    end
+
+    -- independent from loop as we have unordered hashes
+
+    if descriptions[0x0020] then
+        spacewidth = descriptions[0x0020].width
+    elseif descriptions[0x00A0] then
+        spacewidth = descriptions[0x00A0].width
+    end
+
+    for k, v in sortedhash(features) do
+        if #k > 4 then
+            -- unknown ones are ignored anyway but we can assume that the current
+            -- (and future) extra context features use more verbose names
+        elseif skipped[k] then
+            -- we don't want to pass language and such so we block a few features
+            -- explicitly
+        elseif v == "yes" or v == true then
+            featureset[#featureset+1] = k .. "=1" -- cf command line (false)
+        elseif v == "no" or v == false then
+            featureset[#featureset+1] = k .. "=0" -- cf command line (true)
+        elseif type(v) == "number" then
+            featureset[#featureset+1] = k .. "=" .. v -- cf command line (alternate)
+        else
+            -- unset
+        end
+    end
+
+    local data = {
+        language   = features.language, -- do we need to uppercase and pad to 4 ?
+        script     = features.script,   -- do we need to uppercase and pad to 4 ?
+        features   = #featureset > 0 and concat(featureset,",") or "", -- hash
+        featureset = #featureset > 0 and featureset or nil,
+        copytochar = copytochar,
+        spacewidth = spacewidth,
+        filename   = filename,
+        marks      = marks,
+        factor     = factor,
+        characters = characters, -- the loaded font (we use its metrics which is more accurate)
+        method     = features.method or method,
+        shaper     = features.shaper or shaper,
+    }
+    initialized[font] = data
+    return data
+end
+
+-- In many cases this gives compatible output but especially with respect to spacing and user
+-- discretionaries that mix fonts there can be different outcomes. We also have no possibility
+-- to tweak and cheat. Of course one can always run a normal node mode pass with specific
+-- features first but then one can as well do all in node mode. So .. after a bit of playing
+-- around I redid this one from scratch and also added tracing.
+
+local trace_colors  = false  trackers.register("fonts.plugins.hb.colors", function(v) trace_colors  = v end)
+local trace_details = false  trackers.register("fonts.plugins.hb.details",function(v) trace_details = v end)
+local check_id      = false
+----- components    = false -- we have no need for them
+
+local setcolor   = function() end
+local resetcolor = function() end
+
+if context then
+    setcolor   = nodes.tracers.colors.set
+    resetcolor = nodes.tracers.colors.reset
+end
+
+-- Methods are loaded on demand from font-phb-imp-<name>.lua. The loaded file is
+-- expected to store its function in the methods table. When that doesn't happen
+-- (or when the file cannot be found) a dummy that returns an empty result is
+-- installed so that we only try once.
+
+table.setmetatableindex(methods,function(t,k)
+    local l = "font-phb-imp-" .. k .. ".lua"
+    report("start loading method %a from %a",k,l)
+    local f = resolvers.findfile(l)
+    if f and f ~= "" then
+        -- we don't feed an unresolved name to dofile (without argument it reads stdin)
+        dofile(f)
+    end
+    local v = rawget(t,k)
+    if v then
+        report("loading method %a succeeded",k)
+    else
+        report("loading method %a failed",k)
+        v = function() return { } end
+    end
+    t[k] = v
+    return v
+end)
+
+-- A tracing helper: it returns three lists, the input as characters, the input as
+-- formatted codepoints and the formatted codepoints of result[first] upto result[last].
+
+local inandout do
+
+    local utfbyte = utf.byte
+    local utfchar = utf.char
+    local utf3208 = utf.utf32_to_utf8_le
+
+    inandout = function(text,result,first,last,copytochar)
+        local s = { }
+        local t = { }
+        local r = { }
+        local f = formatters["%05U"]
+        for i=1,#text do
+            local c = text[i]
+         -- t[#t+1] = f(utfbyte(utf3208(c)))
+            s[#s+1] = utfchar(c)
+            t[#t+1] = f(c)
+        end
+        for i=first,last do
+            r[#r+1] = f(copytochar[result[i][1]])
+        end
+        return s, t, r
+    end
+
+end
+
+-- The handler that gets passed to the text handler. The run is shaped by the method
+-- that is configured for the font and the result is fed back into the node list.
+--
+-- head              : head of the node list, the (maybe changed) head is returned
+-- font              : font id, the key of the cached per font data
+-- attr              : not used here
+-- rlmode            : a negative value means right to left
+-- start, stop       : first and last node of the run
+-- text              : the original text, without spaces
+-- leading, trailing : when set the first c.q. last entry of the result is not mapped
+--                     onto a node but only used to compensate the neighbouring glue
+--
+-- Each result entry is { index, cluster, xoffset, yoffset, xadvance, yadvance }.
+
+local function harfbuzz(head,font,attr,rlmode,start,stop,text,leading,trailing)
+    local data = initialized[font]
+
+    if not data then
+        data = initialize(font)
+    end
+
+    if check_id then
+        if getid(start) ~= glyph_code then
+            report("error: start is not a glyph")
+            return head
+        elseif getid(stop) ~= glyph_code then
+            report("error: stop is not a glyph")
+            return head
+        end
+    end
+    local size   = #text -- original text, without spaces
+    local result = methods[data.method](font,data,rlmode,text,leading,trailing)
+    local length = result and #result or 0
+
+    if length == 0 then
+     -- report("warning: no result")
+        return head
+    end
+
+    local factor     = data.factor
+    local marks      = data.marks
+    local spacewidth = data.spacewidth
+    local copytochar = data.copytochar
+    local characters = data.characters
+
+    -- the text analyzer is only partially clever so we must assume that we get
+    -- inconsistent lists
+
+    -- we could check if something has been done (replacement or kern or so) but
+    -- then we pass around more information and need to check a lot and spaces
+    -- are kind of spoiling that game (we need a different table then) .. more
+    -- pain than gain
+
+    -- we could play with 0xFFFE as boundary
+
+    local current = start
+    local first   = 1
+    local last    = length
+
+    if leading then
+        first = first + 1
+    end
+    if trailing then
+        last = last - 1
+    end
+
+    local position = first
+    local cluster  = 0
+    local glyph    = nil
+    local index    = 0
+    local count    = 1
+ -- local runner   = nil
+ -- local saved    = nil
+
+    if trace_details then
+        report("start run, original size: %i, result index: %i upto %i",size,first,last)
+        local s, t, r = inandout(text,result,first,last,copytochar)
+        report("method : %s",data.method)
+        report("shaper : %s",data.shaper)
+        report("string : %t",s)
+        report("text : % t",t)
+        report("result : % t",r)
+    end
+
+    -- okay, after some experiments, it became clear that more complex code aimed at
+    -- optimization doesn't pay off as complexity also demands more testing
+
+    for i=first,last do
+        local r       = result[i]
+        local unicode = copytochar[r[1]] -- can be private of course
+        --
+        cluster = r[2] + 1 -- starts at zero
+        --
+        if position == cluster then
+            if i == first then
+                index = 1
+                if trace_details then
+                    report("[%i] position: %i, cluster: %i, index: %i, starting",i,position,cluster,index)
+                end
+            else
+                index = index + 1
+                if trace_details then
+                    report("[%i] position: %i, cluster: %i, index: %i, next step",i,position,cluster,index)
+                end
+            end
+        elseif position < cluster then
+            -- a new cluster
+            current  = getnext(current)
+            position = position + 1
+            size     = size - 1
+         -- if runner then
+         --     local h, t
+         --     if saved then
+         --         h = copy_node(runner)
+         --         if trace_colors then
+         --             resetcolor(h)
+         --         end
+         --         setchar(h,saved)
+         --         t = h
+         --         if trace_details then
+         --             report("[%i] position: %i, cluster: %i, index: -, initializing components",i,position,cluster)
+         --         end
+         --     else
+         --         h = getcomponents(runner)
+         --         t = find_tail(h)
+         --     end
+         --     for p=position,cluster-1 do
+         --         local n
+         --         head, current, n = remove_node(head,current)
+         --         setlink(t,n)
+         --         t = n
+         --         if trace_details then
+         --             report("[%i] position: %i, cluster: %i, index: -, moving node to components",i,p,cluster)
+         --         end
+         --         size = size - 1
+         --     end
+         --     if saved then
+         --         setcomponents(runner,h)
+         --         saved = false
+         --     end
+         -- else
+                for p=position,cluster-1 do
+                    head, current = remove_node(head,current,true)
+                    if trace_details then
+                        report("[%i] position: %i, cluster: %i, index: -, removing node",i,p,cluster)
+                    end
+                    size = size - 1
+                end
+         -- end
+            position = cluster
+            index    = 1
+            glyph    = nil
+            if trace_details then
+                report("[%i] position: %i, cluster: %i, index: %i, arriving",i,position,cluster,index)
+            end
+        else -- maybe a space got properties
+            if trace_details then
+                report("position: %i, cluster: %i, index: %i, quitting due to fatal inconsistency",position,cluster,index)
+            end
+            return head
+        end
+        local copied = false
+        if glyph then
+            if trace_details then
+                report("[%i] position: %i, cluster: %i, index: %i, copying glyph, unicode %U",i,position,cluster,index,unicode)
+            end
+            local g = copy_node(glyph)
+            if trace_colors then
+                resetcolor(g)
+            end
+            setlink(current,g,getnext(current)) -- the copy ends up after current
+            current = g
+            copied  = true
+        else
+            if trace_details then
+                report("[%i] position: %i, cluster: %i, index: %i, using glyph, unicode %U",i,position,cluster,index,unicode)
+            end
+            glyph = current
+        end
+        --
+        if not current then
+            if trace_details then
+                report("quitting due to unexpected end of node list")
+            end
+            return head
+        end
+        --
+        local id = getid(current)
+        if id ~= glyph_code then
+            if trace_details then
+                report("glyph expected in node list")
+            end
+            return head
+        end
+        --
+        -- really, we can get a tab (9), lf (10), or cr(13) back in cambria .. don't ask me why
+        --
+        local prevnode, nextnode = getboth(current)
+        --
+        -- assign glyph: first in run
+        --
+     -- if components and index == 1 then
+     --     runner = current
+     --     saved  = getchar(current)
+     --     if saved ~= unicode then
+     --         setchar(current,unicode) -- small optimization
+     --         if trace_colors then
+     --             count = (count == 8) and 1 or count + 1
+     --             setcolor(current,"trace:"..count)
+     --         end
+     --     end
+     -- else
+            setchar(current,unicode)
+            if trace_colors then
+                count = (count == 8) and 1 or count + 1
+                setcolor(current,"trace:"..count)
+            end
+     -- end
+        --
+        local x_offset  = r[3] -- r.dx
+        local y_offset  = r[4] -- r.dy
+        local x_advance = r[5] -- r.ax
+        ----- y_advance = r[6] -- r.ay
+        local left      = 0
+        local right     = 0
+        local dx        = 0
+        local dy        = 0
+        if trace_details then
+            if x_offset ~= 0 or y_offset ~= 0 or x_advance ~= 0 then -- or y_advance ~= 0
+                report("[%i] position: %i, cluster: %i, index: %i, old, xoffset: %p, yoffset: %p, xadvance: %p, width: %p",
+                    i,position,cluster,index,x_offset*factor,y_offset*factor,x_advance*factor,characters[unicode].width)
+            end
+        end
+        if y_offset ~= 0 then
+            dy = y_offset * factor
+        end
+        if rlmode >= 0 then
+            -- l2r marks and rest
+            if x_offset ~= 0 then
+                dx = x_offset * factor
+            end
+            local width = characters[unicode].width
+            local delta = x_advance * factor
+            if delta ~= width then
+             -- right = -(delta - width)
+                right = delta - width
+            end
+        elseif marks[unicode] then -- why not just the next loop
+            -- r2l marks
+            if x_offset ~= 0 then
+                dx = -x_offset * factor
+            end
+        else
+            -- r2l rest
+            local width = characters[unicode].width
+            local delta = (x_advance - x_offset) * factor
+            if delta ~= width then
+                left = delta - width
+            end
+            if x_offset ~= 0 then
+                right = x_offset * factor
+            end
+        end
+        if copied or dx ~= 0 or dy ~= 0 then
+            setoffsets(current,dx,dy)
+        end
+        if left ~= 0 then
+            local kern = new_kern(left)
+            setlink(prevnode,kern,current) -- insertbefore
+            if current == head then
+                -- the kern now comes first; this used to assign the node before the
+                -- old head which made us lose the list
+                head = kern
+            end
+        end
+        if right ~= 0 then
+            local kern = new_kern(right)
+            setlink(current,kern,nextnode)
+            current = kern
+        end
+        if trace_details then
+            if dy ~= 0 or dx ~= 0 or left ~= 0 or right ~= 0 then
+                report("[%i] position: %i, cluster: %i, index: %i, new, xoffset: %p, yoffset: %p, left: %p, right: %p",i,position,cluster,index,dx,dy,left,right)
+            end
+        end
+    end
+    --
+    if trace_details then
+        report("[-] position: %i, cluster: %i, index: -, at end",position,cluster)
+    end
+    if size > 1 then
+        current = getnext(current)
+     -- if runner then
+     --     local h, t
+     --     if saved then
+     --         h = copy_node(runner)
+     --         if trace_colors then
+     --             resetcolor(h)
+     --         end
+     --         setchar(h,saved)
+     --         t = h
+     --         if trace_details then
+     --             report("[-] position: %i, cluster: -, index: -, initializing components",position)
+     --         end
+     --     else
+     --         h = getcomponents(runner)
+     --         t = find_tail(h)
+     --     end
+     --     for i=1,size-1 do
+     --         if trace_details then
+     --             report("[-] position: %i + %i, cluster: -, index: -, moving node to components",position,i)
+     --         end
+     --         local n
+     --         head, current, n = remove_node(head,current,true)
+     --         setlink(t,n)
+     --         t = n
+     --     end
+     --     if saved then
+     --         setcomponents(runner,h)
+     --         saved = false
+     --     end
+     -- else
+            for i=1,size-1 do
+                if trace_details then
+                    report("[-] position: %i + %i, cluster: -, index: -, removing node",position,i)
+                end
+                head, current = remove_node(head,current,true)
+            end
+     -- end
+    end
+    --
+    -- We see all kind of interesting spaces come back (like tabs in cambria) so we do a bit of
+    -- extra testing here. Without a known space width there is nothing to compare with so
+    -- then we skip the compensation.
+    --
+    if leading and spacewidth then
+        local r       = result[1]
+        local unicode = copytochar[r[1]]
+        if seenspaces[unicode] then
+            local x_advance = r[5]
+            local delta     = x_advance - spacewidth
+            if delta ~= 0 then
+                -- nothing to do but jump one slot ahead
+                local prevnode = getprev(start)
+                if prevnode and getid(prevnode) == glue_code then
+                    local dx = delta * factor
+                    setwidth(prevnode,getwidth(prevnode) + dx)
+                    if trace_details then
+                        report("compensating leading glue by %p due to codepoint %U",dx,unicode)
+                    end
+                else
+                    report("no valid leading glue node")
+                end
+            end
+        end
+    end
+    --
+    if trailing and spacewidth then
+        local r       = result[length]
+        local unicode = copytochar[r[1]]
+        if seenspaces[unicode] then
+            local x_advance = r[5]
+            local delta     = x_advance - spacewidth
+            if delta ~= 0 then
+                local nextnode = getnext(stop)
+                if nextnode and getid(nextnode) == glue_code then
+                    local dx = delta * factor
+                    setwidth(nextnode,getwidth(nextnode) + dx)
+                    if trace_details then
+                        report("compensating trailing glue by %p due to codepoint %U",dx,unicode)
+                    end
+                else
+                    report("no valid trailing glue node")
+                end
+            end
+        end
+    end
+    --
+    if trace_details then
+        report("run done")
+    end
+    return head
+end
+
+otf.registerplugin("harfbuzz",function(head,font,attr,direction)
+    return texthandler(head,font,attr,direction,harfbuzz)
+end)
diff --git a/tex/context/base/mkiv/font-txt.lua b/tex/context/base/mkiv/font-txt.lua
new file mode 100644
index 000000000..20f290ddb
--- /dev/null
+++ b/tex/context/base/mkiv/font-txt.lua
@@ -0,0 +1,549 @@
+if not modules then modules = { } end modules ['font-txt'] = {
+    version   = 1.001,
+    comment   = "companion to font-ini.mkiv",
+    original  = "derived from a prototype by Kai Eigner",
+    author    = "Hans Hagen", -- so don't blame KE
+    copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files"
+}
+
+-- The next code is derived from a snippet handler prototype by Kai Eigner and
+-- resembles the main loop of the Lua font handler but I decided to use a more generic
+-- (and pluggable)
approach and not hook it into the already present opentype +-- handler. This is cleaner as it cannot interfere with the Lua font processor +-- (which does some more things) and is also better performance wise. It also makes +-- it possible to support other handlers as history has proven that there is no +-- universal solution in computer land. Most of the disc logic is kept but done +-- slightly different. +-- +-- The code is heavily optimized and generalized so there can be errors. As +-- mentioned, the plug mode can be used for alternative font handlers. A font is +-- still loaded but the node and base mode handlers are ignored. Plugins are +-- unlikely to work well in context as they can mess up attribute driven subsystems, +-- so they are not officially supported. The language and script options are +-- available in the usual way. +-- +-- The code collects snippets, either or not with spacing around them and partially +-- running over disc nodes. The r2l "don't assume disc and collect larger chunks" is +-- not robust so I got rid of that branch. This is somewhat similar to the Lua font +-- handler. +-- +-- An alternative is to run over longer strings with dummy chars (unicode objects) as +-- markers for whatever is in the list but that becomes tricky with mixed fonts and +-- reconstruction becomes a bit of a mess then, especially because disc nodes force +-- us to backtrack and look at several solutions. It also has a larger memory +-- footprint. Some tests demonstrated that it has no gain and only adds complexity. +-- +-- This (derived) variant is better suited for context and seems to work ok in the +-- generic variant. I also added some context specific tracing to the code. This +-- variant uses the plug model provided in the context font system. So, in context, +-- using the built in Lua handler is the better alternative, also because it has +-- extensive tracing features.
Context users would lose additional functionality +-- that has been provided for a decade and therefore plugins are not officially +-- supported (at least not by me, unless I use them myself). +-- +-- There is no checking here for already processed characters so best not mix this +-- variant with code that does similar things. Whether this code evolves depends on its +-- usability. Kai's code can now be found on github where it is used with a harfbuzz +-- library. We add this kind of stuff because occasionally we compare engines and +-- Kai sends me examples and I then need to check context. +-- +-- One important difference between Kai's approach and the one used in ConTeXt is +-- that we use utf-32 instead of utf-8. Once I figured out that clusters were just +-- indices into the original text that made more sense. The first implementation +-- used the command line tool (binary), then I went for ffi (library). +-- +-- Beware: this file only implements the framework for plugins. Plugins themselves +-- are in other files (e.g. font-phb*.lua). On the todo list is a uniscribe plugin +-- because that is after all the reference for opentype support, but that interface +-- needs a bit more work (so it might never happen). +-- +-- Usage: see m-fonts-plugins.mkiv. As it's a nice test for ffi support that file +-- might be added to the distribution somewhere in the middle of 2017 when the ffi +-- interface has been tested a bit more. Okay, it's 2021 now and we're way past that +-- date but we never had a reason for adding it to the ConTeXt distribution. It +-- should still work okay because I occasionally checked it against progress made in +-- the engines and used newer helpers.
--
-- Here is an example of usage:
--
-- \starttext
--     \definefontfeature[test][mode=plug,features=text]
--     \start
--         \showfontkerns
--         \definedfont[Serif*test]
--         \input tufte \par
--     \stop
-- \stoptext

local fonts            = fonts
local otf              = fonts.handlers.otf
local nodes            = nodes

local utfchar          = utf.char

local nuts             = nodes.nuts

local getnext          = nuts.getnext
local setnext          = nuts.setnext
local getprev          = nuts.getprev
local setprev          = nuts.setprev
local getid            = nuts.getid
local getsubtype       = nuts.getsubtype
local getfont          = nuts.getfont
local getchar          = nuts.getchar
local getdisc          = nuts.getdisc
local setdisc          = nuts.setdisc
local getboth          = nuts.getboth
local setlink          = nuts.setlink
local getkern          = nuts.getkern
local getwidth         = nuts.getwidth

local ischar           = nuts.ischar
local isglyph          = nuts.isglyph
local traverse_id      = nuts.traverse_id
local usesfont         = nuts.uses_font

local copy_node_list   = nuts.copy_list
local find_node_tail   = nuts.tail
local flush_list       = nuts.flush_list
local free_node        = nuts.free
local end_of_math      = nuts.end_of_math
local start_of_par     = nuts.start_of_par

local nodecodes        = nodes.nodecodes

local glyph_code       = nodecodes.glyph
local glue_code        = nodecodes.glue
local disc_code        = nodecodes.disc
local kern_code        = nodecodes.kern
local math_code        = nodecodes.math
local dir_code         = nodecodes.dir
local par_code         = nodecodes.par
-- Was referenced by equalnode without ever being declared (so it was a nil global).
local whatsit_code     = nodecodes.whatsit

local righttoleft_code = nodes.dirvalues.righttoleft

local txtdirstate      = otf.helpers.txtdirstate
local pardirstate      = otf.helpers.pardirstate

local fonthashes       = fonts.hashes
local fontdata         = fonthashes.identifiers

-- Removes every disc node from the list starting at head. The pre and post lists
-- of each disc are flushed and its replace list (when present) is spliced into the
-- main list at the position of the disc. Returns the possibly changed head.

local function deletedisc(head)
    local current = head
    local next    = nil
    while current do
        -- fetch the successor first because current may be freed below
        next = getnext(current)
        if getid(current) == disc_code then
            local pre, post, replace, pre_tail, post_tail, replace_tail = getdisc(current,true)
            -- detach the sublists so that freeing the disc doesn't touch them
            setdisc(current)
            if pre then
                flush_list(pre)
            end
            if post then
                flush_list(post)
            end
            local p, n = getboth(current)
            if replace then
                if current == head then
                    head = replace
                    setprev(replace) -- already nil
                else
                    setlink(p,replace)
                end
                setlink(replace_tail,n) -- was: setlink(n,replace_tail)
            elseif current == head then
                head = n
                setprev(n)
            else
                setlink(p,n)
            end
            free_node(current)
        end
        current = next
    end
    return head
end

-- As we know that we have the same font we can probably optimize this a bit more.
-- Although we can have more in disc nodes than characters and kerns we only support
-- those two types.

-- Compares two nodes inside a disc sublist: glyphs are equal when m is a character
-- in the font of n with the same code, kerns are equal when their amounts match,
-- anything else is considered different.
--
-- Fix: the second return value of isglyph was not captured, so the kern test used
-- an undefined (nil) n_id and could never succeed.

local function eqnode(n,m) -- no real improvement in speed
    local n_char, n_id = isglyph(n)
    if n_char then
        return n_char == ischar(m,getfont(n))
    elseif n_id == kern_code then
        return getkern(n) == getkern(m)
    end
    return false
end

-- Compares two (possibly absent) nodes from the main list. Two missing nodes are
-- equal, a missing and a present node are not. Glyphs and kerns are compared by
-- value, glue is always considered equal, whatsits never are, and disc nodes are
-- equal when their pre, post and replace lists match node by node (using eqnode)
-- and have the same length.

local function equalnode(n,m)
    if not n then
        return not m
    elseif not m then
        return false
    end
    local n_char, n_id = isglyph(n)
    if n_char then
        return n_char == ischar(m,n_id) -- n_id == n_font
    elseif n_id == whatsit_code then
        return false
    elseif n_id == glue_code then
        return true
    elseif n_id == kern_code then
        return getkern(n) == getkern(m)
    elseif n_id == disc_code then
        local n_pre, n_post, n_replace = getdisc(n)
        local m_pre, m_post, m_replace = getdisc(m)
        while n_pre and m_pre do
            if not eqnode(n_pre,m_pre) then
                return false
            end
            n_pre = getnext(n_pre)
            m_pre = getnext(m_pre)
        end
        if n_pre or m_pre then
            -- different length
            return false
        end
        while n_post and m_post do
            if not eqnode(n_post,m_post) then
                return false
            end
            n_post = getnext(n_post)
            m_post = getnext(m_post)
        end
        if n_post or m_post then
            return false
        end
        while n_replace and m_replace do
            if not eqnode(n_replace,m_replace) then
                return false
            end
            n_replace = getnext(n_replace)
            m_replace = getnext(m_replace)
        end
        if n_replace or m_replace then
            return false
        end
        return true
    end
    return false
end

-- The spacing hackery is not nice.
-- The text can get leading and trailing spaces
-- and even mid spaces while the start and stop nodes not always are glues then
-- so the plugin really needs to do some testing there. We could pass more context
-- but it doesn't become much better.
--
-- The attribute gets passed for tracing purposes. We could support it (not that
-- hard to do) but as we don't test strictly for fonts (in disc nodes) we are not
-- compatible anyway. It would also mean more testing. So, don't use this mixed
-- with node and base mode in context.
--
-- We don't distinguish between modes in treatment (so no r2l assumptions) and
-- no cheats for scripts that might not use discretionaries. Such hacks can work
-- in predictable cases but in context one can use a mix all kind of things and
-- users do that. On the other hand, we do support longer glyph runs in both modes
-- so there we gain a bit.

-- Walks the node list and feeds runs of characters in the given font to handler.
--
--   head         : first node of the list (nothing is returned when it is absent)
--   font         : font id; only characters in this font are collected
--   attr         : passed on untouched to the handler
--   rlmode       : righttoleft_code or an already normalized -1 means right to
--                  left, anything else is left to right
--   handler      : called as handler(head,font,attr,rlmode,start,stop,text,leading,
--                  trailing); its first return value becomes the new head
--   startspacing : whether the first run is preceded by a space (default false)
--   stopspacing  : whether the last run is followed by a space (default false)
--   nesting      : "pre", "post" or "replace" on recursive calls (not used here)
--
-- Returns the (possibly new) head and true.
--
-- Fixes compared to the original:
--
-- * In the zero width glue branch the result of handle was assigned to head, but
--   handle returns nothing, so head became nil.
-- * The direction got normalized to -1 or 0 by comparing with righttoleft_code,
--   but the public wrapper and the recursive disc calls already pass -1 or 0. A -1
--   is now kept. NOTE(review): this assumes righttoleft_code is not -1 itself, in
--   which case the extra test is a no-op anyway.

local function texthandler(head,font,attr,rlmode,handler,startspacing,stopspacing,nesting)
    if not head then
        return
    end
    if startspacing == nil then
        startspacing = false
    end
    if stopspacing == nil then
        stopspacing = false
    end

    if getid(head) == par_code and start_of_par(head) then
        rlmode = pardirstate(head)
    elseif rlmode == righttoleft_code or rlmode == -1 then
        rlmode = -1
    else
        rlmode = 0
    end

    local dirstack    = { }
    local rlparmode   = 0
    local topstack    = 0
    local text        = { }  -- collected character codes of the current run
    local size        = 0    -- number of entries in text
    local current     = head
    local start       = nil  -- first node of the current run
    local stop        = nil
    local startrlmode = rlmode

    -- Flushes the collected run to the handler and resets the collector. It updates
    -- the head upvalue itself and has no return value.

    local function handle(leading,trailing) -- what gets passed can become configurable: e.g. utf 8
        local stop = current or start -- hm, what with outer stop
        if getid(stop) ~= glyph_code then
            stop = getprev(stop)
        end
        head  = handler(head,font,attr,rlmode,start,stop,text,leading,trailing) -- handler can adapt text
        size  = 0
        text  = { }
        start = nil
    end

    while current do
        local char, id = ischar(current,font)
        if char then
            if not start then
                start = current
                startrlmode = rlmode
            end
            local char = getchar(current)
            size = size + 1
            text[size] = char
            current = getnext(current)
        elseif char == false then
            -- so a mixed font
            if start and size > 0 then
                handle(startspacing,false)
            end
            startspacing = false
            current = getnext(current)
        elseif id == glue_code then
            -- making this branch optional i.e. always use the else doesn't really
            -- make a difference in performance (in hb) .. tricky anyway as we can
            local width = getwidth(current)
            if width > 0 then
                if start and size > 0 then
                    handle(startspacing,true)
                end
                startspacing = true
                stopspacing  = false
            else
                if start and size > 0 then
                    -- was: head = handle(startspacing) which made head nil
                    handle(startspacing,false)
                end
                startspacing = false
                stopspacing  = false
            end
            current = getnext(current)
        elseif id == disc_code and usesfont(current,font) then -- foo|-|bar : has hbox
            -- This looks much like the original code but I don't see a need to optimize
            -- for e.g. deva or r2l fonts. If there are no disc nodes then we won't see
            -- this branch anyway and if there are, we should just deal with them.
            --
            -- There is still some weird code here ... start/stop and such. When I'm in
            -- the mood (or see a need) I'll rewrite this bit.

            -- bug: disc in last word moves to end (in practice not an issue as one
            -- doesn't want a break there)

            local pre         = nil
            local post        = nil
            local currentnext = getnext(current)
            local current_pre, current_post, current_replace = getdisc(current)
            setdisc(current) -- why, we set it later
            if start then
                -- the pending run moves into the pre list, a copy ends up in replace
                pre  = copy_node_list(start,current)
                stop = getprev(current)
                -- why also current and not:
                -- pre = copy_node_list(start,stop)
                if start == head then
                    head = current
                end
                setlink(getprev(start),current)
                setlink(stop,current_pre)
                current_pre = start
                setprev(current_pre)
                start = nil
                stop  = nil
                startrlmode = rlmode
            end
            -- look ahead for the characters and discs that follow this disc
            while currentnext do
                local char, id = ischar(currentnext,font)
                if char or id == disc_code then
                    stop = currentnext
                    currentnext = getnext(currentnext)
                elseif id == glue_code then
                    local width = getwidth(currentnext)
                    if width and width > 0 then
                        stopspacing = true
                    else
                        stopspacing = false
                    end
                    break
                else
                    break
                end
            end
            if stop then
                -- the following run moves into the post list, a copy ends up in replace
                local currentnext = getnext(current)
                local stopnext    = getnext(stop)
                post = copy_node_list(currentnext,stopnext)
                if current_post then
                    setlink(find_node_tail(current_post),currentnext)
                else
                    setprev(currentnext)
                    current_post = currentnext
                end
                setlink(current,stopnext)
                setnext(stop)
                stop = nil
            end
            if pre then
                setlink(find_node_tail(pre),current_replace)
                current_replace = pre
                pre = nil
            end
            if post then
                if current_replace then
                    setlink(find_node_tail(current_replace),post)
                else
                    current_replace = post
                end
                post = nil
            end
            size = 0 -- hm, ok, start is also nil now
            text = { }
            if current_pre then
                current_pre = texthandler(current_pre,font,attr,rlmode,handler,startspacing,false,"pre")
            end
            if current_post then
                current_post = texthandler(current_post,font,attr,rlmode,handler,false,stopspacing,"post")
            end
            if current_replace then
                current_replace = texthandler(current_replace,font,attr,rlmode,handler,startspacing,stopspacing,"replace")
            end
            startspacing = false
            stopspacing  = false
            -- move the common tail of post and replace back into the main list
            local cpost       = current_post and find_node_tail(current_post)
            local creplace    = current_replace and find_node_tail(current_replace)
            local cpostnew    = nil
            local creplacenew = nil
            local newcurrent  = nil
            while cpost and equalnode(cpost,creplace) do
                cpostnew    = cpost
                creplacenew = creplace
                if creplace then
                    creplace = getprev(creplace)
                end
                cpost = getprev(cpost)
            end
            if cpostnew then
                if cpostnew == current_post then
                    current_post = nil
                else
                    setnext(getprev(cpostnew))
                end
                flush_list(cpostnew)
                if creplacenew == current_replace then
                    current_replace = nil
                else
                    setnext(getprev(creplacenew))
                end
                local c = getnext(current)
                setlink(current,creplacenew)
                local creplacenewtail = find_node_tail(creplacenew)
                setlink(creplacenewtail,c)
                newcurrent = creplacenewtail
            end
            current_post    = current_post and deletedisc(current_post)
            current_replace = current_replace and deletedisc(current_replace)
            -- move the common start of pre and replace back into the main list
            local cpre        = current_pre
            local creplace    = current_replace
            local cprenew     = nil
            local creplacenew = nil
            while cpre and equalnode(cpre, creplace) do
                cprenew     = cpre
                creplacenew = creplace
                if creplace then
                    creplace = getnext(creplace)
                end
                cpre = getnext(cpre)
            end
            if cprenew then
                cpre = current_pre
                current_pre = getnext(cprenew)
                if current_pre then
                    setprev(current_pre)
                end
                setnext(cprenew)
                flush_list(cpre)
                creplace = current_replace
                current_replace = getnext(creplacenew)
                if current_replace then
                    setprev(current_replace)
                end
                setlink(getprev(current),creplace)
                if current == head then
                    head = creplace
                end
                setlink(creplacenew,current)
            end
            setdisc(current,current_pre,current_post,current_replace)
            current = currentnext
        else
            if start and size > 0 then
                handle(startspacing,stopspacing)
            end
            startspacing = false
            stopspacing  = false
            if id == math_code then
                current = getnext(end_of_math(current))
            elseif id == dir_code then
                startspacing = false
                topstack, rlmode = txtdirstate(current,dirstack,topstack,rlparmode)
                current = getnext(current)
         -- elseif id == par_code and start_of_par(current) then
         --     startspacing = false
         --     rlparmode, rlmode = pardirstate(current)
         --     current = getnext(current)
            else
                current = getnext(current)
            end
        end
    end
    if start and size > 0 then
        handle(startspacing,stopspacing)
    end
    return head, true
end

-- The public entry point: without an action nothing happens and the second return
-- value is false, otherwise the list is processed with the direction mapped onto
-- -1 (right to left) or 0.

function fonts.handlers.otf.texthandler(head,font,attr,direction,action)
    if action then
        return texthandler(head,font,attr,direction == righttoleft_code and -1 or 0,action)
    else
        return head, false
    end
end

-- Next comes a tracer plug into context.

local texthandler = fonts.handlers.otf.texthandler
local report_text = logs.reporter("otf plugin","text")
local nofruns     = 0
local nofsnippets = 0
local f_unicode   = string.formatters["%U"]

-- A handler that only reports each snippet (as text and as unicodes) together with
-- the leading and trailing space state. Beware: it converts the entries in list to
-- strings in place. The node list itself is not touched.

local function showtext(head,font,attr,rlmode,start,stop,list,before,after)
    if list then
        nofsnippets = nofsnippets + 1
        local plus = { }
        for i=1,#list do
            local u = list[i]
            list[i] = utfchar(u)
            plus[i] = f_unicode(u)
        end
        report_text("%03i : [%s] %t [%s]-> % t", nofsnippets, before and "+" or "-", list, after and "+" or "-", plus)
    else
        report_text()
        report_text("invalid list")
        report_text()
    end
    return head, false
end

fonts.handlers.otf.registerplugin("text",function(head,font,attr,direction)
    nofruns     = nofruns + 1
    nofsnippets = 0
    report_text("start run %i",nofruns)
    local h, d = texthandler(head,font,attr,direction,showtext)
    report_text("stop run %i",nofruns)
    return h, d
end)
diff --git a/tex/context/base/mkiv/m-fonts-plugins.mkiv b/tex/context/base/mkiv/m-fonts-plugins.mkiv index a2b06fb8c..00174d13e 100644 --- a/tex/context/base/mkiv/m-fonts-plugins.mkiv +++ b/tex/context/base/mkiv/m-fonts-plugins.mkiv @@ -22,11 +22,65 @@ %D %D I'm not sure if Idris will ever
need this but the code has been used for some %D articles so that's why it's eventually shipped. - -% \enabletrackers[resolvers.ffilib] - -\registerctxluafile{font-txt}{} % generic text handler -\registerctxluafile{font-phb}{} % harfbuzz plugin: binary or library (ffi/optional) +%D +%D The library is supposed to be present in +%D +%D \starttyping +%D .../tex/texmf-win64/bin/lib/luatatex/harfbuzz/libharfbuzz-0.dll +%D .../tex/texmf-win64/bin/lib/luametatex/harfbuzz/libharfbuzz-0.dll +%D +%D .../tex/texmf-*/bin/lib/luatatex/harfbuzz/libharfbuzz.so +%D .../tex/texmf-*/bin/lib/luametatex/harfbuzz/libharfbuzz.so +%D \stoptyping +%D +%D It might be found on the system if paths are set but with libraries that render +%D (font) stuff it's best to rely on what you explicitly installed. When you install +%D files there make sure to run \type {mtxrun --generate}. Keep in mind that we don't +%D officially support such dependencies (bug-wise). +%D +%D In mkiv we support binary and library, and in lmtx binary and internal where we +%D default to library or internal as binary is not that fast (more meant as a +%D playground I guess). You can track loading with: +%D +%D \starttyping +%D \enabletrackers[resolvers.ffilib] +%D \stoptyping +%D +%D This file has always been in the distribution but per end april 2021 the font-txt, +%D font-phb and font-phb-imp-* files are in the distribution too. It's a side effect +%D of some cleanup of the (luametatex) source tree (where the optional module was not +%D yet in sync with the other optional ones). This module is mostly for myself and +%D Idris if only because it is not tested for interferences with all kind of other +%D font code (and it's unlikely to happen), so don't depend on this!
+ +% \starttext +% +% \definefontfeature [native] [default] +% [mode=plug, +% features=harfbuzz, +% shaper=native] +% +% \definefontfeature [uniscribe] [default] +% [mode=plug, +% features=harfbuzz, +% shaper=uniscribe] +% +% \definefontfeature [binary] [default] +% [mode=plug, +% method=binary, +% features=harfbuzz, +% shaper=uniscribe] +% +% % \nohyphens +% \definedfont[Serif*default] fiets \par % \input tufte \par +% \definedfont[Serif*native] fiets \par % \input tufte \par +% \definedfont[Serif*uniscribe] fiets \par % \input tufte \par +% \definedfont[Serif*binary] fiets \par % \input tufte \par +% +% \stoptext + + +\registerctxluafile{font-phb}{autosuffix} \startluacode diff --git a/tex/context/base/mkiv/status-files.pdf b/tex/context/base/mkiv/status-files.pdf index 18d4edc32..ae6464bfa 100644 Binary files a/tex/context/base/mkiv/status-files.pdf and b/tex/context/base/mkiv/status-files.pdf differ diff --git a/tex/context/base/mkiv/status-lua.pdf b/tex/context/base/mkiv/status-lua.pdf index c7c27b61c..ff552cb3c 100644 Binary files a/tex/context/base/mkiv/status-lua.pdf and b/tex/context/base/mkiv/status-lua.pdf differ diff --git a/tex/context/base/mkxl/cont-new.mkxl b/tex/context/base/mkxl/cont-new.mkxl index 9fb153df4..9b2fb738e 100644 --- a/tex/context/base/mkxl/cont-new.mkxl +++ b/tex/context/base/mkxl/cont-new.mkxl @@ -13,7 +13,7 @@ % \normalend % uncomment this to get the real base runtime -\newcontextversion{2021.04.26 00:51} +\newcontextversion{2021.04.26 20:39} %D This file is loaded at runtime, thereby providing an excellent place for hacks, %D patches, extensions and new features. There can be local overloads in cont-loc diff --git a/tex/context/base/mkxl/context.mkxl b/tex/context/base/mkxl/context.mkxl index 210b1b25c..35103e9b4 100644 --- a/tex/context/base/mkxl/context.mkxl +++ b/tex/context/base/mkxl/context.mkxl @@ -29,7 +29,7 @@ %D {YYYY.MM.DD HH:MM} format. 
\immutable\edef\contextformat {\jobname} -\immutable\edef\contextversion{2021.04.26 00:51} +\immutable\edef\contextversion{2021.04.26 20:39} %overloadmode 1 % check frozen / warning %overloadmode 2 % check frozen / error diff --git a/tex/context/base/mkxl/font-lib.mklx b/tex/context/base/mkxl/font-lib.mklx index 72c2d4ba9..f1f7f5ce0 100644 --- a/tex/context/base/mkxl/font-lib.mklx +++ b/tex/context/base/mkxl/font-lib.mklx @@ -59,6 +59,8 @@ \registerctxluafile{font-one}{optimize} \registerctxluafile{font-afk}{} +\registerctxluafile{font-txt}{autosuffix} + % tfm \registerctxluafile{font-tpk}{optimize} @@ -153,4 +155,14 @@ \permanent \def\cleanfontname #1{\clf_cleanfontname{#1}} \permanent\protected\def\setfontofid #1{\clf_setfontofid\numexpr#1\relax} +%D I have to put it someplace, so here: +%D +%D \starttyping +%D \definefontfeature[test][mode=plug,features=text] +%D \definefont[FontA][Serif*test] +%D \definefont[FontB][Serif*test] +%D {\FontA aaa}{\FontB\glyphscale 1200 bbb} +%D {\FontA aaa}{\FontB\glyphscale 1200 bbb} +%D \stoptyping + \protect \endinput diff --git a/tex/context/base/mkxl/font-phb-imp-binary.lmt b/tex/context/base/mkxl/font-phb-imp-binary.lmt new file mode 100644 index 000000000..39a6057ea --- /dev/null +++ b/tex/context/base/mkxl/font-phb-imp-binary.lmt @@ -0,0 +1,114 @@ +if not modules then modules = { } end modules ['font-phb-imp-binary'] = { + version = 1.000, -- 2016.10.10, + comment = "companion to font-txt.mkiv", + author = "Hans Hagen", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- The hb library comes in versions and the one I tested in 2016 was part of the inkscape +-- suite. In principle one can have incompatibilities due to updates but that is the nature +-- of a library. When a library ie expected one has better use the system version, if only +-- to make sure that different programs behave the same. 
--
-- The main reason for testing this approach was that when Idris was working on his fonts,
-- we wanted to know how different shapers deal with it and the hb command line program
-- could provide uniscribe output. For the context shaper uniscribe is the reference, also
-- because Idris started out with Volt a decade ago.
--
-- This file uses the indirect approach by calling the executable. This file uses context
-- features and is not generic.

local next, tonumber, pcall = next, tonumber, pcall

local concat     = table.concat
local reverse    = table.reverse
local formatters = string.formatters
local removefile = os.remove
local resultof   = os.resultof
local savedata   = io.savedata

local report     = utilities.hb.report or print
local packtoutf8 = utilities.hb.helpers.packtoutf8

-- output : [index=cluster@x_offset,y_offset+x_advance,y_advance|...]
-- result : { index, cluster, x_offset, y_offset, x_advance, y_advance }

local P, Ct, Cc = lpeg.P, lpeg.Ct, lpeg.Cc
local lpegmatch = lpeg.match

-- Missing offsets and advances default to zero so that every glyph entry ends up
-- with six fields.

local zero    = Cc(0)
local number  = lpeg.patterns.integer / tonumber + zero
local index   = lpeg.patterns.cardinal / tonumber
local cluster = index
local offset  = (P("@") * number * (P(",") * number + zero)) + zero * zero
local advance = (P("+") * number * (P(",") * number + zero)) + zero * zero
local glyph   = Ct(index * P("=") * cluster * offset * advance)
local pattern = Ct(P("[") * (glyph * P("|")^-1)^0 * P("]"))

-- The value passed with --shaper for each supported shaper name.

local shapers = {
    native    = "ot,uniscribe,fallback",
    uniscribe = "uniscribe,ot,fallback",
    fallback  = "fallback"
}

local runner = sandbox.registerrunner {
    method   = "resultof",
    name     = "harfbuzz",
 -- program  = {
 --     windows = "hb-shape.exe",
 --     unix    = "hb-shape"
 -- },
    program  = "hb-shape",
    checkers = {
        shaper    = "string",
        features  = "string",
        script    = "string",
        language  = "string",
        direction = "string",
        textfile  = "writable",
        fontfile  = "readable",
    },
    template = string.longtostring [[
        --shaper=%shaper%
        --output-format=text
        --no-glyph-names
        --features="%features%"
        --script=%script%
        --language=%language%
        --direction=%direction%
        --text-file=%textfile%
        --font-file=%fontfile%
    ]],
}

local tempfile = "font-phb.tmp"
local reported = false -- so that we complain only once about a missing runner

-- Shapes text by saving it (as utf8, with optional leading and trailing space) in
-- a temporary file and running the registered hb-shape runner on it.
--
--   font     : not used by this method
--   data     : table with shaper, features, script, language and filename fields
--   rlmode   : a negative value means right to left
--   text     : list of codepoints (might get a space added by packtoutf8)
--   leading  : add a leading space
--   trailing : add a trailing space
--
-- Returns the parsed list of glyph entries (reversed for right to left text) or
-- nothing when there is no runner or the runner gave no result.
--
-- Fix: the test on reported was inverted so the "no runner available" message
-- never showed up.

function utilities.hb.methods.binary(font,data,rlmode,text,leading,trailing)
    if runner then
        savedata(tempfile,packtoutf8(text,leading,trailing))
        local result = runner {
            shaper    = shapers[data.shaper] or shapers.native,
            features  = data.features,
            script    = data.script or "dflt",
            language  = data.language or "dflt",
            direction = rlmode < 0 and "rtl" or "ltr",
            textfile  = tempfile,
            fontfile  = data.filename,
        }
        removefile(tempfile)
        if result then
         -- return jsontolua(result)
            result = lpegmatch(pattern,result) -- { index cluster xo yo xa ya }
            if rlmode < 0 then
                return reverse(result) -- we can avoid this
            else
                return result
            end
        end
    elseif not reported then
        report("no runner available")
        reported = true
    end
end
diff --git a/tex/context/base/mkxl/font-phb-imp-internal.lmt b/tex/context/base/mkxl/font-phb-imp-internal.lmt new file mode 100644 index 000000000..fb2251abe --- /dev/null +++ b/tex/context/base/mkxl/font-phb-imp-internal.lmt @@ -0,0 +1,160 @@ +if not modules then modules = { } end modules ['font-phb-imp-internal'] = { + version = 1.000, -- 2016.10.10, + comment = "companion to font-txt.mkiv", + original = "derived from font-phb-imp-library", + author = "Hans Hagen", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- The hb library comes in versions and the one I tested in 2016 was part of the inkscape +-- suite. In principle one can have incompatibilities due to updates but that is the nature +-- of a library. When a library ie expected one has better use the system version, if only +-- to make sure that different programs behave the same.
+-- +-- The main reason for testing this approach was that when Idris was working on his fonts, +-- we wanted to know how different shapers deal with it and the hb command line program +-- could provide uniscribe output. For the context shaper uniscribe is the reference, also +-- because Idris started out with Volt a decade ago. +-- +-- We treat the lib as a black box as it should be. At some point Kai Eigner made an ffi +-- binding and that one was adapted to the plugin approach of context. It saved me the +-- trouble of looking at source files to figure it all out. Below is the adapted code. +-- +-- This is basically the ffi variant but with the hb function calls delegated to a simple +-- runtime library. That library was a side effect of playing a day with delayed loading +-- like ffi does in luametatex, which seems to work ok for what we call optional libraries +-- in lmtx. I didn't really test the next code well (and will probably do that when Idris +-- needs some comparison with uniscribe etc). There are nowadays probably other ways to do +-- this but this is what we had and so we can keep the test code that has been around for +-- a while (which is needed because some old articles need it.) +-- +-- The following setup doesn't really fit into the way we set up internal libraries +-- but if isn't used in the same sense anyway so we stick to what we already had in +-- the ffi variant (also because it uses helpers here and we want to keep the client +-- variant too). We don't need to be generic as other macro packages follow a different +-- route. +-- +-- Last time I checked "fiets" got no ligature with the "ot" shaper but it did get one +-- with the "uniscribe" shaper ... somewhat puzzling .. but "effe" worked okay. Maybe +-- there is some built-in heuristic interfering? When Idris an I tested fonts we had +-- similar differences with arabic so maybe we miss a point here. 
--
-- native     font plugin > hb > string : fi-
--            font plugin > hb > text   : U+00066 U+00069 U+0002D
--            font plugin > hb > result : U+00066 U+00069 U+0002D
--
-- uniscribe  font plugin > hb > string : fi-
--            font plugin > hb > text   : U+00066 U+00069 U+0002D
--            font plugin > hb > result : U+0FB01 U+0002D
--
-- native     font plugin > hb > string : ets
--            font plugin > hb > text   : U+00065 U+00074 U+00073
--            font plugin > hb > result : U+00065 U+00074 U+00073
--
-- uniscribe  font plugin > hb > string : ets
--            font plugin > hb > text   : U+00065 U+00074 U+00073
--            font plugin > hb > result : U+00065 U+00074 U+00073
--
-- native     font plugin > hb > string : fiets
--            font plugin > hb > text   : U+00066 U+00069 U+00065 U+00074 U+00073
--            font plugin > hb > result : U+00066 U+00069 U+00065 U+00074 U+00073
--
-- uniscribe  font plugin > hb > string : fiets
--            font plugin > hb > text   : U+00066 U+00069 U+00065 U+00074 U+00073
--            font plugin > hb > result : U+0FB01 U+00065 U+00074 U+00073

local report = utilities.hb.report or print

local hblib = optional and (optional.hb or optional.test)

if not hblib then
    report("no hblib found, you can try the ffi variant")
    return
end

local hb_initialize  = hblib.initialize
local hb_getversion  = hblib.getversion
local hb_getshapers  = hblib.getshapers
local hb_loadfont    = hblib.loadfont
local hb_shapestring = hblib.shapestring

if not hb_initialize then
    report("no functions in hblib found, you can try the ffi variant")
    return
end

local loaddata    = io.loaddata
local findlib     = resolvers.findlib
local concat      = table.concat
local utfchar     = utf.char
local packtoutf8  = utilities.hb.helpers.packtoutf8
local packtoutf32 = utilities.hb.helpers.packtoutf32
local report      = utilities.hb.report or print
local fontdata    = fonts.hashes.identifiers
local initialized = nil   -- nil: not tried yet, false: failed, otherwise what hb_initialize gave us
local loaded      = { }   -- font id  -> instance or false
local shared      = { }   -- filename -> instance or false
local libname     = os.name == "windows" and "libharfbuzz-0" or "libharfbuzz"

local shapers = {
    native    = { "ot", "uniscribe", "fallback" },
    uniscribe = { "uniscribe", "ot", "fallback" },
 -- uniscribe = { "uniscribe", "fallback" }, -- stalls without fallback when no uniscribe present
    fallback  = { "fallback" },
}

local mode = 32

-- Shapes text with the optional hb library.
--
--   font     : font id, used to find the font file and to cache the hb instance
--   data     : table with optional script, language, shaper and featureset fields
--   rlmode   : a negative value means right to left
--   text     : list of codepoints (might get a space added by the pack helpers)
--   leading  : add a leading space
--   trailing : add a trailing space
--
-- Returns whatever hb_shapestring returns, or nothing when the library or the font
-- could not be loaded. The library is initialized on first use and font instances
-- are cached per font id and shared per filename.
--
-- Fix: a failed initialization assigned false to a global "initialize" instead of
-- the local "initialized". When hb_initialize returned nil that meant that every
-- call located the library again and reported the failure again.

function utilities.hb.methods.internal(font,data,rlmode,text,leading,trailing)
    if initialized == nil then
        local filename = findlib(libname)
        initialized = hb_initialize(filename)
        if initialized then
            report("using hb library version %a, supported shapers: %,t",hb_getversion(),hb_getshapers())
        else
            report("unable to locate hb library")
            initialized = false
        end
    end
    if initialized then
        local instance = loaded[font]
        if instance == nil then
            local tfmdata   = fontdata[font]
            local resources = tfmdata.resources
            local filename  = resources.filename
            instance = shared[filename]
            if instance == nil then
                local wholefont = loaddata(filename)
                if wholefont then
                    instance = hb_loadfont(font,wholefont)
                end
                if not instance then
                    -- false (and not nil) so that we don't try again
                    instance = false
                end
                shared[filename] = instance
            end
            loaded[font] = instance
        end
        if instance then
            if mode ==32 then
                text = packtoutf32(text,leading,trailing)
            else
                text = packtoutf8(text,leading,trailing) -- doesn't work ok (no time not to figure it out)
            end
            local result = hb_shapestring (
                instance,
                data.script or "dflt",
                data.language or "dflt",
                rlmode < 0 and "rtl" or "ltr",
                shapers[data.shaper] or shapers.native,
                data.featureset or { },
                text,
                rlmode < 0,
                mode
            )
         -- inspect(result)
            return result
        end
    end
end
diff --git a/tex/context/base/mkxl/font-phb.lmt b/tex/context/base/mkxl/font-phb.lmt new file mode 100644 index 000000000..43edfd33f --- /dev/null +++ b/tex/context/base/mkxl/font-phb.lmt @@ -0,0 +1,560 @@ +if not modules then modules = { } end modules ['font-phb'] = { + version = 1.000, -- 2016.10.10, + comment = "companion to font-txt.mkiv", + original = "derived from a prototype by Kai Eigner", + author = "Hans Hagen", -- so
don't blame KE + copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files", +} + +-- Some (historic) explanation can be found in the font-phb.lua file. To summarize: +-- this code is kind of old and originates from the times that Idris was making a font +-- that should work with context and uniscribe. When we started with mkiv there were +-- no libraries, but at some point Kai Eigner made an ffi interface to the harfbuzz +-- library that showed up. His code was adapted to ConTeXt so that we could test +-- Idris fonts (the library could use uniscribe which served as reference for his +-- fonts). Some experiences were wrapped up in articles. Interesting was that +-- sometimes context, uniscribe and/or native hb could not agree on how to interpret +-- font features and subtle differences could occur. +-- +-- This file is made from font-phb.lua and I stripped the components code because +-- it made no sense. The files were eventually added because I did some cleanup and +-- didn't want to carry old stuff around without also sort of maintaining it. I can +-- probably strip away even more code. I might pick up this thread when Idris picks +-- up his font making. +-- +-- Todo: use the new (context) advance and offset features.
+ +local next, tonumber, pcall, rawget = next, tonumber, pcall, rawget + +local concat = table.concat +local sortedhash = table.sortedhash +local formatters = string.formatters + +local fonts = fonts +local otf = fonts.handlers.otf +local texthandler = otf.texthandler + +local fontdata = fonts.hashes.identifiers + +local nuts = nodes.nuts +local tonode = nuts.tonode +local tonut = nuts.tonut + +local remove_node = nuts.remove + +local getboth = nuts.getboth +local getnext = nuts.getnext +local setnext = nuts.setnext +local getprev = nuts.getprev +local setprev = nuts.setprev +local getid = nuts.getid +local getchar = nuts.getchar +local setchar = nuts.setchar +local setlink = nuts.setlink +local setoffsets = nuts.setoffsets +local getwidth = nuts.getwidth +local setwidth = nuts.setwidth + +local copy_node = nuts.copy +local find_tail = nuts.tail + +local nodepool = nuts.pool +local new_kern = nodepool.fontkern +local new_glyph = nodepool.glyph + +local nodecodes = nodes.nodecodes +local glyph_code = nodecodes.glyph +local glue_code = nodecodes.glue + +local skipped = { + -- we assume that only valid features are set but maybe we need a list + -- of valid hb features as there can be many context specific ones + mode = true, + features = true, + language = true, + script = true, +} + +local seenspaces = { + [0x0020] = true, + [0x00A0] = true, + [0x0009] = true, -- indeed + [0x000A] = true, -- indeed + [0x000D] = true, -- indeed +} + +-- helpers + +local helpers = { } +local methods = { } +local initialized = { } -- we don't polute the shared table + +local method = "internal" -- a bit misleading name: it's the optional module +local shaper = "native" -- "uniscribe" +local report = logs.reporter("font plugin","hb") + +utilities.hb = { + methods = methods, + helpers = helpers, + report = report, +} + +do + + local toutf8 = string.toutf8 + local toutf32 = string.toutf32 + + function helpers.packtoutf8(text,leading,trailing) + if leading then + text[0] = 32 + end + if 
trailing then + text[#text+1] = 32 + end + return toutf8(text) + end + + function helpers.packtoutf32(text,leading,trailing) + if leading then + text[0] = 32 + end + if trailing then + text[#text+1] = 32 + end + return toutf32(text) + end + +end + +local function initialize(font) + + local tfmdata = fontdata[font] + local resources = tfmdata.resources + local shared = tfmdata.shared + local filename = resources.filename + local features = shared.features + local descriptions = shared.rawdata.descriptions + local characters = tfmdata.characters + local featureset = { } + local copytochar = shared.copytochar -- indextounicode + local spacewidth = nil -- unscaled + local factor = tfmdata.parameters.factor + local marks = resources.marks or { } + + -- could be shared but why care about a few extra tables + + if not copytochar then + copytochar = { } + -- let's make sure that we have an indexed table and not a hash + local max = 0 + for k, v in next, descriptions do + if v.index > max then + max = v.index + end + end + for i=0,max do + copytochar[i] = i + end + -- the normal mapper + for k, v in next, descriptions do + copytochar[v.index] = k + end + shared.copytochar = copytochar + end + + -- independent from loop as we have unordered hashes + + if descriptions[0x0020] then + spacewidth = descriptions[0x0020].width + elseif descriptions[0x00A0] then + spacewidth = descriptions[0x00A0].width + end + + for k, v in sortedhash(features) do + if #k > 4 then + -- unknown ones are ignored anyway but we can assume that the current + -- (and future) extra context features use more verbose names + elseif skipped[k] then + -- we don't want to pass language and such so we block a few features + -- explicitly + elseif v == "yes" or v == true then + featureset[#featureset+1] = k .. "=1" -- cf command line (false) + elseif v == "no" or v == false then + featureset[#featureset+1] = k .. "=0" -- cf command line (true) + elseif type(v) == "number" then + featureset[#featureset+1] = k .. 
"=" .. v -- cf command line (alternate) + else + -- unset + end + end + + local data = { + language = features.language, -- do we need to uppercase and padd to 4 ? + script = features.script, -- do we need to uppercase and padd to 4 ? + features = #featureset > 0 and concat(featureset,",") or "", -- hash + featureset = #featureset > 0 and featureset or nil, + copytochar = copytochar, + spacewidth = spacewidth, + filename = filename, + marks = marks, + factor = factor, + characters = characters, -- the loaded font (we use its metrics which is more accurate) + method = features.method or method, + shaper = features.shaper or shaper, + } + initialized[font] = data + return data +end + +-- In many cases this gives compatible output but especially with respect to spacing and user +-- discretionaries that mix fonts there can be different outcomes. We also have no possibility +-- to tweak and cheat. Of course one can always run a normal node mode pass with specific +-- features first but then one can as well do all in node mode. So .. after a bit of playing +-- around I redid this one from scratch and also added tracing. + +local trace_colors = false trackers.register("fonts.plugins.hb.colors", function(v) trace_colors = v end) +local trace_details = false trackers.register("fonts.plugins.hb.details",function(v) trace_details = v end) +local check_id = false + +local setcolor = nodes.tracers.colors.set +local resetcolor = nodes.tracers.colors.reset + +table.setmetatableindex(methods,function(t,k) + local l = "font-phb-imp-" .. k .. 
".lmt" + report("start loading method %a from %a",k,l) + dofile(resolvers.findfile(l)) + local v = rawget(t,k) + if v then + report("loading method %a succeeded",k) + else + report("loading method %a failed",k) + v = function() return { } end + end + t[k] = v + return v +end) + +local inandout do + + local utfbyte = utf.byte + local utfchar = utf.char + local utf3208 = utf.utf32_to_utf8_le + + inandout = function(text,result,first,last,copytochar) + local s = { } + local t = { } + local r = { } + local f = formatters["%05U"] + for i=1,#text do + local c = text[i] + -- t[#t+1] = f(utfbyte(utf3208(c))) + s[#s+1] = utfchar(c) + t[#t+1] = f(c) + end + for i=first,last do + r[#r+1] = f(copytochar[result[i][1]]) + end + return s, t, r + end + +end + +local function harfbuzz(head,font,dynamic,rlmode,start,stop,text,leading,trailing) + local data = initialized[font] + + if not data then + data = initialize(font) + end + + if check_id then + if getid(start) ~= glyph_code then + report("error: start is not a glyph") + return head + elseif getid(stop) ~= glyph_code then + report("error: stop is not a glyph") + return head + end + end + local size = #text -- original text, without spaces + local result = methods[data.method](font,data,rlmode,text,leading,trailing) + local length = result and #result or 0 + + if length == 0 then + -- report("warning: no result") + return head + end + + local factor = data.factor + local marks = data.marks + local spacewidth = data.spacewidth + local copytochar = data.copytochar + local characters = data.characters + + -- the text analyzer is only partially clever so we must assume that we get + -- inconsistent lists + + -- we could check if something has been done (replacement or kern or so) but + -- then we pass around more information and need to check a lot and spaces + -- are kind of spoiling that game (we need a different table then) .. 
more + -- pain than gain + + -- we could play with 0xFFFE as boundary + + local current = start + local prev = nil + local glyph = nil + + local first = 1 + local last = length + local next = nil -- todo: keep track of them + local prev = nil -- todo: keep track of them + + if leading then + first = first + 1 + end + if trailing then + last = last - 1 + end + + local position = first + local cluster = 0 + local glyph = nil + local index = 0 + local count = 1 + local saved = nil + + if trace_details then + report("start run, original size: %i, result index: %i upto %i",size,first,last) + local s, t, r = inandout(text,result,first,last,copytochar) + report("method : %s",data.method) + report("shaper : %s",data.shaper) + report("string : %t",s) + report("text : % t",t) + report("result : % t",r) + end + + -- okay, after some experiments, it became clear that more complex code aimed at + -- optimization doesn't pay off as complexity also demands more testing + + for i=first,last do + local r = result[i] + local unicode = copytochar[r[1]] -- can be private of course + -- + cluster = r[2] + 1 -- starts at zero + -- + if position == cluster then + if i == first then + index = 1 + if trace_details then + report("[%i] position: %i, cluster: %i, index: %i, starting",i,position,cluster,index) + end + else + index = index + 1 + if trace_details then + report("[%i] position: %i, cluster: %i, index: %i, next step",i,position,cluster,index) + end + end + elseif position < cluster then + -- a new cluster + current = getnext(current) + position = position + 1 + size = size - 1 + for p=position,cluster-1 do + head, current = remove_node(head,current,true) + if trace_details then + report("[%i] position: %i, cluster: %i, index: -, removing node",i,p,cluster) + end + size = size - 1 + end + position = cluster + index = 1 + glyph = nil + if trace_details then + report("[%i] position: %i, cluster: %i, index: %i, arriving",i,cluster,position,index) + end + else -- maybe a space got 
properties + if trace_details then + report("position: %i, cluster: %i, index: %i, quitting due to fatal inconsistency",position,cluster,index) + end + return head + end + local copied = false + if glyph then + if trace_details then + report("[%i] position: %i, cluster: %i, index: %i, copying glyph, unicode %U",i,position,cluster,index,unicode) + end + local g = copy_node(glyph) + if trace_colors then + resetcolor(g) + end + setlink(current,g,getnext(current)) -- insert_before + current = g + copied = true + else + if trace_details then + report("[%i] position: %i, cluster: %i, index: %i, using glyph, unicode %U",i,position,cluster,index,unicode) + end + glyph = current + end + -- + if not current then + if trace_details then + report("quitting due to unexpected end of node list") + end + return head + end + -- + local id = getid(current) + if id ~= glyph_code then + if trace_details then + report("glyph expected in node list") + end + return head + end + -- + -- really, we can get a tab (9), lf (10), or cr(13) back in cambria .. 
don't ask me why + -- + local prev, next = getboth(current) + -- + -- assign glyph: first in run + -- + setchar(current,unicode) + if trace_colors then + count = (count == 8) and 1 or count + 1 + setcolor(current,"trace:"..count) + end + -- + local x_offset = r[3] -- r.dx + local y_offset = r[4] -- r.dy + local x_advance = r[5] -- r.ax + ----- y_advance = r[6] -- r.ay + local left = 0 + local right = 0 + local dx = 0 + local dy = 0 + if trace_details then + if x_offset ~= 0 or y_offset ~= 0 or x_advance ~= 0 then -- or y_advance ~= 0 + report("[%i] position: %i, cluster: %i, index: %i, old, xoffset: %p, yoffset: %p, xadvance: %p, width: %p", + i,position,cluster,index,x_offset*factor,y_offset*factor,x_advance*factor,characters[unicode].width) + end + end + if y_offset ~= 0 then + dy = y_offset * factor + end + if rlmode >= 0 then + -- l2r marks and rest + if x_offset ~= 0 then + dx = x_offset * factor + end + local width = characters[unicode].width + local delta = x_advance * factor + if delta ~= width then + -- right = -(delta - width) + right = delta - width + end + elseif marks[unicode] then -- why not just the next loop + -- r2l marks + if x_offset ~= 0 then + dx = -x_offset * factor + end + else + -- r2l rest + local width = characters[unicode].width + local delta = (x_advance - x_offset) * factor + if delta ~= width then + left = delta - width + end + if x_offset ~= 0 then + right = x_offset * factor + end + end + if copied or dx ~= 0 or dy ~= 0 then + setoffsets(current,dx,dy) + end + if left ~= 0 then + local kern = new_kern(left) + setlink(prev,kern,current) -- insertbefore + if current == head then + -- the kern now sits in front of the old head (which has no prev), so the + -- kern itself becomes the new head; assigning prev here would lose the list + head = kern + end + end + if right ~= 0 then + local kern = new_kern(right) + setlink(current,kern,next) + current = kern + end + if trace_details then + if dy ~= 0 or dx ~= 0 or left ~= 0 or right ~= 0 then + report("[%i] position: %i, cluster: %i, index: %i, new, xoffset: %p, yoffset: %p, left: %p, right: %p",i,position,cluster,index,dx,dy,left,right) + end + end + end + -- + 
if trace_details then + report("[-] position: %i, cluster: %i, index: -, at end",position,cluster) + end + if size > 1 then + current = getnext(current) + for i=1,size-1 do + if trace_details then + report("[-] position: %i + %i, cluster: -, index: -, removing node",position,i) + end + head, current = remove_node(head,current,true) + end + end + -- + -- We see all kind of interesting spaces come back (like tabs in cambria) so we do a bit of + -- extra testing here. + -- + if leading then + local r = result[1] + local unicode = copytochar[r[1]] + if seenspaces[unicode] then + local x_advance = r[5] + local delta = x_advance - spacewidth + if delta ~= 0 then + -- nothing to do but jump one slot ahead + local prev = getprev(start) + if getid(prev) == glue_code then + local dx = delta * factor + setwidth(prev,getwidth(prev) + dx) + if trace_details then + report("compensating leading glue by %p due to codepoint %U",dx,unicode) + end + else + report("no valid leading glue node") + end + end + end + end + -- + if trailing then + local r = result[length] + local unicode = copytochar[r[1]] + if seenspaces[unicode] then + local x_advance = r[5] + local delta = x_advance - spacewidth + if delta ~= 0 then + local next = getnext(stop) + if getid(next) == glue_code then + local dx = delta * factor + setwidth(next,getwidth(next) + dx) + if trace_details then + report("compensating trailing glue by %p due to codepoint %U",dx,unicode) + end + else + report("no valid trailing glue node") + end + end + end + end + -- + if trace_details then + report("run done") + end + return head +end + +otf.registerplugin("harfbuzz",function(head,font,dynamic,direction) + return texthandler(head,font,dynamic,direction,harfbuzz) +end) diff --git a/tex/context/base/mkxl/font-txt.lmt b/tex/context/base/mkxl/font-txt.lmt new file mode 100644 index 000000000..ce177e114 --- /dev/null +++ b/tex/context/base/mkxl/font-txt.lmt @@ -0,0 +1,583 @@ +if not modules then modules = { } end modules ['font-txt'] = 
{ + version = 1.001, + comment = "companion to font-ini.mkiv", + original = "derived from a prototype by Kai Eigner", + author = "Hans Hagen", -- so don't blame KE + copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- The next code is derived from a snippet handler prototype by Kai Eigner and +-- resembles the main loop of the Lua font handler but I decided to use a more generic +-- (and pluggable) approach and not hook it into the already present opentype +-- handler. This is cleaner as it cannot interfere with the Lua font processor +-- (which does some more things) and is also better performance wise. It also makes +-- it possible to support other handlers as history has proven that there are no +-- universal solutions in computer land. Most of the disc logic is kept but done +-- slightly differently. +-- +-- The code is heavily optimized and generalized so there can be errors. As +-- mentioned, the plug mode can be used for alternative font handlers. A font is +-- still loaded but the node and base mode handlers are ignored. Plugins are +-- unlikely to work well in context as they can mess up attribute driven subsystems, +-- so they are not officially supported. The language and script options are +-- available in the usual way. +-- +-- The code collects snippets, with or without spacing around them and partially +-- running over disc nodes. The r2l "don't assume disc and collect larger chunks" is +-- not robust so I got rid of that branch. This is somewhat similar to the Lua font +-- handler. +-- +-- An alternative is to run over longer strings with dummy chars (unicode objects) as +-- markers for whatever is in the list but that becomes tricky with mixed fonts and +-- reconstruction becomes a bit of a mess then, especially because disc nodes force +-- us to backtrack and look at several solutions. It also has a larger memory +-- footprint. 
Some tests demonstrated that it has no gain and only adds complexity. +-- +-- This (derived) variant is better suited for context and seems to work ok in the +-- generic variant. I also added some context specific tracing to the code. This +-- variant uses the plug model provided in the context font system. So, in context, +-- using the built in Lua handler is the better alternative, also because it has +-- extensive tracing features. Context users would lose additional functionality +-- that has been provided for a decade and therefore plugins are not officially +-- supported (at least not by me, unless I use them myself). +-- +-- There is no checking here for already processed characters so best not mix this +-- variant with code that does similar things. Whether this code evolves depends on its +-- usability. Kai's code can now be found on github where it is used with a harfbuzz +-- library. We add this kind of stuff because occasionally we compare engines and +-- Kai sends me examples and I then need to check context. +-- +-- One important difference between Kai's approach and the one used in ConTeXt is +-- that we use utf-32 instead of utf-8. Once I figured out that clusters were just +-- indices into the original text that made more sense. The first implementation +-- used the command line tool (binary), then I went for ffi (library). +-- +-- Beware: this file only implements the framework for plugins. Plugins themselves +-- are in other files (e.g. font-phb*.lua). On the todo list is a uniscribe plugin +-- because that is after all the reference for opentype support, but that interface +-- needs a bit more work (so it might never happen). +-- +-- Usage: see m-fonts-plugins.mkiv. As it's a nice test for ffi support that file +-- might be added to the distribution somewhere in the middle of 2017 when the ffi +-- interface has been tested a bit more. 
Okay, it's 2021 now and we're way past that +-- date but we never had a reason for adding it to the ConTeXt distribution. It +-- should still work okay because I occasionally checked it against progress made in +-- the engines and used newer helpers. +-- +-- Here is an example of usage: +-- +-- \starttext +-- \definefontfeature[test][mode=plug,features=text] +-- \start +-- \showfontkerns +-- \definedfont[Serif*test] +-- \input tufte \par +-- \stop +-- \stoptext + +local fonts = fonts +local otf = fonts.handlers.otf +local nodes = nodes + +local utfchar = utf.char + +local nuts = nodes.nuts + +local getnext = nuts.getnext +local setnext = nuts.setnext +local getprev = nuts.getprev +local setprev = nuts.setprev +local getid = nuts.getid +local getsubtype = nuts.getsubtype +local getfont = nuts.getfont +local getchar = nuts.getchar +local getdisc = nuts.getdisc +local setdisc = nuts.setdisc +local getboth = nuts.getboth +local getscales = nuts.getscales +local setlink = nuts.setlink +local getkern = nuts.getkern +local getwidth = nuts.getwidth + +local ischar = nuts.ischar +local isnextchar = nuts.isnextchar +local isglyph = nuts.isglyph +local traverse_id = nuts.traverse_id +local usesfont = nuts.uses_font + +local copy_node_list = nuts.copy_list +local find_node_tail = nuts.tail +local flush_list = nuts.flush_list +local free_node = nuts.free +local end_of_math = nuts.end_of_math +local start_of_par = nuts.start_of_par + +local nodecodes = nodes.nodecodes + +local glyph_code = nodecodes.glyph +local glue_code = nodecodes.glue +local disc_code = nodecodes.disc +local kern_code = nodecodes.kern +local math_code = nodecodes.math +local dir_code = nodecodes.dir +local par_code = nodecodes.par + +local righttoleft_code = nodes.dirvalues.righttoleft + +local txtdirstate = otf.helpers.txtdirstate +local pardirstate = otf.helpers.pardirstate + +local fonthashes = fonts.hashes +local fontdata = fonthashes.identifiers + +-- Walks the list and replaces every disc node by its replace list (the pre and post +-- lists are flushed); a disc without replace list is simply unlinked. The disc node +-- itself is freed and the (possibly changed) head is returned. + +local function deletedisc(head) + local current = head 
+ local next = nil + while current do + next = getnext(current) + if getid(current) == disc_code then + local pre, post, replace, pre_tail, post_tail, replace_tail = getdisc(current,true) + setdisc(current) + if pre then + flush_list(pre) + end + if post then + flush_list(post) + end + local p, n = getboth(current) + if replace then + if current == head then + head = replace + setprev(replace) -- already nil + else + setlink(p,replace) + end + setlink(replace_tail,n) -- was: setlink(n,replace_tail) + elseif current == head then + head = n + setprev(n) + else + setlink(p,n) + end + free_node(current) + end + current = next + end + return head +end + +-- As we know that we have the same font we can probably optimize this a bit more. +-- Although we can have more in disc nodes than characters and kerns we only support +-- those two types. +-- +-- This one compares two nodes inside disc lists: it is true when n is a glyph and m +-- is the same character in n's font, or when n is a kern with the same amount as +-- getkern(m); every other combination gives a false value. + +local function eqnode(n,m) -- no real improvement in speed + local n_char, n_id = isglyph(n) -- we need the second value for the kern test below + if n_char then + return n_char == ischar(m,getfont(n)) + elseif n_id == kern_code then + return getkern(n) == getkern(m) + end +end + +local function equalnode(n,m) + if not n then + return not m + elseif not m then + return false + end + local n_char, n_id = isglyph(n) + if n_char then + return n_char == ischar(m,n_id) -- n_id == n_font + elseif n_id == nodecodes.whatsit then -- there is no whatsit_code local in this file + return false + elseif n_id == glue_code then + return true + elseif n_id == kern_code then + return getkern(n) == getkern(m) + elseif n_id == disc_code then + local n_pre, n_post, n_replace = getdisc(n) + local m_pre, m_post, m_replace = getdisc(m) + while n_pre and m_pre do + if not eqnode(n_pre,m_pre) then + return false + end + n_pre = getnext(n_pre) + m_pre = getnext(m_pre) + end + if n_pre or m_pre then + return false + end + while n_post and m_post do + if not eqnode(n_post,m_post) then + return false + end + n_post = getnext(n_post) + m_post = getnext(m_post) + end + if n_post or m_post then + return false + end + while n_replace and m_replace do + if not 
eqnode(n_replace,m_replace) then + return false + end + n_replace = getnext(n_replace) + m_replace = getnext(m_replace) + end + if n_replace or m_replace then + return false + end + return true + end + return false +end + +-- The spacing hackery is not nice. The text can get leading and trailing spaces +-- and even mid spaces while the start and stop nodes not always are glues then +-- so the plugin really needs to do some testing there. We could pass more context +-- but it doesn't become much better. +-- +-- The attribute gets passed for tracing purposes. We could support it (not that +-- hard to do) but as we don't test strictly for fonts (in disc nodes) we are not +-- compatible anyway. It would also mean more testing. So, don't use this mixed +-- with node and base mode in context. +-- +-- We don't distinguish between modes in treatment (so no r2l assumptions) and +-- no cheats for scripts that might not use discretionaries. Such hacks can work +-- in predictable cases but in context one can use a mix of all kinds of things and +-- users do that. On the other hand, we do support longer glyph runs in both modes +-- so there we gain a bit. + + +do + + local currentscale, currentxscale, currentyscale + + local function texthandler(head,font,dynamic,rlmode,handler,startspacing,stopspacing,nesting) + if not head then + return + end + if startspacing == nil then + startspacing = false + end + if stopspacing == nil then + stopspacing = false + end + + -- NOTE(review): some callers in this file (the public otf.texthandler wrapper and the + -- nested calls for disc lists) already pass -1 or 0 as rlmode; unless righttoleft_code + -- happens to be -1 the test below then seems to reset an r2l run to 0 -- confirm. + if getid(head) == par_code and start_of_par(head) then + rlmode = pardirstate(head) + elseif rlmode == righttoleft_code then + rlmode = -1 + else + rlmode = 0 + end + + local dirstack = { } + local rlparmode = 0 + local topstack = 0 + local text = { } + local size = 0 + local current = head + local start = nil + local stop = nil + local startrlmode = rlmode + + local function handle(leading,trailing) -- what gets passed can become configurable: e.g. 
utf 8 + local stop = current or start -- hm, what with outer stop + if getid(stop) ~= glyph_code then + stop = getprev(stop) + end + head = handler(head,font,dynamic,rlmode,start,stop,text,leading,trailing) -- handler can adapt text + size = 0 + text = { } + start = nil + end + + -- maybe: isnextchar + + while current do + -- local char, id = ischar(current,font,dynamic) + local char, id = ischar(current,font,dynamic,currentscale,currentxscale,currentyscale) + if char then + -- local s, sx, sy = getscales(current) + -- if s ~= currentscale or sx ~= currentxscale or sy ~= currentyscale then + -- if start and size > 0 then + -- handle(startspacing,false) + -- end + -- startspacing = false + -- currentscale, currentxscale, currentyscale = s, sx, sy + -- end + if not start then + start = current + startrlmode = rlmode + end + local char = getchar(current) + size = size + 1 + text[size] = char + current = getnext(current) + elseif char == false then + -- so a mixed font + if start and size > 0 then + handle(startspacing,false) + end + startspacing = false + local s, sx, sy = getscales(current) + if s ~= currentscale or sx ~= currentxscale or sy ~= currentyscale then + if start and size > 0 then + handle(startspacing,false) + end + startspacing = false + currentscale, currentxscale, currentyscale = s, sx, sy + -- todo: safeguard against a loop + else + current = getnext(current) + currentscale, currentxscale, currentyscale = false, false, false + end + elseif id == glue_code then + -- making this branch optional i.e. always use the else doesn't really + -- make a difference in performance (in hb) .. 
tricky anyway as we can + local width = getwidth(current) + if width > 0 then + if start and size > 0 then + handle(startspacing,true) + end + startspacing = true + stopspacing = false + else + if start and size > 0 then + -- handle assigns the new head itself and returns nothing, so its result + -- must not be assigned to head (that would set head to nil) + handle(startspacing,false) + end + startspacing = false + stopspacing = false + end + current = getnext(current) + elseif id == disc_code and usesfont(current,font) then -- foo|-|bar : has hbox + -- This looks much like the original code but I don't see a need to optimize + -- for e.g. deva or r2l fonts. If there are no disc nodes then we won't see + -- this branch anyway and if there are, we should just deal with them. + -- + -- There is still some weird code here ... start/stop and such. When I'm in + -- the mood (or see a need) I'll rewrite this bit. + + -- bug: disc in last word moves to end (in practice not an issue as one + -- doesn't want a break there) + + local pre = nil + local post = nil + local currentnext = getnext(current) + local current_pre, current_post, current_replace = getdisc(current) + setdisc(current) -- why, we set it later + if start then + pre = copy_node_list(start,current) + stop = getprev(current) + -- why also current and not: + -- pre = copy_node_list(start,stop) + if start == head then + head = current + end + setlink(getprev(start),current) + setlink(stop,current_pre) + current_pre = start + setprev(current_pre) + start = nil + stop = nil + startrlmode = rlmode + end + while currentnext do + local char, id = ischar(currentnext,font) + if char or id == disc_code then + stop = currentnext + currentnext = getnext(currentnext) + elseif id == glue_code then + local width = getwidth(currentnext) + if width and width > 0 then + stopspacing = true + else + stopspacing = false + end + break + else + break + end + end + if stop then + local currentnext = getnext(current) + local stopnext = getnext(stop) + post = copy_node_list(currentnext,stopnext) + if current_post then + setlink(find_node_tail(current_post),currentnext) 
+ else + setprev(currentnext) + current_post = currentnext + end + setlink(current,stopnext) + setnext(stop) + stop = nil + end + if pre then + setlink(find_node_tail(pre),current_replace) + current_replace = pre + pre = nil + end + if post then + if current_replace then + setlink(find_node_tail(current_replace),post) + else + current_replace = post + end + post = nil + end + size = 0 -- hm, ok, start is also nil now + text = { } + if current_pre then + current_pre = texthandler(current_pre,font,dynamic,rlmode,handler,startspacing,false,"pre") + end + if current_post then + current_post = texthandler(current_post,font,dynamic,rlmode,handler,false,stopspacing,"post") + end + if current_replace then + current_replace = texthandler(current_replace,font,dynamic,rlmode,handler,startspacing,stopspacing,"replace") + end + startspacing = false + stopspacing = false + local cpost = current_post and find_node_tail(current_post) + local creplace = current_replace and find_node_tail(current_replace) + local cpostnew = nil + local creplacenew = nil + local newcurrent = nil + while cpost and equalnode(cpost,creplace) do + cpostnew = cpost + creplacenew = creplace + if creplace then + creplace = getprev(creplace) + end + cpost = getprev(cpost) + end + if cpostnew then + if cpostnew == current_post then + current_post = nil + else + setnext(getprev(cpostnew)) + end + flush_list(cpostnew) + if creplacenew == current_replace then + current_replace = nil + else + setnext(getprev(creplacenew)) + end + local c = getnext(current) + setlink(current,creplacenew) + local creplacenewtail = find_node_tail(creplacenew) + setlink(creplacenewtail,c) + newcurrent = creplacenewtail + end + current_post = current_post and deletedisc(current_post) + current_replace = current_replace and deletedisc(current_replace) + local cpre = current_pre + local creplace = current_replace + local cprenew = nil + local creplacenew = nil + while cpre and equalnode(cpre, creplace) do + cprenew = cpre + creplacenew 
= creplace + if creplace then + creplace = getnext(creplace) + end + cpre = getnext(cpre) + end + if cprenew then + cpre = current_pre + current_pre = getnext(cprenew) + if current_pre then + setprev(current_pre) + end + setnext(cprenew) + flush_list(cpre) + creplace = current_replace + current_replace = getnext(creplacenew) + if current_replace then + setprev(current_replace) + end + setlink(getprev(current),creplace) + if current == head then + head = creplace + end + setlink(creplacenew,current) + end + setdisc(current,current_pre,current_post,current_replace) + current = currentnext + else + if start and size > 0 then + handle(startspacing,stopspacing) + end + startspacing = false + stopspacing = false + if id == math_code then + current = getnext(end_of_math(current)) + elseif id == dir_code then + startspacing = false + topstack, rlmode = txtdirstate(current,dirstack,topstack,rlparmode) + current = getnext(current) + -- elseif id == par_code and start_of_par(current) then + -- startspacing = false + -- rlparmode, rlmode = pardirstate(current) + -- current = getnext(current) + else + current = getnext(current) + end + end + end + if start and size > 0 then + handle(startspacing,stopspacing) + end + return head, true + end + + function fonts.handlers.otf.texthandler(head,font,dynamic,direction,action) + currentscale = false + currentxscale = false + currentyscale = false + if action then + return texthandler(head,font,dynamic,direction == righttoleft_code and -1 or 0,action) + else + return head, false + end + end + + -- Next comes a tracer plug into context. 
+ + ----- texthandler = fonts.handlers.otf.texthandler + local report_text = logs.reporter("otf plugin","text") + local nofruns = 0 + local nofsnippets = 0 + local f_unicode = string.formatters["%U"] + + local function showtext(head,font,dynamic,rlmode,start,stop,list,before,after) + if list then + nofsnippets = nofsnippets + 1 + local plus = { } + for i=1,#list do + local u = list[i] + list[i] = utfchar(u) + plus[i] = f_unicode(u) + end + report_text("%03i : [%s] %t [%s]-> % t", nofsnippets, before and "+" or "-", list, after and "+" or "-", plus) + else + report_text() + report_text("invalid list") + report_text() + end + return head, false + end + + fonts.handlers.otf.registerplugin("text",function(head,font,dynamic,direction) + nofruns = nofruns + 1 + nofsnippets = 0 + report_text("start run %i",nofruns) + local h, d = texthandler(head,font,dynamic,direction,showtext) + report_text("stop run %i",nofruns) + return h, d + end) + +end diff --git a/tex/context/base/mkxl/lpdf-lmt.lmt b/tex/context/base/mkxl/lpdf-lmt.lmt index b7db8e50d..c0f02d75b 100644 --- a/tex/context/base/mkxl/lpdf-lmt.lmt +++ b/tex/context/base/mkxl/lpdf-lmt.lmt @@ -2291,7 +2291,7 @@ local function flushstreamobj(data,n,dict,comp,nolength) if done then b = dict and f_stream_b_d_c(n,dict,size) or f_stream_b_n_c(n,size) else - b = dict and f_stream_b_d_u(n,dict,size) or f_stream_b_d_r(n,size) + b = dict and f_stream_b_d_u(n,dict,size) or f_stream_b_n_u(n,size) end end flush(f,b) diff --git a/tex/context/interface/mkii/keys-cs.xml b/tex/context/interface/mkii/keys-cs.xml index 3006aa9b0..eee6b834e 100644 --- a/tex/context/interface/mkii/keys-cs.xml +++ b/tex/context/interface/mkii/keys-cs.xml @@ -767,6 +767,7 @@ + diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 453ee0018..1a0cf868e 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- 
merged file : c:/data/develop/context/sources/luatex-fonts-merged.lua -- parent file : c:/data/develop/context/sources/luatex-fonts.lua --- merge date : 2021-04-26 00:51 +-- merge date : 2021-04-26 20:39 do -- begin closure to overcome local limits and interference -- cgit v1.2.3