diff options
author | Hans Hagen <pragma@wxs.nl> | 2021-04-26 22:56:38 +0200 |
---|---|---|
committer | Context Git Mirror Bot <phg@phi-gamma.net> | 2021-04-26 22:56:38 +0200 |
commit | a63f1e4297162ac3a338a849ba10739816b44d39 (patch) | |
tree | 342eeb85ccc82f45eb304c3449f4c6172e8c9cc9 /tex/context/base/mkiv/font-txt.lua | |
parent | 643bd3f4610ad64823521fac6fc8bb5f1b76eb3f (diff) | |
download | context-a63f1e4297162ac3a338a849ba10739816b44d39.tar.gz |
2021-04-26 20:42:00
Diffstat (limited to 'tex/context/base/mkiv/font-txt.lua')
-rw-r--r-- | tex/context/base/mkiv/font-txt.lua | 549 |
1 files changed, 549 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/font-txt.lua b/tex/context/base/mkiv/font-txt.lua new file mode 100644 index 000000000..20f290ddb --- /dev/null +++ b/tex/context/base/mkiv/font-txt.lua @@ -0,0 +1,549 @@ +if not modules then modules = { } end modules ['font-txt'] = { + version = 1.001, + comment = "companion to font-ini.mkiv", + original = "derived from a prototype by Kai Eigner", + author = "Hans Hagen", -- so don't blame KE + copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- The next code is derived from a snippet handler prototype by Kai Eigner and +-- resembles the main loop of the Lua font handler but I decided to use a more generic +-- (and pluggable) approach and not hook it into the already present opentype +-- handler. This is cleaner as it cannot interfere with the Lua font processor +-- (which does some more things) and is also better performance wise. It also makes +-- it possible to support other handlers as history has proven that there is no +-- universal solution in computer land. Most of the disc logic is kept but done +-- slightly differently. +-- +-- The code is heavily optimized and generalized so there can be errors. As +-- mentioned, the plug mode can be used for alternative font handlers. A font is +-- still loaded but the node and base mode handlers are ignored. Plugins are +-- unlikely to work well in context as they can mess up attribute driven subsystems, +-- so they are not officially supported. The language and script options are +-- available in the usual way. +-- +-- The code collects snippets, with or without spacing around them and partially +-- running over disc nodes. The r2l "don't assume disc and collect larger chunks" is +-- not robust so I got rid of that branch. This is somewhat similar to the Lua font +-- handler. 
+-- +-- An alternative is to run over longer strings with dummy chars (unicode objects) as +-- markers for whatever is in the list but that becomes tricky with mixed fonts and +-- reconstruction becomes a bit of a mess then, especially because disc nodes force +-- us to backtrack and look at several solutions. It also has a larger memory +-- footprint. Some tests demonstrated that it has no gain and only adds complexity. +-- +-- This (derived) variant is better suited for context and seems to work ok in the +-- generic variant. I also added some context specific tracing to the code. This +-- variant uses the plug model provided in the context font system. So, in context, +-- using the built in Lua handler is the better alternative, also because it has +-- extensive tracing features. Context users would lose additional functionality +-- that has been provided for a decade and therefore plugins are not officially +-- supported (at least not by me, unless I use them myself). +-- +-- There is no checking here for already processed characters so best not to mix this +-- variant with code that does similar things. Whether this code evolves depends on its +-- usability. Kai's code can now be found on github where it is used with a harfbuzz +-- library. We add this kind of stuff because occasionally we compare engines and +-- Kai sends me examples and I then need to check context. +-- +-- One important difference between Kai's approach and the one used in ConTeXt is +-- that we use utf-32 instead of utf-8. Once I figured out that clusters were just +-- indices into the original text that made more sense. The first implementation +-- used the command line tool (binary), then I went for ffi (library). +-- +-- Beware: this file only implements the framework for plugins. Plugins themselves +-- are in other files (e.g. font-phb*.lua). 
-- On the todo list is a uniscribe plugin because that is after all the reference
-- for opentype support, but that interface needs a bit more work (so it might
-- never happen).
--
-- Usage: see m-fonts-plugins.mkiv. As it's a nice test for ffi support that file
-- might be added to the distribution somewhere in the middle of 2017 when the ffi
-- interface has been tested a bit more. Okay, it's 2021 now and we're way past that
-- date but we never had a reason for adding it to the ConTeXt distribution. It
-- should still work okay because I occasionally checked it against progress made in
-- the engines and used newer helpers.
--
-- Here is an example of usage:
--
-- \starttext
--     \definefontfeature[test][mode=plug,features=text]
--     \start
--         \showfontkerns
--         \definedfont[Serif*test]
--         \input tufte \par
--     \stop
-- \stoptext

local fonts = fonts
local otf = fonts.handlers.otf
local nodes = nodes

local utfchar = utf.char

local nuts = nodes.nuts

local getnext = nuts.getnext
local setnext = nuts.setnext
local getprev = nuts.getprev
local setprev = nuts.setprev
local getid = nuts.getid
local getsubtype = nuts.getsubtype
local getfont = nuts.getfont
local getchar = nuts.getchar
local getdisc = nuts.getdisc
local setdisc = nuts.setdisc
local getboth = nuts.getboth
local setlink = nuts.setlink
local getkern = nuts.getkern
local getwidth = nuts.getwidth

local ischar = nuts.ischar
local isglyph = nuts.isglyph
local traverse_id = nuts.traverse_id
local usesfont = nuts.uses_font

local copy_node_list = nuts.copy_list
local find_node_tail = nuts.tail
local flush_list = nuts.flush_list
local free_node = nuts.free
local end_of_math = nuts.end_of_math
local start_of_par = nuts.start_of_par

local nodecodes = nodes.nodecodes

local glyph_code = nodecodes.glyph
local glue_code = nodecodes.glue
local disc_code = nodecodes.disc
local kern_code = nodecodes.kern
local math_code = nodecodes.math
local dir_code = nodecodes.dir
local par_code = nodecodes.par

local righttoleft_code = nodes.dirvalues.righttoleft

local txtdirstate = otf.helpers.txtdirstate
local pardirstate = otf.helpers.pardirstate

local fonthashes = fonts.hashes
local fontdata = fonthashes.identifiers

-- Walks the list starting at head and removes every disc node from it. The
-- pre and post lists of a removed disc are flushed, its replace list (when
-- present) is spliced into the spot where the disc was, and the disc node
-- itself is freed. The (possibly changed) head of the list is returned.

local function deletedisc(head)
    local node = head
    while node do
        -- fetch the successor first because the node may be freed below
        local following = getnext(node)
        if getid(node) == disc_code then
            local pre, post, replace, _, _, replace_tail = getdisc(node,true)
            -- detach the sublists from the disc before they are flushed or relinked
            setdisc(node)
            if pre then
                flush_list(pre)
            end
            if post then
                flush_list(post)
            end
            local before, after = getboth(node)
            local athead = node == head
            if replace then
                if athead then
                    head = replace
                    setprev(replace) -- already nil
                else
                    setlink(before,replace)
                end
                setlink(replace_tail,after) -- was: setlink(n,replace_tail)
            elseif athead then
                head = after
                setprev(after)
            else
                setlink(before,after)
            end
            free_node(node)
        end
        node = following
    end
    return head
end

-- As we know that we have the same font we can probably optimize this a bit more.
-- Although we can have more in disc nodes than characters and kerns we only support
-- those two types.
--
-- We don't distinguish between modes in treatment (so no r2l assumptions) and
-- no cheats for scripts that might not use discretionaries. Such hacks can work
-- in predictable cases but in context one can use a mix of all kinds of things and
-- users do that. On the other hand, we do support longer glyph runs in both modes
-- so there we gain a bit.

-- Runs over the node list and collects runs of characters in the given font. For
-- each run the handler gets called as:
--
--   head = handler(head,font,attr,rlmode,start,stop,text,leading,trailing)
--
-- where text is a table with the character codes of the run and leading/trailing
-- tell if there is (positive width) spacing around the run. Disc nodes that use
-- the font are expanded: the surrounding run is moved into the pre, post and
-- replace lists, each list is processed recursively and afterwards the common
-- parts of post/replace and pre/replace are moved back into the main list.
--
--   head         : first node of the list (nothing is returned when it is nil)
--   font         : font id that is used for selecting characters
--   attr         : passed on to the handler untouched
--   rlmode       : direction, see the note below
--   handler      : the function that processes a collected run
--   startspacing : spacing before the list (defaults to false)
--   stopspacing  : spacing after the list (defaults to false)
--   nesting      : "pre", "post" or "replace" in recursive calls (not used here)
--
-- Returns the (possibly new) head and true.

local function texthandler(head,font,attr,rlmode,handler,startspacing,stopspacing,nesting)
    if not head then
        return
    end
    if startspacing == nil then
        startspacing = false
    end
    if stopspacing == nil then
        stopspacing = false
    end

    -- NOTE(review): the public wrapper and the recursive calls below pass a value
    -- that is already normalized (-1 or 0 or whatever txtdirstate returned) while
    -- this test compares it with righttoleft_code, so the direction looks like it
    -- is normalized twice. Kept as is, to be confirmed against the helper values.
    if getid(head) == par_code and start_of_par(head) then
        rlmode = pardirstate(head)
    elseif rlmode == righttoleft_code then
        rlmode = -1
    else
        rlmode = 0
    end

    local dirstack = { }
    local rlparmode = 0
    local topstack = 0
    local text = { }
    local size = 0
    local current = head
    local start = nil
    local stop = nil
    local startrlmode = rlmode

    -- Passes the collected run to the handler and resets the collector. This
    -- function has no return value: head is updated as upvalue.

    local function handle(leading,trailing) -- what gets passed can become configurable: e.g. utf 8
        local stop = current or start -- hm, what with outer stop
        if getid(stop) ~= glyph_code then
            stop = getprev(stop)
        end
        head = handler(head,font,attr,rlmode,start,stop,text,leading,trailing) -- handler can adapt text
        size = 0
        text = { }
        start = nil
    end

    while current do
        local char, id = ischar(current,font)
        if char then
            if not start then
                start = current
                startrlmode = rlmode
            end
            local char = getchar(current)
            size = size + 1
            text[size] = char
            current = getnext(current)
        elseif char == false then
            -- so a mixed font
            if start and size > 0 then
                handle(startspacing,false)
            end
            startspacing = false
            current = getnext(current)
        elseif id == glue_code then
            -- making this branch optional i.e. always use the else doesn't really
            -- make a difference in performance (in hb) .. tricky anyway as we can
            local width = getwidth(current)
            if width > 0 then
                if start and size > 0 then
                    handle(startspacing,true)
                end
                startspacing = true
                stopspacing = false
            else
                if start and size > 0 then
                    -- this was: head = handle(startspacing) but handle returns
                    -- nothing so that assignment wiped the head of the list
                    handle(startspacing)
                end
                startspacing = false
                stopspacing = false
            end
            current = getnext(current)
        elseif id == disc_code and usesfont(current,font) then -- foo|-|bar : has hbox
            -- This looks much like the original code but I don't see a need to optimize
            -- for e.g. deva or r2l fonts. If there are no disc nodes then we won't see
            -- this branch anyway and if there are, we should just deal with them.
            --
            -- There is still some weird code here ... start/stop and such. When I'm in
            -- the mood (or see a need) I'll rewrite this bit.

            -- bug: disc in last word moves to end (in practice not an issue as one
            -- doesn't want a break there)

            local pre = nil
            local post = nil
            local currentnext = getnext(current)
            local current_pre, current_post, current_replace = getdisc(current)
            setdisc(current) -- why, we set it later
            if start then
                -- the run before the disc moves in front of the pre list and a
                -- copy of it will end up in front of the replace list
                pre = copy_node_list(start,current)
                stop = getprev(current)
                -- why also current and not:
                -- pre = copy_node_list(start,stop)
                if start == head then
                    head = current
                end
                setlink(getprev(start),current)
                setlink(stop,current_pre)
                current_pre = start
                setprev(current_pre)
                start = nil
                stop = nil
                startrlmode = rlmode
            end
            -- look ahead for the run (characters and discs) that follows the disc
            while currentnext do
                local char, id = ischar(currentnext,font)
                if char or id == disc_code then
                    stop = currentnext
                    currentnext = getnext(currentnext)
                elseif id == glue_code then
                    local width = getwidth(currentnext)
                    if width and width > 0 then
                        stopspacing = true
                    else
                        stopspacing = false
                    end
                    break
                else
                    break
                end
            end
            if stop then
                -- the run after the disc moves behind the post list and a copy
                -- of it will end up behind the replace list
                local currentnext = getnext(current)
                local stopnext = getnext(stop)
                post = copy_node_list(currentnext,stopnext)
                if current_post then
                    setlink(find_node_tail(current_post),currentnext)
                else
                    setprev(currentnext)
                    current_post = currentnext
                end
                setlink(current,stopnext)
                setnext(stop)
                stop = nil
            end
            if pre then
                setlink(find_node_tail(pre),current_replace)
                current_replace = pre
                pre = nil
            end
            if post then
                if current_replace then
                    setlink(find_node_tail(current_replace),post)
                else
                    current_replace = post
                end
                post = nil
            end
            size = 0 -- hm, ok, start is also nil now
            text = { }
            if current_pre then
                current_pre = texthandler(current_pre,font,attr,rlmode,handler,startspacing,false,"pre")
            end
            if current_post then
                current_post = texthandler(current_post,font,attr,rlmode,handler,false,stopspacing,"post")
            end
            if current_replace then
                current_replace = texthandler(current_replace,font,attr,rlmode,handler,startspacing,stopspacing,"replace")
            end
            startspacing = false
            stopspacing = false
            -- move the common tail of post and replace back behind the disc
            local cpost = current_post and find_node_tail(current_post)
            local creplace = current_replace and find_node_tail(current_replace)
            local cpostnew = nil
            local creplacenew = nil
            local newcurrent = nil
            while cpost and equalnode(cpost,creplace) do
                cpostnew = cpost
                creplacenew = creplace
                if creplace then
                    creplace = getprev(creplace)
                end
                cpost = getprev(cpost)
            end
            if cpostnew then
                if cpostnew == current_post then
                    current_post = nil
                else
                    setnext(getprev(cpostnew))
                end
                flush_list(cpostnew)
                if creplacenew == current_replace then
                    current_replace = nil
                else
                    setnext(getprev(creplacenew))
                end
                local c = getnext(current)
                setlink(current,creplacenew)
                local creplacenewtail = find_node_tail(creplacenew)
                setlink(creplacenewtail,c)
                newcurrent = creplacenewtail
            end
            current_post = current_post and deletedisc(current_post)
            current_replace = current_replace and deletedisc(current_replace)
            -- move the common start of pre and replace back in front of the disc
            local cpre = current_pre
            local creplace = current_replace
            local cprenew = nil
            local creplacenew = nil
            while cpre and equalnode(cpre, creplace) do
                cprenew = cpre
                creplacenew = creplace
                if creplace then
                    creplace = getnext(creplace)
                end
                cpre = getnext(cpre)
            end
            if cprenew then
                cpre = current_pre
                current_pre = getnext(cprenew)
                if current_pre then
                    setprev(current_pre)
                end
                setnext(cprenew)
                flush_list(cpre)
                creplace = current_replace
                current_replace = getnext(creplacenew)
                if current_replace then
                    setprev(current_replace)
                end
                setlink(getprev(current),creplace)
                if current == head then
                    head = creplace
                end
                setlink(creplacenew,current)
            end
            setdisc(current,current_pre,current_post,current_replace)
            current = currentnext
        else
            if start and size > 0 then
                handle(startspacing,stopspacing)
            end
            startspacing = false
            stopspacing = false
            if id == math_code then
                current = getnext(end_of_math(current))
            elseif id == dir_code then
                startspacing = false
                topstack, rlmode = txtdirstate(current,dirstack,topstack,rlparmode)
                current = getnext(current)
         -- elseif id == par_code and start_of_par(current) then
         --     startspacing = false
         --     rlparmode, rlmode = pardirstate(current)
         --     current = getnext(current)
            else
                current = getnext(current)
            end
        end
    end
    if start and size > 0 then
        handle(startspacing,stopspacing)
    end
    return head, true
end

-- The public entry point: without an action nothing happens and we return the
-- head and false, otherwise the list is processed with the given action as
-- handler and the direction passed as -1 (right to left) or 0.

function fonts.handlers.otf.texthandler(head,font,attr,direction,action)
    if action then
        return texthandler(head,font,attr,direction == righttoleft_code and -1 or 0,action)
    else
        return head, false
    end
end

-- Next comes a tracer plug into context.

local texthandler = fonts.handlers.otf.texthandler
local report_text = logs.reporter("otf plugin","text")
local nofruns = 0
local nofsnippets = 0
local f_unicode = string.formatters["%U"]

-- A handler that only reports: each collected snippet is shown as text and as
-- list of unicodes, with a + or - that tells if there is spacing before and
-- after it. The entries in the passed list are replaced by their utf strings.
-- The list itself is not touched so we return the head and false.

local function showtext(head,font,attr,rlmode,start,stop,list,before,after)
    if not list then
        report_text()
        report_text("invalid list")
        report_text()
        return head, false
    end
    nofsnippets = nofsnippets + 1
    local codes = { }
    for index=1,#list do
        local unicode = list[index]
        codes[index] = f_unicode(unicode)
        list[index] = utfchar(unicode)
    end
    local leading = before and "+" or "-"
    local trailing = after and "+" or "-"
    report_text("%03i : [%s] %t [%s]-> % t", nofsnippets, leading, list, trailing, codes)
    return head, false
end

-- The "text" plugin: it counts the runs, restarts the snippet counter for each
-- run and reports the start and stop of a run around the tracing text handler.

local function tracedrun(head,font,attr,direction)
    nofruns = nofruns + 1
    nofsnippets = 0
    report_text("start run %i",nofruns)
    local newhead, done = texthandler(head,font,attr,direction,showtext)
    report_text("stop run %i",nofruns)
    return newhead, done
end

fonts.handlers.otf.registerplugin("text",tracedrun)