From 4f053696e1813fde4bd6cebbb77ff2a1e1f6800b Mon Sep 17 00:00:00 2001
From: Philipp Gesang This module is a bit more split up that I'd like but since we also want to test
@@ -65,9 +80,12 @@ is currently acceptable. Not all functions are implemented yet, often because I
lack the fonts for testing. Many scripts are not yet supported either, but I will
look into them as soon as Because there are different interpretations possible, I will extend the code
-with more (configureable) variants. I can also add hooks for users so that they can
-write their own extensions. The specification leaves room for interpretation. In case of doubt the microsoft
+implementation is the reference as it is the most complete one. As they deal with
+lots of scripts and fonts, Kai and Ivo did a lot of testing of the generic code and
+their suggestions help improve the code. I'm aware that not all border cases can be
+taken care of, unless we accept excessive runtime, and even then the interference
+with other mechanisms (like hyphenation) is not trivial. Glyphs are indexed not by unicode but in their own way. This is because there is no
relationship with unicode at all, apart from the fact that a font might cover certain
@@ -94,12 +112,12 @@ when there's a fix in the
We get hits on a mark, but we're not sure if the it has to be applied so we need to explicitly test for basechar, baselig and basemark entries.
@@ -855,7 +1082,7 @@ function handlers.gpos_mark2mark(head,start,kind,lookupname,markanchors,sequence if al[anchor] then local ma = markanchors[anchor] if ma then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma,characters[basechar]) + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma,characters[basechar],true) if trace_marks then logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%p,%p)", pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) @@ -938,85 +1165,11 @@ function handlers.gpos_cursive(head,start,kind,lookupname,exitanchors,sequence) end end -function handlers.gpos_single(head,start,kind,lookupname,kerns,sequence) - local startchar = getchar(start) - local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar]) - if trace_kerns then - logprocess("%s: shifting single %s by (%p,%p) and correction (%p,%p)",pref(kind,lookupname),gref(startchar),dx,dy,w,h) - end - return head, start, false -end - -function handlers.gpos_pair(head,start,kind,lookupname,kerns,sequence) - -- todo: kerns in disc nodes: pre, post, replace -> loop over disc too - -- todo: kerns in components of ligatures - local snext = getnext(start) - if not snext then - return head, start, false - else - local prev, done = start, false - local factor = tfmdata.parameters.factor - local lookuptype = lookuptypes[lookupname] - while snext and getid(snext) == glyph_code and getfont(snext) == currentfont and getsubtype(snext)<256 do - local nextchar = getchar(snext) - local krn = kerns[nextchar] - if not krn and marks[nextchar] then - prev = snext - snext = getnext(snext) - else - if not krn then - -- skip - elseif type(krn) == "table" then - if lookuptype == "pair" then -- probably not needed - local a, b = krn[2], krn[3] - if a and #a > 0 then - local startchar = getchar(start) - local x, y, w, h = 
setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar]) - if trace_kerns then - logprocess("%s: shifting first of pair %s and %s by (%p,%p) and correction (%p,%p)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h) - end - end - if b and #b > 0 then - local startchar = getchar(start) - local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar]) - if trace_kerns then - logprocess("%s: shifting second of pair %s and %s by (%p,%p) and correction (%p,%p)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h) - end - end - else -- wrong ... position has different entries - report_process("%s: check this out (old kern stuff)",pref(kind,lookupname)) - -- local a, b = krn[2], krn[6] - -- if a and a ~= 0 then - -- local k = setkern(snext,factor,rlmode,a) - -- if trace_kerns then - -- logprocess("%s: inserting first kern %s between %s and %s",pref(kind,lookupname),k,gref(getchar(prev)),gref(nextchar)) - -- end - -- end - -- if b and b ~= 0 then - -- logwarning("%s: ignoring second kern xoff %s",pref(kind,lookupname),b*factor) - -- end - end - done = true - elseif krn ~= 0 then - local k = setkern(snext,factor,rlmode,krn) - if trace_kerns then - logprocess("%s: inserting kern %s between %s and %s",pref(kind,lookupname),k,gref(getchar(prev)),gref(nextchar)) - end - done = true - end - break - end - end - return head, start, done - end -end - --[[ldx--I will implement multiple chain replacements once I run into a font that uses it. It's not that complex to handle.
--ldx]]-- -local chainmores = { } local chainprocs = { } local function logprocess(...) @@ -1045,11 +1198,6 @@ function chainprocs.chainsub(head,start,stop,kind,chainname,currentcontext,looku return head, start, false end -function chainmores.chainsub(head,start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname,n) - logprocess("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) - return head, start, false -end - -- The reversesub is a special case, which is why we need to store the replacements -- in a bit weird way. There is no lookup and the replacement comes from the lookup -- itself. It is meant mostly for dealing with Urdu. @@ -1116,8 +1264,7 @@ as less as needed but that would also make the code even more messy. -- end --[[ldx-- -Here we replace start by a single variant, First we delete the rest of the -match.
+Here we replace start by a single variant.
--ldx]]-- function chainprocs.gsub_single(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex) @@ -1125,7 +1272,7 @@ function chainprocs.gsub_single(head,start,stop,kind,chainname,currentcontext,lo local current = start local subtables = currentlookup.subtables if #subtables > 1 then - logwarning("todo: check if we need to loop over the replacements: %s",concat(subtables," ")) + logwarning("todo: check if we need to loop over the replacements: % t",subtables) end while current do if getid(current) == glyph_code then @@ -1160,11 +1307,8 @@ function chainprocs.gsub_single(head,start,stop,kind,chainname,currentcontext,lo return head, start, false end -chainmores.gsub_single = chainprocs.gsub_single - --[[ldx-- -Here we replace start by a sequence of new glyphs. First we delete the rest of -the match.
+Here we replace start by a sequence of new glyphs.
--ldx]]-- function chainprocs.gsub_multiple(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) @@ -1193,8 +1337,6 @@ function chainprocs.gsub_multiple(head,start,stop,kind,chainname,currentcontext, return head, start, false end -chainmores.gsub_multiple = chainprocs.gsub_multiple - --[[ldx--Here we replace start by new glyph. First we delete the rest of the match.
--ldx]]-- @@ -1249,8 +1391,6 @@ function chainprocs.gsub_alternate(head,start,stop,kind,chainname,currentcontext return head, start, false end -chainmores.gsub_alternate = chainprocs.gsub_alternate - --[[ldx--When we replace ligatures we use a helper that handles the marks. I might change
this function (move code inline and handle the marks by a separate function). We
@@ -1276,13 +1416,19 @@ function chainprocs.gsub_ligature(head,start,stop,kind,chainname,currentcontext,
local s = getnext(start)
local discfound = false
local last = stop
- local nofreplacements = 0
+ local nofreplacements = 1
local skipmark = currentlookup.flags[1]
while s do
local id = getid(s)
if id == disc_code then
- s = getnext(s)
- discfound = true
+ if not discfound then
+ discfound = s
+ end
+ if s == stop then
+ break -- okay? or before the disc
+ else
+ s = getnext(s)
+ end
else
local schar = getchar(s)
if skipmark and marks[schar] then -- marks
@@ -1315,7 +1461,7 @@ function chainprocs.gsub_ligature(head,start,stop,kind,chainname,currentcontext,
end
end
head, start = toligature(kind,lookupname,head,start,stop,l2,currentlookup.flags[1],discfound)
- return head, start, true, nofreplacements
+ return head, start, true, nofreplacements, discfound
elseif trace_bugs then
if start == stop then
logwarning("%s: replacing character %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar))
@@ -1325,11 +1471,97 @@ function chainprocs.gsub_ligature(head,start,stop,kind,chainname,currentcontext,
end
end
end
- return head, start, false, 0
+ return head, start, false, 0, false
+end
+
+-- Chained single positioning. Looks up the positioning data for the start glyph
+-- in the first subtable of the current lookup and, when present, applies it with
+-- setpair. Always returns head, start and false.
+function chainprocs.gpos_single(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence)
+    -- untested .. needs checking for the new model
+    local char   = getchar(start)
+    local lookup = currentlookup.subtables[1]
+    local hash   = lookuphash[lookup]
+    local entry  = hash and hash[char] -- needed ?
+    if entry then
+        local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],entry) -- ,characters[startchar])
+        if trace_kerns then
+            logprocess("%s: shifting single %s by (%p,%p) and correction (%p,%p)",cref(kind,chainname,chainlookupname),gref(char),dx,dy,w,h)
+        end
+    end
+    return head, start, false
+end
+
+-- Chained pair positioning. Takes the kern table of the start glyph from the first
+-- subtable of the current lookup, then walks the following glyphs of the current
+-- font: glyphs that are marks and have no entry are stepped over, the first other
+-- glyph gets its pair/kern data applied (if any) and ends the walk. Returns head,
+-- start and a flag telling whether something was applied.
+function chainprocs.gpos_pair(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence)
+    local snext = getnext(start)
+    if not snext then
+        return head, start, false
+    end
+    local firstchar  = getchar(start)
+    local lookupname = currentlookup.subtables[1]
+    local kerns      = lookuphash[lookupname]
+    kerns = kerns and kerns[firstchar]
+    if not kerns then
+        return head, start, false
+    end
+    local lookuptype = lookuptypes[lookupname]
+    local prev       = start
+    local done       = false
+    local factor     = tfmdata.parameters.factor
+    while snext and getid(snext) == glyph_code and getfont(snext) == currentfont and getsubtype(snext)<256 do
+        local nextchar = getchar(snext)
+        local krn      = kerns[nextchar]
+        if krn or not marks[nextchar] then
+            -- this glyph decides the outcome: apply what we have and leave the loop
+            if not krn then
+                -- skip
+            elseif type(krn) ~= "table" then
+                if krn ~= 0 then
+                    local k = setkern(snext,factor,rlmode,krn)
+                    if trace_kerns then
+                        logprocess("%s: inserting kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(getchar(prev)),gref(nextchar))
+                    end
+                    done = true
+                end
+            else
+                if lookuptype == "pair" then
+                    local first, second = krn[2], krn[3]
+                    if first and #first > 0 then
+                        local startchar = getchar(start)
+                        local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],first) -- ,characters[startchar])
+                        if trace_kerns then
+                            logprocess("%s: shifting first of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h)
+                        end
+                    end
+                    if second and #second > 0 then
+                        local startchar = getchar(start)
+                        local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],second) -- ,characters[nextchar])
+                        if trace_kerns then
+                            logprocess("%s: shifting second of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h)
+                        end
+                    end
+                else
+                    report_process("%s: check this out (old kern stuff)",cref(kind,chainname,chainlookupname))
+                    -- local a, b = krn[2], krn[6]
+                    -- if a and a ~= 0 then
+                    --     local k = setkern(snext,factor,rlmode,a)
+                    --     if trace_kerns then
+                    --         logprocess("%s: inserting first kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(getchar(prev)),gref(nextchar))
+                    --     end
+                    -- end
+                    -- if b and b ~= 0 then
+                    --     logwarning("%s: ignoring second kern xoff %s",cref(kind,chainname,chainlookupname),b*factor)
+                    -- end
+                end
+                done = true
+            end
+            break
+        end
+        -- an unkerned mark: remember it and look at the next glyph
+        prev  = snext
+        snext = getnext(snext)
+    end
+    return head, start, done
+end
-chainmores.gsub_ligature = chainprocs.gsub_ligature
-
function chainprocs.gpos_mark2base(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname)
local markchar = getchar(start)
if marks[markchar] then
@@ -1497,7 +1729,7 @@ function chainprocs.gpos_mark2mark(head,start,stop,kind,chainname,currentcontext
if al[anchor] then
local ma = markanchors[anchor]
if ma then
- local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma,characters[basechar])
+ local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma,characters[basechar],true)
if trace_marks then
logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%p,%p)",
cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy)
@@ -1588,133 +1820,346 @@ function chainprocs.gpos_cursive(head,start,stop,kind,chainname,currentcontext,l
return head, start, false
end
-function chainprocs.gpos_single(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence)
- -- untested .. needs checking for the new model
- local startchar = getchar(start)
- local subtables = currentlookup.subtables
- local lookupname = subtables[1]
- local kerns = lookuphash[lookupname]
- if kerns then
- kerns = kerns[startchar] -- needed ?
- if kerns then
- local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar])
- if trace_kerns then
- logprocess("%s: shifting single %s by (%p,%p) and correction (%p,%p)",cref(kind,chainname,chainlookupname),gref(startchar),dx,dy,w,h)
+-- what pointer to return, spec says stop
+-- to be discussed ... is bidi changer a space?
+-- elseif char == zwnj and sequence[n][32] then -- brrr
+
+-- somehow l or f is global
+-- we don't need to pass the currentcontext, saves a bit
+-- make a slow variant then can be activated but with more tracing
+
+-- Tracing helper: reports that a character was skipped while matching rule ck.
+-- When ck[9] is set, ck[9] and ck[10] are included in the message as well.
+local function show_skip(kind,chainname,char,ck,class)
+    local extra = ck[9]
+    if not extra then
+        logwarning("%s: skipping char %s, class %a, rule %a, lookuptype %a",cref(kind,chainname),gref(char),class,ck[1],ck[2])
+        return
+    end
+    logwarning("%s: skipping char %s, class %a, rule %a, lookuptype %a, %a => %a",cref(kind,chainname),gref(char),class,ck[1],ck[2],extra,ck[10])
+end
+
+-- A previous version had disc collapsing code in the (single sub) handler plus some
+-- checking in the main loop, but that left the pre/post sequences undone. The best
+-- solution is to add some checking there and backtrack when a replace/post matches
+-- but it takes a bit of work to figure out an efficient way (this is what the sweep*
+-- names refer to). I might look into that variant one day again as it can replace
+-- some other code too. In that approach we can have a special version for gsub and gpos
+-- which gains some speed. This method does the test and passes info to the handlers
+-- (sweepnode, sweepmode, sweepprev, sweepnext, etc). Here collapsing is handled in the
+-- main loop which also makes code elsewhere simpler (i.e. no need for the other special
+-- runners and disc code in ligature building). I also experimented with pushing preceding
+-- glyphs sequences in the replace/pre fields beforehand which saves checking afterwards
+-- but at the cost of duplicate glyphs (memory) but it's too much overhead (runtime).
+--
+-- In the meantime Kai had moved the code from the single chain into a more general handler
+-- and this one (renamed to chaindisk) is used now. I optimized the code a bit and brought
+-- it in sync with the other code. Hopefully I didn't introduce errors. Note: this somewhat
+-- complex approach is meant for fonts that implement (for instance) ligatures by character
+-- replacement which to some extent is not that suitable for hyphenation. I also use some
+-- helpers. This method passes some states but reparses the list. There is room for a bit of
+-- speed up but that will be done in the context version. (In fact a partial rewrite of all
+-- code can bring some more efficiency.)
+--
+-- I didn't test it with extremes but successive disc nodes still can give issues but in
+-- order to handle that we need more complex code which also slows down even more. The main
+-- loop variant could deal with that: test, collapse, backtrack.
+
+-- Runs a chain handler (chainproc) for a contextual match that involves a disc
+-- (discretionary) node.
+--
+-- ck is the matched rule: ck[3] is the sequence, ck[4] (f) and ck[5] (l) are the
+-- first and last sequence index of the current match. Note that the 'last'
+-- parameter is shadowed by a local below, so the passed value is not used.
+--
+-- The function scans three ranges: the current match (f..l), the lookahead (up
+-- to #seq) and the backtrack (down to 1). The first disc node met while we are
+-- not sweeping is kept and the glyphs in its replace list are counted. Other
+-- disc nodes are flattened with flattendisk (in the lookahead and backtrack only
+-- when their pre/post state differs from their replace state).
+--
+-- When a disc was remembered in the current match or lookahead (lookaheaddisc),
+-- the nodes from start up to that disc are moved into its pre and replace lists.
+-- When one was remembered in the backtrack (backtrackdisc), the nodes after that
+-- disc up to start are moved into its post and replace lists. In both cases
+-- chainproc is run on the lists that are not flagged as not matching. Otherwise
+-- chainproc is applied to the main list.
+--
+-- Returns head, start and the 'done' state reported by chainproc (false when
+-- chainproc was not called).
+local function chaindisk(head,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,chainindex,sequence,chainproc)
+
+    if not start then
+        return head, start, false
+    end
+
+    local startishead = start == head
+    local seq = ck[3]
+    local f = ck[4]
+    local l = ck[5]
+    local s = #seq
+    local done = false
+    local sweepnode = sweepnode
+    local sweeptype = sweeptype
+    local sweepoverflow = false
+    local checkdisc = getprev(head) -- hm bad name head
+    local keepdisc = not sweepnode
+    local lookaheaddisc = nil
+    local backtrackdisc = nil
+    local current = start
+    local last = start
+    local prev = getprev(start)
+
+    -- fishy: so we can overflow and then go on in the sweep?
+
+    -- current match: positions f .. l
+    local i = f
+    while i <= l do
+        local id = getid(current)
+        if id == glyph_code then
+            i = i + 1
+            last = current
+            current = getnext(current)
+        elseif id == disc_code then
+            if keepdisc then
+                keepdisc = false
+                if notmatchpre[current] ~= notmatchreplace[current] then
+                    lookaheaddisc = current
+                end
+                local replace = getfield(current,"replace")
+                while replace and i <= l do
+                    if getid(replace) == glyph_code then
+                        i = i + 1
+                    end
+                    replace = getnext(replace)
+                end
+                last = current
+                current = getnext(current) -- was getnext(c): c is not declared here
+            else
+                head, current = flattendisk(head,current)
+            end
+        else
+            last = current
+            current = getnext(current)
+        end
+        if current then
+            -- go on
+        elseif sweepoverflow then
+            -- we already are following up on sweepnode
+            break
+        elseif sweeptype == "post" or sweeptype == "replace" then
+            current = getnext(sweepnode)
+            if current then
+                sweeptype = nil
+                sweepoverflow = true
+            else
+                break
             end
         end
     end
-    return head, start, false
-end
-chainmores.gpos_single = chainprocs.gpos_single -- okay?
+    if sweepoverflow then
+        local prev = current and getprev(current)
+        if not current or prev ~= sweepnode then
+            local head = getnext(sweepnode)
+            local tail = nil
+            if prev then
+                tail = prev
+                setfield(current,"prev",sweepnode)
+            else
+                tail = find_node_tail(head)
+            end
+            setfield(sweepnode,"next",current)
+            setfield(head,"prev",nil)
+            setfield(tail,"next",nil)
+            appenddisc(sweepnode,head)
+        end
+    end
--- when machines become faster i will make a shared function
+    -- lookahead: positions l+1 .. s
+    if l < s then
+        local i = l
+        local t = sweeptype == "post" or sweeptype == "replace"
+        while current and i < s do
+            local id = getid(current)
+            if id == glyph_code then
+                i = i + 1
+                current = getnext(current)
+            elseif id == disc_code then
+                if keepdisc then
+                    keepdisc = false
+                    if notmatchpre[current] ~= notmatchreplace[current] then
+                        lookaheaddisc = current
+                    end
+                    local replace = getfield(current,"replace") -- was getfield(c,...): c is not declared here
+                    while replace and i < s do
+                        if getid(replace) == glyph_code then
+                            i = i + 1
+                        end
+                        replace = getnext(replace)
+                    end
+                    current = getnext(current)
+                elseif notmatchpre[current] ~= notmatchreplace[current] then
+                    head, current = flattendisk(head,current)
+                else
+                    current = getnext(current) -- HH
+                end
+            else
+                current = getnext(current)
+            end
+            if not current and t then
+                current = getnext(sweepnode)
+                if current then
+                    sweeptype = nil
+                end
+            end
+        end
+    end
-function chainprocs.gpos_pair(head,start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence)
-    local snext = getnext(start)
-    if snext then
-        local startchar = getchar(start)
-        local subtables = currentlookup.subtables
-        local lookupname = subtables[1]
-        local kerns = lookuphash[lookupname]
-        if kerns then
-            kerns = kerns[startchar]
-            if kerns then
-                local lookuptype = lookuptypes[lookupname]
-                local prev, done = start, false
-                local factor = tfmdata.parameters.factor
-                while snext and getid(snext) == glyph_code and getfont(snext) == currentfont and getsubtype(snext)<256 do
-                    local nextchar = getchar(snext)
-                    local krn = kerns[nextchar]
-                    if not krn and marks[nextchar] then
-                        prev = snext
-                        snext = getnext(snext)
-                    else
-                        if not krn then
-                            -- skip
-                        elseif type(krn) == "table" then
-                            if lookuptype == "pair" then
-                                local a, b = krn[2], krn[3]
-                                if a and #a > 0 then
-                                    local startchar = getchar(start)
-                                    local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar])
-                                    if trace_kerns then
-                                        logprocess("%s: shifting first of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h)
-                                    end
-                                end
-                                if b and #b > 0 then
-                                    local startchar = getchar(start)
-                                    local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar])
-                                    if trace_kerns then
-                                        logprocess("%s: shifting second of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h)
-                                    end
-                                end
-                            else
-                                report_process("%s: check this out (old kern stuff)",cref(kind,chainname,chainlookupname))
-                                local a, b = krn[2], krn[6]
-                                if a and a ~= 0 then
-                                    local k = setkern(snext,factor,rlmode,a)
-                                    if trace_kerns then
-                                        logprocess("%s: inserting first kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(getchar(prev)),gref(nextchar))
-                                    end
-                                end
-                                if b and b ~= 0 then
-                                    logwarning("%s: ignoring second kern xoff %s",cref(kind,chainname,chainlookupname),b*factor)
-                                end
-                            end
-                            done = true
-                        elseif krn ~= 0 then
-                            local k = setkern(snext,factor,rlmode,krn)
-                            if trace_kerns then
-                                logprocess("%s: inserting kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(getchar(prev)),gref(nextchar))
-                            end
-                            done = true
+    -- backtrack: positions f-1 .. 1
+    if f > 1 then
+        local current = prev
+        local i = f
+        local t = sweeptype == "pre" or sweeptype == "replace"
+        if not current and t and current == checkdisc then -- was checkdisk (undeclared)
+            current = getprev(sweepnode)
+        end
+        while current and i > 1 do -- missing getprev added / moved outside
+            local id = getid(current)
+            if id == glyph_code then
+                i = i - 1
+            elseif id == disc_code then
+                if keepdisc then
+                    keepdisc = false
+                    if notmatchpost[current] ~= notmatchreplace[current] then
+                        backtrackdisc = current
+                    end
+                    local replace = getfield(current,"replace")
+                    while replace and i > 1 do
+                        if getid(replace) == glyph_code then
+                            i = i - 1
                         end
-                        break
+                        replace = getnext(replace)
                     end
+                elseif notmatchpost[current] ~= notmatchreplace[current] then
+                    head, current = flattendisk(head,current)
                 end
-                return head, start, done
+            end
+            current = getprev(current)
+            if t and current == checkdisc then -- was checkdisk (undeclared)
+                current = getprev(sweepnode)
             end
         end
     end
-    return head, start, false
-end
-chainmores.gpos_pair = chainprocs.gpos_pair -- okay?
+    local ok = false
+    if lookaheaddisc then
--- what pointer to return, spec says stop
--- to be discussed ... is bidi changer a space?
--- elseif char == zwnj and sequence[n][32] then -- brrr
+        local cf = start
+        local cl = getprev(lookaheaddisc)
+        local cprev = getprev(start)
+        local insertedmarks = 0
--- somehow l or f is global
--- we don't need to pass the currentcontext, saves a bit
--- make a slow variant then can be activated but with more tracing
+        while cprev and getid(cf) == glyph_code and getfont(cf) == currentfont and getsubtype(cf) < 256 and marks[getchar(cf)] do
+            insertedmarks = insertedmarks + 1
+            cf = cprev
+            startishead = cf == head
+            cprev = getprev(cprev)
+        end
+
+        -- cut cf .. cl out of the main list
+        setfield(lookaheaddisc,"prev",cprev)
+        if cprev then
+            setfield(cprev,"next",lookaheaddisc)
+        end
+        setfield(cf,"prev",nil)
+        setfield(cl,"next",nil)
+        if startishead then
+            head = lookaheaddisc
+        end
+
+        local replace = getfield(lookaheaddisc,"replace")
+        local pre = getfield(lookaheaddisc,"pre")
+        local new = copy_node_list(cf)
+        local cnew = new
+        for i=1,insertedmarks do
+            cnew = getnext(cnew)
+        end
+        local clast = cnew
+        for i=f,l do
+            clast = getnext(clast)
+        end
+        if not notmatchpre[lookaheaddisc] then
+            cf, start, ok = chainproc(cf,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence)
+        end
+        if not notmatchreplace[lookaheaddisc] then
+            new, cnew, ok = chainproc(new,cnew,clast,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence)
+        end
+        if pre then
+            setfield(cl,"next",pre)
+            setfield(pre,"prev",cl)
+        end
+        if replace then
+            local tail = find_node_tail(new)
+            setfield(tail,"next",replace)
+            setfield(replace,"prev",tail)
+        end
+        setfield(lookaheaddisc,"pre",cf) -- also updates tail
+        setfield(lookaheaddisc,"replace",new) -- also updates tail
+
+        start = getprev(lookaheaddisc)
+        sweephead[cf] = getnext(clast)
+        sweephead[new] = getnext(last)
+
+    elseif backtrackdisc then
+
+        local cf = getnext(backtrackdisc)
+        local cl = start
+        local cnext = getnext(start)
+        local insertedmarks = 0
+
+        while cnext and getid(cnext) == glyph_code and getfont(cnext) == currentfont and getsubtype(cnext) < 256 and marks[getchar(cnext)] do
+            insertedmarks = insertedmarks + 1
+            cl = cnext
+            cnext = getnext(cnext)
+        end
+        -- cut cf .. cl out of the main list
+        if cnext then
+            setfield(cnext,"prev",backtrackdisc)
+        end
+        setfield(backtrackdisc,"next",cnext)
+        setfield(cf,"prev",nil)
+        setfield(cl,"next",nil)
+        local replace = getfield(backtrackdisc,"replace")
+        local post = getfield(backtrackdisc,"post")
+        local new = copy_node_list(cf)
+        local cnew = find_node_tail(new)
+        for i=1,insertedmarks do
+            cnew = getprev(cnew)
+        end
+        local clast = cnew
+        for i=f,l do
+            clast = getnext(clast)
+        end
+        if not notmatchpost[backtrackdisc] then
+            cf, start, ok = chainproc(cf,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence)
+        end
+        if not notmatchreplace[backtrackdisc] then
+            new, cnew, ok = chainproc(new,cnew,clast,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence)
+        end
+        if post then
+            local tail = find_node_tail(post)
+            setfield(tail,"next",cf)
+            setfield(cf,"prev",tail)
+        else
+            post = cf
+        end
+        if replace then
+            local tail = find_node_tail(replace)
+            setfield(tail,"next",new)
+            setfield(new,"prev",tail)
+        else
+            replace = new
+        end
+        setfield(backtrackdisc,"post",post) -- also updates tail
+        setfield(backtrackdisc,"replace",replace) -- also updates tail
+        start = getprev(backtrackdisc)
+        sweephead[post] = getnext(clast)
+        sweephead[replace] = getnext(last)
-local function show_skip(kind,chainname,char,ck,class)
-    if ck[9] then
-        logwarning("%s: skipping char %s, class %a, rule %a, lookuptype %a, %a => %a",cref(kind,chainname),gref(char),class,ck[1],ck[2],ck[9],ck[10])
     else
-        logwarning("%s: skipping char %s, class %a, rule %a, lookuptype %a",cref(kind,chainname),gref(char),class,ck[1],ck[2])
-    end
-end
-local quit_on_no_replacement = true
+        head, start, ok = chainproc(head,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence)
-directives.register("otf.chain.quitonnoreplacement",function(value) -- maybe per font
-    quit_on_no_replacement = value
-end)
+    end
+
+    return head, start, ok
+end
local function normal_handle_contextchain(head,start,kind,chainname,contexts,sequence,lookuphash)
- -- local rule, lookuptype, sequence, f, l, lookups = ck[1], ck[2] ,ck[3], ck[4], ck[5], ck[6]
+ local sweepnode = sweepnode
+ local sweeptype = sweeptype
+ local diskseen = false
+ local checkdisc = getprev(head)
local flags = sequence.flags
local done = false
local skipmark = flags[1]
local skipligature = flags[2]
local skipbase = flags[3]
- local someskip = skipmark or skipligature or skipbase -- could be stored in flags for a fast test (hm, flags could be false !)
- local markclass = sequence.markclass -- todo, first we need a proper test
+ local markclass = sequence.markclass
local skipped = false
- for k=1,#contexts do
+
+ for k=1,#contexts do -- i've only seen ccmp having > 1 (e.g. dejavu)
local match = true
local current = start
local last = start
@@ -1728,7 +2173,8 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
else
-- maybe we need a better space check (maybe check for glue or category or combination)
-- we cannot optimize for n=2 because there can be disc nodes
- local f, l = ck[4], ck[5]
+ local f = ck[4]
+ local l = ck[5]
-- current match
if f == 1 and f == l then -- current only
-- already a hit
@@ -1738,9 +2184,14 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
if f == l then -- new, else last out of sync (f is > 1)
-- match = true
else
+ local discfound = nil
local n = f + 1
last = getnext(last)
while n <= l do
+ if not last and (sweeptype == "post" or sweeptype == "replace") then
+ last = getnext(sweepnode)
+ sweeptype = nil
+ end
if last then
local id = getid(last)
if id == glyph_code then
@@ -1748,7 +2199,7 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
local char = getchar(last)
local ccd = descriptions[char]
if ccd then
- local class = ccd.class
+ local class = ccd.class or "base"
if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then
skipped = true
if trace_skips then
@@ -1761,18 +2212,77 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
end
n = n + 1
else
- match = false
+ if discfound then
+ notmatchreplace[discfound] = true
+ match = not notmatchpre[discfound]
+ else
+ match = false
+ end
break
end
else
- match = false
+ if discfound then
+ notmatchreplace[discfound] = true
+ match = not notmatchpre[discfound]
+ else
+ match = false
+ end
break
end
else
- match = false
+ if discfound then
+ notmatchreplace[discfound] = true
+ match = not notmatchpre[discfound]
+ else
+ match = false
+ end
break
end
elseif id == disc_code then
+ diskseen = true
+ discfound = last
+ notmatchpre[last] = nil
+ notmatchpost[last] = true
+ notmatchreplace[last] = nil
+ local pre = getfield(last,"pre")
+ local replace = getfield(last,"replace")
+ if pre then
+ local n = n
+ while pre do
+ if seq[n][getchar(pre)] then
+ n = n + 1
+ pre = getnext(pre)
+ if n > l then
+ break
+ end
+ else
+ notmatchpre[last] = true
+ break
+ end
+ end
+ if n <= l then
+ notmatchpre[last] = true
+ end
+ else
+ notmatchpre[last] = true
+ end
+ if replace then
+ -- so far we never entered this branch
+ while replace do
+ if seq[n][getchar(replace)] then
+ n = n + 1
+ replace = getnext(replace)
+ if n > l then
+ break
+ end
+ else
+ notmatchreplace[last] = true
+ match = not notmatchpre[last]
+ break
+ end
+ end
+ match = not notmatchpre[last]
+ end
last = getnext(last)
else
match = false
@@ -1789,50 +2299,137 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
if match and f > 1 then
local prev = getprev(start)
if prev then
- local n = f-1
- while n >= 1 do
- if prev then
- local id = getid(prev)
- if id == glyph_code then
- if getfont(prev) == currentfont and getsubtype(prev)<256 then -- normal char
- local char = getchar(prev)
- local ccd = descriptions[char]
- if ccd then
- local class = ccd.class
- if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then
- skipped = true
- if trace_skips then
- show_skip(kind,chainname,char,ck,class)
+ if prev == checkdisc and (sweeptype == "pre" or sweeptype == "replace") then
+ prev = getprev(sweepnode)
+ -- sweeptype = nil
+ end
+ if prev then
+ local discfound = nil
+ local n = f - 1
+ while n >= 1 do
+ if prev then
+ local id = getid(prev)
+ if id == glyph_code then
+ if getfont(prev) == currentfont and getsubtype(prev)<256 then -- normal char
+ local char = getchar(prev)
+ local ccd = descriptions[char]
+ if ccd then
+ local class = ccd.class
+ if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then
+ skipped = true
+ if trace_skips then
+ show_skip(kind,chainname,char,ck,class)
+ end
+ elseif seq[n][char] then
+ n = n -1
+ else
+ if discfound then
+ notmatchreplace[discfound] = true
+ match = not notmatchpost[discfound]
+ else
+ match = false
+ end
+ break
end
- elseif seq[n][char] then
- n = n -1
else
- match = false
+ if discfound then
+ notmatchreplace[discfound] = true
+ match = not notmatchpost[discfound]
+ else
+ match = false
+ end
break
end
else
- match = false
+ if discfound then
+ notmatchreplace[discfound] = true
+ match = not notmatchpost[discfound]
+ else
+ match = false
+ end
break
end
+ elseif id == disc_code then
+ -- the special case: f i where i becomes dotless i ..
+ diskseen = true
+ discfound = prev
+ notmatchpre[prev] = true
+ notmatchpost[prev] = nil
+ notmatchreplace[prev] = nil
+ local pre = getfield(prev,"pre")
+ local post = getfield(prev,"post")
+ local replace = getfield(prev,"replace")
+ if pre ~= start and post ~= start and replace ~= start then
+ if post then
+ local n = n
+ local posttail = find_node_tail(post)
+ while posttail do
+ if seq[n][getchar(posttail)] then
+ n = n - 1
+ if posttail == post then
+ break
+ else
+ posttail = getprev(posttail)
+ if n < 1 then
+ break
+ end
+ end
+ else
+ notmatchpost[prev] = true
+ break
+ end
+ end
+ if n >= 1 then
+ notmatchpost[prev] = true
+ end
+ else
+ notmatchpost[prev] = true
+ end
+ if replace then
+ -- we seldom enter this branch (e.g. on brill efficient)
+ local replacetail = find_node_tail(replace)
+ while replacetail do
+ if seq[n][getchar(replacetail)] then
+ n = n - 1
+ if replacetail == replace then
+ break
+ else
+ replacetail = getprev(replacetail)
+ if n < 1 then
+ break
+ end
+ end
+ else
+ notmatchreplace[prev] = true
+ match = not notmatchpost[prev]
+ break
+ end
+ end
+ if not match then
+ break
+ end
+ else
+ -- skip 'm
+ end
+ else
+ -- skip 'm
+ end
+ elseif seq[n][32] then
+ n = n -1
else
match = false
break
end
- elseif id == disc_code then
- -- skip 'm
- elseif seq[n][32] then
- n = n -1
+ prev = getprev(prev)
+ elseif seq[n][32] then -- somewhat special, as zapfino can have many preceding spaces
+ n = n - 1
else
match = false
break
end
- prev = getprev(prev)
- elseif seq[n][32] then -- somewhat special, as zapfino can have many preceding spaces
- n = n -1
- else
- match = false
- break
end
+ else
+ match = false
end
else
match = false
@@ -1841,7 +2438,14 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
-- after
if match and s > l then
local current = last and getnext(last)
+ if not current then
+ if sweeptype == "post" or sweeptype == "replace" then
+ current = getnext(sweepnode)
+ -- sweeptype = nil
+ end
+ end
if current then
+ local discfound = nil
-- removed optimization for s-l == 1, we have to deal with marks anyway
local n = l + 1
while n <= s do
@@ -1861,19 +2465,81 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
elseif seq[n][char] then
n = n + 1
else
- match = false
+ if discfound then
+ notmatchreplace[discfound] = true
+ match = not notmatchpre[discfound]
+ else
+ match = false
+ end
break
end
+ else
+ if discfound then
+ notmatchreplace[discfound] = true
+ match = not notmatchpre[discfound]
+ else
+ match = false
+ end
+ break
+ end
+ else
+ if discfound then
+ notmatchreplace[discfound] = true
+ match = not notmatchpre[discfound]
else
match = false
+ end
+ break
+ end
+ elseif id == disc_code then
+ diskseen = true
+ discfound = current
+ notmatchpre[current] = nil
+ notmatchpost[current] = true
+ notmatchreplace[current] = nil
+ local pre = getfield(current,"pre")
+ local replace = getfield(current,"replace")
+ if pre then
+ local n = n
+ while pre do
+ if seq[n][getchar(pre)] then
+ n = n + 1
+ pre = getnext(pre)
+ if n > s then
+ break
+ end
+ else
+ notmatchpre[current] = true
+ break
+ end
+ end
+ if n <= s then
+ notmatchpre[current] = true
+ end
+ else
+ notmatchpre[current] = true
+ end
+ if replace then
+ -- so far we never entered this branch
+ while replace do
+ if seq[n][getchar(replace)] then
+ n = n + 1
+ replace = getnext(replace)
+ if n > s then
+ break
+ end
+ else
+ notmatchreplace[current] = true
+ match = notmatchpre[current]
+ break
+ end
+ end
+ if not match then
break
end
else
- match = false
- break
+ -- skip 'm
end
- elseif id == disc_code then
- -- skip 'm
elseif seq[n][32] then -- brrr
n = n + 1
else
@@ -1894,7 +2560,8 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
end
end
if match then
- -- ck == currentcontext
+ -- can lookups be of a different type ?
+ local diskchain = diskseen or sweepnode
if trace_contexts then
local rule, lookuptype, f, l = ck[1], ck[2], ck[4], ck[5]
local char = getchar(start)
@@ -1914,10 +2581,14 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
local chainlookupname = chainlookups[1]
local chainlookup = lookuptable[chainlookupname]
if chainlookup then
- local cp = chainprocs[chainlookup.type]
- if cp then
+ local chainproc = chainprocs[chainlookup.type]
+ if chainproc then
local ok
- head, start, ok = cp(head,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence)
+ if diskchain then
+ head, start, ok = chaindisk(head,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence,chainproc)
+ else
+ head, start, ok = chainproc(head,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence)
+ end
if ok then
done = true
end
@@ -1929,13 +2600,13 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
end
else
local i = 1
- while true do
+ while start and true do
if skipped then
- while true do
+ while true do -- todo: use properties
local char = getchar(start)
local ccd = descriptions[char]
if ccd then
- local class = ccd.class
+ local class = ccd.class or "base"
if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then
start = getnext(start)
else
@@ -1946,36 +2617,51 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
end
end
end
+ -- see remark in ms standard under : LookupType 5: Contextual Substitution Subtable
local chainlookupname = chainlookups[i]
local chainlookup = lookuptable[chainlookupname]
if not chainlookup then
- -- okay, n matches, < n replacements
+ -- we just advance
i = i + 1
else
- local cp = chainmores[chainlookup.type]
- if not cp then
+ local chainproc = chainprocs[chainlookup.type]
+ if not chainproc then
-- actually an error
logprocess("%s: %s is not yet supported",cref(kind,chainname,chainlookupname),chainlookup.type)
i = i + 1
else
local ok, n
- head, start, ok, n = cp(head,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,i,sequence)
+ if diskchain then
+ head, start, ok = chaindisk(head,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence,chainproc)
+ else
+ head, start, ok, n = chainproc(head,start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,i,sequence)
+ end
-- messy since last can be changed !
if ok then
done = true
- -- skip next one(s) if ligature
- i = i + (n or 1)
- else
- i = i + 1
+ if n and n > 1 then
+ -- we have a ligature (cf. the spec we advance by one, but we really need to test it
+ -- as there are fonts out there that are fuzzy and have too many lookups):
+ --
+ -- U+1105 U+119E U+1105 U+119E : sourcehansansklight: script=hang ccmp=yes
+ --
+ if i + n > nofchainlookups then
+ -- if trace_contexts then
+ -- logprocess("%s: quitting lookups",cref(kind,chainname))
+ -- end
+ break
+ else
+ -- we need to carry on
+ end
+ end
end
+ i = i + 1
end
end
- if i > nofchainlookups then
+ if i > nofchainlookups or not start then
break
elseif start then
start = getnext(start)
- else
- -- weird
end
end
end
@@ -1990,8 +2676,16 @@ local function normal_handle_contextchain(head,start,kind,chainname,contexts,seq
end
end
end
+ if done then
+ break -- out of contexts (new, needs checking)
+ end
end
end
+ if diskseen then -- maybe move up so that we can turn checking on/off
+ notmatchpre = { }
+ notmatchpost = { }
+ notmatchreplace = { }
+ end
return head, start, done
end
@@ -2076,13 +2770,13 @@ local function initialize(sequence,script,language,enabled)
local order = sequence.order
if order then
for i=1,#order do --
- local kind = order[i] --
+ local kind = order[i] --
local valid = enabled[kind]
if valid then
local scripts = features[kind] --
local languages = scripts[script] or scripts[wildcard]
if languages and (languages[language] or languages[wildcard]) then
- return { valid, autofeatures[kind] or false, sequence.chain or 0, kind, sequence }
+ return { valid, autofeatures[kind] or false, sequence, kind }
end
end
end
@@ -2126,32 +2820,216 @@ function otf.dataset(tfmdata,font) -- generic variant, overloaded in context
return rl
end
--- elseif id == glue_code then
--- if p[5] then -- chain
--- local pc = pp[32]
--- if pc then
--- start, ok = start, false -- p[1](start,kind,p[2],pc,p[3],p[4])
--- if ok then
--- done = true
--- end
--- if start then start = getnext(start) end
--- else
--- start = getnext(start)
--- end
--- else
--- start = getnext(start)
--- end
+-- assumptions:
+--
+-- * languages that use complex disc nodes
+
+local function kernrun(disc,run)
+ --
+ -- we catch