if not modules then modules = { } end modules ['font-ots'] = { -- sequences
version = 1.001,
optimize = true,
comment = "companion to font-ini.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
copyright = "PRAGMA ADE / ConTeXt Development Team",
license = "see context related readme files",
}
--[[ldx--
I need to check the description at the microsoft site ... it has been improved
so maybe there are some interesting details there. Most below is based on old and
incomplete documentation and involved quite a bit of guesswork (checking with the
abstract uniscribe of those days). But changing things is tricky!
This module is a bit more split up than I'd like but since we also want to test
with plain it has to be so. This module is part of ConTeXt
and discussion about improvements and functionality mostly happens on the
ConTeXt mailing list.
The specification of OpenType is (or at least decades ago was) kind of vague.
Apart from a lack of a proper free specifications there's also the problem that
Microsoft and Adobe may have their own interpretation of how and in what order to
apply features. In general the Microsoft website has more detailed specifications
and is a better reference. There is also some information in the FontForge help
files. In the end we rely most on the Microsoft specification.
Because there is so much possible, fonts might contain bugs and/or be made to
work with certain renderers. These may evolve over time which may have the side
effect that suddenly fonts behave differently. We don't want to catch all font
issues.
After a lot of experiments (mostly by Taco, me and Idris) the first implementation
was already quite useful. When it did most of what we wanted, a more optimized version
evolved. Of course all errors are mine and of course the code can be improved. There
are quite some optimizations going on here and processing speed is currently quite
acceptable and has been improved over time. Many complex scripts are not yet
supported, but I will look into them as soon as users ask for it.
The specification leaves room for interpretation. In case of doubt the Microsoft
implementation is the reference as it is the most complete one. As they deal with
lots of scripts and fonts, Kai and Ivo did a lot of testing of the generic code and
their suggestions help improve the code. I'm aware that not all border cases can be
taken care of, unless we accept excessive runtime, and even then the interference
with other mechanisms (like hyphenation) are not trivial.
Especially discretionary handling has been improved much by Kai Eigner who uses complex
(latin) fonts. The current implementation is a compromise between his patches and my code
and in the meantime performance is quite ok. We cannot check all border cases without
compromising speed but so far we're okay. Given good test cases we can probably improve
it here and there. Especially chain lookups are non trivial with discretionaries but
things got much better over time thanks to Kai.
Glyphs are indexed not by unicode but in their own way. This is because there is no
relationship with unicode at all, apart from the fact that a font might cover certain
ranges of characters. One character can have multiple shapes. However, at the TeX
end we use unicode, so all extra glyphs are mapped into a private
space. This is needed because we need to access them and TeX has to include
them in the output eventually.
The initial data table is rather close to the open type specification and also not
that different from the one produced by FontForge but we use hashes instead.
In ConTeXt that table is packed (similar tables are shared) and cached on disk
so that successive runs can use the optimized table (after loading the table is
unpacked).
This module is sparsely documented because it has been a moving target. The
table format of the reader changed a bit over time and we experiment a lot with
different methods for supporting features. By now the structures are quite stable.
Incrementing the version number will force a re-cache. We jump the number by one
when there's a fix in the reader or processing code that can result in different
results.
This code is also used outside context but in context it has to work with other
mechanisms. Both put some constraints on the code here.
--ldx]]--
-- Remark: We assume that cursives don't cross discretionaries which is okay because it
-- is only used in semitic scripts.
--
-- Remark: We assume that marks precede base characters.
--
-- Remark: When complex ligatures extend into discs nodes we can get side effects. Normally
-- this doesn't happen; ff\d{l}{l}{l} in lm works but ff\d{f}{f}{f}.
--
-- Todo: check if we copy attributes to disc nodes if needed.
--
-- Todo: it would be nice if we could get rid of components. In other places we can use
-- the unicode properties. We can just keep a lua table.
--
-- Remark: We do some disc juggling where we need to keep in mind that the pre, post and
-- replace fields can have prev pointers to a nesting node ... I wonder if that is still
-- needed.
--
-- Remark: This is not possible:
--
-- \discretionary {alpha-} {betagammadelta}
-- {\discretionary {alphabeta-} {gammadelta}
-- {\discretionary {alphabetagamma-} {delta}
-- {alphabetagammadelta}}}
--
-- Remark: Something is messed up: we have two mark / ligature indices, one at the
-- injection end and one here ... this is based on KE's patches but there is something
-- fishy there as I'm pretty sure that for husayni we need some connection (as it's much
-- more complex than an average font) but I need proper examples of all cases, not of
-- only some.
--
-- Remark: I wonder if indexed would be faster than unicoded. It would be a major
-- rewrite to have char being unicode + an index field in glyph nodes. Also more
-- assignments have to be made in order to keep things in sync. So, it's a no-go.
--
-- Remark: We can provide a fast loop when there are no disc nodes (tests show a 1%
-- gain). Smaller functions might perform better cache-wise. But ... memory becomes
-- faster anyway, so ...
--
-- Remark: Some optimizations made sense for 5.2 but seem less important for 5.3 but
-- anyway served their purpose.
--
-- Todo: just (0=l2r and 1=r2l) or maybe (r2l = true)
-- Experiments with returning the data with the ischar are positive for lmtx but
-- have a performance hit on mkiv because there we need to wrap ischardata (pending
-- extensions to luatex which is unlikely to happen for such an experiment because
-- we then can't remove it). Actually it might make generic slightly faster. Also,
-- there are some corner cases where a data check comes before a char fetch and
-- we're talking of millions of calls there. At some point I might make a version
-- for lmtx that does it slightly different anyway.
local type, next, tonumber = type, next, tonumber
local random = math.random
local formatters = string.formatters
local insert = table.insert
local registertracker = trackers.register
local logs = logs
local trackers = trackers
local nodes = nodes
local attributes = attributes
local fonts = fonts
local otf = fonts.handlers.otf
local tracers = nodes.tracers
local trace_singles = false registertracker("otf.singles", function(v) trace_singles = v end)
local trace_multiples = false registertracker("otf.multiples", function(v) trace_multiples = v end)
local trace_alternatives = false registertracker("otf.alternatives", function(v) trace_alternatives = v end)
local trace_ligatures = false registertracker("otf.ligatures", function(v) trace_ligatures = v end)
local trace_contexts = false registertracker("otf.contexts", function(v) trace_contexts = v end)
local trace_marks = false registertracker("otf.marks", function(v) trace_marks = v end)
local trace_kerns = false registertracker("otf.kerns", function(v) trace_kerns = v end)
local trace_cursive = false registertracker("otf.cursive", function(v) trace_cursive = v end)
local trace_preparing = false registertracker("otf.preparing", function(v) trace_preparing = v end)
local trace_bugs = false registertracker("otf.bugs", function(v) trace_bugs = v end)
local trace_details = false registertracker("otf.details", function(v) trace_details = v end)
local trace_steps = false registertracker("otf.steps", function(v) trace_steps = v end)
local trace_skips = false registertracker("otf.skips", function(v) trace_skips = v end)
local trace_plugins = false registertracker("otf.plugins", function(v) trace_plugins = v end)
local trace_chains = false registertracker("otf.chains", function(v) trace_chains = v end)
local trace_kernruns = false registertracker("otf.kernruns", function(v) trace_kernruns = v end)
----- trace_discruns = false registertracker("otf.discruns", function(v) trace_discruns = v end)
local trace_compruns = false registertracker("otf.compruns", function(v) trace_compruns = v end)
local trace_testruns = false registertracker("otf.testruns", function(v) trace_testruns = v end)
local forcediscretionaries = false
local forcepairadvance = false -- for testing
local repeatablemultiples = context or false
directives.register("otf.forcediscretionaries", function(v) forcediscretionaries = v end)
directives.register("otf.forcepairadvance", function(v) forcepairadvance = v end)
local report_direct = logs.reporter("fonts","otf direct")
local report_subchain = logs.reporter("fonts","otf subchain")
local report_chain = logs.reporter("fonts","otf chain")
local report_process = logs.reporter("fonts","otf process")
local report_warning = logs.reporter("fonts","otf warning")
local report_run = logs.reporter("fonts","otf run")
registertracker("otf.substitutions", "otf.singles","otf.multiples","otf.alternatives","otf.ligatures")
registertracker("otf.positions", "otf.marks","otf.kerns","otf.cursive")
registertracker("otf.actions", "otf.substitutions","otf.positions")
registertracker("otf.sample", "otf.steps","otf.substitutions","otf.positions","otf.analyzing")
registertracker("otf.sample.silent", "otf.steps=silent","otf.substitutions","otf.positions","otf.analyzing")
local nuts = nodes.nuts
local getnext = nuts.getnext
local setnext = nuts.setnext
local getprev = nuts.getprev
local setprev = nuts.setprev
local getboth = nuts.getboth
local setboth = nuts.setboth
local getid = nuts.getid
local getstate = nuts.getstate
local getsubtype = nuts.getsubtype
local getchar = nuts.getchar
local setchar = nuts.setchar
local getdisc = nuts.getdisc
local setdisc = nuts.setdisc
local getreplace = nuts.getreplace
local setlink = nuts.setlink
local getwidth = nuts.getwidth
local getattr = nuts.getattr
local getglyphdata = nuts.getglyphdata
---------------------------------------------------------------------------------------
-- Beware: In ConTeXt components no longer are real components. We only keep track of
-- their positions because some complex ligatures might need that. For the moment we
-- use an x_ prefix because for now generic follows the other approach.
local components = nuts.components
local copynocomponents = components.copynocomponents
local copyonlyglyphs = components.copyonlyglyphs
local countcomponents = components.count
local setcomponents = components.set
local getcomponents = components.get
local flushcomponents = components.flush
---------------------------------------------------------------------------------------
local ischar = nuts.ischar
local usesfont = nuts.usesfont
local insertnodeafter = nuts.insertafter
local copynode = nuts.copy
local copynodelist = nuts.copylist
local removenode = nuts.remove
local findnodetail = nuts.tail
local flushnodelist = nuts.flushlist
local flushnode = nuts.flushnode
local endofmath = nuts.endofmath
local startofpar = nuts.startofpar
local setmetatable = setmetatable
local setmetatableindex = table.setmetatableindex
local nextnode = nuts.traversers.node
local nodecodes = nodes.nodecodes
local glyphcodes = nodes.glyphcodes
local glyph_code = nodecodes.glyph
local glue_code = nodecodes.glue
local disc_code = nodecodes.disc
local math_code = nodecodes.math
local dir_code = nodecodes.dir
local par_code = nodecodes.par
local lefttoright_code = nodes.dirvalues.lefttoright
local righttoleft_code = nodes.dirvalues.righttoleft
local discretionarydisc_code = nodes.disccodes.discretionary
local a_noligature = attributes.private("noligature")
local injections = nodes.injections
local setmark = injections.setmark
local setcursive = injections.setcursive
local setkern = injections.setkern
local setmove = injections.setmove
local setposition = injections.setposition
local resetinjection = injections.reset
local copyinjection = injections.copy
local setligaindex = injections.setligaindex
local getligaindex = injections.getligaindex
local fontdata = fonts.hashes.identifiers
local fontfeatures = fonts.hashes.features
local otffeatures = fonts.constructors.features.otf
local registerotffeature = otffeatures.register
local onetimemessage = fonts.loggers.onetimemessage or function() end
local getrandom = utilities and utilities.randomizer and utilities.randomizer.get
otf.defaultnodealternate = "none" -- first last
-- We use a few semi-global variables. The handler can be called nested but this assumes
-- that the same font is used.
local tfmdata = false
local characters = false
local descriptions = false
local marks = false
local classes = false
local currentfont = false
local factor = 0
local threshold = 0
local checkmarks = false
local discs = false
local spaces = false
local sweepnode = nil
local sweephead = { } -- we don't nil entries but false them (no collection and such)
local notmatchpre = { } -- to be checked: can we use false instead of nil / what if a == b tests
local notmatchpost = { } -- to be checked: can we use false instead of nil / what if a == b tests
local notmatchreplace = { } -- to be checked: can we use false instead of nil / what if a == b tests
local handlers = { }
local isspace = injections.isspace
local getthreshold = injections.getthreshold
local checkstep = (tracers and tracers.steppers.check) or function() end
local registerstep = (tracers and tracers.steppers.register) or function() end
local registermessage = (tracers and tracers.steppers.message) or function() end
-- Reports a processing message. When step tracing is enabled the message is
-- also handed to the step tracer; in "silent" step mode the regular log line
-- is suppressed.
local function logprocess(...)
    if trace_steps then
        registermessage(...)
    end
    if trace_steps ~= "silent" then
        report_direct(...)
    end
end
-- Reports a warning. All arguments are forwarded unchanged to the "otf direct"
-- reporter; unlike logprocess this never involves the step tracer.
-- NOTE(review): the dedicated report_warning reporter is not used here; confirm
-- that logging under "otf direct" is intentional.
local function logwarning(...)
report_direct(...)
end
-- gref(n) gives a readable reference for tracing: either for one code point
-- (a number) or for a list of them. Entries in a list that are not numeric are
-- skipped. A false or nil argument results in an empty string.
local gref do

    local f_unicode = formatters["U+%X"]      -- was ["%U"]
    local f_uniname = formatters["U+%X (%s)"] -- was ["%U (%s)"]
    local f_unilist = formatters["% t"]

    -- Formats a single code point, adding the glyph name when the current
    -- font's descriptions provide one.
    local function describe(unicode)
        local description = descriptions[unicode]
        local name = description and description.name
        if name then
            return f_uniname(unicode,name)
        end
        return f_unicode(unicode)
    end

    gref = function(n) -- currently the same as in font-otb
        if type(n) == "number" then
            return describe(n)
        elseif not n then
            return ""
        end
        local list = { }
        local size = 0
        for i=1,#n do
            local ni = n[i]
            if tonumber(ni) then -- later we will start at 2
                size = size + 1
                list[size] = describe(ni)
            end
        end
        return f_unilist(list)
    end

end
-- Describes a chain lookup for tracing: feature (dataset[4]), lookup type,
-- whether the sequence is merged, its name and, when larger than one, the index.
local function cref(dataset,sequence,index)
    if not dataset then
        return "no valid dataset"
    end
    local merged = ""
    if sequence.merged then
        merged = "merged "
    end
    if not (index and index > 1) then
        local describe = formatters["feature %a, type %a, %schain lookup %a"]
        return describe(dataset[4],sequence.type,merged,sequence.name)
    end
    local describe = formatters["feature %a, type %a, %schain lookup %a, index %a"]
    return describe(dataset[4],sequence.type,merged,sequence.name,index)
end
-- Describes a (non chain) lookup for tracing: feature (dataset[4]), lookup
-- type, whether the sequence is merged and its name.
local function pref(dataset,sequence)
    local describe = formatters["feature %a, type %a, %slookup %a"]
    local feature  = dataset[4]
    local kind     = sequence.type
    local merged   = sequence.merged and "merged " or ""
    return describe(feature,kind,merged,sequence.name)
end
-- Maps a direction mode onto a label for tracing: an unset or non negative
-- mode is reported as "l2r", anything else as "r2l".
local function mref(rlmode)
    if rlmode and not (rlmode >= 0) then
        return "r2l"
    end
    return "l2r"
end
-- The next code is somewhat complicated by the fact that some fonts can have ligatures made
-- from ligatures that themselves have marks. This was identified by Kai in for instance
-- arabtype: KAF LAM SHADDA ALEF FATHA (0x0643 0x0644 0x0651 0x0627 0x064E). This becomes
-- KAF LAM-ALEF with a SHADDA on the first and a FATHA on the second component. In a next
-- iteration this becomes a KAF-LAM-ALEF with a SHADDA on the second and a FATHA on the
-- third component.
-- We can assume that languages that use marks are not hyphenated. We can also assume
-- that at most one discretionary is present.
-- We do need components in funny kerning mode but maybe I can better reconstruct them
-- as we do have the font components info available; removing components makes the
-- previous code much simpler. Also, later on copying and freeing becomes easier.
-- However, for arabic we need to keep them around for the sake of mark placement
-- and indices.
-- Removes a discretionary from the list and puts its replace list (if any) in
-- its place. The pre and post lists are flushed, as is the disc node itself.
--
-- Returns the (possibly new) head and the node that now sits where the disc
-- was: the first replace node or else the node that followed the disc. When
-- the disc was the head and there is neither a replace list nor a next node,
-- nothing is returned.
local function flattendisk(head,disc)
    local washead = head == disc
    local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
    local before, after = getboth(disc)
    -- detach the sublists first so that flushing the disc leaves them alone
    setdisc(disc)
    flushnode(disc)
    if pre then
        flushnodelist(pre)
    end
    if post then
        flushnodelist(post)
    end
    if replace and after then
        setlink(replacetail,after)
    end
    local current = replace or after
    if washead then
        if current then
            return current, current
        end
        -- return -- maybe warning
        return
    end
    setlink(before,current) -- checks for a nil current anyway
    return head, current
end
-- Appends a node list to both the post and the replace field of a disc node:
-- the given list itself ends up in post, a copy of it in replace. The pre
-- field is left as it is.
local function appenddisc(disc,list)
    local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
    local copied = copynodelist(list)
    if not post then
        post = list
    else
        setlink(posttail,list)
    end
    if not replace then
        replace = copied
    else
        setlink(replacetail,copied)
    end
    setdisc(disc,pre,post,replace)
end
-- Replaces the run start .. stop by one new glyph node with character char.
-- The original run is attached to the new node as components. When the run is
-- a single node that already has the wanted character nothing changes.
--
-- Returns the (possibly new) head and the resulting node.
local function markstoligature(head,start,stop,char)
    if start == stop and getchar(start) == char then
        return head, start
    end
    local before = getprev(start)
    local after  = getnext(stop)
    -- isolate the run before we copy its first node
    setprev(start)
    setnext(stop)
    local ligature = copynocomponents(start,copyinjection)
    resetinjection(ligature)
    setchar(ligature,char)
    setcomponents(ligature,start)
    setlink(before,ligature,after)
    flushcomponents(start)
    if head == start then
        head = ligature
    end
    return head, ligature
end
-- Remark for Kai: (some arabic fonts do mark + mark = other mark and such)
--
-- The hasmarks is needed for ligatures of marks that are part of a ligature in
-- which case we assume that we can delete the marks anyway (we can always become
-- more clever if needed) .. in fact the whole logic here should be redone. We're
-- in the not discfound branch then. We now have skiphash too so we can be more
-- selective if needed (todo).
-- we can have more granularity here but for now we only do a simple check
local no_left_ligature_code  = 1
local no_right_ligature_code = 2
local no_left_kern_code      = 4
local no_right_kern_code     = 8

-- Tells if glyph option c is set for node n. Only the two ligature options are
-- supported here and both are driven by the "noligature" attribute having the
-- value 1; for any other option the answer is false.
local function hasglyphoption(n,c)
    if c ~= no_left_ligature_code and c ~= no_right_ligature_code then
        return false
    end
    return getattr(n,a_noligature) == 1
end
-- in lmtx we need to check the components and can be slightly more clever
-- Turns the run start .. stop into a single ligature glyph with character char.
--
-- head      : head of the node list
-- start     : first node of the run
-- stop      : last node of the run
-- char      : character to assign to the resulting ligature node
-- dataset   : only used for trace messages (via pref)
-- sequence  : only used for trace messages (via pref)
-- skiphash  : when unset, marks inside the run are deleted
-- discfound : a disc node when one sits inside the run, otherwise false/nil
-- hasmarks  : when set, marks inside the run are deleted too
--
-- Returns head and the node at which the caller continues: normally the new
-- ligature node, but the node before it when the ligature got moved into the
-- replace field of discfound, and start itself when nothing had to be built.
local function toligature(head,start,stop,char,dataset,sequence,skiphash,discfound,hasmarks) -- brr head
-- ligatures are blocked for this glyph: leave the list untouched
if hasglyphoption(start,no_right_ligature_code) then
return head, start
end
-- a single node that already has the wanted character: only reset its injection
if start == stop and getchar(start) == char then
resetinjection(start)
setchar(start,char)
return head, start
end
local prev = getprev(start)
local next = getnext(stop)
local comp = start
-- isolate the run start .. stop from the main list
setprev(start)
setnext(stop)
-- the ligature node is created by copying the first node of the run
local base = copynocomponents(start,copyinjection)
if start == head then
head = base
end
resetinjection(base)
setchar(base,char)
-- the isolated run is attached to the new node as its components
setcomponents(base,comp)
setlink(prev,base,next)
if not discfound then
local deletemarks = not skiphash or hasmarks
-- first node of the run; flushed at the end of this branch (this local shadows
-- the file level components table)
local components = start
local baseindex = 0
local componentindex = 0
-- this local head shadows the parameter: inserting kept marks below does not
-- change the head that is returned at the end of the function
local head = base
local current = base
-- first we loop over the glyphs in start ... stop
while start do
local char = getchar(start)
if not marks[char] then
-- a non mark component: advance the base index by the size of the previous one
baseindex = baseindex + componentindex
componentindex = countcomponents(start,marks)
-- we can be more clever here: "not deletemarks or (skiphash and not skiphash[char])"
-- and such:
elseif not deletemarks then
-- we can get a loop when the font expects otherwise (i.e. unexpected deletemarks)
setligaindex(start,baseindex + getligaindex(start,componentindex))
if trace_marks then
logwarning("%s: keep ligature mark %s, gets index %s",pref(dataset,sequence),gref(char),getligaindex(start))
end
-- a copy of the kept mark is inserted after the ligature (after previously kept marks)
local n = copynode(start)
copyinjection(n,start) -- is this ok ? we position later anyway
head, current = insertnodeafter(head,current,n) -- unlikely that mark has components
elseif trace_marks then
logwarning("%s: delete ligature mark %s",pref(dataset,sequence),gref(char))
end
start = getnext(start)
end
-- we can have one accent as part of a lookup and another following
-- (this start is a new local that walks the nodes following the ligature and its kept marks)
local start = getnext(current)
while start do
local char = ischar(start)
if char then
-- also something skiphash here?
if marks[char] then
setligaindex(start,baseindex + getligaindex(start,componentindex))
if trace_marks then
logwarning("%s: set ligature mark %s, gets index %s",pref(dataset,sequence),gref(char),getligaindex(start))
end
start = getnext(start)
else
break
end
else
break
end
end
flushcomponents(components)
else
-- discfound ... forget about marks .. probably no scripts that hyphenate and have marks
local discprev, discnext = getboth(discfound)
if discprev and discnext then
-- we assume normalization in context, and don't care about generic ... especially
-- \- can give problems as there we can have a negative char but that won't match
-- anyway
local pre, post, replace, pretail, posttail, replacetail = getdisc(discfound,true)
if not replace then
-- looks like we never come here as it's not okay
local prev = getprev(base)
-- local comp = getcomponents(base) -- already set
local copied = copyonlyglyphs(comp)
-- the part of the run before the disc gets linked to the old pre list (or is
-- terminated) and the run as a whole becomes the new pre field
if pre then
setlink(discprev,pre)
else
setnext(discprev) -- also blocks funny assignments
end
pre = comp -- is start
-- the part of the run after the disc is appended to (or becomes) the post field
if post then
setlink(posttail,discnext)
setprev(post) -- nil anyway
else
post = discnext
setprev(discnext) -- also blocks funny assignments
end
-- the disc takes the place of the ligature in the main list
setlink(prev,discfound,next)
setboth(base)
-- here components have a pointer so we can't free it!
setcomponents(base,copied)
-- and the ligature itself becomes the replace field
replace = base
if forcediscretionaries then
setdisc(discfound,pre,post,replace,discretionarydisc_code)
else
setdisc(discfound,pre,post,replace)
end
-- continue at the node before the disc
base = prev
end
end
end
return head, base
end
-- Replaces the glyph at start by the sequence of characters in multiple: the
-- node itself gets the first character and copies of it are appended for the
-- others. The skiphash and stop arguments are currently not used.
--
-- The what argument can trigger the experimental repeat mechanism (only when
-- repeatablemultiples is set and what is not true): a string like
-- "repeatfirst:3", "repeatmiddle=2" or "repeatlast:4", or a table with first,
-- middle and last fields. Repeating the middle one only happens for sequences
-- of exactly three glyphs.
--
-- Returns head, the last node of the result and true; or head, start and false
-- when multiple is empty.
local function multiple_glyphs(head,start,multiple,skiphash,what,stop) -- what to do with skiphash matches here
    local count = #multiple
    if count < 1 then
        if trace_multiples then
            logprocess("no multiple for %s",gref(getchar(start)))
        end
        return head, start, false
    end
    local first = start
    local last  = start
    resetinjection(first)
    setchar(first,multiple[1])
    -- local sn = getnext(start)
    for i=2,count do
        -- untested:
        --
        -- while ignoremarks and marks[getchar(sn)] then
        --     local sn = getnext(sn)
        -- end
        local glyph = copynode(last) -- ignore components
        resetinjection(glyph)
        setchar(glyph,multiple[i])
        insertnodeafter(head,last,glyph)
        last = glyph
    end
    if repeatablemultiples and what ~= true then
        -- This is just some experimental code; we might introduce gsub_extensible
        -- some day instead. Beware: when we have a feature that mixes alternates and
        -- multiples we need to make sure we don't handle the alternate string values
        -- here.
        local middle, leading, trailing
        local kind = type(what)
        if kind == "table" then
            -- won't happen because currently we don't split these values
            middle   = what.middle
            leading  = what.first
            trailing = what.last
        elseif kind == "string" then
            local where, times = string.match(what,"^repeat(.-)[:=](%d+)$")
            if where == "middle" then
                middle = tonumber(times)
            elseif where == "first" then
                leading = tonumber(times)
            elseif where == "last" then
                trailing = tonumber(times)
            end
        end
        if middle and middle > 1 and count == 3 then
            local source = getnext(first)
            for i=2,middle do
                local glyph = copynode(source) -- ignore components
                resetinjection(glyph)
                insertnodeafter(head,first,glyph)
            end
        end
        if leading and leading > 1 then
            for i=2,leading do
                local glyph = copynode(first) -- ignore components
                resetinjection(glyph)
                insertnodeafter(head,first,glyph)
            end
        end
        if trailing and trailing > 1 then
            for i=2,trailing do
                local glyph = copynode(last) -- ignore components
                resetinjection(glyph)
                insertnodeafter(head,last,glyph)
                last = glyph
            end
        end
    end
    return head, last, true
end
-- Picks one glyph out of a list of alternatives.
--
-- start        : the glyph node being replaced (only consulted when value is 0)
-- alternatives : indexed list of candidate characters
-- value        : the feature value: true (same as 1), a number (or numeric
--                string) or one of "random", "first" and "last"
--
-- Returns the chosen character, or false when value exceeds the number of
-- alternatives and otf.defaultnodealternate is neither "first" nor "last". The
-- second return value is a comment for tracing; it is only a string when
-- trace_alternatives is set.
local function get_alternative_glyph(start,alternatives,value)
    local n = #alternatives
    if n == 1 then
        -- we could actually change that into a gsub and save some memory in the
        -- font loader but it makes tracing more messy
        return alternatives[1], trace_alternatives and "1 (only one present)"
    elseif value == "random" then
        local r = getrandom and getrandom("glyph",1,n) or random(1,n)
        return alternatives[r], trace_alternatives and formatters["value %a, taking %a"](value,r)
    elseif value == "first" then
        return alternatives[1], trace_alternatives and formatters["value %a, taking %a"](value,1)
    elseif value == "last" then
        return alternatives[n], trace_alternatives and formatters["value %a, taking %a"](value,n)
    end
    value = value == true and 1 or tonumber(value)
    if type(value) ~= "number" then
        return alternatives[1], trace_alternatives and formatters["invalid value %s, taking %a"](value,1)
    end
 -- local a = alternatives[value]
 -- if a then
 --     -- some kind of hash
 --     return a, trace_alternatives and formatters["value %a, taking %a"](value,a)
 -- end
    if value > n then
        -- out of range: fall back on the configured default; the glyph that we
        -- return has to be the one that the trace message mentions (these two
        -- used to be swapped)
        local defaultalt = otf.defaultnodealternate
        if defaultalt == "first" then
            return alternatives[1], trace_alternatives and formatters["invalid value %s, taking %a"](value,1)
        elseif defaultalt == "last" then
            return alternatives[n], trace_alternatives and formatters["invalid value %s, taking %a"](value,n)
        else
            return false, trace_alternatives and formatters["invalid value %a, %s"](value,"out of range")
        end
    elseif value == 0 then
        -- zero means: keep the current character
        return getchar(start), trace_alternatives and formatters["invalid value %a, %s"](value,"no change")
    elseif value < 1 then
        return alternatives[1], trace_alternatives and formatters["invalid value %a, taking %a"](value,1)
    else
        return alternatives[value], trace_alternatives and formatters["value %a, taking %a"](value,value)
    end
end
-- handlers
-- Single substitution: the character of start is replaced by replacement and
-- its injection data is reset. Always reports success.
function handlers.gsub_single(head,start,dataset,sequence,replacement)
    if trace_singles then
        local where  = pref(dataset,sequence)
        local before = gref(getchar(start))
        logprocess("%s: replacing %s by single %s",where,before,gref(replacement))
    end
    resetinjection(start)
    setchar(start,replacement)
    return head, start, true
end
-- Alternate substitution: picks one of the alternatives based on the feature
-- value. When the dataset value is just true, the value set for the feature
-- (dataset[4]) in the font's shared features is used when there is one. When
-- no alternative can be chosen the glyph is left alone; success is reported
-- either way.
function handlers.gsub_alternate(head,start,dataset,sequence,alternative)
    local value = dataset[1]
    if value == true then
        value = tfmdata.shared.features[dataset[4]] or true
    end
    local choice, comment = get_alternative_glyph(start,alternative,value)
    if not choice then
        if trace_alternatives then
            logwarning("%s: no variant %a for %s, %s",pref(dataset,sequence),value,gref(getchar(start)),comment)
        end
        return head, start, true
    end
    if trace_alternatives then
        logprocess("%s: replacing %s by alternative %a to %s, %s",pref(dataset,sequence),gref(getchar(start)),gref(choice),comment)
    end
    resetinjection(start)
    setchar(start,choice)
    return head, start, true
end
-- Multiple substitution: delegates to multiple_glyphs, passing the dataset
-- value (dataset[1]) along as the repeat specification. The rlmode argument
-- is not used.
function handlers.gsub_multiple(head,start,dataset,sequence,multiple,rlmode,skiphash)
    if trace_multiples then
        local where  = pref(dataset,sequence)
        local before = gref(getchar(start))
        logprocess("%s: replacing %s by multiple %s",where,before,gref(multiple))
    end
    local what = dataset[1]
    return multiple_glyphs(head,start,multiple,skiphash,what)
end
-- Don't we deal with disc otherwise now? I need to check if the next one can be
-- simplified. Anyway, it can be way messier: marks that get removed as well as
-- marks that are kept.
-- Ligature substitution.
--
-- The ligature argument is a tree: a table indexed by the next character of
-- the ligature, where a node in the tree has a ligature field when the path so
-- far makes a ligature. A plain number is also accepted and is then the result
-- itself. The tree is walked while nodes follow start that are characters in
-- the current font.
--
-- There are two branches:
--
--   * start itself is in skiphash: only directly following matching glyphs are
--     collected and combined with markstoligature (case 1)
--   * otherwise: glyphs in skiphash are stepped over, marks are registered and
--     a disc node ends the scan; when the disc (or the glyph after it) can
--     continue the ligature, the glyphs collected so far are moved into the
--     pre and replace fields of the disc so that a next run can deal with it
--
-- Returns head, the node to continue with and a success flag. The fourth value
-- is true when the list got rewritten around a disc node (a forced restart),
-- false otherwise, and nil when start has no next node.
function handlers.gsub_ligature(head,start,dataset,sequence,ligature,rlmode,skiphash)
    local current = getnext(start)
    if not current then
        return head, start, false, nil
    end
    local stop      = nil
    local startchar = getchar(start)
    if skiphash and skiphash[startchar] then
        while current do
            local char = ischar(current,currentfont)
            if char then
                local lg = not tonumber(ligature) and ligature[char]
                if lg then
                    stop     = current
                    ligature = lg
                    current  = getnext(current)
                else
                    break
                end
            else
                break
            end
        end
        if stop then
            local ligature = tonumber(ligature) or ligature.ligature
            if ligature then
                if trace_ligatures then
                    -- fetch the character first: stop is gone after the merge
                    local stopchar = getchar(stop)
                    head, start = markstoligature(head,start,stop,ligature)
                    logprocess("%s: replacing %s upto %s by ligature %s case 1",pref(dataset,sequence),gref(startchar),gref(stopchar),gref(getchar(start)))
                else
                    head, start = markstoligature(head,start,stop,ligature)
                end
                return head, start, true, false
            else
                -- ok, goto next lookup
            end
        end
    else
        local discfound = false
        local hasmarks  = marks[startchar]
        while current do
            local char, id = ischar(current,currentfont)
            if char then
                if skiphash and skiphash[char] then
                    current = getnext(current)
                else
                    local lg = not tonumber(ligature) and ligature[char]
                    if lg then
                        if marks[char] then
                            hasmarks = true
                        end
                        stop     = current -- needed for fake so outside then
                        ligature = lg
                        current  = getnext(current)
                    else
                        break
                    end
                end
            elseif char == false then
                -- kind of weird
                break
            elseif id == disc_code then
                discfound = current
                break
            else
                break
            end
        end
        -- of{f-}{}{f}e  o{f-}{}{f}fe  o{-}{}{ff}e  (oe and ff ligature)
        -- we can end up here when we have a start run .. testruns start at a disc but
        -- so here we have the other case: char + disc
        --
        -- Challenge for Kai (latinmodern): \hyphenation{fii-f-f-iif} fiiffiif
        --
        if discfound then
            -- don't assume marks in a disc and we don't run over a disc (for now)
            local pre, post, replace = getdisc(discfound)
            local match
            if replace then
                local char = ischar(replace,currentfont)
                if char and (not tonumber(ligature) and ligature[char]) then
                    match = true
                end
            end
            if not match and pre then
                local char = ischar(pre,currentfont)
                if char and (not tonumber(ligature) and ligature[char]) then
                    match = true
                end
            end
            -- Only when nothing matched so far and one of the fields is empty we
            -- look at the glyph after the disc. The parentheses are needed as
            -- "and" binds tighter than "or": without them the node after the disc
            -- was also probed when we already had a match but no replace.
            if not match and (not pre or not replace) then
                local n    = getnext(discfound)
                local char = ischar(n,currentfont)
                if char and (not tonumber(ligature) and ligature[char]) then
                    match = true
                end
            end
            if match then
                -- we force a restart
                local ishead = head == start
                local prev   = getprev(start)
                if stop then
                    -- start .. stop goes in front of replace and a copy in front of pre
                    setnext(stop)
                    local copy = copynodelist(start)
                    local tail = stop -- was: getprev(stop) -- Kai: needs checking on your samples
                    local liat = findnodetail(copy)
                    if pre then
                        setlink(liat,pre)
                    end
                    if replace then
                        setlink(tail,replace)
                    end
                    pre     = copy
                    replace = start
                else
                    -- only start itself goes in front of replace and a copy in front of pre
                    setnext(start)
                    local copy = copynode(start)
                    if pre then
                        setlink(copy,pre)
                    end
                    if replace then
                        setlink(start,replace)
                    end
                    pre     = copy
                    replace = start
                end
                setdisc(discfound,pre,post,replace)
                if prev then
                    setlink(prev,discfound)
                else
                    setprev(discfound)
                    head = discfound
                end
                start = discfound
                return head, start, true, true
            end
        end
        local ligature = tonumber(ligature) or ligature.ligature
        if ligature then
            if stop then
                if trace_ligatures then
                    local stopchar = getchar(stop)
                 -- head, start = toligature(head,start,stop,ligature,dataset,sequence,skiphash,discfound,hasmarks)
                    head, start = toligature(head,start,stop,ligature,dataset,sequence,skiphash,false,hasmarks)
                    logprocess("%s: replacing %s upto %s by ligature %s case 2",pref(dataset,sequence),gref(startchar),gref(stopchar),gref(ligature))
                    -- we can have a rare case of multiple disc in a lig but that makes no sense language wise but if really
                    -- needed we could backtrack if we're in a disc node
                else
                 -- head, start = toligature(head,start,stop,ligature,dataset,sequence,skiphash,discfound,hasmarks)
                    head, start = toligature(head,start,stop,ligature,dataset,sequence,skiphash,false,hasmarks)
                end
            else
                -- weird but happens (in some arabic font)
                resetinjection(start)
                setchar(start,ligature)
                if trace_ligatures then
                    logprocess("%s: replacing %s by (no real) ligature %s case 3",pref(dataset,sequence),gref(startchar),gref(ligature))
                end
            end
            return head, start, true, false
        else
            -- weird but happens, pseudo ligatures ... just the components
        end
    end
    return head, start, false, false
end
-- Handler for a single positioning step applied to the glyph at start. The kerns
-- argument is either a position table (handled by setposition) or a plain value
-- (handled by setmove or setkern, depending on step.format). The third return
-- value tells whether something was applied.
function handlers.gpos_single(head,start,dataset,sequence,kerns,rlmode,skiphash,step,injection)
    -- glyphs that carry the no_right_kern_code option are left untouched
    if hasglyphoption(start,no_right_kern_code) then
        return head, start, false
    end
    local startchar  = getchar(start)
    local format     = step.format
    local positional = format == "single" or type(kerns) == "table" -- the table check can go
    if positional then
        local dx, dy, w, h = setposition(0,start,factor,rlmode,kerns,injection)
        if trace_kerns then
            logprocess("%s: shifting single %s by %s xy (%p,%p) and wh (%p,%p)",pref(dataset,sequence),gref(startchar),format,dx,dy,w,h)
        end
    else
        -- "move" uses setmove, everything else is treated as a kern
        local shift = format == "move" and setmove or setkern
        local k     = shift(start,factor,rlmode,kerns,injection)
        if trace_kerns then
            logprocess("%s: shifting single %s by %s %p",pref(dataset,sequence),gref(startchar),format,k)
        end
    end
    return head, start, true
end
-- Handler for pair positioning. The kerns table (already selected for start) is
-- indexed by the character of the next relevant glyph. Glyphs whose character is
-- in skiphash are stepped over; the scan stops at the first node that is not a
-- character of the current font or that has no entry in kerns.
--
-- Returns head, start and a flag that is true when an entry was found and
-- handled. Note that start can be advanced to the second glyph of the pair.
-- The names factor and forcepairadvance are defined outside this function.
function handlers.gpos_pair(head,start,dataset,sequence,kerns,rlmode,skiphash,step,injection)
-- glyphs that carry the no_right_kern_code option are left untouched
if hasglyphoption(start,no_right_kern_code) then
return head, start, false
else
local snext = getnext(start)
if not snext then
return head, start, false
else
-- prev tracks the last node we stepped over (only used for tracing)
local prev = start
while snext do
local nextchar = ischar(snext,currentfont)
if nextchar then
if skiphash and skiphash[nextchar] then -- includes marks too when flag
prev = snext
snext = getnext(snext)
else
local krn = kerns[nextchar]
if not krn then
break
end
local format = step.format
if format == "pair" then
-- krn[1] applies to the first glyph, krn[2] to the second one; the
-- value true stands for an entry that needs no adjustment
local a = krn[1]
local b = krn[2]
if a == true then
-- zero
elseif a then -- #a > 0
local x, y, w, h = setposition(1,start,factor,rlmode,a,injection)
if trace_kerns then
local startchar = getchar(start)
logprocess("%s: shifting first of pair %s and %s by xy (%p,%p) and wh (%p,%p) as %s",pref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h,injection or "injections")
end
end
if b == true then
-- zero
start = snext -- cf spec
elseif b then -- #b > 0
local x, y, w, h = setposition(2,snext,factor,rlmode,b,injection)
if trace_kerns then
-- start is read here before it gets advanced below
local startchar = getchar(start)
logprocess("%s: shifting second of pair %s and %s by xy (%p,%p) and wh (%p,%p) as %s",pref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h,injection or "injections")
end
start = snext -- cf spec
elseif forcepairadvance then
start = snext -- for testing, not cf spec
end
return head, start, true
elseif krn ~= 0 then
-- a plain (non zero) value: applied to the second glyph as move or kern
local k = (format == "move" and setmove or setkern)(snext,factor,rlmode,krn,injection)
if trace_kerns then
logprocess("%s: inserting %s %p between %s and %s as %s",pref(dataset,sequence),format,k,gref(getchar(prev)),gref(nextchar),injection or "injections")
end
return head, start, true
else -- can't happen
break
end
end
else
-- not a character of the current font: stop scanning
break
end
end
return head, start, false
end
end
end
--[[ldx--
We get hits on a mark, but we're not sure if it has to be applied, so
we need to explicitly test for basechar, baselig and basemark entries.
--ldx]]--
-- Handler for mark to base attachment. The node at start must be a mark (its
-- character is looked up in marks). We walk backwards over preceding marks until
-- a non mark character of the current font is found and then, when the base has
-- an entry in markanchors[1], anchor the mark to it with setmark.
--
-- Returns head, start and a flag that is true when the mark was anchored. All
-- failure paths only differ in the warning issued when trace_bugs is set.
function handlers.gpos_mark2base(head,start,dataset,sequence,markanchors,rlmode,skiphash)
local markchar = getchar(start)
if marks[markchar] then
local base = getprev(start) -- [glyph] [start=mark]
if base then
local basechar = ischar(base,currentfont)
if basechar then
if marks[basechar] then
-- the previous glyph is a mark too, so backtrack till we hit a non mark;
-- base and basechar are updated in step so that both describe the same node
while base do
base = getprev(base)
if base then
basechar = ischar(base,currentfont)
if basechar then
if not marks[basechar] then
break
end
else
-- we ran into something that is not a character of this font
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
end
return head, start, false
end
else
-- we ran out of nodes
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
end
return head, start, false
end
end
end
-- markanchors[1] is indexed by base character, markanchors[2] goes with the mark
local ba = markanchors[1][basechar]
if ba then
local ma = markanchors[2]
local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
if trace_marks then
logprocess("%s, bound %s, anchoring mark %s to basechar %s => (%p,%p)",
pref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
end
return head, start, true
elseif trace_bugs then
-- onetimemessage(currentfont,basechar,"no base anchors")
logwarning("%s: mark %s is not anchored to %s",pref(dataset,sequence),gref(markchar),gref(basechar))
end
elseif trace_bugs then
-- the previous node is not a character of the current font
logwarning("%s: nothing preceding, case %i",pref(dataset,sequence),1)
end
elseif trace_bugs then
-- there is no previous node at all
logwarning("%s: nothing preceding, case %i",pref(dataset,sequence),2)
end
elseif trace_bugs then
logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
end
return head, start, false
end
-- Handler for mark to ligature attachment. It follows the same backtracking as
-- the mark to base handler, but the anchors found for the base are additionally
-- indexed by the ligature index that getligaindex reports for the mark.
--
-- Returns head, start and a flag that is true when the mark was anchored.
function handlers.gpos_mark2ligature(head,start,dataset,sequence,markanchors,rlmode,skiphash)
local markchar = getchar(start)
if marks[markchar] then
local base = getprev(start) -- [glyph] [optional marks] [start=mark]
if base then
local basechar = ischar(base,currentfont)
if basechar then
if marks[basechar] then
-- skip back over preceding marks; base and basechar are kept in sync
while base do
base = getprev(base)
if base then
basechar = ischar(base,currentfont)
if basechar then
if not marks[basechar] then
break
end
else
-- we ran into something that is not a character of this font
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
end
return head, start, false
end
else
-- we ran out of nodes
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
end
return head, start, false
end
end
end
local ba = markanchors[1][basechar]
if ba then
local ma = markanchors[2]
if ma then
-- pick the anchor that belongs to the ligature component of this mark
local index = getligaindex(start)
ba = ba[index]
if ba then
local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
if trace_marks then
logprocess("%s, index %s, bound %s, anchoring mark %s to baselig %s at index %s => (%p,%p)",
pref(dataset,sequence),index,bound,gref(markchar),gref(basechar),index,dx,dy)
end
return head, start, true
else
if trace_bugs then
logwarning("%s: no matching anchors for mark %s and baselig %s with index %a",pref(dataset,sequence),gref(markchar),gref(basechar),index)
end
end
end
elseif trace_bugs then
-- logwarning("%s: char %s is missing in font",pref(dataset,sequence),gref(basechar))
onetimemessage(currentfont,basechar,"no base anchors")
end
elseif trace_bugs then
-- the previous node is not a character of the current font
logwarning("%s: prev node is no char, case %i",pref(dataset,sequence),1)
end
elseif trace_bugs then
-- there is no previous node at all
logwarning("%s: prev node is no char, case %i",pref(dataset,sequence),2)
end
elseif trace_bugs then
logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
end
return head, start, false
end
-- Handler for mark to mark attachment. The candidate base mark is the previous
-- node; when the mark has a ligature index we first step back over nodes that
-- carry a different ligature index. The third return value is true when the
-- mark was anchored.
function handlers.gpos_mark2mark(head,start,dataset,sequence,markanchors,rlmode,skiphash)
    local markchar = getchar(start)
    if not marks[markchar] then
        if trace_bugs then
            logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
        end
        return head, start, false
    end
    local target = getprev(start) -- [glyph] [basemark] [start=mark]
    local index  = getligaindex(start)
    if index then -- a rather messy loop ... needs checking with husayni
        while target do
            local other = getligaindex(target)
            if not other or other == index then
                break
            end
            target = getprev(target)
        end
    end
    -- each step below yields a false value when the previous one failed
    local targetchar = target and ischar(target,currentfont) -- subtype test can go
    local anchor     = targetchar and markanchors[1][targetchar] -- slot 1 has been made copy of the class hash
    if anchor then
        local dx, dy, bound = setmark(start,target,factor,rlmode,anchor,markanchors[2],characters[targetchar],true,checkmarks)
        if trace_marks then
            logprocess("%s, bound %s, anchoring mark %s to basemark %s => (%p,%p)",
                pref(dataset,sequence),bound,gref(markchar),gref(targetchar),dx,dy)
        end
        return head, start, true
    end
    return head, start, false
end
-- Handler for cursive attachment. Marks are ignored as start glyph. Otherwise we
-- look for the first following non mark character of the current font and, when
-- there is an exit anchor (exitanchors[3]) and the next character has an entry
-- anchor (slot 2 of its record in exitanchors[1]), connect both with setcursive.
function handlers.gpos_cursive(head,start,dataset,sequence,exitanchors,rlmode,skiphash,step) -- to be checked
    local startchar = getchar(start)
    if marks[startchar] then
        if trace_cursive then
            logprocess("%s: ignoring cursive for mark %s",pref(dataset,sequence),gref(startchar))
        end
        return head, start, false
    end
    -- step over marks that follow start
    local nxt      = getnext(start)
    local nextchar = nxt and ischar(nxt,currentfont)
    while nextchar and marks[nextchar] do -- always sequence.flags[1]
        nxt      = getnext(nxt)
        nextchar = nxt and ischar(nxt,currentfont)
    end
    if nextchar then
        local exit  = exitanchors[3]
        local entry = exit and exitanchors[1][nextchar]
        entry = entry and entry[2]
        if entry then
            local r2lflag = sequence.flags[4] -- mentioned in the standard
            local dx, dy, bound = setcursive(start,nxt,factor,rlmode,exit,entry,characters[startchar],characters[nextchar],r2lflag)
            if trace_cursive then
                logprocess("%s: moving %s to %s cursive (%p,%p) using bound %s in %s mode",pref(dataset,sequence),gref(startchar),gref(nextchar),dx,dy,bound,mref(rlmode))
            end
            return head, start, true
        end
    end
    return head, start, false
end
--[[ldx--
I will implement multiple chain replacements once I run into a font that uses
it. It's not that complex to handle.
--ldx]]--
-- The table that collects the chain processors defined below.

local chainprocs = { }

-- Loggers for the chain processors. There used to be an identical pair in front
-- of this one that reported via report_subchain, but it was shadowed by these
-- definitions before it could ever be used, so it only consumed local slots.
--
-- When trace_steps is set the message is also passed to registermessage and in
-- "silent" mode nothing is reported at all.

local function logprocess(...)
    if trace_steps then
        registermessage(...)
        if trace_steps == "silent" then
            return
        end
    end
    report_chain(...)
end

local logwarning = report_chain
-- We could share functions but that would lead to extra function calls with many
-- arguments, redundant tests and confusing messages.
-- The reversesub is a special case, which is why we need to store the replacements
-- in a bit weird way. There is no lookup and the replacement comes from the lookup
-- itself. It is meant mostly for dealing with Urdu.
-- Replaces the character at start by the entry found for it in replacements.
-- The third return value tells whether a replacement took place.
local function reversesub(head,start,stop,dataset,sequence,replacements,rlmode,skiphash)
    local original   = getchar(start)
    local substitute = replacements[original]
    if not substitute then
        return head, start, false
    end
    if trace_singles then
        logprocess("%s: single reverse replacement of %s by %s",cref(dataset,sequence),gref(original),gref(substitute))
    end
    resetinjection(start)
    setchar(start,substitute)
    return head, start, true
end

chainprocs.reversesub = reversesub
--[[ldx--
This chain stuff is somewhat tricky since we can have a sequence of actions to be
applied: single, alternate, multiple or ligature where ligature can be an invalid
one in the sense that it will replace multiple by one but not necessarily one that
looks like the combination (i.e. it is the counterpart of multiple then). For
example, the following is valid:
xxxabcdexxx [single a->A][multiple b->BCD][ligature cde->E] xxxABCDExxx
Therefore we don't really do the replacement here already unless we have the
single lookup case. The efficiency of the replacements can be improved by deleting
as little as needed but that would also make the code even more messy.
--ldx]]--
--[[ldx--
Here we replace start by a single variant.
--ldx]]--
-- To be done (example needed): what if > 1 steps
-- this is messy: do we need this disc checking also in alternates?
-- Warns that a lookup used in a chain has no steps at all.
local function reportzerosteps(dataset,sequence)
    local where = cref(dataset,sequence)
    logwarning("%s: no steps",where)
end
-- Warns that a lookup used in a chain has more than one step.
local function reportmoresteps(dataset,sequence)
    local where = cref(dataset,sequence)
    logwarning("%s: more than 1 step",where)
end
-- local function reportbadsteps(dataset,sequence)
-- logwarning("%s: bad step, no proper return values",cref(dataset,sequence))
-- end
-- Resolves (and caches in currentlookup) the coverage table of a lookup that is
-- used in a chain. Only the first step is used; a warning is issued when there
-- are more. With zero steps the cached mapping becomes false, which is also
-- what gets returned then.
local function getmapping(dataset,sequence,currentlookup)
    local allsteps = currentlookup.steps
    local count    = currentlookup.nofsteps
    if count == 0 then
        reportzerosteps(dataset,sequence)
        currentlookup.mapping = false
        return false
    end
    if count > 1 then
        reportmoresteps(dataset,sequence)
    end
    local first    = allsteps[1]
    local coverage = first.coverage
    currentlookup.mapping = coverage
    currentlookup.format  = first.format
    return coverage
end
-- Removes the node at start from the list. The node that removenode hands back
-- is not returned itself: we return its predecessor as the new start.
function chainprocs.gsub_remove(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    if trace_chains then
        local where = cref(dataset,sequence,chainindex)
        logprocess("%s: removing character %s",where,gref(getchar(start)))
    end
    local newhead, current = removenode(head,start,true)
    return newhead, getprev(current), true
end
-- Chain variant of the single substitution. Starting at start we look for the
-- first node for which ischar returns a character (not going beyond stop) and
-- replace that character by its entry in the mapping of the lookup. Once such a
-- node is found we report success, also when the mapping has no usable entry.
function chainprocs.gsub_single(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if not mapping then
        return head, start, false
    end
    local node = start
    while node do
        local char = ischar(node)
        if char then
            local new = mapping[char]
            if new and new ~= "" then
                if trace_singles then
                    logprocess("%s: replacing single %s by %s",cref(dataset,sequence,chainindex),gref(char),gref(new))
                end
                resetinjection(node)
                setchar(node,new)
            elseif trace_bugs then
                logwarning("%s: no single for %s",cref(dataset,sequence,chainindex),gref(char))
            end
            return head, start, true
        end
        -- a false char can't happen; otherwise we quit when the end of the match is reached
        if char == false or node == stop then
            break
        end
        node = getnext(node)
    end
    return head, start, false
end
--[[ldx--
Here we replace start by new glyph. First we delete the rest of the match.
--ldx]]--
-- char_1 mark_1 -> char_x mark_1 (ignore marks)
-- char_1 mark_1 -> char_x
-- to be checked: do we always have just one glyph?
-- we can also have alternates for marks
-- marks come last anyway
-- are there cases where we need to delete the mark
-- Chain variant of the alternate substitution. Starting at start we look for the
-- first node for which ischar returns a character (not going beyond stop) and
-- replace that character by the alternative selected by get_alternative_glyph.
-- Once such a node is found we report success, also when there are no
-- alternatives or no valid choice.
--
-- The replacement is applied to the node that was actually inspected (current).
-- It used to be applied to start, which is a different node whenever the scan
-- had to step over nodes that are not characters.
function chainprocs.gsub_alternate(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if mapping then
        local kind    = dataset[4]
        local what    = dataset[1]
        -- when the feature is just enabled (true) the value comes from the font features
        local value   = what == true and tfmdata.shared.features[kind] or what -- todo: optimize in ctx
        local current = start
        while current do
            local currentchar = ischar(current)
            if currentchar then
                local alternatives = mapping[currentchar]
                if alternatives then
                    local choice, comment = get_alternative_glyph(current,alternatives,value)
                    if choice then
                        if trace_alternatives then
                            logprocess("%s: replacing %s by alternative %a to %s, %s",cref(dataset,sequence),gref(currentchar),choice,gref(choice),comment)
                        end
                        resetinjection(current)
                        setchar(current,choice)
                    else
                        if trace_alternatives then
                            logwarning("%s: no variant %a for %s, %s",cref(dataset,sequence),value,gref(currentchar),comment)
                        end
                    end
                end
                return head, start, true
            elseif currentchar == false then
                -- can't happen
                break
            elseif current == stop then
                break
            else
                current = getnext(current)
            end
        end
    end
    return head, start, false
end
--[[ldx--
Here we replace start by a sequence of new glyphs.
--ldx]]--
-- Chain variant of the multiple substitution: the character at start is
-- replaced by the sequence found in the mapping of the lookup. The real work
-- (and the return values in that case) are delegated to multiple_glyphs.
function chainprocs.gsub_multiple(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if not mapping then
        return head, start, false
    end
    local startchar = getchar(start)
    local expansion = mapping[startchar]
    if expansion and expansion ~= "" then
        if trace_multiples then
            logprocess("%s: replacing %s by multiple characters %s",cref(dataset,sequence),gref(startchar),gref(expansion))
        end
        return multiple_glyphs(head,start,expansion,skiphash,dataset[1],stop)
    end
    if trace_bugs then
        logwarning("%s: no multiple for %s",cref(dataset,sequence),gref(startchar))
    end
    return head, start, false
end
--[[ldx--
When we replace ligatures we use a helper that handles the marks. I might change
this function (move code inline and handle the marks by a separate function). We
assume rather stupid ligatures (no complex disc nodes).
--ldx]]--
-- compare to handlers.gsub_ligature which is more complex ... why
-- Chain variant of the ligature substitution. The mapping of the lookup is a
-- tree: each level is indexed by the next component and a level is either a
-- number (the ligature itself) or a table that can have a ligature field. We
-- walk from start towards stop, stepping over discretionaries (the first one is
-- remembered in discfound) and over characters that are in skiphash.
--
-- Returns head, start, a success flag, the number of matched components and the
-- first discretionary that was seen (or false).
--
-- The mark test on a matched component looks at the component itself (schar).
-- It used to index marks with an undeclared variable, so hasmarks could only
-- become true via the start character.
function chainprocs.gsub_ligature(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if mapping then
        local startchar = getchar(start)
        local ligatures = mapping[startchar]
        if not ligatures then
            if trace_bugs then
                logwarning("%s: no ligatures starting with %s",cref(dataset,sequence,chainindex),gref(startchar))
            end
        else
            local hasmarks        = marks[startchar]
            local current         = getnext(start)
            local discfound       = false
            local last            = stop
            local nofreplacements = 1
            while current do
                -- todo: ischar ... can there really be disc nodes here?
                local id = getid(current)
                if id == disc_code then
                    if not discfound then
                        discfound = current
                    end
                    if current == stop then
                        break -- okay? or before the disc
                    else
                        current = getnext(current)
                    end
                else
                    local schar = getchar(current)
                    if skiphash and skiphash[schar] then -- marks
                        -- if current == stop then -- maybe add this
                        --     break
                        -- else
                        current = getnext(current)
                        -- end
                    else
                        -- descend one level in the ligature tree (unless we already hit a leaf)
                        local lg = not tonumber(ligatures) and ligatures[schar]
                        if lg then
                            ligatures       = lg
                            last            = current
                            nofreplacements = nofreplacements + 1
                            if marks[schar] then
                                hasmarks = true
                            end
                            if current == stop then
                                break
                            else
                                current = getnext(current)
                            end
                        else
                            break
                        end
                    end
                end
            end
            local ligature = tonumber(ligatures) or ligatures.ligature
            if ligature then
                if chainindex then
                    -- in a chain we end at the last component that really matched
                    stop = last
                end
                if trace_ligatures then
                    if start == stop then
                        logprocess("%s: replacing character %s by ligature %s case 3",cref(dataset,sequence,chainindex),gref(startchar),gref(ligature))
                    else
                        logprocess("%s: replacing character %s upto %s by ligature %s case 4",cref(dataset,sequence,chainindex),gref(startchar),gref(getchar(stop)),gref(ligature))
                    end
                end
                head, start = toligature(head,start,stop,ligature,dataset,sequence,skiphash,discfound,hasmarks)
                return head, start, true, nofreplacements, discfound
            elseif trace_bugs then
                if start == stop then
                    logwarning("%s: replacing character %s by ligature fails",cref(dataset,sequence,chainindex),gref(startchar))
                else
                    logwarning("%s: replacing character %s upto %s by ligature fails",cref(dataset,sequence,chainindex),gref(startchar),gref(getchar(stop)))
                end
            end
        end
    end
    return head, start, false, 0, false
end
-- Chain variant of the single positioning. The kerns for the character at start
-- come from the mapping of the lookup; the format that getmapping stored in
-- currentlookup decides whether setposition or setmove/setkern is used.
--
-- This function has no injection parameter, so none is passed on. The non
-- single branch used to pass an undeclared variable of that name.
function chainprocs.gpos_single(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    -- we actually should check no_left_kern_code with next
    if not hasglyphoption(start,no_right_kern_code) then
        local mapping = currentlookup.mapping
        if mapping == nil then
            mapping = getmapping(dataset,sequence,currentlookup)
        end
        if mapping then
            local startchar = getchar(start)
            local kerns     = mapping[startchar]
            if kerns then
                local format = currentlookup.format
                if format == "single" then
                    local dx, dy, w, h = setposition(0,start,factor,rlmode,kerns) -- currentlookup.flags ?
                    if trace_kerns then
                        logprocess("%s: shifting single %s by %s (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),format,dx,dy,w,h)
                    end
                else -- needs checking .. maybe no kerns format for single
                    local k = (format == "move" and setmove or setkern)(start,factor,rlmode,kerns)
                    if trace_kerns then
                        logprocess("%s: shifting single %s by %s %p",cref(dataset,sequence),gref(startchar),format,k)
                    end
                end
                return head, start, true
            end
        end
    end
    return head, start, false
end
-- Chain variant of the pair positioning. The kerns for the character at start
-- come from the mapping of the lookup and are indexed by the character of the
-- next relevant glyph. Glyphs whose character is in skiphash are stepped over;
-- the scan stops at the first node that is not a character of the current font
-- or that has no entry in kerns.
--
-- Returns head, start and a flag that is true when an entry was found and
-- handled. Note that start can be advanced to the second glyph of the pair.
-- The names factor and forcepairadvance are defined outside this function.
function chainprocs.gpos_pair(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) -- todo: injections ?
-- we actually should check no_left_kern_code with next
if not hasglyphoption(start,no_right_kern_code) then
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local snext = getnext(start)
if snext then
local startchar = getchar(start)
local kerns = mapping[startchar] -- always 1 step
if kerns then
-- prev tracks the last node we stepped over (only used for tracing)
local prev = start
while snext do
local nextchar = ischar(snext,currentfont)
if not nextchar then
break
end
if skiphash and skiphash[nextchar] then
prev = snext
snext = getnext(snext)
else
local krn = kerns[nextchar]
if not krn then
break
end
local format = currentlookup.format
if format == "pair" then
-- krn[1] applies to the first glyph, krn[2] to the second one; the
-- value true stands for an entry that needs no adjustment
local a = krn[1]
local b = krn[2]
if a == true then
-- zero
elseif a then
local x, y, w, h = setposition(1,start,factor,rlmode,a,"injections") -- currentlookups flags?
if trace_kerns then
local startchar = getchar(start)
logprocess("%s: shifting first of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h)
end
end
if b == true then
-- zero
start = snext -- cf spec
elseif b then -- #b > 0
local x, y, w, h = setposition(2,snext,factor,rlmode,b,"injections")
if trace_kerns then
-- start is read here before it gets advanced below
local startchar = getchar(start)
logprocess("%s: shifting second of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h)
end
start = snext -- cf spec
elseif forcepairadvance then
start = snext -- for testing, not cf spec
end
return head, start, true
elseif krn ~= 0 then
-- a plain (non zero) value: applied to the second glyph as move or kern
local k = (format == "move" and setmove or setkern)(snext,factor,rlmode,krn)
if trace_kerns then
logprocess("%s: inserting %s %p between %s and %s",cref(dataset,sequence),format,k,gref(getchar(prev)),gref(nextchar))
end
return head, start, true
else
break
end
end
end
end
end
end
end
return head, start, false
end
-- Chain variant of the mark to base attachment. The anchors for the mark at
-- start come from the mapping of the lookup. We walk backwards over preceding
-- marks until a non mark character of the current font is found and anchor the
-- mark to that base with setmark.
--
-- Returns head, start and a flag that is true when the mark was anchored.
--
-- While backtracking, base and basechar have to be updated together. The loop
-- used to declare a fresh local basechar, so after skipping marks the anchor
-- lookup was still done with the character of the mark next to start.
function chainprocs.gpos_mark2base(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if mapping then
        local markchar = getchar(start)
        if marks[markchar] then
            local markanchors = mapping[markchar] -- always 1 step
            if markanchors then
                local base = getprev(start) -- [glyph] [start=mark]
                if base then
                    local basechar = ischar(base,currentfont)
                    if basechar then
                        if marks[basechar] then
                            while base do
                                base = getprev(base)
                                if base then
                                    -- deliberately no local: this must update the outer basechar
                                    basechar = ischar(base,currentfont)
                                    if basechar then
                                        if not marks[basechar] then
                                            break
                                        end
                                    else
                                        -- we ran into something that is not a character of this font
                                        if trace_bugs then
                                            logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
                                        end
                                        return head, start, false
                                    end
                                else
                                    -- we ran out of nodes
                                    if trace_bugs then
                                        logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
                                    end
                                    return head, start, false
                                end
                            end
                        end
                        local ba = markanchors[1][basechar]
                        if ba then
                            local ma = markanchors[2]
                            if ma then
                                local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
                                if trace_marks then
                                    logprocess("%s, bound %s, anchoring mark %s to basechar %s => (%p,%p)",
                                        cref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
                                end
                                return head, start, true
                            end
                        end
                    elseif trace_bugs then
                        logwarning("%s: prev node is no char, case %i",cref(dataset,sequence),1)
                    end
                elseif trace_bugs then
                    logwarning("%s: prev node is no char, case %i",cref(dataset,sequence),2)
                end
            elseif trace_bugs then
                logwarning("%s: mark %s has no anchors",cref(dataset,sequence),gref(markchar))
            end
        elseif trace_bugs then
            logwarning("%s: mark %s is no mark",cref(dataset,sequence),gref(markchar))
        end
    end
    return head, start, false
end
-- Chain variant of the mark to ligature attachment. It works like the mark to
-- base variant, but the anchors found for the base are additionally indexed by
-- the ligature index that getligaindex reports for the mark.
--
-- Returns head, start and a flag that is true when the mark was anchored.
--
-- Fixed here: (1) the backtrack loop declared a fresh local basechar, so after
-- skipping marks the anchor lookup still used the character of the mark next to
-- start; (2) the trace call referred to an undeclared variable in front of
-- bound; (3) two warnings passed the raw markchar instead of gref(markchar).
function chainprocs.gpos_mark2ligature(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if mapping then
        local markchar = getchar(start)
        if marks[markchar] then
            local markanchors = mapping[markchar] -- always 1 step
            if markanchors then
                local base = getprev(start) -- [glyph] [optional marks] [start=mark]
                if base then
                    local basechar = ischar(base,currentfont)
                    if basechar then
                        if marks[basechar] then
                            while base do
                                base = getprev(base)
                                if base then
                                    -- deliberately no local: this must update the outer basechar
                                    basechar = ischar(base,currentfont)
                                    if basechar then
                                        if not marks[basechar] then
                                            break
                                        end
                                    else
                                        -- we ran into something that is not a character of this font
                                        if trace_bugs then
                                            logwarning("%s: no base for mark %s, case %i",cref(dataset,sequence),gref(markchar),1)
                                        end
                                        return head, start, false
                                    end
                                else
                                    -- we ran out of nodes
                                    if trace_bugs then
                                        logwarning("%s: no base for mark %s, case %i",cref(dataset,sequence),gref(markchar),2)
                                    end
                                    return head, start, false
                                end
                            end
                        end
                        local ba = markanchors[1][basechar]
                        if ba then
                            local ma = markanchors[2]
                            if ma then
                                -- pick the anchor that belongs to the ligature component of this mark
                                local index = getligaindex(start)
                                ba = ba[index]
                                if ba then
                                    local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
                                    if trace_marks then
                                        logprocess("%s, bound %s, anchoring mark %s to baselig %s at index %s => (%p,%p)",
                                            cref(dataset,sequence),bound,gref(markchar),gref(basechar),index,dx,dy)
                                    end
                                    return head, start, true
                                end
                            end
                        end
                    elseif trace_bugs then
                        logwarning("%s, prev node is no char, case %i",cref(dataset,sequence),1)
                    end
                elseif trace_bugs then
                    logwarning("%s, prev node is no char, case %i",cref(dataset,sequence),2)
                end
            elseif trace_bugs then
                logwarning("%s, mark %s has no anchors",cref(dataset,sequence),gref(markchar))
            end
        elseif trace_bugs then
            logwarning("%s, mark %s is no mark",cref(dataset,sequence),gref(markchar))
        end
    end
    return head, start, false
end
-- Chain variant of the mark to mark attachment. The anchors for the mark at
-- start come from the mapping of the lookup. The candidate base mark is the
-- previous node; when the mark has a ligature index we first step back over
-- nodes that carry a different ligature index. The third return value is true
-- when the mark was anchored.
function chainprocs.gpos_mark2mark(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if not mapping then
        return head, start, false
    end
    local markchar = getchar(start)
    if not marks[markchar] then
        if trace_bugs then
            logwarning("%s: mark %s is no mark",cref(dataset,sequence),gref(markchar))
        end
        return head, start, false
    end
    local markanchors = mapping[markchar] -- always 1 step
    if not markanchors then
        if trace_bugs then
            logwarning("%s: mark %s has no anchors",cref(dataset,sequence),gref(markchar))
        end
        return head, start, false
    end
    local target = getprev(start) -- [glyph] [basemark] [start=mark]
    local index  = getligaindex(start)
    if index then -- a rather messy loop ... needs checking with husayni
        while target do
            local other = getligaindex(target)
            if not other or other == index then
                break
            end
            target = getprev(target)
        end
    end
    if not target then
        if trace_bugs then
            logwarning("%s: prev node is no mark, case %i",cref(dataset,sequence),2)
        end
        return head, start, false
    end
    local targetchar = ischar(target,currentfont) -- subtype test can go
    if not targetchar then
        if trace_bugs then
            logwarning("%s: prev node is no mark, case %i",cref(dataset,sequence),1)
        end
        return head, start, false
    end
    -- the mark anchors are only fetched when the base mark has anchors
    local ba = markanchors[1][targetchar]
    local ma = ba and markanchors[2]
    if ma then
        local dx, dy, bound = setmark(start,target,factor,rlmode,ba,ma,characters[targetchar],true,checkmarks)
        if trace_marks then
            logprocess("%s, bound %s, anchoring mark %s to basemark %s => (%p,%p)",
                cref(dataset,sequence),bound,gref(markchar),gref(targetchar),dx,dy)
        end
        return head, start, true
    end
    return head, start, false
end
-- Chained cursive attachment. The exit anchor of the glyph at start is connected
-- to the entry anchor of the first following character of the current font that
-- is not a mark. Returns head, start and a boolean that is true when setcursive
-- was applied.
function chainprocs.gpos_cursive(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
    local mapping = currentlookup.mapping
    if mapping == nil then
        mapping = getmapping(dataset,sequence,currentlookup)
    end
    if not mapping then
        return head, start, false
    end
    local startchar   = getchar(start)
    local exitanchors = mapping[startchar] -- always 1 step
    if not exitanchors then
        if trace_cursive and trace_details then
            -- NOTE(review): alreadydone is not defined in the visible part of the file and
            -- the format string has no slot for it, so it is presumably a leftover
            logprocess("%s, cursive %s is already done",pref(dataset,sequence),gref(getchar(start)),alreadydone)
        end
        return head, start, false
    end
    if marks[startchar] then
        if trace_cursive then
            logprocess("%s: ignoring cursive for mark %s",pref(dataset,sequence),gref(startchar))
        end
        return head, start, false
    end
    local nxt = getnext(start)
    while nxt do
        local nextchar = ischar(nxt,currentfont)
        if not nextchar then
            break
        end
        if marks[nextchar] then
            -- should not happen (maybe warning)
            nxt = getnext(nxt)
        else
            local exit = exitanchors[3]
            if not exit then
                if trace_bugs then
                    onetimemessage(currentfont,startchar,"no entry anchors")
                end
                break
            end
            local entry = exitanchors[1][nextchar]
            entry = entry and entry[2]
            if not entry then
                break
            end
            local r2lflag = sequence.flags[4] -- mentioned in the standard
            local dx, dy, bound = setcursive(start,nxt,factor,rlmode,exit,entry,characters[startchar],characters[nextchar],r2lflag)
            if trace_cursive then
                logprocess("%s: moving %s to %s cursive (%p,%p) using bound %s in %s mode",pref(dataset,sequence),gref(startchar),gref(nextchar),dx,dy,bound,mref(rlmode))
            end
            return head, start, true
        end
    end
    return head, start, false
end
-- what pointer to return, spec says stop
-- to be discussed ... is bidi changer a space?
-- elseif char == zwnj and sequence[n][32] then -- brrr
-- Reports (as a warning) that a character is skipped while a contextual rule is
-- matched; ck is the rule, of which the rule number and lookup type are shown.
local function show_skip(dataset,sequence,char,ck,class)
    local where      = cref(dataset,sequence)
    local what       = gref(char)
    local rule       = ck[1]
    local lookuptype = ck[8] or ck[2]
    logwarning("%s: skipping char %s, class %a, rule %a, lookuptype %a",where,what,class,rule,lookuptype)
end
-- A previous version had disc collapsing code in the (single sub) handler plus some
-- checking in the main loop, but that left the pre/post sequences undone. The best
-- solution is to add some checking there and backtrack when a replace/post matches
-- but it takes a bit of work to figure out an efficient way (this is what the
-- sweep* names refer to). I might look into that variant one day again as it can
-- replace some other code too. In that approach we can have a special version for
-- gsub and gpos which gains some speed. This method does the test and passes info to
-- the handlers. Here collapsing is handled in the main loop which also makes code
-- elsewhere simpler (i.e. no need for the other special runners and disc code in
-- ligature building). I also experimented with pushing preceding glyphs sequences
-- in the replace/pre fields beforehand which saves checking afterwards but at the
-- cost of duplicate glyphs (memory) but it's too much overhead (runtime).
--
-- In the meantime Kai had moved the code from the single chain into a more general
-- handler and this one (renamed to chaindisk) is used now. I optimized the code a
-- bit and brought it in sync with the other code. Hopefully I didn't introduce
-- errors. Note: this somewhat complex approach is meant for fonts that implement
-- (for instance) ligatures by character replacement which to some extent is not
-- that suitable for hyphenation. I also use some helpers. This method passes some
-- states but reparses the list. There is room for a bit of speed up but that will
-- be done in the context version. (In fact a partial rewrite of all code can bring
-- some more efficiency.)
--
-- I didn't test it with extremes but successive disc nodes still can give issues
-- but in order to handle that we need more complex code which also slows down even
-- more. The main loop variant could deal with that: test, collapse, backtrack.
-- Kern constructor: when nuts.pool provides newkern we use that one (context),
-- otherwise we fall back on copying a template kern node (generic).
local userkern = nuts.pool and nuts.pool.newkern -- context
if not userkern then -- generic
    local template = nuts.new("kern",1) -- userkern
    local setkern  = nuts.setkern -- not injections.setkern
    userkern = function(amount)
        local kern = copynode(template)
        setkern(kern,amount)
        return kern
    end
end
-- Walks the list that starts at head and replaces every glue node by a kern of
-- the same width; the glue nodes are flushed. Returns the (possibly new) head.
local function checked(head)
    local current = head
    while current do
        if getid(current) ~= glue_code then
            current = getnext(current)
        else
            local kern = userkern(getwidth(current))
            local before, after = getboth(current)
            if current == head then
                -- the kern becomes the new head and only gets linked forward
                if after then
                    setlink(kern,after)
                end
                head = kern
            else
                setlink(before,kern,after)
            end
            flushnode(current)
            current = after
        end
    end
    return head
end
-- Same as setdisc but each of the given lists is first passed through checked,
-- so that glue nodes in them are turned into kerns.
local function setdiscchecked(d,pre,post,replace)
    local newpre     = pre     and checked(pre)
    local newpost    = post    and checked(post)
    local newreplace = replace and checked(replace)
    setdisc(d,newpre,newpost,newreplace)
end
-- NOTE(review): noflags is not referenced in the part of the file that is visible here.
local noflags = { false, false, false, false }
-- Applies the actions of a matched contextual rule ck to the nodes that start at start.
--
-- ck[4] and ck[5] are the first and last position of the current match, so size is the
-- number of matched current positions. ck[6] holds the chained lookups: per position a
-- list of steps (or false) that are dispatched via chainprocs[step.type]. When there are
-- no chained lookups, ck[7] (replacements) is handed to reversesub, and when that is also
-- absent the match is just flagged as done. The where argument is only used in the trace
-- message.
--
-- Returns head, start and a boolean done.
local function chainrun(head,start,last,dataset,sequence,rlmode,skiphash,ck,where)
local size = ck[5] - ck[4] + 1
local chainlookups = ck[6]
local done = false
-- current match
if chainlookups then
-- Lookups can be like { 1, false, 3 } or { false, 2 } or basically anything and
-- #lookups can be less than #current
if size == 1 then
-- if nofchainlookups > size then
-- -- bad rules
-- end
-- single position: all steps of the first lookup slot are applied to start
local chainlookup = chainlookups[1]
if chainlookup then
for j=1,#chainlookup do
local chainstep = chainlookup[j]
if chainstep then
local chainkind = chainstep.type
local chainproc = chainprocs[chainkind]
if chainproc then
local ok
-- HH: chainindex 1 added here (for KAI to check too), there are weird ligatures e.g.
-- char + mark -> char where mark has to disappear
head, start, ok = chainproc(head,start,last,dataset,sequence,chainstep,rlmode,skiphash,1)
if ok then
done = true
end
else
logprocess("%s: %s is not yet supported (1)",cref(dataset,sequence),chainkind)
end
else
logprocess("%s: has an issue (1)",cref(dataset,sequence))
end
end
else
-- whatever
end
else
-- See LookupType 5: Contextual Substitution Subtable. Now it becomes messy. The
-- easiest case is where #current maps on #lookups i.e. one-to-one. But what if
-- we have a ligature. Cf the spec we then need to advance one character but we
-- really need to test it as there are fonts out there that are fuzzy and have
-- too many lookups:
--
-- U+1105 U+119E U+1105 U+119E : sourcehansansklight: script=hang ccmp=yes
--
-- Even worse are these family emoji shapes as they can have multiple lookups
-- per slot (probably only for gpos).
-- It's very unlikely that we will have skip classes here but still ... we seldom
-- enter this branch anyway.
local i = 1
local laststart = start
local nofchainlookups = #chainlookups -- useful?
while start do
if skiphash then -- hm, so we know we skip some
-- move start over characters of the current font that are in the skip hash
while start do
local char = ischar(start,currentfont)
if char then
if skiphash and skiphash[char] then
start = getnext(start)
else
break
end
else
break
end
end
end
local chainlookup = chainlookups[i]
if chainlookup then
for j=1,#chainlookup do
local chainstep = chainlookup[j]
if chainstep then
local chainkind = chainstep.type
local chainproc = chainprocs[chainkind]
if chainproc then
local ok, n
head, start, ok, n = chainproc(head,start,last,dataset,sequence,chainstep,rlmode,skiphash,i)
-- messy since last can be changed !
if ok then
done = true
if n and n > 1 and i + n > nofchainlookups then
-- this is a safeguard, we just ignore the rest of the lookups
i = size -- prevents an advance
break
end
end
else
-- actually an error
logprocess("%s: %s is not yet supported (2)",cref(dataset,sequence),chainkind)
end
else
-- actually an error
logprocess("%s: has an issue (2)",cref(dataset,sequence))
end
end
else
-- we skip but we could also delete as option .. what does an empty lookup actually mean
-- in opentype ... anyway, we could map it onto gsub_remove if needed
end
-- advance to the next position unless we are past the match or ran out of nodes
i = i + 1
if i > size or not start then
break
elseif start then
laststart = start
start = getnext(start)
end
end
-- never return a nil start: fall back on the last node that we visited
if not start then
start = laststart
end
end
else
-- todo: needs checking for holes in the replacements
local replacements = ck[7]
if replacements then
head, start, done = reversesub(head,start,last,dataset,sequence,replacements,rlmode,skiphash)
else
done = true
if trace_contexts then
logprocess("%s: skipping match @ %i",cref(dataset,sequence),where)
end
end
end
return head, start, done
end
-- Applies a matched contextual rule ck when a discretionary is involved; it is called by
-- handle_contextchain when a disc node was seen while matching or when we are sweeping
-- (sweepnode is set).
--
-- The function walks over the current part (positions ck[4] .. ck[5]), the part after it
-- (when ck[5] < #ck[3]) and the part before it (when ck[4] > 1). The first disc node that
-- is met is remembered as lookaheaddisc or backtrackdisc, later ones are passed to
-- flattendisk. After that the affected nodes are moved into the remembered disc: the
-- original nodes go into one field and a copy into the replace field, and chainrun is
-- applied to each variant unless the matcher flagged it in notmatchpre, notmatchpost or
-- notmatchreplace. Without such a disc chainrun is applied to the main list.
--
-- Returns head, start, done and the disc node that was used (or nil).
local function chaindisk(head,start,dataset,sequence,rlmode,skiphash,ck)
if not start then
return head, start, false
end
local startishead = start == head
local seq = ck[3]
local f = ck[4]
local l = ck[5]
local s = #seq
local done = false
local sweepnode = sweepnode
local sweeptype = sweeptype
local sweepoverflow = false
local checkdisc = getprev(head)
local keepdisc = not sweepnode
local lookaheaddisc = nil
local backtrackdisc = nil
local current = start
local last = start
local prev = getprev(start)
local hasglue = false
local useddisc = nil -- new 2022-09-25
local usedstart = start -- new 2022-09-25
-- fishy: so we can overflow and then go on in the sweep?
-- todo : id can also be glue_code as we checked spaces
-- phase 1: walk over the current part of the match (positions f .. l)
local i = f
while i <= l do
local id = getid(current)
if id == glyph_code then
i = i + 1
last = current
current = getnext(current)
elseif id == glue_code then
i = i + 1
last = current
current = getnext(current)
hasglue = true
elseif id == disc_code then
if keepdisc then
keepdisc = false
lookaheaddisc = current
local replace = getreplace(current)
if not replace then
sweepoverflow = true
sweepnode = current
current = getnext(current)
else
-- we can use an iterator
while replace and i <= l do
if getid(replace) == glyph_code then
i = i + 1
end
replace = getnext(replace)
end
-- NOTE(review): this continues from the node inside the replace list (or from nil when
-- that list is exhausted) while the comparable look ahead scan below continues with
-- getnext(current); looks suspicious, to be confirmed before changing
current = getnext(replace)
end
last = current
else
head, current = flattendisk(head,current)
end
else
last = current
current = getnext(current)
end
if current then
-- go on
elseif sweepoverflow then
-- we already are following up on sweepnode
break
elseif sweeptype == "post" or sweeptype == "replace" then
current = getnext(sweepnode)
if current then
sweeptype = nil
sweepoverflow = true
else
break
end
else
break -- added
end
end
-- phase 2: when we ran past the sweep node the nodes that follow it (up to current) are
-- detached from the list and passed to appenddisc
if sweepoverflow then
local prev = current and getprev(current)
if not current or prev ~= sweepnode then
local head = getnext(sweepnode)
local tail = nil
if prev then
tail = prev
setprev(current,sweepnode)
else
tail = findnodetail(head)
end
setnext(sweepnode,current)
setprev(head)
setnext(tail)
appenddisc(sweepnode,head)
end
end
-- phase 3: walk over the part after the current match (positions l+1 .. s)
if l < s then
local i = l
local t = sweeptype == "post" or sweeptype == "replace"
while current and i < s do
local id = getid(current)
if id == glyph_code then
i = i + 1
current = getnext(current)
elseif id == glue_code then
i = i + 1
current = getnext(current)
hasglue = true
elseif id == disc_code then
if keepdisc then
keepdisc = false
if notmatchpre[current] ~= notmatchreplace[current] then
lookaheaddisc = current
end
-- we assume a simple text only replace (we could use nuts.count)
local replace = getreplace(current)
while replace and i < s do
if getid(replace) == glyph_code then
i = i + 1
end
replace = getnext(replace)
end
current = getnext(current)
elseif notmatchpre[current] ~= notmatchreplace[current] then
head, current = flattendisk(head,current)
else
current = getnext(current) -- HH
end
else
current = getnext(current)
end
if not current and t then
current = getnext(sweepnode)
if current then
sweeptype = nil
end
end
end
end
-- phase 4: walk back over the part before the current match (positions f-1 .. 1)
if f > 1 then
local current = prev
local i = f
local t = sweeptype == "pre" or sweeptype == "replace"
if not current and t and current == checkdisc then
current = getprev(sweepnode)
end
while current and i > 1 do -- missing getprev added / moved outside
local id = getid(current)
if id == glyph_code then
i = i - 1
elseif id == glue_code then
i = i - 1
hasglue = true
elseif id == disc_code then
if keepdisc then
keepdisc = false
if notmatchpost[current] ~= notmatchreplace[current] then
backtrackdisc = current
end
-- we assume a simple text only replace (we could use nuts.count)
local replace = getreplace(current)
while replace and i > 1 do
if getid(replace) == glyph_code then
i = i - 1
end
replace = getnext(replace)
end
elseif notmatchpost[current] ~= notmatchreplace[current] then
head, current = flattendisk(head,current)
end
end
current = getprev(current)
if t and current == checkdisc then
current = getprev(sweepnode)
end
end
end
-- phase 5: apply the rule; this local shadows the done that was declared at the top
local done = false
if lookaheaddisc then
-- the nodes from start (plus marks, see the loop) up to the disc are cut out of the list
local cf = start
local cl = getprev(lookaheaddisc)
local cprev = getprev(start)
local insertedmarks = 0
while cprev do
local char = ischar(cf,currentfont)
if char and marks[char] then
insertedmarks = insertedmarks + 1
cf = cprev
startishead = cf == head
cprev = getprev(cprev)
else
break
end
end
setlink(cprev,lookaheaddisc)
setprev(cf)
setnext(cl)
if startishead then
head = lookaheaddisc
end
-- the cut out list is prepended to pre and a copy of it is prepended to replace
local pre, post, replace = getdisc(lookaheaddisc)
local new = copynodelist(cf) -- br, how often does that happen
local cnew = new
if pre then
setlink(findnodetail(cf),pre)
end
if replace then
local tail = findnodetail(new)
setlink(tail,replace)
end
for i=1,insertedmarks do
cnew = getnext(cnew)
end
cl = start
local clast = cnew
for i=f,l do
cl = getnext(cl)
clast = getnext(clast)
end
if not notmatchpre[lookaheaddisc] then
local ok = false
cf, start, ok = chainrun(cf,start,cl,dataset,sequence,rlmode,skiphash,ck,1)
if ok then
done = true
end
end
if not notmatchreplace[lookaheaddisc] then
local ok = false
new, cnew, ok = chainrun(new,cnew,clast,dataset,sequence,rlmode,skiphash,ck,2)
if ok then
done = true
end
end
if hasglue then
setdiscchecked(lookaheaddisc,cf,post,new)
else
setdisc(lookaheaddisc,cf,post,new)
end
start = getprev(lookaheaddisc)
useddisc = lookaheaddisc -- new 2022-09-25
sweephead[cf] = getnext(clast) or false
sweephead[new] = getnext(cl) or false
elseif backtrackdisc then
-- the nodes after the disc up to start (plus following marks) are cut out of the list
local cf = getnext(backtrackdisc)
local cl = start
local cnext = getnext(start)
local insertedmarks = 0
while cnext do
local char = ischar(cnext,currentfont)
if char and marks[char] then
insertedmarks = insertedmarks + 1
cl = cnext
cnext = getnext(cnext)
else
break
end
end
setlink(backtrackdisc,cnext)
setprev(cf)
setnext(cl)
local pre, post, replace, pretail, posttail, replacetail = getdisc(backtrackdisc,true)
local new = copynodelist(cf)
local cnew = findnodetail(new)
for i=1,insertedmarks do
cnew = getprev(cnew)
end
local clast = cnew
for i=f,l do
clast = getnext(clast)
end
if not notmatchpost[backtrackdisc] then
local ok = false
cf, start, ok = chainrun(cf,start,last,dataset,sequence,rlmode,skiphash,ck,3)
if ok then
done = true
end
end
if not notmatchreplace[backtrackdisc] then
local ok = false
new, cnew, ok = chainrun(new,cnew,clast,dataset,sequence,rlmode,skiphash,ck,4)
if ok then
done = true
end
end
-- the cut out list is appended to post and its copy is appended to replace
if post then
setlink(posttail,cf)
else
post = cf
end
if replace then
setlink(replacetail,new)
else
replace = new
end
if hasglue then
setdiscchecked(backtrackdisc,pre,post,replace)
else
setdisc(backtrackdisc,pre,post,replace)
end
start = getprev(backtrackdisc)
useddisc = backtrackdisc -- new 2022-09-25
sweephead[post] = getnext(clast) or false
sweephead[replace] = getnext(last) or false
else
-- no disc to take care of: run the rule on the main list
local ok = false
head, start, ok = chainrun(head,start,last,dataset,sequence,rlmode,skiphash,ck,5)
if ok then
done = true
end
end
if useddisc and start ~= usedstart then -- make this option per font -- new 2022-09-25
start = getnext(start) -- new 2022-09-25
end -- new 2022-09-25
return head, start, done, useddisc -- new 2022-09-25
end
-- Tracer for contextual matching. There are two ways to call it:
--
--   chaintrac(head,start,dataset,sequence,...) : entering, reports the rule and match state
--   chaintrac(start,done)                      : leaving, here the first argument is the
--                                                node and the second the done state
--
-- The nesting level and the start node per level are kept in upvalues.
local chaintrac do
    local level  = 0
    local starts = { }
    chaintrac = function(head,start,dataset,sequence,rlmode,skiphash,ck,match,discseen,sweepnode)
        if dataset then
            level = level + 1
            starts[level] = start
            local rule       = ck[1]
            local lookuptype = ck[8] or ck[2]
            local nofseq     = #ck[3] -- ck[3].n
            local first      = ck[4]
            local last       = ck[5]
            local char       = getchar(start)
            logwarning("+ %i : %s: rule %s %s at char %s for (%s,%s,%s) chars, lookuptype %a, %sdisc seen, %ssweeping",
                level,cref(dataset,sequence),rule,match and "matches" or "nomatch",
                gref(char),first-1,last-first+1,nofseq-last,lookuptype,
                discseen and "" or "no ", sweepnode and "" or "not ")
        else
            -- (start,done)
            local what  = start and "done" or "continue"
            local where = head == starts[level] and "same" or "different"
            local char  = getchar(head)
            if char then
                logwarning("- %i : %s at char %s, %s node",level,what,gref(char),where)
            else
                logwarning("- %i : %s, %s node",level,what,where)
            end
            level = level - 1
        end
    end
end
-- The next one is quite optimized but still somewhat slow, fonts like ebgaramond
-- are real torture tests because they have many steps with one context (having
-- multiple contexts makes more sense) also because we (can) reduce them. Instead of
-- a match boolean variable and check for that I decided to use a goto with labels
-- instead. This is one of the cases where it makes the code more readable and we
-- might even gain a bit performance.
-- when we have fewer replacements (lookups) than current matches we can push too much into
-- the previous disc .. so be it (with only f done)
-- Matches the contextual rules in contexts against the node list around start. For each rule
-- ck the start character must be in ck[3][ck[4]]; then the rest of the current part (positions
-- ck[4]+1 .. ck[5]) is matched forward, the part before (ck[4]-1 .. 1) backward and the part
-- after (ck[5]+1 .. n) forward. Characters in skiphash are stepped over. Disc nodes that are
-- met get flagged in notmatchpre, notmatchpost and notmatchreplace so that chaindisk knows
-- which variants of the discretionary qualify.
--
-- A rule that matches is applied with chaindisk (when a disc was seen or when we are sweeping)
-- or chainrun (otherwise); the first rule that reports done ends the loop.
--
-- Returns head, start and done (done is nil when no rule was applied).
local function handle_contextchain(head,start,dataset,sequence,contexts,rlmode,skiphash)
if not contexts then
return head, start, false
end
-- optimizing for rlmode gains nothing
local sweepnode = sweepnode
local sweeptype = sweeptype
local postreplace
local prereplace
local checkdisc
local discseen -- = false
if sweeptype then
if sweeptype == "replace" then
postreplace = true
prereplace = true
else
postreplace = sweeptype == "post"
prereplace = sweeptype == "pre"
end
checkdisc = getprev(head)
end
local currentfont = currentfont
local skipped -- = false
local startprev, startnext = getboth(start)
local done -- = false
-- we can have multiple hits and as we scan (currently) all we need to check
-- if we have a match ... contextchains have no real coverage table (with
-- unique entries)
-- fonts can have many steps (each doing one check) or many contexts
-- todo: make a per-char cache so that we have small contexts (when we have a context
-- n == 1 and otherwise it can be more so we can even distinguish n == 1 or more)
local nofcontexts = contexts.n -- #contexts
-- This used to read "nofcontext == 1 or ischar(...)" where nofcontext was a misspelled (and
-- thereby undefined global) nofcontexts, so the test was always false. Using the right name
-- is no option: startchar would become true and seq[f][true] below would then reject every
-- single context rule. So we always need the character and just drop the dead test.
local startchar = ischar(start,currentfont)
for k=1,nofcontexts do -- does this disc mess work well with n > 1
local ck = contexts[k]
local seq = ck[3]
local f = ck[4] -- first current
local last = start
if not startchar or not seq[f][startchar] then
-- report("no hit in %a at %i of %i contexts",sequence.type,k,nofcontexts)
goto next
end
local s = seq.n -- or #seq
if s == 1 then
-- bit weird case: why use a chain, but it is a hit
else
local l = ck[5] -- last current
local current = start
-- local last = start
-- current match
if l > f then
-- before/current/after | before/current | current/after
local discfound -- = nil
local n = f + 1
last = startnext -- the second in current (first already matched)
while n <= l do
if postreplace and not last then
last = getnext(sweepnode)
sweeptype = nil
end
if last then
local char, id = ischar(last,currentfont)
if char then
if skiphash and skiphash[char] then
skipped = true
if trace_skips then
show_skip(dataset,sequence,char,ck,classes[char])
end
last = getnext(last)
elseif seq[n][char] then
if n < l then
last = getnext(last)
end
n = n + 1
elseif discfound then
notmatchreplace[discfound] = true
if notmatchpre[discfound] then
goto next
else
break
end
else
goto next
end
elseif char == false then
if discfound then
notmatchreplace[discfound] = true
if notmatchpre[discfound] then
goto next
else
break
end
else
goto next
end
elseif id == disc_code then
-- elseif id == disc_code and (not discs or discs[last]) then
discseen = true
discfound = last
notmatchpre[last] = nil
notmatchpost[last] = true
notmatchreplace[last] = nil
local pre, post, replace = getdisc(last)
if pre then
local n = n
while pre do
if seq[n][getchar(pre)] then
n = n + 1
if n > l then
break
end
pre = getnext(pre)
else
notmatchpre[last] = true
break
end
end
if n <= l then
notmatchpre[last] = true
end
else
notmatchpre[last] = true
end
if replace then
-- so far we never entered this branch
while replace do
if seq[n][getchar(replace)] then
n = n + 1
if n > l then
break
end
replace = getnext(replace)
else
notmatchreplace[last] = true
if notmatchpre[last] then
goto next
else
break
end
end
end
-- why here again
if notmatchpre[last] then
goto next
end
end
-- maybe only if match
last = getnext(last)
else
goto next
end
else
goto next
end
end
end
-- before
if f > 1 then
-- if startprev then -- new 2022-09-25
local prev = startprev
if prereplace and prev == checkdisc then
prev = getprev(sweepnode)
end
if prev then
local discfound -- = nil
local n = f - 1
while n >= 1 do
if prev then
local char, id = ischar(prev,currentfont)
if char then
if skiphash and skiphash[char] then
skipped = true
if trace_skips then
show_skip(dataset,sequence,char,ck,classes[char])
end
prev = getprev(prev)
elseif seq[n][char] then
if n > 1 then
prev = getprev(prev)
end
n = n - 1
elseif discfound then
notmatchreplace[discfound] = true
if notmatchpost[discfound] then
goto next
else
break
end
else
goto next
end
elseif char == false then
if discfound then
notmatchreplace[discfound] = true
if notmatchpost[discfound] then
goto next
end
else
goto next
end
break
elseif id == disc_code then
-- elseif id == disc_code and (not discs or discs[prev]) then
-- the special case: f i where i becomes dottless i ..
discseen = true
discfound = prev
notmatchpre[prev] = true
notmatchpost[prev] = nil
notmatchreplace[prev] = nil
local pre, post, replace, pretail, posttail, replacetail = getdisc(prev,true)
-- weird test: needs checking
if pre ~= start and post ~= start and replace ~= start then
if post then
local n = n
while posttail do
if seq[n][getchar(posttail)] then
n = n - 1
if posttail == post or n < 1 then
break
else
posttail = getprev(posttail)
end
else
notmatchpost[prev] = true
break
end
end
if n >= 1 then
notmatchpost[prev] = true
end
else
notmatchpost[prev] = true
end
if replace then
-- we seldom enter this branch (e.g. on brill efficient)
while replacetail do
if seq[n][getchar(replacetail)] then
n = n - 1
if replacetail == replace or n < 1 then
break
else
replacetail = getprev(replacetail)
end
else
notmatchreplace[prev] = true
if notmatchpost[prev] then
goto next
else
break
end
end
end
else
-- notmatchreplace[prev] = true -- not according to Kai
end
end
prev = getprev(prev)
-- elseif id == glue_code and seq[n][32] and isspace(prev,threshold,id) then
-- elseif seq[n][32] and spaces[prev] then
-- n = n - 1
-- prev = getprev(prev)
elseif id == glue_code then
local sn = seq[n]
if (sn[32] and spaces[prev]) or sn[0xFFFC] then
n = n - 1
prev = getprev(prev)
else
goto next
end
elseif seq[n][0xFFFC] then
n = n - 1
prev = getprev(prev)
else
goto next
end
else
goto next
end
end
else
goto next
end
-- else -- new 2022-09-25
-- goto next -- new 2022-09-25
-- end -- new 2022-09-25
end
-- after
if s > l then
local current = last and getnext(last)
if not current and postreplace then
current = getnext(sweepnode)
end
if current then
local discfound -- = nil
local n = l + 1
while n <= s do
if current then
local char, id = ischar(current,currentfont)
if char then
if skiphash and skiphash[char] then
skipped = true
if trace_skips then
show_skip(dataset,sequence,char,ck,classes[char])
end
current = getnext(current) -- was absent
elseif seq[n][char] then
if n < s then -- new test
current = getnext(current) -- was absent
end
n = n + 1
elseif discfound then
notmatchreplace[discfound] = true
if notmatchpre[discfound] then
goto next
else
break
end
else
goto next
end
elseif char == false then
if discfound then
notmatchreplace[discfound] = true
if notmatchpre[discfound] then
goto next
else
break
end
else
goto next
end
elseif id == disc_code then
-- elseif id == disc_code and (not discs or discs[current]) then
discseen = true
discfound = current
notmatchpre[current] = nil
notmatchpost[current] = true
notmatchreplace[current] = nil
local pre, post, replace = getdisc(current)
if pre then
local n = n
while pre do
if seq[n][getchar(pre)] then
n = n + 1
if n > s then
break
else
pre = getnext(pre)
end
else
notmatchpre[current] = true
break
end
end
if n <= s then
notmatchpre[current] = true
end
else
notmatchpre[current] = true
end
if replace then
-- so far we never entered this branch
while replace do
if seq[n][getchar(replace)] then
n = n + 1
if n > s then
break
else
replace = getnext(replace)
end
else
notmatchreplace[current] = true
if notmatchpre[current] then
goto next
else
break
end
end
end
else
-- notmatchreplace[current] = true -- not according to Kai
end
current = getnext(current)
elseif id == glue_code then
local sn = seq[n]
if (sn[32] and spaces[current]) or sn[0xFFFC] then
n = n + 1
current = getnext(current)
else
goto next
end
elseif seq[n][0xFFFC] then
n = n + 1
current = getnext(current)
else
goto next
end
else
goto next
end
end
else
goto next
end
end
end
-- we have a match, so now the rule gets applied
if trace_contexts then
chaintrac(head,start,dataset,sequence,rlmode,skipped and skiphash,ck,true,discseen,sweepnode)
end
if discseen or sweepnode then
-- When we process a disc we can collapse and therefore we backtrack one node (start) and
-- reprocess. This is needed because there might be more in the collapsed list.
head, start, done = chaindisk(head,start,dataset,sequence,rlmode,skipped and skiphash,ck)
else
head, start, done = chainrun(head,start,last,dataset,sequence,rlmode,skipped and skiphash,ck,6)
end
if trace_contexts then
chaintrac(start,done)
end
if done then
break
-- else
-- next context
end
::next::
end
-- the flags are only valid for this run so we start with fresh tables next time
if discseen then
notmatchpre = { }
notmatchpost = { }
notmatchreplace = { }
-- notmatchpre = { a = 1, b = 1 } notmatchpre .a = nil notmatchpre .b = nil
-- notmatchpost = { a = 1, b = 1 } notmatchpost .a = nil notmatchpost .b = nil
-- notmatchreplace = { a = 1, b = 1 } notmatchreplace.a = nil notmatchreplace.b = nil
end
return head, start, done
end
-- all contextual lookup types share the same handler
handlers.gsub_context = handle_contextchain
handlers.gsub_contextchain = handle_contextchain
handlers.gsub_reversecontextchain = handle_contextchain
handlers.gpos_contextchain = handle_contextchain
handlers.gpos_context = handle_contextchain
-- local function chained_contextchain(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
-- local steps = currentlookup.steps
-- local nofsteps = currentlookup.nofsteps
-- if nofsteps > 1 then
-- reportmoresteps(dataset,sequence)
-- end
-- -- probably wrong
-- local l = steps[1].coverage[getchar(start)]
-- if l then
-- return handle_contextchain(head,start,dataset,sequence,l,rlmode,skiphash)
-- else
-- return head, start, false
-- end
-- end
-- new 2022-09-25
-- Contextual lookup that is called from within another chain. The coverage of each step
-- of currentlookup is consulted for the character at start and the contexts found there
-- are passed to handle_contextchain.
--
-- With one step the result of handle_contextchain is returned as it is. With more steps
-- the first step that reports done wins; results of steps that are not done are dropped
-- and the original head and start are returned with false.
local function chained_contextchain(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
    local steps    = currentlookup.steps
    local nofsteps = currentlookup.nofsteps
    local char     = getchar(start)
    if nofsteps == 1 then
        local contexts = steps[1].coverage[char]
        if contexts then
            return handle_contextchain(head,start,dataset,sequence,contexts,rlmode,skiphash)
        end
    else
        for i=1,nofsteps do
            local contexts = steps[i].coverage[char]
            if contexts then
                local newhead, newstart, done = handle_contextchain(head,start,dataset,sequence,contexts,rlmode,skiphash)
                if done then
                    return newhead, newstart, done
                end
            end
        end
    end
    return head, start, false
end

-- all contextual lookup types share the same chained handler
chainprocs.gsub_context             = chained_contextchain
chainprocs.gsub_contextchain        = chained_contextchain
chainprocs.gsub_reversecontextchain = chained_contextchain
chainprocs.gpos_contextchain        = chained_contextchain
chainprocs.gpos_context             = chained_contextchain
------------------------------
-- experiment (needs no handler in font-otc so not now):
--
-- function otf.registerchainproc(name,f)
-- -- chainprocs[name] = f
-- chainprocs[name] = function(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
-- local done = currentlookup.nofsteps > 0
-- if not done then
-- reportzerosteps(dataset,sequence)
-- else
-- head, start, done = f(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
-- if not head or not start then
-- reportbadsteps(dataset,sequence)
-- end
-- end
-- return head, start, done
-- end
-- end
local missing    = setmetatableindex("table")
local logwarning = report_process
local resolved   = { } -- we only resolve a font,script,language pair once

-- Reporter for processing messages. When steps are traced the message is also
-- registered and when trace_steps equals "silent" it is not reported at all.
local function logprocess(...)
    if trace_steps then
        registermessage(...)
    end
    if trace_steps ~= "silent" then
        report_process(...)
    end
end
-- todo: pass all these 'locals' in a table
-- Per font cache of resources.sequences; a font without sequences (or with an
-- empty table) is stored as false.
local sequencelists = setmetatableindex(function(cache,font)
    local list = fontdata[font].resources.sequences
    if not (list and next(list)) then
        list = false
    end
    cache[font] = list
    return list
end)
-- fonts.hashes.sequences = sequencelists
do -- overcome local limit
local autofeatures = fonts.analyzers.features
local featuretypes = otf.tables.featuretypes
local defaultscript = otf.features.checkeddefaultscript
local defaultlanguage = otf.features.checkeddefaultlanguage
local wildcard = "*"
local default = "dflt"
-- Checks if a sequence is active for the given script and language. The feature
-- kinds of the sequence are tried in order; the first one that is enabled and
-- that resolves for the script and language (directly, via the wildcard, or via
-- the auto script/language fallbacks) gives
--
--   { value of the enabled feature, analyzer or false, sequence, kind }
--
-- When nothing applies false is returned.
local function initialize(sequence,script,language,enabled,autoscript,autolanguage)
    local features = sequence.features
    local order    = features and sequence.order
    if not order then
        -- no features, or no order (which can't happen)
        return false
    end
    local featuretype = featuretypes[sequence.type or "unknown"]
    for i=1,#order do
        local kind  = order[i]
        local valid = enabled[kind]
        if valid then
            local scripts   = features[kind]
            local languages = scripts and (
                scripts[script] or
                scripts[wildcard] or
                (autoscript and defaultscript(featuretype,autoscript,scripts))
            )
            local active = languages and (
                languages[language] or
                languages[wildcard] or
                (autolanguage and defaultlanguage(featuretype,autolanguage,languages))
            )
            if active then
                return { valid, autofeatures[kind] or false, sequence, kind }
            end
        end
    end
    return false
end
-- Generic variant (overloaded in context): returns the list of dataset records that
-- apply to the script and language of the font. The list is built once per font,
-- script and language combination and then taken from the resolved cache.
--
-- tfmdata : font data (shared.features, properties, resources.sequences are used)
-- font    : font id, used as the cache key
function otf.dataset(tfmdata,font)
    local shared       = tfmdata.shared
    local properties   = tfmdata.properties
    local language     = properties.language or "dflt"
    local script       = properties.script   or "dflt"
    local enabled      = shared.features
    local autoscript   = enabled and enabled.autoscript
    local autolanguage = enabled and enabled.autolanguage
    --
    local byscript = resolved[font]
    if not byscript then
        byscript = { }
        resolved[font] = byscript
    end
    local bylanguage = byscript[script]
    if not bylanguage then
        bylanguage = { }
        byscript[script] = bylanguage
    end
    local list = bylanguage[language]
    if list then
        return list
    end
    -- indexed but we can also add specific data by key
    list = { }
    bylanguage[language] = list
    local sequences = tfmdata.resources.sequences
    if sequences and enabled then
        for s=1,#sequences do
            local record = initialize(sequences[s],script,language,enabled,autoscript,autolanguage)
            if record then
                list[#list+1] = record
            end
        end
    end
    return list
end
end
-- Functions like kernrun, comprun etc evolved over time and in the end look rather
-- complex. It's a bit of a compromise between extensive copying and creating subruns.
-- The logic has been improved a lot by Kai and Ivo who use complex fonts which
-- really helped to identify border cases on the one hand and get insight in the diverse
-- ways fonts implement features (not always that consistent and efficient). At the same
-- time I tried to keep the code relatively efficient so that the overhead in runtime
-- stays acceptable.
-- Tracing helper: reports the given tag, the node and the serialized discretionary.
local function report_disc(what,n)
    local serialized = languages.serializediscretionary(n)
    report_run("%s: %s > %s",what,n,serialized)
end
-- Applies a positioning run to a discretionary node. The pre, post and replace lists
-- are each processed on their own ("injections"). When a glyph of this font precedes
-- or follows the disc, the list is temporarily linked to that neighbour so that the
-- boundary can be processed too ("preinjections", "postinjections", "replaceinjections",
-- "emptyinjections"); afterwards the original links are restored.
--
-- disc  : the discretionary node
-- k_run : runner, called as k_run(first,injection,last,font,attr,...)
-- font  : font id
-- attr  : attribute value, passed on to k_run
-- ...   : passed on to k_run unchanged
--
-- Returns the node that followed the disc on entry.
local function kernrun(disc,k_run,font,attr,...)
--
-- we catch
--
if trace_kernruns then
report_disc("kern",disc)
end
--
local prev, next = getboth(disc)
--
local nextstart = next
local done = false
--
local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
--
local prevmarks = prev
--
-- can be optional, because why on earth do we get a disc after a mark (okay, maybe when a ccmp
-- has happened but then it should be in the disc so basically this test indicates an error)
--
-- walk back over marks of this font so that the boundary runs start before them
while prevmarks do
local char = ischar(prevmarks,font)
if char and marks[char] then
prevmarks = getprev(prevmarks)
else
break
end
end
--
-- neighbours that are not characters of this font don't take part in boundary runs
if prev and not ischar(prev,font) then -- and (pre or replace)
prev = false
end
if next and not ischar(next,font) then -- and (post or replace)
next = false
end
--
-- we need to get rid of this nest mess some day .. has to be done otherwise
--
if pre then
if k_run(pre,"injections",nil,font,attr,...) then
done = true
end
if prev then
-- temporarily: prev <-> pre, run up to (not including) pre, then restore prev <-> disc
setlink(prev,pre)
if k_run(prevmarks,"preinjections",pre,font,attr,...) then -- or prev?
done = true
end
setprev(pre)
setlink(prev,disc)
end
end
--
if post then
if k_run(post,"injections",nil,font,attr,...) then
done = true
end
if next then
-- temporarily: posttail <-> next, run from posttail up to next, then restore disc <-> next
setlink(posttail,next)
if k_run(posttail,"postinjections",next,font,attr,...) then
done = true
end
setnext(posttail)
setlink(disc,next)
end
end
--
if replace then
if k_run(replace,"injections",nil,font,attr,...) then
done = true
end
if prev then
setlink(prev,replace)
if k_run(prevmarks,"replaceinjections",replace,font,attr,...) then -- getnext(replace))
done = true
end
setprev(replace)
setlink(prev,disc)
end
if next then
setlink(replacetail,next)
if k_run(replacetail,"replaceinjections",next,font,attr,...) then
done = true
end
setnext(replacetail)
setlink(disc,next)
end
elseif prev and next then
-- no replace text: process prev and next as if the disc was not there
setlink(prev,next)
if k_run(prevmarks,"emptyinjections",next,font,attr,...) then
done = true
end
setlink(prev,disc,next)
end
-- NOTE(review): the final report is guarded by trace_testruns, not trace_kernruns -- confirm intent
if done and trace_testruns then
report_disc("done",disc)
end
-- return nextstart, done
return nextstart
end
-- fonts like ebgaramond do ligatures this way (less efficient than e.g. dejavu which
-- will do the testrun variant)
-- Processes the pre, post and replace lists of a discretionary one after the other.
-- While a list is being processed sweepnode and sweeptype tell which disc and which
-- field is involved; both are reset afterwards. When a run reports a change the disc
-- gets its (possibly new) lists assigned.
--
-- disc  : the discretionary node
-- c_run : runner, called as c_run(list,...) and returning the new list head and a
--         flag that tells if something was done
-- ...   : passed on to c_run unchanged (vararg is faster than the whole list)
--
-- Returns the node that follows the disc.
local function comprun(disc,c_run,...)
    if trace_compruns then
        report_disc("comp",disc)
    end
    --
    local pre, post, replace = getdisc(disc)
    local renewed = false
    --
    if pre then
        -- in alternative code preinjections is used (also used then for properties, saves a variable)
        sweepnode, sweeptype = disc, "pre"
        local new, done = c_run(pre,...)
        if done then
            pre, renewed = new, true
        end
    end
    --
    if post then
        sweepnode, sweeptype = disc, "post"
        local new, done = c_run(post,...)
        if done then
            post, renewed = new, true
        end
    end
    --
    if replace then
        sweepnode, sweeptype = disc, "replace"
        local new, done = c_run(replace,...)
        if done then
            replace, renewed = new, true
        end
    end
    --
    sweepnode, sweeptype = nil, nil
    --
    if renewed then
        if trace_testruns then
            report_disc("done",disc)
        end
        setdisc(disc,pre,post,replace)
    end
    --
    -- return getnext(disc), renewed
    return getnext(disc)
end
-- if we can hyphenate inside a ligature then it is unlikely to be a ligature, so we
-- could have an option here to ignore the ligature
-- First value of the loop in testrun that extends the range behind a disc; it must
-- start at 2 according to Kai. The directive switches to 1 when enabled.
local test_flatten_start = 2
directives.register("otf.testrun.forceflatten", function(v)
    if v then
        test_flatten_start = 1
    else
        test_flatten_start = 2
    end
end)
-- Handles a discretionary for sequences where a match can continue behind the disc (in
-- the main loop this one is used for gsub_ligature). First t_run is asked how many nodes
-- behind the disc take part in a match that starts in post or replace. When there are
-- such nodes they are cut out of the main list and appended to post, while a copy is
-- appended to replace. After that pre, post and replace are processed like in comprun.
--
-- disc  : the discretionary node
-- t_run : test runner, called as t_run(first,stop,...) and returning a count
-- c_run : runner, called as c_run(list,...) and returning the new head and a done flag
-- ...   : passed on to t_run and c_run unchanged
--
-- Returns the node that follows the disc afterwards, or nothing when the disc has no
-- next node.
local function testrun(disc,t_run,c_run,...)
if trace_testruns then
report_disc("test",disc)
end
local prev, next = getboth(disc)
if not next then
-- weird discretionary
return
end
local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
local renewed = false
if post or replace then -- and prev then -- hm, we can start with a disc
-- temporarily link the lists to what follows the disc so that t_run can look ahead;
-- an absent list is represented by next itself
if post then
setlink(posttail,next)
else
post = next
end
if replace then
setlink(replacetail,next)
else
replace = next
end
local d_post = t_run(post,next,...)
local d_replace = t_run(replace,next,...)
if d_post > 0 or d_replace > 0 then
-- use the larger of the two counts
local d = d_replace > d_post and d_replace or d_post
local head = getnext(disc) -- is: next
local tail = head
for i=test_flatten_start,d do
local nx = getnext(tail)
local id = getid(nx)
if id == disc_code then
head, tail = flattendisk(head,nx)
elseif id == glyph_code then
tail = nx
else
-- we can have overrun into a glue
break
end
end
-- cut head .. tail out of the main list; post gets the originals, replace a copy
next = getnext(tail)
setnext(tail)
setprev(head)
local new = copynodelist(head)
if posttail then
setlink(posttail,head)
else
post = head
end
if replacetail then
setlink(replacetail,new)
else
replace = new
end
else
-- we stay inside the disc
if posttail then
setnext(posttail)
else
post = nil
end
if replacetail then
setnext(replacetail)
else
replace = nil
end
end
setlink(disc,next)
-- pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
end
--
-- like comprun
--
if trace_testruns then
report_disc("more",disc)
end
--
if pre then
sweepnode = disc
sweeptype = "pre"
local new, ok = c_run(pre,...)
if ok then
pre = new
renewed = true
end
end
--
if post then
sweepnode = disc
sweeptype = "post"
local new, ok = c_run(post,...)
if ok then
post = new
renewed = true
end
end
--
if replace then
sweepnode = disc
sweeptype = "replace"
local new, ok = c_run(replace,...)
if ok then
replace = new
renewed = true
end
end
--
sweepnode = nil
sweeptype = nil
if renewed then
setdisc(disc,pre,post,replace)
if trace_testruns then
report_disc("done",disc)
end
end
-- next can have changed (copied list)
-- return getnext(disc), renewed
return getnext(disc)
end
-- 1{2{\oldstyle\discretionary{3}{4}{5}}6}7\par
-- 1{2\discretionary{3{\oldstyle3}}{{\oldstyle4}4}{5{\oldstyle5}5}6}7\par
-- depth of (nested) otf.featuresprocessor calls; the per font state is only set at level 1
local nesting = 0
-- Runs the handler of a single step sequence over a (disc) sublist.
--
-- head        : first node of the list; when sweephead has an entry for it we start
--               at that node instead and the entry is set to false
-- font        : font id that characters must belong to
-- attr        : when set, glyphs with another (non nil) glyphdata value are skipped
-- lookupcache : coverage of the step, indexed by character
-- step, dataset, sequence, rlmode, skiphash : passed on to the handler
-- handler     : called as handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
--
-- Returns the (possibly new) head and a boolean that tells if a handler did something.
local function c_run_single(head,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
    local changed = false
    local resume  = sweephead[head]
    local current = head
    if resume then
        current = resume
     -- sweephead[head] = nil
        sweephead[head] = false
    end
    while current do
        local char = ischar(current,font)
        if char then
            -- a stays unset without attr, so that then every glyph qualifies
            local a = attr and getglyphdata(current)
            if not a or a == attr then
                local lookupmatch = lookupcache[char]
                if lookupmatch then
                    local ok
                    head, current, ok = handler(head,current,dataset,sequence,lookupmatch,rlmode,skiphash,step)
                    if ok then
                        changed = true
                    end
                end
            end
            -- also when we skipped a glyph: go on, it can be a mixed list
            if current then
                current = getnext(current)
            end
        elseif char == false or resume then
            -- a glyph that doesn't qualify, or a non glyph while resuming (else we
            -- would lose the rest)
            return head, changed
        else
            -- in disc component
            current = getnext(current)
        end
    end
    return head, changed
end
-- only replace?
-- only replace?
--
-- Test run for a single step sequence. Starting at each character between start and
-- stop (stop not included) it follows the nested lookup tree over the characters that
-- come next, stepping into the replace field of discretionaries on the way. The counter
-- d becomes 1 when the character at stop itself is matched and is incremented for each
-- matched character after that.
--
-- start, stop : range to test, stop is the first node behind the disc list
-- font        : font id that characters must belong to
-- attr        : when set, glyphs with another (non nil) glyphdata value are skipped
-- lookupcache : coverage of the step, indexed by character
--
-- Returns d for the first start position where the match ends in a number or in a
-- table with a ligature field, and 0 when there is no such position.
local function t_run_single(start,stop,font,attr,lookupcache)
local lastd = nil
while start ~= stop do
local char = ischar(start,font)
if char then
local a -- happens often so no assignment is faster
if attr then
a = getglyphdata(start)
end
local startnext = getnext(start)
if not a or (a == attr) then
local lookupmatch = lookupcache[char]
if lookupmatch then -- hm, hyphens can match (tlig) so we need to really check
-- if we need more than ligatures we can outline the code and use functions
-- s is the node under test, ss the node to continue with after a replace list
local s = startnext
local ss = nil
local sstop = s == stop
if not s then
s = ss
ss = nil
end
-- a bit weird: why multiple ... anyway we can't have a disc in a disc
-- how about post ... we can probably merge this into the while
while getid(s) == disc_code do
ss = getnext(s)
s = getreplace(s)
if not s then
s = ss
ss = nil
end
end
local l = nil
local d = 0
while s do
local char = ischar(s,font)
if char then
-- a number as lookupmatch can't be indexed, so then the match ends here
local lg = not tonumber(lookupmatch) and lookupmatch[char]
if lg then
if sstop then
d = 1
elseif d > 0 then
d = d + 1
end
l = lg
s = getnext(s)
sstop = s == stop
if not s then
s = ss
ss = nil
end
while getid(s) == disc_code do
ss = getnext(s)
s = getreplace(s)
if not s then
s = ss
ss = nil
end
end
lookupmatch = lg
else
break
end
else
break
end
end
if l and (tonumber(l) or l.ligature) then -- so we test for ligature
lastd = d
end
-- why not: if not l then break elseif l.ligature then return d end
else
-- why not: break
-- no match (yet)
end
else
-- go on can be a mixed one
-- why not: break
end
if lastd then
return lastd
end
start = startnext
else
break
end
end
return 0
end
-- Positioning run (only gpos) for a single step sequence: walks from sub up to (not
-- including) last and calls the handler for each character of the font that is covered.
--
-- sub         : first node; also passed to the handler as head
-- injection   : injection tag that is passed to the handler as last argument
-- last        : node to stop at, can be nil
-- font        : font id that characters must belong to
-- attr        : when set and sub has another (non nil) glyphdata value nothing is done
-- lookupcache : coverage of the step, indexed by character
-- step, dataset, sequence, rlmode, skiphash : passed on to the handler
--
-- Returns true as soon as a handler reports success, otherwise nothing.
local function k_run_single(sub,injection,last,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
    if attr then
        local a = getglyphdata(sub)
        if a and a ~= attr then
            return
        end
    end
    for n in nextnode, sub do -- only gpos
        if n == last then
            return
        end
        local char        = ischar(n,font)
        local lookupmatch = char and lookupcache[char]
        if lookupmatch then
            local _, _, ok = handler(sub,n,dataset,sequence,lookupmatch,rlmode,skiphash,step,injection)
            if ok then
                return true
            end
        end
    end
end
-- Runs the handler of a multi step sequence over a (disc) sublist. For each qualifying
-- character the steps are tried in order until one handler succeeds.
--
-- head     : first node of the list; when sweephead has an entry for it we start at
--            that node instead and the entry is set to false
-- font     : font id that characters must belong to
-- attr     : when set, glyphs with another (non nil) glyphdata value are skipped
-- steps    : the steps of the sequence, each with a coverage
-- nofsteps : number of steps
-- dataset, sequence, rlmode, skiphash : passed on to the handler
-- handler  : called as handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
--
-- Returns the (possibly new) head and a boolean that tells if a handler did something.
local function c_run_multiple(head,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
    local changed = false
    local resume  = sweephead[head]
    local current = head
    if resume then
        current = resume
     -- sweephead[head] = nil
        sweephead[head] = false
    end
    while current do
        local char = ischar(current,font)
        if char then
            -- a stays unset without attr, so that then every glyph qualifies
            local a = attr and getglyphdata(current)
            if not a or a == attr then
                for i=1,nofsteps do
                    local step        = steps[i]
                    local lookupmatch = step.coverage[char]
                    if lookupmatch then
                        -- we could move all code inline but that makes things even more unreadable
                        local ok
                        head, current, ok = handler(head,current,dataset,sequence,lookupmatch,rlmode,skiphash,step)
                        if ok then
                            changed = true
                        end
                        if ok or not current then
                            -- done, or no node left (don't ask why ... shouldn't happen)
                            break
                        end
                    end
                end
            end
            -- also when we skipped a glyph: go on, it can be a mixed list
            if current then
                current = getnext(current)
            end
        elseif char == false or resume then
            -- whatever glyph, or a non glyph while resuming (else we would lose the rest)
            return head, changed
        else
            -- in disc component
            current = getnext(current)
        end
    end
    return head, changed
end
-- Test run for a multi step sequence. Starting at each character between start and
-- stop (stop not included) it follows, for every step that covers the character, the
-- nested lookup tree over the characters that come next, stepping into the replace
-- field of discretionaries on the way. The counter d becomes 1 when the character at
-- stop itself is matched and is incremented for each matched character after that.
--
-- start, stop : range to test, stop is the first node behind the disc list
-- font        : font id that characters must belong to
-- attr        : when set, glyphs with another (non nil) glyphdata value are skipped
-- steps       : the steps of the sequence, each with a coverage
-- nofsteps    : number of steps
--
-- Returns d for the first start position where a match ends in a number or in a table
-- with a ligature field, and 0 when there is no such position.
local function t_run_multiple(start,stop,font,attr,steps,nofsteps)
    local lastd = nil
    while start ~= stop do
        local char = ischar(start,font)
        if char then
            local a -- happens often so no assignment is faster
            if attr then
                a = getglyphdata(start)
            end
            local startnext = getnext(start)
            if not a or (a == attr) then
                for i=1,nofsteps do
                    local step        = steps[i]
                    local lookupcache = step.coverage
                    local lookupmatch = lookupcache[char]
                    if lookupmatch then
                        -- if we need more than ligatures we can outline the code and use functions
                        -- s is the node under test, ss the node to continue with after a replace list
                        local s     = startnext
                        local ss    = nil
                        local sstop = s == stop
                        if not s then
                            s  = ss
                            ss = nil
                        end
                        while getid(s) == disc_code do
                            ss = getnext(s)
                            s  = getreplace(s)
                            if not s then
                                s  = ss
                                ss = nil
                            end
                        end
                        local l = nil
                        local d = 0
                        while s do
                            -- the font is checked here too (as in t_run_single), otherwise a
                            -- glyph from another font could extend the match
                            local nextchar = ischar(s,font)
                            if nextchar then
                                -- a number as lookupmatch can't be indexed, so then the match ends here
                                local lg = not tonumber(lookupmatch) and lookupmatch[nextchar]
                                if lg then
                                    if sstop then
                                        d = 1
                                    elseif d > 0 then
                                        d = d + 1
                                    end
                                    l = lg
                                    s = getnext(s)
                                    sstop = s == stop
                                    if not s then
                                        s  = ss
                                        ss = nil
                                    end
                                    while getid(s) == disc_code do
                                        ss = getnext(s)
                                        s  = getreplace(s)
                                        if not s then
                                            s  = ss
                                            ss = nil
                                        end
                                    end
                                    lookupmatch = lg
                                else
                                    break
                                end
                            else
                                break
                            end
                        end
                        if l and (tonumber(l) or l.ligature) then
                            lastd = d
                        end
                    end
                end
            else
                -- go on can be a mixed one
            end
            if lastd then
                return lastd
            end
            start = startnext
        else
            break
        end
    end
    return 0
end
-- Positioning run (only gpos) for a multi step sequence: walks from sub up to (not
-- including) last and calls the handler for each character of the font, trying every
-- step that covers that character.
--
-- sub       : first node; also passed to the handler as head
-- injection : injection tag that is passed to the handler as last argument
-- last      : node to stop at, can be nil
-- font      : font id that characters must belong to
-- attr      : when set and sub has another (non nil) glyphdata value nothing is done
-- steps     : the steps of the sequence, each with a coverage
-- nofsteps  : number of steps
-- dataset, sequence, rlmode, skiphash : passed on to the handler
--
-- Returns true as soon as a handler reports success, otherwise nothing.
local function k_run_multiple(sub,injection,last,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
    local a -- happens often so no assignment is faster
    if attr then
        a = getglyphdata(sub)
    end
    if not a or (a == attr) then
        for n in nextnode, sub do -- only gpos
            if n == last then
                break
            end
            -- the font is checked here too (as in k_run_single), otherwise kerns of this
            -- font could end up on glyphs from another font
            local char = ischar(n,font)
            if char then
                for i=1,nofsteps do
                    local step        = steps[i]
                    local lookupcache = step.coverage
                    local lookupmatch = lookupcache[char]
                    if lookupmatch then
                        local h, d, ok = handler(sub,n,dataset,sequence,lookupmatch,rlmode,skiphash,step,injection) -- sub was head
                        if ok then
                            return true
                        end
                    end
                end
            end
        end
    end
end
local txtdirstate, pardirstate do -- this might change (no need for nxt in pardirstate)

    local getdirection = nuts.getdirection

    -- Updates the direction stack for a text direction node.
    --
    -- start     : the direction node
    -- stack     : table used as stack of direction codes
    -- top       : current stack level
    -- rlparmode : value to fall back on when the stack gets empty or when the
    --             direction is unknown
    --
    -- Returns the new stack level and the mode: 1 for left to right, -1 for right
    -- to left, or rlparmode.
    txtdirstate = function(start,stack,top,rlparmode)
        local dir, pop = getdirection(start)
        if pop then
            if top == 1 then
                return 0, rlparmode
            end
            top = top - 1
            if stack[top] == righttoleft_code then
                return top, -1
            end
            return top, 1
        end
        local mode
        if dir == lefttoright_code then
            mode = 1
        elseif dir == righttoleft_code then
            mode = -1
        else
            return top, rlparmode
        end
        top = top + 1
        stack[top] = dir
        return top, mode
    end

    -- Returns the mode of a par node twice: 1 for left to right, -1 for right to left
    -- and 0 for anything else.
    pardirstate = function(start)
        local dir  = getdirection(start)
        local mode = 0
        if dir == lefttoright_code then
            mode = 1
        elseif dir == righttoleft_code then
            mode = -1
        end
        return mode, mode
    end

end
-- These are non public helpers that can change without notice!
do
    -- extend an existing helper table or start a new one (no extra local at file level)
    local helpers = otf.helpers or { }
    helpers.txtdirstate = txtdirstate
    helpers.pardirstate = pardirstate
    otf.helpers = helpers
end
-- This is the main loop. We run over the node list dealing with a specific font. The
-- attribute is a context specific thing. We could work on sub start-stop ranges instead
-- but I wonder if there is that much speed gain (experiments showed that it made not
-- much sense) and we need to keep track of directions anyway. Also at some point I
-- want to play with font interactions and then we do need the full sweeps. Apart from
-- optimizations the principles of processing the features haven't changed much since
-- the beginning.
do
-- This is a measurable experimental speedup (only with hyphenated text and multiple
-- fonts per processor call), especially for fonts with lots of contextual lookups.
local fastdisc = true
local testdics = false -- not used in this block
directives.register("otf.fastdisc",function(v) -- normally enabled
    fastdisc = v
end)
-- using a merged combined hash as first test saves some 30% on ebgaramond and
-- about 15% on arabtype .. then moving the a test also saves a bit (even when
-- often a is not set at all so that one is a bit debatable
local otfdataset -- todo: make an installer
-- metatable for the discs cache: remembers per disc node what usesfont returns for
-- the current font
local getfastdisc = { }
function getfastdisc.__index(t,k)
    local v = usesfont(k,currentfont)
    t[k] = v
    return v
end
-- metatable for the spaces cache: remembers what isspace returns (or false)
local getfastspace = { }
function getfastspace.__index(t,k)
    -- we don't pass the id so that one can overload isspace
    local v = isspace(k,threshold) or false
    t[k] = v
    return v
end
-- The main loop: applies all datasets (enabled sequences) of a font to a node list.
--
-- head      : first node of the list
-- font      : font id; only characters of this font are processed
-- attr      : when set, only glyphs whose glyphdata equals this value qualify
-- direction : initial direction, used when the list doesn't start with a par node
-- n         : when larger than 1 (and fastdisc is enabled) a cache is used to check if
--             a disc uses the current font -- presumably the number of fonts in the
--             list, TODO confirm
--
-- Returns the (possibly new) head; a nested call for another font returns head, false.
function otf.featuresprocessor(head,font,attr,direction,n)
local sequences = sequencelists[font] -- temp hack
nesting = nesting + 1
if nesting == 1 then
-- outermost call: set up the state that is shared with the handlers
currentfont = font
tfmdata = fontdata[font]
descriptions = tfmdata.descriptions -- only needed in gref so we could pass node there instead
characters = tfmdata.characters -- but this branch is not entered that often anyway
local resources = tfmdata.resources
marks = resources.marks
classes = resources.classes
threshold,
factor = getthreshold(font)
checkmarks = tfmdata.properties.checkmarks
if not otfdataset then
otfdataset = otf.dataset
end
discs = fastdisc and n and n > 1 and setmetatable({},getfastdisc) -- maybe inline
spaces = setmetatable({},getfastspace)
elseif currentfont ~= font then
report_warning("nested call with a different font, level %s, quitting",nesting)
nesting = nesting - 1
return head, false
end
-- some 10% faster when no dynamics but hardly measureable on real runs .. but: it only
-- works when we have no other dynamics as otherwise the zero run will be applied to the
-- whole stream for which we then need to pass another variable which we won't
-- if attr == 0 then
-- attr = false
-- end
if trace_steps then
checkstep(head)
end
-- initial direction: taken from a leading par node, else from the direction argument
local initialrl = 0
if getid(head) == par_code and startofpar(head) then
initialrl = pardirstate(head)
elseif direction == righttoleft_code then
initialrl = -1
end
-- local done = false
local datasets = otfdataset(tfmdata,font,attr)
local dirstack = { nil } -- could move outside function but we can have local runs
sweephead = { }
-- sweephead = { a = 1, b = 1 } sweephead.a = nil sweephead.b = nil
-- Keeping track of the headnode is needed for devanagari. (I generalized it a bit
-- so that multiple cases are also covered.) We could prepend a temp node.
-- We don't goto the next node when a disc node is created so that we can then treat
-- the pre, post and replace. It's a bit of a hack but works out ok for most cases.
for s=1,#datasets do
local dataset = datasets[s]
-- when set, a glyph is only processed if getstate(glyph,attribute) holds
local attribute = dataset[2]
local sequence = dataset[3] -- sequences[s] -- also dataset[5]
local rlparmode = initialrl
local topstack = 0
local typ = sequence.type
local gpossing = typ == "gpos_single" or typ == "gpos_pair" -- store in dataset
local forcetestrun = typ == "gsub_ligature" -- testrun is only for ligatures
local handler = handlers[typ] -- store in dataset
local steps = sequence.steps
local nofsteps = sequence.nofsteps
local skiphash = sequence.skiphash
if not steps then
-- This permits injection, watch the different arguments. Watch out, the arguments passed
-- are not frozen as we might extend or change this. Is this used at all apart from some
-- experiments?
local h, ok = handler(head,dataset,sequence,initialrl,font,attr) -- less arguments now
-- if ok then
-- done = true
-- end
if h and h ~= head then
head = h
end
elseif typ == "gsub_reversecontextchain" then
--
-- This might need a check: if we have #before or #after > 0 then we might need to reverse
-- the before and after lists in the loader. But first I need to see a font that uses multiple
-- matches.
--
-- this one runs backwards, from the tail of the list to the head
local start = findnodetail(head)
local rlmode = 0 -- how important is this .. do we need to check for dir?
local merged = steps.merged
while start do
local char = ischar(start,font)
if char then
-- m holds the first and last index of the steps to try for this character
local m = merged[char]
if m then
local a -- happens often so no assignment is faster
if attr then
a = getglyphdata(start)
end
if not a or (a == attr) then
for i=m[1],m[2] do
local step = steps[i]
-- for i=1,#m do
-- local step = m[i]
local lookupcache = step.coverage
local lookupmatch = lookupcache[char]
if lookupmatch then
local ok
head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
if ok then
-- done = true
break
end
end
end
if start then
start = getprev(start)
end
else
start = getprev(start)
end
else
start = getprev(start)
end
else
start = getprev(start)
end
end
else
local start = head
local rlmode = initialrl
if nofsteps == 1 then -- happens often
local step = steps[1]
local lookupcache = step.coverage
while start do
local char, id = ischar(start,font)
if char then
if skiphash and skiphash[char] then -- we never needed it here but let's try
start = getnext(start)
else
local lookupmatch = lookupcache[char]
if lookupmatch then
local a -- happens often so no assignment is faster
if attr then
if getglyphdata(start) == attr and (not attribute or getstate(start,attribute)) then
a = true
end
elseif not attribute or getstate(start,attribute) then
a = true
end
if a then
local ok, df
head, start, ok, df = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
-- if ok then
-- done = true
-- end
if df then
-- the fourth result set: stay at the returned node instead of moving on
-- print("restart 1",typ)
elseif start then
start = getnext(start)
end
else
start = getnext(start)
end
else
start = getnext(start)
end
end
elseif char == false or id == glue_code then
-- a different font|state or glue (happens often)
start = getnext(start)
elseif id == disc_code then
-- without the discs cache every disc is handled, otherwise only those using this font
if not discs or discs[start] == true then
if gpossing then
start = kernrun(start,k_run_single, font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
elseif forcetestrun then
start = testrun(start,t_run_single,c_run_single,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
else
start = comprun(start,c_run_single, font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
end
else
start = getnext(start)
end
elseif id == math_code then
-- skip inline math
start = getnext(endofmath(start))
elseif id == dir_code then
topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
start = getnext(start)
-- elseif id == par_code and startofpar(start) then
-- rlparmode, rlmode = pardirstate(start)
-- start = getnext(start)
else
start = getnext(start)
end
end
else
local merged = steps.merged
while start do
local char, id = ischar(start,font)
if char then
if skiphash and skiphash[char] then -- we never needed it here but let's try
start = getnext(start)
else
-- m holds the first and last index of the steps to try for this character
local m = merged[char]
if m then
local a -- happens often so no assignment is faster
if attr then
if getglyphdata(start) == attr and (not attribute or getstate(start,attribute)) then
a = true
end
elseif not attribute or getstate(start,attribute) then
a = true
end
if a then
local ok, df
for i=m[1],m[2] do
local step = steps[i]
-- for i=1,#m do
-- local step = m[i]
local lookupcache = step.coverage
local lookupmatch = lookupcache[char]
if lookupmatch then
-- we could move all code inline but that makes things even more unreadable
-- local ok, df
head, start, ok, df = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
if df then
break
elseif ok then
-- done = true
break
elseif not start then
-- don't ask why ... shouldn't happen
break
end
end
end
if df then
-- the fourth result set: stay at the returned node instead of moving on
-- print("restart 2",typ)
elseif start then
start = getnext(start)
end
else
start = getnext(start)
end
else
start = getnext(start)
end
end
elseif char == false or id == glue_code then
-- a different font|state or glue (happens often)
start = getnext(start)
elseif id == disc_code then
-- without the discs cache every disc is handled, otherwise only those using this font
if not discs or discs[start] == true then
if gpossing then
start = kernrun(start,k_run_multiple, font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
elseif forcetestrun then
start = testrun(start,t_run_multiple,c_run_multiple,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
else
start = comprun(start,c_run_multiple, font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
end
else
start = getnext(start)
end
elseif id == math_code then
-- skip inline math
start = getnext(endofmath(start))
elseif id == dir_code then
topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
start = getnext(start)
-- elseif id == par_code and startofpar(start) then
-- rlparmode, rlmode = pardirstate(start)
-- start = getnext(start)
else
start = getnext(start)
end
end
end
end
if trace_steps then -- ?
registerstep(head)
end
end
nesting = nesting - 1
-- return head, done
return head
end
-- This is not an official helper and used for tracing experiments. It can be changed as I like
-- at any moment. At some point it might be used in a module that can help font development.
-- This is not an official helper; it is used for tracing experiments and can change.
-- It applies one dataset (sequence) to a node list. Discretionaries are not handled.
--
-- head      : first node of the list
-- font      : font id; only characters of this font are processed
-- direction : initial direction, only right to left makes a difference
-- dataset   : a dataset record, or a number that is used as index in the datasets of
--             the font
--
-- Returns the (possibly new) head.
function otf.datasetpositionprocessor(head,font,direction,dataset)
    -- set up the state that is shared with the handlers
    currentfont  = font
    tfmdata      = fontdata[font]
    descriptions = tfmdata.descriptions -- only needed in gref so we could pass node there instead
    characters   = tfmdata.characters   -- but this branch is not entered that often anyway
    local resources = tfmdata.resources
    marks        = resources.marks
    classes      = resources.classes
    threshold,
    factor       = getthreshold(font)
    checkmarks   = tfmdata.properties.checkmarks
    if type(dataset) == "number" then
        -- the resolver is set on first use, so we can't rely on featuresprocessor
        -- having been called already
        if not otfdataset then
            otfdataset = otf.dataset
        end
        dataset = otfdataset(tfmdata,font,0)[dataset]
    end
    local sequence  = dataset[3] -- sequences[s] -- also dataset[5]
    local typ       = sequence.type
 -- local gpossing  = typ == "gpos_single" or typ == "gpos_pair" -- store in dataset
 -- gpos_contextchain gpos_context
 -- if not gpossing then
 --     return head, false
 -- end
    local handler   = handlers[typ] -- store in dataset
    local steps     = sequence.steps
    -- this one was not declared, so the test below looked at a (nil) global
    local skiphash  = sequence.skiphash
    local dirstack  = { nil } -- could move outside function but we can have local runs (maybe a few more nils)
    local start     = head
    local initialrl = (direction == righttoleft_code) and -1 or 0
    local rlmode    = initialrl
    local rlparmode = initialrl
    local topstack  = 0
    local merged    = steps.merged
 -- local matches   = false
    local position  = 0 -- only of use for the commented matches code
    while start do
        local char, id = ischar(start,font)
        if char then
            position = position + 1
            -- m holds the first and last index of the steps to try for this character
            local m = merged[char]
            if m then
                if skiphash and skiphash[char] then -- we never needed it here but let's try
                    start = getnext(start)
                else
                    for i=m[1],m[2] do
                        local step        = steps[i]
                        local lookupcache = step.coverage
                        local lookupmatch = lookupcache[char]
                        if lookupmatch then
                            local ok
                            head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
                            if ok then
                             -- if matches then
                             --     matches[position] = i
                             -- else
                             --     matches = { [position] = i }
                             -- end
                                break
                            elseif not start then
                                break
                            end
                        end
                    end
                    if start then
                        start = getnext(start)
                    end
                end
            else
                start = getnext(start)
            end
        elseif char == false or id == glue_code then
            -- a different font|state or glue (happens often)
            start = getnext(start)
        elseif id == math_code then
            -- skip inline math
            start = getnext(endofmath(start))
        elseif id == dir_code then
            topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
            start = getnext(start)
     -- elseif id == par_code and startofpar(start) then
     --     rlparmode, rlmode = pardirstate(start)
     --     start = nxt
        else
            start = getnext(start)
        end
    end
    return head
end
-- end of experiment
end
-- so far
do

    local plugins = { }
    otf.plugins   = plugins

    local report = logs.reporter("fonts")
    local warned = false
    local okay   = { text = true } -- plugins that don't trigger a warning

    -- shown once (in the log) when the first non whitelisted plugin is registered
    local notice = {
        "Plugins are not officially supported unless stated otherwise. This is because",
        "they bypass the regular font handling and therefore some features in ConTeXt",
        "(especially those related to fonts) might not work as expected or might not work",
        "at all. Some plugins are for testing and development only and might change",
        "whenever we feel the need for it.",
    }

    -- Registers function f as plugin under the given name. Anything but a string and a
    -- function is silently ignored.
    function otf.registerplugin(name,f)
        if type(name) ~= "string" or type(f) ~= "function" then
            return
        end
        plugins[name] = { name, f }
        if okay[name] then
            -- no warning (e.g. the diagnostic text plugin)
            return
        end
        report("plugin %a has been loaded, please be aware of possible side effects",name)
        if warned then
            return
        end
        if logs.pushtarget then
            logs.pushtarget("log")
        end
        for i=1,#notice do
            report(notice[i])
        end
        report()
        if logs.poptarget then
            logs.poptarget()
        end
        warned = true
    end

    -- Feature initializer: a string value selects the plugin with that name for the font.
    function otf.plugininitializer(tfmdata,value)
        if type(value) == "string" then
            tfmdata.shared.plugin = plugins[value]
        end
    end

    -- Feature processor: hands the list over to the plugin of the font. Without a plugin
    -- head and false are returned.
    function otf.pluginprocessor(head,font,dynamic,direction) -- n
        local shared = fontdata[font].shared
        local plugin = shared and shared.plugin
        if not plugin then
            return head, false
        end
        if trace_plugins then
            report_process("applying plugin %a",plugin[1])
        end
        return plugin[2](head,font,dynamic,direction)
    end

end
-- Placeholder initializer of the "features" feature; it gets the font data and the
-- feature value but does nothing with them.
function otf.featuresinitializer(tfmdata,value)
-- nothing done here any more
end
-- The "features" feature (enabled by default) hooks the processors defined above into
-- the font handler: the features ones under the node key, the plugin ones under plug.
registerotffeature {
name = "features",
description = "features",
default = true,
initializers = {
position = 1,
node = otf.featuresinitializer,
plug = otf.plugininitializer,
},
processors = {
node = otf.featuresprocessor,
plug = otf.pluginprocessor,
}
}
-- Moved here (up) a bit. This doesn't really belong in generic so it will move to a
-- context module some day.
-- Initializer of the checkmarks feature: stores the feature value in the font
-- properties, where the features processor picks it up.
local function markinitializer(tfmdata,value)
    tfmdata.properties.checkmarks = value
end
-- The "checkmarks" feature (enabled by default) only has a node mode initializer, the
-- one that stores the value in the font properties.
registerotffeature {
name = "checkmarks",
description = "check mark widths",
default = true,
initializers = {
node = markinitializer,
},
}
-- This can be used for extra handlers, but should be used with care! We implement one
-- here but some more can be found in the osd (script devanagari) file. Now watch out:
-- when a handler has steps, it is called as the other ones, but when we have no steps,
-- we use a different call:
--
-- function(head,dataset,sequence,initialrl,font,attr)
-- return head, done
-- end
--
-- Also see (!!).
-- make the handlers accessible so that extra ones can be added
otf.handlers = handlers
-- when the global context is set the chunk ends here, so what follows is not loaded then
if context then
return
else
-- todo: move the following code someplace else
end
-- NOTE(review): the process is terminated without a message when setspacekerns is missing
local setspacekerns = nodes.injections.setspacekerns if not setspacekerns then os.exit() end
-- the feature that is checked for space kerns
local tag = "kern"
-- if fontfeatures then
-- function handlers.trigger_space_kerns(head,dataset,sequence,initialrl,font,attr)
-- local features = fontfeatures[font]
-- local enabled = features and features.spacekern and features[tag]
-- if enabled then
-- setspacekerns(font,sequence)
-- end
-- return head, enabled
-- end
-- else -- generic (no hashes)
-- Handler for the sequence without steps that spaceinitializer inserts (so it is called
-- with the arguments that go with that case). When both the spacekern and the kern
-- feature are enabled for the font, the space kerns get set up for the sequence.
--
-- Returns head and the value of the kern feature, or the first falsy value met while
-- checking.
function handlers.trigger_space_kerns(head,dataset,sequence,initialrl,font,attr)
    local shared   = fontdata[font].shared
    local features = shared and shared.features
    local enabled  = features
    if enabled then
        enabled = features.spacekern
    end
    if enabled then
        enabled = features[tag]
    end
    if enabled then
        setspacekerns(font,sequence)
    end
    return head, enabled
end
-- end
-- There are fonts out there that change the space but we don't do that kind of
-- things in TeX.
-- Tells if a font has kerns in which the space (character 32) is involved. Only pair
-- positioning sequences that belong to the kern feature are checked, and steps with
-- rules or without coverage are skipped.
--
-- data : font data with resources.sequences and resources.features.gpos
--
-- Returns true when at least one such kern is found, otherwise false.
local function hasspacekerns(data)
    local resources = data.resources
    local sequences = resources.sequences
    local validgpos = resources.features.gpos
    if not (validgpos and sequences) then
        return false
    end
    for s=1,#sequences do
        local sequence = sequences[s]
        local steps    = sequence.steps
        -- single positioning is ignored, so only pairs are left
        if steps and sequence.features[tag] and sequence.type == "gpos_pair" then
            for i=1,#steps do
                local step     = steps[i]
                local coverage = step.coverage
                -- not now: analyze (simple) rules
                if coverage and not step.rules then
                    local format = step.format
                    if format == "move" or format == "kern" then
                        -- space comes first
                        if coverage[32] then
                            return true
                        end
                        -- space comes second
                        for _, v in next, coverage do
                            if v[32] then
                                return true
                            end
                        end
                    elseif format == "pair" then
                        -- space comes first
                        local kerns = coverage[32]
                        if kerns then
                            for _, v in next, kerns do
                                local one = v[1]
                                if one and one ~= true then
                                    return true
                                end
                            end
                        end
                        -- space comes second
                        for _, v in next, coverage do
                            local kern = v[32]
                            local one  = kern and kern[1]
                            if one and one ~= true then
                                return true
                            end
                        end
                    end
                end
            end
        end
    end
    return false
end
-- Reader extender that stores in the properties of the data if the font has kerns with
-- a space involved (presumably the flag that spaceinitializer checks later on via
-- shared.rawdata -- TODO confirm).
otf.readers.registerextender {
name = "spacekerns",
action = function(data)
data.properties.hasspacekerns = hasspacekerns(data)
end
}
-- Initializer of the spacekern feature. It collects the kerns between a space
-- (character 32) and other characters from the pair positioning sequences of the kern
-- feature, stores them in resources.spacekerns and inserts a sequence without steps
-- (type trigger_space_kerns) before the last of these sequences.
--
-- tfmdata : font data (resources and shared.rawdata.properties are used)
-- value   : feature value; nothing is collected when it is false
--
-- Returns the spacekerns table { left = ..., right = ... } (a side is false when it is
-- empty) or nil when there is nothing to set.
local function spaceinitializer(tfmdata,value) -- attr
    local resources  = tfmdata.resources
    local spacekerns = resources and resources.spacekerns
    -- without resources there is nothing to analyze or to store
    if value and resources and spacekerns == nil then
        local shared     = tfmdata.shared
        local rawdata    = shared and shared.rawdata
        -- rawdata can be absent, so we check it before we index it
        local properties = rawdata and rawdata.properties
        if properties and properties.hasspacekerns then
            local sequences = resources.sequences
            local validgpos = resources.features.gpos
            if validgpos and sequences then
                local left  = { }
                local right = { }
                local last  = 0
                local feat  = nil
                for i=1,#sequences do
                    local sequence = sequences[i]
                    local steps    = sequence.steps
                    -- we don't support space kerns in other features (and without steps
                    -- there is nothing to collect ... needed for old one ... we could use
                    -- the basekerns instead)
                    local kern = steps and sequence.features[tag]
                    local kind = kern and sequence.type
                    if kind == "gpos_pair" or kind == "gpos_single" then
                        -- The scripts and languages are merged into a table of our own:
                        -- using the table of the first sequence would enable that
                        -- sequence for the scripts and languages of the later ones.
                        if not feat then
                            feat = { }
                        end
                        for script, languages in next, kern do
                            local f = feat[script]
                            if not f then
                                f = { }
                                feat[script] = f
                            end
                            for l in next, languages do
                                f[l] = true
                            end
                        end
                        for j=1,#steps do
                            local step     = steps[j]
                            local coverage = step.coverage
                            local rules    = step.rules
                            if rules then
                                -- not now: analyze (simple) rules
                            elseif not coverage then
                                -- nothing to do
                            elseif kind == "gpos_single" then
                                -- makes no sense in TeX
                            elseif kind == "gpos_pair" then
                                local format = step.format
                                if format == "move" or format == "kern" then
                                    -- space comes first
                                    local kerns = coverage[32]
                                    if kerns then
                                        for k, v in next, kerns do
                                            right[k] = v
                                        end
                                    end
                                    -- space comes second
                                    for k, v in next, coverage do
                                        local kern = v[32]
                                        if kern then
                                            left[k] = kern
                                        end
                                    end
                                elseif format == "pair" then
                                    -- space comes first
                                    local kerns = coverage[32]
                                    if kerns then
                                        for k, v in next, kerns do
                                            local one = v[1]
                                            if one and one ~= true then
                                                right[k] = one[3]
                                            end
                                        end
                                    end
                                    -- space comes second
                                    for k, v in next, coverage do
                                        local kern = v[32]
                                        if kern then
                                            local one = kern[1]
                                            if one and one ~= true then
                                                left[k] = one[3]
                                            end
                                        end
                                    end
                                end
                            end
                        end
                        last = i
                    end
                end
                left  = next(left)  and left  or false
                right = next(right) and right or false
                if left or right then
                    spacekerns = {
                        left  = left,
                        right = right,
                    }
                    if last > 0 then
                        local triggersequence = {
                            -- no steps, see (!!)
                            features = { [tag] = feat or { dflt = { dflt = true, } } },
                            flags    = noflags,
                            name     = "trigger_space_kerns",
                            order    = { tag },
                            type     = "trigger_space_kerns",
                            left     = left,
                            right    = right,
                        }
                        insert(sequences,last,triggersequence)
                    end
                end
            end
        end
        resources.spacekerns = spacekerns
    end
    return spacekerns
end
-- The "spacekern" feature (enabled by default) only has a node mode initializer, the
-- one that collects the space kerns.
registerotffeature {
name = "spacekern",
description = "space kern injection",
default = true,
initializers = {
node = spaceinitializer,
},
}