summaryrefslogtreecommitdiff
path: root/tex/context/base/mkxl/font-ots.lmt
diff options
context:
space:
mode:
Diffstat (limited to 'tex/context/base/mkxl/font-ots.lmt')
-rw-r--r--tex/context/base/mkxl/font-ots.lmt4521
1 files changed, 4521 insertions, 0 deletions
diff --git a/tex/context/base/mkxl/font-ots.lmt b/tex/context/base/mkxl/font-ots.lmt
new file mode 100644
index 000000000..b34dfa847
--- /dev/null
+++ b/tex/context/base/mkxl/font-ots.lmt
@@ -0,0 +1,4521 @@
+if not modules then modules = { } end modules ['font-ots'] = { -- sequences
+ version = 1.001,
+ optimize = true,
+ comment = "companion to font-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files",
+}
+
+--[[ldx--
+<p>This module is a bit more split up that I'd like but since we also want to test
+with plain <l n='tex'/> it has to be so. This module is part of <l n='context'/>
+and discussion about improvements and functionality mostly happens on the
+<l n='context'/> mailing list.</p>
+
+<p>The specification of OpenType is (or at least decades ago was) kind of vague.
+Apart from a lack of a proper free specifications there's also the problem that
+Microsoft and Adobe may have their own interpretation of how and in what order to
+apply features. In general the Microsoft website has more detailed specifications
+and is a better reference. There is also some information in the FontForge help
+files. In the end we rely most on the Microsoft specification.</p>
+
+<p>Because there is so much possible, fonts might contain bugs and/or be made to
+work with certain rederers. These may evolve over time which may have the side
+effect that suddenly fonts behave differently. We don't want to catch all font
+issues.</p>
+
+<p>After a lot of experiments (mostly by Taco, me and Idris) the first implementation
+was already quite useful. When it did most of what we wanted, a more optimized version
+evolved. Of course all errors are mine and of course the code can be improved. There
+are quite some optimizations going on here and processing speed is currently quite
+acceptable and has been improved over time. Many complex scripts are not yet supported
+yet, but I will look into them as soon as <l n='context'/> users ask for it.</p>
+
+<p>The specification leaves room for interpretation. In case of doubt the Microsoft
+implementation is the reference as it is the most complete one. As they deal with
+lots of scripts and fonts, Kai and Ivo did a lot of testing of the generic code and
+their suggestions help improve the code. I'm aware that not all border cases can be
+taken care of, unless we accept excessive runtime, and even then the interference
+with other mechanisms (like hyphenation) are not trivial.</p>
+
+<p>Especially discretionary handling has been improved much by Kai Eigner who uses complex
+(latin) fonts. The current implementation is a compromis between his patches and my code
+and in the meantime performance is quite ok. We cannot check all border cases without
+compromising speed but so far we're okay. Given good test cases we can probably improve
+it here and there. Especially chain lookups are non trivial with discretionaries but
+things got much better over time thanks to Kai.</p>
+
+<p>Glyphs are indexed not by unicode but in their own way. This is because there is no
+relationship with unicode at all, apart from the fact that a font might cover certain
+ranges of characters. One character can have multiple shapes. However, at the
+<l n='tex'/> end we use unicode so and all extra glyphs are mapped into a private
+space. This is needed because we need to access them and <l n='tex'/> has to include
+then in the output eventually.</p>
+
+<p>The initial data table is rather close to the open type specification and also not
+that different from the one produced by <l n='fontforge'/> but we uses hashes instead.
+In <l n='context'/> that table is packed (similar tables are shared) and cached on disk
+so that successive runs can use the optimized table (after loading the table is
+unpacked).</p>
+
+<p>This module is sparsely documented because it is has been a moving target. The
+table format of the reader changed a bit over time and we experiment a lot with
+different methods for supporting features. By now the structures are quite stable</p>
+
+<p>Incrementing the version number will force a re-cache. We jump the number by one
+when there's a fix in the reader or processing code that can result in different
+results.</p>
+
+<p>This code is also used outside context but in context it has to work with other
+mechanisms. Both put some constraints on the code here.</p>
+
+--ldx]]--
+
+-- Remark: We assume that cursives don't cross discretionaries which is okay because it
+-- is only used in semitic scripts.
+--
+-- Remark: We assume that marks precede base characters.
+--
+-- Remark: When complex ligatures extend into discs nodes we can get side effects. Normally
+-- this doesn't happen; ff\d{l}{l}{l} in lm works but ff\d{f}{f}{f}.
+--
+-- Todo: check if we copy attributes to disc nodes if needed.
+--
+-- Todo: it would be nice if we could get rid of components. In other places we can use
+-- the unicode properties. We can just keep a lua table.
+--
+-- Remark: We do some disc juggling where we need to keep in mind that the pre, post and
+-- replace fields can have prev pointers to a nesting node ... I wonder if that is still
+-- needed.
+--
+-- Remark: This is not possible:
+--
+-- \discretionary {alpha-} {betagammadelta}
+-- {\discretionary {alphabeta-} {gammadelta}
+-- {\discretionary {alphabetagamma-} {delta}
+-- {alphabetagammadelta}}}
+--
+-- Remark: Something is messed up: we have two mark / ligature indices, one at the
+-- injection end and one here ... this is based on KE's patches but there is something
+-- fishy there as I'm pretty sure that for husayni we need some connection (as it's much
+-- more complex than an average font) but I need proper examples of all cases, not of
+-- only some.
+--
+-- Remark: I wonder if indexed would be faster than unicoded. It would be a major
+-- rewrite to have char being unicode + an index field in glyph nodes. Also more
+-- assignments have to be made in order to keep things in sync. So, it's a no-go.
+--
+-- Remark: We can provide a fast loop when there are no disc nodes (tests show a 1%
+-- gain). Smaller functions might perform better cache-wise. But ... memory becomes
+-- faster anyway, so ...
+--
+-- Remark: Some optimizations made sense for 5.2 but seem less important for 5.3 but
+-- anyway served their purpose.
+--
+-- Todo: just (0=l2r and 1=r2l) or maybe (r2l = true)
+
+-- Experiments with returning the data with the ischar are positive for lmtx but
+-- have a performance hit on mkiv because there we need to wrap ischardata (pending
+-- extensions to luatex which is unlikely to happen for such an experiment because
+-- we then can't remove it). Actually it might make generic slightly faster. Also,
+-- there are some corner cases where a data check comes before a char fetch and
+-- we're talking of millions of calls there. At some point I might make a version
+-- for lmtx that does it slightly different anyway.
+
+local type, next, tonumber = type, next, tonumber
+local random = math.random
+local formatters = string.formatters
+local insert = table.insert
+
+local registertracker = trackers.register
+
+local logs = logs
+local trackers = trackers
+local nodes = nodes
+local attributes = attributes
+local fonts = fonts
+
+local otf = fonts.handlers.otf
+local tracers = nodes.tracers
+
+local trace_singles = false registertracker("otf.singles", function(v) trace_singles = v end)
+local trace_multiples = false registertracker("otf.multiples", function(v) trace_multiples = v end)
+local trace_alternatives = false registertracker("otf.alternatives", function(v) trace_alternatives = v end)
+local trace_ligatures = false registertracker("otf.ligatures", function(v) trace_ligatures = v end)
+local trace_contexts = false registertracker("otf.contexts", function(v) trace_contexts = v end)
+local trace_marks = false registertracker("otf.marks", function(v) trace_marks = v end)
+local trace_kerns = false registertracker("otf.kerns", function(v) trace_kerns = v end)
+local trace_cursive = false registertracker("otf.cursive", function(v) trace_cursive = v end)
+local trace_preparing = false registertracker("otf.preparing", function(v) trace_preparing = v end)
+local trace_bugs = false registertracker("otf.bugs", function(v) trace_bugs = v end)
+local trace_details = false registertracker("otf.details", function(v) trace_details = v end)
+local trace_steps = false registertracker("otf.steps", function(v) trace_steps = v end)
+local trace_skips = false registertracker("otf.skips", function(v) trace_skips = v end)
+local trace_plugins = false registertracker("otf.plugins", function(v) trace_plugins = v end)
+local trace_chains = false registertracker("otf.chains", function(v) trace_chains = v end)
+
+local trace_kernruns = false registertracker("otf.kernruns", function(v) trace_kernruns = v end)
+----- trace_discruns = false registertracker("otf.discruns", function(v) trace_discruns = v end)
+local trace_compruns = false registertracker("otf.compruns", function(v) trace_compruns = v end)
+local trace_testruns = false registertracker("otf.testruns", function(v) trace_testruns = v end)
+
+local forcediscretionaries = false
+local forcepairadvance = false -- for testing
+
+directives.register("otf.forcediscretionaries",function(v)
+ forcediscretionaries = v
+end)
+
+directives.register("otf.forcepairadvance",function(v)
+ forcepairadvance = v
+end)
+
+local report_direct = logs.reporter("fonts","otf direct")
+local report_subchain = logs.reporter("fonts","otf subchain")
+local report_chain = logs.reporter("fonts","otf chain")
+local report_process = logs.reporter("fonts","otf process")
+local report_warning = logs.reporter("fonts","otf warning")
+local report_run = logs.reporter("fonts","otf run")
+
+registertracker("otf.substitutions", "otf.singles","otf.multiples","otf.alternatives","otf.ligatures")
+registertracker("otf.positions", "otf.marks","otf.kerns","otf.cursive")
+registertracker("otf.actions", "otf.substitutions","otf.positions")
+registertracker("otf.sample", "otf.steps","otf.substitutions","otf.positions","otf.analyzing")
+registertracker("otf.sample.silent", "otf.steps=silent","otf.substitutions","otf.positions","otf.analyzing")
+
+local nuts = nodes.nuts
+
+local getnext = nuts.getnext
+local setnext = nuts.setnext
+local getprev = nuts.getprev
+local setprev = nuts.setprev
+local getboth = nuts.getboth
+local setboth = nuts.setboth
+local getid = nuts.getid
+local getstate = nuts.getstate
+local getsubtype = nuts.getsubtype
+local setsubtype = nuts.setsubtype
+local getchar = nuts.getchar
+local setchar = nuts.setchar
+local getdisc = nuts.getdisc
+local setdisc = nuts.setdisc
+local getreplace = nuts.getreplace
+local setlink = nuts.setlink
+local getwidth = nuts.getwidth
+
+local getglyphdata = nuts.getglyphdata
+
+---------------------------------------------------------------------------------------
+
+-- Beware: In ConTeXt components no longer are real components. We only keep track of
+-- their positions because some complex ligatures might need that. For the moment we
+-- use an x_ prefix because for now generic follows the other approach.
+
+local copy_no_components = nuts.copy_no_components
+local copy_only_glyphs = nuts.copy_only_glyphs
+local count_components = nuts.count_components
+local set_components = nuts.set_components
+local get_components = nuts.get_components
+local flush_components = nuts.flush_components
+
+---------------------------------------------------------------------------------------
+
+local ischar = nuts.ischar
+local isnextchar = nuts.isnextchar -- only makes sense when we know we need next
+local isprevchar = nuts.isprevchar -- only makes sense when we know we need prev
+local usesfont = nuts.uses_font
+
+local insert_node_after = nuts.insert_after
+local copy_node = nuts.copy
+local copy_node_list = nuts.copy_list
+local remove_node = nuts.remove
+local find_node_tail = nuts.tail
+local flush_node_list = nuts.flush_list
+local flush_node = nuts.flush_node
+local end_of_math = nuts.end_of_math
+local start_of_par = nuts.start_of_par
+
+local setmetatable = setmetatable
+local setmetatableindex = table.setmetatableindex
+
+local nextnode = nuts.traversers.node
+
+----- zwnj = 0x200C
+----- zwj = 0x200D
+
+local nodecodes = nodes.nodecodes
+local glyphcodes = nodes.glyphcodes
+local disccodes = nodes.disccodes
+
+local glyph_code = nodecodes.glyph
+local glue_code = nodecodes.glue
+local disc_code = nodecodes.disc
+local math_code = nodecodes.math
+local dir_code = nodecodes.dir
+local par_code = nodecodes.par
+
+local lefttoright_code = nodes.dirvalues.lefttoright
+local righttoleft_code = nodes.dirvalues.righttoleft
+
+local discretionarydisc_code = disccodes.discretionary
+local ligatureglyph_code = glyphcodes.ligature
+
+local injections = nodes.injections
+local setmark = injections.setmark
+local setcursive = injections.setcursive
+local setkern = injections.setkern
+local setmove = injections.setmove
+local setposition = injections.setposition
+local resetinjection = injections.reset
+local copyinjection = injections.copy
+local setligaindex = injections.setligaindex
+local getligaindex = injections.getligaindex
+
+local fontdata = fonts.hashes.identifiers
+local fontfeatures = fonts.hashes.features
+
+local otffeatures = fonts.constructors.features.otf
+local registerotffeature = otffeatures.register
+
+local onetimemessage = fonts.loggers.onetimemessage or function() end
+
+local getrandom = utilities and utilities.randomizer and utilities.randomizer.get
+
+otf.defaultnodealternate = "none" -- first last
+
+-- We use a few semi-global variables. The handler can be called nested but this assumes
+-- that the same font is used.
+
+local tfmdata = false
+local characters = false
+local descriptions = false
+local marks = false
+local classes = false
+local currentfont = false
+local factor = 0
+local threshold = 0
+local checkmarks = false
+
+local discs = false
+local spaces = false
+
+local sweepnode = nil
+local sweephead = { } -- we don't nil entries but false them (no collection and such)
+
+local notmatchpre = { } -- to be checked: can we use false instead of nil / what if a == b tests
+local notmatchpost = { } -- to be checked: can we use false instead of nil / what if a == b tests
+local notmatchreplace = { } -- to be checked: can we use false instead of nil / what if a == b tests
+
+local handlers = { }
+
+local isspace = injections.isspace
+local getthreshold = injections.getthreshold
+
+local checkstep = (tracers and tracers.steppers.check) or function() end
+local registerstep = (tracers and tracers.steppers.register) or function() end
+local registermessage = (tracers and tracers.steppers.message) or function() end
+
+-- local function checkdisccontent(d)
+-- local pre, post, replace = getdisc(d)
+-- if pre then for n in traverse_id(glue_code,pre) do report("pre: %s",nodes.idstostring(pre)) break end end
+-- if post then for n in traverse_id(glue_code,post) do report("pos: %s",nodes.idstostring(post)) break end end
+-- if replace then for n in traverse_id(glue_code,replace) do report("rep: %s",nodes.idstostring(replace)) break end end
+-- end
+
+local function logprocess(...)
+ if trace_steps then
+ registermessage(...)
+ if trace_steps == "silent" then
+ return
+ end
+ end
+ report_direct(...)
+end
+
+local function logwarning(...)
+ report_direct(...)
+end
+
+local gref do
+
+ local f_unicode = formatters["U+%X"] -- was ["%U"]
+ local f_uniname = formatters["U+%X (%s)"] -- was ["%U (%s)"]
+ local f_unilist = formatters["% t"]
+
+ gref = function(n) -- currently the same as in font-otb
+ if type(n) == "number" then
+ local description = descriptions[n]
+ local name = description and description.name
+ if name then
+ return f_uniname(n,name)
+ else
+ return f_unicode(n)
+ end
+ elseif n then
+ local t = { }
+ for i=1,#n do
+ local ni = n[i]
+ if tonumber(ni) then -- later we will start at 2
+ local di = descriptions[ni]
+ local nn = di and di.name
+ if nn then
+ t[#t+1] = f_uniname(ni,nn)
+ else
+ t[#t+1] = f_unicode(ni)
+ end
+ end
+ end
+ return f_unilist(t)
+ else
+ return "<error in node mode tracing>"
+ end
+ end
+
+end
+
+local function cref(dataset,sequence,index)
+ if not dataset then
+ return "no valid dataset"
+ end
+ local merged = sequence.merged and "merged " or ""
+ if index then
+ return formatters["feature %a, type %a, %schain lookup %a, index %a"](
+ dataset[4],sequence.type,merged,sequence.name,index)
+ else
+ return formatters["feature %a, type %a, %schain lookup %a"](
+ dataset[4],sequence.type,merged,sequence.name)
+ end
+end
+
+local function pref(dataset,sequence)
+ return formatters["feature %a, type %a, %slookup %a"](
+ dataset[4],sequence.type,sequence.merged and "merged " or "",sequence.name)
+end
+
+local function mref(rlmode)
+ if not rlmode or rlmode >= 0 then
+ return "l2r"
+ else
+ return "r2l"
+ end
+end
+
+-- The next code is somewhat complicated by the fact that some fonts can have ligatures made
+-- from ligatures that themselves have marks. This was identified by Kai in for instance
+-- arabtype: KAF LAM SHADDA ALEF FATHA (0x0643 0x0644 0x0651 0x0627 0x064E). This becomes
+-- KAF LAM-ALEF with a SHADDA on the first and a FATHA op de second component. In a next
+-- iteration this becomes a KAF-LAM-ALEF with a SHADDA on the second and a FATHA on the
+-- third component.
+
+-- We can assume that languages that use marks are not hyphenated. We can also assume
+-- that at most one discretionary is present.
+
+-- We do need components in funny kerning mode but maybe I can better reconstruct then
+-- as we do have the font components info available; removing components makes the
+-- previous code much simpler. Also, later on copying and freeing becomes easier.
+-- However, for arabic we need to keep them around for the sake of mark placement
+-- and indices.
+
+local function flattendisk(head,disc)
+ local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
+ local prev, next = getboth(disc)
+ local ishead = head == disc
+ setdisc(disc)
+ flush_node(disc)
+ if pre then
+ flush_node_list(pre)
+ end
+ if post then
+ flush_node_list(post)
+ end
+ if ishead then
+ if replace then
+ if next then
+ setlink(replacetail,next)
+ end
+ return replace, replace
+ elseif next then
+ return next, next
+ else
+ -- return -- maybe warning
+ end
+ else
+ if replace then
+ if next then
+ setlink(replacetail,next)
+ end
+ setlink(prev,replace)
+ return head, replace
+ else
+ setlink(prev,next) -- checks for next anyway
+ return head, next
+ end
+ end
+end
+
+local function appenddisc(disc,list)
+ local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
+ local posthead = list
+ local replacehead = copy_node_list(list)
+ if post then
+ setlink(posttail,posthead)
+ else
+ post = posthead
+ end
+ if replace then
+ setlink(replacetail,replacehead)
+ else
+ replace = replacehead
+ end
+ setdisc(disc,pre,post,replace)
+end
+
+local function markstoligature(head,start,stop,char)
+ if start == stop and getchar(start) == char then
+ return head, start
+ else
+ local prev = getprev(start)
+ local next = getnext(stop)
+ setprev(start)
+ setnext(stop)
+ local base = copy_no_components(start,copyinjection)
+ if head == start then
+ head = base
+ end
+ resetinjection(base)
+ setchar(base,char)
+ setsubtype(base,ligatureglyph_code)
+ set_components(base,start)
+ setlink(prev,base,next)
+ flush_components(start)
+ return head, base
+ end
+end
+
+-- Remark for Kai: (some arabic fonts do mark + mark = other mark and such)
+--
+-- The hasmarks is needed for ligatures of marks that are part of a ligature in
+-- which case we assume that we can delete the marks anyway (we can always become
+-- more clever if needed) .. in fact the whole logic here should be redone. We're
+-- in the not discfound branch then. We now have skiphash too so we can be more
+-- selective if needed (todo).
+
+-- we can have more granularity here but for now we only do a simple check
+
+local no_left_ligature_code = 1
+local no_right_ligature_code = 2
+local no_left_kern_code = 4
+local no_right_kern_code = 8
+
+local has_glyph_option = nuts.has_glyph_option
+
+-- in lmtx we need to check the components and can be slightly more clever
+
+local function toligature(head,start,stop,char,dataset,sequence,skiphash,discfound,hasmarks) -- brr head
+ if has_glyph_option(start,no_right_ligature_code) then
+ return head, start
+ end
+ if start == stop and getchar(start) == char then
+ resetinjection(start)
+ setchar(start,char)
+ return head, start
+ end
+ local prev = getprev(start)
+ local next = getnext(stop)
+ local comp = start
+ setprev(start)
+ setnext(stop)
+ local base = copy_no_components(start,copyinjection)
+ if start == head then
+ head = base
+ end
+ resetinjection(base)
+ setchar(base,char)
+ setsubtype(base,ligatureglyph_code)
+ set_components(base,comp)
+ setlink(prev,base,next)
+ if not discfound then
+ local deletemarks = not skiphash or hasmarks
+ local components = start -- not used
+ local baseindex = 0
+ local componentindex = 0
+ local head = base
+ local current = base
+ -- first we loop over the glyphs in start ... stop
+ while start do
+ local char = getchar(start)
+ if not marks[char] then
+ baseindex = baseindex + componentindex
+ componentindex = count_components(start,marks)
+ -- we can be more clever here: "not deletemarks or (skiphash and not skiphash[char])"
+ -- and such:
+ elseif not deletemarks then
+ -- we can get a loop when the font expects otherwise (i.e. unexpected deletemarks)
+ setligaindex(start,baseindex + getligaindex(start,componentindex))
+ if trace_marks then
+ logwarning("%s: keep ligature mark %s, gets index %s",pref(dataset,sequence),gref(char),getligaindex(start))
+ end
+ local n = copy_node(start)
+ copyinjection(n,start) -- is this ok ? we position later anyway
+ head, current = insert_node_after(head,current,n) -- unlikely that mark has components
+ elseif trace_marks then
+ logwarning("%s: delete ligature mark %s",pref(dataset,sequence),gref(char))
+ end
+ start = getnext(start)
+ end
+ -- we can have one accent as part of a lookup and another following
+ local start = getnext(current)
+ while start do
+ local char = ischar(start)
+ if char then
+ -- also something skiphash here?
+ if marks[char] then
+ setligaindex(start,baseindex + getligaindex(start,componentindex))
+ if trace_marks then
+ logwarning("%s: set ligature mark %s, gets index %s",pref(dataset,sequence),gref(char),getligaindex(start))
+ end
+ start = getnext(start)
+ else
+ break
+ end
+ else
+ break
+ end
+ end
+ flush_components(components)
+ else
+ -- discfound ... forget about marks .. probably no scripts that hyphenate and have marks
+ local discprev, discnext = getboth(discfound)
+ if discprev and discnext then
+ -- we assume normalization in context, and don't care about generic ... especially
+ -- \- can give problems as there we can have a negative char but that won't match
+ -- anyway
+ local pre, post, replace, pretail, posttail, replacetail = getdisc(discfound,true)
+ if not replace then
+ -- looks like we never come here as it's not okay
+ local prev = getprev(base)
+ -- local comp = get_components(base) -- already set
+ local copied = copy_only_glyphs(comp)
+ if pre then
+ setlink(discprev,pre)
+ else
+ setnext(discprev) -- also blocks funny assignments
+ end
+ pre = comp -- is start
+ if post then
+ setlink(posttail,discnext)
+ setprev(post) -- nil anyway
+ else
+ post = discnext
+ setprev(discnext) -- also blocks funny assignments
+ end
+ setlink(prev,discfound,next)
+ setboth(base)
+ -- here components have a pointer so we can't free it!
+ set_components(base,copied)
+ replace = base
+ if forcediscretionaries then
+ setdisc(discfound,pre,post,replace,discretionarydisc_code)
+ else
+ setdisc(discfound,pre,post,replace)
+ end
+ base = prev
+ end
+ end
+ end
+ return head, base
+end
+
+local function multiple_glyphs(head,start,multiple,skiphash,what,stop) -- what to do with skiphash matches here
+ local nofmultiples = #multiple
+ if nofmultiples > 0 then
+ resetinjection(start)
+ setchar(start,multiple[1])
+ if nofmultiples > 1 then
+ local sn = getnext(start)
+ for k=2,nofmultiples do
+ -- untested:
+ --
+ -- while ignoremarks and marks[getchar(sn)] then
+ -- local sn = getnext(sn)
+ -- end
+ local n = copy_node(start) -- ignore components
+ resetinjection(n)
+ setchar(n,multiple[k])
+ insert_node_after(head,start,n)
+ start = n
+ end
+ if what == true then
+ -- we're ok
+ elseif what > 1 then
+ local m = multiple[nofmultiples]
+ for i=2,what do
+ local n = copy_node(start) -- ignore components
+ resetinjection(n)
+ setchar(n,m)
+ insert_node_after(head,start,n)
+ start = n
+ end
+ end
+ end
+ return head, start, true
+ else
+ if trace_multiples then
+ logprocess("no multiple for %s",gref(getchar(start)))
+ end
+ return head, start, false
+ end
+end
+
+local function get_alternative_glyph(start,alternatives,value)
+ local n = #alternatives
+ if n == 1 then
+ -- we could actually change that into a gsub and save some memory in the
+ -- font loader but it makes tracing more messy
+ return alternatives[1], trace_alternatives and "1 (only one present)"
+ elseif value == "random" then
+ local r = getrandom and getrandom("glyph",1,n) or random(1,n)
+ return alternatives[r], trace_alternatives and formatters["value %a, taking %a"](value,r)
+ elseif value == "first" then
+ return alternatives[1], trace_alternatives and formatters["value %a, taking %a"](value,1)
+ elseif value == "last" then
+ return alternatives[n], trace_alternatives and formatters["value %a, taking %a"](value,n)
+ end
+ value = value == true and 1 or tonumber(value)
+ if type(value) ~= "number" then
+ return alternatives[1], trace_alternatives and formatters["invalid value %s, taking %a"](value,1)
+ end
+ -- local a = alternatives[value]
+ -- if a then
+ -- -- some kind of hash
+ -- return a, trace_alternatives and formatters["value %a, taking %a"](value,a)
+ -- end
+ if value > n then
+ local defaultalt = otf.defaultnodealternate
+ if defaultalt == "first" then
+ return alternatives[n], trace_alternatives and formatters["invalid value %s, taking %a"](value,1)
+ elseif defaultalt == "last" then
+ return alternatives[1], trace_alternatives and formatters["invalid value %s, taking %a"](value,n)
+ else
+ return false, trace_alternatives and formatters["invalid value %a, %s"](value,"out of range")
+ end
+ elseif value == 0 then
+ return getchar(start), trace_alternatives and formatters["invalid value %a, %s"](value,"no change")
+ elseif value < 1 then
+ return alternatives[1], trace_alternatives and formatters["invalid value %a, taking %a"](value,1)
+ else
+ return alternatives[value], trace_alternatives and formatters["value %a, taking %a"](value,value)
+ end
+end
+
+-- handlers
+
+function handlers.gsub_single(head,start,dataset,sequence,replacement)
+ if trace_singles then
+ logprocess("%s: replacing %s by single %s",pref(dataset,sequence),gref(getchar(start)),gref(replacement))
+ end
+ resetinjection(start)
+ setchar(start,replacement)
+ return head, start, true
+end
+
+function handlers.gsub_alternate(head,start,dataset,sequence,alternative)
+ local kind = dataset[4]
+ local what = dataset[1]
+ local value = what == true and tfmdata.shared.features[kind] or what
+ local choice, comment = get_alternative_glyph(start,alternative,value)
+ if choice then
+ if trace_alternatives then
+ logprocess("%s: replacing %s by alternative %a to %s, %s",pref(dataset,sequence),gref(getchar(start)),gref(choice),comment)
+ end
+ resetinjection(start)
+ setchar(start,choice)
+ else
+ if trace_alternatives then
+ logwarning("%s: no variant %a for %s, %s",pref(dataset,sequence),value,gref(getchar(start)),comment)
+ end
+ end
+ return head, start, true
+end
+
+function handlers.gsub_multiple(head,start,dataset,sequence,multiple,rlmode,skiphash)
+ if trace_multiples then
+ logprocess("%s: replacing %s by multiple %s",pref(dataset,sequence),gref(getchar(start)),gref(multiple))
+ end
+ return multiple_glyphs(head,start,multiple,skiphash,dataset[1])
+end
+
+-- Don't we deal with disc otherwise now? I need to check if the next one can be
+-- simplified. Anyway, it can be way messier: marks that get removed as well as
+-- marks that are kept.
+
+function handlers.gsub_ligature(head,start,dataset,sequence,ligature,rlmode,skiphash)
+ local current = getnext(start)
+ if not current then
+ return head, start, false, nil
+ end
+ local stop = nil
+ local startchar = getchar(start)
+ if skiphash and skiphash[startchar] then
+ while current do
+ -- local char = ischar(current,currentfont)
+ local nxt, char = isnextchar(current,currentfont)
+ if char then
+ local lg = ligature[char]
+ if lg then
+ stop = current
+ ligature = lg
+ -- current = getnext(current)
+ current = nxt
+ else
+ break
+ end
+ else
+ break
+ end
+ end
+ if stop then
+ local lig = ligature.ligature
+ if lig then
+ if trace_ligatures then
+ local stopchar = getchar(stop)
+ head, start = markstoligature(head,start,stop,lig)
+ logprocess("%s: replacing %s upto %s by ligature %s case 1",pref(dataset,sequence),gref(startchar),gref(stopchar),gref(getchar(start)))
+ else
+ head, start = markstoligature(head,start,stop,lig)
+ end
+ return head, start, true, false
+ else
+ -- ok, goto next lookup
+ end
+ end
+ else
+ local discfound = false
+ local hasmarks = marks[startchar]
+ while current do
+ local char, id = ischar(current,currentfont)
+ if char then
+ if skiphash and skiphash[char] then
+ current = getnext(current)
+ else
+ local lg = ligature[char]
+ if lg then
+ if marks[char] then
+ hasmarks = true
+ end
+ stop = current -- needed for fake so outside then
+ ligature = lg
+ current = getnext(current)
+ else
+ break
+ end
+ end
+ elseif char == false then
+ -- kind of weird
+ break
+ elseif id == disc_code then
+ discfound = current
+ break
+ else
+ break
+ end
+ end
+ -- of{f-}{}{f}e o{f-}{}{f}fe o{-}{}{ff}e (oe and ff ligature)
+ -- we can end up here when we have a start run .. testruns start at a disc but
+ -- so here we have the other case: char + disc
+ if discfound then
+ -- don't assume marks in a disc and we don't run over a disc (for now)
+ local pre, post, replace = getdisc(discfound)
+ local match
+ if replace then
+ local char = ischar(replace,currentfont)
+ if char and ligature[char] then
+ match = true
+ end
+ end
+ if not match and pre then
+ local char = ischar(pre,currentfont)
+ if char and ligature[char] then
+ match = true
+ end
+ end
+ if not match and not pre or not replace then
+ local n = getnext(discfound)
+ local char = ischar(n,currentfont)
+ if char and ligature[char] then
+ match = true
+ end
+ end
+ if match then
+ -- we force a restart
+ local ishead = head == start
+ local prev = getprev(start)
+ if stop then
+ setnext(stop)
+ local copy = copy_node_list(start)
+ local tail = stop -- was: getprev(stop) -- Kai: needs checking on your samples
+ local liat = find_node_tail(copy)
+ if pre then
+ setlink(liat,pre)
+ end
+ if replace then
+ setlink(tail,replace)
+ end
+ pre = copy
+ replace = start
+ else
+ setnext(start)
+ local copy = copy_node(start)
+ if pre then
+ setlink(copy,pre)
+ end
+ if replace then
+ setlink(start,replace)
+ end
+ pre = copy
+ replace = start
+ end
+ setdisc(discfound,pre,post,replace)
+ if prev then
+ setlink(prev,discfound)
+ else
+ setprev(discfound)
+ head = discfound
+ end
+ start = discfound
+ return head, start, true, true
+ end
+ end
+ local lig = ligature.ligature
+ if lig then
+ if stop then
+ if trace_ligatures then
+ local stopchar = getchar(stop)
+ -- head, start = toligature(head,start,stop,lig,dataset,sequence,skiphash,discfound,hasmarks)
+ head, start = toligature(head,start,stop,lig,dataset,sequence,skiphash,false,hasmarks)
+ logprocess("%s: replacing %s upto %s by ligature %s case 2",pref(dataset,sequence),gref(startchar),gref(stopchar),gref(lig))
+ else
+ -- head, start = toligature(head,start,stop,lig,dataset,sequence,skiphash,discfound,hasmarks)
+ head, start = toligature(head,start,stop,lig,dataset,sequence,skiphash,false,hasmarks)
+ end
+ else
+ -- weird but happens (in some arabic font)
+ resetinjection(start)
+ setchar(start,lig)
+ if trace_ligatures then
+ logprocess("%s: replacing %s by (no real) ligature %s case 3",pref(dataset,sequence),gref(startchar),gref(lig))
+ end
+ end
+ return head, start, true, false
+ else
+ -- weird but happens, pseudo ligatures ... just the components
+ end
+ end
+ return head, start, false, false
+end
+
+function handlers.gpos_single(head,start,dataset,sequence,kerns,rlmode,skiphash,step,injection)
+ if has_glyph_option(start,no_right_kern_code) then
+ return head, start, false
+ else
+ local startchar = getchar(start)
+ local format = step.format
+ if format == "single" or type(kerns) == "table" then -- the table check can go
+ local dx, dy, w, h = setposition(0,start,factor,rlmode,kerns,injection)
+ if trace_kerns then
+ logprocess("%s: shifting single %s by %s xy (%p,%p) and wh (%p,%p)",pref(dataset,sequence),gref(startchar),format,dx,dy,w,h)
+ end
+ else
+ local k = (format == "move" and setmove or setkern)(start,factor,rlmode,kerns,injection)
+ if trace_kerns then
+ logprocess("%s: shifting single %s by %s %p",pref(dataset,sequence),gref(startchar),format,k)
+ end
+ end
+ return head, start, true
+ end
+end
+
+function handlers.gpos_pair(head,start,dataset,sequence,kerns,rlmode,skiphash,step,injection)
+ if has_glyph_option(start,no_right_kern_code) then
+ return head, start, false
+ else
+ local snext = getnext(start)
+ if not snext then
+ return head, start, false
+ else
+ local prev = start
+ while snext do
+ local nextchar = ischar(snext,currentfont)
+ if nextchar then
+ if skiphash and skiphash[nextchar] then -- includes marks too when flag
+ prev = snext
+ snext = getnext(snext)
+ else
+ local krn = kerns[nextchar]
+ if not krn then
+ break
+ end
+ local format = step.format
+ if format == "pair" then
+ local a = krn[1]
+ local b = krn[2]
+ if a == true then
+ -- zero
+ elseif a then -- #a > 0
+ local x, y, w, h = setposition(1,start,factor,rlmode,a,injection)
+ if trace_kerns then
+ local startchar = getchar(start)
+ logprocess("%s: shifting first of pair %s and %s by xy (%p,%p) and wh (%p,%p) as %s",pref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h,injection or "injections")
+ end
+ end
+ if b == true then
+ -- zero
+ start = snext -- cf spec
+ elseif b then -- #b > 0
+ local x, y, w, h = setposition(2,snext,factor,rlmode,b,injection)
+ if trace_kerns then
+ local startchar = getchar(start)
+ logprocess("%s: shifting second of pair %s and %s by xy (%p,%p) and wh (%p,%p) as %s",pref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h,injection or "injections")
+ end
+ start = snext -- cf spec
+ elseif forcepairadvance then
+ start = snext -- for testing, not cf spec
+ end
+ return head, start, true
+ elseif krn ~= 0 then
+ local k = (format == "move" and setmove or setkern)(snext,factor,rlmode,krn,injection)
+ if trace_kerns then
+ logprocess("%s: inserting %s %p between %s and %s as %s",pref(dataset,sequence),format,k,gref(getchar(prev)),gref(nextchar),injection or "injections")
+ end
+ return head, start, true
+ else -- can't happen
+ break
+ end
+ end
+ else
+ break
+ end
+ end
+ return head, start, false
+ end
+ end
+end
+
+--[[ldx--
+<p>We get hits on a mark, but we're not sure if the it has to be applied so
+we need to explicitly test for basechar, baselig and basemark entries.</p>
+--ldx]]--
+
+function handlers.gpos_mark2base(head,start,dataset,sequence,markanchors,rlmode,skiphash)
+ local markchar = getchar(start)
+ if marks[markchar] then
+ local base = getprev(start) -- [glyph] [start=mark]
+ if base then
+ local basechar = ischar(base,currentfont)
+ if basechar then
+ if marks[basechar] then
+ while base do
+ base = getprev(base)
+ if base then
+ basechar = ischar(base,currentfont)
+ if basechar then
+ if not marks[basechar] then
+ break
+ end
+ else
+ if trace_bugs then
+ logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
+ end
+ return head, start, false
+ end
+ else
+ if trace_bugs then
+ logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
+ end
+ return head, start, false
+ end
+ end
+ end
+ local ba = markanchors[1][basechar]
+ if ba then
+ local ma = markanchors[2]
+ local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
+ if trace_marks then
+ logprocess("%s, bound %s, anchoring mark %s to basechar %s => (%p,%p)",
+ pref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
+ end
+ return head, start, true
+ elseif trace_bugs then
+ -- onetimemessage(currentfont,basechar,"no base anchors",report_fonts)
+ logwarning("%s: mark %s is not anchored to %s",pref(dataset,sequence),gref(markchar),gref(basechar))
+ end
+ elseif trace_bugs then
+ logwarning("%s: nothing preceding, case %i",pref(dataset,sequence),1)
+ end
+ elseif trace_bugs then
+ logwarning("%s: nothing preceding, case %i",pref(dataset,sequence),2)
+ end
+ elseif trace_bugs then
+ logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
+ end
+ return head, start, false
+end
+
+function handlers.gpos_mark2ligature(head,start,dataset,sequence,markanchors,rlmode,skiphash)
+ local markchar = getchar(start)
+ if marks[markchar] then
+ local base = getprev(start) -- [glyph] [optional marks] [start=mark]
+ if base then
+ local basechar = ischar(base,currentfont)
+ if basechar then
+ if marks[basechar] then
+ while base do
+ base = getprev(base)
+ if base then
+ basechar = ischar(base,currentfont)
+ if basechar then
+ if not marks[basechar] then
+ break
+ end
+ else
+ if trace_bugs then
+ logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
+ end
+ return head, start, false
+ end
+ else
+ if trace_bugs then
+ logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
+ end
+ return head, start, false
+ end
+ end
+ end
+ local ba = markanchors[1][basechar]
+ if ba then
+ local ma = markanchors[2]
+ if ma then
+ local index = getligaindex(start)
+ ba = ba[index]
+ if ba then
+ local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
+ if trace_marks then
+ logprocess("%s, index %s, bound %s, anchoring mark %s to baselig %s at index %s => (%p,%p)",
+ pref(dataset,sequence),index,bound,gref(markchar),gref(basechar),index,dx,dy)
+ end
+ return head, start, true
+ else
+ if trace_bugs then
+ logwarning("%s: no matching anchors for mark %s and baselig %s with index %a",pref(dataset,sequence),gref(markchar),gref(basechar),index)
+ end
+ end
+ end
+ elseif trace_bugs then
+ -- logwarning("%s: char %s is missing in font",pref(dataset,sequence),gref(basechar))
+ onetimemessage(currentfont,basechar,"no base anchors",report_fonts)
+ end
+ elseif trace_bugs then
+ logwarning("%s: prev node is no char, case %i",pref(dataset,sequence),1)
+ end
+ elseif trace_bugs then
+ logwarning("%s: prev node is no char, case %i",pref(dataset,sequence),2)
+ end
+ elseif trace_bugs then
+ logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
+ end
+ return head, start, false
+end
+
+function handlers.gpos_mark2mark(head,start,dataset,sequence,markanchors,rlmode,skiphash)
+ local markchar = getchar(start)
+ if marks[markchar] then
+ local base = getprev(start) -- [glyph] [basemark] [start=mark]
+ local slc = getligaindex(start)
+ if slc then -- a rather messy loop ... needs checking with husayni
+ while base do
+ local blc = getligaindex(base)
+ if blc and blc ~= slc then
+ base = getprev(base)
+ else
+ break
+ end
+ end
+ end
+ if base then
+ local basechar = ischar(base,currentfont)
+ if basechar then -- subtype test can go
+ local ba = markanchors[1][basechar] -- slot 1 has been made copy of the class hash
+ if ba then
+ local ma = markanchors[2]
+ local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],true,checkmarks)
+ if trace_marks then
+ logprocess("%s, bound %s, anchoring mark %s to basemark %s => (%p,%p)",
+ pref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
+ end
+ return head, start, true
+ end
+ end
+ end
+ elseif trace_bugs then
+ logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
+ end
+ return head, start, false
+end
+
+function handlers.gpos_cursive(head,start,dataset,sequence,exitanchors,rlmode,skiphash,step) -- to be checked
+ local startchar = getchar(start)
+ if marks[startchar] then
+ if trace_cursive then
+ logprocess("%s: ignoring cursive for mark %s",pref(dataset,sequence),gref(startchar))
+ end
+ else
+ local nxt = getnext(start)
+ while nxt do
+ local nextchar = ischar(nxt,currentfont)
+ if not nextchar then
+ break
+ elseif marks[nextchar] then -- always sequence.flags[1]
+ nxt = getnext(nxt)
+ else
+ local exit = exitanchors[3]
+ if exit then
+ local entry = exitanchors[1][nextchar]
+ if entry then
+ entry = entry[2]
+ if entry then
+ local r2lflag = sequence.flags[4] -- mentioned in the standard
+ local dx, dy, bound = setcursive(start,nxt,factor,rlmode,exit,entry,characters[startchar],characters[nextchar],r2lflag)
+ if trace_cursive then
+ logprocess("%s: moving %s to %s cursive (%p,%p) using bound %s in %s mode",pref(dataset,sequence),gref(startchar),gref(nextchar),dx,dy,bound,mref(rlmode))
+ end
+ return head, start, true
+ end
+ end
+ end
+ break
+ end
+ end
+ end
+ return head, start, false
+end
+
+--[[ldx--
+<p>I will implement multiple chain replacements once I run into a font that uses
+it. It's not that complex to handle.</p>
+--ldx]]--
+
+local chainprocs = { }
+
+local function logprocess(...)
+ if trace_steps then
+ registermessage(...)
+ if trace_steps == "silent" then
+ return
+ end
+ end
+ report_subchain(...)
+end
+
+local logwarning = report_subchain
+
+local function logprocess(...)
+ if trace_steps then
+ registermessage(...)
+ if trace_steps == "silent" then
+ return
+ end
+ end
+ report_chain(...)
+end
+
+local logwarning = report_chain
+
+-- We could share functions but that would lead to extra function calls with many
+-- arguments, redundant tests and confusing messages.
+
+-- The reversesub is a special case, which is why we need to store the replacements
+-- in a bit weird way. There is no lookup and the replacement comes from the lookup
+-- itself. It is meant mostly for dealing with Urdu.
+
+local function reversesub(head,start,stop,dataset,sequence,replacements,rlmode,skiphash)
+ local char = getchar(start)
+ local replacement = replacements[char]
+ if replacement then
+ if trace_singles then
+ logprocess("%s: single reverse replacement of %s by %s",cref(dataset,sequence),gref(char),gref(replacement))
+ end
+ resetinjection(start)
+ setchar(start,replacement)
+ return head, start, true
+ else
+ return head, start, false
+ end
+end
+
+
+chainprocs.reversesub = reversesub
+
+--[[ldx--
+<p>This chain stuff is somewhat tricky since we can have a sequence of actions to be
+applied: single, alternate, multiple or ligature where ligature can be an invalid
+one in the sense that it will replace multiple by one but not neccessary one that
+looks like the combination (i.e. it is the counterpart of multiple then). For
+example, the following is valid:</p>
+
+<typing>
+<line>xxxabcdexxx [single a->A][multiple b->BCD][ligature cde->E] xxxABCDExxx</line>
+</typing>
+
+<p>Therefore we we don't really do the replacement here already unless we have the
+single lookup case. The efficiency of the replacements can be improved by deleting
+as less as needed but that would also make the code even more messy.</p>
+--ldx]]--
+
+--[[ldx--
+<p>Here we replace start by a single variant.</p>
+--ldx]]--
+
+-- To be done (example needed): what if > 1 steps
+
+-- this is messy: do we need this disc checking also in alternaties?
+
+local function reportzerosteps(dataset,sequence)
+ logwarning("%s: no steps",cref(dataset,sequence))
+end
+
+local function reportmoresteps(dataset,sequence)
+ logwarning("%s: more than 1 step",cref(dataset,sequence))
+end
+
+-- local function reportbadsteps(dataset,sequence)
+-- logwarning("%s: bad step, no proper return values",cref(dataset,sequence))
+-- end
+
+local function getmapping(dataset,sequence,currentlookup)
+ local steps = currentlookup.steps
+ local nofsteps = currentlookup.nofsteps
+ if nofsteps == 0 then
+ reportzerosteps(dataset,sequence)
+ currentlookup.mapping = false
+ return false
+ else
+ if nofsteps > 1 then
+ reportmoresteps(dataset,sequence)
+ end
+ local mapping = steps[1].coverage
+ currentlookup.mapping = mapping
+ currentlookup.format = steps[1].format
+ return mapping
+ end
+end
+
+function chainprocs.gsub_remove(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
+ if trace_chains then
+ logprocess("%s: removing character %s",cref(dataset,sequence,chainindex),gref(getchar(start)))
+ end
+ head, start = remove_node(head,start,true)
+ return head, getprev(start), true
+end
+
+function chainprocs.gsub_single(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
+ local mapping = currentlookup.mapping
+ if mapping == nil then
+ mapping = getmapping(dataset,sequence,currentlookup)
+ end
+ if mapping then
+ local current = start
+ while current do
+ local currentchar = ischar(current)
+ if currentchar then
+ local replacement = mapping[currentchar]
+ if not replacement or replacement == "" then
+ if trace_bugs then
+ logwarning("%s: no single for %s",cref(dataset,sequence,chainindex),gref(currentchar))
+ end
+ else
+ if trace_singles then
+ logprocess("%s: replacing single %s by %s",cref(dataset,sequence,chainindex),gref(currentchar),gref(replacement))
+ end
+ resetinjection(current)
+ setchar(current,replacement)
+ end
+ return head, start, true
+ elseif currentchar == false then
+ -- can't happen
+ break
+ elseif current == stop then
+ break
+ else
+ current = getnext(current)
+ end
+ end
+ end
+ return head, start, false
+end
+
+--[[ldx--
+<p>Here we replace start by new glyph. First we delete the rest of the match.</p>
+--ldx]]--
+
+-- char_1 mark_1 -> char_x mark_1 (ignore marks)
+-- char_1 mark_1 -> char_x
+
+-- to be checked: do we always have just one glyph?
+-- we can also have alternates for marks
+-- marks come last anyway
+-- are there cases where we need to delete the mark
+
+function chainprocs.gsub_alternate(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
+ local mapping = currentlookup.mapping
+ if mapping == nil then
+ mapping = getmapping(dataset,sequence,currentlookup)
+ end
+ if mapping then
+ local kind = dataset[4]
+ local what = dataset[1]
+ local value = what == true and tfmdata.shared.features[kind] or what -- todo: optimize in ctx
+ local current = start
+ while current do
+ local currentchar = ischar(current)
+ if currentchar then
+ local alternatives = mapping[currentchar]
+ if alternatives then
+ local choice, comment = get_alternative_glyph(current,alternatives,value)
+ if choice then
+ if trace_alternatives then
+ logprocess("%s: replacing %s by alternative %a to %s, %s",cref(dataset,sequence),gref(currentchar),choice,gref(choice),comment)
+ end
+ resetinjection(start)
+ setchar(start,choice)
+ else
+ if trace_alternatives then
+ logwarning("%s: no variant %a for %s, %s",cref(dataset,sequence),value,gref(currentchar),comment)
+ end
+ end
+ end
+ return head, start, true
+ elseif currentchar == false then
+ -- can't happen
+ break
+ elseif current == stop then
+ break
+ else
+ current = getnext(current)
+ end
+ end
+ end
+ return head, start, false
+end
+
+--[[ldx--
+<p>Here we replace start by a sequence of new glyphs.</p>
+--ldx]]--
+
+function chainprocs.gsub_multiple(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
+ local mapping = currentlookup.mapping
+ if mapping == nil then
+ mapping = getmapping(dataset,sequence,currentlookup)
+ end
+ if mapping then
+ local startchar = getchar(start)
+ local replacement = mapping[startchar]
+ if not replacement or replacement == "" then
+ if trace_bugs then
+ logwarning("%s: no multiple for %s",cref(dataset,sequence),gref(startchar))
+ end
+ else
+ if trace_multiples then
+ logprocess("%s: replacing %s by multiple characters %s",cref(dataset,sequence),gref(startchar),gref(replacement))
+ end
+ return multiple_glyphs(head,start,replacement,skiphash,dataset[1],stop)
+ end
+ end
+ return head, start, false
+end
+
+--[[ldx--
+<p>When we replace ligatures we use a helper that handles the marks. I might change
+this function (move code inline and handle the marks by a separate function). We
+assume rather stupid ligatures (no complex disc nodes).</p>
+--ldx]]--
+
+-- compare to handlers.gsub_ligature which is more complex ... why
+
+function chainprocs.gsub_ligature(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
+ local mapping = currentlookup.mapping
+ if mapping == nil then
+ mapping = getmapping(dataset,sequence,currentlookup)
+ end
+ if mapping then
+ local startchar = getchar(start)
+ local ligatures = mapping[startchar]
+ if not ligatures then
+ if trace_bugs then
+ logwarning("%s: no ligatures starting with %s",cref(dataset,sequence,chainindex),gref(startchar))
+ end
+ else
+ local hasmarks = marks[startchar]
+ local current = getnext(start)
+ local discfound = false
+ local last = stop
+ local nofreplacements = 1
+ while current do
+ -- todo: ischar ... can there really be disc nodes here?
+ local id = getid(current)
+ if id == disc_code then
+ if not discfound then
+ discfound = current
+ end
+ if current == stop then
+ break -- okay? or before the disc
+ else
+ current = getnext(current)
+ end
+ else
+ local schar = getchar(current)
+ if skiphash and skiphash[schar] then -- marks
+ -- if current == stop then -- maybe add this
+ -- break
+ -- else
+ current = getnext(current)
+ -- end
+ else
+ local lg = ligatures[schar]
+ if lg then
+ ligatures = lg
+ last = current
+ nofreplacements = nofreplacements + 1
+ if marks[char] then
+ hasmarks = true
+ end
+ if current == stop then
+ break
+ else
+ current = getnext(current)
+ end
+ else
+ break
+ end
+ end
+ end
+ end
+ local ligature = ligatures.ligature
+ if ligature then
+ if chainindex then
+ stop = last
+ end
+ if trace_ligatures then
+ if start == stop then
+ logprocess("%s: replacing character %s by ligature %s case 3",cref(dataset,sequence,chainindex),gref(startchar),gref(ligature))
+ else
+ logprocess("%s: replacing character %s upto %s by ligature %s case 4",cref(dataset,sequence,chainindex),gref(startchar),gref(getchar(stop)),gref(ligature))
+ end
+ end
+ head, start = toligature(head,start,stop,ligature,dataset,sequence,skiphash,discfound,hasmarks)
+ return head, start, true, nofreplacements, discfound
+ elseif trace_bugs then
+ if start == stop then
+ logwarning("%s: replacing character %s by ligature fails",cref(dataset,sequence,chainindex),gref(startchar))
+ else
+ logwarning("%s: replacing character %s upto %s by ligature fails",cref(dataset,sequence,chainindex),gref(startchar),gref(getchar(stop)))
+ end
+ end
+ end
+ end
+ return head, start, false, 0, false
+end
+
+function chainprocs.gpos_single(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
+ if has_glyph_option(start,no_right_kern_code) then
+ local mapping = currentlookup.mapping
+ if mapping == nil then
+ mapping = getmapping(dataset,sequence,currentlookup)
+ end
+ if mapping then
+ local startchar = getchar(start)
+ local kerns = mapping[startchar]
+ if kerns then
+ local format = currentlookup.format
+ if format == "single" then
+ local dx, dy, w, h = setposition(0,start,factor,rlmode,kerns) -- currentlookup.flags ?
+ if trace_kerns then
+ logprocess("%s: shifting single %s by %s (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),format,dx,dy,w,h)
+ end
+ else -- needs checking .. maybe no kerns format for single
+ local k = (format == "move" and setmove or setkern)(start,factor,rlmode,kerns,injection)
+ if trace_kerns then
+ logprocess("%s: shifting single %s by %s %p",cref(dataset,sequence),gref(startchar),format,k)
+ end
+ end
+ return head, start, true
+ end
+ end
+ end
+ return head, start, false
+end
+
+function chainprocs.gpos_pair(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) -- todo: injections ?
+ if has_glyph_option(start,no_right_kern_code) then
+ local mapping = currentlookup.mapping
+ if mapping == nil then
+ mapping = getmapping(dataset,sequence,currentlookup)
+ end
+ if mapping then
+ local snext = getnext(start)
+ if snext then
+ local startchar = getchar(start)
+ local kerns = mapping[startchar] -- always 1 step
+ if kerns then
+ local prev = start
+ while snext do
+ local nextchar = ischar(snext,currentfont)
+ if not nextchar then
+ break
+ end
+ if skiphash and skiphash[nextchar] then
+ prev = snext
+ snext = getnext(snext)
+ else
+ local krn = kerns[nextchar]
+ if not krn then
+ break
+ end
+ local format = currentlookup.format
+ if format == "pair" then
+ local a = krn[1]
+ local b = krn[2]
+ if a == true then
+ -- zero
+ elseif a then
+ local x, y, w, h = setposition(1,start,factor,rlmode,a,"injections") -- currentlookups flags?
+ if trace_kerns then
+ local startchar = getchar(start)
+ logprocess("%s: shifting first of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h)
+ end
+ end
+ if b == true then
+ -- zero
+ start = snext -- cf spec
+ elseif b then -- #b > 0
+ local x, y, w, h = setposition(2,snext,factor,rlmode,b,"injections")
+ if trace_kerns then
+ local startchar = getchar(start)
+ logprocess("%s: shifting second of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h)
+ end
+ start = snext -- cf spec
+ elseif forcepairadvance then
+ start = snext -- for testing, not cf spec
+ end
+ return head, start, true
+ elseif krn ~= 0 then
+ local k = (format == "move" and setmove or setkern)(snext,factor,rlmode,krn)
+ if trace_kerns then
+ logprocess("%s: inserting %s %p between %s and %s",cref(dataset,sequence),format,k,gref(getchar(prev)),gref(nextchar))
+ end
+ return head, start, true
+ else
+ break
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ return head, start, false
+end
+
+function chainprocs.gpos_mark2base(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
+ local mapping = currentlookup.mapping
+ if mapping == nil then
+ mapping = getmapping(dataset,sequence,currentlookup)
+ end
+ if mapping then
+ local markchar = getchar(start)
+ if marks[markchar] then
+ local markanchors = mapping[markchar] -- always 1 step
+ if markanchors then
+ local base = getprev(start) -- [glyph] [start=mark]
+ if base then
+ local basechar = ischar(base,currentfont)
+ if basechar then
+ if marks[basechar] then
+ while base do
+ base = getprev(base)
+ if base then
+ local basechar = ischar(base,currentfont)
+ if basechar then
+ if not marks[basechar] then
+ break
+ end
+ else
+ if trace_bugs then
+ logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
+ end
+ return head, start, false
+ end
+ else
+ if trace_bugs then
+ logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
+ end
+ return head, start, false
+ end
+ end
+ end
+ local ba = markanchors[1][basechar]
+ if ba then
+ local ma = markanchors[2]
+ if ma then
+ local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
+ if trace_marks then
+ logprocess("%s, bound %s, anchoring mark %s to basechar %s => (%p,%p)",
+ cref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
+ end
+ return head, start, true
+ end
+ end
+ elseif trace_bugs then
+ logwarning("%s: prev node is no char, case %i",cref(dataset,sequence),1)
+ end
+ elseif trace_bugs then
+ logwarning("%s: prev node is no char, case %i",cref(dataset,sequence),2)
+ end
+ elseif trace_bugs then
+ logwarning("%s: mark %s has no anchors",cref(dataset,sequence),gref(markchar))
+ end
+ elseif trace_bugs then
+ logwarning("%s: mark %s is no mark",cref(dataset,sequence),gref(markchar))
+ end
+ end
+ return head, start, false
+end
+
+function chainprocs.gpos_mark2ligature(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
+ local mapping = currentlookup.mapping
+ if mapping == nil then
+ mapping = getmapping(dataset,sequence,currentlookup)
+ end
+ if mapping then
+ local markchar = getchar(start)
+ if marks[markchar] then
+ local markanchors = mapping[markchar] -- always 1 step
+ if markanchors then
+ local base = getprev(start) -- [glyph] [optional marks] [start=mark]
+ if base then
+ local basechar = ischar(base,currentfont)
+ if basechar then
+ if marks[basechar] then
+ while base do
+ base = getprev(base)
+ if base then
+ local basechar = ischar(base,currentfont)
+ if basechar then
+ if not marks[basechar] then
+ break
+ end
+ else
+ if trace_bugs then
+ logwarning("%s: no base for mark %s, case %i",cref(dataset,sequence),markchar,1)
+ end
+ return head, start, false
+ end
+ else
+ if trace_bugs then
+ logwarning("%s: no base for mark %s, case %i",cref(dataset,sequence),markchar,2)
+ end
+ return head, start, false
+ end
+ end
+ end
+ local ba = markanchors[1][basechar]
+ if ba then
+ local ma = markanchors[2]
+ if ma then
+ local index = getligaindex(start)
+ ba = ba[index]
+ if ba then
+ local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
+ if trace_marks then
+ logprocess("%s, bound %s, anchoring mark %s to baselig %s at index %s => (%p,%p)",
+ cref(dataset,sequence),a or bound,gref(markchar),gref(basechar),index,dx,dy)
+ end
+ return head, start, true
+ end
+ end
+ end
+ elseif trace_bugs then
+ logwarning("%s, prev node is no char, case %i",cref(dataset,sequence),1)
+ end
+ elseif trace_bugs then
+ logwarning("%s, prev node is no char, case %i",cref(dataset,sequence),2)
+ end
+ elseif trace_bugs then
+ logwarning("%s, mark %s has no anchors",cref(dataset,sequence),gref(markchar))
+ end
+ elseif trace_bugs then
+ logwarning("%s, mark %s is no mark",cref(dataset,sequence),gref(markchar))
+ end
+ end
+ return head, start, false
+end
+
+function chainprocs.gpos_mark2mark(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
+ local mapping = currentlookup.mapping
+ if mapping == nil then
+ mapping = getmapping(dataset,sequence,currentlookup)
+ end
+ if mapping then
+ local markchar = getchar(start)
+ if marks[markchar] then
+ local markanchors = mapping[markchar] -- always 1 step
+ if markanchors then
+ local base = getprev(start) -- [glyph] [basemark] [start=mark]
+ local slc = getligaindex(start)
+ if slc then -- a rather messy loop ... needs checking with husayni
+ while base do
+ local blc = getligaindex(base)
+ if blc and blc ~= slc then
+ base = getprev(base)
+ else
+ break
+ end
+ end
+ end
+ if base then -- subtype test can go
+ local basechar = ischar(base,currentfont)
+ if basechar then
+ local ba = markanchors[1][basechar]
+ if ba then
+ local ma = markanchors[2]
+ if ma then
+ local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],true,checkmarks)
+ if trace_marks then
+ logprocess("%s, bound %s, anchoring mark %s to basemark %s => (%p,%p)",
+ cref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
+ end
+ return head, start, true
+ end
+ end
+ elseif trace_bugs then
+ logwarning("%s: prev node is no mark, case %i",cref(dataset,sequence),1)
+ end
+ elseif trace_bugs then
+ logwarning("%s: prev node is no mark, case %i",cref(dataset,sequence),2)
+ end
+ elseif trace_bugs then
+ logwarning("%s: mark %s has no anchors",cref(dataset,sequence),gref(markchar))
+ end
+ elseif trace_bugs then
+ logwarning("%s: mark %s is no mark",cref(dataset,sequence),gref(markchar))
+ end
+ end
+ return head, start, false
+end
+
+function chainprocs.gpos_cursive(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
+ local mapping = currentlookup.mapping
+ if mapping == nil then
+ mapping = getmapping(dataset,sequence,currentlookup)
+ end
+ if mapping then
+ local startchar = getchar(start)
+ local exitanchors = mapping[startchar] -- always 1 step
+ if exitanchors then
+ if marks[startchar] then
+ if trace_cursive then
+ logprocess("%s: ignoring cursive for mark %s",pref(dataset,sequence),gref(startchar))
+ end
+ else
+ local nxt = getnext(start)
+ while nxt do
+ local nextchar = ischar(nxt,currentfont)
+ if not nextchar then
+ break
+ elseif marks[nextchar] then
+ -- should not happen (maybe warning)
+ nxt = getnext(nxt)
+ else
+ local exit = exitanchors[3]
+ if exit then
+ local entry = exitanchors[1][nextchar]
+ if entry then
+ entry = entry[2]
+ if entry then
+ local r2lflag = sequence.flags[4] -- mentioned in the standard
+ local dx, dy, bound = setcursive(start,nxt,factor,rlmode,exit,entry,characters[startchar],characters[nextchar],r2lflag)
+ if trace_cursive then
+ logprocess("%s: moving %s to %s cursive (%p,%p) using bound %s in %s mode",pref(dataset,sequence),gref(startchar),gref(nextchar),dx,dy,bound,mref(rlmode))
+ end
+ return head, start, true
+ end
+ end
+ elseif trace_bugs then
+ onetimemessage(currentfont,startchar,"no entry anchors",report_fonts)
+ end
+ break
+ end
+ end
+ end
+ elseif trace_cursive and trace_details then
+ logprocess("%s, cursive %s is already done",pref(dataset,sequence),gref(getchar(start)),alreadydone)
+ end
+ end
+ return head, start, false
+end
+
+-- what pointer to return, spec says stop
+-- to be discussed ... is bidi changer a space?
+-- elseif char == zwnj and sequence[n][32] then -- brrr
+
+local function show_skip(dataset,sequence,char,ck,class)
+ logwarning("%s: skipping char %s, class %a, rule %a, lookuptype %a",cref(dataset,sequence),gref(char),class,ck[1],ck[8] or ck[2])
+end
+
+-- A previous version had disc collapsing code in the (single sub) handler plus some
+-- checking in the main loop, but that left the pre/post sequences undone. The best
+-- solution is to add some checking there and backtrack when a replace/post matches
+-- but it takes a bit of work to figure out an efficient way (this is what the
+-- sweep* names refer to). I might look into that variant one day again as it can
+-- replace some other code too. In that approach we can have a special version for
+-- gub and pos which gains some speed. This method does the test and passes info to
+-- the handlers. Here collapsing is handled in the main loop which also makes code
+-- elsewhere simpler (i.e. no need for the other special runners and disc code in
+-- ligature building). I also experimented with pushing preceding glyphs sequences
+-- in the replace/pre fields beforehand which saves checking afterwards but at the
+-- cost of duplicate glyphs (memory) but it's too much overhead (runtime).
+--
+-- In the meantime Kai had moved the code from the single chain into a more general
+-- handler and this one (renamed to chaindisk) is used now. I optimized the code a
+-- bit and brought it in sycn with the other code. Hopefully I didn't introduce
+-- errors. Note: this somewhat complex approach is meant for fonts that implement
+-- (for instance) ligatures by character replacement which to some extend is not
+-- that suitable for hyphenation. I also use some helpers. This method passes some
+-- states but reparses the list. There is room for a bit of speed up but that will
+-- be done in the context version. (In fact a partial rewrite of all code can bring
+-- some more efficiency.)
+--
+-- I didn't test it with extremes but successive disc nodes still can give issues
+-- but in order to handle that we need more complex code which also slows down even
+-- more. The main loop variant could deal with that: test, collapse, backtrack.
+
+local userkern = nuts.pool and nuts.pool.newkern -- context
+
+do if not userkern then -- generic
+
+ local thekern = nuts.new("kern",1) -- userkern
+ local setkern = nuts.setkern -- not injections.setkern
+
+ userkern = function(k)
+ local n = copy_node(thekern)
+ setkern(n,k)
+ return n
+ end
+
+end end
+
+local function checked(head)
+ local current = head
+ while current do
+ if getid(current) == glue_code then
+ local kern = userkern(getwidth(current))
+ if head == current then
+ local next = getnext(current)
+ if next then
+ setlink(kern,next)
+ end
+ flush_node(current)
+ head = kern
+ current = next
+ else
+ local prev, next = getboth(current)
+ setlink(prev,kern,next)
+ flush_node(current)
+ current = next
+ end
+ else
+ current = getnext(current)
+ end
+ end
+ return head
+end
+
+local function setdiscchecked(d,pre,post,replace)
+ if pre then pre = checked(pre) end
+ if post then post = checked(post) end
+ if replace then replace = checked(replace) end
+ setdisc(d,pre,post,replace)
+end
+
+local noflags = { false, false, false, false }
+
+local function chainrun(head,start,last,dataset,sequence,rlmode,skiphash,ck)
+
+ local size = ck[5] - ck[4] + 1
+ local chainlookups = ck[6]
+ local done = false
+
+ -- current match
+ if chainlookups then
+ -- Lookups can be like { 1, false, 3 } or { false, 2 } or basically anything and
+ -- #lookups can be less than #current
+
+ if size == 1 then
+
+ -- if nofchainlookups > size then
+ -- -- bad rules
+ -- end
+
+ local chainlookup = chainlookups[1]
+ for j=1,#chainlookup do
+ local chainstep = chainlookup[j]
+ local chainkind = chainstep.type
+ local chainproc = chainprocs[chainkind]
+ if chainproc then
+ local ok
+ head, start, ok = chainproc(head,start,last,dataset,sequence,chainstep,rlmode,skiphash)
+ if ok then
+ done = true
+ end
+ else
+ logprocess("%s: %s is not yet supported (1)",cref(dataset,sequence),chainkind)
+ end
+ end
+
+ else
+
+ -- See LookupType 5: Contextual Substitution Subtable. Now it becomes messy. The
+ -- easiest case is where #current maps on #lookups i.e. one-to-one. But what if
+ -- we have a ligature. Cf the spec we then need to advance one character but we
+ -- really need to test it as there are fonts out there that are fuzzy and have
+ -- too many lookups:
+ --
+ -- U+1105 U+119E U+1105 U+119E : sourcehansansklight: script=hang ccmp=yes
+ --
+ -- Even worse are these family emoji shapes as they can have multiple lookups
+ -- per slot (probably only for gpos).
+
+ -- It's very unlikely that we will have skip classes here but still ... we seldom
+ -- enter this branch anyway.
+
+ local i = 1
+ local laststart = start
+ local nofchainlookups = #chainlookups -- useful?
+ while start do
+ if skiphash then -- hm, so we know we skip some
+ while start do
+ local char = ischar(start,currentfont)
+ if char then
+ if skiphash and skiphash[char] then
+ start = getnext(start)
+ else
+ break
+ end
+ else
+ break
+ end
+ end
+ end
+ local chainlookup = chainlookups[i]
+ if chainlookup then
+ for j=1,#chainlookup do
+ local chainstep = chainlookup[j]
+ local chainkind = chainstep.type
+ local chainproc = chainprocs[chainkind]
+ if chainproc then
+ local ok, n
+ head, start, ok, n = chainproc(head,start,last,dataset,sequence,chainstep,rlmode,skiphash,i)
+ -- messy since last can be changed !
+ if ok then
+ done = true
+ if n and n > 1 and i + n > nofchainlookups then
+ -- this is a safeguard, we just ignore the rest of the lookups
+ i = size -- prevents an advance
+ break
+ end
+ end
+ else
+ -- actually an error
+ logprocess("%s: %s is not yet supported (2)",cref(dataset,sequence),chainkind)
+ end
+ end
+ else
+ -- we skip but we could also delete as option .. what does an empty lookup actually mean
+ -- in opentype ... anyway, we could map it onto gsub_remove if needed
+ end
+ i = i + 1
+ if i > size or not start then
+ break
+ elseif start then
+ laststart = start
+ start = getnext(start)
+ end
+ end
+ if not start then
+ start = laststart
+ end
+
+ end
+ else
+ -- todo: needs checking for holes in the replacements
+ local replacements = ck[7]
+ if replacements then
+ head, start, done = reversesub(head,start,last,dataset,sequence,replacements,rlmode,skiphash)
+ else
+ done = true
+ if trace_contexts then
+ logprocess("%s: skipping match",cref(dataset,sequence))
+ end
+ end
+ end
+ return head, start, done
+end
+
+local function chaindisk(head,start,dataset,sequence,rlmode,skiphash,ck)
+
+ if not start then
+ return head, start, false
+ end
+
+ local startishead = start == head
+ local seq = ck[3]
+ local f = ck[4]
+ local l = ck[5]
+ local s = #seq
+ local done = false
+ local sweepnode = sweepnode
+ local sweeptype = sweeptype
+ local sweepoverflow = false
+ local checkdisc = getprev(head)
+ local keepdisc = not sweepnode
+ local lookaheaddisc = nil
+ local backtrackdisc = nil
+ local current = start
+ local last = start
+ local prev = getprev(start)
+ local hasglue = false
+
+ -- fishy: so we can overflow and then go on in the sweep?
+ -- todo : id can also be glue_code as we checked spaces
+
+ local i = f
+ while i <= l do
+ local id = getid(current)
+ if id == glyph_code then
+ i = i + 1
+ last = current
+ current = getnext(current)
+ elseif id == glue_code then
+ i = i + 1
+ last = current
+ current = getnext(current)
+ hasglue = true
+ elseif id == disc_code then
+ if keepdisc then
+ keepdisc = false
+ lookaheaddisc = current
+ local replace = getreplace(current)
+ if not replace then
+ sweepoverflow = true
+ sweepnode = current
+ current = getnext(current)
+ else
+ -- we can use an iterator
+ while replace and i <= l do
+ if getid(replace) == glyph_code then
+ i = i + 1
+ end
+ replace = getnext(replace)
+ end
+ current = getnext(replace)
+ end
+ last = current
+ else
+ head, current = flattendisk(head,current)
+ end
+ else
+ last = current
+ current = getnext(current)
+ end
+ if current then
+ -- go on
+ elseif sweepoverflow then
+ -- we already are following up on sweepnode
+ break
+ elseif sweeptype == "post" or sweeptype == "replace" then
+ current = getnext(sweepnode)
+ if current then
+ sweeptype = nil
+ sweepoverflow = true
+ else
+ break
+ end
+ else
+ break -- added
+ end
+ end
+
+ if sweepoverflow then
+ local prev = current and getprev(current)
+ if not current or prev ~= sweepnode then
+ local head = getnext(sweepnode)
+ local tail = nil
+ if prev then
+ tail = prev
+ setprev(current,sweepnode)
+ else
+ tail = find_node_tail(head)
+ end
+ setnext(sweepnode,current)
+ setprev(head)
+ setnext(tail)
+ appenddisc(sweepnode,head)
+ end
+ end
+
+ if l < s then
+ local i = l
+ local t = sweeptype == "post" or sweeptype == "replace"
+ while current and i < s do
+ local id = getid(current)
+ if id == glyph_code then
+ i = i + 1
+ current = getnext(current)
+ elseif id == glue_code then
+ i = i + 1
+ current = getnext(current)
+ hasglue = true
+ elseif id == disc_code then
+ if keepdisc then
+ keepdisc = false
+ if notmatchpre[current] ~= notmatchreplace[current] then
+ lookaheaddisc = current
+ end
+ -- we assume a simple text only replace (we could use nuts.count)
+ local replace = getreplace(current)
+ while replace and i < s do
+ if getid(replace) == glyph_code then
+ i = i + 1
+ end
+ replace = getnext(replace)
+ end
+ current = getnext(current)
+ elseif notmatchpre[current] ~= notmatchreplace[current] then
+ head, current = flattendisk(head,current)
+ else
+ current = getnext(current) -- HH
+ end
+ else
+ current = getnext(current)
+ end
+ if not current and t then
+ current = getnext(sweepnode)
+ if current then
+ sweeptype = nil
+ end
+ end
+ end
+ end
+
+ if f > 1 then
+ local current = prev
+ local i = f
+ local t = sweeptype == "pre" or sweeptype == "replace"
+ if not current and t and current == checkdisc then
+ current = getprev(sweepnode)
+ end
+ while current and i > 1 do -- missing getprev added / moved outside
+ local id = getid(current)
+ if id == glyph_code then
+ i = i - 1
+ elseif id == glue_code then
+ i = i - 1
+ hasglue = true
+ elseif id == disc_code then
+ if keepdisc then
+ keepdisc = false
+ if notmatchpost[current] ~= notmatchreplace[current] then
+ backtrackdisc = current
+ end
+ -- we assume a simple text only replace (we could use nuts.count)
+ local replace = getreplace(current)
+ while replace and i > 1 do
+ if getid(replace) == glyph_code then
+ i = i - 1
+ end
+ replace = getnext(replace)
+ end
+ elseif notmatchpost[current] ~= notmatchreplace[current] then
+ head, current = flattendisk(head,current)
+ end
+ end
+ current = getprev(current)
+ if t and current == checkdisc then
+ current = getprev(sweepnode)
+ end
+ end
+ end
+ local done = false
+
+ if lookaheaddisc then
+
+ local cf = start
+ local cl = getprev(lookaheaddisc)
+ local cprev = getprev(start)
+ local insertedmarks = 0
+
+ while cprev do
+ local char = ischar(cf,currentfont)
+ if char and marks[char] then
+ insertedmarks = insertedmarks + 1
+ cf = cprev
+ startishead = cf == head
+ cprev = getprev(cprev)
+ else
+ break
+ end
+ end
+ setlink(cprev,lookaheaddisc)
+ setprev(cf)
+ setnext(cl)
+ if startishead then
+ head = lookaheaddisc
+ end
+ local pre, post, replace = getdisc(lookaheaddisc)
+ local new = copy_node_list(cf) -- br, how often does that happen
+ local cnew = new
+ if pre then
+ setlink(find_node_tail(cf),pre)
+ end
+ if replace then
+ local tail = find_node_tail(new)
+ setlink(tail,replace)
+ end
+ for i=1,insertedmarks do
+ cnew = getnext(cnew)
+ end
+ cl = start
+ local clast = cnew
+ for i=f,l do
+ cl = getnext(cl)
+ clast = getnext(clast)
+ end
+ if not notmatchpre[lookaheaddisc] then
+ local ok = false
+ cf, start, ok = chainrun(cf,start,cl,dataset,sequence,rlmode,skiphash,ck)
+ if ok then
+ done = true
+ end
+ end
+ if not notmatchreplace[lookaheaddisc] then
+ local ok = false
+ new, cnew, ok = chainrun(new,cnew,clast,dataset,sequence,rlmode,skiphash,ck)
+ if ok then
+ done = true
+ end
+ end
+ if hasglue then
+ setdiscchecked(lookaheaddisc,cf,post,new)
+ else
+ setdisc(lookaheaddisc,cf,post,new)
+ end
+ start = getprev(lookaheaddisc)
+ sweephead[cf] = getnext(clast) or false
+ sweephead[new] = getnext(cl) or false
+
+ elseif backtrackdisc then
+
+ local cf = getnext(backtrackdisc)
+ local cl = start
+ local cnext = getnext(start)
+ local insertedmarks = 0
+
+ while cnext do
+ local char = ischar(cnext,currentfont)
+ if char and marks[char] then
+ insertedmarks = insertedmarks + 1
+ cl = cnext
+ cnext = getnext(cnext)
+ else
+ break
+ end
+ end
+ setlink(backtrackdisc,cnext)
+ setprev(cf)
+ setnext(cl)
+ local pre, post, replace, pretail, posttail, replacetail = getdisc(backtrackdisc,true)
+ local new = copy_node_list(cf)
+ local cnew = find_node_tail(new)
+ for i=1,insertedmarks do
+ cnew = getprev(cnew)
+ end
+ local clast = cnew
+ for i=f,l do
+ clast = getnext(clast)
+ end
+ if not notmatchpost[backtrackdisc] then
+ local ok = false
+ cf, start, ok = chainrun(cf,start,last,dataset,sequence,rlmode,skiphash,ck)
+ if ok then
+ done = true
+ end
+ end
+ if not notmatchreplace[backtrackdisc] then
+ local ok = false
+ new, cnew, ok = chainrun(new,cnew,clast,dataset,sequence,rlmode,skiphash,ck)
+ if ok then
+ done = true
+ end
+ end
+ if post then
+ setlink(posttail,cf)
+ else
+ post = cf
+ end
+ if replace then
+ setlink(replacetail,new)
+ else
+ replace = new
+ end
+ if hasglue then
+ setdiscchecked(backtrackdisc,pre,post,replace)
+ else
+ setdisc(backtrackdisc,pre,post,replace)
+ end
+ start = getprev(backtrackdisc)
+ sweephead[post] = getnext(clast) or false
+ sweephead[replace] = getnext(last) or false
+
+ else
+
+ local ok = false
+ head, start, ok = chainrun(head,start,last,dataset,sequence,rlmode,skiphash,ck)
+ if ok then
+ done = true
+ end
+
+ end
+
+ return head, start, done
+end
+
+local function chaintrac(head,start,dataset,sequence,rlmode,skiphash,ck,match,discseen,sweepnode)
+ local rule = ck[1]
+ local lookuptype = ck[8] or ck[2]
+ local nofseq = #ck[3]
+ local first = ck[4]
+ local last = ck[5]
+ local char = getchar(start)
+ logwarning("%s: rule %s %s at char %s for (%s,%s,%s) chars, lookuptype %a, %sdisc seen, %ssweeping",
+ cref(dataset,sequence),rule,match and "matches" or "nomatch",
+ gref(char),first-1,last-first+1,nofseq-last,lookuptype,
+ discseen and "" or "no ", sweepnode and "" or "not ")
+end
+
+-- The next one is quite optimized but still somewhat slow, fonts like ebgaramond
+-- are real torture tests because they have many steps with one context (having
+-- multiple contexts makes more sense) also because we (can) reduce them. Instead of
+-- a match boolean variable and check for that I decided to use a goto with labels
+-- instead. This is one of the cases where it makes the code more readable and we
+-- might even gain a bit performance.
+
+-- when we have less replacements (lookups) then current matches we can push too much into
+-- the previous disc .. such be it (<before><disc><current=fl><after> with only f done)
+
+local function handle_contextchain(head,start,dataset,sequence,contexts,rlmode,skiphash)
+ -- optimizing for rlmode gains nothing
+ local sweepnode = sweepnode
+ local sweeptype = sweeptype
+ local postreplace
+ local prereplace
+ local checkdisc
+ local discseen -- = false
+ if sweeptype then
+ if sweeptype == "replace" then
+ postreplace = true
+ prereplace = true
+ else
+ postreplace = sweeptype == "post"
+ prereplace = sweeptype == "pre"
+ end
+ checkdisc = getprev(head)
+ end
+ local currentfont = currentfont
+
+ local skipped -- = false
+
+ local startprev,
+ startnext = getboth(start)
+ local done -- = false
+
+ -- we can have multiple hits and as we scan (currently) all we need to check
+ -- if we have a match ... contextchains have no real coverage table (with
+ -- unique entries)
+
+ -- fonts can have many steps (each doing one check) or many contexts
+
+ -- todo: make a per-char cache so that we have small contexts (when we have a context
+ -- n == 1 and otherwise it can be more so we can even distingish n == 1 or more)
+
+ local nofcontexts = contexts.n -- #contexts
+
+ local startchar = nofcontext == 1 or ischar(start,currentfont) -- only needed in a chain
+
+ for k=1,nofcontexts do -- does this disc mess work well with n > 1
+
+ local ck = contexts[k]
+ local seq = ck[3]
+ local f = ck[4] -- first current
+ if not startchar or not seq[f][startchar] then
+ -- report("no hit in %a at %i of %i contexts",sequence.type,k,nofcontexts)
+ goto next
+ end
+ local s = seq.n -- or #seq
+ local l = ck[5] -- last current
+ local current = start
+ local last = start
+
+ if s == 1 then
+ goto next
+ end
+
+ -- current match
+
+ if l > f then
+ -- before/current/after | before/current | current/after
+ local discfound -- = nil
+ local n = f + 1
+ last = startnext -- the second in current (first already matched)
+ while n <= l do
+ if postreplace and not last then
+ last = getnext(sweepnode)
+ sweeptype = nil
+ end
+ if last then
+ -- local char, id = ischar(last,currentfont)
+ local nxt, char, id = isnextchar(last,currentfont)
+ if char then
+ if skiphash and skiphash[char] then
+ skipped = true
+ if trace_skips then
+ show_skip(dataset,sequence,char,ck,classes[char])
+ end
+ -- last = getnext(last)
+ last = nxt
+ elseif seq[n][char] then
+ if n < l then
+ -- last = getnext(last)
+ last = nxt
+ end
+ n = n + 1
+ elseif discfound then
+ notmatchreplace[discfound] = true
+ if notmatchpre[discfound] then
+ goto next
+ else
+ break
+ end
+ else
+ goto next
+ end
+ elseif char == false then
+ if discfound then
+ notmatchreplace[discfound] = true
+ if notmatchpre[discfound] then
+ goto next
+ else
+ break
+ end
+ else
+ goto next
+ end
+ elseif id == disc_code then
+ -- elseif id == disc_code and (not discs or discs[last]) then
+ discseen = true
+ discfound = last
+ notmatchpre[last] = nil
+ notmatchpost[last] = true
+ notmatchreplace[last] = nil
+ local pre, post, replace = getdisc(last)
+ if pre then
+ local n = n
+ while pre do
+ if seq[n][getchar(pre)] then
+ n = n + 1
+ if n > l then
+ break
+ end
+ pre = getnext(pre)
+ else
+ notmatchpre[last] = true
+ break
+ end
+ end
+ if n <= l then
+ notmatchpre[last] = true
+ end
+ else
+ notmatchpre[last] = true
+ end
+ if replace then
+ -- so far we never entered this branch
+ while replace do
+ if seq[n][getchar(replace)] then
+ n = n + 1
+ if n > l then
+ break
+ end
+ replace = getnext(replace)
+ else
+ notmatchreplace[last] = true
+ if notmatchpre[last] then
+ goto next
+ else
+ break
+ end
+ end
+ end
+ -- why here again
+ if notmatchpre[last] then
+ goto next
+ end
+ end
+ -- maybe only if match
+ -- last = getnext(last)
+ last = nxt
+ else
+ goto next
+ end
+ else
+ goto next
+ end
+ end
+ end
+
+ -- before
+
+ if f > 1 then
+ if startprev then
+ local prev = startprev
+ if prereplace and prev == checkdisc then
+ prev = getprev(sweepnode)
+ end
+ if prev then
+ local discfound -- = nil
+ local n = f - 1
+ while n >= 1 do
+ if prev then
+ -- local char, id = ischar(prev,currentfont)
+ local prv, char, id = isprevchar(prev,currentfont)
+ if char then
+ if skiphash and skiphash[char] then
+ skipped = true
+ if trace_skips then
+ show_skip(dataset,sequence,char,ck,classes[char])
+ end
+ -- prev = getprev(prev)
+ prev = prv
+ elseif seq[n][char] then
+ if n > 1 then
+ -- prev = getprev(prev)
+ prev = prv
+ end
+ n = n - 1
+ elseif discfound then
+ notmatchreplace[discfound] = true
+ if notmatchpost[discfound] then
+ goto next
+ else
+ break
+ end
+ else
+ goto next
+ end
+ elseif char == false then
+ if discfound then
+ notmatchreplace[discfound] = true
+ if notmatchpost[discfound] then
+ goto next
+ end
+ else
+ goto next
+ end
+ break
+ elseif id == disc_code then
+ -- elseif id == disc_code and (not discs or discs[prev]) then
+ -- the special case: f i where i becomes dottless i ..
+ discseen = true
+ discfound = prev
+ notmatchpre[prev] = true
+ notmatchpost[prev] = nil
+ notmatchreplace[prev] = nil
+ local pre, post, replace, pretail, posttail, replacetail = getdisc(prev,true)
+ -- weird test: needs checking
+ if pre ~= start and post ~= start and replace ~= start then
+ if post then
+ local n = n
+ while posttail do
+ if seq[n][getchar(posttail)] then
+ n = n - 1
+ if posttail == post or n < 1 then
+ break
+ else
+ posttail = getprev(posttail)
+ end
+ else
+ notmatchpost[prev] = true
+ break
+ end
+ end
+ if n >= 1 then
+ notmatchpost[prev] = true
+ end
+ else
+ notmatchpost[prev] = true
+ end
+ if replace then
+ -- we seldom enter this branch (e.g. on brill efficient)
+ while replacetail do
+ if seq[n][getchar(replacetail)] then
+ n = n - 1
+ if replacetail == replace or n < 1 then
+ break
+ else
+ replacetail = getprev(replacetail)
+ end
+ else
+ notmatchreplace[prev] = true
+ if notmatchpost[prev] then
+ goto next
+ else
+ break
+ end
+ end
+ end
+ else
+ -- notmatchreplace[prev] = true -- not according to Kai
+ end
+ end
+ -- prev = getprev(prev)
+ prev = prv
+ -- elseif id == glue_code and seq[n][32] and isspace(prev,threshold,id) then
+ -- elseif seq[n][32] and spaces[prev] then
+ -- n = n - 1
+ -- -- prev = getprev(prev)
+ -- prev = prv
+ elseif id == glue_code then
+ local sn = seq[n]
+ if (sn[32] and spaces[prev]) or sn[0xFFFC] then
+ n = n - 1
+ -- prev = getprev(prev)
+ prev = prv
+ else
+ goto next
+ end
+ elseif seq[n][0xFFFC] then
+ n = n - 1
+ -- prev = getprev(prev)
+ prev = prv
+ else
+ goto next
+ end
+ else
+ goto next
+ end
+ end
+ else
+ goto next
+ end
+ else
+ goto next
+ end
+ end
+
+ -- after
+
+ if s > l then
+ local current = last and getnext(last)
+ if not current and postreplace then
+ current = getnext(sweepnode)
+ end
+ if current then
+ local discfound -- = nil
+ local n = l + 1
+ while n <= s do
+ if current then
+ -- local char, id = ischar(current,currentfont)
+ local nxt, char, id = isnextchar(current,currentfont)
+ if char then
+ if skiphash and skiphash[char] then
+ skipped = true
+ if trace_skips then
+ show_skip(dataset,sequence,char,ck,classes[char])
+ end
+ -- current = getnext(current) -- was absent
+ current = nxt -- was absent
+ elseif seq[n][char] then
+ if n < s then -- new test
+ current = getnext(current) -- was absent
+ end
+ n = n + 1
+ elseif discfound then
+ notmatchreplace[discfound] = true
+ if notmatchpre[discfound] then
+ goto next
+ else
+ break
+ end
+ else
+ goto next
+ end
+ elseif char == false then
+ if discfound then
+ notmatchreplace[discfound] = true
+ if notmatchpre[discfound] then
+ goto next
+ else
+ break
+ end
+ else
+ goto next
+ end
+ elseif id == disc_code then
+ -- elseif id == disc_code and (not discs or discs[current]) then
+ discseen = true
+ discfound = current
+ notmatchpre[current] = nil
+ notmatchpost[current] = true
+ notmatchreplace[current] = nil
+ local pre, post, replace = getdisc(current)
+ if pre then
+ local n = n
+ while pre do
+ if seq[n][getchar(pre)] then
+ n = n + 1
+ if n > s then
+ break
+ else
+ pre = getnext(pre)
+ end
+ else
+ notmatchpre[current] = true
+ break
+ end
+ end
+ if n <= s then
+ notmatchpre[current] = true
+ end
+ else
+ notmatchpre[current] = true
+ end
+ if replace then
+ -- so far we never entered this branch
+ while replace do
+ if seq[n][getchar(replace)] then
+ n = n + 1
+ if n > s then
+ break
+ else
+ replace = getnext(replace)
+ end
+ else
+ notmatchreplace[current] = true
+ if notmatchpre[current] then
+ goto next
+ else
+ break
+ end
+ end
+ end
+ else
+ -- notmatchreplace[current] = true -- not according to Kai
+ end
+ current = getnext(current)
+ elseif id == glue_code then
+ local sn = seq[n]
+ if (sn[32] and spaces[current]) or sn[0xFFFC] then
+ n = n + 1
+ -- current = getnext(current)
+ current = nxt
+ else
+ goto next
+ end
+ elseif seq[n][0xFFFC] then
+ n = n + 1
+ -- current = getnext(current)
+ current = nxt
+ else
+ goto next
+ end
+ else
+ goto next
+ end
+ end
+ else
+ goto next
+ end
+ end
+
+ if trace_contexts then
+ chaintrac(head,start,dataset,sequence,rlmode,skipped and skiphash,ck,true,discseen,sweepnode)
+ end
+ if discseen or sweepnode then
+ head, start, done = chaindisk(head,start,dataset,sequence,rlmode,skipped and skiphash,ck)
+ else
+ head, start, done = chainrun(head,start,last,dataset,sequence,rlmode,skipped and skiphash,ck)
+ end
+ if done then
+ break
+ -- else
+ -- next context
+ end
+ ::next::
+ -- if trace_chains then
+ -- chaintrac(head,start,dataset,sequence,rlmode,skipped and skiphash,ck,false,discseen,sweepnode)
+ -- end
+ end
+ if discseen then
+ notmatchpre = { }
+ notmatchpost = { }
+ notmatchreplace = { }
+ -- notmatchpre = { a = 1, b = 1 } notmatchpre .a = nil notmatchpre .b = nil
+ -- notmatchpost = { a = 1, b = 1 } notmatchpost .a = nil notmatchpost .b = nil
+ -- notmatchreplace = { a = 1, b = 1 } notmatchreplace.a = nil notmatchreplace.b = nil
+ end
+ return head, start, done
+end
+
+handlers.gsub_context = handle_contextchain
+handlers.gsub_contextchain = handle_contextchain
+handlers.gsub_reversecontextchain = handle_contextchain
+handlers.gpos_contextchain = handle_contextchain
+handlers.gpos_context = handle_contextchain
+
+-- this needs testing
+
+local function chained_contextchain(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
+ local steps = currentlookup.steps
+ local nofsteps = currentlookup.nofsteps
+ if nofsteps > 1 then
+ reportmoresteps(dataset,sequence)
+ end
+ -- probably wrong
+ local l = steps[1].coverage[getchar(start)]
+ if l then
+ return handle_contextchain(head,start,dataset,sequence,l,rlmode,skiphash)
+ else
+ return head, start, false
+ end
+end
+
+chainprocs.gsub_context = chained_contextchain
+chainprocs.gsub_contextchain = chained_contextchain
+chainprocs.gsub_reversecontextchain = chained_contextchain
+chainprocs.gpos_contextchain = chained_contextchain
+chainprocs.gpos_context = chained_contextchain
+
+------------------------------
+
+-- experiment (needs no handler in font-otc so not now):
+--
+-- function otf.registerchainproc(name,f)
+-- -- chainprocs[name] = f
+-- chainprocs[name] = function(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
+-- local done = currentlookup.nofsteps > 0
+-- if not done then
+-- reportzerosteps(dataset,sequence)
+-- else
+-- head, start, done = f(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
+-- if not head or not start then
+-- reportbadsteps(dataset,sequence)
+-- end
+-- end
+-- return head, start, done
+-- end
+-- end
+
+local missing = setmetatableindex("table")
+local logwarning = report_process
+local resolved = { } -- we only resolve a font,script,language pair once
+
+local function logprocess(...)
+ if trace_steps then
+ registermessage(...)
+ if trace_steps == "silent" then
+ return
+ end
+ end
+ report_process(...)
+end
+
+-- todo: pass all these 'locals' in a table
+
+local sequencelists = setmetatableindex(function(t,font)
+ local sequences = fontdata[font].resources.sequences
+ if not sequences or not next(sequences) then
+ sequences = false
+ end
+ t[font] = sequences
+ return sequences
+end)
+
+-- fonts.hashes.sequences = sequencelists
+
+do -- overcome local limit
+
+ local autofeatures = fonts.analyzers.features
+ local featuretypes = otf.tables.featuretypes
+ local defaultscript = otf.features.checkeddefaultscript
+ local defaultlanguage = otf.features.checkeddefaultlanguage
+
+ local wildcard = "*"
+ local default = "dflt"
+
+ local function initialize(sequence,script,language,enabled,autoscript,autolanguage)
+ local features = sequence.features
+ if features then
+ local order = sequence.order
+ if order then
+ local featuretype = featuretypes[sequence.type or "unknown"]
+ for i=1,#order do
+ local kind = order[i]
+ local valid = enabled[kind]
+ if valid then
+ local scripts = features[kind]
+ local languages = scripts and (
+ scripts[script] or
+ scripts[wildcard] or
+ (autoscript and defaultscript(featuretype,autoscript,scripts))
+ )
+ local enabled = languages and (
+ languages[language] or
+ languages[wildcard] or
+ (autolanguage and defaultlanguage(featuretype,autolanguage,languages))
+ )
+ if enabled then
+ return { valid, autofeatures[kind] or false, sequence, kind }
+ end
+ end
+ end
+ else
+ -- can't happen
+ end
+ end
+ return false
+ end
+
+ function otf.dataset(tfmdata,font) -- generic variant, overloaded in context
+ local shared = tfmdata.shared
+ local properties = tfmdata.properties
+ local language = properties.language or "dflt"
+ local script = properties.script or "dflt"
+ local enabled = shared.features
+ local autoscript = enabled and enabled.autoscript
+ local autolanguage = enabled and enabled.autolanguage
+ local res = resolved[font]
+ if not res then
+ res = { }
+ resolved[font] = res
+ end
+ local rs = res[script]
+ if not rs then
+ rs = { }
+ res[script] = rs
+ end
+ local rl = rs[language]
+ if not rl then
+ rl = {
+ -- indexed but we can also add specific data by key
+ }
+ rs[language] = rl
+ local sequences = tfmdata.resources.sequences
+ if sequences then
+ for s=1,#sequences do
+ local v = enabled and initialize(sequences[s],script,language,enabled,autoscript,autolanguage)
+ if v then
+ rl[#rl+1] = v
+ end
+ end
+ end
+ end
+ return rl
+ end
+
+end
+
+-- Functions like kernrun, comprun etc evolved over time and in the end look rather
+-- complex. It's a bit of a compromis between extensive copying and creating subruns.
+-- The logic has been improved a lot by Kai and Ivo who use complex fonts which
+-- really helped to identify border cases on the one hand and get insight in the diverse
+-- ways fonts implement features (not always that consistent and efficient). At the same
+-- time I tried to keep the code relatively efficient so that the overhead in runtime
+-- stays acceptable.
+
+local function report_disc(what,n)
+ report_run("%s: %s > %s",what,n,languages.serializediscretionary(n))
+end
+
+local function kernrun(disc,k_run,font,attr,...)
+ --
+ -- we catch <font 1><disc font 2>
+ --
+ if trace_kernruns then
+ report_disc("kern",disc)
+ end
+ --
+ local prev, next = getboth(disc)
+ --
+ local nextstart = next
+ local done = false
+ --
+ local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
+ --
+ local prevmarks = prev
+ --
+ -- can be optional, because why on earth do we get a disc after a mark (okay, maybe when a ccmp
+ -- has happened but then it should be in the disc so basically this test indicates an error)
+ --
+ while prevmarks do
+ local char = ischar(prevmarks,font)
+ if char and marks[char] then
+ prevmarks = getprev(prevmarks)
+ else
+ break
+ end
+ end
+ --
+ if prev and not ischar(prev,font) then -- and (pre or replace)
+ prev = false
+ end
+ if next and not ischar(next,font) then -- and (post or replace)
+ next = false
+ end
+ --
+ -- we need to get rid of this nest mess some day .. has to be done otherwise
+ --
+ if pre then
+ if k_run(pre,"injections",nil,font,attr,...) then
+ done = true
+ end
+ if prev then
+ setlink(prev,pre)
+ if k_run(prevmarks,"preinjections",pre,font,attr,...) then -- or prev?
+ done = true
+ end
+ setprev(pre)
+ setlink(prev,disc)
+ end
+ end
+ --
+ if post then
+ if k_run(post,"injections",nil,font,attr,...) then
+ done = true
+ end
+ if next then
+ setlink(posttail,next)
+ if k_run(posttail,"postinjections",next,font,attr,...) then
+ done = true
+ end
+ setnext(posttail)
+ setlink(disc,next)
+ end
+ end
+ --
+ if replace then
+ if k_run(replace,"injections",nil,font,attr,...) then
+ done = true
+ end
+ if prev then
+ setlink(prev,replace)
+ if k_run(prevmarks,"replaceinjections",replace,font,attr,...) then -- getnext(replace))
+ done = true
+ end
+ setprev(replace)
+ setlink(prev,disc)
+ end
+ if next then
+ setlink(replacetail,next)
+ if k_run(replacetail,"replaceinjections",next,font,attr,...) then
+ done = true
+ end
+ setnext(replacetail)
+ setlink(disc,next)
+ end
+ elseif prev and next then
+ setlink(prev,next)
+ if k_run(prevmarks,"emptyinjections",next,font,attr,...) then
+ done = true
+ end
+ setlink(prev,disc,next)
+ end
+ if done and trace_testruns then
+ report_disc("done",disc)
+ end
+ return nextstart, done
+end
+
+-- fonts like ebgaramond do ligatures this way (less efficient than e.g. dejavu which
+-- will do the testrun variant)
+
+local function comprun(disc,c_run,...) -- vararg faster than the whole list
+ if trace_compruns then
+ report_disc("comp",disc)
+ end
+ --
+ local pre, post, replace = getdisc(disc)
+ local renewed = false
+ --
+ if pre then
+ sweepnode = disc
+ sweeptype = "pre" -- in alternative code preinjections is used (also used then for properties, saves a variable)
+ local new, done = c_run(pre,...)
+ if done then
+ pre = new
+ renewed = true
+ end
+ end
+ --
+ if post then
+ sweepnode = disc
+ sweeptype = "post"
+ local new, done = c_run(post,...)
+ if done then
+ post = new
+ renewed = true
+ end
+ end
+ --
+ if replace then
+ sweepnode = disc
+ sweeptype = "replace"
+ local new, done = c_run(replace,...)
+ if done then
+ replace = new
+ renewed = true
+ end
+ end
+ --
+ sweepnode = nil
+ sweeptype = nil
+ if renewed then
+ if trace_testruns then
+ report_disc("done",disc)
+ end
+ setdisc(disc,pre,post,replace)
+ end
+ --
+ return getnext(disc), renewed
+end
+
+-- if we can hyphenate in a lig then unlikely a lig so we
+-- could have a option here to ignore lig
+
+local function testrun(disc,t_run,c_run,...)
+ if trace_testruns then
+ report_disc("test",disc)
+ end
+ local prev, next = getboth(disc)
+ if not next then
+ -- weird discretionary
+ return
+ end
+ local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
+ local renewed = false
+ if post or replace then -- and prev then -- hm, we can start with a disc
+ if post then
+ setlink(posttail,next)
+ else
+ post = next
+ end
+ if replace then
+ setlink(replacetail,next)
+ else
+ replace = next
+ end
+ local d_post = t_run(post,next,...)
+ local d_replace = t_run(replace,next,...)
+ if d_post > 0 or d_replace > 0 then
+ local d = d_replace > d_post and d_replace or d_post
+ local head = getnext(disc) -- is: next
+ local tail = head
+ for i=2,d do -- must start at 2 according to Kai
+ local nx = getnext(tail)
+ local id = getid(nx)
+ if id == disc_code then
+ head, tail = flattendisk(head,nx)
+ elseif id == glyph_code then
+ tail = nx
+ else
+ -- we can have overrun into a glue
+ break
+ end
+ end
+ next = getnext(tail)
+ setnext(tail)
+ setprev(head)
+ local new = copy_node_list(head)
+ if posttail then
+ setlink(posttail,head)
+ else
+ post = head
+ end
+ if replacetail then
+ setlink(replacetail,new)
+ else
+ replace = new
+ end
+ else
+ -- we stay inside the disc
+ if posttail then
+ setnext(posttail)
+ else
+ post = nil
+ end
+ if replacetail then
+ setnext(replacetail)
+ else
+ replace = nil
+ end
+ end
+ setlink(disc,next)
+ -- pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
+ end
+ --
+ -- like comprun
+ --
+ if trace_testruns then
+ report_disc("more",disc)
+ end
+ --
+ if pre then
+ sweepnode = disc
+ sweeptype = "pre"
+ local new, ok = c_run(pre,...)
+ if ok then
+ pre = new
+ renewed = true
+ end
+ end
+ --
+ if post then
+ sweepnode = disc
+ sweeptype = "post"
+ local new, ok = c_run(post,...)
+ if ok then
+ post = new
+ renewed = true
+ end
+ end
+ --
+ if replace then
+ sweepnode = disc
+ sweeptype = "replace"
+ local new, ok = c_run(replace,...)
+ if ok then
+ replace = new
+ renewed = true
+ end
+ end
+ --
+ sweepnode = nil
+ sweeptype = nil
+ if renewed then
+ setdisc(disc,pre,post,replace)
+ if trace_testruns then
+ report_disc("done",disc)
+ end
+ end
+ -- next can have changed (copied list)
+ return getnext(disc), renewed
+end
+
+-- 1{2{\oldstyle\discretionary{3}{4}{5}}6}7\par
+-- 1{2\discretionary{3{\oldstyle3}}{{\oldstyle4}4}{5{\oldstyle5}5}6}7\par
+
+local nesting = 0
+
+local function c_run_single(head,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
+ local done = false
+ local sweep = sweephead[head]
+ local start
+ if sweep then
+ start = sweep
+ -- sweephead[head] = nil
+ sweephead[head] = false
+ else
+ start = head
+ end
+ while start do
+ local char, id = ischar(start,font)
+ if char then
+ local a -- happens often so no assignment is faster
+ if attr then
+ a = getglyphdata(start)
+ end
+ if not a or (a == attr) then
+ local lookupmatch = lookupcache[char]
+ if lookupmatch then
+ local ok
+ head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
+ if ok then
+ done = true
+ end
+ end
+ if start then
+ start = getnext(start)
+ end
+ else
+ -- go on can be a mixed one
+ start = getnext(start)
+ end
+ elseif char == false then
+ return head, done
+ elseif sweep then
+ -- else we loose the rest
+ return head, done
+ else
+ -- in disc component
+ start = getnext(start)
+ end
+ end
+ return head, done
+end
+
+-- only replace?
+
+local function t_run_single(start,stop,font,attr,lookupcache)
+ local lastd = nil
+ while start ~= stop do
+ local char = ischar(start,font)
+ if char then
+ local a -- happens often so no assignment is faster
+ if attr then
+ a = getglyphdata(start)
+ end
+ local startnext = getnext(start)
+ if not a or (a == attr) then
+ local lookupmatch = lookupcache[char]
+ if lookupmatch then -- hm, hyphens can match (tlig) so we need to really check
+ -- if we need more than ligatures we can outline the code and use functions
+ local s = startnext
+ local ss = nil
+ local sstop = s == stop
+ if not s then
+ s = ss
+ ss = nil
+ end
+ -- a bit weird: why multiple ... anyway we can't have a disc in a disc
+ -- how about post ... we can probably merge this into the while
+ while getid(s) == disc_code do
+ ss = getnext(s)
+ s = getreplace(s)
+ if not s then
+ s = ss
+ ss = nil
+ end
+ end
+ local l = nil
+ local d = 0
+ while s do
+ local char = ischar(s,font)
+ if char then
+ local lg = lookupmatch[char]
+ if lg then
+ if sstop then
+ d = 1
+ elseif d > 0 then
+ d = d + 1
+ end
+ l = lg
+ s = getnext(s)
+ sstop = s == stop
+ if not s then
+ s = ss
+ ss = nil
+ end
+ while getid(s) == disc_code do
+ ss = getnext(s)
+ s = getreplace(s)
+ if not s then
+ s = ss
+ ss = nil
+ end
+ end
+ lookupmatch = lg
+ else
+ break
+ end
+ else
+ break
+ end
+ end
+ if l and l.ligature then -- so we test for ligature
+ lastd = d
+ end
+ -- why not: if not l then break elseif l.ligature then return d end
+ else
+ -- why not: break
+ -- no match (yet)
+ end
+ else
+ -- go on can be a mixed one
+ -- why not: break
+ end
+ if lastd then
+ return lastd
+ end
+ start = startnext
+ else
+ break
+ end
+ end
+ return 0
+end
+
+local function k_run_single(sub,injection,last,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
+ local a -- happens often so no assignment is faster
+ if attr then
+ a = getglyphdata(sub)
+ end
+ if not a or (a == attr) then
+ for n in nextnode, sub do -- only gpos
+ if n == last then
+ break
+ end
+ local char = ischar(n,font)
+ if char then
+ local lookupmatch = lookupcache[char]
+ if lookupmatch then
+ local h, d, ok = handler(sub,n,dataset,sequence,lookupmatch,rlmode,skiphash,step,injection)
+ if ok then
+ return true
+ end
+ end
+ end
+ end
+ end
+end
+
+local function c_run_multiple(head,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
+ local done = false
+ local sweep = sweephead[head]
+ local start
+ if sweep then
+ start = sweep
+ -- sweephead[head] = nil
+ sweephead[head] = false
+ else
+ start = head
+ end
+ while start do
+ local char = ischar(start,font)
+ if char then
+ local a -- happens often so no assignment is faster
+ if attr then
+ a = getglyphdata(start)
+ end
+ if not a or (a == attr) then
+ for i=1,nofsteps do
+ local step = steps[i]
+ local lookupcache = step.coverage
+ local lookupmatch = lookupcache[char]
+ if lookupmatch then
+ -- we could move all code inline but that makes things even more unreadable
+ local ok
+ head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
+ if ok then
+ done = true
+ break
+ elseif not start then
+ -- don't ask why ... shouldn't happen
+ break
+ end
+ end
+ end
+ if start then
+ start = getnext(start)
+ end
+ else
+ -- go on can be a mixed one
+ start = getnext(start)
+ end
+ elseif char == false then
+ -- whatever glyph
+ return head, done
+ elseif sweep then
+ -- else we loose the rest
+ return head, done
+ else
+ -- in disc component
+ start = getnext(start)
+ end
+ end
+ return head, done
+end
+
+local function t_run_multiple(start,stop,font,attr,steps,nofsteps)
+ local lastd = nil
+ while start ~= stop do
+ local char = ischar(start,font)
+ if char then
+ local a -- happens often so no assignment is faster
+ if attr then
+ a = getglyphdata(start)
+ end
+ local startnext = getnext(start)
+ if not a or (a == attr) then
+ for i=1,nofsteps do
+ local step = steps[i]
+ local lookupcache = step.coverage
+ local lookupmatch = lookupcache[char]
+ if lookupmatch then
+ -- if we need more than ligatures we can outline the code and use functions
+ local s = startnext
+ local ss = nil
+ local sstop = s == stop
+ if not s then
+ s = ss
+ ss = nil
+ end
+ while getid(s) == disc_code do
+ ss = getnext(s)
+ s = getreplace(s)
+ if not s then
+ s = ss
+ ss = nil
+ end
+ end
+ local l = nil
+ local d = 0
+ while s do
+ local char = ischar(s)
+ if char then
+ local lg = lookupmatch[char]
+ if lg then
+ if sstop then
+ d = 1
+ elseif d > 0 then
+ d = d + 1
+ end
+ l = lg
+ s = getnext(s)
+ sstop = s == stop
+ if not s then
+ s = ss
+ ss = nil
+ end
+ while getid(s) == disc_code do
+ ss = getnext(s)
+ s = getreplace(s)
+ if not s then
+ s = ss
+ ss = nil
+ end
+ end
+ lookupmatch = lg
+ else
+ break
+ end
+ else
+ break
+ end
+ end
+ if l and l.ligature then
+ lastd = d
+ end
+ end
+ end
+ else
+ -- go on can be a mixed one
+ end
+ if lastd then
+ return lastd
+ end
+ start = startnext
+ else
+ break
+ end
+ end
+ return 0
+end
+
+local function k_run_multiple(sub,injection,last,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
+ local a -- happens often so no assignment is faster
+ if attr then
+ a = getglyphdata(sub)
+ end
+ if not a or (a == attr) then
+ for n in nextnode, sub do -- only gpos
+ if n == last then
+ break
+ end
+ local char = ischar(n)
+ if char then
+ for i=1,nofsteps do
+ local step = steps[i]
+ local lookupcache = step.coverage
+ local lookupmatch = lookupcache[char]
+ if lookupmatch then
+ local h, d, ok = handler(sub,n,dataset,sequence,lookupmatch,rlmode,skiphash,step,injection) -- sub was head
+ if ok then
+ return true
+ end
+ end
+ end
+ end
+ end
+ end
+end
+
+local txtdirstate, pardirstate do -- this might change (no need for nxt in pardirstate)
+
+ local getdirection = nuts.getdirection
+
+ txtdirstate = function(start,stack,top,rlparmode)
+ local dir, pop = getdirection(start)
+ if pop then
+ if top == 1 then
+ return 0, rlparmode
+ else
+ top = top - 1
+ if stack[top] == righttoleft_code then
+ return top, -1
+ else
+ return top, 1
+ end
+ end
+ elseif dir == lefttoright_code then
+ top = top + 1
+ stack[top] = lefttoright_code
+ return top, 1
+ elseif dir == righttoleft_code then
+ top = top + 1
+ stack[top] = righttoleft_code
+ return top, -1
+ else
+ return top, rlparmode
+ end
+ end
+
+ pardirstate = function(start)
+ local dir = getdirection(start)
+ if dir == lefttoright_code then
+ return 1, 1
+ elseif dir == righttoleft_code then
+ return -1, -1
+ else
+ return 0, 0
+ end
+ end
+
+end
+
+-- These are non public helpers that can change without notice!
+
+otf.helpers = otf.helpers or { }
+otf.helpers.txtdirstate = txtdirstate
+otf.helpers.pardirstate = pardirstate
+
+-- This is the main loop. We run over the node list dealing with a specific font. The
+-- attribute is a context specific thing. We could work on sub start-stop ranges instead
+-- but I wonder if there is that much speed gain (experiments showed that it made not
+-- much sense) and we need to keep track of directions anyway. Also at some point I
+-- want to play with font interactions and then we do need the full sweeps. Apart from
+-- optimizations the principles of processing the features hasn't changed much since
+-- the beginning.
+
+do
+
+ -- This is a measurable experimental speedup (only with hyphenated text and multiple
+ -- fonts per processor call), especially for fonts with lots of contextual lookups.
+
+ local fastdisc = true
+ local testdics = false
+
+ directives.register("otf.fastdisc",function(v) fastdisc = v end) -- normally enabled
+
+ -- using a merged combined hash as first test saves some 30% on ebgaramond and
+ -- about 15% on arabtype .. then moving the a test also saves a bit (even when
+ -- often a is not set at all so that one is a bit debatable
+
+ local otfdataset = nil -- todo: make an installer
+
+ local getfastdisc = { __index = function(t,k)
+ local v = usesfont(k,currentfont)
+ t[k] = v
+ return v
+ end }
+
+ local getfastspace = { __index = function(t,k)
+ -- we don't pass the id so that one can overload isspace
+ local v = isspace(k,threshold) or false
+ t[k] = v
+ return v
+ end }
+
+ function otf.featuresprocessor(head,font,attr,direction,n)
+
+ local sequences = sequencelists[font] -- temp hack
+
+ nesting = nesting + 1
+
+ if nesting == 1 then
+ currentfont = font
+ tfmdata = fontdata[font]
+ descriptions = tfmdata.descriptions -- only needed in gref so we could pass node there instead
+ characters = tfmdata.characters -- but this branch is not entered that often anyway
+ local resources = tfmdata.resources
+ marks = resources.marks
+ classes = resources.classes
+ threshold,
+ factor = getthreshold(font)
+ checkmarks = tfmdata.properties.checkmarks
+
+ if not otfdataset then
+ otfdataset = otf.dataset
+ end
+
+ discs = fastdisc and n and n > 1 and setmetatable({},getfastdisc) -- maybe inline
+ spaces = setmetatable({},getfastspace)
+
+ elseif currentfont ~= font then
+
+ report_warning("nested call with a different font, level %s, quitting",nesting)
+ nesting = nesting - 1
+ return head, false
+
+ end
+
+ -- some 10% faster when no dynamics but hardly measureable on real runs .. but: it only
+ -- works when we have no other dynamics as otherwise the zero run will be applied to the
+ -- whole stream for which we then need to pass another variable which we won't
+
+ -- if attr == 0 then
+ -- attr = false
+ -- end
+
+ if trace_steps then
+ checkstep(head)
+ end
+
+ local initialrl = 0
+
+ if getid(head) == par_code and start_of_par(head) then
+ initialrl = pardirstate(head)
+ elseif direction == righttoleft_code then
+ initialrl = -1
+ end
+
+ -- local done = false
+ local datasets = otfdataset(tfmdata,font,attr)
+ local dirstack = { nil } -- could move outside function but we can have local runs
+ sweephead = { }
+ -- sweephead = { a = 1, b = 1 } sweephead.a = nil sweephead.b = nil
+
+ -- Keeping track of the headnode is needed for devanagari. (I generalized it a bit
+ -- so that multiple cases are also covered.) We could prepend a temp node.
+
+ -- We don't goto the next node when a disc node is created so that we can then treat
+ -- the pre, post and replace. It's a bit of a hack but works out ok for most cases.
+
+ for s=1,#datasets do
+ local dataset = datasets[s]
+ local attribute = dataset[2]
+ local sequence = dataset[3] -- sequences[s] -- also dataset[5]
+ local rlparmode = initialrl
+ local topstack = 0
+ local typ = sequence.type
+ local gpossing = typ == "gpos_single" or typ == "gpos_pair" -- store in dataset
+ local forcetestrun = typ == "gsub_ligature" -- testrun is only for ligatures
+ local handler = handlers[typ] -- store in dataset
+ local steps = sequence.steps
+ local nofsteps = sequence.nofsteps
+ local skiphash = sequence.skiphash
+
+ if not steps then
+ -- This permits injection, watch the different arguments. Watch out, the arguments passed
+ -- are not frozen as we might extend or change this. Is this used at all apart from some
+ -- experiments?
+ local h, ok = handler(head,dataset,sequence,initialrl,font,attr) -- less arguments now
+ -- if ok then
+ -- done = true
+ -- end
+ if h and h ~= head then
+ head = h
+ end
+ elseif typ == "gsub_reversecontextchain" then
+ --
+ -- This might need a check: if we have #before or #after > 0 then we might need to reverse
+ -- the before and after lists in the loader. But first I need to see a font that uses multiple
+ -- matches.
+ --
+ local start = find_node_tail(head)
+ local rlmode = 0 -- how important is this .. do we need to check for dir?
+ local merged = steps.merged
+ while start do
+ local char = ischar(start,font)
+ if char then
+ local m = merged[char]
+ if m then
+ local a -- happens often so no assignment is faster
+ if attr then
+ a = getglyphdata(start)
+ end
+ if not a or (a == attr) then
+ for i=m[1],m[2] do
+ local step = steps[i]
+ -- for i=1,#m do
+ -- local step = m[i]
+ local lookupcache = step.coverage
+ local lookupmatch = lookupcache[char]
+ if lookupmatch then
+ local ok
+ head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
+ if ok then
+ -- done = true
+ break
+ end
+ end
+ end
+ if start then
+ start = getprev(start)
+ end
+ else
+ start = getprev(start)
+ end
+ else
+ start = getprev(start)
+ end
+ else
+ start = getprev(start)
+ end
+ end
+ else
+ local start = head
+ local rlmode = initialrl
+ if nofsteps == 1 then -- happens often
+ local step = steps[1]
+ local lookupcache = step.coverage
+ while start do
+ -- local char, id = ischar(start,font)
+ local nxt, char, id = isnextchar(start,font)
+ if char then
+ if skiphash and skiphash[char] then -- we never needed it here but let's try
+ -- start = getnext(start)
+ start = nxt
+ else
+ local lookupmatch = lookupcache[char]
+ if lookupmatch then
+ local a -- happens often so no assignment is faster
+ if attr then
+ if getglyphdata(start) == attr and (not attribute or getstate(start,attribute)) then
+ a = true
+ end
+ elseif not attribute or getstate(start,attribute) then
+ a = true
+ end
+ if a then
+ local ok, df
+ head, start, ok, df = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
+ -- if ok then
+ -- done = true
+ -- end
+ if df then
+ -- print("restart 1",typ)
+ elseif start then
+ start = getnext(start) -- can be a new start
+ end
+ else
+ -- start = getnext(start)
+ start = nxt
+ end
+ else
+ -- start = getnext(start)
+ start = nxt
+ end
+ end
+ elseif char == false or id == glue_code then
+ -- a different font|state or glue (happens often)
+ -- start = getnext(start)
+ start = nxt
+ elseif id == disc_code then
+ if not discs or discs[start] == true then
+ local ok
+ if gpossing then
+ start, ok = kernrun(start,k_run_single, font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
+ elseif forcetestrun then
+ start, ok = testrun(start,t_run_single,c_run_single,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
+ else
+ start, ok = comprun(start,c_run_single, font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
+ end
+ -- if ok then
+ -- done = true
+ -- end
+ else
+ -- start = getnext(start)
+ start = nxt
+ end
+ elseif id == math_code then
+ start = getnext(end_of_math(start))
+ elseif id == dir_code then
+ topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
+ -- start = getnext(start)
+ start = nxt
+ -- elseif id == par_code and start_of_par(start) then
+ -- rlparmode, rlmode = pardirstate(start)
+ -- -- start = getnext(start)
+ -- start = nxt
+ else
+ -- start = getnext(start)
+ start = nxt
+ end
+ end
+ else
+ local merged = steps.merged
+ while start do
+ -- local char, id = ischar(start,font)
+ local nxt, char, id = isnextchar(start,font)
+ if char then
+ if skiphash and skiphash[char] then -- we never needed it here but let's try
+ -- start = getnext(start)
+ start = nxt
+ else
+ local m = merged[char]
+ if m then
+ local a -- happens often so no assignment is faster
+ if attr then
+ if getglyphdata(start) == attr and (not attribute or getstate(start,attribute)) then
+ a = true
+ end
+ elseif not attribute or getstate(start,attribute) then
+ a = true
+ end
+ if a then
+ local ok, df
+ for i=m[1],m[2] do
+ local step = steps[i]
+ -- for i=1,#m do
+ -- local step = m[i]
+ local lookupcache = step.coverage
+ local lookupmatch = lookupcache[char]
+ if lookupmatch then
+ -- we could move all code inline but that makes things even more unreadable
+-- local ok, df
+ head, start, ok, df = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
+ if df then
+ break
+ elseif ok then
+ -- done = true
+ break
+ elseif not start then
+ -- don't ask why ... shouldn't happen
+ break
+ end
+ end
+ end
+ if df then
+ -- print("restart 2",typ)
+ elseif start then
+ start = getnext(start) -- can be a new next
+ end
+ else
+ -- start = getnext(start)
+ start = nxt
+ end
+ else
+ -- start = getnext(start)
+ start = nxt
+ end
+ end
+ elseif char == false or id == glue_code then
+ -- a different font|state or glue (happens often)
+ -- start = getnext(start)
+ start = nxt
+ elseif id == disc_code then
+ if not discs or discs[start] == true then
+ local ok
+ if gpossing then
+ start, ok = kernrun(start,k_run_multiple, font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
+ elseif forcetestrun then
+ start, ok = testrun(start,t_run_multiple,c_run_multiple,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
+ else
+ start, ok = comprun(start,c_run_multiple, font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
+ end
+ -- if ok then
+ -- done = true
+ -- end
+ else
+ -- start = getnext(start)
+ start = nxt
+ end
+ elseif id == math_code then
+ start = getnext(end_of_math(start))
+ elseif id == dir_code then
+ topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
+ -- start = getnext(start)
+ start = nxt
+ -- elseif id == par_code and start_of_par(start) then
+ -- rlparmode, rlmode = pardirstate(start)
+ -- start = getnext(start)
+ else
+ -- start = getnext(start)
+ start = nxt
+ end
+ end
+ end
+ end
+
+ if trace_steps then -- ?
+ registerstep(head)
+ end
+
+ end
+
+ nesting = nesting - 1
+
+ -- return head, done
+ return head
+ end
+
+ -- This is not an official helper and used for tracing experiments. It can be changed as I like
+ -- at any moment. At some point it might be used in a module that can help font development.
+
+ function otf.datasetpositionprocessor(head,font,direction,dataset)
+
+ currentfont = font
+ tfmdata = fontdata[font]
+ descriptions = tfmdata.descriptions -- only needed in gref so we could pass node there instead
+ characters = tfmdata.characters -- but this branch is not entered that often anyway
+ local resources = tfmdata.resources
+ marks = resources.marks
+ classes = resources.classes
+ threshold,
+ factor = getthreshold(font)
+ checkmarks = tfmdata.properties.checkmarks
+
+ if type(dataset) == "number" then
+ dataset = otfdataset(tfmdata,font,0)[dataset]
+ end
+
+ local sequence = dataset[3] -- sequences[s] -- also dataset[5]
+ local typ = sequence.type
+ -- local gpossing = typ == "gpos_single" or typ == "gpos_pair" -- store in dataset
+
+ -- gpos_contextchain gpos_context
+
+ -- if not gpossing then
+ -- return head, false
+ -- end
+
+ local handler = handlers[typ] -- store in dataset
+ local steps = sequence.steps
+ local nofsteps = sequence.nofsteps
+
+ local done = false
+ local dirstack = { nil } -- could move outside function but we can have local runs (maybe a few more nils)
+ local start = head
+ local initialrl = (direction == righttoleft_code) and -1 or 0
+ local rlmode = initialrl
+ local rlparmode = initialrl
+ local topstack = 0
+ local merged = steps.merged
+
+ -- local matches = false
+ local position = 0
+
+ while start do
+ -- local char, id = ischar(start,font)
+ local nxt, char, id = isnextchar(start,font)
+ if char then
+ position = position + 1
+ local m = merged[char]
+ if m then
+ if skiphash and skiphash[char] then -- we never needed it here but let's try
+ -- start = getnext(start)
+ start = nxt
+ else
+ for i=m[1],m[2] do
+ local step = steps[i]
+ local lookupcache = step.coverage
+ local lookupmatch = lookupcache[char]
+ if lookupmatch then
+ local ok
+ head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
+ if ok then
+ -- if matches then
+ -- matches[position] = i
+ -- else
+ -- matches = { [position] = i }
+ -- end
+ break
+ elseif not start then
+ break
+ end
+ end
+ end
+ if start then
+ start = getnext(start) -- start can be new
+ end
+ end
+ else
+ -- start = getnext(start)
+ start = nxt
+ end
+ elseif char == false or id == glue_code then
+ -- a different font|state or glue (happens often)
+ -- start = getnext(start)
+ start = nxt
+ elseif id == math_code then
+ start = getnext(end_of_math(start))
+ elseif id == dir_code then
+ topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
+ -- start = getnext(start)
+ start = nxt
+ -- elseif id == par_code and start_of_par(start) then
+ -- rlparmode, rlmode = pardirstate(start)
+ -- -- start = getnext(start)
+ -- start = nxt
+ else
+ -- start = getnext(start)
+ start = nxt
+ end
+ end
+
+ return head
+ end
+
+ -- end of experiment
+
+end
+
+-- so far
+
+local plugins = { }
+otf.plugins = plugins
+
+local report = logs.reporter("fonts")
+
+function otf.registerplugin(name,f)
+ if type(name) == "string" and type(f) == "function" then
+ plugins[name] = { name, f }
+ report()
+ report("plugin %a has been loaded, please be aware of possible side effects",name)
+ report()
+ if logs.pushtarget then
+ logs.pushtarget("log")
+ end
+ report("Plugins are not officially supported unless stated otherwise. This is because")
+ report("they bypass the regular font handling and therefore some features in ConTeXt")
+ report("(especially those related to fonts) might not work as expected or might not work")
+ report("at all. Some plugins are for testing and development only and might change")
+ report("whenever we feel the need for it.")
+ report()
+ if logs.poptarget then
+ logs.poptarget()
+ end
+ end
+end
+
+function otf.plugininitializer(tfmdata,value)
+ if type(value) == "string" then
+ tfmdata.shared.plugin = plugins[value]
+ end
+end
+
+function otf.pluginprocessor(head,font,attr,direction) -- n
+ local s = fontdata[font].shared
+ local p = s and s.plugin
+ if p then
+ if trace_plugins then
+ report_process("applying plugin %a",p[1])
+ end
+ return p[2](head,font,attr,direction)
+ else
+ return head, false
+ end
+end
+
+function otf.featuresinitializer(tfmdata,value)
+ -- nothing done here any more
+end
+
+registerotffeature {
+ name = "features",
+ description = "features",
+ default = true,
+ initializers = {
+ position = 1,
+ node = otf.featuresinitializer,
+ plug = otf.plugininitializer,
+ },
+ processors = {
+ node = otf.featuresprocessor,
+ plug = otf.pluginprocessor,
+ }
+}
+
+-- Moved here (up) a bit. This doesn't really belong in generic so it will
+-- move to a context module some day.
+
+local function markinitializer(tfmdata,value)
+ local properties = tfmdata.properties
+ properties.checkmarks = value
+end
+
+registerotffeature {
+ name = "checkmarks",
+ description = "check mark widths",
+ default = true,
+ initializers = {
+ node = markinitializer,
+ },
+}
+
+-- This can be used for extra handlers, but should be used with care! We implement one
+-- here but some more can be found in the osd (script devanagary) file. Now watch out:
+-- when a handler has steps, it is called as the other ones, but when we have no steps,
+-- we use a different call:
+--
+-- function(head,dataset,sequence,initialrl,font,attr)
+-- return head, done
+-- end
+--
+-- Also see (!!).
+
+otf.handlers = handlers
+
+if context then
+ return
+else
+ -- todo: move the following code someplace else
+end
+
+local setspacekerns = nodes.injections.setspacekerns if not setspacekerns then os.exit() end
+
+local tag = "kern"
+
+-- if fontfeatures then
+
+-- function handlers.trigger_space_kerns(head,dataset,sequence,initialrl,font,attr)
+-- local features = fontfeatures[font]
+-- local enabled = features and features.spacekern and features[tag]
+-- if enabled then
+-- setspacekerns(font,sequence)
+-- end
+-- return head, enabled
+-- end
+
+-- else -- generic (no hashes)
+
+ function handlers.trigger_space_kerns(head,dataset,sequence,initialrl,font,attr)
+ local shared = fontdata[font].shared
+ local features = shared and shared.features
+ local enabled = features and features.spacekern and features[tag]
+ if enabled then
+ setspacekerns(font,sequence)
+ end
+ return head, enabled
+ end
+
+-- end
+
+-- There are fonts out there that change the space but we don't do that kind of
+-- things in TeX.
+
+local function hasspacekerns(data)
+ local resources = data.resources
+ local sequences = resources.sequences
+ local validgpos = resources.features.gpos
+ if validgpos and sequences then
+ for i=1,#sequences do
+ local sequence = sequences[i]
+ local steps = sequence.steps
+ if steps and sequence.features[tag] then
+ local kind = sequence.type
+ if kind == "gpos_pair" or kind == "gpos_single" then
+ for i=1,#steps do
+ local step = steps[i]
+ local coverage = step.coverage
+ local rules = step.rules
+ if rules then
+ -- not now: analyze (simple) rules
+ elseif not coverage then
+ -- nothing to do
+ elseif kind == "gpos_single" then
+ -- maybe a message that we ignore
+ elseif kind == "gpos_pair" then
+ local format = step.format
+ if format == "move" or format == "kern" then
+ local kerns = coverage[32]
+ if kerns then
+ return true
+ end
+ for k, v in next, coverage do
+ if v[32] then
+ return true
+ end
+ end
+ elseif format == "pair" then
+ local kerns = coverage[32]
+ if kerns then
+ for k, v in next, kerns do
+ local one = v[1]
+ if one and one ~= true then
+ return true
+ end
+ end
+ end
+ for k, v in next, coverage do
+ local kern = v[32]
+ if kern then
+ local one = kern[1]
+ if one and one ~= true then
+ return true
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ end
+ return false
+end
+
+otf.readers.registerextender {
+ name = "spacekerns",
+ action = function(data)
+ data.properties.hasspacekerns = hasspacekerns(data)
+ end
+}
+
+local function spaceinitializer(tfmdata,value) -- attr
+ local resources = tfmdata.resources
+ local spacekerns = resources and resources.spacekerns
+ if value and spacekerns == nil then
+ local rawdata = tfmdata.shared and tfmdata.shared.rawdata
+ local properties = rawdata.properties
+ if properties and properties.hasspacekerns then
+ local sequences = resources.sequences
+ local validgpos = resources.features.gpos
+ if validgpos and sequences then
+ local left = { }
+ local right = { }
+ local last = 0
+ local feat = nil
+ for i=1,#sequences do
+ local sequence = sequences[i]
+ local steps = sequence.steps
+ if steps then
+ -- we don't support space kerns in other features
+ local kern = sequence.features[tag]
+ if kern then
+ local kind = sequence.type
+ if kind == "gpos_pair" or kind == "gpos_single" then
+ if feat then
+ for script, languages in next, kern do
+ local f = feat[script]
+ if f then
+ for l in next, languages do
+ f[l] = true
+ end
+ else
+ feat[script] = languages
+ end
+ end
+ else
+ feat = kern
+ end
+ for i=1,#steps do
+ local step = steps[i]
+ local coverage = step.coverage
+ local rules = step.rules
+ if rules then
+ -- not now: analyze (simple) rules
+ elseif not coverage then
+ -- nothing to do
+ elseif kind == "gpos_single" then
+ -- makes no sense in TeX
+ elseif kind == "gpos_pair" then
+ local format = step.format
+ if format == "move" or format == "kern" then
+ local kerns = coverage[32]
+ if kerns then
+ for k, v in next, kerns do
+ right[k] = v
+ end
+ end
+ for k, v in next, coverage do
+ local kern = v[32]
+ if kern then
+ left[k] = kern
+ end
+ end
+ elseif format == "pair" then
+ local kerns = coverage[32]
+ if kerns then
+ for k, v in next, kerns do
+ local one = v[1]
+ if one and one ~= true then
+ right[k] = one[3]
+ end
+ end
+ end
+ for k, v in next, coverage do
+ local kern = v[32]
+ if kern then
+ local one = kern[1]
+ if one and one ~= true then
+ left[k] = one[3]
+ end
+ end
+ end
+ end
+ end
+ end
+ last = i
+ end
+ else
+ -- no steps ... needed for old one ... we could use the basekerns
+ -- instead
+ end
+ end
+ end
+ left = next(left) and left or false
+ right = next(right) and right or false
+ if left or right then
+ spacekerns = {
+ left = left,
+ right = right,
+ }
+ if last > 0 then
+ local triggersequence = {
+ -- no steps, see (!!)
+ features = { [tag] = feat or { dflt = { dflt = true, } } },
+ flags = noflags,
+ name = "trigger_space_kerns",
+ order = { tag },
+ type = "trigger_space_kerns",
+ left = left,
+ right = right,
+ }
+ insert(sequences,last,triggersequence)
+ end
+ end
+ end
+ end
+ resources.spacekerns = spacekerns
+ end
+ return spacekerns
+end
+
+registerotffeature {
+ name = "spacekern",
+ description = "space kern injection",
+ default = true,
+ initializers = {
+ node = spaceinitializer,
+ },
+}