if not modules then modules = { } end modules ['font-ots'] = { -- sequences
version = 1.001,
comment = "companion to font-ini.mkiv",
author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
copyright = "PRAGMA ADE / ConTeXt Development Team",
license = "see context related readme files",
}
--[[ldx--
This module is a bit more split up that I'd like but since we also want to test
with plain it has to be so. This module is part of
and discussion about improvements and functionality mostly happens on the
mailing list.
The specification of OpenType is (or at least a decade ago was) kind of vague.
Apart from a lack of a proper free specifications there's also the problem that
Microsoft and Adobe may have their own interpretation of how and in what order to
apply features. In general the Microsoft website has more detailed specifications
and is a better reference. There is also some information in the FontForge help
files. In the end we rely most on the Microsoft specification.
Because there is so much possible, fonts might contain bugs and/or be made to
work with certain rederers. These may evolve over time which may have the side
effect that suddenly fonts behave differently. We don't want to catch all font
issues.
After a lot of experiments (mostly by Taco, me and Idris) the first implementation
becaus quite useful. When it did most of what we wanted, a more optimized version
evolved. Of course all errors are mine and of course the code can be improved. There
are quite some optimizations going on here and processing speed is currently quite
acceptable and has been improved over time. Many complex scripts are not yet supported
yet, but I will look into them as soon as users ask for it.
The specification leaves room for interpretation. In case of doubt the Microsoft
implementation is the reference as it is the most complete one. As they deal with
lots of scripts and fonts, Kai and Ivo did a lot of testing of the generic code and
their suggestions help improve the code. I'm aware that not all border cases can be
taken care of, unless we accept excessive runtime, and even then the interference
with other mechanisms (like hyphenation) are not trivial.
Especially discretionary handling has been improved much by Kai Eigner who uses complex
(latin) fonts. The current implementation is a compromis between his patches and my code
and in the meantime performance is quite ok. We cannot check all border cases without
compromising speed but so far we're okay. Given good test cases we can probably improve
it here and there. Especially chain lookups are non trivial with discretionaries but
things got much better over time thanks to Kai.
Glyphs are indexed not by unicode but in their own way. This is because there is no
relationship with unicode at all, apart from the fact that a font might cover certain
ranges of characters. One character can have multiple shapes. However, at the
end we use unicode so and all extra glyphs are mapped into a private
space. This is needed because we need to access them and has to include
then in the output eventually.
The initial data table is rather close to the open type specification and also not
that different from the one produced by but we uses hashes instead.
In that table is packed (similar tables are shared) and cached on disk
so that successive runs can use the optimized table (after loading the table is
unpacked).
This module is sparsely documented because it is has been a moving target. The
table format of the reader changed a bit over time and we experiment a lot with
different methods for supporting features. By now the structures are quite stable
Incrementing the version number will force a re-cache. We jump the number by one
when there's a fix in the reader or processing code that can result in different
results.
This code is also used outside context but in context it has to work with other
mechanisms. Both put some constraints on the code here.
--ldx]]--
-- Remark: We assume that cursives don't cross discretionaries which is okay because it
-- is only used in semitic scripts.
--
-- Remark: We assume that marks precede base characters.
--
-- Remark: When complex ligatures extend into discs nodes we can get side effects. Normally
-- this doesn't happen; ff\d{l}{l}{l} in lm works but ff\d{f}{f}{f}.
--
-- Todo: check if we copy attributes to disc nodes if needed.
--
-- Todo: it would be nice if we could get rid of components. In other places we can use
-- the unicode properties.
--
-- Remark: We do some disc juggling where we need to keep in mind that the pre, post and
-- replace fields can have prev pointers to a nesting node ... I wonder if that is still
-- needed.
--
-- Remark: This is not possible:
--
-- \discretionary {alpha-} {betagammadelta}
-- {\discretionary {alphabeta-} {gammadelta}
-- {\discretionary {alphabetagamma-} {delta}
-- {alphabetagammadelta}}}
--
-- Remark: Something is messed up: we have two mark / ligature indices, one at the
-- injection end and one here ... this is based on KE's patches but there is something
-- fishy there as I'm pretty sure that for husayni we need some connection (as it's much
-- more complex than an average font) but I need proper examples of all cases, not of
-- only some.
--
-- Remark: I wonder if indexed would be faster than unicoded. It would be a major
-- rewrite to have char being unicode + an index field in glyph nodes. Also more
-- assignments have to be made in order to keep things in sync. So, it's a no-go.
--
-- Remark: We can provide a fast loop when there are no disc nodes (tests show a 1%
-- gain). Smaller functions might perform better cache-wise. But ... memory becomes
-- faster anyway, so ...
--
-- Remark: Some optimizations made sense for 5.2 but seem less important for 5.3 but
-- anyway served their purpose.
--
-- Todo: just (0=l2r and 1=r2l) or maybe (r2l = true)
local type, next, tonumber = type, next, tonumber
local random = math.random
local formatters = string.formatters
local insert = table.insert
local registertracker = trackers.register
local logs = logs
local trackers = trackers
local nodes = nodes
local attributes = attributes
local fonts = fonts
local otf = fonts.handlers.otf
local tracers = nodes.tracers
local trace_singles = false registertracker("otf.singles", function(v) trace_singles = v end)
local trace_multiples = false registertracker("otf.multiples", function(v) trace_multiples = v end)
local trace_alternatives = false registertracker("otf.alternatives", function(v) trace_alternatives = v end)
local trace_ligatures = false registertracker("otf.ligatures", function(v) trace_ligatures = v end)
local trace_contexts = false registertracker("otf.contexts", function(v) trace_contexts = v end)
local trace_marks = false registertracker("otf.marks", function(v) trace_marks = v end)
local trace_kerns = false registertracker("otf.kerns", function(v) trace_kerns = v end)
local trace_cursive = false registertracker("otf.cursive", function(v) trace_cursive = v end)
local trace_preparing = false registertracker("otf.preparing", function(v) trace_preparing = v end)
local trace_bugs = false registertracker("otf.bugs", function(v) trace_bugs = v end)
local trace_details = false registertracker("otf.details", function(v) trace_details = v end)
local trace_steps = false registertracker("otf.steps", function(v) trace_steps = v end)
local trace_skips = false registertracker("otf.skips", function(v) trace_skips = v end)
local trace_plugins = false registertracker("otf.plugins", function(v) trace_plugins = v end)
local trace_chains = false registertracker("otf.chains", function(v) trace_chains = v end)
local trace_kernruns = false registertracker("otf.kernruns", function(v) trace_kernruns = v end)
----- trace_discruns = false registertracker("otf.discruns", function(v) trace_discruns = v end)
local trace_compruns = false registertracker("otf.compruns", function(v) trace_compruns = v end)
local trace_testruns = false registertracker("otf.testruns", function(v) trace_testruns = v end)
local forcediscretionaries = false
local forcepairadvance = false -- for testing
directives.register("otf.forcediscretionaries",function(v)
forcediscretionaries = v
end)
directives.register("otf.forcepairadvance",function(v)
forcepairadvance = v
end)
local report_direct = logs.reporter("fonts","otf direct")
local report_subchain = logs.reporter("fonts","otf subchain")
local report_chain = logs.reporter("fonts","otf chain")
local report_process = logs.reporter("fonts","otf process")
local report_warning = logs.reporter("fonts","otf warning")
local report_run = logs.reporter("fonts","otf run")
registertracker("otf.substitutions", "otf.singles","otf.multiples","otf.alternatives","otf.ligatures")
registertracker("otf.positions", "otf.marks","otf.kerns","otf.cursive")
registertracker("otf.actions", "otf.substitutions","otf.positions")
registertracker("otf.sample", "otf.steps","otf.substitutions","otf.positions","otf.analyzing")
registertracker("otf.sample.silent", "otf.steps=silent","otf.substitutions","otf.positions","otf.analyzing")
local nuts = nodes.nuts
local tonode = nuts.tonode
local tonut = nuts.tonut
local getfield = nuts.getfield
local getnext = nuts.getnext
local setnext = nuts.setnext
local getprev = nuts.getprev
local setprev = nuts.setprev
local getboth = nuts.getboth
local setboth = nuts.setboth
local getid = nuts.getid
local getattr = nuts.getattr
local setattr = nuts.setattr
local getprop = nuts.getprop
local setprop = nuts.setprop
local getfont = nuts.getfont
local getsubtype = nuts.getsubtype
local setsubtype = nuts.setsubtype
local getchar = nuts.getchar
local setchar = nuts.setchar
local getdisc = nuts.getdisc
local setdisc = nuts.setdisc
local setlink = nuts.setlink
local getcomponents = nuts.getcomponents -- the original one, not yet node-aux
local setcomponents = nuts.setcomponents -- the original one, not yet node-aux
local getdir = nuts.getdir
local getwidth = nuts.getwidth
local ischar = nuts.is_char
local usesfont = nuts.uses_font
local insert_node_after = nuts.insert_after
local copy_node = nuts.copy
local copy_node_list = nuts.copy_list
local remove_node = nuts.remove
local find_node_tail = nuts.tail
local flush_node_list = nuts.flush_list
local flush_node = nuts.flush_node
local end_of_math = nuts.end_of_math
local traverse_nodes = nuts.traverse
----- traverse_id = nuts.traverse_id
local set_components = nuts.set_components
local take_components = nuts.take_components
local count_components = nuts.count_components
local copy_no_components = nuts.copy_no_components
local copy_only_glyphs = nuts.copy_only_glyphs
local setmetatable = setmetatable
local setmetatableindex = table.setmetatableindex
----- zwnj = 0x200C
----- zwj = 0x200D
local nodecodes = nodes.nodecodes
local glyphcodes = nodes.glyphcodes
local disccodes = nodes.disccodes
local glyph_code = nodecodes.glyph
local glue_code = nodecodes.glue
local disc_code = nodecodes.disc
local math_code = nodecodes.math
local dir_code = nodecodes.dir
local localpar_code = nodecodes.localpar
local discretionary_code = disccodes.discretionary
local ligature_code = glyphcodes.ligature
local a_state = attributes.private('state')
local a_noligature = attributes.private("noligature")
local injections = nodes.injections
local setmark = injections.setmark
local setcursive = injections.setcursive
local setkern = injections.setkern
local setmove = injections.setmove
local setposition = injections.setposition
local resetinjection = injections.reset
local copyinjection = injections.copy
local setligaindex = injections.setligaindex
local getligaindex = injections.getligaindex
local fontdata = fonts.hashes.identifiers
local fontfeatures = fonts.hashes.features
local otffeatures = fonts.constructors.features.otf
local registerotffeature = otffeatures.register
local onetimemessage = fonts.loggers.onetimemessage or function() end
local getrandom = utilities and utilities.randomizer and utilities.randomizer.get
otf.defaultnodealternate = "none" -- first last
-- We use a few semi-global variables. The handler can be called nested but this assumes
-- that the same font is used.
local tfmdata = false
local characters = false
local descriptions = false
local marks = false
local classes = false
local currentfont = false
local factor = 0
local threshold = 0
local checkmarks = false
local discs = false
local spaces = false
local sweepnode = nil
local sweephead = { } -- we don't nil entries but false them (no collection and such)
local notmatchpre = { } -- to be checked: can we use false instead of nil / what if a == b tests
local notmatchpost = { } -- to be checked: can we use false instead of nil / what if a == b tests
local notmatchreplace = { } -- to be checked: can we use false instead of nil / what if a == b tests
local handlers = { }
local isspace = injections.isspace
local getthreshold = injections.getthreshold
local checkstep = (tracers and tracers.steppers.check) or function() end
local registerstep = (tracers and tracers.steppers.register) or function() end
local registermessage = (tracers and tracers.steppers.message) or function() end
-- local function checkdisccontent(d)
-- local pre, post, replace = getdisc(d)
-- if pre then for n in traverse_id(glue_code,pre) do report("pre: %s",nodes.idstostring(pre)) break end end
-- if post then for n in traverse_id(glue_code,post) do report("pos: %s",nodes.idstostring(post)) break end end
-- if replace then for n in traverse_id(glue_code,replace) do report("rep: %s",nodes.idstostring(replace)) break end end
-- end
local function logprocess(...)
if trace_steps then
registermessage(...)
if trace_steps == "silent" then
return
end
end
report_direct(...)
end
local function logwarning(...)
report_direct(...)
end
local gref do
local f_unicode = formatters["U+%X"] -- was ["%U"]
local f_uniname = formatters["U+%X (%s)"] -- was ["%U (%s)"]
local f_unilist = formatters["% t"]
gref = function(n) -- currently the same as in font-otb
if type(n) == "number" then
local description = descriptions[n]
local name = description and description.name
if name then
return f_uniname(n,name)
else
return f_unicode(n)
end
elseif n then
local t = { }
for i=1,#n do
local ni = n[i]
if tonumber(ni) then -- later we will start at 2
local di = descriptions[ni]
local nn = di and di.name
if nn then
t[#t+1] = f_uniname(ni,nn)
else
t[#t+1] = f_unicode(ni)
end
end
end
return f_unilist(t)
else
return ""
end
end
end
local function cref(dataset,sequence,index)
if not dataset then
return "no valid dataset"
end
local merged = sequence.merged and "merged " or ""
if index then
return formatters["feature %a, type %a, %schain lookup %a, index %a"](
dataset[4],sequence.type,merged,sequence.name,index)
else
return formatters["feature %a, type %a, %schain lookup %a"](
dataset[4],sequence.type,merged,sequence.name)
end
end
local function pref(dataset,sequence)
return formatters["feature %a, type %a, %slookup %a"](
dataset[4],sequence.type,sequence.merged and "merged " or "",sequence.name)
end
local function mref(rlmode)
if not rlmode or rlmode >= 0 then
return "l2r"
else
return "r2l"
end
end
-- The next code is somewhat complicated by the fact that some fonts can have ligatures made
-- from ligatures that themselves have marks. This was identified by Kai in for instance
-- arabtype: KAF LAM SHADDA ALEF FATHA (0x0643 0x0644 0x0651 0x0627 0x064E). This becomes
-- KAF LAM-ALEF with a SHADDA on the first and a FATHA op de second component. In a next
-- iteration this becomes a KAF-LAM-ALEF with a SHADDA on the second and a FATHA on the
-- third component.
-- We can assume that languages that use marks are not hyphenated. We can also assume
-- that at most one discretionary is present.
-- We do need components in funny kerning mode but maybe I can better reconstruct then
-- as we do have the font components info available; removing components makes the
-- previous code much simpler. Also, later on copying and freeing becomes easier.
-- However, for arabic we need to keep them around for the sake of mark placement
-- and indices.
local function flattendisk(head,disc)
local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
local prev, next = getboth(disc)
local ishead = head == disc
setdisc(disc)
flush_node(disc)
if pre then
flush_node_list(pre)
end
if post then
flush_node_list(post)
end
if ishead then
if replace then
if next then
setlink(replacetail,next)
end
return replace, replace
elseif next then
return next, next
else
-- return -- maybe warning
end
else
if replace then
if next then
setlink(replacetail,next)
end
setlink(prev,replace)
return head, replace
else
setlink(prev,next) -- checks for next anyway
return head, next
end
end
end
local function appenddisc(disc,list)
local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
local posthead = list
local replacehead = copy_node_list(list)
if post then
setlink(posttail,posthead)
else
post = posthead
end
if replace then
setlink(replacetail,replacehead)
else
replace = replacehead
end
setdisc(disc,pre,post,replace)
end
-- start is a mark and we need to keep that one
local take_components = getcomponents -- we overload here (for now)
local set_components = setcomponents -- we overload here (for now)
----- get_components = getcomponents -- we overload here (for now)
local function count_components(start,marks)
if getid(start) ~= glyph_code then
return 0
elseif getsubtype(start) == ligature_code then
local i = 0
local components = getcomponents(start)
while components do
i = i + count_components(components,marks)
components = getnext(components)
end
return i
elseif not marks[getchar(start)] then
return 1
else
return 0
end
end
local function markstoligature(head,start,stop,char)
if start == stop and getchar(start) == char then
return head, start
else
local prev = getprev(start)
local next = getnext(stop)
setprev(start)
setnext(stop)
local base = copy_no_components(start,copyinjection)
if head == start then
head = base
end
resetinjection(base)
setchar(base,char)
setsubtype(base,ligature_code)
set_components(base,start)
setlink(prev,base,next)
return head, base
end
end
-- Remark for Kai: (some arabic fonts do mark + mark = other mark and such)
--
-- The hasmarks is needed for ligatures of marks that are part of a ligature in
-- which case we assume that we can delete the marks anyway (we can always become
-- more clever if needed) .. in fact the whole logic here should be redone. We're
-- in the not discfound branch then. We now have skiphash too so we can be more
-- selective if needed (todo).
local function toligature(head,start,stop,char,dataset,sequence,skiphash,discfound,hasmarks) -- brr head
if getattr(start,a_noligature) == 1 then
-- so we can do: e\noligature{ff}e e\noligature{f}fie (we only look at the first)
return head, start
end
if start == stop and getchar(start) == char then
resetinjection(start)
setchar(start,char)
return head, start
end
local prev = getprev(start)
local next = getnext(stop)
local comp = start
setprev(start)
setnext(stop)
local base = copy_no_components(start,copyinjection)
if start == head then
head = base
end
resetinjection(base)
setchar(base,char)
setsubtype(base,ligature_code)
set_components(base,comp)
setlink(prev,base,next)
if not discfound then
local deletemarks = not skiphash or hasmarks
local components = start
local baseindex = 0
local componentindex = 0
local head = base
local current = base
-- first we loop over the glyphs in start ... stop
while start do
local char = getchar(start)
if not marks[char] then
baseindex = baseindex + componentindex
componentindex = count_components(start,marks)
-- we can be more clever here: "not deletemarks or (skiphash and not skiphash[char])"
-- and such:
elseif not deletemarks then
-- we can get a loop when the font expects otherwise (i.e. unexpected deletemarks)
setligaindex(start,baseindex + getligaindex(start,componentindex))
if trace_marks then
logwarning("%s: keep mark %s, gets index %s",pref(dataset,sequence),gref(char),getligaindex(start))
end
local n = copy_node(start)
copyinjection(n,start) -- is this ok ? we position later anyway
head, current = insert_node_after(head,current,n) -- unlikely that mark has components
elseif trace_marks then
logwarning("%s: delete mark %s",pref(dataset,sequence),gref(char))
end
start = getnext(start)
end
-- we can have one accent as part of a lookup and another following
local start = getnext(current)
while start do
local char = ischar(start)
if char then
-- also something skiphash here?
if marks[char] then
setligaindex(start,baseindex + getligaindex(start,componentindex))
if trace_marks then
logwarning("%s: set mark %s, gets index %s",pref(dataset,sequence),gref(char),getligaindex(start))
end
start = getnext(start)
else
break
end
else
break
end
end
else
-- discfound ... forget about marks .. probably no scripts that hyphenate and have marks
local discprev, discnext = getboth(discfound)
if discprev and discnext then
-- we assume normalization in context, and don't care about generic ... especially
-- \- can give problems as there we can have a negative char but that won't match
-- anyway
local pre, post, replace, pretail, posttail, replacetail = getdisc(discfound,true)
if not replace then
local prev = getprev(base)
local comp = take_components(base)
local copied = copy_only_glyphs(comp)
if pre then
setlink(discprev,pre)
else
setnext(discprev) -- also blocks funny assignments
end
pre = comp
if post then
setlink(posttail,discnext)
setprev(post)
else
post = discnext
setprev(discnext) -- also blocks funny assignments
end
setlink(prev,discfound,next)
setboth(base)
-- here components have a pointer so we can't free it!
set_components(base,copied)
replace = base
if forcediscretionaries then
setdisc(discfound,pre,post,replace,discretionary_code)
else
setdisc(discfound,pre,post,replace)
end
base = prev
end
end
end
return head, base
end
local function multiple_glyphs(head,start,multiple,skiphash,what) -- what to do with skiphash matches here
local nofmultiples = #multiple
if nofmultiples > 0 then
resetinjection(start)
setchar(start,multiple[1])
if nofmultiples > 1 then
local sn = getnext(start)
for k=2,nofmultiples do
-- untested:
--
-- while ignoremarks and marks[getchar(sn)] then
-- local sn = getnext(sn)
-- end
local n = copy_node(start) -- ignore components
resetinjection(n)
setchar(n,multiple[k])
insert_node_after(head,start,n)
start = n
end
if what == true then
-- we're ok
elseif what > 1 then
local m = multiple[nofmultiples]
for i=2,what do
local n = copy_node(start) -- ignore components
resetinjection(n)
setchar(n,m)
insert_node_after(head,start,n)
start = n
end
end
end
return head, start, true
else
if trace_multiples then
logprocess("no multiple for %s",gref(getchar(start)))
end
return head, start, false
end
end
local function get_alternative_glyph(start,alternatives,value)
local n = #alternatives
if n == 1 then
-- we could actually change that into a gsub and save some memory in the
-- font loader but it makes tracing more messy
return alternatives[1], trace_alternatives and "1 (only one present)"
elseif value == "random" then
local r = getrandom and getrandom("glyph",1,n) or random(1,n)
return alternatives[r], trace_alternatives and formatters["value %a, taking %a"](value,r)
elseif value == "first" then
return alternatives[1], trace_alternatives and formatters["value %a, taking %a"](value,1)
elseif value == "last" then
return alternatives[n], trace_alternatives and formatters["value %a, taking %a"](value,n)
end
value = value == true and 1 or tonumber(value)
if type(value) ~= "number" then
return alternatives[1], trace_alternatives and formatters["invalid value %s, taking %a"](value,1)
end
-- local a = alternatives[value]
-- if a then
-- -- some kind of hash
-- return a, trace_alternatives and formatters["value %a, taking %a"](value,a)
-- end
if value > n then
local defaultalt = otf.defaultnodealternate
if defaultalt == "first" then
return alternatives[n], trace_alternatives and formatters["invalid value %s, taking %a"](value,1)
elseif defaultalt == "last" then
return alternatives[1], trace_alternatives and formatters["invalid value %s, taking %a"](value,n)
else
return false, trace_alternatives and formatters["invalid value %a, %s"](value,"out of range")
end
elseif value == 0 then
return getchar(start), trace_alternatives and formatters["invalid value %a, %s"](value,"no change")
elseif value < 1 then
return alternatives[1], trace_alternatives and formatters["invalid value %a, taking %a"](value,1)
else
return alternatives[value], trace_alternatives and formatters["value %a, taking %a"](value,value)
end
end
-- handlers
function handlers.gsub_single(head,start,dataset,sequence,replacement)
if trace_singles then
logprocess("%s: replacing %s by single %s",pref(dataset,sequence),gref(getchar(start)),gref(replacement))
end
resetinjection(start)
setchar(start,replacement)
return head, start, true
end
function handlers.gsub_alternate(head,start,dataset,sequence,alternative)
local kind = dataset[4]
local what = dataset[1]
local value = what == true and tfmdata.shared.features[kind] or what
local choice, comment = get_alternative_glyph(start,alternative,value)
if choice then
if trace_alternatives then
logprocess("%s: replacing %s by alternative %a to %s, %s",pref(dataset,sequence),gref(getchar(start)),gref(choice),comment)
end
resetinjection(start)
setchar(start,choice)
else
if trace_alternatives then
logwarning("%s: no variant %a for %s, %s",pref(dataset,sequence),value,gref(getchar(start)),comment)
end
end
return head, start, true
end
function handlers.gsub_multiple(head,start,dataset,sequence,multiple,rlmode,skiphash)
if trace_multiples then
logprocess("%s: replacing %s by multiple %s",pref(dataset,sequence),gref(getchar(start)),gref(multiple))
end
return multiple_glyphs(head,start,multiple,skiphash,dataset[1])
end
-- Don't we deal with disc otherwise now? I need to check if the next one can be
-- simplified. Anyway, it can be way messier: marks that get removed as well as
-- marks that are kept.
function handlers.gsub_ligature(head,start,dataset,sequence,ligature,rlmode,skiphash)
local current = getnext(start)
if not current then
return head, start, false, nil
end
local stop = nil
local startchar = getchar(start)
if skiphash and skiphash[startchar] then
while current do
local char = ischar(current,currentfont)
if char then
local lg = ligature[char]
if lg then
stop = current
ligature = lg
current = getnext(current)
else
break
end
else
break
end
end
if stop then
local lig = ligature.ligature
if lig then
if trace_ligatures then
local stopchar = getchar(stop)
head, start = markstoligature(head,start,stop,lig)
logprocess("%s: replacing %s upto %s by ligature %s case 1",pref(dataset,sequence),gref(startchar),gref(stopchar),gref(getchar(start)))
else
head, start = markstoligature(head,start,stop,lig)
end
return head, start, true, false
else
-- ok, goto next lookup
end
end
else -- is the check for disc still valid here ? and why only replace then
local discfound = false
local lastdisc = nil
local hasmarks = marks[startchar]
while current do
local char, id = ischar(current,currentfont)
if char then
if skiphash and skiphash[char] then
current = getnext(current)
-- if stop then stop = current end -- ?
else -- ligature is a tree
local lg = ligature[char] -- can there be multiple in a row? maybe in a bad font
if lg then
if not discfound and lastdisc then
discfound = lastdisc
lastdisc = nil
end
if marks[char] then
hasmarks = true
end
stop = current -- needed for fake so outside then
ligature = lg
current = getnext(current)
else
break
end
end
elseif char == false then
-- kind of weird
break
elseif id == disc_code then
--
-- Kai: see chainprocs, we probably could do the same here or was there a reason
-- why we kept the replace check here.
--
-- if not discfound then
-- discfound = current
-- end
-- if current == stop then
-- break -- okay? or before the disc
-- else
-- current = getnext(current)
-- end
--
local replace = getfield(current,"replace") -- hm: pre and post
if replace then
-- of{f-}{}{f}e o{f-}{}{f}fe o{-}{}{ff}e (oe and ff ligature)
-- we can end up here when we have a start run .. testruns start at a disc but
-- so here we have the other case: char + disc
while replace do
local char, id = ischar(replace,currentfont)
if char then
local lg = ligature[char] -- can there be multiple in a row? maybe in a bad font
if lg then
if marks[char] then
hasmarks = true -- very unlikely
end
ligature = lg
replace = getnext(replace)
else
return head, start, false, false
end
else
return head, start, false, false
end
end
stop = current
end
lastdisc = current
current = getnext(current)
else
break
end
end
local lig = ligature.ligature
if lig then
if stop then
if trace_ligatures then
local stopchar = getchar(stop)
head, start = toligature(head,start,stop,lig,dataset,sequence,skiphash,discfound,hasmarks)
logprocess("%s: replacing %s upto %s by ligature %s case 2",pref(dataset,sequence),gref(startchar),gref(stopchar),gref(lig))
else
head, start = toligature(head,start,stop,lig,dataset,sequence,skiphash,discfound,hasmarks)
end
else
-- weird but happens (in some arabic font)
resetinjection(start)
setchar(start,lig)
if trace_ligatures then
logprocess("%s: replacing %s by (no real) ligature %s case 3",pref(dataset,sequence),gref(startchar),gref(lig))
end
end
return head, start, true, discfound
else
-- weird but happens, pseudo ligatures ... just the components
end
end
return head, start, false, discfound
end
function handlers.gpos_single(head,start,dataset,sequence,kerns,rlmode,skiphash,step,injection)
local startchar = getchar(start)
local format = step.format
if format == "single" or type(kerns) == "table" then -- the table check can go
local dx, dy, w, h = setposition(0,start,factor,rlmode,kerns,injection)
if trace_kerns then
logprocess("%s: shifting single %s by %s xy (%p,%p) and wh (%p,%p)",pref(dataset,sequence),gref(startchar),format,dx,dy,w,h)
end
else
local k = (format == "move" and setmove or setkern)(start,factor,rlmode,kerns,injection)
if trace_kerns then
logprocess("%s: shifting single %s by %s %p",pref(dataset,sequence),gref(startchar),format,k)
end
end
return head, start, true
end
function handlers.gpos_pair(head,start,dataset,sequence,kerns,rlmode,skiphash,step,injection)
local snext = getnext(start)
if not snext then
return head, start, false
else
local prev = start
while snext do
local nextchar = ischar(snext,currentfont)
if nextchar then
if skiphash and skiphash[nextchar] then -- includes marks too when flag
prev = snext
snext = getnext(snext)
else
local krn = kerns[nextchar]
if not krn then
break
end
local format = step.format
if format == "pair" then
local a, b = krn[1], krn[2]
if a == true then
-- zero
elseif a then -- #a > 0
local x, y, w, h = setposition(1,start,factor,rlmode,a,injection)
if trace_kerns then
local startchar = getchar(start)
logprocess("%s: shifting first of pair %s and %s by xy (%p,%p) and wh (%p,%p) as %s",pref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h,injection or "injections")
end
end
if b == true then
-- zero
start = snext -- cf spec
elseif b then -- #b > 0
local x, y, w, h = setposition(2,snext,factor,rlmode,b,injection)
if trace_kerns then
local startchar = getchar(snext)
logprocess("%s: shifting second of pair %s and %s by xy (%p,%p) and wh (%p,%p) as %s",pref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h,injection or "injections")
end
start = snext -- cf spec
elseif forcepairadvance then
start = snext -- for testing, not cf spec
end
return head, start, true
elseif krn ~= 0 then
local k = (format == "move" and setmove or setkern)(snext,factor,rlmode,krn,injection)
if trace_kerns then
logprocess("%s: inserting %s %p between %s and %s as %s",pref(dataset,sequence),format,k,gref(getchar(prev)),gref(nextchar),injection or "injections")
end
return head, start, true
else -- can't happen
break
end
end
else
break
end
end
return head, start, false
end
end
--[[ldx--
We get hits on a mark, but we're not sure if the it has to be applied so
we need to explicitly test for basechar, baselig and basemark entries.
--ldx]]--
function handlers.gpos_mark2base(head,start,dataset,sequence,markanchors,rlmode,skiphash)
local markchar = getchar(start)
if marks[markchar] then
local base = getprev(start) -- [glyph] [start=mark]
if base then
local basechar = ischar(base,currentfont)
if basechar then
if marks[basechar] then
while base do
base = getprev(base)
if base then
basechar = ischar(base,currentfont)
if basechar then
if not marks[basechar] then
break
end
else
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
end
return head, start, false
end
else
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
end
return head, start, false
end
end
end
local ba = markanchors[1][basechar]
if ba then
local ma = markanchors[2]
local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
if trace_marks then
logprocess("%s, bound %s, anchoring mark %s to basechar %s => (%p,%p)",
pref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
end
return head, start, true
elseif trace_bugs then
-- onetimemessage(currentfont,basechar,"no base anchors",report_fonts)
logwarning("%s: mark %s is not anchored to %s",pref(dataset,sequence),gref(markchar),gref(basechar))
end
elseif trace_bugs then
logwarning("%s: nothing preceding, case %i",pref(dataset,sequence),1)
end
elseif trace_bugs then
logwarning("%s: nothing preceding, case %i",pref(dataset,sequence),2)
end
elseif trace_bugs then
logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
end
return head, start, false
end
function handlers.gpos_mark2ligature(head,start,dataset,sequence,markanchors,rlmode,skiphash)
local markchar = getchar(start)
if marks[markchar] then
local base = getprev(start) -- [glyph] [optional marks] [start=mark]
if base then
local basechar = ischar(base,currentfont)
if basechar then
if marks[basechar] then
while base do
base = getprev(base)
if base then
basechar = ischar(base,currentfont)
if basechar then
if not marks[basechar] then
break
end
else
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
end
return head, start, false
end
else
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
end
return head, start, false
end
end
end
local ba = markanchors[1][basechar]
if ba then
local ma = markanchors[2]
if ma then
local index = getligaindex(start)
ba = ba[index]
if ba then
local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
if trace_marks then
logprocess("%s, index %s, bound %s, anchoring mark %s to baselig %s at index %s => (%p,%p)",
pref(dataset,sequence),index,bound,gref(markchar),gref(basechar),index,dx,dy)
end
return head, start, true
else
if trace_bugs then
logwarning("%s: no matching anchors for mark %s and baselig %s with index %a",pref(dataset,sequence),gref(markchar),gref(basechar),index)
end
end
end
elseif trace_bugs then
-- logwarning("%s: char %s is missing in font",pref(dataset,sequence),gref(basechar))
onetimemessage(currentfont,basechar,"no base anchors",report_fonts)
end
elseif trace_bugs then
logwarning("%s: prev node is no char, case %i",pref(dataset,sequence),1)
end
elseif trace_bugs then
logwarning("%s: prev node is no char, case %i",pref(dataset,sequence),2)
end
elseif trace_bugs then
logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
end
return head, start, false
end
function handlers.gpos_mark2mark(head,start,dataset,sequence,markanchors,rlmode,skiphash)
local markchar = getchar(start)
if marks[markchar] then
local base = getprev(start) -- [glyph] [basemark] [start=mark]
local slc = getligaindex(start)
if slc then -- a rather messy loop ... needs checking with husayni
while base do
local blc = getligaindex(base)
if blc and blc ~= slc then
base = getprev(base)
else
break
end
end
end
if base then
local basechar = ischar(base,currentfont)
if basechar then -- subtype test can go
local ba = markanchors[1][basechar] -- slot 1 has been made copy of the class hash
if ba then
local ma = markanchors[2]
local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],true,checkmarks)
if trace_marks then
logprocess("%s, bound %s, anchoring mark %s to basemark %s => (%p,%p)",
pref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
end
return head, start, true
end
end
end
elseif trace_bugs then
logwarning("%s: mark %s is no mark",pref(dataset,sequence),gref(markchar))
end
return head, start, false
end
function handlers.gpos_cursive(head,start,dataset,sequence,exitanchors,rlmode,skiphash,step) -- to be checked
local startchar = getchar(start)
if marks[startchar] then
if trace_cursive then
logprocess("%s: ignoring cursive for mark %s",pref(dataset,sequence),gref(startchar))
end
else
local nxt = getnext(start)
while nxt do
local nextchar = ischar(nxt,currentfont)
if not nextchar then
break
elseif marks[nextchar] then -- always sequence.flags[1]
nxt = getnext(nxt)
else
local exit = exitanchors[3]
if exit then
local entry = exitanchors[1][nextchar]
if entry then
entry = entry[2]
if entry then
local r2lflag = sequence.flags[4] -- mentioned in the standard
local dx, dy, bound = setcursive(start,nxt,factor,rlmode,exit,entry,characters[startchar],characters[nextchar],r2lflag)
if trace_cursive then
logprocess("%s: moving %s to %s cursive (%p,%p) using bound %s in %s mode",pref(dataset,sequence),gref(startchar),gref(nextchar),dx,dy,bound,mref(rlmode))
end
return head, start, true
end
end
end
break
end
end
end
return head, start, false
end
--[[ldx--
I will implement multiple chain replacements once I run into a font that uses
it. It's not that complex to handle.
--ldx]]--
local chainprocs = { }
local function logprocess(...)
if trace_steps then
registermessage(...)
if trace_steps == "silent" then
return
end
end
report_subchain(...)
end
local logwarning = report_subchain
local function logprocess(...)
if trace_steps then
registermessage(...)
if trace_steps == "silent" then
return
end
end
report_chain(...)
end
local logwarning = report_chain
-- We could share functions but that would lead to extra function calls with many
-- arguments, redundant tests and confusing messages.
-- The reversesub is a special case, which is why we need to store the replacements
-- in a bit weird way. There is no lookup and the replacement comes from the lookup
-- itself. It is meant mostly for dealing with Urdu.
local function reversesub(head,start,stop,dataset,sequence,replacements,rlmode,skiphash)
local char = getchar(start)
local replacement = replacements[char]
if replacement then
if trace_singles then
logprocess("%s: single reverse replacement of %s by %s",cref(dataset,sequence),gref(char),gref(replacement))
end
resetinjection(start)
setchar(start,replacement)
return head, start, true
else
return head, start, false
end
end
chainprocs.reversesub = reversesub
--[[ldx--
This chain stuff is somewhat tricky since we can have a sequence of actions to be
applied: single, alternate, multiple or ligature where ligature can be an invalid
one in the sense that it will replace multiple by one but not neccessary one that
looks like the combination (i.e. it is the counterpart of multiple then). For
example, the following is valid:
xxxabcdexxx [single a->A][multiple b->BCD][ligature cde->E] xxxABCDExxx
Therefore we we don't really do the replacement here already unless we have the
single lookup case. The efficiency of the replacements can be improved by deleting
as less as needed but that would also make the code even more messy.
--ldx]]--
--[[ldx--
Here we replace start by a single variant.
--ldx]]--
-- To be done (example needed): what if > 1 steps
-- this is messy: do we need this disc checking also in alternaties?
local function reportzerosteps(dataset,sequence)
logwarning("%s: no steps",cref(dataset,sequence))
end
local function reportmoresteps(dataset,sequence)
logwarning("%s: more than 1 step",cref(dataset,sequence))
end
-- local function reportbadsteps(dataset,sequence)
-- logwarning("%s: bad step, no proper return values",cref(dataset,sequence))
-- end
local function getmapping(dataset,sequence,currentlookup)
local steps = currentlookup.steps
local nofsteps = currentlookup.nofsteps
if nofsteps == 0 then
reportzerosteps(dataset,sequence)
currentlookup.mapping = false
return false
else
if nofsteps > 1 then
reportmoresteps(dataset,sequence)
end
local mapping = steps[1].coverage
currentlookup.mapping = mapping
currentlookup.format = steps[1].format
return mapping
end
end
function chainprocs.gsub_single(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local current = start
while current do
local currentchar = ischar(current)
if currentchar then
local replacement = mapping[currentchar]
if not replacement or replacement == "" then
if trace_bugs then
logwarning("%s: no single for %s",cref(dataset,sequence,chainindex),gref(currentchar))
end
else
if trace_singles then
logprocess("%s: replacing single %s by %s",cref(dataset,sequence,chainindex),gref(currentchar),gref(replacement))
end
resetinjection(current)
setchar(current,replacement)
end
return head, start, true
elseif currentchar == false then
-- can't happen
break
elseif current == stop then
break
else
current = getnext(current)
end
end
end
return head, start, false
end
--[[ldx--
Here we replace start by new glyph. First we delete the rest of the match.
--ldx]]--
-- char_1 mark_1 -> char_x mark_1 (ignore marks)
-- char_1 mark_1 -> char_x
-- to be checked: do we always have just one glyph?
-- we can also have alternates for marks
-- marks come last anyway
-- are there cases where we need to delete the mark
function chainprocs.gsub_alternate(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local kind = dataset[4]
local what = dataset[1]
local value = what == true and tfmdata.shared.features[kind] or what -- todo: optimize in ctx
local current = start
while current do
local currentchar = ischar(current)
if currentchar then
local alternatives = mapping[currentchar]
if alternatives then
local choice, comment = get_alternative_glyph(current,alternatives,value)
if choice then
if trace_alternatives then
logprocess("%s: replacing %s by alternative %a to %s, %s",cref(dataset,sequence),gref(currentchar),choice,gref(choice),comment)
end
resetinjection(start)
setchar(start,choice)
else
if trace_alternatives then
logwarning("%s: no variant %a for %s, %s",cref(dataset,sequence),value,gref(currentchar),comment)
end
end
end
return head, start, true
elseif currentchar == false then
-- can't happen
break
elseif current == stop then
break
else
current = getnext(current)
end
end
end
return head, start, false
end
--[[ldx--
Here we replace start by a sequence of new glyphs.
--ldx]]--
function chainprocs.gsub_multiple(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local startchar = getchar(start)
local replacement = mapping[startchar]
if not replacement or replacement == "" then
if trace_bugs then
logwarning("%s: no multiple for %s",cref(dataset,sequence),gref(startchar))
end
else
if trace_multiples then
logprocess("%s: replacing %s by multiple characters %s",cref(dataset,sequence),gref(startchar),gref(replacement))
end
return multiple_glyphs(head,start,replacement,skiphash,dataset[1])
end
end
return head, start, false
end
--[[ldx--
When we replace ligatures we use a helper that handles the marks. I might change
this function (move code inline and handle the marks by a separate function). We
assume rather stupid ligatures (no complex disc nodes).
--ldx]]--
-- compare to handlers.gsub_ligature which is more complex ... why
function chainprocs.gsub_ligature(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local startchar = getchar(start)
local ligatures = mapping[startchar]
if not ligatures then
if trace_bugs then
logwarning("%s: no ligatures starting with %s",cref(dataset,sequence,chainindex),gref(startchar))
end
else
local hasmarks = marks[startchar]
local current = getnext(start)
local discfound = false
local last = stop
local nofreplacements = 1
while current do
-- todo: ischar ... can there really be disc nodes here?
local id = getid(current)
if id == disc_code then
if not discfound then
discfound = current
end
if current == stop then
break -- okay? or before the disc
else
current = getnext(current)
end
else
local schar = getchar(current)
if skiphash and skiphash[schar] then -- marks
-- if current == stop then -- maybe add this
-- break
-- else
current = getnext(current)
-- end
else
local lg = ligatures[schar]
if lg then
ligatures = lg
last = current
nofreplacements = nofreplacements + 1
if marks[char] then
hasmarks = true
end
if current == stop then
break
else
current = getnext(current)
end
else
break
end
end
end
end
local ligature = ligatures.ligature
if ligature then
if chainindex then
stop = last
end
if trace_ligatures then
if start == stop then
logprocess("%s: replacing character %s by ligature %s case 3",cref(dataset,sequence,chainindex),gref(startchar),gref(ligature))
else
logprocess("%s: replacing character %s upto %s by ligature %s case 4",cref(dataset,sequence,chainindex),gref(startchar),gref(getchar(stop)),gref(ligature))
end
end
head, start = toligature(head,start,stop,ligature,dataset,sequence,skiphash,discfound,hasmarks)
return head, start, true, nofreplacements, discfound
elseif trace_bugs then
if start == stop then
logwarning("%s: replacing character %s by ligature fails",cref(dataset,sequence,chainindex),gref(startchar))
else
logwarning("%s: replacing character %s upto %s by ligature fails",cref(dataset,sequence,chainindex),gref(startchar),gref(getchar(stop)))
end
end
end
end
return head, start, false, 0, false
end
function chainprocs.gpos_single(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local startchar = getchar(start)
local kerns = mapping[startchar]
if kerns then
local format = currentlookup.format
if format == "single" then
local dx, dy, w, h = setposition(0,start,factor,rlmode,kerns) -- currentlookup.flags ?
if trace_kerns then
logprocess("%s: shifting single %s by %s (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),format,dx,dy,w,h)
end
else -- needs checking .. maybe no kerns format for single
local k = (format == "move" and setmove or setkern)(start,factor,rlmode,kerns,injection)
if trace_kerns then
logprocess("%s: shifting single %s by %s %p",cref(dataset,sequence),gref(startchar),format,k)
end
end
return head, start, true
end
end
return head, start, false
end
function chainprocs.gpos_pair(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex) -- todo: injections ?
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local snext = getnext(start)
if snext then
local startchar = getchar(start)
local kerns = mapping[startchar] -- always 1 step
if kerns then
local prev = start
while snext do
local nextchar = ischar(snext,currentfont)
if not nextchar then
break
end
if skiphash and skiphash[nextchar] then
prev = snext
snext = getnext(snext)
else
local krn = kerns[nextchar]
if not krn then
break
end
local format = currentlookup.format
if format == "pair" then
local a, b = krn[1], krn[2]
if a == true then
-- zero
elseif a then
local x, y, w, h = setposition(1,start,factor,rlmode,a,"injections") -- currentlookups flags?
if trace_kerns then
local startchar = getchar(start)
logprocess("%s: shifting first of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h)
end
end
if b == true then
-- zero
start = snext -- cf spec
elseif b then -- #b > 0
local x, y, w, h = setposition(2,snext,factor,rlmode,b,"injections")
if trace_kerns then
local startchar = getchar(start)
logprocess("%s: shifting second of pair %s and %s by (%p,%p) and correction (%p,%p)",cref(dataset,sequence),gref(startchar),gref(nextchar),x,y,w,h)
end
start = snext -- cf spec
elseif forcepairadvance then
start = snext -- for testing, not cf spec
end
return head, start, true
elseif krn ~= 0 then
local k = (format == "move" and setmove or setkern)(snext,factor,rlmode,krn)
if trace_kerns then
logprocess("%s: inserting %s %p between %s and %s",cref(dataset,sequence),format,k,gref(getchar(prev)),gref(nextchar))
end
return head, start, true
else
break
end
end
end
end
end
end
return head, start, false
end
function chainprocs.gpos_mark2base(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local markchar = getchar(start)
if marks[markchar] then
local markanchors = mapping[markchar] -- always 1 step
if markanchors then
local base = getprev(start) -- [glyph] [start=mark]
if base then
local basechar = ischar(base,currentfont)
if basechar then
if marks[basechar] then
while base do
base = getprev(base)
if base then
local basechar = ischar(base,currentfont)
if basechar then
if not marks[basechar] then
break
end
else
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),1)
end
return head, start, false
end
else
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",pref(dataset,sequence),gref(markchar),2)
end
return head, start, false
end
end
end
local ba = markanchors[1][basechar]
if ba then
local ma = markanchors[2]
if ma then
local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
if trace_marks then
logprocess("%s, bound %s, anchoring mark %s to basechar %s => (%p,%p)",
cref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
end
return head, start, true
end
end
elseif trace_bugs then
logwarning("%s: prev node is no char, case %i",cref(dataset,sequence),1)
end
elseif trace_bugs then
logwarning("%s: prev node is no char, case %i",cref(dataset,sequence),2)
end
elseif trace_bugs then
logwarning("%s: mark %s has no anchors",cref(dataset,sequence),gref(markchar))
end
elseif trace_bugs then
logwarning("%s: mark %s is no mark",cref(dataset,sequence),gref(markchar))
end
end
return head, start, false
end
function chainprocs.gpos_mark2ligature(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local markchar = getchar(start)
if marks[markchar] then
local markanchors = mapping[markchar] -- always 1 step
if markanchors then
local base = getprev(start) -- [glyph] [optional marks] [start=mark]
if base then
local basechar = ischar(base,currentfont)
if basechar then
if marks[basechar] then
while base do
base = getprev(base)
if base then
local basechar = ischar(base,currentfont)
if basechar then
if not marks[basechar] then
break
end
else
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",cref(dataset,sequence),markchar,1)
end
return head, start, false
end
else
if trace_bugs then
logwarning("%s: no base for mark %s, case %i",cref(dataset,sequence),markchar,2)
end
return head, start, false
end
end
end
local ba = markanchors[1][basechar]
if ba then
local ma = markanchors[2]
if ma then
local index = getligaindex(start)
ba = ba[index]
if ba then
local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],false,checkmarks)
if trace_marks then
logprocess("%s, bound %s, anchoring mark %s to baselig %s at index %s => (%p,%p)",
cref(dataset,sequence),a or bound,gref(markchar),gref(basechar),index,dx,dy)
end
return head, start, true
end
end
end
elseif trace_bugs then
logwarning("%s, prev node is no char, case %i",cref(dataset,sequence),1)
end
elseif trace_bugs then
logwarning("%s, prev node is no char, case %i",cref(dataset,sequence),2)
end
elseif trace_bugs then
logwarning("%s, mark %s has no anchors",cref(dataset,sequence),gref(markchar))
end
elseif trace_bugs then
logwarning("%s, mark %s is no mark",cref(dataset,sequence),gref(markchar))
end
end
return head, start, false
end
function chainprocs.gpos_mark2mark(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local markchar = getchar(start)
if marks[markchar] then
local markanchors = mapping[markchar] -- always 1 step
if markanchors then
local base = getprev(start) -- [glyph] [basemark] [start=mark]
local slc = getligaindex(start)
if slc then -- a rather messy loop ... needs checking with husayni
while base do
local blc = getligaindex(base)
if blc and blc ~= slc then
base = getprev(base)
else
break
end
end
end
if base then -- subtype test can go
local basechar = ischar(base,currentfont)
if basechar then
local ba = markanchors[1][basechar]
if ba then
local ma = markanchors[2]
if ma then
local dx, dy, bound = setmark(start,base,factor,rlmode,ba,ma,characters[basechar],true,checkmarks)
if trace_marks then
logprocess("%s, bound %s, anchoring mark %s to basemark %s => (%p,%p)",
cref(dataset,sequence),bound,gref(markchar),gref(basechar),dx,dy)
end
return head, start, true
end
end
elseif trace_bugs then
logwarning("%s: prev node is no mark, case %i",cref(dataset,sequence),1)
end
elseif trace_bugs then
logwarning("%s: prev node is no mark, case %i",cref(dataset,sequence),2)
end
elseif trace_bugs then
logwarning("%s: mark %s has no anchors",cref(dataset,sequence),gref(markchar))
end
elseif trace_bugs then
logwarning("%s: mark %s is no mark",cref(dataset,sequence),gref(markchar))
end
end
return head, start, false
end
function chainprocs.gpos_cursive(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash,chainindex)
local mapping = currentlookup.mapping
if mapping == nil then
mapping = getmapping(dataset,sequence,currentlookup)
end
if mapping then
local startchar = getchar(start)
local exitanchors = mapping[startchar] -- always 1 step
if exitanchors then
if marks[startchar] then
if trace_cursive then
logprocess("%s: ignoring cursive for mark %s",pref(dataset,sequence),gref(startchar))
end
else
local nxt = getnext(start)
while nxt do
local nextchar = ischar(nxt,currentfont)
if not nextchar then
break
elseif marks[nextchar] then
-- should not happen (maybe warning)
nxt = getnext(nxt)
else
local exit = exitanchors[3]
if exit then
local entry = exitanchors[1][nextchar]
if entry then
entry = entry[2]
if entry then
local r2lflag = sequence.flags[4] -- mentioned in the standard
local dx, dy, bound = setcursive(start,nxt,factor,rlmode,exit,entry,characters[startchar],characters[nextchar],r2lflag)
if trace_cursive then
logprocess("%s: moving %s to %s cursive (%p,%p) using bound %s in %s mode",pref(dataset,sequence),gref(startchar),gref(nextchar),dx,dy,bound,mref(rlmode))
end
return head, start, true
end
end
elseif trace_bugs then
onetimemessage(currentfont,startchar,"no entry anchors",report_fonts)
end
break
end
end
end
elseif trace_cursive and trace_details then
logprocess("%s, cursive %s is already done",pref(dataset,sequence),gref(getchar(start)),alreadydone)
end
end
return head, start, false
end
-- what pointer to return, spec says stop
-- to be discussed ... is bidi changer a space?
-- elseif char == zwnj and sequence[n][32] then -- brrr
local function show_skip(dataset,sequence,char,ck,class)
logwarning("%s: skipping char %s, class %a, rule %a, lookuptype %a",cref(dataset,sequence),gref(char),class,ck[1],ck[8] or ck[2])
end
-- A previous version had disc collapsing code in the (single sub) handler plus some
-- checking in the main loop, but that left the pre/post sequences undone. The best
-- solution is to add some checking there and backtrack when a replace/post matches
-- but it takes a bit of work to figure out an efficient way (this is what the
-- sweep* names refer to). I might look into that variant one day again as it can
-- replace some other code too. In that approach we can have a special version for
-- gub and pos which gains some speed. This method does the test and passes info to
-- the handlers. Here collapsing is handled in the main loop which also makes code
-- elsewhere simpler (i.e. no need for the other special runners and disc code in
-- ligature building). I also experimented with pushing preceding glyphs sequences
-- in the replace/pre fields beforehand which saves checking afterwards but at the
-- cost of duplicate glyphs (memory) but it's too much overhead (runtime).
--
-- In the meantime Kai had moved the code from the single chain into a more general
-- handler and this one (renamed to chaindisk) is used now. I optimized the code a
-- bit and brought it in sycn with the other code. Hopefully I didn't introduce
-- errors. Note: this somewhat complex approach is meant for fonts that implement
-- (for instance) ligatures by character replacement which to some extend is not
-- that suitable for hyphenation. I also use some helpers. This method passes some
-- states but reparses the list. There is room for a bit of speed up but that will
-- be done in the context version. (In fact a partial rewrite of all code can bring
-- some more efficiency.)
--
-- I didn't test it with extremes but successive disc nodes still can give issues
-- but in order to handle that we need more complex code which also slows down even
-- more. The main loop variant could deal with that: test, collapse, backtrack.
local userkern = nuts.pool and nuts.pool.newkern -- context
do if not userkern then -- generic
local thekern = nuts.new("kern",1) -- userkern
local setkern = nuts.setkern -- not injections.setkern
userkern = function(k)
local n = copy_node(thekern)
setkern(n,k)
return n
end
end end
local function checked(head)
local current = head
while current do
if getid(current) == glue_code then
local kern = userkern(getwidth(current))
if head == current then
local next = getnext(current)
if next then
setlink(kern,next)
end
flush_node(current)
head = kern
current = next
else
local prev, next = getboth(current)
setlink(prev,kern,next)
flush_node(current)
current = next
end
else
current = getnext(current)
end
end
return head
end
local function setdiscchecked(d,pre,post,replace)
if pre then pre = checked(pre) end
if post then post = checked(post) end
if replace then replace = checked(replace) end
setdisc(d,pre,post,replace)
end
local noflags = { false, false, false, false }
local function chainrun(head,start,last,dataset,sequence,rlmode,skiphash,ck)
local size = ck[5] - ck[4] + 1
local chainlookups = ck[6]
local done = false
-- current match
if chainlookups then
-- Lookups can be like { 1, false, 3 } or { false, 2 } or basically anything and
-- #lookups can be less than #current
if size == 1 then
-- if nofchainlookups > size then
-- -- bad rules
-- end
local chainlookup = chainlookups[1]
for j=1,#chainlookup do
local chainstep = chainlookup[j]
local chainkind = chainstep.type
local chainproc = chainprocs[chainkind]
if chainproc then
local ok
head, start, ok = chainproc(head,start,last,dataset,sequence,chainstep,rlmode,skiphash)
if ok then
done = true
end
else
logprocess("%s: %s is not yet supported (1)",cref(dataset,sequence),chainkind)
end
end
else
-- See LookupType 5: Contextual Substitution Subtable. Now it becomes messy. The
-- easiest case is where #current maps on #lookups i.e. one-to-one. But what if
-- we have a ligature. Cf the spec we then need to advance one character but we
-- really need to test it as there are fonts out there that are fuzzy and have
-- too many lookups:
--
-- U+1105 U+119E U+1105 U+119E : sourcehansansklight: script=hang ccmp=yes
--
-- Even worse are these family emoji shapes as they can have multiple lookups
-- per slot (probably only for gpos).
-- It's very unlikely that we will have skip classes here but still ... we seldom
-- enter this branch anyway.
local i = 1
local laststart = start
local nofchainlookups = #chainlookups -- useful?
while start do
if skiphash then -- hm, so we know we skip some
while start do
local char = ischar(start,currentfont)
if char then
if skiphash and skiphash[char] then
start = getnext(start)
else
break
end
else
break
end
end
end
local chainlookup = chainlookups[i]
if chainlookup then
for j=1,#chainlookup do
local chainstep = chainlookup[j]
local chainkind = chainstep.type
local chainproc = chainprocs[chainkind]
if chainproc then
local ok, n
head, start, ok, n = chainproc(head,start,last,dataset,sequence,chainstep,rlmode,skiphash,i)
-- messy since last can be changed !
if ok then
done = true
if n and n > 1 and i + n > nofchainlookups then
-- this is a safeguard, we just ignore the rest of the lookups
i = size -- prevents an advance
break
end
end
else
-- actually an error
logprocess("%s: %s is not yet supported (2)",cref(dataset,sequence),chainkind)
end
end
end
i = i + 1
if i > size or not start then
break
elseif start then
laststart = start
start = getnext(start)
end
end
if not start then
start = laststart
end
end
else
-- todo: needs checking for holes in the replacements
local replacements = ck[7]
if replacements then
head, start, done = reversesub(head,start,last,dataset,sequence,replacements,rlmode,skiphash)
else
done = true
if trace_contexts then
logprocess("%s: skipping match",cref(dataset,sequence))
end
end
end
return head, start, done
end
local function chaindisk(head,start,dataset,sequence,rlmode,skiphash,ck)
if not start then
return head, start, false
end
local startishead = start == head
local seq = ck[3]
local f = ck[4]
local l = ck[5]
local s = #seq
local done = false
local sweepnode = sweepnode
local sweeptype = sweeptype
local sweepoverflow = false
local keepdisc = not sweepnode
local lookaheaddisc = nil
local backtrackdisc = nil
local current = start
local last = start
local prev = getprev(start)
local hasglue = false
-- fishy: so we can overflow and then go on in the sweep?
-- todo : id can also be glue_code as we checked spaces
local i = f
while i <= l do
local id = getid(current)
if id == glyph_code then
i = i + 1
last = current
current = getnext(current)
elseif id == glue_code then
i = i + 1
last = current
current = getnext(current)
hasglue = true
elseif id == disc_code then
if keepdisc then
keepdisc = false
lookaheaddisc = current
local replace = getfield(current,"replace")
if not replace then
sweepoverflow = true
sweepnode = current
current = getnext(current)
else
while replace and i <= l do
if getid(replace) == glyph_code then
i = i + 1
end
replace = getnext(replace)
end
current = getnext(replace)
end
last = current
else
head, current = flattendisk(head,current)
end
else
last = current
current = getnext(current)
end
if current then
-- go on
elseif sweepoverflow then
-- we already are following up on sweepnode
break
elseif sweeptype == "post" or sweeptype == "replace" then
current = getnext(sweepnode)
if current then
sweeptype = nil
sweepoverflow = true
else
break
end
else
break -- added
end
end
if sweepoverflow then
local prev = current and getprev(current)
if not current or prev ~= sweepnode then
local head = getnext(sweepnode)
local tail = nil
if prev then
tail = prev
setprev(current,sweepnode)
else
tail = find_node_tail(head)
end
setnext(sweepnode,current)
setprev(head)
setnext(tail)
appenddisc(sweepnode,head)
end
end
if l < s then
local i = l
local t = sweeptype == "post" or sweeptype == "replace"
while current and i < s do
local id = getid(current)
if id == glyph_code then
i = i + 1
current = getnext(current)
elseif id == glue_code then
i = i + 1
current = getnext(current)
hasglue = true
elseif id == disc_code then
if keepdisc then
keepdisc = false
if notmatchpre[current] ~= notmatchreplace[current] then
lookaheaddisc = current
end
-- we assume a simple text only replace (we could use nuts.count)
local replace = getfield(current,"replace")
while replace and i < s do
if getid(replace) == glyph_code then
i = i + 1
end
replace = getnext(replace)
end
current = getnext(current)
elseif notmatchpre[current] ~= notmatchreplace[current] then
head, current = flattendisk(head,current)
else
current = getnext(current) -- HH
end
else
current = getnext(current)
end
if not current and t then
current = getnext(sweepnode)
if current then
sweeptype = nil
end
end
end
end
if f > 1 then
local current = prev
local i = f
local t = sweeptype == "pre" or sweeptype == "replace"
if not current and t and current == checkdisk then
current = getprev(sweepnode)
end
while current and i > 1 do -- missing getprev added / moved outside
local id = getid(current)
if id == glyph_code then
i = i - 1
elseif id == glue_code then
i = i - 1
hasglue = true
elseif id == disc_code then
if keepdisc then
keepdisc = false
if notmatchpost[current] ~= notmatchreplace[current] then
backtrackdisc = current
end
-- we assume a simple text only replace (we could use nuts.count)
local replace = getfield(current,"replace")
while replace and i > 1 do
if getid(replace) == glyph_code then
i = i - 1
end
replace = getnext(replace)
end
elseif notmatchpost[current] ~= notmatchreplace[current] then
head, current = flattendisk(head,current)
end
end
current = getprev(current)
if t and current == checkdisk then
current = getprev(sweepnode)
end
end
end
local done = false
if lookaheaddisc then
local cf = start
local cl = getprev(lookaheaddisc)
local cprev = getprev(start)
local insertedmarks = 0
while cprev do
local char = ischar(cf,currentfont)
if char and marks[char] then
insertedmarks = insertedmarks + 1
cf = cprev
startishead = cf == head
cprev = getprev(cprev)
else
break
end
end
setlink(cprev,lookaheaddisc)
setprev(cf)
setnext(cl)
if startishead then
head = lookaheaddisc
end
local pre, post, replace = getdisc(lookaheaddisc)
local new = copy_node_list(cf) -- br, how often does that happen
local cnew = new
if pre then
setlink(find_node_tail(cf),pre)
end
if replace then
local tail = find_node_tail(new)
setlink(tail,replace)
end
for i=1,insertedmarks do
cnew = getnext(cnew)
end
cl = start
local clast = cnew
for i=f,l do
cl = getnext(cl)
clast = getnext(clast)
end
if not notmatchpre[lookaheaddisc] then
local ok = false
cf, start, ok = chainrun(cf,start,cl,dataset,sequence,rlmode,skiphash,ck)
if ok then
done = true
end
end
if not notmatchreplace[lookaheaddisc] then
local ok = false
new, cnew, ok = chainrun(new,cnew,clast,dataset,sequence,rlmode,skiphash,ck)
if ok then
done = true
end
end
if hasglue then
setdiscchecked(lookaheaddisc,cf,post,new)
else
setdisc(lookaheaddisc,cf,post,new)
end
start = getprev(lookaheaddisc)
sweephead[cf] = getnext(clast) or false
sweephead[new] = getnext(cl) or false
elseif backtrackdisc then
local cf = getnext(backtrackdisc)
local cl = start
local cnext = getnext(start)
local insertedmarks = 0
while cnext do
local char = ischar(cnext,currentfont)
if char and marks[char] then
insertedmarks = insertedmarks + 1
cl = cnext
cnext = getnext(cnext)
else
break
end
end
setlink(backtrackdisc,cnext)
setprev(cf)
setnext(cl)
local pre, post, replace, pretail, posttail, replacetail = getdisc(backtrackdisc,true)
local new = copy_node_list(cf)
local cnew = find_node_tail(new)
for i=1,insertedmarks do
cnew = getprev(cnew)
end
local clast = cnew
for i=f,l do
clast = getnext(clast)
end
if not notmatchpost[backtrackdisc] then
local ok = false
cf, start, ok = chainrun(cf,start,last,dataset,sequence,rlmode,skiphash,ck)
if ok then
done = true
end
end
if not notmatchreplace[backtrackdisc] then
local ok = false
new, cnew, ok = chainrun(new,cnew,clast,dataset,sequence,rlmode,skiphash,ck)
if ok then
done = true
end
end
if post then
setlink(posttail,cf)
else
post = cf
end
if replace then
setlink(replacetail,new)
else
replace = new
end
if hasglue then
setdiscchecked(backtrackdisc,pre,post,replace)
else
setdisc(backtrackdisc,pre,post,replace)
end
start = getprev(backtrackdisc)
sweephead[post] = getnext(clast) or false
sweephead[replace] = getnext(last) or false
else
local ok = false
head, start, ok = chainrun(head,start,last,dataset,sequence,rlmode,skiphash,ck)
if ok then
done = true
end
end
return head, start, done
end
local function chaintrac(head,start,dataset,sequence,rlmode,skiphash,ck,match,discseen,sweepnode)
local rule = ck[1]
local lookuptype = ck[8] or ck[2]
local nofseq = #ck[3]
local first = ck[4]
local last = ck[5]
local char = getchar(start)
logwarning("%s: rule %s %s at char %s for (%s,%s,%s) chars, lookuptype %a, %sdisc seen, %ssweeping",
cref(dataset,sequence),rule,match and "matches" or "nomatch",
gref(char),first-1,last-first+1,nofseq-last,lookuptype,
discseen and "" or "no ", sweepnode and "" or "not ")
end
-- The next one is quite optimized but still somewhat slow, fonts like ebgaramond
-- are real torture tests because they have many steps with one context (having
-- multiple contexts makes more sense) also because we (can) reduce them. Instead of
-- a match boolean variable and check for that I decided to use a goto with labels
-- instead. This is one of the cases where it makes the code more readable and we
-- might even gain a bit performance.
-- when we have less replacements (lookups) then current matches we can push too much into
-- the previous disc .. such be it ( with only f done)
local function handle_contextchain(head,start,dataset,sequence,contexts,rlmode,skiphash)
-- optimizing for rlmode gains nothing
local sweepnode = sweepnode
local sweeptype = sweeptype
local postreplace
local prereplace
local checkdisc
local discseen -- = false
if sweeptype then
if sweeptype == "replace" then
postreplace = true
prereplace = true
else
postreplace = sweeptype == "post"
prereplace = sweeptype == "pre"
end
checkdisc = getprev(head)
end
local currentfont = currentfont
local skipped -- = false
local startprev,
startnext = getboth(start)
local done -- = false
-- we can have multiple hits and as we scan (currently) all we need to check
-- if we have a match ... contextchains have no real coverage table (with
-- unique entries)
-- fonts can have many steps (each doing one check) or many contexts
-- todo: make a per-char cache so that we have small contexts (when we have a context
-- n == 1 and otherwise it can be more so we can even distingish n == 1 or more)
local nofcontexts = contexts.n -- #contexts
local startchar = nofcontext == 1 or ischar(start,currentfont) -- only needed in a chain
for k=1,nofcontexts do -- does this disc mess work well with n > 1
local ck = contexts[k]
local seq = ck[3]
local f = ck[4] -- first current
if not startchar or not seq[f][startchar] then
-- report("no hit in %a at %i of %i contexts",sequence.type,k,nofcontexts)
goto next
end
local s = seq.n -- or #seq
local l = ck[5] -- last current
local current = start
local last = start
-- current match
if l > f then
-- before/current/after | before/current | current/after
local discfound -- = nil
local n = f + 1
last = startnext -- the second in current (first already matched)
while n <= l do
if postreplace and not last then
last = getnext(sweepnode)
sweeptype = nil
end
if last then
local char, id = ischar(last,currentfont)
if char then
if skiphash and skiphash[char] then
skipped = true
if trace_skips then
show_skip(dataset,sequence,char,ck,classes[char])
end
last = getnext(last)
elseif seq[n][char] then
if n < l then
last = getnext(last)
end
n = n + 1
elseif discfound then
notmatchreplace[discfound] = true
if notmatchpre[discfound] then
goto next
else
break
end
else
goto next
end
elseif char == false then
if discfound then
notmatchreplace[discfound] = true
if notmatchpre[discfound] then
goto next
else
break
end
else
goto next
end
elseif id == disc_code then
-- elseif id == disc_code and (not discs or discs[last]) then
discseen = true
discfound = last
notmatchpre[last] = nil
notmatchpost[last] = true
notmatchreplace[last] = nil
local pre, post, replace = getdisc(last)
if pre then
local n = n
while pre do
if seq[n][getchar(pre)] then
n = n + 1
if n > l then
break
end
pre = getnext(pre)
else
notmatchpre[last] = true
break
end
end
-- commented, for Kai to check
-- if n <= l then
-- notmatchpre[last] = true
-- end
else
notmatchpre[last] = true
end
if replace then
-- so far we never entered this branch
while replace do
if seq[n][getchar(replace)] then
n = n + 1
if n > l then
break
end
replace = getnext(replace)
else
notmatchreplace[last] = true
if notmatchpre[last] then
goto next
else
break
end
end
end
-- why here again
if notmatchpre[last] then
goto next
end
end
-- maybe only if match
last = getnext(last)
else
goto next
end
else
goto next
end
end
end
-- before
if f > 1 then
if startprev then
local prev = startprev
if prereplace and prev == checkdisc then
prev = getprev(sweepnode)
end
if prev then
local discfound -- = nil
local n = f - 1
while n >= 1 do
if prev then
local char, id = ischar(prev,currentfont)
if char then
if skiphash and skiphash[char] then
skipped = true
if trace_skips then
show_skip(dataset,sequence,char,ck,classes[char])
end
prev = getprev(prev)
elseif seq[n][char] then
if n > 1 then
prev = getprev(prev)
end
n = n - 1
elseif discfound then
notmatchreplace[discfound] = true
if notmatchpost[discfound] then
goto next
else
break
end
else
goto next
end
elseif char == false then
if discfound then
notmatchreplace[discfound] = true
if notmatchpost[discfound] then
goto next
end
else
goto next
end
break
elseif id == disc_code then
-- elseif id == disc_code and (not discs or discs[prev]) then
-- the special case: f i where i becomes dottless i ..
discseen = true
discfound = prev
notmatchpre[prev] = true
notmatchpost[prev] = nil
notmatchreplace[prev] = nil
local pre, post, replace, pretail, posttail, replacetail = getdisc(prev,true)
-- weird test: needs checking
if pre ~= start and post ~= start and replace ~= start then
if post then
local n = n
while posttail do
if seq[n][getchar(posttail)] then
n = n - 1
if posttail == post or n < 1 then
break
else
posttail = getprev(posttail)
end
else
notmatchpost[prev] = true
break
end
end
if n >= 1 then
notmatchpost[prev] = true
end
else
notmatchpost[prev] = true
end
if replace then
-- we seldom enter this branch (e.g. on brill efficient)
while replacetail do
if seq[n][getchar(replacetail)] then
n = n - 1
if replacetail == replace or n < 1 then
break
else
replacetail = getprev(replacetail)
end
else
notmatchreplace[prev] = true
if notmatchpost[prev] then
goto next
else
break
end
end
end
else
notmatchreplace[prev] = true -- new, for Kai to check
end
end
prev = getprev(prev)
-- elseif id == glue_code and seq[n][32] and isspace(prev,threshold,id) then
-- elseif seq[n][32] and spaces[prev] then
-- n = n - 1
-- prev = getprev(prev)
elseif id == glue_code then
local sn = seq[n]
if (sn[32] and spaces[prev]) or sn[0xFFFC] then
n = n - 1
prev = getprev(prev)
else
goto next
end
elseif seq[n][0xFFFC] then
n = n - 1
prev = getprev(prev)
else
goto next
end
else
goto next
end
end
else
goto next
end
else
goto next
end
end
-- after
if s > l then
local current = last and getnext(last)
if not current and postreplace then
current = getnext(sweepnode)
end
if current then
local discfound -- = nil
local n = l + 1
while n <= s do
if current then
local char, id = ischar(current,currentfont)
if char then
if skiphash and skiphash[char] then
skipped = true
if trace_skips then
show_skip(dataset,sequence,char,ck,classes[char])
end
current = getnext(current) -- was absent
elseif seq[n][char] then
if n < s then -- new test
current = getnext(current) -- was absent
end
n = n + 1
elseif discfound then
notmatchreplace[discfound] = true
if notmatchpre[discfound] then
goto next
else
break
end
else
goto next
end
elseif char == false then
if discfound then
notmatchreplace[discfound] = true
if notmatchpre[discfound] then
goto next
else
break
end
else
goto next
end
elseif id == disc_code then
-- elseif id == disc_code and (not discs or discs[current]) then
discseen = true
discfound = current
notmatchpre[current] = nil
notmatchpost[current] = true
notmatchreplace[current] = nil
local pre, post, replace = getdisc(current)
if pre then
local n = n
while pre do
if seq[n][getchar(pre)] then
n = n + 1
if n > s then
break
else
pre = getnext(pre)
end
else
notmatchpre[current] = true
break
end
end
if n <= s then
notmatchpre[current] = true
end
else
notmatchpre[current] = true
end
if replace then
-- so far we never entered this branch
while replace do
if seq[n][getchar(replace)] then
n = n + 1
if n > s then
break
else
replace = getnext(replace)
end
else
notmatchreplace[current] = true
if notmatchpre[current] then
goto next
else
break
end
end
end
else
notmatchreplace[current] = true -- new, for Kai to check
end
current = getnext(current)
elseif id == glue_code then
local sn = seq[n]
if (sn[32] and spaces[current]) or sn[0xFFFC] then
n = n + 1
current = getnext(current)
else
goto next
end
elseif seq[n][0xFFFC] then
n = n + 1
current = getnext(current)
else
goto next
end
else
goto next
end
end
else
goto next
end
end
if trace_contexts then
chaintrac(head,start,dataset,sequence,rlmode,skipped and skiphash,ck,true,discseen,sweepnode)
end
if discseen or sweepnode then
head, start, done = chaindisk(head,start,dataset,sequence,rlmode,skipped and skiphash,ck)
else
head, start, done = chainrun(head,start,last,dataset,sequence,rlmode,skipped and skiphash,ck)
end
if done then
break
-- else
-- next context
end
::next::
-- if trace_chains then
-- chaintrac(head,start,dataset,sequence,rlmode,skipped and skiphash,ck,false,discseen,sweepnode)
-- end
end
if discseen then
notmatchpre = { }
notmatchpost = { }
notmatchreplace = { }
end
return head, start, done
end
handlers.gsub_context = handle_contextchain
handlers.gsub_contextchain = handle_contextchain
handlers.gsub_reversecontextchain = handle_contextchain
handlers.gpos_contextchain = handle_contextchain
handlers.gpos_context = handle_contextchain
-- this needs testing
local function chained_contextchain(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
local steps = currentlookup.steps
local nofsteps = currentlookup.nofsteps
if nofsteps > 1 then
reportmoresteps(dataset,sequence)
end
-- probably wrong
local l = steps[1].coverage[getchar(start)]
if l then
return handle_contextchain(head,start,dataset,sequence,l,rlmode,skiphash)
else
return head, start, false
end
end
chainprocs.gsub_context = chained_contextchain
chainprocs.gsub_contextchain = chained_contextchain
chainprocs.gsub_reversecontextchain = chained_contextchain
chainprocs.gpos_contextchain = chained_contextchain
chainprocs.gpos_context = chained_contextchain
------------------------------
-- experiment (needs no handler in font-otc so not now):
--
-- function otf.registerchainproc(name,f)
-- -- chainprocs[name] = f
-- chainprocs[name] = function(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
-- local done = currentlookup.nofsteps > 0
-- if not done then
-- reportzerosteps(dataset,sequence)
-- else
-- head, start, done = f(head,start,stop,dataset,sequence,currentlookup,rlmode,skiphash)
-- if not head or not start then
-- reportbadsteps(dataset,sequence)
-- end
-- end
-- return head, start, done
-- end
-- end
local missing = setmetatableindex("table")
local logwarning = report_process
local resolved = { } -- we only resolve a font,script,language pair once
local function logprocess(...)
if trace_steps then
registermessage(...)
if trace_steps == "silent" then
return
end
end
report_process(...)
end
-- todo: pass all these 'locals' in a table
local sequencelists = setmetatableindex(function(t,font)
local sequences = fontdata[font].resources.sequences
if not sequences or not next(sequences) then
sequences = false
end
t[font] = sequences
return sequences
end)
-- fonts.hashes.sequences = sequencelists
do -- overcome local limit
local autofeatures = fonts.analyzers.features
local featuretypes = otf.tables.featuretypes
local defaultscript = otf.features.checkeddefaultscript
local defaultlanguage = otf.features.checkeddefaultlanguage
local wildcard = "*"
local default = "dflt"
local function initialize(sequence,script,language,enabled,autoscript,autolanguage)
local features = sequence.features
if features then
local order = sequence.order
if order then
local featuretype = featuretypes[sequence.type or "unknown"]
for i=1,#order do
local kind = order[i]
local valid = enabled[kind]
if valid then
local scripts = features[kind]
local languages = scripts and (
scripts[script] or
scripts[wildcard] or
(autoscript and defaultscript(featuretype,autoscript,scripts))
)
local enabled = languages and (
languages[language] or
languages[wildcard] or
(autolanguage and defaultlanguage(featuretype,autolanguage,languages))
)
if enabled then
return { valid, autofeatures[kind] or false, sequence, kind }
end
end
end
else
-- can't happen
end
end
return false
end
function otf.dataset(tfmdata,font) -- generic variant, overloaded in context
local shared = tfmdata.shared
local properties = tfmdata.properties
local language = properties.language or "dflt"
local script = properties.script or "dflt"
local enabled = shared.features
local autoscript = enabled and enabled.autoscript
local autolanguage = enabled and enabled.autolanguage
local res = resolved[font]
if not res then
res = { }
resolved[font] = res
end
local rs = res[script]
if not rs then
rs = { }
res[script] = rs
end
local rl = rs[language]
if not rl then
rl = {
-- indexed but we can also add specific data by key
}
rs[language] = rl
local sequences = tfmdata.resources.sequences
if sequences then
for s=1,#sequences do
local v = enabled and initialize(sequences[s],script,language,enabled,autoscript,autolanguage)
if v then
rl[#rl+1] = v
end
end
end
end
return rl
end
end
-- Functions like kernrun, comprun etc evolved over time and in the end look rather
-- complex. It's a bit of a compromis between extensive copying and creating subruns.
-- The logic has been improved a lot by Kai and Ivo who use complex fonts which
-- really helped to identify border cases on the one hand and get insight in the diverse
-- ways fonts implement features (not always that consistent and efficient). At the same
-- time I tried to keep the code relatively efficient so that the overhead in runtime
-- stays acceptable.
local function report_disc(what,n)
report_run("%s: %s > %s",what,n,languages.serializediscretionary(n))
end
local function kernrun(disc,k_run,font,attr,...)
--
-- we catch
--
if trace_kernruns then
report_disc("kern",disc)
end
--
local prev, next = getboth(disc)
--
local nextstart = next
local done = false
--
local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
--
local prevmarks = prev
--
-- can be optional, because why on earth do we get a disc after a mark (okay, maybe when a ccmp
-- has happened but then it should be in the disc so basically this test indicates an error)
--
while prevmarks do
local char = ischar(prevmarks,font)
if char and marks[char] then
prevmarks = getprev(prevmarks)
else
break
end
end
--
if prev and not ischar(prev,font) then -- and (pre or replace)
prev = false
end
if next and not ischar(next,font) then -- and (post or replace)
next = false
end
--
-- we need to get rid of this nest mess some day .. has to be done otherwise
--
if pre then
if k_run(pre,"injections",nil,font,attr,...) then
done = true
end
if prev then
setlink(prev,pre)
if k_run(prevmarks,"preinjections",pre,font,attr,...) then -- or prev?
done = true
end
setprev(pre)
setlink(prev,disc)
end
end
--
if post then
if k_run(post,"injections",nil,font,attr,...) then
done = true
end
if next then
setlink(posttail,next)
if k_run(posttail,"postinjections",next,font,attr,...) then
done = true
end
setnext(posttail)
setlink(disc,next)
end
end
--
if replace then
if k_run(replace,"injections",nil,font,attr,...) then
done = true
end
if prev then
setlink(prev,replace)
if k_run(prevmarks,"replaceinjections",replace,font,attr,...) then -- getnext(replace))
done = true
end
setprev(replace)
setlink(prev,disc)
end
if next then
setlink(replacetail,next)
if k_run(replacetail,"replaceinjections",next,font,attr,...) then
done = true
end
setnext(replacetail)
setlink(disc,next)
end
elseif prev and next then
setlink(prev,next)
if k_run(prevmarks,"emptyinjections",next,font,attr,...) then
done = true
end
setlink(prev,disc,next)
end
if done and trace_testruns then
report_disc("done",disc)
end
return nextstart, done
end
-- fonts like ebgaramond do ligatures this way (less efficient than e.g. dejavu which
-- will do the testrun variant)
local function comprun(disc,c_run,...) -- vararg faster than the whole list
if trace_compruns then
report_disc("comp",disc)
end
--
local pre, post, replace = getdisc(disc)
local renewed = false
--
if pre then
sweepnode = disc
sweeptype = "pre" -- in alternative code preinjections is used (also used then for properties, saves a variable)
local new, done = c_run(pre,...)
if done then
pre = new
renewed = true
end
end
--
if post then
sweepnode = disc
sweeptype = "post"
local new, done = c_run(post,...)
if done then
post = new
renewed = true
end
end
--
if replace then
sweepnode = disc
sweeptype = "replace"
local new, done = c_run(replace,...)
if done then
replace = new
renewed = true
end
end
--
sweepnode = nil
sweeptype = nil
if renewed then
if trace_testruns then
report_disc("done",disc)
end
setdisc(disc,pre,post,replace)
end
--
return getnext(disc), renewed
end
-- if we can hyphenate in a lig then unlikely a lig so we
-- could have a option here to ignore lig
local function testrun(disc,t_run,c_run,...)
if trace_testruns then
report_disc("test",disc)
end
local prev, next = getboth(disc)
if not next then
-- weird discretionary
return
end
local pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
local renewed = false
if (post or replace) and prev then
if post then
setlink(posttail,next)
else
post = next
end
if replace then
setlink(replacetail,next)
else
replace = next
end
local d_post = t_run(post,next,...)
local d_replace = t_run(replace,next,...)
if d_post > 0 or d_replace > 0 then
local d = d_replace > d_post and d_replace or d_post
local head = getnext(disc) -- is: next
local tail = head
for i=1,d do
local nx = getnext(tail)
local id = getid(nx)
if id == disc_code then
head, tail = flattendisk(head,nx)
elseif id == glyph_code then
tail = nx
else
-- we can have overrun into a glue
break
end
end
next = getnext(tail)
setnext(tail)
setprev(head)
local new = copy_node_list(head)
if posttail then
setlink(posttail,head)
else
post = head
end
if replacetail then
setlink(replacetail,new)
else
replace = new
end
else
-- we stay inside the disc
if posttail then
setnext(posttail)
else
post = nil
end
if replacetail then
setnext(replacetail)
else
replace = nil
end
end
setlink(disc,next)
-- pre, post, replace, pretail, posttail, replacetail = getdisc(disc,true)
end
--
-- like comprun
--
if trace_testruns then
report_disc("more",disc)
end
--
if pre then
sweepnode = disc
sweeptype = "pre"
local new, ok = c_run(pre,...)
if ok then
pre = new
renewed = true
end
end
--
if post then
sweepnode = disc
sweeptype = "post"
local new, ok = c_run(post,...)
if ok then
post = new
renewed = true
end
end
--
if replace then
sweepnode = disc
sweeptype = "replace"
local new, ok = c_run(replace,...)
if ok then
replace = new
renewed = true
end
end
--
sweepnode = nil
sweeptype = nil
if renewed then
setdisc(disc,pre,post,replace)
if trace_testruns then
report_disc("done",disc)
end
end
-- next can have changed (copied list)
return getnext(disc), renewed
end
-- We can make some assumptions with respect to discretionaries. First of all it is very
-- unlikely that some of the analysis related attributes applies. Then we can also assume
-- that the ConTeXt specific dynamic attribute is different, although we do use explicit
-- discretionaries (maybe we need to tag those some day). So, at least for now, we don't
-- have the following test in the sub runs:
--
-- -- local a = getattr(start,0)
-- -- if a then
-- -- a = (a == attr) and (not attribute or getprop(start,a_state) == attribute)
-- -- else
-- -- a = not attribute or getprop(start,a_state) == attribute
-- -- end
-- -- if a then
--
-- but use this instead:
--
-- -- local a = getattr(start,0)
-- -- if not a or (a == attr) then
--
-- and even that one is probably not needed. However, we can handle interesting
-- cases now:
--
-- 1{2{\oldstyle\discretionary{3}{4}{5}}6}7\par
-- 1{2\discretionary{3{\oldstyle3}}{{\oldstyle4}4}{5{\oldstyle5}5}6}7\par
local nesting = 0
local function c_run_single(head,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
local done = false
local sweep = sweephead[head]
if sweep then
start = sweep
-- sweephead[head] = nil
sweephead[head] = false
else
start = head
end
while start do
local char, id = ischar(start,font)
if char then
local a -- happens often so no assignment is faster
if attr then
a = getattr(start,0)
end
if not a or (a == attr) then
local lookupmatch = lookupcache[char]
if lookupmatch then
local ok
head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
if ok then
done = true
end
end
if start then
start = getnext(start)
end
else
-- go on can be a mixed one
start = getnext(start)
end
elseif char == false then
return head, done
elseif sweep then
-- else we loose the rest
return head, done
else
-- in disc component
start = getnext(start)
end
end
return head, done
end
-- only replace?
local function t_run_single(start,stop,font,attr,lookupcache)
local lastd = nil
while start ~= stop do
local char = ischar(start,font)
if char then
local a -- happens often so no assignment is faster
if attr then
a = getattr(start,0)
end
local startnext = getnext(start)
if not a or (a == attr) then
local lookupmatch = lookupcache[char]
if lookupmatch then -- hm, hyphens can match (tlig) so we need to really check
-- if we need more than ligatures we can outline the code and use functions
local s = startnext
local ss = nil
local sstop = s == stop
if not s then
s = ss
ss = nil
end
-- a bit weird: why multiple ... anyway we can't have a disc in a disc
-- how about post ... we can probably merge this into the while
while getid(s) == disc_code do
ss = getnext(s)
s = getfield(s,"replace")
if not s then
s = ss
ss = nil
end
end
local l = nil
local d = 0
while s do
local char = ischar(s,font)
if char then
local lg = lookupmatch[char]
if lg then
if sstop then
d = 1
elseif d > 0 then
d = d + 1
end
l = lg
s = getnext(s)
sstop = s == stop
if not s then
s = ss
ss = nil
end
while getid(s) == disc_code do
ss = getnext(s)
s = getfield(s,"replace")
if not s then
s = ss
ss = nil
end
end
else
break
end
else
break
end
end
if l and l.ligature then -- so we test for ligature
lastd = d
end
-- why not: if not l then break elseif l.ligature then return d end
else
-- why not: break
-- no match (yet)
end
else
-- go on can be a mixed one
-- why not: break
end
if lastd then
return lastd
end
start = startnext
else
break
end
end
return 0
end
local function k_run_single(sub,injection,last,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
local a -- happens often so no assignment is faster
if attr then
a = getattr(sub,0)
end
if not a or (a == attr) then
for n in traverse_nodes(sub) do -- only gpos
if n == last then
break
end
local char = ischar(n)
if char then
local lookupmatch = lookupcache[char]
if lookupmatch then
local h, d, ok = handler(sub,n,dataset,sequence,lookupmatch,rlmode,skiphash,step,injection)
if ok then
return true
end
end
end
end
end
end
local function c_run_multiple(head,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
local done = false
local sweep = sweephead[head]
if sweep then
start = sweep
-- sweephead[head] = nil
sweephead[head] = false
else
start = head
end
while start do
local char = ischar(start,font)
if char then
local a -- happens often so no assignment is faster
if attr then
a = getattr(start,0)
end
if not a or (a == attr) then
for i=1,nofsteps do
local step = steps[i]
local lookupcache = step.coverage
local lookupmatch = lookupcache[char]
if lookupmatch then
-- we could move all code inline but that makes things even more unreadable
local ok
head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
if ok then
done = true
break
elseif not start then
-- don't ask why ... shouldn't happen
break
end
end
end
if start then
start = getnext(start)
end
else
-- go on can be a mixed one
start = getnext(start)
end
elseif char == false then
-- whatever glyph
return head, done
elseif sweep then
-- else we loose the rest
return head, done
else
-- in disc component
start = getnext(start)
end
end
return head, done
end
local function t_run_multiple(start,stop,font,attr,steps,nofsteps)
local lastd = nil
while start ~= stop do
local char = ischar(start,font)
if char then
local a -- happens often so no assignment is faster
if attr then
a = getattr(start,0)
end
local startnext = getnext(start)
if not a or (a == attr) then
for i=1,nofsteps do
local step = steps[i]
local lookupcache = step.coverage
local lookupmatch = lookupcache[char]
if lookupmatch then
-- if we need more than ligatures we can outline the code and use functions
local s = startnext
local ss = nil
local sstop = s == stop
if not s then
s = ss
ss = nil
end
while getid(s) == disc_code do
ss = getnext(s)
s = getfield(s,"replace")
if not s then
s = ss
ss = nil
end
end
local l = nil
local d = 0
while s do
local char = ischar(s)
if char then
local lg = lookupmatch[char]
if lg then
if sstop then
d = 1
elseif d > 0 then
d = d + 1
end
l = lg
s = getnext(s)
sstop = s == stop
if not s then
s = ss
ss = nil
end
while getid(s) == disc_code do
ss = getnext(s)
s = getfield(s,"replace")
if not s then
s = ss
ss = nil
end
end
else
break
end
else
break
end
end
if l and l.ligature then
lastd = d
end
end
end
else
-- go on can be a mixed one
end
if lastd then
return lastd
end
start = startnext
else
break
end
end
return 0
end
local function k_run_multiple(sub,injection,last,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
local a -- happens often so no assignment is faster
if attr then
a = getattr(sub,0)
end
if not a or (a == attr) then
for n in traverse_nodes(sub) do -- only gpos
if n == last then
break
end
local char = ischar(n)
if char then
for i=1,nofsteps do
local step = steps[i]
local lookupcache = step.coverage
local lookupmatch = lookupcache[char]
if lookupmatch then
local h, d, ok = handler(sub,n,dataset,sequence,lookupmatch,rlmode,skiphash,step,injection) -- sub was head
if ok then
return true
end
end
end
end
end
end
end
-- to be checked, nowadays we probably can assume properly matched directions
-- so maybe we no longer need a stack
-- local function txtdirstate(start,stack,top,rlparmode)
-- local dir = getdir(start)
-- local new = 1
-- if dir == "+TRT" then
-- top = top + 1
-- stack[top] = dir
-- new = -1
-- elseif dir == "+TLT" then
-- top = top + 1
-- stack[top] = dir
-- elseif dir == "-TRT" or dir == "-TLT" then
-- if top == 1 then
-- top = 0
-- new = rlparmode
-- else
-- top = top - 1
-- if stack[top] == "+TRT" then
-- new = -1
-- end
-- end
-- else
-- new = rlparmode
-- end
-- return getnext(start), top, new
-- end
--
-- local function pardirstate(start)
-- local dir = getdir(start)
-- local new = 0
-- if dir == "TLT" then
-- new = 1
-- elseif dir == "TRT" then
-- new = -1
-- end
-- return getnext(start), new, new
-- end
local function txtdirstate(start,stack,top,rlparmode)
local nxt = getnext(start)
local dir = getdir(start)
if dir == "+TRT" then
top = top + 1
stack[top] = dir
return nxt, top, -1
elseif dir == "+TLT" then
top = top + 1
stack[top] = dir
return nxt, top, 1
elseif dir == "-TRT" or dir == "-TLT" then
if top == 1 then
return nxt, 0, rlparmode
else
top = top - 1
if stack[top] == "+TRT" then
return nxt, top, -1
else
return nxt, top, 1
end
end
else
return nxt, top, rlparmode
end
end
local function pardirstate(start)
local nxt = getnext(start)
local dir = getdir(start)
if dir == "TLT" then
return nxt, 1, 1
elseif dir == "TRT" then
return nxt, -1, -1
else
return nxt, 0, 0
end
end
-- -- this will become:
--
-- local getdirection = nuts.getdirection
--
-- local function txtdirstate1(start,stack,top,rlparmode)
-- local nxt = getnext(start)
-- local dir, sub = getdirection(start)
-- if sub then
-- if top == 1 then
-- return nxt, 0, rlparmode
-- elseif dir < 2 then
-- top = top - 1
-- if stack[top] == 1 then
-- return nxt, top, -1
-- else
-- return nxt, top, 1
-- end
-- else
-- return nxt, top, rlparmode
-- end
-- elseif dir == 1 then
-- top = top + 1
-- stack[top] = 1
-- return nxt, top, -1
-- elseif dir == 0 then
-- top = top + 1
-- stack[top] = 0
-- return nxt, top, 1
-- else
-- return nxt, top, rlparmode
-- end
-- end
--
-- local function pardirstate1(start)
-- local nxt = getnext(start)
-- local dir = getdirection(start)
-- if dir == 0 then
-- return nxt, 1, 1
-- elseif dir == 1 then
-- return nxt, -1, -1
-- else
-- return nxt, 0, 0
-- end
-- end
otf.helpers = otf.helpers or { }
otf.helpers.txtdirstate = txtdirstate
otf.helpers.pardirstate = pardirstate
-- This is the main loop. We run over the node list dealing with a specific font. The
-- attribute is a context specific thing. We could work on sub start-stop ranges instead
-- but I wonder if there is that much speed gain (experiments showed that it made not
-- much sense) and we need to keep track of directions anyway. Also at some point I
-- want to play with font interactions and then we do need the full sweeps. Apart from
-- optimizations the principles of processing the features hasn't changed much since
-- the beginning.
do
-- reference:
--
-- local a = attr and getattr(start,0)
-- if a then
-- a = (a == attr) and (not attribute or getprop(start,a_state) == attribute)
-- else
-- a = not attribute or getprop(start,a_state) == attribute
-- end
--
-- used:
--
-- local a -- happens often so no assignment is faster
-- if attr then
-- if getattr(start,0) == attr and (not attribute or getprop(start,a_state) == attribute) then
-- a = true
-- end
-- elseif not attribute or getprop(start,a_state) == attribute then
-- a = true
-- end
-- This is a measurable experimental speedup (only with hyphenated text and multiple
-- fonts per processor call), especially for fonts with lots of contextual lookups.
local fastdisc = true
local testdics = false
directives.register("otf.fastdisc",function(v) fastdisc = v end) -- normally enabled
-- using a merged combined hash as first test saves some 30% on ebgaramond and
-- about 15% on arabtype .. then moving the a test also saves a bit (even when
-- often a is not set at all so that one is a bit debatable
local otfdataset = nil -- todo: make an installer
local getfastdisc = { __index = function(t,k)
local v = usesfont(k,currentfont)
t[k] = v
return v
end }
local getfastspace = { __index = function(t,k)
-- we don't pass the id so that one can overload isspace
local v = isspace(k,threshold) or false
t[k] = v
return v
end }
function otf.featuresprocessor(head,font,attr,direction,n)
local sequences = sequencelists[font] -- temp hack
nesting = nesting + 1
if nesting == 1 then
currentfont = font
tfmdata = fontdata[font]
descriptions = tfmdata.descriptions -- only needed in gref so we could pass node there instead
characters = tfmdata.characters -- but this branch is not entered that often anyway
local resources = tfmdata.resources
marks = resources.marks
classes = resources.classes
threshold,
factor = getthreshold(font)
checkmarks = tfmdata.properties.checkmarks
if not otfdataset then
otfdataset = otf.dataset
end
discs = fastdisc and n and n > 1 and setmetatable({},getfastdisc) -- maybe inline
spaces = setmetatable({},getfastspace)
elseif currentfont ~= font then
report_warning("nested call with a different font, level %s, quitting",nesting)
nesting = nesting - 1
return head, false
end
-- some 10% faster when no dynamics but hardly measureable on real runs .. but: it only
-- works when we have no other dynamics as otherwise the zero run will be applied to the
-- whole stream for which we then need to pass another variable which we won't
-- if attr == 0 then
-- attr = false
-- end
local head = tonut(head)
if trace_steps then
checkstep(head)
end
local initialrl = direction == "TRT" and -1 or 0
-- local initialrl = (direction == 1 or direction == "TRT") and -1 or 0
local done = false
-- local datasets = otf.dataset(tfmdata,font,attr)
local datasets = otfdataset(tfmdata,font,attr)
local dirstack = { } -- could move outside function but we can have local runs
sweephead = { }
-- Keeping track of the headnode is needed for devanagari. (I generalized it a bit
-- so that multiple cases are also covered.) We could prepend a temp node.
-- We don't goto the next node when a disc node is created so that we can then treat
-- the pre, post and replace. It's a bit of a hack but works out ok for most cases.
for s=1,#datasets do
local dataset = datasets[s]
local attribute = dataset[2]
local sequence = dataset[3] -- sequences[s] -- also dataset[5]
local rlparmode = initialrl
local topstack = 0
local typ = sequence.type
local gpossing = typ == "gpos_single" or typ == "gpos_pair" -- store in dataset
local forcetestrun = typ == "gsub_ligature" -- testrun is only for ligatures
local handler = handlers[typ] -- store in dataset
local steps = sequence.steps
local nofsteps = sequence.nofsteps
local skiphash = sequence.skiphash
if not steps then
-- This permits injection, watch the different arguments. Watch out, the arguments passed
-- are not frozen as we might extend or change this. Is this used at all apart from some
-- experiments?
local h, ok = handler(head,dataset,sequence,initialrl,font,attr) -- less arguments now
if ok then
done = true
end
if h and h ~= head then
head = h
end
elseif typ == "gsub_reversecontextchain" then
--
-- This might need a check: if we have #before or #after > 0 then we might need to reverse
-- the before and after lists in the loader. But first I need to see a font that uses multiple
-- matches.
--
local start = find_node_tail(head)
local rlmode = 0 -- how important is this .. do we need to check for dir?
local merged = steps.merged
while start do
local char = ischar(start,font)
if char then
local m = merged[char]
if m then
local a -- happens often so no assignment is faster
if attr then
a = getattr(start,0)
end
if not a or (a == attr) then
for i=m[1],m[2] do
local step = steps[i]
-- for i=1,#m do
-- local step = m[i]
local lookupcache = step.coverage
local lookupmatch = lookupcache[char]
if lookupmatch then
local ok
head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
if ok then
done = true
break
end
end
end
if start then
start = getprev(start)
end
else
start = getprev(start)
end
else
start = getprev(start)
end
else
start = getprev(start)
end
end
else
local start = head
local rlmode = initialrl
if nofsteps == 1 then -- happens often
local step = steps[1]
local lookupcache = step.coverage
while start do
local char, id = ischar(start,font)
if char then
if skiphash and skiphash[char] then -- we never needed it here but let's try
start = getnext(start)
else
local lookupmatch = lookupcache[char]
if lookupmatch then
local a -- happens often so no assignment is faster
if attr then
if getattr(start,0) == attr and (not attribute or getprop(start,a_state) == attribute) then
a = true
end
elseif not attribute or getprop(start,a_state) == attribute then
a = true
end
if a then
local ok
head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
if ok then
done = true
end
if start then
start = getnext(start)
end
else
start = getnext(start)
end
else
start = getnext(start)
end
end
elseif char == false or id == glue_code then
-- a different font|state or glue (happens often)
start = getnext(start)
elseif id == disc_code then
if not discs or discs[start] == true then
local ok
if gpossing then
start, ok = kernrun(start,k_run_single, font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
elseif forcetestrun then
start, ok = testrun(start,t_run_single,c_run_single,font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
else
start, ok = comprun(start,c_run_single, font,attr,lookupcache,step,dataset,sequence,rlmode,skiphash,handler)
end
if ok then
done = true
end
else
start = getnext(start)
end
elseif id == math_code then
start = getnext(end_of_math(start))
elseif id == dir_code then
start, topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
elseif id == localpar_code then
start, rlparmode, rlmode = pardirstate(start)
else
start = getnext(start)
end
end
else
local merged = steps.merged
while start do
local char, id = ischar(start,font)
if char then
if skiphash and skiphash[char] then -- we never needed it here but let's try
start = getnext(start)
else
local m = merged[char]
if m then
local a -- happens often so no assignment is faster
if attr then
if getattr(start,0) == attr and (not attribute or getprop(start,a_state) == attribute) then
a = true
end
elseif not attribute or getprop(start,a_state) == attribute then
a = true
end
if a then
for i=m[1],m[2] do
local step = steps[i]
-- for i=1,#m do
-- local step = m[i]
local lookupcache = step.coverage
local lookupmatch = lookupcache[char]
if lookupmatch then
-- we could move all code inline but that makes things even more unreadable
local ok
head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
if ok then
done = true
break
elseif not start then
-- don't ask why ... shouldn't happen
break
end
end
end
if start then
start = getnext(start)
end
else
start = getnext(start)
end
else
start = getnext(start)
end
end
elseif char == false or id == glue_code then
-- a different font|state or glue (happens often)
start = getnext(start)
elseif id == disc_code then
if not discs or discs[start] == true then
local ok
if gpossing then
start, ok = kernrun(start,k_run_multiple, font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
elseif forcetestrun then
start, ok = testrun(start,t_run_multiple,c_run_multiple,font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
else
start, ok = comprun(start,c_run_multiple, font,attr,steps,nofsteps,dataset,sequence,rlmode,skiphash,handler)
end
if ok then
done = true
end
else
start = getnext(start)
end
elseif id == math_code then
start = getnext(end_of_math(start))
elseif id == dir_code then
start, topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
elseif id == localpar_code then
start, rlparmode, rlmode = pardirstate(start)
else
start = getnext(start)
end
end
end
end
if trace_steps then -- ?
registerstep(head)
end
end
nesting = nesting - 1
head = tonode(head)
return head, done
end
-- This is not an official helpoer and used for tracing experiments. It can be changed as I like
-- at any moment. At some point it might be used in a module that can help font development.
function otf.datasetpositionprocessor(head,font,direction,dataset)
currentfont = font
tfmdata = fontdata[font]
descriptions = tfmdata.descriptions -- only needed in gref so we could pass node there instead
characters = tfmdata.characters -- but this branch is not entered that often anyway
local resources = tfmdata.resources
marks = resources.marks
classes = resources.classes
threshold,
factor = getthreshold(font)
checkmarks = tfmdata.properties.checkmarks
if type(dataset) == "number" then
dataset = otfdataset(tfmdata,font,0)[dataset]
end
local sequence = dataset[3] -- sequences[s] -- also dataset[5]
local typ = sequence.type
-- local gpossing = typ == "gpos_single" or typ == "gpos_pair" -- store in dataset
-- gpos_contextchain gpos_context
-- if not gpossing then
-- return head, false
-- end
local handler = handlers[typ] -- store in dataset
local steps = sequence.steps
local nofsteps = sequence.nofsteps
local head = tonut(head)
local done = false
local dirstack = { } -- could move outside function but we can have local runs
local start = head
local initialrl = direction == "TRT" and -1 or 0
-- local initialrl = (direction == 1 or direction == "TRT") and -1 or 0
local rlmode = initialrl
local rlparmode = initialrl
local topstack = 0
local merged = steps.merged
-- local matches = false
local position = 0
while start do
local char, id = ischar(start,font)
if char then
position = position + 1
local m = merged[char]
if m then
if skiphash and skiphash[char] then -- we never needed it here but let's try
start = getnext(start)
else
for i=m[1],m[2] do
local step = steps[i]
local lookupcache = step.coverage
local lookupmatch = lookupcache[char]
if lookupmatch then
local ok
head, start, ok = handler(head,start,dataset,sequence,lookupmatch,rlmode,skiphash,step)
if ok then
-- if matches then
-- matches[position] = i
-- else
-- matches = { [position] = i }
-- end
break
elseif not start then
break
end
end
end
if start then
start = getnext(start)
end
end
else
start = getnext(start)
end
elseif char == false or id == glue_code then
-- a different font|state or glue (happens often)
start = getnext(start)
elseif id == math_code then
start = getnext(end_of_math(start))
elseif id == dir_code then
start, topstack, rlmode = txtdirstate(start,dirstack,topstack,rlparmode)
elseif id == localpar_code then
start, rlparmode, rlmode = pardirstate(start)
else
start = getnext(start)
end
end
return tonode(head) -- , matches
end
-- end of experiment
end
-- so far
local plugins = { }
otf.plugins = plugins
function otf.registerplugin(name,f)
if type(name) == "string" and type(f) == "function" then
plugins[name] = { name, f }
end
end
function otf.plugininitializer(tfmdata,value)
if type(value) == "string" then
tfmdata.shared.plugin = plugins[value]
end
end
function otf.pluginprocessor(head,font,attr,direction) -- n
local s = fontdata[font].shared
local p = s and s.plugin
if p then
if trace_plugins then
report_process("applying plugin %a",p[1])
end
return p[2](head,font,attr,direction)
else
return head, false
end
end
function otf.featuresinitializer(tfmdata,value)
-- nothing done here any more
end
registerotffeature {
name = "features",
description = "features",
default = true,
initializers = {
position = 1,
node = otf.featuresinitializer,
plug = otf.plugininitializer,
},
processors = {
node = otf.featuresprocessor,
plug = otf.pluginprocessor,
}
}
-- This can be used for extra handlers, but should be used with care! We implement one
-- here but some more can be found in the osd (script devanagary) file. Now watch out:
-- when a handlers has steps, it is called as the other ones, but when we have no steps,
-- we use a different call:
--
-- function(head,dataset,sequence,initialrl,font,attr)
-- return head, done
-- end
--
-- Also see (!!).
otf.handlers = handlers
local setspacekerns = nodes.injections.setspacekerns if not setspacekerns then os.exit() end
local tag = "kern" -- maybe some day a merge
if fontfeatures then
function handlers.trigger_space_kerns(head,dataset,sequence,initialrl,font,attr)
local features = fontfeatures[font]
local enabled = features and features.spacekern and features[tag]
if enabled then
setspacekerns(font,sequence)
end
return head, enabled
end
else -- generic (no hashes)
function handlers.trigger_space_kerns(head,dataset,sequence,initialrl,font,attr)
local shared = fontdata[font].shared
local features = shared and shared.features
local enabled = features and features.spacekern and features[tag]
if enabled then
setspacekerns(font,sequence)
end
return head, enabled
end
end
-- There are fonts out there that change the space but we don't do that kind of
-- things in TeX.
local function hasspacekerns(data)
local resources = data.resources
local sequences = resources.sequences
local validgpos = resources.features.gpos
if validgpos and sequences then
for i=1,#sequences do
local sequence = sequences[i]
local steps = sequence.steps
if steps and sequence.features[tag] then
local kind = sequence.type
if kind == "gpos_pair" or kind == "gpos_single" then
for i=1,#steps do
local step = steps[i]
local coverage = step.coverage
local rules = step.rules
if rules then
-- not now: analyze (simple) rules
elseif not coverage then
-- nothing to do
elseif kind == "gpos_single" then
-- maybe a message that we ignore
elseif kind == "gpos_pair" then
local format = step.format
if format == "move" or format == "kern" then
local kerns = coverage[32]
if kerns then
return true
end
for k, v in next, coverage do
if v[32] then
return true
end
end
elseif format == "pair" then
local kerns = coverage[32]
if kerns then
for k, v in next, kerns do
local one = v[1]
if one and one ~= true then
return true
end
end
end
for k, v in next, coverage do
local kern = v[32]
if kern then
local one = kern[1]
if one and one ~= true then
return true
end
end
end
end
end
end
end
end
end
end
return false
end
otf.readers.registerextender {
name = "spacekerns",
action = function(data)
data.properties.hasspacekerns = hasspacekerns(data)
end
}
local function spaceinitializer(tfmdata,value) -- attr
local resources = tfmdata.resources
local spacekerns = resources and resources.spacekerns
if value and spacekerns == nil then
local rawdata = tfmdata.shared and tfmdata.shared.rawdata
local properties = rawdata.properties
if properties and properties.hasspacekerns then
local sequences = resources.sequences
local validgpos = resources.features.gpos
if validgpos and sequences then
local left = { }
local right = { }
local last = 0
local feat = nil
for i=1,#sequences do
local sequence = sequences[i]
local steps = sequence.steps
if steps then
-- we don't support space kerns in other features
local kern = sequence.features[tag]
if kern then
local kind = sequence.type
if kind == "gpos_pair" or kind == "gpos_single" then
if feat then
for script, languages in next, kern do
local f = feat[script]
if f then
for l in next, languages do
f[l] = true
end
else
feat[script] = languages
end
end
else
feat = kern
end
for i=1,#steps do
local step = steps[i]
local coverage = step.coverage
local rules = step.rules
if rules then
-- not now: analyze (simple) rules
elseif not coverage then
-- nothng to do
elseif kind == "gpos_single" then
-- makes no sense in TeX
elseif kind == "gpos_pair" then
local format = step.format
if format == "move" or format == "kern" then
local kerns = coverage[32]
if kerns then
for k, v in next, kerns do
right[k] = v
end
end
for k, v in next, coverage do
local kern = v[32]
if kern then
left[k] = kern
end
end
elseif format == "pair" then
local kerns = coverage[32]
if kerns then
for k, v in next, kerns do
local one = v[1]
if one and one ~= true then
right[k] = one[3]
end
end
end
for k, v in next, coverage do
local kern = v[32]
if kern then
local one = kern[1]
if one and one ~= true then
left[k] = one[3]
end
end
end
end
end
end
last = i
end
else
-- no steps ... needed for old one ... we could use the basekerns
-- instead
end
end
end
left = next(left) and left or false
right = next(right) and right or false
if left or right then
spacekerns = {
left = left,
right = right,
}
if last > 0 then
local triggersequence = {
-- no steps, see (!!)
features = { [tag] = feat or { dflt = { dflt = true, } } },
flags = noflags,
name = "trigger_space_kerns",
order = { tag },
type = "trigger_space_kerns",
left = left,
right = right,
}
insert(sequences,last,triggersequence)
end
end
end
end
resources.spacekerns = spacekerns
end
return spacekerns
end
registerotffeature {
name = "spacekern",
description = "space kern injection",
default = true,
initializers = {
node = spaceinitializer,
},
}
local function markinitializer(tfmdata,value)
local properties = tfmdata.properties
properties.checkmarks = value
end
registerotffeature {
name = "checkmarks",
description = "check mark widths",
default = true,
initializers = {
node = markinitializer,
},
}