From 3d3b6af2a13fb57dd36377954669124ceac73b3d Mon Sep 17 00:00:00 2001 From: Marius Date: Wed, 17 Oct 2012 20:20:13 +0300 Subject: beta 2012.10.17 19:02 --- tex/generic/context/luatex/luatex-fonts-merged.lua | 6706 ++++++++++---------- tex/generic/context/luatex/luatex-fonts.lua | 2 +- 2 files changed, 3354 insertions(+), 3354 deletions(-) (limited to 'tex/generic') diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 96be83bcc..cd36a3e5b 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 10/17/12 13:30:56 +-- merge date : 10/17/12 19:02:50 do -- begin closure to overcome local limits and interference @@ -8817,3701 +8817,3701 @@ end -- closure do -- begin closure to overcome local limits and interference -if not modules then modules = { } end modules ['font-otn'] = { +if not modules then modules = { } end modules ['font-ota'] = { version = 1.001, - comment = "companion to font-ini.mkiv", + comment = "companion to font-otf.lua (analysing)", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", license = "see context related readme files" } --- this is still somewhat preliminary and it will get better in due time; --- much functionality could only be implemented thanks to the husayni font --- of Idris Samawi Hamid to who we dedicate this module. +-- this might become scrp-*.lua --- in retrospect it always looks easy but believe it or not, it took a lot --- of work to get proper open type support done: buggy fonts, fuzzy specs, --- special made testfonts, many skype sessions between taco, idris and me, --- torture tests etc etc ... unfortunately the code does not show how much --- time it took ... 
+local type, tostring, match, format, concat = type, tostring, string.match, string.format, table.concat --- todo: --- --- kerning is probably not yet ok for latin around dics nodes --- extension infrastructure (for usage out of context) --- sorting features according to vendors/renderers --- alternative loop quitters --- check cursive and r2l --- find out where ignore-mark-classes went --- default features (per language, script) --- handle positions (we need example fonts) --- handle gpos_single (we might want an extra width field in glyph nodes because adding kerns might interfere) --- mark (to mark) code is still not what it should be (too messy but we need some more extreem husayni tests) +if not trackers then trackers = { register = function() end } end ---[[ldx-- -

This module is a bit more split up that I'd like but since we also want to test -with plain it has to be so. This module is part of -and discussion about improvements and functionality mostly happens on the - mailing list.

+local trace_analyzing = false trackers.register("otf.analyzing", function(v) trace_analyzing = v end) -

The specification of OpenType is kind of vague. Apart from a lack of a proper -free specifications there's also the problem that Microsoft and Adobe -may have their own interpretation of how and in what order to apply features. -In general the Microsoft website has more detailed specifications and is a -better reference. There is also some information in the FontForge help files.

+local fonts, nodes, node = fonts, nodes, node -

Because there is so much possible, fonts might contain bugs and/or be made to -work with certain rederers. These may evolve over time which may have the side -effect that suddenly fonts behave differently.

+local allocate = utilities.storage.allocate -

After a lot of experiments (mostly by Taco, me and Idris) we're now at yet another -implementation. Of course all errors are mine and of course the code can be -improved. There are quite some optimizations going on here and processing speed -is currently acceptable. Not all functions are implemented yet, often because I -lack the fonts for testing. Many scripts are not yet supported either, but I will -look into them as soon as users ask for it.

+local otf = fonts.handlers.otf -

Because there are different interpretations possible, I will extend the code -with more (configureable) variants. I can also add hooks for users so that they can -write their own extensions.

+local analyzers = fonts.analyzers +local initializers = allocate() +local methods = allocate() -

Glyphs are indexed not by unicode but in their own way. This is because there is no -relationship with unicode at all, apart from the fact that a font might cover certain -ranges of characters. One character can have multiple shapes. However, at the - end we use unicode so and all extra glyphs are mapped into a private -space. This is needed because we need to access them and has to include -then in the output eventually.

+analyzers.initializers = initializers +analyzers.methods = methods +analyzers.useunicodemarks = false -

The raw table as it coms from gets reorganized in to fit out needs. -In that table is packed (similar tables are shared) and cached on disk -so that successive runs can use the optimized table (after loading the table is -unpacked). The flattening code used later is a prelude to an even more compact table -format (and as such it keeps evolving).

+local nodecodes = nodes.nodecodes +local glyph_code = nodecodes.glyph -

This module is sparsely documented because it is a moving target. The table format -of the reader changes and we experiment a lot with different methods for supporting -features.

+local set_attribute = node.set_attribute +local has_attribute = node.has_attribute +local traverse_id = node.traverse_id +local traverse_node_list = node.traverse -

As with the code, we may decide to store more information in the - table.

+local fontdata = fonts.hashes.identifiers +local state = attributes.private('state') +local categories = characters and characters.categories or { } -- sorry, only in context -

Incrementing the version number will force a re-cache. We jump the number by one -when there's a fix in the library or code that -results in different tables.

+local otffeatures = fonts.constructors.newfeatures("otf") +local registerotffeature = otffeatures.register + +--[[ldx-- +

Analyzers run per script and/or language and are needed in order to +process features right.

--ldx]]-- --- action handler chainproc chainmore comment --- --- gsub_single ok ok ok --- gsub_multiple ok ok not implemented yet --- gsub_alternate ok ok not implemented yet --- gsub_ligature ok ok ok --- gsub_context ok -- --- gsub_contextchain ok -- --- gsub_reversecontextchain ok -- --- chainsub -- ok --- reversesub -- ok --- gpos_mark2base ok ok --- gpos_mark2ligature ok ok --- gpos_mark2mark ok ok --- gpos_cursive ok untested --- gpos_single ok ok --- gpos_pair ok ok --- gpos_context ok -- --- gpos_contextchain ok -- --- --- todo: contextpos and contextsub and class stuff --- --- actions: --- --- handler : actions triggered by lookup --- chainproc : actions triggered by contextual lookup --- chainmore : multiple substitutions triggered by contextual lookup (e.g. fij -> f + ij) --- --- remark: the 'not implemented yet' variants will be done when we have fonts that use them --- remark: we need to check what to do with discretionaries +analyzers.constants = { + init = 1, + medi = 2, + fina = 3, + isol = 4, + -- devanagari + rphf = 5, + half = 6, + pref = 7, + blwf = 8, + pstf = 9, +} --- We used to have independent hashes for lookups but as the tags are unique --- we now use only one hash. If needed we can have multiple again but in that --- case I will probably prefix (i.e. rename) the lookups in the cached font file. +-- todo: analyzers per script/lang, cross font, so we need an font id hash -> script +-- e.g. 
latin -> hyphenate, arab -> 1/2/3 analyze -- its own namespace --- Todo: make plugin feature that operates on char/glyphnode arrays +function analyzers.setstate(head,font) + local useunicodemarks = analyzers.useunicodemarks + local tfmdata = fontdata[font] + local characters = tfmdata.characters + local descriptions = tfmdata.descriptions + local first, last, current, n, done = nil, nil, head, 0, false -- maybe make n boolean + while current do + local id = current.id + if id == glyph_code and current.font == font then + local char = current.char + local d = descriptions[char] + if d then + if d.class == "mark" or (useunicodemarks and categories[char] == "mn") then + done = true + set_attribute(current,state,5) -- mark + elseif n == 0 then + first, last, n = current, current, 1 + set_attribute(current,state,1) -- init + else + last, n = current, n+1 + set_attribute(current,state,2) -- medi + end + else -- finish + if first and first == last then + set_attribute(last,state,4) -- isol + elseif last then + set_attribute(last,state,3) -- fina + end + first, last, n = nil, nil, 0 + end + elseif id == disc_code then + -- always in the middle + set_attribute(current,state,2) -- midi + last = current + else -- finish + if first and first == last then + set_attribute(last,state,4) -- isol + elseif last then + set_attribute(last,state,3) -- fina + end + first, last, n = nil, nil, 0 + end + current = current.next + end + if first and first == last then + set_attribute(last,state,4) -- isol + elseif last then + set_attribute(last,state,3) -- fina + end + return head, done +end -local concat, insert, remove = table.concat, table.insert, table.remove -local format, gmatch, gsub, find, match, lower, strip = string.format, string.gmatch, string.gsub, string.find, string.match, string.lower, string.strip -local type, next, tonumber, tostring = type, next, tonumber, tostring -local lpegmatch = lpeg.match -local random = math.random +-- in the future we will use language/script 
attributes instead of the +-- font related value, but then we also need dynamic features which is +-- somewhat slower; and .. we need a chain of them -local logs, trackers, nodes, attributes = logs, trackers, nodes, attributes +local function analyzeinitializer(tfmdata,value) -- attr + local script, language = otf.scriptandlanguage(tfmdata) -- attr + local action = initializers[script] + if not action then + -- skip + elseif type(action) == "function" then + return action(tfmdata,value) + else + local action = action[language] + if action then + return action(tfmdata,value) + end + end +end -local registertracker = trackers.register +local function analyzeprocessor(head,font,attr) + local tfmdata = fontdata[font] + local script, language = otf.scriptandlanguage(tfmdata,attr) + local action = methods[script] + if not action then + -- skip + elseif type(action) == "function" then + return action(head,font,attr) + else + action = action[language] + if action then + return action(head,font,attr) + end + end + return head, false +end -local fonts = fonts -local otf = fonts.handlers.otf +registerotffeature { + name = "analyze", + description = "analysis of (for instance) character classes", + default = true, + initializers = { + node = analyzeinitializer, + }, + processors = { + position = 1, + node = analyzeprocessor, + } +} -local trace_lookups = false registertracker("otf.lookups", function(v) trace_lookups = v end) -local trace_singles = false registertracker("otf.singles", function(v) trace_singles = v end) -local trace_multiples = false registertracker("otf.multiples", function(v) trace_multiples = v end) -local trace_alternatives = false registertracker("otf.alternatives", function(v) trace_alternatives = v end) -local trace_ligatures = false registertracker("otf.ligatures", function(v) trace_ligatures = v end) -local trace_contexts = false registertracker("otf.contexts", function(v) trace_contexts = v end) -local trace_marks = false registertracker("otf.marks", 
function(v) trace_marks = v end) -local trace_kerns = false registertracker("otf.kerns", function(v) trace_kerns = v end) -local trace_cursive = false registertracker("otf.cursive", function(v) trace_cursive = v end) -local trace_preparing = false registertracker("otf.preparing", function(v) trace_preparing = v end) -local trace_bugs = false registertracker("otf.bugs", function(v) trace_bugs = v end) -local trace_details = false registertracker("otf.details", function(v) trace_details = v end) -local trace_applied = false registertracker("otf.applied", function(v) trace_applied = v end) -local trace_steps = false registertracker("otf.steps", function(v) trace_steps = v end) -local trace_skips = false registertracker("otf.skips", function(v) trace_skips = v end) -local trace_directions = false registertracker("otf.directions", function(v) trace_directions = v end) - -local report_direct = logs.reporter("fonts","otf direct") -local report_subchain = logs.reporter("fonts","otf subchain") -local report_chain = logs.reporter("fonts","otf chain") -local report_process = logs.reporter("fonts","otf process") -local report_prepare = logs.reporter("fonts","otf prepare") -local report_warning = logs.reporter("fonts","otf warning") - -registertracker("otf.verbose_chain", function(v) otf.setcontextchain(v and "verbose") end) -registertracker("otf.normal_chain", function(v) otf.setcontextchain(v and "normal") end) - -registertracker("otf.replacements", "otf.singles,otf.multiples,otf.alternatives,otf.ligatures") -registertracker("otf.positions","otf.marks,otf.kerns,otf.cursive") -registertracker("otf.actions","otf.replacements,otf.positions") -registertracker("otf.injections","nodes.injections") - -registertracker("*otf.sample","otf.steps,otf.actions,otf.analyzing") - -local insert_node_after = node.insert_after -local delete_node = nodes.delete -local copy_node = node.copy -local find_node_tail = node.tail or node.slide -local set_attribute = node.set_attribute -local 
has_attribute = node.has_attribute -local flush_node_list = node.flush_list - -local setmetatableindex = table.setmetatableindex - -local zwnj = 0x200C -local zwj = 0x200D -local wildcard = "*" -local default = "dflt" - -local nodecodes = nodes.nodecodes -local whatcodes = nodes.whatcodes -local glyphcodes = nodes.glyphcodes - -local glyph_code = nodecodes.glyph -local glue_code = nodecodes.glue -local disc_code = nodecodes.disc -local whatsit_code = nodecodes.whatsit - -local dir_code = whatcodes.dir -local localpar_code = whatcodes.localpar - -local ligature_code = glyphcodes.ligature +-- latin -local privateattribute = attributes.private +methods.latn = analyzers.setstate --- Something is messed up: we have two mark / ligature indices, one at the injection --- end and one here ... this is bases in KE's patches but there is something fishy --- there as I'm pretty sure that for husayni we need some connection (as it's much --- more complex than an average font) but I need proper examples of all cases, not --- of only some. 
+-- this info eventually will go into char-def and we will have a state +-- table for generic then -local state = privateattribute('state') -local markbase = privateattribute('markbase') -local markmark = privateattribute('markmark') -local markdone = privateattribute('markdone') -- assigned at the injection end -local cursbase = privateattribute('cursbase') -local curscurs = privateattribute('curscurs') -local cursdone = privateattribute('cursdone') -local kernpair = privateattribute('kernpair') -local ligacomp = privateattribute('ligacomp') -- assigned here (ideally it should be combined) +local zwnj = 0x200C +local zwj = 0x200D -local injections = nodes.injections -local setmark = injections.setmark -local setcursive = injections.setcursive -local setkern = injections.setkern -local setpair = injections.setpair +local isol = { + [0x0600] = true, [0x0601] = true, [0x0602] = true, [0x0603] = true, + [0x0608] = true, [0x060B] = true, [0x0621] = true, [0x0674] = true, + [0x06DD] = true, [zwnj] = true, +} -local markonce = true -local cursonce = true -local kernonce = true +local isol_fina = { + [0x0622] = true, [0x0623] = true, [0x0624] = true, [0x0625] = true, + [0x0627] = true, [0x0629] = true, [0x062F] = true, [0x0630] = true, + [0x0631] = true, [0x0632] = true, [0x0648] = true, [0x0671] = true, + [0x0672] = true, [0x0673] = true, [0x0675] = true, [0x0676] = true, + [0x0677] = true, [0x0688] = true, [0x0689] = true, [0x068A] = true, + [0x068B] = true, [0x068C] = true, [0x068D] = true, [0x068E] = true, + [0x068F] = true, [0x0690] = true, [0x0691] = true, [0x0692] = true, + [0x0693] = true, [0x0694] = true, [0x0695] = true, [0x0696] = true, + [0x0697] = true, [0x0698] = true, [0x0699] = true, [0x06C0] = true, + [0x06C3] = true, [0x06C4] = true, [0x06C5] = true, [0x06C6] = true, + [0x06C7] = true, [0x06C8] = true, [0x06C9] = true, [0x06CA] = true, + [0x06CB] = true, [0x06CD] = true, [0x06CF] = true, [0x06D2] = true, + [0x06D3] = true, [0x06D5] = true, [0x06EE] = 
true, [0x06EF] = true, + [0x0759] = true, [0x075A] = true, [0x075B] = true, [0x076B] = true, + [0x076C] = true, [0x0771] = true, [0x0773] = true, [0x0774] = true, + [0x0778] = true, [0x0779] = true, [0xFEF5] = true, [0xFEF7] = true, + [0xFEF9] = true, [0xFEFB] = true, -local fonthashes = fonts.hashes -local fontdata = fonthashes.identifiers + -- syriac -local otffeatures = fonts.constructors.newfeatures("otf") -local registerotffeature = otffeatures.register + [0x0710] = true, [0x0715] = true, [0x0716] = true, [0x0717] = true, + [0x0718] = true, [0x0719] = true, [0x0728] = true, [0x072A] = true, + [0x072C] = true, [0x071E] = true, +} -local onetimemessage = fonts.loggers.onetimemessage +local isol_fina_medi_init = { + [0x0626] = true, [0x0628] = true, [0x062A] = true, [0x062B] = true, + [0x062C] = true, [0x062D] = true, [0x062E] = true, [0x0633] = true, + [0x0634] = true, [0x0635] = true, [0x0636] = true, [0x0637] = true, + [0x0638] = true, [0x0639] = true, [0x063A] = true, [0x063B] = true, + [0x063C] = true, [0x063D] = true, [0x063E] = true, [0x063F] = true, + [0x0640] = true, [0x0641] = true, [0x0642] = true, [0x0643] = true, + [0x0644] = true, [0x0645] = true, [0x0646] = true, [0x0647] = true, + [0x0649] = true, [0x064A] = true, [0x066E] = true, [0x066F] = true, + [0x0678] = true, [0x0679] = true, [0x067A] = true, [0x067B] = true, + [0x067C] = true, [0x067D] = true, [0x067E] = true, [0x067F] = true, + [0x0680] = true, [0x0681] = true, [0x0682] = true, [0x0683] = true, + [0x0684] = true, [0x0685] = true, [0x0686] = true, [0x0687] = true, + [0x069A] = true, [0x069B] = true, [0x069C] = true, [0x069D] = true, + [0x069E] = true, [0x069F] = true, [0x06A0] = true, [0x06A1] = true, + [0x06A2] = true, [0x06A3] = true, [0x06A4] = true, [0x06A5] = true, + [0x06A6] = true, [0x06A7] = true, [0x06A8] = true, [0x06A9] = true, + [0x06AA] = true, [0x06AB] = true, [0x06AC] = true, [0x06AD] = true, + [0x06AE] = true, [0x06AF] = true, [0x06B0] = true, [0x06B1] = true, + [0x06B2] = 
true, [0x06B3] = true, [0x06B4] = true, [0x06B5] = true, + [0x06B6] = true, [0x06B7] = true, [0x06B8] = true, [0x06B9] = true, + [0x06BA] = true, [0x06BB] = true, [0x06BC] = true, [0x06BD] = true, + [0x06BE] = true, [0x06BF] = true, [0x06C1] = true, [0x06C2] = true, + [0x06CC] = true, [0x06CE] = true, [0x06D0] = true, [0x06D1] = true, + [0x06FA] = true, [0x06FB] = true, [0x06FC] = true, [0x06FF] = true, + [0x0750] = true, [0x0751] = true, [0x0752] = true, [0x0753] = true, + [0x0754] = true, [0x0755] = true, [0x0756] = true, [0x0757] = true, + [0x0758] = true, [0x075C] = true, [0x075D] = true, [0x075E] = true, + [0x075F] = true, [0x0760] = true, [0x0761] = true, [0x0762] = true, + [0x0763] = true, [0x0764] = true, [0x0765] = true, [0x0766] = true, + [0x0767] = true, [0x0768] = true, [0x0769] = true, [0x076A] = true, + [0x076D] = true, [0x076E] = true, [0x076F] = true, [0x0770] = true, + [0x0772] = true, [0x0775] = true, [0x0776] = true, [0x0777] = true, + [0x077A] = true, [0x077B] = true, [0x077C] = true, [0x077D] = true, + [0x077E] = true, [0x077F] = true, -otf.defaultnodealternate = "none" -- first last + -- syriac --- we share some vars here, after all, we have no nested lookups and --- less code + [0x0712] = true, [0x0713] = true, [0x0714] = true, [0x071A] = true, + [0x071B] = true, [0x071C] = true, [0x071D] = true, [0x071F] = true, + [0x0720] = true, [0x0721] = true, [0x0722] = true, [0x0723] = true, + [0x0724] = true, [0x0725] = true, [0x0726] = true, [0x0727] = true, + [0x0729] = true, [0x072B] = true, -local tfmdata = false -local characters = false -local descriptions = false -local resources = false -local marks = false -local currentfont = false -local lookuptable = false -local anchorlookups = false -local lookuptypes = false -local handlers = { } -local rlmode = 0 -local featurevalue = false + -- also --- we cannot optimize with "start = first_glyph(head)" because then we don't --- know which rlmode we're in which messes up cursive handling later on --- 
--- head is always a whatsit so we can safely assume that head is not changed + [zwj] = true, +} --- we use this for special testing and documentation +local arab_warned = { } -local checkstep = (nodes and nodes.tracers and nodes.tracers.steppers.check) or function() end -local registerstep = (nodes and nodes.tracers and nodes.tracers.steppers.register) or function() end -local registermessage = (nodes and nodes.tracers and nodes.tracers.steppers.message) or function() end +-- todo: gref -local function logprocess(...) - if trace_steps then - registermessage(...) +local function warning(current,what) + local char = current.char + if not arab_warned[char] then + log.report("analyze","arab: character %s (U+%05X) has no %s class", char, char, what) + arab_warned[char] = true end - report_direct(...) -end - -local function logwarning(...) - report_direct(...) end -local function gref(n) - if type(n) == "number" then - local description = descriptions[n] - local name = description and description.name - if name then - return format("U+%05X (%s)",n,name) +local function finish(first,last) + if last then + if first == last then + local fc = first.char + if isol_fina_medi_init[fc] or isol_fina[fc] then + set_attribute(first,state,4) -- isol + else + warning(first,"isol") + set_attribute(first,state,0) -- error + end else - return format("U+%05X",n) - end - elseif not n then - return "" - else - local num, nam = { }, { } - for i=1,#n do - local ni = n[i] - if tonumber(ni) then -- later we will start at 2 - local di = descriptions[ni] - num[i] = format("U+%05X",ni) - nam[i] = di and di.name or "?" + local lc = last.char + if isol_fina_medi_init[lc] or isol_fina[lc] then -- why isol here ? 
+ -- if laststate == 1 or laststate == 2 or laststate == 4 then + set_attribute(last,state,3) -- fina + else + warning(last,"fina") + set_attribute(last,state,0) -- error end end - return format("%s (%s)",concat(num," "), concat(nam," ")) - end -end - -local function cref(kind,chainname,chainlookupname,lookupname,index) - if index then - return format("feature %s, chain %s, sub %s, lookup %s, index %s",kind,chainname,chainlookupname,lookupname,index) - elseif lookupname then - return format("feature %s, chain %s, sub %s, lookup %s",kind,chainname or "?",chainlookupname or "?",lookupname) - elseif chainlookupname then - return format("feature %s, chain %s, sub %s",kind,chainname or "?",chainlookupname) - elseif chainname then - return format("feature %s, chain %s",kind,chainname) - else - return format("feature %s",kind) + first, last = nil, nil + elseif first then + -- first and last are either both set so we never com here + local fc = first.char + if isol_fina_medi_init[fc] or isol_fina[fc] then + set_attribute(first,state,4) -- isol + else + warning(first,"isol") + set_attribute(first,state,0) -- error + end + first = nil end + return first, last end -local function pref(kind,lookupname) - return format("feature %s, lookup %s",kind,lookupname) -end - --- We can assume that languages that use marks are not hyphenated. We can also assume --- that at most one discretionary is present. - --- We do need components in funny kerning mode but maybe I can better reconstruct then --- as we do have the font components info available; removing components makes the --- previous code much simpler. Also, later on copying and freeing becomes easier. --- However, for arabic we need to keep them around for the sake of mark placement --- and indices. - -local function copy_glyph(g) -- next and prev are untouched ! 
- local components = g.components - if components then - g.components = nil - local n = copy_node(g) - g.components = components - return n - else - return copy_node(g) +function methods.arab(head,font,attr) -- maybe make a special version with no trace + local useunicodemarks = analyzers.useunicodemarks + local tfmdata = fontdata[font] + local marks = tfmdata.resources.marks + local first, last, current, done = nil, nil, head, false + while current do + if current.id == glyph_code and current.subtype<256 and current.font == font and not has_attribute(current,state) then + done = true + local char = current.char + if marks[char] or (useunicodemarks and categories[char] == "mn") then + set_attribute(current,state,5) -- mark + elseif isol[char] then -- can be zwj or zwnj too + first, last = finish(first,last) + set_attribute(current,state,4) -- isol + first, last = nil, nil + elseif not first then + if isol_fina_medi_init[char] then + set_attribute(current,state,1) -- init + first, last = first or current, current + elseif isol_fina[char] then + set_attribute(current,state,4) -- isol + first, last = nil, nil + else -- no arab + first, last = finish(first,last) + end + elseif isol_fina_medi_init[char] then + first, last = first or current, current + set_attribute(current,state,2) -- medi + elseif isol_fina[char] then + if not has_attribute(last,state,1) then + -- tricky, we need to check what last may be ! 
+ set_attribute(last,state,2) -- medi + end + set_attribute(current,state,3) -- fina + first, last = nil, nil + elseif char >= 0x0600 and char <= 0x06FF then + set_attribute(current,state,6) -- rest + first, last = finish(first,last) + else --no + first, last = finish(first,last) + end + else + first, last = finish(first,last) + end + current = current.next end + first, last = finish(first,last) + return head, done end --- start is a mark and we need to keep that one +methods.syrc = methods.arab --- local function markstoligature(kind,lookupname,start,stop,char) --- -- [start]..[stop] --- local keep = start --- local prev = start.prev --- local next = stop.next --- local base = copy_glyph(start) --- local current, start = insert_node_after(start,start,base) --- -- [current][start]..[stop] --- current.next = next --- if next then --- next.prev = current --- end --- start.prev = nil --- stop.next = nil --- current.char = char --- current.subtype = ligature_code --- current.components = start --- return keep --- end +directives.register("otf.analyze.useunicodemarks",function(v) + analyzers.useunicodemarks = v +end) -local function markstoligature(kind,lookupname,start,stop,char) - if start == stop and start.char == char then - return start - else - local prev = start.prev - local next = stop.next - start.prev = nil - stop.next = nil - local base = copy_glyph(start) - base.char = char - base.subtype = ligature_code - base.components = start - if prev then - prev.next = base - end - if next then - next.prev = base - end - base.next = next - base.prev = prev - return base - end -end +end -- closure --- The next code is somewhat complicated by the fact that some fonts can have ligatures made --- from ligatures that themselves have marks. This was identified by Kai in for instance --- arabtype: KAF LAM SHADDA ALEF FATHA (0x0643 0x0644 0x0651 0x0627 0x064E). This becomes --- KAF LAM-ALEF with a SHADDA on the first and a FATHA op de second component. 
In a next --- iteration this becomes a KAF-LAM-ALEF with a SHADDA on the second and a FATHA on the --- third component. +do -- begin closure to overcome local limits and interference -local function getcomponentindex(start) - if start.id ~= glyph_code then - return 0 - elseif start.subtype == ligature_code then - local i = 0 - local components = start.components - while components do - i = i + getcomponentindex(components) - components = components.next - end - return i - elseif not marks[start.char] then - return 1 - else - return 0 - end -end +if not modules then modules = { } end modules ['font-otn'] = { + version = 1.001, + comment = "companion to font-ini.mkiv", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} --- local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head --- if start == stop and start.char == char then --- start.char = char --- return start --- elseif discfound then --- local prev = start.prev --- local next = stop.next --- start.prev = nil --- stop.next = nil --- local base = copy_glyph(start) --- base.char = char --- base.subtype = ligature_code --- base.components = start -- start can have components --- if prev then --- prev.next = base --- end --- if next then --- next.prev = base --- end --- base.next = next --- base.prev = prev --- return base --- else --- -- start is the ligature --- local deletemarks = markflag ~= "mark" --- local prev = start.prev --- local next = stop.next --- local base = copy_glyph(start) --- local current, start = insert_node_after(start,start,base) --- -- [start->current][copyofstart->start]...[stop] --- current.next = next --- if next then --- next.prev = current --- end --- start.prev = nil --- stop.next = nil --- current.char = char --- current.subtype = ligature_code --- current.components = start --- local head = current --- -- this is messy ... 
we should get rid of the components eventually --- local baseindex = 0 --- local componentindex = 0 --- while start do --- local char = start.char --- if not marks[char] then --- baseindex = baseindex + componentindex --- componentindex = getcomponentindex(start) --- elseif not deletemarks then -- quite fishy --- set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) --- if trace_marks then --- logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) --- end --- head, current = insert_node_after(head,current,copy_glyph(start)) -- unlikely that mark has components --- end --- start = start.next --- end --- start = current.next --- while start and start.id == glyph_code do -- hm, is id test needed ? --- local char = start.char --- if marks[char] then --- set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) --- if trace_marks then --- logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) --- end --- else --- break --- end --- start = start.next --- end --- return head --- end --- end +-- this is still somewhat preliminary and it will get better in due time; +-- much functionality could only be implemented thanks to the husayni font +-- of Idris Samawi Hamid to who we dedicate this module. 
-local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head - if start == stop and start.char == char then - start.char = char - return start - end - local prev = start.prev - local next = stop.next - start.prev = nil - stop.next = nil - local base = copy_glyph(start) - base.char = char - base.subtype = ligature_code - base.components = start -- start can have components - if prev then - prev.next = base - end - if next then - next.prev = base - end - base.next = next - base.prev = prev - if not discfound then - local deletemarks = markflag ~= "mark" - local components = start - local baseindex = 0 - local componentindex = 0 - local head = base - local current = base - while start do - local char = start.char - if not marks[char] then - baseindex = baseindex + componentindex - componentindex = getcomponentindex(start) - elseif not deletemarks then -- quite fishy - set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) - if trace_marks then - logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) - end - head, current = insert_node_after(head,current,copy_node(start)) -- unlikely that mark has components - end - start = start.next - end - local start = components - while start and start.id == glyph_code do -- hm, is id test needed ? - local char = start.char - if marks[char] then - set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) - if trace_marks then - logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) - end - else - break - end - start = start.next - end - end - return base -end +-- in retrospect it always looks easy but believe it or not, it took a lot +-- of work to get proper open type support done: buggy fonts, fuzzy specs, +-- special made testfonts, many skype sessions between taco, idris and me, +-- torture tests etc etc ... 
unfortunately the code does not show how much +-- time it took ... -function handlers.gsub_single(start,kind,lookupname,replacement) - if trace_singles then - logprocess("%s: replacing %s by single %s",pref(kind,lookupname),gref(start.char),gref(replacement)) - end - start.char = replacement - return start, true -end +-- todo: +-- +-- kerning is probably not yet ok for latin around dics nodes +-- extension infrastructure (for usage out of context) +-- sorting features according to vendors/renderers +-- alternative loop quitters +-- check cursive and r2l +-- find out where ignore-mark-classes went +-- default features (per language, script) +-- handle positions (we need example fonts) +-- handle gpos_single (we might want an extra width field in glyph nodes because adding kerns might interfere) +-- mark (to mark) code is still not what it should be (too messy but we need some more extreem husayni tests) -local function get_alternative_glyph(start,alternatives,value) - -- needs checking: (global value, brrr) - local choice = nil - local n = #alternatives - local char = start.char - -- - if value == "random" then - local r = random(1,n) - value, choice = format("random, choice %s",r), alternatives[r] - elseif value == "first" then - value, choice = format("first, choice %s",1), alternatives[1] - elseif value == "last" then - value, choice = format("last, choice %s",n), alternatives[n] - else - value = tonumber(value) - if type(value) ~= "number" then - value, choice = "default, choice 1", alternatives[1] - elseif value > n then - local defaultalt = otf.defaultnodealternate - if defaultalt == "first" then - value, choice = format("no %s variants, taking %s",value,n), alternatives[n] - elseif defaultalt == "last" then - value, choice = format("no %s variants, taking %s",value,1), alternatives[1] - else - value, choice = format("no %s variants, ignoring",value), false - end - elseif value == 0 then - value, choice = format("choice %s (no change)",value), char - elseif 
value < 1 then - value, choice = format("no %s variants, taking %s",value,1), alternatives[1] - else - value, choice = format("choice %s",value), alternatives[value] - end - end - return choice -end +--[[ldx-- +

This module is a bit more split up than I'd like but since we also want to test +with plain TeX it has to be so. This module is part of ConTeXt +and discussion about improvements and functionality mostly happens on the +ConTeXt mailing list.

-local function multiple_glyphs(start,multiple) -- marks ? - local nofmultiples = #multiple - if nofmultiples > 0 then - start.char = multiple[1] - if nofmultiples > 1 then - local sn = start.next - for k=2,nofmultiples do -- todo: use insert_node - local n = copy_node(start) -- ignore components - n.char = multiple[k] - n.next = sn - n.prev = start - if sn then - sn.prev = n - end - start.next = n - start = n - end - end - return start, true - else - if trace_multiples then - logprocess("no multiple for %s",gref(start.char)) - end - return start, false - end -end +

The specification of OpenType is kind of vague. Apart from the lack of a proper +free specification there's also the problem that Microsoft and Adobe +may have their own interpretation of how and in what order to apply features. +In general the Microsoft website has more detailed specifications and is a +better reference. There is also some information in the FontForge help files.

-function handlers.gsub_alternate(start,kind,lookupname,alternative,sequence) - local value = featurevalue == true and tfmdata.shared.features[kind] or featurevalue - local choice = get_alternative_glyph(start,alternative,value) - if choice then - if trace_alternatives then - logprocess("%s: replacing %s by alternative %s (%s)",pref(kind,lookupname),gref(start.char),gref(choice),choice) - end - start.char = choice - else - if trace_alternatives then - logwarning("%s: no variant %s for %s",pref(kind,lookupname),tostring(value),gref(start.char)) - end - end - return start, true -end +

Because there is so much possible, fonts might contain bugs and/or be made to +work with certain renderers. These may evolve over time which may have the side +effect that suddenly fonts behave differently.

-function handlers.gsub_multiple(start,kind,lookupname,multiple) - if trace_multiples then - logprocess("%s: replacing %s by multiple %s",pref(kind,lookupname),gref(start.char),gref(multiple)) - end - return multiple_glyphs(start,multiple) -end +

After a lot of experiments (mostly by Taco, me and Idris) we're now at yet another +implementation. Of course all errors are mine and of course the code can be +improved. There are quite some optimizations going on here and processing speed +is currently acceptable. Not all functions are implemented yet, often because I +lack the fonts for testing. Many scripts are not yet supported either, but I will +look into them as soon as users ask for it.

-function handlers.gsub_ligature(start,kind,lookupname,ligature,sequence) - local s, stop, discfound = start.next, nil, false - local startchar = start.char - if marks[startchar] then - while s do - local id = s.id - if id == glyph_code and s.subtype<256 and s.font == currentfont then - local lg = ligature[s.char] - if lg then - stop = s - ligature = lg - s = s.next - else - break - end - else - break - end - end - if stop then - local lig = ligature.ligature - if lig then - if trace_ligatures then - local stopchar = stop.char - start = markstoligature(kind,lookupname,start,stop,lig) - logprocess("%s: replacing %s upto %s by ligature %s",pref(kind,lookupname),gref(startchar),gref(stopchar),gref(start.char)) - else - start = markstoligature(kind,lookupname,start,stop,lig) - end - return start, true - else - -- ok, goto next lookup - end - end - else - local skipmark = sequence.flags[1] - while s do - local id = s.id - if id == glyph_code and s.subtype<256 then - if s.font == currentfont then - local char = s.char - if skipmark and marks[char] then - s = s.next - else - local lg = ligature[char] - if lg then - stop = s - ligature = lg - s = s.next - else - break - end - end - else - break - end - elseif id == disc_code then - discfound = true - s = s.next - else - break - end - end - if stop then - local lig = ligature.ligature - if lig then - if trace_ligatures then - local stopchar = stop.char - start = toligature(kind,lookupname,start,stop,lig,skipmark,discfound) - logprocess("%s: replacing %s upto %s by ligature %s",pref(kind,lookupname),gref(startchar),gref(stopchar),gref(start.char)) - else - start = toligature(kind,lookupname,start,stop,lig,skipmark,discfound) - end - return start, true - else - -- ok, goto next lookup - end - end - end - return start, false -end +

Because there are different interpretations possible, I will extend the code +with more (configurable) variants. I can also add hooks for users so that they can +write their own extensions.

---[[ldx-- -

We get hits on a mark, but we're not sure if the it has to be applied so -we need to explicitly test for basechar, baselig and basemark entries.

---ldx]]-- +

Glyphs are indexed not by unicode but in their own way. This is because there is no +relationship with unicode at all, apart from the fact that a font might cover certain +ranges of characters. One character can have multiple shapes. However, at the +TeX end we use unicode, so all extra glyphs are mapped into a private +space. This is needed because we need to access them and have to include +them in the output eventually.

-function handlers.gpos_mark2base(start,kind,lookupname,markanchors,sequence) - local markchar = start.char - if marks[markchar] then - local base = start.prev -- [glyph] [start=mark] - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - local basechar = base.char - if marks[basechar] then - while true do - base = base.prev - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - basechar = base.char - if not marks[basechar] then - break - end - else - if trace_bugs then - logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) - end - return start, false - end - end - end - local baseanchors = descriptions[basechar] - if baseanchors then - baseanchors = baseanchors.anchors - end - if baseanchors then - local baseanchors = baseanchors['basechar'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) - if trace_marks then - logprocess("%s, anchor %s, bound %s: anchoring mark %s to basechar %s => (%s,%s)", - pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) - end - return start, true - end - end - end - if trace_bugs then - logwarning("%s, no matching anchors for mark %s and base %s",pref(kind,lookupname),gref(markchar),gref(basechar)) - end - end - else -- if trace_bugs then - -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) - onetimemessage(currentfont,basechar,"no base anchors",report_fonts) - end - elseif trace_bugs then - logwarning("%s: prev node is no char",pref(kind,lookupname)) - end - elseif trace_bugs then - logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) - end - return start, false -end - -function handlers.gpos_mark2ligature(start,kind,lookupname,markanchors,sequence) - -- check 
chainpos variant - local markchar = start.char - if marks[markchar] then - local base = start.prev -- [glyph] [optional marks] [start=mark] - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - local basechar = base.char - if marks[basechar] then - while true do - base = base.prev - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - basechar = base.char - if not marks[basechar] then - break - end - else - if trace_bugs then - logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) - end - return start, false - end - end - end - local index = has_attribute(start,ligacomp) - local baseanchors = descriptions[basechar] - if baseanchors then - baseanchors = baseanchors.anchors - if baseanchors then - local baseanchors = baseanchors['baselig'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - ba = ba[index] - if ba then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) -- index - if trace_marks then - logprocess("%s, anchor %s, index %s, bound %s: anchoring mark %s to baselig %s at index %s => (%s,%s)", - pref(kind,lookupname),anchor,index,bound,gref(markchar),gref(basechar),index,dx,dy) - end - return start, true - end - end - end - end - if trace_bugs then - logwarning("%s: no matching anchors for mark %s and baselig %s",pref(kind,lookupname),gref(markchar),gref(basechar)) - end - end - end - else -- if trace_bugs then - -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) - onetimemessage(currentfont,basechar,"no base anchors",report_fonts) - end - elseif trace_bugs then - logwarning("%s: prev node is no char",pref(kind,lookupname)) - end - elseif trace_bugs then - logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) - end - return start, false -end +

The raw table as it comes from FontForge gets reorganized to fit our needs. +In ConTeXt that table is packed (similar tables are shared) and cached on disk +so that successive runs can use the optimized table (after loading the table is +unpacked). The flattening code used later is a prelude to an even more compact table +format (and as such it keeps evolving).

-function handlers.gpos_mark2mark(start,kind,lookupname,markanchors,sequence) - local markchar = start.char - if marks[markchar] then - local base = start.prev -- [glyph] [basemark] [start=mark] - -- while base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp) do - -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature - -- end - local slc = has_attribute(start,ligacomp) - if slc then -- a rather messy loop ... needs checking with husayni - while base do - local blc = has_attribute(base,ligacomp) - if blc and blc ~= slc then - base = base.prev - else - break - end - end - end - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then -- subtype test can go - local basechar = base.char - local baseanchors = descriptions[basechar] - if baseanchors then - baseanchors = baseanchors.anchors - if baseanchors then - baseanchors = baseanchors['basemark'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) - if trace_marks then - logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%s,%s)", - pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) - end - return start,true - end - end - end - if trace_bugs then - logwarning("%s: no matching anchors for mark %s and basemark %s",pref(kind,lookupname),gref(markchar),gref(basechar)) - end - end - end - else -- if trace_bugs then - -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) - onetimemessage(currentfont,basechar,"no base anchors",report_fonts) - end - elseif trace_bugs then - logwarning("%s: prev node is no mark",pref(kind,lookupname)) - end - elseif trace_bugs then - logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) - 
end - return start,false -end +

This module is sparsely documented because it is a moving target. The table format +of the reader changes and we experiment a lot with different methods for supporting +features.

-function handlers.gpos_cursive(start,kind,lookupname,exitanchors,sequence) -- to be checked - local alreadydone = cursonce and has_attribute(start,cursbase) - if not alreadydone then - local done = false - local startchar = start.char - if marks[startchar] then - if trace_cursive then - logprocess("%s: ignoring cursive for mark %s",pref(kind,lookupname),gref(startchar)) - end - else - local nxt = start.next - while not done and nxt and nxt.id == glyph_code and nxt.subtype<256 and nxt.font == currentfont do - local nextchar = nxt.char - if marks[nextchar] then - -- should not happen (maybe warning) - nxt = nxt.next - else - local entryanchors = descriptions[nextchar] - if entryanchors then - entryanchors = entryanchors.anchors - if entryanchors then - entryanchors = entryanchors['centry'] - if entryanchors then - local al = anchorlookups[lookupname] - for anchor, entry in next, entryanchors do - if al[anchor] then - local exit = exitanchors[anchor] - if exit then - local dx, dy, bound = setcursive(start,nxt,tfmdata.parameters.factor,rlmode,exit,entry,characters[startchar],characters[nextchar]) - if trace_cursive then - logprocess("%s: moving %s to %s cursive (%s,%s) using anchor %s and bound %s in rlmode %s",pref(kind,lookupname),gref(startchar),gref(nextchar),dx,dy,anchor,bound,rlmode) - end - done = true - break - end - end - end - end - end - else -- if trace_bugs then - -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(startchar)) - onetimemessage(currentfont,startchar,"no entry anchors",report_fonts) - end - break - end - end - end - return start, done - else - if trace_cursive and trace_details then - logprocess("%s, cursive %s is already done",pref(kind,lookupname),gref(start.char),alreadydone) - end - return start, false - end -end +

As with the AFM code, we may decide to store more information in the +OTF table.

-function handlers.gpos_single(start,kind,lookupname,kerns,sequence) - local startchar = start.char - local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar]) - if trace_kerns then - logprocess("%s: shifting single %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),dx,dy,w,h) - end - return start, false -end +

Incrementing the version number will force a re-cache. We jump the number by one +when there's a fix in the FontForge library or Lua code that +results in different tables.

+--ldx]]-- -function handlers.gpos_pair(start,kind,lookupname,kerns,sequence) - -- todo: kerns in disc nodes: pre, post, replace -> loop over disc too - -- todo: kerns in components of ligatures - local snext = start.next - if not snext then - return start, false - else - local prev, done = start, false - local factor = tfmdata.parameters.factor - local lookuptype = lookuptypes[lookupname] - while snext and snext.id == glyph_code and snext.subtype<256 and snext.font == currentfont do - local nextchar = snext.char - local krn = kerns[nextchar] - if not krn and marks[nextchar] then - prev = snext - snext = snext.next - else - local krn = kerns[nextchar] - if not krn then - -- skip - elseif type(krn) == "table" then - if lookuptype == "pair" then -- probably not needed - local a, b = krn[2], krn[3] - if a and #a > 0 then - local startchar = start.char - local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar]) - if trace_kerns then - logprocess("%s: shifting first of pair %s and %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h) - end - end - if b and #b > 0 then - local startchar = start.char - local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar]) - if trace_kerns then - logprocess("%s: shifting second of pair %s and %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h) - end - end - else -- wrong ... 
position has different entries - report_process("%s: check this out (old kern stuff)",pref(kind,lookupname)) - -- local a, b = krn[2], krn[6] - -- if a and a ~= 0 then - -- local k = setkern(snext,factor,rlmode,a) - -- if trace_kerns then - -- logprocess("%s: inserting first kern %s between %s and %s",pref(kind,lookupname),k,gref(prev.char),gref(nextchar)) - -- end - -- end - -- if b and b ~= 0 then - -- logwarning("%s: ignoring second kern xoff %s",pref(kind,lookupname),b*factor) - -- end - end - done = true - elseif krn ~= 0 then - local k = setkern(snext,factor,rlmode,krn) - if trace_kerns then - logprocess("%s: inserting kern %s between %s and %s",pref(kind,lookupname),k,gref(prev.char),gref(nextchar)) - end - done = true - end - break - end - end - return start, done - end -end +-- action handler chainproc chainmore comment +-- +-- gsub_single ok ok ok +-- gsub_multiple ok ok not implemented yet +-- gsub_alternate ok ok not implemented yet +-- gsub_ligature ok ok ok +-- gsub_context ok -- +-- gsub_contextchain ok -- +-- gsub_reversecontextchain ok -- +-- chainsub -- ok +-- reversesub -- ok +-- gpos_mark2base ok ok +-- gpos_mark2ligature ok ok +-- gpos_mark2mark ok ok +-- gpos_cursive ok untested +-- gpos_single ok ok +-- gpos_pair ok ok +-- gpos_context ok -- +-- gpos_contextchain ok -- +-- +-- todo: contextpos and contextsub and class stuff +-- +-- actions: +-- +-- handler : actions triggered by lookup +-- chainproc : actions triggered by contextual lookup +-- chainmore : multiple substitutions triggered by contextual lookup (e.g. fij -> f + ij) +-- +-- remark: the 'not implemented yet' variants will be done when we have fonts that use them +-- remark: we need to check what to do with discretionaries ---[[ldx-- -

I will implement multiple chain replacements once I run into a font that uses -it. It's not that complex to handle.

---ldx]]-- +-- We used to have independent hashes for lookups but as the tags are unique +-- we now use only one hash. If needed we can have multiple again but in that +-- case I will probably prefix (i.e. rename) the lookups in the cached font file. -local chainmores = { } -local chainprocs = { } +-- Todo: make plugin feature that operates on char/glyphnode arrays -local function logprocess(...) - if trace_steps then - registermessage(...) - end - report_subchain(...) -end +local concat, insert, remove = table.concat, table.insert, table.remove +local format, gmatch, gsub, find, match, lower, strip = string.format, string.gmatch, string.gsub, string.find, string.match, string.lower, string.strip +local type, next, tonumber, tostring = type, next, tonumber, tostring +local lpegmatch = lpeg.match +local random = math.random -local logwarning = report_subchain +local logs, trackers, nodes, attributes = logs, trackers, nodes, attributes -local function logprocess(...) - if trace_steps then - registermessage(...) - end - report_chain(...) -end +local registertracker = trackers.register -local logwarning = report_chain +local fonts = fonts +local otf = fonts.handlers.otf --- We could share functions but that would lead to extra function calls with many --- arguments, redundant tests and confusing messages. 
+local trace_lookups = false registertracker("otf.lookups", function(v) trace_lookups = v end) +local trace_singles = false registertracker("otf.singles", function(v) trace_singles = v end) +local trace_multiples = false registertracker("otf.multiples", function(v) trace_multiples = v end) +local trace_alternatives = false registertracker("otf.alternatives", function(v) trace_alternatives = v end) +local trace_ligatures = false registertracker("otf.ligatures", function(v) trace_ligatures = v end) +local trace_contexts = false registertracker("otf.contexts", function(v) trace_contexts = v end) +local trace_marks = false registertracker("otf.marks", function(v) trace_marks = v end) +local trace_kerns = false registertracker("otf.kerns", function(v) trace_kerns = v end) +local trace_cursive = false registertracker("otf.cursive", function(v) trace_cursive = v end) +local trace_preparing = false registertracker("otf.preparing", function(v) trace_preparing = v end) +local trace_bugs = false registertracker("otf.bugs", function(v) trace_bugs = v end) +local trace_details = false registertracker("otf.details", function(v) trace_details = v end) +local trace_applied = false registertracker("otf.applied", function(v) trace_applied = v end) +local trace_steps = false registertracker("otf.steps", function(v) trace_steps = v end) +local trace_skips = false registertracker("otf.skips", function(v) trace_skips = v end) +local trace_directions = false registertracker("otf.directions", function(v) trace_directions = v end) -function chainprocs.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname) - logwarning("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) - return start, false -end +local report_direct = logs.reporter("fonts","otf direct") +local report_subchain = logs.reporter("fonts","otf subchain") +local report_chain = logs.reporter("fonts","otf chain") +local report_process = logs.reporter("fonts","otf 
process") +local report_prepare = logs.reporter("fonts","otf prepare") +local report_warning = logs.reporter("fonts","otf warning") -function chainmores.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname,n) - logprocess("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) - return start, false -end +registertracker("otf.verbose_chain", function(v) otf.setcontextchain(v and "verbose") end) +registertracker("otf.normal_chain", function(v) otf.setcontextchain(v and "normal") end) --- The reversesub is a special case, which is why we need to store the replacements --- in a bit weird way. There is no lookup and the replacement comes from the lookup --- itself. It is meant mostly for dealing with Urdu. +registertracker("otf.replacements", "otf.singles,otf.multiples,otf.alternatives,otf.ligatures") +registertracker("otf.positions","otf.marks,otf.kerns,otf.cursive") +registertracker("otf.actions","otf.replacements,otf.positions") +registertracker("otf.injections","nodes.injections") -function chainprocs.reversesub(start,stop,kind,chainname,currentcontext,lookuphash,replacements) - local char = start.char - local replacement = replacements[char] - if replacement then - if trace_singles then - logprocess("%s: single reverse replacement of %s by %s",cref(kind,chainname),gref(char),gref(replacement)) - end - start.char = replacement - return start, true - else - return start, false - end -end +registertracker("*otf.sample","otf.steps,otf.actions,otf.analyzing") ---[[ldx-- -

This chain stuff is somewhat tricky since we can have a sequence of actions to be -applied: single, alternate, multiple or ligature where ligature can be an invalid -one in the sense that it will replace multiple by one but not neccessary one that -looks like the combination (i.e. it is the counterpart of multiple then). For -example, the following is valid:

+local insert_node_after = node.insert_after +local delete_node = nodes.delete +local copy_node = node.copy +local find_node_tail = node.tail or node.slide +local set_attribute = node.set_attribute +local has_attribute = node.has_attribute +local flush_node_list = node.flush_list - -xxxabcdexxx [single a->A][multiple b->BCD][ligature cde->E] xxxABCDExxx - +local setmetatableindex = table.setmetatableindex -

Therefore we we don't really do the replacement here already unless we have the -single lookup case. The efficiency of the replacements can be improved by deleting -as less as needed but that would also make the code even more messy.

---ldx]]-- +local zwnj = 0x200C +local zwj = 0x200D +local wildcard = "*" +local default = "dflt" -local function delete_till_stop(start,stop,ignoremarks) -- keeps start - local n = 1 - if start == stop then - -- done - elseif ignoremarks then - repeat -- start x x m x x stop => start m - local next = start.next - if not marks[next.char] then -local components = next.components -if components then -- probably not needed - flush_node_list(components) -end - delete_node(start,next) - end - n = n + 1 - until next == stop - else -- start x x x stop => start - repeat - local next = start.next -local components = next.components -if components then -- probably not needed - flush_node_list(components) -end - delete_node(start,next) - n = n + 1 - until next == stop - end - return n -end +local nodecodes = nodes.nodecodes +local whatcodes = nodes.whatcodes +local glyphcodes = nodes.glyphcodes ---[[ldx-- -

Here we replace start by a single variant, First we delete the rest of the -match.

---ldx]]-- +local glyph_code = nodecodes.glyph +local glue_code = nodecodes.glue +local disc_code = nodecodes.disc +local whatsit_code = nodecodes.whatsit -function chainprocs.gsub_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex) - -- todo: marks ? - local current = start - local subtables = currentlookup.subtables - if #subtables > 1 then - logwarning("todo: check if we need to loop over the replacements: %s",concat(subtables," ")) - end - while current do - if current.id == glyph_code then - local currentchar = current.char - local lookupname = subtables[1] -- only 1 - local replacement = lookuphash[lookupname] - if not replacement then - if trace_bugs then - logwarning("%s: no single hits",cref(kind,chainname,chainlookupname,lookupname,chainindex)) - end - else - replacement = replacement[currentchar] - if not replacement then - if trace_bugs then - logwarning("%s: no single for %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(currentchar)) - end - else - if trace_singles then - logprocess("%s: replacing single %s by %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(currentchar),gref(replacement)) - end - current.char = replacement - end - end - return start, true - elseif current == stop then - break - else - current = current.next - end +local dir_code = whatcodes.dir +local localpar_code = whatcodes.localpar + +local ligature_code = glyphcodes.ligature + +local privateattribute = attributes.private + +-- Something is messed up: we have two mark / ligature indices, one at the injection +-- end and one here ... this is bases in KE's patches but there is something fishy +-- there as I'm pretty sure that for husayni we need some connection (as it's much +-- more complex than an average font) but I need proper examples of all cases, not +-- of only some. 
+ +local state = privateattribute('state') +local markbase = privateattribute('markbase') +local markmark = privateattribute('markmark') +local markdone = privateattribute('markdone') -- assigned at the injection end +local cursbase = privateattribute('cursbase') +local curscurs = privateattribute('curscurs') +local cursdone = privateattribute('cursdone') +local kernpair = privateattribute('kernpair') +local ligacomp = privateattribute('ligacomp') -- assigned here (ideally it should be combined) + +local injections = nodes.injections +local setmark = injections.setmark +local setcursive = injections.setcursive +local setkern = injections.setkern +local setpair = injections.setpair + +local markonce = true +local cursonce = true +local kernonce = true + +local fonthashes = fonts.hashes +local fontdata = fonthashes.identifiers + +local otffeatures = fonts.constructors.newfeatures("otf") +local registerotffeature = otffeatures.register + +local onetimemessage = fonts.loggers.onetimemessage + +otf.defaultnodealternate = "none" -- first last + +-- we share some vars here, after all, we have no nested lookups and +-- less code + +local tfmdata = false +local characters = false +local descriptions = false +local resources = false +local marks = false +local currentfont = false +local lookuptable = false +local anchorlookups = false +local lookuptypes = false +local handlers = { } +local rlmode = 0 +local featurevalue = false + +-- we cannot optimize with "start = first_glyph(head)" because then we don't +-- know which rlmode we're in which messes up cursive handling later on +-- +-- head is always a whatsit so we can safely assume that head is not changed + +-- we use this for special testing and documentation + +local checkstep = (nodes and nodes.tracers and nodes.tracers.steppers.check) or function() end +local registerstep = (nodes and nodes.tracers and nodes.tracers.steppers.register) or function() end +local registermessage = (nodes and nodes.tracers and 
nodes.tracers.steppers.message) or function() end + +local function logprocess(...) + if trace_steps then + registermessage(...) end - return start, false + report_direct(...) end -chainmores.gsub_single = chainprocs.gsub_single - ---[[ldx-- -

Here we replace start by a sequence of new glyphs. First we delete the rest of -the match.

---ldx]]-- +local function logwarning(...) + report_direct(...) +end -function chainprocs.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - delete_till_stop(start,stop) -- we could pass ignoremarks as #3 .. - local startchar = start.char - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local replacements = lookuphash[lookupname] - if not replacements then - if trace_bugs then - logwarning("%s: no multiple hits",cref(kind,chainname,chainlookupname,lookupname)) +local function gref(n) + if type(n) == "number" then + local description = descriptions[n] + local name = description and description.name + if name then + return format("U+%05X (%s)",n,name) + else + return format("U+%05X",n) end + elseif not n then + return "" else - replacements = replacements[startchar] - if not replacements then - if trace_bugs then - logwarning("%s: no multiple for %s",cref(kind,chainname,chainlookupname,lookupname),gref(startchar)) - end - else - if trace_multiples then - logprocess("%s: replacing %s by multiple characters %s",cref(kind,chainname,chainlookupname,lookupname),gref(startchar),gref(replacements)) + local num, nam = { }, { } + for i=1,#n do + local ni = n[i] + if tonumber(ni) then -- later we will start at 2 + local di = descriptions[ni] + num[i] = format("U+%05X",ni) + nam[i] = di and di.name or "?" 
end - return multiple_glyphs(start,replacements) end + return format("%s (%s)",concat(num," "), concat(nam," ")) end - return start, false end --- function chainmores.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- logprocess("%s: gsub_multiple not yet supported",cref(kind,chainname,chainlookupname)) --- return start, false --- end - -chainmores.gsub_multiple = chainprocs.gsub_multiple +local function cref(kind,chainname,chainlookupname,lookupname,index) + if index then + return format("feature %s, chain %s, sub %s, lookup %s, index %s",kind,chainname,chainlookupname,lookupname,index) + elseif lookupname then + return format("feature %s, chain %s, sub %s, lookup %s",kind,chainname or "?",chainlookupname or "?",lookupname) + elseif chainlookupname then + return format("feature %s, chain %s, sub %s",kind,chainname or "?",chainlookupname) + elseif chainname then + return format("feature %s, chain %s",kind,chainname) + else + return format("feature %s",kind) + end +end ---[[ldx-- -

Here we replace start by a new glyph. First we delete the rest of the match.

---ldx]]-- +local function pref(kind,lookupname) + return format("feature %s, lookup %s",kind,lookupname) +end --- char_1 mark_1 -> char_x mark_1 (ignore marks) --- char_1 mark_1 -> char_x +-- We can assume that languages that use marks are not hyphenated. We can also assume +-- that at most one discretionary is present. --- to be checked: do we always have just one glyph? --- we can also have alternates for marks --- marks come last anyway --- are there cases where we need to delete the mark +-- We do need components in funny kerning mode but maybe I can better reconstruct then +-- as we do have the font components info available; removing components makes the +-- previous code much simpler. Also, later on copying and freeing becomes easier. +-- However, for arabic we need to keep them around for the sake of mark placement +-- and indices. -function chainprocs.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - local current = start - local subtables = currentlookup.subtables - local value = featurevalue == true and tfmdata.shared.features[kind] or featurevalue - while current do - if current.id == glyph_code then -- is this check needed? 
- local currentchar = current.char - local lookupname = subtables[1] - local alternatives = lookuphash[lookupname] - if not alternatives then - if trace_bugs then - logwarning("%s: no alternative hit",cref(kind,chainname,chainlookupname,lookupname)) - end - else - alternatives = alternatives[currentchar] - if alternatives then - local choice = get_alternative_glyph(current,alternatives,value) - if choice then - if trace_alternatives then - logprocess("%s: replacing %s by alternative %s (%s)",cref(kind,chainname,chainlookupname,lookupname),gref(char),gref(choice),choice) - end - start.char = choice - else - if trace_alternatives then - logwarning("%s: no variant %s for %s",cref(kind,chainname,chainlookupname,lookupname),tostring(value),gref(char)) - end - end - elseif trace_bugs then - logwarning("%s: no alternative for %s",cref(kind,chainname,chainlookupname,lookupname),gref(currentchar)) - end - end - return start, true - elseif current == stop then - break - else - current = current.next - end +local function copy_glyph(g) -- next and prev are untouched ! + local components = g.components + if components then + g.components = nil + local n = copy_node(g) + g.components = components + return n + else + return copy_node(g) end - return start, false end --- function chainmores.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- logprocess("%s: gsub_alternate not yet supported",cref(kind,chainname,chainlookupname)) --- return start, false --- end - -chainmores.gsub_alternate = chainprocs.gsub_alternate +-- start is a mark and we need to keep that one ---[[ldx-- -

When we replace ligatures we use a helper that handles the marks. I might change -this function (move code inline and handle the marks by a separate function). We -assume rather stupid ligatures (no complex disc nodes).

---ldx]]-- +-- local function markstoligature(kind,lookupname,start,stop,char) +-- -- [start]..[stop] +-- local keep = start +-- local prev = start.prev +-- local next = stop.next +-- local base = copy_glyph(start) +-- local current, start = insert_node_after(start,start,base) +-- -- [current][start]..[stop] +-- current.next = next +-- if next then +-- next.prev = current +-- end +-- start.prev = nil +-- stop.next = nil +-- current.char = char +-- current.subtype = ligature_code +-- current.components = start +-- return keep +-- end -function chainprocs.gsub_ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex) - local startchar = start.char - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local ligatures = lookuphash[lookupname] - if not ligatures then - if trace_bugs then - logwarning("%s: no ligature hits",cref(kind,chainname,chainlookupname,lookupname,chainindex)) +local function markstoligature(kind,lookupname,start,stop,char) + if start == stop and start.char == char then + return start + else + local prev = start.prev + local next = stop.next + start.prev = nil + stop.next = nil + local base = copy_glyph(start) + base.char = char + base.subtype = ligature_code + base.components = start + if prev then + prev.next = base + end + if next then + next.prev = base end + base.next = next + base.prev = prev + return base + end +end + +-- The next code is somewhat complicated by the fact that some fonts can have ligatures made +-- from ligatures that themselves have marks. This was identified by Kai in for instance +-- arabtype: KAF LAM SHADDA ALEF FATHA (0x0643 0x0644 0x0651 0x0627 0x064E). This becomes +-- KAF LAM-ALEF with a SHADDA on the first and a FATHA op de second component. In a next +-- iteration this becomes a KAF-LAM-ALEF with a SHADDA on the second and a FATHA on the +-- third component. 
+ +local function getcomponentindex(start) + if start.id ~= glyph_code then + return 0 + elseif start.subtype == ligature_code then + local i = 0 + local components = start.components + while components do + i = i + getcomponentindex(components) + components = components.next + end + return i + elseif not marks[start.char] then + return 1 else - ligatures = ligatures[startchar] - if not ligatures then - if trace_bugs then - logwarning("%s: no ligatures starting with %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar)) - end - else - local s = start.next - local discfound = false - local last = stop - local nofreplacements = 0 - local skipmark = currentlookup.flags[1] - while s do - local id = s.id - if id == disc_code then - s = s.next - discfound = true - else - local schar = s.char - if skipmark and marks[schar] then -- marks - s = s.next - else - local lg = ligatures[schar] - if lg then - ligatures, last, nofreplacements = lg, s, nofreplacements + 1 - if s == stop then - break - else - s = s.next - end - else - break - end - end + return 0 + end +end + +-- local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head +-- if start == stop and start.char == char then +-- start.char = char +-- return start +-- elseif discfound then +-- local prev = start.prev +-- local next = stop.next +-- start.prev = nil +-- stop.next = nil +-- local base = copy_glyph(start) +-- base.char = char +-- base.subtype = ligature_code +-- base.components = start -- start can have components +-- if prev then +-- prev.next = base +-- end +-- if next then +-- next.prev = base +-- end +-- base.next = next +-- base.prev = prev +-- return base +-- else +-- -- start is the ligature +-- local deletemarks = markflag ~= "mark" +-- local prev = start.prev +-- local next = stop.next +-- local base = copy_glyph(start) +-- local current, start = insert_node_after(start,start,base) +-- -- [start->current][copyofstart->start]...[stop] +-- 
current.next = next +-- if next then +-- next.prev = current +-- end +-- start.prev = nil +-- stop.next = nil +-- current.char = char +-- current.subtype = ligature_code +-- current.components = start +-- local head = current +-- -- this is messy ... we should get rid of the components eventually +-- local baseindex = 0 +-- local componentindex = 0 +-- while start do +-- local char = start.char +-- if not marks[char] then +-- baseindex = baseindex + componentindex +-- componentindex = getcomponentindex(start) +-- elseif not deletemarks then -- quite fishy +-- set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) +-- if trace_marks then +-- logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) +-- end +-- head, current = insert_node_after(head,current,copy_glyph(start)) -- unlikely that mark has components +-- end +-- start = start.next +-- end +-- start = current.next +-- while start and start.id == glyph_code do -- hm, is id test needed ? 
+-- local char = start.char +-- if marks[char] then +-- set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) +-- if trace_marks then +-- logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) +-- end +-- else +-- break +-- end +-- start = start.next +-- end +-- return head +-- end +-- end + +local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head + if start == stop and start.char == char then + start.char = char + return start + end + local prev = start.prev + local next = stop.next + start.prev = nil + stop.next = nil + local base = copy_glyph(start) + base.char = char + base.subtype = ligature_code + base.components = start -- start can have components + if prev then + prev.next = base + end + if next then + next.prev = base + end + base.next = next + base.prev = prev + if not discfound then + local deletemarks = markflag ~= "mark" + local components = start + local baseindex = 0 + local componentindex = 0 + local head = base + local current = base + while start do + local char = start.char + if not marks[char] then + baseindex = baseindex + componentindex + componentindex = getcomponentindex(start) + elseif not deletemarks then -- quite fishy + set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) + if trace_marks then + logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) end + head, current = insert_node_after(head,current,copy_node(start)) -- unlikely that mark has components end - local l2 = ligatures.ligature - if l2 then - if chainindex then - stop = last - end - if trace_ligatures then - if start == stop then - logprocess("%s: replacing character %s by ligature %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(l2)) - else - logprocess("%s: replacing character %s upto %s by ligature 
%s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(stop.char),gref(l2)) - end - end - start = toligature(kind,lookupname,start,stop,l2,currentlookup.flags[1],discfound) - return start, true, nofreplacements - elseif trace_bugs then - if start == stop then - logwarning("%s: replacing character %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar)) - else - logwarning("%s: replacing character %s upto %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(stop.char)) + start = start.next + end + local start = components + while start and start.id == glyph_code do -- hm, is id test needed ? + local char = start.char + if marks[char] then + set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) + if trace_marks then + logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) end + else + break end + start = start.next end end - return start, false, 0 + return base end -chainmores.gsub_ligature = chainprocs.gsub_ligature +function handlers.gsub_single(start,kind,lookupname,replacement) + if trace_singles then + logprocess("%s: replacing %s by single %s",pref(kind,lookupname),gref(start.char),gref(replacement)) + end + start.char = replacement + return start, true +end -function chainprocs.gpos_mark2base(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - local markchar = start.char - if marks[markchar] then - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local markanchors = lookuphash[lookupname] - if markanchors then - markanchors = markanchors[markchar] - end - if markanchors then - local base = start.prev -- [glyph] [start=mark] - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - local basechar = base.char - if marks[basechar] then - while true do - base = 
base.prev - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - basechar = base.char - if not marks[basechar] then - break - end - else - if trace_bugs then - logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) - end - return start, false - end - end - end - local baseanchors = descriptions[basechar].anchors - if baseanchors then - local baseanchors = baseanchors['basechar'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) - if trace_marks then - logprocess("%s, anchor %s, bound %s: anchoring mark %s to basechar %s => (%s,%s)", - cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) - end - return start, true - end - end - end - if trace_bugs then - logwarning("%s, no matching anchors for mark %s and base %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) - end - end +local function get_alternative_glyph(start,alternatives,value) + -- needs checking: (global value, brrr) + local choice = nil + local n = #alternatives + local char = start.char + -- + if value == "random" then + local r = random(1,n) + value, choice = format("random, choice %s",r), alternatives[r] + elseif value == "first" then + value, choice = format("first, choice %s",1), alternatives[1] + elseif value == "last" then + value, choice = format("last, choice %s",n), alternatives[n] + else + value = tonumber(value) + if type(value) ~= "number" then + value, choice = "default, choice 1", alternatives[1] + elseif value > n then + local defaultalt = otf.defaultnodealternate + if defaultalt == "first" then + value, choice = format("no %s variants, taking %s",value,n), alternatives[n] + elseif defaultalt == "last" then + value, choice = format("no %s variants, taking 
%s",value,1), alternatives[1] + else + value, choice = format("no %s variants, ignoring",value), false + end + elseif value == 0 then + value, choice = format("choice %s (no change)",value), char + elseif value < 1 then + value, choice = format("no %s variants, taking %s",value,1), alternatives[1] + else + value, choice = format("choice %s",value), alternatives[value] + end + end + return choice +end + +local function multiple_glyphs(start,multiple) -- marks ? + local nofmultiples = #multiple + if nofmultiples > 0 then + start.char = multiple[1] + if nofmultiples > 1 then + local sn = start.next + for k=2,nofmultiples do -- todo: use insert_node + local n = copy_node(start) -- ignore components + n.char = multiple[k] + n.next = sn + n.prev = start + if sn then + sn.prev = n end - elseif trace_bugs then - logwarning("%s: prev node is no char",cref(kind,chainname,chainlookupname,lookupname)) + start.next = n + start = n end - elseif trace_bugs then - logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) end - elseif trace_bugs then - logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) + return start, true + else + if trace_multiples then + logprocess("no multiple for %s",gref(start.char)) + end + return start, false end - return start, false end -function chainprocs.gpos_mark2ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - local markchar = start.char - if marks[markchar] then - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local markanchors = lookuphash[lookupname] - if markanchors then - markanchors = markanchors[markchar] +function handlers.gsub_alternate(start,kind,lookupname,alternative,sequence) + local value = featurevalue == true and tfmdata.shared.features[kind] or featurevalue + local choice = get_alternative_glyph(start,alternative,value) + if choice then + if trace_alternatives then + 
logprocess("%s: replacing %s by alternative %s (%s)",pref(kind,lookupname),gref(start.char),gref(choice),choice) end - if markanchors then - local base = start.prev -- [glyph] [optional marks] [start=mark] - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - local basechar = base.char - if marks[basechar] then - while true do - base = base.prev - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then - basechar = base.char - if not marks[basechar] then - break - end + start.char = choice + else + if trace_alternatives then + logwarning("%s: no variant %s for %s",pref(kind,lookupname),tostring(value),gref(start.char)) + end + end + return start, true +end + +function handlers.gsub_multiple(start,kind,lookupname,multiple) + if trace_multiples then + logprocess("%s: replacing %s by multiple %s",pref(kind,lookupname),gref(start.char),gref(multiple)) + end + return multiple_glyphs(start,multiple) +end + +function handlers.gsub_ligature(start,kind,lookupname,ligature,sequence) + local s, stop, discfound = start.next, nil, false + local startchar = start.char + if marks[startchar] then + while s do + local id = s.id + if id == glyph_code and s.subtype<256 and s.font == currentfont then + local lg = ligature[s.char] + if lg then + stop = s + ligature = lg + s = s.next + else + break + end + else + break + end + end + if stop then + local lig = ligature.ligature + if lig then + if trace_ligatures then + local stopchar = stop.char + start = markstoligature(kind,lookupname,start,stop,lig) + logprocess("%s: replacing %s upto %s by ligature %s",pref(kind,lookupname),gref(startchar),gref(stopchar),gref(start.char)) + else + start = markstoligature(kind,lookupname,start,stop,lig) + end + return start, true + else + -- ok, goto next lookup + end + end + else + local skipmark = sequence.flags[1] + while s do + local id = s.id + if id == glyph_code and s.subtype<256 then + if s.font == currentfont then + local 
char = s.char + if skipmark and marks[char] then + s = s.next + else + local lg = ligature[char] + if lg then + stop = s + ligature = lg + s = s.next else - if trace_bugs then - logwarning("%s: no base for mark %s",cref(kind,chainname,chainlookupname,lookupname),markchar) - end - return start, false + break end end + else + break end - -- todo: like marks a ligatures hash - local index = has_attribute(start,ligacomp) - local baseanchors = descriptions[basechar].anchors - if baseanchors then - local baseanchors = baseanchors['baselig'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - ba = ba[index] - if ba then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) -- index - if trace_marks then - logprocess("%s, anchor %s, bound %s: anchoring mark %s to baselig %s at index %s => (%s,%s)", - cref(kind,chainname,chainlookupname,lookupname),anchor,a or bound,gref(markchar),gref(basechar),index,dx,dy) - end - return start, true - end - end - end - end - if trace_bugs then - logwarning("%s: no matching anchors for mark %s and baselig %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) - end - end + elseif id == disc_code then + discfound = true + s = s.next + else + break + end + end + if stop then + local lig = ligature.ligature + if lig then + if trace_ligatures then + local stopchar = stop.char + start = toligature(kind,lookupname,start,stop,lig,skipmark,discfound) + logprocess("%s: replacing %s upto %s by ligature %s",pref(kind,lookupname),gref(startchar),gref(stopchar),gref(start.char)) + else + start = toligature(kind,lookupname,start,stop,lig,skipmark,discfound) end - elseif trace_bugs then - logwarning("feature %s, lookup %s: prev node is no char",kind,lookupname) + return start, true + else + -- ok, goto next lookup end - elseif trace_bugs then - logwarning("%s: mark %s has no 
anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) end - elseif trace_bugs then - logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) end return start, false end -function chainprocs.gpos_mark2mark(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) +--[[ldx-- +

We get hits on a mark, but we're not sure if it has to be applied so +we need to explicitly test for basechar, baselig and basemark entries.

+--ldx]]-- + +function handlers.gpos_mark2base(start,kind,lookupname,markanchors,sequence) local markchar = start.char if marks[markchar] then ---~ local alreadydone = markonce and has_attribute(start,markmark) ---~ if not alreadydone then - -- local markanchors = descriptions[markchar].anchors markanchors = markanchors and markanchors.mark - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local markanchors = lookuphash[lookupname] - if markanchors then - markanchors = markanchors[markchar] + local base = start.prev -- [glyph] [start=mark] + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + local basechar = base.char + if marks[basechar] then + while true do + base = base.prev + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + basechar = base.char + if not marks[basechar] then + break + end + else + if trace_bugs then + logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) + end + return start, false + end + end end - if markanchors then - local base = start.prev -- [glyph] [basemark] [start=mark] - -- while (base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp)) do - -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature - -- end - local slc = has_attribute(start,ligacomp) - if slc then -- a rather messy loop ... 
needs checking with husayni - while base do - local blc = has_attribute(base,ligacomp) - if blc and blc ~= slc then - base = base.prev - else + local baseanchors = descriptions[basechar] + if baseanchors then + baseanchors = baseanchors.anchors + end + if baseanchors then + local baseanchors = baseanchors['basechar'] + if baseanchors then + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = markanchors[anchor] + if ma then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) + if trace_marks then + logprocess("%s, anchor %s, bound %s: anchoring mark %s to basechar %s => (%s,%s)", + pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) + end + return start, true + end + end + end + if trace_bugs then + logwarning("%s, no matching anchors for mark %s and base %s",pref(kind,lookupname),gref(markchar),gref(basechar)) + end + end + else -- if trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) + onetimemessage(currentfont,basechar,"no base anchors",report_fonts) + end + elseif trace_bugs then + logwarning("%s: prev node is no char",pref(kind,lookupname)) + end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) + end + return start, false +end + +function handlers.gpos_mark2ligature(start,kind,lookupname,markanchors,sequence) + -- check chainpos variant + local markchar = start.char + if marks[markchar] then + local base = start.prev -- [glyph] [optional marks] [start=mark] + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + local basechar = base.char + if marks[basechar] then + while true do + base = base.prev + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + basechar = base.char + if not marks[basechar] then break end + else + if trace_bugs then + logwarning("%s: no base for mark 
%s",pref(kind,lookupname),gref(markchar)) + end + return start, false end end - if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then -- subtype test can go - local basechar = base.char - local baseanchors = descriptions[basechar].anchors - if baseanchors then - baseanchors = baseanchors['basemark'] - if baseanchors then - local al = anchorlookups[lookupname] - for anchor,ba in next, baseanchors do - if al[anchor] then - local ma = markanchors[anchor] - if ma then - local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) + end + local index = has_attribute(start,ligacomp) + local baseanchors = descriptions[basechar] + if baseanchors then + baseanchors = baseanchors.anchors + if baseanchors then + local baseanchors = baseanchors['baselig'] + if baseanchors then + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = markanchors[anchor] + if ma then + ba = ba[index] + if ba then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) -- index if trace_marks then - logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%s,%s)", - cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) + logprocess("%s, anchor %s, index %s, bound %s: anchoring mark %s to baselig %s at index %s => (%s,%s)", + pref(kind,lookupname),anchor,index,bound,gref(markchar),gref(basechar),index,dx,dy) end return start, true end end end - if trace_bugs then - logwarning("%s: no matching anchors for mark %s and basemark %s",gref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) - end + end + if trace_bugs then + logwarning("%s: no matching anchors for mark %s and baselig %s",pref(kind,lookupname),gref(markchar),gref(basechar)) end end - elseif trace_bugs then - logwarning("%s: prev node is no mark",cref(kind,chainname,chainlookupname,lookupname)) end - elseif trace_bugs 
then - logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) + else -- if trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) + onetimemessage(currentfont,basechar,"no base anchors",report_fonts) end ---~ elseif trace_marks and trace_details then ---~ logprocess("%s, mark %s is already bound (n=%s), ignoring mark2mark",pref(kind,lookupname),gref(markchar),alreadydone) ---~ end + elseif trace_bugs then + logwarning("%s: prev node is no char",pref(kind,lookupname)) + end elseif trace_bugs then - logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) + logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) end return start, false end --- ! ! ! untested ! ! ! - -function chainprocs.gpos_cursive(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - local alreadydone = cursonce and has_attribute(start,cursbase) - if not alreadydone then - local startchar = start.char - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local exitanchors = lookuphash[lookupname] - if exitanchors then - exitanchors = exitanchors[startchar] - end - if exitanchors then - local done = false - if marks[startchar] then - if trace_cursive then - logprocess("%s: ignoring cursive for mark %s",pref(kind,lookupname),gref(startchar)) +function handlers.gpos_mark2mark(start,kind,lookupname,markanchors,sequence) + local markchar = start.char + if marks[markchar] then + local base = start.prev -- [glyph] [basemark] [start=mark] + -- while base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp) do + -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature + -- end + local slc = has_attribute(start,ligacomp) + if slc then -- a rather messy loop ... 
needs checking with husayni + while base do + local blc = has_attribute(base,ligacomp) + if blc and blc ~= slc then + base = base.prev + else + break end - else - local nxt = start.next - while not done and nxt and nxt.id == glyph_code and nxt.subtype<256 and nxt.font == currentfont do - local nextchar = nxt.char - if marks[nextchar] then - -- should not happen (maybe warning) - nxt = nxt.next - else - local entryanchors = descriptions[nextchar] - if entryanchors then - entryanchors = entryanchors.anchors - if entryanchors then - entryanchors = entryanchors['centry'] - if entryanchors then - local al = anchorlookups[lookupname] - for anchor, entry in next, entryanchors do - if al[anchor] then - local exit = exitanchors[anchor] - if exit then - local dx, dy, bound = setcursive(start,nxt,tfmdata.parameters.factor,rlmode,exit,entry,characters[startchar],characters[nextchar]) - if trace_cursive then - logprocess("%s: moving %s to %s cursive (%s,%s) using anchor %s and bound %s in rlmode %s",pref(kind,lookupname),gref(startchar),gref(nextchar),dx,dy,anchor,bound,rlmode) - end - done = true - break - end - end + end + end + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then -- subtype test can go + local basechar = base.char + local baseanchors = descriptions[basechar] + if baseanchors then + baseanchors = baseanchors.anchors + if baseanchors then + baseanchors = baseanchors['basemark'] + if baseanchors then + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = markanchors[anchor] + if ma then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) + if trace_marks then + logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%s,%s)", + pref(kind,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) end + return start,true end end - else -- if trace_bugs then - -- logwarning("%s: char %s is missing in 
font",pref(kind,lookupname),gref(startchar)) - onetimemessage(currentfont,startchar,"no entry anchors",report_fonts) end - break + if trace_bugs then + logwarning("%s: no matching anchors for mark %s and basemark %s",pref(kind,lookupname),gref(markchar),gref(basechar)) + end end end + else -- if trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(basechar)) + onetimemessage(currentfont,basechar,"no base anchors",report_fonts) end - return start, done - else - if trace_cursive and trace_details then - logprocess("%s, cursive %s is already done",pref(kind,lookupname),gref(start.char),alreadydone) - end - return start, false + elseif trace_bugs then + logwarning("%s: prev node is no mark",pref(kind,lookupname)) end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",pref(kind,lookupname),gref(markchar)) end - return start, false + return start,false end -function chainprocs.gpos_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence) - -- untested .. needs checking for the new model - local startchar = start.char - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local kerns = lookuphash[lookupname] - if kerns then - kerns = kerns[startchar] -- needed ? 
- if kerns then - local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar]) - if trace_kerns then - logprocess("%s: shifting single %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),dx,dy,w,h) +function handlers.gpos_cursive(start,kind,lookupname,exitanchors,sequence) -- to be checked + local alreadydone = cursonce and has_attribute(start,cursbase) + if not alreadydone then + local done = false + local startchar = start.char + if marks[startchar] then + if trace_cursive then + logprocess("%s: ignoring cursive for mark %s",pref(kind,lookupname),gref(startchar)) + end + else + local nxt = start.next + while not done and nxt and nxt.id == glyph_code and nxt.subtype<256 and nxt.font == currentfont do + local nextchar = nxt.char + if marks[nextchar] then + -- should not happen (maybe warning) + nxt = nxt.next + else + local entryanchors = descriptions[nextchar] + if entryanchors then + entryanchors = entryanchors.anchors + if entryanchors then + entryanchors = entryanchors['centry'] + if entryanchors then + local al = anchorlookups[lookupname] + for anchor, entry in next, entryanchors do + if al[anchor] then + local exit = exitanchors[anchor] + if exit then + local dx, dy, bound = setcursive(start,nxt,tfmdata.parameters.factor,rlmode,exit,entry,characters[startchar],characters[nextchar]) + if trace_cursive then + logprocess("%s: moving %s to %s cursive (%s,%s) using anchor %s and bound %s in rlmode %s",pref(kind,lookupname),gref(startchar),gref(nextchar),dx,dy,anchor,bound,rlmode) + end + done = true + break + end + end + end + end + end + else -- if trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(startchar)) + onetimemessage(currentfont,startchar,"no entry anchors",report_fonts) + end + break + end end end + return start, done + else + if trace_cursive and trace_details then + logprocess("%s, cursive %s is already 
done",pref(kind,lookupname),gref(start.char),alreadydone) + end + return start, false end - return start, false end --- when machines become faster i will make a shared function +function handlers.gpos_single(start,kind,lookupname,kerns,sequence) + local startchar = start.char + local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar]) + if trace_kerns then + logprocess("%s: shifting single %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),dx,dy,w,h) + end + return start, false +end -function chainprocs.gpos_pair(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence) --- logwarning("%s: gpos_pair not yet supported",cref(kind,chainname,chainlookupname)) +function handlers.gpos_pair(start,kind,lookupname,kerns,sequence) + -- todo: kerns in disc nodes: pre, post, replace -> loop over disc too + -- todo: kerns in components of ligatures local snext = start.next - if snext then - local startchar = start.char - local subtables = currentlookup.subtables - local lookupname = subtables[1] - local kerns = lookuphash[lookupname] - if kerns then - kerns = kerns[startchar] - if kerns then - local lookuptype = lookuptypes[lookupname] - local prev, done = start, false - local factor = tfmdata.parameters.factor - while snext and snext.id == glyph_code and snext.subtype<256 and snext.font == currentfont do - local nextchar = snext.char - local krn = kerns[nextchar] - if not krn and marks[nextchar] then - prev = snext - snext = snext.next - else - if not krn then - -- skip - elseif type(krn) == "table" then - if lookuptype == "pair" then - local a, b = krn[2], krn[3] - if a and #a > 0 then - local startchar = start.char - local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar]) - if trace_kerns then - logprocess("%s: shifting first of pair %s and %s by (%s,%s) and correction 
(%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h) - end - end - if b and #b > 0 then - local startchar = start.char - local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar]) - if trace_kerns then - logprocess("%s: shifting second of pair %s and %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h) - end - end - else - report_process("%s: check this out (old kern stuff)",cref(kind,chainname,chainlookupname)) - local a, b = krn[2], krn[6] - if a and a ~= 0 then - local k = setkern(snext,factor,rlmode,a) - if trace_kerns then - logprocess("%s: inserting first kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(prev.char),gref(nextchar)) - end - end - if b and b ~= 0 then - logwarning("%s: ignoring second kern xoff %s",cref(kind,chainname,chainlookupname),b*factor) - end + if not snext then + return start, false + else + local prev, done = start, false + local factor = tfmdata.parameters.factor + local lookuptype = lookuptypes[lookupname] + while snext and snext.id == glyph_code and snext.subtype<256 and snext.font == currentfont do + local nextchar = snext.char + local krn = kerns[nextchar] + if not krn and marks[nextchar] then + prev = snext + snext = snext.next + else + local krn = kerns[nextchar] + if not krn then + -- skip + elseif type(krn) == "table" then + if lookuptype == "pair" then -- probably not needed + local a, b = krn[2], krn[3] + if a and #a > 0 then + local startchar = start.char + local x, y, w, h = setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar]) + if trace_kerns then + logprocess("%s: shifting first of pair %s and %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h) end - done = true - elseif krn ~= 0 then - local k = setkern(snext,factor,rlmode,krn) + end + if b and #b > 0 then + local startchar = start.char + local x, y, w, h = 
setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar]) if trace_kerns then - logprocess("%s: inserting kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(prev.char),gref(nextchar)) + logprocess("%s: shifting second of pair %s and %s by (%s,%s) and correction (%s,%s)",pref(kind,lookupname),gref(startchar),gref(nextchar),x,y,w,h) end - done = true end - break + else -- wrong ... position has different entries + report_process("%s: check this out (old kern stuff)",pref(kind,lookupname)) + -- local a, b = krn[2], krn[6] + -- if a and a ~= 0 then + -- local k = setkern(snext,factor,rlmode,a) + -- if trace_kerns then + -- logprocess("%s: inserting first kern %s between %s and %s",pref(kind,lookupname),k,gref(prev.char),gref(nextchar)) + -- end + -- end + -- if b and b ~= 0 then + -- logwarning("%s: ignoring second kern xoff %s",pref(kind,lookupname),b*factor) + -- end + end + done = true + elseif krn ~= 0 then + local k = setkern(snext,factor,rlmode,krn) + if trace_kerns then + logprocess("%s: inserting kern %s between %s and %s",pref(kind,lookupname),k,gref(prev.char),gref(nextchar)) end + done = true end - return start, done + break end end + return start, done + end +end + +--[[ldx-- +

I will implement multiple chain replacements once I run into a font that uses +it. It's not that complex to handle.

+--ldx]]-- + +local chainmores = { } +local chainprocs = { } + +local function logprocess(...) + if trace_steps then + registermessage(...) + end + report_subchain(...) +end + +local logwarning = report_subchain + +local function logprocess(...) + if trace_steps then + registermessage(...) end + report_chain(...) +end + +local logwarning = report_chain + +-- We could share functions but that would lead to extra function calls with many +-- arguments, redundant tests and confusing messages. + +function chainprocs.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname) + logwarning("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) return start, false end --- what pointer to return, spec says stop --- to be discussed ... is bidi changer a space? --- elseif char == zwnj and sequence[n][32] then -- brrr +function chainmores.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname,n) + logprocess("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) + return start, false +end --- somehow l or f is global --- we don't need to pass the currentcontext, saves a bit --- make a slow variant then can be activated but with more tracing +-- The reversesub is a special case, which is why we need to store the replacements +-- in a bit weird way. There is no lookup and the replacement comes from the lookup +-- itself. It is meant mostly for dealing with Urdu. 
-local function show_skip(kind,chainname,char,ck,class) - if ck[9] then - logwarning("%s: skipping char %s (%s) in rule %s, lookuptype %s (%s=>%s)",cref(kind,chainname),gref(char),class,ck[1],ck[2],ck[9],ck[10]) +function chainprocs.reversesub(start,stop,kind,chainname,currentcontext,lookuphash,replacements) + local char = start.char + local replacement = replacements[char] + if replacement then + if trace_singles then + logprocess("%s: single reverse replacement of %s by %s",cref(kind,chainname),gref(char),gref(replacement)) + end + start.char = replacement + return start, true else - logwarning("%s: skipping char %s (%s) in rule %s, lookuptype %s",cref(kind,chainname),gref(char),class,ck[1],ck[2]) + return start, false end end -local function normal_handle_contextchain(start,kind,chainname,contexts,sequence,lookuphash) - -- local rule, lookuptype, sequence, f, l, lookups = ck[1], ck[2] ,ck[3], ck[4], ck[5], ck[6] - local flags = sequence.flags - local done = false - local skipmark = flags[1] - local skipligature = flags[2] - local skipbase = flags[3] - local someskip = skipmark or skipligature or skipbase -- could be stored in flags for a fast test (hm, flags could be false !) 
- local markclass = sequence.markclass -- todo, first we need a proper test - local skipped = false - for k=1,#contexts do - local match = true - local current = start - local last = start - local ck = contexts[k] - local seq = ck[3] - local s = #seq - -- f..l = mid string - if s == 1 then - -- never happens - match = current.id == glyph_code and current.subtype<256 and current.font == currentfont and seq[1][current.char] - else - -- maybe we need a better space check (maybe check for glue or category or combination) - -- we cannot optimize for n=2 because there can be disc nodes - local f, l = ck[4], ck[5] - -- current match - if f == 1 and f == l then -- current only - -- already a hit - -- match = true - else -- before/current/after | before/current | current/after - -- no need to test first hit (to be optimized) - if f == l then -- new, else last out of sync (f is > 1) - -- match = true - else - local n = f + 1 - last = last.next - while n <= l do - if last then - local id = last.id - if id == glyph_code then - if last.subtype<256 and last.font == currentfont then - local char = last.char - local ccd = descriptions[char] - if ccd then - local class = ccd.class - if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then - skipped = true - if trace_skips then - show_skip(kind,chainname,char,ck,class) - end - last = last.next - elseif seq[n][char] then - if n < l then - last = last.next - end - n = n + 1 - else - match = false - break - end - else - match = false - break - end - else - match = false - break - end - elseif id == disc_code then - last = last.next - else - match = false - break - end - else - match = false - break - end - end - end +--[[ldx-- +

This chain stuff is somewhat tricky since we can have a sequence of actions to be +applied: single, alternate, multiple or ligature where ligature can be an invalid +one in the sense that it will replace multiple by one but not necessarily one that +looks like the combination (i.e. it is the counterpart of multiple then). For +example, the following is valid:

+ + +xxxabcdexxx [single a->A][multiple b->BCD][ligature cde->E] xxxABCDExxx + + +

Therefore we don't really do the replacement here already unless we have the +single lookup case. The efficiency of the replacements can be improved by deleting +as little as needed but that would also make the code even more messy.

+--ldx]]-- + +local function delete_till_stop(start,stop,ignoremarks) -- keeps start + local n = 1 + if start == stop then + -- done + elseif ignoremarks then + repeat -- start x x m x x stop => start m + local next = start.next + if not marks[next.char] then +local components = next.components +if components then -- probably not needed + flush_node_list(components) +end + delete_node(start,next) end - -- before - if match and f > 1 then - local prev = start.prev - if prev then - local n = f-1 - while n >= 1 do - if prev then - local id = prev.id - if id == glyph_code then - if prev.subtype<256 and prev.font == currentfont then -- normal char - local char = prev.char - local ccd = descriptions[char] - if ccd then - local class = ccd.class - if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then - skipped = true - if trace_skips then - show_skip(kind,chainname,char,ck,class) - end - elseif seq[n][char] then - n = n -1 - else - match = false - break - end - else - match = false - break - end - else - match = false - break - end - elseif id == disc_code then - -- skip 'm - elseif seq[n][32] then - n = n -1 - else - match = false - break - end - prev = prev.prev - elseif seq[n][32] then -- somewhat special, as zapfino can have many preceding spaces - n = n -1 - else - match = false - break - end - end - elseif f == 2 then - match = seq[1][32] - else - for n=f-1,1 do - if not seq[n][32] then - match = false - break - end - end + n = n + 1 + until next == stop + else -- start x x x stop => start + repeat + local next = start.next +local components = next.components +if components then -- probably not needed + flush_node_list(components) +end + delete_node(start,next) + n = n + 1 + until next == stop + end + return n +end + +--[[ldx-- +

Here we replace start by a single variant. First we delete the rest of the +match.

+--ldx]]-- + +function chainprocs.gsub_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex) + -- todo: marks ? + local current = start + local subtables = currentlookup.subtables + if #subtables > 1 then + logwarning("todo: check if we need to loop over the replacements: %s",concat(subtables," ")) + end + while current do + if current.id == glyph_code then + local currentchar = current.char + local lookupname = subtables[1] -- only 1 + local replacement = lookuphash[lookupname] + if not replacement then + if trace_bugs then + logwarning("%s: no single hits",cref(kind,chainname,chainlookupname,lookupname,chainindex)) end - end - -- after - if match and s > l then - local current = last and last.next - if current then - -- removed optimization for s-l == 1, we have to deal with marks anyway - local n = l + 1 - while n <= s do - if current then - local id = current.id - if id == glyph_code then - if current.subtype<256 and current.font == currentfont then -- normal char - local char = current.char - local ccd = descriptions[char] - if ccd then - local class = ccd.class - if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then - skipped = true - if trace_skips then - show_skip(kind,chainname,char,ck,class) - end - elseif seq[n][char] then - n = n + 1 - else - match = false - break - end - else - match = false - break - end - else - match = false - break - end - elseif id == disc_code then - -- skip 'm - elseif seq[n][32] then -- brrr - n = n + 1 - else - match = false - break - end - current = current.next - elseif seq[n][32] then - n = n + 1 - else - match = false - break - end + else + replacement = replacement[currentchar] + if not replacement then + if trace_bugs then + logwarning("%s: no single for %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(currentchar)) end - elseif s-l == 1 then - match = seq[s][32] else - for 
n=l+1,s do - if not seq[n][32] then - match = false - break - end - end - end - end - end - if match then - -- ck == currentcontext - if trace_contexts then - local rule, lookuptype, f, l = ck[1], ck[2], ck[4], ck[5] - local char = start.char - if ck[9] then - logwarning("%s: rule %s matches at char %s for (%s,%s,%s) chars, lookuptype %s (%s=>%s)", - cref(kind,chainname),rule,gref(char),f-1,l-f+1,s-l,lookuptype,ck[9],ck[10]) - else - logwarning("%s: rule %s matches at char %s for (%s,%s,%s) chars, lookuptype %s", - cref(kind,chainname),rule,gref(char),f-1,l-f+1,s-l,lookuptype) - end - end - local chainlookups = ck[6] - if chainlookups then - local nofchainlookups = #chainlookups - -- we can speed this up if needed - if nofchainlookups == 1 then - local chainlookupname = chainlookups[1] - local chainlookup = lookuptable[chainlookupname] - if chainlookup then - local cp = chainprocs[chainlookup.type] - if cp then - start, done = cp(start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence) - else - logprocess("%s: %s is not yet supported",cref(kind,chainname,chainlookupname),chainlookup.type) - end - else -- shouldn't happen - logprocess("%s is not yet supported",cref(kind,chainname,chainlookupname)) - end - else - local i = 1 - repeat - if skipped then - while true do - local char = start.char - local ccd = descriptions[char] - if ccd then - local class = ccd.class - if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then - start = start.next - else - break - end - else - break - end - end - end - local chainlookupname = chainlookups[i] - local chainlookup = lookuptable[chainlookupname] -- can be false (n matches, nofchainlookups - end - else - local replacements = ck[7] - if replacements then - start, done = chainprocs.reversesub(start,last,kind,chainname,ck,lookuphash,replacements) -- sequence - else - done = true -- can be meant to be skipped - if trace_contexts then 
- logprocess("%s: skipping match",cref(kind,chainname)) + if trace_singles then + logprocess("%s: replacing single %s by %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(currentchar),gref(replacement)) end + current.char = replacement end end + return start, true + elseif current == stop then + break + else + current = current.next end end - return start, done + return start, false end --- Because we want to keep this elsewhere (an because speed is less an issue) we --- pass the font id so that the verbose variant can access the relevant helper tables. - -local verbose_handle_contextchain = function(font,...) - logwarning("no verbose handler installed, reverting to 'normal'") - otf.setcontextchain() - return normal_handle_contextchain(...) -end +chainmores.gsub_single = chainprocs.gsub_single -otf.chainhandlers = { - normal = normal_handle_contextchain, - verbose = verbose_handle_contextchain, -} +--[[ldx-- +

Here we replace start by a sequence of new glyphs. First we delete the rest of +the match.

+--ldx]]-- -function otf.setcontextchain(method) - if not method or method == "normal" or not otf.chainhandlers[method] then - if handlers.contextchain then -- no need for a message while making the format - logwarning("installing normal contextchain handler") +function chainprocs.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + delete_till_stop(start,stop) -- we could pass ignoremarks as #3 .. + local startchar = start.char + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local replacements = lookuphash[lookupname] + if not replacements then + if trace_bugs then + logwarning("%s: no multiple hits",cref(kind,chainname,chainlookupname,lookupname)) end - handlers.contextchain = normal_handle_contextchain else - logwarning("installing contextchain handler '%s'",method) - local handler = otf.chainhandlers[method] - handlers.contextchain = function(...) - return handler(currentfont,...) -- hm, get rid of ... + replacements = replacements[startchar] + if not replacements then + if trace_bugs then + logwarning("%s: no multiple for %s",cref(kind,chainname,chainlookupname,lookupname),gref(startchar)) + end + else + if trace_multiples then + logprocess("%s: replacing %s by multiple characters %s",cref(kind,chainname,chainlookupname,lookupname),gref(startchar),gref(replacements)) + end + return multiple_glyphs(start,replacements) end end - handlers.gsub_context = handlers.contextchain - handlers.gsub_contextchain = handlers.contextchain - handlers.gsub_reversecontextchain = handlers.contextchain - handlers.gpos_contextchain = handlers.contextchain - handlers.gpos_context = handlers.contextchain -end - -otf.setcontextchain() - -local missing = { } -- we only report once - -local function logprocess(...) - if trace_steps then - registermessage(...) - end - report_process(...) 
-end - -local logwarning = report_process - -local function report_missing_cache(typ,lookup) - local f = missing[currentfont] if not f then f = { } missing[currentfont] = f end - local t = f[typ] if not t then t = { } f[typ] = t end - if not t[lookup] then - t[lookup] = true - logwarning("missing cache for lookup %s of type %s in font %s (%s)",lookup,typ,currentfont,tfmdata.properties.fullname) - end + return start, false end -local resolved = { } -- we only resolve a font,script,language pair once - --- todo: pass all these 'locals' in a table +-- function chainmores.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) +-- logprocess("%s: gsub_multiple not yet supported",cref(kind,chainname,chainlookupname)) +-- return start, false +-- end -local lookuphashes = { } +chainmores.gsub_multiple = chainprocs.gsub_multiple -setmetatableindex(lookuphashes, function(t,font) - local lookuphash = fontdata[font].resources.lookuphash - if not lookuphash or not next(lookuphash) then - lookuphash = false - end - t[font] = lookuphash - return lookuphash -end) +--[[ldx-- +

Here we replace start by a new glyph. First we delete the rest of the match.

+--ldx]]-- --- fonts.hashes.lookups = lookuphashes +-- char_1 mark_1 -> char_x mark_1 (ignore marks) +-- char_1 mark_1 -> char_x -local special_attributes = { - init = 1, - medi = 2, - fina = 3, - isol = 4, - -- devanagari - rphf = 5, - half = 6, - pref = 7, - blwf = 8, - pstf = 9, -} +-- to be checked: do we always have just one glyph? +-- we can also have alternates for marks +-- marks come last anyway +-- are there cases where we need to delete the mark -local function initialize(sequence,script,language,enabled) - local features = sequence.features - if features then - for kind, scripts in next, features do - local valid = enabled[kind] - if valid then - local languages = scripts[script] or scripts[wildcard] - if languages and (languages[language] or languages[wildcard]) then - return { valid, special_attributes[kind] or false, sequence.chain or 0, kind, sequence } +function chainprocs.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + local current = start + local subtables = currentlookup.subtables + local value = featurevalue == true and tfmdata.shared.features[kind] or featurevalue + while current do + if current.id == glyph_code then -- is this check needed? 
+ local currentchar = current.char + local lookupname = subtables[1] + local alternatives = lookuphash[lookupname] + if not alternatives then + if trace_bugs then + logwarning("%s: no alternative hit",cref(kind,chainname,chainlookupname,lookupname)) + end + else + alternatives = alternatives[currentchar] + if alternatives then + local choice = get_alternative_glyph(current,alternatives,value) + if choice then + if trace_alternatives then + logprocess("%s: replacing %s by alternative %s (%s)",cref(kind,chainname,chainlookupname,lookupname),gref(char),gref(choice),choice) + end + start.char = choice + else + if trace_alternatives then + logwarning("%s: no variant %s for %s",cref(kind,chainname,chainlookupname,lookupname),tostring(value),gref(char)) + end + end + elseif trace_bugs then + logwarning("%s: no alternative for %s",cref(kind,chainname,chainlookupname,lookupname),gref(currentchar)) end end + return start, true + elseif current == stop then + break + else + current = current.next end end - return false + return start, false end -function otf.dataset(tfmdata,font) -- generic variant, overloaded in context - local shared = tfmdata.shared - local properties = tfmdata.properties - local language = properties.language or "dflt" - local script = properties.script or "dflt" - local enabled = shared.features - local res = resolved[font] - if not res then - res = { } - resolved[font] = res - end - local rs = res[script] - if not rs then - rs = { } - res[script] = rs - end - local rl = rs[language] - if not rl then - rl = { - -- indexed but we can also add specific data by key - } - rs[language] = rl - local sequences = tfmdata.resources.sequences - setmetatableindex(rl, function(t,k) - if type(k) == "number" then - local v = enabled and initialize(sequences[k],script,language,enabled) - t[k] = v - return v - end - end) - end - return rl -end - --- elseif id == glue_code then --- if p[5] then -- chain --- local pc = pp[32] --- if pc then --- start, ok = start, false -- 
p[1](start,kind,p[2],pc,p[3],p[4]) --- if ok then --- done = true --- end --- if start then start = start.next end --- else --- start = start.next --- end --- else --- start = start.next --- end - --- there will be a new direction parser (pre-parsed etc) - -local function featuresprocessor(head,font,attr) +-- function chainmores.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) +-- logprocess("%s: gsub_alternate not yet supported",cref(kind,chainname,chainlookupname)) +-- return start, false +-- end - local lookuphash = lookuphashes[font] -- we can also check sequences here +chainmores.gsub_alternate = chainprocs.gsub_alternate - if not lookuphash then - return head, false - end +--[[ldx-- +

When we replace ligatures we use a helper that handles the marks. I might change +this function (move code inline and handle the marks by a separate function). We +assume rather stupid ligatures (no complex disc nodes).

+--ldx]]-- - if trace_steps then - checkstep(head) +function chainprocs.gsub_ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex) + local startchar = start.char + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local ligatures = lookuphash[lookupname] + if not ligatures then + if trace_bugs then + logwarning("%s: no ligature hits",cref(kind,chainname,chainlookupname,lookupname,chainindex)) + end + else + ligatures = ligatures[startchar] + if not ligatures then + if trace_bugs then + logwarning("%s: no ligatures starting with %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar)) + end + else + local s = start.next + local discfound = false + local last = stop + local nofreplacements = 0 + local skipmark = currentlookup.flags[1] + while s do + local id = s.id + if id == disc_code then + s = s.next + discfound = true + else + local schar = s.char + if skipmark and marks[schar] then -- marks + s = s.next + else + local lg = ligatures[schar] + if lg then + ligatures, last, nofreplacements = lg, s, nofreplacements + 1 + if s == stop then + break + else + s = s.next + end + else + break + end + end + end + end + local l2 = ligatures.ligature + if l2 then + if chainindex then + stop = last + end + if trace_ligatures then + if start == stop then + logprocess("%s: replacing character %s by ligature %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(l2)) + else + logprocess("%s: replacing character %s upto %s by ligature %s",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(stop.char),gref(l2)) + end + end + start = toligature(kind,lookupname,start,stop,l2,currentlookup.flags[1],discfound) + return start, true, nofreplacements + elseif trace_bugs then + if start == stop then + logwarning("%s: replacing character %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar)) + 
else + logwarning("%s: replacing character %s upto %s by ligature fails",cref(kind,chainname,chainlookupname,lookupname,chainindex),gref(startchar),gref(stop.char)) + end + end + end end + return start, false, 0 +end - tfmdata = fontdata[font] - descriptions = tfmdata.descriptions - characters = tfmdata.characters - resources = tfmdata.resources - - marks = resources.marks - anchorlookups = resources.lookup_to_anchor - lookuptable = resources.lookups - lookuptypes = resources.lookuptypes - - currentfont = font - rlmode = 0 - - local sequences = resources.sequences - local done = false - local datasets = otf.dataset(tfmdata,font,attr) - - local dirstack = { } -- could move outside function - - -- We could work on sub start-stop ranges instead but I wonder if there is that - -- much speed gain (experiments showed that it made not much sense) and we need - -- to keep track of directions anyway. Also at some point I want to play with - -- font interactions and then we do need the full sweeps. - - -- Keeping track of the headnode is needed for devanagari (I generalized it a bit - -- so that multiple cases are also covered. +chainmores.gsub_ligature = chainprocs.gsub_ligature - for s=1,#sequences do - local dataset = datasets[s] - if dataset then - featurevalue = dataset[1] -- todo: pass to function instead of using a global - if featurevalue then - local sequence = sequences[s] -- also dataset[5] - local rlparmode = 0 - local topstack = 0 - local success = false - local attribute = dataset[2] - local chain = dataset[3] -- sequence.chain or 0 - local typ = sequence.type - local subtables = sequence.subtables - if chain < 0 then - -- this is a limited case, no special treatments like 'init' etc - local handler = handlers[typ] - -- we need to get rid of this slide! 
probably no longer needed in latest luatex - local start = find_node_tail(head) -- slow (we can store tail because there's always a skip at the end): todo - while start do - local id = start.id - if id == glyph_code then - if start.subtype<256 and start.font == font then - local a = has_attribute(start,0) - if a then - a = a == attr - else - a = true - end - if a then - for i=1,#subtables do - local lookupname = subtables[i] - local lookupcache = lookuphash[lookupname] - if lookupcache then - local lookupmatch = lookupcache[start.char] - if lookupmatch then - local headnode = start == head - start, success = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) - if success then - if headnode then - head = start - end - break - end - end - else - report_missing_cache(typ,lookupname) - end +function chainprocs.gpos_mark2base(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + local markchar = start.char + if marks[markchar] then + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local markanchors = lookuphash[lookupname] + if markanchors then + markanchors = markanchors[markchar] + end + if markanchors then + local base = start.prev -- [glyph] [start=mark] + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + local basechar = base.char + if marks[basechar] then + while true do + base = base.prev + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + basechar = base.char + if not marks[basechar] then + break + end + else + if trace_bugs then + logwarning("%s: no base for mark %s",pref(kind,lookupname),gref(markchar)) + end + return start, false + end + end + end + local baseanchors = descriptions[basechar].anchors + if baseanchors then + local baseanchors = baseanchors['basechar'] + if baseanchors then + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = 
markanchors[anchor] + if ma then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) + if trace_marks then + logprocess("%s, anchor %s, bound %s: anchoring mark %s to basechar %s => (%s,%s)", + cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) end - if start then start = start.prev end - else - start = start.prev + return start, true end - else - start = start.prev end - else - start = start.prev + end + if trace_bugs then + logwarning("%s, no matching anchors for mark %s and base %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) end end - else - local handler = handlers[typ] - local ns = #subtables - local start = head -- local ? - rlmode = 0 -- to be checked ? - if ns == 1 then -- happens often - local lookupname = subtables[1] - local lookupcache = lookuphash[lookupname] - if not lookupcache then -- also check for empty cache - report_missing_cache(typ,lookupname) - else - while start do - local id = start.id - if id == glyph_code then - if start.subtype<256 and start.font == font then - local a = has_attribute(start,0) - if a then - a = (a == attr) and (not attribute or has_attribute(start,state,attribute)) - else - a = not attribute or has_attribute(start,state,attribute) - end - if a then - local lookupmatch = lookupcache[start.char] - if lookupmatch then - -- sequence kan weg - local headnode = start == head - local ok - start, ok = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,1) - if ok then - success = true - if headnode then - head = start - end - end - end - if start then start = start.next end - else - start = start.next - end - else - start = start.next - end - elseif id == whatsit_code then -- will be function - local subtype = start.subtype - if subtype == dir_code then - local dir = start.dir - if dir == "+TRT" or dir == "+TLT" then - topstack = topstack + 1 - dirstack[topstack] = dir - elseif dir == "-TRT" or dir == 
"-TLT" then - topstack = topstack - 1 - end - local newdir = dirstack[topstack] - if newdir == "+TRT" then - rlmode = -1 - elseif newdir == "+TLT" then - rlmode = 1 - else - rlmode = rlparmode - end - if trace_directions then - report_process("directions after txtdir %s: txtdir=%s:%s, parmode=%s, txtmode=%s",dir,topstack,newdir or "unset",rlparmode,rlmode) - end - elseif subtype == localpar_code then - local dir = start.dir - if dir == "TRT" then - rlparmode = -1 - elseif dir == "TLT" then - rlparmode = 1 - else - rlparmode = 0 - end - rlmode = rlparmode - if trace_directions then - report_process("directions after pardir %s: parmode=%s, txtmode=%s",dir,rlparmode,rlmode) - end - end - start = start.next - else - start = start.next - end + end + elseif trace_bugs then + logwarning("%s: prev node is no char",cref(kind,chainname,chainlookupname,lookupname)) + end + elseif trace_bugs then + logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) + end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) + end + return start, false +end + +function chainprocs.gpos_mark2ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + local markchar = start.char + if marks[markchar] then + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local markanchors = lookuphash[lookupname] + if markanchors then + markanchors = markanchors[markchar] + end + if markanchors then + local base = start.prev -- [glyph] [optional marks] [start=mark] + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + local basechar = base.char + if marks[basechar] then + while true do + base = base.prev + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then + basechar = base.char + if not marks[basechar] then + break + end + else + if trace_bugs then + 
logwarning("%s: no base for mark %s",cref(kind,chainname,chainlookupname,lookupname),markchar) end + return start, false end - else - while start do - local id = start.id - if id == glyph_code then - if start.subtype<256 and start.font == font then - local a = has_attribute(start,0) - if a then - a = (a == attr) and (not attribute or has_attribute(start,state,attribute)) - else - a = not attribute or has_attribute(start,state,attribute) - end - if a then - for i=1,ns do - local lookupname = subtables[i] - local lookupcache = lookuphash[lookupname] - if lookupcache then - local lookupmatch = lookupcache[start.char] - if lookupmatch then - -- we could move all code inline but that makes things even more unreadable - local headnode = start == head - local ok - start, ok = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) - if ok then - success = true - if headnode then - head = start - end - break - end - end - else - report_missing_cache(typ,lookupname) - end + end + end + -- todo: like marks a ligatures hash + local index = has_attribute(start,ligacomp) + local baseanchors = descriptions[basechar].anchors + if baseanchors then + local baseanchors = baseanchors['baselig'] + if baseanchors then + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = markanchors[anchor] + if ma then + ba = ba[index] + if ba then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) -- index + if trace_marks then + logprocess("%s, anchor %s, bound %s: anchoring mark %s to baselig %s at index %s => (%s,%s)", + cref(kind,chainname,chainlookupname,lookupname),anchor,a or bound,gref(markchar),gref(basechar),index,dx,dy) end - if start then start = start.next end - else - start = start.next - end - else - start = start.next - end - elseif id == whatsit_code then - local subtype = start.subtype - if subtype == dir_code then - local dir = start.dir - if dir == "+TRT" or dir == "+TLT" then 
- topstack = topstack + 1 - dirstack[topstack] = dir - elseif dir == "-TRT" or dir == "-TLT" then - topstack = topstack - 1 - end - local newdir = dirstack[topstack] - if newdir == "+TRT" then - rlmode = -1 - elseif newdir == "+TLT" then - rlmode = 1 - else - rlmode = rlparmode - end - if trace_directions then - report_process("directions after txtdir %s: txtdir=%s:%s, parmode=%s, txtmode=%s",dir,topstack,newdir or "unset",rlparmode,rlmode) - end - elseif subtype == localpar_code then - local dir = start.dir - if dir == "TRT" then - rlparmode = -1 - elseif dir == "TLT" then - rlparmode = 1 - else - rlparmode = 0 - end - rlmode = rlparmode - if trace_directions then - report_process("directions after pardir %s: parmode=%s, txtmode=%s",dir,rlparmode,rlmode) + return start, true end end - start = start.next - else - start = start.next end end + if trace_bugs then + logwarning("%s: no matching anchors for mark %s and baselig %s",cref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) + end end end - if success then - done = true - end - if trace_steps then -- ? 
- registerstep(head) - end + elseif trace_bugs then + logwarning("feature %s, lookup %s: prev node is no char",kind,lookupname) end + elseif trace_bugs then + logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) end - return head, done -end - -local function generic(lookupdata,lookupname,unicode,lookuphash) - local target = lookuphash[lookupname] - if target then - target[unicode] = lookupdata - else - lookuphash[lookupname] = { [unicode] = lookupdata } - end + return start, false end -local action = { - - substitution = generic, - multiple = generic, - alternate = generic, - position = generic, - - ligature = function(lookupdata,lookupname,unicode,lookuphash) - local target = lookuphash[lookupname] - if not target then - target = { } - lookuphash[lookupname] = target - end - for i=1,#lookupdata do - local li = lookupdata[i] - local tu = target[li] - if not tu then - tu = { } - target[li] = tu - end - target = tu - end - target.ligature = unicode - end, - - pair = function(lookupdata,lookupname,unicode,lookuphash) - local target = lookuphash[lookupname] - if not target then - target = { } - lookuphash[lookupname] = target - end - local others = target[unicode] - local paired = lookupdata[1] - if others then - others[paired] = lookupdata - else - others = { [paired] = lookupdata } - target[unicode] = others - end - end, - -} - -local function prepare_lookups(tfmdata) - - local rawdata = tfmdata.shared.rawdata - local resources = rawdata.resources - local lookuphash = resources.lookuphash - local anchor_to_lookup = resources.anchor_to_lookup - local lookup_to_anchor = resources.lookup_to_anchor - local lookuptypes = resources.lookuptypes - local characters = tfmdata.characters - local descriptions = tfmdata.descriptions - - -- we cannot free the entries in the descriptions as sometimes we access - 
-- then directly (for instance anchors) ... selectively freeing does save - -- much memory as it's only a reference to a table and the slot in the - -- description hash is not freed anyway - - for unicode, character in next, characters do -- we cannot loop over descriptions ! - - local description = descriptions[unicode] - - if description then - - local lookups = description.slookups - if lookups then - for lookupname, lookupdata in next, lookups do - action[lookuptypes[lookupname]](lookupdata,lookupname,unicode,lookuphash) - end +function chainprocs.gpos_mark2mark(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + local markchar = start.char + if marks[markchar] then +--~ local alreadydone = markonce and has_attribute(start,markmark) +--~ if not alreadydone then + -- local markanchors = descriptions[markchar].anchors markanchors = markanchors and markanchors.mark + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local markanchors = lookuphash[lookupname] + if markanchors then + markanchors = markanchors[markchar] end - - local lookups = description.mlookups - if lookups then - for lookupname, lookuplist in next, lookups do - local lookuptype = lookuptypes[lookupname] - for l=1,#lookuplist do - local lookupdata = lookuplist[l] - action[lookuptype](lookupdata,lookupname,unicode,lookuphash) + if markanchors then + local base = start.prev -- [glyph] [basemark] [start=mark] + -- while (base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp)) do + -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature + -- end + local slc = has_attribute(start,ligacomp) + if slc then -- a rather messy loop ... 
needs checking with husayni + while base do + local blc = has_attribute(base,ligacomp) + if blc and blc ~= slc then + base = base.prev + else + break + end end end - end - - local list = description.kerns - if list then - for lookup, krn in next, list do -- ref to glyph, saves lookup - local target = lookuphash[lookup] - if target then - target[unicode] = krn - else - lookuphash[lookup] = { [unicode] = krn } + if base and base.id == glyph_code and base.subtype<256 and base.font == currentfont then -- subtype test can go + local basechar = base.char + local baseanchors = descriptions[basechar].anchors + if baseanchors then + baseanchors = baseanchors['basemark'] + if baseanchors then + local al = anchorlookups[lookupname] + for anchor,ba in next, baseanchors do + if al[anchor] then + local ma = markanchors[anchor] + if ma then + local dx, dy, bound = setmark(start,base,tfmdata.parameters.factor,rlmode,ba,ma) + if trace_marks then + logprocess("%s, anchor %s, bound %s: anchoring mark %s to basemark %s => (%s,%s)", + cref(kind,chainname,chainlookupname,lookupname),anchor,bound,gref(markchar),gref(basechar),dx,dy) + end + return start, true + end + end + end + if trace_bugs then + logwarning("%s: no matching anchors for mark %s and basemark %s",gref(kind,chainname,chainlookupname,lookupname),gref(markchar),gref(basechar)) + end + end end + elseif trace_bugs then + logwarning("%s: prev node is no mark",cref(kind,chainname,chainlookupname,lookupname)) end + elseif trace_bugs then + logwarning("%s: mark %s has no anchors",cref(kind,chainname,chainlookupname,lookupname),gref(markchar)) end +--~ elseif trace_marks and trace_details then +--~ logprocess("%s, mark %s is already bound (n=%s), ignoring mark2mark",pref(kind,lookupname),gref(markchar),alreadydone) +--~ end + elseif trace_bugs then + logwarning("%s: mark %s is no mark",cref(kind,chainname,chainlookupname),gref(markchar)) + end + return start, false +end - local list = description.anchors - if list then - for typ, 
anchors in next, list do -- types - if typ == "mark" or typ == "cexit" then -- or entry? - for name, anchor in next, anchors do - local lookups = anchor_to_lookup[name] - if lookups then - for lookup, _ in next, lookups do - local target = lookuphash[lookup] - if target then - target[unicode] = anchors - else - lookuphash[lookup] = { [unicode] = anchors } +-- ! ! ! untested ! ! ! + +function chainprocs.gpos_cursive(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) + local alreadydone = cursonce and has_attribute(start,cursbase) + if not alreadydone then + local startchar = start.char + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local exitanchors = lookuphash[lookupname] + if exitanchors then + exitanchors = exitanchors[startchar] + end + if exitanchors then + local done = false + if marks[startchar] then + if trace_cursive then + logprocess("%s: ignoring cursive for mark %s",pref(kind,lookupname),gref(startchar)) + end + else + local nxt = start.next + while not done and nxt and nxt.id == glyph_code and nxt.subtype<256 and nxt.font == currentfont do + local nextchar = nxt.char + if marks[nextchar] then + -- should not happen (maybe warning) + nxt = nxt.next + else + local entryanchors = descriptions[nextchar] + if entryanchors then + entryanchors = entryanchors.anchors + if entryanchors then + entryanchors = entryanchors['centry'] + if entryanchors then + local al = anchorlookups[lookupname] + for anchor, entry in next, entryanchors do + if al[anchor] then + local exit = exitanchors[anchor] + if exit then + local dx, dy, bound = setcursive(start,nxt,tfmdata.parameters.factor,rlmode,exit,entry,characters[startchar],characters[nextchar]) + if trace_cursive then + logprocess("%s: moving %s to %s cursive (%s,%s) using anchor %s and bound %s in rlmode %s",pref(kind,lookupname),gref(startchar),gref(nextchar),dx,dy,anchor,bound,rlmode) + end + done = true + break + end + end end end end + else -- if 
trace_bugs then + -- logwarning("%s: char %s is missing in font",pref(kind,lookupname),gref(startchar)) + onetimemessage(currentfont,startchar,"no entry anchors",report_fonts) end + break end end end - + return start, done + else + if trace_cursive and trace_details then + logprocess("%s, cursive %s is already done",pref(kind,lookupname),gref(start.char),alreadydone) + end + return start, false end - end - + return start, false end -local function split(replacement,original) - local result = { } - for i=1,#replacement do - result[original[i]] = replacement[i] +function chainprocs.gpos_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence) + -- untested .. needs checking for the new model + local startchar = start.char + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local kerns = lookuphash[lookupname] + if kerns then + kerns = kerns[startchar] -- needed ? + if kerns then + local dx, dy, w, h = setpair(start,tfmdata.parameters.factor,rlmode,sequence.flags[4],kerns,characters[startchar]) + if trace_kerns then + logprocess("%s: shifting single %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),dx,dy,w,h) + end + end end - return result + return start, false end -local valid = { - coverage = { chainsub = true, chainpos = true, contextsub = true }, - reversecoverage = { reversesub = true }, - glyphs = { chainsub = true, chainpos = true }, -} +-- when machines become faster i will make a shared function -local function prepare_contextchains(tfmdata) - local rawdata = tfmdata.shared.rawdata - local resources = rawdata.resources - local lookuphash = resources.lookuphash - local lookups = rawdata.lookups - if lookups then - for lookupname, lookupdata in next, rawdata.lookups do - local lookuptype = lookupdata.type - if lookuptype then - local rules = lookupdata.rules - if rules then - local format = lookupdata.format - local validformat = 
valid[format] - if not validformat then - report_prepare("unsupported format %s",format) - elseif not validformat[lookuptype] then - -- todo: dejavu-serif has one (but i need to see what use it has) - report_prepare("unsupported %s %s for %s",format,lookuptype,lookupname) - else - local contexts = lookuphash[lookupname] - if not contexts then - contexts = { } - lookuphash[lookupname] = contexts - end - local t, nt = { }, 0 - for nofrules=1,#rules do - local rule = rules[nofrules] - local current = rule.current - local before = rule.before - local after = rule.after - local replacements = rule.replacements - local sequence = { } - local nofsequences = 0 - -- Wventually we can store start, stop and sequence in the cached file - -- but then less sharing takes place so best not do that without a lot - -- of profiling so let's forget about it. - if before then - for n=1,#before do - nofsequences = nofsequences + 1 - sequence[nofsequences] = before[n] +function chainprocs.gpos_pair(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,chainindex,sequence) +-- logwarning("%s: gpos_pair not yet supported",cref(kind,chainname,chainlookupname)) + local snext = start.next + if snext then + local startchar = start.char + local subtables = currentlookup.subtables + local lookupname = subtables[1] + local kerns = lookuphash[lookupname] + if kerns then + kerns = kerns[startchar] + if kerns then + local lookuptype = lookuptypes[lookupname] + local prev, done = start, false + local factor = tfmdata.parameters.factor + while snext and snext.id == glyph_code and snext.subtype<256 and snext.font == currentfont do + local nextchar = snext.char + local krn = kerns[nextchar] + if not krn and marks[nextchar] then + prev = snext + snext = snext.next + else + if not krn then + -- skip + elseif type(krn) == "table" then + if lookuptype == "pair" then + local a, b = krn[2], krn[3] + if a and #a > 0 then + local startchar = start.char + local x, y, w, h = 
setpair(start,factor,rlmode,sequence.flags[4],a,characters[startchar]) + if trace_kerns then + logprocess("%s: shifting first of pair %s and %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h) + end end - end - local start = nofsequences + 1 - for n=1,#current do - nofsequences = nofsequences + 1 - sequence[nofsequences] = current[n] - end - local stop = nofsequences - if after then - for n=1,#after do - nofsequences = nofsequences + 1 - sequence[nofsequences] = after[n] + if b and #b > 0 then + local startchar = start.char + local x, y, w, h = setpair(snext,factor,rlmode,sequence.flags[4],b,characters[nextchar]) + if trace_kerns then + logprocess("%s: shifting second of pair %s and %s by (%s,%s) and correction (%s,%s)",cref(kind,chainname,chainlookupname),gref(startchar),gref(nextchar),x,y,w,h) + end end - end - if sequence[1] then - -- Replacements only happen with reverse lookups as they are single only. We - -- could pack them into current (replacement value instead of true) and then - -- use sequence[start] instead but it's somewhat ugly. 
- nt = nt + 1 - t[nt] = { nofrules, lookuptype, sequence, start, stop, rule.lookups, replacements } - for unic, _ in next, sequence[start] do - local cu = contexts[unic] - if not cu then - contexts[unic] = t + else + report_process("%s: check this out (old kern stuff)",cref(kind,chainname,chainlookupname)) + local a, b = krn[2], krn[6] + if a and a ~= 0 then + local k = setkern(snext,factor,rlmode,a) + if trace_kerns then + logprocess("%s: inserting first kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(prev.char),gref(nextchar)) end end + if b and b ~= 0 then + logwarning("%s: ignoring second kern xoff %s",cref(kind,chainname,chainlookupname),b*factor) + end + end + done = true + elseif krn ~= 0 then + local k = setkern(snext,factor,rlmode,krn) + if trace_kerns then + logprocess("%s: inserting kern %s between %s and %s",cref(kind,chainname,chainlookupname),k,gref(prev.char),gref(nextchar)) end + done = true end + break end - else - -- no rules end - else - report_prepare("missing lookuptype for %s",lookupname) - end - end - end -end - --- we can consider lookuphash == false (initialized but empty) vs lookuphash == table - -local function featuresinitializer(tfmdata,value) - if true then -- value then - -- beware we need to use the topmost properties table - local rawdata = tfmdata.shared.rawdata - local properties = rawdata.properties - if not properties.initialized then - local starttime = trace_preparing and os.clock() - local resources = rawdata.resources - resources.lookuphash = resources.lookuphash or { } - prepare_contextchains(tfmdata) - prepare_lookups(tfmdata) - properties.initialized = true - if trace_preparing then - report_prepare("preparation time is %0.3f seconds for %s",os.clock()-starttime,tfmdata.properties.fullname or "?") + return start, done end end end + return start, false end -registerotffeature { - name = "features", - description = "features", - default = true, - initializers = { - position = 1, - node = 
featuresinitializer, - }, - processors = { - node = featuresprocessor, - } -} - --- this will change but is needed for an experiment: - -otf.handlers = handlers - -end -- closure - -do -- begin closure to overcome local limits and interference +-- what pointer to return, spec says stop +-- to be discussed ... is bidi changer a space? +-- elseif char == zwnj and sequence[n][32] then -- brrr -if not modules then modules = { } end modules ['luatex-fonts-chr'] = { - version = 1.001, - comment = "companion to luatex-fonts.lua", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} +-- somehow l or f is global +-- we don't need to pass the currentcontext, saves a bit +-- make a slow variant then can be activated but with more tracing -if context then - texio.write_nl("fatal error: this module is not for context") - os.exit() +local function show_skip(kind,chainname,char,ck,class) + if ck[9] then + logwarning("%s: skipping char %s (%s) in rule %s, lookuptype %s (%s=>%s)",cref(kind,chainname),gref(char),class,ck[1],ck[2],ck[9],ck[10]) + else + logwarning("%s: skipping char %s (%s) in rule %s, lookuptype %s",cref(kind,chainname),gref(char),class,ck[1],ck[2]) + end end -characters = characters or { } -characters.categories = { - [0x0300]="mn", - [0x0301]="mn", - [0x0302]="mn", - [0x0303]="mn", - [0x0304]="mn", - [0x0305]="mn", - [0x0306]="mn", - [0x0307]="mn", - [0x0308]="mn", - [0x0309]="mn", - [0x030A]="mn", - [0x030B]="mn", - [0x030C]="mn", - [0x030D]="mn", - [0x030E]="mn", - [0x030F]="mn", - [0x0310]="mn", - [0x0311]="mn", - [0x0312]="mn", - [0x0313]="mn", - [0x0314]="mn", - [0x0315]="mn", - [0x0316]="mn", - [0x0317]="mn", - [0x0318]="mn", - [0x0319]="mn", - [0x031A]="mn", - [0x031B]="mn", - [0x031C]="mn", - [0x031D]="mn", - [0x031E]="mn", - [0x031F]="mn", - [0x0320]="mn", - [0x0321]="mn", - [0x0322]="mn", - [0x0323]="mn", - [0x0324]="mn", - [0x0325]="mn", - [0x0326]="mn", 
- [0x0327]="mn", - [0x0328]="mn", - [0x0329]="mn", - [0x032A]="mn", - [0x032B]="mn", - [0x032C]="mn", - [0x032D]="mn", - [0x032E]="mn", - [0x032F]="mn", - [0x0330]="mn", - [0x0331]="mn", - [0x0332]="mn", - [0x0333]="mn", - [0x0334]="mn", - [0x0335]="mn", - [0x0336]="mn", - [0x0337]="mn", - [0x0338]="mn", - [0x0339]="mn", - [0x033A]="mn", - [0x033B]="mn", - [0x033C]="mn", - [0x033D]="mn", - [0x033E]="mn", - [0x033F]="mn", - [0x0340]="mn", - [0x0341]="mn", - [0x0342]="mn", - [0x0343]="mn", - [0x0344]="mn", - [0x0345]="mn", - [0x0346]="mn", - [0x0347]="mn", - [0x0348]="mn", - [0x0349]="mn", - [0x034A]="mn", - [0x034B]="mn", - [0x034C]="mn", - [0x034D]="mn", - [0x034E]="mn", - [0x034F]="mn", - [0x0350]="mn", - [0x0351]="mn", - [0x0352]="mn", - [0x0353]="mn", - [0x0354]="mn", - [0x0355]="mn", - [0x0356]="mn", - [0x0357]="mn", - [0x0358]="mn", - [0x0359]="mn", - [0x035A]="mn", - [0x035B]="mn", - [0x035C]="mn", - [0x035D]="mn", - [0x035E]="mn", - [0x035F]="mn", - [0x0360]="mn", - [0x0361]="mn", - [0x0362]="mn", - [0x0363]="mn", - [0x0364]="mn", - [0x0365]="mn", - [0x0366]="mn", - [0x0367]="mn", - [0x0368]="mn", - [0x0369]="mn", - [0x036A]="mn", - [0x036B]="mn", - [0x036C]="mn", - [0x036D]="mn", - [0x036E]="mn", - [0x036F]="mn", - [0x0483]="mn", - [0x0484]="mn", - [0x0485]="mn", - [0x0486]="mn", - [0x0591]="mn", - [0x0592]="mn", - [0x0593]="mn", - [0x0594]="mn", - [0x0595]="mn", - [0x0596]="mn", - [0x0597]="mn", - [0x0598]="mn", - [0x0599]="mn", - [0x059A]="mn", - [0x059B]="mn", - [0x059C]="mn", - [0x059D]="mn", - [0x059E]="mn", - [0x059F]="mn", - [0x05A0]="mn", - [0x05A1]="mn", - [0x05A2]="mn", - [0x05A3]="mn", - [0x05A4]="mn", - [0x05A5]="mn", - [0x05A6]="mn", - [0x05A7]="mn", - [0x05A8]="mn", - [0x05A9]="mn", - [0x05AA]="mn", - [0x05AB]="mn", - [0x05AC]="mn", - [0x05AD]="mn", - [0x05AE]="mn", - [0x05AF]="mn", - [0x05B0]="mn", - [0x05B1]="mn", - [0x05B2]="mn", - [0x05B3]="mn", - [0x05B4]="mn", - [0x05B5]="mn", - [0x05B6]="mn", - [0x05B7]="mn", - [0x05B8]="mn", - 
[0x05B9]="mn", - [0x05BA]="mn", - [0x05BB]="mn", - [0x05BC]="mn", - [0x05BD]="mn", - [0x05BF]="mn", - [0x05C1]="mn", - [0x05C2]="mn", - [0x05C4]="mn", - [0x05C5]="mn", - [0x05C7]="mn", - [0x0610]="mn", - [0x0611]="mn", - [0x0612]="mn", - [0x0613]="mn", - [0x0614]="mn", - [0x0615]="mn", - [0x064B]="mn", - [0x064C]="mn", - [0x064D]="mn", - [0x064E]="mn", - [0x064F]="mn", - [0x0650]="mn", - [0x0651]="mn", - [0x0652]="mn", - [0x0653]="mn", - [0x0654]="mn", - [0x0655]="mn", - [0x0656]="mn", - [0x0657]="mn", - [0x0658]="mn", - [0x0659]="mn", - [0x065A]="mn", - [0x065B]="mn", - [0x065C]="mn", - [0x065D]="mn", - [0x065E]="mn", - [0x0670]="mn", - [0x06D6]="mn", - [0x06D7]="mn", - [0x06D8]="mn", - [0x06D9]="mn", - [0x06DA]="mn", - [0x06DB]="mn", - [0x06DC]="mn", - [0x06DF]="mn", - [0x06E0]="mn", - [0x06E1]="mn", - [0x06E2]="mn", - [0x06E3]="mn", - [0x06E4]="mn", - [0x06E7]="mn", - [0x06E8]="mn", - [0x06EA]="mn", - [0x06EB]="mn", - [0x06EC]="mn", - [0x06ED]="mn", - [0x0711]="mn", - [0x0730]="mn", - [0x0731]="mn", - [0x0732]="mn", - [0x0733]="mn", - [0x0734]="mn", - [0x0735]="mn", - [0x0736]="mn", - [0x0737]="mn", - [0x0738]="mn", - [0x0739]="mn", - [0x073A]="mn", - [0x073B]="mn", - [0x073C]="mn", - [0x073D]="mn", - [0x073E]="mn", - [0x073F]="mn", - [0x0740]="mn", - [0x0741]="mn", - [0x0742]="mn", - [0x0743]="mn", - [0x0744]="mn", - [0x0745]="mn", - [0x0746]="mn", - [0x0747]="mn", - [0x0748]="mn", - [0x0749]="mn", - [0x074A]="mn", - [0x07A6]="mn", - [0x07A7]="mn", - [0x07A8]="mn", - [0x07A9]="mn", - [0x07AA]="mn", - [0x07AB]="mn", - [0x07AC]="mn", - [0x07AD]="mn", - [0x07AE]="mn", - [0x07AF]="mn", - [0x07B0]="mn", - [0x07EB]="mn", - [0x07EC]="mn", - [0x07ED]="mn", - [0x07EE]="mn", - [0x07EF]="mn", - [0x07F0]="mn", - [0x07F1]="mn", - [0x07F2]="mn", - [0x07F3]="mn", - [0x0901]="mn", - [0x0902]="mn", - [0x093C]="mn", - [0x0941]="mn", - [0x0942]="mn", - [0x0943]="mn", - [0x0944]="mn", - [0x0945]="mn", - [0x0946]="mn", - [0x0947]="mn", - [0x0948]="mn", - [0x094D]="mn", - 
[0x0951]="mn", - [0x0952]="mn", - [0x0953]="mn", - [0x0954]="mn", - [0x0962]="mn", - [0x0963]="mn", - [0x0981]="mn", - [0x09BC]="mn", - [0x09C1]="mn", - [0x09C2]="mn", - [0x09C3]="mn", - [0x09C4]="mn", - [0x09CD]="mn", - [0x09E2]="mn", - [0x09E3]="mn", - [0x0A01]="mn", - [0x0A02]="mn", - [0x0A3C]="mn", - [0x0A41]="mn", - [0x0A42]="mn", - [0x0A47]="mn", - [0x0A48]="mn", - [0x0A4B]="mn", - [0x0A4C]="mn", - [0x0A4D]="mn", - [0x0A70]="mn", - [0x0A71]="mn", - [0x0A81]="mn", - [0x0A82]="mn", - [0x0ABC]="mn", - [0x0AC1]="mn", - [0x0AC2]="mn", - [0x0AC3]="mn", - [0x0AC4]="mn", - [0x0AC5]="mn", - [0x0AC7]="mn", - [0x0AC8]="mn", - [0x0ACD]="mn", - [0x0AE2]="mn", - [0x0AE3]="mn", - [0x0B01]="mn", - [0x0B3C]="mn", - [0x0B3F]="mn", - [0x0B41]="mn", - [0x0B42]="mn", - [0x0B43]="mn", - [0x0B4D]="mn", - [0x0B56]="mn", - [0x0B82]="mn", - [0x0BC0]="mn", - [0x0BCD]="mn", - [0x0C3E]="mn", - [0x0C3F]="mn", - [0x0C40]="mn", - [0x0C46]="mn", - [0x0C47]="mn", - [0x0C48]="mn", - [0x0C4A]="mn", - [0x0C4B]="mn", - [0x0C4C]="mn", - [0x0C4D]="mn", - [0x0C55]="mn", - [0x0C56]="mn", - [0x0CBC]="mn", - [0x0CBF]="mn", - [0x0CC6]="mn", - [0x0CCC]="mn", - [0x0CCD]="mn", - [0x0CE2]="mn", - [0x0CE3]="mn", - [0x0D41]="mn", - [0x0D42]="mn", - [0x0D43]="mn", - [0x0D4D]="mn", - [0x0DCA]="mn", - [0x0DD2]="mn", - [0x0DD3]="mn", - [0x0DD4]="mn", - [0x0DD6]="mn", - [0x0E31]="mn", - [0x0E34]="mn", - [0x0E35]="mn", - [0x0E36]="mn", - [0x0E37]="mn", - [0x0E38]="mn", - [0x0E39]="mn", - [0x0E3A]="mn", - [0x0E47]="mn", - [0x0E48]="mn", - [0x0E49]="mn", - [0x0E4A]="mn", - [0x0E4B]="mn", - [0x0E4C]="mn", - [0x0E4D]="mn", - [0x0E4E]="mn", - [0x0EB1]="mn", - [0x0EB4]="mn", - [0x0EB5]="mn", - [0x0EB6]="mn", - [0x0EB7]="mn", - [0x0EB8]="mn", - [0x0EB9]="mn", - [0x0EBB]="mn", - [0x0EBC]="mn", - [0x0EC8]="mn", - [0x0EC9]="mn", - [0x0ECA]="mn", - [0x0ECB]="mn", - [0x0ECC]="mn", - [0x0ECD]="mn", - [0x0F18]="mn", - [0x0F19]="mn", - [0x0F35]="mn", - [0x0F37]="mn", - [0x0F39]="mn", - [0x0F71]="mn", - [0x0F72]="mn", - 
[0x0F73]="mn", - [0x0F74]="mn", - [0x0F75]="mn", - [0x0F76]="mn", - [0x0F77]="mn", - [0x0F78]="mn", - [0x0F79]="mn", - [0x0F7A]="mn", - [0x0F7B]="mn", - [0x0F7C]="mn", - [0x0F7D]="mn", - [0x0F7E]="mn", - [0x0F80]="mn", - [0x0F81]="mn", - [0x0F82]="mn", - [0x0F83]="mn", - [0x0F84]="mn", - [0x0F86]="mn", - [0x0F87]="mn", - [0x0F90]="mn", - [0x0F91]="mn", - [0x0F92]="mn", - [0x0F93]="mn", - [0x0F94]="mn", - [0x0F95]="mn", - [0x0F96]="mn", - [0x0F97]="mn", - [0x0F99]="mn", - [0x0F9A]="mn", - [0x0F9B]="mn", - [0x0F9C]="mn", - [0x0F9D]="mn", - [0x0F9E]="mn", - [0x0F9F]="mn", - [0x0FA0]="mn", - [0x0FA1]="mn", - [0x0FA2]="mn", - [0x0FA3]="mn", - [0x0FA4]="mn", - [0x0FA5]="mn", - [0x0FA6]="mn", - [0x0FA7]="mn", - [0x0FA8]="mn", - [0x0FA9]="mn", - [0x0FAA]="mn", - [0x0FAB]="mn", - [0x0FAC]="mn", - [0x0FAD]="mn", - [0x0FAE]="mn", - [0x0FAF]="mn", - [0x0FB0]="mn", - [0x0FB1]="mn", - [0x0FB2]="mn", - [0x0FB3]="mn", - [0x0FB4]="mn", - [0x0FB5]="mn", - [0x0FB6]="mn", - [0x0FB7]="mn", - [0x0FB8]="mn", - [0x0FB9]="mn", - [0x0FBA]="mn", - [0x0FBB]="mn", - [0x0FBC]="mn", - [0x0FC6]="mn", - [0x102D]="mn", - [0x102E]="mn", - [0x102F]="mn", - [0x1030]="mn", - [0x1032]="mn", - [0x1036]="mn", - [0x1037]="mn", - [0x1039]="mn", - [0x1058]="mn", - [0x1059]="mn", - [0x135F]="mn", - [0x1712]="mn", - [0x1713]="mn", - [0x1714]="mn", - [0x1732]="mn", - [0x1733]="mn", - [0x1734]="mn", - [0x1752]="mn", - [0x1753]="mn", - [0x1772]="mn", - [0x1773]="mn", - [0x17B7]="mn", - [0x17B8]="mn", - [0x17B9]="mn", - [0x17BA]="mn", - [0x17BB]="mn", - [0x17BC]="mn", - [0x17BD]="mn", - [0x17C6]="mn", - [0x17C9]="mn", - [0x17CA]="mn", - [0x17CB]="mn", - [0x17CC]="mn", - [0x17CD]="mn", - [0x17CE]="mn", - [0x17CF]="mn", - [0x17D0]="mn", - [0x17D1]="mn", - [0x17D2]="mn", - [0x17D3]="mn", - [0x17DD]="mn", - [0x180B]="mn", - [0x180C]="mn", - [0x180D]="mn", - [0x18A9]="mn", - [0x1920]="mn", - [0x1921]="mn", - [0x1922]="mn", - [0x1927]="mn", - [0x1928]="mn", - [0x1932]="mn", - [0x1939]="mn", - [0x193A]="mn", - 
[0x193B]="mn", - [0x1A17]="mn", - [0x1A18]="mn", - [0x1B00]="mn", - [0x1B01]="mn", - [0x1B02]="mn", - [0x1B03]="mn", - [0x1B34]="mn", - [0x1B36]="mn", - [0x1B37]="mn", - [0x1B38]="mn", - [0x1B39]="mn", - [0x1B3A]="mn", - [0x1B3C]="mn", - [0x1B42]="mn", - [0x1B6B]="mn", - [0x1B6C]="mn", - [0x1B6D]="mn", - [0x1B6E]="mn", - [0x1B6F]="mn", - [0x1B70]="mn", - [0x1B71]="mn", - [0x1B72]="mn", - [0x1B73]="mn", - [0x1DC0]="mn", - [0x1DC1]="mn", - [0x1DC2]="mn", - [0x1DC3]="mn", - [0x1DC4]="mn", - [0x1DC5]="mn", - [0x1DC6]="mn", - [0x1DC7]="mn", - [0x1DC8]="mn", - [0x1DC9]="mn", - [0x1DCA]="mn", - [0x1DFE]="mn", - [0x1DFF]="mn", - [0x20D0]="mn", - [0x20D1]="mn", - [0x20D2]="mn", - [0x20D3]="mn", - [0x20D4]="mn", - [0x20D5]="mn", - [0x20D6]="mn", - [0x20D7]="mn", - [0x20D8]="mn", - [0x20D9]="mn", - [0x20DA]="mn", - [0x20DB]="mn", - [0x20DC]="mn", - [0x20E1]="mn", - [0x20E5]="mn", - [0x20E6]="mn", - [0x20E7]="mn", - [0x20E8]="mn", - [0x20E9]="mn", - [0x20EA]="mn", - [0x20EB]="mn", - [0x20EC]="mn", - [0x20ED]="mn", - [0x20EE]="mn", - [0x20EF]="mn", - [0x302A]="mn", - [0x302B]="mn", - [0x302C]="mn", - [0x302D]="mn", - [0x302E]="mn", - [0x302F]="mn", - [0x3099]="mn", - [0x309A]="mn", - [0xA806]="mn", - [0xA80B]="mn", - [0xA825]="mn", - [0xA826]="mn", - [0xFB1E]="mn", - [0xFE00]="mn", - [0xFE01]="mn", - [0xFE02]="mn", - [0xFE03]="mn", - [0xFE04]="mn", - [0xFE05]="mn", - [0xFE06]="mn", - [0xFE07]="mn", - [0xFE08]="mn", - [0xFE09]="mn", - [0xFE0A]="mn", - [0xFE0B]="mn", - [0xFE0C]="mn", - [0xFE0D]="mn", - [0xFE0E]="mn", - [0xFE0F]="mn", - [0xFE20]="mn", - [0xFE21]="mn", - [0xFE22]="mn", - [0xFE23]="mn", +local function normal_handle_contextchain(start,kind,chainname,contexts,sequence,lookuphash) + -- local rule, lookuptype, sequence, f, l, lookups = ck[1], ck[2] ,ck[3], ck[4], ck[5], ck[6] + local flags = sequence.flags + local done = false + local skipmark = flags[1] + local skipligature = flags[2] + local skipbase = flags[3] + local someskip = skipmark or skipligature or skipbase 
-- could be stored in flags for a fast test (hm, flags could be false !) + local markclass = sequence.markclass -- todo, first we need a proper test + local skipped = false + for k=1,#contexts do + local match = true + local current = start + local last = start + local ck = contexts[k] + local seq = ck[3] + local s = #seq + -- f..l = mid string + if s == 1 then + -- never happens + match = current.id == glyph_code and current.subtype<256 and current.font == currentfont and seq[1][current.char] + else + -- maybe we need a better space check (maybe check for glue or category or combination) + -- we cannot optimize for n=2 because there can be disc nodes + local f, l = ck[4], ck[5] + -- current match + if f == 1 and f == l then -- current only + -- already a hit + -- match = true + else -- before/current/after | before/current | current/after + -- no need to test first hit (to be optimized) + if f == l then -- new, else last out of sync (f is > 1) + -- match = true + else + local n = f + 1 + last = last.next + while n <= l do + if last then + local id = last.id + if id == glyph_code then + if last.subtype<256 and last.font == currentfont then + local char = last.char + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + skipped = true + if trace_skips then + show_skip(kind,chainname,char,ck,class) + end + last = last.next + elseif seq[n][char] then + if n < l then + last = last.next + end + n = n + 1 + else + match = false + break + end + else + match = false + break + end + else + match = false + break + end + elseif id == disc_code then + last = last.next + else + match = false + break + end + else + match = false + break + end + end + end + end + -- before + if match and f > 1 then + local prev = start.prev + if prev then + local n = f-1 + while n >= 1 do + if prev then + local id = prev.id + if id == glyph_code 
then + if prev.subtype<256 and prev.font == currentfont then -- normal char + local char = prev.char + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + skipped = true + if trace_skips then + show_skip(kind,chainname,char,ck,class) + end + elseif seq[n][char] then + n = n -1 + else + match = false + break + end + else + match = false + break + end + else + match = false + break + end + elseif id == disc_code then + -- skip 'm + elseif seq[n][32] then + n = n -1 + else + match = false + break + end + prev = prev.prev + elseif seq[n][32] then -- somewhat special, as zapfino can have many preceding spaces + n = n -1 + else + match = false + break + end + end + elseif f == 2 then + match = seq[1][32] + else + for n=f-1,1 do + if not seq[n][32] then + match = false + break + end + end + end + end + -- after + if match and s > l then + local current = last and last.next + if current then + -- removed optimization for s-l == 1, we have to deal with marks anyway + local n = l + 1 + while n <= s do + if current then + local id = current.id + if id == glyph_code then + if current.subtype<256 and current.font == currentfont then -- normal char + local char = current.char + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + skipped = true + if trace_skips then + show_skip(kind,chainname,char,ck,class) + end + elseif seq[n][char] then + n = n + 1 + else + match = false + break + end + else + match = false + break + end + else + match = false + break + end + elseif id == disc_code then + -- skip 'm + elseif seq[n][32] then -- brrr + n = n + 1 + else + match = false + break + end + current = current.next + elseif seq[n][32] then + n = n + 1 + else + match = false + break + 
end + end + elseif s-l == 1 then + match = seq[s][32] + else + for n=l+1,s do + if not seq[n][32] then + match = false + break + end + end + end + end + end + if match then + -- ck == currentcontext + if trace_contexts then + local rule, lookuptype, f, l = ck[1], ck[2], ck[4], ck[5] + local char = start.char + if ck[9] then + logwarning("%s: rule %s matches at char %s for (%s,%s,%s) chars, lookuptype %s (%s=>%s)", + cref(kind,chainname),rule,gref(char),f-1,l-f+1,s-l,lookuptype,ck[9],ck[10]) + else + logwarning("%s: rule %s matches at char %s for (%s,%s,%s) chars, lookuptype %s", + cref(kind,chainname),rule,gref(char),f-1,l-f+1,s-l,lookuptype) + end + end + local chainlookups = ck[6] + if chainlookups then + local nofchainlookups = #chainlookups + -- we can speed this up if needed + if nofchainlookups == 1 then + local chainlookupname = chainlookups[1] + local chainlookup = lookuptable[chainlookupname] + if chainlookup then + local cp = chainprocs[chainlookup.type] + if cp then + start, done = cp(start,last,kind,chainname,ck,lookuphash,chainlookup,chainlookupname,nil,sequence) + else + logprocess("%s: %s is not yet supported",cref(kind,chainname,chainlookupname),chainlookup.type) + end + else -- shouldn't happen + logprocess("%s is not yet supported",cref(kind,chainname,chainlookupname)) + end + else + local i = 1 + repeat + if skipped then + while true do + local char = start.char + local ccd = descriptions[char] + if ccd then + local class = ccd.class + if class == skipmark or class == skipligature or class == skipbase or (markclass and class == "mark" and not markclass[char]) then + start = start.next + else + break + end + else + break + end + end + end + local chainlookupname = chainlookups[i] + local chainlookup = lookuptable[chainlookupname] -- can be false (n matches, nofchainlookups + end + else + local replacements = ck[7] + if replacements then + start, done = chainprocs.reversesub(start,last,kind,chainname,ck,lookuphash,replacements) -- sequence + else + 
done = true -- can be meant to be skipped + if trace_contexts then + logprocess("%s: skipping match",cref(kind,chainname)) + end + end + end + end + end + return start, done +end + +-- Because we want to keep this elsewhere (an because speed is less an issue) we +-- pass the font id so that the verbose variant can access the relevant helper tables. + +local verbose_handle_contextchain = function(font,...) + logwarning("no verbose handler installed, reverting to 'normal'") + otf.setcontextchain() + return normal_handle_contextchain(...) +end + +otf.chainhandlers = { + normal = normal_handle_contextchain, + verbose = verbose_handle_contextchain, } -end -- closure +function otf.setcontextchain(method) + if not method or method == "normal" or not otf.chainhandlers[method] then + if handlers.contextchain then -- no need for a message while making the format + logwarning("installing normal contextchain handler") + end + handlers.contextchain = normal_handle_contextchain + else + logwarning("installing contextchain handler '%s'",method) + local handler = otf.chainhandlers[method] + handlers.contextchain = function(...) + return handler(currentfont,...) -- hm, get rid of ... + end + end + handlers.gsub_context = handlers.contextchain + handlers.gsub_contextchain = handlers.contextchain + handlers.gsub_reversecontextchain = handlers.contextchain + handlers.gpos_contextchain = handlers.contextchain + handlers.gpos_context = handlers.contextchain +end -do -- begin closure to overcome local limits and interference +otf.setcontextchain() -if not modules then modules = { } end modules ['font-ota'] = { - version = 1.001, - comment = "companion to font-otf.lua (analysing)", - author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", - copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" -} +local missing = { } -- we only report once --- this might become scrp-*.lua +local function logprocess(...) + if trace_steps then + registermessage(...) 
+ end + report_process(...) +end -local type, tostring, match, format, concat = type, tostring, string.match, string.format, table.concat +local logwarning = report_process -if not trackers then trackers = { register = function() end } end +local function report_missing_cache(typ,lookup) + local f = missing[currentfont] if not f then f = { } missing[currentfont] = f end + local t = f[typ] if not t then t = { } f[typ] = t end + if not t[lookup] then + t[lookup] = true + logwarning("missing cache for lookup %s of type %s in font %s (%s)",lookup,typ,currentfont,tfmdata.properties.fullname) + end +end -local trace_analyzing = false trackers.register("otf.analyzing", function(v) trace_analyzing = v end) +local resolved = { } -- we only resolve a font,script,language pair once -local fonts, nodes, node = fonts, nodes, node +-- todo: pass all these 'locals' in a table -local allocate = utilities.storage.allocate +local lookuphashes = { } -local otf = fonts.handlers.otf +setmetatableindex(lookuphashes, function(t,font) + local lookuphash = fontdata[font].resources.lookuphash + if not lookuphash or not next(lookuphash) then + lookuphash = false + end + t[font] = lookuphash + return lookuphash +end) + +-- fonts.hashes.lookups = lookuphashes + +local constants = fonts.analyzers.constants + +local function initialize(sequence,script,language,enabled) + local features = sequence.features + if features then + for kind, scripts in next, features do + local valid = enabled[kind] + if valid then + local languages = scripts[script] or scripts[wildcard] + if languages and (languages[language] or languages[wildcard]) then + return { valid, constants[kind] or false, sequence.chain or 0, kind, sequence } + end + end + end + end + return false +end + +function otf.dataset(tfmdata,font) -- generic variant, overloaded in context + local shared = tfmdata.shared + local properties = tfmdata.properties + local language = properties.language or "dflt" + local script = properties.script or 
"dflt" + local enabled = shared.features + local res = resolved[font] + if not res then + res = { } + resolved[font] = res + end + local rs = res[script] + if not rs then + rs = { } + res[script] = rs + end + local rl = rs[language] + if not rl then + rl = { + -- indexed but we can also add specific data by key + } + rs[language] = rl + local sequences = tfmdata.resources.sequences + setmetatableindex(rl, function(t,k) + if type(k) == "number" then + local v = enabled and initialize(sequences[k],script,language,enabled) + t[k] = v + return v + end + end) + end + return rl +end + +-- elseif id == glue_code then +-- if p[5] then -- chain +-- local pc = pp[32] +-- if pc then +-- start, ok = start, false -- p[1](start,kind,p[2],pc,p[3],p[4]) +-- if ok then +-- done = true +-- end +-- if start then start = start.next end +-- else +-- start = start.next +-- end +-- else +-- start = start.next +-- end + +-- there will be a new direction parser (pre-parsed etc) + +local function featuresprocessor(head,font,attr) + + local lookuphash = lookuphashes[font] -- we can also check sequences here + + if not lookuphash then + return head, false + end + + if trace_steps then + checkstep(head) + end + + tfmdata = fontdata[font] + descriptions = tfmdata.descriptions + characters = tfmdata.characters + resources = tfmdata.resources + + marks = resources.marks + anchorlookups = resources.lookup_to_anchor + lookuptable = resources.lookups + lookuptypes = resources.lookuptypes + + currentfont = font + rlmode = 0 + + local sequences = resources.sequences + local done = false + local datasets = otf.dataset(tfmdata,font,attr) + + local dirstack = { } -- could move outside function + + -- We could work on sub start-stop ranges instead but I wonder if there is that + -- much speed gain (experiments showed that it made not much sense) and we need + -- to keep track of directions anyway. Also at some point I want to play with + -- font interactions and then we do need the full sweeps. 
+ + -- Keeping track of the headnode is needed for devanagari (I generalized it a bit + -- so that multiple cases are also covered. + + for s=1,#sequences do + local dataset = datasets[s] + if dataset then + featurevalue = dataset[1] -- todo: pass to function instead of using a global + if featurevalue then + local sequence = sequences[s] -- also dataset[5] + local rlparmode = 0 + local topstack = 0 + local success = false + local attribute = dataset[2] + local chain = dataset[3] -- sequence.chain or 0 + local typ = sequence.type + local subtables = sequence.subtables + if chain < 0 then + -- this is a limited case, no special treatments like 'init' etc + local handler = handlers[typ] + -- we need to get rid of this slide! probably no longer needed in latest luatex + local start = find_node_tail(head) -- slow (we can store tail because there's always a skip at the end): todo + while start do + local id = start.id + if id == glyph_code then + if start.subtype<256 and start.font == font then + local a = has_attribute(start,0) + if a then + a = a == attr + else + a = true + end + if a then + for i=1,#subtables do + local lookupname = subtables[i] + local lookupcache = lookuphash[lookupname] + if lookupcache then + local lookupmatch = lookupcache[start.char] + if lookupmatch then + local headnode = start == head + start, success = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) + if success then + if headnode then + head = start + end + break + end + end + else + report_missing_cache(typ,lookupname) + end + end + if start then start = start.prev end + else + start = start.prev + end + else + start = start.prev + end + else + start = start.prev + end + end + else + local handler = handlers[typ] + local ns = #subtables + local start = head -- local ? + rlmode = 0 -- to be checked ? 
+ if ns == 1 then -- happens often + local lookupname = subtables[1] + local lookupcache = lookuphash[lookupname] + if not lookupcache then -- also check for empty cache + report_missing_cache(typ,lookupname) + else + while start do + local id = start.id + if id == glyph_code then + if start.subtype<256 and start.font == font then + local a = has_attribute(start,0) + if a then + a = (a == attr) and (not attribute or has_attribute(start,state,attribute)) + else + a = not attribute or has_attribute(start,state,attribute) + end + if a then + local lookupmatch = lookupcache[start.char] + if lookupmatch then + -- sequence kan weg + local headnode = start == head + local ok + start, ok = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,1) + if ok then + success = true + if headnode then + head = start + end + end + end + if start then start = start.next end + else + start = start.next + end + else + start = start.next + end + elseif id == whatsit_code then -- will be function + local subtype = start.subtype + if subtype == dir_code then + local dir = start.dir + if dir == "+TRT" or dir == "+TLT" then + topstack = topstack + 1 + dirstack[topstack] = dir + elseif dir == "-TRT" or dir == "-TLT" then + topstack = topstack - 1 + end + local newdir = dirstack[topstack] + if newdir == "+TRT" then + rlmode = -1 + elseif newdir == "+TLT" then + rlmode = 1 + else + rlmode = rlparmode + end + if trace_directions then + report_process("directions after txtdir %s: txtdir=%s:%s, parmode=%s, txtmode=%s",dir,topstack,newdir or "unset",rlparmode,rlmode) + end + elseif subtype == localpar_code then + local dir = start.dir + if dir == "TRT" then + rlparmode = -1 + elseif dir == "TLT" then + rlparmode = 1 + else + rlparmode = 0 + end + rlmode = rlparmode + if trace_directions then + report_process("directions after pardir %s: parmode=%s, txtmode=%s",dir,rlparmode,rlmode) + end + end + start = start.next + else + start = start.next + end + end + end + else + while start do 
+ local id = start.id + if id == glyph_code then + if start.subtype<256 and start.font == font then + local a = has_attribute(start,0) + if a then + a = (a == attr) and (not attribute or has_attribute(start,state,attribute)) + else + a = not attribute or has_attribute(start,state,attribute) + end + if a then + for i=1,ns do + local lookupname = subtables[i] + local lookupcache = lookuphash[lookupname] + if lookupcache then + local lookupmatch = lookupcache[start.char] + if lookupmatch then + -- we could move all code inline but that makes things even more unreadable + local headnode = start == head + local ok + start, ok = handler(start,dataset[4],lookupname,lookupmatch,sequence,lookuphash,i) + if ok then + success = true + if headnode then + head = start + end + break + end + end + else + report_missing_cache(typ,lookupname) + end + end + if start then start = start.next end + else + start = start.next + end + else + start = start.next + end + elseif id == whatsit_code then + local subtype = start.subtype + if subtype == dir_code then + local dir = start.dir + if dir == "+TRT" or dir == "+TLT" then + topstack = topstack + 1 + dirstack[topstack] = dir + elseif dir == "-TRT" or dir == "-TLT" then + topstack = topstack - 1 + end + local newdir = dirstack[topstack] + if newdir == "+TRT" then + rlmode = -1 + elseif newdir == "+TLT" then + rlmode = 1 + else + rlmode = rlparmode + end + if trace_directions then + report_process("directions after txtdir %s: txtdir=%s:%s, parmode=%s, txtmode=%s",dir,topstack,newdir or "unset",rlparmode,rlmode) + end + elseif subtype == localpar_code then + local dir = start.dir + if dir == "TRT" then + rlparmode = -1 + elseif dir == "TLT" then + rlparmode = 1 + else + rlparmode = 0 + end + rlmode = rlparmode + if trace_directions then + report_process("directions after pardir %s: parmode=%s, txtmode=%s",dir,rlparmode,rlmode) + end + end + start = start.next + else + start = start.next + end + end + end + end + if success then + done = true 
+ end + if trace_steps then -- ? + registerstep(head) + end + end + end + end + return head, done +end -local analyzers = fonts.analyzers -local initializers = allocate() -local methods = allocate() +local function generic(lookupdata,lookupname,unicode,lookuphash) + local target = lookuphash[lookupname] + if target then + target[unicode] = lookupdata + else + lookuphash[lookupname] = { [unicode] = lookupdata } + end +end -analyzers.initializers = initializers -analyzers.methods = methods -analyzers.useunicodemarks = false +local action = { -local nodecodes = nodes.nodecodes -local glyph_code = nodecodes.glyph + substitution = generic, + multiple = generic, + alternate = generic, + position = generic, -local set_attribute = node.set_attribute -local has_attribute = node.has_attribute -local traverse_id = node.traverse_id -local traverse_node_list = node.traverse + ligature = function(lookupdata,lookupname,unicode,lookuphash) + local target = lookuphash[lookupname] + if not target then + target = { } + lookuphash[lookupname] = target + end + for i=1,#lookupdata do + local li = lookupdata[i] + local tu = target[li] + if not tu then + tu = { } + target[li] = tu + end + target = tu + end + target.ligature = unicode + end, -local fontdata = fonts.hashes.identifiers -local state = attributes.private('state') -local categories = characters and characters.categories or { } -- sorry, only in context + pair = function(lookupdata,lookupname,unicode,lookuphash) + local target = lookuphash[lookupname] + if not target then + target = { } + lookuphash[lookupname] = target + end + local others = target[unicode] + local paired = lookupdata[1] + if others then + others[paired] = lookupdata + else + others = { [paired] = lookupdata } + target[unicode] = others + end + end, -local otffeatures = fonts.constructors.newfeatures("otf") -local registerotffeature = otffeatures.register +} ---[[ldx-- -

Analyzers run per script and/or language and are needed in order to -process features right.

---ldx]]-- +local function prepare_lookups(tfmdata) --- todo: analyzers per script/lang, cross font, so we need an font id hash -> script --- e.g. latin -> hyphenate, arab -> 1/2/3 analyze -- its own namespace + local rawdata = tfmdata.shared.rawdata + local resources = rawdata.resources + local lookuphash = resources.lookuphash + local anchor_to_lookup = resources.anchor_to_lookup + local lookup_to_anchor = resources.lookup_to_anchor + local lookuptypes = resources.lookuptypes + local characters = tfmdata.characters + local descriptions = tfmdata.descriptions -local state = attributes.private('state') + -- we cannot free the entries in the descriptions as sometimes we access + -- then directly (for instance anchors) ... selectively freeing does save + -- much memory as it's only a reference to a table and the slot in the + -- description hash is not freed anyway -function analyzers.setstate(head,font) - local useunicodemarks = analyzers.useunicodemarks - local tfmdata = fontdata[font] - local characters = tfmdata.characters - local descriptions = tfmdata.descriptions - local first, last, current, n, done = nil, nil, head, 0, false -- maybe make n boolean - while current do - local id = current.id - if id == glyph_code and current.font == font then - local char = current.char - local d = descriptions[char] - if d then - if d.class == "mark" or (useunicodemarks and categories[char] == "mn") then - done = true - set_attribute(current,state,5) -- mark - elseif n == 0 then - first, last, n = current, current, 1 - set_attribute(current,state,1) -- init - else - last, n = current, n+1 - set_attribute(current,state,2) -- medi + for unicode, character in next, characters do -- we cannot loop over descriptions ! 
+ + local description = descriptions[unicode] + + if description then + + local lookups = description.slookups + if lookups then + for lookupname, lookupdata in next, lookups do + action[lookuptypes[lookupname]](lookupdata,lookupname,unicode,lookuphash) end - else -- finish - if first and first == last then - set_attribute(last,state,4) -- isol - elseif last then - set_attribute(last,state,3) -- fina + end + + local lookups = description.mlookups + if lookups then + for lookupname, lookuplist in next, lookups do + local lookuptype = lookuptypes[lookupname] + for l=1,#lookuplist do + local lookupdata = lookuplist[l] + action[lookuptype](lookupdata,lookupname,unicode,lookuphash) + end end - first, last, n = nil, nil, 0 end - elseif id == disc_code then - -- always in the middle - set_attribute(current,state,2) -- midi - last = current - else -- finish - if first and first == last then - set_attribute(last,state,4) -- isol - elseif last then - set_attribute(last,state,3) -- fina + + local list = description.kerns + if list then + for lookup, krn in next, list do -- ref to glyph, saves lookup + local target = lookuphash[lookup] + if target then + target[unicode] = krn + else + lookuphash[lookup] = { [unicode] = krn } + end + end end - first, last, n = nil, nil, 0 + + local list = description.anchors + if list then + for typ, anchors in next, list do -- types + if typ == "mark" or typ == "cexit" then -- or entry? 
+ for name, anchor in next, anchors do + local lookups = anchor_to_lookup[name] + if lookups then + for lookup, _ in next, lookups do + local target = lookuphash[lookup] + if target then + target[unicode] = anchors + else + lookuphash[lookup] = { [unicode] = anchors } + end + end + end + end + end + end + end + end - current = current.next + end - if first and first == last then - set_attribute(last,state,4) -- isol - elseif last then - set_attribute(last,state,3) -- fina + +end + +local function split(replacement,original) + local result = { } + for i=1,#replacement do + result[original[i]] = replacement[i] end - return head, done + return result end --- in the future we will use language/script attributes instead of the --- font related value, but then we also need dynamic features which is --- somewhat slower; and .. we need a chain of them +local valid = { + coverage = { chainsub = true, chainpos = true, contextsub = true }, + reversecoverage = { reversesub = true }, + glyphs = { chainsub = true, chainpos = true }, +} -local function analyzeinitializer(tfmdata,value) -- attr - local script, language = otf.scriptandlanguage(tfmdata) -- attr - local action = initializers[script] - if not action then - -- skip - elseif type(action) == "function" then - return action(tfmdata,value) - else - local action = action[language] - if action then - return action(tfmdata,value) +local function prepare_contextchains(tfmdata) + local rawdata = tfmdata.shared.rawdata + local resources = rawdata.resources + local lookuphash = resources.lookuphash + local lookups = rawdata.lookups + if lookups then + for lookupname, lookupdata in next, rawdata.lookups do + local lookuptype = lookupdata.type + if lookuptype then + local rules = lookupdata.rules + if rules then + local format = lookupdata.format + local validformat = valid[format] + if not validformat then + report_prepare("unsupported format %s",format) + elseif not validformat[lookuptype] then + -- todo: dejavu-serif has one 
(but i need to see what use it has) + report_prepare("unsupported %s %s for %s",format,lookuptype,lookupname) + else + local contexts = lookuphash[lookupname] + if not contexts then + contexts = { } + lookuphash[lookupname] = contexts + end + local t, nt = { }, 0 + for nofrules=1,#rules do + local rule = rules[nofrules] + local current = rule.current + local before = rule.before + local after = rule.after + local replacements = rule.replacements + local sequence = { } + local nofsequences = 0 + -- Wventually we can store start, stop and sequence in the cached file + -- but then less sharing takes place so best not do that without a lot + -- of profiling so let's forget about it. + if before then + for n=1,#before do + nofsequences = nofsequences + 1 + sequence[nofsequences] = before[n] + end + end + local start = nofsequences + 1 + for n=1,#current do + nofsequences = nofsequences + 1 + sequence[nofsequences] = current[n] + end + local stop = nofsequences + if after then + for n=1,#after do + nofsequences = nofsequences + 1 + sequence[nofsequences] = after[n] + end + end + if sequence[1] then + -- Replacements only happen with reverse lookups as they are single only. We + -- could pack them into current (replacement value instead of true) and then + -- use sequence[start] instead but it's somewhat ugly. 
+ nt = nt + 1 + t[nt] = { nofrules, lookuptype, sequence, start, stop, rule.lookups, replacements } + for unic, _ in next, sequence[start] do + local cu = contexts[unic] + if not cu then + contexts[unic] = t + end + end + end + end + end + else + -- no rules + end + else + report_prepare("missing lookuptype for %s",lookupname) + end end end end -local function analyzeprocessor(head,font,attr) - local tfmdata = fontdata[font] - local script, language = otf.scriptandlanguage(tfmdata,attr) - local action = methods[script] - if not action then - -- skip - elseif type(action) == "function" then - return action(head,font,attr) - else - action = action[language] - if action then - return action(head,font,attr) +-- we can consider lookuphash == false (initialized but empty) vs lookuphash == table + +local function featuresinitializer(tfmdata,value) + if true then -- value then + -- beware we need to use the topmost properties table + local rawdata = tfmdata.shared.rawdata + local properties = rawdata.properties + if not properties.initialized then + local starttime = trace_preparing and os.clock() + local resources = rawdata.resources + resources.lookuphash = resources.lookuphash or { } + prepare_contextchains(tfmdata) + prepare_lookups(tfmdata) + properties.initialized = true + if trace_preparing then + report_prepare("preparation time is %0.3f seconds for %s",os.clock()-starttime,tfmdata.properties.fullname or "?") + end end end - return head, false end registerotffeature { - name = "analyze", - description = "analysis of (for instance) character classes", + name = "features", + description = "features", default = true, initializers = { - node = analyzeinitializer, - }, - processors = { position = 1, - node = analyzeprocessor, - } -} - --- latin - -methods.latn = analyzers.setstate - --- this info eventually will go into char-def and we will have a state --- table for generic then - -local zwnj = 0x200C -local zwj = 0x200D - -local isol = { - [0x0600] = true, [0x0601] = 
true, [0x0602] = true, [0x0603] = true, - [0x0608] = true, [0x060B] = true, [0x0621] = true, [0x0674] = true, - [0x06DD] = true, [zwnj] = true, -} - -local isol_fina = { - [0x0622] = true, [0x0623] = true, [0x0624] = true, [0x0625] = true, - [0x0627] = true, [0x0629] = true, [0x062F] = true, [0x0630] = true, - [0x0631] = true, [0x0632] = true, [0x0648] = true, [0x0671] = true, - [0x0672] = true, [0x0673] = true, [0x0675] = true, [0x0676] = true, - [0x0677] = true, [0x0688] = true, [0x0689] = true, [0x068A] = true, - [0x068B] = true, [0x068C] = true, [0x068D] = true, [0x068E] = true, - [0x068F] = true, [0x0690] = true, [0x0691] = true, [0x0692] = true, - [0x0693] = true, [0x0694] = true, [0x0695] = true, [0x0696] = true, - [0x0697] = true, [0x0698] = true, [0x0699] = true, [0x06C0] = true, - [0x06C3] = true, [0x06C4] = true, [0x06C5] = true, [0x06C6] = true, - [0x06C7] = true, [0x06C8] = true, [0x06C9] = true, [0x06CA] = true, - [0x06CB] = true, [0x06CD] = true, [0x06CF] = true, [0x06D2] = true, - [0x06D3] = true, [0x06D5] = true, [0x06EE] = true, [0x06EF] = true, - [0x0759] = true, [0x075A] = true, [0x075B] = true, [0x076B] = true, - [0x076C] = true, [0x0771] = true, [0x0773] = true, [0x0774] = true, - [0x0778] = true, [0x0779] = true, [0xFEF5] = true, [0xFEF7] = true, - [0xFEF9] = true, [0xFEFB] = true, + node = featuresinitializer, + }, + processors = { + node = featuresprocessor, + } +} - -- syriac +-- this will change but is needed for an experiment: - [0x0710] = true, [0x0715] = true, [0x0716] = true, [0x0717] = true, - [0x0718] = true, [0x0719] = true, [0x0728] = true, [0x072A] = true, - [0x072C] = true, [0x071E] = true, -} +otf.handlers = handlers -local isol_fina_medi_init = { - [0x0626] = true, [0x0628] = true, [0x062A] = true, [0x062B] = true, - [0x062C] = true, [0x062D] = true, [0x062E] = true, [0x0633] = true, - [0x0634] = true, [0x0635] = true, [0x0636] = true, [0x0637] = true, - [0x0638] = true, [0x0639] = true, [0x063A] = true, [0x063B] = true, - 
[0x063C] = true, [0x063D] = true, [0x063E] = true, [0x063F] = true, - [0x0640] = true, [0x0641] = true, [0x0642] = true, [0x0643] = true, - [0x0644] = true, [0x0645] = true, [0x0646] = true, [0x0647] = true, - [0x0649] = true, [0x064A] = true, [0x066E] = true, [0x066F] = true, - [0x0678] = true, [0x0679] = true, [0x067A] = true, [0x067B] = true, - [0x067C] = true, [0x067D] = true, [0x067E] = true, [0x067F] = true, - [0x0680] = true, [0x0681] = true, [0x0682] = true, [0x0683] = true, - [0x0684] = true, [0x0685] = true, [0x0686] = true, [0x0687] = true, - [0x069A] = true, [0x069B] = true, [0x069C] = true, [0x069D] = true, - [0x069E] = true, [0x069F] = true, [0x06A0] = true, [0x06A1] = true, - [0x06A2] = true, [0x06A3] = true, [0x06A4] = true, [0x06A5] = true, - [0x06A6] = true, [0x06A7] = true, [0x06A8] = true, [0x06A9] = true, - [0x06AA] = true, [0x06AB] = true, [0x06AC] = true, [0x06AD] = true, - [0x06AE] = true, [0x06AF] = true, [0x06B0] = true, [0x06B1] = true, - [0x06B2] = true, [0x06B3] = true, [0x06B4] = true, [0x06B5] = true, - [0x06B6] = true, [0x06B7] = true, [0x06B8] = true, [0x06B9] = true, - [0x06BA] = true, [0x06BB] = true, [0x06BC] = true, [0x06BD] = true, - [0x06BE] = true, [0x06BF] = true, [0x06C1] = true, [0x06C2] = true, - [0x06CC] = true, [0x06CE] = true, [0x06D0] = true, [0x06D1] = true, - [0x06FA] = true, [0x06FB] = true, [0x06FC] = true, [0x06FF] = true, - [0x0750] = true, [0x0751] = true, [0x0752] = true, [0x0753] = true, - [0x0754] = true, [0x0755] = true, [0x0756] = true, [0x0757] = true, - [0x0758] = true, [0x075C] = true, [0x075D] = true, [0x075E] = true, - [0x075F] = true, [0x0760] = true, [0x0761] = true, [0x0762] = true, - [0x0763] = true, [0x0764] = true, [0x0765] = true, [0x0766] = true, - [0x0767] = true, [0x0768] = true, [0x0769] = true, [0x076A] = true, - [0x076D] = true, [0x076E] = true, [0x076F] = true, [0x0770] = true, - [0x0772] = true, [0x0775] = true, [0x0776] = true, [0x0777] = true, - [0x077A] = true, [0x077B] = true, 
[0x077C] = true, [0x077D] = true, - [0x077E] = true, [0x077F] = true, +end -- closure - -- syriac +do -- begin closure to overcome local limits and interference - [0x0712] = true, [0x0713] = true, [0x0714] = true, [0x071A] = true, - [0x071B] = true, [0x071C] = true, [0x071D] = true, [0x071F] = true, - [0x0720] = true, [0x0721] = true, [0x0722] = true, [0x0723] = true, - [0x0724] = true, [0x0725] = true, [0x0726] = true, [0x0727] = true, - [0x0729] = true, [0x072B] = true, +if not modules then modules = { } end modules ['luatex-fonts-chr'] = { + version = 1.001, + comment = "companion to luatex-fonts.lua", + author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", + copyright = "PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} - -- also +if context then + texio.write_nl("fatal error: this module is not for context") + os.exit() +end - [zwj] = true, +characters = characters or { } +characters.categories = { + [0x0300]="mn", + [0x0301]="mn", + [0x0302]="mn", + [0x0303]="mn", + [0x0304]="mn", + [0x0305]="mn", + [0x0306]="mn", + [0x0307]="mn", + [0x0308]="mn", + [0x0309]="mn", + [0x030A]="mn", + [0x030B]="mn", + [0x030C]="mn", + [0x030D]="mn", + [0x030E]="mn", + [0x030F]="mn", + [0x0310]="mn", + [0x0311]="mn", + [0x0312]="mn", + [0x0313]="mn", + [0x0314]="mn", + [0x0315]="mn", + [0x0316]="mn", + [0x0317]="mn", + [0x0318]="mn", + [0x0319]="mn", + [0x031A]="mn", + [0x031B]="mn", + [0x031C]="mn", + [0x031D]="mn", + [0x031E]="mn", + [0x031F]="mn", + [0x0320]="mn", + [0x0321]="mn", + [0x0322]="mn", + [0x0323]="mn", + [0x0324]="mn", + [0x0325]="mn", + [0x0326]="mn", + [0x0327]="mn", + [0x0328]="mn", + [0x0329]="mn", + [0x032A]="mn", + [0x032B]="mn", + [0x032C]="mn", + [0x032D]="mn", + [0x032E]="mn", + [0x032F]="mn", + [0x0330]="mn", + [0x0331]="mn", + [0x0332]="mn", + [0x0333]="mn", + [0x0334]="mn", + [0x0335]="mn", + [0x0336]="mn", + [0x0337]="mn", + [0x0338]="mn", + [0x0339]="mn", + [0x033A]="mn", + [0x033B]="mn", + [0x033C]="mn", + 
[0x033D]="mn", + [0x033E]="mn", + [0x033F]="mn", + [0x0340]="mn", + [0x0341]="mn", + [0x0342]="mn", + [0x0343]="mn", + [0x0344]="mn", + [0x0345]="mn", + [0x0346]="mn", + [0x0347]="mn", + [0x0348]="mn", + [0x0349]="mn", + [0x034A]="mn", + [0x034B]="mn", + [0x034C]="mn", + [0x034D]="mn", + [0x034E]="mn", + [0x034F]="mn", + [0x0350]="mn", + [0x0351]="mn", + [0x0352]="mn", + [0x0353]="mn", + [0x0354]="mn", + [0x0355]="mn", + [0x0356]="mn", + [0x0357]="mn", + [0x0358]="mn", + [0x0359]="mn", + [0x035A]="mn", + [0x035B]="mn", + [0x035C]="mn", + [0x035D]="mn", + [0x035E]="mn", + [0x035F]="mn", + [0x0360]="mn", + [0x0361]="mn", + [0x0362]="mn", + [0x0363]="mn", + [0x0364]="mn", + [0x0365]="mn", + [0x0366]="mn", + [0x0367]="mn", + [0x0368]="mn", + [0x0369]="mn", + [0x036A]="mn", + [0x036B]="mn", + [0x036C]="mn", + [0x036D]="mn", + [0x036E]="mn", + [0x036F]="mn", + [0x0483]="mn", + [0x0484]="mn", + [0x0485]="mn", + [0x0486]="mn", + [0x0591]="mn", + [0x0592]="mn", + [0x0593]="mn", + [0x0594]="mn", + [0x0595]="mn", + [0x0596]="mn", + [0x0597]="mn", + [0x0598]="mn", + [0x0599]="mn", + [0x059A]="mn", + [0x059B]="mn", + [0x059C]="mn", + [0x059D]="mn", + [0x059E]="mn", + [0x059F]="mn", + [0x05A0]="mn", + [0x05A1]="mn", + [0x05A2]="mn", + [0x05A3]="mn", + [0x05A4]="mn", + [0x05A5]="mn", + [0x05A6]="mn", + [0x05A7]="mn", + [0x05A8]="mn", + [0x05A9]="mn", + [0x05AA]="mn", + [0x05AB]="mn", + [0x05AC]="mn", + [0x05AD]="mn", + [0x05AE]="mn", + [0x05AF]="mn", + [0x05B0]="mn", + [0x05B1]="mn", + [0x05B2]="mn", + [0x05B3]="mn", + [0x05B4]="mn", + [0x05B5]="mn", + [0x05B6]="mn", + [0x05B7]="mn", + [0x05B8]="mn", + [0x05B9]="mn", + [0x05BA]="mn", + [0x05BB]="mn", + [0x05BC]="mn", + [0x05BD]="mn", + [0x05BF]="mn", + [0x05C1]="mn", + [0x05C2]="mn", + [0x05C4]="mn", + [0x05C5]="mn", + [0x05C7]="mn", + [0x0610]="mn", + [0x0611]="mn", + [0x0612]="mn", + [0x0613]="mn", + [0x0614]="mn", + [0x0615]="mn", + [0x064B]="mn", + [0x064C]="mn", + [0x064D]="mn", + [0x064E]="mn", + [0x064F]="mn", + 
[0x0650]="mn", + [0x0651]="mn", + [0x0652]="mn", + [0x0653]="mn", + [0x0654]="mn", + [0x0655]="mn", + [0x0656]="mn", + [0x0657]="mn", + [0x0658]="mn", + [0x0659]="mn", + [0x065A]="mn", + [0x065B]="mn", + [0x065C]="mn", + [0x065D]="mn", + [0x065E]="mn", + [0x0670]="mn", + [0x06D6]="mn", + [0x06D7]="mn", + [0x06D8]="mn", + [0x06D9]="mn", + [0x06DA]="mn", + [0x06DB]="mn", + [0x06DC]="mn", + [0x06DF]="mn", + [0x06E0]="mn", + [0x06E1]="mn", + [0x06E2]="mn", + [0x06E3]="mn", + [0x06E4]="mn", + [0x06E7]="mn", + [0x06E8]="mn", + [0x06EA]="mn", + [0x06EB]="mn", + [0x06EC]="mn", + [0x06ED]="mn", + [0x0711]="mn", + [0x0730]="mn", + [0x0731]="mn", + [0x0732]="mn", + [0x0733]="mn", + [0x0734]="mn", + [0x0735]="mn", + [0x0736]="mn", + [0x0737]="mn", + [0x0738]="mn", + [0x0739]="mn", + [0x073A]="mn", + [0x073B]="mn", + [0x073C]="mn", + [0x073D]="mn", + [0x073E]="mn", + [0x073F]="mn", + [0x0740]="mn", + [0x0741]="mn", + [0x0742]="mn", + [0x0743]="mn", + [0x0744]="mn", + [0x0745]="mn", + [0x0746]="mn", + [0x0747]="mn", + [0x0748]="mn", + [0x0749]="mn", + [0x074A]="mn", + [0x07A6]="mn", + [0x07A7]="mn", + [0x07A8]="mn", + [0x07A9]="mn", + [0x07AA]="mn", + [0x07AB]="mn", + [0x07AC]="mn", + [0x07AD]="mn", + [0x07AE]="mn", + [0x07AF]="mn", + [0x07B0]="mn", + [0x07EB]="mn", + [0x07EC]="mn", + [0x07ED]="mn", + [0x07EE]="mn", + [0x07EF]="mn", + [0x07F0]="mn", + [0x07F1]="mn", + [0x07F2]="mn", + [0x07F3]="mn", + [0x0901]="mn", + [0x0902]="mn", + [0x093C]="mn", + [0x0941]="mn", + [0x0942]="mn", + [0x0943]="mn", + [0x0944]="mn", + [0x0945]="mn", + [0x0946]="mn", + [0x0947]="mn", + [0x0948]="mn", + [0x094D]="mn", + [0x0951]="mn", + [0x0952]="mn", + [0x0953]="mn", + [0x0954]="mn", + [0x0962]="mn", + [0x0963]="mn", + [0x0981]="mn", + [0x09BC]="mn", + [0x09C1]="mn", + [0x09C2]="mn", + [0x09C3]="mn", + [0x09C4]="mn", + [0x09CD]="mn", + [0x09E2]="mn", + [0x09E3]="mn", + [0x0A01]="mn", + [0x0A02]="mn", + [0x0A3C]="mn", + [0x0A41]="mn", + [0x0A42]="mn", + [0x0A47]="mn", + [0x0A48]="mn", + 
[0x0A4B]="mn", + [0x0A4C]="mn", + [0x0A4D]="mn", + [0x0A70]="mn", + [0x0A71]="mn", + [0x0A81]="mn", + [0x0A82]="mn", + [0x0ABC]="mn", + [0x0AC1]="mn", + [0x0AC2]="mn", + [0x0AC3]="mn", + [0x0AC4]="mn", + [0x0AC5]="mn", + [0x0AC7]="mn", + [0x0AC8]="mn", + [0x0ACD]="mn", + [0x0AE2]="mn", + [0x0AE3]="mn", + [0x0B01]="mn", + [0x0B3C]="mn", + [0x0B3F]="mn", + [0x0B41]="mn", + [0x0B42]="mn", + [0x0B43]="mn", + [0x0B4D]="mn", + [0x0B56]="mn", + [0x0B82]="mn", + [0x0BC0]="mn", + [0x0BCD]="mn", + [0x0C3E]="mn", + [0x0C3F]="mn", + [0x0C40]="mn", + [0x0C46]="mn", + [0x0C47]="mn", + [0x0C48]="mn", + [0x0C4A]="mn", + [0x0C4B]="mn", + [0x0C4C]="mn", + [0x0C4D]="mn", + [0x0C55]="mn", + [0x0C56]="mn", + [0x0CBC]="mn", + [0x0CBF]="mn", + [0x0CC6]="mn", + [0x0CCC]="mn", + [0x0CCD]="mn", + [0x0CE2]="mn", + [0x0CE3]="mn", + [0x0D41]="mn", + [0x0D42]="mn", + [0x0D43]="mn", + [0x0D4D]="mn", + [0x0DCA]="mn", + [0x0DD2]="mn", + [0x0DD3]="mn", + [0x0DD4]="mn", + [0x0DD6]="mn", + [0x0E31]="mn", + [0x0E34]="mn", + [0x0E35]="mn", + [0x0E36]="mn", + [0x0E37]="mn", + [0x0E38]="mn", + [0x0E39]="mn", + [0x0E3A]="mn", + [0x0E47]="mn", + [0x0E48]="mn", + [0x0E49]="mn", + [0x0E4A]="mn", + [0x0E4B]="mn", + [0x0E4C]="mn", + [0x0E4D]="mn", + [0x0E4E]="mn", + [0x0EB1]="mn", + [0x0EB4]="mn", + [0x0EB5]="mn", + [0x0EB6]="mn", + [0x0EB7]="mn", + [0x0EB8]="mn", + [0x0EB9]="mn", + [0x0EBB]="mn", + [0x0EBC]="mn", + [0x0EC8]="mn", + [0x0EC9]="mn", + [0x0ECA]="mn", + [0x0ECB]="mn", + [0x0ECC]="mn", + [0x0ECD]="mn", + [0x0F18]="mn", + [0x0F19]="mn", + [0x0F35]="mn", + [0x0F37]="mn", + [0x0F39]="mn", + [0x0F71]="mn", + [0x0F72]="mn", + [0x0F73]="mn", + [0x0F74]="mn", + [0x0F75]="mn", + [0x0F76]="mn", + [0x0F77]="mn", + [0x0F78]="mn", + [0x0F79]="mn", + [0x0F7A]="mn", + [0x0F7B]="mn", + [0x0F7C]="mn", + [0x0F7D]="mn", + [0x0F7E]="mn", + [0x0F80]="mn", + [0x0F81]="mn", + [0x0F82]="mn", + [0x0F83]="mn", + [0x0F84]="mn", + [0x0F86]="mn", + [0x0F87]="mn", + [0x0F90]="mn", + [0x0F91]="mn", + [0x0F92]="mn", + 
[0x0F93]="mn", + [0x0F94]="mn", + [0x0F95]="mn", + [0x0F96]="mn", + [0x0F97]="mn", + [0x0F99]="mn", + [0x0F9A]="mn", + [0x0F9B]="mn", + [0x0F9C]="mn", + [0x0F9D]="mn", + [0x0F9E]="mn", + [0x0F9F]="mn", + [0x0FA0]="mn", + [0x0FA1]="mn", + [0x0FA2]="mn", + [0x0FA3]="mn", + [0x0FA4]="mn", + [0x0FA5]="mn", + [0x0FA6]="mn", + [0x0FA7]="mn", + [0x0FA8]="mn", + [0x0FA9]="mn", + [0x0FAA]="mn", + [0x0FAB]="mn", + [0x0FAC]="mn", + [0x0FAD]="mn", + [0x0FAE]="mn", + [0x0FAF]="mn", + [0x0FB0]="mn", + [0x0FB1]="mn", + [0x0FB2]="mn", + [0x0FB3]="mn", + [0x0FB4]="mn", + [0x0FB5]="mn", + [0x0FB6]="mn", + [0x0FB7]="mn", + [0x0FB8]="mn", + [0x0FB9]="mn", + [0x0FBA]="mn", + [0x0FBB]="mn", + [0x0FBC]="mn", + [0x0FC6]="mn", + [0x102D]="mn", + [0x102E]="mn", + [0x102F]="mn", + [0x1030]="mn", + [0x1032]="mn", + [0x1036]="mn", + [0x1037]="mn", + [0x1039]="mn", + [0x1058]="mn", + [0x1059]="mn", + [0x135F]="mn", + [0x1712]="mn", + [0x1713]="mn", + [0x1714]="mn", + [0x1732]="mn", + [0x1733]="mn", + [0x1734]="mn", + [0x1752]="mn", + [0x1753]="mn", + [0x1772]="mn", + [0x1773]="mn", + [0x17B7]="mn", + [0x17B8]="mn", + [0x17B9]="mn", + [0x17BA]="mn", + [0x17BB]="mn", + [0x17BC]="mn", + [0x17BD]="mn", + [0x17C6]="mn", + [0x17C9]="mn", + [0x17CA]="mn", + [0x17CB]="mn", + [0x17CC]="mn", + [0x17CD]="mn", + [0x17CE]="mn", + [0x17CF]="mn", + [0x17D0]="mn", + [0x17D1]="mn", + [0x17D2]="mn", + [0x17D3]="mn", + [0x17DD]="mn", + [0x180B]="mn", + [0x180C]="mn", + [0x180D]="mn", + [0x18A9]="mn", + [0x1920]="mn", + [0x1921]="mn", + [0x1922]="mn", + [0x1927]="mn", + [0x1928]="mn", + [0x1932]="mn", + [0x1939]="mn", + [0x193A]="mn", + [0x193B]="mn", + [0x1A17]="mn", + [0x1A18]="mn", + [0x1B00]="mn", + [0x1B01]="mn", + [0x1B02]="mn", + [0x1B03]="mn", + [0x1B34]="mn", + [0x1B36]="mn", + [0x1B37]="mn", + [0x1B38]="mn", + [0x1B39]="mn", + [0x1B3A]="mn", + [0x1B3C]="mn", + [0x1B42]="mn", + [0x1B6B]="mn", + [0x1B6C]="mn", + [0x1B6D]="mn", + [0x1B6E]="mn", + [0x1B6F]="mn", + [0x1B70]="mn", + [0x1B71]="mn", + 
[0x1B72]="mn", + [0x1B73]="mn", + [0x1DC0]="mn", + [0x1DC1]="mn", + [0x1DC2]="mn", + [0x1DC3]="mn", + [0x1DC4]="mn", + [0x1DC5]="mn", + [0x1DC6]="mn", + [0x1DC7]="mn", + [0x1DC8]="mn", + [0x1DC9]="mn", + [0x1DCA]="mn", + [0x1DFE]="mn", + [0x1DFF]="mn", + [0x20D0]="mn", + [0x20D1]="mn", + [0x20D2]="mn", + [0x20D3]="mn", + [0x20D4]="mn", + [0x20D5]="mn", + [0x20D6]="mn", + [0x20D7]="mn", + [0x20D8]="mn", + [0x20D9]="mn", + [0x20DA]="mn", + [0x20DB]="mn", + [0x20DC]="mn", + [0x20E1]="mn", + [0x20E5]="mn", + [0x20E6]="mn", + [0x20E7]="mn", + [0x20E8]="mn", + [0x20E9]="mn", + [0x20EA]="mn", + [0x20EB]="mn", + [0x20EC]="mn", + [0x20ED]="mn", + [0x20EE]="mn", + [0x20EF]="mn", + [0x302A]="mn", + [0x302B]="mn", + [0x302C]="mn", + [0x302D]="mn", + [0x302E]="mn", + [0x302F]="mn", + [0x3099]="mn", + [0x309A]="mn", + [0xA806]="mn", + [0xA80B]="mn", + [0xA825]="mn", + [0xA826]="mn", + [0xFB1E]="mn", + [0xFE00]="mn", + [0xFE01]="mn", + [0xFE02]="mn", + [0xFE03]="mn", + [0xFE04]="mn", + [0xFE05]="mn", + [0xFE06]="mn", + [0xFE07]="mn", + [0xFE08]="mn", + [0xFE09]="mn", + [0xFE0A]="mn", + [0xFE0B]="mn", + [0xFE0C]="mn", + [0xFE0D]="mn", + [0xFE0E]="mn", + [0xFE0F]="mn", + [0xFE20]="mn", + [0xFE21]="mn", + [0xFE22]="mn", + [0xFE23]="mn", } -local arab_warned = { } - --- todo: gref - -local function warning(current,what) - local char = current.char - if not arab_warned[char] then - log.report("analyze","arab: character %s (U+%05X) has no %s class", char, char, what) - arab_warned[char] = true - end -end - -local function finish(first,last) - if last then - if first == last then - local fc = first.char - if isol_fina_medi_init[fc] or isol_fina[fc] then - set_attribute(first,state,4) -- isol - else - warning(first,"isol") - set_attribute(first,state,0) -- error - end - else - local lc = last.char - if isol_fina_medi_init[lc] or isol_fina[lc] then -- why isol here ? 
- -- if laststate == 1 or laststate == 2 or laststate == 4 then - set_attribute(last,state,3) -- fina - else - warning(last,"fina") - set_attribute(last,state,0) -- error - end - end - first, last = nil, nil - elseif first then - -- first and last are either both set so we never com here - local fc = first.char - if isol_fina_medi_init[fc] or isol_fina[fc] then - set_attribute(first,state,4) -- isol - else - warning(first,"isol") - set_attribute(first,state,0) -- error - end - first = nil - end - return first, last -end - -function methods.arab(head,font,attr) -- maybe make a special version with no trace - local useunicodemarks = analyzers.useunicodemarks - local tfmdata = fontdata[font] - local marks = tfmdata.resources.marks - local first, last, current, done = nil, nil, head, false - while current do - if current.id == glyph_code and current.subtype<256 and current.font == font and not has_attribute(current,state) then - done = true - local char = current.char - if marks[char] or (useunicodemarks and categories[char] == "mn") then - set_attribute(current,state,5) -- mark - elseif isol[char] then -- can be zwj or zwnj too - first, last = finish(first,last) - set_attribute(current,state,4) -- isol - first, last = nil, nil - elseif not first then - if isol_fina_medi_init[char] then - set_attribute(current,state,1) -- init - first, last = first or current, current - elseif isol_fina[char] then - set_attribute(current,state,4) -- isol - first, last = nil, nil - else -- no arab - first, last = finish(first,last) - end - elseif isol_fina_medi_init[char] then - first, last = first or current, current - set_attribute(current,state,2) -- medi - elseif isol_fina[char] then - if not has_attribute(last,state,1) then - -- tricky, we need to check what last may be ! 
- set_attribute(last,state,2) -- medi - end - set_attribute(current,state,3) -- fina - first, last = nil, nil - elseif char >= 0x0600 and char <= 0x06FF then - set_attribute(current,state,6) -- rest - first, last = finish(first,last) - else --no - first, last = finish(first,last) - end - else - first, last = finish(first,last) - end - current = current.next - end - first, last = finish(first,last) - return head, done -end - -methods.syrc = methods.arab - -directives.register("otf.analyze.useunicodemarks",function(v) - analyzers.useunicodemarks = v -end) - end -- closure do -- begin closure to overcome local limits and interference diff --git a/tex/generic/context/luatex/luatex-fonts.lua b/tex/generic/context/luatex/luatex-fonts.lua index f5045a4e3..535519db7 100644 --- a/tex/generic/context/luatex/luatex-fonts.lua +++ b/tex/generic/context/luatex/luatex-fonts.lua @@ -176,9 +176,9 @@ else loadmodule('font-otf.lua') loadmodule('font-otb.lua') loadmodule('node-inj.lua') -- will be replaced (luatex >= .70) + loadmodule('font-ota.lua') loadmodule('font-otn.lua') -- loadmodule('luatex-fonts-chr.lua') - loadmodule('font-ota.lua') loadmodule('luatex-fonts-lua.lua') loadmodule('font-def.lua') loadmodule('luatex-fonts-def.lua') -- cgit v1.2.3