diff options
author | Hans Hagen <pragma@wxs.nl> | 2013-01-17 18:16:00 +0100 |
---|---|---|
committer | Hans Hagen <pragma@wxs.nl> | 2013-01-17 18:16:00 +0100 |
commit | 9c266934ab4fde7d71d13c607ac5e5e13bedf1ff (patch) | |
tree | c4bfef6ef5e6ea9c41c52b90846f175fa91ce9f3 /tex | |
parent | de678e954438110850d93c5f35a0d6bf14129901 (diff) | |
download | context-9c266934ab4fde7d71d13c607ac5e5e13bedf1ff.tar.gz |
beta 2013.01.17 18:16
Diffstat (limited to 'tex')
28 files changed, 3222 insertions, 278 deletions
diff --git a/tex/context/base/bibl-tra.mkiv b/tex/context/base/bibl-tra.mkiv index 914470fbe..3eb885eef 100644 --- a/tex/context/base/bibl-tra.mkiv +++ b/tex/context/base/bibl-tra.mkiv @@ -1475,14 +1475,24 @@ \bibresetrefsep \processcommalist[#1]\dobibauthornumref} +% \def\dobibauthornumref#1% +% {\bibinsertrefsep +% \doifbibreferencefoundelse{#1} +% {\begingroup +% \bibgetvara{#1}% +% \bibalternative\c!inbetween +% \setuppublications[\c!refcommand=num]% +% \cite[#1]% +% \endgroup} +% {}} + \def\dobibauthornumref#1% {\bibinsertrefsep \doifbibreferencefoundelse{#1} {\begingroup - \bibgetvara{#1}% + \cite[\c!left=,\c!right=,\c!alternative=\v!author][#1]% \bibalternative\c!inbetween - \setuppublications[\c!refcommand=num]% - \cite[#1]% + \cite[num][#1]% \endgroup} {}} diff --git a/tex/context/base/cont-new.mkii b/tex/context/base/cont-new.mkii index 3ce67992e..e1d8f6f91 100644 --- a/tex/context/base/cont-new.mkii +++ b/tex/context/base/cont-new.mkii @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2013.01.13 23:10} +\newcontextversion{2013.01.17 18:16} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index 18153995f..6339c3793 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2013.01.13 23:10} +\newcontextversion{2013.01.17 18:16} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. 
diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf Binary files differindex 43f4790bd..66ce96cb4 100644 --- a/tex/context/base/context-version.pdf +++ b/tex/context/base/context-version.pdf diff --git a/tex/context/base/context-version.png b/tex/context/base/context-version.png Binary files differindex 3157fb849..4d1a73000 100644 --- a/tex/context/base/context-version.png +++ b/tex/context/base/context-version.png diff --git a/tex/context/base/context.mkii b/tex/context/base/context.mkii index 6abbe0653..e0c952341 100644 --- a/tex/context/base/context.mkii +++ b/tex/context/base/context.mkii @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2013.01.13 23:10} +\edef\contextversion{2013.01.17 18:16} %D For those who want to use this: diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index ded53e1cf..ab199bceb 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -25,7 +25,7 @@ %D up and the dependencies are more consistent. \edef\contextformat {\jobname} -\edef\contextversion{2013.01.13 23:10} +\edef\contextversion{2013.01.17 18:16} %D For those who want to use this: diff --git a/tex/context/base/font-odv.lua b/tex/context/base/font-odv.lua new file mode 100644 index 000000000..eb8f7a9da --- /dev/null +++ b/tex/context/base/font-odv.lua @@ -0,0 +1,3074 @@ +if not modules then modules = { } end modules ['font-odv'] = { + version = 1.001, + comment = "companion to font-ini.mkiv", + author = "Kai Eigner, TAT Zetwerk / Hans Hagen, PRAGMA ADE", + copyright = "TAT Zetwerk / PRAGMA ADE / ConTeXt Development Team", + license = "see context related readme files" +} + +-- Kai: we're leaking nodes (happens when assigning start nodes behind start, also +-- in the original code) so this needs to be sorted out. As I touched nearly all code, +-- reshuffled, etc. etc. 
(imagine how much can get messed up in nearly a week's work) it +-- could be that I introduced bugs. There is more to gain (esp in the functions applied +-- to a range) but I'll do that when everything works as expected. + +-- A few remarks: +-- +-- This code is a partial rewrite of the code that deals with devanagari. The data and logic +-- is by Kai Eigner and based on Microsoft's OpenType specifications for specific +-- scripts, but with a few improvements. More information can be found at: +-- +-- deva: http://www.microsoft.com/typography/OpenType%20Dev/devanagari/introO.mspx +-- dev2: http://www.microsoft.com/typography/OpenType%20Dev/devanagari/intro.mspx +-- +-- Interesting is that Kai managed to write this on top of the existing otf handler. Only a +-- few extensions were needed, like a few more analyzing states and dealing with changed +-- head nodes in the core scanner as that only happens here. There's a lot going on here +-- and it's only because I touched nearly all code that I got a bit of a picture of what +-- happens. For in-depth knowledge one needs to consult Kai. +-- +-- The rewrite mostly deals with efficiency, both in terms of speed and code. We also made +-- sure that it suits generic use as well as use in ConTeXt. I removed some buglets but can +-- as well have messed up the logic by doing this. For this we keep the original around +-- as that serves as reference. I kept the comments but added a few more. Due to the lots +-- of reshuffling glyphs quite some leaks occur(red) but once I'm satisfied with the rewrite +-- I'll weed them. I also integrated initialization etc into the regular mechanisms. +-- +-- In the meantime, we're down from 25.5-3.5=22 seconds to 17.7-3.5=14.2 seconds for a 100 +-- page sample with both variants so it's worth the effort. Due to the method chosen it will +-- never be real fast. If I ever become a power user I'll have a go at some further speed +-- up. 
I will rename some functions (and features) once we don't need to check the original +-- code. We now use a special subset sequence for use inside the analyzer (after all we could +-- can store this in the dataset and save redundant analysis). +-- +-- I might go for an array approach with respect to attributes (and reshuffling). Easier. +-- +-- Hans Hagen, PRAGMA-ADE, Hasselt NL + +-- Matras: according to Microsoft typography specifications "up to one of each type: +-- pre-, above-, below- or post- base", but that does not seem to be right. It could +-- become an option. +-- +-- The next code looks weird anyway: the "and boolean" should move inside the if +-- or we should check differently (case vs successive). +-- +-- local function ms_matra(c) +-- local prebase, abovebase, belowbase, postbase = true, true, true, true +-- local n = c.next +-- while n and n.id == glyph_code and n.subtype<256 and n.font == font do +-- local char = n.char +-- if not dependent_vowel[char] then +-- break +-- elseif pre_mark[char] and prebase then +-- prebase = false +-- elseif above_mark[char] and abovebase then +-- abovebase = false +-- elseif below_mark[char] and belowbase then +-- belowbase = false +-- elseif post_mark[char] and postbase then +-- postbase = false +-- else +-- return c +-- end +-- c = c.next +-- end +-- return c +-- end + +-- todo: first test for font then for subtype + +local insert, imerge = table.insert, table.imerge +local next = next + +local trace_analyzing = false trackers.register("otf.analyzing", function(v) trace_analyzing = v end) +local report_devanagari = logs.reporter("otf","devanagari") + +fonts = fonts or { } +fonts.analyzers = fonts.analyzers or { } +fonts.analyzers.methods = fonts.analyzers.methods or { node = { otf = { } } } + +local otf = fonts.handlers.otf + +local nodecodes = nodes.nodecodes +local glyph_code = nodecodes.glyph + +local handlers = otf.handlers +local methods = fonts.analyzers.methods + +local otffeatures = 
fonts.constructors.newfeatures("otf") +local registerotffeature = otffeatures.register + +local processcharacters = nodes.handlers.characters + +local set_attribute = node.set_attribute +local unset_attribute = node.unset_attribute +local has_attribute = node.has_attribute +local insert_node_after = node.insert_after +local copy_node = node.copy +local free_node = node.free +local remove_node = node.remove +local flush_list = node.flush_list + +local fontdata = fonts.hashes.identifiers + +local a_state = attributes.private('state') +local a_syllabe = attributes.private('syllabe') + +local dotted_circle = 0x25CC + +-- In due time there will be entries here for scripts like Bengali, Gujarati, +-- Gurmukhi, Kannada, Malayalam, Oriya, Tamil, Telugu. Feel free to provide the +-- code points. + +local consonant = { + [0x0915] = true, [0x0916] = true, [0x0917] = true, [0x0918] = true, + [0x0919] = true, [0x091A] = true, [0x091B] = true, [0x091C] = true, + [0x091D] = true, [0x091E] = true, [0x091F] = true, [0x0920] = true, + [0x0921] = true, [0x0922] = true, [0x0923] = true, [0x0924] = true, + [0x0925] = true, [0x0926] = true, [0x0927] = true, [0x0928] = true, + [0x0929] = true, [0x092A] = true, [0x092B] = true, [0x092C] = true, + [0x092D] = true, [0x092E] = true, [0x092F] = true, [0x0930] = true, + [0x0931] = true, [0x0932] = true, [0x0933] = true, [0x0934] = true, + [0x0935] = true, [0x0936] = true, [0x0937] = true, [0x0938] = true, + [0x0939] = true, [0x0958] = true, [0x0959] = true, [0x095A] = true, + [0x095B] = true, [0x095C] = true, [0x095D] = true, [0x095E] = true, + [0x095F] = true, [0x0979] = true, [0x097A] = true, +} + +local independent_vowel = { + [0x0904] = true, [0x0905] = true, [0x0906] = true, [0x0907] = true, + [0x0908] = true, [0x0909] = true, [0x090A] = true, [0x090B] = true, + [0x090C] = true, [0x090D] = true, [0x090E] = true, [0x090F] = true, + [0x0910] = true, [0x0911] = true, [0x0912] = true, [0x0913] = true, + [0x0914] = true, [0x0960] = true, 
[0x0961] = true, [0x0972] = true, + [0x0973] = true, [0x0974] = true, [0x0975] = true, [0x0976] = true, + [0x0977] = true, +} + +local dependent_vowel = { -- matra + [0x093A] = true, [0x093B] = true, [0x093E] = true, [0x093F] = true, + [0x0940] = true, [0x0941] = true, [0x0942] = true, [0x0943] = true, + [0x0944] = true, [0x0945] = true, [0x0946] = true, [0x0947] = true, + [0x0948] = true, [0x0949] = true, [0x094A] = true, [0x094B] = true, + [0x094C] = true, [0x094E] = true, [0x094F] = true, [0x0955] = true, + [0x0956] = true, [0x0957] = true, [0x0962] = true, [0x0963] = true, +} + +local vowel_modifier = { + [0x0900] = true, [0x0901] = true, [0x0902] = true, [0x0903] = true, + -- A8E0 - A8F1 are cantillation marks for the Samaveda and may not belong here. + [0xA8E0] = true, [0xA8E1] = true, [0xA8E2] = true, [0xA8E3] = true, + [0xA8E4] = true, [0xA8E5] = true, [0xA8E6] = true, [0xA8E7] = true, + [0xA8E8] = true, [0xA8E9] = true, [0xA8EA] = true, [0xA8EB] = true, + [0xA8EC] = true, [0xA8ED] = true, [0xA8EE] = true, [0xA8EF] = true, + [0xA8F0] = true, [0xA8F1] = true, +} + +local stress_tone_mark = { + [0x0951] = true, [0x0952] = true, [0x0953] = true, [0x0954] = true, +} + +local nukta = { + [0x093C] = true, +} + +local halant = { + [0x094D] = true, +} + +local ra = { + [0x0930] = true, +} + +local anudatta = { + [0x0952] = true, +} + +local nbsp = { -- might become a constant instead of table + [0x00A0] = true, +} + +local zwnj = { -- might become a constant instead of table + [0x200C] = true, +} + +local zwj = { -- might become a constant instead of table + [0x200D] = true, +} + +local zw_char = { + [0x200C] = true, + [0x200D] = true, +} + +local pre_mark = { + [0x093F] = true, [0x094E] = true, +} + +local above_mark = { + [0x0900] = true, [0x0901] = true, [0x0902] = true, [0x093A] = true, + [0x0945] = true, [0x0946] = true, [0x0947] = true, [0x0948] = true, + [0x0951] = true, [0x0953] = true, [0x0954] = true, [0x0955] = true, + [0xA8E0] = true, [0xA8E1] = true, 
[0xA8E2] = true, [0xA8E3] = true, + [0xA8E4] = true, [0xA8E5] = true, [0xA8E6] = true, [0xA8E7] = true, + [0xA8E8] = true, [0xA8E9] = true, [0xA8EA] = true, [0xA8EB] = true, + [0xA8EC] = true, [0xA8ED] = true, [0xA8EE] = true, [0xA8EF] = true, + [0xA8F0] = true, [0xA8F1] = true, +} + +local below_mark = { + [0x093C] = true, [0x0941] = true, [0x0942] = true, [0x0943] = true, + [0x0944] = true, [0x094D] = true, [0x0952] = true, [0x0956] = true, + [0x0957] = true, [0x0962] = true, [0x0963] = true, +} + +local post_mark = { + [0x0903] = true, [0x093B] = true, [0x093E] = true, [0x0940] = true, + [0x0949] = true, [0x094A] = true, [0x094B] = true, [0x094C] = true, + [0x094F] = true, +} + +local mark_four = { } -- As we access these frequently an extra hash is used. + +for k, v in next, pre_mark do mark_four[k] = pre_mark end +for k, v in next, above_mark do mark_four[k] = above_mark end +for k, v in next, below_mark do mark_four[k] = below_mark end +for k, v in next, post_mark do mark_four[k] = post_mark end + +local mark_above_below_post = { } + +for k, v in next, above_mark do mark_above_below_post[k] = above_mark end +for k, v in next, below_mark do mark_above_below_post[k] = below_mark end +for k, v in next, post_mark do mark_above_below_post[k] = post_mark end + +-- Again, this table can be extended for other scripts than devanagari. Actually, +-- for ConTeXt this kind of dat is kept elsewhere so eventually we might move +-- tables to someplace else. 
-- Reordering class per code point; consulted when looking for a target position
-- for the reph (everything but "after subscript" qualifies there).

local reorder_class = {
    [0x0930] = "before postscript",
    [0x093F] = "before half",
    [0x0940] = "after subscript",
    [0x0941] = "after subscript",
    [0x0942] = "after subscript",
    [0x0943] = "after subscript",
    [0x0944] = "after subscript",
    [0x0945] = "after subscript",
    [0x0946] = "after subscript",
    [0x0947] = "after subscript",
    [0x0948] = "after subscript",
    [0x0949] = "after subscript",
    [0x094A] = "after subscript",
    [0x094B] = "after subscript",
    [0x094C] = "after subscript",
    [0x0962] = "after subscript",
    [0x0963] = "after subscript",
    [0x093E] = "after subscript",
}

-- We use some pseudo features as we need to manipulate the nodelist based
-- on information in the font as well as already applied features.

local dflt_true = {
    dflt = true
}

local dev2_defaults = {
    dev2 = dflt_true,
}

local deva_defaults = {
    dev2 = dflt_true,
    deva = dflt_true,
}

local false_flags = { false, false, false, false }

local both_joiners_true = { [0x200C] = true, [0x200D] = true }

local sequence_reorder_matras = {
    chain     = 0,
    features  = { dv01 = dev2_defaults },
    flags     = false_flags,
    name      = "dv01_reorder_matras",
    subtables = { "dv01_reorder_matras" },
    type      = "devanagari_reorder_matras",
}

local sequence_reorder_reph = {
    chain     = 0,
    features  = { dv02 = dev2_defaults },
    flags     = false_flags,
    name      = "dv02_reorder_reph",
    subtables = { "dv02_reorder_reph" },
    type      = "devanagari_reorder_reph",
}

local sequence_reorder_pre_base_reordering_consonants = {
    chain     = 0,
    features  = { dv03 = dev2_defaults },
    flags     = false_flags,
    name      = "dv03_reorder_pre_base_reordering_consonants",
    subtables = { "dv03_reorder_pre_base_reordering_consonants" },
    type      = "devanagari_reorder_pre_base_reordering_consonants",
}

local sequence_remove_joiners = {
    chain     = 0,
    features  = { dv04 = deva_defaults },
    flags     = false_flags,
    name      = "dv04_remove_joiners",
    subtables = { "dv04_remove_joiners" },
    type      = "devanagari_remove_joiners",
}

-- Looping over feature twice as efficient as looping over basic forms (some
-- 350 checks instead of 750 for one font). This is something to keep an eye on
-- as it might depends on the font. Not that it's a bottleneck.

local basic_shaping_forms = {
    nukt = true,
    akhn = true,
    rphf = true,
    pref = true,
    rkrf = true,
    blwf = true,
    half = true,
    pstf = true,
    vatu = true,
    cjct = true,
}

-- Injects the four pseudo sequences (dv01 .. dv04) into the font resources when
-- the font's script is "deva" or "dev2". This happens once per font: the presence
-- of lookuphash["dv01"] is used as the "already done" test. The sequences are
-- inserted right after the last sequence that carries one of the basic shaping
-- features. Nothing is returned; tfmdata.resources and tfmdata.shared.features
-- are modified in place.

local function initializedevanagi(tfmdata)
    -- NOTE(review): 'attr' is not defined in this scope, so whatever global of that
    -- name exists (normally nil) is passed; confirm that otf.scriptandlanguage does
    -- not need it.
    local script, language = otf.scriptandlanguage(tfmdata,attr) -- take fast variant
    if script == "deva" or script == "dev2" then
        local resources  = tfmdata.resources
        local lookuphash = resources.lookuphash
        if not lookuphash["dv01"] then
            report_devanagari("adding devanagari features to font")
            --
            local features       = resources.features
            local gsubfeatures   = features.gsub
            local sequences      = resources.sequences
            local sharedfeatures = tfmdata.shared.features
            --
            local lastmatch = 0
            for s=1,#sequences do -- classify chars
                local features = sequences[s].features
                if features then
                    for k, v in next, features do
                        if basic_shaping_forms[k] then
                            lastmatch = s
                        end
                    end
                end
            end
            local insertindex = lastmatch + 1
            --
            lookuphash["dv04_remove_joiners"] = both_joiners_true
            --
            gsubfeatures["dv01"] = dev2_defaults -- reorder matras
            gsubfeatures["dv02"] = dev2_defaults -- reorder reph
            gsubfeatures["dv03"] = dev2_defaults -- reorder pre base reordering consonants
            gsubfeatures["dv04"] = deva_defaults -- remove joiners
            --
            -- every insert uses the same index and thereby pushes the previous one
            -- down, so the resulting order is: dv04, dv01, dv02, dv03
            insert(sequences,insertindex,sequence_reorder_pre_base_reordering_consonants)
            insert(sequences,insertindex,sequence_reorder_reph)
            insert(sequences,insertindex,sequence_reorder_matras)
            insert(sequences,insertindex,sequence_remove_joiners)
            --
            if script == "deva" then
                sharedfeatures["dv04"] = true -- dv04_remove_joiners
            end
            --
            if script == "dev2" then
                sharedfeatures["dv01"] = true -- dv01_reorder_matras
                sharedfeatures["dv02"] = true -- dv02_reorder_reph
                sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants
                sharedfeatures["dv04"] = true -- dv04_remove_joiners
            end
            --
        end
    end
end

registerotffeature {
    name         = "devanagari",
    description  = "inject additional features",
    default      = true,
    initializers = {
        node = initializedevanagi,
    },
}

-- hm, this is applied to one character:

-- Returns: lookuphash, reph, vattu, blwfcache
--
--   reph      : true when an active dataset of kind "rphf" was found
--   vattu     : true when an active dataset of kind "blwf" was found
--   blwfcache : merged content of the lookup caches of all "blwf" subtables
--
-- The result is computed once per (font,attr) dataset and then kept in
-- datasets.devanagari.

local function deva_initialize(font,attr)

    local tfmdata        = fontdata[font]
    local resources      = tfmdata.resources
    local lookuphash     = resources.lookuphash

    local datasets       = otf.dataset(tfmdata,font,attr)
    local devanagaridata = datasets.devanagari

    if devanagaridata then -- maybe also check for e.g. reph

        return lookuphash, devanagaridata.reph, devanagaridata.vattu, devanagaridata.blwfcache

    else

        devanagaridata      = { }
        datasets.devanagari = devanagaridata

        local reph      = false
        local vattu     = false
        local blwfcache = { }

        local sequences = resources.sequences

        for s=1,#sequences do -- triggers creation of dataset
         -- local sequence = sequences[s]
            local dataset = datasets[s]
            if dataset and dataset[1] then -- value
                local kind = dataset[4]
                if kind == "rphf" then
                    -- deva
                    reph = true
                elseif kind == "blwf" then
                    -- deva
                    vattu = true
                    -- dev2
                 -- local subtables = sequence.subtables -- dataset[5].subtables
                    local subtables = dataset[5].subtables
                    for i=1,#subtables do
                        local lookupname  = subtables[i]
                        local lookupcache = lookuphash[lookupname]
                        if lookupcache then
                            for k, v in next, lookupcache do
                                -- the first subtable that knows a character wins
                                blwfcache[k] = blwfcache[k] or v
                            end
                        end
                    end
                end
            end
        end

        devanagaridata.reph      = reph
        devanagaridata.vattu     = vattu
        devanagaridata.blwfcache = blwfcache

        return lookuphash, reph, vattu, blwfcache

    end

end

-- Reorders the nodes of one syllable, running from 'start' upto and including
-- 'stop', and tags nodes with the a_state attribute (5 = rphf, 6 = half,
-- 8 = blwf) so that the basic shaping features get applied to the right glyphs.
-- As nodes get moved and removed, the (possibly changed) head of the list and
-- the new last node of the syllable are returned.

local function deva_reorder(head,start,stop,font,attr)

    local lookuphash, reph, vattu, blwfcache = deva_initialize(font,attr) -- could be inlines but ugly

    local current   = start
    local n         = start.next
    local base      = nil
    local firstcons = nil
    local lastcons  = nil
    local basefound = false

    if ra[start.char] and halant[n.char] and reph then
        -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph
        -- from candidates for base consonants
        if n == stop then
            return head, stop
        end
        if zwj[n.next.char] then
            current = start
        else
            current = n.next
            set_attribute(start,a_state,5) -- rphf
        end
    end

    if nbsp[current.char] then
        -- Stand Alone cluster
        if current == stop then
            stop = stop.prev
            head = remove_node(head, current)
            free_node(current)
            return head, stop
        else
            base, firstcons, lastcons = current, current, current
            current = current.next
            if current ~= stop then
                if nukta[current.char] then
                    current = current.next
                end
                if zwj[current.char] then
                    if current ~= stop then
                        local next = current.next
                        if next ~= stop and halant[next.char] then
                            current = next
                            next = current.next
                            local tmp = next.next
                            local changestop = next == stop
                            -- run the blwf feature over a copy of the pair in order
                            -- to find out if the font provides a below-base form
                            local tempcurrent = copy_node(next)
                            local nextcurrent = copy_node(current)
                            tempcurrent.next = nextcurrent
                            nextcurrent.prev = tempcurrent
                            set_attribute(tempcurrent,a_state,8) --blwf
                            tempcurrent = processcharacters(tempcurrent)
                            unset_attribute(tempcurrent,a_state)
                            if next.char == tempcurrent.char then
                                -- nothing got substituted
                                flush_list(tempcurrent)
                                local n = copy_node(current)
                                current.char = dotted_circle
                                head = insert_node_after(head, current, n)
                            else
                                current.char = tempcurrent.char -- (assumes that result of blwf consists of one node)
                                local freenode = current.next
                                current.next = tmp
                                tmp.prev = current
                                free_node(freenode)
                                flush_list(tempcurrent)
                                if changestop then
                                    stop = current
                                end
                            end
                        end
                    end
                end
            end
        end
    end

    while not basefound do
        -- find base consonant
        if consonant[current.char] then
            set_attribute(current,a_state,6) -- half
            if not firstcons then
                firstcons = current
            end
            lastcons = current
            if not base then
                base = current
            elseif blwfcache[current.char] then
                -- consonant has below-base (or post-base) form
                set_attribute(current,a_state,8) -- blwf
            else
                base = current
            end
        end
        basefound = current == stop
        current = current.next
    end

    if base ~= lastcons then
        -- if base consonant is not last one then move halant from base consonant to last one
        local np = base
        local n = base.next
        if nukta[n.char] then
            np = n
            n = n.next
        end
        if halant[n.char] then
            if lastcons ~= stop then
                local ln = lastcons.next
                if nukta[ln.char] then
                    lastcons = ln
                end
            end
         -- local np = n.prev
            local nn = n.next
            local ln = lastcons.next -- what if lastcons is nn ?
            np.next = nn
            nn.prev = np
            lastcons.next = n
            if ln then
                ln.prev = n
            end
            n.next = ln
            n.prev = lastcons
            if lastcons == stop then
                stop = n
            end
        end
    end

    n = start.next
    -- Two guards are needed here. When the halant is the last node of the syllable
    -- (n == stop) there is nothing to move the pair behind and n.next can be nil.
    -- When the base is the Ra itself the pair would get linked behind itself, which
    -- results in a self referencing node and a pair that is lost from the list.
    if n ~= stop and base ~= start and ra[start.char] and halant[n.char] and not zw_char[n.next.char] then
        -- if syllable starts with Ra + H then move this combination so that it follows either:
        -- the post-base 'matra' (if any) or the base consonant
        local matra = base
        if base ~= stop then
            local next = base.next
            if dependent_vowel[next.char] then
                matra = next
            end
        end
        -- [sp][start][n][nn] [matra|base][?]
        -- [matra|base][start] [n][?] [sp][nn]
        local sp = start.prev
        local nn = n.next
        local mn = matra.next
        if sp then
            sp.next = nn
        end
        nn.prev = sp
        matra.next = start
        start.prev = matra
        n.next = mn
        if mn then
            mn.prev = n
        end
        if head == start then
            head = nn
        end
        start = nn
        if matra == stop then
            stop = n
        end
    end

    local current = start
    while current ~= stop do
        local next = current.next
        if next ~= stop and halant[next.char] and zwnj[next.next.char] then
            unset_attribute(current,a_state)
        end
        current = next
    end

    if base ~= stop and has_attribute(base,a_state) then
        local next = base.next
        if halant[next.char] and not (next ~= stop and zwj[next.next.char]) then
            unset_attribute(base,a_state)
        end
    end

    -- ToDo: split two- or three-part matras into their parts. Then, move the left 'matra' part to the beginning of the syllable.
    -- Not necessary for Devanagari. However it is necessay for other scripts, such as Tamil (e.g. TAMIL VOWEL SIGN O - 0BCA)

    -- classify consonants and 'matra' parts as pre-base, above-base (Reph), below-base or post-base, and group elements of the syllable (consonants and 'matras') according to this classification

    -- moved, a, b and p are not used (yet) in the code below
    local current, allreordered, moved = start, false, { [base] = true }
    local a, b, p, bn = base, base, base, base.next
    if base ~= stop and nukta[bn.char] then
        a, b, p = bn, bn, bn
    end
    while not allreordered do
        -- current is always consonant
        local c = current
        local n = current.next
        -- let c run over the marks that belong to this consonant
        if c ~= stop then
            if nukta[n.char] then
                c = n
                n = n.next
            end
            if c ~= stop then
                if halant[n.char] then
                    c = n
                    n = n.next
                end
                while c ~= stop and dependent_vowel[n.char] do
                    c = n
                    n = n.next
                end
                if c ~= stop then
                    if vowel_modifier[n.char] then
                        c = n
                        n = n.next
                    end
                    if c ~= stop and stress_tone_mark[n.char] then
                        c = n
                        n = n.next
                    end
                end
            end
        end
        local bp = firstcons.prev
        local cn = current.next
        local last = c.next
        while cn ~= last do
            -- move pre-base matras...
            if pre_mark[cn.char] then
                if bp then
                    bp.next = cn
                end
                local next = cn.next
                local prev = cn.prev
                if next then
                    next.prev = prev
                end
                prev.next = next
                if cn == stop then
                    stop = prev
                end
                cn.prev = bp
                cn.next = firstcons
                firstcons.prev = cn
                if firstcons == start then
                    if head == start then
                        head = cn
                    end
                    start = cn
                end
                break
            end
            cn = cn.next
        end
        allreordered = c == stop
        current = c.next
    end

    if reph or vattu then
        local current, cns = start, nil
        while current ~= stop do
            local c = current
            local n = current.next
            if ra[current.char] and halant[n.char] then
                c = n
                n = n.next
                -- b becomes the last dependent vowel after the base (or the base itself)
                local b, bn = base, base
                while bn ~= stop do
                    local next = bn.next
                    if dependent_vowel[next.char] then
                        b = next
                    end
                    bn = next
                end
                if has_attribute(current,a_state) == 5 then
                    -- position Reph (Ra + H) after post-base 'matra' (if any) since these
                    -- become marks on the 'matra', not on the base glyph
                    if b ~= current then
                        if current == start then
                            if head == start then
                                head = n
                            end
                            start = n
                        end
                        if b == stop then
                            stop = c
                        end
                        local prev = current.prev
                        if prev then
                            prev.next = n
                        end
                        if n then
                            n.prev = prev
                        end
                        local next = b.next
                        c.next = next
                        if next then
                            next.prev = c
                        end
                        b.next = current
                        current.prev = b
                    end
                elseif cns and cns.next ~= current then
                    -- position below-base Ra (vattu) following the consonants on which it is placed (either the base consonant or one of the pre-base consonants)
                    local cp, cnsn = current.prev, cns.next
                    if cp then
                        cp.next = n
                    end
                    if n then
                        n.prev = cp
                    end
                    cns.next = current
                    current.prev = cns
                    c.next = cnsn
                    if cnsn then
                        cnsn.prev = c
                    end
                    if c == stop then
                        stop = cp
                        break
                    end
                    current = n.prev
                end
            else
                local char = current.char
                if consonant[char] or nbsp[char] then -- maybe combined hash
                    cns = current
                    local next = cns.next
                    if halant[next.char] then
                        cns = next
                    end
                end
            end
            current = current.next
        end
    end

    if nbsp[base.char] then
        head = remove_node(head,base)
        free_node(base)
    end

    return head, stop
end

-- If a pre-base matra character had been reordered before applying basic features,
-- the glyph can be moved closer to the main consonant based on whether half-forms had been formed.
-- Actual position for the matra is defined as “after last standalone halant glyph,
-- after initial matra position and before the main consonant”.
-- If ZWJ or ZWNJ follow this halant, position is moved after it.

-- so we break out ... this is only done for the first 'word' (if we feed words we can as
-- well test for non glyph).

-- Moves the pre-base matra 'start' behind the first halant in the same syllable
-- (same font, same a_syllabe value) that carries no a_state attribute; when that
-- halant is followed by a ZWJ/ZWNJ the matra goes behind the joiner instead.
--
-- start                       : the matra glyph node
-- kind,lookupname,replacement : handler arguments that are not used here
--
-- Returns the node to continue with (the node that followed the matra when it
-- was moved, otherwise 'start' itself) and true.

function handlers.devanagari_reorder_matras(start,kind,lookupname,replacement) -- no leak
    local current   = start -- we could cache attributes here
    local startfont = start.font
    local startattr = has_attribute(start,a_syllabe)
    -- can be fast loop
    -- we compare with startfont: there is no 'font' variable in this function so
    -- comparing with that one never matched and the matra stayed where it was
    while current and current.id == glyph_code and current.subtype<256 and current.font == startfont and has_attribute(current,a_syllabe) == startattr do
        local next = current.next
        if halant[current.char] and not has_attribute(current,a_state) then
            if next and next.id == glyph_code and next.subtype<256 and next.font == startfont and has_attribute(next,a_syllabe) == startattr and zw_char[next.char] then
                current = next
            end
            -- remember where to continue, then unlink the matra and put it
            -- behind 'current'
            local startnext = start.next
            remove_node(start,start)
            local next = current.next
            if next then
                next.prev = start
            end
            start.next = next
            current.next = start
            start.prev = current
            start = startnext
            break
        end
        current = next
    end
    return start, true
end

-- todo: way more caching of attributes and font

-- Reph’s original position is always at the beginning of the syllable, (i.e. it is not reordered at the character reordering stage).
-- However, it will be reordered according to the basic-forms shaping results.
-- Possible positions for reph, depending on the script, are; after main, before post-base consonant forms,
-- and after post-base consonant forms.

-- 1 If reph should be positioned after post-base consonant forms, proceed to step 5.
-- 2 If the reph repositioning class is not after post-base: target position is after the first explicit halant glyph between
-- the first post-reph consonant and last main consonant. If ZWJ or ZWNJ are following this halant, position is moved after it.
-- If such position is found, this is the target position. Otherwise, proceed to the next step.
+-- Note: in old-implementation fonts, where classifications were fixed in shaping engine, +-- there was no case where reph position will be found on this step. +-- 3 If reph should be repositioned after the main consonant: from the first consonant not ligated with main, +-- or find the first consonant that is not a potential pre-base reordering Ra. +-- 4 If reph should be positioned before post-base consonant, find first post-base classified consonant not ligated with main. +-- If no consonant is found, the target position should be before the first matra, syllable modifier sign or vedic sign. +-- 5 If no consonant is found in steps 3 or 4, move reph to a position immediately before the first post-base matra, +-- syllable modifier sign or vedic sign that has a reordering class after the intended reph position. +-- For example, if the reordering position for reph is post-main, it will skip above-base matras that also have a post-main position. +-- 6 Otherwise, reorder reph to the end of the syllable. + +-- hm, this only looks at the start of a nodelist ... is this supposed to be line based? 

-- Moves the reph 'start' to its target position inside the syllable (nodes with
-- the same font and the same a_syllabe value). The steps are tried in order and
-- the first one that finds a position wins:
--
--   step 2 : behind the first halant without a_state (and behind a joiner that follows it)
--   step 4 : in front of the first node that has a_state 9 (post-base)
--   step 5 : in front of the first above/below/post mark whose reorder class is
--            not "after subscript"
--   step 6 : at the end of the syllable
--
-- start                       : the reph glyph node
-- kind,lookupname,replacement : handler arguments that are not used here
--
-- Returns the node to continue with (the node that followed the reph when it
-- was moved, otherwise 'start' itself) and true.

function handlers.devanagari_reorder_reph(start,kind,lookupname,replacement)
    -- since in Devanagari reph has reordering position 'before postscript' dev2 only follows step 2, 4, and 6,
    -- the other steps are still ToDo (required for scripts other than dev2)
    local current   = start.next
    local startnext = nil
    local startfont = start.font
    local startattr = has_attribute(start,a_syllabe)
    while current and current.id == glyph_code and current.subtype<256 and current.font == startfont and has_attribute(current,a_syllabe) == startattr do --step 2
        if halant[current.char] and not has_attribute(current,a_state) then
            local next = current.next
            if next and next.id == glyph_code and next.subtype<256 and next.font == startfont and has_attribute(next,a_syllabe) == startattr and zw_char[next.char] then
                current = next
            end
            -- this assignment has to happen before the reph gets unlinked, otherwise
            -- 'start' ends up being nil below and the later steps are not skipped
            startnext = start.next
            remove_node(start,start)
            local next = current.next
            if next then
                next.prev = start
            end
            start.next = next
            current.next = start
            start.prev = current
            start = startnext
            startattr = has_attribute(start,a_syllabe)
            break
        end
        current = current.next
    end
    if not startnext then
        current = start.next
        while current and current.id == glyph_code and current.subtype<256 and current.font == startfont and has_attribute(current,a_syllabe) == startattr do --step 4
            if has_attribute(current,a_state) == 9 then --post-base
                startnext = start.next
                remove_node(start,start)
                local prev = current.prev
                start.prev = prev
                prev.next = start
                start.next = current
                current.prev = start
                start = startnext
                startattr = has_attribute(start,a_syllabe)
                break
            end
            current = current.next
        end
    end
    -- ToDo: determine position for reph with reordering position other than 'before postscript'
    -- (required for scripts other than dev2)
    -- leaks
    if not startnext then
        current = start.next
        local c = nil
        while current and current.id == glyph_code and current.subtype<256 and current.font == startfont and has_attribute(current,a_syllabe) == startattr do --step 5
            if not c then
                local char = current.char
                -- todo: combine in one
                if mark_above_below_post[char] and reorder_class[char] ~= "after subscript" then
                    c = current
                end
            end
            current = current.next
        end
        -- here we can loose the old start node: maybe best split cases
        if c then
            startnext = start.next
         -- if c ~= startnext then -- needs testing
            remove_node(start,start)
            local prev = c.prev
            start.prev = prev
            prev.next = start
            start.next = c
            c.prev = start
         -- end
            start = startnext
            startattr = has_attribute(start,a_syllabe)
        end
    end
    -- leaks
    if not startnext then
        current = start
        local next = current.next
        while next and next.id == glyph_code and next.subtype<256 and next.font == startfont and has_attribute(next,a_syllabe) == startattr do --step 6
            current = next
            next = current.next
        end
        if start ~= current then
            startnext = start.next
            remove_node(start,start)
            local next = current.next
            if next then
                next.prev = start
            end
            start.next = next
            current.next = start
            start.prev = current
            start = startnext
        end
    end
    --
    return start, true
end

-- we can cache some checking (v)

-- If a pre-base reordering consonant is found, reorder it according to the following rules:
--
-- 1 Only reorder a glyph produced by substitution during application of the feature.
--    (Note that a font may shape a Ra consonant with the feature generally but block it in certain contexts.)
-- 2  Try to find a target position the same way as for pre-base matra. If it is found, reorder pre-base consonant glyph.
-- 3  If position is not found, reorder immediately before main consonant.

-- UNTESTED: NOT CALLED IN EXAMPLE

-- Reorder a pre-base reordering consonant within its syllable.
--
-- start : the node to move; kind, lookupname and replacement belong to the common
--         handler signature and are not used here.
--
-- Returns the node that originally followed start (or start itself when nothing was
-- moved) and true.

function handlers.devanagari_reorder_pre_base_reordering_consonants(start,kind,lookupname,replacement)
    local current   = start
    local startnext = nil
    local startfont = start.font
    local startattr = has_attribute(start,a_syllabe)
    -- can be fast for loop + caching state
    while current and current.id == glyph_code and current.subtype<256 and current.font == startfont and has_attribute(current,a_syllabe) == startattr do
        local next = current.next
        if halant[current.char] and not has_attribute(current,a_state) then
            -- compare against the font of the start node (there is no local 'font' in this function)
            if next and next.id == glyph_code and next.subtype<256 and next.font == startfont and has_attribute(next,a_syllabe) == startattr then
                local char = next.char
                if zw_char[char] then
                    current = next
                end
            end
            startnext = start.next
            remove_node(start,start)
            -- relink start right after the halant (or the joiner following it)
            local next = current.next
            if next then
                next.prev = start
            end
            start.next = next
            current.next = start
            start.prev = current
            start = startnext
            break
        end
        current = next
    end
    if not startnext then
        current   = start.next
        startattr = has_attribute(start,a_syllabe)
        while current and current.id == glyph_code and current.subtype<256 and current.font == startfont and has_attribute(current,a_syllabe) == startattr do
            if not consonant[current.char] and has_attribute(current,a_state) then --main
                startnext = start.next
                remove_node(start,start)
                -- relink start right before the node that was found
                local prev = current.prev
                start.prev = prev
                if prev then -- nil when start was the first node and current directly followed it
                    prev.next = start
                end
                start.next = current
                current.prev = start
                start = startnext
                break
            end
            current = current.next
        end
    end
    return start, true
end

-- Remove start and the run of zero width (non) joiners of the same font that directly
-- follows it from the list and free those nodes. Returns the first node after the
-- removed run (nil when the list ends there) and true.

function handlers.devanagari_remove_joiners(start,kind,lookupname,replacement)
    local stop = start.next
    local startfont = start.font
    while stop and stop.id == glyph_code and stop.subtype<256 and stop.font == startfont do
        local char = stop.char
        if zw_char[char] then
            stop = stop.next
        else
            break
        end
    end
    if stop then
        -- terminate the run that will be flushed and bridge over it
        stop.prev.next = nil
        stop.prev = start.prev
    end
    local prev = start.prev
    if prev then
        prev.next = stop
    end
    flush_list(start)
    return stop, true
end

-- the features that dev2_initialize collects lookups for

local valid = {
    rphf = true,
    pref = true,
    half = true,
    blwf = true,
    pstf = true,
}

-- Collect (once per dataset) the lookups of the basic shaping features listed in 'valid'.
--
-- Returns the lookuphash of the font and a list of { kind, lookupcache, reph } entries,
-- where reph is the ligature found for 0x0930 + 0x094D in a non-chained lookup, or
-- false otherwise. The list is cached in datasets.devanagari.seqsubset.

local function dev2_initialize(font,attr)

    local tfmdata        = fontdata[font]
    local resources      = tfmdata.resources
    local lookuphash     = resources.lookuphash

    local datasets       = otf.dataset(tfmdata,font,attr)
    local devanagaridata = datasets.devanagari

    if devanagaridata then -- maybe also check for e.g. seqsubset

        return lookuphash, devanagaridata.seqsubset

    else

        devanagaridata      = { }
        datasets.devanagari = devanagaridata

        local seqsubset          = { }
        devanagaridata.seqsubset = seqsubset

        local sequences = resources.sequences

        for s=1,#sequences do
         -- local sequence = sequences[s]
            local dataset = datasets[s]
            if dataset and dataset[1] then -- featurevalue
                local kind = dataset[4]
                if kind and valid[kind] then
                    -- could become a function call
                 -- local subtables = sequence.subtables
                    local subtables = dataset[5].subtables
                    for i=1,#subtables do
                        local lookupname  = subtables[i]
                        local lookupcache = lookuphash[lookupname]
                        if lookupcache then
                            local reph  = false
                            local chain = dataset[3]
                            if chain ~= 0 then -- rphf is result of chain
                                -- ToDo: rphf might be result of other handler/chainproc
                            else
                                reph = lookupcache[0x0930]
                                if reph then
                                    reph = reph[0x094D]
                                    if reph then
                                        reph = reph["ligature"]
                                    end
                                end
                                -- ToDo: rphf actually acts on consonant + halant. This consonant might not necessarily be 0x0930 ... (but for dev2 it is)
                            end
                            seqsubset[#seqsubset+1] = { kind, lookupcache, reph }
                        end
                    end
                end
            end
        end

        lookuphash["dv01_reorder_matras"] = pre_mark -- move to initializer ?

        return lookuphash, seqsubset

    end

end

-- this one will be merged into the caller: it saves a call, but we will then make function
-- of the actions

-- Classify and reorder the nodes of one dev2 syllable.
--
-- head        : head of the node list
-- start, stop : first and last node of the syllable
-- font, attr  : passed on to dev2_initialize
--
-- Returns the (possibly changed) head and the (possibly changed) last node of the syllable.
--
-- State attribute values set here: 5 = rphf, 6 = half, 7 = pref, 8 = blwf, 9 = pstf.

local function dev2_reorder(head,start,stop,font,attr) -- maybe do a pass over (determine stop in sweep)

    local lookuphash, seqsubset = dev2_initialize(font,attr)

    local reph, pre_base_reordering_consonants = false, { } -- was nil ... probably went unnoticed because never assigned
    local halfpos, basepos, subpos, postpos = nil, nil, nil, nil
    local locl = { }

    for i=1,#seqsubset do

        -- maybe quit if start == stop

        local subset      = seqsubset[i]
        local kind        = subset[1]
        local lookupcache = subset[2]
        if kind == "rphf" then
            -- NOTE(review): subset[3] holds the reph ligature found by dev2_initialize but
            -- only a boolean is kept here, so the "dv02_reorder_reph" hash below is keyed
            -- by true/false instead of by a glyph; behaviour is left as is -- confirm intent
            if subset[3] then
                reph = true
            end
            local current = start
            local last = stop.next
            while current ~= last do
                if current ~= stop then
                    local c = locl[current] or current.char
                    local found = lookupcache[c]
                    if found then
                        local next = current.next
                        local n = locl[next] or next.char
                        if found[n] then --above-base: rphf  Consonant + Halant
                            local afternext = next ~= stop and next.next
                            if afternext and zw_char[afternext.char] then -- ZWJ and ZWNJ prevent creation of reph
                                current = next
                                current = current.next
                            elseif current == start then
                                set_attribute(current,a_state,5)
                                current = next
                            else
                                current = next
                            end
                        end
                    end
                end
                current = current.next
            end
        elseif kind == "pref" then
            -- why not global? pretty inefficient this way
            -- this will move to the initializer and we will store the hash in dataset
            local halantcache = lookupcache[0x094D]
            if halantcache then -- the lookup need not have an entry for the halant
                -- generic for wants the iterator first and the table second
                for k, v in next, halantcache do
                    pre_base_reordering_consonants[k] = v and v["ligature"] --ToDo: reph might also be result of chain
                end
            end
            --
            local current = start
            local last = stop.next
            while current ~= last do
                if current ~= stop then
                    local c = locl[current] or current.char
                    local found = lookupcache[c]
                    if found then
                        local next = current.next
                        local n = locl[next] or next.char
                        if found[n] then
                            set_attribute(current,a_state,7)
                            set_attribute(next,a_state,7)
                            current = next
                        end
                    end
                end
                current = current.next
            end
        elseif kind == "half" then -- half forms: half / Consonant + Halant
            local current = start
            local last = stop.next
            while current ~= last do
                if current ~= stop then
                    local c = locl[current] or current.char
                    local found = lookupcache[c]
                    if found then
                        local next = current.next
                        local n = locl[next] or next.char
                        if found[n] then
                            if next ~= stop and zwnj[next.next.char] then --ZWNJ prevent creation of half
                                current = current.next
                            else
                                set_attribute(current,a_state,6)
                                if not halfpos then
                                    halfpos = current
                                end
                            end
                            current = next
                        end
                    end
                end
                current = current.next
            end
        elseif kind == "blwf" then -- below-base: blwf / Halant + Consonant
            local current = start
            local last = stop.next
            while current ~= last do
                if current ~= stop then
                    local c = locl[current] or current.char
                    local found = lookupcache[c]
                    if found then
                        local next = current.next
                        local n = locl[next] or next.char
                        if found[n] then
                            set_attribute(current,a_state,8)
                            set_attribute(next,a_state,8)
                            current = next
                            subpos = current
                        end
                    end
                end
                current = current.next
            end
        elseif kind == "pstf" then -- post-base: pstf / Halant + Consonant
            local current = start
            local last = stop.next
            while current ~= last do
                if current ~= stop then
                    local c = locl[current] or current.char
                    local found = lookupcache[c]
                    if found then
                        local next = current.next
                        local n = locl[next] or next.char
                        if found[n] then
                            set_attribute(current,a_state,9)
                            set_attribute(next,a_state,9)
                            current = next
                            postpos = current
                        end
                    end
                end
                current = current.next
            end
        end
    end

    -- this one changes per word

    lookuphash["dv02_reorder_reph"] = { [reph] = true }
    lookuphash["dv03_reorder_pre_base_reordering_consonants"] = pre_base_reordering_consonants

    local current, base, firstcons = start, nil, nil

    if has_attribute(start,a_state) == 5 then
        -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph from candidates for base consonants
        current = start.next.next
    end

    if current ~= stop.next and nbsp[current.char] then
        -- Stand Alone cluster
        if current == stop then
            stop = stop.prev
            head = remove_node(head,current)
            free_node(current)
            return head, stop
        else
            base = current
            current = current.next
            if current ~= stop then
                local char = current.char
                if nukta[char] then
                    current = current.next
                    char = current.char
                end
                if zwj[char] then
                    local next = current.next
                    if current ~= stop and next ~= stop and halant[next.char] then
                        current = next
                        next = current.next
                        local tmp = next.next
                        local changestop = next == stop
                        -- temporarily cut the list after the consonant so that only
                        -- halant + consonant gets processed
                        next.next = nil
                        set_attribute(current,a_state,7) --pref
                        current = processcharacters(current)
                        set_attribute(current,a_state,8) --blwf
                        current = processcharacters(current)
                        set_attribute(current,a_state,9) --pstf
                        current = processcharacters(current)
                        unset_attribute(current,a_state)
                        if halant[current.char] then
                            -- nothing was substituted: restore the list and signal with a dotted circle
                            current.next.next = tmp
                            local nc = copy_node(current)
                            current.char = dotted_circle
                            head = insert_node_after(head,current,nc)
                        else
                            current.next = tmp -- assumes that result of pref, blwf, or pstf consists of one node
                            if changestop then
                                stop = current
                            end
                        end
                    end
                end
            end
        end
    else -- not Stand Alone cluster
        local last = stop.next
        while current ~= last do -- find base consonant
            local next = current.next
            if consonant[current.char] then
                if not (current ~= stop and next ~= stop and halant[next.char] and zwj[next.next.char]) then
                    if not firstcons then
                        firstcons = current
                    end
                    -- check whether consonant has below-base or post-base form or is pre-base reordering Ra
                    local a = has_attribute(current,a_state)
                    if not (a == 7 or a == 8 or a == 9) then
                        base = current
                    end
                end
            end
            current = next
        end
        if not base then
            base = firstcons
        end
    end

    if not base then
        if has_attribute(start,a_state) == 5 then
            unset_attribute(start,a_state)
        end
        return head, stop
    else
        if has_attribute(base,a_state) then
            unset_attribute(base,a_state)
        end
        basepos = base
    end
    if not halfpos then
        halfpos = base
    end
    if not subpos then
        subpos = base
    end
    if not postpos then
        postpos = subpos or base
    end

    -- Matra characters are classified and reordered by which consonant in a conjunct they have affinity for

    local moved = { }
    local current = start
    local last = stop.next
    while current ~= last do
        local char, target, cn = locl[current] or current.char, nil, current.next
        if not moved[current] and dependent_vowel[char] then
            if pre_mark[char] then -- Before first half form in the syllable
                moved[current] = true
                -- unlink the matra ...
                local prev = current.prev
                local next = current.next
                if prev then
                    prev.next = next
                end
                if next then
                    next.prev = prev
                end
                if current == stop then
                    stop = current.prev
                end
                if halfpos == start then
                    if head == start then
                        head = current
                    end
                    start = current
                end
                -- ... and relink it before the first half form
                local prev = halfpos.prev
                if prev then
                    prev.next = current
                end
                current.prev = prev
                halfpos.prev = current
                current.next = halfpos
                halfpos = current
            elseif above_mark[char] then -- After main consonant
                target = basepos
                if subpos == basepos then
                    subpos = current
                end
                if postpos == basepos then
                    postpos = current
                end
                basepos = current
            elseif below_mark[char] then -- After subjoined consonants
                target = subpos
                if postpos == subpos then
                    postpos = current
                end
                subpos = current
            elseif post_mark[char] then -- After post-form consonant
                target = postpos
                postpos = current
            end
            if mark_above_below_post[char] then
                local prev = current.prev
                if prev ~= target then
                    -- move the mark so that it directly follows its target
                    local next = current.next
                    if prev then -- not needed, already tested with target
                        prev.next = next
                    end
                    if next then
                        next.prev = prev
                    end
                    if current == stop then
                        stop = prev
                    end
                    local next = target.next
                    if next then
                        next.prev = current
                    end
                    current.next = next
                    target.next = current
                    current.prev = target
                end
            end
        end
        current = cn
    end

    -- Reorder marks to canonical order: Adjacent nukta and halant or nukta and vedic sign are always repositioned if necessary, so that the nukta is first.

    local current, c = start, nil
    while current ~= stop do
        local char = current.char
        if halant[char] or stress_tone_mark[char] then
            if not c then
                c = current -- first node of a run of halants / stress tone marks
            end
        else
            c = nil
        end
        local next = current.next
        if c and nukta[next.char] then
            if head == c then
                head = next
            end
            if stop == next then
                stop = current
            end
            local prev = c.prev
            if prev then
                prev.next = next
            end
            next.prev = prev
            local nextnext = next.next
            current.next = nextnext
            local nextnextnext = nextnext.next
            if nextnextnext then
                nextnextnext.prev = current
            end
            c.prev = nextnext
            nextnext.next = c
        end
        if stop == current then break end
        current = current.next
    end

    if nbsp[base.char] then
        head = remove_node(head, base)
        free_node(base)
    end

    return head, stop
end

-- cleaned up and optimized ...
-- needs checking (local, check order, fixes, extra hash, etc)

-- characters that, when they precede an nbsp, prevent it from starting a stand alone cluster

local separator = { }

imerge(separator,consonant)
imerge(separator,independent_vowel)
imerge(separator,dependent_vowel)
imerge(separator,vowel_modifier)
imerge(separator,stress_tone_mark)
imerge(separator,nukta)
imerge(separator,halant)

-- True when n (which must not be nil) is a character glyph (subtype < 256) of the given font.

local function is_font_glyph(n,font)
    return n.id == glyph_code and n.subtype<256 and n.font == font
end

-- Advance c by one node and peek at the node after it. Returns the new c and the char
-- of its successor, or nil as second value when there is no successor or when the
-- successor is not a glyph of this font (the caller then ends the syllable at c).

local function skip_one(c,font)
    c = c.next
    local n = c.next
    if n and is_font_glyph(n,font) then
        return c, n.char
    end
    return c, nil
end

-- Find the last node of a syllable that starts at c (vowel based syllable for variant 1,
-- stand alone cluster for variant 2). Returns that last node; c itself when nothing follows.

local function analyze_next_chars_one(c,font,variant) -- skip one dependent vowel
    -- why two variants ... the comment suggests that it's the same ruleset
    local n = c.next
    if not n then
        return c
    end
    if variant == 1 then
        local v = is_font_glyph(n,font)
        if v and nukta[n.char] then
            n = n.next
            if n then
                v = is_font_glyph(n,font)
            end
        end
        if n and v then
            local nn = n.next
            if nn and is_font_glyph(nn,font) then
                local nnn = nn.next
                if nnn and is_font_glyph(nnn,font) then
                    local nnc = nn.char
                    local nnnc = nnn.char
                    if zwj[nnc] and consonant[nnnc] then
                        c = nnn
                    elseif zw_char[nnc] and halant[nnnc] then
                        local nnnn = nnn.next
                        if nnnn and is_font_glyph(nnnn,font) and consonant[nnnn.char] then
                            c = nnnn
                        end
                    end
                end
            end
        end
    elseif variant == 2 then
        if is_font_glyph(n,font) and nukta[n.char] then
            c = n
        end
        n = c.next
        if n and is_font_glyph(n,font) then
            local nn = n.next
            if nn then
                local nv = is_font_glyph(nn,font)
                if nv and zw_char[n.char] then
                    n = nn
                    nn = nn.next
                    -- the joiner can be the last node of the list
                    nv = nn and is_font_glyph(nn,font)
                end
                if nn and nv and halant[n.char] and consonant[nn.char] then
                    c = nn
                end
            end
        end
    end
    -- c = ms_matra(c)
    local n = c.next
    if not n or not is_font_glyph(n,font) then
        return c
    end
    local char = n.char
    if dependent_vowel[char] then
        c, char = skip_one(c,font)
        if not char then
            return c
        end
    end
    if nukta[char] then
        c, char = skip_one(c,font)
        if not char then
            return c
        end
    end
    if halant[char] then
        c, char = skip_one(c,font)
        if not char then
            return c
        end
    end
    if vowel_modifier[char] then
        c, char = skip_one(c,font)
        if not char then
            return c
        end
    end
    if stress_tone_mark[char] then
        c, char = skip_one(c,font)
        if not char then
            return c
        end
    end
    if stress_tone_mark[char] then
        return c.next
    else
        return c
    end
end

-- Find the last node of a consonant syllable that starts at c. Returns that last node;
-- c itself when nothing follows.

local function analyze_next_chars_two(c,font)
    local n = c.next
    if not n then
        return c
    end
    if is_font_glyph(n,font) and nukta[n.char] then
        c = n
    end
    n = c
    -- collect {<H+[<ZWNJ|ZWJ>]|<ZWNJ|ZWJ>+H> + C + [N]} sequences
    while true do
        local nn = n.next
        if nn and is_font_glyph(nn,font) then
            local char = nn.char
            if halant[char] then
                n = nn
                local nnn = nn.next
                if nnn and is_font_glyph(nnn,font) and zw_char[nnn.char] then
                    n = nnn
                end
            elseif zw_char[char] then
             -- n = nn -- not here (?)
                local nnn = nn.next
                if nnn and is_font_glyph(nnn,font) and halant[nnn.char] then
                    n = nnn
                end
            else
                break
            end
            local nn = n.next
            if nn and is_font_glyph(nn,font) and consonant[nn.char] then
                n = nn
                local nnn = nn.next
                if nnn and is_font_glyph(nnn,font) and nukta[nnn.char] then
                    n = nnn
                end
                c = n
            else
                break
            end
        else
            break
        end
    end
    --
    if not c then
        -- This shouldn't happen I guess.
        return
    end
    local n = c.next
    if not n or not is_font_glyph(n,font) then
        return c
    end
    local char = n.char
    if anudatta[char] then
        c, char = skip_one(c,font)
        if not char then
            return c
        end
    end
    if halant[char] then
        c, char = skip_one(c,font)
        if not char then
            return c
        end
        if zw_char[char] then
            c, char = skip_one(c,font)
            if not char then
                return c
            end
        end
    else
        -- c = ms_matra(c)
        -- same as one
        if dependent_vowel[char] then
            c, char = skip_one(c,font)
            if not char then
                return c
            end
        end
        if nukta[char] then
            c, char = skip_one(c,font)
            if not char then
                return c
            end
        end
        if halant[char] then
            c, char = skip_one(c,font)
            if not char then
                return c
            end
        end
    end
    -- same as one
    if vowel_modifier[char] then
        c, char = skip_one(c,font)
        if not char then
            return c
        end
    end
    if stress_tone_mark[char] then
        c, char = skip_one(c,font)
        if not char then
            return c
        end
    end
    if stress_tone_mark[char] then
        return c.next
    else
        return c
    end
end

-- Signal a misplaced mark with a dotted circle: a copy of current is inserted after
-- current and, depending on whether mark is the pre_mark table, either the copy or
-- current itself becomes the dotted circle. Returns what insert_node_after returns.

local function inject_syntax_error(head,current,mark)
    local signal = copy_node(current)
    if mark == pre_mark then
        signal.char = dotted_circle
    else
        current.char = dotted_circle
    end
    return insert_node_after(head,current,signal)
end

-- It looks like these two analyzers were written independently but they share
-- a lot. Common code has been synced.

-- Split the glyphs of the given font in head into syllables and hand every syllable of
-- more than one node to deva_reorder. Returns the (possibly changed) head and a boolean
-- that is true when at least one glyph of the font was seen.

function methods.deva(head,font,attr)
    local current, done = head, false
    while current do
        if is_font_glyph(current,font) then
            done = true
            local syllablestart = current
            local syllableend = nil
            local c = current
            local n = c.next
            if n and ra[c.char] and is_font_glyph(n,font) and halant[n.char] then
                local n = n.next
                if n and is_font_glyph(n,font) then
                    c = n
                end
            end
            local standalone = nbsp[c.char]
            if standalone then
                local prev = current.prev
                if not prev then
                    -- begin of paragraph or box
                elseif prev.id ~= glyph_code or prev.subtype>=256 or prev.font ~= font then
                    -- different font or language so quite certainly a different word
                elseif not separator[prev.char] then
                    -- something that separates words
                else
                    standalone = false
                end
            end
            if standalone then
                -- stand alone cluster (at the start of the word only): #[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                -- the analyzer returns one value (the last node of the cluster); continue after it
                syllableend = analyze_next_chars_one(c,font,2)
                current = syllableend.next
                if syllablestart ~= syllableend then
                    head, current = deva_reorder(head,syllablestart,syllableend,font,attr)
                    current = current.next
                end
            else
                -- we can delay the n.subtype and n.font and test for say halant[c] first
                -- as an table access is faster than two function calls (subtype and font are
                -- pseudo fields) but the code becomes messy (unless we make it a function)
                local char = current.char
                if consonant[char] then
                    -- syllable containing consonant
                    local prevc = true
                    while prevc do
                        prevc = false
                        local n = current.next
                        if not n or not is_font_glyph(n,font) then
                            break
                        end
                        local c = n.char
                        if nukta[c] then
                            n = n.next
                            if not n or not is_font_glyph(n,font) then
                                break
                            end
                            c = n.char
                        end
                        if halant[c] then
                            n = n.next
                            if not n or not is_font_glyph(n,font) then
                                break
                            end
                            c = n.char
                            if zw_char[c] then
                                n = n.next
                                if not n or not is_font_glyph(n,font) then
                                    break
                                end
                                c = n.char
                            end
                            if consonant[c] then
                                prevc = true
                                current = n
                            end
                        end
                    end
                    local n = current.next
                    if n and is_font_glyph(n,font) and nukta[n.char] then
                        -- nukta (not specified in Microsoft Devanagari OpenType specification)
                        current = n
                        n = current.next
                    end
                    syllableend = current
                    current = n
                    if current then
                        local v = is_font_glyph(current,font)
                        if v then
                            if halant[current.char] then
                                -- syllable containing consonant without vowels: {C + [Nukta] + H} + C + H
                                local n = current.next
                                if n and is_font_glyph(n,font) and zw_char[n.char] then
                                    -- code collapsed, probably needs checking with intention
                                    syllableend = n
                                    current = n.next
                                else
                                    syllableend = current
                                    current = n
                                end
                            else
                                -- syllable containing consonant with vowels: {C + [Nukta] + H} + C + [M] + [VM] + [SM]
                                local c = current.char
                                if dependent_vowel[c] then
                                    syllableend = current
                                    current = current.next
                                    v = current and is_font_glyph(current,font)
                                    if v then
                                        c = current.char
                                    end
                                end
                                if v and vowel_modifier[c] then
                                    syllableend = current
                                    current = current.next
                                    v = current and is_font_glyph(current,font)
                                    if v then
                                        c = current.char
                                    end
                                end
                                if v and stress_tone_mark[c] then
                                    syllableend = current
                                    current = current.next
                                end
                            end
                        end
                    end
                    if syllablestart ~= syllableend then
                        head, current = deva_reorder(head,syllablestart,syllableend,font,attr)
                        current = current.next
                    end
                elseif independent_vowel[char] then
                    -- syllable without consonants: VO + [VM] + [SM]
                    syllableend = current
                    current = current.next
                    if current then
                        local v = is_font_glyph(current,font)
                        if v then
                            local c = current.char
                            if vowel_modifier[c] then
                                syllableend = current
                                current = current.next
                                v = current and is_font_glyph(current,font)
                                if v then
                                    c = current.char
                                end
                            end
                            if v and stress_tone_mark[c] then
                                syllableend = current
                                current = current.next
                            end
                        end
                    end
                else
                    local mark = mark_four[char]
                    if mark then
                        head, current = inject_syntax_error(head,current,mark)
                    end
                    current = current.next
                end
            end
        else
            current = current.next
        end
    end

    return head, done
end

-- there is a good change that when we run into one with subtype < 256 that the rest is also done
-- so maybe we can omit this check (it's pretty hard to get glyphs in the stream out of the blue)

-- handler(start,kind,lookupname,lookupmatch,sequence,lookuphash,1)

-- Split the glyphs of the given font in head into syllables, tag every node of a
-- syllable with a running syllable number (a_syllabe attribute) and hand every syllable
-- of more than one node to dev2_reorder. Returns the (possibly changed) head and a
-- boolean that is true when at least one glyph of the font was seen.

function methods.dev2(head,font,attr)
    local current = head
    local done    = false
    local syllabe = 0
    while current do
        local syllablestart, syllableend = nil, nil
        if is_font_glyph(current,font) then
            done = true
            syllablestart = current
            local c = current
            local n = current.next
            if n and ra[c.char] and is_font_glyph(n,font) and halant[n.char] then
                local n = n.next
                if n and is_font_glyph(n,font) then
                    c = n
                end
            end
            local char = c.char
            if independent_vowel[char] then
                -- vowel-based syllable: [Ra+H]+V+[N]+[<[<ZWJ|ZWNJ>]+H+C|ZWJ+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                current = analyze_next_chars_one(c,font,1)
                syllableend = current
            else
                local standalone = nbsp[char]
                if standalone then
                    local p = current.prev
                    if not p then
                        -- begin of paragraph or box
                    elseif p.id ~= glyph_code or p.subtype>=256 or p.font ~= font then
                        -- different font or language so quite certainly a different word
                    elseif not separator[p.char] then
                        -- something that separates words
                    else
                        standalone = false
                    end
                end
                if standalone then
                    -- Stand Alone cluster (at the start of the word only): #[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)]
                    current = analyze_next_chars_one(c,font,2)
                    syllableend = current
                elseif consonant[current.char] then
                    -- WHY current INSTEAD OF c ?

                    -- Consonant syllable: {C+[N]+<H+[<ZWNJ|ZWJ>]|<ZWNJ|ZWJ>+H>} + C+[N]+[A] + [< H+[<ZWNJ|ZWJ>] | {M}+[N]+[H]>]+[SM]+[(VD)]
                    current = analyze_next_chars_two(current,font) -- not c !
                    syllableend = current
                end
            end
        end
        if syllableend then
            -- tag all nodes of the syllable with the same number
            syllabe = syllabe + 1
            local c = syllablestart
            local n = syllableend.next
            while c ~= n do
                set_attribute(c,a_syllabe,syllabe)
                c = c.next
            end
        end
        if syllableend and syllablestart ~= syllableend then
            head, current = dev2_reorder(head,syllablestart,syllableend,font,attr)
        end
        if not syllableend and is_font_glyph(current,font) and not has_attribute(current,a_state) then
            local mark = mark_four[current.char]
            if mark then
                head, current = inject_syntax_error(head,current,mark)
            end
        end
        current = current.next
    end

    return head, done
end

-- Temporary checker:

if false then -- when true we can see how much nodes bleed

    -- run action and report when the number of nodes in the list changed
    local function check(what,action,head,kind,lookupname,replacement)
        local n_before = nodes.count(head)
        local s_before = nodes.listtoutf(head)
        local head, done = action(head,kind,lookupname,replacement)
        local n_after = nodes.count(head)
        local s_after = nodes.listtoutf(head)
        if n_before ~= n_after then
            print("leak",what)
            print(n_before,s_before)
            print(n_after,s_after)
        end
        return head, done
    end

    local devanagari_reorder_matras                         = handlers.devanagari_reorder_matras
    local devanagari_reorder_reph                           = handlers.devanagari_reorder_reph
    local devanagari_reorder_pre_base_reordering_consonants = handlers.devanagari_reorder_pre_base_reordering_consonants
    local devanagari_remove_joiners                         = handlers.devanagari_remove_joiners

    function handlers.devanagari_reorder_matras(start,kind,lookupname,replacement)
        if trace then
            return check("matras",devanagari_reorder_matras,start,kind,lookupname,replacement)
        else
            return devanagari_reorder_matras(start,kind,lookupname,replacement)
        end
    end

    function handlers.devanagari_reorder_reph(start,kind,lookupname,replacement)
        if trace then
            return check("reph",devanagari_reorder_reph,start,kind,lookupname,replacement)
        else
            return devanagari_reorder_reph(start,kind,lookupname,replacement)
        end
    end

    function handlers.devanagari_reorder_pre_base_reordering_consonants(start,kind,lookupname,replacement)
        if trace then
            return check("consonants",devanagari_reorder_pre_base_reordering_consonants,start,kind,lookupname,replacement)
        else
            return devanagari_reorder_pre_base_reordering_consonants(start,kind,lookupname,replacement)
        end
    end

    function handlers.devanagari_remove_joiners(start,kind,lookupname,replacement)
        if trace then
            return check("joiners",devanagari_remove_joiners,start,kind,lookupname,replacement)
        else
            return devanagari_remove_joiners(start,kind,lookupname,replacement)
        end
    end

end

-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --
-- We keep the original around for a while so that we can check it   --
-- when the above code does it wrong (data tables are not included). --
-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- --

-- local state = attributes.private('state')
-- local sylnr = attributes.private('syllabe')
--
-- local function install_dev(tfmdata)
--     local features = tfmdata.resources.features
--     local sequences = tfmdata.resources.sequences
--
--     local insertpos = 1
--     for s=1,#sequences do -- classify chars
--         for k in pairs(basic_shaping_forms) do
--             if sequences[s].features and ( sequences[s].features[k] or sequences[s].features.locl ) then insertpos = s + 1 end
--         end
--     end
--
--     features.gsub["dev2_reorder_matras"] = { ["dev2"] = { ["dflt"] = true } }
--     features.gsub["dev2_reorder_reph"] = { ["dev2"] = { ["dflt"] = true } }
--     features.gsub["dev2_reorder_pre_base_reordering_consonants"] = { ["dev2"] = { ["dflt"] = true } }
--     features.gsub["remove_joiners"] = { ["deva"] = { ["dflt"] = true }, ["dev2"] = { ["dflt"] = true } }
--
--     local sequence_dev2_reorder_matras = {
--         chain = 0,
--         features = { dev2_reorder_matras = { dev2 = { dflt = true } } },
--         flags = {
false, false, false, false }, +-- name = "dev2_reorder_matras", +-- subtables = { "dev2_reorder_matras" }, +-- type = "dev2_reorder_matras", +-- } +-- local sequence_dev2_reorder_reph = { +-- chain = 0, +-- features = { dev2_reorder_reph = { dev2 = { dflt = true } } }, +-- flags = { false, false, false, false }, +-- name = "dev2_reorder_reph", +-- subtables = { "dev2_reorder_reph" }, +-- type = "dev2_reorder_reph", +-- } +-- local sequence_dev2_reorder_pre_base_reordering_consonants = { +-- chain = 0, +-- features = { dev2_reorder_pre_base_reordering_consonants = { dev2 = { dflt = true } } }, +-- flags = { false, false, false, false }, +-- name = "dev2_reorder_pre_base_reordering_consonants", +-- subtables = { "dev2_reorder_pre_base_reordering_consonants" }, +-- type = "dev2_reorder_pre_base_reordering_consonants", +-- } +-- local sequence_remove_joiners = { +-- chain = 0, +-- features = { remove_joiners = { deva = { dflt = true }, dev2 = { dflt = true } } }, +-- flags = { false, false, false, false }, +-- name = "remove_joiners", +-- subtables = { "remove_joiners" }, +-- type = "remove_joiners", +-- } +-- table.insert(sequences, insertpos, sequence_dev2_reorder_pre_base_reordering_consonants) +-- table.insert(sequences, insertpos, sequence_dev2_reorder_reph) +-- table.insert(sequences, insertpos, sequence_dev2_reorder_matras) +-- table.insert(sequences, insertpos, sequence_remove_joiners) +-- end +-- +-- local function deva_reorder(head,start,stop,font,attr) +-- local tfmdata = fontdata[font] +-- local lookuphash = tfmdata.resources.lookuphash +-- local sequences = tfmdata.resources.sequences +-- +-- if not lookuphash["remove_joiners"] then install_dev(tfmdata) end --install Devanagari-features +-- +-- local sharedfeatures = tfmdata.shared.features +-- sharedfeatures["remove_joiners"] = true +-- local datasets = otf.dataset(tfmdata,font,attr) +-- +-- lookuphash["remove_joiners"] = { [0x200C] = true, [0x200D] = true } +-- +-- local current, n, base, firstcons, 
lastcons, basefound = start, start.next, nil, nil, nil, false +-- local reph, vattu = false, false +-- for s=1,#sequences do +-- local dataset = datasets[s] +-- featurevalue = dataset and dataset[1] +-- if featurevalue and dataset[4] == "rphf" then reph = true end +-- if featurevalue and dataset[4] == "blwf" then vattu = true end +-- end +-- if ra[start.char] and halant[n.char] and reph then -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph from candidates for base consonants +-- if n == stop then return head, stop end +-- if zwj[n.next.char] then +-- current = start +-- else +-- current = n.next +-- set_attribute(start,state,5) -- rphf +-- end +-- end +-- +-- if nbsp[current.char] then --Stand Alone cluster +-- if current == stop then +-- stop = stop.prev +-- head = node.remove(head, current) +-- node.free(current) +-- return head, stop +-- else +-- base, firstcons, lastcons = current, current, current +-- current = current.next +-- if current ~= stop then +-- if nukta[current.char] then current = current.next end +-- if zwj[current.char] then +-- if current ~= stop and current.next ~= stop and halant[current.next.char] then +-- current = current.next +-- local tmp = current.next.next +-- local changestop = current.next == stop +-- local tempcurrent = node.copy(current.next) +-- tempcurrent.next = node.copy(current) +-- tempcurrent.next.prev = tempcurrent +-- set_attribute(tempcurrent,state,8) --blwf +-- tempcurrent = nodes.handlers.characters(tempcurrent) +-- unset_attribute(tempcurrent,state) +-- if current.next.char == tempcurrent.char then +-- node.flush_list(tempcurrent) +-- local n = node.copy(current) +-- current.char = dotted_circle +-- head = node.insert_after(head, current, n) +-- else +-- current.char = tempcurrent.char -- (assumes that result of blwf consists of one node) +-- local freenode = current.next +-- current.next = tmp +-- tmp.prev = current +-- node.free(freenode) +-- node.flush_list(tempcurrent) +-- if changestop then 
stop = current end +-- end +-- end +-- end +-- end +-- end +-- end +-- +-- while not basefound do -- find base consonant +-- if consonant[current.char] then +-- set_attribute(current, state, 6) -- half +-- if not firstcons then firstcons = current end +-- lastcons = current +-- if not base then +-- base = current +-- else --check whether consonant has below-base (or post-base) form +-- local baseform = true +-- for s=1,#sequences do +-- local sequence = sequences[s] +-- local dataset = datasets[s] +-- featurevalue = dataset and dataset[1] +-- if featurevalue and dataset[4] == "blwf" then +-- local subtables = sequence.subtables +-- for i=1,#subtables do +-- local lookupname = subtables[i] +-- local lookupcache = lookuphash[lookupname] +-- if lookupcache then +-- local lookupmatch = lookupcache[current.char] +-- if lookupmatch then +-- set_attribute(current, state, 8) -- blwf +-- baseform = false +-- end +-- end +-- end +-- end +-- end +-- if baseform then base = current end +-- end +-- end +-- basefound = current == stop +-- current = current.next +-- end +-- if base ~= lastcons then -- if base consonant is not last one then move halant from base consonant to last one +-- n = base.next +-- if nukta[n.char] then n = n.next end +-- if halant[n.char] then +-- if lastcons ~= stop then +-- local ln = lastcons.next +-- if nukta[ln.char] then lastcons = ln end +-- end +-- local np, nn, ln = n.prev, n.next, lastcons.next +-- np.next = n.next +-- nn.prev = n.prev +-- lastcons.next = n +-- if ln then ln.prev = n end +-- n.next = ln +-- n.prev = lastcons +-- if lastcons == stop then stop = n end +-- end +-- end +-- +-- n = start.next +-- if ra[start.char] and halant[n.char] and not ( n ~= stop and ( zwj[n.next.char] or zwnj[n.next.char] ) ) then -- if syllable starts with Ra + H then move this combination so that it follows either: the post-base 'matra' (if any) or the base consonant +-- local matra = base +-- if base ~= stop and dependent_vowel[base.next.char] then matra = 
base.next end +-- local sp, nn, mn = start.prev, n.next, matra.next +-- if sp then sp.next = nn end +-- nn.prev = sp +-- matra.next = start +-- start.prev = matra +-- n.next = mn +-- if mn then mn.prev = n end +-- if head == start then head = nn end +-- start = nn +-- if matra == stop then stop = n end +-- end +-- +-- local current = start +-- while current ~= stop do +-- if halant[current.next.char] and current.next ~= stop and zwnj[current.next.next.char] then unset_attribute(current, state) end +-- current = current.next +-- end +-- +-- if has_attribute(base, state) and base ~= stop and halant[base.next.char] and not ( base.next ~= stop and zwj[base.next.next.char] ) then unset_attribute(base, state) end +-- +-- local current, allreordered, moved = start, false, { [base] = true } +-- local a, b, p, bn = base, base, base, base.next +-- if base ~= stop and nukta[bn.char] then a, b, p = bn, bn, bn end +-- while not allreordered do +-- local c, n, l = current, current.next, nil --current is always consonant +-- if c ~= stop and nukta[n.char] then c = n n = n.next end +-- if c ~= stop and halant[n.char] then c = n n = n.next end +-- while c ~= stop and dependent_vowel[n.char] do c = n n = n.next end +-- if c ~= stop and vowel_modifier[n.char] then c = n n = n.next end +-- if c ~= stop and stress_tone_mark[n.char] then c = n n = n.next end +-- local bp, cn = firstcons.prev, current.next +-- while cn ~= c.next do -- move pre-base matras... 
+-- if pre_mark[cn.char] then +-- if bp then bp.next = cn end +-- cn.prev.next = cn.next +-- if cn.next then cn.next.prev = cn.prev end +-- if cn == stop then stop = cn.prev end +-- cn.prev = bp +-- cn.next = firstcons +-- firstcons.prev = cn +-- if firstcons == start then +-- if head == start then head = cn end +-- start = cn +-- end +-- break +-- end +-- cn = cn.next +-- end +-- allreordered = c == stop +-- current = c.next +-- end +-- +-- if reph or vattu then +-- local current, cns = start, nil +-- while current ~= stop do +-- local c, n = current, current.next +-- if ra[current.char] and halant[n.char] then +-- c, n = n, n.next +-- local b, bn = base, base +-- while bn ~= stop do +-- if dependent_vowel[bn.next.char] then b = bn.next end +-- bn = bn.next +-- end +-- if has_attribute(current,state,attribute) == 5 then -- position Reph (Ra + H) after post-base 'matra' (if any) since these become marks on the 'matra', not on the base glyph +-- if b ~= current then +-- if current == start then +-- if head == start then head = n end +-- start = n +-- end +-- if b == stop then stop = c end +-- if current.prev then current.prev.next = n end +-- if n then n.prev = current.prev end +-- c.next = b.next +-- if b.next then b.next.prev = c end +-- b.next = current +-- current.prev = b +-- end +-- elseif cns and cns.next ~= current then -- position below-base Ra (vattu) following the consonants on which it is placed (either the base consonant or one of the pre-base consonants) +-- local cp, cnsn = current.prev, cns.next +-- if cp then cp.next = n end +-- if n then n.prev = cp end +-- cns.next = current +-- current.prev = cns +-- c.next = cnsn +-- if cnsn then cnsn.prev = c end +-- if c == stop then stop = cp break end +-- current = n.prev +-- end +-- elseif consonant[current.char] or nbsp[current.char] then +-- cns = current +-- if halant[cns.next.char] then cns = cns.next end +-- end +-- current = current.next +-- end +-- end +-- +-- if nbsp[base.char] then +-- head = 
node.remove(head, base) +-- node.free(base) +-- end +-- +-- return head, stop +-- end +-- +-- function dev2_reorder_matras(start,kind,lookupname,replacement) +-- local current = start +-- while current and current.id == glyph and current.subtype<256 and current.font == start.font and has_attribute(current, sylnr) == has_attribute(start, sylnr) do +-- if halant[current.char] and not has_attribute(current, state) then +-- if current.next and current.next.id == glyph and current.next.subtype<256 and current.next.font == start.font and has_attribute(current.next, sylnr) == has_attribute(start, sylnr) and ( zwj[current.next.char] or zwnj[current.next.char] ) then current = current.next end +-- local sn = start.next +-- start.next.prev = start.prev +-- if start.prev then start.prev.next = start.next end +-- if current.next then current.next.prev = start end +-- start.next = current.next +-- current.next = start +-- start.prev = current +-- start = sn +-- break +-- end +-- current = current.next +-- end +-- return start, true +-- end +-- +-- function dev2_reorder_reph(start,kind,lookupname,replacement) +-- local current, sn = start.next, nil +-- while current and current.id == glyph and current.subtype<256 and current.font == start.font and has_attribute(current, sylnr) == has_attribute(start, sylnr) do --step 2 +-- if halant[current.char] and not has_attribute(current, state) then +-- if current.next and current.next.id == glyph and current.next.subtype<256 and current.next.font == start.font and has_attribute(current.next, sylnr) == has_attribute(start, sylnr) and ( zwj[current.next.char] or zwnj[current.next.char] ) then current = current.next end +-- sn = start.next +-- start.next.prev = start.prev +-- if start.prev then start.prev.next = start.next end +-- if current.next then current.next.prev = start end +-- start.next = current.next +-- current.next = start +-- start.prev = current +-- start = sn +-- break +-- end +-- current = current.next +-- end +-- if not sn 
then +-- current = start.next +-- while current and current.id == glyph and current.subtype<256 and current.font == start.font and has_attribute(current, sylnr) == has_attribute(start, sylnr) do --step 4 +-- if has_attribute(current, state) == 9 then --post-base +-- sn = start.next +-- start.next.prev = start.prev +-- if start.prev then start.prev.next = start.next end +-- start.prev = current.prev +-- current.prev.next = start +-- start.next = current +-- current.prev = start +-- start = sn +-- break +-- end +-- current = current.next +-- end +-- end +-- if not sn then +-- current = start.next +-- local c = nil +-- while current and current.id == glyph and current.subtype<256 and current.font == start.font and has_attribute(current, sylnr) == has_attribute(start, sylnr) do --step 5 +-- if not c and ( above_mark[current.char] or below_mark[current.char] or post_mark[current.char] ) and ReorderClass[current.char] ~= "after subscript" then c = current end +-- current = current.next +-- end +-- if c then +-- sn = start.next +-- start.next.prev = start.prev +-- if start.prev then start.prev.next = start.next end +-- start.prev = c.prev +-- c.prev.next = start +-- start.next = c +-- c.prev = start +-- start = sn +-- end +-- end +-- if not sn then +-- current = start +-- while current.next and current.next.id == glyph and current.next.subtype<256 and current.next.font == start.font and has_attribute(current.next, sylnr) == has_attribute(start, sylnr) do --step 6 +-- current = current.next +-- end +-- if start ~= current then +-- sn = start.next +-- start.next.prev = start.prev +-- if start.prev then start.prev.next = start.next end +-- if current.next then current.next.prev = start end +-- start.next = current.next +-- current.next = start +-- start.prev = current +-- start = sn +-- end +-- end +-- return start, true +-- end +-- +-- function dev2_reorder_pre_base_reordering_consonants(start,kind,lookupname,replacement) +-- local current, sn = start, nil +-- while current 
and current.id == glyph and current.subtype<256 and current.font == start.font and has_attribute(current, sylnr) == has_attribute(start, sylnr) do +-- if halant[current.char] and not has_attribute(current, state) then +-- if current.next and current.next.id == glyph and current.next.subtype<256 and current.next.font == start.font and has_attribute(current.next, sylnr) == has_attribute(start, sylnr) and ( zwj[current.next.char] or zwnj[current.next.char] ) then current = current.next end +-- sn = start.next +-- start.next.prev = start.prev +-- if start.prev then start.prev.next = start.next end +-- if current.next then current.next.prev = start end +-- start.next = current.next +-- current.next = start +-- start.prev = current +-- start = sn +-- break +-- end +-- current = current.next +-- end +-- if not sn then +-- current = start.next +-- while current and current.id == glyph and current.subtype<256 and current.font == start.font and has_attribute(current, sylnr) == has_attribute(start, sylnr) do +-- if not consonant[current.char] and has_attribute(current, state) then --main +-- sn = start.next +-- start.next.prev = start.prev +-- if start.prev then start.prev.next = start.next end +-- start.prev = current.prev +-- current.prev.next = start +-- start.next = current +-- current.prev = start +-- start = sn +-- break +-- end +-- current = current.next +-- end +-- end +-- return start, true +-- end +-- +-- function remove_joiners(start,kind,lookupname,replacement) +-- local stop = start.next +-- while stop and stop.id == glyph and stop.subtype<256 and stop.font == start.font and (zwj[stop.char] or zwnj[stop.char]) do stop = stop.next end +-- if stop then stop.prev.next = nil stop.prev = start.prev end +-- if start.prev then start.prev.next = stop end +-- node.flush_list(start) +-- return stop, true +-- end +-- +-- local function dev2_reorder(head,start,stop,font,attr) +-- local tfmdata = fontdata[font] +-- local lookuphash = tfmdata.resources.lookuphash +-- local 
sequences = tfmdata.resources.sequences +-- +-- if not lookuphash["remove_joiners"] then install_dev(tfmdata) end --install Devanagari-features +-- +-- local sharedfeatures = tfmdata.shared.features +-- sharedfeatures["dev2_reorder_matras"] = true +-- sharedfeatures["dev2_reorder_reph"] = true +-- sharedfeatures["dev2_reorder_pre_base_reordering_consonants"] = true +-- sharedfeatures["remove_joiners"] = true +-- local datasets = otf.dataset(tfmdata,font,attr) +-- +-- local reph, pre_base_reordering_consonants = false, nil +-- local halfpos, basepos, subpos, postpos = nil, nil, nil, nil +-- local locl = { } +-- +-- for s=1,#sequences do -- classify chars +-- local sequence = sequences[s] +-- local dataset = datasets[s] +-- featurevalue = dataset and dataset[1] +-- if featurevalue and dataset[4] then +-- local subtables = sequence.subtables +-- for i=1,#subtables do +-- local lookupname = subtables[i] +-- local lookupcache = lookuphash[lookupname] +-- if lookupcache then +-- if dataset[4] == "rphf" then +-- if dataset[3] ~= 0 then --rphf is result of chain +-- else +-- reph = lookupcache[0x0930] and lookupcache[0x0930][0x094D] and lookupcache[0x0930][0x094D]["ligature"] +-- end +-- end +-- if dataset[4] == "pref" and not pre_base_reordering_consonants then +-- for k, v in pairs(lookupcache[0x094D]) do +-- pre_base_reordering_consonants[k] = v and v["ligature"] --ToDo: reph might also be result of chain +-- end +-- end +-- local current = start +-- while current ~= stop.next do +-- if dataset[4] == "locl" then locl[current] = lookupcache[current.char] end --ToDo: locl might also be result of chain +-- if current ~= stop then +-- local c, n = locl[current] or current.char, locl[current.next] or current.next.char +-- if dataset[4] == "rphf" and lookupcache[c] and lookupcache[c][n] then --above-base: rphf Consonant + Halant +-- if current.next ~= stop and ( zwj[current.next.next.char] or zwnj[current.next.next.char] ) then --ZWJ and ZWNJ prevent creation of reph +-- 
current = current.next +-- elseif current == start then +-- set_attribute(current,state,5) +-- end +-- current = current.next +-- end +-- if dataset[4] == "half" and lookupcache[c] and lookupcache[c][n] then --half forms: half Consonant + Halant +-- if current.next ~= stop and zwnj[current.next.next.char] then --ZWNJ prevent creation of half +-- current = current.next +-- else +-- set_attribute(current,state,6) +-- if not halfpos then halfpos = current end +-- end +-- current = current.next +-- end +-- if dataset[4] == "pref" and lookupcache[c] and lookupcache[c][n] then --pre-base: pref Halant + Consonant +-- set_attribute(current,state,7) +-- set_attribute(current.next,state,7) +-- current = current.next +-- end +-- if dataset[4] == "blwf" and lookupcache[c] and lookupcache[c][n] then --below-base: blwf Halant + Consonant +-- set_attribute(current,state,8) +-- set_attribute(current.next,state,8) +-- current = current.next +-- subpos = current +-- end +-- if dataset[4] == "pstf" and lookupcache[c] and lookupcache[c][n] then --post-base: pstf Halant + Consonant +-- set_attribute(current,state,9) +-- set_attribute(current.next,state,9) +-- current = current.next +-- postpos = current +-- end +-- end +-- current = current.next +-- end +-- end +-- end +-- end +-- end +-- +-- lookuphash["dev2_reorder_matras"] = pre_mark +-- lookuphash["dev2_reorder_reph"] = { [reph] = true } +-- lookuphash["dev2_reorder_pre_base_reordering_consonants"] = pre_base_reordering_consonants or { } +-- lookuphash["remove_joiners"] = { [0x200C] = true, [0x200D] = true } +-- +-- local current, base, firstcons = start, nil, nil +-- if has_attribute(start,state) == 5 then current = start.next.next end -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph from candidates for base consonants +-- +-- if current ~= stop.next and nbsp[current.char] then --Stand Alone cluster +-- if current == stop then +-- stop = stop.prev +-- head = node.remove(head, current) +-- 
node.free(current) +-- return head, stop +-- else +-- base = current +-- current = current.next +-- if current ~= stop then +-- if nukta[current.char] then current = current.next end +-- if zwj[current.char] then +-- if current ~= stop and current.next ~= stop and halant[current.next.char] then +-- current = current.next +-- local tmp = current.next.next +-- local changestop = current.next == stop +-- current.next.next = nil +-- set_attribute(current,state,7) --pref +-- current = nodes.handlers.characters(current) +-- set_attribute(current,state,8) --blwf +-- current = nodes.handlers.characters(current) +-- set_attribute(current,state,9) --pstf +-- current = nodes.handlers.characters(current) +-- unset_attribute(current,state) +-- if halant[current.char] then +-- current.next.next = tmp +-- local nc = node.copy(current) +-- current.char = dotted_circle +-- head = node.insert_after(head, current, nc) +-- else +-- current.next = tmp -- (assumes that result of pref, blwf, or pstf consists of one node) +-- if changestop then stop = current end +-- end +-- end +-- end +-- end +-- end +-- else --not Stand Alone cluster +-- while current ~= stop.next do -- find base consonant +-- if consonant[current.char] and not ( current ~= stop and halant[current.next.char] and current.next ~= stop and zwj[current.next.next.char] ) then +-- if not firstcons then firstcons = current end +-- if not ( has_attribute(current, state) == 7 or has_attribute(current, state) == 8 or has_attribute(current, state) == 9 ) then base = current end --check whether consonant has below-base or post-base form or is pre-base reordering Ra +-- end +-- current = current.next +-- end +-- if not base then +-- base = firstcons +-- end +-- end +-- +-- if not base then +-- if has_attribute(start, state) == 5 then unset_attribute(start, state) end +-- return head, stop +-- else +-- if has_attribute(base, state) then unset_attribute(base, state) end +-- basepos = base +-- end +-- if not halfpos then halfpos = 
base end +-- if not subpos then subpos = base end +-- if not postpos then postpos = subpos or base end +-- +-- --Matra characters are classified and reordered by which consonant in a conjunct they have affinity for +-- local moved = { } +-- current = start +-- while current ~= stop.next do +-- local char, target, cn = locl[current] or current.char, nil, current.next +-- if not moved[current] and dependent_vowel[char] then +-- if pre_mark[char] then -- Before first half form in the syllable +-- moved[current] = true +-- if current.prev then current.prev.next = current.next end +-- if current.next then current.next.prev = current.prev end +-- if current == stop then stop = current.prev end +-- if halfpos == start then +-- if head == start then head = current end +-- start = current +-- end +-- if halfpos.prev then halfpos.prev.next = current end +-- current.prev = halfpos.prev +-- halfpos.prev = current +-- current.next = halfpos +-- halfpos = current +-- elseif above_mark[char] then -- After main consonant +-- target = basepos +-- if subpos == basepos then subpos = current end +-- if postpos == basepos then postpos = current end +-- basepos = current +-- elseif below_mark[char] then -- After subjoined consonants +-- target = subpos +-- if postpos == subpos then postpos = current end +-- subpos = current +-- elseif post_mark[char] then -- After post-form consonant +-- target = postpos +-- postpos = current +-- end +-- if ( above_mark[char] or below_mark[char] or post_mark[char] ) and current.prev ~= target then +-- if current.prev then current.prev.next = current.next end +-- if current.next then current.next.prev = current.prev end +-- if current == stop then stop = current.prev end +-- if target.next then target.next.prev = current end +-- current.next = target.next +-- target.next = current +-- current.prev = target +-- end +-- end +-- current = cn +-- end +-- +-- --Reorder marks to canonical order: Adjacent nukta and halant or nukta and vedic sign are always 
repositioned if necessary, so that the nukta is first. +-- local current, c = start, nil +-- while current ~= stop do +-- if halant[current.char] or stress_tone_mark[current.char] then +-- if not c then c = current end +-- else +-- c = nil +-- end +-- if c and nukta[current.next.char] then +-- if head == c then head = current.next end +-- if stop == current.next then stop = current end +-- if c.prev then c.prev.next = current.next end +-- current.next.prev = c.prev +-- current.next = current.next.next +-- if current.next.next then current.next.next.prev = current end +-- c.prev = current.next +-- current.next.next = c +-- end +-- if stop == current then break end +-- current = current.next +-- end +-- +-- if nbsp[base.char] then +-- head = node.remove(head, base) +-- node.free(base) +-- end +-- +-- return head, stop +-- end +-- +-- function fonts.analyzers.methods.deva(head,font,attr) +-- local orighead = head +-- local current, start, done = head, true, false +-- while current do +-- if current.id == glyph and current.subtype<256 and current.font == font then +-- done = true +-- local syllablestart, syllableend = current, nil +-- +-- local c = current --Checking Stand Alone cluster (this behavior is copied from dev2) +-- if ra[c.char] and c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and halant[c.next.char] and c.next.next and c.next.next.id == glyph and c.next.next.subtype<256 and c.next.next.font == font then c = c.next.next end +-- if nbsp[c.char] and ( not current.prev or current.prev.id ~= glyph or current.prev.subtype>=256 or current.prev.font ~= font or +-- ( not consonant[current.prev.char] and not independent_vowel[current.prev.char] and not dependent_vowel[current.prev.char] and +-- not vowel_modifier[current.prev.char] and not stress_tone_mark[current.prev.char] and not nukta[current.prev.char] and not halant[current.prev.char] ) +-- ) then --Stand Alone cluster (at the start of the word only): 
#[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)] +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and nukta[c.next.char] then c = c.next end +-- local n = c.next +-- if n and n.id == glyph and n.subtype<256 and n.font == font then +-- local ni = n.next +-- if ( zwj[n.char] or zwnj[n.char] ) and ni and ni.id == glyph and ni.subtype<256 and ni.font == font then n = ni ni = ni.next end +-- if halant[n.char] and ni and ni.id == glyph and ni.subtype<256 and ni.font == font and consonant[ni.char] then c = ni end +-- end +-- while c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and dependent_vowel[c.next.char] do c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and nukta[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and halant[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and vowel_modifier[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and stress_tone_mark[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and stress_tone_mark[c.next.char] then c = c.next end +-- current = c.next +-- syllableend = c +-- if syllablestart ~= syllableend then +-- head, current = deva_reorder(head, syllablestart,syllableend,font,attr) +-- current = current.next +-- end +-- elseif consonant[current.char] then -- syllable containing consonant +-- prevc = true +-- while prevc do +-- prevc = false +-- local n = current.next +-- if n and n.id == glyph and n.subtype<256 and n.font == font and nukta[n.char] then n = n.next end +-- if n and n.id == glyph and n.subtype<256 and n.font == font and halant[n.char] then +-- local n = n.next +-- if n and n.id == glyph and n.subtype<256 and n.font == font 
and ( zwj[n.char] or zwnj[n.char] ) then n = n.next end +-- if n and n.id == glyph and n.subtype<256 and n.font == font and consonant[n.char] then +-- prevc = true +-- current = n +-- end +-- end +-- end +-- if current.next and current.next.id == glyph and current.next.subtype<256 and current.next.font == font and nukta[current.next.char] then current = current.next end -- nukta (not specified in the Microsoft Devanagari OpenType specification) +-- syllableend = current +-- current = current.next +-- if current and current.id == glyph and current.subtype<256 and current.font == font and halant[current.char] then -- syllable containing consonant without vowels: {C + [Nukta] + H} + C + H +-- if current.next and current.next.id == glyph and current.next.subtype<256 and current.next.font == font and ( zwj[current.next.char] or zwnj[current.next.char] ) then current = current.next end +-- syllableend = current +-- current = current.next +-- else -- syllable containing consonant with vowels: {C + [Nukta] + H} + C + [M] + [VM] + [SM] +-- if current and current.id == glyph and current.subtype<256 and current.font == font and dependent_vowel[current.char] then +-- syllableend = current +-- current = current.next +-- end +-- if current and current.id == glyph and current.subtype<256 and current.font == font and vowel_modifier[current.char] then +-- syllableend = current +-- current = current.next +-- end +-- if current and current.id == glyph and current.subtype<256 and current.font == font and stress_tone_mark[current.char] then +-- syllableend = current +-- current = current.next +-- end +-- end +-- if syllablestart ~= syllableend then +-- head, current = deva_reorder(head,syllablestart,syllableend,font,attr) +-- current = current.next +-- end +-- elseif current.id == glyph and current.subtype<256 and current.font == font and independent_vowel[current.char] then -- syllable without consonants: VO + [VM] + [SM] +-- syllableend = current +-- current = current.next +-- if current 
and current.id == glyph and current.subtype<256 and current.font == font and vowel_modifier[current.char] then +-- syllableend = current +-- current = current.next +-- end +-- if current and current.id == glyph and current.subtype<256 and current.font == font and stress_tone_mark[current.char] then +-- syllableend = current +-- current = current.next +-- end +-- else -- Syntax error +-- if pre_mark[current.char] or above_mark[current.char] or below_mark[current.char] or post_mark[current.char] then +-- local n = node.copy(current) +-- if pre_mark[current.char] then +-- n.char = dotted_circle +-- else +-- current.char = dotted_circle +-- end +-- head, current = node.insert_after(head, current, n) +-- end +-- current = current.next +-- end +-- else +-- current = current.next +-- end +-- start = false +-- end +-- +-- return head, done +-- end +-- +-- function fonts.analyzers.methods.dev2(head,font,attr) +-- local current, start, done, syl_nr = head, true, false, 0 +-- while current do +-- local syllablestart, syllableend = nil, nil +-- if current.id == glyph and current.subtype<256 and current.font == font then +-- syllablestart = current +-- done = true +-- local c, n = current, current.next +-- if ra[current.char] and n and n.id == glyph and n.subtype<256 and n.font == font and halant[n.char] and n.next and n.next.id == glyph and n.next.subtype<256 and n.next.font == font then c = n.next end +-- if independent_vowel[c.char] then --Vowel-based syllable: [Ra+H]+V+[N]+[<[<ZWJ|ZWNJ>]+H+C|ZWJ+C>]+[{M}+[N]+[H]]+[SM]+[(VD)] +-- n = c.next +-- local ni, nii = nil, nil +-- if n and n.id == glyph and n.subtype<256 and n.font == font and nukta[n.char] then n = n.next end +-- if n and n.id == glyph and n.subtype<256 and n.font == font then local ni = n.next end +-- if ni and ni.id == glyph and ni.subtype<256 and ni.font == font and ni.next and ni.next.id == glyph and ni.next.subtype<256 and ni.next.font == font then +-- nii = ni.next +-- if zwj[ni.char] and consonant[nii.char] 
then +-- c = nii +-- elseif (zwj[ni.char] or zwnj[ni.char]) and halant[nii.char] and nii.next and nii.next.id == glyph and nii.next.subtype<256 and nii.next.font == font and consonant[nii.next.char] then +-- c = nii.next +-- end +-- end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and dependent_vowel[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and nukta[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and halant[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and vowel_modifier[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and stress_tone_mark[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and stress_tone_mark[c.next.char] then c = c.next end +-- current = c +-- syllableend = c +-- elseif nbsp[c.char] and ( not current.prev or current.prev.id ~= glyph or current.prev.subtype>=256 or current.prev.font ~= font or +-- ( not consonant[current.prev.char] and not independent_vowel[current.prev.char] and not dependent_vowel[current.prev.char] and +-- not vowel_modifier[current.prev.char] and not stress_tone_mark[current.prev.char] and not nukta[current.prev.char] and not halant[current.prev.char] ) +-- ) then --Stand Alone cluster (at the start of the word only): #[Ra+H]+NBSP+[N]+[<[<ZWJ|ZWNJ>]+H+C>]+[{M}+[N]+[H]]+[SM]+[(VD)] +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and nukta[c.next.char] then c = c.next end +-- n = c.next +-- if n and n.id == glyph and n.subtype<256 and n.font == font then +-- local ni = n.next +-- if ( zwj[n.char] or zwnj[n.char] ) and ni and ni.id == glyph and ni.subtype<256 and ni.font == font then n = ni ni = ni.next end +-- 
if halant[n.char] and ni and ni.id == glyph and ni.subtype<256 and ni.font == font and consonant[ni.char] then c = ni end +-- end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and dependent_vowel[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and nukta[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and halant[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and vowel_modifier[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and stress_tone_mark[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and stress_tone_mark[c.next.char] then c = c.next end +-- current = c +-- syllableend = c +-- elseif consonant[current.char] then --Consonant syllable: {C+[N]+<H+[<ZWNJ|ZWJ>]|<ZWNJ|ZWJ>+H>} + C+[N]+[A] + [< H+[<ZWNJ|ZWJ>] | {M}+[N]+[H]>]+[SM]+[(VD)] +-- c = current +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and nukta[c.next.char] then c = c.next end +-- n = c +-- while n.next and n.next.id == glyph and n.next.subtype<256 and n.next.font == font and ( halant[n.next.char] or zwnj[n.next.char] or zwj[n.next.char] ) do +-- if halant[n.next.char] then +-- n = n.next +-- if n.next and n.next.id == glyph and n.next.subtype<256 and n.next.font == font and ( zwnj[n.next.char] or zwj[n.next.char] ) then n = n.next end +-- else +-- if n.next.next and n.next.next.id == glyph and n.next.next.subtype<256 and n.next.next.font == font and halant[n.next.next.char] then n = n.next.next end +-- end +-- if n.next and n.next.id == glyph and n.next.subtype<256 and n.next.font == font and consonant[n.next.char] then +-- n = n.next +-- if n.next and n.next.id == glyph and 
n.next.subtype<256 and n.next.font == font and nukta[n.next.char] then n = n.next end +-- c = n +-- else +-- break +-- end +-- end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and anudatta[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and halant[c.next.char] then +-- c = c.next +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and ( zwnj[c.next.char] or zwj[c.next.char] ) then c = c.next end +-- else +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and dependent_vowel[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and nukta[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and halant[c.next.char] then c = c.next end +-- end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and vowel_modifier[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and stress_tone_mark[c.next.char] then c = c.next end +-- if c.next and c.next.id == glyph and c.next.subtype<256 and c.next.font == font and stress_tone_mark[c.next.char] then c = c.next end +-- current = c +-- syllableend = c +-- end +-- end +-- +-- if syllableend then +-- syl_nr = syl_nr + 1 +-- c = syllablestart +-- while c ~= syllableend.next do +-- set_attribute(c,sylnr,syl_nr) +-- c = c.next +-- end +-- end +-- if syllableend and syllablestart ~= syllableend then +-- head, current = dev2_reorder(head,syllablestart,syllableend,font,attr) +-- end +-- +-- if not syllableend and not has_attribute(current, state) and current.id == glyph and current.subtype<256 and current.font == font then -- Syntax error +-- if pre_mark[current.char] or above_mark[current.char] or below_mark[current.char] or post_mark[current.char] 
then +-- local n = node.copy(current) +-- if pre_mark[current.char] then +-- n.char = dotted_circle +-- else +-- current.char = dotted_circle +-- end +-- head, current = node.insert_after(head, current, n) +-- end +-- end +-- +-- start = false +-- current = current.next +-- end +-- +-- return head, done +-- end +-- +-- function otf.handlers.dev2_reorder_matras(start,kind,lookupname,replacement) +-- return dev2_reorder_matras(start,kind,lookupname,replacement) +-- end +-- +-- function otf.handlers.dev2_reorder_reph(start,kind,lookupname,replacement) +-- return dev2_reorder_reph(start,kind,lookupname,replacement) +-- end +-- +-- function otf.handlers.dev2_reorder_pre_base_reordering_consonants(start,kind,lookupname,replacement) +-- return dev2_reorder_pre_base_reordering_consonants(start,kind,lookupname,replacement) +-- end +-- +-- function otf.handlers.remove_joiners(start,kind,lookupname,replacement) +-- return remove_joiners(start,kind,lookupname,replacement) +-- end diff --git a/tex/context/base/font-otn.lua b/tex/context/base/font-otn.lua index 2c6016427..59d0cac9a 100644 --- a/tex/context/base/font-otn.lua +++ b/tex/context/base/font-otn.lua @@ -3,9 +3,11 @@ if not modules then modules = { } end modules ['font-otn'] = { comment = "companion to font-ini.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" + license = "see context related readme files", } +-- preprocessors = { "nodes" } + -- this is still somewhat preliminary and it will get better in due time; -- much functionality could only be implemented thanks to the husayni font -- of Idris Samawi Hamid to who we dedicate this module. 
@@ -404,80 +406,6 @@ local function getcomponentindex(start) end end --- local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head --- if start == stop and start.char == char then --- start.char = char --- return start --- elseif discfound then --- local prev = start.prev --- local next = stop.next --- start.prev = nil --- stop.next = nil --- local base = copy_glyph(start) --- base.char = char --- base.subtype = ligature_code --- base.components = start -- start can have components --- if prev then --- prev.next = base --- end --- if next then --- next.prev = base --- end --- base.next = next --- base.prev = prev --- return base --- else --- -- start is the ligature --- local deletemarks = markflag ~= "mark" --- local prev = start.prev --- local next = stop.next --- local base = copy_glyph(start) --- local current, start = insert_node_after(start,start,base) --- -- [start->current][copyofstart->start]...[stop] --- current.next = next --- if next then --- next.prev = current --- end --- start.prev = nil --- stop.next = nil --- current.char = char --- current.subtype = ligature_code --- current.components = start --- local head = current --- -- this is messy ... 
we should get rid of the components eventually --- local baseindex = 0 --- local componentindex = 0 --- while start do --- local char = start.char --- if not marks[char] then --- baseindex = baseindex + componentindex --- componentindex = getcomponentindex(start) --- elseif not deletemarks then -- quite fishy --- set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) --- if trace_marks then --- logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) --- end --- head, current = insert_node_after(head,current,copy_glyph(start)) -- unlikely that mark has components --- end --- start = start.next --- end --- start = current.next --- while start and start.id == glyph_code do -- hm, is id test needed ? --- local char = start.char --- if marks[char] then --- set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) --- if trace_marks then --- logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) --- end --- else --- break --- end --- start = start.next --- end --- return head --- end --- end - local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head if start == stop and start.char == char then start.char = char @@ -848,9 +776,6 @@ function handlers.gpos_mark2mark(start,kind,lookupname,markanchors,sequence) local markchar = start.char if marks[markchar] then local base = start.prev -- [glyph] [basemark] [start=mark] - -- while base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp) do - -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature - -- end local slc = has_attribute(start,ligacomp) if slc then -- a rather messy loop ... 
needs checking with husayni while base do @@ -1505,9 +1430,6 @@ function chainprocs.gpos_mark2mark(start,stop,kind,chainname,currentcontext,look end if markanchors then local base = start.prev -- [glyph] [basemark] [start=mark] - -- while (base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp)) do - -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature - -- end local slc = has_attribute(start,ligacomp) if slc then -- a rather messy loop ... needs checking with husayni while base do diff --git a/tex/context/base/luat-env.lua b/tex/context/base/luat-env.lua index a00acdc63..e483169fd 100644 --- a/tex/context/base/luat-env.lua +++ b/tex/context/base/luat-env.lua @@ -21,11 +21,12 @@ local allocate, mark = utilities.storage.allocate, utilities.storage.mark local format, sub, match, gsub, find = string.format, string.sub, string.match, string.gsub, string.find local unquoted, quoted = string.unquoted, string.quoted local concat, insert, remove = table.concat, table.insert, table.remove -local loadedluacode = utilities.lua.loadedluacode -local luasuffixes = utilities.lua.suffixes -environment = environment or { } -local environment = environment +local luautilities = utilities.lua +local luasuffixes = luautilities.suffixes + +environment = environment or { } +local environment = environment -- precautions @@ -314,7 +315,7 @@ function environment.luafilechunk(filename,silent) -- used for loading lua bytec filename = file.replacesuffix(filename, "lua") local fullname = environment.luafile(filename) if fullname and fullname ~= "" then - local data = loadedluacode(fullname,strippable,filename) + local data = luautilities.loadedluacode(fullname,strippable,filename) -- can be overloaded if trace_locating then report_lua("loading file %s%s", fullname, not data and " failed" or "") elseif not silent then diff --git a/tex/context/base/luat-lib.mkiv b/tex/context/base/luat-lib.mkiv index 
283ed8998..521ecbf5e 100644 --- a/tex/context/base/luat-lib.mkiv +++ b/tex/context/base/luat-lib.mkiv @@ -71,6 +71,7 @@ \registerctxluafile{luat-bwc}{1.001} \registerctxluafile{trac-lmx}{1.001} % might become l-lmx or luat-lmx \registerctxluafile{luat-mac}{1.001} +%registerctxluafile{luat-prp}{1.001} % for the moment of not much use \registerctxluafile{lxml-tab}{1.001} \registerctxluafile{lxml-lpt}{1.001} diff --git a/tex/context/base/math-tag.lua b/tex/context/base/math-tag.lua index 0ac5b0897..c504b610a 100644 --- a/tex/context/base/math-tag.lua +++ b/tex/context/base/math-tag.lua @@ -127,7 +127,7 @@ process = function(start) -- we cannot use the processor as we have no finalizer -- check for code local a = get_attribute(start,a_mathcategory) if a then - set_attribute(start,a_tagged,start_tagged("ms"),{ detail = a }) + set_attribute(start,a_tagged,start_tagged("ms",{ detail = a })) else set_attribute(start,a_tagged,start_tagged("ms")) end diff --git a/tex/context/base/mlib-ctx.lua b/tex/context/base/mlib-ctx.lua index 5a3becd7a..5d976d161 100644 --- a/tex/context/base/mlib-ctx.lua +++ b/tex/context/base/mlib-ctx.lua @@ -103,11 +103,15 @@ end statistics.register("metapost processing time", function() local n = metapost.n if n and n > 0 then - local e, t = metapost.makempy.nofconverted, statistics.elapsedtime - local str = format("%s seconds, loading: %s seconds, execution: %s seconds, n: %s", - t(metapost), t(mplib), t(metapost.exectime), n) - if e > 0 then - return format("%s, external: %s seconds (%s calls)", str, t(metapost.makempy), e) + local nofconverted = metapost.makempy.nofconverted + local elapsedtime = statistics.elapsedtime + local elapsed = statistics.elapsed + local str = format("%s seconds, loading: %s, execution: %s, n: %s, average: %s", + elapsedtime(metapost), elapsedtime(mplib), elapsedtime(metapost.exectime), n, + elapsedtime((elapsed(metapost) + elapsed(mplib) + elapsed(metapost.exectime)) / n)) + if nofconverted > 0 then + return format("%s, 
external: %s (%s calls)", + str, elapsedtime(metapost.makempy), nofconverted) else return str end diff --git a/tex/context/base/mlib-pps.lua b/tex/context/base/mlib-pps.lua index 43a548c65..8e69066d0 100644 --- a/tex/context/base/mlib-pps.lua +++ b/tex/context/base/mlib-pps.lua @@ -479,6 +479,8 @@ local function sxsy(wd,ht,dp) -- helper for text return (wd ~= 0 and factor/wd) or 0, (hd ~= 0 and factor/hd) or 0 end +local no_first_run = "mfun_first_run := false ;" +local do_first_run = "mfun_first_run := true ;" local no_trial_run = "mfun_trial_run := false ;" local do_trial_run = "mfun_trial_run := true ;" local do_begin_fig = "; beginfig(1) ; " @@ -588,6 +590,7 @@ function metapost.graphic_base_pass(specification) extensions, inclusions, wrappit and do_begin_fig or "", + do_first_run, do_trial_run, current_initializations, do_safeguard, @@ -611,6 +614,7 @@ function metapost.graphic_base_pass(specification) metapost.process(mpx, { preamble, wrappit and do_begin_fig or "", + do_first_run, no_trial_run, current_initializations, do_safeguard, diff --git a/tex/context/base/mlib-run.lua b/tex/context/base/mlib-run.lua index 6a23fe316..16ab55c3b 100644 --- a/tex/context/base/mlib-run.lua +++ b/tex/context/base/mlib-run.lua @@ -161,8 +161,11 @@ function metapost.reporterror(result) if t and t ~= "" then (metapost.texerrors and texerrormessage or report_metapost)("terminal: %s",t) end + if e == "" or e == "no-error" then + e = nil + end if e then - (metapost.texerrors and texerrormessage or report_metapost)("error: %s",(e=="" and "?") or e) + (metapost.texerrors and texerrormessage or report_metapost)("error: %s",e) end if not t and not e and l then metapost.lastlog = metapost.lastlog .. "\n" .. 
l @@ -458,7 +461,7 @@ function metapost.process(mpx, data, trialrun, flusher, multipass, isextrapass, end if not metapost.reporterror(result) then if metapost.showlog then - local str = (result.term ~= "" and result.term) or "no terminal output" + local str = result.term ~= "" and result.term or "no terminal output" if not emptystring(str) then metapost.lastlog = metapost.lastlog .. "\n" .. str report_metapost("log: %s",str) diff --git a/tex/context/base/node-fin.lua b/tex/context/base/node-fin.lua index 506fc724f..9bcfa0a7b 100644 --- a/tex/context/base/node-fin.lua +++ b/tex/context/base/node-fin.lua @@ -3,7 +3,7 @@ if not modules then modules = { } end modules ['node-fin'] = { comment = "companion to node-fin.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" + license = "see context related readme files", } -- this module is being reconstructed diff --git a/tex/context/base/node-fnt.lua b/tex/context/base/node-fnt.lua index 49e1029e7..a97c98d83 100644 --- a/tex/context/base/node-fnt.lua +++ b/tex/context/base/node-fnt.lua @@ -3,7 +3,7 @@ if not modules then modules = { } end modules ['node-fnt'] = { comment = "companion to font-ini.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" + license = "see context related readme files", } if not context then os.exit() end -- generic function in node-dum @@ -219,7 +219,7 @@ function handlers.characters(head) return head, true end --- function handlers.xcharacters(head) +-- function handlers.characters(head) -- -- either next or not, but definitely no already processed list -- starttiming(nodes) -- local usedfonts, attrfonts, done = { }, { }, false diff --git a/tex/context/base/node-ini.lua b/tex/context/base/node-ini.lua index a294643b2..7f33a0149 100644 --- a/tex/context/base/node-ini.lua +++ 
b/tex/context/base/node-ini.lua @@ -174,7 +174,7 @@ whatcodes = allocate(swapped(whatcodes, whatcodes )) listcodes = allocate(swapped(listcodes, listcodes )) glyphcodes = allocate(swapped(glyphcodes, glyphcodes)) kerncodes = allocate(swapped(kerncodes, kerncodes )) -penaltycodes = allocate(swapped(penaltycodes, penaltycodes )) +penaltycodes = allocate(swapped(penaltycodes, penaltycodes)) mathcodes = allocate(swapped(mathcodes, mathcodes )) fillcodes = allocate(swapped(fillcodes, fillcodes )) diff --git a/tex/context/base/node-ini.mkiv b/tex/context/base/node-ini.mkiv index 79e02ff46..4eeafe442 100644 --- a/tex/context/base/node-ini.mkiv +++ b/tex/context/base/node-ini.mkiv @@ -31,6 +31,7 @@ \registerctxluafile{node-ext}{1.001} %registerctxluafile{node-inj}{1.001} % we might split it off \registerctxluafile{node-acc}{1.001} % experimental +%registerctxluafile{node-prp}{1.001} % makes no sense (yet) \newcount\c_node_tracers_show_box % box number diff --git a/tex/context/base/node-inj.lua b/tex/context/base/node-inj.lua index 33eaa6eb1..b03ded9f2 100644 --- a/tex/context/base/node-inj.lua +++ b/tex/context/base/node-inj.lua @@ -3,7 +3,7 @@ if not modules then modules = { } end modules ['node-inj'] = { comment = "companion to node-ini.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" + license = "see context related readme files", } -- This is very experimental (this will change when we have luatex > .50 and diff --git a/tex/context/base/spac-chr.lua b/tex/context/base/spac-chr.lua index 00c3d6766..2db0b9a7b 100644 --- a/tex/context/base/spac-chr.lua +++ b/tex/context/base/spac-chr.lua @@ -78,7 +78,7 @@ local function inject_char_space(unicode,head,current,parent) local glue = new_glue(char and char.width or fontparameters[font].space) -- glue.attr = copy_node_list(current.attr) glue.attr = current.attr -current.attr = nil + current.attr = nil 
set_attribute(glue,a_character,unicode) head, current = insert_node_after(head,current,glue) return head, current diff --git a/tex/context/base/spac-hor.mkiv b/tex/context/base/spac-hor.mkiv index d40bcf3ee..42661010b 100644 --- a/tex/context/base/spac-hor.mkiv +++ b/tex/context/base/spac-hor.mkiv @@ -215,7 +215,7 @@ \let\dorechecknextindentation\relax % public (in macros) -\def\spac_indentation_check_next_indentation +\unexpanded\def\spac_indentation_check_next_indentation {\global\let\dorechecknextindentation\relax \doifnextcharelse\par\donothing\spac_indentation_variant_no} % messy check as next is seldom \par @@ -969,7 +969,7 @@ {\futurelet\nexttoken\spac_spaces_auto_insert_next} \def\spac_spaces_auto_insert_next - {\ctxcommand{autonextspace("\meaning\nexttoken")}} % todo, just consult nexttoken at the lua end + {\ctxcommand{autonextspace(\!!bs\meaning\nexttoken\!!es)}} % todo, just consult nexttoken at the lua end %D Moved from bib module: diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf Binary files differindex 05e831c58..49660f7d2 100644 --- a/tex/context/base/status-files.pdf +++ b/tex/context/base/status-files.pdf diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf Binary files differindex e045bc941..90da15dfd 100644 --- a/tex/context/base/status-lua.pdf +++ b/tex/context/base/status-lua.pdf diff --git a/tex/context/base/trac-inf.lua b/tex/context/base/trac-inf.lua index 5575639eb..826271169 100644 --- a/tex/context/base/trac-inf.lua +++ b/tex/context/base/trac-inf.lua @@ -11,6 +11,7 @@ if not modules then modules = { } end modules ['trac-inf'] = { -- get warnings about assignments. This is more efficient than using rawset -- and rawget. 
+local type, tonumber = type, tonumber local format, lower = string.format, string.lower local concat = table.concat local clock = os.gettimeofday or os.clock -- should go in environment @@ -72,19 +73,26 @@ local function stoptiming(instance, report) return 0 end +local function elapsed(instance) + if type(instance) == "table" then + local timer = timers[instance or "notimer"] + return timer and timer.loadtime or 0 + else + return tonumber(instance) or 0 + end +end + local function elapsedtime(instance) - local timer = timers[instance or "notimer"] - return format("%0.3f",timer and timer.loadtime or 0) + return format("%0.3f",elapsed(instance)) end local function elapsedindeed(instance) - local timer = timers[instance or "notimer"] - return (timer and timer.loadtime or 0) > statistics.threshold + return elapsed(instance) > statistics.threshold end local function elapsedseconds(instance,rest) -- returns nil if 0 seconds if elapsedindeed(instance) then - return format("%s seconds %s", elapsedtime(instance),rest or "") + return format("%0.3f seconds %s", elapsed(instance),rest or "") end end @@ -92,6 +100,7 @@ statistics.hastiming = hastiming statistics.resettiming = resettiming statistics.starttiming = starttiming statistics.stoptiming = stoptiming +statistics.elapsed = elapsed statistics.elapsedtime = elapsedtime statistics.elapsedindeed = elapsedindeed statistics.elapsedseconds = elapsedseconds diff --git a/tex/context/base/typo-dir.lua b/tex/context/base/typo-dir.lua index ee9407074..da324b7e2 100644 --- a/tex/context/base/typo-dir.lua +++ b/tex/context/base/typo-dir.lua @@ -180,7 +180,6 @@ function directions.process(namespace,attribute,start) -- todo: make faster local lro, rlo, prevattr, inmath = false, false, 0, false while current do local id = current.id ---~ print(id,attribute,has_attribute(current,attribute)) if skipmath and id == math_code then local subtype = current.subtype if subtype == beginmath_code then diff --git a/tex/context/base/util-lua.lua 
b/tex/context/base/util-lua.lua index b496880b2..36daaff55 100644 --- a/tex/context/base/util-lua.lua +++ b/tex/context/base/util-lua.lua @@ -41,6 +41,8 @@ local function fatalerror(name) utilities.report(format("fatal error in %q",name or "unknown")) end +-- environment.loadpreprocessedfile can be set to a preprocessor + if jit or status.luatex_version >= 74 then local function register(name) @@ -76,7 +78,7 @@ if jit or status.luatex_version >= 74 then function luautilities.loadedluacode(fullname,forcestrip,name) -- quite subtle ... doing this wrong incidentally can give more bytes name = name or fullname - local code = loadfile(fullname) + local code = environment.loadpreprocessedfile and environment.loadpreprocessedfile(fullname) or loadfile(fullname) if code then code() end @@ -229,8 +231,7 @@ else function luautilities.loadedluacode(fullname,forcestrip,name) -- quite subtle ... doing this wrong incidentally can give more bytes - name = name or fullname - local code = loadfile(fullname) + local code = environment.loadpreprocessedfile and environment.preprocessedloadfile(fullname) or loadfile(fullname) if code then code() end diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 60e2a7942..5739422b5 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 01/13/13 23:10:29 +-- merge date : 01/17/13 18:16:10 do -- begin closure to overcome local limits and interference @@ -8480,7 +8480,7 @@ if not modules then modules = { } end modules ['node-inj'] = { comment = "companion to node-ini.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" + license = "see context related readme files", } -- This is very experimental (this will 
change when we have luatex > .50 and @@ -8508,9 +8508,6 @@ local nodepool = nodes.pool local newkern = nodepool.kern local traverse_id = node.traverse_id -local unset_attribute = node.unset_attribute -local has_attribute = node.has_attribute -local set_attribute = node.set_attribute local insert_node_before = node.insert_before local insert_node_after = node.insert_after @@ -8547,8 +8544,8 @@ function injections.setcursive(start,nxt,factor,rlmode,exit,entry,tfmstart,tfmne local dx, dy = factor*(exit[1]-entry[1]), factor*(exit[2]-entry[2]) local ws, wn = tfmstart.width, tfmnext.width local bound = #cursives + 1 - set_attribute(start,cursbase,bound) - set_attribute(nxt,curscurs,bound) + start[cursbase] = bound + nxt[curscurs] = bound cursives[bound] = { rlmode, dx, dy, ws, wn } return dx, dy, bound end @@ -8557,14 +8554,14 @@ function injections.setpair(current,factor,rlmode,r2lflag,spec,tfmchr) local x, y, w, h = factor*spec[1], factor*spec[2], factor*spec[3], factor*spec[4] -- dy = y - h if x ~= 0 or w ~= 0 or y ~= 0 or h ~= 0 then - local bound = has_attribute(current,kernpair) + local bound = current[kernpair] if bound then local kb = kerns[bound] -- inefficient but singles have less, but weird anyway, needs checking kb[2], kb[3], kb[4], kb[5] = (kb[2] or 0) + x, (kb[3] or 0) + y, (kb[4] or 0)+ w, (kb[5] or 0) + h else bound = #kerns + 1 - set_attribute(current,kernpair,bound) + current[kernpair] = bound kerns[bound] = { rlmode, x, y, w, h, r2lflag, tfmchr.width } end return x, y, w, h, bound @@ -8576,7 +8573,7 @@ function injections.setkern(current,factor,rlmode,x,tfmchr) local dx = factor*x if dx ~= 0 then local bound = #kerns + 1 - set_attribute(current,kernpair,bound) + current[kernpair] = bound kerns[bound] = { rlmode, dx } return dx, bound else @@ -8586,7 +8583,7 @@ end function injections.setmark(start,base,factor,rlmode,ba,ma,index) -- ba=baseanchor, ma=markanchor local dx, dy = factor*(ba[1]-ma[1]), factor*(ba[2]-ma[2]) -- the index argument is no longer 
used but when this - local bound = has_attribute(base,markbase) -- fails again we should pass it + local bound = base[markbase] -- fails again we should pass it local index = 1 if bound then local mb = marks[bound] @@ -8594,8 +8591,8 @@ local index = 1 -- if not index then index = #mb + 1 end index = #mb + 1 mb[index] = { dx, dy, rlmode } - set_attribute(start,markmark,bound) - set_attribute(start,markdone,index) + start[markmark] = bound + start[markdone] = index return dx, dy, bound else report_injections("possible problem, U+%05X is base mark without data (id: %s)",base.char,bound) @@ -8604,9 +8601,9 @@ index = #mb + 1 -- index = index or 1 index = index or 1 bound = #marks + 1 - set_attribute(base,markbase,bound) - set_attribute(start,markmark,bound) - set_attribute(start,markdone,index) + base[markbase] = bound + start[markmark] = bound + start[markdone] = index marks[bound] = { [index] = { dx, dy, rlmode } } return dx, dy, bound end @@ -8619,12 +8616,12 @@ local function trace(head) report_injections("begin run") for n in traverse_id(glyph_code,head) do if n.subtype < 256 then - local kp = has_attribute(n,kernpair) - local mb = has_attribute(n,markbase) - local mm = has_attribute(n,markmark) - local md = has_attribute(n,markdone) - local cb = has_attribute(n,cursbase) - local cc = has_attribute(n,curscurs) + local kp = n[kernpair] + local mb = n[markbase] + local mm = n[markmark] + local md = n[markdone] + local cb = n[cursbase] + local cc = n[curscurs] report_injections("char U+%05X, font=%s",n.char,n.font) if kp then local k = kerns[kp] @@ -8690,7 +8687,7 @@ function injections.handler(head,where,keep) if tm then mk[n] = tm[n.char] end - local k = has_attribute(n,kernpair) + local k = n[kernpair] if k then local kk = kerns[k] if kk then @@ -8739,9 +8736,9 @@ function injections.handler(head,where,keep) for i=1,nofvalid do -- valid == glyphs local n = valid[i] if not mk[n] then - local n_cursbase = has_attribute(n,cursbase) + local n_cursbase = n[cursbase] 
if p_cursbase then - local n_curscurs = has_attribute(n,curscurs) + local n_curscurs = n[curscurs] if p_cursbase == n_curscurs then local c = cursives[n_curscurs] if c then @@ -8802,14 +8799,14 @@ function injections.handler(head,where,keep) if has_marks then for i=1,nofvalid do local p = valid[i] - local p_markbase = has_attribute(p,markbase) + local p_markbase = p[markbase] if p_markbase then local mrks = marks[p_markbase] local nofmarks = #mrks for n in traverse_id(glyph_code,p.next) do - local n_markmark = has_attribute(n,markmark) + local n_markmark = n[markmark] if p_markbase == n_markmark then - local index = has_attribute(n,markdone) or 1 + local index = n[markdone] or 1 local d = mrks[index] if d then local rlmode = d[3] @@ -8912,7 +8909,7 @@ function injections.handler(head,where,keep) end for n in traverse_id(glyph_code,head) do if n.subtype < 256 then - local k = has_attribute(n,kernpair) + local k = n[kernpair] if k then local kk = kerns[k] if kk then @@ -8996,8 +8993,6 @@ analyzers.useunicodemarks = false local nodecodes = nodes.nodecodes local glyph_code = nodecodes.glyph -local set_attribute = node.set_attribute -local has_attribute = node.has_attribute local traverse_id = node.traverse_id local traverse_node_list = node.traverse @@ -9043,40 +9038,40 @@ function analyzers.setstate(head,font) if d then if d.class == "mark" or (useunicodemarks and categories[char] == "mn") then done = true - set_attribute(current,state,5) -- mark + current[state] = 5 -- mark elseif n == 0 then first, last, n = current, current, 1 - set_attribute(current,state,1) -- init + current[state] = 1 -- init else last, n = current, n+1 - set_attribute(current,state,2) -- medi + current[state] = 2 -- medi end else -- finish if first and first == last then - set_attribute(last,state,4) -- isol + last[state] = 4 -- isol elseif last then - set_attribute(last,state,3) -- fina + last[state] = 3 -- fina end first, last, n = nil, nil, 0 end elseif id == disc_code then -- always in the 
middle - set_attribute(current,state,2) -- midi + current[state] = 2 -- midi last = current else -- finish if first and first == last then - set_attribute(last,state,4) -- isol + last[state] = 4 -- isol elseif last then - set_attribute(last,state,3) -- fina + last[state] = 3 -- fina end first, last, n = nil, nil, 0 end current = current.next end if first and first == last then - set_attribute(last,state,4) -- isol + last[state] = 4 -- isol elseif last then - set_attribute(last,state,3) -- fina + last[state] = 3 -- fina end return head, done end @@ -9238,19 +9233,19 @@ local function finish(first,last) if first == last then local fc = first.char if isol_fina_medi_init[fc] or isol_fina[fc] then - set_attribute(first,state,4) -- isol + first[state] = 4 -- isol else warning(first,"isol") - set_attribute(first,state,0) -- error + first[state] = 0 -- error end else local lc = last.char if isol_fina_medi_init[lc] or isol_fina[lc] then -- why isol here ? -- if laststate == 1 or laststate == 2 or laststate == 4 then - set_attribute(last,state,3) -- fina + last[state] = 3 -- fina else warning(last,"fina") - set_attribute(last,state,0) -- error + last[state] = 0 -- error end end first, last = nil, nil @@ -9258,10 +9253,10 @@ local function finish(first,last) -- first and last are either both set so we never com here local fc = first.char if isol_fina_medi_init[fc] or isol_fina[fc] then - set_attribute(first,state,4) -- isol + first[state] = 4 -- isol else warning(first,"isol") - set_attribute(first,state,0) -- error + first[state] = 0 -- error end first = nil end @@ -9274,37 +9269,37 @@ function methods.arab(head,font,attr) -- maybe make a special version with no tr local marks = tfmdata.resources.marks local first, last, current, done = nil, nil, head, false while current do - if current.id == glyph_code and current.font == font and current.subtype<256 and not has_attribute(current,state) then + if current.id == glyph_code and current.font == font and current.subtype<256 and 
not current[state] then done = true local char = current.char if marks[char] or (useunicodemarks and categories[char] == "mn") then - set_attribute(current,state,5) -- mark + current[state] = 5 -- mark elseif isol[char] then -- can be zwj or zwnj too first, last = finish(first,last) - set_attribute(current,state,4) -- isol + current[state] = 4 -- isol first, last = nil, nil elseif not first then if isol_fina_medi_init[char] then - set_attribute(current,state,1) -- init + current[state] = 1 -- init first, last = first or current, current elseif isol_fina[char] then - set_attribute(current,state,4) -- isol + current[state] = 4 -- isol first, last = nil, nil else -- no arab first, last = finish(first,last) end elseif isol_fina_medi_init[char] then first, last = first or current, current - set_attribute(current,state,2) -- medi + current[state] = 2 -- medi elseif isol_fina[char] then - if not has_attribute(last,state,1) then + if not last[state] == 1 then -- tricky, we need to check what last may be ! - set_attribute(last,state,2) -- medi + last[state] = 2 -- medi end - set_attribute(current,state,3) -- fina + current[state] = 3 -- fina first, last = nil, nil elseif char >= 0x0600 and char <= 0x06FF then - set_attribute(current,state,6) -- rest + current[state] = 6 -- rest first, last = finish(first,last) else --no first, last = finish(first,last) @@ -9333,9 +9328,11 @@ if not modules then modules = { } end modules ['font-otn'] = { comment = "companion to font-ini.mkiv", author = "Hans Hagen, PRAGMA-ADE, Hasselt NL", copyright = "PRAGMA ADE / ConTeXt Development Team", - license = "see context related readme files" + license = "see context related readme files", } +-- preprocessors = { "nodes" } + -- this is still somewhat preliminary and it will get better in due time; -- much functionality could only be implemented thanks to the husayni font -- of Idris Samawi Hamid to who we dedicate this module. 
@@ -9501,8 +9498,6 @@ local insert_node_after = node.insert_after local delete_node = nodes.delete local copy_node = node.copy local find_node_tail = node.tail or node.slide -local set_attribute = node.set_attribute -local has_attribute = node.has_attribute local flush_node_list = node.flush_list local setmetatableindex = table.setmetatableindex @@ -9734,80 +9729,6 @@ local function getcomponentindex(start) end end --- local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head --- if start == stop and start.char == char then --- start.char = char --- return start --- elseif discfound then --- local prev = start.prev --- local next = stop.next --- start.prev = nil --- stop.next = nil --- local base = copy_glyph(start) --- base.char = char --- base.subtype = ligature_code --- base.components = start -- start can have components --- if prev then --- prev.next = base --- end --- if next then --- next.prev = base --- end --- base.next = next --- base.prev = prev --- return base --- else --- -- start is the ligature --- local deletemarks = markflag ~= "mark" --- local prev = start.prev --- local next = stop.next --- local base = copy_glyph(start) --- local current, start = insert_node_after(start,start,base) --- -- [start->current][copyofstart->start]...[stop] --- current.next = next --- if next then --- next.prev = current --- end --- start.prev = nil --- stop.next = nil --- current.char = char --- current.subtype = ligature_code --- current.components = start --- local head = current --- -- this is messy ... 
we should get rid of the components eventually --- local baseindex = 0 --- local componentindex = 0 --- while start do --- local char = start.char --- if not marks[char] then --- baseindex = baseindex + componentindex --- componentindex = getcomponentindex(start) --- elseif not deletemarks then -- quite fishy --- set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) --- if trace_marks then --- logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) --- end --- head, current = insert_node_after(head,current,copy_glyph(start)) -- unlikely that mark has components --- end --- start = start.next --- end --- start = current.next --- while start and start.id == glyph_code do -- hm, is id test needed ? --- local char = start.char --- if marks[char] then --- set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) --- if trace_marks then --- logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) --- end --- else --- break --- end --- start = start.next --- end --- return head --- end --- end - local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- brr head if start == stop and start.char == char then start.char = char @@ -9842,9 +9763,9 @@ local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- baseindex = baseindex + componentindex componentindex = getcomponentindex(start) elseif not deletemarks then -- quite fishy - set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) + start[ligacomp] = baseindex + (start[ligacomp] or componentindex) if trace_marks then - logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) + logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),start[ligacomp]) end head, current = 
insert_node_after(head,current,copy_node(start)) -- unlikely that mark has components end @@ -9854,9 +9775,9 @@ local function toligature(kind,lookupname,start,stop,char,markflag,discfound) -- while start and start.id == glyph_code do -- hm, is id test needed ? local char = start.char if marks[char] then - set_attribute(start,ligacomp,baseindex + (has_attribute(start,ligacomp) or componentindex)) + start[ligacomp] = baseindex + (start[ligacomp] or componentindex) if trace_marks then - logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),has_attribute(start,ligacomp)) + logwarning("%s: keep mark %s, gets index %s",pref(kind,lookupname),gref(char),start[ligacomp]) end else break @@ -10132,7 +10053,7 @@ function handlers.gpos_mark2ligature(start,kind,lookupname,markanchors,sequence) end end end - local index = has_attribute(start,ligacomp) + local index = start[ligacomp] local baseanchors = descriptions[basechar] if baseanchors then baseanchors = baseanchors.anchors @@ -10178,13 +10099,10 @@ function handlers.gpos_mark2mark(start,kind,lookupname,markanchors,sequence) local markchar = start.char if marks[markchar] then local base = start.prev -- [glyph] [basemark] [start=mark] - -- while base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp) do - -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature - -- end - local slc = has_attribute(start,ligacomp) + local slc = start[ligacomp] if slc then -- a rather messy loop ... 
needs checking with husayni while base do - local blc = has_attribute(base,ligacomp) + local blc = base[ligacomp] if blc and blc ~= slc then base = base.prev else @@ -10233,7 +10151,7 @@ function handlers.gpos_mark2mark(start,kind,lookupname,markanchors,sequence) end function handlers.gpos_cursive(start,kind,lookupname,exitanchors,sequence) -- to be checked - local alreadydone = cursonce and has_attribute(start,cursbase) + local alreadydone = cursonce and start[cursbase] if not alreadydone then local done = false local startchar = start.char @@ -10782,7 +10700,7 @@ function chainprocs.gpos_mark2ligature(start,stop,kind,chainname,currentcontext, end end -- todo: like marks a ligatures hash - local index = has_attribute(start,ligacomp) + local index = start[ligacomp] local baseanchors = descriptions[basechar].anchors if baseanchors then local baseanchors = baseanchors['baselig'] @@ -10824,7 +10742,7 @@ end function chainprocs.gpos_mark2mark(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) local markchar = start.char if marks[markchar] then ---~ local alreadydone = markonce and has_attribute(start,markmark) +--~ local alreadydone = markonce and start[markmark] --~ if not alreadydone then -- local markanchors = descriptions[markchar].anchors markanchors = markanchors and markanchors.mark local subtables = currentlookup.subtables @@ -10835,13 +10753,10 @@ function chainprocs.gpos_mark2mark(start,stop,kind,chainname,currentcontext,look end if markanchors then local base = start.prev -- [glyph] [basemark] [start=mark] - -- while (base and has_attribute(base,ligacomp) and has_attribute(base,ligacomp) ~= has_attribute(start,ligacomp)) do - -- base = base.prev -- KE: prevents mkmk for marks on different components of a ligature - -- end - local slc = has_attribute(start,ligacomp) + local slc = start[ligacomp] if slc then -- a rather messy loop ... 
needs checking with husayni while base do - local blc = has_attribute(base,ligacomp) + local blc = base[ligacomp] if blc and blc ~= slc then base = base.prev else @@ -10892,7 +10807,7 @@ end -- ! ! ! untested ! ! ! function chainprocs.gpos_cursive(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - local alreadydone = cursonce and has_attribute(start,cursbase) + local alreadydone = cursonce and start[cursbase] if not alreadydone then local startchar = start.char local subtables = currentlookup.subtables @@ -11557,7 +11472,7 @@ local function featuresprocessor(head,font,attr) local id = start.id if id == glyph_code then if start.font == font and start.subtype<256 then - local a = has_attribute(start,0) + local a = start[0] if a then a = a == attr else @@ -11609,11 +11524,11 @@ local function featuresprocessor(head,font,attr) local id = start.id if id == glyph_code then if start.font == font and start.subtype<256 then - local a = has_attribute(start,0) + local a = start[0] if a then - a = (a == attr) and (not attribute or has_attribute(start,state,attribute)) + a = (a == attr) and (not attribute or start[state] == attribute) else - a = not attribute or has_attribute(start,state,attribute) + a = not attribute or start[state] == attribute end if a then local lookupmatch = lookupcache[start.char] @@ -11682,11 +11597,11 @@ local function featuresprocessor(head,font,attr) local id = start.id if id == glyph_code then if start.font == font and start.subtype<256 then - local a = has_attribute(start,0) + local a = start[0] if a then - a = (a == attr) and (not attribute or has_attribute(start,state,attribute)) + a = (a == attr) and (not attribute or start[state] == attribute) else - a = not attribute or has_attribute(start,state,attribute) + a = not attribute or start[state] == attribute end if a then for i=1,ns do |