From 36ee3b791e253a00877a7b25e2d94b1dfc90a174 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Wed, 12 Jun 2019 00:08:48 +0200 Subject: 2019-06-11 19:28:00 --- tex/context/base/mkiv/font-osd.lua | 1062 ++++++++++++++++++++++++++---------- 1 file changed, 767 insertions(+), 295 deletions(-) (limited to 'tex/context/base/mkiv/font-osd.lua') diff --git a/tex/context/base/mkiv/font-osd.lua b/tex/context/base/mkiv/font-osd.lua index 8530fc264..203c1d79d 100644 --- a/tex/context/base/mkiv/font-osd.lua +++ b/tex/context/base/mkiv/font-osd.lua @@ -143,6 +143,7 @@ local s_half = states.half local s_pref = states.pref local s_blwf = states.blwf local s_pstf = states.pstf +local s_init = states.init local replace_all_nbsp = nil @@ -209,10 +210,10 @@ if not indicgroups and characters then local indicorders = { bp = { }, -- before_postscript ap = { }, -- after_postscript - bs = { }, -- before_half - as = { }, -- after_half - bh = { }, -- before_subscript - ah = { }, -- after_subscript + bs = { }, -- before_subscript + as = { }, -- after_subscript + bh = { }, -- before_half + ah = { }, -- after_half bm = { }, -- before_main am = { }, -- after_main } @@ -324,17 +325,11 @@ local zw_char = { -- both_joiners_true } local dflt_true = { - dflt = true + dflt = true, } -local two_defaults = { - dev2 = dflt_true, -} - -local one_defaults = { - dev2 = dflt_true, -- set later - deva = dflt_true, -- set later -} +local two_defaults = { } +local one_defaults = { } local false_flags = { false, false, false, false } @@ -367,7 +362,7 @@ local sequence_reorder_reph = { } local sequence_reorder_pre_base_reordering_consonants = { - features = { dv03 = two_defaults }, + features = { dv03 = one_defaults }, flags = false_flags, name = "dv03_reorder_pre_base_reordering_consonants", order = { "dv03" }, @@ -399,22 +394,17 @@ local sequence_remove_joiners = { -- as it might depends on the font. Not that it's a bottleneck. local basic_shaping_forms = { - -- init = true, -- new - -- abvs = true, -- new akhn = true, blwf = true, - -- calt = true, -- new cjct = true, half = true, - -- haln = true, -- new nukt = true, pref = true, - -- pres = true, -- new pstf = true, - -- psts = true, -- new rkrf = true, rphf = true, vatu = true, + locl = true, } local valid = { @@ -439,6 +429,7 @@ local valid = { psts = true, haln = true, calt = true, + locl = true, } local scripts = { } @@ -446,8 +437,6 @@ local scripts = { } local scripts_one = { "deva", "mlym", "beng", "gujr", "guru", "knda", "orya", "taml", "telu" } local scripts_two = { "dev2", "mlm2", "bng2", "gjr2", "gur2", "knd2", "ory2", "tml2", "tel2" } -local scripts_old = { } for i=1,#scripts_one do local v = scripts_one[i] scripts_old[v] = v end -- self - local nofscripts = #scripts_one for i=1,nofscripts do @@ -455,7 +444,7 @@ for i=1,nofscripts do local two = scripts_two[i] scripts[one] = true scripts[two] = true - two_defaults[one] = dflt_true + two_defaults[two] = dflt_true one_defaults[one] = dflt_true one_defaults[two] = dflt_true end @@ -476,28 +465,75 @@ local function initializedevanagi(tfmdata) local sequences = resources.sequences local sharedfeatures = tfmdata.shared.features -- - local lastmatch = 0 - for s=1,#sequences do -- classify chars + gsubfeatures["dv01"] = two_defaults -- reorder matras + gsubfeatures["dv02"] = two_defaults -- reorder reph + gsubfeatures["dv03"] = one_defaults -- reorder pre base reordering consonants + gsubfeatures["dv04"] = one_defaults -- remove joiners + -- + local reorder_pre_base_reordering_consonants = copy(sequence_reorder_pre_base_reordering_consonants) + local reorder_reph = copy(sequence_reorder_reph) + local reorder_matras = copy(sequence_reorder_matras) + local remove_joiners = copy(sequence_remove_joiners) + + local lastmatch = 0 + for s=1,#sequences do -- classify chars and make sure basic_shaping_forms come first local features = sequences[s].features if features then for k, v in next, features do + if k == "locl" then + local steps = sequences[s].steps + local nofsteps = sequences[s].nofsteps + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + for k, v in next, pre_mark do + local locl = coverage[k] + if locl then + if #locl > 0 then --contextchain; KE: is this right? + for j=1,#locl do + local ck = locl[j] + local f = ck[4] + local chainlookups = ck[6] + if chainlookups then + local chainlookup = chainlookups[f] + for j=1,#chainlookup do + local chainstep = chainlookup[j] + local steps = chainstep.steps + local nofsteps = chainstep.nofsteps + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + locl = coverage[k] + end + end + end + end + end + end + if locl then + reorder_matras.steps[1].coverage[locl] = true + end + end + end + end + end + end if basic_shaping_forms[k] then - lastmatch = s + lastmatch = lastmatch + 1 + if s ~= lastmatch then + table.insert(sequences, lastmatch, table.remove(sequences, s)) + end end end end end local insertindex = lastmatch + 1 -- - gsubfeatures["dv01"] = two_defaults -- reorder matras - gsubfeatures["dv02"] = two_defaults -- reorder reph - gsubfeatures["dv03"] = two_defaults -- reorder pre base reordering consonants - gsubfeatures["dv04"] = one_defaults -- remove joiners - -- - local reorder_pre_base_reordering_consonants = copy(sequence_reorder_pre_base_reordering_consonants) - local reorder_reph = copy(sequence_reorder_reph) - local reorder_matras = copy(sequence_reorder_matras) - local remove_joiners = copy(sequence_remove_joiners) + if tfmdata.properties.language then + dflt_true[tfmdata.properties.language] = true + end -- insert(sequences,insertindex,reorder_pre_base_reordering_consonants) insert(sequences,insertindex,reorder_reph) @@ -505,6 +541,8 @@ local function initializedevanagi(tfmdata) insert(sequences,insertindex,remove_joiners) -- local blwfcache = { } + local vatucache = { } + local pstfcache = { } local seqsubset = { } local rephstep = { coverage = { } -- will be adapted each work @@ -513,6 +551,8 @@ local function initializedevanagi(tfmdata) reph = false, vattu = false, blwfcache = blwfcache, + vatucache = vatucache, + pstfcache = pstfcache, seqsubset = seqsubset, reorderreph = rephstep, @@ -525,8 +565,6 @@ local function initializedevanagi(tfmdata) -- resources.devanagari = devanagari -- - local old = scripts_old[script] or false - -- for s=1,#sequences do local sequence = sequences[s] local steps = sequence.steps @@ -534,41 +572,148 @@ local function initializedevanagi(tfmdata) local features = sequence.features local has_rphf = features.rphf local has_blwf = features.blwf - if has_rphf and has_rphf[old] then + local has_vatu = features.vatu + local has_pstf = features.pstf + if has_rphf and has_rphf[script] then devanagari.reph = true - elseif has_blwf and has_blwf[old] then + elseif (has_blwf and has_blwf[script] ) or (has_vatu and has_vatu[script] ) then devanagari.vattu = true for i=1,nofsteps do local step = steps[i] local coverage = step.coverage if coverage then for k, v in next, coverage do - if not blwfcache[k] then - blwfcache[k] = v + for h, w in next, halant do + if v[h] then + if not blwfcache[k] then + blwfcache[k] = v + end + end + if has_vatu and has_vatu[script] and not vatucache[k] then + vatucache[k] = v + end + end + end + end + end + elseif has_pstf and has_pstf[script] then + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + for k, v in next, coverage do + if not pstfcache[k] then + pstfcache[k] = v + end + end + for k, v in next, ra do + local r = coverage[k] + if r then + local found = false + if #r > 0 then -- contextchain; KE: is this right? + for j=1,#r do + local ck = r[j] + local f = ck[4] + local chainlookups = ck[6] + if chainlookups and chainlookups[f] then --KE: why is check for chainlookups[f] necessacy??? + local chainlookup = chainlookups[f] + for j=1,#chainlookup do + local chainstep = chainlookup[j] + local steps = chainstep.steps + local nofsteps = chainstep.nofsteps + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + local h = coverage[k] + if h then + for k, v in next, h do + found = v and v.ligature + if found then + pre_base_reordering_consonants[found] = true + break + end + end + if found then + break + end + end + end + end + end + end + end + else + for k, v in next, r do + found = v and v.ligature + if found then + pre_base_reordering_consonants[found] = true + break + end + end + end + if found then + break + end end end end end end for kind, spec in next, features do - -- if spec.dev2 and valid[kind] then if valid[kind] and valid_two(spec)then for i=1,nofsteps do local step = steps[i] local coverage = step.coverage if coverage then - local reph = false + local reph, rephbase = false, false if kind == "rphf" then -- rphf acts on consonant + halant for k, v in next, ra do local r = coverage[k] if r then + rephbase = k local h = false - for k, v in next, halant do - local h = r[k] - if h then - reph = h.ligature or false - break + if #r > 0 then --contextchain; KE: is this right? + for j=1,#r do + local ck = r[j] + local f = ck[4] + local chainlookups = ck[6] + if chainlookups then + local chainlookup = chainlookups[f] + for j=1,#chainlookup do + local chainstep = chainlookup[j] + local steps = chainstep.steps + local nofsteps = chainstep.nofsteps + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + local r = coverage[k] + if r then + for k, v in next, halant do + local h = r[k] + if h then + reph = h.ligature or false + break + end + end + if h then + break + end + end + end + end + end + end + end + else + for k, v in next, halant do + local h = r[k] + if h then + reph = h.ligature or false + break + end end end if reph then @@ -577,7 +722,7 @@ local function initializedevanagi(tfmdata) end end end - seqsubset[#seqsubset+1] = { kind, coverage, reph } + seqsubset[#seqsubset+1] = { kind, coverage, reph, rephbase } end end end @@ -592,11 +737,46 @@ local function initializedevanagi(tfmdata) local h = coverage[k] if h then local found = false - for k, v in next, h do - found = v and v.ligature - if found then - pre_base_reordering_consonants[k] = found - break + if #h > 0 then -- contextchain; KE: is this right? + for j=1,#h do + local ck = h[j] + local f = ck[4] + local chainlookups = ck[6] + if chainlookups then + local chainlookup = chainlookups[f] + for j=1,#chainlookup do + local chainstep = chainlookup[j] + local steps = chainstep.steps + local nofsteps = chainstep.nofsteps + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + local h = coverage[k] + if h then + for k, v in next, h do + found = v and v.ligature + if found then + pre_base_reordering_consonants[found] = true + break + end + end + if found then + break + end + end + end + end + end + end + end + else + for k, v in next, h do + found = v and v.ligature + if found then + pre_base_reordering_consonants[found] = true + break + end end end if found then @@ -610,90 +790,17 @@ local function initializedevanagi(tfmdata) end end -- - -- The following presets need checking (by Kai). Most of these scripts share a common - -- handling (some need less but that doesn't hurt). The question is: what to enable. - -- - -- dv01_reorder_matras - -- dv02_reorder_reph - -- dv03_reorder_pre_base_reordering_consonants - -- dv04_remove_joiners - -- - if script == "deva" then - sharedfeatures["dv04"] = true - elseif script == "dev2" then - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "knda" then - -- needs checking - sharedfeatures["dv04"] = true - elseif script == "knd2" then - -- needs checking - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "beng" then - -- needs checking - sharedfeatures["dv04"] = true - elseif script == "bng2" then - -- needs checking - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "gurj" then - -- needs checking - sharedfeatures["dv04"] = true - elseif script == "grj2" then - -- needs checking - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "guru" then - -- needs checking - sharedfeatures["dv04"] = true - elseif script == "gur2" then - -- needs checking - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "telu" then - -- needs checking - sharedfeatures["dv04"] = true - elseif script == "tel2" then - -- needs checking - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "mlym" then - sharedfeatures["pstf"] = true - elseif script == "mlm2" then - sharedfeatures["pstf"] = true - sharedfeatures["pref"] = true - sharedfeatures["dv03"] = true - gsubfeatures ["dv03"] = two_defaults - insert(sequences,insertindex,sequence_reorder_pre_base_reordering_consonants) - - elseif script == "taml" then - -- needs checking - sharedfeatures["dv04"] = true - sharedfeatures["pstf"] = true - elseif script == "tml2" then - -- needs checking - - else - report("todo: enable the right features for script %a",script) + if two_defaults[script] then + sharedfeatures["dv01"] = true -- dv01_reorder_matras + sharedfeatures["dv02"] = true -- dv02_reorder_reph + sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants + sharedfeatures["dv04"] = true -- dv04_remove_joiners + elseif one_defaults[script] then + sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants + sharedfeatures["dv04"] = true -- dv04_remove_joiners + end + if script == "mlym" or script == "taml" then + devanagari.left_matra_before_base = true end end end @@ -735,10 +842,13 @@ local function initialize_one(font,attr) -- we need a proper hook into the datas reph = false, vattu = false, blwfcache = { }, + vatucache = { }, + pstfcache = { }, } datasets.devanagari = devanagaridata local resources = tfmdata.resources local devanagari = resources.devanagari + for s=1,#datasets do local dataset = datasets[s] if dataset and dataset[1] then -- value @@ -746,31 +856,97 @@ local function initialize_one(font,attr) -- we need a proper hook into the datas if kind == "rphf" then -- deva devanagaridata.reph = true - elseif kind == "blwf" then + elseif kind == "blwf" or kind == "vatu" then -- deva devanagaridata.vattu = true -- dev2 devanagaridata.blwfcache = devanagari.blwfcache + devanagaridata.vatucache = devanagari.vatucache + devanagaridata.pstfcache = devanagari.pstfcache end end end end - return devanagaridata.reph, devanagaridata.vattu, devanagaridata.blwfcache + return devanagaridata.reph, devanagaridata.vattu, devanagaridata.blwfcache, devanagaridata.vatucache, devanagaridata.pstfcache + +end + +local function contextchain(contexts, n) + local char = getchar(n) + for k=1,#contexts do + local ck = contexts[k] + local seq = ck[3] + local f = ck[4] + local l = ck[5] + if (l - f) == 1 and seq[f+1][char] then + local ok = true + local c = n + for i=l+1,#seq do + c = getnext(c) + if not c or not seq[i][ischar(c)] then + ok = false + break + end + end + if ok then + c = getprev(n) + for i=1,f-1 do + c = getprev(c) + if not c or not seq[f-i][ischar(c)] then + ok = false + end + end + end + if ok then + return true + end + end + end + return false +end +local function order_matras(c) + local cn = getnext(c) + local char = getchar(cn) + while dependent_vowel[char] do + local next = getnext(cn) + local cc = c + local cchar = getchar(cc) + while cc ~= cn do + if (above_mark[char] and (below_mark[cchar] or post_mark[cchar])) or (below_mark[char] and (post_mark[cchar])) then + local prev, next = getboth(cn) + if next then + setprev(next,prev) + end + -- todo: setlink + setnext(prev,next) + setnext(getprev(cc),cn) + setprev(cn,getprev(cc)) + setnext(cn,cc) + setprev(cc,cn) + break + end + cc = getnext(cc) + cchar = getchar(cc) + end + cn = next + char = getchar(cn) + end end local function reorder_one(head,start,stop,font,attr,nbspaces) - local reph, vattu, blwfcache = initialize_one(font,attr) -- todo: a hash[font] + local reph, vattu, blwfcache, vatucache, pstfcache = initialize_one(font,attr) -- todo: a hash[font] - local current = start - local n = getnext(start) - local base = nil - local firstcons = nil - local lastcons = nil - local basefound = false + local devanagari = fontdata[font].resources.devanagari + local current = start + local n = getnext(start) + local base = nil + local firstcons = nil + local lastcons = nil + local basefound = false if reph and ra[getchar(start)] and halant[getchar(n)] then -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph @@ -853,8 +1029,11 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if not base then base = current elseif blwfcache[char] then - -- consonant has below-base (or post-base) form + -- consonant has below-base form setprop(current,a_state,s_blwf) + elseif pstfcache[char] then + -- consonant has post-base form + setprop(current,a_state,s_pstf) else base = current end @@ -933,16 +1112,14 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) current = next end - if base ~= stop and getprop(base,a_state) then + if base ~= stop and getprop(base,a_state) then -- a_state can also be init local next = getnext(base) if halant[getchar(next)] and not (next ~= stop and getchar(getnext(next)) == c_zwj) then setprop(base,a_state,unsetvalue) end end - -- ToDo: split two- or three-part matras into their parts. Then, move the left 'matra' part to the beginning of the syllable. - -- Not necessary for Devanagari. However it is necessay for other scripts, such as Tamil (e.g. TAMIL VOWEL SIGN O - 0BCA) - + -- split two- or three-part matras into their parts. Then, move the left 'matra' part to the beginning of the syllable. -- classify consonants and 'matra' parts as pre-base, above-base (Reph), below-base or post-base, and group elements of the syllable (consonants and 'matras') according to this classification local current, allreordered, moved = start, false, { [base] = true } @@ -968,6 +1145,17 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) n = getnext(n) ch = getchar(n) end + + local tpm = twopart_mark[ch] + while tpm do + local extra = copy_node(n) + copyinjection(extra,n) + ch = tpm[1] + setchar(n,ch) + setchar(extra,tpm[2]) + head = insert_node_after(head,current,extra) + tpm = twopart_mark[ch] + end while c ~= stop and dependent_vowel[ch] do c = n n = getnext(n) @@ -986,15 +1174,51 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) end end end - local bp = getprev(firstcons) - local cn = getnext(current) + local bp = getprev(firstcons) + local cn = getnext(current) local last = getnext(c) while cn ~= last do -- move pre-base matras... if pre_mark[getchar(cn)] then - if bp then - setnext(bp,cn) + if devanagari.left_matra_before_base then + local prev, next = getboth(cn) + setlink(prev,next) + if cn == stop then + stop = getprev(cn) + end + if base == start then + if head == start then + head = cn + end + start = cn + end + setlink(getprev(base),cn) + setlink(cn,base) + -- setlink(getprev(base),cn,base) -- maybe + cn = next + else + if bp then + setnext(bp,cn) + end + local prev, next = getboth(cn) + if next then + setprev(next,prev) + end + setnext(prev,next) + if cn == stop then + stop = prev + end + setprev(cn,bp) + setlink(cn,firstcons) + if firstcons == start then + if head == start then + head = cn + end + start = cn + end + cn = next end + elseif current ~= base and dependent_vowel[getchar(cn)] then local prev, next = getboth(cn) if next then setprev(next,prev) @@ -1003,17 +1227,19 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if cn == stop then stop = prev end - setprev(cn,bp) - setlink(cn,firstcons) - if firstcons == start then - if head == start then - head = cn - end - start = cn + setlink(b,cn,getnext(b)) + order_matras(cn) + cn = next + elseif current == base and dependent_vowel[getchar(cn)] then + local cnn = getnext(cn) + order_matras(cn) + cn = cnn + while cn ~= last and dependent_vowel[getchar(cn)] do + cn = getnext(cn) end - break + else + cn = getnext(cn) end - cn = getnext(cn) end allreordered = c == stop current = getnext(c) @@ -1075,6 +1301,13 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if halant[getchar(next)] then cns = next end + if not vatucache[char] then + next = getnext(cns) + while dependent_vowel[getchar(next)] do + cns = next + next = getnext(cns) + end + end elseif char == c_nbsp then nbspaces = nbspaces + 1 cns = current @@ -1082,6 +1315,13 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if halant[getchar(next)] then cns = next end + if not vatucache[char] then + next = getnext(cns) + while dependent_vowel[getchar(next)] do + cns = next + next = getnext(cns) + end + end end end current = getnext(current) @@ -1090,6 +1330,9 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if getchar(base) == c_nbsp then nbspaces = nbspaces - 1 + if base == stop then + stop = getprev(stop) + end head = remove_node(head,base) flush_node(base) end @@ -1114,7 +1357,7 @@ function handlers.devanagari_reorder_matras(head,start) -- no leak local char = ischar(current,startfont) local next = getnext(current) if char and getprop(current,a_syllabe) == startattr then - if halant[char] and not getprop(current,a_state) then + if halant[char] then -- a_state can also be init if next then local char = ischar(next,startfont) if char and zw_char[char] and getprop(next,a_syllabe) == startattr then @@ -1130,6 +1373,18 @@ function handlers.devanagari_reorder_matras(head,start) -- no leak -- setlink(current,start,next) -- maybe start = startnext break + -- elseif consonant[char] and ( not getprop(current,a_state) or getprop(current,a_state) == s_init) then + -- startnext = getnext(start) + -- head = remove_node(head,start) + -- if current == head then + -- setlink(start,current) + -- head = start + -- else + -- setlink(getprev(current),start) + -- setlink(start,current) + -- end + -- start = startnext + -- break end else break @@ -1148,6 +1403,8 @@ end -- In Devanagari reph has reordering position 'before postscript' and dev2 only -- follows step 2, 4, and 6. +local rephbase = { } + function handlers.devanagari_reorder_reph(head,start) local current = getnext(start) local startnext = nil @@ -1159,6 +1416,12 @@ function handlers.devanagari_reorder_reph(head,start) -- -- If reph should be positioned after post-base consonant forms, proceed to step 5. -- + local char = ischar(start,startfont) + local rephbase = rephbase[startfont][char] + if char and after_subscript[rephbase] then + goto step_5 + end + -- ::step_2:: -- -- If the reph repositioning class is not after post-base: target position is after @@ -1169,39 +1432,64 @@ function handlers.devanagari_reorder_reph(head,start) -- fixed in shaping engine, there was no case where reph position will be found on -- this step. -- - while current do - local char = ischar(current,startfont) - if char and getprop(current,a_syllabe) == startattr then - if halant[char] and not getprop(current,a_state) then - local next = getnext(current) - if next then - local nextchar = ischar(next,startfont) - if nextchar and zw_char[nextchar] and getprop(next,a_syllabe) == startattr then - current = next - next = getnext(current) + if char and not after_postscript[rephbase] then + while current do + local char = ischar(current,startfont) + if char and getprop(current,a_syllabe) == startattr then + if halant[char] then + local next = getnext(current) + if next then + local nextchar = ischar(next,startfont) + if nextchar and zw_char[nextchar] and getprop(next,a_syllabe) == startattr then + current = next + next = getnext(current) + end end + startnext = getnext(start) + head = remove_node(head,start) + setlink(start,next) + setlink(current,start) + -- setlink(current,start,next) -- maybe + start = startnext + startattr = getprop(start,a_syllabe) + break end - startnext = getnext(start) - head = remove_node(head,start) - setlink(start,next) - setlink(current,start) - -- setlink(current,start,next) -- maybe - start = startnext - startattr = getprop(start,a_syllabe) + current = getnext(current) + else break end - current = getnext(current) - else - break end end + -- ::step_3:: -- - -- If reph should be repositioned after the main consonant: from the first consonant + -- If reph should be repositioned after the main consonant: find the first consonant -- not ligated with main, or find the first consonant that is not a potential -- pre-base reordering Ra. -- - -- Kai: todo + if not startnext then + if char and after_main[rephbase] then + current = getnext(start) + while current do + local char = ischar(current,startfont) + if char and getprop(current,a_syllabe) == startattr then + if consonant[char] and not getprop(current,a_state) == s_pref then + startnext = getnext(start) + head = remove_node(head,start) + setlink(current,start) + setlink(start,getnext(current)) + -- setlink(current,start,getnext(current)) -- maybe + start = startnext + startattr = getprop(start,a_syllabe) + break + end + current = getnext(current) + else + break + end + end + end + end -- ::step_4:: -- @@ -1210,23 +1498,37 @@ function handlers.devanagari_reorder_reph(head,start) -- position should be before the first matra, syllable modifier sign or vedic sign. -- if not startnext then - current = getnext(start) - while current do - local char = ischar(current,startfont) - if char and getprop(current,a_syllabe) == startattr then - if getprop(current,a_state) == s_pstf then -- post-base - startnext = getnext(start) - head = remove_node(head,start) - setlink(getprev(current),start) - setlink(start,current) - -- setlink(getprev(current),start,current) -- maybe - start = startnext - startattr = getprop(start,a_syllabe) + if char and before_postscript[rephbase] then + current = getnext(start) + local c = nil + while current do + local char = ischar(current,startfont) + if char and getprop(current,a_syllabe) == startattr then + if getprop(current,a_state) == s_pstf then -- post-base + startnext = getnext(start) + head = remove_node(head,start) + setlink(getprev(current),start) + setlink(start,current) + -- setlink(getprev(current),start,current) -- maybe + start = startnext + startattr = getprop(start,a_syllabe) + break + elseif not c and ( vowel_modifier[char] or stress_tone_mark[char] ) then + c = current + end + current = getnext(current) + else + if c then + startnext = getnext(start) + head = remove_node(head,start) + setlink(getprev(c),start) + setlink(start,c) + -- setlink(getprev(c),start,c) -- maybe + start = startnext + startattr = getprop(start,a_syllabe) + end break end - current = getnext(current) - else - break end end end @@ -1245,7 +1547,10 @@ function handlers.devanagari_reorder_reph(head,start) while current do local char = ischar(current,startfont) if char and getprop(current,a_syllabe) == startattr then - if not c and mark_above_below_post[char] and not after_subscript[char] then + local state = getprop(current,a_state) + if before_subscript[rephbase] and (state == s_blwf or state == s_pstf) then + c = current + elseif after_subscript[rephbase] and (state == s_pstf) then c = current end current = getnext(current) @@ -1310,67 +1615,82 @@ end -- return head, start, done -- end +local reordered_pre_base_reordering_consonants = { } -- shared ? not reset ? + function handlers.devanagari_reorder_pre_base_reordering_consonants(head,start) - local current = start - local startnext = nil - local startprev = nil + if reordered_pre_base_reordering_consonants[start] then + return head, start, true + end + local current = start -- we could cache attributes here local startfont = getfont(start) local startattr = getprop(start,a_syllabe) - -- can be fast for loop + caching state while current do local char = ischar(current,startfont) + local next = getnext(current) if char and getprop(current,a_syllabe) == startattr then - local next = getnext(current) - if halant[char] and not getprop(current,a_state) then + if halant[char] then -- a_state can also be init if next then - local nextchar = ischar(next,startfont) - if nextchar and getprop(next,a_syllabe) == startattr then - if nextchar == c_zwnj or nextchar == c_zwj then - current = next - next = getnext(current) - end + local char = ischar(next,startfont) + if char and zw_char[char] and getprop(next,a_syllabe) == startattr then + current = next + next = getnext(current) end end - startnext = getnext(start) - removenode(start,start) + -- can be optimzied + local startnext = getnext(start) + head = remove_node(head,start) setlink(start,next) setlink(current,start) -- setlink(current,start,next) -- maybe + reordered_pre_base_reordering_consonants[start] = true start = startnext - break + return head, start, true + -- elseif consonant[char] and ( not getprop(current,a_state) or getprop(current,a_state) == s_init) then + -- startnext = getnext(start) + -- head = remove_node(head,start) + -- if current == head then + -- setlink(start,current) + -- head = start + -- else + -- setlink(getprev(current),start) + -- setlink(start,current) + -- end + -- start = startnext + -- break end - current = next else break end + current = next end - if not startnext then - current = getnext(start) - startattr = getprop(start,a_syllabe) - while current do - local char = ischar(current,startfont) - if char and getprop(current,a_syllabe) == startattr then - if not consonant[char] and getprop(current,a_state) then -- main - startnext = getnext(start) - removenode(start,start) - setlink(getprev(current),start) - setlink(start,current) - -- setlink(getprev(current),start,current) -- maybe - start = startnext - break - end - current = getnext(current) + + local startattr = getprop(start,a_syllabe) + local current = getprev(start) + while current and getprop(current,a_syllabe) == startattr do + local char = ischar(current) + if ( not dependent_vowel[char] and not getprop(current,a_state) or getprop(current,a_state) == s_init) then + startnext = getnext(start) + head = remove_node(head,start) + if current == head then + setlink(start,current) + head = start else - break + setlink(getprev(current),start) + setlink(start,current) end + reordered_pre_base_reordering_consonants[start] = true + start = startnext + break end + current = getprev(current) end + return head, start, true end function handlers.devanagari_remove_joiners(head,start,kind,lookupname,replacement) local stop = getnext(start) - local font = getfont(start) -- hm + local font = getfont(start) local last = start while stop do local char = ischar(stop,font) @@ -1411,15 +1731,15 @@ end -- of the actions local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pass over (determine stop in sweep) - local seqsubset, reorderreph = initialize_two(font,attr) - local reph = false -- was nil ... probably went unnoticed because never assigned local halfpos = nil local basepos = nil local subpos = nil local postpos = nil - local locl = { } + + reorderreph.coverage = { } + rephbase[font] = { } for i=1,#seqsubset do @@ -1429,17 +1749,17 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas local kind = subset[1] local lookupcache = subset[2] if kind == "rphf" then - reph = subset[3] + reorderreph.coverage[subset[3]] = true -- neat + rephbase[font][subset[3]] = subset[4] local current = start local last = getnext(stop) while current ~= last do if current ~= stop then - local c = locl[current] or getchar(current) + local c = getchar(current) local found = lookupcache[c] if found then local next = getnext(current) - local n = locl[next] or getchar(next) - if found[n] then --above-base: rphf Consonant + Halant + if found[getchar(next)] or contextchain(found, next) then --above-base: rphf Consonant + Halant local afternext = next ~= stop and getnext(next) if afternext and zw_char[getchar(afternext)] then -- ZWJ and ZWNJ prevent creation of reph current = afternext -- getnext(next) @@ -1459,15 +1779,16 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas local last = getnext(stop) while current ~= last do if current ~= stop then - local c = locl[current] or getchar(current) + local c = getchar(current) local found = lookupcache[c] if found then -- pre-base: pref Halant + Consonant local next = getnext(current) - local n = locl[next] or getchar(next) - if found[n] then - setprop(current,a_state,s_pref) - setprop(next,a_state,s_pref) - current = next + if found[getchar(next)] or contextchain(found, next) then + if (not getprop(current,a_state) and not getprop(next,a_state)) then --KE: a_state can also be init... + setprop(current,a_state,s_pref) + setprop(next,a_state,s_pref) + current = next + end end end end @@ -1478,15 +1799,14 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas local last = getnext(stop) while current ~= last do if current ~= stop then - local c = locl[current] or getchar(current) + local c = getchar(current) local found = lookupcache[c] if found then local next = getnext(current) - local n = locl[next] or getchar(next) - if found[n] then + if found[getchar(next)] or contextchain(found, next) then if next ~= stop and getchar(getnext(next)) == c_zwnj then -- zwnj prevent creation of half current = next - else + elseif (not getprop(current,a_state)) then --KE: a_state can also be init... setprop(current,a_state,s_half) if not halfpos then halfpos = current @@ -1498,21 +1818,22 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas end current = getnext(current) end - elseif kind == "blwf" then -- below-base: blwf / Halant + Consonant + elseif kind == "blwf" or kind == "vatu" then -- below-base: blwf / Halant + Consonant local current = start local last = getnext(stop) while current ~= last do if current ~= stop then - local c = locl[current] or getchar(current) + local c = getchar(current) local found = lookupcache[c] if found then local next = getnext(current) - local n = locl[next] or getchar(next) - if found[n] then - setprop(current,a_state,s_blwf) - setprop(next,a_state,s_blwf) - current = next - subpos = current + if found[getchar(next)] or contextchain(found, next) then + if (not getprop(current,a_state) and not getprop(next,a_state)) then --KE: a_state can also be init... + setprop(current,a_state,s_blwf) + setprop(next,a_state,s_blwf) + current = next + subpos = current + end end end end @@ -1523,16 +1844,17 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas local last = getnext(stop) while current ~= last do if current ~= stop then - local c = locl[current] or getchar(current) + local c = getchar(current) local found = lookupcache[c] if found then local next = getnext(current) - local n = locl[next] or getchar(next) - if found[n] then - setprop(current,a_state,s_pstf) - setprop(next,a_state,s_pstf) - current = next - postpos = current + if found[getchar(next)] or contextchain(found, next) then + if (not getprop(current,a_state) and not getprop(next,a_state)) then --KE: a_state can also be init... + setprop(current,a_state,s_pstf) + setprop(next,a_state,s_pstf) + current = next + postpos = current + end end end end @@ -1541,12 +1863,6 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas end end - -- this one changes per word ... - - reorderreph.coverage = { [reph] = true } -- neat - - -- end of weird - local current, base, firstcons = start, nil, nil if getprop(start,a_state) == s_rphf then @@ -1612,7 +1928,7 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas end -- check whether consonant has below-base or post-base form or is pre-base reordering Ra local a = getprop(current,a_state) - if not (a == s_pref or a == s_blwf or a == s_pstf) then + if not (a == s_blwf or a == s_pstf or (a ~= s_rphf and a ~= s_blwf and ra[getchar(current)])) then base = current end end @@ -1630,7 +1946,7 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas end return head, stop, nbspaces else - if getprop(base,a_state) then + if getprop(base,a_state) then -- a_state can also be init setprop(base,a_state,unsetvalue) end basepos = base @@ -1647,24 +1963,27 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas -- Matra characters are classified and reordered by which consonant in a conjunct they have affinity for - local moved = { } + local moved = { } local current = start - local last = getnext(stop) + local last = getnext(stop) while current ~= last do - local char, target, cn = locl[current] or getchar(current), nil, getnext(current) + local char = getchar(current) + local target = nil + local cn = getnext(current) -- not so efficient (needed for malayalam) local tpm = twopart_mark[char] - if tpm then + while tpm do local extra = copy_node(current) copyinjection(extra,current) char = tpm[1] setchar(current,char) setchar(extra,tpm[2]) head = insert_node_after(head,current,extra) + tpm = twopart_mark[char] end -- if not moved[current] and dependent_vowel[char] then - if pre_mark[char] then -- Before first half form in the syllable + if pre_mark[char] then -- or: if before_main or before_half moved[current] = true -- can be helper to remove one node local prev, next = getboth(current) @@ -1672,17 +1991,47 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas if current == stop then stop = getprev(current) end - if halfpos == start then + + local pos + if before_main[char] then + pos = basepos + -- basepos = current -- is this correct? + else + -- must be before_half + pos = halfpos + -- halfpos = current -- is this correct? + end + + local ppos = getprev(pos) -- necessary? + while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) do + if getprop(ppos,a_state) == s_pref then + pos = ppos + end + ppos = getprev(ppos) + end + + local ppos = getprev(pos) -- necessary? + while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and halant[ischar(ppos)] do + ppos = getprev(ppos) + if ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and consonant[ischar(ppos)] then + pos = ppos + ppos = getprev(ppos) + else + break + end + end + + if pos == start then if head == start then head = current end start = current end - setlink(getprev(halfpos),current) - setlink(current,halfpos) - -- setlink(getprev(halfpos),current,halfpos) -- maybe - halfpos = current - elseif above_mark[char] then -- After main consonant + setlink(getprev(pos),current) + setlink(current,pos) + -- setlink(getprev(pos),current,pos) -- maybe + elseif above_mark[char] then + -- after main consonant target = basepos if subpos == basepos then subpos = current @@ -1691,13 +2040,25 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas postpos = current end basepos = current - elseif below_mark[char] then -- After subjoined consonants + elseif below_mark[char] then + -- after subjoined consonants target = subpos if postpos == subpos then postpos = current end subpos = current - elseif post_mark[char] then -- After post-form consonant + elseif post_mark[char] then + -- after post-form consonant + local n = getnext(postpos) -- nukta and vedic sign come first - is that right? and also halant+ra + while n do + local v = ischar(n,font) + if nukta[v] or stress_tone_mark[v] or vowel_modifier[v] then + postpos = n + else + break + end + n = getnext(n) + end target = postpos postpos = current end @@ -1718,9 +2079,44 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas current = cn end + -- reorder halant+Ra + + local current = getnext(start) + local last = getnext(stop) + while current ~= last do + local char = getchar(current) + local cn = getnext(current) + if halant[char] and ra[ischar(cn)] and getprop(cn,a_state) ~= s_rphf and getprop(cn,a_state) ~= s_blwf then + if after_main[ischar(cn)] then + local prev = getprev(current) + local next = getnext(cn) + local bpn = getnext(basepos) + while bpn and dependent_vowel[ischar(bpn)] do + basepos = bpn + bpn = getnext(bpn) + end + if basepos ~= prev then + setlink(prev,next) + setlink(cn, getnext(basepos)) + setlink(basepos, current) + if cn == stop then + stop = prev + end + cn = next + end + end + -- after_postscript + -- after_subscript + -- before_postscript + -- before_subscript + end + current = cn + end + -- Reorder marks to canonical order: Adjacent nukta and halant or nukta and vedic sign are always repositioned if necessary, so that the nukta is first. - local current, c = start, nil + local current = start + local c = nil while current ~= stop do local char = getchar(current) if halant[char] or stress_tone_mark[char] then @@ -1849,7 +2245,25 @@ local function analyze_next_chars_one(c,font,variant) -- skip one dependent vowe if not v then return c end - if dependent_vowel[v] then + local already_pre_mark -- = false + local already_above_mark -- = false + local already_below_mark -- = false + local already_post_mark -- = false + while dependent_vowel[v] do + local vowels = twopart_mark[v] or { v } + for k, v in next, vowels do + if pre_mark[v] and not already_pre_mark then + already_pre_mark = true + elseif above_mark[v] and not already_above_mark then + already_above_mark = true + elseif below_mark[v] and not already_below_mark then + already_below_mark = true + elseif post_mark[v] and not already_post_mark then + already_post_mark = true + else + return c + end + end c = getnext(c) n = getnext(c) if not n then @@ -2021,7 +2435,25 @@ local function analyze_next_chars_two(c,font) else -- c = ms_matra(c) -- same as one - if dependent_vowel[v] then + local already_pre_mark -- = false + local already_above_mark -- = false + local already_below_mark -- = false + local already_post_mark -- = false + while dependent_vowel[v] do + local vowels = twopart_mark[v] or { v } + for k, v in next, vowels do + if pre_mark[v] and not already_pre_mark then + already_pre_mark = true + elseif above_mark[v] and not already_above_mark then + already_above_mark = true + elseif below_mark[v] and not already_below_mark then + already_below_mark = true + elseif post_mark[v] and not already_post_mark then + already_post_mark = true + else + return c + end + end c = n n = getnext(c) if not n then @@ -2093,6 +2525,7 @@ local function method_one(head,font,attr) local start = true local done = false local nbspaces = 0 + local syllabe = 0 while current do local char = ischar(current,font) if char then @@ -2242,6 +2675,15 @@ local function method_one(head,font,attr) end end if syllablestart ~= syllableend then + if syllableend then + syllabe = syllabe + 1 + local c = syllablestart + local n = getnext(syllableend) + while c ~= n do + setprop(c,a_syllabe,syllabe) + c = getnext(c) + end + end head, current, nbspaces = reorder_one(head,syllablestart,syllableend,font,attr,nbspaces) current = getnext(current) end @@ -2283,6 +2725,21 @@ local function method_one(head,font,attr) head = replace_all_nbsp(head) end + current = head + local n = 0 + while current do + local char = ischar(current,font) + if char then + if n == 0 and not getprop(current,a_state) then + setprop(current,a_state,s_init) + end + n = n + 1 + else + n = 0 + end + current = getnext(current) + end + return head, done end @@ -2363,7 +2820,7 @@ local function method_two(head,font,attr) end if not syllableend and show_syntax_errors then local char = ischar(current,font) - if char and not getprop(current,a_state) then + if char and not getprop(current,a_state) then -- a_state can also be init local mark = mark_four[char] if mark then head, current = inject_syntax_error(head,current,char) @@ -2378,6 +2835,21 @@ local function method_two(head,font,attr) head = replace_all_nbsp(head) end + current = head + local n = 0 + while current do + local char = ischar(current,font) + if char then + if n == 0 and not getprop(current,a_state) then -- a_state can also be init + setprop(current,a_state,s_init) + end + n = n + 1 + else + n = 0 + end + current = getnext(current) + end + return head, done end -- cgit v1.2.3