diff options
Diffstat (limited to 'tex')
24 files changed, 2731 insertions, 712 deletions
diff --git a/tex/context/base/mkii/cont-new.mkii b/tex/context/base/mkii/cont-new.mkii index 605229e98..d908f260a 100644 --- a/tex/context/base/mkii/cont-new.mkii +++ b/tex/context/base/mkii/cont-new.mkii @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2019.06.05 15:39} +\newcontextversion{2019.06.11 19:20} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/mkii/context.mkii b/tex/context/base/mkii/context.mkii index b8d1ff737..ada3588ea 100644 --- a/tex/context/base/mkii/context.mkii +++ b/tex/context/base/mkii/context.mkii @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2019.06.05 15:39} +\edef\contextversion{2019.06.11 19:20} %D For those who want to use this: diff --git a/tex/context/base/mkii/mult-pe.mkii b/tex/context/base/mkii/mult-pe.mkii index e1d0e9b5f..d2efcd7e9 100644 --- a/tex/context/base/mkii/mult-pe.mkii +++ b/tex/context/base/mkii/mult-pe.mkii @@ -570,6 +570,7 @@ \setinterfacevariable{temporary}{موقتی} \setinterfacevariable{test}{تست} \setinterfacevariable{text}{متن} +\setinterfacevariable{textnote}{textnote} \setinterfacevariable{three}{سه} \setinterfacevariable{thursday}{پنجشنبه} \setinterfacevariable{tight}{tight} diff --git a/tex/context/base/mkiv/buff-ver.lua b/tex/context/base/mkiv/buff-ver.lua index 969a28055..ad06dceec 100644 --- a/tex/context/base/mkiv/buff-ver.lua +++ b/tex/context/base/mkiv/buff-ver.lua @@ -127,7 +127,8 @@ local signal = "\000" visualizers.signal = signal visualizers.signalpattern = P(signal) -local functions = { __index = { +local functions = { + __index = { emptyline = f_emptyline, newline = f_newline, default = f_default, diff --git a/tex/context/base/mkiv/char-def.lua b/tex/context/base/mkiv/char-def.lua index b51250ce5..953d33f4a 100644 --- a/tex/context/base/mkiv/char-def.lua +++ b/tex/context/base/mkiv/char-def.lua @@ -22006,7 +22006,7 @@ characters.data={ category="mn", description="BENGALI SIGN CANDRABINDU", direction="nsm", - indic="o", + indic="m", indicorder="ap", linebreak="cm", unicodeslot=0x981, @@ -22474,7 +22474,7 @@ characters.data={ category="mc", description="BENGALI VOWEL SIGN II", direction="l", - indic="d", + indic="o", indicmark="r", indicorder="ap", linebreak="cm", @@ -22530,7 +22530,7 @@ characters.data={ description="BENGALI VOWEL SIGN E", direction="l", indic="d", - indicmark="r", + indicmark="l", indicorder="bh", linebreak="cm", unicodeslot=0x9C7, @@ -22541,7 +22541,7 @@ characters.data={ description="BENGALI VOWEL SIGN AI", direction="l", indic="d", - indicmark="r", + indicmark="l", indicorder="bh", linebreak="cm", unicodeslot=0x9C8, @@ -22551,7 +22551,8 @@ characters.data={ category="mc", description="BENGALI VOWEL SIGN O", direction="l", - indic="s", + indic="d", + indicmark="s", linebreak="cm", specials={ "char", 0x9C7, 0x9BE }, unicodeslot=0x9CB, @@ -22561,7 +22562,8 @@ characters.data={ category="mc", description="BENGALI VOWEL SIGN AU", direction="l", - indic="s", + indic="d", + indicmark="s", linebreak="cm", specials={ "char", 0x9C7, 0x9D7 }, unicodeslot=0x9CC, @@ -22761,6 +22763,7 @@ characters.data={ description="BENGALI LETTER RA WITH MIDDLE DIAGONAL", direction="l", indic="o", + indicclass="ra", indicorder="as", linebreak="al", shcode=0x9B0, @@ -23653,6 +23656,7 @@ characters.data={ description="GUJARATI SIGN ANUSVARA", direction="nsm", indic="o", + indicmark="t", linebreak="cm", unicodeslot=0xA82, }, @@ -24021,6 +24025,7 @@ characters.data={ description="GUJARATI LETTER YA", direction="l", indic="c", + indicorder="ap", linebreak="al", unicodeslot=0xAAF, }, @@ -24977,7 +24982,7 @@ characters.data={ description="ORIYA VOWEL SIGN AI", direction="l", indic="d", - indicmark="l", + indicmark="s", linebreak="cm", specials={ "char", 0xB47, 0xB56 }, unicodeslot=0xB48, @@ -26394,7 +26399,7 @@ characters.data={ description="TELUGU VOWEL SIGN AI", direction="nsm", indic="d", - indicmark="t", + indicmark="s", linebreak="cm", specials={ "char", 0xC46, 0xC56 }, unicodeslot=0xC48, @@ -27146,7 +27151,7 @@ characters.data={ description="KANNADA VOWEL SIGN II", direction="l", indic="d", - indicmark="r", + indicmark="s", linebreak="cm", specials={ "char", 0xCBF, 0xCD5 }, unicodeslot=0xCC0, @@ -27206,7 +27211,7 @@ characters.data={ description="KANNADA VOWEL SIGN EE", direction="l", indic="d", - indicmark="r", + indicmark="s", linebreak="cm", specials={ "char", 0xCC6, 0xCD5 }, unicodeslot=0xCC7, @@ -27216,7 +27221,7 @@ characters.data={ description="KANNADA VOWEL SIGN AI", direction="l", indic="d", - indicmark="r", + indicmark="s", linebreak="cm", specials={ "char", 0xCC6, 0xCD6 }, unicodeslot=0xCC8, @@ -27226,7 +27231,7 @@ characters.data={ description="KANNADA VOWEL SIGN O", direction="l", indic="d", - indicmark="r", + indicmark="s", linebreak="cm", specials={ "char", 0xCC6, 0xCC2 }, unicodeslot=0xCCA, @@ -27236,7 +27241,7 @@ characters.data={ description="KANNADA VOWEL SIGN OO", direction="l", indic="d", - indicmark="r", + indicmark="s", linebreak="cm", specials={ "char", 0xCCA, 0xCD5 }, unicodeslot=0xCCB, @@ -27256,7 +27261,7 @@ characters.data={ combining=0x9, description="KANNADA SIGN VIRAMA", direction="nsm", - indic="s", + indic="o", indicclass="halant", linebreak="cm", unicodeslot=0xCCD, @@ -27265,7 +27270,8 @@ characters.data={ category="mc", description="KANNADA LENGTH MARK", direction="l", - indic="o", + indic="d", + indicmark="r", indicorder="as", linebreak="cm", unicodeslot=0xCD5, @@ -27274,7 +27280,8 @@ characters.data={ category="mc", description="KANNADA AI LENGTH MARK", direction="l", - indic="o", + indic="d", + indicmark="b", indicorder="as", linebreak="cm", unicodeslot=0xCD6, @@ -28029,7 +28036,7 @@ characters.data={ combining=0x9, description="MALAYALAM SIGN VIRAMA", direction="nsm", - indic="s", + indic="o", indicclass="halant", linebreak="cm", synonyms={ "malayalam chandrakkala", "malayalam vowel half-u" }, @@ -120160,7 +120167,7 @@ characters.data={ combining=0xE6, description="COMBINING DEVANAGARI LETTER VI", direction="nsm", - indic="m", + indic="o", indicmark="t", linebreak="cm", unicodeslot=0xA8F0, @@ -257399,4 +257406,4 @@ characters.data={ synonyms={ "vs17" }, unicodeslot=0xE0100, }, -}
\ No newline at end of file +} diff --git a/tex/context/base/mkiv/cont-new.mkiv b/tex/context/base/mkiv/cont-new.mkiv index 4af95a2c0..a4c86506c 100644 --- a/tex/context/base/mkiv/cont-new.mkiv +++ b/tex/context/base/mkiv/cont-new.mkiv @@ -13,7 +13,7 @@ % \normalend % uncomment this to get the real base runtime -\newcontextversion{2019.06.05 15:39} +\newcontextversion{2019.06.11 19:20} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. diff --git a/tex/context/base/mkiv/context.mkiv b/tex/context/base/mkiv/context.mkiv index 5189d1246..d9fa164d7 100644 --- a/tex/context/base/mkiv/context.mkiv +++ b/tex/context/base/mkiv/context.mkiv @@ -45,7 +45,7 @@ %D {YYYY.MM.DD HH:MM} format. \edef\contextformat {\jobname} -\edef\contextversion{2019.06.05 15:39} +\edef\contextversion{2019.06.11 19:20} \edef\contextkind {beta} %D Kind of special: diff --git a/tex/context/base/mkiv/font-osd.lua b/tex/context/base/mkiv/font-osd.lua index 8530fc264..203c1d79d 100644 --- a/tex/context/base/mkiv/font-osd.lua +++ b/tex/context/base/mkiv/font-osd.lua @@ -143,6 +143,7 @@ local s_half = states.half local s_pref = states.pref local s_blwf = states.blwf local s_pstf = states.pstf +local s_init = states.init local replace_all_nbsp = nil @@ -209,10 +210,10 @@ if not indicgroups and characters then local indicorders = { bp = { }, -- before_postscript ap = { }, -- after_postscript - bs = { }, -- before_half - as = { }, -- after_half - bh = { }, -- before_subscript - ah = { }, -- after_subscript + bs = { }, -- before_subscript + as = { }, -- after_subscript + bh = { }, -- before_half + ah = { }, -- after_half bm = { }, -- before_main am = { }, -- after_main } @@ -324,17 +325,11 @@ local zw_char = { -- both_joiners_true } local dflt_true = { - dflt = true + dflt = true, } -local two_defaults = { - dev2 = dflt_true, -} - -local one_defaults = { - dev2 = dflt_true, -- set later - deva = dflt_true, -- set later -} +local two_defaults = { } +local one_defaults = { } local false_flags = { false, false, false, false } @@ -367,7 +362,7 @@ local sequence_reorder_reph = { } local sequence_reorder_pre_base_reordering_consonants = { - features = { dv03 = two_defaults }, + features = { dv03 = one_defaults }, flags = false_flags, name = "dv03_reorder_pre_base_reordering_consonants", order = { "dv03" }, @@ -399,22 +394,17 @@ local sequence_remove_joiners = { -- as it might depends on the font. Not that it's a bottleneck. local basic_shaping_forms = { - -- init = true, -- new - -- abvs = true, -- new akhn = true, blwf = true, - -- calt = true, -- new cjct = true, half = true, - -- haln = true, -- new nukt = true, pref = true, - -- pres = true, -- new pstf = true, - -- psts = true, -- new rkrf = true, rphf = true, vatu = true, + locl = true, } local valid = { @@ -439,6 +429,7 @@ local valid = { psts = true, haln = true, calt = true, + locl = true, } local scripts = { } @@ -446,8 +437,6 @@ local scripts = { } local scripts_one = { "deva", "mlym", "beng", "gujr", "guru", "knda", "orya", "taml", "telu" } local scripts_two = { "dev2", "mlm2", "bng2", "gjr2", "gur2", "knd2", "ory2", "tml2", "tel2" } -local scripts_old = { } for i=1,#scripts_one do local v = scripts_one[i] scripts_old[v] = v end -- self - local nofscripts = #scripts_one for i=1,nofscripts do @@ -455,7 +444,7 @@ for i=1,nofscripts do local two = scripts_two[i] scripts[one] = true scripts[two] = true - two_defaults[one] = dflt_true + two_defaults[two] = dflt_true one_defaults[one] = dflt_true one_defaults[two] = dflt_true end @@ -476,28 +465,75 @@ local function initializedevanagi(tfmdata) local sequences = resources.sequences local sharedfeatures = tfmdata.shared.features -- - local lastmatch = 0 - for s=1,#sequences do -- classify chars + gsubfeatures["dv01"] = two_defaults -- reorder matras + gsubfeatures["dv02"] = two_defaults -- reorder reph + gsubfeatures["dv03"] = one_defaults -- reorder pre base reordering consonants + gsubfeatures["dv04"] = one_defaults -- remove joiners + -- + local reorder_pre_base_reordering_consonants = copy(sequence_reorder_pre_base_reordering_consonants) + local reorder_reph = copy(sequence_reorder_reph) + local reorder_matras = copy(sequence_reorder_matras) + local remove_joiners = copy(sequence_remove_joiners) + + local lastmatch = 0 + for s=1,#sequences do -- classify chars and make sure basic_shaping_forms come first local features = sequences[s].features if features then for k, v in next, features do + if k == "locl" then + local steps = sequences[s].steps + local nofsteps = sequences[s].nofsteps + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + for k, v in next, pre_mark do + local locl = coverage[k] + if locl then + if #locl > 0 then --contextchain; KE: is this right? + for j=1,#locl do + local ck = locl[j] + local f = ck[4] + local chainlookups = ck[6] + if chainlookups then + local chainlookup = chainlookups[f] + for j=1,#chainlookup do + local chainstep = chainlookup[j] + local steps = chainstep.steps + local nofsteps = chainstep.nofsteps + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + locl = coverage[k] + end + end + end + end + end + end + if locl then + reorder_matras.steps[1].coverage[locl] = true + end + end + end + end + end + end if basic_shaping_forms[k] then - lastmatch = s + lastmatch = lastmatch + 1 + if s ~= lastmatch then + table.insert(sequences, lastmatch, table.remove(sequences, s)) + end end end end end local insertindex = lastmatch + 1 -- - gsubfeatures["dv01"] = two_defaults -- reorder matras - gsubfeatures["dv02"] = two_defaults -- reorder reph - gsubfeatures["dv03"] = two_defaults -- reorder pre base reordering consonants - gsubfeatures["dv04"] = one_defaults -- remove joiners - -- - local reorder_pre_base_reordering_consonants = copy(sequence_reorder_pre_base_reordering_consonants) - local reorder_reph = copy(sequence_reorder_reph) - local reorder_matras = copy(sequence_reorder_matras) - local remove_joiners = copy(sequence_remove_joiners) + if tfmdata.properties.language then + dflt_true[tfmdata.properties.language] = true + end -- insert(sequences,insertindex,reorder_pre_base_reordering_consonants) insert(sequences,insertindex,reorder_reph) @@ -505,6 +541,8 @@ local function initializedevanagi(tfmdata) insert(sequences,insertindex,remove_joiners) -- local blwfcache = { } + local vatucache = { } + local pstfcache = { } local seqsubset = { } local rephstep = { coverage = { } -- will be adapted each work @@ -513,6 +551,8 @@ local function initializedevanagi(tfmdata) reph = false, vattu = false, blwfcache = blwfcache, + vatucache = vatucache, + pstfcache = pstfcache, seqsubset = seqsubset, reorderreph = rephstep, @@ -525,8 +565,6 @@ local function initializedevanagi(tfmdata) -- resources.devanagari = devanagari -- - local old = scripts_old[script] or false - -- for s=1,#sequences do local sequence = sequences[s] local steps = sequence.steps @@ -534,41 +572,148 @@ local function initializedevanagi(tfmdata) local features = sequence.features local has_rphf = features.rphf local has_blwf = features.blwf - if has_rphf and has_rphf[old] then + local has_vatu = features.vatu + local has_pstf = features.pstf + if has_rphf and has_rphf[script] then devanagari.reph = true - elseif has_blwf and has_blwf[old] then + elseif (has_blwf and has_blwf[script] ) or (has_vatu and has_vatu[script] ) then devanagari.vattu = true for i=1,nofsteps do local step = steps[i] local coverage = step.coverage if coverage then for k, v in next, coverage do - if not blwfcache[k] then - blwfcache[k] = v + for h, w in next, halant do + if v[h] then + if not blwfcache[k] then + blwfcache[k] = v + end + end + if has_vatu and has_vatu[script] and not vatucache[k] then + vatucache[k] = v + end + end + end + end + end + elseif has_pstf and has_pstf[script] then + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + for k, v in next, coverage do + if not pstfcache[k] then + pstfcache[k] = v + end + end + for k, v in next, ra do + local r = coverage[k] + if r then + local found = false + if #r > 0 then -- contextchain; KE: is this right? + for j=1,#r do + local ck = r[j] + local f = ck[4] + local chainlookups = ck[6] + if chainlookups and chainlookups[f] then --KE: why is check for chainlookups[f] necessacy??? + local chainlookup = chainlookups[f] + for j=1,#chainlookup do + local chainstep = chainlookup[j] + local steps = chainstep.steps + local nofsteps = chainstep.nofsteps + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + local h = coverage[k] + if h then + for k, v in next, h do + found = v and v.ligature + if found then + pre_base_reordering_consonants[found] = true + break + end + end + if found then + break + end + end + end + end + end + end + end + else + for k, v in next, r do + found = v and v.ligature + if found then + pre_base_reordering_consonants[found] = true + break + end + end + end + if found then + break + end end end end end end for kind, spec in next, features do - -- if spec.dev2 and valid[kind] then if valid[kind] and valid_two(spec)then for i=1,nofsteps do local step = steps[i] local coverage = step.coverage if coverage then - local reph = false + local reph, rephbase = false, false if kind == "rphf" then -- rphf acts on consonant + halant for k, v in next, ra do local r = coverage[k] if r then + rephbase = k local h = false - for k, v in next, halant do - local h = r[k] - if h then - reph = h.ligature or false - break + if #r > 0 then --contextchain; KE: is this right? + for j=1,#r do + local ck = r[j] + local f = ck[4] + local chainlookups = ck[6] + if chainlookups then + local chainlookup = chainlookups[f] + for j=1,#chainlookup do + local chainstep = chainlookup[j] + local steps = chainstep.steps + local nofsteps = chainstep.nofsteps + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + local r = coverage[k] + if r then + for k, v in next, halant do + local h = r[k] + if h then + reph = h.ligature or false + break + end + end + if h then + break + end + end + end + end + end + end + end + else + for k, v in next, halant do + local h = r[k] + if h then + reph = h.ligature or false + break + end end end if reph then @@ -577,7 +722,7 @@ local function initializedevanagi(tfmdata) end end end - seqsubset[#seqsubset+1] = { kind, coverage, reph } + seqsubset[#seqsubset+1] = { kind, coverage, reph, rephbase } end end end @@ -592,11 +737,46 @@ local function initializedevanagi(tfmdata) local h = coverage[k] if h then local found = false - for k, v in next, h do - found = v and v.ligature - if found then - pre_base_reordering_consonants[k] = found - break + if #h > 0 then -- contextchain; KE: is this right? + for j=1,#h do + local ck = h[j] + local f = ck[4] + local chainlookups = ck[6] + if chainlookups then + local chainlookup = chainlookups[f] + for j=1,#chainlookup do + local chainstep = chainlookup[j] + local steps = chainstep.steps + local nofsteps = chainstep.nofsteps + for i=1,nofsteps do + local step = steps[i] + local coverage = step.coverage + if coverage then + local h = coverage[k] + if h then + for k, v in next, h do + found = v and v.ligature + if found then + pre_base_reordering_consonants[found] = true + break + end + end + if found then + break + end + end + end + end + end + end + end + else + for k, v in next, h do + found = v and v.ligature + if found then + pre_base_reordering_consonants[found] = true + break + end end end if found then @@ -610,90 +790,17 @@ local function initializedevanagi(tfmdata) end end -- - -- The following presets need checking (by Kai). Most of these scripts share a common - -- handling (some need less but that doesn't hurt). The question is: what to enable. - -- - -- dv01_reorder_matras - -- dv02_reorder_reph - -- dv03_reorder_pre_base_reordering_consonants - -- dv04_remove_joiners - -- - if script == "deva" then - sharedfeatures["dv04"] = true - elseif script == "dev2" then - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "knda" then - -- needs checking - sharedfeatures["dv04"] = true - elseif script == "knd2" then - -- needs checking - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "beng" then - -- needs checking - sharedfeatures["dv04"] = true - elseif script == "bng2" then - -- needs checking - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "gurj" then - -- needs checking - sharedfeatures["dv04"] = true - elseif script == "grj2" then - -- needs checking - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "guru" then - -- needs checking - sharedfeatures["dv04"] = true - elseif script == "gur2" then - -- needs checking - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "telu" then - -- needs checking - sharedfeatures["dv04"] = true - elseif script == "tel2" then - -- needs checking - sharedfeatures["dv01"] = true - sharedfeatures["dv02"] = true - sharedfeatures["dv03"] = true - sharedfeatures["dv04"] = true - - elseif script == "mlym" then - sharedfeatures["pstf"] = true - elseif script == "mlm2" then - sharedfeatures["pstf"] = true - sharedfeatures["pref"] = true - sharedfeatures["dv03"] = true - gsubfeatures ["dv03"] = two_defaults - insert(sequences,insertindex,sequence_reorder_pre_base_reordering_consonants) - - elseif script == "taml" then - -- needs checking - sharedfeatures["dv04"] = true - sharedfeatures["pstf"] = true - elseif script == "tml2" then - -- needs checking - - else - report("todo: enable the right features for script %a",script) + if two_defaults[script] then + sharedfeatures["dv01"] = true -- dv01_reorder_matras + sharedfeatures["dv02"] = true -- dv02_reorder_reph + sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants + sharedfeatures["dv04"] = true -- dv04_remove_joiners + elseif one_defaults[script] then + sharedfeatures["dv03"] = true -- dv03_reorder_pre_base_reordering_consonants + sharedfeatures["dv04"] = true -- dv04_remove_joiners + end + if script == "mlym" or script == "taml" then + devanagari.left_matra_before_base = true end end end @@ -735,10 +842,13 @@ local function initialize_one(font,attr) -- we need a proper hook into the datas reph = false, vattu = false, blwfcache = { }, + vatucache = { }, + pstfcache = { }, } datasets.devanagari = devanagaridata local resources = tfmdata.resources local devanagari = resources.devanagari + for s=1,#datasets do local dataset = datasets[s] if dataset and dataset[1] then -- value @@ -746,31 +856,97 @@ local function initialize_one(font,attr) -- we need a proper hook into the datas if kind == "rphf" then -- deva devanagaridata.reph = true - elseif kind == "blwf" then + elseif kind == "blwf" or kind == "vatu" then -- deva devanagaridata.vattu = true -- dev2 devanagaridata.blwfcache = devanagari.blwfcache + devanagaridata.vatucache = devanagari.vatucache + devanagaridata.pstfcache = devanagari.pstfcache end end end end - return devanagaridata.reph, devanagaridata.vattu, devanagaridata.blwfcache + return devanagaridata.reph, devanagaridata.vattu, devanagaridata.blwfcache, devanagaridata.vatucache, devanagaridata.pstfcache + +end + +local function contextchain(contexts, n) + local char = getchar(n) + for k=1,#contexts do + local ck = contexts[k] + local seq = ck[3] + local f = ck[4] + local l = ck[5] + if (l - f) == 1 and seq[f+1][char] then + local ok = true + local c = n + for i=l+1,#seq do + c = getnext(c) + if not c or not seq[i][ischar(c)] then + ok = false + break + end + end + if ok then + c = getprev(n) + for i=1,f-1 do + c = getprev(c) + if not c or not seq[f-i][ischar(c)] then + ok = false + end + end + end + if ok then + return true + end + end + end + return false +end +local function order_matras(c) + local cn = getnext(c) + local char = getchar(cn) + while dependent_vowel[char] do + local next = getnext(cn) + local cc = c + local cchar = getchar(cc) + while cc ~= cn do + if (above_mark[char] and (below_mark[cchar] or post_mark[cchar])) or (below_mark[char] and (post_mark[cchar])) then + local prev, next = getboth(cn) + if next then + setprev(next,prev) + end + -- todo: setlink + setnext(prev,next) + setnext(getprev(cc),cn) + setprev(cn,getprev(cc)) + setnext(cn,cc) + setprev(cc,cn) + break + end + cc = getnext(cc) + cchar = getchar(cc) + end + cn = next + char = getchar(cn) + end end local function reorder_one(head,start,stop,font,attr,nbspaces) - local reph, vattu, blwfcache = initialize_one(font,attr) -- todo: a hash[font] + local reph, vattu, blwfcache, vatucache, pstfcache = initialize_one(font,attr) -- todo: a hash[font] - local current = start - local n = getnext(start) - local base = nil - local firstcons = nil - local lastcons = nil - local basefound = false + local devanagari = fontdata[font].resources.devanagari + local current = start + local n = getnext(start) + local base = nil + local firstcons = nil + local lastcons = nil + local basefound = false if reph and ra[getchar(start)] and halant[getchar(n)] then -- if syllable starts with Ra + H and script has 'Reph' then exclude Reph @@ -853,8 +1029,11 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if not base then base = current elseif blwfcache[char] then - -- consonant has below-base (or post-base) form + -- consonant has below-base form setprop(current,a_state,s_blwf) + elseif pstfcache[char] then + -- consonant has post-base form + setprop(current,a_state,s_pstf) else base = current end @@ -933,16 +1112,14 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) current = next end - if base ~= stop and getprop(base,a_state) then + if base ~= stop and getprop(base,a_state) then -- a_state can also be init local next = getnext(base) if halant[getchar(next)] and not (next ~= stop and getchar(getnext(next)) == c_zwj) then setprop(base,a_state,unsetvalue) end end - -- ToDo: split two- or three-part matras into their parts. Then, move the left 'matra' part to the beginning of the syllable. - -- Not necessary for Devanagari. However it is necessay for other scripts, such as Tamil (e.g. TAMIL VOWEL SIGN O - 0BCA) - + -- split two- or three-part matras into their parts. Then, move the left 'matra' part to the beginning of the syllable. -- classify consonants and 'matra' parts as pre-base, above-base (Reph), below-base or post-base, and group elements of the syllable (consonants and 'matras') according to this classification local current, allreordered, moved = start, false, { [base] = true } @@ -968,6 +1145,17 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) n = getnext(n) ch = getchar(n) end + + local tpm = twopart_mark[ch] + while tpm do + local extra = copy_node(n) + copyinjection(extra,n) + ch = tpm[1] + setchar(n,ch) + setchar(extra,tpm[2]) + head = insert_node_after(head,current,extra) + tpm = twopart_mark[ch] + end while c ~= stop and dependent_vowel[ch] do c = n n = getnext(n) @@ -986,15 +1174,51 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) end end end - local bp = getprev(firstcons) - local cn = getnext(current) + local bp = getprev(firstcons) + local cn = getnext(current) local last = getnext(c) while cn ~= last do -- move pre-base matras... if pre_mark[getchar(cn)] then - if bp then - setnext(bp,cn) + if devanagari.left_matra_before_base then + local prev, next = getboth(cn) + setlink(prev,next) + if cn == stop then + stop = getprev(cn) + end + if base == start then + if head == start then + head = cn + end + start = cn + end + setlink(getprev(base),cn) + setlink(cn,base) + -- setlink(getprev(base),cn,base) -- maybe + cn = next + else + if bp then + setnext(bp,cn) + end + local prev, next = getboth(cn) + if next then + setprev(next,prev) + end + setnext(prev,next) + if cn == stop then + stop = prev + end + setprev(cn,bp) + setlink(cn,firstcons) + if firstcons == start then + if head == start then + head = cn + end + start = cn + end + cn = next end + elseif current ~= base and dependent_vowel[getchar(cn)] then local prev, next = getboth(cn) if next then setprev(next,prev) @@ -1003,17 +1227,19 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if cn == stop then stop = prev end - setprev(cn,bp) - setlink(cn,firstcons) - if firstcons == start then - if head == start then - head = cn - end - start = cn + setlink(b,cn,getnext(b)) + order_matras(cn) + cn = next + elseif current == base and dependent_vowel[getchar(cn)] then + local cnn = getnext(cn) + order_matras(cn) + cn = cnn + while cn ~= last and dependent_vowel[getchar(cn)] do + cn = getnext(cn) end - break + else + cn = getnext(cn) end - cn = getnext(cn) end allreordered = c == stop current = getnext(c) @@ -1075,6 +1301,13 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if halant[getchar(next)] then cns = next end + if not vatucache[char] then + next = getnext(cns) + while dependent_vowel[getchar(next)] do + cns = next + next = getnext(cns) + end + end elseif char == c_nbsp then nbspaces = nbspaces + 1 cns = current @@ -1082,6 +1315,13 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if halant[getchar(next)] then cns = next end + if not vatucache[char] then + next = getnext(cns) + while dependent_vowel[getchar(next)] do + cns = next + next = getnext(cns) + end + end end end current = getnext(current) @@ -1090,6 +1330,9 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if getchar(base) == c_nbsp then nbspaces = nbspaces - 1 + if base == stop then + stop = getprev(stop) + end head = remove_node(head,base) flush_node(base) end @@ -1114,7 +1357,7 @@ function handlers.devanagari_reorder_matras(head,start) -- no leak local char = ischar(current,startfont) local next = getnext(current) if char and getprop(current,a_syllabe) == startattr then - if halant[char] and not getprop(current,a_state) then + if halant[char] then -- a_state can also be init if next then local char = ischar(next,startfont) if char and zw_char[char] and getprop(next,a_syllabe) == startattr then @@ -1130,6 +1373,18 @@ function handlers.devanagari_reorder_matras(head,start) -- no leak -- setlink(current,start,next) -- maybe start = startnext break + -- elseif consonant[char] and ( not getprop(current,a_state) or getprop(current,a_state) == s_init) then + -- startnext = getnext(start) + -- head = remove_node(head,start) + -- if current == head then + -- setlink(start,current) + -- head = start + -- else + -- setlink(getprev(current),start) + -- setlink(start,current) + -- end + -- start = startnext + -- break end else break @@ -1148,6 +1403,8 @@ end -- In Devanagari reph has reordering position 'before postscript' and dev2 only -- follows step 2, 4, and 6. +local rephbase = { } + function handlers.devanagari_reorder_reph(head,start) local current = getnext(start) local startnext = nil @@ -1159,6 +1416,12 @@ function handlers.devanagari_reorder_reph(head,start) -- -- If reph should be positioned after post-base consonant forms, proceed to step 5. -- + local char = ischar(start,startfont) + local rephbase = rephbase[startfont][char] + if char and after_subscript[rephbase] then + goto step_5 + end + -- ::step_2:: -- -- If the reph repositioning class is not after post-base: target position is after @@ -1169,39 +1432,64 @@ function handlers.devanagari_reorder_reph(head,start) -- fixed in shaping engine, there was no case where reph position will be found on -- this step. -- - while current do - local char = ischar(current,startfont) - if char and getprop(current,a_syllabe) == startattr then - if halant[char] and not getprop(current,a_state) then - local next = getnext(current) - if next then - local nextchar = ischar(next,startfont) - if nextchar and zw_char[nextchar] and getprop(next,a_syllabe) == startattr then - current = next - next = getnext(current) + if char and not after_postscript[rephbase] then + while current do + local char = ischar(current,startfont) + if char and getprop(current,a_syllabe) == startattr then + if halant[char] then + local next = getnext(current) + if next then + local nextchar = ischar(next,startfont) + if nextchar and zw_char[nextchar] and getprop(next,a_syllabe) == startattr then + current = next + next = getnext(current) + end end + startnext = getnext(start) + head = remove_node(head,start) + setlink(start,next) + setlink(current,start) + -- setlink(current,start,next) -- maybe + start = startnext + startattr = getprop(start,a_syllabe) + break end - startnext = getnext(start) - head = remove_node(head,start) - setlink(start,next) - setlink(current,start) - -- setlink(current,start,next) -- maybe - start = startnext - startattr = getprop(start,a_syllabe) + current = getnext(current) + else break end - current = getnext(current) - else - break end end + -- ::step_3:: -- - -- If reph should be repositioned after the main consonant: from the first consonant + -- If reph should be repositioned after the main consonant: find the first consonant -- not ligated with main, or find the first consonant that is not a potential -- pre-base reordering Ra. -- - -- Kai: todo + if not startnext then + if char and after_main[rephbase] then + current = getnext(start) + while current do + local char = ischar(current,startfont) + if char and getprop(current,a_syllabe) == startattr then + if consonant[char] and not getprop(current,a_state) == s_pref then + startnext = getnext(start) + head = remove_node(head,start) + setlink(current,start) + setlink(start,getnext(current)) + -- setlink(current,start,getnext(current)) -- maybe + start = startnext + startattr = getprop(start,a_syllabe) + break + end + current = getnext(current) + else + break + end + end + end + end -- ::step_4:: -- @@ -1210,23 +1498,37 @@ function handlers.devanagari_reorder_reph(head,start) -- position should be before the first matra, syllable modifier sign or vedic sign. -- if not startnext then - current = getnext(start) - while current do - local char = ischar(current,startfont) - if char and getprop(current,a_syllabe) == startattr then - if getprop(current,a_state) == s_pstf then -- post-base - startnext = getnext(start) - head = remove_node(head,start) - setlink(getprev(current),start) - setlink(start,current) - -- setlink(getprev(current),start,current) -- maybe - start = startnext - startattr = getprop(start,a_syllabe) + if char and before_postscript[rephbase] then + current = getnext(start) + local c = nil + while current do + local char = ischar(current,startfont) + if char and getprop(current,a_syllabe) == startattr then + if getprop(current,a_state) == s_pstf then -- post-base + startnext = getnext(start) + head = remove_node(head,start) + setlink(getprev(current),start) + setlink(start,current) + -- setlink(getprev(current),start,current) -- maybe + start = startnext + startattr = getprop(start,a_syllabe) + break + elseif not c and ( vowel_modifier[char] or stress_tone_mark[char] ) then + c = current + end + current = getnext(current) + else + if c then + startnext = getnext(start) + head = remove_node(head,start) + setlink(getprev(c),start) + setlink(start,c) + -- setlink(getprev(c),start,c) -- maybe + start = startnext + startattr = getprop(start,a_syllabe) + end break end - current = getnext(current) - else - break end end end @@ -1245,7 +1547,10 @@ function handlers.devanagari_reorder_reph(head,start) while current do local char = ischar(current,startfont) if char and getprop(current,a_syllabe) == startattr then - if not c and mark_above_below_post[char] and not after_subscript[char] then + local state = getprop(current,a_state) + if before_subscript[rephbase] and (state == s_blwf or state == s_pstf) then + c = current + elseif after_subscript[rephbase] and (state == s_pstf) then c = current end current = getnext(current) @@ -1310,67 +1615,82 @@ end -- return head, start, done -- end +local reordered_pre_base_reordering_consonants = { } -- shared ? not reset ? + function handlers.devanagari_reorder_pre_base_reordering_consonants(head,start) - local current = start - local startnext = nil - local startprev = nil + if reordered_pre_base_reordering_consonants[start] then + return head, start, true + end + local current = start -- we could cache attributes here local startfont = getfont(start) local startattr = getprop(start,a_syllabe) - -- can be fast for loop + caching state while current do local char = ischar(current,startfont) + local next = getnext(current) if char and getprop(current,a_syllabe) == startattr then - local next = getnext(current) - if halant[char] and not getprop(current,a_state) then + if halant[char] then -- a_state can also be init if next then - local nextchar = ischar(next,startfont) - if nextchar and getprop(next,a_syllabe) == startattr then - if nextchar == c_zwnj or nextchar == c_zwj then - current = next - next = getnext(current) - end + local char = ischar(next,startfont) + if char and zw_char[char] and getprop(next,a_syllabe) == startattr then + current = next + next = getnext(current) end end - startnext = getnext(start) - removenode(start,start) + -- can be optimzied + local startnext = getnext(start) + head = remove_node(head,start) setlink(start,next) setlink(current,start) -- setlink(current,start,next) -- maybe + reordered_pre_base_reordering_consonants[start] = true start = startnext - break + return head, start, true + -- elseif consonant[char] and ( not getprop(current,a_state) or getprop(current,a_state) == s_init) then + -- startnext = getnext(start) + -- head = remove_node(head,start) + -- if current == head then + -- setlink(start,current) + -- head = start + -- else + -- setlink(getprev(current),start) + -- setlink(start,current) + -- end + -- start = startnext + -- break end - current = next else break end + current = next end - if not startnext then - current = getnext(start) - startattr = getprop(start,a_syllabe) - while current do - local char = ischar(current,startfont) - if char and getprop(current,a_syllabe) == startattr then - if not consonant[char] and getprop(current,a_state) then -- main - startnext = getnext(start) - removenode(start,start) - setlink(getprev(current),start) - setlink(start,current) - -- setlink(getprev(current),start,current) -- maybe - start = startnext - break - end - current = getnext(current) + + local startattr = getprop(start,a_syllabe) + local current = getprev(start) + while current and getprop(current,a_syllabe) == startattr do + local char = ischar(current) + if ( not dependent_vowel[char] and not getprop(current,a_state) or getprop(current,a_state) == s_init) then + startnext = getnext(start) + head = remove_node(head,start) + if current == head then + setlink(start,current) + head = start else - break + setlink(getprev(current),start) + setlink(start,current) end + reordered_pre_base_reordering_consonants[start] = true + start = startnext + break end + current = getprev(current) end + return head, start, true end function handlers.devanagari_remove_joiners(head,start,kind,lookupname,replacement) local stop = getnext(start) - local font = getfont(start) -- hm + local font = getfont(start) local last = start while stop do local char = ischar(stop,font) @@ -1411,15 +1731,15 @@ end -- of the actions local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pass over (determine stop in sweep) - local seqsubset, reorderreph = initialize_two(font,attr) - local reph = false -- was nil ... probably went unnoticed because never assigned local halfpos = nil local basepos = nil local subpos = nil local postpos = nil - local locl = { } + + reorderreph.coverage = { } + rephbase[font] = { } for i=1,#seqsubset do @@ -1429,17 +1749,17 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas local kind = subset[1] local lookupcache = subset[2] if kind == "rphf" then - reph = subset[3] + reorderreph.coverage[subset[3]] = true -- neat + rephbase[font][subset[3]] = subset[4] local current = start local last = getnext(stop) while current ~= last do if current ~= stop then - local c = locl[current] or getchar(current) + local c = getchar(current) local found = lookupcache[c] if found then local next = getnext(current) - local n = locl[next] or getchar(next) - if found[n] then --above-base: rphf Consonant + Halant + if found[getchar(next)] or contextchain(found, next) then --above-base: rphf Consonant + Halant local afternext = next ~= stop and getnext(next) if afternext and zw_char[getchar(afternext)] then -- ZWJ and ZWNJ prevent creation of reph current = afternext -- getnext(next) @@ -1459,15 +1779,16 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas local last = getnext(stop) while current ~= last do if current ~= stop then - local c = locl[current] or getchar(current) + local c = getchar(current) local found = lookupcache[c] if found then -- pre-base: pref Halant + Consonant local next = getnext(current) - local n = locl[next] or getchar(next) - if found[n] then - setprop(current,a_state,s_pref) - setprop(next,a_state,s_pref) - current = next + if found[getchar(next)] or contextchain(found, next) then + if (not getprop(current,a_state) and not getprop(next,a_state)) then --KE: a_state can also be init... + setprop(current,a_state,s_pref) + setprop(next,a_state,s_pref) + current = next + end end end end @@ -1478,15 +1799,14 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas local last = getnext(stop) while current ~= last do if current ~= stop then - local c = locl[current] or getchar(current) + local c = getchar(current) local found = lookupcache[c] if found then local next = getnext(current) - local n = locl[next] or getchar(next) - if found[n] then + if found[getchar(next)] or contextchain(found, next) then if next ~= stop and getchar(getnext(next)) == c_zwnj then -- zwnj prevent creation of half current = next - else + elseif (not getprop(current,a_state)) then --KE: a_state can also be init... setprop(current,a_state,s_half) if not halfpos then halfpos = current @@ -1498,21 +1818,22 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas end current = getnext(current) end - elseif kind == "blwf" then -- below-base: blwf / Halant + Consonant + elseif kind == "blwf" or kind == "vatu" then -- below-base: blwf / Halant + Consonant local current = start local last = getnext(stop) while current ~= last do if current ~= stop then - local c = locl[current] or getchar(current) + local c = getchar(current) local found = lookupcache[c] if found then local next = getnext(current) - local n = locl[next] or getchar(next) - if found[n] then - setprop(current,a_state,s_blwf) - setprop(next,a_state,s_blwf) - current = next - subpos = current + if found[getchar(next)] or contextchain(found, next) then + if (not getprop(current,a_state) and not getprop(next,a_state)) then --KE: a_state can also be init... + setprop(current,a_state,s_blwf) + setprop(next,a_state,s_blwf) + current = next + subpos = current + end end end end @@ -1523,16 +1844,17 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas local last = getnext(stop) while current ~= last do if current ~= stop then - local c = locl[current] or getchar(current) + local c = getchar(current) local found = lookupcache[c] if found then local next = getnext(current) - local n = locl[next] or getchar(next) - if found[n] then - setprop(current,a_state,s_pstf) - setprop(next,a_state,s_pstf) - current = next - postpos = current + if found[getchar(next)] or contextchain(found, next) then + if (not getprop(current,a_state) and not getprop(next,a_state)) then --KE: a_state can also be init... + setprop(current,a_state,s_pstf) + setprop(next,a_state,s_pstf) + current = next + postpos = current + end end end end @@ -1541,12 +1863,6 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas end end - -- this one changes per word ... - - reorderreph.coverage = { [reph] = true } -- neat - - -- end of weird - local current, base, firstcons = start, nil, nil if getprop(start,a_state) == s_rphf then @@ -1612,7 +1928,7 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas end -- check whether consonant has below-base or post-base form or is pre-base reordering Ra local a = getprop(current,a_state) - if not (a == s_pref or a == s_blwf or a == s_pstf) then + if not (a == s_blwf or a == s_pstf or (a ~= s_rphf and a ~= s_blwf and ra[getchar(current)])) then base = current end end @@ -1630,7 +1946,7 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas end return head, stop, nbspaces else - if getprop(base,a_state) then + if getprop(base,a_state) then -- a_state can also be init setprop(base,a_state,unsetvalue) end basepos = base @@ -1647,24 +1963,27 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas -- Matra characters are classified and reordered by which consonant in a conjunct they have affinity for - local moved = { } + local moved = { } local current = start - local last = getnext(stop) + local last = getnext(stop) while current ~= last do - local char, target, cn = locl[current] or getchar(current), nil, getnext(current) + local char = getchar(current) + local target = nil + local cn = getnext(current) -- not so efficient (needed for malayalam) local tpm = twopart_mark[char] - if tpm then + while tpm do local extra = copy_node(current) copyinjection(extra,current) char = tpm[1] setchar(current,char) setchar(extra,tpm[2]) head = insert_node_after(head,current,extra) + tpm = twopart_mark[char] end -- if not moved[current] and dependent_vowel[char] then - if pre_mark[char] then -- Before first half form in the syllable + if pre_mark[char] then -- or: if before_main or before_half moved[current] = true -- can be helper to remove one node local prev, next = getboth(current) @@ -1672,17 +1991,47 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas if current == stop then stop = getprev(current) end - if halfpos == start then + + local pos + if before_main[char] then + pos = basepos + -- basepos = current -- is this correct? + else + -- must be before_half + pos = halfpos + -- halfpos = current -- is this correct? + end + + local ppos = getprev(pos) -- necessary? + while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) do + if getprop(ppos,a_state) == s_pref then + pos = ppos + end + ppos = getprev(ppos) + end + + local ppos = getprev(pos) -- necessary? + while ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and halant[ischar(ppos)] do + ppos = getprev(ppos) + if ppos and getprop(ppos,a_syllabe) == getprop(pos,a_syllabe) and consonant[ischar(ppos)] then + pos = ppos + ppos = getprev(ppos) + else + break + end + end + + if pos == start then if head == start then head = current end start = current end - setlink(getprev(halfpos),current) - setlink(current,halfpos) - -- setlink(getprev(halfpos),current,halfpos) -- maybe - halfpos = current - elseif above_mark[char] then -- After main consonant + setlink(getprev(pos),current) + setlink(current,pos) + -- setlink(getprev(pos),current,pos) -- maybe + elseif above_mark[char] then + -- after main consonant target = basepos if subpos == basepos then subpos = current @@ -1691,13 +2040,25 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas postpos = current end basepos = current - elseif below_mark[char] then -- After subjoined consonants + elseif below_mark[char] then + -- after subjoined consonants target = subpos if postpos == subpos then postpos = current end subpos = current - elseif post_mark[char] then -- After post-form consonant + elseif post_mark[char] then + -- after post-form consonant + local n = getnext(postpos) -- nukta and vedic sign come first - is that right? and also halant+ra + while n do + local v = ischar(n,font) + if nukta[v] or stress_tone_mark[v] or vowel_modifier[v] then + postpos = n + else + break + end + n = getnext(n) + end target = postpos postpos = current end @@ -1718,9 +2079,44 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) -- maybe do a pas current = cn end + -- reorder halant+Ra + + local current = getnext(start) + local last = getnext(stop) + while current ~= last do + local char = getchar(current) + local cn = getnext(current) + if halant[char] and ra[ischar(cn)] and getprop(cn,a_state) ~= s_rphf and getprop(cn,a_state) ~= s_blwf then + if after_main[ischar(cn)] then + local prev = getprev(current) + local next = getnext(cn) + local bpn = getnext(basepos) + while bpn and dependent_vowel[ischar(bpn)] do + basepos = bpn + bpn = getnext(bpn) + end + if basepos ~= prev then + setlink(prev,next) + setlink(cn, getnext(basepos)) + setlink(basepos, current) + if cn == stop then + stop = prev + end + cn = next + end + end + -- after_postscript + -- after_subscript + -- before_postscript + -- before_subscript + end + current = cn + end + -- Reorder marks to canonical order: Adjacent nukta and halant or nukta and vedic sign are always repositioned if necessary, so that the nukta is first. - local current, c = start, nil + local current = start + local c = nil while current ~= stop do local char = getchar(current) if halant[char] or stress_tone_mark[char] then @@ -1849,7 +2245,25 @@ local function analyze_next_chars_one(c,font,variant) -- skip one dependent vowe if not v then return c end - if dependent_vowel[v] then + local already_pre_mark -- = false + local already_above_mark -- = false + local already_below_mark -- = false + local already_post_mark -- = false + while dependent_vowel[v] do + local vowels = twopart_mark[v] or { v } + for k, v in next, vowels do + if pre_mark[v] and not already_pre_mark then + already_pre_mark = true + elseif above_mark[v] and not already_above_mark then + already_above_mark = true + elseif below_mark[v] and not already_below_mark then + already_below_mark = true + elseif post_mark[v] and not already_post_mark then + already_post_mark = true + else + return c + end + end c = getnext(c) n = getnext(c) if not n then @@ -2021,7 +2435,25 @@ local function analyze_next_chars_two(c,font) else -- c = ms_matra(c) -- same as one - if dependent_vowel[v] then + local already_pre_mark -- = false + local already_above_mark -- = false + local already_below_mark -- = false + local already_post_mark -- = false + while dependent_vowel[v] do + local vowels = twopart_mark[v] or { v } + for k, v in next, vowels do + if pre_mark[v] and not already_pre_mark then + already_pre_mark = true + elseif above_mark[v] and not already_above_mark then + already_above_mark = true + elseif below_mark[v] and not already_below_mark then + already_below_mark = true + elseif post_mark[v] and not already_post_mark then + already_post_mark = true + else + return c + end + end c = n n = getnext(c) if not n then @@ -2093,6 +2525,7 @@ local function method_one(head,font,attr) local start = true local done = false local nbspaces = 0 + local syllabe = 0 while current do local char = ischar(current,font) if char then @@ -2242,6 +2675,15 @@ local function method_one(head,font,attr) end end if syllablestart ~= syllableend then + if syllableend then + syllabe = syllabe + 1 + local c = syllablestart + local n = getnext(syllableend) + while c ~= n do + setprop(c,a_syllabe,syllabe) + c = getnext(c) + end + end head, current, nbspaces = reorder_one(head,syllablestart,syllableend,font,attr,nbspaces) current = getnext(current) end @@ -2283,6 +2725,21 @@ local function method_one(head,font,attr) head = replace_all_nbsp(head) end + current = head + local n = 0 + while current do + local char = ischar(current,font) + if char then + if n == 0 and not getprop(current,a_state) then + setprop(current,a_state,s_init) + end + n = n + 1 + else + n = 0 + end + current = getnext(current) + end + return head, done end @@ -2363,7 +2820,7 @@ local function method_two(head,font,attr) end if not syllableend and show_syntax_errors then local char = ischar(current,font) - if char and not getprop(current,a_state) then + if char and not getprop(current,a_state) then -- a_state can also be init local mark = mark_four[char] if mark then head, current = inject_syntax_error(head,current,char) @@ -2378,6 +2835,21 @@ local function method_two(head,font,attr) head = replace_all_nbsp(head) end + current = head + local n = 0 + while current do + local char = ischar(current,font) + if char then + if n == 0 and not getprop(current,a_state) then -- a_state can also be init + setprop(current,a_state,s_init) + end + n = n + 1 + else + n = 0 + end + current = getnext(current) + end + return head, done end diff --git a/tex/context/base/mkiv/lpdf-emb.lua b/tex/context/base/mkiv/lpdf-emb.lua index 8eab10c3b..5255eb038 100644 --- a/tex/context/base/mkiv/lpdf-emb.lua +++ b/tex/context/base/mkiv/lpdf-emb.lua @@ -91,7 +91,7 @@ end -- A couple of shared helpers. -local tounicodedictionary, widtharray, collectindices, subsetname, includecidset, tocidsetdictionary +local tounicodedictionary, widtharray, collectindices, subsetname, includecidset, forcecidset, tocidsetdictionary do @@ -230,14 +230,20 @@ end end includecidset = false + forcecidset = false -- for private testing only + + directives.register("backend.pdf.forcecidset",function(v) + forcecidset = v + end) tocidsetdictionary = function(indices,min,max) - if includecidset then + if includecidset or forcecidset then local b = { } local m = idiv(max+7,8) for i=0,max do b[i] = 0 end + b[0] = bor(b[0],lshift(1,7)) -- force notdef into the file for i=min,max do if indices[i] then local bi = idiv(i,8) diff --git a/tex/context/base/mkiv/mlib-pps.lua b/tex/context/base/mkiv/mlib-pps.lua index 5708577fe..73257968b 100644 --- a/tex/context/base/mkiv/mlib-pps.lua +++ b/tex/context/base/mkiv/mlib-pps.lua @@ -253,7 +253,7 @@ local function preset(t,k) return v end -local function startjob(plugmode,kind) +local function startjob(plugmode,kind,mpx) insert(stack,top) top = { textexts = { }, -- all boxes, optionally with a different color @@ -261,6 +261,7 @@ local function startjob(plugmode,kind) texlast = 0, texdata = setmetatableindex({},preset), -- references to textexts in order or usage plugmode = plugmode, -- some day we can then skip all pre/postscripts + extradata = mpx and metapost.getextradata(mpx), } if trace_runs then report_metapost("starting %s run at level %i in %s mode", @@ -580,8 +581,8 @@ end -- side effect of going single pass). function metapost.graphic_base_pass(specification) - local top = startjob(true,"base") local mpx = specification.mpx -- mandate + local top = startjob(true,"base",mpx) local data = specification.data or "" local inclusions = specification.inclusions or "" local initializations = specification.initializations or "" @@ -629,7 +630,7 @@ function metapost.process(specification,...) if type(specification) ~= "table" then oldschool(specification,...) else - startjob(specification.incontext or specification.useplugins,"process") + startjob(specification.incontext or specification.useplugins,"process",false) runmetapost(specification) stopjob() end @@ -910,7 +911,7 @@ local tx_reset, tx_process do end tx_process = function(object,prescript,before,after) - local data = top.texdata[metapost.properties.number] + local data = top.texdata[metapost.properties.number] -- the current figure number, messy local index = tonumber(prescript.tx_index) if index then if trace_textexts then @@ -932,19 +933,45 @@ local tx_reset, tx_process do top.texlast = mp_target -- local mp_text = top.texstrings[mp_index] + local mp_hash = prescript.tx_cache local box - if prescript.tx_cache == "no" then + if mp_hash == "no" then tex.runtoks("mptexttoks") box = textakebox("mptextbox") else - local hash = fmt(mp_text,mp_a or "-",mp_t or "-",mp_c or "-") - box = data.texhash[hash] + local cache = data.texhash + if mp_hash then + mp_hash = tonumber(mp_hash) + end + if mp_hash then + local extradata = top.extradata + if extradata then + cache = extradata.globalcache + if not cache then + cache = { } + extradata.globalcache = cache + end + if trace_runs then + if cache[mp_hash] then + report_textexts("reusing global entry %i",mp_hash) + else + report_textexts("storing global entry %i",mp_hash) + end + end + else + mp_hash = nil + end + end + if not mp_hash then + mp_hash = fmt(mp_text,mp_a or "-",mp_t or "-",mp_c or "-") + end + box = cache[mp_hash] if box then box = copy_list(box) else tex.runtoks("mptexttoks") box = textakebox("mptextbox") - data.texhash[hash] = box + cache[mp_hash] = box end end top.textexts[mp_target] = box diff --git a/tex/context/base/mkiv/mlib-run.lua b/tex/context/base/mkiv/mlib-run.lua index 3365e3d42..bd4818659 100644 --- a/tex/context/base/mkiv/mlib-run.lua +++ b/tex/context/base/mkiv/mlib-run.lua @@ -288,6 +288,11 @@ metapost.defaultmethod = "default" local mpxformats = { } local nofformats = 0 local mpxpreambles = { } +local mpxextradata = { } + +function metapost.getextradata(mpx) + return mpxextradata[mpx] +end function metapost.pushformat(specification,f,m) -- was: instance, name, method if type(specification) ~= "table" then @@ -338,6 +343,7 @@ function metapost.pushformat(specification,f,m) -- was: instance, name, method report_metapost("initializing instance %a using format %a and method %a",usedinstance,format,method) mpx = metapost.checkformat(format,method) mpxformats[usedinstance] = mpx + mpxextradata[mpx] = { } if mpp ~= "" then preamble = mpp end @@ -349,7 +355,6 @@ function metapost.pushformat(specification,f,m) -- was: instance, name, method return mpx end - -- luatex.wrapup(function() -- for k, mpx in next, mpxformats do -- mpx:finish() @@ -365,14 +370,16 @@ function metapost.reset(mpx) -- nothing elseif type(mpx) == "string" then if mpxformats[mpx] then - mpxformats[mpx]:finish() + mpxextradata[mpx] = nil mpxformats[mpx] = nil + mpxformats[mpx]:finish() end else for name, instance in next, mpxformats do if instance == mpx then + mpxextradata[mpx] = nil + mpxformats[mpx] = nil mpx:finish() - mpxformats[name] = nil break end end diff --git a/tex/context/base/mkiv/mult-fun.lua b/tex/context/base/mkiv/mult-fun.lua index 71d612156..57cf4778a 100644 --- a/tex/context/base/mkiv/mult-fun.lua +++ b/tex/context/base/mkiv/mult-fun.lua @@ -65,7 +65,7 @@ return { "withmask", "bitmapimage", "colordecimals", "ddecimal", "dddecimal", "ddddecimal", "colordecimalslist", "textext", "thetextext", "rawtextext", "textextoffset", "texbox", "thetexbox", "rawtexbox", "istextext", - "notcached", + "notcached", "keepcached", "verbatim", "thelabel", "label", "autoalign", diff --git a/tex/context/base/mkiv/spac-ver.mkiv b/tex/context/base/mkiv/spac-ver.mkiv index 27f9ffb70..c76555cba 100644 --- a/tex/context/base/mkiv/spac-ver.mkiv +++ b/tex/context/base/mkiv/spac-ver.mkiv @@ -2335,7 +2335,7 @@ \spac_vspacing_define_same_step\recurselevel\plustwo}% % whatever \global\c_spac_vspacing_special_done#1\relax} -\spac_vspacing_define_same_page{10} % 10 levels should be more than enough as a start +\spac_vspacing_define_same_page{12} % 12 levels should be more than enough as a start \def\spac_vspacing_same_page#1#2% level offset (starts at 0) {\ifnum#1>\c_spac_vspacing_special_done diff --git a/tex/context/base/mkiv/status-files.pdf b/tex/context/base/mkiv/status-files.pdf Binary files differindex 1c524b8d1..70635c11f 100644 --- a/tex/context/base/mkiv/status-files.pdf +++ b/tex/context/base/mkiv/status-files.pdf diff --git a/tex/context/base/mkiv/status-lua.pdf b/tex/context/base/mkiv/status-lua.pdf Binary files differindex f6553771d..f857f29b5 100644 --- a/tex/context/base/mkiv/status-lua.pdf +++ b/tex/context/base/mkiv/status-lua.pdf diff --git a/tex/context/base/mkiv/strc-flt.mkvi b/tex/context/base/mkiv/strc-flt.mkvi index 2af60b41d..8e8975a05 100644 --- a/tex/context/base/mkiv/strc-flt.mkvi +++ b/tex/context/base/mkiv/strc-flt.mkvi @@ -826,11 +826,47 @@ \strc_floats_finish_placement} \vbox} +%D \starttyping +%D \definefloat +%D [one] [figure] +%D [default=right, +%D rightmargindistance=-20cm, +%D criterium=129pt, +%D fallback=rightmargin] +%D +%D \definefloat +%D [two] [figure] +%D [default=right, +%D rightmargindistance=-20cm, +%D criterium=129pt, +%D fallback=three] +%D +%D \definefloat +%D [three] [figure] +%D [default=rightmargin, +%D rightmargindistance=0cm] +%D +%D \placefloat[one]{}{\blackrule[width=30pt]} \samplefile{tufte} +%D \placefloat[one]{}{\blackrule[width=60pt]} \samplefile{tufte} +%D \placefloat[one]{}{\blackrule[width=90pt]} \samplefile{tufte} +%D \placefloat[one]{}{\blackrule[width=130pt]} \samplefile{tufte} +%D \placefloat[two]{}{\blackrule[width=130pt]} \samplefile{tufte} +%D \stoptyping + \def\strc_floats_finish_placement {\doifsomething{\floatparameter\c!criterium} {\ifdim\wd\nextbox>\floatparameter\c!criterium\relax \edef\forcedfloatmethod{\floatparameter\c!fallback}% - \ifx\forcedfloatmethod\empty\let\forcedfloatmethod\v!here\fi + \ifx\forcedfloatmethod\empty \else + \doifelsecommandhandler\??float\forcedfloatmethod + {\let\currentfloat\forcedfloatmethod + \edef\floatlocation{\floatparameter\c!default}% + \let\forcedfloatmethod\floatlocation} + \donothing + \fi + \ifx\forcedfloatmethod\empty + \let\forcedfloatmethod\v!here + \fi \fi}% \strc_floats_check_extra_actions \strc_floats_analyze_variables_two diff --git a/tex/context/base/mkiv/util-jsn.lua b/tex/context/base/mkiv/util-jsn.lua index acbf16090..68c6a712e 100644 --- a/tex/context/base/mkiv/util-jsn.lua +++ b/tex/context/base/mkiv/util-jsn.lua @@ -20,155 +20,391 @@ if not modules then modules = { } end modules ['util-jsn'] = { local P, V, R, S, C, Cc, Cs, Ct, Cf, Cg = lpeg.P, lpeg.V, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Cs, lpeg.Ct, lpeg.Cf, lpeg.Cg local lpegmatch = lpeg.match local format, gsub = string.format, string.gsub +local formatters = string.formatters local utfchar = utf.char -local concat = table.concat +local concat, sortedkeys = table.concat, table.sortedkeys local tonumber, tostring, rawset, type, next = tonumber, tostring, rawset, type, next local json = utilities.json or { } utilities.json = json --- \\ \/ \b \f \n \r \t \uHHHH - -local lbrace = P("{") -local rbrace = P("}") -local lparent = P("[") -local rparent = P("]") -local comma = P(",") -local colon = P(":") -local dquote = P('"') - -local whitespace = lpeg.patterns.whitespace -local optionalws = whitespace^0 - -local escapes = { - ["b"] = "\010", - ["f"] = "\014", - ["n"] = "\n", - ["r"] = "\r", - ["t"] = "\t", -} +do --- todo: also handle larger utf16 + -- \\ \/ \b \f \n \r \t \uHHHH -local escape_un = P("\\u")/"" * (C(R("09","AF","af")^-4) / function(s) - return utfchar(tonumber(s,16)) -end) + local lbrace = P("{") + local rbrace = P("}") + local lparent = P("[") + local rparent = P("]") + local comma = P(",") + local colon = P(":") + local dquote = P('"') -local escape_bs = P([[\]]) / "" * (P(1) / escapes) -- if not found then P(1) is returned i.e. the to be escaped char + local whitespace = lpeg.patterns.whitespace + local optionalws = whitespace^0 -local jstring = dquote * Cs((escape_un + escape_bs + (1-dquote))^0) * dquote -local jtrue = P("true") * Cc(true) -local jfalse = P("false") * Cc(false) -local jnull = P("null") * Cc(nil) -local jnumber = (1-whitespace-rparent-rbrace-comma)^1 / tonumber + local escapes = { + ["b"] = "\010", + ["f"] = "\014", + ["n"] = "\n", + ["r"] = "\r", + ["t"] = "\t", + } -local key = jstring + -- todo: also handle larger utf16 -local jsonconverter = { "value", - hash = lbrace * Cf(Ct("") * (V("pair") * (comma * V("pair"))^0 + optionalws),rawset) * rbrace, - pair = Cg(optionalws * key * optionalws * colon * V("value")), - array = Ct(lparent * (V("value") * (comma * V("value"))^0 + optionalws) * rparent), --- value = optionalws * (jstring + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber + #rparent) * optionalws, - value = optionalws * (jstring + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws, -} + local escape_un = P("\\u")/"" * (C(R("09","AF","af")^-4) / function(s) + return utfchar(tonumber(s,16)) + end) + + local escape_bs = P([[\]]) / "" * (P(1) / escapes) -- if not found then P(1) is returned i.e. the to be escaped char + + local jstring = dquote * Cs((escape_un + escape_bs + (1-dquote))^0) * dquote + local jtrue = P("true") * Cc(true) + local jfalse = P("false") * Cc(false) + local jnull = P("null") * Cc(nil) + local jnumber = (1-whitespace-rparent-rbrace-comma)^1 / tonumber + + local key = jstring + + local jsonconverter = { "value", + hash = lbrace * Cf(Ct("") * (V("pair") * (comma * V("pair"))^0 + optionalws),rawset) * rbrace, + pair = Cg(optionalws * key * optionalws * colon * V("value")), + array = Ct(lparent * (V("value") * (comma * V("value"))^0 + optionalws) * rparent), + -- value = optionalws * (jstring + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber + #rparent) * optionalws, + value = optionalws * (jstring + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws, + } + + -- local jsonconverter = { "value", + -- hash = lbrace * Cf(Ct("") * (V("pair") * (comma * V("pair"))^0 + optionalws),rawset) * rbrace, + -- pair = Cg(optionalws * V("string") * optionalws * colon * V("value")), + -- array = Ct(lparent * (V("value") * (comma * V("value"))^0 + optionalws) * rparent), + -- string = jstring, + -- value = optionalws * (V("string") + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws, + -- } + + -- lpeg.print(jsonconverter) -- size 181 --- local jsonconverter = { "value", --- hash = lbrace * Cf(Ct("") * (V("pair") * (comma * V("pair"))^0 + optionalws),rawset) * rbrace, --- pair = Cg(optionalws * V("string") * optionalws * colon * V("value")), --- array = Ct(lparent * (V("value") * (comma * V("value"))^0 + optionalws) * rparent), --- string = jstring, --- value = optionalws * (V("string") + V("hash") + V("array") + jtrue + jfalse + jnull + jnumber) * optionalws, --- } + function json.tolua(str) + return lpegmatch(jsonconverter,str) + end --- lpeg.print(jsonconverter) -- size 181 + function json.load(filename) + local data = io.loaddata(filename) + if data then + return lpegmatch(jsonconverter,data) + end + end -function json.tolua(str) - return lpegmatch(jsonconverter,str) end -local escaper +do -local function tojson(value,t,n) -- we could optimize #t - local kind = type(value) - if kind == "table" then - local done = false - local size = #value - if size == 0 then - for k, v in next, value do - if done then - n = n + 1 ; t[n] = "," + -- It's pretty bad that JSON doesn't allow the trailing comma ... it's a + -- typical example of a spec that then forces all generators to check for + -- this. It's a way to make sure programmers keep jobs. + + local escaper + + local f_start_hash = formatters[ '%w{' ] + local f_start_array = formatters[ '%w[' ] + local f_start_hash_new = formatters[ "\n" .. '%w{' ] + local f_start_array_new = formatters[ "\n" .. '%w[' ] + local f_start_hash_key = formatters[ "\n" .. '%w"%s" : {' ] + local f_start_array_key = formatters[ "\n" .. '%w"%s" : [' ] + + local f_stop_hash = formatters[ "\n" .. '%w}' ] + local f_stop_array = formatters[ "\n" .. '%w]' ] + + local f_key_val_seq = formatters[ "\n" .. '%w"%s" : %s' ] + local f_key_val_str = formatters[ "\n" .. '%w"%s" : "%s"' ] + local f_key_val_num = f_key_val_seq + local f_key_val_yes = formatters[ "\n" .. '%w"%s" : true' ] + local f_key_val_nop = formatters[ "\n" .. '%w"%s" : false' ] + + local f_val_num = formatters[ "\n" .. '%w%s' ] + local f_val_str = formatters[ "\n" .. '%w"%s"' ] + local f_val_yes = formatters[ "\n" .. '%wtrue' ] + local f_val_nop = formatters[ "\n" .. '%wfalse' ] + local f_val_seq = f_val_num + + -- no empty tables because unknown if table or hash + + local t = { } + local n = 0 + + local function is_simple_table(tt) -- also used in util-tab so maybe public + local l = #tt + if l > 0 then + for i=1,l do + if type(tt[i]) == "table" then + return false + end + end + local nn = n + n = n + 1 t[n] = "[ " + for i=1,l do + if i > 1 then + n = n + 1 t[n] = ", " + end + local v = tt[i] + local tv = type(v) + if tv == "number" then + n = n + 1 t[n] = v + elseif tv == "string" then + n = n + 1 t[n] = '"' + n = n + 1 t[n] = lpegmatch(escaper,v) or v + n = n + 1 t[n] = '"' + elseif tv == "boolean" then + n = n + 1 t[n] = v and "true" or "false" else - n = n + 1 ; t[n] = "{" - done = true + n = n + 1 t[n] = tostring(v) end - n = n + 1 ; t[n] = format("%q:",k) - t, n = tojson(v,t,n) end - if done then - n = n + 1 ; t[n] = "}" + n = n + 1 t[n] = " ]" + local s = concat(t,"",nn+1,n) + n = nn + return s + end + return false + end + + local function tojsonpp(root,name,depth,level,size) + if root then + local indexed = size > 0 + n = n + 1 + if level == 0 then + if indexed then + t[n] = f_start_array(depth) + else + t[n] = f_start_hash(depth) + end + elseif name then + if tn == "string" then + name = lpegmatch(escaper,name) or name + elseif tn ~= "number" then + name = tostring(name) + end + if indexed then + t[n] = f_start_array_key(depth,name) + else + t[n] = f_start_hash_key(depth,name) + end else - n = n + 1 ; t[n] = "{}" + if indexed then + t[n] = f_start_array_new(depth) + else + t[n] = f_start_hash_new(depth) + end end - elseif size == 1 then - -- we can optimize for non tables - n = n + 1 ; t[n] = "[" - t, n = tojson(value[1],t,n) - n = n + 1 ; t[n] = "]" - else - for i=1,size do + depth = depth + 1 + if indexed then -- indexed + for i=1,size do + if i > 1 then + n = n + 1 t[n] = "," + end + local v = root[i] + local tv = type(v) + if tv == "number" then + n = n + 1 t[n] = f_val_num(depth,v) + elseif tv == "string" then + v = lpegmatch(escaper,v) or v + n = n + 1 t[n] = f_val_str(depth,v) + elseif tv == "table" then + if next(v) then + local st = is_simple_table(v) + if st then + n = n + 1 t[n] = f_val_seq(depth,st) + else + tojsonpp(v,k,depth,level+1,0) + end + end + elseif tv == "boolean" then + n = n + 1 + if v then + t[n] = f_val_yes(depth,v) + else + t[n] = f_val_nop(depth,v) + end + end + end + elseif next(root) then + local sk = sortedkeys(root) + for i=1,#sk do + if i > 1 then + n = n + 1 t[n] = "," + end + local k = sk[i] + local v = root[k] + local tv = type(v) + local tk = type(k) + if tv == "number" then + if tk == "number" then + n = n + 1 t[n] = f_key_val_num(depth,k,v) + elseif tk == "string" then + k = lpegmatch(escaper,k) or k + n = n + 1 t[n] = f_key_val_str(depth,k,v) + end + elseif tv == "string" then + if tk == "number" then + v = lpegmatch(escaper,v) or v + n = n + 1 t[n] = f_key_val_num(depth,k,v) + elseif tk == "string" then + k = lpegmatch(escaper,k) or k + v = lpegmatch(escaper,v) or v + n = n + 1 t[n] = f_key_val_str(depth,k,v) + end + elseif tv == "table" then + local l = #v + if l > 0 then + local st = is_simple_table(v) + if not st then + tojsonpp(v,k,depth,level+1,l) + elseif tk == "number" then + n = n + 1 t[n] = f_key_val_seq(depth,k,st) + elseif tk == "string" then + k = lpegmatch(escaper,k) or k + n = n + 1 t[n] = f_key_val_seq(depth,k,st) + end + elseif next(v) then + tojsonpp(v,k,depth,level+1,0) + end + elseif tv == "boolean" then + if tk == "number" then + n = n + 1 + if v then + t[n] = f_key_val_yes(depth,k) + else + t[n] = f_key_val_nop(depth,k) + end + elseif tk == "string" then + k = lpegmatch(escaper,k) or k + n = n + 1 + if v then + t[n] = f_key_val_yes(depth,k) + else + t[n] = f_key_val_nop(depth,k) + end + end + end + end + end + n = n + 1 + if indexed then + t[n] = f_stop_array(depth-1) + else + t[n] = f_stop_hash(depth-1) + end + end + end + + local function tojson(value,n) + local kind = type(value) + if kind == "table" then + local done = false + local size = #value + if size == 0 then + for k, v in next, value do + if done then + -- n = n + 1 ; t[n] = "," + n = n + 1 ; t[n] = ',"' + else + -- n = n + 1 ; t[n] = "{" + n = n + 1 ; t[n] = '{"' + done = true + end + n = n + 1 ; t[n] = lpegmatch(escaper,k) or k + n = n + 1 ; t[n] = '":' + t, n = tojson(v,n) + end if done then - n = n + 1 ; t[n] = "," + n = n + 1 ; t[n] = "}" else - n = n + 1 ; t[n] = "[" - done = true + n = n + 1 ; t[n] = "{}" end - t, n = tojson(value[i],t,n) + elseif size == 1 then + -- we can optimize for non tables + n = n + 1 ; t[n] = "[" + t, n = tojson(value[1],n) + n = n + 1 ; t[n] = "]" + else + for i=1,size do + if done then + n = n + 1 ; t[n] = "," + else + n = n + 1 ; t[n] = "[" + done = true + end + t, n = tojson(value[i],n) + end + n = n + 1 ; t[n] = "]" end - n = n + 1 ; t[n] = "]" + elseif kind == "string" then + n = n + 1 ; t[n] = '"' + n = n + 1 ; t[n] = lpegmatch(escaper,value) or value + n = n + 1 ; t[n] = '"' + elseif kind == "number" then + n = n + 1 ; t[n] = value + elseif kind == "boolean" then + n = n + 1 ; t[n] = tostring(value) end - elseif kind == "string" then - n = n + 1 ; t[n] = '"' - n = n + 1 ; t[n] = lpegmatch(escaper,value) or value - n = n + 1 ; t[n] = '"' - elseif kind == "number" then - n = n + 1 ; t[n] = value - elseif kind == "boolean" then - n = n + 1 ; t[n] = tostring(value) + return t, n end - return t, n -end -function json.tostring(value) - -- todo optimize for non table - local kind = type(value) - if kind == "table" then - if not escaper then - local escapes = { - ["\\"] = "\\u005C", - ["\""] = "\\u0022", - } - for i=0,0x20 do - escapes[utfchar(i)] = format("\\u%04X",i) - end - escaper = Cs( ( - (R('\0\x20') + S('\"\\')) / escapes - + P(1) - )^1 ) + -- escaping keys can become an option + + local function jsontostring(value,pretty) + -- todo optimize for non table + local kind = type(value) + if kind == "table" then + if not escaper then + local escapes = { + ["\\"] = "\\u005C", + ["\""] = "\\u0022", + } + for i=0,0x1F do + escapes[utfchar(i)] = format("\\u%04X",i) + end + escaper = Cs( ( + (R('\0\x20') + S('\"\\')) / escapes + + P(1) + )^1 ) + end + -- local to the closure (saves wrapping and local functions) + t = { } + n = 0 + if pretty then + tojsonpp(value,name,0,0,#value) +-- value = concat(t,"\n",1,n) + value = concat(t,"",1,n) + else + tojson(value,0) + value = concat(t,"",1,n) + end + t = nil + n = 0 + return value + elseif kind == "string" or kind == "number" then + return lpegmatch(escaper,value) or value + else + return tostring(value) end - return concat((tojson(value,{},0))) - elseif kind == "string" or kind == "number" then - return lpegmatch(escaper,value) or value - else - return tostring(value) end + + json.tostring = jsontostring + + function json.tojson(value) + return jsontostring(value,true) + end + end --- local tmp = [[ { "t" : "foobar", "a" : true, "b" : [ 123 , 456E-10, { "a" : true, "b" : [ 123 , 456 ] } ] } ]] +-- local tmp = [[ { "t\nt t" : "foo bar", "a" : true, "b" : [ 123 , 456E-10, { "a" : true, "b" : [ 123 , 456 ] } ] } ]] -- tmp = json.tolua(tmp) -- inspect(tmp) --- tmp = json.tostring(tmp) +-- tmp = json.tostring(tmp,true) -- inspect(tmp) -- tmp = json.tolua(tmp) -- inspect(tmp) @@ -176,13 +412,6 @@ end -- inspect(tmp) -- inspect(json.tostring(true)) -function json.load(filename) - local data = io.loaddata(filename) - if data then - return lpegmatch(jsonconverter,data) - end -end - -- local s = [[\foo"bar"]] -- local j = json.tostring { s = s } -- local l = json.tolua(j) diff --git a/tex/context/base/mkiv/util-pck.lua b/tex/context/base/mkiv/util-pck.lua index 83b85cd94..b90853fb6 100644 --- a/tex/context/base/mkiv/util-pck.lua +++ b/tex/context/base/mkiv/util-pck.lua @@ -55,7 +55,6 @@ local function pack(t,keys,skip,hash,index) local k = sk[i] if not skip or not skip[k] then local v = t[k] - -- if type(v) == "table" then pack(v,keys,skip,hash,index) if keys[k] then diff --git a/tex/context/interface/mkii/keys-pe.xml b/tex/context/interface/mkii/keys-pe.xml index 160f4f3fc..8532d9894 100644 --- a/tex/context/interface/mkii/keys-pe.xml +++ b/tex/context/interface/mkii/keys-pe.xml @@ -573,6 +573,7 @@ <cd:variable name='temporary' value='موقتی'/> <cd:variable name='test' value='تست'/> <cd:variable name='text' value='متن'/> + <cd:variable name='textnote' value='textnote'/> <cd:variable name='three' value='سه'/> <cd:variable name='thursday' value='پنجشنبه'/> <cd:variable name='tight' value='tight'/> diff --git a/tex/context/interface/mkiv/i-context.pdf b/tex/context/interface/mkiv/i-context.pdf Binary files differindex 746f0fb36..04e0e44ae 100644 --- a/tex/context/interface/mkiv/i-context.pdf +++ b/tex/context/interface/mkiv/i-context.pdf diff --git a/tex/context/interface/mkiv/i-readme.pdf b/tex/context/interface/mkiv/i-readme.pdf Binary files differindex 847353e60..4b14223aa 100644 --- a/tex/context/interface/mkiv/i-readme.pdf +++ b/tex/context/interface/mkiv/i-readme.pdf diff --git a/tex/generic/context/luatex/luatex-basics-chr.lua b/tex/generic/context/luatex/luatex-basics-chr.lua index 14a55d7b0..9036b2977 100644 --- a/tex/generic/context/luatex/luatex-basics-chr.lua +++ b/tex/generic/context/luatex/luatex-basics-chr.lua @@ -1,4 +1,4 @@ --- automatically generated from context data +-- automatically generated from context data (luatex-basics-prepare.tex) characters = characters or { } @@ -446,7 +446,6 @@ characters.classifiers={ [1803]=6, [1804]=6, [1805]=6, - [1807]=6, [1808]=3, [1809]=5, [1810]=2, @@ -627,6 +626,9 @@ characters.classifiers={ [2040]=6, [2041]=6, [2042]=2, + [2045]=5, + [2046]=6, + [2047]=6, [2070]=5, [2071]=5, [2072]=5, @@ -716,6 +718,7 @@ characters.classifiers={ [2235]=2, [2236]=2, [2237]=2, + [2259]=5, [2260]=5, [2261]=5, [2262]=5, @@ -792,6 +795,7 @@ characters.classifiers={ [2509]=5, [2530]=5, [2531]=5, + [2558]=5, [2561]=5, [2562]=5, [2620]=5, @@ -840,6 +844,7 @@ characters.classifiers={ [3008]=5, [3021]=5, [3072]=5, + [3076]=5, [3134]=5, [3135]=5, [3136]=5, @@ -901,6 +906,7 @@ characters.classifiers={ [3767]=5, [3768]=5, [3769]=5, + [3770]=5, [3771]=5, [3772]=5, [3784]=5, @@ -1139,6 +1145,7 @@ characters.classifiers={ [6261]=2, [6262]=2, [6263]=2, + [6264]=2, [6272]=4, [6273]=4, [6274]=4, @@ -1539,6 +1546,7 @@ characters.classifiers={ [43247]=5, [43248]=5, [43249]=5, + [43263]=5, [43302]=5, [43303]=5, [43304]=5, @@ -1702,6 +1710,83 @@ characters.classifiers={ [68525]=2, [68526]=2, [68527]=4, + [68864]=1, + [68865]=2, + [68866]=2, + [68867]=2, + [68868]=2, + [68869]=2, + [68870]=2, + [68871]=2, + [68872]=2, + [68873]=2, + [68874]=2, + [68875]=2, + [68876]=2, + [68877]=2, + [68878]=2, + [68879]=2, + [68880]=2, + [68881]=2, + [68882]=2, + [68883]=2, + [68884]=2, + [68885]=2, + [68886]=2, + [68887]=2, + [68888]=2, + [68889]=2, + [68890]=2, + [68891]=2, + [68892]=2, + [68893]=2, + [68894]=2, + [68895]=2, + [68896]=2, + [68897]=2, + [68898]=3, + [68899]=2, + [68900]=5, + [68901]=5, + [68902]=5, + [68903]=5, + [69424]=2, + [69425]=2, + [69426]=2, + [69427]=3, + [69428]=2, + [69429]=2, + [69430]=2, + [69431]=2, + [69432]=2, + [69433]=2, + [69434]=2, + [69435]=2, + [69436]=2, + [69437]=2, + [69438]=2, + [69439]=2, + [69440]=2, + [69441]=2, + [69442]=2, + [69443]=2, + [69444]=2, + [69445]=4, + [69446]=5, + [69447]=5, + [69448]=5, + [69449]=5, + [69450]=5, + [69451]=5, + [69452]=5, + [69453]=5, + [69454]=5, + [69455]=5, + [69456]=5, + [69457]=2, + [69458]=2, + [69459]=2, + [69460]=3, [69633]=5, [69688]=5, [69689]=5, @@ -1727,6 +1812,8 @@ characters.classifiers={ [69814]=5, [69817]=5, [69818]=5, + [69821]=4, + [69837]=4, [69888]=5, [69889]=5, [69890]=5, @@ -1776,6 +1863,7 @@ characters.classifiers={ [70378]=5, [70400]=5, [70401]=5, + [70459]=5, [70460]=5, [70464]=5, [70502]=5, @@ -1802,6 +1890,7 @@ characters.classifiers={ [70723]=5, [70724]=5, [70726]=5, + [70750]=5, [70835]=5, [70836]=5, [70837]=5, @@ -1855,6 +1944,24 @@ characters.classifiers={ [71465]=5, [71466]=5, [71467]=5, + [71727]=5, + [71728]=5, + [71729]=5, + [71730]=5, + [71731]=5, + [71732]=5, + [71733]=5, + [71734]=5, + [71735]=5, + [71737]=5, + [71738]=5, + [72148]=5, + [72149]=5, + [72150]=5, + [72151]=5, + [72154]=5, + [72155]=5, + [72160]=5, [72193]=5, [72194]=5, [72195]=5, @@ -1962,6 +2069,12 @@ characters.classifiers={ [73028]=5, [73029]=5, [73031]=5, + [73104]=5, + [73105]=5, + [73109]=5, + [73111]=5, + [73459]=5, + [73460]=5, [92912]=5, [92913]=5, [92914]=5, @@ -1974,6 +2087,7 @@ characters.classifiers={ [92980]=5, [92981]=5, [92982]=5, + [94031]=5, [94095]=5, [94096]=5, [94097]=5, @@ -2170,6 +2284,17 @@ characters.classifiers={ [122920]=5, [122921]=5, [122922]=5, + [123184]=5, + [123185]=5, + [123186]=5, + [123187]=5, + [123188]=5, + [123189]=5, + [123190]=5, + [123628]=5, + [123629]=5, + [123630]=5, + [123631]=5, [125136]=5, [125137]=5, [125138]=5, @@ -2273,6 +2398,7 @@ characters.indicgroups={ [2632]=true, [2635]=true, [2636]=true, + [2690]=true, [2757]=true, [2759]=true, [2760]=true, @@ -2284,7 +2410,6 @@ characters.indicgroups={ [3136]=true, [3142]=true, [3143]=true, - [3144]=true, [3146]=true, [3147]=true, [3148]=true, @@ -2292,6 +2417,21 @@ characters.indicgroups={ [3263]=true, [3270]=true, [3406]=true, + [4141]=true, + [4142]=true, + [4146]=true, + [4147]=true, + [4148]=true, + [4149]=true, + [4150]=true, + [4154]=true, + [4209]=true, + [4210]=true, + [4211]=true, + [4212]=true, + [4229]=true, + [4230]=true, + [4253]=true, [43232]=true, [43233]=true, [43234]=true, @@ -2310,6 +2450,8 @@ characters.indicgroups={ [43247]=true, [43248]=true, [43249]=true, + [43493]=true, + [43644]=true, }, ["after_half"]={}, ["after_main"]={ @@ -2331,6 +2473,7 @@ characters.indicgroups={ [2626]=true, [2672]=true, [2673]=true, + [2735]=true, [2750]=true, [2752]=true, [2753]=true, @@ -2494,10 +2637,24 @@ characters.indicgroups={ [3170]=true, [3171]=true, [3260]=true, + [3286]=true, [3298]=true, [3299]=true, [3426]=true, [3427]=true, + [4143]=true, + [4144]=true, + [4151]=true, + [4153]=true, + [4157]=true, + [4158]=true, + [4184]=true, + [4185]=true, + [4190]=true, + [4191]=true, + [4192]=true, + [4226]=true, + [4237]=true, }, ["consonant"]={ [2325]=true, @@ -2827,6 +2984,117 @@ characters.indicgroups={ [3384]=true, [3385]=true, [3386]=true, + [4096]=true, + [4097]=true, + [4098]=true, + [4099]=true, + [4100]=true, + [4101]=true, + [4102]=true, + [4103]=true, + [4104]=true, + [4105]=true, + [4106]=true, + [4107]=true, + [4108]=true, + [4109]=true, + [4110]=true, + [4111]=true, + [4112]=true, + [4113]=true, + [4114]=true, + [4115]=true, + [4116]=true, + [4117]=true, + [4118]=true, + [4119]=true, + [4120]=true, + [4121]=true, + [4122]=true, + [4123]=true, + [4124]=true, + [4125]=true, + [4126]=true, + [4127]=true, + [4128]=true, + [4155]=true, + [4156]=true, + [4157]=true, + [4158]=true, + [4159]=true, + [4176]=true, + [4177]=true, + [4186]=true, + [4187]=true, + [4188]=true, + [4189]=true, + [4190]=true, + [4191]=true, + [4192]=true, + [4193]=true, + [4197]=true, + [4198]=true, + [4206]=true, + [4207]=true, + [4208]=true, + [4213]=true, + [4214]=true, + [4215]=true, + [4216]=true, + [4217]=true, + [4218]=true, + [4219]=true, + [4220]=true, + [4221]=true, + [4222]=true, + [4223]=true, + [4224]=true, + [4225]=true, + [4226]=true, + [4238]=true, + [43488]=true, + [43489]=true, + [43490]=true, + [43491]=true, + [43492]=true, + [43495]=true, + [43496]=true, + [43497]=true, + [43498]=true, + [43499]=true, + [43500]=true, + [43501]=true, + [43502]=true, + [43503]=true, + [43514]=true, + [43515]=true, + [43516]=true, + [43517]=true, + [43518]=true, + [43616]=true, + [43617]=true, + [43618]=true, + [43619]=true, + [43620]=true, + [43621]=true, + [43622]=true, + [43623]=true, + [43624]=true, + [43625]=true, + [43626]=true, + [43628]=true, + [43629]=true, + [43630]=true, + [43631]=true, + [43633]=true, + [43634]=true, + [43635]=true, + [43636]=true, + [43637]=true, + [43638]=true, + [43642]=true, + [43646]=true, + [43647]=true, }, ["dependent_vowel"]={ [2362]=true, @@ -2855,13 +3123,14 @@ characters.indicgroups={ [2403]=true, [2494]=true, [2495]=true, - [2496]=true, [2497]=true, [2498]=true, [2499]=true, [2500]=true, [2503]=true, [2504]=true, + [2507]=true, + [2508]=true, [2622]=true, [2623]=true, [2624]=true, @@ -2936,6 +3205,8 @@ characters.indicgroups={ [3274]=true, [3275]=true, [3276]=true, + [3285]=true, + [3286]=true, [3298]=true, [3299]=true, [3390]=true, @@ -2954,6 +3225,35 @@ characters.indicgroups={ [3415]=true, [3426]=true, [3427]=true, + [4139]=true, + [4140]=true, + [4141]=true, + [4142]=true, + [4143]=true, + [4144]=true, + [4145]=true, + [4146]=true, + [4147]=true, + [4148]=true, + [4149]=true, + [4182]=true, + [4183]=true, + [4184]=true, + [4185]=true, + [4194]=true, + [4199]=true, + [4200]=true, + [4209]=true, + [4210]=true, + [4211]=true, + [4212]=true, + [4227]=true, + [4228]=true, + [4229]=true, + [4230]=true, + [4252]=true, + [4253]=true, + [43493]=true, }, ["halant"]={ [2381]=true, @@ -3111,6 +3411,20 @@ characters.indicgroups={ [3423]=true, [3424]=true, [3425]=true, + [4129]=true, + [4130]=true, + [4131]=true, + [4132]=true, + [4133]=true, + [4134]=true, + [4135]=true, + [4136]=true, + [4137]=true, + [4138]=true, + [4178]=true, + [4179]=true, + [4180]=true, + [4181]=true, }, ["nukta"]={ [2364]=true, @@ -3132,8 +3446,6 @@ characters.indicgroups={ [2383]=true, [2494]=true, [2496]=true, - [2503]=true, - [2504]=true, [2622]=true, [2624]=true, [2750]=true, @@ -3150,16 +3462,12 @@ characters.indicgroups={ [3139]=true, [3140]=true, [3262]=true, - [3264]=true, [3265]=true, [3266]=true, [3267]=true, [3268]=true, - [3271]=true, - [3272]=true, - [3274]=true, - [3275]=true, [3276]=true, + [3285]=true, [3390]=true, [3391]=true, [3392]=true, @@ -3168,25 +3476,58 @@ characters.indicgroups={ [3395]=true, [3396]=true, [3415]=true, + [4139]=true, + [4140]=true, + [4152]=true, + [4155]=true, + [4182]=true, + [4183]=true, + [4194]=true, + [4195]=true, + [4196]=true, + [4199]=true, + [4200]=true, + [4201]=true, + [4202]=true, + [4203]=true, + [4204]=true, + [4205]=true, + [4227]=true, + [4231]=true, + [4232]=true, + [4233]=true, + [4234]=true, + [4235]=true, + [4236]=true, + [4239]=true, + [4250]=true, + [4251]=true, + [4252]=true, + [43643]=true, + [43645]=true, }, ["pre_mark"]={ [2367]=true, [2382]=true, [2495]=true, + [2503]=true, + [2504]=true, [2623]=true, [2751]=true, [2887]=true, - [2888]=true, [3014]=true, [3015]=true, [3016]=true, [3398]=true, [3399]=true, [3400]=true, + [4145]=true, + [4228]=true, }, ["ra"]={ [2352]=true, [2480]=true, + [2544]=true, [2608]=true, [2736]=true, [2864]=true, @@ -3200,17 +3541,43 @@ characters.indicgroups={ [2386]=true, [2387]=true, [2388]=true, - [2507]=true, - [2508]=true, - [3277]=true, - [3405]=true, + [4151]=true, + [4195]=true, + [4196]=true, + [4201]=true, + [4202]=true, + [4203]=true, + [4204]=true, + [4205]=true, + [4231]=true, + [4232]=true, + [4233]=true, + [4234]=true, + [4235]=true, + [4236]=true, + [4237]=true, + [4239]=true, + [4250]=true, + [4251]=true, + [43643]=true, + [43644]=true, + [43645]=true, }, ["twopart_mark"]={ + [2507]={ 2503, 2494 }, + [2508]={ 2503, 2519 }, + [2888]={ 2887, 2902 }, [2891]={ 2887, 2878 }, [2892]={ 2887, 2903 }, [3018]={ 3014, 3006 }, [3019]={ 3015, 3006 }, [3020]={ 3014, 3031 }, + [3144]={ 3142, 3158 }, + [3264]={ 3263, 3285 }, + [3271]={ 3270, 3285 }, + [3272]={ 3270, 3286 }, + [3274]={ 3270, 3266 }, + [3275]={ 3274, 3285 }, [3402]={ 3398, 3390 }, [3403]={ 3399, 3390 }, [3404]={ 3398, 3415 }, @@ -3220,8 +3587,13 @@ characters.indicgroups={ [2305]=true, [2306]=true, [2307]=true, + [2433]=true, [3330]=true, [3331]=true, + [4150]=true, + [4152]=true, + [4153]=true, + [4154]=true, [43232]=true, [43233]=true, [43234]=true, @@ -3238,9 +3610,10 @@ characters.indicgroups={ [43245]=true, [43246]=true, [43247]=true, - [43248]=true, [43249]=true, }, } -- done + +return characters.indicgroups diff --git a/tex/generic/context/luatex/luatex-basics-prepare.tex b/tex/generic/context/luatex/luatex-basics-prepare.tex index 47eaa7341..069f772f7 100644 --- a/tex/generic/context/luatex/luatex-basics-prepare.tex +++ b/tex/generic/context/luatex/luatex-basics-prepare.tex @@ -15,7 +15,8 @@ %D is needed because we don't want to load the (mostly not used in generic) data %D files. -%D Indicorder is not yet filled in completely. +%D Indicorder is not yet filled in completely. Some 'indic=m' are now 'indic=o' +%D due to patches/analysis by Kai Eigner. \startluacode @@ -69,7 +70,7 @@ for k, c in next, chardata do end local template = string.formatters [ [[ --- automatically generated from context data +-- automatically generated from context data (luatex-basics-prepare.tex) characters = characters or { } @@ -84,6 +85,8 @@ characters.blockrange = { } %s -- done + +return characters.indicgroups ]] ] io.savedata("luatex-basics-chr.lua",template( @@ -91,8 +94,33 @@ io.savedata("luatex-basics-chr.lua",template( table.serialize(indicgroups,"characters.indicgroups") )) + +-- -- code for comparing patched tables (info we feed back in char-def.lua) + +-- local hans = table.load("t:/sources/luatex-basics-chr.lua") +-- local kai = table.load("e:/tmp/indic/luatex-basics-chr.lua") +-- +-- for name, h in table.sortedhash(hans) do +-- for kk, vv in table.sortedhash(kai[name]) do +-- if h[kk] ~= vv and vv then +-- local _h = h[kk] +-- local _k = vv +-- if type(_h) ~= "table" or type(_k) ~= "table" or not table.identical(_h,_k) then +-- print("what", name) +-- print("index",kk) +-- print("hex ",string.format("%04X",kk)) +-- print("hans ",h[kk]) +-- print("kai ", vv) +-- print("") +-- end +-- end +-- end +-- end + \stopluacode + + \startTEXpage[offset=10pt] \tttf generated file: luatex-basics-chr.lua \stopTEXpage diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index bafaac767..fae1b146f 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : c:/data/develop/context/sources/luatex-fonts-merged.lua -- parent file : c:/data/develop/context/sources/luatex-fonts.lua --- merge date : 06/05/19 15:39:40 +-- merge date : 06/11/19 19:20:27 do -- begin closure to overcome local limits and interference @@ -5425,7 +5425,6 @@ characters.classifiers={ [1803]=6, [1804]=6, [1805]=6, - [1807]=6, [1808]=3, [1809]=5, [1810]=2, @@ -5606,6 +5605,9 @@ characters.classifiers={ [2040]=6, [2041]=6, [2042]=2, + [2045]=5, + [2046]=6, + [2047]=6, [2070]=5, [2071]=5, [2072]=5, @@ -5695,6 +5697,7 @@ characters.classifiers={ [2235]=2, [2236]=2, [2237]=2, + [2259]=5, [2260]=5, [2261]=5, [2262]=5, @@ -5771,6 +5774,7 @@ characters.classifiers={ [2509]=5, [2530]=5, [2531]=5, + [2558]=5, [2561]=5, [2562]=5, [2620]=5, @@ -5819,6 +5823,7 @@ characters.classifiers={ [3008]=5, [3021]=5, [3072]=5, + [3076]=5, [3134]=5, [3135]=5, [3136]=5, @@ -5880,6 +5885,7 @@ characters.classifiers={ [3767]=5, [3768]=5, [3769]=5, + [3770]=5, [3771]=5, [3772]=5, [3784]=5, @@ -6118,6 +6124,7 @@ characters.classifiers={ [6261]=2, [6262]=2, [6263]=2, + [6264]=2, [6272]=4, [6273]=4, [6274]=4, @@ -6518,6 +6525,7 @@ characters.classifiers={ [43247]=5, [43248]=5, [43249]=5, + [43263]=5, [43302]=5, [43303]=5, [43304]=5, @@ -6681,6 +6689,83 @@ characters.classifiers={ [68525]=2, [68526]=2, [68527]=4, + [68864]=1, + [68865]=2, + [68866]=2, + [68867]=2, + [68868]=2, + [68869]=2, + [68870]=2, + [68871]=2, + [68872]=2, + [68873]=2, + [68874]=2, + [68875]=2, + [68876]=2, + [68877]=2, + [68878]=2, + [68879]=2, + [68880]=2, + [68881]=2, + [68882]=2, + [68883]=2, + [68884]=2, + [68885]=2, + [68886]=2, + [68887]=2, + [68888]=2, + [68889]=2, + [68890]=2, + [68891]=2, + [68892]=2, + [68893]=2, + [68894]=2, + [68895]=2, + [68896]=2, + [68897]=2, + [68898]=3, + [68899]=2, + [68900]=5, + [68901]=5, + [68902]=5, + [68903]=5, + [69424]=2, + [69425]=2, + [69426]=2, + [69427]=3, + [69428]=2, + [69429]=2, + [69430]=2, + [69431]=2, + [69432]=2, + [69433]=2, + [69434]=2, + [69435]=2, + [69436]=2, + [69437]=2, + [69438]=2, + [69439]=2, + [69440]=2, + [69441]=2, + [69442]=2, + [69443]=2, + [69444]=2, + [69445]=4, + [69446]=5, + [69447]=5, + [69448]=5, + [69449]=5, + [69450]=5, + [69451]=5, + [69452]=5, + [69453]=5, + [69454]=5, + [69455]=5, + [69456]=5, + [69457]=2, + [69458]=2, + [69459]=2, + [69460]=3, [69633]=5, [69688]=5, [69689]=5, @@ -6706,6 +6791,8 @@ characters.classifiers={ [69814]=5, [69817]=5, [69818]=5, + [69821]=4, + [69837]=4, [69888]=5, [69889]=5, [69890]=5, @@ -6755,6 +6842,7 @@ characters.classifiers={ [70378]=5, [70400]=5, [70401]=5, + [70459]=5, [70460]=5, [70464]=5, [70502]=5, @@ -6781,6 +6869,7 @@ characters.classifiers={ [70723]=5, [70724]=5, [70726]=5, + [70750]=5, [70835]=5, [70836]=5, [70837]=5, @@ -6834,6 +6923,24 @@ characters.classifiers={ [71465]=5, [71466]=5, [71467]=5, + [71727]=5, + [71728]=5, + [71729]=5, + [71730]=5, + [71731]=5, + [71732]=5, + [71733]=5, + [71734]=5, + [71735]=5, + [71737]=5, + [71738]=5, + [72148]=5, + [72149]=5, + [72150]=5, + [72151]=5, + [72154]=5, + [72155]=5, + [72160]=5, [72193]=5, [72194]=5, [72195]=5, @@ -6941,6 +7048,12 @@ characters.classifiers={ [73028]=5, [73029]=5, [73031]=5, + [73104]=5, + [73105]=5, + [73109]=5, + [73111]=5, + [73459]=5, + [73460]=5, [92912]=5, [92913]=5, [92914]=5, @@ -6953,6 +7066,7 @@ characters.classifiers={ [92980]=5, [92981]=5, [92982]=5, + [94031]=5, [94095]=5, [94096]=5, [94097]=5, @@ -7149,6 +7263,17 @@ characters.classifiers={ [122920]=5, [122921]=5, [122922]=5, + [123184]=5, + [123185]=5, + [123186]=5, + [123187]=5, + [123188]=5, + [123189]=5, + [123190]=5, + [123628]=5, + [123629]=5, + [123630]=5, + [123631]=5, [125136]=5, [125137]=5, [125138]=5, @@ -7251,6 +7376,7 @@ characters.indicgroups={ [2632]=true, [2635]=true, [2636]=true, + [2690]=true, [2757]=true, [2759]=true, [2760]=true, @@ -7262,7 +7388,6 @@ characters.indicgroups={ [3136]=true, [3142]=true, [3143]=true, - [3144]=true, [3146]=true, [3147]=true, [3148]=true, @@ -7270,6 +7395,21 @@ characters.indicgroups={ [3263]=true, [3270]=true, [3406]=true, + [4141]=true, + [4142]=true, + [4146]=true, + [4147]=true, + [4148]=true, + [4149]=true, + [4150]=true, + [4154]=true, + [4209]=true, + [4210]=true, + [4211]=true, + [4212]=true, + [4229]=true, + [4230]=true, + [4253]=true, [43232]=true, [43233]=true, [43234]=true, @@ -7288,6 +7428,8 @@ characters.indicgroups={ [43247]=true, [43248]=true, [43249]=true, + [43493]=true, + [43644]=true, }, ["after_half"]={}, ["after_main"]={ @@ -7309,6 +7451,7 @@ characters.indicgroups={ [2626]=true, [2672]=true, [2673]=true, + [2735]=true, [2750]=true, [2752]=true, [2753]=true, @@ -7472,10 +7615,24 @@ characters.indicgroups={ [3170]=true, [3171]=true, [3260]=true, + [3286]=true, [3298]=true, [3299]=true, [3426]=true, [3427]=true, + [4143]=true, + [4144]=true, + [4151]=true, + [4153]=true, + [4157]=true, + [4158]=true, + [4184]=true, + [4185]=true, + [4190]=true, + [4191]=true, + [4192]=true, + [4226]=true, + [4237]=true, }, ["consonant"]={ [2325]=true, @@ -7805,6 +7962,117 @@ characters.indicgroups={ [3384]=true, [3385]=true, [3386]=true, + [4096]=true, + [4097]=true, + [4098]=true, + [4099]=true, + [4100]=true, + [4101]=true, + [4102]=true, + [4103]=true, + [4104]=true, + [4105]=true, + [4106]=true, + [4107]=true, + [4108]=true, + [4109]=true, + [4110]=true, + [4111]=true, + [4112]=true, + [4113]=true, + [4114]=true, + [4115]=true, + [4116]=true, + [4117]=true, + [4118]=true, + [4119]=true, + [4120]=true, + [4121]=true, + [4122]=true, + [4123]=true, + [4124]=true, + [4125]=true, + [4126]=true, + [4127]=true, + [4128]=true, + [4155]=true, + [4156]=true, + [4157]=true, + [4158]=true, + [4159]=true, + [4176]=true, + [4177]=true, + [4186]=true, + [4187]=true, + [4188]=true, + [4189]=true, + [4190]=true, + [4191]=true, + [4192]=true, + [4193]=true, + [4197]=true, + [4198]=true, + [4206]=true, + [4207]=true, + [4208]=true, + [4213]=true, + [4214]=true, + [4215]=true, + [4216]=true, + [4217]=true, + [4218]=true, + [4219]=true, + [4220]=true, + [4221]=true, + [4222]=true, + [4223]=true, + [4224]=true, + [4225]=true, + [4226]=true, + [4238]=true, + [43488]=true, + [43489]=true, + [43490]=true, + [43491]=true, + [43492]=true, + [43495]=true, + [43496]=true, + [43497]=true, + [43498]=true, + [43499]=true, + [43500]=true, + [43501]=true, + [43502]=true, + [43503]=true, + [43514]=true, + [43515]=true, + [43516]=true, + [43517]=true, + [43518]=true, + [43616]=true, + [43617]=true, + [43618]=true, + [43619]=true, + [43620]=true, + [43621]=true, + [43622]=true, + [43623]=true, + [43624]=true, + [43625]=true, + [43626]=true, + [43628]=true, + [43629]=true, + [43630]=true, + [43631]=true, + [43633]=true, + [43634]=true, + [43635]=true, + [43636]=true, + [43637]=true, + [43638]=true, + [43642]=true, + [43646]=true, + [43647]=true, }, ["dependent_vowel"]={ [2362]=true, @@ -7833,13 +8101,14 @@ characters.indicgroups={ [2403]=true, [2494]=true, [2495]=true, - [2496]=true, [2497]=true, [2498]=true, [2499]=true, [2500]=true, [2503]=true, [2504]=true, + [2507]=true, + [2508]=true, [2622]=true, [2623]=true, [2624]=true, @@ -7914,6 +8183,8 @@ characters.indicgroups={ [3274]=true, [3275]=true, [3276]=true, + [3285]=true, + [3286]=true, [3298]=true, [3299]=true, [3390]=true, @@ -7932,6 +8203,35 @@ characters.indicgroups={ [3415]=true, [3426]=true, [3427]=true, + [4139]=true, + [4140]=true, + [4141]=true, + [4142]=true, + [4143]=true, + [4144]=true, + [4145]=true, + [4146]=true, + [4147]=true, + [4148]=true, + [4149]=true, + [4182]=true, + [4183]=true, + [4184]=true, + [4185]=true, + [4194]=true, + [4199]=true, + [4200]=true, + [4209]=true, + [4210]=true, + [4211]=true, + [4212]=true, + [4227]=true, + [4228]=true, + [4229]=true, + [4230]=true, + [4252]=true, + [4253]=true, + [43493]=true, }, ["halant"]={ [2381]=true, @@ -8089,6 +8389,20 @@ characters.indicgroups={ [3423]=true, [3424]=true, [3425]=true, + [4129]=true, + [4130]=true, + [4131]=true, + [4132]=true, + [4133]=true, + [4134]=true, + [4135]=true, + [4136]=true, + [4137]=true, + [4138]=true, + [4178]=true, + [4179]=true, + [4180]=true, + [4181]=true, }, ["nukta"]={ [2364]=true, @@ -8110,8 +8424,6 @@ characters.indicgroups={ [2383]=true, [2494]=true, [2496]=true, - [2503]=true, - [2504]=true, [2622]=true, [2624]=true, [2750]=true, @@ -8128,16 +8440,12 @@ characters.indicgroups={ [3139]=true, [3140]=true, [3262]=true, - [3264]=true, [3265]=true, [3266]=true, [3267]=true, [3268]=true, - [3271]=true, - [3272]=true, - [3274]=true, - [3275]=true, [3276]=true, + [3285]=true, [3390]=true, [3391]=true, [3392]=true, @@ -8146,25 +8454,58 @@ characters.indicgroups={ [3395]=true, [3396]=true, [3415]=true, + [4139]=true, + [4140]=true, + [4152]=true, + [4155]=true, + [4182]=true, + [4183]=true, + [4194]=true, + [4195]=true, + [4196]=true, + [4199]=true, + [4200]=true, + [4201]=true, + [4202]=true, + [4203]=true, + [4204]=true, + [4205]=true, + [4227]=true, + [4231]=true, + [4232]=true, + [4233]=true, + [4234]=true, + [4235]=true, + [4236]=true, + [4239]=true, + [4250]=true, + [4251]=true, + [4252]=true, + [43643]=true, + [43645]=true, }, ["pre_mark"]={ [2367]=true, [2382]=true, [2495]=true, + [2503]=true, + [2504]=true, [2623]=true, [2751]=true, [2887]=true, - [2888]=true, [3014]=true, [3015]=true, [3016]=true, [3398]=true, [3399]=true, [3400]=true, + [4145]=true, + [4228]=true, }, ["ra"]={ [2352]=true, [2480]=true, + [2544]=true, [2608]=true, [2736]=true, [2864]=true, @@ -8178,17 +8519,43 @@ characters.indicgroups={ [2386]=true, [2387]=true, [2388]=true, - [2507]=true, - [2508]=true, - [3277]=true, - [3405]=true, + [4151]=true, + [4195]=true, + [4196]=true, + [4201]=true, + [4202]=true, + [4203]=true, + [4204]=true, + [4205]=true, + [4231]=true, + [4232]=true, + [4233]=true, + [4234]=true, + [4235]=true, + [4236]=true, + [4237]=true, + [4239]=true, + [4250]=true, + [4251]=true, + [43643]=true, + [43644]=true, + [43645]=true, }, ["twopart_mark"]={ + [2507]={ 2503,2494 }, + [2508]={ 2503,2519 }, + [2888]={ 2887,2902 }, [2891]={ 2887,2878 }, [2892]={ 2887,2903 }, [3018]={ 3014,3006 }, [3019]={ 3015,3006 }, [3020]={ 3014,3031 }, + [3144]={ 3142,3158 }, + [3264]={ 3263,3285 }, + [3271]={ 3270,3285 }, + [3272]={ 3270,3286 }, + [3274]={ 3270,3266 }, + [3275]={ 3274,3285 }, [3402]={ 3398,3390 }, [3403]={ 3399,3390 }, [3404]={ 3398,3415 }, @@ -8198,8 +8565,13 @@ characters.indicgroups={ [2305]=true, [2306]=true, [2307]=true, + [2433]=true, [3330]=true, [3331]=true, + [4150]=true, + [4152]=true, + [4153]=true, + [4154]=true, [43232]=true, [43233]=true, [43234]=true, @@ -8216,10 +8588,10 @@ characters.indicgroups={ [43245]=true, [43246]=true, [43247]=true, - [43248]=true, [43249]=true, }, } +return characters.indicgroups end -- closure @@ -29258,6 +29630,7 @@ local s_half=states.half local s_pref=states.pref local s_blwf=states.blwf local s_pstf=states.pstf +local s_init=states.init local replace_all_nbsp=nil replace_all_nbsp=function(head) replace_all_nbsp=typesetters and typesetters.characters and typesetters.characters.replacenbspaces or function(head) @@ -29403,15 +29776,10 @@ local zw_char={ [c_zwj ]=true, } local dflt_true={ - dflt=true -} -local two_defaults={ - dev2=dflt_true, -} -local one_defaults={ - dev2=dflt_true, - deva=dflt_true, + dflt=true, } +local two_defaults={} +local one_defaults={} local false_flags={ false,false,false,false } local sequence_reorder_matras={ features={ dv01=two_defaults }, @@ -29440,7 +29808,7 @@ local sequence_reorder_reph={ } } local sequence_reorder_pre_base_reordering_consonants={ - features={ dv03=two_defaults }, + features={ dv03=one_defaults }, flags=false_flags, name="dv03_reorder_pre_base_reordering_consonants", order={ "dv03" }, @@ -29476,6 +29844,7 @@ local basic_shaping_forms={ rkrf=true, rphf=true, vatu=true, + locl=true, } local valid={ abvs=true, @@ -29499,18 +29868,18 @@ local valid={ psts=true, haln=true, calt=true, + locl=true, } local scripts={} local scripts_one={ "deva","mlym","beng","gujr","guru","knda","orya","taml","telu" } local scripts_two={ "dev2","mlm2","bng2","gjr2","gur2","knd2","ory2","tml2","tel2" } -local scripts_old={} for i=1,#scripts_one do local v=scripts_one[i] scripts_old[v]=v end local nofscripts=#scripts_one for i=1,nofscripts do local one=scripts_one[i] local two=scripts_two[i] scripts[one]=true scripts[two]=true - two_defaults[one]=dflt_true + two_defaults[two]=dflt_true one_defaults[one]=dflt_true one_defaults[two]=dflt_true end @@ -29526,31 +29895,79 @@ local function initializedevanagi(tfmdata) local gsubfeatures=resources.features.gsub local sequences=resources.sequences local sharedfeatures=tfmdata.shared.features + gsubfeatures["dv01"]=two_defaults + gsubfeatures["dv02"]=two_defaults + gsubfeatures["dv03"]=one_defaults + gsubfeatures["dv04"]=one_defaults + local reorder_pre_base_reordering_consonants=copy(sequence_reorder_pre_base_reordering_consonants) + local reorder_reph=copy(sequence_reorder_reph) + local reorder_matras=copy(sequence_reorder_matras) + local remove_joiners=copy(sequence_remove_joiners) local lastmatch=0 for s=1,#sequences do local features=sequences[s].features if features then for k,v in next,features do + if k=="locl" then + local steps=sequences[s].steps + local nofsteps=sequences[s].nofsteps + for i=1,nofsteps do + local step=steps[i] + local coverage=step.coverage + if coverage then + for k,v in next,pre_mark do + local locl=coverage[k] + if locl then + if #locl>0 then + for j=1,#locl do + local ck=locl[j] + local f=ck[4] + local chainlookups=ck[6] + if chainlookups then + local chainlookup=chainlookups[f] + for j=1,#chainlookup do + local chainstep=chainlookup[j] + local steps=chainstep.steps + local nofsteps=chainstep.nofsteps + for i=1,nofsteps do + local step=steps[i] + local coverage=step.coverage + if coverage then + locl=coverage[k] + end + end + end + end + end + end + if locl then + reorder_matras.steps[1].coverage[locl]=true + end + end + end + end + end + end if basic_shaping_forms[k] then - lastmatch=s + lastmatch=lastmatch+1 + if s~=lastmatch then + table.insert(sequences,lastmatch,table.remove(sequences,s)) + end end end end end local insertindex=lastmatch+1 - gsubfeatures["dv01"]=two_defaults - gsubfeatures["dv02"]=two_defaults - gsubfeatures["dv03"]=two_defaults - gsubfeatures["dv04"]=one_defaults - local reorder_pre_base_reordering_consonants=copy(sequence_reorder_pre_base_reordering_consonants) - local reorder_reph=copy(sequence_reorder_reph) - local reorder_matras=copy(sequence_reorder_matras) - local remove_joiners=copy(sequence_remove_joiners) + if tfmdata.properties.language then + dflt_true[tfmdata.properties.language]=true + end insert(sequences,insertindex,reorder_pre_base_reordering_consonants) insert(sequences,insertindex,reorder_reph) insert(sequences,insertindex,reorder_matras) insert(sequences,insertindex,remove_joiners) local blwfcache={} + local vatucache={} + local pstfcache={} local seqsubset={} local rephstep={ coverage={} @@ -29559,6 +29976,8 @@ local function initializedevanagi(tfmdata) reph=false, vattu=false, blwfcache=blwfcache, + vatucache=vatucache, + pstfcache=pstfcache, seqsubset=seqsubset, reorderreph=rephstep, } @@ -29566,7 +29985,6 @@ local function initializedevanagi(tfmdata) local pre_base_reordering_consonants={} reorder_pre_base_reordering_consonants.steps[1].coverage=pre_base_reordering_consonants resources.devanagari=devanagari - local old=scripts_old[script] or false for s=1,#sequences do local sequence=sequences[s] local steps=sequence.steps @@ -29574,17 +29992,89 @@ local function initializedevanagi(tfmdata) local features=sequence.features local has_rphf=features.rphf local has_blwf=features.blwf - if has_rphf and has_rphf[old] then + local has_vatu=features.vatu + local has_pstf=features.pstf + if has_rphf and has_rphf[script] then devanagari.reph=true - elseif has_blwf and has_blwf[old] then + elseif (has_blwf and has_blwf[script] ) or (has_vatu and has_vatu[script] ) then devanagari.vattu=true for i=1,nofsteps do local step=steps[i] local coverage=step.coverage if coverage then for k,v in next,coverage do - if not blwfcache[k] then - blwfcache[k]=v + for h,w in next,halant do + if v[h] then + if not blwfcache[k] then + blwfcache[k]=v + end + end + if has_vatu and has_vatu[script] and not vatucache[k] then + vatucache[k]=v + end + end + end + end + end + elseif has_pstf and has_pstf[script] then + for i=1,nofsteps do + local step=steps[i] + local coverage=step.coverage + if coverage then + for k,v in next,coverage do + if not pstfcache[k] then + pstfcache[k]=v + end + end + for k,v in next,ra do + local r=coverage[k] + if r then + local found=false + if #r>0 then + for j=1,#r do + local ck=r[j] + local f=ck[4] + local chainlookups=ck[6] + if chainlookups and chainlookups[f] then + local chainlookup=chainlookups[f] + for j=1,#chainlookup do + local chainstep=chainlookup[j] + local steps=chainstep.steps + local nofsteps=chainstep.nofsteps + for i=1,nofsteps do + local step=steps[i] + local coverage=step.coverage + if coverage then + local h=coverage[k] + if h then + for k,v in next,h do + found=v and v.ligature + if found then + pre_base_reordering_consonants[found]=true + break + end + end + if found then + break + end + end + end + end + end + end + end + else + for k,v in next,r do + found=v and v.ligature + if found then + pre_base_reordering_consonants[found]=true + break + end + end + end + if found then + break + end end end end @@ -29596,17 +30086,53 @@ local function initializedevanagi(tfmdata) local step=steps[i] local coverage=step.coverage if coverage then - local reph=false + local reph,rephbase=false,false if kind=="rphf" then for k,v in next,ra do local r=coverage[k] if r then + rephbase=k local h=false - for k,v in next,halant do - local h=r[k] - if h then - reph=h.ligature or false - break + if #r>0 then + for j=1,#r do + local ck=r[j] + local f=ck[4] + local chainlookups=ck[6] + if chainlookups then + local chainlookup=chainlookups[f] + for j=1,#chainlookup do + local chainstep=chainlookup[j] + local steps=chainstep.steps + local nofsteps=chainstep.nofsteps + for i=1,nofsteps do + local step=steps[i] + local coverage=step.coverage + if coverage then + local r=coverage[k] + if r then + for k,v in next,halant do + local h=r[k] + if h then + reph=h.ligature or false + break + end + end + if h then + break + end + end + end + end + end + end + end + else + for k,v in next,halant do + local h=r[k] + if h then + reph=h.ligature or false + break + end end end if reph then @@ -29615,7 +30141,7 @@ local function initializedevanagi(tfmdata) end end end - seqsubset[#seqsubset+1]={ kind,coverage,reph } + seqsubset[#seqsubset+1]={ kind,coverage,reph,rephbase } end end end @@ -29630,11 +30156,46 @@ local function initializedevanagi(tfmdata) local h=coverage[k] if h then local found=false - for k,v in next,h do - found=v and v.ligature - if found then - pre_base_reordering_consonants[k]=found - break + if #h>0 then + for j=1,#h do + local ck=h[j] + local f=ck[4] + local chainlookups=ck[6] + if chainlookups then + local chainlookup=chainlookups[f] + for j=1,#chainlookup do + local chainstep=chainlookup[j] + local steps=chainstep.steps + local nofsteps=chainstep.nofsteps + for i=1,nofsteps do + local step=steps[i] + local coverage=step.coverage + if coverage then + local h=coverage[k] + if h then + for k,v in next,h do + found=v and v.ligature + if found then + pre_base_reordering_consonants[found]=true + break + end + end + if found then + break + end + end + end + end + end + end + end + else + for k,v in next,h do + found=v and v.ligature + if found then + pre_base_reordering_consonants[found]=true + break + end end end if found then @@ -29647,62 +30208,17 @@ local function initializedevanagi(tfmdata) end end end - if script=="deva" then - sharedfeatures["dv04"]=true - elseif script=="dev2" then - sharedfeatures["dv01"]=true - sharedfeatures["dv02"]=true - sharedfeatures["dv03"]=true - sharedfeatures["dv04"]=true - elseif script=="knda" then - sharedfeatures["dv04"]=true - elseif script=="knd2" then - sharedfeatures["dv01"]=true - sharedfeatures["dv02"]=true - sharedfeatures["dv03"]=true - sharedfeatures["dv04"]=true - elseif script=="beng" then - sharedfeatures["dv04"]=true - elseif script=="bng2" then - sharedfeatures["dv01"]=true - sharedfeatures["dv02"]=true - sharedfeatures["dv03"]=true - sharedfeatures["dv04"]=true - elseif script=="gurj" then - sharedfeatures["dv04"]=true - elseif script=="grj2" then - sharedfeatures["dv01"]=true - sharedfeatures["dv02"]=true - sharedfeatures["dv03"]=true - sharedfeatures["dv04"]=true - elseif script=="guru" then - sharedfeatures["dv04"]=true - elseif script=="gur2" then - sharedfeatures["dv01"]=true - sharedfeatures["dv02"]=true - sharedfeatures["dv03"]=true - sharedfeatures["dv04"]=true - elseif script=="telu" then - sharedfeatures["dv04"]=true - elseif script=="tel2" then - sharedfeatures["dv01"]=true - sharedfeatures["dv02"]=true - sharedfeatures["dv03"]=true - sharedfeatures["dv04"]=true - elseif script=="mlym" then - sharedfeatures["pstf"]=true - elseif script=="mlm2" then - sharedfeatures["pstf"]=true - sharedfeatures["pref"]=true - sharedfeatures["dv03"]=true - gsubfeatures ["dv03"]=two_defaults - insert(sequences,insertindex,sequence_reorder_pre_base_reordering_consonants) - elseif script=="taml" then - sharedfeatures["dv04"]=true - sharedfeatures["pstf"]=true - elseif script=="tml2" then - else - report("todo: enable the right features for script %a",script) + if two_defaults[script] then + sharedfeatures["dv01"]=true + sharedfeatures["dv02"]=true + sharedfeatures["dv03"]=true + sharedfeatures["dv04"]=true + elseif one_defaults[script] then + sharedfeatures["dv03"]=true + sharedfeatures["dv04"]=true + end + if script=="mlym" or script=="taml" then + devanagari.left_matra_before_base=true end end end @@ -29735,6 +30251,8 @@ local function initialize_one(font,attr) reph=false, vattu=false, blwfcache={}, + vatucache={}, + pstfcache={}, } datasets.devanagari=devanagaridata local resources=tfmdata.resources @@ -29745,17 +30263,80 @@ local function initialize_one(font,attr) local kind=dataset[4] if kind=="rphf" then devanagaridata.reph=true - elseif kind=="blwf" then + elseif kind=="blwf" or kind=="vatu" then devanagaridata.vattu=true devanagaridata.blwfcache=devanagari.blwfcache + devanagaridata.vatucache=devanagari.vatucache + devanagaridata.pstfcache=devanagari.pstfcache + end + end + end + end + return devanagaridata.reph,devanagaridata.vattu,devanagaridata.blwfcache,devanagaridata.vatucache,devanagaridata.pstfcache +end +local function contextchain(contexts,n) + local char=getchar(n) + for k=1,#contexts do + local ck=contexts[k] + local seq=ck[3] + local f=ck[4] + local l=ck[5] + if (l-f)==1 and seq[f+1][char] then + local ok=true + local c=n + for i=l+1,#seq do + c=getnext(c) + if not c or not seq[i][ischar(c)] then + ok=false + break + end + end + if ok then + c=getprev(n) + for i=1,f-1 do + c=getprev(c) + if not c or not seq[f-i][ischar(c)] then + ok=false + end end end + if ok then + return true + end end end - return devanagaridata.reph,devanagaridata.vattu,devanagaridata.blwfcache + return false +end +local function order_matras(c) + local cn=getnext(c) + local char=getchar(cn) + while dependent_vowel[char] do + local next=getnext(cn) + local cc=c + local cchar=getchar(cc) + while cc~=cn do + if (above_mark[char] and (below_mark[cchar] or post_mark[cchar])) or (below_mark[char] and (post_mark[cchar])) then + local prev,next=getboth(cn) + if next then + setprev(next,prev) + end + setnext(prev,next) + setnext(getprev(cc),cn) + setprev(cn,getprev(cc)) + setnext(cn,cc) + setprev(cc,cn) + break + end + cc=getnext(cc) + cchar=getchar(cc) + end + cn=next + char=getchar(cn) + end end local function reorder_one(head,start,stop,font,attr,nbspaces) - local reph,vattu,blwfcache=initialize_one(font,attr) + local reph,vattu,blwfcache,vatucache,pstfcache=initialize_one(font,attr) + local devanagari=fontdata[font].resources.devanagari local current=start local n=getnext(start) local base=nil @@ -29838,6 +30419,8 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) base=current elseif blwfcache[char] then setprop(current,a_state,s_blwf) + elseif pstfcache[char] then + setprop(current,a_state,s_pstf) else base=current end @@ -29906,7 +30489,7 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) end current=next end - if base~=stop and getprop(base,a_state) then + if base~=stop and getprop(base,a_state) then local next=getnext(base) if halant[getchar(next)] and not (next~=stop and getchar(getnext(next))==c_zwj) then setprop(base,a_state,unsetvalue) @@ -29934,6 +30517,16 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) n=getnext(n) ch=getchar(n) end + local tpm=twopart_mark[ch] + while tpm do + local extra=copy_node(n) + copyinjection(extra,n) + ch=tpm[1] + setchar(n,ch) + setchar(extra,tpm[2]) + head=insert_node_after(head,current,extra) + tpm=twopart_mark[ch] + end while c~=stop and dependent_vowel[ch] do c=n n=getnext(n) @@ -29957,9 +30550,44 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) local last=getnext(c) while cn~=last do if pre_mark[getchar(cn)] then - if bp then - setnext(bp,cn) + if devanagari.left_matra_before_base then + local prev,next=getboth(cn) + setlink(prev,next) + if cn==stop then + stop=getprev(cn) + end + if base==start then + if head==start then + head=cn + end + start=cn + end + setlink(getprev(base),cn) + setlink(cn,base) + cn=next + else + if bp then + setnext(bp,cn) + end + local prev,next=getboth(cn) + if next then + setprev(next,prev) + end + setnext(prev,next) + if cn==stop then + stop=prev + end + setprev(cn,bp) + setlink(cn,firstcons) + if firstcons==start then + if head==start then + head=cn + end + start=cn + end + cn=next end + elseif current~=base and dependent_vowel[getchar(cn)] then local prev,next=getboth(cn) if next then setprev(next,prev) @@ -29968,17 +30596,19 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if cn==stop then stop=prev end - setprev(cn,bp) - setlink(cn,firstcons) - if firstcons==start then - if head==start then - head=cn - end - start=cn + setlink(b,cn,getnext(b)) + order_matras(cn) + cn=next + elseif current==base and dependent_vowel[getchar(cn)] then + local cnn=getnext(cn) + order_matras(cn) + cn=cnn + while cn~=last and dependent_vowel[getchar(cn)] do + cn=getnext(cn) end - break + else + cn=getnext(cn) end - cn=getnext(cn) end allreordered=c==stop current=getnext(c) @@ -30036,6 +30666,13 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if halant[getchar(next)] then cns=next end + if not vatucache[char] then + next=getnext(cns) + while dependent_vowel[getchar(next)] do + cns=next + next=getnext(cns) + end + end elseif char==c_nbsp then nbspaces=nbspaces+1 cns=current @@ -30043,6 +30680,13 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) if halant[getchar(next)] then cns=next end + if not vatucache[char] then + next=getnext(cns) + while dependent_vowel[getchar(next)] do + cns=next + next=getnext(cns) + end + end end end current=getnext(current) @@ -30050,6 +30694,9 @@ local function reorder_one(head,start,stop,font,attr,nbspaces) end if getchar(base)==c_nbsp then nbspaces=nbspaces-1 + if base==stop then + stop=getprev(stop) + end head=remove_node(head,base) flush_node(base) end @@ -30063,7 +30710,7 @@ function handlers.devanagari_reorder_matras(head,start) local char=ischar(current,startfont) local next=getnext(current) if char and getprop(current,a_syllabe)==startattr then - if halant[char] and not getprop(current,a_state) then + if halant[char] then if next then local char=ischar(next,startfont) if char and zw_char[char] and getprop(next,a_syllabe)==startattr then @@ -30085,6 +30732,7 @@ function handlers.devanagari_reorder_matras(head,start) end return head,start,true end +local rephbase={} function handlers.devanagari_reorder_reph(head,start) local current=getnext(start) local startnext=nil @@ -30092,44 +30740,29 @@ function handlers.devanagari_reorder_reph(head,start) local startfont=getfont(start) local startattr=getprop(start,a_syllabe) ::step_1:: - ::step_2:: - while current do - local char=ischar(current,startfont) - if char and getprop(current,a_syllabe)==startattr then - if halant[char] and not getprop(current,a_state) then - local next=getnext(current) - if next then - local nextchar=ischar(next,startfont) - if nextchar and zw_char[nextchar] and getprop(next,a_syllabe)==startattr then - current=next - next=getnext(current) - end - end - startnext=getnext(start) - head=remove_node(head,start) - setlink(start,next) - setlink(current,start) - start=startnext - startattr=getprop(start,a_syllabe) - break - end - current=getnext(current) - else - break - end + local char=ischar(start,startfont) + local rephbase=rephbase[startfont][char] + if char and after_subscript[rephbase] then + goto step_5 end - ::step_3:: - ::step_4:: - if not startnext then - current=getnext(start) + ::step_2:: + if char and not after_postscript[rephbase] then while current do local char=ischar(current,startfont) if char and getprop(current,a_syllabe)==startattr then - if getprop(current,a_state)==s_pstf then + if halant[char] then + local next=getnext(current) + if next then + local nextchar=ischar(next,startfont) + if nextchar and zw_char[nextchar] and getprop(next,a_syllabe)==startattr then + current=next + next=getnext(current) + end + end startnext=getnext(start) head=remove_node(head,start) - setlink(getprev(current),start) - setlink(start,current) + setlink(start,next) + setlink(current,start) start=startnext startattr=getprop(start,a_syllabe) break @@ -30140,6 +30773,63 @@ function handlers.devanagari_reorder_reph(head,start) end end end + ::step_3:: + if not startnext then + if char and after_main[rephbase] then + current=getnext(start) + while current do + local char=ischar(current,startfont) + if char and getprop(current,a_syllabe)==startattr then + if consonant[char] and not getprop(current,a_state)==s_pref then + startnext=getnext(start) + head=remove_node(head,start) + setlink(current,start) + setlink(start,getnext(current)) + start=startnext + startattr=getprop(start,a_syllabe) + break + end + current=getnext(current) + else + break + end + end + end + end + ::step_4:: + if not startnext then + if char and before_postscript[rephbase] then + current=getnext(start) + local c=nil + while current do + local char=ischar(current,startfont) + if char and getprop(current,a_syllabe)==startattr then + if getprop(current,a_state)==s_pstf then + startnext=getnext(start) + head=remove_node(head,start) + setlink(getprev(current),start) + setlink(start,current) + start=startnext + startattr=getprop(start,a_syllabe) + break + elseif not c and (vowel_modifier[char] or stress_tone_mark[char] ) then + c=current + end + current=getnext(current) + else + if c then + startnext=getnext(start) + head=remove_node(head,start) + setlink(getprev(c),start) + setlink(start,c) + start=startnext + startattr=getprop(start,a_syllabe) + end + break + end + end + end + end ::step_5:: if not startnext then current=getnext(start) @@ -30147,7 +30837,10 @@ function handlers.devanagari_reorder_reph(head,start) while current do local char=ischar(current,startfont) if char and getprop(current,a_syllabe)==startattr then - if not c and mark_above_below_post[char] and not after_subscript[char] then + local state=getprop(current,a_state) + if before_subscript[rephbase] and (state==s_blwf or state==s_pstf) then + c=current + elseif after_subscript[rephbase] and (state==s_pstf) then c=current end current=getnext(current) @@ -30187,63 +30880,64 @@ function handlers.devanagari_reorder_reph(head,start) end return head,start,true end +local reordered_pre_base_reordering_consonants={} function handlers.devanagari_reorder_pre_base_reordering_consonants(head,start) - local current=start - local startnext=nil - local startprev=nil + if reordered_pre_base_reordering_consonants[start] then + return head,start,true + end + local current=start local startfont=getfont(start) local startattr=getprop(start,a_syllabe) while current do local char=ischar(current,startfont) + local next=getnext(current) if char and getprop(current,a_syllabe)==startattr then - local next=getnext(current) - if halant[char] and not getprop(current,a_state) then + if halant[char] then if next then - local nextchar=ischar(next,startfont) - if nextchar and getprop(next,a_syllabe)==startattr then - if nextchar==c_zwnj or nextchar==c_zwj then - current=next - next=getnext(current) - end + local char=ischar(next,startfont) + if char and zw_char[char] and getprop(next,a_syllabe)==startattr then + current=next + next=getnext(current) end end - startnext=getnext(start) - removenode(start,start) + local startnext=getnext(start) + head=remove_node(head,start) setlink(start,next) setlink(current,start) + reordered_pre_base_reordering_consonants[start]=true start=startnext - break + return head,start,true end - current=next else break end + current=next end - if not startnext then - current=getnext(start) - startattr=getprop(start,a_syllabe) - while current do - local char=ischar(current,startfont) - if char and getprop(current,a_syllabe)==startattr then - if not consonant[char] and getprop(current,a_state) then - startnext=getnext(start) - removenode(start,start) - setlink(getprev(current),start) - setlink(start,current) - start=startnext - break - end - current=getnext(current) + local startattr=getprop(start,a_syllabe) + local current=getprev(start) + while current and getprop(current,a_syllabe)==startattr do + local char=ischar(current) + if (not dependent_vowel[char] and not getprop(current,a_state) or getprop(current,a_state)==s_init) then + startnext=getnext(start) + head=remove_node(head,start) + if current==head then + setlink(start,current) + head=start else - break + setlink(getprev(current),start) + setlink(start,current) end + reordered_pre_base_reordering_consonants[start]=true + start=startnext + break end + current=getprev(current) end return head,start,true end function handlers.devanagari_remove_joiners(head,start,kind,lookupname,replacement) local stop=getnext(start) - local font=getfont(start) + local font=getfont(start) local last=start while stop do local char=ischar(stop,font) @@ -30277,28 +30971,28 @@ local function initialize_two(font,attr) end local function reorder_two(head,start,stop,font,attr,nbspaces) local seqsubset,reorderreph=initialize_two(font,attr) - local reph=false local halfpos=nil local basepos=nil local subpos=nil local postpos=nil - local locl={} + reorderreph.coverage={} + rephbase[font]={} for i=1,#seqsubset do local subset=seqsubset[i] local kind=subset[1] local lookupcache=subset[2] if kind=="rphf" then - reph=subset[3] + reorderreph.coverage[subset[3]]=true + rephbase[font][subset[3]]=subset[4] local current=start local last=getnext(stop) while current~=last do if current~=stop then - local c=locl[current] or getchar(current) + local c=getchar(current) local found=lookupcache[c] if found then local next=getnext(current) - local n=locl[next] or getchar(next) - if found[n] then + if found[getchar(next)] or contextchain(found,next) then local afternext=next~=stop and getnext(next) if afternext and zw_char[getchar(afternext)] then current=afternext @@ -30318,15 +31012,16 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) local last=getnext(stop) while current~=last do if current~=stop then - local c=locl[current] or getchar(current) + local c=getchar(current) local found=lookupcache[c] if found then local next=getnext(current) - local n=locl[next] or getchar(next) - if found[n] then - setprop(current,a_state,s_pref) - setprop(next,a_state,s_pref) - current=next + if found[getchar(next)] or contextchain(found,next) then + if (not getprop(current,a_state) and not getprop(next,a_state)) then + setprop(current,a_state,s_pref) + setprop(next,a_state,s_pref) + current=next + end end end end @@ -30337,15 +31032,14 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) local last=getnext(stop) while current~=last do if current~=stop then - local c=locl[current] or getchar(current) + local c=getchar(current) local found=lookupcache[c] if found then local next=getnext(current) - local n=locl[next] or getchar(next) - if found[n] then + if found[getchar(next)] or contextchain(found,next) then if next~=stop and getchar(getnext(next))==c_zwnj then current=next - else + elseif (not getprop(current,a_state)) then setprop(current,a_state,s_half) if not halfpos then halfpos=current @@ -30357,21 +31051,22 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) end current=getnext(current) end - elseif kind=="blwf" then + elseif kind=="blwf" or kind=="vatu" then local current=start local last=getnext(stop) while current~=last do if current~=stop then - local c=locl[current] or getchar(current) + local c=getchar(current) local found=lookupcache[c] if found then local next=getnext(current) - local n=locl[next] or getchar(next) - if found[n] then - setprop(current,a_state,s_blwf) - setprop(next,a_state,s_blwf) - current=next - subpos=current + if found[getchar(next)] or contextchain(found,next) then + if (not getprop(current,a_state) and not getprop(next,a_state)) then + setprop(current,a_state,s_blwf) + setprop(next,a_state,s_blwf) + current=next + subpos=current + end end end end @@ -30382,16 +31077,17 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) local last=getnext(stop) while current~=last do if current~=stop then - local c=locl[current] or getchar(current) + local c=getchar(current) local found=lookupcache[c] if found then local next=getnext(current) - local n=locl[next] or getchar(next) - if found[n] then - setprop(current,a_state,s_pstf) - setprop(next,a_state,s_pstf) - current=next - postpos=current + if found[getchar(next)] or contextchain(found,next) then + if (not getprop(current,a_state) and not getprop(next,a_state)) then + setprop(current,a_state,s_pstf) + setprop(next,a_state,s_pstf) + current=next + postpos=current + end end end end @@ -30399,7 +31095,6 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) end end end - reorderreph.coverage={ [reph]=true } local current,base,firstcons=start,nil,nil if getprop(start,a_state)==s_rphf then current=getnext(getnext(start)) @@ -30460,7 +31155,7 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) firstcons=current end local a=getprop(current,a_state) - if not (a==s_pref or a==s_blwf or a==s_pstf) then + if not (a==s_blwf or a==s_pstf or (a~=s_rphf and a~=s_blwf and ra[getchar(current)])) then base=current end end @@ -30477,7 +31172,7 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) end return head,stop,nbspaces else - if getprop(base,a_state) then + if getprop(base,a_state) then setprop(base,a_state,unsetvalue) end basepos=base @@ -30495,34 +31190,59 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) local current=start local last=getnext(stop) while current~=last do - local char,target,cn=locl[current] or getchar(current),nil,getnext(current) + local char=getchar(current) + local target=nil + local cn=getnext(current) local tpm=twopart_mark[char] - if tpm then + while tpm do local extra=copy_node(current) copyinjection(extra,current) char=tpm[1] setchar(current,char) setchar(extra,tpm[2]) head=insert_node_after(head,current,extra) + tpm=twopart_mark[char] end if not moved[current] and dependent_vowel[char] then - if pre_mark[char] then + if pre_mark[char] then moved[current]=true local prev,next=getboth(current) setlink(prev,next) if current==stop then stop=getprev(current) end - if halfpos==start then + local pos + if before_main[char] then + pos=basepos + else + pos=halfpos + end + local ppos=getprev(pos) + while ppos and getprop(ppos,a_syllabe)==getprop(pos,a_syllabe) do + if getprop(ppos,a_state)==s_pref then + pos=ppos + end + ppos=getprev(ppos) + end + local ppos=getprev(pos) + while ppos and getprop(ppos,a_syllabe)==getprop(pos,a_syllabe) and halant[ischar(ppos)] do + ppos=getprev(ppos) + if ppos and getprop(ppos,a_syllabe)==getprop(pos,a_syllabe) and consonant[ischar(ppos)] then + pos=ppos + ppos=getprev(ppos) + else + break + end + end + if pos==start then if head==start then head=current end start=current end - setlink(getprev(halfpos),current) - setlink(current,halfpos) - halfpos=current - elseif above_mark[char] then + setlink(getprev(pos),current) + setlink(current,pos) + elseif above_mark[char] then target=basepos if subpos==basepos then subpos=current @@ -30531,13 +31251,23 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) postpos=current end basepos=current - elseif below_mark[char] then + elseif below_mark[char] then target=subpos if postpos==subpos then postpos=current end subpos=current - elseif post_mark[char] then + elseif post_mark[char] then + local n=getnext(postpos) + while n do + local v=ischar(n,font) + if nukta[v] or stress_tone_mark[v] or vowel_modifier[v] then + postpos=n + else + break + end + n=getnext(n) + end target=postpos postpos=current end @@ -30556,7 +31286,35 @@ local function reorder_two(head,start,stop,font,attr,nbspaces) end current=cn end - local current,c=start,nil + local current=getnext(start) + local last=getnext(stop) + while current~=last do + local char=getchar(current) + local cn=getnext(current) + if halant[char] and ra[ischar(cn)] and getprop(cn,a_state)~=s_rphf and getprop(cn,a_state)~=s_blwf then + if after_main[ischar(cn)] then + local prev=getprev(current) + local next=getnext(cn) + local bpn=getnext(basepos) + while bpn and dependent_vowel[ischar(bpn)] do + basepos=bpn + bpn=getnext(bpn) + end + if basepos~=prev then + setlink(prev,next) + setlink(cn,getnext(basepos)) + setlink(basepos,current) + if cn==stop then + stop=prev + end + cn=next + end + end + end + current=cn + end + local current=start + local c=nil while current~=stop do local char=getchar(current) if halant[char] or stress_tone_mark[char] then @@ -30675,7 +31433,25 @@ local function analyze_next_chars_one(c,font,variant) if not v then return c end - if dependent_vowel[v] then + local already_pre_mark + local already_above_mark + local already_below_mark + local already_post_mark + while dependent_vowel[v] do + local vowels=twopart_mark[v] or { v } + for k,v in next,vowels do + if pre_mark[v] and not already_pre_mark then + already_pre_mark=true + elseif above_mark[v] and not already_above_mark then + already_above_mark=true + elseif below_mark[v] and not already_below_mark then + already_below_mark=true + elseif post_mark[v] and not already_post_mark then + already_post_mark=true + else + return c + end + end c=getnext(c) n=getnext(c) if not n then @@ -30841,7 +31617,25 @@ local function analyze_next_chars_two(c,font) end end else - if dependent_vowel[v] then + local already_pre_mark + local already_above_mark + local already_below_mark + local already_post_mark + while dependent_vowel[v] do + local vowels=twopart_mark[v] or { v } + for k,v in next,vowels do + if pre_mark[v] and not already_pre_mark then + already_pre_mark=true + elseif above_mark[v] and not already_above_mark then + already_above_mark=true + elseif below_mark[v] and not already_below_mark then + already_below_mark=true + elseif post_mark[v] and not already_post_mark then + already_post_mark=true + else + return c + end + end c=n n=getnext(c) if not n then @@ -30908,6 +31702,7 @@ local function method_one(head,font,attr) local start=true local done=false local nbspaces=0 + local syllabe=0 while current do local char=ischar(current,font) if char then @@ -31044,6 +31839,15 @@ local function method_one(head,font,attr) end end if syllablestart~=syllableend then + if syllableend then + syllabe=syllabe+1 + local c=syllablestart + local n=getnext(syllableend) + while c~=n do + setprop(c,a_syllabe,syllabe) + c=getnext(c) + end + end head,current,nbspaces=reorder_one(head,syllablestart,syllableend,font,attr,nbspaces) current=getnext(current) end @@ -31082,6 +31886,20 @@ local function method_one(head,font,attr) if nbspaces>0 then head=replace_all_nbsp(head) end + current=head + local n=0 + while current do + local char=ischar(current,font) + if char then + if n==0 and not getprop(current,a_state) then + setprop(current,a_state,s_init) + end + n=n+1 + else + n=0 + end + current=getnext(current) + end return head,done end local function method_two(head,font,attr) @@ -31150,7 +31968,7 @@ local function method_two(head,font,attr) end if not syllableend and show_syntax_errors then local char=ischar(current,font) - if char and not getprop(current,a_state) then + if char and not getprop(current,a_state) then local mark=mark_four[char] if mark then head,current=inject_syntax_error(head,current,char) @@ -31163,6 +31981,20 @@ local function method_two(head,font,attr) if nbspaces>0 then head=replace_all_nbsp(head) end + current=head + local n=0 + while current do + local char=ischar(current,font) + if char then + if n==0 and not getprop(current,a_state) then + setprop(current,a_state,s_init) + end + n=n+1 + else + n=0 + end + current=getnext(current) + end return head,done end for i=1,nofscripts do |