From 6a5523efcc3df3f9d51d2a98360825904df56630 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Tue, 6 Sep 2011 14:02:00 +0200 Subject: beta 2011.09.06 14:02 --- tex/context/base/char-utf.lua | 61 ++++++++++----- tex/context/base/context-version.pdf | Bin 4093 -> 4086 bytes tex/context/base/context-version.png | Bin 105624 -> 104502 bytes tex/context/base/font-mis.lua | 2 +- tex/context/base/font-otc.lua | 86 ++++++++++++++++----- tex/context/base/font-otf.lua | 2 +- tex/context/base/font-otn.lua | 71 ++++++++--------- tex/context/base/status-files.pdf | Bin 23918 -> 23932 bytes tex/context/base/status-lua.pdf | Bin 162240 -> 162268 bytes tex/generic/context/luatex/luatex-fonts-merged.lua | 75 +++++++++--------- 10 files changed, 181 insertions(+), 116 deletions(-) diff --git a/tex/context/base/char-utf.lua b/tex/context/base/char-utf.lua index 43591dd76..4817c3537 100644 --- a/tex/context/base/char-utf.lua +++ b/tex/context/base/char-utf.lua @@ -35,6 +35,9 @@ local characters = characters characters.graphemes = allocate() local graphemes = characters.graphemes +characters.combined = allocate() +local combined = characters.combined + characters.decomposed = allocate() local decomposed = characters.decomposed @@ -73,32 +76,45 @@ local decomposed = allocate { } characters.decomposed = decomposed -local function initialize() - for unicode, v in next, characters.data do +local function initialize() -- maybe only 'mn' + local data = characters.data + for unicode, v in next, data do -- using vs and first testing for length is faster (.02->.01 s) local vs = v.specials - if vs and #vs == 3 and vs[1] == 'char' then + if vs and #vs == 3 and vs[1] == "char" then local one, two = vs[2], vs[3] - local first, second, combined = utfchar(one), utfchar(two), utfchar(unicode) + if data[two].category == "mn" then + local cgf = combined[one] + if not cgf then + cgf = { [two] = unicode } + combined[one] = cgf + else + cgf[two] = unicode + end + end + local first, second, combination = utfchar(one), utfchar(two), utfchar(unicode) local cgf = graphemes[first] if not cgf then - cgf = { } + cgf = { [second] = combination } graphemes[first] = cgf + else + cgf[second] = combination end - cgf[second] = combined if v.mathclass or v.mathspec then local mps = mathpairs[two] if not mps then - mps = { } + mps = { [one] = unicode } mathpairs[two] = mps + else + mps[one] = unicode -- here unicode end - mps[one] = unicode -- here unicode local mps = mathpairs[second] if not mps then - mps = { } + mps = { [first] = combination } mathpairs[second] = mps + else + mps[first] = combination end - mps[first] = combined end -- else -- local description = v.description @@ -123,8 +139,11 @@ local function initialize() end end initialize = false + characters.initialize = function() end -- when used outside tex end +characters.initialize = initialize + -- utffilters.addgrapheme(utfchar(318),'l','\string~') -- utffilters.addgrapheme('c','a','b') @@ -441,17 +460,23 @@ function utffilters.decompose(str) return str end -local textfileactions = resolvers.openers.helpers.textfileactions +local sequencers = utilities.sequencers + +if sequencers then + + local textfileactions = resolvers.openers.helpers.textfileactions -utilities.sequencers.appendaction (textfileactions,"system","characters.filters.utf.collapse") -utilities.sequencers.disableaction(textfileactions,"characters.filters.utf.collapse") + sequencers.appendaction (textfileactions,"system","characters.filters.utf.collapse") + sequencers.disableaction(textfileactions,"characters.filters.utf.collapse") -utilities.sequencers.appendaction (textfileactions,"system","characters.filters.utf.decompose") -utilities.sequencers.disableaction(textfileactions,"characters.filters.utf.decompose") + sequencers.appendaction (textfileactions,"system","characters.filters.utf.decompose") + sequencers.disableaction(textfileactions,"characters.filters.utf.decompose") + + function characters.filters.utf.enable() + sequencers.enableaction(textfileactions,"characters.filters.utf.collapse") + sequencers.enableaction(textfileactions,"characters.filters.utf.decompose") + end -function characters.filters.utf.enable() - utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.collapse") - utilities.sequencers.enableaction(textfileactions,"characters.filters.utf.decompose") end --[[ldx-- diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf index 9535172fd..fe96d9b72 100644 Binary files a/tex/context/base/context-version.pdf and b/tex/context/base/context-version.pdf differ diff --git a/tex/context/base/context-version.png b/tex/context/base/context-version.png index 9aa06a1a6..f9dc15ab5 100644 Binary files a/tex/context/base/context-version.png and b/tex/context/base/context-version.png differ diff --git a/tex/context/base/font-mis.lua b/tex/context/base/font-mis.lua index b3147ede3..fdb44bb12 100644 --- a/tex/context/base/font-mis.lua +++ b/tex/context/base/font-mis.lua @@ -22,7 +22,7 @@ local handlers = fonts.handlers handlers.otf = handlers.otf or { } local otf = handlers.otf -otf.version = otf.version or 2.733 +otf.version = otf.version or 2.735 otf.cache = otf.cache or containers.define("fonts", "otf", otf.version, true) function otf.loadcached(filename,format,sub) diff --git a/tex/context/base/font-otc.lua b/tex/context/base/font-otc.lua index fd3d7a761..ae463e750 100644 --- a/tex/context/base/font-otc.lua +++ b/tex/context/base/font-otc.lua @@ -51,7 +51,8 @@ local function addfeature(data,feature,specifications) local unicodes = resources.unicodes local lookuptypes = resources.lookuptypes local splitter = lpeg.splitter(" ",unicodes) - local done = 0 + local done = 0 + local skip = 0 if not specifications[1] then -- so we accept a one entry specification specifications = { specifications } @@ -61,6 +62,11 @@ local function addfeature(data,feature,specifications) local specification = specifications[s] local valid = specification.valid if not valid or valid(data,specification,feature) then + local initialize = specification.initialize + if initialize then + -- when false is returned we initialize only once + specification.initialize = initialize(specification) and initialize or nil + end local askedfeatures = specification.features or everywhere local subtables = specification.subtables or { specification.data } or { } local featuretype = types[specification.type or "substitution"] @@ -82,12 +88,23 @@ local function addfeature(data,feature,specifications) if type(ligature) == "string" then ligature = { lpegmatch(splitter,ligature) } end - if slookups then - slookups[full] = ligature + local present = true + for i=1,#ligature do + if not descriptions[ligature[i]] then + present = false + break + end + end + if present then + if slookups then + slookups[full] = ligature + else + description.slookups = { [full] = ligature } + end + done, added = done + 1, true else - description.slookups = { [full] = ligature } + skip = skip + 1 end - done, added = done + 1, true end end elseif featuretype == "gsub_single" then @@ -98,14 +115,14 @@ local function addfeature(data,feature,specifications) if description then local slookups = description.slookups replacement = tonumber(replacement) or unicodes[replacement] - if descriptions[replacement] then - if slookups then - slookups[full] = replacement - else - description.slookups = { [full] = replacement } + if descriptions[replacement] then + if slookups then + slookups[full] = replacement + else + description.slookups = { [full] = replacement } + end + done, added = done + 1, true end - done, added = done + 1, true - end end end end @@ -148,8 +165,8 @@ local function addfeature(data,feature,specifications) end end end - if done > 0 and trace_loading then - report_otf("enhance: registering %s feature (%s glyphs affected)",feature,done) + if trace_loading then + report_otf("enhance: registering feature '%s', %s glyphs affected, %s glyphs skipped",feature,done,skip) end end end @@ -184,9 +201,9 @@ local tlig = { local tlig_specification = { type = "ligature", - features = everywhere, -- { ["*"] = { ["*"] = true } }, + features = everywhere, data = tlig, - flags = noflags, -- { }, + flags = noflags, } otf.addfeature("tlig",tlig_specification) @@ -206,9 +223,9 @@ local trep = { local trep_specification = { type = "substitution", - features = everywhere, -- { ["*"] = { ["*"] = true } }, + features = everywhere, data = trep, - flags = noflags, -- { }, + flags = noflags, } otf.addfeature("trep",trep_specification) @@ -218,6 +235,39 @@ registerotffeature { description = 'tex replacements', } +-- tcom + +if characters.combined then + + local tcom = { } + + local function initialize() + characters.initialize() + for first, seconds in next, characters.combined do + for second, combination in next, seconds do + tcom[combination] = { first, second } + end + end + -- return false + end + + local tcom_specification = { + type = "ligature", + features = everywhere, + data = tcom, + flags = noflags, + initialize = initialize, + } + + otf.addfeature("tcom",tcom_specification) + + registerotffeature { + name = 'tcom', + description = 'tex combinations', + } + +end + -- anum local anum_arabic = { diff --git a/tex/context/base/font-otf.lua b/tex/context/base/font-otf.lua index fe81f8bb1..319cae65b 100644 --- a/tex/context/base/font-otf.lua +++ b/tex/context/base/font-otf.lua @@ -47,7 +47,7 @@ local otf = fonts.handlers.otf otf.glists = { "gsub", "gpos" } -otf.version = 2.733 -- beware: also sync font-mis.lua +otf.version = 2.735 -- beware: also sync font-mis.lua otf.cache = containers.define("fonts", "otf", otf.version, true) local fontdata = fonts.hashes.identifiers diff --git a/tex/context/base/font-otn.lua b/tex/context/base/font-otn.lua index 0cbdea839..a972d50e7 100644 --- a/tex/context/base/font-otn.lua +++ b/tex/context/base/font-otn.lua @@ -901,33 +901,6 @@ end local logwarning = report_subchain -function chainmores.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname,n) - logprocess("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) - return start, false -end - --- handled later: --- --- function chainmores.gsub_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- return chainprocs.gsub_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- end - -function chainmores.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) - logprocess("%s: gsub_multiple not yet supported",cref(kind,chainname,chainlookupname)) - return start, false -end - -function chainmores.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) - logprocess("%s: gsub_alternate not yet supported",cref(kind,chainname,chainlookupname)) - return start, false -end - --- handled later: --- --- function chainmores.gsub_ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- return chainprocs.gsub_ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- end - local function logprocess(...) if trace_steps then registermessage(...) @@ -945,6 +918,11 @@ function chainprocs.chainsub(start,stop,kind,chainname,currentcontext,lookuphash return start, false end +function chainmores.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname,n) + logprocess("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) + return start, false +end + -- The reversesub is a special case, which is why we need to store the replacements -- in a bit weird way. There is no lookup and the replacement comes from the lookup -- itself. It is meant mostly for dealing with Urdu. @@ -1057,7 +1035,7 @@ the match.

--ldx]]-- function chainprocs.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - delete_till_stop(start,stop) + delete_till_stop(start,stop) -- we can assume that marks are to be deleted local startchar = start.char local subtables = currentlookup.subtables local lookupname = subtables[1] @@ -1082,39 +1060,49 @@ function chainprocs.gsub_multiple(start,stop,kind,chainname,currentcontext,looku return start, false end +-- function chainmores.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) +-- logprocess("%s: gsub_multiple not yet supported",cref(kind,chainname,chainlookupname)) +-- return start, false +-- end + +chainmores.gsub_multiple = chainprocs.gsub_multiple + --[[ldx--

Here we replace start by new glyph. First we delete the rest of the match.

--ldx]]-- +-- char_1 mark_1 -> char_x mark_1 (ignore marks) +-- char_1 mark_1 -> char_x + +-- to be checked: do we always have just one glyph? +-- we can also have alternates for marks +-- marks come last anyway +-- are there cases where we need to delete the mark + function chainprocs.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - -- todo: marks ? - local n = delete_till_stop(start,stop) local current = start local subtables = currentlookup.subtables - local m = 0 while current do - if current.id == glyph_code then - m = m + 1 + if current.id == glyph_code then -- is this check needed? local currentchar = current.char local lookupname = subtables[1] local alternatives = lookuphash[lookupname] if not alternatives then if trace_bugs then - logwarning("%s: %s of %s, no alternative hit",cref(kind,chainname,chainlookupname,lookupname),m,n) + logwarning("%s: no alternative hit",cref(kind,chainname,chainlookupname,lookupname)) end else alternatives = alternatives[currentchar] if not alternatives then if trace_bugs then - logwarning("%s: %s of %s, no alternative for %s",cref(kind,chainname,chainlookupname,lookupname),m,n,gref(currentchar)) + logwarning("%s: no alternative for %s",cref(kind,chainname,chainlookupname,lookupname),gref(currentchar)) end else local choice, index = alternative_glyph(current,alternatives,kind,chainname,chainlookupname,lookupname) current.char = choice if trace_alternatives then - m = m + 1 - logprocess("%s: %s of %s, replacing single %s by alternative %s (%s)", - cref(kind,chainname,chainlookupname,lookupname),m,n,index,gref(currentchar),gref(choice)) + logprocess("%s: replacing single %s by alternative %s (%s)", + cref(kind,chainname,chainlookupname,lookupname),index,gref(currentchar),gref(choice)) end end end @@ -1128,6 +1116,13 @@ function chainprocs.gsub_alternate(start,stop,kind,chainname,currentcontext,look return start, false end +-- function chainmores.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) +-- logprocess("%s: gsub_alternate not yet supported",cref(kind,chainname,chainlookupname)) +-- return start, false +-- end + +chainmores.gsub_alternate = chainprocs.gsub_alternate + --[[ldx--

When we replace ligatures we use a helper that handles the marks. I might change this function (move code inline and handle the marks by a separate function). We diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf index c8a94ddc3..aaaede636 100644 Binary files a/tex/context/base/status-files.pdf and b/tex/context/base/status-files.pdf differ diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf index 876b2a00b..fa9d18a12 100644 Binary files a/tex/context/base/status-lua.pdf and b/tex/context/base/status-lua.pdf differ diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 928da0e83..b5fc340a2 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : luatex-fonts-merged.lua -- parent file : luatex-fonts.lua --- merge date : 09/05/11 23:49:07 +-- merge date : 09/06/11 14:02:36 do -- begin closure to overcome local limits and interference @@ -5012,7 +5012,7 @@ local otf = fonts.handlers.otf otf.glists = { "gsub", "gpos" } -otf.version = 2.733 -- beware: also sync font-mis.lua +otf.version = 2.735 -- beware: also sync font-mis.lua otf.cache = containers.define("fonts", "otf", otf.version, true) local fontdata = fonts.hashes.identifiers @@ -8983,33 +8983,6 @@ end local logwarning = report_subchain -function chainmores.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname,n) - logprocess("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) - return start, false -end - --- handled later: --- --- function chainmores.gsub_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- return chainprocs.gsub_single(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- end - -function chainmores.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) - logprocess("%s: gsub_multiple not yet supported",cref(kind,chainname,chainlookupname)) - return start, false -end - -function chainmores.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) - logprocess("%s: gsub_alternate not yet supported",cref(kind,chainname,chainlookupname)) - return start, false -end - --- handled later: --- --- function chainmores.gsub_ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- return chainprocs.gsub_ligature(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) --- end - local function logprocess(...) if trace_steps then registermessage(...) @@ -9027,6 +9000,11 @@ function chainprocs.chainsub(start,stop,kind,chainname,currentcontext,lookuphash return start, false end +function chainmores.chainsub(start,stop,kind,chainname,currentcontext,lookuphash,lookuplist,chainlookupname,n) + logprocess("%s: a direct call to chainsub cannot happen",cref(kind,chainname,chainlookupname)) + return start, false +end + -- The reversesub is a special case, which is why we need to store the replacements -- in a bit weird way. There is no lookup and the replacement comes from the lookup -- itself. It is meant mostly for dealing with Urdu. @@ -9139,7 +9117,7 @@ the match.

--ldx]]-- function chainprocs.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - delete_till_stop(start,stop) + delete_till_stop(start,stop) -- we can assume that marks are to be deleted local startchar = start.char local subtables = currentlookup.subtables local lookupname = subtables[1] @@ -9164,39 +9142,49 @@ function chainprocs.gsub_multiple(start,stop,kind,chainname,currentcontext,looku return start, false end +-- function chainmores.gsub_multiple(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) +-- logprocess("%s: gsub_multiple not yet supported",cref(kind,chainname,chainlookupname)) +-- return start, false +-- end + +chainmores.gsub_multiple = chainprocs.gsub_multiple + --[[ldx--

Here we replace start by new glyph. First we delete the rest of the match.

--ldx]]-- +-- char_1 mark_1 -> char_x mark_1 (ignore marks) +-- char_1 mark_1 -> char_x + +-- to be checked: do we always have just one glyph? +-- we can also have alternates for marks +-- marks come last anyway +-- are there cases where we need to delete the mark + function chainprocs.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname) - -- todo: marks ? - local n = delete_till_stop(start,stop) local current = start local subtables = currentlookup.subtables - local m = 0 while current do - if current.id == glyph_code then - m = m + 1 + if current.id == glyph_code then -- is this check needed? local currentchar = current.char local lookupname = subtables[1] local alternatives = lookuphash[lookupname] if not alternatives then if trace_bugs then - logwarning("%s: %s of %s, no alternative hit",cref(kind,chainname,chainlookupname,lookupname),m,n) + logwarning("%s: no alternative hit",cref(kind,chainname,chainlookupname,lookupname)) end else alternatives = alternatives[currentchar] if not alternatives then if trace_bugs then - logwarning("%s: %s of %s, no alternative for %s",cref(kind,chainname,chainlookupname,lookupname),m,n,gref(currentchar)) + logwarning("%s: no alternative for %s",cref(kind,chainname,chainlookupname,lookupname),gref(currentchar)) end else local choice, index = alternative_glyph(current,alternatives,kind,chainname,chainlookupname,lookupname) current.char = choice if trace_alternatives then - m = m + 1 - logprocess("%s: %s of %s, replacing single %s by alternative %s (%s)", - cref(kind,chainname,chainlookupname,lookupname),m,n,index,gref(currentchar),gref(choice)) + logprocess("%s: replacing single %s by alternative %s (%s)", + cref(kind,chainname,chainlookupname,lookupname),index,gref(currentchar),gref(choice)) end end end @@ -9210,6 +9198,13 @@ function chainprocs.gsub_alternate(start,stop,kind,chainname,currentcontext,look return start, false end +-- function chainmores.gsub_alternate(start,stop,kind,chainname,currentcontext,lookuphash,currentlookup,chainlookupname,n) +-- logprocess("%s: gsub_alternate not yet supported",cref(kind,chainname,chainlookupname)) +-- return start, false +-- end + +chainmores.gsub_alternate = chainprocs.gsub_alternate + --[[ldx--

When we replace ligatures we use a helper that handles the marks. I might change this function (move code inline and handle the marks by a separate function). We -- cgit v1.2.3