From b614515b957fb2283a848d1204952a72f31b0ac7 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Thu, 23 Feb 2017 19:12:56 +0100 Subject: 2017-02-23 18:42:00 --- tex/context/base/mkii/cont-new.mkii | 2 +- tex/context/base/mkii/context.mkii | 2 +- tex/context/base/mkiv/char-obs.lua | 269 +++++++++++++++++++++ tex/context/base/mkiv/char-utf.lua | 229 +++--------------- tex/context/base/mkiv/cont-new.mkiv | 2 +- tex/context/base/mkiv/context.mkiv | 2 +- tex/context/base/mkiv/font-lib.mkvi | 3 +- tex/context/base/mkiv/font-lig.lua | 6 +- tex/context/base/mkiv/font-otc.lua | 1 - tex/context/base/mkiv/status-files.pdf | Bin 25646 -> 25649 bytes tex/context/base/mkiv/status-lua.pdf | Bin 373036 -> 419694 bytes tex/context/interface/mkiv/i-context.pdf | Bin 804461 -> 804461 bytes tex/context/interface/mkiv/i-readme.pdf | Bin 60772 -> 60772 bytes tex/generic/context/luatex/luatex-fonts-merged.lua | 3 +- 14 files changed, 305 insertions(+), 214 deletions(-) create mode 100644 tex/context/base/mkiv/char-obs.lua (limited to 'tex') diff --git a/tex/context/base/mkii/cont-new.mkii b/tex/context/base/mkii/cont-new.mkii index 1c5dfe2d5..6cdd793b7 100644 --- a/tex/context/base/mkii/cont-new.mkii +++ b/tex/context/base/mkii/cont-new.mkii @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2017.02.23 17:07} +\newcontextversion{2017.02.23 18:37} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/mkii/context.mkii b/tex/context/base/mkii/context.mkii index a89ce83ff..4dd3a317e 100644 --- a/tex/context/base/mkii/context.mkii +++ b/tex/context/base/mkii/context.mkii @@ -20,7 +20,7 @@ %D your styles an modules. 
\edef\contextformat {\jobname} -\edef\contextversion{2017.02.23 17:07} +\edef\contextversion{2017.02.23 18:37} %D For those who want to use this: diff --git a/tex/context/base/mkiv/char-obs.lua b/tex/context/base/mkiv/char-obs.lua new file mode 100644 index 000000000..0f0e43d3c --- /dev/null +++ b/tex/context/base/mkiv/char-obs.lua @@ -0,0 +1,269 @@ +------------------------ +----- char-ini.lua ----- +------------------------ + +-- local template_a = "\\startextendcatcodetable{%s}\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined\\stopextendcatcodetable" +-- local template_b = "\\chardef\\l=11\\chardef\\a=13\\let\\c\\catcode%s\\let\\a\\undefined\\let\\l\\undefined\\let\\c\\undefined" +-- +-- function characters.define(tobelettered, tobeactivated) -- catcodetables +-- local lettered, activated, l, a = { }, { }, 0, 0 +-- for u, chr in next, data do +-- -- we can use a macro instead of direct settings +-- local fallback = chr.fallback +-- if fallback then +-- -- texprint(format("{\\catcode %s=13\\unexpanded\\gdef %s{\\checkedchar{%s}{%s}}}",u,utfchar(u),u,fallback)) +-- texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\checkedchar{",u,"}{",fallback,"}}}") -- no texprint +-- a = a + 1 +-- activated[a] = "\\c"..u.."\\a" +-- else +-- local contextname = chr.contextname +-- local category = chr.category +-- if contextname then +-- if is_character[category] then +-- -- by this time, we're still in normal catcode mode +-- -- subtle: not "\\",contextname but "\\"..contextname +-- if chr.unicodeslot < 128 then +-- texprint(ctxcatcodes,format("\\chardef\\%s=%s",contextname,u)) +-- else +-- texprint(ctxcatcodes,format("\\let\\%s=%s",contextname,utfchar(u))) +-- if is_letter[category] then +-- l = l + 1 +-- lettered[l] = "\\c"..u.."\\l" +-- end +-- end +-- elseif is_command[category] then +-- -- this might change: contextcommand ipv contextname +-- -- texprint(format("{\\catcode %s=13\\unexpanded\\gdef 
%s{\\%s}}",u,utfchar(u),contextname)) +-- texsprint("{\\catcode",u,"=13\\unexpanded\\gdef ",utfchar(u),"{\\"..contextname,"}}") -- no texprint +-- a = a + 1 +-- activated[a] = "\\c"..u.."\\a" +-- end +-- elseif is_letter[category] then +-- if u >= 128 and u <= 65536 then -- catch private mess +-- l = l + 1 +-- lettered[l] = "\\c"..u.."\\l" +-- end +-- end +-- end +-- local range = chr.range +-- if range then +-- l = l + 1 +-- lettered[l] = format('\\dofastrecurse{"%05X}{"%05X}{1}{\\c\\fastrecursecounter\\l}',range.first,range.last) +-- end +-- end +-- -- if false then +-- l = l + 1 +-- lettered[l] = "\\c"..0x200C.."\\l" -- non-joiner +-- l = l + 1 +-- lettered[l] = "\\c"..0x200D.."\\l" -- joiner +-- -- fi +-- if tobelettered then +-- lettered = concat(lettered) +-- if true then +-- texsprint(ctxcatcodes,format(template_b,lettered)) -- global +-- else +-- for l=1,#tobelettered do +-- texsprint(ctxcatcodes,format(template_a,tobelettered[l],lettered)) +-- end +-- end +-- end +-- if tobeactivated then +-- activated = concat(activated) +-- for a=1,#tobeactivated do +-- texsprint(ctxcatcodes,format(template_a,tobeactivated[a],activated)) +-- end +-- end +-- end +-- +-- function characters.setcodes() +-- for code, chr in next, data do +-- local cc = chr.category +-- if cc == 'll' or cc == 'lu' or cc == 'lt' then +-- local lc, uc = chr.lccode, chr.uccode +-- if not lc then chr.lccode, lc = code, code end +-- if not uc then chr.uccode, uc = code, code end +-- texsprint(ctxcatcodes,format("\\setcclcuc{%i}{%i}{%i}",code,lc,uc)) +-- end +-- if cc == "lu" then +-- texprint(ctxcatcodes,"\\sfcode ",code,"999 ") +-- end +-- if cc == "lo" then +-- local range = chr.range +-- if range then +-- texsprint(ctxcatcodes,format('\\dofastrecurse{"%05X}{"%05X}{1}{\\setcclcucself\\fastrecursecounter}',range.first,range.last)) +-- end +-- end +-- end +-- end + +-- --[[ldx-- +--

The next variant has lazy token collecting. On a 140 page mk.tex this saves +-- about .25 seconds, which is understandable because we have no graphemes, and +-- not collecting tokens is not only faster but also saves garbage collecting. +--

+-- --ldx]]-- +-- +-- function utffilters.collapse(str,filename) -- we can make high a seperate pass (never needed with collapse) +-- if skippable[filesuffix(filename)] then +-- return str +-- -- elseif find(filename,"^virtual://") then +-- -- return str +-- -- else +-- -- -- print("\n"..filename) +-- end +-- if str and str ~= "" then +-- local nstr = #str +-- if nstr > 1 then +-- if initialize then -- saves a call +-- initialize() +-- end +-- local tokens, t, first, done, n = { }, 0, false, false, 0 +-- for second in utfcharacters(str) do +-- if done then +-- if first then +-- if second == " " then +-- t = t + 1 +-- tokens[t] = first +-- first = second +-- else +-- -- local crs = high[second] +-- -- if crs then +-- -- t = t + 1 +-- -- tokens[t] = first +-- -- first = crs +-- -- else +-- local cgf = graphemes[first] +-- if cgf and cgf[second] then +-- first = cgf[second] +-- else +-- t = t + 1 +-- tokens[t] = first +-- first = second +-- end +-- -- end +-- end +-- elseif second == " " then +-- first = second +-- else +-- -- local crs = high[second] +-- -- if crs then +-- -- first = crs +-- -- else +-- first = second +-- -- end +-- end +-- elseif second == " " then +-- first = nil +-- n = n + 1 +-- else +-- -- local crs = high[second] +-- -- if crs then +-- -- for s in utfcharacters(str) do +-- -- if n == 1 then +-- -- break +-- -- else +-- -- t = t + 1 +-- -- tokens[t] = s +-- -- n = n - 1 +-- -- end +-- -- end +-- -- if first then +-- -- t = t + 1 +-- -- tokens[t] = first +-- -- end +-- -- first = crs +-- -- done = true +-- -- else +-- local cgf = graphemes[first] +-- if cgf and cgf[second] then +-- for s in utfcharacters(str) do +-- if n == 1 then +-- break +-- else +-- t = t + 1 +-- tokens[t] = s +-- n = n - 1 +-- end +-- end +-- first = cgf[second] +-- done = true +-- else +-- first = second +-- n = n + 1 +-- end +-- -- end +-- end +-- end +-- if done then +-- if first then +-- t = t + 1 +-- tokens[t] = first +-- end +-- return concat(tokens) -- seldom called 
+-- end +-- elseif nstr > 0 then +-- return high[str] or str -- this will go from here +-- end +-- end +-- return str +-- end + +-- function utffilters.decompose(str) +-- if str and str ~= "" then +-- local nstr = #str +-- if nstr > 1 then +-- -- if initialize then -- saves a call +-- -- initialize() +-- -- end +-- local tokens, t, done, n = { }, 0, false, 0 +-- for s in utfcharacters(str) do +-- local dec = decomposed[s] +-- if dec then +-- if not done then +-- if n > 0 then +-- for s in utfcharacters(str) do +-- if n == 0 then +-- break +-- else +-- t = t + 1 +-- tokens[t] = s +-- n = n - 1 +-- end +-- end +-- end +-- done = true +-- end +-- t = t + 1 +-- tokens[t] = dec +-- elseif done then +-- t = t + 1 +-- tokens[t] = s +-- else +-- n = n + 1 +-- end +-- end +-- if done then +-- return concat(tokens) -- seldom called +-- end +-- end +-- end +-- return str +-- end + +-- local replacer = nil +-- local finder = nil +-- +-- function utffilters.decompose(str) -- 3 to 4 times faster than the above +-- if not replacer then +-- if initialize then +-- initialize() +-- end +-- local tree = utfchartabletopattern(decomposed) +-- finder = lpeg.finder(tree,false,true) +-- replacer = lpeg.replacer(tree,decomposed,false,true) +-- end +-- if str and str ~= "" and #str > 1 and lpegmatch(finder,str) then +-- return lpegmatch(replacer,str) +-- end +-- return str +-- end diff --git a/tex/context/base/mkiv/char-utf.lua b/tex/context/base/mkiv/char-utf.lua index f4a6d50e1..5702f2087 100644 --- a/tex/context/base/mkiv/char-utf.lua +++ b/tex/context/base/mkiv/char-utf.lua @@ -17,6 +17,9 @@ of output (for instance ).

We implement these manipulations as filters. One can run multiple filters over a string.

+ +

The old code has now been moved to char-obs.lua, which we keep around for +educational purposes.

--ldx]]-- local gsub, find = string.gsub, string.find @@ -42,21 +45,6 @@ local charfromnumber = characters.fromnumber characters = characters or { } local characters = characters -local graphemes = allocate() -characters.graphemes = graphemes - -local collapsed = allocate() -characters.collapsed = collapsed - --- local combined = allocate() --- characters.combined = combined - -local decomposed = allocate() -characters.decomposed = decomposed - -local mathpairs = allocate() -characters.mathpairs = mathpairs - local filters = allocate() characters.filters = filters @@ -94,8 +82,20 @@ local decomposed = allocate { characters.decomposed = decomposed -local function initialize() -- maybe in tex mode store in format ! - local data = characters.data +local graphemes = characters.graphemes +local collapsed = characters.collapsed +local mathpairs = characters.mathpairs + +if not graphemes then + + graphemes = allocate() + collapsed = allocate() + mathpairs = allocate() + + characters.graphemes = graphemes + characters.collapsed = collapsed + characters.mathpairs = mathpairs + local function backtrack(v,last,target) local vs = v.specials if vs and #vs == 3 and vs[1] == "char" then @@ -105,6 +105,7 @@ local function initialize() -- maybe in tex mode store in format ! backtrack(data[one],second,target) end end + local function setpair(one,two,unicode,first,second,combination) local mps = mathpairs[one] if not mps then @@ -121,6 +122,7 @@ local function initialize() -- maybe in tex mode store in format ! mps[second] = combination end end + for unicode, v in next, data do local vs = v.specials if vs and #vs == 3 and vs[1] == "char" then @@ -150,18 +152,16 @@ local function initialize() -- maybe in tex mode store in format ! 
setpair(one,two,unicode,first,second,combination) end end - initialize = false - characters.initialize = function() end -end -characters.initialize = initialize + if storage then + storage.register("characters/graphemes", characters.graphemes, "characters.graphemes") + storage.register("characters/collapsed", characters.collapsed, "characters.collapsed") + storage.register("characters/mathpairs", characters.mathpairs, "characters.mathpairs") + end ---[[ldx-- -

The next variant has lazy token collecting, on a 140 page mk.tex this saves -about .25 seconds, which is understandable because we have no graphemes and -not collecting tokens is not only faster but also saves garbage collecting. -

---ldx]]-- +end + +function characters.initialize() end -- dummy local skippable = { } local filesuffix = file.suffix @@ -179,119 +179,9 @@ function utffilters.setskippable(suffix,value) end end --- function utffilters.collapse(str,filename) -- we can make high a seperate pass (never needed with collapse) --- if skippable[filesuffix(filename)] then --- return str --- -- elseif find(filename,"^virtual://") then --- -- return str --- -- else --- -- -- print("\n"..filename) --- end --- if str and str ~= "" then --- local nstr = #str --- if nstr > 1 then --- if initialize then -- saves a call --- initialize() --- end --- local tokens, t, first, done, n = { }, 0, false, false, 0 --- for second in utfcharacters(str) do --- if done then --- if first then --- if second == " " then --- t = t + 1 --- tokens[t] = first --- first = second --- else --- -- local crs = high[second] --- -- if crs then --- -- t = t + 1 --- -- tokens[t] = first --- -- first = crs --- -- else --- local cgf = graphemes[first] --- if cgf and cgf[second] then --- first = cgf[second] --- else --- t = t + 1 --- tokens[t] = first --- first = second --- end --- -- end --- end --- elseif second == " " then --- first = second --- else --- -- local crs = high[second] --- -- if crs then --- -- first = crs --- -- else --- first = second --- -- end --- end --- elseif second == " " then --- first = nil --- n = n + 1 --- else --- -- local crs = high[second] --- -- if crs then --- -- for s in utfcharacters(str) do --- -- if n == 1 then --- -- break --- -- else --- -- t = t + 1 --- -- tokens[t] = s --- -- n = n - 1 --- -- end --- -- end --- -- if first then --- -- t = t + 1 --- -- tokens[t] = first --- -- end --- -- first = crs --- -- done = true --- -- else --- local cgf = graphemes[first] --- if cgf and cgf[second] then --- for s in utfcharacters(str) do --- if n == 1 then --- break --- else --- t = t + 1 --- tokens[t] = s --- n = n - 1 --- end --- end --- first = cgf[second] --- done = true --- else --- first = 
second --- n = n + 1 --- end --- -- end --- end --- end --- if done then --- if first then --- t = t + 1 --- tokens[t] = first --- end --- return concat(tokens) -- seldom called --- end --- elseif nstr > 0 then --- return high[str] or str -- this will go from here --- end --- end --- return str --- end - --- this is about twice as fast - local p_collapse = nil -- so we can reset if needed local function prepare() - if initialize then - initialize() - end local tree = utfchartabletopattern(collapsed) p_collapse = Cs((tree/collapsed + p_utf8character)^0 * P(-1)) -- the P(1) is needed in order to accept non utf end @@ -309,72 +199,9 @@ function utffilters.collapse(str,filename) end end --- function utffilters.decompose(str) --- if str and str ~= "" then --- local nstr = #str --- if nstr > 1 then --- -- if initialize then -- saves a call --- -- initialize() --- -- end --- local tokens, t, done, n = { }, 0, false, 0 --- for s in utfcharacters(str) do --- local dec = decomposed[s] --- if dec then --- if not done then --- if n > 0 then --- for s in utfcharacters(str) do --- if n == 0 then --- break --- else --- t = t + 1 --- tokens[t] = s --- n = n - 1 --- end --- end --- end --- done = true --- end --- t = t + 1 --- tokens[t] = dec --- elseif done then --- t = t + 1 --- tokens[t] = s --- else --- n = n + 1 --- end --- end --- if done then --- return concat(tokens) -- seldom called --- end --- end --- end --- return str --- end - --- local replacer = nil --- local finder = nil --- --- function utffilters.decompose(str) -- 3 to 4 times faster than the above --- if not replacer then --- if initialize then --- initialize() --- end --- local tree = utfchartabletopattern(decomposed) --- finder = lpeg.finder(tree,false,true) --- replacer = lpeg.replacer(tree,decomposed,false,true) --- end --- if str and str ~= "" and #str > 1 and lpegmatch(finder,str) then --- return lpegmatch(replacer,str) --- end --- return str --- end - local p_decompose = nil local function prepare() - if 
initialize then - initialize() - end local tree = utfchartabletopattern(decomposed) p_decompose = Cs((tree/decomposed + p_utf8character)^0 * P(-1)) end @@ -448,7 +275,7 @@ local p_reorder = nil -- return p, new -- end --- -- the next one isnto stable for similar weights +-- -- the next one into stable for similar weights local sorter = function(a,b) return b[2] < a[2] diff --git a/tex/context/base/mkiv/cont-new.mkiv b/tex/context/base/mkiv/cont-new.mkiv index c67cf1c25..4df7d0c92 100644 --- a/tex/context/base/mkiv/cont-new.mkiv +++ b/tex/context/base/mkiv/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2017.02.23 17:07} +\newcontextversion{2017.02.23 18:37} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. diff --git a/tex/context/base/mkiv/context.mkiv b/tex/context/base/mkiv/context.mkiv index dc66b5564..94393a0a7 100644 --- a/tex/context/base/mkiv/context.mkiv +++ b/tex/context/base/mkiv/context.mkiv @@ -39,7 +39,7 @@ %D up and the dependencies are more consistent. 
\edef\contextformat {\jobname} -\edef\contextversion{2017.02.23 17:07} +\edef\contextversion{2017.02.23 18:37} \edef\contextkind {beta} %D For those who want to use this: diff --git a/tex/context/base/mkiv/font-lib.mkvi b/tex/context/base/mkiv/font-lib.mkvi index 316e70019..fa8797394 100644 --- a/tex/context/base/mkiv/font-lib.mkvi +++ b/tex/context/base/mkiv/font-lib.mkvi @@ -97,11 +97,12 @@ \registerctxluafile{font-ctx}{1.001} % after def as it overloads \registerctxluafile{font-ext}{1.001} -\registerctxluafile{font-lig}{1.001} % only for experiments so try to avoid it \registerctxluafile{font-fbk}{1.001} \registerctxluafile{font-aux}{1.001} +\registerctxluafile{font-lig}{1.001} % only for experiments so try to avoid it + %D Some low level helpers %D %D \starttyping diff --git a/tex/context/base/mkiv/font-lig.lua b/tex/context/base/mkiv/font-lig.lua index 823be9c3b..bb9ee0096 100644 --- a/tex/context/base/mkiv/font-lig.lua +++ b/tex/context/base/mkiv/font-lig.lua @@ -12,14 +12,10 @@ if not modules then modules = { } end modules ['font-lig'] = { local standalone = not characters if standalone then - require("char-def") require("char-utf") - if characters.initialize then - characters.initialize() - end end -local data = { } -- if we ever preload this i'll cache it +local data = { } for first, seconds in next, characters.graphemes do for second, combined in next, seconds do diff --git a/tex/context/base/mkiv/font-otc.lua b/tex/context/base/mkiv/font-otc.lua index a0dda593d..a99d3db9f 100644 --- a/tex/context/base/mkiv/font-otc.lua +++ b/tex/context/base/mkiv/font-otc.lua @@ -281,7 +281,6 @@ local function addfeature(data,feature,specifications) end end end - inspect(coverage) return coverage end diff --git a/tex/context/base/mkiv/status-files.pdf b/tex/context/base/mkiv/status-files.pdf index 6d7f14ae0..34c6150f3 100644 Binary files a/tex/context/base/mkiv/status-files.pdf and b/tex/context/base/mkiv/status-files.pdf differ diff --git 
a/tex/context/base/mkiv/status-lua.pdf b/tex/context/base/mkiv/status-lua.pdf index 189773237..f5e5e1bf3 100644 Binary files a/tex/context/base/mkiv/status-lua.pdf and b/tex/context/base/mkiv/status-lua.pdf differ diff --git a/tex/context/interface/mkiv/i-context.pdf b/tex/context/interface/mkiv/i-context.pdf index a62e63912..d847627dd 100644 Binary files a/tex/context/interface/mkiv/i-context.pdf and b/tex/context/interface/mkiv/i-context.pdf differ diff --git a/tex/context/interface/mkiv/i-readme.pdf b/tex/context/interface/mkiv/i-readme.pdf index e1648bc33..39d9d5def 100644 Binary files a/tex/context/interface/mkiv/i-readme.pdf and b/tex/context/interface/mkiv/i-readme.pdf differ diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index 07c490253..414aa3e0b 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : c:/data/develop/context/sources/luatex-fonts-merged.lua -- parent file : c:/data/develop/context/sources/luatex-fonts.lua --- merge date : 02/23/17 17:07:53 +-- merge date : 02/23/17 18:37:07 do -- begin closure to overcome local limits and interference @@ -24573,7 +24573,6 @@ local function addfeature(data,feature,specifications) end end end - inspect(coverage) return coverage end local function prepare_ligature(list,featuretype,nocheck) -- cgit v1.2.3