diff options
author | Hans Hagen <pragma@wxs.nl> | 2017-04-02 20:46:19 +0200 |
---|---|---|
committer | Context Git Mirror Bot <phg42.2a@gmail.com> | 2017-04-02 20:46:19 +0200 |
commit | e32f57c9c5968f0c09130f6e24e28a96d6e1393d (patch) | |
tree | 476d22407b719a74b18a849d83fb8464f9a042c4 /tex | |
parent | 30ea6ac75b1cf62ea8e17228c07d54824285acfa (diff) | |
download | context-e32f57c9c5968f0c09130f6e24e28a96d6e1393d.tar.gz |
2017-04-02 19:57:00
Diffstat (limited to 'tex')
27 files changed, 633 insertions, 461 deletions
diff --git a/tex/context/base/mkii/cont-new.mkii b/tex/context/base/mkii/cont-new.mkii index 515a20ad4..eed67e1c2 100644 --- a/tex/context/base/mkii/cont-new.mkii +++ b/tex/context/base/mkii/cont-new.mkii @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2017.03.26 16:15} +\newcontextversion{2017.04.02 19:51} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/mkii/context.mkii b/tex/context/base/mkii/context.mkii index 0b8726e31..c20ff4f1a 100644 --- a/tex/context/base/mkii/context.mkii +++ b/tex/context/base/mkii/context.mkii @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2017.03.26 16:15} +\edef\contextversion{2017.04.02 19:51} %D For those who want to use this: diff --git a/tex/context/base/mkiv/cont-new.mkiv b/tex/context/base/mkiv/cont-new.mkiv index 8a139dd1c..47ef5499d 100644 --- a/tex/context/base/mkiv/cont-new.mkiv +++ b/tex/context/base/mkiv/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2017.03.26 16:15} +\newcontextversion{2017.04.02 19:51} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. diff --git a/tex/context/base/mkiv/context.mkiv b/tex/context/base/mkiv/context.mkiv index 5220613a6..ee6f6ddb2 100644 --- a/tex/context/base/mkiv/context.mkiv +++ b/tex/context/base/mkiv/context.mkiv @@ -39,7 +39,7 @@ %D up and the dependencies are more consistent. \edef\contextformat {\jobname} -\edef\contextversion{2017.03.26 16:15} +\edef\contextversion{2017.04.02 19:51} \edef\contextkind {beta} %D For those who want to use this: diff --git a/tex/context/base/mkiv/enco-ini.mkiv b/tex/context/base/mkiv/enco-ini.mkiv index 835ee61f5..50375251a 100644 --- a/tex/context/base/mkiv/enco-ini.mkiv +++ b/tex/context/base/mkiv/enco-ini.mkiv @@ -282,7 +282,7 @@ % some more \ifdefined\softhyphen \else - \let\softhyphen\- + \let\softhyphen\explicitdiscretionary \fi \def\hyphen {\softhyphen} diff --git a/tex/context/base/mkiv/font-ocl.lua b/tex/context/base/mkiv/font-ocl.lua index 3583e15d0..68d9ac650 100644 --- a/tex/context/base/mkiv/font-ocl.lua +++ b/tex/context/base/mkiv/font-ocl.lua @@ -140,8 +140,10 @@ local function initializecolr(tfmdata,kind,value) -- hm, always value -- are somewhat inefficient as each glyph gets the font set. It's a -- side effect of the fact that a font is handled when a character gets -- flushed. - { "special", "pdf:page:q" }, - { "special", "pdf:raw:" .. b } + -- { "special", "pdf:page:q" }, + -- { "special", "pdf:raw:" .. b } + -- This seems to be okay too: + { "special", "pdf:direct:q " .. b }, } local n = #t for i=1,s do @@ -152,8 +154,10 @@ local function initializecolr(tfmdata,kind,value) -- hm, always value n = n + 1 t[n] = { "right", -w } end end - n = n + 1 t[n] = { "special", "pdf:page:" .. e } - n = n + 1 t[n] = { "special", "pdf:raw:Q" } + -- n = n + 1 t[n] = { "special", "pdf:page:" .. e } + -- n = n + 1 t[n] = { "special", "pdf:raw:Q" } + -- This seems to be okay too: + n = n + 1 t[n] = { "special", "pdf:direct:" .. e .. " Q"} character.commands = t end end diff --git a/tex/context/base/mkiv/font-run.mkiv b/tex/context/base/mkiv/font-run.mkiv index e9a6f9ddb..ebb3a576c 100644 --- a/tex/context/base/mkiv/font-run.mkiv +++ b/tex/context/base/mkiv/font-run.mkiv @@ -14,6 +14,8 @@ %D [This code is hooked into the core macros and saves some format %D space. It needs a cleanup as it's real old derioved \MKII\ code] +%D +%D Better use \type{\bTABLE...\eTABLE}. \unprotect diff --git a/tex/context/base/mkiv/font-syn.lua b/tex/context/base/mkiv/font-syn.lua index 558d07fe7..c4dcf0bcd 100644 --- a/tex/context/base/mkiv/font-syn.lua +++ b/tex/context/base/mkiv/font-syn.lua @@ -804,6 +804,7 @@ local function collecthashes() local noffallbacks = 0 if specifications then -- maybe multiple passes (for the compatible and cffnames so that they have less preference) + local conflicts = setmetatableindex("table") for index=1,#specifications do local specification = specifications[index] local format = specification.format @@ -832,7 +833,6 @@ local function collecthashes() local instance = fullname .. instancenames[i] mapping[instance] = index nofmappings = nofmappings + 1 - end end -- if compatiblename and not mapping[compatiblename] then @@ -865,10 +865,22 @@ local function collecthashes() noffallbacks = noffallbacks + 1 end end + -- dangerous ... first match takes slot if not mapping[familyname] and not fallback[familyname] then fallback[familyname] = index noffallbacks = noffallbacks + 1 end + local conflict = conflicts[format] + conflict[familyname] = (conflict[familyname] or 0) + 1 + end + end + for format, conflict in next, conflicts do + local fallback = fallbacks[format] + for familyname, n in next, conflict do + if n > 1 then + fallback[familyname] = nil + noffallbacks = noffallbacks - n + end end end end diff --git a/tex/context/base/mkiv/lang-def.mkiv b/tex/context/base/mkiv/lang-def.mkiv index ef53c13e3..96bb88767 100644 --- a/tex/context/base/mkiv/lang-def.mkiv +++ b/tex/context/base/mkiv/lang-def.mkiv @@ -134,7 +134,6 @@ \c!rightquotation=\rightguillemot, \c!date={\v!day,{.},\space,\v!month,\space,\v!year}] - \installlanguage [\s!no] [\s!nb] \installlanguage [\s!norwegian] [\s!nb] \installlanguage [\s!bokmal] [\s!nb] diff --git a/tex/context/base/mkiv/lang-dis.lua b/tex/context/base/mkiv/lang-dis.lua index 448966d49..e2c0d220e 100644 --- a/tex/context/base/mkiv/lang-dis.lua +++ b/tex/context/base/mkiv/lang-dis.lua @@ -62,146 +62,158 @@ local getlanguagedata = languages.getdata local check_regular = true -local expanders = { - [discretionary_code] = function(d,template) - -- \discretionary - return template - end, - [explicit_code] = function(d,template) - -- \- - local pre, post, replace = getdisc(d) - local done = false - if pre then - local char = isglyph(pre) - if char and char <= 0 then - done = true - flush_list(pre) - pre = nil +local expanders -- this will go away + +-- the penalty has been determined by the mode (currently we force 1): +-- +-- 0 : exhyphenpenalty +-- 1 : hyphenpenalty +-- 2 : automatichyphenpenalty +-- +-- following a - : the pre and post chars are already appended and set +-- so we have pre=preex and post=postex .. however, the previous +-- hyphen is already injected ... downside: the font handler sees this +-- so this is another argument for doing a hyphenation pass in context + +if LUATEXVERSION < 1.005 then + + expanders = { + [discretionary_code] = function(d,template) + -- \discretionary + return template + end, + [explicit_code] = function(d,template) + -- \- + local pre, post, replace = getdisc(d) + local done = false + if pre then + local char = isglyph(pre) + if char and char <= 0 then + done = true + flush_list(pre) + pre = nil + end end - end - if post then - local char = isglyph(post) - if char and char <= 0 then - done = true - flush_list(post) - post = nil + if post then + local char = isglyph(post) + if char and char <= 0 then + done = true + flush_list(post) + post = nil + end end - end - if done then - -- todo: take existing penalty - setdisc(d,pre,post,replace,explicit_code,tex.exhyphenpenalty) - else - setsubtype(d,explicit_code) - end - return template - end, - [automatic_code] = function(d,template) - -- the penalty has been determined by the mode (currently we force 1): - -- - -- 0 : exhyphenpenalty - -- 1 : hyphenpenalty - -- 2 : automatichyphenpenalty - -- - -- following a - : the pre and post chars are already appended and set - -- so we have pre=preex and post=postex .. however, the previous - -- hyphen is already injected ... downside: the font handler sees this - -- so this is another argument for doing a hyphenation pass in context - local pre, post, replace = getdisc(d) - if pre then - -- we have a preex characters and want that one to replace the - -- character in front which is the trigger - if not template then - -- can there be font kerns already? - template = getprev(d) - if template and getid(template) ~= glyph_code then - template = getnext(d) + if done then + -- todo: take existing penalty + setdisc(d,pre,post,replace,explicit_code,tex.exhyphenpenalty) + else + setsubtype(d,explicit_code) + end + return template + end, + [automatic_code] = function(d,template) + local pre, post, replace = getdisc(d) + if pre then + -- we have a preex characters and want that one to replace the + -- character in front which is the trigger + if not template then + -- can there be font kerns already? + template = getprev(d) if template and getid(template) ~= glyph_code then - template = nil + template = getnext(d) + if template and getid(template) ~= glyph_code then + template = nil + end end end - end - if template then - local pseudohead = getprev(template) - if pseudohead then - while template ~= d do - pseudohead, template, removed = remove_node(pseudohead,template) - -- free old replace ? - replace = removed - -- break ? + if template then + local pseudohead = getprev(template) + if pseudohead then + while template ~= d do + pseudohead, template, removed = remove_node(pseudohead,template) + -- free old replace ? + replace = removed + -- break ? + end + else + -- can't happen end + setdisc(d,pre,post,replace,automatic_code,tex.hyphenpenalty) else - -- can't happen + -- print("lone regular discretionary ignored") end - setdisc(d,pre,post,replace,automatic_code,tex.hyphenpenalty) else - -- print("lone regular discretionary ignored") + setdisc(d,pre,post,replace,automatic_code,tex.hyphenpenalty) end - else - setdisc(d,pre,post,replace,automatic_code,tex.hyphenpenalty) - end - return template - end, - [regular_code] = function(d,template) - if check_regular then - -- simple - if not template then - -- can there be font kerns already? - template = getprev(d) - if template and getid(template) ~= glyph_code then - template = getnext(d) + return template + end, + [regular_code] = function(d,template) + if check_regular then + -- simple + if not template then + -- can there be font kerns already? + template = getprev(d) if template and getid(template) ~= glyph_code then - template = nil + template = getnext(d) + if template and getid(template) ~= glyph_code then + template = nil + end end end - end - if template then - local language = template and getlang(template) - local data = getlanguagedata(language) - local prechar = data.prehyphenchar - local postchar = data.posthyphenchar - local pre, post, replace = getdisc(d) -- pre can be set - local done = false - if prechar and prechar > 0 then - done = true - pre = copy_node(template) - setchar(pre,prechar) - end - if postchar and postchar > 0 then - done = true - post = copy_node(template) - setchar(post,postchar) - end - if done then - setdisc(d,pre,post,replace,regular_code,tex.hyphenpenalty) + if template then + local language = template and getlang(template) + local data = getlanguagedata(language) + local prechar = data.prehyphenchar + local postchar = data.posthyphenchar + local pre, post, replace = getdisc(d) -- pre can be set + local done = false + if prechar and prechar > 0 then + done = true + pre = copy_node(template) + setchar(pre,prechar) + end + if postchar and postchar > 0 then + done = true + post = copy_node(template) + setchar(post,postchar) + end + if done then + setdisc(d,pre,post,replace,regular_code,tex.hyphenpenalty) + end + else + -- print("lone regular discretionary ignored") end - else - -- print("lone regular discretionary ignored") + return template end - return template - else - -- maybe also set penalty here - setsubtype(d,regular_code) + end, + [disccodes.first] = function() + -- forget about them + end, + [disccodes.second] = function() + -- forget about them + end, + } + + function languages.expand(d,template,subtype) + if not subtype then + subtype = getsubtype(d) end - end, - [disccodes.first] = function() - -- forget about them - end, - [disccodes.second] = function() - -- forget about them - end, -} + if subtype ~= discretionary_code then + return expanders[subtype](d,template) + end + end -languages.expanders = expanders +else -function languages.expand(d,template,subtype) - if not subtype then - subtype = getsubtype(d) - end - if subtype ~= discretionary_code then - return expanders[subtype](d,template) + function languages.expand() + -- nothing to be fixed end + end +languages.expanders = expanders + +-- -- -- -- -- + local setlistcolor = nodes.tracers.colors.setlist function languages.visualizediscretionaries(head) diff --git a/tex/context/base/mkiv/lang-hyp.lua b/tex/context/base/mkiv/lang-hyp.lua index 50132bfe1..b85295f19 100644 --- a/tex/context/base/mkiv/lang-hyp.lua +++ b/tex/context/base/mkiv/lang-hyp.lua @@ -6,14 +6,6 @@ if not modules then modules = { } end modules ['lang-hyp'] = { license = "see context related readme files" } --- todo: hyphenate over range if needed --- todo: check boundary nodes - --- setattr: helper for full attr - --- to be considered: reset dictionary.hyphenated when a pattern is added --- or maybe an explicit reset of the cache - -- In an automated workflow hypenation of long titles can be somewhat problematic -- especially when demands conflict. For that reason I played a bit with a Lua based -- variant of the traditional hyphenation machinery. This mechanism has been extended @@ -24,7 +16,11 @@ if not modules then modules = { } end modules ['lang-hyp'] = { -- Being the result of two days experimenting the following implementation is probably -- not completely okay yet. If there is demand I might add some more features and plugs. -- The performance is quite okay but can probably improved a bit, although this is not --- the most critital code. +-- the most critital code. For instance, on a metafun manual run the overhead is about +-- 0.3 seconds on 19 seconds which is not that bad. +-- +-- In the procecess of wrapping up (for the ctx conference proceedings) I cleaned up +-- and extended the code a bit. It can be used in production. -- -- . a l g o r i t h m . -- 4l1g4 @@ -45,12 +41,12 @@ if not modules then modules = { } end modules ['lang-hyp'] = { -- -- ab1cd/ef=gh,2,2 : acd - efd (pattern/replacement,start,length -- --- In the procecess of wrapping up (for the ctx conference proceedings) I cleaned up --- and extended the code a bit. - --- todo: hjcodes (<32 == length) if i really want it - --- start: +-- todo : support hjcodes (<32 == length) like luatex does now (no need/demand so far) +-- maybe : support hyphenation over range (can alsready be done using attributes/language) +-- maybe : reset dictionary.hyphenated when a pattern is added and/or forced reset option +-- todo : check subtypes (because they have subtle meanings in the line breaking) +-- +-- word start (in tex engine): -- -- boundary : yes when wordboundary -- hlist : when hyphenationbounds 1 or 3 @@ -63,7 +59,7 @@ if not modules then modules = { } end modules ['lang-hyp'] = { -- glyph : exhyphenchar (one only) : yes (so no -- ---) -- otherwise : yes -- --- end: +-- word end (in tex engine): -- -- boundary : yes -- glyph : yes when different language @@ -78,8 +74,6 @@ if not modules then modules = { } end modules ['lang-hyp'] = { -- ins : when hyphenationbounds 2 or 3 -- adjust : when hyphenationbounds 2 or 3 --- todo: maybe subtypes (because they have subtle meanings in the line breaking) - local type, rawset, tonumber, next = type, rawset, tonumber, next local P, R, S, Cg, Cf, Ct, Cc, C, Carg, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cf, lpeg.Ct, lpeg.Cc, lpeg.C, lpeg.Carg, lpeg.Cs @@ -318,15 +312,14 @@ function traditional.lasttrace() return steps end --- We could reuse the w table but as we cache the resolved words --- there is not much gain in that complication. +-- We could reuse the w table but as we cache the resolved words there is not much gain in +-- that complication. -- --- Beware: word can be a table and when n is passed to we can --- assume reuse so we need to honor that n then. - --- todo: a fast variant for tex ... less lookups (we could check is --- dictionary has changed) ... although due to caching the already --- done words, we don't do much here +-- Beware: word can be a table and when n is passed to we can assume reuse so we need to +-- honor that n then. +-- +-- todo: a fast variant for tex ... less lookups (we could check is dictionary has changed) +-- ... although due to caching the already done words, we don't do much here local function hyphenate(dictionary,word,n) -- odd is okay nofwords = nofwords + 1 @@ -399,7 +392,6 @@ local function hyphenate(dictionary,word,n) -- odd is okay local specials = dictionary.specials local patterns = dictionary.patterns -- --- inspect(specials) local spec for i=1,l do for j=i,l do @@ -410,15 +402,14 @@ local function hyphenate(dictionary,word,n) -- odd is okay if not done then done = { } spec = nil - -- the string that we resolve has explicit fences (.) so - -- done starts at the first fence and runs upto the last - -- one so we need one slot less + -- the string that we resolve has explicit fences (.) so done starts at + -- the first fence and runs upto the last one so we need one slot less for i=1,l do done[i] = 0 end end - -- we run over the pattern that always has a (zero) value for - -- each character plus one more as we look at both sides + -- we run over the pattern that always has a (zero) value for each character + -- plus one more as we look at both sides for k=1,#m do local new = m[k] if not new then @@ -524,8 +515,8 @@ function traditional.injecthyphens(dictionary,word,specification) return word end - -- the following code is similar to code later on but here we have - -- strings while there we have hyphen specs + -- the following code is similar to code later on but here we have strings while there + -- we have hyphen specs local word = lpegmatch(p_split,word) local size = #word @@ -636,7 +627,7 @@ if context then local discretionary_code = disccodes.discretionary local explicit_code = disccodes.explicit local automatic_code = disccodes.automatic - ----- regular_code = disccodes.regular + local regular_code = disccodes.regular local nuts = nodes.nuts local tonut = nodes.tonut @@ -658,13 +649,18 @@ if context then local getattrlist = nuts.getattrlist local setattrlist = nuts.setattrlist local isglyph = nuts.isglyph + local ischar = nuts.ischar local setchar = nuts.setchar local setdisc = nuts.setdisc + local setlink = nuts.setlink + local setprev = nuts.setprev + local setnext = nuts.setnext local insert_before = nuts.insert_before local insert_after = nuts.insert_after local copy_node = nuts.copy + local copy_list = nuts.copy_list local remove_node = nuts.remove local end_of_math = nuts.end_of_math local node_tail = nuts.tail @@ -690,8 +686,9 @@ if context then local a_hyphenation = attributes.private("hyphenation") - local expand_explicit = languages.expanders[explicit_code] - local expand_automatic = languages.expanders[automatic_code] + local expanders = languages.expanders -- gone in 1.005 + local expand_explicit = expanders and expanders[explicit_code] + local expand_automatic = expanders and expanders[automatic_code] local interwordpenalty = 5000 @@ -699,11 +696,12 @@ if context then return dictionaries[language] end - setmetatableindex(dictionaries,function(t,k) -- for the moment we use an independent data structure + -- for the moment we use an independent data structure + + setmetatableindex(dictionaries,function(t,k) if type(k) == "string" then - -- this will force a load if not yet loaded (we need a nicer way) - -- for the moment that will do (nneeded for examples that register - -- a pattern specification + -- this will force a load if not yet loaded (we need a nicer way) for the moment + -- that will do (nneeded for examples that register a pattern specification languages.getnumber(k) end local specification = languages.getdata(k) @@ -778,11 +776,10 @@ if context then -- with less characters than either of them! This could be an option but such a narrow -- hsize doesn't make sense anyway. - -- We assume that featuresets are defined global ... local definitions - -- (also mid paragraph) make not much sense anyway. For the moment we - -- assume no predefined sets so we don't need to store them. Nor do we - -- need to hash them in order to save space ... no sane user will define - -- many of them. + -- We assume that featuresets are defined global ... local definitions (also mid paragraph) + -- make not much sense anyway. For the moment we assume no predefined sets so we don't need + -- to store them. Nor do we need to hash them in order to save space ... no sane user will + -- define many of them. local featuresets = hyphenators.featuresets or { } hyphenators.featuresets = featuresets @@ -804,7 +801,8 @@ if context then return noffeaturesets end - local function makeset(...) -- a bit overkill, supporting variants but who cares + local function makeset(...) + -- a bit overkill, supporting variants but who cares local set = { } for i=1,select("#",...) do local list = select(i,...) @@ -844,9 +842,34 @@ if context then return set end + -- category pd (tex also sees --- and -- as hyphens but do we really want that + local defaulthyphens = { - [0x2D] = true, -- hyphen - [0xAD] = true, -- soft hyphen + [0x002D] = true, -- HYPHEN-MINUS + [0x00AD] = 0x002D, -- SOFT HYPHEN (active in ConTeXt) + -- [0x058A] = true, -- ARMENIAN HYPHEN + -- [0x1400] = true, -- CANADIAN SYLLABICS HYPHEN + -- [0x1806] = true, -- MONGOLIAN TODO SOFT HYPHEN + [0x2010] = true, -- HYPHEN + -- [0x2011] = true, -- NON-BREAKING HYPHEN + -- [0x2012] = true, -- FIGURE DASH + [0x2013] = true, -- EN DASH + [0x2014] = true, -- EM DASH + -- [0x2015] = true, -- HORIZONTAL BAR + -- [0x2027] = true, -- HYPHENATION POINT + -- [0x2E17] = true, -- DOUBLE OBLIQUE HYPHEN + -- [0x2E1A] = true, -- HYPHEN WITH DIAERESIS + -- [0x2E3A] = true, -- TWO-EM DASH + -- [0x2E3B] = true, -- THREE-EM DASH + -- [0x2E40] = true, -- DOUBLE HYPHEN + -- [0x301C] = true, -- WAVE DASH + -- [0x3030] = true, -- WAVY DASH + -- [0x30A0] = true, -- KATAKANA-HIRAGANA DOUBLE HYPHEN + -- [0xFE31] = true, -- PRESENTATION FORM FOR VERTICAL EM DASH + -- [0xFE32] = true, -- PRESENTATION FORM FOR VERTICAL EN DASH + -- [0xFE58] = true, -- SMALL EM DASH + -- [0xFE63] = true, -- SMALL HYPHEN-MINUS + -- [0xFF0D] = true, -- FULLWIDTH HYPHEN-MINUS } local defaultjoiners = { @@ -868,13 +891,15 @@ if context then local charmin = tonumber(featureset.charmin) -- luatex now also has hyphenationmin local leftcharmin = tonumber(featureset.leftcharmin) local rightcharmin = tonumber(featureset.rightcharmin) - local rightedge = featureset.rightedge local leftchar = somehyphenchar(featureset.leftchar) local rightchar = somehyphenchar(featureset.rightchar) local rightchars = featureset.rightchars +local rightedge = featureset.rightedge +local autohyphen = v_yes -- featureset.autohyphen -- insert disc +local hyphenonly = v_yes -- featureset.hyphenonly -- don't hyphenate around rightchars = rightchars == v_word and true or tonumber(rightchars) - joinerchars = joinerchars == v_yes and defaultjoiners or joinerchars - hyphenchars = hyphenchars == v_yes and defaulthyphens or hyphenchars + joinerchars = joinerchars == v_yes and defaultjoiners or joinerchars -- table + hyphenchars = hyphenchars == v_yes and defaulthyphens or hyphenchars -- table -- not yet ok: extrachars have to be ignored so it cannot be all) featureset.extrachars = makeset(joinerchars or "",extrachars or "") featureset.hyphenchars = makeset(hyphenchars or "") @@ -886,8 +911,9 @@ if context then featureset.rightchars = rightchars featureset.leftchar = leftchar featureset.rightchar = rightchar - featureset.strict = rightedge == 'tex' - -- + -- featureset.strict = rightedge == "tex" +featureset.autohyphen = autohyphen == v_yes +featureset.hyphenonly = hyphenonly == v_yes return register(name,featureset) end @@ -959,10 +985,9 @@ if context then arguments = { "string", "string" } } - -- This is a relative large function with local variables and local - -- functions. A previous implementation had the functions outside but - -- this is cleaner and as efficient. The test runs 100 times over - -- tufte.tex, knuth.tex, zapf.tex, ward.tex and darwin.tex in lower + -- This is a relative large function with local variables and local functions. A previous + -- implementation had the functions outside but this is cleaner and as efficient. The test + -- runs 100 times over tufte.tex, knuth.tex, zapf.tex, ward.tex and darwin.tex in lower -- and uppercase with a 1mm hsize. -- -- language=0 language>0 4 | 3 * slower @@ -970,79 +995,89 @@ if context then -- tex 2.34 | 1.30 2.55 | 1.45 0.21 | 0.15 -- lua 2.42 | 1.38 3.30 | 1.84 0.88 | 0.46 -- - -- Of course we have extra overhead (virtual Lua machine) but also we - -- check attributes and support specific local options). The test puts - -- the typeset text in boxes and discards it. If we also flush the - -- runtime is 4.31|2.56 and 4.99|2.94 seconds so the relative difference - -- is (somehow) smaller. The test has 536 pages. There is a little bit - -- of extra overhead because we store the patterns in a different way. + -- Of course we have extra overhead (virtual Lua machine) but also we check attributes and + -- support specific local options). The test puts the typeset text in boxes and discards + -- it. If we also flush the runtime is 4.31|2.56 and 4.99|2.94 seconds so the relative + -- difference is (somehow) smaller. The test has 536 pages. There is a little bit of extra + -- overhead because we store the patterns in a different way. -- - -- As usual I will look for speedups. Some 0.01 seconds could be gained - -- by sharing patterns which is not impressive but it does save some - -- 3M memory on this test. (Some optimizations already brought the 3.30 - -- seconds down to 3.14 but it all depends on aggressive caching.) + -- As usual I will look for speedups. Some 0.01 seconds could be gained by sharing patterns + -- which is not impressive but it does save some 3M memory on this test. (Some optimizations + -- already brought the 3.30 seconds down to 3.14 but it all depends on aggressive caching.) - -- As we kick in the hyphenator before fonts get handled, we don't look - -- at implicit (font) kerns or ligatures. + -- As we kick in the hyphenator before fonts get handled, we don't look at implicit (font) + -- kerns or ligatures. local starttiming = statistics.starttiming local stoptiming = statistics.stoptiming - local strictids = { - [nodecodes.hlist] = true, - [nodecodes.vlist] = true, - [nodecodes.rule] = true, - [nodecodes.disc] = true, - [nodecodes.accent] = true, - [nodecodes.math] = true, - } + -- local strictids = { + -- [nodecodes.hlist] = true, + -- [nodecodes.vlist] = true, + -- [nodecodes.rule] = true, + -- [nodecodes.dir] = true, + -- [nodecodes.whatsit] = true, + -- [nodecodes.ins] = true, + -- [nodecodes.adjust] = true, + -- + -- [nodecodes.math] = true, + -- [nodecodes.disc] = true, + -- + -- [nodecodes.accent] = true, -- never used in context + -- } - -- local gf = getfield local gt = setmetatableindex("number") getfield = function(n,f) gt[f] = gt[f] + 1 return gf(n,f) end languages.GETFIELD = gt + -- a lot of overhead when only one char function traditional.hyphenate(head) - local first = tonut(head) - local tail = nil - local last = nil - local current = first - local dictionary = nil - local instance = nil - local characters = nil - local unicodes = nil - local exhyphenchar = tex.exhyphenchar - local extrachars = nil - local hyphenchars = nil - local language = nil - local start = nil - local stop = nil - local word = { } -- we reuse this table - local size = 0 - local leftchar = false - local rightchar = false -- utfbyte("-") - local leftexchar = false - local rightexchar = false -- utfbyte("-") - local leftmin = 0 - local rightmin = 0 - local charmin = 1 - local leftcharmin = nil - local rightcharmin = nil - ----- leftwordmin = nil - local rightwordmin = nil - local rightchars = nil - local leftchar = nil - local rightchar = nil - local attr = nil - local lastwordlast = nil - local hyphenated = hyphenate - local strict = nil - local hyphenpenalty = tex.hyphenpenalty + local first = tonut(head) + + + local tail = nil + local last = nil + local current = first + local dictionary = nil + local instance = nil + local characters = nil + local unicodes = nil + local exhyphenchar = tex.exhyphenchar + local extrachars = nil + local hyphenchars = nil + local language = nil + local start = nil + local stop = nil + local word = { } -- we reuse this table + local size = 0 + local leftchar = false + local rightchar = false -- utfbyte("-") + local leftexchar = false + local rightexchar = false -- utfbyte("-") + local leftmin = 0 + local rightmin = 0 + local charmin = 1 + local leftcharmin = nil + local rightcharmin = nil + ----- leftwordmin = nil + local rightwordmin = nil + local rightchars = nil + local leftchar = nil + local rightchar = nil + local attr = nil + local lastwordlast = nil + local hyphenated = hyphenate + ----- strict = nil + local exhyphenpenalty = tex.exhyphenpenalty + local hyphenpenalty = tex.hyphenpenalty + local autohyphen = false + local hyphenonly = false -- We cannot use an 'enabled' boolean (false when no characters or extras) because we -- can have plugins that set a characters metatable and so) ... it doesn't save much -- anyway. Using (unicodes and unicodes[code]) and a nil table when no characters also -- doesn't save much. So there not that much to gain for languages that don't hyphenate. -- - -- enabled = (unicodes and (next(unicodes) or getmetatable(unicodes))) or (extrachars and next(extrachars)) + -- enabled = (unicodes and (next(unicodes) or getmetatable(unicodes))) + -- or (extrachars and next(extrachars)) -- -- This can be used to not add characters i.e. keep size 0 but then we need to check for -- attributes that change it, which costs time too. Not much to gain there. @@ -1071,8 +1106,10 @@ if context then rightcharmin = f.rightcharmin leftchar = f.leftchar rightchar = f.rightchar - strict = f.strict and strictids + -- strict = f.strict and strictids rightchars = f.rightchars + autohyphen = f.autohyphen + hyphenonly = f.hyphenonly if rightwordmin and rightwordmin > 0 and lastwordlast ~= rightwordmin then -- so we can change mid paragraph but it's kind of unpredictable then if not tail then @@ -1117,7 +1154,9 @@ if context then rightcharmin = false leftchar = false rightchar = false - strict = false + -- strict = false + autohyphen = false + hyphenonly = false end return a @@ -1130,12 +1169,11 @@ if context then local rsize = 0 local position = 1 - -- todo: remember last dics and don't go back to before that (plus - -- message) .. for simplicity we also assume that we don't start - -- with a dics node + -- todo: remember last dics and don't go back to before that (plus message) ... + -- for simplicity we also assume that we don't start with a dics node -- - -- there can be a conflict: if we backtrack then we can end up in - -- another disc and get out of sync (dup chars and so) + -- there can be a conflict: if we backtrack then we can end up in another disc + -- and get out of sync (dup chars and so) while position <= size do if position >= leftmin and position <= rightmin then @@ -1237,8 +1275,7 @@ if context then return head end - local current = start - + local current = start local attrnode = start -- will be different, just the first char for i=1,rsize do @@ -1253,7 +1290,7 @@ if context then if leftchar then post = serialize(true,leftchar) end - setdisc(disc,pre,post,nil,discretionary_code,hyphenpenalty) + setdisc(disc,pre,post,nil,regular_code,hyphenpenalty) if attrnode then setattrlist(disc,attrnode) end @@ -1287,7 +1324,8 @@ if context then replace = nil end end - setdisc(disc,pre,post,replace,discretionary_code,hyphenpenalty) + -- maybe regular code + setdisc(disc,pre,post,replace,regular_code,hyphenpenalty) if attrnode then setattrlist(disc,attrnode) end @@ -1327,7 +1365,7 @@ if context then end pre = copy_node(glyph) setchar(pre,rightchar and rightchar > 0 and rightchar or code) - setdisc(disc,pre,post,replace,discretionary_code,hyphenpenalty) + setdisc(disc,pre,post,replace,automatic_code,hyphenpenalty) -- ex ? if attrnode then setattrlist(disc,attrnode) end @@ -1335,76 +1373,49 @@ if context then return current end + local function injectseries(current,last,next,attrnode) + local disc = new_disc() + local start = current + first, current = insert_before(first,current,disc) + setprev(start) + setnext(last) + if next then + setlink(current,next) + else + setnext(current) + end + local pre = copy_list(start) + local post = nil + local replace = start + setdisc(disc,pre,post,replace,automatic_code,hyphenpenalty) -- ex ? + if attrnode then + setattrlist(disc,attrnode) + end + return current + end + local a = getattr(first,a_hyphenation) if a ~= attr then attr = synchronizefeatureset(a) end - -- The first attribute in a word determines the way a word gets hyphenated - -- and if relevant, other properties are also set then. We could optimize for - -- silly one-char cases but it has no priority as the code is still not that - -- much slower than the native hyphenator and this variant also provides room - -- for extensions. + -- The first attribute in a word determines the way a word gets hyphenated and if + -- relevant, other properties are also set then. We could optimize for silly one-char + -- cases but it has no priority as the code is still not that much slower than the + -- native hyphenator and this variant also provides room for extensions. + + local skipping = false while current and current ~= last do -- and current local code, id = isglyph(current) if code then - local lang = getlang(current) - if lang ~= language then - if dictionary and size > charmin and leftmin + rightmin <= size then - -- only german has many words starting with an uppercase character - if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then - -- skip - else - local hyphens = hyphenated(dictionary,word,size) - if hyphens then - flush(hyphens) - end - end - end - language = lang - if language > 0 then - -- - dictionary = dictionaries[language] - instance = dictionary.instance - characters = dictionary.characters - unicodes = dictionary.unicodes - -- - local a = getattr(current,a_hyphenation) - attr = synchronizefeatureset(a) - leftchar = leftchar or (instance and posthyphenchar (instance)) -- we can make this more - rightchar = rightchar or (instance and prehyphenchar (instance)) -- efficient if needed - leftexchar = (instance and preexhyphenchar (instance)) - rightexchar = (instance and postexhyphenchar(instance)) - leftmin = leftcharmin or getfield(current,"left") - rightmin = rightcharmin or getfield(current,"right") - if not leftchar or leftchar < 0 then - leftchar = false - end - if not rightchar or rightchar < 0 then - rightchar = false - end - -- - local char = unicodes[code] or (extrachars and extrachars[code]) - if char then - word[1] = char - size = 1 - start = current - else - size = 0 - end - else - size = 0 - end - elseif language <= 0 then - -- - elseif size > 0 then - local char = unicodes[code] or (extrachars and extrachars[code]) - if char then - size = size + 1 - word[size] = char - elseif dictionary then - if size > charmin and leftmin + rightmin <= size then + if skipping then + current = getnext(current) + else + local lang = getlang(current) + if lang ~= language then + if dictionary and size > charmin and leftmin + rightmin <= size then + -- only german has many words starting with an uppercase character if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then -- skip else @@ -1414,67 +1425,151 @@ if context then end end end - size = 0 - -- maybe also a strict mode here: no hyphenation before hyphenchars and skip - -- the next set (but then, strict is an option) - if code == exhyphenchar then - current = inject(leftexchar,rightexchar,code,current) - elseif hyphenchars and hyphenchars[code] then - current = inject(leftchar,rightchar,code,current) + language = lang + if language > 0 then + -- + dictionary = dictionaries[language] + instance = dictionary.instance + characters = dictionary.characters + unicodes = dictionary.unicodes + -- + local a = getattr(current,a_hyphenation) + attr = synchronizefeatureset(a) + leftchar = leftchar or (instance and posthyphenchar (instance)) -- we can make this more + rightchar = rightchar or (instance and prehyphenchar (instance)) -- efficient if needed + leftexchar = (instance and preexhyphenchar (instance)) + rightexchar = (instance and postexhyphenchar(instance)) + leftmin = leftcharmin or getfield(current,"left") + rightmin = rightcharmin or getfield(current,"right") + if not leftchar or leftchar < 0 then + leftchar = false + end + if not rightchar or rightchar < 0 then + rightchar = false + end + -- + local char = unicodes[code] or (extrachars and extrachars[code]) + if char then + word[1] = char + size = 1 + start = current + else + size = 0 + end + else + size = 0 end - end - else - local a = getattr(current,a_hyphenation) - if a ~= attr then - attr = synchronizefeatureset(a) -- influences extrachars - leftchar = leftchar or (instance and posthyphenchar (instance)) -- we can make this more - rightchar = rightchar or (instance and prehyphenchar (instance)) -- efficient if needed - leftexchar = (instance and preexhyphenchar (instance)) - rightexchar = (instance and postexhyphenchar(instance)) - leftmin = leftcharmin or getfield(current,"left") - rightmin = rightcharmin or getfield(current,"right") - if not leftchar or leftchar < 0 then - leftchar = false + elseif language <= 0 then + -- + elseif size > 0 then + local char = unicodes[code] or (extrachars and extrachars[code]) + if char then + size = size + 1 + word[size] = char + elseif dictionary then + if not hyphenonly or code ~= exhyphenchar then + if size > charmin and leftmin + rightmin <= size then + if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then + -- skip + else + local hyphens = hyphenated(dictionary,word,size) + if hyphens then + flush(hyphens) + end + end + end + end + size = 0 + if code == exhyphenchar then -- normally the - + local next = getnext(current) + local last = current + local font = getfont(current) + while next and ischar(next,font) == code do + last = next + next = getnext(next) + end + if not autohyphen then + current = last + elseif current == last then + current = inject(leftexchar,rightexchar,code,current) + else + current = injectseries(current,last,next,current) + end + if hyphenonly then + skipping = true + end + elseif hyphenchars then + local char = hyphenchars[code] + if char == true then + char = code + end + if char then + current = inject(leftchar and char or nil,rightchar and char or nil,char,current) + end + end end - if not rightchar or rightchar < 0 then - rightchar = false + else + local a = getattr(current,a_hyphenation) + if a ~= attr then + attr = synchronizefeatureset(a) -- influences extrachars + leftchar = leftchar or (instance and posthyphenchar (instance)) -- we can make this more + rightchar = rightchar or (instance and prehyphenchar (instance)) -- efficient if needed + leftexchar = (instance and preexhyphenchar (instance)) + rightexchar = (instance and postexhyphenchar(instance)) + leftmin = leftcharmin or getfield(current,"left") + rightmin = rightcharmin or getfield(current,"right") + if not leftchar or leftchar < 0 then + leftchar = false + end + if not rightchar or rightchar < 0 then + rightchar = false + end + end + -- + local char = unicodes[code] or (extrachars and extrachars[code]) + if char then + word[1] = char + size = 1 + start = current end end - -- - local char = unicodes[code] or (extrachars and extrachars[code]) - if char then - word[1] = char - size = 1 - start = current - end + stop = current + current = getnext(current) end - stop = current - current = getnext(current) else + if skipping then + skipping = false + end if id == disc_code then - local subtype = getsubtype(current) - if subtype == discretionary_code then -- \discretionary - size = 0 - current = getnext(current) - elseif subtype == explicit_code then -- \- => only here - size = 0 - expand_explicit(current) - current = getnext(current) - elseif subtype == automatic_code then -- - => only here - size = 0 - expand_automatic(current) - current = getnext(current) + if expanded then + -- pre 1.005 + local subtype = getsubtype(current) + if subtype == discretionary_code then -- \discretionary + size = 0 + elseif subtype == explicit_code then -- \- => only here + -- automatic (-) : the old parser makes negative char entries + size = 0 + expand_explicit(current) + elseif subtype == automatic_code then -- - => only here + -- automatic (-) : the old hyphenator turns an exhyphen into glyph+disc + size = 0 + expand_automatic(current) + else + -- first : done by the hyphenator + -- second : done by the hyphenator + -- regular : done by the hyphenator + size = 0 + end else - -- automatic (-) : the hyphenator turns an exhyphen into glyph+disc - -- first : done by the hyphenator - -- second : done by the hyphenator - -- regular : done by the hyphenator size = 0 - current = getnext(current) end - elseif strict and strict[id] then - current = id == math_code and getnext(end_of_math(current)) or getnext(current) - size = 0 + current = getnext(current) + if hyphenonly then + skipping = true + end + -- elseif strict and strict[id] then + -- current = id == math_code and getnext(end_of_math(current)) or getnext(current) + -- size = 0 else current = id == math_code and getnext(end_of_math(current)) or getnext(current) end @@ -1493,8 +1588,8 @@ if context then end end end - -- we can have quit due to last so we need to flush the last seen word, we could move this in - -- the loop and test for current but ... messy + -- we can have quit due to last so we need to flush the last seen word, we could move + -- this in the loop and test for current but ... messy if dictionary and size > charmin and leftmin + rightmin <= size then if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then -- skip @@ -1547,18 +1642,27 @@ if context then return head, done end - local function expanded(head) + local expanded = function (head) local done = hyphenate(head) - if done then - for d in traverse_id(disc_code,tonut(head)) do - local s = getsubtype(d) - if s ~= discretionary_code then - expanders[s](d,template) - done = true + return head, done + end + + if LUATEXVERSION< 1.005 then + + expanded = function(head) + local done = hyphenate(head) + if done then + for d in traverse_id(disc_code,tonut(head)) do + local s = getsubtype(d) + if s ~= discretionary_code then + expanders[s](d,template) + done = true + end end end + return head, done end - return head, done + end local getcount = tex.getcount @@ -1587,7 +1691,7 @@ if context then methods.tex = original methods.original = original - methods.expanded = expanded + methods.expanded = expanded -- obsolete starting with 1.005 methods.traditional = languages.hyphenators.traditional.hyphenate methods.none = false -- function(head) return head, false end @@ -1679,54 +1783,54 @@ if context then else --- traditional.loadpatterns("nl","lang-nl") --- traditional.loadpatterns("de","lang-de") --- traditional.loadpatterns("us","lang-us") - --- traditional.registerpattern("nl","e1ë", { start = 1, length = 2, before = "e", after = "e" } ) --- traditional.registerpattern("nl","oo7ë", { start = 2, length = 3, before = "o", after = "e" } ) --- traditional.registerpattern("de","qqxc9xkqq",{ start = 3, length = 4, before = "ab", after = "cd" } ) - --- local specification = { --- leftcharmin = 2, --- rightcharmin = 2, --- leftchar = "<", --- rightchar = ">", --- } - --- print("reëel", traditional.injecthyphens(dictionaries.nl,"reëel", specification),"r{e>}{<e}{eë}el") --- print("reeëel", traditional.injecthyphens(dictionaries.nl,"reeëel", specification),"re{e>}{<e}{eë}el") --- print("rooëel", traditional.injecthyphens(dictionaries.nl,"rooëel", specification),"r{o>}{<e}{ooë}el") - --- print( "qxcxkq", traditional.injecthyphens(dictionaries.de, "qxcxkq", specification),"") --- print( "qqxcxkqq", traditional.injecthyphens(dictionaries.de, "qqxcxkqq", specification),"") --- print( "qqqxcxkqqq", traditional.injecthyphens(dictionaries.de, "qqqxcxkqqq", specification),"") --- print("qqqqxcxkqqqq",traditional.injecthyphens(dictionaries.de,"qqqqxcxkqqqq",specification),"") - --- print("kunstmatig", traditional.injecthyphens(dictionaries.nl,"kunstmatig", specification),"") --- print("kunststofmatig", traditional.injecthyphens(dictionaries.nl,"kunststofmatig", specification),"") --- print("kunst[stof]matig", traditional.injecthyphens(dictionaries.nl,"kunst[stof]matig", specification),"") - --- traditional.loadpatterns("us","lang-us") - --- local specification = { --- leftcharmin = 2, --- rightcharmin = 2, --- leftchar = false, --- rightchar = false, --- } - --- trace_steps = true - --- print("components", traditional.injecthyphens(dictionaries.us,"components", specification),"") --- print("single", traditional.injecthyphens(dictionaries.us,"single", specification),"sin-gle") --- print("everyday", traditional.injecthyphens(dictionaries.us,"everyday", specification),"every-day") --- print("associate", traditional.injecthyphens(dictionaries.us,"associate", specification),"as-so-ciate") --- print("philanthropic", traditional.injecthyphens(dictionaries.us,"philanthropic", specification),"phil-an-thropic") --- print("projects", traditional.injecthyphens(dictionaries.us,"projects", specification),"projects") --- print("Associate", traditional.injecthyphens(dictionaries.us,"Associate", specification),"As-so-ciate") --- print("Philanthropic", traditional.injecthyphens(dictionaries.us,"Philanthropic", specification),"Phil-an-thropic") --- print("Projects", traditional.injecthyphens(dictionaries.us,"Projects", specification),"Projects") + -- traditional.loadpatterns("nl","lang-nl") + -- traditional.loadpatterns("de","lang-de") + -- traditional.loadpatterns("us","lang-us") + + -- traditional.registerpattern("nl","e1ë", { start = 1, length = 2, before = "e", after = "e" } ) + -- traditional.registerpattern("nl","oo7ë", { start = 2, length = 3, before = "o", after = "e" } ) + -- traditional.registerpattern("de","qqxc9xkqq",{ start = 3, length = 4, before = "ab", after = "cd" } ) + + -- local specification = { + -- leftcharmin = 2, + -- rightcharmin = 2, + -- leftchar = "<", + -- rightchar = ">", + -- } + + -- print("reëel", traditional.injecthyphens(dictionaries.nl,"reëel", specification),"r{e>}{<e}{eë}el") + -- print("reeëel", traditional.injecthyphens(dictionaries.nl,"reeëel", specification),"re{e>}{<e}{eë}el") + -- print("rooëel", traditional.injecthyphens(dictionaries.nl,"rooëel", specification),"r{o>}{<e}{ooë}el") + + -- print( "qxcxkq", traditional.injecthyphens(dictionaries.de, "qxcxkq", specification),"") + -- print( "qqxcxkqq", traditional.injecthyphens(dictionaries.de, "qqxcxkqq", specification),"") + -- print( "qqqxcxkqqq", traditional.injecthyphens(dictionaries.de, "qqqxcxkqqq", specification),"") + -- print("qqqqxcxkqqqq",traditional.injecthyphens(dictionaries.de,"qqqqxcxkqqqq",specification),"") + + -- print("kunstmatig", traditional.injecthyphens(dictionaries.nl,"kunstmatig", specification),"") + -- print("kunststofmatig", traditional.injecthyphens(dictionaries.nl,"kunststofmatig", specification),"") + -- print("kunst[stof]matig", traditional.injecthyphens(dictionaries.nl,"kunst[stof]matig", specification),"") + + -- traditional.loadpatterns("us","lang-us") + + -- local specification = { + -- leftcharmin = 2, + -- rightcharmin = 2, + -- leftchar = false, + -- rightchar = false, + -- } + + -- trace_steps = true + + -- print("components", traditional.injecthyphens(dictionaries.us,"components", specification),"") + -- print("single", traditional.injecthyphens(dictionaries.us,"single", specification),"sin-gle") + -- print("everyday", traditional.injecthyphens(dictionaries.us,"everyday", specification),"every-day") + -- print("associate", traditional.injecthyphens(dictionaries.us,"associate", specification),"as-so-ciate") + -- print("philanthropic", traditional.injecthyphens(dictionaries.us,"philanthropic", specification),"phil-an-thropic") + -- print("projects", traditional.injecthyphens(dictionaries.us,"projects", specification),"projects") + -- print("Associate", traditional.injecthyphens(dictionaries.us,"Associate", specification),"As-so-ciate") + -- print("Philanthropic", traditional.injecthyphens(dictionaries.us,"Philanthropic", specification),"Phil-an-thropic") + -- print("Projects", traditional.injecthyphens(dictionaries.us,"Projects", specification),"Projects") end diff --git a/tex/context/base/mkiv/lang-hyp.mkiv b/tex/context/base/mkiv/lang-hyp.mkiv index ca9113386..c111bc31a 100644 --- a/tex/context/base/mkiv/lang-hyp.mkiv +++ b/tex/context/base/mkiv/lang-hyp.mkiv @@ -37,6 +37,22 @@ \definesystemattribute[hyphenation][public] +%D After a decade of playing with these things in \LUATEX|/|\MKIV\ it's time to +%D finish the way we deal with discretionaries. Apart from the fact that they play a +%D role in hyphenation they also need to be dealt with in fonts. Flattening, cleanup +%D and such are now more or less default in \CONTEXT\ so we can simplify some of the +%D code. We also use the new penalty mechanism. + +\newcount\compoundhyphenpenalty + +\automatichyphenmode \plusone +\hyphenpenaltymode \plusfour + +\hyphenpenalty 50 % hyphenator +\automatichyphenpenalty 50 % - +\explicithyphenpenalty 50 % \- +\compoundhyphenpenalty 50 + %D This command can change! At some point we will keep the setting with the %D paragraph and then the \type {\par} can go. @@ -51,7 +67,6 @@ % \enabledirectives[hyphenators.method]% % \endgroup} - % \exhyphenchar \hyphenasciicode % \preexhyphenchar \lessthanasciicode % \postexhyphenchar\morethanasciicode @@ -123,7 +138,9 @@ leftchar \numexpr\dummyparameter\s!lefthyphenchar\relax rightchar \numexpr\dummyparameter\s!righthyphenchar\relax alternative {\dummyparameter\c!alternative}% - rightedge {\dummyparameter\c!rightedge}% +rightedge {\dummyparameter\c!rightedge}% +% autohyphen {\dummyparameter\c!autohyphen} +% hyphenonly {\dummyparameter\c!hyphenonly} }% \relax \endgroup} diff --git a/tex/context/base/mkiv/lang-ini.mkiv b/tex/context/base/mkiv/lang-ini.mkiv index 3303e46d4..947422710 100644 --- a/tex/context/base/mkiv/lang-ini.mkiv +++ b/tex/context/base/mkiv/lang-ini.mkiv @@ -499,16 +499,6 @@ \fi \lang_basics_synchronize_min_max} -% \unexpanded\def\nohyphens % % % % % not clever, we still hyphenate but supress application -% {\ifx\dohyphens\relax -% \unexpanded\edef\dohyphens -% {\hyphenpenalty \the\hyphenpenalty -% \exhyphenpenalty\the\exhyphenpenalty -% \relax}% -% \fi -% \hyphenpenalty \plustenthousand -% \exhyphenpenalty\plustenthousand} - \unexpanded\def\nohyphens % nicer for url's {\ifx\dohyphens\relax \unexpanded\edef\dohyphens diff --git a/tex/context/base/mkiv/mult-low.lua b/tex/context/base/mkiv/mult-low.lua index 23ee8688f..4501afefb 100644 --- a/tex/context/base/mkiv/mult-low.lua +++ b/tex/context/base/mkiv/mult-low.lua @@ -424,5 +424,7 @@ return { "naturalhbox", "naturalvbox", "naturalhpack", "naturalvpack", -- "frule", + -- + "compoundhyphenpenalty", } } diff --git a/tex/context/base/mkiv/mult-prm.lua b/tex/context/base/mkiv/mult-prm.lua index 7715037dd..956f83636 100644 --- a/tex/context/base/mkiv/mult-prm.lua +++ b/tex/context/base/mkiv/mult-prm.lua @@ -234,8 +234,11 @@ return { "attribute", "attributedef", "hyphenpenaltymode", + "automatichyphenmode", "automatichyphenpenalty", + "automaticdiscretionary", "explicithyphenpenalty", + "explicitdiscretionary", "bodydir", "boundary", "boxdir", @@ -686,8 +689,11 @@ return { "attribute", "attributedef", "hyphenpenaltymode", + "automatichyphenmode", "automatichyphenpenalty", + "automaticdiscretionary", "explicithyphenpenalty", + "explicitdiscretionary", "badness", "baselineskip", "batchmode", diff --git a/tex/context/base/mkiv/node-fnt.lua b/tex/context/base/mkiv/node-fnt.lua index 3dc99e5f2..8aa088f88 100644 --- a/tex/context/base/mkiv/node-fnt.lua +++ b/tex/context/base/mkiv/node-fnt.lua @@ -139,7 +139,12 @@ fonts.hashes.processes = fontprocesses local ligaturing = nuts.ligaturing local kerning = nuts.kerning -local expanders +-- -- -- this will go away + +local disccodes = nodes.disccodes +local explicit_code = disccodes.explicit +local automatic_code = disccodes.automatic +local expanders = nil function fonts.setdiscexpansion(v) if v == nil or v == true then @@ -157,6 +162,8 @@ end fonts.setdiscexpansion(true) +-- -- -- till here + local function start_trace(head) run = run + 1 report_fonts() @@ -358,9 +365,10 @@ function handlers.characters(head,groupcode,size,packtype,direction) -- basefont is not supported in disc only runs ... it would mean a lot of -- ranges .. we could try to run basemode as a separate processor run but -- not for now (we can consider it when the new node code is tested - for d in traverse_id(disc_code,nuthead) do - -- we could use first_glyph, only doing replace is good enough + -- we could use first_glyph, only doing replace is good enough because + -- pre and post are normally used for hyphens and these come from fonts + -- that part of the hyphenated word local _, _, r = getdisc(d) if r then local prevfont = nil @@ -407,9 +415,7 @@ function handlers.characters(head,groupcode,size,packtype,direction) end elseif expanders then local subtype = getsubtype(d) - if subtype == discretionary_code then - -- already done when replace - else + if subtype == automatic_code or subtype == explicit_code then expanders[subtype](d) e = e + 1 end diff --git a/tex/context/base/mkiv/node-ini.lua b/tex/context/base/mkiv/node-ini.lua index 46196e32d..bdccf8cba 100644 --- a/tex/context/base/mkiv/node-ini.lua +++ b/tex/context/base/mkiv/node-ini.lua @@ -410,3 +410,11 @@ if not nodecodes.dir then report_codes("use a newer version of luatex") os.exit() end + +-- We don't need this sanitize-after-callback in ConTeXt and by disabling it we +-- also have a way to check if LuaTeX itself does the right thing. + +if node.fix_node_lists then + node.fix_node_lists(false) +end + diff --git a/tex/context/base/mkiv/page-mix.mkiv b/tex/context/base/mkiv/page-mix.mkiv index 7bd30ceee..7defece12 100644 --- a/tex/context/base/mkiv/page-mix.mkiv +++ b/tex/context/base/mkiv/page-mix.mkiv @@ -500,9 +500,14 @@ \unexpanded\def\strc_itemgroups_stop_columns {\page_mix_fast_columns_stop} % set by start -\setupmixedcolumns - [\s!itemgroupcolumns] - [\c!grid=\itemgroupparameter\c!grid] +% not used nor documented so commented: +% +% \setupmixedcolumns +% [\s!itemgroupcolumns] +% [\c!grid=\itemgroupparameter\c!grid] +% +% \setupitemgroup +% [\c!grid=\v!yes] % we need a value % better diff --git a/tex/context/base/mkiv/spac-ali.mkiv b/tex/context/base/mkiv/spac-ali.mkiv index 67cc1494c..af02f76ae 100644 --- a/tex/context/base/mkiv/spac-ali.mkiv +++ b/tex/context/base/mkiv/spac-ali.mkiv @@ -275,6 +275,8 @@ \fi \ifx\dohyphens\relax % was 2.5 in old implementation using scratch registers \hyphenpenalty\dimexpr2.8\hsize/\dimexpr#1\relax\relax % 50 in raggedright/raggedleft + %\else + % no need to do something as we're in \nohyphens \fi} \unexpanded\def\spac_align_set_tolerant diff --git a/tex/context/base/mkiv/status-files.pdf b/tex/context/base/mkiv/status-files.pdf Binary files differindex 547accaa3..beae5b1b3 100644 --- a/tex/context/base/mkiv/status-files.pdf +++ b/tex/context/base/mkiv/status-files.pdf diff --git a/tex/context/base/mkiv/status-lua.pdf b/tex/context/base/mkiv/status-lua.pdf Binary files differindex 84ecb4427..68038f64c 100644 --- a/tex/context/base/mkiv/status-lua.pdf +++ b/tex/context/base/mkiv/status-lua.pdf diff --git a/tex/context/base/mkiv/syst-ini.mkiv b/tex/context/base/mkiv/syst-ini.mkiv index 1b7f8fd19..bbc856a5e 100644 --- a/tex/context/base/mkiv/syst-ini.mkiv +++ b/tex/context/base/mkiv/syst-ini.mkiv @@ -1165,16 +1165,20 @@ %D For now: -\ifdefined\protrusionboundary \else \let\protrusionboundary\boundary \fi -\ifdefined\wordboundary \else \let\wordboundary \noboundary \fi +\ifdefined\protrusionboundary \else \let\protrusionboundary\boundary \fi +\ifdefined\wordboundary \else \let\wordboundary \noboundary \fi -\ifdefined\mathrulesfam \else \newcount\mathrulesfam \fi -\ifdefined\mathrulesmode \else \newcount\mathrulesmode \fi -\ifdefined\mathsurroundmode \else \newcount\mathsurroundmode \fi -\ifdefined\mathitalicsmode \else \newcount\mathitalicsmode \fi +\ifdefined\mathrulesfam \else \newcount\mathrulesfam \fi +\ifdefined\mathrulesmode \else \newcount\mathrulesmode \fi +\ifdefined\mathsurroundmode \else \newcount\mathsurroundmode \fi +\ifdefined\mathitalicsmode \else \newcount\mathitalicsmode \fi \ifdefined\hyphenpenaltymode \else \newcount\hyphenpenaltymode \fi \ifdefined\automatichyphenpenalty \else \newcount\automatichyphenpenalty \fi +\ifdefined\automatichyphenmode \else \newcount\automatichyphenmode \fi \ifdefined\explicithyphenpenalty \else \newcount\explicithyphenpenalty \fi +\ifdefined\explicitdiscretionary \else \let\explicitdiscretionary \- \fi +\ifdefined\automaticdiscretionary \else \def\automaticdiscretionary{\Uchar\exhyphenchar} \fi + \protect \endinput diff --git a/tex/context/base/mkiv/util-str.lua b/tex/context/base/mkiv/util-str.lua index 9e6be9999..46b15e15a 100644 --- a/tex/context/base/mkiv/util-str.lua +++ b/tex/context/base/mkiv/util-str.lua @@ -141,6 +141,7 @@ local pattern = )^1) function strings.tabtospace(str,tab) + -- no real gain in first checking if a \t is there return lpegmatch(pattern,str,1,tab or 7) end diff --git a/tex/context/interface/mkiv/i-context.pdf b/tex/context/interface/mkiv/i-context.pdf Binary files differindex d01f8a514..dd53876c8 100644 --- a/tex/context/interface/mkiv/i-context.pdf +++ b/tex/context/interface/mkiv/i-context.pdf diff --git a/tex/context/interface/mkiv/i-readme.pdf b/tex/context/interface/mkiv/i-readme.pdf Binary files differindex c23a7c045..519b14632 100644 --- a/tex/context/interface/mkiv/i-readme.pdf +++ b/tex/context/interface/mkiv/i-readme.pdf diff --git a/tex/context/modules/mkiv/m-asymptote.mkiv b/tex/context/modules/mkiv/m-asymptote.mkiv index 6252e7a60..c236ceee5 100644 --- a/tex/context/modules/mkiv/m-asymptote.mkiv +++ b/tex/context/modules/mkiv/m-asymptote.mkiv @@ -131,7 +131,7 @@ fill((1cm,2cm)--(3cm,3cm)--(4cm,0cm)--cycle); view = 'ortho' \stopluaparameterset -\asymptote[demo-1][type=prc,width=5cm,height=5cm,frame=on,display=yes,controls=yes] +\asymptote[demo-1][type=prc,width=5cm,height=5cm,frame=on,display=yes,controls=yes,preview=yes] \asymptote[demo-2][type=pdf,width=8cm,frame=on] % \typebuffer[asymptote:demo-1] diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua index f1ad29ae5..1836b064a 100644 --- a/tex/generic/context/luatex/luatex-fonts-merged.lua +++ b/tex/generic/context/luatex/luatex-fonts-merged.lua @@ -1,6 +1,6 @@ -- merged file : c:/data/develop/context/sources/luatex-fonts-merged.lua -- parent file : c:/data/develop/context/sources/luatex-fonts.lua --- merge date : 03/26/17 16:15:15 +-- merge date : 04/02/17 19:51:22 do -- begin closure to overcome local limits and interference @@ -25920,8 +25920,7 @@ local function initializecolr(tfmdata,kind,value) local w=character.width or 0 local s=#colorlist local t={ - { "special","pdf:page:q" }, - { "special","pdf:raw:"..b } + { "special","pdf:direct:q "..b }, } local n=#t for i=1,s do @@ -25932,8 +25931,7 @@ local function initializecolr(tfmdata,kind,value) n=n+1 t[n]={ "right",-w } end end - n=n+1 t[n]={ "special","pdf:page:"..e } - n=n+1 t[n]={ "special","pdf:raw:Q" } + n=n+1 t[n]={ "special","pdf:direct:"..e.." Q"} character.commands=t end end |