From e32f57c9c5968f0c09130f6e24e28a96d6e1393d Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Sun, 2 Apr 2017 20:46:19 +0200 Subject: 2017-04-02 19:57:00 --- tex/context/base/mkii/cont-new.mkii | 2 +- tex/context/base/mkii/context.mkii | 2 +- tex/context/base/mkiv/cont-new.mkiv | 2 +- tex/context/base/mkiv/context.mkiv | 2 +- tex/context/base/mkiv/enco-ini.mkiv | 2 +- tex/context/base/mkiv/font-ocl.lua | 12 +- tex/context/base/mkiv/font-run.mkiv | 2 + tex/context/base/mkiv/font-syn.lua | 14 +- tex/context/base/mkiv/lang-def.mkiv | 1 - tex/context/base/mkiv/lang-dis.lua | 250 ++++---- tex/context/base/mkiv/lang-hyp.lua | 700 ++++++++++++--------- tex/context/base/mkiv/lang-hyp.mkiv | 21 +- tex/context/base/mkiv/lang-ini.mkiv | 10 - tex/context/base/mkiv/mult-low.lua | 2 + tex/context/base/mkiv/mult-prm.lua | 6 + tex/context/base/mkiv/node-fnt.lua | 18 +- tex/context/base/mkiv/node-ini.lua | 8 + tex/context/base/mkiv/page-mix.mkiv | 11 +- tex/context/base/mkiv/spac-ali.mkiv | 2 + tex/context/base/mkiv/status-files.pdf | Bin 25664 -> 25624 bytes tex/context/base/mkiv/status-lua.pdf | Bin 422697 -> 422692 bytes tex/context/base/mkiv/syst-ini.mkiv | 16 +- tex/context/base/mkiv/util-str.lua | 1 + tex/context/interface/mkiv/i-context.pdf | Bin 804294 -> 804302 bytes tex/context/interface/mkiv/i-readme.pdf | Bin 60771 -> 60771 bytes tex/context/modules/mkiv/m-asymptote.mkiv | 2 +- tex/generic/context/luatex/luatex-fonts-merged.lua | 8 +- 27 files changed, 633 insertions(+), 461 deletions(-) (limited to 'tex') diff --git a/tex/context/base/mkii/cont-new.mkii b/tex/context/base/mkii/cont-new.mkii index 515a20ad4..eed67e1c2 100644 --- a/tex/context/base/mkii/cont-new.mkii +++ b/tex/context/base/mkii/cont-new.mkii @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2017.03.26 16:15} +\newcontextversion{2017.04.02 19:51} %D This file is loaded at runtime, thereby providing an %D excellent place for hacks, patches, extensions and new diff --git a/tex/context/base/mkii/context.mkii b/tex/context/base/mkii/context.mkii index 0b8726e31..c20ff4f1a 100644 --- a/tex/context/base/mkii/context.mkii +++ b/tex/context/base/mkii/context.mkii @@ -20,7 +20,7 @@ %D your styles an modules. \edef\contextformat {\jobname} -\edef\contextversion{2017.03.26 16:15} +\edef\contextversion{2017.04.02 19:51} %D For those who want to use this: diff --git a/tex/context/base/mkiv/cont-new.mkiv b/tex/context/base/mkiv/cont-new.mkiv index 8a139dd1c..47ef5499d 100644 --- a/tex/context/base/mkiv/cont-new.mkiv +++ b/tex/context/base/mkiv/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2017.03.26 16:15} +\newcontextversion{2017.04.02 19:51} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. diff --git a/tex/context/base/mkiv/context.mkiv b/tex/context/base/mkiv/context.mkiv index 5220613a6..ee6f6ddb2 100644 --- a/tex/context/base/mkiv/context.mkiv +++ b/tex/context/base/mkiv/context.mkiv @@ -39,7 +39,7 @@ %D up and the dependencies are more consistent. \edef\contextformat {\jobname} -\edef\contextversion{2017.03.26 16:15} +\edef\contextversion{2017.04.02 19:51} \edef\contextkind {beta} %D For those who want to use this: diff --git a/tex/context/base/mkiv/enco-ini.mkiv b/tex/context/base/mkiv/enco-ini.mkiv index 835ee61f5..50375251a 100644 --- a/tex/context/base/mkiv/enco-ini.mkiv +++ b/tex/context/base/mkiv/enco-ini.mkiv @@ -282,7 +282,7 @@ % some more \ifdefined\softhyphen \else - \let\softhyphen\- + \let\softhyphen\explicitdiscretionary \fi \def\hyphen {\softhyphen} diff --git a/tex/context/base/mkiv/font-ocl.lua b/tex/context/base/mkiv/font-ocl.lua index 3583e15d0..68d9ac650 100644 --- a/tex/context/base/mkiv/font-ocl.lua +++ b/tex/context/base/mkiv/font-ocl.lua @@ -140,8 +140,10 @@ local function initializecolr(tfmdata,kind,value) -- hm, always value -- are somewhat inefficient as each glyph gets the font set. It's a -- side effect of the fact that a font is handled when a character gets -- flushed. - { "special", "pdf:page:q" }, - { "special", "pdf:raw:" .. b } + -- { "special", "pdf:page:q" }, + -- { "special", "pdf:raw:" .. b } + -- This seems to be okay too: + { "special", "pdf:direct:q " .. b }, } local n = #t for i=1,s do @@ -152,8 +154,10 @@ local function initializecolr(tfmdata,kind,value) -- hm, always value n = n + 1 t[n] = { "right", -w } end end - n = n + 1 t[n] = { "special", "pdf:page:" .. e } - n = n + 1 t[n] = { "special", "pdf:raw:Q" } + -- n = n + 1 t[n] = { "special", "pdf:page:" .. e } + -- n = n + 1 t[n] = { "special", "pdf:raw:Q" } + -- This seems to be okay too: + n = n + 1 t[n] = { "special", "pdf:direct:" .. e .. " Q"} character.commands = t end end diff --git a/tex/context/base/mkiv/font-run.mkiv b/tex/context/base/mkiv/font-run.mkiv index e9a6f9ddb..ebb3a576c 100644 --- a/tex/context/base/mkiv/font-run.mkiv +++ b/tex/context/base/mkiv/font-run.mkiv @@ -14,6 +14,8 @@ %D [This code is hooked into the core macros and saves some format %D space. It needs a cleanup as it's real old derioved \MKII\ code] +%D +%D Better use \type{\bTABLE...\eTABLE}. \unprotect diff --git a/tex/context/base/mkiv/font-syn.lua b/tex/context/base/mkiv/font-syn.lua index 558d07fe7..c4dcf0bcd 100644 --- a/tex/context/base/mkiv/font-syn.lua +++ b/tex/context/base/mkiv/font-syn.lua @@ -804,6 +804,7 @@ local function collecthashes() local noffallbacks = 0 if specifications then -- maybe multiple passes (for the compatible and cffnames so that they have less preference) + local conflicts = setmetatableindex("table") for index=1,#specifications do local specification = specifications[index] local format = specification.format @@ -832,7 +833,6 @@ local function collecthashes() local instance = fullname .. instancenames[i] mapping[instance] = index nofmappings = nofmappings + 1 - end end -- if compatiblename and not mapping[compatiblename] then @@ -865,10 +865,22 @@ local function collecthashes() noffallbacks = noffallbacks + 1 end end + -- dangerous ... first match takes slot if not mapping[familyname] and not fallback[familyname] then fallback[familyname] = index noffallbacks = noffallbacks + 1 end + local conflict = conflicts[format] + conflict[familyname] = (conflict[familyname] or 0) + 1 + end + end + for format, conflict in next, conflicts do + local fallback = fallbacks[format] + for familyname, n in next, conflict do + if n > 1 then + fallback[familyname] = nil + noffallbacks = noffallbacks - n + end end end end diff --git a/tex/context/base/mkiv/lang-def.mkiv b/tex/context/base/mkiv/lang-def.mkiv index ef53c13e3..96bb88767 100644 --- a/tex/context/base/mkiv/lang-def.mkiv +++ b/tex/context/base/mkiv/lang-def.mkiv @@ -134,7 +134,6 @@ \c!rightquotation=\rightguillemot, \c!date={\v!day,{.},\space,\v!month,\space,\v!year}] - \installlanguage [\s!no] [\s!nb] \installlanguage [\s!norwegian] [\s!nb] \installlanguage [\s!bokmal] [\s!nb] diff --git a/tex/context/base/mkiv/lang-dis.lua b/tex/context/base/mkiv/lang-dis.lua index 448966d49..e2c0d220e 100644 --- a/tex/context/base/mkiv/lang-dis.lua +++ b/tex/context/base/mkiv/lang-dis.lua @@ -62,146 +62,158 @@ local getlanguagedata = languages.getdata local check_regular = true -local expanders = { - [discretionary_code] = function(d,template) - -- \discretionary - return template - end, - [explicit_code] = function(d,template) - -- \- - local pre, post, replace = getdisc(d) - local done = false - if pre then - local char = isglyph(pre) - if char and char <= 0 then - done = true - flush_list(pre) - pre = nil +local expanders -- this will go away + +-- the penalty has been determined by the mode (currently we force 1): +-- +-- 0 : exhyphenpenalty +-- 1 : hyphenpenalty +-- 2 : automatichyphenpenalty +-- +-- following a - : the pre and post chars are already appended and set +-- so we have pre=preex and post=postex .. however, the previous +-- hyphen is already injected ... downside: the font handler sees this +-- so this is another argument for doing a hyphenation pass in context + +if LUATEXVERSION < 1.005 then + + expanders = { + [discretionary_code] = function(d,template) + -- \discretionary + return template + end, + [explicit_code] = function(d,template) + -- \- + local pre, post, replace = getdisc(d) + local done = false + if pre then + local char = isglyph(pre) + if char and char <= 0 then + done = true + flush_list(pre) + pre = nil + end end - end - if post then - local char = isglyph(post) - if char and char <= 0 then - done = true - flush_list(post) - post = nil + if post then + local char = isglyph(post) + if char and char <= 0 then + done = true + flush_list(post) + post = nil + end end - end - if done then - -- todo: take existing penalty - setdisc(d,pre,post,replace,explicit_code,tex.exhyphenpenalty) - else - setsubtype(d,explicit_code) - end - return template - end, - [automatic_code] = function(d,template) - -- the penalty has been determined by the mode (currently we force 1): - -- - -- 0 : exhyphenpenalty - -- 1 : hyphenpenalty - -- 2 : automatichyphenpenalty - -- - -- following a - : the pre and post chars are already appended and set - -- so we have pre=preex and post=postex .. however, the previous - -- hyphen is already injected ... downside: the font handler sees this - -- so this is another argument for doing a hyphenation pass in context - local pre, post, replace = getdisc(d) - if pre then - -- we have a preex characters and want that one to replace the - -- character in front which is the trigger - if not template then - -- can there be font kerns already? - template = getprev(d) - if template and getid(template) ~= glyph_code then - template = getnext(d) + if done then + -- todo: take existing penalty + setdisc(d,pre,post,replace,explicit_code,tex.exhyphenpenalty) + else + setsubtype(d,explicit_code) + end + return template + end, + [automatic_code] = function(d,template) + local pre, post, replace = getdisc(d) + if pre then + -- we have a preex characters and want that one to replace the + -- character in front which is the trigger + if not template then + -- can there be font kerns already? + template = getprev(d) if template and getid(template) ~= glyph_code then - template = nil + template = getnext(d) + if template and getid(template) ~= glyph_code then + template = nil + end end end - end - if template then - local pseudohead = getprev(template) - if pseudohead then - while template ~= d do - pseudohead, template, removed = remove_node(pseudohead,template) - -- free old replace ? - replace = removed - -- break ? + if template then + local pseudohead = getprev(template) + if pseudohead then + while template ~= d do + pseudohead, template, removed = remove_node(pseudohead,template) + -- free old replace ? + replace = removed + -- break ? + end + else + -- can't happen end + setdisc(d,pre,post,replace,automatic_code,tex.hyphenpenalty) else - -- can't happen + -- print("lone regular discretionary ignored") end - setdisc(d,pre,post,replace,automatic_code,tex.hyphenpenalty) else - -- print("lone regular discretionary ignored") + setdisc(d,pre,post,replace,automatic_code,tex.hyphenpenalty) end - else - setdisc(d,pre,post,replace,automatic_code,tex.hyphenpenalty) - end - return template - end, - [regular_code] = function(d,template) - if check_regular then - -- simple - if not template then - -- can there be font kerns already? - template = getprev(d) - if template and getid(template) ~= glyph_code then - template = getnext(d) + return template + end, + [regular_code] = function(d,template) + if check_regular then + -- simple + if not template then + -- can there be font kerns already? + template = getprev(d) if template and getid(template) ~= glyph_code then - template = nil + template = getnext(d) + if template and getid(template) ~= glyph_code then + template = nil + end end end - end - if template then - local language = template and getlang(template) - local data = getlanguagedata(language) - local prechar = data.prehyphenchar - local postchar = data.posthyphenchar - local pre, post, replace = getdisc(d) -- pre can be set - local done = false - if prechar and prechar > 0 then - done = true - pre = copy_node(template) - setchar(pre,prechar) - end - if postchar and postchar > 0 then - done = true - post = copy_node(template) - setchar(post,postchar) - end - if done then - setdisc(d,pre,post,replace,regular_code,tex.hyphenpenalty) + if template then + local language = template and getlang(template) + local data = getlanguagedata(language) + local prechar = data.prehyphenchar + local postchar = data.posthyphenchar + local pre, post, replace = getdisc(d) -- pre can be set + local done = false + if prechar and prechar > 0 then + done = true + pre = copy_node(template) + setchar(pre,prechar) + end + if postchar and postchar > 0 then + done = true + post = copy_node(template) + setchar(post,postchar) + end + if done then + setdisc(d,pre,post,replace,regular_code,tex.hyphenpenalty) + end + else + -- print("lone regular discretionary ignored") end - else - -- print("lone regular discretionary ignored") + return template end - return template - else - -- maybe also set penalty here - setsubtype(d,regular_code) + end, + [disccodes.first] = function() + -- forget about them + end, + [disccodes.second] = function() + -- forget about them + end, + } + + function languages.expand(d,template,subtype) + if not subtype then + subtype = getsubtype(d) end - end, - [disccodes.first] = function() - -- forget about them - end, - [disccodes.second] = function() - -- forget about them - end, -} + if subtype ~= discretionary_code then + return expanders[subtype](d,template) + end + end -languages.expanders = expanders +else -function languages.expand(d,template,subtype) - if not subtype then - subtype = getsubtype(d) - end - if subtype ~= discretionary_code then - return expanders[subtype](d,template) + function languages.expand() + -- nothing to be fixed end + end +languages.expanders = expanders + +-- -- -- -- -- + local setlistcolor = nodes.tracers.colors.setlist function languages.visualizediscretionaries(head) diff --git a/tex/context/base/mkiv/lang-hyp.lua b/tex/context/base/mkiv/lang-hyp.lua index 50132bfe1..b85295f19 100644 --- a/tex/context/base/mkiv/lang-hyp.lua +++ b/tex/context/base/mkiv/lang-hyp.lua @@ -6,14 +6,6 @@ if not modules then modules = { } end modules ['lang-hyp'] = { license = "see context related readme files" } --- todo: hyphenate over range if needed --- todo: check boundary nodes - --- setattr: helper for full attr - --- to be considered: reset dictionary.hyphenated when a pattern is added --- or maybe an explicit reset of the cache - -- In an automated workflow hypenation of long titles can be somewhat problematic -- especially when demands conflict. For that reason I played a bit with a Lua based -- variant of the traditional hyphenation machinery. This mechanism has been extended @@ -24,7 +16,11 @@ if not modules then modules = { } end modules ['lang-hyp'] = { -- Being the result of two days experimenting the following implementation is probably -- not completely okay yet. If there is demand I might add some more features and plugs. -- The performance is quite okay but can probably improved a bit, although this is not --- the most critital code. +-- the most critital code. For instance, on a metafun manual run the overhead is about +-- 0.3 seconds on 19 seconds which is not that bad. +-- +-- In the procecess of wrapping up (for the ctx conference proceedings) I cleaned up +-- and extended the code a bit. It can be used in production. -- -- . a l g o r i t h m . -- 4l1g4 @@ -45,12 +41,12 @@ if not modules then modules = { } end modules ['lang-hyp'] = { -- -- ab1cd/ef=gh,2,2 : acd - efd (pattern/replacement,start,length -- --- In the procecess of wrapping up (for the ctx conference proceedings) I cleaned up --- and extended the code a bit. - --- todo: hjcodes (<32 == length) if i really want it - --- start: +-- todo : support hjcodes (<32 == length) like luatex does now (no need/demand so far) +-- maybe : support hyphenation over range (can alsready be done using attributes/language) +-- maybe : reset dictionary.hyphenated when a pattern is added and/or forced reset option +-- todo : check subtypes (because they have subtle meanings in the line breaking) +-- +-- word start (in tex engine): -- -- boundary : yes when wordboundary -- hlist : when hyphenationbounds 1 or 3 @@ -63,7 +59,7 @@ if not modules then modules = { } end modules ['lang-hyp'] = { -- glyph : exhyphenchar (one only) : yes (so no -- ---) -- otherwise : yes -- --- end: +-- word end (in tex engine): -- -- boundary : yes -- glyph : yes when different language @@ -78,8 +74,6 @@ if not modules then modules = { } end modules ['lang-hyp'] = { -- ins : when hyphenationbounds 2 or 3 -- adjust : when hyphenationbounds 2 or 3 --- todo: maybe subtypes (because they have subtle meanings in the line breaking) - local type, rawset, tonumber, next = type, rawset, tonumber, next local P, R, S, Cg, Cf, Ct, Cc, C, Carg, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cf, lpeg.Ct, lpeg.Cc, lpeg.C, lpeg.Carg, lpeg.Cs @@ -318,15 +312,14 @@ function traditional.lasttrace() return steps end --- We could reuse the w table but as we cache the resolved words --- there is not much gain in that complication. +-- We could reuse the w table but as we cache the resolved words there is not much gain in +-- that complication. -- --- Beware: word can be a table and when n is passed to we can --- assume reuse so we need to honor that n then. - --- todo: a fast variant for tex ... less lookups (we could check is --- dictionary has changed) ... although due to caching the already --- done words, we don't do much here +-- Beware: word can be a table and when n is passed to we can assume reuse so we need to +-- honor that n then. +-- +-- todo: a fast variant for tex ... less lookups (we could check is dictionary has changed) +-- ... although due to caching the already done words, we don't do much here local function hyphenate(dictionary,word,n) -- odd is okay nofwords = nofwords + 1 @@ -399,7 +392,6 @@ local function hyphenate(dictionary,word,n) -- odd is okay local specials = dictionary.specials local patterns = dictionary.patterns -- --- inspect(specials) local spec for i=1,l do for j=i,l do @@ -410,15 +402,14 @@ local function hyphenate(dictionary,word,n) -- odd is okay if not done then done = { } spec = nil - -- the string that we resolve has explicit fences (.) so - -- done starts at the first fence and runs upto the last - -- one so we need one slot less + -- the string that we resolve has explicit fences (.) so done starts at + -- the first fence and runs upto the last one so we need one slot less for i=1,l do done[i] = 0 end end - -- we run over the pattern that always has a (zero) value for - -- each character plus one more as we look at both sides + -- we run over the pattern that always has a (zero) value for each character + -- plus one more as we look at both sides for k=1,#m do local new = m[k] if not new then @@ -524,8 +515,8 @@ function traditional.injecthyphens(dictionary,word,specification) return word end - -- the following code is similar to code later on but here we have - -- strings while there we have hyphen specs + -- the following code is similar to code later on but here we have strings while there + -- we have hyphen specs local word = lpegmatch(p_split,word) local size = #word @@ -636,7 +627,7 @@ if context then local discretionary_code = disccodes.discretionary local explicit_code = disccodes.explicit local automatic_code = disccodes.automatic - ----- regular_code = disccodes.regular + local regular_code = disccodes.regular local nuts = nodes.nuts local tonut = nodes.tonut @@ -658,13 +649,18 @@ if context then local getattrlist = nuts.getattrlist local setattrlist = nuts.setattrlist local isglyph = nuts.isglyph + local ischar = nuts.ischar local setchar = nuts.setchar local setdisc = nuts.setdisc + local setlink = nuts.setlink + local setprev = nuts.setprev + local setnext = nuts.setnext local insert_before = nuts.insert_before local insert_after = nuts.insert_after local copy_node = nuts.copy + local copy_list = nuts.copy_list local remove_node = nuts.remove local end_of_math = nuts.end_of_math local node_tail = nuts.tail @@ -690,8 +686,9 @@ if context then local a_hyphenation = attributes.private("hyphenation") - local expand_explicit = languages.expanders[explicit_code] - local expand_automatic = languages.expanders[automatic_code] + local expanders = languages.expanders -- gone in 1.005 + local expand_explicit = expanders and expanders[explicit_code] + local expand_automatic = expanders and expanders[automatic_code] local interwordpenalty = 5000 @@ -699,11 +696,12 @@ if context then return dictionaries[language] end - setmetatableindex(dictionaries,function(t,k) -- for the moment we use an independent data structure + -- for the moment we use an independent data structure + + setmetatableindex(dictionaries,function(t,k) if type(k) == "string" then - -- this will force a load if not yet loaded (we need a nicer way) - -- for the moment that will do (nneeded for examples that register - -- a pattern specification + -- this will force a load if not yet loaded (we need a nicer way) for the moment + -- that will do (nneeded for examples that register a pattern specification languages.getnumber(k) end local specification = languages.getdata(k) @@ -778,11 +776,10 @@ if context then -- with less characters than either of them! This could be an option but such a narrow -- hsize doesn't make sense anyway. - -- We assume that featuresets are defined global ... local definitions - -- (also mid paragraph) make not much sense anyway. For the moment we - -- assume no predefined sets so we don't need to store them. Nor do we - -- need to hash them in order to save space ... no sane user will define - -- many of them. + -- We assume that featuresets are defined global ... local definitions (also mid paragraph) + -- make not much sense anyway. For the moment we assume no predefined sets so we don't need + -- to store them. Nor do we need to hash them in order to save space ... no sane user will + -- define many of them. local featuresets = hyphenators.featuresets or { } hyphenators.featuresets = featuresets @@ -804,7 +801,8 @@ if context then return noffeaturesets end - local function makeset(...) -- a bit overkill, supporting variants but who cares + local function makeset(...) + -- a bit overkill, supporting variants but who cares local set = { } for i=1,select("#",...) do local list = select(i,...) @@ -844,9 +842,34 @@ if context then return set end + -- category pd (tex also sees --- and -- as hyphens but do we really want that + local defaulthyphens = { - [0x2D] = true, -- hyphen - [0xAD] = true, -- soft hyphen + [0x002D] = true, -- HYPHEN-MINUS + [0x00AD] = 0x002D, -- SOFT HYPHEN (active in ConTeXt) + -- [0x058A] = true, -- ARMENIAN HYPHEN + -- [0x1400] = true, -- CANADIAN SYLLABICS HYPHEN + -- [0x1806] = true, -- MONGOLIAN TODO SOFT HYPHEN + [0x2010] = true, -- HYPHEN + -- [0x2011] = true, -- NON-BREAKING HYPHEN + -- [0x2012] = true, -- FIGURE DASH + [0x2013] = true, -- EN DASH + [0x2014] = true, -- EM DASH + -- [0x2015] = true, -- HORIZONTAL BAR + -- [0x2027] = true, -- HYPHENATION POINT + -- [0x2E17] = true, -- DOUBLE OBLIQUE HYPHEN + -- [0x2E1A] = true, -- HYPHEN WITH DIAERESIS + -- [0x2E3A] = true, -- TWO-EM DASH + -- [0x2E3B] = true, -- THREE-EM DASH + -- [0x2E40] = true, -- DOUBLE HYPHEN + -- [0x301C] = true, -- WAVE DASH + -- [0x3030] = true, -- WAVY DASH + -- [0x30A0] = true, -- KATAKANA-HIRAGANA DOUBLE HYPHEN + -- [0xFE31] = true, -- PRESENTATION FORM FOR VERTICAL EM DASH + -- [0xFE32] = true, -- PRESENTATION FORM FOR VERTICAL EN DASH + -- [0xFE58] = true, -- SMALL EM DASH + -- [0xFE63] = true, -- SMALL HYPHEN-MINUS + -- [0xFF0D] = true, -- FULLWIDTH HYPHEN-MINUS } local defaultjoiners = { @@ -868,13 +891,15 @@ if context then local charmin = tonumber(featureset.charmin) -- luatex now also has hyphenationmin local leftcharmin = tonumber(featureset.leftcharmin) local rightcharmin = tonumber(featureset.rightcharmin) - local rightedge = featureset.rightedge local leftchar = somehyphenchar(featureset.leftchar) local rightchar = somehyphenchar(featureset.rightchar) local rightchars = featureset.rightchars +local rightedge = featureset.rightedge +local autohyphen = v_yes -- featureset.autohyphen -- insert disc +local hyphenonly = v_yes -- featureset.hyphenonly -- don't hyphenate around rightchars = rightchars == v_word and true or tonumber(rightchars) - joinerchars = joinerchars == v_yes and defaultjoiners or joinerchars - hyphenchars = hyphenchars == v_yes and defaulthyphens or hyphenchars + joinerchars = joinerchars == v_yes and defaultjoiners or joinerchars -- table + hyphenchars = hyphenchars == v_yes and defaulthyphens or hyphenchars -- table -- not yet ok: extrachars have to be ignored so it cannot be all) featureset.extrachars = makeset(joinerchars or "",extrachars or "") featureset.hyphenchars = makeset(hyphenchars or "") @@ -886,8 +911,9 @@ if context then featureset.rightchars = rightchars featureset.leftchar = leftchar featureset.rightchar = rightchar - featureset.strict = rightedge == 'tex' - -- + -- featureset.strict = rightedge == "tex" +featureset.autohyphen = autohyphen == v_yes +featureset.hyphenonly = hyphenonly == v_yes return register(name,featureset) end @@ -959,10 +985,9 @@ if context then arguments = { "string", "string" } } - -- This is a relative large function with local variables and local - -- functions. A previous implementation had the functions outside but - -- this is cleaner and as efficient. The test runs 100 times over - -- tufte.tex, knuth.tex, zapf.tex, ward.tex and darwin.tex in lower + -- This is a relative large function with local variables and local functions. A previous + -- implementation had the functions outside but this is cleaner and as efficient. The test + -- runs 100 times over tufte.tex, knuth.tex, zapf.tex, ward.tex and darwin.tex in lower -- and uppercase with a 1mm hsize. -- -- language=0 language>0 4 | 3 * slower @@ -970,79 +995,89 @@ if context then -- tex 2.34 | 1.30 2.55 | 1.45 0.21 | 0.15 -- lua 2.42 | 1.38 3.30 | 1.84 0.88 | 0.46 -- - -- Of course we have extra overhead (virtual Lua machine) but also we - -- check attributes and support specific local options). The test puts - -- the typeset text in boxes and discards it. If we also flush the - -- runtime is 4.31|2.56 and 4.99|2.94 seconds so the relative difference - -- is (somehow) smaller. The test has 536 pages. There is a little bit - -- of extra overhead because we store the patterns in a different way. + -- Of course we have extra overhead (virtual Lua machine) but also we check attributes and + -- support specific local options). The test puts the typeset text in boxes and discards + -- it. If we also flush the runtime is 4.31|2.56 and 4.99|2.94 seconds so the relative + -- difference is (somehow) smaller. The test has 536 pages. There is a little bit of extra + -- overhead because we store the patterns in a different way. -- - -- As usual I will look for speedups. Some 0.01 seconds could be gained - -- by sharing patterns which is not impressive but it does save some - -- 3M memory on this test. (Some optimizations already brought the 3.30 - -- seconds down to 3.14 but it all depends on aggressive caching.) + -- As usual I will look for speedups. Some 0.01 seconds could be gained by sharing patterns + -- which is not impressive but it does save some 3M memory on this test. (Some optimizations + -- already brought the 3.30 seconds down to 3.14 but it all depends on aggressive caching.) - -- As we kick in the hyphenator before fonts get handled, we don't look - -- at implicit (font) kerns or ligatures. + -- As we kick in the hyphenator before fonts get handled, we don't look at implicit (font) + -- kerns or ligatures. local starttiming = statistics.starttiming local stoptiming = statistics.stoptiming - local strictids = { - [nodecodes.hlist] = true, - [nodecodes.vlist] = true, - [nodecodes.rule] = true, - [nodecodes.disc] = true, - [nodecodes.accent] = true, - [nodecodes.math] = true, - } + -- local strictids = { + -- [nodecodes.hlist] = true, + -- [nodecodes.vlist] = true, + -- [nodecodes.rule] = true, + -- [nodecodes.dir] = true, + -- [nodecodes.whatsit] = true, + -- [nodecodes.ins] = true, + -- [nodecodes.adjust] = true, + -- + -- [nodecodes.math] = true, + -- [nodecodes.disc] = true, + -- + -- [nodecodes.accent] = true, -- never used in context + -- } - -- local gf = getfield local gt = setmetatableindex("number") getfield = function(n,f) gt[f] = gt[f] + 1 return gf(n,f) end languages.GETFIELD = gt + -- a lot of overhead when only one char function traditional.hyphenate(head) - local first = tonut(head) - local tail = nil - local last = nil - local current = first - local dictionary = nil - local instance = nil - local characters = nil - local unicodes = nil - local exhyphenchar = tex.exhyphenchar - local extrachars = nil - local hyphenchars = nil - local language = nil - local start = nil - local stop = nil - local word = { } -- we reuse this table - local size = 0 - local leftchar = false - local rightchar = false -- utfbyte("-") - local leftexchar = false - local rightexchar = false -- utfbyte("-") - local leftmin = 0 - local rightmin = 0 - local charmin = 1 - local leftcharmin = nil - local rightcharmin = nil - ----- leftwordmin = nil - local rightwordmin = nil - local rightchars = nil - local leftchar = nil - local rightchar = nil - local attr = nil - local lastwordlast = nil - local hyphenated = hyphenate - local strict = nil - local hyphenpenalty = tex.hyphenpenalty + local first = tonut(head) + + + local tail = nil + local last = nil + local current = first + local dictionary = nil + local instance = nil + local characters = nil + local unicodes = nil + local exhyphenchar = tex.exhyphenchar + local extrachars = nil + local hyphenchars = nil + local language = nil + local start = nil + local stop = nil + local word = { } -- we reuse this table + local size = 0 + local leftchar = false + local rightchar = false -- utfbyte("-") + local leftexchar = false + local rightexchar = false -- utfbyte("-") + local leftmin = 0 + local rightmin = 0 + local charmin = 1 + local leftcharmin = nil + local rightcharmin = nil + ----- leftwordmin = nil + local rightwordmin = nil + local rightchars = nil + local leftchar = nil + local rightchar = nil + local attr = nil + local lastwordlast = nil + local hyphenated = hyphenate + ----- strict = nil + local exhyphenpenalty = tex.exhyphenpenalty + local hyphenpenalty = tex.hyphenpenalty + local autohyphen = false + local hyphenonly = false -- We cannot use an 'enabled' boolean (false when no characters or extras) because we -- can have plugins that set a characters metatable and so) ... it doesn't save much -- anyway. Using (unicodes and unicodes[code]) and a nil table when no characters also -- doesn't save much. So there not that much to gain for languages that don't hyphenate. -- - -- enabled = (unicodes and (next(unicodes) or getmetatable(unicodes))) or (extrachars and next(extrachars)) + -- enabled = (unicodes and (next(unicodes) or getmetatable(unicodes))) + -- or (extrachars and next(extrachars)) -- -- This can be used to not add characters i.e. keep size 0 but then we need to check for -- attributes that change it, which costs time too. Not much to gain there. @@ -1071,8 +1106,10 @@ if context then rightcharmin = f.rightcharmin leftchar = f.leftchar rightchar = f.rightchar - strict = f.strict and strictids + -- strict = f.strict and strictids rightchars = f.rightchars + autohyphen = f.autohyphen + hyphenonly = f.hyphenonly if rightwordmin and rightwordmin > 0 and lastwordlast ~= rightwordmin then -- so we can change mid paragraph but it's kind of unpredictable then if not tail then @@ -1117,7 +1154,9 @@ if context then rightcharmin = false leftchar = false rightchar = false - strict = false + -- strict = false + autohyphen = false + hyphenonly = false end return a @@ -1130,12 +1169,11 @@ if context then local rsize = 0 local position = 1 - -- todo: remember last dics and don't go back to before that (plus - -- message) .. for simplicity we also assume that we don't start - -- with a dics node + -- todo: remember last dics and don't go back to before that (plus message) ... + -- for simplicity we also assume that we don't start with a dics node -- - -- there can be a conflict: if we backtrack then we can end up in - -- another disc and get out of sync (dup chars and so) + -- there can be a conflict: if we backtrack then we can end up in another disc + -- and get out of sync (dup chars and so) while position <= size do if position >= leftmin and position <= rightmin then @@ -1237,8 +1275,7 @@ if context then return head end - local current = start - + local current = start local attrnode = start -- will be different, just the first char for i=1,rsize do @@ -1253,7 +1290,7 @@ if context then if leftchar then post = serialize(true,leftchar) end - setdisc(disc,pre,post,nil,discretionary_code,hyphenpenalty) + setdisc(disc,pre,post,nil,regular_code,hyphenpenalty) if attrnode then setattrlist(disc,attrnode) end @@ -1287,7 +1324,8 @@ if context then replace = nil end end - setdisc(disc,pre,post,replace,discretionary_code,hyphenpenalty) + -- maybe regular code + setdisc(disc,pre,post,replace,regular_code,hyphenpenalty) if attrnode then setattrlist(disc,attrnode) end @@ -1327,7 +1365,7 @@ if context then end pre = copy_node(glyph) setchar(pre,rightchar and rightchar > 0 and rightchar or code) - setdisc(disc,pre,post,replace,discretionary_code,hyphenpenalty) + setdisc(disc,pre,post,replace,automatic_code,hyphenpenalty) -- ex ? if attrnode then setattrlist(disc,attrnode) end @@ -1335,76 +1373,49 @@ if context then return current end + local function injectseries(current,last,next,attrnode) + local disc = new_disc() + local start = current + first, current = insert_before(first,current,disc) + setprev(start) + setnext(last) + if next then + setlink(current,next) + else + setnext(current) + end + local pre = copy_list(start) + local post = nil + local replace = start + setdisc(disc,pre,post,replace,automatic_code,hyphenpenalty) -- ex ? + if attrnode then + setattrlist(disc,attrnode) + end + return current + end + local a = getattr(first,a_hyphenation) if a ~= attr then attr = synchronizefeatureset(a) end - -- The first attribute in a word determines the way a word gets hyphenated - -- and if relevant, other properties are also set then. We could optimize for - -- silly one-char cases but it has no priority as the code is still not that - -- much slower than the native hyphenator and this variant also provides room - -- for extensions. + -- The first attribute in a word determines the way a word gets hyphenated and if + -- relevant, other properties are also set then. We could optimize for silly one-char + -- cases but it has no priority as the code is still not that much slower than the + -- native hyphenator and this variant also provides room for extensions. + + local skipping = false while current and current ~= last do -- and current local code, id = isglyph(current) if code then - local lang = getlang(current) - if lang ~= language then - if dictionary and size > charmin and leftmin + rightmin <= size then - -- only german has many words starting with an uppercase character - if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then - -- skip - else - local hyphens = hyphenated(dictionary,word,size) - if hyphens then - flush(hyphens) - end - end - end - language = lang - if language > 0 then - -- - dictionary = dictionaries[language] - instance = dictionary.instance - characters = dictionary.characters - unicodes = dictionary.unicodes - -- - local a = getattr(current,a_hyphenation) - attr = synchronizefeatureset(a) - leftchar = leftchar or (instance and posthyphenchar (instance)) -- we can make this more - rightchar = rightchar or (instance and prehyphenchar (instance)) -- efficient if needed - leftexchar = (instance and preexhyphenchar (instance)) - rightexchar = (instance and postexhyphenchar(instance)) - leftmin = leftcharmin or getfield(current,"left") - rightmin = rightcharmin or getfield(current,"right") - if not leftchar or leftchar < 0 then - leftchar = false - end - if not rightchar or rightchar < 0 then - rightchar = false - end - -- - local char = unicodes[code] or (extrachars and extrachars[code]) - if char then - word[1] = char - size = 1 - start = current - else - size = 0 - end - else - size = 0 - end - elseif language <= 0 then - -- - elseif size > 0 then - local char = unicodes[code] or (extrachars and extrachars[code]) - if char then - size = size + 1 - word[size] = char - elseif dictionary then - if size > charmin and leftmin + rightmin <= size then + if skipping then + current = getnext(current) + else + local lang = getlang(current) + if lang ~= language then + if dictionary and size > charmin and leftmin + rightmin <= size then + -- only german has many words starting with an uppercase character if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then -- skip else @@ -1414,67 +1425,151 @@ if context then end end end - size = 0 - -- maybe also a strict mode here: no hyphenation before hyphenchars and skip - -- the next set (but then, strict is an option) - if code == exhyphenchar then - current = inject(leftexchar,rightexchar,code,current) - elseif hyphenchars and hyphenchars[code] then - current = inject(leftchar,rightchar,code,current) + language = lang + if language > 0 then + -- + dictionary = dictionaries[language] + instance = dictionary.instance + characters = dictionary.characters + unicodes = dictionary.unicodes + -- + local a = getattr(current,a_hyphenation) + attr = synchronizefeatureset(a) + leftchar = leftchar or (instance and posthyphenchar (instance)) -- we can make this more + rightchar = rightchar or (instance and prehyphenchar (instance)) -- efficient if needed + leftexchar = (instance and preexhyphenchar (instance)) + rightexchar = (instance and postexhyphenchar(instance)) + leftmin = leftcharmin or getfield(current,"left") + rightmin = rightcharmin or getfield(current,"right") + if not leftchar or leftchar < 0 then + leftchar = false + end + if not rightchar or rightchar < 0 then + rightchar = false + end + -- + local char = unicodes[code] or (extrachars and extrachars[code]) + if char then + word[1] = char + size = 1 + start = current + else + size = 0 + end + else + size = 0 end - end - else - local a = getattr(current,a_hyphenation) - if a ~= attr then - attr = synchronizefeatureset(a) -- influences extrachars - leftchar = leftchar or (instance and posthyphenchar (instance)) -- we can make this more - rightchar = rightchar or (instance and prehyphenchar (instance)) -- efficient if needed - leftexchar = (instance and preexhyphenchar (instance)) - rightexchar = (instance and postexhyphenchar(instance)) - leftmin = leftcharmin or getfield(current,"left") - rightmin = rightcharmin or getfield(current,"right") - if not leftchar or leftchar < 0 then - leftchar = false + elseif language <= 0 then + -- + elseif size > 0 then + local char = unicodes[code] or (extrachars and extrachars[code]) + if char then + size = size + 1 + word[size] = char + elseif dictionary then + if not hyphenonly or code ~= exhyphenchar then + if size > charmin and leftmin + rightmin <= size then + if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then + -- skip + else + local hyphens = hyphenated(dictionary,word,size) + if hyphens then + flush(hyphens) + end + end + end + end + size = 0 + if code == exhyphenchar then -- normally the - + local next = getnext(current) + local last = current + local font = getfont(current) + while next and ischar(next,font) == code do + last = next + next = getnext(next) + end + if not autohyphen then + current = last + elseif current == last then + current = inject(leftexchar,rightexchar,code,current) + else + current = injectseries(current,last,next,current) + end + if hyphenonly then + skipping = true + end + elseif hyphenchars then + local char = hyphenchars[code] + if char == true then + char = code + end + if char then + current = inject(leftchar and char or nil,rightchar and char or nil,char,current) + end + end end - if not rightchar or rightchar < 0 then - rightchar = false + else + local a = getattr(current,a_hyphenation) + if a ~= attr then + attr = synchronizefeatureset(a) -- influences extrachars + leftchar = leftchar or (instance and posthyphenchar (instance)) -- we can make this more + rightchar = rightchar or (instance and prehyphenchar (instance)) -- efficient if needed + leftexchar = (instance and preexhyphenchar (instance)) + rightexchar = (instance and postexhyphenchar(instance)) + leftmin = leftcharmin or getfield(current,"left") + rightmin = rightcharmin or getfield(current,"right") + if not leftchar or leftchar < 0 then + leftchar = false + end + if not rightchar or rightchar < 0 then + rightchar = false + end + end + -- + local char = unicodes[code] or (extrachars and extrachars[code]) + if char then + word[1] = char + size = 1 + start = current end end - -- - local char = unicodes[code] or (extrachars and extrachars[code]) - if char then - word[1] = char - size = 1 - start = current - end + stop = current + current = getnext(current) end - stop = current - current = getnext(current) else + if skipping then + skipping = false + end if id == disc_code then - local subtype = getsubtype(current) - if subtype == discretionary_code then -- \discretionary - size = 0 - current = getnext(current) - elseif subtype == explicit_code then -- \- => only here - size = 0 - expand_explicit(current) - current = getnext(current) - elseif subtype == automatic_code then -- - => only here - size = 0 - expand_automatic(current) - current = getnext(current) + if expanded then + -- pre 1.005 + local subtype = getsubtype(current) + if subtype == discretionary_code then -- \discretionary + size = 0 + elseif subtype == explicit_code then -- \- => only here + -- automatic (-) : the old parser makes negative char entries + size = 0 + expand_explicit(current) + elseif subtype == automatic_code then -- - => only here + -- automatic (-) : the old hyphenator turns an exhyphen into glyph+disc + size = 0 + expand_automatic(current) + else + -- first : done by the hyphenator + -- second : done by the hyphenator + -- regular : done by the hyphenator + size = 0 + end else - -- automatic (-) : the hyphenator turns an exhyphen into glyph+disc - -- first : done by the hyphenator - -- second : done by the hyphenator - -- regular : done by the hyphenator size = 0 - current = getnext(current) end - elseif strict and strict[id] then - current = id == math_code and getnext(end_of_math(current)) or getnext(current) - size = 0 + current = getnext(current) + if hyphenonly then + skipping = true + end + -- elseif strict and strict[id] then + -- current = id == math_code and getnext(end_of_math(current)) or getnext(current) + -- size = 0 else current = id == math_code and getnext(end_of_math(current)) or getnext(current) end @@ -1493,8 +1588,8 @@ if context then end end end - -- we can have quit due to last so we need to flush the last seen word, we could move this in - -- the loop and test for current but ... messy + -- we can have quit due to last so we need to flush the last seen word, we could move + -- this in the loop and test for current but ... messy if dictionary and size > charmin and leftmin + rightmin <= size then if categories[word[1]] == "lu" and getfield(start,"uchyph") < 0 then -- skip @@ -1547,18 +1642,27 @@ if context then return head, done end - local function expanded(head) + local expanded = function (head) local done = hyphenate(head) - if done then - for d in traverse_id(disc_code,tonut(head)) do - local s = getsubtype(d) - if s ~= discretionary_code then - expanders[s](d,template) - done = true + return head, done + end + + if LUATEXVERSION< 1.005 then + + expanded = function(head) + local done = hyphenate(head) + if done then + for d in traverse_id(disc_code,tonut(head)) do + local s = getsubtype(d) + if s ~= discretionary_code then + expanders[s](d,template) + done = true + end end end + return head, done end - return head, done + end local getcount = tex.getcount @@ -1587,7 +1691,7 @@ if context then methods.tex = original methods.original = original - methods.expanded = expanded + methods.expanded = expanded -- obsolete starting with 1.005 methods.traditional = languages.hyphenators.traditional.hyphenate methods.none = false -- function(head) return head, false end @@ -1679,54 +1783,54 @@ if context then else --- traditional.loadpatterns("nl","lang-nl") --- traditional.loadpatterns("de","lang-de") --- traditional.loadpatterns("us","lang-us") - --- traditional.registerpattern("nl","e1ë", { start = 1, length = 2, before = "e", after = "e" } ) --- traditional.registerpattern("nl","oo7ë", { start = 2, length = 3, before = "o", after = "e" } ) --- traditional.registerpattern("de","qqxc9xkqq",{ start = 3, length = 4, before = "ab", after = "cd" } ) - --- local specification = { --- leftcharmin = 2, --- rightcharmin = 2, --- leftchar = "<", --- rightchar = ">", --- } - --- print("reëel", traditional.injecthyphens(dictionaries.nl,"reëel", specification),"r{e>}{}{}{}{}{}{