author     Context Git Mirror Bot <phg42.2a@gmail.com>   2014-11-26 13:15:05 +0100
committer  Context Git Mirror Bot <phg42.2a@gmail.com>   2014-11-26 13:15:05 +0100
commit     5e624488bd860906c17301941c96e6209a9e4cd6 (patch)
tree       1a517cfd2d5d4787879fdd3673f0d123c970c9b9 /tex
parent     156e528557cf571eb99ab05a0892429b3c2bf269 (diff)
download   context-5e624488bd860906c17301941c96e6209a9e4cd6.tar.gz
2014-11-26 12:52:00
Diffstat (limited to 'tex')
37 files changed, 1553 insertions, 470 deletions
diff --git a/tex/context/base/buff-par.lua b/tex/context/base/buff-par.lua index 2c1cd40e9..d12eddebc 100644 --- a/tex/context/base/buff-par.lua +++ b/tex/context/base/buff-par.lua @@ -9,13 +9,14 @@ if not modules then modules = { } end modules ['buff-par'] = { local context, commands = context, commands local insert, remove, find, gmatch = table.insert, table.remove, string.find, string.gmatch -local strip, format = string.strip, string.format +local fullstrip, formatters = string.fullstrip, string.formatters local trace_parallel = false trackers.register("buffers.parallel", function(v) trace_parallel = v end) local report_parallel = logs.reporter("buffers","parallel") local variables = interfaces.variables +local v_all = variables.all local parallel = buffers.parallel or { } buffers.parallel = parallel @@ -40,7 +41,7 @@ function parallel.define(category,tags) end function parallel.reset(category,tags) - if not tags or tags == "" or tags == variables.all then + if not tags or tags == "" or tags == v_all then tags = table.keys(entries) else tags = settings_to_array(tags) @@ -65,10 +66,12 @@ end function parallel.save(category,tag,content) local dc = data[category] if not dc then + report_parallel("unknown category %a",category) return end local entries = dc.entries[tag] if not entries then + report_parallel("unknown entry %a",tag) return end local lines = entries.lines @@ -93,13 +96,14 @@ function parallel.save(category,tag,content) if trace_parallel and label ~= "" then report_parallel("reference found of category %a, tag %a, label %a",category,tag,label) end + line.content = fullstrip(content) line.label = label - line.content = strip(content) end else - line.content = strip(content) + line.content = fullstrip(content) line.label = "" end + -- print("[["..line.content.."]]") end function parallel.hassomecontent(category,tags) @@ -108,7 +112,7 @@ function parallel.hassomecontent(category,tags) return false end local entries = dc.entries - if not tags or tags == "" or tags == variables.all then + if not tags or tags == "" or tags == v_all then tags = table.keys(entries) else tags = utilities.parsers.settings_to_array(tags) @@ -126,22 +130,26 @@ function parallel.hassomecontent(category,tags) return false end -local save = resolvers.savers.byscheme +local ctx_doflushparallel = context.doflushparallel +local ctx_doifelse = commands.doifelse +local f_content = formatters["\\input{%s}"] +local save_byscheme = resolvers.savers.byscheme function parallel.place(category,tags,options) local dc = data[category] if not dc then return end - local entries = dc.entries - local tags = utilities.parsers.settings_to_array(tags) - local options = utilities.parsers.settings_to_hash(options) - local start, n, criterium = options.start, options.n, options.criterium - start, n = start and tonumber(start), n and tonumber(n) - local max = 1 + local entries = dc.entries + local tags = utilities.parsers.settings_to_array(tags) + local options = utilities.parsers.settings_to_hash(options) -- options can be hash too + local start = tonumber(options.start) + local n = tonumber(options.n) + local criterium = options.criterium + local max = 1 if n then max = n - elseif criterium == variables.all then + elseif criterium == v_all then max = 0 for t=1,#tags do local tag = tags[t] @@ -156,15 +164,17 @@ function parallel.place(category,tags,options) local tag = tags[t] local entry = entries[tag] if entry then - local lines = entry.lines - local number = entry.number + 1 - entry.number = number - local line = remove(lines,1) 
- if line and line.content then - local content = format("\\input{%s}",save("virtual","parallel",line.content)) - context.doflushparallel(tag,1,number,line.label,content) + local lines = entry.lines + local number = entry.number + 1 + entry.number = number + local line = remove(lines,1) + local content = line and line.content + local label = line and line.label or "" + if content then + local virtual = save_byscheme("virtual","parallel",content) + ctx_doflushparallel(tag,1,number,label,f_content(virtual)) else - context.doflushparallel(tag,0,number,"","") + ctx_doflushparallel(tag,0,number,"","") end end end @@ -180,5 +190,5 @@ commands.placeparallel = parallel.place commands.resetparallel = parallel.reset function commands.doifelseparallel(category,tags) - commands.doifelse(parallel.hassomecontent(category,tags)) + ctx_doifelse(parallel.hassomecontent(category,tags)) end diff --git a/tex/context/base/buff-par.mkvi b/tex/context/base/buff-par.mkvi index 404fa8ef3..5efff5125 100644 --- a/tex/context/base/buff-par.mkvi +++ b/tex/context/base/buff-par.mkvi @@ -39,6 +39,7 @@ \unprotect \installcorenamespace{parallel} +\installcorenamespace{parallelhandler} \installcommandhandler \??parallel {parallel} \??parallel @@ -46,22 +47,41 @@ [\c!setups=parallel:place:default] \let\buff_parallel_define_saved\defineparallel +\let\buff_parallel_setup_saved \setupparallel -\unexpanded\def\defineparallel - {\dodoubleargument\buff_parallel_define} +\unexpanded\def\defineparallel{\dodoubleargument\buff_parallel_define} +\unexpanded\def\setupparallel {\dotripleargument\buff_parallel_setup} \def\buff_parallel_define[#name][#instances]% - {\buff_parallel_define_saved[#name] + {\buff_parallel_define_saved[#name][\c!list={#instances}]% list is internal \ctxcommand{defineparallel("#name","#instances")}% - \processcommalist[#instances]\buff_parallel_define_instance + \expandafter\newtoks\csname\??parallelhandler#name\endcsname + \processcommacommand[#instances]{\buff_parallel_define_instance{#name}}% \setuevalue{\e!start#name}{\buff_parallel_start{#name}}% \setuevalue{\e!stop #name}{\buff_parallel_stop}} -\def\buff_parallel_define_instance#instance% - {\normalexpanded{\buff_parallel_define_saved[#instance][\currentparallel]}% +\let\buff_process_parallel_instance\relax + +\def\buff_process_parallel_instances + {\the\csname\??parallelhandler\currentparallel\endcsname} + +\def\buff_parallel_define_instance#name#instance% + {\normalexpanded{\buff_parallel_define_saved[#instance][#name]}% \definebuffer[#instance]% + %\edef\t_buff_parallel{\csname\??parallelhandler#name\endcsname}% + \expandafter\let\expandafter\t_buff_parallel\csname\??parallelhandler#name\endcsname + \appendtoks + \edef\currentparallelinstance{#instance}% + \buff_process_parallel_instance\relax + \to \t_buff_parallel \setuevalue{\e!stop#instance}{\buff_parallel_save{#instance}}} +\def\buff_parallel_setup[#name][#instances][#setups]% + {\processcommalist[#instances]{\buff_parallel_setup_instance{#name}{#setups}}} + +\def\buff_parallel_setup_instance#name#setups#instance% + {\buff_parallel_setup_saved[#name:#instance][#setups]} + \unexpanded\def\buff_parallel_start#name% {\pushmacro\currentparallel \edef\currentparallel{#name}% @@ -79,7 +99,13 @@ \def\buff_parallel_place[#name][#instance][#settings]% {\begingroup \edef\currentparallel{#name}% - \ctxcommand{placeparallel("\currentparallel","#instance","#settings")}% -- todo: pass options as k/v + \setupcurrentparallel[#settings]% + \ctxcommand{placeparallel("\currentparallel","#instance",{ + % setups = 
"\parallelparameter\c!setups", + start = "\parallelparameter\c!start", + n = "\parallelparameter\c!n", + criterium = "\parallelparameter\c!criterium", + })}% \endgroup} \def\doflushparallel#instance#status#line#label#content% called at lua end @@ -88,7 +114,7 @@ \def\currentparallelnumber {#status}% \def\currentparallelline {#line}% \def\currentparallellabel {#label}% - \def\currentparallelcontent {#content}% + \def\currentparallelcontent {#content}% can be kep at the lua end .. no need to use a virtual file \ifcase#status\relax \expandafter\buff_parallel_flush_nop \or @@ -100,7 +126,7 @@ {} \def\buff_parallel_flush_yes - {\directsetup{\namedparallelparameter\currentparallelinstance\c!setups}} + {\directsetup{\namedparallelparameter{\currentparallel:\currentparallelinstance}\c!setups}} \unexpanded\def\doifelseparallel#name#instance% {\ctxcommand{doifelseparallel("#name","#instance")}} @@ -113,9 +139,9 @@ \startsetups parallel:place:default \hangafter\plusone - \hangindent4em + \hangindent4\emwidth \dontleavehmode - \hbox to 3em \bgroup + \hbox to 3\emwidth \bgroup \hss \bf \doifsomething \currentparallellabel { diff --git a/tex/context/base/buff-ver.mkiv b/tex/context/base/buff-ver.mkiv index 707c235ff..019e645c4 100644 --- a/tex/context/base/buff-ver.mkiv +++ b/tex/context/base/buff-ver.mkiv @@ -82,10 +82,10 @@ {\spaceskip.5\emwidth\relax \let\obeyedspace\specialobeyedspace \let\controlspace\specialcontrolspace - % \edef\p_buff_lines{\typeparameter\c!lines}% - % \ifcsname\??typinglines\p_buff_lines\endcsname - % \csname\??typinglines\p_buff_lines\endcsname - % \fi + \edef\p_buff_lines{\typeparameter\c!lines}% + \ifcsname\??typinglines\p_buff_lines\endcsname + \csname\??typinglines\p_buff_lines\endcsname + \fi \edef\p_buff_space{\typeparameter\c!space}% \ifcsname\??typingspace\p_buff_space\endcsname \csname\??typingspace\p_buff_space\endcsname diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv index 7b474d3f2..7392a750e 100644 --- a/tex/context/base/cont-new.mkiv +++ b/tex/context/base/cont-new.mkiv @@ -11,7 +11,7 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -\newcontextversion{2014.11.17 14:32} +\newcontextversion{2014.11.26 12:50} %D This file is loaded at runtime, thereby providing an excellent place for %D hacks, patches, extensions and new features. diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf Binary files differindex 32eb18732..fbc02fe6b 100644 --- a/tex/context/base/context-version.pdf +++ b/tex/context/base/context-version.pdf diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv index 5125f8011..8229af432 100644 --- a/tex/context/base/context.mkiv +++ b/tex/context/base/context.mkiv @@ -28,7 +28,7 @@ %D up and the dependencies are more consistent. 
\edef\contextformat {\jobname} -\edef\contextversion{2014.11.17 14:32} +\edef\contextversion{2014.11.26 12:50} \edef\contextkind {beta} %D For those who want to use this: diff --git a/tex/context/base/core-env.lua b/tex/context/base/core-env.lua index 2cc84299b..94f237c2e 100644 --- a/tex/context/base/core-env.lua +++ b/tex/context/base/core-env.lua @@ -45,8 +45,8 @@ setmetatableindex(tex.modes, function(t,k) if csname_id(n) == undefined then return false else - modes[k] = function() return texgetcount(n) >= 1 end - return texgetcount(n) >= 1 + modes[k] = function() return texgetcount(n) == 1 end + return texgetcount(n) == 1 -- 2 is prevented end end end) @@ -60,8 +60,8 @@ setmetatableindex(tex.systemmodes, function(t,k) if csname_id(n) == undefined then return false else - systemmodes[k] = function() return texgetcount(n) >= 1 end - return texgetcount(n) >= 1 + systemmodes[k] = function() return texgetcount(n) == 1 end + return texgetcount(n) == 1 -- 2 is prevented end end end) diff --git a/tex/context/base/core-env.mkiv b/tex/context/base/core-env.mkiv index 47f29ed73..6f9343587 100644 --- a/tex/context/base/core-env.mkiv +++ b/tex/context/base/core-env.mkiv @@ -46,8 +46,6 @@ \installcorenamespace{modestack} -% todo: check prevent mode, also at the lua end - \setnewconstant\disabledmode \zerocount \setnewconstant\enabledmode \plusone \setnewconstant\preventedmode\plustwo @@ -163,7 +161,17 @@ % handy for mp \def\booleanmodevalue#1% - {\ifcsname\??mode#1\endcsname\ifcase\csname\??mode#1\endcsname\s!false\else\s!true\fi\else\s!false\fi} + {\ifcsname\??mode#1\endcsname + \ifcase\csname\??mode#1\endcsname + \s!false + \or + \s!true + \else + \s!false + \fi + \else + \s!false + \fi} % check macros @@ -213,8 +221,10 @@ \def\syst_modes_check_yes#1#2#3% {\ifcase\csname\??mode#3\endcsname \expandafter#2% - \else + \or \expandafter#1% + \else + \expandafter#2% \fi} \def\syst_modes_check#1#2#3% @@ -230,6 +240,10 @@ {\ifcsname\??mode#1\endcsname \ifcase\csname\??mode#1\endcsname \let\syst_modes_check_all_step\gobbleoneargument + \or + % enabled + \else + \let\syst_modes_check_all_step\gobbleoneargument \fi \else \let\syst_modes_check_all_step\gobbleoneargument diff --git a/tex/context/base/core-sys.mkiv b/tex/context/base/core-sys.mkiv index c3cc2a231..bd73ba08c 100644 --- a/tex/context/base/core-sys.mkiv +++ b/tex/context/base/core-sys.mkiv @@ -242,8 +242,14 @@ \installcommandhandler \??highlight {highlight} \??highlight % we could do with less +\setuphighlight + [\c!command=\v!yes] + \appendtoks - \setuevalue\currenthighlight{\typo_highlights_indeed{\currenthighlight}}% + \edef\p_command{\highlightparameter\c!command}% + \ifx\p_command\v!yes + \setuevalue\currenthighlight{\typo_highlights_indeed{\currenthighlight}}% + \fi \to \everydefinehighlight \ifdefined\dotaghighlight \else \let\dotaghighlight\relax \fi diff --git a/tex/context/base/font-ctx.lua b/tex/context/base/font-ctx.lua index 2a12a807a..f764edb6d 100644 --- a/tex/context/base/font-ctx.lua +++ b/tex/context/base/font-ctx.lua @@ -2091,11 +2091,11 @@ end) directives.register("nodes.injections.fontkern", function(v) setfield(kern,"subtype",v and 0 or 1) end) --- here +-- here (todo: closure) local trace_analyzing = false trackers.register("otf.analyzing", function(v) trace_analyzing = v end) -local otffeatures = constructors.newfeatures("otf") +----- otffeatures = constructors.newfeatures("otf") local registerotffeature = otffeatures.register local analyzers = fonts.analyzers @@ -2198,3 +2198,26 @@ function commands.purefontname(name) 
context(file.basename(name)) end end + +local list = storage.shared.bodyfontsizes or { } +storage.shared.bodyfontsizes = list + +function commands.registerbodyfontsize(size) + list[size] = true +end + +function commands.getbodyfontsizes(separator) + context(concat(sortedkeys(list),separator)) +end + +function commands.processbodyfontsizes(command) + local keys = sortedkeys(list) + if command then + local action = context[command] + for i=1,#keys do + action(keys[i]) + end + else + context(concat(keys,",")) + end +end diff --git a/tex/context/base/font-ini.mkvi b/tex/context/base/font-ini.mkvi index f174b132b..bdb8f9a11 100644 --- a/tex/context/base/font-ini.mkvi +++ b/tex/context/base/font-ini.mkvi @@ -1158,7 +1158,9 @@ \installcorenamespace{fontenvironmentknown} -\let\bodyfontenvironmentlist\empty % used in font-run (might change) +% \let\bodyfontenvironmentlist\empty % used in font-run (might change) + +\newtoks\bodyfontenvironmentlist \def\font_helpers_register_environment#class#body% {\expandafter\let\csname\??fontenvironmentknown#class#body\endcsname\empty} @@ -1179,6 +1181,12 @@ %D a bodyfont is loaded but changing them afterwards can be sort of tricky as %D values are not consulted afterwards. +\def\processbodyfontenvironmentlist#1% no \unexpanded as then we cannot use it in alignments + {\ctxcommand{processbodyfontsizes("\strippedcsname#1")}} + +\def\bodyfontenvironmentlist + {\ctxcommand{getbodyfontsizes()}} + \def\font_basics_define_body_font_environment_class[#class][#body][#settings]% {\edef\m_font_body{#body}% \ifx\m_font_body\s!default @@ -1187,7 +1195,8 @@ \else \normalizebodyfontsize\m_font_body_normalized\m_font_body \font_basics_define_body_font_environment_size[#class][\m_font_body_normalized][#settings]% - \addtocommalist\m_font_body_normalized\bodyfontenvironmentlist + %\addtocommalist\m_font_body_normalized\bodyfontenvironmentlist + \ctxcommand{registerbodyfontsize("\m_font_body_normalized")}% \fi} %D The empty case uses the same code but needs to ignore the current class diff --git a/tex/context/base/font-run.mkiv b/tex/context/base/font-run.mkiv index cb40adedd..1b8843b94 100644 --- a/tex/context/base/font-run.mkiv +++ b/tex/context/base/font-run.mkiv @@ -12,8 +12,8 @@ %C therefore copyrighted by \PRAGMA. See mreadme.pdf for %C details. -%D [This code is hooked into the core macros and saves some -%D format space. It needs a cleanup.] +%D [This code is hooked into the core macros and saves some format +%D space. It needs a cleanup.] 
\unprotect @@ -100,8 +100,8 @@ &&\tttf\tx\s!text&&\tttf\tx\s!script&&\tttf\tx\s!scriptscript &&\tttf\tx\s!x&&\tttf\tx\s!xx&&\tttf\tx\v!small&&\tttf\tx\v!big &&\tttf\tx\c!interlinespace&\cr - \noalign{\hrule} - \@EA\globalprocesscommalist\@EA[\bodyfontenvironmentlist]\next}} + \noalign{\hrule}% + \processbodyfontenvironmentlist\next}} \ifinsidefloat\else\stopbaselinecorrection\fi} \unexpanded\gdef\showfont diff --git a/tex/context/base/grph-inc.lua b/tex/context/base/grph-inc.lua index ea922b28c..80d878019 100644 --- a/tex/context/base/grph-inc.lua +++ b/tex/context/base/grph-inc.lua @@ -247,12 +247,14 @@ figures.order = figures_order -- frozen -- name checker -local pattern = (R("az","AZ") * P(":"))^-1 * ( -- a-z : | A-Z : - (R("az","09") + S("_/") - P("_")^2)^1 * P(".") * R("az")^1 + -- a-z | single _ | / - (R("az","09") + S("-/") - P("-")^2)^1 * P(".") * R("az")^1 + -- a-z | single - | / - (R("AZ","09") + S("_/") - P("_")^2)^1 * P(".") * R("AZ")^1 + -- A-Z | single _ | / - (R("AZ","09") + S("-/") - P("-")^2)^1 * P(".") * R("AZ")^1 -- A-Z | single - | / -) * P(-1) * Cc(false) + Cc(true) +local okay = P("m_k_i_v_") + +local pattern = (R("az","AZ") * P(":"))^-1 * ( -- a-z : | A-Z : + (okay + R("az","09") + S("_/") - P("_")^2)^1 * (P(".") * R("az")^1)^0 * P(-1) + -- a-z | single _ | / + (okay + R("az","09") + S("-/") - P("-")^2)^1 * (P(".") * R("az")^1)^0 * P(-1) + -- a-z | single - | / + (okay + R("AZ","09") + S("_/") - P("_")^2)^1 * (P(".") * R("AZ")^1)^0 * P(-1) + -- A-Z | single _ | / + (okay + R("AZ","09") + S("-/") - P("-")^2)^1 * (P(".") * R("AZ")^1)^0 * P(-1) -- A-Z | single - | / +) * Cc(false) + Cc(true) function figures.badname(name) if not name then diff --git a/tex/context/base/lang-hyp.lua b/tex/context/base/lang-hyp.lua index 205baccce..31ec8946c 100644 --- a/tex/context/base/lang-hyp.lua +++ b/tex/context/base/lang-hyp.lua @@ -6,6 +6,9 @@ if not modules then modules = { } end modules ['lang-hyp'] = { license = "see context related readme files" } +-- to be considered: reset dictionary.hyphenated when a pattern is added +-- or maybe an explicit reset of the cache + -- In an automated workflow hypenation of long titles can be somewhat problematic -- especially when demands conflict. For that reason I played a bit with a Lua based -- variant of the traditional hyphenation machinery. This mechanism has been extended @@ -36,16 +39,21 @@ if not modules then modules = { } end modules ['lang-hyp'] = { -- a s-s z o n-n y a l/sz=sz,2,3,ny=ny,6,3 -- -- ab1cd/ef=gh,2,2 : acd - efd (pattern/replacement,start,length +-- +-- In the procecess of wrapping up (for the ctx conference proceedings) I cleaned up +-- and extended the code a bit. 
-local type, rawset, tonumber = type, rawset, tonumber +local type, rawset, tonumber, next = type, rawset, tonumber, next local P, R, S, Cg, Cf, Ct, Cc, C, Carg, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cf, lpeg.Ct, lpeg.Cc, lpeg.C, lpeg.Carg, lpeg.Cs local lpegmatch = lpeg.match -local concat = table.concat - -local utfchar = utf.char -local utfbyte = utf.byte +local concat = table.concat +local insert = table.insert +local remove = table.remove +local formatters = string.formatters +local utfchar = utf.char +local utfbyte = utf.byte if not characters then require("char-ini") @@ -53,7 +61,14 @@ end local setmetatableindex = table.setmetatableindex -local languages = languages or { } +-- \enabletrackers[hyphenator.steps=silent] will not write to the terminal + +local trace_steps = false trackers.register("hyphenator.steps", function(v) trace_steps = v end) +local trace_visualize = false trackers.register("hyphenator.visualize",function(v) trace_visualize = v end) + +local report = logs.reporter("hyphenator") + +languages = languages or { } local hyphenators = languages.hyphenators or { } languages.hyphenators = hyphenators local traditional = hyphenators.traditional or { } @@ -64,28 +79,74 @@ local dictionaries = setmetatableindex(function(t,k) patterns = { }, hyphenated = { }, specials = { }, + exceptions = { }, + loaded = false, } t[k] = v return v end) +hyphenators.dictionaries = dictionaries + +local character = lpeg.patterns.utf8character local digit = R("09") -local character = lpeg.patterns.utf8character - P("/") -local splitpattern_k = Cs((digit/"" + character)^1) -local splitpattern_v = Ct(((digit/tonumber + Cc(0)) * character)^1 * (digit/tonumber)^0) -local splitpattern_v = - Ct(((digit/tonumber + Cc(0)) * character)^1 * (digit/tonumber)^0) * - (P("/") * Cf ( Ct("") * - Cg ( Cc("before") * C((1-lpeg.P("="))^1) * P("=") ) - * Cg ( Cc("after") * C((1-lpeg.P(","))^1) * P(",") ) - * Cg ( Cc("start") * ((1-lpeg.P(","))^1/tonumber) * P(",") ) - * Cg ( Cc("length") * ((1-lpeg.P(-1) )^1/tonumber) ) +local weight = digit/tonumber + Cc(0) +local fence = P(".") +local hyphen = P("-") +local space = P(" ") +local char = character - space +local validcharacter = (character - S("./")) +local keycharacter = character - S("/") +----- basepart = Ct( (Cc(0) * fence)^-1 * (weight * validcharacter)^1 * weight * (fence * Cc(0))^-1) +local specpart = (P("/") * Cf ( Ct("") * + Cg ( Cc("before") * C((1-P("="))^1) * P("=") ) * + Cg ( Cc("after") * C((1-P(","))^1) ) * + ( P(",") * + Cg ( Cc("start") * ((1-P(","))^1/tonumber) * P(",") ) * + Cg ( Cc("length") * ((1-P(-1) )^1/tonumber) ) + )^-1 , rawset))^-1 -local function register(patterns,specials,str,specification) - local k = lpegmatch(splitpattern_k,str) - local v1, v2 = lpegmatch(splitpattern_v,str) - patterns[k] = v1 +local make_hashkey_p = Cs((digit/"" + keycharacter)^1) +----- make_pattern_p = basepart * specpart +local make_hashkey_e = Cs((hyphen/"" + keycharacter)^1) +local make_pattern_e = Ct(P(char) * (hyphen * Cc(true) * P(char) + P(char) * Cc(false))^1) -- catch . 
and char after - + +-- local make_hashkey_c = Cs((digit + keycharacter/"")^1) +-- local make_pattern_c = Ct((P(1)/tonumber)^1) + +-- local cache = setmetatableindex(function(t,k) +-- local n = lpegmatch(make_hashkey_c,k) +-- local v = lpegmatch(make_pattern_c,n) +-- t[k] = v +-- return v +-- end) +-- +-- local weight_n = digit + Cc("0") +-- local basepart_n = Cs( (Cc("0") * fence)^-1 * (weight * validcharacter)^1 * weight * (fence * Cc("0"))^-1) / cache +-- local make_pattern_n = basepart_n * specpart + +local make_pattern_c = Ct((P(1)/tonumber)^1) + +-- us + nl: 17664 entries -> 827 unique (saves some 3M) + +local cache = setmetatableindex(function(t,k) + local v = lpegmatch(make_pattern_c,k) + t[k] = v + return v +end) + +local weight_n = digit + Cc("0") +local fence_n = fence / "0" +local char_n = validcharacter / "" +local basepart_n = Cs(fence_n^-1 * (weight_n * char_n)^1 * weight_n * fence_n^-1) / cache +local make_pattern_n = basepart_n * specpart + +local function register_pattern(patterns,specials,str,specification) + local k = lpegmatch(make_hashkey_p,str) + -- local v1, v2 = lpegmatch(make_pattern_p,str) + local v1, v2 = lpegmatch(make_pattern_n,str) + patterns[k] = v1 -- is this key still ok for complex patterns if specification then specials[k] = specification elseif v2 then @@ -93,17 +154,50 @@ local function register(patterns,specials,str,specification) end end -local word = ((Carg(1) * Carg(2) * C((1 - P(" "))^1)) / register + 1)^1 -local split = Ct(C(character)^1) +local function unregister_pattern(patterns,specials,str) + local k = lpegmatch(make_hashkey_p,str) + patterns[k] = nil + specials[k] = nil +end + +local function register_exception(exceptions,str,specification) + local k = lpegmatch(make_hashkey_e,str) + local v = lpegmatch(make_pattern_e,str) + exceptions[k] = v +end + +local p_pattern = ((Carg(1) * Carg(2) * C(char^1)) / register_pattern + 1)^1 +local p_exception = ((Carg(1) * C(char^1)) / register_exception + 1)^1 +local p_split = Ct(C(character)^1) function traditional.loadpatterns(language,filename) - local specification = require(filename) local dictionary = dictionaries[language] - if specification then - local patterns = specification.patterns - if patterns then - lpegmatch(word,patterns.data,1,dictionary.patterns,dictionary.specials) + if not dictionary.loaded then + if not filename or filename == "" then + filename = "lang-" .. 
language end + filename = file.addsuffix(filename,"lua") + local fullname = resolvers.findfile(filename) + if fullname and fullname ~= "" then + local specification = dofile(fullname) + if specification then + local patterns = specification.patterns + if patterns then + local data = patterns.data + if data and data ~= "" then + lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials) + end + end + local exceptions = specification.exceptions + if exceptions then + local data = exceptions.data + if data and data ~= "" then + lpegmatch(p_exception,data,1,dictionary.exceptions) + end + end + end + end + dictionary.loaded = true end return dictionary end @@ -113,30 +207,153 @@ local uccodes = characters.uccodes local nofwords = 0 local nofhashed = 0 -local function hyphenate(dictionary,word) +local steps = nil +local f_show = formatters["%w%s"] + +local function show_log() + if trace_steps == true then + report() + local w = #steps[1][1] + for i=1,#steps do + local s = steps[i] + report("%s%w%S %S",s[1],w - #s[1] + 3,s[2],s[3]) + end + report() + end +end + +local function show_1(wsplit) + local u = concat(wsplit," ") + steps = { { f_show(0,u), f_show(0,u) } } +end + +local function show_2(c,m,wsplit,done,i,spec) + local s = lpegmatch(p_split,c) + local t = { } + local n = #m + local w = #wsplit + for j=1,n do + t[#t+1] = m[j] + t[#t+1] = s[j] + end + local m = 2*i-2 + local l = #t + local s = spec and table.sequenced(spec) or "" + if m == 0 then + steps[#steps+1] = { f_show(m, concat(t,"",2)), f_show(1,concat(done," ",2,#done),s) } + elseif i+1 == w then + steps[#steps+1] = { f_show(m-1,concat(t,"",1,#t-1)), f_show(1,concat(done," ",2,#done),s) } + else + steps[#steps+1] = { f_show(m-1,concat(t)), f_show(1,concat(done," ",2,#done),s) } + end +end + +local function show_3(wsplit,done) + local t = { } + local h = { } + local n = #wsplit + for i=1,n do + local w = wsplit[i] + if i > 1 then + local d = done[i] + t[#t+1] = i > 2 and d % 2 == 1 and "-" or " " + h[#h+1] = d + end + t[#t+1] = w + h[#h+1] = w + end + steps[#steps+1] = { f_show(0,concat(h)), f_show(0,concat(t)) } + show_log() +end + +local function show_4(wsplit,done) + steps = { { concat(wsplit," ") } } + show_log() +end + +function traditional.lasttrace() + return steps +end + +-- We could reuse the w table but as we cache the resolved words +-- there is not much gain in that complication. +-- +-- Beware: word can be a table and when n is passed to we can +-- assume reuse so we need to honor that n then. + +-- todo: a fast variant for tex ... less lookups (we could check is +-- dictionary has changed) ... although due to caching the already +-- done words, we don't do much here + +local function hyphenate(dictionary,word,n) -- odd is okay nofwords = nofwords + 1 local hyphenated = dictionary.hyphenated - local isstring = type(word) == "string" - local done + local isstring = type(word) == "string" if isstring then - done = hyphenated[word] + local done = hyphenated[word] + if done ~= nil then + return done + end + elseif n then + local done = hyphenated[concat(word,"",1,n)] + if done ~= nil then + return done + end else - done = hyphenated[concat(word)] + local done = hyphenated[concat(word)] + if done ~= nil then + return done + end end + local key + if isstring then + key = word + word = lpegmatch(p_split,word) + if not n then + n = #word + end + else + if not n then + n = #word + end + key = concat(word,"",1,n) + end + local l = 1 + local w = { "." 
} + for i=1,n do + local c = word[i] + l = l + 1 + w[l] = lcchars[c] or c + end + l = l + 1 + w[l] = "." + local c = concat(w,"",2,l-1) + -- + local done = hyphenated[c] if done ~= nil then + hyphenated[key] = done + nofhashed = nofhashed + 1 return done - else - done = false end - local specials = dictionary.specials - local patterns = dictionary.patterns - local s = isstring and lpegmatch(split,word) or word - local l = #s - local w = { } - for i=1,l do - local si = s[i] - w[i] = lcchars[si] or si + -- + local exceptions = dictionary.exceptions + local exception = exceptions[c] + if exception then + if trace_steps then + show_4(w,exception) + end + hyphenated[key] = exception + nofhashed = nofhashed + 1 + return exception end + -- + if trace_steps then + show_1(w) + end + -- + local specials = dictionary.specials + local patterns = dictionary.patterns + -- local spec for i=1,l do for j=i,l do @@ -146,15 +363,23 @@ local function hyphenate(dictionary,word) local s = specials[c] if not done then done = { } - spec = { } + spec = nil + -- the string that we resolve has explicit fences (.) so + -- done starts at the first fence and runs upto the last + -- one so we need one slot less for i=1,l do done[i] = 0 end end + -- we run over the pattern that always has a (zero) value for + -- each character plus one more as we look at both sides for k=1,#m do local new = m[k] if not new then break + elseif new == true then + report("fatal error") + break elseif new > 0 then local pos = i + k - 1 local old = done[pos] @@ -163,136 +388,238 @@ local function hyphenate(dictionary,word) elseif new > old then done[pos] = new if s then - local b = i + s.start - 1 - local e = b + s.length - 1 - if pos >= b and pos <= e then - spec[pos] = s + local b = i + (s.start or 1) - 1 + if b > 0 then + local e = b + (s.length or 2) - 1 + if e > 0 then + if pos >= b and pos <= e then + if spec then + spec[pos] = { s, k - 1 } + else + spec = { [pos] = { s, k - 1 } } + end + end + end end end end end end + if trace_steps and done then + show_2(c,m,w,done,i,s) + end end end end + if trace_steps and done then + show_3(w,done) + end if done then local okay = false - for i=1,#done do + for i=3,#done do if done[i] % 2 == 1 then - done[i] = spec[i] or true + done[i-2] = spec and spec[i] or true okay = true else - done[i] = false + done[i-2] = false end end - if not okay then + if okay then + done[#done] = nil + done[#done] = nil + else done = false end + else + done = false end - hyphenated[isstring and word or concat(word)] = done + hyphenated[key] = done nofhashed = nofhashed + 1 return done end -local f_detail_1 = string.formatters["{%s}{%s}{}"] -local f_detail_2 = string.formatters["{%s%s}{%s%s}{%s}"] +function traditional.gettrace(language,word) + local dictionary = dictionaries[language] + if dictionary then + local hyphenated = dictionary.hyphenated + hyphenated[word] = nil + hyphenate(dictionary,word) + return steps + end +end + +local methods = setmetatableindex(function(t,k) local v = hyphenate t[k] = v return v end) + +function traditional.installmethod(name,f) + if rawget(methods,name) then + report("overloading %a is not permitted",name) + else + methods[name] = f + end +end + +local s_detail_1 = "-" +local f_detail_2 = formatters["%s-%s"] +local f_detail_3 = formatters["{%s}{%s}{}"] +local f_detail_4 = formatters["{%s%s}{%s%s}{%s}"] function traditional.injecthyphens(dictionary,word,specification) - local h = hyphenate(dictionary,word) - if not h then + if not word then + return false + end + if not specification 
then + return word + end + local hyphens = hyphenate(dictionary,word) + if not hyphens then return word end - local w = lpegmatch(split,word) - local r = { } - local l = #h - local n = 0 - local i = 1 - local leftmin = specification.lefthyphenmin or 2 - local rightmin = l - (specification.righthyphenmin or left) + 1 - local leftchar = specification.lefthyphenchar - local rightchar = specification.righthyphenchar - while i <= l do - if i > leftmin and i < rightmin then - local hi = h[i] - if not hi then - n = n + 1 - r[n] = w[i] - i = i + 1 - elseif hi == true then - n = n + 1 - r[n] = f_detail_1(rightchar,leftchar) - n = n + 1 - r[n] = w[i] - i = i + 1 + + -- the following code is similar to code later on but here we have + -- strings while there we have hyphen specs + + local word = lpegmatch(p_split,word) + local size = #word + + local leftmin = specification.leftcharmin or 2 + local rightmin = size - (specification.rightcharmin or leftmin) + local leftchar = specification.leftchar + local rightchar = specification.rightchar + + local result = { } + local rsize = 0 + local position = 1 + + while position <= size do + if position >= leftmin and position <= rightmin then + local hyphen = hyphens[position] + if not hyphen then + rsize = rsize + 1 + result[rsize] = word[position] + position = position + 1 + elseif hyphen == true then + rsize = rsize + 1 + result[rsize] = word[position] + rsize = rsize + 1 + if leftchar and rightchar then + result[rsize] = f_detail_3(rightchar,leftchar) + else + result[rsize] = s_detail_1 + end + position = position + 1 else - local b = i - hi.start - local e = b + hi.length - 1 - n = b - r[n] = f_detail_2(hi.before,rightchar,leftchar,hi.after,concat(w,"",b,e)) - if e + 1 == i then - i = i + 1 + local o, h = hyphen[2] + if o then + h = hyphen[1] + else + h = hyphen + o = 1 + end + local b = position - o + (h.start or 1) + local e = b + (h.length or 2) - 1 + if b > 0 and e >= b then + for i=1,b-position do + rsize = rsize + 1 + result[rsize] = word[position] + position = position + 1 + end + rsize = rsize + 1 + if leftchar and rightchar then + result[rsize] = f_detail_4(h.before,rightchar,leftchar,h.after,concat(word,"",b,e)) + else + result[rsize] = f_detail_2(h.before,h.after) + end + position = e + 1 else - i = e + 1 + -- error + rsize = rsize + 1 + result[rsize] = word[position] + position = position + 1 end end else - n = n + 1 - r[n] = w[i] - i = i + 1 + rsize = rsize + 1 + result[rsize] = word[position] + position = position + 1 end end - return concat(r) + return concat(result) end function traditional.registerpattern(language,str,specification) local dictionary = dictionaries[language] - register(dictionary.patterns,dictionary.specials,str,specification) + if specification == false then + unregister_pattern(dictionary.patterns,dictionary.specials,str) + else + register_pattern(dictionary.patterns,dictionary.specials,str,specification) + end end -- todo: unicodes or utfhash ? 
if context then - local nodecodes = nodes.nodecodes - local glyph_code = nodecodes.glyph - local math_code = nodecodes.math - - local nuts = nodes.nuts - local tonut = nodes.tonut - local nodepool = nuts.pool - - local new_disc = nodepool.disc - - local setfield = nuts.setfield - local getfield = nuts.getfield - local getchar = nuts.getchar - local getid = nuts.getid - local getnext = nuts.getnext - local getprev = nuts.getprev - local insert_before = nuts.insert_before - local insert_after = nuts.insert_after - local copy_node = nuts.copy - local remove_node = nuts.remove - local end_of_math = nuts.end_of_math - local node_tail = nuts.tail + local nodecodes = nodes.nodecodes + local glyph_code = nodecodes.glyph + local math_code = nodecodes.math + + local nuts = nodes.nuts + local tonut = nodes.tonut + local nodepool = nuts.pool + + local new_disc = nodepool.disc + + local setfield = nuts.setfield + local getfield = nuts.getfield + local getchar = nuts.getchar + local getid = nuts.getid + local getattr = nuts.getattr + local getnext = nuts.getnext + local getprev = nuts.getprev + local insert_before = nuts.insert_before + local insert_after = nuts.insert_after + local copy_node = nuts.copy + local remove_node = nuts.remove + local end_of_math = nuts.end_of_math + local node_tail = nuts.tail + + local setcolor = nodes.tracers.colors.set + + local variables = interfaces.variables + local v_reset = variables.reset + local v_yes = variables.yes + local v_all = variables.all + + local settings_to_array = utilities.parsers.settings_to_array + + local unsetvalue = attributes.unsetvalue + local texsetattribute = tex.setattribute + + local prehyphenchar = lang.prehyphenchar + local posthyphenchar = lang.posthyphenchar + + local lccodes = characters.lccodes + + local a_hyphenation = attributes.private("hyphenation") function traditional.loadpatterns(language) return dictionaries[language] end - statistics.register("hyphenation",function() - if nofwords > 0 then - return string.format("%s words hyphenated, %s unique",nofwords,nofhashed) + setmetatableindex(dictionaries,function(t,k) -- for the moment we use an independent data structure + if type(k) == "string" then + -- this will force a load if not yet loaded (we need a nicer way) + -- for the moment that will do (nneeded for examples that register + -- a pattern specification + languages.getnumber(k) end - end) - - setmetatableindex(dictionaries,function(t,k) -- we use an independent data structure local specification = languages.getdata(k) - local dictionary = { + local dictionary = { patterns = { }, + exceptions = { }, hyphenated = { }, specials = { }, - instance = 0, + instance = false, characters = { }, unicodes = { }, } @@ -304,21 +631,22 @@ if context then local data = patterns.data if data then -- regular patterns - lpegmatch(word,data,1,dictionary.patterns,dictionary.specials) + lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials) end local extra = patterns.extra if extra then -- special patterns - lpegmatch(word,extra,1,dictionary.patterns,dictionary.specials) + lpegmatch(p_pattern,extra,1,dictionary.patterns,dictionary.specials) + end + end + local exceptions = resources.exceptions + if exceptions then + local data = exceptions.data + if data and data ~= "" then + lpegmatch(p_exception,data,1,dictionary.exceptions) end end - local permitted = patterns.characters --- local additional = "[]()" --- local additional = specification.additional --- if additional then --- permitted = permitted .. 
additional -- has to be attribute driven --- end - local usedchars = lpegmatch(split,permitted) + local usedchars = lpegmatch(p_split,patterns.characters) local characters = { } local unicodes = { } for i=1,#usedchars do @@ -327,12 +655,18 @@ if context then local upper = uccodes[code] characters[char] = code unicodes [code] = char - unicodes [upper] = utfchar(upper) + if type(upper) == "table" then + for i=1,#upper do + local u = upper[i] + unicodes[u] = utfchar(u) + end + else + unicodes[upper] = utfchar(upper) + end end dictionary.characters = characters dictionary.unicodes = unicodes - setmetatableindex(characters,function(t,k) local v = utfbyte(k) t[k] = v return v end) -- can be non standard - -- setmetatableindex(unicodes, function(t,k) local v = utfchar(k) t[k] = v return v end) + setmetatableindex(characters,function(t,k) local v = k and utfbyte(k) t[k] = v return v end) end t[specification.number] = dictionary dictionary.instance = specification.instance -- needed for hyphenchars @@ -341,268 +675,554 @@ if context then return dictionary end) - local function flush(head,start,stop,dictionary,w,h,lefthyphenchar,righthyphenchar,characters,lefthyphenmin,righthyphenmin) - local r = { } - local l = #h - local n = 0 - local i = 1 - local left = lefthyphenmin - local right = l - righthyphenmin + 1 - while i <= l do - if i > left and i < right then - local hi = h[i] - if not hi then - n = n + 1 - r[n] = w[i] - i = i + 1 - elseif hi == true then - n = n + 1 - r[n] = true - n = n + 1 - r[n] = w[i] - i = i + 1 - else - local b = i - hi.start -- + 1 - 1 - local e = b + hi.length - 1 - n = b - r[n] = { hi.before, hi.after, concat(w,"",b,e) } - i = e + 1 + -- Beware: left and right min doesn't mean that in a 1 mmm hsize there can be snippets + -- with less characters than either of them! This could be an option but such a narrow + -- hsize doesn't make sense anyway. + + -- We assume that featuresets are defined global ... local definitions + -- (also mid paragraph) make not much sense anyway. For the moment we + -- assume no predefined sets so we don't need to store them. Nor do we + -- need to hash them in order to save space ... no sane user will define + -- many of them. + + local featuresets = hyphenators.featuresets or { } + hyphenators.featuresets = featuresets + + storage.shared.noflanguagesfeaturesets = storage.shared.noflanguagesfeaturesets or 0 + + local noffeaturesets = storage.shared.noflanguagesfeaturesets + + storage.register("languages/hyphenators/featuresets",featuresets,"languages.hyphenators.featuresets") + + ----- hash = table.sequenced(featureset,",") -- no need now + + local function register(name,featureset) + noffeaturesets = noffeaturesets + 1 + featureset.attribute = noffeaturesets + featuresets[noffeaturesets] = featureset -- access by attribute + featuresets[name] = featureset -- access by name + storage.shared.noflanguagesfeaturesets = noffeaturesets + return noffeaturesets + end + + local function makeset(...) -- a bit overkill, supporting variants but who cares + local set = { } + for i=1,select("#",...) do + local list = select(i,...) + local kind = type(list) + local used = nil + if kind == "string" then + if list == v_all then + -- not ok ... 
now all get ignored + return setmetatableindex(function(t,k) local v = utfchar(k) t[k] = v return v end) + elseif list ~= "" then + used = lpegmatch(p_split,list) + set = set or { } + for i=1,#used do + local char = used[i] + set[utfbyte(char)] = char + end + end + elseif kind == "table" then + if next(list) then + set = set or { } + for byte, char in next, list do + set[byte] = char == true and utfchar(byte) or char + end + elseif #list > 0 then + set = set or { } + for i=1,#list do + local l = list[i] + if type(l) == "number" then + set[l] = utfchar(l) + else + set[utfbyte(l)] = l + end + end end - else - n = n + 1 - r[n] = w[i] - i = i + 1 end end + return set + end - local function serialize(s,lefthyphenchar,righthyphenchar) - if not s then - return - elseif s == true then - local n = copy_node(stop) - setfield(n,"char",lefthyphenchar or righthyphenchar) - return n - end - local h = nil - local c = nil - if lefthyphenchar then - h = copy_node(stop) - setfield(h,"char",lefthyphenchar) - c = h - end - if #s == 1 then - local n = copy_node(stop) - setfield(n,"char",characters[s]) - if not h then - h = n - else - insert_after(c,c,n) - end - c = n - else - local t = lpegmatch(split,s) + local defaulthyphens = { + [0x2D] = true, -- hyphen + [0xAD] = true, -- soft hyphen + } + + local defaultjoiners = { + [0x200C] = true, -- nzwj + [0x200D] = true, -- zwj + } + + local function definefeatures(name,featureset) + local extrachars = featureset.characters -- "[]()" + local hyphenchars = featureset.hyphens + local joinerchars = featureset.joiners + local alternative = featureset.alternative + local rightwordmin = tonumber(featureset.rightwordmin) + local leftcharmin = tonumber(featureset.leftcharmin) + local rightcharmin = tonumber(featureset.rightcharmin) + -- + joinerchars = joinerchars == v_yes and defaultjoiners or joinerchars + hyphenchars = hyphenchars == v_yes and defaulthyphens or hyphenchars + -- not yet ok: extrachars have to be ignored so it cannot be all) + featureset.extrachars = makeset(joinerchars or "",extrachars or "") + featureset.hyphenchars = makeset(hyphenchars or "") + featureset.alternative = alternative or "hyphenate" + featureset.rightwordmin = rightwordmin and rightwordmin > 0 and rightwordmin or nil + featureset.leftcharmin = leftcharmin and leftcharmin > 0 and leftcharmin or nil + featureset.rightcharmin = rightcharmin and rightcharmin > 0 and rightcharmin or nil + -- + return register(name,featureset) + end + + traditional.definefeatures = definefeatures + commands.definehyphenationfeatures = definefeatures + + function commands.sethyphenationfeatures(n) + if not n or n == v_reset then + n = false + else + local f = featuresets[n] + if not f and type(n) == "string" then + local t = settings_to_array(n) + local s = { } for i=1,#t do - local n = copy_node(stop) - setfield(n,"char",characters[t[i]]) - if not h then - h = n - else - insert_after(c,c,n) + local ti = t[i] + local fs = featuresets[ti] + if fs then + for k, v in next, fs do + s[k] = v + end end - c = n end + n = register(n,s) + else + n = f and f.attribute end - if righthyphenchar then - local n = copy_node(stop) - insert_after(c,c,n) - setfield(n,"char",righthyphenchar) - end - return h end + texsetattribute(a_hyphenation,n or unsetvalue) + end + + commands.registerhyphenationpattern = traditional.registerpattern - -- no grow + -- This is a relative large function with local variables and local + -- functions. A previous implementation had the functions outside but + -- this is cleaner and as efficient. 
The test runs 100 times over + -- tufte.tex, knuth.tex, zapf.tex, ward.tex and darwin.tex in lower + -- and uppercase with a 1mm hsize. + -- + -- language=0 language>0 4 | 3 * slower + -- + -- tex 2.34 | 1.30 2.55 | 1.45 0.21 | 0.15 + -- lua 2.42 | 1.38 3.30 | 1.84 0.88 | 0.46 + -- + -- Of course we have extra overhead (virtual Lua machine) but also we + -- check attributes and support specific local options). The test puts + -- the typeset text in boxes and discards it. If we also flush the + -- runtime is 4.31|2.56 and 4.99|2.94 seconds so the relative difference + -- is (somehow) smaller. The test has 536 pages. There is a little bit + -- of extra overhead because we store the patterns in a different way. + -- + -- As usual I will look for speedups. Some 0.01 seconds could be gained + -- by sharing patterns which is not impressive but it does save some + -- 3M memory on this test. (Some optimizations already brought the 3.30 + -- seconds down to 3.14 but it all depends on aggressive caching.) - local current = start - local size = #r - for i=1,size do - local ri = r[i] - if ri == true then - local n = new_disc() - if righthyphenchar then - setfield(n,"pre",serialize(true,righthyphenchar)) + local starttiming = statistics.starttiming + local stoptiming = statistics.stoptiming + + function traditional.hyphenate(head) + + local first = tonut(head) + local tail = nil + local last = nil + local current = first + local dictionary = nil + local instance = nil + local characters = nil + local unicodes = nil + local extrachars = nil + local hyphenchars = nil + local language = nil + local start = nil + local stop = nil + local word = { } -- we reuse this table + local size = 0 + local leftchar = false + local rightchar = false -- utfbyte("-") + local leftmin = 0 + local rightmin = 0 + local leftcharmin = nil + local rightcharmin = nil + local rightwordmin = nil + local attr = nil + local lastwordlast = nil + local hyphenated = hyphenate + + -- We cannot use an 'enabled' boolean (false when no characters or extras) because we + -- can have plugins that set a characters metatable and so) ... it doesn't save much + -- anyway. Using (unicodes and unicodes[code]) and a nil table when no characters also + -- doesn't save much. So there not that much to gain for languages that don't hyphenate. + -- + -- enabled = (unicodes and (next(unicodes) or getmetatable(unicodes))) or (extrachars and next(extrachars)) + -- + -- This can be used to not add characters i.e. keep size 0 but then we need to check for + -- attributes that change it, which costs time too. Not much to gain there. 
+ + starttiming(traditional) + + local function synchronizefeatureset(a) + local f = a and featuresets[a] + if f then + hyphenated = methods[f.alternative or "hyphenate"] + extrachars = f.extrachars + hyphenchars = f.hyphenchars + rightwordmin = f.rightwordmin + leftcharmin = f.leftcharmin + rightcharmin = f.rightcharmin + if rightwordmin and rightwordmin > 0 and lastwordlast ~= rightwordmin then + -- so we can change mid paragraph but it's kind of unpredictable then + if not tail then + tail = node_tail(first) + end + last = tail + local inword = false + while last and rightwordmin > 0 do + local id = getid(last) + if id == glyph_code then + inword = true + if trace_visualize then + setcolor(last,"darkred") + end + elseif inword then + inword = false + rightwordmin = rightwordmin - 1 + end + last = getprev(last) + end + lastwordlast = rightwordmin end - if lefthyphenchar then - setfield(n,"post",serialize(true,lefthyphenchar)) + else + hyphenated = methods.hyphenate + extrachars = false + hyphenchars = false + rightwordmin = false + leftcharmin = false + rightcharmin = false + end + return a + end + + local function flush(hyphens) -- todo: no need for result + + local rightmin = size - rightmin + local result = { } + local rsize = 0 + local position = 1 + + -- todo: remember last dics and don't go back to before that (plus + -- message) .. for simplicity we also assume that we don't start + -- with a dics node + -- + -- there can be a conflict: if we backtrack then we can end up in + -- another disc and get out of sync (dup chars and so) + + while position <= size do + if position >= leftmin and position <= rightmin then + local hyphen = hyphens[position] + if not hyphen then + rsize = rsize + 1 + result[rsize] = word[position] + position = position + 1 + elseif hyphen == true then + rsize = rsize + 1 + result[rsize] = word[position] + rsize = rsize + 1 + result[rsize] = true + position = position + 1 + else + local o, h = hyphen[2] + if o then + -- { hyphen, offset) + h = hyphen[1] + else + -- hyphen + h = hyphen + o = 1 + end + local b = position - o + (h.start or 1) + local e = b + (h.length or 2) - 1 + if b > 0 and e >= b then + for i=1,b-position do + rsize = rsize + 1 + result[rsize] = word[position] + position = position + 1 + end + rsize = rsize + 1 + result[rsize] = { + h.before or "", -- pre + h.after or "", -- post + concat(word,"",b,e), -- replace + h.right, -- optional after pre + h.left, -- optional before post + } + position = e + 1 + else + -- error + rsize = rsize + 1 + result[rsize] = word[position] + position = position + 1 + end + end + else + rsize = rsize + 1 + result[rsize] = word[position] + position = position + 1 end - insert_before(head,current,n) - elseif type(ri) == "table" then - local n = new_disc() - local pre, post, replace = ri[1], ri[2], ri[3] - if pre then - setfield(n,"pre",serialize(pre,false,righthyphenchar)) + end + + local function serialize(replacement,leftchar,rightchar) + if not replacement then + return + elseif replacement == true then + local glyph = copy_node(stop) + setfield(glyph,"char",leftchar or rightchar) + return glyph end - if post then - setfield(n,"post",serialize(post,lefthyphenchar,false)) + local head = nil + local current = nil + if leftchar then + head = copy_node(stop) + current = head + setfield(head,"char",leftchar) end - if replace then - setfield(n,"replace",serialize(replace)) + local rsize = #replacement + if rsize == 1 then + local glyph = copy_node(stop) + setfield(glyph,"char",characters[replacement]) + if head then 
+ insert_after(current,current,glyph) + else + head = glyph + end + current = glyph + elseif rsize > 0 then + local list = lpegmatch(p_split,replacement) -- this is an utf split (could be cached) + for i=1,#list do + local glyph = copy_node(stop) + setfield(glyph,"char",characters[list[i]]) + if head then + insert_after(current,current,glyph) + else + head = glyph + end + current = glyph + end end - insert_before(head,current,n) - else - setfield(current,"char",characters[ri]) - if i < size then - current = getnext(current) + if rightchar then + local glyph = copy_node(stop) + insert_after(current,current,glyph) + setfield(glyph,"char",rightchar) end + return head end - end - if current ~= stop then - local current = getnext(current) - local last = getnext(stop) - while current ~= last do - head, current = remove_node(head,current,true) - end - end - end - -- simple cases: no special .. only inject + local current = start - local prehyphenchar = lang.prehyphenchar - local posthyphenchar = lang.posthyphenchar - - local lccodes = characters.lccodes - - -- An experimental feature: - -- - -- \setupalign[verytolerant,flushleft] - -- \setuplayout[width=140pt] \showframe - -- longword longword long word longword longwordword \par - -- \enabledirectives[hyphenators.rightwordsmin=1] - -- longword longword long word longword longwordword \par - -- \disabledirectives[hyphenators.rightwordsmin] - -- - -- An alternative is of course to pack the words in an hbox. + for i=1,rsize do + local r = result[i] + if r == true then + local disc = new_disc() + if rightchar then + setfield(disc,"pre",serialize(true,rightchar)) + end + if leftchar then + setfield(disc,"post",serialize(true,leftchar)) + end + -- could be a replace as well + insert_before(first,current,disc) + elseif type(r) == "table" then + local disc = new_disc() + local pre = r[1] + local post = r[2] + local replace = r[3] + local right = r[4] ~= false and rightchar + local left = r[5] ~= false and leftchar + if pre and pre ~= "" then + setfield(disc,"pre",serialize(pre,false,right)) + end + if post and post ~= "" then + setfield(disc,"post",serialize(post,left,false)) + end + if replace and replace ~= "" then + setfield(disc,"replace",serialize(replace)) + end + insert_before(first,current,disc) + else + setfield(current,"char",characters[r]) + if i < rsize then + current = getnext(current) + end + end + end + if current and current ~= stop then + local current = getnext(current) + local last = getnext(stop) + while current ~= last do + first, current = remove_node(first,current,true) + end + end - local rightwordsmin = 0 -- todo: parproperties (each par has a number anyway) + end - function traditional.hyphenate(head) - local first = tonut(head) - local current = first - local dictionary = nil - local instance = nil - local characters = nil - local unicodes = nil - local language = nil - local start = nil - local stop = nil - local word = nil -- maybe reuse and pass size - local size = 0 - local leftchar = false - local rightchar = false -- utfbyte("-") - local leftmin = 0 - local rightmin = 0 - local lastone = nil - - if rightwordsmin > 0 then - lastone = node_tail(first) - local inword = false - while lastone and rightwordsmin > 0 do - local id = getid(lastone) - if id == glyph_code then - inword = true - elseif inword then - inword = false - rightwordsmin = rightwordsmin - 1 + local function inject() + if first ~= current then + local disc = new_disc() + first, current, glyph = remove_node(first,current) + first, current = 
                insert_before(first,current,disc)
+               if trace_visualize then
+                   setcolor(glyph,"darkred")  -- these get checked in the colorizer
+                   setcolor(disc,"darkgreen") -- these get checked in the colorizer
+               end
+               setfield(disc,"replace",glyph)
+               if rightchar then
+                   local glyph = copy_node(glyph)
+                   setfield(glyph,"char",rightchar)
+                   setfield(disc,"pre",glyph)
+               end
+               if leftchar then
+                   local glyph = copy_node(glyph)
+                   setfield(glyph,"char",leftchar)
+                   setfield(disc,"post",glyph)
                end
-               lastone = getprev(lastone)
            end
+           return current
+       end
+
+       local a = getattr(first,a_hyphenation)
+       if a ~= attr then
+           attr = synchronizefeatureset(a)
        end
-       while current ~= lastone do
+       -- The first attribute in a word determines the way a word gets hyphenated
+       -- and if relevant, other properties are also set then. We could optimize for
+       -- silly one-char cases but it has no priority as the code is still not that
+       -- much slower than the native hyphenator and this variant also provides room
+       -- for extensions.
+
+       while current and current ~= last do -- and current
            local id = getid(current)
            if id == glyph_code then
-               -- currently no lc/uc code support
                local code = getchar(current)
                local lang = getfield(current,"lang")
                if lang ~= language then
-                   if dictionary then
-                       if leftmin + rightmin < #word then
-                           local done = hyphenate(dictionary,word)
-                           if done then
-                               flush(first,start,stop,dictionary,word,done,leftchar,rightchar,characters,leftmin,rightmin)
-                           end
+                   if size > 0 and dictionary and leftmin + rightmin <= size then
+                       local hyphens = hyphenated(dictionary,word,size)
+                       if hyphens then
+                           flush(hyphens)
                        end
                    end
-                   language = lang
-                   dictionary = dictionaries[language]
-                   instance = dictionary.instance
-                   characters = dictionary.characters
-                   unicodes = dictionary.unicodes
-                   leftchar = instance and posthyphenchar(instance)
-                   rightchar = instance and prehyphenchar (instance)
-                   leftmin = getfield(current,"left")
-                   rightmin = getfield(current,"right")
-                   if not leftchar or leftchar < 0 then
-                       leftchar = false
-                   end
-                   if not rightchar or rightchar < 0 then
-                       rightchar = false
-                   end
-                   local char = unicodes[code]
-                   if char then
-                       word = { char }
-                       size = 1
-                       start = current
+                   language = lang
+                   if language > 0 then
+                       dictionary = dictionaries[language]
+                       -- we could postpone these
+                       instance   = dictionary.instance
+                       characters = dictionary.characters
+                       unicodes   = dictionary.unicodes
+                       leftchar   = instance and posthyphenchar(instance)
+                       rightchar  = instance and prehyphenchar (instance)
+                       leftmin    = leftcharmin  or getfield(current,"left")
+                       rightmin   = rightcharmin or getfield(current,"right")
+                       if not leftchar or leftchar < 0 then
+                           leftchar = false
+                       end
+                       if not rightchar or rightchar < 0 then
+                           rightchar = false
+                       end
+                       --
+                       local a = getattr(current,a_hyphenation)
+                       if a ~= attr then
+                           attr = synchronizefeatureset(a) -- influences extrachars
+                       end
+                       --
+                       local char = unicodes[code] or (extrachars and extrachars[code])
+                       if char then
+                           word[1] = char
+                           size    = 1
+                           start   = current
+                       end
+                   else
+                       size = 0
                    end
-               elseif word then
-                   local char = unicodes[code]
+               elseif language <= 0 then
+                   --
+               elseif size > 0 then
+                   local char = unicodes[code] or (extrachars and extrachars[code])
                    if char then
                        size = size + 1
                        word[size] = char
                    elseif dictionary then
-                       if leftmin + rightmin < #word then
-                           local done = hyphenate(dictionary,word)
-                           if done then
-                               flush(first,start,stop,dictionary,word,done,leftchar,rightchar,characters,leftmin,rightmin)
+                       if leftmin + rightmin <= size then
+                           local hyphens = hyphenated(dictionary,word,size)
+                           if hyphens then
+                               flush(hyphens)
                            end
                        end
-                       word = nil
+                       size = 0
+                       if hyphenchars and hyphenchars[code] then
+                           current = inject()
+                       end
                    end
                else
-                   local char = unicodes[code]
+                   local a = getattr(current,a_hyphenation)
+                   if a ~= attr then
+                       attr = synchronizefeatureset(a) -- influences extrachars
+                   end
+                   --
+                   local char = unicodes[code] or (extrachars and extrachars[code])
                    if char then
-                       word = { char }
-                       size = 1
-                       start = current
-                    -- leftmin = getfield(current,"left")  -- can be an option
-                    -- rightmin = getfield(current,"right") -- can be an option
+                       word[1] = char
+                       size    = 1
+                       start   = current
                    end
                end
                stop = current
                current = getnext(current)
-           elseif word then
-               if dictionary then
-                   if leftmin + rightmin < #word then
-                       local done = hyphenate(dictionary,word)
-                       current = getnext(current)
-                       if done then
-                           flush(first,start,stop,dictionary,word,done,leftchar,rightchar,characters,leftmin,rightmin)
+           else
+               current = id == math_code and getnext(end_of_math(current)) or getnext(current)
+               if size > 0 then
+                   if dictionary and leftmin + rightmin <= size then
+                       local hyphens = hyphenated(dictionary,word,size)
+                       if hyphens then
+                           flush(hyphens)
                        end
-                   else
-                       current = getnext(current) -- hm
                    end
-               else
-                   current = getnext(current)
+                   size = 0
                end
-               word = nil
-           elseif id == math_code then
-               current = getnext(end_of_math(current))
-           else
-               current = getnext(current)
            end
        end
+       -- we can have quit due to last so we need to flush the last seen word
+       if size > 0 and dictionary and leftmin + rightmin <= size then
+           local hyphens = hyphenated(dictionary,word,size)
+           if hyphens then
+               flush(hyphens)
+           end
+       end
+
+       stoptiming(traditional)
+       return head, true
    end
+
+   statistics.register("hyphenation",function()
+       if nofwords > 0 or statistics.elapsed(traditional) > 0 then
+           return string.format("%s words hyphenated, %s unique, used time %s",
+               nofwords,nofhashed,statistics.elapsedseconds(traditional) or 0)
+       end
+   end)
+
local texmethod = "builders.kernel.hyphenation"
local oldmethod = texmethod
local newmethod = texmethod
@@ -617,11 +1237,17 @@ if context then

-- \enabledirectives[hyphenators.method=traditional]
-- \enabledirectives[hyphenators.method=builtin]

-   directives.register("hyphenators.method",function(v)
-       if type(v) == "string" then
-           local valid = languages.hyphenators[v]
+   -- this avoids a wrapper
+
+   -- push / pop ? check first attribute
+
+   local replaceaction = nodes.tasks.replaceaction
+
+   local function setmethod(method)
+       if type(method) == "string" then
+           local valid = hyphenators[method]
            if valid and valid.hyphenate then
-               newmethod = "languages.hyphenators." .. v .. ".hyphenate"
+               newmethod = "languages.hyphenators." .. method .. ".hyphenate"
            else
                newmethod = texmethod
            end
@@ -629,16 +1255,65 @@ if context then
            newmethod = texmethod
        end
        if oldmethod ~= newmethod then
-           nodes.tasks.replaceaction("processors","words",oldmethod,newmethod)
+           replaceaction("processors","words",oldmethod,newmethod)
        end
        oldmethod = newmethod
-   end)
+   end

-   -- experimental feature
+   hyphenators.setmethod = setmethod

-   directives.register("hyphenators.rightwordsmin",function(v)
-       rightwordsmin = tonumber(v) or 0
-   end)
+   local stack = { }
+
+   local function pushmethod(method)
+       insert(stack,oldmethod)
+       setmethod(method)
+   end
+   local function popmethod()
+       setmethod(remove(stack))
+   end
+
+   hyphenators.pushmethod = pushmethod
+   hyphenators.popmethod  = popmethod
+
+   directives.register("hyphenators.method",setmethod)
+
+   function commands.setuphyphenation(specification)
+       local method = specification.method
+       if method then
+           setmethod(method)
+       end
+   end
+
+   commands.pushhyphenation = pushmethod
+   commands.pophyphenation  = popmethod
+
+   local context      = context
+   local ctx_NC       = context.NC
+   local ctx_NR       = context.NR
+   local ctx_verbatim = context.verbatim
+
+   function commands.showhyphenationtrace(language,word)
+       local saved = trace_steps
+       trace_steps = "silent"
+       local steps = traditional.gettrace(language,word)
+       trace_steps = saved
+       if steps then
+           local n = #steps
+           if n > 0 then
+               context.starttabulate { "|r|l|l|l|" }
+               for i=1,n do
+                   local s = steps[i]
+                   ctx_NC() if i > 1 and i < n then context(i-1) end
+                   ctx_NC() ctx_verbatim(s[1])
+                   ctx_NC() ctx_verbatim(s[2])
+                   ctx_NC() ctx_verbatim(s[3])
+                   ctx_NC()
+                   ctx_NR()
+               end
+               context.stoptabulate()
+           end
+       end
+   end

else

@@ -647,14 +1322,14 @@ else

-- traditional.loadpatterns("us","lang-us")

-- traditional.registerpattern("nl","e1ë",      { start = 1, length = 2, before = "e",  after = "e"  } )
--- traditional.registerpattern("nl","oo1ë",    { start = 2, length = 3, before = "o",  after = "e"  } )
+-- traditional.registerpattern("nl","oo7ë",    { start = 2, length = 3, before = "o",  after = "e"  } )
-- traditional.registerpattern("de","qqxc9xkqq",{ start = 3, length = 4, before = "ab", after = "cd" } )

-- local specification = {
---     lefthyphenmin   = 2,
---     righthyphenmin  = 2,
---     lefthyphenchar  = "<",
---     righthyphenchar = ">",
+--     leftcharmin  = 2,
+--     rightcharmin = 2,
+--     leftchar     = "<",
+--     rightchar    = ">",
-- }

-- print("reëel",            traditional.injecthyphens(dictionaries.nl,"reëel",            specification),"r{e>}{<e}{eë}el")
@@ -670,5 +1345,26 @@ else
-- print("kunststofmatig",   traditional.injecthyphens(dictionaries.nl,"kunststofmatig",   specification),"")
-- print("kunst[stof]matig",  traditional.injecthyphens(dictionaries.nl,"kunst[stof]matig", specification),"")

+-- traditional.loadpatterns("us","lang-us")
+
+-- local specification = {
+--     leftcharmin  = 2,
+--     rightcharmin = 2,
+--     leftchar     = false,
+--     rightchar    = false,
+-- }
+
+-- trace_steps = true
+
+-- print("components",    traditional.injecthyphens(dictionaries.us,"components",    specification),"")
+-- print("single",        traditional.injecthyphens(dictionaries.us,"single",        specification),"sin-gle")
+-- print("everyday",      traditional.injecthyphens(dictionaries.us,"everyday",      specification),"every-day")
+-- print("associate",     traditional.injecthyphens(dictionaries.us,"associate",     specification),"as-so-ciate")
+-- print("philanthropic", traditional.injecthyphens(dictionaries.us,"philanthropic", specification),"phil-an-thropic")
+-- print("projects",      traditional.injecthyphens(dictionaries.us,"projects",      specification),"projects")
+-- print("Associate",     traditional.injecthyphens(dictionaries.us,"Associate",     specification),"As-so-ciate")
+-- print("Philanthropic", traditional.injecthyphens(dictionaries.us,"Philanthropic", specification),"Phil-an-thropic")
+-- print("Projects",      traditional.injecthyphens(dictionaries.us,"Projects",      specification),"Projects")
+
end
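To make the control flow of the new scanner above easier to follow, here is a minimal standalone sketch of the same bookkeeping: characters of a word are collected into one reusable buffer and the hyphenator is only called when the word is long enough for the current leftmin/rightmin settings. It works on a plain string instead of a node list, and the hyphenated callback is a stub, so it only illustrates the flow, not the pattern matching itself.

-- Standalone sketch (plain Lua) of the scanner's bookkeeping; the real code
-- walks a node list and calls the dictionary based hyphenator instead of the
-- stub below.

local leftmin, rightmin = 2, 2

local function hyphenated(word,size) -- stub: pretend every position is a break
    local t = { }
    for i=1,size-1 do t[i] = i end
    return t
end

local function flush(word,size,hyphens)
    print(table.concat(word,"",1,size),"->",#hyphens,"potential breaks")
end

local function scan(text)
    local word, size = { }, 0
    for char in text:gmatch(".") do      -- the real scanner reads glyph nodes
        if char:match("%a") then
            size = size + 1
            word[size] = char            -- reuse the same table, as in the diff
        else
            if size > 0 and leftmin + rightmin <= size then
                local hyphens = hyphenated(word,size)
                if hyphens then
                    flush(word,size,hyphens)
                end
            end
            size = 0                     -- start a new word, keep the buffer
        end
    end
    if size > 0 and leftmin + rightmin <= size then -- flush the last seen word
        local hyphens = hyphenated(word,size)
        if hyphens then
            flush(word,size,hyphens)
        end
    end
end

scan("a traditional hyphenator only sees words")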
diff --git a/tex/context/base/lang-hyp.mkiv b/tex/context/base/lang-hyp.mkiv
index 0cd5a72ca..036974c87 100644
--- a/tex/context/base/lang-hyp.mkiv
+++ b/tex/context/base/lang-hyp.mkiv
@@ -32,21 +32,166 @@
\registerctxluafile{lang-hyp}{1.001}

+\unprotect
+
+\definesystemattribute[hyphenation][public]
+
%D This command can change! At some point we will keep the setting with the
%D paragraph and then the \type {\par} can go.

+% \unexpanded\def\atleastoneword#1%
+%   {\begingroup
+%    \enabledirectives[hyphenators.method=traditional]%
+%    \enabledirectives[hyphenators.rightwordsmin=1]%
+%    \lefthyphenmin \plusfour
+%    \righthyphenmin\plusfour
+%    #1\par
+%    \disabledirectives[hyphenators.rightwordsmin]%
+%    \enabledirectives[hyphenators.method]%
+%    \endgroup}
+
+%D Here is the real way:
+
+\installcorenamespace{hyphenation}
+\installcorenamespace{hyphenationfeatures}
+
+\installparameterhandler \??hyphenation {hyphenation}
+\installsetuphandler     \??hyphenation {hyphenation}
+
+\setuphyphenation
+  [\c!method=\s!default]
+
+\appendtoks
+    \ctxcommand{setuphyphenation{method="\hyphenationparameter\c!method"}}%
+\to \everysetuphyphenation
+
+%D These are mostly meant for manuals:
+
+\unexpanded\def\starthyphenation[#1]%
+  {\begingroup
+   \ctxcommand{pushhyphenation("#1")}}
+
+\unexpanded\def\stophyphenation
+  {\ifhmode\par\fi
+   \ctxcommand{pophyphenation()}%
+   \endgroup}
+
+% This is a global setting, so we need to disable it when needed. However, as
+% we are (hopefully) compatible and attribute driven one can also just keep it
+% enabled.
+%
+% \setuphyphenation
+%   [\c!method=\s!traditional] % no translations
+
+\unexpanded\def\definehyphenationfeatures
+  {\dodoubleargument\lang_hyphenation_define_features}
+
+\unexpanded\def\lang_hyphenation_define_features[#1][#2]%
+  {\begingroup
+   \letdummyparameter\c!characters\empty          % maybe \s!
+   \letdummyparameter\c!hyphens\empty             % maybe \s!
+   \letdummyparameter\c!joiners\empty             % maybe \s!
+   \letdummyparameter\c!rightwords\!!zerocount    % maybe \s!
+   \letdummyparameter\s!lefthyphenmin\!!zerocount
+   \letdummyparameter\s!righthyphenmin\!!zerocount
+   \letdummyparameter\c!alternative\empty
+   \getdummyparameters[#2]%
+   \ctxcommand{definehyphenationfeatures("#1",{
+       characters   = \!!bs\dummyparameter\c!characters\!!es,
+       hyphens      = \!!bs\dummyparameter\c!hyphens\!!es,
+       joiners      = \!!bs\dummyparameter\c!joiners\!!es,
+       rightwordmin = \number\dummyparameter\c!rightwords,
+       leftcharmin  = \number\dummyparameter\s!lefthyphenmin,
+       rightcharmin = \number\dummyparameter\s!righthyphenmin,
+       alternative  = "\dummyparameter\c!alternative",
+   })}%
+   \endgroup}
+
+\unexpanded\def\sethyphenationfeatures[#1]%
+  {\ctxcommand{sethyphenationfeatures("#1")}}
+
+% todo: \start ... \stop too
+
+\unexpanded\def\registerhyphenationpattern
+  {\dodoubleempty\lang_hyphenation_register_pattern}
+
+\def\lang_hyphenation_register_pattern[#1][#2]%
+  {\ctxcommand{registerhyphenationpattern(
+     \ifsecondargument
+       \!!bs#1\!!es,
+       \!!bs#2\!!es
+     \else
+       \!!bs\currentlanguage\!!es,
+       \!!bs#1\!!es
+     \fi
+   )}}
+
+\unexpanded\def\unregisterhyphenationpattern
+  {\dodoubleempty\lang_hyphenation_unregister_pattern}
+
+\def\lang_hyphenation_unregister_pattern[#1][#2]%
+  {\ctxcommand{registerhyphenationpattern(
+     \ifsecondargument
+       \!!bs#1\!!es,
+       \!!bs#2\!!es,
+     \else
+       \!!bs\currentlanguage\!!es,
+       \!!bs#1\!!es,
+     \fi
+     false
+   )}}
+
+\unexpanded\def\showhyphenationtrace
+  {\dodoubleempty\lang_hyphenation_show_trace}
+
+\def\lang_hyphenation_show_trace[#1][#2]%
+  {\begingroup
+   \tt
+   \ctxcommand{showhyphenationtrace(
+     \ifsecondargument
+       \!!bs#1\!!es,
+       \!!bs#2\!!es
+     \else
+       \!!bs\currentlanguage\!!es,
+       \!!bs#1\!!es
+     \fi
+   )}%
+   \endgroup}
+
+% For old times sake:
+
\unexpanded\def\atleastoneword#1%
  {\begingroup
-   \enabledirectives[hyphenators.method=traditional]%
-   \enabledirectives[hyphenators.rightwordsmin=1]%
-   \lefthyphenmin \plusfour
-   \righthyphenmin\plusfour
+   \starthyphenation[\c!method=traditional]% this might become default or a faster switch
+   \sethyphenationfeatures[words]%
   #1\par
-   \disabledirectives[hyphenators.rightwordsmin]%
-   \enabledirectives[hyphenators.method]%
+   \stophyphenation
   \endgroup}

-\endinput
+%D These are (at least now) not cumulative:
+
+\definehyphenationfeatures % just an example
+  [fences]
+  [\c!characters={[]()}]
+
+\definehyphenationfeatures
+  [words]
+  [\c!rightwords=1,
+   \s!lefthyphenmin=4,
+   \s!righthyphenmin=4]
+
+\definehyphenationfeatures
+  [default]
+  [\c!hyphens=\v!yes,
+   \c!joiners=\v!yes]
+
+% \sethyphenationfeatures
+%   [fences]
+
+% \sethyphenationfeatures
+%   [default,fences]
+
+\protect \endinput

% \starttext
%
diff --git a/tex/context/base/lang-ini.lua b/tex/context/base/lang-ini.lua
index 49eff4949..d0fbced4d 100644
--- a/tex/context/base/lang-ini.lua
+++ b/tex/context/base/lang-ini.lua
@@ -21,6 +21,9 @@ local utfbyte = utf.byte
local format, gsub = string.format, string.gsub
local concat, sortedkeys, sortedpairs = table.concat, table.sortedkeys, table.sortedpairs

+local context  = context
+local commands = commands
+
local settings_to_array = utilities.parsers.settings_to_array

local trace_patterns = false  trackers.register("languages.patterns", function(v) trace_patterns = v end)
@@ -152,7 +155,7 @@ local function loaddefinitions(tag,specification)
         -- instance:hyphenation(resources.exceptions and resources.exceptions.data or "")
            instance:patterns   (validdata(resources.patterns,  "patterns",  tag) or "")
            instance:hyphenation(validdata(resources.exceptions,"exceptions",tag) or "")
-data.resources = resources -- so we can use them otherwise
+           data.resources = resources -- so we can use them otherwise
        else
            report_initialization("invalid definition %a for language %a in %a",definition,tag,filename)
        end
@@ -360,7 +363,7 @@ languages.associate('fr','latn','fra')
statistics.register("loaded patterns", function()
    local result = languages.logger.report()
    if result ~= "none" then
---     return result
+    -- return result
        return format("%s, load time: %s",result,statistics.elapsedtime(languages))
    end
end)
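The \starthyphenation / \stophyphenation pair and the Lua pushmethod / popmethod helpers above are just a small save-and-restore stack around setmethod. The following plain Lua sketch shows that pattern in isolation; the names mirror the diff, and the replaceaction side effect is reduced to a print, so this is only an illustration of the idea.

-- Minimal sketch of the push/pop wrapper around a single global setting.

local insert, remove = table.insert, table.remove

local current = "default"

local function setmethod(method)
    if method and method ~= current then
        print("switching hyphenation method:",current,"->",method)
        current = method -- the real code swaps the node list processor here
    end
end

local stack = { }

local function pushmethod(method)
    insert(stack,current) -- remember what was active
    setmethod(method)
end

local function popmethod()
    setmethod(remove(stack)) -- restore the previous method
end

pushmethod("traditional") -- comparable to \starthyphenation[method=traditional]
pushmethod("builtin")     -- nested group
popmethod()               -- back to traditional
popmethod()               -- back to default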
diff --git a/tex/context/base/lang-ini.mkiv b/tex/context/base/lang-ini.mkiv
index 4ed7839bd..1ffc37791 100644
--- a/tex/context/base/lang-ini.mkiv
+++ b/tex/context/base/lang-ini.mkiv
@@ -162,7 +162,6 @@
%D implementations support run time addition of patterns to a
%D preloaded format).
-
%D \macros
%D   {preloadlanguages}
%D
@@ -354,13 +353,48 @@
\newtoks \everylanguage

+% \def\lang_basics_synchronize% assumes that \currentlanguage is set % % % use different name as complex
+%   {\normallanguage\ctxcommand{languagenumber(%
+%       "\currentlanguage",%
+%       "\defaultlanguage\currentlanguage",%
+%       "\languageparameter\s!patterns"%
+%    )}\relax
+%    \the\everylanguage\relax}
+
+% (some 20%) faster but code jungle (the publication code can have excessive
+% switching
+
+\installcorenamespace{languagenumbers}
+
+\def\lang_basics_synchronize_yes
+  {\zerocount % see below
+   \global\expandafter\chardef\csname\??languagenumbers\currentlanguage\endcsname
+     \ctxcommand{languagenumber(%
+       "\currentlanguage",%
+       "\defaultlanguage\currentlanguage",%
+       "\languageparameter\s!patterns"%
+     )}\relax
+   \normallanguage\csname\??languagenumbers\currentlanguage\endcsname}
+
+\let\lang_basics_synchronize_nop\zerocount % not loaded anyway
+
+\letvalue{\??languagenumbers}\lang_basics_synchronize_nop % initime
+
+\appendtoks
+    \letvalue{\??languagenumbers}\lang_basics_synchronize_yes % runtime
+\to \everydump
+
\def\lang_basics_synchronize% assumes that \currentlanguage is set % % % use different name as complex
-  {\normallanguage\ctxcommand{languagenumber(%
-     "\currentlanguage",%
-     "\defaultlanguage\currentlanguage",%
-     "\languageparameter\s!patterns"%
-   )}\relax
-   \the\everylanguage\relax}
+  {\normallanguage\csname\??languagenumbers
+     \ifcsname\??languagenumbers\currentlanguage\endcsname
+       \currentlanguage
+     \fi
+   \endcsname
+   \relax
+   \the\everylanguage
+   \relax}
+
+% so far

\newcount\hyphenminoffset
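The \??languagenumbers trick above avoids calling languagenumber() on every language switch by caching the result per language in a control sequence. The same idea in plain Lua is a memo table; the resolve function below merely stands in for the more expensive call and is only an illustration.

-- Sketch of the caching idea behind \??languagenumbers: compute once, reuse.

local numbers    = { } -- cache, keyed by language tag
local lastnumber = 0

local function resolve(tag)     -- stands in for the languagenumber() call
    lastnumber = lastnumber + 1 -- pretend this is expensive
    print("resolving",tag)
    return lastnumber
end

local function languagenumber(tag)
    local n = numbers[tag]
    if not n then
        n = resolve(tag)
        numbers[tag] = n        -- comparable to the global \chardef in the cache
    end
    return n
end

print(languagenumber("en")) -- resolves
print(languagenumber("nl")) -- resolves
print(languagenumber("en")) -- cached, no resolve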
diff --git a/tex/context/base/m-scite.mkiv b/tex/context/base/m-scite.mkiv
index aed2c2631..93349122d 100644
--- a/tex/context/base/m-scite.mkiv
+++ b/tex/context/base/m-scite.mkiv
@@ -66,6 +66,8 @@ local f_mapping = [[
\let\string\slxR\string\letterrightbrace
\let\string\slxM\string\letterdollar
\let\string\slxV\string\letterbar
+\let\string\slxU\string\letterhat
+\let\string\slxD\string\letterunderscore
\let\string\slxH\string\letterhash
\let\string\slxB\string\letterbackslash
\let\string\slxP\string\letterpercent
@@ -76,6 +78,8 @@ local replacer = lpeg.replacer {
    ["{"]  = "\\slxL ",
    ["}"]  = "\\slxR ",
    ["$"]  = "\\slxM ",
+   ["^"]  = "\\slxU ",
+   ["_"]  = "\\slxD ",
    ["|"]  = "\\slxV ",
    ["#"]  = "\\slxH ",
    ["\\"] = "\\slxB ",
diff --git a/tex/context/base/math-noa.lua b/tex/context/base/math-noa.lua
index da17f6a33..bc12867ed 100644
--- a/tex/context/base/math-noa.lua
+++ b/tex/context/base/math-noa.lua
@@ -971,8 +971,8 @@ local function movesubscript(parent,current_nucleus,current_char)
    local prev = getfield(parent,"prev")
    if prev and getid(prev) == math_noad then
        if not getfield(prev,"sup") and not getfield(prev,"sub") then
-           setfield(current_nucleus,"char",movesub[current_char or getchar(current_nucleus)]) -- {f} {'}_n => f_n^'
+           setfield(current_nucleus,"char",movesub[current_char or getchar(current_nucleus)])
            local nucleus = getfield(parent,"nucleus")
            local sub     = getfield(parent,"sub")
            local sup     = getfield(parent,"sup")
@@ -985,6 +985,18 @@ local function movesubscript(parent,current_nucleus,current_char)
            if trace_collapsing then
                report_collapsing("fixing subscript")
            end
+       elseif not getfield(prev,"sup") then
+           -- {f} {'}_n => f_n^'
+           setfield(current_nucleus,"char",movesub[current_char or getchar(current_nucleus)])
+           local nucleus = getfield(parent,"nucleus")
+           local sup     = getfield(parent,"sup")
+           setfield(prev,"sup",nucleus)
+           local dummy = copy_node(nucleus)
+           setfield(dummy,"char",0)
+           setfield(parent,"nucleus",dummy)
+           if trace_collapsing then
+               report_collapsing("fixing subscript")
+           end
        end
    end
end
@@ -1031,6 +1043,8 @@ local function collapsepair(pointer,what,n,parent,nested) -- todo: switch to tur
                    movesubscript(parent,current_nucleus,current_char)
                end
            end
+       elseif not nested and movesub[current_char] then
+           movesubscript(parent,current_nucleus,current_char)
        end
    end
end
diff --git a/tex/context/base/mult-def.mkiv b/tex/context/base/mult-def.mkiv
index 7791200f9..b73a6df3e 100644
--- a/tex/context/base/mult-def.mkiv
+++ b/tex/context/base/mult-def.mkiv
@@ -45,6 +45,12 @@
\def\c!nextrightquotation{nextrightquotation}

\def\c!fences            {fences}
+\def\c!words             {words}
+\def\c!characters        {characters}
+\def\c!hyphens           {hyphens}
+\def\c!joiners           {joiners}
+\def\c!leftwords         {leftwords}
+\def\c!rightwords        {rightwords}

\def\c!keeptogether      {keeptogether}
\def\c!viewerprefix      {viewerprefix}
@@ -79,6 +85,7 @@
\def\c!properties        {properties}
\def\c!journalconversion {journalconversion}
\def\c!register          {register}
+\def\c!note              {note}

\def\c!field             {field}
\def\c!ignore            {ignore}
\def\c!specification     {specification}
@@ -122,6 +129,8 @@
\def\v!vfenced           {vfenced}
\def\v!bothtext          {bothtext}

+\def\s!traditional       {traditional}
+
\def\c!HL                {HL}
\def\c!VL                {VL}
\def\c!NL                {NL}
diff --git a/tex/context/base/mult-sys.mkiv b/tex/context/base/mult-sys.mkiv
index 2a1261d8f..c79bc023d 100644
--- a/tex/context/base/mult-sys.mkiv
+++ b/tex/context/base/mult-sys.mkiv
@@ -268,6 +268,7 @@
\definesystemconstant {cite}
\definesystemconstant {nocite}
\definesystemconstant {list}
+\definesystemconstant {register}
\definesystemconstant {author}

% \def\s!parent{->} % 1% faster / => does not work in assignments
diff --git a/tex/context/base/node-fin.lua b/tex/context/base/node-fin.lua
index 1566e099f..f38f0bf85 100644
--- a/tex/context/base/node-fin.lua
+++ b/tex/context/base/node-fin.lua
@@ -168,7 +168,6 @@ function states.finalize(namespace,attribute,head) -- is this one ok?
    return head, false, false
end

--- disc nodes can be ignored
-- we need to deal with literals too (reset as well as oval)

-- if id == glyph_code or (id == whatsit_code and getsubtype(stack) == pdfliteral_code) or (id == rule_code and stack.width ~= 0) or (id == glue_code and stack.leader) then
@@ -181,6 +180,8 @@ local function process(namespace,attribute,head,inheritance,default) -- one attr
        local id = getid(stack)
        if id == glyph_code then
            check = true
+       elseif id == disc_code then
+           check = true -- indeed
        elseif id == glue_code then
            leader = getleader(stack)
            if leader then
@@ -294,6 +295,8 @@ local function selective(namespace,attribute,head,inheritance,default) -- two at
        local id = getid(stack)
        if id == glyph_code then
            check = true
+       elseif id == disc_code then
+           check = true -- indeed
        elseif id == glue_code then
            leader = getleader(stack)
            if leader then
diff --git a/tex/context/base/page-str.mkiv b/tex/context/base/page-str.mkiv
index a8fab9c6c..8284d4baa 100644
--- a/tex/context/base/page-str.mkiv
+++ b/tex/context/base/page-str.mkiv
@@ -34,7 +34,7 @@
\unprotect

\let     \currentoutputstream      \empty
-\newif  \ifinoutputstream          % will becoem a conditional or mode
+\newif  \ifinoutputstream          % will become a conditional or mode
\newtoks \everyenableoutputstream

\appendtoks
diff --git a/tex/context/base/s-figures-names.mkiv b/tex/context/base/s-figures-names.mkiv
index 913fcafdb..a2782efc9 100644
--- a/tex/context/base/s-figures-names.mkiv
+++ b/tex/context/base/s-figures-names.mkiv
@@ -29,12 +29,13 @@
\startluacode

-local context            = context
-local ctx_NC             = context.NC
-local ctx_NR             = context.NR
-local ctx_red            = context.red
-local ctx_starttabulate  = context.starttabulate
-local ctx_stoptabulate   = context.stoptabulate
+local context            = context
+local ctx_NC             = context.NC
+local ctx_NR             = context.NR
+local ctx_red            = context.red
+local ctx_starttabulate  = context.starttabulate
+local ctx_stoptabulate   = context.stoptabulate
+local ctx_hyphenatedname = context.hyphenatedfilename

trackers.enable("graphics.lognames")

@@ -42,7 +43,7 @@ context.start()

context.switchtobodyfont { "tt,small" }

-    local template = { "|Bl|l|" }
+    local template = { "|Bl|p|" }

    for _, data in table.sortedhash(figures.found) do
        ctx_starttabulate(template)
@@ -65,12 +66,14 @@ context.start()
        ctx_NC()
            context("found file")
        ctx_NC()
-           context(data.foundname)
+           ctx_hyphenatedname(data.foundname)
+        -- context(data.foundname)
        ctx_NC()
        ctx_NR()
        ctx_NC()
            context("used file")
        ctx_NC()
-           context(data.fullname)
+           ctx_hyphenatedname(data.fullname)
+        -- context(data.fullname)
        ctx_NC()
        ctx_NR()
        if badname then
            ctx_NC()
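The switch to a paragraph column (|Bl|p|) plus context.hyphenatedfilename above exists so that long path names can break inside the table. As a rough standalone illustration of what such a helper has to provide, the sketch below adds a break opportunity after each separator; the real helper works on nodes, and the visible marker here is just a stand-in.

-- Sketch: make a long file name breakable by adding break opportunities
-- after separators. "|" is only a visible stand-in for a discretionary.

local function breakablefilename(name,marker)
    marker = marker or "|"
    return (name:gsub("([/%._%-])","%1" .. marker))
end

print(breakablefilename("tex/context/base/s-figures-names.mkiv"))
-- tex/|context/|base/|s-|figures-|names.|mkiv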
diff --git a/tex/context/base/spac-chr.lua b/tex/context/base/spac-chr.lua
index 3fcfafe17..0cdec4b8f 100644
--- a/tex/context/base/spac-chr.lua
+++ b/tex/context/base/spac-chr.lua
@@ -19,6 +19,7 @@ local byte, lower = string.byte, string.lower
local next = next

local trace_characters = false  trackers.register("typesetters.characters", function(v) trace_characters = v end)
+local trace_nbsp       = false  trackers.register("typesetters.nbsp",       function(v) trace_nbsp       = v end)

local report_characters = logs.reporter("typesetting","characters")

@@ -39,6 +40,8 @@ local setattr            = nuts.setattr
local getfont            = nuts.getfont
local getchar            = nuts.getchar

+local setcolor           = nodes.tracers.colors.set
+
local insert_node_before = nuts.insert_before
local insert_node_after  = nuts.insert_after
local remove_node        = nuts.remove
@@ -50,6 +53,8 @@ local tasks              = nodes.tasks
local nodepool           = nuts.pool
local new_penalty        = nodepool.penalty
local new_glue           = nodepool.glue
+local new_kern           = nodepool.kern
+local new_rule           = nodepool.rule

local nodecodes          = nodes.nodecodes
local skipcodes          = nodes.skipcodes
@@ -114,6 +119,15 @@ local function inject_nobreak_space(unicode,head,current,space,spacestretch,spac
    setfield(current,"attr",nil)
    setattr(glue,a_character,unicode)
    head, current = insert_node_after(head,current,penalty)
+   if trace_nbsp then
+       local rule    = new_rule(space)
+       local kern    = new_kern(-space)
+       local penalty = new_penalty(10000)
+       setcolor(rule,"orange")
+       head, current = insert_node_after(head,current,rule)
+       head, current = insert_node_after(head,current,kern)
+       head, current = insert_node_after(head,current,penalty)
+   end
    head, current = insert_node_after(head,current,glue)
    return head, current
end
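The trace_nbsp branch above paints a colored rule exactly on top of the non-breakable space: a rule of the glue's width followed by a kern of minus that width, so the net advance stays unchanged and the following glue still provides the space. A tiny model of that arithmetic, with nodes reduced to name/width pairs, is shown below; it only demonstrates why the marker does not disturb spacing.

-- Model of the rule + negative kern trick: the total advance is unchanged.

local space = 65536 * 3 -- some width in scaled points

local list = {
    { "rule",    space }, -- visible marker, colored orange in the real code
    { "kern",   -space }, -- step back over the rule
    { "penalty", 0     }, -- penalties carry no width in this model
    { "glue",    space }, -- the actual (non breakable) space
}

local advance = 0
for i=1,#list do
    advance = advance + list[i][2]
end

print(advance == space) -- true: the marker adds no width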
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
Binary files differ
index 0f51e6963..f9a8c021f 100644
--- a/tex/context/base/status-files.pdf
+++ b/tex/context/base/status-files.pdf
diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf
Binary files differ
index 6f4bc79ea..47c64e4a0 100644
--- a/tex/context/base/status-lua.pdf
+++ b/tex/context/base/status-lua.pdf
diff --git a/tex/context/base/strc-flt.mkvi b/tex/context/base/strc-flt.mkvi
index ef4eae71c..94f730beb 100644
--- a/tex/context/base/strc-flt.mkvi
+++ b/tex/context/base/strc-flt.mkvi
@@ -1219,13 +1219,13 @@
   \blank[\rootfloatparameter\c!spaceafter]%
   \strc_floats_end_text_group
   \page_floats_report_total}
-
+
\def\borderedfloatbox
  {\begingroup
   \setupcurrentfloat[\c!location=\v!normal,\c!width=\v!fit,\c!height=\v!fit]%
   \inheritedfloatframed{\box\floatbox}%
   \endgroup}
-
+
% minwidth=fit,width=max : no overshoot, as wide as graphic

\def\strc_floats_align_content_indeed
diff --git a/tex/context/base/strc-lst.mkvi b/tex/context/base/strc-lst.mkvi
index ef3a3dbb1..6e7e28e99 100644
--- a/tex/context/base/strc-lst.mkvi
+++ b/tex/context/base/strc-lst.mkvi
@@ -821,7 +821,7 @@

% todo: provide packager via attributes

-\doinstallinjector\v!list
+\doinstallinjector\s!list

\installcorenamespace{listalternativemethods} % the general wrapper of a rendering
diff --git a/tex/context/base/strc-reg.mkiv b/tex/context/base/strc-reg.mkiv
index 7a6f4a30d..eedf86a66 100644
--- a/tex/context/base/strc-reg.mkiv
+++ b/tex/context/base/strc-reg.mkiv
@@ -719,7 +719,7 @@
% \placeregister[index]
% \stoptext

-\doinstallinjector\v!register
+\doinstallinjector\s!register

%D Beware, we get funny side effects when a dangling \index precedes an
%D placeindex as then flushing takes place inside the index. Took me hours
diff --git a/tex/context/base/supp-box.lua b/tex/context/base/supp-box.lua
index c69486306..e208ae39b 100644
--- a/tex/context/base/supp-box.lua
+++ b/tex/context/base/supp-box.lua
@@ -47,6 +47,7 @@ local copy_node    = nuts.copy
local copy_list    = nuts.copy_list
local find_tail    = nuts.tail
local traverse_id  = nuts.traverse_id
+local link_nodes   = nuts.linked

local listtoutf    = nodes.listtoutf

@@ -54,29 +55,64 @@ local nodepool     = nuts.pool
local new_penalty  = nodepool.penalty
local new_hlist    = nodepool.hlist
local new_glue     = nodepool.glue
+local new_rule     = nodepool.rule
+local new_kern     = nodepool.kern
+
+local setlistcolor = nodes.tracers.colors.setlist

local texget       = tex.get
+local texgetbox    = tex.getbox

-local function hyphenatedlist(head)
+local function hyphenatedlist(head,usecolor)
    local current = head and tonut(head)
    while current do
        local id   = getid(current)
        local next = getnext(current)
        local prev = getprev(current)
        if id == disc_code then
-           local hyphen = getfield(current,"pre")
-           if hyphen then
-               local penalty = new_penalty(-500)
-               -- insert_after etc
-               setfield(hyphen,"next",penalty)
-               setfield(penalty,"prev",hyphen)
-               setfield(prev,"next",hyphen)
-               setfield(next,"prev", penalty)
-               setfield(penalty,"next",next)
-               setfield(hyphen,"prev",prev)
+           local pre     = getfield(current,"pre")
+           local post    = getfield(current,"post")
+           local replace = getfield(current,"replace")
+           if pre then
                setfield(current,"pre",nil)
-               free_node(current)
            end
+           if post then
+               setfield(current,"post",nil)
+           end
+           if not usecolor then
+               -- nothing fancy done
+           elseif pre and post then
+               setlistcolor(pre,"darkmagenta")
+               setlistcolor(post,"darkcyan")
+           elseif pre then
+               setlistcolor(pre,"darkyellow")
+           elseif post then
+               setlistcolor(post,"darkyellow")
+           end
+           if replace then
+               flush_list(replace)
+               setfield(current,"replace",nil)
+           end
+        -- setfield(current,"replace",new_rule(65536)) -- new_kern(65536*2))
+           setfield(current,"next",nil)
+           setfield(current,"prev",nil)
+           local list = link_nodes (
+               pre and new_penalty(10000),
+               pre,
+               current,
+               post,
+               post and new_penalty(10000)
+           )
+           local tail = find_tail(list)
+           if prev then
+               setfield(prev,"next",list)
+               setfield(list,"prev",prev)
+           end
+           if next then
+               setfield(tail,"next",next)
+               setfield(next,"prev",tail)
+           end
+        -- free_node(current)
        elseif id == vlist_code or id == hlist_code then
            hyphenatedlist(getlist(current))
        end
@@ -84,7 +120,12 @@ local function hyphenatedlist(head)
    end
end

-commands.hyphenatedlist = hyphenatedlist
+function commands.hyphenatedlist(n,color)
+    local b = texgetbox(n)
+    if b then
+        hyphenatedlist(b.list,color)
+    end
+end

-- local function hyphenatedhack(head,pre)
--     pre = tonut(pre)
@@ -263,5 +304,5 @@ function commands.hboxtovbox(original)
end

function commands.boxtostring(n)
-    context.puretext(nodes.toutf(tex.box[n].list)) -- helper is defined later
+    context.puretext(nodes.toutf(texgetbox(n).list)) -- helper is defined later
end
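The new hyphenatedlist above keeps the pre and post parts of every discretionary, throws the replace text away, and pins the pieces together with high penalties, so a word is shown with all its break points. On plain strings the same transformation looks like this (a sketch only; the real code rebuilds a node list):

-- Sketch: show every discretionary of a word. Each break is a table with
-- pre/post/replace strings; the visible form keeps pre and post, drops replace.

local function showbreaks(fragments)
    local result = { }
    for i=1,#fragments do
        local f = fragments[i]
        if type(f) == "table" then -- a discretionary
            result[#result+1] = (f.pre or "") .. (f.post or "") -- replace is dropped
        else
            result[#result+1] = f
        end
    end
    return table.concat(result)
end

-- old German spelling hyphenates "backen" as "bak-ken": pre "k-", post "k",
-- replace "ck", a classic non trivial discretionary
print(showbreaks { "ba", { pre = "k-", post = "k", replace = "ck" }, "en" })
-- bak-ken
print(showbreaks { "hy", { pre = "-" }, "phen", { pre = "-" }, "ate" })
-- hy-phen-ate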
diff --git a/tex/context/base/supp-box.mkiv b/tex/context/base/supp-box.mkiv
index bc1e30749..5c4157bd1 100644
--- a/tex/context/base/supp-box.mkiv
+++ b/tex/context/base/supp-box.mkiv
@@ -1077,13 +1077,19 @@
%D \stoptyping

\unexpanded\def\dohyphenatednextbox
-  {\ctxcommand{hyphenatedlist(tex.box[\number\nextbox].list)}%
+  {\ctxcommand{hyphenatedlist(\number\nextbox)}%
   \unhbox\nextbox}

\unexpanded\def\hyphenatedword   {\dowithnextboxcs\dohyphenatednextbox\hbox}
\unexpanded\def\hyphenatedpar    {\dowithnextboxcs\dohyphenatednextbox\hbox}
\unexpanded\def\hyphenatedfile#1 {\dowithnextboxcs\dohyphenatednextbox\hbox{\readfile{#1}\donothing\donothing}}

+\unexpanded\def\dohyphenatednextboxcolor
+  {\ctxcommand{hyphenatedlist(\number\nextbox,true)}%
+   \unhbox\nextbox}
+
+\unexpanded\def\hyphenatedcoloredword{\dowithnextboxcs\dohyphenatednextboxcolor\hbox}
+
% D \starttyping
% D \hyphenatedhack{\kern-.25em_}{alongword}
% D \stoptyping
diff --git a/tex/context/base/tabl-xtb.lua b/tex/context/base/tabl-xtb.lua
index 46f08c6df..89ef21b79 100644
--- a/tex/context/base/tabl-xtb.lua
+++ b/tex/context/base/tabl-xtb.lua
@@ -818,7 +818,8 @@ function xtables.construct()
        end
        nofr = nofr + 1
        result[nofr] = {
-           hpack_node_list(list),
+        -- hpack_node_list(list),
+           hpack_node_list(list,0,"exactly","TLT"), -- otherwise weird lap
            size,
            i < nofrange and rowdistance > 0 and rowdistance or false, -- might move
            false
diff --git a/tex/context/base/typo-inj.mkiv b/tex/context/base/typo-inj.mkiv
index 11c04e9f0..3ac1c6623 100644
--- a/tex/context/base/typo-inj.mkiv
+++ b/tex/context/base/typo-inj.mkiv
@@ -58,7 +58,8 @@
\unexpanded\def\domarkinjector#1#2% called at the lua end
  {\dontleavehmode\llap{\infofont\ifcase#1\else\red\fi<#2>\quad}}

-% low level definers
+% low level definers .. we could have \injectors_mark and \injectors_check and then
+% use \v!list instead of \s!list

\unexpanded\def\doinstallinjector#1%
  {\letvalue{typo_injectors_mark_#1}\donothing
diff --git a/tex/context/base/typo-sus.lua b/tex/context/base/typo-sus.lua
index ccb10d411..6f4947b1f 100644
--- a/tex/context/base/typo-sus.lua
+++ b/tex/context/base/typo-sus.lua
@@ -52,6 +52,7 @@ local nodepool          = nuts.pool

local new_rule          = nodepool.rule
local new_kern          = nodepool.kern
+local new_penalty       = nodepool.penalty

local a_characters      = attributes.private("characters")

@@ -99,6 +100,8 @@ end

local function mark(head,current,id,color)
    if id == glue_code then
+       -- the glue can have stretch and/or shrink so the rule can overlap with the
+       -- following glyph .. no big deal as that one then sits on top of the rule
        local width = getfield(getfield(current,"spec"),"width")
        local rule  = new_rule(width)
        local kern  = new_kern(-width)
@@ -194,7 +197,14 @@ function typesetters.showsuspects(head)
        elseif id == glue_code then
            local a = getattr(current,a_characters)
            if a then
-               head = mark(head,current,id,"orange")
+               local prev = getprev(current)
+               local prid = prev and getid(prev)
+               if prid == penalty_code and getfield(prev,"penalty") == 10000 then
+                   head = mark(head,current,id,"orange")
+                   head = insert_before(head,current,new_penalty(10000))
+               else
+                   head = mark(head,current,id,"darkmagenta")
+               end
            end
            current = getnext(current)
        else
diff --git a/tex/context/base/x-asciimath.lua b/tex/context/base/x-asciimath.lua
index 8cc349095..ff9c5c3d6 100644
--- a/tex/context/base/x-asciimath.lua
+++ b/tex/context/base/x-asciimath.lua
@@ -814,15 +814,21 @@ local p_reserved =

local p_unicode = lpeg.utfchartabletopattern(table.keys(k_unicode)) / k_unicode

+local p_texescape = patterns.texescape
+
+local function texescaped(s)
+    return lpegmatch(p_texescape,s)
+end
+
local p_text =
    P("text")
  * p_spaces^0
  * Cc("\\asciimathoptext")
  * ( -- maybe balanced
-        Cs( P("{")      * (1-P("}"))^0              *  P("}")      )
-      + Cs((P("(")/"{") * (1-P(")"))^0              * (P(")")/"}"))
+        Cs( P("{")      * ((1-P("}"))^0/texescaped) *  P("}")      )
+      + Cs((P("(")/"{") * ((1-P(")"))^0/texescaped) * (P(")")/"}"))
    )
-  + Cc("\\asciimathoptext") * Cs(Cc("{") *  patterns.undouble             * Cc("}"))
+  + Cc("\\asciimathoptext") * Cs(Cc("{") * (patterns.undouble/texescaped) * Cc("}"))

local m_left = {
    ["(:"] = s_langle,
@@ -1676,6 +1682,8 @@ if not context then
-- report_asciimath(cleanedup([[a "α" b]]))
-- report_asciimath(cleanedup([[//4]]))

+convert("4/18*100text(%)~~22,2")
+
-- convert([[sum x]])
-- convert([[sum^(1)_(2) x]])
-- convert([[lim_(1)^(2) x]])
diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua
index 2c5a87812..409f5d255 100644
--- a/tex/generic/context/luatex/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
-- merged file : luatex-fonts-merged.lua
-- parent file : luatex-fonts.lua
--- merge date  : 11/17/14 14:32:07
+-- merge date  : 11/26/14 12:50:01

do -- begin closure to overcome local limits and interference
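The texescaped helper added to x-asciimath.lua above exists because the content of text(...) ends up verbatim in TeX, so characters like % (as in the convert("4/18*100text(%)~~22,2") test) must be escaped. The real code reuses the lpeg pattern patterns.texescape; the gsub based sketch below only approximates it, and the escape map is an assumption rather than the library's own table.

-- Rough stand-in for the lpeg based escaping: map TeX specials to safe macros.
-- The exact set and replacements are an assumption, not the library's table.

local escapes = {
    ["%"] = "\\letterpercent ",
    ["#"] = "\\letterhash ",
    ["$"] = "\\letterdollar ",
    ["&"] = "\\letterampersand ",
    ["_"] = "\\letterunderscore ",
}

local function texescaped(s)
    return (s:gsub("[%%#%$&_]",escapes))
end

print(texescaped("22,2 %"))     --> 22,2 \letterpercent
print(texescaped("price: $10")) --> price: \letterdollar 10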