author      Context Git Mirror Bot <phg42.2a@gmail.com>   2014-11-26 13:15:05 +0100
committer   Context Git Mirror Bot <phg42.2a@gmail.com>   2014-11-26 13:15:05 +0100
commit      5e624488bd860906c17301941c96e6209a9e4cd6 (patch)
tree        1a517cfd2d5d4787879fdd3673f0d123c970c9b9 /tex
parent      156e528557cf571eb99ab05a0892429b3c2bf269 (diff)
download    context-5e624488bd860906c17301941c96e6209a9e4cd6.tar.gz

    2014-11-26 12:52:00
Diffstat (limited to 'tex')
-rw-r--r--  tex/context/base/buff-par.lua | 54
-rw-r--r--  tex/context/base/buff-par.mkvi | 48
-rw-r--r--  tex/context/base/buff-ver.mkiv | 8
-rw-r--r--  tex/context/base/cont-new.mkiv | 2
-rw-r--r--  tex/context/base/context-version.pdf | bin 4390 -> 4392 bytes
-rw-r--r--  tex/context/base/context.mkiv | 2
-rw-r--r--  tex/context/base/core-env.lua | 8
-rw-r--r--  tex/context/base/core-env.mkiv | 22
-rw-r--r--  tex/context/base/core-sys.mkiv | 8
-rw-r--r--  tex/context/base/font-ctx.lua | 27
-rw-r--r--  tex/context/base/font-ini.mkvi | 13
-rw-r--r--  tex/context/base/font-run.mkiv | 8
-rw-r--r--  tex/context/base/grph-inc.lua | 14
-rw-r--r--  tex/context/base/lang-hyp.lua | 1404
-rw-r--r--  tex/context/base/lang-hyp.mkiv | 159
-rw-r--r--  tex/context/base/lang-ini.lua | 7
-rw-r--r--  tex/context/base/lang-ini.mkiv | 48
-rw-r--r--  tex/context/base/m-scite.mkiv | 4
-rw-r--r--  tex/context/base/math-noa.lua | 16
-rw-r--r--  tex/context/base/mult-def.mkiv | 9
-rw-r--r--  tex/context/base/mult-sys.mkiv | 1
-rw-r--r--  tex/context/base/node-fin.lua | 5
-rw-r--r--  tex/context/base/page-str.mkiv | 2
-rw-r--r--  tex/context/base/s-figures-names.mkiv | 21
-rw-r--r--  tex/context/base/spac-chr.lua | 14
-rw-r--r--  tex/context/base/status-files.pdf | bin 24712 -> 24694 bytes
-rw-r--r--  tex/context/base/status-lua.pdf | bin 344033 -> 344500 bytes
-rw-r--r--  tex/context/base/strc-flt.mkvi | 4
-rw-r--r--  tex/context/base/strc-lst.mkvi | 2
-rw-r--r--  tex/context/base/strc-reg.mkiv | 2
-rw-r--r--  tex/context/base/supp-box.lua | 69
-rw-r--r--  tex/context/base/supp-box.mkiv | 8
-rw-r--r--  tex/context/base/tabl-xtb.lua | 3
-rw-r--r--  tex/context/base/typo-inj.mkiv | 3
-rw-r--r--  tex/context/base/typo-sus.lua | 12
-rw-r--r--  tex/context/base/x-asciimath.lua | 14
-rw-r--r--  tex/generic/context/luatex/luatex-fonts-merged.lua | 2
37 files changed, 1553 insertions, 470 deletions
diff --git a/tex/context/base/buff-par.lua b/tex/context/base/buff-par.lua
index 2c1cd40e9..d12eddebc 100644
--- a/tex/context/base/buff-par.lua
+++ b/tex/context/base/buff-par.lua
@@ -9,13 +9,14 @@ if not modules then modules = { } end modules ['buff-par'] = {
local context, commands = context, commands
local insert, remove, find, gmatch = table.insert, table.remove, string.find, string.gmatch
-local strip, format = string.strip, string.format
+local fullstrip, formatters = string.fullstrip, string.formatters
local trace_parallel = false trackers.register("buffers.parallel", function(v) trace_parallel = v end)
local report_parallel = logs.reporter("buffers","parallel")
local variables = interfaces.variables
+local v_all = variables.all
local parallel = buffers.parallel or { }
buffers.parallel = parallel
@@ -40,7 +41,7 @@ function parallel.define(category,tags)
end
function parallel.reset(category,tags)
- if not tags or tags == "" or tags == variables.all then
+ if not tags or tags == "" or tags == v_all then
tags = table.keys(entries)
else
tags = settings_to_array(tags)
@@ -65,10 +66,12 @@ end
function parallel.save(category,tag,content)
local dc = data[category]
if not dc then
+ report_parallel("unknown category %a",category)
return
end
local entries = dc.entries[tag]
if not entries then
+ report_parallel("unknown entry %a",tag)
return
end
local lines = entries.lines
@@ -93,13 +96,14 @@ function parallel.save(category,tag,content)
if trace_parallel and label ~= "" then
report_parallel("reference found of category %a, tag %a, label %a",category,tag,label)
end
+ line.content = fullstrip(content)
line.label = label
- line.content = strip(content)
end
else
- line.content = strip(content)
+ line.content = fullstrip(content)
line.label = ""
end
+ -- print("[["..line.content.."]]")
end
function parallel.hassomecontent(category,tags)
@@ -108,7 +112,7 @@ function parallel.hassomecontent(category,tags)
return false
end
local entries = dc.entries
- if not tags or tags == "" or tags == variables.all then
+ if not tags or tags == "" or tags == v_all then
tags = table.keys(entries)
else
tags = utilities.parsers.settings_to_array(tags)
@@ -126,22 +130,26 @@ function parallel.hassomecontent(category,tags)
return false
end
-local save = resolvers.savers.byscheme
+local ctx_doflushparallel = context.doflushparallel
+local ctx_doifelse = commands.doifelse
+local f_content = formatters["\\input{%s}"]
+local save_byscheme = resolvers.savers.byscheme
function parallel.place(category,tags,options)
local dc = data[category]
if not dc then
return
end
- local entries = dc.entries
- local tags = utilities.parsers.settings_to_array(tags)
- local options = utilities.parsers.settings_to_hash(options)
- local start, n, criterium = options.start, options.n, options.criterium
- start, n = start and tonumber(start), n and tonumber(n)
- local max = 1
+ local entries = dc.entries
+ local tags = utilities.parsers.settings_to_array(tags)
+ local options = utilities.parsers.settings_to_hash(options) -- options can be hash too
+ local start = tonumber(options.start)
+ local n = tonumber(options.n)
+ local criterium = options.criterium
+ local max = 1
if n then
max = n
- elseif criterium == variables.all then
+ elseif criterium == v_all then
max = 0
for t=1,#tags do
local tag = tags[t]
@@ -156,15 +164,17 @@ function parallel.place(category,tags,options)
local tag = tags[t]
local entry = entries[tag]
if entry then
- local lines = entry.lines
- local number = entry.number + 1
- entry.number = number
- local line = remove(lines,1)
- if line and line.content then
- local content = format("\\input{%s}",save("virtual","parallel",line.content))
- context.doflushparallel(tag,1,number,line.label,content)
+ local lines = entry.lines
+ local number = entry.number + 1
+ entry.number = number
+ local line = remove(lines,1)
+ local content = line and line.content
+ local label = line and line.label or ""
+ if content then
+ local virtual = save_byscheme("virtual","parallel",content)
+ ctx_doflushparallel(tag,1,number,label,f_content(virtual))
else
- context.doflushparallel(tag,0,number,"","")
+ ctx_doflushparallel(tag,0,number,"","")
end
end
end
@@ -180,5 +190,5 @@ commands.placeparallel = parallel.place
commands.resetparallel = parallel.reset
function commands.doifelseparallel(category,tags)
- commands.doifelse(parallel.hassomecontent(category,tags))
+ ctx_doifelse(parallel.hassomecontent(category,tags))
end
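
A note on the reworked parallel.place above: the options argument now arrives from the TeX end as a Lua table (see the buff-par.mkvi hunk below), and tonumber already returns nil for missing or empty values, so the old "start and tonumber(start)" guard is gone. A minimal sketch of that handling in plain Lua; the helper name tooptions is illustrative and not part of ConTeXt:

-- Options may arrive either as a comma separated string ("start=2,n=3") or,
-- as in the new buff-par.mkvi code, as a table built at the TeX end.
local function tooptions(options)                  -- hypothetical helper
    if type(options) == "table" then
        return options
    end
    local t = { }
    for k, v in string.gmatch(options or "", "([^=,]+)=([^,]+)") do
        t[k] = v
    end
    return t
end

local options   = tooptions { start = "2", n = "", criterium = "all" }
local start     = tonumber(options.start)          -- 2
local n         = tonumber(options.n)              -- nil: empty string
local criterium = options.criterium                -- "all"
print(start, n, criterium)
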
diff --git a/tex/context/base/buff-par.mkvi b/tex/context/base/buff-par.mkvi
index 404fa8ef3..5efff5125 100644
--- a/tex/context/base/buff-par.mkvi
+++ b/tex/context/base/buff-par.mkvi
@@ -39,6 +39,7 @@
\unprotect
\installcorenamespace{parallel}
+\installcorenamespace{parallelhandler}
\installcommandhandler \??parallel {parallel} \??parallel
@@ -46,22 +47,41 @@
[\c!setups=parallel:place:default]
\let\buff_parallel_define_saved\defineparallel
+\let\buff_parallel_setup_saved \setupparallel
-\unexpanded\def\defineparallel
- {\dodoubleargument\buff_parallel_define}
+\unexpanded\def\defineparallel{\dodoubleargument\buff_parallel_define}
+\unexpanded\def\setupparallel {\dotripleargument\buff_parallel_setup}
\def\buff_parallel_define[#name][#instances]%
- {\buff_parallel_define_saved[#name]
+ {\buff_parallel_define_saved[#name][\c!list={#instances}]% list is internal
\ctxcommand{defineparallel("#name","#instances")}%
- \processcommalist[#instances]\buff_parallel_define_instance
+ \expandafter\newtoks\csname\??parallelhandler#name\endcsname
+ \processcommacommand[#instances]{\buff_parallel_define_instance{#name}}%
\setuevalue{\e!start#name}{\buff_parallel_start{#name}}%
\setuevalue{\e!stop #name}{\buff_parallel_stop}}
-\def\buff_parallel_define_instance#instance%
- {\normalexpanded{\buff_parallel_define_saved[#instance][\currentparallel]}%
+\let\buff_process_parallel_instance\relax
+
+\def\buff_process_parallel_instances
+ {\the\csname\??parallelhandler\currentparallel\endcsname}
+
+\def\buff_parallel_define_instance#name#instance%
+ {\normalexpanded{\buff_parallel_define_saved[#instance][#name]}%
\definebuffer[#instance]%
+ %\edef\t_buff_parallel{\csname\??parallelhandler#name\endcsname}%
+ \expandafter\let\expandafter\t_buff_parallel\csname\??parallelhandler#name\endcsname
+ \appendtoks
+ \edef\currentparallelinstance{#instance}%
+ \buff_process_parallel_instance\relax
+ \to \t_buff_parallel
\setuevalue{\e!stop#instance}{\buff_parallel_save{#instance}}}
+\def\buff_parallel_setup[#name][#instances][#setups]%
+ {\processcommalist[#instances]{\buff_parallel_setup_instance{#name}{#setups}}}
+
+\def\buff_parallel_setup_instance#name#setups#instance%
+ {\buff_parallel_setup_saved[#name:#instance][#setups]}
+
\unexpanded\def\buff_parallel_start#name%
{\pushmacro\currentparallel
\edef\currentparallel{#name}%
@@ -79,7 +99,13 @@
\def\buff_parallel_place[#name][#instance][#settings]%
{\begingroup
\edef\currentparallel{#name}%
- \ctxcommand{placeparallel("\currentparallel","#instance","#settings")}% -- todo: pass options as k/v
+ \setupcurrentparallel[#settings]%
+ \ctxcommand{placeparallel("\currentparallel","#instance",{
+ % setups = "\parallelparameter\c!setups",
+ start = "\parallelparameter\c!start",
+ n = "\parallelparameter\c!n",
+ criterium = "\parallelparameter\c!criterium",
+ })}%
\endgroup}
\def\doflushparallel#instance#status#line#label#content% called at lua end
@@ -88,7 +114,7 @@
\def\currentparallelnumber {#status}%
\def\currentparallelline {#line}%
\def\currentparallellabel {#label}%
- \def\currentparallelcontent {#content}%
+ \def\currentparallelcontent {#content}% can be kept at the lua end .. no need to use a virtual file

\ifcase#status\relax
\expandafter\buff_parallel_flush_nop
\or
@@ -100,7 +126,7 @@
{}
\def\buff_parallel_flush_yes
- {\directsetup{\namedparallelparameter\currentparallelinstance\c!setups}}
+ {\directsetup{\namedparallelparameter{\currentparallel:\currentparallelinstance}\c!setups}}
\unexpanded\def\doifelseparallel#name#instance%
{\ctxcommand{doifelseparallel("#name","#instance")}}
@@ -113,9 +139,9 @@
\startsetups parallel:place:default
\hangafter\plusone
- \hangindent4em
+ \hangindent4\emwidth
\dontleavehmode
- \hbox to 3em \bgroup
+ \hbox to 3\emwidth \bgroup
\hss
\bf
\doifsomething \currentparallellabel {
diff --git a/tex/context/base/buff-ver.mkiv b/tex/context/base/buff-ver.mkiv
index 707c235ff..019e645c4 100644
--- a/tex/context/base/buff-ver.mkiv
+++ b/tex/context/base/buff-ver.mkiv
@@ -82,10 +82,10 @@
{\spaceskip.5\emwidth\relax
\let\obeyedspace\specialobeyedspace
\let\controlspace\specialcontrolspace
- % \edef\p_buff_lines{\typeparameter\c!lines}%
- % \ifcsname\??typinglines\p_buff_lines\endcsname
- % \csname\??typinglines\p_buff_lines\endcsname
- % \fi
+ \edef\p_buff_lines{\typeparameter\c!lines}%
+ \ifcsname\??typinglines\p_buff_lines\endcsname
+ \csname\??typinglines\p_buff_lines\endcsname
+ \fi
\edef\p_buff_space{\typeparameter\c!space}%
\ifcsname\??typingspace\p_buff_space\endcsname
\csname\??typingspace\p_buff_space\endcsname
diff --git a/tex/context/base/cont-new.mkiv b/tex/context/base/cont-new.mkiv
index 7b474d3f2..7392a750e 100644
--- a/tex/context/base/cont-new.mkiv
+++ b/tex/context/base/cont-new.mkiv
@@ -11,7 +11,7 @@
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.
-\newcontextversion{2014.11.17 14:32}
+\newcontextversion{2014.11.26 12:50}
%D This file is loaded at runtime, thereby providing an excellent place for
%D hacks, patches, extensions and new features.
diff --git a/tex/context/base/context-version.pdf b/tex/context/base/context-version.pdf
index 32eb18732..fbc02fe6b 100644
--- a/tex/context/base/context-version.pdf
+++ b/tex/context/base/context-version.pdf
Binary files differ
diff --git a/tex/context/base/context.mkiv b/tex/context/base/context.mkiv
index 5125f8011..8229af432 100644
--- a/tex/context/base/context.mkiv
+++ b/tex/context/base/context.mkiv
@@ -28,7 +28,7 @@
%D up and the dependencies are more consistent.
\edef\contextformat {\jobname}
-\edef\contextversion{2014.11.17 14:32}
+\edef\contextversion{2014.11.26 12:50}
\edef\contextkind {beta}
%D For those who want to use this:
diff --git a/tex/context/base/core-env.lua b/tex/context/base/core-env.lua
index 2cc84299b..94f237c2e 100644
--- a/tex/context/base/core-env.lua
+++ b/tex/context/base/core-env.lua
@@ -45,8 +45,8 @@ setmetatableindex(tex.modes, function(t,k)
if csname_id(n) == undefined then
return false
else
- modes[k] = function() return texgetcount(n) >= 1 end
- return texgetcount(n) >= 1
+ modes[k] = function() return texgetcount(n) == 1 end
+ return texgetcount(n) == 1 -- 2 is prevented
end
end
end)
@@ -60,8 +60,8 @@ setmetatableindex(tex.systemmodes, function(t,k)
if csname_id(n) == undefined then
return false
else
- systemmodes[k] = function() return texgetcount(n) >= 1 end
- return texgetcount(n) >= 1
+ systemmodes[k] = function() return texgetcount(n) == 1 end
+ return texgetcount(n) == 1 -- 2 is prevented
end
end
end)
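
The two hunks above work together with the \disabledmode (0), \enabledmode (1) and \preventedmode (2) constants in core-env.mkiv below: a mode counter is now treated as a tristate, and only the value 1 counts as set, so a prevented mode is no longer reported as enabled. A small, purely illustrative Lua sketch of that rule (not part of the commit):

-- Tristate mode values as used at the TeX end: 0 disabled, 1 enabled,
-- 2 prevented. Checking "== 1" instead of ">= 1" keeps prevented modes
-- from being reported as enabled.
local states = { [0] = "disabled", [1] = "enabled", [2] = "prevented" }

local function modeenabled(count)
    return count == 1
end

for count = 0, 2 do
    print(count, states[count], modeenabled(count))
end
--  0  disabled   false
--  1  enabled    true
--  2  prevented  false
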
diff --git a/tex/context/base/core-env.mkiv b/tex/context/base/core-env.mkiv
index 47f29ed73..6f9343587 100644
--- a/tex/context/base/core-env.mkiv
+++ b/tex/context/base/core-env.mkiv
@@ -46,8 +46,6 @@
\installcorenamespace{modestack}
-% todo: check prevent mode, also at the lua end
-
\setnewconstant\disabledmode \zerocount
\setnewconstant\enabledmode \plusone
\setnewconstant\preventedmode\plustwo
@@ -163,7 +161,17 @@
% handy for mp
\def\booleanmodevalue#1%
- {\ifcsname\??mode#1\endcsname\ifcase\csname\??mode#1\endcsname\s!false\else\s!true\fi\else\s!false\fi}
+ {\ifcsname\??mode#1\endcsname
+ \ifcase\csname\??mode#1\endcsname
+ \s!false
+ \or
+ \s!true
+ \else
+ \s!false
+ \fi
+ \else
+ \s!false
+ \fi}
% check macros
@@ -213,8 +221,10 @@
\def\syst_modes_check_yes#1#2#3%
{\ifcase\csname\??mode#3\endcsname
\expandafter#2%
- \else
+ \or
\expandafter#1%
+ \else
+ \expandafter#2%
\fi}
\def\syst_modes_check#1#2#3%
@@ -230,6 +240,10 @@
{\ifcsname\??mode#1\endcsname
\ifcase\csname\??mode#1\endcsname
\let\syst_modes_check_all_step\gobbleoneargument
+ \or
+ % enabled
+ \else
+ \let\syst_modes_check_all_step\gobbleoneargument
\fi
\else
\let\syst_modes_check_all_step\gobbleoneargument
diff --git a/tex/context/base/core-sys.mkiv b/tex/context/base/core-sys.mkiv
index c3cc2a231..bd73ba08c 100644
--- a/tex/context/base/core-sys.mkiv
+++ b/tex/context/base/core-sys.mkiv
@@ -242,8 +242,14 @@
\installcommandhandler \??highlight {highlight} \??highlight % we could do with less
+\setuphighlight
+ [\c!command=\v!yes]
+
\appendtoks
- \setuevalue\currenthighlight{\typo_highlights_indeed{\currenthighlight}}%
+ \edef\p_command{\highlightparameter\c!command}%
+ \ifx\p_command\v!yes
+ \setuevalue\currenthighlight{\typo_highlights_indeed{\currenthighlight}}%
+ \fi
\to \everydefinehighlight
\ifdefined\dotaghighlight \else \let\dotaghighlight\relax \fi
diff --git a/tex/context/base/font-ctx.lua b/tex/context/base/font-ctx.lua
index 2a12a807a..f764edb6d 100644
--- a/tex/context/base/font-ctx.lua
+++ b/tex/context/base/font-ctx.lua
@@ -2091,11 +2091,11 @@ end)
directives.register("nodes.injections.fontkern", function(v) setfield(kern,"subtype",v and 0 or 1) end)
--- here
+-- here (todo: closure)
local trace_analyzing = false trackers.register("otf.analyzing", function(v) trace_analyzing = v end)
-local otffeatures = constructors.newfeatures("otf")
+----- otffeatures = constructors.newfeatures("otf")
local registerotffeature = otffeatures.register
local analyzers = fonts.analyzers
@@ -2198,3 +2198,26 @@ function commands.purefontname(name)
context(file.basename(name))
end
end
+
+local list = storage.shared.bodyfontsizes or { }
+storage.shared.bodyfontsizes = list
+
+function commands.registerbodyfontsize(size)
+ list[size] = true
+end
+
+function commands.getbodyfontsizes(separator)
+ context(concat(sortedkeys(list),separator))
+end
+
+function commands.processbodyfontsizes(command)
+ local keys = sortedkeys(list)
+ if command then
+ local action = context[command]
+ for i=1,#keys do
+ action(keys[i])
+ end
+ else
+ context(concat(keys,","))
+ end
+end
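
The three commands added above keep the normalized body font sizes in a shared storage table, replacing the \addtocommalist based \bodyfontenvironmentlist (see the font-ini.mkvi hunk below). A plain Lua sketch of the same registry idea; table.sort and a plain loop stand in for ConTeXt's sortedkeys and the context[command] callback, so the names below are not the commit's API:

local list = { }                                 -- set of registered sizes

local function registerbodyfontsize(size)
    list[size] = true
end

local function sortedsizes()
    local keys = { }
    for size in pairs(list) do
        keys[#keys+1] = size
    end
    table.sort(keys)
    return keys
end

registerbodyfontsize("12pt")
registerbodyfontsize("10pt")
registerbodyfontsize("11pt")

print(table.concat(sortedsizes(), ","))          --> 10pt,11pt,12pt

for _, size in ipairs(sortedsizes()) do          -- processbodyfontsizes analogue
    print("size", size)
end
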
diff --git a/tex/context/base/font-ini.mkvi b/tex/context/base/font-ini.mkvi
index f174b132b..bdb8f9a11 100644
--- a/tex/context/base/font-ini.mkvi
+++ b/tex/context/base/font-ini.mkvi
@@ -1158,7 +1158,9 @@
\installcorenamespace{fontenvironmentknown}
-\let\bodyfontenvironmentlist\empty % used in font-run (might change)
+% \let\bodyfontenvironmentlist\empty % used in font-run (might change)
+
+\newtoks\bodyfontenvironmentlist
\def\font_helpers_register_environment#class#body%
{\expandafter\let\csname\??fontenvironmentknown#class#body\endcsname\empty}
@@ -1179,6 +1181,12 @@
%D a bodyfont is loaded but changing them afterwards can be sort of tricky as
%D values are not consulted afterwards.
+\def\processbodyfontenvironmentlist#1% no \unexpanded as then we cannot use it in alignments
+ {\ctxcommand{processbodyfontsizes("\strippedcsname#1")}}
+
+\def\bodyfontenvironmentlist
+ {\ctxcommand{getbodyfontsizes()}}
+
\def\font_basics_define_body_font_environment_class[#class][#body][#settings]%
{\edef\m_font_body{#body}%
\ifx\m_font_body\s!default
@@ -1187,7 +1195,8 @@
\else
\normalizebodyfontsize\m_font_body_normalized\m_font_body
\font_basics_define_body_font_environment_size[#class][\m_font_body_normalized][#settings]%
- \addtocommalist\m_font_body_normalized\bodyfontenvironmentlist
+ %\addtocommalist\m_font_body_normalized\bodyfontenvironmentlist
+ \ctxcommand{registerbodyfontsize("\m_font_body_normalized")}%
\fi}
%D The empty case uses the same code but needs to ignore the current class
diff --git a/tex/context/base/font-run.mkiv b/tex/context/base/font-run.mkiv
index cb40adedd..1b8843b94 100644
--- a/tex/context/base/font-run.mkiv
+++ b/tex/context/base/font-run.mkiv
@@ -12,8 +12,8 @@
%C therefore copyrighted by \PRAGMA. See mreadme.pdf for
%C details.
-%D [This code is hooked into the core macros and saves some
-%D format space. It needs a cleanup.]
+%D [This code is hooked into the core macros and saves some format
+%D space. It needs a cleanup.]
\unprotect
@@ -100,8 +100,8 @@
&&\tttf\tx\s!text&&\tttf\tx\s!script&&\tttf\tx\s!scriptscript
&&\tttf\tx\s!x&&\tttf\tx\s!xx&&\tttf\tx\v!small&&\tttf\tx\v!big
&&\tttf\tx\c!interlinespace&\cr
- \noalign{\hrule}
- \@EA\globalprocesscommalist\@EA[\bodyfontenvironmentlist]\next}}
+ \noalign{\hrule}%
+ \processbodyfontenvironmentlist\next}}
\ifinsidefloat\else\stopbaselinecorrection\fi}
\unexpanded\gdef\showfont
diff --git a/tex/context/base/grph-inc.lua b/tex/context/base/grph-inc.lua
index ea922b28c..80d878019 100644
--- a/tex/context/base/grph-inc.lua
+++ b/tex/context/base/grph-inc.lua
@@ -247,12 +247,14 @@ figures.order = figures_order -- frozen
-- name checker
-local pattern = (R("az","AZ") * P(":"))^-1 * ( -- a-z : | A-Z :
- (R("az","09") + S("_/") - P("_")^2)^1 * P(".") * R("az")^1 + -- a-z | single _ | /
- (R("az","09") + S("-/") - P("-")^2)^1 * P(".") * R("az")^1 + -- a-z | single - | /
- (R("AZ","09") + S("_/") - P("_")^2)^1 * P(".") * R("AZ")^1 + -- A-Z | single _ | /
- (R("AZ","09") + S("-/") - P("-")^2)^1 * P(".") * R("AZ")^1 -- A-Z | single - | /
-) * P(-1) * Cc(false) + Cc(true)
+local okay = P("m_k_i_v_")
+
+local pattern = (R("az","AZ") * P(":"))^-1 * ( -- a-z : | A-Z :
+ (okay + R("az","09") + S("_/") - P("_")^2)^1 * (P(".") * R("az")^1)^0 * P(-1) + -- a-z | single _ | /
+ (okay + R("az","09") + S("-/") - P("-")^2)^1 * (P(".") * R("az")^1)^0 * P(-1) + -- a-z | single - | /
+ (okay + R("AZ","09") + S("_/") - P("_")^2)^1 * (P(".") * R("AZ")^1)^0 * P(-1) + -- A-Z | single _ | /
+ (okay + R("AZ","09") + S("-/") - P("-")^2)^1 * (P(".") * R("AZ")^1)^0 * P(-1) -- A-Z | single - | /
+) * Cc(false) + Cc(true)
function figures.badname(name)
if not name then
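
For readers unfamiliar with the idiom used in this name checker: the grammar for acceptable names ends in Cc(false) and the fallback is Cc(true), so lpeg.match returns true exactly for names that deserve a warning; the update additionally accepts suffixless names and the m_k_i_v_ prefix. A simplified, self-contained sketch of the idiom (lowercase names only, assuming the lpeg module is available; this is not the commit's full grammar):

local lpeg = require("lpeg")
local P, R, S, Cc = lpeg.P, lpeg.R, lpeg.S, lpeg.Cc

-- acceptable: lowercase letters, digits, single "_" or "-", "/", an optional
-- lowercase suffix, and nothing else up to the end of the string
local good = (R("az","09") + S("_-/") - P("_")^2 - P("-")^2)^1
           * (P(".") * R("az")^1)^0
           * P(-1)

local badname = good * Cc(false) + Cc(true)

for _, name in ipairs { "cow-fence.pdf", "cow__fence.pdf", "Cow-Fence.PDF" } do
    print(name, lpeg.match(badname, name))
end
--> cow-fence.pdf   false
--> cow__fence.pdf  true
--> Cow-Fence.PDF   true
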
diff --git a/tex/context/base/lang-hyp.lua b/tex/context/base/lang-hyp.lua
index 205baccce..31ec8946c 100644
--- a/tex/context/base/lang-hyp.lua
+++ b/tex/context/base/lang-hyp.lua
@@ -6,6 +6,9 @@ if not modules then modules = { } end modules ['lang-hyp'] = {
license = "see context related readme files"
}
+-- to be considered: reset dictionary.hyphenated when a pattern is added
+-- or maybe an explicit reset of the cache
+
-- In an automated workflow hyphenation of long titles can be somewhat problematic
-- especially when demands conflict. For that reason I played a bit with a Lua based
-- variant of the traditional hyphenation machinery. This mechanism has been extended
@@ -36,16 +39,21 @@ if not modules then modules = { } end modules ['lang-hyp'] = {
-- a s-s z o n-n y a l/sz=sz,2,3,ny=ny,6,3
--
-- ab1cd/ef=gh,2,2 : acd - efd (pattern/replacement,start,length
+--
+-- In the process of wrapping up (for the ctx conference proceedings) I cleaned up
+-- and extended the code a bit.
-local type, rawset, tonumber = type, rawset, tonumber
+local type, rawset, tonumber, next = type, rawset, tonumber, next
local P, R, S, Cg, Cf, Ct, Cc, C, Carg, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cf, lpeg.Ct, lpeg.Cc, lpeg.C, lpeg.Carg, lpeg.Cs
local lpegmatch = lpeg.match
-local concat = table.concat
-
-local utfchar = utf.char
-local utfbyte = utf.byte
+local concat = table.concat
+local insert = table.insert
+local remove = table.remove
+local formatters = string.formatters
+local utfchar = utf.char
+local utfbyte = utf.byte
if not characters then
require("char-ini")
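
Before the implementation starts, a compact illustration of the classical pattern mechanism that the new hyphenate() further down implements: every pattern contributes inter-character weights, the highest weight at each position wins, and only odd weights permit a break. The toy patterns below are invented for the example; the real code also adds "." fences, lowercases the word and honours the left/right minima:

-- key: the pattern's letters, value: its weights (one more than letters);
-- "yp" with {0,3,0} corresponds to the usual y3p notation
local patterns = {
    ["yp"]  = { 0, 3, 0 },        -- odd  : allow  hy-phenation
    ["en"]  = { 0, 1, 0 },        -- odd  : would allow hyphe-nation ...
    ["hen"] = { 0, 0, 2, 0 },     -- even : ... but 2 beats 1 and forbids it
    ["na"]  = { 0, 1, 0 },        -- odd  : allow  hyphen-ation
}

local function hyphenate(word)
    local n    = #word
    local done = { }                          -- best weight per gap 0..n
    for i = 0, n do done[i] = 0 end
    for i = 1, n do                           -- try all substrings, as in the
        for j = i, n do                       -- double loop further down
            local m = patterns[word:sub(i, j)]
            if m then
                for k = 1, #m do
                    local pos = i + k - 2     -- gap before character i+k-1
                    if m[k] > done[pos] then done[pos] = m[k] end
                end
            end
        end
    end
    local t = { }
    for i = 1, n do
        t[#t+1] = word:sub(i, i)
        if i < n and done[i] % 2 == 1 then t[#t+1] = "-" end
    end
    return table.concat(t)
end

print(hyphenate("hyphenation"))               --> hy-phen-ation
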
@@ -53,7 +61,14 @@ end
local setmetatableindex = table.setmetatableindex
-local languages = languages or { }
+-- \enabletrackers[hyphenator.steps=silent] will not write to the terminal
+
+local trace_steps = false trackers.register("hyphenator.steps", function(v) trace_steps = v end)
+local trace_visualize = false trackers.register("hyphenator.visualize",function(v) trace_visualize = v end)
+
+local report = logs.reporter("hyphenator")
+
+languages = languages or { }
local hyphenators = languages.hyphenators or { }
languages.hyphenators = hyphenators
local traditional = hyphenators.traditional or { }
@@ -64,28 +79,74 @@ local dictionaries = setmetatableindex(function(t,k)
patterns = { },
hyphenated = { },
specials = { },
+ exceptions = { },
+ loaded = false,
}
t[k] = v
return v
end)
+hyphenators.dictionaries = dictionaries
+
+local character = lpeg.patterns.utf8character
local digit = R("09")
-local character = lpeg.patterns.utf8character - P("/")
-local splitpattern_k = Cs((digit/"" + character)^1)
-local splitpattern_v = Ct(((digit/tonumber + Cc(0)) * character)^1 * (digit/tonumber)^0)
-local splitpattern_v =
- Ct(((digit/tonumber + Cc(0)) * character)^1 * (digit/tonumber)^0) *
- (P("/") * Cf ( Ct("") *
- Cg ( Cc("before") * C((1-lpeg.P("="))^1) * P("=") )
- * Cg ( Cc("after") * C((1-lpeg.P(","))^1) * P(",") )
- * Cg ( Cc("start") * ((1-lpeg.P(","))^1/tonumber) * P(",") )
- * Cg ( Cc("length") * ((1-lpeg.P(-1) )^1/tonumber) )
+local weight = digit/tonumber + Cc(0)
+local fence = P(".")
+local hyphen = P("-")
+local space = P(" ")
+local char = character - space
+local validcharacter = (character - S("./"))
+local keycharacter = character - S("/")
+----- basepart = Ct( (Cc(0) * fence)^-1 * (weight * validcharacter)^1 * weight * (fence * Cc(0))^-1)
+local specpart = (P("/") * Cf ( Ct("") *
+ Cg ( Cc("before") * C((1-P("="))^1) * P("=") ) *
+ Cg ( Cc("after") * C((1-P(","))^1) ) *
+ ( P(",") *
+ Cg ( Cc("start") * ((1-P(","))^1/tonumber) * P(",") ) *
+ Cg ( Cc("length") * ((1-P(-1) )^1/tonumber) )
+ )^-1
, rawset))^-1
-local function register(patterns,specials,str,specification)
- local k = lpegmatch(splitpattern_k,str)
- local v1, v2 = lpegmatch(splitpattern_v,str)
- patterns[k] = v1
+local make_hashkey_p = Cs((digit/"" + keycharacter)^1)
+----- make_pattern_p = basepart * specpart
+local make_hashkey_e = Cs((hyphen/"" + keycharacter)^1)
+local make_pattern_e = Ct(P(char) * (hyphen * Cc(true) * P(char) + P(char) * Cc(false))^1) -- catch . and char after -
+
+-- local make_hashkey_c = Cs((digit + keycharacter/"")^1)
+-- local make_pattern_c = Ct((P(1)/tonumber)^1)
+
+-- local cache = setmetatableindex(function(t,k)
+-- local n = lpegmatch(make_hashkey_c,k)
+-- local v = lpegmatch(make_pattern_c,n)
+-- t[k] = v
+-- return v
+-- end)
+--
+-- local weight_n = digit + Cc("0")
+-- local basepart_n = Cs( (Cc("0") * fence)^-1 * (weight * validcharacter)^1 * weight * (fence * Cc("0"))^-1) / cache
+-- local make_pattern_n = basepart_n * specpart
+
+local make_pattern_c = Ct((P(1)/tonumber)^1)
+
+-- us + nl: 17664 entries -> 827 unique (saves some 3M)
+
+local cache = setmetatableindex(function(t,k)
+ local v = lpegmatch(make_pattern_c,k)
+ t[k] = v
+ return v
+end)
+
+local weight_n = digit + Cc("0")
+local fence_n = fence / "0"
+local char_n = validcharacter / ""
+local basepart_n = Cs(fence_n^-1 * (weight_n * char_n)^1 * weight_n * fence_n^-1) / cache
+local make_pattern_n = basepart_n * specpart
+
+local function register_pattern(patterns,specials,str,specification)
+ local k = lpegmatch(make_hashkey_p,str)
+ -- local v1, v2 = lpegmatch(make_pattern_p,str)
+ local v1, v2 = lpegmatch(make_pattern_n,str)
+ patterns[k] = v1 -- is this key still ok for complex patterns
if specification then
specials[k] = specification
elseif v2 then
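
The cache above is what the "17664 entries -> 827 unique" remark refers to: a parsed pattern is reduced to its digit skeleton (for example "he2n" becomes "0020") and that string keys a memo table, so all patterns with the same weight sequence share one Lua table. A stripped-down illustration in plain Lua, with setmetatable/__index standing in for ConTeXt's setmetatableindex:

local cache = setmetatable({ }, {
    __index = function(t, key)                -- build the table on first use
        local v = { }
        for d in key:gmatch("%d") do
            v[#v+1] = tonumber(d)
        end
        t[key] = v                            -- remember it for next time
        return v
    end,
})

local a = cache["0020"]                       -- e.g. the skeleton of he2n
local b = cache["0020"]                       -- same skeleton, same table
local c = cache["0102"]
print(a == b, a == c)                         --> true  false
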
@@ -93,17 +154,50 @@ local function register(patterns,specials,str,specification)
end
end
-local word = ((Carg(1) * Carg(2) * C((1 - P(" "))^1)) / register + 1)^1
-local split = Ct(C(character)^1)
+local function unregister_pattern(patterns,specials,str)
+ local k = lpegmatch(make_hashkey_p,str)
+ patterns[k] = nil
+ specials[k] = nil
+end
+
+local function register_exception(exceptions,str,specification)
+ local k = lpegmatch(make_hashkey_e,str)
+ local v = lpegmatch(make_pattern_e,str)
+ exceptions[k] = v
+end
+
+local p_pattern = ((Carg(1) * Carg(2) * C(char^1)) / register_pattern + 1)^1
+local p_exception = ((Carg(1) * C(char^1)) / register_exception + 1)^1
+local p_split = Ct(C(character)^1)
function traditional.loadpatterns(language,filename)
- local specification = require(filename)
local dictionary = dictionaries[language]
- if specification then
- local patterns = specification.patterns
- if patterns then
- lpegmatch(word,patterns.data,1,dictionary.patterns,dictionary.specials)
+ if not dictionary.loaded then
+ if not filename or filename == "" then
+ filename = "lang-" .. language
end
+ filename = file.addsuffix(filename,"lua")
+ local fullname = resolvers.findfile(filename)
+ if fullname and fullname ~= "" then
+ local specification = dofile(fullname)
+ if specification then
+ local patterns = specification.patterns
+ if patterns then
+ local data = patterns.data
+ if data and data ~= "" then
+ lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
+ end
+ end
+ local exceptions = specification.exceptions
+ if exceptions then
+ local data = exceptions.data
+ if data and data ~= "" then
+ lpegmatch(p_exception,data,1,dictionary.exceptions)
+ end
+ end
+ end
+ end
+ dictionary.loaded = true
end
return dictionary
end
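
The rewritten loadpatterns above now locates the file itself (defaulting to "lang-<language>.lua") and expects it to return a table with patterns.data and exceptions.data strings, which are then fed to the p_pattern and p_exception lpegs. A rough sketch of that file shape and the loading step; the file name and its contents here are invented:

-- Assume some "lang-xx.lua" on the search path contains:
--
--   return {
--       patterns   = { data = "ab1cd e1f .gh4" },
--       exceptions = { data = "ta-ble pro-ject" },
--   }
--
local specification = dofile("lang-xx.lua")   -- resolvers.findfile locates it in ConTeXt

local patterns   = specification.patterns
local exceptions = specification.exceptions

if patterns and patterns.data and patterns.data ~= "" then
    for entry in patterns.data:gmatch("%S+") do
        print("pattern  ", entry)             -- registered via p_pattern above
    end
end
if exceptions and exceptions.data and exceptions.data ~= "" then
    for entry in exceptions.data:gmatch("%S+") do
        print("exception", entry)             -- registered via p_exception above
    end
end
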
@@ -113,30 +207,153 @@ local uccodes = characters.uccodes
local nofwords = 0
local nofhashed = 0
-local function hyphenate(dictionary,word)
+local steps = nil
+local f_show = formatters["%w%s"]
+
+local function show_log()
+ if trace_steps == true then
+ report()
+ local w = #steps[1][1]
+ for i=1,#steps do
+ local s = steps[i]
+ report("%s%w%S %S",s[1],w - #s[1] + 3,s[2],s[3])
+ end
+ report()
+ end
+end
+
+local function show_1(wsplit)
+ local u = concat(wsplit," ")
+ steps = { { f_show(0,u), f_show(0,u) } }
+end
+
+local function show_2(c,m,wsplit,done,i,spec)
+ local s = lpegmatch(p_split,c)
+ local t = { }
+ local n = #m
+ local w = #wsplit
+ for j=1,n do
+ t[#t+1] = m[j]
+ t[#t+1] = s[j]
+ end
+ local m = 2*i-2
+ local l = #t
+ local s = spec and table.sequenced(spec) or ""
+ if m == 0 then
+ steps[#steps+1] = { f_show(m, concat(t,"",2)), f_show(1,concat(done," ",2,#done),s) }
+ elseif i+1 == w then
+ steps[#steps+1] = { f_show(m-1,concat(t,"",1,#t-1)), f_show(1,concat(done," ",2,#done),s) }
+ else
+ steps[#steps+1] = { f_show(m-1,concat(t)), f_show(1,concat(done," ",2,#done),s) }
+ end
+end
+
+local function show_3(wsplit,done)
+ local t = { }
+ local h = { }
+ local n = #wsplit
+ for i=1,n do
+ local w = wsplit[i]
+ if i > 1 then
+ local d = done[i]
+ t[#t+1] = i > 2 and d % 2 == 1 and "-" or " "
+ h[#h+1] = d
+ end
+ t[#t+1] = w
+ h[#h+1] = w
+ end
+ steps[#steps+1] = { f_show(0,concat(h)), f_show(0,concat(t)) }
+ show_log()
+end
+
+local function show_4(wsplit,done)
+ steps = { { concat(wsplit," ") } }
+ show_log()
+end
+
+function traditional.lasttrace()
+ return steps
+end
+
+-- We could reuse the w table but as we cache the resolved words
+-- there is not much gain in that complication.
+--
+-- Beware: word can be a table and when n is passed to we can
+-- assume reuse so we need to honor that n then.
+
+-- todo: a fast variant for tex ... less lookups (we could check is
+-- dictionary has changed) ... although due to caching the already
+-- done words, we don't do much here
+
+local function hyphenate(dictionary,word,n) -- odd is okay
nofwords = nofwords + 1
local hyphenated = dictionary.hyphenated
- local isstring = type(word) == "string"
- local done
+ local isstring = type(word) == "string"
if isstring then
- done = hyphenated[word]
+ local done = hyphenated[word]
+ if done ~= nil then
+ return done
+ end
+ elseif n then
+ local done = hyphenated[concat(word,"",1,n)]
+ if done ~= nil then
+ return done
+ end
else
- done = hyphenated[concat(word)]
+ local done = hyphenated[concat(word)]
+ if done ~= nil then
+ return done
+ end
end
+ local key
+ if isstring then
+ key = word
+ word = lpegmatch(p_split,word)
+ if not n then
+ n = #word
+ end
+ else
+ if not n then
+ n = #word
+ end
+ key = concat(word,"",1,n)
+ end
+ local l = 1
+ local w = { "." }
+ for i=1,n do
+ local c = word[i]
+ l = l + 1
+ w[l] = lcchars[c] or c
+ end
+ l = l + 1
+ w[l] = "."
+ local c = concat(w,"",2,l-1)
+ --
+ local done = hyphenated[c]
if done ~= nil then
+ hyphenated[key] = done
+ nofhashed = nofhashed + 1
return done
- else
- done = false
end
- local specials = dictionary.specials
- local patterns = dictionary.patterns
- local s = isstring and lpegmatch(split,word) or word
- local l = #s
- local w = { }
- for i=1,l do
- local si = s[i]
- w[i] = lcchars[si] or si
+ --
+ local exceptions = dictionary.exceptions
+ local exception = exceptions[c]
+ if exception then
+ if trace_steps then
+ show_4(w,exception)
+ end
+ hyphenated[key] = exception
+ nofhashed = nofhashed + 1
+ return exception
end
+ --
+ if trace_steps then
+ show_1(w)
+ end
+ --
+ local specials = dictionary.specials
+ local patterns = dictionary.patterns
+ --
local spec
for i=1,l do
for j=i,l do
@@ -146,15 +363,23 @@ local function hyphenate(dictionary,word)
local s = specials[c]
if not done then
done = { }
- spec = { }
+ spec = nil
+ -- the string that we resolve has explicit fences (.) so
+ -- done starts at the first fence and runs upto the last
+ -- one so we need one slot less
for i=1,l do
done[i] = 0
end
end
+ -- we run over the pattern that always has a (zero) value for
+ -- each character plus one more as we look at both sides
for k=1,#m do
local new = m[k]
if not new then
break
+ elseif new == true then
+ report("fatal error")
+ break
elseif new > 0 then
local pos = i + k - 1
local old = done[pos]
@@ -163,136 +388,238 @@ local function hyphenate(dictionary,word)
elseif new > old then
done[pos] = new
if s then
- local b = i + s.start - 1
- local e = b + s.length - 1
- if pos >= b and pos <= e then
- spec[pos] = s
+ local b = i + (s.start or 1) - 1
+ if b > 0 then
+ local e = b + (s.length or 2) - 1
+ if e > 0 then
+ if pos >= b and pos <= e then
+ if spec then
+ spec[pos] = { s, k - 1 }
+ else
+ spec = { [pos] = { s, k - 1 } }
+ end
+ end
+ end
end
end
end
end
end
+ if trace_steps and done then
+ show_2(c,m,w,done,i,s)
+ end
end
end
end
+ if trace_steps and done then
+ show_3(w,done)
+ end
if done then
local okay = false
- for i=1,#done do
+ for i=3,#done do
if done[i] % 2 == 1 then
- done[i] = spec[i] or true
+ done[i-2] = spec and spec[i] or true
okay = true
else
- done[i] = false
+ done[i-2] = false
end
end
- if not okay then
+ if okay then
+ done[#done] = nil
+ done[#done] = nil
+ else
done = false
end
+ else
+ done = false
end
- hyphenated[isstring and word or concat(word)] = done
+ hyphenated[key] = done
nofhashed = nofhashed + 1
return done
end
-local f_detail_1 = string.formatters["{%s}{%s}{}"]
-local f_detail_2 = string.formatters["{%s%s}{%s%s}{%s}"]
+function traditional.gettrace(language,word)
+ local dictionary = dictionaries[language]
+ if dictionary then
+ local hyphenated = dictionary.hyphenated
+ hyphenated[word] = nil
+ hyphenate(dictionary,word)
+ return steps
+ end
+end
+
+local methods = setmetatableindex(function(t,k) local v = hyphenate t[k] = v return v end)
+
+function traditional.installmethod(name,f)
+ if rawget(methods,name) then
+ report("overloading %a is not permitted",name)
+ else
+ methods[name] = f
+ end
+end
+
+local s_detail_1 = "-"
+local f_detail_2 = formatters["%s-%s"]
+local f_detail_3 = formatters["{%s}{%s}{}"]
+local f_detail_4 = formatters["{%s%s}{%s%s}{%s}"]
function traditional.injecthyphens(dictionary,word,specification)
- local h = hyphenate(dictionary,word)
- if not h then
+ if not word then
+ return false
+ end
+ if not specification then
+ return word
+ end
+ local hyphens = hyphenate(dictionary,word)
+ if not hyphens then
return word
end
- local w = lpegmatch(split,word)
- local r = { }
- local l = #h
- local n = 0
- local i = 1
- local leftmin = specification.lefthyphenmin or 2
- local rightmin = l - (specification.righthyphenmin or left) + 1
- local leftchar = specification.lefthyphenchar
- local rightchar = specification.righthyphenchar
- while i <= l do
- if i > leftmin and i < rightmin then
- local hi = h[i]
- if not hi then
- n = n + 1
- r[n] = w[i]
- i = i + 1
- elseif hi == true then
- n = n + 1
- r[n] = f_detail_1(rightchar,leftchar)
- n = n + 1
- r[n] = w[i]
- i = i + 1
+
+ -- the following code is similar to code later on but here we have
+ -- strings while there we have hyphen specs
+
+ local word = lpegmatch(p_split,word)
+ local size = #word
+
+ local leftmin = specification.leftcharmin or 2
+ local rightmin = size - (specification.rightcharmin or leftmin)
+ local leftchar = specification.leftchar
+ local rightchar = specification.rightchar
+
+ local result = { }
+ local rsize = 0
+ local position = 1
+
+ while position <= size do
+ if position >= leftmin and position <= rightmin then
+ local hyphen = hyphens[position]
+ if not hyphen then
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
+ elseif hyphen == true then
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ rsize = rsize + 1
+ if leftchar and rightchar then
+ result[rsize] = f_detail_3(rightchar,leftchar)
+ else
+ result[rsize] = s_detail_1
+ end
+ position = position + 1
else
- local b = i - hi.start
- local e = b + hi.length - 1
- n = b
- r[n] = f_detail_2(hi.before,rightchar,leftchar,hi.after,concat(w,"",b,e))
- if e + 1 == i then
- i = i + 1
+ local o, h = hyphen[2]
+ if o then
+ h = hyphen[1]
+ else
+ h = hyphen
+ o = 1
+ end
+ local b = position - o + (h.start or 1)
+ local e = b + (h.length or 2) - 1
+ if b > 0 and e >= b then
+ for i=1,b-position do
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
+ end
+ rsize = rsize + 1
+ if leftchar and rightchar then
+ result[rsize] = f_detail_4(h.before,rightchar,leftchar,h.after,concat(word,"",b,e))
+ else
+ result[rsize] = f_detail_2(h.before,h.after)
+ end
+ position = e + 1
else
- i = e + 1
+ -- error
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
end
end
else
- n = n + 1
- r[n] = w[i]
- i = i + 1
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
end
end
- return concat(r)
+ return concat(result)
end
function traditional.registerpattern(language,str,specification)
local dictionary = dictionaries[language]
- register(dictionary.patterns,dictionary.specials,str,specification)
+ if specification == false then
+ unregister_pattern(dictionary.patterns,dictionary.specials,str)
+ else
+ register_pattern(dictionary.patterns,dictionary.specials,str,specification)
+ end
end
-- todo: unicodes or utfhash ?
if context then
- local nodecodes = nodes.nodecodes
- local glyph_code = nodecodes.glyph
- local math_code = nodecodes.math
-
- local nuts = nodes.nuts
- local tonut = nodes.tonut
- local nodepool = nuts.pool
-
- local new_disc = nodepool.disc
-
- local setfield = nuts.setfield
- local getfield = nuts.getfield
- local getchar = nuts.getchar
- local getid = nuts.getid
- local getnext = nuts.getnext
- local getprev = nuts.getprev
- local insert_before = nuts.insert_before
- local insert_after = nuts.insert_after
- local copy_node = nuts.copy
- local remove_node = nuts.remove
- local end_of_math = nuts.end_of_math
- local node_tail = nuts.tail
+ local nodecodes = nodes.nodecodes
+ local glyph_code = nodecodes.glyph
+ local math_code = nodecodes.math
+
+ local nuts = nodes.nuts
+ local tonut = nodes.tonut
+ local nodepool = nuts.pool
+
+ local new_disc = nodepool.disc
+
+ local setfield = nuts.setfield
+ local getfield = nuts.getfield
+ local getchar = nuts.getchar
+ local getid = nuts.getid
+ local getattr = nuts.getattr
+ local getnext = nuts.getnext
+ local getprev = nuts.getprev
+ local insert_before = nuts.insert_before
+ local insert_after = nuts.insert_after
+ local copy_node = nuts.copy
+ local remove_node = nuts.remove
+ local end_of_math = nuts.end_of_math
+ local node_tail = nuts.tail
+
+ local setcolor = nodes.tracers.colors.set
+
+ local variables = interfaces.variables
+ local v_reset = variables.reset
+ local v_yes = variables.yes
+ local v_all = variables.all
+
+ local settings_to_array = utilities.parsers.settings_to_array
+
+ local unsetvalue = attributes.unsetvalue
+ local texsetattribute = tex.setattribute
+
+ local prehyphenchar = lang.prehyphenchar
+ local posthyphenchar = lang.posthyphenchar
+
+ local lccodes = characters.lccodes
+
+ local a_hyphenation = attributes.private("hyphenation")
function traditional.loadpatterns(language)
return dictionaries[language]
end
- statistics.register("hyphenation",function()
- if nofwords > 0 then
- return string.format("%s words hyphenated, %s unique",nofwords,nofhashed)
+ setmetatableindex(dictionaries,function(t,k) -- for the moment we use an independent data structure
+ if type(k) == "string" then
+ -- this will force a load if not yet loaded (we need a nicer way)
+ -- for the moment that will do (needed for examples that register
+ -- a pattern specification
+ languages.getnumber(k)
end
- end)
-
- setmetatableindex(dictionaries,function(t,k) -- we use an independent data structure
local specification = languages.getdata(k)
- local dictionary = {
+ local dictionary = {
patterns = { },
+ exceptions = { },
hyphenated = { },
specials = { },
- instance = 0,
+ instance = false,
characters = { },
unicodes = { },
}
@@ -304,21 +631,22 @@ if context then
local data = patterns.data
if data then
-- regular patterns
- lpegmatch(word,data,1,dictionary.patterns,dictionary.specials)
+ lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
end
local extra = patterns.extra
if extra then
-- special patterns
- lpegmatch(word,extra,1,dictionary.patterns,dictionary.specials)
+ lpegmatch(p_pattern,extra,1,dictionary.patterns,dictionary.specials)
+ end
+ end
+ local exceptions = resources.exceptions
+ if exceptions then
+ local data = exceptions.data
+ if data and data ~= "" then
+ lpegmatch(p_exception,data,1,dictionary.exceptions)
end
end
- local permitted = patterns.characters
--- local additional = "[]()"
--- local additional = specification.additional
--- if additional then
--- permitted = permitted .. additional -- has to be attribute driven
--- end
- local usedchars = lpegmatch(split,permitted)
+ local usedchars = lpegmatch(p_split,patterns.characters)
local characters = { }
local unicodes = { }
for i=1,#usedchars do
@@ -327,12 +655,18 @@ if context then
local upper = uccodes[code]
characters[char] = code
unicodes [code] = char
- unicodes [upper] = utfchar(upper)
+ if type(upper) == "table" then
+ for i=1,#upper do
+ local u = upper[i]
+ unicodes[u] = utfchar(u)
+ end
+ else
+ unicodes[upper] = utfchar(upper)
+ end
end
dictionary.characters = characters
dictionary.unicodes = unicodes
- setmetatableindex(characters,function(t,k) local v = utfbyte(k) t[k] = v return v end) -- can be non standard
- -- setmetatableindex(unicodes, function(t,k) local v = utfchar(k) t[k] = v return v end)
+ setmetatableindex(characters,function(t,k) local v = k and utfbyte(k) t[k] = v return v end)
end
t[specification.number] = dictionary
dictionary.instance = specification.instance -- needed for hyphenchars
@@ -341,268 +675,554 @@ if context then
return dictionary
end)
- local function flush(head,start,stop,dictionary,w,h,lefthyphenchar,righthyphenchar,characters,lefthyphenmin,righthyphenmin)
- local r = { }
- local l = #h
- local n = 0
- local i = 1
- local left = lefthyphenmin
- local right = l - righthyphenmin + 1
- while i <= l do
- if i > left and i < right then
- local hi = h[i]
- if not hi then
- n = n + 1
- r[n] = w[i]
- i = i + 1
- elseif hi == true then
- n = n + 1
- r[n] = true
- n = n + 1
- r[n] = w[i]
- i = i + 1
- else
- local b = i - hi.start -- + 1 - 1
- local e = b + hi.length - 1
- n = b
- r[n] = { hi.before, hi.after, concat(w,"",b,e) }
- i = e + 1
+ -- Beware: left and right min doesn't mean that in a 1 mmm hsize there can be snippets
+ -- with less characters than either of them! This could be an option but such a narrow
+ -- hsize doesn't make sense anyway.
+
+ -- We assume that featuresets are defined global ... local definitions
+ -- (also mid paragraph) make not much sense anyway. For the moment we
+ -- assume no predefined sets so we don't need to store them. Nor do we
+ -- need to hash them in order to save space ... no sane user will define
+ -- many of them.
+
+ local featuresets = hyphenators.featuresets or { }
+ hyphenators.featuresets = featuresets
+
+ storage.shared.noflanguagesfeaturesets = storage.shared.noflanguagesfeaturesets or 0
+
+ local noffeaturesets = storage.shared.noflanguagesfeaturesets
+
+ storage.register("languages/hyphenators/featuresets",featuresets,"languages.hyphenators.featuresets")
+
+ ----- hash = table.sequenced(featureset,",") -- no need now
+
+ local function register(name,featureset)
+ noffeaturesets = noffeaturesets + 1
+ featureset.attribute = noffeaturesets
+ featuresets[noffeaturesets] = featureset -- access by attribute
+ featuresets[name] = featureset -- access by name
+ storage.shared.noflanguagesfeaturesets = noffeaturesets
+ return noffeaturesets
+ end
+
+ local function makeset(...) -- a bit overkill, supporting variants but who cares
+ local set = { }
+ for i=1,select("#",...) do
+ local list = select(i,...)
+ local kind = type(list)
+ local used = nil
+ if kind == "string" then
+ if list == v_all then
+ -- not ok ... now all get ignored
+ return setmetatableindex(function(t,k) local v = utfchar(k) t[k] = v return v end)
+ elseif list ~= "" then
+ used = lpegmatch(p_split,list)
+ set = set or { }
+ for i=1,#used do
+ local char = used[i]
+ set[utfbyte(char)] = char
+ end
+ end
+ elseif kind == "table" then
+ if next(list) then
+ set = set or { }
+ for byte, char in next, list do
+ set[byte] = char == true and utfchar(byte) or char
+ end
+ elseif #list > 0 then
+ set = set or { }
+ for i=1,#list do
+ local l = list[i]
+ if type(l) == "number" then
+ set[l] = utfchar(l)
+ else
+ set[utfbyte(l)] = l
+ end
+ end
end
- else
- n = n + 1
- r[n] = w[i]
- i = i + 1
end
end
+ return set
+ end
- local function serialize(s,lefthyphenchar,righthyphenchar)
- if not s then
- return
- elseif s == true then
- local n = copy_node(stop)
- setfield(n,"char",lefthyphenchar or righthyphenchar)
- return n
- end
- local h = nil
- local c = nil
- if lefthyphenchar then
- h = copy_node(stop)
- setfield(h,"char",lefthyphenchar)
- c = h
- end
- if #s == 1 then
- local n = copy_node(stop)
- setfield(n,"char",characters[s])
- if not h then
- h = n
- else
- insert_after(c,c,n)
- end
- c = n
- else
- local t = lpegmatch(split,s)
+ local defaulthyphens = {
+ [0x2D] = true, -- hyphen
+ [0xAD] = true, -- soft hyphen
+ }
+
+ local defaultjoiners = {
+ [0x200C] = true, -- nzwj
+ [0x200D] = true, -- zwj
+ }
+
+ local function definefeatures(name,featureset)
+ local extrachars = featureset.characters -- "[]()"
+ local hyphenchars = featureset.hyphens
+ local joinerchars = featureset.joiners
+ local alternative = featureset.alternative
+ local rightwordmin = tonumber(featureset.rightwordmin)
+ local leftcharmin = tonumber(featureset.leftcharmin)
+ local rightcharmin = tonumber(featureset.rightcharmin)
+ --
+ joinerchars = joinerchars == v_yes and defaultjoiners or joinerchars
+ hyphenchars = hyphenchars == v_yes and defaulthyphens or hyphenchars
+ -- not yet ok: extrachars have to be ignored so it cannot be all)
+ featureset.extrachars = makeset(joinerchars or "",extrachars or "")
+ featureset.hyphenchars = makeset(hyphenchars or "")
+ featureset.alternative = alternative or "hyphenate"
+ featureset.rightwordmin = rightwordmin and rightwordmin > 0 and rightwordmin or nil
+ featureset.leftcharmin = leftcharmin and leftcharmin > 0 and leftcharmin or nil
+ featureset.rightcharmin = rightcharmin and rightcharmin > 0 and rightcharmin or nil
+ --
+ return register(name,featureset)
+ end
+
+ traditional.definefeatures = definefeatures
+ commands.definehyphenationfeatures = definefeatures
+
+ function commands.sethyphenationfeatures(n)
+ if not n or n == v_reset then
+ n = false
+ else
+ local f = featuresets[n]
+ if not f and type(n) == "string" then
+ local t = settings_to_array(n)
+ local s = { }
for i=1,#t do
- local n = copy_node(stop)
- setfield(n,"char",characters[t[i]])
- if not h then
- h = n
- else
- insert_after(c,c,n)
+ local ti = t[i]
+ local fs = featuresets[ti]
+ if fs then
+ for k, v in next, fs do
+ s[k] = v
+ end
end
- c = n
end
+ n = register(n,s)
+ else
+ n = f and f.attribute
end
- if righthyphenchar then
- local n = copy_node(stop)
- insert_after(c,c,n)
- setfield(n,"char",righthyphenchar)
- end
- return h
end
+ texsetattribute(a_hyphenation,n or unsetvalue)
+ end
+
+ commands.registerhyphenationpattern = traditional.registerpattern
- -- no grow
+ -- This is a relative large function with local variables and local
+ -- functions. A previous implementation had the functions outside but
+ -- this is cleaner and as efficient. The test runs 100 times over
+ -- tufte.tex, knuth.tex, zapf.tex, ward.tex and darwin.tex in lower
+ -- and uppercase with a 1mm hsize.
+ --
+ -- language=0 language>0 4 | 3 * slower
+ --
+ -- tex 2.34 | 1.30 2.55 | 1.45 0.21 | 0.15
+ -- lua 2.42 | 1.38 3.30 | 1.84 0.88 | 0.46
+ --
+ -- Of course we have extra overhead (virtual Lua machine) but also we
+ -- check attributes and support specific local options). The test puts
+ -- the typeset text in boxes and discards it. If we also flush the
+ -- runtime is 4.31|2.56 and 4.99|2.94 seconds so the relative difference
+ -- is (somehow) smaller. The test has 536 pages. There is a little bit
+ -- of extra overhead because we store the patterns in a different way.
+ --
+ -- As usual I will look for speedups. Some 0.01 seconds could be gained
+ -- by sharing patterns which is not impressive but it does save some
+ -- 3M memory on this test. (Some optimizations already brought the 3.30
+ -- seconds down to 3.14 but it all depends on aggressive caching.)
- local current = start
- local size = #r
- for i=1,size do
- local ri = r[i]
- if ri == true then
- local n = new_disc()
- if righthyphenchar then
- setfield(n,"pre",serialize(true,righthyphenchar))
+ local starttiming = statistics.starttiming
+ local stoptiming = statistics.stoptiming
+
+ function traditional.hyphenate(head)
+
+ local first = tonut(head)
+ local tail = nil
+ local last = nil
+ local current = first
+ local dictionary = nil
+ local instance = nil
+ local characters = nil
+ local unicodes = nil
+ local extrachars = nil
+ local hyphenchars = nil
+ local language = nil
+ local start = nil
+ local stop = nil
+ local word = { } -- we reuse this table
+ local size = 0
+ local leftchar = false
+ local rightchar = false -- utfbyte("-")
+ local leftmin = 0
+ local rightmin = 0
+ local leftcharmin = nil
+ local rightcharmin = nil
+ local rightwordmin = nil
+ local attr = nil
+ local lastwordlast = nil
+ local hyphenated = hyphenate
+
+ -- We cannot use an 'enabled' boolean (false when no characters or extras) because we
+ -- can have plugins that set a characters metatable and so) ... it doesn't save much
+ -- anyway. Using (unicodes and unicodes[code]) and a nil table when no characters also
+ -- doesn't save much. So there not that much to gain for languages that don't hyphenate.
+ --
+ -- enabled = (unicodes and (next(unicodes) or getmetatable(unicodes))) or (extrachars and next(extrachars))
+ --
+ -- This can be used to not add characters i.e. keep size 0 but then we need to check for
+ -- attributes that change it, which costs time too. Not much to gain there.
+
+ starttiming(traditional)
+
+ local function synchronizefeatureset(a)
+ local f = a and featuresets[a]
+ if f then
+ hyphenated = methods[f.alternative or "hyphenate"]
+ extrachars = f.extrachars
+ hyphenchars = f.hyphenchars
+ rightwordmin = f.rightwordmin
+ leftcharmin = f.leftcharmin
+ rightcharmin = f.rightcharmin
+ if rightwordmin and rightwordmin > 0 and lastwordlast ~= rightwordmin then
+ -- so we can change mid paragraph but it's kind of unpredictable then
+ if not tail then
+ tail = node_tail(first)
+ end
+ last = tail
+ local inword = false
+ while last and rightwordmin > 0 do
+ local id = getid(last)
+ if id == glyph_code then
+ inword = true
+ if trace_visualize then
+ setcolor(last,"darkred")
+ end
+ elseif inword then
+ inword = false
+ rightwordmin = rightwordmin - 1
+ end
+ last = getprev(last)
+ end
+ lastwordlast = rightwordmin
end
- if lefthyphenchar then
- setfield(n,"post",serialize(true,lefthyphenchar))
+ else
+ hyphenated = methods.hyphenate
+ extrachars = false
+ hyphenchars = false
+ rightwordmin = false
+ leftcharmin = false
+ rightcharmin = false
+ end
+ return a
+ end
+
+ local function flush(hyphens) -- todo: no need for result
+
+ local rightmin = size - rightmin
+ local result = { }
+ local rsize = 0
+ local position = 1
+
+ -- todo: remember last dics and don't go back to before that (plus
+ -- message) .. for simplicity we also assume that we don't start
+ -- with a dics node
+ --
+ -- there can be a conflict: if we backtrack then we can end up in
+ -- another disc and get out of sync (dup chars and so)
+
+ while position <= size do
+ if position >= leftmin and position <= rightmin then
+ local hyphen = hyphens[position]
+ if not hyphen then
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
+ elseif hyphen == true then
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ rsize = rsize + 1
+ result[rsize] = true
+ position = position + 1
+ else
+ local o, h = hyphen[2]
+ if o then
+ -- { hyphen, offset)
+ h = hyphen[1]
+ else
+ -- hyphen
+ h = hyphen
+ o = 1
+ end
+ local b = position - o + (h.start or 1)
+ local e = b + (h.length or 2) - 1
+ if b > 0 and e >= b then
+ for i=1,b-position do
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
+ end
+ rsize = rsize + 1
+ result[rsize] = {
+ h.before or "", -- pre
+ h.after or "", -- post
+ concat(word,"",b,e), -- replace
+ h.right, -- optional after pre
+ h.left, -- optional before post
+ }
+ position = e + 1
+ else
+ -- error
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
+ end
+ end
+ else
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
end
- insert_before(head,current,n)
- elseif type(ri) == "table" then
- local n = new_disc()
- local pre, post, replace = ri[1], ri[2], ri[3]
- if pre then
- setfield(n,"pre",serialize(pre,false,righthyphenchar))
+ end
+
+ local function serialize(replacement,leftchar,rightchar)
+ if not replacement then
+ return
+ elseif replacement == true then
+ local glyph = copy_node(stop)
+ setfield(glyph,"char",leftchar or rightchar)
+ return glyph
end
- if post then
- setfield(n,"post",serialize(post,lefthyphenchar,false))
+ local head = nil
+ local current = nil
+ if leftchar then
+ head = copy_node(stop)
+ current = head
+ setfield(head,"char",leftchar)
end
- if replace then
- setfield(n,"replace",serialize(replace))
+ local rsize = #replacement
+ if rsize == 1 then
+ local glyph = copy_node(stop)
+ setfield(glyph,"char",characters[replacement])
+ if head then
+ insert_after(current,current,glyph)
+ else
+ head = glyph
+ end
+ current = glyph
+ elseif rsize > 0 then
+ local list = lpegmatch(p_split,replacement) -- this is an utf split (could be cached)
+ for i=1,#list do
+ local glyph = copy_node(stop)
+ setfield(glyph,"char",characters[list[i]])
+ if head then
+ insert_after(current,current,glyph)
+ else
+ head = glyph
+ end
+ current = glyph
+ end
end
- insert_before(head,current,n)
- else
- setfield(current,"char",characters[ri])
- if i < size then
- current = getnext(current)
+ if rightchar then
+ local glyph = copy_node(stop)
+ insert_after(current,current,glyph)
+ setfield(glyph,"char",rightchar)
end
+ return head
end
- end
- if current ~= stop then
- local current = getnext(current)
- local last = getnext(stop)
- while current ~= last do
- head, current = remove_node(head,current,true)
- end
- end
- end
- -- simple cases: no special .. only inject
+ local current = start
- local prehyphenchar = lang.prehyphenchar
- local posthyphenchar = lang.posthyphenchar
-
- local lccodes = characters.lccodes
-
- -- An experimental feature:
- --
- -- \setupalign[verytolerant,flushleft]
- -- \setuplayout[width=140pt] \showframe
- -- longword longword long word longword longwordword \par
- -- \enabledirectives[hyphenators.rightwordsmin=1]
- -- longword longword long word longword longwordword \par
- -- \disabledirectives[hyphenators.rightwordsmin]
- --
- -- An alternative is of course to pack the words in an hbox.
+ for i=1,rsize do
+ local r = result[i]
+ if r == true then
+ local disc = new_disc()
+ if rightchar then
+ setfield(disc,"pre",serialize(true,rightchar))
+ end
+ if leftchar then
+ setfield(disc,"post",serialize(true,leftchar))
+ end
+ -- could be a replace as well
+ insert_before(first,current,disc)
+ elseif type(r) == "table" then
+ local disc = new_disc()
+ local pre = r[1]
+ local post = r[2]
+ local replace = r[3]
+ local right = r[4] ~= false and rightchar
+ local left = r[5] ~= false and leftchar
+ if pre and pre ~= "" then
+ setfield(disc,"pre",serialize(pre,false,right))
+ end
+ if post and post ~= "" then
+ setfield(disc,"post",serialize(post,left,false))
+ end
+ if replace and replace ~= "" then
+ setfield(disc,"replace",serialize(replace))
+ end
+ insert_before(first,current,disc)
+ else
+ setfield(current,"char",characters[r])
+ if i < rsize then
+ current = getnext(current)
+ end
+ end
+ end
+ if current and current ~= stop then
+ local current = getnext(current)
+ local last = getnext(stop)
+ while current ~= last do
+ first, current = remove_node(first,current,true)
+ end
+ end
- local rightwordsmin = 0 -- todo: parproperties (each par has a number anyway)
+ end
- function traditional.hyphenate(head)
- local first = tonut(head)
- local current = first
- local dictionary = nil
- local instance = nil
- local characters = nil
- local unicodes = nil
- local language = nil
- local start = nil
- local stop = nil
- local word = nil -- maybe reuse and pass size
- local size = 0
- local leftchar = false
- local rightchar = false -- utfbyte("-")
- local leftmin = 0
- local rightmin = 0
- local lastone = nil
-
- if rightwordsmin > 0 then
- lastone = node_tail(first)
- local inword = false
- while lastone and rightwordsmin > 0 do
- local id = getid(lastone)
- if id == glyph_code then
- inword = true
- elseif inword then
- inword = false
- rightwordsmin = rightwordsmin - 1
+ local function inject()
+ if first ~= current then
+ local disc = new_disc()
+ first, current, glyph = remove_node(first,current)
+ first, current = insert_before(first,current,disc)
+ if trace_visualize then
+ setcolor(glyph,"darkred") -- these get checked in the colorizer
+ setcolor(disc,"darkgreen") -- these get checked in the colorizer
+ end
+ setfield(disc,"replace",glyph)
+ if rightchar then
+ local glyph = copy_node(glyph)
+ setfield(glyph,"char",rightchar)
+ setfield(disc,"pre",glyph)
+ end
+ if leftchar then
+ local glyph = copy_node(glyph)
+ setfield(glyph,"char",rightchar)
+ setfield(disc,"post",glyph)
end
- lastone = getprev(lastone)
end
+ return current
+ end
+
+ local a = getattr(first,a_hyphenation)
+ if a ~= attr then
+ attr = synchronizefeatureset(a)
end
- while current ~= lastone do
+ -- The first attribute in a word determines the way a word gets hyphenated
+ -- and if relevant, other properties are also set then. We could optimize for
+ -- silly one-char cases but it has no priority as the code is still not that
+ -- much slower than the native hyphenator and this variant also provides room
+ -- for extensions.
+
+ while current and current ~= last do -- and current
local id = getid(current)
if id == glyph_code then
- -- currently no lc/uc code support
local code = getchar(current)
local lang = getfield(current,"lang")
if lang ~= language then
- if dictionary then
- if leftmin + rightmin < #word then
- local done = hyphenate(dictionary,word)
- if done then
- flush(first,start,stop,dictionary,word,done,leftchar,rightchar,characters,leftmin,rightmin)
- end
+ if size > 0 and dictionary and leftmin + rightmin <= size then
+ local hyphens = hyphenated(dictionary,word,size)
+ if hyphens then
+ flush(hyphens)
end
end
- language = lang
- dictionary = dictionaries[language]
- instance = dictionary.instance
- characters = dictionary.characters
- unicodes = dictionary.unicodes
- leftchar = instance and posthyphenchar(instance)
- rightchar = instance and prehyphenchar (instance)
- leftmin = getfield(current,"left")
- rightmin = getfield(current,"right")
- if not leftchar or leftchar < 0 then
- leftchar = false
- end
- if not rightchar or rightchar < 0 then
- rightchar = false
- end
- local char = unicodes[code]
- if char then
- word = { char }
- size = 1
- start = current
+ language = lang
+ if language > 0 then
+ dictionary = dictionaries[language]
+ -- we could postpone these
+ instance = dictionary.instance
+ characters = dictionary.characters
+ unicodes = dictionary.unicodes
+ leftchar = instance and posthyphenchar(instance)
+ rightchar = instance and prehyphenchar (instance)
+ leftmin = leftcharmin or getfield(current,"left")
+ rightmin = rightcharmin or getfield(current,"right")
+ if not leftchar or leftchar < 0 then
+ leftchar = false
+ end
+ if not rightchar or rightchar < 0 then
+ rightchar = false
+ end
+ --
+ local a = getattr(current,a_hyphenation)
+ if a ~= attr then
+ attr = synchronizefeatureset(a) -- influences extrachars
+ end
+ --
+ local char = unicodes[code] or (extrachars and extrachars[code])
+ if char then
+ word[1] = char
+ size = 1
+ start = current
+ end
+ else
+ size = 0
end
- elseif word then
- local char = unicodes[code]
+ elseif language <= 0 then
+ --
+ elseif size > 0 then
+ local char = unicodes[code] or (extrachars and extrachars[code])
if char then
size = size + 1
word[size] = char
elseif dictionary then
- if leftmin + rightmin < #word then
- local done = hyphenate(dictionary,word)
- if done then
- flush(first,start,stop,dictionary,word,done,leftchar,rightchar,characters,leftmin,rightmin)
+ if leftmin + rightmin <= size then
+ local hyphens = hyphenated(dictionary,word,size)
+ if hyphens then
+ flush(hyphens)
end
end
- word = nil
+ size = 0
+ if hyphenchars and hyphenchars[code] then
+ current = inject()
+ end
end
else
- local char = unicodes[code]
+ local a = getattr(current,a_hyphenation)
+ if a ~= attr then
+ attr = synchronizefeatureset(a) -- influences extrachars
+ end
+ --
+ local char = unicodes[code] or (extrachars and extrachars[code])
if char then
- word = { char }
- size = 1
- start = current
- -- leftmin = getfield(current,"left") -- can be an option
- -- rightmin = getfield(current,"right") -- can be an option
+ word[1] = char
+ size = 1
+ start = current
end
end
stop = current
current = getnext(current)
- elseif word then
- if dictionary then
- if leftmin + rightmin < #word then
- local done = hyphenate(dictionary,word)
- current = getnext(current)
- if done then
- flush(first,start,stop,dictionary,word,done,leftchar,rightchar,characters,leftmin,rightmin)
+ else
+ current = id == math_code and getnext(end_of_math(current)) or getnext(current)
+ if size > 0 then
+ if dictionary and leftmin + rightmin <= size then
+ local hyphens = hyphenated(dictionary,word,size)
+ if hyphens then
+ flush(hyphens)
end
- else
- current = getnext(current) -- hm
end
- else
- current = getnext(current)
+ size = 0
end
- word = nil
- elseif id == math_code then
- current = getnext(end_of_math(current))
- else
- current = getnext(current)
end
end
+ -- we may have quit the loop at last, so we still need to flush the last seen word
+ if size > 0 and dictionary and leftmin + rightmin <= size then
+ local hyphens = hyphenated(dictionary,word,size)
+ if hyphens then
+ flush(hyphens)
+ end
+ end
+
+ stoptiming(traditional)
+
return head, true
end
+ statistics.register("hyphenation",function()
+ if nofwords > 0 or statistics.elapsed(traditional) > 0 then
+ return string.format("%s words hyphenated, %s unique, used time %s",
+ nofwords,nofhashed,statistics.elapsedseconds(traditional) or 0)
+ end
+ end)
+
local texmethod = "builders.kernel.hyphenation"
local oldmethod = texmethod
local newmethod = texmethod
@@ -617,11 +1237,17 @@ if context then
-- \enabledirectives[hyphenators.method=traditional]
-- \enabledirectives[hyphenators.method=builtin]
- directives.register("hyphenators.method",function(v)
- if type(v) == "string" then
- local valid = languages.hyphenators[v]
+ -- this avoids a wrapper
+
+ -- push / pop ? check first attribute
+
+ local replaceaction = nodes.tasks.replaceaction
+
+ local function setmethod(method)
+ if type(method) == "string" then
+ local valid = hyphenators[method]
if valid and valid.hyphenate then
- newmethod = "languages.hyphenators." .. v .. ".hyphenate"
+ newmethod = "languages.hyphenators." .. method .. ".hyphenate"
else
newmethod = texmethod
end
@@ -629,16 +1255,65 @@ if context then
newmethod = texmethod
end
if oldmethod ~= newmethod then
- nodes.tasks.replaceaction("processors","words",oldmethod,newmethod)
+ replaceaction("processors","words",oldmethod,newmethod)
end
oldmethod = newmethod
- end)
+ end
- -- experimental feature
+ hyphenators.setmethod = setmethod
- directives.register("hyphenators.rightwordsmin",function(v)
- rightwordsmin = tonumber(v) or 0
- end)
+ local stack = { }
+
+ local function pushmethod(method)
+ insert(stack,oldmethod)
+ setmethod(method)
+ end
+ local function popmethod()
+ setmethod(remove(stack))
+ end
+
+ hyphenators.pushmethod = pushmethod
+ hyphenators.popmethod = popmethod
+
+ directives.register("hyphenators.method",setmethod)
+
+ function commands.setuphyphenation(specification)
+ local method = specification.method
+ if method then
+ setmethod(method)
+ end
+ end
+
+ commands.pushhyphenation = pushmethod
+ commands.pophyphenation = popmethod
+
+ local context = context
+ local ctx_NC = context.NC
+ local ctx_NR = context.NR
+ local ctx_verbatim = context.verbatim
+
+ function commands.showhyphenationtrace(language,word)
+ local saved = trace_steps
+ trace_steps = "silent"
+ local steps = traditional.gettrace(language,word)
+ trace_steps = saved
+ if steps then
+ local n = #steps
+ if n > 0 then
+ context.starttabulate { "|r|l|l|l|" }
+ for i=1,n do
+ local s = steps[i]
+ ctx_NC() if i > 1 and i < n then context(i-1) end
+ ctx_NC() ctx_verbatim(s[1])
+ ctx_NC() ctx_verbatim(s[2])
+ ctx_NC() ctx_verbatim(s[3])
+ ctx_NC()
+ ctx_NR()
+ end
+ context.stoptabulate()
+ end
+ end
+ end
else
@@ -647,14 +1322,14 @@ else
-- traditional.loadpatterns("us","lang-us")
-- traditional.registerpattern("nl","e1ë", { start = 1, length = 2, before = "e", after = "e" } )
--- traditional.registerpattern("nl","oo1ë", { start = 2, length = 3, before = "o", after = "e" } )
+-- traditional.registerpattern("nl","oo7ë", { start = 2, length = 3, before = "o", after = "e" } )
-- traditional.registerpattern("de","qqxc9xkqq",{ start = 3, length = 4, before = "ab", after = "cd" } )
-- local specification = {
--- lefthyphenmin = 2,
--- righthyphenmin = 2,
--- lefthyphenchar = "<",
--- righthyphenchar = ">",
+-- leftcharmin = 2,
+-- rightcharmin = 2,
+-- leftchar = "<",
+-- rightchar = ">",
-- }
-- print("reëel", traditional.injecthyphens(dictionaries.nl,"reëel", specification),"r{e>}{<e}{eë}el")
@@ -670,5 +1345,26 @@ else
-- print("kunststofmatig", traditional.injecthyphens(dictionaries.nl,"kunststofmatig", specification),"")
-- print("kunst[stof]matig", traditional.injecthyphens(dictionaries.nl,"kunst[stof]matig", specification),"")
+-- traditional.loadpatterns("us","lang-us")
+
+-- local specification = {
+-- leftcharmin = 2,
+-- rightcharmin = 2,
+-- leftchar = false,
+-- rightchar = false,
+-- }
+
+-- trace_steps = true
+
+-- print("components", traditional.injecthyphens(dictionaries.us,"components", specification),"")
+-- print("single", traditional.injecthyphens(dictionaries.us,"single", specification),"sin-gle")
+-- print("everyday", traditional.injecthyphens(dictionaries.us,"everyday", specification),"every-day")
+-- print("associate", traditional.injecthyphens(dictionaries.us,"associate", specification),"as-so-ciate")
+-- print("philanthropic", traditional.injecthyphens(dictionaries.us,"philanthropic", specification),"phil-an-thropic")
+-- print("projects", traditional.injecthyphens(dictionaries.us,"projects", specification),"projects")
+-- print("Associate", traditional.injecthyphens(dictionaries.us,"Associate", specification),"As-so-ciate")
+-- print("Philanthropic", traditional.injecthyphens(dictionaries.us,"Philanthropic", specification),"Phil-an-thropic")
+-- print("Projects", traditional.injecthyphens(dictionaries.us,"Projects", specification),"Projects")
+
end
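
The comments above already mention the directive that drives the method switch; the following is only a minimal sketch of a test file that exercises it (the sample words are arbitrary, the directive values come straight from the comments in this file):

    \starttext
    \enabledirectives[hyphenators.method=traditional]
    these words go through the Lua hyphenator \par
    \enabledirectives[hyphenators.method=builtin]
    these words go through the built-in hyphenator again \par
    \stoptext
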
diff --git a/tex/context/base/lang-hyp.mkiv b/tex/context/base/lang-hyp.mkiv
index 0cd5a72ca..036974c87 100644
--- a/tex/context/base/lang-hyp.mkiv
+++ b/tex/context/base/lang-hyp.mkiv
@@ -32,21 +32,166 @@
\registerctxluafile{lang-hyp}{1.001}
+\unprotect
+
+\definesystemattribute[hyphenation][public]
+
%D This command can change! At some point we will keep the setting with the
%D paragraph and then the \type {\par} can go.
+% \unexpanded\def\atleastoneword#1%
+% {\begingroup
+% \enabledirectives[hyphenators.method=traditional]%
+% \enabledirectives[hyphenators.rightwordsmin=1]%
+% \lefthyphenmin \plusfour
+% \righthyphenmin\plusfour
+% #1\par
+% \disabledirectives[hyphenators.rightwordsmin]%
+% \enabledirectives[hyphenators.method]%
+% \endgroup}
+
+%D Here is the real way:
+
+\installcorenamespace{hyphenation}
+\installcorenamespace{hyphenationfeatures}
+
+\installparameterhandler \??hyphenation {hyphenation}
+\installsetuphandler \??hyphenation {hyphenation}
+
+\setuphyphenation
+ [\c!method=\s!default]
+
+\appendtoks
+ \ctxcommand{setuphyphenation{method="\hyphenationparameter\c!method"}}%
+\to \everysetuphyphenation
+
+%D These are mostly meant for manuals:
+
+\unexpanded\def\starthyphenation[#1]%
+ {\begingroup
+ \ctxcommand{pushhyphenation("#1")}}
+
+\unexpanded\def\stophyphenation
+ {\ifhmode\par\fi
+ \ctxcommand{pophyphenation()}%
+ \endgroup}
+
+% This is a global setting, so we need to disable it when needed. However, as
+% we are (hopefully) compatible and attribute driven one can also just keep it
+% enabled.
+%
+% \setuphyphenation
+% [\c!method=\s!traditional] % no translations
+
+\unexpanded\def\definehyphenationfeatures
+ {\dodoubleargument\lang_hyphenation_define_features}
+
+\unexpanded\def\lang_hyphenation_define_features[#1][#2]%
+ {\begingroup
+ \letdummyparameter\c!characters\empty % maybe \s!
+ \letdummyparameter\c!hyphens\empty % maybe \s!
+ \letdummyparameter\c!joiners\empty % maybe \s!
+ \letdummyparameter\c!rightwords\!!zerocount % maybe \s!
+ \letdummyparameter\s!lefthyphenmin\!!zerocount
+ \letdummyparameter\s!righthyphenmin\!!zerocount
+ \letdummyparameter\c!alternative\empty
+ \getdummyparameters[#2]%
+ \ctxcommand{definehyphenationfeatures("#1",{
+ characters = \!!bs\dummyparameter\c!characters\!!es,
+ hyphens = \!!bs\dummyparameter\c!hyphens\!!es,
+ joiners = \!!bs\dummyparameter\c!joiners\!!es,
+ rightwordmin = \number\dummyparameter\c!rightwords,
+ leftcharmin = \number\dummyparameter\s!lefthyphenmin,
+ rightcharmin = \number\dummyparameter\s!righthyphenmin,
+ alternative = "\dummyparameter\c!alternative",
+ })}%
+ \endgroup}
+
+\unexpanded\def\sethyphenationfeatures[#1]%
+ {\ctxcommand{sethyphenationfeatures("#1")}}
+
+% todo: \start ... \stop too
+
+\unexpanded\def\registerhyphenationpattern
+ {\dodoubleempty\lang_hyphenation_register_pattern}
+
+\def\lang_hyphenation_register_pattern[#1][#2]%
+ {\ctxcommand{registerhyphenationpattern(
+ \ifsecondargument
+ \!!bs#1\!!es,
+ \!!bs#2\!!es
+ \else
+ \!!bs\currentlanguage\!!es,
+ \!!bs#1\!!es
+ \fi
+ )}}
+
+\unexpanded\def\unregisterhyphenationpattern
+ {\dodoubleempty\lang_hyphenation_unregister_pattern}
+
+\def\lang_hyphenation_unregister_pattern[#1][#2]%
+ {\ctxcommand{registerhyphenationpattern(
+ \ifsecondargument
+ \!!bs#1\!!es,
+ \!!bs#2\!!es,
+ \else
+ \!!bs\currentlanguage\!!es,
+ \!!bs#1\!!es,
+ \fi
+ false
+ )}}
+
+\unexpanded\def\showhyphenationtrace
+ {\dodoubleempty\lang_hyphenation_show_trace}
+
+\def\lang_hyphenation_show_trace[#1][#2]%
+ {\begingroup
+ \tt
+ \ctxcommand{showhyphenationtrace(
+ \ifsecondargument
+ \!!bs#1\!!es,
+ \!!bs#2\!!es
+ \else
+ \!!bs\currentlanguage\!!es,
+ \!!bs#1\!!es
+ \fi
+ )}%
+ \endgroup}
+
+% For old times sake:
+
\unexpanded\def\atleastoneword#1%
{\begingroup
- \enabledirectives[hyphenators.method=traditional]%
- \enabledirectives[hyphenators.rightwordsmin=1]%
- \lefthyphenmin \plusfour
- \righthyphenmin\plusfour
+ \starthyphenation[traditional]% this might become default or a faster switch
+ \sethyphenationfeatures[words]%
#1\par
- \disabledirectives[hyphenators.rightwordsmin]%
- \enabledirectives[hyphenators.method]%
+ \stophyphenation
\endgroup}
-\endinput
+%D These are (at least now) not cumulative:
+
+\definehyphenationfeatures % just an example
+ [fences]
+ [\c!characters={[]()}]
+
+\definehyphenationfeatures
+ [words]
+ [\c!rightwords=1,
+ \s!lefthyphenmin=4,
+ \s!righthyphenmin=4]
+
+\definehyphenationfeatures
+ [default]
+ [\c!hyphens=\v!yes,
+ \c!joiners=\v!yes]
+
+% \sethyphenationfeatures
+% [fences]
+
+% \sethyphenationfeatures
+% [default,fences]
+
+\protect \endinput
% \starttext
%
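
A hedged usage sketch of the interface defined in this file; the pattern and test word are copied from the commented tests in lang-hyp.lua, the feature set name comes from the definitions above, and the \mainlanguage call is only an assumption to make sure the Dutch patterns get loaded:

    \starttext
    \mainlanguage[nl]
    \starthyphenation[traditional]
        \sethyphenationfeatures[words]
        \registerhyphenationpattern[nl][e1ë]
        \showhyphenationtrace[nl][reëel]
    \stophyphenation
    \stoptext
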
diff --git a/tex/context/base/lang-ini.lua b/tex/context/base/lang-ini.lua
index 49eff4949..d0fbced4d 100644
--- a/tex/context/base/lang-ini.lua
+++ b/tex/context/base/lang-ini.lua
@@ -21,6 +21,9 @@ local utfbyte = utf.byte
local format, gsub = string.format, string.gsub
local concat, sortedkeys, sortedpairs = table.concat, table.sortedkeys, table.sortedpairs
+local context = context
+local commands = commands
+
local settings_to_array = utilities.parsers.settings_to_array
local trace_patterns = false trackers.register("languages.patterns", function(v) trace_patterns = v end)
@@ -152,7 +155,7 @@ local function loaddefinitions(tag,specification)
-- instance:hyphenation(resources.exceptions and resources.exceptions.data or "")
instance:patterns (validdata(resources.patterns, "patterns", tag) or "")
instance:hyphenation(validdata(resources.exceptions,"exceptions",tag) or "")
-data.resources = resources -- so we can use them otherwise
+ data.resources = resources -- so we can use them otherwise
else
report_initialization("invalid definition %a for language %a in %a",definition,tag,filename)
end
@@ -360,7 +363,7 @@ languages.associate('fr','latn','fra')
statistics.register("loaded patterns", function()
local result = languages.logger.report()
if result ~= "none" then
--- return result
+ -- return result
return format("%s, load time: %s",result,statistics.elapsedtime(languages))
end
end)
diff --git a/tex/context/base/lang-ini.mkiv b/tex/context/base/lang-ini.mkiv
index 4ed7839bd..1ffc37791 100644
--- a/tex/context/base/lang-ini.mkiv
+++ b/tex/context/base/lang-ini.mkiv
@@ -162,7 +162,6 @@
%D implementations support run time addition of patterns to a
%D preloaded format).
-
%D \macros
%D {preloadlanguages}
%D
@@ -354,13 +353,48 @@
\newtoks \everylanguage
+% \def\lang_basics_synchronize% assumes that \currentlanguage is set % % % use different name as complex
+% {\normallanguage\ctxcommand{languagenumber(%
+% "\currentlanguage",%
+% "\defaultlanguage\currentlanguage",%
+% "\languageparameter\s!patterns"%
+% )}\relax
+% \the\everylanguage\relax}
+
+% (some 20%) faster but a code jungle (the publication code can have excessive
+% switching)
+
+\installcorenamespace{languagenumbers}
+
+\def\lang_basics_synchronize_yes
+ {\zerocount % see below
+ \global\expandafter\chardef\csname\??languagenumbers\currentlanguage\endcsname
+ \ctxcommand{languagenumber(%
+ "\currentlanguage",%
+ "\defaultlanguage\currentlanguage",%
+ "\languageparameter\s!patterns"%
+ )}\relax
+ \normallanguage\csname\??languagenumbers\currentlanguage\endcsname}
+
+\let\lang_basics_synchronize_nop\zerocount % not loaded anyway
+
+\letvalue{\??languagenumbers}\lang_basics_synchronize_nop % initime
+
+\appendtoks
+ \letvalue{\??languagenumbers}\lang_basics_synchronize_yes % runtime
+\to \everydump
+
\def\lang_basics_synchronize% assumes that \currentlanguage is set % % % use different name as complex
- {\normallanguage\ctxcommand{languagenumber(%
- "\currentlanguage",%
- "\defaultlanguage\currentlanguage",%
- "\languageparameter\s!patterns"%
- )}\relax
- \the\everylanguage\relax}
+ {\normallanguage\csname\??languagenumbers
+ \ifcsname\??languagenumbers\currentlanguage\endcsname
+ \currentlanguage
+ \fi
+ \endcsname
+ \relax
+ \the\everylanguage
+ \relax}
+
+% so far
\newcount\hyphenminoffset
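
The cached language numbers only show up in runtime behaviour, not in the output; a sketch of the kind of repeated switching that now mostly reduces to a \chardef lookup (the loop count is arbitrary and \dorecurse is assumed to be available as usual):

    \starttext
    \dorecurse{1000}{\language[nl]x \language[en]x }
    \stoptext
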
diff --git a/tex/context/base/m-scite.mkiv b/tex/context/base/m-scite.mkiv
index aed2c2631..93349122d 100644
--- a/tex/context/base/m-scite.mkiv
+++ b/tex/context/base/m-scite.mkiv
@@ -66,6 +66,8 @@ local f_mapping = [[
\let\string\slxR\string\letterrightbrace
\let\string\slxM\string\letterdollar
\let\string\slxV\string\letterbar
+\let\string\slxU\string\letterhat
+\let\string\slxD\string\letterunderscore
\let\string\slxH\string\letterhash
\let\string\slxB\string\letterbackslash
\let\string\slxP\string\letterpercent
@@ -76,6 +78,8 @@ local replacer = lpeg.replacer {
["{"] = "\\slxL ",
["}"] = "\\slxR ",
["$"] = "\\slxM ",
+ ["^"] = "\\slxU ",
+ ["_"] = "\\slxD ",
["|"] = "\\slxV ",
["#"] = "\\slxH ",
["\\"] = "\\slxB ",
diff --git a/tex/context/base/math-noa.lua b/tex/context/base/math-noa.lua
index da17f6a33..bc12867ed 100644
--- a/tex/context/base/math-noa.lua
+++ b/tex/context/base/math-noa.lua
@@ -971,8 +971,8 @@ local function movesubscript(parent,current_nucleus,current_char)
local prev = getfield(parent,"prev")
if prev and getid(prev) == math_noad then
if not getfield(prev,"sup") and not getfield(prev,"sub") then
- setfield(current_nucleus,"char",movesub[current_char or getchar(current_nucleus)])
-- {f} {'}_n => f_n^'
+ setfield(current_nucleus,"char",movesub[current_char or getchar(current_nucleus)])
local nucleus = getfield(parent,"nucleus")
local sub = getfield(parent,"sub")
local sup = getfield(parent,"sup")
@@ -985,6 +985,18 @@ local function movesubscript(parent,current_nucleus,current_char)
if trace_collapsing then
report_collapsing("fixing subscript")
end
+ elseif not getfield(prev,"sup") then
+ -- {f} {'}_n => f_n^'
+ setfield(current_nucleus,"char",movesub[current_char or getchar(current_nucleus)])
+ local nucleus = getfield(parent,"nucleus")
+ local sup = getfield(parent,"sup")
+ setfield(prev,"sup",nucleus)
+ local dummy = copy_node(nucleus)
+ setfield(dummy,"char",0)
+ setfield(parent,"nucleus",dummy)
+ if trace_collapsing then
+ report_collapsing("fixing subscript")
+ end
end
end
end
@@ -1031,6 +1043,8 @@ local function collapsepair(pointer,what,n,parent,nested) -- todo: switch to tur
movesubscript(parent,current_nucleus,current_char)
end
end
+ elseif not nested and movesub[current_char] then
+ movesubscript(parent,current_nucleus,current_char)
end
end
end
diff --git a/tex/context/base/mult-def.mkiv b/tex/context/base/mult-def.mkiv
index 7791200f9..b73a6df3e 100644
--- a/tex/context/base/mult-def.mkiv
+++ b/tex/context/base/mult-def.mkiv
@@ -45,6 +45,12 @@
\def\c!nextrightquotation{nextrightquotation}
\def\c!fences {fences}
+\def\c!words {words}
+\def\c!characters {characters}
+\def\c!hyphens {hyphens}
+\def\c!joiners {joiners}
+\def\c!leftwords {leftwords}
+\def\c!rightwords {rightwords}
\def\c!keeptogether {keeptogether}
\def\c!viewerprefix {viewerprefix}
@@ -79,6 +85,7 @@
\def\c!properties {properties}
\def\c!journalconversion {journalconversion}
\def\c!register {register}
+\def\c!note {note}
\def\c!field {field}
\def\c!ignore {ignore}
\def\c!specification {specification}
@@ -122,6 +129,8 @@
\def\v!vfenced {vfenced}
\def\v!bothtext {bothtext}
+\def\s!traditional {traditional}
+
\def\c!HL {HL}
\def\c!VL {VL}
\def\c!NL {NL}
diff --git a/tex/context/base/mult-sys.mkiv b/tex/context/base/mult-sys.mkiv
index 2a1261d8f..c79bc023d 100644
--- a/tex/context/base/mult-sys.mkiv
+++ b/tex/context/base/mult-sys.mkiv
@@ -268,6 +268,7 @@
\definesystemconstant {cite}
\definesystemconstant {nocite}
\definesystemconstant {list}
+\definesystemconstant {register}
\definesystemconstant {author}
% \def\s!parent{->} % 1% faster / => does not work in assignments
diff --git a/tex/context/base/node-fin.lua b/tex/context/base/node-fin.lua
index 1566e099f..f38f0bf85 100644
--- a/tex/context/base/node-fin.lua
+++ b/tex/context/base/node-fin.lua
@@ -168,7 +168,6 @@ function states.finalize(namespace,attribute,head) -- is this one ok?
return head, false, false
end
--- disc nodes can be ignored
-- we need to deal with literals too (reset as well as oval)
-- if id == glyph_code or (id == whatsit_code and getsubtype(stack) == pdfliteral_code) or (id == rule_code and stack.width ~= 0) or (id == glue_code and stack.leader) then
@@ -181,6 +180,8 @@ local function process(namespace,attribute,head,inheritance,default) -- one attr
local id = getid(stack)
if id == glyph_code then
check = true
+ elseif id == disc_code then
+ check = true -- indeed
elseif id == glue_code then
leader = getleader(stack)
if leader then
@@ -294,6 +295,8 @@ local function selective(namespace,attribute,head,inheritance,default) -- two at
local id = getid(stack)
if id == glyph_code then
check = true
+ elseif id == disc_code then
+ check = true -- indeed
elseif id == glue_code then
leader = getleader(stack)
if leader then
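
Because discretionaries now pass the check, attribute driven processing such as color also reaches the pre, post and replace texts of a disc node. A sketch only, assuming the predefined darkred color and using a narrow framed as one way to force a hyphenated break:

    \starttext
    \framed[width=4cm,align=normal]
      {\color[darkred]{supercalifragilisticexpialidocious and a few more words}}
    \stoptext
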
diff --git a/tex/context/base/page-str.mkiv b/tex/context/base/page-str.mkiv
index a8fab9c6c..8284d4baa 100644
--- a/tex/context/base/page-str.mkiv
+++ b/tex/context/base/page-str.mkiv
@@ -34,7 +34,7 @@
\unprotect
\let \currentoutputstream \empty
-\newif \ifinoutputstream % will becoem a conditional or mode
+\newif \ifinoutputstream % will become a conditional or mode
\newtoks \everyenableoutputstream
\appendtoks
diff --git a/tex/context/base/s-figures-names.mkiv b/tex/context/base/s-figures-names.mkiv
index 913fcafdb..a2782efc9 100644
--- a/tex/context/base/s-figures-names.mkiv
+++ b/tex/context/base/s-figures-names.mkiv
@@ -29,12 +29,13 @@
\startluacode
-local context = context
-local ctx_NC = context.NC
-local ctx_NR = context.NR
-local ctx_red = context.red
-local ctx_starttabulate = context.starttabulate
-local ctx_stoptabulate = context.stoptabulate
+local context = context
+local ctx_NC = context.NC
+local ctx_NR = context.NR
+local ctx_red = context.red
+local ctx_starttabulate = context.starttabulate
+local ctx_stoptabulate = context.stoptabulate
+local ctx_hyphenatedname = context.hyphenatedfilename
trackers.enable("graphics.lognames")
@@ -42,7 +43,7 @@ context.start()
context.switchtobodyfont { "tt,small" }
- local template = { "|Bl|l|" }
+ local template = { "|Bl|p|" }
for _, data in table.sortedhash(figures.found) do
ctx_starttabulate(template)
@@ -65,12 +66,14 @@ context.start()
ctx_NC()
context("found file")
ctx_NC()
- context(data.foundname)
+ ctx_hyphenatedname(data.foundname)
+ -- context(data.foundname)
ctx_NC() ctx_NR()
ctx_NC()
context("used file")
ctx_NC()
- context(data.fullname)
+ ctx_hyphenatedname(data.fullname)
+ -- context(data.fullname)
ctx_NC() ctx_NR()
if badname then
ctx_NC()
diff --git a/tex/context/base/spac-chr.lua b/tex/context/base/spac-chr.lua
index 3fcfafe17..0cdec4b8f 100644
--- a/tex/context/base/spac-chr.lua
+++ b/tex/context/base/spac-chr.lua
@@ -19,6 +19,7 @@ local byte, lower = string.byte, string.lower
local next = next
local trace_characters = false trackers.register("typesetters.characters", function(v) trace_characters = v end)
+local trace_nbsp = false trackers.register("typesetters.nbsp", function(v) trace_nbsp = v end)
local report_characters = logs.reporter("typesetting","characters")
@@ -39,6 +40,8 @@ local setattr = nuts.setattr
local getfont = nuts.getfont
local getchar = nuts.getchar
+local setcolor = nodes.tracers.colors.set
+
local insert_node_before = nuts.insert_before
local insert_node_after = nuts.insert_after
local remove_node = nuts.remove
@@ -50,6 +53,8 @@ local tasks = nodes.tasks
local nodepool = nuts.pool
local new_penalty = nodepool.penalty
local new_glue = nodepool.glue
+local new_kern = nodepool.kern
+local new_rule = nodepool.rule
local nodecodes = nodes.nodecodes
local skipcodes = nodes.skipcodes
@@ -114,6 +119,15 @@ local function inject_nobreak_space(unicode,head,current,space,spacestretch,spac
setfield(current,"attr",nil)
setattr(glue,a_character,unicode)
head, current = insert_node_after(head,current,penalty)
+ if trace_nbsp then
+ local rule = new_rule(space)
+ local kern = new_kern(-space)
+ local penalty = new_penalty(10000)
+ setcolor(rule,"orange")
+ head, current = insert_node_after(head,current,rule)
+ head, current = insert_node_after(head,current,kern)
+ head, current = insert_node_after(head,current,penalty)
+ end
head, current = insert_node_after(head,current,glue)
return head, current
end
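
The new tracker makes the injected rule, kern and penalty visible wherever a no-break space ends up; a minimal sketch, assuming \nbsp injects a U+00A0 that the characters typesetter handles:

    \enabletrackers[typesetters.nbsp]
    \starttext
    10\nbsp kg versus 10 kg
    \stoptext
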
diff --git a/tex/context/base/status-files.pdf b/tex/context/base/status-files.pdf
index 0f51e6963..f9a8c021f 100644
--- a/tex/context/base/status-files.pdf
+++ b/tex/context/base/status-files.pdf
Binary files differ
diff --git a/tex/context/base/status-lua.pdf b/tex/context/base/status-lua.pdf
index 6f4bc79ea..47c64e4a0 100644
--- a/tex/context/base/status-lua.pdf
+++ b/tex/context/base/status-lua.pdf
Binary files differ
diff --git a/tex/context/base/strc-flt.mkvi b/tex/context/base/strc-flt.mkvi
index ef4eae71c..94f730beb 100644
--- a/tex/context/base/strc-flt.mkvi
+++ b/tex/context/base/strc-flt.mkvi
@@ -1219,13 +1219,13 @@
\blank[\rootfloatparameter\c!spaceafter]%
\strc_floats_end_text_group
\page_floats_report_total}
-
+
\def\borderedfloatbox
{\begingroup
\setupcurrentfloat[\c!location=\v!normal,\c!width=\v!fit,\c!height=\v!fit]%
\inheritedfloatframed{\box\floatbox}%
\endgroup}
-
+
% minwidth=fit,width=max : no overshoot, as wide as graphic
\def\strc_floats_align_content_indeed
diff --git a/tex/context/base/strc-lst.mkvi b/tex/context/base/strc-lst.mkvi
index ef3a3dbb1..6e7e28e99 100644
--- a/tex/context/base/strc-lst.mkvi
+++ b/tex/context/base/strc-lst.mkvi
@@ -821,7 +821,7 @@
% todo: provide packager via attributes
-\doinstallinjector\v!list
+\doinstallinjector\s!list
\installcorenamespace{listalternativemethods} % the general wrapper of a rendering
diff --git a/tex/context/base/strc-reg.mkiv b/tex/context/base/strc-reg.mkiv
index 7a6f4a30d..eedf86a66 100644
--- a/tex/context/base/strc-reg.mkiv
+++ b/tex/context/base/strc-reg.mkiv
@@ -719,7 +719,7 @@
% \placeregister[index]
% \stoptext
-\doinstallinjector\v!register
+\doinstallinjector\s!register
%D Beware, we get funny side effects when a dangling \index precedes an
%D placeindex as then flushing takes place inside the index. Took me hours
diff --git a/tex/context/base/supp-box.lua b/tex/context/base/supp-box.lua
index c69486306..e208ae39b 100644
--- a/tex/context/base/supp-box.lua
+++ b/tex/context/base/supp-box.lua
@@ -47,6 +47,7 @@ local copy_node = nuts.copy
local copy_list = nuts.copy_list
local find_tail = nuts.tail
local traverse_id = nuts.traverse_id
+local link_nodes = nuts.linked
local listtoutf = nodes.listtoutf
@@ -54,29 +55,64 @@ local nodepool = nuts.pool
local new_penalty = nodepool.penalty
local new_hlist = nodepool.hlist
local new_glue = nodepool.glue
+local new_rule = nodepool.rule
+local new_kern = nodepool.kern
+
+local setlistcolor = nodes.tracers.colors.setlist
local texget = tex.get
+local texgetbox = tex.getbox
-local function hyphenatedlist(head)
+local function hyphenatedlist(head,usecolor)
local current = head and tonut(head)
while current do
local id = getid(current)
local next = getnext(current)
local prev = getprev(current)
if id == disc_code then
- local hyphen = getfield(current,"pre")
- if hyphen then
- local penalty = new_penalty(-500)
- -- insert_after etc
- setfield(hyphen,"next",penalty)
- setfield(penalty,"prev",hyphen)
- setfield(prev,"next",hyphen)
- setfield(next,"prev", penalty)
- setfield(penalty,"next",next)
- setfield(hyphen,"prev",prev)
+ local pre = getfield(current,"pre")
+ local post = getfield(current,"post")
+ local replace = getfield(current,"replace")
+ if pre then
setfield(current,"pre",nil)
- free_node(current)
end
+ if post then
+ setfield(current,"post",nil)
+ end
+ if not usecolor then
+ -- nothing fancy done
+ elseif pre and post then
+ setlistcolor(pre,"darkmagenta")
+ setlistcolor(post,"darkcyan")
+ elseif pre then
+ setlistcolor(pre,"darkyellow")
+ elseif post then
+ setlistcolor(post,"darkyellow")
+ end
+ if replace then
+ flush_list(replace)
+ setfield(current,"replace",nil)
+ end
+ -- setfield(current,"replace",new_rule(65536)) -- new_kern(65536*2))
+ setfield(current,"next",nil)
+ setfield(current,"prev",nil)
+ local list = link_nodes (
+ pre and new_penalty(10000),
+ pre,
+ current,
+ post,
+ post and new_penalty(10000)
+ )
+ local tail = find_tail(list)
+ if prev then
+ setfield(prev,"next",list)
+ setfield(list,"prev",prev)
+ end
+ if next then
+ setfield(tail,"next",next)
+ setfield(next,"prev",tail)
+ end
+ -- free_node(current)
elseif id == vlist_code or id == hlist_code then
hyphenatedlist(getlist(current))
end
@@ -84,7 +120,12 @@ local function hyphenatedlist(head)
end
end
-commands.hyphenatedlist = hyphenatedlist
+function commands.hyphenatedlist(n,color)
+ local b = texgetbox(n)
+ if b then
+ hyphenatedlist(b.list,color)
+ end
+end
-- local function hyphenatedhack(head,pre)
-- pre = tonut(pre)
@@ -263,5 +304,5 @@ function commands.hboxtovbox(original)
end
function commands.boxtostring(n)
- context.puretext(nodes.toutf(tex.box[n].list)) -- helper is defined later
+ context.puretext(nodes.toutf(texgetbox(n).list)) -- helper is defined later
end
diff --git a/tex/context/base/supp-box.mkiv b/tex/context/base/supp-box.mkiv
index bc1e30749..5c4157bd1 100644
--- a/tex/context/base/supp-box.mkiv
+++ b/tex/context/base/supp-box.mkiv
@@ -1077,13 +1077,19 @@
%D \stoptyping
\unexpanded\def\dohyphenatednextbox
- {\ctxcommand{hyphenatedlist(tex.box[\number\nextbox].list)}%
+ {\ctxcommand{hyphenatedlist(\number\nextbox)}%
\unhbox\nextbox}
\unexpanded\def\hyphenatedword {\dowithnextboxcs\dohyphenatednextbox\hbox}
\unexpanded\def\hyphenatedpar {\dowithnextboxcs\dohyphenatednextbox\hbox}
\unexpanded\def\hyphenatedfile#1{\dowithnextboxcs\dohyphenatednextbox\hbox{\readfile{#1}\donothing\donothing}}
+\unexpanded\def\dohyphenatednextboxcolor
+ {\ctxcommand{hyphenatedlist(\number\nextbox,true)}%
+ \unhbox\nextbox}
+
+\unexpanded\def\hyphenatedcoloredword{\dowithnextboxcs\dohyphenatednextboxcolor\hbox}
+
% D \starttyping
% D \hyphenatedhack{\kern-.25em_}{alongword}
% D \stoptyping
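
A usage sketch of the new colored variant next to the existing one (the word is arbitrary); the colored version marks the pre and post parts as set up in the Lua code above:

    \starttext
    \hyphenatedword       {responsibilities} \par
    \hyphenatedcoloredword{responsibilities} \par
    \stoptext
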
diff --git a/tex/context/base/tabl-xtb.lua b/tex/context/base/tabl-xtb.lua
index 46f08c6df..89ef21b79 100644
--- a/tex/context/base/tabl-xtb.lua
+++ b/tex/context/base/tabl-xtb.lua
@@ -818,7 +818,8 @@ function xtables.construct()
end
nofr = nofr + 1
result[nofr] = {
- hpack_node_list(list),
+ -- hpack_node_list(list),
+ hpack_node_list(list,0,"exactly","TLT"), -- otherwise weird lap
size,
i < nofrange and rowdistance > 0 and rowdistance or false, -- might move
false
diff --git a/tex/context/base/typo-inj.mkiv b/tex/context/base/typo-inj.mkiv
index 11c04e9f0..3ac1c6623 100644
--- a/tex/context/base/typo-inj.mkiv
+++ b/tex/context/base/typo-inj.mkiv
@@ -58,7 +58,8 @@
\unexpanded\def\domarkinjector#1#2% called at the lua end
{\dontleavehmode\llap{\infofont\ifcase#1\else\red\fi<#2>\quad}}
-% low level definers
+% low level definers .. we could have \injectors_mark and \injectors_check and then
+% use \v!list instead of \s!list
\unexpanded\def\doinstallinjector#1%
{\letvalue{typo_injectors_mark_#1}\donothing
diff --git a/tex/context/base/typo-sus.lua b/tex/context/base/typo-sus.lua
index ccb10d411..6f4947b1f 100644
--- a/tex/context/base/typo-sus.lua
+++ b/tex/context/base/typo-sus.lua
@@ -52,6 +52,7 @@ local nodepool = nuts.pool
local new_rule = nodepool.rule
local new_kern = nodepool.kern
+local new_penalty = nodepool.penalty
local a_characters = attributes.private("characters")
@@ -99,6 +100,8 @@ end
local function mark(head,current,id,color)
if id == glue_code then
+ -- the glue can have stretch and/or shrink so the rule can overlap with the
+ -- following glyph .. no big deal as that one then sits on top of the rule
local width = getfield(getfield(current,"spec"),"width")
local rule = new_rule(width)
local kern = new_kern(-width)
@@ -194,7 +197,14 @@ function typesetters.showsuspects(head)
elseif id == glue_code then
local a = getattr(current,a_characters)
if a then
- head = mark(head,current,id,"orange")
+ local prev = getprev(current)
+ local prid = prev and getid(prev)
+ if prid == penalty_code and getfield(prev,"penalty") == 10000 then
+ head = mark(head,current,id,"orange")
+ head = insert_before(head,current,new_penalty(10000))
+ else
+ head = mark(head,current,id,"darkmagenta")
+ end
end
current = getnext(current)
else
diff --git a/tex/context/base/x-asciimath.lua b/tex/context/base/x-asciimath.lua
index 8cc349095..ff9c5c3d6 100644
--- a/tex/context/base/x-asciimath.lua
+++ b/tex/context/base/x-asciimath.lua
@@ -814,15 +814,21 @@ local p_reserved =
local p_unicode =
lpeg.utfchartabletopattern(table.keys(k_unicode)) / k_unicode
+local p_texescape = patterns.texescape
+
+local function texescaped(s)
+ return lpegmatch(p_texescape,s)
+end
+
local p_text =
P("text")
* p_spaces^0
* Cc("\\asciimathoptext")
* ( -- maybe balanced
- Cs( P("{") * (1-P("}"))^0 * P("}") )
- + Cs((P("(")/"{") * (1-P(")"))^0 * (P(")")/"}"))
+ Cs( P("{") * ((1-P("}"))^0/texescaped) * P("}") )
+ + Cs((P("(")/"{") * ((1-P(")"))^0/texescaped) * (P(")")/"}"))
)
- + Cc("\\asciimathoptext") * Cs(Cc("{") * patterns.undouble * Cc("}"))
+ + Cc("\\asciimathoptext") * Cs(Cc("{") * (patterns.undouble/texescaped) * Cc("}"))
local m_left = {
["(:"] = s_langle,
@@ -1676,6 +1682,8 @@ if not context then
-- report_asciimath(cleanedup([[a "α" b]]))
-- report_asciimath(cleanedup([[//4]]))
+convert("4/18*100text(%)~~22,2")
+
-- convert([[sum x]])
-- convert([[sum^(1)_(2) x]])
-- convert([[lim_(1)^(2) x]])
diff --git a/tex/generic/context/luatex/luatex-fonts-merged.lua b/tex/generic/context/luatex/luatex-fonts-merged.lua
index 2c5a87812..409f5d255 100644
--- a/tex/generic/context/luatex/luatex-fonts-merged.lua
+++ b/tex/generic/context/luatex/luatex-fonts-merged.lua
@@ -1,6 +1,6 @@
-- merged file : luatex-fonts-merged.lua
-- parent file : luatex-fonts.lua
--- merge date : 11/17/14 14:32:07
+-- merge date : 11/26/14 12:50:01
do -- begin closure to overcome local limits and interference