summaryrefslogtreecommitdiff
path: root/tex/context/base/lang-hyp.lua
diff options
context:
space:
mode:
authorContext Git Mirror Bot <phg42.2a@gmail.com>2014-11-26 13:15:05 +0100
committerContext Git Mirror Bot <phg42.2a@gmail.com>2014-11-26 13:15:05 +0100
commit5e624488bd860906c17301941c96e6209a9e4cd6 (patch)
tree1a517cfd2d5d4787879fdd3673f0d123c970c9b9 /tex/context/base/lang-hyp.lua
parent156e528557cf571eb99ab05a0892429b3c2bf269 (diff)
downloadcontext-5e624488bd860906c17301941c96e6209a9e4cd6.tar.gz
2014-11-26 12:52:00
Diffstat (limited to 'tex/context/base/lang-hyp.lua')
-rw-r--r--tex/context/base/lang-hyp.lua1404
1 files changed, 1050 insertions, 354 deletions
diff --git a/tex/context/base/lang-hyp.lua b/tex/context/base/lang-hyp.lua
index 205baccce..31ec8946c 100644
--- a/tex/context/base/lang-hyp.lua
+++ b/tex/context/base/lang-hyp.lua
@@ -6,6 +6,9 @@ if not modules then modules = { } end modules ['lang-hyp'] = {
license = "see context related readme files"
}
+-- to be considered: reset dictionary.hyphenated when a pattern is added
+-- or maybe an explicit reset of the cache
+
-- In an automated workflow hypenation of long titles can be somewhat problematic
-- especially when demands conflict. For that reason I played a bit with a Lua based
-- variant of the traditional hyphenation machinery. This mechanism has been extended
@@ -36,16 +39,21 @@ if not modules then modules = { } end modules ['lang-hyp'] = {
-- a s-s z o n-n y a l/sz=sz,2,3,ny=ny,6,3
--
-- ab1cd/ef=gh,2,2 : acd - efd (pattern/replacement,start,length
+--
+-- In the procecess of wrapping up (for the ctx conference proceedings) I cleaned up
+-- and extended the code a bit.
-local type, rawset, tonumber = type, rawset, tonumber
+local type, rawset, tonumber, next = type, rawset, tonumber, next
local P, R, S, Cg, Cf, Ct, Cc, C, Carg, Cs = lpeg.P, lpeg.R, lpeg.S, lpeg.Cg, lpeg.Cf, lpeg.Ct, lpeg.Cc, lpeg.C, lpeg.Carg, lpeg.Cs
local lpegmatch = lpeg.match
-local concat = table.concat
-
-local utfchar = utf.char
-local utfbyte = utf.byte
+local concat = table.concat
+local insert = table.insert
+local remove = table.remove
+local formatters = string.formatters
+local utfchar = utf.char
+local utfbyte = utf.byte
if not characters then
require("char-ini")
@@ -53,7 +61,14 @@ end
local setmetatableindex = table.setmetatableindex
-local languages = languages or { }
+-- \enabletrackers[hyphenator.steps=silent] will not write to the terminal
+
+local trace_steps = false trackers.register("hyphenator.steps", function(v) trace_steps = v end)
+local trace_visualize = false trackers.register("hyphenator.visualize",function(v) trace_visualize = v end)
+
+local report = logs.reporter("hyphenator")
+
+languages = languages or { }
local hyphenators = languages.hyphenators or { }
languages.hyphenators = hyphenators
local traditional = hyphenators.traditional or { }
@@ -64,28 +79,74 @@ local dictionaries = setmetatableindex(function(t,k)
patterns = { },
hyphenated = { },
specials = { },
+ exceptions = { },
+ loaded = false,
}
t[k] = v
return v
end)
+hyphenators.dictionaries = dictionaries
+
+local character = lpeg.patterns.utf8character
local digit = R("09")
-local character = lpeg.patterns.utf8character - P("/")
-local splitpattern_k = Cs((digit/"" + character)^1)
-local splitpattern_v = Ct(((digit/tonumber + Cc(0)) * character)^1 * (digit/tonumber)^0)
-local splitpattern_v =
- Ct(((digit/tonumber + Cc(0)) * character)^1 * (digit/tonumber)^0) *
- (P("/") * Cf ( Ct("") *
- Cg ( Cc("before") * C((1-lpeg.P("="))^1) * P("=") )
- * Cg ( Cc("after") * C((1-lpeg.P(","))^1) * P(",") )
- * Cg ( Cc("start") * ((1-lpeg.P(","))^1/tonumber) * P(",") )
- * Cg ( Cc("length") * ((1-lpeg.P(-1) )^1/tonumber) )
+local weight = digit/tonumber + Cc(0)
+local fence = P(".")
+local hyphen = P("-")
+local space = P(" ")
+local char = character - space
+local validcharacter = (character - S("./"))
+local keycharacter = character - S("/")
+----- basepart = Ct( (Cc(0) * fence)^-1 * (weight * validcharacter)^1 * weight * (fence * Cc(0))^-1)
+local specpart = (P("/") * Cf ( Ct("") *
+ Cg ( Cc("before") * C((1-P("="))^1) * P("=") ) *
+ Cg ( Cc("after") * C((1-P(","))^1) ) *
+ ( P(",") *
+ Cg ( Cc("start") * ((1-P(","))^1/tonumber) * P(",") ) *
+ Cg ( Cc("length") * ((1-P(-1) )^1/tonumber) )
+ )^-1
, rawset))^-1
-local function register(patterns,specials,str,specification)
- local k = lpegmatch(splitpattern_k,str)
- local v1, v2 = lpegmatch(splitpattern_v,str)
- patterns[k] = v1
+local make_hashkey_p = Cs((digit/"" + keycharacter)^1)
+----- make_pattern_p = basepart * specpart
+local make_hashkey_e = Cs((hyphen/"" + keycharacter)^1)
+local make_pattern_e = Ct(P(char) * (hyphen * Cc(true) * P(char) + P(char) * Cc(false))^1) -- catch . and char after -
+
+-- local make_hashkey_c = Cs((digit + keycharacter/"")^1)
+-- local make_pattern_c = Ct((P(1)/tonumber)^1)
+
+-- local cache = setmetatableindex(function(t,k)
+-- local n = lpegmatch(make_hashkey_c,k)
+-- local v = lpegmatch(make_pattern_c,n)
+-- t[k] = v
+-- return v
+-- end)
+--
+-- local weight_n = digit + Cc("0")
+-- local basepart_n = Cs( (Cc("0") * fence)^-1 * (weight * validcharacter)^1 * weight * (fence * Cc("0"))^-1) / cache
+-- local make_pattern_n = basepart_n * specpart
+
+local make_pattern_c = Ct((P(1)/tonumber)^1)
+
+-- us + nl: 17664 entries -> 827 unique (saves some 3M)
+
+local cache = setmetatableindex(function(t,k)
+ local v = lpegmatch(make_pattern_c,k)
+ t[k] = v
+ return v
+end)
+
+local weight_n = digit + Cc("0")
+local fence_n = fence / "0"
+local char_n = validcharacter / ""
+local basepart_n = Cs(fence_n^-1 * (weight_n * char_n)^1 * weight_n * fence_n^-1) / cache
+local make_pattern_n = basepart_n * specpart
+
+local function register_pattern(patterns,specials,str,specification)
+ local k = lpegmatch(make_hashkey_p,str)
+ -- local v1, v2 = lpegmatch(make_pattern_p,str)
+ local v1, v2 = lpegmatch(make_pattern_n,str)
+ patterns[k] = v1 -- is this key still ok for complex patterns
if specification then
specials[k] = specification
elseif v2 then
@@ -93,17 +154,50 @@ local function register(patterns,specials,str,specification)
end
end
-local word = ((Carg(1) * Carg(2) * C((1 - P(" "))^1)) / register + 1)^1
-local split = Ct(C(character)^1)
+local function unregister_pattern(patterns,specials,str)
+ local k = lpegmatch(make_hashkey_p,str)
+ patterns[k] = nil
+ specials[k] = nil
+end
+
+local function register_exception(exceptions,str,specification)
+ local k = lpegmatch(make_hashkey_e,str)
+ local v = lpegmatch(make_pattern_e,str)
+ exceptions[k] = v
+end
+
+local p_pattern = ((Carg(1) * Carg(2) * C(char^1)) / register_pattern + 1)^1
+local p_exception = ((Carg(1) * C(char^1)) / register_exception + 1)^1
+local p_split = Ct(C(character)^1)
function traditional.loadpatterns(language,filename)
- local specification = require(filename)
local dictionary = dictionaries[language]
- if specification then
- local patterns = specification.patterns
- if patterns then
- lpegmatch(word,patterns.data,1,dictionary.patterns,dictionary.specials)
+ if not dictionary.loaded then
+ if not filename or filename == "" then
+ filename = "lang-" .. language
end
+ filename = file.addsuffix(filename,"lua")
+ local fullname = resolvers.findfile(filename)
+ if fullname and fullname ~= "" then
+ local specification = dofile(fullname)
+ if specification then
+ local patterns = specification.patterns
+ if patterns then
+ local data = patterns.data
+ if data and data ~= "" then
+ lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
+ end
+ end
+ local exceptions = specification.exceptions
+ if exceptions then
+ local data = exceptions.data
+ if data and data ~= "" then
+ lpegmatch(p_exception,data,1,dictionary.exceptions)
+ end
+ end
+ end
+ end
+ dictionary.loaded = true
end
return dictionary
end
@@ -113,30 +207,153 @@ local uccodes = characters.uccodes
local nofwords = 0
local nofhashed = 0
-local function hyphenate(dictionary,word)
+local steps = nil
+local f_show = formatters["%w%s"]
+
+local function show_log()
+ if trace_steps == true then
+ report()
+ local w = #steps[1][1]
+ for i=1,#steps do
+ local s = steps[i]
+ report("%s%w%S %S",s[1],w - #s[1] + 3,s[2],s[3])
+ end
+ report()
+ end
+end
+
+local function show_1(wsplit)
+ local u = concat(wsplit," ")
+ steps = { { f_show(0,u), f_show(0,u) } }
+end
+
+local function show_2(c,m,wsplit,done,i,spec)
+ local s = lpegmatch(p_split,c)
+ local t = { }
+ local n = #m
+ local w = #wsplit
+ for j=1,n do
+ t[#t+1] = m[j]
+ t[#t+1] = s[j]
+ end
+ local m = 2*i-2
+ local l = #t
+ local s = spec and table.sequenced(spec) or ""
+ if m == 0 then
+ steps[#steps+1] = { f_show(m, concat(t,"",2)), f_show(1,concat(done," ",2,#done),s) }
+ elseif i+1 == w then
+ steps[#steps+1] = { f_show(m-1,concat(t,"",1,#t-1)), f_show(1,concat(done," ",2,#done),s) }
+ else
+ steps[#steps+1] = { f_show(m-1,concat(t)), f_show(1,concat(done," ",2,#done),s) }
+ end
+end
+
+local function show_3(wsplit,done)
+ local t = { }
+ local h = { }
+ local n = #wsplit
+ for i=1,n do
+ local w = wsplit[i]
+ if i > 1 then
+ local d = done[i]
+ t[#t+1] = i > 2 and d % 2 == 1 and "-" or " "
+ h[#h+1] = d
+ end
+ t[#t+1] = w
+ h[#h+1] = w
+ end
+ steps[#steps+1] = { f_show(0,concat(h)), f_show(0,concat(t)) }
+ show_log()
+end
+
+local function show_4(wsplit,done)
+ steps = { { concat(wsplit," ") } }
+ show_log()
+end
+
+function traditional.lasttrace()
+ return steps
+end
+
+-- We could reuse the w table but as we cache the resolved words
+-- there is not much gain in that complication.
+--
+-- Beware: word can be a table and when n is passed to we can
+-- assume reuse so we need to honor that n then.
+
+-- todo: a fast variant for tex ... less lookups (we could check is
+-- dictionary has changed) ... although due to caching the already
+-- done words, we don't do much here
+
+local function hyphenate(dictionary,word,n) -- odd is okay
nofwords = nofwords + 1
local hyphenated = dictionary.hyphenated
- local isstring = type(word) == "string"
- local done
+ local isstring = type(word) == "string"
if isstring then
- done = hyphenated[word]
+ local done = hyphenated[word]
+ if done ~= nil then
+ return done
+ end
+ elseif n then
+ local done = hyphenated[concat(word,"",1,n)]
+ if done ~= nil then
+ return done
+ end
else
- done = hyphenated[concat(word)]
+ local done = hyphenated[concat(word)]
+ if done ~= nil then
+ return done
+ end
end
+ local key
+ if isstring then
+ key = word
+ word = lpegmatch(p_split,word)
+ if not n then
+ n = #word
+ end
+ else
+ if not n then
+ n = #word
+ end
+ key = concat(word,"",1,n)
+ end
+ local l = 1
+ local w = { "." }
+ for i=1,n do
+ local c = word[i]
+ l = l + 1
+ w[l] = lcchars[c] or c
+ end
+ l = l + 1
+ w[l] = "."
+ local c = concat(w,"",2,l-1)
+ --
+ local done = hyphenated[c]
if done ~= nil then
+ hyphenated[key] = done
+ nofhashed = nofhashed + 1
return done
- else
- done = false
end
- local specials = dictionary.specials
- local patterns = dictionary.patterns
- local s = isstring and lpegmatch(split,word) or word
- local l = #s
- local w = { }
- for i=1,l do
- local si = s[i]
- w[i] = lcchars[si] or si
+ --
+ local exceptions = dictionary.exceptions
+ local exception = exceptions[c]
+ if exception then
+ if trace_steps then
+ show_4(w,exception)
+ end
+ hyphenated[key] = exception
+ nofhashed = nofhashed + 1
+ return exception
end
+ --
+ if trace_steps then
+ show_1(w)
+ end
+ --
+ local specials = dictionary.specials
+ local patterns = dictionary.patterns
+ --
local spec
for i=1,l do
for j=i,l do
@@ -146,15 +363,23 @@ local function hyphenate(dictionary,word)
local s = specials[c]
if not done then
done = { }
- spec = { }
+ spec = nil
+ -- the string that we resolve has explicit fences (.) so
+ -- done starts at the first fence and runs upto the last
+ -- one so we need one slot less
for i=1,l do
done[i] = 0
end
end
+ -- we run over the pattern that always has a (zero) value for
+ -- each character plus one more as we look at both sides
for k=1,#m do
local new = m[k]
if not new then
break
+ elseif new == true then
+ report("fatal error")
+ break
elseif new > 0 then
local pos = i + k - 1
local old = done[pos]
@@ -163,136 +388,238 @@ local function hyphenate(dictionary,word)
elseif new > old then
done[pos] = new
if s then
- local b = i + s.start - 1
- local e = b + s.length - 1
- if pos >= b and pos <= e then
- spec[pos] = s
+ local b = i + (s.start or 1) - 1
+ if b > 0 then
+ local e = b + (s.length or 2) - 1
+ if e > 0 then
+ if pos >= b and pos <= e then
+ if spec then
+ spec[pos] = { s, k - 1 }
+ else
+ spec = { [pos] = { s, k - 1 } }
+ end
+ end
+ end
end
end
end
end
end
+ if trace_steps and done then
+ show_2(c,m,w,done,i,s)
+ end
end
end
end
+ if trace_steps and done then
+ show_3(w,done)
+ end
if done then
local okay = false
- for i=1,#done do
+ for i=3,#done do
if done[i] % 2 == 1 then
- done[i] = spec[i] or true
+ done[i-2] = spec and spec[i] or true
okay = true
else
- done[i] = false
+ done[i-2] = false
end
end
- if not okay then
+ if okay then
+ done[#done] = nil
+ done[#done] = nil
+ else
done = false
end
+ else
+ done = false
end
- hyphenated[isstring and word or concat(word)] = done
+ hyphenated[key] = done
nofhashed = nofhashed + 1
return done
end
-local f_detail_1 = string.formatters["{%s}{%s}{}"]
-local f_detail_2 = string.formatters["{%s%s}{%s%s}{%s}"]
+function traditional.gettrace(language,word)
+ local dictionary = dictionaries[language]
+ if dictionary then
+ local hyphenated = dictionary.hyphenated
+ hyphenated[word] = nil
+ hyphenate(dictionary,word)
+ return steps
+ end
+end
+
+local methods = setmetatableindex(function(t,k) local v = hyphenate t[k] = v return v end)
+
+function traditional.installmethod(name,f)
+ if rawget(methods,name) then
+ report("overloading %a is not permitted",name)
+ else
+ methods[name] = f
+ end
+end
+
+local s_detail_1 = "-"
+local f_detail_2 = formatters["%s-%s"]
+local f_detail_3 = formatters["{%s}{%s}{}"]
+local f_detail_4 = formatters["{%s%s}{%s%s}{%s}"]
function traditional.injecthyphens(dictionary,word,specification)
- local h = hyphenate(dictionary,word)
- if not h then
+ if not word then
+ return false
+ end
+ if not specification then
+ return word
+ end
+ local hyphens = hyphenate(dictionary,word)
+ if not hyphens then
return word
end
- local w = lpegmatch(split,word)
- local r = { }
- local l = #h
- local n = 0
- local i = 1
- local leftmin = specification.lefthyphenmin or 2
- local rightmin = l - (specification.righthyphenmin or left) + 1
- local leftchar = specification.lefthyphenchar
- local rightchar = specification.righthyphenchar
- while i <= l do
- if i > leftmin and i < rightmin then
- local hi = h[i]
- if not hi then
- n = n + 1
- r[n] = w[i]
- i = i + 1
- elseif hi == true then
- n = n + 1
- r[n] = f_detail_1(rightchar,leftchar)
- n = n + 1
- r[n] = w[i]
- i = i + 1
+
+ -- the following code is similar to code later on but here we have
+ -- strings while there we have hyphen specs
+
+ local word = lpegmatch(p_split,word)
+ local size = #word
+
+ local leftmin = specification.leftcharmin or 2
+ local rightmin = size - (specification.rightcharmin or leftmin)
+ local leftchar = specification.leftchar
+ local rightchar = specification.rightchar
+
+ local result = { }
+ local rsize = 0
+ local position = 1
+
+ while position <= size do
+ if position >= leftmin and position <= rightmin then
+ local hyphen = hyphens[position]
+ if not hyphen then
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
+ elseif hyphen == true then
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ rsize = rsize + 1
+ if leftchar and rightchar then
+ result[rsize] = f_detail_3(rightchar,leftchar)
+ else
+ result[rsize] = s_detail_1
+ end
+ position = position + 1
else
- local b = i - hi.start
- local e = b + hi.length - 1
- n = b
- r[n] = f_detail_2(hi.before,rightchar,leftchar,hi.after,concat(w,"",b,e))
- if e + 1 == i then
- i = i + 1
+ local o, h = hyphen[2]
+ if o then
+ h = hyphen[1]
+ else
+ h = hyphen
+ o = 1
+ end
+ local b = position - o + (h.start or 1)
+ local e = b + (h.length or 2) - 1
+ if b > 0 and e >= b then
+ for i=1,b-position do
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
+ end
+ rsize = rsize + 1
+ if leftchar and rightchar then
+ result[rsize] = f_detail_4(h.before,rightchar,leftchar,h.after,concat(word,"",b,e))
+ else
+ result[rsize] = f_detail_2(h.before,h.after)
+ end
+ position = e + 1
else
- i = e + 1
+ -- error
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
end
end
else
- n = n + 1
- r[n] = w[i]
- i = i + 1
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
end
end
- return concat(r)
+ return concat(result)
end
function traditional.registerpattern(language,str,specification)
local dictionary = dictionaries[language]
- register(dictionary.patterns,dictionary.specials,str,specification)
+ if specification == false then
+ unregister_pattern(dictionary.patterns,dictionary.specials,str)
+ else
+ register_pattern(dictionary.patterns,dictionary.specials,str,specification)
+ end
end
-- todo: unicodes or utfhash ?
if context then
- local nodecodes = nodes.nodecodes
- local glyph_code = nodecodes.glyph
- local math_code = nodecodes.math
-
- local nuts = nodes.nuts
- local tonut = nodes.tonut
- local nodepool = nuts.pool
-
- local new_disc = nodepool.disc
-
- local setfield = nuts.setfield
- local getfield = nuts.getfield
- local getchar = nuts.getchar
- local getid = nuts.getid
- local getnext = nuts.getnext
- local getprev = nuts.getprev
- local insert_before = nuts.insert_before
- local insert_after = nuts.insert_after
- local copy_node = nuts.copy
- local remove_node = nuts.remove
- local end_of_math = nuts.end_of_math
- local node_tail = nuts.tail
+ local nodecodes = nodes.nodecodes
+ local glyph_code = nodecodes.glyph
+ local math_code = nodecodes.math
+
+ local nuts = nodes.nuts
+ local tonut = nodes.tonut
+ local nodepool = nuts.pool
+
+ local new_disc = nodepool.disc
+
+ local setfield = nuts.setfield
+ local getfield = nuts.getfield
+ local getchar = nuts.getchar
+ local getid = nuts.getid
+ local getattr = nuts.getattr
+ local getnext = nuts.getnext
+ local getprev = nuts.getprev
+ local insert_before = nuts.insert_before
+ local insert_after = nuts.insert_after
+ local copy_node = nuts.copy
+ local remove_node = nuts.remove
+ local end_of_math = nuts.end_of_math
+ local node_tail = nuts.tail
+
+ local setcolor = nodes.tracers.colors.set
+
+ local variables = interfaces.variables
+ local v_reset = variables.reset
+ local v_yes = variables.yes
+ local v_all = variables.all
+
+ local settings_to_array = utilities.parsers.settings_to_array
+
+ local unsetvalue = attributes.unsetvalue
+ local texsetattribute = tex.setattribute
+
+ local prehyphenchar = lang.prehyphenchar
+ local posthyphenchar = lang.posthyphenchar
+
+ local lccodes = characters.lccodes
+
+ local a_hyphenation = attributes.private("hyphenation")
function traditional.loadpatterns(language)
return dictionaries[language]
end
- statistics.register("hyphenation",function()
- if nofwords > 0 then
- return string.format("%s words hyphenated, %s unique",nofwords,nofhashed)
+ setmetatableindex(dictionaries,function(t,k) -- for the moment we use an independent data structure
+ if type(k) == "string" then
+ -- this will force a load if not yet loaded (we need a nicer way)
+ -- for the moment that will do (nneeded for examples that register
+ -- a pattern specification
+ languages.getnumber(k)
end
- end)
-
- setmetatableindex(dictionaries,function(t,k) -- we use an independent data structure
local specification = languages.getdata(k)
- local dictionary = {
+ local dictionary = {
patterns = { },
+ exceptions = { },
hyphenated = { },
specials = { },
- instance = 0,
+ instance = false,
characters = { },
unicodes = { },
}
@@ -304,21 +631,22 @@ if context then
local data = patterns.data
if data then
-- regular patterns
- lpegmatch(word,data,1,dictionary.patterns,dictionary.specials)
+ lpegmatch(p_pattern,data,1,dictionary.patterns,dictionary.specials)
end
local extra = patterns.extra
if extra then
-- special patterns
- lpegmatch(word,extra,1,dictionary.patterns,dictionary.specials)
+ lpegmatch(p_pattern,extra,1,dictionary.patterns,dictionary.specials)
+ end
+ end
+ local exceptions = resources.exceptions
+ if exceptions then
+ local data = exceptions.data
+ if data and data ~= "" then
+ lpegmatch(p_exception,data,1,dictionary.exceptions)
end
end
- local permitted = patterns.characters
--- local additional = "[]()"
--- local additional = specification.additional
--- if additional then
--- permitted = permitted .. additional -- has to be attribute driven
--- end
- local usedchars = lpegmatch(split,permitted)
+ local usedchars = lpegmatch(p_split,patterns.characters)
local characters = { }
local unicodes = { }
for i=1,#usedchars do
@@ -327,12 +655,18 @@ if context then
local upper = uccodes[code]
characters[char] = code
unicodes [code] = char
- unicodes [upper] = utfchar(upper)
+ if type(upper) == "table" then
+ for i=1,#upper do
+ local u = upper[i]
+ unicodes[u] = utfchar(u)
+ end
+ else
+ unicodes[upper] = utfchar(upper)
+ end
end
dictionary.characters = characters
dictionary.unicodes = unicodes
- setmetatableindex(characters,function(t,k) local v = utfbyte(k) t[k] = v return v end) -- can be non standard
- -- setmetatableindex(unicodes, function(t,k) local v = utfchar(k) t[k] = v return v end)
+ setmetatableindex(characters,function(t,k) local v = k and utfbyte(k) t[k] = v return v end)
end
t[specification.number] = dictionary
dictionary.instance = specification.instance -- needed for hyphenchars
@@ -341,268 +675,554 @@ if context then
return dictionary
end)
- local function flush(head,start,stop,dictionary,w,h,lefthyphenchar,righthyphenchar,characters,lefthyphenmin,righthyphenmin)
- local r = { }
- local l = #h
- local n = 0
- local i = 1
- local left = lefthyphenmin
- local right = l - righthyphenmin + 1
- while i <= l do
- if i > left and i < right then
- local hi = h[i]
- if not hi then
- n = n + 1
- r[n] = w[i]
- i = i + 1
- elseif hi == true then
- n = n + 1
- r[n] = true
- n = n + 1
- r[n] = w[i]
- i = i + 1
- else
- local b = i - hi.start -- + 1 - 1
- local e = b + hi.length - 1
- n = b
- r[n] = { hi.before, hi.after, concat(w,"",b,e) }
- i = e + 1
+ -- Beware: left and right min doesn't mean that in a 1 mmm hsize there can be snippets
+ -- with less characters than either of them! This could be an option but such a narrow
+ -- hsize doesn't make sense anyway.
+
+ -- We assume that featuresets are defined global ... local definitions
+ -- (also mid paragraph) make not much sense anyway. For the moment we
+ -- assume no predefined sets so we don't need to store them. Nor do we
+ -- need to hash them in order to save space ... no sane user will define
+ -- many of them.
+
+ local featuresets = hyphenators.featuresets or { }
+ hyphenators.featuresets = featuresets
+
+ storage.shared.noflanguagesfeaturesets = storage.shared.noflanguagesfeaturesets or 0
+
+ local noffeaturesets = storage.shared.noflanguagesfeaturesets
+
+ storage.register("languages/hyphenators/featuresets",featuresets,"languages.hyphenators.featuresets")
+
+ ----- hash = table.sequenced(featureset,",") -- no need now
+
+ local function register(name,featureset)
+ noffeaturesets = noffeaturesets + 1
+ featureset.attribute = noffeaturesets
+ featuresets[noffeaturesets] = featureset -- access by attribute
+ featuresets[name] = featureset -- access by name
+ storage.shared.noflanguagesfeaturesets = noffeaturesets
+ return noffeaturesets
+ end
+
+ local function makeset(...) -- a bit overkill, supporting variants but who cares
+ local set = { }
+ for i=1,select("#",...) do
+ local list = select(i,...)
+ local kind = type(list)
+ local used = nil
+ if kind == "string" then
+ if list == v_all then
+ -- not ok ... now all get ignored
+ return setmetatableindex(function(t,k) local v = utfchar(k) t[k] = v return v end)
+ elseif list ~= "" then
+ used = lpegmatch(p_split,list)
+ set = set or { }
+ for i=1,#used do
+ local char = used[i]
+ set[utfbyte(char)] = char
+ end
+ end
+ elseif kind == "table" then
+ if next(list) then
+ set = set or { }
+ for byte, char in next, list do
+ set[byte] = char == true and utfchar(byte) or char
+ end
+ elseif #list > 0 then
+ set = set or { }
+ for i=1,#list do
+ local l = list[i]
+ if type(l) == "number" then
+ set[l] = utfchar(l)
+ else
+ set[utfbyte(l)] = l
+ end
+ end
end
- else
- n = n + 1
- r[n] = w[i]
- i = i + 1
end
end
+ return set
+ end
- local function serialize(s,lefthyphenchar,righthyphenchar)
- if not s then
- return
- elseif s == true then
- local n = copy_node(stop)
- setfield(n,"char",lefthyphenchar or righthyphenchar)
- return n
- end
- local h = nil
- local c = nil
- if lefthyphenchar then
- h = copy_node(stop)
- setfield(h,"char",lefthyphenchar)
- c = h
- end
- if #s == 1 then
- local n = copy_node(stop)
- setfield(n,"char",characters[s])
- if not h then
- h = n
- else
- insert_after(c,c,n)
- end
- c = n
- else
- local t = lpegmatch(split,s)
+ local defaulthyphens = {
+ [0x2D] = true, -- hyphen
+ [0xAD] = true, -- soft hyphen
+ }
+
+ local defaultjoiners = {
+ [0x200C] = true, -- nzwj
+ [0x200D] = true, -- zwj
+ }
+
+ local function definefeatures(name,featureset)
+ local extrachars = featureset.characters -- "[]()"
+ local hyphenchars = featureset.hyphens
+ local joinerchars = featureset.joiners
+ local alternative = featureset.alternative
+ local rightwordmin = tonumber(featureset.rightwordmin)
+ local leftcharmin = tonumber(featureset.leftcharmin)
+ local rightcharmin = tonumber(featureset.rightcharmin)
+ --
+ joinerchars = joinerchars == v_yes and defaultjoiners or joinerchars
+ hyphenchars = hyphenchars == v_yes and defaulthyphens or hyphenchars
+ -- not yet ok: extrachars have to be ignored so it cannot be all)
+ featureset.extrachars = makeset(joinerchars or "",extrachars or "")
+ featureset.hyphenchars = makeset(hyphenchars or "")
+ featureset.alternative = alternative or "hyphenate"
+ featureset.rightwordmin = rightwordmin and rightwordmin > 0 and rightwordmin or nil
+ featureset.leftcharmin = leftcharmin and leftcharmin > 0 and leftcharmin or nil
+ featureset.rightcharmin = rightcharmin and rightcharmin > 0 and rightcharmin or nil
+ --
+ return register(name,featureset)
+ end
+
+ traditional.definefeatures = definefeatures
+ commands.definehyphenationfeatures = definefeatures
+
+ function commands.sethyphenationfeatures(n)
+ if not n or n == v_reset then
+ n = false
+ else
+ local f = featuresets[n]
+ if not f and type(n) == "string" then
+ local t = settings_to_array(n)
+ local s = { }
for i=1,#t do
- local n = copy_node(stop)
- setfield(n,"char",characters[t[i]])
- if not h then
- h = n
- else
- insert_after(c,c,n)
+ local ti = t[i]
+ local fs = featuresets[ti]
+ if fs then
+ for k, v in next, fs do
+ s[k] = v
+ end
end
- c = n
end
+ n = register(n,s)
+ else
+ n = f and f.attribute
end
- if righthyphenchar then
- local n = copy_node(stop)
- insert_after(c,c,n)
- setfield(n,"char",righthyphenchar)
- end
- return h
end
+ texsetattribute(a_hyphenation,n or unsetvalue)
+ end
+
+ commands.registerhyphenationpattern = traditional.registerpattern
- -- no grow
+ -- This is a relative large function with local variables and local
+ -- functions. A previous implementation had the functions outside but
+ -- this is cleaner and as efficient. The test runs 100 times over
+ -- tufte.tex, knuth.tex, zapf.tex, ward.tex and darwin.tex in lower
+ -- and uppercase with a 1mm hsize.
+ --
+ -- language=0 language>0 4 | 3 * slower
+ --
+ -- tex 2.34 | 1.30 2.55 | 1.45 0.21 | 0.15
+ -- lua 2.42 | 1.38 3.30 | 1.84 0.88 | 0.46
+ --
+ -- Of course we have extra overhead (virtual Lua machine) but also we
+ -- check attributes and support specific local options). The test puts
+ -- the typeset text in boxes and discards it. If we also flush the
+ -- runtime is 4.31|2.56 and 4.99|2.94 seconds so the relative difference
+ -- is (somehow) smaller. The test has 536 pages. There is a little bit
+ -- of extra overhead because we store the patterns in a different way.
+ --
+ -- As usual I will look for speedups. Some 0.01 seconds could be gained
+ -- by sharing patterns which is not impressive but it does save some
+ -- 3M memory on this test. (Some optimizations already brought the 3.30
+ -- seconds down to 3.14 but it all depends on aggressive caching.)
- local current = start
- local size = #r
- for i=1,size do
- local ri = r[i]
- if ri == true then
- local n = new_disc()
- if righthyphenchar then
- setfield(n,"pre",serialize(true,righthyphenchar))
+ local starttiming = statistics.starttiming
+ local stoptiming = statistics.stoptiming
+
+ function traditional.hyphenate(head)
+
+ local first = tonut(head)
+ local tail = nil
+ local last = nil
+ local current = first
+ local dictionary = nil
+ local instance = nil
+ local characters = nil
+ local unicodes = nil
+ local extrachars = nil
+ local hyphenchars = nil
+ local language = nil
+ local start = nil
+ local stop = nil
+ local word = { } -- we reuse this table
+ local size = 0
+ local leftchar = false
+ local rightchar = false -- utfbyte("-")
+ local leftmin = 0
+ local rightmin = 0
+ local leftcharmin = nil
+ local rightcharmin = nil
+ local rightwordmin = nil
+ local attr = nil
+ local lastwordlast = nil
+ local hyphenated = hyphenate
+
+ -- We cannot use an 'enabled' boolean (false when no characters or extras) because we
+ -- can have plugins that set a characters metatable and so) ... it doesn't save much
+ -- anyway. Using (unicodes and unicodes[code]) and a nil table when no characters also
+ -- doesn't save much. So there not that much to gain for languages that don't hyphenate.
+ --
+ -- enabled = (unicodes and (next(unicodes) or getmetatable(unicodes))) or (extrachars and next(extrachars))
+ --
+ -- This can be used to not add characters i.e. keep size 0 but then we need to check for
+ -- attributes that change it, which costs time too. Not much to gain there.
+
+ starttiming(traditional)
+
+ local function synchronizefeatureset(a)
+ local f = a and featuresets[a]
+ if f then
+ hyphenated = methods[f.alternative or "hyphenate"]
+ extrachars = f.extrachars
+ hyphenchars = f.hyphenchars
+ rightwordmin = f.rightwordmin
+ leftcharmin = f.leftcharmin
+ rightcharmin = f.rightcharmin
+ if rightwordmin and rightwordmin > 0 and lastwordlast ~= rightwordmin then
+ -- so we can change mid paragraph but it's kind of unpredictable then
+ if not tail then
+ tail = node_tail(first)
+ end
+ last = tail
+ local inword = false
+ while last and rightwordmin > 0 do
+ local id = getid(last)
+ if id == glyph_code then
+ inword = true
+ if trace_visualize then
+ setcolor(last,"darkred")
+ end
+ elseif inword then
+ inword = false
+ rightwordmin = rightwordmin - 1
+ end
+ last = getprev(last)
+ end
+ lastwordlast = rightwordmin
end
- if lefthyphenchar then
- setfield(n,"post",serialize(true,lefthyphenchar))
+ else
+ hyphenated = methods.hyphenate
+ extrachars = false
+ hyphenchars = false
+ rightwordmin = false
+ leftcharmin = false
+ rightcharmin = false
+ end
+ return a
+ end
+
+ local function flush(hyphens) -- todo: no need for result
+
+ local rightmin = size - rightmin
+ local result = { }
+ local rsize = 0
+ local position = 1
+
+ -- todo: remember last dics and don't go back to before that (plus
+ -- message) .. for simplicity we also assume that we don't start
+ -- with a dics node
+ --
+ -- there can be a conflict: if we backtrack then we can end up in
+ -- another disc and get out of sync (dup chars and so)
+
+ while position <= size do
+ if position >= leftmin and position <= rightmin then
+ local hyphen = hyphens[position]
+ if not hyphen then
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
+ elseif hyphen == true then
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ rsize = rsize + 1
+ result[rsize] = true
+ position = position + 1
+ else
+ local o, h = hyphen[2]
+ if o then
+ -- { hyphen, offset)
+ h = hyphen[1]
+ else
+ -- hyphen
+ h = hyphen
+ o = 1
+ end
+ local b = position - o + (h.start or 1)
+ local e = b + (h.length or 2) - 1
+ if b > 0 and e >= b then
+ for i=1,b-position do
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
+ end
+ rsize = rsize + 1
+ result[rsize] = {
+ h.before or "", -- pre
+ h.after or "", -- post
+ concat(word,"",b,e), -- replace
+ h.right, -- optional after pre
+ h.left, -- optional before post
+ }
+ position = e + 1
+ else
+ -- error
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
+ end
+ end
+ else
+ rsize = rsize + 1
+ result[rsize] = word[position]
+ position = position + 1
end
- insert_before(head,current,n)
- elseif type(ri) == "table" then
- local n = new_disc()
- local pre, post, replace = ri[1], ri[2], ri[3]
- if pre then
- setfield(n,"pre",serialize(pre,false,righthyphenchar))
+ end
+
+ local function serialize(replacement,leftchar,rightchar)
+ if not replacement then
+ return
+ elseif replacement == true then
+ local glyph = copy_node(stop)
+ setfield(glyph,"char",leftchar or rightchar)
+ return glyph
end
- if post then
- setfield(n,"post",serialize(post,lefthyphenchar,false))
+ local head = nil
+ local current = nil
+ if leftchar then
+ head = copy_node(stop)
+ current = head
+ setfield(head,"char",leftchar)
end
- if replace then
- setfield(n,"replace",serialize(replace))
+ local rsize = #replacement
+ if rsize == 1 then
+ local glyph = copy_node(stop)
+ setfield(glyph,"char",characters[replacement])
+ if head then
+ insert_after(current,current,glyph)
+ else
+ head = glyph
+ end
+ current = glyph
+ elseif rsize > 0 then
+ local list = lpegmatch(p_split,replacement) -- this is an utf split (could be cached)
+ for i=1,#list do
+ local glyph = copy_node(stop)
+ setfield(glyph,"char",characters[list[i]])
+ if head then
+ insert_after(current,current,glyph)
+ else
+ head = glyph
+ end
+ current = glyph
+ end
end
- insert_before(head,current,n)
- else
- setfield(current,"char",characters[ri])
- if i < size then
- current = getnext(current)
+ if rightchar then
+ local glyph = copy_node(stop)
+ insert_after(current,current,glyph)
+ setfield(glyph,"char",rightchar)
end
+ return head
end
- end
- if current ~= stop then
- local current = getnext(current)
- local last = getnext(stop)
- while current ~= last do
- head, current = remove_node(head,current,true)
- end
- end
- end
- -- simple cases: no special .. only inject
+ local current = start
- local prehyphenchar = lang.prehyphenchar
- local posthyphenchar = lang.posthyphenchar
-
- local lccodes = characters.lccodes
-
- -- An experimental feature:
- --
- -- \setupalign[verytolerant,flushleft]
- -- \setuplayout[width=140pt] \showframe
- -- longword longword long word longword longwordword \par
- -- \enabledirectives[hyphenators.rightwordsmin=1]
- -- longword longword long word longword longwordword \par
- -- \disabledirectives[hyphenators.rightwordsmin]
- --
- -- An alternative is of course to pack the words in an hbox.
+ for i=1,rsize do
+ local r = result[i]
+ if r == true then
+ local disc = new_disc()
+ if rightchar then
+ setfield(disc,"pre",serialize(true,rightchar))
+ end
+ if leftchar then
+ setfield(disc,"post",serialize(true,leftchar))
+ end
+ -- could be a replace as well
+ insert_before(first,current,disc)
+ elseif type(r) == "table" then
+ local disc = new_disc()
+ local pre = r[1]
+ local post = r[2]
+ local replace = r[3]
+ local right = r[4] ~= false and rightchar
+ local left = r[5] ~= false and leftchar
+ if pre and pre ~= "" then
+ setfield(disc,"pre",serialize(pre,false,right))
+ end
+ if post and post ~= "" then
+ setfield(disc,"post",serialize(post,left,false))
+ end
+ if replace and replace ~= "" then
+ setfield(disc,"replace",serialize(replace))
+ end
+ insert_before(first,current,disc)
+ else
+ setfield(current,"char",characters[r])
+ if i < rsize then
+ current = getnext(current)
+ end
+ end
+ end
+ if current and current ~= stop then
+ local current = getnext(current)
+ local last = getnext(stop)
+ while current ~= last do
+ first, current = remove_node(first,current,true)
+ end
+ end
- local rightwordsmin = 0 -- todo: parproperties (each par has a number anyway)
+ end
- function traditional.hyphenate(head)
- local first = tonut(head)
- local current = first
- local dictionary = nil
- local instance = nil
- local characters = nil
- local unicodes = nil
- local language = nil
- local start = nil
- local stop = nil
- local word = nil -- maybe reuse and pass size
- local size = 0
- local leftchar = false
- local rightchar = false -- utfbyte("-")
- local leftmin = 0
- local rightmin = 0
- local lastone = nil
-
- if rightwordsmin > 0 then
- lastone = node_tail(first)
- local inword = false
- while lastone and rightwordsmin > 0 do
- local id = getid(lastone)
- if id == glyph_code then
- inword = true
- elseif inword then
- inword = false
- rightwordsmin = rightwordsmin - 1
+ local function inject()
+ if first ~= current then
+ local disc = new_disc()
+ first, current, glyph = remove_node(first,current)
+ first, current = insert_before(first,current,disc)
+ if trace_visualize then
+ setcolor(glyph,"darkred") -- these get checked in the colorizer
+ setcolor(disc,"darkgreen") -- these get checked in the colorizer
+ end
+ setfield(disc,"replace",glyph)
+ if rightchar then
+ local glyph = copy_node(glyph)
+ setfield(glyph,"char",rightchar)
+ setfield(disc,"pre",glyph)
+ end
+ if leftchar then
+ local glyph = copy_node(glyph)
+ setfield(glyph,"char",rightchar)
+ setfield(disc,"post",glyph)
end
- lastone = getprev(lastone)
end
+ return current
+ end
+
+ local a = getattr(first,a_hyphenation)
+ if a ~= attr then
+ attr = synchronizefeatureset(a)
end
- while current ~= lastone do
+ -- The first attribute in a word determines the way a word gets hyphenated
+ -- and if relevant, other properties are also set then. We could optimize for
+ -- silly one-char cases but it has no priority as the code is still not that
+ -- much slower than the native hyphenator and this variant also provides room
+ -- for extensions.
+
+ while current and current ~= last do -- and current
local id = getid(current)
if id == glyph_code then
- -- currently no lc/uc code support
local code = getchar(current)
local lang = getfield(current,"lang")
if lang ~= language then
- if dictionary then
- if leftmin + rightmin < #word then
- local done = hyphenate(dictionary,word)
- if done then
- flush(first,start,stop,dictionary,word,done,leftchar,rightchar,characters,leftmin,rightmin)
- end
+ if size > 0 and dictionary and leftmin + rightmin <= size then
+ local hyphens = hyphenated(dictionary,word,size)
+ if hyphens then
+ flush(hyphens)
end
end
- language = lang
- dictionary = dictionaries[language]
- instance = dictionary.instance
- characters = dictionary.characters
- unicodes = dictionary.unicodes
- leftchar = instance and posthyphenchar(instance)
- rightchar = instance and prehyphenchar (instance)
- leftmin = getfield(current,"left")
- rightmin = getfield(current,"right")
- if not leftchar or leftchar < 0 then
- leftchar = false
- end
- if not rightchar or rightchar < 0 then
- rightchar = false
- end
- local char = unicodes[code]
- if char then
- word = { char }
- size = 1
- start = current
+ language = lang
+ if language > 0 then
+ dictionary = dictionaries[language]
+ -- we could postpone these
+ instance = dictionary.instance
+ characters = dictionary.characters
+ unicodes = dictionary.unicodes
+ leftchar = instance and posthyphenchar(instance)
+ rightchar = instance and prehyphenchar (instance)
+ leftmin = leftcharmin or getfield(current,"left")
+ rightmin = rightcharmin or getfield(current,"right")
+ if not leftchar or leftchar < 0 then
+ leftchar = false
+ end
+ if not rightchar or rightchar < 0 then
+ rightchar = false
+ end
+ --
+ local a = getattr(current,a_hyphenation)
+ if a ~= attr then
+ attr = synchronizefeatureset(a) -- influences extrachars
+ end
+ --
+ local char = unicodes[code] or (extrachars and extrachars[code])
+ if char then
+ word[1] = char
+ size = 1
+ start = current
+ end
+ else
+ size = 0
end
- elseif word then
- local char = unicodes[code]
+ elseif language <= 0 then
+ --
+ elseif size > 0 then
+ local char = unicodes[code] or (extrachars and extrachars[code])
if char then
size = size + 1
word[size] = char
elseif dictionary then
- if leftmin + rightmin < #word then
- local done = hyphenate(dictionary,word)
- if done then
- flush(first,start,stop,dictionary,word,done,leftchar,rightchar,characters,leftmin,rightmin)
+ if leftmin + rightmin <= size then
+ local hyphens = hyphenated(dictionary,word,size)
+ if hyphens then
+ flush(hyphens)
end
end
- word = nil
+ size = 0
+ if hyphenchars and hyphenchars[code] then
+ current = inject()
+ end
end
else
- local char = unicodes[code]
+ local a = getattr(current,a_hyphenation)
+ if a ~= attr then
+ attr = synchronizefeatureset(a) -- influences extrachars
+ end
+ --
+ local char = unicodes[code] or (extrachars and extrachars[code])
if char then
- word = { char }
- size = 1
- start = current
- -- leftmin = getfield(current,"left") -- can be an option
- -- rightmin = getfield(current,"right") -- can be an option
+ word[1] = char
+ size = 1
+ start = current
end
end
stop = current
current = getnext(current)
- elseif word then
- if dictionary then
- if leftmin + rightmin < #word then
- local done = hyphenate(dictionary,word)
- current = getnext(current)
- if done then
- flush(first,start,stop,dictionary,word,done,leftchar,rightchar,characters,leftmin,rightmin)
+ else
+ current = id == math_code and getnext(end_of_math(current)) or getnext(current)
+ if size > 0 then
+ if dictionary and leftmin + rightmin <= size then
+ local hyphens = hyphenated(dictionary,word,size)
+ if hyphens then
+ flush(hyphens)
end
- else
- current = getnext(current) -- hm
end
- else
- current = getnext(current)
+ size = 0
end
- word = nil
- elseif id == math_code then
- current = getnext(end_of_math(current))
- else
- current = getnext(current)
end
end
+ -- we can have quit due to last so we need to flush the last seen word
+ if size > 0 and dictionary and leftmin + rightmin <= size then
+ local hyphens = hyphenated(dictionary,word,size)
+ if hyphens then
+ flush(hyphens)
+ end
+ end
+
+ stoptiming(traditional)
+
return head, true
end
+ statistics.register("hyphenation",function()
+ if nofwords > 0 or statistics.elapsed(traditional) > 0 then
+ return string.format("%s words hyphenated, %s unique, used time %s",
+ nofwords,nofhashed,statistics.elapsedseconds(traditional) or 0)
+ end
+ end)
+
local texmethod = "builders.kernel.hyphenation"
local oldmethod = texmethod
local newmethod = texmethod
@@ -617,11 +1237,17 @@ if context then
-- \enabledirectives[hyphenators.method=traditional]
-- \enabledirectives[hyphenators.method=builtin]
- directives.register("hyphenators.method",function(v)
- if type(v) == "string" then
- local valid = languages.hyphenators[v]
+ -- this avoids a wrapper
+
+ -- push / pop ? check first attribute
+
+ local replaceaction = nodes.tasks.replaceaction
+
+ local function setmethod(method)
+ if type(method) == "string" then
+ local valid = hyphenators[method]
if valid and valid.hyphenate then
- newmethod = "languages.hyphenators." .. v .. ".hyphenate"
+ newmethod = "languages.hyphenators." .. method .. ".hyphenate"
else
newmethod = texmethod
end
@@ -629,16 +1255,65 @@ if context then
newmethod = texmethod
end
if oldmethod ~= newmethod then
- nodes.tasks.replaceaction("processors","words",oldmethod,newmethod)
+ replaceaction("processors","words",oldmethod,newmethod)
end
oldmethod = newmethod
- end)
+ end
- -- experimental feature
+ hyphenators.setmethod = setmethod
- directives.register("hyphenators.rightwordsmin",function(v)
- rightwordsmin = tonumber(v) or 0
- end)
+ local stack = { }
+
+ local function pushmethod(method)
+ insert(stack,oldmethod)
+ setmethod(method)
+ end
+ local function popmethod()
+ setmethod(remove(stack))
+ end
+
+ hyphenators.pushmethod = pushmethod
+ hyphenators.popmethod = popmethod
+
+ directives.register("hyphenators.method",setmethod)
+
+ function commands.setuphyphenation(specification)
+ local method = specification.method
+ if method then
+ setmethod(method)
+ end
+ end
+
+ commands.pushhyphenation = pushmethod
+ commands.pophyphenation = popmethod
+
+ local context = context
+ local ctx_NC = context.NC
+ local ctx_NR = context.NR
+ local ctx_verbatim = context.verbatim
+
+ function commands.showhyphenationtrace(language,word)
+ local saved = trace_steps
+ trace_steps = "silent"
+ local steps = traditional.gettrace(language,word)
+ trace_steps = saved
+ if steps then
+ local n = #steps
+ if n > 0 then
+ context.starttabulate { "|r|l|l|l|" }
+ for i=1,n do
+ local s = steps[i]
+ ctx_NC() if i > 1 and i < n then context(i-1) end
+ ctx_NC() ctx_verbatim(s[1])
+ ctx_NC() ctx_verbatim(s[2])
+ ctx_NC() ctx_verbatim(s[3])
+ ctx_NC()
+ ctx_NR()
+ end
+ context.stoptabulate()
+ end
+ end
+ end
else
@@ -647,14 +1322,14 @@ else
-- traditional.loadpatterns("us","lang-us")
-- traditional.registerpattern("nl","e1ë", { start = 1, length = 2, before = "e", after = "e" } )
--- traditional.registerpattern("nl","oo1ë", { start = 2, length = 3, before = "o", after = "e" } )
+-- traditional.registerpattern("nl","oo7ë", { start = 2, length = 3, before = "o", after = "e" } )
-- traditional.registerpattern("de","qqxc9xkqq",{ start = 3, length = 4, before = "ab", after = "cd" } )
-- local specification = {
--- lefthyphenmin = 2,
--- righthyphenmin = 2,
--- lefthyphenchar = "<",
--- righthyphenchar = ">",
+-- leftcharmin = 2,
+-- rightcharmin = 2,
+-- leftchar = "<",
+-- rightchar = ">",
-- }
-- print("reëel", traditional.injecthyphens(dictionaries.nl,"reëel", specification),"r{e>}{<e}{eë}el")
@@ -670,5 +1345,26 @@ else
-- print("kunststofmatig", traditional.injecthyphens(dictionaries.nl,"kunststofmatig", specification),"")
-- print("kunst[stof]matig", traditional.injecthyphens(dictionaries.nl,"kunst[stof]matig", specification),"")
+-- traditional.loadpatterns("us","lang-us")
+
+-- local specification = {
+-- leftcharmin = 2,
+-- rightcharmin = 2,
+-- leftchar = false,
+-- rightchar = false,
+-- }
+
+-- trace_steps = true
+
+-- print("components", traditional.injecthyphens(dictionaries.us,"components", specification),"")
+-- print("single", traditional.injecthyphens(dictionaries.us,"single", specification),"sin-gle")
+-- print("everyday", traditional.injecthyphens(dictionaries.us,"everyday", specification),"every-day")
+-- print("associate", traditional.injecthyphens(dictionaries.us,"associate", specification),"as-so-ciate")
+-- print("philanthropic", traditional.injecthyphens(dictionaries.us,"philanthropic", specification),"phil-an-thropic")
+-- print("projects", traditional.injecthyphens(dictionaries.us,"projects", specification),"projects")
+-- print("Associate", traditional.injecthyphens(dictionaries.us,"Associate", specification),"As-so-ciate")
+-- print("Philanthropic", traditional.injecthyphens(dictionaries.us,"Philanthropic", specification),"Phil-an-thropic")
+-- print("Projects", traditional.injecthyphens(dictionaries.us,"Projects", specification),"Projects")
+
end