summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/lang-ini.lua
diff options
context:
space:
mode:
authorContext Git Mirror Bot <phg42.2a@gmail.com>2016-07-18 17:05:11 +0200
committerContext Git Mirror Bot <phg42.2a@gmail.com>2016-07-18 17:05:11 +0200
commit50928735daee408de73737b055b2535d96424824 (patch)
treea16ca5d16734b5bc146f053fe0368e61f4c72400 /tex/context/base/mkiv/lang-ini.lua
parent3eb2d078f0023266585aec42d98326d72567b9d6 (diff)
downloadcontext-50928735daee408de73737b055b2535d96424824.tar.gz
2016-07-18 16:51:00
Diffstat (limited to 'tex/context/base/mkiv/lang-ini.lua')
-rw-r--r--tex/context/base/mkiv/lang-ini.lua107
1 files changed, 87 insertions, 20 deletions
diff --git a/tex/context/base/mkiv/lang-ini.lua b/tex/context/base/mkiv/lang-ini.lua
index 347cb0281..62786d9ab 100644
--- a/tex/context/base/mkiv/lang-ini.lua
+++ b/tex/context/base/mkiv/lang-ini.lua
@@ -22,7 +22,7 @@ local type, tonumber = type, tonumber
local utfbyte = utf.byte
local format, gsub, gmatch, find = string.format, string.gsub, string.gmatch, string.find
local concat, sortedkeys, sortedpairs, keys, insert = table.concat, table.sortedkeys, table.sortedpairs, table.keys, table.insert
-local utfbytes, strip = string.utfvalues, string.strip
+local utfbytes, strip, utfcharacters = string.utfvalues, string.strip, utf.characters
local context = context
local commands = commands
@@ -72,6 +72,11 @@ storage.register("languages/associated",associated,"languages.associated")
storage.register("languages/numbers", numbers, "languages.numbers")
storage.register("languages/data", data, "languages.data")
+local variables = interfaces.variables
+
+local v_reset = variables.reset
+local v_yes = variables.yes
+
local nofloaded = 0
local function resolve(tag)
@@ -133,25 +138,63 @@ local function validdata(loaded,what,tag)
end
end
-local function sethjcodes(instance,loaded,what)
+-- languages.hjcounts[unicode].count
+
+-- hjcode: 0 not to be hyphenated
+-- 1--31 length
+-- 32 zero length
+-- > 32 hyphenated with length 1
+
+local function sethjcodes(instance,loaded,what,factor)
local l = loaded[what]
local c = l and l.characters
if c then
- local h = l.codehash
+ local hjcounts = factor and languages.hjcounts or false
+ --
+ local h = loaded.codehash
if not h then
h = { }
- l.codehash = h
+ loaded.codehash = h
+ end
+ --
+ local function setcode(l)
+ local u = uccodes[l]
+ local s = 1
+ if hjcounts then
+ local c = hjcounts[l]
+ if c then
+ c = c.count
+ if not c then
+ -- error, keep as 1
+ elseif c <= 0 then
+ -- counts as 0 i.e. ignored
+ s = 32
+ elseif c >= 31 then
+ -- counts as 31
+ s = 31
+ else
+ -- count c times
+ s = c
+ end
+ end
+ end
+ sethjcode(instance,l,s)
+ h[l] = s
+ if u ~= l and type(u) == "number" then
+ sethjcode(instance,u,s)
+ h[u] = s
+ end
end
+ --
local s = tex.savinghyphcodes
tex.savinghyphcodes = 0
- for l in utfbytes(c) do
- local u = uccodes[l]
- sethjcode(instance,l,l)
- h[l] = l
- if type(u) == "number" then
- -- we don't want ß -> SS
- sethjcode(instance,u,l)
- h[u] = l
+ if type(c) == "table" then
+ for l in next, c do
+ setcode(utfbyte(l))
+ end
+ else
+ for l in utfbytes(c) do
+ setcode(l)
end
end
tex.savinghyphcodes = s
@@ -255,7 +298,7 @@ local function loaddefinitions(tag,specification)
local definition = definitions[i]
if definition == "" then
-- error
- elseif definition == "reset" then -- interfaces.variables.reset
+ elseif definition == v_reset then
if trace_patterns then
report_initialization("clearing patterns for language %a",tag)
end
@@ -278,8 +321,8 @@ local function loaddefinitions(tag,specification)
local loaded = table.load(fullname,gzipped and gzip.load)
if loaded then -- todo: version test
ok, nofloaded = true, nofloaded + 1
- sethjcodes(instance,loaded,"patterns")
- sethjcodes(instance,loaded,"exceptions")
+ sethjcodes(instance,loaded,"patterns",specification.factor)
+ sethjcodes(instance,loaded,"exceptions",specification.factor)
local p = validdata(loaded,"patterns",tag)
local e = validdata(loaded,"exceptions",tag)
if p and p ~= "" then
@@ -396,10 +439,11 @@ if environment.initex then
else
- function languages.getnumber(tag,default,patterns)
+ function languages.getnumber(tag,default,patterns,factor)
local l = registered[tag]
if l then
if l.dirty then
+ l.factor = factor == v_yes and true or false
if trace_patterns then
report_initialization("checking patterns for %a with default %a",tag,default)
end
@@ -454,19 +498,43 @@ function languages.postexhyphenchar(what) return postexhyphenchar(tolang(what))
-- e['user-friendly'] = 'user=friend-ly'
-- e['exceptionally-friendly'] = 'excep-tionally=friend-ly'
+local invalid = { "{", "}", "-" }
+
+local function collecthjcodes(data,str)
+ local found = data.extras and data.extras.characters or { }
+ for s in utfcharacters(str) do
+ if not found[s] then
+ found[s] = true
+ end
+ end
+ for i=1,#invalid do -- less checks this way
+ local c = invalid[i]
+ if found[c] then
+ found[c] = nil
+ end
+ end
+ data.extras = { characters = found }
+ sethjcodes(data.instance,data,"extras",data.factor)
+end
+
function languages.loadwords(tag,filename)
local data, instance = resolve(tag)
if data then
statistics.starttiming(languages)
- instance:hyphenation(io.loaddata(filename) or "")
+ local str = io.loaddata(filename) or ""
+ collecthjcodes(data,str)
+ instance:hyphenation(str)
statistics.stoptiming(languages)
end
end
+
function languages.setexceptions(tag,str)
local data, instance = resolve(tag)
if data then
- instance:hyphenation(strip(str)) -- we need to strip leading spaces
+ str = strip(str) -- we need to strip leading spaces
+ collecthjcodes(data,str)
+ instance:hyphenation(str)
end
end
@@ -523,7 +591,7 @@ end)
implement {
name = "languagenumber",
actions = { languages.getnumber, context },
- arguments = { "string", "string", "string" }
+ arguments = { "string", "string", "string", "string" }
}
implement {
@@ -555,7 +623,6 @@ implement {
arguments = { "string", "string" }
}
-
implement {
name = "currentprehyphenchar",
actions = function()