summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/lang-ini.lua
diff options
context:
space:
mode:
authorContext Git Mirror Bot <phg42.2a@gmail.com>2016-07-02 12:19:51 +0200
committerContext Git Mirror Bot <phg42.2a@gmail.com>2016-07-02 12:19:51 +0200
commitda9149010f4d34eef23a504682d82cdcf4fac8f5 (patch)
treea213e0a93e1e4ac2b8a6a724e32db3e8d6dc1fc5 /tex/context/base/mkiv/lang-ini.lua
parentc6fcaaa08d694397e9db4d1f9497926c193a5d95 (diff)
downloadcontext-da9149010f4d34eef23a504682d82cdcf4fac8f5.tar.gz
2016-07-01 16:31:00
Diffstat (limited to 'tex/context/base/mkiv/lang-ini.lua')
-rw-r--r--tex/context/base/mkiv/lang-ini.lua111
1 files changed, 105 insertions, 6 deletions
diff --git a/tex/context/base/mkiv/lang-ini.lua b/tex/context/base/mkiv/lang-ini.lua
index 0677cb003..347cb0281 100644
--- a/tex/context/base/mkiv/lang-ini.lua
+++ b/tex/context/base/mkiv/lang-ini.lua
@@ -20,8 +20,8 @@ if not modules then modules = { } end modules ['lang-ini'] = {
local type, tonumber = type, tonumber
local utfbyte = utf.byte
-local format, gsub = string.format, string.gsub
-local concat, sortedkeys, sortedpairs = table.concat, table.sortedkeys, table.sortedpairs
+local format, gsub, gmatch, find = string.format, string.gsub, string.gmatch, string.find
+local concat, sortedkeys, sortedpairs, keys, insert = table.concat, table.sortedkeys, table.sortedpairs, table.keys, table.insert
local utfbytes, strip = string.utfvalues, string.strip
local context = context
@@ -29,6 +29,7 @@ local commands = commands
local implement = interfaces.implement
local settings_to_array = utilities.parsers.settings_to_array
+local settings_to_set = utilities.parsers.settings_to_set
local trace_patterns = false trackers.register("languages.patterns", function(v) trace_patterns = v end)
@@ -119,7 +120,7 @@ local function validdata(loaded,what,tag)
if dataset then
local data = dataset.data
if not data or data == "" then
- return nil
+ -- nothing
elseif dataset.compression == "zlib" then
data = zlib.decompress(data)
if dataset.length and dataset.length ~= #data then
@@ -157,14 +158,95 @@ local function sethjcodes(instance,loaded,what)
end
end
+-- 2'2 conflicts with 4' ... and luatex barks on it
+
+local P, R, Cs, Ct, lpegmatch, lpegpatterns = lpeg.P, lpeg.R, lpeg.Cs, lpeg.Ct, lpeg.match, lpeg.patterns
+
+local utfsplit = utf.split
+
+local space = lpegpatterns.space
+local whitespace = lpegpatterns.whitespace^1
+local nospace = lpegpatterns.utf8char - whitespace
+local digit = lpegpatterns.digit
+----- endofstring = #whitespace + P(-1)
+local endofstring = #whitespace
+
+local word = (digit/"")^0 * (digit/"" * endofstring + digit/" " + nospace)^1
+local anyword = (1-whitespace)^1
+local analyze = Ct((whitespace + Cs(word))^1)
+
+local function unique(tag,requested,loaded)
+ local nofloaded = #loaded
+ if nofloaded == 0 then
+ return ""
+ elseif nofloaded == 1 then
+ return loaded[1]
+ else
+ insert(loaded,1," ") -- no need then for special first word
+ -- insert(loaded, " ")
+ loaded = concat(loaded," ")
+ local t = lpegmatch(analyze,loaded) or { }
+ local h = { }
+ local b = { }
+ for i=1,#t do
+ local ti = t[i]
+ local hi = h[ti]
+ if not hi then
+ h[ti] = 1
+ elseif hi == 1 then
+ h[ti] = 2
+ b[#b+1] = utfsplit(ti," ")
+ end
+ end
+ -- sort
+ local nofbad = #b
+ if nofbad > 0 then
+ local word
+ for i=1,nofbad do
+ local bi = b[i]
+ local p = P(bi[1])
+ for i=2,#bi do
+ p = p * digit * P(bi[i])
+ end
+ if word then
+ word = word + p
+ else
+ word = p
+ end
+ report_initialization("language %a, patterns %a, discarding conflict (0-9)%{[0-9]}t(0-9)",tag,requested,bi)
+ end
+ t, h, b = nil, nil, nil -- permit gc
+ local someword = digit^0 * word * digit^0 * endofstring / ""
+ -- local strip = Cs(someword^-1 * (someword + anyword + whitespace)^1)
+ local strip = Cs((someword + anyword + whitespace)^1)
+ return lpegmatch(strip,loaded) or loaded
+ else
+ return loaded
+ end
+ end
+end
+
local function loaddefinitions(tag,specification)
statistics.starttiming(languages)
local data, instance = resolve(tag)
- local definitions = settings_to_array(specification.patterns or "")
+ local requested = specification.patterns or ""
+ local definitions = settings_to_array(requested)
if #definitions > 0 then
if trace_patterns then
report_initialization("pattern specification for language %a: %s",tag,specification.patterns)
end
+ local ploaded = instance:patterns()
+ local eloaded = instance:hyphenation()
+ if not ploaded or ploaded == "" then
+ ploaded = { }
+ else
+ ploaded = { ploaded }
+ end
+ if not eloaded or eloaded == "" then
+ eloaded = { }
+ else
+ eloaded = { eloaded }
+ end
local dataused = data.used
local ok = false
local resources = data.resources or { }
@@ -178,6 +260,9 @@ local function loaddefinitions(tag,specification)
report_initialization("clearing patterns for language %a",tag)
end
instance:clear_patterns()
+ instance:clear_hyphenation()
+ ploaded = { }
+ eloaded = { }
elseif not dataused[definition] then
dataused[definition] = definition
local filename = "lang-" .. definition .. ".lua"
@@ -195,8 +280,14 @@ local function loaddefinitions(tag,specification)
ok, nofloaded = true, nofloaded + 1
sethjcodes(instance,loaded,"patterns")
sethjcodes(instance,loaded,"exceptions")
- instance:patterns (validdata(loaded,"patterns", tag) or "")
- instance:hyphenation(validdata(loaded,"exceptions",tag) or "")
+ local p = validdata(loaded,"patterns",tag)
+ local e = validdata(loaded,"exceptions",tag)
+ if p and p ~= "" then
+ ploaded[#ploaded+1] = p
+ end
+ if e and e ~= "" then
+ eloaded[#eloaded+1] = e
+ end
resources[#resources+1] = loaded -- so we can use them otherwise
else
report_initialization("invalid definition %a for language %a in %a",definition,tag,filename)
@@ -208,6 +299,14 @@ local function loaddefinitions(tag,specification)
report_initialization("definition %a for language %a already loaded",definition,tag)
end
end
+ if #ploaded > 0 then
+ instance:clear_patterns()
+ instance:patterns(unique(tag,requested,ploaded))
+ end
+ if #eloaded > 0 then
+ instance:clear_hyphenation()
+ instance:hyphenation(concat(eloaded," "))
+ end
return ok
elseif trace_patterns then
report_initialization("no definitions for language %a",tag)