summaryrefslogtreecommitdiff
path: root/tex/context/base/lang-ini.lua
diff options
context:
space:
mode:
authorMarius <mariausol@gmail.com>2010-07-04 15:32:09 +0300
committerMarius <mariausol@gmail.com>2010-07-04 15:32:09 +0300
commit85b7bc695629926641c7cb752fd478adfdf374f3 (patch)
tree80293f5aaa7b95a500a78392c39688d8ee7a32fc /tex/context/base/lang-ini.lua
downloadcontext-85b7bc695629926641c7cb752fd478adfdf374f3.tar.gz
stable 2010-05-24 13:10
Diffstat (limited to 'tex/context/base/lang-ini.lua')
-rw-r--r--tex/context/base/lang-ini.lua321
1 files changed, 321 insertions, 0 deletions
diff --git a/tex/context/base/lang-ini.lua b/tex/context/base/lang-ini.lua
new file mode 100644
index 000000000..239e5390c
--- /dev/null
+++ b/tex/context/base/lang-ini.lua
@@ -0,0 +1,321 @@
+modules = modules or { }
+
+-- Register the metadata of this file in the global module registry.
+modules['lang-ini'] = {
+    version   = 1.001,
+    comment   = "companion to lang-ini.mkiv",
+    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+    copyright = "PRAGMA ADE / ConTeXt Development Team",
+    license   = "see context related readme files",
+}
+
+-- needs a cleanup (share locals)
+
+-- Local aliases for library functions used throughout this file.
+local utf = unicode.utf8
+local utfbyte = utf.byte
+local format = string.format
+local concat = table.concat
+local lpegmatch = lpeg.match
+
+-- Tracing flag; the tracker "languages.patterns" toggles it through the callback.
+local trace_patterns = false trackers.register("languages.patterns", function(v) trace_patterns = v end)
+
+-- Global namespace; existing tables are reused when they are already present.
+languages = languages or {}
+languages.version = 1.009
+languages.hyphenation = languages.hyphenation or { }
+languages.hyphenation.data = languages.hyphenation.data or { }
+
+-- Maps a language tag to the object created for it (filled by 'record' below).
+local langdata = languages.hyphenation.data
+
+-- 002D : hyphen-minus (ascii)
+-- 2010 : hyphen
+-- 2011 : nonbreakable hyphen
+-- 2013 : endash (compound hyphen)
+
+--~ lang:hyphenation(string)
+--~ string = lang:hyphenation()
+--~ lang:clear_hyphenation()
+
+-- we can consider hiding data (faster access too)
+
+-- loading the 26 languages that we normally load in mkiv, the string based variant
+-- takes .84 seconds (probably due to the sub's) while the lpeg variant takes .78
+-- seconds
+--
+-- the following lpeg can probably be improved (it was one of the first I made)
+
+-- Grammar pieces shared by the pattern and exception file parsers.
+local leftbrace = lpeg.P("{")
+local rightbrace = lpeg.P("}")
+local spaces = lpeg.S(" \r\n\t\f")
+local spacing = spaces^0
+-- a valid character is anything that is neither whitespace nor a brace
+local validchar = 1-(spaces+rightbrace+leftbrace)
+local validword = validchar^1
+-- A braced group: optional whitespace, "{", then the captured run of words
+-- (leading and trailing whitespace inside the braces is not captured), then "}".
+local content = spacing * leftbrace * spacing * lpeg.C((spacing * validword)^0) * spacing * rightbrace * lpeg.P(true)
+
+-- Skip everything up to the first \patterns command and match its braced group.
+local command = lpeg.P("\\patterns")
+local parser = (1-command)^0 * command * content
+
+-- Return the pattern data stored in 'filename'. A file with suffix "rpl" is
+-- handed back verbatim; any other file is run through the \patterns parser
+-- and only the captured group content is returned. When the file cannot be
+-- loaded an empty string is used as data.
+local function filterpatterns(filename)
+    local verbatim = file.extname(filename) == "rpl"
+    local blob = io.loaddata(resolvers.find_file(filename)) or ""
+    if verbatim then
+        return blob
+    end
+    return lpegmatch(parser,blob)
+end
+
+-- New locals that shadow the ones above: same grammar, but keyed on the
+-- \hyphenation command. Functions defined before this point keep using the
+-- earlier \patterns parser.
+local command = lpeg.P("\\hyphenation")
+local parser = (1-command)^0 * command * content
+
+-- Return the exception data stored in 'filename'. A file with suffix "rhl"
+-- is handed back verbatim; any other file is run through the \hyphenation
+-- parser and only the captured group content is returned (nil when the file
+-- contains no such group).
+local function filterexceptions(filename)
+    -- lpeg.match needs a string subject, so an unreadable file falls back to
+    -- the empty string (not a table), just like filterpatterns does
+    local data = io.loaddata(resolvers.find_file(filename)) or ""
+    if file.extname(filename) == "rhl" then
+        return data
+    else
+        return lpegmatch(parser,data)
+    end
+end
+
+-- Return the language object registered under 'tag', creating one with
+-- lang.new() and caching it in langdata on first use.
+local function record(tag)
+    local data = langdata[tag]
+    if not data then
+        data = lang.new()
+        -- store the object itself; caching a placeholder number here would be
+        -- returned by later calls and break method calls such as data:id()
+        langdata[tag] = data
+    end
+    return data
+end
+
+languages.hyphenation.record = record
+
+-- Make sure a language object exists for 'tag' and return its id.
+function languages.hyphenation.define(tag)
+    return record(tag):id()
+end
+
+-- Return the id of the language object registered under 'tag', or 0 when
+-- no object has been recorded for that tag (nothing is created here).
+function languages.hyphenation.number(tag)
+    local known = langdata[tag]
+    if known then
+        return known:id() or 0
+    end
+    return 0
+end
+
+-- Alias, so that loadthem can reach the function as lang["exceptions"].
+lang.exceptions = lang.hyphenation
+
+-- Load one kind of hyphenation data for a language.
+--
+-- tag      : language tag; its language object is created when needed
+-- filename : file to look up with resolvers.find_file (nil or "" means none)
+-- filter   : function that takes the resolved file name and returns the data
+-- target   : name of the function in the 'lang' table that receives the
+--            data ("patterns" or "exceptions")
+--
+-- Returns true when the file was found, false otherwise; in the latter case
+-- an empty string is passed to the target function.
+local function loadthem(tag, filename, filter, target)
+    statistics.starttiming(languages)
+    local data = record(tag)
+    local fullname = (filename and filename ~= "" and resolvers.find_file(filename)) or ""
+    local ok = fullname ~= ""
+    if ok then
+        if trace_patterns then
+            logs.report("languages","filtering %s for language '%s' from '%s'",target,tag,fullname)
+        end
+        -- use the filter that was passed in (not always the pattern filter);
+        -- a filter returns nil when the file has no matching group
+        lang[target](data,filter(fullname) or "")
+    else
+        if trace_patterns then
+            logs.report("languages","no %s for language '%s' in '%s'",target,tag,filename or "?")
+        end
+        lang[target](data,"")
+    end
+    langdata[tag] = data
+    statistics.stoptiming(languages)
+    return ok
+end
+
+-- Load the pattern file 'patterns' into the language registered as 'tag';
+-- the result of loadthem is passed back to the caller.
+function languages.hyphenation.loadpatterns(tag, patterns)
+    local found = loadthem(tag, patterns, filterpatterns, "patterns")
+    return found
+end
+
+-- Load the exception file 'exceptions' into the language registered as
+-- 'tag'; the result of loadthem is passed back to the caller.
+function languages.hyphenation.loadexceptions(tag, exceptions)
+    -- pass our own parameter; 'patterns' is not defined in this scope
+    return loadthem(tag, exceptions, filterexceptions, "exceptions")
+end
+
+-- Forward the extra arguments to the hyphenation method of the language
+-- object registered under 'tag' (the object is created when needed).
+function languages.hyphenation.exceptions(tag, ...)
+    record(tag):hyphenation(...)
+end
+
+-- Call lang.hyphenate with the language object of 'tag' and 'str' and
+-- return whatever it returns.
+function languages.hyphenation.hyphenate(tag, str)
+    local language = record(tag)
+    return lang.hyphenate(language, str)
+end
+
+-- Optionally set (when 'value' is given) and then return the lefthyphenmin
+-- of the language object registered under 'tag'.
+function languages.hyphenation.lefthyphenmin(tag, value)
+    local language = record(tag)
+    if value then
+        language:lefthyphenmin(value)
+    end
+    return language:lefthyphenmin()
+end
+-- Optionally set (when 'value' is given) and then return the righthyphenmin
+-- of the language object registered under 'tag'.
+function languages.hyphenation.righthyphenmin(tag, value)
+    local language = record(tag)
+    if value then
+        language:righthyphenmin(value)
+    end
+    return language:righthyphenmin()
+end
+
+-- Return the number of entries in the table of recorded language objects.
+function languages.hyphenation.n()
+    local total = table.count(langdata)
+    return total
+end
+
+-- Bookkeeping tables; existing ones are reused when already present.
+languages.registered = languages.registered or { }
+languages.associated = languages.associated or { }
+languages.numbers = languages.numbers or { }
+
+-- Only 'registered' and 'associated' are handed to storage.register;
+-- 'numbers' is not.
+storage.register("languages/registered",languages.registered,"languages.registered")
+storage.register("languages/associated",languages.associated,"languages.associated")
+
+-- Local shortcuts used by the functions below.
+local numbers = languages.numbers
+local registered = languages.registered
+local associated = languages.associated
+
+-- we can speed this one up with locals if needed
+
+-- Convert 'what' into a language object:
+--   string : looked up as a tag in langdata (nil when unknown)
+--   number : in the range 0..0x7FFF mapped to a tag via 'numbers' and then
+--            looked up in langdata; 0 is returned when nothing is found
+--   other  : returned unchanged
+local function tolang(what)
+    local kind = type(what)
+    if kind == "string" then
+        return langdata[what]
+    end
+    if kind ~= "number" then
+        return what
+    end
+    local tag = what >= 0 and what <= 0x7FFF and numbers[what]
+    return (tag and langdata[tag]) or 0
+end
+
+-- Set the four hyphen characters of a language from 'settings'.
+--
+-- what     : tag, number or language object (tex.language when nil)
+-- settings : table with optional string fields 'lefthyphen' and
+--            'righthyphen'; an absent or empty field results in 0
+function languages.setup(what,settings)
+    what = languages.tolang(what or tex.language)
+    -- code of the given string as delivered by utfbyte, or 0 when unset
+    local function charcode(str)
+        if str and str ~= "" then
+            return utfbyte(str) or 0
+        end
+        return 0
+    end
+    local left = settings.lefthyphen
+    local right = settings.righthyphen
+    left = charcode(left)
+    right = charcode(right)
+    lang.posthyphenchar(what,left)
+    lang.prehyphenchar (what,right)
+    lang.postexhyphenchar(what,left)
+    lang.preexhyphenchar (what,right)
+end
+
+-- Return what lang.prehyphenchar reports for the language given by 'what'
+-- (a tag, number or language object).
+function languages.prehyphenchar(what)
+    local language = tolang(what)
+    return lang.prehyphenchar(language)
+end
+-- Return what lang.posthyphenchar reports for the language given by 'what'
+-- (a tag, number or language object).
+function languages.posthyphenchar(what)
+    local language = tolang(what)
+    return lang.posthyphenchar(language)
+end
+
+-- Export the local converter; languages.setup calls it through this field.
+languages.tolang = tolang
+
+-- Register a language under 'tag'.
+--
+-- parent     : tag whose data is used (defaults to 'tag' itself)
+-- patterns   : pattern file name (defaults to "lang-<parent>.pat")
+-- exceptions : exception file name (defaults to "lang-<parent>.hyp")
+--
+-- The entry starts out as not loaded and with number 0.
+function languages.register(tag,parent,patterns,exceptions)
+    parent = parent or tag
+    local patternfile = patterns or format("lang-%s.pat",parent)
+    local exceptionfile = exceptions or format("lang-%s.hyp",parent)
+    local entry = { loaded = false, number = 0 }
+    entry.parent = parent
+    entry.patterns = patternfile
+    entry.exceptions = exceptionfile
+    registered[tag] = entry
+end
+
+-- Remember the (script, language) pair that belongs to 'tag'.
+function languages.associate(tag,script,language)
+    local pair = { script, language }
+    associated[tag] = pair
+end
+
+-- Return the script and language associated with 'tag'. A number is first
+-- mapped to its tag via 'numbers'. Returns nil, nil when nothing is known.
+function languages.association(tag)
+    if type(tag) == "number" then
+        tag = numbers[tag]
+    end
+    local pair = tag and associated[tag]
+    if not pair then
+        return nil, nil
+    end
+    return pair[1], pair[2]
+end
+
+-- Report whether the pattern file of the registered language 'tag' can be
+-- located. Returns false for unregistered tags, for entries without a
+-- pattern file name and when the file lookup fails.
+function languages.loadable(tag)
+    local l = registered[tag]
+    -- use the file name stored in the entry; 'patterns' is not defined here
+    local patterns = l and l.patterns
+    if not patterns or patterns == "" then
+        return false
+    end
+    local fullname = resolvers.find_file(patterns)
+    -- loadthem treats an empty result as "not found", so do the same here
+    if fullname and fullname ~= "" then
+        return true
+    else
+        return false
+    end
+end
+
+-- When true, languages.enable reuses the number already defined for the
+-- parent tag instead of defining and loading again.
+languages.share = false -- we don't share language numbers
+
+-- Walk over the list 'tags' and return the number of the first registered
+-- language whose number is larger than zero, loading its patterns and
+-- exceptions on first use. Returns 0 when no tag qualifies.
+function languages.enable(tags)
+    -- beware: we cannot set tex.language, but need tex.normallanguage
+    -- define and load the data of a registered entry (done once per entry)
+    local function activate(entry)
+        local parent = entry.parent
+        local known = languages.hyphenation.number(parent)
+        if languages.share and known > 0 then
+            entry.number = known
+        else
+            -- we assume the same filenames
+            entry.number = languages.hyphenation.define(parent)
+            languages.hyphenation.loadpatterns(parent,entry.patterns)
+            languages.hyphenation.loadexceptions(parent,entry.exceptions)
+            numbers[entry.number] = parent
+        end
+        entry.loaded = true
+        if trace_patterns then
+            logs.report("languages","assigning number %s",entry.number)
+        end
+    end
+    for index=1,#tags do
+        local entry = registered[tags[index]]
+        if entry and entry ~= "" then
+            if not entry.loaded then
+                activate(entry)
+            end
+            if entry.number > 0 then
+                return entry.number
+            end
+        end
+    end
+    return 0
+end
+
+-- e['implementer']= 'imple{m}{-}{-}menter'
+-- e['manual'] = 'man{}{}{}'
+-- e['as'] = 'a-s'
+-- e['user-friendly'] = 'user=friend-ly'
+-- e['exceptionally-friendly'] = 'excep-tionally=friend-ly'
+
+-- Feed the content of 'filename' as hyphenation exceptions to the language
+-- registered under 'tag'. Nothing happens when the tag has no number larger
+-- than zero; an unreadable file counts as empty data.
+function languages.hyphenation.loadwords(tag, filename)
+    local id = languages.hyphenation.number(tag)
+    if id > 0 then
+        -- no numeric fallback here: a number has no hyphenation method, so
+        -- substituting one would only hide where things went wrong
+        local l = lang.new(id)
+        statistics.starttiming(languages)
+        local data = io.loaddata(filename) or ""
+        l:hyphenation(data)
+        statistics.stoptiming(languages)
+    end
+end
+
+-- Define a language "zerolanguage" up front and load it without file names,
+-- so that empty patterns and exceptions are set for it.
+languages.hyphenation.define ("zerolanguage")
+languages.hyphenation.loadpatterns ("zerolanguage") -- else bug
+languages.hyphenation.loadexceptions("zerolanguage") -- else bug
+
+languages.logger = languages.logger or { }
+
+-- Return a one-line overview of the loaded languages, sorted by tag. Each
+-- item reads "tag:parent:pat:exc:number", with "-" in place of "pat" or
+-- "exc" when the entry has no such file name. Returns "none" when no
+-- registered language has been loaded.
+function languages.logger.report()
+    local items, count = { }, 0
+    local tags = table.sortedkeys(registered)
+    for index=1,#tags do
+        local tag = tags[index]
+        local entry = registered[tag]
+        if entry.loaded then
+            local pat = '-'
+            if entry.patterns then
+                pat = "pat"
+            end
+            local exc = '-'
+            if entry.exceptions then
+                exc = "exc"
+            end
+            count = count + 1
+            items[count] = format("%s:%s:%s:%s:%s", tag, entry.parent, pat, exc, entry.number)
+        end
+    end
+    if count == 0 then
+        return "none"
+    end
+    return concat(items," ")
+end
+
+-- must happen at the tex end
+
+-- Default (tag, script, language) associations.
+do
+    local defaults = {
+        { 'en', 'latn', 'eng' },
+        { 'uk', 'latn', 'eng' },
+        { 'nl', 'latn', 'nld' },
+        { 'de', 'latn', 'deu' },
+        { 'fr', 'latn', 'fra' },
+    }
+    for index=1,#defaults do
+        local entry = defaults[index]
+        languages.associate(entry[1],entry[2],entry[3])
+    end
+end
+
+-- Statistic "loaded patterns": the logger overview, or nothing at all when
+-- no language has been loaded.
+statistics.register("loaded patterns", function()
+    local overview = languages.logger.report()
+    if overview == "none" then
+        return
+    end
+    return overview
+end)
+
+-- Statistic "language load time": the time accumulated on the 'languages'
+-- timer, with the number of recorded language objects appended.
+statistics.register("language load time", function()
+    local count = languages.hyphenation.n()
+    local suffix = format(", n=%s",count)
+    return statistics.elapsedseconds(languages,suffix)
+end)