diff options
Diffstat (limited to 'tex/context/base/lang-ini.lua')
-rw-r--r-- | tex/context/base/lang-ini.lua | 321 |
1 files changed, 321 insertions, 0 deletions
if not modules then modules = { } end modules ['lang-ini'] = {
    version   = 1.001,
    comment   = "companion to lang-ini.mkiv",
    author    = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
    copyright = "PRAGMA ADE / ConTeXt Development Team",
    license   = "see context related readme files"
}

-- needs a cleanup (share locals)

local utf       = unicode.utf8
local utfbyte   = utf.byte
local format    = string.format
local concat    = table.concat
local lpegmatch = lpeg.match

-- Tracing is toggled through the "languages.patterns" tracker.
local trace_patterns = false
trackers.register("languages.patterns", function(v) trace_patterns = v end)

languages                  = languages or {}
languages.version          = 1.009
languages.hyphenation      = languages.hyphenation or { }
languages.hyphenation.data = languages.hyphenation.data or { }

-- Maps a language tag to the language object created by lang.new().
local langdata = languages.hyphenation.data

-- 002D : hyphen-minus (ascii)
-- 2010 : hyphen
-- 2011 : nonbreakable hyphen
-- 2013 : endash (compound hyphen)

--~ lang:hyphenation(string)
--~ string =lang:hyphenation()
--~ lang:clear_hyphenation()

-- we can consider hiding data (faster access too)

-- loading the 26 languages that we normally load in mkiv, the string based variant
-- takes .84 seconds (probably due to the sub's) while the lpeg variant takes .78
-- seconds
--
-- the following lpeg can probably be improved (it was one of the first I made)

local leftbrace  = lpeg.P("{")
local rightbrace = lpeg.P("}")
local spaces     = lpeg.S(" \r\n\t\f")
local spacing    = spaces^0
local validchar  = 1-(spaces+rightbrace+leftbrace)
local validword  = validchar^1

-- Matches "{ word word ... }" and captures the words between the braces.
local content    = spacing * leftbrace * spacing * lpeg.C((spacing * validword)^0) * spacing * rightbrace * lpeg.P(true)

-- Skip everything up to the first occurrence of the command, then capture
-- the content of its braced argument.
local patternscommand   = lpeg.P("\\patterns")
local patternsparser    = (1-patternscommand)^0 * patternscommand * content

local exceptionscommand = lpeg.P("\\hyphenation")
local exceptionsparser  = (1-exceptionscommand)^0 * exceptionscommand * content

-- Returns the pattern data found in 'filename' as a string. Files with the
-- suffix "rpl" are returned unfiltered, other files are expected to contain a
-- \patterns{...} command of which the argument is returned. An unreadable
-- file, or a file without that command, gives an empty string.
local function filterpatterns(filename)
    local data = io.loaddata(resolvers.find_file(filename)) or ""
    if file.extname(filename) == "rpl" then
        return data
    else
        return lpegmatch(patternsparser,data) or ""
    end
end

-- Returns the exception data found in 'filename' as a string. Files with the
-- suffix "rhl" are returned unfiltered, other files are expected to contain a
-- \hyphenation{...} command of which the argument is returned. An unreadable
-- file, or a file without that command, gives an empty string (the fallback
-- used to be a table, which lpeg.match cannot handle).
local function filterexceptions(filename)
    local data = io.loaddata(resolvers.find_file(filename)) or ""
    if file.extname(filename) == "rhl" then
        return data
    else
        return lpegmatch(exceptionsparser,data) or ""
    end
end

-- Returns the language object registered under 'tag'; a new one is created
-- (with lang.new) and remembered when the tag is not yet known.
local function record(tag)
    local data = langdata[tag]
    if not data then
        data = lang.new()
        langdata[tag] = data
    end
    return data
end

languages.hyphenation.record = record

-- Makes sure that a language object exists for 'tag' and returns its id.
function languages.hyphenation.define(tag)
    local data = record(tag)
    return data:id()
end

-- Returns the id of the language object registered under 'tag', or 0 when
-- there is none. In contrast to 'define' this never creates an object.
function languages.hyphenation.number(tag)
    local d = langdata[tag]
    return (d and d:id()) or 0
end

-- 'exceptions' is used as target name in loadthem, so we need this alias.
lang.exceptions = lang.hyphenation

-- Shared loader for patterns and exceptions.
--
-- tag      : language tag
-- filename : file to load (may be nil or empty)
-- filter   : function(fullname) that returns the data as string
-- target   : name of the function in the lang namespace that receives the
--            data ("patterns" or "exceptions")
--
-- Returns true when the file was found. When it is not found the target is
-- still called, with an empty string.
local function loadthem(tag, filename, filter, target)
    statistics.starttiming(languages)
    local data = record(tag)
    local fullname = (filename and filename ~= "" and resolvers.find_file(filename)) or ""
    local ok = fullname ~= ""
    if ok then
        if trace_patterns then
            logs.report("languages","filtering %s for language '%s' from '%s'",target,tag,fullname)
        end
        -- use the filter that was passed (this used to be filterpatterns
        -- no matter what target was asked for)
        lang[target](data,filter(fullname))
    else
        if trace_patterns then
            logs.report("languages","no %s for language '%s' in '%s'",target,tag,filename or "?")
        end
        lang[target](data,"")
    end
    langdata[tag] = data
    statistics.stoptiming(languages)
    return ok
end

-- Loads the patterns in file 'patterns' for language 'tag'; returns true
-- when the file was found.
function languages.hyphenation.loadpatterns(tag, patterns)
    return loadthem(tag, patterns, filterpatterns, "patterns")
end

-- Loads the exceptions in file 'exceptions' for language 'tag'; returns true
-- when the file was found. (This used to pass an undefined global instead of
-- the given filename, so that no exceptions were loaded at all.)
function languages.hyphenation.loadexceptions(tag, exceptions)
    return loadthem(tag, exceptions, filterexceptions, "exceptions")
end

-- Passes the given arguments to the hyphenation method of the language
-- object that belongs to 'tag'. Nothing is returned.
function languages.hyphenation.exceptions(tag, ...)
    local data = record(tag)
    data:hyphenation(...)
end

-- Returns whatever lang.hyphenate returns for the language object of 'tag'
-- and 'str'.
function languages.hyphenation.hyphenate(tag, str)
    return lang.hyphenate(record(tag), str)
end

-- Optionally sets (when 'value' is given) and always returns lefthyphenmin
-- of the language object that belongs to 'tag'.
function languages.hyphenation.lefthyphenmin(tag, value)
    local data = record(tag)
    if value then
        data:lefthyphenmin(value)
    end
    return data:lefthyphenmin()
end

-- Optionally sets (when 'value' is given) and always returns righthyphenmin
-- of the language object that belongs to 'tag'.
function languages.hyphenation.righthyphenmin(tag, value)
    local data = record(tag)
    if value then
        data:righthyphenmin(value)
    end
    return data:righthyphenmin()
end

-- Returns the number of recorded language objects.
function languages.hyphenation.n()
    return table.count(langdata)
end

languages.registered = languages.registered or { }
languages.associated = languages.associated or { }
languages.numbers    = languages.numbers    or { }

storage.register("languages/registered",languages.registered,"languages.registered")
storage.register("languages/associated",languages.associated,"languages.associated")

local numbers    = languages.numbers    -- language number -> tag
local registered = languages.registered -- tag -> specification
local associated = languages.associated -- tag -> { script, language }

-- we can speed this one up with locals if needed

-- Converts 'what' into a language object:
--
-- number : looked up via the numbers table; 0 when out of range or unknown
-- string : looked up as tag; nil when unknown
-- other  : returned as it is
local function tolang(what)
    local kind = type(what)
    if kind == "number" then
        local w = what >= 0 and what <= 0x7FFF and numbers[what]
        return (w and langdata[w]) or 0
    elseif kind == "string" then
        return langdata[what]
    else
        return what
    end
end

languages.tolang = tolang

-- Sets the (ex)hyphen characters of a language. The language defaults to
-- tex.language and the characters are taken from settings.lefthyphen and
-- settings.righthyphen, where an absent or empty value results in 0.
function languages.setup(what,settings)
    what = languages.tolang(what or tex.language)
    local lefthyphen  = settings.lefthyphen
    local righthyphen = settings.righthyphen
    lefthyphen  = lefthyphen  ~= "" and lefthyphen  or nil
    righthyphen = righthyphen ~= "" and righthyphen or nil
    lefthyphen  = lefthyphen  and utfbyte(lefthyphen)  or 0
    righthyphen = righthyphen and utfbyte(righthyphen) or 0
    lang.posthyphenchar  (what,lefthyphen)
    lang.prehyphenchar   (what,righthyphen)
    lang.postexhyphenchar(what,lefthyphen)
    lang.preexhyphenchar (what,righthyphen)
end

-- Returns the result of lang.prehyphenchar for the given language.
function languages.prehyphenchar(what)
    return lang.prehyphenchar(tolang(what))
end

-- Returns the result of lang.posthyphenchar for the given language.
function languages.posthyphenchar(what)
    return lang.posthyphenchar(tolang(what))
end

-- Registers the specification of language 'tag'. The parent defaults to the
-- tag itself and the filenames default to lang-<parent>.pat and
-- lang-<parent>.hyp. Nothing gets loaded here, see languages.enable.
function languages.register(tag,parent,patterns,exceptions)
    parent = parent or tag
    registered[tag] = {
        parent     = parent,
        patterns   = patterns   or format("lang-%s.pat",parent),
        exceptions = exceptions or format("lang-%s.hyp",parent),
        loaded     = false,
        number     = 0,
    }
end

-- Associates a script and language with 'tag'.
function languages.associate(tag,script,language)
    associated[tag] = { script, language }
end

-- Returns the script and language associated with 'tag' (a tag or a language
-- number), or nil, nil when there is no association.
function languages.association(tag)
    if type(tag) == "number" then
        tag = numbers[tag]
    end
    local lat = tag and associated[tag]
    if lat then
        return lat[1], lat[2]
    else
        return nil, nil
    end
end

-- Returns true when 'tag' is registered and its patterns file can be found.
-- (This used to look up an undefined global instead of the registered
-- filename. An empty string counts as 'not found', as in loadthem.)
function languages.loadable(tag)
    local l = registered[tag]
    local patterns = l and l.patterns
    if patterns and patterns ~= "" then
        local fullname = resolvers.find_file(patterns)
        if fullname and fullname ~= "" then
            return true
        end
    end
    return false
end

languages.share = false -- we don't share language numbers

-- Walks over the list 'tags' and returns the language number of the first
-- registered tag that ends up with a number larger than zero; patterns and
-- exceptions are loaded when that has not happened yet. Returns 0 when none
-- of the tags qualifies.
function languages.enable(tags)
    -- beware: we cannot set tex.language, but need tex.normallanguage
    for i=1,#tags do
        local tag = tags[i]
        local l = registered[tag]
        if l and l ~= "" then
            if not l.loaded then
                local parent = l.parent
                local number = languages.hyphenation.number(parent)
                if languages.share and number > 0 then
                    l.number = number
                else
                    -- we assume the same filenames
                    l.number = languages.hyphenation.define(parent)
                    languages.hyphenation.loadpatterns(parent,l.patterns)
                    languages.hyphenation.loadexceptions(parent,l.exceptions)
                    numbers[l.number] = parent
                end
                l.loaded = true
                if trace_patterns then
                    logs.report("languages","assigning number %s",l.number)
                end
            end
            if l.number > 0 then
                return l.number
            end
        end
    end
    return 0
end

-- e['implementer']= 'imple{m}{-}{-}menter'
-- e['manual'] = 'man{}{}{}'
-- e['as'] = 'a-s'
-- e['user-friendly'] = 'user=friend-ly'
-- e['exceptionally-friendly'] = 'excep-tionally=friend-ly'

-- Feeds the content of 'filename' (an empty string when it cannot be read) to
-- the hyphenation method of the language with the id that belongs to 'tag'.
-- Nothing happens when 'tag' has no language object yet.
function languages.hyphenation.loadwords(tag, filename)
    local id = languages.hyphenation.number(tag)
    if id > 0 then
        local l = lang.new(id)
        statistics.starttiming(languages)
        local data = io.loaddata(filename) or ""
        l:hyphenation(data)
        statistics.stoptiming(languages)
    end
end

languages.hyphenation.define        ("zerolanguage")
languages.hyphenation.loadpatterns  ("zerolanguage") -- else bug
languages.hyphenation.loadexceptions("zerolanguage") -- else bug

languages.logger = languages.logger or { }

-- Returns a space separated list with one "tag:parent:pat:exc:number" entry
-- per loaded language (sorted by tag), or "none" when nothing is loaded.
function languages.logger.report()
    local result = { }
    local sorted = table.sortedkeys(registered)
    for i=1,#sorted do
        local tag = sorted[i]
        local l = registered[tag]
        if l.loaded then
            local p = (l.patterns   and "pat") or '-'
            local e = (l.exceptions and "exc") or '-'
            result[#result+1] = format("%s:%s:%s:%s:%s", tag, l.parent, p, e, l.number)
        end
    end
    return (#result > 0 and concat(result," ")) or "none"
end

-- must happen at the tex end

languages.associate('en','latn','eng')
languages.associate('uk','latn','eng')
languages.associate('nl','latn','nld')
languages.associate('de','latn','deu')
languages.associate('fr','latn','fra')

-- Only report when something has been loaded.
statistics.register("loaded patterns", function()
    local result = languages.logger.report()
    if result ~= "none" then
        return result
    end
end)

statistics.register("language load time", function()
    return statistics.elapsedseconds(languages, format(", n=%s",languages.hyphenation.n()))
end)