summaryrefslogtreecommitdiff
path: root/tex/context/base/mkiv/lang-def.lua
diff options
context:
space:
mode:
authorContext Git Mirror Bot <phg42.2a@gmail.com>2016-01-12 17:15:07 +0100
committerContext Git Mirror Bot <phg42.2a@gmail.com>2016-01-12 17:15:07 +0100
commit8d8d528d2ad52599f11250cfc567fea4f37f2a8b (patch)
tree94286bc131ef7d994f9432febaf03fe23d10eef8 /tex/context/base/mkiv/lang-def.lua
parentf5aed2e51223c36c84c5f25a6cad238b2af59087 (diff)
downloadcontext-8d8d528d2ad52599f11250cfc567fea4f37f2a8b.tar.gz
2016-01-12 16:26:00
Diffstat (limited to 'tex/context/base/mkiv/lang-def.lua')
-rw-r--r--tex/context/base/mkiv/lang-def.lua466
1 files changed, 466 insertions, 0 deletions
diff --git a/tex/context/base/mkiv/lang-def.lua b/tex/context/base/mkiv/lang-def.lua
new file mode 100644
index 000000000..c0c3981f7
--- /dev/null
+++ b/tex/context/base/mkiv/lang-def.lua
@@ -0,0 +1,466 @@
+if not modules then modules = { } end modules ['lang-def'] = {
+ version = 1.001,
+ comment = "companion to lang-ini.mkiv",
+ author = "Hans Hagen, PRAGMA-ADE, Hasselt NL",
+ copyright = "PRAGMA ADE / ConTeXt Development Team",
+ license = "see context related readme files"
+ -- dataonly = true, -- saves 10K
+}
+
+local rawget = rawget -- raw table access: used by get() to look at stored entries only
+local lower = string.lower -- tags are registered and looked up in lowercase
+
+languages = languages or { } -- global namespace; an already existing table is reused
+local languages = languages
+languages.data = languages.data or { } -- holds the tables that this file exports
+local data = languages.data
+
+local allocate = utilities.storage.allocate -- NOTE(review): defined elsewhere; wraps the specification list below
+local setmetatableindex = table.setmetatableindex -- NOTE(review): defined elsewhere; presumably installs an __index fallback
+
+-- The specifications are based on an analysis done by Arthur. The
+-- names of tags were changed by Hans. The data is not yet used but
+-- will be some day.
+--
+-- description
+--
+-- The description is only meant as an indication; for example 'no' is
+-- "Norwegian, undetermined" because that's really what it is.
+--
+-- script
+--
+-- This is the 4-letter script tag according to ISO 15924, the
+-- official standard.
+--
+-- bibliographical and terminological
+--
+-- Then we have *two* ISO-639 3-letter tags: one is supposed to be used
+-- for "bibliographical" purposes, the other for "terminological". The
+-- first one is quite special (and mostly used in American libraries),
+-- and the more interesting one is the other (apparently it's that one
+-- we find everywhere).
+--
+-- context
+--
+-- These are the ones used in ConTeXt. Kind of numberplate ones.
+--
+-- opentype
+--
+-- This is the 3-letter OpenType language tag, obviously.
+--
+-- variant
+--
+-- This is actually the RFC 4646 tag: an extension of ISO 639 that also defines
+-- codes for variants like de-1901 for "German, 1901 orthography" or zh-Hans for
+-- "Chinese, simplified characters" ('Hans' is the ISO 15924 tag for
+-- "Han ideographs, Simplified" :-). As I said yesterday, I think this
+-- should be the reference since it's exactly what we want: it's really
+-- standard (it's an RFC) and it's more than simply languages. To my
+-- knowledge this is the only system that addresses this issue.
+--
+-- Warning: it's not unique! Because we have two "German" languages
+-- (and could, potentially, have two Chinese, etc.)
+--
+-- Beware: the abbreviations are lowercased, which makes it more
+-- convenient to use them.
+--
+-- todo: add default features
+
+-- The list of known languages, one record per language (or per variant of a
+-- language). All fields are optional; see the explanation above for their meaning.
+-- The order of the records matters: the lookup tables that are built from this
+-- list are keyed by tag, so when records share a tag (like "latn" or "deu") the
+-- record that comes last ends up in the lookup table.
+
+local specifications = allocate {
+    {
+        description     = "Dutch",
+        script          = "latn",
+     -- bibliographical = "nld",
+     -- terminological  = "nld",
+        context         = "nl",
+        opentype        = "nld",
+        variant         = "nl",
+    },
+    {
+        description     = "Basque",
+        script          = "latn",
+        bibliographical = "baq",
+        terminological  = "eus",
+        context         = "ba",
+        opentype        = "euq",
+        variant         = "eu",
+    },
+    {
+        description     = "Welsh",
+        script          = "latn",
+        bibliographical = "wel",
+        terminological  = "cym",
+        context         = "cy",
+        opentype        = "wel",
+        variant         = "cy",
+    },
+    {
+        description     = "Icelandic",
+        script          = "latn",
+        bibliographical = "ice",
+        terminological  = "isl",
+        context         = "is",
+        opentype        = "isl",
+        variant         = "is",
+    },
+    {
+        description     = "Norwegian, undetermined",
+        script          = "latn",
+        bibliographical = "nor",
+        terminological  = "nor",
+        context         = "no",
+        variant         = "no",
+    },
+    {
+        description     = "Norwegian bokmal",
+        script          = "latn",
+        bibliographical = "nob",
+        terminological  = "nob",
+        opentype        = "nor", -- not sure!
+        variant         = "nb",
+    },
+    {
+        description     = "Norwegian nynorsk",
+        script          = "latn",
+        bibliographical = "nno",
+        terminological  = "nno",
+        opentype        = "nny",
+        variant         = "nn",
+    },
+    {
+        description     = "Ancient Greek",
+        script          = "grek",
+        bibliographical = "grc",
+        terminological  = "grc",
+        context         = "agr",
+        variant         = "grc",
+    },
+    {
+        description     = "German, 1901 orthography",
+        script          = "latn",
+        terminological  = "deu",
+        context         = "deo",
+        opentype        = "deu",
+        variant         = "de-1901",
+    },
+    {
+        description     = "German, 1996 orthography",
+        script          = "latn",
+        bibliographical = "ger",
+        terminological  = "deu",
+        context         = "de",
+        opentype        = "deu",
+        variant         = "de-1996",
+    },
+    {
+        description     = "Afrikaans",
+        script          = "latn",
+        bibliographical = "afr",
+        terminological  = "afr",
+        context         = "af",
+        opentype        = "afk",
+        variant         = "af",
+    },
+    {
+        description     = "Catalan",
+        script          = "latn",
+        bibliographical = "cat",
+        terminological  = "cat",
+        context         = "ca",
+        opentype        = "cat",
+        variant         = "ca",
+    },
+    {
+        description     = "Czech",
+        script          = "latn",
+        bibliographical = "cze",
+        terminological  = "ces",
+        context         = "cz",
+        opentype        = "csy",
+        variant         = "cs",
+    },
+    {
+        description     = "Greek",
+        script          = "grek",
+        bibliographical = "gre",
+        terminological  = "ell",
+        context         = "gr",
+        opentype        = "ell",
+        variant         = "el",
+    },
+    {
+        description     = "American English",
+        script          = "latn",
+        bibliographical = "eng",
+        terminological  = "eng",
+        context         = "us",
+        opentype        = "eng",
+        variant         = "en-US",
+    },
+    {
+        description     = "British English",
+        script          = "latn",
+        bibliographical = "eng",
+        terminological  = "eng",
+        context         = "uk",
+        opentype        = "eng",
+        variant         = "en-UK", -- Could be en-GB as well ...
+    },
+    {
+        description     = "Spanish",
+        script          = "latn",
+        bibliographical = "spa",
+        terminological  = "spa",
+        context         = "es",
+        opentype        = "esp",
+        variant         = "es",
+    },
+    {
+        description     = "Finnish",
+        script          = "latn",
+        bibliographical = "fin",
+        terminological  = "fin",
+        context         = "fi",
+        opentype        = "fin",
+        variant         = "fi",
+    },
+    {
+        description     = "French",
+        script          = "latn",
+        bibliographical = "fre",
+        terminological  = "fra",
+        context         = "fr",
+        opentype        = "fra",
+        variant         = "fr",
+    },
+    {
+        description     = "Croatian",
+        script          = "latn",
+        bibliographical = "scr",
+        terminological  = "hrv",
+        context         = "hr",
+        opentype        = "hrv",
+        variant         = "hr",
+    },
+    {
+        description     = "Hungarian",
+        script          = "latn",
+        bibliographical = "hun",
+        terminological  = "hun",
+        context         = "hu",
+        opentype        = "hun",
+        variant         = "hu",
+    },
+    {
+        description     = "Italian",
+        script          = "latn",
+        bibliographical = "ita",
+        terminological  = "ita",
+        context         = "it",
+        opentype        = "ita",
+        variant         = "it",
+    },
+    {
+        description     = "Japanese",
+        script          = "jpan",
+        bibliographical = "jpn",
+        terminological  = "jpn",
+        context         = "ja",
+        opentype        = "jan",
+        variant         = "ja",
+    },
+    {
+        description     = "Latin",
+        script          = "latn",
+        bibliographical = "lat",
+        terminological  = "lat",
+        context         = "la",
+        opentype        = "lat",
+        variant         = "la",
+    },
+    {
+        description     = "Portuguese",
+        script          = "latn",
+        bibliographical = "por",
+        terminological  = "por",
+        context         = "pt",
+        opentype        = "ptg",
+        variant         = "pt",
+    },
+    {
+        description     = "Polish",
+        script          = "latn",
+        bibliographical = "pol",
+        terminological  = "pol",
+        context         = "pl",
+        opentype        = "plk",
+        variant         = "pl",
+    },
+    {
+        description     = "Romanian",
+        script          = "latn",
+        bibliographical = "rum",
+        terminological  = "ron",
+        context         = "ro",
+        opentype        = "rom",
+        variant         = "ro",
+    },
+    {
+        description     = "Russian",
+        script          = "cyrl",
+        bibliographical = "rus",
+        terminological  = "rus",
+        context         = "ru",
+        opentype        = "rus",
+        variant         = "ru",
+    },
+    {
+        description     = "Slovak",
+        script          = "latn",
+        bibliographical = "slo",
+        terminological  = "slk",
+        context         = "sk",
+        opentype        = "sky",
+        variant         = "sk",
+    },
+    {
+        description     = "Slovenian",
+        script          = "latn",
+        bibliographical = "slv",
+        terminological  = "slv",
+        context         = "sl",
+        opentype        = "slv",
+        variant         = "sl",
+    },
+    {
+        description     = "Swedish",
+        script          = "latn",
+        bibliographical = "swe",
+        terminological  = "swe",
+        context         = "sv",
+        opentype        = "sve",
+        variant         = "sv",
+    },
+    {
+        description     = "Thai",
+        script          = "thai",
+     -- bibliographical = "",
+     -- terminological  = "",
+        context         = "th",
+        opentype        = "tha",
+     -- variant         = "",
+    },
+    {
+        description     = "Turkish",
+        script          = "latn",
+        bibliographical = "tur",
+        terminological  = "tur",
+        context         = "tr",
+        opentype        = "trk",
+        variant         = "tr",
+    },
+    {
+        description     = "Vietnamese",
+        script          = "latn",
+        bibliographical = "vie",
+        terminological  = "vie",
+        context         = "vn",
+        opentype        = "vit",
+        variant         = "vi",
+    },
+    {
+        description     = "Chinese, simplified",
+        script          = "hans",
+        opentypescript  = "hani",
+        bibliographical = "chi",
+        terminological  = "zho",
+        context         = "cn",
+        opentype        = "zhs",
+        variant         = "zh-hans",
+    },
+}
+
+-- The lookup tables. They start out empty and are exported, together with the
+-- list of specifications itself, as fields of the data table.
+
+local variants, contexts, records = { }, { }, { }
+local scripts, opentypes, opentypescripts = { }, { }, { }
+
+data.specifications  = specifications
+data.variants        = variants
+data.contexts        = contexts
+data.records         = records
+data.scripts         = scripts
+data.opentypes       = opentypes
+data.opentypescripts = opentypescripts
+
+-- Fill the lookup tables: each record is registered under its variant, opentype,
+-- script, opentypescript and context tag, as far as it has them. Keys are always
+-- lowercased. Records that share a tag overwrite each other, so the record that
+-- comes last in the list is the one that can be found under that tag.
+
+for i=1,#specifications do
+    local specification = specifications[i]
+    local variant = specification.variant
+    if variant then
+        variants[lower(variant)] = specification
+    end
+    local opentype = specification.opentype
+    if opentype then
+        opentypes[lower(opentype)] = specification
+    end
+    local script = specification.script
+    if script then
+        scripts[lower(script)] = specification
+    end
+    local opentypescript = specification.opentypescript
+    if opentypescript then
+        opentypescripts[lower(opentypescript)] = specification
+    end
+    -- The tag has to come from the record: a bare 'context' refers to the global
+    -- of that name and not to the field, in which case no record gets registered
+    -- under its context tag. A record may carry one tag or a list of tags.
+    local context = specification.context
+    if type(context) == "table" then
+        for j=1,#context do
+            contexts[lower(context[j])] = specification
+        end
+    elseif context then
+        contexts[lower(context)] = specification
+    end
+end
+
+-- The record that is used when a tag cannot be resolved. At this point no
+-- fallback has been installed on the variants table yet, so this is a plain
+-- lookup of the record registered as "en-us".
+
+local defaultvariant = variants["en-us"]
+
+-- Resolves a (lowercased) tag and returns one field of the record that it
+-- identifies, or nil when the tag is unknown or the record lacks the field.
+--
+-- k   : the tag, tried as variant, opentype and context tag (in that order)
+-- key : the name of the wanted field, for instance "opentype" or "script"
+--
+-- Only entries that are really stored are consulted (hence the rawget), and only
+-- tables are accepted as record. The fallback handlers below cache their results,
+-- which are plain strings, in these very same tables. Without the type check such
+-- a cached string would be taken for a record: indexing it gives nil and the
+-- remaining tables would never be tried, which makes the outcome depend on the
+-- order in which tags happen to be looked up.
+
+local function get(k,key)
+    local v = rawget(variants,k)
+    if type(v) ~= "table" then
+        v = rawget(opentypes,k)
+        if type(v) ~= "table" then
+            v = rawget(contexts,k)
+            if type(v) ~= "table" then
+                return nil
+            end
+        end
+    end
+    return v[key]
+end
+
+-- Handler for tags that are not stored in the variants table. The tag is
+-- lowercased, the "language" field of the matching record (or else of the default
+-- record) is fetched, and the result is stored under the lowercased tag.
+--
+-- NOTE(review): none of the records defined in this file has a "language" field,
+-- so as it stands this handler seems to yield nil for every tag; maybe "variant"
+-- was meant. To be confirmed, the behaviour is left as it is.
+
+setmetatableindex(variants, function(cache,tag)
+    local name     = lower(tag)
+    local language = get(name,"language")
+    if not language then
+        language = defaultvariant.language
+    end
+    cache[name] = language
+    return language
+end)
+
+-- Handler for tags that are not stored in the opentypes table. The tag is
+-- lowercased and resolved as variant, opentype or context tag; the result is the
+-- opentype tag of the record found, or "dflt" when there is none. The result is
+-- stored under the lowercased tag.
+
+setmetatableindex(opentypes, function(cache,tag)
+    local name     = lower(tag)
+    local opentype = get(name,"opentype")
+    if not opentype then
+        opentype = "dflt"
+    end
+    cache[name] = opentype
+    return opentype
+end)
+
+-- Handler for tags that are not stored in the opentypescripts table. The tag is
+-- lowercased and the first of the following that has a value is used: the
+-- opentypescript field of the matching record, its script field, and then the
+-- same two fields of the default record. The result is stored under the
+-- lowercased tag.
+
+setmetatableindex(opentypescripts, function(cache,tag)
+    local name   = lower(tag)
+    local script = get(name,"opentypescript")
+    if not script then
+        script = get(name,"script")
+    end
+    if not script then
+        script = defaultvariant.opentypescript
+    end
+    if not script then
+        script = defaultvariant.script
+    end
+    cache[name] = script
+    return script
+end)
+
+-- Handler for tags that are not stored in the contexts table. The tag is
+-- lowercased and resolved as variant, opentype or context tag; the result is the
+-- context tag of the record found, or else the one of the default record. When a
+-- record carries a list of context tags the first one is used. The result is
+-- stored under the lowercased tag.
+
+setmetatableindex(contexts, function(t,k)
+    -- it is the requested key that gets lowercased; passing a name that is not
+    -- defined here hands nil to lower, which raises an error on every lookup
+    k = lower(k)
+    local v = get(k,"context") or defaultvariant.context
+    v = type(v) == "table" and v[1] or v
+    t[k] = v
+    return v
+end)
+
+-- Handler for tags that are not stored in the records table. The tag is
+-- lowercased and resolved as variant, opentype or context tag (in that order);
+-- the result is the complete record, or the default record when the tag is
+-- unknown. The result is stored under the lowercased tag.
+--
+-- The record is resolved right here: asking the field getter for a record
+-- without naming a field amounts to indexing with nil, which gives nil, so every
+-- tag would end up with the default record. Only tables are accepted, because
+-- the other handlers store plain strings in the tables that are consulted.
+
+setmetatableindex(records, function(t,k) -- how useful is this one?
+    k = lower(k)
+    local v = rawget(variants,k)
+    if type(v) ~= "table" then
+        v = rawget(opentypes,k)
+    end
+    if type(v) ~= "table" then
+        v = rawget(contexts,k)
+    end
+    if type(v) ~= "table" then
+        v = defaultvariant
+    end
+    t[k] = v
+    return v
+end)
+
+-- print(opentypes.nl,opentypescripts.nl)
+-- print(opentypes.de,opentypescripts.de)